1//===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
10// both before and after the DAG is legalized.
11//
12// This pass is not a substitute for the LLVM IR instcombine pass. This pass is
13// primarily intended to handle simplification opportunities that are implicit
14// in the LLVM IR and exposed by the various codegen lowering phases.
15//
16//===----------------------------------------------------------------------===//
17
18#include "llvm/ADT/APFloat.h"
19#include "llvm/ADT/APInt.h"
20#include "llvm/ADT/ArrayRef.h"
21#include "llvm/ADT/DenseMap.h"
23#include "llvm/ADT/STLExtras.h"
24#include "llvm/ADT/SetVector.h"
27#include "llvm/ADT/SmallSet.h"
29#include "llvm/ADT/Statistic.h"
51#include "llvm/IR/Attributes.h"
52#include "llvm/IR/Constant.h"
53#include "llvm/IR/DataLayout.h"
55#include "llvm/IR/Function.h"
56#include "llvm/IR/Metadata.h"
61#include "llvm/Support/Debug.h"
69#include <algorithm>
70#include <cassert>
71#include <cstdint>
72#include <functional>
73#include <iterator>
74#include <optional>
75#include <string>
76#include <tuple>
77#include <utility>
78#include <variant>
79
80#include "MatchContext.h"
81
82using namespace llvm;
83using namespace llvm::SDPatternMatch;
84
85#define DEBUG_TYPE "dagcombine"
86
87STATISTIC(NodesCombined , "Number of dag nodes combined");
88STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
89STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
90STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
91STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
92STATISTIC(SlicedLoads, "Number of loads sliced");
93STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");
94
95DEBUG_COUNTER(DAGCombineCounter, "dagcombine",
96 "Controls whether a DAG combine is performed for a node");
97
98static cl::opt<bool>
99CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
100 cl::desc("Enable DAG combiner's use of IR alias analysis"));
101
102static cl::opt<bool>
103UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
104 cl::desc("Enable DAG combiner's use of TBAA"));
105
106#ifndef NDEBUG
107static cl::opt<std::string>
108CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
109 cl::desc("Only use DAG-combiner alias analysis in this"
110 " function"));
111#endif
112
113/// Hidden option to stress test load slicing, i.e., when this option
114/// is enabled, load slicing bypasses most of its profitability guards.
115static cl::opt<bool>
116StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
117 cl::desc("Bypass the profitability model of load slicing"),
118 cl::init(false));
119
120static cl::opt<bool>
121 MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
122 cl::desc("DAG combiner may split indexing from loads"));
123
124static cl::opt<bool>
125 EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true),
126 cl::desc("DAG combiner enable merging multiple stores "
127 "into a wider store"));
128
130 "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
131 cl::desc("Limit the number of operands to inline for Token Factors"));
132
134 "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10),
135 cl::desc("Limit the number of times for the same StoreNode and RootNode "
136 "to bail out in store merging dependence check"));
137
139 "combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true),
140 cl::desc("DAG combiner enable reducing the width of load/op/store "
141 "sequence"));
142
144 "combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true),
145 cl::desc("DAG combiner enable load/<replace bytes>/store with "
146 "a narrower store"));
147
149 "combiner-vector-fcopysign-extend-round", cl::Hidden, cl::init(false),
150 cl::desc(
151 "Enable merging extends and rounds into FCOPYSIGN on vector types"));
152
153namespace {
154
155 class DAGCombiner {
156 SelectionDAG &DAG;
157 const TargetLowering &TLI;
158 const SelectionDAGTargetInfo *STI;
159 CombineLevel Level = BeforeLegalizeTypes;
160 CodeGenOptLevel OptLevel;
161 bool LegalDAG = false;
162 bool LegalOperations = false;
163 bool LegalTypes = false;
164 bool ForCodeSize;
165 bool DisableGenericCombines;
166
167 /// Worklist of all of the nodes that need to be simplified.
168 ///
169 /// This must behave as a stack -- new nodes to process are pushed onto the
170 /// back and when processing we pop off of the back.
171 ///
172 /// The worklist will not contain duplicates but may contain null entries
173 /// due to nodes being deleted from the underlying DAG. For fast lookup and
174 /// deduplication, the index of the node in this vector is stored in the
175 /// node in SDNode::CombinerWorklistIndex.
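/// For example, after adding nodes A, B and C their CombinerWorklistIndex
/// values are 0, 1 and 2; removeFromWorklist(B) then simply nulls out slot 1
/// in O(1), and getNextWorklistEntry() skips the null entry when popping.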
176 SmallVector<SDNode *, 64> Worklist;
177
178 /// This records all nodes attempted to be added to the worklist since we
179 /// considered a new worklist entry. As we do not add duplicate nodes to
180 /// the worklist, this is different from the tail of the worklist.
181 SmallSetVector<SDNode *, 32> PruningList;
182
183 /// Map from candidate StoreNode to the pair of RootNode and count.
184 /// The count is used to track how many times we have seen the StoreNode
185 /// with the same RootNode bail out in dependence check. If we have seen
186 /// the bail out for the same pair many times over a limit, we won't
187 /// consider the StoreNode with the same RootNode as store merging
188 /// candidate again.
189 DenseMap<SDNode *, std::pair<SDNode *, unsigned>> StoreRootCountMap;
190
191 // AA - Used for DAG load/store alias analysis.
192 AliasAnalysis *AA;
193
194 /// When an instruction is simplified, add all users of the instruction to
195 /// the work lists because they might get more simplified now.
196 void AddUsersToWorklist(SDNode *N) {
197 for (SDNode *Node : N->uses())
198 AddToWorklist(Node);
199 }
200
201 /// Convenient shorthand to add a node and all of its users to the worklist.
202 void AddToWorklistWithUsers(SDNode *N) {
203 AddUsersToWorklist(N);
204 AddToWorklist(N);
205 }
206
207 // Prune potentially dangling nodes. This is called after
208 // any visit to a node, but should also be called during a visit after any
209 // failed combine which may have created a DAG node.
210 void clearAddedDanglingWorklistEntries() {
211 // Check any nodes added to the worklist to see if they are prunable.
212 while (!PruningList.empty()) {
213 auto *N = PruningList.pop_back_val();
214 if (N->use_empty())
215 recursivelyDeleteUnusedNodes(N);
216 }
217 }
218
219 SDNode *getNextWorklistEntry() {
220 // Before we do any work, remove nodes that are not in use.
221 clearAddedDanglingWorklistEntries();
222 SDNode *N = nullptr;
223 // The Worklist holds the SDNodes in order, but it may contain null
224 // entries.
225 while (!N && !Worklist.empty()) {
226 N = Worklist.pop_back_val();
227 }
228
229 if (N) {
230 assert(N->getCombinerWorklistIndex() >= 0 &&
231 "Found a worklist entry without a corresponding map entry!");
232 // Set to -2 to indicate that we combined the node.
233 N->setCombinerWorklistIndex(-2);
234 }
235 return N;
236 }
237
238 /// Call the node-specific routine that folds each particular type of node.
239 SDValue visit(SDNode *N);
240
241 public:
242 DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOptLevel OL)
243 : DAG(D), TLI(D.getTargetLoweringInfo()),
244 STI(D.getSubtarget().getSelectionDAGInfo()), OptLevel(OL), AA(AA) {
245 ForCodeSize = DAG.shouldOptForSize();
246 DisableGenericCombines = STI && STI->disableGenericCombines(OptLevel);
247
248 MaximumLegalStoreInBits = 0;
249 // We use the minimum store size here, since that's all we can guarantee
250 // for the scalable vector types.
251 for (MVT VT : MVT::all_valuetypes())
252 if (EVT(VT).isSimple() && VT != MVT::Other &&
253 TLI.isTypeLegal(EVT(VT)) &&
254 VT.getSizeInBits().getKnownMinValue() >= MaximumLegalStoreInBits)
255 MaximumLegalStoreInBits = VT.getSizeInBits().getKnownMinValue();
256 }
257
258 void ConsiderForPruning(SDNode *N) {
259 // Mark this for potential pruning.
260 PruningList.insert(N);
261 }
262
263 /// Add to the worklist making sure its instance is at the back (next to be
264 /// processed.)
265 void AddToWorklist(SDNode *N, bool IsCandidateForPruning = true,
266 bool SkipIfCombinedBefore = false) {
267 assert(N->getOpcode() != ISD::DELETED_NODE &&
268 "Deleted Node added to Worklist");
269
270 // Skip handle nodes as they can't usefully be combined and confuse the
271 // zero-use deletion strategy.
272 if (N->getOpcode() == ISD::HANDLENODE)
273 return;
274
275 if (SkipIfCombinedBefore && N->getCombinerWorklistIndex() == -2)
276 return;
277
278 if (IsCandidateForPruning)
279 ConsiderForPruning(N);
280
281 if (N->getCombinerWorklistIndex() < 0) {
282 N->setCombinerWorklistIndex(Worklist.size());
283 Worklist.push_back(N);
284 }
285 }
286
287 /// Remove all instances of N from the worklist.
288 void removeFromWorklist(SDNode *N) {
289 PruningList.remove(N);
290 StoreRootCountMap.erase(N);
291
292 int WorklistIndex = N->getCombinerWorklistIndex();
293 // If not in the worklist, the index might be -1 or -2 (was combined
294 // before). As the node gets deleted anyway, there's no need to update
295 // the index.
296 if (WorklistIndex < 0)
297 return; // Not in the worklist.
298
299 // Null out the entry rather than erasing it to avoid a linear operation.
300 Worklist[WorklistIndex] = nullptr;
301 N->setCombinerWorklistIndex(-1);
302 }
303
304 void deleteAndRecombine(SDNode *N);
305 bool recursivelyDeleteUnusedNodes(SDNode *N);
306
307 /// Replaces all uses of the results of one DAG node with new values.
308 SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
309 bool AddTo = true);
310
311 /// Replaces all uses of the results of one DAG node with new values.
312 SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
313 return CombineTo(N, &Res, 1, AddTo);
314 }
315
316 /// Replaces all uses of the results of one DAG node with new values.
317 SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
318 bool AddTo = true) {
319 SDValue To[] = { Res0, Res1 };
320 return CombineTo(N, To, 2, AddTo);
321 }
322
323 void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
324
325 private:
326 unsigned MaximumLegalStoreInBits;
327
328 /// Check the specified integer node value to see if it can be simplified or
329 /// if things it uses can be simplified by bit propagation.
330 /// If so, return true.
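/// For example, for (and x, 0xFF) only the low 8 bits of x are demanded, so
/// a node feeding x that only affects the upper bits (such as an OR with
/// 0xFF00) can be stripped by the demanded-bits analysis.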
331 bool SimplifyDemandedBits(SDValue Op) {
332 unsigned BitWidth = Op.getScalarValueSizeInBits();
333 APInt DemandedBits = APInt::getAllOnes(BitWidth);
334 return SimplifyDemandedBits(Op, DemandedBits);
335 }
336
337 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
338 EVT VT = Op.getValueType();
339 APInt DemandedElts = VT.isFixedLengthVector()
340 ? APInt::getAllOnes(VT.getVectorNumElements())
341 : APInt(1, 1);
342 return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, false);
343 }
344
345 /// Check the specified vector node value to see if it can be simplified or
346 /// if things it uses can be simplified as it only uses some of the
347 /// elements. If so, return true.
348 bool SimplifyDemandedVectorElts(SDValue Op) {
349 // TODO: For now just pretend it cannot be simplified.
350 if (Op.getValueType().isScalableVector())
351 return false;
352
353 unsigned NumElts = Op.getValueType().getVectorNumElements();
354 APInt DemandedElts = APInt::getAllOnes(NumElts);
355 return SimplifyDemandedVectorElts(Op, DemandedElts);
356 }
357
358 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
359 const APInt &DemandedElts,
360 bool AssumeSingleUse = false);
361 bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
362 bool AssumeSingleUse = false);
363
364 bool CombineToPreIndexedLoadStore(SDNode *N);
365 bool CombineToPostIndexedLoadStore(SDNode *N);
366 SDValue SplitIndexingFromLoad(LoadSDNode *LD);
367 bool SliceUpLoad(SDNode *N);
368
369 // Looks up the chain to find a unique (unaliased) store feeding the passed
370 // load. If no such store is found, returns a nullptr.
371 // Note: This will look past a CALLSEQ_START if the load is chained to it so
373 // that it can find stack stores for byval params.
373 StoreSDNode *getUniqueStoreFeeding(LoadSDNode *LD, int64_t &Offset);
374 // Scalars have size 0 to distinguish from singleton vectors.
375 SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
376 bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
377 bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);
378
379 /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
380 /// load.
381 ///
382 /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
383 /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
384 /// \param EltNo index of the vector element to load.
385 /// \param OriginalLoad load that EVE came from to be replaced.
386 /// \returns EVE on success SDValue() on failure.
387 SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
388 SDValue EltNo,
389 LoadSDNode *OriginalLoad);
390 void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
391 SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
392 SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
393 SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
394 SDValue PromoteIntBinOp(SDValue Op);
395 SDValue PromoteIntShiftOp(SDValue Op);
396 SDValue PromoteExtend(SDValue Op);
397 bool PromoteLoad(SDValue Op);
398
399 SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
400 SDValue RHS, SDValue True, SDValue False,
401 ISD::CondCode CC);
402
403 /// Call the node-specific routine that knows how to fold each
404 /// particular type of node. If that doesn't do anything, try the
405 /// target-specific DAG combines.
406 SDValue combine(SDNode *N);
407
408 // Visitation implementation - Implement dag node combining for different
409 // node types. The semantics are as follows:
410 // Return Value:
411 // SDValue.getNode() == 0 - No change was made
412 // SDValue.getNode() == N - N was replaced, is dead and has been handled.
413 // otherwise - N should be replaced by the returned Operand.
414 //
415 SDValue visitTokenFactor(SDNode *N);
416 SDValue visitMERGE_VALUES(SDNode *N);
417 SDValue visitADD(SDNode *N);
418 SDValue visitADDLike(SDNode *N);
419 SDValue visitADDLikeCommutative(SDValue N0, SDValue N1, SDNode *LocReference);
420 SDValue visitSUB(SDNode *N);
421 SDValue visitADDSAT(SDNode *N);
422 SDValue visitSUBSAT(SDNode *N);
423 SDValue visitADDC(SDNode *N);
424 SDValue visitADDO(SDNode *N);
425 SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
426 SDValue visitSUBC(SDNode *N);
427 SDValue visitSUBO(SDNode *N);
428 SDValue visitADDE(SDNode *N);
429 SDValue visitUADDO_CARRY(SDNode *N);
430 SDValue visitSADDO_CARRY(SDNode *N);
431 SDValue visitUADDO_CARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
432 SDNode *N);
433 SDValue visitSADDO_CARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
434 SDNode *N);
435 SDValue visitSUBE(SDNode *N);
436 SDValue visitUSUBO_CARRY(SDNode *N);
437 SDValue visitSSUBO_CARRY(SDNode *N);
438 template <class MatchContextClass> SDValue visitMUL(SDNode *N);
439 SDValue visitMULFIX(SDNode *N);
440 SDValue useDivRem(SDNode *N);
441 SDValue visitSDIV(SDNode *N);
442 SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
443 SDValue visitUDIV(SDNode *N);
444 SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
445 SDValue visitREM(SDNode *N);
446 SDValue visitMULHU(SDNode *N);
447 SDValue visitMULHS(SDNode *N);
448 SDValue visitAVG(SDNode *N);
449 SDValue visitABD(SDNode *N);
450 SDValue visitSMUL_LOHI(SDNode *N);
451 SDValue visitUMUL_LOHI(SDNode *N);
452 SDValue visitMULO(SDNode *N);
453 SDValue visitIMINMAX(SDNode *N);
454 SDValue visitAND(SDNode *N);
455 SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
456 SDValue visitOR(SDNode *N);
457 SDValue visitORLike(SDValue N0, SDValue N1, const SDLoc &DL);
458 SDValue visitXOR(SDNode *N);
459 SDValue SimplifyVCastOp(SDNode *N, const SDLoc &DL);
460 SDValue SimplifyVBinOp(SDNode *N, const SDLoc &DL);
461 SDValue visitSHL(SDNode *N);
462 SDValue visitSRA(SDNode *N);
463 SDValue visitSRL(SDNode *N);
464 SDValue visitFunnelShift(SDNode *N);
465 SDValue visitSHLSAT(SDNode *N);
466 SDValue visitRotate(SDNode *N);
467 SDValue visitABS(SDNode *N);
468 SDValue visitBSWAP(SDNode *N);
469 SDValue visitBITREVERSE(SDNode *N);
470 SDValue visitCTLZ(SDNode *N);
471 SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
472 SDValue visitCTTZ(SDNode *N);
473 SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
474 SDValue visitCTPOP(SDNode *N);
475 SDValue visitSELECT(SDNode *N);
476 SDValue visitVSELECT(SDNode *N);
477 SDValue visitVP_SELECT(SDNode *N);
478 SDValue visitSELECT_CC(SDNode *N);
479 SDValue visitSETCC(SDNode *N);
480 SDValue visitSETCCCARRY(SDNode *N);
481 SDValue visitSIGN_EXTEND(SDNode *N);
482 SDValue visitZERO_EXTEND(SDNode *N);
483 SDValue visitANY_EXTEND(SDNode *N);
484 SDValue visitAssertExt(SDNode *N);
485 SDValue visitAssertAlign(SDNode *N);
486 SDValue visitSIGN_EXTEND_INREG(SDNode *N);
487 SDValue visitEXTEND_VECTOR_INREG(SDNode *N);
488 SDValue visitTRUNCATE(SDNode *N);
489 SDValue visitBITCAST(SDNode *N);
490 SDValue visitFREEZE(SDNode *N);
491 SDValue visitBUILD_PAIR(SDNode *N);
492 SDValue visitFADD(SDNode *N);
493 SDValue visitVP_FADD(SDNode *N);
494 SDValue visitVP_FSUB(SDNode *N);
495 SDValue visitSTRICT_FADD(SDNode *N);
496 SDValue visitFSUB(SDNode *N);
497 SDValue visitFMUL(SDNode *N);
498 template <class MatchContextClass> SDValue visitFMA(SDNode *N);
499 SDValue visitFMAD(SDNode *N);
500 SDValue visitFDIV(SDNode *N);
501 SDValue visitFREM(SDNode *N);
502 SDValue visitFSQRT(SDNode *N);
503 SDValue visitFCOPYSIGN(SDNode *N);
504 SDValue visitFPOW(SDNode *N);
505 SDValue visitSINT_TO_FP(SDNode *N);
506 SDValue visitUINT_TO_FP(SDNode *N);
507 SDValue visitFP_TO_SINT(SDNode *N);
508 SDValue visitFP_TO_UINT(SDNode *N);
509 SDValue visitXRINT(SDNode *N);
510 SDValue visitFP_ROUND(SDNode *N);
511 SDValue visitFP_EXTEND(SDNode *N);
512 SDValue visitFNEG(SDNode *N);
513 SDValue visitFABS(SDNode *N);
514 SDValue visitFCEIL(SDNode *N);
515 SDValue visitFTRUNC(SDNode *N);
516 SDValue visitFFREXP(SDNode *N);
517 SDValue visitFFLOOR(SDNode *N);
518 SDValue visitFMinMax(SDNode *N);
519 SDValue visitBRCOND(SDNode *N);
520 SDValue visitBR_CC(SDNode *N);
521 SDValue visitLOAD(SDNode *N);
522
523 SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
524 SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
525 SDValue replaceStoreOfInsertLoad(StoreSDNode *ST);
526
527 bool refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(SDNode *N);
528
529 SDValue visitSTORE(SDNode *N);
530 SDValue visitATOMIC_STORE(SDNode *N);
531 SDValue visitLIFETIME_END(SDNode *N);
532 SDValue visitINSERT_VECTOR_ELT(SDNode *N);
533 SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
534 SDValue visitBUILD_VECTOR(SDNode *N);
535 SDValue visitCONCAT_VECTORS(SDNode *N);
536 SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
537 SDValue visitVECTOR_SHUFFLE(SDNode *N);
538 SDValue visitSCALAR_TO_VECTOR(SDNode *N);
539 SDValue visitINSERT_SUBVECTOR(SDNode *N);
540 SDValue visitVECTOR_COMPRESS(SDNode *N);
541 SDValue visitMLOAD(SDNode *N);
542 SDValue visitMSTORE(SDNode *N);
543 SDValue visitMGATHER(SDNode *N);
544 SDValue visitMSCATTER(SDNode *N);
545 SDValue visitVPGATHER(SDNode *N);
546 SDValue visitVPSCATTER(SDNode *N);
547 SDValue visitVP_STRIDED_LOAD(SDNode *N);
548 SDValue visitVP_STRIDED_STORE(SDNode *N);
549 SDValue visitFP_TO_FP16(SDNode *N);
550 SDValue visitFP16_TO_FP(SDNode *N);
551 SDValue visitFP_TO_BF16(SDNode *N);
552 SDValue visitBF16_TO_FP(SDNode *N);
553 SDValue visitVECREDUCE(SDNode *N);
554 SDValue visitVPOp(SDNode *N);
555 SDValue visitGET_FPENV_MEM(SDNode *N);
556 SDValue visitSET_FPENV_MEM(SDNode *N);
557
558 template <class MatchContextClass>
559 SDValue visitFADDForFMACombine(SDNode *N);
560 template <class MatchContextClass>
561 SDValue visitFSUBForFMACombine(SDNode *N);
562 SDValue visitFMULForFMADistributiveCombine(SDNode *N);
563
564 SDValue XformToShuffleWithZero(SDNode *N);
565 bool reassociationCanBreakAddressingModePattern(unsigned Opc,
566 const SDLoc &DL,
567 SDNode *N,
568 SDValue N0,
569 SDValue N1);
570 SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
571 SDValue N1, SDNodeFlags Flags);
572 SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
573 SDValue N1, SDNodeFlags Flags);
574 SDValue reassociateReduction(unsigned RedOpc, unsigned Opc, const SDLoc &DL,
575 EVT VT, SDValue N0, SDValue N1,
576 SDNodeFlags Flags = SDNodeFlags());
577
578 SDValue visitShiftByConstant(SDNode *N);
579
580 SDValue foldSelectOfConstants(SDNode *N);
581 SDValue foldVSelectOfConstants(SDNode *N);
582 SDValue foldBinOpIntoSelect(SDNode *BO);
583 bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
584 SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
585 SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
586 SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
587 SDValue N2, SDValue N3, ISD::CondCode CC,
588 bool NotExtCompare = false);
589 SDValue convertSelectOfFPConstantsToLoadOffset(
590 const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
591 ISD::CondCode CC);
592 SDValue foldSignChangeInBitcast(SDNode *N);
593 SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
594 SDValue N2, SDValue N3, ISD::CondCode CC);
595 SDValue foldSelectOfBinops(SDNode *N);
596 SDValue foldSextSetcc(SDNode *N);
597 SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
598 const SDLoc &DL);
599 SDValue foldSubToUSubSat(EVT DstVT, SDNode *N, const SDLoc &DL);
600 SDValue foldABSToABD(SDNode *N, const SDLoc &DL);
601 SDValue unfoldMaskedMerge(SDNode *N);
602 SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
603 SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
604 const SDLoc &DL, bool foldBooleans);
605 SDValue rebuildSetCC(SDValue N);
606
607 bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
608 SDValue &CC, bool MatchStrict = false) const;
609 bool isOneUseSetCC(SDValue N) const;
610
611 SDValue foldAddToAvg(SDNode *N, const SDLoc &DL);
612 SDValue foldSubToAvg(SDNode *N, const SDLoc &DL);
613
614 SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
615 unsigned HiOp);
616 SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
617 SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
618 const TargetLowering &TLI);
619
620 SDValue CombineExtLoad(SDNode *N);
621 SDValue CombineZExtLogicopShiftLoad(SDNode *N);
622 SDValue combineRepeatedFPDivisors(SDNode *N);
623 SDValue combineFMulOrFDivWithIntPow2(SDNode *N);
624 SDValue mergeInsertEltWithShuffle(SDNode *N, unsigned InsIndex);
625 SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
626 SDValue combineInsertEltToLoad(SDNode *N, unsigned InsIndex);
627 SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
628 SDValue BuildSDIV(SDNode *N);
629 SDValue BuildSDIVPow2(SDNode *N);
630 SDValue BuildUDIV(SDNode *N);
631 SDValue BuildSREMPow2(SDNode *N);
632 SDValue buildOptimizedSREM(SDValue N0, SDValue N1, SDNode *N);
633 SDValue BuildLogBase2(SDValue V, const SDLoc &DL,
634 bool KnownNeverZero = false,
635 bool InexpensiveOnly = false,
636 std::optional<EVT> OutVT = std::nullopt);
637 SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags);
638 SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
639 SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
640 SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
641 SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
642 SDNodeFlags Flags, bool Reciprocal);
643 SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
644 SDNodeFlags Flags, bool Reciprocal);
645 SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
646 bool DemandHighBits = true);
647 SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
648 SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
649 SDValue InnerPos, SDValue InnerNeg, bool HasPos,
650 unsigned PosOpcode, unsigned NegOpcode,
651 const SDLoc &DL);
652 SDValue MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, SDValue Neg,
653 SDValue InnerPos, SDValue InnerNeg, bool HasPos,
654 unsigned PosOpcode, unsigned NegOpcode,
655 const SDLoc &DL);
656 SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
657 SDValue MatchLoadCombine(SDNode *N);
658 SDValue mergeTruncStores(StoreSDNode *N);
659 SDValue reduceLoadWidth(SDNode *N);
660 SDValue ReduceLoadOpStoreWidth(SDNode *N);
662 SDValue TransformFPLoadStorePair(SDNode *N);
663 SDValue convertBuildVecZextToZext(SDNode *N);
664 SDValue convertBuildVecZextToBuildVecWithZeros(SDNode *N);
665 SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
666 SDValue reduceBuildVecTruncToBitCast(SDNode *N);
667 SDValue reduceBuildVecToShuffle(SDNode *N);
668 SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
669 ArrayRef<int> VectorMask, SDValue VecIn1,
670 SDValue VecIn2, unsigned LeftIdx,
671 bool DidSplitVec);
672 SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
673
674 /// Walk up chain skipping non-aliasing memory nodes,
675 /// looking for aliasing nodes and adding them to the Aliases vector.
676 void GatherAllAliases(SDNode *N, SDValue OriginalChain,
677 SmallVectorImpl<SDValue> &Aliases);
678
679 /// Return true if there is any possibility that the two addresses overlap.
680 bool mayAlias(SDNode *Op0, SDNode *Op1) const;
681
682 /// Walk up chain skipping non-aliasing memory nodes, looking for a better
683 /// chain (aliasing node.)
684 SDValue FindBetterChain(SDNode *N, SDValue Chain);
685
686 /// Try to replace a store and any possibly adjacent stores on
687 /// consecutive chains with better chains. Return true only if St is
688 /// replaced.
689 ///
690 /// Notice that other chains may still be replaced even if the function
691 /// returns false.
692 bool findBetterNeighborChains(StoreSDNode *St);
693
694 // Helper for findBetterNeighborChains. Walk up the store chain and add
695 // additional chained stores that do not overlap and can be parallelized.
696 bool parallelizeChainedStores(StoreSDNode *St);
697
698 /// Holds a pointer to an LSBaseSDNode as well as information on where it
699 /// is located in a sequence of memory operations connected by a chain.
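/// For example, four consecutive i8 stores to p, p+1, p+2 and p+3 are
/// represented as MemOpLinks with OffsetFromBase 0, 1, 2 and 3, which
/// mergeConsecutiveStores may later combine into a single wider store.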
700 struct MemOpLink {
701 // Ptr to the mem node.
702 LSBaseSDNode *MemNode;
703
704 // Offset from the base ptr.
705 int64_t OffsetFromBase;
706
707 MemOpLink(LSBaseSDNode *N, int64_t Offset)
708 : MemNode(N), OffsetFromBase(Offset) {}
709 };
710
711 // Classify the origin of a stored value.
712 enum class StoreSource { Unknown, Constant, Extract, Load };
713 StoreSource getStoreSource(SDValue StoreVal) {
714 switch (StoreVal.getOpcode()) {
715 case ISD::Constant:
716 case ISD::ConstantFP:
717 return StoreSource::Constant;
718 case ISD::BUILD_VECTOR:
719 if (ISD::isBuildVectorOfConstantSDNodes(StoreVal.getNode()) ||
720 ISD::isBuildVectorOfConstantFPSDNodes(StoreVal.getNode()))
721 return StoreSource::Constant;
722 return StoreSource::Unknown;
723 case ISD::EXTRACT_VECTOR_ELT:
724 case ISD::EXTRACT_SUBVECTOR:
725 return StoreSource::Extract;
726 case ISD::LOAD:
727 return StoreSource::Load;
728 default:
729 return StoreSource::Unknown;
730 }
731 }
732
733 /// This is a helper function for visitMUL to check the profitability
734 /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
735 /// MulNode is the original multiply, AddNode is (add x, c1),
736 /// and ConstNode is c2.
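/// For example, with c1 = 3 and c2 = 5 this decides whether rewriting
/// (mul (add x, 3), 5) as (add (mul x, 5), 15) is worthwhile.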
737 bool isMulAddWithConstProfitable(SDNode *MulNode, SDValue AddNode,
738 SDValue ConstNode);
739
740 /// This is a helper function for visitAND and visitZERO_EXTEND. Returns
741 /// true if the (and (load x) c) pattern matches an extload. ExtVT returns
742 /// the type of the loaded value to be extended.
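/// For example, (and (load i32 p), 0xFFFF) matches a zero-extending i16
/// load, and ExtVT is set to i16.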
743 bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
744 EVT LoadResultTy, EVT &ExtVT);
745
746 /// Helper function to calculate whether the given Load/Store can have its
747 /// width reduced to ExtVT.
748 bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
749 EVT &MemVT, unsigned ShAmt = 0);
750
751 /// Used by BackwardsPropagateMask to find suitable loads.
752 bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
753 SmallPtrSetImpl<SDNode*> &NodesWithConsts,
754 ConstantSDNode *Mask, SDNode *&NodeToMask);
755 /// Attempt to propagate a given AND node back to load leaves so that they
756 /// can be combined into narrow loads.
757 bool BackwardsPropagateMask(SDNode *N);
758
759 /// Helper function for mergeConsecutiveStores which merges the component
760 /// store chains.
761 SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
762 unsigned NumStores);
763
764 /// Helper function for mergeConsecutiveStores which checks if all the store
765 /// nodes have the same underlying object. We can still reuse the first
766 /// store's pointer info if all the stores are from the same object.
767 bool hasSameUnderlyingObj(ArrayRef<MemOpLink> StoreNodes);
768
769 /// This is a helper function for mergeConsecutiveStores. When the source
770 /// elements of the consecutive stores are all constants or all extracted
771 /// vector elements, try to merge them into one larger store introducing
772 /// bitcasts if necessary. \return True if a merged store was created.
773 bool mergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
774 EVT MemVT, unsigned NumStores,
775 bool IsConstantSrc, bool UseVector,
776 bool UseTrunc);
777
778 /// This is a helper function for mergeConsecutiveStores. Stores that
779 /// potentially may be merged with St are placed in StoreNodes. RootNode is
780 /// a chain predecessor to all store candidates.
781 void getStoreMergeCandidates(StoreSDNode *St,
782 SmallVectorImpl<MemOpLink> &StoreNodes,
783 SDNode *&Root);
784
785 /// Helper function for mergeConsecutiveStores. Checks if candidate stores
786 /// have indirect dependency through their operands. RootNode is the
787 /// predecessor to all stores calculated by getStoreMergeCandidates and is
788 /// used to prune the dependency check. \return True if safe to merge.
789 bool checkMergeStoreCandidatesForDependencies(
790 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
791 SDNode *RootNode);
792
793 /// This is a helper function for mergeConsecutiveStores. Given a list of
794 /// store candidates, find the first N that are consecutive in memory.
795 /// Returns 0 if there are not at least 2 consecutive stores to try merging.
796 unsigned getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
797 int64_t ElementSizeBytes) const;
798
799 /// This is a helper function for mergeConsecutiveStores. It is used for
800 /// store chains that are composed entirely of constant values.
801 bool tryStoreMergeOfConstants(SmallVectorImpl<MemOpLink> &StoreNodes,
802 unsigned NumConsecutiveStores,
803 EVT MemVT, SDNode *Root, bool AllowVectors);
804
805 /// This is a helper function for mergeConsecutiveStores. It is used for
806 /// store chains that are composed entirely of extracted vector elements.
807 /// When extracting multiple vector elements, try to store them in one
808 /// vector store rather than a sequence of scalar stores.
809 bool tryStoreMergeOfExtracts(SmallVectorImpl<MemOpLink> &StoreNodes,
810 unsigned NumConsecutiveStores, EVT MemVT,
811 SDNode *Root);
812
813 /// This is a helper function for mergeConsecutiveStores. It is used for
814 /// store chains that are composed entirely of loaded values.
815 bool tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
816 unsigned NumConsecutiveStores, EVT MemVT,
817 SDNode *Root, bool AllowVectors,
818 bool IsNonTemporalStore, bool IsNonTemporalLoad);
819
820 /// Merge consecutive store operations into a wide store.
821 /// This optimization uses wide integers or vectors when possible.
822 /// \return true if stores were merged.
823 bool mergeConsecutiveStores(StoreSDNode *St);
824
825 /// Try to transform a truncation where C is a constant:
826 /// (trunc (and X, C)) -> (and (trunc X), (trunc C))
827 ///
828 /// \p N needs to be a truncation and its first operand an AND. Other
829 /// requirements are checked by the function (e.g. that trunc is
830 /// single-use); if they are not met, an empty SDValue is returned.
831 SDValue distributeTruncateThroughAnd(SDNode *N);
832
833 /// Helper function to determine whether the target supports operation
834 /// given by \p Opcode for type \p VT, that is, whether the operation
835 /// is legal or custom before legalizing operations, and whether it is
836 /// legal (but not custom) after legalization.
837 bool hasOperation(unsigned Opcode, EVT VT) {
838 return TLI.isOperationLegalOrCustom(Opcode, VT, LegalOperations);
839 }
840
841 public:
842 /// Runs the dag combiner on all nodes in the work list
843 void Run(CombineLevel AtLevel);
844
845 SelectionDAG &getDAG() const { return DAG; }
846
847 /// Convenience wrapper around TargetLowering::getShiftAmountTy.
848 EVT getShiftAmountTy(EVT LHSTy) {
849 return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout());
850 }
851
852 /// This method returns true if we are running before type legalization or
853 /// if the specified VT is legal.
854 bool isTypeLegal(const EVT &VT) {
855 if (!LegalTypes) return true;
856 return TLI.isTypeLegal(VT);
857 }
858
859 /// Convenience wrapper around TargetLowering::getSetCCResultType
860 EVT getSetCCResultType(EVT VT) const {
861 return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
862 }
863
864 void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
865 SDValue OrigLoad, SDValue ExtLoad,
866 ISD::NodeType ExtType);
867 };
868
869/// This class is a DAGUpdateListener that removes any deleted
870/// nodes from the worklist.
871class WorklistRemover : public SelectionDAG::DAGUpdateListener {
872 DAGCombiner &DC;
873
874public:
875 explicit WorklistRemover(DAGCombiner &dc)
876 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
877
878 void NodeDeleted(SDNode *N, SDNode *E) override {
879 DC.removeFromWorklist(N);
880 }
881};
882
883class WorklistInserter : public SelectionDAG::DAGUpdateListener {
884 DAGCombiner &DC;
885
886public:
887 explicit WorklistInserter(DAGCombiner &dc)
888 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
889
890 // FIXME: Ideally we could add N to the worklist, but this causes exponential
891 // compile time costs in large DAGs, e.g. Halide.
892 void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
893};
894
895} // end anonymous namespace
896
897//===----------------------------------------------------------------------===//
898// TargetLowering::DAGCombinerInfo implementation
899//===----------------------------------------------------------------------===//
900
901void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
902 ((DAGCombiner*)DC)->AddToWorklist(N);
903}
904
905SDValue TargetLowering::DAGCombinerInfo::
906CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
907 return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
908}
909
910SDValue TargetLowering::DAGCombinerInfo::
911CombineTo(SDNode *N, SDValue Res, bool AddTo) {
912 return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
913}
914
915SDValue TargetLowering::DAGCombinerInfo::
916CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
917 return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
918}
919
920bool TargetLowering::DAGCombinerInfo::
921recursivelyDeleteUnusedNodes(SDNode *N) {
922 return ((DAGCombiner*)DC)->recursivelyDeleteUnusedNodes(N);
923}
924
925void TargetLowering::DAGCombinerInfo::
926CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
927 return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
928}
929
930//===----------------------------------------------------------------------===//
931// Helper Functions
932//===----------------------------------------------------------------------===//
933
934void DAGCombiner::deleteAndRecombine(SDNode *N) {
935 removeFromWorklist(N);
936
937 // If the operands of this node are only used by the node, they will now be
938 // dead. Make sure to re-visit them and recursively delete dead nodes.
939 for (const SDValue &Op : N->ops())
940 // For an operand generating multiple values, one of the values may
941 // become dead allowing further simplification (e.g. split index
942 // arithmetic from an indexed load).
943 if (Op->hasOneUse() || Op->getNumValues() > 1)
944 AddToWorklist(Op.getNode());
945
946 DAG.DeleteNode(N);
947}
948
949 // APInts must be the same size for most operations; this helper
950// function zero extends the shorter of the pair so that they match.
951// We provide an Offset so that we can create bitwidths that won't overflow.
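// For example, an 8-bit LHS and a 16-bit RHS both become 16 bits wide
// (or 16 + Offset bits when a non-zero Offset is passed to leave headroom).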
952static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
953 unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
954 LHS = LHS.zext(Bits);
955 RHS = RHS.zext(Bits);
956}
957
958// Return true if this node is a setcc, or is a select_cc
959// that selects between the target values used for true and false, making it
960// equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
961// the appropriate nodes based on the type of node we are checking. This
962// simplifies life a bit for the callers.
963bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
964 SDValue &CC, bool MatchStrict) const {
965 if (N.getOpcode() == ISD::SETCC) {
966 LHS = N.getOperand(0);
967 RHS = N.getOperand(1);
968 CC = N.getOperand(2);
969 return true;
970 }
971
972 if (MatchStrict &&
973 (N.getOpcode() == ISD::STRICT_FSETCC ||
974 N.getOpcode() == ISD::STRICT_FSETCCS)) {
975 LHS = N.getOperand(1);
976 RHS = N.getOperand(2);
977 CC = N.getOperand(3);
978 return true;
979 }
980
981 if (N.getOpcode() != ISD::SELECT_CC || !TLI.isConstTrueVal(N.getOperand(2)) ||
982 !TLI.isConstFalseVal(N.getOperand(3)))
983 return false;
984
985 if (TLI.getBooleanContents(N.getValueType()) ==
986 TargetLowering::UndefinedBooleanContent)
987 return false;
988
989 LHS = N.getOperand(0);
990 RHS = N.getOperand(1);
991 CC = N.getOperand(4);
992 return true;
993}
994
995/// Return true if this is a SetCC-equivalent operation with only one use.
996/// If this is true, it allows the users to invert the operation for free when
997/// it is profitable to do so.
998bool DAGCombiner::isOneUseSetCC(SDValue N) const {
999 SDValue N0, N1, N2;
1000 if (isSetCCEquivalent(N, N0, N1, N2) && N->hasOneUse())
1001 return true;
1002 return false;
1003}
1004
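// Returns true if N is a splat/build vector whose constant splat value is
// exactly the all-ones mask for ScalarTy, e.g. a splat of 0xFFFF for an i16
// scalar type.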
1005static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy) {
1006 if (!ScalarTy.isSimple())
1007 return false;
1008
1009 uint64_t MaskForTy = 0ULL;
1010 switch (ScalarTy.getSimpleVT().SimpleTy) {
1011 case MVT::i8:
1012 MaskForTy = 0xFFULL;
1013 break;
1014 case MVT::i16:
1015 MaskForTy = 0xFFFFULL;
1016 break;
1017 case MVT::i32:
1018 MaskForTy = 0xFFFFFFFFULL;
1019 break;
1020 default:
1021 return false;
1022 break;
1023 }
1024
1025 APInt Val;
1026 if (ISD::isConstantSplatVector(N, Val))
1027 return Val.getLimitedValue() == MaskForTy;
1028
1029 return false;
1030}
1031
1032// Determines if it is a constant integer or a splat/build vector of constant
1033// integers (and undefs).
1034// Do not permit build vector implicit truncation.
1035static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
1036 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
1037 return !(Const->isOpaque() && NoOpaques);
1038 if (N.getOpcode() != ISD::BUILD_VECTOR && N.getOpcode() != ISD::SPLAT_VECTOR)
1039 return false;
1040 unsigned BitWidth = N.getScalarValueSizeInBits();
1041 for (const SDValue &Op : N->op_values()) {
1042 if (Op.isUndef())
1043 continue;
1044 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
1045 if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
1046 (Const->isOpaque() && NoOpaques))
1047 return false;
1048 }
1049 return true;
1050}
1051
1052// Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
1053 // undefs.
1054static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
1055 if (V.getOpcode() != ISD::BUILD_VECTOR)
1056 return false;
1057 return isConstantOrConstantVector(V, NoOpaques) ||
1058 ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
1059}
1060
1061 // Determine if this is an indexed load with an opaque target constant index.
1062static bool canSplitIdx(LoadSDNode *LD) {
1063 return MaySplitLoadIndex &&
1064 (LD->getOperand(2).getOpcode() != ISD::TargetConstant ||
1065 !cast<ConstantSDNode>(LD->getOperand(2))->isOpaque());
1066}
1067
1068bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
1069 const SDLoc &DL,
1070 SDNode *N,
1071 SDValue N0,
1072 SDValue N1) {
1073 // Currently this only tries to ensure we don't undo the GEP splits done by
1074 // CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this,
1075 // we check if the following transformation would be problematic:
1076 // (load/store (add, (add, x, offset1), offset2)) ->
1077 // (load/store (add, x, offset1+offset2)).
1078
1079 // (load/store (add, (add, x, y), offset2)) ->
1080 // (load/store (add, (add, x, offset2), y)).
1081
1082 if (N0.getOpcode() != ISD::ADD)
1083 return false;
1084
1085 // Check for vscale addressing modes.
1086 // (load/store (add/sub (add x, y), vscale))
1087 // (load/store (add/sub (add x, y), (lsl vscale, C)))
1088 // (load/store (add/sub (add x, y), (mul vscale, C)))
1089 if ((N1.getOpcode() == ISD::VSCALE ||
1090 ((N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::MUL) &&
1091 N1.getOperand(0).getOpcode() == ISD::VSCALE &&
1092 isa<ConstantSDNode>(N1.getOperand(1)))) &&
1093 N1.getValueType().getFixedSizeInBits() <= 64) {
1094 int64_t ScalableOffset = N1.getOpcode() == ISD::VSCALE
1095 ? N1.getConstantOperandVal(0)
1096 : (N1.getOperand(0).getConstantOperandVal(0) *
1097 (N1.getOpcode() == ISD::SHL
1098 ? (1LL << N1.getConstantOperandVal(1))
1099 : N1.getConstantOperandVal(1)));
1100 if (Opc == ISD::SUB)
1101 ScalableOffset = -ScalableOffset;
1102 if (all_of(N->uses(), [&](SDNode *Node) {
1103 if (auto *LoadStore = dyn_cast<MemSDNode>(Node);
1104 LoadStore && LoadStore->getBasePtr().getNode() == N) {
1105 TargetLoweringBase::AddrMode AM;
1106 AM.HasBaseReg = true;
1107 AM.ScalableOffset = ScalableOffset;
1108 EVT VT = LoadStore->getMemoryVT();
1109 unsigned AS = LoadStore->getAddressSpace();
1110 Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1111 return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy,
1112 AS);
1113 }
1114 return false;
1115 }))
1116 return true;
1117 }
1118
1119 if (Opc != ISD::ADD)
1120 return false;
1121
1122 auto *C2 = dyn_cast<ConstantSDNode>(N1);
1123 if (!C2)
1124 return false;
1125
1126 const APInt &C2APIntVal = C2->getAPIntValue();
1127 if (C2APIntVal.getSignificantBits() > 64)
1128 return false;
1129
1130 if (auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
1131 if (N0.hasOneUse())
1132 return false;
1133
1134 const APInt &C1APIntVal = C1->getAPIntValue();
1135 const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
1136 if (CombinedValueIntVal.getSignificantBits() > 64)
1137 return false;
1138 const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();
1139
1140 for (SDNode *Node : N->uses()) {
1141 if (auto *LoadStore = dyn_cast<MemSDNode>(Node)) {
1142 // Is x[offset2] already not a legal addressing mode? If so then
1143 // reassociating the constants breaks nothing (we test offset2 because
1144 // that's the one we hope to fold into the load or store).
1146 AM.HasBaseReg = true;
1147 AM.BaseOffs = C2APIntVal.getSExtValue();
1148 EVT VT = LoadStore->getMemoryVT();
1149 unsigned AS = LoadStore->getAddressSpace();
1150 Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1151 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1152 continue;
1153
1154 // Would x[offset1+offset2] still be a legal addressing mode?
1155 AM.BaseOffs = CombinedValue;
1156 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1157 return true;
1158 }
1159 }
1160 } else {
1161 if (auto *GA = dyn_cast<GlobalAddressSDNode>(N0.getOperand(1)))
1162 if (GA->getOpcode() == ISD::GlobalAddress && TLI.isOffsetFoldingLegal(GA))
1163 return false;
1164
1165 for (SDNode *Node : N->uses()) {
1166 auto *LoadStore = dyn_cast<MemSDNode>(Node);
1167 if (!LoadStore)
1168 return false;
1169
1170 // Is x[offset2] a legal addressing mode? If so then
1171 // reassociating the constants breaks the address pattern.
1172 TargetLoweringBase::AddrMode AM;
1173 AM.HasBaseReg = true;
1174 AM.BaseOffs = C2APIntVal.getSExtValue();
1175 EVT VT = LoadStore->getMemoryVT();
1176 unsigned AS = LoadStore->getAddressSpace();
1177 Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1178 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1179 return false;
1180 }
1181 return true;
1182 }
1183
1184 return false;
1185}
1186
1187/// Helper for DAGCombiner::reassociateOps. Try to reassociate (Opc N0, N1) if
1188/// \p N0 is the same kind of operation as \p Opc.
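/// For example, with Opc == ISD::ADD, (add (add x, 8), 16) folds to
/// (add x, 24) when both inner operands are constants, and
/// (add (add x, c1), y) becomes (add (add x, y), c1) when the target reports
/// reassociation as profitable.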
1189SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
1190 SDValue N0, SDValue N1,
1191 SDNodeFlags Flags) {
1192 EVT VT = N0.getValueType();
1193
1194 if (N0.getOpcode() != Opc)
1195 return SDValue();
1196
1197 SDValue N00 = N0.getOperand(0);
1198 SDValue N01 = N0.getOperand(1);
1199
1200 if (DAG.isConstantIntBuildVectorOrConstantInt(N01)) {
1201 SDNodeFlags NewFlags;
1202 if (N0.getOpcode() == ISD::ADD && N0->getFlags().hasNoUnsignedWrap() &&
1203 Flags.hasNoUnsignedWrap())
1204 NewFlags.setNoUnsignedWrap(true);
1205
1206 if (DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
1207 // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
1208 if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, {N01, N1}))
1209 return DAG.getNode(Opc, DL, VT, N00, OpNode, NewFlags);
1210 return SDValue();
1211 }
1212 if (TLI.isReassocProfitable(DAG, N0, N1)) {
1213 // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
1214 // iff (op x, c1) has one use
1215 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1, NewFlags);
1216 return DAG.getNode(Opc, DL, VT, OpNode, N01, NewFlags);
1217 }
1218 }
1219
1220 // Check for repeated operand logic simplifications.
1221 if (Opc == ISD::AND || Opc == ISD::OR) {
1222 // (N00 & N01) & N00 --> N00 & N01
1223 // (N00 & N01) & N01 --> N00 & N01
1224 // (N00 | N01) | N00 --> N00 | N01
1225 // (N00 | N01) | N01 --> N00 | N01
1226 if (N1 == N00 || N1 == N01)
1227 return N0;
1228 }
1229 if (Opc == ISD::XOR) {
1230 // (N00 ^ N01) ^ N00 --> N01
1231 if (N1 == N00)
1232 return N01;
1233 // (N00 ^ N01) ^ N01 --> N00
1234 if (N1 == N01)
1235 return N00;
1236 }
1237
1238 if (TLI.isReassocProfitable(DAG, N0, N1)) {
1239 if (N1 != N01) {
1240 // Reassociate if (op N00, N1) already exist
1241 if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N00, N1})) {
1242 // If (Op (Op N00, N1), N01) already exists,
1243 // we need to stop reassociating to avoid an infinite loop.
1244 if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N01}))
1245 return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N01);
1246 }
1247 }
1248
1249 if (N1 != N00) {
1250 // Reassociate if (op N01, N1) already exist
1251 if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N01, N1})) {
1252 // If (Op (Op N01, N1), N00) already exists,
1253 // we need to stop reassociating to avoid an infinite loop.
1254 if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N00}))
1255 return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N00);
1256 }
1257 }
1258
1259 // Reassociate the operands from (OR/AND (OR/AND(N00, N01)), N1) to (OR/AND
1260 // (OR/AND(N00, N1)), N01) when N00 and N1 are comparisons with the same
1261 // predicate or to (OR/AND (OR/AND(N1, N01)), N00) when N01 and N1 are
1262 // comparisons with the same predicate. This enables optimizations as the
1263 // following one:
1264 // CMP(A,C)||CMP(B,C) => CMP(MIN/MAX(A,B), C)
1265 // CMP(A,C)&&CMP(B,C) => CMP(MIN/MAX(A,B), C)
1266 if (Opc == ISD::AND || Opc == ISD::OR) {
1267 if (N1->getOpcode() == ISD::SETCC && N00->getOpcode() == ISD::SETCC &&
1268 N01->getOpcode() == ISD::SETCC) {
1269 ISD::CondCode CC1 = cast<CondCodeSDNode>(N1.getOperand(2))->get();
1270 ISD::CondCode CC00 = cast<CondCodeSDNode>(N00.getOperand(2))->get();
1271 ISD::CondCode CC01 = cast<CondCodeSDNode>(N01.getOperand(2))->get();
1272 if (CC1 == CC00 && CC1 != CC01) {
1273 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1, Flags);
1274 return DAG.getNode(Opc, DL, VT, OpNode, N01, Flags);
1275 }
1276 if (CC1 == CC01 && CC1 != CC00) {
1277 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N01, N1, Flags);
1278 return DAG.getNode(Opc, DL, VT, OpNode, N00, Flags);
1279 }
1280 }
1281 }
1282 }
1283
1284 return SDValue();
1285}
1286
1287/// Try to reassociate commutative (Opc N0, N1) if either \p N0 or \p N1 is the
1288/// same kind of operation as \p Opc.
1289SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
1290 SDValue N1, SDNodeFlags Flags) {
1291 assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");
1292
1293 // Floating-point reassociation is not allowed without loose FP math.
1294 if (N0.getValueType().isFloatingPoint() ||
1295 N1.getValueType().isFloatingPoint())
1296 if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros())
1297 return SDValue();
1298
1299 if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1, Flags))
1300 return Combined;
1301 if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0, Flags))
1302 return Combined;
1303 return SDValue();
1304}
1305
1306// Try to fold Opc(vecreduce(x), vecreduce(y)) -> vecreduce(Opc(x, y))
1307// Note that we only expect Flags to be passed from FP operations. For integer
1308// operations they need to be dropped.
1309SDValue DAGCombiner::reassociateReduction(unsigned RedOpc, unsigned Opc,
1310 const SDLoc &DL, EVT VT, SDValue N0,
1311 SDValue N1, SDNodeFlags Flags) {
1312 if (N0.getOpcode() == RedOpc && N1.getOpcode() == RedOpc &&
1313 N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
1314 N0->hasOneUse() && N1->hasOneUse() &&
1316 TLI.shouldReassociateReduction(RedOpc, N0.getOperand(0).getValueType())) {
1317 SelectionDAG::FlagInserter FlagsInserter(DAG, Flags);
1318 return DAG.getNode(RedOpc, DL, VT,
1319 DAG.getNode(Opc, DL, N0.getOperand(0).getValueType(),
1320 N0.getOperand(0), N1.getOperand(0)));
1321 }
1322 return SDValue();
1323}
1324
1325SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
1326 bool AddTo) {
1327 assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
1328 ++NodesCombined;
1329 LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
1330 To[0].dump(&DAG);
1331 dbgs() << " and " << NumTo - 1 << " other values\n");
1332 for (unsigned i = 0, e = NumTo; i != e; ++i)
1333 assert((!To[i].getNode() ||
1334 N->getValueType(i) == To[i].getValueType()) &&
1335 "Cannot combine value to value of different type!");
1336
1337 WorklistRemover DeadNodes(*this);
1338 DAG.ReplaceAllUsesWith(N, To);
1339 if (AddTo) {
1340 // Push the new nodes and any users onto the worklist
1341 for (unsigned i = 0, e = NumTo; i != e; ++i) {
1342 if (To[i].getNode())
1343 AddToWorklistWithUsers(To[i].getNode());
1344 }
1345 }
1346
1347 // Finally, if the node is now dead, remove it from the graph. The node
1348 // may not be dead if the replacement process recursively simplified to
1349 // something else needing this node.
1350 if (N->use_empty())
1351 deleteAndRecombine(N);
1352 return SDValue(N, 0);
1353}
1354
1355void DAGCombiner::
1356CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
1357 // Replace the old value with the new one.
1358 ++NodesCombined;
1359 LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.dump(&DAG);
1360 dbgs() << "\nWith: "; TLO.New.dump(&DAG); dbgs() << '\n');
1361
1362 // Replace all uses.
1363 DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
1364
1365 // Push the new node and any (possibly new) users onto the worklist.
1366 AddToWorklistWithUsers(TLO.New.getNode());
1367
1368 // Finally, if the node is now dead, remove it from the graph.
1369 recursivelyDeleteUnusedNodes(TLO.Old.getNode());
1370}
1371
1372/// Check the specified integer node value to see if it can be simplified or if
1373/// things it uses can be simplified by bit propagation. If so, return true.
1374bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
1375 const APInt &DemandedElts,
1376 bool AssumeSingleUse) {
1377 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1378 KnownBits Known;
1379 if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, 0,
1380 AssumeSingleUse))
1381 return false;
1382
1383 // Revisit the node.
1384 AddToWorklist(Op.getNode());
1385
1386 CommitTargetLoweringOpt(TLO);
1387 return true;
1388}
1389
1390/// Check the specified vector node value to see if it can be simplified or
1391/// if things it uses can be simplified as it only uses some of the elements.
1392/// If so, return true.
1393bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
1394 const APInt &DemandedElts,
1395 bool AssumeSingleUse) {
1396 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1397 APInt KnownUndef, KnownZero;
1398 if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
1399 TLO, 0, AssumeSingleUse))
1400 return false;
1401
1402 // Revisit the node.
1403 AddToWorklist(Op.getNode());
1404
1405 CommitTargetLoweringOpt(TLO);
1406 return true;
1407}
1408
1409void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
1410 SDLoc DL(Load);
1411 EVT VT = Load->getValueType(0);
1412 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
1413
1414 LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
1415 Trunc.dump(&DAG); dbgs() << '\n');
1416
1417 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
1418 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
1419
1420 AddToWorklist(Trunc.getNode());
1421 recursivelyDeleteUnusedNodes(Load);
1422}
1423
1424SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
1425 Replace = false;
1426 SDLoc DL(Op);
1427 if (ISD::isUNINDEXEDLoad(Op.getNode())) {
1428 LoadSDNode *LD = cast<LoadSDNode>(Op);
1429 EVT MemVT = LD->getMemoryVT();
1430 ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1431 : LD->getExtensionType();
1432 Replace = true;
1433 return DAG.getExtLoad(ExtType, DL, PVT,
1434 LD->getChain(), LD->getBasePtr(),
1435 MemVT, LD->getMemOperand());
1436 }
1437
1438 unsigned Opc = Op.getOpcode();
1439 switch (Opc) {
1440 default: break;
1441 case ISD::AssertSext:
1442 if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
1443 return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
1444 break;
1445 case ISD::AssertZext:
1446 if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
1447 return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
1448 break;
1449 case ISD::Constant: {
1450 unsigned ExtOpc =
1451 Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1452 return DAG.getNode(ExtOpc, DL, PVT, Op);
1453 }
1454 }
1455
1456 if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
1457 return SDValue();
1458 return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
1459}
1460
1461SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1462 if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
1463 return SDValue();
1464 EVT OldVT = Op.getValueType();
1465 SDLoc DL(Op);
1466 bool Replace = false;
1467 SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1468 if (!NewOp.getNode())
1469 return SDValue();
1470 AddToWorklist(NewOp.getNode());
1471
1472 if (Replace)
1473 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1474 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1475 DAG.getValueType(OldVT));
1476}
1477
1478SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1479 EVT OldVT = Op.getValueType();
1480 SDLoc DL(Op);
1481 bool Replace = false;
1482 SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1483 if (!NewOp.getNode())
1484 return SDValue();
1485 AddToWorklist(NewOp.getNode());
1486
1487 if (Replace)
1488 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1489 return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1490}
1491
1492/// Promote the specified integer binary operation if the target indicates it is
1493/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1494/// i32 since i16 instructions are longer.
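/// For example, an i16 (add a, b) can be rewritten as
/// (trunc (add (anyext a), (anyext b))) in i32 when the target reports i16
/// as an undesirable type for the operation.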
1495SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1496 if (!LegalOperations)
1497 return SDValue();
1498
1499 EVT VT = Op.getValueType();
1500 if (VT.isVector() || !VT.isInteger())
1501 return SDValue();
1502
1503 // If operation type is 'undesirable', e.g. i16 on x86, consider
1504 // promoting it.
1505 unsigned Opc = Op.getOpcode();
1506 if (TLI.isTypeDesirableForOp(Opc, VT))
1507 return SDValue();
1508
1509 EVT PVT = VT;
1510 // Consult target whether it is a good idea to promote this operation and
1511 // what's the right type to promote it to.
1512 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1513 assert(PVT != VT && "Don't know what type to promote to!");
1514
1515 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1516
1517 bool Replace0 = false;
1518 SDValue N0 = Op.getOperand(0);
1519 SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1520
1521 bool Replace1 = false;
1522 SDValue N1 = Op.getOperand(1);
1523 SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
1524 SDLoc DL(Op);
1525
1526 SDValue RV =
1527 DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
1528
1529 // We are always replacing N0/N1's use in N and only need additional
1530 // replacements if there are additional uses.
1531 // Note: We are checking uses of the *nodes* (SDNode) rather than values
1532 // (SDValue) here because the node may reference multiple values
1533 // (for example, the chain value of a load node).
1534 Replace0 &= !N0->hasOneUse();
1535 Replace1 &= (N0 != N1) && !N1->hasOneUse();
1536
1537 // Combine Op here so it is preserved past replacements.
1538 CombineTo(Op.getNode(), RV);
1539
1540 // If operands have a use ordering, make sure we deal with
1541 // predecessor first.
1542 if (Replace0 && Replace1 && N0->isPredecessorOf(N1.getNode())) {
1543 std::swap(N0, N1);
1544 std::swap(NN0, NN1);
1545 }
1546
1547 if (Replace0) {
1548 AddToWorklist(NN0.getNode());
1549 ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
1550 }
1551 if (Replace1) {
1552 AddToWorklist(NN1.getNode());
1553 ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
1554 }
1555 return Op;
1556 }
1557 return SDValue();
1558}
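// Worked example (illustrative, assuming a target where i16 is undesirable
// and IsDesirableToPromoteOp picks PVT = i32): the promotion above rewrites
//   (i16 add x, y)
// into
//   (i16 trunc (i32 add (any_extend x), (any_extend y)))
// and any load feeding an operand is replaced by an extending load of i32.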
1559
1560/// Promote the specified integer shift operation if the target indicates it is
1561/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1562/// i32 since i16 instructions are longer.
1563SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1564 if (!LegalOperations)
1565 return SDValue();
1566
1567 EVT VT = Op.getValueType();
1568 if (VT.isVector() || !VT.isInteger())
1569 return SDValue();
1570
1571 // If operation type is 'undesirable', e.g. i16 on x86, consider
1572 // promoting it.
1573 unsigned Opc = Op.getOpcode();
1574 if (TLI.isTypeDesirableForOp(Opc, VT))
1575 return SDValue();
1576
1577 EVT PVT = VT;
1578 // Consult target whether it is a good idea to promote this operation and
1579 // what's the right type to promote it to.
1580 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1581 assert(PVT != VT && "Don't know what type to promote to!");
1582
1583 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1584
1585 bool Replace = false;
1586 SDValue N0 = Op.getOperand(0);
1587 if (Opc == ISD::SRA)
1588 N0 = SExtPromoteOperand(N0, PVT);
1589 else if (Opc == ISD::SRL)
1590 N0 = ZExtPromoteOperand(N0, PVT);
1591 else
1592 N0 = PromoteOperand(N0, PVT, Replace);
1593
1594 if (!N0.getNode())
1595 return SDValue();
1596
1597 SDLoc DL(Op);
1598 SDValue N1 = Op.getOperand(1);
1599 SDValue RV =
1600 DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
1601
1602 if (Replace)
1603 ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1604
1605 // Deal with Op being deleted.
1606 if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1607 return RV;
1608 }
1609 return SDValue();
1610}
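// Worked example (illustrative, same i16 -> i32 assumption as above): because
// the value of the shifted-in bits matters, the first operand is widened to
// match the shift kind, roughly:
//   (i16 srl x, c) -> (trunc (i32 srl (zero-extend-in-reg x), c))
//   (i16 sra x, c) -> (trunc (i32 sra (sign_extend_inreg x, i16), c))
//   (i16 shl x, c) -> (trunc (i32 shl (any_extend x), c))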
1611
1612SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1613 if (!LegalOperations)
1614 return SDValue();
1615
1616 EVT VT = Op.getValueType();
1617 if (VT.isVector() || !VT.isInteger())
1618 return SDValue();
1619
1620 // If operation type is 'undesirable', e.g. i16 on x86, consider
1621 // promoting it.
1622 unsigned Opc = Op.getOpcode();
1623 if (TLI.isTypeDesirableForOp(Opc, VT))
1624 return SDValue();
1625
1626 EVT PVT = VT;
1627 // Consult target whether it is a good idea to promote this operation and
1628 // what's the right type to promote it to.
1629 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1630 assert(PVT != VT && "Don't know what type to promote to!");
1631 // fold (aext (aext x)) -> (aext x)
1632 // fold (aext (zext x)) -> (zext x)
1633 // fold (aext (sext x)) -> (sext x)
1634 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1635 return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1636 }
1637 return SDValue();
1638}
1639
1640bool DAGCombiner::PromoteLoad(SDValue Op) {
1641 if (!LegalOperations)
1642 return false;
1643
1644 if (!ISD::isUNINDEXEDLoad(Op.getNode()))
1645 return false;
1646
1647 EVT VT = Op.getValueType();
1648 if (VT.isVector() || !VT.isInteger())
1649 return false;
1650
1651 // If operation type is 'undesirable', e.g. i16 on x86, consider
1652 // promoting it.
1653 unsigned Opc = Op.getOpcode();
1654 if (TLI.isTypeDesirableForOp(Opc, VT))
1655 return false;
1656
1657 EVT PVT = VT;
1658 // Consult target whether it is a good idea to promote this operation and
1659 // what's the right type to promote it to.
1660 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1661 assert(PVT != VT && "Don't know what type to promote to!");
1662
1663 SDLoc DL(Op);
1664 SDNode *N = Op.getNode();
1665 LoadSDNode *LD = cast<LoadSDNode>(N);
1666 EVT MemVT = LD->getMemoryVT();
1667 ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1668 : LD->getExtensionType();
1669 SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
1670 LD->getChain(), LD->getBasePtr(),
1671 MemVT, LD->getMemOperand());
1672 SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
1673
1674 LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
1675 Result.dump(&DAG); dbgs() << '\n');
1676
1677 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1678 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1679
1680 AddToWorklist(Result.getNode());
1681 recursivelyDeleteUnusedNodes(N);
1682 return true;
1683 }
1684
1685 return false;
1686}
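// Worked example (illustrative, i16 -> i32 assumption as above): a plain
//   t: i16,ch = load p
// becomes
//   t1: i32,ch = extload p, i16 ; t: i16 = trunc t1
// so existing users still see an i16 value, and the chain result is taken
// from the new, wider load.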
1687
1688/// Recursively delete a node which has no uses and any operands for
1689/// which it is the only use.
1690///
1691/// Note that this both deletes the nodes and removes them from the worklist.
1692 /// It also adds any nodes that have had a user deleted to the worklist, as they
1693 /// may now have only one use and be subject to other combines.
1694bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1695 if (!N->use_empty())
1696 return false;
1697
1698 SmallSetVector<SDNode *, 16> Nodes;
1699 Nodes.insert(N);
1700 do {
1701 N = Nodes.pop_back_val();
1702 if (!N)
1703 continue;
1704
1705 if (N->use_empty()) {
1706 for (const SDValue &ChildN : N->op_values())
1707 Nodes.insert(ChildN.getNode());
1708
1709 removeFromWorklist(N);
1710 DAG.DeleteNode(N);
1711 } else {
1712 AddToWorklist(N);
1713 }
1714 } while (!Nodes.empty());
1715 return true;
1716}
1717
1718//===----------------------------------------------------------------------===//
1719// Main DAG Combiner implementation
1720//===----------------------------------------------------------------------===//
1721
1722void DAGCombiner::Run(CombineLevel AtLevel) {
1723 // set the instance variables, so that the various visit routines may use it.
1724 Level = AtLevel;
1725 LegalDAG = Level >= AfterLegalizeDAG;
1726 LegalOperations = Level >= AfterLegalizeVectorOps;
1727 LegalTypes = Level >= AfterLegalizeTypes;
1728
1729 WorklistInserter AddNodes(*this);
1730
1731 // Add all the dag nodes to the worklist.
1732 //
1733 // Note: Not all nodes are added to the PruningList here, because the only
1734 // nodes which can be deleted are those which have no uses, and all other
1735 // nodes which would otherwise be added to the worklist by the first call to
1736 // getNextWorklistEntry are already present in it.
1737 for (SDNode &Node : DAG.allnodes())
1738 AddToWorklist(&Node, /* IsCandidateForPruning */ Node.use_empty());
1739
1740 // Create a dummy node (which is not added to allnodes), that adds a reference
1741 // to the root node, preventing it from being deleted, and tracking any
1742 // changes of the root.
1743 HandleSDNode Dummy(DAG.getRoot());
1744
1745 // While we have a valid worklist entry node, try to combine it.
1746 while (SDNode *N = getNextWorklistEntry()) {
1747 // If N has no uses, it is dead. Make sure to revisit all N's operands once
1748 // N is deleted from the DAG, since they too may now be dead or may have a
1749 // reduced number of uses, allowing other xforms.
1750 if (recursivelyDeleteUnusedNodes(N))
1751 continue;
1752
1753 WorklistRemover DeadNodes(*this);
1754
1755 // If this combine is running after legalizing the DAG, re-legalize any
1756 // nodes pulled off the worklist.
1757 if (LegalDAG) {
1758 SmallSetVector<SDNode *, 16> UpdatedNodes;
1759 bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
1760
1761 for (SDNode *LN : UpdatedNodes)
1762 AddToWorklistWithUsers(LN);
1763
1764 if (!NIsValid)
1765 continue;
1766 }
1767
1768 LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
1769
1770 // Add any operands of the new node which have not yet been combined to the
1771 // worklist as well. getNextWorklistEntry flags nodes that have been
1772 // combined before. Because the worklist uniques things already, this won't
1773 // repeatedly process the same operand.
1774 for (const SDValue &ChildN : N->op_values())
1775 AddToWorklist(ChildN.getNode(), /*IsCandidateForPruning=*/true,
1776 /*SkipIfCombinedBefore=*/true);
1777
1778 SDValue RV = combine(N);
1779
1780 if (!RV.getNode())
1781 continue;
1782
1783 ++NodesCombined;
1784
1785 // If we get back the same node we passed in, rather than a new node or
1786 // zero, we know that the node must have defined multiple values and
1787 // CombineTo was used. Since CombineTo takes care of the worklist
1788 // mechanics for us, we have no work to do in this case.
1789 if (RV.getNode() == N)
1790 continue;
1791
1792 assert(N->getOpcode() != ISD::DELETED_NODE &&
1793 RV.getOpcode() != ISD::DELETED_NODE &&
1794 "Node was deleted but visit returned new node!");
1795
1796 LLVM_DEBUG(dbgs() << " ... into: "; RV.dump(&DAG));
1797
1798 if (N->getNumValues() == RV->getNumValues())
1799 DAG.ReplaceAllUsesWith(N, RV.getNode());
1800 else {
1801 assert(N->getValueType(0) == RV.getValueType() &&
1802 N->getNumValues() == 1 && "Type mismatch");
1803 DAG.ReplaceAllUsesWith(N, &RV);
1804 }
1805
1806 // Push the new node and any users onto the worklist. Omit this if the
1807 // new node is the EntryToken (e.g. if a store managed to get optimized
1808 // out), because re-visiting the EntryToken and its users will not uncover
1809 // any additional opportunities, but there may be a large number of such
1810 // users, potentially causing compile time explosion.
1811 if (RV.getOpcode() != ISD::EntryToken)
1812 AddToWorklistWithUsers(RV.getNode());
1813
1814 // Finally, if the node is now dead, remove it from the graph. The node
1815 // may not be dead if the replacement process recursively simplified to
1816 // something else needing this node. This will also take care of adding any
1817 // operands which have lost a user to the worklist.
1818 recursivelyDeleteUnusedNodes(N);
1819 }
1820
1821 // If the root changed (e.g. it was a dead load), update the root.
1822 DAG.setRoot(Dummy.getValue());
1823 DAG.RemoveDeadNodes();
1824}
1825
1826SDValue DAGCombiner::visit(SDNode *N) {
1827 // clang-format off
1828 switch (N->getOpcode()) {
1829 default: break;
1830 case ISD::TokenFactor: return visitTokenFactor(N);
1831 case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
1832 case ISD::ADD: return visitADD(N);
1833 case ISD::SUB: return visitSUB(N);
1834 case ISD::SADDSAT:
1835 case ISD::UADDSAT: return visitADDSAT(N);
1836 case ISD::SSUBSAT:
1837 case ISD::USUBSAT: return visitSUBSAT(N);
1838 case ISD::ADDC: return visitADDC(N);
1839 case ISD::SADDO:
1840 case ISD::UADDO: return visitADDO(N);
1841 case ISD::SUBC: return visitSUBC(N);
1842 case ISD::SSUBO:
1843 case ISD::USUBO: return visitSUBO(N);
1844 case ISD::ADDE: return visitADDE(N);
1845 case ISD::UADDO_CARRY: return visitUADDO_CARRY(N);
1846 case ISD::SADDO_CARRY: return visitSADDO_CARRY(N);
1847 case ISD::SUBE: return visitSUBE(N);
1848 case ISD::USUBO_CARRY: return visitUSUBO_CARRY(N);
1849 case ISD::SSUBO_CARRY: return visitSSUBO_CARRY(N);
1850 case ISD::SMULFIX:
1851 case ISD::SMULFIXSAT:
1852 case ISD::UMULFIX:
1853 case ISD::UMULFIXSAT: return visitMULFIX(N);
1854 case ISD::MUL: return visitMUL<EmptyMatchContext>(N);
1855 case ISD::SDIV: return visitSDIV(N);
1856 case ISD::UDIV: return visitUDIV(N);
1857 case ISD::SREM:
1858 case ISD::UREM: return visitREM(N);
1859 case ISD::MULHU: return visitMULHU(N);
1860 case ISD::MULHS: return visitMULHS(N);
1861 case ISD::AVGFLOORS:
1862 case ISD::AVGFLOORU:
1863 case ISD::AVGCEILS:
1864 case ISD::AVGCEILU: return visitAVG(N);
1865 case ISD::ABDS:
1866 case ISD::ABDU: return visitABD(N);
1867 case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
1868 case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
1869 case ISD::SMULO:
1870 case ISD::UMULO: return visitMULO(N);
1871 case ISD::SMIN:
1872 case ISD::SMAX:
1873 case ISD::UMIN:
1874 case ISD::UMAX: return visitIMINMAX(N);
1875 case ISD::AND: return visitAND(N);
1876 case ISD::OR: return visitOR(N);
1877 case ISD::XOR: return visitXOR(N);
1878 case ISD::SHL: return visitSHL(N);
1879 case ISD::SRA: return visitSRA(N);
1880 case ISD::SRL: return visitSRL(N);
1881 case ISD::ROTR:
1882 case ISD::ROTL: return visitRotate(N);
1883 case ISD::FSHL:
1884 case ISD::FSHR: return visitFunnelShift(N);
1885 case ISD::SSHLSAT:
1886 case ISD::USHLSAT: return visitSHLSAT(N);
1887 case ISD::ABS: return visitABS(N);
1888 case ISD::BSWAP: return visitBSWAP(N);
1889 case ISD::BITREVERSE: return visitBITREVERSE(N);
1890 case ISD::CTLZ: return visitCTLZ(N);
1891 case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
1892 case ISD::CTTZ: return visitCTTZ(N);
1893 case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
1894 case ISD::CTPOP: return visitCTPOP(N);
1895 case ISD::SELECT: return visitSELECT(N);
1896 case ISD::VSELECT: return visitVSELECT(N);
1897 case ISD::SELECT_CC: return visitSELECT_CC(N);
1898 case ISD::SETCC: return visitSETCC(N);
1899 case ISD::SETCCCARRY: return visitSETCCCARRY(N);
1900 case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
1901 case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
1902 case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
1903 case ISD::AssertSext:
1904 case ISD::AssertZext: return visitAssertExt(N);
1905 case ISD::AssertAlign: return visitAssertAlign(N);
1906 case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
1907 case ISD::SIGN_EXTEND_VECTOR_INREG:
1908 case ISD::ZERO_EXTEND_VECTOR_INREG:
1909 case ISD::ANY_EXTEND_VECTOR_INREG: return visitEXTEND_VECTOR_INREG(N);
1910 case ISD::TRUNCATE: return visitTRUNCATE(N);
1911 case ISD::BITCAST: return visitBITCAST(N);
1912 case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
1913 case ISD::FADD: return visitFADD(N);
1914 case ISD::STRICT_FADD: return visitSTRICT_FADD(N);
1915 case ISD::FSUB: return visitFSUB(N);
1916 case ISD::FMUL: return visitFMUL(N);
1917 case ISD::FMA: return visitFMA<EmptyMatchContext>(N);
1918 case ISD::FMAD: return visitFMAD(N);
1919 case ISD::FDIV: return visitFDIV(N);
1920 case ISD::FREM: return visitFREM(N);
1921 case ISD::FSQRT: return visitFSQRT(N);
1922 case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
1923 case ISD::FPOW: return visitFPOW(N);
1924 case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
1925 case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
1926 case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
1927 case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
1928 case ISD::LRINT:
1929 case ISD::LLRINT: return visitXRINT(N);
1930 case ISD::FP_ROUND: return visitFP_ROUND(N);
1931 case ISD::FP_EXTEND: return visitFP_EXTEND(N);
1932 case ISD::FNEG: return visitFNEG(N);
1933 case ISD::FABS: return visitFABS(N);
1934 case ISD::FFLOOR: return visitFFLOOR(N);
1935 case ISD::FMINNUM:
1936 case ISD::FMAXNUM:
1937 case ISD::FMINIMUM:
1938 case ISD::FMAXIMUM: return visitFMinMax(N);
1939 case ISD::FCEIL: return visitFCEIL(N);
1940 case ISD::FTRUNC: return visitFTRUNC(N);
1941 case ISD::FFREXP: return visitFFREXP(N);
1942 case ISD::BRCOND: return visitBRCOND(N);
1943 case ISD::BR_CC: return visitBR_CC(N);
1944 case ISD::LOAD: return visitLOAD(N);
1945 case ISD::STORE: return visitSTORE(N);
1946 case ISD::ATOMIC_STORE: return visitATOMIC_STORE(N);
1947 case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
1948 case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
1949 case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
1950 case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
1951 case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
1952 case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
1953 case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N);
1954 case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
1955 case ISD::MGATHER: return visitMGATHER(N);
1956 case ISD::MLOAD: return visitMLOAD(N);
1957 case ISD::MSCATTER: return visitMSCATTER(N);
1958 case ISD::MSTORE: return visitMSTORE(N);
1959 case ISD::VECTOR_COMPRESS: return visitVECTOR_COMPRESS(N);
1960 case ISD::LIFETIME_END: return visitLIFETIME_END(N);
1961 case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
1962 case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
1963 case ISD::FP_TO_BF16: return visitFP_TO_BF16(N);
1964 case ISD::BF16_TO_FP: return visitBF16_TO_FP(N);
1965 case ISD::FREEZE: return visitFREEZE(N);
1966 case ISD::GET_FPENV_MEM: return visitGET_FPENV_MEM(N);
1967 case ISD::SET_FPENV_MEM: return visitSET_FPENV_MEM(N);
1968 case ISD::VECREDUCE_FADD:
1969 case ISD::VECREDUCE_FMUL:
1970 case ISD::VECREDUCE_ADD:
1971 case ISD::VECREDUCE_MUL:
1972 case ISD::VECREDUCE_AND:
1973 case ISD::VECREDUCE_OR:
1974 case ISD::VECREDUCE_XOR:
1975 case ISD::VECREDUCE_SMAX:
1976 case ISD::VECREDUCE_SMIN:
1977 case ISD::VECREDUCE_UMAX:
1978 case ISD::VECREDUCE_UMIN:
1979 case ISD::VECREDUCE_FMAX:
1980 case ISD::VECREDUCE_FMIN:
1981 case ISD::VECREDUCE_FMAXIMUM:
1982 case ISD::VECREDUCE_FMINIMUM: return visitVECREDUCE(N);
1983#define BEGIN_REGISTER_VP_SDNODE(SDOPC, ...) case ISD::SDOPC:
1984#include "llvm/IR/VPIntrinsics.def"
1985 return visitVPOp(N);
1986 }
1987 // clang-format on
1988 return SDValue();
1989}
1990
1991SDValue DAGCombiner::combine(SDNode *N) {
1992 if (!DebugCounter::shouldExecute(DAGCombineCounter))
1993 return SDValue();
1994
1995 SDValue RV;
1996 if (!DisableGenericCombines)
1997 RV = visit(N);
1998
1999 // If nothing happened, try a target-specific DAG combine.
2000 if (!RV.getNode()) {
2001 assert(N->getOpcode() != ISD::DELETED_NODE &&
2002 "Node was deleted but visit returned NULL!");
2003
2004 if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
2005 TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
2006
2007 // Expose the DAG combiner to the target combiner impls.
2008 TargetLowering::DAGCombinerInfo
2009 DagCombineInfo(DAG, Level, false, this);
2010
2011 RV = TLI.PerformDAGCombine(N, DagCombineInfo);
2012 }
2013 }
2014
2015 // If nothing happened still, try promoting the operation.
2016 if (!RV.getNode()) {
2017 switch (N->getOpcode()) {
2018 default: break;
2019 case ISD::ADD:
2020 case ISD::SUB:
2021 case ISD::MUL:
2022 case ISD::AND:
2023 case ISD::OR:
2024 case ISD::XOR:
2025 RV = PromoteIntBinOp(SDValue(N, 0));
2026 break;
2027 case ISD::SHL:
2028 case ISD::SRA:
2029 case ISD::SRL:
2030 RV = PromoteIntShiftOp(SDValue(N, 0));
2031 break;
2032 case ISD::SIGN_EXTEND:
2033 case ISD::ZERO_EXTEND:
2034 case ISD::ANY_EXTEND:
2035 RV = PromoteExtend(SDValue(N, 0));
2036 break;
2037 case ISD::LOAD:
2038 if (PromoteLoad(SDValue(N, 0)))
2039 RV = SDValue(N, 0);
2040 break;
2041 }
2042 }
2043
2044 // If N is a commutative binary node, try to eliminate it if the commuted
2045 // version is already present in the DAG.
2046 if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode())) {
2047 SDValue N0 = N->getOperand(0);
2048 SDValue N1 = N->getOperand(1);
2049
2050 // Constant operands are canonicalized to RHS.
2051 if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
2052 SDValue Ops[] = {N1, N0};
2053 SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
2054 N->getFlags());
2055 if (CSENode)
2056 return SDValue(CSENode, 0);
2057 }
2058 }
2059
2060 return RV;
2061}
2062
2063/// Given a node, return its input chain if it has one, otherwise return a null
2064/// sd operand.
2065 static SDValue getInputChainForNode(SDNode *N) {
2066 if (unsigned NumOps = N->getNumOperands()) {
2067 if (N->getOperand(0).getValueType() == MVT::Other)
2068 return N->getOperand(0);
2069 if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
2070 return N->getOperand(NumOps-1);
2071 for (unsigned i = 1; i < NumOps-1; ++i)
2072 if (N->getOperand(i).getValueType() == MVT::Other)
2073 return N->getOperand(i);
2074 }
2075 return SDValue();
2076}
2077
2078SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
2079 // If N has two operands, where one has an input chain equal to the other,
2080 // the 'other' chain is redundant.
2081 if (N->getNumOperands() == 2) {
2082 if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
2083 return N->getOperand(0);
2084 if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
2085 return N->getOperand(1);
2086 }
2087
2088 // Don't simplify token factors if optnone.
2089 if (OptLevel == CodeGenOptLevel::None)
2090 return SDValue();
2091
2092 // Don't simplify the token factor if the node itself has too many operands.
2093 if (N->getNumOperands() > TokenFactorInlineLimit)
2094 return SDValue();
2095
2096 // If the sole user is a token factor, we should make sure we have a
2097 // chance to merge them together. This prevents TF chains from inhibiting
2098 // optimizations.
2099 if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::TokenFactor)
2100 AddToWorklist(*(N->use_begin()));
2101
2102 SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
2103 SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
2104 SmallPtrSet<SDNode *, 16> SeenOps;
2105 bool Changed = false; // If we should replace this token factor.
2106
2107 // Start out with this token factor.
2108 TFs.push_back(N);
2109
2110 // Iterate through the token factors. TFs grows as new token factors are
2111 // encountered.
2112 for (unsigned i = 0; i < TFs.size(); ++i) {
2113 // Limit number of nodes to inline, to avoid quadratic compile times.
2114 // We have to add the outstanding Token Factors to Ops, otherwise we might
2115 // drop Ops from the resulting Token Factors.
2116 if (Ops.size() > TokenFactorInlineLimit) {
2117 for (unsigned j = i; j < TFs.size(); j++)
2118 Ops.emplace_back(TFs[j], 0);
2119 // Drop unprocessed Token Factors from TFs, so we do not add them to the
2120 // combiner worklist later.
2121 TFs.resize(i);
2122 break;
2123 }
2124
2125 SDNode *TF = TFs[i];
2126 // Check each of the operands.
2127 for (const SDValue &Op : TF->op_values()) {
2128 switch (Op.getOpcode()) {
2129 case ISD::EntryToken:
2130 // Entry tokens don't need to be added to the list. They are
2131 // redundant.
2132 Changed = true;
2133 break;
2134
2135 case ISD::TokenFactor:
2136 if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
2137 // Queue up for processing.
2138 TFs.push_back(Op.getNode());
2139 Changed = true;
2140 break;
2141 }
2142 [[fallthrough]];
2143
2144 default:
2145 // Only add if it isn't already in the list.
2146 if (SeenOps.insert(Op.getNode()).second)
2147 Ops.push_back(Op);
2148 else
2149 Changed = true;
2150 break;
2151 }
2152 }
2153 }
2154
2155 // Re-visit inlined Token Factors, to clean them up in case they have been
2156 // removed. Skip the first Token Factor, as this is the current node.
2157 for (unsigned i = 1, e = TFs.size(); i < e; i++)
2158 AddToWorklist(TFs[i]);
2159
2160 // Remove Nodes that are chained to another node in the list. Do so
2161 // by walking up chains breadth-first, stopping when we've seen
2162 // another operand. In general we must climb to the EntryNode, but we can exit
2163 // early if we find all remaining work is associated with just one operand as
2164 // no further pruning is possible.
2165
2166 // List of nodes to search through and original Ops from which they originate.
2167 SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
2168 SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
2169 SmallPtrSet<SDNode *, 16> SeenChains;
2170 bool DidPruneOps = false;
2171
2172 unsigned NumLeftToConsider = 0;
2173 for (const SDValue &Op : Ops) {
2174 Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
2175 OpWorkCount.push_back(1);
2176 }
2177
2178 auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
2179 // If this is an Op, we can remove the op from the list. Re-mark any
2180 // search associated with it as coming from the current OpNumber.
2181 if (SeenOps.contains(Op)) {
2182 Changed = true;
2183 DidPruneOps = true;
2184 unsigned OrigOpNumber = 0;
2185 while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
2186 OrigOpNumber++;
2187 assert((OrigOpNumber != Ops.size()) &&
2188 "expected to find TokenFactor Operand");
2189 // Re-mark worklist from OrigOpNumber to OpNumber
2190 for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
2191 if (Worklist[i].second == OrigOpNumber) {
2192 Worklist[i].second = OpNumber;
2193 }
2194 }
2195 OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
2196 OpWorkCount[OrigOpNumber] = 0;
2197 NumLeftToConsider--;
2198 }
2199 // Add if it's a new chain
2200 if (SeenChains.insert(Op).second) {
2201 OpWorkCount[OpNumber]++;
2202 Worklist.push_back(std::make_pair(Op, OpNumber));
2203 }
2204 };
2205
2206 for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
2207 // We need to consider at least 2 Ops to prune.
2208 if (NumLeftToConsider <= 1)
2209 break;
2210 auto CurNode = Worklist[i].first;
2211 auto CurOpNumber = Worklist[i].second;
2212 assert((OpWorkCount[CurOpNumber] > 0) &&
2213 "Node should not appear in worklist");
2214 switch (CurNode->getOpcode()) {
2215 case ISD::EntryToken:
2216 // Hitting EntryToken is the only way for the search to terminate without
2217 // hitting another operand's search.
2218 // Prevent us from marking this operand as
2219 // considered.
2220 NumLeftToConsider++;
2221 break;
2222 case ISD::TokenFactor:
2223 for (const SDValue &Op : CurNode->op_values())
2224 AddToWorklist(i, Op.getNode(), CurOpNumber);
2225 break;
2226 case ISD::LIFETIME_START:
2227 case ISD::LIFETIME_END:
2228 case ISD::CopyFromReg:
2229 case ISD::CopyToReg:
2230 AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
2231 break;
2232 default:
2233 if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
2234 AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
2235 break;
2236 }
2237 OpWorkCount[CurOpNumber]--;
2238 if (OpWorkCount[CurOpNumber] == 0)
2239 NumLeftToConsider--;
2240 }
2241
2242 // If we've changed things around then replace token factor.
2243 if (Changed) {
2244 SDValue Result;
2245 if (Ops.empty()) {
2246 // The entry token is the only possible outcome.
2247 Result = DAG.getEntryNode();
2248 } else {
2249 if (DidPruneOps) {
2250 SmallVector<SDValue, 8> PrunedOps;
2251 //
2252 for (const SDValue &Op : Ops) {
2253 if (SeenChains.count(Op.getNode()) == 0)
2254 PrunedOps.push_back(Op);
2255 }
2256 Result = DAG.getTokenFactor(SDLoc(N), PrunedOps);
2257 } else {
2258 Result = DAG.getTokenFactor(SDLoc(N), Ops);
2259 }
2260 }
2261 return Result;
2262 }
2263 return SDValue();
2264}
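// Illustrative example of the flattening performed above:
//   t3 = TokenFactor t1, t2
//   t5 = TokenFactor t3, t4
// becomes, when t3 has no other uses,
//   t5 = TokenFactor t1, t2, t4
// and any operand reachable through another operand's chain is pruned away.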
2265
2266/// MERGE_VALUES can always be eliminated.
2267SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
2268 WorklistRemover DeadNodes(*this);
2269 // Replacing results may cause a different MERGE_VALUES to suddenly
2270 // be CSE'd with N, and carry its uses with it. Iterate until no
2271 // uses remain, to ensure that the node can be safely deleted.
2272 // First add the users of this node to the work list so that they
2273 // can be tried again once they have new operands.
2274 AddUsersToWorklist(N);
2275 do {
2276 // Do as a single replacement to avoid rewalking use lists.
2277 SmallVector<SDValue, 8> Ops;
2278 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
2279 Ops.push_back(N->getOperand(i));
2280 DAG.ReplaceAllUsesWith(N, Ops.data());
2281 } while (!N->use_empty());
2282 deleteAndRecombine(N);
2283 return SDValue(N, 0); // Return N so it doesn't get rechecked!
2284}
2285
2286/// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a
2287/// ConstantSDNode pointer else nullptr.
2288 static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
2289 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
2290 return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
2291}
2292
2293// isTruncateOf - If N is a truncate of some other value, return true, record
2294// the value being truncated in Op and which of Op's bits are zero/one in Known.
2295// This function computes KnownBits to avoid a duplicated call to
2296// computeKnownBits in the caller.
2297 static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
2298 KnownBits &Known) {
2299 if (N->getOpcode() == ISD::TRUNCATE) {
2300 Op = N->getOperand(0);
2301 Known = DAG.computeKnownBits(Op);
2302 return true;
2303 }
2304
2305 if (N.getValueType().getScalarType() != MVT::i1 ||
2306 !sd_match(
2307 N, m_c_SetCC(m_Value(Op), m_Zero(), m_SpecificCondCode(ISD::SETNE))))
2308 return false;
2309
2310 Known = DAG.computeKnownBits(Op);
2311 return (Known.Zero | 1).isAllOnes();
2312}
2313
2314/// Return true if 'Use' is a load or a store that uses N as its base pointer
2315/// and that N may be folded in the load / store addressing mode.
2316 static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, SelectionDAG &DAG,
2317 const TargetLowering &TLI) {
2318 EVT VT;
2319 unsigned AS;
2320
2321 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
2322 if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2323 return false;
2324 VT = LD->getMemoryVT();
2325 AS = LD->getAddressSpace();
2326 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
2327 if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2328 return false;
2329 VT = ST->getMemoryVT();
2330 AS = ST->getAddressSpace();
2331 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(Use)) {
2332 if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2333 return false;
2334 VT = LD->getMemoryVT();
2335 AS = LD->getAddressSpace();
2336 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(Use)) {
2337 if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2338 return false;
2339 VT = ST->getMemoryVT();
2340 AS = ST->getAddressSpace();
2341 } else {
2342 return false;
2343 }
2344
2345 TargetLoweringBase::AddrMode AM;
2346 if (N->getOpcode() == ISD::ADD) {
2347 AM.HasBaseReg = true;
2348 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2349 if (Offset)
2350 // [reg +/- imm]
2351 AM.BaseOffs = Offset->getSExtValue();
2352 else
2353 // [reg +/- reg]
2354 AM.Scale = 1;
2355 } else if (N->getOpcode() == ISD::SUB) {
2356 AM.HasBaseReg = true;
2357 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2358 if (Offset)
2359 // [reg +/- imm]
2360 AM.BaseOffs = -Offset->getSExtValue();
2361 else
2362 // [reg +/- reg]
2363 AM.Scale = 1;
2364 } else {
2365 return false;
2366 }
2367
2368 return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
2369 VT.getTypeForEVT(*DAG.getContext()), AS);
2370}
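// Illustrative example: for (store v, (add base, 16)) on a target whose
// isLegalAddressingMode accepts [reg + imm] for the stored type, this helper
// reports that the add may be folded into the store's addressing mode.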
2371
2372/// This inverts a canonicalization in IR that replaces a variable select arm
2373/// with an identity constant. Codegen improves if we re-use the variable
2374/// operand rather than load a constant. This can also be converted into a
2375/// masked vector operation if the target supports it.
2376 static SDValue foldSelectWithIdentityConstant(SDNode *N, SelectionDAG &DAG,
2377 bool ShouldCommuteOperands) {
2378 // Match a select as operand 1. The identity constant that we are looking for
2379 // is only valid as operand 1 of a non-commutative binop.
2380 SDValue N0 = N->getOperand(0);
2381 SDValue N1 = N->getOperand(1);
2382 if (ShouldCommuteOperands)
2383 std::swap(N0, N1);
2384
2385 // TODO: Should this apply to scalar select too?
2386 if (N1.getOpcode() != ISD::VSELECT || !N1.hasOneUse())
2387 return SDValue();
2388
2389 // We can't hoist all instructions because of immediate UB (not speculatable).
2390 // For example div/rem by zero.
2392 return SDValue();
2393
2394 unsigned Opcode = N->getOpcode();
2395 EVT VT = N->getValueType(0);
2396 SDValue Cond = N1.getOperand(0);
2397 SDValue TVal = N1.getOperand(1);
2398 SDValue FVal = N1.getOperand(2);
2399
2400 // This transform increases uses of N0, so freeze it to be safe.
2401 // binop N0, (vselect Cond, IDC, FVal) --> vselect Cond, N0, (binop N0, FVal)
2402 unsigned OpNo = ShouldCommuteOperands ? 0 : 1;
2403 if (isNeutralConstant(Opcode, N->getFlags(), TVal, OpNo)) {
2404 SDValue F0 = DAG.getFreeze(N0);
2405 SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, FVal, N->getFlags());
2406 return DAG.getSelect(SDLoc(N), VT, Cond, F0, NewBO);
2407 }
2408 // binop N0, (vselect Cond, TVal, IDC) --> vselect Cond, (binop N0, TVal), N0
2409 if (isNeutralConstant(Opcode, N->getFlags(), FVal, OpNo)) {
2410 SDValue F0 = DAG.getFreeze(N0);
2411 SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, TVal, N->getFlags());
2412 return DAG.getSelect(SDLoc(N), VT, Cond, NewBO, F0);
2413 }
2414
2415 return SDValue();
2416}
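// Illustrative example of the fold above, using 0 as the identity of add:
//   add X, (vselect Cond, 0, Y)
//     -> vselect Cond, freeze(X), (add freeze(X), Y)
// The select is hoisted so the variable operand X is reused instead of
// materializing the identity constant in one arm.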
2417
2418SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
2419 assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
2420 "Unexpected binary operator");
2421
2422 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2423 auto BinOpcode = BO->getOpcode();
2424 EVT VT = BO->getValueType(0);
2425 if (TLI.shouldFoldSelectWithIdentityConstant(BinOpcode, VT)) {
2426 if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, false))
2427 return Sel;
2428
2429 if (TLI.isCommutativeBinOp(BO->getOpcode()))
2430 if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, true))
2431 return Sel;
2432 }
2433
2434 // Don't do this unless the old select is going away. We want to eliminate the
2435 // binary operator, not replace a binop with a select.
2436 // TODO: Handle ISD::SELECT_CC.
2437 unsigned SelOpNo = 0;
2438 SDValue Sel = BO->getOperand(0);
2439 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
2440 SelOpNo = 1;
2441 Sel = BO->getOperand(1);
2442
2443 // Peek through trunc to shift amount type.
2444 if ((BinOpcode == ISD::SHL || BinOpcode == ISD::SRA ||
2445 BinOpcode == ISD::SRL) && Sel.hasOneUse()) {
2446 // This is valid when the truncated bits of x are already zero.
2447 SDValue Op;
2448 KnownBits Known;
2449 if (isTruncateOf(DAG, Sel, Op, Known) &&
2451 Sel = Op;
2452 }
2453 }
2454
2455 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
2456 return SDValue();
2457
2458 SDValue CT = Sel.getOperand(1);
2459 if (!isConstantOrConstantVector(CT, true) &&
2460 !DAG.isConstantFPBuildVectorOrConstantFP(CT))
2461 return SDValue();
2462
2463 SDValue CF = Sel.getOperand(2);
2464 if (!isConstantOrConstantVector(CF, true) &&
2465 !DAG.isConstantFPBuildVectorOrConstantFP(CF))
2466 return SDValue();
2467
2468 // Bail out if any constants are opaque because we can't constant fold those.
2469 // The exception is "and" and "or" with either 0 or -1 in which case we can
2470 // propagate non constant operands into select. I.e.:
2471 // and (select Cond, 0, -1), X --> select Cond, 0, X
2472 // or X, (select Cond, -1, 0) --> select Cond, -1, X
2473 bool CanFoldNonConst =
2474 (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
2475 ((isNullOrNullSplat(CT) && isAllOnesOrAllOnesSplat(CF)) ||
2476 (isNullOrNullSplat(CF) && isAllOnesOrAllOnesSplat(CT)));
2477
2478 SDValue CBO = BO->getOperand(SelOpNo ^ 1);
2479 if (!CanFoldNonConst &&
2480 !isConstantOrConstantVector(CBO, true) &&
2481 !DAG.isConstantFPBuildVectorOrConstantFP(CBO))
2482 return SDValue();
2483
2484 SDLoc DL(Sel);
2485 SDValue NewCT, NewCF;
2486
2487 if (CanFoldNonConst) {
2488 // If CBO is an opaque constant, we can't rely on getNode to constant fold.
2489 if ((BinOpcode == ISD::AND && isNullOrNullSplat(CT)) ||
2490 (BinOpcode == ISD::OR && isAllOnesOrAllOnesSplat(CT)))
2491 NewCT = CT;
2492 else
2493 NewCT = CBO;
2494
2495 if ((BinOpcode == ISD::AND && isNullOrNullSplat(CF)) ||
2496 (BinOpcode == ISD::OR && isAllOnesOrAllOnesSplat(CF)))
2497 NewCF = CF;
2498 else
2499 NewCF = CBO;
2500 } else {
2501 // We have a select-of-constants followed by a binary operator with a
2502 // constant. Eliminate the binop by pulling the constant math into the
2503 // select. Example: add (select Cond, CT, CF), CBO --> select Cond, CT +
2504 // CBO, CF + CBO
2505 NewCT = SelOpNo ? DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CBO, CT})
2506 : DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CT, CBO});
2507 if (!NewCT)
2508 return SDValue();
2509
2510 NewCF = SelOpNo ? DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CBO, CF})
2511 : DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CF, CBO});
2512 if (!NewCF)
2513 return SDValue();
2514 }
2515
2516 SDValue SelectOp = DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
2517 SelectOp->setFlags(BO->getFlags());
2518 return SelectOp;
2519}
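// Illustrative example of the select-of-constants fold above:
//   add (select Cond, 7, 3), 1 -> select Cond, 8, 4
// The binary operator is folded into both constant arms, leaving only the
// select.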
2520
2521 static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, const SDLoc &DL,
2522 SelectionDAG &DAG) {
2523 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2524 "Expecting add or sub");
2525
2526 // Match a constant operand and a zext operand for the math instruction:
2527 // add Z, C
2528 // sub C, Z
2529 bool IsAdd = N->getOpcode() == ISD::ADD;
2530 SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
2531 SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
2532 auto *CN = dyn_cast<ConstantSDNode>(C);
2533 if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
2534 return SDValue();
2535
2536 // Match the zext operand as a setcc of a boolean.
2537 if (Z.getOperand(0).getValueType() != MVT::i1)
2538 return SDValue();
2539
2540 // Match the compare as: setcc (X & 1), 0, eq.
2541 if (!sd_match(Z.getOperand(0), m_SetCC(m_And(m_Value(), m_One()), m_Zero(),
2542 m_SpecificCondCode(ISD::SETEQ))))
2543 return SDValue();
2544
2545 // We are adding/subtracting a constant and an inverted low bit. Turn that
2546 // into a subtract/add of the low bit with incremented/decremented constant:
2547 // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
2548 // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
2549 EVT VT = C.getValueType();
2550 SDValue LowBit = DAG.getZExtOrTrunc(Z.getOperand(0).getOperand(0), DL, VT);
2551 SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT)
2552 : DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
2553 return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
2554}
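// Illustrative example of the fold above with C = 5:
//   add (zext i1 (seteq (and X, 1), 0)), 5 -> sub 6, (zext (and X, 1))
// The inverted low bit turns into a plain subtraction with an adjusted
// constant.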
2555
2556// Attempt to form avgceil(A, B) from (A | B) - ((A ^ B) >> 1)
2557SDValue DAGCombiner::foldSubToAvg(SDNode *N, const SDLoc &DL) {
2558 SDValue N0 = N->getOperand(0);
2559 EVT VT = N0.getValueType();
2560 SDValue A, B;
2561
2562 if ((!LegalOperations || hasOperation(ISD::AVGCEILU, VT)) &&
2563 sd_match(N, m_Sub(m_Or(m_Value(A), m_Value(B)),
2564 m_Srl(m_Xor(m_Deferred(A), m_Deferred(B)),
2565 m_SpecificInt(1))))) {
2566 return DAG.getNode(ISD::AVGCEILU, DL, VT, A, B);
2567 }
2568 if ((!LegalOperations || hasOperation(ISD::AVGCEILS, VT)) &&
2569 sd_match(N, m_Sub(m_Or(m_Value(A), m_Value(B)),
2570 m_Sra(m_Xor(m_Deferred(A), m_Deferred(B)),
2571 m_SpecificInt(1))))) {
2572 return DAG.getNode(ISD::AVGCEILS, DL, VT, A, B);
2573 }
2574 return SDValue();
2575}
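// Illustrative example: for an unsigned type where AVGCEILU is available,
//   sub (or A, B), (srl (xor A, B), 1) -> avgceilu A, B
// i.e. ceil((A + B) / 2) computed without widening, matched directly here.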
2576
2577/// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
2578/// a shift and add with a different constant.
2579 static SDValue foldAddSubOfSignBit(SDNode *N, const SDLoc &DL,
2580 SelectionDAG &DAG) {
2581 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2582 "Expecting add or sub");
2583
2584 // We need a constant operand for the add/sub, and the other operand is a
2585 // logical shift right: add (srl), C or sub C, (srl).
2586 bool IsAdd = N->getOpcode() == ISD::ADD;
2587 SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
2588 SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
2589 if (!DAG.isConstantIntBuildVectorOrConstantInt(ConstantOp) ||
2590 ShiftOp.getOpcode() != ISD::SRL)
2591 return SDValue();
2592
2593 // The shift must be of a 'not' value.
2594 SDValue Not = ShiftOp.getOperand(0);
2595 if (!Not.hasOneUse() || !isBitwiseNot(Not))
2596 return SDValue();
2597
2598 // The shift must be moving the sign bit to the least-significant-bit.
2599 EVT VT = ShiftOp.getValueType();
2600 SDValue ShAmt = ShiftOp.getOperand(1);
2601 ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2602 if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1))
2603 return SDValue();
2604
2605 // Eliminate the 'not' by adjusting the shift and add/sub constant:
2606 // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2607 // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
2608 if (SDValue NewC = DAG.FoldConstantArithmetic(
2609 IsAdd ? ISD::ADD : ISD::SUB, DL, VT,
2610 {ConstantOp, DAG.getConstant(1, DL, VT)})) {
2611 SDValue NewShift = DAG.getNode(IsAdd ? ISD::SRA : ISD::SRL, DL, VT,
2612 Not.getOperand(0), ShAmt);
2613 return DAG.getNode(ISD::ADD, DL, VT, NewShift, NewC);
2614 }
2615
2616 return SDValue();
2617}
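// Illustrative example of the fold above for i32 and C = 10:
//   add (srl (not X), 31), 10 -> add (sra X, 31), 11
// Shifting the inverted sign bit down equals an arithmetic shift of X plus an
// adjusted constant, so the 'not' disappears.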
2618
2619static bool
2620 areBitwiseNotOfEachother(SDValue Op0, SDValue Op1) {
2621 return (isBitwiseNot(Op0) && Op0.getOperand(0) == Op1) ||
2622 (isBitwiseNot(Op1) && Op1.getOperand(0) == Op0);
2623}
2624
2625/// Try to fold a node that behaves like an ADD (note that N isn't necessarily
2626/// an ISD::ADD here, it could for example be an ISD::OR if we know that there
2627/// are no common bits set in the operands).
2628SDValue DAGCombiner::visitADDLike(SDNode *N) {
2629 SDValue N0 = N->getOperand(0);
2630 SDValue N1 = N->getOperand(1);
2631 EVT VT = N0.getValueType();
2632 SDLoc DL(N);
2633
2634 // fold (add x, undef) -> undef
2635 if (N0.isUndef())
2636 return N0;
2637 if (N1.isUndef())
2638 return N1;
2639
2640 // fold (add c1, c2) -> c1+c2
2641 if (SDValue C = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0, N1}))
2642 return C;
2643
2644 // canonicalize constant to RHS
2645 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2646 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2647 return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
2648
2649 if (areBitwiseNotOfEachother(N0, N1))
2650 return DAG.getConstant(APInt::getAllOnes(VT.getScalarSizeInBits()), DL, VT);
2651
2652 // fold vector ops
2653 if (VT.isVector()) {
2654 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
2655 return FoldedVOp;
2656
2657 // fold (add x, 0) -> x, vector edition
2658 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
2659 return N0;
2660 }
2661
2662 // fold (add x, 0) -> x
2663 if (isNullConstant(N1))
2664 return N0;
2665
2666 if (N0.getOpcode() == ISD::SUB) {
2667 SDValue N00 = N0.getOperand(0);
2668 SDValue N01 = N0.getOperand(1);
2669
2670 // fold ((A-c1)+c2) -> (A+(c2-c1))
2671 if (SDValue Sub = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N1, N01}))
2672 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub);
2673
2674 // fold ((c1-A)+c2) -> (c1+c2)-A
2675 if (SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N00}))
2676 return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2677 }
2678
2679 // add (sext i1 X), 1 -> zext (not i1 X)
2680 // We don't transform this pattern:
2681 // add (zext i1 X), -1 -> sext (not i1 X)
2682 // because most (?) targets generate better code for the zext form.
2683 if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
2684 isOneOrOneSplat(N1)) {
2685 SDValue X = N0.getOperand(0);
2686 if ((!LegalOperations ||
2687 (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
2688 TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
2689 X.getScalarValueSizeInBits() == 1) {
2690 SDValue Not = DAG.getNOT(DL, X, X.getValueType());
2691 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
2692 }
2693 }
2694
2695 // Fold (add (or x, c0), c1) -> (add x, (c0 + c1))
2696 // iff (or x, c0) is equivalent to (add x, c0).
2697 // Fold (add (xor x, c0), c1) -> (add x, (c0 + c1))
2698 // iff (xor x, c0) is equivalent to (add x, c0).
2699 if (DAG.isADDLike(N0)) {
2700 SDValue N01 = N0.getOperand(1);
2701 if (SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N01}))
2702 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add);
2703 }
2704
2705 if (SDValue NewSel = foldBinOpIntoSelect(N))
2706 return NewSel;
2707
2708 // reassociate add
2709 if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N, N0, N1)) {
2710 if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
2711 return RADD;
2712
2713 // Reassociate (add (or x, c), y) -> (add add(x, y), c)) if (or x, c) is
2714 // equivalent to (add x, c).
2715 // Reassociate (add (xor x, c), y) -> (add add(x, y), c)) if (xor x, c) is
2716 // equivalent to (add x, c).
2717 // Do this optimization only when adding c does not introduce instructions
2718 // for adding carries.
2719 auto ReassociateAddOr = [&](SDValue N0, SDValue N1) {
2720 if (DAG.isADDLike(N0) && N0.hasOneUse() &&
2721 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
2722 // If N0's type does not split or is a sign mask, it does not introduce
2723 // add carry.
2724 auto TyActn = TLI.getTypeAction(*DAG.getContext(), N0.getValueType());
2725 bool NoAddCarry = TyActn == TargetLoweringBase::TypeLegal ||
2726 TyActn == TargetLoweringBase::TypePromoteInteger ||
2727 isMinSignedConstant(N0.getOperand(1));
2728 if (NoAddCarry)
2729 return DAG.getNode(
2730 ISD::ADD, DL, VT,
2731 DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
2732 N0.getOperand(1));
2733 }
2734 return SDValue();
2735 };
2736 if (SDValue Add = ReassociateAddOr(N0, N1))
2737 return Add;
2738 if (SDValue Add = ReassociateAddOr(N1, N0))
2739 return Add;
2740
2741 // Fold add(vecreduce(x), vecreduce(y)) -> vecreduce(add(x, y))
2742 if (SDValue SD =
2743 reassociateReduction(ISD::VECREDUCE_ADD, ISD::ADD, DL, VT, N0, N1))
2744 return SD;
2745 }
2746
2747 SDValue A, B, C, D;
2748
2749 // fold ((0-A) + B) -> B-A
2750 if (sd_match(N0, m_Neg(m_Value(A))))
2751 return DAG.getNode(ISD::SUB, DL, VT, N1, A);
2752
2753 // fold (A + (0-B)) -> A-B
2754 if (sd_match(N1, m_Neg(m_Value(B))))
2755 return DAG.getNode(ISD::SUB, DL, VT, N0, B);
2756
2757 // fold (A+(B-A)) -> B
2758 if (sd_match(N1, m_Sub(m_Value(B), m_Specific(N0))))
2759 return B;
2760
2761 // fold ((B-A)+A) -> B
2762 if (sd_match(N0, m_Sub(m_Value(B), m_Specific(N1))))
2763 return B;
2764
2765 // fold ((A-B)+(C-A)) -> (C-B)
2766 if (sd_match(N0, m_Sub(m_Value(A), m_Value(B))) &&
2767 sd_match(N1, m_Sub(m_Value(C), m_Specific(A))))
2768 return DAG.getNode(ISD::SUB, DL, VT, C, B);
2769
2770 // fold ((A-B)+(B-C)) -> (A-C)
2771 if (sd_match(N0, m_Sub(m_Value(A), m_Value(B))) &&
2772 sd_match(N1, m_Sub(m_Specific(B), m_Value(C))))
2773 return DAG.getNode(ISD::SUB, DL, VT, A, C);
2774
2775 // fold (A+(B-(A+C))) to (B-C)
2776 // fold (A+(B-(C+A))) to (B-C)
2777 if (sd_match(N1, m_Sub(m_Value(B), m_Add(m_Specific(N0), m_Value(C)))))
2778 return DAG.getNode(ISD::SUB, DL, VT, B, C);
2779
2780 // fold (A+((B-A)+or-C)) to (B+or-C)
2781 if (sd_match(N1,
2782 m_AnyOf(m_Add(m_Sub(m_Value(B), m_Specific(N0)), m_Value(C)),
2783 m_Sub(m_Sub(m_Value(B), m_Specific(N0)), m_Value(C)))))
2784 return DAG.getNode(N1.getOpcode(), DL, VT, B, C);
2785
2786 // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
2787 if (sd_match(N0, m_OneUse(m_Sub(m_Value(A), m_Value(B)))) &&
2788 sd_match(N1, m_OneUse(m_Sub(m_Value(C), m_Value(D)))) &&
2789 (isConstantOrConstantVector(A) || isConstantOrConstantVector(C)))
2790 return DAG.getNode(ISD::SUB, DL, VT,
2791 DAG.getNode(ISD::ADD, SDLoc(N0), VT, A, C),
2792 DAG.getNode(ISD::ADD, SDLoc(N1), VT, B, D));
2793
2794 // fold (add (umax X, C), -C) --> (usubsat X, C)
2795 if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
2796 auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
2797 return (!Max && !Op) ||
2798 (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
2799 };
2800 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
2801 /*AllowUndefs*/ true))
2802 return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
2803 N0.getOperand(1));
2804 }
2805
2806 if (SimplifyDemandedBits(SDValue(N, 0)))
2807 return SDValue(N, 0);
2808
2809 if (isOneOrOneSplat(N1)) {
2810 // fold (add (xor a, -1), 1) -> (sub 0, a)
2811 if (isBitwiseNot(N0))
2812 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
2813 N0.getOperand(0));
2814
2815 // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
2816 if (N0.getOpcode() == ISD::ADD) {
2817 SDValue A, Xor;
2818
2819 if (isBitwiseNot(N0.getOperand(0))) {
2820 A = N0.getOperand(1);
2821 Xor = N0.getOperand(0);
2822 } else if (isBitwiseNot(N0.getOperand(1))) {
2823 A = N0.getOperand(0);
2824 Xor = N0.getOperand(1);
2825 }
2826
2827 if (Xor)
2828 return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
2829 }
2830
2831 // Look for:
2832 // add (add x, y), 1
2833 // And if the target does not like this form then turn into:
2834 // sub y, (xor x, -1)
2835 if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
2836 N0.hasOneUse() &&
2837 // Limit this to after legalization if the add has wrap flags
2838 (Level >= AfterLegalizeDAG || (!N->getFlags().hasNoUnsignedWrap() &&
2839 !N->getFlags().hasNoSignedWrap()))) {
2840 SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
2841 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
2842 }
2843 }
2844
2845 // (x - y) + -1 -> add (xor y, -1), x
2846 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
2847 isAllOnesOrAllOnesSplat(N1, /*AllowUndefs=*/true)) {
2848 SDValue Not = DAG.getNOT(DL, N0.getOperand(1), VT);
2849 return DAG.getNode(ISD::ADD, DL, VT, Not, N0.getOperand(0));
2850 }
2851
2852 // Fold add(mul(add(A, CA), CM), CB) -> add(mul(A, CM), CM*CA+CB).
2853 // This can help if the inner add has multiple uses.
2854 APInt CM, CA;
2855 if (ConstantSDNode *CB = dyn_cast<ConstantSDNode>(N1)) {
2856 if (VT.getScalarSizeInBits() <= 64) {
2857 if (sd_match(N0, m_OneUse(m_Mul(m_Add(m_Value(A), m_ConstInt(CA)),
2858 m_ConstInt(CM)))) &&
2859 TLI.isLegalAddImmediate(
2860 (CA * CM + CB->getAPIntValue()).getSExtValue())) {
2861 SDNodeFlags Flags;
2862 // If all the inputs are nuw, the outputs can be nuw. If all the inputs
2863 // are _also_ nsw, the outputs can be too.
2864 if (N->getFlags().hasNoUnsignedWrap() &&
2865 N0->getFlags().hasNoUnsignedWrap() &&
2866 N0.getOperand(0)->getFlags().hasNoUnsignedWrap()) {
2867 Flags.setNoUnsignedWrap(true);
2868 if (N->getFlags().hasNoSignedWrap() &&
2869 N0->getFlags().hasNoSignedWrap() &&
2870 N0.getOperand(0)->getFlags().hasNoSignedWrap())
2871 Flags.setNoSignedWrap(true);
2872 }
2873 SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N1), VT, A,
2874 DAG.getConstant(CM, DL, VT), Flags);
2875 return DAG.getNode(
2876 ISD::ADD, DL, VT, Mul,
2877 DAG.getConstant(CA * CM + CB->getAPIntValue(), DL, VT), Flags);
2878 }
2879 // Also look in case there is an intermediate add.
2880 if (sd_match(N0, m_OneUse(m_Add(
2881 m_OneUse(m_Mul(m_Add(m_Value(A), m_ConstInt(CA)),
2882 m_ConstInt(CM))),
2883 m_Value(B)))) &&
2884 TLI.isLegalAddImmediate(
2885 (CA * CM + CB->getAPIntValue()).getSExtValue())) {
2886 SDNodeFlags Flags;
2887 // If all the inputs are nuw, the outputs can be nuw. If all the inputs
2888 // are _also_ nsw, the outputs can be too.
2889 SDValue OMul =
2890 N0.getOperand(0) == B ? N0.getOperand(1) : N0.getOperand(0);
2891 if (N->getFlags().hasNoUnsignedWrap() &&
2892 N0->getFlags().hasNoUnsignedWrap() &&
2893 OMul->getFlags().hasNoUnsignedWrap() &&
2894 OMul.getOperand(0)->getFlags().hasNoUnsignedWrap()) {
2895 Flags.setNoUnsignedWrap(true);
2896 if (N->getFlags().hasNoSignedWrap() &&
2897 N0->getFlags().hasNoSignedWrap() &&
2898 OMul->getFlags().hasNoSignedWrap() &&
2899 OMul.getOperand(0)->getFlags().hasNoSignedWrap())
2900 Flags.setNoSignedWrap(true);
2901 }
2902 SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N1), VT, A,
2903 DAG.getConstant(CM, DL, VT), Flags);
2904 SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N1), VT, Mul, B, Flags);
2905 return DAG.getNode(
2906 ISD::ADD, DL, VT, Add,
2907 DAG.getConstant(CA * CM + CB->getAPIntValue(), DL, VT), Flags);
2908 }
2909 }
2910 }
2911
2912 if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
2913 return Combined;
2914
2915 if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
2916 return Combined;
2917
2918 return SDValue();
2919}
2920
2921// Attempt to form avgfloor(A, B) from (A & B) + ((A ^ B) >> 1)
2922SDValue DAGCombiner::foldAddToAvg(SDNode *N, const SDLoc &DL) {
2923 SDValue N0 = N->getOperand(0);
2924 EVT VT = N0.getValueType();
2925 SDValue A, B;
2926
2927 if ((!LegalOperations || hasOperation(ISD::AVGFLOORU, VT)) &&
2928 sd_match(N, m_Add(m_And(m_Value(A), m_Value(B)),
2929 m_Srl(m_Xor(m_Deferred(A), m_Deferred(B)),
2930 m_SpecificInt(1))))) {
2931 return DAG.getNode(ISD::AVGFLOORU, DL, VT, A, B);
2932 }
2933 if ((!LegalOperations || hasOperation(ISD::AVGFLOORS, VT)) &&
2934 sd_match(N, m_Add(m_And(m_Value(A), m_Value(B)),
2935 m_Sra(m_Xor(m_Deferred(A), m_Deferred(B)),
2936 m_SpecificInt(1))))) {
2937 return DAG.getNode(ISD::AVGFLOORS, DL, VT, A, B);
2938 }
2939
2940 return SDValue();
2941}
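// Illustrative example: for an unsigned type where AVGFLOORU is available,
//   add (and A, B), (srl (xor A, B), 1) -> avgflooru A, B
// i.e. floor((A + B) / 2) computed without overflow in the original width.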
2942
2943SDValue DAGCombiner::visitADD(SDNode *N) {
2944 SDValue N0 = N->getOperand(0);
2945 SDValue N1 = N->getOperand(1);
2946 EVT VT = N0.getValueType();
2947 SDLoc DL(N);
2948
2949 if (SDValue Combined = visitADDLike(N))
2950 return Combined;
2951
2952 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DL, DAG))
2953 return V;
2954
2955 if (SDValue V = foldAddSubOfSignBit(N, DL, DAG))
2956 return V;
2957
2958 // Try to match AVGFLOOR fixedwidth pattern
2959 if (SDValue V = foldAddToAvg(N, DL))
2960 return V;
2961
2962 // fold (a+b) -> (a|b) iff a and b share no bits.
2963 if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
2964 DAG.haveNoCommonBitsSet(N0, N1)) {
2965 SDNodeFlags Flags;
2966 Flags.setDisjoint(true);
2967 return DAG.getNode(ISD::OR, DL, VT, N0, N1, Flags);
2968 }
2969
2970 // Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)).
2971 if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
2972 const APInt &C0 = N0->getConstantOperandAPInt(0);
2973 const APInt &C1 = N1->getConstantOperandAPInt(0);
2974 return DAG.getVScale(DL, VT, C0 + C1);
2975 }
2976
2977 // fold a+vscale(c1)+vscale(c2) -> a+vscale(c1+c2)
2978 if (N0.getOpcode() == ISD::ADD &&
2979 N0.getOperand(1).getOpcode() == ISD::VSCALE &&
2980 N1.getOpcode() == ISD::VSCALE) {
2981 const APInt &VS0 = N0.getOperand(1)->getConstantOperandAPInt(0);
2982 const APInt &VS1 = N1->getConstantOperandAPInt(0);
2983 SDValue VS = DAG.getVScale(DL, VT, VS0 + VS1);
2984 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), VS);
2985 }
2986
2987 // Fold (add step_vector(c1), step_vector(c2) to step_vector(c1+c2))
2988 if (N0.getOpcode() == ISD::STEP_VECTOR &&
2989 N1.getOpcode() == ISD::STEP_VECTOR) {
2990 const APInt &C0 = N0->getConstantOperandAPInt(0);
2991 const APInt &C1 = N1->getConstantOperandAPInt(0);
2992 APInt NewStep = C0 + C1;
2993 return DAG.getStepVector(DL, VT, NewStep);
2994 }
2995
2996 // Fold a + step_vector(c1) + step_vector(c2) to a + step_vector(c1+c2)
2997 if (N0.getOpcode() == ISD::ADD &&
2998 N0.getOperand(1).getOpcode() == ISD::STEP_VECTOR &&
2999 N1.getOpcode() == ISD::STEP_VECTOR) {
3000 const APInt &SV0 = N0.getOperand(1)->getConstantOperandAPInt(0);
3001 const APInt &SV1 = N1->getConstantOperandAPInt(0);
3002 APInt NewStep = SV0 + SV1;
3003 SDValue SV = DAG.getStepVector(DL, VT, NewStep);
3004 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), SV);
3005 }
3006
3007 return SDValue();
3008}
3009
3010SDValue DAGCombiner::visitADDSAT(SDNode *N) {
3011 unsigned Opcode = N->getOpcode();
3012 SDValue N0 = N->getOperand(0);
3013 SDValue N1 = N->getOperand(1);
3014 EVT VT = N0.getValueType();
3015 bool IsSigned = Opcode == ISD::SADDSAT;
3016 SDLoc DL(N);
3017
3018 // fold (add_sat x, undef) -> -1
3019 if (N0.isUndef() || N1.isUndef())
3020 return DAG.getAllOnesConstant(DL, VT);
3021
3022 // fold (add_sat c1, c2) -> c3
3023 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
3024 return C;
3025
3026 // canonicalize constant to RHS
3027 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3028 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3029 return DAG.getNode(Opcode, DL, VT, N1, N0);
3030
3031 // fold vector ops
3032 if (VT.isVector()) {
3033 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
3034 return FoldedVOp;
3035
3036 // fold (add_sat x, 0) -> x, vector edition
3037 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
3038 return N0;
3039 }
3040
3041 // fold (add_sat x, 0) -> x
3042 if (isNullConstant(N1))
3043 return N0;
3044
3045 // If it cannot overflow, transform into an add.
3046 if (DAG.willNotOverflowAdd(IsSigned, N0, N1))
3047 return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
3048
3049 return SDValue();
3050}
3051
3052 static SDValue getAsCarry(const TargetLowering &TLI, SDValue V,
3053 bool ForceCarryReconstruction = false) {
3054 bool Masked = false;
3055
3056 // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
3057 while (true) {
3058 if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
3059 V = V.getOperand(0);
3060 continue;
3061 }
3062
3063 if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
3064 if (ForceCarryReconstruction)
3065 return V;
3066
3067 Masked = true;
3068 V = V.getOperand(0);
3069 continue;
3070 }
3071
3072 if (ForceCarryReconstruction && V.getValueType() == MVT::i1)
3073 return V;
3074
3075 break;
3076 }
3077
3078 // If this is not a carry, return.
3079 if (V.getResNo() != 1)
3080 return SDValue();
3081
3082 if (V.getOpcode() != ISD::UADDO_CARRY && V.getOpcode() != ISD::USUBO_CARRY &&
3083 V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
3084 return SDValue();
3085
3086 EVT VT = V->getValueType(0);
3087 if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
3088 return SDValue();
3089
3090 // If the result is masked, then no matter what kind of bool it is we can
3091 // return. If it isn't, then we need to make sure the bool type is either 0 or
3092 // 1 and not other values.
3093 if (Masked ||
3094 TLI.getBooleanContents(V.getValueType()) ==
3095 TargetLowering::ZeroOrOneBooleanContent)
3096 return V;
3097
3098 return SDValue();
3099}
3100
3101/// Given the operands of an add/sub operation, see if the 2nd operand is a
3102/// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
3103/// the opcode and bypass the mask operation.
3104static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
3105 SelectionDAG &DAG, const SDLoc &DL) {
3106 if (N1.getOpcode() == ISD::ZERO_EXTEND)
3107 N1 = N1.getOperand(0);
3108
3109 if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
3110 return SDValue();
3111
3112 EVT VT = N0.getValueType();
3113 SDValue N10 = N1.getOperand(0);
3114 if (N10.getValueType() != VT && N10.getOpcode() == ISD::TRUNCATE)
3115 N10 = N10.getOperand(0);
3116
3117 if (N10.getValueType() != VT)
3118 return SDValue();
3119
3120 if (DAG.ComputeNumSignBits(N10) != VT.getScalarSizeInBits())
3121 return SDValue();
3122
3123 // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
3124 // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
3125 return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N10);
3126}
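// Illustrative example: if X is known to be all sign bits (every element is
// 0 or -1), then
//   add N0, (and X, 1) -> sub N0, X
// because (and X, 1) is 0 or 1 exactly when X is 0 or -1, so flipping the
// opcode lets the mask be dropped.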
3127
3128/// Helper for doing combines based on N0 and N1 being added to each other.
3129SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
3130 SDNode *LocReference) {
3131 EVT VT = N0.getValueType();
3132 SDLoc DL(LocReference);
3133
3134 // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
3135 SDValue Y, N;
3136 if (sd_match(N1, m_Shl(m_Neg(m_Value(Y)), m_Value(N))))
3137 return DAG.getNode(ISD::SUB, DL, VT, N0,
3138 DAG.getNode(ISD::SHL, DL, VT, Y, N));
3139
3140 if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
3141 return V;
3142
3143 // Look for:
3144 // add (add x, 1), y
3145 // And if the target does not like this form then turn into:
3146 // sub y, (xor x, -1)
3147 if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
3148 N0.hasOneUse() && isOneOrOneSplat(N0.getOperand(1)) &&
3149 // Limit this to after legalization if the add has wrap flags
3150 (Level >= AfterLegalizeDAG || (!N0->getFlags().hasNoUnsignedWrap() &&
3151 !N0->getFlags().hasNoSignedWrap()))) {
3152 SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
3153 return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
3154 }
3155
3156 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse()) {
3157 // Hoist one-use subtraction by non-opaque constant:
3158 // (x - C) + y -> (x + y) - C
3159 // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
3160 if (isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3161 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
3162 return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
3163 }
3164 // Hoist one-use subtraction from non-opaque constant:
3165 // (C - x) + y -> (y - x) + C
3166 if (isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
3167 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
3168 return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0));
3169 }
3170 }
3171
3172 // add (mul x, C), x -> mul x, C+1
3173 if (N0.getOpcode() == ISD::MUL && N0.getOperand(0) == N1 &&
3174 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true) &&
3175 N0.hasOneUse()) {
3176 SDValue NewC = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
3177 DAG.getConstant(1, DL, VT));
3178 return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), NewC);
3179 }
3180
3181 // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
3182 // rather than 'add 0/-1' (the zext should get folded).
3183 // add (sext i1 Y), X --> sub X, (zext i1 Y)
3184 if (N0.getOpcode() == ISD::SIGN_EXTEND &&
3185 N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
3186 TLI.getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent) {
3187 SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
3188 return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
3189 }
3190
3191 // add X, (sextinreg Y i1) -> sub X, (and Y 1)
3192 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
3193 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
3194 if (TN->getVT() == MVT::i1) {
3195 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
3196 DAG.getConstant(1, DL, VT));
3197 return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
3198 }
3199 }
3200
3201 // (add X, (uaddo_carry Y, 0, Carry)) -> (uaddo_carry X, Y, Carry)
3202 if (N1.getOpcode() == ISD::UADDO_CARRY && isNullConstant(N1.getOperand(1)) &&
3203 N1.getResNo() == 0)
3204 return DAG.getNode(ISD::UADDO_CARRY, DL, N1->getVTList(),
3205 N0, N1.getOperand(0), N1.getOperand(2));
3206
3207 // (add X, Carry) -> (uaddo_carry X, 0, Carry)
3208 if (TLI.isOperationLegalOrCustom(ISD::UADDO_CARRY, VT))
3209 if (SDValue Carry = getAsCarry(TLI, N1))
3210 return DAG.getNode(ISD::UADDO_CARRY, DL,
3211 DAG.getVTList(VT, Carry.getValueType()), N0,
3212 DAG.getConstant(0, DL, VT), Carry);
3213
3214 return SDValue();
3215}
3216
3217SDValue DAGCombiner::visitADDC(SDNode *N) {
3218 SDValue N0 = N->getOperand(0);
3219 SDValue N1 = N->getOperand(1);
3220 EVT VT = N0.getValueType();
3221 SDLoc DL(N);
3222
3223 // If the flag result is dead, turn this into an ADD.
3224 if (!N->hasAnyUseOfValue(1))
3225 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3226 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3227
3228 // canonicalize constant to RHS.
3229 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3230 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3231 if (N0C && !N1C)
3232 return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
3233
3234 // fold (addc x, 0) -> x + no carry out
3235 if (isNullConstant(N1))
3236 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
3237 DL, MVT::Glue));
3238
3239 // If it cannot overflow, transform into an add.
3240 if (DAG.computeOverflowForUnsignedAdd(N0, N1) == SelectionDAG::OFK_Never)
3241 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3242 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3243
3244 return SDValue();
3245}
3246
3247/**
3248 * Flips a boolean if it is cheaper to compute. If the Force parameter is set,
3249 * then the flip also occurs if computing the inverse is the same cost.
3250 * This function returns an empty SDValue in case it cannot flip the boolean
3251 * without increasing the cost of the computation. If you want to flip a boolean
3252 * no matter what, use DAG.getLogicalNOT.
3253 */
3254 static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG,
3255 const TargetLowering &TLI,
3256 bool Force) {
3257 if (Force && isa<ConstantSDNode>(V))
3258 return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
3259
3260 if (V.getOpcode() != ISD::XOR)
3261 return SDValue();
3262
3263 ConstantSDNode *Const = isConstOrConstSplat(V.getOperand(1), false);
3264 if (!Const)
3265 return SDValue();
3266
3267 EVT VT = V.getValueType();
3268
3269 bool IsFlip = false;
3270 switch(TLI.getBooleanContents(VT)) {
3271 case TargetLowering::ZeroOrOneBooleanContent:
3272 IsFlip = Const->isOne();
3273 break;
3274 case TargetLowering::ZeroOrNegativeOneBooleanContent:
3275 IsFlip = Const->isAllOnes();
3276 break;
3277 case TargetLowering::UndefinedBooleanContent:
3278 IsFlip = (Const->getAPIntValue() & 0x01) == 1;
3279 break;
3280 }
3281
3282 if (IsFlip)
3283 return V.getOperand(0);
3284 if (Force)
3285 return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
3286 return SDValue();
3287}
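// [Added illustrative example] With ZeroOrOneBooleanContent a boolean b is
// stored as 0/1, so V = (xor b, 1) already computes !b; the logical flip of V
// is therefore just b, and the function returns V's first operand directly
// instead of materialising another xor.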
3288
3289SDValue DAGCombiner::visitADDO(SDNode *N) {
3290 SDValue N0 = N->getOperand(0);
3291 SDValue N1 = N->getOperand(1);
3292 EVT VT = N0.getValueType();
3293 bool IsSigned = (ISD::SADDO == N->getOpcode());
3294
3295 EVT CarryVT = N->getValueType(1);
3296 SDLoc DL(N);
3297
3298 // If the flag result is dead, turn this into an ADD.
3299 if (!N->hasAnyUseOfValue(1))
3300 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3301 DAG.getUNDEF(CarryVT));
3302
3303 // canonicalize constant to RHS.
3304 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3305 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3306 return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
3307
3308 // fold (addo x, 0) -> x + no carry out
3309 if (isNullOrNullSplat(N1))
3310 return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
3311
3312 // If it cannot overflow, transform into an add.
3313 if (DAG.willNotOverflowAdd(IsSigned, N0, N1))
3314 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3315 DAG.getConstant(0, DL, CarryVT));
3316
3317 if (IsSigned) {
3318 // fold (saddo (xor a, -1), 1) -> (ssub 0, a).
3319 if (isBitwiseNot(N0) && isOneOrOneSplat(N1))
3320 return DAG.getNode(ISD::SSUBO, DL, N->getVTList(),
3321 DAG.getConstant(0, DL, VT), N0.getOperand(0));
3322 } else {
3323 // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
3324 if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
3325 SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
3326 DAG.getConstant(0, DL, VT), N0.getOperand(0));
3327 return CombineTo(
3328 N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
3329 }
3330
3331 if (SDValue Combined = visitUADDOLike(N0, N1, N))
3332 return Combined;
3333
3334 if (SDValue Combined = visitUADDOLike(N1, N0, N))
3335 return Combined;
3336 }
3337
3338 return SDValue();
3339}
3340
3341SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
3342 EVT VT = N0.getValueType();
3343 if (VT.isVector())
3344 return SDValue();
3345
3346 // (uaddo X, (uaddo_carry Y, 0, Carry)) -> (uaddo_carry X, Y, Carry)
3347 // If Y + 1 cannot overflow.
3348 if (N1.getOpcode() == ISD::UADDO_CARRY && isNullConstant(N1.getOperand(1))) {
3349 SDValue Y = N1.getOperand(0);
3350 SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
3351 if (DAG.computeOverflowForUnsignedAdd(Y, One) == SelectionDAG::OFK_Never)
3352 return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(), N0, Y,
3353 N1.getOperand(2));
3354 }
3355
3356 // (uaddo X, Carry) -> (uaddo_carry X, 0, Carry)
3357 if (TLI.isOperationLegalOrCustom(ISD::UADDO_CARRY, VT))
3358 if (SDValue Carry = getAsCarry(TLI, N1))
3359 return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(), N0,
3360 DAG.getConstant(0, SDLoc(N), VT), Carry);
3361
3362 return SDValue();
3363}
3364
3365SDValue DAGCombiner::visitADDE(SDNode *N) {
3366 SDValue N0 = N->getOperand(0);
3367 SDValue N1 = N->getOperand(1);
3368 SDValue CarryIn = N->getOperand(2);
3369
3370 // canonicalize constant to RHS
3371 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3372 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3373 if (N0C && !N1C)
3374 return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
3375 N1, N0, CarryIn);
3376
3377 // fold (adde x, y, false) -> (addc x, y)
3378 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
3379 return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
3380
3381 return SDValue();
3382}
3383
3384SDValue DAGCombiner::visitUADDO_CARRY(SDNode *N) {
3385 SDValue N0 = N->getOperand(0);
3386 SDValue N1 = N->getOperand(1);
3387 SDValue CarryIn = N->getOperand(2);
3388 SDLoc DL(N);
3389
3390 // canonicalize constant to RHS
3391 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3392 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3393 if (N0C && !N1C)
3394 return DAG.getNode(ISD::UADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
3395
3396 // fold (uaddo_carry x, y, false) -> (uaddo x, y)
3397 if (isNullConstant(CarryIn)) {
3398 if (!LegalOperations ||
3399 TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
3400 return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
3401 }
3402
3403 // fold (uaddo_carry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
3404 if (isNullConstant(N0) && isNullConstant(N1)) {
3405 EVT VT = N0.getValueType();
3406 EVT CarryVT = CarryIn.getValueType();
3407 SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
3408 AddToWorklist(CarryExt.getNode());
3409 return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
3410 DAG.getConstant(1, DL, VT)),
3411 DAG.getConstant(0, DL, CarryVT));
3412 }
3413
3414 if (SDValue Combined = visitUADDO_CARRYLike(N0, N1, CarryIn, N))
3415 return Combined;
3416
3417 if (SDValue Combined = visitUADDO_CARRYLike(N1, N0, CarryIn, N))
3418 return Combined;
3419
3420 // We want to avoid useless duplication.
3421 // TODO: This is done automatically for binary operations. As UADDO_CARRY is
3422 // not a binary operation, it is not really possible to leverage this
3423 // existing mechanism for it. However, if more operations require the same
3424 // deduplication logic, then it may be worth generalizing.
3425 SDValue Ops[] = {N1, N0, CarryIn};
3426 SDNode *CSENode =
3427 DAG.getNodeIfExists(ISD::UADDO_CARRY, N->getVTList(), Ops, N->getFlags());
3428 if (CSENode)
3429 return SDValue(CSENode, 0);
3430
3431 return SDValue();
3432}
3433
3434/**
3435 * If we are facing some sort of diamond carry propagation pattern try to
3436 * break it up to generate something like:
3437 * (uaddo_carry X, 0, (uaddo_carry A, B, Z):Carry)
3438 *
3439 * The end result is usually an increase in operations required, but because the
3440 * carry is now linearized, other transforms can kick in and optimize the DAG.
3441 *
3442 * Patterns typically look something like
3443 * (uaddo A, B)
3444 * / \
3445 * Carry Sum
3446 * | \
3447 * | (uaddo_carry *, 0, Z)
3448 * | /
3449 * \ Carry
3450 * | /
3451 * (uaddo_carry X, *, *)
3452 *
3453 * But numerous variations exist. Our goal is to identify A, B, X and Z and
3454 * produce a combine with a single path for carry propagation.
3455 */
3456 static SDValue combineUADDO_CARRYDiamond(DAGCombiner &Combiner,
3457 SelectionDAG &DAG, SDValue X,
3458 SDValue Carry0, SDValue Carry1,
3459 SDNode *N) {
3460 if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1)
3461 return SDValue();
3462 if (Carry1.getOpcode() != ISD::UADDO)
3463 return SDValue();
3464
3465 SDValue Z;
3466
3467 /**
3468 * First look for a suitable Z. It will present itself in the form of
3469 * (uaddo_carry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true
3470 */
3471 if (Carry0.getOpcode() == ISD::UADDO_CARRY &&
3472 isNullConstant(Carry0.getOperand(1))) {
3473 Z = Carry0.getOperand(2);
3474 } else if (Carry0.getOpcode() == ISD::UADDO &&
3475 isOneConstant(Carry0.getOperand(1))) {
3476 EVT VT = Carry0->getValueType(1);
3477 Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT);
3478 } else {
3479 // We couldn't find a suitable Z.
3480 return SDValue();
3481 }
3482
3483
3484 auto cancelDiamond = [&](SDValue A,SDValue B) {
3485 SDLoc DL(N);
3486 SDValue NewY =
3487 DAG.getNode(ISD::UADDO_CARRY, DL, Carry0->getVTList(), A, B, Z);
3488 Combiner.AddToWorklist(NewY.getNode());
3489 return DAG.getNode(ISD::UADDO_CARRY, DL, N->getVTList(), X,
3490 DAG.getConstant(0, DL, X.getValueType()),
3491 NewY.getValue(1));
3492 };
3493
3494 /**
3495 * (uaddo A, B)
3496 * |
3497 * Sum
3498 * |
3499 * (uaddo_carry *, 0, Z)
3500 */
3501 if (Carry0.getOperand(0) == Carry1.getValue(0)) {
3502 return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1));
3503 }
3504
3505 /**
3506 * (uaddo_carry A, 0, Z)
3507 * |
3508 * Sum
3509 * |
3510 * (uaddo *, B)
3511 */
3512 if (Carry1.getOperand(0) == Carry0.getValue(0)) {
3513 return cancelDiamond(Carry0.getOperand(0), Carry1.getOperand(1));
3514 }
3515
3516 if (Carry1.getOperand(1) == Carry0.getValue(0)) {
3517 return cancelDiamond(Carry1.getOperand(0), Carry0.getOperand(0));
3518 }
3519
3520 return SDValue();
3521}
3522
3523// If we are facing some sort of diamond carry/borrow in/out pattern try to
3524// match patterns like:
3525//
3526// (uaddo A, B) CarryIn
3527// | \ |
3528// | \ |
3529// PartialSum PartialCarryOutX /
3530// | | /
3531// | ____|____________/
3532// | / |
3533// (uaddo *, *) \________
3534// | \ \
3535// | \ |
3536// | PartialCarryOutY |
3537// | \ |
3538// | \ /
3539// AddCarrySum | ______/
3540// | /
3541// CarryOut = (or *, *)
3542//
3543// And generate UADDO_CARRY (or USUBO_CARRY) with two result values:
3544//
3545// {AddCarrySum, CarryOut} = (uaddo_carry A, B, CarryIn)
3546//
3547// Our goal is to identify A, B, and CarryIn and produce UADDO_CARRY/USUBO_CARRY
3548// with a single path for carry/borrow out propagation.
3549 static SDValue combineCarryDiamond(SelectionDAG &DAG, const TargetLowering &TLI,
3550 SDValue N0, SDValue N1, SDNode *N) {
3551 SDValue Carry0 = getAsCarry(TLI, N0);
3552 if (!Carry0)
3553 return SDValue();
3554 SDValue Carry1 = getAsCarry(TLI, N1);
3555 if (!Carry1)
3556 return SDValue();
3557
3558 unsigned Opcode = Carry0.getOpcode();
3559 if (Opcode != Carry1.getOpcode())
3560 return SDValue();
3561 if (Opcode != ISD::UADDO && Opcode != ISD::USUBO)
3562 return SDValue();
3563 // Guarantee identical type of CarryOut
3564 EVT CarryOutType = N->getValueType(0);
3565 if (CarryOutType != Carry0.getValue(1).getValueType() ||
3566 CarryOutType != Carry1.getValue(1).getValueType())
3567 return SDValue();
3568
3569 // Canonicalize the add/sub of A and B (the top node in the above ASCII art)
3570 // as Carry0 and the add/sub of the carry in as Carry1 (the middle node).
3571 if (Carry1.getNode()->isOperandOf(Carry0.getNode()))
3572 std::swap(Carry0, Carry1);
3573
3574 // Check if nodes are connected in expected way.
3575 if (Carry1.getOperand(0) != Carry0.getValue(0) &&
3576 Carry1.getOperand(1) != Carry0.getValue(0))
3577 return SDValue();
3578
3579 // The carry in value must be on the righthand side for subtraction.
3580 unsigned CarryInOperandNum =
3581 Carry1.getOperand(0) == Carry0.getValue(0) ? 1 : 0;
3582 if (Opcode == ISD::USUBO && CarryInOperandNum != 1)
3583 return SDValue();
3584 SDValue CarryIn = Carry1.getOperand(CarryInOperandNum);
3585
3586 unsigned NewOp = Opcode == ISD::UADDO ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
3587 if (!TLI.isOperationLegalOrCustom(NewOp, Carry0.getValue(0).getValueType()))
3588 return SDValue();
3589
3590 // Verify that the carry/borrow in is plausibly a carry/borrow bit.
3591 CarryIn = getAsCarry(TLI, CarryIn, true);
3592 if (!CarryIn)
3593 return SDValue();
3594
3595 SDLoc DL(N);
3596 CarryIn = DAG.getBoolExtOrTrunc(CarryIn, DL, Carry1->getValueType(1),
3597 Carry1->getValueType(0));
3598 SDValue Merged =
3599 DAG.getNode(NewOp, DL, Carry1->getVTList(), Carry0.getOperand(0),
3600 Carry0.getOperand(1), CarryIn);
3601
3602 // Please note that because we have proven that the result of the UADDO/USUBO
3603 // of A and B feeds into the UADDO/USUBO that does the carry/borrow in, we can
3604 // therefore prove that if the first UADDO/USUBO overflows, the second
3605 // UADDO/USUBO cannot. For example consider 8-bit numbers where 0xFF is the
3606 // maximum value.
3607 //
3608 // 0xFF + 0xFF == 0xFE with carry but 0xFE + 1 does not carry
3609 // 0x00 - 0xFF == 1 with a carry/borrow but 1 - 1 == 0 (no carry/borrow)
3610 //
3611 // This is important because it means that OR and XOR can be used to merge
3612 // carry flags; and that AND can return a constant zero.
3613 //
3614 // TODO: match other operations that can merge flags (ADD, etc)
3615 DAG.ReplaceAllUsesOfValueWith(Carry1.getValue(0), Merged.getValue(0));
3616 if (N->getOpcode() == ISD::AND)
3617 return DAG.getConstant(0, DL, CarryOutType);
3618 return Merged.getValue(1);
3619}
3620
3621SDValue DAGCombiner::visitUADDO_CARRYLike(SDValue N0, SDValue N1,
3622 SDValue CarryIn, SDNode *N) {
3623 // fold (uaddo_carry (xor a, -1), b, c) -> (usubo_carry b, a, !c) and flip
3624 // carry.
3625 if (isBitwiseNot(N0))
3626 if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) {
3627 SDLoc DL(N);
3628 SDValue Sub = DAG.getNode(ISD::USUBO_CARRY, DL, N->getVTList(), N1,
3629 N0.getOperand(0), NotC);
3630 return CombineTo(
3631 N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
3632 }
3633
3634 // Iff the flag result is dead:
3635 // (uaddo_carry (add|uaddo X, Y), 0, Carry) -> (uaddo_carry X, Y, Carry)
3636 // Don't do this if the Carry comes from the uaddo. It won't remove the uaddo
3637 // or the dependency between the instructions.
3638 if ((N0.getOpcode() == ISD::ADD ||
3639 (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0 &&
3640 N0.getValue(1) != CarryIn)) &&
3641 isNullConstant(N1) && !N->hasAnyUseOfValue(1))
3642 return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(),
3643 N0.getOperand(0), N0.getOperand(1), CarryIn);
3644
3645 /**
3646 * When one of the uaddo_carry arguments is itself a carry, we may be facing
3647 * a diamond carry propagation, in which case we try to transform the DAG
3648 * to ensure linear carry propagation if that is possible.
3649 */
3650 if (auto Y = getAsCarry(TLI, N1)) {
3651 // Because both are carries, Y and Z can be swapped.
3652 if (auto R = combineUADDO_CARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
3653 return R;
3654 if (auto R = combineUADDO_CARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
3655 return R;
3656 }
3657
3658 return SDValue();
3659}
3660
3661SDValue DAGCombiner::visitSADDO_CARRYLike(SDValue N0, SDValue N1,
3662 SDValue CarryIn, SDNode *N) {
3663 // fold (saddo_carry (xor a, -1), b, c) -> (ssubo_carry b, a, !c)
3664 if (isBitwiseNot(N0)) {
3665 if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true))
3666 return DAG.getNode(ISD::SSUBO_CARRY, SDLoc(N), N->getVTList(), N1,
3667 N0.getOperand(0), NotC);
3668 }
3669
3670 return SDValue();
3671}
3672
3673SDValue DAGCombiner::visitSADDO_CARRY(SDNode *N) {
3674 SDValue N0 = N->getOperand(0);
3675 SDValue N1 = N->getOperand(1);
3676 SDValue CarryIn = N->getOperand(2);
3677 SDLoc DL(N);
3678
3679 // canonicalize constant to RHS
3680 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3681 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3682 if (N0C && !N1C)
3683 return DAG.getNode(ISD::SADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
3684
3685 // fold (saddo_carry x, y, false) -> (saddo x, y)
3686 if (isNullConstant(CarryIn)) {
3687 if (!LegalOperations ||
3688 TLI.isOperationLegalOrCustom(ISD::SADDO, N->getValueType(0)))
3689 return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0, N1);
3690 }
3691
3692 if (SDValue Combined = visitSADDO_CARRYLike(N0, N1, CarryIn, N))
3693 return Combined;
3694
3695 if (SDValue Combined = visitSADDO_CARRYLike(N1, N0, CarryIn, N))
3696 return Combined;
3697
3698 return SDValue();
3699}
3700
3701// Attempt to create a USUBSAT(LHS, RHS) node with DstVT, performing a
3702// clamp/truncation if necessary.
3703static SDValue getTruncatedUSUBSAT(EVT DstVT, EVT SrcVT, SDValue LHS,
3704 SDValue RHS, SelectionDAG &DAG,
3705 const SDLoc &DL) {
3706 assert(DstVT.getScalarSizeInBits() <= SrcVT.getScalarSizeInBits() &&
3707 "Illegal truncation");
3708
3709 if (DstVT == SrcVT)
3710 return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3711
3712 // If the LHS is zero-extended then we can perform the USUBSAT as DstVT by
3713 // clamping RHS.
3714 APInt UpperBits = APInt::getBitsSetFrom(SrcVT.getScalarSizeInBits(),
3715 DstVT.getScalarSizeInBits());
3716 if (!DAG.MaskedValueIsZero(LHS, UpperBits))
3717 return SDValue();
3718
3719 SDValue SatLimit =
3720 DAG.getConstant(APInt::getLowBitsSet(SrcVT.getScalarSizeInBits(),
3721 DstVT.getScalarSizeInBits()),
3722 DL, SrcVT);
3723 RHS = DAG.getNode(ISD::UMIN, DL, SrcVT, RHS, SatLimit);
3724 RHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, RHS);
3725 LHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, LHS);
3726 return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3727}
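// [Added worked example, illustrative only] Truncating a USUBSAT from i32 to
// i8: UpperBits covers bits 8..31, so the transform only fires when LHS is
// known to fit in 8 bits. With LHS = 200 and RHS = 300, the full-width result
// usubsat(200, 300) is 0; after clamping, usubsat.i8(200, umin(300, 255)) =
// usubsat.i8(200, 255) is also 0, so the narrowed node is equivalent.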
3728
3729// Try to find umax(a,b) - b or a - umin(a,b) patterns that may be converted to
3730// usubsat(a,b), optionally as a truncated type.
3731SDValue DAGCombiner::foldSubToUSubSat(EVT DstVT, SDNode *N, const SDLoc &DL) {
3732 if (N->getOpcode() != ISD::SUB ||
3733 !(!LegalOperations || hasOperation(ISD::USUBSAT, DstVT)))
3734 return SDValue();
3735
3736 EVT SubVT = N->getValueType(0);
3737 SDValue Op0 = N->getOperand(0);
3738 SDValue Op1 = N->getOperand(1);
3739
3740 // Try to find umax(a,b) - b or a - umin(a,b) patterns
3741 // that may be converted to usubsat(a,b).
3742 if (Op0.getOpcode() == ISD::UMAX && Op0.hasOneUse()) {
3743 SDValue MaxLHS = Op0.getOperand(0);
3744 SDValue MaxRHS = Op0.getOperand(1);
3745 if (MaxLHS == Op1)
3746 return getTruncatedUSUBSAT(DstVT, SubVT, MaxRHS, Op1, DAG, DL);
3747 if (MaxRHS == Op1)
3748 return getTruncatedUSUBSAT(DstVT, SubVT, MaxLHS, Op1, DAG, DL);
3749 }
3750
3751 if (Op1.getOpcode() == ISD::UMIN && Op1.hasOneUse()) {
3752 SDValue MinLHS = Op1.getOperand(0);
3753 SDValue MinRHS = Op1.getOperand(1);
3754 if (MinLHS == Op0)
3755 return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinRHS, DAG, DL);
3756 if (MinRHS == Op0)
3757 return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinLHS, DAG, DL);
3758 }
3759
3760 // sub(a,trunc(umin(zext(a),b))) -> usubsat(a,trunc(umin(b,SatLimit)))
3761 if (Op1.getOpcode() == ISD::TRUNCATE &&
3762 Op1.getOperand(0).getOpcode() == ISD::UMIN &&
3763 Op1.getOperand(0).hasOneUse()) {
3764 SDValue MinLHS = Op1.getOperand(0).getOperand(0);
3765 SDValue MinRHS = Op1.getOperand(0).getOperand(1);
3766 if (MinLHS.getOpcode() == ISD::ZERO_EXTEND && MinLHS.getOperand(0) == Op0)
3767 return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinLHS, MinRHS,
3768 DAG, DL);
3769 if (MinRHS.getOpcode() == ISD::ZERO_EXTEND && MinRHS.getOperand(0) == Op0)
3770 return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinRHS, MinLHS,
3771 DAG, DL);
3772 }
3773
3774 return SDValue();
3775}
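// [Added reasoning note] Both patterns above equal usubsat(a,b): when a >= b,
// umax(a,b) - b and a - umin(a,b) are each a - b; when a < b, they are each 0,
// which is exactly the unsigned saturating subtraction result.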
3776
3777 // Since it may not be valid to emit a fold to zero for vector initializers,
3778// check if we can before folding.
3779static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
3780 SelectionDAG &DAG, bool LegalOperations) {
3781 if (!VT.isVector())
3782 return DAG.getConstant(0, DL, VT);
3783 if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
3784 return DAG.getConstant(0, DL, VT);
3785 return SDValue();
3786}
3787
3788SDValue DAGCombiner::visitSUB(SDNode *N) {
3789 SDValue N0 = N->getOperand(0);
3790 SDValue N1 = N->getOperand(1);
3791 EVT VT = N0.getValueType();
3792 unsigned BitWidth = VT.getScalarSizeInBits();
3793 SDLoc DL(N);
3794
3795 auto PeekThroughFreeze = [](SDValue N) {
3796 if (N->getOpcode() == ISD::FREEZE && N.hasOneUse())
3797 return N->getOperand(0);
3798 return N;
3799 };
3800
3801 // fold (sub x, x) -> 0
3802 // FIXME: Refactor this and xor and other similar operations together.
3803 if (PeekThroughFreeze(N0) == PeekThroughFreeze(N1))
3804 return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
3805
3806 // fold (sub c1, c2) -> c3
3807 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N1}))
3808 return C;
3809
3810 // fold vector ops
3811 if (VT.isVector()) {
3812 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
3813 return FoldedVOp;
3814
3815 // fold (sub x, 0) -> x, vector edition
3816 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
3817 return N0;
3818 }
3819
3820 if (SDValue NewSel = foldBinOpIntoSelect(N))
3821 return NewSel;
3822
3823 // fold (sub x, c) -> (add x, -c)
3824 if (ConstantSDNode *N1C = getAsNonOpaqueConstant(N1))
3825 return DAG.getNode(ISD::ADD, DL, VT, N0,
3826 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3827
3828 if (isNullOrNullSplat(N0)) {
3829 // Right-shifting everything out but the sign bit followed by negation is
3830 // the same as flipping arithmetic/logical shift type without the negation:
3831 // -(X >>u 31) -> (X >>s 31)
3832 // -(X >>s 31) -> (X >>u 31)
3833 if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
3834 ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
3835 if (ShiftAmt && ShiftAmt->getAPIntValue() == (BitWidth - 1)) {
3836 auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
3837 if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
3838 return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
3839 }
3840 }
3841
3842 // 0 - X --> 0 if the sub is NUW.
3843 if (N->getFlags().hasNoUnsignedWrap())
3844 return N0;
3845
3846 if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
3847 // N1 is either 0 or the minimum signed value. If the sub is NSW, then
3848 // N1 must be 0 because negating the minimum signed value is undefined.
3849 if (N->getFlags().hasNoSignedWrap())
3850 return N0;
3851
3852 // 0 - X --> X if X is 0 or the minimum signed value.
3853 return N1;
3854 }
3855
3856 // Convert 0 - abs(x).
3857 if (N1.getOpcode() == ISD::ABS && N1.hasOneUse() &&
3858 !TLI.isOperationLegalOrCustom(ISD::ABS, VT))
3859 if (SDValue Result = TLI.expandABS(N1.getNode(), DAG, true))
3860 return Result;
3861
3862 // Fold neg(splat(neg(x)) -> splat(x)
3863 if (VT.isVector()) {
3864 SDValue N1S = DAG.getSplatValue(N1, true);
3865 if (N1S && N1S.getOpcode() == ISD::SUB &&
3866 isNullConstant(N1S.getOperand(0)))
3867 return DAG.getSplat(VT, DL, N1S.getOperand(1));
3868 }
3869 }
3870
3871 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
3872 if (isAllOnesOrAllOnesSplat(N0))
3873 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
3874
3875 // fold (A - (0-B)) -> A+B
3876 if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
3877 return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
3878
3879 // fold A-(A-B) -> B
3880 if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
3881 return N1.getOperand(1);
3882
3883 // fold (A+B)-A -> B
3884 if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
3885 return N0.getOperand(1);
3886
3887 // fold (A+B)-B -> A
3888 if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
3889 return N0.getOperand(0);
3890
3891 // fold (A+C1)-C2 -> A+(C1-C2)
3892 if (N0.getOpcode() == ISD::ADD) {
3893 SDValue N01 = N0.getOperand(1);
3894 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N01, N1}))
3895 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
3896 }
3897
3898 // fold C2-(A+C1) -> (C2-C1)-A
3899 if (N1.getOpcode() == ISD::ADD) {
3900 SDValue N11 = N1.getOperand(1);
3901 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N11}))
3902 return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
3903 }
3904
3905 // fold (A-C1)-C2 -> A-(C1+C2)
3906 if (N0.getOpcode() == ISD::SUB) {
3907 SDValue N01 = N0.getOperand(1);
3908 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N01, N1}))
3909 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
3910 }
3911
3912 // fold (c1-A)-c2 -> (c1-c2)-A
3913 if (N0.getOpcode() == ISD::SUB) {
3914 SDValue N00 = N0.getOperand(0);
3915 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N00, N1}))
3916 return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
3917 }
3918
3919 SDValue A, B, C;
3920
3921 // fold ((A+(B+C))-B) -> A+C
3922 if (sd_match(N0, m_Add(m_Value(A), m_Add(m_Specific(N1), m_Value(C)))))
3923 return DAG.getNode(ISD::ADD, DL, VT, A, C);
3924
3925 // fold ((A+(B-C))-B) -> A-C
3926 if (sd_match(N0, m_Add(m_Value(A), m_Sub(m_Specific(N1), m_Value(C)))))
3927 return DAG.getNode(ISD::SUB, DL, VT, A, C);
3928
3929 // fold ((A-(B-C))-C) -> A-B
3930 if (sd_match(N0, m_Sub(m_Value(A), m_Sub(m_Value(B), m_Specific(N1)))))
3931 return DAG.getNode(ISD::SUB, DL, VT, A, B);
3932
3933 // fold (A-(B-C)) -> A+(C-B)
3934 if (sd_match(N1, m_OneUse(m_Sub(m_Value(B), m_Value(C)))))
3935 return DAG.getNode(ISD::ADD, DL, VT, N0,
3936 DAG.getNode(ISD::SUB, DL, VT, C, B));
3937
3938 // A - (A & B) -> A & (~B)
3939 if (sd_match(N1, m_And(m_Specific(N0), m_Value(B))) &&
3940 (N1.hasOneUse() || isConstantOrConstantVector(B, /*NoOpaques=*/true)))
3941 return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getNOT(DL, B, VT));
3942
3943 // fold (A - (-B * C)) -> (A + (B * C))
3944 if (sd_match(N1, m_OneUse(m_Mul(m_Neg(m_Value(B)), m_Value(C)))))
3945 return DAG.getNode(ISD::ADD, DL, VT, N0,
3946 DAG.getNode(ISD::MUL, DL, VT, B, C));
3947
3948 // If either operand of a sub is undef, the result is undef
3949 if (N0.isUndef())
3950 return N0;
3951 if (N1.isUndef())
3952 return N1;
3953
3954 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DL, DAG))
3955 return V;
3956
3957 if (SDValue V = foldAddSubOfSignBit(N, DL, DAG))
3958 return V;
3959
3960 // Try to match AVGCEIL fixedwidth pattern
3961 if (SDValue V = foldSubToAvg(N, DL))
3962 return V;
3963
3964 if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, DL))
3965 return V;
3966
3967 if (SDValue V = foldSubToUSubSat(VT, N, DL))
3968 return V;
3969
3970 // (A - B) - 1 -> add (xor B, -1), A
3971 if (sd_match(N, m_Sub(m_OneUse(m_Sub(m_Value(A), m_Value(B))), m_One())))
3972 return DAG.getNode(ISD::ADD, DL, VT, A, DAG.getNOT(DL, B, VT));
3973
3974 // Look for:
3975 // sub y, (xor x, -1)
3976 // And if the target does not like this form then turn into:
3977 // add (add x, y), 1
3978 if (TLI.preferIncOfAddToSubOfNot(VT) && N1.hasOneUse() && isBitwiseNot(N1)) {
3979 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(0));
3980 return DAG.getNode(ISD::ADD, DL, VT, Add, DAG.getConstant(1, DL, VT));
3981 }
3982
3983 // Hoist one-use addition by non-opaque constant:
3984 // (x + C) - y -> (x - y) + C
3985 if (!reassociationCanBreakAddressingModePattern(ISD::SUB, DL, N, N0, N1) &&
3986 N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
3987 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3988 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3989 return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
3990 }
3991 // y - (x + C) -> (y - x) - C
3992 if (N1.getOpcode() == ISD::ADD && N1.hasOneUse() &&
3993 isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) {
3994 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
3995 return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
3996 }
3997 // (x - C) - y -> (x - y) - C
3998 // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
3999 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
4000 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
4001 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
4002 return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
4003 }
4004 // (C - x) - y -> C - (x + y)
4005 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
4006 isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
4007 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1);
4008 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add);
4009 }
4010
4011 // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
4012 // rather than 'sub 0/1' (the sext should get folded).
4013 // sub X, (zext i1 Y) --> add X, (sext i1 Y)
4014 if (N1.getOpcode() == ISD::ZERO_EXTEND &&
4015 N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
4016 TLI.getBooleanContents(VT) ==
4017 TargetLowering::ZeroOrNegativeOneBooleanContent) {
4018 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
4019 return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
4020 }
4021
4022 // fold B = sra (A, size(A)-1); sub (xor (A, B), B) -> (abs A)
4023 if ((!LegalOperations || hasOperation(ISD::ABS, VT)) &&
4024 sd_match(N1, m_Sra(m_Value(A), m_SpecificInt(BitWidth - 1))) &&
4025 sd_match(N0, m_Xor(m_Specific(A), m_Specific(N1))))
4026 return DAG.getNode(ISD::ABS, DL, VT, A);
4027
4028 // If the relocation model supports it, consider symbol offsets.
4029 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
4030 if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
4031 // fold (sub Sym+c1, Sym+c2) -> c1-c2
4032 if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
4033 if (GA->getGlobal() == GB->getGlobal())
4034 return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
4035 DL, VT);
4036 }
4037
4038 // sub X, (sextinreg Y i1) -> add X, (and Y 1)
4039 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
4040 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
4041 if (TN->getVT() == MVT::i1) {
4042 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
4043 DAG.getConstant(1, DL, VT));
4044 return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
4045 }
4046 }
4047
4048 // canonicalize (sub X, (vscale * C)) to (add X, (vscale * -C))
4049 if (N1.getOpcode() == ISD::VSCALE && N1.hasOneUse()) {
4050 const APInt &IntVal = N1.getConstantOperandAPInt(0);
4051 return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getVScale(DL, VT, -IntVal));
4052 }
4053
4054 // canonicalize (sub X, step_vector(C)) to (add X, step_vector(-C))
4055 if (N1.getOpcode() == ISD::STEP_VECTOR && N1.hasOneUse()) {
4056 APInt NewStep = -N1.getConstantOperandAPInt(0);
4057 return DAG.getNode(ISD::ADD, DL, VT, N0,
4058 DAG.getStepVector(DL, VT, NewStep));
4059 }
4060
4061 // Prefer an add for more folding potential and possibly better codegen:
4062 // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
4063 if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
4064 SDValue ShAmt = N1.getOperand(1);
4065 ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
4066 if (ShAmtC && ShAmtC->getAPIntValue() == (BitWidth - 1)) {
4067 SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
4068 return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
4069 }
4070 }
4071
4072 // As with the previous fold, prefer add for more folding potential.
4073 // Subtracting SMIN/0 is the same as adding SMIN/0:
4074 // N0 - (X << BW-1) --> N0 + (X << BW-1)
4075 if (N1.getOpcode() == ISD::SHL) {
4076 ConstantSDNode *ShlC = isConstOrConstSplat(N1.getOperand(1));
4077 if (ShlC && ShlC->getAPIntValue() == (BitWidth - 1))
4078 return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
4079 }
4080
4081 // (sub (usubo_carry X, 0, Carry), Y) -> (usubo_carry X, Y, Carry)
4082 if (N0.getOpcode() == ISD::USUBO_CARRY && isNullConstant(N0.getOperand(1)) &&
4083 N0.getResNo() == 0 && N0.hasOneUse())
4084 return DAG.getNode(ISD::USUBO_CARRY, DL, N0->getVTList(),
4085 N0.getOperand(0), N1, N0.getOperand(2));
4086
4087 if (TLI.isOperationLegalOrCustom(ISD::UADDO_CARRY, VT)) {
4088 // (sub Carry, X) -> (uaddo_carry (sub 0, X), 0, Carry)
4089 if (SDValue Carry = getAsCarry(TLI, N0)) {
4090 SDValue X = N1;
4091 SDValue Zero = DAG.getConstant(0, DL, VT);
4092 SDValue NegX = DAG.getNode(ISD::SUB, DL, VT, Zero, X);
4093 return DAG.getNode(ISD::UADDO_CARRY, DL,
4094 DAG.getVTList(VT, Carry.getValueType()), NegX, Zero,
4095 Carry);
4096 }
4097 }
4098
4099 // If there's no chance of borrowing from adjacent bits, then sub is xor:
4100 // sub C0, X --> xor X, C0
4101 if (ConstantSDNode *C0 = isConstOrConstSplat(N0)) {
4102 if (!C0->isOpaque()) {
4103 const APInt &C0Val = C0->getAPIntValue();
4104 const APInt &MaybeOnes = ~DAG.computeKnownBits(N1).Zero;
4105 if ((C0Val - MaybeOnes) == (C0Val ^ MaybeOnes))
4106 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
4107 }
4108 }
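// [Added illustrative example] The guard above holds when every bit that may
// be set in X is also set in C0, so the subtraction can never borrow. E.g.
// C0 = 0b1111 with MaybeOnes = 0b0011 gives C0 - MaybeOnes == C0 ^ MaybeOnes
// == 0b1100, and then (sub 0b1111, X) == (xor X, 0b1111) for any such X.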
4109
4110 // smax(a,b) - smin(a,b) --> abds(a,b)
4111 if (hasOperation(ISD::ABDS, VT) &&
4112 sd_match(N0, m_SMax(m_Value(A), m_Value(B))) &&
4113 sd_match(N1, m_SMin(m_Specific(A), m_Specific(B))))
4114 return DAG.getNode(ISD::ABDS, DL, VT, A, B);
4115
4116 // umax(a,b) - umin(a,b) --> abdu(a,b)
4117 if (hasOperation(ISD::ABDU, VT) &&
4118 sd_match(N0, m_UMax(m_Value(A), m_Value(B))) &&
4119 sd_match(N1, m_UMin(m_Specific(A), m_Specific(B))))
4120 return DAG.getNode(ISD::ABDU, DL, VT, A, B);
4121
4122 return SDValue();
4123}
4124
4125SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
4126 unsigned Opcode = N->getOpcode();
4127 SDValue N0 = N->getOperand(0);
4128 SDValue N1 = N->getOperand(1);
4129 EVT VT = N0.getValueType();
4130 bool IsSigned = Opcode == ISD::SSUBSAT;
4131 SDLoc DL(N);
4132
4133 // fold (sub_sat x, undef) -> 0
4134 if (N0.isUndef() || N1.isUndef())
4135 return DAG.getConstant(0, DL, VT);
4136
4137 // fold (sub_sat x, x) -> 0
4138 if (N0 == N1)
4139 return DAG.getConstant(0, DL, VT);
4140
4141 // fold (sub_sat c1, c2) -> c3
4142 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
4143 return C;
4144
4145 // fold vector ops
4146 if (VT.isVector()) {
4147 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4148 return FoldedVOp;
4149
4150 // fold (sub_sat x, 0) -> x, vector edition
4151 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
4152 return N0;
4153 }
4154
4155 // fold (sub_sat x, 0) -> x
4156 if (isNullConstant(N1))
4157 return N0;
4158
4159 // If it cannot overflow, transform into a sub.
4160 if (DAG.willNotOverflowSub(IsSigned, N0, N1))
4161 return DAG.getNode(ISD::SUB, DL, VT, N0, N1);
4162
4163 return SDValue();
4164}
4165
4166SDValue DAGCombiner::visitSUBC(SDNode *N) {
4167 SDValue N0 = N->getOperand(0);
4168 SDValue N1 = N->getOperand(1);
4169 EVT VT = N0.getValueType();
4170 SDLoc DL(N);
4171
4172 // If the flag result is dead, turn this into an SUB.
4173 if (!N->hasAnyUseOfValue(1))
4174 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
4175 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4176
4177 // fold (subc x, x) -> 0 + no borrow
4178 if (N0 == N1)
4179 return CombineTo(N, DAG.getConstant(0, DL, VT),
4180 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4181
4182 // fold (subc x, 0) -> x + no borrow
4183 if (isNullConstant(N1))
4184 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4185
4186 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
4187 if (isAllOnesConstant(N0))
4188 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
4189 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4190
4191 return SDValue();
4192}
4193
4194SDValue DAGCombiner::visitSUBO(SDNode *N) {
4195 SDValue N0 = N->getOperand(0);
4196 SDValue N1 = N->getOperand(1);
4197 EVT VT = N0.getValueType();
4198 bool IsSigned = (ISD::SSUBO == N->getOpcode());
4199
4200 EVT CarryVT = N->getValueType(1);
4201 SDLoc DL(N);
4202
4203 // If the flag result is dead, turn this into an SUB.
4204 if (!N->hasAnyUseOfValue(1))
4205 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
4206 DAG.getUNDEF(CarryVT));
4207
4208 // fold (subo x, x) -> 0 + no borrow
4209 if (N0 == N1)
4210 return CombineTo(N, DAG.getConstant(0, DL, VT),
4211 DAG.getConstant(0, DL, CarryVT));
4212
4213 // fold (subo x, c) -> (addo x, -c)
4214 if (ConstantSDNode *N1C = getAsNonOpaqueConstant(N1))
4215 if (IsSigned && !N1C->isMinSignedValue())
4216 return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
4217 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
4218
4219 // fold (subo x, 0) -> x + no borrow
4220 if (isNullOrNullSplat(N1))
4221 return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
4222
4223 // If it cannot overflow, transform into a sub.
4224 if (DAG.willNotOverflowSub(IsSigned, N0, N1))
4225 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
4226 DAG.getConstant(0, DL, CarryVT));
4227
4228 // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
4229 if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
4230 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
4231 DAG.getConstant(0, DL, CarryVT));
4232
4233 return SDValue();
4234}
4235
4236SDValue DAGCombiner::visitSUBE(SDNode *N) {
4237 SDValue N0 = N->getOperand(0);
4238 SDValue N1 = N->getOperand(1);
4239 SDValue CarryIn = N->getOperand(2);
4240
4241 // fold (sube x, y, false) -> (subc x, y)
4242 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
4243 return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
4244
4245 return SDValue();
4246}
4247
4248SDValue DAGCombiner::visitUSUBO_CARRY(SDNode *N) {
4249 SDValue N0 = N->getOperand(0);
4250 SDValue N1 = N->getOperand(1);
4251 SDValue CarryIn = N->getOperand(2);
4252
4253 // fold (usubo_carry x, y, false) -> (usubo x, y)
4254 if (isNullConstant(CarryIn)) {
4255 if (!LegalOperations ||
4256 TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
4257 return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
4258 }
4259
4260 return SDValue();
4261}
4262
4263SDValue DAGCombiner::visitSSUBO_CARRY(SDNode *N) {
4264 SDValue N0 = N->getOperand(0);
4265 SDValue N1 = N->getOperand(1);
4266 SDValue CarryIn = N->getOperand(2);
4267
4268 // fold (ssubo_carry x, y, false) -> (ssubo x, y)
4269 if (isNullConstant(CarryIn)) {
4270 if (!LegalOperations ||
4271 TLI.isOperationLegalOrCustom(ISD::SSUBO, N->getValueType(0)))
4272 return DAG.getNode(ISD::SSUBO, SDLoc(N), N->getVTList(), N0, N1);
4273 }
4274
4275 return SDValue();
4276}
4277
4278// Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX and
4279// UMULFIXSAT here.
4280SDValue DAGCombiner::visitMULFIX(SDNode *N) {
4281 SDValue N0 = N->getOperand(0);
4282 SDValue N1 = N->getOperand(1);
4283 SDValue Scale = N->getOperand(2);
4284 EVT VT = N0.getValueType();
4285
4286 // fold (mulfix x, undef, scale) -> 0
4287 if (N0.isUndef() || N1.isUndef())
4288 return DAG.getConstant(0, SDLoc(N), VT);
4289
4290 // Canonicalize constant to RHS (vector doesn't have to splat)
4291 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4292 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4293 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0, Scale);
4294
4295 // fold (mulfix x, 0, scale) -> 0
4296 if (isNullConstant(N1))
4297 return DAG.getConstant(0, SDLoc(N), VT);
4298
4299 return SDValue();
4300}
4301
4302template <class MatchContextClass> SDValue DAGCombiner::visitMUL(SDNode *N) {
4303 SDValue N0 = N->getOperand(0);
4304 SDValue N1 = N->getOperand(1);
4305 EVT VT = N0.getValueType();
4306 unsigned BitWidth = VT.getScalarSizeInBits();
4307 SDLoc DL(N);
4308 bool UseVP = std::is_same_v<MatchContextClass, VPMatchContext>;
4309 MatchContextClass Matcher(DAG, TLI, N);
4310
4311 // fold (mul x, undef) -> 0
4312 if (N0.isUndef() || N1.isUndef())
4313 return DAG.getConstant(0, DL, VT);
4314
4315 // fold (mul c1, c2) -> c1*c2
4316 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MUL, DL, VT, {N0, N1}))
4317 return C;
4318
4319 // canonicalize constant to RHS (vector doesn't have to splat)
4320 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4321 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4322 return Matcher.getNode(ISD::MUL, DL, VT, N1, N0);
4323
4324 bool N1IsConst = false;
4325 bool N1IsOpaqueConst = false;
4326 APInt ConstValue1;
4327
4328 // fold vector ops
4329 if (VT.isVector()) {
4330 // TODO: Change this to use SimplifyVBinOp when it supports VP op.
4331 if (!UseVP)
4332 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4333 return FoldedVOp;
4334
4335 N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
4336 assert((!N1IsConst || ConstValue1.getBitWidth() == BitWidth) &&
4337 "Splat APInt should be element width");
4338 } else {
4339 N1IsConst = isa<ConstantSDNode>(N1);
4340 if (N1IsConst) {
4341 ConstValue1 = N1->getAsAPIntVal();
4342 N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
4343 }
4344 }
4345
4346 // fold (mul x, 0) -> 0
4347 if (N1IsConst && ConstValue1.isZero())
4348 return N1;
4349
4350 // fold (mul x, 1) -> x
4351 if (N1IsConst && ConstValue1.isOne())
4352 return N0;
4353
4354 if (!UseVP)
4355 if (SDValue NewSel = foldBinOpIntoSelect(N))
4356 return NewSel;
4357
4358 // fold (mul x, -1) -> 0-x
4359 if (N1IsConst && ConstValue1.isAllOnes())
4360 return Matcher.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
4361
4362 // fold (mul x, (1 << c)) -> x << c
4363 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
4364 (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
4365 if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
4366 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4367 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
4368 return Matcher.getNode(ISD::SHL, DL, VT, N0, Trunc);
4369 }
4370 }
4371
4372 // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
4373 if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isNegatedPowerOf2()) {
4374 unsigned Log2Val = (-ConstValue1).logBase2();
4375
4376 // FIXME: If the input is something that is easily negated (e.g. a
4377 // single-use add), we should put the negate there.
4378 return Matcher.getNode(
4379 ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
4380 Matcher.getNode(ISD::SHL, DL, VT, N0,
4381 DAG.getShiftAmountConstant(Log2Val, VT, DL)));
4382 }
4383
4384 // Attempt to reuse an existing umul_lohi/smul_lohi node, but only if the
4385 // hi result is in use in case we hit this mid-legalization.
4386 if (!UseVP) {
4387 for (unsigned LoHiOpc : {ISD::UMUL_LOHI, ISD::SMUL_LOHI}) {
4388 if (!LegalOperations || TLI.isOperationLegalOrCustom(LoHiOpc, VT)) {
4389 SDVTList LoHiVT = DAG.getVTList(VT, VT);
4390 // TODO: Can we match commutable operands with getNodeIfExists?
4391 if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N0, N1}))
4392 if (LoHi->hasAnyUseOfValue(1))
4393 return SDValue(LoHi, 0);
4394 if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N1, N0}))
4395 if (LoHi->hasAnyUseOfValue(1))
4396 return SDValue(LoHi, 0);
4397 }
4398 }
4399 }
4400
4401 // Try to transform:
4402 // (1) multiply-by-(power-of-2 +/- 1) into shift and add/sub.
4403 // mul x, (2^N + 1) --> add (shl x, N), x
4404 // mul x, (2^N - 1) --> sub (shl x, N), x
4405 // Examples: x * 33 --> (x << 5) + x
4406 // x * 15 --> (x << 4) - x
4407 // x * -33 --> -((x << 5) + x)
4408 // x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
4409 // (2) multiply-by-(power-of-2 +/- power-of-2) into shifts and add/sub.
4410 // mul x, (2^N + 2^M) --> (add (shl x, N), (shl x, M))
4411 // mul x, (2^N - 2^M) --> (sub (shl x, N), (shl x, M))
4412 // Examples: x * 0x8800 --> (x << 15) + (x << 11)
4413 // x * 0xf800 --> (x << 16) - (x << 11)
4414 // x * -0x8800 --> -((x << 15) + (x << 11))
4415 // x * -0xf800 --> -((x << 16) - (x << 11)) ; (x << 11) - (x << 16)
4416 if (!UseVP && N1IsConst &&
4417 TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
4418 // TODO: We could handle more general decomposition of any constant by
4419 // having the target set a limit on number of ops and making a
4420 // callback to determine that sequence (similar to sqrt expansion).
4421 unsigned MathOp = ISD::DELETED_NODE;
4422 APInt MulC = ConstValue1.abs();
4423 // The constant `2` should be treated as (2^0 + 1).
4424 unsigned TZeros = MulC == 2 ? 0 : MulC.countr_zero();
4425 MulC.lshrInPlace(TZeros);
4426 if ((MulC - 1).isPowerOf2())
4427 MathOp = ISD::ADD;
4428 else if ((MulC + 1).isPowerOf2())
4429 MathOp = ISD::SUB;
4430
4431 if (MathOp != ISD::DELETED_NODE) {
4432 unsigned ShAmt =
4433 MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2();
4434 ShAmt += TZeros;
4435 assert(ShAmt < BitWidth &&
4436 "multiply-by-constant generated out of bounds shift");
4437 SDValue Shl =
4438 DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT));
4439 SDValue R =
4440 TZeros ? DAG.getNode(MathOp, DL, VT, Shl,
4441 DAG.getNode(ISD::SHL, DL, VT, N0,
4442 DAG.getConstant(TZeros, DL, VT)))
4443 : DAG.getNode(MathOp, DL, VT, Shl, N0);
4444 if (ConstValue1.isNegative())
4445 R = DAG.getNegative(R, DL, VT);
4446 return R;
4447 }
4448 }
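// [Added worked example, not from the upstream source] For x * 0x8800 the
// code above strips TZeros = 11 trailing zeros, leaving MulC = 0x11 = 2^4 + 1,
// so MathOp = ISD::ADD and ShAmt = 4 + 11 = 15; the emitted expression is
// (add (shl x, 15), (shl x, 11)), matching the examples in the comment above.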
4449
4450 // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
4451 if (sd_context_match(N0, Matcher, m_Opc(ISD::SHL))) {
4452 SDValue N01 = N0.getOperand(1);
4453 if (SDValue C3 = DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N1, N01}))
4454 return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), C3);
4455 }
4456
4457 // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
4458 // use.
4459 {
4460 SDValue Sh, Y;
4461
4462 // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
4463 if (sd_context_match(N0, Matcher, m_OneUse(m_Opc(ISD::SHL))) &&
4464 isConstantOrConstantVector(N0.getOperand(1))) {
4465 Sh = N0; Y = N1;
4466 } else if (sd_context_match(N1, Matcher, m_OneUse(m_Opc(ISD::SHL))) &&
4467 isConstantOrConstantVector(N1.getOperand(1))) {
4468 Sh = N1; Y = N0;
4469 }
4470
4471 if (Sh.getNode()) {
4472 SDValue Mul = Matcher.getNode(ISD::MUL, DL, VT, Sh.getOperand(0), Y);
4473 return Matcher.getNode(ISD::SHL, DL, VT, Mul, Sh.getOperand(1));
4474 }
4475 }
4476
4477 // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
4478 if (sd_context_match(N0, Matcher, m_Opc(ISD::ADD)) &&
4479 isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
4480 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques*/ true) &&
4481 isMulAddWithConstProfitable(N, N0, N1))
4482 return Matcher.getNode(
4483 ISD::ADD, DL, VT,
4484 Matcher.getNode(ISD::MUL, SDLoc(N0), VT, N0.getOperand(0), N1),
4485 Matcher.getNode(ISD::MUL, SDLoc(N1), VT, N0.getOperand(1), N1));
4486
4487 // Fold (mul (vscale * C0), C1) to (vscale * (C0 * C1)).
4488 ConstantSDNode *NC1 = isConstOrConstSplat(N1);
4489 if (!UseVP && N0.getOpcode() == ISD::VSCALE && NC1) {
4490 const APInt &C0 = N0.getConstantOperandAPInt(0);
4491 const APInt &C1 = NC1->getAPIntValue();
4492 return DAG.getVScale(DL, VT, C0 * C1);
4493 }
4494
4495 // Fold (mul step_vector(C0), C1) to (step_vector(C0 * C1)).
4496 APInt MulVal;
4497 if (!UseVP && N0.getOpcode() == ISD::STEP_VECTOR &&
4498 ISD::isConstantSplatVector(N1.getNode(), MulVal)) {
4499 const APInt &C0 = N0.getConstantOperandAPInt(0);
4500 APInt NewStep = C0 * MulVal;
4501 return DAG.getStepVector(DL, VT, NewStep);
4502 }
4503
4504 // Fold Y = sra (X, size(X)-1); mul (or (Y, 1), X) -> (abs X)
4505 SDValue X;
4506 if (!UseVP && (!LegalOperations || hasOperation(ISD::ABS, VT)) &&
4507 sd_context_match(
4508 N, Matcher,
4509 m_Mul(m_Or(m_Sra(m_Value(X), m_SpecificInt(BitWidth - 1)), m_One()),
4510 m_Deferred(X)))) {
4511 return Matcher.getNode(ISD::ABS, DL, VT, X);
4512 }
4513
4514 // Fold (mul x, 0/undef) -> 0 and
4515 // (mul x, 1) -> x
4516 // into and(x, mask).
4517 // We can replace vectors with '0' and '1' factors with a clearing mask.
4518 if (VT.isFixedLengthVector()) {
4519 unsigned NumElts = VT.getVectorNumElements();
4520 SmallBitVector ClearMask;
4521 ClearMask.reserve(NumElts);
4522 auto IsClearMask = [&ClearMask](ConstantSDNode *V) {
4523 if (!V || V->isZero()) {
4524 ClearMask.push_back(true);
4525 return true;
4526 }
4527 ClearMask.push_back(false);
4528 return V->isOne();
4529 };
4530 if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::AND, VT)) &&
4531 ISD::matchUnaryPredicate(N1, IsClearMask, /*AllowUndefs*/ true)) {
4532 assert(N1.getOpcode() == ISD::BUILD_VECTOR && "Unknown constant vector");
4533 EVT LegalSVT = N1.getOperand(0).getValueType();
4534 SDValue Zero = DAG.getConstant(0, DL, LegalSVT);
4535 SDValue AllOnes = DAG.getAllOnesConstant(DL, LegalSVT);
4536 SmallVector<SDValue, 16> Mask(NumElts, AllOnes);
4537 for (unsigned I = 0; I != NumElts; ++I)
4538 if (ClearMask[I])
4539 Mask[I] = Zero;
4540 return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getBuildVector(VT, DL, Mask));
4541 }
4542 }
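// [Added illustrative example] For a <4 x i32> multiply by the constant
// vector <1, 0, 1, 0>, the fold above builds the mask <-1, 0, -1, 0> and
// emits (and x, mask): lanes multiplied by 1 are kept, lanes multiplied by 0
// (or undef) are cleared.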
4543
4544 // reassociate mul
4545 // TODO: Change reassociateOps to support vp ops.
4546 if (!UseVP)
4547 if (SDValue RMUL = reassociateOps(ISD::MUL, DL, N0, N1, N->getFlags()))
4548 return RMUL;
4549
4550 // Fold mul(vecreduce(x), vecreduce(y)) -> vecreduce(mul(x, y))
4551 // TODO: Change reassociateReduction to support vp ops.
4552 if (!UseVP)
4553 if (SDValue SD =
4554 reassociateReduction(ISD::VECREDUCE_MUL, ISD::MUL, DL, VT, N0, N1))
4555 return SD;
4556
4557 // Simplify the operands using demanded-bits information.
4558 if (SimplifyDemandedBits(SDValue(N, 0)))
4559 return SDValue(N, 0);
4560
4561 return SDValue();
4562}
4563
4564/// Return true if divmod libcall is available.
4565 static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
4566 const TargetLowering &TLI) {
4567 RTLIB::Libcall LC;
4568 EVT NodeType = Node->getValueType(0);
4569 if (!NodeType.isSimple())
4570 return false;
4571 switch (NodeType.getSimpleVT().SimpleTy) {
4572 default: return false; // No libcall for vector types.
4573 case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
4574 case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
4575 case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
4576 case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
4577 case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
4578 }
4579
4580 return TLI.getLibcallName(LC) != nullptr;
4581}
4582
4583/// Issue divrem if both quotient and remainder are needed.
4584SDValue DAGCombiner::useDivRem(SDNode *Node) {
4585 if (Node->use_empty())
4586 return SDValue(); // This is a dead node, leave it alone.
4587
4588 unsigned Opcode = Node->getOpcode();
4589 bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
4590 unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
4591
4592 // DivMod lib calls can still work on non-legal types if using lib-calls.
4593 EVT VT = Node->getValueType(0);
4594 if (VT.isVector() || !VT.isInteger())
4595 return SDValue();
4596
4597 if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
4598 return SDValue();
4599
4600 // If DIVREM is going to get expanded into a libcall,
4601 // but there is no libcall available, then don't combine.
4602 if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
4603 !isDivRemLibcallAvailable(Node, isSigned, TLI))
4604 return SDValue();
4605
4606 // If div is legal, it's better to do the normal expansion
4607 unsigned OtherOpcode = 0;
4608 if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
4609 OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
4610 if (TLI.isOperationLegalOrCustom(Opcode, VT))
4611 return SDValue();
4612 } else {
4613 OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
4614 if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
4615 return SDValue();
4616 }
4617
4618 SDValue Op0 = Node->getOperand(0);
4619 SDValue Op1 = Node->getOperand(1);
4620 SDValue combined;
4621 for (SDNode *User : Op0->uses()) {
4622 if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
4623 User->use_empty())
4624 continue;
4625 // Convert the other matching node(s), too;
4626 // otherwise, the DIVREM may get target-legalized into something
4627 // target-specific that we won't be able to recognize.
4628 unsigned UserOpc = User->getOpcode();
4629 if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
4630 User->getOperand(0) == Op0 &&
4631 User->getOperand(1) == Op1) {
4632 if (!combined) {
4633 if (UserOpc == OtherOpcode) {
4634 SDVTList VTs = DAG.getVTList(VT, VT);
4635 combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
4636 } else if (UserOpc == DivRemOpc) {
4637 combined = SDValue(User, 0);
4638 } else {
4639 assert(UserOpc == Opcode);
4640 continue;
4641 }
4642 }
4643 if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
4644 CombineTo(User, combined);
4645 else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
4646 CombineTo(User, combined.getValue(1));
4647 }
4648 }
4649 return combined;
4650}
4651
4652 static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
4653 SDValue N0 = N->getOperand(0);
4654 SDValue N1 = N->getOperand(1);
4655 EVT VT = N->getValueType(0);
4656 SDLoc DL(N);
4657
4658 unsigned Opc = N->getOpcode();
4659 bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
4660 ConstantSDNode *N1C = isConstOrConstSplat(N1);
4661
4662 // X / undef -> undef
4663 // X % undef -> undef
4664 // X / 0 -> undef
4665 // X % 0 -> undef
4666 // NOTE: This includes vectors where any divisor element is zero/undef.
4667 if (DAG.isUndef(Opc, {N0, N1}))
4668 return DAG.getUNDEF(VT);
4669
4670 // undef / X -> 0
4671 // undef % X -> 0
4672 if (N0.isUndef())
4673 return DAG.getConstant(0, DL, VT);
4674
4675 // 0 / X -> 0
4676 // 0 % X -> 0
4677 ConstantSDNode *N0C = isConstOrConstSplat(N0);
4678 if (N0C && N0C->isZero())
4679 return N0;
4680
4681 // X / X -> 1
4682 // X % X -> 0
4683 if (N0 == N1)
4684 return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
4685
4686 // X / 1 -> X
4687 // X % 1 -> 0
4688 // If this is a boolean op (single-bit element type), we can't have
4689 // division-by-zero or remainder-by-zero, so assume the divisor is 1.
4690 // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
4691 // it's a 1.
4692 if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
4693 return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
4694
4695 return SDValue();
4696}
4697
4698SDValue DAGCombiner::visitSDIV(SDNode *N) {
4699 SDValue N0 = N->getOperand(0);
4700 SDValue N1 = N->getOperand(1);
4701 EVT VT = N->getValueType(0);
4702 EVT CCVT = getSetCCResultType(VT);
4703 SDLoc DL(N);
4704
4705 // fold (sdiv c1, c2) -> c1/c2
4706 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, {N0, N1}))
4707 return C;
4708
4709 // fold vector ops
4710 if (VT.isVector())
4711 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4712 return FoldedVOp;
4713
4714 // fold (sdiv X, -1) -> 0-X
4715 ConstantSDNode *N1C = isConstOrConstSplat(N1);
4716 if (N1C && N1C->isAllOnes())
4717 return DAG.getNegative(N0, DL, VT);
4718
4719 // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
4720 if (N1C && N1C->isMinSignedValue())
4721 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4722 DAG.getConstant(1, DL, VT),
4723 DAG.getConstant(0, DL, VT));
4724
4725 if (SDValue V = simplifyDivRem(N, DAG))
4726 return V;
4727
4728 if (SDValue NewSel = foldBinOpIntoSelect(N))
4729 return NewSel;
4730
4731 // If we know the sign bits of both operands are zero, strength reduce to a
4732 // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
4733 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
4734 return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
4735
4736 if (SDValue V = visitSDIVLike(N0, N1, N)) {
4737 // If the corresponding remainder node exists, update its users with
4738 // (Dividend - (Quotient * Divisor).
4739 if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
4740 { N0, N1 })) {
4741 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
4742 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4743 AddToWorklist(Mul.getNode());
4744 AddToWorklist(Sub.getNode());
4745 CombineTo(RemNode, Sub);
4746 }
4747 return V;
4748 }
4749
4750 // sdiv, srem -> sdivrem
4751 // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
4752 // true. Otherwise, we break the simplification logic in visitREM().
4753 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4754 if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
4755 if (SDValue DivRem = useDivRem(N))
4756 return DivRem;
4757
4758 return SDValue();
4759}
4760
4761static bool isDivisorPowerOfTwo(SDValue Divisor) {
4762 // Helper for determining whether a value is a power-2 constant scalar or a
4763 // vector of such elements.
4764 auto IsPowerOfTwo = [](ConstantSDNode *C) {
4765 if (C->isZero() || C->isOpaque())
4766 return false;
4767 if (C->getAPIntValue().isPowerOf2())
4768 return true;
4769 if (C->getAPIntValue().isNegatedPowerOf2())
4770 return true;
4771 return false;
4772 };
4773
4774 return ISD::matchUnaryPredicate(Divisor, IsPowerOfTwo);
4775}
4776
4777SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
4778 SDLoc DL(N);
4779 EVT VT = N->getValueType(0);
4780 EVT CCVT = getSetCCResultType(VT);
4781 unsigned BitWidth = VT.getScalarSizeInBits();
4782
4783 // fold (sdiv X, pow2) -> simple ops after legalize
4784 // FIXME: We check for the exact bit here because the generic lowering gives
4785 // better results in that case. The target-specific lowering should learn how
4786 // to handle exact sdivs efficiently.
4787 if (!N->getFlags().hasExact() && isDivisorPowerOfTwo(N1)) {
4788 // Target-specific implementation of sdiv x, pow2.
4789 if (SDValue Res = BuildSDIVPow2(N))
4790 return Res;
4791
4792 // Create constants that are functions of the shift amount value.
4793 EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
4794 SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
4795 SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
4796 C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
4797 SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
4798 if (!isConstantOrConstantVector(Inexact))
4799 return SDValue();
4800
4801 // Splat the sign bit into the register
4802 SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
4803 DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
4804 AddToWorklist(Sign.getNode());
4805
4806 // Add (N0 < 0) ? abs2 - 1 : 0;
4807 SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
4808 AddToWorklist(Srl.getNode());
4809 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
4810 AddToWorklist(Add.getNode());
4811 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
4812 AddToWorklist(Sra.getNode());
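 // For example, with VT = i32 and N1 = 8: C1 = 3, Inexact = 29, and Sra
 // computes (N0 + ((N0 >> 31) >>u 29)) >> 3, i.e. a signed divide by 8 that
 // rounds toward zero.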
4813
4814 // Special case: (sdiv X, 1) -> X
4815 // Special Case: (sdiv X, -1) -> 0-X
4816 SDValue One = DAG.getConstant(1, DL, VT);
4817 SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
4818 SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
4819 SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
4820 SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
4821 Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);
4822
4823 // If dividing by a positive value, we're done. Otherwise, the result must
4824 // be negated.
4825 SDValue Zero = DAG.getConstant(0, DL, VT);
4826 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);
4827
4828 // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
4829 SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
4830 SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
4831 return Res;
4832 }
4833
4834 // If integer divide is expensive and we satisfy the requirements, emit an
4835 // alternate sequence. Targets may check function attributes for size/speed
4836 // trade-offs.
4837 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4838 if (isConstantOrConstantVector(N1) &&
4839 !TLI.isIntDivCheap(N->getValueType(0), Attr))
4840 if (SDValue Op = BuildSDIV(N))
4841 return Op;
4842
4843 return SDValue();
4844}
4845
4846SDValue DAGCombiner::visitUDIV(SDNode *N) {
4847 SDValue N0 = N->getOperand(0);
4848 SDValue N1 = N->getOperand(1);
4849 EVT VT = N->getValueType(0);
4850 EVT CCVT = getSetCCResultType(VT);
4851 SDLoc DL(N);
4852
4853 // fold (udiv c1, c2) -> c1/c2
4854 if (SDValue C = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, {N0, N1}))
4855 return C;
4856
4857 // fold vector ops
4858 if (VT.isVector())
4859 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4860 return FoldedVOp;
4861
4862 // fold (udiv X, -1) -> select(X == -1, 1, 0)
4863 ConstantSDNode *N1C = isConstOrConstSplat(N1);
4864 if (N1C && N1C->isAllOnes() && CCVT.isVector() == VT.isVector()) {
4865 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4866 DAG.getConstant(1, DL, VT),
4867 DAG.getConstant(0, DL, VT));
4868 }
4869
4870 if (SDValue V = simplifyDivRem(N, DAG))
4871 return V;
4872
4873 if (SDValue NewSel = foldBinOpIntoSelect(N))
4874 return NewSel;
4875
4876 if (SDValue V = visitUDIVLike(N0, N1, N)) {
4877 // If the corresponding remainder node exists, update its users with
4878 // (Dividend - (Quotient * Divisor)).
4879 if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
4880 { N0, N1 })) {
4881 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
4882 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4883 AddToWorklist(Mul.getNode());
4884 AddToWorklist(Sub.getNode());
4885 CombineTo(RemNode, Sub);
4886 }
4887 return V;
4888 }
4889
4890 // udiv, urem -> udivrem
4891 // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
4892 // true. Otherwise, we break the simplification logic in visitREM().
4893 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4894 if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
4895 if (SDValue DivRem = useDivRem(N))
4896 return DivRem;
4897
4898 return SDValue();
4899}
4900
4901SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
4902 SDLoc DL(N);
4903 EVT VT = N->getValueType(0);
4904
4905 // fold (udiv x, (1 << c)) -> x >>u c
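 // e.g. (udiv x, 16) -> (srl x, 4)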
4906 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true)) {
4907 if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
4908 AddToWorklist(LogBase2.getNode());
4909
4910 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4911 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
4912 AddToWorklist(Trunc.getNode());
4913 return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
4914 }
4915 }
4916
4917 // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
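 // e.g. (udiv x, (shl 4, y)) -> (srl x, (add y, 2))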
4918 if (N1.getOpcode() == ISD::SHL) {
4919 SDValue N10 = N1.getOperand(0);
4920 if (isConstantOrConstantVector(N10, /*NoOpaques*/ true)) {
4921 if (SDValue LogBase2 = BuildLogBase2(N10, DL)) {
4922 AddToWorklist(LogBase2.getNode());
4923
4924 EVT ADDVT = N1.getOperand(1).getValueType();
4925 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
4926 AddToWorklist(Trunc.getNode());
4927 SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
4928 AddToWorklist(Add.getNode());
4929 return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
4930 }
4931 }
4932 }
4933
4934 // fold (udiv x, c) -> alternate
4935 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4936 if (isConstantOrConstantVector(N1) &&
4937 !TLI.isIntDivCheap(N->getValueType(0), Attr))
4938 if (SDValue Op = BuildUDIV(N))
4939 return Op;
4940
4941 return SDValue();
4942}
4943
4944SDValue DAGCombiner::buildOptimizedSREM(SDValue N0, SDValue N1, SDNode *N) {
4945 if (!N->getFlags().hasExact() && isDivisorPowerOfTwo(N1) &&
4946 !DAG.doesNodeExist(ISD::SDIV, N->getVTList(), {N0, N1})) {
4947 // Target-specific implementation of srem x, pow2.
4948 if (SDValue Res = BuildSREMPow2(N))
4949 return Res;
4950 }
4951 return SDValue();
4952}
4953
4954// handles ISD::SREM and ISD::UREM
4955SDValue DAGCombiner::visitREM(SDNode *N) {
4956 unsigned Opcode = N->getOpcode();
4957 SDValue N0 = N->getOperand(0);
4958 SDValue N1 = N->getOperand(1);
4959 EVT VT = N->getValueType(0);
4960 EVT CCVT = getSetCCResultType(VT);
4961
4962 bool isSigned = (Opcode == ISD::SREM);
4963 SDLoc DL(N);
4964
4965 // fold (rem c1, c2) -> c1%c2
4966 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
4967 return C;
4968
4969 // fold (urem X, -1) -> select(FX == -1, 0, FX)
4970 // Freeze the numerator to avoid a miscompile with an undefined value.
4971 if (!isSigned && llvm::isAllOnesOrAllOnesSplat(N1, /*AllowUndefs*/ false) &&
4972 CCVT.isVector() == VT.isVector()) {
4973 SDValue F0 = DAG.getFreeze(N0);
4974 SDValue EqualsNeg1 = DAG.getSetCC(DL, CCVT, F0, N1, ISD::SETEQ);
4975 return DAG.getSelect(DL, VT, EqualsNeg1, DAG.getConstant(0, DL, VT), F0);
4976 }
4977
4978 if (SDValue V = simplifyDivRem(N, DAG))
4979 return V;
4980
4981 if (SDValue NewSel = foldBinOpIntoSelect(N))
4982 return NewSel;
4983
4984 if (isSigned) {
4985 // If we know the sign bits of both operands are zero, strength reduce to a
4986 // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
4987 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
4988 return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
4989 } else {
4990 if (DAG.isKnownToBeAPowerOfTwo(N1)) {
4991 // fold (urem x, pow2) -> (and x, pow2-1)
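 // e.g. (urem x, 16) -> (and x, 15)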
4992 SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
4993 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
4994 AddToWorklist(Add.getNode());
4995 return DAG.getNode(ISD::AND, DL, VT, N0, Add);
4996 }
4997 // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
4998 // fold (urem x, (lshr pow2, y)) -> (and x, (add (lshr pow2, y), -1))
4999 // TODO: We should sink the following into isKnownToBePowerOfTwo
5000 // using a OrZero parameter analogous to our handling in ValueTracking.
5001 if ((N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) &&
5002 DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
5003 SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
5004 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
5005 AddToWorklist(Add.getNode());
5006 return DAG.getNode(ISD::AND, DL, VT, N0, Add);
5007 }
5008 }
5009
5010 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
5011
5012 // If X/C can be simplified by the division-by-constant logic, lower
5013 // X%C to the equivalent of X-X/C*C.
5014 // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
5015 // speculative DIV must not cause a DIVREM conversion. We guard against this
5016 // by skipping the simplification if isIntDivCheap(). When div is not cheap,
5017 // combine will not return a DIVREM. Regardless, checking cheapness here
5018 // makes sense since the simplification results in fatter code.
5019 if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
5020 if (isSigned) {
5021 // check if we can build faster implementation for srem
5022 if (SDValue OptimizedRem = buildOptimizedSREM(N0, N1, N))
5023 return OptimizedRem;
5024 }
5025
5026 SDValue OptimizedDiv =
5027 isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
5028 if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != N) {
5029 // If the equivalent Div node also exists, update its users.
5030 unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
5031 if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
5032 { N0, N1 }))
5033 CombineTo(DivNode, OptimizedDiv);
5034 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
5035 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
5036 AddToWorklist(OptimizedDiv.getNode());
5037 AddToWorklist(Mul.getNode());
5038 return Sub;
5039 }
5040 }
5041
5042 // sdiv, srem -> sdivrem
5043 if (SDValue DivRem = useDivRem(N))
5044 return DivRem.getValue(1);
5045
5046 return SDValue();
5047}
5048
5049SDValue DAGCombiner::visitMULHS(SDNode *N) {
5050 SDValue N0 = N->getOperand(0);
5051 SDValue N1 = N->getOperand(1);
5052 EVT VT = N->getValueType(0);
5053 SDLoc DL(N);
5054
5055 // fold (mulhs c1, c2)
5056 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHS, DL, VT, {N0, N1}))
5057 return C;
5058
5059 // canonicalize constant to RHS.
5060 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5061 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5062 return DAG.getNode(ISD::MULHS, DL, N->getVTList(), N1, N0);
5063
5064 if (VT.isVector()) {
5065 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5066 return FoldedVOp;
5067
5068 // fold (mulhs x, 0) -> 0
5069 // do not return N1, because undef node may exist.
5070 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
5071 return DAG.getConstant(0, DL, VT);
5072 }
5073
5074 // fold (mulhs x, 0) -> 0
5075 if (isNullConstant(N1))
5076 return N1;
5077
5078 // fold (mulhs x, 1) -> (sra x, size(x)-1)
5079 if (isOneConstant(N1))
5080 return DAG.getNode(
5081 ISD::SRA, DL, VT, N0,
5082 DAG.getShiftAmountConstant(N0.getScalarValueSizeInBits() - 1, VT, DL));
5083
5084 // fold (mulhs x, undef) -> 0
5085 if (N0.isUndef() || N1.isUndef())
5086 return DAG.getConstant(0, DL, VT);
5087
5088 // If the type twice as wide is legal, transform the mulhs to a wider multiply
5089 // plus a shift.
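 // e.g. for i32 with a legal i64 multiply:
 // (mulhs x, y) -> (trunc (srl (mul (sext x), (sext y)), 32))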
5090 if (!TLI.isOperationLegalOrCustom(ISD::MULHS, VT) && VT.isSimple() &&
5091 !VT.isVector()) {
5092 MVT Simple = VT.getSimpleVT();
5093 unsigned SimpleSize = Simple.getSizeInBits();
5094 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5095 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5096 N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
5097 N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
5098 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
5099 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
5100 DAG.getShiftAmountConstant(SimpleSize, NewVT, DL));
5101 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
5102 }
5103 }
5104
5105 return SDValue();
5106}
5107
5108SDValue DAGCombiner::visitMULHU(SDNode *N) {
5109 SDValue N0 = N->getOperand(0);
5110 SDValue N1 = N->getOperand(1);
5111 EVT VT = N->getValueType(0);
5112 SDLoc DL(N);
5113
5114 // fold (mulhu c1, c2)
5115 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHU, DL, VT, {N0, N1}))
5116 return C;
5117
5118 // canonicalize constant to RHS.
5119 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5120 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5121 return DAG.getNode(ISD::MULHU, DL, N->getVTList(), N1, N0);
5122
5123 if (VT.isVector()) {
5124 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5125 return FoldedVOp;
5126
5127 // fold (mulhu x, 0) -> 0
5128 // do not return N1, because undef node may exist.
5129 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
5130 return DAG.getConstant(0, DL, VT);
5131 }
5132
5133 // fold (mulhu x, 0) -> 0
5134 if (isNullConstant(N1))
5135 return N1;
5136
5137 // fold (mulhu x, 1) -> 0
5138 if (isOneConstant(N1))
5139 return DAG.getConstant(0, DL, VT);
5140
5141 // fold (mulhu x, undef) -> 0
5142 if (N0.isUndef() || N1.isUndef())
5143 return DAG.getConstant(0, DL, VT);
5144
5145 // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
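 // e.g. for i32: (mulhu x, 16) -> (srl x, 28), since only the top 4 bits of x
 // reach the high half of the product.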
5146 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
5147 hasOperation(ISD::SRL, VT)) {
5148 if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
5149 unsigned NumEltBits = VT.getScalarSizeInBits();
5150 SDValue SRLAmt = DAG.getNode(
5151 ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
5152 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
5153 SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
5154 return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
5155 }
5156 }
5157
5158 // If the type twice as wide is legal, transform the mulhu to a wider multiply
5159 // plus a shift.
5160 if (!TLI.isOperationLegalOrCustom(ISD::MULHU, VT) && VT.isSimple() &&
5161 !VT.isVector()) {
5162 MVT Simple = VT.getSimpleVT();
5163 unsigned SimpleSize = Simple.getSizeInBits();
5164 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5165 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5166 N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
5167 N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
5168 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
5169 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
5170 DAG.getShiftAmountConstant(SimpleSize, NewVT, DL));
5171 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
5172 }
5173 }
5174
5175 // Simplify the operands using demanded-bits information.
5176 // We don't have demanded bits support for MULHU so this just enables constant
5177 // folding based on known bits.
5178 if (SimplifyDemandedBits(SDValue(N, 0)))
5179 return SDValue(N, 0);
5180
5181 return SDValue();
5182}
5183
5184SDValue DAGCombiner::visitAVG(SDNode *N) {
5185 unsigned Opcode = N->getOpcode();
5186 SDValue N0 = N->getOperand(0);
5187 SDValue N1 = N->getOperand(1);
5188 EVT VT = N->getValueType(0);
5189 SDLoc DL(N);
5190 bool IsSigned = Opcode == ISD::AVGCEILS || Opcode == ISD::AVGFLOORS;
5191
5192 // fold (avg c1, c2)
5193 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5194 return C;
5195
5196 // canonicalize constant to RHS.
5197 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5198 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5199 return DAG.getNode(Opcode, DL, N->getVTList(), N1, N0);
5200
5201 if (VT.isVector())
5202 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5203 return FoldedVOp;
5204
5205 // fold (avg x, undef) -> x
5206 if (N0.isUndef())
5207 return N1;
5208 if (N1.isUndef())
5209 return N0;
5210
5211 // fold (avg x, x) --> x
5212 if (N0 == N1 && Level >= AfterLegalizeTypes)
5213 return N0;
5214
5215 // fold (avgfloor x, 0) -> x >> 1
5216 SDValue X, Y;
5217 if (sd_match(N, m_c_BinOp(ISD::AVGFLOORS, m_Value(X), m_Zero())))
5218 return DAG.getNode(ISD::SRA, DL, VT, X,
5219 DAG.getShiftAmountConstant(1, VT, DL));
5220 if (sd_match(N, m_c_BinOp(ISD::AVGFLOORU, m_Value(X), m_Zero())))
5221 return DAG.getNode(ISD::SRL, DL, VT, X,
5222 DAG.getShiftAmountConstant(1, VT, DL));
5223
5224 // fold avgu(zext(x), zext(y)) -> zext(avgu(x, y))
5225 // fold avgs(sext(x), sext(y)) -> sext(avgs(x, y))
5226 if (!IsSigned &&
5227 sd_match(N, m_BinOp(Opcode, m_ZExt(m_Value(X)), m_ZExt(m_Value(Y)))) &&
5228 X.getValueType() == Y.getValueType() &&
5229 hasOperation(Opcode, X.getValueType())) {
5230 SDValue AvgU = DAG.getNode(Opcode, DL, X.getValueType(), X, Y);
5231 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, AvgU);
5232 }
5233 if (IsSigned &&
5234 sd_match(N, m_BinOp(Opcode, m_SExt(m_Value(X)), m_SExt(m_Value(Y)))) &&
5235 X.getValueType() == Y.getValueType() &&
5236 hasOperation(Opcode, X.getValueType())) {
5237 SDValue AvgS = DAG.getNode(Opcode, DL, X.getValueType(), X, Y);
5238 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, AvgS);
5239 }
5240
5241 // Fold avgflooru(x,y) -> avgceilu(x,y-1) iff y != 0
5242 // Fold avgflooru(x,y) -> avgceilu(x-1,y) iff x != 0
5243 // Check if avgflooru isn't legal/custom but avgceilu is.
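 // This relies on the identity floor((x + y) / 2) == ceil((x + (y - 1)) / 2),
 // which holds as long as the decremented operand cannot wrap (hence the
 // known-never-zero checks below).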
5244 if (Opcode == ISD::AVGFLOORU && !hasOperation(ISD::AVGFLOORU, VT) &&
5245 (!LegalOperations || hasOperation(ISD::AVGCEILU, VT))) {
5246 if (DAG.isKnownNeverZero(N1))
5247 return DAG.getNode(
5248 ISD::AVGCEILU, DL, VT, N0,
5249 DAG.getNode(ISD::ADD, DL, VT, N1, DAG.getAllOnesConstant(DL, VT)));
5250 if (DAG.isKnownNeverZero(N0))
5251 return DAG.getNode(
5252 ISD::AVGCEILU, DL, VT, N1,
5253 DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getAllOnesConstant(DL, VT)));
5254 }
5255
5256 return SDValue();
5257}
5258
5259SDValue DAGCombiner::visitABD(SDNode *N) {
5260 unsigned Opcode = N->getOpcode();
5261 SDValue N0 = N->getOperand(0);
5262 SDValue N1 = N->getOperand(1);
5263 EVT VT = N->getValueType(0);
5264 SDLoc DL(N);
5265
5266 // fold (abd c1, c2)
5267 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5268 return C;
5269
5270 // canonicalize constant to RHS.
5271 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5272 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5273 return DAG.getNode(Opcode, DL, N->getVTList(), N1, N0);
5274
5275 if (VT.isVector())
5276 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5277 return FoldedVOp;
5278
5279 // fold (abd x, undef) -> 0
5280 if (N0.isUndef() || N1.isUndef())
5281 return DAG.getConstant(0, DL, VT);
5282
5283 SDValue X;
5284
5285 // fold (abds x, 0) -> abs x
5286 if (sd_match(N, m_c_BinOp(ISD::ABDS, m_Value(X), m_Zero())) &&
5287 (!LegalOperations || hasOperation(ISD::ABS, VT)))
5288 return DAG.getNode(ISD::ABS, DL, VT, X);
5289
5290 // fold (abdu x, 0) -> x
5291 if (sd_match(N, m_c_BinOp(ISD::ABDU, m_Value(X), m_Zero())))
5292 return X;
5293
5294 // fold (abds x, y) -> (abdu x, y) iff both args are known positive
5295 if (Opcode == ISD::ABDS && hasOperation(ISD::ABDU, VT) &&
5296 DAG.SignBitIsZero(N0) && DAG.SignBitIsZero(N1))
5297 return DAG.getNode(ISD::ABDU, DL, VT, N1, N0);
5298
5299 return SDValue();
5300}
5301
5302/// Perform optimizations common to nodes that compute two values. LoOp and HiOp
5303/// give the opcodes for the two computations that are being performed. Return
5304/// the combined value if a simplification was made.
5305SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
5306 unsigned HiOp) {
5307 // If the high half is not needed, just compute the low half.
5308 bool HiExists = N->hasAnyUseOfValue(1);
5309 if (!HiExists && (!LegalOperations ||
5310 TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
5311 SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
5312 return CombineTo(N, Res, Res);
5313 }
5314
5315 // If the low half is not needed, just compute the high half.
5316 bool LoExists = N->hasAnyUseOfValue(0);
5317 if (!LoExists && (!LegalOperations ||
5318 TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) {
5319 SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
5320 return CombineTo(N, Res, Res);
5321 }
5322
5323 // If both halves are used, return as it is.
5324 if (LoExists && HiExists)
5325 return SDValue();
5326
5327 // If the two computed results can be simplified separately, separate them.
5328 if (LoExists) {
5329 SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
5330 AddToWorklist(Lo.getNode());
5331 SDValue LoOpt = combine(Lo.getNode());
5332 if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
5333 (!LegalOperations ||
5334 TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType())))
5335 return CombineTo(N, LoOpt, LoOpt);
5336 }
5337
5338 if (HiExists) {
5339 SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
5340 AddToWorklist(Hi.getNode());
5341 SDValue HiOpt = combine(Hi.getNode());
5342 if (HiOpt.getNode() && HiOpt != Hi &&
5343 (!LegalOperations ||
5344 TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType())))
5345 return CombineTo(N, HiOpt, HiOpt);
5346 }
5347
5348 return SDValue();
5349}
5350
5351SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
5352 if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
5353 return Res;
5354
5355 SDValue N0 = N->getOperand(0);
5356 SDValue N1 = N->getOperand(1);
5357 EVT VT = N->getValueType(0);
5358 SDLoc DL(N);
5359
5360 // Constant fold.
5361 if (isa<ConstantSDNode>(N0) && isa<ConstantSDNode>(N1))
5362 return DAG.getNode(ISD::SMUL_LOHI, DL, N->getVTList(), N0, N1);
5363
5364 // canonicalize constant to RHS (vector doesn't have to splat)
5365 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5366 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5367 return DAG.getNode(ISD::SMUL_LOHI, DL, N->getVTList(), N1, N0);
5368
5369 // If the type twice as wide is legal, transform the SMUL_LOHI to a wider
5370 // multiply plus a shift.
5371 if (VT.isSimple() && !VT.isVector()) {
5372 MVT Simple = VT.getSimpleVT();
5373 unsigned SimpleSize = Simple.getSizeInBits();
5374 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5375 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5376 SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
5377 SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
5378 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
5379 // Compute the high part as N1.
5380 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
5381 DAG.getShiftAmountConstant(SimpleSize, NewVT, DL));
5382 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
5383 // Compute the low part as N0.
5384 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
5385 return CombineTo(N, Lo, Hi);
5386 }
5387 }
5388
5389 return SDValue();
5390}
5391
5392SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
5393 if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
5394 return Res;
5395
5396 SDValue N0 = N->getOperand(0);
5397 SDValue N1 = N->getOperand(1);
5398 EVT VT = N->getValueType(0);
5399 SDLoc DL(N);
5400
5401 // Constant fold.
5402 if (isa<ConstantSDNode>(N0) && isa<ConstantSDNode>(N1))
5403 return DAG.getNode(ISD::UMUL_LOHI, DL, N->getVTList(), N0, N1);
5404
5405 // canonicalize constant to RHS (vector doesn't have to splat)
5406 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5407 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5408 return DAG.getNode(ISD::UMUL_LOHI, DL, N->getVTList(), N1, N0);
5409
5410 // (umul_lohi N0, 0) -> (0, 0)
5411 if (isNullConstant(N1)) {
5412 SDValue Zero = DAG.getConstant(0, DL, VT);
5413 return CombineTo(N, Zero, Zero);
5414 }
5415
5416 // (umul_lohi N0, 1) -> (N0, 0)
5417 if (isOneConstant(N1)) {
5418 SDValue Zero = DAG.getConstant(0, DL, VT);
5419 return CombineTo(N, N0, Zero);
5420 }
5421
5422 // If the type twice as wide is legal, transform the UMUL_LOHI to a wider
5423 // multiply plus a shift.
5424 if (VT.isSimple() && !VT.isVector()) {
5425 MVT Simple = VT.getSimpleVT();
5426 unsigned SimpleSize = Simple.getSizeInBits();
5427 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5428 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5429 SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
5430 SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
5431 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
5432 // Compute the high part as N1.
5433 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
5434 DAG.getShiftAmountConstant(SimpleSize, NewVT, DL));
5435 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
5436 // Compute the low part as N0.
5437 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
5438 return CombineTo(N, Lo, Hi);
5439 }
5440 }
5441
5442 return SDValue();
5443}
5444
5445SDValue DAGCombiner::visitMULO(SDNode *N) {
5446 SDValue N0 = N->getOperand(0);
5447 SDValue N1 = N->getOperand(1);
5448 EVT VT = N0.getValueType();
5449 bool IsSigned = (ISD::SMULO == N->getOpcode());
5450
5451 EVT CarryVT = N->getValueType(1);
5452 SDLoc DL(N);
5453
5454 ConstantSDNode *N0C = isConstOrConstSplat(N0);
5455 ConstantSDNode *N1C = isConstOrConstSplat(N1);
5456
5457 // fold operation with constant operands.
5458 // TODO: Move this to FoldConstantArithmetic when it supports nodes with
5459 // multiple results.
5460 if (N0C && N1C) {
5461 bool Overflow;
5462 APInt Result =
5463 IsSigned ? N0C->getAPIntValue().smul_ov(N1C->getAPIntValue(), Overflow)
5464 : N0C->getAPIntValue().umul_ov(N1C->getAPIntValue(), Overflow);
5465 return CombineTo(N, DAG.getConstant(Result, DL, VT),
5466 DAG.getBoolConstant(Overflow, DL, CarryVT, CarryVT));
5467 }
5468
5469 // canonicalize constant to RHS.
5470 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5471 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5472 return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
5473
5474 // fold (mulo x, 0) -> 0 + no carry out
5475 if (isNullOrNullSplat(N1))
5476 return CombineTo(N, DAG.getConstant(0, DL, VT),
5477 DAG.getConstant(0, DL, CarryVT));
5478
5479 // (mulo x, 2) -> (addo x, x)
5480 // FIXME: This needs a freeze.
5481 if (N1C && N1C->getAPIntValue() == 2 &&
5482 (!IsSigned || VT.getScalarSizeInBits() > 2))
5483 return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, DL,
5484 N->getVTList(), N0, N0);
5485
5486 // A 1 bit SMULO overflows if both inputs are 1.
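 // (In i1, -1 * -1 = +1 is not representable, so the product wraps to -1 and
 // the overflow bit is exactly (x & y) != 0.)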
5487 if (IsSigned && VT.getScalarSizeInBits() == 1) {
5488 SDValue And = DAG.getNode(ISD::AND, DL, VT, N0, N1);
5489 SDValue Cmp = DAG.getSetCC(DL, CarryVT, And,
5490 DAG.getConstant(0, DL, VT), ISD::SETNE);
5491 return CombineTo(N, And, Cmp);
5492 }
5493
5494 // If it cannot overflow, transform into a mul.
5495 if (DAG.willNotOverflowMul(IsSigned, N0, N1))
5496 return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1),
5497 DAG.getConstant(0, DL, CarryVT));
5498 return SDValue();
5499}
5500
5501// Function to calculate whether the Min/Max pair of SDNodes (potentially
5502// swapped around) make a signed saturate pattern, clamping to between a signed
5503// saturate of -2^(BW-1) and 2^(BW-1)-1, or an unsigned saturate of 0 and 2^BW-1.
5504// Returns the node being clamped and the bitwidth of the clamp in BW. Should
5505// work with both SMIN/SMAX nodes and setcc/select combo. The operands are the
5506// same as SimplifySelectCC. N0<N1 ? N2 : N3.
5507static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2,
5508 SDValue N3, ISD::CondCode CC, unsigned &BW,
5509 bool &Unsigned, SelectionDAG &DAG) {
5510 auto isSignedMinMax = [&](SDValue N0, SDValue N1, SDValue N2, SDValue N3,
5511 ISD::CondCode CC) {
5512 // The compare and select operand should be the same or the select operands
5513 // should be truncated versions of the comparison.
5514 if (N0 != N2 && (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0)))
5515 return 0;
5516 // The constants need to be the same or a truncated version of each other.
5517 ConstantSDNode *N1C = isConstOrConstSplat(peekThroughTruncates(N1));
5518 ConstantSDNode *N3C = isConstOrConstSplat(peekThroughTruncates(N3));
5519 if (!N1C || !N3C)
5520 return 0;
5521 const APInt &C1 = N1C->getAPIntValue().trunc(N1.getScalarValueSizeInBits());
5522 const APInt &C2 = N3C->getAPIntValue().trunc(N3.getScalarValueSizeInBits());
5523 if (C1.getBitWidth() < C2.getBitWidth() || C1 != C2.sext(C1.getBitWidth()))
5524 return 0;
5525 return CC == ISD::SETLT ? ISD::SMIN : (CC == ISD::SETGT ? ISD::SMAX : 0);
5526 };
5527
5528 // Check the initial value is a SMIN/SMAX equivalent.
5529 unsigned Opcode0 = isSignedMinMax(N0, N1, N2, N3, CC);
5530 if (!Opcode0)
5531 return SDValue();
5532
5533 // We may need only one of the two range checks if the fptosi can never
5534 // produce the upper value.
5535 if (N0.getOpcode() == ISD::FP_TO_SINT && Opcode0 == ISD::SMAX) {
5536 if (isNullOrNullSplat(N3)) {
5537 EVT IntVT = N0.getValueType().getScalarType();
5538 EVT FPVT = N0.getOperand(0).getValueType().getScalarType();
5539 if (FPVT.isSimple()) {
5540 Type *InputTy = FPVT.getTypeForEVT(*DAG.getContext());
5541 const fltSemantics &Semantics = InputTy->getFltSemantics();
5542 uint32_t MinBitWidth =
5543 APFloatBase::semanticsIntSizeInBits(Semantics, /*isSigned*/ true);
5544 if (IntVT.getSizeInBits() >= MinBitWidth) {
5545 Unsigned = true;
5546 BW = PowerOf2Ceil(MinBitWidth);
5547 return N0;
5548 }
5549 }
5550 }
5551 }
5552
5553 SDValue N00, N01, N02, N03;
5554 ISD::CondCode N0CC;
5555 switch (N0.getOpcode()) {
5556 case ISD::SMIN:
5557 case ISD::SMAX:
5558 N00 = N02 = N0.getOperand(0);
5559 N01 = N03 = N0.getOperand(1);
5560 N0CC = N0.getOpcode() == ISD::SMIN ? ISD::SETLT : ISD::SETGT;
5561 break;
5562 case ISD::SELECT_CC:
5563 N00 = N0.getOperand(0);
5564 N01 = N0.getOperand(1);
5565 N02 = N0.getOperand(2);
5566 N03 = N0.getOperand(3);
5567 N0CC = cast<CondCodeSDNode>(N0.getOperand(4))->get();
5568 break;
5569 case ISD::SELECT:
5570 case ISD::VSELECT:
5571 if (N0.getOperand(0).getOpcode() != ISD::SETCC)
5572 return SDValue();
5573 N00 = N0.getOperand(0).getOperand(0);
5574 N01 = N0.getOperand(0).getOperand(1);
5575 N02 = N0.getOperand(1);
5576 N03 = N0.getOperand(2);
5577 N0CC = cast<CondCodeSDNode>(N0.getOperand(0).getOperand(2))->get();
5578 break;
5579 default:
5580 return SDValue();
5581 }
5582
5583 unsigned Opcode1 = isSignedMinMax(N00, N01, N02, N03, N0CC);
5584 if (!Opcode1 || Opcode0 == Opcode1)
5585 return SDValue();
5586
5587 ConstantSDNode *MinCOp = isConstOrConstSplat(Opcode0 == ISD::SMIN ? N1 : N01);
5588 ConstantSDNode *MaxCOp = isConstOrConstSplat(Opcode0 == ISD::SMIN ? N01 : N1);
5589 if (!MinCOp || !MaxCOp || MinCOp->getValueType(0) != MaxCOp->getValueType(0))
5590 return SDValue();
5591
5592 const APInt &MinC = MinCOp->getAPIntValue();
5593 const APInt &MaxC = MaxCOp->getAPIntValue();
5594 APInt MinCPlus1 = MinC + 1;
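 // e.g. a clamp to [-128, 127] has MinC = 127 (the SMIN constant) and
 // MaxC = -128 (the SMAX constant), so MinCPlus1 = 128 and BW = 8 with
 // Unsigned = false; a clamp to [0, 255] takes the second case below with
 // BW = 8 and Unsigned = true.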
5595 if (-MaxC == MinCPlus1 && MinCPlus1.isPowerOf2()) {
5596 BW = MinCPlus1.exactLogBase2() + 1;
5597 Unsigned = false;
5598 return N02;
5599 }
5600
5601 if (MaxC == 0 && MinCPlus1.isPowerOf2()) {
5602 BW = MinCPlus1.exactLogBase2();
5603 Unsigned = true;
5604 return N02;
5605 }
5606
5607 return SDValue();
5608}
5609
5610static SDValue PerformMinMaxFpToSatCombine(SDValue N0, SDValue N1, SDValue N2,
5611 SDValue N3, ISD::CondCode CC,
5612 SelectionDAG &DAG) {
5613 unsigned BW;
5614 bool Unsigned;
5615 SDValue Fp = isSaturatingMinMax(N0, N1, N2, N3, CC, BW, Unsigned, DAG);
5616 if (!Fp || Fp.getOpcode() != ISD::FP_TO_SINT)
5617 return SDValue();
5618 EVT FPVT = Fp.getOperand(0).getValueType();
5619 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), BW);
5620 if (FPVT.isVector())
5621 NewVT = EVT::getVectorVT(*DAG.getContext(), NewVT,
5622 FPVT.getVectorElementCount());
5623 unsigned NewOpc = Unsigned ? ISD::FP_TO_UINT_SAT : ISD::FP_TO_SINT_SAT;
5624 if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(NewOpc, FPVT, NewVT))
5625 return SDValue();
5626 SDLoc DL(Fp);
5627 SDValue Sat = DAG.getNode(NewOpc, DL, NewVT, Fp.getOperand(0),
5628 DAG.getValueType(NewVT.getScalarType()));
5629 return DAG.getExtOrTrunc(!Unsigned, Sat, DL, N2->getValueType(0));
5630}
5631
5632static SDValue PerformUMinFpToSatCombine(SDValue N0, SDValue N1, SDValue N2,
5633 SDValue N3, ISD::CondCode CC,
5634 SelectionDAG &DAG) {
5635 // We are looking for UMIN(FPTOUI(X), (2^n)-1), which may have come via a
5636 // select/vselect/select_cc. The two operands pairs for the select (N2/N3) may
5637 // be truncated versions of the setcc (N0/N1).
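 // e.g. umin(fptoui(x), 255) becomes fptoui_sat(x) saturating at i8, then
 // zero-extended or truncated back to the original integer type.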
5638 if ((N0 != N2 &&
5639 (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0))) ||
5640 N0.getOpcode() != ISD::FP_TO_UINT || CC != ISD::SETULT)
5641 return SDValue();
5642 ConstantSDNode *N1C = isConstOrConstSplat(N1);
5643 ConstantSDNode *N3C = isConstOrConstSplat(N3);
5644 if (!N1C || !N3C)
5645 return SDValue();
5646 const APInt &C1 = N1C->getAPIntValue();
5647 const APInt &C3 = N3C->getAPIntValue();
5648 if (!(C1 + 1).isPowerOf2() || C1.getBitWidth() < C3.getBitWidth() ||
5649 C1 != C3.zext(C1.getBitWidth()))
5650 return SDValue();
5651
5652 unsigned BW = (C1 + 1).exactLogBase2();
5653 EVT FPVT = N0.getOperand(0).getValueType();
5654 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), BW);
5655 if (FPVT.isVector())
5656 NewVT = EVT::getVectorVT(*DAG.getContext(), NewVT,
5657 FPVT.getVectorElementCount());
5658 if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(ISD::FP_TO_UINT_SAT,
5659 FPVT, NewVT))
5660 return SDValue();
5661
5662 SDValue Sat =
5663 DAG.getNode(ISD::FP_TO_UINT_SAT, SDLoc(N0), NewVT, N0.getOperand(0),
5664 DAG.getValueType(NewVT.getScalarType()));
5665 return DAG.getZExtOrTrunc(Sat, SDLoc(N0), N3.getValueType());
5666}
5667
5668SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
5669 SDValue N0 = N->getOperand(0);
5670 SDValue N1 = N->getOperand(1);
5671 EVT VT = N0.getValueType();
5672 unsigned Opcode = N->getOpcode();
5673 SDLoc DL(N);
5674
5675 // fold operation with constant operands.
5676 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5677 return C;
5678
5679 // If the operands are the same, this is a no-op.
5680 if (N0 == N1)
5681 return N0;
5682
5683 // canonicalize constant to RHS
5684 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5685 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5686 return DAG.getNode(Opcode, DL, VT, N1, N0);
5687
5688 // fold vector ops
5689 if (VT.isVector())
5690 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5691 return FoldedVOp;
5692
5693 // reassociate minmax
5694 if (SDValue RMINMAX = reassociateOps(Opcode, DL, N0, N1, N->getFlags()))
5695 return RMINMAX;
5696
5697 // If the sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
5698 // Only do this if:
5699 // 1. The current op isn't legal and the flipped is.
5700 // 2. The saturation pattern is broken by canonicalization in InstCombine.
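 // e.g. (umin (smax X, 0), 255), where both operands have a clear sign bit,
 // can be rewritten as (smin (smax X, 0), 255), re-forming a signed
 // saturation pattern the target may match directly.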
5701 bool IsOpIllegal = !TLI.isOperationLegal(Opcode, VT);
5702 bool IsSatBroken = Opcode == ISD::UMIN && N0.getOpcode() == ISD::SMAX;
5703 if ((IsSatBroken || IsOpIllegal) && (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
5704 (N1.isUndef() || DAG.SignBitIsZero(N1))) {
5705 unsigned AltOpcode;
5706 switch (Opcode) {
5707 case ISD::SMIN: AltOpcode = ISD::UMIN; break;
5708 case ISD::SMAX: AltOpcode = ISD::UMAX; break;
5709 case ISD::UMIN: AltOpcode = ISD::SMIN; break;
5710 case ISD::UMAX: AltOpcode = ISD::SMAX; break;
5711 default: llvm_unreachable("Unknown MINMAX opcode");
5712 }
5713 if ((IsSatBroken && IsOpIllegal) || TLI.isOperationLegal(AltOpcode, VT))
5714 return DAG.getNode(AltOpcode, DL, VT, N0, N1);
5715 }
5716
5717 if (Opcode == ISD::SMIN || Opcode == ISD::SMAX)
5718 if (SDValue S = PerformMinMaxFpToSatCombine(
5719 N0, N1, N0, N1, Opcode == ISD::SMIN ? ISD::SETLT : ISD::SETGT, DAG))
5720 return S;
5721 if (Opcode == ISD::UMIN)
5722 if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N0, N1, ISD::SETULT, DAG))
5723 return S;
5724
5725 // Fold min/max(vecreduce(x), vecreduce(y)) -> vecreduce(min/max(x, y))
5726 auto ReductionOpcode = [](unsigned Opcode) {
5727 switch (Opcode) {
5728 case ISD::SMIN:
5729 return ISD::VECREDUCE_SMIN;
5730 case ISD::SMAX:
5731 return ISD::VECREDUCE_SMAX;
5732 case ISD::UMIN:
5733 return ISD::VECREDUCE_UMIN;
5734 case ISD::UMAX:
5735 return ISD::VECREDUCE_UMAX;
5736 default:
5737 llvm_unreachable("Unexpected opcode");
5738 }
5739 };
5740 if (SDValue SD = reassociateReduction(ReductionOpcode(Opcode), Opcode,
5741 SDLoc(N), VT, N0, N1))
5742 return SD;
5743
5744 // Simplify the operands using demanded-bits information.
5745 if (SimplifyDemandedBits(SDValue(N, 0)))
5746 return SDValue(N, 0);
5747
5748 return SDValue();
5749}
5750
5751/// If this is a bitwise logic instruction and both operands have the same
5752/// opcode, try to sink the other opcode after the logic instruction.
5753SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
5754 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
5755 EVT VT = N0.getValueType();
5756 unsigned LogicOpcode = N->getOpcode();
5757 unsigned HandOpcode = N0.getOpcode();
5758 assert(ISD::isBitwiseLogicOp(LogicOpcode) && "Expected logic opcode");
5759 assert(HandOpcode == N1.getOpcode() && "Bad input!");
5760
5761 // Bail early if none of these transforms apply.
5762 if (N0.getNumOperands() == 0)
5763 return SDValue();
5764
5765 // FIXME: We should check number of uses of the operands to not increase
5766 // the instruction count for all transforms.
5767
5768 // Handle size-changing casts (or sign_extend_inreg).
5769 SDValue X = N0.getOperand(0);
5770 SDValue Y = N1.getOperand(0);
5771 EVT XVT = X.getValueType();
5772 SDLoc DL(N);
5773 if (ISD::isExtOpcode(HandOpcode) || ISD::isExtVecInRegOpcode(HandOpcode) ||
5774 (HandOpcode == ISD::SIGN_EXTEND_INREG &&
5775 N0.getOperand(1) == N1.getOperand(1))) {
5776 // If both operands have other uses, this transform would create extra
5777 // instructions without eliminating anything.
5778 if (!N0.hasOneUse() && !N1.hasOneUse())
5779 return SDValue();
5780 // We need matching integer source types.
5781 if (XVT != Y.getValueType())
5782 return SDValue();
5783 // Don't create an illegal op during or after legalization. Don't ever
5784 // create an unsupported vector op.
5785 if ((VT.isVector() || LegalOperations) &&
5786 !TLI.isOperationLegalOrCustom(LogicOpcode, XVT))
5787 return SDValue();
5788 // Avoid infinite looping with PromoteIntBinOp.
5789 // TODO: Should we apply desirable/legal constraints to all opcodes?
5790 if ((HandOpcode == ISD::ANY_EXTEND ||
5791 HandOpcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
5792 LegalTypes && !TLI.isTypeDesirableForOp(LogicOpcode, XVT))
5793 return SDValue();
5794 // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
5795 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5796 if (HandOpcode == ISD::SIGN_EXTEND_INREG)
5797 return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
5798 return DAG.getNode(HandOpcode, DL, VT, Logic);
5799 }
5800
5801 // logic_op (truncate x), (truncate y) --> truncate (logic_op x, y)
5802 if (HandOpcode == ISD::TRUNCATE) {
5803 // If both operands have other uses, this transform would create extra
5804 // instructions without eliminating anything.
5805 if (!N0.hasOneUse() && !N1.hasOneUse())
5806 return SDValue();
5807 // We need matching source types.
5808 if (XVT != Y.getValueType())
5809 return SDValue();
5810 // Don't create an illegal op during or after legalization.
5811 if (LegalOperations && !TLI.isOperationLegal(LogicOpcode, XVT))
5812 return SDValue();
5813 // Be extra careful sinking truncate. If it's free, there's no benefit in
5814 // widening a binop. Also, don't create a logic op on an illegal type.
5815 if (TLI.isZExtFree(VT, XVT) && TLI.isTruncateFree(XVT, VT))
5816 return SDValue();
5817 if (!TLI.isTypeLegal(XVT))
5818 return SDValue();
5819 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5820 return DAG.getNode(HandOpcode, DL, VT, Logic);
5821 }
5822
5823 // For binops SHL/SRL/SRA/AND:
5824 // logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
5825 if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL ||
5826 HandOpcode == ISD::SRA || HandOpcode == ISD::AND) &&
5827 N0.getOperand(1) == N1.getOperand(1)) {
5828 // If either operand has other uses, this transform is not an improvement.
5829 if (!N0.hasOneUse() || !N1.hasOneUse())
5830 return SDValue();
5831 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5832 return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
5833 }
5834
5835 // Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
5836 if (HandOpcode == ISD::BSWAP) {
5837 // If either operand has other uses, this transform is not an improvement.
5838 if (!N0.hasOneUse() || !N1.hasOneUse())
5839 return SDValue();
5840 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5841 return DAG.getNode(HandOpcode, DL, VT, Logic);
5842 }
5843
5844 // For funnel shifts FSHL/FSHR:
5845 // logic_op (OP x, x1, s), (OP y, y1, s) -->
5846 // --> OP (logic_op x, y), (logic_op, x1, y1), s
5847 if ((HandOpcode == ISD::FSHL || HandOpcode == ISD::FSHR) &&
5848 N0.getOperand(2) == N1.getOperand(2)) {
5849 if (!N0.hasOneUse() || !N1.hasOneUse())
5850 return SDValue();
5851 SDValue X1 = N0.getOperand(1);
5852 SDValue Y1 = N1.getOperand(1);
5853 SDValue S = N0.getOperand(2);
5854 SDValue Logic0 = DAG.getNode(LogicOpcode, DL, VT, X, Y);
5855 SDValue Logic1 = DAG.getNode(LogicOpcode, DL, VT, X1, Y1);
5856 return DAG.getNode(HandOpcode, DL, VT, Logic0, Logic1, S);
5857 }
5858
5859 // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
5860 // Only perform this optimization up until type legalization, before
5861 // LegalizeVectorOps. LegalizeVectorOps promotes vector operations by
5862 // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
5863 // we don't want to undo this promotion.
5864 // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
5865 // on scalars.
5866 if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) &&
5867 Level <= AfterLegalizeTypes) {
5868 // Input types must be integer and the same.
5869 if (XVT.isInteger() && XVT == Y.getValueType() &&
5870 !(VT.isVector() && TLI.isTypeLegal(VT) &&
5871 !XVT.isVector() && !TLI.isTypeLegal(XVT))) {
5872 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5873 return DAG.getNode(HandOpcode, DL, VT, Logic);
5874 }
5875 }
5876
5877 // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
5878 // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
5879 // If both shuffles use the same mask, and both shuffle within a single
5880 // vector, then it is worthwhile to move the swizzle after the operation.
5881 // The type-legalizer generates this pattern when loading illegal
5882 // vector types from memory. In many cases this allows additional shuffle
5883 // optimizations.
5884 // There are other cases where moving the shuffle after the xor/and/or
5885 // is profitable even if shuffles don't perform a swizzle.
5886 // If both shuffles use the same mask, and both shuffles have the same first
5887 // or second operand, then it might still be profitable to move the shuffle
5888 // after the xor/and/or operation.
5889 if (HandOpcode == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
5890 auto *SVN0 = cast<ShuffleVectorSDNode>(N0);
5891 auto *SVN1 = cast<ShuffleVectorSDNode>(N1);
5892 assert(X.getValueType() == Y.getValueType() &&
5893 "Inputs to shuffles are not the same type");
5894
5895 // Check that both shuffles use the same mask. The masks are known to be of
5896 // the same length because the result vector type is the same.
5897 // Check also that shuffles have only one use to avoid introducing extra
5898 // instructions.
5899 if (!SVN0->hasOneUse() || !SVN1->hasOneUse() ||
5900 !SVN0->getMask().equals(SVN1->getMask()))
5901 return SDValue();
5902
5903 // Don't try to fold this node if it requires introducing a
5904 // build vector of all zeros that might be illegal at this stage.
5905 SDValue ShOp = N0.getOperand(1);
5906 if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
5907 ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
5908
5909 // (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C)
5910 if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
5911 SDValue Logic = DAG.getNode(LogicOpcode, DL, VT,
5912 N0.getOperand(0), N1.getOperand(0));
5913 return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask());
5914 }
5915
5916 // Don't try to fold this node if it requires introducing a
5917 // build vector of all zeros that might be illegal at this stage.
5918 ShOp = N0.getOperand(0);
5919 if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
5920 ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
5921
5922 // (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B))
5923 if (N0.getOperand(0) == N1.getOperand(0) && ShOp.getNode()) {
5924 SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, N0.getOperand(1),
5925 N1.getOperand(1));
5926 return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask());
5927 }
5928 }
5929
5930 return SDValue();
5931}
5932
5933/// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
5934SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
5935 const SDLoc &DL) {
5936 SDValue LL, LR, RL, RR, N0CC, N1CC;
5937 if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
5938 !isSetCCEquivalent(N1, RL, RR, N1CC))
5939 return SDValue();
5940
5941 assert(N0.getValueType() == N1.getValueType() &&
5942 "Unexpected operand types for bitwise logic op");
5943 assert(LL.getValueType() == LR.getValueType() &&
5944 RL.getValueType() == RR.getValueType() &&
5945 "Unexpected operand types for setcc");
5946
5947 // If we're here post-legalization or the logic op type is not i1, the logic
5948 // op type must match a setcc result type. Also, all folds require new
5949 // operations on the left and right operands, so those types must match.
5950 EVT VT = N0.getValueType();
5951 EVT OpVT = LL.getValueType();
5952 if (LegalOperations || VT.getScalarType() != MVT::i1)
5953 if (VT != getSetCCResultType(OpVT))
5954 return SDValue();
5955 if (OpVT != RL.getValueType())
5956 return SDValue();
5957
5958 ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
5959 ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
5960 bool IsInteger = OpVT.isInteger();
5961 if (LR == RR && CC0 == CC1 && IsInteger) {
5962 bool IsZero = isNullOrNullSplat(LR);
5963 bool IsNeg1 = isAllOnesOrAllOnesSplat(LR);
5964
5965 // All bits clear?
5966 bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
5967 // All sign bits clear?
5968 bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
5969 // Any bits set?
5970 bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
5971 // Any sign bits set?
5972 bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
5973
5974 // (and (seteq X, 0), (seteq Y, 0)) --> (seteq (or X, Y), 0)
5975 // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
5976 // (or (setne X, 0), (setne Y, 0)) --> (setne (or X, Y), 0)
5977 // (or (setlt X, 0), (setlt Y, 0)) --> (setlt (or X, Y), 0)
5978 if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
5979 SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
5980 AddToWorklist(Or.getNode());
5981 return DAG.getSetCC(DL, VT, Or, LR, CC1);
5982 }
5983
5984 // All bits set?
5985 bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
5986 // All sign bits set?
5987 bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
5988 // Any bits clear?
5989 bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
5990 // Any sign bits clear?
5991 bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
5992
5993 // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
5994 // (and (setlt X, 0), (setlt Y, 0)) --> (setlt (and X, Y), 0)
5995 // (or (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
5996 // (or (setgt X, -1), (setgt Y -1)) --> (setgt (and X, Y), -1)
5997 if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
5998 SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
5999 AddToWorklist(And.getNode());
6000 return DAG.getSetCC(DL, VT, And, LR, CC1);
6001 }
6002 }
6003
6004 // TODO: What is the 'or' equivalent of this fold?
6005 // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
6006 if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
6007 IsInteger && CC0 == ISD::SETNE &&
6008 ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
6009 (isAllOnesConstant(LR) && isNullConstant(RR)))) {
6010 SDValue One = DAG.getConstant(1, DL, OpVT);
6011 SDValue Two = DAG.getConstant(2, DL, OpVT);
6012 SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
6013 AddToWorklist(Add.getNode());
6014 return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
6015 }
6016
6017 // Try more general transforms if the predicates match and the only user of
6018 // the compares is the 'and' or 'or'.
6019 if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
6020 N0.hasOneUse() && N1.hasOneUse()) {
6021 // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
6022 // or (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
6023 if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
6024 SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
6025 SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
6026 SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
6027 SDValue Zero = DAG.getConstant(0, DL, OpVT);
6028 return DAG.getSetCC(DL, VT, Or, Zero, CC1);
6029 }
6030
6031 // Turn compare of constants whose difference is 1 bit into add+and+setcc.
6032 if ((IsAnd && CC1 == ISD::SETNE) || (!IsAnd && CC1 == ISD::SETEQ)) {
6033 // Match a shared variable operand and 2 non-opaque constant operands.
6034 auto MatchDiffPow2 = [&](ConstantSDNode *C0, ConstantSDNode *C1) {
6035 // The difference of the constants must be a single bit.
6036 const APInt &CMax =
6037 APIntOps::umax(C0->getAPIntValue(), C1->getAPIntValue());
6038 const APInt &CMin =
6039 APIntOps::umin(C0->getAPIntValue(), C1->getAPIntValue());
6040 return !C0->isOpaque() && !C1->isOpaque() && (CMax - CMin).isPowerOf2();
6041 };
6042 if (LL == RL && ISD::matchBinaryPredicate(LR, RR, MatchDiffPow2)) {
6043 // and/or (setcc X, CMax, ne), (setcc X, CMin, ne/eq) -->
6044 // setcc ((sub X, CMin), ~(CMax - CMin)), 0, ne/eq
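 // e.g. (X != 4) & (X != 6) --> ((X - 4) & ~2) != 0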
6045 SDValue Max = DAG.getNode(ISD::UMAX, DL, OpVT, LR, RR);
6046 SDValue Min = DAG.getNode(ISD::UMIN, DL, OpVT, LR, RR);
6047 SDValue Offset = DAG.getNode(ISD::SUB, DL, OpVT, LL, Min);
6048 SDValue Diff = DAG.getNode(ISD::SUB, DL, OpVT, Max, Min);
6049 SDValue Mask = DAG.getNOT(DL, Diff, OpVT);
6050 SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Offset, Mask);
6051 SDValue Zero = DAG.getConstant(0, DL, OpVT);
6052 return DAG.getSetCC(DL, VT, And, Zero, CC0);
6053 }
6054 }
6055 }
6056
6057 // Canonicalize equivalent operands to LL == RL.
6058 if (LL == RR && LR == RL) {
6059 CC1 = ISD::getSetCCSwappedOperands(CC1);
6060 std::swap(RL, RR);
6061 }
6062
6063 // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
6064 // (or (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
6065 if (LL == RL && LR == RR) {
6066 ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, OpVT)
6067 : ISD::getSetCCOrOperation(CC0, CC1, OpVT);
6068 if (NewCC != ISD::SETCC_INVALID &&
6069 (!LegalOperations ||
6070 (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
6071 TLI.isOperationLegal(ISD::SETCC, OpVT))))
6072 return DAG.getSetCC(DL, VT, LL, LR, NewCC);
6073 }
6074
6075 return SDValue();
6076}
6077
6078static bool arebothOperandsNotSNan(SDValue Operand1, SDValue Operand2,
6079 SelectionDAG &DAG) {
6080 return DAG.isKnownNeverSNaN(Operand2) && DAG.isKnownNeverSNaN(Operand1);
6081}
6082
6083static bool arebothOperandsNotNan(SDValue Operand1, SDValue Operand2,
6084 SelectionDAG &DAG) {
6085 return DAG.isKnownNeverNaN(Operand2) && DAG.isKnownNeverNaN(Operand1);
6086}
6087
6088static unsigned getMinMaxOpcodeForFP(SDValue Operand1, SDValue Operand2,
6089 ISD::CondCode CC, unsigned OrAndOpcode,
6090 SelectionDAG &DAG,
6091 bool isFMAXNUMFMINNUM_IEEE,
6092 bool isFMAXNUMFMINNUM) {
6093 // The optimization cannot be applied for all the predicates because
6094 // of the way FMINNUM/FMAXNUM and FMINNUM_IEEE/FMAXNUM_IEEE handle
6095 // NaNs. For FMINNUM_IEEE/FMAXNUM_IEEE, the optimization cannot be
6096 // applied at all if one of the operands is a signaling NaN.
6097
6098 // It is safe to use FMINNUM_IEEE/FMAXNUM_IEEE if all the operands
6099 // are non NaN values.
6100 if (((CC == ISD::SETLT || CC == ISD::SETLE) && (OrAndOpcode == ISD::OR)) ||
6101 ((CC == ISD::SETGT || CC == ISD::SETGE) && (OrAndOpcode == ISD::AND)))
6102 return arebothOperandsNotNan(Operand1, Operand2, DAG) &&
6103 isFMAXNUMFMINNUM_IEEE
6104 ? ISD::FMINNUM_IEEE
6105 : ISD::DELETED_NODE;
6106 else if (((CC == ISD::SETGT || CC == ISD::SETGE) &&
6107 (OrAndOpcode == ISD::OR)) ||
6108 ((CC == ISD::SETLT || CC == ISD::SETLE) &&
6109 (OrAndOpcode == ISD::AND)))
6110 return arebothOperandsNotNan(Operand1, Operand2, DAG) &&
6111 isFMAXNUMFMINNUM_IEEE
6112 ? ISD::FMAXNUM_IEEE
6113 : ISD::DELETED_NODE;
6114 // Both FMINNUM/FMAXNUM and FMINNUM_IEEE/FMAXNUM_IEEE handle quiet
6115 // NaNs in the same way. But, FMINNUM/FMAXNUM and FMINNUM_IEEE/
6116 // FMAXNUM_IEEE handle signaling NaNs differently. If we cannot prove
6117 // that there are not any sNaNs, then the optimization is not valid
6118 // for FMINNUM_IEEE/FMAXNUM_IEEE. In the presence of sNaNs, we apply
6119 // the optimization using FMINNUM/FMAXNUM for the following cases. If
6120 // we can prove that we do not have any sNaNs, then we can do the
6121 // optimization using FMINNUM_IEEE/FMAXNUM_IEEE for the following
6122 // cases.
6123 else if (((CC == ISD::SETOLT || CC == ISD::SETOLE) &&
6124 (OrAndOpcode == ISD::OR)) ||
6125 ((CC == ISD::SETUGT || CC == ISD::SETUGE) &&
6126 (OrAndOpcode == ISD::AND)))
6127 return isFMAXNUMFMINNUM ? ISD::FMINNUM
6128 : arebothOperandsNotSNan(Operand1, Operand2, DAG) &&
6129 isFMAXNUMFMINNUM_IEEE
6130 ? ISD::FMINNUM_IEEE
6131 : ISD::DELETED_NODE;
6132 else if (((CC == ISD::SETOGT || CC == ISD::SETOGE) &&
6133 (OrAndOpcode == ISD::OR)) ||
6134 ((CC == ISD::SETULT || CC == ISD::SETULE) &&
6135 (OrAndOpcode == ISD::AND)))
6136 return isFMAXNUMFMINNUM ? ISD::FMAXNUM
6137 : arebothOperandsNotSNan(Operand1, Operand2, DAG) &&
6138 isFMAXNUMFMINNUM_IEEE
6139 ? ISD::FMAXNUM_IEEE
6140 : ISD::DELETED_NODE;
6141 return ISD::DELETED_NODE;
6142}
6143
6144static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG) {
6145 using AndOrSETCCFoldKind = TargetLowering::AndOrSETCCFoldKind;
6146 assert(
6147 (LogicOp->getOpcode() == ISD::AND || LogicOp->getOpcode() == ISD::OR) &&
6148 "Invalid Op to combine SETCC with");
6149
6150 // TODO: Search past casts/truncates.
6151 SDValue LHS = LogicOp->getOperand(0);
6152 SDValue RHS = LogicOp->getOperand(1);
6153 if (LHS->getOpcode() != ISD::SETCC || RHS->getOpcode() != ISD::SETCC ||
6154 !LHS->hasOneUse() || !RHS->hasOneUse())
6155 return SDValue();
6156
6157 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6158 AndOrSETCCFoldKind TargetPreference = TLI.isDesirableToCombineLogicOpOfSETCC(
6159 LogicOp, LHS.getNode(), RHS.getNode());
6160
6161 SDValue LHS0 = LHS->getOperand(0);
6162 SDValue RHS0 = RHS->getOperand(0);
6163 SDValue LHS1 = LHS->getOperand(1);
6164 SDValue RHS1 = RHS->getOperand(1);
6165 // TODO: We don't actually need a splat here, for vectors we just need the
6166 // invariants to hold for each element.
6167 auto *LHS1C = isConstOrConstSplat(LHS1);
6168 auto *RHS1C = isConstOrConstSplat(RHS1);
6169 ISD::CondCode CCL = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
6170 ISD::CondCode CCR = cast<CondCodeSDNode>(RHS.getOperand(2))->get();
6171 EVT VT = LogicOp->getValueType(0);
6172 EVT OpVT = LHS0.getValueType();
6173 SDLoc DL(LogicOp);
6174
6175 // Check if the operands of an and/or operation are comparisons and if they
6176 // compare against the same value. Replace the and/or-cmp-cmp sequence with
6177 // min/max cmp sequence. If LHS1 is equal to RHS1, then the or-cmp-cmp
6178 // sequence will be replaced with min-cmp sequence:
6179 // (LHS0 < LHS1) | (RHS0 < RHS1) -> min(LHS0, RHS0) < LHS1
6180 // and and-cmp-cmp will be replaced with max-cmp sequence:
6181 // (LHS0 < LHS1) & (RHS0 < RHS1) -> max(LHS0, RHS0) < LHS1
6182 // The optimization does not work for `==` or `!=` .
6183 // The two comparisons should have either the same predicate or the
6184 // predicate of one of the comparisons is the opposite of the other one.
6185 bool isFMAXNUMFMINNUM_IEEE = TLI.isOperationLegal(ISD::FMAXNUM_IEEE, OpVT) &&
6186 TLI.isOperationLegal(ISD::FMINNUM_IEEE, OpVT);
6187 bool isFMAXNUMFMINNUM = TLI.isOperationLegalOrCustom(ISD::FMAXNUM, OpVT) &&
6188 TLI.isOperationLegalOrCustom(ISD::FMINNUM, OpVT);
6189 if (((OpVT.isInteger() && TLI.isOperationLegal(ISD::UMAX, OpVT) &&
6190 TLI.isOperationLegal(ISD::SMAX, OpVT) &&
6191 TLI.isOperationLegal(ISD::UMIN, OpVT) &&
6192 TLI.isOperationLegal(ISD::SMIN, OpVT)) ||
6193 (OpVT.isFloatingPoint() &&
6194 (isFMAXNUMFMINNUM_IEEE || isFMAXNUMFMINNUM))) &&
6195 !ISD::isIntEqualitySetCC(CCL) && !ISD::isFPEqualitySetCC(CCL) &&
6196 CCL != ISD::SETFALSE && CCL != ISD::SETO && CCL != ISD::SETUO &&
6197 CCL != ISD::SETTRUE &&
6198 (CCL == CCR || CCL == ISD::getSetCCSwappedOperands(CCR))) {
6199
6200 SDValue CommonValue, Operand1, Operand2;
6201 ISD::CondCode CC = ISD::SETCC_INVALID;
6202 if (CCL == CCR) {
6203 if (LHS0 == RHS0) {
6204 CommonValue = LHS0;
6205 Operand1 = LHS1;
6206 Operand2 = RHS1;
6207 CC = CCL;
6208 } else if (LHS1 == RHS1) {
6209 CommonValue = LHS1;
6210 Operand1 = LHS0;
6211 Operand2 = RHS0;
6212 CC = CCL;
6213 }
6214 } else {
6215 assert(CCL == ISD::getSetCCSwappedOperands(CCR) && "Unexpected CC");
6216 if (LHS0 == RHS1) {
6217 CommonValue = LHS0;
6218 Operand1 = LHS1;
6219 Operand2 = RHS0;
6220 CC = CCR;
6221 } else if (RHS0 == LHS1) {
6222 CommonValue = LHS1;
6223 Operand1 = LHS0;
6224 Operand2 = RHS1;
6225 CC = CCL;
6226 }
6227 }
6228
6229 // Don't do this transform for sign bit tests. Let foldLogicOfSetCCs
6230 // handle it using OR/AND.
6231 if (CC == ISD::SETLT && isNullOrNullSplat(CommonValue))
6232 CC = ISD::SETCC_INVALID;
6233 else if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CommonValue))
6234 CC = ISD::SETCC_INVALID;
6235
6236 if (CC != ISD::SETCC_INVALID) {
6237 unsigned NewOpcode = ISD::DELETED_NODE;
6238 bool IsSigned = isSignedIntSetCC(CC);
6239 if (OpVT.isInteger()) {
6240 bool IsLess = (CC == ISD::SETLE || CC == ISD::SETULE ||
6241 CC == ISD::SETLT || CC == ISD::SETULT);
6242 bool IsOr = (LogicOp->getOpcode() == ISD::OR);
6243 if (IsLess == IsOr)
6244 NewOpcode = IsSigned ? ISD::SMIN : ISD::UMIN;
6245 else
6246 NewOpcode = IsSigned ? ISD::SMAX : ISD::UMAX;
6247 } else if (OpVT.isFloatingPoint())
6248 NewOpcode =
6249 getMinMaxOpcodeForFP(Operand1, Operand2, CC, LogicOp->getOpcode(),
6250 DAG, isFMAXNUMFMINNUM_IEEE, isFMAXNUMFMINNUM);
6251
6252 if (NewOpcode != ISD::DELETED_NODE) {
6253 SDValue MinMaxValue =
6254 DAG.getNode(NewOpcode, DL, OpVT, Operand1, Operand2);
6255 return DAG.getSetCC(DL, VT, MinMaxValue, CommonValue, CC);
6256 }
6257 }
6258 }
6259
6260 if (TargetPreference == AndOrSETCCFoldKind::None)
6261 return SDValue();
6262
6263 if (CCL == CCR &&
6264 CCL == (LogicOp->getOpcode() == ISD::AND ? ISD::SETNE : ISD::SETEQ) &&
6265 LHS0 == RHS0 && LHS1C && RHS1C && OpVT.isInteger()) {
6266 const APInt &APLhs = LHS1C->getAPIntValue();
6267 const APInt &APRhs = RHS1C->getAPIntValue();
6268
6269 // Preference is to use ISD::ABS or we already have an ISD::ABS (in which
6270 // case this is just a compare).
6271 if (APLhs == (-APRhs) &&
6272 ((TargetPreference & AndOrSETCCFoldKind::ABS) ||
6273 DAG.doesNodeExist(ISD::ABS, DAG.getVTList(OpVT), {LHS0}))) {
6274 const APInt &C = APLhs.isNegative() ? APRhs : APLhs;
6275 // (icmp eq A, C) | (icmp eq A, -C)
6276 // -> (icmp eq Abs(A), C)
6277 // (icmp ne A, C) & (icmp ne A, -C)
6278 // -> (icmp ne Abs(A), C)
6279 SDValue AbsOp = DAG.getNode(ISD::ABS, DL, OpVT, LHS0);
6280 return DAG.getNode(ISD::SETCC, DL, VT, AbsOp,
6281 DAG.getConstant(C, DL, OpVT), LHS.getOperand(2));
6282     } else if (TargetPreference &
6283                (AndOrSETCCFoldKind::AddAnd | AndOrSETCCFoldKind::NotAnd)) {
6284
6285 // AndOrSETCCFoldKind::AddAnd:
6286 // A == C0 | A == C1
6287 // IF IsPow2(smax(C0, C1)-smin(C0, C1))
6288 // -> ((A - smin(C0, C1)) & ~(smax(C0, C1)-smin(C0, C1))) == 0
6289 // A != C0 & A != C1
6290 // IF IsPow2(smax(C0, C1)-smin(C0, C1))
6291 // -> ((A - smin(C0, C1)) & ~(smax(C0, C1)-smin(C0, C1))) != 0
6292
6293 // AndOrSETCCFoldKind::NotAnd:
6294 // A == C0 | A == C1
6295 // IF smax(C0, C1) == -1 AND IsPow2(smax(C0, C1) - smin(C0, C1))
6296 // -> ~A & smin(C0, C1) == 0
6297 // A != C0 & A != C1
6298 // IF smax(C0, C1) == -1 AND IsPow2(smax(C0, C1) - smin(C0, C1))
6299 // -> ~A & smin(C0, C1) != 0
6300
6301 const APInt &MaxC = APIntOps::smax(APRhs, APLhs);
6302 const APInt &MinC = APIntOps::smin(APRhs, APLhs);
6303 APInt Dif = MaxC - MinC;
6304 if (!Dif.isZero() && Dif.isPowerOf2()) {
6305 if (MaxC.isAllOnes() &&
6306 (TargetPreference & AndOrSETCCFoldKind::NotAnd)) {
6307 SDValue NotOp = DAG.getNOT(DL, LHS0, OpVT);
6308 SDValue AndOp = DAG.getNode(ISD::AND, DL, OpVT, NotOp,
6309 DAG.getConstant(MinC, DL, OpVT));
6310 return DAG.getNode(ISD::SETCC, DL, VT, AndOp,
6311 DAG.getConstant(0, DL, OpVT), LHS.getOperand(2));
6312 } else if (TargetPreference & AndOrSETCCFoldKind::AddAnd) {
6313
6314 SDValue AddOp = DAG.getNode(ISD::ADD, DL, OpVT, LHS0,
6315 DAG.getConstant(-MinC, DL, OpVT));
6316 SDValue AndOp = DAG.getNode(ISD::AND, DL, OpVT, AddOp,
6317 DAG.getConstant(~Dif, DL, OpVT));
6318 return DAG.getNode(ISD::SETCC, DL, VT, AndOp,
6319 DAG.getConstant(0, DL, OpVT), LHS.getOperand(2));
6320 }
6321 }
6322 }
6323 }
6324
6325 return SDValue();
6326}
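// Illustrative sketch (editorial, not part of the combiner above): a
// standalone check of the scalar identity behind the
// AndOrSETCCFoldKind::AddAnd case, assuming plain unsigned 8-bit arithmetic
// and the example constants C0 = 5, C1 = 13 (their difference, 8, is a power
// of two). The function name is illustrative only; only <cstdint> from the
// includes above is needed.
static bool checkAddAndFoldIdentitySketch() {
  const uint8_t C0 = 5, C1 = 13;   // smin(C0, C1) = 5, smax(C0, C1) = 13
  const uint8_t MinC = 5, Dif = 8; // Dif = smax - smin, a power of two
  for (unsigned A = 0; A != 256; ++A) {
    bool Original = (A == C0) || (A == C1);
    bool Folded = ((uint8_t)(A - MinC) & (uint8_t)~Dif) == 0;
    if (Original != Folded)
      return false; // the fold would be wrong for this A
  }
  return true;
}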
6327
6328// Combine `(select c, (X & 1), 0)` -> `(and (zext c), X)`.
6329// We canonicalize to the `select` form in the middle end, but the `and` form
6330 // gets better codegen on all tested targets (arm, x86, riscv).
6331 static SDValue combineSelectAsExtAnd(SDValue Cond, SDValue T, SDValue F,
6332                                      const SDLoc &DL, SelectionDAG &DAG) {
6333 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6334 if (!isNullConstant(F))
6335 return SDValue();
6336
6337 EVT CondVT = Cond.getValueType();
6338 if (TLI.getBooleanContents(CondVT) !=
6339       TargetLowering::ZeroOrOneBooleanContent)
6340     return SDValue();
6341
6342 if (T.getOpcode() != ISD::AND)
6343 return SDValue();
6344
6345 if (!isOneConstant(T.getOperand(1)))
6346 return SDValue();
6347
6348 EVT OpVT = T.getValueType();
6349
6350 SDValue CondMask =
6351 OpVT == CondVT ? Cond : DAG.getBoolExtOrTrunc(Cond, DL, OpVT, CondVT);
6352 return DAG.getNode(ISD::AND, DL, OpVT, CondMask, T.getOperand(0));
6353}
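// Illustrative sketch (editorial, not part of the combiner): the scalar
// equivalence that the select-to-and rewrite above relies on, checked
// exhaustively for a 0/1 condition and an 8-bit value. Names are illustrative
// only.
static bool checkSelectAsExtAndSketch() {
  for (unsigned C = 0; C != 2; ++C)       // boolean condition, 0 or 1
    for (unsigned X = 0; X != 256; ++X)
      if ((C ? (X & 1u) : 0u) != (C & X)) // select(c, x & 1, 0) == zext(c) & x
        return false;
  return true;
}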
6354
6355/// This contains all DAGCombine rules which reduce two values combined by
6356/// an And operation to a single value. This makes them reusable in the context
6357/// of visitSELECT(). Rules involving constants are not included as
6358/// visitSELECT() already handles those cases.
6359SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
6360 EVT VT = N1.getValueType();
6361 SDLoc DL(N);
6362
6363 // fold (and x, undef) -> 0
6364 if (N0.isUndef() || N1.isUndef())
6365 return DAG.getConstant(0, DL, VT);
6366
6367 if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
6368 return V;
6369
6370 // Canonicalize:
6371 // and(x, add) -> and(add, x)
6372 if (N1.getOpcode() == ISD::ADD)
6373 std::swap(N0, N1);
6374
6375 // TODO: Rewrite this to return a new 'AND' instead of using CombineTo.
6376 if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
6377 VT.isScalarInteger() && VT.getSizeInBits() <= 64 && N0->hasOneUse()) {
6378 if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
6379 if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
6380 // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
6381 // immediate for an add, but it is legal if its top c2 bits are set,
6382 // transform the ADD so the immediate doesn't need to be materialized
6383 // in a register.
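  // Illustrative worked example (editorial, not from the original source):
  // with a 32-bit type, c2 == 20 and c1 == 0x800, the (lshr y, 20) operand has
  // its top 20 bits clear, so those bits of the AND result are zero no matter
  // what the ADD produces. On a target with a 12-bit signed add immediate,
  // +0x800 (2048) is out of range, but setting the top 20 bits of c1 gives
  // 0xFFFFF800 (-2048), which is encodable, so no extra constant
  // materialization is needed.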
6384 APInt ADDC = ADDI->getAPIntValue();
6385 APInt SRLC = SRLI->getAPIntValue();
6386 if (ADDC.getSignificantBits() <= 64 && SRLC.ult(VT.getSizeInBits()) &&
6387 !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
6388           APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
6389                                              SRLC.getZExtValue());
6390 if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
6391 ADDC |= Mask;
6392 if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
6393 SDLoc DL0(N0);
6394 SDValue NewAdd =
6395 DAG.getNode(ISD::ADD, DL0, VT,
6396 N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
6397 CombineTo(N0.getNode(), NewAdd);
6398 // Return N so it doesn't get rechecked!
6399 return SDValue(N, 0);
6400 }
6401 }
6402 }
6403 }
6404 }
6405 }
6406
6407 return SDValue();
6408}
6409
6410bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
6411 EVT LoadResultTy, EVT &ExtVT) {
6412 if (!AndC->getAPIntValue().isMask())
6413 return false;
6414
6415 unsigned ActiveBits = AndC->getAPIntValue().countr_one();
6416
6417 ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
6418 EVT LoadedVT = LoadN->getMemoryVT();
6419
6420 if (ExtVT == LoadedVT &&
6421 (!LegalOperations ||
6422 TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
6423 // ZEXTLOAD will match without needing to change the size of the value being
6424 // loaded.
6425 return true;
6426 }
6427
6428   // Do not change the width of volatile or atomic loads.
6429 if (!LoadN->isSimple())
6430 return false;
6431
6432 // Do not generate loads of non-round integer types since these can
6433 // be expensive (and would be wrong if the type is not byte sized).
6434 if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
6435 return false;
6436
6437 if (LegalOperations &&
6438 !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
6439 return false;
6440
6441 if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
6442 return false;
6443
6444 return true;
6445}
6446
6447bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
6448 ISD::LoadExtType ExtType, EVT &MemVT,
6449 unsigned ShAmt) {
6450 if (!LDST)
6451 return false;
6452 // Only allow byte offsets.
6453 if (ShAmt % 8)
6454 return false;
6455
6456 // Do not generate loads of non-round integer types since these can
6457 // be expensive (and would be wrong if the type is not byte sized).
6458 if (!MemVT.isRound())
6459 return false;
6460
6461   // Don't change the width of volatile or atomic loads.
6462 if (!LDST->isSimple())
6463 return false;
6464
6465 EVT LdStMemVT = LDST->getMemoryVT();
6466
6467 // Bail out when changing the scalable property, since we can't be sure that
6468 // we're actually narrowing here.
6469 if (LdStMemVT.isScalableVector() != MemVT.isScalableVector())
6470 return false;
6471
6472 // Verify that we are actually reducing a load width here.
6473 if (LdStMemVT.bitsLT(MemVT))
6474 return false;
6475
6476 // Ensure that this isn't going to produce an unsupported memory access.
6477 if (ShAmt) {
6478 assert(ShAmt % 8 == 0 && "ShAmt is byte offset");
6479 const unsigned ByteShAmt = ShAmt / 8;
6480 const Align LDSTAlign = LDST->getAlign();
6481 const Align NarrowAlign = commonAlignment(LDSTAlign, ByteShAmt);
6482 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
6483 LDST->getAddressSpace(), NarrowAlign,
6484 LDST->getMemOperand()->getFlags()))
6485 return false;
6486 }
6487
6488 // It's not possible to generate a constant of extended or untyped type.
6489 EVT PtrType = LDST->getBasePtr().getValueType();
6490 if (PtrType == MVT::Untyped || PtrType.isExtended())
6491 return false;
6492
6493 if (isa<LoadSDNode>(LDST)) {
6494 LoadSDNode *Load = cast<LoadSDNode>(LDST);
6495 // Don't transform one with multiple uses, this would require adding a new
6496 // load.
6497 if (!SDValue(Load, 0).hasOneUse())
6498 return false;
6499
6500 if (LegalOperations &&
6501 !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
6502 return false;
6503
6504 // For the transform to be legal, the load must produce only two values
6505 // (the value loaded and the chain). Don't transform a pre-increment
6506 // load, for example, which produces an extra value. Otherwise the
6507 // transformation is not equivalent, and the downstream logic to replace
6508 // uses gets things wrong.
6509 if (Load->getNumValues() > 2)
6510 return false;
6511
6512 // If the load that we're shrinking is an extload and we're not just
6513 // discarding the extension we can't simply shrink the load. Bail.
6514 // TODO: It would be possible to merge the extensions in some cases.
6515 if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
6516 Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
6517 return false;
6518
6519 if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT))
6520 return false;
6521 } else {
6522 assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
6523 StoreSDNode *Store = cast<StoreSDNode>(LDST);
6524 // Can't write outside the original store
6525 if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
6526 return false;
6527
6528 if (LegalOperations &&
6529 !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
6530 return false;
6531 }
6532 return true;
6533}
6534
6535 bool DAGCombiner::SearchForAndLoads(SDNode *N,
6536                                     SmallVectorImpl<LoadSDNode*> &Loads,
6537 SmallPtrSetImpl<SDNode*> &NodesWithConsts,
6538 ConstantSDNode *Mask,
6539 SDNode *&NodeToMask) {
6540 // Recursively search for the operands, looking for loads which can be
6541 // narrowed.
6542 for (SDValue Op : N->op_values()) {
6543 if (Op.getValueType().isVector())
6544 return false;
6545
6546 // Some constants may need fixing up later if they are too large.
6547 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
6548 if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
6549 (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
6550 NodesWithConsts.insert(N);
6551 continue;
6552 }
6553
6554 if (!Op.hasOneUse())
6555 return false;
6556
6557 switch(Op.getOpcode()) {
6558 case ISD::LOAD: {
6559 auto *Load = cast<LoadSDNode>(Op);
6560 EVT ExtVT;
6561 if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
6562 isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {
6563
6564 // ZEXTLOAD is already small enough.
6565 if (Load->getExtensionType() == ISD::ZEXTLOAD &&
6566 ExtVT.bitsGE(Load->getMemoryVT()))
6567 continue;
6568
6569 // Use LE to convert equal sized loads to zext.
6570 if (ExtVT.bitsLE(Load->getMemoryVT()))
6571 Loads.push_back(Load);
6572
6573 continue;
6574 }
6575 return false;
6576 }
6577 case ISD::ZERO_EXTEND:
6578 case ISD::AssertZext: {
6579 unsigned ActiveBits = Mask->getAPIntValue().countr_one();
6580 EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
6581 EVT VT = Op.getOpcode() == ISD::AssertZext ?
6582 cast<VTSDNode>(Op.getOperand(1))->getVT() :
6583 Op.getOperand(0).getValueType();
6584
6585 // We can accept extending nodes if the mask is wider or an equal
6586 // width to the original type.
6587 if (ExtVT.bitsGE(VT))
6588 continue;
6589 break;
6590 }
6591 case ISD::OR:
6592 case ISD::XOR:
6593 case ISD::AND:
6594 if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
6595 NodeToMask))
6596 return false;
6597 continue;
6598 }
6599
6600     // Allow one node which will be masked along with any loads found.
6601 if (NodeToMask)
6602 return false;
6603
6604 // Also ensure that the node to be masked only produces one data result.
6605 NodeToMask = Op.getNode();
6606 if (NodeToMask->getNumValues() > 1) {
6607 bool HasValue = false;
6608 for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
6609 MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
6610 if (VT != MVT::Glue && VT != MVT::Other) {
6611 if (HasValue) {
6612 NodeToMask = nullptr;
6613 return false;
6614 }
6615 HasValue = true;
6616 }
6617 }
6618 assert(HasValue && "Node to be masked has no data result?");
6619 }
6620 }
6621 return true;
6622}
6623
6624bool DAGCombiner::BackwardsPropagateMask(SDNode *N) {
6625 auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
6626 if (!Mask)
6627 return false;
6628
6629 if (!Mask->getAPIntValue().isMask())
6630 return false;
6631
6632 // No need to do anything if the and directly uses a load.
6633 if (isa<LoadSDNode>(N->getOperand(0)))
6634 return false;
6635
6636   SmallVector<LoadSDNode*, 8> Loads;
6637   SmallPtrSet<SDNode*, 2> NodesWithConsts;
6638 SDNode *FixupNode = nullptr;
6639 if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
6640 if (Loads.empty())
6641 return false;
6642
6643 LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
6644 SDValue MaskOp = N->getOperand(1);
6645
6646 // If it exists, fixup the single node we allow in the tree that needs
6647 // masking.
6648 if (FixupNode) {
6649 LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
6650 SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
6651 FixupNode->getValueType(0),
6652 SDValue(FixupNode, 0), MaskOp);
6653 DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
6654       if (And.getOpcode() == ISD::AND)
6655 DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
6656 }
6657
6658 // Narrow any constants that need it.
6659 for (auto *LogicN : NodesWithConsts) {
6660 SDValue Op0 = LogicN->getOperand(0);
6661 SDValue Op1 = LogicN->getOperand(1);
6662
6663 if (isa<ConstantSDNode>(Op0))
6664 Op0 =
6665 DAG.getNode(ISD::AND, SDLoc(Op0), Op0.getValueType(), Op0, MaskOp);
6666
6667 if (isa<ConstantSDNode>(Op1))
6668 Op1 =
6669 DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(), Op1, MaskOp);
6670
6671 if (isa<ConstantSDNode>(Op0) && !isa<ConstantSDNode>(Op1))
6672 std::swap(Op0, Op1);
6673
6674 DAG.UpdateNodeOperands(LogicN, Op0, Op1);
6675 }
6676
6677 // Create narrow loads.
6678 for (auto *Load : Loads) {
6679 LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
6680 SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
6681 SDValue(Load, 0), MaskOp);
6682 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
6683       if (And.getOpcode() == ISD::AND)
6684 And = SDValue(
6685 DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
6686 SDValue NewLoad = reduceLoadWidth(And.getNode());
6687 assert(NewLoad &&
6688 "Shouldn't be masking the load if it can't be narrowed");
6689 CombineTo(Load, NewLoad, NewLoad.getValue(1));
6690 }
6691 DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
6692 return true;
6693 }
6694 return false;
6695}
6696
6697// Unfold
6698// x & (-1 'logical shift' y)
6699// To
6700// (x 'opposite logical shift' y) 'logical shift' y
6701// if it is better for performance.
6702SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
6703 assert(N->getOpcode() == ISD::AND);
6704
6705 SDValue N0 = N->getOperand(0);
6706 SDValue N1 = N->getOperand(1);
6707
6708 // Do we actually prefer shifts over mask?
6709   if (!TLI.shouldFoldMaskToVariableShiftPair(N0))
6710     return SDValue();
6711
6712 // Try to match (-1 '[outer] logical shift' y)
6713 unsigned OuterShift;
6714 unsigned InnerShift; // The opposite direction to the OuterShift.
6715 SDValue Y; // Shift amount.
6716 auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
6717 if (!M.hasOneUse())
6718 return false;
6719 OuterShift = M->getOpcode();
6720 if (OuterShift == ISD::SHL)
6721 InnerShift = ISD::SRL;
6722 else if (OuterShift == ISD::SRL)
6723 InnerShift = ISD::SHL;
6724 else
6725 return false;
6726 if (!isAllOnesConstant(M->getOperand(0)))
6727 return false;
6728 Y = M->getOperand(1);
6729 return true;
6730 };
6731
6732 SDValue X;
6733 if (matchMask(N1))
6734 X = N0;
6735 else if (matchMask(N0))
6736 X = N1;
6737 else
6738 return SDValue();
6739
6740 SDLoc DL(N);
6741 EVT VT = N->getValueType(0);
6742
6743 // tmp = x 'opposite logical shift' y
6744 SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
6745 // ret = tmp 'logical shift' y
6746 SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
6747
6748 return T1;
6749}
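// Illustrative sketch (editorial, not part of the combiner): the unsigned
// 32-bit identities behind the mask-to-shift-pair unfolding above, checked
// for every in-range shift amount. Names are illustrative only.
static bool checkMaskToShiftPairSketch() {
  const unsigned X = 0xDEADBEEFu;
  for (unsigned Y = 0; Y != 32; ++Y) {
    if ((X & (~0u >> Y)) != ((X << Y) >> Y)) // x & (-1 >> y) == (x << y) >> y
      return false;
    if ((X & (~0u << Y)) != ((X >> Y) << Y)) // x & (-1 << y) == (x >> y) << y
      return false;
  }
  return true;
}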
6750
6751/// Try to replace shift/logic that tests if a bit is clear with mask + setcc.
6752/// For a target with a bit test, this is expected to become test + set and save
6753/// at least 1 instruction.
6754 static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) {
6755   assert(And->getOpcode() == ISD::AND && "Expected an 'and' op");
6756
6757 // Look through an optional extension.
6758 SDValue And0 = And->getOperand(0), And1 = And->getOperand(1);
6759 if (And0.getOpcode() == ISD::ANY_EXTEND && And0.hasOneUse())
6760 And0 = And0.getOperand(0);
6761 if (!isOneConstant(And1) || !And0.hasOneUse())
6762 return SDValue();
6763
6764 SDValue Src = And0;
6765
6766 // Attempt to find a 'not' op.
6767 // TODO: Should we favor test+set even without the 'not' op?
6768 bool FoundNot = false;
6769 if (isBitwiseNot(Src)) {
6770 FoundNot = true;
6771 Src = Src.getOperand(0);
6772
6773     // Look through an optional truncation. The source operand may not be the
6774 // same type as the original 'and', but that is ok because we are masking
6775 // off everything but the low bit.
6776 if (Src.getOpcode() == ISD::TRUNCATE && Src.hasOneUse())
6777 Src = Src.getOperand(0);
6778 }
6779
6780 // Match a shift-right by constant.
6781 if (Src.getOpcode() != ISD::SRL || !Src.hasOneUse())
6782 return SDValue();
6783
6784 // This is probably not worthwhile without a supported type.
6785 EVT SrcVT = Src.getValueType();
6786 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6787 if (!TLI.isTypeLegal(SrcVT))
6788 return SDValue();
6789
6790 // We might have looked through casts that make this transform invalid.
6791 unsigned BitWidth = SrcVT.getScalarSizeInBits();
6792 SDValue ShiftAmt = Src.getOperand(1);
6793 auto *ShiftAmtC = dyn_cast<ConstantSDNode>(ShiftAmt);
6794 if (!ShiftAmtC || !ShiftAmtC->getAPIntValue().ult(BitWidth))
6795 return SDValue();
6796
6797 // Set source to shift source.
6798 Src = Src.getOperand(0);
6799
6800 // Try again to find a 'not' op.
6801 // TODO: Should we favor test+set even with two 'not' ops?
6802 if (!FoundNot) {
6803 if (!isBitwiseNot(Src))
6804 return SDValue();
6805 Src = Src.getOperand(0);
6806 }
6807
6808 if (!TLI.hasBitTest(Src, ShiftAmt))
6809 return SDValue();
6810
6811 // Turn this into a bit-test pattern using mask op + setcc:
6812 // and (not (srl X, C)), 1 --> (and X, 1<<C) == 0
6813 // and (srl (not X), C)), 1 --> (and X, 1<<C) == 0
6814 SDLoc DL(And);
6815 SDValue X = DAG.getZExtOrTrunc(Src, DL, SrcVT);
6816 EVT CCVT =
6817 TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
6818 SDValue Mask = DAG.getConstant(
6819 APInt::getOneBitSet(BitWidth, ShiftAmtC->getZExtValue()), DL, SrcVT);
6820 SDValue NewAnd = DAG.getNode(ISD::AND, DL, SrcVT, X, Mask);
6821 SDValue Zero = DAG.getConstant(0, DL, SrcVT);
6822 SDValue Setcc = DAG.getSetCC(DL, CCVT, NewAnd, Zero, ISD::SETEQ);
6823 return DAG.getZExtOrTrunc(Setcc, DL, And->getValueType(0));
6824}
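// Illustrative sketch (editorial, not part of the combiner): the bit-test
// equivalence used above, checked for a few 32-bit values and every bit
// position. Names are illustrative only.
static bool checkBitTestSketch() {
  const unsigned Vals[] = {0u, 1u, 0x5A5A5A5Au, 0xFFFFFFFFu};
  for (unsigned C = 0; C != 32; ++C)
    for (unsigned X : Vals) {
      unsigned AndForm = (~X >> C) & 1u;                  // and (not (srl X, C)), 1
      unsigned TestForm = (X & (1u << C)) == 0 ? 1u : 0u; // (and X, 1 << C) == 0
      if (AndForm != TestForm)
        return false;
    }
  return true;
}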
6825
6826/// For targets that support usubsat, match a bit-hack form of that operation
6827/// that ends in 'and' and convert it.
6828 static SDValue foldAndToUsubsat(SDNode *N, SelectionDAG &DAG, const SDLoc &DL) {
6829   EVT VT = N->getValueType(0);
6830 unsigned BitWidth = VT.getScalarSizeInBits();
6831 APInt SignMask = APInt::getSignMask(BitWidth);
6832
6833 // (i8 X ^ 128) & (i8 X s>> 7) --> usubsat X, 128
6834 // (i8 X + 128) & (i8 X s>> 7) --> usubsat X, 128
6835 // xor/add with SMIN (signmask) are logically equivalent.
6836 SDValue X;
6837   if (!sd_match(N, m_And(m_OneUse(m_Xor(m_Value(X), m_SpecificInt(SignMask))),
6838                          m_OneUse(m_Sra(m_Deferred(X),
6839                                         m_SpecificInt(BitWidth - 1))))) &&
6840       !sd_match(N, m_And(m_OneUse(m_Add(m_Value(X), m_SpecificInt(SignMask))),
6841                          m_OneUse(m_Sra(m_Deferred(X),
6842                                         m_SpecificInt(BitWidth - 1))))))
6843 return SDValue();
6844
6845 return DAG.getNode(ISD::USUBSAT, DL, VT, X,
6846 DAG.getConstant(SignMask, DL, VT));
6847}
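// Illustrative sketch (editorial, not part of the combiner): exhaustive 8-bit
// check of the bit-hack form matched above. SignExt mirrors "X s>> 7" without
// relying on implementation-defined signed shifts. Names are illustrative
// only; only <cstdint> from the includes above is needed.
static bool checkUsubsatBitHackSketch() {
  for (unsigned V = 0; V != 256; ++V) {
    uint8_t X = (uint8_t)V;
    uint8_t SignExt = (X & 0x80u) ? 0xFFu : 0x00u;      // i8 X s>> 7
    uint8_t BitHack = (uint8_t)((X ^ 0x80u) & SignExt); // (X ^ 128) & (X s>> 7)
    uint8_t Sat = X >= 128u ? (uint8_t)(X - 128u) : 0u; // usubsat(X, 128)
    if (BitHack != Sat)
      return false;
  }
  return true;
}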
6848
6849/// Given a bitwise logic operation N with a matching bitwise logic operand,
6850/// fold a pattern where 2 of the source operands are identically shifted
6851/// values. For example:
6852/// ((X0 << Y) | Z) | (X1 << Y) --> ((X0 | X1) << Y) | Z
6853 static SDValue foldLogicOfShifts(SDNode *N, SDValue LogicOp, SDValue ShiftOp,
6854                                  SelectionDAG &DAG) {
6855 unsigned LogicOpcode = N->getOpcode();
6856 assert(ISD::isBitwiseLogicOp(LogicOpcode) &&
6857 "Expected bitwise logic operation");
6858
6859 if (!LogicOp.hasOneUse() || !ShiftOp.hasOneUse())
6860 return SDValue();
6861
6862 // Match another bitwise logic op and a shift.
6863 unsigned ShiftOpcode = ShiftOp.getOpcode();
6864 if (LogicOp.getOpcode() != LogicOpcode ||
6865 !(ShiftOpcode == ISD::SHL || ShiftOpcode == ISD::SRL ||
6866 ShiftOpcode == ISD::SRA))
6867 return SDValue();
6868
6869 // Match another shift op inside the first logic operand. Handle both commuted
6870 // possibilities.
6871 // LOGIC (LOGIC (SH X0, Y), Z), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z
6872 // LOGIC (LOGIC Z, (SH X0, Y)), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z
6873 SDValue X1 = ShiftOp.getOperand(0);
6874 SDValue Y = ShiftOp.getOperand(1);
6875 SDValue X0, Z;
6876 if (LogicOp.getOperand(0).getOpcode() == ShiftOpcode &&
6877 LogicOp.getOperand(0).getOperand(1) == Y) {
6878 X0 = LogicOp.getOperand(0).getOperand(0);
6879 Z = LogicOp.getOperand(1);
6880 } else if (LogicOp.getOperand(1).getOpcode() == ShiftOpcode &&
6881 LogicOp.getOperand(1).getOperand(1) == Y) {
6882 X0 = LogicOp.getOperand(1).getOperand(0);
6883 Z = LogicOp.getOperand(0);
6884 } else {
6885 return SDValue();
6886 }
6887
6888 EVT VT = N->getValueType(0);
6889 SDLoc DL(N);
6890 SDValue LogicX = DAG.getNode(LogicOpcode, DL, VT, X0, X1);
6891 SDValue NewShift = DAG.getNode(ShiftOpcode, DL, VT, LogicX, Y);
6892 return DAG.getNode(LogicOpcode, DL, VT, NewShift, Z);
6893}
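// Illustrative sketch (editorial, not part of the combiner): the
// reassociation identity behind the shifted-operand fold above, for the
// OR/SHL case, checked for all 32-bit shift amounts with fixed inputs. Names
// are illustrative only.
static bool checkLogicOfShiftsSketch() {
  const unsigned X0 = 0x12345678u, X1 = 0x0F0F0F0Fu, Z = 0xA5A5A5A5u;
  for (unsigned Y = 0; Y != 32; ++Y)
    if ((((X0 << Y) | Z) | (X1 << Y)) != (((X0 | X1) << Y) | Z))
      return false;
  return true;
}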
6894
6895/// Given a tree of logic operations with shape like
6896/// (LOGIC (LOGIC (X, Y), LOGIC (Z, Y)))
6897/// try to match and fold shift operations with the same shift amount.
6898/// For example:
6899/// LOGIC (LOGIC (SH X0, Y), Z), (LOGIC (SH X1, Y), W) -->
6900/// --> LOGIC (SH (LOGIC X0, X1), Y), (LOGIC Z, W)
6901 static SDValue foldLogicTreeOfShifts(SDNode *N, SDValue LeftHand,
6902                                      SDValue RightHand, SelectionDAG &DAG) {
6903 unsigned LogicOpcode = N->getOpcode();
6904 assert(ISD::isBitwiseLogicOp(LogicOpcode) &&
6905 "Expected bitwise logic operation");
6906 if (LeftHand.getOpcode() != LogicOpcode ||
6907 RightHand.getOpcode() != LogicOpcode)
6908 return SDValue();
6909 if (!LeftHand.hasOneUse() || !RightHand.hasOneUse())
6910 return SDValue();
6911
6912 // Try to match one of following patterns:
6913 // LOGIC (LOGIC (SH X0, Y), Z), (LOGIC (SH X1, Y), W)
6914 // LOGIC (LOGIC (SH X0, Y), Z), (LOGIC W, (SH X1, Y))
6915 // Note that foldLogicOfShifts will handle commuted versions of the left hand
6916 // itself.
6917 SDValue CombinedShifts, W;
6918 SDValue R0 = RightHand.getOperand(0);
6919 SDValue R1 = RightHand.getOperand(1);
6920 if ((CombinedShifts = foldLogicOfShifts(N, LeftHand, R0, DAG)))
6921 W = R1;
6922 else if ((CombinedShifts = foldLogicOfShifts(N, LeftHand, R1, DAG)))
6923 W = R0;
6924 else
6925 return SDValue();
6926
6927 EVT VT = N->getValueType(0);
6928 SDLoc DL(N);
6929 return DAG.getNode(LogicOpcode, DL, VT, CombinedShifts, W);
6930}
6931
6932SDValue DAGCombiner::visitAND(SDNode *N) {
6933 SDValue N0 = N->getOperand(0);
6934 SDValue N1 = N->getOperand(1);
6935 EVT VT = N1.getValueType();
6936 SDLoc DL(N);
6937
6938 // x & x --> x
6939 if (N0 == N1)
6940 return N0;
6941
6942 // fold (and c1, c2) -> c1&c2
6943 if (SDValue C = DAG.FoldConstantArithmetic(ISD::AND, DL, VT, {N0, N1}))
6944 return C;
6945
6946 // canonicalize constant to RHS
6947   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
6948       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
6949     return DAG.getNode(ISD::AND, DL, VT, N1, N0);
6950
6951 if (areBitwiseNotOfEachother(N0, N1))
6952 return DAG.getConstant(APInt::getZero(VT.getScalarSizeInBits()), DL, VT);
6953
6954 // fold vector ops
6955 if (VT.isVector()) {
6956 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
6957 return FoldedVOp;
6958
6959 // fold (and x, 0) -> 0, vector edition
6960     if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
6961       // do not return N1, because undef node may exist in N1
6962       return DAG.getConstant(APInt::getZero(N1.getScalarValueSizeInBits()), DL,
6963                              N1.getValueType());
6964
6965 // fold (and x, -1) -> x, vector edition
6966     if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
6967       return N0;
6968
6969 // fold (and (masked_load) (splat_vec (x, ...))) to zext_masked_load
6970 auto *MLoad = dyn_cast<MaskedLoadSDNode>(N0);
6971 ConstantSDNode *Splat = isConstOrConstSplat(N1, true, true);
6972 if (MLoad && MLoad->getExtensionType() == ISD::EXTLOAD && Splat &&
6973 N1.hasOneUse()) {
6974 EVT LoadVT = MLoad->getMemoryVT();
6975 EVT ExtVT = VT;
6976 if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT, LoadVT)) {
6977 // For this AND to be a zero extension of the masked load the elements
6978 // of the BuildVec must mask the bottom bits of the extended element
6979 // type
6980         uint64_t ElementSize =
6981             LoadVT.getVectorElementType().getScalarSizeInBits();
6982 if (Splat->getAPIntValue().isMask(ElementSize)) {
6983 SDValue NewLoad = DAG.getMaskedLoad(
6984 ExtVT, DL, MLoad->getChain(), MLoad->getBasePtr(),
6985 MLoad->getOffset(), MLoad->getMask(), MLoad->getPassThru(),
6986 LoadVT, MLoad->getMemOperand(), MLoad->getAddressingMode(),
6987 ISD::ZEXTLOAD, MLoad->isExpandingLoad());
6988 bool LoadHasOtherUsers = !N0.hasOneUse();
6989 CombineTo(N, NewLoad);
6990 if (LoadHasOtherUsers)
6991 CombineTo(MLoad, NewLoad.getValue(0), NewLoad.getValue(1));
6992 return SDValue(N, 0);
6993 }
6994 }
6995 }
6996 }
6997
6998 // fold (and x, -1) -> x
6999 if (isAllOnesConstant(N1))
7000 return N0;
7001
7002 // if (and x, c) is known to be zero, return 0
7003 unsigned BitWidth = VT.getScalarSizeInBits();
7004   ConstantSDNode *N1C = isConstOrConstSplat(N1);
7005   if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(BitWidth)))
7006     return DAG.getConstant(0, DL, VT);
7007
7008 if (SDValue R = foldAndOrOfSETCC(N, DAG))
7009 return R;
7010
7011 if (SDValue NewSel = foldBinOpIntoSelect(N))
7012 return NewSel;
7013
7014 // reassociate and
7015 if (SDValue RAND = reassociateOps(ISD::AND, DL, N0, N1, N->getFlags()))
7016 return RAND;
7017
7018 // Fold and(vecreduce(x), vecreduce(y)) -> vecreduce(and(x, y))
7019 if (SDValue SD =
7020 reassociateReduction(ISD::VECREDUCE_AND, ISD::AND, DL, VT, N0, N1))
7021 return SD;
7022
7023 // fold (and (or x, C), D) -> D if (C & D) == D
7024 auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
7025 return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
7026 };
7027 if (N0.getOpcode() == ISD::OR &&
7028 ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
7029 return N1;
7030
7031 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
7032 SDValue N0Op0 = N0.getOperand(0);
7033 EVT SrcVT = N0Op0.getValueType();
7034 unsigned SrcBitWidth = SrcVT.getScalarSizeInBits();
7035 APInt Mask = ~N1C->getAPIntValue();
7036 Mask = Mask.trunc(SrcBitWidth);
7037
7038 // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
7039 if (DAG.MaskedValueIsZero(N0Op0, Mask))
7040 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0Op0);
7041
7042 // fold (and (any_ext V), c) -> (zero_ext (and (trunc V), c)) if profitable.
7043 if (N1C->getAPIntValue().countLeadingZeros() >= (BitWidth - SrcBitWidth) &&
7044 TLI.isTruncateFree(VT, SrcVT) && TLI.isZExtFree(SrcVT, VT) &&
7045 TLI.isTypeDesirableForOp(ISD::AND, SrcVT) &&
7046 TLI.isNarrowingProfitable(VT, SrcVT))
7047 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT,
7048 DAG.getNode(ISD::AND, DL, SrcVT, N0Op0,
7049 DAG.getZExtOrTrunc(N1, DL, SrcVT)));
7050 }
7051
7052 // fold (and (ext (and V, c1)), c2) -> (and (ext V), (and c1, (ext c2)))
7053 if (ISD::isExtOpcode(N0.getOpcode())) {
7054 unsigned ExtOpc = N0.getOpcode();
7055 SDValue N0Op0 = N0.getOperand(0);
7056 if (N0Op0.getOpcode() == ISD::AND &&
7057 (ExtOpc != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0Op0, VT)) &&
7058         DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
7059         DAG.isConstantIntBuildVectorOrConstantInt(N0Op0.getOperand(1)) &&
7060         N0->hasOneUse() && N0Op0->hasOneUse()) {
7061 SDValue NewMask =
7062 DAG.getNode(ISD::AND, DL, VT, N1,
7063 DAG.getNode(ExtOpc, DL, VT, N0Op0.getOperand(1)));
7064 return DAG.getNode(ISD::AND, DL, VT,
7065 DAG.getNode(ExtOpc, DL, VT, N0Op0.getOperand(0)),
7066 NewMask);
7067 }
7068 }
7069
7070 // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
7071 // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
7072 // already be zero by virtue of the width of the base type of the load.
7073 //
7074 // the 'X' node here can either be nothing or an extract_vector_elt to catch
7075 // more cases.
7076 if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7078 N0.getOperand(0).getOpcode() == ISD::LOAD &&
7079 N0.getOperand(0).getResNo() == 0) ||
7080 (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
7081 auto *Load =
7082 cast<LoadSDNode>((N0.getOpcode() == ISD::LOAD) ? N0 : N0.getOperand(0));
7083
7084 // Get the constant (if applicable) the zero'th operand is being ANDed with.
7085 // This can be a pure constant or a vector splat, in which case we treat the
7086 // vector as a scalar and use the splat value.
7087     APInt Constant = APInt::getZero(1);
7088     if (const ConstantSDNode *C = isConstOrConstSplat(
7089             N1, /*AllowUndef=*/false, /*AllowTruncation=*/true)) {
7090 Constant = C->getAPIntValue();
7091 } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
7092 unsigned EltBitWidth = Vector->getValueType(0).getScalarSizeInBits();
7093 APInt SplatValue, SplatUndef;
7094 unsigned SplatBitSize;
7095 bool HasAnyUndefs;
7096 // Endianness should not matter here. Code below makes sure that we only
7097 // use the result if the SplatBitSize is a multiple of the vector element
7098 // size. And after that we AND all element sized parts of the splat
7099 // together. So the end result should be the same regardless of in which
7100 // order we do those operations.
7101 const bool IsBigEndian = false;
7102 bool IsSplat =
7103 Vector->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
7104 HasAnyUndefs, EltBitWidth, IsBigEndian);
7105
7106       // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
7107       // multiple of 'EltBitWidth'. Otherwise, we could propagate a wrong value.
7108 if (IsSplat && (SplatBitSize % EltBitWidth) == 0) {
7109 // Undef bits can contribute to a possible optimisation if set, so
7110 // set them.
7111 SplatValue |= SplatUndef;
7112
7113 // The splat value may be something like "0x00FFFFFF", which means 0 for
7114 // the first vector value and FF for the rest, repeating. We need a mask
7115 // that will apply equally to all members of the vector, so AND all the
7116 // lanes of the constant together.
7117 Constant = APInt::getAllOnes(EltBitWidth);
7118 for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i)
7119 Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth);
7120 }
7121 }
7122
7123 // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
7124 // actually legal and isn't going to get expanded, else this is a false
7125 // optimisation.
7126 bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
7127 Load->getValueType(0),
7128 Load->getMemoryVT());
7129
7130 // Resize the constant to the same size as the original memory access before
7131 // extension. If it is still the AllOnesValue then this AND is completely
7132 // unneeded.
7133 Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
7134
7135 bool B;
7136 switch (Load->getExtensionType()) {
7137 default: B = false; break;
7138 case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
7139 case ISD::ZEXTLOAD:
7140 case ISD::NON_EXTLOAD: B = true; break;
7141 }
7142
7143 if (B && Constant.isAllOnes()) {
7144 // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
7145 // preserve semantics once we get rid of the AND.
7146 SDValue NewLoad(Load, 0);
7147
7148 // Fold the AND away. NewLoad may get replaced immediately.
7149 CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
7150
7151 if (Load->getExtensionType() == ISD::EXTLOAD) {
7152 NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
7153 Load->getValueType(0), SDLoc(Load),
7154 Load->getChain(), Load->getBasePtr(),
7155 Load->getOffset(), Load->getMemoryVT(),
7156 Load->getMemOperand());
7157 // Replace uses of the EXTLOAD with the new ZEXTLOAD.
7158 if (Load->getNumValues() == 3) {
7159 // PRE/POST_INC loads have 3 values.
7160 SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
7161 NewLoad.getValue(2) };
7162 CombineTo(Load, To, 3, true);
7163 } else {
7164 CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
7165 }
7166 }
7167
7168 return SDValue(N, 0); // Return N so it doesn't get rechecked!
7169 }
7170 }
7171
7172 // Try to convert a constant mask AND into a shuffle clear mask.
7173 if (VT.isVector())
7174 if (SDValue Shuffle = XformToShuffleWithZero(N))
7175 return Shuffle;
7176
7177 if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
7178 return Combined;
7179
7180 if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR && N0.hasOneUse() && N1C &&
7181       ISD::isExtOpcode(N0.getOperand(0).getOpcode())) {
7182     SDValue Ext = N0.getOperand(0);
7183 EVT ExtVT = Ext->getValueType(0);
7184 SDValue Extendee = Ext->getOperand(0);
7185
7186 unsigned ScalarWidth = Extendee.getValueType().getScalarSizeInBits();
7187 if (N1C->getAPIntValue().isMask(ScalarWidth) &&
7188 (!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, ExtVT))) {
7189 // (and (extract_subvector (zext|anyext|sext v) _) iN_mask)
7190 // => (extract_subvector (iN_zeroext v))
7191 SDValue ZeroExtExtendee =
7192 DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVT, Extendee);
7193
7194 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ZeroExtExtendee,
7195 N0.getOperand(1));
7196 }
7197 }
7198
7199 // fold (and (masked_gather x)) -> (zext_masked_gather x)
7200 if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
7201 EVT MemVT = GN0->getMemoryVT();
7202 EVT ScalarVT = MemVT.getScalarType();
7203
7204 if (SDValue(GN0, 0).hasOneUse() &&
7205 isConstantSplatVectorMaskForType(N1.getNode(), ScalarVT) &&
7206         TLI.isVectorLoadExtDesirable(SDValue(GN0, 0))) {
7207       SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
7208 GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
7209
7210 SDValue ZExtLoad = DAG.getMaskedGather(
7211 DAG.getVTList(VT, MVT::Other), MemVT, DL, Ops, GN0->getMemOperand(),
7212 GN0->getIndexType(), ISD::ZEXTLOAD);
7213
7214 CombineTo(N, ZExtLoad);
7215 AddToWorklist(ZExtLoad.getNode());
7216 // Avoid recheck of N.
7217 return SDValue(N, 0);
7218 }
7219 }
7220
7221 // fold (and (load x), 255) -> (zextload x, i8)
7222 // fold (and (extload x, i16), 255) -> (zextload x, i8)
7223 if (N1C && N0.getOpcode() == ISD::LOAD && !VT.isVector())
7224 if (SDValue Res = reduceLoadWidth(N))
7225 return Res;
7226
7227 if (LegalTypes) {
7228 // Attempt to propagate the AND back up to the leaves which, if they're
7229 // loads, can be combined to narrow loads and the AND node can be removed.
7230 // Perform after legalization so that extend nodes will already be
7231 // combined into the loads.
7232 if (BackwardsPropagateMask(N))
7233 return SDValue(N, 0);
7234 }
7235
7236 if (SDValue Combined = visitANDLike(N0, N1, N))
7237 return Combined;
7238
7239 // Simplify: (and (op x...), (op y...)) -> (op (and x, y))
7240 if (N0.getOpcode() == N1.getOpcode())
7241 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
7242 return V;
7243
7244 if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
7245 return R;
7246 if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG))
7247 return R;
7248
7249 // Masking the negated extension of a boolean is just the zero-extended
7250 // boolean:
7251 // and (sub 0, zext(bool X)), 1 --> zext(bool X)
7252 // and (sub 0, sext(bool X)), 1 --> zext(bool X)
7253 //
7254 // Note: the SimplifyDemandedBits fold below can make an information-losing
7255 // transform, and then we have no way to find this better fold.
7256 if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
7257 if (isNullOrNullSplat(N0.getOperand(0))) {
7258 SDValue SubRHS = N0.getOperand(1);
7259 if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
7260 SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
7261 return SubRHS;
7262 if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
7263 SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
7264 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, SubRHS.getOperand(0));
7265 }
7266 }
7267
7268 // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
7269 // fold (and (sra)) -> (and (srl)) when possible.
7270   if (SimplifyDemandedBits(SDValue(N, 0)))
7271     return SDValue(N, 0);
7272
7273 // fold (zext_inreg (extload x)) -> (zextload x)
7274 // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
7275 if (ISD::isUNINDEXEDLoad(N0.getNode()) &&
7276 (ISD::isEXTLoad(N0.getNode()) ||
7277 (ISD::isSEXTLoad(N0.getNode()) && N0.hasOneUse()))) {
7278 auto *LN0 = cast<LoadSDNode>(N0);
7279 EVT MemVT = LN0->getMemoryVT();
7280 // If we zero all the possible extended bits, then we can turn this into
7281 // a zextload if we are running before legalize or the operation is legal.
7282 unsigned ExtBitSize = N1.getScalarValueSizeInBits();
7283 unsigned MemBitSize = MemVT.getScalarSizeInBits();
7284 APInt ExtBits = APInt::getHighBitsSet(ExtBitSize, ExtBitSize - MemBitSize);
7285 if (DAG.MaskedValueIsZero(N1, ExtBits) &&
7286 ((!LegalOperations && LN0->isSimple()) ||
7287 TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
7288 SDValue ExtLoad =
7289 DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(),
7290 LN0->getBasePtr(), MemVT, LN0->getMemOperand());
7291 AddToWorklist(N);
7292 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
7293 return SDValue(N, 0); // Return N so it doesn't get rechecked!
7294 }
7295 }
7296
7297 // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
7298 if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
7299 if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
7300 N0.getOperand(1), false))
7301 return BSwap;
7302 }
7303
7304 if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
7305 return Shifts;
7306
7307 if (SDValue V = combineShiftAnd1ToBitTest(N, DAG))
7308 return V;
7309
7310 // Recognize the following pattern:
7311 //
7312 // AndVT = (and (sign_extend NarrowVT to AndVT) #bitmask)
7313 //
7314 // where bitmask is a mask that clears the upper bits of AndVT. The
7315 // number of bits in bitmask must be a power of two.
7316 auto IsAndZeroExtMask = [](SDValue LHS, SDValue RHS) {
7317 if (LHS->getOpcode() != ISD::SIGN_EXTEND)
7318 return false;
7319
7320 auto *C = dyn_cast<ConstantSDNode>(RHS);
7321 if (!C)
7322 return false;
7323
7324 if (!C->getAPIntValue().isMask(
7325 LHS.getOperand(0).getValueType().getFixedSizeInBits()))
7326 return false;
7327
7328 return true;
7329 };
7330
7331 // Replace (and (sign_extend ...) #bitmask) with (zero_extend ...).
7332 if (IsAndZeroExtMask(N0, N1))
7333 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
7334
7335 if (hasOperation(ISD::USUBSAT, VT))
7336 if (SDValue V = foldAndToUsubsat(N, DAG, DL))
7337 return V;
7338
7339 // Postpone until legalization completed to avoid interference with bswap
7340 // folding
7341 if (LegalOperations || VT.isVector())
7342 if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
7343 return R;
7344
7345 return SDValue();
7346}
7347
7348/// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
7349SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
7350 bool DemandHighBits) {
7351 if (!LegalOperations)
7352 return SDValue();
7353
7354 EVT VT = N->getValueType(0);
7355 if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
7356 return SDValue();
7357   if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
7358     return SDValue();
7359
7360 // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
7361 bool LookPassAnd0 = false;
7362 bool LookPassAnd1 = false;
7363 if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
7364 std::swap(N0, N1);
7365 if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
7366 std::swap(N0, N1);
7367 if (N0.getOpcode() == ISD::AND) {
7368 if (!N0->hasOneUse())
7369 return SDValue();
7370 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7371 // Also handle 0xffff since the LHS is guaranteed to have zeros there.
7372 // This is needed for X86.
7373 if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
7374 N01C->getZExtValue() != 0xFFFF))
7375 return SDValue();
7376 N0 = N0.getOperand(0);
7377 LookPassAnd0 = true;
7378 }
7379
7380 if (N1.getOpcode() == ISD::AND) {
7381 if (!N1->hasOneUse())
7382 return SDValue();
7383 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
7384 if (!N11C || N11C->getZExtValue() != 0xFF)
7385 return SDValue();
7386 N1 = N1.getOperand(0);
7387 LookPassAnd1 = true;
7388 }
7389
7390 if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
7391 std::swap(N0, N1);
7392 if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
7393 return SDValue();
7394 if (!N0->hasOneUse() || !N1->hasOneUse())
7395 return SDValue();
7396
7397 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7398 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
7399 if (!N01C || !N11C)
7400 return SDValue();
7401 if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
7402 return SDValue();
7403
7404 // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
7405 SDValue N00 = N0->getOperand(0);
7406 if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
7407 if (!N00->hasOneUse())
7408 return SDValue();
7409 ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
7410 if (!N001C || N001C->getZExtValue() != 0xFF)
7411 return SDValue();
7412 N00 = N00.getOperand(0);
7413 LookPassAnd0 = true;
7414 }
7415
7416 SDValue N10 = N1->getOperand(0);
7417 if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
7418 if (!N10->hasOneUse())
7419 return SDValue();
7420 ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
7421 // Also allow 0xFFFF since the bits will be shifted out. This is needed
7422 // for X86.
7423 if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
7424 N101C->getZExtValue() != 0xFFFF))
7425 return SDValue();
7426 N10 = N10.getOperand(0);
7427 LookPassAnd1 = true;
7428 }
7429
7430 if (N00 != N10)
7431 return SDValue();
7432
7433 // Make sure everything beyond the low halfword gets set to zero since the SRL
7434 // 16 will clear the top bits.
7435 unsigned OpSizeInBits = VT.getSizeInBits();
7436 if (OpSizeInBits > 16) {
7437 // If the left-shift isn't masked out then the only way this is a bswap is
7438 // if all bits beyond the low 8 are 0. In that case the entire pattern
7439 // reduces to a left shift anyway: leave it for other parts of the combiner.
7440 if (DemandHighBits && !LookPassAnd0)
7441 return SDValue();
7442
7443 // However, if the right shift isn't masked out then it might be because
7444 // it's not needed. See if we can spot that too. If the high bits aren't
7445 // demanded, we only need bits 23:16 to be zero. Otherwise, we need all
7446 // upper bits to be zero.
7447 if (!LookPassAnd1) {
7448 unsigned HighBit = DemandHighBits ? OpSizeInBits : 24;
7449 if (!DAG.MaskedValueIsZero(N10,
7450 APInt::getBitsSet(OpSizeInBits, 16, HighBit)))
7451 return SDValue();
7452 }
7453 }
7454
7455 SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
7456 if (OpSizeInBits > 16) {
7457 SDLoc DL(N);
7458 Res = DAG.getNode(ISD::SRL, DL, VT, Res,
7459 DAG.getShiftAmountConstant(OpSizeInBits - 16, VT, DL));
7460 }
7461 return Res;
7462}
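// Illustrative sketch (editorial, not part of the combiner): the 32-bit
// identity matched above, i.e. swapping the two low bytes equals a byte
// reversal followed by a right shift of 16. Names are illustrative only.
static unsigned bswap32Sketch(unsigned V) {
  return ((V & 0x000000FFu) << 24) | ((V & 0x0000FF00u) << 8) |
         ((V & 0x00FF0000u) >> 8) | ((V & 0xFF000000u) >> 24);
}
static bool checkBSwapHWordLowSketch() {
  const unsigned Vals[] = {0u, 0x01020304u, 0xDEADBEEFu, 0xFFFFFFFFu};
  for (unsigned A : Vals) {
    unsigned Swapped = ((A & 0xFFu) << 8) | ((A >> 8) & 0xFFu); // (a << 8) | (a >> 8), low 16 bits
    if (Swapped != (bswap32Sketch(A) >> 16))
      return false;
  }
  return true;
}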
7463
7464/// Return true if the specified node is an element that makes up a 32-bit
7465/// packed halfword byteswap.
7466/// ((x & 0x000000ff) << 8) |
7467/// ((x & 0x0000ff00) >> 8) |
7468/// ((x & 0x00ff0000) << 8) |
7469/// ((x & 0xff000000) >> 8)
7470 static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
7471   if (!N->hasOneUse())
7472 return false;
7473
7474 unsigned Opc = N.getOpcode();
7475 if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
7476 return false;
7477
7478 SDValue N0 = N.getOperand(0);
7479 unsigned Opc0 = N0.getOpcode();
7480 if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
7481 return false;
7482
7483 ConstantSDNode *N1C = nullptr;
7484 // SHL or SRL: look upstream for AND mask operand
7485 if (Opc == ISD::AND)
7486 N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
7487 else if (Opc0 == ISD::AND)
7488 N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7489 if (!N1C)
7490 return false;
7491
7492 unsigned MaskByteOffset;
7493 switch (N1C->getZExtValue()) {
7494 default:
7495 return false;
7496 case 0xFF: MaskByteOffset = 0; break;
7497 case 0xFF00: MaskByteOffset = 1; break;
7498 case 0xFFFF:
7499 // In case demanded bits didn't clear the bits that will be shifted out.
7500 // This is needed for X86.
7501 if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) {
7502 MaskByteOffset = 1;
7503 break;
7504 }
7505 return false;
7506 case 0xFF0000: MaskByteOffset = 2; break;
7507 case 0xFF000000: MaskByteOffset = 3; break;
7508 }
7509
7510 // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
7511 if (Opc == ISD::AND) {
7512 if (MaskByteOffset == 0 || MaskByteOffset == 2) {
7513 // (x >> 8) & 0xff
7514 // (x >> 8) & 0xff0000
7515 if (Opc0 != ISD::SRL)
7516 return false;
7517 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7518 if (!C || C->getZExtValue() != 8)
7519 return false;
7520 } else {
7521 // (x << 8) & 0xff00
7522 // (x << 8) & 0xff000000
7523 if (Opc0 != ISD::SHL)
7524 return false;
7525 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7526 if (!C || C->getZExtValue() != 8)
7527 return false;
7528 }
7529 } else if (Opc == ISD::SHL) {
7530 // (x & 0xff) << 8
7531 // (x & 0xff0000) << 8
7532 if (MaskByteOffset != 0 && MaskByteOffset != 2)
7533 return false;
7534 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
7535 if (!C || C->getZExtValue() != 8)
7536 return false;
7537 } else { // Opc == ISD::SRL
7538 // (x & 0xff00) >> 8
7539 // (x & 0xff000000) >> 8
7540 if (MaskByteOffset != 1 && MaskByteOffset != 3)
7541 return false;
7542 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
7543 if (!C || C->getZExtValue() != 8)
7544 return false;
7545 }
7546
7547 if (Parts[MaskByteOffset])
7548 return false;
7549
7550 Parts[MaskByteOffset] = N0.getOperand(0).getNode();
7551 return true;
7552}
7553
7554// Match 2 elements of a packed halfword bswap.
7555 static bool isBSwapHWordPair(SDValue N, MutableArrayRef<SDNode *> Parts) {
7556   if (N.getOpcode() == ISD::OR)
7557 return isBSwapHWordElement(N.getOperand(0), Parts) &&
7558 isBSwapHWordElement(N.getOperand(1), Parts);
7559
7560 if (N.getOpcode() == ISD::SRL && N.getOperand(0).getOpcode() == ISD::BSWAP) {
7561 ConstantSDNode *C = isConstOrConstSplat(N.getOperand(1));
7562 if (!C || C->getAPIntValue() != 16)
7563 return false;
7564 Parts[0] = Parts[1] = N.getOperand(0).getOperand(0).getNode();
7565 return true;
7566 }
7567
7568 return false;
7569}
7570
7571// Match this pattern:
7572// (or (and (shl (A, 8)), 0xff00ff00), (and (srl (A, 8)), 0x00ff00ff))
7573// And rewrite this to:
7574// (rotr (bswap A), 16)
7575 static SDValue matchBSwapHWordOrAndAnd(const TargetLowering &TLI,
7576                                        SelectionDAG &DAG, SDNode *N, SDValue N0,
7577 SDValue N1, EVT VT) {
7578 assert(N->getOpcode() == ISD::OR && VT == MVT::i32 &&
7579 "MatchBSwapHWordOrAndAnd: expecting i32");
7580 if (!TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
7581 return SDValue();
7582 if (N0.getOpcode() != ISD::AND || N1.getOpcode() != ISD::AND)
7583 return SDValue();
7584 // TODO: this is too restrictive; lifting this restriction requires more tests
7585 if (!N0->hasOneUse() || !N1->hasOneUse())
7586 return SDValue();
7587   ConstantSDNode *Mask0 = isConstOrConstSplat(N0.getOperand(1));
7588   ConstantSDNode *Mask1 = isConstOrConstSplat(N1.getOperand(1));
7589   if (!Mask0 || !Mask1)
7590 return SDValue();
7591 if (Mask0->getAPIntValue() != 0xff00ff00 ||
7592 Mask1->getAPIntValue() != 0x00ff00ff)
7593 return SDValue();
7594 SDValue Shift0 = N0.getOperand(0);
7595 SDValue Shift1 = N1.getOperand(0);
7596 if (Shift0.getOpcode() != ISD::SHL || Shift1.getOpcode() != ISD::SRL)
7597 return SDValue();
7598 ConstantSDNode *ShiftAmt0 = isConstOrConstSplat(Shift0.getOperand(1));
7599 ConstantSDNode *ShiftAmt1 = isConstOrConstSplat(Shift1.getOperand(1));
7600 if (!ShiftAmt0 || !ShiftAmt1)
7601 return SDValue();
7602 if (ShiftAmt0->getAPIntValue() != 8 || ShiftAmt1->getAPIntValue() != 8)
7603 return SDValue();
7604 if (Shift0.getOperand(0) != Shift1.getOperand(0))
7605 return SDValue();
7606
7607 SDLoc DL(N);
7608 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Shift0.getOperand(0));
7609 SDValue ShAmt = DAG.getShiftAmountConstant(16, VT, DL);
7610 return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
7611}
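// Illustrative sketch (editorial, not part of the combiner): the identity
// matched above, checked on a few 32-bit values. The byte reversal is written
// out inline so the sketch stays self-contained; names are illustrative only.
static bool checkBSwapHWordOrAndAndSketch() {
  const unsigned Vals[] = {0u, 0x01020304u, 0xDEADBEEFu, 0xFFFFFFFFu};
  for (unsigned A : Vals) {
    unsigned Masked = ((A << 8) & 0xFF00FF00u) | ((A >> 8) & 0x00FF00FFu);
    unsigned Swapped = ((A & 0xFFu) << 24) | ((A & 0xFF00u) << 8) |
                       ((A >> 8) & 0xFF00u) | ((A >> 24) & 0xFFu);
    unsigned RotR16 = (Swapped >> 16) | (Swapped << 16); // rotr(bswap(A), 16)
    if (Masked != RotR16)
      return false;
  }
  return true;
}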
7612
7613/// Match a 32-bit packed halfword bswap. That is
7614/// ((x & 0x000000ff) << 8) |
7615/// ((x & 0x0000ff00) >> 8) |
7616/// ((x & 0x00ff0000) << 8) |
7617/// ((x & 0xff000000) >> 8)
7618/// => (rotl (bswap x), 16)
7619SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
7620 if (!LegalOperations)
7621 return SDValue();
7622
7623 EVT VT = N->getValueType(0);
7624 if (VT != MVT::i32)
7625 return SDValue();
7626   if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
7627     return SDValue();
7628
7629 if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N0, N1, VT))
7630 return BSwap;
7631
7632 // Try again with commuted operands.
7633 if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N1, N0, VT))
7634 return BSwap;
7635
7636
7637 // Look for either
7638 // (or (bswaphpair), (bswaphpair))
7639 // (or (or (bswaphpair), (and)), (and))
7640 // (or (or (and), (bswaphpair)), (and))
7641 SDNode *Parts[4] = {};
7642
7643 if (isBSwapHWordPair(N0, Parts)) {
7644 // (or (or (and), (and)), (or (and), (and)))
7645 if (!isBSwapHWordPair(N1, Parts))
7646 return SDValue();
7647 } else if (N0.getOpcode() == ISD::OR) {
7648 // (or (or (or (and), (and)), (and)), (and))
7649 if (!isBSwapHWordElement(N1, Parts))
7650 return SDValue();
7651 SDValue N00 = N0.getOperand(0);
7652 SDValue N01 = N0.getOperand(1);
7653 if (!(isBSwapHWordElement(N01, Parts) && isBSwapHWordPair(N00, Parts)) &&
7654 !(isBSwapHWordElement(N00, Parts) && isBSwapHWordPair(N01, Parts)))
7655 return SDValue();
7656 } else {
7657 return SDValue();
7658 }
7659
7660 // Make sure the parts are all coming from the same node.
7661 if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
7662 return SDValue();
7663
7664 SDLoc DL(N);
7665 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
7666 SDValue(Parts[0], 0));
7667
7668 // Result of the bswap should be rotated by 16. If it's not legal, then
7669 // do (x << 16) | (x >> 16).
7670 SDValue ShAmt = DAG.getShiftAmountConstant(16, VT, DL);
7671   if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
7672     return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
7673   if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
7674     return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
7675 return DAG.getNode(ISD::OR, DL, VT,
7676 DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
7677 DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
7678}
7679
7680/// This contains all DAGCombine rules which reduce two values combined by
7681/// an Or operation to a single value \see visitANDLike().
7682SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, const SDLoc &DL) {
7683 EVT VT = N1.getValueType();
7684
7685 // fold (or x, undef) -> -1
7686 if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
7687 return DAG.getAllOnesConstant(DL, VT);
7688
7689 if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
7690 return V;
7691
7692 // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
7693 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
7694 // Don't increase # computations.
7695 (N0->hasOneUse() || N1->hasOneUse())) {
7696 // We can only do this xform if we know that bits from X that are set in C2
7697 // but not in C1 are already zero. Likewise for Y.
7698     if (const ConstantSDNode *N0O1C =
7699             getAsNonOpaqueConstant(N0.getOperand(1)))
7700       if (const ConstantSDNode *N1O1C =
7701               getAsNonOpaqueConstant(N1.getOperand(1))) {
7702 // We can only do this xform if we know that bits from X that are set in
7703 // C2 but not in C1 are already zero. Likewise for Y.
7704 const APInt &LHSMask = N0O1C->getAPIntValue();
7705 const APInt &RHSMask = N1O1C->getAPIntValue();
7706
7707 if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
7708 DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
7709 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
7710 N0.getOperand(0), N1.getOperand(0));
7711 return DAG.getNode(ISD::AND, DL, VT, X,
7712 DAG.getConstant(LHSMask | RHSMask, DL, VT));
7713 }
7714 }
7715 }
7716 }
7717
7718 // (or (and X, M), (and X, N)) -> (and X, (or M, N))
7719 if (N0.getOpcode() == ISD::AND &&
7720 N1.getOpcode() == ISD::AND &&
7721 N0.getOperand(0) == N1.getOperand(0) &&
7722 // Don't increase # computations.
7723 (N0->hasOneUse() || N1->hasOneUse())) {
7724 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
7725 N0.getOperand(1), N1.getOperand(1));
7726 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
7727 }
7728
7729 return SDValue();
7730}
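// Illustrative sketch (editorial, not part of the combiner): the
// shared-operand identity used above, (or (and X, M), (and X, N)) ==
// (and X, (or M, N)), checked exhaustively over 8-bit masks. Names are
// illustrative only.
static bool checkOrOfMaskedValueSketch() {
  const unsigned X = 0xB5u;
  for (unsigned M = 0; M != 256; ++M)
    for (unsigned N = 0; N != 256; ++N)
      if (((X & M) | (X & N)) != (X & (M | N)))
        return false;
  return true;
}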
7731
7732/// OR combines for which the commuted variant will be tried as well.
7733 static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1,
7734                                   SDNode *N) {
7735 EVT VT = N0.getValueType();
7736 unsigned BW = VT.getScalarSizeInBits();
7737 SDLoc DL(N);
7738
7739 auto peekThroughResize = [](SDValue V) {
7740 if (V->getOpcode() == ISD::ZERO_EXTEND || V->getOpcode() == ISD::TRUNCATE)
7741 return V->getOperand(0);
7742 return V;
7743 };
7744
7745 SDValue N0Resized = peekThroughResize(N0);
7746 if (N0Resized.getOpcode() == ISD::AND) {
7747 SDValue N1Resized = peekThroughResize(N1);
7748 SDValue N00 = N0Resized.getOperand(0);
7749 SDValue N01 = N0Resized.getOperand(1);
7750
7751 // fold or (and x, y), x --> x
7752 if (N00 == N1Resized || N01 == N1Resized)
7753 return N1;
7754
7755 // fold (or (and X, (xor Y, -1)), Y) -> (or X, Y)
7756 // TODO: Set AllowUndefs = true.
7757 if (SDValue NotOperand = getBitwiseNotOperand(N01, N00,
7758 /* AllowUndefs */ false)) {
7759 if (peekThroughResize(NotOperand) == N1Resized)
7760 return DAG.getNode(ISD::OR, DL, VT, DAG.getZExtOrTrunc(N00, DL, VT),
7761 N1);
7762 }
7763
7764 // fold (or (and (xor Y, -1), X), Y) -> (or X, Y)
7765 if (SDValue NotOperand = getBitwiseNotOperand(N00, N01,
7766 /* AllowUndefs */ false)) {
7767 if (peekThroughResize(NotOperand) == N1Resized)
7768 return DAG.getNode(ISD::OR, DL, VT, DAG.getZExtOrTrunc(N01, DL, VT),
7769 N1);
7770 }
7771 }
7772
7773 SDValue X, Y;
7774
7775 // fold or (xor X, N1), N1 --> or X, N1
7776 if (sd_match(N0, m_Xor(m_Value(X), m_Specific(N1))))
7777 return DAG.getNode(ISD::OR, DL, VT, X, N1);
7778
7779 // fold or (xor x, y), (x and/or y) --> or x, y
7780 if (sd_match(N0, m_Xor(m_Value(X), m_Value(Y))) &&
7781 (sd_match(N1, m_And(m_Specific(X), m_Specific(Y))) ||
7782        sd_match(N1, m_Or(m_Specific(X), m_Specific(Y)))))
7783     return DAG.getNode(ISD::OR, DL, VT, X, Y);
7784
7785 if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
7786 return R;
7787
7788 auto peekThroughZext = [](SDValue V) {
7789 if (V->getOpcode() == ISD::ZERO_EXTEND)
7790 return V->getOperand(0);
7791 return V;
7792 };
7793
7794 // (fshl X, ?, Y) | (shl X, Y) --> fshl X, ?, Y
7795 if (N0.getOpcode() == ISD::FSHL && N1.getOpcode() == ISD::SHL &&
7796 N0.getOperand(0) == N1.getOperand(0) &&
7797 peekThroughZext(N0.getOperand(2)) == peekThroughZext(N1.getOperand(1)))
7798 return N0;
7799
7800 // (fshr ?, X, Y) | (srl X, Y) --> fshr ?, X, Y
7801 if (N0.getOpcode() == ISD::FSHR && N1.getOpcode() == ISD::SRL &&
7802 N0.getOperand(1) == N1.getOperand(0) &&
7803 peekThroughZext(N0.getOperand(2)) == peekThroughZext(N1.getOperand(1)))
7804 return N0;
7805
7806 // Attempt to match a legalized build_pair-esque pattern:
7807 // or(shl(aext(Hi),BW/2),zext(Lo))
7808 SDValue Lo, Hi;
7809 if (sd_match(N0,
7810                m_OneUse(m_Shl(m_AnyExt(m_Value(Hi)), m_SpecificInt(BW / 2)))) &&
7811       sd_match(N1, m_ZExt(m_Value(Lo))) &&
7812 Lo.getScalarValueSizeInBits() == (BW / 2) &&
7813 Lo.getValueType() == Hi.getValueType()) {
7814 // Fold build_pair(not(Lo),not(Hi)) -> not(build_pair(Lo,Hi)).
7815 SDValue NotLo, NotHi;
7816 if (sd_match(Lo, m_OneUse(m_Not(m_Value(NotLo)))) &&
7817 sd_match(Hi, m_OneUse(m_Not(m_Value(NotHi))))) {
7818 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotLo);
7819 Hi = DAG.getNode(ISD::ANY_EXTEND, DL, VT, NotHi);
7820 Hi = DAG.getNode(ISD::SHL, DL, VT, Hi,
7821 DAG.getShiftAmountConstant(BW / 2, VT, DL));
7822 return DAG.getNOT(DL, DAG.getNode(ISD::OR, DL, VT, Lo, Hi), VT);
7823 }
7824 }
7825
7826 return SDValue();
7827}
7828
7829SDValue DAGCombiner::visitOR(SDNode *N) {
7830 SDValue N0 = N->getOperand(0);
7831 SDValue N1 = N->getOperand(1);
7832 EVT VT = N1.getValueType();
7833 SDLoc DL(N);
7834
7835 // x | x --> x
7836 if (N0 == N1)
7837 return N0;
7838
7839 // fold (or c1, c2) -> c1|c2
7840 if (SDValue C = DAG.FoldConstantArithmetic(ISD::OR, DL, VT, {N0, N1}))
7841 return C;
7842
7843 // canonicalize constant to RHS
7844   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
7845       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
7846     return DAG.getNode(ISD::OR, DL, VT, N1, N0);
7847
7848 // fold vector ops
7849 if (VT.isVector()) {
7850 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
7851 return FoldedVOp;
7852
7853 // fold (or x, 0) -> x, vector edition
7854     if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
7855       return N0;
7856
7857 // fold (or x, -1) -> -1, vector edition
7858     if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
7859       // do not return N1, because undef node may exist in N1
7860 return DAG.getAllOnesConstant(DL, N1.getValueType());
7861
7862 // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
7863 // Do this only if the resulting type / shuffle is legal.
7864 auto *SV0 = dyn_cast<ShuffleVectorSDNode>(N0);
7865 auto *SV1 = dyn_cast<ShuffleVectorSDNode>(N1);
7866 if (SV0 && SV1 && TLI.isTypeLegal(VT)) {
7867 bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
7868 bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
7869 bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
7870 bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
7871 // Ensure both shuffles have a zero input.
7872 if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
7873 assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
7874 assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
7875 bool CanFold = true;
7876 int NumElts = VT.getVectorNumElements();
7877 SmallVector<int, 4> Mask(NumElts, -1);
7878
7879 for (int i = 0; i != NumElts; ++i) {
7880 int M0 = SV0->getMaskElt(i);
7881 int M1 = SV1->getMaskElt(i);
7882
7883 // Determine if either index is pointing to a zero vector.
7884 bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
7885 bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));
7886
7887 // If one element is zero and the other side is undef, keep undef.
7888 // This also handles the case that both are undef.
7889 if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0))
7890 continue;
7891
7892 // Make sure only one of the elements is zero.
7893 if (M0Zero == M1Zero) {
7894 CanFold = false;
7895 break;
7896 }
7897
7898 assert((M0 >= 0 || M1 >= 0) && "Undef index!");
7899
7900 // We have a zero and non-zero element. If the non-zero came from
7901 // SV0 make the index a LHS index. If it came from SV1, make it
7902 // a RHS index. We need to mod by NumElts because we don't care
7903 // which operand it came from in the original shuffles.
7904 Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
7905 }
7906
7907 if (CanFold) {
7908 SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
7909 SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
7910 SDValue LegalShuffle =
7911 TLI.buildLegalVectorShuffle(VT, DL, NewLHS, NewRHS, Mask, DAG);
7912 if (LegalShuffle)
7913 return LegalShuffle;
7914 }
7915 }
7916 }
7917 }
7918
7919 // fold (or x, 0) -> x
7920 if (isNullConstant(N1))
7921 return N0;
7922
7923 // fold (or x, -1) -> -1
7924 if (isAllOnesConstant(N1))
7925 return N1;
7926
7927 if (SDValue NewSel = foldBinOpIntoSelect(N))
7928 return NewSel;
7929
7930 // fold (or x, c) -> c iff (x & ~c) == 0
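// For example, (or (and y, 0x0F), 0xFF) -> 0xFF, since every bit that
// (and y, 0x0F) could set is already set in the constant.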
7931 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
7932 if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
7933 return N1;
7934
7935 if (SDValue R = foldAndOrOfSETCC(N, DAG))
7936 return R;
7937
7938 if (SDValue Combined = visitORLike(N0, N1, DL))
7939 return Combined;
7940
7941 if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
7942 return Combined;
7943
7944 // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
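// For example, for i32, swapping the two bytes within each 16-bit half is
// equivalent to (rotl (bswap x), 16), which matches the first form above.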
7945 if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
7946 return BSwap;
7947 if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
7948 return BSwap;
7949
7950 // reassociate or
7951 if (SDValue ROR = reassociateOps(ISD::OR, DL, N0, N1, N->getFlags()))
7952 return ROR;
7953
7954 // Fold or(vecreduce(x), vecreduce(y)) -> vecreduce(or(x, y))
7955 if (SDValue SD =
7956 reassociateReduction(ISD::VECREDUCE_OR, ISD::OR, DL, VT, N0, N1))
7957 return SD;
7958
7959 // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
7960 // iff (c1 & c2) != 0 or c1/c2 are undef.
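// Note: (or (and X, c1), c2) == (and (or X, c2), (or c1, c2)) holds for any
// constants by absorption; the intersects() check below only decides when the
// rewrite is applied.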
7961 auto MatchIntersect = [](ConstantSDNode *C1, ConstantSDNode *C2) {
7962 return !C1 || !C2 || C1->getAPIntValue().intersects(C2->getAPIntValue());
7963 };
7964 if (N0.getOpcode() == ISD::AND && N0->hasOneUse() &&
7965 ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect, true)) {
7966 if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT,
7967 {N1, N0.getOperand(1)})) {
7968 SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
7969 AddToWorklist(IOR.getNode());
7970 return DAG.getNode(ISD::AND, DL, VT, COR, IOR);
7971 }
7972 }
7973
7974 if (SDValue Combined = visitORCommutative(DAG, N0, N1, N))
7975 return Combined;
7976 if (SDValue Combined = visitORCommutative(DAG, N1, N0, N))
7977 return Combined;
7978
7979 // Simplify: (or (op x...), (op y...)) -> (op (or x, y))
7980 if (N0.getOpcode() == N1.getOpcode())
7981 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
7982 return V;
7983
7984 // See if this is some rotate idiom.
7985 if (SDValue Rot = MatchRotate(N0, N1, DL))
7986 return Rot;
7987
7988 if (SDValue Load = MatchLoadCombine(N))
7989 return Load;
7990
7991 // Simplify the operands using demanded-bits information.
7992 if (SimplifyDemandedBits(SDValue(N, 0)))
7993 return SDValue(N, 0);
7994
7995 // If OR can be rewritten into ADD, try combines based on ADD.
7996 if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
7997 DAG.isADDLike(SDValue(N, 0)))
7998 if (SDValue Combined = visitADDLike(N))
7999 return Combined;
8000
8001 // Postpone until legalization completed to avoid interference with bswap
8002 // folding
8003 if (LegalOperations || VT.isVector())
8004 if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
8005 return R;
8006
8007 return SDValue();
8008}
8009
8010 static SDValue stripConstantMask(const SelectionDAG &DAG, SDValue Op,
8011 SDValue &Mask) {
8012 if (Op.getOpcode() == ISD::AND &&
8013 DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
8014 Mask = Op.getOperand(1);
8015 return Op.getOperand(0);
8016 }
8017 return Op;
8018}
8019
8020/// Match "(X shl/srl V1) & V2" where V2 may not be present.
8021static bool matchRotateHalf(const SelectionDAG &DAG, SDValue Op, SDValue &Shift,
8022 SDValue &Mask) {
8023 Op = stripConstantMask(DAG, Op, Mask);
8024 if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
8025 Shift = Op;
8026 return true;
8027 }
8028 return false;
8029}
8030
8031/// Helper function for visitOR to extract the needed side of a rotate idiom
8032/// from a shl/srl/mul/udiv. This is meant to handle cases where
8033/// InstCombine merged some outside op with one of the shifts from
8034/// the rotate pattern.
8035/// \returns An empty \c SDValue if the needed shift couldn't be extracted.
8036/// Otherwise, returns an expansion of \p ExtractFrom based on the following
8037/// patterns:
8038///
8039/// (or (add v v) (shrl v bitwidth-1)):
8040/// expands (add v v) -> (shl v 1)
8041///
8042/// (or (mul v c0) (shrl (mul v c1) c2)):
8043/// expands (mul v c0) -> (shl (mul v c1) c3)
8044///
8045/// (or (udiv v c0) (shl (udiv v c1) c2)):
8046/// expands (udiv v c0) -> (shrl (udiv v c1) c3)
8047///
8048/// (or (shl v c0) (shrl (shl v c1) c2)):
8049/// expands (shl v c0) -> (shl (shl v c1) c3)
8050///
8051/// (or (shrl v c0) (shl (shrl v c1) c2)):
8052/// expands (shrl v c0) -> (shrl (shrl v c1) c3)
8053///
8054/// Such that in all cases, c3+c2==bitwidth(op v c1).
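/// For example, for a 32-bit value:
/// (or (mul v c0=16) (shrl (mul v c1=2) c2=29))
/// expands (mul v 16) -> (shl (mul v 2) c3=3), since 16 == 2 * (1 << 3) and
/// c3+c2 == 3+29 == 32.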
8055 static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
8056 SDValue ExtractFrom, SDValue &Mask,
8057 const SDLoc &DL) {
8058 assert(OppShift && ExtractFrom && "Empty SDValue");
8059 if (OppShift.getOpcode() != ISD::SHL && OppShift.getOpcode() != ISD::SRL)
8060 return SDValue();
8061
8062 ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask);
8063
8064 // Value and Type of the shift.
8065 SDValue OppShiftLHS = OppShift.getOperand(0);
8066 EVT ShiftedVT = OppShiftLHS.getValueType();
8067
8068 // Amount of the existing shift.
8069 ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));
8070
8071 // (add v v) -> (shl v 1)
8072 // TODO: Should this be a general DAG canonicalization?
8073 if (OppShift.getOpcode() == ISD::SRL && OppShiftCst &&
8074 ExtractFrom.getOpcode() == ISD::ADD &&
8075 ExtractFrom.getOperand(0) == ExtractFrom.getOperand(1) &&
8076 ExtractFrom.getOperand(0) == OppShiftLHS &&
8077 OppShiftCst->getAPIntValue() == ShiftedVT.getScalarSizeInBits() - 1)
8078 return DAG.getNode(ISD::SHL, DL, ShiftedVT, OppShiftLHS,
8079 DAG.getShiftAmountConstant(1, ShiftedVT, DL));
8080
8081 // Preconditions:
8082 // (or (op0 v c0) (shiftl/r (op0 v c1) c2))
8083 //
8084 // Find opcode of the needed shift to be extracted from (op0 v c0).
8085 unsigned Opcode = ISD::DELETED_NODE;
8086 bool IsMulOrDiv = false;
8087 // Set Opcode and IsMulOrDiv if the extract opcode matches the needed shift
8088 // opcode or its arithmetic (mul or udiv) variant.
8089 auto SelectOpcode = [&](unsigned NeededShift, unsigned MulOrDivVariant) {
8090 IsMulOrDiv = ExtractFrom.getOpcode() == MulOrDivVariant;
8091 if (!IsMulOrDiv && ExtractFrom.getOpcode() != NeededShift)
8092 return false;
8093 Opcode = NeededShift;
8094 return true;
8095 };
8096 // op0 must be either the needed shift opcode or the mul/udiv equivalent
8097 // that the needed shift can be extracted from.
8098 if ((OppShift.getOpcode() != ISD::SRL || !SelectOpcode(ISD::SHL, ISD::MUL)) &&
8099 (OppShift.getOpcode() != ISD::SHL || !SelectOpcode(ISD::SRL, ISD::UDIV)))
8100 return SDValue();
8101
8102 // op0 must be the same opcode on both sides, have the same LHS argument,
8103 // and produce the same value type.
8104 if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() ||
8105 OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) ||
8106 ShiftedVT != ExtractFrom.getValueType())
8107 return SDValue();
8108
8109 // Constant mul/udiv/shift amount from the RHS of the shift's LHS op.
8110 ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1));
8111 // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op.
8112 ConstantSDNode *ExtractFromCst =
8113 isConstOrConstSplat(ExtractFrom.getOperand(1));
8114 // TODO: We should be able to handle non-uniform constant vectors for these values
8115 // Check that we have constant values.
8116 if (!OppShiftCst || !OppShiftCst->getAPIntValue() ||
8117 !OppLHSCst || !OppLHSCst->getAPIntValue() ||
8118 !ExtractFromCst || !ExtractFromCst->getAPIntValue())
8119 return SDValue();
8120
8121 // Compute the shift amount we need to extract to complete the rotate.
8122 const unsigned VTWidth = ShiftedVT.getScalarSizeInBits();
8123 if (OppShiftCst->getAPIntValue().ugt(VTWidth))
8124 return SDValue();
8125 APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
8126 // Normalize the bitwidth of the two mul/udiv/shift constant operands.
8127 APInt ExtractFromAmt = ExtractFromCst->getAPIntValue();
8128 APInt OppLHSAmt = OppLHSCst->getAPIntValue();
8129 zeroExtendToMatch(ExtractFromAmt, OppLHSAmt);
8130
8131 // Now try extract the needed shift from the ExtractFrom op and see if the
8132 // result matches up with the existing shift's LHS op.
8133 if (IsMulOrDiv) {
8134 // Op to extract from is a mul or udiv by a constant.
8135 // Check:
8136 // c2 / (1 << (bitwidth(op0 v c0) - c1)) == c0
8137 // c2 % (1 << (bitwidth(op0 v c0) - c1)) == 0
8138 const APInt ExtractDiv = APInt::getOneBitSet(ExtractFromAmt.getBitWidth(),
8139 NeededShiftAmt.getZExtValue());
8140 APInt ResultAmt;
8141 APInt Rem;
8142 APInt::udivrem(ExtractFromAmt, ExtractDiv, ResultAmt, Rem);
8143 if (Rem != 0 || ResultAmt != OppLHSAmt)
8144 return SDValue();
8145 } else {
8146 // Op to extract from is a shift by a constant.
8147 // Check:
8148 // c2 - (bitwidth(op0 v c0) - c1) == c0
8149 if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc(
8150 ExtractFromAmt.getBitWidth()))
8151 return SDValue();
8152 }
8153
8154 // Return the expanded shift op that should allow a rotate to be formed.
8155 EVT ShiftVT = OppShift.getOperand(1).getValueType();
8156 EVT ResVT = ExtractFrom.getValueType();
8157 SDValue NewShiftNode = DAG.getConstant(NeededShiftAmt, DL, ShiftVT);
8158 return DAG.getNode(Opcode, DL, ResVT, OppShiftLHS, NewShiftNode);
8159}
8160
8161// Return true if we can prove that, whenever Neg and Pos are both in the
8162// range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos). This means that
8163// for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
8164//
8165// (or (shift1 X, Neg), (shift2 X, Pos))
8166//
8167// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
8168// in direction shift1 by Neg. The range [0, EltSize) means that we only need
8169// to consider shift amounts with defined behavior.
8170//
8171// The IsRotate flag should be set when the LHS of both shifts is the same.
8172// Otherwise if matching a general funnel shift, it should be clear.
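// For example, with EltSize == 32, Neg == (sub 32, Pos) satisfies this
// directly, and Neg == (and (sub 32, Pos), 31) also does when IsRotate is set,
// because a rotate only demands the low Log2(32) bits of the amount.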
8173static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
8174 SelectionDAG &DAG, bool IsRotate) {
8175 const auto &TLI = DAG.getTargetLoweringInfo();
8176 // If EltSize is a power of 2 then:
8177 //
8178 // (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
8179 // (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
8180 //
8181 // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
8182 // for the stronger condition:
8183 //
8184 // Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1) [A]
8185 //
8186 // for all Neg and Pos. Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
8187 // we can just replace Neg with Neg' for the rest of the function.
8188 //
8189 // In other cases we check for the even stronger condition:
8190 //
8191 // Neg == EltSize - Pos [B]
8192 //
8193 // for all Neg and Pos. Note that the (or ...) then invokes undefined
8194 // behavior if Pos == 0 (and consequently Neg == EltSize).
8195 //
8196 // We could actually use [A] whenever EltSize is a power of 2, but the
8197 // only extra cases that it would match are those uninteresting ones
8198 // where Neg and Pos are never in range at the same time. E.g. for
8199 // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
8200 // as well as (sub 32, Pos), but:
8201 //
8202 // (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
8203 //
8204 // always invokes undefined behavior for 32-bit X.
8205 //
8206 // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
8207 // This allows us to peek through any operations that only affect Mask's
8208 // un-demanded bits.
8209 //
8210 // NOTE: We can only do this when matching operations which won't modify the
8211 // least Log2(EltSize) significant bits and not a general funnel shift.
8212 unsigned MaskLoBits = 0;
8213 if (IsRotate && isPowerOf2_64(EltSize)) {
8214 unsigned Bits = Log2_64(EltSize);
8215 unsigned NegBits = Neg.getScalarValueSizeInBits();
8216 if (NegBits >= Bits) {
8217 APInt DemandedBits = APInt::getLowBitsSet(NegBits, Bits);
8218 if (SDValue Inner =
8219 TLI.SimplifyMultipleUseDemandedBits(Neg, DemandedBits, DAG)) {
8220 Neg = Inner;
8221 MaskLoBits = Bits;
8222 }
8223 }
8224 }
8225
8226 // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
8227 if (Neg.getOpcode() != ISD::SUB)
8228 return false;
8229 ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
8230 if (!NegC)
8231 return false;
8232 SDValue NegOp1 = Neg.getOperand(1);
8233
8234 // On the RHS of [A], if Pos is the result of operation on Pos' that won't
8235 // affect Mask's demanded bits, just replace Pos with Pos'. These operations
8236 // are redundant for the purpose of the equality.
8237 if (MaskLoBits) {
8238 unsigned PosBits = Pos.getScalarValueSizeInBits();
8239 if (PosBits >= MaskLoBits) {
8240 APInt DemandedBits = APInt::getLowBitsSet(PosBits, MaskLoBits);
8241 if (SDValue Inner =
8242 TLI.SimplifyMultipleUseDemandedBits(Pos, DemandedBits, DAG))
8243 Pos = Inner;
8244 }
8245 }
8246 }
8247
8248 // The condition we need is now:
8249 //
8250 // (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
8251 //
8252 // If NegOp1 == Pos then we need:
8253 //
8254 // EltSize & Mask == NegC & Mask
8255 //
8256 // (because "x & Mask" is a truncation and distributes through subtraction).
8257 //
8258 // We also need to account for a potential truncation of NegOp1 if the amount
8259 // has already been legalized to a shift amount type.
8260 APInt Width;
8261 if ((Pos == NegOp1) ||
8262 (NegOp1.getOpcode() == ISD::TRUNCATE && Pos == NegOp1.getOperand(0)))
8263 Width = NegC->getAPIntValue();
8264
8265 // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
8266 // Then the condition we want to prove becomes:
8267 //
8268 // (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
8269 //
8270 // which, again because "x & Mask" is a truncation, becomes:
8271 //
8272 // NegC & Mask == (EltSize - PosC) & Mask
8273 // EltSize & Mask == (NegC + PosC) & Mask
8274 else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
8275 if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
8276 Width = PosC->getAPIntValue() + NegC->getAPIntValue();
8277 else
8278 return false;
8279 } else
8280 return false;
8281
8282 // Now we just need to check that EltSize & Mask == Width & Mask.
8283 if (MaskLoBits)
8284 // EltSize & Mask is 0 since Mask is EltSize - 1.
8285 return Width.getLoBits(MaskLoBits) == 0;
8286 return Width == EltSize;
8287}
8288
8289// A subroutine of MatchRotate used once we have found an OR of two opposite
8290// shifts of Shifted. If Neg == <operand size> - Pos then the OR reduces
8291// to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
8292// former being preferred if supported. InnerPos and InnerNeg are Pos and
8293// Neg with outer conversions stripped away.
8294SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
8295 SDValue Neg, SDValue InnerPos,
8296 SDValue InnerNeg, bool HasPos,
8297 unsigned PosOpcode, unsigned NegOpcode,
8298 const SDLoc &DL) {
8299 // fold (or (shl x, (*ext y)),
8300 // (srl x, (*ext (sub 32, y)))) ->
8301 // (rotl x, y) or (rotr x, (sub 32, y))
8302 //
8303 // fold (or (shl x, (*ext (sub 32, y))),
8304 // (srl x, (*ext y))) ->
8305 // (rotr x, y) or (rotl x, (sub 32, y))
8306 EVT VT = Shifted.getValueType();
8307 if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG,
8308 /*IsRotate*/ true)) {
8309 return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
8310 HasPos ? Pos : Neg);
8311 }
8312
8313 return SDValue();
8314}
8315
8316// A subroutine of MatchRotate used once we have found an OR of two opposite
8317// shifts of N0 + N1. If Neg == <operand size> - Pos then the OR reduces
8318// to both (PosOpcode N0, N1, Pos) and (NegOpcode N0, N1, Neg), with the
8319// former being preferred if supported. InnerPos and InnerNeg are Pos and
8320// Neg with outer conversions stripped away.
8321// TODO: Merge with MatchRotatePosNeg.
8322SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos,
8323 SDValue Neg, SDValue InnerPos,
8324 SDValue InnerNeg, bool HasPos,
8325 unsigned PosOpcode, unsigned NegOpcode,
8326 const SDLoc &DL) {
8327 EVT VT = N0.getValueType();
8328 unsigned EltBits = VT.getScalarSizeInBits();
8329
8330 // fold (or (shl x0, (*ext y)),
8331 // (srl x1, (*ext (sub 32, y)))) ->
8332 // (fshl x0, x1, y) or (fshr x0, x1, (sub 32, y))
8333 //
8334 // fold (or (shl x0, (*ext (sub 32, y))),
8335 // (srl x1, (*ext y))) ->
8336 // (fshr x0, x1, y) or (fshl x0, x1, (sub 32, y))
8337 if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG, /*IsRotate*/ N0 == N1)) {
8338 return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, N0, N1,
8339 HasPos ? Pos : Neg);
8340 }
8341
8342 // Matching the shift+xor cases, we can't easily use the xor'd shift amount
8343 // so for now just use the PosOpcode case if its legal.
8344 // TODO: When can we use the NegOpcode case?
8345 if (PosOpcode == ISD::FSHL && isPowerOf2_32(EltBits)) {
8346 auto IsBinOpImm = [](SDValue Op, unsigned BinOpc, unsigned Imm) {
8347 if (Op.getOpcode() != BinOpc)
8348 return false;
8349 ConstantSDNode *Cst = isConstOrConstSplat(Op.getOperand(1));
8350 return Cst && (Cst->getAPIntValue() == Imm);
8351 };
8352
8353 // fold (or (shl x0, y), (srl (srl x1, 1), (xor y, 31)))
8354 // -> (fshl x0, x1, y)
8355 if (IsBinOpImm(N1, ISD::SRL, 1) &&
8356 IsBinOpImm(InnerNeg, ISD::XOR, EltBits - 1) &&
8357 InnerPos == InnerNeg.getOperand(0) &&
8358 TLI.isOperationLegalOrCustom(ISD::FSHL, VT)) {
8359 return DAG.getNode(ISD::FSHL, DL, VT, N0, N1.getOperand(0), Pos);
8360 }
8361
8362 // fold (or (shl (shl x0, 1), (xor y, 31)), (srl x1, y))
8363 // -> (fshr x0, x1, y)
8364 if (IsBinOpImm(N0, ISD::SHL, 1) &&
8365 IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
8366 InnerNeg == InnerPos.getOperand(0) &&
8367 TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) {
8368 return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
8369 }
8370
8371 // fold (or (shl (add x0, x0), (xor y, 31)), (srl x1, y))
8372 // -> (fshr x0, x1, y)
8373 // TODO: Should add(x,x) -> shl(x,1) be a general DAG canonicalization?
8374 if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N0.getOperand(1) &&
8375 IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
8376 InnerNeg == InnerPos.getOperand(0) &&
8377 TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) {
8378 return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
8379 }
8380 }
8381
8382 return SDValue();
8383}
8384
8385// MatchRotate - Handle an 'or' of two operands. If this is one of the many
8386// idioms for rotate, and if the target supports rotation instructions, generate
8387// a rot[lr]. This also matches funnel shift patterns, similar to rotation but
8388// with different shifted sources.
8389SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
8390 EVT VT = LHS.getValueType();
8391
8392 // The target must have at least one rotate/funnel flavor.
8393 // We still try to match rotate by constant pre-legalization.
8394 // TODO: Support pre-legalization funnel-shift by constant.
8395 bool HasROTL = hasOperation(ISD::ROTL, VT);
8396 bool HasROTR = hasOperation(ISD::ROTR, VT);
8397 bool HasFSHL = hasOperation(ISD::FSHL, VT);
8398 bool HasFSHR = hasOperation(ISD::FSHR, VT);
8399
8400 // If the type is going to be promoted and the target has enabled custom
8401 // lowering for rotate, allow matching rotate by non-constants. Only allow
8402 // this for scalar types.
8403 if (VT.isScalarInteger() && TLI.getTypeAction(*DAG.getContext(), VT) ==
8404 TargetLowering::TypePromoteInteger) {
8405 HasROTL |= TLI.getOperationAction(ISD::ROTL, VT) == TargetLowering::Custom;
8406 HasROTR |= TLI.getOperationAction(ISD::ROTR, VT) == TargetLowering::Custom;
8407 }
8408
8409 if (LegalOperations && !HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
8410 return SDValue();
8411
8412 // Check for truncated rotate.
8413 if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
8414 LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
8415 assert(LHS.getValueType() == RHS.getValueType());
8416 if (SDValue Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
8417 return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(), Rot);
8418 }
8419 }
8420
8421 // Match "(X shl/srl V1) & V2" where V2 may not be present.
8422 SDValue LHSShift; // The shift.
8423 SDValue LHSMask; // AND value if any.
8424 matchRotateHalf(DAG, LHS, LHSShift, LHSMask);
8425
8426 SDValue RHSShift; // The shift.
8427 SDValue RHSMask; // AND value if any.
8428 matchRotateHalf(DAG, RHS, RHSShift, RHSMask);
8429
8430 // If neither side matched a rotate half, bail
8431 if (!LHSShift && !RHSShift)
8432 return SDValue();
8433
8434 // InstCombine may have combined a constant shl, srl, mul, or udiv with one
8435 // side of the rotate, so try to handle that here. In all cases we need to
8436 // pass the matched shift from the opposite side to compute the opcode and
8437 // needed shift amount to extract. We still want to do this if both sides
8438 // matched a rotate half because one half may be a potential overshift that
8439 // can be broken down (i.e. if InstCombine merged two shl or srl ops into a
8440 // single one).
8441
8442 // Have LHS side of the rotate, try to extract the needed shift from the RHS.
8443 if (LHSShift)
8444 if (SDValue NewRHSShift =
8445 extractShiftForRotate(DAG, LHSShift, RHS, RHSMask, DL))
8446 RHSShift = NewRHSShift;
8447 // Have RHS side of the rotate, try to extract the needed shift from the LHS.
8448 if (RHSShift)
8449 if (SDValue NewLHSShift =
8450 extractShiftForRotate(DAG, RHSShift, LHS, LHSMask, DL))
8451 LHSShift = NewLHSShift;
8452
8453 // If a side is still missing, nothing else we can do.
8454 if (!RHSShift || !LHSShift)
8455 return SDValue();
8456
8457 // At this point we've matched or extracted a shift op on each side.
8458
8459 if (LHSShift.getOpcode() == RHSShift.getOpcode())
8460 return SDValue(); // Shifts must disagree.
8461
8462 // Canonicalize shl to left side in a shl/srl pair.
8463 if (RHSShift.getOpcode() == ISD::SHL) {
8464 std::swap(LHS, RHS);
8465 std::swap(LHSShift, RHSShift);
8466 std::swap(LHSMask, RHSMask);
8467 }
8468
8469 // Something has gone wrong - we've lost the shl/srl pair - bail.
8470 if (LHSShift.getOpcode() != ISD::SHL || RHSShift.getOpcode() != ISD::SRL)
8471 return SDValue();
8472
8473 unsigned EltSizeInBits = VT.getScalarSizeInBits();
8474 SDValue LHSShiftArg = LHSShift.getOperand(0);
8475 SDValue LHSShiftAmt = LHSShift.getOperand(1);
8476 SDValue RHSShiftArg = RHSShift.getOperand(0);
8477 SDValue RHSShiftAmt = RHSShift.getOperand(1);
8478
8479 auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
8480 ConstantSDNode *RHS) {
8481 return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
8482 };
8483
8484 auto ApplyMasks = [&](SDValue Res) {
8485 // If there is an AND of either shifted operand, apply it to the result.
8486 if (LHSMask.getNode() || RHSMask.getNode()) {
8487 SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
8488 SDValue Mask = AllOnes;
8489
8490 if (LHSMask.getNode()) {
8491 SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
8492 Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
8493 DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
8494 }
8495 if (RHSMask.getNode()) {
8496 SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
8497 Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
8498 DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
8499 }
8500
8501 Res = DAG.getNode(ISD::AND, DL, VT, Res, Mask);
8502 }
8503
8504 return Res;
8505 };
8506
8507 // TODO: Support pre-legalization funnel-shift by constant.
8508 bool IsRotate = LHSShiftArg == RHSShiftArg;
8509 if (!IsRotate && !(HasFSHL || HasFSHR)) {
8510 if (TLI.isTypeLegal(VT) && LHS.hasOneUse() && RHS.hasOneUse() &&
8511 ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
8512 // Look for a disguised rotate by constant.
8513 // The common shifted operand X may be hidden inside another 'or'.
8514 SDValue X, Y;
8515 auto matchOr = [&X, &Y](SDValue Or, SDValue CommonOp) {
8516 if (!Or.hasOneUse() || Or.getOpcode() != ISD::OR)
8517 return false;
8518 if (CommonOp == Or.getOperand(0)) {
8519 X = CommonOp;
8520 Y = Or.getOperand(1);
8521 return true;
8522 }
8523 if (CommonOp == Or.getOperand(1)) {
8524 X = CommonOp;
8525 Y = Or.getOperand(0);
8526 return true;
8527 }
8528 return false;
8529 };
8530
8531 SDValue Res;
8532 if (matchOr(LHSShiftArg, RHSShiftArg)) {
8533 // (shl (X | Y), C1) | (srl X, C2) --> (rotl X, C1) | (shl Y, C1)
8534 SDValue RotX = DAG.getNode(ISD::ROTL, DL, VT, X, LHSShiftAmt);
8535 SDValue ShlY = DAG.getNode(ISD::SHL, DL, VT, Y, LHSShiftAmt);
8536 Res = DAG.getNode(ISD::OR, DL, VT, RotX, ShlY);
8537 } else if (matchOr(RHSShiftArg, LHSShiftArg)) {
8538 // (shl X, C1) | (srl (X | Y), C2) --> (rotl X, C1) | (srl Y, C2)
8539 SDValue RotX = DAG.getNode(ISD::ROTL, DL, VT, X, LHSShiftAmt);
8540 SDValue SrlY = DAG.getNode(ISD::SRL, DL, VT, Y, RHSShiftAmt);
8541 Res = DAG.getNode(ISD::OR, DL, VT, RotX, SrlY);
8542 } else {
8543 return SDValue();
8544 }
8545
8546 return ApplyMasks(Res);
8547 }
8548
8549 return SDValue(); // Requires funnel shift support.
8550 }
8551
8552 // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
8553 // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
8554 // fold (or (shl x, C1), (srl y, C2)) -> (fshl x, y, C1)
8555 // fold (or (shl x, C1), (srl y, C2)) -> (fshr x, y, C2)
8556 // iff C1+C2 == EltSizeInBits
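// For example, for i32: (or (shl x, 8), (srl x, 24)) -> (rotl x, 8) or
// (rotr x, 24), since 8 + 24 == 32.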
8557 if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
8558 SDValue Res;
8559 if (IsRotate && (HasROTL || HasROTR || !(HasFSHL || HasFSHR))) {
8560 bool UseROTL = !LegalOperations || HasROTL;
8561 Res = DAG.getNode(UseROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
8562 UseROTL ? LHSShiftAmt : RHSShiftAmt);
8563 } else {
8564 bool UseFSHL = !LegalOperations || HasFSHL;
8565 Res = DAG.getNode(UseFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg,
8566 RHSShiftArg, UseFSHL ? LHSShiftAmt : RHSShiftAmt);
8567 }
8568
8569 return ApplyMasks(Res);
8570 }
8571
8572 // Even pre-legalization, we can't easily rotate/funnel-shift by a variable
8573 // shift.
8574 if (!HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
8575 return SDValue();
8576
8577 // If there is a mask here, and we have a variable shift, we can't be sure
8578 // that we're masking out the right stuff.
8579 if (LHSMask.getNode() || RHSMask.getNode())
8580 return SDValue();
8581
8582 // If the shift amount is sign/zext/any-extended just peel it off.
8583 SDValue LExtOp0 = LHSShiftAmt;
8584 SDValue RExtOp0 = RHSShiftAmt;
8585 if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
8586 LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
8587 LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
8588 LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
8589 (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
8590 RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
8591 RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
8592 RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
8593 LExtOp0 = LHSShiftAmt.getOperand(0);
8594 RExtOp0 = RHSShiftAmt.getOperand(0);
8595 }
8596
8597 if (IsRotate && (HasROTL || HasROTR)) {
8598 SDValue TryL =
8599 MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt, LExtOp0,
8600 RExtOp0, HasROTL, ISD::ROTL, ISD::ROTR, DL);
8601 if (TryL)
8602 return TryL;
8603
8604 SDValue TryR =
8605 MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt, RExtOp0,
8606 LExtOp0, HasROTR, ISD::ROTR, ISD::ROTL, DL);
8607 if (TryR)
8608 return TryR;
8609 }
8610
8611 SDValue TryL =
8612 MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, LHSShiftAmt, RHSShiftAmt,
8613 LExtOp0, RExtOp0, HasFSHL, ISD::FSHL, ISD::FSHR, DL);
8614 if (TryL)
8615 return TryL;
8616
8617 SDValue TryR =
8618 MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
8619 RExtOp0, LExtOp0, HasFSHR, ISD::FSHR, ISD::FSHL, DL);
8620 if (TryR)
8621 return TryR;
8622
8623 return SDValue();
8624}
8625
8626/// Recursively traverses the expression calculating the origin of the requested
8627/// byte of the given value. Returns std::nullopt if the provider can't be
8628/// calculated.
8629///
8630/// For all the values except the root of the expression, we verify that the
8631/// value has exactly one use and if not then return std::nullopt. This way if
8632/// the origin of the byte is returned it's guaranteed that the values which
8633/// contribute to the byte are not used outside of this expression.
8634
8635/// However, there is a special case when dealing with vector loads -- we allow
8636/// more than one use if the load is a vector type. Since the values that
8637/// contribute to the byte ultimately come from the ExtractVectorElements of the
8638/// Load, we don't care if the Load has uses other than ExtractVectorElements,
8639/// because those operations are independent from the pattern to be combined.
8640/// For vector loads, we simply care that the ByteProviders are adjacent
8641/// positions of the same vector, and their index matches the byte that is being
8642/// provided. This is captured by the \p VectorIndex algorithm. \p VectorIndex
8643/// is the index used in an ExtractVectorElement, and \p StartingIndex is the
8644/// byte position we are trying to provide for the LoadCombine. If these do
8645/// not match, then we can not combine the vector loads. \p Index uses the
8646/// byte position we are trying to provide for and is matched against the
8647/// shl and load size. The \p Index algorithm ensures the requested byte is
8648/// provided for by the pattern, and the pattern does not over provide bytes.
8649///
8650///
8651/// The supported LoadCombine pattern for vector loads is as follows
8652/// or
8653/// / \
8654/// or shl
8655/// / \ |
8656/// or shl zext
8657/// / \ | |
8658/// shl zext zext EVE*
8659/// | | | |
8660/// zext EVE* EVE* LOAD
8661/// | | |
8662/// EVE* LOAD LOAD
8663/// |
8664/// LOAD
8665///
8666/// *ExtractVectorElement
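/// As a scalar illustration, in
/// (or (shl (zext i8 (load p1)), 16), (zext i8 (load p0))) byte 2 resolves to
/// byte 0 of the load at p1, byte 0 resolves to the load at p0, and byte 3
/// resolves to a constant zero.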
8667 using SDByteProvider = ByteProvider<SDNode *>;
8668
8669static std::optional<SDByteProvider>
8670 calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
8671 std::optional<uint64_t> VectorIndex,
8672 unsigned StartingIndex = 0) {
8673
8674 // Typical i64 by i8 pattern requires recursion up to 8 calls depth
8675 if (Depth == 10)
8676 return std::nullopt;
8677
8678 // Only allow multiple uses if the instruction is a vector load (in which
8679 // case we will use the load for every ExtractVectorElement)
8680 if (Depth && !Op.hasOneUse() &&
8681 (Op.getOpcode() != ISD::LOAD || !Op.getValueType().isVector()))
8682 return std::nullopt;
8683
8684 // Fail to combine if we have encountered anything but a LOAD after handling
8685 // an ExtractVectorElement.
8686 if (Op.getOpcode() != ISD::LOAD && VectorIndex.has_value())
8687 return std::nullopt;
8688
8689 unsigned BitWidth = Op.getValueSizeInBits();
8690 if (BitWidth % 8 != 0)
8691 return std::nullopt;
8692 unsigned ByteWidth = BitWidth / 8;
8693 assert(Index < ByteWidth && "invalid index requested");
8694 (void) ByteWidth;
8695
8696 switch (Op.getOpcode()) {
8697 case ISD::OR: {
8698 auto LHS =
8699 calculateByteProvider(Op->getOperand(0), Index, Depth + 1, VectorIndex);
8700 if (!LHS)
8701 return std::nullopt;
8702 auto RHS =
8703 calculateByteProvider(Op->getOperand(1), Index, Depth + 1, VectorIndex);
8704 if (!RHS)
8705 return std::nullopt;
8706
8707 if (LHS->isConstantZero())
8708 return RHS;
8709 if (RHS->isConstantZero())
8710 return LHS;
8711 return std::nullopt;
8712 }
8713 case ISD::SHL: {
8714 auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
8715 if (!ShiftOp)
8716 return std::nullopt;
8717
8718 uint64_t BitShift = ShiftOp->getZExtValue();
8719
8720 if (BitShift % 8 != 0)
8721 return std::nullopt;
8722 uint64_t ByteShift = BitShift / 8;
8723
8724 // If we are shifting by an amount greater than the index we are trying to
8725 // provide, then do not provide anything. Otherwise, subtract the index by
8726 // the amount we shifted by.
8727 return Index < ByteShift
8728 ? SDByteProvider::getConstantZero()
8729 : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
8730 Depth + 1, VectorIndex, Index);
8731 }
8732 case ISD::ANY_EXTEND:
8733 case ISD::SIGN_EXTEND:
8734 case ISD::ZERO_EXTEND: {
8735 SDValue NarrowOp = Op->getOperand(0);
8736 unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
8737 if (NarrowBitWidth % 8 != 0)
8738 return std::nullopt;
8739 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
8740
8741 if (Index >= NarrowByteWidth)
8742 return Op.getOpcode() == ISD::ZERO_EXTEND
8743 ? std::optional<SDByteProvider>(
8744 SDByteProvider::getConstantZero())
8745 : std::nullopt;
8746 return calculateByteProvider(NarrowOp, Index, Depth + 1, VectorIndex,
8747 StartingIndex);
8748 }
8749 case ISD::BSWAP:
8750 return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
8751 Depth + 1, VectorIndex, StartingIndex);
8752 case ISD::EXTRACT_VECTOR_ELT: {
8753 auto OffsetOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
8754 if (!OffsetOp)
8755 return std::nullopt;
8756
8757 VectorIndex = OffsetOp->getZExtValue();
8758
8759 SDValue NarrowOp = Op->getOperand(0);
8760 unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
8761 if (NarrowBitWidth % 8 != 0)
8762 return std::nullopt;
8763 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
8764 // EXTRACT_VECTOR_ELT can extend the element type to the width of the return
8765 // type, leaving the high bits undefined.
8766 if (Index >= NarrowByteWidth)
8767 return std::nullopt;
8768
8769 // Check to see if the position of the element in the vector corresponds
8770 // with the byte we are trying to provide for. In the case of a vector of
8771 // i8, this simply means the VectorIndex == StartingIndex. For non i8 cases,
8772 // the element will provide a range of bytes. For example, if we have a
8773 // vector of i16s, each element provides two bytes (V[1] provides byte 2 and
8774 // 3).
8775 if (*VectorIndex * NarrowByteWidth > StartingIndex)
8776 return std::nullopt;
8777 if ((*VectorIndex + 1) * NarrowByteWidth <= StartingIndex)
8778 return std::nullopt;
8779
8780 return calculateByteProvider(Op->getOperand(0), Index, Depth + 1,
8781 VectorIndex, StartingIndex);
8782 }
8783 case ISD::LOAD: {
8784 auto L = cast<LoadSDNode>(Op.getNode());
8785 if (!L->isSimple() || L->isIndexed())
8786 return std::nullopt;
8787
8788 unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
8789 if (NarrowBitWidth % 8 != 0)
8790 return std::nullopt;
8791 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
8792
8793 // If the width of the load does not reach the byte we are trying to provide
8794 // for and it is not a ZEXTLOAD, then the load does not provide for the byte
8795 // in question.
8796 if (Index >= NarrowByteWidth)
8797 return L->getExtensionType() == ISD::ZEXTLOAD
8798 ? std::optional<SDByteProvider>(
8799 SDByteProvider::getConstantZero())
8800 : std::nullopt;
8801
8802 unsigned BPVectorIndex = VectorIndex.value_or(0U);
8803 return SDByteProvider::getSrc(L, Index, BPVectorIndex);
8804 }
8805 }
8806
8807 return std::nullopt;
8808}
8809
8810static unsigned littleEndianByteAt(unsigned BW, unsigned i) {
8811 return i;
8812}
8813
8814static unsigned bigEndianByteAt(unsigned BW, unsigned i) {
8815 return BW - i - 1;
8816}
8817
8818// Check if the bytes offsets we are looking at match with either big or
8819// little endian value loaded. Return true for big endian, false for little
8820// endian, and std::nullopt if match failed.
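// For example, with FirstOffset == 0, offsets {0, 1, 2, 3} indicate a little
// endian value and {3, 2, 1, 0} a big endian value.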
8821static std::optional<bool> isBigEndian(const ArrayRef<int64_t> ByteOffsets,
8822 int64_t FirstOffset) {
8823 // The endian can be decided only when it is 2 bytes at least.
8824 unsigned Width = ByteOffsets.size();
8825 if (Width < 2)
8826 return std::nullopt;
8827
8828 bool BigEndian = true, LittleEndian = true;
8829 for (unsigned i = 0; i < Width; i++) {
8830 int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
8831 LittleEndian &= CurrentByteOffset == littleEndianByteAt(Width, i);
8832 BigEndian &= CurrentByteOffset == bigEndianByteAt(Width, i);
8833 if (!BigEndian && !LittleEndian)
8834 return std::nullopt;
8835 }
8836
8837 assert((BigEndian != LittleEndian) && "It should be either big endian or "
8838 "little endian");
8839 return BigEndian;
8840}
8841
8842// Look through one layer of truncate or extend.
8843 static SDValue stripTruncAndExt(SDValue Value) {
8844 switch (Value.getOpcode()) {
8845 case ISD::TRUNCATE:
8846 case ISD::ZERO_EXTEND:
8847 case ISD::SIGN_EXTEND:
8848 case ISD::ANY_EXTEND:
8849 return Value.getOperand(0);
8850 }
8851 return SDValue();
8852}
8853
8854/// Match a pattern where a wide type scalar value is stored by several narrow
8855/// stores. Fold it into a single store or a BSWAP and a store if the targets
8856/// supports it.
8857///
8858/// Assuming little endian target:
8859/// i8 *p = ...
8860/// i32 val = ...
8861/// p[0] = (val >> 0) & 0xFF;
8862/// p[1] = (val >> 8) & 0xFF;
8863/// p[2] = (val >> 16) & 0xFF;
8864/// p[3] = (val >> 24) & 0xFF;
8865/// =>
8866/// *((i32)p) = val;
8867///
8868/// i8 *p = ...
8869/// i32 val = ...
8870/// p[0] = (val >> 24) & 0xFF;
8871/// p[1] = (val >> 16) & 0xFF;
8872/// p[2] = (val >> 8) & 0xFF;
8873/// p[3] = (val >> 0) & 0xFF;
8874/// =>
8875/// *((i32)p) = BSWAP(val);
8876SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
8877 // The matching looks for "store (trunc x)" patterns that appear early but are
8878 // likely to be replaced by truncating store nodes during combining.
8879 // TODO: If there is evidence that running this later would help, this
8880 // limitation could be removed. Legality checks may need to be added
8881 // for the created store and optional bswap/rotate.
8882 if (LegalOperations || OptLevel == CodeGenOptLevel::None)
8883 return SDValue();
8884
8885 // We only handle merging simple stores of 1-4 bytes.
8886 // TODO: Allow unordered atomics when wider type is legal (see D66309)
8887 EVT MemVT = N->getMemoryVT();
8888 if (!(MemVT == MVT::i8 || MemVT == MVT::i16 || MemVT == MVT::i32) ||
8889 !N->isSimple() || N->isIndexed())
8890 return SDValue();
8891
8892 // Collect all of the stores in the chain, up to the maximum store width (i64).
8893 SDValue Chain = N->getChain();
8894 SmallVector<StoreSDNode *, 8> Stores = {N};
8895 unsigned NarrowNumBits = MemVT.getScalarSizeInBits();
8896 unsigned MaxWideNumBits = 64;
8897 unsigned MaxStores = MaxWideNumBits / NarrowNumBits;
8898 while (auto *Store = dyn_cast<StoreSDNode>(Chain)) {
8899 // All stores must be the same size to ensure that we are writing all of the
8900 // bytes in the wide value.
8901 // This store should have exactly one use as a chain operand for another
8902 // store in the merging set. If there are other chain uses, then the
8903 // transform may not be safe because order of loads/stores outside of this
8904 // set may not be preserved.
8905 // TODO: We could allow multiple sizes by tracking each stored byte.
8906 if (Store->getMemoryVT() != MemVT || !Store->isSimple() ||
8907 Store->isIndexed() || !Store->hasOneUse())
8908 return SDValue();
8909 Stores.push_back(Store);
8910 Chain = Store->getChain();
8911 if (MaxStores < Stores.size())
8912 return SDValue();
8913 }
8914 // There is no reason to continue if we do not have at least a pair of stores.
8915 if (Stores.size() < 2)
8916 return SDValue();
8917
8918 // Handle simple types only.
8919 LLVMContext &Context = *DAG.getContext();
8920 unsigned NumStores = Stores.size();
8921 unsigned WideNumBits = NumStores * NarrowNumBits;
8922 EVT WideVT = EVT::getIntegerVT(Context, WideNumBits);
8923 if (WideVT != MVT::i16 && WideVT != MVT::i32 && WideVT != MVT::i64)
8924 return SDValue();
8925
8926 // Check if all bytes of the source value that we are looking at are stored
8927 // to the same base address. Collect offsets from Base address into OffsetMap.
8928 SDValue SourceValue;
8929 SmallVector<int64_t, 8> OffsetMap(NumStores, INT64_MAX);
8930 int64_t FirstOffset = INT64_MAX;
8931 StoreSDNode *FirstStore = nullptr;
8932 std::optional<BaseIndexOffset> Base;
8933 for (auto *Store : Stores) {
8934 // All the stores store different parts of the CombinedValue. A truncate is
8935 // required to get the partial value.
8936 SDValue Trunc = Store->getValue();
8937 if (Trunc.getOpcode() != ISD::TRUNCATE)
8938 return SDValue();
8939 // Other than the first/last part, a shift operation is required to get the
8940 // offset.
8941 int64_t Offset = 0;
8942 SDValue WideVal = Trunc.getOperand(0);
8943 if ((WideVal.getOpcode() == ISD::SRL || WideVal.getOpcode() == ISD::SRA) &&
8944 isa<ConstantSDNode>(WideVal.getOperand(1))) {
8945 // The shift amount must be a constant multiple of the narrow type.
8946 // It is translated to the offset address in the wide source value "y".
8947 //
8948 // x = srl y, ShiftAmtC
8949 // i8 z = trunc x
8950 // store z, ...
8951 uint64_t ShiftAmtC = WideVal.getConstantOperandVal(1);
8952 if (ShiftAmtC % NarrowNumBits != 0)
8953 return SDValue();
8954
8955 // Make sure we aren't reading bits that are shifted in.
8956 if (ShiftAmtC > WideVal.getScalarValueSizeInBits() - NarrowNumBits)
8957 return SDValue();
8958
8959 Offset = ShiftAmtC / NarrowNumBits;
8960 WideVal = WideVal.getOperand(0);
8961 }
8962
8963 // Stores must share the same source value with different offsets.
8964 if (!SourceValue)
8965 SourceValue = WideVal;
8966 else if (SourceValue != WideVal) {
8967 // Truncate and extends can be stripped to see if the values are related.
8968 if (stripTruncAndExt(SourceValue) != WideVal &&
8969 stripTruncAndExt(WideVal) != SourceValue)
8970 return SDValue();
8971
8972 if (WideVal.getScalarValueSizeInBits() >
8973 SourceValue.getScalarValueSizeInBits())
8974 SourceValue = WideVal;
8975
8976 // Give up if the source value type is smaller than the store size.
8977 if (SourceValue.getScalarValueSizeInBits() < WideVT.getScalarSizeInBits())
8978 return SDValue();
8979 }
8980
8981 // Stores must share the same base address.
8982 BaseIndexOffset Ptr = BaseIndexOffset::match(Store, DAG);
8983 int64_t ByteOffsetFromBase = 0;
8984 if (!Base)
8985 Base = Ptr;
8986 else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
8987 return SDValue();
8988
8989 // Remember the first store.
8990 if (ByteOffsetFromBase < FirstOffset) {
8991 FirstStore = Store;
8992 FirstOffset = ByteOffsetFromBase;
8993 }
8994 // Map the offset in the store and the offset in the combined value, and
8995 // early return if it has been set before.
8996 if (Offset < 0 || Offset >= NumStores || OffsetMap[Offset] != INT64_MAX)
8997 return SDValue();
8998 OffsetMap[Offset] = ByteOffsetFromBase;
8999 }
9000
9001 assert(FirstOffset != INT64_MAX && "First byte offset must be set");
9002 assert(FirstStore && "First store must be set");
9003
9004 // Check that a store of the wide type is both allowed and fast on the target
9005 const DataLayout &Layout = DAG.getDataLayout();
9006 unsigned Fast = 0;
9007 bool Allowed = TLI.allowsMemoryAccess(Context, Layout, WideVT,
9008 *FirstStore->getMemOperand(), &Fast);
9009 if (!Allowed || !Fast)
9010 return SDValue();
9011
9012 // Check if the pieces of the value are going to the expected places in memory
9013 // to merge the stores.
9014 auto checkOffsets = [&](bool MatchLittleEndian) {
9015 if (MatchLittleEndian) {
9016 for (unsigned i = 0; i != NumStores; ++i)
9017 if (OffsetMap[i] != i * (NarrowNumBits / 8) + FirstOffset)
9018 return false;
9019 } else { // MatchBigEndian by reversing loop counter.
9020 for (unsigned i = 0, j = NumStores - 1; i != NumStores; ++i, --j)
9021 if (OffsetMap[j] != i * (NarrowNumBits / 8) + FirstOffset)
9022 return false;
9023 }
9024 return true;
9025 };
9026
9027 // Check if the offsets line up for the native data layout of this target.
9028 bool NeedBswap = false;
9029 bool NeedRotate = false;
9030 if (!checkOffsets(Layout.isLittleEndian())) {
9031 // Special-case: check if byte offsets line up for the opposite endian.
9032 if (NarrowNumBits == 8 && checkOffsets(Layout.isBigEndian()))
9033 NeedBswap = true;
9034 else if (NumStores == 2 && checkOffsets(Layout.isBigEndian()))
9035 NeedRotate = true;
9036 else
9037 return SDValue();
9038 }
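// For example, two i16 stores that place the halves of an i32 value in the
// opposite of the native byte order are merged with a rotate by 16 rather
// than a bswap.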
9039
9040 SDLoc DL(N);
9041 if (WideVT != SourceValue.getValueType()) {
9042 assert(SourceValue.getValueType().getScalarSizeInBits() > WideNumBits &&
9043 "Unexpected store value to merge");
9044 SourceValue = DAG.getNode(ISD::TRUNCATE, DL, WideVT, SourceValue);
9045 }
9046
9047 // Before legalize we can introduce illegal bswaps/rotates which will be later
9048 // converted to an explicit bswap sequence. This way we end up with a single
9049 // store and byte shuffling instead of several stores and byte shuffling.
9050 if (NeedBswap) {
9051 SourceValue = DAG.getNode(ISD::BSWAP, DL, WideVT, SourceValue);
9052 } else if (NeedRotate) {
9053 assert(WideNumBits % 2 == 0 && "Unexpected type for rotate");
9054 SDValue RotAmt = DAG.getConstant(WideNumBits / 2, DL, WideVT);
9055 SourceValue = DAG.getNode(ISD::ROTR, DL, WideVT, SourceValue, RotAmt);
9056 }
9057
9058 SDValue NewStore =
9059 DAG.getStore(Chain, DL, SourceValue, FirstStore->getBasePtr(),
9060 FirstStore->getPointerInfo(), FirstStore->getAlign());
9061
9062 // Rely on other DAG combine rules to remove the other individual stores.
9063 DAG.ReplaceAllUsesWith(N, NewStore.getNode());
9064 return NewStore;
9065}
9066
9067/// Match a pattern where a wide type scalar value is loaded by several narrow
9068/// loads and combined by shifts and ors. Fold it into a single load or a load
9069/// and a BSWAP if the targets supports it.
9070///
9071/// Assuming little endian target:
9072/// i8 *a = ...
9073/// i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
9074/// =>
9075/// i32 val = *((i32)a)
9076///
9077/// i8 *a = ...
9078/// i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
9079/// =>
9080/// i32 val = BSWAP(*((i32)a))
9081///
9082/// TODO: This rule matches complex patterns with OR node roots and doesn't
9083/// interact well with the worklist mechanism. When a part of the pattern is
9084/// updated (e.g. one of the loads) its direct users are put into the worklist,
9085/// but the root node of the pattern which triggers the load combine is not
9086/// necessarily a direct user of the changed node. For example, once the address
9087/// of t28 load is reassociated load combine won't be triggered:
9088/// t25: i32 = add t4, Constant:i32<2>
9089/// t26: i64 = sign_extend t25
9090/// t27: i64 = add t2, t26
9091/// t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
9092/// t29: i32 = zero_extend t28
9093/// t32: i32 = shl t29, Constant:i8<8>
9094/// t33: i32 = or t23, t32
9095/// As a possible fix visitLoad can check if the load can be a part of a load
9096/// combine pattern and add corresponding OR roots to the worklist.
9097SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
9098 assert(N->getOpcode() == ISD::OR &&
9099 "Can only match load combining against OR nodes");
9100
9101 // Handles simple types only
9102 EVT VT = N->getValueType(0);
9103 if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
9104 return SDValue();
9105 unsigned ByteWidth = VT.getSizeInBits() / 8;
9106
9107 bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
9108 auto MemoryByteOffset = [&](SDByteProvider P) {
9109 assert(P.hasSrc() && "Must be a memory byte provider");
9110 auto *Load = cast<LoadSDNode>(P.Src.value());
9111
9112 unsigned LoadBitWidth = Load->getMemoryVT().getScalarSizeInBits();
9113
9114 assert(LoadBitWidth % 8 == 0 &&
9115 "can only analyze providers for individual bytes not bit");
9116 unsigned LoadByteWidth = LoadBitWidth / 8;
9117 return IsBigEndianTarget ? bigEndianByteAt(LoadByteWidth, P.DestOffset)
9118 : littleEndianByteAt(LoadByteWidth, P.DestOffset);
9119 };
9120
9121 std::optional<BaseIndexOffset> Base;
9122 SDValue Chain;
9123
9124 SmallPtrSet<LoadSDNode *, 8> Loads;
9125 std::optional<SDByteProvider> FirstByteProvider;
9126 int64_t FirstOffset = INT64_MAX;
9127
9128 // Check if all the bytes of the OR we are looking at are loaded from the same
9129 // base address. Collect bytes offsets from Base address in ByteOffsets.
9130 SmallVector<int64_t, 8> ByteOffsets(ByteWidth);
9131 unsigned ZeroExtendedBytes = 0;
9132 for (int i = ByteWidth - 1; i >= 0; --i) {
9133 auto P =
9134 calculateByteProvider(SDValue(N, 0), i, 0, /*VectorIndex*/ std::nullopt,
9135 /*StartingIndex*/ i);
9136 if (!P)
9137 return SDValue();
9138
9139 if (P->isConstantZero()) {
9140 // It's OK for the N most significant bytes to be 0, we can just
9141 // zero-extend the load.
9142 if (++ZeroExtendedBytes != (ByteWidth - static_cast<unsigned>(i)))
9143 return SDValue();
9144 continue;
9145 }
9146 assert(P->hasSrc() && "provenance should either be memory or zero");
9147 auto *L = cast<LoadSDNode>(P->Src.value());
9148
9149 // All loads must share the same chain
9150 SDValue LChain = L->getChain();
9151 if (!Chain)
9152 Chain = LChain;
9153 else if (Chain != LChain)
9154 return SDValue();
9155
9156 // Loads must share the same base address
9157 BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG);
9158 int64_t ByteOffsetFromBase = 0;
9159
9160 // For vector loads, the expected load combine pattern will have an
9161 // ExtractElement for each index in the vector. While each of these
9162 // ExtractElements will be accessing the same base address as determined
9163 // by the load instruction, the actual bytes they interact with will differ
9164 // due to different ExtractElement indices. To accurately determine the
9165 // byte position of an ExtractElement, we offset the base load ptr with
9166 // the index multiplied by the byte size of each element in the vector.
9167 if (L->getMemoryVT().isVector()) {
9168 unsigned LoadWidthInBit = L->getMemoryVT().getScalarSizeInBits();
9169 if (LoadWidthInBit % 8 != 0)
9170 return SDValue();
9171 unsigned ByteOffsetFromVector = P->SrcOffset * LoadWidthInBit / 8;
9172 Ptr.addToOffset(ByteOffsetFromVector);
9173 }
9174
9175 if (!Base)
9176 Base = Ptr;
9177
9178 else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
9179 return SDValue();
9180
9181 // Calculate the offset of the current byte from the base address
9182 ByteOffsetFromBase += MemoryByteOffset(*P);
9183 ByteOffsets[i] = ByteOffsetFromBase;
9184
9185 // Remember the first byte load
9186 if (ByteOffsetFromBase < FirstOffset) {
9187 FirstByteProvider = P;
9188 FirstOffset = ByteOffsetFromBase;
9189 }
9190
9191 Loads.insert(L);
9192 }
9193
9194 assert(!Loads.empty() && "All the bytes of the value must be loaded from "
9195 "memory, so there must be at least one load which produces the value");
9196 assert(Base && "Base address of the accessed memory location must be set");
9197 assert(FirstOffset != INT64_MAX && "First byte offset must be set");
9198
9199 bool NeedsZext = ZeroExtendedBytes > 0;
9200
9201 EVT MemVT =
9202 EVT::getIntegerVT(*DAG.getContext(), (ByteWidth - ZeroExtendedBytes) * 8);
9203
9204 if (!MemVT.isSimple())
9205 return SDValue();
9206
9207 // Before legalize we can introduce too wide illegal loads which will be later
9208 // split into legal sized loads. This enables us to combine i64 load by i8
9209 // patterns to a couple of i32 loads on 32 bit targets.
9210 if (LegalOperations &&
9211 !TLI.isLoadExtLegal(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, VT,
9212 MemVT))
9213 return SDValue();
9214
9215 // Check if the bytes of the OR we are looking at match with either big or
9216 // little endian value load
9217 std::optional<bool> IsBigEndian = isBigEndian(
9218 ArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), FirstOffset);
9219 if (!IsBigEndian)
9220 return SDValue();
9221
9222 assert(FirstByteProvider && "must be set");
9223
9224 // Ensure that the first byte is loaded from zero offset of the first load.
9225 // So the combined value can be loaded from the first load address.
9226 if (MemoryByteOffset(*FirstByteProvider) != 0)
9227 return SDValue();
9228 auto *FirstLoad = cast<LoadSDNode>(FirstByteProvider->Src.value());
9229
9230 // The node we are looking at matches with the pattern, check if we can
9231 // replace it with a single (possibly zero-extended) load and bswap + shift if
9232 // needed.
9233
9234 // If the load needs byte swap check if the target supports it
9235 bool NeedsBswap = IsBigEndianTarget != *IsBigEndian;
9236
9237 // Before legalize we can introduce illegal bswaps which will be later
9238 // converted to an explicit bswap sequence. This way we end up with a single
9239 // load and byte shuffling instead of several loads and byte shuffling.
9240 // We do not introduce illegal bswaps when zero-extending as this tends to
9241 // introduce too many arithmetic instructions.
9242 if (NeedsBswap && (LegalOperations || NeedsZext) &&
9243 !TLI.isOperationLegal(ISD::BSWAP, VT))
9244 return SDValue();
9245
9246 // If we need to bswap and zero extend, we have to insert a shift. Check that
9247 // it is legal.
9248 if (NeedsBswap && NeedsZext && LegalOperations &&
9249 !TLI.isOperationLegal(ISD::SHL, VT))
9250 return SDValue();
9251
9252 // Check that a load of the wide type is both allowed and fast on the target
9253 unsigned Fast = 0;
9254 bool Allowed =
9255 TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
9256 *FirstLoad->getMemOperand(), &Fast);
9257 if (!Allowed || !Fast)
9258 return SDValue();
9259
9260 SDValue NewLoad =
9261 DAG.getExtLoad(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, SDLoc(N), VT,
9262 Chain, FirstLoad->getBasePtr(),
9263 FirstLoad->getPointerInfo(), MemVT, FirstLoad->getAlign());
9264
9265 // Transfer chain users from old loads to the new load.
9266 for (LoadSDNode *L : Loads)
9267 DAG.makeEquivalentMemoryOrdering(L, NewLoad);
9268
9269 if (!NeedsBswap)
9270 return NewLoad;
9271
9272 SDValue ShiftedLoad =
9273 NeedsZext ? DAG.getNode(ISD::SHL, SDLoc(N), VT, NewLoad,
9274 DAG.getShiftAmountConstant(ZeroExtendedBytes * 8,
9275 VT, SDLoc(N)))
9276 : NewLoad;
9277 return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, ShiftedLoad);
9278}
9279
9280// If the target has andn, bsl, or a similar bit-select instruction,
9281// we want to unfold masked merge, with canonical pattern of:
9282// | A | |B|
9283// ((x ^ y) & m) ^ y
9284// | D |
9285// Into:
9286// (x & m) | (y & ~m)
9287// If y is a constant, m is not a 'not', and the 'andn' does not work with
9288// immediates, we unfold into a different pattern:
9289// ~(~x & m) & (m | y)
9290// If x is a constant, m is a 'not', and the 'andn' does not work with
9291// immediates, we unfold into a different pattern:
9292// (x | ~m) & ~(~m & ~y)
9293// NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at
9294// the very least that breaks andnpd / andnps patterns, and because those
9295// patterns are simplified in IR and shouldn't be created in the DAG
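// Per-bit check of the basic unfold: if a mask bit is 1, both
// ((x ^ y) & m) ^ y and (x & m) | (y & ~m) select the corresponding bit of x;
// if it is 0, both select the bit of y.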
9296SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
9297 assert(N->getOpcode() == ISD::XOR);
9298
9299 // Don't touch 'not' (i.e. where y = -1).
9300 if (isAllOnesOrAllOnesSplat(N->getOperand(1)))
9301 return SDValue();
9302
9303 EVT VT = N->getValueType(0);
9304
9305 // There are 3 commutable operators in the pattern,
9306 // so we have to deal with 8 possible variants of the basic pattern.
9307 SDValue X, Y, M;
9308 auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) {
9309 if (And.getOpcode() != ISD::AND || !And.hasOneUse())
9310 return false;
9311 SDValue Xor = And.getOperand(XorIdx);
9312 if (Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
9313 return false;
9314 SDValue Xor0 = Xor.getOperand(0);
9315 SDValue Xor1 = Xor.getOperand(1);
9316 // Don't touch 'not' (i.e. where y = -1).
9317 if (isAllOnesOrAllOnesSplat(Xor1))
9318 return false;
9319 if (Other == Xor0)
9320 std::swap(Xor0, Xor1);
9321 if (Other != Xor1)
9322 return false;
9323 X = Xor0;
9324 Y = Xor1;
9325 M = And.getOperand(XorIdx ? 0 : 1);
9326 return true;
9327 };
9328
9329 SDValue N0 = N->getOperand(0);
9330 SDValue N1 = N->getOperand(1);
9331 if (!matchAndXor(N0, 0, N1) && !matchAndXor(N0, 1, N1) &&
9332 !matchAndXor(N1, 0, N0) && !matchAndXor(N1, 1, N0))
9333 return SDValue();
9334
9335 // Don't do anything if the mask is constant. This should not be reachable.
9336 // InstCombine should have already unfolded this pattern, and DAGCombiner
9337 // probably shouldn't produce it, too.
9338 if (isa<ConstantSDNode>(M.getNode()))
9339 return SDValue();
9340
9341 // We can transform if the target has AndNot
9342 if (!TLI.hasAndNot(M))
9343 return SDValue();
9344
9345 SDLoc DL(N);
9346
9347 // If Y is a constant, check that 'andn' works with immediates. Unless M is
9348 // a bitwise not that would already allow ANDN to be used.
9349 if (!TLI.hasAndNot(Y) && !isBitwiseNot(M)) {
9350 assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");
9351 // If not, we need to do a bit more work to make sure andn is still used.
9352 SDValue NotX = DAG.getNOT(DL, X, VT);
9353 SDValue LHS = DAG.getNode(ISD::AND, DL, VT, NotX, M);
9354 SDValue NotLHS = DAG.getNOT(DL, LHS, VT);
9355 SDValue RHS = DAG.getNode(ISD::OR, DL, VT, M, Y);
9356 return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS);
9357 }
9358
9359 // If X is a constant and M is a bitwise not, check that 'andn' works with
9360 // immediates.
9361 if (!TLI.hasAndNot(X) && isBitwiseNot(M)) {
9362 assert(TLI.hasAndNot(Y) && "Only mask is a variable? Unreachable.");
9363 // If not, we need to do a bit more work to make sure andn is still used.
9364 SDValue NotM = M.getOperand(0);
9365 SDValue LHS = DAG.getNode(ISD::OR, DL, VT, X, NotM);
9366 SDValue NotY = DAG.getNOT(DL, Y, VT);
9367 SDValue RHS = DAG.getNode(ISD::AND, DL, VT, NotM, NotY);
9368 SDValue NotRHS = DAG.getNOT(DL, RHS, VT);
9369 return DAG.getNode(ISD::AND, DL, VT, LHS, NotRHS);
9370 }
9371
9372 SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);
9373 SDValue NotM = DAG.getNOT(DL, M, VT);
9374 SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);
9375
9376 return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
9377}
9378
9379SDValue DAGCombiner::visitXOR(SDNode *N) {
9380 SDValue N0 = N->getOperand(0);
9381 SDValue N1 = N->getOperand(1);
9382 EVT VT = N0.getValueType();
9383 SDLoc DL(N);
9384
9385 // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
9386 if (N0.isUndef() && N1.isUndef())
9387 return DAG.getConstant(0, DL, VT);
9388
9389 // fold (xor x, undef) -> undef
9390 if (N0.isUndef())
9391 return N0;
9392 if (N1.isUndef())
9393 return N1;
9394
9395 // fold (xor c1, c2) -> c1^c2
9396 if (SDValue C = DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, {N0, N1}))
9397 return C;
9398
9399 // canonicalize constant to RHS
9400 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
9401 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
9402 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
9403
9404 // fold vector ops
9405 if (VT.isVector()) {
9406 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
9407 return FoldedVOp;
9408
9409 // fold (xor x, 0) -> x, vector edition
9410 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
9411 return N0;
9412 }
9413
9414 // fold (xor x, 0) -> x
9415 if (isNullConstant(N1))
9416 return N0;
9417
9418 if (SDValue NewSel = foldBinOpIntoSelect(N))
9419 return NewSel;
9420
9421 // reassociate xor
9422 if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
9423 return RXOR;
9424
9425 // Fold xor(vecreduce(x), vecreduce(y)) -> vecreduce(xor(x, y))
9426 if (SDValue SD =
9427 reassociateReduction(ISD::VECREDUCE_XOR, ISD::XOR, DL, VT, N0, N1))
9428 return SD;
9429
9430 // fold (a^b) -> (a|b) iff a and b share no bits.
9431 if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
9432 DAG.haveNoCommonBitsSet(N0, N1)) {
9433 SDNodeFlags Flags;
9434 Flags.setDisjoint(true);
9435 return DAG.getNode(ISD::OR, DL, VT, N0, N1, Flags);
9436 }
9437
9438 // look for 'add-like' folds:
9439 // XOR(N0,MIN_SIGNED_VALUE) == ADD(N0,MIN_SIGNED_VALUE)
9440 if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
9441 isMinSignedConstant(N1))
9442 if (SDValue Combined = visitADDLike(N))
9443 return Combined;
9444
9445 // fold !(x cc y) -> (x !cc y)
9446 unsigned N0Opcode = N0.getOpcode();
9447 SDValue LHS, RHS, CC;
9448 if (TLI.isConstTrueVal(N1) &&
9449 isSetCCEquivalent(N0, LHS, RHS, CC, /*MatchStrict*/ true)) {
9450 ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
9451 LHS.getValueType());
9452 if (!LegalOperations ||
9453 TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
9454 switch (N0Opcode) {
9455 default:
9456 llvm_unreachable("Unhandled SetCC Equivalent!");
9457 case ISD::SETCC:
9458 return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
9459 case ISD::SELECT_CC:
9460 return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
9461 N0.getOperand(3), NotCC);
9462 case ISD::STRICT_FSETCC:
9463 case ISD::STRICT_FSETCCS: {
9464 if (N0.hasOneUse()) {
9465 // FIXME Can we handle multiple uses? Could we token factor the chain
9466 // results from the new/old setcc?
9467 SDValue SetCC =
9468 DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC,
9469 N0.getOperand(0), N0Opcode == ISD::STRICT_FSETCCS);
9470 CombineTo(N, SetCC);
9471 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), SetCC.getValue(1));
9472 recursivelyDeleteUnusedNodes(N0.getNode());
9473 return SDValue(N, 0); // Return N so it doesn't get rechecked!
9474 }
9475 break;
9476 }
9477 }
9478 }
9479 }
9480
9481 // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
9482 if (isOneConstant(N1) && N0Opcode == ISD::ZERO_EXTEND && N0.hasOneUse() &&
9483 isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
9484 SDValue V = N0.getOperand(0);
9485 SDLoc DL0(N0);
9486 V = DAG.getNode(ISD::XOR, DL0, V.getValueType(), V,
9487 DAG.getConstant(1, DL0, V.getValueType()));
9488 AddToWorklist(V.getNode());
9489 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, V);
9490 }
9491
9492 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
9493 if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
9494 (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
9495 SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
9496 if (isOneUseSetCC(N01) || isOneUseSetCC(N00)) {
9497 unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
9498 N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
9499 N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
9500 AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
9501 return DAG.getNode(NewOpcode, DL, VT, N00, N01);
9502 }
9503 }
9504 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
9505 if (isAllOnesConstant(N1) && N0.hasOneUse() &&
9506 (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
9507 SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
9508 if (isa<ConstantSDNode>(N01) || isa<ConstantSDNode>(N00)) {
9509 unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
9510 N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
9511 N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
9512 AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
9513 return DAG.getNode(NewOpcode, DL, VT, N00, N01);
9514 }
9515 }
9516
9517 // fold (not (neg x)) -> (add X, -1)
9518 // FIXME: This can be generalized to (not (sub Y, X)) -> (add X, ~Y) if
9519 // Y is a constant or the subtract has a single use.
9520 if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::SUB &&
9521 isNullConstant(N0.getOperand(0))) {
9522 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
9523 DAG.getAllOnesConstant(DL, VT));
9524 }
9525
9526 // fold (not (add X, -1)) -> (neg X)
9527 if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::ADD &&
9528 isAllOnesOrAllOnesSplat(N0.getOperand(1), /*AllowUndefs*/ true)) {
9529 return DAG.getNegative(N0.getOperand(0), DL, VT);
9530 }
9531
9532 // fold (xor (and x, y), y) -> (and (not x), y)
9533 if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) {
9534 SDValue X = N0.getOperand(0);
9535 SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
9536 AddToWorklist(NotX.getNode());
9537 return DAG.getNode(ISD::AND, DL, VT, NotX, N1);
9538 }
9539
9540 // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
9541 if (!LegalOperations || hasOperation(ISD::ABS, VT)) {
9542 SDValue A = N0Opcode == ISD::ADD ? N0 : N1;
9543 SDValue S = N0Opcode == ISD::SRA ? N0 : N1;
9544 if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
9545 SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
9546 SDValue S0 = S.getOperand(0);
9547 if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0))
9548 if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1)))
9549 if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
9550 return DAG.getNode(ISD::ABS, DL, VT, S0);
9551 }
9552 }
9553
9554 // fold (xor x, x) -> 0
9555 if (N0 == N1)
9556 return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
9557
9558 // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
9559 // Here is a concrete example of this equivalence:
9560 // i16 x == 14
9561 // i16 shl == 1 << 14 == 16384 == 0b0100000000000000
9562 // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
9563 //
9564 // =>
9565 //
9566 // i16 ~1 == 0b1111111111111110
9567 // i16 rol(~1, 14) == 0b1011111111111111
9568 //
9569 // Some additional tips to help conceptualize this transform:
9570 // - Try to see the operation as placing a single zero in a value of all ones.
9571 // - There exists no value for x which would allow the result to contain zero.
9572 // - Values of x larger than the bitwidth are undefined and do not require a
9573 // consistent result.
9574 // - Pushing the zero left requires shifting one bits in from the right.
9575 // A rotate left of ~1 is a nice way of achieving the desired result.
9576 if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0Opcode == ISD::SHL &&
9577 isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
9578 return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
9579 N0.getOperand(1));
9580 }
9581
9582 // Simplify: xor (op x...), (op y...) -> (op (xor x, y))
9583 if (N0Opcode == N1.getOpcode())
9584 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
9585 return V;
9586
9587 if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
9588 return R;
9589 if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG))
9590 return R;
9591 if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
9592 return R;
9593
9594 // Unfold ((x ^ y) & m) ^ y into (x & m) | (y & ~m) if profitable
9595 if (SDValue MM = unfoldMaskedMerge(N))
9596 return MM;
9597
9598 // Simplify the expression using non-local knowledge.
9599 if (SimplifyDemandedBits(SDValue(N, 0)))
9600 return SDValue(N, 0);
9601
9602 if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
9603 return Combined;
9604
9605 return SDValue();
9606}
9607
9608/// If we have a shift-by-constant of a bitwise logic op that itself has a
9609/// shift-by-constant operand with identical opcode, we may be able to convert
9610/// that into 2 independent shifts followed by the logic op. This is a
9611/// throughput improvement.
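/// For example (8-bit types):
///   srl (and (srl X, 2), Y), 3  -->  and (srl X, 5), (srl Y, 3)
/// The two right shifts of X merge into one, and the shift of Y can execute
/// in parallel with it.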
9612 static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG) {
9613 // Match a one-use bitwise logic op.
9614 SDValue LogicOp = Shift->getOperand(0);
9615 if (!LogicOp.hasOneUse())
9616 return SDValue();
9617
9618 unsigned LogicOpcode = LogicOp.getOpcode();
9619 if (LogicOpcode != ISD::AND && LogicOpcode != ISD::OR &&
9620 LogicOpcode != ISD::XOR)
9621 return SDValue();
9622
9623 // Find a matching one-use shift by constant.
9624 unsigned ShiftOpcode = Shift->getOpcode();
9625 SDValue C1 = Shift->getOperand(1);
9626 ConstantSDNode *C1Node = isConstOrConstSplat(C1);
9627 assert(C1Node && "Expected a shift with constant operand");
9628 const APInt &C1Val = C1Node->getAPIntValue();
9629 auto matchFirstShift = [&](SDValue V, SDValue &ShiftOp,
9630 const APInt *&ShiftAmtVal) {
9631 if (V.getOpcode() != ShiftOpcode || !V.hasOneUse())
9632 return false;
9633
9634 ConstantSDNode *ShiftCNode = isConstOrConstSplat(V.getOperand(1));
9635 if (!ShiftCNode)
9636 return false;
9637
9638 // Capture the shifted operand and shift amount value.
9639 ShiftOp = V.getOperand(0);
9640 ShiftAmtVal = &ShiftCNode->getAPIntValue();
9641
9642 // Shift amount types do not have to match their operand type, so check that
9643 // the constants are the same width.
9644 if (ShiftAmtVal->getBitWidth() != C1Val.getBitWidth())
9645 return false;
9646
9647 // The fold is not valid if the sum of the shift values doesn't fit in the
9648 // given shift amount type.
9649 bool Overflow = false;
9650 APInt NewShiftAmt = C1Val.uadd_ov(*ShiftAmtVal, Overflow);
9651 if (Overflow)
9652 return false;
9653
9654 // The fold is not valid if the sum of the shift values exceeds bitwidth.
9655 if (NewShiftAmt.uge(V.getScalarValueSizeInBits()))
9656 return false;
9657
9658 return true;
9659 };
9660
9661 // Logic ops are commutative, so check each operand for a match.
9662 SDValue X, Y;
9663 const APInt *C0Val;
9664 if (matchFirstShift(LogicOp.getOperand(0), X, C0Val))
9665 Y = LogicOp.getOperand(1);
9666 else if (matchFirstShift(LogicOp.getOperand(1), X, C0Val))
9667 Y = LogicOp.getOperand(0);
9668 else
9669 return SDValue();
9670
9671 // shift (logic (shift X, C0), Y), C1 -> logic (shift X, C0+C1), (shift Y, C1)
9672 SDLoc DL(Shift);
9673 EVT VT = Shift->getValueType(0);
9674 EVT ShiftAmtVT = Shift->getOperand(1).getValueType();
9675 SDValue ShiftSumC = DAG.getConstant(*C0Val + C1Val, DL, ShiftAmtVT);
9676 SDValue NewShift1 = DAG.getNode(ShiftOpcode, DL, VT, X, ShiftSumC);
9677 SDValue NewShift2 = DAG.getNode(ShiftOpcode, DL, VT, Y, C1);
9678 return DAG.getNode(LogicOpcode, DL, VT, NewShift1, NewShift2,
9679 LogicOp->getFlags());
9680}
9681
9682/// Handle transforms common to the three shifts, when the shift amount is a
9683/// constant.
9684/// We are looking for: (shift being one of shl/sra/srl)
9685/// shift (binop X, C0), C1
9686/// And want to transform into:
9687/// binop (shift X, C1), (shift C0, C1)
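/// For example:
///   shl (or X, 0xF0), 8  -->  or (shl X, 8), 0xF000
/// The shifted constant folds away and X now feeds the shift directly.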
9688SDValue DAGCombiner::visitShiftByConstant(SDNode *N) {
9689 assert(isConstOrConstSplat(N->getOperand(1)) && "Expected constant operand");
9690
9691 // Do not turn a 'not' into a regular xor.
9692 if (isBitwiseNot(N->getOperand(0)))
9693 return SDValue();
9694
9695 // The inner binop must be one-use, since we want to replace it.
9696 SDValue LHS = N->getOperand(0);
9697 if (!LHS.hasOneUse() || !TLI.isDesirableToCommuteWithShift(N, Level))
9698 return SDValue();
9699
9700 // Fold shift(bitop(shift(x,c1),y), c2) -> bitop(shift(x,c1+c2),shift(y,c2)).
9701 if (SDValue R = combineShiftOfShiftedLogic(N, DAG))
9702 return R;
9703
9704 // We want to pull some binops through shifts, so that we have (and (shift))
9705 // instead of (shift (and)), likewise for add, or, xor, etc. This sort of
9706 // thing happens with address calculations, so it's important to canonicalize
9707 // it.
9708 switch (LHS.getOpcode()) {
9709 default:
9710 return SDValue();
9711 case ISD::OR:
9712 case ISD::XOR:
9713 case ISD::AND:
9714 break;
9715 case ISD::ADD:
9716 if (N->getOpcode() != ISD::SHL)
9717 return SDValue(); // only shl(add) not sr[al](add).
9718 break;
9719 }
9720
9721 // FIXME: disable this unless the input to the binop is a shift by a constant
9722 // or is copy/select. Enable this in other cases once we figure out when it
9723 // is exactly profitable.
9724 SDValue BinOpLHSVal = LHS.getOperand(0);
9725 bool IsShiftByConstant = (BinOpLHSVal.getOpcode() == ISD::SHL ||
9726 BinOpLHSVal.getOpcode() == ISD::SRA ||
9727 BinOpLHSVal.getOpcode() == ISD::SRL) &&
9728 isa<ConstantSDNode>(BinOpLHSVal.getOperand(1));
9729 bool IsCopyOrSelect = BinOpLHSVal.getOpcode() == ISD::CopyFromReg ||
9730 BinOpLHSVal.getOpcode() == ISD::SELECT;
9731
9732 if (!IsShiftByConstant && !IsCopyOrSelect)
9733 return SDValue();
9734
9735 if (IsCopyOrSelect && N->hasOneUse())
9736 return SDValue();
9737
9738 // Attempt to fold the constants, shifting the binop RHS by the shift amount.
9739 SDLoc DL(N);
9740 EVT VT = N->getValueType(0);
9741 if (SDValue NewRHS = DAG.FoldConstantArithmetic(
9742 N->getOpcode(), DL, VT, {LHS.getOperand(1), N->getOperand(1)})) {
9743 SDValue NewShift = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(0),
9744 N->getOperand(1));
9745 return DAG.getNode(LHS.getOpcode(), DL, VT, NewShift, NewRHS);
9746 }
9747
9748 return SDValue();
9749}
9750
9751SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
9752 assert(N->getOpcode() == ISD::TRUNCATE);
9753 assert(N->getOperand(0).getOpcode() == ISD::AND);
9754
9755 // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
9756 EVT TruncVT = N->getValueType(0);
9757 if (N->hasOneUse() && N->getOperand(0).hasOneUse() &&
9758 TLI.isTypeDesirableForOp(ISD::AND, TruncVT)) {
9759 SDValue N01 = N->getOperand(0).getOperand(1);
9760 if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
9761 SDLoc DL(N);
9762 SDValue N00 = N->getOperand(0).getOperand(0);
9763 SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
9764 SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
9765 AddToWorklist(Trunc00.getNode());
9766 AddToWorklist(Trunc01.getNode());
9767 return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
9768 }
9769 }
9770
9771 return SDValue();
9772}
9773
9774SDValue DAGCombiner::visitRotate(SDNode *N) {
9775 SDLoc dl(N);
9776 SDValue N0 = N->getOperand(0);
9777 SDValue N1 = N->getOperand(1);
9778 EVT VT = N->getValueType(0);
9779 unsigned Bitsize = VT.getScalarSizeInBits();
9780
9781 // fold (rot x, 0) -> x
9782 if (isNullOrNullSplat(N1))
9783 return N0;
9784
9785 // fold (rot x, c) -> x iff (c % BitSize) == 0
9786 if (isPowerOf2_32(Bitsize) && Bitsize > 1) {
9787 APInt ModuloMask(N1.getScalarValueSizeInBits(), Bitsize - 1);
9788 if (DAG.MaskedValueIsZero(N1, ModuloMask))
9789 return N0;
9790 }
9791
9792 // fold (rot x, c) -> (rot x, c % BitSize)
9793 bool OutOfRange = false;
9794 auto MatchOutOfRange = [Bitsize, &OutOfRange](ConstantSDNode *C) {
9795 OutOfRange |= C->getAPIntValue().uge(Bitsize);
9796 return true;
9797 };
9798 if (ISD::matchUnaryPredicate(N1, MatchOutOfRange) && OutOfRange) {
9799 EVT AmtVT = N1.getValueType();
9800 SDValue Bits = DAG.getConstant(Bitsize, dl, AmtVT);
9801 if (SDValue Amt =
9802 DAG.FoldConstantArithmetic(ISD::UREM, dl, AmtVT, {N1, Bits}))
9803 return DAG.getNode(N->getOpcode(), dl, VT, N0, Amt);
9804 }
9805
9806 // rot i16 X, 8 --> bswap X
9807 auto *RotAmtC = isConstOrConstSplat(N1);
9808 if (RotAmtC && RotAmtC->getAPIntValue() == 8 &&
9809 VT.getScalarSizeInBits() == 16 && hasOperation(ISD::BSWAP, VT))
9810 return DAG.getNode(ISD::BSWAP, dl, VT, N0);
9811
9812 // Simplify the operands using demanded-bits information.
9813 if (SimplifyDemandedBits(SDValue(N, 0)))
9814 return SDValue(N, 0);
9815
9816 // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
9817 if (N1.getOpcode() == ISD::TRUNCATE &&
9818 N1.getOperand(0).getOpcode() == ISD::AND) {
9819 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
9820 return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
9821 }
9822
9823 unsigned NextOp = N0.getOpcode();
9824
9825 // fold (rot* (rot* x, c2), c1)
9826 // -> (rot* x, ((c1 % bitsize) +- (c2 % bitsize) + bitsize) % bitsize)
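// For example, on i16: rotl (rotr x, 3), 5 --> rotl x, ((5 - 3) + 16) % 16
//                                          --> rotl x, 2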
9827 if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
9828 SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
9829 SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
9830 if (C1 && C2 && C1->getValueType(0) == C2->getValueType(0)) {
9831 EVT ShiftVT = C1->getValueType(0);
9832 bool SameSide = (N->getOpcode() == NextOp);
9833 unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
9834 SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
9835 SDValue Norm1 = DAG.FoldConstantArithmetic(ISD::UREM, dl, ShiftVT,
9836 {N1, BitsizeC});
9837 SDValue Norm2 = DAG.FoldConstantArithmetic(ISD::UREM, dl, ShiftVT,
9838 {N0.getOperand(1), BitsizeC});
9839 if (Norm1 && Norm2)
9840 if (SDValue CombinedShift = DAG.FoldConstantArithmetic(
9841 CombineOp, dl, ShiftVT, {Norm1, Norm2})) {
9842 CombinedShift = DAG.FoldConstantArithmetic(ISD::ADD, dl, ShiftVT,
9843 {CombinedShift, BitsizeC});
9844 SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
9845 ISD::UREM, dl, ShiftVT, {CombinedShift, BitsizeC});
9846 return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
9847 CombinedShiftNorm);
9848 }
9849 }
9850 }
9851 return SDValue();
9852}
9853
9854SDValue DAGCombiner::visitSHL(SDNode *N) {
9855 SDValue N0 = N->getOperand(0);
9856 SDValue N1 = N->getOperand(1);
9857 if (SDValue V = DAG.simplifyShift(N0, N1))
9858 return V;
9859
9860 SDLoc DL(N);
9861 EVT VT = N0.getValueType();
9862 EVT ShiftVT = N1.getValueType();
9863 unsigned OpSizeInBits = VT.getScalarSizeInBits();
9864
9865 // fold (shl c1, c2) -> c1<<c2
9866 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N0, N1}))
9867 return C;
9868
9869 // fold vector ops
9870 if (VT.isVector()) {
9871 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
9872 return FoldedVOp;
9873
9874 BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
9875 // If setcc produces all-one true value then:
9876 // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
9877 if (N1CV && N1CV->isConstant()) {
9878 if (N0.getOpcode() == ISD::AND) {
9879 SDValue N00 = N0->getOperand(0);
9880 SDValue N01 = N0->getOperand(1);
9881 BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
9882
9883 if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
9884 TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
9885 TargetLowering::ZeroOrNegativeOneBooleanContent) {
9886 if (SDValue C =
9887 DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N01, N1}))
9888 return DAG.getNode(ISD::AND, DL, VT, N00, C);
9889 }
9890 }
9891 }
9892 }
9893
9894 if (SDValue NewSel = foldBinOpIntoSelect(N))
9895 return NewSel;
9896
9897 // if (shl x, c) is known to be zero, return 0
9898 if (DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))
9899 return DAG.getConstant(0, DL, VT);
9900
9901 // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
9902 if (N1.getOpcode() == ISD::TRUNCATE &&
9903 N1.getOperand(0).getOpcode() == ISD::AND) {
9904 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
9905 return DAG.getNode(ISD::SHL, DL, VT, N0, NewOp1);
9906 }
9907
9908 // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
9909 if (N0.getOpcode() == ISD::SHL) {
9910 auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
9911 ConstantSDNode *RHS) {
9912 APInt c1 = LHS->getAPIntValue();
9913 APInt c2 = RHS->getAPIntValue();
9914 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
9915 return (c1 + c2).uge(OpSizeInBits);
9916 };
9917 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
9918 return DAG.getConstant(0, DL, VT);
9919
9920 auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
9921 ConstantSDNode *RHS) {
9922 APInt c1 = LHS->getAPIntValue();
9923 APInt c2 = RHS->getAPIntValue();
9924 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
9925 return (c1 + c2).ult(OpSizeInBits);
9926 };
9927 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
9928 SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
9929 return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
9930 }
9931 }
9932
9933 // fold (shl (ext (shl x, c1)), c2) -> (shl (ext x), (add c1, c2))
9934 // For this to be valid, the second form must not preserve any of the bits
9935 // that are shifted out by the inner shift in the first form. This means
9936 // the outer shift size must be >= the number of bits added by the ext.
9937 // As a corollary, we don't care what kind of ext it is.
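// For example, when extending i8 to i16:
//   shl (zext (shl x, 3)), 10 --> shl (zext x), 13
// is fine because the outer shift amount (10) is at least the 8 bits added
// by the extension, so every bit the inner i8 shift discarded would have
// been shifted out of the i16 result anyway.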
9938 if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
9939 N0.getOpcode() == ISD::ANY_EXTEND ||
9940 N0.getOpcode() == ISD::SIGN_EXTEND) &&
9941 N0.getOperand(0).getOpcode() == ISD::SHL) {
9942 SDValue N0Op0 = N0.getOperand(0);
9943 SDValue InnerShiftAmt = N0Op0.getOperand(1);
9944 EVT InnerVT = N0Op0.getValueType();
9945 uint64_t InnerBitwidth = InnerVT.getScalarSizeInBits();
9946
9947 auto MatchOutOfRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
9948 ConstantSDNode *RHS) {
9949 APInt c1 = LHS->getAPIntValue();
9950 APInt c2 = RHS->getAPIntValue();
9951 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
9952 return c2.uge(OpSizeInBits - InnerBitwidth) &&
9953 (c1 + c2).uge(OpSizeInBits);
9954 };
9955 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchOutOfRange,
9956 /*AllowUndefs*/ false,
9957 /*AllowTypeMismatch*/ true))
9958 return DAG.getConstant(0, DL, VT);
9959
9960 auto MatchInRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
9961 ConstantSDNode *RHS) {
9962 APInt c1 = LHS->getAPIntValue();
9963 APInt c2 = RHS->getAPIntValue();
9964 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
9965 return c2.uge(OpSizeInBits - InnerBitwidth) &&
9966 (c1 + c2).ult(OpSizeInBits);
9967 };
9968 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchInRange,
9969 /*AllowUndefs*/ false,
9970 /*AllowTypeMismatch*/ true)) {
9971 SDValue Ext = DAG.getNode(N0.getOpcode(), DL, VT, N0Op0.getOperand(0));
9972 SDValue Sum = DAG.getZExtOrTrunc(InnerShiftAmt, DL, ShiftVT);
9973 Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, Sum, N1);
9974 return DAG.getNode(ISD::SHL, DL, VT, Ext, Sum);
9975 }
9976 }
9977
9978 // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
9979 // Only fold this if the inner zext has no other uses to avoid increasing
9980 // the total number of instructions.
9981 if (N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
9982 N0.getOperand(0).getOpcode() == ISD::SRL) {
9983 SDValue N0Op0 = N0.getOperand(0);
9984 SDValue InnerShiftAmt = N0Op0.getOperand(1);
9985
9986 auto MatchEqual = [VT](ConstantSDNode *LHS, ConstantSDNode *RHS) {
9987 APInt c1 = LHS->getAPIntValue();
9988 APInt c2 = RHS->getAPIntValue();
9989 zeroExtendToMatch(c1, c2);
9990 return c1.ult(VT.getScalarSizeInBits()) && (c1 == c2);
9991 };
9992 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchEqual,
9993 /*AllowUndefs*/ false,
9994 /*AllowTypeMismatch*/ true)) {
9995 EVT InnerShiftAmtVT = N0Op0.getOperand(1).getValueType();
9996 SDValue NewSHL = DAG.getZExtOrTrunc(N1, DL, InnerShiftAmtVT);
9997 NewSHL = DAG.getNode(ISD::SHL, DL, N0Op0.getValueType(), N0Op0, NewSHL);
9998 AddToWorklist(NewSHL.getNode());
9999 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
10000 }
10001 }
10002
10003 if (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) {
10004 auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS,
10005 ConstantSDNode *RHS) {
10006 const APInt &LHSC = LHS->getAPIntValue();
10007 const APInt &RHSC = RHS->getAPIntValue();
10008 return LHSC.ult(OpSizeInBits) && RHSC.ult(OpSizeInBits) &&
10009 LHSC.getZExtValue() <= RHSC.getZExtValue();
10010 };
10011
10012 // fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2
10013 // fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C2-C1)) if C1 >= C2
10014 if (N0->getFlags().hasExact()) {
10015 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
10016 /*AllowUndefs*/ false,
10017 /*AllowTypeMismatch*/ true)) {
10018 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10019 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
10020 return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
10021 }
10022 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
10023 /*AllowUndefs*/ false,
10024 /*AllowTypeMismatch*/ true)) {
10025 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10026 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
10027 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0), Diff);
10028 }
10029 }
10030
10031 // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or
10032 // (and (srl x, (sub c1, c2), MASK)
10033 // Only fold this if the inner shift has no other uses -- if it does,
10034 // folding this will increase the total number of instructions.
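// For example, on i8:
//   shl (srl x, 3), 1 --> and (srl x, 2), 0x3E
//   shl (srl x, 1), 3 --> and (shl x, 2), 0xF8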
10035 if (N0.getOpcode() == ISD::SRL &&
10036 (N0.getOperand(1) == N1 || N0.hasOneUse()) &&
10037 TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
10038 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
10039 /*AllowUndefs*/ false,
10040 /*AllowTypeMismatch*/ true)) {
10041 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10042 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
10043 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
10044 Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, N01);
10045 Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, Diff);
10046 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Diff);
10047 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
10048 }
10049 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
10050 /*AllowUndefs*/ false,
10051 /*AllowTypeMismatch*/ true)) {
10052 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10053 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
10054 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
10055 Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, N1);
10056 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
10057 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
10058 }
10059 }
10060 }
10061
10062 // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
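// For example, on i8: shl (sra x, 3), 3 --> and x, 0xF8. The sra/shl pair
// only clears the low 3 bits, which the mask does directly.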
10063 if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
10064 isConstantOrConstantVector(N1, /* No Opaques */ true)) {
10065 SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
10066 SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
10067 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
10068 }
10069
10070 // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
10071 // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
10072 // Variant of version done on multiply, except mul by a power of 2 is turned
10073 // into a shift.
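// For example: shl (add x, 5), 2 --> add (shl x, 2), 20,
// i.e. (x + 5) * 4 == x * 4 + 20.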
10074 if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
10075 N0->hasOneUse() && TLI.isDesirableToCommuteWithShift(N, Level)) {
10076 SDValue N01 = N0.getOperand(1);
10077 if (SDValue Shl1 =
10078 DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, {N01, N1})) {
10079 SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
10080 AddToWorklist(Shl0.getNode());
10081 SDNodeFlags Flags;
10082 // Preserve the disjoint flag for Or.
10083 if (N0.getOpcode() == ISD::OR && N0->getFlags().hasDisjoint())
10084 Flags.setDisjoint(true);
10085 return DAG.getNode(N0.getOpcode(), DL, VT, Shl0, Shl1, Flags);
10086 }
10087 }
10088
10089 // fold (shl (sext (add_nsw x, c1)), c2) -> (add (shl (sext x), c2), c1 << c2)
10090 // TODO: Add zext/add_nuw variant with suitable test coverage
10091 // TODO: Should we limit this with isLegalAddImmediate?
10092 if (N0.getOpcode() == ISD::SIGN_EXTEND &&
10093 N0.getOperand(0).getOpcode() == ISD::ADD &&
10094 N0.getOperand(0)->getFlags().hasNoSignedWrap() && N0->hasOneUse() &&
10095 N0.getOperand(0)->hasOneUse() &&
10096 TLI.isDesirableToCommuteWithShift(N, Level)) {
10097 SDValue Add = N0.getOperand(0);
10098 SDLoc DL(N0);
10099 if (SDValue ExtC = DAG.FoldConstantArithmetic(N0.getOpcode(), DL, VT,
10100 {Add.getOperand(1)})) {
10101 if (SDValue ShlC =
10102 DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {ExtC, N1})) {
10103 SDValue ExtX = DAG.getNode(N0.getOpcode(), DL, VT, Add.getOperand(0));
10104 SDValue ShlX = DAG.getNode(ISD::SHL, DL, VT, ExtX, N1);
10105 return DAG.getNode(ISD::ADD, DL, VT, ShlX, ShlC);
10106 }
10107 }
10108 }
10109
10110 // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
10111 if (N0.getOpcode() == ISD::MUL && N0->hasOneUse()) {
10112 SDValue N01 = N0.getOperand(1);
10113 if (SDValue Shl =
10114 DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, {N01, N1}))
10115 return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), Shl);
10116 }
10117
10118 ConstantSDNode *N1C = isConstOrConstSplat(N1);
10119 if (N1C && !N1C->isOpaque())
10120 if (SDValue NewSHL = visitShiftByConstant(N))
10121 return NewSHL;
10122
10123 // fold (shl X, cttz(Y)) -> (mul (Y & -Y), X) if cttz is unsupported on the
10124 // target.
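// This works because (Y & -Y) isolates the lowest set bit of Y, which equals
// 1 << cttz(Y); e.g. Y = 0b0110100 gives Y & -Y = 0b0000100 = 1 << 2.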
10125 if (((N1.getOpcode() == ISD::CTTZ &&
10126 VT.getScalarSizeInBits() <= ShiftVT.getScalarSizeInBits()) ||
10127 N1.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
10128 N1.hasOneUse() && !TLI.isOperationLegalOrCustom(ISD::CTTZ, ShiftVT) &&
10129 TLI.isOperationLegalOrCustom(ISD::MUL, VT)) {
10130 SDValue Y = N1.getOperand(0);
10131 SDLoc DL(N);
10132 SDValue NegY = DAG.getNegative(Y, DL, ShiftVT);
10133 SDValue And =
10134 DAG.getZExtOrTrunc(DAG.getNode(ISD::AND, DL, ShiftVT, Y, NegY), DL, VT);
10135 return DAG.getNode(ISD::MUL, DL, VT, And, N0);
10136 }
10137
10138 if (SimplifyDemandedBits(SDValue(N, 0)))
10139 return SDValue(N, 0);
10140
10141 // Fold (shl (vscale * C0), C1) to (vscale * (C0 << C1)).
10142 if (N0.getOpcode() == ISD::VSCALE && N1C) {
10143 const APInt &C0 = N0.getConstantOperandAPInt(0);
10144 const APInt &C1 = N1C->getAPIntValue();
10145 return DAG.getVScale(DL, VT, C0 << C1);
10146 }
10147
10148 // Fold (shl step_vector(C0), C1) to (step_vector(C0 << C1)).
10149 APInt ShlVal;
10150 if (N0.getOpcode() == ISD::STEP_VECTOR &&
10151 ISD::isConstantSplatVector(N1.getNode(), ShlVal)) {
10152 const APInt &C0 = N0.getConstantOperandAPInt(0);
10153 if (ShlVal.ult(C0.getBitWidth())) {
10154 APInt NewStep = C0 << ShlVal;
10155 return DAG.getStepVector(DL, VT, NewStep);
10156 }
10157 }
10158
10159 return SDValue();
10160}
10161
10162// Transform a right shift of a multiply into a multiply-high.
10163// Examples:
10164 // (srl (mul (zext i32:$a to i64), (zext i32:$b to i64)), 32) -> (mulhu $a, $b)
10165 // (sra (mul (sext i32:$a to i64), (sext i32:$b to i64)), 32) -> (mulhs $a, $b)
10166 static SDValue combineShiftToMULH(SDNode *N, const SDLoc &DL, SelectionDAG &DAG,
10167 const TargetLowering &TLI) {
10168 assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
10169 "SRL or SRA node is required here!");
10170
10171 // Check the shift amount. Proceed with the transformation if the shift
10172 // amount is constant.
10173 ConstantSDNode *ShiftAmtSrc = isConstOrConstSplat(N->getOperand(1));
10174 if (!ShiftAmtSrc)
10175 return SDValue();
10176
10177 // The operation feeding into the shift must be a multiply.
10178 SDValue ShiftOperand = N->getOperand(0);
10179 if (ShiftOperand.getOpcode() != ISD::MUL)
10180 return SDValue();
10181
10182 // Both operands must be equivalent extend nodes.
10183 SDValue LeftOp = ShiftOperand.getOperand(0);
10184 SDValue RightOp = ShiftOperand.getOperand(1);
10185
10186 bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND;
10187 bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND;
10188
10189 if (!IsSignExt && !IsZeroExt)
10190 return SDValue();
10191
10192 EVT NarrowVT = LeftOp.getOperand(0).getValueType();
10193 unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits();
10194
10195 // return true if U may use the lower bits of its operands
10196 auto UserOfLowerBits = [NarrowVTSize](SDNode *U) {
10197 if (U->getOpcode() != ISD::SRL && U->getOpcode() != ISD::SRA) {
10198 return true;
10199 }
10200 ConstantSDNode *UShiftAmtSrc = isConstOrConstSplat(U->getOperand(1));
10201 if (!UShiftAmtSrc) {
10202 return true;
10203 }
10204 unsigned UShiftAmt = UShiftAmtSrc->getZExtValue();
10205 return UShiftAmt < NarrowVTSize;
10206 };
10207
10208 // If the lower part of the MUL is also used and MUL_LOHI is supported
10209 // do not introduce the MULH in favor of MUL_LOHI
10210 unsigned MulLoHiOp = IsSignExt ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
10211 if (!ShiftOperand.hasOneUse() &&
10212 TLI.isOperationLegalOrCustom(MulLoHiOp, NarrowVT) &&
10213 llvm::any_of(ShiftOperand->uses(), UserOfLowerBits)) {
10214 return SDValue();
10215 }
10216
10217 SDValue MulhRightOp;
10218 if (ConstantSDNode *Constant = isConstOrConstSplat(RightOp)) {
10219 unsigned ActiveBits = IsSignExt
10220 ? Constant->getAPIntValue().getSignificantBits()
10221 : Constant->getAPIntValue().getActiveBits();
10222 if (ActiveBits > NarrowVTSize)
10223 return SDValue();
10224 MulhRightOp = DAG.getConstant(
10225 Constant->getAPIntValue().trunc(NarrowVT.getScalarSizeInBits()), DL,
10226 NarrowVT);
10227 } else {
10228 if (LeftOp.getOpcode() != RightOp.getOpcode())
10229 return SDValue();
10230 // Check that the two extend nodes are the same type.
10231 if (NarrowVT != RightOp.getOperand(0).getValueType())
10232 return SDValue();
10233 MulhRightOp = RightOp.getOperand(0);
10234 }
10235
10236 EVT WideVT = LeftOp.getValueType();
10237 // Proceed with the transformation if the wide types match.
10238 assert((WideVT == RightOp.getValueType()) &&
10239 "Cannot have a multiply node with two different operand types.");
10240
10241 // Proceed with the transformation if the wide type is twice as large
10242 // as the narrow type.
10243 if (WideVT.getScalarSizeInBits() != 2 * NarrowVTSize)
10244 return SDValue();
10245
10246 // Check the shift amount with the narrow type size.
10247 // Proceed with the transformation if the shift amount is the width
10248 // of the narrow type.
10249 unsigned ShiftAmt = ShiftAmtSrc->getZExtValue();
10250 if (ShiftAmt != NarrowVTSize)
10251 return SDValue();
10252
10253 // If the operation feeding into the MUL is a sign extend (sext),
10254 // we use mulhs. Otherwise, zero extends (zext) use mulhu.
10255 unsigned MulhOpcode = IsSignExt ? ISD::MULHS : ISD::MULHU;
10256
10257 // Combine to mulh if mulh is legal/custom for the narrow type on the target.
10258 // If it is a vector type, we can instead transform to an acceptable type and
10259 // rely on legalization to split/combine the result.
10260 if (NarrowVT.isVector()) {
10261 EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), NarrowVT);
10262 if (TransformVT.getVectorElementType() != NarrowVT.getVectorElementType() ||
10263 !TLI.isOperationLegalOrCustom(MulhOpcode, TransformVT))
10264 return SDValue();
10265 } else {
10266 if (!TLI.isOperationLegalOrCustom(MulhOpcode, NarrowVT))
10267 return SDValue();
10268 }
10269
10270 SDValue Result =
10271 DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0), MulhRightOp);
10272 bool IsSigned = N->getOpcode() == ISD::SRA;
10273 return DAG.getExtOrTrunc(IsSigned, Result, DL, WideVT);
10274}
10275
10276// fold (bswap (logic_op(bswap(x),y))) -> logic_op(x,bswap(y))
10277 // This helper function accepts SDNodes with opcode ISD::BSWAP or ISD::BITREVERSE.
10278 static SDValue foldBitOrderCrossLogicOp(SDNode *N, SelectionDAG &DAG) {
10279 unsigned Opcode = N->getOpcode();
10280 if (Opcode != ISD::BSWAP && Opcode != ISD::BITREVERSE)
10281 return SDValue();
10282
10283 SDValue N0 = N->getOperand(0);
10284 EVT VT = N->getValueType(0);
10285 SDLoc DL(N);
10286 if (ISD::isBitwiseLogicOp(N0.getOpcode()) && N0.hasOneUse()) {
10287 SDValue OldLHS = N0.getOperand(0);
10288 SDValue OldRHS = N0.getOperand(1);
10289
10290 // If both operands are bswap/bitreverse, ignore the multiuse
10291 // Otherwise need to ensure logic_op and bswap/bitreverse(x) have one use.
10292 if (OldLHS.getOpcode() == Opcode && OldRHS.getOpcode() == Opcode) {
10293 return DAG.getNode(N0.getOpcode(), DL, VT, OldLHS.getOperand(0),
10294 OldRHS.getOperand(0));
10295 }
10296
10297 if (OldLHS.getOpcode() == Opcode && OldLHS.hasOneUse()) {
10298 SDValue NewBitReorder = DAG.getNode(Opcode, DL, VT, OldRHS);
10299 return DAG.getNode(N0.getOpcode(), DL, VT, OldLHS.getOperand(0),
10300 NewBitReorder);
10301 }
10302
10303 if (OldRHS.getOpcode() == Opcode && OldRHS.hasOneUse()) {
10304 SDValue NewBitReorder = DAG.getNode(Opcode, DL, VT, OldLHS);
10305 return DAG.getNode(N0.getOpcode(), DL, VT, NewBitReorder,
10306 OldRHS.getOperand(0));
10307 }
10308 }
10309 return SDValue();
10310}
10311
10312SDValue DAGCombiner::visitSRA(SDNode *N) {
10313 SDValue N0 = N->getOperand(0);
10314 SDValue N1 = N->getOperand(1);
10315 if (SDValue V = DAG.simplifyShift(N0, N1))
10316 return V;
10317
10318 SDLoc DL(N);
10319 EVT VT = N0.getValueType();
10320 unsigned OpSizeInBits = VT.getScalarSizeInBits();
10321
10322 // fold (sra c1, c2) -> c1 >>s c2
10323 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRA, DL, VT, {N0, N1}))
10324 return C;
10325
10326 // Arithmetic shifting an all-sign-bit value is a no-op.
10327 // fold (sra 0, x) -> 0
10328 // fold (sra -1, x) -> -1
10329 if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
10330 return N0;
10331
10332 // fold vector ops
10333 if (VT.isVector())
10334 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
10335 return FoldedVOp;
10336
10337 if (SDValue NewSel = foldBinOpIntoSelect(N))
10338 return NewSel;
10339
10340 ConstantSDNode *N1C = isConstOrConstSplat(N1);
10341
10342 // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
10343 // clamp (add c1, c2) to max shift.
10344 if (N0.getOpcode() == ISD::SRA) {
10345 EVT ShiftVT = N1.getValueType();
10346 EVT ShiftSVT = ShiftVT.getScalarType();
10347 SmallVector<SDValue, 16> ShiftValues;
10348
10349 auto SumOfShifts = [&](ConstantSDNode *LHS, ConstantSDNode *RHS) {
10350 APInt c1 = LHS->getAPIntValue();
10351 APInt c2 = RHS->getAPIntValue();
10352 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10353 APInt Sum = c1 + c2;
10354 unsigned ShiftSum =
10355 Sum.uge(OpSizeInBits) ? (OpSizeInBits - 1) : Sum.getZExtValue();
10356 ShiftValues.push_back(DAG.getConstant(ShiftSum, DL, ShiftSVT));
10357 return true;
10358 };
10359 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), SumOfShifts)) {
10360 SDValue ShiftValue;
10361 if (N1.getOpcode() == ISD::BUILD_VECTOR)
10362 ShiftValue = DAG.getBuildVector(ShiftVT, DL, ShiftValues);
10363 else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
10364 assert(ShiftValues.size() == 1 &&
10365 "Expected matchBinaryPredicate to return one element for "
10366 "SPLAT_VECTORs");
10367 ShiftValue = DAG.getSplatVector(ShiftVT, DL, ShiftValues[0]);
10368 } else
10369 ShiftValue = ShiftValues[0];
10370 return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), ShiftValue);
10371 }
10372 }
10373
10374 // fold (sra (shl X, m), (sub result_size, n))
10375 // -> (sign_extend (trunc (srl X, (sub (sub result_size, n), m)))) for
10376 // result_size - n != m.
10377 // If truncate is free for the target sext(shl) is likely to result in better
10378 // code.
10379 if (N0.getOpcode() == ISD::SHL && N1C) {
10380 // Get the two constants of the shifts, CN0 = m, CN = n.
10381 const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
10382 if (N01C) {
10383 LLVMContext &Ctx = *DAG.getContext();
10384 // Determine what the truncate's result bitsize and type would be.
10385 EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
10386
10387 if (VT.isVector())
10388 TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
10389
10390 // Determine the residual right-shift amount.
10391 int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
10392
10393 // If the shift is not a no-op (in which case this should be just a sign
10394 // extend already), the truncated to type is legal, sign_extend is legal
10395 // on that type, and the truncate to that type is both legal and free,
10396 // perform the transform.
10397 if ((ShiftAmt > 0) &&
10398 TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
10399 TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
10400 TLI.isTruncateFree(VT, TruncVT)) {
10401 SDValue Amt = DAG.getShiftAmountConstant(ShiftAmt, VT, DL);
10402 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
10403 N0.getOperand(0), Amt);
10404 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
10405 Shift);
10406 return DAG.getNode(ISD::SIGN_EXTEND, DL,
10407 N->getValueType(0), Trunc);
10408 }
10409 }
10410 }
10411
10412 // We convert trunc/ext to opposing shifts in IR, but casts may be cheaper.
10413 // sra (add (shl X, N1C), AddC), N1C -->
10414 // sext (add (trunc X to (width - N1C)), AddC')
10415 // sra (sub AddC, (shl X, N1C)), N1C -->
10416 // sext (sub AddC1',(trunc X to (width - N1C)))
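// For example, on i16 with N1C == 8:
//   sra (add (shl X, 8), 0x1200), 8 --> sext (add (trunc X to i8), 0x12)
// since only the low 8 bits of X reach the top half and AddC' is AddC >> 8.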
10417 if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB) && N1C &&
10418 N0.hasOneUse()) {
10419 bool IsAdd = N0.getOpcode() == ISD::ADD;
10420 SDValue Shl = N0.getOperand(IsAdd ? 0 : 1);
10421 if (Shl.getOpcode() == ISD::SHL && Shl.getOperand(1) == N1 &&
10422 Shl.hasOneUse()) {
10423 // TODO: AddC does not need to be a splat.
10424 if (ConstantSDNode *AddC =
10425 isConstOrConstSplat(N0.getOperand(IsAdd ? 1 : 0))) {
10426 // Determine what the truncate's type would be and ask the target if
10427 // that is a free operation.
10428 LLVMContext &Ctx = *DAG.getContext();
10429 unsigned ShiftAmt = N1C->getZExtValue();
10430 EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt);
10431 if (VT.isVector())
10432 TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
10433
10434 // TODO: The simple type check probably belongs in the default hook
10435 // implementation and/or target-specific overrides (because
10436 // non-simple types likely require masking when legalized), but
10437 // that restriction may conflict with other transforms.
10438 if (TruncVT.isSimple() && isTypeLegal(TruncVT) &&
10439 TLI.isTruncateFree(VT, TruncVT)) {
10440 SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT);
10441 SDValue ShiftC =
10442 DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).trunc(
10443 TruncVT.getScalarSizeInBits()),
10444 DL, TruncVT);
10445 SDValue Add;
10446 if (IsAdd)
10447 Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC);
10448 else
10449 Add = DAG.getNode(ISD::SUB, DL, TruncVT, ShiftC, Trunc);
10450 return DAG.getSExtOrTrunc(Add, DL, VT);
10451 }
10452 }
10453 }
10454 }
10455
10456 // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
10457 if (N1.getOpcode() == ISD::TRUNCATE &&
10458 N1.getOperand(0).getOpcode() == ISD::AND) {
10459 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
10460 return DAG.getNode(ISD::SRA, DL, VT, N0, NewOp1);
10461 }
10462
10463 // fold (sra (trunc (sra x, c1)), c2) -> (trunc (sra x, c1 + c2))
10464 // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
10465 // if c1 is equal to the number of bits the trunc removes
10466 // TODO - support non-uniform vector shift amounts.
10467 if (N0.getOpcode() == ISD::TRUNCATE &&
10468 (N0.getOperand(0).getOpcode() == ISD::SRL ||
10469 N0.getOperand(0).getOpcode() == ISD::SRA) &&
10470 N0.getOperand(0).hasOneUse() &&
10471 N0.getOperand(0).getOperand(1).hasOneUse() && N1C) {
10472 SDValue N0Op0 = N0.getOperand(0);
10473 if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
10474 EVT LargeVT = N0Op0.getValueType();
10475 unsigned TruncBits = LargeVT.getScalarSizeInBits() - OpSizeInBits;
10476 if (LargeShift->getAPIntValue() == TruncBits) {
10477 EVT LargeShiftVT = getShiftAmountTy(LargeVT);
10478 SDValue Amt = DAG.getZExtOrTrunc(N1, DL, LargeShiftVT);
10479 Amt = DAG.getNode(ISD::ADD, DL, LargeShiftVT, Amt,
10480 DAG.getConstant(TruncBits, DL, LargeShiftVT));
10481 SDValue SRA =
10482 DAG.getNode(ISD::SRA, DL, LargeVT, N0Op0.getOperand(0), Amt);
10483 return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
10484 }
10485 }
10486 }
10487
10488 // Simplify, based on bits shifted out of the LHS.
10489 if (SimplifyDemandedBits(SDValue(N, 0)))
10490 return SDValue(N, 0);
10491
10492 // If the sign bit is known to be zero, switch this to a SRL.
10493 if (DAG.SignBitIsZero(N0))
10494 return DAG.getNode(ISD::SRL, DL, VT, N0, N1);
10495
10496 if (N1C && !N1C->isOpaque())
10497 if (SDValue NewSRA = visitShiftByConstant(N))
10498 return NewSRA;
10499
10500 // Try to transform this shift into a multiply-high if
10501 // it matches the appropriate pattern detected in combineShiftToMULH.
10502 if (SDValue MULH = combineShiftToMULH(N, DL, DAG, TLI))
10503 return MULH;
10504
10505 // Attempt to convert a sra of a load into a narrower sign-extending load.
10506 if (SDValue NarrowLoad = reduceLoadWidth(N))
10507 return NarrowLoad;
10508
10509 return SDValue();
10510}
10511
10512SDValue DAGCombiner::visitSRL(SDNode *N) {
10513 SDValue N0 = N->getOperand(0);
10514 SDValue N1 = N->getOperand(1);
10515 if (SDValue V = DAG.simplifyShift(N0, N1))
10516 return V;
10517
10518 SDLoc DL(N);
10519 EVT VT = N0.getValueType();
10520 EVT ShiftVT = N1.getValueType();
10521 unsigned OpSizeInBits = VT.getScalarSizeInBits();
10522
10523 // fold (srl c1, c2) -> c1 >>u c2
10524 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRL, DL, VT, {N0, N1}))
10525 return C;
10526
10527 // fold vector ops
10528 if (VT.isVector())
10529 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
10530 return FoldedVOp;
10531
10532 if (SDValue NewSel = foldBinOpIntoSelect(N))
10533 return NewSel;
10534
10535 // if (srl x, c) is known to be zero, return 0
10536 ConstantSDNode *N1C = isConstOrConstSplat(N1);
10537 if (N1C &&
10538 DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))
10539 return DAG.getConstant(0, DL, VT);
10540
10541 // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
10542 if (N0.getOpcode() == ISD::SRL) {
10543 auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
10544 ConstantSDNode *RHS) {
10545 APInt c1 = LHS->getAPIntValue();
10546 APInt c2 = RHS->getAPIntValue();
10547 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10548 return (c1 + c2).uge(OpSizeInBits);
10549 };
10550 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
10551 return DAG.getConstant(0, DL, VT);
10552
10553 auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
10554 ConstantSDNode *RHS) {
10555 APInt c1 = LHS->getAPIntValue();
10556 APInt c2 = RHS->getAPIntValue();
10557 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10558 return (c1 + c2).ult(OpSizeInBits);
10559 };
10560 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
10561 SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
10562 return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
10563 }
10564 }
10565
10566 if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
10567 N0.getOperand(0).getOpcode() == ISD::SRL) {
10568 SDValue InnerShift = N0.getOperand(0);
10569 // TODO - support non-uniform vector shift amounts.
10570 if (auto *N001C = isConstOrConstSplat(InnerShift.getOperand(1))) {
10571 uint64_t c1 = N001C->getZExtValue();
10572 uint64_t c2 = N1C->getZExtValue();
10573 EVT InnerShiftVT = InnerShift.getValueType();
10574 EVT ShiftAmtVT = InnerShift.getOperand(1).getValueType();
10575 uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
10576 // srl (trunc (srl x, c1)), c2 --> 0 or (trunc (srl x, (add c1, c2)))
10577 // This is only valid if the OpSizeInBits + c1 = size of inner shift.
10578 if (c1 + OpSizeInBits == InnerShiftSize) {
10579 if (c1 + c2 >= InnerShiftSize)
10580 return DAG.getConstant(0, DL, VT);
10581 SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
10582 SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
10583 InnerShift.getOperand(0), NewShiftAmt);
10584 return DAG.getNode(ISD::TRUNCATE, DL, VT, NewShift);
10585 }
10586 // In the more general case, we can clear the high bits after the shift:
10587 // srl (trunc (srl x, c1)), c2 --> trunc (and (srl x, (c1+c2)), Mask)
10588 if (N0.hasOneUse() && InnerShift.hasOneUse() &&
10589 c1 + c2 < InnerShiftSize) {
10590 SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
10591 SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
10592 InnerShift.getOperand(0), NewShiftAmt);
10593 SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(InnerShiftSize,
10594 OpSizeInBits - c2),
10595 DL, InnerShiftVT);
10596 SDValue And = DAG.getNode(ISD::AND, DL, InnerShiftVT, NewShift, Mask);
10597 return DAG.getNode(ISD::TRUNCATE, DL, VT, And);
10598 }
10599 }
10600 }
10601
10602 // fold (srl (shl x, c1), c2) -> (and (shl x, (sub c1, c2), MASK) or
10603 // (and (srl x, (sub c2, c1), MASK)
10604 if (N0.getOpcode() == ISD::SHL &&
10605 (N0.getOperand(1) == N1 || N0->hasOneUse()) &&
10606 TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
10607 auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS,
10608 ConstantSDNode *RHS) {
10609 const APInt &LHSC = LHS->getAPIntValue();
10610 const APInt &RHSC = RHS->getAPIntValue();
10611 return LHSC.ult(OpSizeInBits) && RHSC.ult(OpSizeInBits) &&
10612 LHSC.getZExtValue() <= RHSC.getZExtValue();
10613 };
10614 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
10615 /*AllowUndefs*/ false,
10616 /*AllowTypeMismatch*/ true)) {
10617 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10618 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
10619 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
10620 Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N01);
10621 Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, Diff);
10622 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
10623 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
10624 }
10625 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
10626 /*AllowUndefs*/ false,
10627 /*AllowTypeMismatch*/ true)) {
10628 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10629 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
10630 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
10631 Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N1);
10632 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Diff);
10633 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
10634 }
10635 }
10636
10637 // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
10638 // TODO - support non-uniform vector shift amounts.
10639 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
10640 // Shifting in all undef bits?
10641 EVT SmallVT = N0.getOperand(0).getValueType();
10642 unsigned BitSize = SmallVT.getScalarSizeInBits();
10643 if (N1C->getAPIntValue().uge(BitSize))
10644 return DAG.getUNDEF(VT);
10645
10646 if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
10647 uint64_t ShiftAmt = N1C->getZExtValue();
10648 SDLoc DL0(N0);
10649 SDValue SmallShift =
10650 DAG.getNode(ISD::SRL, DL0, SmallVT, N0.getOperand(0),
10651 DAG.getShiftAmountConstant(ShiftAmt, SmallVT, DL0));
10652 AddToWorklist(SmallShift.getNode());
10653 APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
10654 return DAG.getNode(ISD::AND, DL, VT,
10655 DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
10656 DAG.getConstant(Mask, DL, VT));
10657 }
10658 }
10659
10660 // fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign
10661 // bit, which is unmodified by sra.
10662 if (N1C && N1C->getAPIntValue() == (OpSizeInBits - 1)) {
10663 if (N0.getOpcode() == ISD::SRA)
10664 return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), N1);
10665 }
10666
10667 // fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit), and x has a power
10668 // of two bitwidth. The "5" represents (log2 (bitwidth x)).
10669 if (N1C && N0.getOpcode() == ISD::CTLZ &&
10670 isPowerOf2_32(OpSizeInBits) &&
10671 N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
10672 KnownBits Known = DAG.computeKnownBits(N0.getOperand(0));
10673
10674 // If any of the input bits are KnownOne, then the input couldn't be all
10675 // zeros, thus the result of the srl will always be zero.
10676 if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
10677
10678 // If all of the bits input to the ctlz node are known to be zero, then
10679 // the result of the ctlz is "32" and the result of the shift is one.
10680 APInt UnknownBits = ~Known.Zero;
10681 if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
10682
10683 // Otherwise, check to see if there is exactly one bit input to the ctlz.
10684 if (UnknownBits.isPowerOf2()) {
10685 // Okay, we know that only the single bit specified by UnknownBits
10686 // could be set on input to the CTLZ node. If this bit is set, the SRL
10687 // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
10688 // to an SRL/XOR pair, which is likely to simplify more.
10689 unsigned ShAmt = UnknownBits.countr_zero();
10690 SDValue Op = N0.getOperand(0);
10691
10692 if (ShAmt) {
10693 SDLoc DL(N0);
10694 Op = DAG.getNode(ISD::SRL, DL, VT, Op,
10695 DAG.getShiftAmountConstant(ShAmt, VT, DL));
10696 AddToWorklist(Op.getNode());
10697 }
10698 return DAG.getNode(ISD::XOR, DL, VT, Op, DAG.getConstant(1, DL, VT));
10699 }
10700 }
10701
10702 // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
10703 if (N1.getOpcode() == ISD::TRUNCATE &&
10704 N1.getOperand(0).getOpcode() == ISD::AND) {
10705 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
10706 return DAG.getNode(ISD::SRL, DL, VT, N0, NewOp1);
10707 }
10708
10709 // fold operands of srl based on knowledge that the low bits are not
10710 // demanded.
10711 if (SimplifyDemandedBits(SDValue(N, 0)))
10712 return SDValue(N, 0);
10713
10714 if (N1C && !N1C->isOpaque())
10715 if (SDValue NewSRL = visitShiftByConstant(N))
10716 return NewSRL;
10717
10718 // Attempt to convert a srl of a load into a narrower zero-extending load.
10719 if (SDValue NarrowLoad = reduceLoadWidth(N))
10720 return NarrowLoad;
10721
10722 // Here is a common situation. We want to optimize:
10723 //
10724 // %a = ...
10725 // %b = and i32 %a, 2
10726 // %c = srl i32 %b, 1
10727 // brcond i32 %c ...
10728 //
10729 // into
10730 //
10731 // %a = ...
10732 // %b = and %a, 2
10733 // %c = setcc eq %b, 0
10734 // brcond %c ...
10735 //
10736 // However, after the source operand of SRL is optimized into AND, the SRL
10737 // itself may not be optimized further. Look for it and add the BRCOND into
10738 // the worklist.
10739 //
10740 // This also tends to happen for binary operations when SimplifyDemandedBits
10741 // is involved.
10742 //
10743 // FIXME: This is unnecessary if we process the DAG in topological order,
10744 // which we plan to do. This workaround can be removed once the DAG is
10745 // processed in topological order.
10746 if (N->hasOneUse()) {
10747 SDNode *Use = *N->use_begin();
10748
10749 // Look past the truncate.
10750 if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse())
10751 Use = *Use->use_begin();
10752
10753 if (Use->getOpcode() == ISD::BRCOND || Use->getOpcode() == ISD::AND ||
10754 Use->getOpcode() == ISD::OR || Use->getOpcode() == ISD::XOR)
10755 AddToWorklist(Use);
10756 }
10757
10758 // Try to transform this shift into a multiply-high if
10759 // it matches the appropriate pattern detected in combineShiftToMULH.
10760 if (SDValue MULH = combineShiftToMULH(N, DL, DAG, TLI))
10761 return MULH;
10762
10763 return SDValue();
10764}
10765
10766SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
10767 EVT VT = N->getValueType(0);
10768 SDValue N0 = N->getOperand(0);
10769 SDValue N1 = N->getOperand(1);
10770 SDValue N2 = N->getOperand(2);
10771 bool IsFSHL = N->getOpcode() == ISD::FSHL;
10772 unsigned BitWidth = VT.getScalarSizeInBits();
10773 SDLoc DL(N);
10774
10775 // fold (fshl N0, N1, 0) -> N0
10776 // fold (fshr N0, N1, 0) -> N1
10777 if (isPowerOf2_32(BitWidth))
10778 if (DAG.MaskedValueIsZero(
10779 N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
10780 return IsFSHL ? N0 : N1;
10781
10782 auto IsUndefOrZero = [](SDValue V) {
10783 return V.isUndef() || isNullOrNullSplat(V, /*AllowUndefs*/ true);
10784 };
10785
10786 // TODO - support non-uniform vector shift amounts.
10787 if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) {
10788 EVT ShAmtTy = N2.getValueType();
10789
10790 // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
10791 if (Cst->getAPIntValue().uge(BitWidth)) {
10792 uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth);
10793 return DAG.getNode(N->getOpcode(), DL, VT, N0, N1,
10794 DAG.getConstant(RotAmt, DL, ShAmtTy));
10795 }
10796
10797 unsigned ShAmt = Cst->getZExtValue();
10798 if (ShAmt == 0)
10799 return IsFSHL ? N0 : N1;
10800
10801 // fold fshl(undef_or_zero, N1, C) -> lshr(N1, BW-C)
10802 // fold fshr(undef_or_zero, N1, C) -> lshr(N1, C)
10803 // fold fshl(N0, undef_or_zero, C) -> shl(N0, C)
10804 // fold fshr(N0, undef_or_zero, C) -> shl(N0, BW-C)
10805 if (IsUndefOrZero(N0))
10806 return DAG.getNode(
10807 ISD::SRL, DL, VT, N1,
10808 DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt, DL, ShAmtTy));
10809 if (IsUndefOrZero(N1))
10810 return DAG.getNode(
10811 ISD::SHL, DL, VT, N0,
10812 DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt, DL, ShAmtTy));
10813
10814 // fold (fshl ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
10815 // fold (fshr ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
10816 // TODO - bigendian support once we have test coverage.
10817    // TODO - can we merge this with CombineConsecutiveLoads/MatchLoadCombine?
10818 // TODO - permit LHS EXTLOAD if extensions are shifted out.
10819 if ((BitWidth % 8) == 0 && (ShAmt % 8) == 0 && !VT.isVector() &&
10820 !DAG.getDataLayout().isBigEndian()) {
10821 auto *LHS = dyn_cast<LoadSDNode>(N0);
10822 auto *RHS = dyn_cast<LoadSDNode>(N1);
10823 if (LHS && RHS && LHS->isSimple() && RHS->isSimple() &&
10824 LHS->getAddressSpace() == RHS->getAddressSpace() &&
10825 (LHS->hasOneUse() || RHS->hasOneUse()) && ISD::isNON_EXTLoad(RHS) &&
10826 ISD::isNON_EXTLoad(LHS)) {
10827 if (DAG.areNonVolatileConsecutiveLoads(LHS, RHS, BitWidth / 8, 1)) {
10828 SDLoc DL(RHS);
10829 uint64_t PtrOff =
10830 IsFSHL ? (((BitWidth - ShAmt) % BitWidth) / 8) : (ShAmt / 8);
10831 Align NewAlign = commonAlignment(RHS->getAlign(), PtrOff);
10832 unsigned Fast = 0;
10833 if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
10834 RHS->getAddressSpace(), NewAlign,
10835 RHS->getMemOperand()->getFlags(), &Fast) &&
10836 Fast) {
10837 SDValue NewPtr = DAG.getMemBasePlusOffset(
10838 RHS->getBasePtr(), TypeSize::getFixed(PtrOff), DL);
10839 AddToWorklist(NewPtr.getNode());
10840 SDValue Load = DAG.getLoad(
10841 VT, DL, RHS->getChain(), NewPtr,
10842 RHS->getPointerInfo().getWithOffset(PtrOff), NewAlign,
10843 RHS->getMemOperand()->getFlags(), RHS->getAAInfo());
10844 // Replace the old load's chain with the new load's chain.
10845 WorklistRemover DeadNodes(*this);
10846 DAG.ReplaceAllUsesOfValueWith(N1.getValue(1), Load.getValue(1));
10847 return Load;
10848 }
10849 }
10850 }
10851 }
10852 }
10853
10854 // fold fshr(undef_or_zero, N1, N2) -> lshr(N1, N2)
10855 // fold fshl(N0, undef_or_zero, N2) -> shl(N0, N2)
10856  // iff we know the shift amount is in range.
10857 // TODO: when is it worth doing SUB(BW, N2) as well?
10858 if (isPowerOf2_32(BitWidth)) {
10859 APInt ModuloBits(N2.getScalarValueSizeInBits(), BitWidth - 1);
10860 if (IsUndefOrZero(N0) && !IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
10861 return DAG.getNode(ISD::SRL, DL, VT, N1, N2);
10862 if (IsUndefOrZero(N1) && IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
10863 return DAG.getNode(ISD::SHL, DL, VT, N0, N2);
10864 }
10865
10866 // fold (fshl N0, N0, N2) -> (rotl N0, N2)
10867 // fold (fshr N0, N0, N2) -> (rotr N0, N2)
10868 // TODO: Investigate flipping this rotate if only one is legal.
10869  // If funnel shift is legal as well, we might be better off avoiding
10870  // non-constant (BW - N2).
10871 unsigned RotOpc = IsFSHL ? ISD::ROTL : ISD::ROTR;
10872 if (N0 == N1 && hasOperation(RotOpc, VT))
10873 return DAG.getNode(RotOpc, DL, VT, N0, N2);
10874
10875 // Simplify, based on bits shifted out of N0/N1.
10876  if (SimplifyDemandedBits(SDValue(N, 0)))
10877    return SDValue(N, 0);
10878
10879 return SDValue();
10880}
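// Editor's note: illustrative sketch, not part of the LLVM source. Two scalar
// identities used by visitFunnelShift above, checked on 32-bit values: the
// funnel-shift amount is taken modulo the bit width, and fshl(x, x, c) is a
// rotate left by c.
static uint32_t fshl32(uint32_t Hi, uint32_t Lo, unsigned Amt) {
  Amt %= 32;                                 // fsh* amount is modulo the bit width
  return Amt ? (Hi << Amt) | (Lo >> (32 - Amt)) : Hi;
}
static bool checkFshlOfSameOpIsRotl(uint32_t X, unsigned Amt) {
  unsigned R = Amt % 32;
  uint32_t Rotl = R ? (X << R) | (X >> (32 - R)) : X;
  return fshl32(X, X, Amt) == Rotl;          // fshl(x, x, c) == rotl(x, c)
}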
10881
10882SDValue DAGCombiner::visitSHLSAT(SDNode *N) {
10883 SDValue N0 = N->getOperand(0);
10884 SDValue N1 = N->getOperand(1);
10885 if (SDValue V = DAG.simplifyShift(N0, N1))
10886 return V;
10887
10888 SDLoc DL(N);
10889 EVT VT = N0.getValueType();
10890
10891 // fold (*shlsat c1, c2) -> c1<<c2
10892 if (SDValue C = DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1}))
10893 return C;
10894
10895  ConstantSDNode *N1C = isConstOrConstSplat(N1);
10896
10897 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) {
10898 // fold (sshlsat x, c) -> (shl x, c)
10899 if (N->getOpcode() == ISD::SSHLSAT && N1C &&
10900 N1C->getAPIntValue().ult(DAG.ComputeNumSignBits(N0)))
10901 return DAG.getNode(ISD::SHL, DL, VT, N0, N1);
10902
10903 // fold (ushlsat x, c) -> (shl x, c)
10904 if (N->getOpcode() == ISD::USHLSAT && N1C &&
10905 N1C->getAPIntValue().ule(
10906            DAG.computeKnownBits(N0).countMinLeadingZeros()))
10907      return DAG.getNode(ISD::SHL, DL, VT, N0, N1);
10908 }
10909
10910 return SDValue();
10911}
10912
10913// Given an ABS node, detect the following patterns:
10914// (ABS (SUB (EXTEND a), (EXTEND b))).
10915// (TRUNC (ABS (SUB (EXTEND a), (EXTEND b)))).
10916// Generates UABD/SABD instruction.
10917SDValue DAGCombiner::foldABSToABD(SDNode *N, const SDLoc &DL) {
10918 EVT SrcVT = N->getValueType(0);
10919
10920 if (N->getOpcode() == ISD::TRUNCATE)
10921 N = N->getOperand(0).getNode();
10922
10923 if (N->getOpcode() != ISD::ABS)
10924 return SDValue();
10925
10926 EVT VT = N->getValueType(0);
10927 SDValue AbsOp1 = N->getOperand(0);
10928 SDValue Op0, Op1;
10929
10930 if (AbsOp1.getOpcode() != ISD::SUB)
10931 return SDValue();
10932
10933 Op0 = AbsOp1.getOperand(0);
10934 Op1 = AbsOp1.getOperand(1);
10935
10936 unsigned Opc0 = Op0.getOpcode();
10937
10938 // Check if the operands of the sub are (zero|sign)-extended.
10939 // TODO: Should we use ValueTracking instead?
10940 if (Opc0 != Op1.getOpcode() ||
10941 (Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND &&
10942 Opc0 != ISD::SIGN_EXTEND_INREG)) {
10943 // fold (abs (sub nsw x, y)) -> abds(x, y)
10944 if (AbsOp1->getFlags().hasNoSignedWrap() && hasOperation(ISD::ABDS, VT) &&
10945 TLI.preferABDSToABSWithNSW(VT)) {
10946 SDValue ABD = DAG.getNode(ISD::ABDS, DL, VT, Op0, Op1);
10947 return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
10948 }
10949 return SDValue();
10950 }
10951
10952 EVT VT0, VT1;
10953 if (Opc0 == ISD::SIGN_EXTEND_INREG) {
10954 VT0 = cast<VTSDNode>(Op0.getOperand(1))->getVT();
10955 VT1 = cast<VTSDNode>(Op1.getOperand(1))->getVT();
10956 } else {
10957 VT0 = Op0.getOperand(0).getValueType();
10958 VT1 = Op1.getOperand(0).getValueType();
10959 }
10960 unsigned ABDOpcode = (Opc0 == ISD::ZERO_EXTEND) ? ISD::ABDU : ISD::ABDS;
10961
10962 // fold abs(sext(x) - sext(y)) -> zext(abds(x, y))
10963 // fold abs(zext(x) - zext(y)) -> zext(abdu(x, y))
10964 EVT MaxVT = VT0.bitsGT(VT1) ? VT0 : VT1;
10965 if ((VT0 == MaxVT || Op0->hasOneUse()) &&
10966 (VT1 == MaxVT || Op1->hasOneUse()) && hasOperation(ABDOpcode, MaxVT)) {
10967 SDValue ABD = DAG.getNode(ABDOpcode, DL, MaxVT,
10968 DAG.getNode(ISD::TRUNCATE, DL, MaxVT, Op0),
10969 DAG.getNode(ISD::TRUNCATE, DL, MaxVT, Op1));
10970 ABD = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, ABD);
10971 return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
10972 }
10973
10974 // fold abs(sext(x) - sext(y)) -> abds(sext(x), sext(y))
10975 // fold abs(zext(x) - zext(y)) -> abdu(zext(x), zext(y))
10976 if (hasOperation(ABDOpcode, VT)) {
10977 SDValue ABD = DAG.getNode(ABDOpcode, DL, VT, Op0, Op1);
10978 return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
10979 }
10980
10981 return SDValue();
10982}
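// Editor's note: illustrative sketch, not part of the LLVM source. The ABD
// fold above uses the identity abs(sext(a) - sext(b)) == zext(abds(a, b)) for
// narrow a and b; a scalar check with i8 operands widened to i32:
static uint8_t abds8(int8_t A, int8_t B) {
  int16_t D = (int16_t)A - (int16_t)B;              // no overflow in 16 bits
  return (uint8_t)(D < 0 ? -D : D);                 // signed absolute difference
}
static bool checkAbsSubSextIsAbds(int8_t A, int8_t B) {
  int32_t Wide = (int32_t)A - (int32_t)B;           // sub of sign-extended values
  uint32_t AbsWide = (uint32_t)(Wide < 0 ? -Wide : Wide);
  return AbsWide == (uint32_t)abds8(A, B);          // matches zext(abds(a, b))
}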
10983
10984SDValue DAGCombiner::visitABS(SDNode *N) {
10985 SDValue N0 = N->getOperand(0);
10986 EVT VT = N->getValueType(0);
10987 SDLoc DL(N);
10988
10989 // fold (abs c1) -> c2
10990 if (SDValue C = DAG.FoldConstantArithmetic(ISD::ABS, DL, VT, {N0}))
10991 return C;
10992 // fold (abs (abs x)) -> (abs x)
10993 if (N0.getOpcode() == ISD::ABS)
10994 return N0;
10995 // fold (abs x) -> x iff not-negative
10996 if (DAG.SignBitIsZero(N0))
10997 return N0;
10998
10999 if (SDValue ABD = foldABSToABD(N, DL))
11000 return ABD;
11001
11002 // fold (abs (sign_extend_inreg x)) -> (zero_extend (abs (truncate x)))
11003 // iff zero_extend/truncate are free.
11004 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
11005 EVT ExtVT = cast<VTSDNode>(N0.getOperand(1))->getVT();
11006 if (TLI.isTruncateFree(VT, ExtVT) && TLI.isZExtFree(ExtVT, VT) &&
11007 TLI.isTypeDesirableForOp(ISD::ABS, ExtVT) &&
11008 hasOperation(ISD::ABS, ExtVT)) {
11009 return DAG.getNode(
11010 ISD::ZERO_EXTEND, DL, VT,
11011 DAG.getNode(ISD::ABS, DL, ExtVT,
11012 DAG.getNode(ISD::TRUNCATE, DL, ExtVT, N0.getOperand(0))));
11013 }
11014 }
11015
11016 return SDValue();
11017}
11018
11019SDValue DAGCombiner::visitBSWAP(SDNode *N) {
11020 SDValue N0 = N->getOperand(0);
11021 EVT VT = N->getValueType(0);
11022 SDLoc DL(N);
11023
11024 // fold (bswap c1) -> c2
11025 if (SDValue C = DAG.FoldConstantArithmetic(ISD::BSWAP, DL, VT, {N0}))
11026 return C;
11027 // fold (bswap (bswap x)) -> x
11028 if (N0.getOpcode() == ISD::BSWAP)
11029 return N0.getOperand(0);
11030
11031 // Canonicalize bswap(bitreverse(x)) -> bitreverse(bswap(x)). If bitreverse
11032 // isn't supported, it will be expanded to bswap followed by a manual reversal
11033 // of bits in each byte. By placing bswaps before bitreverse, we can remove
11034 // the two bswaps if the bitreverse gets expanded.
11035 if (N0.getOpcode() == ISD::BITREVERSE && N0.hasOneUse()) {
11036 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, N0.getOperand(0));
11037 return DAG.getNode(ISD::BITREVERSE, DL, VT, BSwap);
11038 }
11039
11040 // fold (bswap shl(x,c)) -> (zext(bswap(trunc(shl(x,sub(c,bw/2))))))
11041  // iff c >= bw/2 (i.e. the lower half of x is known zero)
11042 unsigned BW = VT.getScalarSizeInBits();
11043 if (BW >= 32 && N0.getOpcode() == ISD::SHL && N0.hasOneUse()) {
11044 auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
11045 EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), BW / 2);
11046 if (ShAmt && ShAmt->getAPIntValue().ult(BW) &&
11047 ShAmt->getZExtValue() >= (BW / 2) &&
11048 (ShAmt->getZExtValue() % 16) == 0 && TLI.isTypeLegal(HalfVT) &&
11049 TLI.isTruncateFree(VT, HalfVT) &&
11050 (!LegalOperations || hasOperation(ISD::BSWAP, HalfVT))) {
11051 SDValue Res = N0.getOperand(0);
11052 if (uint64_t NewShAmt = (ShAmt->getZExtValue() - (BW / 2)))
11053 Res = DAG.getNode(ISD::SHL, DL, VT, Res,
11054 DAG.getShiftAmountConstant(NewShAmt, VT, DL));
11055 Res = DAG.getZExtOrTrunc(Res, DL, HalfVT);
11056 Res = DAG.getNode(ISD::BSWAP, DL, HalfVT, Res);
11057 return DAG.getZExtOrTrunc(Res, DL, VT);
11058 }
11059 }
11060
11061 // Try to canonicalize bswap-of-logical-shift-by-8-bit-multiple as
11062 // inverse-shift-of-bswap:
11063 // bswap (X u<< C) --> (bswap X) u>> C
11064 // bswap (X u>> C) --> (bswap X) u<< C
11065 if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
11066 N0.hasOneUse()) {
11067 auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
11068 if (ShAmt && ShAmt->getAPIntValue().ult(BW) &&
11069 ShAmt->getZExtValue() % 8 == 0) {
11070 SDValue NewSwap = DAG.getNode(ISD::BSWAP, DL, VT, N0.getOperand(0));
11071 unsigned InverseShift = N0.getOpcode() == ISD::SHL ? ISD::SRL : ISD::SHL;
11072 return DAG.getNode(InverseShift, DL, VT, NewSwap, N0.getOperand(1));
11073 }
11074 }
11075
11076 if (SDValue V = foldBitOrderCrossLogicOp(N, DAG))
11077 return V;
11078
11079 return SDValue();
11080}
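// Editor's note: illustrative sketch, not part of the LLVM source. The
// shift/bswap canonicalization above uses the byte-level identity
// bswap(x << C) == bswap(x) >> C (and the srl/shl dual) when C is a multiple
// of 8, checked here for 32-bit values:
static uint32_t bswap32(uint32_t X) {
  return (X >> 24) | ((X >> 8) & 0x0000FF00u) | ((X << 8) & 0x00FF0000u) |
         (X << 24);
}
static bool checkBswapOfShl(uint32_t X, unsigned C) {
  C = (C % 4) * 8;                                  // keep C a multiple of 8, below 32
  uint32_t Lhs = bswap32(X << C);                   // bswap (X u<< C)
  uint32_t Rhs = bswap32(X) >> C;                   // (bswap X) u>> C
  return Lhs == Rhs;
}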
11081
11082SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
11083 SDValue N0 = N->getOperand(0);
11084 EVT VT = N->getValueType(0);
11085 SDLoc DL(N);
11086
11087 // fold (bitreverse c1) -> c2
11088 if (SDValue C = DAG.FoldConstantArithmetic(ISD::BITREVERSE, DL, VT, {N0}))
11089 return C;
11090
11091 // fold (bitreverse (bitreverse x)) -> x
11092 if (N0.getOpcode() == ISD::BITREVERSE)
11093 return N0.getOperand(0);
11094
11095 SDValue X, Y;
11096
11097 // fold (bitreverse (lshr (bitreverse x), y)) -> (shl x, y)
11098 if ((!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
11099      sd_match(N, m_BitReverse(m_Srl(m_BitReverse(m_Value(X)), m_Value(Y)))))
11100    return DAG.getNode(ISD::SHL, DL, VT, X, Y);
11101
11102 // fold (bitreverse (shl (bitreverse x), y)) -> (lshr x, y)
11103 if ((!LegalOperations || TLI.isOperationLegal(ISD::SRL, VT)) &&
11104      sd_match(N, m_BitReverse(m_Shl(m_BitReverse(m_Value(X)), m_Value(Y)))))
11105    return DAG.getNode(ISD::SRL, DL, VT, X, Y);
11106
11107 return SDValue();
11108}
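// Editor's note: illustrative sketch, not part of the LLVM source. The two
// folds above rest on the identity bitreverse(lshr(bitreverse(x), y)) ==
// shl(x, y) (and the shl/lshr dual), checked here on 8-bit values:
static uint8_t bitreverse8(uint8_t X) {
  uint8_t R = 0;
  for (unsigned I = 0; I < 8; ++I)
    R |= ((X >> I) & 1u) << (7 - I);                // mirror bit I to bit 7-I
  return R;
}
static bool checkBitreverseOfShiftedBitreverse(uint8_t X, unsigned Y) {
  Y %= 8;
  uint8_t Lhs = bitreverse8((uint8_t)(bitreverse8(X) >> Y));
  uint8_t Rhs = (uint8_t)(X << Y);
  return Lhs == Rhs;
}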
11109
11110SDValue DAGCombiner::visitCTLZ(SDNode *N) {
11111 SDValue N0 = N->getOperand(0);
11112 EVT VT = N->getValueType(0);
11113 SDLoc DL(N);
11114
11115 // fold (ctlz c1) -> c2
11116 if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTLZ, DL, VT, {N0}))
11117 return C;
11118
11119 // If the value is known never to be zero, switch to the undef version.
11120 if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT))
11121 if (DAG.isKnownNeverZero(N0))
11122 return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, DL, VT, N0);
11123
11124 return SDValue();
11125}
11126
11127SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
11128 SDValue N0 = N->getOperand(0);
11129 EVT VT = N->getValueType(0);
11130 SDLoc DL(N);
11131
11132 // fold (ctlz_zero_undef c1) -> c2
11133 if (SDValue C =
11134          DAG.FoldConstantArithmetic(ISD::CTLZ_ZERO_UNDEF, DL, VT, {N0}))
11135    return C;
11136 return SDValue();
11137}
11138
11139SDValue DAGCombiner::visitCTTZ(SDNode *N) {
11140 SDValue N0 = N->getOperand(0);
11141 EVT VT = N->getValueType(0);
11142 SDLoc DL(N);
11143
11144 // fold (cttz c1) -> c2
11145 if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTTZ, DL, VT, {N0}))
11146 return C;
11147
11148 // If the value is known never to be zero, switch to the undef version.
11149 if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT))
11150 if (DAG.isKnownNeverZero(N0))
11151 return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, DL, VT, N0);
11152
11153 return SDValue();
11154}
11155
11156SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
11157 SDValue N0 = N->getOperand(0);
11158 EVT VT = N->getValueType(0);
11159 SDLoc DL(N);
11160
11161 // fold (cttz_zero_undef c1) -> c2
11162 if (SDValue C =
11163          DAG.FoldConstantArithmetic(ISD::CTTZ_ZERO_UNDEF, DL, VT, {N0}))
11164    return C;
11165 return SDValue();
11166}
11167
11168SDValue DAGCombiner::visitCTPOP(SDNode *N) {
11169 SDValue N0 = N->getOperand(0);
11170 EVT VT = N->getValueType(0);
11171 unsigned NumBits = VT.getScalarSizeInBits();
11172 SDLoc DL(N);
11173
11174 // fold (ctpop c1) -> c2
11175 if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTPOP, DL, VT, {N0}))
11176 return C;
11177
11178 // If the source is being shifted, but doesn't affect any active bits,
11179 // then we can call CTPOP on the shift source directly.
11180 if (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SHL) {
11181 if (ConstantSDNode *AmtC = isConstOrConstSplat(N0.getOperand(1))) {
11182 const APInt &Amt = AmtC->getAPIntValue();
11183 if (Amt.ult(NumBits)) {
11184 KnownBits KnownSrc = DAG.computeKnownBits(N0.getOperand(0));
11185 if ((N0.getOpcode() == ISD::SRL &&
11186 Amt.ule(KnownSrc.countMinTrailingZeros())) ||
11187 (N0.getOpcode() == ISD::SHL &&
11188 Amt.ule(KnownSrc.countMinLeadingZeros()))) {
11189 return DAG.getNode(ISD::CTPOP, DL, VT, N0.getOperand(0));
11190 }
11191 }
11192 }
11193 }
11194
11195  // If the upper bits are known to be zero, then see if it's profitable to
11196 // only count the lower bits.
11197 if (VT.isScalarInteger() && NumBits > 8 && (NumBits & 1) == 0) {
11198 EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), NumBits / 2);
11199 if (hasOperation(ISD::CTPOP, HalfVT) &&
11200 TLI.isTypeDesirableForOp(ISD::CTPOP, HalfVT) &&
11201 TLI.isTruncateFree(N0, HalfVT) && TLI.isZExtFree(HalfVT, VT)) {
11202 APInt UpperBits = APInt::getHighBitsSet(NumBits, NumBits / 2);
11203 if (DAG.MaskedValueIsZero(N0, UpperBits)) {
11204 SDValue PopCnt = DAG.getNode(ISD::CTPOP, DL, HalfVT,
11205 DAG.getZExtOrTrunc(N0, DL, HalfVT));
11206 return DAG.getZExtOrTrunc(PopCnt, DL, VT);
11207 }
11208 }
11209 }
11210
11211 return SDValue();
11212}
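// Editor's note: illustrative sketch, not part of the LLVM source. The
// CTPOP-of-shift fold above uses the fact that shifting out known-zero bits
// does not change the population count (shown for SRL; assumes GCC/Clang
// __builtin_popcount):
static bool checkCtpopOfSrl(uint32_t X, unsigned C) {
  C %= 32;
  X &= ~0u << C;                                    // force the low C bits to zero
  return __builtin_popcount(X >> C) == __builtin_popcount(X);
}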
11213
11214static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS,
11215                                         SDValue RHS, const SDNodeFlags Flags,
11216 const TargetLowering &TLI) {
11217 EVT VT = LHS.getValueType();
11218 if (!VT.isFloatingPoint())
11219 return false;
11220
11221 const TargetOptions &Options = DAG.getTarget().Options;
11222
11223 return (Flags.hasNoSignedZeros() || Options.NoSignedZerosFPMath) &&
11224         TLI.isProfitableToCombineMinNumMaxNum(VT) &&
11225         (Flags.hasNoNaNs() ||
11226 (DAG.isKnownNeverNaN(RHS) && DAG.isKnownNeverNaN(LHS)));
11227}
11228
11229static SDValue combineMinNumMaxNumImpl(const SDLoc &DL, EVT VT, SDValue LHS,
11230                                       SDValue RHS, SDValue True, SDValue False,
11231                                       ISD::CondCode CC,
11232                                       const TargetLowering &TLI,
11233 SelectionDAG &DAG) {
11234 EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
11235 switch (CC) {
11236 case ISD::SETOLT:
11237 case ISD::SETOLE:
11238 case ISD::SETLT:
11239 case ISD::SETLE:
11240 case ISD::SETULT:
11241 case ISD::SETULE: {
11242    // Since the operands are already known never to be NaN here, either
11243    // fminnum or fminnum_ieee is OK. Try the IEEE version first, since
11244    // fminnum is expanded in terms of it.
11245 unsigned IEEEOpcode = (LHS == True) ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
11246 if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
11247 return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
11248
11249 unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
11250 if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
11251 return DAG.getNode(Opcode, DL, VT, LHS, RHS);
11252 return SDValue();
11253 }
11254 case ISD::SETOGT:
11255 case ISD::SETOGE:
11256 case ISD::SETGT:
11257 case ISD::SETGE:
11258 case ISD::SETUGT:
11259 case ISD::SETUGE: {
11260 unsigned IEEEOpcode = (LHS == True) ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
11261 if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
11262 return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
11263
11264 unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
11265 if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
11266 return DAG.getNode(Opcode, DL, VT, LHS, RHS);
11267 return SDValue();
11268 }
11269 default:
11270 return SDValue();
11271 }
11272}
11273
11274/// Generate Min/Max node
11275SDValue DAGCombiner::combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
11276 SDValue RHS, SDValue True,
11277 SDValue False, ISD::CondCode CC) {
11278 if ((LHS == True && RHS == False) || (LHS == False && RHS == True))
11279 return combineMinNumMaxNumImpl(DL, VT, LHS, RHS, True, False, CC, TLI, DAG);
11280
11281 // If we can't directly match this, try to see if we can pull an fneg out of
11282 // the select.
11283  SDValue NegTrue = TLI.getCheaperNegatedExpression(
11284      True, DAG, LegalOperations, ForCodeSize);
11285 if (!NegTrue)
11286 return SDValue();
11287
11288 HandleSDNode NegTrueHandle(NegTrue);
11289
11290 // Try to unfold an fneg from the select if we are comparing the negated
11291 // constant.
11292 //
11293 // select (setcc x, K) (fneg x), -K -> fneg(minnum(x, K))
11294 //
11295 // TODO: Handle fabs
11296 if (LHS == NegTrue) {
11297 // If we can't directly match this, try to see if we can pull an fneg out of
11298 // the select.
11299    SDValue NegRHS = TLI.getCheaperNegatedExpression(
11300        RHS, DAG, LegalOperations, ForCodeSize);
11301 if (NegRHS) {
11302 HandleSDNode NegRHSHandle(NegRHS);
11303 if (NegRHS == False) {
11304 SDValue Combined = combineMinNumMaxNumImpl(DL, VT, LHS, RHS, NegTrue,
11305 False, CC, TLI, DAG);
11306 if (Combined)
11307 return DAG.getNode(ISD::FNEG, DL, VT, Combined);
11308 }
11309 }
11310 }
11311
11312 return SDValue();
11313}
11314
11315/// If a (v)select has a condition value that is a sign-bit test, try to smear
11316/// the condition operand sign-bit across the value width and use it as a mask.
11317static SDValue foldSelectOfConstantsUsingSra(SDNode *N, const SDLoc &DL,
11318                                             SelectionDAG &DAG) {
11319 SDValue Cond = N->getOperand(0);
11320 SDValue C1 = N->getOperand(1);
11321 SDValue C2 = N->getOperand(2);
11322  if (!isConstantOrConstantVector(C1) || !isConstantOrConstantVector(C2))
11323    return SDValue();
11324
11325 EVT VT = N->getValueType(0);
11326 if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse() ||
11327 VT != Cond.getOperand(0).getValueType())
11328 return SDValue();
11329
11330 // The inverted-condition + commuted-select variants of these patterns are
11331 // canonicalized to these forms in IR.
11332 SDValue X = Cond.getOperand(0);
11333 SDValue CondC = Cond.getOperand(1);
11334 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
11335 if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CondC) &&
11336      isAllOnesOrAllOnesSplat(C2)) {
11337    // i32 X > -1 ? C1 : -1 --> (X >>s 31) | C1
11338 SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
11339 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
11340 return DAG.getNode(ISD::OR, DL, VT, Sra, C1);
11341 }
11342 if (CC == ISD::SETLT && isNullOrNullSplat(CondC) && isNullOrNullSplat(C2)) {
11343 // i8 X < 0 ? C1 : 0 --> (X >>s 7) & C1
11344 SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
11345 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
11346 return DAG.getNode(ISD::AND, DL, VT, Sra, C1);
11347 }
11348 return SDValue();
11349}
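// Editor's note: illustrative sketch, not part of the LLVM source. The
// sign-bit-smear fold above uses these i32 identities (assuming arithmetic
// right shift on signed int, as C++20 guarantees):
static bool checkSelectSignBitSmear(int32_t X, uint32_t C1) {
  uint32_t Sra = (uint32_t)(X >> 31);               // 0 if X >= 0, all-ones if X < 0
  uint32_t SelGt = (X > -1) ? C1 : 0xFFFFFFFFu;     // X > -1 ? C1 : -1
  uint32_t SelLt = (X < 0) ? C1 : 0u;               // X <  0 ? C1 : 0
  return SelGt == (Sra | C1) && SelLt == (Sra & C1);
}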
11350
11351static bool shouldConvertSelectOfConstantsToMath(SDValue Cond, EVT VT,
11352                                                 const TargetLowering &TLI) {
11353 if (!TLI.convertSelectOfConstantsToMath(VT))
11354 return false;
11355
11356 if (Cond.getOpcode() != ISD::SETCC || !Cond->hasOneUse())
11357 return true;
11358  if (!TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))
11359    return true;
11360
11361 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
11362 if (CC == ISD::SETLT && isNullOrNullSplat(Cond.getOperand(1)))
11363 return true;
11364 if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(Cond.getOperand(1)))
11365 return true;
11366
11367 return false;
11368}
11369
11370SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
11371 SDValue Cond = N->getOperand(0);
11372 SDValue N1 = N->getOperand(1);
11373 SDValue N2 = N->getOperand(2);
11374 EVT VT = N->getValueType(0);
11375 EVT CondVT = Cond.getValueType();
11376 SDLoc DL(N);
11377
11378 if (!VT.isInteger())
11379 return SDValue();
11380
11381 auto *C1 = dyn_cast<ConstantSDNode>(N1);
11382 auto *C2 = dyn_cast<ConstantSDNode>(N2);
11383 if (!C1 || !C2)
11384 return SDValue();
11385
11386 if (CondVT != MVT::i1 || LegalOperations) {
11387 // fold (select Cond, 0, 1) -> (xor Cond, 1)
11388 // We can't do this reliably if integer based booleans have different contents
11389 // to floating point based booleans. This is because we can't tell whether we
11390 // have an integer-based boolean or a floating-point-based boolean unless we
11391 // can find the SETCC that produced it and inspect its operands. This is
11392 // fairly easy if C is the SETCC node, but it can potentially be
11393 // undiscoverable (or not reasonably discoverable). For example, it could be
11394 // in another basic block or it could require searching a complicated
11395 // expression.
11396 if (CondVT.isInteger() &&
11397 TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==
11398            TargetLowering::ZeroOrOneBooleanContent &&
11399        TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
11400            TargetLowering::ZeroOrOneBooleanContent &&
11401        C1->isZero() && C2->isOne()) {
11402 SDValue NotCond =
11403 DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
11404 if (VT.bitsEq(CondVT))
11405 return NotCond;
11406 return DAG.getZExtOrTrunc(NotCond, DL, VT);
11407 }
11408
11409 return SDValue();
11410 }
11411
11412 // Only do this before legalization to avoid conflicting with target-specific
11413 // transforms in the other direction (create a select from a zext/sext). There
11414 // is also a target-independent combine here in DAGCombiner in the other
11415 // direction for (select Cond, -1, 0) when the condition is not i1.
11416 assert(CondVT == MVT::i1 && !LegalOperations);
11417
11418 // select Cond, 1, 0 --> zext (Cond)
11419 if (C1->isOne() && C2->isZero())
11420 return DAG.getZExtOrTrunc(Cond, DL, VT);
11421
11422 // select Cond, -1, 0 --> sext (Cond)
11423 if (C1->isAllOnes() && C2->isZero())
11424 return DAG.getSExtOrTrunc(Cond, DL, VT);
11425
11426 // select Cond, 0, 1 --> zext (!Cond)
11427 if (C1->isZero() && C2->isOne()) {
11428 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
11429 NotCond = DAG.getZExtOrTrunc(NotCond, DL, VT);
11430 return NotCond;
11431 }
11432
11433 // select Cond, 0, -1 --> sext (!Cond)
11434 if (C1->isZero() && C2->isAllOnes()) {
11435 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
11436 NotCond = DAG.getSExtOrTrunc(NotCond, DL, VT);
11437 return NotCond;
11438 }
11439
11440 // Use a target hook because some targets may prefer to transform in the
11441 // other direction.
11442  if (!shouldConvertSelectOfConstantsToMath(Cond, VT, TLI))
11443    return SDValue();
11444
11445 // For any constants that differ by 1, we can transform the select into
11446 // an extend and add.
11447 const APInt &C1Val = C1->getAPIntValue();
11448 const APInt &C2Val = C2->getAPIntValue();
11449
11450 // select Cond, C1, C1-1 --> add (zext Cond), C1-1
11451 if (C1Val - 1 == C2Val) {
11452 Cond = DAG.getZExtOrTrunc(Cond, DL, VT);
11453 return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
11454 }
11455
11456 // select Cond, C1, C1+1 --> add (sext Cond), C1+1
11457 if (C1Val + 1 == C2Val) {
11458 Cond = DAG.getSExtOrTrunc(Cond, DL, VT);
11459 return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
11460 }
11461
11462 // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
11463 if (C1Val.isPowerOf2() && C2Val.isZero()) {
11464 Cond = DAG.getZExtOrTrunc(Cond, DL, VT);
11465 SDValue ShAmtC =
11466 DAG.getShiftAmountConstant(C1Val.exactLogBase2(), VT, DL);
11467 return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC);
11468 }
11469
11470 // select Cond, -1, C --> or (sext Cond), C
11471 if (C1->isAllOnes()) {
11472 Cond = DAG.getSExtOrTrunc(Cond, DL, VT);
11473 return DAG.getNode(ISD::OR, DL, VT, Cond, N2);
11474 }
11475
11476 // select Cond, C, -1 --> or (sext (not Cond)), C
11477 if (C2->isAllOnes()) {
11478 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
11479 NotCond = DAG.getSExtOrTrunc(NotCond, DL, VT);
11480 return DAG.getNode(ISD::OR, DL, VT, NotCond, N1);
11481 }
11482
11483  if (SDValue V = foldSelectOfConstantsUsingSra(N, DL, DAG))
11484    return V;
11485
11486 return SDValue();
11487}
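// Editor's note: illustrative sketch, not part of the LLVM source. Two of the
// select-of-constants folds above, restated as scalar identities on i32:
static bool checkSelectOfConstantsMath(bool Cond, uint32_t C1, unsigned Log2) {
  // select Cond, C1, C1-1 --> add (zext Cond), C1-1
  bool AddForm = (Cond ? C1 : C1 - 1) == (uint32_t)Cond + (C1 - 1);
  // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
  Log2 %= 32;
  uint32_t Pow2 = 1u << Log2;
  bool ShlForm = (Cond ? Pow2 : 0u) == ((uint32_t)Cond << Log2);
  return AddForm && ShlForm;
}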
11488
11489template <class MatchContextClass>
11490static SDValue foldBoolSelectToLogic(SDNode *N, const SDLoc &DL,
11491                                     SelectionDAG &DAG) {
11492 assert((N->getOpcode() == ISD::SELECT || N->getOpcode() == ISD::VSELECT ||
11493 N->getOpcode() == ISD::VP_SELECT) &&
11494 "Expected a (v)(vp.)select");
11495 SDValue Cond = N->getOperand(0);
11496 SDValue T = N->getOperand(1), F = N->getOperand(2);
11497 EVT VT = N->getValueType(0);
11498 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11499 MatchContextClass matcher(DAG, TLI, N);
11500
11501 if (VT != Cond.getValueType() || VT.getScalarSizeInBits() != 1)
11502 return SDValue();
11503
11504 // select Cond, Cond, F --> or Cond, freeze(F)
11505 // select Cond, 1, F --> or Cond, freeze(F)
11506 if (Cond == T || isOneOrOneSplat(T, /* AllowUndefs */ true))
11507 return matcher.getNode(ISD::OR, DL, VT, Cond, DAG.getFreeze(F));
11508
11509 // select Cond, T, Cond --> and Cond, freeze(T)
11510 // select Cond, T, 0 --> and Cond, freeze(T)
11511 if (Cond == F || isNullOrNullSplat(F, /* AllowUndefs */ true))
11512 return matcher.getNode(ISD::AND, DL, VT, Cond, DAG.getFreeze(T));
11513
11514 // select Cond, T, 1 --> or (not Cond), freeze(T)
11515 if (isOneOrOneSplat(F, /* AllowUndefs */ true)) {
11516 SDValue NotCond =
11517 matcher.getNode(ISD::XOR, DL, VT, Cond, DAG.getAllOnesConstant(DL, VT));
11518 return matcher.getNode(ISD::OR, DL, VT, NotCond, DAG.getFreeze(T));
11519 }
11520
11521 // select Cond, 0, F --> and (not Cond), freeze(F)
11522 if (isNullOrNullSplat(T, /* AllowUndefs */ true)) {
11523 SDValue NotCond =
11524 matcher.getNode(ISD::XOR, DL, VT, Cond, DAG.getAllOnesConstant(DL, VT));
11525 return matcher.getNode(ISD::AND, DL, VT, NotCond, DAG.getFreeze(F));
11526 }
11527
11528 return SDValue();
11529}
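// Editor's note: illustrative sketch, not part of the LLVM source. The i1
// select-to-logic rewrites above (ignoring the freeze bookkeeping), stated as
// plain boolean identities:
static bool checkBoolSelectToLogic(bool C, bool T, bool F) {
  bool Or = (C ? true : F) == (C | F);              // select C, 1, F --> or C, F
  bool And = (C ? T : false) == (C & T);            // select C, T, 0 --> and C, T
  bool OrNot = (C ? T : true) == (!C | T);          // select C, T, 1 --> or (not C), T
  bool AndNot = (C ? false : F) == (!C & F);        // select C, 0, F --> and (not C), F
  return Or && And && OrNot && AndNot;
}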
11530
11531static SDValue foldVSelectToSignBitSplatMask(SDNode *N, SelectionDAG &DAG) {
11532  SDValue N0 = N->getOperand(0);
11533 SDValue N1 = N->getOperand(1);
11534 SDValue N2 = N->getOperand(2);
11535 EVT VT = N->getValueType(0);
11536
11537 SDValue Cond0, Cond1;
11538  ISD::CondCode CC;
11539  if (!sd_match(N0, m_OneUse(m_SetCC(m_Value(Cond0), m_Value(Cond1),
11540 m_CondCode(CC)))) ||
11541 VT != Cond0.getValueType())
11542 return SDValue();
11543
11544 // Match a signbit check of Cond0 as "Cond0 s<0". Swap select operands if the
11545 // compare is inverted from that pattern ("Cond0 s> -1").
11546 if (CC == ISD::SETLT && isNullOrNullSplat(Cond1))
11547 ; // This is the pattern we are looking for.
11548 else if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(Cond1))
11549 std::swap(N1, N2);
11550 else
11551 return SDValue();
11552
11553 // (Cond0 s< 0) ? N1 : 0 --> (Cond0 s>> BW-1) & freeze(N1)
11554 if (isNullOrNullSplat(N2)) {
11555 SDLoc DL(N);
11556 SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
11557 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
11558 return DAG.getNode(ISD::AND, DL, VT, Sra, DAG.getFreeze(N1));
11559 }
11560
11561 // (Cond0 s< 0) ? -1 : N2 --> (Cond0 s>> BW-1) | freeze(N2)
11562 if (isAllOnesOrAllOnesSplat(N1)) {
11563 SDLoc DL(N);
11564 SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
11565 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
11566 return DAG.getNode(ISD::OR, DL, VT, Sra, DAG.getFreeze(N2));
11567 }
11568
11569 // If we have to invert the sign bit mask, only do that transform if the
11570 // target has a bitwise 'and not' instruction (the invert is free).
11571 // (Cond0 s< -0) ? 0 : N2 --> ~(Cond0 s>> BW-1) & freeze(N2)
11572 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11573 if (isNullOrNullSplat(N1) && TLI.hasAndNot(N1)) {
11574 SDLoc DL(N);
11575 SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
11576 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
11577 SDValue Not = DAG.getNOT(DL, Sra, VT);
11578 return DAG.getNode(ISD::AND, DL, VT, Not, DAG.getFreeze(N2));
11579 }
11580
11581 // TODO: There's another pattern in this family, but it may require
11582 // implementing hasOrNot() to check for profitability:
11583 // (Cond0 s> -1) ? -1 : N2 --> ~(Cond0 s>> BW-1) | freeze(N2)
11584
11585 return SDValue();
11586}
11587
11588SDValue DAGCombiner::visitSELECT(SDNode *N) {
11589 SDValue N0 = N->getOperand(0);
11590 SDValue N1 = N->getOperand(1);
11591 SDValue N2 = N->getOperand(2);
11592 EVT VT = N->getValueType(0);
11593 EVT VT0 = N0.getValueType();
11594 SDLoc DL(N);
11595 SDNodeFlags Flags = N->getFlags();
11596
11597 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
11598 return V;
11599
11600 if (SDValue V = foldBoolSelectToLogic<EmptyMatchContext>(N, DL, DAG))
11601 return V;
11602
11603 // select (not Cond), N1, N2 -> select Cond, N2, N1
11604 if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) {
11605 SDValue SelectOp = DAG.getSelect(DL, VT, F, N2, N1);
11606 SelectOp->setFlags(Flags);
11607 return SelectOp;
11608 }
11609
11610 if (SDValue V = foldSelectOfConstants(N))
11611 return V;
11612
11613 // If we can fold this based on the true/false value, do so.
11614 if (SimplifySelectOps(N, N1, N2))
11615 return SDValue(N, 0); // Don't revisit N.
11616
11617 if (VT0 == MVT::i1) {
11618 // The code in this block deals with the following 2 equivalences:
11619 // select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
11620 // select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
11621 // The target can specify its preferred form with the
11622 // shouldNormalizeToSelectSequence() callback. However we always transform
11623 // to the right anyway if we find the inner select exists in the DAG anyway
11624 // and we always transform to the left side if we know that we can further
11625 // optimize the combination of the conditions.
11626 bool normalizeToSequence =
11627        TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
11628    // select (and Cond0, Cond1), X, Y
11629 // -> select Cond0, (select Cond1, X, Y), Y
11630 if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
11631 SDValue Cond0 = N0->getOperand(0);
11632 SDValue Cond1 = N0->getOperand(1);
11633 SDValue InnerSelect =
11634 DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2, Flags);
11635 if (normalizeToSequence || !InnerSelect.use_empty())
11636 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
11637 InnerSelect, N2, Flags);
11638 // Cleanup on failure.
11639 if (InnerSelect.use_empty())
11640 recursivelyDeleteUnusedNodes(InnerSelect.getNode());
11641 }
11642 // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
11643 if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
11644 SDValue Cond0 = N0->getOperand(0);
11645 SDValue Cond1 = N0->getOperand(1);
11646 SDValue InnerSelect = DAG.getNode(ISD::SELECT, DL, N1.getValueType(),
11647 Cond1, N1, N2, Flags);
11648 if (normalizeToSequence || !InnerSelect.use_empty())
11649 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
11650 InnerSelect, Flags);
11651 // Cleanup on failure.
11652 if (InnerSelect.use_empty())
11653 recursivelyDeleteUnusedNodes(InnerSelect.getNode());
11654 }
11655
11656 // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
11657 if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
11658 SDValue N1_0 = N1->getOperand(0);
11659 SDValue N1_1 = N1->getOperand(1);
11660 SDValue N1_2 = N1->getOperand(2);
11661 if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
11662 // Create the actual and node if we can generate good code for it.
11663 if (!normalizeToSequence) {
11664 SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
11665 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1,
11666 N2, Flags);
11667 }
11668 // Otherwise see if we can optimize the "and" to a better pattern.
11669 if (SDValue Combined = visitANDLike(N0, N1_0, N)) {
11670 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
11671 N2, Flags);
11672 }
11673 }
11674 }
11675 // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
11676 if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
11677 SDValue N2_0 = N2->getOperand(0);
11678 SDValue N2_1 = N2->getOperand(1);
11679 SDValue N2_2 = N2->getOperand(2);
11680 if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
11681 // Create the actual or node if we can generate good code for it.
11682 if (!normalizeToSequence) {
11683 SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
11684 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1,
11685 N2_2, Flags);
11686 }
11687 // Otherwise see if we can optimize to a better pattern.
11688 if (SDValue Combined = visitORLike(N0, N2_0, DL))
11689 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
11690 N2_2, Flags);
11691 }
11692 }
11693 }
11694
11695 // Fold selects based on a setcc into other things, such as min/max/abs.
11696 if (N0.getOpcode() == ISD::SETCC) {
11697 SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1);
11698 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
11699
11700 // select (fcmp lt x, y), x, y -> fminnum x, y
11701 // select (fcmp gt x, y), x, y -> fmaxnum x, y
11702 //
11703 // This is OK if we don't care what happens if either operand is a NaN.
11704 if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, Flags, TLI))
11705 if (SDValue FMinMax =
11706 combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2, CC))
11707 return FMinMax;
11708
11709 // Use 'unsigned add with overflow' to optimize an unsigned saturating add.
11710 // This is conservatively limited to pre-legal-operations to give targets
11711 // a chance to reverse the transform if they want to do that. Also, it is
11712 // unlikely that the pattern would be formed late, so it's probably not
11713 // worth going through the other checks.
11714 if (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::UADDO, VT) &&
11715 CC == ISD::SETUGT && N0.hasOneUse() && isAllOnesConstant(N1) &&
11716 N2.getOpcode() == ISD::ADD && Cond0 == N2.getOperand(0)) {
11717 auto *C = dyn_cast<ConstantSDNode>(N2.getOperand(1));
11718 auto *NotC = dyn_cast<ConstantSDNode>(Cond1);
11719 if (C && NotC && C->getAPIntValue() == ~NotC->getAPIntValue()) {
11720 // select (setcc Cond0, ~C, ugt), -1, (add Cond0, C) -->
11721 // uaddo Cond0, C; select uaddo.1, -1, uaddo.0
11722 //
11723 // The IR equivalent of this transform would have this form:
11724 // %a = add %x, C
11725 // %c = icmp ugt %x, ~C
11726 // %r = select %c, -1, %a
11727 // =>
11728 // %u = call {iN,i1} llvm.uadd.with.overflow(%x, C)
11729 // %u0 = extractvalue %u, 0
11730 // %u1 = extractvalue %u, 1
11731 // %r = select %u1, -1, %u0
11732 SDVTList VTs = DAG.getVTList(VT, VT0);
11733 SDValue UAO = DAG.getNode(ISD::UADDO, DL, VTs, Cond0, N2.getOperand(1));
11734 return DAG.getSelect(DL, VT, UAO.getValue(1), N1, UAO.getValue(0));
11735 }
11736 }
11737
11738 if (TLI.isOperationLegal(ISD::SELECT_CC, VT) ||
11739 (!LegalOperations &&
11740         TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))) {
11741      // Any flags available in a select/setcc fold will be on the setcc as they
11742 // migrated from fcmp
11743 Flags = N0->getFlags();
11744 SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1,
11745 N2, N0.getOperand(2));
11746 SelectNode->setFlags(Flags);
11747 return SelectNode;
11748 }
11749
11750 if (SDValue NewSel = SimplifySelect(DL, N0, N1, N2))
11751 return NewSel;
11752 }
11753
11754 if (!VT.isVector())
11755 if (SDValue BinOp = foldSelectOfBinops(N))
11756 return BinOp;
11757
11758 if (SDValue R = combineSelectAsExtAnd(N0, N1, N2, DL, DAG))
11759 return R;
11760
11761 return SDValue();
11762}
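// Editor's note: illustrative sketch, not part of the LLVM source. The
// saturating-add rewrite in visitSELECT above relies on this u32 identity:
// (x > ~C) ? -1 : x + C selects exactly when x + C overflows, so it can be
// expressed with the overflow bit of an unsigned add (uaddo):
static bool checkSelectToUAddWithOverflow(uint32_t X, uint32_t C) {
  uint32_t Sum = X + C;                             // uaddo.0 (wrapping add)
  bool Overflow = Sum < X;                          // uaddo.1 (carry out)
  uint32_t SelForm = (X > ~C) ? 0xFFFFFFFFu : X + C;
  uint32_t UAddOForm = Overflow ? 0xFFFFFFFFu : Sum;
  return SelForm == UAddOForm;
}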
11763
11764// This function assumes all the vselect's arguments are CONCAT_VECTORS
11765// nodes and that the condition is a BV of ConstantSDNodes (or undefs).
11766static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
11767  SDLoc DL(N);
11768 SDValue Cond = N->getOperand(0);
11769 SDValue LHS = N->getOperand(1);
11770 SDValue RHS = N->getOperand(2);
11771 EVT VT = N->getValueType(0);
11772 int NumElems = VT.getVectorNumElements();
11773 assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
11774 RHS.getOpcode() == ISD::CONCAT_VECTORS &&
11775 Cond.getOpcode() == ISD::BUILD_VECTOR);
11776
11777  // CONCAT_VECTORS can take an arbitrary number of arguments. We only care about
11778 // binary ones here.
11779 if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
11780 return SDValue();
11781
11782 // We're sure we have an even number of elements due to the
11783 // concat_vectors we have as arguments to vselect.
11784 // Skip BV elements until we find one that's not an UNDEF
11785 // After we find an UNDEF element, keep looping until we get to half the
11786 // length of the BV and see if all the non-undef nodes are the same.
11787 ConstantSDNode *BottomHalf = nullptr;
11788 for (int i = 0; i < NumElems / 2; ++i) {
11789 if (Cond->getOperand(i)->isUndef())
11790 continue;
11791
11792 if (BottomHalf == nullptr)
11793 BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
11794 else if (Cond->getOperand(i).getNode() != BottomHalf)
11795 return SDValue();
11796 }
11797
11798 // Do the same for the second half of the BuildVector
11799 ConstantSDNode *TopHalf = nullptr;
11800 for (int i = NumElems / 2; i < NumElems; ++i) {
11801 if (Cond->getOperand(i)->isUndef())
11802 continue;
11803
11804 if (TopHalf == nullptr)
11805 TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
11806 else if (Cond->getOperand(i).getNode() != TopHalf)
11807 return SDValue();
11808 }
11809
11810 assert(TopHalf && BottomHalf &&
11811 "One half of the selector was all UNDEFs and the other was all the "
11812 "same value. This should have been addressed before this function.");
11813 return DAG.getNode(
11814      ISD::CONCAT_VECTORS, DL, VT,
11815      BottomHalf->isZero() ? RHS->getOperand(0) : LHS->getOperand(0),
11816 TopHalf->isZero() ? RHS->getOperand(1) : LHS->getOperand(1));
11817}
11818
11819bool refineUniformBase(SDValue &BasePtr, SDValue &Index, bool IndexIsScaled,
11820 SelectionDAG &DAG, const SDLoc &DL) {
11821
11822 // Only perform the transformation when existing operands can be reused.
11823 if (IndexIsScaled)
11824 return false;
11825
11826 if (!isNullConstant(BasePtr) && !Index.hasOneUse())
11827 return false;
11828
11829 EVT VT = BasePtr.getValueType();
11830
11831 if (SDValue SplatVal = DAG.getSplatValue(Index);
11832 SplatVal && !isNullConstant(SplatVal) &&
11833 SplatVal.getValueType() == VT) {
11834 BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
11835 Index = DAG.getSplat(Index.getValueType(), DL, DAG.getConstant(0, DL, VT));
11836 return true;
11837 }
11838
11839 if (Index.getOpcode() != ISD::ADD)
11840 return false;
11841
11842 if (SDValue SplatVal = DAG.getSplatValue(Index.getOperand(0));
11843 SplatVal && SplatVal.getValueType() == VT) {
11844 BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
11845 Index = Index.getOperand(1);
11846 return true;
11847 }
11848 if (SDValue SplatVal = DAG.getSplatValue(Index.getOperand(1));
11849 SplatVal && SplatVal.getValueType() == VT) {
11850 BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
11851 Index = Index.getOperand(0);
11852 return true;
11853 }
11854 return false;
11855}
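// Editor's note: illustrative sketch, not part of the LLVM source. The
// uniform-base refinement above only re-associates the per-lane address
// arithmetic: Base + (Splat + Index[i]) == (Base + Splat) + Index[i].
static bool checkRefineUniformBase(uint64_t Base, uint64_t Splat, uint64_t Idx) {
  uint64_t Before = Base + (Splat + Idx);           // original base/index split
  uint64_t After = (Base + Splat) + Idx;            // splat folded into the base
  return Before == After;
}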
11856
11857// Fold sext/zext of index into index type.
11858bool refineIndexType(SDValue &Index, ISD::MemIndexType &IndexType, EVT DataVT,
11859                     SelectionDAG &DAG) {
11860 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11861
11862 // It's always safe to look through zero extends.
11863 if (Index.getOpcode() == ISD::ZERO_EXTEND) {
11864 if (TLI.shouldRemoveExtendFromGSIndex(Index, DataVT)) {
11865 IndexType = ISD::UNSIGNED_SCALED;
11866 Index = Index.getOperand(0);
11867 return true;
11868 }
11869 if (ISD::isIndexTypeSigned(IndexType)) {
11870 IndexType = ISD::UNSIGNED_SCALED;
11871 return true;
11872 }
11873 }
11874
11875 // It's only safe to look through sign extends when Index is signed.
11876 if (Index.getOpcode() == ISD::SIGN_EXTEND &&
11877 ISD::isIndexTypeSigned(IndexType) &&
11878 TLI.shouldRemoveExtendFromGSIndex(Index, DataVT)) {
11879 Index = Index.getOperand(0);
11880 return true;
11881 }
11882
11883 return false;
11884}
11885
11886SDValue DAGCombiner::visitVPSCATTER(SDNode *N) {
11887 VPScatterSDNode *MSC = cast<VPScatterSDNode>(N);
11888 SDValue Mask = MSC->getMask();
11889 SDValue Chain = MSC->getChain();
11890 SDValue Index = MSC->getIndex();
11891 SDValue Scale = MSC->getScale();
11892 SDValue StoreVal = MSC->getValue();
11893 SDValue BasePtr = MSC->getBasePtr();
11894 SDValue VL = MSC->getVectorLength();
11895 ISD::MemIndexType IndexType = MSC->getIndexType();
11896 SDLoc DL(N);
11897
11898 // Zap scatters with a zero mask.
11899  if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
11900    return Chain;
11901
11902 if (refineUniformBase(BasePtr, Index, MSC->isIndexScaled(), DAG, DL)) {
11903 SDValue Ops[] = {Chain, StoreVal, BasePtr, Index, Scale, Mask, VL};
11904 return DAG.getScatterVP(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
11905 DL, Ops, MSC->getMemOperand(), IndexType);
11906 }
11907
11908 if (refineIndexType(Index, IndexType, StoreVal.getValueType(), DAG)) {
11909 SDValue Ops[] = {Chain, StoreVal, BasePtr, Index, Scale, Mask, VL};
11910 return DAG.getScatterVP(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
11911 DL, Ops, MSC->getMemOperand(), IndexType);
11912 }
11913
11914 return SDValue();
11915}
11916
11917SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
11918 MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
11919 SDValue Mask = MSC->getMask();
11920 SDValue Chain = MSC->getChain();
11921 SDValue Index = MSC->getIndex();
11922 SDValue Scale = MSC->getScale();
11923 SDValue StoreVal = MSC->getValue();
11924 SDValue BasePtr = MSC->getBasePtr();
11925 ISD::MemIndexType IndexType = MSC->getIndexType();
11926 SDLoc DL(N);
11927
11928 // Zap scatters with a zero mask.
11929  if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
11930    return Chain;
11931
11932 if (refineUniformBase(BasePtr, Index, MSC->isIndexScaled(), DAG, DL)) {
11933 SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
11934 return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
11935 DL, Ops, MSC->getMemOperand(), IndexType,
11936 MSC->isTruncatingStore());
11937 }
11938
11939 if (refineIndexType(Index, IndexType, StoreVal.getValueType(), DAG)) {
11940 SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
11941 return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
11942 DL, Ops, MSC->getMemOperand(), IndexType,
11943 MSC->isTruncatingStore());
11944 }
11945
11946 return SDValue();
11947}
11948
11949SDValue DAGCombiner::visitMSTORE(SDNode *N) {
11950 MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
11951 SDValue Mask = MST->getMask();
11952 SDValue Chain = MST->getChain();
11953 SDValue Value = MST->getValue();
11954 SDValue Ptr = MST->getBasePtr();
11955 SDLoc DL(N);
11956
11957 // Zap masked stores with a zero mask.
11958  if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
11959    return Chain;
11960
11961 // Remove a masked store if base pointers and masks are equal.
11962 if (MaskedStoreSDNode *MST1 = dyn_cast<MaskedStoreSDNode>(Chain)) {
11963 if (MST->isUnindexed() && MST->isSimple() && MST1->isUnindexed() &&
11964 MST1->isSimple() && MST1->getBasePtr() == Ptr &&
11965 !MST->getBasePtr().isUndef() &&
11966 ((Mask == MST1->getMask() && MST->getMemoryVT().getStoreSize() ==
11967 MST1->getMemoryVT().getStoreSize()) ||
11968         ISD::isConstantSplatVectorAllOnes(Mask.getNode())) &&
11969        TypeSize::isKnownLE(MST1->getMemoryVT().getStoreSize(),
11970 MST->getMemoryVT().getStoreSize())) {
11971 CombineTo(MST1, MST1->getChain());
11972 if (N->getOpcode() != ISD::DELETED_NODE)
11973 AddToWorklist(N);
11974 return SDValue(N, 0);
11975 }
11976 }
11977
11978  // If this is a masked store with an all-ones mask, we can use an unmasked store.
11979 // FIXME: Can we do this for indexed, compressing, or truncating stores?
11980 if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MST->isUnindexed() &&
11981 !MST->isCompressingStore() && !MST->isTruncatingStore())
11982 return DAG.getStore(MST->getChain(), SDLoc(N), MST->getValue(),
11983 MST->getBasePtr(), MST->getPointerInfo(),
11984 MST->getOriginalAlign(),
11985 MST->getMemOperand()->getFlags(), MST->getAAInfo());
11986
11987 // Try transforming N to an indexed store.
11988 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
11989 return SDValue(N, 0);
11990
11991 if (MST->isTruncatingStore() && MST->isUnindexed() &&
11992 Value.getValueType().isInteger() &&
11993 (!isa<ConstantSDNode>(Value) ||
11994 !cast<ConstantSDNode>(Value)->isOpaque())) {
11995 APInt TruncDemandedBits =
11996 APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
11997                             MST->getMemoryVT().getScalarSizeInBits());
11998
11999 // See if we can simplify the operation with
12000 // SimplifyDemandedBits, which only works if the value has a single use.
12001 if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
12002 // Re-visit the store if anything changed and the store hasn't been merged
12003 // with another node (N is deleted) SimplifyDemandedBits will add Value's
12004 // node back to the worklist if necessary, but we also need to re-visit
12005 // the Store node itself.
12006 if (N->getOpcode() != ISD::DELETED_NODE)
12007 AddToWorklist(N);
12008 return SDValue(N, 0);
12009 }
12010 }
12011
12012 // If this is a TRUNC followed by a masked store, fold this into a masked
12013 // truncating store. We can do this even if this is already a masked
12014 // truncstore.
12015  // TODO: Try combining to a masked compress store if possible.
12016 if ((Value.getOpcode() == ISD::TRUNCATE) && Value->hasOneUse() &&
12017 MST->isUnindexed() && !MST->isCompressingStore() &&
12018 TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
12019 MST->getMemoryVT(), LegalOperations)) {
12020 auto Mask = TLI.promoteTargetBoolean(DAG, MST->getMask(),
12021 Value.getOperand(0).getValueType());
12022 return DAG.getMaskedStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
12023 MST->getOffset(), Mask, MST->getMemoryVT(),
12024 MST->getMemOperand(), MST->getAddressingMode(),
12025 /*IsTruncating=*/true);
12026 }
12027
12028 return SDValue();
12029}
12030
12031SDValue DAGCombiner::visitVP_STRIDED_STORE(SDNode *N) {
12032 auto *SST = cast<VPStridedStoreSDNode>(N);
12033 EVT EltVT = SST->getValue().getValueType().getVectorElementType();
12034 // Combine strided stores with unit-stride to a regular VP store.
12035 if (auto *CStride = dyn_cast<ConstantSDNode>(SST->getStride());
12036 CStride && CStride->getZExtValue() == EltVT.getStoreSize()) {
12037 return DAG.getStoreVP(SST->getChain(), SDLoc(N), SST->getValue(),
12038 SST->getBasePtr(), SST->getOffset(), SST->getMask(),
12039 SST->getVectorLength(), SST->getMemoryVT(),
12040 SST->getMemOperand(), SST->getAddressingMode(),
12041 SST->isTruncatingStore(), SST->isCompressingStore());
12042 }
12043 return SDValue();
12044}
12045
12046SDValue DAGCombiner::visitVECTOR_COMPRESS(SDNode *N) {
12047 SDLoc DL(N);
12048 SDValue Vec = N->getOperand(0);
12049 SDValue Mask = N->getOperand(1);
12050 SDValue Passthru = N->getOperand(2);
12051 EVT VecVT = Vec.getValueType();
12052
12053 bool HasPassthru = !Passthru.isUndef();
12054
12055 APInt SplatVal;
12056 if (ISD::isConstantSplatVector(Mask.getNode(), SplatVal))
12057 return TLI.isConstTrueVal(Mask) ? Vec : Passthru;
12058
12059 if (Vec.isUndef() || Mask.isUndef())
12060 return Passthru;
12061
12062 // No need for potentially expensive compress if the mask is constant.
12063  if (ISD::isBuildVectorOfConstantSDNodes(Mask.getNode())) {
12064    SmallVector<SDValue, 16> Ops;
12065    EVT ScalarVT = VecVT.getVectorElementType();
12066 unsigned NumSelected = 0;
12067 unsigned NumElmts = VecVT.getVectorNumElements();
12068 for (unsigned I = 0; I < NumElmts; ++I) {
12069 SDValue MaskI = Mask.getOperand(I);
12070 // We treat undef mask entries as "false".
12071 if (MaskI.isUndef())
12072 continue;
12073
12074 if (TLI.isConstTrueVal(MaskI)) {
12075 SDValue VecI = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, Vec,
12076 DAG.getVectorIdxConstant(I, DL));
12077 Ops.push_back(VecI);
12078 NumSelected++;
12079 }
12080 }
12081 for (unsigned Rest = NumSelected; Rest < NumElmts; ++Rest) {
12082 SDValue Val =
12083 HasPassthru
12084 ? DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, Passthru,
12085 DAG.getVectorIdxConstant(Rest, DL))
12086 : DAG.getUNDEF(ScalarVT);
12087 Ops.push_back(Val);
12088 }
12089 return DAG.getBuildVector(VecVT, DL, Ops);
12090 }
12091
12092 return SDValue();
12093}
12094
12095SDValue DAGCombiner::visitVPGATHER(SDNode *N) {
12096 VPGatherSDNode *MGT = cast<VPGatherSDNode>(N);
12097 SDValue Mask = MGT->getMask();
12098 SDValue Chain = MGT->getChain();
12099 SDValue Index = MGT->getIndex();
12100 SDValue Scale = MGT->getScale();
12101 SDValue BasePtr = MGT->getBasePtr();
12102 SDValue VL = MGT->getVectorLength();
12103 ISD::MemIndexType IndexType = MGT->getIndexType();
12104 SDLoc DL(N);
12105
12106 if (refineUniformBase(BasePtr, Index, MGT->isIndexScaled(), DAG, DL)) {
12107 SDValue Ops[] = {Chain, BasePtr, Index, Scale, Mask, VL};
12108 return DAG.getGatherVP(
12109 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
12110 Ops, MGT->getMemOperand(), IndexType);
12111 }
12112
12113 if (refineIndexType(Index, IndexType, N->getValueType(0), DAG)) {
12114 SDValue Ops[] = {Chain, BasePtr, Index, Scale, Mask, VL};
12115 return DAG.getGatherVP(
12116 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
12117 Ops, MGT->getMemOperand(), IndexType);
12118 }
12119
12120 return SDValue();
12121}
12122
12123SDValue DAGCombiner::visitMGATHER(SDNode *N) {
12124 MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
12125 SDValue Mask = MGT->getMask();
12126 SDValue Chain = MGT->getChain();
12127 SDValue Index = MGT->getIndex();
12128 SDValue Scale = MGT->getScale();
12129 SDValue PassThru = MGT->getPassThru();
12130 SDValue BasePtr = MGT->getBasePtr();
12131 ISD::MemIndexType IndexType = MGT->getIndexType();
12132 SDLoc DL(N);
12133
12134 // Zap gathers with a zero mask.
12135  if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
12136    return CombineTo(N, PassThru, MGT->getChain());
12137
12138 if (refineUniformBase(BasePtr, Index, MGT->isIndexScaled(), DAG, DL)) {
12139 SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
12140 return DAG.getMaskedGather(
12141 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
12142 Ops, MGT->getMemOperand(), IndexType, MGT->getExtensionType());
12143 }
12144
12145 if (refineIndexType(Index, IndexType, N->getValueType(0), DAG)) {
12146 SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
12147 return DAG.getMaskedGather(
12148 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
12149 Ops, MGT->getMemOperand(), IndexType, MGT->getExtensionType());
12150 }
12151
12152 return SDValue();
12153}
12154
12155SDValue DAGCombiner::visitMLOAD(SDNode *N) {
12156 MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
12157 SDValue Mask = MLD->getMask();
12158 SDLoc DL(N);
12159
12160 // Zap masked loads with a zero mask.
12161  if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
12162    return CombineTo(N, MLD->getPassThru(), MLD->getChain());
12163
12164  // If this is a masked load with an all-ones mask, we can use an unmasked load.
12165 // FIXME: Can we do this for indexed, expanding, or extending loads?
12166 if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MLD->isUnindexed() &&
12167 !MLD->isExpandingLoad() && MLD->getExtensionType() == ISD::NON_EXTLOAD) {
12168 SDValue NewLd = DAG.getLoad(
12169 N->getValueType(0), SDLoc(N), MLD->getChain(), MLD->getBasePtr(),
12170 MLD->getPointerInfo(), MLD->getOriginalAlign(),
12171 MLD->getMemOperand()->getFlags(), MLD->getAAInfo(), MLD->getRanges());
12172 return CombineTo(N, NewLd, NewLd.getValue(1));
12173 }
12174
12175 // Try transforming N to an indexed load.
12176 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
12177 return SDValue(N, 0);
12178
12179 return SDValue();
12180}
12181
12182SDValue DAGCombiner::visitVP_STRIDED_LOAD(SDNode *N) {
12183 auto *SLD = cast<VPStridedLoadSDNode>(N);
12184 EVT EltVT = SLD->getValueType(0).getVectorElementType();
12185 // Combine strided loads with unit-stride to a regular VP load.
12186 if (auto *CStride = dyn_cast<ConstantSDNode>(SLD->getStride());
12187 CStride && CStride->getZExtValue() == EltVT.getStoreSize()) {
12188 SDValue NewLd = DAG.getLoadVP(
12189 SLD->getAddressingMode(), SLD->getExtensionType(), SLD->getValueType(0),
12190 SDLoc(N), SLD->getChain(), SLD->getBasePtr(), SLD->getOffset(),
12191 SLD->getMask(), SLD->getVectorLength(), SLD->getMemoryVT(),
12192 SLD->getMemOperand(), SLD->isExpandingLoad());
12193 return CombineTo(N, NewLd, NewLd.getValue(1));
12194 }
12195 return SDValue();
12196}
12197
12198/// A vector select of 2 constant vectors can be simplified to math/logic to
12199/// avoid a variable select instruction and possibly avoid constant loads.
12200SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
12201 SDValue Cond = N->getOperand(0);
12202 SDValue N1 = N->getOperand(1);
12203 SDValue N2 = N->getOperand(2);
12204 EVT VT = N->getValueType(0);
12205 if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
12206      !shouldConvertSelectOfConstantsToMath(Cond, VT, TLI) ||
12207      !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) ||
12208      !ISD::isBuildVectorOfConstantSDNodes(N2.getNode()))
12209    return SDValue();
12210
12211 // Check if we can use the condition value to increment/decrement a single
12212 // constant value. This simplifies a select to an add and removes a constant
12213 // load/materialization from the general case.
12214 bool AllAddOne = true;
12215 bool AllSubOne = true;
12216 unsigned Elts = VT.getVectorNumElements();
12217 for (unsigned i = 0; i != Elts; ++i) {
12218 SDValue N1Elt = N1.getOperand(i);
12219 SDValue N2Elt = N2.getOperand(i);
12220 if (N1Elt.isUndef() || N2Elt.isUndef())
12221 continue;
12222 if (N1Elt.getValueType() != N2Elt.getValueType()) {
12223 AllAddOne = false;
12224 AllSubOne = false;
12225 break;
12226 }
12227
12228 const APInt &C1 = N1Elt->getAsAPIntVal();
12229 const APInt &C2 = N2Elt->getAsAPIntVal();
12230 if (C1 != C2 + 1)
12231 AllAddOne = false;
12232 if (C1 != C2 - 1)
12233 AllSubOne = false;
12234 }
12235
12236 // Further simplifications for the extra-special cases where the constants are
12237 // all 0 or all -1 should be implemented as folds of these patterns.
12238 SDLoc DL(N);
12239 if (AllAddOne || AllSubOne) {
12240 // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
12241 // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
12242 auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
12243 SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
12244 return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
12245 }
12246
12247 // select Cond, Pow2C, 0 --> (zext Cond) << log2(Pow2C)
12248 APInt Pow2C;
12249 if (ISD::isConstantSplatVector(N1.getNode(), Pow2C) && Pow2C.isPowerOf2() &&
12250 isNullOrNullSplat(N2)) {
12251 SDValue ZextCond = DAG.getZExtOrTrunc(Cond, DL, VT);
12252 SDValue ShAmtC = DAG.getConstant(Pow2C.exactLogBase2(), DL, VT);
12253 return DAG.getNode(ISD::SHL, DL, VT, ZextCond, ShAmtC);
12254 }
12255
12256  if (SDValue V = foldSelectOfConstantsUsingSra(N, DL, DAG))
12257    return V;
12258
12259 // The general case for select-of-constants:
12260 // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
12261 // ...but that only makes sense if a vselect is slower than 2 logic ops, so
12262 // leave that to a machine-specific pass.
12263 return SDValue();
12264}
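// Editor's note: illustrative sketch, not part of the LLVM source. The
// per-lane identities behind the vselect-of-constants fold above: when the two
// constants differ by one, the select becomes an add of the zero- or
// sign-extended condition lane.
static bool checkVSelectOfConstantsDifferByOne(bool Cond, uint32_t C2) {
  uint32_t ZExt = Cond ? 1u : 0u;                   // zext i1 Cond
  uint32_t SExt = Cond ? 0xFFFFFFFFu : 0u;          // sext i1 Cond
  bool AddOne = (Cond ? C2 + 1 : C2) == C2 + ZExt;  // vselect Cond, C+1, C
  bool SubOne = (Cond ? C2 - 1 : C2) == C2 + SExt;  // vselect Cond, C-1, C
  return AddOne && SubOne;
}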
12265
12266SDValue DAGCombiner::visitVP_SELECT(SDNode *N) {
12267 SDValue N0 = N->getOperand(0);
12268 SDValue N1 = N->getOperand(1);
12269 SDValue N2 = N->getOperand(2);
12270 SDLoc DL(N);
12271
12272 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
12273 return V;
12274
12275 if (SDValue V = foldBoolSelectToLogic<VPMatchContext>(N, DL, DAG))
12276 return V;
12277
12278 return SDValue();
12279}
12280
12281SDValue DAGCombiner::visitVSELECT(SDNode *N) {
12282 SDValue N0 = N->getOperand(0);
12283 SDValue N1 = N->getOperand(1);
12284 SDValue N2 = N->getOperand(2);
12285 EVT VT = N->getValueType(0);
12286 SDLoc DL(N);
12287
12288 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
12289 return V;
12290
12291 if (SDValue V = foldBoolSelectToLogic<EmptyMatchContext>(N, DL, DAG))
12292 return V;
12293
12294 // vselect (not Cond), N1, N2 -> vselect Cond, N2, N1
12295 if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false))
12296 return DAG.getSelect(DL, VT, F, N2, N1);
12297
12298 // select (sext m), (add X, C), X --> (add X, (and C, (sext m))))
12299 if (N1.getOpcode() == ISD::ADD && N1.getOperand(0) == N2 && N1->hasOneUse() &&
12302 TLI.getBooleanContents(N0.getValueType()) ==
12304 return DAG.getNode(
12305 ISD::ADD, DL, N1.getValueType(), N2,
12306 DAG.getNode(ISD::AND, DL, N0.getValueType(), N1.getOperand(1), N0));
12307 }
12308
12309 // Canonicalize integer abs.
12310 // vselect (setg[te] X, 0), X, -X ->
12311 // vselect (setgt X, -1), X, -X ->
12312 // vselect (setl[te] X, 0), -X, X ->
12313 // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
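 // For example, with i32 X = -5: Y = sra(X, 31) = -1, add(X, Y) = -6, and
 // xor(-6, -1) = 5; with X = 5: Y = 0, so both the add and the xor leave X as 5.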
12314 if (N0.getOpcode() == ISD::SETCC) {
12315 SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
12316 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
12317 bool isAbs = false;
12318 bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
12319
12320 if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
12321 (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
12322 N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
12324 else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
12325 N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
12327
12328 if (isAbs) {
12330 return DAG.getNode(ISD::ABS, DL, VT, LHS);
12331
12332 SDValue Shift = DAG.getNode(
12333 ISD::SRA, DL, VT, LHS,
12334 DAG.getShiftAmountConstant(VT.getScalarSizeInBits() - 1, VT, DL));
12335 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
12336 AddToWorklist(Shift.getNode());
12337 AddToWorklist(Add.getNode());
12338 return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
12339 }
12340
12341 // vselect x, y (fcmp lt x, y) -> fminnum x, y
12342 // vselect x, y (fcmp gt x, y) -> fmaxnum x, y
12343 //
12344 // This is OK if we don't care about what happens if either operand is a
12345 // NaN.
12346 //
12347 if (N0.hasOneUse() &&
12348 isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, N->getFlags(), TLI)) {
12349 if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC))
12350 return FMinMax;
12351 }
12352
12353 if (SDValue S = PerformMinMaxFpToSatCombine(LHS, RHS, N1, N2, CC, DAG))
12354 return S;
12355 if (SDValue S = PerformUMinFpToSatCombine(LHS, RHS, N1, N2, CC, DAG))
12356 return S;
12357
12358 // If this select has a condition (setcc) with narrower operands than the
12359 // select, try to widen the compare to match the select width.
12360 // TODO: This should be extended to handle any constant.
12361 // TODO: This could be extended to handle non-loading patterns, but that
12362 // requires thorough testing to avoid regressions.
12363 if (isNullOrNullSplat(RHS)) {
12364 EVT NarrowVT = LHS.getValueType();
12366 EVT SetCCVT = getSetCCResultType(LHS.getValueType());
12367 unsigned SetCCWidth = SetCCVT.getScalarSizeInBits();
12368 unsigned WideWidth = WideVT.getScalarSizeInBits();
12369 bool IsSigned = isSignedIntSetCC(CC);
12370 auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
12371 if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() &&
12372 SetCCWidth != 1 && SetCCWidth < WideWidth &&
12373 TLI.isLoadExtLegalOrCustom(LoadExtOpcode, WideVT, NarrowVT) &&
12374 TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)) {
12375 // Both compare operands can be widened for free. The LHS can use an
12376 // extended load, and the RHS is a constant:
12377 // vselect (ext (setcc load(X), C)), N1, N2 -->
12378 // vselect (setcc extload(X), C'), N1, N2
12379 auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
12380 SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS);
12381 SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS);
12382 EVT WideSetCCVT = getSetCCResultType(WideVT);
12383 SDValue WideSetCC = DAG.getSetCC(DL, WideSetCCVT, WideLHS, WideRHS, CC);
12384 return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2);
12385 }
12386 }
12387
12388 // Match VSELECTs with absolute difference patterns.
12389 // (vselect (setcc a, b, set?gt), (sub a, b), (sub b, a)) --> (abd? a, b)
12390 // (vselect (setcc a, b, set?ge), (sub a, b), (sub b, a)) --> (abd? a, b)
12391 // (vselect (setcc a, b, set?lt), (sub b, a), (sub a, b)) --> (abd? a, b)
12392 // (vselect (setcc a, b, set?le), (sub b, a), (sub a, b)) --> (abd? a, b)
12393 if (N1.getOpcode() == ISD::SUB && N2.getOpcode() == ISD::SUB &&
12394 N1.getOperand(0) == N2.getOperand(1) &&
12395 N1.getOperand(1) == N2.getOperand(0)) {
12396 bool IsSigned = isSignedIntSetCC(CC);
12397 unsigned ABDOpc = IsSigned ? ISD::ABDS : ISD::ABDU;
12398 if (hasOperation(ABDOpc, VT)) {
12399 switch (CC) {
12400 case ISD::SETGT:
12401 case ISD::SETGE:
12402 case ISD::SETUGT:
12403 case ISD::SETUGE:
12404 if (LHS == N1.getOperand(0) && RHS == N1.getOperand(1))
12405 return DAG.getNode(ABDOpc, DL, VT, LHS, RHS);
12406 break;
12407 case ISD::SETLT:
12408 case ISD::SETLE:
12409 case ISD::SETULT:
12410 case ISD::SETULE:
12411 if (RHS == N1.getOperand(0) && LHS == N1.getOperand(1))
12412 return DAG.getNode(ABDOpc, DL, VT, LHS, RHS);
12413 break;
12414 default:
12415 break;
12416 }
12417 }
12418 }
12419
12420 // Match VSELECTs into add with unsigned saturation.
12421 if (hasOperation(ISD::UADDSAT, VT)) {
12422 // Check if one of the arms of the VSELECT is a vector with all bits set.
12423 // If it's on the left side invert the predicate to simplify logic below.
12424 SDValue Other;
12425 ISD::CondCode SatCC = CC;
12427 Other = N2;
12428 SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
12429 } else if (ISD::isConstantSplatVectorAllOnes(N2.getNode())) {
12430 Other = N1;
12431 }
12432
12433 if (Other && Other.getOpcode() == ISD::ADD) {
12434 SDValue CondLHS = LHS, CondRHS = RHS;
12435 SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
12436
12437 // Canonicalize condition operands.
12438 if (SatCC == ISD::SETUGE) {
12439 std::swap(CondLHS, CondRHS);
12440 SatCC = ISD::SETULE;
12441 }
12442
12443 // We can test against either of the addition operands.
12444 // x <= x+y ? x+y : ~0 --> uaddsat x, y
12445 // x+y >= x ? x+y : ~0 --> uaddsat x, y
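 // For example, with i8 x = 250, y = 10: x+y wraps to 4, so "x <= x+y" is false
 // and the select yields ~0 = 255, which is exactly uaddsat(250, 10).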
12446 if (SatCC == ISD::SETULE && Other == CondRHS &&
12447 (OpLHS == CondLHS || OpRHS == CondLHS))
12448 return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
12449
12450 if (OpRHS.getOpcode() == CondRHS.getOpcode() &&
12451 (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
12452 OpRHS.getOpcode() == ISD::SPLAT_VECTOR) &&
12453 CondLHS == OpLHS) {
12454 // If the RHS is a constant we have to reverse the const
12455 // canonicalization.
12456 // x >= ~C ? x+C : ~0 --> uaddsat x, C
12457 auto MatchUADDSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
12458 return Cond->getAPIntValue() == ~Op->getAPIntValue();
12459 };
12460 if (SatCC == ISD::SETULE &&
12461 ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUADDSAT))
12462 return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
12463 }
12464 }
12465 }
12466
12467 // Match VSELECTs into sub with unsigned saturation.
12468 if (hasOperation(ISD::USUBSAT, VT)) {
12469 // Check if one of the arms of the VSELECT is a zero vector. If it's on
12470 // the left side invert the predicate to simplify logic below.
12471 SDValue Other;
12472 ISD::CondCode SatCC = CC;
12474 Other = N2;
12475 SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
12477 Other = N1;
12478 }
12479
12480 // zext(x) >= y ? trunc(zext(x) - y) : 0
12481 // --> usubsat(trunc(zext(x)),trunc(umin(y,SatLimit)))
12482 // zext(x) > y ? trunc(zext(x) - y) : 0
12483 // --> usubsat(trunc(zext(x)),trunc(umin(y,SatLimit)))
12484 if (Other && Other.getOpcode() == ISD::TRUNCATE &&
12485 Other.getOperand(0).getOpcode() == ISD::SUB &&
12486 (SatCC == ISD::SETUGE || SatCC == ISD::SETUGT)) {
12487 SDValue OpLHS = Other.getOperand(0).getOperand(0);
12488 SDValue OpRHS = Other.getOperand(0).getOperand(1);
12489 if (LHS == OpLHS && RHS == OpRHS && LHS.getOpcode() == ISD::ZERO_EXTEND)
12490 if (SDValue R = getTruncatedUSUBSAT(VT, LHS.getValueType(), LHS, RHS,
12491 DAG, DL))
12492 return R;
12493 }
12494
12495 if (Other && Other.getNumOperands() == 2) {
12496 SDValue CondRHS = RHS;
12497 SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
12498
12499 if (OpLHS == LHS) {
12500 // Look for a general sub with unsigned saturation first.
12501 // x >= y ? x-y : 0 --> usubsat x, y
12502 // x > y ? x-y : 0 --> usubsat x, y
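 // For example, with i8 x = 5, y = 10: "x >= y" is false, so the select yields
 // 0, matching usubsat(5, 10); with x = 10, y = 5 both forms yield 5.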
12503 if ((SatCC == ISD::SETUGE || SatCC == ISD::SETUGT) &&
12504 Other.getOpcode() == ISD::SUB && OpRHS == CondRHS)
12505 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
12506
12507 if (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
12508 OpRHS.getOpcode() == ISD::SPLAT_VECTOR) {
12509 if (CondRHS.getOpcode() == ISD::BUILD_VECTOR ||
12510 CondRHS.getOpcode() == ISD::SPLAT_VECTOR) {
12511 // If the RHS is a constant we have to reverse the const
12512 // canonicalization.
12513 // x > C-1 ? x + (-C) : 0 --> usubsat x, C
12514 auto MatchUSUBSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
12515 return (!Op && !Cond) ||
12516 (Op && Cond &&
12517 Cond->getAPIntValue() == (-Op->getAPIntValue() - 1));
12518 };
12519 if (SatCC == ISD::SETUGT && Other.getOpcode() == ISD::ADD &&
12520 ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUSUBSAT,
12521 /*AllowUndefs*/ true)) {
12522 OpRHS = DAG.getNegative(OpRHS, DL, VT);
12523 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
12524 }
12525
12526 // Another special case: If C was a sign bit, the sub has been
12527 // canonicalized into a xor.
12528 // FIXME: Would it be better to use computeKnownBits to
12529 // determine whether it's safe to decanonicalize the xor?
12530 // x s< 0 ? x^C : 0 --> usubsat x, C
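 // For example, with i8 and C = 0x80: if x is negative (x >= 128 unsigned),
 // x ^ 0x80 equals x - 128 = usubsat(x, 128); otherwise usubsat(x, 128) is 0.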
12531 APInt SplatValue;
12532 if (SatCC == ISD::SETLT && Other.getOpcode() == ISD::XOR &&
12533 ISD::isConstantSplatVector(OpRHS.getNode(), SplatValue) &&
12535 SplatValue.isSignMask()) {
12536 // Note that we have to rebuild the RHS constant here to
12537 // ensure we don't rely on particular values of undef lanes.
12538 OpRHS = DAG.getConstant(SplatValue, DL, VT);
12539 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
12540 }
12541 }
12542 }
12543 }
12544 }
12545 }
12546 }
12547
12548 if (SimplifySelectOps(N, N1, N2))
12549 return SDValue(N, 0); // Don't revisit N.
12550
12551 // Fold (vselect all_ones, N1, N2) -> N1
12553 return N1;
12554 // Fold (vselect all_zeros, N1, N2) -> N2
12556 return N2;
12557
12558 // The ConvertSelectToConcatVector function assumes that both of the above
12559 // checks for (vselect (build_vector all{ones,zeros}) ...) have been made
12560 // and addressed.
12561 if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
12564 if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
12565 return CV;
12566 }
12567
12568 if (SDValue V = foldVSelectOfConstants(N))
12569 return V;
12570
12571 if (hasOperation(ISD::SRA, VT))
12573 return V;
12574
12576 return SDValue(N, 0);
12577
12578 return SDValue();
12579}
12580
12581SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
12582 SDValue N0 = N->getOperand(0);
12583 SDValue N1 = N->getOperand(1);
12584 SDValue N2 = N->getOperand(2);
12585 SDValue N3 = N->getOperand(3);
12586 SDValue N4 = N->getOperand(4);
12587 ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
12588 SDLoc DL(N);
12589
12590 // fold select_cc lhs, rhs, x, x, cc -> x
12591 if (N2 == N3)
12592 return N2;
12593
12594 // select_cc bool, 0, x, y, seteq -> select bool, y, x
12595 if (CC == ISD::SETEQ && !LegalTypes && N0.getValueType() == MVT::i1 &&
12596 isNullConstant(N1))
12597 return DAG.getSelect(DL, N2.getValueType(), N0, N3, N2);
12598
12599 // Determine if the condition we're dealing with is constant
12600 if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
12601 CC, DL, false)) {
12602 AddToWorklist(SCC.getNode());
12603
12604 // cond always true -> true val
12605 // cond always false -> false val
12606 if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode()))
12607 return SCCC->isZero() ? N3 : N2;
12608
12609 // When the condition is UNDEF, just return the first operand. This is
12610 // coherent with DAG creation: no setcc node is created in this case.
12611 if (SCC->isUndef())
12612 return N2;
12613
12614 // Fold to a simpler select_cc
12615 if (SCC.getOpcode() == ISD::SETCC) {
12616 SDValue SelectOp =
12617 DAG.getNode(ISD::SELECT_CC, DL, N2.getValueType(), SCC.getOperand(0),
12618 SCC.getOperand(1), N2, N3, SCC.getOperand(2));
12619 SelectOp->setFlags(SCC->getFlags());
12620 return SelectOp;
12621 }
12622 }
12623
12624 // If we can fold this based on the true/false value, do so.
12625 if (SimplifySelectOps(N, N2, N3))
12626 return SDValue(N, 0); // Don't revisit N.
12627
12628 // fold select_cc into other things, such as min/max/abs
12629 return SimplifySelectCC(DL, N0, N1, N2, N3, CC);
12630}
12631
12632SDValue DAGCombiner::visitSETCC(SDNode *N) {
12633 // setcc is very commonly used as an argument to brcond. This pattern
12634 // also lends itself to numerous combines and, as a result, it is desirable
12635 // to keep the argument to a brcond as a setcc as much as possible.
12636 bool PreferSetCC =
12637 N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BRCOND;
12638
12639 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
12640 EVT VT = N->getValueType(0);
12641 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
12642 SDLoc DL(N);
12643
12644 if (SDValue Combined = SimplifySetCC(VT, N0, N1, Cond, DL, !PreferSetCC)) {
12645 // If we prefer to have a setcc, and we don't, we'll try our best to
12646 // recreate one using rebuildSetCC.
12647 if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
12648 SDValue NewSetCC = rebuildSetCC(Combined);
12649
12650 // We don't have anything interesting to combine to.
12651 if (NewSetCC.getNode() == N)
12652 return SDValue();
12653
12654 if (NewSetCC)
12655 return NewSetCC;
12656 }
12657 return Combined;
12658 }
12659
12660 // Optimize
12661 // 1) (icmp eq/ne (and X, C0), (shift X, C1))
12662 // or
12663 // 2) (icmp eq/ne X, (rotate X, C1))
12664 // If C0 is a mask or shifted mask and the shift amt (C1) isolates the
12665 // remaining bits (i.e., something like `(x64 & UINT32_MAX) == (x64 >> 32)`)
12666 // Then:
12667 // If C1 is a power of 2, then the rotate and shift+and versions are
12668 // equivalent, so we can interchange them depending on target preference.
12669 // Otherwise, if we have the shift+and version we can interchange srl/shl,
12670 // which in turn affects the constant C0. We can use this to get better
12671 // constants, again determined by target preference.
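 // For example, for a 64-bit x, "(x & 0xFFFFFFFF) == (x >> 32)" (low half equals
 // high half) is equivalent to "x == rotr(x, 32)" and also to
 // "(x & 0xFFFFFFFF00000000) == (x << 32)", so the cheapest form can be chosen.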
12672 if (Cond == ISD::SETNE || Cond == ISD::SETEQ) {
12673 auto IsAndWithShift = [](SDValue A, SDValue B) {
12674 return A.getOpcode() == ISD::AND &&
12675 (B.getOpcode() == ISD::SRL || B.getOpcode() == ISD::SHL) &&
12676 A.getOperand(0) == B.getOperand(0);
12677 };
12678 auto IsRotateWithOp = [](SDValue A, SDValue B) {
12679 return (B.getOpcode() == ISD::ROTL || B.getOpcode() == ISD::ROTR) &&
12680 B.getOperand(0) == A;
12681 };
12682 SDValue AndOrOp = SDValue(), ShiftOrRotate = SDValue();
12683 bool IsRotate = false;
12684
12685 // Find either shift+and or rotate pattern.
12686 if (IsAndWithShift(N0, N1)) {
12687 AndOrOp = N0;
12688 ShiftOrRotate = N1;
12689 } else if (IsAndWithShift(N1, N0)) {
12690 AndOrOp = N1;
12691 ShiftOrRotate = N0;
12692 } else if (IsRotateWithOp(N0, N1)) {
12693 IsRotate = true;
12694 AndOrOp = N0;
12695 ShiftOrRotate = N1;
12696 } else if (IsRotateWithOp(N1, N0)) {
12697 IsRotate = true;
12698 AndOrOp = N1;
12699 ShiftOrRotate = N0;
12700 }
12701
12702 if (AndOrOp && ShiftOrRotate && ShiftOrRotate.hasOneUse() &&
12703 (IsRotate || AndOrOp.hasOneUse())) {
12704 EVT OpVT = N0.getValueType();
12705 // Get the constant shift/rotate amount and, possibly, the mask (if it is
12706 // the shift+and variant).
12707 auto GetAPIntValue = [](SDValue Op) -> std::optional<APInt> {
12708 ConstantSDNode *CNode = isConstOrConstSplat(Op, /*AllowUndefs*/ false,
12709 /*AllowTrunc*/ false);
12710 if (CNode == nullptr)
12711 return std::nullopt;
12712 return CNode->getAPIntValue();
12713 };
12714 std::optional<APInt> AndCMask =
12715 IsRotate ? std::nullopt : GetAPIntValue(AndOrOp.getOperand(1));
12716 std::optional<APInt> ShiftCAmt =
12717 GetAPIntValue(ShiftOrRotate.getOperand(1));
12718 unsigned NumBits = OpVT.getScalarSizeInBits();
12719
12720 // We found constants.
12721 if (ShiftCAmt && (IsRotate || AndCMask) && ShiftCAmt->ult(NumBits)) {
12722 unsigned ShiftOpc = ShiftOrRotate.getOpcode();
12723 // Check that the constants meet the constraints.
12724 bool CanTransform = IsRotate;
12725 if (!CanTransform) {
12726 // Check that the mask and shift complement each other
12727 CanTransform = *ShiftCAmt == (~*AndCMask).popcount();
12728 // Check that we are comparing all bits
12729 CanTransform &= (*ShiftCAmt + AndCMask->popcount()) == NumBits;
12730 // Check that the and mask is correct for the shift
12731 CanTransform &=
12732 ShiftOpc == ISD::SHL ? (~*AndCMask).isMask() : AndCMask->isMask();
12733 }
12734
12735 // See if target prefers another shift/rotate opcode.
12736 unsigned NewShiftOpc = TLI.preferedOpcodeForCmpEqPiecesOfOperand(
12737 OpVT, ShiftOpc, ShiftCAmt->isPowerOf2(), *ShiftCAmt, AndCMask);
12738 // Transform is valid and we have a new preference.
12739 if (CanTransform && NewShiftOpc != ShiftOpc) {
12740 SDValue NewShiftOrRotate =
12741 DAG.getNode(NewShiftOpc, DL, OpVT, ShiftOrRotate.getOperand(0),
12742 ShiftOrRotate.getOperand(1));
12743 SDValue NewAndOrOp = SDValue();
12744
12745 if (NewShiftOpc == ISD::SHL || NewShiftOpc == ISD::SRL) {
12746 APInt NewMask =
12747 NewShiftOpc == ISD::SHL
12748 ? APInt::getHighBitsSet(NumBits,
12749 NumBits - ShiftCAmt->getZExtValue())
12750 : APInt::getLowBitsSet(NumBits,
12751 NumBits - ShiftCAmt->getZExtValue());
12752 NewAndOrOp =
12753 DAG.getNode(ISD::AND, DL, OpVT, ShiftOrRotate.getOperand(0),
12754 DAG.getConstant(NewMask, DL, OpVT));
12755 } else {
12756 NewAndOrOp = ShiftOrRotate.getOperand(0);
12757 }
12758
12759 return DAG.getSetCC(DL, VT, NewAndOrOp, NewShiftOrRotate, Cond);
12760 }
12761 }
12762 }
12763 }
12764 return SDValue();
12765}
12766
12767SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
12768 SDValue LHS = N->getOperand(0);
12769 SDValue RHS = N->getOperand(1);
12770 SDValue Carry = N->getOperand(2);
12771 SDValue Cond = N->getOperand(3);
12772
12773 // If Carry is false, fold to a regular SETCC.
12774 if (isNullConstant(Carry))
12775 return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
12776
12777 return SDValue();
12778}
12779
12780/// Check if N satisfies:
12781/// N is used once.
12782/// N is a Load.
12783/// The load is compatible with ExtOpcode. That means:
12784/// if the load has an explicit zero/sign extension, ExtOpcode must have the
12785/// same extension;
12786/// otherwise, any extension is compatible.
12787static bool isCompatibleLoad(SDValue N, unsigned ExtOpcode) {
12788 if (!N.hasOneUse())
12789 return false;
12790
12791 if (!isa<LoadSDNode>(N))
12792 return false;
12793
12794 LoadSDNode *Load = cast<LoadSDNode>(N);
12795 ISD::LoadExtType LoadExt = Load->getExtensionType();
12796 if (LoadExt == ISD::NON_EXTLOAD || LoadExt == ISD::EXTLOAD)
12797 return true;
12798
12799 // Now LoadExt is either SEXTLOAD or ZEXTLOAD, ExtOpcode must have the same
12800 // extension.
12801 if ((LoadExt == ISD::SEXTLOAD && ExtOpcode != ISD::SIGN_EXTEND) ||
12802 (LoadExt == ISD::ZEXTLOAD && ExtOpcode != ISD::ZERO_EXTEND))
12803 return false;
12804
12805 return true;
12806}
12807
12808/// Fold
12809/// (sext (select c, load x, load y)) -> (select c, sextload x, sextload y)
12810/// (zext (select c, load x, load y)) -> (select c, zextload x, zextload y)
12811/// (aext (select c, load x, load y)) -> (select c, extload x, extload y)
12812/// This function is called by the DAGCombiner when visiting sext/zext/aext
12813/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
12815 SelectionDAG &DAG, const SDLoc &DL,
12816 CombineLevel Level) {
12817 unsigned Opcode = N->getOpcode();
12818 SDValue N0 = N->getOperand(0);
12819 EVT VT = N->getValueType(0);
12820 assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
12821 Opcode == ISD::ANY_EXTEND) &&
12822 "Expected EXTEND dag node in input!");
12823
12824 if (!(N0->getOpcode() == ISD::SELECT || N0->getOpcode() == ISD::VSELECT) ||
12825 !N0.hasOneUse())
12826 return SDValue();
12827
12828 SDValue Op1 = N0->getOperand(1);
12829 SDValue Op2 = N0->getOperand(2);
12830 if (!isCompatibleLoad(Op1, Opcode) || !isCompatibleLoad(Op2, Opcode))
12831 return SDValue();
12832
12833 auto ExtLoadOpcode = ISD::EXTLOAD;
12834 if (Opcode == ISD::SIGN_EXTEND)
12835 ExtLoadOpcode = ISD::SEXTLOAD;
12836 else if (Opcode == ISD::ZERO_EXTEND)
12837 ExtLoadOpcode = ISD::ZEXTLOAD;
12838
12839 // An illegal VSELECT may fail in ISel if it appears after legalization (DAG
12840 // Combine2), so we should conservatively check the OperationAction.
12841 LoadSDNode *Load1 = cast<LoadSDNode>(Op1);
12842 LoadSDNode *Load2 = cast<LoadSDNode>(Op2);
12843 if (!TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load1->getMemoryVT()) ||
12844 !TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load2->getMemoryVT()) ||
12845 (N0->getOpcode() == ISD::VSELECT && Level >= AfterLegalizeTypes &&
12847 return SDValue();
12848
12849 SDValue Ext1 = DAG.getNode(Opcode, DL, VT, Op1);
12850 SDValue Ext2 = DAG.getNode(Opcode, DL, VT, Op2);
12851 return DAG.getSelect(DL, VT, N0->getOperand(0), Ext1, Ext2);
12852}
12853
12854/// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
12855/// a build_vector of constants.
12856/// This function is called by the DAGCombiner when visiting sext/zext/aext
12857/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
12858/// Vector extends are not folded if operations are legal; this is to
12859/// avoid introducing illegal build_vector dag nodes.
12861 const TargetLowering &TLI,
12862 SelectionDAG &DAG, bool LegalTypes) {
12863 unsigned Opcode = N->getOpcode();
12864 SDValue N0 = N->getOperand(0);
12865 EVT VT = N->getValueType(0);
12866
12867 assert((ISD::isExtOpcode(Opcode) || ISD::isExtVecInRegOpcode(Opcode)) &&
12868 "Expected EXTEND dag node in input!");
12869
12870 // fold (sext c1) -> c1
12871 // fold (zext c1) -> c1
12872 // fold (aext c1) -> c1
12873 if (isa<ConstantSDNode>(N0))
12874 return DAG.getNode(Opcode, DL, VT, N0);
12875
12876 // fold (sext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
12877 // fold (zext (select cond, c1, c2)) -> (select cond, zext c1, zext c2)
12878 // fold (aext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
12879 if (N0->getOpcode() == ISD::SELECT) {
12880 SDValue Op1 = N0->getOperand(1);
12881 SDValue Op2 = N0->getOperand(2);
12882 if (isa<ConstantSDNode>(Op1) && isa<ConstantSDNode>(Op2) &&
12883 (Opcode != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0.getValueType(), VT))) {
12884 // For any_extend, choose sign extension of the constants to allow a
12885 // possible further transform to sign_extend_inreg, i.e.:
12886 //
12887 // t1: i8 = select t0, Constant:i8<-1>, Constant:i8<0>
12888 // t2: i64 = any_extend t1
12889 // -->
12890 // t3: i64 = select t0, Constant:i64<-1>, Constant:i64<0>
12891 // -->
12892 // t4: i64 = sign_extend_inreg t3
12893 unsigned FoldOpc = Opcode;
12894 if (FoldOpc == ISD::ANY_EXTEND)
12895 FoldOpc = ISD::SIGN_EXTEND;
12896 return DAG.getSelect(DL, VT, N0->getOperand(0),
12897 DAG.getNode(FoldOpc, DL, VT, Op1),
12898 DAG.getNode(FoldOpc, DL, VT, Op2));
12899 }
12900 }
12901
12902 // fold (sext (build_vector AllConstants)) -> (build_vector AllConstants)
12903 // fold (zext (build_vector AllConstants)) -> (build_vector AllConstants)
12904 // fold (aext (build_vector AllConstants)) -> (build_vector AllConstants)
12905 EVT SVT = VT.getScalarType();
12906 if (!(VT.isVector() && (!LegalTypes || TLI.isTypeLegal(SVT)) &&
12908 return SDValue();
12909
12910 // We can fold this node into a build_vector.
12911 unsigned VTBits = SVT.getSizeInBits();
12912 unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
12914 unsigned NumElts = VT.getVectorNumElements();
12915
12916 for (unsigned i = 0; i != NumElts; ++i) {
12917 SDValue Op = N0.getOperand(i);
12918 if (Op.isUndef()) {
12919 if (Opcode == ISD::ANY_EXTEND || Opcode == ISD::ANY_EXTEND_VECTOR_INREG)
12920 Elts.push_back(DAG.getUNDEF(SVT));
12921 else
12922 Elts.push_back(DAG.getConstant(0, DL, SVT));
12923 continue;
12924 }
12925
12926 SDLoc DL(Op);
12927 // Get the constant value and, if needed, truncate it to the size of the type.
12928 // Nodes like build_vector might have constants wider than the scalar type.
12929 APInt C = Op->getAsAPIntVal().zextOrTrunc(EVTBits);
12930 if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
12931 Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
12932 else
12933 Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
12934 }
12935
12936 return DAG.getBuildVector(VT, DL, Elts);
12937}
12938
12939// ExtendUsesToFormExtLoad - Try to extend the uses of a load to enable this:
12940// "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
12941// transformation. Returns true if the extensions are possible and the
12942// above-mentioned transformation is profitable.
12944 unsigned ExtOpc,
12945 SmallVectorImpl<SDNode *> &ExtendNodes,
12946 const TargetLowering &TLI) {
12947 bool HasCopyToRegUses = false;
12948 bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType());
12949 for (SDNode::use_iterator UI = N0->use_begin(), UE = N0->use_end(); UI != UE;
12950 ++UI) {
12951 SDNode *User = *UI;
12952 if (User == N)
12953 continue;
12954 if (UI.getUse().getResNo() != N0.getResNo())
12955 continue;
12956 // FIXME: Only extend SETCC N, N and SETCC N, c for now.
12957 if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
12958 ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
12959 if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
12960 // Sign bits will be lost after a zext.
12961 return false;
12962 bool Add = false;
12963 for (unsigned i = 0; i != 2; ++i) {
12964 SDValue UseOp = User->getOperand(i);
12965 if (UseOp == N0)
12966 continue;
12967 if (!isa<ConstantSDNode>(UseOp))
12968 return false;
12969 Add = true;
12970 }
12971 if (Add)
12972 ExtendNodes.push_back(User);
12973 continue;
12974 }
12975 // If truncates aren't free and there are users we can't
12976 // extend, it isn't worthwhile.
12977 if (!isTruncFree)
12978 return false;
12979 // Remember if this value is live-out.
12980 if (User->getOpcode() == ISD::CopyToReg)
12981 HasCopyToRegUses = true;
12982 }
12983
12984 if (HasCopyToRegUses) {
12985 bool BothLiveOut = false;
12986 for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
12987 UI != UE; ++UI) {
12988 SDUse &Use = UI.getUse();
12989 if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
12990 BothLiveOut = true;
12991 break;
12992 }
12993 }
12994 if (BothLiveOut)
12995 // Both unextended and extended values are live out. There had better be
12996 // a good reason for the transformation.
12997 return !ExtendNodes.empty();
12998 }
12999 return true;
13000}
13001
13002void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
13003 SDValue OrigLoad, SDValue ExtLoad,
13004 ISD::NodeType ExtType) {
13005 // Extend SetCC uses if necessary.
13006 SDLoc DL(ExtLoad);
13007 for (SDNode *SetCC : SetCCs) {
13009
13010 for (unsigned j = 0; j != 2; ++j) {
13011 SDValue SOp = SetCC->getOperand(j);
13012 if (SOp == OrigLoad)
13013 Ops.push_back(ExtLoad);
13014 else
13015 Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
13016 }
13017
13018 Ops.push_back(SetCC->getOperand(2));
13019 CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
13020 }
13021}
13022
13023// FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
13024SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
13025 SDValue N0 = N->getOperand(0);
13026 EVT DstVT = N->getValueType(0);
13027 EVT SrcVT = N0.getValueType();
13028
13029 assert((N->getOpcode() == ISD::SIGN_EXTEND ||
13030 N->getOpcode() == ISD::ZERO_EXTEND) &&
13031 "Unexpected node type (not an extend)!");
13032
13033 // fold (sext (load x)) to multiple smaller sextloads; same for zext.
13034 // For example, on a target with legal v4i32, but illegal v8i32, turn:
13035 // (v8i32 (sext (v8i16 (load x))))
13036 // into:
13037 // (v8i32 (concat_vectors (v4i32 (sextload x)),
13038 // (v4i32 (sextload (x + 16)))))
13039 // Where uses of the original load, i.e.:
13040 // (v8i16 (load x))
13041 // are replaced with:
13042 // (v8i16 (truncate
13043 // (v8i32 (concat_vectors (v4i32 (sextload x)),
13044 // (v4i32 (sextload (x + 16)))))))
13045 //
13046 // This combine is only applicable to illegal, but splittable, vectors.
13047 // All legal types, and illegal non-vector types, are handled elsewhere.
13048 // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
13049 //
13050 if (N0->getOpcode() != ISD::LOAD)
13051 return SDValue();
13052
13053 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
13054
13055 if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
13056 !N0.hasOneUse() || !LN0->isSimple() ||
13057 !DstVT.isVector() || !DstVT.isPow2VectorType() ||
13059 return SDValue();
13060
13062 if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI))
13063 return SDValue();
13064
13065 ISD::LoadExtType ExtType =
13066 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
13067
13068 // Try to split the vector types to get down to legal types.
13069 EVT SplitSrcVT = SrcVT;
13070 EVT SplitDstVT = DstVT;
13071 while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
13072 SplitSrcVT.getVectorNumElements() > 1) {
13073 SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
13074 SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
13075 }
13076
13077 if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
13078 return SDValue();
13079
13080 assert(!DstVT.isScalableVector() && "Unexpected scalable vector type");
13081
13082 SDLoc DL(N);
13083 const unsigned NumSplits =
13084 DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
13085 const unsigned Stride = SplitSrcVT.getStoreSize();
13088
13089 SDValue BasePtr = LN0->getBasePtr();
13090 for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
13091 const unsigned Offset = Idx * Stride;
13092
13093 SDValue SplitLoad =
13094 DAG.getExtLoad(ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(),
13095 BasePtr, LN0->getPointerInfo().getWithOffset(Offset),
13096 SplitSrcVT, LN0->getOriginalAlign(),
13097 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
13098
13099 BasePtr = DAG.getMemBasePlusOffset(BasePtr, TypeSize::getFixed(Stride), DL);
13100
13101 Loads.push_back(SplitLoad.getValue(0));
13102 Chains.push_back(SplitLoad.getValue(1));
13103 }
13104
13105 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
13106 SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
13107
13108 // Simplify TF.
13109 AddToWorklist(NewChain.getNode());
13110
13111 CombineTo(N, NewValue);
13112
13113 // Replace uses of the original load (before extension)
13114 // with a truncate of the concatenated sextloaded vectors.
13115 SDValue Trunc =
13116 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
13117 ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode());
13118 CombineTo(N0.getNode(), Trunc, NewChain);
13119 return SDValue(N, 0); // Return N so it doesn't get rechecked!
13120}
13121
13122// fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
13123// (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
13124SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
13125 assert(N->getOpcode() == ISD::ZERO_EXTEND);
13126 EVT VT = N->getValueType(0);
13127 EVT OrigVT = N->getOperand(0).getValueType();
13128 if (TLI.isZExtFree(OrigVT, VT))
13129 return SDValue();
13130
13131 // and/or/xor
13132 SDValue N0 = N->getOperand(0);
13133 if (!ISD::isBitwiseLogicOp(N0.getOpcode()) ||
13134 N0.getOperand(1).getOpcode() != ISD::Constant ||
13135 (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT)))
13136 return SDValue();
13137
13138 // shl/shr
13139 SDValue N1 = N0->getOperand(0);
13140 if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) ||
13141 N1.getOperand(1).getOpcode() != ISD::Constant ||
13142 (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT)))
13143 return SDValue();
13144
13145 // load
13146 if (!isa<LoadSDNode>(N1.getOperand(0)))
13147 return SDValue();
13148 LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0));
13149 EVT MemVT = Load->getMemoryVT();
13150 if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) ||
13151 Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
13152 return SDValue();
13153
13154
13155 // If the shift op is SHL, the logic op must be AND, otherwise the result
13156 // will be wrong.
13157 if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND)
13158 return SDValue();
13159
13160 if (!N0.hasOneUse() || !N1.hasOneUse())
13161 return SDValue();
13162
13164 if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0),
13165 ISD::ZERO_EXTEND, SetCCs, TLI))
13166 return SDValue();
13167
13168 // Actually do the transformation.
13169 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT,
13170 Load->getChain(), Load->getBasePtr(),
13171 Load->getMemoryVT(), Load->getMemOperand());
13172
13173 SDLoc DL1(N1);
13174 SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad,
13175 N1.getOperand(1));
13176
13178 SDLoc DL0(N0);
13179 SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
13180 DAG.getConstant(Mask, DL0, VT));
13181
13182 ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
13183 CombineTo(N, And);
13184 if (SDValue(Load, 0).hasOneUse()) {
13185 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
13186 } else {
13187 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load),
13188 Load->getValueType(0), ExtLoad);
13189 CombineTo(Load, Trunc, ExtLoad.getValue(1));
13190 }
13191
13192 // N0 is dead at this point.
13193 recursivelyDeleteUnusedNodes(N0.getNode());
13194
13195 return SDValue(N,0); // Return N so it doesn't get rechecked!
13196}
13197
13198/// If we're narrowing or widening the result of a vector select and the final
13199/// size is the same size as a setcc (compare) feeding the select, then try to
13200/// apply the cast operation to the select's operands because matching vector
13201/// sizes for a select condition and other operands should be more efficient.
13202SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
13203 unsigned CastOpcode = Cast->getOpcode();
13204 assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
13205 CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
13206 CastOpcode == ISD::FP_ROUND) &&
13207 "Unexpected opcode for vector select narrowing/widening");
13208
13209 // We only do this transform before legal ops because the pattern may be
13210 // obfuscated by target-specific operations after legalization. Do not create
13211 // an illegal select op, however, because that may be difficult to lower.
13212 EVT VT = Cast->getValueType(0);
13213 if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
13214 return SDValue();
13215
13216 SDValue VSel = Cast->getOperand(0);
13217 if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() ||
13218 VSel.getOperand(0).getOpcode() != ISD::SETCC)
13219 return SDValue();
13220
13221 // Does the setcc have the same vector size as the casted select?
13222 SDValue SetCC = VSel.getOperand(0);
13223 EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
13224 if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
13225 return SDValue();
13226
13227 // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
13228 SDValue A = VSel.getOperand(1);
13229 SDValue B = VSel.getOperand(2);
13230 SDValue CastA, CastB;
13231 SDLoc DL(Cast);
13232 if (CastOpcode == ISD::FP_ROUND) {
13233 // FP_ROUND (fptrunc) has an extra flag operand to pass along.
13234 CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
13235 CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
13236 } else {
13237 CastA = DAG.getNode(CastOpcode, DL, VT, A);
13238 CastB = DAG.getNode(CastOpcode, DL, VT, B);
13239 }
13240 return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
13241}
13242
13243// fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
13244// fold ([s|z]ext ( extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
13246 const TargetLowering &TLI, EVT VT,
13247 bool LegalOperations, SDNode *N,
13248 SDValue N0, ISD::LoadExtType ExtLoadType) {
13249 SDNode *N0Node = N0.getNode();
13250 bool isAExtLoad = (ExtLoadType == ISD::SEXTLOAD) ? ISD::isSEXTLoad(N0Node)
13251 : ISD::isZEXTLoad(N0Node);
13252 if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) ||
13253 !ISD::isUNINDEXEDLoad(N0Node) || !N0.hasOneUse())
13254 return SDValue();
13255
13256 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
13257 EVT MemVT = LN0->getMemoryVT();
13258 if ((LegalOperations || !LN0->isSimple() ||
13259 VT.isVector()) &&
13260 !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
13261 return SDValue();
13262
13263 SDValue ExtLoad =
13264 DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
13265 LN0->getBasePtr(), MemVT, LN0->getMemOperand());
13266 Combiner.CombineTo(N, ExtLoad);
13267 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
13268 if (LN0->use_empty())
13269 Combiner.recursivelyDeleteUnusedNodes(LN0);
13270 return SDValue(N, 0); // Return N so it doesn't get rechecked!
13271}
13272
13273// fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x)))
13274// Only generate vector extloads when 1) they're legal, and 2) they are
13275// deemed desirable by the target. NonNegZExt can be set to true if a zero
13276// extend has the nonneg flag to allow use of sextload if profitable.
13278 const TargetLowering &TLI, EVT VT,
13279 bool LegalOperations, SDNode *N, SDValue N0,
13280 ISD::LoadExtType ExtLoadType,
13281 ISD::NodeType ExtOpc,
13282 bool NonNegZExt = false) {
13284 return {};
13285
13286 // If this is zext nneg, see if it would make sense to treat it as a sext.
13287 if (NonNegZExt) {
13288 assert(ExtLoadType == ISD::ZEXTLOAD && ExtOpc == ISD::ZERO_EXTEND &&
13289 "Unexpected load type or opcode");
13290 for (SDNode *User : N0->uses()) {
13291 if (User->getOpcode() == ISD::SETCC) {
13292 ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
13294 ExtLoadType = ISD::SEXTLOAD;
13295 ExtOpc = ISD::SIGN_EXTEND;
13296 break;
13297 }
13298 }
13299 }
13300 }
13301
13302 // TODO: The isFixedLengthVector() check should be removed; any negative
13303 // effects on code generation would then be the result of that target's
13304 // implementation of isVectorLoadExtDesirable().
13305 if ((LegalOperations || VT.isFixedLengthVector() ||
13306 !cast<LoadSDNode>(N0)->isSimple()) &&
13307 !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType()))
13308 return {};
13309
13310 bool DoXform = true;
13312 if (!N0.hasOneUse())
13313 DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ExtOpc, SetCCs, TLI);
13314 if (VT.isVector())
13315 DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
13316 if (!DoXform)
13317 return {};
13318
13319 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
13320 SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
13321 LN0->getBasePtr(), N0.getValueType(),
13322 LN0->getMemOperand());
13323 Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc);
13324 // If the load value is used only by N, replace it via CombineTo N.
13325 bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
13326 Combiner.CombineTo(N, ExtLoad);
13327 if (NoReplaceTrunc) {
13328 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
13329 Combiner.recursivelyDeleteUnusedNodes(LN0);
13330 } else {
13331 SDValue Trunc =
13332 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
13333 Combiner.CombineTo(LN0, Trunc, ExtLoad.getValue(1));
13334 }
13335 return SDValue(N, 0); // Return N so it doesn't get rechecked!
13336}
13337
13338static SDValue
13340 bool LegalOperations, SDNode *N, SDValue N0,
13341 ISD::LoadExtType ExtLoadType, ISD::NodeType ExtOpc) {
13342 if (!N0.hasOneUse())
13343 return SDValue();
13344
13345 MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0);
13346 if (!Ld || Ld->getExtensionType() != ISD::NON_EXTLOAD)
13347 return SDValue();
13348
13349 if ((LegalOperations || !cast<MaskedLoadSDNode>(N0)->isSimple()) &&
13350 !TLI.isLoadExtLegalOrCustom(ExtLoadType, VT, Ld->getValueType(0)))
13351 return SDValue();
13352
13353 if (!TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
13354 return SDValue();
13355
13356 SDLoc dl(Ld);
13357 SDValue PassThru = DAG.getNode(ExtOpc, dl, VT, Ld->getPassThru());
13358 SDValue NewLoad = DAG.getMaskedLoad(
13359 VT, dl, Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(), Ld->getMask(),
13360 PassThru, Ld->getMemoryVT(), Ld->getMemOperand(), Ld->getAddressingMode(),
13361 ExtLoadType, Ld->isExpandingLoad());
13362 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), SDValue(NewLoad.getNode(), 1));
13363 return NewLoad;
13364}
13365
13366// fold ([s|z]ext (atomic_load)) -> ([s|z]ext (truncate ([s|z]ext atomic_load)))
13368 const TargetLowering &TLI, EVT VT,
13369 SDValue N0,
13370 ISD::LoadExtType ExtLoadType) {
13371 auto *ALoad = dyn_cast<AtomicSDNode>(N0);
13372 if (!ALoad || ALoad->getOpcode() != ISD::ATOMIC_LOAD)
13373 return {};
13374 EVT MemoryVT = ALoad->getMemoryVT();
13375 if (!TLI.isAtomicLoadExtLegal(ExtLoadType, VT, MemoryVT))
13376 return {};
13377 // Can't fold into ALoad if it is already extending differently.
13378 ISD::LoadExtType ALoadExtTy = ALoad->getExtensionType();
13379 if ((ALoadExtTy == ISD::ZEXTLOAD && ExtLoadType == ISD::SEXTLOAD) ||
13380 (ALoadExtTy == ISD::SEXTLOAD && ExtLoadType == ISD::ZEXTLOAD))
13381 return {};
13382
13383 EVT OrigVT = ALoad->getValueType(0);
13384 assert(OrigVT.getSizeInBits() < VT.getSizeInBits() && "VT should be wider.");
13385 auto *NewALoad = cast<AtomicSDNode>(DAG.getAtomic(
13386 ISD::ATOMIC_LOAD, SDLoc(ALoad), MemoryVT, VT, ALoad->getChain(),
13387 ALoad->getBasePtr(), ALoad->getMemOperand()));
13388 NewALoad->setExtensionType(ExtLoadType);
13390 SDValue(ALoad, 0),
13391 DAG.getNode(ISD::TRUNCATE, SDLoc(ALoad), OrigVT, SDValue(NewALoad, 0)));
13392 // Update the chain uses.
13393 DAG.ReplaceAllUsesOfValueWith(SDValue(ALoad, 1), SDValue(NewALoad, 1));
13394 return SDValue(NewALoad, 0);
13395}
13396
13398 bool LegalOperations) {
13399 assert((N->getOpcode() == ISD::SIGN_EXTEND ||
13400 N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext");
13401
13402 SDValue SetCC = N->getOperand(0);
13403 if (LegalOperations || SetCC.getOpcode() != ISD::SETCC ||
13404 !SetCC.hasOneUse() || SetCC.getValueType() != MVT::i1)
13405 return SDValue();
13406
13407 SDValue X = SetCC.getOperand(0);
13408 SDValue Ones = SetCC.getOperand(1);
13409 ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
13410 EVT VT = N->getValueType(0);
13411 EVT XVT = X.getValueType();
13412 // setge X, C is canonicalized to setgt, so we do not need to match that
13413 // pattern. The setlt sibling is folded in SimplifySelectCC() because it does
13414 // not require the 'not' op.
13415 if (CC == ISD::SETGT && isAllOnesConstant(Ones) && VT == XVT) {
13416 // Invert and smear/shift the sign bit:
13417 // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
13418 // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
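 // For example, with i32 X = 5: setgt(5, -1) is true; ~X = 0xFFFFFFFA has its
 // sign bit set, so sra(~X, 31) = -1 and srl(~X, 31) = 1, matching sext/zext of
 // the i1 true value. For X = -5 the sign bit of ~X is clear and both yield 0.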
13419 SDLoc DL(N);
13420 unsigned ShCt = VT.getSizeInBits() - 1;
13421 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13422 if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
13423 SDValue NotX = DAG.getNOT(DL, X, VT);
13424 SDValue ShiftAmount = DAG.getConstant(ShCt, DL, VT);
13425 auto ShiftOpcode =
13426 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL;
13427 return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount);
13428 }
13429 }
13430 return SDValue();
13431}
13432
13433SDValue DAGCombiner::foldSextSetcc(SDNode *N) {
13434 SDValue N0 = N->getOperand(0);
13435 if (N0.getOpcode() != ISD::SETCC)
13436 return SDValue();
13437
13438 SDValue N00 = N0.getOperand(0);
13439 SDValue N01 = N0.getOperand(1);
13440 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
13441 EVT VT = N->getValueType(0);
13442 EVT N00VT = N00.getValueType();
13443 SDLoc DL(N);
13444
13445 // Propagate fast-math-flags.
13446 SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
13447
13448 // On some architectures (such as SSE/NEON/etc) the SETCC result type is
13449 // the same size as the compared operands. Try to optimize sext(setcc())
13450 // if this is the case.
13451 if (VT.isVector() && !LegalOperations &&
13452 TLI.getBooleanContents(N00VT) ==
13454 EVT SVT = getSetCCResultType(N00VT);
13455
13456 // If we already have the desired type, don't change it.
13457 if (SVT != N0.getValueType()) {
13458 // We know that the # elements of the result is the same as the
13459 // # elements of the compare (and the # elements of the compare result
13460 // for that matter). Check to see that they are the same size. If so,
13461 // we know that the element size of the sext'd result matches the
13462 // element size of the compare operands.
13463 if (VT.getSizeInBits() == SVT.getSizeInBits())
13464 return DAG.getSetCC(DL, VT, N00, N01, CC);
13465
13466 // If the desired elements are smaller or larger than the source
13467 // elements, we can use a matching integer vector type and then
13468 // truncate/sign extend.
13469 EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
13470 if (SVT == MatchingVecType) {
13471 SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
13472 return DAG.getSExtOrTrunc(VsetCC, DL, VT);
13473 }
13474 }
13475
13476 // Try to eliminate the sext of a setcc by zexting the compare operands.
13477 if (N0.hasOneUse() && TLI.isOperationLegalOrCustom(ISD::SETCC, VT) &&
13479 bool IsSignedCmp = ISD::isSignedIntSetCC(CC);
13480 unsigned LoadOpcode = IsSignedCmp ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
13481 unsigned ExtOpcode = IsSignedCmp ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
13482
13483 // We have an unsupported narrow vector compare op that would be legal
13484 // if extended to the destination type. See if the compare operands
13485 // can be freely extended to the destination type.
13486 auto IsFreeToExtend = [&](SDValue V) {
13487 if (isConstantOrConstantVector(V, /*NoOpaques*/ true))
13488 return true;
13489 // Match a simple, non-extended load that can be converted to a
13490 // legal {z/s}ext-load.
13491 // TODO: Allow widening of an existing {z/s}ext-load?
13492 if (!(ISD::isNON_EXTLoad(V.getNode()) &&
13493 ISD::isUNINDEXEDLoad(V.getNode()) &&
13494 cast<LoadSDNode>(V)->isSimple() &&
13495 TLI.isLoadExtLegal(LoadOpcode, VT, V.getValueType())))
13496 return false;
13497
13498 // Non-chain users of this value must either be the setcc in this
13499 // sequence or extends that can be folded into the new {z/s}ext-load.
13500 for (SDNode::use_iterator UI = V->use_begin(), UE = V->use_end();
13501 UI != UE; ++UI) {
13502 // Skip uses of the chain and the setcc.
13503 SDNode *User = *UI;
13504 if (UI.getUse().getResNo() != 0 || User == N0.getNode())
13505 continue;
13506 // Extra users must have exactly the same cast we are about to create.
13507 // TODO: This restriction could be eased if ExtendUsesToFormExtLoad()
13508 // is enhanced similarly.
13509 if (User->getOpcode() != ExtOpcode || User->getValueType(0) != VT)
13510 return false;
13511 }
13512 return true;
13513 };
13514
13515 if (IsFreeToExtend(N00) && IsFreeToExtend(N01)) {
13516 SDValue Ext0 = DAG.getNode(ExtOpcode, DL, VT, N00);
13517 SDValue Ext1 = DAG.getNode(ExtOpcode, DL, VT, N01);
13518 return DAG.getSetCC(DL, VT, Ext0, Ext1, CC);
13519 }
13520 }
13521 }
13522
13523 // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
13524 // Here, T can be 1 or -1, depending on the type of the setcc and
13525 // getBooleanContents().
13526 unsigned SetCCWidth = N0.getScalarValueSizeInBits();
13527
13528 // To determine the "true" side of the select, we need to know the high bit
13529 // of the value returned by the setcc if it evaluates to true.
13530 // If the type of the setcc is i1, then the true case of the select is just
13531 // sext(i1 1), that is, -1.
13532 // If the type of the setcc is larger (say, i8) then the value of the high
13533 // bit depends on getBooleanContents(), so ask TLI for a real "true" value
13534 // of the appropriate width.
13535 SDValue ExtTrueVal = (SetCCWidth == 1)
13536 ? DAG.getAllOnesConstant(DL, VT)
13537 : DAG.getBoolConstant(true, DL, VT, N00VT);
13538 SDValue Zero = DAG.getConstant(0, DL, VT);
13539 if (SDValue SCC = SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
13540 return SCC;
13541
13542 if (!VT.isVector() && !shouldConvertSelectOfConstantsToMath(N0, VT, TLI)) {
13543 EVT SetCCVT = getSetCCResultType(N00VT);
13544 // Don't do this transform for i1 because there's a select transform
13545 // that would reverse it.
13546 // TODO: We should not do this transform at all without a target hook
13547 // because a sext is likely cheaper than a select?
13548 if (SetCCVT.getScalarSizeInBits() != 1 &&
13549 (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
13550 SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
13551 return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
13552 }
13553 }
13554
13555 return SDValue();
13556}
13557
13558SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
13559 SDValue N0 = N->getOperand(0);
13560 EVT VT = N->getValueType(0);
13561 SDLoc DL(N);
13562
13563 if (VT.isVector())
13564 if (SDValue FoldedVOp = SimplifyVCastOp(N, DL))
13565 return FoldedVOp;
13566
13567 // sext(undef) = 0 because the top bits will all be the same.
13568 if (N0.isUndef())
13569 return DAG.getConstant(0, DL, VT);
13570
13571 if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
13572 return Res;
13573
13574 // fold (sext (sext x)) -> (sext x)
13575 // fold (sext (aext x)) -> (sext x)
13576 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
13577 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));
13578
13579 // fold (sext (aext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
13580 // fold (sext (sext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
13584 N0.getOperand(0));
13585
13586 // fold (sext (sext_inreg x)) -> (sext (trunc x))
13587 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
13588 SDValue N00 = N0.getOperand(0);
13589 EVT ExtVT = cast<VTSDNode>(N0->getOperand(1))->getVT();
13590 if ((N00.getOpcode() == ISD::TRUNCATE || TLI.isTruncateFree(N00, ExtVT)) &&
13591 (!LegalTypes || TLI.isTypeLegal(ExtVT))) {
13592 SDValue T = DAG.getNode(ISD::TRUNCATE, DL, ExtVT, N00);
13593 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, T);
13594 }
13595 }
13596
13597 if (N0.getOpcode() == ISD::TRUNCATE) {
13598 // fold (sext (truncate (load x))) -> (sext (smaller load x))
13599 // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
13600 if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
13601 SDNode *oye = N0.getOperand(0).getNode();
13602 if (NarrowLoad.getNode() != N0.getNode()) {
13603 CombineTo(N0.getNode(), NarrowLoad);
13604 // CombineTo deleted the truncate, if needed, but not what's under it.
13605 AddToWorklist(oye);
13606 }
13607 return SDValue(N, 0); // Return N so it doesn't get rechecked!
13608 }
13609
13610 // See if the value being truncated is already sign extended. If so, just
13611 // eliminate the trunc/sext pair.
13612 SDValue Op = N0.getOperand(0);
13613 unsigned OpBits = Op.getScalarValueSizeInBits();
13614 unsigned MidBits = N0.getScalarValueSizeInBits();
13615 unsigned DestBits = VT.getScalarSizeInBits();
13616 unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
13617
13618 if (OpBits == DestBits) {
13619 // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
13620 // bits, it is already in the required form and we can use Op directly.
13621 if (NumSignBits > DestBits-MidBits)
13622 return Op;
13623 } else if (OpBits < DestBits) {
13624 // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
13625 // bits, just sext from i32.
13626 if (NumSignBits > OpBits-MidBits)
13627 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
13628 } else {
13629 // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
13630 // bits, just truncate to i32.
13631 if (NumSignBits > OpBits-MidBits)
13632 return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
13633 }
13634
13635 // fold (sext (truncate x)) -> (sextinreg x).
13636 if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
13637 N0.getValueType())) {
13638 if (OpBits < DestBits)
13639 Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
13640 else if (OpBits > DestBits)
13641 Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
13642 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
13643 DAG.getValueType(N0.getValueType()));
13644 }
13645 }
13646
13647 // Try to simplify (sext (load x)).
13648 if (SDValue foldedExt =
13649 tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
13651 return foldedExt;
13652
13653 if (SDValue foldedExt =
13654 tryToFoldExtOfMaskedLoad(DAG, TLI, VT, LegalOperations, N, N0,
13656 return foldedExt;
13657
13658 // fold (sext (load x)) to multiple smaller sextloads.
13659 // Only on illegal but splittable vectors.
13660 if (SDValue ExtLoad = CombineExtLoad(N))
13661 return ExtLoad;
13662
13663 // Try to simplify (sext (sextload x)).
13664 if (SDValue foldedExt = tryToFoldExtOfExtload(
13665 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD))
13666 return foldedExt;
13667
13668 // Try to simplify (sext (atomic_load x)).
13669 if (SDValue foldedExt =
13670 tryToFoldExtOfAtomicLoad(DAG, TLI, VT, N0, ISD::SEXTLOAD))
13671 return foldedExt;
13672
13673 // fold (sext (and/or/xor (load x), cst)) ->
13674 // (and/or/xor (sextload x), (sext cst))
13675 if (ISD::isBitwiseLogicOp(N0.getOpcode()) &&
13676 isa<LoadSDNode>(N0.getOperand(0)) &&
13677 N0.getOperand(1).getOpcode() == ISD::Constant &&
13678 (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
13679 LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
13680 EVT MemVT = LN00->getMemoryVT();
13681 if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) &&
13682 LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) {
13684 bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
13685 ISD::SIGN_EXTEND, SetCCs, TLI);
13686 if (DoXform) {
13687 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT,
13688 LN00->getChain(), LN00->getBasePtr(),
13689 LN00->getMemoryVT(),
13690 LN00->getMemOperand());
13692 SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
13693 ExtLoad, DAG.getConstant(Mask, DL, VT));
13694 ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND);
13695 bool NoReplaceTruncAnd = !N0.hasOneUse();
13696 bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
13697 CombineTo(N, And);
13698 // If N0 has multiple uses, change other uses as well.
13699 if (NoReplaceTruncAnd) {
13700 SDValue TruncAnd =
13702 CombineTo(N0.getNode(), TruncAnd);
13703 }
13704 if (NoReplaceTrunc) {
13705 DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
13706 } else {
13707 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
13708 LN00->getValueType(0), ExtLoad);
13709 CombineTo(LN00, Trunc, ExtLoad.getValue(1));
13710 }
13711 return SDValue(N,0); // Return N so it doesn't get rechecked!
13712 }
13713 }
13714 }
13715
13716 if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
13717 return V;
13718
13719 if (SDValue V = foldSextSetcc(N))
13720 return V;
13721
13722 // fold (sext x) -> (zext x) if the sign bit is known zero.
13723 if (!TLI.isSExtCheaperThanZExt(N0.getValueType(), VT) &&
13724 (!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
13725 DAG.SignBitIsZero(N0)) {
13727 Flags.setNonNeg(true);
13728 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0, Flags);
13729 }
13730
13731 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
13732 return NewVSel;
13733
13734 // Eliminate this sign extend by doing a negation in the destination type:
13735 // sext i32 (0 - (zext i8 X to i32)) to i64 --> 0 - (zext i8 X to i64)
13736 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
13740 SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(1).getOperand(0), DL, VT);
13741 return DAG.getNegative(Zext, DL, VT);
13742 }
13743 // Eliminate this sign extend by doing a decrement in the destination type:
13744 // sext i32 ((zext i8 X to i32) + (-1)) to i64 --> (zext i8 X to i64) + (-1)
13745 if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
13749 SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
13750 return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
13751 }
13752
13753 // fold sext (not i1 X) -> add (zext i1 X), -1
13754 // TODO: This could be extended to handle bool vectors.
13755 if (N0.getValueType() == MVT::i1 && isBitwiseNot(N0) && N0.hasOneUse() &&
13756 (!LegalOperations || (TLI.isOperationLegal(ISD::ZERO_EXTEND, VT) &&
13757 TLI.isOperationLegal(ISD::ADD, VT)))) {
13758 // If we can eliminate the 'not', the sext form should be better
13759 if (SDValue NewXor = visitXOR(N0.getNode())) {
13760 // Returning N0 is a form of in-visit replacement that may have
13761 // invalidated N0.
13762 if (NewXor.getNode() == N0.getNode()) {
13763 // Return SDValue here as the xor should have already been replaced in
13764 // this sext.
13765 return SDValue();
13766 }
13767
13768 // Return a new sext with the new xor.
13769 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NewXor);
13770 }
13771
13772 SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
13773 return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
13774 }
13775
13776 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, DL, Level))
13777 return Res;
13778
13779 return SDValue();
13780}
13781
13782/// Given an extending node with a pop-count operand, if the target does not
13783/// support a pop-count in the narrow source type but does support it in the
13784/// destination type, widen the pop-count to the destination type.
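/// For example, if CTPOP is legal for i32 but not for i16, then
/// (zext (ctpop X:i16) to i32) can become (ctpop (zext X to i32)); the zero
/// extension does not change the number of set bits.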
13785static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG, const SDLoc &DL) {
13786 assert((Extend->getOpcode() == ISD::ZERO_EXTEND ||
13787 Extend->getOpcode() == ISD::ANY_EXTEND) &&
13788 "Expected extend op");
13789
13790 SDValue CtPop = Extend->getOperand(0);
13791 if (CtPop.getOpcode() != ISD::CTPOP || !CtPop.hasOneUse())
13792 return SDValue();
13793
13794 EVT VT = Extend->getValueType(0);
13795 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13796 if (TLI.isOperationLegalOrCustom(ISD::CTPOP, CtPop.getValueType()) ||
13797 !TLI.isOperationLegalOrCustom(ISD::CTPOP, VT))
13798 return SDValue();
13799
13800 // zext (ctpop X) --> ctpop (zext X)
13801 SDValue NewZext = DAG.getZExtOrTrunc(CtPop.getOperand(0), DL, VT);
13802 return DAG.getNode(ISD::CTPOP, DL, VT, NewZext);
13803}
13804
13805// If we have (zext (abs X)) where X is a type that will be promoted by type
13806// legalization, convert to (abs (sext X)). But don't extend past a legal type.
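// For instance, on a target that promotes i16 to i32, (zext (abs X:i16) to i32)
// can become (abs (sext X:i16 to i32)), which type legalization handles directly.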
13807static SDValue widenAbs(SDNode *Extend, SelectionDAG &DAG) {
13808 assert(Extend->getOpcode() == ISD::ZERO_EXTEND && "Expected zero extend.");
13809
13810 EVT VT = Extend->getValueType(0);
13811 if (VT.isVector())
13812 return SDValue();
13813
13814 SDValue Abs = Extend->getOperand(0);
13815 if (Abs.getOpcode() != ISD::ABS || !Abs.hasOneUse())
13816 return SDValue();
13817
13818 EVT AbsVT = Abs.getValueType();
13819 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13820 if (TLI.getTypeAction(*DAG.getContext(), AbsVT) !=
13821 TargetLowering::TypePromoteInteger)
13822 return SDValue();
13823
13824 EVT LegalVT = TLI.getTypeToTransformTo(*DAG.getContext(), AbsVT);
13825
13826 SDValue SExt =
13827 DAG.getNode(ISD::SIGN_EXTEND, SDLoc(Abs), LegalVT, Abs.getOperand(0));
13828 SDValue NewAbs = DAG.getNode(ISD::ABS, SDLoc(Abs), LegalVT, SExt);
13829 return DAG.getZExtOrTrunc(NewAbs, SDLoc(Extend), VT);
13830}
13831
13832SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
13833 SDValue N0 = N->getOperand(0);
13834 EVT VT = N->getValueType(0);
13835 SDLoc DL(N);
13836
13837 if (VT.isVector())
13838 if (SDValue FoldedVOp = SimplifyVCastOp(N, DL))
13839 return FoldedVOp;
13840
13841 // zext(undef) = 0
13842 if (N0.isUndef())
13843 return DAG.getConstant(0, DL, VT);
13844
13845 if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
13846 return Res;
13847
13848 // fold (zext (zext x)) -> (zext x)
13849 // fold (zext (aext x)) -> (zext x)
13850 if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
13851 SDNodeFlags Flags;
13852 if (N0.getOpcode() == ISD::ZERO_EXTEND)
13853 Flags.setNonNeg(N0->getFlags().hasNonNeg());
13854 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0), Flags);
13855 }
13856
13857 // fold (zext (aext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
13858 // fold (zext (zext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
13859 if (N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG ||
13860 N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG)
13861 return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, DL, VT, N0.getOperand(0));
13862
13863 // fold (zext (truncate x)) -> (zext x) or
13864 // (zext (truncate x)) -> (truncate x)
13865 // This is valid when the truncated bits of x are already zero.
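// E.g. for (zext (trunc X:i64 to i8) to i32): if bits [8,32) of X are known to
// be zero, the whole expression is equivalent to (trunc X to i32).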
13866 SDValue Op;
13867 KnownBits Known;
13868 if (isTruncateOf(DAG, N0, Op, Known)) {
13869 APInt TruncatedBits =
13870 (Op.getScalarValueSizeInBits() == N0.getScalarValueSizeInBits()) ?
13871 APInt(Op.getScalarValueSizeInBits(), 0) :
13872 APInt::getBitsSet(Op.getScalarValueSizeInBits(),
13873 N0.getScalarValueSizeInBits(),
13874 std::min(Op.getScalarValueSizeInBits(),
13875 VT.getScalarSizeInBits()));
13876 if (TruncatedBits.isSubsetOf(Known.Zero)) {
13877 SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, DL, VT);
13878 DAG.salvageDebugInfo(*N0.getNode());
13879
13880 return ZExtOrTrunc;
13881 }
13882 }
13883
13884 // fold (zext (truncate x)) -> (and x, mask)
13885 if (N0.getOpcode() == ISD::TRUNCATE) {
13886 // fold (zext (truncate (load x))) -> (zext (smaller load x))
13887 // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
13888 if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
13889 SDNode *oye = N0.getOperand(0).getNode();
13890 if (NarrowLoad.getNode() != N0.getNode()) {
13891 CombineTo(N0.getNode(), NarrowLoad);
13892 // CombineTo deleted the truncate, if needed, but not what's under it.
13893 AddToWorklist(oye);
13894 }
13895 return SDValue(N, 0); // Return N so it doesn't get rechecked!
13896 }
13897
13898 EVT SrcVT = N0.getOperand(0).getValueType();
13899 EVT MinVT = N0.getValueType();
13900
13901 if (N->getFlags().hasNonNeg()) {
13902 SDValue Op = N0.getOperand(0);
13903 unsigned OpBits = SrcVT.getScalarSizeInBits();
13904 unsigned MidBits = MinVT.getScalarSizeInBits();
13905 unsigned DestBits = VT.getScalarSizeInBits();
13906 unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
13907
13908 if (OpBits == DestBits) {
13909 // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
13910 // bits, Op is already the value we need.
13911 if (NumSignBits > DestBits - MidBits)
13912 return Op;
13913 } else if (OpBits < DestBits) {
13914 // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
13915 // bits, just sext from i32.
13916 // FIXME: This can probably be ZERO_EXTEND nneg?
13917 if (NumSignBits > OpBits - MidBits)
13918 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
13919 } else {
13920 // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
13921 // bits, just truncate to i32.
13922 if (NumSignBits > OpBits - MidBits)
13923 return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
13924 }
13925 }
13926
13927 // Try to mask before the extension to avoid having to generate a larger mask,
13928 // possibly over several sub-vectors.
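// Illustration: for (zext (trunc X:v8i16 to v8i8) to v8i32), applying a 0xFF
// mask in v8i16 and then extending needs only a narrow mask, rather than an
// AND with a v8i32 mask after the (possibly multi-register) extension.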
13929 if (SrcVT.bitsLT(VT) && VT.isVector()) {
13930 if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
13932 SDValue Op = N0.getOperand(0);
13933 Op = DAG.getZeroExtendInReg(Op, DL, MinVT);
13934 AddToWorklist(Op.getNode());
13935 SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, DL, VT);
13936 // Transfer the debug info; the new node is equivalent to N0.
13937 DAG.transferDbgValues(N0, ZExtOrTrunc);
13938 return ZExtOrTrunc;
13939 }
13940 }
13941
13942 if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
13943 SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), DL, VT);
13944 AddToWorklist(Op.getNode());
13945 SDValue And = DAG.getZeroExtendInReg(Op, DL, MinVT);
13946 // We may safely transfer the debug info describing the truncate node over
13947 // to the equivalent and operation.
13948 DAG.transferDbgValues(N0, And);
13949 return And;
13950 }
13951 }
13952
13953 // Fold (zext (and (trunc x), cst)) -> (and x, cst),
13954 // if either of the casts is not free.
13955 if (N0.getOpcode() == ISD::AND &&
13956 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
13957 N0.getOperand(1).getOpcode() == ISD::Constant &&
13958 (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0), N0.getValueType()) ||
13959 !TLI.isZExtFree(N0.getValueType(), VT))) {
13960 SDValue X = N0.getOperand(0).getOperand(0);
13961 X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
13962 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
13963 return DAG.getNode(ISD::AND, DL, VT,
13964 X, DAG.getConstant(Mask, DL, VT));
13965 }
13966
13967 // Try to simplify (zext (load x)).
13968 if (SDValue foldedExt = tryToFoldExtOfLoad(
13969 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD,
13970 ISD::ZERO_EXTEND, N->getFlags().hasNonNeg()))
13971 return foldedExt;
13972
13973 if (SDValue foldedExt =
13974 tryToFoldExtOfMaskedLoad(DAG, TLI, VT, LegalOperations, N, N0,
13976 return foldedExt;
13977
13978 // fold (zext (load x)) to multiple smaller zextloads.
13979 // Only on illegal but splittable vectors.
13980 if (SDValue ExtLoad = CombineExtLoad(N))
13981 return ExtLoad;
13982
13983 // Try to simplify (zext (atomic_load x)).
13984 if (SDValue foldedExt =
13985 tryToFoldExtOfAtomicLoad(DAG, TLI, VT, N0, ISD::ZEXTLOAD))
13986 return foldedExt;
13987
13988 // fold (zext (and/or/xor (load x), cst)) ->
13989 // (and/or/xor (zextload x), (zext cst))
13990 // Unless (and (load x) cst) will match as a zextload already and has
13991 // additional users, or the zext is already free.
13992 if (ISD::isBitwiseLogicOp(N0.getOpcode()) && !TLI.isZExtFree(N0, VT) &&
13993 isa<LoadSDNode>(N0.getOperand(0)) &&
13994 N0.getOperand(1).getOpcode() == ISD::Constant &&
13995 (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
13996 LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
13997 EVT MemVT = LN00->getMemoryVT();
13998 if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) &&
13999 LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
14000 bool DoXform = true;
14001 SmallVector<SDNode*, 4> SetCCs;
14002 if (!N0.hasOneUse()) {
14003 if (N0.getOpcode() == ISD::AND) {
14004 auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
14005 EVT LoadResultTy = AndC->getValueType(0);
14006 EVT ExtVT;
14007 if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT))
14008 DoXform = false;
14009 }
14010 }
14011 if (DoXform)
14012 DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
14013 ISD::ZERO_EXTEND, SetCCs, TLI);
14014 if (DoXform) {
14015 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT,
14016 LN00->getChain(), LN00->getBasePtr(),
14017 LN00->getMemoryVT(),
14018 LN00->getMemOperand());
14019 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
14020 SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
14021 ExtLoad, DAG.getConstant(Mask, DL, VT));
14022 ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
14023 bool NoReplaceTruncAnd = !N0.hasOneUse();
14024 bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
14025 CombineTo(N, And);
14026 // If N0 has multiple uses, change other uses as well.
14027 if (NoReplaceTruncAnd) {
14028 SDValue TruncAnd =
14029 DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
14030 CombineTo(N0.getNode(), TruncAnd);
14031 }
14032 if (NoReplaceTrunc) {
14033 DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
14034 } else {
14035 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
14036 LN00->getValueType(0), ExtLoad);
14037 CombineTo(LN00, Trunc, ExtLoad.getValue(1));
14038 }
14039 return SDValue(N,0); // Return N so it doesn't get rechecked!
14040 }
14041 }
14042 }
14043
14044 // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
14045 // (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
14046 if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
14047 return ZExtLoad;
14048
14049 // Try to simplify (zext (zextload x)).
14050 if (SDValue foldedExt = tryToFoldExtOfExtload(
14051 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD))
14052 return foldedExt;
14053
14054 if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
14055 return V;
14056
14057 if (N0.getOpcode() == ISD::SETCC) {
14058 // Propagate fast-math-flags.
14059 SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
14060
14061 // Only do this before legalize for now.
14062 if (!LegalOperations && VT.isVector() &&
14063 N0.getValueType().getVectorElementType() == MVT::i1) {
14064 EVT N00VT = N0.getOperand(0).getValueType();
14065 if (getSetCCResultType(N00VT) == N0.getValueType())
14066 return SDValue();
14067
14068 // We know that the # elements of the results is the same as the #
14069 // elements of the compare (and the # elements of the compare result for
14070 // that matter). Check to see that they are the same size. If so, we know
14071 // that the element size of the sext'd result matches the element size of
14072 // the compare operands.
14073 if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
14074 // zext(setcc) -> zext_in_reg(vsetcc) for vectors.
14075 SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
14076 N0.getOperand(1), N0.getOperand(2));
14077 return DAG.getZeroExtendInReg(VSetCC, DL, N0.getValueType());
14078 }
14079
14080 // If the desired elements are smaller or larger than the source
14081 // elements we can use a matching integer vector type and then
14082 // truncate/any extend followed by zext_in_reg.
14083 EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
14084 SDValue VsetCC =
14085 DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
14086 N0.getOperand(1), N0.getOperand(2));
14087 return DAG.getZeroExtendInReg(DAG.getAnyExtOrTrunc(VsetCC, DL, VT), DL,
14088 N0.getValueType());
14089 }
14090
14091 // zext(setcc x,y,cc) -> zext(select x, y, true, false, cc)
14092 EVT N0VT = N0.getValueType();
14093 EVT N00VT = N0.getOperand(0).getValueType();
14094 if (SDValue SCC = SimplifySelectCC(
14095 DL, N0.getOperand(0), N0.getOperand(1),
14096 DAG.getBoolConstant(true, DL, N0VT, N00VT),
14097 DAG.getBoolConstant(false, DL, N0VT, N00VT),
14098 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
14099 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, SCC);
14100 }
14101
14102 // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
14103 if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
14104 !TLI.isZExtFree(N0, VT)) {
14105 SDValue ShVal = N0.getOperand(0);
14106 SDValue ShAmt = N0.getOperand(1);
14107 if (auto *ShAmtC = dyn_cast<ConstantSDNode>(ShAmt)) {
14108 if (ShVal.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse()) {
14109 if (N0.getOpcode() == ISD::SHL) {
14110 // If the original shl may be shifting out bits, do not perform this
14111 // transformation.
14112 unsigned KnownZeroBits = ShVal.getValueSizeInBits() -
14113 ShVal.getOperand(0).getValueSizeInBits();
14114 if (ShAmtC->getAPIntValue().ugt(KnownZeroBits)) {
14115 // If the shift is too large, then see if we can deduce that the
14116 // shift is safe anyway.
14117 // Create a mask that has ones for the bits being shifted out.
14118 APInt ShiftOutMask =
14119 APInt::getHighBitsSet(ShVal.getValueSizeInBits(),
14120 ShAmtC->getAPIntValue().getZExtValue());
14121
14122 // Check if the bits being shifted out are known to be zero.
14123 if (!DAG.MaskedValueIsZero(ShVal, ShiftOutMask))
14124 return SDValue();
14125 }
14126 }
14127
14128 // Ensure that the shift amount is wide enough for the shifted value.
14129 if (Log2_32_Ceil(VT.getSizeInBits()) > ShAmt.getValueSizeInBits())
14130 ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
14131
14132 return DAG.getNode(N0.getOpcode(), DL, VT,
14133 DAG.getNode(ISD::ZERO_EXTEND, DL, VT, ShVal), ShAmt);
14134 }
14135 }
14136 }
14137
14138 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
14139 return NewVSel;
14140
14141 if (SDValue NewCtPop = widenCtPop(N, DAG, DL))
14142 return NewCtPop;
14143
14144 if (SDValue V = widenAbs(N, DAG))
14145 return V;
14146
14147 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, DL, Level))
14148 return Res;
14149
14150 // CSE zext nneg with sext if the zext is not free.
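// With the nneg flag the source sign bit is known zero, so (sext x) and
// (zext nneg x) compute the same value; if a matching sign_extend node already
// exists, reuse it instead of keeping both.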
14151 if (N->getFlags().hasNonNeg() && !TLI.isZExtFree(N0.getValueType(), VT)) {
14152 SDNode *CSENode = DAG.getNodeIfExists(ISD::SIGN_EXTEND, N->getVTList(), N0);
14153 if (CSENode)
14154 return SDValue(CSENode, 0);
14155 }
14156
14157 return SDValue();
14158}
14159
14160SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
14161 SDValue N0 = N->getOperand(0);
14162 EVT VT = N->getValueType(0);
14163 SDLoc DL(N);
14164
14165 // aext(undef) = undef
14166 if (N0.isUndef())
14167 return DAG.getUNDEF(VT);
14168
14169 if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
14170 return Res;
14171
14172 // fold (aext (aext x)) -> (aext x)
14173 // fold (aext (zext x)) -> (zext x)
14174 // fold (aext (sext x)) -> (sext x)
14175 if (N0.getOpcode() == ISD::ANY_EXTEND || N0.getOpcode() == ISD::ZERO_EXTEND ||
14176 N0.getOpcode() == ISD::SIGN_EXTEND) {
14177 SDNodeFlags Flags;
14178 if (N0.getOpcode() == ISD::ZERO_EXTEND)
14179 Flags.setNonNeg(N0->getFlags().hasNonNeg());
14180 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0), Flags);
14181 }
14182
14183 // fold (aext (aext_extend_vector_inreg x)) -> (aext_extend_vector_inreg x)
14184 // fold (aext (zext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
14185 // fold (aext (sext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
14186 if (N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
14187 N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG ||
14188 N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG)
14189 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0));
14190
14191 // fold (aext (truncate (load x))) -> (aext (smaller load x))
14192 // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
14193 if (N0.getOpcode() == ISD::TRUNCATE) {
14194 if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
14195 SDNode *oye = N0.getOperand(0).getNode();
14196 if (NarrowLoad.getNode() != N0.getNode()) {
14197 CombineTo(N0.getNode(), NarrowLoad);
14198 // CombineTo deleted the truncate, if needed, but not what's under it.
14199 AddToWorklist(oye);
14200 }
14201 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14202 }
14203 }
14204
14205 // fold (aext (truncate x))
14206 if (N0.getOpcode() == ISD::TRUNCATE)
14207 return DAG.getAnyExtOrTrunc(N0.getOperand(0), DL, VT);
14208
14209 // Fold (aext (and (trunc x), cst)) -> (and x, cst)
14210 // if the trunc is not free.
14211 if (N0.getOpcode() == ISD::AND &&
14212 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
14213 N0.getOperand(1).getOpcode() == ISD::Constant &&
14214 !TLI.isTruncateFree(N0.getOperand(0).getOperand(0), N0.getValueType())) {
14215 SDValue X = DAG.getAnyExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
14216 SDValue Y = DAG.getNode(ISD::ANY_EXTEND, DL, VT, N0.getOperand(1));
14217 assert(isa<ConstantSDNode>(Y) && "Expected constant to be folded!");
14218 return DAG.getNode(ISD::AND, DL, VT, X, Y);
14219 }
14220
14221 // fold (aext (load x)) -> (aext (truncate (extload x)))
14222 // None of the supported targets knows how to perform load and any_ext
14223 // on vectors in one instruction, so attempt to fold to zext instead.
14224 if (VT.isVector()) {
14225 // Try to simplify (zext (load x)).
14226 if (SDValue foldedExt =
14227 tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
14228 ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
14229 return foldedExt;
14230 } else if (ISD::isNON_EXTLoad(N0.getNode()) &&
14231 ISD::isUNINDEXEDLoad(N0.getNode()) &&
14232 TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
14233 bool DoXform = true;
14234 SmallVector<SDNode*, 4> SetCCs;
14235 if (!N0.hasOneUse())
14236 DoXform =
14237 ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
14238 if (DoXform) {
14239 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14240 SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, LN0->getChain(),
14241 LN0->getBasePtr(), N0.getValueType(),
14242 LN0->getMemOperand());
14243 ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);
14244 // If the load value is used only by N, replace it via CombineTo N.
14245 bool NoReplaceTrunc = N0.hasOneUse();
14246 CombineTo(N, ExtLoad);
14247 if (NoReplaceTrunc) {
14248 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
14249 recursivelyDeleteUnusedNodes(LN0);
14250 } else {
14251 SDValue Trunc =
14252 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
14253 CombineTo(LN0, Trunc, ExtLoad.getValue(1));
14254 }
14255 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14256 }
14257 }
14258
14259 // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
14260 // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
14261 // fold (aext ( extload x)) -> (aext (truncate (extload x)))
14262 if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) &&
14263 ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
14264 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14265 ISD::LoadExtType ExtType = LN0->getExtensionType();
14266 EVT MemVT = LN0->getMemoryVT();
14267 if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
14268 SDValue ExtLoad =
14269 DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), LN0->getBasePtr(),
14270 MemVT, LN0->getMemOperand());
14271 CombineTo(N, ExtLoad);
14272 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
14273 recursivelyDeleteUnusedNodes(LN0);
14274 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14275 }
14276 }
14277
14278 if (N0.getOpcode() == ISD::SETCC) {
14279 // Propagate fast-math-flags.
14280 SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
14281
14282 // For vectors:
14283 // aext(setcc) -> vsetcc
14284 // aext(setcc) -> truncate(vsetcc)
14285 // aext(setcc) -> aext(vsetcc)
14286 // Only do this before legalize for now.
14287 if (VT.isVector() && !LegalOperations) {
14288 EVT N00VT = N0.getOperand(0).getValueType();
14289 if (getSetCCResultType(N00VT) == N0.getValueType())
14290 return SDValue();
14291
14292 // We know that the # elements of the results is the same as the
14293 // # elements of the compare (and the # elements of the compare result
14294 // for that matter). Check to see that they are the same size. If so,
14295 // we know that the element size of the sext'd result matches the
14296 // element size of the compare operands.
14297 if (VT.getSizeInBits() == N00VT.getSizeInBits())
14298 return DAG.getSetCC(DL, VT, N0.getOperand(0), N0.getOperand(1),
14299 cast<CondCodeSDNode>(N0.getOperand(2))->get());
14300
14301 // If the desired elements are smaller or larger than the source
14302 // elements we can use a matching integer vector type and then
14303 // truncate/any extend
14304 EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
14305 SDValue VsetCC = DAG.getSetCC(
14306 DL, MatchingVectorType, N0.getOperand(0), N0.getOperand(1),
14307 cast<CondCodeSDNode>(N0.getOperand(2))->get());
14308 return DAG.getAnyExtOrTrunc(VsetCC, DL, VT);
14309 }
14310
14311 // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
14312 if (SDValue SCC = SimplifySelectCC(
14313 DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
14314 DAG.getConstant(0, DL, VT),
14315 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
14316 return SCC;
14317 }
14318
14319 if (SDValue NewCtPop = widenCtPop(N, DAG, DL))
14320 return NewCtPop;
14321
14322 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, DL, Level))
14323 return Res;
14324
14325 return SDValue();
14326}
14327
14328SDValue DAGCombiner::visitAssertExt(SDNode *N) {
14329 unsigned Opcode = N->getOpcode();
14330 SDValue N0 = N->getOperand(0);
14331 SDValue N1 = N->getOperand(1);
14332 EVT AssertVT = cast<VTSDNode>(N1)->getVT();
14333
14334 // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
14335 if (N0.getOpcode() == Opcode &&
14336 AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
14337 return N0;
14338
14339 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
14340 N0.getOperand(0).getOpcode() == Opcode) {
14341 // We have an assert, truncate, assert sandwich. Make one stronger assert
14342 // by asserting on the smallest asserted type to the larger source type.
14343 // This eliminates the later assert:
14344 // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
14345 // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
14346 SDLoc DL(N);
14347 SDValue BigA = N0.getOperand(0);
14348 EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
14349 EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT;
14350 SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
14351 SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
14352 BigA.getOperand(0), MinAssertVTVal);
14353 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
14354 }
14355
14356 // If we have (AssertZext (truncate (AssertSext X, iX)), iY) and Y is smaller
14357 // than X, just move the AssertZext in front of the truncate and drop the
14358 // AssertSext.
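// E.g. (AssertZext (trunc (AssertSext X:i64, i32) to i32), i8) can become
// (trunc (AssertZext X, i8) to i32); the narrower zero-extension assertion
// already implies the dropped sign-extension assertion.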
14359 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
14360 N0.getOperand(0).getOpcode() == ISD::AssertSext &&
14361 Opcode == ISD::AssertZext) {
14362 SDValue BigA = N0.getOperand(0);
14363 EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
14364 if (AssertVT.bitsLT(BigA_AssertVT)) {
14365 SDLoc DL(N);
14366 SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
14367 BigA.getOperand(0), N1);
14368 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
14369 }
14370 }
14371
14372 return SDValue();
14373}
14374
14375SDValue DAGCombiner::visitAssertAlign(SDNode *N) {
14376 SDLoc DL(N);
14377
14378 Align AL = cast<AssertAlignSDNode>(N)->getAlign();
14379 SDValue N0 = N->getOperand(0);
14380
14381 // Fold (assertalign (assertalign x, AL0), AL1) ->
14382 // (assertalign x, max(AL0, AL1))
14383 if (auto *AAN = dyn_cast<AssertAlignSDNode>(N0))
14384 return DAG.getAssertAlign(DL, N0.getOperand(0),
14385 std::max(AL, AAN->getAlign()));
14386
14387 // In rare cases, there are trivial arithmetic ops in source operands. Sink
14388 // this assert down to source operands so that those arithmetic ops could be
14389 // exposed to the DAG combining.
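// For example, (assertalign (add X, 16), Align(8)) can become
// (add (assertalign X, Align(8)), 16), exposing the aligned base pointer to
// further combines.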
14390 switch (N0.getOpcode()) {
14391 default:
14392 break;
14393 case ISD::ADD:
14394 case ISD::SUB: {
14395 unsigned AlignShift = Log2(AL);
14396 SDValue LHS = N0.getOperand(0);
14397 SDValue RHS = N0.getOperand(1);
14398 unsigned LHSAlignShift = DAG.computeKnownBits(LHS).countMinTrailingZeros();
14399 unsigned RHSAlignShift = DAG.computeKnownBits(RHS).countMinTrailingZeros();
14400 if (LHSAlignShift >= AlignShift || RHSAlignShift >= AlignShift) {
14401 if (LHSAlignShift < AlignShift)
14402 LHS = DAG.getAssertAlign(DL, LHS, AL);
14403 if (RHSAlignShift < AlignShift)
14404 RHS = DAG.getAssertAlign(DL, RHS, AL);
14405 return DAG.getNode(N0.getOpcode(), DL, N0.getValueType(), LHS, RHS);
14406 }
14407 break;
14408 }
14409 }
14410
14411 return SDValue();
14412}
14413
14414/// If the result of a load is shifted/masked/truncated to an effectively
14415/// narrower type, try to transform the load to a narrower type and/or
14416/// use an extending load.
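/// A typical case: (and (load i32 p), 0xffff) can be turned into a
/// zero-extending i16 load from p (with an endian-dependent pointer offset).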
14417SDValue DAGCombiner::reduceLoadWidth(SDNode *N) {
14418 unsigned Opc = N->getOpcode();
14419
14420 ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
14421 SDValue N0 = N->getOperand(0);
14422 EVT VT = N->getValueType(0);
14423 EVT ExtVT = VT;
14424
14425 // This transformation isn't valid for vector loads.
14426 if (VT.isVector())
14427 return SDValue();
14428
14429 // The ShAmt variable is used to indicate that we've consumed a right
14430 // shift. I.e. we want to narrow the width of the load by skipping to load the
14431 // ShAmt least significant bits.
14432 unsigned ShAmt = 0;
14433 // A special case is when the least significant bits from the load are masked
14434 // away, but using an AND rather than a right shift. ShiftedOffset is used
14435 // to indicate how many bits the narrowed load should be left-shifted to
14436 // reconstruct the result.
14437 unsigned ShiftedOffset = 0;
14438 // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
14439 // extended to VT.
14440 if (Opc == ISD::SIGN_EXTEND_INREG) {
14441 ExtType = ISD::SEXTLOAD;
14442 ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
14443 } else if (Opc == ISD::SRL || Opc == ISD::SRA) {
14444 // Another special-case: SRL/SRA is basically zero/sign-extending a narrower
14445 // value, or it may be shifting a higher subword, half or byte into the
14446 // lowest bits.
14447
14448 // Only handle shift with constant shift amount, and the shiftee must be a
14449 // load.
14450 auto *LN = dyn_cast<LoadSDNode>(N0);
14451 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
14452 if (!N1C || !LN)
14453 return SDValue();
14454 // If the shift amount is larger than the memory type then we're not
14455 // accessing any of the loaded bytes.
14456 ShAmt = N1C->getZExtValue();
14457 uint64_t MemoryWidth = LN->getMemoryVT().getScalarSizeInBits();
14458 if (MemoryWidth <= ShAmt)
14459 return SDValue();
14460 // Attempt to fold away the SRL by using ZEXTLOAD and SRA by using SEXTLOAD.
14461 ExtType = Opc == ISD::SRL ? ISD::ZEXTLOAD : ISD::SEXTLOAD;
14462 ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt);
14463 // If original load is a SEXTLOAD then we can't simply replace it by a
14464 // ZEXTLOAD (we could potentially replace it by a more narrow SEXTLOAD
14465 // followed by a ZEXT, but that is not handled at the moment). Similarly if
14466 // the original load is a ZEXTLOAD and we want to use a SEXTLOAD.
14467 if ((LN->getExtensionType() == ISD::SEXTLOAD ||
14468 LN->getExtensionType() == ISD::ZEXTLOAD) &&
14469 LN->getExtensionType() != ExtType)
14470 return SDValue();
14471 } else if (Opc == ISD::AND) {
14472 // An AND with a constant mask is the same as a truncate + zero-extend.
14473 auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
14474 if (!AndC)
14475 return SDValue();
14476
14477 const APInt &Mask = AndC->getAPIntValue();
14478 unsigned ActiveBits = 0;
14479 if (Mask.isMask()) {
14480 ActiveBits = Mask.countr_one();
14481 } else if (Mask.isShiftedMask(ShAmt, ActiveBits)) {
14482 ShiftedOffset = ShAmt;
14483 } else {
14484 return SDValue();
14485 }
14486
14487 ExtType = ISD::ZEXTLOAD;
14488 ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
14489 }
14490
14491 // In case Opc==SRL we've already prepared ExtVT/ExtType/ShAmt based on doing
14492 // a right shift. Here we redo some of those checks, to possibly adjust the
14493 // ExtVT even further based on "a masking AND". We could also end up here for
14494 // other reasons (e.g. based on Opc==TRUNCATE) and that is why some checks
14495 // need to be done here as well.
14496 if (Opc == ISD::SRL || N0.getOpcode() == ISD::SRL) {
14497 SDValue SRL = Opc == ISD::SRL ? SDValue(N, 0) : N0;
14498 // Bail out when the SRL has more than one use. This is done for historical
14499 // (undocumented) reasons. Maybe the intent was to guard the AND-masking
14500 // check below? And maybe it would be unprofitable to do the transform when
14501 // the SRL has multiple uses and we get here with Opc!=ISD::SRL?
14502 // FIXME: Can't we just skip this check for the Opc==ISD::SRL case?
14503 if (!SRL.hasOneUse())
14504 return SDValue();
14505
14506 // Only handle shift with constant shift amount, and the shiftee must be a
14507 // load.
14508 auto *LN = dyn_cast<LoadSDNode>(SRL.getOperand(0));
14509 auto *SRL1C = dyn_cast<ConstantSDNode>(SRL.getOperand(1));
14510 if (!SRL1C || !LN)
14511 return SDValue();
14512
14513 // If the shift amount is larger than the input type then we're not
14514 // accessing any of the loaded bytes. If the load was a zextload/extload
14515 // then the result of the shift+trunc is zero/undef (handled elsewhere).
14516 ShAmt = SRL1C->getZExtValue();
14517 uint64_t MemoryWidth = LN->getMemoryVT().getSizeInBits();
14518 if (ShAmt >= MemoryWidth)
14519 return SDValue();
14520
14521 // Because a SRL must be assumed to *need* to zero-extend the high bits
14522 // (as opposed to anyext the high bits), we can't combine the zextload
14523 // lowering of SRL and an sextload.
14524 if (LN->getExtensionType() == ISD::SEXTLOAD)
14525 return SDValue();
14526
14527 // Avoid reading outside the memory accessed by the original load (could
14528 // happen if we only adjusted the load base pointer by ShAmt). Instead we
14529 // try to narrow the load even further. The typical scenario here is:
14530 // (i64 (truncate (i96 (srl (load x), 64)))) ->
14531 // (i64 (truncate (i96 (zextload (load i32 + offset) from i32))))
14532 if (ExtVT.getScalarSizeInBits() > MemoryWidth - ShAmt) {
14533 // Don't replace sextload by zextload.
14534 if (ExtType == ISD::SEXTLOAD)
14535 return SDValue();
14536 // Narrow the load.
14537 ExtType = ISD::ZEXTLOAD;
14538 ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt);
14539 }
14540
14541 // If the SRL is only used by a masking AND, we may be able to adjust
14542 // the ExtVT to make the AND redundant.
14543 SDNode *Mask = *(SRL->use_begin());
14544 if (SRL.hasOneUse() && Mask->getOpcode() == ISD::AND &&
14545 isa<ConstantSDNode>(Mask->getOperand(1))) {
14546 unsigned Offset, ActiveBits;
14547 const APInt& ShiftMask = Mask->getConstantOperandAPInt(1);
14548 if (ShiftMask.isMask()) {
14549 EVT MaskedVT =
14550 EVT::getIntegerVT(*DAG.getContext(), ShiftMask.countr_one());
14551 // If the mask is smaller, recompute the type.
14552 if ((ExtVT.getScalarSizeInBits() > MaskedVT.getScalarSizeInBits()) &&
14553 TLI.isLoadExtLegal(ExtType, SRL.getValueType(), MaskedVT))
14554 ExtVT = MaskedVT;
14555 } else if (ExtType == ISD::ZEXTLOAD &&
14556 ShiftMask.isShiftedMask(Offset, ActiveBits) &&
14557 (Offset + ShAmt) < VT.getScalarSizeInBits()) {
14558 EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
14559 // If the mask is shifted we can use a narrower load and a shl to insert
14560 // the trailing zeros.
14561 if (((Offset + ActiveBits) <= ExtVT.getScalarSizeInBits()) &&
14562 TLI.isLoadExtLegal(ExtType, SRL.getValueType(), MaskedVT)) {
14563 ExtVT = MaskedVT;
14564 ShAmt = Offset + ShAmt;
14565 ShiftedOffset = Offset;
14566 }
14567 }
14568 }
14569
14570 N0 = SRL.getOperand(0);
14571 }
14572
14573 // If the load is shifted left (and the result isn't shifted back right), we
14574 // can fold a truncate through the shift. The typical scenario is that N
14575 // points at a TRUNCATE here so the attempted fold is:
14576 // (truncate (shl (load x), c))) -> (shl (narrow load x), c)
14577 // ShLeftAmt will indicate how much a narrowed load should be shifted left.
14578 unsigned ShLeftAmt = 0;
14579 if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
14580 ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
14581 if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
14582 ShLeftAmt = N01->getZExtValue();
14583 N0 = N0.getOperand(0);
14584 }
14585 }
14586
14587 // If we haven't found a load, we can't narrow it.
14588 if (!isa<LoadSDNode>(N0))
14589 return SDValue();
14590
14591 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14592 // Reducing the width of a volatile load is illegal. For atomics, we may be
14593 // able to reduce the width provided we never widen again. (see D66309)
14594 if (!LN0->isSimple() ||
14595 !isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
14596 return SDValue();
14597
14598 auto AdjustBigEndianShift = [&](unsigned ShAmt) {
14599 unsigned LVTStoreBits =
14600 LN0->getMemoryVT().getStoreSizeInBits().getFixedValue();
14601 unsigned EVTStoreBits = ExtVT.getStoreSizeInBits().getFixedValue();
14602 return LVTStoreBits - EVTStoreBits - ShAmt;
14603 };
14604
14605 // We need to adjust the pointer to the load by ShAmt bits in order to load
14606 // the correct bytes.
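// Illustration: narrowing (srl (load i32 p), 16) to an i16 load reads from
// p+2 on a little-endian target but from p+0 on a big-endian one.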
14607 unsigned PtrAdjustmentInBits =
14608 DAG.getDataLayout().isBigEndian() ? AdjustBigEndianShift(ShAmt) : ShAmt;
14609
14610 uint64_t PtrOff = PtrAdjustmentInBits / 8;
14611 SDLoc DL(LN0);
14612 // The original load itself didn't wrap, so an offset within it doesn't.
14613 SDNodeFlags Flags;
14614 Flags.setNoUnsignedWrap(true);
14615 SDValue NewPtr = DAG.getMemBasePlusOffset(
14616 LN0->getBasePtr(), TypeSize::getFixed(PtrOff), DL, Flags);
14617 AddToWorklist(NewPtr.getNode());
14618
14619 SDValue Load;
14620 if (ExtType == ISD::NON_EXTLOAD)
14621 Load = DAG.getLoad(VT, DL, LN0->getChain(), NewPtr,
14622 LN0->getPointerInfo().getWithOffset(PtrOff),
14623 LN0->getOriginalAlign(),
14624 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
14625 else
14626 Load = DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), NewPtr,
14627 LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
14628 LN0->getOriginalAlign(),
14629 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
14630
14631 // Replace the old load's chain with the new load's chain.
14632 WorklistRemover DeadNodes(*this);
14633 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
14634
14635 // Shift the result left, if we've swallowed a left shift.
14636 SDValue Result = Load;
14637 if (ShLeftAmt != 0) {
14638 // If the shift amount is as large as the result size (but, presumably,
14639 // no larger than the source) then the useful bits of the result are
14640 // zero; we can't simply return the shortened shift, because the result
14641 // of that operation is undefined.
14642 if (ShLeftAmt >= VT.getScalarSizeInBits())
14643 Result = DAG.getConstant(0, DL, VT);
14644 else
14645 Result = DAG.getNode(ISD::SHL, DL, VT, Result,
14646 DAG.getShiftAmountConstant(ShLeftAmt, VT, DL));
14647 }
14648
14649 if (ShiftedOffset != 0) {
14650 // We're using a shifted mask, so the load now has an offset. This means
14651 // the data has been loaded into lower bytes than it would have been before,
14652 // so we need to shl the loaded data into the correct position in the
14653 // register.
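// E.g. for (and (load i32 p), 0xff00) on a little-endian target: the narrowed
// i8 load reads the byte at offset 1, and shifting left by 8 puts it back in
// bits [8,16) of the result.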
14654 SDValue ShiftC = DAG.getConstant(ShiftedOffset, DL, VT);
14655 Result = DAG.getNode(ISD::SHL, DL, VT, Result, ShiftC);
14656 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
14657 }
14658
14659 // Return the new loaded value.
14660 return Result;
14661}
14662
14663SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
14664 SDValue N0 = N->getOperand(0);
14665 SDValue N1 = N->getOperand(1);
14666 EVT VT = N->getValueType(0);
14667 EVT ExtVT = cast<VTSDNode>(N1)->getVT();
14668 unsigned VTBits = VT.getScalarSizeInBits();
14669 unsigned ExtVTBits = ExtVT.getScalarSizeInBits();
14670
14671 // sext_vector_inreg(undef) = 0 because the top bit will all be the same.
14672 if (N0.isUndef())
14673 return DAG.getConstant(0, SDLoc(N), VT);
14674
14675 // fold (sext_in_reg c1) -> c1
14676 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
14677 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
14678
14679 // If the input is already sign extended, just drop the extension.
14680 if (ExtVTBits >= DAG.ComputeMaxSignificantBits(N0))
14681 return N0;
14682
14683 // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
14684 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
14685 ExtVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
14686 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0.getOperand(0),
14687 N1);
14688
14689 // fold (sext_in_reg (sext x)) -> (sext x)
14690 // fold (sext_in_reg (aext x)) -> (sext x)
14691 // if x is small enough or if we know that x has more than 1 sign bit and the
14692 // sign_extend_inreg is extending from one of them.
14693 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
14694 SDValue N00 = N0.getOperand(0);
14695 unsigned N00Bits = N00.getScalarValueSizeInBits();
14696 if ((N00Bits <= ExtVTBits ||
14697 DAG.ComputeMaxSignificantBits(N00) <= ExtVTBits) &&
14698 (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
14699 return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
14700 }
14701
14702 // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
14703 // if x is small enough or if we know that x has more than 1 sign bit and the
14704 // sign_extend_inreg is extending from one of them.
14705 if (ISD::isExtVecInRegOpcode(N0.getOpcode())) {
14706 SDValue N00 = N0.getOperand(0);
14707 unsigned N00Bits = N00.getScalarValueSizeInBits();
14708 unsigned DstElts = N0.getValueType().getVectorMinNumElements();
14709 unsigned SrcElts = N00.getValueType().getVectorMinNumElements();
14710 bool IsZext = N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
14711 APInt DemandedSrcElts = APInt::getLowBitsSet(SrcElts, DstElts);
14712 if ((N00Bits == ExtVTBits ||
14713 (!IsZext && (N00Bits < ExtVTBits ||
14714 DAG.ComputeMaxSignificantBits(N00) <= ExtVTBits))) &&
14715 (!LegalOperations ||
14716 TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT)))
14717 return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT, N00);
14718 }
14719
14720 // fold (sext_in_reg (zext x)) -> (sext x)
14721 // iff we are extending the source sign bit.
14722 if (N0.getOpcode() == ISD::ZERO_EXTEND) {
14723 SDValue N00 = N0.getOperand(0);
14724 if (N00.getScalarValueSizeInBits() == ExtVTBits &&
14725 (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
14726 return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
14727 }
14728
14729 // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
14730 if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, ExtVTBits - 1)))
14731 return DAG.getZeroExtendInReg(N0, SDLoc(N), ExtVT);
14732
14733 // fold operands of sext_in_reg based on knowledge that the top bits are not
14734 // demanded.
14735 if (SimplifyDemandedBits(SDValue(N, 0)))
14736 return SDValue(N, 0);
14737
14738 // fold (sext_in_reg (load x)) -> (smaller sextload x)
14739 // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
14740 if (SDValue NarrowLoad = reduceLoadWidth(N))
14741 return NarrowLoad;
14742
14743 // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
14744 // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
14745 // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
14746 if (N0.getOpcode() == ISD::SRL) {
14747 if (auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
14748 if (ShAmt->getAPIntValue().ule(VTBits - ExtVTBits)) {
14749 // We can turn this into an SRA iff the input to the SRL is already sign
14750 // extended enough.
14751 unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
14752 if (((VTBits - ExtVTBits) - ShAmt->getZExtValue()) < InSignBits)
14753 return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0),
14754 N0.getOperand(1));
14755 }
14756 }
14757
14758 // fold (sext_inreg (extload x)) -> (sextload x)
14759 // If sextload is not supported by target, we can only do the combine when
14760 // load has one use. Doing otherwise can block folding the extload with other
14761 // extends that the target does support.
14762 if (ISD::isEXTLoad(N0.getNode()) &&
14763 ISD::isUNINDEXEDLoad(N0.getNode()) &&
14764 ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
14765 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple() &&
14766 N0.hasOneUse()) ||
14767 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
14768 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14769 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
14770 LN0->getChain(),
14771 LN0->getBasePtr(), ExtVT,
14772 LN0->getMemOperand());
14773 CombineTo(N, ExtLoad);
14774 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
14775 AddToWorklist(ExtLoad.getNode());
14776 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14777 }
14778
14779 // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
14780 if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
14781 N0.hasOneUse() &&
14782 ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
14783 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) &&
14784 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
14785 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14786 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
14787 LN0->getChain(),
14788 LN0->getBasePtr(), ExtVT,
14789 LN0->getMemOperand());
14790 CombineTo(N, ExtLoad);
14791 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
14792 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14793 }
14794
14795 // fold (sext_inreg (masked_load x)) -> (sext_masked_load x)
14796 // ignore it if the masked load is already sign extended
14797 if (MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0)) {
14798 if (ExtVT == Ld->getMemoryVT() && N0.hasOneUse() &&
14799 Ld->getExtensionType() != ISD::LoadExtType::NON_EXTLOAD &&
14800 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT)) {
14801 SDValue ExtMaskedLoad = DAG.getMaskedLoad(
14802 VT, SDLoc(N), Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(),
14803 Ld->getMask(), Ld->getPassThru(), ExtVT, Ld->getMemOperand(),
14804 Ld->getAddressingMode(), ISD::SEXTLOAD, Ld->isExpandingLoad());
14805 CombineTo(N, ExtMaskedLoad);
14806 CombineTo(N0.getNode(), ExtMaskedLoad, ExtMaskedLoad.getValue(1));
14807 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14808 }
14809 }
14810
14811 // fold (sext_inreg (masked_gather x)) -> (sext_masked_gather x)
14812 if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
14813 if (SDValue(GN0, 0).hasOneUse() &&
14814 ExtVT == GN0->getMemoryVT() &&
14815 TLI.isVectorLoadExtDesirable(SDValue(SDValue(GN0, 0)))) {
14816 SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
14817 GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
14818
14819 SDValue ExtLoad = DAG.getMaskedGather(
14820 DAG.getVTList(VT, MVT::Other), ExtVT, SDLoc(N), Ops,
14821 GN0->getMemOperand(), GN0->getIndexType(), ISD::SEXTLOAD);
14822
14823 CombineTo(N, ExtLoad);
14824 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
14825 AddToWorklist(ExtLoad.getNode());
14826 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14827 }
14828 }
14829
14830 // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
14831 if (ExtVTBits <= 16 && N0.getOpcode() == ISD::OR) {
14832 if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
14833 N0.getOperand(1), false))
14834 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, BSwap, N1);
14835 }
14836
14837 // Fold (iM_signext_inreg
14838 // (extract_subvector (zext|anyext|sext iN_v to _) _)
14839 // from iN)
14840 // -> (extract_subvector (signext iN_v to iM))
14841 if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR && N0.hasOneUse() &&
14842 ISD::isExtOpcode(N0.getOperand(0).getOpcode())) {
14843 SDValue InnerExt = N0.getOperand(0);
14844 EVT InnerExtVT = InnerExt->getValueType(0);
14845 SDValue Extendee = InnerExt->getOperand(0);
14846
14847 if (ExtVTBits == Extendee.getValueType().getScalarSizeInBits() &&
14848 (!LegalOperations ||
14849 TLI.isOperationLegal(ISD::SIGN_EXTEND, InnerExtVT))) {
14850 SDValue SignExtExtendee =
14851 DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), InnerExtVT, Extendee);
14852 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), VT, SignExtExtendee,
14853 N0.getOperand(1));
14854 }
14855 }
14856
14857 return SDValue();
14858}
14859
14860 static SDValue foldExtendVectorInregToExtendOfSubvector(
14861 SDNode *N, const SDLoc &DL, const TargetLowering &TLI, SelectionDAG &DAG,
14862 bool LegalOperations) {
14863 unsigned InregOpcode = N->getOpcode();
14864 unsigned Opcode = DAG.getOpcode_EXTEND(InregOpcode);
14865
14866 SDValue Src = N->getOperand(0);
14867 EVT VT = N->getValueType(0);
14868 EVT SrcVT = EVT::getVectorVT(*DAG.getContext(),
14869 Src.getValueType().getVectorElementType(),
14870 VT.getVectorElementCount());
14871
14872 assert(ISD::isExtVecInRegOpcode(InregOpcode) &&
14873 "Expected EXTEND_VECTOR_INREG dag node in input!");
14874
14875 // Profitability check: our operand must be a one-use CONCAT_VECTORS.
14876 // FIXME: one-use check may be overly restrictive
14877 if (!Src.hasOneUse() || Src.getOpcode() != ISD::CONCAT_VECTORS)
14878 return SDValue();
14879
14880 // Profitability check: we must be extending exactly one of its operands.
14881 // FIXME: this is probably overly restrictive.
14882 Src = Src.getOperand(0);
14883 if (Src.getValueType() != SrcVT)
14884 return SDValue();
14885
14886 if (LegalOperations && !TLI.isOperationLegal(Opcode, VT))
14887 return SDValue();
14888
14889 return DAG.getNode(Opcode, DL, VT, Src);
14890}
14891
14892SDValue DAGCombiner::visitEXTEND_VECTOR_INREG(SDNode *N) {
14893 SDValue N0 = N->getOperand(0);
14894 EVT VT = N->getValueType(0);
14895 SDLoc DL(N);
14896
14897 if (N0.isUndef()) {
14898 // aext_vector_inreg(undef) = undef because the top bits are undefined.
14899 // {s/z}ext_vector_inreg(undef) = 0 because the top bits must be the same.
14900 return N->getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG
14901 ? DAG.getUNDEF(VT)
14902 : DAG.getConstant(0, DL, VT);
14903 }
14904
14905 if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
14906 return Res;
14907
14908 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
14909 return SDValue(N, 0);
14910
14911 if (SDValue R = foldExtendVectorInregToExtendOfSubvector(N, DL, TLI, DAG,
14912 LegalOperations))
14913 return R;
14914
14915 return SDValue();
14916}
14917
14918SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
14919 SDValue N0 = N->getOperand(0);
14920 EVT VT = N->getValueType(0);
14921 EVT SrcVT = N0.getValueType();
14922 bool isLE = DAG.getDataLayout().isLittleEndian();
14923 SDLoc DL(N);
14924
14925 // trunc(undef) = undef
14926 if (N0.isUndef())
14927 return DAG.getUNDEF(VT);
14928
14929 // fold (truncate (truncate x)) -> (truncate x)
14930 if (N0.getOpcode() == ISD::TRUNCATE)
14931 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
14932
14933 // fold (truncate c1) -> c1
14934 if (SDValue C = DAG.FoldConstantArithmetic(ISD::TRUNCATE, DL, VT, {N0}))
14935 return C;
14936
14937 // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
14938 if (N0.getOpcode() == ISD::ZERO_EXTEND ||
14939 N0.getOpcode() == ISD::SIGN_EXTEND ||
14940 N0.getOpcode() == ISD::ANY_EXTEND) {
14941 // if the source is smaller than the dest, we still need an extend.
14942 if (N0.getOperand(0).getValueType().bitsLT(VT))
14943 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0));
14944 // if the source is larger than the dest, then we just need the truncate.
14945 if (N0.getOperand(0).getValueType().bitsGT(VT))
14946 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
14947 // if the source and dest are the same type, we can drop both the extend
14948 // and the truncate.
14949 return N0.getOperand(0);
14950 }
14951
14952 // Try to narrow a truncate-of-sext_in_reg to the destination type:
14953 // trunc (sign_ext_inreg X, iM) to iN --> sign_ext_inreg (trunc X to iN), iM
14954 if (!LegalTypes && N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
14955 N0.hasOneUse()) {
14956 SDValue X = N0.getOperand(0);
14957 SDValue ExtVal = N0.getOperand(1);
14958 EVT ExtVT = cast<VTSDNode>(ExtVal)->getVT();
14959 if (ExtVT.bitsLT(VT) && TLI.preferSextInRegOfTruncate(VT, SrcVT, ExtVT)) {
14960 SDValue TrX = DAG.getNode(ISD::TRUNCATE, DL, VT, X);
14961 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, TrX, ExtVal);
14962 }
14963 }
14964
14965 // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
14966 if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
14967 return SDValue();
14968
14969 // Fold extract-and-trunc into a narrow extract. For example:
14970 // i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
14971 // i32 y = TRUNCATE(i64 x)
14972 // -- becomes --
14973 // v16i8 b = BITCAST (v2i64 val)
14974 // i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
14975 //
14976 // Note: We only run this optimization after type legalization (which often
14977 // creates this pattern) and before operation legalization after which
14978 // we need to be more careful about the vector instructions that we generate.
14979 if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
14980 LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
14981 EVT VecTy = N0.getOperand(0).getValueType();
14982 EVT ExTy = N0.getValueType();
14983 EVT TrTy = N->getValueType(0);
14984
14985 auto EltCnt = VecTy.getVectorElementCount();
14986 unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
14987 auto NewEltCnt = EltCnt * SizeRatio;
14988
14989 EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, NewEltCnt);
14990 assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
14991
14992 SDValue EltNo = N0->getOperand(1);
14993 if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
14994 int Elt = EltNo->getAsZExtVal();
14995 int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
14996 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
14997 DAG.getBitcast(NVT, N0.getOperand(0)),
14998 DAG.getVectorIdxConstant(Index, DL));
14999 }
15000 }
15001
15002 // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
15003 if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) {
15004 if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
15005 TLI.isTruncateFree(SrcVT, VT)) {
15006 SDLoc SL(N0);
15007 SDValue Cond = N0.getOperand(0);
15008 SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
15009 SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
15010 return DAG.getNode(ISD::SELECT, DL, VT, Cond, TruncOp0, TruncOp1);
15011 }
15012 }
15013
15014 // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
15015 if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
15016 (!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
15017 TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
15018 SDValue Amt = N0.getOperand(1);
15019 KnownBits Known = DAG.computeKnownBits(Amt);
15020 unsigned Size = VT.getScalarSizeInBits();
15021 if (Known.countMaxActiveBits() <= Log2_32(Size)) {
15022 EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
15023 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
15024 if (AmtVT != Amt.getValueType()) {
15025 Amt = DAG.getZExtOrTrunc(Amt, DL, AmtVT);
15026 AddToWorklist(Amt.getNode());
15027 }
15028 return DAG.getNode(ISD::SHL, DL, VT, Trunc, Amt);
15029 }
15030 }
15031
15032 if (SDValue V = foldSubToUSubSat(VT, N0.getNode(), DL))
15033 return V;
15034
15035 if (SDValue ABD = foldABSToABD(N, DL))
15036 return ABD;
15037
15038 // Attempt to pre-truncate BUILD_VECTOR sources.
15039 if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
15040 N0.hasOneUse() &&
15041 TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType()) &&
15042 // Avoid creating illegal types if running after type legalizer.
15043 (!LegalTypes || TLI.isTypeLegal(VT.getScalarType()))) {
15044 EVT SVT = VT.getScalarType();
15045 SmallVector<SDValue, 8> TruncOps;
15046 for (const SDValue &Op : N0->op_values()) {
15047 SDValue TruncOp = DAG.getNode(ISD::TRUNCATE, DL, SVT, Op);
15048 TruncOps.push_back(TruncOp);
15049 }
15050 return DAG.getBuildVector(VT, DL, TruncOps);
15051 }
15052
15053 // trunc (splat_vector x) -> splat_vector (trunc x)
15054 if (N0.getOpcode() == ISD::SPLAT_VECTOR &&
15055 (!LegalTypes || TLI.isTypeLegal(VT.getScalarType())) &&
15056 (!LegalOperations || TLI.isOperationLegal(ISD::SPLAT_VECTOR, VT))) {
15057 EVT SVT = VT.getScalarType();
15058 return DAG.getSplatVector(
15059 VT, DL, DAG.getNode(ISD::TRUNCATE, DL, SVT, N0->getOperand(0)));
15060 }
15061
15062 // Fold a series of buildvector, bitcast, and truncate if possible.
15063 // For example fold
15064 // (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
15065 // (2xi32 (buildvector x, y)).
15066 if (Level == AfterLegalizeVectorOps && VT.isVector() &&
15067 N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
15068 N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
15069 N0.getOperand(0).hasOneUse()) {
15070 SDValue BuildVect = N0.getOperand(0);
15071 EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
15072 EVT TruncVecEltTy = VT.getVectorElementType();
15073
15074 // Check that the element types match.
15075 if (BuildVectEltTy == TruncVecEltTy) {
15076 // Now we only need to compute the offset of the truncated elements.
15077 unsigned BuildVecNumElts = BuildVect.getNumOperands();
15078 unsigned TruncVecNumElts = VT.getVectorNumElements();
15079 unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
15080
15081 assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
15082 "Invalid number of elements");
15083
15084 SmallVector<SDValue, 8> Opnds;
15085 for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
15086 Opnds.push_back(BuildVect.getOperand(i));
15087
15088 return DAG.getBuildVector(VT, DL, Opnds);
15089 }
15090 }
15091
15092 // fold (truncate (load x)) -> (smaller load x)
15093 // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
15094 if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
15095 if (SDValue Reduced = reduceLoadWidth(N))
15096 return Reduced;
15097
15098 // Handle the case where the truncated result is at least as wide as the
15099 // loaded type.
15100 if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
15101 auto *LN0 = cast<LoadSDNode>(N0);
15102 if (LN0->isSimple() && LN0->getMemoryVT().bitsLE(VT)) {
15103 SDValue NewLoad = DAG.getExtLoad(
15104 LN0->getExtensionType(), SDLoc(LN0), VT, LN0->getChain(),
15105 LN0->getBasePtr(), LN0->getMemoryVT(), LN0->getMemOperand());
15106 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
15107 return NewLoad;
15108 }
15109 }
15110 }
15111
15112 // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
15113 // where ... are all 'undef'.
15114 if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
15115 SmallVector<EVT, 8> VTs;
15116 SDValue V;
15117 unsigned Idx = 0;
15118 unsigned NumDefs = 0;
15119
15120 for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
15121 SDValue X = N0.getOperand(i);
15122 if (!X.isUndef()) {
15123 V = X;
15124 Idx = i;
15125 NumDefs++;
15126 }
15127 // Stop if more than one member is non-undef.
15128 if (NumDefs > 1)
15129 break;
15130
15131 VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
15132 VT.getVectorElementType(),
15133 X.getValueType().getVectorElementCount()));
15134 }
15135
15136 if (NumDefs == 0)
15137 return DAG.getUNDEF(VT);
15138
15139 if (NumDefs == 1) {
15140 assert(V.getNode() && "The single defined operand is empty!");
15141 SmallVector<SDValue, 8> Opnds;
15141       SmallVector<SDValue, 8> Opnds;
15142       for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
15143 if (i != Idx) {
15144 Opnds.push_back(DAG.getUNDEF(VTs[i]));
15145 continue;
15146 }
15147 SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
15148 AddToWorklist(NV.getNode());
15149 Opnds.push_back(NV);
15150 }
15151 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Opnds);
15152 }
15153 }
15154
15155 // Fold truncate of a bitcast of a vector to an extract of the low vector
15156 // element.
15157 //
15158 // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
15159 if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
15160 SDValue VecSrc = N0.getOperand(0);
15161 EVT VecSrcVT = VecSrc.getValueType();
15162 if (VecSrcVT.isVector() && VecSrcVT.getScalarType() == VT &&
15163 (!LegalOperations ||
15164 TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecSrcVT))) {
15165 unsigned Idx = isLE ? 0 : VecSrcVT.getVectorNumElements() - 1;
15166 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, VecSrc,
15167                          DAG.getVectorIdxConstant(Idx, DL));
15168     }
15169 }
15170
15171 // Simplify the operands using demanded-bits information.
15172   if (SimplifyDemandedBits(SDValue(N, 0)))
15173     return SDValue(N, 0);
15174
15175 // fold (truncate (extract_subvector(ext x))) ->
15176 // (extract_subvector x)
15177 // TODO: This can be generalized to cover cases where the truncate and extract
15178 // do not fully cancel each other out.
15179 if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
15180 SDValue N00 = N0.getOperand(0);
15181 if (N00.getOpcode() == ISD::SIGN_EXTEND ||
15182 N00.getOpcode() == ISD::ZERO_EXTEND ||
15183 N00.getOpcode() == ISD::ANY_EXTEND) {
15184 if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
15185               VT.getVectorElementType())
15186         return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
15187 N00.getOperand(0), N0.getOperand(1));
15188 }
15189 }
15190
15191 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
15192 return NewVSel;
15193
15194 // Narrow a suitable binary operation with a non-opaque constant operand by
15195 // moving it ahead of the truncate. This is limited to pre-legalization
15196 // because targets may prefer a wider type during later combines and invert
15197 // this transform.
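  // For example (illustrative): (i16 (trunc (add (i32 x), 42))) can become
  // (add (i16 (trunc x)), 42), since truncation distributes over these
  // wrapping integer ops.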
15198 switch (N0.getOpcode()) {
15199 case ISD::ADD:
15200 case ISD::SUB:
15201 case ISD::MUL:
15202 case ISD::AND:
15203 case ISD::OR:
15204 case ISD::XOR:
15205 if (!LegalOperations && N0.hasOneUse() &&
15206 (isConstantOrConstantVector(N0.getOperand(0), true) ||
15207 isConstantOrConstantVector(N0.getOperand(1), true))) {
15208 // TODO: We already restricted this to pre-legalization, but for vectors
15209 // we are extra cautious to not create an unsupported operation.
15210 // Target-specific changes are likely needed to avoid regressions here.
15211 if (VT.isScalarInteger() || TLI.isOperationLegal(N0.getOpcode(), VT)) {
15212 SDValue NarrowL = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
15213 SDValue NarrowR = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
15214 return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR);
15215 }
15216 }
15217 break;
15218 case ISD::ADDE:
15219 case ISD::UADDO_CARRY:
15220 // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
15221 // (trunc uaddo_carry(X, Y, Carry)) ->
15222 // (uaddo_carry trunc(X), trunc(Y), Carry)
15223 // When the adde's carry is not used.
15224   // We only do this for uaddo_carry before operation legalization.
15225 if (((!LegalOperations && N0.getOpcode() == ISD::UADDO_CARRY) ||
15226 TLI.isOperationLegal(N0.getOpcode(), VT)) &&
15227 N0.hasOneUse() && !N0->hasAnyUseOfValue(1)) {
15228 SDValue X = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
15229 SDValue Y = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
15230 SDVTList VTs = DAG.getVTList(VT, N0->getValueType(1));
15231 return DAG.getNode(N0.getOpcode(), DL, VTs, X, Y, N0.getOperand(2));
15232 }
15233 break;
15234 case ISD::USUBSAT:
15235     // Truncate the USUBSAT only if LHS is a known zero-extension. It's not
15236     // enough to know that the upper bits are zero; we must also ensure that
15237     // we don't introduce an extra truncate.
15238 if (!LegalOperations && N0.hasOneUse() &&
15239         N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
15240         N0.getOperand(0).getOperand(0).getScalarValueSizeInBits() <=
15241             VT.getScalarSizeInBits() &&
15242 hasOperation(N0.getOpcode(), VT)) {
15243 return getTruncatedUSUBSAT(VT, SrcVT, N0.getOperand(0), N0.getOperand(1),
15244 DAG, DL);
15245 }
15246 break;
15247 }
15248
15249 return SDValue();
15250}
15251
15252static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
15253 SDValue Elt = N->getOperand(i);
15254 if (Elt.getOpcode() != ISD::MERGE_VALUES)
15255 return Elt.getNode();
15256 return Elt.getOperand(Elt.getResNo()).getNode();
15257}
15258
15259/// build_pair (load, load) -> load
15260/// if load locations are consecutive.
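/// For example (illustrative): on a little-endian target,
///   (i64 build_pair (i32 load [p]), (i32 load [p+4]))
/// may become a single (i64 load [p]) when the wide load is legal and fast.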
15261SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
15262 assert(N->getOpcode() == ISD::BUILD_PAIR);
15263
15264 auto *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
15265 auto *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
15266
15267   // A BUILD_PAIR always has the least significant part in elt 0 and the most
15268   // significant part in elt 1, so when combining into one large load we need
15269   // to consider the endianness.
15270 if (DAG.getDataLayout().isBigEndian())
15271 std::swap(LD1, LD2);
15272
15273 if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !ISD::isNON_EXTLoad(LD2) ||
15274 !LD1->hasOneUse() || !LD2->hasOneUse() ||
15275 LD1->getAddressSpace() != LD2->getAddressSpace())
15276 return SDValue();
15277
15278 unsigned LD1Fast = 0;
15279 EVT LD1VT = LD1->getValueType(0);
15280 unsigned LD1Bytes = LD1VT.getStoreSize();
15281 if ((!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
15282 DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1) &&
15283 TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
15284 *LD1->getMemOperand(), &LD1Fast) && LD1Fast)
15285 return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
15286 LD1->getPointerInfo(), LD1->getAlign());
15287
15288 return SDValue();
15289}
15290
15291static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
15292 // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
15293 // and Lo parts; on big-endian machines it doesn't.
15294 return DAG.getDataLayout().isBigEndian() ? 1 : 0;
15295}
15296
15297SDValue DAGCombiner::foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
15298 const TargetLowering &TLI) {
15299 // If this is not a bitcast to an FP type or if the target doesn't have
15300 // IEEE754-compliant FP logic, we're done.
15301 EVT VT = N->getValueType(0);
15302 SDValue N0 = N->getOperand(0);
15303 EVT SourceVT = N0.getValueType();
15304
15305 if (!VT.isFloatingPoint())
15306 return SDValue();
15307
15308 // TODO: Handle cases where the integer constant is a different scalar
15309 // bitwidth to the FP.
15310 if (VT.getScalarSizeInBits() != SourceVT.getScalarSizeInBits())
15311 return SDValue();
15312
15313 unsigned FPOpcode;
15314 APInt SignMask;
15315 switch (N0.getOpcode()) {
15316 case ISD::AND:
15317 FPOpcode = ISD::FABS;
15318 SignMask = ~APInt::getSignMask(SourceVT.getScalarSizeInBits());
15319 break;
15320 case ISD::XOR:
15321 FPOpcode = ISD::FNEG;
15322 SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
15323 break;
15324 case ISD::OR:
15325 FPOpcode = ISD::FABS;
15326 SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
15327 break;
15328 default:
15329 return SDValue();
15330 }
15331
15332 if (LegalOperations && !TLI.isOperationLegal(FPOpcode, VT))
15333 return SDValue();
15334
15335 // This needs to be the inverse of logic in foldSignChangeInBitcast.
15336 // FIXME: I don't think looking for bitcast intrinsically makes sense, but
15337 // removing this would require more changes.
15338 auto IsBitCastOrFree = [&TLI, FPOpcode](SDValue Op, EVT VT) {
15339 if (Op.getOpcode() == ISD::BITCAST && Op.getOperand(0).getValueType() == VT)
15340 return true;
15341
15342 return FPOpcode == ISD::FABS ? TLI.isFAbsFree(VT) : TLI.isFNegFree(VT);
15343 };
15344
15345 // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
15346 // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
15347 // Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) ->
15348 // fneg (fabs X)
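  // The OR case forces the sign bit to 1 regardless of its previous value,
  // which is why it corresponds to fneg (fabs X) rather than a plain fneg.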
15349 SDValue LogicOp0 = N0.getOperand(0);
15350 ConstantSDNode *LogicOp1 = isConstOrConstSplat(N0.getOperand(1), true);
15351 if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
15352 IsBitCastOrFree(LogicOp0, VT)) {
15353 SDValue CastOp0 = DAG.getNode(ISD::BITCAST, SDLoc(N), VT, LogicOp0);
15354 SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, CastOp0);
15355 NumFPLogicOpsConv++;
15356 if (N0.getOpcode() == ISD::OR)
15357 return DAG.getNode(ISD::FNEG, SDLoc(N), VT, FPOp);
15358 return FPOp;
15359 }
15360
15361 return SDValue();
15362}
15363
15364SDValue DAGCombiner::visitBITCAST(SDNode *N) {
15365 SDValue N0 = N->getOperand(0);
15366 EVT VT = N->getValueType(0);
15367
15368 if (N0.isUndef())
15369 return DAG.getUNDEF(VT);
15370
15371 // If the input is a BUILD_VECTOR with all constant elements, fold this now.
15372 // Only do this before legalize types, unless both types are integer and the
15373 // scalar type is legal. Only do this before legalize ops, since the target
15374   // may be depending on the bitcast.
15375 // First check to see if this is all constant.
15376 // TODO: Support FP bitcasts after legalize types.
15377 if (VT.isVector() &&
15378 (!LegalTypes ||
15379 (!LegalOperations && VT.isInteger() && N0.getValueType().isInteger() &&
15380 TLI.isTypeLegal(VT.getVectorElementType()))) &&
15381 N0.getOpcode() == ISD::BUILD_VECTOR && N0->hasOneUse() &&
15382 cast<BuildVectorSDNode>(N0)->isConstant())
15383     return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(),
15384                                              VT.getVectorElementType());
15385
15386 // If the input is a constant, let getNode fold it.
15387 if (isIntOrFPConstant(N0)) {
15388 // If we can't allow illegal operations, we need to check that this is just
15389     // an fp -> int or int -> fp conversion and that the resulting operation will
15390 // be legal.
15391 if (!LegalOperations ||
15392 (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
15393          TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
15394         (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
15395 TLI.isOperationLegal(ISD::Constant, VT))) {
15396 SDValue C = DAG.getBitcast(VT, N0);
15397 if (C.getNode() != N)
15398 return C;
15399 }
15400 }
15401
15402 // (conv (conv x, t1), t2) -> (conv x, t2)
15403 if (N0.getOpcode() == ISD::BITCAST)
15404 return DAG.getBitcast(VT, N0.getOperand(0));
15405
15406 // fold (conv (logicop (conv x), (c))) -> (logicop x, (conv c))
15407 // iff the current bitwise logicop type isn't legal
15408 if (ISD::isBitwiseLogicOp(N0.getOpcode()) && VT.isInteger() &&
15409 !TLI.isTypeLegal(N0.getOperand(0).getValueType())) {
15410 auto IsFreeBitcast = [VT](SDValue V) {
15411 return (V.getOpcode() == ISD::BITCAST &&
15412 V.getOperand(0).getValueType() == VT) ||
15413              (ISD::isBuildVectorOfConstantSDNodes(V.getNode()) &&
15414               V->hasOneUse());
15415 };
15416 if (IsFreeBitcast(N0.getOperand(0)) && IsFreeBitcast(N0.getOperand(1)))
15417 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
15418 DAG.getBitcast(VT, N0.getOperand(0)),
15419 DAG.getBitcast(VT, N0.getOperand(1)));
15420 }
15421
15422 // fold (conv (load x)) -> (load (conv*)x)
15423 // If the resultant load doesn't need a higher alignment than the original!
15424 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
15425 // Do not remove the cast if the types differ in endian layout.
15426       TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
15427           TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
15428 // If the load is volatile, we only want to change the load type if the
15429 // resulting load is legal. Otherwise we might increase the number of
15430 // memory accesses. We don't care if the original type was legal or not
15431 // as we assume software couldn't rely on the number of accesses of an
15432 // illegal type.
15433 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) ||
15434 TLI.isOperationLegal(ISD::LOAD, VT))) {
15435 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
15436
15437 if (TLI.isLoadBitCastBeneficial(N0.getValueType(), VT, DAG,
15438 *LN0->getMemOperand())) {
15439 SDValue Load =
15440 DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
15441 LN0->getMemOperand());
15442 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
15443 return Load;
15444 }
15445 }
15446
15447 if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
15448 return V;
15449
15450 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
15451 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
15452 //
15453 // For ppc_fp128:
15454 // fold (bitcast (fneg x)) ->
15455 // flipbit = signbit
15456 // (xor (bitcast x) (build_pair flipbit, flipbit))
15457 //
15458 // fold (bitcast (fabs x)) ->
15459 // flipbit = (and (extract_element (bitcast x), 0), signbit)
15460 // (xor (bitcast x) (build_pair flipbit, flipbit))
15461 // This often reduces constant pool loads.
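  // For example (illustrative), for f64 this rewrites (i64 (bitcast (fneg x)))
  // as (xor (i64 (bitcast x)), 0x8000000000000000), flipping only the sign bit.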
15462 if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
15463 (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
15464 N0->hasOneUse() && VT.isInteger() && !VT.isVector() &&
15465 !N0.getValueType().isVector()) {
15466 SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
15467 AddToWorklist(NewConv.getNode());
15468
15469 SDLoc DL(N);
15470 if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
15471 assert(VT.getSizeInBits() == 128);
15472 SDValue SignBit = DAG.getConstant(
15473 APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
15474 SDValue FlipBit;
15475 if (N0.getOpcode() == ISD::FNEG) {
15476 FlipBit = SignBit;
15477 AddToWorklist(FlipBit.getNode());
15478 } else {
15479 assert(N0.getOpcode() == ISD::FABS);
15480 SDValue Hi =
15481 DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
15482                         DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
15483                                               SDLoc(NewConv)));
15484 AddToWorklist(Hi.getNode());
15485 FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
15486 AddToWorklist(FlipBit.getNode());
15487 }
15488 SDValue FlipBits =
15489 DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
15490 AddToWorklist(FlipBits.getNode());
15491 return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
15492 }
15493 APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
15494 if (N0.getOpcode() == ISD::FNEG)
15495 return DAG.getNode(ISD::XOR, DL, VT,
15496 NewConv, DAG.getConstant(SignBit, DL, VT));
15497 assert(N0.getOpcode() == ISD::FABS);
15498 return DAG.getNode(ISD::AND, DL, VT,
15499 NewConv, DAG.getConstant(~SignBit, DL, VT));
15500 }
15501
15502 // fold (bitconvert (fcopysign cst, x)) ->
15503 // (or (and (bitconvert x), sign), (and cst, (not sign)))
15504 // Note that we don't handle (copysign x, cst) because this can always be
15505 // folded to an fneg or fabs.
15506 //
15507 // For ppc_fp128:
15508 // fold (bitcast (fcopysign cst, x)) ->
15509 // flipbit = (and (extract_element
15510 // (xor (bitcast cst), (bitcast x)), 0),
15511 // signbit)
15512 // (xor (bitcast cst) (build_pair flipbit, flipbit))
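  // For example (illustrative), with f64 operands this produces
  //   (or (and (bitcast x), 0x8000...), (and (bitcast cst), 0x7fff...)),
  // i.e. the sign bit comes from x and the magnitude bits from the constant.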
15513 if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse() &&
15514 isa<ConstantFPSDNode>(N0.getOperand(0)) && VT.isInteger() &&
15515 !VT.isVector()) {
15516 unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
15517 EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
15518 if (isTypeLegal(IntXVT)) {
15519 SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
15520 AddToWorklist(X.getNode());
15521
15522 // If X has a different width than the result/lhs, sext it or truncate it.
15523 unsigned VTWidth = VT.getSizeInBits();
15524 if (OrigXWidth < VTWidth) {
15525 X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
15526 AddToWorklist(X.getNode());
15527 } else if (OrigXWidth > VTWidth) {
15528 // To get the sign bit in the right place, we have to shift it right
15529 // before truncating.
15530 SDLoc DL(X);
15531 X = DAG.getNode(ISD::SRL, DL,
15532 X.getValueType(), X,
15533 DAG.getConstant(OrigXWidth-VTWidth, DL,
15534 X.getValueType()));
15535 AddToWorklist(X.getNode());
15536 X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
15537 AddToWorklist(X.getNode());
15538 }
15539
15540 if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
15541 APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
15542 SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
15543 AddToWorklist(Cst.getNode());
15544 SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
15545 AddToWorklist(X.getNode());
15546 SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
15547 AddToWorklist(XorResult.getNode());
15548 SDValue XorResult64 = DAG.getNode(
15549 ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
15550             DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
15551                                   SDLoc(XorResult)));
15552 AddToWorklist(XorResult64.getNode());
15553 SDValue FlipBit =
15554 DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
15555 DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
15556 AddToWorklist(FlipBit.getNode());
15557 SDValue FlipBits =
15558 DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
15559 AddToWorklist(FlipBits.getNode());
15560 return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
15561 }
15562 APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
15563 X = DAG.getNode(ISD::AND, SDLoc(X), VT,
15564 X, DAG.getConstant(SignBit, SDLoc(X), VT));
15565 AddToWorklist(X.getNode());
15566
15567 SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
15568 Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
15569 Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
15570 AddToWorklist(Cst.getNode());
15571
15572 return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
15573 }
15574 }
15575
15576 // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
15577 if (N0.getOpcode() == ISD::BUILD_PAIR)
15578 if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
15579 return CombineLD;
15580
15581 // Remove double bitcasts from shuffles - this is often a legacy of
15582 // XformToShuffleWithZero being used to combine bitmaskings (of
15583 // float vectors bitcast to integer vectors) into shuffles.
15584 // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
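  // For example (illustrative): a v2i64 shuffle with mask <1,0> whose inputs
  // are bitcasts from v4i32 can be rewritten as a v4i32 shuffle with mask
  // <2,3,0,1> (each index scaled by MaskScale and expanded).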
15585 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
15586 N0->getOpcode() == ISD::VECTOR_SHUFFLE && N0.hasOneUse() &&
15587       VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
15588       !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
15589     ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
15590
15591 // If operands are a bitcast, peek through if it casts the original VT.
15592 // If operands are a constant, just bitcast back to original VT.
15593 auto PeekThroughBitcast = [&](SDValue Op) {
15594 if (Op.getOpcode() == ISD::BITCAST &&
15595 Op.getOperand(0).getValueType() == VT)
15596 return SDValue(Op.getOperand(0));
15597 if (Op.isUndef() || isAnyConstantBuildVector(Op))
15598 return DAG.getBitcast(VT, Op);
15599 return SDValue();
15600 };
15601
15602 // FIXME: If either input vector is bitcast, try to convert the shuffle to
15603 // the result type of this bitcast. This would eliminate at least one
15604 // bitcast. See the transform in InstCombine.
15605 SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
15606 SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
15607 if (!(SV0 && SV1))
15608 return SDValue();
15609
15610 int MaskScale =
15611         VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
15612     SmallVector<int, 8> NewMask;
15613 for (int M : SVN->getMask())
15614 for (int i = 0; i != MaskScale; ++i)
15615 NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
15616
15617 SDValue LegalShuffle =
15618 TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask, DAG);
15619 if (LegalShuffle)
15620 return LegalShuffle;
15621 }
15622
15623 return SDValue();
15624}
15625
15626SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
15627 EVT VT = N->getValueType(0);
15628 return CombineConsecutiveLoads(N, VT);
15629}
15630
15631SDValue DAGCombiner::visitFREEZE(SDNode *N) {
15632 SDValue N0 = N->getOperand(0);
15633
15634 if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, /*PoisonOnly*/ false))
15635 return N0;
15636
15637 // We currently avoid folding freeze over SRA/SRL, due to the problems seen
15638 // with (freeze (assert ext)) blocking simplifications of SRA/SRL. See for
15639 // example https://reviews.llvm.org/D136529#4120959.
15640 if (N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::SRL)
15641 return SDValue();
15642
15643 // Fold freeze(op(x, ...)) -> op(freeze(x), ...).
15644 // Try to push freeze through instructions that propagate but don't produce
15645   // poison as far as possible. If an operand of freeze satisfies three
15646   // conditions: 1) it has one use, 2) it does not produce poison, and 3) all but
15647   // one of its operands are guaranteed non-poison (or it is a BUILD_VECTOR or
15648   // similar), then push the freeze through to the operands that are not guaranteed non-poison.
15649 // NOTE: we will strip poison-generating flags, so ignore them here.
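  // For example (illustrative): freeze (add x, 1) can become (add (freeze x), 1),
  // since the constant operand is never poison and ADD itself does not create
  // poison once nuw/nsw flags are dropped.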
15650 if (DAG.canCreateUndefOrPoison(N0, /*PoisonOnly*/ false,
15651 /*ConsiderFlags*/ false) ||
15652 N0->getNumValues() != 1 || !N0->hasOneUse())
15653 return SDValue();
15654
15655 bool AllowMultipleMaybePoisonOperands =
15656 N0.getOpcode() == ISD::SELECT_CC ||
15657 N0.getOpcode() == ISD::SETCC ||
15658 N0.getOpcode() == ISD::BUILD_VECTOR ||
15659 N0.getOpcode() == ISD::BUILD_PAIR ||
15660       N0.getOpcode() == ISD::VECTOR_SHUFFLE ||
15661       N0.getOpcode() == ISD::CONCAT_VECTORS;
15662
15663 // Avoid turning a BUILD_VECTOR that can be recognized as "all zeros", "all
15664 // ones" or "constant" into something that depends on FrozenUndef. We can
15665 // instead pick undef values to keep those properties, while at the same time
15666 // folding away the freeze.
15667 // If we implement a more general solution for folding away freeze(undef) in
15668 // the future, then this special handling can be removed.
15669 if (N0.getOpcode() == ISD::BUILD_VECTOR) {
15670 SDLoc DL(N0);
15671 EVT VT = N0.getValueType();
15672     if (ISD::isBuildVectorAllOnes(N0.getNode()))
15673       return DAG.getAllOnesConstant(DL, VT);
15674     if (ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
15675       SmallVector<SDValue, 8> NewVecC;
15676       for (const SDValue &Op : N0->op_values())
15677 NewVecC.push_back(
15678 Op.isUndef() ? DAG.getConstant(0, DL, Op.getValueType()) : Op);
15679 return DAG.getBuildVector(VT, DL, NewVecC);
15680 }
15681 }
15682
15683 SmallSetVector<SDValue, 8> MaybePoisonOperands;
15684 for (SDValue Op : N0->ops()) {
15685 if (DAG.isGuaranteedNotToBeUndefOrPoison(Op, /*PoisonOnly*/ false,
15686 /*Depth*/ 1))
15687 continue;
15688 bool HadMaybePoisonOperands = !MaybePoisonOperands.empty();
15689 bool IsNewMaybePoisonOperand = MaybePoisonOperands.insert(Op);
15690 if (!HadMaybePoisonOperands)
15691 continue;
15692 if (IsNewMaybePoisonOperand && !AllowMultipleMaybePoisonOperands) {
15693 // Multiple maybe-poison ops when not allowed - bail out.
15694 return SDValue();
15695 }
15696 }
15697   // NOTE: the whole op may still not be guaranteed not to be undef or poison,
15698   // because it could create undef or poison due to its poison-generating flags.
15699 // So not finding any maybe-poison operands is fine.
15700
15701 for (SDValue MaybePoisonOperand : MaybePoisonOperands) {
15702 // Don't replace every single UNDEF everywhere with frozen UNDEF, though.
15703 if (MaybePoisonOperand.getOpcode() == ISD::UNDEF)
15704 continue;
15705 // First, freeze each offending operand.
15706 SDValue FrozenMaybePoisonOperand = DAG.getFreeze(MaybePoisonOperand);
15707 // Then, change all other uses of unfrozen operand to use frozen operand.
15708 DAG.ReplaceAllUsesOfValueWith(MaybePoisonOperand, FrozenMaybePoisonOperand);
15709 if (FrozenMaybePoisonOperand.getOpcode() == ISD::FREEZE &&
15710 FrozenMaybePoisonOperand.getOperand(0) == FrozenMaybePoisonOperand) {
15711 // But, that also updated the use in the freeze we just created, thus
15712 // creating a cycle in a DAG. Let's undo that by mutating the freeze.
15713 DAG.UpdateNodeOperands(FrozenMaybePoisonOperand.getNode(),
15714 MaybePoisonOperand);
15715 }
15716 }
15717
15718 // This node has been merged with another.
15719 if (N->getOpcode() == ISD::DELETED_NODE)
15720 return SDValue(N, 0);
15721
15722 // The whole node may have been updated, so the value we were holding
15723 // may no longer be valid. Re-fetch the operand we're `freeze`ing.
15724 N0 = N->getOperand(0);
15725
15726   // Finally, recreate the node; its operands were updated to use frozen
15727   // operands, so we just need to use its "original" operands.
15728 SmallVector<SDValue> Ops(N0->op_begin(), N0->op_end());
15729   // Special-handle ISD::UNDEF: each single one of them can be its own thing.
15730 for (SDValue &Op : Ops) {
15731 if (Op.getOpcode() == ISD::UNDEF)
15732 Op = DAG.getFreeze(Op);
15733 }
15734
15735 SDValue R;
15736 if (auto *SVN = dyn_cast<ShuffleVectorSDNode>(N0)) {
15737 // Special case handling for ShuffleVectorSDNode nodes.
15738 R = DAG.getVectorShuffle(N0.getValueType(), SDLoc(N0), Ops[0], Ops[1],
15739 SVN->getMask());
15740 } else {
15741 // NOTE: this strips poison generating flags.
15742 R = DAG.getNode(N0.getOpcode(), SDLoc(N0), N0->getVTList(), Ops);
15743 }
15744 assert(DAG.isGuaranteedNotToBeUndefOrPoison(R, /*PoisonOnly*/ false) &&
15745 "Can't create node that may be undef/poison!");
15746 return R;
15747}
15748
15749/// We know that BV is a build_vector node with Constant, ConstantFP or Undef
15750/// operands. DstEltVT indicates the destination element value type.
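/// For example (illustrative), bitcasting a constant v4i16 build_vector to
/// v2i32 folds each pair of i16 constants into one i32 constant, with the
/// target's endianness deciding which half lands in the high bits.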
15751SDValue DAGCombiner::
15752ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
15753 EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
15754
15755 // If this is already the right type, we're done.
15756 if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
15757
15758 unsigned SrcBitSize = SrcEltVT.getSizeInBits();
15759 unsigned DstBitSize = DstEltVT.getSizeInBits();
15760
15761 // If this is a conversion of N elements of one type to N elements of another
15762 // type, convert each element. This handles FP<->INT cases.
15763 if (SrcBitSize == DstBitSize) {
15764     SmallVector<SDValue, 8> Ops;
15765     for (SDValue Op : BV->op_values()) {
15766 // If the vector element type is not legal, the BUILD_VECTOR operands
15767 // are promoted and implicitly truncated. Make that explicit here.
15768 if (Op.getValueType() != SrcEltVT)
15769 Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
15770 Ops.push_back(DAG.getBitcast(DstEltVT, Op));
15771 AddToWorklist(Ops.back().getNode());
15772 }
15773 EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
15774                               BV->getValueType(0).getVectorNumElements());
15775     return DAG.getBuildVector(VT, SDLoc(BV), Ops);
15776 }
15777
15778 // Otherwise, we're growing or shrinking the elements. To avoid having to
15779 // handle annoying details of growing/shrinking FP values, we convert them to
15780 // int first.
15781 if (SrcEltVT.isFloatingPoint()) {
15782     // Convert the input float vector to an int vector where the elements are
15783     // the same size.
15784 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
15785 BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
15786 SrcEltVT = IntVT;
15787 }
15788
15789   // Now we know the input is an integer vector. If the output is an FP type,
15790 // convert to integer first, then to FP of the right size.
15791 if (DstEltVT.isFloatingPoint()) {
15792 EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
15793 SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
15794
15795 // Next, convert to FP elements of the same size.
15796 return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
15797 }
15798
15799 // Okay, we know the src/dst types are both integers of differing types.
15800 assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
15801
15802 // TODO: Should ConstantFoldBITCASTofBUILD_VECTOR always take a
15803 // BuildVectorSDNode?
15804 auto *BVN = cast<BuildVectorSDNode>(BV);
15805
15806 // Extract the constant raw bit data.
15807 BitVector UndefElements;
15808 SmallVector<APInt> RawBits;
15809 bool IsLE = DAG.getDataLayout().isLittleEndian();
15810 if (!BVN->getConstantRawBits(IsLE, DstBitSize, RawBits, UndefElements))
15811 return SDValue();
15812
15813 SDLoc DL(BV);
15814   SmallVector<SDValue, 8> Ops;
15815   for (unsigned I = 0, E = RawBits.size(); I != E; ++I) {
15816 if (UndefElements[I])
15817 Ops.push_back(DAG.getUNDEF(DstEltVT));
15818 else
15819 Ops.push_back(DAG.getConstant(RawBits[I], DL, DstEltVT));
15820 }
15821
15822 EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
15823 return DAG.getBuildVector(VT, DL, Ops);
15824}
15825
15826 // Returns true if floating-point contraction is allowed on the FMUL-SDValue
15827 // `N`.
15828 static bool isContractableFMUL(const TargetOptions &Options, SDValue N) {
15829   assert(N.getOpcode() == ISD::FMUL);
15830
15831 return Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath ||
15832 N->getFlags().hasAllowContract();
15833}
15834
15835 // Returns true if `N` can be assumed to involve no infinities in its computation.
15836 static bool hasNoInfs(const TargetOptions &Options, SDValue N) {
15837   return Options.NoInfsFPMath || N->getFlags().hasNoInfs();
15838}
15839
15840/// Try to perform FMA combining on a given FADD node.
15841template <class MatchContextClass>
15842SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
15843 SDValue N0 = N->getOperand(0);
15844 SDValue N1 = N->getOperand(1);
15845 EVT VT = N->getValueType(0);
15846 SDLoc SL(N);
15847 MatchContextClass matcher(DAG, TLI, N);
15848 const TargetOptions &Options = DAG.getTarget().Options;
15849
15850 bool UseVP = std::is_same_v<MatchContextClass, VPMatchContext>;
15851
15852 // Floating-point multiply-add with intermediate rounding.
15853 // FIXME: Make isFMADLegal have specific behavior when using VPMatchContext.
15854 // FIXME: Add VP_FMAD opcode.
15855 bool HasFMAD = !UseVP && (LegalOperations && TLI.isFMADLegal(DAG, N));
15856
15857 // Floating-point multiply-add without intermediate rounding.
15858 bool HasFMA =
15859       TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
15860       (!LegalOperations || matcher.isOperationLegalOrCustom(ISD::FMA, VT));
15861
15862 // No valid opcode, do not combine.
15863 if (!HasFMAD && !HasFMA)
15864 return SDValue();
15865
15866 bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
15867 Options.UnsafeFPMath || HasFMAD);
15868 // If the addition is not contractable, do not combine.
15869 if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
15870 return SDValue();
15871
15872 // Folding fadd (fmul x, y), (fmul x, y) -> fma x, y, (fmul x, y) is never
15873 // beneficial. It does not reduce latency. It increases register pressure. It
15874 // replaces an fadd with an fma which is a more complex instruction, so is
15875 // likely to have a larger encoding, use more functional units, etc.
15876 if (N0 == N1)
15877 return SDValue();
15878
15879 if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
15880 return SDValue();
15881
15882 // Always prefer FMAD to FMA for precision.
15883 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
15884   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
15885
15886 auto isFusedOp = [&](SDValue N) {
15887 return matcher.match(N, ISD::FMA) || matcher.match(N, ISD::FMAD);
15888 };
15889
15890 // Is the node an FMUL and contractable either due to global flags or
15891 // SDNodeFlags.
15892 auto isContractableFMUL = [AllowFusionGlobally, &matcher](SDValue N) {
15893 if (!matcher.match(N, ISD::FMUL))
15894 return false;
15895 return AllowFusionGlobally || N->getFlags().hasAllowContract();
15896 };
15897 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
15898 // prefer to fold the multiply with fewer uses.
15899   if (isContractableFMUL(N0) && isContractableFMUL(N1)) {
15900     if (N0->use_size() > N1->use_size())
15901 std::swap(N0, N1);
15902 }
15903
15904 // fold (fadd (fmul x, y), z) -> (fma x, y, z)
15905 if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
15906 return matcher.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
15907 N0.getOperand(1), N1);
15908 }
15909
15910 // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
15911 // Note: Commutes FADD operands.
15912 if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
15913 return matcher.getNode(PreferredFusedOpcode, SL, VT, N1.getOperand(0),
15914 N1.getOperand(1), N0);
15915 }
15916
15917 // fadd (fma A, B, (fmul C, D)), E --> fma A, B, (fma C, D, E)
15918 // fadd E, (fma A, B, (fmul C, D)) --> fma A, B, (fma C, D, E)
15919 // This also works with nested fma instructions:
15920 // fadd (fma A, B, (fma (C, D, (fmul (E, F))))), G -->
15921 // fma A, B, (fma C, D, fma (E, F, G))
15922 // fadd (G, (fma A, B, (fma (C, D, (fmul (E, F)))))) -->
15923 // fma A, B, (fma C, D, fma (E, F, G)).
15924 // This requires reassociation because it changes the order of operations.
15925 bool CanReassociate =
15926 Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
15927 if (CanReassociate) {
15928 SDValue FMA, E;
15929 if (isFusedOp(N0) && N0.hasOneUse()) {
15930 FMA = N0;
15931 E = N1;
15932 } else if (isFusedOp(N1) && N1.hasOneUse()) {
15933 FMA = N1;
15934 E = N0;
15935 }
15936
15937 SDValue TmpFMA = FMA;
15938 while (E && isFusedOp(TmpFMA) && TmpFMA.hasOneUse()) {
15939 SDValue FMul = TmpFMA->getOperand(2);
15940 if (matcher.match(FMul, ISD::FMUL) && FMul.hasOneUse()) {
15941 SDValue C = FMul.getOperand(0);
15942 SDValue D = FMul.getOperand(1);
15943 SDValue CDE = matcher.getNode(PreferredFusedOpcode, SL, VT, C, D, E);
15944         DAG.ReplaceAllUsesOfValueWith(FMul, CDE);
15945         // Replacing the inner FMul could cause the outer FMA to be simplified
15946 // away.
15947 return FMA.getOpcode() == ISD::DELETED_NODE ? SDValue(N, 0) : FMA;
15948 }
15949
15950 TmpFMA = TmpFMA->getOperand(2);
15951 }
15952 }
15953
15954 // Look through FP_EXTEND nodes to do more combining.
15955
15956 // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
15957 if (matcher.match(N0, ISD::FP_EXTEND)) {
15958 SDValue N00 = N0.getOperand(0);
15959 if (isContractableFMUL(N00) &&
15960 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
15961 N00.getValueType())) {
15962 return matcher.getNode(
15963 PreferredFusedOpcode, SL, VT,
15964 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
15965 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)), N1);
15966 }
15967 }
15968
15969 // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
15970 // Note: Commutes FADD operands.
15971 if (matcher.match(N1, ISD::FP_EXTEND)) {
15972 SDValue N10 = N1.getOperand(0);
15973 if (isContractableFMUL(N10) &&
15974 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
15975 N10.getValueType())) {
15976 return matcher.getNode(
15977 PreferredFusedOpcode, SL, VT,
15978 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0)),
15979 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0);
15980 }
15981 }
15982
15983 // More folding opportunities when target permits.
15984 if (Aggressive) {
15985 // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
15986 // -> (fma x, y, (fma (fpext u), (fpext v), z))
15987 auto FoldFAddFMAFPExtFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
15988 SDValue Z) {
15989 return matcher.getNode(
15990 PreferredFusedOpcode, SL, VT, X, Y,
15991 matcher.getNode(PreferredFusedOpcode, SL, VT,
15992 matcher.getNode(ISD::FP_EXTEND, SL, VT, U),
15993 matcher.getNode(ISD::FP_EXTEND, SL, VT, V), Z));
15994 };
15995 if (isFusedOp(N0)) {
15996 SDValue N02 = N0.getOperand(2);
15997 if (matcher.match(N02, ISD::FP_EXTEND)) {
15998 SDValue N020 = N02.getOperand(0);
15999 if (isContractableFMUL(N020) &&
16000 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16001 N020.getValueType())) {
16002 return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
16003 N020.getOperand(0), N020.getOperand(1),
16004 N1);
16005 }
16006 }
16007 }
16008
16009 // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
16010 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
16011 // FIXME: This turns two single-precision and one double-precision
16012 // operation into two double-precision operations, which might not be
16013 // interesting for all targets, especially GPUs.
16014 auto FoldFAddFPExtFMAFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
16015 SDValue Z) {
16016 return matcher.getNode(
16017 PreferredFusedOpcode, SL, VT,
16018 matcher.getNode(ISD::FP_EXTEND, SL, VT, X),
16019 matcher.getNode(ISD::FP_EXTEND, SL, VT, Y),
16020 matcher.getNode(PreferredFusedOpcode, SL, VT,
16021 matcher.getNode(ISD::FP_EXTEND, SL, VT, U),
16022 matcher.getNode(ISD::FP_EXTEND, SL, VT, V), Z));
16023 };
16024 if (N0.getOpcode() == ISD::FP_EXTEND) {
16025 SDValue N00 = N0.getOperand(0);
16026 if (isFusedOp(N00)) {
16027 SDValue N002 = N00.getOperand(2);
16028 if (isContractableFMUL(N002) &&
16029 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16030 N00.getValueType())) {
16031 return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
16032 N002.getOperand(0), N002.getOperand(1),
16033 N1);
16034 }
16035 }
16036 }
16037
16038 // fold (fadd x, (fma y, z, (fpext (fmul u, v)))
16039 // -> (fma y, z, (fma (fpext u), (fpext v), x))
16040 if (isFusedOp(N1)) {
16041 SDValue N12 = N1.getOperand(2);
16042 if (N12.getOpcode() == ISD::FP_EXTEND) {
16043 SDValue N120 = N12.getOperand(0);
16044 if (isContractableFMUL(N120) &&
16045 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16046 N120.getValueType())) {
16047 return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
16048 N120.getOperand(0), N120.getOperand(1),
16049 N0);
16050 }
16051 }
16052 }
16053
16054 // fold (fadd x, (fpext (fma y, z, (fmul u, v)))
16055 // -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
16056 // FIXME: This turns two single-precision and one double-precision
16057 // operation into two double-precision operations, which might not be
16058 // interesting for all targets, especially GPUs.
16059 if (N1.getOpcode() == ISD::FP_EXTEND) {
16060 SDValue N10 = N1.getOperand(0);
16061 if (isFusedOp(N10)) {
16062 SDValue N102 = N10.getOperand(2);
16063 if (isContractableFMUL(N102) &&
16064 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16065 N10.getValueType())) {
16066 return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
16067 N102.getOperand(0), N102.getOperand(1),
16068 N0);
16069 }
16070 }
16071 }
16072 }
16073
16074 return SDValue();
16075}
16076
16077/// Try to perform FMA combining on a given FSUB node.
16078template <class MatchContextClass>
16079SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
16080 SDValue N0 = N->getOperand(0);
16081 SDValue N1 = N->getOperand(1);
16082 EVT VT = N->getValueType(0);
16083 SDLoc SL(N);
16084 MatchContextClass matcher(DAG, TLI, N);
16085 const TargetOptions &Options = DAG.getTarget().Options;
16086
16087 bool UseVP = std::is_same_v<MatchContextClass, VPMatchContext>;
16088
16089 // Floating-point multiply-add with intermediate rounding.
16090 // FIXME: Make isFMADLegal have specific behavior when using VPMatchContext.
16091 // FIXME: Add VP_FMAD opcode.
16092 bool HasFMAD = !UseVP && (LegalOperations && TLI.isFMADLegal(DAG, N));
16093
16094 // Floating-point multiply-add without intermediate rounding.
16095 bool HasFMA =
16096       TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
16097       (!LegalOperations || matcher.isOperationLegalOrCustom(ISD::FMA, VT));
16098
16099 // No valid opcode, do not combine.
16100 if (!HasFMAD && !HasFMA)
16101 return SDValue();
16102
16103 const SDNodeFlags Flags = N->getFlags();
16104 bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
16105 Options.UnsafeFPMath || HasFMAD);
16106
16107 // If the subtraction is not contractable, do not combine.
16108 if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
16109 return SDValue();
16110
16111 if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
16112 return SDValue();
16113
16114 // Always prefer FMAD to FMA for precision.
16115 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
16116   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
16117   bool NoSignedZero = Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros();
16118
16119 // Is the node an FMUL and contractable either due to global flags or
16120 // SDNodeFlags.
16121 auto isContractableFMUL = [AllowFusionGlobally, &matcher](SDValue N) {
16122 if (!matcher.match(N, ISD::FMUL))
16123 return false;
16124 return AllowFusionGlobally || N->getFlags().hasAllowContract();
16125 };
16126
16127 // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
16128 auto tryToFoldXYSubZ = [&](SDValue XY, SDValue Z) {
16129 if (isContractableFMUL(XY) && (Aggressive || XY->hasOneUse())) {
16130 return matcher.getNode(PreferredFusedOpcode, SL, VT, XY.getOperand(0),
16131 XY.getOperand(1),
16132 matcher.getNode(ISD::FNEG, SL, VT, Z));
16133 }
16134 return SDValue();
16135 };
16136
16137 // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
16138 // Note: Commutes FSUB operands.
16139 auto tryToFoldXSubYZ = [&](SDValue X, SDValue YZ) {
16140 if (isContractableFMUL(YZ) && (Aggressive || YZ->hasOneUse())) {
16141 return matcher.getNode(
16142 PreferredFusedOpcode, SL, VT,
16143 matcher.getNode(ISD::FNEG, SL, VT, YZ.getOperand(0)),
16144 YZ.getOperand(1), X);
16145 }
16146 return SDValue();
16147 };
16148
16149 // If we have two choices trying to fold (fsub (fmul u, v), (fmul x, y)),
16150 // prefer to fold the multiply with fewer uses.
16151 if (isContractableFMUL(N0) && isContractableFMUL(N1) &&
16152 (N0->use_size() > N1->use_size())) {
16153 // fold (fsub (fmul a, b), (fmul c, d)) -> (fma (fneg c), d, (fmul a, b))
16154 if (SDValue V = tryToFoldXSubYZ(N0, N1))
16155 return V;
16156 // fold (fsub (fmul a, b), (fmul c, d)) -> (fma a, b, (fneg (fmul c, d)))
16157 if (SDValue V = tryToFoldXYSubZ(N0, N1))
16158 return V;
16159 } else {
16160 // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
16161 if (SDValue V = tryToFoldXYSubZ(N0, N1))
16162 return V;
16163 // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
16164 if (SDValue V = tryToFoldXSubYZ(N0, N1))
16165 return V;
16166 }
16167
16168 // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
16169 if (matcher.match(N0, ISD::FNEG) && isContractableFMUL(N0.getOperand(0)) &&
16170 (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
16171 SDValue N00 = N0.getOperand(0).getOperand(0);
16172 SDValue N01 = N0.getOperand(0).getOperand(1);
16173 return matcher.getNode(PreferredFusedOpcode, SL, VT,
16174 matcher.getNode(ISD::FNEG, SL, VT, N00), N01,
16175 matcher.getNode(ISD::FNEG, SL, VT, N1));
16176 }
16177
16178 // Look through FP_EXTEND nodes to do more combining.
16179
16180 // fold (fsub (fpext (fmul x, y)), z)
16181 // -> (fma (fpext x), (fpext y), (fneg z))
16182 if (matcher.match(N0, ISD::FP_EXTEND)) {
16183 SDValue N00 = N0.getOperand(0);
16184 if (isContractableFMUL(N00) &&
16185 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16186 N00.getValueType())) {
16187 return matcher.getNode(
16188 PreferredFusedOpcode, SL, VT,
16189 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
16190 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
16191 matcher.getNode(ISD::FNEG, SL, VT, N1));
16192 }
16193 }
16194
16195 // fold (fsub x, (fpext (fmul y, z)))
16196 // -> (fma (fneg (fpext y)), (fpext z), x)
16197 // Note: Commutes FSUB operands.
16198 if (matcher.match(N1, ISD::FP_EXTEND)) {
16199 SDValue N10 = N1.getOperand(0);
16200 if (isContractableFMUL(N10) &&
16201 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16202 N10.getValueType())) {
16203 return matcher.getNode(
16204 PreferredFusedOpcode, SL, VT,
16205 matcher.getNode(
16206 ISD::FNEG, SL, VT,
16207 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0))),
16208 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0);
16209 }
16210 }
16211
16212 // fold (fsub (fpext (fneg (fmul, x, y))), z)
16213 // -> (fneg (fma (fpext x), (fpext y), z))
16214 // Note: This could be removed with appropriate canonicalization of the
16215 // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
16216   // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent us
16217   // from implementing the canonicalization in visitFSUB.
16218 if (matcher.match(N0, ISD::FP_EXTEND)) {
16219 SDValue N00 = N0.getOperand(0);
16220 if (matcher.match(N00, ISD::FNEG)) {
16221 SDValue N000 = N00.getOperand(0);
16222 if (isContractableFMUL(N000) &&
16223 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16224 N00.getValueType())) {
16225 return matcher.getNode(
16226 ISD::FNEG, SL, VT,
16227 matcher.getNode(
16228 PreferredFusedOpcode, SL, VT,
16229 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
16230 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
16231 N1));
16232 }
16233 }
16234 }
16235
16236 // fold (fsub (fneg (fpext (fmul, x, y))), z)
16237   //   -> (fneg (fma (fpext x), (fpext y), z))
16238 // Note: This could be removed with appropriate canonicalization of the
16239 // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
16240   // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent us
16241   // from implementing the canonicalization in visitFSUB.
16242 if (matcher.match(N0, ISD::FNEG)) {
16243 SDValue N00 = N0.getOperand(0);
16244 if (matcher.match(N00, ISD::FP_EXTEND)) {
16245 SDValue N000 = N00.getOperand(0);
16246 if (isContractableFMUL(N000) &&
16247 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16248 N000.getValueType())) {
16249 return matcher.getNode(
16250 ISD::FNEG, SL, VT,
16251 matcher.getNode(
16252 PreferredFusedOpcode, SL, VT,
16253 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
16254 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
16255 N1));
16256 }
16257 }
16258 }
16259
16260 auto isReassociable = [&Options](SDNode *N) {
16261 return Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
16262 };
16263
16264 auto isContractableAndReassociableFMUL = [&isContractableFMUL,
16265 &isReassociable](SDValue N) {
16266 return isContractableFMUL(N) && isReassociable(N.getNode());
16267 };
16268
16269 auto isFusedOp = [&](SDValue N) {
16270 return matcher.match(N, ISD::FMA) || matcher.match(N, ISD::FMAD);
16271 };
16272
16273 // More folding opportunities when target permits.
16274 if (Aggressive && isReassociable(N)) {
16275 bool CanFuse = Options.UnsafeFPMath || N->getFlags().hasAllowContract();
16276 // fold (fsub (fma x, y, (fmul u, v)), z)
16277 // -> (fma x, y (fma u, v, (fneg z)))
16278 if (CanFuse && isFusedOp(N0) &&
16279 isContractableAndReassociableFMUL(N0.getOperand(2)) &&
16280 N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
16281 return matcher.getNode(
16282 PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1),
16283 matcher.getNode(PreferredFusedOpcode, SL, VT,
16284 N0.getOperand(2).getOperand(0),
16285 N0.getOperand(2).getOperand(1),
16286 matcher.getNode(ISD::FNEG, SL, VT, N1)));
16287 }
16288
16289 // fold (fsub x, (fma y, z, (fmul u, v)))
16290 // -> (fma (fneg y), z, (fma (fneg u), v, x))
16291 if (CanFuse && isFusedOp(N1) &&
16292 isContractableAndReassociableFMUL(N1.getOperand(2)) &&
16293 N1->hasOneUse() && NoSignedZero) {
16294 SDValue N20 = N1.getOperand(2).getOperand(0);
16295 SDValue N21 = N1.getOperand(2).getOperand(1);
16296 return matcher.getNode(
16297 PreferredFusedOpcode, SL, VT,
16298 matcher.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
16299 N1.getOperand(1),
16300 matcher.getNode(PreferredFusedOpcode, SL, VT,
16301 matcher.getNode(ISD::FNEG, SL, VT, N20), N21, N0));
16302 }
16303
16304 // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
16305 // -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
16306 if (isFusedOp(N0) && N0->hasOneUse()) {
16307 SDValue N02 = N0.getOperand(2);
16308 if (matcher.match(N02, ISD::FP_EXTEND)) {
16309 SDValue N020 = N02.getOperand(0);
16310 if (isContractableAndReassociableFMUL(N020) &&
16311 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16312 N020.getValueType())) {
16313 return matcher.getNode(
16314 PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1),
16315 matcher.getNode(
16316 PreferredFusedOpcode, SL, VT,
16317 matcher.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(0)),
16318 matcher.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(1)),
16319 matcher.getNode(ISD::FNEG, SL, VT, N1)));
16320 }
16321 }
16322 }
16323
16324 // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
16325 // -> (fma (fpext x), (fpext y),
16326 // (fma (fpext u), (fpext v), (fneg z)))
16327 // FIXME: This turns two single-precision and one double-precision
16328 // operation into two double-precision operations, which might not be
16329 // interesting for all targets, especially GPUs.
16330 if (matcher.match(N0, ISD::FP_EXTEND)) {
16331 SDValue N00 = N0.getOperand(0);
16332 if (isFusedOp(N00)) {
16333 SDValue N002 = N00.getOperand(2);
16334 if (isContractableAndReassociableFMUL(N002) &&
16335 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16336 N00.getValueType())) {
16337 return matcher.getNode(
16338 PreferredFusedOpcode, SL, VT,
16339 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
16340 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
16341 matcher.getNode(
16342 PreferredFusedOpcode, SL, VT,
16343 matcher.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(0)),
16344 matcher.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(1)),
16345 matcher.getNode(ISD::FNEG, SL, VT, N1)));
16346 }
16347 }
16348 }
16349
16350 // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
16351 // -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
16352 if (isFusedOp(N1) && matcher.match(N1.getOperand(2), ISD::FP_EXTEND) &&
16353 N1->hasOneUse()) {
16354 SDValue N120 = N1.getOperand(2).getOperand(0);
16355 if (isContractableAndReassociableFMUL(N120) &&
16356 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16357 N120.getValueType())) {
16358 SDValue N1200 = N120.getOperand(0);
16359 SDValue N1201 = N120.getOperand(1);
16360 return matcher.getNode(
16361 PreferredFusedOpcode, SL, VT,
16362 matcher.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
16363 N1.getOperand(1),
16364 matcher.getNode(
16365 PreferredFusedOpcode, SL, VT,
16366 matcher.getNode(ISD::FNEG, SL, VT,
16367 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1200)),
16368 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1201), N0));
16369 }
16370 }
16371
16372 // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
16373 // -> (fma (fneg (fpext y)), (fpext z),
16374 // (fma (fneg (fpext u)), (fpext v), x))
16375 // FIXME: This turns two single-precision and one double-precision
16376 // operation into two double-precision operations, which might not be
16377 // interesting for all targets, especially GPUs.
16378 if (matcher.match(N1, ISD::FP_EXTEND) && isFusedOp(N1.getOperand(0))) {
16379 SDValue CvtSrc = N1.getOperand(0);
16380 SDValue N100 = CvtSrc.getOperand(0);
16381 SDValue N101 = CvtSrc.getOperand(1);
16382 SDValue N102 = CvtSrc.getOperand(2);
16383 if (isContractableAndReassociableFMUL(N102) &&
16384 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16385 CvtSrc.getValueType())) {
16386 SDValue N1020 = N102.getOperand(0);
16387 SDValue N1021 = N102.getOperand(1);
16388 return matcher.getNode(
16389 PreferredFusedOpcode, SL, VT,
16390 matcher.getNode(ISD::FNEG, SL, VT,
16391 matcher.getNode(ISD::FP_EXTEND, SL, VT, N100)),
16392 matcher.getNode(ISD::FP_EXTEND, SL, VT, N101),
16393 matcher.getNode(
16394 PreferredFusedOpcode, SL, VT,
16395 matcher.getNode(ISD::FNEG, SL, VT,
16396 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1020)),
16397 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1021), N0));
16398 }
16399 }
16400 }
16401
16402 return SDValue();
16403}
16404
16405/// Try to perform FMA combining on a given FMUL node based on the distributive
16406/// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
16407/// subtraction instead of addition).
16408SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
16409 SDValue N0 = N->getOperand(0);
16410 SDValue N1 = N->getOperand(1);
16411 EVT VT = N->getValueType(0);
16412 SDLoc SL(N);
16413
16414 assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
16415
16416 const TargetOptions &Options = DAG.getTarget().Options;
16417
16418 // The transforms below are incorrect when x == 0 and y == inf, because the
16419 // intermediate multiplication produces a nan.
16420 SDValue FAdd = N0.getOpcode() == ISD::FADD ? N0 : N1;
16421 if (!hasNoInfs(Options, FAdd))
16422 return SDValue();
16423
16424 // Floating-point multiply-add without intermediate rounding.
16425 bool HasFMA =
16426       isContractableFMUL(Options, SDValue(N, 0)) &&
16427       TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
16428       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
16429
16430 // Floating-point multiply-add with intermediate rounding. This can result
16431 // in a less precise result due to the changed rounding order.
16432 bool HasFMAD = Options.UnsafeFPMath &&
16433 (LegalOperations && TLI.isFMADLegal(DAG, N));
16434
16435 // No valid opcode, do not combine.
16436 if (!HasFMAD && !HasFMA)
16437 return SDValue();
16438
16439 // Always prefer FMAD to FMA for precision.
16440 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
16441   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
16442
16443 // fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y)
16444 // fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y))
16445 auto FuseFADD = [&](SDValue X, SDValue Y) {
16446 if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
16447 if (auto *C = isConstOrConstSplatFP(X.getOperand(1), true)) {
16448 if (C->isExactlyValue(+1.0))
16449 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
16450 Y);
16451 if (C->isExactlyValue(-1.0))
16452 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
16453 DAG.getNode(ISD::FNEG, SL, VT, Y));
16454 }
16455 }
16456 return SDValue();
16457 };
16458
16459 if (SDValue FMA = FuseFADD(N0, N1))
16460 return FMA;
16461 if (SDValue FMA = FuseFADD(N1, N0))
16462 return FMA;
16463
16464 // fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y)
16465 // fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y))
16466 // fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y))
16467 // fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y)
16468 auto FuseFSUB = [&](SDValue X, SDValue Y) {
16469 if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
16470 if (auto *C0 = isConstOrConstSplatFP(X.getOperand(0), true)) {
16471 if (C0->isExactlyValue(+1.0))
16472 return DAG.getNode(PreferredFusedOpcode, SL, VT,
16473 DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
16474 Y);
16475 if (C0->isExactlyValue(-1.0))
16476 return DAG.getNode(PreferredFusedOpcode, SL, VT,
16477 DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
16478 DAG.getNode(ISD::FNEG, SL, VT, Y));
16479 }
16480 if (auto *C1 = isConstOrConstSplatFP(X.getOperand(1), true)) {
16481 if (C1->isExactlyValue(+1.0))
16482 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
16483 DAG.getNode(ISD::FNEG, SL, VT, Y));
16484 if (C1->isExactlyValue(-1.0))
16485 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
16486 Y);
16487 }
16488 }
16489 return SDValue();
16490 };
16491
16492 if (SDValue FMA = FuseFSUB(N0, N1))
16493 return FMA;
16494 if (SDValue FMA = FuseFSUB(N1, N0))
16495 return FMA;
16496
16497 return SDValue();
16498}
16499
16500SDValue DAGCombiner::visitVP_FADD(SDNode *N) {
16501 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
16502
16503 // FADD -> FMA combines:
16504 if (SDValue Fused = visitFADDForFMACombine<VPMatchContext>(N)) {
16505 if (Fused.getOpcode() != ISD::DELETED_NODE)
16506 AddToWorklist(Fused.getNode());
16507 return Fused;
16508 }
16509 return SDValue();
16510}
16511
16512SDValue DAGCombiner::visitFADD(SDNode *N) {
16513 SDValue N0 = N->getOperand(0);
16514 SDValue N1 = N->getOperand(1);
16515   bool N0CFP = DAG.isConstantFPBuildVectorOrConstantFP(N0);
16516   bool N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N1);
16517   EVT VT = N->getValueType(0);
16518 SDLoc DL(N);
16519 const TargetOptions &Options = DAG.getTarget().Options;
16520 SDNodeFlags Flags = N->getFlags();
16521 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
16522
16523 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
16524 return R;
16525
16526 // fold (fadd c1, c2) -> c1 + c2
16527 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FADD, DL, VT, {N0, N1}))
16528 return C;
16529
16530 // canonicalize constant to RHS
16531 if (N0CFP && !N1CFP)
16532 return DAG.getNode(ISD::FADD, DL, VT, N1, N0);
16533
16534 // fold vector ops
16535 if (VT.isVector())
16536 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
16537 return FoldedVOp;
16538
16539 // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
16540 ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
16541 if (N1C && N1C->isZero())
16542 if (N1C->isNegative() || Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())
16543 return N0;
16544
16545 if (SDValue NewSel = foldBinOpIntoSelect(N))
16546 return NewSel;
16547
16548 // fold (fadd A, (fneg B)) -> (fsub A, B)
16549 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
16550 if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
16551 N1, DAG, LegalOperations, ForCodeSize))
16552 return DAG.getNode(ISD::FSUB, DL, VT, N0, NegN1);
16553
16554 // fold (fadd (fneg A), B) -> (fsub B, A)
16555 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
16556 if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
16557 N0, DAG, LegalOperations, ForCodeSize))
16558 return DAG.getNode(ISD::FSUB, DL, VT, N1, NegN0);
16559
16560 auto isFMulNegTwo = [](SDValue FMul) {
16561 if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
16562 return false;
16563 auto *C = isConstOrConstSplatFP(FMul.getOperand(1), true);
16564 return C && C->isExactlyValue(-2.0);
16565 };
16566
16567 // fadd (fmul B, -2.0), A --> fsub A, (fadd B, B)
16568 if (isFMulNegTwo(N0)) {
16569 SDValue B = N0.getOperand(0);
16570 SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
16571 return DAG.getNode(ISD::FSUB, DL, VT, N1, Add);
16572 }
16573 // fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B)
16574 if (isFMulNegTwo(N1)) {
16575 SDValue B = N1.getOperand(0);
16576 SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
16577 return DAG.getNode(ISD::FSUB, DL, VT, N0, Add);
16578 }
16579
16580 // No FP constant should be created after legalization as the Instruction
16581 // Selection pass has a hard time dealing with FP constants.
16582 bool AllowNewConst = (Level < AfterLegalizeDAG);
16583
16584 // If nnan is enabled, fold lots of things.
16585 if ((Options.NoNaNsFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
16586 // If allowed, fold (fadd (fneg x), x) -> 0.0
16587 if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
16588 return DAG.getConstantFP(0.0, DL, VT);
16589
16590 // If allowed, fold (fadd x, (fneg x)) -> 0.0
16591 if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
16592 return DAG.getConstantFP(0.0, DL, VT);
16593 }
16594
16595 // If 'unsafe math' or reassoc and nsz, fold lots of things.
16596 // TODO: break out portions of the transformations below for which Unsafe is
16597 // considered and which do not require both nsz and reassoc
16598 if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
16599 (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
16600 AllowNewConst) {
16601 // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
16602 if (N1CFP && N0.getOpcode() == ISD::FADD &&
16603 DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
16604 SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1);
16605 return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC);
16606 }
16607
16608 // We can fold chains of FADD's of the same value into multiplications.
16609 // This transform is not safe in general because we are reducing the number
16610 // of rounding steps.
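// For example, under the default rounding mode ((x + x) + x) rounds twice,
// while (x * 3.0) rounds once, so the results can differ in the last bit;
// that is why this block is guarded by the reassoc/unsafe-fp-math check above.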
16611 if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
16612 if (N0.getOpcode() == ISD::FMUL) {
16613 SDNode *CFP00 =
16614 DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
16615 SDNode *CFP01 =
16616 DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
16617
16618 // (fadd (fmul x, c), x) -> (fmul x, c+1)
16619 if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
16620 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
16621 DAG.getConstantFP(1.0, DL, VT));
16622 return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP);
16623 }
16624
16625 // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
16626 if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
16627 N1.getOperand(0) == N1.getOperand(1) &&
16628 N0.getOperand(0) == N1.getOperand(0)) {
16629 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
16630 DAG.getConstantFP(2.0, DL, VT));
16631 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP);
16632 }
16633 }
16634
16635 if (N1.getOpcode() == ISD::FMUL) {
16636 SDNode *CFP10 =
16637 DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
16638 SDNode *CFP11 =
16639 DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
16640
16641 // (fadd x, (fmul x, c)) -> (fmul x, c+1)
16642 if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
16643 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
16644 DAG.getConstantFP(1.0, DL, VT));
16645 return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP);
16646 }
16647
16648 // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
16649 if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
16650 N0.getOperand(0) == N0.getOperand(1) &&
16651 N1.getOperand(0) == N0.getOperand(0)) {
16652 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
16653 DAG.getConstantFP(2.0, DL, VT));
16654 return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP);
16655 }
16656 }
16657
16658 if (N0.getOpcode() == ISD::FADD) {
16659 SDNode *CFP00 =
16660 DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
16661 // (fadd (fadd x, x), x) -> (fmul x, 3.0)
16662 if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
16663 (N0.getOperand(0) == N1)) {
16664 return DAG.getNode(ISD::FMUL, DL, VT, N1,
16665 DAG.getConstantFP(3.0, DL, VT));
16666 }
16667 }
16668
16669 if (N1.getOpcode() == ISD::FADD) {
16670 SDNode *CFP10 =
16671 DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
16672 // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
16673 if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
16674 N1.getOperand(0) == N0) {
16675 return DAG.getNode(ISD::FMUL, DL, VT, N0,
16676 DAG.getConstantFP(3.0, DL, VT));
16677 }
16678 }
16679
16680 // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
16681 if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
16682 N0.getOperand(0) == N0.getOperand(1) &&
16683 N1.getOperand(0) == N1.getOperand(1) &&
16684 N0.getOperand(0) == N1.getOperand(0)) {
16685 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
16686 DAG.getConstantFP(4.0, DL, VT));
16687 }
16688 }
16689
16690 // Fold fadd(vecreduce(x), vecreduce(y)) -> vecreduce(fadd(x, y))
16691 if (SDValue SD = reassociateReduction(ISD::VECREDUCE_FADD, ISD::FADD, DL,
16692 VT, N0, N1, Flags))
16693 return SD;
16694 } // enable-unsafe-fp-math
16695
16696 // FADD -> FMA combines:
16697 if (SDValue Fused = visitFADDForFMACombine<EmptyMatchContext>(N)) {
16698 if (Fused.getOpcode() != ISD::DELETED_NODE)
16699 AddToWorklist(Fused.getNode());
16700 return Fused;
16701 }
16702 return SDValue();
16703}
16704
16705SDValue DAGCombiner::visitSTRICT_FADD(SDNode *N) {
16706 SDValue Chain = N->getOperand(0);
16707 SDValue N0 = N->getOperand(1);
16708 SDValue N1 = N->getOperand(2);
16709 EVT VT = N->getValueType(0);
16710 EVT ChainVT = N->getValueType(1);
16711 SDLoc DL(N);
16712 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
16713
16714 // fold (strict_fadd A, (fneg B)) -> (strict_fsub A, B)
16715 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
16716 if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
16717 N1, DAG, LegalOperations, ForCodeSize)) {
16718 return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
16719 {Chain, N0, NegN1});
16720 }
16721
16722 // fold (strict_fadd (fneg A), B) -> (strict_fsub B, A)
16723 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
16724 if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
16725 N0, DAG, LegalOperations, ForCodeSize)) {
16726 return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
16727 {Chain, N1, NegN0});
16728 }
16729 return SDValue();
16730}
16731
16732SDValue DAGCombiner::visitFSUB(SDNode *N) {
16733 SDValue N0 = N->getOperand(0);
16734 SDValue N1 = N->getOperand(1);
16735 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
16736 ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
16737 EVT VT = N->getValueType(0);
16738 SDLoc DL(N);
16739 const TargetOptions &Options = DAG.getTarget().Options;
16740 const SDNodeFlags Flags = N->getFlags();
16741 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
16742
16743 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
16744 return R;
16745
16746 // fold (fsub c1, c2) -> c1-c2
16747 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FSUB, DL, VT, {N0, N1}))
16748 return C;
16749
16750 // fold vector ops
16751 if (VT.isVector())
16752 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
16753 return FoldedVOp;
16754
16755 if (SDValue NewSel = foldBinOpIntoSelect(N))
16756 return NewSel;
16757
16758 // (fsub A, 0) -> A
16759 if (N1CFP && N1CFP->isZero()) {
16760 if (!N1CFP->isNegative() || Options.NoSignedZerosFPMath ||
16761 Flags.hasNoSignedZeros()) {
16762 return N0;
16763 }
16764 }
16765
16766 if (N0 == N1) {
16767 // (fsub x, x) -> 0.0
16768 if (Options.NoNaNsFPMath || Flags.hasNoNaNs())
16769 return DAG.getConstantFP(0.0f, DL, VT);
16770 }
16771
16772 // (fsub -0.0, N1) -> -N1
16773 if (N0CFP && N0CFP->isZero()) {
16774 if (N0CFP->isNegative() ||
16775 (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
16776 // We cannot replace an FSUB(+-0.0,X) with FNEG(X) when denormals are
16777 // flushed to zero, unless all users treat denorms as zero (DAZ).
16778 // FIXME: This transform will change the sign of a NaN and the behavior
16779 // of a signaling NaN. It is only valid when a NoNaN flag is present.
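// For example, if denormal results are flushed to zero, fsub(-0.0, d) for a
// positive denormal d yields -0.0 (the tiny result is flushed), while fneg(d),
// being a pure sign-bit flip, yields the negative denormal -d.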
16780 DenormalMode DenormMode = DAG.getDenormalMode(VT);
16781 if (DenormMode == DenormalMode::getIEEE()) {
16782 if (SDValue NegN1 =
16783 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
16784 return NegN1;
16785 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
16786 return DAG.getNode(ISD::FNEG, DL, VT, N1);
16787 }
16788 }
16789 }
16790
16791 if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
16792 (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
16793 N1.getOpcode() == ISD::FADD) {
16794 // X - (X + Y) -> -Y
16795 if (N0 == N1->getOperand(0))
16796 return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1));
16797 // X - (Y + X) -> -Y
16798 if (N0 == N1->getOperand(1))
16799 return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0));
16800 }
16801
16802 // fold (fsub A, (fneg B)) -> (fadd A, B)
16803 if (SDValue NegN1 =
16804 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
16805 return DAG.getNode(ISD::FADD, DL, VT, N0, NegN1);
16806
16807 // FSUB -> FMA combines:
16808 if (SDValue Fused = visitFSUBForFMACombine<EmptyMatchContext>(N)) {
16809 AddToWorklist(Fused.getNode());
16810 return Fused;
16811 }
16812
16813 return SDValue();
16814}
16815
16816// Transform IEEE Floats:
16817// (fmul C, (uitofp Pow2))
16818// -> (bitcast_to_FP (add (bitcast_to_INT C), Log2(Pow2) << mantissa))
16819// (fdiv C, (uitofp Pow2))
16820// -> (bitcast_to_FP (sub (bitcast_to_INT C), Log2(Pow2) << mantissa))
16821//
16822 // The rationale is that fmul/fdiv by a power of 2 just changes the exponent,
16823 // so there is no need for more than an add/sub.
16824//
16825// This is valid under the following circumstances:
16826// 1) We are dealing with IEEE floats
16827// 2) C is normal
16828// 3) The fmul/fdiv add/sub will not go outside of min/max exponent bounds.
16829 // TODO: Much of this could also be used for generating `ldexp` on targets that
16830 // prefer it.
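// Worked f32 example (8-bit exponent field above a 23-bit mantissa): with
// C = 1.5f (bits 0x3FC00000) and Pow2 = 4 (Log2 = 2), adding 2 << 23 gives
// 0x40C00000, which is exactly 6.0f = 1.5 * 4. The transform is just this
// integer add/sub on the exponent field, valid while the result stays normal.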
16831SDValue DAGCombiner::combineFMulOrFDivWithIntPow2(SDNode *N) {
16832 EVT VT = N->getValueType(0);
16833 SDValue ConstOp, Pow2Op;
16834
16835 std::optional<int> Mantissa;
16836 auto GetConstAndPow2Ops = [&](unsigned ConstOpIdx) {
16837 if (ConstOpIdx == 1 && N->getOpcode() == ISD::FDIV)
16838 return false;
16839
16840 ConstOp = peekThroughBitcasts(N->getOperand(ConstOpIdx));
16841 Pow2Op = N->getOperand(1 - ConstOpIdx);
16842 if (Pow2Op.getOpcode() != ISD::UINT_TO_FP &&
16843 (Pow2Op.getOpcode() != ISD::SINT_TO_FP ||
16844 !DAG.computeKnownBits(Pow2Op).isNonNegative()))
16845 return false;
16846
16847 Pow2Op = Pow2Op.getOperand(0);
16848
16849 // `Log2(Pow2Op) < Pow2Op.getScalarSizeInBits()`.
16850 // TODO: We could use knownbits to make this bound more precise.
16851 int MaxExpChange = Pow2Op.getValueType().getScalarSizeInBits();
16852
16853 auto IsFPConstValid = [N, MaxExpChange, &Mantissa](ConstantFPSDNode *CFP) {
16854 if (CFP == nullptr)
16855 return false;
16856
16857 const APFloat &APF = CFP->getValueAPF();
16858
16859 // Make sure we have a normal/IEEE constant.
16860 if (!APF.isNormal() || !APF.isIEEE())
16861 return false;
16862
16863 // Make sure the float's exponent is within the bounds for which this
16864 // transform produces a bitwise-equal value.
16865 int CurExp = ilogb(APF);
16866 // FMul by pow2 will only increase exponent.
16867 int MinExp =
16868 N->getOpcode() == ISD::FMUL ? CurExp : (CurExp - MaxExpChange);
16869 // FDiv by pow2 will only decrease exponent.
16870 int MaxExp =
16871 N->getOpcode() == ISD::FDIV ? CurExp : (CurExp + MaxExpChange);
16872 if (MinExp <= APFloat::semanticsMinExponent(APF.getSemantics()) ||
16873 MaxExp >= APFloat::semanticsMaxExponent(APF.getSemantics()))
16874 return false;
16875
16876 // Finally make sure we actually know the mantissa for the float type.
16877 int ThisMantissa = APFloat::semanticsPrecision(APF.getSemantics()) - 1;
16878 if (!Mantissa)
16879 Mantissa = ThisMantissa;
16880
16881 return *Mantissa == ThisMantissa && ThisMantissa > 0;
16882 };
16883
16884 // TODO: We may be able to include undefs.
16885 return ISD::matchUnaryFpPredicate(ConstOp, IsFPConstValid);
16886 };
16887
16888 if (!GetConstAndPow2Ops(0) && !GetConstAndPow2Ops(1))
16889 return SDValue();
16890
16891 if (!TLI.optimizeFMulOrFDivAsShiftAddBitcast(N, ConstOp, Pow2Op))
16892 return SDValue();
16893
16894 // Get log2 after all other checks have taken place. This is because
16895 // BuildLogBase2 may create a new node.
16896 SDLoc DL(N);
16897 // Get Log2 type with same bitwidth as the float type (VT).
16898 EVT NewIntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits());
16899 if (VT.isVector())
16900 NewIntVT = EVT::getVectorVT(*DAG.getContext(), NewIntVT,
16901 VT.getVectorElementCount());
16902
16903 SDValue Log2 = BuildLogBase2(Pow2Op, DL, DAG.isKnownNeverZero(Pow2Op),
16904 /*InexpensiveOnly*/ true, NewIntVT);
16905 if (!Log2)
16906 return SDValue();
16907
16908 // Perform actual transform.
16909 SDValue MantissaShiftCnt =
16910 DAG.getShiftAmountConstant(*Mantissa, NewIntVT, DL);
16911 // TODO: Sometimes Log2 is of form `(X + C)`. `(X + C) << C1` should fold to
16912 // `(X << C1) + (C << C1)`, but that isn't always the case because of the
16913 // cast. We could handle that here by looking through the casts.
16914 SDValue Shift = DAG.getNode(ISD::SHL, DL, NewIntVT, Log2, MantissaShiftCnt);
16915 SDValue ResAsInt =
16916 DAG.getNode(N->getOpcode() == ISD::FMUL ? ISD::ADD : ISD::SUB, DL,
16917 NewIntVT, DAG.getBitcast(NewIntVT, ConstOp), Shift);
16918 SDValue ResAsFP = DAG.getBitcast(VT, ResAsInt);
16919 return ResAsFP;
16920}
16921
16922SDValue DAGCombiner::visitFMUL(SDNode *N) {
16923 SDValue N0 = N->getOperand(0);
16924 SDValue N1 = N->getOperand(1);
16925 ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
16926 EVT VT = N->getValueType(0);
16927 SDLoc DL(N);
16928 const TargetOptions &Options = DAG.getTarget().Options;
16929 const SDNodeFlags Flags = N->getFlags();
16930 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
16931
16932 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
16933 return R;
16934
16935 // fold (fmul c1, c2) -> c1*c2
16936 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FMUL, DL, VT, {N0, N1}))
16937 return C;
16938
16939 // canonicalize constant to RHS
16940 if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
16941 !DAG.isConstantFPBuildVectorOrConstantFP(N1))
16942 return DAG.getNode(ISD::FMUL, DL, VT, N1, N0);
16943
16944 // fold vector ops
16945 if (VT.isVector())
16946 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
16947 return FoldedVOp;
16948
16949 if (SDValue NewSel = foldBinOpIntoSelect(N))
16950 return NewSel;
16951
16952 if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) {
16953 // fmul (fmul X, C1), C2 -> fmul X, C1 * C2
16954 if (DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
16955 N0.getOpcode() == ISD::FMUL) {
16956 SDValue N00 = N0.getOperand(0);
16957 SDValue N01 = N0.getOperand(1);
16958 // Avoid an infinite loop by making sure that N00 is not a constant
16959 // (the inner multiply has not been constant folded yet).
16960 if (DAG.isConstantFPBuildVectorOrConstantFP(N01) &&
16961 !DAG.isConstantFPBuildVectorOrConstantFP(N00)) {
16962 SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1);
16963 return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts);
16964 }
16965 }
16966
16967 // Match a special-case: we convert X * 2.0 into fadd.
16968 // fmul (fadd X, X), C -> fmul X, 2.0 * C
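// In exact arithmetic (X + X) * C == X * (2.0 * C); with reassociation the
// 2.0 * C product constant-folds, leaving a single fmul.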
16969 if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() &&
16970 N0.getOperand(0) == N0.getOperand(1)) {
16971 const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
16972 SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1);
16973 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts);
16974 }
16975
16976 // Fold fmul(vecreduce(x), vecreduce(y)) -> vecreduce(fmul(x, y))
16977 if (SDValue SD = reassociateReduction(ISD::VECREDUCE_FMUL, ISD::FMUL, DL,
16978 VT, N0, N1, Flags))
16979 return SD;
16980 }
16981
16982 // fold (fmul X, 2.0) -> (fadd X, X)
16983 if (N1CFP && N1CFP->isExactlyValue(+2.0))
16984 return DAG.getNode(ISD::FADD, DL, VT, N0, N0);
16985
16986 // fold (fmul X, -1.0) -> (fsub -0.0, X)
16987 if (N1CFP && N1CFP->isExactlyValue(-1.0)) {
16988 if (!LegalOperations || TLI.isOperationLegal(ISD::FSUB, VT)) {
16989 return DAG.getNode(ISD::FSUB, DL, VT,
16990 DAG.getConstantFP(-0.0, DL, VT), N0, Flags);
16991 }
16992 }
16993
16994 // -N0 * -N1 --> N0 * N1
16995 TargetLowering::NegatibleCost CostN0 =
16996 TargetLowering::NegatibleCost::Expensive;
16997 TargetLowering::NegatibleCost CostN1 =
16998 TargetLowering::NegatibleCost::Expensive;
16999 SDValue NegN0 =
17000 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
17001 if (NegN0) {
17002 HandleSDNode NegN0Handle(NegN0);
17003 SDValue NegN1 =
17004 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
17005 if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
17006 CostN1 == TargetLowering::NegatibleCost::Cheaper))
17007 return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1);
17008 }
17009
17010 // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
17011 // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
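// Reasoning (requires nnan/nsz): for X > 0.0 the select picks -1.0 and
// X * -1.0 == -|X|; for X <= 0.0 it picks 1.0 and X * 1.0 == X == -|X|, so the
// whole expression is fneg(fabs(X)) (and fabs(X) for the swapped constants).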
17012 if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
17013 (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
17014 TLI.isOperationLegal(ISD::FABS, VT)) {
17015 SDValue Select = N0, X = N1;
17016 if (Select.getOpcode() != ISD::SELECT)
17017 std::swap(Select, X);
17018
17019 SDValue Cond = Select.getOperand(0);
17020 auto TrueOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
17021 auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));
17022
17023 if (TrueOpnd && FalseOpnd &&
17024 Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
17025 isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
17026 cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
17027 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
17028 switch (CC) {
17029 default: break;
17030 case ISD::SETOLT:
17031 case ISD::SETULT:
17032 case ISD::SETOLE:
17033 case ISD::SETULE:
17034 case ISD::SETLT:
17035 case ISD::SETLE:
17036 std::swap(TrueOpnd, FalseOpnd);
17037 [[fallthrough]];
17038 case ISD::SETOGT:
17039 case ISD::SETUGT:
17040 case ISD::SETOGE:
17041 case ISD::SETUGE:
17042 case ISD::SETGT:
17043 case ISD::SETGE:
17044 if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
17045 TLI.isOperationLegal(ISD::FNEG, VT))
17046 return DAG.getNode(ISD::FNEG, DL, VT,
17047 DAG.getNode(ISD::FABS, DL, VT, X));
17048 if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
17049 return DAG.getNode(ISD::FABS, DL, VT, X);
17050
17051 break;
17052 }
17053 }
17054 }
17055
17056 // FMUL -> FMA combines:
17057 if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
17058 AddToWorklist(Fused.getNode());
17059 return Fused;
17060 }
17061
17062 // Don't do `combineFMulOrFDivWithIntPow2` until after FMUL -> FMA has been
17063 // able to run.
17064 if (SDValue R = combineFMulOrFDivWithIntPow2(N))
17065 return R;
17066
17067 return SDValue();
17068}
17069
17070template <class MatchContextClass> SDValue DAGCombiner::visitFMA(SDNode *N) {
17071 SDValue N0 = N->getOperand(0);
17072 SDValue N1 = N->getOperand(1);
17073 SDValue N2 = N->getOperand(2);
17074 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
17075 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
17076 EVT VT = N->getValueType(0);
17077 SDLoc DL(N);
17078 const TargetOptions &Options = DAG.getTarget().Options;
17079 // FMA nodes have flags that propagate to the created nodes.
17080 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17081 MatchContextClass matcher(DAG, TLI, N);
17082
17083 // Constant fold FMA.
17084 if (isa<ConstantFPSDNode>(N0) &&
17085 isa<ConstantFPSDNode>(N1) &&
17086 isa<ConstantFPSDNode>(N2)) {
17087 return matcher.getNode(ISD::FMA, DL, VT, N0, N1, N2);
17088 }
17089
17090 // (-N0 * -N1) + N2 --> (N0 * N1) + N2
17091 TargetLowering::NegatibleCost CostN0 =
17092 TargetLowering::NegatibleCost::Expensive;
17093 TargetLowering::NegatibleCost CostN1 =
17094 TargetLowering::NegatibleCost::Expensive;
17095 SDValue NegN0 =
17096 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
17097 if (NegN0) {
17098 HandleSDNode NegN0Handle(NegN0);
17099 SDValue NegN1 =
17100 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
17101 if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
17102 CostN1 == TargetLowering::NegatibleCost::Cheaper))
17103 return matcher.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2);
17104 }
17105
17106 // FIXME: use fast math flags instead of Options.UnsafeFPMath
17107 if (Options.UnsafeFPMath) {
17108 if (N0CFP && N0CFP->isZero())
17109 return N2;
17110 if (N1CFP && N1CFP->isZero())
17111 return N2;
17112 }
17113
17114 // FIXME: Support splat of constant.
17115 if (N0CFP && N0CFP->isExactlyValue(1.0))
17116 return matcher.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
17117 if (N1CFP && N1CFP->isExactlyValue(1.0))
17118 return matcher.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);
17119
17120 // Canonicalize (fma c, x, y) -> (fma x, c, y)
17121 if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
17122 !DAG.isConstantFPBuildVectorOrConstantFP(N1))
17123 return matcher.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
17124
17125 bool CanReassociate =
17126 Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
17127 if (CanReassociate) {
17128 // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
17129 if (matcher.match(N2, ISD::FMUL) && N0 == N2.getOperand(0) &&
17132 return matcher.getNode(
17133 ISD::FMUL, DL, VT, N0,
17134 matcher.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1)));
17135 }
17136
17137 // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
17138 if (matcher.match(N0, ISD::FMUL) &&
17141 return matcher.getNode(
17142 ISD::FMA, DL, VT, N0.getOperand(0),
17143 matcher.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1)), N2);
17144 }
17145 }
17146
17147 // (fma x, -1, y) -> (fadd (fneg x), y)
17148 // FIXME: Support splat of constant.
17149 if (N1CFP) {
17150 if (N1CFP->isExactlyValue(1.0))
17151 return matcher.getNode(ISD::FADD, DL, VT, N0, N2);
17152
17153 if (N1CFP->isExactlyValue(-1.0) &&
17154 (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
17155 SDValue RHSNeg = matcher.getNode(ISD::FNEG, DL, VT, N0);
17156 AddToWorklist(RHSNeg.getNode());
17157 return matcher.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
17158 }
17159
17160 // fma (fneg x), K, y -> fma x -K, y
17161 if (matcher.match(N0, ISD::FNEG) &&
17162 (TLI.isOperationLegal(ISD::ConstantFP, VT) ||
17163 (N1.hasOneUse() &&
17164 !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT, ForCodeSize)))) {
17165 return matcher.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
17166 matcher.getNode(ISD::FNEG, DL, VT, N1), N2);
17167 }
17168 }
17169
17170 // FIXME: Support splat of constant.
17171 if (CanReassociate) {
17172 // (fma x, c, x) -> (fmul x, (c+1))
17173 if (N1CFP && N0 == N2) {
17174 return matcher.getNode(ISD::FMUL, DL, VT, N0,
17175 matcher.getNode(ISD::FADD, DL, VT, N1,
17176 DAG.getConstantFP(1.0, DL, VT)));
17177 }
17178
17179 // (fma x, c, (fneg x)) -> (fmul x, (c-1))
17180 if (N1CFP && matcher.match(N2, ISD::FNEG) && N2.getOperand(0) == N0) {
17181 return matcher.getNode(ISD::FMUL, DL, VT, N0,
17182 matcher.getNode(ISD::FADD, DL, VT, N1,
17183 DAG.getConstantFP(-1.0, DL, VT)));
17184 }
17185 }
17186
17187 // fold ((fma (fneg X), Y, (fneg Z)) -> fneg (fma X, Y, Z))
17188 // fold ((fma X, (fneg Y), (fneg Z)) -> fneg (fma X, Y, Z))
17189 if (!TLI.isFNegFree(VT))
17190 if (SDValue Neg = TLI.getCheaperNegatedExpression(
17191 SDValue(N, 0), DAG, LegalOperations, ForCodeSize))
17192 return matcher.getNode(ISD::FNEG, DL, VT, Neg);
17193 return SDValue();
17194}
17195
17196SDValue DAGCombiner::visitFMAD(SDNode *N) {
17197 SDValue N0 = N->getOperand(0);
17198 SDValue N1 = N->getOperand(1);
17199 SDValue N2 = N->getOperand(2);
17200 EVT VT = N->getValueType(0);
17201 SDLoc DL(N);
17202
17203 // Constant fold FMAD.
17204 if (isa<ConstantFPSDNode>(N0) && isa<ConstantFPSDNode>(N1) &&
17205 isa<ConstantFPSDNode>(N2))
17206 return DAG.getNode(ISD::FMAD, DL, VT, N0, N1, N2);
17207
17208 return SDValue();
17209}
17210
17211// Combine multiple FDIVs with the same divisor into multiple FMULs by the
17212// reciprocal.
17213// E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
17214 // Notice that this is not always beneficial. One reason is that different
17215 // targets may have different costs for FDIV and FMUL, so sometimes the cost
17216 // of two FDIVs may be lower than the cost of one FDIV and two FMULs. Another
17217 // reason is that the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
17218SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
17219 // TODO: Limit this transform based on optsize/minsize - it always creates at
17220 // least 1 extra instruction. But the perf win may be substantial enough
17221 // that only minsize should restrict this.
17222 bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
17223 const SDNodeFlags Flags = N->getFlags();
17224 if (LegalDAG || (!UnsafeMath && !Flags.hasAllowReciprocal()))
17225 return SDValue();
17226
17227 // Skip if current node is a reciprocal/fneg-reciprocal.
17228 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
17229 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, /* AllowUndefs */ true);
17230 if (N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0)))
17231 return SDValue();
17232
17233 // Exit early if the target does not want this transform or if there can't
17234 // possibly be enough uses of the divisor to make the transform worthwhile.
17235 unsigned MinUses = TLI.combineRepeatedFPDivisors();
17236
17237 // For splat vectors, scale the number of uses by the splat factor. If we can
17238 // convert the division into a scalar op, that will likely be much faster.
17239 unsigned NumElts = 1;
17240 EVT VT = N->getValueType(0);
17241 if (VT.isVector() && DAG.isSplatValue(N1))
17242 NumElts = VT.getVectorMinNumElements();
17243
17244 if (!MinUses || (N1->use_size() * NumElts) < MinUses)
17245 return SDValue();
17246
17247 // Find all FDIV users of the same divisor.
17248 // Use a set because duplicates may be present in the user list.
17249 SetVector<SDNode *> Users;
17250 for (auto *U : N1->uses()) {
17251 if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
17252 // Skip X/sqrt(X) that has not been simplified to sqrt(X) yet.
17253 if (U->getOperand(1).getOpcode() == ISD::FSQRT &&
17254 U->getOperand(0) == U->getOperand(1).getOperand(0) &&
17255 U->getFlags().hasAllowReassociation() &&
17256 U->getFlags().hasNoSignedZeros())
17257 continue;
17258
17259 // This division is eligible for optimization only if global unsafe math
17260 // is enabled or if this division allows reciprocal formation.
17261 if (UnsafeMath || U->getFlags().hasAllowReciprocal())
17262 Users.insert(U);
17263 }
17264 }
17265
17266 // Now that we have the actual number of divisor uses, make sure it meets
17267 // the minimum threshold specified by the target.
17268 if ((Users.size() * NumElts) < MinUses)
17269 return SDValue();
17270
17271 SDLoc DL(N);
17272 SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
17273 SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
17274
17275 // Dividend / Divisor -> Dividend * Reciprocal
17276 for (auto *U : Users) {
17277 SDValue Dividend = U->getOperand(0);
17278 if (Dividend != FPOne) {
17279 SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
17280 Reciprocal, Flags);
17281 CombineTo(U, NewNode);
17282 } else if (U != Reciprocal.getNode()) {
17283 // In the absence of fast-math-flags, this user node is always the
17284 // same node as Reciprocal, but with FMF they may be different nodes.
17285 CombineTo(U, Reciprocal);
17286 }
17287 }
17288 return SDValue(N, 0); // N was replaced.
17289}
17290
17291SDValue DAGCombiner::visitFDIV(SDNode *N) {
17292 SDValue N0 = N->getOperand(0);
17293 SDValue N1 = N->getOperand(1);
17294 EVT VT = N->getValueType(0);
17295 SDLoc DL(N);
17296 const TargetOptions &Options = DAG.getTarget().Options;
17297 SDNodeFlags Flags = N->getFlags();
17298 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17299
17300 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
17301 return R;
17302
17303 // fold (fdiv c1, c2) -> c1/c2
17304 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FDIV, DL, VT, {N0, N1}))
17305 return C;
17306
17307 // fold vector ops
17308 if (VT.isVector())
17309 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
17310 return FoldedVOp;
17311
17312 if (SDValue NewSel = foldBinOpIntoSelect(N))
17313 return NewSel;
17314
17315 if (SDValue V = combineRepeatedFPDivisors(N))
17316 return V;
17317
17318 // fold (fdiv X, c2) -> (fmul X, 1/c2) if there is no loss in precision, or
17319 // the loss is acceptable with AllowReciprocal.
17320 if (auto *N1CFP = isConstOrConstSplatFP(N1, true)) {
17321 // Compute the reciprocal 1.0 / c2.
17322 const APFloat &N1APF = N1CFP->getValueAPF();
17323 APFloat Recip = APFloat::getOne(N1APF.getSemantics());
17324 APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
17325 // Only do the transform if the reciprocal is a legal fp immediate that
17326 // isn't too nasty (e.g., NaN, denormal, ...).
17327 if (((st == APFloat::opOK && !Recip.isDenormal()) ||
17328 (st == APFloat::opInexact &&
17329 (Options.UnsafeFPMath || Flags.hasAllowReciprocal()))) &&
17330 (!LegalOperations ||
17331 // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
17332 // backend)... we should handle this gracefully after Legalize.
17333 // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
17334 TLI.isOperationLegal(ISD::ConstantFP, VT) ||
17335 TLI.isFPImmLegal(Recip, VT, ForCodeSize)))
17336 return DAG.getNode(ISD::FMUL, DL, VT, N0,
17337 DAG.getConstantFP(Recip, DL, VT));
17338 }
17339
17340 if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
17341 // If this FDIV is part of a reciprocal square root, it may be folded
17342 // into a target-specific square root estimate instruction.
17343 if (N1.getOpcode() == ISD::FSQRT) {
17344 if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags))
17345 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
17346 } else if (N1.getOpcode() == ISD::FP_EXTEND &&
17347 N1.getOperand(0).getOpcode() == ISD::FSQRT) {
17348 if (SDValue RV =
17349 buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
17350 RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
17351 AddToWorklist(RV.getNode());
17352 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
17353 }
17354 } else if (N1.getOpcode() == ISD::FP_ROUND &&
17355 N1.getOperand(0).getOpcode() == ISD::FSQRT) {
17356 if (SDValue RV =
17357 buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
17358 RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
17359 AddToWorklist(RV.getNode());
17360 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
17361 }
17362 } else if (N1.getOpcode() == ISD::FMUL) {
17363 // Look through an FMUL. Even though this won't remove the FDIV directly,
17364 // it's still worthwhile to get rid of the FSQRT if possible.
17365 SDValue Sqrt, Y;
17366 if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
17367 Sqrt = N1.getOperand(0);
17368 Y = N1.getOperand(1);
17369 } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
17370 Sqrt = N1.getOperand(1);
17371 Y = N1.getOperand(0);
17372 }
17373 if (Sqrt.getNode()) {
17374 // If the other multiply operand is known positive, pull it into the
17375 // sqrt. That will eliminate the division if we convert to an estimate.
17376 if (Flags.hasAllowReassociation() && N1.hasOneUse() &&
17377 N1->getFlags().hasAllowReassociation() && Sqrt.hasOneUse()) {
17378 SDValue A;
17379 if (Y.getOpcode() == ISD::FABS && Y.hasOneUse())
17380 A = Y.getOperand(0);
17381 else if (Y == Sqrt.getOperand(0))
17382 A = Y;
17383 if (A) {
17384 // X / (fabs(A) * sqrt(Z)) --> X / sqrt(A*A*Z) --> X * rsqrt(A*A*Z)
17385 // X / (A * sqrt(A)) --> X / sqrt(A*A*A) --> X * rsqrt(A*A*A)
17386 SDValue AA = DAG.getNode(ISD::FMUL, DL, VT, A, A);
17387 SDValue AAZ =
17388 DAG.getNode(ISD::FMUL, DL, VT, AA, Sqrt.getOperand(0));
17389 if (SDValue Rsqrt = buildRsqrtEstimate(AAZ, Flags))
17390 return DAG.getNode(ISD::FMUL, DL, VT, N0, Rsqrt);
17391
17392 // Estimate creation failed. Clean up speculatively created nodes.
17393 recursivelyDeleteUnusedNodes(AAZ.getNode());
17394 }
17395 }
17396
17397 // We found a FSQRT, so try to make this fold:
17398 // X / (Y * sqrt(Z)) -> X * (rsqrt(Z) / Y)
17399 if (SDValue Rsqrt = buildRsqrtEstimate(Sqrt.getOperand(0), Flags)) {
17400 SDValue Div = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, Rsqrt, Y);
17401 AddToWorklist(Div.getNode());
17402 return DAG.getNode(ISD::FMUL, DL, VT, N0, Div);
17403 }
17404 }
17405 }
17406
17407 // Fold into a reciprocal estimate and multiply instead of a real divide.
17408 if (Options.NoInfsFPMath || Flags.hasNoInfs())
17409 if (SDValue RV = BuildDivEstimate(N0, N1, Flags))
17410 return RV;
17411 }
17412
17413 // Fold X/Sqrt(X) -> Sqrt(X)
17414 if ((Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) &&
17415 (Options.UnsafeFPMath || Flags.hasAllowReassociation()))
17416 if (N1.getOpcode() == ISD::FSQRT && N0 == N1.getOperand(0))
17417 return N1;
17418
17419 // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
17420 TargetLowering::NegatibleCost CostN0 =
17421 TargetLowering::NegatibleCost::Expensive;
17422 TargetLowering::NegatibleCost CostN1 =
17423 TargetLowering::NegatibleCost::Expensive;
17424 SDValue NegN0 =
17425 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
17426 if (NegN0) {
17427 HandleSDNode NegN0Handle(NegN0);
17428 SDValue NegN1 =
17429 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
17430 if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
17431 CostN1 == TargetLowering::NegatibleCost::Cheaper))
17432 return DAG.getNode(ISD::FDIV, SDLoc(N), VT, NegN0, NegN1);
17433 }
17434
17435 if (SDValue R = combineFMulOrFDivWithIntPow2(N))
17436 return R;
17437
17438 return SDValue();
17439}
17440
17441SDValue DAGCombiner::visitFREM(SDNode *N) {
17442 SDValue N0 = N->getOperand(0);
17443 SDValue N1 = N->getOperand(1);
17444 EVT VT = N->getValueType(0);
17445 SDNodeFlags Flags = N->getFlags();
17446 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17447 SDLoc DL(N);
17448
17449 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
17450 return R;
17451
17452 // fold (frem c1, c2) -> fmod(c1,c2)
17453 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FREM, DL, VT, {N0, N1}))
17454 return C;
17455
17456 if (SDValue NewSel = foldBinOpIntoSelect(N))
17457 return NewSel;
17458
17459 // Lower frem N0, N1 => N0 - trunc(N0 / N1) * N1, provided N1 is an integer
17460 // power of 2.
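// For illustration (exact arithmetic), frem(7.5, 2.0):
// trunc(7.5 / 2.0) = trunc(3.75) = 3.0, and 7.5 - 3.0 * 2.0 = 1.5 = fmod(7.5, 2.0).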
17461 if (!TLI.isOperationLegal(ISD::FREM, VT) &&
17465 DAG.isKnownToBeAPowerOfTwoFP(N1)) {
17466 bool NeedsCopySign =
17467 !Flags.hasNoSignedZeros() && !DAG.cannotBeOrderedNegativeFP(N0);
17468 SDValue Div = DAG.getNode(ISD::FDIV, DL, VT, N0, N1);
17469 SDValue Rnd = DAG.getNode(ISD::FTRUNC, DL, VT, Div);
17470 SDValue MLA;
17472 MLA = DAG.getNode(ISD::FMA, DL, VT, DAG.getNode(ISD::FNEG, DL, VT, Rnd),
17473 N1, N0);
17474 } else {
17475 SDValue Mul = DAG.getNode(ISD::FMUL, DL, VT, Rnd, N1);
17476 MLA = DAG.getNode(ISD::FSUB, DL, VT, N0, Mul);
17477 }
17478 return NeedsCopySign ? DAG.getNode(ISD::FCOPYSIGN, DL, VT, MLA, N0) : MLA;
17479 }
17480
17481 return SDValue();
17482}
17483
17484SDValue DAGCombiner::visitFSQRT(SDNode *N) {
17485 SDNodeFlags Flags = N->getFlags();
17486 const TargetOptions &Options = DAG.getTarget().Options;
17487
17488 // Require 'ninf' flag since sqrt(+Inf) = +Inf, but the estimation goes as:
17489 // sqrt(+Inf) == rsqrt(+Inf) * +Inf = 0 * +Inf = NaN
17490 if (!Flags.hasApproximateFuncs() ||
17491 (!Options.NoInfsFPMath && !Flags.hasNoInfs()))
17492 return SDValue();
17493
17494 SDValue N0 = N->getOperand(0);
17495 if (TLI.isFsqrtCheap(N0, DAG))
17496 return SDValue();
17497
17498 // FSQRT nodes have flags that propagate to the created nodes.
17499 // TODO: If this is N0/sqrt(N0), and we reach this node before trying to
17500 // transform the fdiv, we may produce a sub-optimal estimate sequence
17501 // because the reciprocal calculation may not have to filter out a
17502 // 0.0 input.
17503 return buildSqrtEstimate(N0, Flags);
17504}
17505
17506/// copysign(x, fp_extend(y)) -> copysign(x, y)
17507/// copysign(x, fp_round(y)) -> copysign(x, y)
17508/// Operands to the functions are the type of X and Y respectively.
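/// FCOPYSIGN consumes only the sign bit of Y, and fp_extend/fp_round preserve
/// the sign of their operand, so the cast can be looked through (modulo the
/// f128 caveat handled below).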
17509static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(EVT XTy, EVT YTy) {
17510 // Always fold no-op FP casts.
17511 if (XTy == YTy)
17512 return true;
17513
17514 // Do not optimize out type conversion of f128 type yet.
17515 // For some targets like x86_64, configuration is changed to keep one f128
17516 // value in one SSE register, but instruction selection cannot handle
17517 // FCOPYSIGN on SSE registers yet.
17518 if (YTy == MVT::f128)
17519 return false;
17520
17522}
17523
17524 static bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
17525 SDValue N1 = N->getOperand(1);
17526 if (N1.getOpcode() != ISD::FP_EXTEND &&
17527 N1.getOpcode() != ISD::FP_ROUND)
17528 return false;
17529 EVT N1VT = N1->getValueType(0);
17530 EVT N1Op0VT = N1->getOperand(0).getValueType();
17531 return CanCombineFCOPYSIGN_EXTEND_ROUND(N1VT, N1Op0VT);
17532}
17533
17534SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
17535 SDValue N0 = N->getOperand(0);
17536 SDValue N1 = N->getOperand(1);
17537 EVT VT = N->getValueType(0);
17538 SDLoc DL(N);
17539
17540 // fold (fcopysign c1, c2) -> fcopysign(c1,c2)
17541 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FCOPYSIGN, DL, VT, {N0, N1}))
17542 return C;
17543
17544 if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N->getOperand(1))) {
17545 const APFloat &V = N1C->getValueAPF();
17546 // copysign(x, c1) -> fabs(x) iff ispos(c1)
17547 // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
17548 if (!V.isNegative()) {
17549 if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
17550 return DAG.getNode(ISD::FABS, DL, VT, N0);
17551 } else {
17552 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
17553 return DAG.getNode(ISD::FNEG, DL, VT,
17554 DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
17555 }
17556 }
17557
17558 // copysign(fabs(x), y) -> copysign(x, y)
17559 // copysign(fneg(x), y) -> copysign(x, y)
17560 // copysign(copysign(x,z), y) -> copysign(x, y)
17561 if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
17562 N0.getOpcode() == ISD::FCOPYSIGN)
17563 return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N0.getOperand(0), N1);
17564
17565 // copysign(x, abs(y)) -> abs(x)
17566 if (N1.getOpcode() == ISD::FABS)
17567 return DAG.getNode(ISD::FABS, DL, VT, N0);
17568
17569 // copysign(x, copysign(y,z)) -> copysign(x, z)
17570 if (N1.getOpcode() == ISD::FCOPYSIGN)
17571 return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N0, N1.getOperand(1));
17572
17573 // copysign(x, fp_extend(y)) -> copysign(x, y)
17574 // copysign(x, fp_round(y)) -> copysign(x, y)
17575 if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
17576 return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N0, N1.getOperand(0));
17577
17578 // We only take the sign bit from the sign operand.
17579 EVT SignVT = N1.getValueType();
17580 if (SimplifyDemandedBits(N1,
17581 APInt::getSignMask(SignVT.getScalarSizeInBits())))
17582 return SDValue(N, 0);
17583
17584 // We only take the non-sign bits from the value operand
17585 if (SimplifyDemandedBits(N0,
17586 APInt::getSignedMaxValue(VT.getScalarSizeInBits())))
17587 return SDValue(N, 0);
17588
17589 return SDValue();
17590}
17591
17592SDValue DAGCombiner::visitFPOW(SDNode *N) {
17593 ConstantFPSDNode *ExponentC = isConstOrConstSplatFP(N->getOperand(1));
17594 if (!ExponentC)
17595 return SDValue();
17596 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17597
17598 // Try to convert x ** (1/3) into cube root.
17599 // TODO: Handle the various flavors of long double.
17600 // TODO: Since we're approximating, we don't need an exact 1/3 exponent.
17601 // Some range near 1/3 should be fine.
17602 EVT VT = N->getValueType(0);
17603 if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(1.0f/3.0f)) ||
17604 (VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(1.0/3.0))) {
17605 // pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0.
17606 // pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf.
17607 // pow(-val, 1/3) = nan; cbrt(-val) = -cbrt(val), a negative number.
17608 // For regular numbers, rounding may cause the results to differ.
17609 // Therefore, we require { nsz ninf nnan afn } for this transform.
17610 // TODO: We could select out the special cases if we don't have nsz/ninf.
17611 SDNodeFlags Flags = N->getFlags();
17612 if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || !Flags.hasNoNaNs() ||
17613 !Flags.hasApproximateFuncs())
17614 return SDValue();
17615
17616 // Do not create a cbrt() libcall if the target does not have it, and do not
17617 // turn a pow that has lowering support into a cbrt() libcall.
17618 if (!DAG.getLibInfo().has(LibFunc_cbrt) ||
17621 return SDValue();
17622
17623 return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0));
17624 }
17625
17626 // Try to convert x ** (1/4) and x ** (3/4) into square roots.
17627 // x ** (1/2) is canonicalized to sqrt, so we do not bother with that case.
17628 // TODO: This could be extended (using a target hook) to handle smaller
17629 // power-of-2 fractional exponents.
17630 bool ExponentIs025 = ExponentC->getValueAPF().isExactlyValue(0.25);
17631 bool ExponentIs075 = ExponentC->getValueAPF().isExactlyValue(0.75);
17632 if (ExponentIs025 || ExponentIs075) {
17633 // pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0.
17634 // pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) = NaN.
17635 // pow(-0.0, 0.75) = +0.0; sqrt(-0.0) * sqrt(sqrt(-0.0)) = +0.0.
17636 // pow(-inf, 0.75) = +inf; sqrt(-inf) * sqrt(sqrt(-inf)) = NaN.
17637 // For regular numbers, rounding may cause the results to differ.
17638 // Therefore, we require { nsz ninf afn } for this transform.
17639 // TODO: We could select out the special cases if we don't have nsz/ninf.
17640 SDNodeFlags Flags = N->getFlags();
17641
17642 // We only need no signed zeros for the 0.25 case.
17643 if ((!Flags.hasNoSignedZeros() && ExponentIs025) || !Flags.hasNoInfs() ||
17644 !Flags.hasApproximateFuncs())
17645 return SDValue();
17646
17647 // Don't double the number of libcalls. We are trying to inline fast code.
17649 return SDValue();
17650
17651 // Assume that libcalls are the smallest code.
17652 // TODO: This restriction should probably be lifted for vectors.
17653 if (ForCodeSize)
17654 return SDValue();
17655
17656 // pow(X, 0.25) --> sqrt(sqrt(X))
17657 SDLoc DL(N);
17658 SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0));
17659 SDValue SqrtSqrt = DAG.getNode(ISD::FSQRT, DL, VT, Sqrt);
17660 if (ExponentIs025)
17661 return SqrtSqrt;
17662 // pow(X, 0.75) --> sqrt(X) * sqrt(sqrt(X))
17663 return DAG.getNode(ISD::FMUL, DL, VT, Sqrt, SqrtSqrt);
17664 }
17665
17666 return SDValue();
17667}
17668
17669 static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG,
17670 const TargetLowering &TLI) {
17671 // We only do this if the target has legal ftrunc. Otherwise, we'd likely be
17672 // replacing casts with a libcall. We also must be allowed to ignore -0.0
17673 // because FTRUNC will return -0.0 for inputs in (-1.0, -0.0), but using
17674 // integer conversions would return +0.0.
17675 // FIXME: We should be able to use node-level FMF here.
17676 // TODO: If strict math, should we use FABS (+ range check for signed cast)?
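// For example, for x = -0.5f: ftrunc(-0.5f) == -0.0f, but
// (float)(int)(-0.5f) == +0.0f, which is why the no-signed-zeros condition
// below is required.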
17677 EVT VT = N->getValueType(0);
17678 if (!TLI.isOperationLegal(ISD::FTRUNC, VT) ||
17679 !DAG.getTarget().Options.NoSignedZerosFPMath)
17680 return SDValue();
17681
17682 // fptosi/fptoui round towards zero, so converting from FP to integer and
17683 // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X
17684 SDValue N0 = N->getOperand(0);
17685 if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT &&
17686 N0.getOperand(0).getValueType() == VT)
17687 return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
17688
17689 if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT &&
17690 N0.getOperand(0).getValueType() == VT)
17691 return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
17692
17693 return SDValue();
17694}
17695
17696SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
17697 SDValue N0 = N->getOperand(0);
17698 EVT VT = N->getValueType(0);
17699 EVT OpVT = N0.getValueType();
17700
17701 // [us]itofp(undef) = 0, because the result value is bounded.
17702 if (N0.isUndef())
17703 return DAG.getConstantFP(0.0, SDLoc(N), VT);
17704
17705 // fold (sint_to_fp c1) -> c1fp
17706 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
17707 // ...but only if the target supports immediate floating-point values
17708 (!LegalOperations ||
17709 TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
17710 return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
17711
17712 // If the input is a legal type, and SINT_TO_FP is not legal on this target,
17713 // but UINT_TO_FP is legal on this target, try to convert.
17714 if (!hasOperation(ISD::SINT_TO_FP, OpVT) &&
17715 hasOperation(ISD::UINT_TO_FP, OpVT)) {
17716 // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
17717 if (DAG.SignBitIsZero(N0))
17718 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
17719 }
17720
17721 // The next optimizations are desirable only if SELECT_CC can be lowered.
17722 // fold (sint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), -1.0, 0.0)
17723 if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
17724 !VT.isVector() &&
17725 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
17726 SDLoc DL(N);
17727 return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(-1.0, DL, VT),
17728 DAG.getConstantFP(0.0, DL, VT));
17729 }
17730
17731 // fold (sint_to_fp (zext (setcc x, y, cc))) ->
17732 // (select (setcc x, y, cc), 1.0, 0.0)
17733 if (N0.getOpcode() == ISD::ZERO_EXTEND &&
17734 N0.getOperand(0).getOpcode() == ISD::SETCC && !VT.isVector() &&
17735 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
17736 SDLoc DL(N);
17737 return DAG.getSelect(DL, VT, N0.getOperand(0),
17738 DAG.getConstantFP(1.0, DL, VT),
17739 DAG.getConstantFP(0.0, DL, VT));
17740 }
17741
17742 if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
17743 return FTrunc;
17744
17745 return SDValue();
17746}
17747
17748SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
17749 SDValue N0 = N->getOperand(0);
17750 EVT VT = N->getValueType(0);
17751 EVT OpVT = N0.getValueType();
17752
17753 // [us]itofp(undef) = 0, because the result value is bounded.
17754 if (N0.isUndef())
17755 return DAG.getConstantFP(0.0, SDLoc(N), VT);
17756
17757 // fold (uint_to_fp c1) -> c1fp
17758 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
17759 // ...but only if the target supports immediate floating-point values
17760 (!LegalOperations ||
17761 TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
17762 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
17763
17764 // If the input is a legal type, and UINT_TO_FP is not legal on this target,
17765 // but SINT_TO_FP is legal on this target, try to convert.
17766 if (!hasOperation(ISD::UINT_TO_FP, OpVT) &&
17767 hasOperation(ISD::SINT_TO_FP, OpVT)) {
17768 // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
17769 if (DAG.SignBitIsZero(N0))
17770 return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
17771 }
17772
17773 // fold (uint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), 1.0, 0.0)
17774 if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
17775 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
17776 SDLoc DL(N);
17777 return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(1.0, DL, VT),
17778 DAG.getConstantFP(0.0, DL, VT));
17779 }
17780
17781 if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
17782 return FTrunc;
17783
17784 return SDValue();
17785}
17786
17787 // Fold (fp_to_{s/u}int ({s/u}int_to_fp x)) -> zext x, sext x, trunc x, or x
17788 static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
17789 SDValue N0 = N->getOperand(0);
17790 EVT VT = N->getValueType(0);
17791
17792 if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
17793 return SDValue();
17794
17795 SDValue Src = N0.getOperand(0);
17796 EVT SrcVT = Src.getValueType();
17797 bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
17798 bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
17799
17800 // We can safely assume the conversion won't overflow the output range,
17801 // because (for example) (uint8_t)18293.f is undefined behavior.
17802
17803 // Since we can assume the conversion won't overflow, our decision as to
17804 // whether the input will fit in the float should depend on the minimum
17805 // of the input range and output range.
17806
17807 // This means this is also safe for a signed input and unsigned output, since
17808 // a negative input would lead to undefined behavior.
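// For example, fp_to_sint(f32 (sint_to_fp i16 x)) to i32 -> sext x: the
// effective input width is 16 - 1 = 15 bits, which fits in f32's 24-bit
// precision, so the round trip is exact. A u32 -> f32 -> u32 round trip
// (32 bits of range vs. 24 bits of precision) is not folded.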
17809 unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
17810 unsigned OutputSize = (int)VT.getScalarSizeInBits();
17811 unsigned ActualSize = std::min(InputSize, OutputSize);
17812 const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());
17813
17814 // We can only fold away the float conversion if the input range can be
17815 // represented exactly in the float range.
17816 if (APFloat::semanticsPrecision(sem) >= ActualSize) {
17817 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
17818 unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
17819 : ISD::ZERO_EXTEND;
17820 return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
17821 }
17822 if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
17823 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
17824 return DAG.getBitcast(VT, Src);
17825 }
17826 return SDValue();
17827}
17828
17829SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
17830 SDValue N0 = N->getOperand(0);
17831 EVT VT = N->getValueType(0);
17832
17833 // fold (fp_to_sint undef) -> undef
17834 if (N0.isUndef())
17835 return DAG.getUNDEF(VT);
17836
17837 // fold (fp_to_sint c1fp) -> c1
17838 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17839 return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
17840
17841 return FoldIntToFPToInt(N, DAG);
17842}
17843
17844SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
17845 SDValue N0 = N->getOperand(0);
17846 EVT VT = N->getValueType(0);
17847
17848 // fold (fp_to_uint undef) -> undef
17849 if (N0.isUndef())
17850 return DAG.getUNDEF(VT);
17851
17852 // fold (fp_to_uint c1fp) -> c1
17853 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17854 return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
17855
17856 return FoldIntToFPToInt(N, DAG);
17857}
17858
17859SDValue DAGCombiner::visitXRINT(SDNode *N) {
17860 SDValue N0 = N->getOperand(0);
17861 EVT VT = N->getValueType(0);
17862
17863 // fold (lrint|llrint undef) -> undef
17864 if (N0.isUndef())
17865 return DAG.getUNDEF(VT);
17866
17867 // fold (lrint|llrint c1fp) -> c1
17868 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17869 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0);
17870
17871 return SDValue();
17872}
17873
17874SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
17875 SDValue N0 = N->getOperand(0);
17876 SDValue N1 = N->getOperand(1);
17877 EVT VT = N->getValueType(0);
17878
17879 // fold (fp_round c1fp) -> c1fp
17880 if (SDValue C =
17881 DAG.FoldConstantArithmetic(ISD::FP_ROUND, SDLoc(N), VT, {N0, N1}))
17882 return C;
17883
17884 // fold (fp_round (fp_extend x)) -> x
17885 if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
17886 return N0.getOperand(0);
17887
17888 // fold (fp_round (fp_round x)) -> (fp_round x)
17889 if (N0.getOpcode() == ISD::FP_ROUND) {
17890 const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
17891 const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;
17892
17893 // Avoid folding legal fp_rounds into non-legal ones.
17894 if (!hasOperation(ISD::FP_ROUND, VT))
17895 return SDValue();
17896
17897 // Skip this folding if it results in an fp_round from f80 to f16.
17898 //
17899 // f80 to f16 always generates an expensive (and as yet, unimplemented)
17900 // libcall to __truncxfhf2 instead of selecting native f16 conversion
17901 // instructions from f32 or f64. Moreover, the first (value-preserving)
17902 // fp_round from f80 to either f32 or f64 may become a NOP in platforms like
17903 // x86.
17904 if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
17905 return SDValue();
17906
17907 // If the first fp_round isn't a value preserving truncation, it might
17908 // introduce a tie in the second fp_round, that wouldn't occur in the
17909 // single-step fp_round we want to fold to.
17910 // In other words, double rounding isn't the same as rounding.
17911 // Also, this is a value preserving truncation iff both fp_round's are.
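// For example, an f64 value slightly above the midpoint of two adjacent f16
// values can land exactly on that midpoint after a first rounding to f32; the
// second rounding then ties-to-even, while a single f64 -> f16 rounding would
// have rounded away from the midpoint.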
17912 if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
17913 SDLoc DL(N);
17914 return DAG.getNode(
17915 ISD::FP_ROUND, DL, VT, N0.getOperand(0),
17916 DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL, /*isTarget=*/true));
17917 }
17918 }
17919
17920 // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
17921 // Note: From a legality perspective, this is a two step transform. First,
17922 // we duplicate the fp_round to the arguments of the copysign, then we
17923 // eliminate the fp_round on Y. The second step requires an additional
17924 // predicate to match the implementation above.
17925 if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse() &&
17926 CanCombineFCOPYSIGN_EXTEND_ROUND(VT,
17927 N0.getValueType())) {
17928 SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
17929 N0.getOperand(0), N1);
17930 AddToWorklist(Tmp.getNode());
17931 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
17932 Tmp, N0.getOperand(1));
17933 }
17934
17935 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
17936 return NewVSel;
17937
17938 return SDValue();
17939}
17940
17941SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
17942 SDValue N0 = N->getOperand(0);
17943 EVT VT = N->getValueType(0);
17944
17945 if (VT.isVector())
17946 if (SDValue FoldedVOp = SimplifyVCastOp(N, SDLoc(N)))
17947 return FoldedVOp;
17948
17949 // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
17950 if (N->hasOneUse() &&
17951 N->use_begin()->getOpcode() == ISD::FP_ROUND)
17952 return SDValue();
17953
17954 // fold (fp_extend c1fp) -> c1fp
17955 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17956 return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);
17957
17958 // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
17959 if (N0.getOpcode() == ISD::FP16_TO_FP &&
17961 return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));
17962
17963 // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
17964 // value of X.
17965 if (N0.getOpcode() == ISD::FP_ROUND
17966 && N0.getConstantOperandVal(1) == 1) {
17967 SDValue In = N0.getOperand(0);
17968 if (In.getValueType() == VT) return In;
17969 if (VT.bitsLT(In.getValueType()))
17970 return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
17971 In, N0.getOperand(1));
17972 return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
17973 }
17974
17975 // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
17976 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
17978 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
17979 SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
17980 LN0->getChain(),
17981 LN0->getBasePtr(), N0.getValueType(),
17982 LN0->getMemOperand());
17983 CombineTo(N, ExtLoad);
17984 CombineTo(
17985 N0.getNode(),
17986 DAG.getNode(ISD::FP_ROUND, SDLoc(N0), N0.getValueType(), ExtLoad,
17987 DAG.getIntPtrConstant(1, SDLoc(N0), /*isTarget=*/true)),
17988 ExtLoad.getValue(1));
17989 return SDValue(N, 0); // Return N so it doesn't get rechecked!
17990 }
17991
17992 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
17993 return NewVSel;
17994
17995 return SDValue();
17996}
17997
17998SDValue DAGCombiner::visitFCEIL(SDNode *N) {
17999 SDValue N0 = N->getOperand(0);
18000 EVT VT = N->getValueType(0);
18001
18002 // fold (fceil c1) -> fceil(c1)
18003 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
18004 return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
18005
18006 return SDValue();
18007}
18008
18009SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
18010 SDValue N0 = N->getOperand(0);
18011 EVT VT = N->getValueType(0);
18012
18013 // fold (ftrunc c1) -> ftrunc(c1)
18014 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
18015 return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
18016
18017 // fold ftrunc (known rounded int x) -> x
18018 // ftrunc is a part of fptosi/fptoui expansion on some targets, so this is
18019 // likely to be generated to extract integer from a rounded floating value.
18020 switch (N0.getOpcode()) {
18021 default: break;
18022 case ISD::FRINT:
18023 case ISD::FTRUNC:
18024 case ISD::FNEARBYINT:
18025 case ISD::FROUNDEVEN:
18026 case ISD::FFLOOR:
18027 case ISD::FCEIL:
18028 return N0;
18029 }
18030
18031 return SDValue();
18032}
18033
18034SDValue DAGCombiner::visitFFREXP(SDNode *N) {
18035 SDValue N0 = N->getOperand(0);
18036
18037 // fold (ffrexp c1) -> ffrexp(c1)
18038 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
18039 return DAG.getNode(ISD::FFREXP, SDLoc(N), N->getVTList(), N0);
18040 return SDValue();
18041}
18042
18043SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
18044 SDValue N0 = N->getOperand(0);
18045 EVT VT = N->getValueType(0);
18046
18047 // fold (ffloor c1) -> ffloor(c1)
18048 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
18049 return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
18050
18051 return SDValue();
18052}
18053
18054SDValue DAGCombiner::visitFNEG(SDNode *N) {
18055 SDValue N0 = N->getOperand(0);
18056 EVT VT = N->getValueType(0);
18057 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
18058
18059 // Constant fold FNEG.
18060 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
18061 return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
18062
18063 if (SDValue NegN0 =
18064 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize))
18065 return NegN0;
18066
18067 // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
18068 // FIXME: This is duplicated in getNegatibleCost, but getNegatibleCost doesn't
18069 // know it was called from a context with a nsz flag if the input fsub does
18070 // not.
18071 if (N0.getOpcode() == ISD::FSUB &&
18072 (DAG.getTarget().Options.NoSignedZerosFPMath ||
18073 N->getFlags().hasNoSignedZeros()) && N0.hasOneUse()) {
18074 return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0.getOperand(1),
18075 N0.getOperand(0));
18076 }
18077
18078 if (SDValue Cast = foldSignChangeInBitcast(N))
18079 return Cast;
18080
18081 return SDValue();
18082}
18083
18084SDValue DAGCombiner::visitFMinMax(SDNode *N) {
18085 SDValue N0 = N->getOperand(0);
18086 SDValue N1 = N->getOperand(1);
18087 EVT VT = N->getValueType(0);
18088 const SDNodeFlags Flags = N->getFlags();
18089 unsigned Opc = N->getOpcode();
18090 bool PropagatesNaN = Opc == ISD::FMINIMUM || Opc == ISD::FMAXIMUM;
18091 bool IsMin = Opc == ISD::FMINNUM || Opc == ISD::FMINIMUM;
18092 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
18093
18094 // Constant fold.
18095 if (SDValue C = DAG.FoldConstantArithmetic(Opc, SDLoc(N), VT, {N0, N1}))
18096 return C;
18097
18098  // Canonicalize to constant on RHS.
18099  if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
18100      !DAG.isConstantFPBuildVectorOrConstantFP(N1))
18101    return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
18102
18103 if (const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1)) {
18104 const APFloat &AF = N1CFP->getValueAPF();
18105
18106 // minnum(X, nan) -> X
18107 // maxnum(X, nan) -> X
18108 // minimum(X, nan) -> nan
18109 // maximum(X, nan) -> nan
18110 if (AF.isNaN())
18111 return PropagatesNaN ? N->getOperand(1) : N->getOperand(0);
18112
18113 // In the following folds, inf can be replaced with the largest finite
18114 // float, if the ninf flag is set.
18115 if (AF.isInfinity() || (Flags.hasNoInfs() && AF.isLargest())) {
18116 // minnum(X, -inf) -> -inf
18117 // maxnum(X, +inf) -> +inf
18118 // minimum(X, -inf) -> -inf if nnan
18119 // maximum(X, +inf) -> +inf if nnan
18120 if (IsMin == AF.isNegative() && (!PropagatesNaN || Flags.hasNoNaNs()))
18121 return N->getOperand(1);
18122
18123 // minnum(X, +inf) -> X if nnan
18124 // maxnum(X, -inf) -> X if nnan
18125 // minimum(X, +inf) -> X
18126 // maximum(X, -inf) -> X
18127 if (IsMin != AF.isNegative() && (PropagatesNaN || Flags.hasNoNaNs()))
18128 return N->getOperand(0);
18129 }
18130 }
18131
18132  if (SDValue SD = reassociateReduction(
18133          PropagatesNaN
18134              ? (IsMin ? ISD::VECREDUCE_FMINIMUM : ISD::VECREDUCE_FMAXIMUM)
18135              : (IsMin ? ISD::VECREDUCE_FMIN : ISD::VECREDUCE_FMAX),
18136          Opc, SDLoc(N), VT, N0, N1, Flags))
18137 return SD;
18138
18139 return SDValue();
18140}
18141
18142SDValue DAGCombiner::visitFABS(SDNode *N) {
18143 SDValue N0 = N->getOperand(0);
18144 EVT VT = N->getValueType(0);
18145
18146  // fold (fabs c1) -> fabs(c1)
18147  if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
18148    return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
18149
18150 // fold (fabs (fabs x)) -> (fabs x)
18151 if (N0.getOpcode() == ISD::FABS)
18152 return N->getOperand(0);
18153
18154 // fold (fabs (fneg x)) -> (fabs x)
18155 // fold (fabs (fcopysign x, y)) -> (fabs x)
18156 if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
18157 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
18158
18159 if (SDValue Cast = foldSignChangeInBitcast(N))
18160 return Cast;
18161
18162 return SDValue();
18163}
18164
18165SDValue DAGCombiner::visitBRCOND(SDNode *N) {
18166 SDValue Chain = N->getOperand(0);
18167 SDValue N1 = N->getOperand(1);
18168 SDValue N2 = N->getOperand(2);
18169
18170 // BRCOND(FREEZE(cond)) is equivalent to BRCOND(cond) (both are
18171 // nondeterministic jumps).
18172 if (N1->getOpcode() == ISD::FREEZE && N1.hasOneUse()) {
18173 return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain,
18174 N1->getOperand(0), N2);
18175 }
18176
18177 // Variant of the previous fold where there is a SETCC in between:
18178 // BRCOND(SETCC(FREEZE(X), CONST, Cond))
18179 // =>
18180 // BRCOND(FREEZE(SETCC(X, CONST, Cond)))
18181 // =>
18182 // BRCOND(SETCC(X, CONST, Cond))
18183 // This is correct if FREEZE(X) has one use and SETCC(FREEZE(X), CONST, Cond)
18184 // isn't equivalent to true or false.
18185 // For example, SETCC(FREEZE(X), -128, SETULT) cannot be folded to
18186 // FREEZE(SETCC(X, -128, SETULT)) because X can be poison.
18187 if (N1->getOpcode() == ISD::SETCC && N1.hasOneUse()) {
18188 SDValue S0 = N1->getOperand(0), S1 = N1->getOperand(1);
18189 ISD::CondCode Cond = cast<CondCodeSDNode>(N1->getOperand(2))->get();
18190 ConstantSDNode *S0C = dyn_cast<ConstantSDNode>(S0);
18191 ConstantSDNode *S1C = dyn_cast<ConstantSDNode>(S1);
18192 bool Updated = false;
18193
18194 // Is 'X Cond C' always true or false?
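    // For example, (X u< 0) is always false and (X u>= 0) is always true.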
18195 auto IsAlwaysTrueOrFalse = [](ISD::CondCode Cond, ConstantSDNode *C) {
18196 bool False = (Cond == ISD::SETULT && C->isZero()) ||
18197 (Cond == ISD::SETLT && C->isMinSignedValue()) ||
18198 (Cond == ISD::SETUGT && C->isAllOnes()) ||
18199 (Cond == ISD::SETGT && C->isMaxSignedValue());
18200 bool True = (Cond == ISD::SETULE && C->isAllOnes()) ||
18201 (Cond == ISD::SETLE && C->isMaxSignedValue()) ||
18202 (Cond == ISD::SETUGE && C->isZero()) ||
18203 (Cond == ISD::SETGE && C->isMinSignedValue());
18204 return True || False;
18205 };
18206
18207 if (S0->getOpcode() == ISD::FREEZE && S0.hasOneUse() && S1C) {
18208 if (!IsAlwaysTrueOrFalse(Cond, S1C)) {
18209 S0 = S0->getOperand(0);
18210 Updated = true;
18211 }
18212 }
18213 if (S1->getOpcode() == ISD::FREEZE && S1.hasOneUse() && S0C) {
18214 if (!IsAlwaysTrueOrFalse(ISD::getSetCCSwappedOperands(Cond), S0C)) {
18215 S1 = S1->getOperand(0);
18216 Updated = true;
18217 }
18218 }
18219
18220 if (Updated)
18221 return DAG.getNode(
18222 ISD::BRCOND, SDLoc(N), MVT::Other, Chain,
18223 DAG.getSetCC(SDLoc(N1), N1->getValueType(0), S0, S1, Cond), N2);
18224 }
18225
18226 // If N is a constant we could fold this into a fallthrough or unconditional
18227 // branch. However that doesn't happen very often in normal code, because
18228 // Instcombine/SimplifyCFG should have handled the available opportunities.
18229 // If we did this folding here, it would be necessary to update the
18230 // MachineBasicBlock CFG, which is awkward.
18231
18232 // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
18233 // on the target.
18234  if (N1.getOpcode() == ISD::SETCC &&
18235      TLI.isOperationLegalOrCustom(ISD::BR_CC,
18236                                   N1.getOperand(0).getValueType())) {
18237 return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
18238 Chain, N1.getOperand(2),
18239 N1.getOperand(0), N1.getOperand(1), N2);
18240 }
18241
18242 if (N1.hasOneUse()) {
18243 // rebuildSetCC calls visitXor which may change the Chain when there is a
18244 // STRICT_FSETCC/STRICT_FSETCCS involved. Use a handle to track changes.
18245 HandleSDNode ChainHandle(Chain);
18246 if (SDValue NewN1 = rebuildSetCC(N1))
18247 return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other,
18248 ChainHandle.getValue(), NewN1, N2);
18249 }
18250
18251 return SDValue();
18252}
18253
18254SDValue DAGCombiner::rebuildSetCC(SDValue N) {
18255 if (N.getOpcode() == ISD::SRL ||
18256 (N.getOpcode() == ISD::TRUNCATE &&
18257 (N.getOperand(0).hasOneUse() &&
18258 N.getOperand(0).getOpcode() == ISD::SRL))) {
18259    // Look past the truncate.
18260 if (N.getOpcode() == ISD::TRUNCATE)
18261 N = N.getOperand(0);
18262
18263 // Match this pattern so that we can generate simpler code:
18264 //
18265 // %a = ...
18266 // %b = and i32 %a, 2
18267 // %c = srl i32 %b, 1
18268 // brcond i32 %c ...
18269 //
18270 // into
18271 //
18272 // %a = ...
18273 // %b = and i32 %a, 2
18274 // %c = setcc eq %b, 0
18275 // brcond %c ...
18276 //
18277 // This applies only when the AND constant value has one bit set and the
18278 // SRL constant is equal to the log2 of the AND constant. The back-end is
18279 // smart enough to convert the result into a TEST/JMP sequence.
18280 SDValue Op0 = N.getOperand(0);
18281 SDValue Op1 = N.getOperand(1);
18282
18283 if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
18284 SDValue AndOp1 = Op0.getOperand(1);
18285
18286 if (AndOp1.getOpcode() == ISD::Constant) {
18287 const APInt &AndConst = AndOp1->getAsAPIntVal();
18288
18289 if (AndConst.isPowerOf2() &&
18290 Op1->getAsAPIntVal() == AndConst.logBase2()) {
18291 SDLoc DL(N);
18292 return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
18293 Op0, DAG.getConstant(0, DL, Op0.getValueType()),
18294 ISD::SETNE);
18295 }
18296 }
18297 }
18298 }
18299
18300 // Transform (brcond (xor x, y)) -> (brcond (setcc, x, y, ne))
18301 // Transform (brcond (xor (xor x, y), -1)) -> (brcond (setcc, x, y, eq))
18302 if (N.getOpcode() == ISD::XOR) {
18303 // Because we may call this on a speculatively constructed
18304 // SimplifiedSetCC Node, we need to simplify this node first.
18305 // Ideally this should be folded into SimplifySetCC and not
18306 // here. For now, grab a handle to N so we don't lose it from
18307    // replacements internal to the visit.
18308 HandleSDNode XORHandle(N);
18309 while (N.getOpcode() == ISD::XOR) {
18310 SDValue Tmp = visitXOR(N.getNode());
18311 // No simplification done.
18312 if (!Tmp.getNode())
18313 break;
18314      // Returning N is a form of in-visit replacement and may have
18315      // invalidated N. Grab the value from the handle.
18316 if (Tmp.getNode() == N.getNode())
18317 N = XORHandle.getValue();
18318 else // Node simplified. Try simplifying again.
18319 N = Tmp;
18320 }
18321
18322 if (N.getOpcode() != ISD::XOR)
18323 return N;
18324
18325 SDValue Op0 = N->getOperand(0);
18326 SDValue Op1 = N->getOperand(1);
18327
18328 if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
18329 bool Equal = false;
18330 // (brcond (xor (xor x, y), -1)) -> (brcond (setcc x, y, eq))
18331 if (isBitwiseNot(N) && Op0.hasOneUse() && Op0.getOpcode() == ISD::XOR &&
18332 Op0.getValueType() == MVT::i1) {
18333 N = Op0;
18334 Op0 = N->getOperand(0);
18335 Op1 = N->getOperand(1);
18336 Equal = true;
18337 }
18338
18339 EVT SetCCVT = N.getValueType();
18340 if (LegalTypes)
18341 SetCCVT = getSetCCResultType(SetCCVT);
18342 // Replace the uses of XOR with SETCC
18343 return DAG.getSetCC(SDLoc(N), SetCCVT, Op0, Op1,
18344 Equal ? ISD::SETEQ : ISD::SETNE);
18345 }
18346 }
18347
18348 return SDValue();
18349}
18350
18351// Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
18352//
18353SDValue DAGCombiner::visitBR_CC(SDNode *N) {
18354 CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
18355 SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
18356
18357 // If N is a constant we could fold this into a fallthrough or unconditional
18358 // branch. However that doesn't happen very often in normal code, because
18359 // Instcombine/SimplifyCFG should have handled the available opportunities.
18360 // If we did this folding here, it would be necessary to update the
18361 // MachineBasicBlock CFG, which is awkward.
18362
18363  // Use SimplifySetCC to simplify SETCC's.
18364  SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
18365                               CondLHS, CondRHS, CC->get(), SDLoc(N),
18366 false);
18367 if (Simp.getNode()) AddToWorklist(Simp.getNode());
18368
18369 // fold to a simpler setcc
18370 if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
18371 return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
18372 N->getOperand(0), Simp.getOperand(2),
18373 Simp.getOperand(0), Simp.getOperand(1),
18374 N->getOperand(4));
18375
18376 return SDValue();
18377}
18378
18379static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec,
18380 bool &IsLoad, bool &IsMasked, SDValue &Ptr,
18381 const TargetLowering &TLI) {
18382 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
18383 if (LD->isIndexed())
18384 return false;
18385 EVT VT = LD->getMemoryVT();
18386 if (!TLI.isIndexedLoadLegal(Inc, VT) && !TLI.isIndexedLoadLegal(Dec, VT))
18387 return false;
18388 Ptr = LD->getBasePtr();
18389 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
18390 if (ST->isIndexed())
18391 return false;
18392 EVT VT = ST->getMemoryVT();
18393 if (!TLI.isIndexedStoreLegal(Inc, VT) && !TLI.isIndexedStoreLegal(Dec, VT))
18394 return false;
18395 Ptr = ST->getBasePtr();
18396 IsLoad = false;
18397 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
18398 if (LD->isIndexed())
18399 return false;
18400 EVT VT = LD->getMemoryVT();
18401 if (!TLI.isIndexedMaskedLoadLegal(Inc, VT) &&
18402 !TLI.isIndexedMaskedLoadLegal(Dec, VT))
18403 return false;
18404 Ptr = LD->getBasePtr();
18405 IsMasked = true;
18406 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
18407 if (ST->isIndexed())
18408 return false;
18409 EVT VT = ST->getMemoryVT();
18410 if (!TLI.isIndexedMaskedStoreLegal(Inc, VT) &&
18411 !TLI.isIndexedMaskedStoreLegal(Dec, VT))
18412 return false;
18413 Ptr = ST->getBasePtr();
18414 IsLoad = false;
18415 IsMasked = true;
18416 } else {
18417 return false;
18418 }
18419 return true;
18420}
18421
18422/// Try turning a load/store into a pre-indexed load/store when the base
18423/// pointer is an add or subtract and it has other uses besides the load/store.
18424/// After the transformation, the new indexed load/store has effectively folded
18425/// the add/subtract in and all of its other uses are redirected to the
18426/// new load/store.
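/// For example, when the target has a pre-indexed form:
///   t = add base, offset
///   v = load t
///   ... other uses of t ...
/// may become a single pre-indexed load that produces both v and the updated
/// pointer, with the other uses of t redirected to that pointer result.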
18427bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
18428 if (Level < AfterLegalizeDAG)
18429 return false;
18430
18431 bool IsLoad = true;
18432 bool IsMasked = false;
18433 SDValue Ptr;
18434 if (!getCombineLoadStoreParts(N, ISD::PRE_INC, ISD::PRE_DEC, IsLoad, IsMasked,
18435 Ptr, TLI))
18436 return false;
18437
18438 // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
18439 // out. There is no reason to make this a preinc/predec.
18440 if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
18441 Ptr->hasOneUse())
18442 return false;
18443
18444  // Ask the target to do addressing mode selection.
18445  SDValue BasePtr;
18446  SDValue Offset;
18447  ISD::MemIndexedMode AM = ISD::UNINDEXED;
18448  if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
18449 return false;
18450
18451 // Backends without true r+i pre-indexed forms may need to pass a
18452 // constant base with a variable offset so that constant coercion
18453 // will work with the patterns in canonical form.
18454 bool Swapped = false;
18455 if (isa<ConstantSDNode>(BasePtr)) {
18456 std::swap(BasePtr, Offset);
18457 Swapped = true;
18458 }
18459
18460  // Don't create an indexed load / store with zero offset.
18461  if (isNullConstant(Offset))
18462    return false;
18463
18464 // Try turning it into a pre-indexed load / store except when:
18465 // 1) The new base ptr is a frame index.
18466 // 2) If N is a store and the new base ptr is either the same as or is a
18467 // predecessor of the value being stored.
18468 // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
18469 // that would create a cycle.
18470 // 4) All uses are load / store ops that use it as old base ptr.
18471
18472 // Check #1. Preinc'ing a frame index would require copying the stack pointer
18473 // (plus the implicit offset) to a register to preinc anyway.
18474 if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
18475 return false;
18476
18477 // Check #2.
18478 if (!IsLoad) {
18479 SDValue Val = IsMasked ? cast<MaskedStoreSDNode>(N)->getValue()
18480 : cast<StoreSDNode>(N)->getValue();
18481
18482 // Would require a copy.
18483 if (Val == BasePtr)
18484 return false;
18485
18486 // Would create a cycle.
18487 if (Val == Ptr || Ptr->isPredecessorOf(Val.getNode()))
18488 return false;
18489 }
18490
18491  // Caches for hasPredecessorHelper.
18492  SmallPtrSet<const SDNode *, 32> Visited;
18493  SmallVector<const SDNode *, 16> Worklist;
18494  Worklist.push_back(N);
18495
18496 // If the offset is a constant, there may be other adds of constants that
18497 // can be folded with this one. We should do this to avoid having to keep
18498 // a copy of the original base pointer.
18499 SmallVector<SDNode *, 16> OtherUses;
18500 constexpr unsigned int MaxSteps = 8192;
18501 if (isa<ConstantSDNode>(Offset))
18502 for (SDNode::use_iterator UI = BasePtr->use_begin(),
18503 UE = BasePtr->use_end();
18504 UI != UE; ++UI) {
18505 SDUse &Use = UI.getUse();
18506 // Skip the use that is Ptr and uses of other results from BasePtr's
18507 // node (important for nodes that return multiple results).
18508 if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
18509 continue;
18510
18511 if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist,
18512 MaxSteps))
18513 continue;
18514
18515 if (Use.getUser()->getOpcode() != ISD::ADD &&
18516 Use.getUser()->getOpcode() != ISD::SUB) {
18517 OtherUses.clear();
18518 break;
18519 }
18520
18521 SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
18522 if (!isa<ConstantSDNode>(Op1)) {
18523 OtherUses.clear();
18524 break;
18525 }
18526
18527 // FIXME: In some cases, we can be smarter about this.
18528 if (Op1.getValueType() != Offset.getValueType()) {
18529 OtherUses.clear();
18530 break;
18531 }
18532
18533 OtherUses.push_back(Use.getUser());
18534 }
18535
18536 if (Swapped)
18537 std::swap(BasePtr, Offset);
18538
18539 // Now check for #3 and #4.
18540 bool RealUse = false;
18541
18542 for (SDNode *Use : Ptr->uses()) {
18543 if (Use == N)
18544 continue;
18545 if (SDNode::hasPredecessorHelper(Use, Visited, Worklist, MaxSteps))
18546 return false;
18547
18548    // If Ptr can be folded into the addressing mode of another use, then
18549    // it's not profitable to do this transformation.
18550 if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
18551 RealUse = true;
18552 }
18553
18554 if (!RealUse)
18555 return false;
18556
18557  SDValue Result;
18558  if (!IsMasked) {
18559 if (IsLoad)
18560 Result = DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
18561 else
18562 Result =
18563 DAG.getIndexedStore(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
18564 } else {
18565 if (IsLoad)
18566 Result = DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
18567 Offset, AM);
18568 else
18569 Result = DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N), BasePtr,
18570 Offset, AM);
18571 }
18572 ++PreIndexedNodes;
18573 ++NodesCombined;
18574 LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
18575 Result.dump(&DAG); dbgs() << '\n');
18576 WorklistRemover DeadNodes(*this);
18577 if (IsLoad) {
18578 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
18579 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
18580 } else {
18581 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
18582 }
18583
18584 // Finally, since the node is now dead, remove it from the graph.
18585 deleteAndRecombine(N);
18586
18587 if (Swapped)
18588 std::swap(BasePtr, Offset);
18589
18590 // Replace other uses of BasePtr that can be updated to use Ptr
18591 for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
18592 unsigned OffsetIdx = 1;
18593 if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
18594 OffsetIdx = 0;
18595 assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
18596 BasePtr.getNode() && "Expected BasePtr operand");
18597
18598 // We need to replace ptr0 in the following expression:
18599 // x0 * offset0 + y0 * ptr0 = t0
18600 // knowing that
18601 // x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
18602 //
18603 // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
18604 // indexed load/store and the expression that needs to be re-written.
18605 //
18606 // Therefore, we have:
18607    //   t0 = (x0 * offset0 - x1 * y0 * y1 * offset1) + (y0 * y1) * t1
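    //
    // For example, if the other use is t0 = ptr0 + 16 (x0 = 1, y0 = 1) and the
    // pre-indexed access computes t1 = ptr0 + 4 (PRE_INC, so x1 = 1, y1 = 1),
    // then t0 = (16 - 4) + t1, i.e. the other use is rewritten as t1 + 12.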
18608
18609 auto *CN = cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
18610 const APInt &Offset0 = CN->getAPIntValue();
18611 const APInt &Offset1 = Offset->getAsAPIntVal();
18612 int X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
18613 int Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
18614 int X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
18615 int Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
18616
18617 unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
18618
18619 APInt CNV = Offset0;
18620 if (X0 < 0) CNV = -CNV;
18621 if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
18622 else CNV = CNV - Offset1;
18623
18624 SDLoc DL(OtherUses[i]);
18625
18626 // We can now generate the new expression.
18627 SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
18628 SDValue NewOp2 = Result.getValue(IsLoad ? 1 : 0);
18629
18630 SDValue NewUse = DAG.getNode(Opcode,
18631 DL,
18632 OtherUses[i]->getValueType(0), NewOp1, NewOp2);
18633 DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
18634 deleteAndRecombine(OtherUses[i]);
18635 }
18636
18637 // Replace the uses of Ptr with uses of the updated base value.
18638 DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(IsLoad ? 1 : 0));
18639 deleteAndRecombine(Ptr.getNode());
18640 AddToWorklist(Result.getNode());
18641
18642 return true;
18643}
18644
18645static bool shouldCombineToPostInc(SDNode *N, SDValue Ptr, SDNode *PtrUse,
18646                                   SDValue &BasePtr, SDValue &Offset,
18647                                   ISD::MemIndexedMode &AM,
18648                                   SelectionDAG &DAG,
18649                                   const TargetLowering &TLI) {
18650 if (PtrUse == N ||
18651 (PtrUse->getOpcode() != ISD::ADD && PtrUse->getOpcode() != ISD::SUB))
18652 return false;
18653
18654 if (!TLI.getPostIndexedAddressParts(N, PtrUse, BasePtr, Offset, AM, DAG))
18655 return false;
18656
18657  // Don't create an indexed load / store with zero offset.
18658  if (isNullConstant(Offset))
18659    return false;
18660
18661 if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
18662 return false;
18663
18664  SmallPtrSet<const SDNode *, 32> Visited;
18665 for (SDNode *Use : BasePtr->uses()) {
18666 if (Use == Ptr.getNode())
18667 continue;
18668
18669    // Bail out if there's a later user which could perform the indexing instead.
18670 if (isa<MemSDNode>(Use)) {
18671 bool IsLoad = true;
18672 bool IsMasked = false;
18673      SDValue OtherPtr;
18674      if (getCombineLoadStoreParts(Use, ISD::POST_INC, ISD::POST_DEC, IsLoad,
18675                                   IsMasked, OtherPtr, TLI)) {
18676        SmallVector<const SDNode *, 2> Worklist;
18677        Worklist.push_back(Use);
18678 if (SDNode::hasPredecessorHelper(N, Visited, Worklist))
18679 return false;
18680 }
18681 }
18682
18683 // If all the uses are load / store addresses, then don't do the
18684 // transformation.
18685 if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB) {
18686 for (SDNode *UseUse : Use->uses())
18687 if (canFoldInAddressingMode(Use, UseUse, DAG, TLI))
18688 return false;
18689 }
18690 }
18691 return true;
18692}
18693
18694static SDNode *getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad,
18695                                         bool &IsMasked, SDValue &Ptr,
18696                                         SDValue &BasePtr, SDValue &Offset,
18697                                         ISD::MemIndexedMode &AM,
18698                                         SelectionDAG &DAG,
18699                                         const TargetLowering &TLI) {
18700  if (!getCombineLoadStoreParts(N, ISD::POST_INC, ISD::POST_DEC, IsLoad,
18701                                IsMasked, Ptr, TLI) ||
18702 Ptr->hasOneUse())
18703 return nullptr;
18704
18705 // Try turning it into a post-indexed load / store except when
18706 // 1) All uses are load / store ops that use it as base ptr (and
18707  //    it may be folded into the addressing mode).
18708 // 2) Op must be independent of N, i.e. Op is neither a predecessor
18709 // nor a successor of N. Otherwise, if Op is folded that would
18710 // create a cycle.
18711 for (SDNode *Op : Ptr->uses()) {
18712 // Check for #1.
18713 if (!shouldCombineToPostInc(N, Ptr, Op, BasePtr, Offset, AM, DAG, TLI))
18714 continue;
18715
18716    // Check for #2.
18717    SmallPtrSet<const SDNode *, 32> Visited;
18718    SmallVector<const SDNode *, 8> Worklist;
18719    constexpr unsigned int MaxSteps = 8192;
18720 // Ptr is predecessor to both N and Op.
18721 Visited.insert(Ptr.getNode());
18722 Worklist.push_back(N);
18723 Worklist.push_back(Op);
18724 if (!SDNode::hasPredecessorHelper(N, Visited, Worklist, MaxSteps) &&
18725 !SDNode::hasPredecessorHelper(Op, Visited, Worklist, MaxSteps))
18726 return Op;
18727 }
18728 return nullptr;
18729}
18730
18731/// Try to combine a load/store with an add/sub of the base pointer node into a
18732/// post-indexed load/store. The transformation effectively folds the
18733/// add/subtract into the new indexed load/store, and all of the other uses of
18734/// the add/subtract are redirected to the new load/store.
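/// For example, when the target has a post-indexed form:
///   v = load base
///   t = add base, offset
///   ... other uses of t ...
/// may become a single post-indexed load that produces both v and base + offset,
/// with the other uses of t redirected to that second result.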
18735bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
18736 if (Level < AfterLegalizeDAG)
18737 return false;
18738
18739 bool IsLoad = true;
18740 bool IsMasked = false;
18741  SDValue Ptr;
18742  SDValue BasePtr;
18743  SDValue Offset;
18744  ISD::MemIndexedMode AM = ISD::UNINDEXED;
18745  SDNode *Op = getPostIndexedLoadStoreOp(N, IsLoad, IsMasked, Ptr, BasePtr,
18746 Offset, AM, DAG, TLI);
18747 if (!Op)
18748 return false;
18749
18750  SDValue Result;
18751  if (!IsMasked)
18752 Result = IsLoad ? DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
18753 Offset, AM)
18754 : DAG.getIndexedStore(SDValue(N, 0), SDLoc(N),
18755 BasePtr, Offset, AM);
18756 else
18757 Result = IsLoad ? DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N),
18758                                               BasePtr, Offset, AM)
18759                    : DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N),
18760                                                BasePtr, Offset, AM);
18761 ++PostIndexedNodes;
18762 ++NodesCombined;
18763 LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG); dbgs() << "\nWith: ";
18764 Result.dump(&DAG); dbgs() << '\n');
18765 WorklistRemover DeadNodes(*this);
18766 if (IsLoad) {
18767 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
18768 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
18769 } else {
18770 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
18771 }
18772
18773 // Finally, since the node is now dead, remove it from the graph.
18774 deleteAndRecombine(N);
18775
18776  // Replace the uses of Op with uses of the updated base value.
18777  DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
18778                                Result.getValue(IsLoad ? 1 : 0));
18779 deleteAndRecombine(Op);
18780 return true;
18781}
18782
18783/// Return the base-pointer arithmetic from an indexed \p LD.
18784SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
18785 ISD::MemIndexedMode AM = LD->getAddressingMode();
18786 assert(AM != ISD::UNINDEXED);
18787 SDValue BP = LD->getOperand(1);
18788 SDValue Inc = LD->getOperand(2);
18789
18790 // Some backends use TargetConstants for load offsets, but don't expect
18791 // TargetConstants in general ADD nodes. We can convert these constants into
18792 // regular Constants (if the constant is not opaque).
18793  assert((Inc.getOpcode() != ISD::TargetConstant ||
18794          !cast<ConstantSDNode>(Inc)->isOpaque()) &&
18795 "Cannot split out indexing using opaque target constants");
18796 if (Inc.getOpcode() == ISD::TargetConstant) {
18797 ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
18798 Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
18799 ConstInc->getValueType(0));
18800 }
18801
18802 unsigned Opc =
18803 (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
18804 return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
18805}
18806
18807static ElementCount numVectorEltsOrZero(EVT T) {
18808  return T.isVector() ? T.getVectorElementCount() : ElementCount::getFixed(0);
18809}
18810
18811bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) {
18812 EVT STType = Val.getValueType();
18813 EVT STMemType = ST->getMemoryVT();
18814 if (STType == STMemType)
18815 return true;
18816 if (isTypeLegal(STMemType))
18817 return false; // fail.
18818 if (STType.isFloatingPoint() && STMemType.isFloatingPoint() &&
18819 TLI.isOperationLegal(ISD::FTRUNC, STMemType)) {
18820 Val = DAG.getNode(ISD::FTRUNC, SDLoc(ST), STMemType, Val);
18821 return true;
18822 }
18823 if (numVectorEltsOrZero(STType) == numVectorEltsOrZero(STMemType) &&
18824 STType.isInteger() && STMemType.isInteger()) {
18825 Val = DAG.getNode(ISD::TRUNCATE, SDLoc(ST), STMemType, Val);
18826 return true;
18827 }
18828 if (STType.getSizeInBits() == STMemType.getSizeInBits()) {
18829 Val = DAG.getBitcast(STMemType, Val);
18830 return true;
18831 }
18832 return false; // fail.
18833}
18834
18835bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) {
18836 EVT LDMemType = LD->getMemoryVT();
18837 EVT LDType = LD->getValueType(0);
18838 assert(Val.getValueType() == LDMemType &&
18839 "Attempting to extend value of non-matching type");
18840 if (LDType == LDMemType)
18841 return true;
18842 if (LDMemType.isInteger() && LDType.isInteger()) {
18843 switch (LD->getExtensionType()) {
18844 case ISD::NON_EXTLOAD:
18845 Val = DAG.getBitcast(LDType, Val);
18846 return true;
18847 case ISD::EXTLOAD:
18848 Val = DAG.getNode(ISD::ANY_EXTEND, SDLoc(LD), LDType, Val);
18849 return true;
18850 case ISD::SEXTLOAD:
18851 Val = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(LD), LDType, Val);
18852 return true;
18853 case ISD::ZEXTLOAD:
18854 Val = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(LD), LDType, Val);
18855 return true;
18856 }
18857 }
18858 return false;
18859}
18860
18861StoreSDNode *DAGCombiner::getUniqueStoreFeeding(LoadSDNode *LD,
18862 int64_t &Offset) {
18863 SDValue Chain = LD->getOperand(0);
18864
18865 // Look through CALLSEQ_START.
18866 if (Chain.getOpcode() == ISD::CALLSEQ_START)
18867 Chain = Chain->getOperand(0);
18868
18869  StoreSDNode *ST = nullptr;
18870  SmallVector<SDValue, 8> Aliases;
18871 if (Chain.getOpcode() == ISD::TokenFactor) {
18872 // Look for unique store within the TokenFactor.
18873 for (SDValue Op : Chain->ops()) {
18874 StoreSDNode *Store = dyn_cast<StoreSDNode>(Op.getNode());
18875 if (!Store)
18876 continue;
18877 BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
18878 BaseIndexOffset BasePtrST = BaseIndexOffset::match(Store, DAG);
18879 if (!BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
18880 continue;
18881 // Make sure the store is not aliased with any nodes in TokenFactor.
18882 GatherAllAliases(Store, Chain, Aliases);
18883 if (Aliases.empty() ||
18884 (Aliases.size() == 1 && Aliases.front().getNode() == Store))
18885 ST = Store;
18886 break;
18887 }
18888 } else {
18889 StoreSDNode *Store = dyn_cast<StoreSDNode>(Chain.getNode());
18890 if (Store) {
18891 BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
18892 BaseIndexOffset BasePtrST = BaseIndexOffset::match(Store, DAG);
18893 if (BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
18894 ST = Store;
18895 }
18896 }
18897
18898 return ST;
18899}
18900
18901SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
18902 if (OptLevel == CodeGenOptLevel::None || !LD->isSimple())
18903 return SDValue();
18904 SDValue Chain = LD->getOperand(0);
18905 int64_t Offset;
18906
18907 StoreSDNode *ST = getUniqueStoreFeeding(LD, Offset);
18908 // TODO: Relax this restriction for unordered atomics (see D66309)
18909 if (!ST || !ST->isSimple() || ST->getAddressSpace() != LD->getAddressSpace())
18910 return SDValue();
18911
18912 EVT LDType = LD->getValueType(0);
18913 EVT LDMemType = LD->getMemoryVT();
18914 EVT STMemType = ST->getMemoryVT();
18915 EVT STType = ST->getValue().getValueType();
18916
18917 // There are two cases to consider here:
18918 // 1. The store is fixed width and the load is scalable. In this case we
18919 // don't know at compile time if the store completely envelops the load
18920 // so we abandon the optimisation.
18921 // 2. The store is scalable and the load is fixed width. We could
18922 // potentially support a limited number of cases here, but there has been
18923 // no cost-benefit analysis to prove it's worth it.
18924 bool LdStScalable = LDMemType.isScalableVT();
18925 if (LdStScalable != STMemType.isScalableVT())
18926 return SDValue();
18927
18928 // If we are dealing with scalable vectors on a big endian platform the
18929 // calculation of offsets below becomes trickier, since we do not know at
18930 // compile time the absolute size of the vector. Until we've done more
18931 // analysis on big-endian platforms it seems better to bail out for now.
18932 if (LdStScalable && DAG.getDataLayout().isBigEndian())
18933 return SDValue();
18934
18935  // Normalize for endianness. After this, Offset=0 denotes that the least
18936  // significant bit in the loaded value maps to the least significant bit in
18937  // the stored value. With Offset=n (for n > 0) the loaded value starts at the
18938  // n:th least significant byte of the stored value.
18939 int64_t OrigOffset = Offset;
18940 if (DAG.getDataLayout().isBigEndian())
18941 Offset = ((int64_t)STMemType.getStoreSizeInBits().getFixedValue() -
18942 (int64_t)LDMemType.getStoreSizeInBits().getFixedValue()) /
18943 8 -
18944 Offset;
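  // For example, a 4-byte store feeding a 2-byte load of the same address
  // (OrigOffset = 0) on a big-endian target gives Offset = (32 - 16) / 8 = 2,
  // i.e. the load reads the bytes starting at the 2nd least significant byte
  // of the stored value.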
18945
18946  // Check that the stored value covers all bits that are loaded.
18947 bool STCoversLD;
18948
18949 TypeSize LdMemSize = LDMemType.getSizeInBits();
18950 TypeSize StMemSize = STMemType.getSizeInBits();
18951 if (LdStScalable)
18952 STCoversLD = (Offset == 0) && LdMemSize == StMemSize;
18953 else
18954 STCoversLD = (Offset >= 0) && (Offset * 8 + LdMemSize.getFixedValue() <=
18955 StMemSize.getFixedValue());
18956
18957 auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue {
18958 if (LD->isIndexed()) {
18959 // Cannot handle opaque target constants and we must respect the user's
18960 // request not to split indexes from loads.
18961 if (!canSplitIdx(LD))
18962 return SDValue();
18963 SDValue Idx = SplitIndexingFromLoad(LD);
18964 SDValue Ops[] = {Val, Idx, Chain};
18965 return CombineTo(LD, Ops, 3);
18966 }
18967 return CombineTo(LD, Val, Chain);
18968 };
18969
18970 if (!STCoversLD)
18971 return SDValue();
18972
18973 // Memory as copy space (potentially masked).
18974 if (Offset == 0 && LDType == STType && STMemType == LDMemType) {
18975 // Simple case: Direct non-truncating forwarding
18976 if (LDType.getSizeInBits() == LdMemSize)
18977 return ReplaceLd(LD, ST->getValue(), Chain);
18978 // Can we model the truncate and extension with an and mask?
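    // For example, a truncating store of an i32 value to i16 memory that is
    // read back by a zero- or any-extending i16 load becomes
    // (and StoredVal, 0xffff).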
18979 if (STType.isInteger() && LDMemType.isInteger() && !STType.isVector() &&
18980 !LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) {
18981 // Mask to size of LDMemType
18982      auto Mask =
18983          DAG.getConstant(APInt::getLowBitsSet(STType.getFixedSizeInBits(),
18984                                               StMemSize.getFixedValue()),
18985 SDLoc(ST), STType);
18986 auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask);
18987 return ReplaceLd(LD, Val, Chain);
18988 }
18989 }
18990
18991 // Handle some cases for big-endian that would be Offset 0 and handled for
18992 // little-endian.
18993 SDValue Val = ST->getValue();
18994 if (DAG.getDataLayout().isBigEndian() && Offset > 0 && OrigOffset == 0) {
18995 if (STType.isInteger() && !STType.isVector() && LDType.isInteger() &&
18996 !LDType.isVector() && isTypeLegal(STType) &&
18997 TLI.isOperationLegal(ISD::SRL, STType)) {
18998 Val = DAG.getNode(ISD::SRL, SDLoc(LD), STType, Val,
18999 DAG.getConstant(Offset * 8, SDLoc(LD), STType));
19000 Offset = 0;
19001 }
19002 }
19003
19004 // TODO: Deal with nonzero offset.
19005 if (LD->getBasePtr().isUndef() || Offset != 0)
19006 return SDValue();
19007  // Model necessary truncations / extensions.
19008 // Truncate Value To Stored Memory Size.
19009 do {
19010 if (!getTruncatedStoreValue(ST, Val))
19011 break;
19012 if (!isTypeLegal(LDMemType))
19013 break;
19014 if (STMemType != LDMemType) {
19015 // TODO: Support vectors? This requires extract_subvector/bitcast.
19016 if (!STMemType.isVector() && !LDMemType.isVector() &&
19017 STMemType.isInteger() && LDMemType.isInteger())
19018 Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val);
19019 else
19020 break;
19021 }
19022 if (!extendLoadedValueToExtension(LD, Val))
19023 break;
19024 return ReplaceLd(LD, Val, Chain);
19025 } while (false);
19026
19027 // On failure, cleanup dead nodes we may have created.
19028 if (Val->use_empty())
19029 deleteAndRecombine(Val.getNode());
19030 return SDValue();
19031}
19032
19033SDValue DAGCombiner::visitLOAD(SDNode *N) {
19034 LoadSDNode *LD = cast<LoadSDNode>(N);
19035 SDValue Chain = LD->getChain();
19036 SDValue Ptr = LD->getBasePtr();
19037
19038 // If load is not volatile and there are no uses of the loaded value (and
19039 // the updated indexed value in case of indexed loads), change uses of the
19040 // chain value into uses of the chain input (i.e. delete the dead load).
19041 // TODO: Allow this for unordered atomics (see D66309)
19042 if (LD->isSimple()) {
19043 if (N->getValueType(1) == MVT::Other) {
19044 // Unindexed loads.
19045 if (!N->hasAnyUseOfValue(0)) {
19046 // It's not safe to use the two value CombineTo variant here. e.g.
19047 // v1, chain2 = load chain1, loc
19048 // v2, chain3 = load chain2, loc
19049 // v3 = add v2, c
19050 // Now we replace use of chain2 with chain1. This makes the second load
19051 // isomorphic to the one we are deleting, and thus makes this load live.
19052 LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG);
19053 dbgs() << "\nWith chain: "; Chain.dump(&DAG);
19054 dbgs() << "\n");
19055 WorklistRemover DeadNodes(*this);
19056 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
19057 AddUsersToWorklist(Chain.getNode());
19058 if (N->use_empty())
19059 deleteAndRecombine(N);
19060
19061 return SDValue(N, 0); // Return N so it doesn't get rechecked!
19062 }
19063 } else {
19064 // Indexed loads.
19065 assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
19066
19067 // If this load has an opaque TargetConstant offset, then we cannot split
19068 // the indexing into an add/sub directly (that TargetConstant may not be
19069 // valid for a different type of node, and we cannot convert an opaque
19070 // target constant into a regular constant).
19071 bool CanSplitIdx = canSplitIdx(LD);
19072
19073 if (!N->hasAnyUseOfValue(0) && (CanSplitIdx || !N->hasAnyUseOfValue(1))) {
19074 SDValue Undef = DAG.getUNDEF(N->getValueType(0));
19075 SDValue Index;
19076 if (N->hasAnyUseOfValue(1) && CanSplitIdx) {
19077 Index = SplitIndexingFromLoad(LD);
19078 // Try to fold the base pointer arithmetic into subsequent loads and
19079 // stores.
19080 AddUsersToWorklist(N);
19081 } else
19082 Index = DAG.getUNDEF(N->getValueType(1));
19083 LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG);
19084 dbgs() << "\nWith: "; Undef.dump(&DAG);
19085 dbgs() << " and 2 other values\n");
19086 WorklistRemover DeadNodes(*this);
19087        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
19088        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
19089 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
19090 deleteAndRecombine(N);
19091 return SDValue(N, 0); // Return N so it doesn't get rechecked!
19092 }
19093 }
19094 }
19095
19096 // If this load is directly stored, replace the load value with the stored
19097 // value.
19098 if (auto V = ForwardStoreValueToDirectLoad(LD))
19099 return V;
19100
19101 // Try to infer better alignment information than the load already has.
19102 if (OptLevel != CodeGenOptLevel::None && LD->isUnindexed() &&
19103 !LD->isAtomic()) {
19104 if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
19105 if (*Alignment > LD->getAlign() &&
19106 isAligned(*Alignment, LD->getSrcValueOffset())) {
19107 SDValue NewLoad = DAG.getExtLoad(
19108 LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
19109 LD->getPointerInfo(), LD->getMemoryVT(), *Alignment,
19110 LD->getMemOperand()->getFlags(), LD->getAAInfo());
19111 // NewLoad will always be N as we are only refining the alignment
19112 assert(NewLoad.getNode() == N);
19113 (void)NewLoad;
19114 }
19115 }
19116 }
19117
19118 if (LD->isUnindexed()) {
19119 // Walk up chain skipping non-aliasing memory nodes.
19120 SDValue BetterChain = FindBetterChain(LD, Chain);
19121
19122 // If there is a better chain.
19123 if (Chain != BetterChain) {
19124 SDValue ReplLoad;
19125
19126      // Replace the chain to avoid a dependency on the old chain.
19127 if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
19128 ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
19129 BetterChain, Ptr, LD->getMemOperand());
19130 } else {
19131 ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
19132 LD->getValueType(0),
19133 BetterChain, Ptr, LD->getMemoryVT(),
19134 LD->getMemOperand());
19135 }
19136
19137 // Create token factor to keep old chain connected.
19138 SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
19139 MVT::Other, Chain, ReplLoad.getValue(1));
19140
19141 // Replace uses with load result and token factor
19142 return CombineTo(N, ReplLoad.getValue(0), Token);
19143 }
19144 }
19145
19146 // Try transforming N to an indexed load.
19147 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
19148 return SDValue(N, 0);
19149
19150 // Try to slice up N to more direct loads if the slices are mapped to
19151 // different register banks or pairing can take place.
19152 if (SliceUpLoad(N))
19153 return SDValue(N, 0);
19154
19155 return SDValue();
19156}
19157
19158namespace {
19159
19160/// Helper structure used to slice a load in smaller loads.
19161/// Basically a slice is obtained from the following sequence:
19162/// Origin = load Ty1, Base
19163/// Shift = srl Ty1 Origin, CstTy Amount
19164/// Inst = trunc Shift to Ty2
19165///
19166/// Then, it will be rewritten into:
19167/// Slice = load SliceTy, Base + SliceOffset
19168/// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
19169///
19170/// SliceTy is deduced from the number of bits that are actually used to
19171/// build Inst.
19172struct LoadedSlice {
19173 /// Helper structure used to compute the cost of a slice.
19174 struct Cost {
19175 /// Are we optimizing for code size.
19176 bool ForCodeSize = false;
19177
19178    /// Various costs.
19179 unsigned Loads = 0;
19180 unsigned Truncates = 0;
19181 unsigned CrossRegisterBanksCopies = 0;
19182 unsigned ZExts = 0;
19183 unsigned Shift = 0;
19184
19185 explicit Cost(bool ForCodeSize) : ForCodeSize(ForCodeSize) {}
19186
19187 /// Get the cost of one isolated slice.
19188 Cost(const LoadedSlice &LS, bool ForCodeSize)
19189 : ForCodeSize(ForCodeSize), Loads(1) {
19190 EVT TruncType = LS.Inst->getValueType(0);
19191 EVT LoadedType = LS.getLoadedType();
19192 if (TruncType != LoadedType &&
19193 !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
19194 ZExts = 1;
19195 }
19196
19197 /// Account for slicing gain in the current cost.
19198    /// Slicing provides a few gains, like removing a shift or a
19199    /// truncate. This method allows growing the cost of the original
19200    /// load with the gain from this slice.
19201 void addSliceGain(const LoadedSlice &LS) {
19202 // Each slice saves a truncate.
19203 const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
19204 if (!TLI.isTruncateFree(LS.Inst->getOperand(0), LS.Inst->getValueType(0)))
19205 ++Truncates;
19206 // If there is a shift amount, this slice gets rid of it.
19207 if (LS.Shift)
19208 ++Shift;
19209 // If this slice can merge a cross register bank copy, account for it.
19210 if (LS.canMergeExpensiveCrossRegisterBankCopy())
19211 ++CrossRegisterBanksCopies;
19212 }
19213
19214 Cost &operator+=(const Cost &RHS) {
19215 Loads += RHS.Loads;
19216 Truncates += RHS.Truncates;
19217 CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
19218 ZExts += RHS.ZExts;
19219 Shift += RHS.Shift;
19220 return *this;
19221 }
19222
19223 bool operator==(const Cost &RHS) const {
19224 return Loads == RHS.Loads && Truncates == RHS.Truncates &&
19225 CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
19226 ZExts == RHS.ZExts && Shift == RHS.Shift;
19227 }
19228
19229 bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
19230
19231 bool operator<(const Cost &RHS) const {
19232 // Assume cross register banks copies are as expensive as loads.
19233 // FIXME: Do we want some more target hooks?
19234 unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
19235 unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
19236 // Unless we are optimizing for code size, consider the
19237 // expensive operation first.
19238 if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
19239 return ExpensiveOpsLHS < ExpensiveOpsRHS;
19240 return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
19241 (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
19242 }
19243
19244 bool operator>(const Cost &RHS) const { return RHS < *this; }
19245
19246 bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
19247
19248 bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
19249 };
19250
19251  // The last instruction that represents the slice. This should be a
19252 // truncate instruction.
19253 SDNode *Inst;
19254
19255 // The original load instruction.
19256 LoadSDNode *Origin;
19257
19258 // The right shift amount in bits from the original load.
19259 unsigned Shift;
19260
19261  // The DAG that Origin came from.
19262 // This is used to get some contextual information about legal types, etc.
19263 SelectionDAG *DAG;
19264
19265 LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
19266 unsigned Shift = 0, SelectionDAG *DAG = nullptr)
19267 : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
19268
19269 /// Get the bits used in a chunk of bits \p BitWidth large.
19270  /// \return Result is \p BitWidth bits wide, with used bits set to 1 and
19271  /// unused bits set to 0.
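  /// For example, an i8 truncate of an i32 load with Shift = 16 uses bits
  /// [16, 24), so the result is 0x00FF0000.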
19272 APInt getUsedBits() const {
19273 // Reproduce the trunc(lshr) sequence:
19274 // - Start from the truncated value.
19275 // - Zero extend to the desired bit width.
19276 // - Shift left.
19277 assert(Origin && "No original load to compare against.");
19278 unsigned BitWidth = Origin->getValueSizeInBits(0);
19279 assert(Inst && "This slice is not bound to an instruction");
19280 assert(Inst->getValueSizeInBits(0) <= BitWidth &&
19281 "Extracted slice is bigger than the whole type!");
19282 APInt UsedBits(Inst->getValueSizeInBits(0), 0);
19283 UsedBits.setAllBits();
19284 UsedBits = UsedBits.zext(BitWidth);
19285 UsedBits <<= Shift;
19286 return UsedBits;
19287 }
19288
19289 /// Get the size of the slice to be loaded in bytes.
19290 unsigned getLoadedSize() const {
19291 unsigned SliceSize = getUsedBits().popcount();
19292 assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
19293 return SliceSize / 8;
19294 }
19295
19296 /// Get the type that will be loaded for this slice.
19297 /// Note: This may not be the final type for the slice.
19298 EVT getLoadedType() const {
19299 assert(DAG && "Missing context");
19300 LLVMContext &Ctxt = *DAG->getContext();
19301 return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
19302 }
19303
19304 /// Get the alignment of the load used for this slice.
19305 Align getAlign() const {
19306 Align Alignment = Origin->getAlign();
19307 uint64_t Offset = getOffsetFromBase();
19308 if (Offset != 0)
19309 Alignment = commonAlignment(Alignment, Alignment.value() + Offset);
19310 return Alignment;
19311 }
19312
19313 /// Check if this slice can be rewritten with legal operations.
19314 bool isLegal() const {
19315 // An invalid slice is not legal.
19316 if (!Origin || !Inst || !DAG)
19317 return false;
19318
19319 // Offsets are for indexed load only, we do not handle that.
19320 if (!Origin->getOffset().isUndef())
19321 return false;
19322
19323 const TargetLowering &TLI = DAG->getTargetLoweringInfo();
19324
19325 // Check that the type is legal.
19326 EVT SliceType = getLoadedType();
19327 if (!TLI.isTypeLegal(SliceType))
19328 return false;
19329
19330 // Check that the load is legal for this type.
19331 if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
19332 return false;
19333
19334 // Check that the offset can be computed.
19335 // 1. Check its type.
19336 EVT PtrType = Origin->getBasePtr().getValueType();
19337 if (PtrType == MVT::Untyped || PtrType.isExtended())
19338 return false;
19339
19340 // 2. Check that it fits in the immediate.
19341 if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
19342 return false;
19343
19344 // 3. Check that the computation is legal.
19345 if (!TLI.isOperationLegal(ISD::ADD, PtrType))
19346 return false;
19347
19348 // Check that the zext is legal if it needs one.
19349 EVT TruncateType = Inst->getValueType(0);
19350 if (TruncateType != SliceType &&
19351 !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
19352 return false;
19353
19354 return true;
19355 }
19356
19357 /// Get the offset in bytes of this slice in the original chunk of
19358 /// bits.
19359 /// \pre DAG != nullptr.
19360 uint64_t getOffsetFromBase() const {
19361 assert(DAG && "Missing context.");
19362 bool IsBigEndian = DAG->getDataLayout().isBigEndian();
19363 assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
19364 uint64_t Offset = Shift / 8;
19365 unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
19366 assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
19367 "The size of the original loaded type is not a multiple of a"
19368 " byte.");
19369 // If Offset is bigger than TySizeInBytes, it means we are loading all
19370 // zeros. This should have been optimized before in the process.
19371 assert(TySizeInBytes > Offset &&
19372 "Invalid shift amount for given loaded size");
19373 if (IsBigEndian)
19374 Offset = TySizeInBytes - Offset - getLoadedSize();
19375 return Offset;
19376 }
19377
19378 /// Generate the sequence of instructions to load the slice
19379 /// represented by this object and redirect the uses of this slice to
19380 /// this new sequence of instructions.
19381 /// \pre this->Inst && this->Origin are valid Instructions and this
19382 /// object passed the legal check: LoadedSlice::isLegal returned true.
19383 /// \return The last instruction of the sequence used to load the slice.
19384 SDValue loadSlice() const {
19385 assert(Inst && Origin && "Unable to replace a non-existing slice.");
19386 const SDValue &OldBaseAddr = Origin->getBasePtr();
19387 SDValue BaseAddr = OldBaseAddr;
19388 // Get the offset in that chunk of bytes w.r.t. the endianness.
19389 int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
19390 assert(Offset >= 0 && "Offset too big to fit in int64_t!");
19391 if (Offset) {
19392 // BaseAddr = BaseAddr + Offset.
19393 EVT ArithType = BaseAddr.getValueType();
19394 SDLoc DL(Origin);
19395 BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
19396 DAG->getConstant(Offset, DL, ArithType));
19397 }
19398
19399 // Create the type of the loaded slice according to its size.
19400 EVT SliceType = getLoadedType();
19401
19402 // Create the load for the slice.
19403 SDValue LastInst =
19404 DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
19406 Origin->getMemOperand()->getFlags());
19407 // If the final type is not the same as the loaded type, this means that
19408 // we have to pad with zero. Create a zero extend for that.
19409 EVT FinalType = Inst->getValueType(0);
19410 if (SliceType != FinalType)
19411 LastInst =
19412 DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
19413 return LastInst;
19414 }
19415
19416 /// Check if this slice can be merged with an expensive cross register
19417 /// bank copy. E.g.,
19418 /// i = load i32
19419 /// f = bitcast i32 i to float
19420 bool canMergeExpensiveCrossRegisterBankCopy() const {
19421 if (!Inst || !Inst->hasOneUse())
19422 return false;
19423 SDNode *Use = *Inst->use_begin();
19424 if (Use->getOpcode() != ISD::BITCAST)
19425 return false;
19426 assert(DAG && "Missing context");
19427 const TargetLowering &TLI = DAG->getTargetLoweringInfo();
19428 EVT ResVT = Use->getValueType(0);
19429 const TargetRegisterClass *ResRC =
19430 TLI.getRegClassFor(ResVT.getSimpleVT(), Use->isDivergent());
19431 const TargetRegisterClass *ArgRC =
19432 TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT(),
19433 Use->getOperand(0)->isDivergent());
19434 if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
19435 return false;
19436
19437 // At this point, we know that we perform a cross-register-bank copy.
19438    // Check if it is expensive.
19439    const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
19440 // Assume bitcasts are cheap, unless both register classes do not
19441 // explicitly share a common sub class.
19442 if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
19443 return false;
19444
19445 // Check if it will be merged with the load.
19446 // 1. Check the alignment / fast memory access constraint.
19447 unsigned IsFast = 0;
19448 if (!TLI.allowsMemoryAccess(*DAG->getContext(), DAG->getDataLayout(), ResVT,
19449 Origin->getAddressSpace(), getAlign(),
19450 Origin->getMemOperand()->getFlags(), &IsFast) ||
19451 !IsFast)
19452 return false;
19453
19454 // 2. Check that the load is a legal operation for that type.
19455 if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
19456 return false;
19457
19458 // 3. Check that we do not have a zext in the way.
19459 if (Inst->getValueType(0) != getLoadedType())
19460 return false;
19461
19462 return true;
19463 }
19464};
19465
19466} // end anonymous namespace
19467
19468/// Check that all bits set in \p UsedBits form a dense region, i.e.,
19469/// \p UsedBits looks like 0..0 1..1 0..0.
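/// For example, 0x00FF0000 is dense, while 0x00FF00FF is not.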
19470static bool areUsedBitsDense(const APInt &UsedBits) {
19471 // If all the bits are one, this is dense!
19472 if (UsedBits.isAllOnes())
19473 return true;
19474
19475 // Get rid of the unused bits on the right.
19476 APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countr_zero());
19477 // Get rid of the unused bits on the left.
19478 if (NarrowedUsedBits.countl_zero())
19479 NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
19480 // Check that the chunk of bits is completely used.
19481 return NarrowedUsedBits.isAllOnes();
19482}
19483
19484/// Check whether or not \p First and \p Second are next to each other
19485/// in memory. This means that there is no hole between the bits loaded
19486/// by \p First and the bits loaded by \p Second.
19487static bool areSlicesNextToEachOther(const LoadedSlice &First,
19488 const LoadedSlice &Second) {
19489 assert(First.Origin == Second.Origin && First.Origin &&
19490 "Unable to match different memory origins.");
19491 APInt UsedBits = First.getUsedBits();
19492 assert((UsedBits & Second.getUsedBits()) == 0 &&
19493 "Slices are not supposed to overlap.");
19494 UsedBits |= Second.getUsedBits();
19495 return areUsedBitsDense(UsedBits);
19496}
19497
19498/// Adjust the \p GlobalLSCost according to the target
19499/// pairing capabilities and the layout of the slices.
19500/// \pre \p GlobalLSCost should account for at least as many loads as
19501/// there are in the slices in \p LoadedSlices.
19502static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
19503                                 LoadedSlice::Cost &GlobalLSCost) {
19504 unsigned NumberOfSlices = LoadedSlices.size();
19505  // If there are fewer than 2 elements, no pairing is possible.
19506 if (NumberOfSlices < 2)
19507 return;
19508
19509 // Sort the slices so that elements that are likely to be next to each
19510 // other in memory are next to each other in the list.
19511 llvm::sort(LoadedSlices, [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
19512 assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
19513 return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
19514 });
19515 const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
19516  // First (resp. Second) is the first (resp. second) potential candidate
19517 // to be placed in a paired load.
19518 const LoadedSlice *First = nullptr;
19519 const LoadedSlice *Second = nullptr;
19520 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
19521 // Set the beginning of the pair.
19522 First = Second) {
19523 Second = &LoadedSlices[CurrSlice];
19524
19525 // If First is NULL, it means we start a new pair.
19526 // Get to the next slice.
19527 if (!First)
19528 continue;
19529
19530 EVT LoadedType = First->getLoadedType();
19531
19532 // If the types of the slices are different, we cannot pair them.
19533 if (LoadedType != Second->getLoadedType())
19534 continue;
19535
19536 // Check if the target supplies paired loads for this type.
19537 Align RequiredAlignment;
19538 if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
19539 // move to the next pair, this type is hopeless.
19540 Second = nullptr;
19541 continue;
19542 }
19543 // Check if we meet the alignment requirement.
19544 if (First->getAlign() < RequiredAlignment)
19545 continue;
19546
19547 // Check that both loads are next to each other in memory.
19548 if (!areSlicesNextToEachOther(*First, *Second))
19549 continue;
19550
19551 assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
19552 --GlobalLSCost.Loads;
19553 // Move to the next pair.
19554 Second = nullptr;
19555 }
19556}
19557
19558/// Check the profitability of all involved LoadedSlice.
19559/// Currently, it is considered profitable if there are exactly two
19560/// involved slices (1) which are (2) next to each other in memory, and
19561/// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
19562///
19563/// Note: The order of the elements in \p LoadedSlices may be modified, but not
19564/// the elements themselves.
19565///
19566/// FIXME: When the cost model will be mature enough, we can relax
19567/// constraints (1) and (2).
19568static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
19569                                const APInt &UsedBits, bool ForCodeSize) {
19570  unsigned NumberOfSlices = LoadedSlices.size();
19571  if (StressLoadSlicing)
19572    return NumberOfSlices > 1;
19573
19574 // Check (1).
19575 if (NumberOfSlices != 2)
19576 return false;
19577
19578 // Check (2).
19579 if (!areUsedBitsDense(UsedBits))
19580 return false;
19581
19582 // Check (3).
19583 LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
19584 // The original code has one big load.
19585 OrigCost.Loads = 1;
19586 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
19587 const LoadedSlice &LS = LoadedSlices[CurrSlice];
19588 // Accumulate the cost of all the slices.
19589 LoadedSlice::Cost SliceCost(LS, ForCodeSize);
19590 GlobalSlicingCost += SliceCost;
19591
19592 // Account as cost in the original configuration the gain obtained
19593 // with the current slices.
19594 OrigCost.addSliceGain(LS);
19595 }
19596
19597 // If the target supports paired load, adjust the cost accordingly.
19598 adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
19599 return OrigCost > GlobalSlicingCost;
19600}
19601
19602/// If the given load, \p N, is used only by trunc or trunc(lshr)
19603/// operations, split it into the various pieces being extracted.
19604///
19605/// This sort of thing is introduced by SROA.
19606/// This slicing takes care not to insert overlapping loads.
19607/// \pre \p N is a simple load (i.e., not an atomic or volatile load).
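/// For example, on a little-endian target:
///   %l = load i32, ptr %p
///   %a = trunc i32 %l to i16
///   %s = lshr i32 %l, 16
///   %b = trunc i32 %s to i16
/// may be rewritten as two independent i16 loads, from %p and from %p + 2.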
19608bool DAGCombiner::SliceUpLoad(SDNode *N) {
19609 if (Level < AfterLegalizeDAG)
19610 return false;
19611
19612 LoadSDNode *LD = cast<LoadSDNode>(N);
19613 if (!LD->isSimple() || !ISD::isNormalLoad(LD) ||
19614 !LD->getValueType(0).isInteger())
19615 return false;
19616
19617 // The algorithm to split up a load of a scalable vector into individual
19618 // elements currently requires knowing the length of the loaded type,
19619 // so will need adjusting to work on scalable vectors.
19620 if (LD->getValueType(0).isScalableVector())
19621 return false;
19622
19623 // Keep track of already used bits to detect overlapping values.
19624 // In that case, we will just abort the transformation.
19625 APInt UsedBits(LD->getValueSizeInBits(0), 0);
19626
19627 SmallVector<LoadedSlice, 4> LoadedSlices;
19628
19629 // Check if this load is used as several smaller chunks of bits.
19630 // Basically, look for uses in trunc or trunc(lshr) and record a new chain
19631 // of computation for each trunc.
19632 for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
19633 UI != UIEnd; ++UI) {
19634 // Skip the uses of the chain.
19635 if (UI.getUse().getResNo() != 0)
19636 continue;
19637
19638 SDNode *User = *UI;
19639 unsigned Shift = 0;
19640
19641 // Check if this is a trunc(lshr).
19642 if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
19643 isa<ConstantSDNode>(User->getOperand(1))) {
19644 Shift = User->getConstantOperandVal(1);
19645 User = *User->use_begin();
19646 }
19647
19648 // At this point, User is a truncate iff we encountered trunc or
19649 // trunc(lshr).
19650 if (User->getOpcode() != ISD::TRUNCATE)
19651 return false;
19652
19653 // The width of the type must be a power of 2 and at least 8 bits.
19654 // Otherwise the load cannot be represented in LLVM IR.
19655 // Moreover, if we shifted with a non-8-bits multiple, the slice
19656 // will be across several bytes. We do not support that.
19657 unsigned Width = User->getValueSizeInBits(0);
19658 if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
19659 return false;
19660
19661 // Build the slice for this chain of computations.
19662 LoadedSlice LS(User, LD, Shift, &DAG);
19663 APInt CurrentUsedBits = LS.getUsedBits();
19664
19665 // Check if this slice overlaps with another.
19666 if ((CurrentUsedBits & UsedBits) != 0)
19667 return false;
19668 // Update the bits used globally.
19669 UsedBits |= CurrentUsedBits;
19670
19671 // Check if the new slice would be legal.
19672 if (!LS.isLegal())
19673 return false;
19674
19675 // Record the slice.
19676 LoadedSlices.push_back(LS);
19677 }
19678
19679 // Abort slicing if it does not seem to be profitable.
19680 if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
19681 return false;
19682
19683 ++SlicedLoads;
19684
19685 // Rewrite each chain to use an independent load.
19686 // By construction, each chain can be represented by a unique load.
19687
19688 // Prepare the argument for the new token factor for all the slices.
19689 SmallVector<SDValue, 8> ArgChains;
19690 for (const LoadedSlice &LS : LoadedSlices) {
19691 SDValue SliceInst = LS.loadSlice();
19692 CombineTo(LS.Inst, SliceInst, true);
19693 if (SliceInst.getOpcode() != ISD::LOAD)
19694 SliceInst = SliceInst.getOperand(0);
19695 assert(SliceInst->getOpcode() == ISD::LOAD &&
19696 "It takes more than a zext to get to the loaded slice!!");
19697 ArgChains.push_back(SliceInst.getValue(1));
19698 }
19699
19700 SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
19701 ArgChains);
19702 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
19703 AddToWorklist(Chain.getNode());
19704 return true;
19705}
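
// Illustrative sketch (not part of the original source): the kind of rewrite
// SliceUpLoad performs. Assuming a little-endian target where the narrow
// loads are legal and the slicing is deemed profitable, a 32-bit load whose
// only users are trunc and trunc(lshr) is split into independent loads:
//
//   %w  = load i32, ptr %p                      %lo = load i16, ptr %p
//   %lo = trunc i32 %w to i16           ==>     %hi = load i16, ptr %p + 2
//   %hi = trunc i32 (lshr i32 %w, 16) to i16
//
// The chains of the new loads are then joined with a TokenFactor, which
// replaces the original load's chain result.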
19706
19707 /// Check to see if V is (and load (ptr), imm), where the load has
19708/// specific bytes cleared out. If so, return the byte size being masked out
19709/// and the shift amount.
19710static std::pair<unsigned, unsigned>
19711 CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
19712 std::pair<unsigned, unsigned> Result(0, 0);
19713
19714 // Check for the structure we're looking for.
19715 if (V->getOpcode() != ISD::AND ||
19716 !isa<ConstantSDNode>(V->getOperand(1)) ||
19717 !ISD::isNormalLoad(V->getOperand(0).getNode()))
19718 return Result;
19719
19720 // Check the chain and pointer.
19721 LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
19722 if (LD->getBasePtr() != Ptr) return Result; // Not from same pointer.
19723
19724 // This only handles simple types.
19725 if (V.getValueType() != MVT::i16 &&
19726 V.getValueType() != MVT::i32 &&
19727 V.getValueType() != MVT::i64)
19728 return Result;
19729
19730 // Check the constant mask. Invert it so that the bits being masked out are
19731 // 0 and the bits being kept are 1. Use getSExtValue so that leading bits
19732 // follow the sign bit for uniformity.
19733 uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
19734 unsigned NotMaskLZ = llvm::countl_zero(NotMask);
19735 if (NotMaskLZ & 7) return Result; // Must be multiple of a byte.
19736 unsigned NotMaskTZ = llvm::countr_zero(NotMask);
19737 if (NotMaskTZ & 7) return Result; // Must be multiple of a byte.
19738 if (NotMaskLZ == 64) return Result; // All zero mask.
19739
19740 // See if we have a continuous run of bits. If so, we have 0*1+0*
19741 if (llvm::countr_one(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
19742 return Result;
19743
19744 // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
19745 if (V.getValueType() != MVT::i64 && NotMaskLZ)
19746 NotMaskLZ -= 64-V.getValueSizeInBits();
19747
19748 unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
19749 switch (MaskedBytes) {
19750 case 1:
19751 case 2:
19752 case 4: break;
19753 default: return Result; // All one mask, or 5-byte mask.
19754 }
19755
19756 // Verify that the masked region starts at a multiple of its own width so
19757 // that the narrowed access is aligned the same as the access width.
19758 if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
19759
19760 // For narrowing to be valid, it must be the case that the load is the
19761 // memory operation immediately preceding the store.
19762 if (LD == Chain.getNode())
19763 ; // ok.
19764 else if (Chain->getOpcode() == ISD::TokenFactor &&
19765 SDValue(LD, 1).hasOneUse()) {
19766 // LD has only 1 chain use, so there are no indirect dependencies.
19767 if (!LD->isOperandOf(Chain.getNode()))
19768 return Result;
19769 } else
19770 return Result; // Fail.
19771
19772 Result.first = MaskedBytes;
19773 Result.second = NotMaskTZ/8;
19774 return Result;
19775}
19776
19777/// Check to see if IVal is something that provides a value as specified by
19778/// MaskInfo. If so, replace the specified store with a narrower store of
19779/// truncated IVal.
19780static SDValue
19781ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
19782 SDValue IVal, StoreSDNode *St,
19783 DAGCombiner *DC) {
19784 unsigned NumBytes = MaskInfo.first;
19785 unsigned ByteShift = MaskInfo.second;
19786 SelectionDAG &DAG = DC->getDAG();
19787
19788 // Check to see if IVal is all zeros in the part being masked in by the 'or'
19789 // that uses this. If not, this is not a replacement.
19790 APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
19791 ByteShift*8, (ByteShift+NumBytes)*8);
19792 if (!DAG.MaskedValueIsZero(IVal, Mask)) return SDValue();
19793
19794 // Check that it is legal on the target to do this. It is legal if the new
19795 // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
19796 // legalization. If the source type is legal, but the store type isn't, see
19797 // if we can use a truncating store.
19798 MVT VT = MVT::getIntegerVT(NumBytes * 8);
19799 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19800 bool UseTruncStore;
19801 if (DC->isTypeLegal(VT))
19802 UseTruncStore = false;
19803 else if (TLI.isTypeLegal(IVal.getValueType()) &&
19804 TLI.isTruncStoreLegal(IVal.getValueType(), VT))
19805 UseTruncStore = true;
19806 else
19807 return SDValue();
19808
19809 // Can't do this for indexed stores.
19810 if (St->isIndexed())
19811 return SDValue();
19812
19813 // Check that the target doesn't think this is a bad idea.
19814 if (St->getMemOperand() &&
19815 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
19816 *St->getMemOperand()))
19817 return SDValue();
19818
19819 // Okay, we can do this! Replace the 'St' store with a store of IVal that is
19820 // shifted by ByteShift and truncated down to NumBytes.
19821 if (ByteShift) {
19822 SDLoc DL(IVal);
19823 IVal = DAG.getNode(
19824 ISD::SRL, DL, IVal.getValueType(), IVal,
19825 DAG.getShiftAmountConstant(ByteShift * 8, IVal.getValueType(), DL));
19826 }
19827
19828 // Figure out the offset for the store and the alignment of the access.
19829 unsigned StOffset;
19830 if (DAG.getDataLayout().isLittleEndian())
19831 StOffset = ByteShift;
19832 else
19833 StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
19834
19835 SDValue Ptr = St->getBasePtr();
19836 if (StOffset) {
19837 SDLoc DL(IVal);
19838 Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(StOffset), DL);
19839 }
19840
19841 ++OpsNarrowed;
19842 if (UseTruncStore)
19843 return DAG.getTruncStore(St->getChain(), SDLoc(St), IVal, Ptr,
19844 St->getPointerInfo().getWithOffset(StOffset),
19845 VT, St->getOriginalAlign());
19846
19847 // Truncate down to the new size.
19848 IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
19849
19850 return DAG
19851 .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
19852 St->getPointerInfo().getWithOffset(StOffset),
19853 St->getOriginalAlign());
19854}
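
// Illustrative sketch (not part of the original source) of the rewrite that
// CheckForMaskedLoad and ShrinkLoadReplaceStoreWithStore enable when invoked
// from ReduceLoadOpStoreWidth below, assuming the byte-sized access is legal
// and allowed by the target:
//
//   %old = load i32, ptr %p
//   %m   = and i32 %old, 0xFFFFFF00        ; byte 0 of the load is cleared
//   %new = or i32 %m, (zext i8 %v to i32)  ; byte 0 is replaced with %v
//   store i32 %new, ptr %p
//
// becomes a single narrow store (at offset 0 on a little-endian target):
//
//   store i8 %v, ptr %p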
19855
19856/// Look for sequence of load / op / store where op is one of 'or', 'xor', and
19857/// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
19858/// narrowing the load and store if it would end up being a win for performance
19859/// or code size.
19860SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
19861 StoreSDNode *ST = cast<StoreSDNode>(N);
19862 if (!ST->isSimple())
19863 return SDValue();
19864
19865 SDValue Chain = ST->getChain();
19866 SDValue Value = ST->getValue();
19867 SDValue Ptr = ST->getBasePtr();
19868 EVT VT = Value.getValueType();
19869
19870 if (ST->isTruncatingStore() || VT.isVector())
19871 return SDValue();
19872
19873 unsigned Opc = Value.getOpcode();
19874
19875 if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
19876 !Value.hasOneUse())
19877 return SDValue();
19878
19879 // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
19880 // is a byte mask indicating a consecutive number of bytes, check to see if
19881 // Y is known to provide just those bytes. If so, we try to replace this
19882 // load + 'or' + store sequence with a single (narrower) store, which makes
19883 // the load dead.
19884 if (Opc == ISD::OR) {
19885 std::pair<unsigned, unsigned> MaskedLoad;
19886 MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
19887 if (MaskedLoad.first)
19888 if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
19889 Value.getOperand(1), ST,this))
19890 return NewST;
19891
19892 // Or is commutative, so try swapping X and Y.
19893 MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
19894 if (MaskedLoad.first)
19895 if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
19896 Value.getOperand(0), ST,this))
19897 return NewST;
19898 }
19899
19900 if (!EnableReduceLoadOpStoreWidth)
19901 return SDValue();
19902
19903 if (Value.getOperand(1).getOpcode() != ISD::Constant)
19904 return SDValue();
19905
19906 SDValue N0 = Value.getOperand(0);
19907 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
19908 Chain == SDValue(N0.getNode(), 1)) {
19909 LoadSDNode *LD = cast<LoadSDNode>(N0);
19910 if (LD->getBasePtr() != Ptr ||
19911 LD->getPointerInfo().getAddrSpace() !=
19912 ST->getPointerInfo().getAddrSpace())
19913 return SDValue();
19914
19915 // Find the type to narrow the load / op / store to.
19916 SDValue N1 = Value.getOperand(1);
19917 unsigned BitWidth = N1.getValueSizeInBits();
19918 APInt Imm = N1->getAsAPIntVal();
19919 if (Opc == ISD::AND)
19920 Imm ^= APInt::getAllOnes(BitWidth);
19921 if (Imm == 0 || Imm.isAllOnes())
19922 return SDValue();
19923 unsigned ShAmt = Imm.countr_zero();
19924 unsigned MSB = BitWidth - Imm.countl_zero() - 1;
19925 unsigned NewBW = NextPowerOf2(MSB - ShAmt);
19926 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
19927 // The narrowing should be profitable, the load/store operation should be
19928 // legal (or custom) and the store size should be equal to the NewVT width.
19929 while (NewBW < BitWidth &&
19930 (NewVT.getStoreSizeInBits() != NewBW ||
19931 !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
19932 !TLI.isNarrowingProfitable(VT, NewVT))) {
19933 NewBW = NextPowerOf2(NewBW);
19934 NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
19935 }
19936 if (NewBW >= BitWidth)
19937 return SDValue();
19938
19939 // If the lowest bit that changes does not start at a NewBW-bit boundary,
19940 // start at the previous boundary.
19941 if (ShAmt % NewBW)
19942 ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
19943 APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
19944 std::min(BitWidth, ShAmt + NewBW));
19945 if ((Imm & Mask) == Imm) {
19946 APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
19947 if (Opc == ISD::AND)
19948 NewImm ^= APInt::getAllOnes(NewBW);
19949 uint64_t PtrOff = ShAmt / 8;
19950 // For big endian targets, we need to adjust the offset to the pointer to
19951 // load the correct bytes.
19952 if (DAG.getDataLayout().isBigEndian())
19953 PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
19954
19955 unsigned IsFast = 0;
19956 Align NewAlign = commonAlignment(LD->getAlign(), PtrOff);
19957 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), NewVT,
19958 LD->getAddressSpace(), NewAlign,
19959 LD->getMemOperand()->getFlags(), &IsFast) ||
19960 !IsFast)
19961 return SDValue();
19962
19963 SDValue NewPtr =
19964 DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(PtrOff), SDLoc(LD));
19965 SDValue NewLD =
19966 DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
19967 LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
19968 LD->getMemOperand()->getFlags(), LD->getAAInfo());
19969 SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
19970 DAG.getConstant(NewImm, SDLoc(Value),
19971 NewVT));
19972 SDValue NewST =
19973 DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
19974 ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);
19975
19976 AddToWorklist(NewPtr.getNode());
19977 AddToWorklist(NewLD.getNode());
19978 AddToWorklist(NewVal.getNode());
19979 WorklistRemover DeadNodes(*this);
19980 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
19981 ++OpsNarrowed;
19982 return NewST;
19983 }
19984 }
19985
19986 return SDValue();
19987}
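
// Illustrative sketch (not part of the original source) of the generic
// narrowing path above, assuming a little-endian target where the i8
// load/xor/store is legal and considered profitable:
//
//   %x = load i32, ptr %p
//   %y = xor i32 %x, 65280                 ; 0x0000FF00, only byte 1 changes
//   store i32 %y, ptr %p
//
// may be narrowed to:
//
//   %b = load i8, ptr %p + 1
//   %c = xor i8 %b, -1
//   store i8 %c, ptr %p + 1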
19988
19989/// For a given floating point load / store pair, if the load value isn't used
19990/// by any other operations, then consider transforming the pair to integer
19991/// load / store operations if the target deems the transformation profitable.
19992SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
19993 StoreSDNode *ST = cast<StoreSDNode>(N);
19994 SDValue Value = ST->getValue();
19995 if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
19996 Value.hasOneUse()) {
19997 LoadSDNode *LD = cast<LoadSDNode>(Value);
19998 EVT VT = LD->getMemoryVT();
19999 if (!VT.isFloatingPoint() ||
20000 VT != ST->getMemoryVT() ||
20001 LD->isNonTemporal() ||
20002 ST->isNonTemporal() ||
20003 LD->getPointerInfo().getAddrSpace() != 0 ||
20004 ST->getPointerInfo().getAddrSpace() != 0)
20005 return SDValue();
20006
20007 TypeSize VTSize = VT.getSizeInBits();
20008
20009 // We don't know the size of scalable types at compile time so we cannot
20010 // create an integer of the equivalent size.
20011 if (VTSize.isScalable())
20012 return SDValue();
20013
20014 unsigned FastLD = 0, FastST = 0;
20015 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VTSize.getFixedValue());
20016 if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
20017 !TLI.isOperationLegal(ISD::STORE, IntVT) ||
20018 !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
20019 !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT) ||
20020 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,
20021 *LD->getMemOperand(), &FastLD) ||
20022 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,
20023 *ST->getMemOperand(), &FastST) ||
20024 !FastLD || !FastST)
20025 return SDValue();
20026
20027 SDValue NewLD =
20028 DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
20029 LD->getPointerInfo(), LD->getAlign());
20030
20031 SDValue NewST =
20032 DAG.getStore(ST->getChain(), SDLoc(N), NewLD, ST->getBasePtr(),
20033 ST->getPointerInfo(), ST->getAlign());
20034
20035 AddToWorklist(NewLD.getNode());
20036 AddToWorklist(NewST.getNode());
20037 WorklistRemover DeadNodes(*this);
20038 DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
20039 ++LdStFP2Int;
20040 return NewST;
20041 }
20042
20043 return SDValue();
20044}
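
// Illustrative sketch (not part of the original source): when the loaded FP
// value is only used by the store and the target reports that the equivalent
// integer accesses are legal, desirable, and fast, the pair
//
//   %f = load float, ptr %src
//   store float %f, ptr %dst
//
// is rewritten as an integer copy:
//
//   %i = load i32, ptr %src
//   store i32 %i, ptr %dst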
20045
20046// This is a helper function for visitMUL to check the profitability
20047// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
20048// MulNode is the original multiply, AddNode is (add x, c1),
20049// and ConstNode is c2.
20050//
20051// If the (add x, c1) has multiple uses, we could increase
20052// the number of adds if we make this transformation.
20053// It would only be worth doing this if we can remove a
20054// multiply in the process. Check for that here.
20055// To illustrate:
20056// (A + c1) * c3
20057// (A + c2) * c3
20058// We're checking for cases where we have common "c3 * A" expressions.
20059bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode, SDValue AddNode,
20060 SDValue ConstNode) {
20061 APInt Val;
20062
20063 // If the add only has one use, and the target thinks the folding is
20064 // profitable or does not lead to worse code, this would be OK to do.
20065 if (AddNode->hasOneUse() &&
20066 TLI.isMulAddWithConstProfitable(AddNode, ConstNode))
20067 return true;
20068
20069 // Walk all the users of the constant with which we're multiplying.
20070 for (SDNode *Use : ConstNode->uses()) {
20071 if (Use == MulNode) // This use is the one we're on right now. Skip it.
20072 continue;
20073
20074 if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
20075 SDNode *OtherOp;
20076 SDNode *MulVar = AddNode.getOperand(0).getNode();
20077
20078 // OtherOp is what we're multiplying against the constant.
20079 if (Use->getOperand(0) == ConstNode)
20080 OtherOp = Use->getOperand(1).getNode();
20081 else
20082 OtherOp = Use->getOperand(0).getNode();
20083
20084 // Check to see if multiply is with the same operand of our "add".
20085 //
20086 // ConstNode = CONST
20087 // Use = ConstNode * A <-- visiting Use. OtherOp is A.
20088 // ...
20089 // AddNode = (A + c1) <-- MulVar is A.
20090 // = AddNode * ConstNode <-- current visiting instruction.
20091 //
20092 // If we make this transformation, we will have a common
20093 // multiply (ConstNode * A) that we can save.
20094 if (OtherOp == MulVar)
20095 return true;
20096
20097 // Now check to see if a future expansion will give us a common
20098 // multiply.
20099 //
20100 // ConstNode = CONST
20101 // AddNode = (A + c1)
20102 // ... = AddNode * ConstNode <-- current visiting instruction.
20103 // ...
20104 // OtherOp = (A + c2)
20105 // Use = OtherOp * ConstNode <-- visiting Use.
20106 //
20107 // If we make this transformation, we will have a common
20108 // multiply (CONST * A) after we also do the same transformation
20109 // to the "t2" instruction.
20110 if (OtherOp->getOpcode() == ISD::ADD &&
20112 OtherOp->getOperand(0).getNode() == MulVar)
20113 return true;
20114 }
20115 }
20116
20117 // Didn't find a case where this would be profitable.
20118 return false;
20119}
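
// Worked example (illustrative, not from the original source): with
// AddNode = (add %a, 3) and ConstNode = 5, the fold
// (mul (add %a, 3), 5) -> (add (mul %a, 5), 15) is only reported profitable
// when the add has one use and the target approves, or when another user of
// the constant already computes (mul %a, 5) (directly, or via another
// (add %a, c2) that will be folded the same way), so the multiply is shared.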
20120
20121SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
20122 unsigned NumStores) {
20123 SmallVector<SDValue, 8> Chains;
20124 SmallPtrSet<const SDNode *, 8> Visited;
20125 SDLoc StoreDL(StoreNodes[0].MemNode);
20126
20127 for (unsigned i = 0; i < NumStores; ++i) {
20128 Visited.insert(StoreNodes[i].MemNode);
20129 }
20130
20131 // don't include nodes that are children or repeated nodes.
20132 for (unsigned i = 0; i < NumStores; ++i) {
20133 if (Visited.insert(StoreNodes[i].MemNode->getChain().getNode()).second)
20134 Chains.push_back(StoreNodes[i].MemNode->getChain());
20135 }
20136
20137 assert(!Chains.empty() && "Chain should have generated a chain");
20138 return DAG.getTokenFactor(StoreDL, Chains);
20139}
20140
20141bool DAGCombiner::hasSameUnderlyingObj(ArrayRef<MemOpLink> StoreNodes) {
20142 const Value *UnderlyingObj = nullptr;
20143 for (const auto &MemOp : StoreNodes) {
20144 const MachineMemOperand *MMO = MemOp.MemNode->getMemOperand();
20145 // A pseudo value such as a stack frame has its own frame index and size; we
20146 // should not reuse the first store's frame index for other frames.
20147 if (MMO->getPseudoValue())
20148 return false;
20149
20150 if (!MMO->getValue())
20151 return false;
20152
20153 const Value *Obj = getUnderlyingObject(MMO->getValue());
20154
20155 if (UnderlyingObj && UnderlyingObj != Obj)
20156 return false;
20157
20158 if (!UnderlyingObj)
20159 UnderlyingObj = Obj;
20160 }
20161
20162 return true;
20163}
20164
20165bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
20166 SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
20167 bool IsConstantSrc, bool UseVector, bool UseTrunc) {
20168 // Make sure we have something to merge.
20169 if (NumStores < 2)
20170 return false;
20171
20172 assert((!UseTrunc || !UseVector) &&
20173 "This optimization cannot emit a vector truncating store");
20174
20175 // The latest Node in the DAG.
20176 SDLoc DL(StoreNodes[0].MemNode);
20177
20178 TypeSize ElementSizeBits = MemVT.getStoreSizeInBits();
20179 unsigned SizeInBits = NumStores * ElementSizeBits;
20180 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
20181
20182 std::optional<MachineMemOperand::Flags> Flags;
20183 AAMDNodes AAInfo;
20184 for (unsigned I = 0; I != NumStores; ++I) {
20185 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
20186 if (!Flags) {
20187 Flags = St->getMemOperand()->getFlags();
20188 AAInfo = St->getAAInfo();
20189 continue;
20190 }
20191 // Skip merging if there's an inconsistent flag.
20192 if (Flags != St->getMemOperand()->getFlags())
20193 return false;
20194 // Concatenate AA metadata.
20195 AAInfo = AAInfo.concat(St->getAAInfo());
20196 }
20197
20198 EVT StoreTy;
20199 if (UseVector) {
20200 unsigned Elts = NumStores * NumMemElts;
20201 // Get the type for the merged vector store.
20202 StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
20203 } else
20204 StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
20205
20206 SDValue StoredVal;
20207 if (UseVector) {
20208 if (IsConstantSrc) {
20209 SmallVector<SDValue, 8> BuildVector;
20210 for (unsigned I = 0; I != NumStores; ++I) {
20211 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
20212 SDValue Val = St->getValue();
20213 // If constant is of the wrong type, convert it now. This comes up
20214 // when one of our stores was truncating.
20215 if (MemVT != Val.getValueType()) {
20216 Val = peekThroughBitcasts(Val);
20217 // Deal with constants of wrong size.
20218 if (ElementSizeBits != Val.getValueSizeInBits()) {
20219 auto *C = dyn_cast<ConstantSDNode>(Val);
20220 if (!C)
20221 // Not clear how to truncate FP values.
20222 // TODO: Handle truncation of build_vector constants
20223 return false;
20224
20225 EVT IntMemVT =
20226 EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
20227 Val = DAG.getConstant(C->getAPIntValue()
20228 .zextOrTrunc(Val.getValueSizeInBits())
20229 .zextOrTrunc(ElementSizeBits),
20230 SDLoc(C), IntMemVT);
20231 }
20232 // Make sure the correctly sized value is bitcast to the correct type.
20233 Val = DAG.getBitcast(MemVT, Val);
20234 }
20235 BuildVector.push_back(Val);
20236 }
20237 StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
20238 : ISD::BUILD_VECTOR,
20239 DL, StoreTy, BuildVector);
20240 } else {
20241 SmallVector<SDValue, 8> Ops;
20242 for (unsigned i = 0; i < NumStores; ++i) {
20243 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
20244 SDValue Val = peekThroughBitcasts(St->getValue());
20245 // All operands of BUILD_VECTOR / CONCAT_VECTOR must be of
20246 // type MemVT. If the underlying value is not the correct
20247 // type, but it is an extraction of an appropriate vector we
20248 // can recast Val to be of the correct type. This may require
20249 // converting between EXTRACT_VECTOR_ELT and
20250 // EXTRACT_SUBVECTOR.
20251 if ((MemVT != Val.getValueType()) &&
20252 (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
20253 Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
20254 EVT MemVTScalarTy = MemVT.getScalarType();
20255 // We may need to add a bitcast here to get types to line up.
20256 if (MemVTScalarTy != Val.getValueType().getScalarType()) {
20257 Val = DAG.getBitcast(MemVT, Val);
20258 } else if (MemVT.isVector() &&
20259 Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
20260 Val = DAG.getNode(ISD::BUILD_VECTOR, DL, MemVT, Val);
20261 } else {
20262 unsigned OpC = MemVT.isVector() ? ISD::EXTRACT_SUBVECTOR
20263 : ISD::EXTRACT_VECTOR_ELT;
20264 SDValue Vec = Val.getOperand(0);
20265 SDValue Idx = Val.getOperand(1);
20266 Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Idx);
20267 }
20268 }
20269 Ops.push_back(Val);
20270 }
20271
20272 // Build the extracted vector elements back into a vector.
20273 StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
20274 : ISD::BUILD_VECTOR,
20275 DL, StoreTy, Ops);
20276 }
20277 } else {
20278 // We should always use a vector store when merging extracted vector
20279 // elements, so this path implies a store of constants.
20280 assert(IsConstantSrc && "Merged vector elements should use vector store");
20281
20282 APInt StoreInt(SizeInBits, 0);
20283
20284 // Construct a single integer constant which is made of the smaller
20285 // constant inputs.
20286 bool IsLE = DAG.getDataLayout().isLittleEndian();
20287 for (unsigned i = 0; i < NumStores; ++i) {
20288 unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
20289 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
20290
20291 SDValue Val = St->getValue();
20292 Val = peekThroughBitcasts(Val);
20293 StoreInt <<= ElementSizeBits;
20294 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
20295 StoreInt |= C->getAPIntValue()
20296 .zextOrTrunc(ElementSizeBits)
20297 .zextOrTrunc(SizeInBits);
20298 } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
20299 StoreInt |= C->getValueAPF()
20300 .bitcastToAPInt()
20301 .zextOrTrunc(ElementSizeBits)
20302 .zextOrTrunc(SizeInBits);
20303 // If fp truncation is necessary give up for now.
20304 if (MemVT.getSizeInBits() != ElementSizeBits)
20305 return false;
20306 } else if (ISD::isBuildVectorOfConstantSDNodes(Val.getNode()) ||
20307 ISD::isBuildVectorOfConstantFPSDNodes(Val.getNode())) {
20308 // Not yet handled
20309 return false;
20310 } else {
20311 llvm_unreachable("Invalid constant element type");
20312 }
20313 }
20314
20315 // Create the new Load and Store operations.
20316 StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
20317 }
20318
20319 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
20320 SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);
20321 bool CanReusePtrInfo = hasSameUnderlyingObj(StoreNodes);
20322
20323 // Make sure we use a trunc store if it's necessary to be legal.
20324 // When generating the new widened store, if the first store's pointer info
20325 // cannot be reused, discard the pointer info except for the address space,
20326 // because the widened store can no longer be represented by the original
20327 // pointer info, which described only the narrower memory object.
20328 SDValue NewStore;
20329 if (!UseTrunc) {
20330 NewStore = DAG.getStore(
20331 NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
20332 CanReusePtrInfo
20333 ? FirstInChain->getPointerInfo()
20334 : MachinePointerInfo(FirstInChain->getPointerInfo().getAddrSpace()),
20335 FirstInChain->getAlign(), *Flags, AAInfo);
20336 } else { // Must be realized as a trunc store
20337 EVT LegalizedStoredValTy =
20338 TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
20339 unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits();
20340 ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
20341 SDValue ExtendedStoreVal =
20342 DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
20343 LegalizedStoredValTy);
20344 NewStore = DAG.getTruncStore(
20345 NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
20346 CanReusePtrInfo
20347 ? FirstInChain->getPointerInfo()
20348 : MachinePointerInfo(FirstInChain->getPointerInfo().getAddrSpace()),
20349 StoredVal.getValueType() /*TVT*/, FirstInChain->getAlign(), *Flags,
20350 AAInfo);
20351 }
20352
20353 // Replace all merged stores with the new store.
20354 for (unsigned i = 0; i < NumStores; ++i)
20355 CombineTo(StoreNodes[i].MemNode, NewStore);
20356
20357 AddToWorklist(NewChain.getNode());
20358 return true;
20359}
20360
20361void DAGCombiner::getStoreMergeCandidates(
20362 StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes,
20363 SDNode *&RootNode) {
20364 // This holds the base pointer, index, and the offset in bytes from the base
20365 // pointer. We must have a base and an offset. Do not handle stores to undef
20366 // base pointers.
20367 BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
20368 if (!BasePtr.getBase().getNode() || BasePtr.getBase().isUndef())
20369 return;
20370
20371 SDValue Val = peekThroughBitcasts(St->getValue());
20372 StoreSource StoreSrc = getStoreSource(Val);
20373 assert(StoreSrc != StoreSource::Unknown && "Expected known source for store");
20374
20375 // Match on loadbaseptr if relevant.
20376 EVT MemVT = St->getMemoryVT();
20377 BaseIndexOffset LBasePtr;
20378 EVT LoadVT;
20379 if (StoreSrc == StoreSource::Load) {
20380 auto *Ld = cast<LoadSDNode>(Val);
20381 LBasePtr = BaseIndexOffset::match(Ld, DAG);
20382 LoadVT = Ld->getMemoryVT();
20383 // Load and store should be the same type.
20384 if (MemVT != LoadVT)
20385 return;
20386 // Loads must only have one use.
20387 if (!Ld->hasNUsesOfValue(1, 0))
20388 return;
20389 // The memory operands must not be volatile/indexed/atomic.
20390 // TODO: May be able to relax for unordered atomics (see D66309)
20391 if (!Ld->isSimple() || Ld->isIndexed())
20392 return;
20393 }
20394 auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
20395 int64_t &Offset) -> bool {
20396 // The memory operands must not be volatile/indexed/atomic.
20397 // TODO: May be able to relax for unordered atomics (see D66309)
20398 if (!Other->isSimple() || Other->isIndexed())
20399 return false;
20400 // Don't mix temporal stores with non-temporal stores.
20401 if (St->isNonTemporal() != Other->isNonTemporal())
20402 return false;
20403 if (!TLI.areTwoSDNodeTargetMMOFlagsMergeable(*St, *Other))
20404 return false;
20405 SDValue OtherBC = peekThroughBitcasts(Other->getValue());
20406 // Allow merging constants of different types as integers.
20407 bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
20408 : Other->getMemoryVT() != MemVT;
20409 switch (StoreSrc) {
20410 case StoreSource::Load: {
20411 if (NoTypeMatch)
20412 return false;
20413 // The Load's Base Ptr must also match.
20414 auto *OtherLd = dyn_cast<LoadSDNode>(OtherBC);
20415 if (!OtherLd)
20416 return false;
20417 BaseIndexOffset LPtr = BaseIndexOffset::match(OtherLd, DAG);
20418 if (LoadVT != OtherLd->getMemoryVT())
20419 return false;
20420 // Loads must only have one use.
20421 if (!OtherLd->hasNUsesOfValue(1, 0))
20422 return false;
20423 // The memory operands must not be volatile/indexed/atomic.
20424 // TODO: May be able to relax for unordered atomics (see D66309)
20425 if (!OtherLd->isSimple() || OtherLd->isIndexed())
20426 return false;
20427 // Don't mix temporal loads with non-temporal loads.
20428 if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal())
20429 return false;
20430 if (!TLI.areTwoSDNodeTargetMMOFlagsMergeable(*cast<LoadSDNode>(Val),
20431 *OtherLd))
20432 return false;
20433 if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
20434 return false;
20435 break;
20436 }
20437 case StoreSource::Constant:
20438 if (NoTypeMatch)
20439 return false;
20440 if (getStoreSource(OtherBC) != StoreSource::Constant)
20441 return false;
20442 break;
20443 case StoreSource::Extract:
20444 // Do not merge truncated stores here.
20445 if (Other->isTruncatingStore())
20446 return false;
20447 if (!MemVT.bitsEq(OtherBC.getValueType()))
20448 return false;
20449 if (OtherBC.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
20450 OtherBC.getOpcode() != ISD::EXTRACT_SUBVECTOR)
20451 return false;
20452 break;
20453 default:
20454 llvm_unreachable("Unhandled store source for merging");
20455 }
20456 Ptr = BaseIndexOffset::match(Other, DAG);
20457 return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
20458 };
20459
20460 // Check if the pair of StoreNode and the RootNode already bail out many
20461 // times which is over the limit in dependence check.
20462 auto OverLimitInDependenceCheck = [&](SDNode *StoreNode,
20463 SDNode *RootNode) -> bool {
20464 auto RootCount = StoreRootCountMap.find(StoreNode);
20465 return RootCount != StoreRootCountMap.end() &&
20466 RootCount->second.first == RootNode &&
20467 RootCount->second.second > StoreMergeDependenceLimit;
20468 };
20469
20470 auto TryToAddCandidate = [&](SDNode::use_iterator UseIter) {
20471 // This must be a chain use.
20472 if (UseIter.getOperandNo() != 0)
20473 return;
20474 if (auto *OtherStore = dyn_cast<StoreSDNode>(*UseIter)) {
20475 BaseIndexOffset Ptr;
20476 int64_t PtrDiff;
20477 if (CandidateMatch(OtherStore, Ptr, PtrDiff) &&
20478 !OverLimitInDependenceCheck(OtherStore, RootNode))
20479 StoreNodes.push_back(MemOpLink(OtherStore, PtrDiff));
20480 }
20481 };
20482
20483 // We are looking for a root node which is an ancestor of all mergeable
20484 // stores. We search up through a load, to our root, and then down
20485 // through all children. For instance, we will find Store{1,2,3} if
20486 // St is Store1, Store2, or Store3, where the root is not a load,
20487 // which is always true for non-volatile ops. TODO: Expand
20488 // the search to find all valid candidates through multiple layers of loads.
20489 //
20490 // Root
20491 // |-------|-------|
20492 // Load Load Store3
20493 // | |
20494 // Store1 Store2
20495 //
20496 // FIXME: We should be able to climb and
20497 // descend TokenFactors to find candidates as well.
20498
20499 RootNode = St->getChain().getNode();
20500
20501 unsigned NumNodesExplored = 0;
20502 const unsigned MaxSearchNodes = 1024;
20503 if (auto *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
20504 RootNode = Ldn->getChain().getNode();
20505 for (auto I = RootNode->use_begin(), E = RootNode->use_end();
20506 I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored) {
20507 if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) { // walk down chain
20508 for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
20509 TryToAddCandidate(I2);
20510 }
20511 // Check stores that depend on the root (e.g. Store 3 in the chart above).
20512 if (I.getOperandNo() == 0 && isa<StoreSDNode>(*I)) {
20513 TryToAddCandidate(I);
20514 }
20515 }
20516 } else {
20517 for (auto I = RootNode->use_begin(), E = RootNode->use_end();
20518 I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored)
20519 TryToAddCandidate(I);
20520 }
20521}
20522
20523// We need to check that merging these stores does not cause a loop in the
20524// DAG. Any store candidate may depend on another candidate indirectly through
20525// its operands. Check in parallel by searching up from operands of candidates.
20526bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
20527 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
20528 SDNode *RootNode) {
20529 // FIXME: We should be able to truncate a full search of
20530 // predecessors by doing a BFS and keeping tabs on the originating
20531 // stores from which worklist nodes come, in a similar way to
20532 // TokenFactor simplification.
20533
20534 SmallPtrSet<const SDNode *, 32> Visited;
20535 SmallVector<const SDNode *, 8> Worklist;
20536
20537 // RootNode is a predecessor to all candidates so we need not search
20538 // past it. Add RootNode (peeking through TokenFactors). Do not count
20539 // these towards size check.
20540
20541 Worklist.push_back(RootNode);
20542 while (!Worklist.empty()) {
20543 auto N = Worklist.pop_back_val();
20544 if (!Visited.insert(N).second)
20545 continue; // Already present in Visited.
20546 if (N->getOpcode() == ISD::TokenFactor) {
20547 for (SDValue Op : N->ops())
20548 Worklist.push_back(Op.getNode());
20549 }
20550 }
20551
20552 // Don't count pruning nodes towards max.
20553 unsigned int Max = 1024 + Visited.size();
20554 // Search Ops of store candidates.
20555 for (unsigned i = 0; i < NumStores; ++i) {
20556 SDNode *N = StoreNodes[i].MemNode;
20557 // Of the 4 Store Operands:
20558 // * Chain (Op 0) -> We have already considered these
20559 // in candidate selection, but only by following the
20560 // chain dependencies. We could still have a chain
20561 // dependency to a load, that has a non-chain dep to
20562 // another load, that depends on a store, etc. So it is
20563 // possible to have dependencies that consist of a mix
20564 // of chain and non-chain deps, and we need to include
20565 // chain operands in the analysis here.
20566 // * Value (Op 1) -> Cycles may happen (e.g. through load chains)
20567 // * Address (Op 2) -> Merged addresses may only vary by a fixed constant,
20568 // but aren't necessarily from the same base node, so
20569 // cycles are possible (e.g. via indexed store).
20570 // * (Op 3) -> Represents the pre or post-indexing offset (or undef for
20571 // non-indexed stores). Not constant on all targets (e.g. ARM)
20572 // and so can participate in a cycle.
20573 for (const SDValue &Op : N->op_values())
20574 Worklist.push_back(Op.getNode());
20575 }
20576 // Search through DAG. We can stop early if we find a store node.
20577 for (unsigned i = 0; i < NumStores; ++i)
20578 if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
20579 Max)) {
20580 // If the searching bail out, record the StoreNode and RootNode in the
20581 // StoreRootCountMap. If we have seen the pair many times over a limit,
20582 // we won't add the StoreNode into StoreNodes set again.
20583 if (Visited.size() >= Max) {
20584 auto &RootCount = StoreRootCountMap[StoreNodes[i].MemNode];
20585 if (RootCount.first == RootNode)
20586 RootCount.second++;
20587 else
20588 RootCount = {RootNode, 1};
20589 }
20590 return false;
20591 }
20592 return true;
20593}
20594
20595unsigned
20596DAGCombiner::getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
20597 int64_t ElementSizeBytes) const {
20598 while (true) {
20599 // Find a store past the width of the first store.
20600 size_t StartIdx = 0;
20601 while ((StartIdx + 1 < StoreNodes.size()) &&
20602 StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
20603 StoreNodes[StartIdx + 1].OffsetFromBase)
20604 ++StartIdx;
20605
20606 // Bail if we don't have enough candidates to merge.
20607 if (StartIdx + 1 >= StoreNodes.size())
20608 return 0;
20609
20610 // Trim stores that overlapped with the first store.
20611 if (StartIdx)
20612 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
20613
20614 // Scan the memory operations on the chain and find the first
20615 // non-consecutive store memory address.
20616 unsigned NumConsecutiveStores = 1;
20617 int64_t StartAddress = StoreNodes[0].OffsetFromBase;
20618 // Check that the addresses are consecutive starting from the second
20619 // element in the list of stores.
20620 for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
20621 int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
20622 if (CurrAddress - StartAddress != (ElementSizeBytes * i))
20623 break;
20624 NumConsecutiveStores = i + 1;
20625 }
20626 if (NumConsecutiveStores > 1)
20627 return NumConsecutiveStores;
20628
20629 // There are no consecutive stores at the start of the list.
20630 // Remove the first store and try again.
20631 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
20632 }
20633}
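
// Illustrative example (not from the original source): with 4-byte elements
// and candidates already sorted to offsets {0, 4, 8, 20} from the common
// base, getConsecutiveStores returns 3 (offsets 0, 4 and 8 are consecutive);
// the store at offset 20 stays in StoreNodes for a later iteration of the
// caller.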
20634
20635bool DAGCombiner::tryStoreMergeOfConstants(
20636 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
20637 EVT MemVT, SDNode *RootNode, bool AllowVectors) {
20638 LLVMContext &Context = *DAG.getContext();
20639 const DataLayout &DL = DAG.getDataLayout();
20640 int64_t ElementSizeBytes = MemVT.getStoreSize();
20641 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
20642 bool MadeChange = false;
20643
20644 // Store the constants into memory as one consecutive store.
20645 while (NumConsecutiveStores >= 2) {
20646 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
20647 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
20648 Align FirstStoreAlign = FirstInChain->getAlign();
20649 unsigned LastLegalType = 1;
20650 unsigned LastLegalVectorType = 1;
20651 bool LastIntegerTrunc = false;
20652 bool NonZero = false;
20653 unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
20654 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
20655 StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
20656 SDValue StoredVal = ST->getValue();
20657 bool IsElementZero = false;
20658 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
20659 IsElementZero = C->isZero();
20660 else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
20661 IsElementZero = C->getConstantFPValue()->isNullValue();
20662 else if (ISD::isBuildVectorAllZeros(StoredVal.getNode()))
20663 IsElementZero = true;
20664 if (IsElementZero) {
20665 if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
20666 FirstZeroAfterNonZero = i;
20667 }
20668 NonZero |= !IsElementZero;
20669
20670 // Find a legal type for the constant store.
20671 unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
20672 EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
20673 unsigned IsFast = 0;
20674
20675 // Break early when size is too large to be legal.
20676 if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
20677 break;
20678
20679 if (TLI.isTypeLegal(StoreTy) &&
20680 TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
20681 DAG.getMachineFunction()) &&
20682 TLI.allowsMemoryAccess(Context, DL, StoreTy,
20683 *FirstInChain->getMemOperand(), &IsFast) &&
20684 IsFast) {
20685 LastIntegerTrunc = false;
20686 LastLegalType = i + 1;
20687 // Or check whether a truncstore is legal.
20688 } else if (TLI.getTypeAction(Context, StoreTy) ==
20689 TargetLowering::TypePromoteInteger) {
20690 EVT LegalizedStoredValTy =
20691 TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
20692 if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
20693 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy,
20694 DAG.getMachineFunction()) &&
20695 TLI.allowsMemoryAccess(Context, DL, StoreTy,
20696 *FirstInChain->getMemOperand(), &IsFast) &&
20697 IsFast) {
20698 LastIntegerTrunc = true;
20699 LastLegalType = i + 1;
20700 }
20701 }
20702
20703 // We only use vectors if the target allows it and the function is not
20704 // marked with the noimplicitfloat attribute.
20705 if (TLI.storeOfVectorConstantIsCheap(!NonZero, MemVT, i + 1, FirstStoreAS) &&
20706 AllowVectors) {
20707 // Find a legal type for the vector store.
20708 unsigned Elts = (i + 1) * NumMemElts;
20709 EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
20710 if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
20711 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&
20712 TLI.allowsMemoryAccess(Context, DL, Ty,
20713 *FirstInChain->getMemOperand(), &IsFast) &&
20714 IsFast)
20715 LastLegalVectorType = i + 1;
20716 }
20717 }
20718
20719 bool UseVector = (LastLegalVectorType > LastLegalType) && AllowVectors;
20720 unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
20721 bool UseTrunc = LastIntegerTrunc && !UseVector;
20722
20723 // Check if we found a legal integer type that creates a meaningful
20724 // merge.
20725 if (NumElem < 2) {
20726 // We know that candidate stores are in order and of correct
20727 // shape. While there is no mergeable sequence from the
20728 // beginning one may start later in the sequence. The only
20729 // reason a merge of size N could have failed where another of
20730 // the same size would not have, is if the alignment has
20731 // improved or we've dropped a non-zero value. Drop as many
20732 // candidates as we can here.
20733 unsigned NumSkip = 1;
20734 while ((NumSkip < NumConsecutiveStores) &&
20735 (NumSkip < FirstZeroAfterNonZero) &&
20736 (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
20737 NumSkip++;
20738
20739 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
20740 NumConsecutiveStores -= NumSkip;
20741 continue;
20742 }
20743
20744 // Check that we can merge these candidates without causing a cycle.
20745 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
20746 RootNode)) {
20747 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
20748 NumConsecutiveStores -= NumElem;
20749 continue;
20750 }
20751
20752 MadeChange |= mergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
20753 /*IsConstantSrc*/ true,
20754 UseVector, UseTrunc);
20755
20756 // Remove merged stores for next iteration.
20757 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
20758 NumConsecutiveStores -= NumElem;
20759 }
20760 return MadeChange;
20761}
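
// Illustrative sketch (not part of the original source) of a constant store
// merge performed above, assuming an i32 store at this address is legal, fast
// and mergeable on a little-endian target:
//
//   store i8 1, ptr %p
//   store i8 2, ptr %p + 1
//   store i8 3, ptr %p + 2    ==>    store i32 0x04030201, ptr %p
//   store i8 4, ptr %p + 3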
20762
20763bool DAGCombiner::tryStoreMergeOfExtracts(
20764 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
20765 EVT MemVT, SDNode *RootNode) {
20766 LLVMContext &Context = *DAG.getContext();
20767 const DataLayout &DL = DAG.getDataLayout();
20768 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
20769 bool MadeChange = false;
20770
20771 // Loop on Consecutive Stores on success.
20772 while (NumConsecutiveStores >= 2) {
20773 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
20774 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
20775 Align FirstStoreAlign = FirstInChain->getAlign();
20776 unsigned NumStoresToMerge = 1;
20777 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
20778 // Find a legal type for the vector store.
20779 unsigned Elts = (i + 1) * NumMemElts;
20780 EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
20781 unsigned IsFast = 0;
20782
20783 // Break early when size is too large to be legal.
20784 if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
20785 break;
20786
20787 if (TLI.isTypeLegal(Ty) &&
20788 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&
20789 TLI.allowsMemoryAccess(Context, DL, Ty,
20790 *FirstInChain->getMemOperand(), &IsFast) &&
20791 IsFast)
20792 NumStoresToMerge = i + 1;
20793 }
20794
20795 // Check if we found a legal vector type creating a meaningful
20796 // merge.
20797 if (NumStoresToMerge < 2) {
20798 // We know that candidate stores are in order and of correct
20799 // shape. While there is no mergeable sequence from the
20800 // beginning one may start later in the sequence. The only
20801 // reason a merge of size N could have failed where another of
20802 // the same size would not have, is if the alignment has
20803 // improved. Drop as many candidates as we can here.
20804 unsigned NumSkip = 1;
20805 while ((NumSkip < NumConsecutiveStores) &&
20806 (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
20807 NumSkip++;
20808
20809 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
20810 NumConsecutiveStores -= NumSkip;
20811 continue;
20812 }
20813
20814 // Check that we can merge these candidates without causing a cycle.
20815 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumStoresToMerge,
20816 RootNode)) {
20817 StoreNodes.erase(StoreNodes.begin(),
20818 StoreNodes.begin() + NumStoresToMerge);
20819 NumConsecutiveStores -= NumStoresToMerge;
20820 continue;
20821 }
20822
20823 MadeChange |= mergeStoresOfConstantsOrVecElts(
20824 StoreNodes, MemVT, NumStoresToMerge, /*IsConstantSrc*/ false,
20825 /*UseVector*/ true, /*UseTrunc*/ false);
20826
20827 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumStoresToMerge);
20828 NumConsecutiveStores -= NumStoresToMerge;
20829 }
20830 return MadeChange;
20831}
20832
20833bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
20834 unsigned NumConsecutiveStores, EVT MemVT,
20835 SDNode *RootNode, bool AllowVectors,
20836 bool IsNonTemporalStore,
20837 bool IsNonTemporalLoad) {
20838 LLVMContext &Context = *DAG.getContext();
20839 const DataLayout &DL = DAG.getDataLayout();
20840 int64_t ElementSizeBytes = MemVT.getStoreSize();
20841 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
20842 bool MadeChange = false;
20843
20844 // Look for load nodes which are used by the stored values.
20845 SmallVector<MemOpLink, 8> LoadNodes;
20846
20847 // Find acceptable loads. Loads need to have the same chain (token factor),
20848 // must not be zext, volatile, indexed, and they must be consecutive.
20849 BaseIndexOffset LdBasePtr;
20850
20851 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
20852 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
20853 SDValue Val = peekThroughBitcasts(St->getValue());
20854 LoadSDNode *Ld = cast<LoadSDNode>(Val);
20855
20856 BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
20857 // If this is not the first ptr that we check.
20858 int64_t LdOffset = 0;
20859 if (LdBasePtr.getBase().getNode()) {
20860 // The base ptr must be the same.
20861 if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
20862 break;
20863 } else {
20864 // Check that all other base pointers are the same as this one.
20865 LdBasePtr = LdPtr;
20866 }
20867
20868 // We found a potential memory operand to merge.
20869 LoadNodes.push_back(MemOpLink(Ld, LdOffset));
20870 }
20871
20872 while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
20873 Align RequiredAlignment;
20874 bool NeedRotate = false;
20875 if (LoadNodes.size() == 2) {
20876 // If we have load/store pair instructions and we only have two values,
20877 // don't bother merging.
20878 if (TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
20879 StoreNodes[0].MemNode->getAlign() >= RequiredAlignment) {
20880 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
20881 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
20882 break;
20883 }
20884 // If the loads are reversed, see if we can rotate the halves into place.
20885 int64_t Offset0 = LoadNodes[0].OffsetFromBase;
20886 int64_t Offset1 = LoadNodes[1].OffsetFromBase;
20887 EVT PairVT = EVT::getIntegerVT(Context, ElementSizeBytes * 8 * 2);
20888 if (Offset0 - Offset1 == ElementSizeBytes &&
20889 (hasOperation(ISD::ROTL, PairVT) ||
20890 hasOperation(ISD::ROTR, PairVT))) {
20891 std::swap(LoadNodes[0], LoadNodes[1]);
20892 NeedRotate = true;
20893 }
20894 }
20895 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
20896 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
20897 Align FirstStoreAlign = FirstInChain->getAlign();
20898 LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
20899
20900 // Scan the memory operations on the chain and find the first
20901 // non-consecutive load memory address. These variables hold the index in
20902 // the store node array.
20903
20904 unsigned LastConsecutiveLoad = 1;
20905
20906 // This variable refers to the size and not index in the array.
20907 unsigned LastLegalVectorType = 1;
20908 unsigned LastLegalIntegerType = 1;
20909 bool isDereferenceable = true;
20910 bool DoIntegerTruncate = false;
20911 int64_t StartAddress = LoadNodes[0].OffsetFromBase;
20912 SDValue LoadChain = FirstLoad->getChain();
20913 for (unsigned i = 1; i < LoadNodes.size(); ++i) {
20914 // All loads must share the same chain.
20915 if (LoadNodes[i].MemNode->getChain() != LoadChain)
20916 break;
20917
20918 int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
20919 if (CurrAddress - StartAddress != (ElementSizeBytes * i))
20920 break;
20921 LastConsecutiveLoad = i;
20922
20923 if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
20924 isDereferenceable = false;
20925
20926 // Find a legal type for the vector store.
20927 unsigned Elts = (i + 1) * NumMemElts;
20928 EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
20929
20930 // Break early when size is too large to be legal.
20931 if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
20932 break;
20933
20934 unsigned IsFastSt = 0;
20935 unsigned IsFastLd = 0;
20936 // Don't try vector types if we need a rotate. We may still fail the
20937 // legality checks for the integer type, but we can't handle the rotate
20938 // case with vectors.
20939 // FIXME: We could use a shuffle in place of the rotate.
20940 if (!NeedRotate && TLI.isTypeLegal(StoreTy) &&
20941 TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
20942 DAG.getMachineFunction()) &&
20943 TLI.allowsMemoryAccess(Context, DL, StoreTy,
20944 *FirstInChain->getMemOperand(), &IsFastSt) &&
20945 IsFastSt &&
20946 TLI.allowsMemoryAccess(Context, DL, StoreTy,
20947 *FirstLoad->getMemOperand(), &IsFastLd) &&
20948 IsFastLd) {
20949 LastLegalVectorType = i + 1;
20950 }
20951
20952 // Find a legal type for the integer store.
20953 unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
20954 StoreTy = EVT::getIntegerVT(Context, SizeInBits);
20955 if (TLI.isTypeLegal(StoreTy) &&
20956 TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
20957 DAG.getMachineFunction()) &&
20958 TLI.allowsMemoryAccess(Context, DL, StoreTy,
20959 *FirstInChain->getMemOperand(), &IsFastSt) &&
20960 IsFastSt &&
20961 TLI.allowsMemoryAccess(Context, DL, StoreTy,
20962 *FirstLoad->getMemOperand(), &IsFastLd) &&
20963 IsFastLd) {
20964 LastLegalIntegerType = i + 1;
20965 DoIntegerTruncate = false;
20966 // Or check whether a truncstore and extload is legal.
20967 } else if (TLI.getTypeAction(Context, StoreTy) ==
20968 TargetLowering::TypePromoteInteger) {
20969 EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
20970 if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
20971 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy,
20972 DAG.getMachineFunction()) &&
20973 TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy, StoreTy) &&
20974 TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy, StoreTy) &&
20975 TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
20976 TLI.allowsMemoryAccess(Context, DL, StoreTy,
20977 *FirstInChain->getMemOperand(), &IsFastSt) &&
20978 IsFastSt &&
20979 TLI.allowsMemoryAccess(Context, DL, StoreTy,
20980 *FirstLoad->getMemOperand(), &IsFastLd) &&
20981 IsFastLd) {
20982 LastLegalIntegerType = i + 1;
20983 DoIntegerTruncate = true;
20984 }
20985 }
20986 }
20987
20988 // Only use vector types if the vector type is larger than the integer
20989 // type. If they are the same, use integers.
20990 bool UseVectorTy =
20991 LastLegalVectorType > LastLegalIntegerType && AllowVectors;
20992 unsigned LastLegalType =
20993 std::max(LastLegalVectorType, LastLegalIntegerType);
20994
20995 // We add +1 here because the LastXXX variables refer to location while
20996 // the NumElem refers to array/index size.
20997 unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
20998 NumElem = std::min(LastLegalType, NumElem);
20999 Align FirstLoadAlign = FirstLoad->getAlign();
21000
21001 if (NumElem < 2) {
21002 // We know that candidate stores are in order and of correct
21003 // shape. While there is no mergeable sequence from the
21004 // beginning one may start later in the sequence. The only
21005 // reason a merge of size N could have failed where another of
21006 // the same size would not have is if the alignment of either
21007 // the load or store has improved. Drop as many candidates as we
21008 // can here.
21009 unsigned NumSkip = 1;
21010 while ((NumSkip < LoadNodes.size()) &&
21011 (LoadNodes[NumSkip].MemNode->getAlign() <= FirstLoadAlign) &&
21012 (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
21013 NumSkip++;
21014 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
21015 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
21016 NumConsecutiveStores -= NumSkip;
21017 continue;
21018 }
21019
21020 // Check that we can merge these candidates without causing a cycle.
21021 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
21022 RootNode)) {
21023 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
21024 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
21025 NumConsecutiveStores -= NumElem;
21026 continue;
21027 }
21028
21029 // Find if it is better to use vectors or integers to load and store
21030 // to memory.
21031 EVT JointMemOpVT;
21032 if (UseVectorTy) {
21033 // Find a legal type for the vector store.
21034 unsigned Elts = NumElem * NumMemElts;
21035 JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
21036 } else {
21037 unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
21038 JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
21039 }
21040
21041 SDLoc LoadDL(LoadNodes[0].MemNode);
21042 SDLoc StoreDL(StoreNodes[0].MemNode);
21043
21044 // The merged loads are required to have the same incoming chain, so
21045 // using the first's chain is acceptable.
21046
21047 SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
21048 bool CanReusePtrInfo = hasSameUnderlyingObj(StoreNodes);
21049 AddToWorklist(NewStoreChain.getNode());
21050
21051 MachineMemOperand::Flags LdMMOFlags =
21052 isDereferenceable ? MachineMemOperand::MODereferenceable
21053 : MachineMemOperand::MONone;
21054 if (IsNonTemporalLoad)
21055 LdMMOFlags |= MachineMemOperand::MONonTemporal;
21056
21057 LdMMOFlags |= TLI.getTargetMMOFlags(*FirstLoad);
21058
21059 MachineMemOperand::Flags StMMOFlags = IsNonTemporalStore
21060 ? MachineMemOperand::MONonTemporal
21061 : MachineMemOperand::MONone;
21062
21063 StMMOFlags |= TLI.getTargetMMOFlags(*StoreNodes[0].MemNode);
21064
21065 SDValue NewLoad, NewStore;
21066 if (UseVectorTy || !DoIntegerTruncate) {
21067 NewLoad = DAG.getLoad(
21068 JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(),
21069 FirstLoad->getPointerInfo(), FirstLoadAlign, LdMMOFlags);
21070 SDValue StoreOp = NewLoad;
21071 if (NeedRotate) {
21072 unsigned LoadWidth = ElementSizeBytes * 8 * 2;
21073 assert(JointMemOpVT == EVT::getIntegerVT(Context, LoadWidth) &&
21074 "Unexpected type for rotate-able load pair");
21075 SDValue RotAmt =
21076 DAG.getShiftAmountConstant(LoadWidth / 2, JointMemOpVT, LoadDL);
21077 // Target can convert to the identical ROTR if it does not have ROTL.
21078 StoreOp = DAG.getNode(ISD::ROTL, LoadDL, JointMemOpVT, NewLoad, RotAmt);
21079 }
21080 NewStore = DAG.getStore(
21081 NewStoreChain, StoreDL, StoreOp, FirstInChain->getBasePtr(),
21082 CanReusePtrInfo ? FirstInChain->getPointerInfo()
21083 : MachinePointerInfo(FirstStoreAS),
21084 FirstStoreAlign, StMMOFlags);
21085 } else { // This must be the truncstore/extload case
21086 EVT ExtendedTy =
21087 TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
21088 NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
21089 FirstLoad->getChain(), FirstLoad->getBasePtr(),
21090 FirstLoad->getPointerInfo(), JointMemOpVT,
21091 FirstLoadAlign, LdMMOFlags);
21092 NewStore = DAG.getTruncStore(
21093 NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
21094 CanReusePtrInfo ? FirstInChain->getPointerInfo()
21095 : MachinePointerInfo(FirstStoreAS),
21096 JointMemOpVT, FirstInChain->getAlign(),
21097 FirstInChain->getMemOperand()->getFlags());
21098 }
21099
21100 // Transfer chain users from old loads to the new load.
21101 for (unsigned i = 0; i < NumElem; ++i) {
21102 LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
21103 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
21104 SDValue(NewLoad.getNode(), 1));
21105 }
21106
21107 // Replace all stores with the new store. Recursively remove corresponding
21108 // values if they are no longer used.
21109 for (unsigned i = 0; i < NumElem; ++i) {
21110 SDValue Val = StoreNodes[i].MemNode->getOperand(1);
21111 CombineTo(StoreNodes[i].MemNode, NewStore);
21112 if (Val->use_empty())
21113 recursivelyDeleteUnusedNodes(Val.getNode());
21114 }
21115
21116 MadeChange = true;
21117 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
21118 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
21119 NumConsecutiveStores -= NumElem;
21120 }
21121 return MadeChange;
21122}
21123
21124bool DAGCombiner::mergeConsecutiveStores(StoreSDNode *St) {
21125 if (OptLevel == CodeGenOptLevel::None || !EnableStoreMerging)
21126 return false;
21127
21128 // TODO: Extend this function to merge stores of scalable vectors.
21129 // (i.e. two <vscale x 8 x i8> stores can be merged to one <vscale x 16 x i8>
21130 // store since we know <vscale x 16 x i8> is exactly twice as large as
21131 // <vscale x 8 x i8>). Until then, bail out for scalable vectors.
21132 EVT MemVT = St->getMemoryVT();
21133 if (MemVT.isScalableVT())
21134 return false;
21135 if (!MemVT.isSimple() || MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
21136 return false;
21137
21138 // This function cannot currently deal with non-byte-sized memory sizes.
21139 int64_t ElementSizeBytes = MemVT.getStoreSize();
21140 if (ElementSizeBytes * 8 != (int64_t)MemVT.getSizeInBits())
21141 return false;
21142
21143 // Do not bother looking at stored values that are not constants, loads, or
21144 // extracted vector elements.
21145 SDValue StoredVal = peekThroughBitcasts(St->getValue());
21146 const StoreSource StoreSrc = getStoreSource(StoredVal);
21147 if (StoreSrc == StoreSource::Unknown)
21148 return false;
21149
21150 SmallVector<MemOpLink, 8> StoreNodes;
21151 SDNode *RootNode;
21152 // Find potential store merge candidates by searching through chain sub-DAG
21153 getStoreMergeCandidates(St, StoreNodes, RootNode);
21154
21155 // Check if there is anything to merge.
21156 if (StoreNodes.size() < 2)
21157 return false;
21158
21159 // Sort the memory operands according to their distance from the
21160 // base pointer.
21161 llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) {
21162 return LHS.OffsetFromBase < RHS.OffsetFromBase;
21163 });
21164
21165 bool AllowVectors = !DAG.getMachineFunction().getFunction().hasFnAttribute(
21166 Attribute::NoImplicitFloat);
21167 bool IsNonTemporalStore = St->isNonTemporal();
21168 bool IsNonTemporalLoad = StoreSrc == StoreSource::Load &&
21169 cast<LoadSDNode>(StoredVal)->isNonTemporal();
21170
21171 // Store Merge attempts to merge the lowest stores. This generally
21172 // works out well: if successful, the remaining stores are checked
21173 // after the first collection of stores is merged. However, in the
21174 // case that a non-mergeable store is found first, e.g., {p[-2],
21175 // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent
21176 // mergeable cases. To prevent this, we prune such stores from the
21177 // front of StoreNodes here.
21178 bool MadeChange = false;
21179 while (StoreNodes.size() > 1) {
21180 unsigned NumConsecutiveStores =
21181 getConsecutiveStores(StoreNodes, ElementSizeBytes);
21182 // There are no more stores in the list to examine.
21183 if (NumConsecutiveStores == 0)
21184 return MadeChange;
21185
21186 // We have at least 2 consecutive stores. Try to merge them.
21187 assert(NumConsecutiveStores >= 2 && "Expected at least 2 stores");
21188 switch (StoreSrc) {
21189 case StoreSource::Constant:
21190 MadeChange |= tryStoreMergeOfConstants(StoreNodes, NumConsecutiveStores,
21191 MemVT, RootNode, AllowVectors);
21192 break;
21193
21194 case StoreSource::Extract:
21195 MadeChange |= tryStoreMergeOfExtracts(StoreNodes, NumConsecutiveStores,
21196 MemVT, RootNode);
21197 break;
21198
21199 case StoreSource::Load:
21200 MadeChange |= tryStoreMergeOfLoads(StoreNodes, NumConsecutiveStores,
21201 MemVT, RootNode, AllowVectors,
21202 IsNonTemporalStore, IsNonTemporalLoad);
21203 break;
21204
21205 default:
21206 llvm_unreachable("Unhandled store source type");
21207 }
21208 }
21209 return MadeChange;
21210}
21211
21212SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
21213 SDLoc SL(ST);
21214 SDValue ReplStore;
21215
21216 // Replace the chain to avoid dependency.
21217 if (ST->isTruncatingStore()) {
21218 ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
21219 ST->getBasePtr(), ST->getMemoryVT(),
21220 ST->getMemOperand());
21221 } else {
21222 ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
21223 ST->getMemOperand());
21224 }
21225
21226 // Create token to keep both nodes around.
21227 SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
21228 MVT::Other, ST->getChain(), ReplStore);
21229
21230 // Make sure the new and old chains are cleaned up.
21231 AddToWorklist(Token.getNode());
21232
21233 // Don't add users to work list.
21234 return CombineTo(ST, Token, false);
21235}
21236
21237SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
21238 SDValue Value = ST->getValue();
21239 if (Value.getOpcode() == ISD::TargetConstantFP)
21240 return SDValue();
21241
21242 if (!ISD::isNormalStore(ST))
21243 return SDValue();
21244
21245 SDLoc DL(ST);
21246
21247 SDValue Chain = ST->getChain();
21248 SDValue Ptr = ST->getBasePtr();
21249
21250 const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
21251
21252 // NOTE: If the original store is volatile, this transform must not increase
21253 // the number of stores. For example, on x86-32 an f64 can be stored in one
21254 // processor operation but an i64 (which is not legal) requires two. So the
21255 // transform should not be done in this case.
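// e.g. 'store float 1.0, Ptr' can become 'store i32 0x3F800000, Ptr' without
// changing the store count, whereas splitting an f64 store into two i32
// stores would, so that path below additionally requires a simple
// (non-volatile) store.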
21256
21257 SDValue Tmp;
21258 switch (CFP->getSimpleValueType(0).SimpleTy) {
21259 default:
21260 llvm_unreachable("Unknown FP type");
21261 case MVT::f16: // We don't do this for these yet.
21262 case MVT::bf16:
21263 case MVT::f80:
21264 case MVT::f128:
21265 case MVT::ppcf128:
21266 return SDValue();
21267 case MVT::f32:
21268 if ((isTypeLegal(MVT::i32) && !LegalOperations && ST->isSimple()) ||
21269 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
21270 Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
21271 bitcastToAPInt().getZExtValue(), SDLoc(CFP),
21272 MVT::i32);
21273 return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
21274 }
21275
21276 return SDValue();
21277 case MVT::f64:
21278 if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
21279 ST->isSimple()) ||
21280 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
21281 Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
21282 getZExtValue(), SDLoc(CFP), MVT::i64);
21283 return DAG.getStore(Chain, DL, Tmp,
21284 Ptr, ST->getMemOperand());
21285 }
21286
21287 if (ST->isSimple() && TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32) &&
21288 !TLI.isFPImmLegal(CFP->getValueAPF(), MVT::f64)) {
21289 // Many FP stores are not made apparent until after legalize, e.g. for
21290 // argument passing. Since this is so common, custom legalize the
21291 // 64-bit integer store into two 32-bit stores.
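// e.g. storing f64 1.0 (bit pattern 0x3FF0000000000000) becomes a store of
// i32 0x00000000 at Ptr and i32 0x3FF00000 at Ptr+4 (halves swapped on
// big-endian targets).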
21292 uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
21293 SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
21294 SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
21295 if (DAG.getDataLayout().isBigEndian())
21296 std::swap(Lo, Hi);
21297
21298 MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
21299 AAMDNodes AAInfo = ST->getAAInfo();
21300
21301 SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
21302 ST->getOriginalAlign(), MMOFlags, AAInfo);
21303 Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(4), DL);
21304 SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
21305 ST->getPointerInfo().getWithOffset(4),
21306 ST->getOriginalAlign(), MMOFlags, AAInfo);
21307 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
21308 St0, St1);
21309 }
21310
21311 return SDValue();
21312 }
21313}
21314
21315// (store (insert_vector_elt (load p), x, i), p) -> (store x, p+offset)
21316//
21317 // If a store of a load with an element inserted into it has no other
21318 // uses of the load in between on the chain, then we can consider the
21319 // vector store dead and replace it with just the single scalar element store.
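// e.g. for a v4i32 at p, (store (insert_vector_elt (load p), x, 2), p)
// becomes (store x, p+8).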
21320SDValue DAGCombiner::replaceStoreOfInsertLoad(StoreSDNode *ST) {
21321 SDLoc DL(ST);
21322 SDValue Value = ST->getValue();
21323 SDValue Ptr = ST->getBasePtr();
21324 SDValue Chain = ST->getChain();
21325 if (Value.getOpcode() != ISD::INSERT_VECTOR_ELT || !Value.hasOneUse())
21326 return SDValue();
21327
21328 SDValue Elt = Value.getOperand(1);
21329 SDValue Idx = Value.getOperand(2);
21330
21331 // If the element isn't byte sized or is implicitly truncated then we can't
21332 // compute an offset.
21333 EVT EltVT = Elt.getValueType();
21334 if (!EltVT.isByteSized() ||
21335 EltVT != Value.getOperand(0).getValueType().getVectorElementType())
21336 return SDValue();
21337
21338 auto *Ld = dyn_cast<LoadSDNode>(Value.getOperand(0));
21339 if (!Ld || Ld->getBasePtr() != Ptr ||
21340 ST->getMemoryVT() != Ld->getMemoryVT() || !ST->isSimple() ||
21341 !ISD::isNormalStore(ST) ||
21342 Ld->getAddressSpace() != ST->getAddressSpace() ||
21343 !Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1)))
21344 return SDValue();
21345
21346 unsigned IsFast;
21347 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
21348 Elt.getValueType(), ST->getAddressSpace(),
21349 ST->getAlign(), ST->getMemOperand()->getFlags(),
21350 &IsFast) ||
21351 !IsFast)
21352 return SDValue();
21353
21354 MachinePointerInfo PointerInfo(ST->getAddressSpace());
21355
21356 // If the offset is a known constant then try to recover the pointer
21357 // info
21358 SDValue NewPtr;
21359 if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx)) {
21360 unsigned COffset = CIdx->getSExtValue() * EltVT.getSizeInBits() / 8;
21361 NewPtr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(COffset), DL);
21362 PointerInfo = ST->getPointerInfo().getWithOffset(COffset);
21363 } else {
21364 NewPtr = TLI.getVectorElementPointer(DAG, Ptr, Value.getValueType(), Idx);
21365 }
21366
21367 return DAG.getStore(Chain, DL, Elt, NewPtr, PointerInfo, ST->getAlign(),
21368 ST->getMemOperand()->getFlags());
21369}
21370
21371SDValue DAGCombiner::visitATOMIC_STORE(SDNode *N) {
21372 AtomicSDNode *ST = cast<AtomicSDNode>(N);
21373 SDValue Val = ST->getVal();
21374 EVT VT = Val.getValueType();
21375 EVT MemVT = ST->getMemoryVT();
21376
21377 if (MemVT.bitsLT(VT)) { // Is truncating store
21378 APInt TruncDemandedBits = APInt::getLowBitsSet(VT.getScalarSizeInBits(),
21379 MemVT.getScalarSizeInBits());
21380 // See if we can simplify the operation with SimplifyDemandedBits, which
21381 // only works if the value has a single use.
21382 if (SimplifyDemandedBits(Val, TruncDemandedBits))
21383 return SDValue(N, 0);
21384 }
21385
21386 return SDValue();
21387}
21388
21389SDValue DAGCombiner::visitSTORE(SDNode *N) {
21390 StoreSDNode *ST = cast<StoreSDNode>(N);
21391 SDValue Chain = ST->getChain();
21392 SDValue Value = ST->getValue();
21393 SDValue Ptr = ST->getBasePtr();
21394
21395 // If this is a store of a bit convert, store the input value if the
21396 // resultant store does not need a higher alignment than the original.
21397 if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
21398 ST->isUnindexed()) {
21399 EVT SVT = Value.getOperand(0).getValueType();
21400 // If the store is volatile, we only want to change the store type if the
21401 // resulting store is legal. Otherwise we might increase the number of
21402 // memory accesses. We don't care if the original type was legal or not
21403 // as we assume software couldn't rely on the number of accesses of an
21404 // illegal type.
21405 // TODO: May be able to relax for unordered atomics (see D66309)
21406 if (((!LegalOperations && ST->isSimple()) ||
21407 TLI.isOperationLegal(ISD::STORE, SVT)) &&
21408 TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT,
21409 DAG, *ST->getMemOperand())) {
21410 return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
21411 ST->getMemOperand());
21412 }
21413 }
21414
21415 // Turn 'store undef, Ptr' -> nothing.
21416 if (Value.isUndef() && ST->isUnindexed() && !ST->isVolatile())
21417 return Chain;
21418
21419 // Try to infer better alignment information than the store already has.
21420 if (OptLevel != CodeGenOptLevel::None && ST->isUnindexed() &&
21421 !ST->isAtomic()) {
21422 if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
21423 if (*Alignment > ST->getAlign() &&
21424 isAligned(*Alignment, ST->getSrcValueOffset())) {
21425 SDValue NewStore =
21426 DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
21427 ST->getMemoryVT(), *Alignment,
21428 ST->getMemOperand()->getFlags(), ST->getAAInfo());
21429 // NewStore will always be N as we are only refining the alignment
21430 assert(NewStore.getNode() == N);
21431 (void)NewStore;
21432 }
21433 }
21434 }
21435
21436 // Try transforming a pair floating point load / store ops to integer
21437 // load / store ops.
21438 if (SDValue NewST = TransformFPLoadStorePair(N))
21439 return NewST;
21440
21441 // Try transforming several stores into STORE (BSWAP).
21442 if (SDValue Store = mergeTruncStores(ST))
21443 return Store;
21444
21445 if (ST->isUnindexed()) {
21446 // Walk up chain skipping non-aliasing memory nodes, on this store and any
21447 // adjacent stores.
21448 if (findBetterNeighborChains(ST)) {
21449 // replaceStoreChain uses CombineTo, which handled all of the worklist
21450 // manipulation. Return the original node to not do anything else.
21451 return SDValue(ST, 0);
21452 }
21453 Chain = ST->getChain();
21454 }
21455
21456 // FIXME: is there such a thing as a truncating indexed store?
21457 if (ST->isTruncatingStore() && ST->isUnindexed() &&
21458 Value.getValueType().isInteger() &&
21459 (!isa<ConstantSDNode>(Value) ||
21460 !cast<ConstantSDNode>(Value)->isOpaque())) {
21461 // Convert a truncating store of an extension into a standard store.
21462 if ((Value.getOpcode() == ISD::ZERO_EXTEND ||
21463 Value.getOpcode() == ISD::SIGN_EXTEND ||
21464 Value.getOpcode() == ISD::ANY_EXTEND) &&
21465 Value.getOperand(0).getValueType() == ST->getMemoryVT() &&
21466 TLI.isOperationLegalOrCustom(ISD::STORE, ST->getMemoryVT()))
21467 return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
21468 ST->getMemOperand());
21469
21470 APInt TruncDemandedBits =
21471 APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
21472 ST->getMemoryVT().getScalarSizeInBits());
21473
21474 // See if we can simplify the operation with SimplifyDemandedBits, which
21475 // only works if the value has a single use.
21476 AddToWorklist(Value.getNode());
21477 if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
21478 // Re-visit the store if anything changed and the store hasn't been merged
21479 // with another node (N is deleted). SimplifyDemandedBits will add Value's
21480 // node back to the worklist if necessary, but we also need to re-visit
21481 // the Store node itself.
21482 if (N->getOpcode() != ISD::DELETED_NODE)
21483 AddToWorklist(N);
21484 return SDValue(N, 0);
21485 }
21486
21487 // Otherwise, see if we can simplify the input to this truncstore with
21488 // knowledge that only the low bits are being used. For example:
21489 // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
21490 if (SDValue Shorter =
21491 TLI.SimplifyMultipleUseDemandedBits(Value, TruncDemandedBits, DAG))
21492 return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(),
21493 ST->getMemOperand());
21494
21495 // If we're storing a truncated constant, see if we can simplify it.
21496 // TODO: Move this to targetShrinkDemandedConstant?
21497 if (auto *Cst = dyn_cast<ConstantSDNode>(Value))
21498 if (!Cst->isOpaque()) {
21499 const APInt &CValue = Cst->getAPIntValue();
21500 APInt NewVal = CValue & TruncDemandedBits;
21501 if (NewVal != CValue) {
21502 SDValue Shorter =
21503 DAG.getConstant(NewVal, SDLoc(N), Value.getValueType());
21504 return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr,
21505 ST->getMemoryVT(), ST->getMemOperand());
21506 }
21507 }
21508 }
21509
21510 // If this is a load followed by a store to the same location, then the store
21511 // is dead/noop. Peek through any truncates if canCombineTruncStore failed.
21512 // TODO: Add big-endian truncate support with test coverage.
21513 // TODO: Can relax for unordered atomics (see D66309)
21514 SDValue TruncVal = DAG.getDataLayout().isLittleEndian()
21515 ? peekThroughTruncates(Value)
21516 : Value;
21517 if (auto *Ld = dyn_cast<LoadSDNode>(TruncVal)) {
21518 if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
21519 ST->isUnindexed() && ST->isSimple() &&
21520 Ld->getAddressSpace() == ST->getAddressSpace() &&
21521 // There can't be any side effects between the load and store, such as
21522 // a call or store.
21523 Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
21524 // The store is dead, remove it.
21525 return Chain;
21526 }
21527 }
21528
21529 // Try scalarizing vector stores of loads where we only change one element
21530 if (SDValue NewST = replaceStoreOfInsertLoad(ST))
21531 return NewST;
21532
21533 // TODO: Can relax for unordered atomics (see D66309)
21534 if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
21535 if (ST->isUnindexed() && ST->isSimple() &&
21536 ST1->isUnindexed() && ST1->isSimple()) {
21537 if (OptLevel != CodeGenOptLevel::None && ST1->getBasePtr() == Ptr &&
21538 ST1->getValue() == Value && ST->getMemoryVT() == ST1->getMemoryVT() &&
21539 ST->getAddressSpace() == ST1->getAddressSpace()) {
21540 // If this is a store followed by a store with the same value to the
21541 // same location, then the store is dead/noop.
21542 return Chain;
21543 }
21544
21545 if (OptLevel != CodeGenOptLevel::None && ST1->hasOneUse() &&
21546 !ST1->getBasePtr().isUndef() &&
21547 ST->getAddressSpace() == ST1->getAddressSpace()) {
21548 // If the smaller of the two stores has a scalable vector type and the
21549 // other is a larger store with a fixed-size type, we cannot allow
21550 // removing the scalable store, because we don't know its final size
21551 // until runtime.
21552 if (ST->getMemoryVT().isScalableVector() ||
21553 ST1->getMemoryVT().isScalableVector()) {
21554 if (ST1->getBasePtr() == Ptr &&
21555 TypeSize::isKnownLE(ST1->getMemoryVT().getStoreSize(),
21556 ST->getMemoryVT().getStoreSize())) {
21557 CombineTo(ST1, ST1->getChain());
21558 return SDValue(N, 0);
21559 }
21560 } else {
21561 const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG);
21562 const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG);
21563 // If the preceding store writes to a subset of the current store's
21564 // location and no other node is chained to that store, we can
21565 // effectively drop the preceding store. Do not remove stores to undef as they
21566 // may be used as data sinks.
21567 if (STBase.contains(DAG, ST->getMemoryVT().getFixedSizeInBits(),
21568 ChainBase,
21569 ST1->getMemoryVT().getFixedSizeInBits())) {
21570 CombineTo(ST1, ST1->getChain());
21571 return SDValue(N, 0);
21572 }
21573 }
21574 }
21575 }
21576 }
21577
21578 // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
21579 // truncating store. We can do this even if this is already a truncstore.
21580 if ((Value.getOpcode() == ISD::FP_ROUND ||
21581 Value.getOpcode() == ISD::TRUNCATE) &&
21582 Value->hasOneUse() && ST->isUnindexed() &&
21583 TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
21584 ST->getMemoryVT(), LegalOperations)) {
21585 return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
21586 Ptr, ST->getMemoryVT(), ST->getMemOperand());
21587 }
21588
21589 // Always perform this optimization before types are legal. If the target
21590 // prefers, also try this after legalization to catch stores that were created
21591 // by intrinsics or other nodes.
21592 if (!LegalTypes || (TLI.mergeStoresAfterLegalization(ST->getMemoryVT()))) {
21593 while (true) {
21594 // There can be multiple store sequences on the same chain.
21595 // Keep trying to merge store sequences until we are unable to do so
21596 // or until we merge the last store on the chain.
21597 bool Changed = mergeConsecutiveStores(ST);
21598 if (!Changed) break;
21599 // Return N as merge only uses CombineTo and no worklist clean
21600 // up is necessary.
21601 if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
21602 return SDValue(N, 0);
21603 }
21604 }
21605
21606 // Try transforming N to an indexed store.
21607 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
21608 return SDValue(N, 0);
21609
21610 // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
21611 //
21612 // Make sure to do this only after attempting to merge stores in order to
21613 // avoid changing the types of some subset of stores due to visit order,
21614 // preventing their merging.
21615 if (isa<ConstantFPSDNode>(ST->getValue())) {
21616 if (SDValue NewSt = replaceStoreOfFPConstant(ST))
21617 return NewSt;
21618 }
21619
21620 if (SDValue NewSt = splitMergedValStore(ST))
21621 return NewSt;
21622
21623 return ReduceLoadOpStoreWidth(N);
21624}
21625
21626SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
21627 const auto *LifetimeEnd = cast<LifetimeSDNode>(N);
21628 if (!LifetimeEnd->hasOffset())
21629 return SDValue();
21630
21631 const BaseIndexOffset LifetimeEndBase(N->getOperand(1), SDValue(),
21632 LifetimeEnd->getOffset(), false);
21633
21634 // We walk up the chains to find stores.
21635 SmallVector<SDValue, 8> Chains = {N->getOperand(0)};
21636 while (!Chains.empty()) {
21637 SDValue Chain = Chains.pop_back_val();
21638 if (!Chain.hasOneUse())
21639 continue;
21640 switch (Chain.getOpcode()) {
21641 case ISD::TokenFactor:
21642 for (unsigned Nops = Chain.getNumOperands(); Nops;)
21643 Chains.push_back(Chain.getOperand(--Nops));
21644 break;
21645 case ISD::LIFETIME_START:
21646 case ISD::LIFETIME_END:
21647 // We can forward past any lifetime start/end that can be proven not to
21648 // alias the node.
21649 if (!mayAlias(Chain.getNode(), N))
21650 Chains.push_back(Chain.getOperand(0));
21651 break;
21652 case ISD::STORE: {
21653 StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain);
21654 // TODO: Can relax for unordered atomics (see D66309)
21655 if (!ST->isSimple() || ST->isIndexed())
21656 continue;
21657 const TypeSize StoreSize = ST->getMemoryVT().getStoreSize();
21658 // The bounds of a scalable store are not known until runtime, so this
21659 // store cannot be elided.
21660 if (StoreSize.isScalable())
21661 continue;
21662 const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG);
21663 // If we store purely within object bounds just before its lifetime ends,
21664 // we can remove the store.
21665 if (LifetimeEndBase.contains(DAG, LifetimeEnd->getSize() * 8, StoreBase,
21666 StoreSize.getFixedValue() * 8)) {
21667 LLVM_DEBUG(dbgs() << "\nRemoving store:"; StoreBase.dump();
21668 dbgs() << "\nwithin LIFETIME_END of : ";
21669 LifetimeEndBase.dump(); dbgs() << "\n");
21670 CombineTo(ST, ST->getChain());
21671 return SDValue(N, 0);
21672 }
21673 }
21674 }
21675 }
21676 return SDValue();
21677}
21678
21679/// For the instruction sequence of store below, F and I values
21680/// are bundled together as an i64 value before being stored into memory.
21681 /// Sometimes it is more efficient to generate separate stores for F and I,
21682/// which can remove the bitwise instructions or sink them to colder places.
21683///
21684/// (store (or (zext (bitcast F to i32) to i64),
21685/// (shl (zext I to i64), 32)), addr) -->
21686/// (store F, addr) and (store I, addr+4)
21687///
21688/// Similarly, splitting for other merged store can also be beneficial, like:
21689/// For pair of {i32, i32}, i64 store --> two i32 stores.
21690/// For pair of {i32, i16}, i64 store --> two i32 stores.
21691/// For pair of {i16, i16}, i32 store --> two i16 stores.
21692/// For pair of {i16, i8}, i32 store --> two i16 stores.
21693/// For pair of {i8, i8}, i16 store --> two i8 stores.
21694///
21695/// We allow each target to determine specifically which kind of splitting is
21696/// supported.
21697///
21698/// The store patterns are commonly seen from the simple code snippet below
21699 /// if only std::make_pair(...) is SROA-transformed before being inlined into hoo().
21700/// void goo(const std::pair<int, float> &);
21701/// hoo() {
21702/// ...
21703/// goo(std::make_pair(tmp, ftmp));
21704/// ...
21705/// }
21706///
21707SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
21708 if (OptLevel == CodeGenOptLevel::None)
21709 return SDValue();
21710
21711 // Can't change the number of memory accesses for a volatile store or break
21712 // atomicity for an atomic one.
21713 if (!ST->isSimple())
21714 return SDValue();
21715
21716 SDValue Val = ST->getValue();
21717 SDLoc DL(ST);
21718
21719 // Match OR operand.
21720 if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
21721 return SDValue();
21722
21723 // Match SHL operand and get Lower and Higher parts of Val.
21724 SDValue Op1 = Val.getOperand(0);
21725 SDValue Op2 = Val.getOperand(1);
21726 SDValue Lo, Hi;
21727 if (Op1.getOpcode() != ISD::SHL) {
21728 std::swap(Op1, Op2);
21729 if (Op1.getOpcode() != ISD::SHL)
21730 return SDValue();
21731 }
21732 Lo = Op2;
21733 Hi = Op1.getOperand(0);
21734 if (!Op1.hasOneUse())
21735 return SDValue();
21736
21737 // Match shift amount to HalfValBitSize.
21738 unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
21739 ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
21740 if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
21741 return SDValue();
21742
21743 // Lo and Hi are zero-extended from integers whose size is less than or
21744 // equal to HalfValBitSize (e.g. i32 or narrower when splitting an i64).
21745 if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
21746 !Lo.getOperand(0).getValueType().isScalarInteger() ||
21747 Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
21748 Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
21749 !Hi.getOperand(0).getValueType().isScalarInteger() ||
21750 Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
21751 return SDValue();
21752
21753 // Use the EVT of low and high parts before bitcast as the input
21754 // of target query.
21755 EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
21756 ? Lo.getOperand(0).getValueType()
21757 : Lo.getValueType();
21758 EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
21759 ? Hi.getOperand(0).getValueType()
21760 : Hi.getValueType();
21761 if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
21762 return SDValue();
21763
21764 // Start to split store.
21765 MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
21766 AAMDNodes AAInfo = ST->getAAInfo();
21767
21768 // Change the sizes of Lo and Hi's value types to HalfValBitSize.
21769 EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
21770 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
21771 Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
21772
21773 SDValue Chain = ST->getChain();
21774 SDValue Ptr = ST->getBasePtr();
21775 // Lower value store.
21776 SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
21777 ST->getOriginalAlign(), MMOFlags, AAInfo);
21778 Ptr =
21779 DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(HalfValBitSize / 8), DL);
21780 // Higher value store.
21781 SDValue St1 = DAG.getStore(
21782 St0, DL, Hi, Ptr, ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
21783 ST->getOriginalAlign(), MMOFlags, AAInfo);
21784 return St1;
21785}
21786
21787// Merge an insertion into an existing shuffle:
21788// (insert_vector_elt (vector_shuffle X, Y, Mask),
21789 // (extract_vector_elt X, N), InsIndex)
21790// --> (vector_shuffle X, Y, NewMask)
21791// and variations where shuffle operands may be CONCAT_VECTORS.
21792 static bool mergeEltWithShuffle(SDValue &X, SDValue &Y, ArrayRef<int> Mask,
21793 SmallVectorImpl<int> &NewMask, SDValue Elt,
21794 unsigned InsIndex) {
21795 if (Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
21796 !isa<ConstantSDNode>(Elt.getOperand(1)))
21797 return false;
21798
21799 // Vec's operand 0 is using indices from 0 to N-1 and
21800 // operand 1 from N to 2N - 1, where N is the number of
21801 // elements in the vectors.
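// e.g. with two v4i32 shuffle inputs, mask value 5 refers to element 1 of Y.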
21802 SDValue InsertVal0 = Elt.getOperand(0);
21803 int ElementOffset = -1;
21804
21805 // We explore the inputs of the shuffle in order to see if we find the
21806 // source of the extract_vector_elt. If so, we can use it to modify the
21807 // shuffle rather than perform an insert_vector_elt.
21808 SmallVector<std::pair<int, SDValue>> ArgWorkList;
21809 ArgWorkList.emplace_back(Mask.size(), Y);
21810 ArgWorkList.emplace_back(0, X);
21811
21812 while (!ArgWorkList.empty()) {
21813 int ArgOffset;
21814 SDValue ArgVal;
21815 std::tie(ArgOffset, ArgVal) = ArgWorkList.pop_back_val();
21816
21817 if (ArgVal == InsertVal0) {
21818 ElementOffset = ArgOffset;
21819 break;
21820 }
21821
21822 // Peek through concat_vector.
21823 if (ArgVal.getOpcode() == ISD::CONCAT_VECTORS) {
21824 int CurrentArgOffset =
21825 ArgOffset + ArgVal.getValueType().getVectorNumElements();
21826 int Step = ArgVal.getOperand(0).getValueType().getVectorNumElements();
21827 for (SDValue Op : reverse(ArgVal->ops())) {
21828 CurrentArgOffset -= Step;
21829 ArgWorkList.emplace_back(CurrentArgOffset, Op);
21830 }
21831
21832 // Make sure we went through all the elements and did not screw up index
21833 // computation.
21834 assert(CurrentArgOffset == ArgOffset);
21835 }
21836 }
21837
21838 // If we failed to find a match, see if we can replace an UNDEF shuffle
21839 // operand.
21840 if (ElementOffset == -1) {
21841 if (!Y.isUndef() || InsertVal0.getValueType() != Y.getValueType())
21842 return false;
21843 ElementOffset = Mask.size();
21844 Y = InsertVal0;
21845 }
21846
21847 NewMask.assign(Mask.begin(), Mask.end());
21848 NewMask[InsIndex] = ElementOffset + Elt.getConstantOperandVal(1);
21849 assert(NewMask[InsIndex] < (int)(2 * Mask.size()) && NewMask[InsIndex] >= 0 &&
21850 "NewMask[InsIndex] is out of bound");
21851 return true;
21852}
21853
21854// Merge an insertion into an existing shuffle:
21855// (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N),
21856// InsIndex)
21857// --> (vector_shuffle X, Y) and variations where shuffle operands may be
21858// CONCAT_VECTORS.
21859SDValue DAGCombiner::mergeInsertEltWithShuffle(SDNode *N, unsigned InsIndex) {
21860 assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
21861 "Expected extract_vector_elt");
21862 SDValue InsertVal = N->getOperand(1);
21863 SDValue Vec = N->getOperand(0);
21864
21865 auto *SVN = dyn_cast<ShuffleVectorSDNode>(Vec);
21866 if (!SVN || !Vec.hasOneUse())
21867 return SDValue();
21868
21869 ArrayRef<int> Mask = SVN->getMask();
21870 SDValue X = Vec.getOperand(0);
21871 SDValue Y = Vec.getOperand(1);
21872
21873 SmallVector<int, 16> NewMask(Mask);
21874 if (mergeEltWithShuffle(X, Y, Mask, NewMask, InsertVal, InsIndex)) {
21875 SDValue LegalShuffle = TLI.buildLegalVectorShuffle(
21876 Vec.getValueType(), SDLoc(N), X, Y, NewMask, DAG);
21877 if (LegalShuffle)
21878 return LegalShuffle;
21879 }
21880
21881 return SDValue();
21882}
21883
21884// Convert a disguised subvector insertion into a shuffle:
21885// insert_vector_elt V, (bitcast X from vector type), IdxC -->
21886// bitcast(shuffle (bitcast V), (extended X), Mask)
21887// Note: We do not use an insert_subvector node because that requires a
21888// legal subvector type.
21889SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
21890 assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
21891 "Expected extract_vector_elt");
21892 SDValue InsertVal = N->getOperand(1);
21893
21894 if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
21895 !InsertVal.getOperand(0).getValueType().isVector())
21896 return SDValue();
21897
21898 SDValue SubVec = InsertVal.getOperand(0);
21899 SDValue DestVec = N->getOperand(0);
21900 EVT SubVecVT = SubVec.getValueType();
21901 EVT VT = DestVec.getValueType();
21902 unsigned NumSrcElts = SubVecVT.getVectorNumElements();
21903 // If the source only has a single vector element, the cost of creating and
21904 // adding it to a vector is likely to exceed the cost of an insert_vector_elt.
21905 if (NumSrcElts == 1)
21906 return SDValue();
21907 unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits();
21908 unsigned NumMaskVals = ExtendRatio * NumSrcElts;
21909
21910 // Step 1: Create a shuffle mask that implements this insert operation. The
21911 // vector that we are inserting into will be operand 0 of the shuffle, so
21912 // those elements are just 'i'. The inserted subvector is in the first
21913 // positions of operand 1 of the shuffle. Example:
21914 // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7}
21915 SmallVector<int, 16> Mask(NumMaskVals);
21916 for (unsigned i = 0; i != NumMaskVals; ++i) {
21917 if (i / NumSrcElts == InsIndex)
21918 Mask[i] = (i % NumSrcElts) + NumMaskVals;
21919 else
21920 Mask[i] = i;
21921 }
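// For the example above (insert v4i32 V, (v2i16 X), 2): NumSrcElts = 2,
// ExtendRatio = 4, NumMaskVals = 8, so the mask becomes {0,1,2,3,8,9,6,7}.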
21922
21923 // Bail out if the target can not handle the shuffle we want to create.
21924 EVT SubVecEltVT = SubVecVT.getVectorElementType();
21925 EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals);
21926 if (!TLI.isShuffleMaskLegal(Mask, ShufVT))
21927 return SDValue();
21928
21929 // Step 2: Create a wide vector from the inserted source vector by appending
21930 // undefined elements. This is the same size as our destination vector.
21931 SDLoc DL(N);
21932 SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT));
21933 ConcatOps[0] = SubVec;
21934 SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps);
21935
21936 // Step 3: Shuffle in the padded subvector.
21937 SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec);
21938 SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask);
21939 AddToWorklist(PaddedSubV.getNode());
21940 AddToWorklist(DestVecBC.getNode());
21941 AddToWorklist(Shuf.getNode());
21942 return DAG.getBitcast(VT, Shuf);
21943}
21944
21945// Combine insert(shuffle(load, <u,0,1,2>), load, 0) into a single load if
21946 // possible and the new load will be quick. We use more loads but fewer shuffles
21947// and inserts.
21948SDValue DAGCombiner::combineInsertEltToLoad(SDNode *N, unsigned InsIndex) {
21949 EVT VT = N->getValueType(0);
21950
21951 // InsIndex is expected to be the first or last lane.
21952 if (!VT.isFixedLengthVector() ||
21953 (InsIndex != 0 && InsIndex != VT.getVectorNumElements() - 1))
21954 return SDValue();
21955
21956 // Look for a shuffle with the mask u,0,1,2,3,4,5,6 or 1,2,3,4,5,6,7,u
21957 // depending on the InsIndex.
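// i.e. the existing shuffle shifts the vector by one lane, leaving a hole at
// InsIndex that the inserted scalar fills, so the result can come from one
// contiguous load.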
21958 auto *Shuffle = dyn_cast<ShuffleVectorSDNode>(N->getOperand(0));
21959 SDValue Scalar = N->getOperand(1);
21960 if (!Shuffle || !all_of(enumerate(Shuffle->getMask()), [&](auto P) {
21961 return InsIndex == P.index() || P.value() < 0 ||
21962 (InsIndex == 0 && P.value() == (int)P.index() - 1) ||
21963 (InsIndex == VT.getVectorNumElements() - 1 &&
21964 P.value() == (int)P.index() + 1);
21965 }))
21966 return SDValue();
21967
21968 // We optionally skip over an extend so long as both loads are extended in the
21969 // same way from the same type.
21970 unsigned Extend = 0;
21971 if (Scalar.getOpcode() == ISD::ZERO_EXTEND ||
21972 Scalar.getOpcode() == ISD::SIGN_EXTEND ||
21973 Scalar.getOpcode() == ISD::ANY_EXTEND) {
21974 Extend = Scalar.getOpcode();
21975 Scalar = Scalar.getOperand(0);
21976 }
21977
21978 auto *ScalarLoad = dyn_cast<LoadSDNode>(Scalar);
21979 if (!ScalarLoad)
21980 return SDValue();
21981
21982 SDValue Vec = Shuffle->getOperand(0);
21983 if (Extend) {
21984 if (Vec.getOpcode() != Extend)
21985 return SDValue();
21986 Vec = Vec.getOperand(0);
21987 }
21988 auto *VecLoad = dyn_cast<LoadSDNode>(Vec);
21989 if (!VecLoad || Vec.getValueType().getScalarType() != Scalar.getValueType())
21990 return SDValue();
21991
21992 int EltSize = ScalarLoad->getValueType(0).getScalarSizeInBits();
21993 if (EltSize == 0 || EltSize % 8 != 0 || !ScalarLoad->isSimple() ||
21994 !VecLoad->isSimple() || VecLoad->getExtensionType() != ISD::NON_EXTLOAD ||
21995 ScalarLoad->getExtensionType() != ISD::NON_EXTLOAD ||
21996 ScalarLoad->getAddressSpace() != VecLoad->getAddressSpace())
21997 return SDValue();
21998
21999 // Check that the offset between the pointers produces a single contiguous
22000 // load.
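// For InsIndex == 0 the scalar element must sit EltSize/8 bytes before the
// vector load; otherwise it must sit immediately after it.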
22001 if (InsIndex == 0) {
22002 if (!DAG.areNonVolatileConsecutiveLoads(ScalarLoad, VecLoad, EltSize / 8,
22003 -1))
22004 return SDValue();
22005 } else {
22006 if (!DAG.areNonVolatileConsecutiveLoads(
22007 VecLoad, ScalarLoad, VT.getVectorNumElements() * EltSize / 8, -1))
22008 return SDValue();
22009 }
22010
22011 // And that the new unaligned load will be fast.
22012 unsigned IsFast = 0;
22013 Align NewAlign = commonAlignment(VecLoad->getAlign(), EltSize / 8);
22014 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
22015 Vec.getValueType(), VecLoad->getAddressSpace(),
22016 NewAlign, VecLoad->getMemOperand()->getFlags(),
22017 &IsFast) ||
22018 !IsFast)
22019 return SDValue();
22020
22021 // Calculate the new Ptr and create the new load.
22022 SDLoc DL(N);
22023 SDValue Ptr = ScalarLoad->getBasePtr();
22024 if (InsIndex != 0)
22025 Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), VecLoad->getBasePtr(),
22026 DAG.getConstant(EltSize / 8, DL, Ptr.getValueType()));
22027 MachinePointerInfo PtrInfo =
22028 InsIndex == 0 ? ScalarLoad->getPointerInfo()
22029 : VecLoad->getPointerInfo().getWithOffset(EltSize / 8);
22030
22031 SDValue Load = DAG.getLoad(VecLoad->getValueType(0), DL,
22032 ScalarLoad->getChain(), Ptr, PtrInfo, NewAlign);
22033 DAG.makeEquivalentMemoryOrdering(ScalarLoad, Load.getValue(1));
22034 DAG.makeEquivalentMemoryOrdering(VecLoad, Load.getValue(1));
22035 return Extend ? DAG.getNode(Extend, DL, VT, Load) : Load;
22036}
22037
22038SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
22039 SDValue InVec = N->getOperand(0);
22040 SDValue InVal = N->getOperand(1);
22041 SDValue EltNo = N->getOperand(2);
22042 SDLoc DL(N);
22043
22044 EVT VT = InVec.getValueType();
22045 auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
22046
22047 // Insert into out-of-bounds element is undefined.
22048 if (IndexC && VT.isFixedLengthVector() &&
22049 IndexC->getZExtValue() >= VT.getVectorNumElements())
22050 return DAG.getUNDEF(VT);
22051
22052 // Remove redundant insertions:
22053 // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
22054 if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
22055 InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
22056 return InVec;
22057
22058 if (!IndexC) {
22059 // If this is variable insert to undef vector, it might be better to splat:
22060 // inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
22061 if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT))
22062 return DAG.getSplat(VT, DL, InVal);
22063 return SDValue();
22064 }
22065
22066 if (VT.isScalableVector())
22067 return SDValue();
22068
22069 unsigned NumElts = VT.getVectorNumElements();
22070
22071 // We must know which element is being inserted for folds below here.
22072 unsigned Elt = IndexC->getZExtValue();
22073
22074 // Handle <1 x ???> vector insertion special cases.
22075 if (NumElts == 1) {
22076 // insert_vector_elt(x, extract_vector_elt(y, 0), 0) -> y
22077 if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
22078 InVal.getOperand(0).getValueType() == VT &&
22079 isNullConstant(InVal.getOperand(1)))
22080 return InVal.getOperand(0);
22081 }
22082
22083 // Canonicalize insert_vector_elt dag nodes.
22084 // Example:
22085 // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
22086 // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
22087 //
22088 // Do this only if the child insert_vector node has one use; also
22089 // do this only if indices are both constants and Idx1 < Idx0.
22090 if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
22091 && isa<ConstantSDNode>(InVec.getOperand(2))) {
22092 unsigned OtherElt = InVec.getConstantOperandVal(2);
22093 if (Elt < OtherElt) {
22094 // Swap nodes.
22095 SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
22096 InVec.getOperand(0), InVal, EltNo);
22097 AddToWorklist(NewOp.getNode());
22098 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
22099 VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
22100 }
22101 }
22102
22103 if (SDValue Shuf = mergeInsertEltWithShuffle(N, Elt))
22104 return Shuf;
22105
22106 if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
22107 return Shuf;
22108
22109 if (SDValue Shuf = combineInsertEltToLoad(N, Elt))
22110 return Shuf;
22111
22112 // Attempt to convert an insert_vector_elt chain into a legal build_vector.
22113 if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) {
22114 // vXi1 vector - we don't need to recurse.
22115 if (NumElts == 1)
22116 return DAG.getBuildVector(VT, DL, {InVal});
22117
22118 // If we haven't already collected the element, insert into the op list.
22119 EVT MaxEltVT = InVal.getValueType();
22120 auto AddBuildVectorOp = [&](SmallVectorImpl<SDValue> &Ops, SDValue Elt,
22121 unsigned Idx) {
22122 if (!Ops[Idx]) {
22123 Ops[Idx] = Elt;
22124 if (VT.isInteger()) {
22125 EVT EltVT = Elt.getValueType();
22126 MaxEltVT = MaxEltVT.bitsGE(EltVT) ? MaxEltVT : EltVT;
22127 }
22128 }
22129 };
22130
22131 // Ensure all the operands are the same value type, fill any missing
22132 // operands with UNDEF and create the BUILD_VECTOR.
22133 auto CanonicalizeBuildVector = [&](SmallVectorImpl<SDValue> &Ops) {
22134 assert(Ops.size() == NumElts && "Unexpected vector size");
22135 for (SDValue &Op : Ops) {
22136 if (Op)
22137 Op = VT.isInteger() ? DAG.getAnyExtOrTrunc(Op, DL, MaxEltVT) : Op;
22138 else
22139 Op = DAG.getUNDEF(MaxEltVT);
22140 }
22141 return DAG.getBuildVector(VT, DL, Ops);
22142 };
22143
22144 SmallVector<SDValue, 8> Ops(NumElts, SDValue());
22145 Ops[Elt] = InVal;
22146
22147 // Recurse up a INSERT_VECTOR_ELT chain to build a BUILD_VECTOR.
22148 for (SDValue CurVec = InVec; CurVec;) {
22149 // UNDEF - build new BUILD_VECTOR from already inserted operands.
22150 if (CurVec.isUndef())
22151 return CanonicalizeBuildVector(Ops);
22152
22153 // BUILD_VECTOR - insert unused operands and build new BUILD_VECTOR.
22154 if (CurVec.getOpcode() == ISD::BUILD_VECTOR && CurVec.hasOneUse()) {
22155 for (unsigned I = 0; I != NumElts; ++I)
22156 AddBuildVectorOp(Ops, CurVec.getOperand(I), I);
22157 return CanonicalizeBuildVector(Ops);
22158 }
22159
22160 // SCALAR_TO_VECTOR - insert unused scalar and build new BUILD_VECTOR.
22161 if (CurVec.getOpcode() == ISD::SCALAR_TO_VECTOR && CurVec.hasOneUse()) {
22162 AddBuildVectorOp(Ops, CurVec.getOperand(0), 0);
22163 return CanonicalizeBuildVector(Ops);
22164 }
22165
22166 // INSERT_VECTOR_ELT - insert operand and continue up the chain.
22167 if (CurVec.getOpcode() == ISD::INSERT_VECTOR_ELT && CurVec.hasOneUse())
22168 if (auto *CurIdx = dyn_cast<ConstantSDNode>(CurVec.getOperand(2)))
22169 if (CurIdx->getAPIntValue().ult(NumElts)) {
22170 unsigned Idx = CurIdx->getZExtValue();
22171 AddBuildVectorOp(Ops, CurVec.getOperand(1), Idx);
22172
22173 // Found entire BUILD_VECTOR.
22174 if (all_of(Ops, [](SDValue Op) { return !!Op; }))
22175 return CanonicalizeBuildVector(Ops);
22176
22177 CurVec = CurVec->getOperand(0);
22178 continue;
22179 }
22180
22181 // VECTOR_SHUFFLE - if all the operands match the shuffle's sources,
22182 // update the shuffle mask (and second operand if we started with unary
22183 // shuffle) and create a new legal shuffle.
22184 if (CurVec.getOpcode() == ISD::VECTOR_SHUFFLE && CurVec.hasOneUse()) {
22185 auto *SVN = cast<ShuffleVectorSDNode>(CurVec);
22186 SDValue LHS = SVN->getOperand(0);
22187 SDValue RHS = SVN->getOperand(1);
22188 SmallVector<int, 16> Mask(SVN->getMask());
22189 bool Merged = true;
22190 for (auto I : enumerate(Ops)) {
22191 SDValue &Op = I.value();
22192 if (Op) {
22193 SmallVector<int, 16> NewMask;
22194 if (!mergeEltWithShuffle(LHS, RHS, Mask, NewMask, Op, I.index())) {
22195 Merged = false;
22196 break;
22197 }
22198 Mask = std::move(NewMask);
22199 }
22200 }
22201 if (Merged)
22202 if (SDValue NewShuffle =
22203 TLI.buildLegalVectorShuffle(VT, DL, LHS, RHS, Mask, DAG))
22204 return NewShuffle;
22205 }
22206
22207 // If all insertions are zero value, try to convert to AND mask.
22208 // TODO: Do this for -1 with OR mask?
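// e.g. inserting zeros into lanes 1 and 3 of v4i32 X becomes
// (and X, {-1, 0, -1, 0}).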
22209 if (!LegalOperations && llvm::isNullConstant(InVal) &&
22210 all_of(Ops, [InVal](SDValue Op) { return !Op || Op == InVal; }) &&
22211 count_if(Ops, [InVal](SDValue Op) { return Op == InVal; }) >= 2) {
22212 SDValue Zero = DAG.getConstant(0, DL, MaxEltVT);
22213 SDValue AllOnes = DAG.getAllOnesConstant(DL, MaxEltVT);
22214 SmallVector<SDValue, 8> Mask(NumElts);
22215 for (unsigned I = 0; I != NumElts; ++I)
22216 Mask[I] = Ops[I] ? Zero : AllOnes;
22217 return DAG.getNode(ISD::AND, DL, VT, CurVec,
22218 DAG.getBuildVector(VT, DL, Mask));
22219 }
22220
22221 // Failed to find a match in the chain - bail.
22222 break;
22223 }
22224
22225 // See if we can fill in the missing constant elements as zeros.
22226 // TODO: Should we do this for any constant?
22227 APInt DemandedZeroElts = APInt::getZero(NumElts);
22228 for (unsigned I = 0; I != NumElts; ++I)
22229 if (!Ops[I])
22230 DemandedZeroElts.setBit(I);
22231
22232 if (DAG.MaskedVectorIsZero(InVec, DemandedZeroElts)) {
22233 SDValue Zero = VT.isInteger() ? DAG.getConstant(0, DL, MaxEltVT)
22234 : DAG.getConstantFP(0, DL, MaxEltVT);
22235 for (unsigned I = 0; I != NumElts; ++I)
22236 if (!Ops[I])
22237 Ops[I] = Zero;
22238
22239 return CanonicalizeBuildVector(Ops);
22240 }
22241 }
22242
22243 return SDValue();
22244}
22245
22246SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
22247 SDValue EltNo,
22248 LoadSDNode *OriginalLoad) {
22249 assert(OriginalLoad->isSimple());
22250
22251 EVT ResultVT = EVE->getValueType(0);
22252 EVT VecEltVT = InVecVT.getVectorElementType();
22253
22254 // If the vector element type is not a multiple of a byte then we are unable
22255 // to correctly compute an address to load only the extracted element as a
22256 // scalar.
22257 if (!VecEltVT.isByteSized())
22258 return SDValue();
22259
22260 ISD::LoadExtType ExtTy =
22261 ResultVT.bitsGT(VecEltVT) ? ISD::NON_EXTLOAD : ISD::EXTLOAD;
22262 if (!TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT) ||
22263 !TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
22264 return SDValue();
22265
22266 Align Alignment = OriginalLoad->getAlign();
22267 MachinePointerInfo MPI;
22268 SDLoc DL(EVE);
22269 if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
22270 int Elt = ConstEltNo->getZExtValue();
22271 unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
22272 MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
22273 Alignment = commonAlignment(Alignment, PtrOff);
22274 } else {
22275 // Discard the pointer info except the address space because the memory
22276 // operand can't represent this new access since the offset is variable.
22277 MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
22278 Alignment = commonAlignment(Alignment, VecEltVT.getSizeInBits() / 8);
22279 }
22280
22281 unsigned IsFast = 0;
22282 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VecEltVT,
22283 OriginalLoad->getAddressSpace(), Alignment,
22284 OriginalLoad->getMemOperand()->getFlags(),
22285 &IsFast) ||
22286 !IsFast)
22287 return SDValue();
22288
22289 SDValue NewPtr = TLI.getVectorElementPointer(DAG, OriginalLoad->getBasePtr(),
22290 InVecVT, EltNo);
22291
22292 // We are replacing a vector load with a scalar load. The new load must have
22293 // identical memory op ordering to the original.
22294 SDValue Load;
22295 if (ResultVT.bitsGT(VecEltVT)) {
22296 // If the result type of vextract is wider than the load, then issue an
22297 // extending load instead.
22298 ISD::LoadExtType ExtType =
22299 TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT, VecEltVT) ? ISD::ZEXTLOAD
22300 : ISD::EXTLOAD;
22301 Load = DAG.getExtLoad(ExtType, DL, ResultVT, OriginalLoad->getChain(),
22302 NewPtr, MPI, VecEltVT, Alignment,
22303 OriginalLoad->getMemOperand()->getFlags(),
22304 OriginalLoad->getAAInfo());
22305 DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
22306 } else {
22307 // The result type is narrower or the same width as the vector element
22308 Load = DAG.getLoad(VecEltVT, DL, OriginalLoad->getChain(), NewPtr, MPI,
22309 Alignment, OriginalLoad->getMemOperand()->getFlags(),
22310 OriginalLoad->getAAInfo());
22311 DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
22312 if (ResultVT.bitsLT(VecEltVT))
22313 Load = DAG.getNode(ISD::TRUNCATE, DL, ResultVT, Load);
22314 else
22315 Load = DAG.getBitcast(ResultVT, Load);
22316 }
22317 ++OpsNarrowed;
22318 return Load;
22319}
22320
22321/// Transform a vector binary operation into a scalar binary operation by moving
22322/// the math/logic after an extract element of a vector.
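/// e.g. (extract_vector_elt (add X, C), i) -->
///      (add (extract_vector_elt X, i), C[i]) when C is a constant vector.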
22323 static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG,
22324 const SDLoc &DL, bool LegalOperations) {
22325 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
22326 SDValue Vec = ExtElt->getOperand(0);
22327 SDValue Index = ExtElt->getOperand(1);
22328 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
22329 if (!IndexC || !TLI.isBinOp(Vec.getOpcode()) || !Vec.hasOneUse() ||
22330 Vec->getNumValues() != 1)
22331 return SDValue();
22332
22333 // Targets may want to avoid this to prevent an expensive register transfer.
22334 if (!TLI.shouldScalarizeBinop(Vec))
22335 return SDValue();
22336
22337 // Extracting an element of a vector constant is constant-folded, so this
22338 // transform is just replacing a vector op with a scalar op while moving the
22339 // extract.
22340 SDValue Op0 = Vec.getOperand(0);
22341 SDValue Op1 = Vec.getOperand(1);
22342 APInt SplatVal;
22343 if (isAnyConstantBuildVector(Op0, true) ||
22344 ISD::isConstantSplatVector(Op0.getNode(), SplatVal) ||
22345 isAnyConstantBuildVector(Op1, true) ||
22346 ISD::isConstantSplatVector(Op1.getNode(), SplatVal)) {
22347 // extractelt (binop X, C), IndexC --> binop (extractelt X, IndexC), C'
22348 // extractelt (binop C, X), IndexC --> binop C', (extractelt X, IndexC)
22349 EVT VT = ExtElt->getValueType(0);
22350 SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op0, Index);
22351 SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op1, Index);
22352 return DAG.getNode(Vec.getOpcode(), DL, VT, Ext0, Ext1);
22353 }
22354
22355 return SDValue();
22356}
22357
22358 // Given an ISD::EXTRACT_VECTOR_ELT, which is a glorified bit sequence extract,
22359 // recursively analyse all of its users and try to model them as bit
22360 // sequence extractions. If all of them agree on the new, narrower element
22361 // type, and all of them can be modelled as ISD::EXTRACT_VECTOR_ELT's of that
22362 // new element type, do so now.
22363 // This is mainly useful to recover from legalization that scalarized
22364 // the vector as wide elements, rebuilding it with narrower elements.
22365//
22366// Some more nodes could be modelled if that helps cover interesting patterns.
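// e.g. an i64 extract whose only (transitive) users are (i32 trunc X) and
// (i32 trunc (srl X, 32)) is rebuilt as two i32 extracts of
// (v4i32 bitcast VecOp).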
22367bool DAGCombiner::refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(
22368 SDNode *N) {
22369 // We perform this optimization post type-legalization because
22370 // the type-legalizer often scalarizes integer-promoted vectors.
22371 // Performing this optimization earlier may cause legalization cycles.
22372 if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
22373 return false;
22374
22375 // TODO: Add support for big-endian.
22376 if (DAG.getDataLayout().isBigEndian())
22377 return false;
22378
22379 SDValue VecOp = N->getOperand(0);
22380 EVT VecVT = VecOp.getValueType();
22381 assert(!VecVT.isScalableVector() && "Only for fixed vectors.");
22382
22383 // We must start with a constant extraction index.
22384 auto *IndexC = dyn_cast<ConstantSDNode>(N->getOperand(1));
22385 if (!IndexC)
22386 return false;
22387
22388 assert(IndexC->getZExtValue() < VecVT.getVectorNumElements() &&
22389 "Original ISD::EXTRACT_VECTOR_ELT is undefinend?");
22390
22391 // TODO: deal with the case of implicit anyext of the extraction.
22392 unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
22393 EVT ScalarVT = N->getValueType(0);
22394 if (VecVT.getScalarType() != ScalarVT)
22395 return false;
22396
22397 // TODO: deal with the cases other than everything being integer-typed.
22398 if (!ScalarVT.isScalarInteger())
22399 return false;
22400
22401 struct Entry {
22402 SDNode *Producer;
22403
22404 // Which bits of VecOp does it contain?
22405 unsigned BitPos;
22406 int NumBits;
22407 // NOTE: the actual width of \p Producer may be wider than NumBits!
22408
22409 Entry(Entry &&) = default;
22410 Entry(SDNode *Producer_, unsigned BitPos_, int NumBits_)
22411 : Producer(Producer_), BitPos(BitPos_), NumBits(NumBits_) {}
22412
22413 Entry() = delete;
22414 Entry(const Entry &) = delete;
22415 Entry &operator=(const Entry &) = delete;
22416 Entry &operator=(Entry &&) = delete;
22417 };
22418 SmallVector<Entry, 32> Worklist;
22419 SmallVector<Entry, 32> Leafs;
22420
22421 // We start at the "root" ISD::EXTRACT_VECTOR_ELT.
22422 Worklist.emplace_back(N, /*BitPos=*/VecEltBitWidth * IndexC->getZExtValue(),
22423 /*NumBits=*/VecEltBitWidth);
22424
22425 while (!Worklist.empty()) {
22426 Entry E = Worklist.pop_back_val();
22427 // Does the node not even use any of the VecOp bits?
22428 if (!(E.NumBits > 0 && E.BitPos < VecVT.getSizeInBits() &&
22429 E.BitPos + E.NumBits <= VecVT.getSizeInBits()))
22430 return false; // Let's allow the other combines to clean this up first.
22431 // Did we fail to model any of the users of the Producer?
22432 bool ProducerIsLeaf = false;
22433 // Look at each user of this Producer.
22434 for (SDNode *User : E.Producer->uses()) {
22435 switch (User->getOpcode()) {
22436 // TODO: support ISD::BITCAST
22437 // TODO: support ISD::ANY_EXTEND
22438 // TODO: support ISD::ZERO_EXTEND
22439 // TODO: support ISD::SIGN_EXTEND
22440 case ISD::TRUNCATE:
22441 // Truncation simply means we keep the position, but extract fewer bits.
22442 Worklist.emplace_back(User, E.BitPos,
22443 /*NumBits=*/User->getValueSizeInBits(0));
22444 break;
22445 // TODO: support ISD::SRA
22446 // TODO: support ISD::SHL
22447 case ISD::SRL:
22448 // We should be shifting the Producer by a constant amount.
22449 if (auto *ShAmtC = dyn_cast<ConstantSDNode>(User->getOperand(1));
22450 User->getOperand(0).getNode() == E.Producer && ShAmtC) {
22451 // Logical right-shift means that we start extraction later,
22452 // but stop it at the same position we did previously.
22453 unsigned ShAmt = ShAmtC->getZExtValue();
22454 Worklist.emplace_back(User, E.BitPos + ShAmt, E.NumBits - ShAmt);
22455 break;
22456 }
22457 [[fallthrough]];
22458 default:
22459 // We can not model this user of the Producer.
22460 // Which means the current Producer will be a ISD::EXTRACT_VECTOR_ELT.
22461 ProducerIsLeaf = true;
22462 // Profitability check: all users that we can not model
22463 // must be ISD::BUILD_VECTOR's.
22464 if (User->getOpcode() != ISD::BUILD_VECTOR)
22465 return false;
22466 break;
22467 }
22468 }
22469 if (ProducerIsLeaf)
22470 Leafs.emplace_back(std::move(E));
22471 }
22472
22473 unsigned NewVecEltBitWidth = Leafs.front().NumBits;
22474
22475 // If we are still at the same element granularity, give up.
22476 if (NewVecEltBitWidth == VecEltBitWidth)
22477 return false;
22478
22479 // The vector width must be a multiple of the new element width.
22480 if (VecVT.getSizeInBits() % NewVecEltBitWidth != 0)
22481 return false;
22482
22483 // All leafs must agree on the new element width.
22485 // All leafs must not expect any "padding" bits on top of that width.
22486 // All leafs must start extraction from a multiple of that width.
22486 if (!all_of(Leafs, [NewVecEltBitWidth](const Entry &E) {
22487 return (unsigned)E.NumBits == NewVecEltBitWidth &&
22488 E.Producer->getValueSizeInBits(0) == NewVecEltBitWidth &&
22489 E.BitPos % NewVecEltBitWidth == 0;
22490 }))
22491 return false;
22492
22493 EVT NewScalarVT = EVT::getIntegerVT(*DAG.getContext(), NewVecEltBitWidth);
22494 EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewScalarVT,
22495 VecVT.getSizeInBits() / NewVecEltBitWidth);
22496
22497 if (LegalTypes &&
22498 !(TLI.isTypeLegal(NewScalarVT) && TLI.isTypeLegal(NewVecVT)))
22499 return false;
22500
22501 if (LegalOperations &&
22502 !(TLI.isOperationLegalOrCustom(ISD::BITCAST, NewVecVT) &&
22503 TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, NewVecVT)))
22504 return false;
22505
22506 SDValue NewVecOp = DAG.getBitcast(NewVecVT, VecOp);
22507 for (const Entry &E : Leafs) {
22508 SDLoc DL(E.Producer);
22509 unsigned NewIndex = E.BitPos / NewVecEltBitWidth;
22510 assert(NewIndex < NewVecVT.getVectorNumElements() &&
22511 "Creating out-of-bounds ISD::EXTRACT_VECTOR_ELT?");
22512 SDValue V = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, NewScalarVT, NewVecOp,
22513 DAG.getVectorIdxConstant(NewIndex, DL));
22514 CombineTo(E.Producer, V);
22515 }
22516
22517 return true;
22518}
22519
22520SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
22521 SDValue VecOp = N->getOperand(0);
22522 SDValue Index = N->getOperand(1);
22523 EVT ScalarVT = N->getValueType(0);
22524 EVT VecVT = VecOp.getValueType();
22525 if (VecOp.isUndef())
22526 return DAG.getUNDEF(ScalarVT);
22527
22528 // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
22529 //
22530 // This only really matters if the index is non-constant since other combines
22531 // on the constant elements already work.
22532 SDLoc DL(N);
22533 if (VecOp.getOpcode() == ISD::INSERT_VECTOR_ELT &&
22534 Index == VecOp.getOperand(2)) {
22535 SDValue Elt = VecOp.getOperand(1);
22536 AddUsersToWorklist(VecOp.getNode());
22537 return VecVT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, DL, ScalarVT) : Elt;
22538 }
22539
22540 // (vextract (scalar_to_vector val), 0) -> val
22541 if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR) {
22542 // Only 0'th element of SCALAR_TO_VECTOR is defined.
22543 if (DAG.isKnownNeverZero(Index))
22544 return DAG.getUNDEF(ScalarVT);
22545
22546 // Check if the result type doesn't match the inserted element type.
22547 // The inserted element and extracted element may have mismatched bitwidth.
22548 // As a result, EXTRACT_VECTOR_ELT may extend or truncate the extracted value.
22549 SDValue InOp = VecOp.getOperand(0);
22550 if (InOp.getValueType() != ScalarVT) {
22551 assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
22552 if (InOp.getValueType().bitsGT(ScalarVT))
22553 return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, InOp);
22554 return DAG.getNode(ISD::ANY_EXTEND, DL, ScalarVT, InOp);
22555 }
22556 return InOp;
22557 }
22558
22559 // extract_vector_elt of out-of-bounds element -> UNDEF
22560 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
22561 if (IndexC && VecVT.isFixedLengthVector() &&
22562 IndexC->getAPIntValue().uge(VecVT.getVectorNumElements()))
22563 return DAG.getUNDEF(ScalarVT);
22564
22565 // extract_vector_elt (build_vector x, y), 1 -> y
22566 if (((IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR) ||
22567 VecOp.getOpcode() == ISD::SPLAT_VECTOR) &&
22568 TLI.isTypeLegal(VecVT)) {
22569 assert((VecOp.getOpcode() != ISD::BUILD_VECTOR ||
22570 VecVT.isFixedLengthVector()) &&
22571 "BUILD_VECTOR used for scalable vectors");
22572 unsigned IndexVal =
22573 VecOp.getOpcode() == ISD::BUILD_VECTOR ? IndexC->getZExtValue() : 0;
22574 SDValue Elt = VecOp.getOperand(IndexVal);
22575 EVT InEltVT = Elt.getValueType();
22576
22577 if (VecOp.hasOneUse() || TLI.aggressivelyPreferBuildVectorSources(VecVT) ||
22578 isNullConstant(Elt)) {
22579 // Sometimes build_vector's scalar input types do not match result type.
22580 if (ScalarVT == InEltVT)
22581 return Elt;
22582
22583 // TODO: It may be useful to truncate if free if the build_vector
22584 // implicitly converts.
22585 }
22586 }
22587
22588 if (SDValue BO = scalarizeExtractedBinop(N, DAG, DL, LegalOperations))
22589 return BO;
22590
22591 if (VecVT.isScalableVector())
22592 return SDValue();
22593
22594 // All the code from this point onwards assumes fixed width vectors, but it's
22595 // possible that some of the combinations could be made to work for scalable
22596 // vectors too.
22597 unsigned NumElts = VecVT.getVectorNumElements();
22598 unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
22599
22600 // See if the extracted element is constant, in which case fold it if it's
22601 // a legal fp immediate.
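// For example (illustrative): extracting lane 0 of a v4f32 whose lane 0 is
// known to have the bit pattern 0x3F800000 folds to the constant 1.0f, which
// is emitted directly when the target reports that FP immediate as legal.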
22602 if (IndexC && ScalarVT.isFloatingPoint()) {
22603 APInt EltMask = APInt::getOneBitSet(NumElts, IndexC->getZExtValue());
22604 KnownBits KnownElt = DAG.computeKnownBits(VecOp, EltMask);
22605 if (KnownElt.isConstant()) {
22606 APFloat CstFP =
22607 APFloat(DAG.EVTToAPFloatSemantics(ScalarVT), KnownElt.getConstant());
22608 if (TLI.isFPImmLegal(CstFP, ScalarVT))
22609 return DAG.getConstantFP(CstFP, DL, ScalarVT);
22610 }
22611 }
22612
22613 // TODO: These transforms should not require the 'hasOneUse' restriction, but
22614 // there are regressions on multiple targets without it. We can end up with a
22615 // mess of scalar and vector code if we reduce only part of the DAG to scalar.
22616 if (IndexC && VecOp.getOpcode() == ISD::BITCAST && VecVT.isInteger() &&
22617 VecOp.hasOneUse()) {
22618 // The vector index of the LSBs of the source depends on the endianness.
22619 bool IsLE = DAG.getDataLayout().isLittleEndian();
22620 unsigned ExtractIndex = IndexC->getZExtValue();
22621 // extract_elt (v2i32 (bitcast i64:x)), BCTruncElt -> i32 (trunc i64:x)
22622 unsigned BCTruncElt = IsLE ? 0 : NumElts - 1;
22623 SDValue BCSrc = VecOp.getOperand(0);
22624 if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger())
22625 return DAG.getAnyExtOrTrunc(BCSrc, DL, ScalarVT);
22626
22627 if (LegalTypes && BCSrc.getValueType().isInteger() &&
22628 BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR) {
22629 // ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt -->
22630 // trunc i64 X to i32
22631 SDValue X = BCSrc.getOperand(0);
22632 assert(X.getValueType().isScalarInteger() && ScalarVT.isScalarInteger() &&
22633 "Extract element and scalar to vector can't change element type "
22634 "from FP to integer.");
22635 unsigned XBitWidth = X.getValueSizeInBits();
22636 BCTruncElt = IsLE ? 0 : XBitWidth / VecEltBitWidth - 1;
22637
22638 // An extract element return value type can be wider than its vector
22639 // operand element type. In that case, the high bits are undefined, so
22640 // it's possible that we may need to extend rather than truncate.
22641 if (ExtractIndex == BCTruncElt && XBitWidth > VecEltBitWidth) {
22642 assert(XBitWidth % VecEltBitWidth == 0 &&
22643 "Scalar bitwidth must be a multiple of vector element bitwidth");
22644 return DAG.getAnyExtOrTrunc(X, DL, ScalarVT);
22645 }
22646 }
22647 }
22648
22649 // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
22650 // We only perform this optimization before the op legalization phase because
22651 // we may introduce new vector instructions which are not backed by TD
22652 // patterns. For example, on AVX there is no pattern for extracting elements
22653 // from a wide vector without using extract_subvector. However, if we can find
22654 // an underlying scalar value, then we can always use that.
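// For example: (extract_vector_elt (vector_shuffle<2,u,u,u> A, B), 0) can be
// folded to (extract_vector_elt A, 2), since mask element 0 selects lane 2 of
// the first shuffle operand.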
22655 if (IndexC && VecOp.getOpcode() == ISD::VECTOR_SHUFFLE) {
22656 auto *Shuf = cast<ShuffleVectorSDNode>(VecOp);
22657 // Find the new index to extract from.
22658 int OrigElt = Shuf->getMaskElt(IndexC->getZExtValue());
22659
22660 // Extracting an undef index is undef.
22661 if (OrigElt == -1)
22662 return DAG.getUNDEF(ScalarVT);
22663
22664 // Select the right vector half to extract from.
22665 SDValue SVInVec;
22666 if (OrigElt < (int)NumElts) {
22667 SVInVec = VecOp.getOperand(0);
22668 } else {
22669 SVInVec = VecOp.getOperand(1);
22670 OrigElt -= NumElts;
22671 }
22672
22673 if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
22674 SDValue InOp = SVInVec.getOperand(OrigElt);
22675 if (InOp.getValueType() != ScalarVT) {
22676 assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
22677 InOp = DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
22678 }
22679
22680 return InOp;
22681 }
22682
22683 // FIXME: We should handle recursing on other vector shuffles and
22684 // scalar_to_vector here as well.
22685
22686 if (!LegalOperations ||
22687 // FIXME: Should really be just isOperationLegalOrCustom.
22688 TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecVT) ||
22689 TLI.isOperationCustom(ISD::EXTRACT_VECTOR_ELT, VecVT))
22690 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, SVInVec,
22691 DAG.getVectorIdxConstant(OrigElt, DL));
22692 }
22693 }
22694
22695 // If only EXTRACT_VECTOR_ELT nodes use the source vector we can
22696 // simplify it based on the (valid) extraction indices.
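// For example (illustrative): if a v4i32 source is only ever read through
// extracts of lanes 0 and 2, the remaining lanes are not demanded, which may
// let the demanded-elements/bits simplification below shrink the source.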
22697 if (llvm::all_of(VecOp->uses(), [&](SDNode *Use) {
22698 return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
22699 Use->getOperand(0) == VecOp &&
22700 isa<ConstantSDNode>(Use->getOperand(1));
22701 })) {
22702 APInt DemandedElts = APInt::getZero(NumElts);
22703 for (SDNode *Use : VecOp->uses()) {
22704 auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1));
22705 if (CstElt->getAPIntValue().ult(NumElts))
22706 DemandedElts.setBit(CstElt->getZExtValue());
22707 }
22708 if (SimplifyDemandedVectorElts(VecOp, DemandedElts, true)) {
22709 // We simplified the vector operand of this extract element. If this
22710 // extract is not dead, visit it again so it is folded properly.
22711 if (N->getOpcode() != ISD::DELETED_NODE)
22712 AddToWorklist(N);
22713 return SDValue(N, 0);
22714 }
22715 APInt DemandedBits = APInt::getAllOnes(VecEltBitWidth);
22716 if (SimplifyDemandedBits(VecOp, DemandedBits, DemandedElts, true)) {
22717 // We simplified the vector operand of this extract element. If this
22718 // extract is not dead, visit it again so it is folded properly.
22719 if (N->getOpcode() != ISD::DELETED_NODE)
22720 AddToWorklist(N);
22721 return SDValue(N, 0);
22722 }
22723 }
22724
22725 if (refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(N))
22726 return SDValue(N, 0);
22727
22728 // Everything under here is trying to match an extract of a loaded value.
22729 // If the result of load has to be truncated, then it's not necessarily
22730 // profitable.
22731 bool BCNumEltsChanged = false;
22732 EVT ExtVT = VecVT.getVectorElementType();
22733 EVT LVT = ExtVT;
22734 if (ScalarVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, ScalarVT))
22735 return SDValue();
22736
22737 if (VecOp.getOpcode() == ISD::BITCAST) {
22738 // Don't duplicate a load with other uses.
22739 if (!VecOp.hasOneUse())
22740 return SDValue();
22741
22742 EVT BCVT = VecOp.getOperand(0).getValueType();
22743 if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
22744 return SDValue();
22745 if (NumElts != BCVT.getVectorNumElements())
22746 BCNumEltsChanged = true;
22747 VecOp = VecOp.getOperand(0);
22748 ExtVT = BCVT.getVectorElementType();
22749 }
22750
22751 // extract (vector load $addr), i --> load $addr + i * size
22752 if (!LegalOperations && !IndexC && VecOp.hasOneUse() &&
22753 ISD::isNormalLoad(VecOp.getNode()) &&
22754 !Index->hasPredecessor(VecOp.getNode())) {
22755 auto *VecLoad = dyn_cast<LoadSDNode>(VecOp);
22756 if (VecLoad && VecLoad->isSimple())
22757 return scalarizeExtractedVectorLoad(N, VecVT, Index, VecLoad);
22758 }
22759
22760 // Perform only after legalization to ensure build_vector / vector_shuffle
22761 // optimizations have already been done.
22762 if (!LegalOperations || !IndexC)
22763 return SDValue();
22764
22765 // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
22766 // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
22767 // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
22768 int Elt = IndexC->getZExtValue();
22769 LoadSDNode *LN0 = nullptr;
22770 if (ISD::isNormalLoad(VecOp.getNode())) {
22771 LN0 = cast<LoadSDNode>(VecOp);
22772 } else if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
22773 VecOp.getOperand(0).getValueType() == ExtVT &&
22774 ISD::isNormalLoad(VecOp.getOperand(0).getNode())) {
22775 // Don't duplicate a load with other uses.
22776 if (!VecOp.hasOneUse())
22777 return SDValue();
22778
22779 LN0 = cast<LoadSDNode>(VecOp.getOperand(0));
22780 }
22781 if (auto *Shuf = dyn_cast<ShuffleVectorSDNode>(VecOp)) {
22782 // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
22783 // =>
22784 // (load $addr+1*size)
22785
22786 // Don't duplicate a load with other uses.
22787 if (!VecOp.hasOneUse())
22788 return SDValue();
22789
22790 // If the bit convert changed the number of elements, it is unsafe
22791 // to examine the mask.
22792 if (BCNumEltsChanged)
22793 return SDValue();
22794
22795 // Select the input vector, guarding against out of range extract vector.
22796 int Idx = (Elt > (int)NumElts) ? -1 : Shuf->getMaskElt(Elt);
22797 VecOp = (Idx < (int)NumElts) ? VecOp.getOperand(0) : VecOp.getOperand(1);
22798
22799 if (VecOp.getOpcode() == ISD::BITCAST) {
22800 // Don't duplicate a load with other uses.
22801 if (!VecOp.hasOneUse())
22802 return SDValue();
22803
22804 VecOp = VecOp.getOperand(0);
22805 }
22806 if (ISD::isNormalLoad(VecOp.getNode())) {
22807 LN0 = cast<LoadSDNode>(VecOp);
22808 Elt = (Idx < (int)NumElts) ? Idx : Idx - (int)NumElts;
22809 Index = DAG.getConstant(Elt, DL, Index.getValueType());
22810 }
22811 } else if (VecOp.getOpcode() == ISD::CONCAT_VECTORS && !BCNumEltsChanged &&
22812 VecVT.getVectorElementType() == ScalarVT &&
22813 (!LegalTypes ||
22814 TLI.isTypeLegal(
22815 VecOp.getOperand(0).getValueType().getVectorElementType()))) {
22816 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 0
22817 // -> extract_vector_elt a, 0
22818 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 1
22819 // -> extract_vector_elt a, 1
22820 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 2
22821 // -> extract_vector_elt b, 0
22822 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 3
22823 // -> extract_vector_elt b, 1
22824 EVT ConcatVT = VecOp.getOperand(0).getValueType();
22825 unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
22826 SDValue NewIdx = DAG.getConstant(Elt % ConcatNumElts, DL,
22827 Index.getValueType());
22828
22829 SDValue ConcatOp = VecOp.getOperand(Elt / ConcatNumElts);
22830 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
22831 ConcatVT.getVectorElementType(),
22832 ConcatOp, NewIdx);
22833 return DAG.getNode(ISD::BITCAST, DL, ScalarVT, Elt);
22834 }
22835
22836 // Make sure we found a non-volatile load and the extractelement is
22837 // the only use.
22838 if (!LN0 || !LN0->hasNUsesOfValue(1,0) || !LN0->isSimple())
22839 return SDValue();
22840
22841 // If Idx was -1 above, Elt is going to be -1, so just return undef.
22842 if (Elt == -1)
22843 return DAG.getUNDEF(LVT);
22844
22845 return scalarizeExtractedVectorLoad(N, VecVT, Index, LN0);
22846}
22847
22848// Simplify (build_vec (ext )) to (bitcast (build_vec ))
22849SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
22850 // We perform this optimization post type-legalization because
22851 // the type-legalizer often scalarizes integer-promoted vectors.
22852 // Performing this optimization before may create bit-casts which
22853 // will be type-legalized to complex code sequences.
22854 // We perform this optimization only before the operation legalizer because we
22855 // may introduce illegal operations.
22856 if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
22857 return SDValue();
22858
22859 unsigned NumInScalars = N->getNumOperands();
22860 SDLoc DL(N);
22861 EVT VT = N->getValueType(0);
22862
22863 // Check to see if this is a BUILD_VECTOR of a bunch of values
22864 // which come from any_extend or zero_extend nodes. If so, we can create
22865 // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
22866 // optimizations. We do not handle sign-extend because we can't fill the sign
22867 // using shuffles.
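// For example (little-endian, illustrative):
// (v2i64 build_vector (zext i32:a to i64), (zext i32:b to i64))
// can be rebuilt as (bitcast (v4i32 build_vector a, 0, b, 0) to v2i64).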
22868 EVT SourceType = MVT::Other;
22869 bool AllAnyExt = true;
22870
22871 for (unsigned i = 0; i != NumInScalars; ++i) {
22872 SDValue In = N->getOperand(i);
22873 // Ignore undef inputs.
22874 if (In.isUndef()) continue;
22875
22876 bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND;
22877 bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
22878
22879 // Abort if the element is not an extension.
22880 if (!ZeroExt && !AnyExt) {
22881 SourceType = MVT::Other;
22882 break;
22883 }
22884
22885 // The input is a ZeroExt or AnyExt. Check the original type.
22886 EVT InTy = In.getOperand(0).getValueType();
22887
22888 // Check that all of the widened source types are the same.
22889 if (SourceType == MVT::Other)
22890 // First time.
22891 SourceType = InTy;
22892 else if (InTy != SourceType) {
22893 // Multiple incoming types. Abort.
22894 SourceType = MVT::Other;
22895 break;
22896 }
22897
22898 // Check if all of the extends are ANY_EXTENDs.
22899 AllAnyExt &= AnyExt;
22900 }
22901
22902 // In order to have valid types, all of the inputs must be extended from the
22903 // same source type and all of the inputs must be any or zero extend.
22904 // Scalar sizes must be a power of two.
22905 EVT OutScalarTy = VT.getScalarType();
22906 bool ValidTypes =
22907 SourceType != MVT::Other &&
22908 llvm::has_single_bit<uint32_t>(OutScalarTy.getSizeInBits()) &&
22909 llvm::has_single_bit<uint32_t>(SourceType.getSizeInBits());
22910
22911 // Create a new simpler BUILD_VECTOR sequence which other optimizations can
22912 // turn into a single shuffle instruction.
22913 if (!ValidTypes)
22914 return SDValue();
22915
22916 // If we already have a splat buildvector, then don't fold it if it means
22917 // introducing zeros.
22918 if (!AllAnyExt && DAG.isSplatValue(SDValue(N, 0), /*AllowUndefs*/ true))
22919 return SDValue();
22920
22921 bool isLE = DAG.getDataLayout().isLittleEndian();
22922 unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
22923 assert(ElemRatio > 1 && "Invalid element size ratio");
22924 SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
22925 DAG.getConstant(0, DL, SourceType);
22926
22927 unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
22928 SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
22929
22930 // Populate the new build_vector
22931 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
22932 SDValue Cast = N->getOperand(i);
22933 assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
22934 Cast.getOpcode() == ISD::ZERO_EXTEND ||
22935 Cast.isUndef()) && "Invalid cast opcode");
22936 SDValue In;
22937 if (Cast.isUndef())
22938 In = DAG.getUNDEF(SourceType);
22939 else
22940 In = Cast->getOperand(0);
22941 unsigned Index = isLE ? (i * ElemRatio) :
22942 (i * ElemRatio + (ElemRatio - 1));
22943
22944 assert(Index < Ops.size() && "Invalid index");
22945 Ops[Index] = In;
22946 }
22947
22948 // The type of the new BUILD_VECTOR node.
22949 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
22950 assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
22951 "Invalid vector size");
22952 // Check if the new vector type is legal.
22953 if (!isTypeLegal(VecVT) ||
22954 (!TLI.isOperationLegal(ISD::BUILD_VECTOR, VecVT) &&
22955 TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)))
22956 return SDValue();
22957
22958 // Make the new BUILD_VECTOR.
22959 SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
22960
22961 // The new BUILD_VECTOR node has the potential to be further optimized.
22962 AddToWorklist(BV.getNode());
22963 // Bitcast to the desired type.
22964 return DAG.getBitcast(VT, BV);
22965}
22966
22967// Simplify (build_vec (trunc $1)
22968 //                     (trunc (srl $1 half-width))
22969 //                     (trunc (srl $1 (2 * half-width))))
22970// to (bitcast $1)
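// For example (little-endian, illustrative):
// (v4i16 build_vector (trunc i64:x), (trunc (srl x, 16)),
//                     (trunc (srl x, 32)), (trunc (srl x, 48)))
// becomes (bitcast i64:x to v4i16).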
22971SDValue DAGCombiner::reduceBuildVecTruncToBitCast(SDNode *N) {
22972 assert(N->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
22973
22974 EVT VT = N->getValueType(0);
22975
22976 // Don't run this before LegalizeTypes if VT is legal.
22977 // Targets may have other preferences.
22978 if (Level < AfterLegalizeTypes && TLI.isTypeLegal(VT))
22979 return SDValue();
22980
22981 // Only for little endian
22982 if (!DAG.getDataLayout().isLittleEndian())
22983 return SDValue();
22984
22985 SDLoc DL(N);
22986 EVT OutScalarTy = VT.getScalarType();
22987 uint64_t ScalarTypeBitsize = OutScalarTy.getSizeInBits();
22988
22989 // Only for power of two types to be sure that bitcast works well
22990 if (!isPowerOf2_64(ScalarTypeBitsize))
22991 return SDValue();
22992
22993 unsigned NumInScalars = N->getNumOperands();
22994
22995 // Look through bitcasts
22996 auto PeekThroughBitcast = [](SDValue Op) {
22997 if (Op.getOpcode() == ISD::BITCAST)
22998 return Op.getOperand(0);
22999 return Op;
23000 };
23001
23002 // The source value where all the parts are extracted.
23003 SDValue Src;
23004 for (unsigned i = 0; i != NumInScalars; ++i) {
23005 SDValue In = PeekThroughBitcast(N->getOperand(i));
23006 // Ignore undef inputs.
23007 if (In.isUndef()) continue;
23008
23009 if (In.getOpcode() != ISD::TRUNCATE)
23010 return SDValue();
23011
23012 In = PeekThroughBitcast(In.getOperand(0));
23013
23014 if (In.getOpcode() != ISD::SRL) {
23015 // For now only build_vec without shuffling, handle shifts here in the
23016 // future.
23017 if (i != 0)
23018 return SDValue();
23019
23020 Src = In;
23021 } else {
23022 // In is SRL
23023 SDValue part = PeekThroughBitcast(In.getOperand(0));
23024
23025 if (!Src) {
23026 Src = part;
23027 } else if (Src != part) {
23028 // Vector parts do not stem from the same variable
23029 return SDValue();
23030 }
23031
23032 SDValue ShiftAmtVal = In.getOperand(1);
23033 if (!isa<ConstantSDNode>(ShiftAmtVal))
23034 return SDValue();
23035
23036 uint64_t ShiftAmt = In.getConstantOperandVal(1);
23037
23038 // The extracted value is not extracted at the right position
23039 if (ShiftAmt != i * ScalarTypeBitsize)
23040 return SDValue();
23041 }
23042 }
23043
23044 // Only cast if the size is the same
23045 if (!Src || Src.getValueType().getSizeInBits() != VT.getSizeInBits())
23046 return SDValue();
23047
23048 return DAG.getBitcast(VT, Src);
23049}
23050
23051SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
23052 ArrayRef<int> VectorMask,
23053 SDValue VecIn1, SDValue VecIn2,
23054 unsigned LeftIdx, bool DidSplitVec) {
23055 SDValue ZeroIdx = DAG.getVectorIdxConstant(0, DL);
23056
23057 EVT VT = N->getValueType(0);
23058 EVT InVT1 = VecIn1.getValueType();
23059 EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
23060
23061 unsigned NumElems = VT.getVectorNumElements();
23062 unsigned ShuffleNumElems = NumElems;
23063
23064 // If we artificially split a vector in two already, then the offsets in the
23065 // operands will all be based off of VecIn1, even those in VecIn2.
23066 unsigned Vec2Offset = DidSplitVec ? 0 : InVT1.getVectorNumElements();
23067
23068 uint64_t VTSize = VT.getFixedSizeInBits();
23069 uint64_t InVT1Size = InVT1.getFixedSizeInBits();
23070 uint64_t InVT2Size = InVT2.getFixedSizeInBits();
23071
23072 assert(InVT2Size <= InVT1Size &&
23073 "Inputs must be sorted to be in non-increasing vector size order.");
23074
23075 // We can't generate a shuffle node with mismatched input and output types.
23076 // Try to make the types match the type of the output.
23077 if (InVT1 != VT || InVT2 != VT) {
23078 if ((VTSize % InVT1Size == 0) && InVT1 == InVT2) {
23079 // If the output vector length is a multiple of both input lengths,
23080 // we can concatenate them and pad the rest with undefs.
23081 unsigned NumConcats = VTSize / InVT1Size;
23082 assert(NumConcats >= 2 && "Concat needs at least two inputs!");
23083 SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
23084 ConcatOps[0] = VecIn1;
23085 ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
23086 VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
23087 VecIn2 = SDValue();
23088 } else if (InVT1Size == VTSize * 2) {
23089 if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
23090 return SDValue();
23091
23092 if (!VecIn2.getNode()) {
23093 // If we only have one input vector, and it's twice the size of the
23094 // output, split it in two.
23095 VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
23096 DAG.getVectorIdxConstant(NumElems, DL));
23097 VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
23098 // Since we now have shorter input vectors, adjust the offset of the
23099 // second vector's start.
23100 Vec2Offset = NumElems;
23101 } else {
23102 assert(InVT2Size <= InVT1Size &&
23103 "Second input is not going to be larger than the first one.");
23104
23105 // VecIn1 is wider than the output, and we have another, possibly
23106 // smaller input. Pad the smaller input with undefs, shuffle at the
23107 // input vector width, and extract the output.
23108 // The shuffle type is different than VT, so check legality again.
23109 if (LegalOperations &&
23111 return SDValue();
23112
23113 // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
23114 // lower it back into a BUILD_VECTOR. So if the inserted type is
23115 // illegal, don't even try.
23116 if (InVT1 != InVT2) {
23117 if (!TLI.isTypeLegal(InVT2))
23118 return SDValue();
23119 VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
23120 DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
23121 }
23122 ShuffleNumElems = NumElems * 2;
23123 }
23124 } else if (InVT2Size * 2 == VTSize && InVT1Size == VTSize) {
23125 SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
23126 ConcatOps[0] = VecIn2;
23127 VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
23128 } else if (InVT1Size / VTSize > 1 && InVT1Size % VTSize == 0) {
23129 if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems) ||
23130 !TLI.isTypeLegal(InVT1) || !TLI.isTypeLegal(InVT2))
23131 return SDValue();
23132 // If the dest vector has fewer than two elements, then using a shuffle and
23133 // extract from larger regs will cost even more.
23134 if (VT.getVectorNumElements() <= 2 || !VecIn2.getNode())
23135 return SDValue();
23136 assert(InVT2Size <= InVT1Size &&
23137 "Second input is not going to be larger than the first one.");
23138
23139 // VecIn1 is wider than the output, and we have another, possibly
23140 // smaller input. Pad the smaller input with undefs, shuffle at the
23141 // input vector width, and extract the output.
23142 // The shuffle type is different than VT, so check legality again.
23143 if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
23144 return SDValue();
23145
23146 if (InVT1 != InVT2) {
23147 VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
23148 DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
23149 }
23150 ShuffleNumElems = InVT1Size / VTSize * NumElems;
23151 } else {
23152 // TODO: Support cases where the length mismatch isn't exactly by a
23153 // factor of 2.
23154 // TODO: Move this check upwards, so that if we have bad type
23155 // mismatches, we don't create any DAG nodes.
23156 return SDValue();
23157 }
23158 }
23159
23160 // Initialize mask to undef.
23161 SmallVector<int, 8> Mask(ShuffleNumElems, -1);
23162
23163 // Only need to run up to the number of elements actually used, not the
23164 // total number of elements in the shuffle - if we are shuffling a wider
23165 // vector, the high lanes should be set to undef.
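// For instance (illustrative), with NumElems == 4 and LeftIdx == 1: an operand
// extracted from VecIn[1] at index 2 yields Mask[i] = 2, while an operand
// extracted from VecIn[2] yields Mask[i] = Vec2Offset + its extract index.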
23166 for (unsigned i = 0; i != NumElems; ++i) {
23167 if (VectorMask[i] <= 0)
23168 continue;
23169
23170 unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
23171 if (VectorMask[i] == (int)LeftIdx) {
23172 Mask[i] = ExtIndex;
23173 } else if (VectorMask[i] == (int)LeftIdx + 1) {
23174 Mask[i] = Vec2Offset + ExtIndex;
23175 }
23176 }
23177
23178 // The type the input vectors may have changed above.
23179 InVT1 = VecIn1.getValueType();
23180
23181 // If we already have a VecIn2, it should have the same type as VecIn1.
23182 // If we don't, get an undef/zero vector of the appropriate type.
23183 VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
23184 assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");
23185
23186 SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
23187 if (ShuffleNumElems > NumElems)
23188 Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);
23189
23190 return Shuffle;
23191}
23192
23193 static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
23194 assert(BV->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
23195
23196 // First, determine where the build vector is not undef.
23197 // TODO: We could extend this to handle zero elements as well as undefs.
23198 int NumBVOps = BV->getNumOperands();
23199 int ZextElt = -1;
23200 for (int i = 0; i != NumBVOps; ++i) {
23201 SDValue Op = BV->getOperand(i);
23202 if (Op.isUndef())
23203 continue;
23204 if (ZextElt == -1)
23205 ZextElt = i;
23206 else
23207 return SDValue();
23208 }
23209 // Bail out if there's no non-undef element.
23210 if (ZextElt == -1)
23211 return SDValue();
23212
23213 // The build vector contains some number of undef elements and exactly
23214 // one other element. That other element must be a zero-extended scalar
23215 // extracted from a vector at a constant index to turn this into a shuffle.
23216 // Also, require that the build vector does not implicitly truncate/extend
23217 // its elements.
23218 // TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND.
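// For example (little-endian, illustrative):
// (v2i64 build_vector (zext (extract_vector_elt v4i32:V, 2) to i64), undef)
// can become (bitcast (vector_shuffle<2,4,u,u> V, zerovector) to v2i64).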
23219 EVT VT = BV->getValueType(0);
23220 SDValue Zext = BV->getOperand(ZextElt);
23221 if (Zext.getOpcode() != ISD::ZERO_EXTEND || !Zext.hasOneUse() ||
23222 Zext.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
23223 !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)) ||
23224 Zext.getValueSizeInBits() != VT.getScalarSizeInBits())
23225 return SDValue();
23226
23227 // The zero-extend must be a multiple of the source size, and we must be
23228 // building a vector of the same size as the source of the extract element.
23229 SDValue Extract = Zext.getOperand(0);
23230 unsigned DestSize = Zext.getValueSizeInBits();
23231 unsigned SrcSize = Extract.getValueSizeInBits();
23232 if (DestSize % SrcSize != 0 ||
23233 Extract.getOperand(0).getValueSizeInBits() != VT.getSizeInBits())
23234 return SDValue();
23235
23236 // Create a shuffle mask that will combine the extracted element with zeros
23237 // and undefs.
23238 int ZextRatio = DestSize / SrcSize;
23239 int NumMaskElts = NumBVOps * ZextRatio;
23240 SmallVector<int, 32> ShufMask(NumMaskElts, -1);
23241 for (int i = 0; i != NumMaskElts; ++i) {
23242 if (i / ZextRatio == ZextElt) {
23243 // The low bits of the (potentially translated) extracted element map to
23244 // the source vector. The high bits map to zero. We will use a zero vector
23245 // as the 2nd source operand of the shuffle, so use the 1st element of
23246 // that vector (mask value is number-of-elements) for the high bits.
23247 int Low = DAG.getDataLayout().isBigEndian() ? (ZextRatio - 1) : 0;
23248 ShufMask[i] = (i % ZextRatio == Low) ? Extract.getConstantOperandVal(1)
23249 : NumMaskElts;
23250 }
23251
23252 // Undef elements of the build vector remain undef because we initialize
23253 // the shuffle mask with -1.
23254 }
23255
23256 // buildvec undef, ..., (zext (extractelt V, IndexC)), undef... -->
23257 // bitcast (shuffle V, ZeroVec, VectorMask)
23258 SDLoc DL(BV);
23259 EVT VecVT = Extract.getOperand(0).getValueType();
23260 SDValue ZeroVec = DAG.getConstant(0, DL, VecVT);
23261 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
23262 SDValue Shuf = TLI.buildLegalVectorShuffle(VecVT, DL, Extract.getOperand(0),
23263 ZeroVec, ShufMask, DAG);
23264 if (!Shuf)
23265 return SDValue();
23266 return DAG.getBitcast(VT, Shuf);
23267}
23268
23269// FIXME: promote to STLExtras.
23270template <typename R, typename T>
23271static auto getFirstIndexOf(R &&Range, const T &Val) {
23272 auto I = find(Range, Val);
23273 if (I == Range.end())
23274 return static_cast<decltype(std::distance(Range.begin(), I))>(-1);
23275 return std::distance(Range.begin(), I);
23276}
23277
23278// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
23279// operations. If the types of the vectors we're extracting from allow it,
23280// turn this into a vector_shuffle node.
23281SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
23282 SDLoc DL(N);
23283 EVT VT = N->getValueType(0);
23284
23285 // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
23286 if (!isTypeLegal(VT))
23287 return SDValue();
23288
23288
23289 if (SDValue V = reduceBuildVecToShuffleWithZero(N, DAG))
23290 return V;
23291
23292 // May only combine to shuffle after legalize if shuffle is legal.
23293 if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
23294 return SDValue();
23295
23296 bool UsesZeroVector = false;
23297 unsigned NumElems = N->getNumOperands();
23298
23299 // Record, for each element of the newly built vector, which input vector
23300 // that element comes from. -1 stands for undef, 0 for the zero vector,
23301 // and positive values for the input vectors.
23302 // VectorMask maps each element to its vector number, and VecIn maps vector
23303 // numbers to their initial SDValues.
23304
23305 SmallVector<int, 8> VectorMask(NumElems, -1);
23306 SmallVector<SDValue, 8> VecIn;
23307 VecIn.push_back(SDValue());
23308
23309 for (unsigned i = 0; i != NumElems; ++i) {
23310 SDValue Op = N->getOperand(i);
23311
23312 if (Op.isUndef())
23313 continue;
23314
23315 // See if we can use a blend with a zero vector.
23316 // TODO: Should we generalize this to a blend with an arbitrary constant
23317 // vector?
23318 if (isNullConstant(Op) || isNullFPConstant(Op)) {
23319 UsesZeroVector = true;
23320 VectorMask[i] = 0;
23321 continue;
23322 }
23323
23324 // Not an undef or zero. If the input is something other than an
23325 // EXTRACT_VECTOR_ELT with an in-range constant index, bail out.
23326 if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
23327 !isa<ConstantSDNode>(Op.getOperand(1)))
23328 return SDValue();
23329 SDValue ExtractedFromVec = Op.getOperand(0);
23330
23331 if (ExtractedFromVec.getValueType().isScalableVector())
23332 return SDValue();
23333
23334 const APInt &ExtractIdx = Op.getConstantOperandAPInt(1);
23335 if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements()))
23336 return SDValue();
23337
23338 // All inputs must have the same element type as the output.
23339 if (VT.getVectorElementType() !=
23340 ExtractedFromVec.getValueType().getVectorElementType())
23341 return SDValue();
23342
23343 // Have we seen this input vector before?
23344 // The vectors are expected to be tiny (usually 1 or 2 elements), so using
23345 // a map back from SDValues to numbers isn't worth it.
23346 int Idx = getFirstIndexOf(VecIn, ExtractedFromVec);
23347 if (Idx == -1) { // A new source vector?
23348 Idx = VecIn.size();
23349 VecIn.push_back(ExtractedFromVec);
23350 }
23351
23352 VectorMask[i] = Idx;
23353 }
23354
23355 // If we didn't find at least one input vector, bail out.
23356 if (VecIn.size() < 2)
23357 return SDValue();
23358
23359 // If all the operands of BUILD_VECTOR extract from the same
23360 // vector, then split the vector efficiently based on the maximum
23361 // vector access index and adjust the VectorMask and
23362 // VecIn accordingly.
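// For instance (illustrative): a v2i32 built from extracts of a v16i32 source
// at indices 0 and 5 has MaxIndex == 5, so NearestPow2 == 8 and SplitSize == 4;
// assuming v4i32 is legal, the source is split into two v4i32 halves and the
// mask entries are remapped to those halves.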
23363 bool DidSplitVec = false;
23364 if (VecIn.size() == 2) {
23365 unsigned MaxIndex = 0;
23366 unsigned NearestPow2 = 0;
23367 SDValue Vec = VecIn.back();
23368 EVT InVT = Vec.getValueType();
23369 SmallVector<unsigned, 8> IndexVec(NumElems, 0);
23370
23371 for (unsigned i = 0; i < NumElems; i++) {
23372 if (VectorMask[i] <= 0)
23373 continue;
23374 unsigned Index = N->getOperand(i).getConstantOperandVal(1);
23375 IndexVec[i] = Index;
23376 MaxIndex = std::max(MaxIndex, Index);
23377 }
23378
23379 NearestPow2 = PowerOf2Ceil(MaxIndex);
23380 if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 &&
23381 NumElems * 2 < NearestPow2) {
23382 unsigned SplitSize = NearestPow2 / 2;
23383 EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
23384 InVT.getVectorElementType(), SplitSize);
23385 if (TLI.isTypeLegal(SplitVT) &&
23386 SplitSize + SplitVT.getVectorNumElements() <=
23387 InVT.getVectorNumElements()) {
23388 SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
23389 DAG.getVectorIdxConstant(SplitSize, DL));
23390 SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
23391 DAG.getVectorIdxConstant(0, DL));
23392 VecIn.pop_back();
23393 VecIn.push_back(VecIn1);
23394 VecIn.push_back(VecIn2);
23395 DidSplitVec = true;
23396
23397 for (unsigned i = 0; i < NumElems; i++) {
23398 if (VectorMask[i] <= 0)
23399 continue;
23400 VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2;
23401 }
23402 }
23403 }
23404 }
23405
23406 // Sort input vectors by decreasing vector element count,
23407 // while preserving the relative order of equally-sized vectors.
23408 // Note that we keep the first "implicit" zero vector as-is.
23409 SmallVector<SDValue, 8> SortedVecIn(VecIn);
23410 llvm::stable_sort(MutableArrayRef<SDValue>(SortedVecIn).drop_front(),
23411 [](const SDValue &a, const SDValue &b) {
23412 return a.getValueType().getVectorNumElements() >
23413 b.getValueType().getVectorNumElements();
23414 });
23415
23416 // We now also need to rebuild the VectorMask, because it referenced element
23417 // order in VecIn, and we just sorted them.
23418 for (int &SourceVectorIndex : VectorMask) {
23419 if (SourceVectorIndex <= 0)
23420 continue;
23421 unsigned Idx = getFirstIndexOf(SortedVecIn, VecIn[SourceVectorIndex]);
23422 assert(Idx > 0 && Idx < SortedVecIn.size() &&
23423 VecIn[SourceVectorIndex] == SortedVecIn[Idx] && "Remapping failure");
23424 SourceVectorIndex = Idx;
23425 }
23426
23427 VecIn = std::move(SortedVecIn);
23428
23429 // TODO: Should this fire if some of the input vectors has illegal type (like
23430 // it does now), or should we let legalization run its course first?
23431
23432 // Shuffle phase:
23433 // Take pairs of vectors, and shuffle them so that the result has elements
23434 // from these vectors in the correct places.
23435 // For example, given:
23436 // t10: i32 = extract_vector_elt t1, Constant:i64<0>
23437 // t11: i32 = extract_vector_elt t2, Constant:i64<0>
23438 // t12: i32 = extract_vector_elt t3, Constant:i64<0>
23439 // t13: i32 = extract_vector_elt t1, Constant:i64<1>
23440 // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
23441 // We will generate:
23442 // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
23443 // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
23444 SmallVector<SDValue, 4> Shuffles;
23445 for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
23446 unsigned LeftIdx = 2 * In + 1;
23447 SDValue VecLeft = VecIn[LeftIdx];
23448 SDValue VecRight =
23449 (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
23450
23451 if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
23452 VecRight, LeftIdx, DidSplitVec))
23453 Shuffles.push_back(Shuffle);
23454 else
23455 return SDValue();
23456 }
23457
23458 // If we need the zero vector as an "ingredient" in the blend tree, add it
23459 // to the list of shuffles.
23460 if (UsesZeroVector)
23461 Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
23462 : DAG.getConstantFP(0.0, DL, VT));
23463
23464 // If we only have one shuffle, we're done.
23465 if (Shuffles.size() == 1)
23466 return Shuffles[0];
23467
23468 // Update the vector mask to point to the post-shuffle vectors.
23469 for (int &Vec : VectorMask)
23470 if (Vec == 0)
23471 Vec = Shuffles.size() - 1;
23472 else
23473 Vec = (Vec - 1) / 2;
23474
23475 // More than one shuffle. Generate a binary tree of blends, e.g. if from
23476 // the previous step we got the set of shuffles t10, t11, t12, t13, we will
23477 // generate:
23478 // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
23479 // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
23480 // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
23481 // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
23482 // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
23483 // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
23484 // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21
23485
23486 // Make sure the initial size of the shuffle list is even.
23487 if (Shuffles.size() % 2)
23488 Shuffles.push_back(DAG.getUNDEF(VT));
23489
23490 for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
23491 if (CurSize % 2) {
23492 Shuffles[CurSize] = DAG.getUNDEF(VT);
23493 CurSize++;
23494 }
23495 for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
23496 int Left = 2 * In;
23497 int Right = 2 * In + 1;
23498 SmallVector<int, 8> Mask(NumElems, -1);
23499 SDValue L = Shuffles[Left];
23500 ArrayRef<int> LMask;
23501 bool IsLeftShuffle = L.getOpcode() == ISD::VECTOR_SHUFFLE &&
23502 L.use_empty() && L.getOperand(1).isUndef() &&
23503 L.getOperand(0).getValueType() == L.getValueType();
23504 if (IsLeftShuffle) {
23505 LMask = cast<ShuffleVectorSDNode>(L.getNode())->getMask();
23506 L = L.getOperand(0);
23507 }
23508 SDValue R = Shuffles[Right];
23509 ArrayRef<int> RMask;
23510 bool IsRightShuffle = R.getOpcode() == ISD::VECTOR_SHUFFLE &&
23511 R.use_empty() && R.getOperand(1).isUndef() &&
23512 R.getOperand(0).getValueType() == R.getValueType();
23513 if (IsRightShuffle) {
23514 RMask = cast<ShuffleVectorSDNode>(R.getNode())->getMask();
23515 R = R.getOperand(0);
23516 }
23517 for (unsigned I = 0; I != NumElems; ++I) {
23518 if (VectorMask[I] == Left) {
23519 Mask[I] = I;
23520 if (IsLeftShuffle)
23521 Mask[I] = LMask[I];
23522 VectorMask[I] = In;
23523 } else if (VectorMask[I] == Right) {
23524 Mask[I] = I + NumElems;
23525 if (IsRightShuffle)
23526 Mask[I] = RMask[I] + NumElems;
23527 VectorMask[I] = In;
23528 }
23529 }
23530
23531 Shuffles[In] = DAG.getVectorShuffle(VT, DL, L, R, Mask);
23532 }
23533 }
23534 return Shuffles[0];
23535}
23536
23537 // Try to turn a build vector of zero extends of extract vector elts into a
23538 // vector zero extend and possibly an extract subvector.
23539// TODO: Support sign extend?
23540// TODO: Allow undef elements?
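// For example (illustrative):
// (v4i32 build_vector (zext (extract_vector_elt v8i16:X, 4)),
//                     (zext (extract_vector_elt X, 5)),
//                     (zext (extract_vector_elt X, 6)),
//                     (zext (extract_vector_elt X, 7)))
// becomes (zero_extend (v4i16 extract_subvector X, 4) to v4i32).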
23541SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
23542 if (LegalOperations)
23543 return SDValue();
23544
23545 EVT VT = N->getValueType(0);
23546
23547 bool FoundZeroExtend = false;
23548 SDValue Op0 = N->getOperand(0);
23549 auto checkElem = [&](SDValue Op) -> int64_t {
23550 unsigned Opc = Op.getOpcode();
23551 FoundZeroExtend |= (Opc == ISD::ZERO_EXTEND);
23552 if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND) &&
23553 Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
23554 Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
23555 if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
23556 return C->getZExtValue();
23557 return -1;
23558 };
23559
23560 // Make sure the first element matches
23561 // (zext (extract_vector_elt X, C))
23562 // Offset must be a constant multiple of the
23563 // known-minimum vector length of the result type.
23564 int64_t Offset = checkElem(Op0);
23565 if (Offset < 0 || (Offset % VT.getVectorNumElements()) != 0)
23566 return SDValue();
23567
23568 unsigned NumElems = N->getNumOperands();
23569 SDValue In = Op0.getOperand(0).getOperand(0);
23570 EVT InSVT = In.getValueType().getScalarType();
23571 EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems);
23572
23573 // Don't create an illegal input type after type legalization.
23574 if (LegalTypes && !TLI.isTypeLegal(InVT))
23575 return SDValue();
23576
23577 // Ensure all the elements come from the same vector and are adjacent.
23578 for (unsigned i = 1; i != NumElems; ++i) {
23579 if ((Offset + i) != checkElem(N->getOperand(i)))
23580 return SDValue();
23581 }
23582
23583 SDLoc DL(N);
23584 In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
23585 Op0.getOperand(0).getOperand(1));
23586 return DAG.getNode(FoundZeroExtend ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND, DL,
23587 VT, In);
23588}
23589
23590 // If this is a very simple BUILD_VECTOR with first element being a ZERO_EXTEND,
23591 // and all other elements being constant zeros, granularize the BUILD_VECTOR's
23592 // element width, absorbing the ZERO_EXTEND, turning it into a constant zero op.
23593 // This pattern can appear during legalization.
23594 //
23595 // NOTE: This can be generalized to allow more than a single
23596 // non-constant-zero op, UNDEF's, and to be KnownBits-based.
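// For example (little-endian, illustrative): assuming i32 and v4i32 are legal,
// (v2i64 build_vector (zext i32:x to i64), (i64 0)) can be rebuilt as
// (bitcast (v4i32 build_vector x, 0, 0, 0) to v2i64), halving the element
// width and absorbing the zero-extension into explicit zero elements.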
23597SDValue DAGCombiner::convertBuildVecZextToBuildVecWithZeros(SDNode *N) {
23598 // Don't run this after legalization. Targets may have other preferences.
23599 if (Level >= AfterLegalizeDAG)
23600 return SDValue();
23601
23602 // FIXME: support big-endian.
23603 if (DAG.getDataLayout().isBigEndian())
23604 return SDValue();
23605
23606 EVT VT = N->getValueType(0);
23607 EVT OpVT = N->getOperand(0).getValueType();
23608 assert(!VT.isScalableVector() && "Encountered scalable BUILD_VECTOR?");
23609
23610 EVT OpIntVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
23611
23612 if (!TLI.isTypeLegal(OpIntVT) ||
23613 (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::BITCAST, OpIntVT)))
23614 return SDValue();
23615
23616 unsigned EltBitwidth = VT.getScalarSizeInBits();
23617 // NOTE: the actual width of operands may be wider than that!
23618
23619 // Analyze all operands of this BUILD_VECTOR. What is the largest number of
23620 // active bits they all have? We'll want to truncate them all to that width.
23621 unsigned ActiveBits = 0;
23622 APInt KnownZeroOps(VT.getVectorNumElements(), 0);
23623 for (auto I : enumerate(N->ops())) {
23624 SDValue Op = I.value();
23625 // FIXME: support UNDEF elements?
23626 if (auto *Cst = dyn_cast<ConstantSDNode>(Op)) {
23627 unsigned OpActiveBits =
23628 Cst->getAPIntValue().trunc(EltBitwidth).getActiveBits();
23629 if (OpActiveBits == 0) {
23630 KnownZeroOps.setBit(I.index());
23631 continue;
23632 }
23633 // Profitability check: don't allow non-zero constant operands.
23634 return SDValue();
23635 }
23636 // Profitability check: there must only be a single non-zero operand,
23637 // and it must be the first operand of the BUILD_VECTOR.
23638 if (I.index() != 0)
23639 return SDValue();
23640 // The operand must be a zero-extension itself.
23641 // FIXME: this could be generalized to known leading zeros check.
23642 if (Op.getOpcode() != ISD::ZERO_EXTEND)
23643 return SDValue();
23644 unsigned CurrActiveBits =
23645 Op.getOperand(0).getValueSizeInBits().getFixedValue();
23646 assert(!ActiveBits && "Already encountered non-constant-zero operand?");
23647 ActiveBits = CurrActiveBits;
23648 // We want to at least halve the element size.
23649 if (2 * ActiveBits > EltBitwidth)
23650 return SDValue();
23651 }
23652
23653 // This BUILD_VECTOR must have at least one non-constant-zero operand.
23654 if (ActiveBits == 0)
23655 return SDValue();
23656
23657 // We have EltBitwidth bits, the *minimal* chunk size is ActiveBits,
23658 // into how many chunks can we split our element width?
23659 EVT NewScalarIntVT, NewIntVT;
23660 std::optional<unsigned> Factor;
23661 // We can split the element into at least two chunks, but not into more
23662 // than |_ EltBitwidth / ActiveBits _| chunks. Find the largest split factor
23663 // that evenly divides the element width and for which the resulting
23664 // types/operations on that chunk width are legal.
23665 assert(2 * ActiveBits <= EltBitwidth &&
23666 "We know that half or less bits of the element are active.");
23667 for (unsigned Scale = EltBitwidth / ActiveBits; Scale >= 2; --Scale) {
23668 if (EltBitwidth % Scale != 0)
23669 continue;
23670 unsigned ChunkBitwidth = EltBitwidth / Scale;
23671 assert(ChunkBitwidth >= ActiveBits && "As per starting point.");
23672 NewScalarIntVT = EVT::getIntegerVT(*DAG.getContext(), ChunkBitwidth);
23673 NewIntVT = EVT::getVectorVT(*DAG.getContext(), NewScalarIntVT,
23674 Scale * N->getNumOperands());
23675 if (!TLI.isTypeLegal(NewScalarIntVT) || !TLI.isTypeLegal(NewIntVT) ||
23676 (LegalOperations &&
23677 !(TLI.isOperationLegalOrCustom(ISD::TRUNCATE, NewScalarIntVT) &&
23678 TLI.isOperationLegalOrCustom(ISD::BUILD_VECTOR, NewIntVT))))
23679 continue;
23680 Factor = Scale;
23681 break;
23682 }
23683 if (!Factor)
23684 return SDValue();
23685
23686 SDLoc DL(N);
23687 SDValue ZeroOp = DAG.getConstant(0, DL, NewScalarIntVT);
23688
23689 // Recreate the BUILD_VECTOR, with elements now being Factor times smaller.
23690 SmallVector<SDValue, 16> NewOps;
23691 NewOps.reserve(NewIntVT.getVectorNumElements());
23692 for (auto I : enumerate(N->ops())) {
23693 SDValue Op = I.value();
23694 assert(!Op.isUndef() && "FIXME: after allowing UNDEF's, handle them here.");
23695 unsigned SrcOpIdx = I.index();
23696 if (KnownZeroOps[SrcOpIdx]) {
23697 NewOps.append(*Factor, ZeroOp);
23698 continue;
23699 }
23700 Op = DAG.getBitcast(OpIntVT, Op);
23701 Op = DAG.getNode(ISD::TRUNCATE, DL, NewScalarIntVT, Op);
23702 NewOps.emplace_back(Op);
23703 NewOps.append(*Factor - 1, ZeroOp);
23704 }
23705 assert(NewOps.size() == NewIntVT.getVectorNumElements());
23706 SDValue NewBV = DAG.getBuildVector(NewIntVT, DL, NewOps);
23707 NewBV = DAG.getBitcast(VT, NewBV);
23708 return NewBV;
23709}
23710
23711SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
23712 EVT VT = N->getValueType(0);
23713
23714 // A vector built entirely of undefs is undef.
23715 if (ISD::allOperandsUndef(N))
23716 return DAG.getUNDEF(VT);
23717
23718 // If this is a splat of a bitcast from another vector, change to a
23719 // concat_vector.
23720 // For example:
23721 // (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) ->
23722 // (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X))))
23723 //
23724 // If X is a build_vector itself, the concat can become a larger build_vector.
23725 // TODO: Maybe this is useful for non-splat too?
23726 if (!LegalOperations) {
23727 SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue();
23728 // Only change build_vector to a concat_vector if the splat value type is
23729 // same as the vector element type.
23730 if (Splat && Splat.getValueType() == VT.getVectorElementType()) {
23731 Splat = peekThroughBitcasts(Splat);
23732 EVT SrcVT = Splat.getValueType();
23733 if (SrcVT.isVector()) {
23734 unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
23735 EVT NewVT = EVT::getVectorVT(*DAG.getContext(),
23736 SrcVT.getVectorElementType(), NumElts);
23737 if (!LegalTypes || TLI.isTypeLegal(NewVT)) {
23738 SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat);
23739 SDValue Concat =
23740 DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), NewVT, Ops);
23741 return DAG.getBitcast(VT, Concat);
23742 }
23743 }
23744 }
23745 }
23746
23747 // Check if we can express BUILD VECTOR via subvector extract.
23748 if (!LegalTypes && (N->getNumOperands() > 1)) {
23749 SDValue Op0 = N->getOperand(0);
23750 auto checkElem = [&](SDValue Op) -> uint64_t {
23751 if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
23752 (Op0.getOperand(0) == Op.getOperand(0)))
23753 if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
23754 return CNode->getZExtValue();
23755 return -1;
23756 };
23757
23758 int Offset = checkElem(Op0);
23759 for (unsigned i = 0; i < N->getNumOperands(); ++i) {
23760 if (Offset + i != checkElem(N->getOperand(i))) {
23761 Offset = -1;
23762 break;
23763 }
23764 }
23765
23766 if ((Offset == 0) &&
23767 (Op0.getOperand(0).getValueType() == N->getValueType(0)))
23768 return Op0.getOperand(0);
23769 if ((Offset != -1) &&
23770 ((Offset % N->getValueType(0).getVectorNumElements()) ==
23771 0)) // IDX must be multiple of output size.
23772 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
23773 Op0.getOperand(0), Op0.getOperand(1));
23774 }
23775
23776 if (SDValue V = convertBuildVecZextToZext(N))
23777 return V;
23778
23779 if (SDValue V = convertBuildVecZextToBuildVecWithZeros(N))
23780 return V;
23781
23782 if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
23783 return V;
23784
23785 if (SDValue V = reduceBuildVecTruncToBitCast(N))
23786 return V;
23787
23788 if (SDValue V = reduceBuildVecToShuffle(N))
23789 return V;
23790
23791 // A splat of a single element is a SPLAT_VECTOR if supported on the target.
23792 // Do this late as some of the above may replace the splat.
23793 if (TLI.getOperationAction(ISD::SPLAT_VECTOR, VT) == TargetLowering::Legal)
23794 if (SDValue V = cast<BuildVectorSDNode>(N)->getSplatValue()) {
23795 assert(!V.isUndef() && "Splat of undef should have been handled earlier");
23796 return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, V);
23797 }
23798
23799 return SDValue();
23800}
23801
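// Combine a CONCAT_VECTORS whose operands are bitcasts of scalars (or UNDEFs)
// into a single BUILD_VECTOR of those scalars, bitcast to the result type.
// For example (illustrative): concat_vectors (v2i32 (bitcast i64:a)),
// (v2i32 (bitcast i64:b)) -> (bitcast (v2i64 build_vector a, b) to v4i32).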
23802 static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
23803 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
23804 EVT OpVT = N->getOperand(0).getValueType();
23805
23806 // If the operands are legal vectors, leave them alone.
23807 if (TLI.isTypeLegal(OpVT) || OpVT.isScalableVector())
23808 return SDValue();
23809
23810 SDLoc DL(N);
23811 EVT VT = N->getValueType(0);
23812 SmallVector<SDValue, 8> Ops;
23813 EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
23814
23815 // Keep track of what we encounter.
23816 bool AnyInteger = false;
23817 bool AnyFP = false;
23818 for (const SDValue &Op : N->ops()) {
23819 if (ISD::BITCAST == Op.getOpcode() &&
23820 !Op.getOperand(0).getValueType().isVector())
23821 Ops.push_back(Op.getOperand(0));
23822 else if (ISD::UNDEF == Op.getOpcode())
23823 Ops.push_back(DAG.getNode(ISD::UNDEF, DL, SVT));
23824 else
23825 return SDValue();
23826
23827 // Note whether we encounter an integer or floating point scalar.
23828 // If it's neither, bail out, it could be something weird like x86mmx.
23829 EVT LastOpVT = Ops.back().getValueType();
23830 if (LastOpVT.isFloatingPoint())
23831 AnyFP = true;
23832 else if (LastOpVT.isInteger())
23833 AnyInteger = true;
23834 else
23835 return SDValue();
23836 }
23837
23838 // If any of the operands is a floating point scalar bitcast to a vector,
23839 // use floating point types throughout, and bitcast everything.
23840 // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
23841 if (AnyFP) {
23842 SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
23843 if (AnyInteger) {
23844 for (SDValue &Op : Ops) {
23845 if (Op.getValueType() == SVT)
23846 continue;
23847 if (Op.isUndef())
23848 Op = DAG.getNode(ISD::UNDEF, DL, SVT);
23849 else
23850 Op = DAG.getBitcast(SVT, Op);
23851 }
23852 }
23853 }
23854
23855 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
23856 VT.getSizeInBits() / SVT.getSizeInBits());
23857 return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
23858}
23859
23860// Attempt to merge nested concat_vectors/undefs.
23861// Fold concat_vectors(concat_vectors(x,y,z,w),u,u,concat_vectors(a,b,c,d))
23862// --> concat_vectors(x,y,z,w,u,u,u,u,u,u,u,u,a,b,c,d)
23863 static SDValue combineConcatVectorOfConcatVectors(SDNode *N,
23864 SelectionDAG &DAG) {
23865 EVT VT = N->getValueType(0);
23866
23867 // Ensure we're concatenating UNDEF and CONCAT_VECTORS nodes of similar types.
23868 EVT SubVT;
23869 SDValue FirstConcat;
23870 for (const SDValue &Op : N->ops()) {
23871 if (Op.isUndef())
23872 continue;
23873 if (Op.getOpcode() != ISD::CONCAT_VECTORS)
23874 return SDValue();
23875 if (!FirstConcat) {
23876 SubVT = Op.getOperand(0).getValueType();
23877 if (!DAG.getTargetLoweringInfo().isTypeLegal(SubVT))
23878 return SDValue();
23879 FirstConcat = Op;
23880 continue;
23881 }
23882 if (SubVT != Op.getOperand(0).getValueType())
23883 return SDValue();
23884 }
23885 assert(FirstConcat && "Concat of all-undefs found");
23886
23887 SmallVector<SDValue> ConcatOps;
23888 for (const SDValue &Op : N->ops()) {
23889 if (Op.isUndef()) {
23890 ConcatOps.append(FirstConcat->getNumOperands(), DAG.getUNDEF(SubVT));
23891 continue;
23892 }
23893 ConcatOps.append(Op->op_begin(), Op->op_end());
23894 }
23895 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, ConcatOps);
23896}
23897
23898// Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
23899// operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
23900// most two distinct vectors the same size as the result, attempt to turn this
23901// into a legal shuffle.
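// For example (illustrative): concat_vectors (extract_subvector v8i16:A, 0),
// (extract_subvector v8i16:B, 4) can become
// (vector_shuffle<0,1,2,3,12,13,14,15> A, B) when that shuffle is legal.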
23902 static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
23903 EVT VT = N->getValueType(0);
23904 EVT OpVT = N->getOperand(0).getValueType();
23905
23906 // We currently can't generate an appropriate shuffle for a scalable vector.
23907 if (VT.isScalableVector())
23908 return SDValue();
23909
23910 int NumElts = VT.getVectorNumElements();
23911 int NumOpElts = OpVT.getVectorNumElements();
23912
23913 SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
23914 SmallVector<int, 8> Mask;
23915
23916 for (SDValue Op : N->ops()) {
23917 Op = peekThroughBitcasts(Op);
23918
23919 // UNDEF nodes convert to UNDEF shuffle mask values.
23920 if (Op.isUndef()) {
23921 Mask.append((unsigned)NumOpElts, -1);
23922 continue;
23923 }
23924
23925 if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
23926 return SDValue();
23927
23928 // What vector are we extracting the subvector from and at what index?
23929 SDValue ExtVec = Op.getOperand(0);
23930 int ExtIdx = Op.getConstantOperandVal(1);
23931
23932 // We want the EVT of the original extraction to correctly scale the
23933 // extraction index.
23934 EVT ExtVT = ExtVec.getValueType();
23935 ExtVec = peekThroughBitcasts(ExtVec);
23936
23937 // UNDEF nodes convert to UNDEF shuffle mask values.
23938 if (ExtVec.isUndef()) {
23939 Mask.append((unsigned)NumOpElts, -1);
23940 continue;
23941 }
23942
23943 // Ensure that we are extracting a subvector from a vector the same
23944 // size as the result.
23945 if (ExtVT.getSizeInBits() != VT.getSizeInBits())
23946 return SDValue();
23947
23948 // Scale the subvector index to account for any bitcast.
23949 int NumExtElts = ExtVT.getVectorNumElements();
23950 if (0 == (NumExtElts % NumElts))
23951 ExtIdx /= (NumExtElts / NumElts);
23952 else if (0 == (NumElts % NumExtElts))
23953 ExtIdx *= (NumElts / NumExtElts);
23954 else
23955 return SDValue();
23956
23957 // At most we can reference 2 inputs in the final shuffle.
23958 if (SV0.isUndef() || SV0 == ExtVec) {
23959 SV0 = ExtVec;
23960 for (int i = 0; i != NumOpElts; ++i)
23961 Mask.push_back(i + ExtIdx);
23962 } else if (SV1.isUndef() || SV1 == ExtVec) {
23963 SV1 = ExtVec;
23964 for (int i = 0; i != NumOpElts; ++i)
23965 Mask.push_back(i + ExtIdx + NumElts);
23966 } else {
23967 return SDValue();
23968 }
23969 }
23970
23971 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
23972 return TLI.buildLegalVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
23973 DAG.getBitcast(VT, SV1), Mask, DAG);
23974}
23975
23976static SDValue combineConcatVectorOfCasts(SDNode *N, SelectionDAG &DAG) {
23977 unsigned CastOpcode = N->getOperand(0).getOpcode();
23978 switch (CastOpcode) {
23979 case ISD::SINT_TO_FP:
23980 case ISD::UINT_TO_FP:
23981 case ISD::FP_TO_SINT:
23982 case ISD::FP_TO_UINT:
23983 // TODO: Allow more opcodes?
23984 // case ISD::BITCAST:
23985 // case ISD::TRUNCATE:
23986 // case ISD::ZERO_EXTEND:
23987 // case ISD::SIGN_EXTEND:
23988 // case ISD::FP_EXTEND:
23989 break;
23990 default:
23991 return SDValue();
23992 }
23993
23994 EVT SrcVT = N->getOperand(0).getOperand(0).getValueType();
23995 if (!SrcVT.isVector())
23996 return SDValue();
23997
23998 // All operands of the concat must be the same kind of cast from the same
23999 // source type.
24001 for (SDValue Op : N->ops()) {
24002 if (Op.getOpcode() != CastOpcode || !Op.hasOneUse() ||
24003 Op.getOperand(0).getValueType() != SrcVT)
24004 return SDValue();
24005 SrcOps.push_back(Op.getOperand(0));
24006 }
24007
24008 // The wider cast must be supported by the target. This is unusual because
24009 // the type used to check operation support depends on the opcode. In
24010 // addition, check the other type in the cast to make sure this is really legal.
24011 EVT VT = N->getValueType(0);
24012 EVT SrcEltVT = SrcVT.getVectorElementType();
24013 ElementCount NumElts = SrcVT.getVectorElementCount() * N->getNumOperands();
24014 EVT ConcatSrcVT = EVT::getVectorVT(*DAG.getContext(), SrcEltVT, NumElts);
24015 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
24016 switch (CastOpcode) {
24017 case ISD::SINT_TO_FP:
24018 case ISD::UINT_TO_FP:
24019 if (!TLI.isOperationLegalOrCustom(CastOpcode, ConcatSrcVT) ||
24020 !TLI.isTypeLegal(VT))
24021 return SDValue();
24022 break;
24023 case ISD::FP_TO_SINT:
24024 case ISD::FP_TO_UINT:
24025 if (!TLI.isOperationLegalOrCustom(CastOpcode, VT) ||
24026 !TLI.isTypeLegal(ConcatSrcVT))
24027 return SDValue();
24028 break;
24029 default:
24030 llvm_unreachable("Unexpected cast opcode");
24031 }
24032
24033 // concat (cast X), (cast Y)... -> cast (concat X, Y...)
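// E.g. (illustrative): concat (v2f64 sint_to_fp X:v2i32), (v2f64 sint_to_fp
// Y:v2i32) becomes v4f64 sint_to_fp (v4i32 concat X, Y), provided sint_to_fp
// of v4i32 is legal or custom and v4f64 is a legal type.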
24034 SDLoc DL(N);
24035 SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatSrcVT, SrcOps);
24036 return DAG.getNode(CastOpcode, DL, VT, NewConcat);
24037}
24038
24039// See if this is a simple CONCAT_VECTORS with no UNDEF operands, and if one of
24040// the operands is a SHUFFLE_VECTOR, and all other operands are also operands
24041// to that SHUFFLE_VECTOR, create a wider SHUFFLE_VECTOR.
24042static SDValue combineConcatVectorOfShuffleAndItsOperands(
24043 SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes,
24044 bool LegalOperations) {
24045 EVT VT = N->getValueType(0);
24046 EVT OpVT = N->getOperand(0).getValueType();
24047 if (VT.isScalableVector())
24048 return SDValue();
24049
24050 // For now, only allow simple 2-operand concatenations.
24051 if (N->getNumOperands() != 2)
24052 return SDValue();
24053
24054 // Don't create illegal types/shuffles when not allowed to.
24055 if ((LegalTypes && !TLI.isTypeLegal(VT)) ||
24056 (LegalOperations &&
24058 return SDValue();
24059
24060 // Analyze all of the operands of the CONCAT_VECTORS. Out of all of them,
24061 // we want to find one that is: (1) a SHUFFLE_VECTOR (2) only used by us,
24062 // and (3) all operands of CONCAT_VECTORS must be either that SHUFFLE_VECTOR,
24063 // or one of the operands of that SHUFFLE_VECTOR (but not UNDEF!).
24064 // (4) and for now, the SHUFFLE_VECTOR must be unary.
24065 ShuffleVectorSDNode *SVN = nullptr;
24066 for (SDValue Op : N->ops()) {
24067 if (auto *CurSVN = dyn_cast<ShuffleVectorSDNode>(Op);
24068 CurSVN && CurSVN->getOperand(1).isUndef() && N->isOnlyUserOf(CurSVN) &&
24069 all_of(N->ops(), [CurSVN](SDValue Op) {
24070 // FIXME: can we allow UNDEF operands?
24071 return !Op.isUndef() &&
24072 (Op.getNode() == CurSVN || is_contained(CurSVN->ops(), Op));
24073 })) {
24074 SVN = CurSVN;
24075 break;
24076 }
24077 }
24078 if (!SVN)
24079 return SDValue();
24080
24081 // We are going to pad the shuffle operands, so any index that was picking
24082 // from the second operand must be adjusted.
24083 SmallVector<int, 16> AdjustedMask;
24084 AdjustedMask.reserve(SVN->getMask().size());
24085 assert(SVN->getOperand(1).isUndef() && "Expected unary shuffle!");
24086 append_range(AdjustedMask, SVN->getMask());
24087
24088 // Identity masks for the operands of the (padded) shuffle.
24089 SmallVector<int, 32> IdentityMask(2 * OpVT.getVectorNumElements());
24090 MutableArrayRef<int> FirstShufOpIdentityMask =
24091 MutableArrayRef<int>(IdentityMask)
24093 MutableArrayRef<int> SecondShufOpIdentityMask =
24095 std::iota(FirstShufOpIdentityMask.begin(), FirstShufOpIdentityMask.end(), 0);
24096 std::iota(SecondShufOpIdentityMask.begin(), SecondShufOpIdentityMask.end(),
24098
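// E.g. (illustrative): concatenating two v4i32 operands into v8i32, the
// pass-through mask for the first shuffle operand is <0,1,2,3>; the second
// operand lives in the second padded (v8i32) shuffle input, so its
// pass-through mask is <8,9,10,11>.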
24099 // New combined shuffle mask.
24101 Mask.reserve(VT.getVectorNumElements());
24102 for (SDValue Op : N->ops()) {
24103 assert(!Op.isUndef() && "Not expecting to concatenate UNDEF.");
24104 if (Op.getNode() == SVN) {
24105 append_range(Mask, AdjustedMask);
24106 continue;
24107 }
24108 if (Op == SVN->getOperand(0)) {
24109 append_range(Mask, FirstShufOpIdentityMask);
24110 continue;
24111 }
24112 if (Op == SVN->getOperand(1)) {
24113 append_range(Mask, SecondShufOpIdentityMask);
24114 continue;
24115 }
24116 llvm_unreachable("Unexpected operand!");
24117 }
24118
24119 // Don't create illegal shuffle masks.
24120 if (!TLI.isShuffleMaskLegal(Mask, VT))
24121 return SDValue();
24122
24123 // Pad the shuffle operands with UNDEF.
24124 SDLoc dl(N);
24125 std::array<SDValue, 2> ShufOps;
24126 for (auto I : zip(SVN->ops(), ShufOps)) {
24127 SDValue ShufOp = std::get<0>(I);
24128 SDValue &NewShufOp = std::get<1>(I);
24129 if (ShufOp.isUndef())
24130 NewShufOp = DAG.getUNDEF(VT);
24131 else {
24132 SmallVector<SDValue, 2> ShufOpParts(N->getNumOperands(),
24133 DAG.getUNDEF(OpVT));
24134 ShufOpParts[0] = ShufOp;
24135 NewShufOp = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, ShufOpParts);
24136 }
24137 }
24138 // Finally, create the new wide shuffle.
24139 return DAG.getVectorShuffle(VT, dl, ShufOps[0], ShufOps[1], Mask);
24140}
24141
24142SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
24143 // If we only have one input vector, we don't need to do any concatenation.
24144 if (N->getNumOperands() == 1)
24145 return N->getOperand(0);
24146
24147 // Check if all of the operands are undefs.
24148 EVT VT = N->getValueType(0);
24149 if (ISD::allOperandsUndef(N))
24150 return DAG.getUNDEF(VT);
24151
24152 // Optimize concat_vectors where all but the first of the vectors are undef.
24153 if (all_of(drop_begin(N->ops()),
24154 [](const SDValue &Op) { return Op.isUndef(); })) {
24155 SDValue In = N->getOperand(0);
24156 assert(In.getValueType().isVector() && "Must concat vectors");
24157
24158 // If the input is a concat_vectors, just make a larger concat by padding
24159 // with smaller undefs.
24160 //
24161 // Legalizing in AArch64TargetLowering::LowerCONCAT_VECTORS() and combining
24162 // here could cause an infinite loop. That legalizing happens when LegalDAG
24163 // is true and input of AArch64TargetLowering::LowerCONCAT_VECTORS() is
24164 // scalable.
24165 if (In.getOpcode() == ISD::CONCAT_VECTORS && In.hasOneUse() &&
24166 !(LegalDAG && In.getValueType().isScalableVector())) {
24167 unsigned NumOps = N->getNumOperands() * In.getNumOperands();
24168 SmallVector<SDValue, 4> Ops(In->op_begin(), In->op_end());
24169 Ops.resize(NumOps, DAG.getUNDEF(Ops[0].getValueType()));
24170 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
24171 }
24172
24173 SDValue Scalar = peekThroughOneUseBitcasts(In);
24174
24175 // concat_vectors(scalar_to_vector(scalar), undef) ->
24176 // scalar_to_vector(scalar)
24177 if (!LegalOperations && Scalar.getOpcode() == ISD::SCALAR_TO_VECTOR &&
24178 Scalar.hasOneUse()) {
24179 EVT SVT = Scalar.getValueType().getVectorElementType();
24180 if (SVT == Scalar.getOperand(0).getValueType())
24181 Scalar = Scalar.getOperand(0);
24182 }
24183
24184 // concat_vectors(scalar, undef) -> scalar_to_vector(scalar)
24185 if (!Scalar.getValueType().isVector() && In.hasOneUse()) {
24186 // If the bitcast type isn't legal, it might be a trunc of a legal type;
24187 // look through the trunc so we can still do the transform:
24188 // concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
24189 if (Scalar->getOpcode() == ISD::TRUNCATE &&
24190 !TLI.isTypeLegal(Scalar.getValueType()) &&
24191 TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
24192 Scalar = Scalar->getOperand(0);
24193
24194 EVT SclTy = Scalar.getValueType();
24195
24196 if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
24197 return SDValue();
24198
24199 // Bail out if the vector size is not a multiple of the scalar size.
24200 if (VT.getSizeInBits() % SclTy.getSizeInBits())
24201 return SDValue();
24202
24203 unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
24204 if (VNTNumElms < 2)
24205 return SDValue();
24206
24207 EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
24208 if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
24209 return SDValue();
24210
24211 SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
24212 return DAG.getBitcast(VT, Res);
24213 }
24214 }
24215
24216 // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
24217 // We have already tested above for an UNDEF only concatenation.
24218 // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
24219 // -> (BUILD_VECTOR A, B, ..., C, D, ...)
24220 auto IsBuildVectorOrUndef = [](const SDValue &Op) {
24221 return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
24222 };
24223 if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
24225 EVT SVT = VT.getScalarType();
24226
24227 EVT MinVT = SVT;
24228 if (!SVT.isFloatingPoint()) {
24229 // If the BUILD_VECTORs are built from integers, they may have different
24230 // operand types. Get the smallest type and truncate all operands to it.
24231 bool FoundMinVT = false;
24232 for (const SDValue &Op : N->ops())
24233 if (ISD::BUILD_VECTOR == Op.getOpcode()) {
24234 EVT OpSVT = Op.getOperand(0).getValueType();
24235 MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
24236 FoundMinVT = true;
24237 }
24238 assert(FoundMinVT && "Concat vector type mismatch");
24239 }
24240
24241 for (const SDValue &Op : N->ops()) {
24242 EVT OpVT = Op.getValueType();
24243 unsigned NumElts = OpVT.getVectorNumElements();
24244
24245 if (ISD::UNDEF == Op.getOpcode())
24246 Opnds.append(NumElts, DAG.getUNDEF(MinVT));
24247
24248 if (ISD::BUILD_VECTOR == Op.getOpcode()) {
24249 if (SVT.isFloatingPoint()) {
24250 assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
24251 Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
24252 } else {
24253 for (unsigned i = 0; i != NumElts; ++i)
24254 Opnds.push_back(
24255 DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
24256 }
24257 }
24258 }
24259
24260 assert(VT.getVectorNumElements() == Opnds.size() &&
24261 "Concat vector type mismatch");
24262 return DAG.getBuildVector(VT, SDLoc(N), Opnds);
24263 }
24264
24265 // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
24266 // FIXME: Add support for concat_vectors(bitcast(vec0),bitcast(vec1),...).
24267 if (SDValue V = combineConcatVectorOfScalars(N, DAG))
24268 return V;
24269
24270 if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) {
24271 // Fold CONCAT_VECTORS of CONCAT_VECTORS (or undef) to VECTOR_SHUFFLE.
24272 if (SDValue V = combineConcatVectorOfConcatVectors(N, DAG))
24273 return V;
24274
24275 // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
24276 if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
24277 return V;
24278 }
24279
24280 if (SDValue V = combineConcatVectorOfCasts(N, DAG))
24281 return V;
24282
24283 if (SDValue V = combineConcatVectorOfShuffleAndItsOperands(
24284 N, DAG, TLI, LegalTypes, LegalOperations))
24285 return V;
24286
24287 // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
24288 // nodes often generate nop CONCAT_VECTOR nodes. Scan the CONCAT_VECTOR
24289 // operands and look for CONCAT operations that place the incoming vectors
24290 // at the exact same location.
24291 //
24292 // For scalable vectors, EXTRACT_SUBVECTOR indexes are implicitly scaled.
24293 SDValue SingleSource = SDValue();
24294 unsigned PartNumElem =
24295 N->getOperand(0).getValueType().getVectorMinNumElements();
24296
24297 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
24298 SDValue Op = N->getOperand(i);
24299
24300 if (Op.isUndef())
24301 continue;
24302
24303 // Check if this is the identity extract:
24304 if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
24305 return SDValue();
24306
24307 // Find the single incoming vector for the extract_subvector.
24308 if (SingleSource.getNode()) {
24309 if (Op.getOperand(0) != SingleSource)
24310 return SDValue();
24311 } else {
24312 SingleSource = Op.getOperand(0);
24313
24314 // Check the source type is the same as the type of the result.
24315 // If not, this concat may extend the vector, so we cannot
24316 // optimize it away.
24317 if (SingleSource.getValueType() != N->getValueType(0))
24318 return SDValue();
24319 }
24320
24321 // Check that we are reading from the identity index.
24322 unsigned IdentityIndex = i * PartNumElem;
24323 if (Op.getConstantOperandAPInt(1) != IdentityIndex)
24324 return SDValue();
24325 }
24326
24327 if (SingleSource.getNode())
24328 return SingleSource;
24329
24330 return SDValue();
24331}
24332
24333// Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find
24334// if the subvector can be sourced for free.
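// E.g. (illustrative): getSubVectorSrc(concat_vectors(A, B, C, D) with v2i32
// operands, Index = 4, SubVT = v2i32) returns C, since 4 / 2 selects the
// third concat operand.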
24335static SDValue getSubVectorSrc(SDValue V, SDValue Index, EVT SubVT) {
24336 if (V.getOpcode() == ISD::INSERT_SUBVECTOR &&
24337 V.getOperand(1).getValueType() == SubVT && V.getOperand(2) == Index) {
24338 return V.getOperand(1);
24339 }
24340 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
24341 if (IndexC && V.getOpcode() == ISD::CONCAT_VECTORS &&
24342 V.getOperand(0).getValueType() == SubVT &&
24343 (IndexC->getZExtValue() % SubVT.getVectorMinNumElements()) == 0) {
24344 uint64_t SubIdx = IndexC->getZExtValue() / SubVT.getVectorMinNumElements();
24345 return V.getOperand(SubIdx);
24346 }
24347 return SDValue();
24348}
24349
24350static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract,
24351 SelectionDAG &DAG,
24352 bool LegalOperations) {
24353 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
24354 SDValue BinOp = Extract->getOperand(0);
24355 unsigned BinOpcode = BinOp.getOpcode();
24356 if (!TLI.isBinOp(BinOpcode) || BinOp->getNumValues() != 1)
24357 return SDValue();
24358
24359 EVT VecVT = BinOp.getValueType();
24360 SDValue Bop0 = BinOp.getOperand(0), Bop1 = BinOp.getOperand(1);
24361 if (VecVT != Bop0.getValueType() || VecVT != Bop1.getValueType())
24362 return SDValue();
24363
24364 SDValue Index = Extract->getOperand(1);
24365 EVT SubVT = Extract->getValueType(0);
24366 if (!TLI.isOperationLegalOrCustom(BinOpcode, SubVT, LegalOperations))
24367 return SDValue();
24368
24369 SDValue Sub0 = getSubVectorSrc(Bop0, Index, SubVT);
24370 SDValue Sub1 = getSubVectorSrc(Bop1, Index, SubVT);
24371
24372 // TODO: We could handle the case where only 1 operand is being inserted by
24373 // creating an extract of the other operand, but that requires checking
24374 // number of uses and/or costs.
24375 if (!Sub0 || !Sub1)
24376 return SDValue();
24377
24378 // We are inserting both operands of the wide binop only to extract back
24379 // to the narrow vector size. Eliminate all of the insert/extract:
24380 // ext (binop (ins ?, X, Index), (ins ?, Y, Index)), Index --> binop X, Y
24381 return DAG.getNode(BinOpcode, SDLoc(Extract), SubVT, Sub0, Sub1,
24382 BinOp->getFlags());
24383}
24384
24385/// If we are extracting a subvector produced by a wide binary operator try
24386/// to use a narrow binary operator and/or avoid concatenation and extraction.
24387static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG,
24388 bool LegalOperations) {
24389 // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
24390 // some of these bailouts with other transforms.
24391
24392 if (SDValue V = narrowInsertExtractVectorBinOp(Extract, DAG, LegalOperations))
24393 return V;
24394
24395 // The extract index must be a constant, so we can map it to a concat operand.
24396 auto *ExtractIndexC = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
24397 if (!ExtractIndexC)
24398 return SDValue();
24399
24400 // We are looking for an optionally bitcasted wide vector binary operator
24401 // feeding an extract subvector.
24402 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
24403 SDValue BinOp = peekThroughBitcasts(Extract->getOperand(0));
24404 unsigned BOpcode = BinOp.getOpcode();
24405 if (!TLI.isBinOp(BOpcode) || BinOp->getNumValues() != 1)
24406 return SDValue();
24407
24408 // Exclude the fake form of fneg (fsub -0.0, x) because that is likely to be
24409 // reduced to the unary fneg when it is visited, and we probably want to deal
24410 // with fneg in a target-specific way.
24411 if (BOpcode == ISD::FSUB) {
24412 auto *C = isConstOrConstSplatFP(BinOp.getOperand(0), /*AllowUndefs*/ true);
24413 if (C && C->getValueAPF().isNegZero())
24414 return SDValue();
24415 }
24416
24417 // The binop must be a vector type, so we can extract some fraction of it.
24418 EVT WideBVT = BinOp.getValueType();
24419 // The optimisations below currently assume we are dealing with fixed length
24420 // vectors. It is possible to add support for scalable vectors, but at the
24421 // moment we've done no analysis to prove whether they are profitable or not.
24422 if (!WideBVT.isFixedLengthVector())
24423 return SDValue();
24424
24425 EVT VT = Extract->getValueType(0);
24426 unsigned ExtractIndex = ExtractIndexC->getZExtValue();
24427 assert(ExtractIndex % VT.getVectorNumElements() == 0 &&
24428 "Extract index is not a multiple of the vector length.");
24429
24430 // Bail out if this is not a proper multiple width extraction.
24431 unsigned WideWidth = WideBVT.getSizeInBits();
24432 unsigned NarrowWidth = VT.getSizeInBits();
24433 if (WideWidth % NarrowWidth != 0)
24434 return SDValue();
24435
24436 // Bail out if we are extracting a fraction of a single operation. This can
24437 // occur because we potentially looked through a bitcast of the binop.
24438 unsigned NarrowingRatio = WideWidth / NarrowWidth;
24439 unsigned WideNumElts = WideBVT.getVectorNumElements();
24440 if (WideNumElts % NarrowingRatio != 0)
24441 return SDValue();
24442
24443 // Bail out if the target does not support a narrower version of the binop.
24444 EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
24445 WideNumElts / NarrowingRatio);
24446 if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT,
24447 LegalOperations))
24448 return SDValue();
24449
24450 // If extraction is cheap, we don't need to look at the binop operands
24451 // for concat ops. The narrow binop alone makes this transform profitable.
24452 // We can't just reuse the original extract index operand because we may have
24453 // bitcasted.
24454 unsigned ConcatOpNum = ExtractIndex / VT.getVectorNumElements();
24455 unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
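// E.g. (illustrative): extracting v4i32 at index 4 from a v8i32 value bitcast
// from a v4i64 binop: NarrowBVT = v2i64, ConcatOpNum = 4 / 4 = 1 and
// ExtBOIdx = 1 * 2 = 2, i.e. the upper v2i64 half of each binop operand.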
24456 if (TLI.isExtractSubvectorCheap(NarrowBVT, WideBVT, ExtBOIdx) &&
24457 BinOp.hasOneUse() && Extract->getOperand(0)->hasOneUse()) {
24458 // extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N)
24459 SDLoc DL(Extract);
24460 SDValue NewExtIndex = DAG.getVectorIdxConstant(ExtBOIdx, DL);
24461 SDValue X = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
24462 BinOp.getOperand(0), NewExtIndex);
24463 SDValue Y = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
24464 BinOp.getOperand(1), NewExtIndex);
24465 SDValue NarrowBinOp =
24466 DAG.getNode(BOpcode, DL, NarrowBVT, X, Y, BinOp->getFlags());
24467 return DAG.getBitcast(VT, NarrowBinOp);
24468 }
24469
24470 // Only handle the case where we are doubling and then halving. A larger ratio
24471 // may require more than two narrow binops to replace the wide binop.
24472 if (NarrowingRatio != 2)
24473 return SDValue();
24474
24475 // TODO: The motivating case for this transform is an x86 AVX1 target. That
24476 // target has temptingly almost legal versions of bitwise logic ops in 256-bit
24477 // flavors, but no other 256-bit integer support. This could be extended to
24478 // handle any binop, but that may require fixing/adding other folds to avoid
24479 // codegen regressions.
24480 if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
24481 return SDValue();
24482
24483 // We need at least one concatenation operation of a binop operand to make
24484 // this transform worthwhile. The concat must double the input vector sizes.
24485 auto GetSubVector = [ConcatOpNum](SDValue V) -> SDValue {
24486 if (V.getOpcode() == ISD::CONCAT_VECTORS && V.getNumOperands() == 2)
24487 return V.getOperand(ConcatOpNum);
24488 return SDValue();
24489 };
24490 SDValue SubVecL = GetSubVector(peekThroughBitcasts(BinOp.getOperand(0)));
24491 SDValue SubVecR = GetSubVector(peekThroughBitcasts(BinOp.getOperand(1)));
24492
24493 if (SubVecL || SubVecR) {
24494 // If a binop operand was not the result of a concat, we must extract a
24495 // half-sized operand for our new narrow binop:
24496 // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
24497 // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, IndexC)
24498 // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, IndexC), YN
24499 SDLoc DL(Extract);
24500 SDValue IndexC = DAG.getVectorIdxConstant(ExtBOIdx, DL);
24501 SDValue X = SubVecL ? DAG.getBitcast(NarrowBVT, SubVecL)
24502 : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
24503 BinOp.getOperand(0), IndexC);
24504
24505 SDValue Y = SubVecR ? DAG.getBitcast(NarrowBVT, SubVecR)
24506 : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
24507 BinOp.getOperand(1), IndexC);
24508
24509 SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
24510 return DAG.getBitcast(VT, NarrowBinOp);
24511 }
24512
24513 return SDValue();
24514}
24515
24516/// If we are extracting a subvector from a wide vector load, convert to a
24517/// narrow load to eliminate the extraction:
24518/// (extract_subvector (load wide vector)) --> (load narrow vector)
24519static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
24520 // TODO: Add support for big-endian. The offset calculation must be adjusted.
24521 if (DAG.getDataLayout().isBigEndian())
24522 return SDValue();
24523
24524 auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
24525 if (!Ld || Ld->getExtensionType() || !Ld->isSimple())
24526 return SDValue();
24527
24528 // Allow targets to opt-out.
24529 EVT VT = Extract->getValueType(0);
24530
24531 // We can only create byte sized loads.
24532 if (!VT.isByteSized())
24533 return SDValue();
24534
24535 unsigned Index = Extract->getConstantOperandVal(1);
24536 unsigned NumElts = VT.getVectorMinNumElements();
24537 // A fixed length vector being extracted from a scalable vector
24538 // may not be any *smaller* than the scalable one.
24539 if (Index == 0 && NumElts >= Ld->getValueType(0).getVectorMinNumElements())
24540 return SDValue();
24541
24542 // The definition of EXTRACT_SUBVECTOR states that the index must be a
24543 // multiple of the minimum number of elements in the result type.
24544 assert(Index % NumElts == 0 && "The extract subvector index is not a "
24545 "multiple of the result's element count");
24546
24547 // It's fine to use TypeSize here as we know the offset will not be negative.
24548 TypeSize Offset = VT.getStoreSize() * (Index / NumElts);
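// E.g. (illustrative): extracting the second v4i32 half (Index == 4,
// NumElts == 4) of a v8i32 load gives Offset = 16 bytes * (4 / 4) = 16, so
// the narrow load reads 16 bytes past the original base pointer.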
24549
24550 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
24551 if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT))
24552 return SDValue();
24553
24554 // The narrow load will be offset from the base address of the old load if
24555 // we are extracting from something besides index 0 (little-endian).
24556 SDLoc DL(Extract);
24557
24558 // TODO: Use "BaseIndexOffset" to make this more effective.
24559 SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(), Offset, DL);
24560
24563 MachineMemOperand *MMO;
24564 if (Offset.isScalable()) {
24565 MachinePointerInfo MPI =
24567 MMO = MF.getMachineMemOperand(Ld->getMemOperand(), MPI, StoreSize);
24568 } else
24569 MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset.getFixedValue(),
24570 StoreSize);
24571
24572 SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
24573 DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
24574 return NewLd;
24575}
24576
24577/// Given EXTRACT_SUBVECTOR(VECTOR_SHUFFLE(Op0, Op1, Mask)),
24578/// try to produce VECTOR_SHUFFLE(EXTRACT_SUBVECTOR(Op?, ?),
24579/// EXTRACT_SUBVECTOR(Op?, ?),
24580/// Mask'))
24581/// iff it is legal and profitable to do so. Notably, the trimmed mask
24582/// (containing only the elements that are extracted)
24583/// must reference at most two subvectors.
24584static SDValue foldExtractSubvectorFromShuffleVector(SDNode *N,
24585 SelectionDAG &DAG,
24586 const TargetLowering &TLI,
24587 bool LegalOperations) {
24588 assert(N->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
24589 "Must only be called on EXTRACT_SUBVECTOR's");
24590
24591 SDValue N0 = N->getOperand(0);
24592
24593 // Only deal with non-scalable vectors.
24594 EVT NarrowVT = N->getValueType(0);
24595 EVT WideVT = N0.getValueType();
24596 if (!NarrowVT.isFixedLengthVector() || !WideVT.isFixedLengthVector())
24597 return SDValue();
24598
24599 // The operand must be a shufflevector.
24600 auto *WideShuffleVector = dyn_cast<ShuffleVectorSDNode>(N0);
24601 if (!WideShuffleVector)
24602 return SDValue();
24603
24604 // The old shuffle needs to go away.
24605 if (!WideShuffleVector->hasOneUse())
24606 return SDValue();
24607
24608 // And the narrow shufflevector that we'll form must be legal.
24609 if (LegalOperations &&
24611 return SDValue();
24612
24613 uint64_t FirstExtractedEltIdx = N->getConstantOperandVal(1);
24614 int NumEltsExtracted = NarrowVT.getVectorNumElements();
24615 assert((FirstExtractedEltIdx % NumEltsExtracted) == 0 &&
24616 "Extract index is not a multiple of the output vector length.");
24617
24618 int WideNumElts = WideVT.getVectorNumElements();
24619
24620 SmallVector<int, 16> NewMask;
24621 NewMask.reserve(NumEltsExtracted);
24622 SmallSetVector<std::pair<SDValue /*Op*/, int /*SubvectorIndex*/>, 2>
24623 DemandedSubvectors;
24624
24625 // Try to decode the wide mask into narrow mask from at most two subvectors.
24626 for (int M : WideShuffleVector->getMask().slice(FirstExtractedEltIdx,
24627 NumEltsExtracted)) {
24628 assert((M >= -1) && (M < (2 * WideNumElts)) &&
24629 "Out-of-bounds shuffle mask?");
24630
24631 if (M < 0) {
24632 // Does not depend on operands, does not require adjustment.
24633 NewMask.emplace_back(M);
24634 continue;
24635 }
24636
24637 // From which operand of the shuffle does this shuffle mask element pick?
24638 int WideShufOpIdx = M / WideNumElts;
24639 // Which element of that operand is picked?
24640 int OpEltIdx = M % WideNumElts;
24641
24642 assert((OpEltIdx + WideShufOpIdx * WideNumElts) == M &&
24643 "Shuffle mask vector decomposition failure.");
24644
24645 // And which NumEltsExtracted-sized subvector of that operand is that?
24646 int OpSubvecIdx = OpEltIdx / NumEltsExtracted;
24647 // And which element within that subvector of that operand is that?
24648 int OpEltIdxInSubvec = OpEltIdx % NumEltsExtracted;
24649
24650 assert((OpEltIdxInSubvec + OpSubvecIdx * NumEltsExtracted) == OpEltIdx &&
24651 "Shuffle mask subvector decomposition failure.");
24652
24653 assert((OpEltIdxInSubvec + OpSubvecIdx * NumEltsExtracted +
24654 WideShufOpIdx * WideNumElts) == M &&
24655 "Shuffle mask full decomposition failure.");
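// Worked example (illustrative values only): with WideNumElts = 8 and
// NumEltsExtracted = 4, mask value M = 13 decomposes as WideShufOpIdx = 13 / 8 = 1,
// OpEltIdx = 13 % 8 = 5, OpSubvecIdx = 5 / 4 = 1 and OpEltIdxInSubvec = 5 % 4 = 1;
// recombining 1 + 1 * 4 + 1 * 8 gives back 13.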
24656
24657 SDValue Op = WideShuffleVector->getOperand(WideShufOpIdx);
24658
24659 if (Op.isUndef()) {
24660 // Picking from an undef operand. Let's adjust mask instead.
24661 NewMask.emplace_back(-1);
24662 continue;
24663 }
24664
24665 const std::pair<SDValue, int> DemandedSubvector =
24666 std::make_pair(Op, OpSubvecIdx);
24667
24668 if (DemandedSubvectors.insert(DemandedSubvector)) {
24669 if (DemandedSubvectors.size() > 2)
24670 return SDValue(); // We can't handle more than two subvectors.
24671 // How many elements into the WideVT does this subvector start?
24672 int Index = NumEltsExtracted * OpSubvecIdx;
24673 // Bail out if the extraction isn't going to be cheap.
24674 if (!TLI.isExtractSubvectorCheap(NarrowVT, WideVT, Index))
24675 return SDValue();
24676 }
24677
24678 // Ok, but from which operand of the new shuffle will this element pick?
24679 int NewOpIdx =
24680 getFirstIndexOf(DemandedSubvectors.getArrayRef(), DemandedSubvector);
24681 assert((NewOpIdx == 0 || NewOpIdx == 1) && "Unexpected operand index.");
24682
24683 int AdjM = OpEltIdxInSubvec + NewOpIdx * NumEltsExtracted;
24684 NewMask.emplace_back(AdjM);
24685 }
24686 assert(NewMask.size() == (unsigned)NumEltsExtracted && "Produced bad mask.");
24687 assert(DemandedSubvectors.size() <= 2 &&
24688 "Should have ended up demanding at most two subvectors.");
24689
24690 // Did we discover that the shuffle does not actually depend on operands?
24691 if (DemandedSubvectors.empty())
24692 return DAG.getUNDEF(NarrowVT);
24693
24694 // Profitability check: only deal with extractions from the first subvector
24695 // unless the mask becomes an identity mask.
24696 if (!ShuffleVectorInst::isIdentityMask(NewMask, NewMask.size()) ||
24697 any_of(NewMask, [](int M) { return M < 0; }))
24698 for (auto &DemandedSubvector : DemandedSubvectors)
24699 if (DemandedSubvector.second != 0)
24700 return SDValue();
24701
24702 // We still perform the exact same EXTRACT_SUBVECTOR, just on different
24703 // operand[s]/index[es], so there is no point in checking for its legality.
24704
24705 // Do not turn a legal shuffle into an illegal one.
24706 if (TLI.isShuffleMaskLegal(WideShuffleVector->getMask(), WideVT) &&
24707 !TLI.isShuffleMaskLegal(NewMask, NarrowVT))
24708 return SDValue();
24709
24710 SDLoc DL(N);
24711
24713 for (const std::pair<SDValue /*Op*/, int /*SubvectorIndex*/>
24714 &DemandedSubvector : DemandedSubvectors) {
24715 // How many elements into the WideVT does this subvector start?
24716 int Index = NumEltsExtracted * DemandedSubvector.second;
24717 SDValue IndexC = DAG.getVectorIdxConstant(Index, DL);
24718 NewOps.emplace_back(DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowVT,
24719 DemandedSubvector.first, IndexC));
24720 }
24721 assert((NewOps.size() == 1 || NewOps.size() == 2) &&
24722 "Should end up with either one or two ops");
24723
24724 // If we ended up with only one operand, pad with an undef.
24725 if (NewOps.size() == 1)
24726 NewOps.emplace_back(DAG.getUNDEF(NarrowVT));
24727
24728 return DAG.getVectorShuffle(NarrowVT, DL, NewOps[0], NewOps[1], NewMask);
24729}
24730
24731SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
24732 EVT NVT = N->getValueType(0);
24733 SDValue V = N->getOperand(0);
24734 uint64_t ExtIdx = N->getConstantOperandVal(1);
24735 SDLoc DL(N);
24736
24737 // Extract from UNDEF is UNDEF.
24738 if (V.isUndef())
24739 return DAG.getUNDEF(NVT);
24740
24742 if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG))
24743 return NarrowLoad;
24744
24745 // Combine an extract of an extract into a single extract_subvector.
24746 // ext (ext X, C), 0 --> ext X, C
24747 if (ExtIdx == 0 && V.getOpcode() == ISD::EXTRACT_SUBVECTOR && V.hasOneUse()) {
24748 if (TLI.isExtractSubvectorCheap(NVT, V.getOperand(0).getValueType(),
24749 V.getConstantOperandVal(1)) &&
24751 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT, V.getOperand(0),
24752 V.getOperand(1));
24753 }
24754 }
24755
24756 // ty1 extract_vector(ty2 splat(V))) -> ty1 splat(V)
24757 if (V.getOpcode() == ISD::SPLAT_VECTOR)
24758 if (DAG.isConstantValueOfAnyType(V.getOperand(0)) || V.hasOneUse())
24759 if (!LegalOperations || TLI.isOperationLegal(ISD::SPLAT_VECTOR, NVT))
24760 return DAG.getSplatVector(NVT, DL, V.getOperand(0));
24761
24762 // extract_subvector(insert_subvector(x,y,c1),c2)
24763 // --> extract_subvector(y,c2-c1)
24764 // iff we're just extracting from the inserted subvector.
24765 if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
24766 SDValue InsSub = V.getOperand(1);
24767 EVT InsSubVT = InsSub.getValueType();
24768 unsigned NumInsElts = InsSubVT.getVectorMinNumElements();
24769 unsigned InsIdx = V.getConstantOperandVal(2);
24770 unsigned NumSubElts = NVT.getVectorMinNumElements();
24771 if (InsIdx <= ExtIdx && (ExtIdx + NumSubElts) <= (InsIdx + NumInsElts) &&
24772 TLI.isExtractSubvectorCheap(NVT, InsSubVT, ExtIdx - InsIdx) &&
24773 InsSubVT.isFixedLengthVector() && NVT.isFixedLengthVector() &&
24774 V.getValueType().isFixedLengthVector())
24775 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT, InsSub,
24776 DAG.getVectorIdxConstant(ExtIdx - InsIdx, DL));
24777 }
24778
24779 // Try to move vector bitcast after extract_subv by scaling extraction index:
24780 // extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
24781 if (V.getOpcode() == ISD::BITCAST &&
24782 V.getOperand(0).getValueType().isVector() &&
24783 (!LegalOperations || TLI.isOperationLegal(ISD::BITCAST, NVT))) {
24784 SDValue SrcOp = V.getOperand(0);
24785 EVT SrcVT = SrcOp.getValueType();
24786 unsigned SrcNumElts = SrcVT.getVectorMinNumElements();
24787 unsigned DestNumElts = V.getValueType().getVectorMinNumElements();
24788 if ((SrcNumElts % DestNumElts) == 0) {
24789 unsigned SrcDestRatio = SrcNumElts / DestNumElts;
24790 ElementCount NewExtEC = NVT.getVectorElementCount() * SrcDestRatio;
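// E.g. (illustrative): extracting v2i64 at index 2 from (bitcast v8i32 X to
// v4i64): SrcDestRatio = 8 / 4 = 2, so we instead extract v4i32 from X at
// index 2 * 2 = 4 and bitcast the result back to v2i64.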
24791 EVT NewExtVT =
24792 EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(), NewExtEC);
24794 SDValue NewIndex = DAG.getVectorIdxConstant(ExtIdx * SrcDestRatio, DL);
24795 SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
24796 V.getOperand(0), NewIndex);
24797 return DAG.getBitcast(NVT, NewExtract);
24798 }
24799 }
24800 if ((DestNumElts % SrcNumElts) == 0) {
24801 unsigned DestSrcRatio = DestNumElts / SrcNumElts;
24802 if (NVT.getVectorElementCount().isKnownMultipleOf(DestSrcRatio)) {
24803 ElementCount NewExtEC =
24804 NVT.getVectorElementCount().divideCoefficientBy(DestSrcRatio);
24805 EVT ScalarVT = SrcVT.getScalarType();
24806 if ((ExtIdx % DestSrcRatio) == 0) {
24807 unsigned IndexValScaled = ExtIdx / DestSrcRatio;
24808 EVT NewExtVT =
24809 EVT::getVectorVT(*DAG.getContext(), ScalarVT, NewExtEC);
24811 SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
24812 SDValue NewExtract =
24813 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
24814 V.getOperand(0), NewIndex);
24815 return DAG.getBitcast(NVT, NewExtract);
24816 }
24817 if (NewExtEC.isScalar() &&
24819 SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
24820 SDValue NewExtract =
24821 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT,
24822 V.getOperand(0), NewIndex);
24823 return DAG.getBitcast(NVT, NewExtract);
24824 }
24825 }
24826 }
24827 }
24828 }
24829
24830 if (V.getOpcode() == ISD::CONCAT_VECTORS) {
24831 unsigned ExtNumElts = NVT.getVectorMinNumElements();
24832 EVT ConcatSrcVT = V.getOperand(0).getValueType();
24833 assert(ConcatSrcVT.getVectorElementType() == NVT.getVectorElementType() &&
24834 "Concat and extract subvector do not change element type");
24835 assert((ExtIdx % ExtNumElts) == 0 &&
24836 "Extract index is not a multiple of the input vector length.");
24837
24838 unsigned ConcatSrcNumElts = ConcatSrcVT.getVectorMinNumElements();
24839 unsigned ConcatOpIdx = ExtIdx / ConcatSrcNumElts;
24840
24841 // If the concatenated source types match this extract, it's a direct
24842 // simplification:
24843 // extract_subvec (concat V1, V2, ...), i --> Vi
24844 if (NVT.getVectorElementCount() == ConcatSrcVT.getVectorElementCount())
24845 return V.getOperand(ConcatOpIdx);
24846
24847 // If the concatenated source vectors are a multiple of the length of this
24848 // extract, then extract a fraction of one of those source vectors directly
24849 // from a concat operand. Example:
24850 // v2i8 extract_subvec (v16i8 concat (v8i8 X), (v8i8 Y)), 14 -->
24851 // v2i8 extract_subvec v8i8 Y, 6
24852 if (NVT.isFixedLengthVector() && ConcatSrcVT.isFixedLengthVector() &&
24853 ConcatSrcNumElts % ExtNumElts == 0) {
24854 unsigned NewExtIdx = ExtIdx - ConcatOpIdx * ConcatSrcNumElts;
24855 assert(NewExtIdx + ExtNumElts <= ConcatSrcNumElts &&
24856 "Trying to extract from >1 concat operand?");
24857 assert(NewExtIdx % ExtNumElts == 0 &&
24858 "Extract index is not a multiple of the input vector length.");
24859 SDValue NewIndexC = DAG.getVectorIdxConstant(NewExtIdx, DL);
24860 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT,
24861 V.getOperand(ConcatOpIdx), NewIndexC);
24862 }
24863 }
24864
24865 if (SDValue V =
24866 foldExtractSubvectorFromShuffleVector(N, DAG, TLI, LegalOperations))
24867 return V;
24868
24870
24871 // If the input is a build vector, try to make a smaller build vector.
24872 if (V.getOpcode() == ISD::BUILD_VECTOR) {
24873 EVT InVT = V.getValueType();
24874 unsigned ExtractSize = NVT.getSizeInBits();
24875 unsigned EltSize = InVT.getScalarSizeInBits();
24876 // Only do this if we won't split any elements.
24877 if (ExtractSize % EltSize == 0) {
24878 unsigned NumElems = ExtractSize / EltSize;
24879 EVT EltVT = InVT.getVectorElementType();
24880 EVT ExtractVT =
24881 NumElems == 1 ? EltVT
24882 : EVT::getVectorVT(*DAG.getContext(), EltVT, NumElems);
24883 if ((Level < AfterLegalizeDAG ||
24884 (NumElems == 1 ||
24885 TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&
24886 (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
24887 unsigned IdxVal = (ExtIdx * NVT.getScalarSizeInBits()) / EltSize;
24888
24889 if (NumElems == 1) {
24890 SDValue Src = V->getOperand(IdxVal);
24891 if (EltVT != Src.getValueType())
24892 Src = DAG.getNode(ISD::TRUNCATE, DL, EltVT, Src);
24893 return DAG.getBitcast(NVT, Src);
24894 }
24895
24896 // Extract the pieces from the original build_vector.
24897 SDValue BuildVec =
24898 DAG.getBuildVector(ExtractVT, DL, V->ops().slice(IdxVal, NumElems));
24899 return DAG.getBitcast(NVT, BuildVec);
24900 }
24901 }
24902 }
24903
24904 if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
24905 // Handle only simple case where vector being inserted and vector
24906 // being extracted are of same size.
24907 EVT SmallVT = V.getOperand(1).getValueType();
24908 if (!NVT.bitsEq(SmallVT))
24909 return SDValue();
24910
24911 // Combine:
24912 // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
24913 // Into:
24914 // indices are equal or bit offsets are equal => V1
24915 // otherwise => (extract_subvec V1, ExtIdx)
24916 uint64_t InsIdx = V.getConstantOperandVal(2);
24917 if (InsIdx * SmallVT.getScalarSizeInBits() ==
24918 ExtIdx * NVT.getScalarSizeInBits()) {
24919 if (LegalOperations && !TLI.isOperationLegal(ISD::BITCAST, NVT))
24920 return SDValue();
24921
24922 return DAG.getBitcast(NVT, V.getOperand(1));
24923 }
24924 return DAG.getNode(
24926 DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
24927 N->getOperand(1));
24928 }
24929
24930 if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG, LegalOperations))
24931 return NarrowBOp;
24932
24934 return SDValue(N, 0);
24935
24936 return SDValue();
24937}
24938
24939/// Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles
24940/// followed by concatenation. Narrow vector ops may have better performance
24941/// than wide ops, and this can unlock further narrowing of other vector ops.
24942/// Targets can invert this transform later if it is not profitable.
24944 SelectionDAG &DAG) {
24945 SDValue N0 = Shuf->getOperand(0), N1 = Shuf->getOperand(1);
24946 if (N0.getOpcode() != ISD::CONCAT_VECTORS || N0.getNumOperands() != 2 ||
24947 N1.getOpcode() != ISD::CONCAT_VECTORS || N1.getNumOperands() != 2 ||
24948 !N0.getOperand(1).isUndef() || !N1.getOperand(1).isUndef())
24949 return SDValue();
24950
24951 // Split the wide shuffle mask into halves. Any mask element that is accessing
24952 // operand 1 is offset down to account for narrowing of the vectors.
24953 ArrayRef<int> Mask = Shuf->getMask();
24954 EVT VT = Shuf->getValueType(0);
24955 unsigned NumElts = VT.getVectorNumElements();
24956 unsigned HalfNumElts = NumElts / 2;
24957 SmallVector<int, 16> Mask0(HalfNumElts, -1);
24958 SmallVector<int, 16> Mask1(HalfNumElts, -1);
24959 for (unsigned i = 0; i != NumElts; ++i) {
24960 if (Mask[i] == -1)
24961 continue;
24962 // If we reference the upper (undef) subvector then the element is undef.
24963 if ((Mask[i] % NumElts) >= HalfNumElts)
24964 continue;
24965 int M = Mask[i] < (int)NumElts ? Mask[i] : Mask[i] - (int)HalfNumElts;
24966 if (i < HalfNumElts)
24967 Mask0[i] = M;
24968 else
24969 Mask1[i - HalfNumElts] = M;
24970 }
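// E.g. (illustrative): for a v8i32 shuffle of (concat X, undef) and
// (concat Y, undef) with mask <0,9,1,8,2,11,3,10>, HalfNumElts = 4 and the
// halves become Mask0 = <0,5,1,4> and Mask1 = <2,7,3,6> over v4i32 X and Y.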
24971
24972 // Ask the target if this is a valid transform.
24973 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
24974 EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
24975 HalfNumElts);
24976 if (!TLI.isShuffleMaskLegal(Mask0, HalfVT) ||
24977 !TLI.isShuffleMaskLegal(Mask1, HalfVT))
24978 return SDValue();
24979
24980 // shuffle (concat X, undef), (concat Y, undef), Mask -->
24981 // concat (shuffle X, Y, Mask0), (shuffle X, Y, Mask1)
24982 SDValue X = N0.getOperand(0), Y = N1.getOperand(0);
24983 SDLoc DL(Shuf);
24984 SDValue Shuf0 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask0);
24985 SDValue Shuf1 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask1);
24986 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Shuf0, Shuf1);
24987}
24988
24989// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
24990// or turn a shuffle of a single concat into a simpler shuffle then a concat.
24992 EVT VT = N->getValueType(0);
24993 unsigned NumElts = VT.getVectorNumElements();
24994
24995 SDValue N0 = N->getOperand(0);
24996 SDValue N1 = N->getOperand(1);
24997 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
24998 ArrayRef<int> Mask = SVN->getMask();
24999
25001 EVT ConcatVT = N0.getOperand(0).getValueType();
25002 unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
25003 unsigned NumConcats = NumElts / NumElemsPerConcat;
25004
25005 auto IsUndefMaskElt = [](int i) { return i == -1; };
25006
25007 // Special case: shuffle(concat(A,B)) can be more efficiently represented
25008 // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
25009 // half vector elements.
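// E.g. (illustrative): a v8i32 shuffle of (concat A, B) and undef with mask
// <1,4,3,6,-1,-1,-1,-1> becomes concat (shuffle A, B, <1,4,3,6>), undef.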
25010 if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
25011 llvm::all_of(Mask.slice(NumElemsPerConcat, NumElemsPerConcat),
25012 IsUndefMaskElt)) {
25013 N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0),
25014 N0.getOperand(1),
25015 Mask.slice(0, NumElemsPerConcat));
25016 N1 = DAG.getUNDEF(ConcatVT);
25017 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
25018 }
25019
25020 // Look at every vector that's inserted. We're looking for exact
25021 // subvector-sized copies from a concatenated vector
25022 for (unsigned I = 0; I != NumConcats; ++I) {
25023 unsigned Begin = I * NumElemsPerConcat;
25024 ArrayRef<int> SubMask = Mask.slice(Begin, NumElemsPerConcat);
25025
25026 // Make sure we're dealing with a copy.
25027 if (llvm::all_of(SubMask, IsUndefMaskElt)) {
25028 Ops.push_back(DAG.getUNDEF(ConcatVT));
25029 continue;
25030 }
25031
25032 int OpIdx = -1;
25033 for (int i = 0; i != (int)NumElemsPerConcat; ++i) {
25034 if (IsUndefMaskElt(SubMask[i]))
25035 continue;
25036 if ((SubMask[i] % (int)NumElemsPerConcat) != i)
25037 return SDValue();
25038 int EltOpIdx = SubMask[i] / NumElemsPerConcat;
25039 if (0 <= OpIdx && EltOpIdx != OpIdx)
25040 return SDValue();
25041 OpIdx = EltOpIdx;
25042 }
25043 assert(0 <= OpIdx && "Unknown concat_vectors op");
25044
25045 if (OpIdx < (int)N0.getNumOperands())
25046 Ops.push_back(N0.getOperand(OpIdx));
25047 else
25048 Ops.push_back(N1.getOperand(OpIdx - N0.getNumOperands()));
25049 }
25050
25051 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
25052}
25053
25054// Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
25055// BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
25056//
25057// SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
25058// a simplification in some sense, but it isn't appropriate in general: some
25059// BUILD_VECTORs are substantially cheaper than others. The general case
25060// of a BUILD_VECTOR requires inserting each element individually (or
25061// performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
25062// all constants is a single constant pool load. A BUILD_VECTOR where each
25063// element is identical is a splat. A BUILD_VECTOR where most of the operands
25064// are undef lowers to a small number of element insertions.
25065//
25066// To deal with this, we currently use a bunch of mostly arbitrary heuristics.
25067// We don't fold shuffles where one side is a non-zero constant, and we don't
25068// fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
25069// non-constant operands. This seems to work out reasonably well in practice.
25071 SelectionDAG &DAG,
25072 const TargetLowering &TLI) {
25073 EVT VT = SVN->getValueType(0);
25074 unsigned NumElts = VT.getVectorNumElements();
25075 SDValue N0 = SVN->getOperand(0);
25076 SDValue N1 = SVN->getOperand(1);
25077
25078 if (!N0->hasOneUse())
25079 return SDValue();
25080
25081 // If only one of N0,N1 is constant, bail out if it is not ALL_ZEROS as
25082 // discussed above.
25083 if (!N1.isUndef()) {
25084 if (!N1->hasOneUse())
25085 return SDValue();
25086
25087 bool N0AnyConst = isAnyConstantBuildVector(N0);
25088 bool N1AnyConst = isAnyConstantBuildVector(N1);
25089 if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
25090 return SDValue();
25091 if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
25092 return SDValue();
25093 }
25094
25095 // If both inputs are splats of the same value then we can safely merge this
25096 // to a single BUILD_VECTOR with undef elements based on the shuffle mask.
25097 bool IsSplat = false;
25098 auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
25099 auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
25100 if (BV0 && BV1)
25101 if (SDValue Splat0 = BV0->getSplatValue())
25102 IsSplat = (Splat0 == BV1->getSplatValue());
25103
25105 SmallSet<SDValue, 16> DuplicateOps;
25106 for (int M : SVN->getMask()) {
25107 SDValue Op = DAG.getUNDEF(VT.getScalarType());
25108 if (M >= 0) {
25109 int Idx = M < (int)NumElts ? M : M - NumElts;
25110 SDValue &S = (M < (int)NumElts ? N0 : N1);
25111 if (S.getOpcode() == ISD::BUILD_VECTOR) {
25112 Op = S.getOperand(Idx);
25113 } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
25114 SDValue Op0 = S.getOperand(0);
25115 Op = Idx == 0 ? Op0 : DAG.getUNDEF(Op0.getValueType());
25116 } else {
25117 // Operand can't be combined - bail out.
25118 return SDValue();
25119 }
25120 }
25121
25122 // Don't duplicate a non-constant BUILD_VECTOR operand unless we're
25123 // generating a splat; semantically, this is fine, but it's likely to
25124 // generate low-quality code if the target can't reconstruct an appropriate
25125 // shuffle.
25126 if (!Op.isUndef() && !isIntOrFPConstant(Op))
25127 if (!IsSplat && !DuplicateOps.insert(Op).second)
25128 return SDValue();
25129
25130 Ops.push_back(Op);
25131 }
25132
25133 // BUILD_VECTOR requires all inputs to be of the same type, find the
25134 // maximum type and extend them all.
25135 EVT SVT = VT.getScalarType();
25136 if (SVT.isInteger())
25137 for (SDValue &Op : Ops)
25138 SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
25139 if (SVT != VT.getScalarType())
25140 for (SDValue &Op : Ops)
25141 Op = Op.isUndef() ? DAG.getUNDEF(SVT)
25142 : (TLI.isZExtFree(Op.getValueType(), SVT)
25143 ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
25144 : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT));
25145 return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
25146}
25147
25148// Match shuffles that can be converted to *_vector_extend_in_reg.
25149// This is often generated during legalization.
25150// e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src)),
25151// and returns the EVT to which the extension should be performed.
25152// NOTE: this assumes that the src is the first operand of the shuffle.
25153static std::optional<EVT> canCombineShuffleToExtendVectorInreg(
25154 unsigned Opcode, EVT VT, std::function<bool(unsigned)> Match,
25155 SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes,
25156 bool LegalOperations) {
25157 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
25158
25159 // TODO Add support for big-endian when we have a test case.
25160 if (!VT.isInteger() || IsBigEndian)
25161 return std::nullopt;
25162
25163 unsigned NumElts = VT.getVectorNumElements();
25164 unsigned EltSizeInBits = VT.getScalarSizeInBits();
25165
25166 // Attempt to match a '*_extend_vector_inreg' shuffle, we just search for
25167 // power-of-2 extensions as they are the most likely.
25168 // FIXME: should try Scale == NumElts case too,
25169 for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
25170 // The vector width must be a multiple of Scale.
25171 if (NumElts % Scale != 0)
25172 continue;
25173
25174 EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
25175 EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
25176
25177 if ((LegalTypes && !TLI.isTypeLegal(OutVT)) ||
25178 (LegalOperations && !TLI.isOperationLegalOrCustom(Opcode, OutVT)))
25179 continue;
25180
25181 if (Match(Scale))
25182 return OutVT;
25183 }
25184
25185 return std::nullopt;
25186}
25187
25188// Match shuffles that can be converted to any_vector_extend_in_reg.
25189// This is often generated during legalization.
25190// e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
25192 SelectionDAG &DAG,
25193 const TargetLowering &TLI,
25194 bool LegalOperations) {
25195 EVT VT = SVN->getValueType(0);
25196 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
25197
25198 // TODO Add support for big-endian when we have a test case.
25199 if (!VT.isInteger() || IsBigEndian)
25200 return SDValue();
25201
25202 // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
25203 auto isAnyExtend = [NumElts = VT.getVectorNumElements(),
25204 Mask = SVN->getMask()](unsigned Scale) {
25205 for (unsigned i = 0; i != NumElts; ++i) {
25206 if (Mask[i] < 0)
25207 continue;
25208 if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
25209 continue;
25210 return false;
25211 }
25212 return true;
25213 };
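// E.g. (illustrative): for the v4i32 mask <0,-1,1,-1> above, Scale = 2
// succeeds -- positions 0 and 2 must equal 0 / 2 = 0 and 2 / 2 = 1, and the
// odd positions are undef -- so the helper can return v2i64, subject to the
// type and operation legality checks.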
25214
25215 unsigned Opcode = ISD::ANY_EXTEND_VECTOR_INREG;
25216 SDValue N0 = SVN->getOperand(0);
25217 // Never create an illegal type. Only create unsupported operations if we
25218 // are pre-legalization.
25219 std::optional<EVT> OutVT = canCombineShuffleToExtendVectorInreg(
25220 Opcode, VT, isAnyExtend, DAG, TLI, /*LegalTypes=*/true, LegalOperations);
25221 if (!OutVT)
25222 return SDValue();
25223 return DAG.getBitcast(VT, DAG.getNode(Opcode, SDLoc(SVN), *OutVT, N0));
25224}
25225
25226// Match shuffles that can be converted to zero_extend_vector_inreg.
25227// This is often generated during legalization.
25228// e.g. v4i32 <0,z,1,u> -> (v2i64 zero_extend_vector_inreg(v4i32 src))
25230 SelectionDAG &DAG,
25231 const TargetLowering &TLI,
25232 bool LegalOperations) {
25233 bool LegalTypes = true;
25234 EVT VT = SVN->getValueType(0);
25235 assert(!VT.isScalableVector() && "Encountered scalable shuffle?");
25236 unsigned NumElts = VT.getVectorNumElements();
25237 unsigned EltSizeInBits = VT.getScalarSizeInBits();
25238
25239 // TODO: add support for big-endian when we have a test case.
25240 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
25241 if (!VT.isInteger() || IsBigEndian)
25242 return SDValue();
25243
25244 SmallVector<int, 16> Mask(SVN->getMask().begin(), SVN->getMask().end());
25245 auto ForEachDecomposedIndice = [NumElts, &Mask](auto Fn) {
25246 for (int &Indice : Mask) {
25247 if (Indice < 0)
25248 continue;
25249 int OpIdx = (unsigned)Indice < NumElts ? 0 : 1;
25250 int OpEltIdx = (unsigned)Indice < NumElts ? Indice : Indice - NumElts;
25251 Fn(Indice, OpIdx, OpEltIdx);
25252 }
25253 };
25254
25255 // Which elements of which operand does this shuffle demand?
25256 std::array<APInt, 2> OpsDemandedElts;
25257 for (APInt &OpDemandedElts : OpsDemandedElts)
25258 OpDemandedElts = APInt::getZero(NumElts);
25259 ForEachDecomposedIndice(
25260 [&OpsDemandedElts](int &Indice, int OpIdx, int OpEltIdx) {
25261 OpsDemandedElts[OpIdx].setBit(OpEltIdx);
25262 });
25263
25264 // Element-wise(!), which of these demanded elements are known to be zero?
25265 std::array<APInt, 2> OpsKnownZeroElts;
25266 for (auto I : zip(SVN->ops(), OpsDemandedElts, OpsKnownZeroElts))
25267 std::get<2>(I) =
25268 DAG.computeVectorKnownZeroElements(std::get<0>(I), std::get<1>(I));
25269
25270 // Manifest zeroable element knowledge in the shuffle mask.
25271 // NOTE: we don't have 'zeroable' sentinel value in generic DAG,
25272 // this is a local invention, but it won't leak into DAG.
25273 // FIXME: should we not manifest them, but just check when matching?
25274 bool HadZeroableElts = false;
25275 ForEachDecomposedIndice([&OpsKnownZeroElts, &HadZeroableElts](
25276 int &Indice, int OpIdx, int OpEltIdx) {
25277 if (OpsKnownZeroElts[OpIdx][OpEltIdx]) {
25278 Indice = -2; // Zeroable element.
25279 HadZeroableElts = true;
25280 }
25281 });
25282
25283 // Don't proceed unless we've refined at least one zeroable mask index.
25284 // If we didn't, then we are still trying to match the same shuffle mask
25285 // we previously tried to match as ISD::ANY_EXTEND_VECTOR_INREG,
25286 // and evidently failed. Proceeding will lead to endless combine loops.
25287 if (!HadZeroableElts)
25288 return SDValue();
25289
25290 // The shuffle may be more fine-grained than we want. Widen elements first.
25291 // FIXME: should we do this before manifesting zeroable shuffle mask indices?
25292 SmallVector<int, 16> ScaledMask;
25293 getShuffleMaskWithWidestElts(Mask, ScaledMask);
25294 assert(Mask.size() >= ScaledMask.size() &&
25295 Mask.size() % ScaledMask.size() == 0 && "Unexpected mask widening.");
25296 int Prescale = Mask.size() / ScaledMask.size();
25297
25298 NumElts = ScaledMask.size();
25299 EltSizeInBits *= Prescale;
25300
25301 EVT PrescaledVT = EVT::getVectorVT(
25302 *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits),
25303 NumElts);
25304
25305 if (LegalTypes && !TLI.isTypeLegal(PrescaledVT) && TLI.isTypeLegal(VT))
25306 return SDValue();
25307
25308 // For example,
25309 // shuffle<0,z,1,-1> == (v2i64 zero_extend_vector_inreg(v4i32))
25310 // But not shuffle<z,z,1,-1> and not shuffle<0,z,z,-1> ! (for same types)
25311 auto isZeroExtend = [NumElts, &ScaledMask](unsigned Scale) {
25312 assert(Scale >= 2 && Scale <= NumElts && NumElts % Scale == 0 &&
25313 "Unexpected mask scaling factor.");
25314 ArrayRef<int> Mask = ScaledMask;
25315 for (unsigned SrcElt = 0, NumSrcElts = NumElts / Scale;
25316 SrcElt != NumSrcElts; ++SrcElt) {
25317 // Analyze the shuffle mask in Scale-sized chunks.
25318 ArrayRef<int> MaskChunk = Mask.take_front(Scale);
25319 assert(MaskChunk.size() == Scale && "Unexpected mask size.");
25320 Mask = Mask.drop_front(MaskChunk.size());
25321 // The first index in this chunk must be SrcElt, but not zero!
25322 // FIXME: undef should be fine, but that results in a more-defined result.
25323 if (int FirstIndice = MaskChunk[0]; (unsigned)FirstIndice != SrcElt)
25324 return false;
25325 // The rest of the indices in this chunk must be zeros.
25326 // FIXME: undef should be fine, but that results in a more-defined result.
25327 if (!all_of(MaskChunk.drop_front(1),
25328 [](int Indice) { return Indice == -2; }))
25329 return false;
25330 }
25331 assert(Mask.empty() && "Did not process the whole mask?");
25332 return true;
25333 };
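// E.g. (illustrative): with Scale = 2 the scaled mask <0,-2,1,-2> matches --
// each two-element chunk starts with the next source element (0, then 1) and
// the remaining lane in each chunk is known to be zero (-2).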
25334
25335 unsigned Opcode = ISD::ZERO_EXTEND_VECTOR_INREG;
25336 for (bool Commuted : {false, true}) {
25337 SDValue Op = SVN->getOperand(!Commuted ? 0 : 1);
25338 if (Commuted)
25339 ShuffleVectorSDNode::commuteMask(ScaledMask);
25340 std::optional<EVT> OutVT = canCombineShuffleToExtendVectorInreg(
25341 Opcode, PrescaledVT, isZeroExtend, DAG, TLI, LegalTypes,
25342 LegalOperations);
25343 if (OutVT)
25344 return DAG.getBitcast(VT, DAG.getNode(Opcode, SDLoc(SVN), *OutVT,
25345 DAG.getBitcast(PrescaledVT, Op)));
25346 }
25347 return SDValue();
25348}
25349
25350// Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
25351// each source element of a large type into the lowest elements of a smaller
25352// destination type. This is often generated during legalization.
25353// If the source node itself was a '*_extend_vector_inreg' node then we should
25354 // be able to remove it.
25355 static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
25356 SelectionDAG &DAG) {
25357 EVT VT = SVN->getValueType(0);
25358 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
25359
25360 // TODO Add support for big-endian when we have a test case.
25361 if (!VT.isInteger() || IsBigEndian)
25362 return SDValue();
25363
25364 SDValue N0 = peekThroughBitcasts(SVN->getOperand(0));
25365
25366 unsigned Opcode = N0.getOpcode();
25367 if (!ISD::isExtVecInRegOpcode(Opcode))
25368 return SDValue();
25369
25370 SDValue N00 = N0.getOperand(0);
25371 ArrayRef<int> Mask = SVN->getMask();
25372 unsigned NumElts = VT.getVectorNumElements();
25373 unsigned EltSizeInBits = VT.getScalarSizeInBits();
25374 unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
25375 unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();
25376
25377 if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
25378 return SDValue();
25379 unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;
25380
25381 // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2,-1,-1>
25382 // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
25383 // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
25384 auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
25385 for (unsigned i = 0; i != NumElts; ++i) {
25386 if (Mask[i] < 0)
25387 continue;
25388 if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
25389 continue;
25390 return false;
25391 }
25392 return true;
25393 };
25394
25395 // At the moment we just handle the case where we've truncated back to the
25396 // same size as before the extension.
25397 // TODO: handle more extension/truncation cases as cases arise.
25398 if (EltSizeInBits != ExtSrcSizeInBits)
25399 return SDValue();
25400
25401 // We can remove *extend_vector_inreg only if the truncation happens at
25402 // the same scale as the extension.
25403 if (isTruncate(ExtScale))
25404 return DAG.getBitcast(VT, N00);
25405
25406 return SDValue();
25407}
25408
25409// Combine shuffles of splat-shuffles of the form:
25410// shuffle (shuffle V, undef, splat-mask), undef, M
25411// If splat-mask contains undef elements, we need to be careful about
25412// introducing undef's in the folded mask which are not the result of composing
25413// the masks of the shuffles.
25414 static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf,
25415 SelectionDAG &DAG) {
25416 EVT VT = Shuf->getValueType(0);
25417 unsigned NumElts = VT.getVectorNumElements();
25418
25419 if (!Shuf->getOperand(1).isUndef())
25420 return SDValue();
25421
25422 // See if this unary non-splat shuffle actually *is* a splat shuffle,
25423 // in disguise, with all demanded elements being identical.
25424 // FIXME: this can be done per-operand.
25425 if (!Shuf->isSplat()) {
25426 APInt DemandedElts(NumElts, 0);
25427 for (int Idx : Shuf->getMask()) {
25428 if (Idx < 0)
25429 continue; // Ignore sentinel indices.
25430 assert((unsigned)Idx < NumElts && "Out-of-bounds shuffle indice?");
25431 DemandedElts.setBit(Idx);
25432 }
25433 assert(DemandedElts.popcount() > 1 && "Is a splat shuffle already?");
25434 APInt UndefElts;
25435 if (DAG.isSplatValue(Shuf->getOperand(0), DemandedElts, UndefElts)) {
25436 // Even if all demanded elements are splat, some of them could be undef.
25437 // Which lowest demanded element is *not* known-undef?
25438 std::optional<unsigned> MinNonUndefIdx;
25439 for (int Idx : Shuf->getMask()) {
25440 if (Idx < 0 || UndefElts[Idx])
25441 continue; // Ignore sentinel indices, and undef elements.
25442 MinNonUndefIdx = std::min<unsigned>(Idx, MinNonUndefIdx.value_or(~0U));
25443 }
25444 if (!MinNonUndefIdx)
25445 return DAG.getUNDEF(VT); // All undef - result is undef.
25446 assert(*MinNonUndefIdx < NumElts && "Expected valid element index.");
25447 SmallVector<int, 8> SplatMask(Shuf->getMask().begin(),
25448 Shuf->getMask().end());
25449 for (int &Idx : SplatMask) {
25450 if (Idx < 0)
25451 continue; // Passthrough sentinel indices.
25452 // Otherwise, just pick the lowest demanded non-undef element.
25453 // Or sentinel undef, if we know we'd pick a known-undef element.
25454 Idx = UndefElts[Idx] ? -1 : *MinNonUndefIdx;
25455 }
25456 assert(SplatMask != Shuf->getMask() && "Expected mask to change!");
25457 return DAG.getVectorShuffle(VT, SDLoc(Shuf), Shuf->getOperand(0),
25458 Shuf->getOperand(1), SplatMask);
25459 }
25460 }
25461
25462 // If the inner operand is a known splat with no undefs, just return that directly.
25463 // TODO: Create DemandedElts mask from Shuf's mask.
25464 // TODO: Allow undef elements and merge with the shuffle code below.
25465 if (DAG.isSplatValue(Shuf->getOperand(0), /*AllowUndefs*/ false))
25466 return Shuf->getOperand(0);
25467
25468 auto *Splat = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
25469 if (!Splat || !Splat->isSplat())
25470 return SDValue();
25471
25472 ArrayRef<int> ShufMask = Shuf->getMask();
25473 ArrayRef<int> SplatMask = Splat->getMask();
25474 assert(ShufMask.size() == SplatMask.size() && "Mask length mismatch");
25475
25476 // Prefer simplifying to the splat-shuffle, if possible. This is legal if
25477 // every undef mask element in the splat-shuffle has a corresponding undef
25478 // element in the user-shuffle's mask or if the composition of mask elements
25479 // would result in undef.
25480 // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
25481 // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
25482 // In this case it is not legal to simplify to the splat-shuffle because we
25483 // may be exposing to the users of the shuffle an undef element at index 1
25484 // which was not there before the combine.
25485 // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
25486 // In this case the composition of masks yields SplatMask, so it's ok to
25487 // simplify to the splat-shuffle.
25488 // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
25489 // In this case the composed mask includes all undef elements of SplatMask
25490 // and in addition sets element zero to undef. It is safe to simplify to
25491 // the splat-shuffle.
25492 auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
25493 ArrayRef<int> SplatMask) {
25494 for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
25495 if (UserMask[i] != -1 && SplatMask[i] == -1 &&
25496 SplatMask[UserMask[i]] != -1)
25497 return false;
25498 return true;
25499 };
25500 if (CanSimplifyToExistingSplat(ShufMask, SplatMask))
25501 return Shuf->getOperand(0);
25502
25503 // Create a new shuffle with a mask that is composed of the two shuffles'
25504 // masks.
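// As an illustration (reusing the first example above): for
// ShufMask=[0,2,u,u] and SplatMask=[2,u,2,u], the composed mask is
// NewMask=[2,2,u,u].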
25505 SmallVector<int, 32> NewMask;
25506 for (int Idx : ShufMask)
25507 NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
25508
25509 return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
25510 Splat->getOperand(0), Splat->getOperand(1),
25511 NewMask);
25512}
25513
25514// Combine shuffles of bitcasts into a shuffle of the bitcast type, providing
25515// the mask can be treated as a larger type.
25516 static SDValue combineShuffleOfBitcast(ShuffleVectorSDNode *SVN,
25517 SelectionDAG &DAG,
25518 const TargetLowering &TLI,
25519 bool LegalOperations) {
25520 SDValue Op0 = SVN->getOperand(0);
25521 SDValue Op1 = SVN->getOperand(1);
25522 EVT VT = SVN->getValueType(0);
25523 if (Op0.getOpcode() != ISD::BITCAST)
25524 return SDValue();
25525 EVT InVT = Op0.getOperand(0).getValueType();
25526 if (!InVT.isVector() ||
25527 (!Op1.isUndef() && (Op1.getOpcode() != ISD::BITCAST ||
25528 Op1.getOperand(0).getValueType() != InVT)))
25529 return SDValue();
25530 if (isAnyConstantBuildVector(Op0.getOperand(0)) &&
25531 (Op1.isUndef() || isAnyConstantBuildVector(Op1.getOperand(0))))
25532 return SDValue();
25533
25534 int VTLanes = VT.getVectorNumElements();
25535 int InLanes = InVT.getVectorNumElements();
25536 if (VTLanes <= InLanes || VTLanes % InLanes != 0 ||
25537 (LegalOperations &&
25538 !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT)))
25539 return SDValue();
25540 int Factor = VTLanes / InLanes;
25541
25542 // Check that each group of lanes in the mask are either undef or make a valid
25543 // mask for the wider lane type.
25544 ArrayRef<int> Mask = SVN->getMask();
25545 SmallVector<int> NewMask;
25546 if (!widenShuffleMaskElts(Factor, Mask, NewMask))
25547 return SDValue();
25548
25549 if (!TLI.isShuffleMaskLegal(NewMask, InVT))
25550 return SDValue();
25551
25552 // Create the new shuffle with the new mask and bitcast it back to the
25553 // original type.
25554 SDLoc DL(SVN);
25555 Op0 = Op0.getOperand(0);
25556 Op1 = Op1.isUndef() ? DAG.getUNDEF(InVT) : Op1.getOperand(0);
25557 SDValue NewShuf = DAG.getVectorShuffle(InVT, DL, Op0, Op1, NewMask);
25558 return DAG.getBitcast(VT, NewShuf);
25559}
25560
25561/// Combine shuffle of shuffle of the form:
25562/// shuf (shuf X, undef, InnerMask), undef, OuterMask --> splat X
25563 static SDValue formSplatFromShuffles(ShuffleVectorSDNode *OuterShuf,
25564 SelectionDAG &DAG) {
25565 if (!OuterShuf->getOperand(1).isUndef())
25566 return SDValue();
25567 auto *InnerShuf = dyn_cast<ShuffleVectorSDNode>(OuterShuf->getOperand(0));
25568 if (!InnerShuf || !InnerShuf->getOperand(1).isUndef())
25569 return SDValue();
25570
25571 ArrayRef<int> OuterMask = OuterShuf->getMask();
25572 ArrayRef<int> InnerMask = InnerShuf->getMask();
25573 unsigned NumElts = OuterMask.size();
25574 assert(NumElts == InnerMask.size() && "Mask length mismatch");
25575 SmallVector<int, 32> CombinedMask(NumElts, -1);
25576 int SplatIndex = -1;
25577 for (unsigned i = 0; i != NumElts; ++i) {
25578 // Undef lanes remain undef.
25579 int OuterMaskElt = OuterMask[i];
25580 if (OuterMaskElt == -1)
25581 continue;
25582
25583 // Peek through the shuffle masks to get the underlying source element.
25584 int InnerMaskElt = InnerMask[OuterMaskElt];
25585 if (InnerMaskElt == -1)
25586 continue;
25587
25588 // Initialize the splatted element.
25589 if (SplatIndex == -1)
25590 SplatIndex = InnerMaskElt;
25591
25592 // Non-matching index - this is not a splat.
25593 if (SplatIndex != InnerMaskElt)
25594 return SDValue();
25595
25596 CombinedMask[i] = InnerMaskElt;
25597 }
25598 assert((all_of(CombinedMask, [](int M) { return M == -1; }) ||
25599 getSplatIndex(CombinedMask) != -1) &&
25600 "Expected a splat mask");
25601
25602 // TODO: The transform may be a win even if the mask is not legal.
25603 EVT VT = OuterShuf->getValueType(0);
25604 assert(VT == InnerShuf->getValueType(0) && "Expected matching shuffle types");
25605 if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(CombinedMask, VT))
25606 return SDValue();
25607
25608 return DAG.getVectorShuffle(VT, SDLoc(OuterShuf), InnerShuf->getOperand(0),
25609 InnerShuf->getOperand(1), CombinedMask);
25610}
25611
25612/// If the shuffle mask is taking exactly one element from the first vector
25613/// operand and passing through all other elements from the second vector
25614/// operand, return the index of the mask element that is choosing an element
25615/// from the first operand. Otherwise, return -1.
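/// Example (editorial illustration): with 4 elements, Mask = <4,5,2,7>
/// returns 2 (only element 2 is taken from operand 0; the operand 1 elements
/// stay in their lanes), while Mask = <0,5,2,7> returns -1 because two
/// elements would come from operand 0.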
25616 static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) {
25617 int MaskSize = Mask.size();
25618 int EltFromOp0 = -1;
25619 // TODO: This does not match if there are undef elements in the shuffle mask.
25620 // Should we ignore undefs in the shuffle mask instead? The trade-off is
25621 // removing an instruction (a shuffle), but losing the knowledge that some
25622 // vector lanes are not needed.
25623 for (int i = 0; i != MaskSize; ++i) {
25624 if (Mask[i] >= 0 && Mask[i] < MaskSize) {
25625 // We're looking for a shuffle of exactly one element from operand 0.
25626 if (EltFromOp0 != -1)
25627 return -1;
25628 EltFromOp0 = i;
25629 } else if (Mask[i] != i + MaskSize) {
25630 // Nothing from operand 1 can change lanes.
25631 return -1;
25632 }
25633 }
25634 return EltFromOp0;
25635}
25636
25637/// If a shuffle inserts exactly one element from a source vector operand into
25638/// another vector operand and we can access the specified element as a scalar,
25639/// then we can eliminate the shuffle.
25640 static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
25641 SelectionDAG &DAG) {
25642 // First, check if we are taking one element of a vector and shuffling that
25643 // element into another vector.
25644 ArrayRef<int> Mask = Shuf->getMask();
25645 SmallVector<int, 16> CommutedMask(Mask);
25646 SDValue Op0 = Shuf->getOperand(0);
25647 SDValue Op1 = Shuf->getOperand(1);
25648 int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask);
25649 if (ShufOp0Index == -1) {
25650 // Commute mask and check again.
25651 ShuffleVectorSDNode::commuteMask(CommutedMask);
25652 ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask);
25653 if (ShufOp0Index == -1)
25654 return SDValue();
25655 // Commute operands to match the commuted shuffle mask.
25656 std::swap(Op0, Op1);
25657 Mask = CommutedMask;
25658 }
25659
25660 // The shuffle inserts exactly one element from operand 0 into operand 1.
25661 // Now see if we can access that element as a scalar via a real insert element
25662 // instruction.
25663 // TODO: We can try harder to locate the element as a scalar. Examples: it
25664 // could be an operand of SCALAR_TO_VECTOR, BUILD_VECTOR, or a constant.
25665 assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&
25666 "Shuffle mask value must be from operand 0");
25667 if (Op0.getOpcode() != ISD::INSERT_VECTOR_ELT)
25668 return SDValue();
25669
25670 auto *InsIndexC = dyn_cast<ConstantSDNode>(Op0.getOperand(2));
25671 if (!InsIndexC || InsIndexC->getSExtValue() != Mask[ShufOp0Index])
25672 return SDValue();
25673
25674 // There's an existing insertelement with constant insertion index, so we
25675 // don't need to check the legality/profitability of a replacement operation
25676 // that differs at most in the constant value. The target should be able to
25677 // lower any of those in a similar way. If not, legalization will expand this
25678 // to a scalar-to-vector plus shuffle.
25679 //
25680 // Note that the shuffle may move the scalar from the position that the insert
25681 // element used. Therefore, our new insert element occurs at the shuffle's
25682 // mask index value, not the insert's index value.
25683 // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
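// Illustrative instance with 4 elements:
//   shuffle (insertelt v1, x, 3), v2, <4,5,3,7> --> insertelt v2, x, 2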
25684 SDValue NewInsIndex = DAG.getVectorIdxConstant(ShufOp0Index, SDLoc(Shuf));
25685 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
25686 Op1, Op0.getOperand(1), NewInsIndex);
25687}
25688
25689/// If we have a unary shuffle of a shuffle, see if it can be folded away
25690/// completely. This has the potential to lose undef knowledge because the first
25691/// shuffle may not have an undef mask element where the second one does. So
25692/// only call this after doing simplifications based on demanded elements.
25693 static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf) {
25694 // shuf (shuf0 X, Y, Mask0), undef, Mask
25695 auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
25696 if (!Shuf0 || !Shuf->getOperand(1).isUndef())
25697 return SDValue();
25698
25699 ArrayRef<int> Mask = Shuf->getMask();
25700 ArrayRef<int> Mask0 = Shuf0->getMask();
25701 for (int i = 0, e = (int)Mask.size(); i != e; ++i) {
25702 // Ignore undef elements.
25703 if (Mask[i] == -1)
25704 continue;
25705 assert(Mask[i] >= 0 && Mask[i] < e && "Unexpected shuffle mask value");
25706
25707 // Is the element of the shuffle operand chosen by this shuffle the same as
25708 // the element chosen by the shuffle operand itself?
25709 if (Mask0[Mask[i]] != Mask0[i])
25710 return SDValue();
25711 }
25712 // Every element of this shuffle is identical to the result of the previous
25713 // shuffle, so we can replace this value.
25714 return Shuf->getOperand(0);
25715}
25716
25717SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
25718 EVT VT = N->getValueType(0);
25719 unsigned NumElts = VT.getVectorNumElements();
25720
25721 SDValue N0 = N->getOperand(0);
25722 SDValue N1 = N->getOperand(1);
25723
25724 assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
25725
25726 // Canonicalize shuffle undef, undef -> undef
25727 if (N0.isUndef() && N1.isUndef())
25728 return DAG.getUNDEF(VT);
25729
25730 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
25731
25732 // Canonicalize shuffle v, v -> v, undef
25733 if (N0 == N1)
25734 return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT),
25735 createUnaryMask(SVN->getMask(), NumElts));
25736
25737 // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
25738 if (N0.isUndef())
25739 return DAG.getCommutedVectorShuffle(*SVN);
25740
25741 // Remove references to rhs if it is undef
25742 if (N1.isUndef()) {
25743 bool Changed = false;
25744 SmallVector<int, 8> NewMask;
25745 for (unsigned i = 0; i != NumElts; ++i) {
25746 int Idx = SVN->getMaskElt(i);
25747 if (Idx >= (int)NumElts) {
25748 Idx = -1;
25749 Changed = true;
25750 }
25751 NewMask.push_back(Idx);
25752 }
25753 if (Changed)
25754 return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
25755 }
25756
25757 if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG))
25758 return InsElt;
25759
25760 // A shuffle of a single vector that is a splatted value can always be folded.
25761 if (SDValue V = combineShuffleOfSplatVal(SVN, DAG))
25762 return V;
25763
25764 if (SDValue V = formSplatFromShuffles(SVN, DAG))
25765 return V;
25766
25767 // If it is a splat, check if the argument vector is another splat or a
25768 // build_vector.
25769 if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
25770 int SplatIndex = SVN->getSplatIndex();
25771 if (N0.hasOneUse() && TLI.isExtractVecEltCheap(VT, SplatIndex) &&
25772 TLI.isBinOp(N0.getOpcode()) && N0->getNumValues() == 1) {
25773 // splat (vector_bo L, R), Index -->
25774 // splat (scalar_bo (extelt L, Index), (extelt R, Index))
25775 SDValue L = N0.getOperand(0), R = N0.getOperand(1);
25776 SDLoc DL(N);
25777 EVT EltVT = VT.getScalarType();
25778 SDValue Index = DAG.getVectorIdxConstant(SplatIndex, DL);
25779 SDValue ExtL = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, L, Index);
25780 SDValue ExtR = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, R, Index);
25781 SDValue NewBO =
25782 DAG.getNode(N0.getOpcode(), DL, EltVT, ExtL, ExtR, N0->getFlags());
25783 SDValue Insert = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, NewBO);
25784 SmallVector<int, 16> ZeroMask(VT.getVectorNumElements(), 0);
25785 return DAG.getVectorShuffle(VT, DL, Insert, DAG.getUNDEF(VT), ZeroMask);
25786 }
25787
25788 // splat(scalar_to_vector(x), 0) -> build_vector(x,...,x)
25789 // splat(insert_vector_elt(v, x, c), c) -> build_vector(x,...,x)
25790 if ((!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) &&
25791 N0.hasOneUse()) {
25792 if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && SplatIndex == 0)
25793 return DAG.getSplatBuildVector(VT, SDLoc(N), N0.getOperand(0));
25794
25795 if (N0.getOpcode() == ISD::INSERT_VECTOR_ELT)
25796 if (auto *Idx = dyn_cast<ConstantSDNode>(N0.getOperand(2)))
25797 if (Idx->getAPIntValue() == SplatIndex)
25798 return DAG.getSplatBuildVector(VT, SDLoc(N), N0.getOperand(1));
25799
25800 // Look through a bitcast if LE and splatting lane 0, through to a
25801 // scalar_to_vector or a build_vector.
25802 if (N0.getOpcode() == ISD::BITCAST && N0.getOperand(0).hasOneUse() &&
25803 SplatIndex == 0 && DAG.getDataLayout().isLittleEndian() &&
25804 (N0.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR ||
25805 N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR)) {
25806 EVT N00VT = N0.getOperand(0).getValueType();
25807 if (VT.getScalarSizeInBits() <= N00VT.getScalarSizeInBits() &&
25808 VT.isInteger() && N00VT.isInteger()) {
25809 EVT InVT =
25810 TLI.getTypeToTransformTo(*DAG.getContext(), VT.getScalarType());
25811 SDValue Op = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0),
25812 SDLoc(N), InVT);
25813 return DAG.getSplatBuildVector(VT, SDLoc(N), Op);
25814 }
25815 }
25816 }
25817
25818 // If this is a bit convert that changes the element type of the vector but
25819 // not the number of vector elements, look through it. Be careful not to
25820 // look through conversions that change things like v4f32 to v2f64.
25821 SDNode *V = N0.getNode();
25822 if (V->getOpcode() == ISD::BITCAST) {
25823 SDValue ConvInput = V->getOperand(0);
25824 if (ConvInput.getValueType().isVector() &&
25825 ConvInput.getValueType().getVectorNumElements() == NumElts)
25826 V = ConvInput.getNode();
25827 }
25828
25829 if (V->getOpcode() == ISD::BUILD_VECTOR) {
25830 assert(V->getNumOperands() == NumElts &&
25831 "BUILD_VECTOR has wrong number of operands");
25832 SDValue Base;
25833 bool AllSame = true;
25834 for (unsigned i = 0; i != NumElts; ++i) {
25835 if (!V->getOperand(i).isUndef()) {
25836 Base = V->getOperand(i);
25837 break;
25838 }
25839 }
25840 // Splat of <u, u, u, u>, return <u, u, u, u>
25841 if (!Base.getNode())
25842 return N0;
25843 for (unsigned i = 0; i != NumElts; ++i) {
25844 if (V->getOperand(i) != Base) {
25845 AllSame = false;
25846 break;
25847 }
25848 }
25849 // Splat of <x, x, x, x>, return <x, x, x, x>
25850 if (AllSame)
25851 return N0;
25852
25853 // Canonicalize any other splat as a build_vector.
25854 SDValue Splatted = V->getOperand(SplatIndex);
25855 SmallVector<SDValue, 8> Ops(NumElts, Splatted);
25856 SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
25857
25858 // We may have jumped through bitcasts, so the type of the
25859 // BUILD_VECTOR may not match the type of the shuffle.
25860 if (V->getValueType(0) != VT)
25861 NewBV = DAG.getBitcast(VT, NewBV);
25862 return NewBV;
25863 }
25864 }
25865
25866 // Simplify source operands based on shuffle mask.
25867 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
25868 return SDValue(N, 0);
25869
25870 // This is intentionally placed after demanded elements simplification because
25871 // it could eliminate knowledge of undef elements created by this shuffle.
25872 if (SDValue ShufOp = simplifyShuffleOfShuffle(SVN))
25873 return ShufOp;
25874
25875 // Match shuffles that can be converted to any_vector_extend_in_reg.
25876 if (SDValue V =
25877 combineShuffleToAnyExtendVectorInreg(SVN, DAG, TLI, LegalOperations))
25878 return V;
25879
25880 // Combine "truncate_vector_in_reg" style shuffles.
25881 if (SDValue V = combineTruncationShuffle(SVN, DAG))
25882 return V;
25883
25884 if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
25885 Level < AfterLegalizeVectorOps &&
25886 (N1.isUndef() ||
25887 (N1.getOpcode() == ISD::CONCAT_VECTORS &&
25888 N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
25889 if (SDValue V = partitionShuffleOfConcats(N, DAG))
25890 return V;
25891 }
25892
25893 // A shuffle of a concat of the same narrow vector can be reduced to use
25894 // only low-half elements of a concat with undef:
25895 // shuf (concat X, X), undef, Mask --> shuf (concat X, undef), undef, Mask'
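// Illustration (X is a 4-element vector, so the shuffle has 8 elements):
//   shuf (concat X, X), undef, <0,5,1,6,u,u,u,u>
//     --> shuf (concat X, undef), undef, <0,1,1,2,u,u,u,u>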
25896 if (N0.getOpcode() == ISD::CONCAT_VECTORS && N1.isUndef() &&
25897 N0.getNumOperands() == 2 &&
25898 N0.getOperand(0) == N0.getOperand(1)) {
25899 int HalfNumElts = (int)NumElts / 2;
25900 SmallVector<int, 8> NewMask;
25901 for (unsigned i = 0; i != NumElts; ++i) {
25902 int Idx = SVN->getMaskElt(i);
25903 if (Idx >= HalfNumElts) {
25904 assert(Idx < (int)NumElts && "Shuffle mask chooses undef op");
25905 Idx -= HalfNumElts;
25906 }
25907 NewMask.push_back(Idx);
25908 }
25909 if (TLI.isShuffleMaskLegal(NewMask, VT)) {
25910 SDValue UndefVec = DAG.getUNDEF(N0.getOperand(0).getValueType());
25911 SDValue NewCat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
25912 N0.getOperand(0), UndefVec);
25913 return DAG.getVectorShuffle(VT, SDLoc(N), NewCat, N1, NewMask);
25914 }
25915 }
25916
25917 // See if we can replace a shuffle with an insert_subvector.
25918 // e.g. v2i32 into v8i32:
25919 // shuffle(lhs,concat(rhs0,rhs1,rhs2,rhs3),0,1,2,3,10,11,6,7).
25920 // --> insert_subvector(lhs,rhs1,4).
25921 if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT) &&
25922 TLI.isOperationLegalOrCustom(ISD::INSERT_SUBVECTOR, VT)) {
25923 auto ShuffleToInsert = [&](SDValue LHS, SDValue RHS, ArrayRef<int> Mask) {
25924 // Ensure RHS subvectors are legal.
25925 assert(RHS.getOpcode() == ISD::CONCAT_VECTORS && "Can't find subvectors");
25926 EVT SubVT = RHS.getOperand(0).getValueType();
25927 int NumSubVecs = RHS.getNumOperands();
25928 int NumSubElts = SubVT.getVectorNumElements();
25929 assert((NumElts % NumSubElts) == 0 && "Subvector mismatch");
25930 if (!TLI.isTypeLegal(SubVT))
25931 return SDValue();
25932
25933 // Don't bother if we have a unary shuffle (matches undef + LHS elts).
25934 if (all_of(Mask, [NumElts](int M) { return M < (int)NumElts; }))
25935 return SDValue();
25936
25937 // Search [NumSubElts] spans for RHS sequence.
25938 // TODO: Can we avoid nested loops to increase performance?
25939 SmallVector<int> InsertionMask(NumElts);
25940 for (int SubVec = 0; SubVec != NumSubVecs; ++SubVec) {
25941 for (int SubIdx = 0; SubIdx != (int)NumElts; SubIdx += NumSubElts) {
25942 // Reset mask to identity.
25943 std::iota(InsertionMask.begin(), InsertionMask.end(), 0);
25944
25945 // Add subvector insertion.
25946 std::iota(InsertionMask.begin() + SubIdx,
25947 InsertionMask.begin() + SubIdx + NumSubElts,
25948 NumElts + (SubVec * NumSubElts));
25949
25950 // See if the shuffle mask matches the reference insertion mask.
25951 bool MatchingShuffle = true;
25952 for (int i = 0; i != (int)NumElts; ++i) {
25953 int ExpectIdx = InsertionMask[i];
25954 int ActualIdx = Mask[i];
25955 if (0 <= ActualIdx && ExpectIdx != ActualIdx) {
25956 MatchingShuffle = false;
25957 break;
25958 }
25959 }
25960
25961 if (MatchingShuffle)
25962 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, LHS,
25963 RHS.getOperand(SubVec),
25964 DAG.getVectorIdxConstant(SubIdx, SDLoc(N)));
25965 }
25966 }
25967 return SDValue();
25968 };
25969 ArrayRef<int> Mask = SVN->getMask();
25970 if (N1.getOpcode() == ISD::CONCAT_VECTORS)
25971 if (SDValue InsertN1 = ShuffleToInsert(N0, N1, Mask))
25972 return InsertN1;
25973 if (N0.getOpcode() == ISD::CONCAT_VECTORS) {
25974 SmallVector<int> CommuteMask(Mask);
25975 ShuffleVectorSDNode::commuteMask(CommuteMask);
25976 if (SDValue InsertN0 = ShuffleToInsert(N1, N0, CommuteMask))
25977 return InsertN0;
25978 }
25979 }
25980
25981 // If we're not performing a select/blend shuffle, see if we can convert the
25982 // shuffle into an AND node, where all the out-of-lane elements are known zero.
25983 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
25984 bool IsInLaneMask = true;
25985 ArrayRef<int> Mask = SVN->getMask();
25986 SmallVector<int, 16> ClearMask(NumElts, -1);
25987 APInt DemandedLHS = APInt::getZero(NumElts);
25988 APInt DemandedRHS = APInt::getZero(NumElts);
25989 for (int I = 0; I != (int)NumElts; ++I) {
25990 int M = Mask[I];
25991 if (M < 0)
25992 continue;
25993 ClearMask[I] = M == I ? I : (I + NumElts);
25994 IsInLaneMask &= (M == I) || (M == (int)(I + NumElts));
25995 if (M != I) {
25996 APInt &Demanded = M < (int)NumElts ? DemandedLHS : DemandedRHS;
25997 Demanded.setBit(M % NumElts);
25998 }
25999 }
26000 // TODO: Should we try to mask with N1 as well?
26001 if (!IsInLaneMask && (!DemandedLHS.isZero() || !DemandedRHS.isZero()) &&
26002 (DemandedLHS.isZero() || DAG.MaskedVectorIsZero(N0, DemandedLHS)) &&
26003 (DemandedRHS.isZero() || DAG.MaskedVectorIsZero(N1, DemandedRHS))) {
26004 SDLoc DL(N);
26005 EVT IntVT = VT.changeVectorElementTypeToInteger();
26006 EVT IntSVT = VT.getVectorElementType().changeTypeToInteger();
26007 // Transform the type to a legal type so that the buildvector constant
26008 // elements are not illegal. Make sure that the result is larger than the
26009 // original type, in case the value is split into two (e.g. i64->i32).
26010 if (!TLI.isTypeLegal(IntSVT) && LegalTypes)
26011 IntSVT = TLI.getTypeToTransformTo(*DAG.getContext(), IntSVT);
26012 if (IntSVT.getSizeInBits() >= IntVT.getScalarSizeInBits()) {
26013 SDValue ZeroElt = DAG.getConstant(0, DL, IntSVT);
26014 SDValue AllOnesElt = DAG.getAllOnesConstant(DL, IntSVT);
26015 SmallVector<SDValue, 16> AndMask(NumElts, DAG.getUNDEF(IntSVT));
26016 for (int I = 0; I != (int)NumElts; ++I)
26017 if (0 <= Mask[I])
26018 AndMask[I] = Mask[I] == I ? AllOnesElt : ZeroElt;
26019
26020 // See if a clear mask is legal instead of going via
26021 // XformToShuffleWithZero which loses UNDEF mask elements.
26022 if (TLI.isVectorClearMaskLegal(ClearMask, IntVT))
26023 return DAG.getBitcast(
26024 VT, DAG.getVectorShuffle(IntVT, DL, DAG.getBitcast(IntVT, N0),
26025 DAG.getConstant(0, DL, IntVT), ClearMask));
26026
26027 if (TLI.isOperationLegalOrCustom(ISD::AND, IntVT))
26028 return DAG.getBitcast(
26029 VT, DAG.getNode(ISD::AND, DL, IntVT, DAG.getBitcast(IntVT, N0),
26030 DAG.getBuildVector(IntVT, DL, AndMask)));
26031 }
26032 }
26033 }
26034
26035 // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
26036 // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
26037 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT))
26038 if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
26039 return Res;
26040
26041 // If this shuffle only has a single input that is a bitcasted shuffle,
26042 // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
26043 // back to their original types.
26044 if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
26045 N1.isUndef() && Level < AfterLegalizeVectorOps &&
26046 TLI.isTypeLegal(VT)) {
26047
26048 SDValue BC0 = peekThroughOneUseBitcasts(N0);
26049 if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
26050 EVT SVT = VT.getScalarType();
26051 EVT InnerVT = BC0->getValueType(0);
26052 EVT InnerSVT = InnerVT.getScalarType();
26053
26054 // Determine which shuffle works with the smaller scalar type.
26055 EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
26056 EVT ScaleSVT = ScaleVT.getScalarType();
26057
26058 if (TLI.isTypeLegal(ScaleVT) &&
26059 0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
26060 0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
26061 int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
26062 int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
26063
26064 // Scale the shuffle masks to the smaller scalar type.
26065 ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
26066 SmallVector<int, 8> InnerMask;
26067 SmallVector<int, 8> OuterMask;
26068 narrowShuffleMaskElts(InnerScale, InnerSVN->getMask(), InnerMask);
26069 narrowShuffleMaskElts(OuterScale, SVN->getMask(), OuterMask);
26070
26071 // Merge the shuffle masks.
26072 SmallVector<int, 8> NewMask;
26073 for (int M : OuterMask)
26074 NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
26075
26076 // Test for shuffle mask legality over both commutations.
26077 SDValue SV0 = BC0->getOperand(0);
26078 SDValue SV1 = BC0->getOperand(1);
26079 bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
26080 if (!LegalMask) {
26081 std::swap(SV0, SV1);
26082 ShuffleVectorSDNode::commuteMask(NewMask);
26083 LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
26084 }
26085
26086 if (LegalMask) {
26087 SV0 = DAG.getBitcast(ScaleVT, SV0);
26088 SV1 = DAG.getBitcast(ScaleVT, SV1);
26089 return DAG.getBitcast(
26090 VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
26091 }
26092 }
26093 }
26094 }
26095
26096 // Match shuffles of bitcasts, so long as the mask can be treated as the
26097 // larger type.
26098 if (SDValue V = combineShuffleOfBitcast(SVN, DAG, TLI, LegalOperations))
26099 return V;
26100
26101 // Compute the combined shuffle mask for a shuffle with SV0 as the first
26102 // operand, and SV1 as the second operand.
26103 // i.e. Merge SVN(OtherSVN, N1) -> shuffle(SV0, SV1, Mask) iff Commute = false
26104 // Merge SVN(N1, OtherSVN) -> shuffle(SV0, SV1, Mask') iff Commute = true
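// Worked illustration (4 elements, Commute = false):
//   SVN = shuffle(OtherSVN, undef, <0,2,1,3>),
//   OtherSVN = shuffle(A, B, <0,5,2,7>)
//   yields SV0 = A, SV1 = B and Mask = <0,2,5,7>.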
26105 auto MergeInnerShuffle =
26106 [NumElts, &VT](bool Commute, ShuffleVectorSDNode *SVN,
26107 ShuffleVectorSDNode *OtherSVN, SDValue N1,
26108 const TargetLowering &TLI, SDValue &SV0, SDValue &SV1,
26109 SmallVectorImpl<int> &Mask) -> bool {
26110 // Don't try to fold splats; they're likely to simplify somehow, or they
26111 // might be free.
26112 if (OtherSVN->isSplat())
26113 return false;
26114
26115 SV0 = SV1 = SDValue();
26116 Mask.clear();
26117
26118 for (unsigned i = 0; i != NumElts; ++i) {
26119 int Idx = SVN->getMaskElt(i);
26120 if (Idx < 0) {
26121 // Propagate Undef.
26122 Mask.push_back(Idx);
26123 continue;
26124 }
26125
26126 if (Commute)
26127 Idx = (Idx < (int)NumElts) ? (Idx + NumElts) : (Idx - NumElts);
26128
26129 SDValue CurrentVec;
26130 if (Idx < (int)NumElts) {
26131 // This shuffle index refers to the inner shuffle N0. Lookup the inner
26132 // shuffle mask to identify which vector is actually referenced.
26133 Idx = OtherSVN->getMaskElt(Idx);
26134 if (Idx < 0) {
26135 // Propagate Undef.
26136 Mask.push_back(Idx);
26137 continue;
26138 }
26139 CurrentVec = (Idx < (int)NumElts) ? OtherSVN->getOperand(0)
26140 : OtherSVN->getOperand(1);
26141 } else {
26142 // This shuffle index references an element within N1.
26143 CurrentVec = N1;
26144 }
26145
26146 // Simple case where 'CurrentVec' is UNDEF.
26147 if (CurrentVec.isUndef()) {
26148 Mask.push_back(-1);
26149 continue;
26150 }
26151
26152 // Canonicalize the shuffle index. We don't know yet if CurrentVec
26153 // will be the first or second operand of the combined shuffle.
26154 Idx = Idx % NumElts;
26155 if (!SV0.getNode() || SV0 == CurrentVec) {
26156 // Ok. CurrentVec is the left hand side.
26157 // Update the mask accordingly.
26158 SV0 = CurrentVec;
26159 Mask.push_back(Idx);
26160 continue;
26161 }
26162 if (!SV1.getNode() || SV1 == CurrentVec) {
26163 // Ok. CurrentVec is the right hand side.
26164 // Update the mask accordingly.
26165 SV1 = CurrentVec;
26166 Mask.push_back(Idx + NumElts);
26167 continue;
26168 }
26169
26170 // Last chance - see if the vector is another shuffle and if it
26171 // uses one of the existing candidate shuffle ops.
26172 if (auto *CurrentSVN = dyn_cast<ShuffleVectorSDNode>(CurrentVec)) {
26173 int InnerIdx = CurrentSVN->getMaskElt(Idx);
26174 if (InnerIdx < 0) {
26175 Mask.push_back(-1);
26176 continue;
26177 }
26178 SDValue InnerVec = (InnerIdx < (int)NumElts)
26179 ? CurrentSVN->getOperand(0)
26180 : CurrentSVN->getOperand(1);
26181 if (InnerVec.isUndef()) {
26182 Mask.push_back(-1);
26183 continue;
26184 }
26185 InnerIdx %= NumElts;
26186 if (InnerVec == SV0) {
26187 Mask.push_back(InnerIdx);
26188 continue;
26189 }
26190 if (InnerVec == SV1) {
26191 Mask.push_back(InnerIdx + NumElts);
26192 continue;
26193 }
26194 }
26195
26196 // Bail out if we cannot convert the shuffle pair into a single shuffle.
26197 return false;
26198 }
26199
26200 if (llvm::all_of(Mask, [](int M) { return M < 0; }))
26201 return true;
26202
26203 // Avoid introducing shuffles with illegal mask.
26204 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
26205 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
26206 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
26207 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
26208 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
26209 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
26210 if (TLI.isShuffleMaskLegal(Mask, VT))
26211 return true;
26212
26213 std::swap(SV0, SV1);
26214 ShuffleVectorSDNode::commuteMask(Mask);
26215 return TLI.isShuffleMaskLegal(Mask, VT);
26216 };
26217
26218 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
26219 // Canonicalize shuffles according to rules:
26220 // shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
26221 // shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
26222 // shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
26223 if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
26224 N0.getOpcode() != ISD::VECTOR_SHUFFLE) {
26225 // The incoming shuffle must be of the same type as the result of the
26226 // current shuffle.
26227 assert(N1->getOperand(0).getValueType() == VT &&
26228 "Shuffle types don't match");
26229
26230 SDValue SV0 = N1->getOperand(0);
26231 SDValue SV1 = N1->getOperand(1);
26232 bool HasSameOp0 = N0 == SV0;
26233 bool IsSV1Undef = SV1.isUndef();
26234 if (HasSameOp0 || IsSV1Undef || N0 == SV1)
26235 // Commute the operands of this shuffle so merging below will trigger.
26236 return DAG.getCommutedVectorShuffle(*SVN);
26237 }
26238
26239 // Canonicalize splat shuffles to the RHS to improve merging below.
26240 // shuffle(splat(A,u), shuffle(C,D)) -> shuffle'(shuffle(C,D), splat(A,u))
26241 if (N0.getOpcode() == ISD::VECTOR_SHUFFLE &&
26242 N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
26243 cast<ShuffleVectorSDNode>(N0)->isSplat() &&
26244 !cast<ShuffleVectorSDNode>(N1)->isSplat()) {
26245 return DAG.getCommutedVectorShuffle(*SVN);
26246 }
26247
26248 // Try to fold according to rules:
26249 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
26250 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
26251 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
26252 // Don't try to fold shuffles with illegal type.
26253 // Only fold if this shuffle is the only user of the other shuffle.
26254 // Try matching shuffle(C,shuffle(A,B)) commuted patterns as well.
26255 for (int i = 0; i != 2; ++i) {
26256 if (N->getOperand(i).getOpcode() == ISD::VECTOR_SHUFFLE &&
26257 N->isOnlyUserOf(N->getOperand(i).getNode())) {
26258 // The incoming shuffle must be of the same type as the result of the
26259 // current shuffle.
26260 auto *OtherSV = cast<ShuffleVectorSDNode>(N->getOperand(i));
26261 assert(OtherSV->getOperand(0).getValueType() == VT &&
26262 "Shuffle types don't match");
26263
26264 SDValue SV0, SV1;
26265 SmallVector<int, 4> Mask;
26266 if (MergeInnerShuffle(i != 0, SVN, OtherSV, N->getOperand(1 - i), TLI,
26267 SV0, SV1, Mask)) {
26268 // Check if all indices in Mask are Undef. In case, propagate Undef.
26269 if (llvm::all_of(Mask, [](int M) { return M < 0; }))
26270 return DAG.getUNDEF(VT);
26271
26272 return DAG.getVectorShuffle(VT, SDLoc(N),
26273 SV0 ? SV0 : DAG.getUNDEF(VT),
26274 SV1 ? SV1 : DAG.getUNDEF(VT), Mask);
26275 }
26276 }
26277 }
26278
26279 // Merge shuffles through binops if we are able to merge them with at least
26280 // one other shuffle.
26281 // shuffle(bop(shuffle(x,y),shuffle(z,w)),undef)
26282 // shuffle(bop(shuffle(x,y),shuffle(z,w)),bop(shuffle(a,b),shuffle(c,d)))
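// In both cases the result is of the form
//   bop(shuffle(x,y,M0'), shuffle(z,w,M1'))
// i.e. the outer shuffle is merged into each operand of the binop (the
// merged masks on the two sides may differ).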
26283 unsigned SrcOpcode = N0.getOpcode();
26284 if (TLI.isBinOp(SrcOpcode) && N->isOnlyUserOf(N0.getNode()) &&
26285 (N1.isUndef() ||
26286 (SrcOpcode == N1.getOpcode() && N->isOnlyUserOf(N1.getNode())))) {
26287 // Get binop source ops, or just pass on the undef.
26288 SDValue Op00 = N0.getOperand(0);
26289 SDValue Op01 = N0.getOperand(1);
26290 SDValue Op10 = N1.isUndef() ? N1 : N1.getOperand(0);
26291 SDValue Op11 = N1.isUndef() ? N1 : N1.getOperand(1);
26292 // TODO: We might be able to relax the VT check but we don't currently
26293 // have any isBinOp() that has different result/ops VTs so play safe until
26294 // we have test coverage.
26295 if (Op00.getValueType() == VT && Op10.getValueType() == VT &&
26296 Op01.getValueType() == VT && Op11.getValueType() == VT &&
26297 (Op00.getOpcode() == ISD::VECTOR_SHUFFLE ||
26298 Op10.getOpcode() == ISD::VECTOR_SHUFFLE ||
26299 Op01.getOpcode() == ISD::VECTOR_SHUFFLE ||
26300 Op11.getOpcode() == ISD::VECTOR_SHUFFLE)) {
26301 auto CanMergeInnerShuffle = [&](SDValue &SV0, SDValue &SV1,
26302 SmallVectorImpl<int> &Mask, bool LeftOp,
26303 bool Commute) {
26304 SDValue InnerN = Commute ? N1 : N0;
26305 SDValue Op0 = LeftOp ? Op00 : Op01;
26306 SDValue Op1 = LeftOp ? Op10 : Op11;
26307 if (Commute)
26308 std::swap(Op0, Op1);
26309 // Only accept the merged shuffle if we don't introduce undef elements,
26310 // or the inner shuffle already contained undef elements.
26311 auto *SVN0 = dyn_cast<ShuffleVectorSDNode>(Op0);
26312 return SVN0 && InnerN->isOnlyUserOf(SVN0) &&
26313 MergeInnerShuffle(Commute, SVN, SVN0, Op1, TLI, SV0, SV1,
26314 Mask) &&
26315 (llvm::any_of(SVN0->getMask(), [](int M) { return M < 0; }) ||
26316 llvm::none_of(Mask, [](int M) { return M < 0; }));
26317 };
26318
26319 // Ensure we don't increase the number of shuffles - we must merge a
26320 // shuffle from at least one of the LHS and RHS ops.
26321 bool MergedLeft = false;
26322 SDValue LeftSV0, LeftSV1;
26323 SmallVector<int, 4> LeftMask;
26324 if (CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, false) ||
26325 CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, true)) {
26326 MergedLeft = true;
26327 } else {
26328 LeftMask.assign(SVN->getMask().begin(), SVN->getMask().end());
26329 LeftSV0 = Op00, LeftSV1 = Op10;
26330 }
26331
26332 bool MergedRight = false;
26333 SDValue RightSV0, RightSV1;
26334 SmallVector<int, 4> RightMask;
26335 if (CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, false) ||
26336 CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, true)) {
26337 MergedRight = true;
26338 } else {
26339 RightMask.assign(SVN->getMask().begin(), SVN->getMask().end());
26340 RightSV0 = Op01, RightSV1 = Op11;
26341 }
26342
26343 if (MergedLeft || MergedRight) {
26344 SDLoc DL(N);
26345 SDValue LHS = DAG.getVectorShuffle(
26346 VT, DL, LeftSV0 ? LeftSV0 : DAG.getUNDEF(VT),
26347 LeftSV1 ? LeftSV1 : DAG.getUNDEF(VT), LeftMask);
26348 SDValue RHS = DAG.getVectorShuffle(
26349 VT, DL, RightSV0 ? RightSV0 : DAG.getUNDEF(VT),
26350 RightSV1 ? RightSV1 : DAG.getUNDEF(VT), RightMask);
26351 return DAG.getNode(SrcOpcode, DL, VT, LHS, RHS);
26352 }
26353 }
26354 }
26355 }
26356
26357 if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG))
26358 return V;
26359
26360 // Match shuffles that can be converted to ISD::ZERO_EXTEND_VECTOR_INREG.
26361 // Perform this really late, because it could eliminate knowledge
26362 // of undef elements created by this shuffle.
26363 if (Level < AfterLegalizeTypes)
26364 if (SDValue V = combineShuffleToZeroExtendVectorInReg(SVN, DAG, TLI,
26365 LegalOperations))
26366 return V;
26367
26368 return SDValue();
26369}
26370
26371SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
26372 EVT VT = N->getValueType(0);
26373 if (!VT.isFixedLengthVector())
26374 return SDValue();
26375
26376 // Try to convert a scalar binop with an extracted vector element to a vector
26377 // binop. This is intended to reduce potentially expensive register moves.
26378 // TODO: Check if both operands are extracted.
26379 // TODO: How to prefer scalar/vector ops with multiple uses of the extract?
26380 // TODO: Generalize this, so it can be called from visitINSERT_VECTOR_ELT().
26381 SDValue Scalar = N->getOperand(0);
26382 unsigned Opcode = Scalar.getOpcode();
26383 EVT VecEltVT = VT.getScalarType();
26384 if (Scalar.hasOneUse() && Scalar->getNumValues() == 1 &&
26385 TLI.isBinOp(Opcode) && Scalar.getValueType() == VecEltVT &&
26386 Scalar.getOperand(0).getValueType() == VecEltVT &&
26387 Scalar.getOperand(1).getValueType() == VecEltVT &&
26388 Scalar->isOnlyUserOf(Scalar.getOperand(0).getNode()) &&
26389 Scalar->isOnlyUserOf(Scalar.getOperand(1).getNode()) &&
26390 DAG.isSafeToSpeculativelyExecute(Opcode) && hasOperation(Opcode, VT)) {
26391 // Match an extract element and get a shuffle mask equivalent.
26392 SmallVector<int, 8> ShufMask(VT.getVectorNumElements(), -1);
26393
26394 for (int i : {0, 1}) {
26395 // s2v (bo (extelt V, Idx), C) --> shuffle (bo V, C'), {Idx, -1, -1...}
26396 // s2v (bo C, (extelt V, Idx)) --> shuffle (bo C', V), {Idx, -1, -1...}
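// Concrete illustration of the first form:
//   s2v (add (extelt V, 2), 42) --> shuffle (add V, splat(42)), {2,-1,-1,...}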
26397 SDValue EE = Scalar.getOperand(i);
26398 auto *C = dyn_cast<ConstantSDNode>(Scalar.getOperand(i ? 0 : 1));
26399 if (C && EE.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
26400 EE.getOperand(0).getValueType() == VT &&
26401 isa<ConstantSDNode>(EE.getOperand(1))) {
26402 // Mask = {ExtractIndex, undef, undef....}
26403 ShufMask[0] = EE.getConstantOperandVal(1);
26404 // Make sure the shuffle is legal if we are crossing lanes.
26405 if (TLI.isShuffleMaskLegal(ShufMask, VT)) {
26406 SDLoc DL(N);
26407 SDValue V[] = {EE.getOperand(0),
26408 DAG.getConstant(C->getAPIntValue(), DL, VT)};
26409 SDValue VecBO = DAG.getNode(Opcode, DL, VT, V[i], V[1 - i]);
26410 return DAG.getVectorShuffle(VT, DL, VecBO, DAG.getUNDEF(VT),
26411 ShufMask);
26412 }
26413 }
26414 }
26415 }
26416
26417 // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
26418 // with a VECTOR_SHUFFLE and possible truncate.
26419 if (Opcode != ISD::EXTRACT_VECTOR_ELT ||
26420 !Scalar.getOperand(0).getValueType().isFixedLengthVector())
26421 return SDValue();
26422
26423 // If we have an implicit truncate, truncate here if it is legal.
26424 if (VecEltVT != Scalar.getValueType() &&
26425 Scalar.getValueType().isScalarInteger() && isTypeLegal(VecEltVT)) {
26426 SDValue Val = DAG.getNode(ISD::TRUNCATE, SDLoc(Scalar), VecEltVT, Scalar);
26427 return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
26428 }
26429
26430 auto *ExtIndexC = dyn_cast<ConstantSDNode>(Scalar.getOperand(1));
26431 if (!ExtIndexC)
26432 return SDValue();
26433
26434 SDValue SrcVec = Scalar.getOperand(0);
26435 EVT SrcVT = SrcVec.getValueType();
26436 unsigned SrcNumElts = SrcVT.getVectorNumElements();
26437 unsigned VTNumElts = VT.getVectorNumElements();
26438 if (VecEltVT == SrcVT.getScalarType() && VTNumElts <= SrcNumElts) {
26439 // Create a shuffle equivalent for scalar-to-vector: {ExtIndex, -1, -1, ...}
26440 SmallVector<int, 8> Mask(SrcNumElts, -1);
26441 Mask[0] = ExtIndexC->getZExtValue();
26442 SDValue LegalShuffle = TLI.buildLegalVectorShuffle(
26443 SrcVT, SDLoc(N), SrcVec, DAG.getUNDEF(SrcVT), Mask, DAG);
26444 if (!LegalShuffle)
26445 return SDValue();
26446
26447 // If the initial vector is the same size, the shuffle is the result.
26448 if (VT == SrcVT)
26449 return LegalShuffle;
26450
26451 // If not, shorten the shuffled vector.
26452 if (VTNumElts != SrcNumElts) {
26453 SDValue ZeroIdx = DAG.getVectorIdxConstant(0, SDLoc(N));
26454 EVT SubVT = EVT::getVectorVT(*DAG.getContext(),
26455 SrcVT.getVectorElementType(), VTNumElts);
26456 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT, LegalShuffle,
26457 ZeroIdx);
26458 }
26459 }
26460
26461 return SDValue();
26462}
26463
26464SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
26465 EVT VT = N->getValueType(0);
26466 SDValue N0 = N->getOperand(0);
26467 SDValue N1 = N->getOperand(1);
26468 SDValue N2 = N->getOperand(2);
26469 uint64_t InsIdx = N->getConstantOperandVal(2);
26470
26471 // If inserting an UNDEF, just return the original vector.
26472 if (N1.isUndef())
26473 return N0;
26474
26475 // If this is an insert of an extracted vector into an undef vector, we can
26476 // just use the input to the extract if the types match, and can simplify
26477 // in some cases even if they don't.
26478 if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
26479 N1.getOperand(1) == N2) {
26480 EVT SrcVT = N1.getOperand(0).getValueType();
26481 if (SrcVT == VT)
26482 return N1.getOperand(0);
26483 // TODO: To remove the zero check, need to adjust the offset to
26484 // a multiple of the new src type.
26485 if (isNullConstant(N2)) {
26486 if (VT.knownBitsGE(SrcVT) &&
26487 !(VT.isFixedLengthVector() && SrcVT.isScalableVector()))
26488 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
26489 VT, N0, N1.getOperand(0), N2);
26490 else if (VT.knownBitsLE(SrcVT) &&
26491 !(VT.isScalableVector() && SrcVT.isFixedLengthVector()))
26492 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N),
26493 VT, N1.getOperand(0), N2);
26494 }
26495 }
26496
26497 // Handle case where we've ended up inserting back into the source vector
26498 // we extracted the subvector from.
26499 // insert_subvector(N0, extract_subvector(N0, N2), N2) --> N0
26500 if (N1.getOpcode() == ISD::EXTRACT_SUBVECTOR && N1.getOperand(0) == N0 &&
26501 N1.getOperand(1) == N2)
26502 return N0;
26503
26504 // Simplify scalar inserts into an undef vector:
26505 // insert_subvector undef, (splat X), N2 -> splat X
26506 if (N0.isUndef() && N1.getOpcode() == ISD::SPLAT_VECTOR)
26507 if (DAG.isConstantValueOfAnyType(N1.getOperand(0)) || N1.hasOneUse())
26508 return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, N1.getOperand(0));
26509
26510 // If we are inserting a bitcast value into an undef, with the same
26511 // number of elements, just use the bitcast input of the extract.
26512 // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
26513 // BITCAST (INSERT_SUBVECTOR UNDEF N1 N2)
26514 if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
26515 N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
26516 N1.getOperand(0).getOperand(1) == N2 &&
26517 N1.getOperand(0).getOperand(0).getValueType().getVectorElementCount() ==
26518 VT.getVectorElementCount() &&
26519 N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() ==
26520 VT.getSizeInBits()) {
26521 return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
26522 }
26523
26524 // If both N1 and N2 are bitcast values on which insert_subvector
26525 // would make sense, pull the bitcast through.
26526 // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
26527 // BITCAST (INSERT_SUBVECTOR N0 N1 N2)
26528 if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
26529 SDValue CN0 = N0.getOperand(0);
26530 SDValue CN1 = N1.getOperand(0);
26531 EVT CN0VT = CN0.getValueType();
26532 EVT CN1VT = CN1.getValueType();
26533 if (CN0VT.isVector() && CN1VT.isVector() &&
26534 CN0VT.getVectorElementType() == CN1VT.getVectorElementType() &&
26535 CN0VT.getSizeInBits() == VT.getSizeInBits()) {
26536 SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
26537 CN0.getValueType(), CN0, CN1, N2);
26538 return DAG.getBitcast(VT, NewINSERT);
26539 }
26540 }
26541
26542 // Combine INSERT_SUBVECTORs where we are inserting to the same index.
26543 // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
26544 // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
26545 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
26546 N0.getOperand(1).getValueType() == N1.getValueType() &&
26547 N0.getOperand(2) == N2)
26548 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
26549 N1, N2);
26550
26551 // Eliminate an intermediate insert into an undef vector:
26552 // insert_subvector undef, (insert_subvector undef, X, 0), 0 -->
26553 // insert_subvector undef, X, 0
26554 if (N0.isUndef() && N1.getOpcode() == ISD::INSERT_SUBVECTOR &&
26555 N1.getOperand(0).isUndef() && isNullConstant(N1.getOperand(2)) &&
26556 isNullConstant(N2))
26557 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0,
26558 N1.getOperand(1), N2);
26559
26560 // Push subvector bitcasts to the output, adjusting the index as we go.
26561 // insert_subvector(bitcast(v), bitcast(s), c1)
26562 // -> bitcast(insert_subvector(v, s, c2))
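// Illustration (hypothetical types):
//   (v4i64 insert_subvector (bitcast (v8i32 v0)), (v2i64 bitcast (v4i32 s)), 2)
//     --> (v4i64 bitcast (v8i32 insert_subvector v0, s, 4))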
26563 if ((N0.isUndef() || N0.getOpcode() == ISD::BITCAST) &&
26564 N1.getOpcode() == ISD::BITCAST) {
26565 SDValue N0Src = peekThroughBitcasts(N0);
26566 SDValue N1Src = peekThroughBitcasts(N1);
26567 EVT N0SrcSVT = N0Src.getValueType().getScalarType();
26568 EVT N1SrcSVT = N1Src.getValueType().getScalarType();
26569 if ((N0.isUndef() || N0SrcSVT == N1SrcSVT) &&
26570 N0Src.getValueType().isVector() && N1Src.getValueType().isVector()) {
26571 EVT NewVT;
26572 SDLoc DL(N);
26573 SDValue NewIdx;
26574 LLVMContext &Ctx = *DAG.getContext();
26575 ElementCount NumElts = VT.getVectorElementCount();
26576 unsigned EltSizeInBits = VT.getScalarSizeInBits();
26577 if ((EltSizeInBits % N1SrcSVT.getSizeInBits()) == 0) {
26578 unsigned Scale = EltSizeInBits / N1SrcSVT.getSizeInBits();
26579 NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts * Scale);
26580 NewIdx = DAG.getVectorIdxConstant(InsIdx * Scale, DL);
26581 } else if ((N1SrcSVT.getSizeInBits() % EltSizeInBits) == 0) {
26582 unsigned Scale = N1SrcSVT.getSizeInBits() / EltSizeInBits;
26583 if (NumElts.isKnownMultipleOf(Scale) && (InsIdx % Scale) == 0) {
26584 NewVT = EVT::getVectorVT(Ctx, N1SrcSVT,
26585 NumElts.divideCoefficientBy(Scale));
26586 NewIdx = DAG.getVectorIdxConstant(InsIdx / Scale, DL);
26587 }
26588 }
26589 if (NewIdx && hasOperation(ISD::INSERT_SUBVECTOR, NewVT)) {
26590 SDValue Res = DAG.getBitcast(NewVT, N0Src);
26591 Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, NewVT, Res, N1Src, NewIdx);
26592 return DAG.getBitcast(VT, Res);
26593 }
26594 }
26595 }
26596
26597 // Canonicalize insert_subvector dag nodes.
26598 // Example:
26599 // (insert_subvector (insert_subvector A, Idx0), Idx1)
26600 // -> (insert_subvector (insert_subvector A, Idx1), Idx0)
26601 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
26602 N1.getValueType() == N0.getOperand(1).getValueType()) {
26603 unsigned OtherIdx = N0.getConstantOperandVal(2);
26604 if (InsIdx < OtherIdx) {
26605 // Swap nodes.
26606 SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
26607 N0.getOperand(0), N1, N2);
26608 AddToWorklist(NewOp.getNode());
26609 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
26610 VT, NewOp, N0.getOperand(1), N0.getOperand(2));
26611 }
26612 }
26613
26614 // If the input vector is a concatenation, and the insert replaces
26615 // one of the pieces, we can optimize into a single concat_vectors.
26616 if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
26617 N0.getOperand(0).getValueType() == N1.getValueType() &&
26620 unsigned Factor = N1.getValueType().getVectorMinNumElements();
26621 SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
26622 Ops[InsIdx / Factor] = N1;
26623 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
26624 }
26625
26626 // Simplify source operands based on insertion.
26627 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
26628 return SDValue(N, 0);
26629
26630 return SDValue();
26631}
26632
26633SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
26634 SDValue N0 = N->getOperand(0);
26635
26636 // fold (fp_to_fp16 (fp16_to_fp op)) -> op
26637 if (N0->getOpcode() == ISD::FP16_TO_FP)
26638 return N0->getOperand(0);
26639
26640 return SDValue();
26641}
26642
26643SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
26644 auto Op = N->getOpcode();
26646 "opcode should be FP16_TO_FP or BF16_TO_FP.");
26647 SDValue N0 = N->getOperand(0);
26648
26649 // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op) or
26650 // fold bf16_to_fp(op & 0xffff) -> bf16_to_fp(op)
26651 if (!TLI.shouldKeepZExtForFP16Conv() && N0->getOpcode() == ISD::AND) {
26652 ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
26653 if (AndConst && AndConst->getAPIntValue() == 0xffff) {
26654 return DAG.getNode(Op, SDLoc(N), N->getValueType(0), N0.getOperand(0));
26655 }
26656 }
26657
26658 // Sometimes constants manage to survive very late in the pipeline, e.g.,
26659 // because they are wrapped inside the <1 x f16> type. Try one last time to
26660 // get rid of them.
26661 SDValue Folded = DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N),
26662 N->getValueType(0), {N0});
26663 return Folded;
26664}
26665
26666SDValue DAGCombiner::visitFP_TO_BF16(SDNode *N) {
26667 SDValue N0 = N->getOperand(0);
26668
26669 // fold (fp_to_bf16 (bf16_to_fp op)) -> op
26670 if (N0->getOpcode() == ISD::BF16_TO_FP)
26671 return N0->getOperand(0);
26672
26673 return SDValue();
26674}
26675
26676SDValue DAGCombiner::visitBF16_TO_FP(SDNode *N) {
26677 // fold bf16_to_fp(op & 0xffff) -> bf16_to_fp(op)
26678 return visitFP16_TO_FP(N);
26679}
26680
26681SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
26682 SDValue N0 = N->getOperand(0);
26683 EVT VT = N0.getValueType();
26684 unsigned Opcode = N->getOpcode();
26685
26686 // VECREDUCE over 1-element vector is just an extract.
26687 if (VT.getVectorElementCount().isScalar()) {
26688 SDLoc dl(N);
26689 SDValue Res =
26690 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT.getVectorElementType(), N0,
26691 DAG.getVectorIdxConstant(0, dl));
26692 if (Res.getValueType() != N->getValueType(0))
26693 Res = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Res);
26694 return Res;
26695 }
26696
26697 // On a boolean vector an and/or reduction is the same as a umin/umax
26698 // reduction. Convert them if the latter is legal while the former isn't.
26699 if (Opcode == ISD::VECREDUCE_AND || Opcode == ISD::VECREDUCE_OR) {
26700 unsigned NewOpcode = Opcode == ISD::VECREDUCE_AND
26701 ? ISD::VECREDUCE_UMIN : ISD::VECREDUCE_UMAX;
26702 if (!TLI.isOperationLegalOrCustom(Opcode, VT) &&
26703 TLI.isOperationLegalOrCustom(NewOpcode, VT) &&
26704 DAG.ComputeNumSignBits(N0) == VT.getScalarSizeInBits())
26705 return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), N0);
26706 }
26707
26708 // vecreduce_or(insert_subvector(zero or undef, val)) -> vecreduce_or(val)
26709 // vecreduce_and(insert_subvector(ones or undef, val)) -> vecreduce_and(val)
26710 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
26711 TLI.isTypeLegal(N0.getOperand(1).getValueType())) {
26712 SDValue Vec = N0.getOperand(0);
26713 SDValue Subvec = N0.getOperand(1);
26714 if ((Opcode == ISD::VECREDUCE_OR &&
26715 (N0.getOperand(0).isUndef() || isNullOrNullSplat(Vec))) ||
26716 (Opcode == ISD::VECREDUCE_AND &&
26717 (N0.getOperand(0).isUndef() || isAllOnesOrAllOnesSplat(Vec))))
26718 return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), Subvec);
26719 }
26720
26721 return SDValue();
26722}
26723
26724SDValue DAGCombiner::visitVP_FSUB(SDNode *N) {
26725 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
26726
26727 // FSUB -> FMA combines:
26728 if (SDValue Fused = visitFSUBForFMACombine<VPMatchContext>(N)) {
26729 AddToWorklist(Fused.getNode());
26730 return Fused;
26731 }
26732 return SDValue();
26733}
26734
26735SDValue DAGCombiner::visitVPOp(SDNode *N) {
26736
26737 if (N->getOpcode() == ISD::VP_GATHER)
26738 if (SDValue SD = visitVPGATHER(N))
26739 return SD;
26740
26741 if (N->getOpcode() == ISD::VP_SCATTER)
26742 if (SDValue SD = visitVPSCATTER(N))
26743 return SD;
26744
26745 if (N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_LOAD)
26746 if (SDValue SD = visitVP_STRIDED_LOAD(N))
26747 return SD;
26748
26749 if (N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_STORE)
26750 if (SDValue SD = visitVP_STRIDED_STORE(N))
26751 return SD;
26752
26753 // VP operations in which all vector elements are disabled - either by
26754 // determining that the mask is all false or that the EVL is 0 - can be
26755 // eliminated.
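// For instance, a VP_FADD whose mask operand is a splat of false, or whose
// EVL operand is the constant 0, has no active lanes and is handled below.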
26756 bool AreAllEltsDisabled = false;
26757 if (auto EVLIdx = ISD::getVPExplicitVectorLengthIdx(N->getOpcode()))
26758 AreAllEltsDisabled |= isNullConstant(N->getOperand(*EVLIdx));
26759 if (auto MaskIdx = ISD::getVPMaskIdx(N->getOpcode()))
26760 AreAllEltsDisabled |=
26761 ISD::isConstantSplatVectorAllZeros(N->getOperand(*MaskIdx).getNode());
26762
26763 // This is the only generic VP combine we support for now.
26764 if (!AreAllEltsDisabled) {
26765 switch (N->getOpcode()) {
26766 case ISD::VP_FADD:
26767 return visitVP_FADD(N);
26768 case ISD::VP_FSUB:
26769 return visitVP_FSUB(N);
26770 case ISD::VP_FMA:
26771 return visitFMA<VPMatchContext>(N);
26772 case ISD::VP_SELECT:
26773 return visitVP_SELECT(N);
26774 case ISD::VP_MUL:
26775 return visitMUL<VPMatchContext>(N);
26776 default:
26777 break;
26778 }
26779 return SDValue();
26780 }
26781
26782 // Binary operations can be replaced by UNDEF.
26783 if (ISD::isVPBinaryOp(N->getOpcode()))
26784 return DAG.getUNDEF(N->getValueType(0));
26785
26786 // VP Memory operations can be replaced by either the chain (stores) or the
26787 // chain + undef (loads).
26788 if (const auto *MemSD = dyn_cast<MemSDNode>(N)) {
26789 if (MemSD->writeMem())
26790 return MemSD->getChain();
26791 return CombineTo(N, DAG.getUNDEF(N->getValueType(0)), MemSD->getChain());
26792 }
26793
26794 // Reduction operations return the start operand when no elements are active.
26795 if (ISD::isVPReduction(N->getOpcode()))
26796 return N->getOperand(0);
26797
26798 return SDValue();
26799}
26800
26801SDValue DAGCombiner::visitGET_FPENV_MEM(SDNode *N) {
26802 SDValue Chain = N->getOperand(0);
26803 SDValue Ptr = N->getOperand(1);
26804 EVT MemVT = cast<FPStateAccessSDNode>(N)->getMemoryVT();
26805
26806 // Check if the memory, where FP state is written to, is used only in a single
26807 // load operation.
26808 LoadSDNode *LdNode = nullptr;
26809 for (auto *U : Ptr->uses()) {
26810 if (U == N)
26811 continue;
26812 if (auto *Ld = dyn_cast<LoadSDNode>(U)) {
26813 if (LdNode && LdNode != Ld)
26814 return SDValue();
26815 LdNode = Ld;
26816 continue;
26817 }
26818 return SDValue();
26819 }
26820 if (!LdNode || !LdNode->isSimple() || LdNode->isIndexed() ||
26821 !LdNode->getOffset().isUndef() || LdNode->getMemoryVT() != MemVT ||
26822 !LdNode->getChain().reachesChainWithoutSideEffects(SDValue(N, 0)))
26823 return SDValue();
26824
26825 // Check if the loaded value is used only in a store operation.
26826 StoreSDNode *StNode = nullptr;
26827 for (auto I = LdNode->use_begin(), E = LdNode->use_end(); I != E; ++I) {
26828 SDUse &U = I.getUse();
26829 if (U.getResNo() == 0) {
26830 if (auto *St = dyn_cast<StoreSDNode>(U.getUser())) {
26831 if (StNode)
26832 return SDValue();
26833 StNode = St;
26834 } else {
26835 return SDValue();
26836 }
26837 }
26838 }
26839 if (!StNode || !StNode->isSimple() || StNode->isIndexed() ||
26840 !StNode->getOffset().isUndef() || StNode->getMemoryVT() != MemVT ||
26841 !StNode->getChain().reachesChainWithoutSideEffects(SDValue(LdNode, 1)))
26842 return SDValue();
26843
26844 // Create new node GET_FPENV_MEM, which uses the store address to write FP
26845 // environment.
26846 SDValue Res = DAG.getGetFPEnv(Chain, SDLoc(N), StNode->getBasePtr(), MemVT,
26847 StNode->getMemOperand());
26848 CombineTo(StNode, Res, false);
26849 return Res;
26850}
26851
26852SDValue DAGCombiner::visitSET_FPENV_MEM(SDNode *N) {
26853 SDValue Chain = N->getOperand(0);
26854 SDValue Ptr = N->getOperand(1);
26855 EVT MemVT = cast<FPStateAccessSDNode>(N)->getMemoryVT();
26856
26857 // Check if the address of the FP state is used only in a single store operation.
26858 StoreSDNode *StNode = nullptr;
26859 for (auto *U : Ptr->uses()) {
26860 if (U == N)
26861 continue;
26862 if (auto *St = dyn_cast<StoreSDNode>(U)) {
26863 if (StNode && StNode != St)
26864 return SDValue();
26865 StNode = St;
26866 continue;
26867 }
26868 return SDValue();
26869 }
26870 if (!StNode || !StNode->isSimple() || StNode->isIndexed() ||
26871 !StNode->getOffset().isUndef() || StNode->getMemoryVT() != MemVT ||
26872 !Chain.reachesChainWithoutSideEffects(SDValue(StNode, 0)))
26873 return SDValue();
26874
26875 // Check if the stored value is loaded from some location and the loaded
26876 // value is used only in the store operation.
26877 SDValue StValue = StNode->getValue();
26878 auto *LdNode = dyn_cast<LoadSDNode>(StValue);
26879 if (!LdNode || !LdNode->isSimple() || LdNode->isIndexed() ||
26880 !LdNode->getOffset().isUndef() || LdNode->getMemoryVT() != MemVT ||
26881 !StNode->getChain().reachesChainWithoutSideEffects(SDValue(LdNode, 1)))
26882 return SDValue();
26883
26884 // Create new node SET_FPENV_MEM, which uses the load address to read FP
26885 // environment.
26886 SDValue Res =
26887 DAG.getSetFPEnv(LdNode->getChain(), SDLoc(N), LdNode->getBasePtr(), MemVT,
26888 LdNode->getMemOperand());
26889 return Res;
26890}
26891
26892 /// Returns a vector_shuffle if it is able to transform an AND to a vector_shuffle
26893/// with the destination vector and a zero vector.
26894 /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0> ==>
26895/// vector_shuffle V, Zero, <0, 4, 2, 4>
26896SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
26897 assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");
26898
26899 EVT VT = N->getValueType(0);
26900 SDValue LHS = N->getOperand(0);
26901 SDValue RHS = peekThroughBitcasts(N->getOperand(1));
26902 SDLoc DL(N);
26903
26904 // Make sure we're not running after operation legalization where it
26905 // may have custom lowered the vector shuffles.
26906 if (LegalOperations)
26907 return SDValue();
26908
26909 if (RHS.getOpcode() != ISD::BUILD_VECTOR)
26910 return SDValue();
26911
26912 EVT RVT = RHS.getValueType();
26913 unsigned NumElts = RHS.getNumOperands();
26914
26915 // Attempt to create a valid clear mask, splitting the mask into
26916 // sub elements and checking to see if each is
26917 // all zeros or all ones - suitable for shuffle masking.
26918 auto BuildClearMask = [&](int Split) {
26919 int NumSubElts = NumElts * Split;
26920 int NumSubBits = RVT.getScalarSizeInBits() / Split;
26921
26922 SmallVector<int, 8> Indices;
26923 for (int i = 0; i != NumSubElts; ++i) {
26924 int EltIdx = i / Split;
26925 int SubIdx = i % Split;
26926 SDValue Elt = RHS.getOperand(EltIdx);
26927 // X & undef --> 0 (not undef). So this lane must be converted to choose
26928 // from the zero constant vector (same as if the element had all 0-bits).
26929 if (Elt.isUndef()) {
26930 Indices.push_back(i + NumSubElts);
26931 continue;
26932 }
26933
26934 APInt Bits;
26935 if (auto *Cst = dyn_cast<ConstantSDNode>(Elt))
26936 Bits = Cst->getAPIntValue();
26937 else if (auto *CstFP = dyn_cast<ConstantFPSDNode>(Elt))
26938 Bits = CstFP->getValueAPF().bitcastToAPInt();
26939 else
26940 return SDValue();
26941
26942 // Extract the sub element from the constant bit mask.
26943 if (DAG.getDataLayout().isBigEndian())
26944 Bits = Bits.extractBits(NumSubBits, (Split - SubIdx - 1) * NumSubBits);
26945 else
26946 Bits = Bits.extractBits(NumSubBits, SubIdx * NumSubBits);
26947
26948 if (Bits.isAllOnes())
26949 Indices.push_back(i);
26950 else if (Bits == 0)
26951 Indices.push_back(i + NumSubElts);
26952 else
26953 return SDValue();
26954 }
26955
26956 // Let's see if the target supports this vector_shuffle.
26957 EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
26958 EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
26959 if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
26960 return SDValue();
26961
26962 SDValue Zero = DAG.getConstant(0, DL, ClearVT);
26963 return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
26964 DAG.getBitcast(ClearVT, LHS),
26965 Zero, Indices));
26966 };
26967
26968 // Determine maximum split level (byte level masking).
26969 int MaxSplit = 1;
26970 if (RVT.getScalarSizeInBits() % 8 == 0)
26971 MaxSplit = RVT.getScalarSizeInBits() / 8;
26972
26973 for (int Split = 1; Split <= MaxSplit; ++Split)
26974 if (RVT.getScalarSizeInBits() % Split == 0)
26975 if (SDValue S = BuildClearMask(Split))
26976 return S;
26977
26978 return SDValue();
26979}
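
// Editor's worked example (illustrative, not part of the original source), on
// a little-endian target: for v2i32 AND x, <0x0000FFFF, 0xFFFF0000>, the
// Split == 2 attempt views the mask as four i16 sub-elements
// <0xFFFF, 0x0000, 0x0000, 0xFFFF>, giving the clear-mask indices
// <0, 5, 6, 3> into concat(bitcast(x) : v4i16, zero : v4i16), i.e.
//   bitcast(vector_shuffle<0,5,6,3> (bitcast x to v4i16), zeroinitializer)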
26980
26981/// If a vector binop is performed on splat values, it may be profitable to
26982 /// extract, scalarize, and insert/splat.
26983 static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG,
26984 const SDLoc &DL) {
26985 SDValue N0 = N->getOperand(0);
26986 SDValue N1 = N->getOperand(1);
26987 unsigned Opcode = N->getOpcode();
26988 EVT VT = N->getValueType(0);
26989 EVT EltVT = VT.getVectorElementType();
26990 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
26991
26992 // TODO: Remove/replace the extract cost check? If the elements are available
26993 // as scalars, then there may be no extract cost. Should we ask if
26994 // inserting a scalar back into a vector is cheap instead?
26995 int Index0, Index1;
26996 SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
26997 SDValue Src1 = DAG.getSplatSourceVector(N1, Index1);
26998 // Extract element from splat_vector should be free.
26999 // TODO: use DAG.isSplatValue instead?
27000 bool IsBothSplatVector = N0.getOpcode() == ISD::SPLAT_VECTOR &&
27001 N1.getOpcode() == ISD::SPLAT_VECTOR;
27002 if (!Src0 || !Src1 || Index0 != Index1 ||
27003 Src0.getValueType().getVectorElementType() != EltVT ||
27004 Src1.getValueType().getVectorElementType() != EltVT ||
27005 !(IsBothSplatVector || TLI.isExtractVecEltCheap(VT, Index0)) ||
27006 !TLI.isOperationLegalOrCustom(Opcode, EltVT))
27007 return SDValue();
27008
27009 SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
27010 SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src0, IndexC);
27011 SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src1, IndexC);
27012 SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags());
27013
27014 // If all lanes but 1 are undefined, no need to splat the scalar result.
27015 // TODO: Keep track of undefs and use that info in the general case.
27016 if (N0.getOpcode() == ISD::BUILD_VECTOR && N0.getOpcode() == N1.getOpcode() &&
27017 count_if(N0->ops(), [](SDValue V) { return !V.isUndef(); }) == 1 &&
27018 count_if(N1->ops(), [](SDValue V) { return !V.isUndef(); }) == 1) {
27019 // bo (build_vec ..undef, X, undef...), (build_vec ..undef, Y, undef...) -->
27020 // build_vec ..undef, (bo X, Y), undef...
27021 SmallVector<SDValue, 8> Ops(N0->ops());
27022 Ops[Index0] = ScalarBO;
27023 return DAG.getBuildVector(VT, DL, Ops);
27024 }
27025
27026 // bo (splat X, Index), (splat Y, Index) --> splat (bo X, Y), Index
27027 return DAG.getSplat(VT, DL, ScalarBO);
27028}
27029
27030/// Visit a vector cast operation, like FP_EXTEND.
27031SDValue DAGCombiner::SimplifyVCastOp(SDNode *N, const SDLoc &DL) {
27032 EVT VT = N->getValueType(0);
27033 assert(VT.isVector() && "SimplifyVCastOp only works on vectors!");
27034 EVT EltVT = VT.getVectorElementType();
27035 unsigned Opcode = N->getOpcode();
27036
27037 SDValue N0 = N->getOperand(0);
27038 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
27039
27040 // TODO: promote operation might be also good here?
27041 int Index0;
27042 SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
27043 if (Src0 &&
27044 (N0.getOpcode() == ISD::SPLAT_VECTOR ||
27045 TLI.isExtractVecEltCheap(VT, Index0)) &&
27046 TLI.isOperationLegalOrCustom(Opcode, EltVT) &&
27047 TLI.preferScalarizeSplat(N)) {
27048 EVT SrcVT = N0.getValueType();
27049 EVT SrcEltVT = SrcVT.getVectorElementType();
27050 SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
27051 SDValue Elt =
27052 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, SrcEltVT, Src0, IndexC);
27053 SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, Elt, N->getFlags());
27054 if (VT.isScalableVector())
27055 return DAG.getSplatVector(VT, DL, ScalarBO);
27056 SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), ScalarBO);
27057 return DAG.getBuildVector(VT, DL, Ops);
27058 }
27059
27060 return SDValue();
27061}
27062
27063/// Visit a binary vector operation, like ADD.
27064SDValue DAGCombiner::SimplifyVBinOp(SDNode *N, const SDLoc &DL) {
27065 EVT VT = N->getValueType(0);
27066 assert(VT.isVector() && "SimplifyVBinOp only works on vectors!");
27067
27068 SDValue LHS = N->getOperand(0);
27069 SDValue RHS = N->getOperand(1);
27070 unsigned Opcode = N->getOpcode();
27071 SDNodeFlags Flags = N->getFlags();
27072
27073 // Move unary shuffles with identical masks after a vector binop:
27074 // VBinOp (shuffle A, Undef, Mask), (shuffle B, Undef, Mask))
27075 // --> shuffle (VBinOp A, B), Undef, Mask
27076 // This does not require type legality checks because we are creating the
27077 // same types of operations that are in the original sequence. We do have to
27078 // restrict ops like integer div that have immediate UB (eg, div-by-zero)
27079 // though. This code is adapted from the identical transform in instcombine.
27080 if (DAG.isSafeToSpeculativelyExecute(Opcode)) {
27081 auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS);
27082 auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS);
27083 if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
27084 LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&
27085 (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
27086 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS.getOperand(0),
27087 RHS.getOperand(0), Flags);
27088 SDValue UndefV = LHS.getOperand(1);
27089 return DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
27090 }
27091
27092 // Try to sink a splat shuffle after a binop with a uniform constant.
27093 // This is limited to cases where neither the shuffle nor the constant have
27094 // undefined elements because that could be poison-unsafe or inhibit
27095 // demanded elements analysis. It is further limited to not change a splat
27096 // of an inserted scalar because that may be optimized better by
27097 // load-folding or other target-specific behaviors.
27098 if (isConstOrConstSplat(RHS) && Shuf0 && all_equal(Shuf0->getMask()) &&
27099 Shuf0->hasOneUse() && Shuf0->getOperand(1).isUndef() &&
27100 Shuf0->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
27101 // binop (splat X), (splat C) --> splat (binop X, C)
27102 SDValue X = Shuf0->getOperand(0);
27103 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, X, RHS, Flags);
27104 return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
27105 Shuf0->getMask());
27106 }
27107 if (isConstOrConstSplat(LHS) && Shuf1 && all_equal(Shuf1->getMask()) &&
27108 Shuf1->hasOneUse() && Shuf1->getOperand(1).isUndef() &&
27109 Shuf1->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
27110 // binop (splat C), (splat X) --> splat (binop C, X)
27111 SDValue X = Shuf1->getOperand(0);
27112 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS, X, Flags);
27113 return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
27114 Shuf1->getMask());
27115 }
27116 }
27117
27118 // The following pattern is likely to emerge with vector reduction ops. Moving
27119 // the binary operation ahead of insertion may allow using a narrower vector
27120 // instruction that has better performance than the wide version of the op:
27121 // VBinOp (ins undef, X, Z), (ins undef, Y, Z) --> ins VecC, (VBinOp X, Y), Z
27122 if (LHS.getOpcode() == ISD::INSERT_SUBVECTOR && LHS.getOperand(0).isUndef() &&
27123 RHS.getOpcode() == ISD::INSERT_SUBVECTOR && RHS.getOperand(0).isUndef() &&
27124 LHS.getOperand(2) == RHS.getOperand(2) &&
27125 (LHS.hasOneUse() || RHS.hasOneUse())) {
27126 SDValue X = LHS.getOperand(1);
27127 SDValue Y = RHS.getOperand(1);
27128 SDValue Z = LHS.getOperand(2);
27129 EVT NarrowVT = X.getValueType();
27130 if (NarrowVT == Y.getValueType() &&
27131 TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT,
27132 LegalOperations)) {
27133 // (binop undef, undef) may not return undef, so compute that result.
27134 SDValue VecC =
27135 DAG.getNode(Opcode, DL, VT, DAG.getUNDEF(VT), DAG.getUNDEF(VT));
27136 SDValue NarrowBO = DAG.getNode(Opcode, DL, NarrowVT, X, Y);
27137 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, VecC, NarrowBO, Z);
27138 }
27139 }
27140
27141 // Make sure all but the first op are undef or constant.
27142 auto ConcatWithConstantOrUndef = [](SDValue Concat) {
27143 return Concat.getOpcode() == ISD::CONCAT_VECTORS &&
27144 all_of(drop_begin(Concat->ops()), [](const SDValue &Op) {
27145 return Op.isUndef() ||
27146 ISD::isBuildVectorOfConstantSDNodes(Op.getNode());
27147 });
27148 };
27149
27150 // The following pattern is likely to emerge with vector reduction ops. Moving
27151 // the binary operation ahead of the concat may allow using a narrower vector
27152 // instruction that has better performance than the wide version of the op:
27153 // VBinOp (concat X, undef/constant), (concat Y, undef/constant) -->
27154 // concat (VBinOp X, Y), VecC
27155 if (ConcatWithConstantOrUndef(LHS) && ConcatWithConstantOrUndef(RHS) &&
27156 (LHS.hasOneUse() || RHS.hasOneUse())) {
27157 EVT NarrowVT = LHS.getOperand(0).getValueType();
27158 if (NarrowVT == RHS.getOperand(0).getValueType() &&
27159 TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
27160 unsigned NumOperands = LHS.getNumOperands();
27161 SmallVector<SDValue, 4> ConcatOps;
27162 for (unsigned i = 0; i != NumOperands; ++i) {
27163 // This constant folds for operands 1 and up.
27164 ConcatOps.push_back(DAG.getNode(Opcode, DL, NarrowVT, LHS.getOperand(i),
27165 RHS.getOperand(i)));
27166 }
27167
27168 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
27169 }
27170 }
27171
27172 if (SDValue V = scalarizeBinOpOfSplats(N, DAG, DL))
27173 return V;
27174
27175 return SDValue();
27176}
27177
27178SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
27179 SDValue N2) {
27180 assert(N0.getOpcode() == ISD::SETCC &&
27181 "First argument must be a SetCC node!");
27182
27183 SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
27184 cast<CondCodeSDNode>(N0.getOperand(2))->get());
27185
27186 // If we got a simplified select_cc node back from SimplifySelectCC, then
27187 // break it down into a new SETCC node, and a new SELECT node, and then return
27188 // the SELECT node, since we were called with a SELECT node.
27189 if (SCC.getNode()) {
27190 // Check to see if we got a select_cc back (to turn into setcc/select).
27191 // Otherwise, just return whatever node we got back, like fabs.
27192 if (SCC.getOpcode() == ISD::SELECT_CC) {
27193 const SDNodeFlags Flags = N0->getFlags();
27194 SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
27195 N0.getValueType(),
27196 SCC.getOperand(0), SCC.getOperand(1),
27197 SCC.getOperand(4), Flags);
27198 AddToWorklist(SETCC.getNode());
27199 SDValue SelectNode = DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
27200 SCC.getOperand(2), SCC.getOperand(3));
27201 SelectNode->setFlags(Flags);
27202 return SelectNode;
27203 }
27204
27205 return SCC;
27206 }
27207 return SDValue();
27208}
27209
27210/// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
27211/// being selected between, see if we can simplify the select. Callers of this
27212/// should assume that TheSelect is deleted if this returns true. As such, they
27213/// should return the appropriate thing (e.g. the node) back to the top-level of
27214/// the DAG combiner loop to avoid it being looked at.
27215bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
27216 SDValue RHS) {
27217 // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
27218 // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
27219 if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
27220 if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
27221 // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
27222 SDValue Sqrt = RHS;
27223 ISD::CondCode CC;
27224 SDValue CmpLHS;
27225 const ConstantFPSDNode *Zero = nullptr;
27226
27227 if (TheSelect->getOpcode() == ISD::SELECT_CC) {
27228 CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
27229 CmpLHS = TheSelect->getOperand(0);
27230 Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
27231 } else {
27232 // SELECT or VSELECT
27233 SDValue Cmp = TheSelect->getOperand(0);
27234 if (Cmp.getOpcode() == ISD::SETCC) {
27235 CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
27236 CmpLHS = Cmp.getOperand(0);
27237 Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
27238 }
27239 }
27240 if (Zero && Zero->isZero() &&
27241 Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
27242 CC == ISD::SETULT || CC == ISD::SETLT)) {
27243 // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
27244 CombineTo(TheSelect, Sqrt);
27245 return true;
27246 }
27247 }
27248 }
27249 // Cannot simplify select with vector condition
27250 if (TheSelect->getOperand(0).getValueType().isVector()) return false;
27251
27252 // If this is a select from two identical things, try to pull the operation
27253 // through the select.
27254 if (LHS.getOpcode() != RHS.getOpcode() ||
27255 !LHS.hasOneUse() || !RHS.hasOneUse())
27256 return false;
27257
27258 // If this is a load and the token chain is identical, replace the select
27259 // of two loads with a load through a select of the address to load from.
27260 // This triggers in things like "select bool X, 10.0, 123.0" after the FP
27261 // constants have been dropped into the constant pool.
27262 if (LHS.getOpcode() == ISD::LOAD) {
27263 LoadSDNode *LLD = cast<LoadSDNode>(LHS);
27264 LoadSDNode *RLD = cast<LoadSDNode>(RHS);
27265
27266 // Token chains must be identical.
27267 if (LHS.getOperand(0) != RHS.getOperand(0) ||
27268 // Do not let this transformation reduce the number of volatile loads.
27269 // Be conservative for atomics for the moment
27270 // TODO: This does appear to be legal for unordered atomics (see D66309)
27271 !LLD->isSimple() || !RLD->isSimple() ||
27272 // FIXME: If either is a pre/post inc/dec load,
27273 // we'd need to split out the address adjustment.
27274 LLD->isIndexed() || RLD->isIndexed() ||
27275 // If this is an EXTLOAD, the VT's must match.
27276 LLD->getMemoryVT() != RLD->getMemoryVT() ||
27277 // If this is an EXTLOAD, the kind of extension must match.
27278 (LLD->getExtensionType() != RLD->getExtensionType() &&
27279 // The only exception is if one of the extensions is anyext.
27280 LLD->getExtensionType() != ISD::EXTLOAD &&
27281 RLD->getExtensionType() != ISD::EXTLOAD) ||
27282 // FIXME: this discards src value information. This is
27283 // over-conservative. It would be beneficial to be able to remember
27284 // both potential memory locations. Since we are discarding
27285 // src value info, don't do the transformation if the memory
27286 // locations are not in the default address space.
27287 LLD->getPointerInfo().getAddrSpace() != 0 ||
27288 RLD->getPointerInfo().getAddrSpace() != 0 ||
27289 // We can't produce a CMOV of a TargetFrameIndex since we won't
27290 // generate the address generation required.
27291 LLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
27292 RLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
27293 !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
27294 LLD->getBasePtr().getValueType()))
27295 return false;
27296
27297 // The loads must not depend on one another.
27298 if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD))
27299 return false;
27300
27301 // Check that the select condition doesn't reach either load. If so,
27302 // folding this will induce a cycle into the DAG. If not, this is safe to
27303 // xform, so create a select of the addresses.
27304
27305 SmallPtrSet<const SDNode *, 32> Visited;
27306 SmallVector<const SDNode *, 16> Worklist;
27307
27308 // Always fail if LLD and RLD are not independent. TheSelect is a
27309 // predecessor to all Nodes in question so we need not search past it.
27310
27311 Visited.insert(TheSelect);
27312 Worklist.push_back(LLD);
27313 Worklist.push_back(RLD);
27314
27315 if (SDNode::hasPredecessorHelper(LLD, Visited, Worklist) ||
27316 SDNode::hasPredecessorHelper(RLD, Visited, Worklist))
27317 return false;
27318
27319 SDValue Addr;
27320 if (TheSelect->getOpcode() == ISD::SELECT) {
27321 // We cannot do this optimization if any pair of {RLD, LLD} is a
27322 // predecessor to {RLD, LLD, CondNode}. As we've already compared the
27323 // Loads, we only need to check if CondNode is a successor to one of the
27324 // loads. We can further avoid this if there's no use of their chain
27325 // value.
27326 SDNode *CondNode = TheSelect->getOperand(0).getNode();
27327 Worklist.push_back(CondNode);
27328
27329 if ((LLD->hasAnyUseOfValue(1) &&
27330 SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
27331 (RLD->hasAnyUseOfValue(1) &&
27332 SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
27333 return false;
27334
27335 Addr = DAG.getSelect(SDLoc(TheSelect),
27336 LLD->getBasePtr().getValueType(),
27337 TheSelect->getOperand(0), LLD->getBasePtr(),
27338 RLD->getBasePtr());
27339 } else { // Otherwise SELECT_CC
27340 // We cannot do this optimization if any pair of {RLD, LLD} is a
27341 // predecessor to {RLD, LLD, CondLHS, CondRHS}. As we've already compared
27342 // the Loads, we only need to check if CondLHS/CondRHS is a successor to
27343 // one of the loads. We can further avoid this if there's no use of their
27344 // chain value.
27345
27346 SDNode *CondLHS = TheSelect->getOperand(0).getNode();
27347 SDNode *CondRHS = TheSelect->getOperand(1).getNode();
27348 Worklist.push_back(CondLHS);
27349 Worklist.push_back(CondRHS);
27350
27351 if ((LLD->hasAnyUseOfValue(1) &&
27352 SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
27353 (RLD->hasAnyUseOfValue(1) &&
27354 SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
27355 return false;
27356
27357 Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
27358 LLD->getBasePtr().getValueType(),
27359 TheSelect->getOperand(0),
27360 TheSelect->getOperand(1),
27361 LLD->getBasePtr(), RLD->getBasePtr(),
27362 TheSelect->getOperand(4));
27363 }
27364
27365 SDValue Load;
27366 // It is safe to replace the two loads if they have different alignments,
27367 // but the new load must be the minimum (most restrictive) alignment of the
27368 // inputs.
27369 Align Alignment = std::min(LLD->getAlign(), RLD->getAlign());
27370 MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
27371 if (!RLD->isInvariant())
27372 MMOFlags &= ~MachineMemOperand::MOInvariant;
27373 if (!RLD->isDereferenceable())
27374 MMOFlags &= ~MachineMemOperand::MODereferenceable;
27375 if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
27376 // FIXME: Discards pointer and AA info.
27377 Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
27378 LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
27379 MMOFlags);
27380 } else {
27381 // FIXME: Discards pointer and AA info.
27382 Load = DAG.getExtLoad(
27383 LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
27384 : LLD->getExtensionType(),
27385 SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
27386 MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
27387 }
27388
27389 // Users of the select now use the result of the load.
27390 CombineTo(TheSelect, Load);
27391
27392 // Users of the old loads now use the new load's chain. We know the
27393 // old-load value is dead now.
27394 CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
27395 CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
27396 return true;
27397 }
27398
27399 return false;
27400}
27401
27402/// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
27403/// bitwise 'and'.
27404SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
27405 SDValue N1, SDValue N2, SDValue N3,
27406 ISD::CondCode CC) {
27407 // If this is a select where the false operand is zero and the compare is a
27408 // check of the sign bit, see if we can perform the "gzip trick":
27409 // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
27410 // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
27411 EVT XType = N0.getValueType();
27412 EVT AType = N2.getValueType();
27413 if (!isNullConstant(N3) || !XType.bitsGE(AType))
27414 return SDValue();
27415
27416 // If the comparison is testing for a positive value, we have to invert
27417 // the sign bit mask, so only do that transform if the target has a bitwise
27418 // 'and not' instruction (the invert is free).
27419 if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
27420 // (X > -1) ? A : 0
27421 // (X > 0) ? X : 0 <-- This is canonical signed max.
27422 if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
27423 return SDValue();
27424 } else if (CC == ISD::SETLT) {
27425 // (X < 0) ? A : 0
27426 // (X < 1) ? X : 0 <-- This is un-canonicalized signed min.
27427 if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
27428 return SDValue();
27429 } else {
27430 return SDValue();
27431 }
27432
27433 // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
27434 // constant.
27435 auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
27436 if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
27437 unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
27438 if (!TLI.shouldAvoidTransformToShift(XType, ShCt)) {
27439 SDValue ShiftAmt = DAG.getShiftAmountConstant(ShCt, XType, DL);
27440 SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
27441 AddToWorklist(Shift.getNode());
27442
27443 if (XType.bitsGT(AType)) {
27444 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
27445 AddToWorklist(Shift.getNode());
27446 }
27447
27448 if (CC == ISD::SETGT)
27449 Shift = DAG.getNOT(DL, Shift, AType);
27450
27451 return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
27452 }
27453 }
27454
27455 unsigned ShCt = XType.getSizeInBits() - 1;
27456 if (TLI.shouldAvoidTransformToShift(XType, ShCt))
27457 return SDValue();
27458
27459 SDValue ShiftAmt = DAG.getShiftAmountConstant(ShCt, XType, DL);
27460 SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
27461 AddToWorklist(Shift.getNode());
27462
27463 if (XType.bitsGT(AType)) {
27464 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
27465 AddToWorklist(Shift.getNode());
27466 }
27467
27468 if (CC == ISD::SETGT)
27469 Shift = DAG.getNOT(DL, Shift, AType);
27470
27471 return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
27472}
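
// Editor's note (illustrative sketch, not part of the original source): the
// "gzip trick" above in plain C for 32-bit values.
//   int32_t mask = x >> 31;          // sra: all-ones if x < 0, else zero
//   int32_t res  = mask & a;         // == (x < 0 ? a : 0)
// and when `a` is a single bit (a == 1 << k), the sign bit can be moved
// straight to position k with a logical shift instead:
//   int32_t res  = (int32_t)((uint32_t)x >> (31 - k)) & a;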
27473
27474// Fold select(cc, binop(), binop()) -> binop(select(), select()) etc.
27475SDValue DAGCombiner::foldSelectOfBinops(SDNode *N) {
27476 SDValue N0 = N->getOperand(0);
27477 SDValue N1 = N->getOperand(1);
27478 SDValue N2 = N->getOperand(2);
27479 SDLoc DL(N);
27480
27481 unsigned BinOpc = N1.getOpcode();
27482 if (!TLI.isBinOp(BinOpc) || (N2.getOpcode() != BinOpc) ||
27483 (N1.getResNo() != N2.getResNo()))
27484 return SDValue();
27485
27486 // The use checks are intentionally on SDNode because we may be dealing
27487 // with opcodes that produce more than one SDValue.
27488 // TODO: Do we really need to check N0 (the condition operand of the select)?
27489 // But removing that clause could cause an infinite loop...
27490 if (!N0->hasOneUse() || !N1->hasOneUse() || !N2->hasOneUse())
27491 return SDValue();
27492
27493 // Binops may include opcodes that return multiple values, so all values
27494 // must be created/propagated from the newly created binops below.
27495 SDVTList OpVTs = N1->getVTList();
27496
27497 // Fold select(cond, binop(x, y), binop(z, y))
27498 // --> binop(select(cond, x, z), y)
27499 if (N1.getOperand(1) == N2.getOperand(1)) {
27500 SDValue N10 = N1.getOperand(0);
27501 SDValue N20 = N2.getOperand(0);
27502 SDValue NewSel = DAG.getSelect(DL, N10.getValueType(), N0, N10, N20);
27503 SDValue NewBinOp = DAG.getNode(BinOpc, DL, OpVTs, NewSel, N1.getOperand(1));
27504 NewBinOp->setFlags(N1->getFlags());
27505 NewBinOp->intersectFlagsWith(N2->getFlags());
27506 return SDValue(NewBinOp.getNode(), N1.getResNo());
27507 }
27508
27509 // Fold select(cond, binop(x, y), binop(x, z))
27510 // --> binop(x, select(cond, y, z))
27511 if (N1.getOperand(0) == N2.getOperand(0)) {
27512 SDValue N11 = N1.getOperand(1);
27513 SDValue N21 = N2.getOperand(1);
27514 // Second op VT might be different (e.g. shift amount type)
27515 if (N11.getValueType() == N21.getValueType()) {
27516 SDValue NewSel = DAG.getSelect(DL, N11.getValueType(), N0, N11, N21);
27517 SDValue NewBinOp =
27518 DAG.getNode(BinOpc, DL, OpVTs, N1.getOperand(0), NewSel);
27519 NewBinOp->setFlags(N1->getFlags());
27520 NewBinOp->intersectFlagsWith(N2->getFlags());
27521 return SDValue(NewBinOp.getNode(), N1.getResNo());
27522 }
27523 }
27524
27525 // TODO: Handle isCommutativeBinOp patterns as well?
27526 return SDValue();
27527}
27528
27529// Transform (fneg/fabs (bitconvert x)) to avoid loading constant pool values.
27530SDValue DAGCombiner::foldSignChangeInBitcast(SDNode *N) {
27531 SDValue N0 = N->getOperand(0);
27532 EVT VT = N->getValueType(0);
27533 bool IsFabs = N->getOpcode() == ISD::FABS;
27534 bool IsFree = IsFabs ? TLI.isFAbsFree(VT) : TLI.isFNegFree(VT);
27535
27536 if (IsFree || N0.getOpcode() != ISD::BITCAST || !N0.hasOneUse())
27537 return SDValue();
27538
27539 SDValue Int = N0.getOperand(0);
27540 EVT IntVT = Int.getValueType();
27541
27542 // The operand to cast should be integer.
27543 if (!IntVT.isInteger() || IntVT.isVector())
27544 return SDValue();
27545
27546 // (fneg (bitconvert x)) -> (bitconvert (xor x sign))
27547 // (fabs (bitconvert x)) -> (bitconvert (and x ~sign))
27548 APInt SignMask;
27549 if (N0.getValueType().isVector()) {
27550 // For vector, create a sign mask (0x80...) or its inverse (for fabs,
27551 // 0x7f...) per element and splat it.
27552 SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
27553 if (IsFabs)
27554 SignMask = ~SignMask;
27555 SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
27556 } else {
27557 // For scalar, just use the sign mask (0x80... or the inverse, 0x7f...)
27558 SignMask = APInt::getSignMask(IntVT.getSizeInBits());
27559 if (IsFabs)
27560 SignMask = ~SignMask;
27561 }
27562 SDLoc DL(N0);
27563 Int = DAG.getNode(IsFabs ? ISD::AND : ISD::XOR, DL, IntVT, Int,
27564 DAG.getConstant(SignMask, DL, IntVT));
27565 AddToWorklist(Int.getNode());
27566 return DAG.getBitcast(VT, Int);
27567}
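
// Editor's note (illustrative, not part of the original source): the scalar
// f32 case of the rewrite above, written directly on the integer bit pattern:
//   uint32_t bits;                         // bitcast of the f32 operand
//   uint32_t negated = bits ^ 0x80000000u; // fneg: flip the sign bit
//   uint32_t absval  = bits & 0x7FFFFFFFu; // fabs: clear the sign bit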
27568
27569 /// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4))"
27570/// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
27571/// in it. This may be a win when the constant is not otherwise available
27572/// because it replaces two constant pool loads with one.
27573SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset(
27574 const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
27575 ISD::CondCode CC) {
27576 if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType()))
27577 return SDValue();
27578
27579 // If we are before legalize types, we want the other legalization to happen
27580 // first (for example, to avoid messing with soft float).
27581 auto *TV = dyn_cast<ConstantFPSDNode>(N2);
27582 auto *FV = dyn_cast<ConstantFPSDNode>(N3);
27583 EVT VT = N2.getValueType();
27584 if (!TV || !FV || !TLI.isTypeLegal(VT))
27585 return SDValue();
27586
27587 // If a constant can be materialized without loads, this does not make sense.
27588 if (TLI.getOperationAction(ISD::ConstantFP, VT) == TargetLowering::Legal ||
27589 TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0), ForCodeSize) ||
27590 TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0), ForCodeSize))
27591 return SDValue();
27592
27593 // If both constants have multiple uses, then we won't need to do an extra
27594 // load. The values are likely around in registers for other users.
27595 if (!TV->hasOneUse() && !FV->hasOneUse())
27596 return SDValue();
27597
27598 Constant *Elts[] = { const_cast<ConstantFP*>(FV->getConstantFPValue()),
27599 const_cast<ConstantFP*>(TV->getConstantFPValue()) };
27600 Type *FPTy = Elts[0]->getType();
27601 const DataLayout &TD = DAG.getDataLayout();
27602
27603 // Create a ConstantArray of the two constants.
27604 Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
27605 SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
27606 TD.getPrefTypeAlign(FPTy));
27607 Align Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlign();
27608
27609 // Get offsets to the 0 and 1 elements of the array, so we can select between
27610 // them.
27611 SDValue Zero = DAG.getIntPtrConstant(0, DL);
27612 unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
27613 SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
27614 SDValue Cond =
27615 DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC);
27616 AddToWorklist(Cond.getNode());
27617 SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), Cond, One, Zero);
27618 AddToWorklist(CstOffset.getNode());
27619 CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset);
27620 AddToWorklist(CPIdx.getNode());
27621 return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
27622 MachinePointerInfo::getConstantPool(
27623 DAG.getMachineFunction()), Alignment);
27624}
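
// Editor's note (illustrative, not part of the original source): for f32
// constants the pool entry above is the two-element array { FV, TV } and
// EltSize is 4, so the emitted load reads from
//   CPIdx + ((N0 cond N1) ? 4 : 0)
// i.e. the "true" value sits at byte offset 4 and the "false" value at 0.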
27625
27626/// Simplify an expression of the form (N0 cond N1) ? N2 : N3
27627/// where 'cond' is the comparison specified by CC.
27628 SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
27629 SDValue N2, SDValue N3, ISD::CondCode CC,
27630 bool NotExtCompare) {
27631 // (x ? y : y) -> y.
27632 if (N2 == N3) return N2;
27633
27634 EVT CmpOpVT = N0.getValueType();
27635 EVT CmpResVT = getSetCCResultType(CmpOpVT);
27636 EVT VT = N2.getValueType();
27637 auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
27638 auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
27639 auto *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
27640
27641 // Determine if the condition we're dealing with is constant.
27642 if (SDValue SCC = DAG.FoldSetCC(CmpResVT, N0, N1, CC, DL)) {
27643 AddToWorklist(SCC.getNode());
27644 if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC)) {
27645 // fold select_cc true, x, y -> x
27646 // fold select_cc false, x, y -> y
27647 return !(SCCC->isZero()) ? N2 : N3;
27648 }
27649 }
27650
27651 if (SDValue V =
27652 convertSelectOfFPConstantsToLoadOffset(DL, N0, N1, N2, N3, CC))
27653 return V;
27654
27655 if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
27656 return V;
27657
27658 // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (sra (shl x)) A)
27659 // where y has a single bit set.
27660 // A plaintext description would be, we can turn the SELECT_CC into an AND
27661 // when the condition can be materialized as an all-ones register. Any
27662 // single bit-test can be materialized as an all-ones register with
27663 // shift-left and shift-right-arith.
27664 if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
27665 N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
27666 SDValue AndLHS = N0->getOperand(0);
27667 auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
27668 if (ConstAndRHS && ConstAndRHS->getAPIntValue().popcount() == 1) {
27669 // Shift the tested bit over the sign bit.
27670 const APInt &AndMask = ConstAndRHS->getAPIntValue();
27671 if (TLI.shouldFoldSelectWithSingleBitTest(VT, AndMask)) {
27672 unsigned ShCt = AndMask.getBitWidth() - 1;
27673 SDValue ShlAmt = DAG.getShiftAmountConstant(AndMask.countl_zero(), VT,
27674 SDLoc(AndLHS));
27675 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
27676
27677 // Now arithmetic right shift it all the way over, so the result is
27678 // either all-ones, or zero.
27679 SDValue ShrAmt = DAG.getShiftAmountConstant(ShCt, VT, SDLoc(Shl));
27680 SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
27681
27682 return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
27683 }
27684 }
27685 }
27686
27687 // fold select C, 16, 0 -> shl C, 4
27688 bool Fold = N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2();
27689 bool Swap = N3C && isNullConstant(N2) && N3C->getAPIntValue().isPowerOf2();
27690
27691 if ((Fold || Swap) &&
27692 TLI.getBooleanContents(CmpOpVT) ==
27693 TargetLowering::ZeroOrOneBooleanContent &&
27694 (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT))) {
27695
27696 if (Swap) {
27697 CC = ISD::getSetCCInverse(CC, CmpOpVT);
27698 std::swap(N2C, N3C);
27699 }
27700
27701 // If the caller doesn't want us to simplify this into a zext of a compare,
27702 // don't do it.
27703 if (NotExtCompare && N2C->isOne())
27704 return SDValue();
27705
27706 SDValue Temp, SCC;
27707 // zext (setcc n0, n1)
27708 if (LegalTypes) {
27709 SCC = DAG.getSetCC(DL, CmpResVT, N0, N1, CC);
27710 Temp = DAG.getZExtOrTrunc(SCC, SDLoc(N2), VT);
27711 } else {
27712 SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
27713 Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
27714 }
27715
27716 AddToWorklist(SCC.getNode());
27717 AddToWorklist(Temp.getNode());
27718
27719 if (N2C->isOne())
27720 return Temp;
27721
27722 unsigned ShCt = N2C->getAPIntValue().logBase2();
27723 if (TLI.shouldAvoidTransformToShift(VT, ShCt))
27724 return SDValue();
27725
27726 // shl setcc result by log2 n2c
27727 return DAG.getNode(
27728 ISD::SHL, DL, N2.getValueType(), Temp,
27729 DAG.getShiftAmountConstant(ShCt, N2.getValueType(), SDLoc(Temp)));
27730 }
27731
27732 // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
27733 // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
27734 // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
27735 // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
27736 // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
27737 // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
27738 // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
27739 // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
27740 if (N1C && N1C->isZero() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
27741 SDValue ValueOnZero = N2;
27742 SDValue Count = N3;
27743 // If the condition is NE instead of E, swap the operands.
27744 if (CC == ISD::SETNE)
27745 std::swap(ValueOnZero, Count);
27746 // Check if the value on zero is a constant equal to the bits in the type.
27747 if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
27748 if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
27749 // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
27750 // legal, combine to just cttz.
27751 if ((Count.getOpcode() == ISD::CTTZ ||
27752 Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
27753 N0 == Count.getOperand(0) &&
27754 (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
27755 return DAG.getNode(ISD::CTTZ, DL, VT, N0);
27756 // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
27757 // legal, combine to just ctlz.
27758 if ((Count.getOpcode() == ISD::CTLZ ||
27759 Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
27760 N0 == Count.getOperand(0) &&
27761 (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
27762 return DAG.getNode(ISD::CTLZ, DL, VT, N0);
27763 }
27764 }
27765 }
27766
27767 // Fold select_cc setgt X, -1, C, ~C -> xor (ashr X, BW-1), C
27768 // Fold select_cc setlt X, 0, C, ~C -> xor (ashr X, BW-1), ~C
27769 if (!NotExtCompare && N1C && N2C && N3C &&
27770 N2C->getAPIntValue() == ~N3C->getAPIntValue() &&
27771 ((N1C->isAllOnes() && CC == ISD::SETGT) ||
27772 (N1C->isZero() && CC == ISD::SETLT)) &&
27773 !TLI.shouldAvoidTransformToShift(VT, CmpOpVT.getScalarSizeInBits() - 1)) {
27774 SDValue ASR = DAG.getNode(
27775 ISD::SRA, DL, CmpOpVT, N0,
27776 DAG.getConstant(CmpOpVT.getScalarSizeInBits() - 1, DL, CmpOpVT));
27777 return DAG.getNode(ISD::XOR, DL, VT, DAG.getSExtOrTrunc(ASR, DL, VT),
27778 DAG.getSExtOrTrunc(CC == ISD::SETLT ? N3 : N2, DL, VT));
27779 }
27780
27781 if (SDValue S = PerformMinMaxFpToSatCombine(N0, N1, N2, N3, CC, DAG))
27782 return S;
27783 if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N2, N3, CC, DAG))
27784 return S;
27785
27786 return SDValue();
27787}
27788
27789/// This is a stub for TargetLowering::SimplifySetCC.
27790SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
27791 ISD::CondCode Cond, const SDLoc &DL,
27792 bool foldBooleans) {
27793 TargetLowering::DAGCombinerInfo
27794 DagCombineInfo(DAG, Level, false, this);
27795 return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
27796}
27797
27798/// Given an ISD::SDIV node expressing a divide by constant, return
27799/// a DAG expression to select that will generate the same value by multiplying
27800/// by a magic number.
27801/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
27802SDValue DAGCombiner::BuildSDIV(SDNode *N) {
27803 // when optimising for minimum size, we don't want to expand a div to a mul
27804 // and a shift.
27805 if (DAG.getMachineFunction().getFunction().hasMinSize())
27806 return SDValue();
27807
27808 SmallVector<SDNode *, 8> Built;
27809 if (SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, Built)) {
27810 for (SDNode *N : Built)
27811 AddToWorklist(N);
27812 return S;
27813 }
27814
27815 return SDValue();
27816}
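
// Editor's worked example (illustrative, not part of the original source):
// the classic Hacker's Delight expansion that TLI.BuildSDIV produces for a
// signed 32-bit division by 3 uses the magic constant 0x55555556 == (2^32+2)/3:
//   int32_t sdiv3(int32_t n) {
//     int32_t q = (int32_t)(((int64_t)n * 0x55555556LL) >> 32); // mulhs
//     q += (uint32_t)n >> 31;  // add 1 when n is negative (round toward zero)
//     return q;                // == n / 3
//   }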
27817
27818/// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
27819/// DAG expression that will generate the same value by right shifting.
27820SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
27821 ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
27822 if (!C)
27823 return SDValue();
27824
27825 // Avoid division by zero.
27826 if (C->isZero())
27827 return SDValue();
27828
27829 SmallVector<SDNode *, 8> Built;
27830 if (SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built)) {
27831 for (SDNode *N : Built)
27832 AddToWorklist(N);
27833 return S;
27834 }
27835
27836 return SDValue();
27837}
27838
27839/// Given an ISD::UDIV node expressing a divide by constant, return a DAG
27840/// expression that will generate the same value by multiplying by a magic
27841/// number.
27842/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
27843SDValue DAGCombiner::BuildUDIV(SDNode *N) {
27844 // when optimising for minimum size, we don't want to expand a div to a mul
27845 // and a shift.
27846 if (DAG.getMachineFunction().getFunction().hasMinSize())
27847 return SDValue();
27848
27849 SmallVector<SDNode *, 8> Built;
27850 if (SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, Built)) {
27851 for (SDNode *N : Built)
27852 AddToWorklist(N);
27853 return S;
27854 }
27855
27856 return SDValue();
27857}
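
// Editor's worked example (illustrative, not part of the original source):
// the unsigned counterpart for division by 3 uses 0xAAAAAAAB == ceil(2^33/3):
//   uint32_t udiv3(uint32_t n) {
//     return (uint32_t)(((uint64_t)n * 0xAAAAAAABULL) >> 33); // mulhu + srl 1
//   }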
27858
27859/// Given an ISD::SREM node expressing a remainder by constant power of 2,
27860/// return a DAG expression that will generate the same value.
27861SDValue DAGCombiner::BuildSREMPow2(SDNode *N) {
27862 ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
27863 if (!C)
27864 return SDValue();
27865
27866 // Avoid division by zero.
27867 if (C->isZero())
27868 return SDValue();
27869
27870 SmallVector<SDNode *, 8> Built;
27871 if (SDValue S = TLI.BuildSREMPow2(N, C->getAPIntValue(), DAG, Built)) {
27872 for (SDNode *N : Built)
27873 AddToWorklist(N);
27874 return S;
27875 }
27876
27877 return SDValue();
27878}
27879
27880// This is basically just a port of takeLog2 from InstCombineMulDivRem.cpp
27881//
27882// Returns the node that represents `Log2(Op)`. This may create a new node. If
27883 // we are unable to compute `Log2(Op)`, it returns `SDValue()`.
27884//
27885// All nodes will be created at `DL` and the output will be of type `VT`.
27886//
27887// This will only return `Log2(Op)` if we can prove `Op` is non-zero. Set
27888// `AssumeNonZero` if this function should simply assume (not require proving
27889 // `Op` is non-zero).
27890 static SDValue takeInexpensiveLog2(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
27891 SDValue Op, unsigned Depth,
27892 bool AssumeNonZero) {
27893 assert(VT.isInteger() && "Only integer types are supported!");
27894
27895 auto PeekThroughCastsAndTrunc = [](SDValue V) {
27896 while (true) {
27897 switch (V.getOpcode()) {
27898 case ISD::TRUNCATE:
27899 case ISD::ZERO_EXTEND:
27900 V = V.getOperand(0);
27901 break;
27902 default:
27903 return V;
27904 }
27905 }
27906 };
27907
27908 if (VT.isScalableVector())
27909 return SDValue();
27910
27911 Op = PeekThroughCastsAndTrunc(Op);
27912
27913 // Helper for determining whether a value is a power-2 constant scalar or a
27914 // vector of such elements.
27915 SmallVector<APInt> Pow2Constants;
27916 auto IsPowerOfTwo = [&Pow2Constants](ConstantSDNode *C) {
27917 if (C->isZero() || C->isOpaque())
27918 return false;
27919 // TODO: We may also be able to support negative powers of 2 here.
27920 if (C->getAPIntValue().isPowerOf2()) {
27921 Pow2Constants.emplace_back(C->getAPIntValue());
27922 return true;
27923 }
27924 return false;
27925 };
27926
27927 if (ISD::matchUnaryPredicate(Op, IsPowerOfTwo)) {
27928 if (!VT.isVector())
27929 return DAG.getConstant(Pow2Constants.back().logBase2(), DL, VT);
27930 // We need to create a build vector
27931 if (Op.getOpcode() == ISD::SPLAT_VECTOR)
27932 return DAG.getSplat(VT, DL,
27933 DAG.getConstant(Pow2Constants.back().logBase2(), DL,
27934 VT.getScalarType()));
27935 SmallVector<SDValue> Log2Ops;
27936 for (const APInt &Pow2 : Pow2Constants)
27937 Log2Ops.emplace_back(
27938 DAG.getConstant(Pow2.logBase2(), DL, VT.getScalarType()));
27939 return DAG.getBuildVector(VT, DL, Log2Ops);
27940 }
27941
27942 if (Depth >= DAG.MaxRecursionDepth)
27943 return SDValue();
27944
27945 auto CastToVT = [&](EVT NewVT, SDValue ToCast) {
27946 ToCast = PeekThroughCastsAndTrunc(ToCast);
27947 EVT CurVT = ToCast.getValueType();
27948 if (NewVT == CurVT)
27949 return ToCast;
27950
27951 if (NewVT.getSizeInBits() == CurVT.getSizeInBits())
27952 return DAG.getBitcast(NewVT, ToCast);
27953
27954 return DAG.getZExtOrTrunc(ToCast, DL, NewVT);
27955 };
27956
27957 // log2(X << Y) -> log2(X) + Y
27958 if (Op.getOpcode() == ISD::SHL) {
27959 // 1 << Y and X nuw/nsw << Y are all non-zero.
27960 if (AssumeNonZero || Op->getFlags().hasNoUnsignedWrap() ||
27961 Op->getFlags().hasNoSignedWrap() || isOneConstant(Op.getOperand(0)))
27962 if (SDValue LogX = takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(0),
27963 Depth + 1, AssumeNonZero))
27964 return DAG.getNode(ISD::ADD, DL, VT, LogX,
27965 CastToVT(VT, Op.getOperand(1)));
27966 }
27967
27968 // c ? X : Y -> c ? Log2(X) : Log2(Y)
27969 if ((Op.getOpcode() == ISD::SELECT || Op.getOpcode() == ISD::VSELECT) &&
27970 Op.hasOneUse()) {
27971 if (SDValue LogX = takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(1),
27972 Depth + 1, AssumeNonZero))
27973 if (SDValue LogY = takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(2),
27974 Depth + 1, AssumeNonZero))
27975 return DAG.getSelect(DL, VT, Op.getOperand(0), LogX, LogY);
27976 }
27977
27978 // log2(umin(X, Y)) -> umin(log2(X), log2(Y))
27979 // log2(umax(X, Y)) -> umax(log2(X), log2(Y))
27980 if ((Op.getOpcode() == ISD::UMIN || Op.getOpcode() == ISD::UMAX) &&
27981 Op.hasOneUse()) {
27982 // Use AssumeNonZero as false here. Otherwise we can hit a case where
27983 // log2(umax(X, Y)) != umax(log2(X), log2(Y)) (because of overflow).
27984 if (SDValue LogX =
27985 takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(0), Depth + 1,
27986 /*AssumeNonZero*/ false))
27987 if (SDValue LogY =
27988 takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(1), Depth + 1,
27989 /*AssumeNonZero*/ false))
27990 return DAG.getNode(Op.getOpcode(), DL, VT, LogX, LogY);
27991 }
27992
27993 return SDValue();
27994}
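
// Editor's note (illustrative, not part of the original source): the shift
// rule above in action: for Op = (shl 1, Y) the recursion yields
//   Log2(1 << Y) = Log2(1) + Y = 0 + Y = Y
// and the non-zero requirement matters because a wrapping shift can produce
// 0, which has no defined log2, hence the nuw/nsw/one-constant guards.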
27995
27996/// Determines the LogBase2 value for a non-null input value using the
27997/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
27998SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL,
27999 bool KnownNonZero, bool InexpensiveOnly,
28000 std::optional<EVT> OutVT) {
28001 EVT VT = OutVT ? *OutVT : V.getValueType();
28002 SDValue InexpensiveLogBase2 =
28003 takeInexpensiveLog2(DAG, DL, VT, V, /*Depth*/ 0, KnownNonZero);
28004 if (InexpensiveLogBase2 || InexpensiveOnly || !DAG.isKnownToBeAPowerOfTwo(V))
28005 return InexpensiveLogBase2;
28006
28007 SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
28008 SDValue Base = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
28009 SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
28010 return LogBase2;
28011}
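
// Editor's sanity check (illustrative, not part of the original source): for a
// known power of two the ctlz identity above holds, e.g. with a 32-bit V = 16:
//   ctlz(16) == 27 and (32 - 1) - 27 == 4 == log2(16).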
28012
28013/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
28014/// For the reciprocal, we need to find the zero of the function:
28015/// F(X) = 1/X - A [which has a zero at X = 1/A]
28016/// =>
28017/// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
28018/// does not require additional intermediate precision]
28019/// For the last iteration, put numerator N into it to gain more precision:
28020/// Result = N X_i + X_i (N - N A X_i)
28021SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op,
28022 SDNodeFlags Flags) {
28023 if (LegalDAG)
28024 return SDValue();
28025
28026 // TODO: Handle extended types?
28027 EVT VT = Op.getValueType();
28028 if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 &&
28029 VT.getScalarType() != MVT::f64)
28030 return SDValue();
28031
28032 // If estimates are explicitly disabled for this function, we're done.
28033 MachineFunction &MF = DAG.getMachineFunction();
28034 int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
28035 if (Enabled == TLI.ReciprocalEstimate::Disabled)
28036 return SDValue();
28037
28038 // Estimates may be explicitly enabled for this type with a custom number of
28039 // refinement steps.
28040 int Iterations = TLI.getDivRefinementSteps(VT, MF);
28041 if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
28042 AddToWorklist(Est.getNode());
28043
28044 SDLoc DL(Op);
28045 if (Iterations) {
28046 SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
28047
28048 // Newton iterations: Est = Est + Est (N - Arg * Est)
28049 // If this is the last iteration, also multiply by the numerator.
28050 for (int i = 0; i < Iterations; ++i) {
28051 SDValue MulEst = Est;
28052
28053 if (i == Iterations - 1) {
28054 MulEst = DAG.getNode(ISD::FMUL, DL, VT, N, Est, Flags);
28055 AddToWorklist(MulEst.getNode());
28056 }
28057
28058 SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, MulEst, Flags);
28059 AddToWorklist(NewEst.getNode());
28060
28061 NewEst = DAG.getNode(ISD::FSUB, DL, VT,
28062 (i == Iterations - 1 ? N : FPOne), NewEst, Flags);
28063 AddToWorklist(NewEst.getNode());
28064
28065 NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
28066 AddToWorklist(NewEst.getNode());
28067
28068 Est = DAG.getNode(ISD::FADD, DL, VT, MulEst, NewEst, Flags);
28069 AddToWorklist(Est.getNode());
28070 }
28071 } else {
28072 // If no iterations are available, multiply with N.
28073 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, N, Flags);
28074 AddToWorklist(Est.getNode());
28075 }
28076
28077 return Est;
28078 }
28079
28080 return SDValue();
28081}
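
// Editor's worked example (illustrative, not part of the original source) of
// the reciprocal Newton recurrence X_{i+1} = X_i * (2 - A * X_i) used above,
// for A = 3 starting from a rough estimate X_0 = 0.3:
//   X_1 = 0.3    * (2 - 0.9)    = 0.33
//   X_2 = 0.33   * (2 - 0.99)   = 0.3333
//   X_3 = 0.3333 * (2 - 0.9999) = 0.33333333
// roughly doubling the number of correct digits per step toward 1/3.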
28082
28083/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
28084/// For the reciprocal sqrt, we need to find the zero of the function:
28085/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
28086/// =>
28087/// X_{i+1} = X_i (1.5 - A X_i^2 / 2)
28088/// As a result, we precompute A/2 prior to the iteration loop.
28089SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
28090 unsigned Iterations,
28091 SDNodeFlags Flags, bool Reciprocal) {
28092 EVT VT = Arg.getValueType();
28093 SDLoc DL(Arg);
28094 SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
28095
28096 // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
28097 // this entire sequence requires only one FP constant.
28098 SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
28099 HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
28100
28101 // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
28102 for (unsigned i = 0; i < Iterations; ++i) {
28103 SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
28104 NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
28105 NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
28106 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
28107 }
28108
28109 // If non-reciprocal square root is requested, multiply the result by Arg.
28110 if (!Reciprocal)
28111 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
28112
28113 return Est;
28114}
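
// Editor's worked example (illustrative, not part of the original source) of
// the one-constant recurrence X_{i+1} = X_i * (1.5 - (A/2) * X_i^2) used
// above, for A = 4 (so HalfArg = 2) starting from X_0 = 0.55:
//   X_1 = 0.55    * (1.5 - 2 * 0.3025)    = 0.492250
//   X_2 = 0.49225 * (1.5 - 2 * 0.242310)  = 0.499821
// converging quadratically to 1/sqrt(4) = 0.5.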
28115
28116/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
28117/// For the reciprocal sqrt, we need to find the zero of the function:
28118/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
28119/// =>
28120/// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
28121SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
28122 unsigned Iterations,
28123 SDNodeFlags Flags, bool Reciprocal) {
28124 EVT VT = Arg.getValueType();
28125 SDLoc DL(Arg);
28126 SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
28127 SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
28128
28129 // This routine must enter the loop below to work correctly
28130 // when (Reciprocal == false).
28131 assert(Iterations > 0);
28132
28133 // Newton iterations for reciprocal square root:
28134 // E = (E * -0.5) * ((A * E) * E + -3.0)
28135 for (unsigned i = 0; i < Iterations; ++i) {
28136 SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
28137 SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
28138 SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
28139
28140 // When calculating a square root at the last iteration build:
28141 // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
28142 // (notice a common subexpression)
28143 SDValue LHS;
28144 if (Reciprocal || (i + 1) < Iterations) {
28145 // RSQRT: LHS = (E * -0.5)
28146 LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
28147 } else {
28148 // SQRT: LHS = (A * E) * -0.5
28149 LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
28150 }
28151
28152 Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
28153 }
28154
28155 return Est;
28156}
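
// Editor's note (illustrative, not part of the original source): the
// two-constant form above is algebraically the same recurrence, since
//   (-0.5 * X) * (A * X^2 - 3) = X * (1.5 - 0.5 * A * X^2);
// it is merely rearranged so the square-root (non-reciprocal) final step can
// reuse the A*E product as a common subexpression.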
28157
28158/// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
28159/// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
28160/// Op can be zero.
28161SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
28162 bool Reciprocal) {
28163 if (LegalDAG)
28164 return SDValue();
28165
28166 // TODO: Handle extended types?
28167 EVT VT = Op.getValueType();
28168 if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 &&
28169 VT.getScalarType() != MVT::f64)
28170 return SDValue();
28171
28172 // If estimates are explicitly disabled for this function, we're done.
28173 MachineFunction &MF = DAG.getMachineFunction();
28174 int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
28175 if (Enabled == TLI.ReciprocalEstimate::Disabled)
28176 return SDValue();
28177
28178 // Estimates may be explicitly enabled for this type with a custom number of
28179 // refinement steps.
28180 int Iterations = TLI.getSqrtRefinementSteps(VT, MF);
28181
28182 bool UseOneConstNR = false;
28183 if (SDValue Est =
28184 TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
28185 Reciprocal)) {
28186 AddToWorklist(Est.getNode());
28187
28188 if (Iterations > 0)
28189 Est = UseOneConstNR
28190 ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
28191 : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
28192 if (!Reciprocal) {
28193 SDLoc DL(Op);
28194 // Try the target specific test first.
28195 SDValue Test = TLI.getSqrtInputTest(Op, DAG, DAG.getDenormalMode(VT));
28196
28197 // The estimate is now completely wrong if the input was exactly 0.0 or
28198 // possibly a denormal. Force the answer to 0.0 or the value provided by
28199 // the target for those cases.
28200 Est = DAG.getNode(
28201 Test.getValueType().isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT,
28202 Test, TLI.getSqrtResultForDenormInput(Op, DAG), Est);
28203 }
28204 return Est;
28205 }
28206
28207 return SDValue();
28208}
28209
28210SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
28211 return buildSqrtEstimateImpl(Op, Flags, true);
28212}
28213
28214SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
28215 return buildSqrtEstimateImpl(Op, Flags, false);
28216}
28217
28218/// Return true if there is any possibility that the two addresses overlap.
28219bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const {
28220
28221 struct MemUseCharacteristics {
28222 bool IsVolatile;
28223 bool IsAtomic;
28224 SDValue BasePtr;
28225 int64_t Offset;
28226 LocationSize NumBytes;
28227 MachineMemOperand *MMO;
28228 };
28229
28230 auto getCharacteristics = [](SDNode *N) -> MemUseCharacteristics {
28231 if (const auto *LSN = dyn_cast<LSBaseSDNode>(N)) {
28232 int64_t Offset = 0;
28233 if (auto *C = dyn_cast<ConstantSDNode>(LSN->getOffset()))
28234 Offset = (LSN->getAddressingMode() == ISD::PRE_INC) ? C->getSExtValue()
28235 : (LSN->getAddressingMode() == ISD::PRE_DEC)
28236 ? -1 * C->getSExtValue()
28237 : 0;
28238 TypeSize Size = LSN->getMemoryVT().getStoreSize();
28239 return {LSN->isVolatile(), LSN->isAtomic(),
28240 LSN->getBasePtr(), Offset /*base offset*/,
28241 LocationSize::precise(Size), LSN->getMemOperand()};
28242 }
28243 if (const auto *LN = cast<LifetimeSDNode>(N))
28244 return {false /*isVolatile*/,
28245 /*isAtomic*/ false,
28246 LN->getOperand(1),
28247 (LN->hasOffset()) ? LN->getOffset() : 0,
28248 (LN->hasOffset()) ? LocationSize::precise(LN->getSize())
28249 : LocationSize::beforeOrAfterPointer(),
28250 (MachineMemOperand *)nullptr};
28251 // Default.
28252 return {false /*isvolatile*/,
28253 /*isAtomic*/ false,
28254 SDValue(),
28255 (int64_t)0 /*offset*/,
28256 LocationSize::beforeOrAfterPointer() /*size*/,
28257 (MachineMemOperand *)nullptr};
28258 };
28259
28260 MemUseCharacteristics MUC0 = getCharacteristics(Op0),
28261 MUC1 = getCharacteristics(Op1);
28262
28263 // If they are to the same address, then they must be aliases.
28264 if (MUC0.BasePtr.getNode() && MUC0.BasePtr == MUC1.BasePtr &&
28265 MUC0.Offset == MUC1.Offset)
28266 return true;
28267
28268 // If they are both volatile then they cannot be reordered.
28269 if (MUC0.IsVolatile && MUC1.IsVolatile)
28270 return true;
28271
28272 // Be conservative about atomics for the moment
28273 // TODO: This is way overconservative for unordered atomics (see D66309)
28274 if (MUC0.IsAtomic && MUC1.IsAtomic)
28275 return true;
28276
28277 if (MUC0.MMO && MUC1.MMO) {
28278 if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
28279 (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
28280 return false;
28281 }
28282
28283 // If NumBytes is scalable and offset is not 0, conservatively return may
28284 // alias
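// (For example, a store of <vscale x 4 x i32> spans 16*vscale bytes; a nonzero
// fixed byte offset cannot be ordered against such a vscale-scaled extent at
// compile time, so "may alias" is the only safe answer.)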
28285 if ((MUC0.NumBytes.hasValue() && MUC0.NumBytes.isScalable() &&
28286 MUC0.Offset != 0) ||
28287 (MUC1.NumBytes.hasValue() && MUC1.NumBytes.isScalable() &&
28288 MUC1.Offset != 0))
28289 return true;
28290 // Try to prove that there is aliasing, or that there is no aliasing. Either
28291 // way, we can return now. If nothing can be proved, proceed with more tests.
28292 bool IsAlias;
28293 if (BaseIndexOffset::computeAliasing(Op0, MUC0.NumBytes, Op1, MUC1.NumBytes,
28294 DAG, IsAlias))
28295 return IsAlias;
28296
28297 // The following all rely on MMO0 and MMO1 being valid. Fail conservatively if
28298 // either is not known.
28299 if (!MUC0.MMO || !MUC1.MMO)
28300 return true;
28301
28302 // If one operation reads from invariant memory, and the other may store, they
28303 // cannot alias. This should really check the equivalent of mayWrite,
28304 // but that only matters for memory nodes other than load/store.
28305 if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
28306 (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
28307 return false;
28308
28309 // If we know that SrcValue1 and SrcValue2 have relatively large alignment
28310 // compared to the size and offset of the access, we may be able to prove
28311 // that they do not alias. This check is conservative for now; it is meant to
28312 // catch cases created by splitting vector types and only works when the
28313 // offsets are multiples of the size of the data.
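// For example, two 4-byte accesses with a common base alignment of 16 and
// source offsets 4 and 8 give OffAlign0 = 4 and OffAlign1 = 8; since
// 4 + 4 <= 8, the ranges [4,8) and [8,12) within the aligned window cannot
// overlap, so we can return no alias.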
28314 int64_t SrcValOffset0 = MUC0.MMO->getOffset();
28315 int64_t SrcValOffset1 = MUC1.MMO->getOffset();
28316 Align OrigAlignment0 = MUC0.MMO->getBaseAlign();
28317 Align OrigAlignment1 = MUC1.MMO->getBaseAlign();
28318 LocationSize Size0 = MUC0.NumBytes;
28319 LocationSize Size1 = MUC1.NumBytes;
28320
28321 if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
28322 Size0.hasValue() && Size1.hasValue() && !Size0.isScalable() &&
28323 !Size1.isScalable() && Size0 == Size1 &&
28324 OrigAlignment0 > Size0.getValue().getKnownMinValue() &&
28325 SrcValOffset0 % Size0.getValue().getKnownMinValue() == 0 &&
28326 SrcValOffset1 % Size1.getValue().getKnownMinValue() == 0) {
28327 int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0.value();
28328 int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1.value();
28329
28330 // There is no overlap between these relatively aligned accesses of
28331 // similar size. Return no alias.
28332 if ((OffAlign0 + static_cast<int64_t>(
28333 Size0.getValue().getKnownMinValue())) <= OffAlign1 ||
28334 (OffAlign1 + static_cast<int64_t>(
28335 Size1.getValue().getKnownMinValue())) <= OffAlign0)
28336 return false;
28337 }
28338
28339 bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
28340 ? CombinerGlobalAA
28341 : DAG.getSubtarget().useAA();
28342#ifndef NDEBUG
28343 if (CombinerAAOnlyFunc.getNumOccurrences() &&
28344 CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
28345 UseAA = false;
28346#endif
28347
28348 if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() &&
28349 Size0.hasValue() && Size1.hasValue() &&
28350 // Can't represent a scalable size + fixed offset in LocationSize
28351 (!Size0.isScalable() || SrcValOffset0 == 0) &&
28352 (!Size1.isScalable() || SrcValOffset1 == 0)) {
28353 // Use alias analysis information.
28354 int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
28355 int64_t Overlap0 =
28356 Size0.getValue().getKnownMinValue() + SrcValOffset0 - MinOffset;
28357 int64_t Overlap1 =
28358 Size1.getValue().getKnownMinValue() + SrcValOffset1 - MinOffset;
28359 LocationSize Loc0 =
28360 Size0.isScalable() ? Size0 : LocationSize::precise(Overlap0);
28361 LocationSize Loc1 =
28362 Size1.isScalable() ? Size1 : LocationSize::precise(Overlap1);
28363 if (AA->isNoAlias(
28364 MemoryLocation(MUC0.MMO->getValue(), Loc0,
28365 UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()),
28366 MemoryLocation(MUC1.MMO->getValue(), Loc1,
28367 UseTBAA ? MUC1.MMO->getAAInfo() : AAMDNodes())))
28368 return false;
28369 }
28370
28371 // Otherwise we have to assume they alias.
28372 return true;
28373}
28374
28375/// Walk up chain skipping non-aliasing memory nodes,
28376/// looking for aliasing nodes and adding them to the Aliases vector.
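/// For example, if N is a simple load whose chain first passes through a store
/// that provably does not alias it and then one that may, the walk forwards
/// past the first store and records only the second, so N can later be chained
/// directly to the store it actually depends on.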
28377void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
28378 SmallVectorImpl<SDValue> &Aliases) {
28379 SmallVector<SDValue, 8> Chains; // List of chains to visit.
28380 SmallPtrSet<SDNode *, 16> Visited; // Visited node set.
28381
28382 // Get alias information for node.
28383 // TODO: relax aliasing for unordered atomics (see D66309)
28384 const bool IsLoad = isa<LoadSDNode>(N) && cast<LoadSDNode>(N)->isSimple();
28385
28386 // Starting off.
28387 Chains.push_back(OriginalChain);
28388 unsigned Depth = 0;
28389
28390 // Attempt to improve chain by a single step
28391 auto ImproveChain = [&](SDValue &C) -> bool {
28392 switch (C.getOpcode()) {
28393 case ISD::EntryToken:
28394 // No need to mark EntryToken.
28395 C = SDValue();
28396 return true;
28397 case ISD::LOAD:
28398 case ISD::STORE: {
28399 // Get alias information for C.
28400 // TODO: Relax aliasing for unordered atomics (see D66309)
28401 bool IsOpLoad = isa<LoadSDNode>(C.getNode()) &&
28402 cast<LSBaseSDNode>(C.getNode())->isSimple();
28403 if ((IsLoad && IsOpLoad) || !mayAlias(N, C.getNode())) {
28404 // Look further up the chain.
28405 C = C.getOperand(0);
28406 return true;
28407 }
28408 // Alias, so stop here.
28409 return false;
28410 }
28411
28412 case ISD::CopyFromReg:
28413 // Always forward past CopyFromReg.
28414 C = C.getOperand(0);
28415 return true;
28416
28417 case ISD::LIFETIME_START:
28418 case ISD::LIFETIME_END: {
28419 // We can forward past any lifetime start/end that can be proven not to
28420 // alias the memory access.
28421 if (!mayAlias(N, C.getNode())) {
28422 // Look further up the chain.
28423 C = C.getOperand(0);
28424 return true;
28425 }
28426 return false;
28427 }
28428 default:
28429 return false;
28430 }
28431 };
28432
28433 // Look at each chain and determine if it is an alias. If so, add it to the
28434 // aliases list. If not, then continue up the chain looking for the next
28435 // candidate.
28436 while (!Chains.empty()) {
28437 SDValue Chain = Chains.pop_back_val();
28438
28439 // Don't bother if we've seen Chain before.
28440 if (!Visited.insert(Chain.getNode()).second)
28441 continue;
28442
28443 // For TokenFactor nodes, look at each operand and only continue up the
28444 // chain until we reach the depth limit.
28445 //
28446 // FIXME: The depth check could be made to return the last non-aliasing
28447 // chain we found before we hit a tokenfactor rather than the original
28448 // chain.
28449 if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
28450 Aliases.clear();
28451 Aliases.push_back(OriginalChain);
28452 return;
28453 }
28454
28455 if (Chain.getOpcode() == ISD::TokenFactor) {
28456 // We have to check each of the operands of the token factor for "small"
28457 // token factors, so we queue them up. Adding the operands to the queue
28458 // (stack) in reverse order maintains the original order and increases the
28459 // likelihood that getNode will find a matching token factor (CSE).
28460 if (Chain.getNumOperands() > 16) {
28461 Aliases.push_back(Chain);
28462 continue;
28463 }
28464 for (unsigned n = Chain.getNumOperands(); n;)
28465 Chains.push_back(Chain.getOperand(--n));
28466 ++Depth;
28467 continue;
28468 }
28469 // Everything else
28470 if (ImproveChain(Chain)) {
28471 // Updated chain found; consider the new chain if one exists.
28472 if (Chain.getNode())
28473 Chains.push_back(Chain);
28474 ++Depth;
28475 continue;
28476 }
28477 // No improved chain is possible; treat it as an alias.
28478 Aliases.push_back(Chain);
28479 }
28480}
28481
28482/// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
28483/// (aliasing node.)
28484SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
28485 if (OptLevel == CodeGenOptLevel::None)
28486 return OldChain;
28487
28488 // Ops for replacing token factor.
28489 SmallVector<SDValue, 8> Aliases;
28490
28491 // Accumulate all the aliases to this node.
28492 GatherAllAliases(N, OldChain, Aliases);
28493
28494 // If no operands then chain to entry token.
28495 if (Aliases.empty())
28496 return DAG.getEntryNode();
28497
28498 // If a single operand then chain to it. We don't need to revisit it.
28499 if (Aliases.size() == 1)
28500 return Aliases[0];
28501
28502 // Construct a custom tailored token factor.
28503 return DAG.getTokenFactor(SDLoc(N), Aliases);
28504}
28505
28506// This function tries to collect a bunch of potentially interesting
28507// nodes to improve the chains of, all at once. This might seem
28508// redundant, as this function gets called when visiting every store
28509// node, so why not let the work be done on each store as it's visited?
28510//
28511// I believe this is mainly important because mergeConsecutiveStores
28512// is unable to deal with merging stores of different sizes, so unless
28513// we improve the chains of all the potential candidates up-front
28514// before running mergeConsecutiveStores, it might only see some of
28515// the nodes that will eventually be candidates, and then not be able
28516// to go from a partially-merged state to the desired final
28517// fully-merged state.
28518
28519bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
28520 SmallVector<StoreSDNode *, 8> ChainedStores;
28521 StoreSDNode *STChain = St;
28522 // Intervals records which offsets from BaseIndex have been covered. In the
28523 // common case, every store writes to an address range adjacent to one already
28524 // covered and is thus merged with the previous interval at insertion time.
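// For example, a 4-byte store at St covers the half-open range [0, 4); a
// chained 4-byte store at offset -4 adds [-4, 0) and coalesces with it, while
// a chained store whose range would overlap an existing interval stops the
// scan below.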
28525
28526 using IMap = llvm::IntervalMap<int64_t, std::monostate, 8,
28527 IntervalMapHalfOpenInfo<int64_t>>;
28528 IMap::Allocator A;
28529 IMap Intervals(A);
28530
28531 // This holds the base pointer, index, and the offset in bytes from the base
28532 // pointer.
28533 const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
28534
28535 // We must have a base and an offset.
28536 if (!BasePtr.getBase().getNode())
28537 return false;
28538
28539 // Do not handle stores to undef base pointers.
28540 if (BasePtr.getBase().isUndef())
28541 return false;
28542
28543 // Do not handle stores to opaque types
28544 if (St->getMemoryVT().isZeroSized())
28545 return false;
28546
28547 // BaseIndexOffset assumes that offsets are fixed-size, which
28548 // is not valid for scalable vectors where the offsets are
28549 // scaled by `vscale`, so bail out early.
28550 if (St->getMemoryVT().isScalableVT())
28551 return false;
28552
28553 // Add ST's interval.
28554 Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8,
28555 std::monostate{});
28556
28557 while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {
28558 if (Chain->getMemoryVT().isScalableVector())
28559 return false;
28560
28561 // If the chain has more than one use, then we can't reorder the mem ops.
28562 if (!SDValue(Chain, 0)->hasOneUse())
28563 break;
28564 // TODO: Relax for unordered atomics (see D66309)
28565 if (!Chain->isSimple() || Chain->isIndexed())
28566 break;
28567
28568 // Find the base pointer and offset for this memory node.
28569 const BaseIndexOffset Ptr = BaseIndexOffset::match(Chain, DAG);
28570 // Check that the base pointer is the same as the original one.
28571 int64_t Offset;
28572 if (!BasePtr.equalBaseIndex(Ptr, DAG, Offset))
28573 break;
28574 int64_t Length = (Chain->getMemoryVT().getSizeInBits() + 7) / 8;
28575 // Make sure we don't overlap with other intervals by checking the ones to
28576 // the left or right before inserting.
28577 auto I = Intervals.find(Offset);
28578 // If there's a next interval, we should end before it.
28579 if (I != Intervals.end() && I.start() < (Offset + Length))
28580 break;
28581 // If there's a previous interval, we should start after it.
28582 if (I != Intervals.begin() && (--I).stop() <= Offset)
28583 break;
28584 Intervals.insert(Offset, Offset + Length, std::monostate{});
28585
28586 ChainedStores.push_back(Chain);
28587 STChain = Chain;
28588 }
28589
28590 // If we didn't find a chained store, exit.
28591 if (ChainedStores.empty())
28592 return false;
28593
28594 // Improve all chained stores (St and ChainedStores members) starting from
28595 // where the store chain ended and return single TokenFactor.
28596 SDValue NewChain = STChain->getChain();
28597 SmallVector<SDValue, 8> TFOps;
28598 for (unsigned I = ChainedStores.size(); I;) {
28599 StoreSDNode *S = ChainedStores[--I];
28600 SDValue BetterChain = FindBetterChain(S, NewChain);
28601 S = cast<StoreSDNode>(DAG.UpdateNodeOperands(
28602 S, BetterChain, S->getOperand(1), S->getOperand(2), S->getOperand(3)));
28603 TFOps.push_back(SDValue(S, 0));
28604 ChainedStores[I] = S;
28605 }
28606
28607 // Improve St's chain. Use a new node to avoid creating a loop from CombineTo.
28608 SDValue BetterChain = FindBetterChain(St, NewChain);
28609 SDValue NewST;
28610 if (St->isTruncatingStore())
28611 NewST = DAG.getTruncStore(BetterChain, SDLoc(St), St->getValue(),
28612 St->getBasePtr(), St->getMemoryVT(),
28613 St->getMemOperand());
28614 else
28615 NewST = DAG.getStore(BetterChain, SDLoc(St), St->getValue(),
28616 St->getBasePtr(), St->getMemOperand());
28617
28618 TFOps.push_back(NewST);
28619
28620 // If we improved every element of TFOps, then we've lost the dependence on
28621 // NewChain to successors of St and we need to add it back to TFOps. Do so at
28622 // the beginning to keep relative order consistent with FindBetterChain.
28623 auto hasImprovedChain = [&](SDValue ST) -> bool {
28624 return ST->getOperand(0) != NewChain;
28625 };
28626 bool AddNewChain = llvm::all_of(TFOps, hasImprovedChain);
28627 if (AddNewChain)
28628 TFOps.insert(TFOps.begin(), NewChain);
28629
28630 SDValue TF = DAG.getTokenFactor(SDLoc(STChain), TFOps);
28631 CombineTo(St, TF);
28632
28633 // Add TF and its operands to the worklist.
28634 AddToWorklist(TF.getNode());
28635 for (const SDValue &Op : TF->ops())
28636 AddToWorklist(Op.getNode());
28637 AddToWorklist(STChain);
28638 return true;
28639}
28640
28641bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
28642 if (OptLevel == CodeGenOptLevel::None)
28643 return false;
28644
28645 const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
28646
28647 // We must have a base and an offset.
28648 if (!BasePtr.getBase().getNode())
28649 return false;
28650
28651 // Do not handle stores to undef base pointers.
28652 if (BasePtr.getBase().isUndef())
28653 return false;
28654
28655 // Directly improve a chain of disjoint stores starting at St.
28656 if (parallelizeChainedStores(St))
28657 return true;
28658
28659 // Improve St's chain.
28660 SDValue BetterChain = FindBetterChain(St, St->getChain());
28661 if (St->getChain() != BetterChain) {
28662 replaceStoreChain(St, BetterChain);
28663 return true;
28664 }
28665 return false;
28666}
28667
28668/// This is the entry point for the file.
28669 void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis *AA,
28670 CodeGenOptLevel OptLevel) {
28671 /// This is the main entry point to this class.
28672 DAGCombiner(*this, AA, OptLevel).Run(Level);
28673}
static bool mayAlias(MachineInstr &MIa, SmallVectorImpl< MachineInstr * > &MemInsns, AliasAnalysis *AA)
static cl::opt< bool > UseAA("aarch64-use-aa", cl::init(true), cl::desc("Enable the use of AA during codegen."))
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
static const LLT S1
amdgpu AMDGPU Register Bank Select
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the simple types necessary to represent the attributes associated with functions a...
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL, const TargetLowering &TLI)
For the instruction sequence of store below, F and I values are bundled together as an i64 value befo...
static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I)
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
static bool canFoldInAddressingMode(GLoadStore *MI, const TargetLowering &TLI, MachineRegisterInfo &MRI)
Return true if 'MI' is a load or a store that may be fold it's address operand into the load / store ...
static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I)
static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques=false)
static cl::opt< bool > EnableShrinkLoadReplaceStoreWithStore("combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true), cl::desc("DAG combiner enable load/<replace bytes>/store with " "a narrower store"))
static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0, unsigned ExtOpc, SmallVectorImpl< SDNode * > &ExtendNodes, const TargetLowering &TLI)
static cl::opt< unsigned > TokenFactorInlineLimit("combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048), cl::desc("Limit the number of operands to inline for Token Factors"))
static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner, const TargetLowering &TLI, EVT VT, bool LegalOperations, SDNode *N, SDValue N0, ISD::LoadExtType ExtLoadType, ISD::NodeType ExtOpc, bool NonNegZExt=false)
static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG)
static SDNode * getBuildPairElt(SDNode *N, unsigned i)
static SDValue foldBitOrderCrossLogicOp(SDNode *N, SelectionDAG &DAG)
static SDValue tryToFoldExtendOfConstant(SDNode *N, const SDLoc &DL, const TargetLowering &TLI, SelectionDAG &DAG, bool LegalTypes)
Try to fold a sext/zext/aext dag node into a ConstantSDNode or a build_vector of constants.
static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG, const SDLoc &DL)
If a vector binop is performed on splat values, it may be profitable to extract, scalarize,...
static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift, SDValue ExtractFrom, SDValue &Mask, const SDLoc &DL)
Helper function for visitOR to extract the needed side of a rotate idiom from a shl/srl/mul/udiv.
static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec, bool &IsLoad, bool &IsMasked, SDValue &Ptr, const TargetLowering &TLI)
bool refineUniformBase(SDValue &BasePtr, SDValue &Index, bool IndexIsScaled, SelectionDAG &DAG, const SDLoc &DL)
static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned, const TargetLowering &TLI)
Return true if divmod libcall is available.
static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG)
static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1, SelectionDAG &DAG, const SDLoc &DL)
Given the operands of an add/sub operation, see if the 2nd operand is a masked 0/1 whose source opera...
static bool mergeEltWithShuffle(SDValue &X, SDValue &Y, ArrayRef< int > Mask, SmallVectorImpl< int > &NewMask, SDValue Elt, unsigned InsIndex)
static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf)
If we have a unary shuffle of a shuffle, see if it can be folded away completely.
static bool canSplitIdx(LoadSDNode *LD)
static SDValue ShrinkLoadReplaceStoreWithStore(const std::pair< unsigned, unsigned > &MaskInfo, SDValue IVal, StoreSDNode *St, DAGCombiner *DC)
Check to see if IVal is something that provides a value as specified by MaskInfo.
static cl::opt< bool > StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden, cl::desc("Bypass the profitability model of load slicing"), cl::init(false))
Hidden option to stress test load slicing, i.e., when this option is enabled, load slicing bypasses m...
static cl::opt< bool > UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true), cl::desc("Enable DAG combiner's use of TBAA"))
static void adjustCostForPairing(SmallVectorImpl< LoadedSlice > &LoadedSlices, LoadedSlice::Cost &GlobalLSCost)
Adjust the GlobalLSCost according to the target paring capabilities and the layout of the slices.
static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract, SelectionDAG &DAG, bool LegalOperations)
static SDValue combineCarryDiamond(SelectionDAG &DAG, const TargetLowering &TLI, SDValue N0, SDValue N1, SDNode *N)
static SDValue foldExtendVectorInregToExtendOfSubvector(SDNode *N, const SDLoc &DL, const TargetLowering &TLI, SelectionDAG &DAG, bool LegalOperations)
static bool isCompatibleLoad(SDValue N, unsigned ExtOpcode)
Check if N satisfies: N is used once.
static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG, const SDLoc &DL)
Given an extending node with a pop-count operand, if the target does not support a pop-count in the n...
static SDValue foldLogicTreeOfShifts(SDNode *N, SDValue LeftHand, SDValue RightHand, SelectionDAG &DAG)
Given a tree of logic operations with shape like (LOGIC (LOGIC (X, Y), LOGIC (Z, Y))) try to match an...
static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG)
static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG, bool LegalOperations)
If we are extracting a subvector produced by a wide binary operator try to use a narrow binary operat...
static SDValue takeInexpensiveLog2(SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Op, unsigned Depth, bool AssumeNonZero)
static SDValue combineSelectAsExtAnd(SDValue Cond, SDValue T, SDValue F, const SDLoc &DL, SelectionDAG &DAG)
static bool areUsedBitsDense(const APInt &UsedBits)
Check that all bits set in UsedBits form a dense region, i.e., UsedBits looks like 0....
static SDValue getInputChainForNode(SDNode *N)
Given a node, return its input chain if it has one, otherwise return a null sd operand.
static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG)
If we are extracting a subvector from a wide vector load, convert to a narrow load to eliminate the e...
static ElementCount numVectorEltsOrZero(EVT T)
static SDValue foldSelectWithIdentityConstant(SDNode *N, SelectionDAG &DAG, bool ShouldCommuteOperands)
This inverts a canonicalization in IR that replaces a variable select arm with an identity constant.
static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG)
static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG)
If a shuffle inserts exactly one element from a source vector operand into another vector operand and...
static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner, const TargetLowering &TLI, EVT VT, bool LegalOperations, SDNode *N, SDValue N0, ISD::LoadExtType ExtLoadType)
static SDValue foldAndToUsubsat(SDNode *N, SelectionDAG &DAG, const SDLoc &DL)
For targets that support usubsat, match a bit-hack form of that operation that ends in 'and' and conv...
static cl::opt< bool > CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden, cl::desc("Enable DAG combiner's use of IR alias analysis"))
static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy)
static SDValue formSplatFromShuffles(ShuffleVectorSDNode *OuterShuf, SelectionDAG &DAG)
Combine shuffle of shuffle of the form: shuf (shuf X, undef, InnerMask), undef, OuterMask --> splat X...
static bool isDivisorPowerOfTwo(SDValue Divisor)
static bool matchRotateHalf(const SelectionDAG &DAG, SDValue Op, SDValue &Shift, SDValue &Mask)
Match "(X shl/srl V1) & V2" where V2 may not be present.
static SDValue foldExtractSubvectorFromShuffleVector(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalOperations)
Given EXTRACT_SUBVECTOR(VECTOR_SHUFFLE(Op0, Op1, Mask)), try to produce VECTOR_SHUFFLE(EXTRACT_SUBVEC...
static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG)
static bool hasNoInfs(const TargetOptions &Options, SDValue N)
static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS, SDValue RHS, const SDNodeFlags Flags, const TargetLowering &TLI)
static SDValue combineShuffleOfBitcast(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalOperations)
static std::optional< EVT > canCombineShuffleToExtendVectorInreg(unsigned Opcode, EVT VT, std::function< bool(unsigned)> Match, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes, bool LegalOperations)
static SDValue PerformUMinFpToSatCombine(SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, SelectionDAG &DAG)
static SDValue combineShuffleToAnyExtendVectorInreg(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalOperations)
static SDValue foldAddSubOfSignBit(SDNode *N, const SDLoc &DL, SelectionDAG &DAG)
Try to fold a 'not' shifted sign-bit with add/sub with constant operand into a shift and add with a d...
static SDValue stripTruncAndExt(SDValue Value)
static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG, const SDLoc &DL, bool LegalOperations)
Transform a vector binary operation into a scalar binary operation by moving the math/logic after an ...
static SDValue combineUADDO_CARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG, SDValue X, SDValue Carry0, SDValue Carry1, SDNode *N)
If we are facing some sort of diamond carry propagation pattern try to break it up to generate someth...
static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG)
Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles followed by concatenatio...
static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG)
static auto getFirstIndexOf(R &&Range, const T &Val)
static std::pair< unsigned, unsigned > CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain)
Check to see if V is (and load (ptr), imm), where the load is having specific bytes cleared out.
static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef< int > Mask)
If the shuffle mask is taking exactly one element from the first vector operand and passing through a...
static bool shouldConvertSelectOfConstantsToMath(const SDValue &Cond, EVT VT, const TargetLowering &TLI)
static cl::opt< bool > EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true), cl::desc("DAG combiner enable merging multiple stores " "into a wider store"))
static bool isContractableFMUL(const TargetOptions &Options, SDValue N)
static cl::opt< bool > MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true), cl::desc("DAG combiner may split indexing from loads"))
static bool areSlicesNextToEachOther(const LoadedSlice &First, const LoadedSlice &Second)
Check whether or not First and Second are next to each other in memory.
static SDValue stripConstantMask(const SelectionDAG &DAG, SDValue Op, SDValue &Mask)
static bool arebothOperandsNotSNan(SDValue Operand1, SDValue Operand2, SelectionDAG &DAG)
static bool isBSwapHWordPair(SDValue N, MutableArrayRef< SDNode * > Parts)
static bool CanCombineFCOPYSIGN_EXTEND_ROUND(EVT XTy, EVT YTy)
copysign(x, fp_extend(y)) -> copysign(x, y) copysign(x, fp_round(y)) -> copysign(x,...
static unsigned getMinMaxOpcodeForFP(SDValue Operand1, SDValue Operand2, ISD::CondCode CC, unsigned OrAndOpcode, SelectionDAG &DAG, bool isFMAXNUMFMINNUM_IEEE, bool isFMAXNUMFMINNUM)
static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI)
static SDValue getTruncatedUSUBSAT(EVT DstVT, EVT SrcVT, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &DL)
static SDNode * getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad, bool &IsMasked, SDValue &Ptr, SDValue &BasePtr, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG, const TargetLowering &TLI)
static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG, const TargetLowering &TLI, bool Force)
Flips a boolean if it is cheaper to compute.
static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op, KnownBits &Known)
static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG, const TargetLowering &TLI, EVT VT, bool LegalOperations, SDNode *N, SDValue N0, ISD::LoadExtType ExtLoadType, ISD::NodeType ExtOpc)
static SDValue getSubVectorSrc(SDValue V, SDValue Index, EVT SubVT)
static SDValue combineConcatVectorOfShuffleAndItsOperands(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes, bool LegalOperations)
bool refineIndexType(SDValue &Index, ISD::MemIndexType &IndexType, EVT DataVT, SelectionDAG &DAG)
static cl::opt< bool > EnableVectorFCopySignExtendRound("combiner-vector-fcopysign-extend-round", cl::Hidden, cl::init(false), cl::desc("Enable merging extends and rounds into FCOPYSIGN on vector types"))
static SDValue combineMinNumMaxNumImpl(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode CC, const TargetLowering &TLI, SelectionDAG &DAG)
static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG)
If we have a shift-by-constant of a bitwise logic op that itself has a shift-by-constant operand with...
static SDValue widenAbs(SDNode *Extend, SelectionDAG &DAG)
static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset=0)
static SDValue combineShiftToMULH(SDNode *N, const SDLoc &DL, SelectionDAG &DAG, const TargetLowering &TLI)
static ConstantSDNode * getAsNonOpaqueConstant(SDValue N)
If N is a ConstantSDNode with isOpaque() == false return it casted to a ConstantSDNode pointer else n...
static bool arebothOperandsNotNan(SDValue Operand1, SDValue Operand2, SelectionDAG &DAG)
static SDValue PerformMinMaxFpToSatCombine(SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, SelectionDAG &DAG)
static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize, SelectionDAG &DAG, bool IsRotate)
static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1, SDNode *N)
OR combines for which the commuted variant will be tried as well.
static SDValue combineShuffleToZeroExtendVectorInReg(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalOperations)
static cl::opt< bool > EnableReduceLoadOpStoreWidth("combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true), cl::desc("DAG combiner enable reducing the width of load/op/store " "sequence"))
static bool shouldCombineToPostInc(SDNode *N, SDValue Ptr, SDNode *PtrUse, SDValue &BasePtr, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG, const TargetLowering &TLI)
static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG, bool LegalOperations)
static SDValue combineConcatVectorOfCasts(SDNode *N, SelectionDAG &DAG)
static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG)
Try to replace shift/logic that tests if a bit is clear with mask + setcc.
static bool areBitwiseNotOfEachother(SDValue Op0, SDValue Op1)
static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI)
static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG)
static SDValue foldVSelectToSignBitSplatMask(SDNode *N, SelectionDAG &DAG)
static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, const SDLoc &DL, SelectionDAG &DAG)
static SDValue combineConcatVectorOfConcatVectors(SDNode *N, SelectionDAG &DAG)
static SDValue tryToFoldExtOfAtomicLoad(SelectionDAG &DAG, const TargetLowering &TLI, EVT VT, SDValue N0, ISD::LoadExtType ExtLoadType)
static SDValue matchBSwapHWordOrAndAnd(const TargetLowering &TLI, SelectionDAG &DAG, SDNode *N, SDValue N0, SDValue N1, EVT VT)
static SDValue tryToFoldExtendSelectLoad(SDNode *N, const TargetLowering &TLI, SelectionDAG &DAG, const SDLoc &DL, CombineLevel Level)
Fold (sext (select c, load x, load y)) -> (select c, sextload x, sextload y) (zext (select c,...
static SDValue getAsCarry(const TargetLowering &TLI, SDValue V, bool ForceCarryReconstruction=false)
static SDValue foldSelectOfConstantsUsingSra(SDNode *N, const SDLoc &DL, SelectionDAG &DAG)
If a (v)select has a condition value that is a sign-bit test, try to smear the condition operand sign...
static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG)
static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG)
static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT, SelectionDAG &DAG, bool LegalOperations)
static cl::opt< unsigned > StoreMergeDependenceLimit("combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10), cl::desc("Limit the number of times for the same StoreNode and RootNode " "to bail out in store merging dependence check"))
static cl::opt< std::string > CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden, cl::desc("Only use DAG-combiner alias analysis in this" " function"))
static SDValue foldLogicOfShifts(SDNode *N, SDValue LogicOp, SDValue ShiftOp, SelectionDAG &DAG)
Given a bitwise logic operation N with a matching bitwise logic operand, fold a pattern where 2 of th...
static bool isSlicingProfitable(SmallVectorImpl< LoadedSlice > &LoadedSlices, const APInt &UsedBits, bool ForCodeSize)
Check the profitability of all involved LoadedSlice.
static bool isBSwapHWordElement(SDValue N, MutableArrayRef< SDNode * > Parts)
Return true if the specified node is an element that makes up a 32-bit packed halfword byteswap.
static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, unsigned &BW, bool &Unsigned, SelectionDAG &DAG)
static SDValue foldBoolSelectToLogic(SDNode *N, const SDLoc &DL, SelectionDAG &DAG)
static std::optional< SDByteProvider > calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth, std::optional< uint64_t > VectorIndex, unsigned StartingIndex=0)
static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG)
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file provides an implementation of debug counters.
#define DEBUG_COUNTER(VARNAME, COUNTERNAME, DESC)
Definition: DebugCounter.h:190
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseMap class.
uint64_t Addr
uint64_t Size
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
static bool isSigned(unsigned int Opcode)
static bool isUndef(ArrayRef< int > Mask)
static MaybeAlign getAlign(Value *Ptr)
Definition: IRBuilder.cpp:531
iv Induction Variable Users
Definition: IVUsers.cpp:48
static Value * simplifyDivRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, const SimplifyQuery &Q, unsigned MaxRecurse)
Check for common or similar folds of integer division or integer remainder.
This file implements a coalescing interval map for small objects.
static LVOptions Options
Definition: LVOptions.cpp:25
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
This file provides utility analysis objects describing memory locations.
This file contains the declarations for metadata subclasses.
#define T1
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
#define P(N)
const SmallVectorImpl< MachineOperand > & Cond
Contains matchers for matching SelectionDAG nodes and values.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isSimple(Instruction *I)
This file contains some templates that are useful if you are working with the STL at all.
static cl::opt< bool > UseTBAA("use-tbaa-in-sched-mi", cl::Hidden, cl::init(true), cl::desc("Enable use of TBAA during MI DAG construction"))
This file implements a set that has insertion order iteration characteristics.
This file implements the SmallBitVector class.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
This file describes how to lower LLVM code to machine code.
static constexpr int Concat[]
Value * RHS
Value * LHS
opStatus divide(const APFloat &RHS, roundingMode RM)
Definition: APFloat.h:1119
bool isNegative() const
Definition: APFloat.h:1354
bool isNormal() const
Definition: APFloat.h:1358
bool isDenormal() const
Definition: APFloat.h:1355
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
Definition: APFloat.h:1337
const fltSemantics & getSemantics() const
Definition: APFloat.h:1362
bool isNaN() const
Definition: APFloat.h:1352
static APFloat getOne(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative One.
Definition: APFloat.h:997
APInt bitcastToAPInt() const
Definition: APFloat.h:1260
bool isLargest() const
Definition: APFloat.h:1370
bool isIEEE() const
Definition: APFloat.h:1372
bool isInfinity() const
Definition: APFloat.h:1351
Class for arbitrary precision integers.
Definition: APInt.h:78
APInt umul_ov(const APInt &RHS, bool &Overflow) const
Definition: APInt.cpp:1941
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:214
static void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
Dual division/remainder interface.
Definition: APInt.cpp:1728
APInt getLoBits(unsigned numBits) const
Compute an APInt containing numBits lowbits from this APInt.
Definition: APInt.cpp:613
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition: APInt.h:429
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:981
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition: APInt.h:209
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1500
unsigned popcount() const
Count the number of bits set.
Definition: APInt.h:1629
APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition: APInt.cpp:1002
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1472
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:906
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1310
APInt abs() const
Get the absolute value.
Definition: APInt.h:1753
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition: APInt.h:351
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1162
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition: APInt.h:238
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:360
bool isSignMask() const
Check if the APInt's value is returned by getSignMask.
Definition: APInt.h:446
APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition: APInt.cpp:1636
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1448
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition: APInt.h:1091
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition: APInt.h:189
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:309
bool intersects(const APInt &RHS) const
This operation tests if there are any pairs of corresponding bits between this APInt and RHS that are...
Definition: APInt.h:1229
int32_t exactLogBase2() const
Definition: APInt.h:1741
APInt uadd_ov(const APInt &RHS, bool &Overflow) const
Definition: APInt.cpp:1905
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition: APInt.h:1598
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition: APInt.h:1557
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition: APInt.cpp:620
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition: APInt.h:1491
unsigned countLeadingZeros() const
Definition: APInt.h:1565
unsigned logBase2() const
Definition: APInt.h:1719
bool isShiftedMask() const
Return true if this APInt value contains a non-empty sequence of ones with the remainder zero.
Definition: APInt.h:490
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition: APInt.h:455
bool getBoolValue() const
Convert APInt to a boolean value.
Definition: APInt.h:451
APInt smul_ov(const APInt &RHS, bool &Overflow) const
Definition: APInt.cpp:1930
bool isMask(unsigned numBits) const
Definition: APInt.h:468
bool ule(const APInt &RHS) const
Unsigned less or equal comparison.
Definition: APInt.h:1130
APInt sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:954
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1237
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:420
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:286
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:276
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:180
APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
Definition: APInt.cpp:453
bool isOne() const
Determine if this is a value of 1.
Definition: APInt.h:369
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition: APInt.h:266
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:219
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1522
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:838
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:831
unsigned countr_one() const
Count the number of trailing one bits.
Definition: APInt.h:1615
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition: APInt.h:1201
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
ArrayRef< T > drop_front(size_t N=1) const
Drop the first N elements of the array.
Definition: ArrayRef.h:204
iterator end() const
Definition: ArrayRef.h:154
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
iterator begin() const
Definition: ArrayRef.h:153
static ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
Definition: Type.cpp:647
This is an SDNode representing atomic operations.
static BaseIndexOffset match(const SDNode *N, const SelectionDAG &DAG)
Parses tree in N for base, index, offset addresses.
static bool computeAliasing(const SDNode *Op0, const LocationSize NumBytes0, const SDNode *Op1, const LocationSize NumBytes1, const SelectionDAG &DAG, bool &IsAlias)
A "pseudo-class" with methods for operating on BUILD_VECTORs.
Represents known origin of an individual byte in combine pattern.
Definition: ByteProvider.h:30
static ByteProvider getConstantZero()
Definition: ByteProvider.h:73
static ByteProvider getSrc(std::optional< ISelOp > Val, int64_t ByteOffset, int64_t VectorOffset)
Definition: ByteProvider.h:66
Combiner implementation.
Definition: Combiner.h:34
static Constant * get(ArrayType *T, ArrayRef< Constant * > V)
Definition: Constants.cpp:1292
const APFloat & getValueAPF() const
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
bool isNegative() const
Return true if the value is negative.
bool isZero() const
Return true if the value is positive or negative zero.
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:269
const ConstantInt * getConstantIntValue() const
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
This is an important base class in LLVM.
Definition: Constant.h:42
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
bool isLittleEndian() const
Layout endianness...
Definition: DataLayout.h:238
bool isBigEndian() const
Definition: DataLayout.h:239
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:504
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:874
static bool shouldExecute(unsigned CounterName)
Definition: DebugCounter.h:87
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
bool erase(const KeyT &Val)
Definition: DenseMap.h:345
iterator end()
Definition: DenseMap.h:84
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition: TypeSize.h:311
constexpr bool isScalar() const
Exactly one element.
Definition: TypeSize.h:322
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:695
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:350
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:719
Helper struct to store a base, index and offset that forms an address.
Definition: LoadStoreOpt.h:38
This class is used to form a handle around another node that is persistent and is updated across invo...
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
Base class for LoadSDNode and StoreSDNode.
bool isUnindexed() const
Return true if this is NOT a pre/post inc/dec load/store.
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
const SDValue & getOffset() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
bool hasValue() const
static LocationSize precise(uint64_t Value)
static constexpr LocationSize beforeOrAfterPointer()
Any location before or after the base pointer (but still within the underlying object).
bool isScalable() const
TypeSize getValue() const
Machine Value Type.
SimpleValueType SimpleTy
static auto all_valuetypes()
SimpleValueType Iteration.
static MVT getIntegerVT(unsigned BitWidth)
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
Function & getFunction()
Return the LLVM function that this machine code represents.
A description of a memory reference used in the backend.
const PseudoSourceValue * getPseudoValue() const
Flags
Flags values. These may be or'd together.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MONonTemporal
The memory access is non-temporal.
Flags getFlags() const
Return the raw flags of the source value,.
const Value * getValue() const
Return the base address of the memory access.
This class is used to represent an MGATHER node.
const SDValue & getPassThru() const
ISD::LoadExtType getExtensionType() const
const SDValue & getIndex() const
const SDValue & getScale() const
const SDValue & getBasePtr() const
const SDValue & getMask() const
ISD::MemIndexType getIndexType() const
How is Index applied to BasePtr when computing addresses.
This class is used to represent an MLOAD node.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
const SDValue & getMask() const
const SDValue & getPassThru() const
const SDValue & getOffset() const
bool isUnindexed() const
Return true if this is NOT a pre/post inc/dec load/store.
ISD::MemIndexedMode getAddressingMode() const
Return the addressing mode for this load or store: unindexed, pre-inc, pre-dec, post-inc,...
This class is used to represent an MSCATTER node.
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
This class is used to represent an MSTORE node.
bool isCompressingStore() const
Returns true if the op does a compression to the vector before storing.
const SDValue & getOffset() const
const SDValue & getBasePtr() const
const SDValue & getMask() const
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
unsigned getAddressSpace() const
Return the address space for the associated pointer.
const MDNode * getRanges() const
Returns the Ranges that describes the dereference.
Align getAlign() const
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
Align getOriginalAlign() const
Returns alignment and volatility of the memory access.
bool isSimple() const
Returns true if the memory operation is neither atomic or volatile.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getBasePtr() const
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
bool isNonTemporal() const
bool isInvariant() const
bool isDereferenceable() const
EVT getMemoryVT() const
Return the type of the in-memory value.
Representation for a specific memory location.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:307
MutableArrayRef< T > take_back(size_t N=1) const
Return a copy of *this with only the last N elements.
Definition: ArrayRef.h:419
iterator end() const
Definition: ArrayRef.h:357
iterator begin() const
Definition: ArrayRef.h:356
MutableArrayRef< T > take_front(size_t N=1) const
Return a copy of *this with only the first N elements.
Definition: ArrayRef.h:412
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
void dump() const
Dump this node, for debugging.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
iterator_range< value_op_iterator > op_values() const
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
size_t use_size() const
Return the number of uses of this node.
void intersectFlagsWith(const SDNodeFlags Flags)
Clear any flags in this node that aren't also set in Flags.
TypeSize getValueSizeInBits(unsigned ResNo) const
Returns MVT::getSizeInBits(getValueType(ResNo)).
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
bool use_empty() const
Return true if there are no uses of this node.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
unsigned getNumOperands() const
Return the number of values used by this operation.
SDVTList getVTList() const
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
bool isOperandOf(const SDNode *N) const
Return true if this node is an operand of N.
const APInt & getConstantOperandAPInt(unsigned Num) const
Helper method returns the APInt of a ConstantSDNode operand.
bool isPredecessorOf(const SDNode *N) const
Return true if this node is a predecessor of N.
bool hasAnyUseOfValue(unsigned Value) const
Return true if there are any use of the indicated value.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUses uses of the indicated value.
void setFlags(SDNodeFlags NewFlags)
op_iterator op_end() const
op_iterator op_begin() const
static use_iterator use_end()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
bool reachesChainWithoutSideEffects(SDValue Dest, unsigned Depth=2) const
Return true if this operand (which must be a chain) reaches the specified operand without crossing an...
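A minimal sketch of how a combine might use this query: before reordering a chained operation relative to earlier memory operations, it checks that the chain reaches the function's entry token without crossing a side-effecting node. DAG and LoadChain are assumed to be in scope; the helper name is illustrative.

#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

// True if nothing with side effects sits between the entry token and this
// chain (within the helper's small default search depth).
static bool chainReachesEntry(SelectionDAG &DAG, SDValue LoadChain) {
  return LoadChain.reachesChainWithoutSideEffects(DAG.getEntryNode());
}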
SDValue getValue(unsigned R) const
void dump() const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
bool use_empty() const
Return true if there are no nodes using value ResNo of Node.
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
unsigned getResNo() const
get the index which selects a specific result in the SDNode
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
Targets can subclass this to parameterize the SelectionDAG lowering and instruction selection process...
virtual bool disableGenericCombines(CodeGenOptLevel OptLevel) const
Help to insert SDNodeFlags automatically in transforming.
Definition: SelectionDAG.h:365
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:227
bool willNotOverflowAdd(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the addition of 2 nodes can never overflow.
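A minimal sketch of one way such overflow queries are typically consumed: tagging a newly built ADD with no-wrap flags when wrapping is provably impossible. The helper name is illustrative; the SDNodeFlags setters are the standard ones.

#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

// Builds N0 + N1 and attaches nsw/nuw flags if the DAG can prove that the
// signed/unsigned addition cannot overflow.
static SDValue buildAddWithNoWrap(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
                                  SDValue N0, SDValue N1) {
  SDNodeFlags Flags;
  Flags.setNoSignedWrap(DAG.willNotOverflowAdd(/*IsSigned=*/true, N0, N1));
  Flags.setNoUnsignedWrap(DAG.willNotOverflowAdd(/*IsSigned=*/false, N0, N1));
  return DAG.getNode(ISD::ADD, DL, VT, N0, N1, Flags);
}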
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT, unsigned Opcode)
Convert Op, which must be of integer type, to the integer type VT, by either any/sign/zero-extending ...
Definition: SelectionDAG.h:969
SDValue getSplatSourceVector(SDValue V, int &SplatIndex)
If V is a splatted value, return the source vector and its splat index.
unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
const SDValue & getRoot() const
Return the root tag of the SelectionDAG.
Definition: SelectionDAG.h:568
SDValue getMaskedGather(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, ISD::LoadExtType ExtTy)
bool isKnownNeverSNaN(SDValue Op, unsigned Depth=0) const
const TargetSubtargetInfo & getSubtarget() const
Definition: SelectionDAG.h:490
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
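A minimal sketch using this helper for the textbook multiply-by-power-of-two strength reduction; the shift amount is built in the target's preferred shift-amount type. The function name, and the assumption that C is the already-extracted constant multiplier, are illustrative.

#include "llvm/ADT/APInt.h"
#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

// (mul X, C) -> (shl X, log2(C)) when C is a power of two.
static SDValue mulPow2ToShl(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
                            SDValue X, const APInt &C) {
  if (!C.isPowerOf2())
    return SDValue();
  SDValue Amt = DAG.getShiftAmountConstant(C.logBase2(), VT, DL);
  return DAG.getNode(ISD::SHL, DL, VT, X, Amt);
}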
SDValue getSplatValue(SDValue V, bool LegalTypes=false)
If V is a splat vector, return its scalar source operand by extracting that element from the source v...
SDValue FoldSetCC(EVT VT, SDValue N1, SDValue N2, ISD::CondCode Cond, const SDLoc &dl)
Constant fold a setcc to true or false.
SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm, bool ConstantFold=true)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offs=0, bool isT=false, unsigned TargetFlags=0)
SDNode * isConstantIntBuildVectorOrConstantInt(SDValue N) const
Test whether the given value is a constant int or similar node.
SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
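A minimal sketch of the typical call site: a combine has just created NewLoad to stand in for OldLoad and uses this helper so everything that was chained after the old load stays ordered after the new one as well. OldLoad, NewLoad, and the helper name are illustrative; result 1 of a load is its chain.

#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

// Keep users of OldLoad's chain ordered after NewLoad's chain too.
static SDValue preserveChainOrdering(SelectionDAG &DAG, LoadSDNode *OldLoad,
                                     LoadSDNode *NewLoad) {
  return DAG.makeEquivalentMemoryOrdering(SDValue(OldLoad, 1),
                                          SDValue(NewLoad, 1));
}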
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
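A minimal sketch of building a compare against zero with this helper, asking the target for the correct SETCC result type first. The wrapper name is illustrative.

#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
using namespace llvm;

// Builds (setcc X, 0, seteq) with the result type the target expects.
static SDValue buildIsZero(SelectionDAG &DAG, const SDLoc &DL, SDValue X) {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  EVT CCVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
                                    X.getValueType());
  SDValue Zero = DAG.getConstant(0, DL, X.getValueType());
  return DAG.getSetCC(DL, CCVT, X, Zero, ISD::SETEQ);
}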
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-...
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
bool haveNoCommonBitsSet(SDValue A, SDValue B) const
Return true if A and B have no common bits set.
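A minimal sketch of the classic fold this query enables: when two addends share no set bits, ADD and OR compute the same result, so the add can be rewritten as an or. The helper name is illustrative.

#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

// (add A, B) -> (or A, B) when no bit can be set in both operands.
static SDValue addToOrIfDisjoint(SelectionDAG &DAG, const SDLoc &DL,
                                 SDValue A, SDValue B) {
  if (!DAG.haveNoCommonBitsSet(A, B))
    return SDValue();
  return DAG.getNode(ISD::OR, DL, A.getValueType(), A, B);
}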
bool cannotBeOrderedNegativeFP(SDValue Op) const
Test whether the given float value is known to be positive.
SDValue getGetFPEnv(SDValue Chain, const SDLoc &dl, SDValue Ptr, EVT MemVT, MachineMemOperand *MMO)
SDValue getAssertAlign(const SDLoc &DL, SDValue V, Align A)
Return an AssertAlignSDNode.
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getStepVector(const SDLoc &DL, EVT ResVT, const APInt &StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
bool willNotOverflowSub(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the sub of 2 nodes can never overflow.
SDValue getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Val, MachineMemOperand *MMO)
Gets a node for an atomic op, produces result (if relevant) and chain and takes 2 operands.
bool shouldOptForSize() const
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
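A minimal sketch showing that this helper is shorthand for an XOR with an all-ones constant; both calls below describe the same node. The wrapper name is illustrative.

#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

static SDValue buildNot(SelectionDAG &DAG, const SDLoc &DL, SDValue Val,
                        EVT VT) {
  // Spelled out: (xor Val, -1).
  SDValue Manual =
      DAG.getNode(ISD::XOR, DL, VT, Val, DAG.getAllOnesConstant(DL, VT));
  (void)Manual;
  // Equivalent shorthand.
  return DAG.getNOT(DL, Val, VT);
}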
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:494
static constexpr unsigned MaxRecursionDepth
Definition: SelectionDAG.h:452
SDValue getIndexedMaskedLoad(SDValue OrigLoad, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
APInt computeVectorKnownZeroElements(SDValue Op, const APInt &DemandedElts, unsigned Depth=0) const
For each demanded element of a vector, see if it is known to be zero.
std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
void salvageDebugInfo(SDNode &N)
To be invoked on an SDNode that is slated to be erased.
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:843
bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
void DeleteNode(SDNode *N)
Remove the specified node from the system.
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
SDValue simplifySelect(SDValue Cond, SDValue TVal, SDValue FVal)
Try to simplify a select/vselect into 1 of its operands or a constant.
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:488
SDNode * isConstantFPBuildVectorOrConstantFP(SDValue N) const
Test whether the given value is a constant FP or similar node.
SDValue getTokenFactor(const SDLoc &DL, SmallVectorImpl< SDValue > &Vals)
Creates a new TokenFactor containing Vals.
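A minimal sketch of merging the output chains of several independent memory operations into one chain via a TokenFactor, a step most store-merging style combines end with. Names are illustrative.

#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

// Combine several chains into a single chain for downstream users.
static SDValue mergeChains(SelectionDAG &DAG, const SDLoc &DL,
                           SmallVectorImpl<SDValue> &Chains) {
  if (Chains.size() == 1)
    return Chains[0];
  return DAG.getTokenFactor(DL, Chains);
}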
bool LegalizeOp(SDNode *N, SmallSetVector< SDNode *, 16 > &UpdatedNodes)
Transforms a SelectionDAG node and any operands to it into a node that is compatible with the target ...
bool doesNodeExist(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops)
Check if a node exists without modifying its flags.
void Combine(CombineLevel Level, AAResults *AA, CodeGenOptLevel OptLevel)
This iterates over the nodes in the SelectionDAG, folding certain types of nodes together,...
bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base, unsigned Bytes, int Dist) const
Return true if loads are next to each other and can be merged.
SDValue getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns the sum of the base pointer and offset.
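A minimal sketch: when a memory access is being split in half, this helper forms the address of the upper half by adding the store size of the low-half type to the base pointer. The names and the split-in-half framing are illustrative.

#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

// Address of the second half of a value split into two LoVT-sized pieces.
static SDValue highHalfAddress(SelectionDAG &DAG, const SDLoc &DL,
                               SDValue BasePtr, EVT LoVT) {
  return DAG.getMemBasePlusOffset(BasePtr, LoVT.getStoreSize(), DL);
}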
bool willNotOverflowMul(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the mul of 2 nodes can never overflow.
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
Definition: SelectionDAG.h:676
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
bool isGuaranteedNotToBeUndefOrPoison(SDValue Op, bool PoisonOnly=false, unsigned Depth=0) const
Return true if this function can prove that Op is never poison and, if PoisonOnly is false,...
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
Definition: SelectionDAG.h:877
MaybeAlign InferPtrAlign(SDValue Ptr) const
Infer alignment of a load / store address.
bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
void RemoveDeadNodes()
This method deletes all unreachable nodes in the SelectionDAG.
bool isConstantValueOfAnyType(SDValue N) const
SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
bool isKnownToBeAPowerOfTwo(SDValue Val, unsigned Depth=0) const
Test if the given value is known to have exactly one bit set.
bool isKnownNeverZero(SDValue Op, unsigned Depth=0) const
Test whether the given SDValue is known to contain non-zero value(s).
SDValue getIndexedStore(SDValue OrigStore, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
SDValue getSetFPEnv(SDValue Chain, const SDLoc &dl, SDValue Ptr, EVT MemVT, MachineMemOperand *MMO)
SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Base, SDValue Offset, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
static const fltSemantics & EVTToAPFloatSemantics(EVT VT)
Returns an APFloat semantics tag appropriate for the given type.
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:489
SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
iterator_range< allnodes_iterator > allnodes()
Definition: SelectionDAG.h:560
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
SDValue getLoadVP(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL, MachinePointerInfo PtrInfo, EVT MemVT, Align Alignment, MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo, const MDNode *Ranges=nullptr, bool IsExpanding=false)
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getScatterVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
bool isKnownNeverNaN(SDValue Op, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN.
SDValue getIndexedMaskedStore(SDValue OrigStore, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
const TargetLibraryInfo & getLibInfo() const
Definition: SelectionDAG.h:495
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
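A minimal sketch of a standard use of this analysis: a SIGN_EXTEND_INREG from ExtVT is a no-op when the operand already carries at least as many sign-bit copies as the in-register extension would recreate. Names are illustrative.

#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

// True if sign-extending Op in-register from ExtVT would not change any bits.
static bool signExtendInRegIsNoop(SelectionDAG &DAG, SDValue Op, EVT ExtVT) {
  unsigned OpBits = Op.getScalarValueSizeInBits();
  unsigned ExtBits = ExtVT.getScalarSizeInBits();
  return DAG.ComputeNumSignBits(Op) >= OpBits - ExtBits + 1;
}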
bool MaskedVectorIsZero(SDValue Op, const APInt &DemandedElts, unsigned Depth=0) const
Return true if 'Op' is known to be zero in DemandedElts.
SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
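A minimal sketch of replacing a multi-result node one result at a time, so users of the loaded value and users of the chain each receive the right replacement. OldLoad, NewVal, and NewChain are illustrative placeholders.

#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

// Rewire both results of a load-like node: result 0 is the value, 1 the chain.
static void replaceLoadResults(SelectionDAG &DAG, SDNode *OldLoad,
                               SDValue NewVal, SDValue NewChain) {
  DAG.ReplaceAllUsesOfValueWith(SDValue(OldLoad, 0), NewVal);
  DAG.ReplaceAllUsesOfValueWith(SDValue(OldLoad, 1), NewChain);
}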
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:483
bool canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts, bool PoisonOnly=false, bool ConsiderFlags=true, unsigned Depth=0) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
OverflowKind computeOverflowForUnsignedAdd(SDValue N0, SDValue N1) const
Determine if the result of the unsigned addition of 2 nodes can overflow.
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
Definition: SelectionDAG.h:860
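A minimal sketch of splatting a scalar constant into every lane of a fixed-width vector type with this helper. Names are illustrative.

#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

// BUILD_VECTOR of VT with every element equal to 1.
static SDValue splatOne(SelectionDAG &DAG, const SDLoc &DL, EVT VT) {
  SDValue One = DAG.getConstant(1, DL, VT.getVectorElementType());
  return DAG.getSplatBuildVector(VT, DL, One);
}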
bool isSafeToSpeculativelyExecuteNode(const SDNode *N) const
Check if the provided node is safe to speculatively execute given its current arguments.
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
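A minimal sketch of a common known-bits fold: if every bit an AND mask would clear is already known to be zero in X, the AND is redundant. The helper name is illustrative, and MaskC is assumed to have X's bit width.

#include "llvm/ADT/APInt.h"
#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

// (and X, MaskC) -> X when the bits cleared by MaskC are already zero in X.
static SDValue dropRedundantAnd(SelectionDAG &DAG, SDValue X,
                                const APInt &MaskC) {
  if (DAG.MaskedValueIsZero(X, ~MaskC))
    return X;
  return SDValue();
}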
bool isKnownToBeAPowerOfTwoFP(SDValue Val, unsigned Depth=0) const
Test if the given fp value is known to be an integer power-of-2, either positive or negative.
LLVMContext * getContext() const
Definition: SelectionDAG.h:501
SDValue simplifyFPBinop(unsigned Opcode, SDValue X, SDValue Y, SDNodeFlags Flags)
Try to simplify a floating-point binary operation into 1 of its operands or a constant.
const SDValue & setRoot(SDValue N)
Set the current root tag of the SelectionDAG.
Definition: SelectionDAG.h:577
bool isUndef(unsigned Opcode, ArrayRef< SDValue > Ops)
Return true if the result of this operation is always undefined.
SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
SDNode * getNodeIfExists(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops, const SDNodeFlags Flags)
Get the specified node if it's already available, or else return NULL.
SDValue getIndexedLoad(SDValue OrigLoad, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:571
SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
DenormalMode getDenormalMode(EVT VT) const
Return the current function's default denormal handling kind for the given floating point type.
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
Definition: SelectionDAG.h:893
static unsigned getOpcode_EXTEND(unsigned Opcode)
Convert *_EXTEND_VECTOR_INREG to *_EXTEND opcode.
Definition: SelectionDAG.h:923
bool isADDLike(SDValue Op, bool NoWrap=false) const
Return true if the specified operand is an ISD::OR or ISD::XOR node that can be treated as an ISD::AD...
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
SDValue simplifyShift(SDValue X, SDValue Y)
Try to simplify a shift into 1 of its operands or a constant.
void transferDbgValues(SDValue From, SDValue To, unsigned OffsetInBits=0, unsigned SizeInBits=0, bool InvalidateDbg=true)
Transfer debug values from one node to another, while optionally generating fragment expressions for ...
SDValue getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a logical NOT operation as (XOR Val, BooleanOne).
SDValue getMaskedScatter(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, bool IsTruncating=false)
A vector that has set insertion semantics.
Definition: SetVector.h:57
bool remove(const value_type &X)
Remove an item from the set vector.
Definition: SetVector.h:188
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:93
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
value_type pop_back_val()
Definition: SetVector.h:285
static bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
int getMaskElt(unsigned Idx) const
ArrayRef< int > getMask() const
static void commuteMask(MutableArrayRef< int > Mask)
Change values in a shuffle permute mask assuming the two vector operands have swapped position.
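A minimal sketch of the usual pairing: swapping the two inputs of a VECTOR_SHUFFLE and commuting the mask so every output lane still reads from the same source element. Names are illustrative.

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

// Build the shuffle with its operands swapped, adjusting the mask to match.
static SDValue swapShuffleInputs(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
                                 SDValue A, SDValue B, ArrayRef<int> Mask) {
  SmallVector<int, 16> NewMask(Mask.begin(), Mask.end());
  ShuffleVectorSDNode::commuteMask(NewMask);
  return DAG.getVectorShuffle(VT, DL, B, A, NewMask);
}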
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
void push_back(bool Val)
void reserve(unsigned N)
size_type size() const
Definition: SmallPtrSet.h:94
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:323
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:412
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:344
bool contains(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:418
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:479
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void assign(size_type NumElts, ValueParamT Elt)
Definition: SmallVector.h:717
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void reserve(size_type N)
Definition: SmallVector.h:676
iterator erase(const_iterator CI)
Definition: SmallVector.h:750
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:696
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:818
void resize(size_type N)
Definition: SmallVector.h:651
void push_back(const T &Elt)
Definition: SmallVector.h:426
pointer data()
Return a pointer to the vector's buffer, even if empty().
Definition: SmallVector.h:299
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
const SDValue & getOffset() const
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
bool has(LibFunc F) const
Tests whether a library function is available.
virtual bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT) const
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
bool isOperationExpand(unsigned Op, EVT VT) const
Return true if the specified operation is illegal on this target or unlikely to be made legal with cu...
virtual bool preferSextInRegOfTruncate(EVT TruncVT, EVT VT, EVT ExtVT) const
virtual bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
virtual bool hasAndNot(SDValue X) const
Return true if the target has a bitwise and-not operation: X = ~A & B This can be used to simplify se...
virtual bool isShuffleMaskLegal(ArrayRef< int >, EVT) const
Targets can use this to indicate that they only support some VECTOR_SHUFFLE operations,...
virtual bool enableAggressiveFMAFusion(EVT VT) const
Return true if target always benefits from combining into FMA for a given value type.
bool isIndexedStoreLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed store is legal on this target.
SDValue promoteTargetBoolean(SelectionDAG &DAG, SDValue Bool, EVT ValVT) const
Promote the given target boolean to a target boolean of the given type.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual bool canCombineTruncStore(EVT ValVT, EVT MemVT, bool LegalOnly) const
virtual bool convertSetCCLogicToBitwiseLogic(EVT VT) const
Use bitwise logic to make pairs of compares more efficient.
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
virtual bool isVectorLoadExtDesirable(SDValue ExtVal) const
Return true if folding a vector load into ExtVal (a sign, zero, or any extend node) is profitable.
int getRecipEstimateSqrtEnabled(EVT VT, MachineFunction &MF) const
Return a ReciprocalEstimate enum value for a square root of the given type based on the function's at...
virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
virtual MachineMemOperand::Flags getTargetMMOFlags(const Instruction &I) const
This callback is used to inspect load/store instructions and add target-specific MachineMemOperand fl...
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual bool isFPExtFoldable(const MachineInstr &MI, unsigned Opcode, LLT DestTy, LLT SrcTy) const
Return true if an fpext operation input to an Opcode operation is free (for instance,...
virtual bool hasBitTest(SDValue X, SDValue Y) const
Return true if the target has a bit-test instruction: (X & (1 << Y)) ==/!= 0 This knowledge can be us...
bool isTruncStoreLegal(EVT ValVT, EVT MemVT) const
Return true if the specified store with truncation is legal on this target.
virtual bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT, const SelectionDAG &DAG, const MachineMemOperand &MMO) const
Return true if the following transform is beneficial: fold (conv (load x)) -> (load (conv*)x) On arch...
virtual bool areTwoSDNodeTargetMMOFlagsMergeable(const MemSDNode &NodeX, const MemSDNode &NodeY) const
Return true if it is valid to merge the TargetMMOFlags in two SDNodes.
virtual bool isCommutativeBinOp(unsigned Opcode) const
Returns true if the opcode is a commutative binary operation.
virtual bool isFPImmLegal(const APFloat &, EVT, bool ForCodeSize=false) const
Returns true if the target can instruction select the specified FP immediate natively.
virtual bool isExtractVecEltCheap(EVT VT, unsigned Index) const
Return true if extraction of a scalar element from the given vector type at the given index is cheap.
virtual bool optimizeFMulOrFDivAsShiftAddBitcast(SDNode *N, SDValue FPConst, SDValue IntPow2) const
virtual bool shouldNormalizeToSelectSequence(LLVMContext &Context, EVT VT) const
Returns true if we should normalize select(N0&N1, X, Y) => select(N0, select(N1, X,...
virtual bool preferScalarizeSplat(SDNode *N) const
bool isIndexedMaskedLoadLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed masked load is legal on this target.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
virtual bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const
Return true if it is profitable to convert a select of FP constants into a constant pool load whose a...
bool hasBigEndianPartOrdering(EVT VT, const DataLayout &DL) const
When splitting a value of the specified type into parts, does the Lo or Hi part come first?...
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL) const
Returns the type for the shift amount of a shift opcode.
virtual bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const
Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type from this source type with ...
virtual bool isMulAddWithConstProfitable(SDValue AddNode, SDValue ConstNode) const
Return true if it may be profitable to transform (mul (add x, c1), c2) -> (add (mul x,...
virtual bool isFsqrtCheap(SDValue X, SelectionDAG &DAG) const
Return true if SQRT(X) shouldn't be replaced with X*RSQRT(X).
int getDivRefinementSteps(EVT VT, MachineFunction &MF) const
Return the refinement step count for a division of the given type based on the function's attributes.
virtual bool shouldFoldConstantShiftPairToMask(const SDNode *N, CombineLevel Level) const
Return true if it is profitable to fold a pair of shifts into a mask.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual bool shouldAvoidTransformToShift(EVT VT, unsigned Amount) const
Return true if creating a shift of the type by the given amount is not profitable.
virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const
Return the ValueType of the result of SETCC operations.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
virtual bool shouldFoldSelectWithSingleBitTest(EVT VT, const APInt &AndMask) const
BooleanContent getBooleanContents(bool isVec, bool isFloat) const
For targets without i1 registers, this gives the nature of the high-bits of boolean values held in ty...
virtual bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT) const
Return true if pulling a binary operation into a select with an identity constant is profitable.
virtual bool shouldReassociateReduction(unsigned RedOpc, EVT VT) const
bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal on this target.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
int getRecipEstimateDivEnabled(EVT VT, MachineFunction &MF) const
Return a ReciprocalEstimate enum value for a division of the given type based on the function's attri...
virtual bool preferIncOfAddToSubOfNot(EVT VT) const
These two forms are equivalent: sub y, (xor x, -1) add (add x, 1), y The variant with two add's is IR...
virtual bool isNarrowingProfitable(EVT SrcVT, EVT DestVT) const
Return true if it's profitable to narrow operations of type SrcVT to DestVT.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
virtual bool isLegalAddImmediate(int64_t) const
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT) const
Return true if it is profitable to reduce a load to a smaller type.
virtual bool isProfitableToCombineMinNumMaxNum(EVT VT) const
virtual bool isFNegFree(EVT VT) const
Return true if an fneg operation is free to the point where it is never worthwhile to replace it with...
virtual bool isIntDivCheap(EVT VT, AttributeList Attr) const
Return true if integer divide is usually cheaper than a sequence of several shifts,...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool mergeStoresAfterLegalization(EVT MemVT) const
Allow store merging for the specified type after legalization in addition to before legalization.
virtual bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
Return true if the target supports a memory access of this type for the given address space and align...
unsigned getGatherAllAliasesMaxDepth() const
virtual bool storeOfVectorConstantIsCheap(bool IsZero, EVT MemVT, unsigned NumElem, unsigned AddrSpace) const
Return true if it is expected to be cheaper to do a store of vector constant with the given size and ...
virtual bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const
Return true if it is cheaper to split the store of a merged int val from a pair of smaller values int...
bool isLoadExtLegalOrCustom(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal or custom on this target.
bool isAtomicLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified atomic load with extension is legal on this target.
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
virtual bool shouldFoldMaskToVariableShiftPair(SDValue X) const
There are two ways to clear extreme bits (either low or high): Mask: x & (-1 << y) (the instcombine c...
bool isIndexedLoadLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed load is legal on this target.
virtual bool canMergeStoresTo(unsigned AS, EVT MemVT, const MachineFunction &MF) const
Returns true if it's reasonable to merge stores to MemVT size.
virtual bool preferABDSToABSWithNSW(EVT VT) const
bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal on this target.
AndOrSETCCFoldKind
Enum of different potentially desirable ways to fold (and/or (setcc ...), (setcc ....
virtual bool shouldScalarizeBinop(SDValue VecOp) const
Try to convert an extract element of a vector binary operation into an extract element followed by a ...
virtual bool isStoreBitCastBeneficial(EVT StoreVT, EVT BitcastVT, const SelectionDAG &DAG, const MachineMemOperand &MMO) const
Return true if the following transform is beneficial: (store (y (conv x)), y*)) -> (store x,...
bool isIndexedMaskedStoreLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed masked store is legal on this target.
virtual bool isVectorClearMaskLegal(ArrayRef< int >, EVT) const
Similar to isShuffleMaskLegal.
bool hasTargetDAGCombine(ISD::NodeType NT) const
If true, the target has custom DAG combine transformations that it can perform for the specified node...
virtual bool shouldSplatInsEltVarIndex(EVT) const
Return true if inserting a scalar into a variable element of an undef vector is more efficiently hand...
NegatibleCost
Enum that specifies when a float negation is beneficial.
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
int getSqrtRefinementSteps(EVT VT, MachineFunction &MF) const
Return the refinement step count for a square root of the given type based on the function's attribut...
virtual unsigned preferedOpcodeForCmpEqPiecesOfOperand(EVT VT, unsigned ShiftOpc, bool MayTransformRotate, const APInt &ShiftOrRotateAmt, const std::optional< APInt > &AndMask) const
virtual bool isFMADLegal(const MachineInstr &MI, LLT Ty) const
Returns true if MI can be combined with another instruction to form TargetOpcode::G_FMAD.
const char * getLibcallName(RTLIB::Libcall Call) const
Get the libcall routine name for the specified libcall.
virtual bool aggressivelyPreferBuildVectorSources(EVT VecVT) const
virtual bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const
virtual bool isFAbsFree(EVT VT) const
Return true if an fabs operation is free to the point where it is never worthwhile to replace it with...
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
virtual bool generateFMAsInMachineCombiner(EVT VT, CodeGenOptLevel OptLevel) const
virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AddrSpace, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
virtual bool hasPairedLoad(EVT, Align &) const
Return true if the target supplies and combines to a paired load two loaded values of type LoadedType...
virtual bool convertSelectOfConstantsToMath(EVT VT) const
Return true if a select of constants (select Cond, C1, C2) should be transformed into simple math ops...
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const
Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT from min(max(fptoi)) satur...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps, bool &UseOneConstNR, bool Reciprocal) const
Hooks for building estimates in place of slower divisions and square roots.
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
virtual bool isReassocProfitable(SelectionDAG &DAG, SDValue N0, SDValue N1) const
SDValue getCheaperOrNeutralNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, const NegatibleCost CostThreshold=NegatibleCost::Neutral, unsigned Depth=0) const
SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, unsigned Depth=0) const
This is the helper function to return the newly negated expression only when the cost is cheaper.
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
More limited version of SimplifyDemandedBits that can be used to "look through" ops that don't contri...
SDValue expandABS(SDNode *N, SelectionDAG &DAG, bool IsNegative=false) const
Expand ABS nodes.
virtual bool IsDesirableToPromoteOp(SDValue, EVT &) const
This method query the target whether it is beneficial for dag combiner to promote the specified node.
virtual bool isTypeDesirableForOp(unsigned, EVT VT) const
Return true if the target has native support for the specified value type and it is 'desirable' to us...
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, const DenormalMode &Mode) const
Return a target-dependent comparison result if the input operand is suitable for use with a square ro...
SDValue buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0, SDValue N1, MutableArrayRef< int > Mask, SelectionDAG &DAG) const
Tries to build a legal vector shuffle using the provided parameters or equivalent variations.
virtual SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps) const
Return a reciprocal estimate value for the input operand.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
bool isConstFalseVal(SDValue N) const
Return if the N is a constant or constant vector equal to the false value from getBooleanContents().
SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::UDIV node expressing a divide by constant, return a DAG expression to select that will ...
virtual SDValue getSqrtResultForDenormInput(SDValue Operand, SelectionDAG &DAG) const
Return a target-dependent result if the input operand is not suitable for use with a square root esti...
virtual bool getPostIndexedAddressParts(SDNode *, SDNode *, SDValue &, SDValue &, ISD::MemIndexedMode &, SelectionDAG &) const
Returns true by value, base pointer and offset pointer and addressing mode by reference if this node ...
SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, bool foldBooleans, DAGCombinerInfo &DCI, const SDLoc &dl) const
Try to simplify a setcc built with the specified operands and cc.
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const
Return true if folding a constant offset with the given GlobalAddress is legal.
bool isConstTrueVal(SDValue N) const
Return if the N is a constant or constant vector equal to the true value from getBooleanContents().
SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, SDValue Index) const
Get a pointer to vector element Idx located in memory for a vector of type VecVT starting at a base a...
virtual bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const
Return true if it is profitable to move this shift by a constant amount through its operand,...
virtual unsigned combineRepeatedFPDivisors() const
Indicate whether this target prefers to combine FDIVs with the same divisor.
virtual AndOrSETCCFoldKind isDesirableToCombineLogicOpOfSETCC(const SDNode *LogicOp, const SDNode *SETCC0, const SDNode *SETCC1) const
virtual bool getPreIndexedAddressParts(SDNode *, SDValue &, SDValue &, ISD::MemIndexedMode &, SelectionDAG &) const
Returns true by value, base pointer and offset pointer and addressing mode by reference if the node's...
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::SDIV node expressing a divide by constant, return a DAG expression to select that will ...
virtual SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
virtual SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SREM lowering for power-of-2 denominators.
virtual bool isDesirableToTransformToIntegerOp(unsigned, EVT) const
Return true if it is profitable for dag combiner to transform a floating point op of specified opcode...
TargetOptions Options
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
unsigned NoSignedZerosFPMath
NoSignedZerosFPMath - This flag is enabled when the -enable-no-signed-zeros-fp-math is specified on t...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual bool useAA() const
Enable use of alias analysis during code generation (during MI scheduling, DAGCombine,...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:345
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
const fltSemantics & getFltSemantics() const
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:72
Value * getOperand(unsigned i) const
Definition: User.h:169
This class is used to represent an VP_GATHER node.
const SDValue & getScale() const
ISD::MemIndexType getIndexType() const
How is Index applied to BasePtr when computing addresses.
const SDValue & getVectorLength() const
const SDValue & getIndex() const
const SDValue & getBasePtr() const
const SDValue & getMask() const
This class is used to represent an VP_SCATTER node.
const SDValue & getValue() const
This class is used to represent EVT's, which are used to parameterize some operations.
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
use_iterator use_begin()
Definition: Value.h:360
bool use_empty() const
Definition: Value.h:344
iterator_range< use_iterator > uses()
Definition: Value.h:376
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
Definition: TypeSize.h:183
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:202
static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition: TypeSize.h:232
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:171
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition: TypeSize.h:254
#define INT64_MAX
Definition: DataTypes.h:71
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char IsVolatile[]
Key for Kernel::Arg::Metadata::mIsVolatile.
const APInt & smin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be signed.
Definition: APInt.h:2197
const APInt & smax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be signed.
Definition: APInt.h:2202
const APInt & umin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be unsigned.
Definition: APInt.h:2207
const APInt & umax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be unsigned.
Definition: APInt.h:2212
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ Entry
Definition: COFF.h:811
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
CondCode getSetCCAndOperation(CondCode Op1, CondCode Op2, EVT Type)
Return the result of a logical AND between different comparisons of identical values: ((X op1 Y) & (X...
bool isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are ~0 ...
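A minimal sketch of the fold this predicate enables: an AND whose right operand is a splat of all-ones bits (whether a BUILD_VECTOR or a SPLAT_VECTOR) is a no-op. The helper name is illustrative.

#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

// (and N0, all-ones-splat) -> N0.
static SDValue dropAndAllOnes(SDValue N0, SDValue N1) {
  if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
    return N0;
  return SDValue();
}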
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:779
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:243
@ CTLZ_ZERO_UNDEF
Definition: ISDOpcodes.h:752
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:490
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition: ISDOpcodes.h:44
@ MLOAD
Masked load and store - consecutive vector load and store operations with additional mask operand tha...
Definition: ISDOpcodes.h:1330
@ VECREDUCE_SMIN
Definition: ISDOpcodes.h:1415
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:257
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition: ISDOpcodes.h:573
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:743
@ SMULFIX
RESULT = [US]MULFIX(LHS, RHS, SCALE) - Perform fixed point multiplication on 2 integers with the same...
Definition: ISDOpcodes.h:374
@ ConstantFP
Definition: ISDOpcodes.h:77
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:1284
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:276
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
Definition: ISDOpcodes.h:501
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:246
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1074
@ SMULFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
Definition: ISDOpcodes.h:380
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:813
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:497
@ GlobalAddress
Definition: ISDOpcodes.h:78
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:820
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:557
@ VECREDUCE_FMAX
FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.
Definition: ISDOpcodes.h:1400
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:397
@ VECREDUCE_FMAXIMUM
FMINIMUM/FMAXIMUM nodes propagate NaNs and signed zeroes using the llvm.minimum and llvm....
Definition: ISDOpcodes.h:1404
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:716
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition: ISDOpcodes.h:850
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:262
@ VECREDUCE_SMAX
Definition: ISDOpcodes.h:1414
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:491
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition: ISDOpcodes.h:943
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:933
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:236
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:1455
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:804
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition: ISDOpcodes.h:684
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:634
@ VECREDUCE_FADD
These reductions have relaxed evaluation order semantics, and have a single vector operand.
Definition: ISDOpcodes.h:1397
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:751
@ VECREDUCE_FMIN
Definition: ISDOpcodes.h:1401
@ SETCCCARRY
Like SetCC, ops #0 and #1 are the LHS and RHS operands to compare, but op #2 is a boolean indicating ...
Definition: ISDOpcodes.h:787
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:960
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1120
@ SSUBO
Same for subtraction.
Definition: ISDOpcodes.h:334
@ STEP_VECTOR
STEP_VECTOR(IMM) - Returns a scalable vector whose lanes are comprised of a linear sequence of unsign...
Definition: ISDOpcodes.h:660
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition: ISDOpcodes.h:356
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:756
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:1280
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:218
@ VECREDUCE_UMAX
Definition: ISDOpcodes.h:1416
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition: ISDOpcodes.h:229
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition: ISDOpcodes.h:641
@ AssertAlign
AssertAlign - These nodes record if a register contains a value that has a known alignment and the tr...
Definition: ISDOpcodes.h:68
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition: ISDOpcodes.h:215
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition: ISDOpcodes.h:330
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
Definition: ISDOpcodes.h:1409
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:673
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:734
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:614
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:587
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
Definition: ISDOpcodes.h:1021
@ EntryToken
EntryToken - This is the marker used to indicate the start of a region.
Definition: ISDOpcodes.h:47
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:549
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition: ISDOpcodes.h:209
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:810
@ TargetConstantFP
Definition: ISDOpcodes.h:165
@ FP_TO_UINT_SAT
Definition: ISDOpcodes.h:886
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:771
@ VSCALE
VSCALE(IMM) - Returns the runtime scaling factor used to calculate the number of elements within a sc...
Definition: ISDOpcodes.h:1372
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:1008
@ SSHLSAT
RESULT = [US]SHLSAT(LHS, RHS) - Perform saturation left shift.
Definition: ISDOpcodes.h:366
@ SMULO
Same for multiplication.
Definition: ISDOpcodes.h:338
@ TargetFrameIndex
Definition: ISDOpcodes.h:172
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition: ISDOpcodes.h:839
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:828
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:696
@ LIFETIME_START
This corresponds to the llvm.lifetime.
Definition: ISDOpcodes.h:1347
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:918
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:765
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:310
@ MGATHER
Masked gather and scatter - load and store operations for a vector of random addresses with additiona...
Definition: ISDOpcodes.h:1342
@ HANDLENODE
HANDLENODE node - Used as a handle for various purposes.
Definition: ISDOpcodes.h:1234
@ VECREDUCE_UMIN
Definition: ISDOpcodes.h:1417
@ BF16_TO_FP
BF16_TO_FP, FP_TO_BF16 - These operators are used to perform promotions and truncation for bfloat16.
Definition: ISDOpcodes.h:952
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
Definition: ISDOpcodes.h:1027
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:866
@ TargetConstant
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification,...
Definition: ISDOpcodes.h:164
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:708
@ GET_FPENV_MEM
Gets the current floating-point environment.
Definition: ISDOpcodes.h:1050
@ CARRY_FALSE
CARRY_FALSE - This node is used when folding other nodes, like ADDC/SUBC, which indicate the carry re...
Definition: ISDOpcodes.h:267
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition: ISDOpcodes.h:679
@ VECREDUCE_FMUL
Definition: ISDOpcodes.h:1398
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:286
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition: ISDOpcodes.h:407
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition: ISDOpcodes.h:223
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:538
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ FFREXP
FFREXP - frexp, extract fractional and exponent component of a floating-point value.
Definition: ISDOpcodes.h:981
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:899
@ VECTOR_COMPRESS
VECTOR_COMPRESS(Vec, Mask, Passthru) consecutively place vector elements based on mask e....
Definition: ISDOpcodes.h:668
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition: ISDOpcodes.h:861
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition: ISDOpcodes.h:885
@ VECREDUCE_FMINIMUM
Definition: ISDOpcodes.h:1405
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:816
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:1113
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:61
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:507
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition: ISDOpcodes.h:347
@ AssertZext
Definition: ISDOpcodes.h:62
@ CALLSEQ_START
CALLSEQ_START/CALLSEQ_END - These operators mark the beginning and end of a call sequence,...
Definition: ISDOpcodes.h:1183
@ SET_FPENV_MEM
Sets the current floating point environment.
Definition: ISDOpcodes.h:1055
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition: ISDOpcodes.h:691
@ SADDO_CARRY
Carry-using overflow-aware nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:320
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:529
bool isIndexTypeSigned(MemIndexType IndexType)
Definition: ISDOpcodes.h:1544
bool isExtVecInRegOpcode(unsigned Opcode)
Definition: ISDOpcodes.h:1654
bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool matchUnaryPredicate(SDValue Op, std::function< bool(ConstantSDNode *)> Match, bool AllowUndefs=false)
Hook for matching ConstantSDNode predicate.
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
bool matchUnaryFpPredicate(SDValue Op, std::function< bool(ConstantFPSDNode *)> Match, bool AllowUndefs=false)
Hook for matching ConstantFPSDNode predicate.
bool isFPEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with floati...
Definition: ISDOpcodes.h:1629
bool isExtOpcode(unsigned Opcode)
Definition: ISDOpcodes.h:1649
bool isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are 0 o...
bool isVPBinaryOp(unsigned Opcode)
Whether this is a vector-predicated binary operation opcode.
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
bool isBitwiseLogicOp(unsigned Opcode)
Whether this is bitwise logic opcode.
Definition: ISDOpcodes.h:1470
std::optional< unsigned > getVPMaskIdx(unsigned Opcode)
The operand position of the vector mask.
bool isUNINDEXEDLoad(const SDNode *N)
Returns true if the specified node is an unindexed load.
std::optional< unsigned > getVPExplicitVectorLengthIdx(unsigned Opcode)
The operand position of the explicit vector length parameter.
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is an EXTLOAD.
bool allOperandsUndef(const SDNode *N)
Return true if the node has at least one operand and all operands of the specified node are ISD::UNDE...
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
MemIndexType
MemIndexType enum - This enum defines how to interpret MGATHER/SCATTER's index parameter when calcula...
Definition: ISDOpcodes.h:1540
@ UNSIGNED_SCALED
Definition: ISDOpcodes.h:1540
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
Definition: ISDOpcodes.h:1611
bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
bool matchBinaryPredicate(SDValue LHS, SDValue RHS, std::function< bool(ConstantSDNode *, ConstantSDNode *)> Match, bool AllowUndefs=false, bool AllowTypeMismatch=false)
Attempt to match a binary predicate against a pair of scalar/splat constants or every element of a pa...
bool isVPReduction(unsigned Opcode)
Whether this is a vector-predicated reduction opcode.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1527
bool isBuildVectorOfConstantFPSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantFPSDNode or undef.
bool isSEXTLoad(const SDNode *N)
Returns true if the specified node is a SEXTLOAD.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1578
bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1558
CondCode getSetCCOrOperation(CondCode Op1, CondCode Op2, EVT Type)
Return the result of a logical OR between different comparisons of identical values: ((X op1 Y) | (X ...
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
Definition: ISDOpcodes.h:1623
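The ISD:: helpers above (isConstantSplatVector, matchUnaryPredicate, isBuildVectorAllZeros, ...) are the usual way a combine inspects constant build vectors. A minimal sketch, assuming only that an SDValue N naming a vector of constants is in scope; the helper name is invented for illustration:
#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

// Returns true if every defined element of N is a power-of-two constant,
// tolerating undef lanes (illustrative only; not code taken from DAGCombiner).
static bool isPowerOfTwoConstantOrSplat(SDValue N) {
  return ISD::matchUnaryPredicate(
      N, [](ConstantSDNode *C) { return C->getAPIntValue().isPowerOf2(); },
      /*AllowUndefs=*/true);
}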
@ VecLoad
Definition: NVPTX.h:88
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Definition: PatternMatch.h:100
m_Intrinsic_Ty< Opnd0 >::Ty m_BitReverse(const Opnd0 &Op0)
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
Definition: PatternMatch.h:972
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:875
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
Definition: PatternMatch.h:592
MaxMin_match< ICmpInst, LHS, RHS, smin_pred_ty > m_SMin(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
deferredval_ty< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
Definition: PatternMatch.h:893
BinaryOp_match< cst_pred_ty< is_zero_int >, ValTy, Instruction::Sub > m_Neg(const ValTy &V)
Matches a 'Neg' as 'sub 0, V'.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
MaxMin_match< ICmpInst, LHS, RHS, umax_pred_ty > m_UMax(const LHS &L, const RHS &R)
MaxMin_match< ICmpInst, LHS, RHS, smax_pred_ty > m_SMax(const LHS &L, const RHS &R)
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
AnyBinaryOp_match< LHS, RHS, true > m_c_BinOp(const LHS &L, const RHS &R)
Matches a BinaryOperator with LHS and RHS in either order.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
BinaryOp_match< cst_pred_ty< is_all_ones >, ValTy, Instruction::Xor, true > m_Not(const ValTy &V)
Matches a 'Not' as 'xor V, -1' or 'xor -1, V'.
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
Definition: PatternMatch.h:612
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
MaxMin_match< ICmpInst, LHS, RHS, umin_pred_ty > m_UMin(const LHS &L, const RHS &R)
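These m_* matchers belong to the IR-level llvm::PatternMatch namespace (distinct from the SDPatternMatch helpers listed further down). A small sketch of how they compose with match(); the value V and the helper name are assumptions:
#include "llvm/IR/PatternMatch.h"
using namespace llvm;
using namespace llvm::PatternMatch;

// Matches V against (sub 0, (and X, Y)), i.e. the negation of a bitwise AND,
// binding the two AND operands on success.
static bool matchNegatedAnd(Value *V, Value *&X, Value *&Y) {
  return match(V, m_Neg(m_And(m_Value(X), m_Value(Y))));
}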
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
@ Undef
Value of the register doesn't matter.
Opcode_match m_Opc(unsigned Opcode)
BinaryOpc_match< LHS, RHS, false > m_Sra(const LHS &L, const RHS &R)
Or< Preds... > m_AnyOf(const Preds &...preds)
BinaryOpc_match< LHS, RHS, false > m_Srl(const LHS &L, const RHS &R)
TernaryOpc_match< T0_P, T1_P, T2_P, false, false > m_SetCC(const T0_P &LHS, const T1_P &RHS, const T2_P &CC)
UnaryOpc_match< Opnd > m_AnyExt(const Opnd &Op)
bool sd_match(SDNode *N, const SelectionDAG *DAG, Pattern &&P)
CondCode_match m_SpecificCondCode(ISD::CondCode CC)
Match a conditional code SDNode with a specific ISD::CondCode.
NUses_match< 1, Value_match > m_OneUse()
CondCode_match m_CondCode()
Match any conditional code SDNode.
TernaryOpc_match< T0_P, T1_P, T2_P, true, false > m_c_SetCC(const T0_P &LHS, const T1_P &RHS, const T2_P &CC)
bool sd_context_match(SDValue N, const MatchContext &Ctx, Pattern &&P)
ConstantInt_match m_ConstInt()
Match any integer constant or splat of an integer constant.
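The SDPatternMatch matchers above (m_Opc, m_Sra, m_Srl, sd_match, ...) mirror the IR matchers but operate on SDNodes. A sketch under the assumption that an SDNode *N and a SelectionDAG are in scope and that the binding form m_Value(SDValue &) from SDPatternMatch.h is available:
#include "llvm/CodeGen/SDPatternMatch.h"
#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;
using namespace llvm::SDPatternMatch;

// Recognizes (sra (srl X, C1), C2) with constant shift amounts; purely
// illustrative, not a transform performed verbatim by the combiner.
static bool isSraOfSrlByConstants(SDNode *N, const SelectionDAG &DAG) {
  SDValue X;
  return sd_match(N, &DAG,
                  m_Sra(m_Srl(m_Value(X), m_ConstInt()), m_ConstInt()));
}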
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
int ilogb(const IEEEFloat &Arg)
Definition: APFloat.cpp:4625
constexpr double e
Definition: MathExtras.h:47
DiagnosticInfoOptimizationBase::Argument NV
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
Definition: MathExtras.h:353
@ Offset
Definition: DWP.cpp:480
@ Length
Definition: DWP.cpp:480
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
Definition: STLExtras.h:853
bool operator<(int64_t V1, const APSInt &V2)
Definition: APSInt.h:361
void stable_sort(R &&Range)
Definition: STLExtras.h:1995
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1742
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition: bit.h:385
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
bool isAllOnesOrAllOnesSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant -1 integer or a splatted vector of a constant -1 integer (with...
Definition: Utils.cpp:1546
SDValue getBitwiseNotOperand(SDValue V, SDValue Mask, bool AllowUndefs)
If V is a bitwise not, returns the inverted operand.
SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A,...
Definition: STLExtras.h:2400
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition: bit.h:307
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition: Alignment.h:145
llvm::SmallVector< int, 16 > createUnaryMask(ArrayRef< int > Mask, unsigned NumElts)
Given a shuffle mask for a binary shuffle, create the equivalent shuffle mask assuming both operands ...
bool isIntOrFPConstant(SDValue V)
Return true if V is either an integer or FP constant.
bool operator!=(uint64_t V1, const APInt &V2)
Definition: APInt.h:2062
bool operator>=(int64_t V1, const APSInt &V2)
Definition: APSInt.h:360
LLVM_ATTRIBUTE_ALWAYS_INLINE DynamicAPInt & operator+=(DynamicAPInt &A, int64_t B)
Definition: DynamicAPInt.h:516
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2067
const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:296
bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
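widenShuffleMaskElts and its companion narrowShuffleMaskElts (listed further down) rescale shuffle masks between element widths. A tiny worked example with made-up mask values, assuming the declarations live in llvm/Analysis/VectorUtils.h:
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/VectorUtils.h"
using namespace llvm;

// <0,1,6,7> selects two adjacent element pairs, so widening by a factor of 2
// yields <0,3>; widenShuffleMaskElts returns false when no such mask exists.
static bool demoWidenMask(SmallVectorImpl<int> &Wide) {
  const int Narrow[] = {0, 1, 6, 7};
  return widenShuffleMaskElts(/*Scale=*/2, Narrow, Wide);
}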
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition: Utils.cpp:1528
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:346
bool isMinSignedConstant(SDValue V)
Returns true if V is a constant min signed integer value.
ConstantFPSDNode * isConstOrConstSplatFP(SDValue N, bool AllowUndefs=false)
Returns the SDNode if it is a constant splat BuildVector or constant float.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition: MathExtras.h:394
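PowerOf2Ceil, isPowerOf2_64 and the Log2_* helpers nearby are the bit-math utilities the combiner leans on when rounding type and store sizes. A trivial sketch with assumed values:
#include "llvm/Support/MathExtras.h"
#include <cassert>
using namespace llvm;

static void demoPowerOf2Helpers() {
  assert(PowerOf2Ceil(24) == 32 && "rounds up to the next power of two");
  assert(isPowerOf2_64(32) && Log2_64(32) == 5);
}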
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition: bit.h:215
unsigned M1(unsigned Val)
Definition: VE.h:376
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:340
bool isConstantOrConstantVector(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowFP=true, bool AllowOpaqueConstants=true)
Return true if the specified instruction is known to be a constant, or a vector of constants.
Definition: Utils.cpp:1496
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition: bit.h:281
bool operator>(int64_t V1, const APSInt &V2)
Definition: APSInt.h:362
bool isBitwiseNot(SDValue V, bool AllowUndefs=false)
Returns true if V is a bitwise not operation.
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:419
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:291
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1647
detail::ValueMatchesPoly< M > HasValue(M Matcher)
Definition: Error.h:221
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
SDValue peekThroughTruncates(SDValue V)
Return the non-truncated source operand of V if it exists.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1736
SDValue peekThroughOneUseBitcasts(SDValue V)
Return the non-bitcasted and one-use source operand of V if it exists.
CodeGenOptLevel
Code generation optimization level.
Definition: CodeGen.h:54
bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
@ Other
Any other memory.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
CombineLevel
Definition: DAGCombine.h:15
@ AfterLegalizeDAG
Definition: DAGCombine.h:19
@ AfterLegalizeVectorOps
Definition: DAGCombine.h:18
@ BeforeLegalizeTypes
Definition: DAGCombine.h:16
@ AfterLegalizeTypes
Definition: DAGCombine.h:17
void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed...
@ Or
Bitwise or logical OR of integers.
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ FMul
Product of floats.
@ And
Bitwise or logical AND of integers.
@ Add
Sum of integers.
@ FAdd
Sum of floats.
DWARFExpression::Operation Op
unsigned M0(unsigned Val)
Definition: VE.h:375
ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1921
bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
void getShuffleMaskWithWidestElts(ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Repetitively apply widenShuffleMaskElts() for as long as it succeeds, to get the shuffle mask with wi...
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1879
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list is empty.
Definition: STLExtras.h:2039
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition: Alignment.h:208
bool isNeutralConstant(unsigned Opc, SDNodeFlags Flags, SDValue V, unsigned OperandNo)
Returns true if V is a neutral element of Opc with Flags.
bool operator<=(int64_t V1, const APSInt &V2)
Definition: APSInt.h:359
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition: MathExtras.h:382
int getSplatIndex(ArrayRef< int > Mask)
If all non-negative Mask elements are the same value, return that value.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
Definition: Metadata.h:760
AAMDNodes concat(const AAMDNodes &Other) const
Determine the best AAMDNodes after concatenating two different locations together.
static ExponentType semanticsMinExponent(const fltSemantics &)
Definition: APFloat.cpp:331
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:250
static ExponentType semanticsMaxExponent(const fltSemantics &)
Definition: APFloat.cpp:327
static unsigned int semanticsPrecision(const fltSemantics &)
Definition: APFloat.cpp:323
opStatus
IEEE-754R 7: Default exception handling.
Definition: APFloat.h:266
static unsigned int semanticsIntSizeInBits(const fltSemantics &, bool)
Definition: APFloat.cpp:337
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Represent subnormal handling kind for floating point instruction inputs and outputs.
static constexpr DenormalMode getIEEE()
Extended Value Type.
Definition: ValueTypes.h:34
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:93
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:380
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:136
bool knownBitsLE(EVT VT) const
Return true if we know at compile time this has fewer than or the same bits as VT.
Definition: ValueTypes.h:269
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:73
EVT changeTypeToInteger() const
Return the type converted to an equivalently sized integer or vector with integer element type.
Definition: ValueTypes.h:120
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:274
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition: ValueTypes.h:290
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:146
ElementCount getVectorElementCount() const
Definition: ValueTypes.h:340
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:358
bool isByteSized() const
Return true if the bit size is a multiple of 8.
Definition: ValueTypes.h:233
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition: ValueTypes.h:349
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:370
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
Definition: ValueTypes.h:455
TypeSize getStoreSizeInBits() const
Return the number of bits overwritten by a store of the specified value type.
Definition: ValueTypes.h:397
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:306
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:64
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:366
bool isScalableVT() const
Return true if the type is a scalable type.
Definition: ValueTypes.h:183
bool isFixedLengthVector() const
Definition: ValueTypes.h:177
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
Definition: ValueTypes.h:58
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:167
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:313
bool bitsGE(EVT VT) const
Return true if this has no less bits than VT.
Definition: ValueTypes.h:282
bool bitsEq(EVT VT) const
Return true if this has the same number of bits as VT.
Definition: ValueTypes.h:246
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:203
bool isRound() const
Return true if the size is a power-of-two number of bytes.
Definition: ValueTypes.h:238
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition: ValueTypes.h:173
bool knownBitsGE(EVT VT) const
Return true if we know at compile time this has more than or the same bits as VT.
Definition: ValueTypes.h:258
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:318
bool isExtended() const
Test if the given EVT is extended (as opposed to being simple).
Definition: ValueTypes.h:141
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:156
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:326
bool isZeroSized() const
Test if the given EVT has zero size, this will fail if called on a scalable type.
Definition: ValueTypes.h:131
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition: ValueTypes.h:298
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:151
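The EVT accessors above cover most of the type reasoning in this file. A minimal sketch of deriving a related vector type, assuming an LLVMContext and an integer-vector EVT (the helper name is invented):
#include "llvm/CodeGen/ValueTypes.h"
#include <cassert>
using namespace llvm;

// Returns a vector type with the same element count as VT but elements twice
// as wide, e.g. v4i16 -> v4i32; works for fixed and scalable vectors.
static EVT getDoubleWidthIntVector(LLVMContext &Ctx, EVT VT) {
  assert(VT.isVector() && VT.isInteger() && "expected an integer vector");
  unsigned NewEltBits = 2 * VT.getScalarSizeInBits();
  EVT NewEltVT = EVT::getIntegerVT(Ctx, NewEltBits);
  return EVT::getVectorVT(Ctx, NewEltVT, VT.getVectorElementCount());
}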
bool isNonNegative() const
Returns true if this value is known to be non-negative.
Definition: KnownBits.h:97
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition: KnownBits.h:231
bool isConstant() const
Returns true if we know the value of all bits.
Definition: KnownBits.h:50
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
Definition: KnownBits.h:285
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition: KnownBits.h:237
bool isAllOnes() const
Returns true if value is all one bits.
Definition: KnownBits.h:79
const APInt & getConstant() const
Returns the value when all bits have a known value.
Definition: KnownBits.h:56
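These KnownBits queries are normally fed by SelectionDAG::computeKnownBits. A hedged fragment, assuming a SelectionDAG and an SDValue are available:
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Support/KnownBits.h"
using namespace llvm;

// True if DAG analysis proves the sign bit of Op is zero, in which case
// sign- and zero-extension of Op produce the same value.
static bool signBitKnownZero(SelectionDAG &DAG, SDValue Op) {
  KnownBits Known = DAG.computeKnownBits(Op);
  return Known.isNonNegative();
}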
This class contains a discriminated union of information about pointers in memory operands,...
unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
These are IR-level optimization flags that may be propagated to SDNodes.
bool hasNoUnsignedWrap() const
bool hasDisjoint() const
bool hasNoSignedWrap() const
bool hasNonNeg() const
bool hasAllowReassociation() const
void setNoUnsignedWrap(bool b)
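SDNodeFlags gate many reassociation folds; a combine typically checks them before rewriting. A small sketch with assumed operand nodes N0 and N1:
#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

// Reassociating unsigned adds is only reasonable here if neither add can
// wrap; illustrative check only, not the combiner's actual profitability test.
static bool bothAddsNoUnsignedWrap(SDValue N0, SDValue N1) {
  return N0->getFlags().hasNoUnsignedWrap() &&
         N1->getFlags().hasNoUnsignedWrap();
}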
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
Clients of various APIs that cause global effects on the DAG can optionally implement this interface.
Definition: SelectionDAG.h:311
virtual void NodeDeleted(SDNode *N, SDNode *E)
The node N that was deleted and, if E is not null, an equivalent node E that replaced it.
virtual void NodeInserted(SDNode *N)
The node N that was inserted.
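DAGUpdateListener is how clients such as the combiner's worklist stay in sync when nodes are replaced or deleted. A minimal subclass sketch; the CountingListener name is invented:
#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

namespace {
// Registers with the DAG on construction and unregisters on destruction;
// the overrides simply tally the callbacks.
struct CountingListener : SelectionDAG::DAGUpdateListener {
  unsigned Deleted = 0, Inserted = 0;
  explicit CountingListener(SelectionDAG &DAG)
      : SelectionDAG::DAGUpdateListener(DAG) {}
  void NodeDeleted(SDNode *, SDNode *) override { ++Deleted; }
  void NodeInserted(SDNode *) override { ++Inserted; }
};
} // end anonymous namespace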
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...