LLVM 19.0.0git
DAGCombiner.cpp
1//===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
10// both before and after the DAG is legalized.
11//
12// This pass is not a substitute for the LLVM IR instcombine pass. This pass is
13// primarily intended to handle simplification opportunities that are implicit
14// in the LLVM IR and exposed by the various codegen lowering phases.
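// For example, this pass performs simple peephole folds such as
// (add X, 0) -> X, as well as larger transforms such as store merging and
// load/op/store width reduction (see the visit* and merge* routines below).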
15//
16//===----------------------------------------------------------------------===//
17
18#include "llvm/ADT/APFloat.h"
19#include "llvm/ADT/APInt.h"
20#include "llvm/ADT/ArrayRef.h"
21#include "llvm/ADT/DenseMap.h"
23#include "llvm/ADT/STLExtras.h"
24#include "llvm/ADT/SetVector.h"
27#include "llvm/ADT/SmallSet.h"
29#include "llvm/ADT/Statistic.h"
51#include "llvm/IR/Attributes.h"
52#include "llvm/IR/Constant.h"
53#include "llvm/IR/DataLayout.h"
55#include "llvm/IR/Function.h"
56#include "llvm/IR/Metadata.h"
61#include "llvm/Support/Debug.h"
69#include <algorithm>
70#include <cassert>
71#include <cstdint>
72#include <functional>
73#include <iterator>
74#include <optional>
75#include <string>
76#include <tuple>
77#include <utility>
78#include <variant>
79
80#include "MatchContext.h"
81
82using namespace llvm;
83using namespace llvm::SDPatternMatch;
84
85#define DEBUG_TYPE "dagcombine"
86
87STATISTIC(NodesCombined , "Number of dag nodes combined");
88STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
89STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
90STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
91STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
92STATISTIC(SlicedLoads, "Number of load sliced");
93STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");
94
95DEBUG_COUNTER(DAGCombineCounter, "dagcombine",
96 "Controls whether a DAG combine is performed for a node");
97
98static cl::opt<bool>
99CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
100 cl::desc("Enable DAG combiner's use of IR alias analysis"));
101
102static cl::opt<bool>
103UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
104 cl::desc("Enable DAG combiner's use of TBAA"));
105
106#ifndef NDEBUG
107static cl::opt<std::string>
108CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
109 cl::desc("Only use DAG-combiner alias analysis in this"
110 " function"));
111#endif
112
113/// Hidden option to stress test load slicing, i.e., when this option
114/// is enabled, load slicing bypasses most of its profitability guards.
115static cl::opt<bool>
116StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
117 cl::desc("Bypass the profitability model of load slicing"),
118 cl::init(false));
119
120static cl::opt<bool>
121 MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
122 cl::desc("DAG combiner may split indexing from loads"));
123
124static cl::opt<bool>
125 EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true),
126 cl::desc("DAG combiner enable merging multiple stores "
127 "into a wider store"));
128
130 "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
131 cl::desc("Limit the number of operands to inline for Token Factors"));
132
134 "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10),
135 cl::desc("Limit the number of times for the same StoreNode and RootNode "
136 "to bail out in store merging dependence check"));
137
139 "combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true),
140 cl::desc("DAG combiner enable reducing the width of load/op/store "
141 "sequence"));
142
144 "combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true),
145 cl::desc("DAG combiner enable load/<replace bytes>/store with "
146 "a narrower store"));
147
149 "combiner-vector-fcopysign-extend-round", cl::Hidden, cl::init(false),
150 cl::desc(
151 "Enable merging extends and rounds into FCOPYSIGN on vector types"));
152
153namespace {
154
155 class DAGCombiner {
156 SelectionDAG &DAG;
157 const TargetLowering &TLI;
158 const SelectionDAGTargetInfo *STI;
159 CombineLevel Level = BeforeLegalizeTypes;
160 CodeGenOptLevel OptLevel;
161 bool LegalDAG = false;
162 bool LegalOperations = false;
163 bool LegalTypes = false;
164 bool ForCodeSize;
165 bool DisableGenericCombines;
166
167 /// Worklist of all of the nodes that need to be simplified.
168 ///
169 /// This must behave as a stack -- new nodes to process are pushed onto the
170 /// back and when processing we pop off of the back.
171 ///
172 /// The worklist will not contain duplicates but may contain null entries
173 /// due to nodes being deleted from the underlying DAG.
174 SmallVector<SDNode *, 64> Worklist;
175
176 /// Mapping from an SDNode to its position on the worklist.
177 ///
178 /// This is used to find and remove nodes from the worklist (by nulling
179 /// them) when they are deleted from the underlying DAG. It relies on
180 /// stable indices of nodes within the worklist.
181 DenseMap<SDNode *, unsigned> WorklistMap;
182
183 /// This records all nodes attempted to be added to the worklist since we
184 /// considered a new worklist entry. As we do not add duplicate nodes
185 /// to the worklist, this is different from the tail of the worklist.
186 SmallSetVector<SDNode *, 32> PruningList;
187
188 /// Set of nodes which have been combined (at least once).
189 ///
190 /// This is used to allow us to reliably add any operands of a DAG node
191 /// which have not yet been combined to the worklist.
192 SmallPtrSet<SDNode *, 32> CombinedNodes;
193
194 /// Map from candidate StoreNode to the pair of RootNode and count.
195 /// The count is used to track how many times we have seen the StoreNode
196 /// with the same RootNode bail out in dependence check. If we have seen
197 /// the bail out for the same pair many times over a limit, we won't
198 /// consider the StoreNode with the same RootNode as store merging
199 /// candidate again.
200 DenseMap<SDNode *, std::pair<SDNode *, unsigned>> StoreRootCountMap;
201
202 // AA - Used for DAG load/store alias analysis.
203 AliasAnalysis *AA;
204
205 /// When an instruction is simplified, add all users of the instruction to
206 /// the worklist because they might now be simplified further.
207 void AddUsersToWorklist(SDNode *N) {
208 for (SDNode *Node : N->uses())
209 AddToWorklist(Node);
210 }
211
212 /// Convenient shorthand to add a node and all of its users to the worklist.
213 void AddToWorklistWithUsers(SDNode *N) {
214 AddUsersToWorklist(N);
215 AddToWorklist(N);
216 }
217
218 // Prune potentially dangling nodes. This is called after
219 // any visit to a node, but should also be called during a visit after any
220 // failed combine which may have created a DAG node.
221 void clearAddedDanglingWorklistEntries() {
222 // Check any nodes added to the worklist to see if they are prunable.
223 while (!PruningList.empty()) {
224 auto *N = PruningList.pop_back_val();
225 if (N->use_empty())
226 recursivelyDeleteUnusedNodes(N);
227 }
228 }
229
230 SDNode *getNextWorklistEntry() {
231 // Before we do any work, remove nodes that are not in use.
232 clearAddedDanglingWorklistEntries();
233 SDNode *N = nullptr;
234 // The Worklist holds the SDNodes in order, but it may contain null
235 // entries.
236 while (!N && !Worklist.empty()) {
237 N = Worklist.pop_back_val();
238 }
239
240 if (N) {
241 bool GoodWorklistEntry = WorklistMap.erase(N);
242 (void)GoodWorklistEntry;
243 assert(GoodWorklistEntry &&
244 "Found a worklist entry without a corresponding map entry!");
245 }
246 return N;
247 }
248
249 /// Call the node-specific routine that folds each particular type of node.
250 SDValue visit(SDNode *N);
251
252 public:
253 DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOptLevel OL)
254 : DAG(D), TLI(D.getTargetLoweringInfo()),
255 STI(D.getSubtarget().getSelectionDAGInfo()), OptLevel(OL), AA(AA) {
256 ForCodeSize = DAG.shouldOptForSize();
257 DisableGenericCombines = STI && STI->disableGenericCombines(OptLevel);
258
259 MaximumLegalStoreInBits = 0;
260 // We use the minimum store size here, since that's all we can guarantee
261 // for the scalable vector types.
262 for (MVT VT : MVT::all_valuetypes())
263 if (EVT(VT).isSimple() && VT != MVT::Other &&
264 TLI.isTypeLegal(EVT(VT)) &&
265 VT.getSizeInBits().getKnownMinValue() >= MaximumLegalStoreInBits)
266 MaximumLegalStoreInBits = VT.getSizeInBits().getKnownMinValue();
267 }
268
269 void ConsiderForPruning(SDNode *N) {
270 // Mark this for potential pruning.
271 PruningList.insert(N);
272 }
273
274 /// Add to the worklist making sure its instance is at the back (next to be
275 /// processed.)
276 void AddToWorklist(SDNode *N, bool IsCandidateForPruning = true) {
277 assert(N->getOpcode() != ISD::DELETED_NODE &&
278 "Deleted Node added to Worklist");
279
280 // Skip handle nodes as they can't usefully be combined and confuse the
281 // zero-use deletion strategy.
282 if (N->getOpcode() == ISD::HANDLENODE)
283 return;
284
285 if (IsCandidateForPruning)
286 ConsiderForPruning(N);
287
288 if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
289 Worklist.push_back(N);
290 }
291
292 /// Remove all instances of N from the worklist.
293 void removeFromWorklist(SDNode *N) {
294 CombinedNodes.erase(N);
295 PruningList.remove(N);
296 StoreRootCountMap.erase(N);
297
298 auto It = WorklistMap.find(N);
299 if (It == WorklistMap.end())
300 return; // Not in the worklist.
301
302 // Null out the entry rather than erasing it to avoid a linear operation.
303 Worklist[It->second] = nullptr;
304 WorklistMap.erase(It);
305 }
306
307 void deleteAndRecombine(SDNode *N);
308 bool recursivelyDeleteUnusedNodes(SDNode *N);
309
310 /// Replaces all uses of the results of one DAG node with new values.
311 SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
312 bool AddTo = true);
313
314 /// Replaces all uses of the results of one DAG node with new values.
315 SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
316 return CombineTo(N, &Res, 1, AddTo);
317 }
318
319 /// Replaces all uses of the results of one DAG node with new values.
320 SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
321 bool AddTo = true) {
322 SDValue To[] = { Res0, Res1 };
323 return CombineTo(N, To, 2, AddTo);
324 }
325
326 void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
327
328 private:
329 unsigned MaximumLegalStoreInBits;
330
331 /// Check the specified integer node value to see if it can be simplified or
332 /// if things it uses can be simplified by bit propagation.
333 /// If so, return true.
334 bool SimplifyDemandedBits(SDValue Op) {
335 unsigned BitWidth = Op.getScalarValueSizeInBits();
336 APInt DemandedBits = APInt::getAllOnes(BitWidth);
337 return SimplifyDemandedBits(Op, DemandedBits);
338 }
339
340 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
341 EVT VT = Op.getValueType();
342 APInt DemandedElts = VT.isFixedLengthVector()
343 ? APInt::getAllOnes(VT.getVectorNumElements())
344 : APInt(1, 1);
345 return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, false);
346 }
347
348 /// Check the specified vector node value to see if it can be simplified or
349 /// if things it uses can be simplified as it only uses some of the
350 /// elements. If so, return true.
351 bool SimplifyDemandedVectorElts(SDValue Op) {
352 // TODO: For now just pretend it cannot be simplified.
353 if (Op.getValueType().isScalableVector())
354 return false;
355
356 unsigned NumElts = Op.getValueType().getVectorNumElements();
357 APInt DemandedElts = APInt::getAllOnes(NumElts);
358 return SimplifyDemandedVectorElts(Op, DemandedElts);
359 }
360
361 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
362 const APInt &DemandedElts,
363 bool AssumeSingleUse = false);
364 bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
365 bool AssumeSingleUse = false);
366
367 bool CombineToPreIndexedLoadStore(SDNode *N);
368 bool CombineToPostIndexedLoadStore(SDNode *N);
369 SDValue SplitIndexingFromLoad(LoadSDNode *LD);
370 bool SliceUpLoad(SDNode *N);
371
372 // Looks up the chain to find a unique (unaliased) store feeding the passed
373 // load. If no such store is found, returns a nullptr.
374 // Note: This will look past a CALLSEQ_START if the load is chained to it so
375 // that it can find stack stores for byval params.
376 StoreSDNode *getUniqueStoreFeeding(LoadSDNode *LD, int64_t &Offset);
377 // Scalars have size 0 to distinguish from singleton vectors.
378 SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
379 bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
380 bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);
381
382 /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
383 /// load.
384 ///
385 /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
386 /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
387 /// \param EltNo index of the vector element to load.
388 /// \param OriginalLoad load that EVE came from to be replaced.
389 /// \returns EVE on success SDValue() on failure.
390 SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
391 SDValue EltNo,
392 LoadSDNode *OriginalLoad);
393 void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
394 SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
395 SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
396 SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
397 SDValue PromoteIntBinOp(SDValue Op);
398 SDValue PromoteIntShiftOp(SDValue Op);
399 SDValue PromoteExtend(SDValue Op);
400 bool PromoteLoad(SDValue Op);
401
402 SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
403 SDValue RHS, SDValue True, SDValue False,
404 ISD::CondCode CC);
405
406 /// Call the node-specific routine that knows how to fold each
407 /// particular type of node. If that doesn't do anything, try the
408 /// target-specific DAG combines.
409 SDValue combine(SDNode *N);
410
411 // Visitation implementation - Implement dag node combining for different
412 // node types. The semantics are as follows:
413 // Return Value:
414 // SDValue.getNode() == 0 - No change was made
415 // SDValue.getNode() == N - N was replaced, is dead and has been handled.
416 // otherwise - N should be replaced by the returned Operand.
417 //
418 SDValue visitTokenFactor(SDNode *N);
419 SDValue visitMERGE_VALUES(SDNode *N);
420 SDValue visitADD(SDNode *N);
421 SDValue visitADDLike(SDNode *N);
422 SDValue visitADDLikeCommutative(SDValue N0, SDValue N1, SDNode *LocReference);
423 SDValue visitSUB(SDNode *N);
424 SDValue visitADDSAT(SDNode *N);
425 SDValue visitSUBSAT(SDNode *N);
426 SDValue visitADDC(SDNode *N);
427 SDValue visitADDO(SDNode *N);
428 SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
429 SDValue visitSUBC(SDNode *N);
430 SDValue visitSUBO(SDNode *N);
431 SDValue visitADDE(SDNode *N);
432 SDValue visitUADDO_CARRY(SDNode *N);
433 SDValue visitSADDO_CARRY(SDNode *N);
434 SDValue visitUADDO_CARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
435 SDNode *N);
436 SDValue visitSADDO_CARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
437 SDNode *N);
438 SDValue visitSUBE(SDNode *N);
439 SDValue visitUSUBO_CARRY(SDNode *N);
440 SDValue visitSSUBO_CARRY(SDNode *N);
441 SDValue visitMUL(SDNode *N);
442 SDValue visitMULFIX(SDNode *N);
443 SDValue useDivRem(SDNode *N);
444 SDValue visitSDIV(SDNode *N);
445 SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
446 SDValue visitUDIV(SDNode *N);
447 SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
448 SDValue visitREM(SDNode *N);
449 SDValue visitMULHU(SDNode *N);
450 SDValue visitMULHS(SDNode *N);
451 SDValue visitAVG(SDNode *N);
452 SDValue visitABD(SDNode *N);
453 SDValue visitSMUL_LOHI(SDNode *N);
454 SDValue visitUMUL_LOHI(SDNode *N);
455 SDValue visitMULO(SDNode *N);
456 SDValue visitIMINMAX(SDNode *N);
457 SDValue visitAND(SDNode *N);
458 SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
459 SDValue visitOR(SDNode *N);
460 SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);
461 SDValue visitXOR(SDNode *N);
462 SDValue SimplifyVCastOp(SDNode *N, const SDLoc &DL);
463 SDValue SimplifyVBinOp(SDNode *N, const SDLoc &DL);
464 SDValue visitSHL(SDNode *N);
465 SDValue visitSRA(SDNode *N);
466 SDValue visitSRL(SDNode *N);
467 SDValue visitFunnelShift(SDNode *N);
468 SDValue visitSHLSAT(SDNode *N);
469 SDValue visitRotate(SDNode *N);
470 SDValue visitABS(SDNode *N);
471 SDValue visitBSWAP(SDNode *N);
472 SDValue visitBITREVERSE(SDNode *N);
473 SDValue visitCTLZ(SDNode *N);
474 SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
475 SDValue visitCTTZ(SDNode *N);
476 SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
477 SDValue visitCTPOP(SDNode *N);
478 SDValue visitSELECT(SDNode *N);
479 SDValue visitVSELECT(SDNode *N);
480 SDValue visitVP_SELECT(SDNode *N);
481 SDValue visitSELECT_CC(SDNode *N);
482 SDValue visitSETCC(SDNode *N);
483 SDValue visitSETCCCARRY(SDNode *N);
484 SDValue visitSIGN_EXTEND(SDNode *N);
485 SDValue visitZERO_EXTEND(SDNode *N);
486 SDValue visitANY_EXTEND(SDNode *N);
487 SDValue visitAssertExt(SDNode *N);
488 SDValue visitAssertAlign(SDNode *N);
489 SDValue visitSIGN_EXTEND_INREG(SDNode *N);
490 SDValue visitEXTEND_VECTOR_INREG(SDNode *N);
491 SDValue visitTRUNCATE(SDNode *N);
492 SDValue visitBITCAST(SDNode *N);
493 SDValue visitFREEZE(SDNode *N);
494 SDValue visitBUILD_PAIR(SDNode *N);
495 SDValue visitFADD(SDNode *N);
496 SDValue visitVP_FADD(SDNode *N);
497 SDValue visitVP_FSUB(SDNode *N);
498 SDValue visitSTRICT_FADD(SDNode *N);
499 SDValue visitFSUB(SDNode *N);
500 SDValue visitFMUL(SDNode *N);
501 template <class MatchContextClass> SDValue visitFMA(SDNode *N);
502 SDValue visitFMAD(SDNode *N);
503 SDValue visitFDIV(SDNode *N);
504 SDValue visitFREM(SDNode *N);
505 SDValue visitFSQRT(SDNode *N);
506 SDValue visitFCOPYSIGN(SDNode *N);
507 SDValue visitFPOW(SDNode *N);
508 SDValue visitSINT_TO_FP(SDNode *N);
509 SDValue visitUINT_TO_FP(SDNode *N);
510 SDValue visitFP_TO_SINT(SDNode *N);
511 SDValue visitFP_TO_UINT(SDNode *N);
512 SDValue visitXRINT(SDNode *N);
513 SDValue visitFP_ROUND(SDNode *N);
514 SDValue visitFP_EXTEND(SDNode *N);
515 SDValue visitFNEG(SDNode *N);
516 SDValue visitFABS(SDNode *N);
517 SDValue visitFCEIL(SDNode *N);
518 SDValue visitFTRUNC(SDNode *N);
519 SDValue visitFFREXP(SDNode *N);
520 SDValue visitFFLOOR(SDNode *N);
521 SDValue visitFMinMax(SDNode *N);
522 SDValue visitBRCOND(SDNode *N);
523 SDValue visitBR_CC(SDNode *N);
524 SDValue visitLOAD(SDNode *N);
525
526 SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
527 SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
528 SDValue replaceStoreOfInsertLoad(StoreSDNode *ST);
529
530 bool refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(SDNode *N);
531
532 SDValue visitSTORE(SDNode *N);
533 SDValue visitLIFETIME_END(SDNode *N);
534 SDValue visitINSERT_VECTOR_ELT(SDNode *N);
535 SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
536 SDValue visitBUILD_VECTOR(SDNode *N);
537 SDValue visitCONCAT_VECTORS(SDNode *N);
538 SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
539 SDValue visitVECTOR_SHUFFLE(SDNode *N);
540 SDValue visitSCALAR_TO_VECTOR(SDNode *N);
541 SDValue visitINSERT_SUBVECTOR(SDNode *N);
542 SDValue visitMLOAD(SDNode *N);
543 SDValue visitMSTORE(SDNode *N);
544 SDValue visitMGATHER(SDNode *N);
545 SDValue visitMSCATTER(SDNode *N);
546 SDValue visitVPGATHER(SDNode *N);
547 SDValue visitVPSCATTER(SDNode *N);
548 SDValue visitVP_STRIDED_LOAD(SDNode *N);
549 SDValue visitVP_STRIDED_STORE(SDNode *N);
550 SDValue visitFP_TO_FP16(SDNode *N);
551 SDValue visitFP16_TO_FP(SDNode *N);
552 SDValue visitFP_TO_BF16(SDNode *N);
553 SDValue visitBF16_TO_FP(SDNode *N);
554 SDValue visitVECREDUCE(SDNode *N);
555 SDValue visitVPOp(SDNode *N);
556 SDValue visitGET_FPENV_MEM(SDNode *N);
557 SDValue visitSET_FPENV_MEM(SDNode *N);
558
559 template <class MatchContextClass>
560 SDValue visitFADDForFMACombine(SDNode *N);
561 template <class MatchContextClass>
562 SDValue visitFSUBForFMACombine(SDNode *N);
563 SDValue visitFMULForFMADistributiveCombine(SDNode *N);
564
565 SDValue XformToShuffleWithZero(SDNode *N);
566 bool reassociationCanBreakAddressingModePattern(unsigned Opc,
567 const SDLoc &DL,
568 SDNode *N,
569 SDValue N0,
570 SDValue N1);
571 SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
572 SDValue N1, SDNodeFlags Flags);
573 SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
574 SDValue N1, SDNodeFlags Flags);
575 SDValue reassociateReduction(unsigned RedOpc, unsigned Opc, const SDLoc &DL,
576 EVT VT, SDValue N0, SDValue N1,
577 SDNodeFlags Flags = SDNodeFlags());
578
579 SDValue visitShiftByConstant(SDNode *N);
580
581 SDValue foldSelectOfConstants(SDNode *N);
582 SDValue foldVSelectOfConstants(SDNode *N);
583 SDValue foldBinOpIntoSelect(SDNode *BO);
584 bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
585 SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
586 SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
587 SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
588 SDValue N2, SDValue N3, ISD::CondCode CC,
589 bool NotExtCompare = false);
590 SDValue convertSelectOfFPConstantsToLoadOffset(
591 const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
592 ISD::CondCode CC);
593 SDValue foldSignChangeInBitcast(SDNode *N);
594 SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
595 SDValue N2, SDValue N3, ISD::CondCode CC);
596 SDValue foldSelectOfBinops(SDNode *N);
597 SDValue foldSextSetcc(SDNode *N);
598 SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
599 const SDLoc &DL);
600 SDValue foldSubToUSubSat(EVT DstVT, SDNode *N, const SDLoc &DL);
601 SDValue foldABSToABD(SDNode *N, const SDLoc &DL);
602 SDValue unfoldMaskedMerge(SDNode *N);
603 SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
604 SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
605 const SDLoc &DL, bool foldBooleans);
606 SDValue rebuildSetCC(SDValue N);
607
608 bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
609 SDValue &CC, bool MatchStrict = false) const;
610 bool isOneUseSetCC(SDValue N) const;
611
612 SDValue foldAddToAvg(SDNode *N, const SDLoc &DL);
613 SDValue foldSubToAvg(SDNode *N, const SDLoc &DL);
614
615 SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
616 unsigned HiOp);
617 SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
618 SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
619 const TargetLowering &TLI);
620
621 SDValue CombineExtLoad(SDNode *N);
622 SDValue CombineZExtLogicopShiftLoad(SDNode *N);
623 SDValue combineRepeatedFPDivisors(SDNode *N);
624 SDValue combineFMulOrFDivWithIntPow2(SDNode *N);
625 SDValue mergeInsertEltWithShuffle(SDNode *N, unsigned InsIndex);
626 SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
627 SDValue combineInsertEltToLoad(SDNode *N, unsigned InsIndex);
628 SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
629 SDValue BuildSDIV(SDNode *N);
630 SDValue BuildSDIVPow2(SDNode *N);
631 SDValue BuildUDIV(SDNode *N);
632 SDValue BuildSREMPow2(SDNode *N);
633 SDValue buildOptimizedSREM(SDValue N0, SDValue N1, SDNode *N);
634 SDValue BuildLogBase2(SDValue V, const SDLoc &DL,
635 bool KnownNeverZero = false,
636 bool InexpensiveOnly = false,
637 std::optional<EVT> OutVT = std::nullopt);
638 SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags);
639 SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
640 SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
641 SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
642 SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
643 SDNodeFlags Flags, bool Reciprocal);
644 SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
645 SDNodeFlags Flags, bool Reciprocal);
646 SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
647 bool DemandHighBits = true);
648 SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
649 SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
650 SDValue InnerPos, SDValue InnerNeg, bool HasPos,
651 unsigned PosOpcode, unsigned NegOpcode,
652 const SDLoc &DL);
653 SDValue MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, SDValue Neg,
654 SDValue InnerPos, SDValue InnerNeg, bool HasPos,
655 unsigned PosOpcode, unsigned NegOpcode,
656 const SDLoc &DL);
657 SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
658 SDValue MatchLoadCombine(SDNode *N);
659 SDValue mergeTruncStores(StoreSDNode *N);
660 SDValue reduceLoadWidth(SDNode *N);
661 SDValue ReduceLoadOpStoreWidth(SDNode *N);
662 SDValue splitMergedValStore(StoreSDNode *ST);
663 SDValue TransformFPLoadStorePair(SDNode *N);
664 SDValue convertBuildVecZextToZext(SDNode *N);
665 SDValue convertBuildVecZextToBuildVecWithZeros(SDNode *N);
666 SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
667 SDValue reduceBuildVecTruncToBitCast(SDNode *N);
668 SDValue reduceBuildVecToShuffle(SDNode *N);
669 SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
670 ArrayRef<int> VectorMask, SDValue VecIn1,
671 SDValue VecIn2, unsigned LeftIdx,
672 bool DidSplitVec);
673 SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
674
675 /// Walk up chain skipping non-aliasing memory nodes,
676 /// looking for aliasing nodes and adding them to the Aliases vector.
677 void GatherAllAliases(SDNode *N, SDValue OriginalChain,
678 SmallVectorImpl<SDValue> &Aliases);
679
680 /// Return true if there is any possibility that the two addresses overlap.
681 bool mayAlias(SDNode *Op0, SDNode *Op1) const;
682
683 /// Walk up chain skipping non-aliasing memory nodes, looking for a better
684 /// chain (aliasing node.)
685 SDValue FindBetterChain(SDNode *N, SDValue Chain);
686
687 /// Try to replace a store and any possibly adjacent stores on
688 /// consecutive chains with better chains. Return true only if St is
689 /// replaced.
690 ///
691 /// Notice that other chains may still be replaced even if the function
692 /// returns false.
693 bool findBetterNeighborChains(StoreSDNode *St);
694
695 // Helper for findBetterNeighborChains. Walk up the store chain and add
696 // additional chained stores that do not overlap and can be parallelized.
697 bool parallelizeChainedStores(StoreSDNode *St);
698
699 /// Holds a pointer to an LSBaseSDNode as well as information on where it
700 /// is located in a sequence of memory operations connected by a chain.
701 struct MemOpLink {
702 // Ptr to the mem node.
703 LSBaseSDNode *MemNode;
704
705 // Offset from the base ptr.
706 int64_t OffsetFromBase;
707
708 MemOpLink(LSBaseSDNode *N, int64_t Offset)
709 : MemNode(N), OffsetFromBase(Offset) {}
710 };
711
712 // Classify the origin of a stored value.
713 enum class StoreSource { Unknown, Constant, Extract, Load };
714 StoreSource getStoreSource(SDValue StoreVal) {
715 switch (StoreVal.getOpcode()) {
716 case ISD::Constant:
717 case ISD::ConstantFP:
718 return StoreSource::Constant;
719 case ISD::BUILD_VECTOR:
720 if (ISD::isBuildVectorOfConstantSDNodes(StoreVal.getNode()) ||
721 ISD::isBuildVectorOfConstantFPSDNodes(StoreVal.getNode()))
722 return StoreSource::Constant;
723 return StoreSource::Unknown;
724 case ISD::EXTRACT_VECTOR_ELT:
725 case ISD::EXTRACT_SUBVECTOR:
726 return StoreSource::Extract;
727 case ISD::LOAD:
728 return StoreSource::Load;
729 default:
730 return StoreSource::Unknown;
731 }
732 }
733
734 /// This is a helper function for visitMUL to check the profitability
735 /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
736 /// MulNode is the original multiply, AddNode is (add x, c1),
737 /// and ConstNode is c2.
738 bool isMulAddWithConstProfitable(SDNode *MulNode, SDValue AddNode,
739 SDValue ConstNode);
740
741 /// This is a helper function for visitAND and visitZERO_EXTEND. Returns
742 /// true if the (and (load x) c) pattern matches an extload. ExtVT returns
743 /// the type of the loaded value to be extended.
744 bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
745 EVT LoadResultTy, EVT &ExtVT);
746
747 /// Helper function to calculate whether the given Load/Store can have its
748 /// width reduced to ExtVT.
749 bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
750 EVT &MemVT, unsigned ShAmt = 0);
751
752 /// Used by BackwardsPropagateMask to find suitable loads.
753 bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
754 SmallPtrSetImpl<SDNode*> &NodesWithConsts,
755 ConstantSDNode *Mask, SDNode *&NodeToMask);
756 /// Attempt to propagate a given AND node back to load leaves so that they
757 /// can be combined into narrow loads.
758 bool BackwardsPropagateMask(SDNode *N);
759
760 /// Helper function for mergeConsecutiveStores which merges the component
761 /// store chains.
762 SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
763 unsigned NumStores);
764
765 /// Helper function for mergeConsecutiveStores which checks if all the store
766 /// nodes have the same underlying object. We can still reuse the first
767 /// store's pointer info if all the stores are from the same object.
768 bool hasSameUnderlyingObj(ArrayRef<MemOpLink> StoreNodes);
769
770 /// This is a helper function for mergeConsecutiveStores. When the source
771 /// elements of the consecutive stores are all constants or all extracted
772 /// vector elements, try to merge them into one larger store introducing
773 /// bitcasts if necessary. \return True if a merged store was created.
774 bool mergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
775 EVT MemVT, unsigned NumStores,
776 bool IsConstantSrc, bool UseVector,
777 bool UseTrunc);
778
779 /// This is a helper function for mergeConsecutiveStores. Stores that
780 /// potentially may be merged with St are placed in StoreNodes. RootNode is
781 /// a chain predecessor to all store candidates.
782 void getStoreMergeCandidates(StoreSDNode *St,
783 SmallVectorImpl<MemOpLink> &StoreNodes,
784 SDNode *&Root);
785
786 /// Helper function for mergeConsecutiveStores. Checks if candidate stores
787 /// have indirect dependency through their operands. RootNode is the
788 /// predecessor to all stores calculated by getStoreMergeCandidates and is
789 /// used to prune the dependency check. \return True if safe to merge.
790 bool checkMergeStoreCandidatesForDependencies(
791 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
792 SDNode *RootNode);
793
794 /// This is a helper function for mergeConsecutiveStores. Given a list of
795 /// store candidates, find the first N that are consecutive in memory.
796 /// Returns 0 if there are not at least 2 consecutive stores to try merging.
797 unsigned getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
798 int64_t ElementSizeBytes) const;
799
800 /// This is a helper function for mergeConsecutiveStores. It is used for
801 /// store chains that are composed entirely of constant values.
802 bool tryStoreMergeOfConstants(SmallVectorImpl<MemOpLink> &StoreNodes,
803 unsigned NumConsecutiveStores,
804 EVT MemVT, SDNode *Root, bool AllowVectors);
805
806 /// This is a helper function for mergeConsecutiveStores. It is used for
807 /// store chains that are composed entirely of extracted vector elements.
808 /// When extracting multiple vector elements, try to store them in one
809 /// vector store rather than a sequence of scalar stores.
810 bool tryStoreMergeOfExtracts(SmallVectorImpl<MemOpLink> &StoreNodes,
811 unsigned NumConsecutiveStores, EVT MemVT,
812 SDNode *Root);
813
814 /// This is a helper function for mergeConsecutiveStores. It is used for
815 /// store chains that are composed entirely of loaded values.
816 bool tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
817 unsigned NumConsecutiveStores, EVT MemVT,
818 SDNode *Root, bool AllowVectors,
819 bool IsNonTemporalStore, bool IsNonTemporalLoad);
820
821 /// Merge consecutive store operations into a wide store.
822 /// This optimization uses wide integers or vectors when possible.
823 /// \return true if stores were merged.
824 bool mergeConsecutiveStores(StoreSDNode *St);
825
826 /// Try to transform a truncation where C is a constant:
827 /// (trunc (and X, C)) -> (and (trunc X), (trunc C))
828 ///
829 /// \p N needs to be a truncation and its first operand an AND. Other
830 /// requirements are checked by the function (e.g. that trunc is
831 /// single-use) and if missed an empty SDValue is returned.
832 SDValue distributeTruncateThroughAnd(SDNode *N);
833
834 /// Helper function to determine whether the target supports operation
835 /// given by \p Opcode for type \p VT, that is, whether the operation
836 /// is legal or custom before legalizing operations, and whether it is
837 /// legal (but not custom) after legalization.
838 bool hasOperation(unsigned Opcode, EVT VT) {
839 return TLI.isOperationLegalOrCustom(Opcode, VT, LegalOperations);
840 }
841
842 public:
843 /// Runs the dag combiner on all nodes in the work list
844 void Run(CombineLevel AtLevel);
845
846 SelectionDAG &getDAG() const { return DAG; }
847
848 /// Returns a type large enough to hold any valid shift amount - before type
849 /// legalization these can be huge.
850 EVT getShiftAmountTy(EVT LHSTy) {
851 assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
852 return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
853 }
854
855 /// This method returns true if we are running before type legalization or
856 /// if the specified VT is legal.
857 bool isTypeLegal(const EVT &VT) {
858 if (!LegalTypes) return true;
859 return TLI.isTypeLegal(VT);
860 }
861
862 /// Convenience wrapper around TargetLowering::getSetCCResultType
863 EVT getSetCCResultType(EVT VT) const {
864 return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
865 }
866
867 void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
868 SDValue OrigLoad, SDValue ExtLoad,
869 ISD::NodeType ExtType);
870 };
871
872/// This class is a DAGUpdateListener that removes any deleted
873/// nodes from the worklist.
874class WorklistRemover : public SelectionDAG::DAGUpdateListener {
875 DAGCombiner &DC;
876
877public:
878 explicit WorklistRemover(DAGCombiner &dc)
879 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
880
881 void NodeDeleted(SDNode *N, SDNode *E) override {
882 DC.removeFromWorklist(N);
883 }
884};
885
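/// This listener marks newly created nodes as candidates for pruning instead
/// of adding them to the worklist directly; see the FIXME in NodeInserted
/// below for why direct insertion proved too expensive on large DAGs.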
886class WorklistInserter : public SelectionDAG::DAGUpdateListener {
887 DAGCombiner &DC;
888
889public:
890 explicit WorklistInserter(DAGCombiner &dc)
891 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
892
893 // FIXME: Ideally we could add N to the worklist, but this causes exponential
894 // compile time costs in large DAGs, e.g. Halide.
895 void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
896};
897
898} // end anonymous namespace
899
900//===----------------------------------------------------------------------===//
901// TargetLowering::DAGCombinerInfo implementation
902//===----------------------------------------------------------------------===//
903
904void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
905 ((DAGCombiner*)DC)->AddToWorklist(N);
906}
907
908SDValue TargetLowering::DAGCombinerInfo::
909CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
910 return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
911}
912
913SDValue TargetLowering::DAGCombinerInfo::
914CombineTo(SDNode *N, SDValue Res, bool AddTo) {
915 return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
916}
917
918SDValue TargetLowering::DAGCombinerInfo::
919CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
920 return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
921}
922
923bool TargetLowering::DAGCombinerInfo::
924recursivelyDeleteUnusedNodes(SDNode *N) {
925 return ((DAGCombiner*)DC)->recursivelyDeleteUnusedNodes(N);
926}
927
928void TargetLowering::DAGCombinerInfo::
929CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
930 return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
931}
932
933//===----------------------------------------------------------------------===//
934// Helper Functions
935//===----------------------------------------------------------------------===//
936
937void DAGCombiner::deleteAndRecombine(SDNode *N) {
938 removeFromWorklist(N);
939
940 // If the operands of this node are only used by the node, they will now be
941 // dead. Make sure to re-visit them and recursively delete dead nodes.
942 for (const SDValue &Op : N->ops())
943 // For an operand generating multiple values, one of the values may
944 // become dead allowing further simplification (e.g. split index
945 // arithmetic from an indexed load).
946 if (Op->hasOneUse() || Op->getNumValues() > 1)
947 AddToWorklist(Op.getNode());
948
949 DAG.DeleteNode(N);
950}
951
952// APInts must be the same size for most operations; this helper
953// function zero-extends the shorter of the pair so that they match.
954// We provide an Offset so that we can create bitwidths that won't overflow.
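// For example, with an 8-bit LHS, a 16-bit RHS and Offset == 1, both values
// are zero-extended to 17 bits.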
955static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
956 unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
957 LHS = LHS.zext(Bits);
958 RHS = RHS.zext(Bits);
959}
960
961// Return true if this node is a setcc, or is a select_cc
962// that selects between the target values used for true and false, making it
963// equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
964// the appropriate nodes based on the type of node we are checking. This
965// simplifies life a bit for the callers.
966bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
967 SDValue &CC, bool MatchStrict) const {
968 if (N.getOpcode() == ISD::SETCC) {
969 LHS = N.getOperand(0);
970 RHS = N.getOperand(1);
971 CC = N.getOperand(2);
972 return true;
973 }
974
975 if (MatchStrict &&
976 (N.getOpcode() == ISD::STRICT_FSETCC ||
977 N.getOpcode() == ISD::STRICT_FSETCCS)) {
978 LHS = N.getOperand(1);
979 RHS = N.getOperand(2);
980 CC = N.getOperand(3);
981 return true;
982 }
983
984 if (N.getOpcode() != ISD::SELECT_CC || !TLI.isConstTrueVal(N.getOperand(2)) ||
985 !TLI.isConstFalseVal(N.getOperand(3)))
986 return false;
987
988 if (TLI.getBooleanContents(N.getValueType()) ==
989 TargetLowering::UndefinedBooleanContent)
990 return false;
991
992 LHS = N.getOperand(0);
993 RHS = N.getOperand(1);
994 CC = N.getOperand(4);
995 return true;
996}
997
998/// Return true if this is a SetCC-equivalent operation with only one use.
999/// If this is true, it allows the users to invert the operation for free when
1000/// it is profitable to do so.
1001bool DAGCombiner::isOneUseSetCC(SDValue N) const {
1002 SDValue N0, N1, N2;
1003 if (isSetCCEquivalent(N, N0, N1, N2) && N->hasOneUse())
1004 return true;
1005 return false;
1006}
1007
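// Returns true if N is a constant splat (or splat BUILD_VECTOR) whose value is
// exactly the all-ones mask for the width of ScalarTy (0xFF, 0xFFFF or
// 0xFFFFFFFF); other scalar types are rejected.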
1008static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy) {
1009 if (!ScalarTy.isSimple())
1010 return false;
1011
1012 uint64_t MaskForTy = 0ULL;
1013 switch (ScalarTy.getSimpleVT().SimpleTy) {
1014 case MVT::i8:
1015 MaskForTy = 0xFFULL;
1016 break;
1017 case MVT::i16:
1018 MaskForTy = 0xFFFFULL;
1019 break;
1020 case MVT::i32:
1021 MaskForTy = 0xFFFFFFFFULL;
1022 break;
1023 default:
1024 return false;
1025 break;
1026 }
1027
1028 APInt Val;
1029 if (ISD::isConstantSplatVector(N, Val))
1030 return Val.getLimitedValue() == MaskForTy;
1031
1032 return false;
1033}
1034
1035// Determines if it is a constant integer or a splat/build vector of constant
1036// integers (and undefs).
1037// Do not permit build vector implicit truncation.
1038static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
1039 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
1040 return !(Const->isOpaque() && NoOpaques);
1041 if (N.getOpcode() != ISD::BUILD_VECTOR && N.getOpcode() != ISD::SPLAT_VECTOR)
1042 return false;
1043 unsigned BitWidth = N.getScalarValueSizeInBits();
1044 for (const SDValue &Op : N->op_values()) {
1045 if (Op.isUndef())
1046 continue;
1047 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
1048 if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
1049 (Const->isOpaque() && NoOpaques))
1050 return false;
1051 }
1052 return true;
1053}
1054
1055// Determines if a BUILD_VECTOR is composed of all constants, possibly mixed
1056// with undefs.
1057static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
1058 if (V.getOpcode() != ISD::BUILD_VECTOR)
1059 return false;
1060 return isConstantOrConstantVector(V, NoOpaques) ||
1061 ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
1062}
1063
1064// Determine if the index of this indexed load can be split off (i.e. it is not an opaque target constant).
1065static bool canSplitIdx(LoadSDNode *LD) {
1066 return MaySplitLoadIndex &&
1067 (LD->getOperand(2).getOpcode() != ISD::TargetConstant ||
1068 !cast<ConstantSDNode>(LD->getOperand(2))->isOpaque());
1069}
1070
1071bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
1072 const SDLoc &DL,
1073 SDNode *N,
1074 SDValue N0,
1075 SDValue N1) {
1076 // Currently this only tries to ensure we don't undo the GEP splits done by
1077 // CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this,
1078 // we check if the following transformation would be problematic:
1079 // (load/store (add, (add, x, offset1), offset2)) ->
1080 // (load/store (add, x, offset1+offset2)).
1081
1082 // (load/store (add, (add, x, y), offset2)) ->
1083 // (load/store (add, (add, x, offset2), y)).
1084
1085 if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD)
1086 return false;
1087
1088 auto *C2 = dyn_cast<ConstantSDNode>(N1);
1089 if (!C2)
1090 return false;
1091
1092 const APInt &C2APIntVal = C2->getAPIntValue();
1093 if (C2APIntVal.getSignificantBits() > 64)
1094 return false;
1095
1096 if (auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
1097 if (N0.hasOneUse())
1098 return false;
1099
1100 const APInt &C1APIntVal = C1->getAPIntValue();
1101 const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
1102 if (CombinedValueIntVal.getSignificantBits() > 64)
1103 return false;
1104 const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();
1105
1106 for (SDNode *Node : N->uses()) {
1107 if (auto *LoadStore = dyn_cast<MemSDNode>(Node)) {
1108 // Is x[offset2] already not a legal addressing mode? If so then
1109 // reassociating the constants breaks nothing (we test offset2 because
1110 // that's the one we hope to fold into the load or store).
1111 TargetLoweringBase::AddrMode AM;
1112 AM.HasBaseReg = true;
1113 AM.BaseOffs = C2APIntVal.getSExtValue();
1114 EVT VT = LoadStore->getMemoryVT();
1115 unsigned AS = LoadStore->getAddressSpace();
1116 Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1117 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1118 continue;
1119
1120 // Would x[offset1+offset2] still be a legal addressing mode?
1121 AM.BaseOffs = CombinedValue;
1122 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1123 return true;
1124 }
1125 }
1126 } else {
1127 if (auto *GA = dyn_cast<GlobalAddressSDNode>(N0.getOperand(1)))
1128 if (GA->getOpcode() == ISD::GlobalAddress && TLI.isOffsetFoldingLegal(GA))
1129 return false;
1130
1131 for (SDNode *Node : N->uses()) {
1132 auto *LoadStore = dyn_cast<MemSDNode>(Node);
1133 if (!LoadStore)
1134 return false;
1135
1136 // Is x[offset2] a legal addressing mode? If so then
1137 // reassociating the constants breaks the address pattern.
1138 TargetLoweringBase::AddrMode AM;
1139 AM.HasBaseReg = true;
1140 AM.BaseOffs = C2APIntVal.getSExtValue();
1141 EVT VT = LoadStore->getMemoryVT();
1142 unsigned AS = LoadStore->getAddressSpace();
1143 Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1144 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1145 return false;
1146 }
1147 return true;
1148 }
1149
1150 return false;
1151}
1152
1153/// Helper for DAGCombiner::reassociateOps. Try to reassociate (Opc N0, N1) if
1154/// \p N0 is the same kind of operation as \p Opc.
1155SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
1156 SDValue N0, SDValue N1,
1157 SDNodeFlags Flags) {
1158 EVT VT = N0.getValueType();
1159
1160 if (N0.getOpcode() != Opc)
1161 return SDValue();
1162
1163 SDValue N00 = N0.getOperand(0);
1164 SDValue N01 = N0.getOperand(1);
1165
1165
1166 if (DAG.isConstantIntBuildVectorOrConstantInt(N01)) {
1167 if (DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
1168 // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
1169 if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, {N01, N1}))
1170 return DAG.getNode(Opc, DL, VT, N00, OpNode);
1171 return SDValue();
1172 }
1173 if (TLI.isReassocProfitable(DAG, N0, N1)) {
1174 // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
1175 // iff (op x, c1) has one use
1176 SDNodeFlags NewFlags;
1177 if (N0.getOpcode() == ISD::ADD && N0->getFlags().hasNoUnsignedWrap() &&
1178 Flags.hasNoUnsignedWrap())
1179 NewFlags.setNoUnsignedWrap(true);
1180 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1, NewFlags);
1181 return DAG.getNode(Opc, DL, VT, OpNode, N01, NewFlags);
1182 }
1183 }
1184
1185 // Check for repeated operand logic simplifications.
1186 if (Opc == ISD::AND || Opc == ISD::OR) {
1187 // (N00 & N01) & N00 --> N00 & N01
1188 // (N00 & N01) & N01 --> N00 & N01
1189 // (N00 | N01) | N00 --> N00 | N01
1190 // (N00 | N01) | N01 --> N00 | N01
1191 if (N1 == N00 || N1 == N01)
1192 return N0;
1193 }
1194 if (Opc == ISD::XOR) {
1195 // (N00 ^ N01) ^ N00 --> N01
1196 if (N1 == N00)
1197 return N01;
1198 // (N00 ^ N01) ^ N01 --> N00
1199 if (N1 == N01)
1200 return N00;
1201 }
1202
1203 if (TLI.isReassocProfitable(DAG, N0, N1)) {
1204 if (N1 != N01) {
1205 // Reassociate if (op N00, N1) already exists.
1206 if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N00, N1})) {
1207 // If (Op (Op N00, N1), N01) already exists,
1208 // we need to stop reassociating to avoid an infinite loop.
1209 if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N01}))
1210 return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N01);
1211 }
1212 }
1213
1214 if (N1 != N00) {
1215 // Reassociate if (op N01, N1) already exists.
1216 if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N01, N1})) {
1217 // If (Op (Op N01, N1), N00) already exists,
1218 // we need to stop reassociating to avoid an infinite loop.
1219 if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N00}))
1220 return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N00);
1221 }
1222 }
1223
1224 // Reassociate the operands from (OR/AND (OR/AND(N00, N01)), N1) to (OR/AND
1225 // (OR/AND(N00, N1)), N01) when N00 and N1 are comparisons with the same
1226 // predicate or to (OR/AND (OR/AND(N1, N01)), N00) when N01 and N1 are
1227 // comparisons with the same predicate. This enables optimizations such as
1228 // the following:
1229 // CMP(A,C)||CMP(B,C) => CMP(MIN/MAX(A,B), C)
1230 // CMP(A,C)&&CMP(B,C) => CMP(MIN/MAX(A,B), C)
1231 if (Opc == ISD::AND || Opc == ISD::OR) {
1232 if (N1->getOpcode() == ISD::SETCC && N00->getOpcode() == ISD::SETCC &&
1233 N01->getOpcode() == ISD::SETCC) {
1234 ISD::CondCode CC1 = cast<CondCodeSDNode>(N1.getOperand(2))->get();
1235 ISD::CondCode CC00 = cast<CondCodeSDNode>(N00.getOperand(2))->get();
1236 ISD::CondCode CC01 = cast<CondCodeSDNode>(N01.getOperand(2))->get();
1237 if (CC1 == CC00 && CC1 != CC01) {
1238 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1, Flags);
1239 return DAG.getNode(Opc, DL, VT, OpNode, N01, Flags);
1240 }
1241 if (CC1 == CC01 && CC1 != CC00) {
1242 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N01, N1, Flags);
1243 return DAG.getNode(Opc, DL, VT, OpNode, N00, Flags);
1244 }
1245 }
1246 }
1247 }
1248
1249 return SDValue();
1250}
1251
1252/// Try to reassociate commutative (Opc N0, N1) if either \p N0 or \p N1 is the
1253/// same kind of operation as \p Opc.
1254SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
1255 SDValue N1, SDNodeFlags Flags) {
1256 assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");
1257
1258 // Floating-point reassociation is not allowed without loose FP math.
1259 if (N0.getValueType().isFloatingPoint() ||
1260 N1.getValueType().isFloatingPoint())
1261 if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros())
1262 return SDValue();
1263
1264 if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1, Flags))
1265 return Combined;
1266 if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0, Flags))
1267 return Combined;
1268 return SDValue();
1269}
1270
1271// Try to fold Opc(vecreduce(x), vecreduce(y)) -> vecreduce(Opc(x, y))
1272// Note that we only expect Flags to be passed from FP operations. For integer
1273// operations they need to be dropped.
1274SDValue DAGCombiner::reassociateReduction(unsigned RedOpc, unsigned Opc,
1275 const SDLoc &DL, EVT VT, SDValue N0,
1276 SDValue N1, SDNodeFlags Flags) {
1277 if (N0.getOpcode() == RedOpc && N1.getOpcode() == RedOpc &&
1278 N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
1279 N0->hasOneUse() && N1->hasOneUse() &&
1280 TLI.isOperationLegalOrCustom(Opc, N0.getOperand(0).getValueType()) &&
1281 TLI.shouldReassociateReduction(RedOpc, N0.getOperand(0).getValueType())) {
1282 SelectionDAG::FlagInserter FlagsInserter(DAG, Flags);
1283 return DAG.getNode(RedOpc, DL, VT,
1284 DAG.getNode(Opc, DL, N0.getOperand(0).getValueType(),
1285 N0.getOperand(0), N1.getOperand(0)));
1286 }
1287 return SDValue();
1288}
1289
1290SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
1291 bool AddTo) {
1292 assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
1293 ++NodesCombined;
1294 LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
1295 To[0].dump(&DAG);
1296 dbgs() << " and " << NumTo - 1 << " other values\n");
1297 for (unsigned i = 0, e = NumTo; i != e; ++i)
1298 assert((!To[i].getNode() ||
1299 N->getValueType(i) == To[i].getValueType()) &&
1300 "Cannot combine value to value of different type!");
1301
1302 WorklistRemover DeadNodes(*this);
1303 DAG.ReplaceAllUsesWith(N, To);
1304 if (AddTo) {
1305 // Push the new nodes and any users onto the worklist
1306 for (unsigned i = 0, e = NumTo; i != e; ++i) {
1307 if (To[i].getNode())
1308 AddToWorklistWithUsers(To[i].getNode());
1309 }
1310 }
1311
1312 // Finally, if the node is now dead, remove it from the graph. The node
1313 // may not be dead if the replacement process recursively simplified to
1314 // something else needing this node.
1315 if (N->use_empty())
1316 deleteAndRecombine(N);
1317 return SDValue(N, 0);
1318}
1319
1320void DAGCombiner::
1321CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
1322 // Replace the old value with the new one.
1323 ++NodesCombined;
1324 LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.dump(&DAG);
1325 dbgs() << "\nWith: "; TLO.New.dump(&DAG); dbgs() << '\n');
1326
1327 // Replace all uses.
1328 DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
1329
1330 // Push the new node and any (possibly new) users onto the worklist.
1331 AddToWorklistWithUsers(TLO.New.getNode());
1332
1333 // Finally, if the node is now dead, remove it from the graph.
1334 recursivelyDeleteUnusedNodes(TLO.Old.getNode());
1335}
1336
1337/// Check the specified integer node value to see if it can be simplified or if
1338/// things it uses can be simplified by bit propagation. If so, return true.
1339bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
1340 const APInt &DemandedElts,
1341 bool AssumeSingleUse) {
1342 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1343 KnownBits Known;
1344 if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, 0,
1345 AssumeSingleUse))
1346 return false;
1347
1348 // Revisit the node.
1349 AddToWorklist(Op.getNode());
1350
1351 CommitTargetLoweringOpt(TLO);
1352 return true;
1353}
1354
1355/// Check the specified vector node value to see if it can be simplified or
1356/// if things it uses can be simplified as it only uses some of the elements.
1357/// If so, return true.
1358bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
1359 const APInt &DemandedElts,
1360 bool AssumeSingleUse) {
1361 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1362 APInt KnownUndef, KnownZero;
1363 if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
1364 TLO, 0, AssumeSingleUse))
1365 return false;
1366
1367 // Revisit the node.
1368 AddToWorklist(Op.getNode());
1369
1370 CommitTargetLoweringOpt(TLO);
1371 return true;
1372}
1373
1374void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
1375 SDLoc DL(Load);
1376 EVT VT = Load->getValueType(0);
1377 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
1378
1379 LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
1380 Trunc.dump(&DAG); dbgs() << '\n');
1381
1382 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
1383 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
1384
1385 AddToWorklist(Trunc.getNode());
1386 recursivelyDeleteUnusedNodes(Load);
1387}
1388
1389SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
1390 Replace = false;
1391 SDLoc DL(Op);
1392 if (ISD::isUNINDEXEDLoad(Op.getNode())) {
1393 LoadSDNode *LD = cast<LoadSDNode>(Op);
1394 EVT MemVT = LD->getMemoryVT();
1395 ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1396 : LD->getExtensionType();
1397 Replace = true;
1398 return DAG.getExtLoad(ExtType, DL, PVT,
1399 LD->getChain(), LD->getBasePtr(),
1400 MemVT, LD->getMemOperand());
1401 }
1402
1403 unsigned Opc = Op.getOpcode();
1404 switch (Opc) {
1405 default: break;
1406 case ISD::AssertSext:
1407 if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
1408 return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
1409 break;
1410 case ISD::AssertZext:
1411 if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
1412 return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
1413 break;
1414 case ISD::Constant: {
1415 unsigned ExtOpc =
1416 Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1417 return DAG.getNode(ExtOpc, DL, PVT, Op);
1418 }
1419 }
1420
1421 if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
1422 return SDValue();
1423 return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
1424}
1425
1426SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1427 if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
1428 return SDValue();
1429 EVT OldVT = Op.getValueType();
1430 SDLoc DL(Op);
1431 bool Replace = false;
1432 SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1433 if (!NewOp.getNode())
1434 return SDValue();
1435 AddToWorklist(NewOp.getNode());
1436
1437 if (Replace)
1438 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1439 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1440 DAG.getValueType(OldVT));
1441}
1442
1443SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1444 EVT OldVT = Op.getValueType();
1445 SDLoc DL(Op);
1446 bool Replace = false;
1447 SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1448 if (!NewOp.getNode())
1449 return SDValue();
1450 AddToWorklist(NewOp.getNode());
1451
1452 if (Replace)
1453 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1454 return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1455}
1456
1457/// Promote the specified integer binary operation if the target indicates it is
1458/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1459/// i32 since i16 instructions are longer.
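/// For example, an undesirable (i16 add x, y) may become
/// (i16 trunc (i32 add (ext x), (ext y))), where the extension kind is chosen
/// by PromoteOperand (illustrative sketch, assuming the target prefers i32).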
1460SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1461 if (!LegalOperations)
1462 return SDValue();
1463
1464 EVT VT = Op.getValueType();
1465 if (VT.isVector() || !VT.isInteger())
1466 return SDValue();
1467
1468 // If operation type is 'undesirable', e.g. i16 on x86, consider
1469 // promoting it.
1470 unsigned Opc = Op.getOpcode();
1471 if (TLI.isTypeDesirableForOp(Opc, VT))
1472 return SDValue();
1473
1474 EVT PVT = VT;
1475 // Consult target whether it is a good idea to promote this operation and
1476 // what's the right type to promote it to.
1477 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1478 assert(PVT != VT && "Don't know what type to promote to!");
1479
1480 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1481
1482 bool Replace0 = false;
1483 SDValue N0 = Op.getOperand(0);
1484 SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1485
1486 bool Replace1 = false;
1487 SDValue N1 = Op.getOperand(1);
1488 SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
1489 SDLoc DL(Op);
1490
1491 SDValue RV =
1492 DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
1493
1494 // We are always replacing N0/N1's use in N and only need additional
1495 // replacements if there are additional uses.
1496 // Note: We are checking uses of the *nodes* (SDNode) rather than values
1497 // (SDValue) here because the node may reference multiple values
1498 // (for example, the chain value of a load node).
1499 Replace0 &= !N0->hasOneUse();
1500 Replace1 &= (N0 != N1) && !N1->hasOneUse();
1501
1502 // Combine Op here so it is preserved past replacements.
1503 CombineTo(Op.getNode(), RV);
1504
1505 // If operands have a use ordering, make sure we deal with
1506 // predecessor first.
1507 if (Replace0 && Replace1 && N0->isPredecessorOf(N1.getNode())) {
1508 std::swap(N0, N1);
1509 std::swap(NN0, NN1);
1510 }
1511
1512 if (Replace0) {
1513 AddToWorklist(NN0.getNode());
1514 ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
1515 }
1516 if (Replace1) {
1517 AddToWorklist(NN1.getNode());
1518 ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
1519 }
1520 return Op;
1521 }
1522 return SDValue();
1523}
1524
1525/// Promote the specified integer shift operation if the target indicates it is
1526/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1527/// i32 since i16 instructions are longer.
1528SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1529 if (!LegalOperations)
1530 return SDValue();
1531
1532 EVT VT = Op.getValueType();
1533 if (VT.isVector() || !VT.isInteger())
1534 return SDValue();
1535
1536 // If operation type is 'undesirable', e.g. i16 on x86, consider
1537 // promoting it.
1538 unsigned Opc = Op.getOpcode();
1539 if (TLI.isTypeDesirableForOp(Opc, VT))
1540 return SDValue();
1541
1542 EVT PVT = VT;
1543 // Consult target whether it is a good idea to promote this operation and
1544 // what's the right type to promote it to.
1545 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1546 assert(PVT != VT && "Don't know what type to promote to!");
1547
1548 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1549
1550 bool Replace = false;
1551 SDValue N0 = Op.getOperand(0);
1552 if (Opc == ISD::SRA)
1553 N0 = SExtPromoteOperand(N0, PVT);
1554 else if (Opc == ISD::SRL)
1555 N0 = ZExtPromoteOperand(N0, PVT);
1556 else
1557 N0 = PromoteOperand(N0, PVT, Replace);
1558
1559 if (!N0.getNode())
1560 return SDValue();
1561
1562 SDLoc DL(Op);
1563 SDValue N1 = Op.getOperand(1);
1564 SDValue RV =
1565 DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
1566
1567 if (Replace)
1568 ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1569
1570 // Deal with Op being deleted.
1571 if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1572 return RV;
1573 }
1574 return SDValue();
1575}
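// For illustration (schematic, assuming i16 is promoted to i32): the value
// operand is widened with the extension matching the shift kind, e.g.
//   (srl x:i16, c) --> (trunc (srl (zext-in-reg x):i32, c) to i16)
//   (sra x:i16, c) --> (trunc (sra (sign_extend_inreg x, i16):i32, c) to i16)
// so the bits shifted into the low i16 are the ones the narrow shift produces.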
1576
1577SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1578 if (!LegalOperations)
1579 return SDValue();
1580
1581 EVT VT = Op.getValueType();
1582 if (VT.isVector() || !VT.isInteger())
1583 return SDValue();
1584
1585 // If operation type is 'undesirable', e.g. i16 on x86, consider
1586 // promoting it.
1587 unsigned Opc = Op.getOpcode();
1588 if (TLI.isTypeDesirableForOp(Opc, VT))
1589 return SDValue();
1590
1591 EVT PVT = VT;
1592 // Consult target whether it is a good idea to promote this operation and
1593 // what's the right type to promote it to.
1594 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1595 assert(PVT != VT && "Don't know what type to promote to!");
1596 // fold (aext (aext x)) -> (aext x)
1597 // fold (aext (zext x)) -> (zext x)
1598 // fold (aext (sext x)) -> (sext x)
1599 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1600 return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1601 }
1602 return SDValue();
1603}
1604
1605bool DAGCombiner::PromoteLoad(SDValue Op) {
1606 if (!LegalOperations)
1607 return false;
1608
1609 if (!ISD::isUNINDEXEDLoad(Op.getNode()))
1610 return false;
1611
1612 EVT VT = Op.getValueType();
1613 if (VT.isVector() || !VT.isInteger())
1614 return false;
1615
1616 // If operation type is 'undesirable', e.g. i16 on x86, consider
1617 // promoting it.
1618 unsigned Opc = Op.getOpcode();
1619 if (TLI.isTypeDesirableForOp(Opc, VT))
1620 return false;
1621
1622 EVT PVT = VT;
1623 // Consult target whether it is a good idea to promote this operation and
1624 // what's the right type to promote it to.
1625 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1626 assert(PVT != VT && "Don't know what type to promote to!");
1627
1628 SDLoc DL(Op);
1629 SDNode *N = Op.getNode();
1630 LoadSDNode *LD = cast<LoadSDNode>(N);
1631 EVT MemVT = LD->getMemoryVT();
1632 ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(N) ? ISD::EXTLOAD
1633 : LD->getExtensionType();
1634 SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
1635 LD->getChain(), LD->getBasePtr(),
1636 MemVT, LD->getMemOperand());
1637 SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
1638
1639 LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
1640 Result.dump(&DAG); dbgs() << '\n');
1641
1642 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1643 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1644
1645 AddToWorklist(Result.getNode());
1646 recursivelyDeleteUnusedNodes(N);
1647 return true;
1648 }
1649
1650 return false;
1651}
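// For illustration (schematic, assuming i16 loads are promoted to i32):
//   t1: i16,ch = load t0, ptr
//   --> t2: i32,ch = extload [i16] t0, ptr; t3: i16 = truncate t2
// Value uses of t1 are rewired to t3 and chain uses to t2's chain before the
// dead load is recursively deleted.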
1652
1653/// Recursively delete a node which has no uses and any operands for
1654/// which it is the only use.
1655///
1656/// Note that this both deletes the nodes and removes them from the worklist.
1657 /// It also adds any nodes that have had a user deleted to the worklist, as they
1658 /// may now have only one use and be subject to other combines.
1659bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1660 if (!N->use_empty())
1661 return false;
1662
1663 SmallSetVector<SDNode *, 16> Nodes;
1664 Nodes.insert(N);
1665 do {
1666 N = Nodes.pop_back_val();
1667 if (!N)
1668 continue;
1669
1670 if (N->use_empty()) {
1671 for (const SDValue &ChildN : N->op_values())
1672 Nodes.insert(ChildN.getNode());
1673
1674 removeFromWorklist(N);
1675 DAG.DeleteNode(N);
1676 } else {
1677 AddToWorklist(N);
1678 }
1679 } while (!Nodes.empty());
1680 return true;
1681}
1682
1683//===----------------------------------------------------------------------===//
1684// Main DAG Combiner implementation
1685//===----------------------------------------------------------------------===//
1686
1687void DAGCombiner::Run(CombineLevel AtLevel) {
1688 // set the instance variables, so that the various visit routines may use them.
1689 Level = AtLevel;
1690 LegalDAG = Level >= AfterLegalizeDAG;
1691 LegalOperations = Level >= AfterLegalizeVectorOps;
1692 LegalTypes = Level >= AfterLegalizeTypes;
1693
1694 WorklistInserter AddNodes(*this);
1695
1696 // Add all the dag nodes to the worklist.
1697 //
1698 // Note: Not all nodes are added to PruningList here; the only nodes which
1699 // can be deleted are those which have no uses, and all other nodes which would
1700 // otherwise be added to the worklist by the first call to
1701 // getNextWorklistEntry are already present in it.
1702 for (SDNode &Node : DAG.allnodes())
1703 AddToWorklist(&Node, /* IsCandidateForPruning */ Node.use_empty());
1704
1705 // Create a dummy node (which is not added to allnodes), that adds a reference
1706 // to the root node, preventing it from being deleted, and tracking any
1707 // changes of the root.
1708 HandleSDNode Dummy(DAG.getRoot());
1709
1710 // While we have a valid worklist entry node, try to combine it.
1711 while (SDNode *N = getNextWorklistEntry()) {
1712 // If N has no uses, it is dead. Make sure to revisit all N's operands once
1713 // N is deleted from the DAG, since they too may now be dead or may have a
1714 // reduced number of uses, allowing other xforms.
1715 if (recursivelyDeleteUnusedNodes(N))
1716 continue;
1717
1718 WorklistRemover DeadNodes(*this);
1719
1720 // If this combine is running after legalizing the DAG, re-legalize any
1721 // nodes pulled off the worklist.
1722 if (LegalDAG) {
1723 SmallSetVector<SDNode *, 16> UpdatedNodes;
1724 bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
1725
1726 for (SDNode *LN : UpdatedNodes)
1727 AddToWorklistWithUsers(LN);
1728
1729 if (!NIsValid)
1730 continue;
1731 }
1732
1733 LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
1734
1735 // Add any operands of the new node which have not yet been combined to the
1736 // worklist as well. Because the worklist uniques things already, this
1737 // won't repeatedly process the same operand.
1738 for (const SDValue &ChildN : N->op_values())
1739 if (!CombinedNodes.count(ChildN.getNode()))
1740 AddToWorklist(ChildN.getNode());
1741
1742 CombinedNodes.insert(N);
1743 SDValue RV = combine(N);
1744
1745 if (!RV.getNode())
1746 continue;
1747
1748 ++NodesCombined;
1749
1750 // If we get back the same node we passed in, rather than a new node or
1751 // zero, we know that the node must have defined multiple values and
1752 // CombineTo was used. Since CombineTo takes care of the worklist
1753 // mechanics for us, we have no work to do in this case.
1754 if (RV.getNode() == N)
1755 continue;
1756
1757 assert(N->getOpcode() != ISD::DELETED_NODE &&
1758 RV.getOpcode() != ISD::DELETED_NODE &&
1759 "Node was deleted but visit returned new node!");
1760
1761 LLVM_DEBUG(dbgs() << " ... into: "; RV.dump(&DAG));
1762
1763 if (N->getNumValues() == RV->getNumValues())
1764 DAG.ReplaceAllUsesWith(N, RV.getNode());
1765 else {
1766 assert(N->getValueType(0) == RV.getValueType() &&
1767 N->getNumValues() == 1 && "Type mismatch");
1768 DAG.ReplaceAllUsesWith(N, &RV);
1769 }
1770
1771 // Push the new node and any users onto the worklist. Omit this if the
1772 // new node is the EntryToken (e.g. if a store managed to get optimized
1773 // out), because re-visiting the EntryToken and its users will not uncover
1774 // any additional opportunities, but there may be a large number of such
1775 // users, potentially causing compile time explosion.
1776 if (RV.getOpcode() != ISD::EntryToken)
1777 AddToWorklistWithUsers(RV.getNode());
1778
1779 // Finally, if the node is now dead, remove it from the graph. The node
1780 // may not be dead if the replacement process recursively simplified to
1781 // something else needing this node. This will also take care of adding any
1782 // operands which have lost a user to the worklist.
1783 recursivelyDeleteUnusedNodes(N);
1784 }
1785
1786 // If the root changed (e.g. it was a dead load), update the root.
1787 DAG.setRoot(Dummy.getValue());
1788 DAG.RemoveDeadNodes();
1789}
1790
1791SDValue DAGCombiner::visit(SDNode *N) {
1792 // clang-format off
1793 switch (N->getOpcode()) {
1794 default: break;
1795 case ISD::TokenFactor: return visitTokenFactor(N);
1796 case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
1797 case ISD::ADD: return visitADD(N);
1798 case ISD::SUB: return visitSUB(N);
1799 case ISD::SADDSAT:
1800 case ISD::UADDSAT: return visitADDSAT(N);
1801 case ISD::SSUBSAT:
1802 case ISD::USUBSAT: return visitSUBSAT(N);
1803 case ISD::ADDC: return visitADDC(N);
1804 case ISD::SADDO:
1805 case ISD::UADDO: return visitADDO(N);
1806 case ISD::SUBC: return visitSUBC(N);
1807 case ISD::SSUBO:
1808 case ISD::USUBO: return visitSUBO(N);
1809 case ISD::ADDE: return visitADDE(N);
1810 case ISD::UADDO_CARRY: return visitUADDO_CARRY(N);
1811 case ISD::SADDO_CARRY: return visitSADDO_CARRY(N);
1812 case ISD::SUBE: return visitSUBE(N);
1813 case ISD::USUBO_CARRY: return visitUSUBO_CARRY(N);
1814 case ISD::SSUBO_CARRY: return visitSSUBO_CARRY(N);
1815 case ISD::SMULFIX:
1816 case ISD::SMULFIXSAT:
1817 case ISD::UMULFIX:
1818 case ISD::UMULFIXSAT: return visitMULFIX(N);
1819 case ISD::MUL: return visitMUL(N);
1820 case ISD::SDIV: return visitSDIV(N);
1821 case ISD::UDIV: return visitUDIV(N);
1822 case ISD::SREM:
1823 case ISD::UREM: return visitREM(N);
1824 case ISD::MULHU: return visitMULHU(N);
1825 case ISD::MULHS: return visitMULHS(N);
1826 case ISD::AVGFLOORS:
1827 case ISD::AVGFLOORU:
1828 case ISD::AVGCEILS:
1829 case ISD::AVGCEILU: return visitAVG(N);
1830 case ISD::ABDS:
1831 case ISD::ABDU: return visitABD(N);
1832 case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
1833 case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
1834 case ISD::SMULO:
1835 case ISD::UMULO: return visitMULO(N);
1836 case ISD::SMIN:
1837 case ISD::SMAX:
1838 case ISD::UMIN:
1839 case ISD::UMAX: return visitIMINMAX(N);
1840 case ISD::AND: return visitAND(N);
1841 case ISD::OR: return visitOR(N);
1842 case ISD::XOR: return visitXOR(N);
1843 case ISD::SHL: return visitSHL(N);
1844 case ISD::SRA: return visitSRA(N);
1845 case ISD::SRL: return visitSRL(N);
1846 case ISD::ROTR:
1847 case ISD::ROTL: return visitRotate(N);
1848 case ISD::FSHL:
1849 case ISD::FSHR: return visitFunnelShift(N);
1850 case ISD::SSHLSAT:
1851 case ISD::USHLSAT: return visitSHLSAT(N);
1852 case ISD::ABS: return visitABS(N);
1853 case ISD::BSWAP: return visitBSWAP(N);
1854 case ISD::BITREVERSE: return visitBITREVERSE(N);
1855 case ISD::CTLZ: return visitCTLZ(N);
1856 case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
1857 case ISD::CTTZ: return visitCTTZ(N);
1858 case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
1859 case ISD::CTPOP: return visitCTPOP(N);
1860 case ISD::SELECT: return visitSELECT(N);
1861 case ISD::VSELECT: return visitVSELECT(N);
1862 case ISD::SELECT_CC: return visitSELECT_CC(N);
1863 case ISD::SETCC: return visitSETCC(N);
1864 case ISD::SETCCCARRY: return visitSETCCCARRY(N);
1865 case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
1866 case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
1867 case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
1868 case ISD::AssertSext:
1869 case ISD::AssertZext: return visitAssertExt(N);
1870 case ISD::AssertAlign: return visitAssertAlign(N);
1871 case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
1872 case ISD::SIGN_EXTEND_VECTOR_INREG:
1873 case ISD::ZERO_EXTEND_VECTOR_INREG:
1874 case ISD::ANY_EXTEND_VECTOR_INREG: return visitEXTEND_VECTOR_INREG(N);
1875 case ISD::TRUNCATE: return visitTRUNCATE(N);
1876 case ISD::BITCAST: return visitBITCAST(N);
1877 case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
1878 case ISD::FADD: return visitFADD(N);
1879 case ISD::STRICT_FADD: return visitSTRICT_FADD(N);
1880 case ISD::FSUB: return visitFSUB(N);
1881 case ISD::FMUL: return visitFMUL(N);
1882 case ISD::FMA: return visitFMA<EmptyMatchContext>(N);
1883 case ISD::FMAD: return visitFMAD(N);
1884 case ISD::FDIV: return visitFDIV(N);
1885 case ISD::FREM: return visitFREM(N);
1886 case ISD::FSQRT: return visitFSQRT(N);
1887 case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
1888 case ISD::FPOW: return visitFPOW(N);
1889 case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
1890 case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
1891 case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
1892 case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
1893 case ISD::LRINT:
1894 case ISD::LLRINT: return visitXRINT(N);
1895 case ISD::FP_ROUND: return visitFP_ROUND(N);
1896 case ISD::FP_EXTEND: return visitFP_EXTEND(N);
1897 case ISD::FNEG: return visitFNEG(N);
1898 case ISD::FABS: return visitFABS(N);
1899 case ISD::FFLOOR: return visitFFLOOR(N);
1900 case ISD::FMINNUM:
1901 case ISD::FMAXNUM:
1902 case ISD::FMINIMUM:
1903 case ISD::FMAXIMUM: return visitFMinMax(N);
1904 case ISD::FCEIL: return visitFCEIL(N);
1905 case ISD::FTRUNC: return visitFTRUNC(N);
1906 case ISD::FFREXP: return visitFFREXP(N);
1907 case ISD::BRCOND: return visitBRCOND(N);
1908 case ISD::BR_CC: return visitBR_CC(N);
1909 case ISD::LOAD: return visitLOAD(N);
1910 case ISD::STORE: return visitSTORE(N);
1911 case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
1912 case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
1913 case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
1914 case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
1915 case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
1916 case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
1917 case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N);
1918 case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
1919 case ISD::MGATHER: return visitMGATHER(N);
1920 case ISD::MLOAD: return visitMLOAD(N);
1921 case ISD::MSCATTER: return visitMSCATTER(N);
1922 case ISD::MSTORE: return visitMSTORE(N);
1923 case ISD::LIFETIME_END: return visitLIFETIME_END(N);
1924 case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
1925 case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
1926 case ISD::FP_TO_BF16: return visitFP_TO_BF16(N);
1927 case ISD::BF16_TO_FP: return visitBF16_TO_FP(N);
1928 case ISD::FREEZE: return visitFREEZE(N);
1929 case ISD::GET_FPENV_MEM: return visitGET_FPENV_MEM(N);
1930 case ISD::SET_FPENV_MEM: return visitSET_FPENV_MEM(N);
1931 case ISD::VECREDUCE_FADD:
1932 case ISD::VECREDUCE_FMUL:
1933 case ISD::VECREDUCE_ADD:
1934 case ISD::VECREDUCE_MUL:
1935 case ISD::VECREDUCE_AND:
1936 case ISD::VECREDUCE_OR:
1937 case ISD::VECREDUCE_XOR:
1938 case ISD::VECREDUCE_SMAX:
1939 case ISD::VECREDUCE_SMIN:
1940 case ISD::VECREDUCE_UMAX:
1941 case ISD::VECREDUCE_UMIN:
1942 case ISD::VECREDUCE_FMAX:
1943 case ISD::VECREDUCE_FMIN:
1944 case ISD::VECREDUCE_FMAXIMUM:
1945 case ISD::VECREDUCE_FMINIMUM: return visitVECREDUCE(N);
1946#define BEGIN_REGISTER_VP_SDNODE(SDOPC, ...) case ISD::SDOPC:
1947#include "llvm/IR/VPIntrinsics.def"
1948 return visitVPOp(N);
1949 }
1950 // clang-format on
1951 return SDValue();
1952}
1953
1954SDValue DAGCombiner::combine(SDNode *N) {
1955 if (!DebugCounter::shouldExecute(DAGCombineCounter))
1956 return SDValue();
1957
1958 SDValue RV;
1959 if (!DisableGenericCombines)
1960 RV = visit(N);
1961
1962 // If nothing happened, try a target-specific DAG combine.
1963 if (!RV.getNode()) {
1964 assert(N->getOpcode() != ISD::DELETED_NODE &&
1965 "Node was deleted but visit returned NULL!");
1966
1967 if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
1968 TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
1969
1970 // Expose the DAG combiner to the target combiner impls.
1971 TargetLowering::DAGCombinerInfo
1972 DagCombineInfo(DAG, Level, false, this);
1973
1974 RV = TLI.PerformDAGCombine(N, DagCombineInfo);
1975 }
1976 }
1977
1978 // If nothing happened still, try promoting the operation.
1979 if (!RV.getNode()) {
1980 switch (N->getOpcode()) {
1981 default: break;
1982 case ISD::ADD:
1983 case ISD::SUB:
1984 case ISD::MUL:
1985 case ISD::AND:
1986 case ISD::OR:
1987 case ISD::XOR:
1988 RV = PromoteIntBinOp(SDValue(N, 0));
1989 break;
1990 case ISD::SHL:
1991 case ISD::SRA:
1992 case ISD::SRL:
1993 RV = PromoteIntShiftOp(SDValue(N, 0));
1994 break;
1995 case ISD::SIGN_EXTEND:
1996 case ISD::ZERO_EXTEND:
1997 case ISD::ANY_EXTEND:
1998 RV = PromoteExtend(SDValue(N, 0));
1999 break;
2000 case ISD::LOAD:
2001 if (PromoteLoad(SDValue(N, 0)))
2002 RV = SDValue(N, 0);
2003 break;
2004 }
2005 }
2006
2007 // If N is a commutative binary node, try to eliminate it if the commuted
2008 // version is already present in the DAG.
2009 if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode())) {
2010 SDValue N0 = N->getOperand(0);
2011 SDValue N1 = N->getOperand(1);
2012
2013 // Constant operands are canonicalized to RHS.
2014 if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
2015 SDValue Ops[] = {N1, N0};
2016 SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
2017 N->getFlags());
2018 if (CSENode)
2019 return SDValue(CSENode, 0);
2020 }
2021 }
2022
2023 return RV;
2024}
2025
2026/// Given a node, return its input chain if it has one, otherwise return a null
2027/// sd operand.
2028 static SDValue getInputChainForNode(SDNode *N) {
2029 if (unsigned NumOps = N->getNumOperands()) {
2030 if (N->getOperand(0).getValueType() == MVT::Other)
2031 return N->getOperand(0);
2032 if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
2033 return N->getOperand(NumOps-1);
2034 for (unsigned i = 1; i < NumOps-1; ++i)
2035 if (N->getOperand(i).getValueType() == MVT::Other)
2036 return N->getOperand(i);
2037 }
2038 return SDValue();
2039}
2040
2041SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
2042 // If N has two operands, where one has an input chain equal to the other,
2043 // the 'other' chain is redundant.
2044 if (N->getNumOperands() == 2) {
2045 if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
2046 return N->getOperand(0);
2047 if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
2048 return N->getOperand(1);
2049 }
2050
2051 // Don't simplify token factors if optnone.
2052 if (OptLevel == CodeGenOptLevel::None)
2053 return SDValue();
2054
2055 // Don't simplify the token factor if the node itself has too many operands.
2056 if (N->getNumOperands() > TokenFactorInlineLimit)
2057 return SDValue();
2058
2059 // If the sole user is a token factor, we should make sure we have a
2060 // chance to merge them together. This prevents TF chains from inhibiting
2061 // optimizations.
2062 if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::TokenFactor)
2063 AddToWorklist(*(N->use_begin()));
2064
2065 SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
2066 SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
2067 SmallPtrSet<SDNode *, 16> SeenOps;
2068 bool Changed = false; // If we should replace this token factor.
2069
2070 // Start out with this token factor.
2071 TFs.push_back(N);
2072
2073 // Iterate through token factors. The TFs grows when new token factors are
2074 // encountered.
2075 for (unsigned i = 0; i < TFs.size(); ++i) {
2076 // Limit number of nodes to inline, to avoid quadratic compile times.
2077 // We have to add the outstanding Token Factors to Ops, otherwise we might
2078 // drop Ops from the resulting Token Factors.
2079 if (Ops.size() > TokenFactorInlineLimit) {
2080 for (unsigned j = i; j < TFs.size(); j++)
2081 Ops.emplace_back(TFs[j], 0);
2082 // Drop unprocessed Token Factors from TFs, so we do not add them to the
2083 // combiner worklist later.
2084 TFs.resize(i);
2085 break;
2086 }
2087
2088 SDNode *TF = TFs[i];
2089 // Check each of the operands.
2090 for (const SDValue &Op : TF->op_values()) {
2091 switch (Op.getOpcode()) {
2092 case ISD::EntryToken:
2093 // Entry tokens don't need to be added to the list. They are
2094 // redundant.
2095 Changed = true;
2096 break;
2097
2098 case ISD::TokenFactor:
2099 if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
2100 // Queue up for processing.
2101 TFs.push_back(Op.getNode());
2102 Changed = true;
2103 break;
2104 }
2105 [[fallthrough]];
2106
2107 default:
2108 // Only add if it isn't already in the list.
2109 if (SeenOps.insert(Op.getNode()).second)
2110 Ops.push_back(Op);
2111 else
2112 Changed = true;
2113 break;
2114 }
2115 }
2116 }
2117
2118 // Re-visit inlined Token Factors, to clean them up in case they have been
2119 // removed. Skip the first Token Factor, as this is the current node.
2120 for (unsigned i = 1, e = TFs.size(); i < e; i++)
2121 AddToWorklist(TFs[i]);
2122
2123 // Remove Nodes that are chained to another node in the list. Do so
2124 // by walking up chains breadth-first, stopping when we've seen
2125 // another operand. In general we must climb to the EntryNode, but we can exit
2126 // early if we find all remaining work is associated with just one operand as
2127 // no further pruning is possible.
2128
2129 // List of nodes to search through and original Ops from which they originate.
2130 SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
2131 SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
2132 SmallPtrSet<SDNode *, 16> SeenChains;
2133 bool DidPruneOps = false;
2134
2135 unsigned NumLeftToConsider = 0;
2136 for (const SDValue &Op : Ops) {
2137 Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
2138 OpWorkCount.push_back(1);
2139 }
2140
2141 auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
2142 // If this is an Op, we can remove the op from the list. Remark any
2143 // search associated with it as from the current OpNumber.
2144 if (SeenOps.contains(Op)) {
2145 Changed = true;
2146 DidPruneOps = true;
2147 unsigned OrigOpNumber = 0;
2148 while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
2149 OrigOpNumber++;
2150 assert((OrigOpNumber != Ops.size()) &&
2151 "expected to find TokenFactor Operand");
2152 // Re-mark worklist from OrigOpNumber to OpNumber
2153 for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
2154 if (Worklist[i].second == OrigOpNumber) {
2155 Worklist[i].second = OpNumber;
2156 }
2157 }
2158 OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
2159 OpWorkCount[OrigOpNumber] = 0;
2160 NumLeftToConsider--;
2161 }
2162 // Add if it's a new chain
2163 if (SeenChains.insert(Op).second) {
2164 OpWorkCount[OpNumber]++;
2165 Worklist.push_back(std::make_pair(Op, OpNumber));
2166 }
2167 };
2168
2169 for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
2170 // We need to consider at least 2 Ops to prune.
2171 if (NumLeftToConsider <= 1)
2172 break;
2173 auto CurNode = Worklist[i].first;
2174 auto CurOpNumber = Worklist[i].second;
2175 assert((OpWorkCount[CurOpNumber] > 0) &&
2176 "Node should not appear in worklist");
2177 switch (CurNode->getOpcode()) {
2178 case ISD::EntryToken:
2179 // Hitting EntryToken is the only way for the search to terminate without
2180 // hitting
2181 // another operand's search. Prevent us from marking this operand
2182 // considered.
2183 NumLeftToConsider++;
2184 break;
2185 case ISD::TokenFactor:
2186 for (const SDValue &Op : CurNode->op_values())
2187 AddToWorklist(i, Op.getNode(), CurOpNumber);
2188 break;
2189 case ISD::LIFETIME_START:
2190 case ISD::LIFETIME_END:
2191 case ISD::CopyFromReg:
2192 case ISD::CopyToReg:
2193 AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
2194 break;
2195 default:
2196 if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
2197 AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
2198 break;
2199 }
2200 OpWorkCount[CurOpNumber]--;
2201 if (OpWorkCount[CurOpNumber] == 0)
2202 NumLeftToConsider--;
2203 }
2204
2205 // If we've changed things around then replace token factor.
2206 if (Changed) {
2207 SDValue Result;
2208 if (Ops.empty()) {
2209 // The entry token is the only possible outcome.
2210 Result = DAG.getEntryNode();
2211 } else {
2212 if (DidPruneOps) {
2213 SmallVector<SDValue, 8> PrunedOps;
2214 //
2215 for (const SDValue &Op : Ops) {
2216 if (SeenChains.count(Op.getNode()) == 0)
2217 PrunedOps.push_back(Op);
2218 }
2219 Result = DAG.getTokenFactor(SDLoc(N), PrunedOps);
2220 } else {
2221 Result = DAG.getTokenFactor(SDLoc(N), Ops);
2222 }
2223 }
2224 return Result;
2225 }
2226 return SDValue();
2227}
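// For illustration of the merging done above (schematic):
//   TokenFactor (TokenFactor (a, b), c) --> TokenFactor (a, b, c)
// with EntryToken, duplicate, and transitively-chained operands pruned, and the
// two-operand early exit returning the surviving chain directly when the other
// operand is already its input chain.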
2228
2229/// MERGE_VALUES can always be eliminated.
2230SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
2231 WorklistRemover DeadNodes(*this);
2232 // Replacing results may cause a different MERGE_VALUES to suddenly
2233 // be CSE'd with N, and carry its uses with it. Iterate until no
2234 // uses remain, to ensure that the node can be safely deleted.
2235 // First add the users of this node to the work list so that they
2236 // can be tried again once they have new operands.
2237 AddUsersToWorklist(N);
2238 do {
2239 // Do as a single replacement to avoid rewalking use lists.
2240 SmallVector<SDValue, 8> Ops;
2241 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
2242 Ops.push_back(N->getOperand(i));
2243 DAG.ReplaceAllUsesWith(N, Ops.data());
2244 } while (!N->use_empty());
2245 deleteAndRecombine(N);
2246 return SDValue(N, 0); // Return N so it doesn't get rechecked!
2247}
2248
2249/// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a
2250/// ConstantSDNode pointer else nullptr.
2251 static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
2252 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
2253 return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
2254}
2255
2256// isTruncateOf - If N is a truncate of some other value, return true, record
2257// the value being truncated in Op and which of Op's bits are zero/one in Known.
2258// This function computes KnownBits to avoid a duplicated call to
2259// computeKnownBits in the caller.
2260 static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
2261 KnownBits &Known) {
2262 if (N->getOpcode() == ISD::TRUNCATE) {
2263 Op = N->getOperand(0);
2264 Known = DAG.computeKnownBits(Op);
2265 return true;
2266 }
2267
2268 if (N.getOpcode() != ISD::SETCC ||
2269 N.getValueType().getScalarType() != MVT::i1 ||
2270 cast<CondCodeSDNode>(N.getOperand(2))->get() != ISD::SETNE)
2271 return false;
2272
2273 SDValue Op0 = N->getOperand(0);
2274 SDValue Op1 = N->getOperand(1);
2275 assert(Op0.getValueType() == Op1.getValueType());
2276
2277 if (isNullOrNullSplat(Op0))
2278 Op = Op1;
2279 else if (isNullOrNullSplat(Op1))
2280 Op = Op0;
2281 else
2282 return false;
2283
2284 Known = DAG.computeKnownBits(Op);
2285
2286 return (Known.Zero | 1).isAllOnes();
2287}
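// For illustration: (setcc ne X, 0) acts as (trunc X to i1) only when every bit
// of X above bit 0 is known zero, i.e. (Known.Zero | 1) is all ones; the setcc
// result then equals the low bit of X.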
2288
2289/// Return true if 'Use' is a load or a store that uses N as its base pointer
2290/// and that N may be folded in the load / store addressing mode.
2291 static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, SelectionDAG &DAG,
2292 const TargetLowering &TLI) {
2293 EVT VT;
2294 unsigned AS;
2295
2296 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
2297 if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2298 return false;
2299 VT = LD->getMemoryVT();
2300 AS = LD->getAddressSpace();
2301 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
2302 if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2303 return false;
2304 VT = ST->getMemoryVT();
2305 AS = ST->getAddressSpace();
2306 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(Use)) {
2307 if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2308 return false;
2309 VT = LD->getMemoryVT();
2310 AS = LD->getAddressSpace();
2311 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(Use)) {
2312 if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2313 return false;
2314 VT = ST->getMemoryVT();
2315 AS = ST->getAddressSpace();
2316 } else {
2317 return false;
2318 }
2319
2320 TargetLowering::AddrMode AM;
2321 if (N->getOpcode() == ISD::ADD) {
2322 AM.HasBaseReg = true;
2323 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2324 if (Offset)
2325 // [reg +/- imm]
2326 AM.BaseOffs = Offset->getSExtValue();
2327 else
2328 // [reg +/- reg]
2329 AM.Scale = 1;
2330 } else if (N->getOpcode() == ISD::SUB) {
2331 AM.HasBaseReg = true;
2332 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2333 if (Offset)
2334 // [reg +/- imm]
2335 AM.BaseOffs = -Offset->getSExtValue();
2336 else
2337 // [reg +/- reg]
2338 AM.Scale = 1;
2339 } else {
2340 return false;
2341 }
2342
2343 return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
2344 VT.getTypeForEVT(*DAG.getContext()), AS);
2345}
2346
2347/// This inverts a canonicalization in IR that replaces a variable select arm
2348/// with an identity constant. Codegen improves if we re-use the variable
2349/// operand rather than load a constant. This can also be converted into a
2350/// masked vector operation if the target supports it.
2351 static SDValue foldSelectWithIdentityConstant(SDNode *N, SelectionDAG &DAG,
2352 bool ShouldCommuteOperands) {
2353 // Match a select as operand 1. The identity constant that we are looking for
2354 // is only valid as operand 1 of a non-commutative binop.
2355 SDValue N0 = N->getOperand(0);
2356 SDValue N1 = N->getOperand(1);
2357 if (ShouldCommuteOperands)
2358 std::swap(N0, N1);
2359
2360 // TODO: Should this apply to scalar select too?
2361 if (N1.getOpcode() != ISD::VSELECT || !N1.hasOneUse())
2362 return SDValue();
2363
2364 // We can't hoist all instructions because of immediate UB (not speculatable).
2365 // For example div/rem by zero.
2366 if (!DAG.isSafeToSpeculativelyExecuteNode(N))
2367 return SDValue();
2368
2369 unsigned Opcode = N->getOpcode();
2370 EVT VT = N->getValueType(0);
2371 SDValue Cond = N1.getOperand(0);
2372 SDValue TVal = N1.getOperand(1);
2373 SDValue FVal = N1.getOperand(2);
2374
2375 // This transform increases uses of N0, so freeze it to be safe.
2376 // binop N0, (vselect Cond, IDC, FVal) --> vselect Cond, N0, (binop N0, FVal)
2377 unsigned OpNo = ShouldCommuteOperands ? 0 : 1;
2378 if (isNeutralConstant(Opcode, N->getFlags(), TVal, OpNo)) {
2379 SDValue F0 = DAG.getFreeze(N0);
2380 SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, FVal, N->getFlags());
2381 return DAG.getSelect(SDLoc(N), VT, Cond, F0, NewBO);
2382 }
2383 // binop N0, (vselect Cond, TVal, IDC) --> vselect Cond, (binop N0, TVal), N0
2384 if (isNeutralConstant(Opcode, N->getFlags(), FVal, OpNo)) {
2385 SDValue F0 = DAG.getFreeze(N0);
2386 SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, TVal, N->getFlags());
2387 return DAG.getSelect(SDLoc(N), VT, Cond, NewBO, F0);
2388 }
2389
2390 return SDValue();
2391}
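// For illustration, using add's identity constant 0:
//   add X, (vselect Cond, 0, Y) --> vselect Cond, freeze(X), (add freeze(X), Y)
// The freeze keeps the two uses of X consistent if X is undef or poison.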
2392
2393SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
2394 assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
2395 "Unexpected binary operator");
2396
2397 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2398 auto BinOpcode = BO->getOpcode();
2399 EVT VT = BO->getValueType(0);
2400 if (TLI.shouldFoldSelectWithIdentityConstant(BinOpcode, VT)) {
2401 if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, false))
2402 return Sel;
2403
2404 if (TLI.isCommutativeBinOp(BO->getOpcode()))
2405 if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, true))
2406 return Sel;
2407 }
2408
2409 // Don't do this unless the old select is going away. We want to eliminate the
2410 // binary operator, not replace a binop with a select.
2411 // TODO: Handle ISD::SELECT_CC.
2412 unsigned SelOpNo = 0;
2413 SDValue Sel = BO->getOperand(0);
2414 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
2415 SelOpNo = 1;
2416 Sel = BO->getOperand(1);
2417
2418 // Peek through trunc to shift amount type.
2419 if ((BinOpcode == ISD::SHL || BinOpcode == ISD::SRA ||
2420 BinOpcode == ISD::SRL) && Sel.hasOneUse()) {
2421 // This is valid when the truncated bits of x are already zero.
2422 SDValue Op;
2423 KnownBits Known;
2424 if (isTruncateOf(DAG, Sel, Op, Known) &&
2425 Known.countMaxActiveBits() <= Sel.getScalarValueSizeInBits())
2426 Sel = Op;
2427 }
2428 }
2429
2430 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
2431 return SDValue();
2432
2433 SDValue CT = Sel.getOperand(1);
2434 if (!isConstantOrConstantVector(CT, true) &&
2435 !DAG.isConstantFPBuildVectorOrConstantFP(CT))
2436 return SDValue();
2437
2438 SDValue CF = Sel.getOperand(2);
2439 if (!isConstantOrConstantVector(CF, true) &&
2440 !DAG.isConstantFPBuildVectorOrConstantFP(CF))
2441 return SDValue();
2442
2443 // Bail out if any constants are opaque because we can't constant fold those.
2444 // The exception is "and" and "or" with either 0 or -1 in which case we can
2445 // propagate non constant operands into select. I.e.:
2446 // and (select Cond, 0, -1), X --> select Cond, 0, X
2447 // or X, (select Cond, -1, 0) --> select Cond, -1, X
2448 bool CanFoldNonConst =
2449 (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
2450 ((isNullOrNullSplat(CT) && isAllOnesOrAllOnesSplat(CF)) ||
2451 (isNullOrNullSplat(CF) && isAllOnesOrAllOnesSplat(CT)));
2452
2453 SDValue CBO = BO->getOperand(SelOpNo ^ 1);
2454 if (!CanFoldNonConst &&
2455 !isConstantOrConstantVector(CBO, true) &&
2456 !DAG.isConstantFPBuildVectorOrConstantFP(CBO))
2457 return SDValue();
2458
2459 SDLoc DL(Sel);
2460 SDValue NewCT, NewCF;
2461
2462 if (CanFoldNonConst) {
2463 // If CBO is an opaque constant, we can't rely on getNode to constant fold.
2464 if ((BinOpcode == ISD::AND && isNullOrNullSplat(CT)) ||
2465 (BinOpcode == ISD::OR && isAllOnesOrAllOnesSplat(CT)))
2466 NewCT = CT;
2467 else
2468 NewCT = CBO;
2469
2470 if ((BinOpcode == ISD::AND && isNullOrNullSplat(CF)) ||
2471 (BinOpcode == ISD::OR && isAllOnesOrAllOnesSplat(CF)))
2472 NewCF = CF;
2473 else
2474 NewCF = CBO;
2475 } else {
2476 // We have a select-of-constants followed by a binary operator with a
2477 // constant. Eliminate the binop by pulling the constant math into the
2478 // select. Example: add (select Cond, CT, CF), CBO --> select Cond, CT +
2479 // CBO, CF + CBO
2480 NewCT = SelOpNo ? DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CBO, CT})
2481 : DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CT, CBO});
2482 if (!NewCT)
2483 return SDValue();
2484
2485 NewCF = SelOpNo ? DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CBO, CF})
2486 : DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CF, CBO});
2487 if (!NewCF)
2488 return SDValue();
2489 }
2490
2491 SDValue SelectOp = DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
2492 SelectOp->setFlags(BO->getFlags());
2493 return SelectOp;
2494}
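// For illustration of the constant-folding path:
//   add (select Cond, 1, 2), 8 --> select Cond, 9, 10
// and of the non-constant and/or special case:
//   and (select Cond, 0, -1), X --> select Cond, 0, X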
2495
2496 static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, const SDLoc &DL,
2497 SelectionDAG &DAG) {
2498 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2499 "Expecting add or sub");
2500
2501 // Match a constant operand and a zext operand for the math instruction:
2502 // add Z, C
2503 // sub C, Z
2504 bool IsAdd = N->getOpcode() == ISD::ADD;
2505 SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
2506 SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
2507 auto *CN = dyn_cast<ConstantSDNode>(C);
2508 if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
2509 return SDValue();
2510
2511 // Match the zext operand as a setcc of a boolean.
2512 if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
2513 Z.getOperand(0).getValueType() != MVT::i1)
2514 return SDValue();
2515
2516 // Match the compare as: setcc (X & 1), 0, eq.
2517 SDValue SetCC = Z.getOperand(0);
2518 ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
2519 if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
2520 SetCC.getOperand(0).getOpcode() != ISD::AND ||
2521 !isOneConstant(SetCC.getOperand(0).getOperand(1)))
2522 return SDValue();
2523
2524 // We are adding/subtracting a constant and an inverted low bit. Turn that
2525 // into a subtract/add of the low bit with incremented/decremented constant:
2526 // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
2527 // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
2528 EVT VT = C.getValueType();
2529 SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
2530 SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
2531 DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
2532 return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
2533}
2534
2535// Attempt to form avgceil(A, B) from (A | B) - ((A ^ B) >> 1)
2536SDValue DAGCombiner::foldSubToAvg(SDNode *N, const SDLoc &DL) {
2537 SDValue N0 = N->getOperand(0);
2538 EVT VT = N0.getValueType();
2539 SDValue A, B;
2540
2541 if (hasOperation(ISD::AVGCEILU, VT) &&
2542 sd_match(N, m_Sub(m_Or(m_Value(A), m_Value(B)),
2543 m_Srl(m_Xor(m_Deferred(A), m_Deferred(B)),
2544 m_SpecificInt(1))))) {
2545 return DAG.getNode(ISD::AVGCEILU, DL, VT, A, B);
2546 }
2547 if (hasOperation(ISD::AVGCEILS, VT) &&
2548 sd_match(N, m_Sub(m_Or(m_Value(A), m_Value(B)),
2549 m_Sra(m_Xor(m_Deferred(A), m_Deferred(B)),
2550 m_SpecificInt(1))))) {
2551 return DAG.getNode(ISD::AVGCEILS, DL, VT, A, B);
2552 }
2553 return SDValue();
2554}
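// The match above relies on the identity A | B = (A & B) + (A ^ B), which gives
//   (A | B) - ((A ^ B) >> 1) = (A & B) + ceil((A ^ B) / 2) = ceil((A + B) / 2),
// i.e. avgceil(A, B) computed without a wider intermediate add.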
2555
2556/// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
2557/// a shift and add with a different constant.
2558 static SDValue foldAddSubOfSignBit(SDNode *N, const SDLoc &DL,
2559 SelectionDAG &DAG) {
2560 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2561 "Expecting add or sub");
2562
2563 // We need a constant operand for the add/sub, and the other operand is a
2564 // logical shift right: add (srl), C or sub C, (srl).
2565 bool IsAdd = N->getOpcode() == ISD::ADD;
2566 SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
2567 SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
2568 if (!DAG.isConstantIntBuildVectorOrConstantInt(ConstantOp) ||
2569 ShiftOp.getOpcode() != ISD::SRL)
2570 return SDValue();
2571
2572 // The shift must be of a 'not' value.
2573 SDValue Not = ShiftOp.getOperand(0);
2574 if (!Not.hasOneUse() || !isBitwiseNot(Not))
2575 return SDValue();
2576
2577 // The shift must be moving the sign bit to the least-significant-bit.
2578 EVT VT = ShiftOp.getValueType();
2579 SDValue ShAmt = ShiftOp.getOperand(1);
2580 ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2581 if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1))
2582 return SDValue();
2583
2584 // Eliminate the 'not' by adjusting the shift and add/sub constant:
2585 // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2586 // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
2587 if (SDValue NewC = DAG.FoldConstantArithmetic(
2588 IsAdd ? ISD::ADD : ISD::SUB, DL, VT,
2589 {ConstantOp, DAG.getConstant(1, DL, VT)})) {
2590 SDValue NewShift = DAG.getNode(IsAdd ? ISD::SRA : ISD::SRL, DL, VT,
2591 Not.getOperand(0), ShAmt);
2592 return DAG.getNode(ISD::ADD, DL, VT, NewShift, NewC);
2593 }
2594
2595 return SDValue();
2596}
2597
2598static bool
2599 areBitwiseNotOfEachother(SDValue Op0, SDValue Op1) {
2600 return (isBitwiseNot(Op0) && Op0.getOperand(0) == Op1) ||
2601 (isBitwiseNot(Op1) && Op1.getOperand(0) == Op0);
2602}
2603
2604/// Try to fold a node that behaves like an ADD (note that N isn't necessarily
2605/// an ISD::ADD here, it could for example be an ISD::OR if we know that there
2606/// are no common bits set in the operands).
2607SDValue DAGCombiner::visitADDLike(SDNode *N) {
2608 SDValue N0 = N->getOperand(0);
2609 SDValue N1 = N->getOperand(1);
2610 EVT VT = N0.getValueType();
2611 SDLoc DL(N);
2612
2613 // fold (add x, undef) -> undef
2614 if (N0.isUndef())
2615 return N0;
2616 if (N1.isUndef())
2617 return N1;
2618
2619 // fold (add c1, c2) -> c1+c2
2620 if (SDValue C = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0, N1}))
2621 return C;
2622
2623 // canonicalize constant to RHS
2624 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2625 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2626 return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
2627
2628 if (areBitwiseNotOfEachother(N0, N1))
2629 return DAG.getConstant(APInt::getAllOnes(VT.getScalarSizeInBits()),
2630 SDLoc(N), VT);
2631
2632 // fold vector ops
2633 if (VT.isVector()) {
2634 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
2635 return FoldedVOp;
2636
2637 // fold (add x, 0) -> x, vector edition
2638 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
2639 return N0;
2640 }
2641
2642 // fold (add x, 0) -> x
2643 if (isNullConstant(N1))
2644 return N0;
2645
2646 if (N0.getOpcode() == ISD::SUB) {
2647 SDValue N00 = N0.getOperand(0);
2648 SDValue N01 = N0.getOperand(1);
2649
2650 // fold ((A-c1)+c2) -> (A+(c2-c1))
2651 if (SDValue Sub = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N1, N01}))
2652 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub);
2653
2654 // fold ((c1-A)+c2) -> (c1+c2)-A
2655 if (SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N00}))
2656 return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2657 }
2658
2659 // add (sext i1 X), 1 -> zext (not i1 X)
2660 // We don't transform this pattern:
2661 // add (zext i1 X), -1 -> sext (not i1 X)
2662 // because most (?) targets generate better code for the zext form.
2663 if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
2664 isOneOrOneSplat(N1)) {
2665 SDValue X = N0.getOperand(0);
2666 if ((!LegalOperations ||
2667 (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
2668 TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
2669 X.getScalarValueSizeInBits() == 1) {
2670 SDValue Not = DAG.getNOT(DL, X, X.getValueType());
2671 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
2672 }
2673 }
2674
2675 // Fold (add (or x, c0), c1) -> (add x, (c0 + c1))
2676 // iff (or x, c0) is equivalent to (add x, c0).
2677 // Fold (add (xor x, c0), c1) -> (add x, (c0 + c1))
2678 // iff (xor x, c0) is equivalent to (add x, c0).
2679 if (DAG.isADDLike(N0)) {
2680 SDValue N01 = N0.getOperand(1);
2681 if (SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N01}))
2682 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add);
2683 }
2684
2685 if (SDValue NewSel = foldBinOpIntoSelect(N))
2686 return NewSel;
2687
2688 // reassociate add
2689 if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N, N0, N1)) {
2690 if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
2691 return RADD;
2692
2693 // Reassociate (add (or x, c), y) -> (add add(x, y), c)) if (or x, c) is
2694 // equivalent to (add x, c).
2695 // Reassociate (add (xor x, c), y) -> (add add(x, y), c)) if (xor x, c) is
2696 // equivalent to (add x, c).
2697 // Do this optimization only when adding c does not introduce instructions
2698 // for adding carries.
2699 auto ReassociateAddOr = [&](SDValue N0, SDValue N1) {
2700 if (DAG.isADDLike(N0) && N0.hasOneUse() &&
2701 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
2702 // If N0's type does not split or is a sign mask, it does not introduce
2703 // add carry.
2704 auto TyActn = TLI.getTypeAction(*DAG.getContext(), N0.getValueType());
2705 bool NoAddCarry = TyActn == TargetLoweringBase::TypeLegal ||
2706 TyActn == TargetLoweringBase::TypePromoteInteger ||
2707 isMinSignedConstant(N0.getOperand(1));
2708 if (NoAddCarry)
2709 return DAG.getNode(
2710 ISD::ADD, DL, VT,
2711 DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
2712 N0.getOperand(1));
2713 }
2714 return SDValue();
2715 };
2716 if (SDValue Add = ReassociateAddOr(N0, N1))
2717 return Add;
2718 if (SDValue Add = ReassociateAddOr(N1, N0))
2719 return Add;
2720
2721 // Fold add(vecreduce(x), vecreduce(y)) -> vecreduce(add(x, y))
2722 if (SDValue SD =
2723 reassociateReduction(ISD::VECREDUCE_ADD, ISD::ADD, DL, VT, N0, N1))
2724 return SD;
2725 }
2726
2727 SDValue A, B, C;
2728
2729 // fold ((0-A) + B) -> B-A
2730 if (sd_match(N0, m_Neg(m_Value(A))))
2731 return DAG.getNode(ISD::SUB, DL, VT, N1, A);
2732
2733 // fold (A + (0-B)) -> A-B
2734 if (sd_match(N1, m_Neg(m_Value(B))))
2735 return DAG.getNode(ISD::SUB, DL, VT, N0, B);
2736
2737 // fold (A+(B-A)) -> B
2738 if (sd_match(N1, m_Sub(m_Value(B), m_Specific(N0))))
2739 return B;
2740
2741 // fold ((B-A)+A) -> B
2742 if (sd_match(N0, m_Sub(m_Value(B), m_Specific(N1))))
2743 return B;
2744
2745 // fold ((A-B)+(C-A)) -> (C-B)
2746 if (sd_match(N0, m_Sub(m_Value(A), m_Value(B))) &&
2747 sd_match(N1, m_Sub(m_Value(C), m_Specific(A))))
2748 return DAG.getNode(ISD::SUB, DL, VT, C, B);
2749
2750 // fold ((A-B)+(B-C)) -> (A-C)
2751 if (sd_match(N0, m_Sub(m_Value(A), m_Value(B))) &&
2752 sd_match(N1, m_Sub(m_Specific(B), m_Value(C))))
2753 return DAG.getNode(ISD::SUB, DL, VT, A, C);
2754
2755 // fold (A+(B-(A+C))) to (B-C)
2756 // fold (A+(B-(C+A))) to (B-C)
2757 if (sd_match(N1, m_Sub(m_Value(B), m_Add(m_Specific(N0), m_Value(C)))))
2758 return DAG.getNode(ISD::SUB, DL, VT, B, C);
2759
2760 // fold (A+((B-A)+or-C)) to (B+or-C)
2761 if (sd_match(N1,
2762 m_AnyOf(m_Add(m_Sub(m_Value(B), m_Specific(N0)), m_Value(C)),
2763 m_Sub(m_Sub(m_Value(B), m_Specific(N0)), m_Value(C)))))
2764 return DAG.getNode(N1.getOpcode(), DL, VT, B, C);
2765
2766 // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
2767 if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2768 N0->hasOneUse() && N1->hasOneUse()) {
2769 SDValue N00 = N0.getOperand(0);
2770 SDValue N01 = N0.getOperand(1);
2771 SDValue N10 = N1.getOperand(0);
2772 SDValue N11 = N1.getOperand(1);
2773
2774 if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
2775 return DAG.getNode(ISD::SUB, DL, VT,
2776 DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
2777 DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
2778 }
2779
2780 // fold (add (umax X, C), -C) --> (usubsat X, C)
2781 if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
2782 auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
2783 return (!Max && !Op) ||
2784 (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
2785 };
2786 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
2787 /*AllowUndefs*/ true))
2788 return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
2789 N0.getOperand(1));
2790 }
2791
2792 if (SimplifyDemandedBits(SDValue(N, 0)))
2793 return SDValue(N, 0);
2794
2795 if (isOneOrOneSplat(N1)) {
2796 // fold (add (xor a, -1), 1) -> (sub 0, a)
2797 if (isBitwiseNot(N0))
2798 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
2799 N0.getOperand(0));
2800
2801 // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
2802 if (N0.getOpcode() == ISD::ADD) {
2803 SDValue A, Xor;
2804
2805 if (isBitwiseNot(N0.getOperand(0))) {
2806 A = N0.getOperand(1);
2807 Xor = N0.getOperand(0);
2808 } else if (isBitwiseNot(N0.getOperand(1))) {
2809 A = N0.getOperand(0);
2810 Xor = N0.getOperand(1);
2811 }
2812
2813 if (Xor)
2814 return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
2815 }
2816
2817 // Look for:
2818 // add (add x, y), 1
2819 // And if the target does not like this form then turn into:
2820 // sub y, (xor x, -1)
2821 if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
2822 N0.hasOneUse() &&
2823 // Limit this to after legalization if the add has wrap flags
2824 (Level >= AfterLegalizeDAG || (!N->getFlags().hasNoUnsignedWrap() &&
2825 !N->getFlags().hasNoSignedWrap()))) {
2826 SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
2827 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
2828 }
2829 }
2830
2831 // (x - y) + -1 -> add (xor y, -1), x
2832 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
2833 isAllOnesOrAllOnesSplat(N1, /*AllowUndefs=*/true)) {
2834 SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), N1);
2835 return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
2836 }
2837
2838 if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
2839 return Combined;
2840
2841 if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
2842 return Combined;
2843
2844 return SDValue();
2845}
2846
2847// Attempt to form avgfloor(A, B) from (A & B) + ((A ^ B) >> 1)
2848SDValue DAGCombiner::foldAddToAvg(SDNode *N, const SDLoc &DL) {
2849 SDValue N0 = N->getOperand(0);
2850 EVT VT = N0.getValueType();
2851 SDValue A, B;
2852
2853 if (hasOperation(ISD::AVGFLOORU, VT) &&
2854 sd_match(N, m_Add(m_And(m_Value(A), m_Value(B)),
2855 m_Srl(m_Xor(m_Deferred(A), m_Deferred(B)),
2856 m_SpecificInt(1))))) {
2857 return DAG.getNode(ISD::AVGFLOORU, DL, VT, A, B);
2858 }
2859 if (hasOperation(ISD::AVGFLOORS, VT) &&
2860 sd_match(N, m_Add(m_And(m_Value(A), m_Value(B)),
2861 m_Sra(m_Xor(m_Deferred(A), m_Deferred(B)),
2862 m_SpecificInt(1))))) {
2863 return DAG.getNode(ISD::AVGFLOORS, DL, VT, A, B);
2864 }
2865
2866 return SDValue();
2867}
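// The match above relies on the identity A + B = 2 * (A & B) + (A ^ B), so
//   (A & B) + ((A ^ B) >> 1) = floor((A + B) / 2) = avgfloor(A, B)
// computed without the intermediate add overflowing the element type.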
2868
2869SDValue DAGCombiner::visitADD(SDNode *N) {
2870 SDValue N0 = N->getOperand(0);
2871 SDValue N1 = N->getOperand(1);
2872 EVT VT = N0.getValueType();
2873 SDLoc DL(N);
2874
2875 if (SDValue Combined = visitADDLike(N))
2876 return Combined;
2877
2878 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DL, DAG))
2879 return V;
2880
2881 if (SDValue V = foldAddSubOfSignBit(N, DL, DAG))
2882 return V;
2883
2884 // Try to match AVGFLOOR fixedwidth pattern
2885 if (SDValue V = foldAddToAvg(N, DL))
2886 return V;
2887
2888 // fold (a+b) -> (a|b) iff a and b share no bits.
2889 if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
2890 DAG.haveNoCommonBitsSet(N0, N1))
2891 return DAG.getNode(ISD::OR, DL, VT, N0, N1);
2892
2893 // Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)).
2894 if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
2895 const APInt &C0 = N0->getConstantOperandAPInt(0);
2896 const APInt &C1 = N1->getConstantOperandAPInt(0);
2897 return DAG.getVScale(DL, VT, C0 + C1);
2898 }
2899
2900 // fold a+vscale(c1)+vscale(c2) -> a+vscale(c1+c2)
2901 if (N0.getOpcode() == ISD::ADD &&
2902 N0.getOperand(1).getOpcode() == ISD::VSCALE &&
2903 N1.getOpcode() == ISD::VSCALE) {
2904 const APInt &VS0 = N0.getOperand(1)->getConstantOperandAPInt(0);
2905 const APInt &VS1 = N1->getConstantOperandAPInt(0);
2906 SDValue VS = DAG.getVScale(DL, VT, VS0 + VS1);
2907 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), VS);
2908 }
2909
2910 // Fold (add step_vector(c1), step_vector(c2) to step_vector(c1+c2))
2911 if (N0.getOpcode() == ISD::STEP_VECTOR &&
2912 N1.getOpcode() == ISD::STEP_VECTOR) {
2913 const APInt &C0 = N0->getConstantOperandAPInt(0);
2914 const APInt &C1 = N1->getConstantOperandAPInt(0);
2915 APInt NewStep = C0 + C1;
2916 return DAG.getStepVector(DL, VT, NewStep);
2917 }
2918
2919 // Fold a + step_vector(c1) + step_vector(c2) to a + step_vector(c1+c2)
2920 if (N0.getOpcode() == ISD::ADD &&
2921 N0.getOperand(1).getOpcode() == ISD::STEP_VECTOR &&
2922 N1.getOpcode() == ISD::STEP_VECTOR) {
2923 const APInt &SV0 = N0.getOperand(1)->getConstantOperandAPInt(0);
2924 const APInt &SV1 = N1->getConstantOperandAPInt(0);
2925 APInt NewStep = SV0 + SV1;
2926 SDValue SV = DAG.getStepVector(DL, VT, NewStep);
2927 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), SV);
2928 }
2929
2930 return SDValue();
2931}
2932
2933SDValue DAGCombiner::visitADDSAT(SDNode *N) {
2934 unsigned Opcode = N->getOpcode();
2935 SDValue N0 = N->getOperand(0);
2936 SDValue N1 = N->getOperand(1);
2937 EVT VT = N0.getValueType();
2938 bool IsSigned = Opcode == ISD::SADDSAT;
2939 SDLoc DL(N);
2940
2941 // fold (add_sat x, undef) -> -1
2942 if (N0.isUndef() || N1.isUndef())
2943 return DAG.getAllOnesConstant(DL, VT);
2944
2945 // fold (add_sat c1, c2) -> c3
2946 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
2947 return C;
2948
2949 // canonicalize constant to RHS
2950 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2951 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2952 return DAG.getNode(Opcode, DL, VT, N1, N0);
2953
2954 // fold vector ops
2955 if (VT.isVector()) {
2956 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
2957 return FoldedVOp;
2958
2959 // fold (add_sat x, 0) -> x, vector edition
2960 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
2961 return N0;
2962 }
2963
2964 // fold (add_sat x, 0) -> x
2965 if (isNullConstant(N1))
2966 return N0;
2967
2968 // If it cannot overflow, transform into an add.
2969 if (DAG.willNotOverflowAdd(IsSigned, N0, N1))
2970 return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
2971
2972 return SDValue();
2973}
2974
2975 static SDValue getAsCarry(const TargetLowering &TLI, SDValue V,
2976 bool ForceCarryReconstruction = false) {
2977 bool Masked = false;
2978
2979 // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
2980 while (true) {
2981 if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
2982 V = V.getOperand(0);
2983 continue;
2984 }
2985
2986 if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
2987 if (ForceCarryReconstruction)
2988 return V;
2989
2990 Masked = true;
2991 V = V.getOperand(0);
2992 continue;
2993 }
2994
2995 if (ForceCarryReconstruction && V.getValueType() == MVT::i1)
2996 return V;
2997
2998 break;
2999 }
3000
3001 // If this is not a carry, return.
3002 if (V.getResNo() != 1)
3003 return SDValue();
3004
3005 if (V.getOpcode() != ISD::UADDO_CARRY && V.getOpcode() != ISD::USUBO_CARRY &&
3006 V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
3007 return SDValue();
3008
3009 EVT VT = V->getValueType(0);
3010 if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
3011 return SDValue();
3012
3013 // If the result is masked, then no matter what kind of bool it is we can
3014 // return. If it isn't, then we need to make sure the bool type is either 0 or
3015 // 1 and not other values.
3016 if (Masked ||
3017 TLI.getBooleanContents(V.getValueType()) ==
3018 TargetLowering::ZeroOrOneBooleanContent)
3019 return V;
3020
3021 return SDValue();
3022}
3023
3024/// Given the operands of an add/sub operation, see if the 2nd operand is a
3025/// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
3026/// the opcode and bypass the mask operation.
3027static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
3028 SelectionDAG &DAG, const SDLoc &DL) {
3029 if (N1.getOpcode() == ISD::ZERO_EXTEND)
3030 N1 = N1.getOperand(0);
3031
3032 if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
3033 return SDValue();
3034
3035 EVT VT = N0.getValueType();
3036 SDValue N10 = N1.getOperand(0);
3037 if (N10.getValueType() != VT && N10.getOpcode() == ISD::TRUNCATE)
3038 N10 = N10.getOperand(0);
3039
3040 if (N10.getValueType() != VT)
3041 return SDValue();
3042
3043 if (DAG.ComputeNumSignBits(N10) != VT.getScalarSizeInBits())
3044 return SDValue();
3045
3046 // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
3047 // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
3048 return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N10);
3049}
3050
3051/// Helper for doing combines based on N0 and N1 being added to each other.
3052SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
3053 SDNode *LocReference) {
3054 EVT VT = N0.getValueType();
3055 SDLoc DL(LocReference);
3056
3057 // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
3058 if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
3059 isNullOrNullSplat(N1.getOperand(0).getOperand(0)))
3060 return DAG.getNode(ISD::SUB, DL, VT, N0,
3061 DAG.getNode(ISD::SHL, DL, VT,
3062 N1.getOperand(0).getOperand(1),
3063 N1.getOperand(1)));
3064
3065 if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
3066 return V;
3067
3068 // Look for:
3069 // add (add x, 1), y
3070 // And if the target does not like this form then turn into:
3071 // sub y, (xor x, -1)
3072 if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
3073 N0.hasOneUse() && isOneOrOneSplat(N0.getOperand(1)) &&
3074 // Limit this to after legalization if the add has wrap flags
3075 (Level >= AfterLegalizeDAG || (!N0->getFlags().hasNoUnsignedWrap() &&
3076 !N0->getFlags().hasNoSignedWrap()))) {
3077 SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
3078 return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
3079 }
3080
3081 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse()) {
3082 // Hoist one-use subtraction by non-opaque constant:
3083 // (x - C) + y -> (x + y) - C
3084 // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
3085 if (isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3086 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
3087 return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
3088 }
3089 // Hoist one-use subtraction from non-opaque constant:
3090 // (C - x) + y -> (y - x) + C
3091 if (isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
3092 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
3093 return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0));
3094 }
3095 }
3096
3097 // add (mul x, C), x -> mul x, C+1
3098 if (N0.getOpcode() == ISD::MUL && N0.getOperand(0) == N1 &&
3099 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true) &&
3100 N0.hasOneUse()) {
3101 SDValue NewC = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
3102 DAG.getConstant(1, DL, VT));
3103 return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), NewC);
3104 }
3105
3106 // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
3107 // rather than 'add 0/-1' (the zext should get folded).
3108 // add (sext i1 Y), X --> sub X, (zext i1 Y)
3109 if (N0.getOpcode() == ISD::SIGN_EXTEND &&
3110 N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
3111 TLI.getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent) {
3112 SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
3113 return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
3114 }
3115
3116 // add X, (sextinreg Y i1) -> sub X, (and Y 1)
3117 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
3118 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
3119 if (TN->getVT() == MVT::i1) {
3120 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
3121 DAG.getConstant(1, DL, VT));
3122 return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
3123 }
3124 }
3125
3126 // (add X, (uaddo_carry Y, 0, Carry)) -> (uaddo_carry X, Y, Carry)
3127 if (N1.getOpcode() == ISD::UADDO_CARRY && isNullConstant(N1.getOperand(1)) &&
3128 N1.getResNo() == 0)
3129 return DAG.getNode(ISD::UADDO_CARRY, DL, N1->getVTList(),
3130 N0, N1.getOperand(0), N1.getOperand(2));
3131
3132 // (add X, Carry) -> (uaddo_carry X, 0, Carry)
3133 if (TLI.isOperationLegalOrCustom(ISD::UADDO_CARRY, VT))
3134 if (SDValue Carry = getAsCarry(TLI, N1))
3135 return DAG.getNode(ISD::UADDO_CARRY, DL,
3136 DAG.getVTList(VT, Carry.getValueType()), N0,
3137 DAG.getConstant(0, DL, VT), Carry);
3138
3139 return SDValue();
3140}
3141
3142SDValue DAGCombiner::visitADDC(SDNode *N) {
3143 SDValue N0 = N->getOperand(0);
3144 SDValue N1 = N->getOperand(1);
3145 EVT VT = N0.getValueType();
3146 SDLoc DL(N);
3147
3148 // If the flag result is dead, turn this into an ADD.
3149 if (!N->hasAnyUseOfValue(1))
3150 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3151 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3152
3153 // canonicalize constant to RHS.
3154 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3155 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3156 if (N0C && !N1C)
3157 return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
3158
3159 // fold (addc x, 0) -> x + no carry out
3160 if (isNullConstant(N1))
3161 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
3162 DL, MVT::Glue));
3163
3164 // If it cannot overflow, transform into an add.
3165 if (DAG.computeOverflowForUnsignedAdd(N0, N1) == SelectionDAG::OFK_Never)
3166 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3167 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3168
3169 return SDValue();
3170}
3171
3172/**
3173 * Flips a boolean if it is cheaper to compute. If the Force parameter is set,
3174 * then the flip also occurs if computing the inverse is the same cost.
3175 * This function returns an empty SDValue in case it cannot flip the boolean
3176 * without increasing the cost of the computation. If you want to flip a boolean
3177 * no matter what, use DAG.getLogicalNOT.
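 * For example, with ZeroOrOneBooleanContent a value of the form (xor X, 1) is
 * already a flipped boolean, so X itself is returned at no extra cost.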
3178 */
3179 static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG,
3180 const TargetLowering &TLI,
3181 bool Force) {
3182 if (Force && isa<ConstantSDNode>(V))
3183 return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
3184
3185 if (V.getOpcode() != ISD::XOR)
3186 return SDValue();
3187
3188 ConstantSDNode *Const = isConstOrConstSplat(V.getOperand(1), false);
3189 if (!Const)
3190 return SDValue();
3191
3192 EVT VT = V.getValueType();
3193
3194 bool IsFlip = false;
3195 switch (TLI.getBooleanContents(VT)) {
3196 case TargetLowering::ZeroOrOneBooleanContent:
3197 IsFlip = Const->isOne();
3198 break;
3199 case TargetLowering::ZeroOrNegativeOneBooleanContent:
3200 IsFlip = Const->isAllOnes();
3201 break;
3202 case TargetLowering::UndefinedBooleanContent:
3203 IsFlip = (Const->getAPIntValue() & 0x01) == 1;
3204 break;
3205 }
3206
3207 if (IsFlip)
3208 return V.getOperand(0);
3209 if (Force)
3210 return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
3211 return SDValue();
3212}
3213
3214SDValue DAGCombiner::visitADDO(SDNode *N) {
3215 SDValue N0 = N->getOperand(0);
3216 SDValue N1 = N->getOperand(1);
3217 EVT VT = N0.getValueType();
3218 bool IsSigned = (ISD::SADDO == N->getOpcode());
3219
3220 EVT CarryVT = N->getValueType(1);
3221 SDLoc DL(N);
3222
3223 // If the flag result is dead, turn this into an ADD.
3224 if (!N->hasAnyUseOfValue(1))
3225 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3226 DAG.getUNDEF(CarryVT));
3227
3228 // canonicalize constant to RHS.
3229 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3230 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3231 return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
3232
3233 // fold (addo x, 0) -> x + no carry out
3234 if (isNullOrNullSplat(N1))
3235 return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
3236
3237 // If it cannot overflow, transform into an add.
3238 if (DAG.willNotOverflowAdd(IsSigned, N0, N1))
3239 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3240 DAG.getConstant(0, DL, CarryVT));
3241
3242 if (IsSigned) {
3243 // fold (saddo (xor a, -1), 1) -> (ssub 0, a).
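// ((xor a, -1) + 1 == -a in two's complement, with the same signed overflow as 0 - a.)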
3244 if (isBitwiseNot(N0) && isOneOrOneSplat(N1))
3245 return DAG.getNode(ISD::SSUBO, DL, N->getVTList(),
3246 DAG.getConstant(0, DL, VT), N0.getOperand(0));
3247 } else {
3248 // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
3249 if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
3250 SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
3251 DAG.getConstant(0, DL, VT), N0.getOperand(0));
3252 return CombineTo(
3253 N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
3254 }
3255
3256 if (SDValue Combined = visitUADDOLike(N0, N1, N))
3257 return Combined;
3258
3259 if (SDValue Combined = visitUADDOLike(N1, N0, N))
3260 return Combined;
3261 }
3262
3263 return SDValue();
3264}
3265
3266SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
3267 EVT VT = N0.getValueType();
3268 if (VT.isVector())
3269 return SDValue();
3270
3271 // (uaddo X, (uaddo_carry Y, 0, Carry)) -> (uaddo_carry X, Y, Carry)
3272 // If Y + 1 cannot overflow.
3273 if (N1.getOpcode() == ISD::UADDO_CARRY && isNullConstant(N1.getOperand(1))) {
3274 SDValue Y = N1.getOperand(0);
3275 SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
3276 if (DAG.computeOverflowForUnsignedAdd(Y, One) == SelectionDAG::OFK_Never)
3277 return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(), N0, Y,
3278 N1.getOperand(2));
3279 }
3280
3281 // (uaddo X, Carry) -> (uaddo_carry X, 0, Carry)
3282 if (TLI.isOperationLegalOrCustom(ISD::UADDO_CARRY, VT))
3283 if (SDValue Carry = getAsCarry(TLI, N1))
3284 return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(), N0,
3285 DAG.getConstant(0, SDLoc(N), VT), Carry);
3286
3287 return SDValue();
3288}
3289
3290SDValue DAGCombiner::visitADDE(SDNode *N) {
3291 SDValue N0 = N->getOperand(0);
3292 SDValue N1 = N->getOperand(1);
3293 SDValue CarryIn = N->getOperand(2);
3294
3295 // canonicalize constant to RHS
3296 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3297 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3298 if (N0C && !N1C)
3299 return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
3300 N1, N0, CarryIn);
3301
3302 // fold (adde x, y, false) -> (addc x, y)
3303 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
3304 return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
3305
3306 return SDValue();
3307}
3308
3309SDValue DAGCombiner::visitUADDO_CARRY(SDNode *N) {
3310 SDValue N0 = N->getOperand(0);
3311 SDValue N1 = N->getOperand(1);
3312 SDValue CarryIn = N->getOperand(2);
3313 SDLoc DL(N);
3314
3315 // canonicalize constant to RHS
3316 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3317 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3318 if (N0C && !N1C)
3319 return DAG.getNode(ISD::UADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
3320
3321 // fold (uaddo_carry x, y, false) -> (uaddo x, y)
3322 if (isNullConstant(CarryIn)) {
3323 if (!LegalOperations ||
3324 TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
3325 return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
3326 }
3327
3328 // fold (uaddo_carry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
3329 if (isNullConstant(N0) && isNullConstant(N1)) {
3330 EVT VT = N0.getValueType();
3331 EVT CarryVT = CarryIn.getValueType();
3332 SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
3333 AddToWorklist(CarryExt.getNode());
3334 return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
3335 DAG.getConstant(1, DL, VT)),
3336 DAG.getConstant(0, DL, CarryVT));
3337 }
3338
3339 if (SDValue Combined = visitUADDO_CARRYLike(N0, N1, CarryIn, N))
3340 return Combined;
3341
3342 if (SDValue Combined = visitUADDO_CARRYLike(N1, N0, CarryIn, N))
3343 return Combined;
3344
3345 // We want to avoid useless duplication.
3346 // TODO: This is done automatically for binary operations. As UADDO_CARRY is
3347 // not a binary operation, it is not really possible to leverage this
3348 // existing mechanism for it. However, if more operations require the same
3349 // deduplication logic, then it may be worth generalizing.
3350 SDValue Ops[] = {N1, N0, CarryIn};
3351 SDNode *CSENode =
3352 DAG.getNodeIfExists(ISD::UADDO_CARRY, N->getVTList(), Ops, N->getFlags());
3353 if (CSENode)
3354 return SDValue(CSENode, 0);
3355
3356 return SDValue();
3357}
3358
3359/**
3360 * If we are facing some sort of diamond carry propagation pattern, try to
3361 * break it up to generate something like:
3362 * (uaddo_carry X, 0, (uaddo_carry A, B, Z):Carry)
3363 *
3364 * The end result is usually an increase in the number of operations required, but because the
3365 * carry is now linearized, other transforms can kick in and optimize the DAG.
3366 *
3367 * Patterns typically look something like
3368 * (uaddo A, B)
3369 * / \
3370 * Carry Sum
3371 * | \
3372 * | (uaddo_carry *, 0, Z)
3373 * | /
3374 * \ Carry
3375 * | /
3376 * (uaddo_carry X, *, *)
3377 *
3378 * But numerous variations exist. Our goal is to identify A, B, X and Z and
3379 * produce a combine with a single path for carry propagation.
3380 */
3381 static SDValue combineUADDO_CARRYDiamond(DAGCombiner &Combiner,
3382 SelectionDAG &DAG, SDValue X,
3383 SDValue Carry0, SDValue Carry1,
3384 SDNode *N) {
3385 if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1)
3386 return SDValue();
3387 if (Carry1.getOpcode() != ISD::UADDO)
3388 return SDValue();
3389
3390 SDValue Z;
3391
3392 /**
3393 * First look for a suitable Z. It will present itself in the form of
3394 * (uaddo_carry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true
3395 */
3396 if (Carry0.getOpcode() == ISD::UADDO_CARRY &&
3397 isNullConstant(Carry0.getOperand(1))) {
3398 Z = Carry0.getOperand(2);
3399 } else if (Carry0.getOpcode() == ISD::UADDO &&
3400 isOneConstant(Carry0.getOperand(1))) {
3401 EVT VT = Combiner.getSetCCResultType(Carry0.getValueType());
3402 Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT);
3403 } else {
3404 // We couldn't find a suitable Z.
3405 return SDValue();
3406 }
3407
3408
3409 auto cancelDiamond = [&](SDValue A,SDValue B) {
3410 SDLoc DL(N);
3411 SDValue NewY =
3412 DAG.getNode(ISD::UADDO_CARRY, DL, Carry0->getVTList(), A, B, Z);
3413 Combiner.AddToWorklist(NewY.getNode());
3414 return DAG.getNode(ISD::UADDO_CARRY, DL, N->getVTList(), X,
3415 DAG.getConstant(0, DL, X.getValueType()),
3416 NewY.getValue(1));
3417 };
3418
3419 /**
3420 * (uaddo A, B)
3421 * |
3422 * Sum
3423 * |
3424 * (uaddo_carry *, 0, Z)
3425 */
3426 if (Carry0.getOperand(0) == Carry1.getValue(0)) {
3427 return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1));
3428 }
3429
3430 /**
3431 * (uaddo_carry A, 0, Z)
3432 * |
3433 * Sum
3434 * |
3435 * (uaddo *, B)
3436 */
3437 if (Carry1.getOperand(0) == Carry0.getValue(0)) {
3438 return cancelDiamond(Carry0.getOperand(0), Carry1.getOperand(1));
3439 }
3440
3441 if (Carry1.getOperand(1) == Carry0.getValue(0)) {
3442 return cancelDiamond(Carry1.getOperand(0), Carry0.getOperand(0));
3443 }
3444
3445 return SDValue();
3446}
3447
3448 // If we are facing some sort of diamond carry/borrow in/out pattern, try to
3449// match patterns like:
3450//
3451// (uaddo A, B) CarryIn
3452// | \ |
3453// | \ |
3454// PartialSum PartialCarryOutX /
3455// | | /
3456// | ____|____________/
3457// | / |
3458// (uaddo *, *) \________
3459// | \ \
3460// | \ |
3461// | PartialCarryOutY |
3462// | \ |
3463// | \ /
3464// AddCarrySum | ______/
3465// | /
3466// CarryOut = (or *, *)
3467//
3468// And generate UADDO_CARRY (or USUBO_CARRY) with two result values:
3469//
3470// {AddCarrySum, CarryOut} = (uaddo_carry A, B, CarryIn)
3471//
3472// Our goal is to identify A, B, and CarryIn and produce UADDO_CARRY/USUBO_CARRY
3473// with a single path for carry/borrow out propagation.
3474 static SDValue combineCarryDiamond(SelectionDAG &DAG, const TargetLowering &TLI,
3475 SDValue N0, SDValue N1, SDNode *N) {
3476 SDValue Carry0 = getAsCarry(TLI, N0);
3477 if (!Carry0)
3478 return SDValue();
3479 SDValue Carry1 = getAsCarry(TLI, N1);
3480 if (!Carry1)
3481 return SDValue();
3482
3483 unsigned Opcode = Carry0.getOpcode();
3484 if (Opcode != Carry1.getOpcode())
3485 return SDValue();
3486 if (Opcode != ISD::UADDO && Opcode != ISD::USUBO)
3487 return SDValue();
3488 // Guarantee identical type of CarryOut
3489 EVT CarryOutType = N->getValueType(0);
3490 if (CarryOutType != Carry0.getValue(1).getValueType() ||
3491 CarryOutType != Carry1.getValue(1).getValueType())
3492 return SDValue();
3493
3494 // Canonicalize the add/sub of A and B (the top node in the above ASCII art)
3495 // as Carry0 and the add/sub of the carry in as Carry1 (the middle node).
3496 if (Carry1.getNode()->isOperandOf(Carry0.getNode()))
3497 std::swap(Carry0, Carry1);
3498
3499 // Check if nodes are connected in expected way.
3500 if (Carry1.getOperand(0) != Carry0.getValue(0) &&
3501 Carry1.getOperand(1) != Carry0.getValue(0))
3502 return SDValue();
3503
3504 // The carry in value must be on the right-hand side for subtraction.
3505 unsigned CarryInOperandNum =
3506 Carry1.getOperand(0) == Carry0.getValue(0) ? 1 : 0;
3507 if (Opcode == ISD::USUBO && CarryInOperandNum != 1)
3508 return SDValue();
3509 SDValue CarryIn = Carry1.getOperand(CarryInOperandNum);
3510
3511 unsigned NewOp = Opcode == ISD::UADDO ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
3512 if (!TLI.isOperationLegalOrCustom(NewOp, Carry0.getValue(0).getValueType()))
3513 return SDValue();
3514
3515 // Verify that the carry/borrow in is plausibly a carry/borrow bit.
3516 CarryIn = getAsCarry(TLI, CarryIn, true);
3517 if (!CarryIn)
3518 return SDValue();
3519
3520 SDLoc DL(N);
3521 SDValue Merged =
3522 DAG.getNode(NewOp, DL, Carry1->getVTList(), Carry0.getOperand(0),
3523 Carry0.getOperand(1), CarryIn);
3524
3525 // Note that because the result of the UADDO/USUBO of A and B is proven to
3526 // feed into the UADDO/USUBO that takes the carry/borrow in, if the first
3527 // UADDO/USUBO overflows, the second UADDO/USUBO cannot.
3528 // For example, consider 8-bit numbers where 0xFF is
3529 // the maximum value.
3530 //
3531 // 0xFF + 0xFF == 0xFE with carry but 0xFE + 1 does not carry
3532 // 0x00 - 0xFF == 1 with a carry/borrow but 1 - 1 == 0 (no carry/borrow)
3533 //
3534 // This is important because it means that OR and XOR can be used to merge
3535 // carry flags; and that AND can return a constant zero.
3536 //
3537 // TODO: match other operations that can merge flags (ADD, etc)
3538 DAG.ReplaceAllUsesOfValueWith(Carry1.getValue(0), Merged.getValue(0));
3539 if (N->getOpcode() == ISD::AND)
3540 return DAG.getConstant(0, DL, CarryOutType);
3541 return Merged.getValue(1);
3542}
3543
3544SDValue DAGCombiner::visitUADDO_CARRYLike(SDValue N0, SDValue N1,
3545 SDValue CarryIn, SDNode *N) {
3546 // fold (uaddo_carry (xor a, -1), b, c) -> (usubo_carry b, a, !c) and flip
3547 // carry.
3548 if (isBitwiseNot(N0))
3549 if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) {
3550 SDLoc DL(N);
3551 SDValue Sub = DAG.getNode(ISD::USUBO_CARRY, DL, N->getVTList(), N1,
3552 N0.getOperand(0), NotC);
3553 return CombineTo(
3554 N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
3555 }
3556
3557 // Iff the flag result is dead:
3558 // (uaddo_carry (add|uaddo X, Y), 0, Carry) -> (uaddo_carry X, Y, Carry)
3559 // Don't do this if the Carry comes from the uaddo. It won't remove the uaddo
3560 // or the dependency between the instructions.
3561 if ((N0.getOpcode() == ISD::ADD ||
3562 (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0 &&
3563 N0.getValue(1) != CarryIn)) &&
3564 isNullConstant(N1) && !N->hasAnyUseOfValue(1))
3565 return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(),
3566 N0.getOperand(0), N0.getOperand(1), CarryIn);
3567
3568 /**
3569 * When one of the uaddo_carry arguments is itself a carry, we may be facing
3570 * a diamond carry propagation, in which case we try to transform the DAG
3571 * to ensure linear carry propagation if that is possible.
3572 */
3573 if (auto Y = getAsCarry(TLI, N1)) {
3574 // Because both are carries, Y and Z can be swapped.
3575 if (auto R = combineUADDO_CARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
3576 return R;
3577 if (auto R = combineUADDO_CARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
3578 return R;
3579 }
3580
3581 return SDValue();
3582}
3583
3584SDValue DAGCombiner::visitSADDO_CARRYLike(SDValue N0, SDValue N1,
3585 SDValue CarryIn, SDNode *N) {
3586 // fold (saddo_carry (xor a, -1), b, c) -> (ssubo_carry b, a, !c)
3587 if (isBitwiseNot(N0)) {
3588 if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true))
3589 return DAG.getNode(ISD::SSUBO_CARRY, SDLoc(N), N->getVTList(), N1,
3590 N0.getOperand(0), NotC);
3591 }
3592
3593 return SDValue();
3594}
3595
3596SDValue DAGCombiner::visitSADDO_CARRY(SDNode *N) {
3597 SDValue N0 = N->getOperand(0);
3598 SDValue N1 = N->getOperand(1);
3599 SDValue CarryIn = N->getOperand(2);
3600 SDLoc DL(N);
3601
3602 // canonicalize constant to RHS
3603 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3604 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3605 if (N0C && !N1C)
3606 return DAG.getNode(ISD::SADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
3607
3608 // fold (saddo_carry x, y, false) -> (saddo x, y)
3609 if (isNullConstant(CarryIn)) {
3610 if (!LegalOperations ||
3611 TLI.isOperationLegalOrCustom(ISD::SADDO, N->getValueType(0)))
3612 return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0, N1);
3613 }
3614
3615 if (SDValue Combined = visitSADDO_CARRYLike(N0, N1, CarryIn, N))
3616 return Combined;
3617
3618 if (SDValue Combined = visitSADDO_CARRYLike(N1, N0, CarryIn, N))
3619 return Combined;
3620
3621 return SDValue();
3622}
3623
3624// Attempt to create a USUBSAT(LHS, RHS) node with DstVT, performing a
3625// clamp/truncation if necessary.
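// For example, for SrcVT = i32 and DstVT = i16: if the upper 16 bits of LHS are
// known zero, RHS is clamped to at most 0xFFFF, both operands are truncated,
// and the USUBSAT is emitted at i16.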
3626static SDValue getTruncatedUSUBSAT(EVT DstVT, EVT SrcVT, SDValue LHS,
3627 SDValue RHS, SelectionDAG &DAG,
3628 const SDLoc &DL) {
3629 assert(DstVT.getScalarSizeInBits() <= SrcVT.getScalarSizeInBits() &&
3630 "Illegal truncation");
3631
3632 if (DstVT == SrcVT)
3633 return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3634
3635 // If the LHS is zero-extended then we can perform the USUBSAT as DstVT by
3636 // clamping RHS.
3637 APInt UpperBits = APInt::getBitsSetFrom(SrcVT.getScalarSizeInBits(),
3638 DstVT.getScalarSizeInBits());
3639 if (!DAG.MaskedValueIsZero(LHS, UpperBits))
3640 return SDValue();
3641
3642 SDValue SatLimit =
3643 DAG.getConstant(APInt::getLowBitsSet(SrcVT.getScalarSizeInBits(),
3644 DstVT.getScalarSizeInBits()),
3645 DL, SrcVT);
3646 RHS = DAG.getNode(ISD::UMIN, DL, SrcVT, RHS, SatLimit);
3647 RHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, RHS);
3648 LHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, LHS);
3649 return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3650}
3651
3652// Try to find umax(a,b) - b or a - umin(a,b) patterns that may be converted to
3653// usubsat(a,b), optionally as a truncated type.
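// (umax(a,b) - b equals a - b when a > b and 0 otherwise, which is exactly
// usubsat(a,b); a - umin(a,b) matches by the same reasoning.)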
3654SDValue DAGCombiner::foldSubToUSubSat(EVT DstVT, SDNode *N, const SDLoc &DL) {
3655 if (N->getOpcode() != ISD::SUB ||
3656 !(!LegalOperations || hasOperation(ISD::USUBSAT, DstVT)))
3657 return SDValue();
3658
3659 EVT SubVT = N->getValueType(0);
3660 SDValue Op0 = N->getOperand(0);
3661 SDValue Op1 = N->getOperand(1);
3662
3663 // Try to find umax(a,b) - b or a - umin(a,b) patterns
3664 // that may be converted to usubsat(a,b).
3665 if (Op0.getOpcode() == ISD::UMAX && Op0.hasOneUse()) {
3666 SDValue MaxLHS = Op0.getOperand(0);
3667 SDValue MaxRHS = Op0.getOperand(1);
3668 if (MaxLHS == Op1)
3669 return getTruncatedUSUBSAT(DstVT, SubVT, MaxRHS, Op1, DAG, DL);
3670 if (MaxRHS == Op1)
3671 return getTruncatedUSUBSAT(DstVT, SubVT, MaxLHS, Op1, DAG, DL);
3672 }
3673
3674 if (Op1.getOpcode() == ISD::UMIN && Op1.hasOneUse()) {
3675 SDValue MinLHS = Op1.getOperand(0);
3676 SDValue MinRHS = Op1.getOperand(1);
3677 if (MinLHS == Op0)
3678 return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinRHS, DAG, DL);
3679 if (MinRHS == Op0)
3680 return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinLHS, DAG, DL);
3681 }
3682
3683 // sub(a,trunc(umin(zext(a),b))) -> usubsat(a,trunc(umin(b,SatLimit)))
3684 if (Op1.getOpcode() == ISD::TRUNCATE &&
3685 Op1.getOperand(0).getOpcode() == ISD::UMIN &&
3686 Op1.getOperand(0).hasOneUse()) {
3687 SDValue MinLHS = Op1.getOperand(0).getOperand(0);
3688 SDValue MinRHS = Op1.getOperand(0).getOperand(1);
3689 if (MinLHS.getOpcode() == ISD::ZERO_EXTEND && MinLHS.getOperand(0) == Op0)
3690 return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinLHS, MinRHS,
3691 DAG, DL);
3692 if (MinRHS.getOpcode() == ISD::ZERO_EXTEND && MinRHS.getOperand(0) == Op0)
3693 return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinRHS, MinLHS,
3694 DAG, DL);
3695 }
3696
3697 return SDValue();
3698}
3699
3700// Since it may not be valid to emit a fold to zero for vector initializers
3701// check if we can before folding.
3702static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
3703 SelectionDAG &DAG, bool LegalOperations) {
3704 if (!VT.isVector())
3705 return DAG.getConstant(0, DL, VT);
3706 if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
3707 return DAG.getConstant(0, DL, VT);
3708 return SDValue();
3709}
3710
3711SDValue DAGCombiner::visitSUB(SDNode *N) {
3712 SDValue N0 = N->getOperand(0);
3713 SDValue N1 = N->getOperand(1);
3714 EVT VT = N0.getValueType();
3715 unsigned BitWidth = VT.getScalarSizeInBits();
3716 SDLoc DL(N);
3717
3718 auto PeekThroughFreeze = [](SDValue N) {
3719 if (N->getOpcode() == ISD::FREEZE && N.hasOneUse())
3720 return N->getOperand(0);
3721 return N;
3722 };
3723
3724 // fold (sub x, x) -> 0
3725 // FIXME: Refactor this and xor and other similar operations together.
3726 if (PeekThroughFreeze(N0) == PeekThroughFreeze(N1))
3727 return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
3728
3729 // fold (sub c1, c2) -> c3
3730 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N1}))
3731 return C;
3732
3733 // fold vector ops
3734 if (VT.isVector()) {
3735 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
3736 return FoldedVOp;
3737
3738 // fold (sub x, 0) -> x, vector edition
3739 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
3740 return N0;
3741 }
3742
3743 if (SDValue NewSel = foldBinOpIntoSelect(N))
3744 return NewSel;
3745
3746 // fold (sub x, c) -> (add x, -c)
3747 if (ConstantSDNode *N1C = getAsNonOpaqueConstant(N1))
3748 return DAG.getNode(ISD::ADD, DL, VT, N0,
3749 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3750
3751 if (isNullOrNullSplat(N0)) {
3752 // Right-shifting everything out but the sign bit followed by negation is
3753 // the same as flipping arithmetic/logical shift type without the negation:
3754 // -(X >>u 31) -> (X >>s 31)
3755 // -(X >>s 31) -> (X >>u 31)
3756 if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
3757 ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
3758 if (ShiftAmt && ShiftAmt->getAPIntValue() == (BitWidth - 1)) {
3759 auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
3760 if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
3761 return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
3762 }
3763 }
3764
3765 // 0 - X --> 0 if the sub is NUW.
3766 if (N->getFlags().hasNoUnsignedWrap())
3767 return N0;
3768
3769 if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
3770 // N1 is either 0 or the minimum signed value. If the sub is NSW, then
3771 // N1 must be 0 because negating the minimum signed value is undefined.
3772 if (N->getFlags().hasNoSignedWrap())
3773 return N0;
3774
3775 // 0 - X --> X if X is 0 or the minimum signed value.
3776 return N1;
3777 }
3778
3779 // Convert 0 - abs(x).
3780 if (N1.getOpcode() == ISD::ABS && N1.hasOneUse() &&
3781 !TLI.isOperationLegalOrCustom(ISD::ABS, VT))
3782 if (SDValue Result = TLI.expandABS(N1.getNode(), DAG, true))
3783 return Result;
3784
3785 // Fold neg(splat(neg(x))) -> splat(x)
3786 if (VT.isVector()) {
3787 SDValue N1S = DAG.getSplatValue(N1, true);
3788 if (N1S && N1S.getOpcode() == ISD::SUB &&
3789 isNullConstant(N1S.getOperand(0)))
3790 return DAG.getSplat(VT, DL, N1S.getOperand(1));
3791 }
3792 }
3793
3794 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
3795 if (isAllOnesOrAllOnesSplat(N0))
3796 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
3797
3798 // fold (A - (0-B)) -> A+B
3799 if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
3800 return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
3801
3802 // fold A-(A-B) -> B
3803 if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
3804 return N1.getOperand(1);
3805
3806 // fold (A+B)-A -> B
3807 if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
3808 return N0.getOperand(1);
3809
3810 // fold (A+B)-B -> A
3811 if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
3812 return N0.getOperand(0);
3813
3814 // fold (A+C1)-C2 -> A+(C1-C2)
3815 if (N0.getOpcode() == ISD::ADD) {
3816 SDValue N01 = N0.getOperand(1);
3817 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N01, N1}))
3818 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
3819 }
3820
3821 // fold C2-(A+C1) -> (C2-C1)-A
3822 if (N1.getOpcode() == ISD::ADD) {
3823 SDValue N11 = N1.getOperand(1);
3824 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N11}))
3825 return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
3826 }
3827
3828 // fold (A-C1)-C2 -> A-(C1+C2)
3829 if (N0.getOpcode() == ISD::SUB) {
3830 SDValue N01 = N0.getOperand(1);
3831 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N01, N1}))
3832 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
3833 }
3834
3835 // fold (c1-A)-c2 -> (c1-c2)-A
3836 if (N0.getOpcode() == ISD::SUB) {
3837 SDValue N00 = N0.getOperand(0);
3838 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N00, N1}))
3839 return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
3840 }
3841
3842 SDValue A, B, C;
3843
3844 // fold ((A+(B+C))-B) -> A+C
3845 if (sd_match(N0, m_Add(m_Value(A), m_Add(m_Specific(N1), m_Value(C)))))
3846 return DAG.getNode(ISD::ADD, DL, VT, A, C);
3847
3848 // fold ((A+(B-C))-B) -> A-C
3849 if (sd_match(N0, m_Add(m_Value(A), m_Sub(m_Specific(N1), m_Value(C)))))
3850 return DAG.getNode(ISD::SUB, DL, VT, A, C);
3851
3852 // fold ((A-(B-C))-C) -> A-B
3853 if (sd_match(N0, m_Sub(m_Value(A), m_Sub(m_Value(B), m_Specific(N1)))))
3854 return DAG.getNode(ISD::SUB, DL, VT, A, B);
3855
3856 // fold (A-(B-C)) -> A+(C-B)
3857 if (sd_match(N1, m_OneUse(m_Sub(m_Value(B), m_Value(C)))))
3858 return DAG.getNode(ISD::ADD, DL, VT, N0,
3859 DAG.getNode(ISD::SUB, DL, VT, C, B));
3860
3861 // A - (A & B) -> A & (~B)
3862 if (sd_match(N1, m_And(m_Specific(N0), m_Value(B))) &&
3863 (N1.hasOneUse() || isConstantOrConstantVector(B, /*NoOpaques=*/true)))
3864 return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getNOT(DL, B, VT));
3865
3866 // fold (A - (-B * C)) -> (A + (B * C))
3867 if (sd_match(N1, m_OneUse(m_Mul(m_Neg(m_Value(B)), m_Value(C)))))
3868 return DAG.getNode(ISD::ADD, DL, VT, N0,
3869 DAG.getNode(ISD::MUL, DL, VT, B, C));
3870
3871 // If either operand of a sub is undef, the result is undef
3872 if (N0.isUndef())
3873 return N0;
3874 if (N1.isUndef())
3875 return N1;
3876
3877 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DL, DAG))
3878 return V;
3879
3880 if (SDValue V = foldAddSubOfSignBit(N, DL, DAG))
3881 return V;
3882
3883 // Try to match AVGCEIL fixedwidth pattern
3884 if (SDValue V = foldSubToAvg(N, DL))
3885 return V;
3886
3887 if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, DL))
3888 return V;
3889
3890 if (SDValue V = foldSubToUSubSat(VT, N, DL))
3891 return V;
3892
3893 // (A - B) - 1 -> add (xor B, -1), A
3894 if (sd_match(N, m_Sub(m_OneUse(m_Sub(m_Value(A), m_Value(B))), m_One())))
3895 return DAG.getNode(ISD::ADD, DL, VT, A, DAG.getNOT(DL, B, VT));
3896
3897 // Look for:
3898 // sub y, (xor x, -1)
3899 // And if the target does not like this form then turn into:
3900 // add (add x, y), 1
3901 if (TLI.preferIncOfAddToSubOfNot(VT) && N1.hasOneUse() && isBitwiseNot(N1)) {
3902 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(0));
3903 return DAG.getNode(ISD::ADD, DL, VT, Add, DAG.getConstant(1, DL, VT));
3904 }
3905
3906 // Hoist one-use addition by non-opaque constant:
3907 // (x + C) - y -> (x - y) + C
3908 if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
3909 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3910 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3911 return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
3912 }
3913 // y - (x + C) -> (y - x) - C
3914 if (N1.getOpcode() == ISD::ADD && N1.hasOneUse() &&
3915 isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) {
3916 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
3917 return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
3918 }
3919 // (x - C) - y -> (x - y) - C
3920 // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
3921 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
3922 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3923 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3924 return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
3925 }
3926 // (C - x) - y -> C - (x + y)
3927 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
3928 isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
3929 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1);
3930 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add);
3931 }
3932
3933 // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
3934 // rather than 'sub 0/1' (the sext should get folded).
3935 // sub X, (zext i1 Y) --> add X, (sext i1 Y)
3936 if (N1.getOpcode() == ISD::ZERO_EXTEND &&
3937 N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
3938 TLI.getBooleanContents(VT) ==
3939 TargetLowering::ZeroOrNegativeOneBooleanContent) {
3940 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
3941 return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
3942 }
3943
3944 // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
3945 if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
3946 if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
3947 SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
3948 SDValue S0 = N1.getOperand(0);
3949 if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0))
3950 if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
3951 if (C->getAPIntValue() == (BitWidth - 1))
3952 return DAG.getNode(ISD::ABS, DL, VT, S0);
3953 }
3954 }
3955
3956 // If the relocation model supports it, consider symbol offsets.
3957 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
3958 if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
3959 // fold (sub Sym+c1, Sym+c2) -> c1-c2
3960 if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
3961 if (GA->getGlobal() == GB->getGlobal())
3962 return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
3963 DL, VT);
3964 }
3965
3966 // sub X, (sextinreg Y i1) -> add X, (and Y 1)
3967 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
3968 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
3969 if (TN->getVT() == MVT::i1) {
3970 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
3971 DAG.getConstant(1, DL, VT));
3972 return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
3973 }
3974 }
3975
3976 // canonicalize (sub X, (vscale * C)) to (add X, (vscale * -C))
3977 if (N1.getOpcode() == ISD::VSCALE && N1.hasOneUse()) {
3978 const APInt &IntVal = N1.getConstantOperandAPInt(0);
3979 return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getVScale(DL, VT, -IntVal));
3980 }
3981
3982 // canonicalize (sub X, step_vector(C)) to (add X, step_vector(-C))
3983 if (N1.getOpcode() == ISD::STEP_VECTOR && N1.hasOneUse()) {
3984 APInt NewStep = -N1.getConstantOperandAPInt(0);
3985 return DAG.getNode(ISD::ADD, DL, VT, N0,
3986 DAG.getStepVector(DL, VT, NewStep));
3987 }
3988
3989 // Prefer an add for more folding potential and possibly better codegen:
3990 // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
3991 if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
3992 SDValue ShAmt = N1.getOperand(1);
3993 ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
3994 if (ShAmtC && ShAmtC->getAPIntValue() == (BitWidth - 1)) {
3995 SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
3996 return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
3997 }
3998 }
3999
4000 // As with the previous fold, prefer add for more folding potential.
4001 // Subtracting SMIN/0 is the same as adding SMIN/0:
4002 // N0 - (X << BW-1) --> N0 + (X << BW-1)
4003 if (N1.getOpcode() == ISD::SHL) {
4004 ConstantSDNode *ShlC = isConstOrConstSplat(N1.getOperand(1));
4005 if (ShlC && ShlC->getAPIntValue() == (BitWidth - 1))
4006 return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
4007 }
4008
4009 // (sub (usubo_carry X, 0, Carry), Y) -> (usubo_carry X, Y, Carry)
4010 if (N0.getOpcode() == ISD::USUBO_CARRY && isNullConstant(N0.getOperand(1)) &&
4011 N0.getResNo() == 0 && N0.hasOneUse())
4012 return DAG.getNode(ISD::USUBO_CARRY, DL, N0->getVTList(),
4013 N0.getOperand(0), N1, N0.getOperand(2));
4014
4015 if (TLI.isOperationLegalOrCustom(ISD::UADDO_CARRY, VT)) {
4016 // (sub Carry, X) -> (uaddo_carry (sub 0, X), 0, Carry)
4017 if (SDValue Carry = getAsCarry(TLI, N0)) {
4018 SDValue X = N1;
4019 SDValue Zero = DAG.getConstant(0, DL, VT);
4020 SDValue NegX = DAG.getNode(ISD::SUB, DL, VT, Zero, X);
4021 return DAG.getNode(ISD::UADDO_CARRY, DL,
4022 DAG.getVTList(VT, Carry.getValueType()), NegX, Zero,
4023 Carry);
4024 }
4025 }
4026
4027 // If there's no chance of borrowing from adjacent bits, then sub is xor:
4028 // sub C0, X --> xor X, C0
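// For example, with C0 = 7 (0b111) and X known to fit in those three bits,
// no borrow can occur, so 7 - X == 7 ^ X for every such X.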
4029 if (ConstantSDNode *C0 = isConstOrConstSplat(N0)) {
4030 if (!C0->isOpaque()) {
4031 const APInt &C0Val = C0->getAPIntValue();
4032 const APInt &MaybeOnes = ~DAG.computeKnownBits(N1).Zero;
4033 if ((C0Val - MaybeOnes) == (C0Val ^ MaybeOnes))
4034 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
4035 }
4036 }
4037
4038 // smax(a,b) - smin(a,b) --> abds(a,b)
4039 if (hasOperation(ISD::ABDS, VT) &&
4040 sd_match(N0, m_SMax(m_Value(A), m_Value(B))) &&
4041 sd_match(N1, m_SMin(m_Specific(A), m_Specific(B))))
4042 return DAG.getNode(ISD::ABDS, DL, VT, A, B);
4043
4044 // umax(a,b) - umin(a,b) --> abdu(a,b)
4045 if (hasOperation(ISD::ABDU, VT) &&
4046 sd_match(N0, m_UMax(m_Value(A), m_Value(B))) &&
4047 sd_match(N1, m_UMin(m_Specific(A), m_Specific(B))))
4048 return DAG.getNode(ISD::ABDU, DL, VT, A, B);
4049
4050 return SDValue();
4051}
4052
4053SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
4054 unsigned Opcode = N->getOpcode();
4055 SDValue N0 = N->getOperand(0);
4056 SDValue N1 = N->getOperand(1);
4057 EVT VT = N0.getValueType();
4058 bool IsSigned = Opcode == ISD::SSUBSAT;
4059 SDLoc DL(N);
4060
4061 // fold (sub_sat x, undef) -> 0
4062 if (N0.isUndef() || N1.isUndef())
4063 return DAG.getConstant(0, DL, VT);
4064
4065 // fold (sub_sat x, x) -> 0
4066 if (N0 == N1)
4067 return DAG.getConstant(0, DL, VT);
4068
4069 // fold (sub_sat c1, c2) -> c3
4070 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
4071 return C;
4072
4073 // fold vector ops
4074 if (VT.isVector()) {
4075 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4076 return FoldedVOp;
4077
4078 // fold (sub_sat x, 0) -> x, vector edition
4079 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
4080 return N0;
4081 }
4082
4083 // fold (sub_sat x, 0) -> x
4084 if (isNullConstant(N1))
4085 return N0;
4086
4087 // If it cannot overflow, transform into a sub.
4088 if (DAG.willNotOverflowSub(IsSigned, N0, N1))
4089 return DAG.getNode(ISD::SUB, DL, VT, N0, N1);
4090
4091 return SDValue();
4092}
4093
4094SDValue DAGCombiner::visitSUBC(SDNode *N) {
4095 SDValue N0 = N->getOperand(0);
4096 SDValue N1 = N->getOperand(1);
4097 EVT VT = N0.getValueType();
4098 SDLoc DL(N);
4099
4100 // If the flag result is dead, turn this into an SUB.
4101 if (!N->hasAnyUseOfValue(1))
4102 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
4103 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4104
4105 // fold (subc x, x) -> 0 + no borrow
4106 if (N0 == N1)
4107 return CombineTo(N, DAG.getConstant(0, DL, VT),
4108 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4109
4110 // fold (subc x, 0) -> x + no borrow
4111 if (isNullConstant(N1))
4112 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4113
4114 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
4115 if (isAllOnesConstant(N0))
4116 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
4117 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4118
4119 return SDValue();
4120}
4121
4122SDValue DAGCombiner::visitSUBO(SDNode *N) {
4123 SDValue N0 = N->getOperand(0);
4124 SDValue N1 = N->getOperand(1);
4125 EVT VT = N0.getValueType();
4126 bool IsSigned = (ISD::SSUBO == N->getOpcode());
4127
4128 EVT CarryVT = N->getValueType(1);
4129 SDLoc DL(N);
4130
4131 // If the flag result is dead, turn this into an SUB.
4132 if (!N->hasAnyUseOfValue(1))
4133 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
4134 DAG.getUNDEF(CarryVT));
4135
4136 // fold (subo x, x) -> 0 + no borrow
4137 if (N0 == N1)
4138 return CombineTo(N, DAG.getConstant(0, DL, VT),
4139 DAG.getConstant(0, DL, CarryVT));
4140
4141 // fold (subo x, c) -> (addo x, -c)
4142 if (ConstantSDNode *N1C = getAsNonOpaqueConstant(N1))
4143 if (IsSigned && !N1C->isMinSignedValue())
4144 return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
4145 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
4146
4147 // fold (subo x, 0) -> x + no borrow
4148 if (isNullOrNullSplat(N1))
4149 return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
4150
4151 // If it cannot overflow, transform into a sub.
4152 if (DAG.willNotOverflowSub(IsSigned, N0, N1))
4153 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
4154 DAG.getConstant(0, DL, CarryVT));
4155
4156 // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
4157 if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
4158 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
4159 DAG.getConstant(0, DL, CarryVT));
4160
4161 return SDValue();
4162}
4163
4164SDValue DAGCombiner::visitSUBE(SDNode *N) {
4165 SDValue N0 = N->getOperand(0);
4166 SDValue N1 = N->getOperand(1);
4167 SDValue CarryIn = N->getOperand(2);
4168
4169 // fold (sube x, y, false) -> (subc x, y)
4170 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
4171 return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
4172
4173 return SDValue();
4174}
4175
4176SDValue DAGCombiner::visitUSUBO_CARRY(SDNode *N) {
4177 SDValue N0 = N->getOperand(0);
4178 SDValue N1 = N->getOperand(1);
4179 SDValue CarryIn = N->getOperand(2);
4180
4181 // fold (usubo_carry x, y, false) -> (usubo x, y)
4182 if (isNullConstant(CarryIn)) {
4183 if (!LegalOperations ||
4184 TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
4185 return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
4186 }
4187
4188 return SDValue();
4189}
4190
4191SDValue DAGCombiner::visitSSUBO_CARRY(SDNode *N) {
4192 SDValue N0 = N->getOperand(0);
4193 SDValue N1 = N->getOperand(1);
4194 SDValue CarryIn = N->getOperand(2);
4195
4196 // fold (ssubo_carry x, y, false) -> (ssubo x, y)
4197 if (isNullConstant(CarryIn)) {
4198 if (!LegalOperations ||
4199 TLI.isOperationLegalOrCustom(ISD::SSUBO, N->getValueType(0)))
4200 return DAG.getNode(ISD::SSUBO, SDLoc(N), N->getVTList(), N0, N1);
4201 }
4202
4203 return SDValue();
4204}
4205
4206// Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX and
4207// UMULFIXSAT here.
4208SDValue DAGCombiner::visitMULFIX(SDNode *N) {
4209 SDValue N0 = N->getOperand(0);
4210 SDValue N1 = N->getOperand(1);
4211 SDValue Scale = N->getOperand(2);
4212 EVT VT = N0.getValueType();
4213
4214 // fold (mulfix x, undef, scale) -> 0
4215 if (N0.isUndef() || N1.isUndef())
4216 return DAG.getConstant(0, SDLoc(N), VT);
4217
4218 // Canonicalize constant to RHS (vector doesn't have to splat)
4219 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4220 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4221 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0, Scale);
4222
4223 // fold (mulfix x, 0, scale) -> 0
4224 if (isNullConstant(N1))
4225 return DAG.getConstant(0, SDLoc(N), VT);
4226
4227 return SDValue();
4228}
4229
4230SDValue DAGCombiner::visitMUL(SDNode *N) {
4231 SDValue N0 = N->getOperand(0);
4232 SDValue N1 = N->getOperand(1);
4233 EVT VT = N0.getValueType();
4234 SDLoc DL(N);
4235
4236 // fold (mul x, undef) -> 0
4237 if (N0.isUndef() || N1.isUndef())
4238 return DAG.getConstant(0, DL, VT);
4239
4240 // fold (mul c1, c2) -> c1*c2
4241 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MUL, DL, VT, {N0, N1}))
4242 return C;
4243
4244 // canonicalize constant to RHS (vector doesn't have to splat)
4245 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4246 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4247 return DAG.getNode(ISD::MUL, DL, VT, N1, N0);
4248
4249 bool N1IsConst = false;
4250 bool N1IsOpaqueConst = false;
4251 APInt ConstValue1;
4252
4253 // fold vector ops
4254 if (VT.isVector()) {
4255 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4256 return FoldedVOp;
4257
4258 N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
4259 assert((!N1IsConst ||
4260 ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
4261 "Splat APInt should be element width");
4262 } else {
4263 N1IsConst = isa<ConstantSDNode>(N1);
4264 if (N1IsConst) {
4265 ConstValue1 = N1->getAsAPIntVal();
4266 N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
4267 }
4268 }
4269
4270 // fold (mul x, 0) -> 0
4271 if (N1IsConst && ConstValue1.isZero())
4272 return N1;
4273
4274 // fold (mul x, 1) -> x
4275 if (N1IsConst && ConstValue1.isOne())
4276 return N0;
4277
4278 if (SDValue NewSel = foldBinOpIntoSelect(N))
4279 return NewSel;
4280
4281 // fold (mul x, -1) -> 0-x
4282 if (N1IsConst && ConstValue1.isAllOnes())
4283 return DAG.getNegative(N0, DL, VT);
4284
4285 // fold (mul x, (1 << c)) -> x << c
4286 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
4287 (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
4288 if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
4289 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4290 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
4291 return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
4292 }
4293 }
4294
4295 // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
4296 if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isNegatedPowerOf2()) {
4297 unsigned Log2Val = (-ConstValue1).logBase2();
4298 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4299
4300 // FIXME: If the input is something that is easily negated (e.g. a
4301 // single-use add), we should put the negate there.
4302 return DAG.getNode(ISD::SUB, DL, VT,
4303 DAG.getConstant(0, DL, VT),
4304 DAG.getNode(ISD::SHL, DL, VT, N0,
4305 DAG.getConstant(Log2Val, DL, ShiftVT)));
4306 }
4307
4308 // Attempt to reuse an existing umul_lohi/smul_lohi node, but only if the
4309 // hi result is in use in case we hit this mid-legalization.
4310 for (unsigned LoHiOpc : {ISD::UMUL_LOHI, ISD::SMUL_LOHI}) {
4311 if (!LegalOperations || TLI.isOperationLegalOrCustom(LoHiOpc, VT)) {
4312 SDVTList LoHiVT = DAG.getVTList(VT, VT);
4313 // TODO: Can we match commutable operands with getNodeIfExists?
4314 if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N0, N1}))
4315 if (LoHi->hasAnyUseOfValue(1))
4316 return SDValue(LoHi, 0);
4317 if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N1, N0}))
4318 if (LoHi->hasAnyUseOfValue(1))
4319 return SDValue(LoHi, 0);
4320 }
4321 }
4322
4323 // Try to transform:
4324 // (1) multiply-by-(power-of-2 +/- 1) into shift and add/sub.
4325 // mul x, (2^N + 1) --> add (shl x, N), x
4326 // mul x, (2^N - 1) --> sub (shl x, N), x
4327 // Examples: x * 33 --> (x << 5) + x
4328 // x * 15 --> (x << 4) - x
4329 // x * -33 --> -((x << 5) + x)
4330 // x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
4331 // (2) multiply-by-(power-of-2 +/- power-of-2) into shifts and add/sub.
4332 // mul x, (2^N + 2^M) --> (add (shl x, N), (shl x, M))
4333 // mul x, (2^N - 2^M) --> (sub (shl x, N), (shl x, M))
4334 // Examples: x * 0x8800 --> (x << 15) + (x << 11)
4335 // x * 0xf800 --> (x << 16) - (x << 11)
4336 // x * -0x8800 --> -((x << 15) + (x << 11))
4337 // x * -0xf800 --> -((x << 16) - (x << 11)) ; (x << 11) - (x << 16)
4338 if (N1IsConst && TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
4339 // TODO: We could handle more general decomposition of any constant by
4340 // having the target set a limit on number of ops and making a
4341 // callback to determine that sequence (similar to sqrt expansion).
4342 unsigned MathOp = ISD::DELETED_NODE;
4343 APInt MulC = ConstValue1.abs();
4344 // The constant `2` should be treated as (2^0 + 1).
4345 unsigned TZeros = MulC == 2 ? 0 : MulC.countr_zero();
4346 MulC.lshrInPlace(TZeros);
4347 if ((MulC - 1).isPowerOf2())
4348 MathOp = ISD::ADD;
4349 else if ((MulC + 1).isPowerOf2())
4350 MathOp = ISD::SUB;
4351
4352 if (MathOp != ISD::DELETED_NODE) {
4353 unsigned ShAmt =
4354 MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2();
4355 ShAmt += TZeros;
4356 assert(ShAmt < VT.getScalarSizeInBits() &&
4357 "multiply-by-constant generated out of bounds shift");
4358 SDValue Shl =
4359 DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT));
4360 SDValue R =
4361 TZeros ? DAG.getNode(MathOp, DL, VT, Shl,
4362 DAG.getNode(ISD::SHL, DL, VT, N0,
4363 DAG.getConstant(TZeros, DL, VT)))
4364 : DAG.getNode(MathOp, DL, VT, Shl, N0);
4365 if (ConstValue1.isNegative())
4366 R = DAG.getNegative(R, DL, VT);
4367 return R;
4368 }
4369 }
4370
4371 // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
4372 if (N0.getOpcode() == ISD::SHL) {
4373 SDValue N01 = N0.getOperand(1);
4374 if (SDValue C3 = DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N1, N01}))
4375 return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), C3);
4376 }
4377
4378 // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
4379 // use.
4380 {
4381 SDValue Sh, Y;
4382
4383 // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
4384 if (N0.getOpcode() == ISD::SHL &&
4385 isConstantOrConstantVector(N0.getOperand(1)) && N0->hasOneUse()) {
4386 Sh = N0; Y = N1;
4387 } else if (N1.getOpcode() == ISD::SHL &&
4388 isConstantOrConstantVector(N1.getOperand(1)) &&
4389 N1->hasOneUse()) {
4390 Sh = N1; Y = N0;
4391 }
4392
4393 if (Sh.getNode()) {
4394 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, Sh.getOperand(0), Y);
4395 return DAG.getNode(ISD::SHL, DL, VT, Mul, Sh.getOperand(1));
4396 }
4397 }
4398
4399 // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
4400 if (N0.getOpcode() == ISD::ADD &&
4401 DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
4402 DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
4403 isMulAddWithConstProfitable(N, N0, N1))
4404 return DAG.getNode(
4405 ISD::ADD, DL, VT,
4406 DAG.getNode(ISD::MUL, SDLoc(N0), VT, N0.getOperand(0), N1),
4407 DAG.getNode(ISD::MUL, SDLoc(N1), VT, N0.getOperand(1), N1));
4408
4409 // Fold (mul (vscale * C0), C1) to (vscale * (C0 * C1)).
4410 ConstantSDNode *NC1 = isConstOrConstSplat(N1);
4411 if (N0.getOpcode() == ISD::VSCALE && NC1) {
4412 const APInt &C0 = N0.getConstantOperandAPInt(0);
4413 const APInt &C1 = NC1->getAPIntValue();
4414 return DAG.getVScale(DL, VT, C0 * C1);
4415 }
4416
4417 // Fold (mul step_vector(C0), C1) to (step_vector(C0 * C1)).
4418 APInt MulVal;
4419 if (N0.getOpcode() == ISD::STEP_VECTOR &&
4420 ISD::isConstantSplatVector(N1.getNode(), MulVal)) {
4421 const APInt &C0 = N0.getConstantOperandAPInt(0);
4422 APInt NewStep = C0 * MulVal;
4423 return DAG.getStepVector(DL, VT, NewStep);
4424 }
4425
4426 // Fold (mul x, 0/undef) -> 0 and
4427 // (mul x, 1) -> x
4428 // into and(x, mask).
4429 // We can replace vectors with '0' and '1' factors with a clearing mask.
4430 if (VT.isFixedLengthVector()) {
4431 unsigned NumElts = VT.getVectorNumElements();
4432 SmallBitVector ClearMask;
4433 ClearMask.reserve(NumElts);
4434 auto IsClearMask = [&ClearMask](ConstantSDNode *V) {
4435 if (!V || V->isZero()) {
4436 ClearMask.push_back(true);
4437 return true;
4438 }
4439 ClearMask.push_back(false);
4440 return V->isOne();
4441 };
4442 if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::AND, VT)) &&
4443 ISD::matchUnaryPredicate(N1, IsClearMask, /*AllowUndefs*/ true)) {
4444 assert(N1.getOpcode() == ISD::BUILD_VECTOR && "Unknown constant vector");
4445 EVT LegalSVT = N1.getOperand(0).getValueType();
4446 SDValue Zero = DAG.getConstant(0, DL, LegalSVT);
4447 SDValue AllOnes = DAG.getAllOnesConstant(DL, LegalSVT);
4448 SmallVector<SDValue, 16> Mask(NumElts, AllOnes);
4449 for (unsigned I = 0; I != NumElts; ++I)
4450 if (ClearMask[I])
4451 Mask[I] = Zero;
4452 return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getBuildVector(VT, DL, Mask));
4453 }
4454 }
4455
4456 // reassociate mul
4457 if (SDValue RMUL = reassociateOps(ISD::MUL, DL, N0, N1, N->getFlags()))
4458 return RMUL;
4459
4460 // Fold mul(vecreduce(x), vecreduce(y)) -> vecreduce(mul(x, y))
4461 if (SDValue SD =
4462 reassociateReduction(ISD::VECREDUCE_MUL, ISD::MUL, DL, VT, N0, N1))
4463 return SD;
4464
4465 // Simplify the operands using demanded-bits information.
4466 if (SimplifyDemandedBits(SDValue(N, 0)))
4467 return SDValue(N, 0);
4468
4469 return SDValue();
4470}
4471
4472/// Return true if divmod libcall is available.
4473 static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
4474 const TargetLowering &TLI) {
4475 RTLIB::Libcall LC;
4476 EVT NodeType = Node->getValueType(0);
4477 if (!NodeType.isSimple())
4478 return false;
4479 switch (NodeType.getSimpleVT().SimpleTy) {
4480 default: return false; // No libcall for vector types.
4481 case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
4482 case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
4483 case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
4484 case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
4485 case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
4486 }
4487
4488 return TLI.getLibcallName(LC) != nullptr;
4489}
4490
4491/// Issue divrem if both quotient and remainder are needed.
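/// For example, when both (sdiv X, Y) and (srem X, Y) are present, a single
/// (sdivrem X, Y) is emitted and both users are rewired to its two results.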
4492SDValue DAGCombiner::useDivRem(SDNode *Node) {
4493 if (Node->use_empty())
4494 return SDValue(); // This is a dead node, leave it alone.
4495
4496 unsigned Opcode = Node->getOpcode();
4497 bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
4498 unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
4499
4500 // DivMod lib calls can still work on non-legal types if using lib-calls.
4501 EVT VT = Node->getValueType(0);
4502 if (VT.isVector() || !VT.isInteger())
4503 return SDValue();
4504
4505 if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
4506 return SDValue();
4507
4508 // If DIVREM is going to get expanded into a libcall,
4509 // but there is no libcall available, then don't combine.
4510 if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
4511 !isDivRemLibcallAvailable(Node, isSigned, TLI))
4512 return SDValue();
4513
4514 // If div is legal, it's better to do the normal expansion
4515 unsigned OtherOpcode = 0;
4516 if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
4517 OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
4518 if (TLI.isOperationLegalOrCustom(Opcode, VT))
4519 return SDValue();
4520 } else {
4521 OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
4522 if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
4523 return SDValue();
4524 }
4525
4526 SDValue Op0 = Node->getOperand(0);
4527 SDValue Op1 = Node->getOperand(1);
4528 SDValue combined;
4529 for (SDNode *User : Op0->uses()) {
4530 if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
4531 User->use_empty())
4532 continue;
4533 // Convert the other matching node(s), too;
4534 // otherwise, the DIVREM may get target-legalized into something
4535 // target-specific that we won't be able to recognize.
4536 unsigned UserOpc = User->getOpcode();
4537 if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
4538 User->getOperand(0) == Op0 &&
4539 User->getOperand(1) == Op1) {
4540 if (!combined) {
4541 if (UserOpc == OtherOpcode) {
4542 SDVTList VTs = DAG.getVTList(VT, VT);
4543 combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
4544 } else if (UserOpc == DivRemOpc) {
4545 combined = SDValue(User, 0);
4546 } else {
4547 assert(UserOpc == Opcode);
4548 continue;
4549 }
4550 }
4551 if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
4552 CombineTo(User, combined);
4553 else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
4554 CombineTo(User, combined.getValue(1));
4555 }
4556 }
4557 return combined;
4558}
4559
4560 static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
4561 SDValue N0 = N->getOperand(0);
4562 SDValue N1 = N->getOperand(1);
4563 EVT VT = N->getValueType(0);
4564 SDLoc DL(N);
4565
4566 unsigned Opc = N->getOpcode();
4567 bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
4568 ConstantSDNode *N1C = isConstOrConstSplat(N1);
4569
4570 // X / undef -> undef
4571 // X % undef -> undef
4572 // X / 0 -> undef
4573 // X % 0 -> undef
4574 // NOTE: This includes vectors where any divisor element is zero/undef.
4575 if (DAG.isUndef(Opc, {N0, N1}))
4576 return DAG.getUNDEF(VT);
4577
4578 // undef / X -> 0
4579 // undef % X -> 0
4580 if (N0.isUndef())
4581 return DAG.getConstant(0, DL, VT);
4582
4583 // 0 / X -> 0
4584 // 0 % X -> 0
4585 ConstantSDNode *N0C = isConstOrConstSplat(N0);
4586 if (N0C && N0C->isZero())
4587 return N0;
4588
4589 // X / X -> 1
4590 // X % X -> 0
4591 if (N0 == N1)
4592 return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
4593
4594 // X / 1 -> X
4595 // X % 1 -> 0
4596 // If this is a boolean op (single-bit element type), we can't have
4597 // division-by-zero or remainder-by-zero, so assume the divisor is 1.
4598 // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
4599 // it's a 1.
4600 if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
4601 return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
4602
4603 return SDValue();
4604}
4605
4606SDValue DAGCombiner::visitSDIV(SDNode *N) {
4607 SDValue N0 = N->getOperand(0);
4608 SDValue N1 = N->getOperand(1);
4609 EVT VT = N->getValueType(0);
4610 EVT CCVT = getSetCCResultType(VT);
4611 SDLoc DL(N);
4612
4613 // fold (sdiv c1, c2) -> c1/c2
4614 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, {N0, N1}))
4615 return C;
4616
4617 // fold vector ops
4618 if (VT.isVector())
4619 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4620 return FoldedVOp;
4621
4622 // fold (sdiv X, -1) -> 0-X
4623 ConstantSDNode *N1C = isConstOrConstSplat(N1);
4624 if (N1C && N1C->isAllOnes())
4625 return DAG.getNegative(N0, DL, VT);
4626
4627 // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
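// (Every X other than MIN_SIGNED has smaller magnitude than MIN_SIGNED, so the
// quotient is 1 only when X == MIN_SIGNED and 0 otherwise.)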
4628 if (N1C && N1C->isMinSignedValue())
4629 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4630 DAG.getConstant(1, DL, VT),
4631 DAG.getConstant(0, DL, VT));
4632
4633 if (SDValue V = simplifyDivRem(N, DAG))
4634 return V;
4635
4636 if (SDValue NewSel = foldBinOpIntoSelect(N))
4637 return NewSel;
4638
4639 // If we know the sign bits of both operands are zero, strength reduce to a
4640 // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
4641 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
4642 return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
4643
4644 if (SDValue V = visitSDIVLike(N0, N1, N)) {
4645 // If the corresponding remainder node exists, update its users with
4646 // (Dividend - (Quotient * Divisor)).
4647 if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
4648 { N0, N1 })) {
4649 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
4650 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4651 AddToWorklist(Mul.getNode());
4652 AddToWorklist(Sub.getNode());
4653 CombineTo(RemNode, Sub);
4654 }
4655 return V;
4656 }
4657
4658 // sdiv, srem -> sdivrem
4659 // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
4660 // true. Otherwise, we break the simplification logic in visitREM().
4661 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4662 if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
4663 if (SDValue DivRem = useDivRem(N))
4664 return DivRem;
4665
4666 return SDValue();
4667}
4668
4669static bool isDivisorPowerOfTwo(SDValue Divisor) {
4670 // Helper for determining whether a value is a power-of-2 constant scalar or a
4671 // vector of such elements.
4672 auto IsPowerOfTwo = [](ConstantSDNode *C) {
4673 if (C->isZero() || C->isOpaque())
4674 return false;
4675 if (C->getAPIntValue().isPowerOf2())
4676 return true;
4677 if (C->getAPIntValue().isNegatedPowerOf2())
4678 return true;
4679 return false;
4680 };
4681
4682 return ISD::matchUnaryPredicate(Divisor, IsPowerOfTwo);
4683}
4684
4685SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
4686 SDLoc DL(N);
4687 EVT VT = N->getValueType(0);
4688 EVT CCVT = getSetCCResultType(VT);
4689 unsigned BitWidth = VT.getScalarSizeInBits();
4690
4691 // fold (sdiv X, pow2) -> simple ops after legalize
4692 // FIXME: We check for the exact bit here because the generic lowering gives
4693 // better results in that case. The target-specific lowering should learn how
4694 // to handle exact sdivs efficiently.
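// For example, (sdiv X, 8) on i32 becomes: add ((X >> 31) >>u 29), i.e. 0 or 7,
// to X (rounding negative values toward zero), then shift right arithmetically by 3.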
4695 if (!N->getFlags().hasExact() && isDivisorPowerOfTwo(N1)) {
4696 // Target-specific implementation of sdiv x, pow2.
4697 if (SDValue Res = BuildSDIVPow2(N))
4698 return Res;
4699
4700 // Create constants that are functions of the shift amount value.
4701 EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
4702 SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
4703 SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
4704 C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
4705 SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
4706 if (!isConstantOrConstantVector(Inexact))
4707 return SDValue();
4708
4709 // Splat the sign bit into the register
4710 SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
4711 DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
4712 AddToWorklist(Sign.getNode());
4713
4714 // Add (N0 < 0) ? abs2 - 1 : 0;
4715 SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
4716 AddToWorklist(Srl.getNode());
4717 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
4718 AddToWorklist(Add.getNode());
4719 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
4720 AddToWorklist(Sra.getNode());
4721
4722 // Special case: (sdiv X, 1) -> X
4723 // Special Case: (sdiv X, -1) -> 0-X
4724 SDValue One = DAG.getConstant(1, DL, VT);
4725 SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
4726 SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
4727 SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
4728 SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
4729 Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);
4730
4731 // If dividing by a positive value, we're done. Otherwise, the result must
4732 // be negated.
4733 SDValue Zero = DAG.getConstant(0, DL, VT);
4734 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);
4735
4736 // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
4737 SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
4738 SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
4739 return Res;
4740 }
4741
4742 // If integer divide is expensive and we satisfy the requirements, emit an
4743 // alternate sequence. Targets may check function attributes for size/speed
4744 // trade-offs.
4745 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4746 if (isConstantOrConstantVector(N1) &&
4747 !TLI.isIntDivCheap(N->getValueType(0), Attr))
4748 if (SDValue Op = BuildSDIV(N))
4749 return Op;
4750
4751 return SDValue();
4752}
4753
4754SDValue DAGCombiner::visitUDIV(SDNode *N) {
4755 SDValue N0 = N->getOperand(0);
4756 SDValue N1 = N->getOperand(1);
4757 EVT VT = N->getValueType(0);
4758 EVT CCVT = getSetCCResultType(VT);
4759 SDLoc DL(N);
4760
4761 // fold (udiv c1, c2) -> c1/c2
4762 if (SDValue C = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, {N0, N1}))
4763 return C;
4764
4765 // fold vector ops
4766 if (VT.isVector())
4767 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4768 return FoldedVOp;
4769
4770 // fold (udiv X, -1) -> select(X == -1, 1, 0)
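// (The divisor is the maximum unsigned value, so the quotient is 1 only when X
// equals it and 0 otherwise.)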
4771 ConstantSDNode *N1C = isConstOrConstSplat(N1);
4772 if (N1C && N1C->isAllOnes() && CCVT.isVector() == VT.isVector()) {
4773 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4774 DAG.getConstant(1, DL, VT),
4775 DAG.getConstant(0, DL, VT));
4776 }
4777
4778 if (SDValue V = simplifyDivRem(N, DAG))
4779 return V;
4780
4781 if (SDValue NewSel = foldBinOpIntoSelect(N))
4782 return NewSel;
4783
4784 if (SDValue V = visitUDIVLike(N0, N1, N)) {
4785 // If the corresponding remainder node exists, update its users with
4786 // (Dividend - (Quotient * Divisor)).
4787 if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
4788 { N0, N1 })) {
4789 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
4790 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4791 AddToWorklist(Mul.getNode());
4792 AddToWorklist(Sub.getNode());
4793 CombineTo(RemNode, Sub);
4794 }
4795 return V;
4796 }
4797
4798 // udiv, urem -> udivrem
4799 // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
4800 // true. Otherwise, we break the simplification logic in visitREM().
4801 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4802 if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
4803 if (SDValue DivRem = useDivRem(N))
4804 return DivRem;
4805
4806 return SDValue();
4807}
4808
4809SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
4810 SDLoc DL(N);
4811 EVT VT = N->getValueType(0);
4812
4813 // fold (udiv x, (1 << c)) -> x >>u c
4814 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true)) {
4815 if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
4816 AddToWorklist(LogBase2.getNode());
4817
4818 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4819 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
4820 AddToWorklist(Trunc.getNode());
4821 return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
4822 }
4823 }
4824
4825 // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
4826 if (N1.getOpcode() == ISD::SHL) {
4827 SDValue N10 = N1.getOperand(0);
4828 if (isConstantOrConstantVector(N10, /*NoOpaques*/ true)) {
4829 if (SDValue LogBase2 = BuildLogBase2(N10, DL)) {
4830 AddToWorklist(LogBase2.getNode());
4831
4832 EVT ADDVT = N1.getOperand(1).getValueType();
4833 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
4834 AddToWorklist(Trunc.getNode());
4835 SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
4836 AddToWorklist(Add.getNode());
4837 return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
4838 }
4839 }
4840 }
4841
4842 // fold (udiv x, c) -> alternate
4843 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4844 if (isConstantOrConstantVector(N1) &&
4845 !TLI.isIntDivCheap(N->getValueType(0), Attr))
4846 if (SDValue Op = BuildUDIV(N))
4847 return Op;
4848
4849 return SDValue();
4850}
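// Illustrative examples (editorial addition, not part of the LLVM source):
// the two shift folds above turn, e.g., "udiv X, 16" into "srl X, 4", and
// "udiv X, (shl 8, Y)" into "srl X, (add Y, 3)", since dividing by a power
// of two is a logical right shift by its log2.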
4851
4852SDValue DAGCombiner::buildOptimizedSREM(SDValue N0, SDValue N1, SDNode *N) {
4853 if (!N->getFlags().hasExact() && isDivisorPowerOfTwo(N1) &&
4854 !DAG.doesNodeExist(ISD::SDIV, N->getVTList(), {N0, N1})) {
4855 // Target-specific implementation of srem x, pow2.
4856 if (SDValue Res = BuildSREMPow2(N))
4857 return Res;
4858 }
4859 return SDValue();
4860}
4861
4862// handles ISD::SREM and ISD::UREM
4863SDValue DAGCombiner::visitREM(SDNode *N) {
4864 unsigned Opcode = N->getOpcode();
4865 SDValue N0 = N->getOperand(0);
4866 SDValue N1 = N->getOperand(1);
4867 EVT VT = N->getValueType(0);
4868 EVT CCVT = getSetCCResultType(VT);
4869
4870 bool isSigned = (Opcode == ISD::SREM);
4871 SDLoc DL(N);
4872
4873 // fold (rem c1, c2) -> c1%c2
4874 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
4875 return C;
4876
4877 // fold (urem X, -1) -> select(FX == -1, 0, FX)
4878 // Freeze the numerator to avoid a miscompile with an undefined value.
4879 if (!isSigned && llvm::isAllOnesOrAllOnesSplat(N1, /*AllowUndefs*/ false) &&
4880 CCVT.isVector() == VT.isVector()) {
4881 SDValue F0 = DAG.getFreeze(N0);
4882 SDValue EqualsNeg1 = DAG.getSetCC(DL, CCVT, F0, N1, ISD::SETEQ);
4883 return DAG.getSelect(DL, VT, EqualsNeg1, DAG.getConstant(0, DL, VT), F0);
4884 }
4885
4886 if (SDValue V = simplifyDivRem(N, DAG))
4887 return V;
4888
4889 if (SDValue NewSel = foldBinOpIntoSelect(N))
4890 return NewSel;
4891
4892 if (isSigned) {
4893 // If we know the sign bits of both operands are zero, strength reduce to a
4894 // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
4895 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
4896 return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
4897 } else {
4898 if (DAG.isKnownToBeAPowerOfTwo(N1)) {
4899 // fold (urem x, pow2) -> (and x, pow2-1)
4900 SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
4901 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
4902 AddToWorklist(Add.getNode());
4903 return DAG.getNode(ISD::AND, DL, VT, N0, Add);
4904 }
4905 // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
4906 // fold (urem x, (lshr pow2, y)) -> (and x, (add (lshr pow2, y), -1))
4907 // TODO: We should sink the following into isKnownToBePowerOfTwo
4908 // using a OrZero parameter analogous to our handling in ValueTracking.
4909 if ((N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) &&
4910 DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
4911 SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
4912 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
4913 AddToWorklist(Add.getNode());
4914 return DAG.getNode(ISD::AND, DL, VT, N0, Add);
4915 }
4916 }
4917
4918 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4919
4920 // If X/C can be simplified by the division-by-constant logic, lower
4921 // X%C to the equivalent of X-X/C*C.
4922 // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
4923 // speculative DIV must not cause a DIVREM conversion. We guard against this
4924 // by skipping the simplification if isIntDivCheap(). When div is not cheap,
4925 // combine will not return a DIVREM. Regardless, checking cheapness here
4926 // makes sense since the simplification results in fatter code.
4927 if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
4928 if (isSigned) {
4929 // check if we can build faster implementation for srem
4930 if (SDValue OptimizedRem = buildOptimizedSREM(N0, N1, N))
4931 return OptimizedRem;
4932 }
4933
4934 SDValue OptimizedDiv =
4935 isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
4936 if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != N) {
4937 // If the equivalent Div node also exists, update its users.
4938 unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
4939 if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
4940 { N0, N1 }))
4941 CombineTo(DivNode, OptimizedDiv);
4942 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
4943 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4944 AddToWorklist(OptimizedDiv.getNode());
4945 AddToWorklist(Mul.getNode());
4946 return Sub;
4947 }
4948 }
4949
4950 // sdiv, srem -> sdivrem
4951 if (SDValue DivRem = useDivRem(N))
4952 return DivRem.getValue(1);
4953
4954 return SDValue();
4955}
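// Illustrative examples (editorial addition, not part of the LLVM source):
// visitREM folds "urem X, 16" into "and X, 15", and for other constant
// divisors it reuses the division-by-constant expansion, rewriting X % C as
// X - (X / C) * C when integer division is not cheap on the target.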
4956
4957SDValue DAGCombiner::visitMULHS(SDNode *N) {
4958 SDValue N0 = N->getOperand(0);
4959 SDValue N1 = N->getOperand(1);
4960 EVT VT = N->getValueType(0);
4961 SDLoc DL(N);
4962
4963 // fold (mulhs c1, c2)
4964 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHS, DL, VT, {N0, N1}))
4965 return C;
4966
4967 // canonicalize constant to RHS.
4968 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4969 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4970 return DAG.getNode(ISD::MULHS, DL, N->getVTList(), N1, N0);
4971
4972 if (VT.isVector()) {
4973 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4974 return FoldedVOp;
4975
4976 // fold (mulhs x, 0) -> 0
4977 // do not return N1, because undef node may exist.
4978 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
4979 return DAG.getConstant(0, DL, VT);
4980 }
4981
4982 // fold (mulhs x, 0) -> 0
4983 if (isNullConstant(N1))
4984 return N1;
4985
4986 // fold (mulhs x, 1) -> (sra x, size(x)-1)
4987 if (isOneConstant(N1))
4988 return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
4989 DAG.getConstant(N0.getScalarValueSizeInBits() - 1, DL,
4990 getShiftAmountTy(N0.getValueType())));
4991
4992 // fold (mulhs x, undef) -> 0
4993 if (N0.isUndef() || N1.isUndef())
4994 return DAG.getConstant(0, DL, VT);
4995
4996 // If the type twice as wide is legal, transform the mulhs to a wider multiply
4997 // plus a shift.
4998 if (!TLI.isOperationLegalOrCustom(ISD::MULHS, VT) && VT.isSimple() &&
4999 !VT.isVector()) {
5000 MVT Simple = VT.getSimpleVT();
5001 unsigned SimpleSize = Simple.getSizeInBits();
5002 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5003 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5004 N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
5005 N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
5006 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
5007 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
5008 DAG.getConstant(SimpleSize, DL,
5009 getShiftAmountTy(N1.getValueType())));
5010 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
5011 }
5012 }
5013
5014 return SDValue();
5015}
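// Illustrative worked example (editorial addition, not part of the LLVM
// source): on a target where i16 MULHS is unsupported but i32 MUL is legal,
// the widening path above rewrites "mulhs a, b" (i16) as
//   trunc i32 ((sext a * sext b) >> 16) to i16
// i.e. the high half is recovered from a full-width multiply plus a shift.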
5016
5017SDValue DAGCombiner::visitMULHU(SDNode *N) {
5018 SDValue N0 = N->getOperand(0);
5019 SDValue N1 = N->getOperand(1);
5020 EVT VT = N->getValueType(0);
5021 SDLoc DL(N);
5022
5023 // fold (mulhu c1, c2)
5024 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHU, DL, VT, {N0, N1}))
5025 return C;
5026
5027 // canonicalize constant to RHS.
5028 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5029 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5030 return DAG.getNode(ISD::MULHU, DL, N->getVTList(), N1, N0);
5031
5032 if (VT.isVector()) {
5033 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5034 return FoldedVOp;
5035
5036 // fold (mulhu x, 0) -> 0
5037 // do not return N1, because undef node may exist.
5038 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
5039 return DAG.getConstant(0, DL, VT);
5040 }
5041
5042 // fold (mulhu x, 0) -> 0
5043 if (isNullConstant(N1))
5044 return N1;
5045
5046 // fold (mulhu x, 1) -> 0
5047 if (isOneConstant(N1))
5048 return DAG.getConstant(0, DL, N0.getValueType());
5049
5050 // fold (mulhu x, undef) -> 0
5051 if (N0.isUndef() || N1.isUndef())
5052 return DAG.getConstant(0, DL, VT);
5053
5054 // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
5055 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
5056 hasOperation(ISD::SRL, VT)) {
5057 if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
5058 unsigned NumEltBits = VT.getScalarSizeInBits();
5059 SDValue SRLAmt = DAG.getNode(
5060 ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
5061 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
5062 SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
5063 return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
5064 }
5065 }
5066
5067 // If the type twice as wide is legal, transform the mulhu to a wider multiply
5068 // plus a shift.
5069 if (!TLI.isOperationLegalOrCustom(ISD::MULHU, VT) && VT.isSimple() &&
5070 !VT.isVector()) {
5071 MVT Simple = VT.getSimpleVT();
5072 unsigned SimpleSize = Simple.getSizeInBits();
5073 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5074 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5075 N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
5076 N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
5077 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
5078 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
5079 DAG.getConstant(SimpleSize, DL,
5080 getShiftAmountTy(N1.getValueType())));
5081 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
5082 }
5083 }
5084
5085 // Simplify the operands using demanded-bits information.
5086 // We don't have demanded bits support for MULHU so this just enables constant
5087 // folding based on known bits.
5088 if (SimplifyDemandedBits(SDValue(N, 0)))
5089 return SDValue(N, 0);
5090
5091 return SDValue();
5092}
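// Illustrative worked example (editorial addition, not part of the LLVM
// source): the shift fold above relies on
//   mulhu X, 2^C == (X * 2^C) >> BitWidth == X >> (BitWidth - C)
// so, when the shift is available, an i32 "mulhu X, 65536" becomes
// "srl X, 16".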
5093
5094SDValue DAGCombiner::visitAVG(SDNode *N) {
5095 unsigned Opcode = N->getOpcode();
5096 SDValue N0 = N->getOperand(0);
5097 SDValue N1 = N->getOperand(1);
5098 EVT VT = N->getValueType(0);
5099 SDLoc DL(N);
5100
5101 // fold (avg c1, c2)
5102 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5103 return C;
5104
5105 // canonicalize constant to RHS.
5106 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5107 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5108 return DAG.getNode(Opcode, DL, N->getVTList(), N1, N0);
5109
5110 if (VT.isVector()) {
5111 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5112 return FoldedVOp;
5113
5114 // fold (avgfloor x, 0) -> x >> 1
5115 if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) {
5116 if (Opcode == ISD::AVGFLOORS)
5117 return DAG.getNode(ISD::SRA, DL, VT, N0, DAG.getConstant(1, DL, VT));
5118 if (Opcode == ISD::AVGFLOORU)
5119 return DAG.getNode(ISD::SRL, DL, VT, N0, DAG.getConstant(1, DL, VT));
5120 }
5121 }
5122
5123 // fold (avg x, undef) -> x
5124 if (N0.isUndef())
5125 return N1;
5126 if (N1.isUndef())
5127 return N0;
5128
5129 // Fold (avg x, x) --> x
5130 if (N0 == N1 && Level >= AfterLegalizeTypes)
5131 return N0;
5132
5133 // TODO If we use avg for scalars anywhere, we can add (avgfl x, 0) -> x >> 1
5134
5135 return SDValue();
5136}
5137
5138SDValue DAGCombiner::visitABD(SDNode *N) {
5139 unsigned Opcode = N->getOpcode();
5140 SDValue N0 = N->getOperand(0);
5141 SDValue N1 = N->getOperand(1);
5142 EVT VT = N->getValueType(0);
5143 SDLoc DL(N);
5144
5145 // fold (abd c1, c2)
5146 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5147 return C;
5148
5149 // canonicalize constant to RHS.
5150 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5151 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5152 return DAG.getNode(Opcode, DL, N->getVTList(), N1, N0);
5153
5154 if (VT.isVector()) {
5155 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5156 return FoldedVOp;
5157
5158 // fold (abds x, 0) -> abs x
5159 // fold (abdu x, 0) -> x
5160 if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) {
5161 if (Opcode == ISD::ABDS)
5162 return DAG.getNode(ISD::ABS, DL, VT, N0);
5163 if (Opcode == ISD::ABDU)
5164 return N0;
5165 }
5166 }
5167
5168 // fold (abd x, undef) -> 0
5169 if (N0.isUndef() || N1.isUndef())
5170 return DAG.getConstant(0, DL, VT);
5171
5172 // fold (abds x, y) -> (abdu x, y) iff both args are known positive
5173 if (Opcode == ISD::ABDS && hasOperation(ISD::ABDU, VT) &&
5174 DAG.SignBitIsZero(N0) && DAG.SignBitIsZero(N1))
5175 return DAG.getNode(ISD::ABDU, DL, VT, N1, N0);
5176
5177 return SDValue();
5178}
5179
5180/// Perform optimizations common to nodes that compute two values. LoOp and HiOp
5181/// give the opcodes for the two computations that are being performed. Return
5182/// true if a simplification was made.
5183SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
5184 unsigned HiOp) {
5185 // If the high half is not needed, just compute the low half.
5186 bool HiExists = N->hasAnyUseOfValue(1);
5187 if (!HiExists && (!LegalOperations ||
5188 TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
5189 SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
5190 return CombineTo(N, Res, Res);
5191 }
5192
5193 // If the low half is not needed, just compute the high half.
5194 bool LoExists = N->hasAnyUseOfValue(0);
5195 if (!LoExists && (!LegalOperations ||
5196 TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) {
5197 SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
5198 return CombineTo(N, Res, Res);
5199 }
5200
5201 // If both halves are used, return as it is.
5202 if (LoExists && HiExists)
5203 return SDValue();
5204
5205 // If the two computed results can be simplified separately, separate them.
5206 if (LoExists) {
5207 SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
5208 AddToWorklist(Lo.getNode());
5209 SDValue LoOpt = combine(Lo.getNode());
5210 if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
5211 (!LegalOperations ||
5212 TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType())))
5213 return CombineTo(N, LoOpt, LoOpt);
5214 }
5215
5216 if (HiExists) {
5217 SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
5218 AddToWorklist(Hi.getNode());
5219 SDValue HiOpt = combine(Hi.getNode());
5220 if (HiOpt.getNode() && HiOpt != Hi &&
5221 (!LegalOperations ||
5222 TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType())))
5223 return CombineTo(N, HiOpt, HiOpt);
5224 }
5225
5226 return SDValue();
5227}
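// Illustrative example (editorial addition, not part of the LLVM source):
// if only result 0 of a (smul_lohi a, b) has uses, this helper replaces the
// node with a plain MUL; if only result 1 has uses, it becomes MULHS,
// subject to the legality checks above.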
5228
5229SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
5230 if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
5231 return Res;
5232
5233 SDValue N0 = N->getOperand(0);
5234 SDValue N1 = N->getOperand(1);
5235 EVT VT = N->getValueType(0);
5236 SDLoc DL(N);
5237
5238 // Constant fold.
5239 if (isa<ConstantSDNode>(N0) && isa<ConstantSDNode>(N1))
5240 return DAG.getNode(ISD::SMUL_LOHI, DL, N->getVTList(), N0, N1);
5241
5242 // canonicalize constant to RHS (vector doesn't have to splat)
5243 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5244 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5245 return DAG.getNode(ISD::SMUL_LOHI, DL, N->getVTList(), N1, N0);
5246
5247 // If the type twice as wide is legal, transform the smul_lohi to a wider
5248 // multiply plus a shift.
5249 if (VT.isSimple() && !VT.isVector()) {
5250 MVT Simple = VT.getSimpleVT();
5251 unsigned SimpleSize = Simple.getSizeInBits();
5252 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5253 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5254 SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
5255 SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
5256 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
5257 // Compute the high part as N1.
5258 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
5259 DAG.getConstant(SimpleSize, DL,
5260 getShiftAmountTy(Lo.getValueType())));
5261 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
5262 // Compute the low part as N0.
5263 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
5264 return CombineTo(N, Lo, Hi);
5265 }
5266 }
5267
5268 return SDValue();
5269}
5270
5271SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
5272 if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
5273 return Res;
5274
5275 SDValue N0 = N->getOperand(0);
5276 SDValue N1 = N->getOperand(1);
5277 EVT VT = N->getValueType(0);
5278 SDLoc DL(N);
5279
5280 // Constant fold.
5281 if (isa<ConstantSDNode>(N0) && isa<ConstantSDNode>(N1))
5282 return DAG.getNode(ISD::UMUL_LOHI, DL, N->getVTList(), N0, N1);
5283
5284 // canonicalize constant to RHS (vector doesn't have to splat)
5285 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5286 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5287 return DAG.getNode(ISD::UMUL_LOHI, DL, N->getVTList(), N1, N0);
5288
5289 // (umul_lohi N0, 0) -> (0, 0)
5290 if (isNullConstant(N1)) {
5291 SDValue Zero = DAG.getConstant(0, DL, VT);
5292 return CombineTo(N, Zero, Zero);
5293 }
5294
5295 // (umul_lohi N0, 1) -> (N0, 0)
5296 if (isOneConstant(N1)) {
5297 SDValue Zero = DAG.getConstant(0, DL, VT);
5298 return CombineTo(N, N0, Zero);
5299 }
5300
5301 // If the type twice as wide is legal, transform the umul_lohi to a wider
5302 // multiply plus a shift.
5303 if (VT.isSimple() && !VT.isVector()) {
5304 MVT Simple = VT.getSimpleVT();
5305 unsigned SimpleSize = Simple.getSizeInBits();
5306 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5307 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5308 SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
5309 SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
5310 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
5311 // Compute the high part as N1.
5312 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
5313 DAG.getConstant(SimpleSize, DL,
5314 getShiftAmountTy(Lo.getValueType())));
5315 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
5316 // Compute the low part as N0.
5317 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
5318 return CombineTo(N, Lo, Hi);
5319 }
5320 }
5321
5322 return SDValue();
5323}
5324
5325SDValue DAGCombiner::visitMULO(SDNode *N) {
5326 SDValue N0 = N->getOperand(0);
5327 SDValue N1 = N->getOperand(1);
5328 EVT VT = N0.getValueType();
5329 bool IsSigned = (ISD::SMULO == N->getOpcode());
5330
5331 EVT CarryVT = N->getValueType(1);
5332 SDLoc DL(N);
5333
5334 ConstantSDNode *N0C = isConstOrConstSplat(N0);
5335 ConstantSDNode *N1C = isConstOrConstSplat(N1);
5336
5337 // fold operation with constant operands.
5338 // TODO: Move this to FoldConstantArithmetic when it supports nodes with
5339 // multiple results.
5340 if (N0C && N1C) {
5341 bool Overflow;
5342 APInt Result =
5343 IsSigned ? N0C->getAPIntValue().smul_ov(N1C->getAPIntValue(), Overflow)
5344 : N0C->getAPIntValue().umul_ov(N1C->getAPIntValue(), Overflow);
5345 return CombineTo(N, DAG.getConstant(Result, DL, VT),
5346 DAG.getBoolConstant(Overflow, DL, CarryVT, CarryVT));
5347 }
5348
5349 // canonicalize constant to RHS.
5350 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5351 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5352 return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
5353
5354 // fold (mulo x, 0) -> 0 + no carry out
5355 if (isNullOrNullSplat(N1))
5356 return CombineTo(N, DAG.getConstant(0, DL, VT),
5357 DAG.getConstant(0, DL, CarryVT));
5358
5359 // (mulo x, 2) -> (addo x, x)
5360 // FIXME: This needs a freeze.
5361 if (N1C && N1C->getAPIntValue() == 2 &&
5362 (!IsSigned || VT.getScalarSizeInBits() > 2))
5363 return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, DL,
5364 N->getVTList(), N0, N0);
5365
5366 // A 1 bit SMULO overflows if both inputs are 1.
5367 if (IsSigned && VT.getScalarSizeInBits() == 1) {
5368 SDValue And = DAG.getNode(ISD::AND, DL, VT, N0, N1);
5369 SDValue Cmp = DAG.getSetCC(DL, CarryVT, And,
5370 DAG.getConstant(0, DL, VT), ISD::SETNE);
5371 return CombineTo(N, And, Cmp);
5372 }
5373
5374 // If it cannot overflow, transform into a mul.
5375 if (DAG.willNotOverflowMul(IsSigned, N0, N1))
5376 return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1),
5377 DAG.getConstant(0, DL, CarryVT));
5378 return SDValue();
5379}
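// Illustrative example (editorial addition, not part of the LLVM source):
// the final fold fires when known bits prove the product cannot overflow,
// e.g. an i32 umulo of two operands each known to fit in 16 bits is
// rewritten as a plain MUL with a constant-false overflow flag.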
5380
5381// Function to calculate whether the Min/Max pair of SDNodes (potentially
5382// swapped around) make a signed saturate pattern, clamping to between a signed
5383// saturate of -2^(BW-1) and 2^(BW-1)-1, or an unsigned saturate of 0 and 2^BW.
5384// Returns the node being clamped and the bitwidth of the clamp in BW. Should
5385// work with both SMIN/SMAX nodes and setcc/select combo. The operands are the
5386// same as SimplifySelectCC. N0<N1 ? N2 : N3.
5387static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2,
5388 SDValue N3, ISD::CondCode CC, unsigned &BW,
5389 bool &Unsigned, SelectionDAG &DAG) {
5390 auto isSignedMinMax = [&](SDValue N0, SDValue N1, SDValue N2, SDValue N3,
5391 ISD::CondCode CC) {
5392 // The compare and select operand should be the same or the select operands
5393 // should be truncated versions of the comparison.
5394 if (N0 != N2 && (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0)))
5395 return 0;
5396 // The constants need to be the same or a truncated version of each other.
5397 ConstantSDNode *N1C = isConstOrConstSplat(N1);
5398 ConstantSDNode *N3C = isConstOrConstSplat(N3);
5399 if (!N1C || !N3C)
5400 return 0;
5401 const APInt &C1 = N1C->getAPIntValue().trunc(N1.getScalarValueSizeInBits());
5402 const APInt &C2 = N3C->getAPIntValue().trunc(N3.getScalarValueSizeInBits());
5403 if (C1.getBitWidth() < C2.getBitWidth() || C1 != C2.sext(C1.getBitWidth()))
5404 return 0;
5405 return CC == ISD::SETLT ? ISD::SMIN : (CC == ISD::SETGT ? ISD::SMAX : 0);
5406 };
5407
5408 // Check the initial value is a SMIN/SMAX equivalent.
5409 unsigned Opcode0 = isSignedMinMax(N0, N1, N2, N3, CC);
5410 if (!Opcode0)
5411 return SDValue();
5412
5413 // We could only need one range check, if the fptosi could never produce
5414 // the upper value.
5415 if (N0.getOpcode() == ISD::FP_TO_SINT && Opcode0 == ISD::SMAX) {
5416 if (isNullOrNullSplat(N3)) {
5417 EVT IntVT = N0.getValueType().getScalarType();
5418 EVT FPVT = N0.getOperand(0).getValueType().getScalarType();
5419 if (FPVT.isSimple()) {
5420 Type *InputTy = FPVT.getTypeForEVT(*DAG.getContext());
5421 const fltSemantics &Semantics = InputTy->getFltSemantics();
5422 uint32_t MinBitWidth =
5423 APFloatBase::semanticsIntSizeInBits(Semantics, /*isSigned*/ true);
5424 if (IntVT.getSizeInBits() >= MinBitWidth) {
5425 Unsigned = true;
5426 BW = PowerOf2Ceil(MinBitWidth);
5427 return N0;
5428 }
5429 }
5430 }
5431 }
5432
5433 SDValue N00, N01, N02, N03;
5434 ISD::CondCode N0CC;
5435 switch (N0.getOpcode()) {
5436 case ISD::SMIN:
5437 case ISD::SMAX:
5438 N00 = N02 = N0.getOperand(0);
5439 N01 = N03 = N0.getOperand(1);
5440 N0CC = N0.getOpcode() == ISD::SMIN ? ISD::SETLT : ISD::SETGT;
5441 break;
5442 case ISD::SELECT_CC:
5443 N00 = N0.getOperand(0);
5444 N01 = N0.getOperand(1);
5445 N02 = N0.getOperand(2);
5446 N03 = N0.getOperand(3);
5447 N0CC = cast<CondCodeSDNode>(N0.getOperand(4))->get();
5448 break;
5449 case ISD::SELECT:
5450 case ISD::VSELECT:
5451 if (N0.getOperand(0).getOpcode() != ISD::SETCC)
5452 return SDValue();
5453 N00 = N0.getOperand(0).getOperand(0);
5454 N01 = N0.getOperand(0).getOperand(1);
5455 N02 = N0.getOperand(1);
5456 N03 = N0.getOperand(2);
5457 N0CC = cast<CondCodeSDNode>(N0.getOperand(0).getOperand(2))->get();
5458 break;
5459 default:
5460 return SDValue();
5461 }
5462
5463 unsigned Opcode1 = isSignedMinMax(N00, N01, N02, N03, N0CC);
5464 if (!Opcode1 || Opcode0 == Opcode1)
5465 return SDValue();
5466
5467 ConstantSDNode *MinCOp = isConstOrConstSplat(Opcode0 == ISD::SMIN ? N1 : N01);
5468 ConstantSDNode *MaxCOp = isConstOrConstSplat(Opcode0 == ISD::SMIN ? N01 : N1);
5469 if (!MinCOp || !MaxCOp || MinCOp->getValueType(0) != MaxCOp->getValueType(0))
5470 return SDValue();
5471
5472 const APInt &MinC = MinCOp->getAPIntValue();
5473 const APInt &MaxC = MaxCOp->getAPIntValue();
5474 APInt MinCPlus1 = MinC + 1;
5475 if (-MaxC == MinCPlus1 && MinCPlus1.isPowerOf2()) {
5476 BW = MinCPlus1.exactLogBase2() + 1;
5477 Unsigned = false;
5478 return N02;
5479 }
5480
5481 if (MaxC == 0 && MinCPlus1.isPowerOf2()) {
5482 BW = MinCPlus1.exactLogBase2();
5483 Unsigned = true;
5484 return N02;
5485 }
5486
5487 return SDValue();
5488}
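// Illustrative example (editorial addition, not part of the LLVM source):
// for the clamp smax(smin(X, 127), -128) the helper above reports BW = 8,
// Unsigned = false (a signed 8-bit saturate of X); for smax(smin(X, 255), 0)
// it reports BW = 8, Unsigned = true.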
5489
5490static SDValue PerformMinMaxFpToSatCombine(SDValue N0, SDValue N1, SDValue N2,
5491 SDValue N3, ISD::CondCode CC,
5492 SelectionDAG &DAG) {
5493 unsigned BW;
5494 bool Unsigned;
5495 SDValue Fp = isSaturatingMinMax(N0, N1, N2, N3, CC, BW, Unsigned, DAG);
5496 if (!Fp || Fp.getOpcode() != ISD::FP_TO_SINT)
5497 return SDValue();
5498 EVT FPVT = Fp.getOperand(0).getValueType();
5499 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), BW);
5500 if (FPVT.isVector())
5501 NewVT = EVT::getVectorVT(*DAG.getContext(), NewVT,
5502 FPVT.getVectorElementCount());
5503 unsigned NewOpc = Unsigned ? ISD::FP_TO_UINT_SAT : ISD::FP_TO_SINT_SAT;
5504 if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(NewOpc, FPVT, NewVT))
5505 return SDValue();
5506 SDLoc DL(Fp);
5507 SDValue Sat = DAG.getNode(NewOpc, DL, NewVT, Fp.getOperand(0),
5508 DAG.getValueType(NewVT.getScalarType()));
5509 return DAG.getExtOrTrunc(!Unsigned, Sat, DL, N2->getValueType(0));
5510}
5511
5512static SDValue PerformUMinFpToSatCombine(SDValue N0, SDValue N1, SDValue N2,
5513 SDValue N3, ISD::CondCode CC,
5514 SelectionDAG &DAG) {
5515 // We are looking for UMIN(FPTOUI(X), (2^n)-1), which may have come via a
5516 // select/vselect/select_cc. The two operands pairs for the select (N2/N3) may
5517 // be truncated versions of the setcc (N0/N1).
5518 if ((N0 != N2 &&
5519 (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0))) ||
5520 N0.getOpcode() != ISD::FP_TO_UINT)
5521 return SDValue();
5522 ConstantSDNode *N1C = isConstOrConstSplat(N1);
5523 ConstantSDNode *N3C = isConstOrConstSplat(N3);
5524 if (!N1C || !N3C)
5525 return SDValue();
5526 const APInt &C1 = N1C->getAPIntValue();
5527 const APInt &C3 = N3C->getAPIntValue();
5528 if (!(C1 + 1).isPowerOf2() || C1.getBitWidth() < C3.getBitWidth() ||
5529 C1 != C3.zext(C1.getBitWidth()))
5530 return SDValue();
5531
5532 unsigned BW = (C1 + 1).exactLogBase2();
5533 EVT FPVT = N0.getOperand(0).getValueType();
5534 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), BW);
5535 if (FPVT.isVector())
5536 NewVT = EVT::getVectorVT(*DAG.getContext(), NewVT,
5537 FPVT.getVectorElementCount());
5538 if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(ISD::FP_TO_UINT_SAT,
5539 FPVT, NewVT))
5540 return SDValue();
5541
5542 SDValue Sat =
5543 DAG.getNode(ISD::FP_TO_UINT_SAT, SDLoc(N0), NewVT, N0.getOperand(0),
5544 DAG.getValueType(NewVT.getScalarType()));
5545 return DAG.getZExtOrTrunc(Sat, SDLoc(N0), N3.getValueType());
5546}
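// Illustrative example (editorial addition, not part of the LLVM source):
// this matches patterns such as umin(fp_to_uint(F), 255) coming from a
// select/setcc and, when the target wants the conversion, rewrites them as a
// zero-extended fp_to_uint_sat saturating to i8.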
5547
5548SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
5549 SDValue N0 = N->getOperand(0);
5550 SDValue N1 = N->getOperand(1);
5551 EVT VT = N0.getValueType();
5552 unsigned Opcode = N->getOpcode();
5553 SDLoc DL(N);
5554
5555 // fold operation with constant operands.
5556 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5557 return C;
5558
5559 // If the operands are the same, this is a no-op.
5560 if (N0 == N1)
5561 return N0;
5562
5563 // canonicalize constant to RHS
5564 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5565 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5566 return DAG.getNode(Opcode, DL, VT, N1, N0);
5567
5568 // fold vector ops
5569 if (VT.isVector())
5570 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5571 return FoldedVOp;
5572
5573 // reassociate minmax
5574 if (SDValue RMINMAX = reassociateOps(Opcode, DL, N0, N1, N->getFlags()))
5575 return RMINMAX;
5576
5577 // If the sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
5578 // Only do this if the current op isn't legal and the flipped is.
5579 if (!TLI.isOperationLegal(Opcode, VT) &&
5580 (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
5581 (N1.isUndef() || DAG.SignBitIsZero(N1))) {
5582 unsigned AltOpcode;
5583 switch (Opcode) {
5584 case ISD::SMIN: AltOpcode = ISD::UMIN; break;
5585 case ISD::SMAX: AltOpcode = ISD::UMAX; break;
5586 case ISD::UMIN: AltOpcode = ISD::SMIN; break;
5587 case ISD::UMAX: AltOpcode = ISD::SMAX; break;
5588 default: llvm_unreachable("Unknown MINMAX opcode");
5589 }
5590 if (TLI.isOperationLegal(AltOpcode, VT))
5591 return DAG.getNode(AltOpcode, DL, VT, N0, N1);
5592 }
5593
5594 if (Opcode == ISD::SMIN || Opcode == ISD::SMAX)
5595 if (SDValue S = PerformMinMaxFpToSatCombine(
5596 N0, N1, N0, N1, Opcode == ISD::SMIN ? ISD::SETLT : ISD::SETGT, DAG))
5597 return S;
5598 if (Opcode == ISD::UMIN)
5599 if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N0, N1, ISD::SETULT, DAG))
5600 return S;
5601
5602 // Fold min/max(vecreduce(x), vecreduce(y)) -> vecreduce(min/max(x, y))
5603 auto ReductionOpcode = [](unsigned Opcode) {
5604 switch (Opcode) {
5605 case ISD::SMIN:
5606 return ISD::VECREDUCE_SMIN;
5607 case ISD::SMAX:
5608 return ISD::VECREDUCE_SMAX;
5609 case ISD::UMIN:
5610 return ISD::VECREDUCE_UMIN;
5611 case ISD::UMAX:
5612 return ISD::VECREDUCE_UMAX;
5613 default:
5614 llvm_unreachable("Unexpected opcode");
5615 }
5616 };
5617 if (SDValue SD = reassociateReduction(ReductionOpcode(Opcode), Opcode,
5618 SDLoc(N), VT, N0, N1))
5619 return SD;
5620
5621 // Simplify the operands using demanded-bits information.
5622 if (SimplifyDemandedBits(SDValue(N, 0)))
5623 return SDValue(N, 0);
5624
5625 return SDValue();
5626}
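// Illustrative example (editorial addition, not part of the LLVM source):
// one instance of the sign-bit flip above: if both operands of an smax are
// known non-negative, SMAX is not legal, and UMAX is, then "smax X, Y" is
// rewritten as "umax X, Y", since the two agree on non-negative inputs.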
5627
5628/// If this is a bitwise logic instruction and both operands have the same
5629/// opcode, try to sink the other opcode after the logic instruction.
5630SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
5631 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
5632 EVT VT = N0.getValueType();
5633 unsigned LogicOpcode = N->getOpcode();
5634 unsigned HandOpcode = N0.getOpcode();
5635 assert(ISD::isBitwiseLogicOp(LogicOpcode) && "Expected logic opcode");
5636 assert(HandOpcode == N1.getOpcode() && "Bad input!");
5637
5638 // Bail early if none of these transforms apply.
5639 if (N0.getNumOperands() == 0)
5640 return SDValue();
5641
5642 // FIXME: We should check number of uses of the operands to not increase
5643 // the instruction count for all transforms.
5644
5645 // Handle size-changing casts (or sign_extend_inreg).
5646 SDValue X = N0.getOperand(0);
5647 SDValue Y = N1.getOperand(0);
5648 EVT XVT = X.getValueType();
5649 SDLoc DL(N);
5650 if (ISD::isExtOpcode(HandOpcode) || ISD::isExtVecInRegOpcode(HandOpcode) ||
5651 (HandOpcode == ISD::SIGN_EXTEND_INREG &&
5652 N0.getOperand(1) == N1.getOperand(1))) {
5653 // If both operands have other uses, this transform would create extra
5654 // instructions without eliminating anything.
5655 if (!N0.hasOneUse() && !N1.hasOneUse())
5656 return SDValue();
5657 // We need matching integer source types.
5658 if (XVT != Y.getValueType())
5659 return SDValue();
5660 // Don't create an illegal op during or after legalization. Don't ever
5661 // create an unsupported vector op.
5662 if ((VT.isVector() || LegalOperations) &&
5663 !TLI.isOperationLegalOrCustom(LogicOpcode, XVT))
5664 return SDValue();
5665 // Avoid infinite looping with PromoteIntBinOp.
5666 // TODO: Should we apply desirable/legal constraints to all opcodes?
5667 if ((HandOpcode == ISD::ANY_EXTEND ||
5668 HandOpcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
5669 LegalTypes && !TLI.isTypeDesirableForOp(LogicOpcode, XVT))
5670 return SDValue();
5671 // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
5672 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5673 if (HandOpcode == ISD::SIGN_EXTEND_INREG)
5674 return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
5675 return DAG.getNode(HandOpcode, DL, VT, Logic);
5676 }
5677
5678 // logic_op (truncate x), (truncate y) --> truncate (logic_op x, y)
5679 if (HandOpcode == ISD::TRUNCATE) {
5680 // If both operands have other uses, this transform would create extra
5681 // instructions without eliminating anything.
5682 if (!N0.hasOneUse() && !N1.hasOneUse())
5683 return SDValue();
5684 // We need matching source types.
5685 if (XVT != Y.getValueType())
5686 return SDValue();
5687 // Don't create an illegal op during or after legalization.
5688 if (LegalOperations && !TLI.isOperationLegal(LogicOpcode, XVT))
5689 return SDValue();
5690 // Be extra careful sinking truncate. If it's free, there's no benefit in
5691 // widening a binop. Also, don't create a logic op on an illegal type.
5692 if (TLI.isZExtFree(VT, XVT) && TLI.isTruncateFree(XVT, VT))
5693 return SDValue();
5694 if (!TLI.isTypeLegal(XVT))
5695 return SDValue();
5696 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5697 return DAG.getNode(HandOpcode, DL, VT, Logic);
5698 }
5699
5700 // For binops SHL/SRL/SRA/AND:
5701 // logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
5702 if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL ||
5703 HandOpcode == ISD::SRA || HandOpcode == ISD::AND) &&
5704 N0.getOperand(1) == N1.getOperand(1)) {
5705 // If either operand has other uses, this transform is not an improvement.
5706 if (!N0.hasOneUse() || !N1.hasOneUse())
5707 return SDValue();
5708 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5709 return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
5710 }
5711
5712 // Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
5713 if (HandOpcode == ISD::BSWAP) {
5714 // If either operand has other uses, this transform is not an improvement.
5715 if (!N0.hasOneUse() || !N1.hasOneUse())
5716 return SDValue();
5717 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5718 return DAG.getNode(HandOpcode, DL, VT, Logic);
5719 }
5720
5721 // For funnel shifts FSHL/FSHR:
5722 // logic_op (OP x, x1, s), (OP y, y1, s) -->
5723 // --> OP (logic_op x, y), (logic_op, x1, y1), s
5724 if ((HandOpcode == ISD::FSHL || HandOpcode == ISD::FSHR) &&
5725 N0.getOperand(2) == N1.getOperand(2)) {
5726 if (!N0.hasOneUse() || !N1.hasOneUse())
5727 return SDValue();
5728 SDValue X1 = N0.getOperand(1);
5729 SDValue Y1 = N1.getOperand(1);
5730 SDValue S = N0.getOperand(2);
5731 SDValue Logic0 = DAG.getNode(LogicOpcode, DL, VT, X, Y);
5732 SDValue Logic1 = DAG.getNode(LogicOpcode, DL, VT, X1, Y1);
5733 return DAG.getNode(HandOpcode, DL, VT, Logic0, Logic1, S);
5734 }
5735
5736 // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
5737 // Only perform this optimization up until type legalization, before
5738 // LegalizeVectorOps. LegalizeVectorOps promotes vector operations by
5739 // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
5740 // we don't want to undo this promotion.
5741 // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
5742 // on scalars.
5743 if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) &&
5744 Level <= AfterLegalizeTypes) {
5745 // Input types must be integer and the same.
5746 if (XVT.isInteger() && XVT == Y.getValueType() &&
5747 !(VT.isVector() && TLI.isTypeLegal(VT) &&
5748 !XVT.isVector() && !TLI.isTypeLegal(XVT))) {
5749 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5750 return DAG.getNode(HandOpcode, DL, VT, Logic);
5751 }
5752 }
5753
5754 // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
5755 // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
5756 // If both shuffles use the same mask, and both shuffle within a single
5757 // vector, then it is worthwhile to move the swizzle after the operation.
5758 // The type-legalizer generates this pattern when loading illegal
5759 // vector types from memory. In many cases this allows additional shuffle
5760 // optimizations.
5761 // There are other cases where moving the shuffle after the xor/and/or
5762 // is profitable even if shuffles don't perform a swizzle.
5763 // If both shuffles use the same mask, and both shuffles have the same first
5764 // or second operand, then it might still be profitable to move the shuffle
5765 // after the xor/and/or operation.
5766 if (HandOpcode == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
5767 auto *SVN0 = cast<ShuffleVectorSDNode>(N0);
5768 auto *SVN1 = cast<ShuffleVectorSDNode>(N1);
5769 assert(X.getValueType() == Y.getValueType() &&
5770 "Inputs to shuffles are not the same type");
5771
5772 // Check that both shuffles use the same mask. The masks are known to be of
5773 // the same length because the result vector type is the same.
5774 // Check also that shuffles have only one use to avoid introducing extra
5775 // instructions.
5776 if (!SVN0->hasOneUse() || !SVN1->hasOneUse() ||
5777 !SVN0->getMask().equals(SVN1->getMask()))
5778 return SDValue();
5779
5780 // Don't try to fold this node if it requires introducing a
5781 // build vector of all zeros that might be illegal at this stage.
5782 SDValue ShOp = N0.getOperand(1);
5783 if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
5784 ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
5785
5786 // (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C)
5787 if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
5788 SDValue Logic = DAG.getNode(LogicOpcode, DL, VT,
5789 N0.getOperand(0), N1.getOperand(0));
5790 return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask());
5791 }
5792
5793 // Don't try to fold this node if it requires introducing a
5794 // build vector of all zeros that might be illegal at this stage.
5795 ShOp = N0.getOperand(0);
5796 if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
5797 ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
5798
5799 // (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B))
5800 if (N0.getOperand(0) == N1.getOperand(0) && ShOp.getNode()) {
5801 SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, N0.getOperand(1),
5802 N1.getOperand(1));
5803 return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask());
5804 }
5805 }
5806
5807 return SDValue();
5808}
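// Illustrative example (editorial addition, not part of the LLVM source):
// a typical hit for the cast case above, subject to the one-use and
// legality checks, is
//   and (zext i8 X to i32), (zext i8 Y to i32) --> zext (and i8 X, Y) to i32
// which performs the logic op in the narrower type and keeps one extend.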
5809
5810/// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
5811SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
5812 const SDLoc &DL) {
5813 SDValue LL, LR, RL, RR, N0CC, N1CC;
5814 if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
5815 !isSetCCEquivalent(N1, RL, RR, N1CC))
5816 return SDValue();
5817
5818 assert(N0.getValueType() == N1.getValueType() &&
5819 "Unexpected operand types for bitwise logic op");
5820 assert(LL.getValueType() == LR.getValueType() &&
5821 RL.getValueType() == RR.getValueType() &&
5822 "Unexpected operand types for setcc");
5823
5824 // If we're here post-legalization or the logic op type is not i1, the logic
5825 // op type must match a setcc result type. Also, all folds require new
5826 // operations on the left and right operands, so those types must match.
5827 EVT VT = N0.getValueType();
5828 EVT OpVT = LL.getValueType();
5829 if (LegalOperations || VT.getScalarType() != MVT::i1)
5830 if (VT != getSetCCResultType(OpVT))
5831 return SDValue();
5832 if (OpVT != RL.getValueType())
5833 return SDValue();
5834
5835 ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
5836 ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
5837 bool IsInteger = OpVT.isInteger();
5838 if (LR == RR && CC0 == CC1 && IsInteger) {
5839 bool IsZero = isNullOrNullSplat(LR);
5840 bool IsNeg1 = isAllOnesOrAllOnesSplat(LR);
5841
5842 // All bits clear?
5843 bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
5844 // All sign bits clear?
5845 bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
5846 // Any bits set?
5847 bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
5848 // Any sign bits set?
5849 bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
5850
5851 // (and (seteq X, 0), (seteq Y, 0)) --> (seteq (or X, Y), 0)
5852 // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
5853 // (or (setne X, 0), (setne Y, 0)) --> (setne (or X, Y), 0)
5854 // (or (setlt X, 0), (setlt Y, 0)) --> (setlt (or X, Y), 0)
5855 if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
5856 SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
5857 AddToWorklist(Or.getNode());
5858 return DAG.getSetCC(DL, VT, Or, LR, CC1);
5859 }
5860
5861 // All bits set?
5862 bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
5863 // All sign bits set?
5864 bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
5865 // Any bits clear?
5866 bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
5867 // Any sign bits clear?
5868 bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
5869
5870 // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
5871 // (and (setlt X, 0), (setlt Y, 0)) --> (setlt (and X, Y), 0)
5872 // (or (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
5873 // (or (setgt X, -1), (setgt Y -1)) --> (setgt (and X, Y), -1)
5874 if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
5875 SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
5876 AddToWorklist(And.getNode());
5877 return DAG.getSetCC(DL, VT, And, LR, CC1);
5878 }
5879 }
5880
5881 // TODO: What is the 'or' equivalent of this fold?
5882 // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
5883 if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
5884 IsInteger && CC0 == ISD::SETNE &&
5885 ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
5886 (isAllOnesConstant(LR) && isNullConstant(RR)))) {
5887 SDValue One = DAG.getConstant(1, DL, OpVT);
5888 SDValue Two = DAG.getConstant(2, DL, OpVT);
5889 SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
5890 AddToWorklist(Add.getNode());
5891 return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
5892 }
5893
5894 // Try more general transforms if the predicates match and the only user of
5895 // the compares is the 'and' or 'or'.
5896 if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
5897 N0.hasOneUse() && N1.hasOneUse()) {
5898 // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
5899 // or (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
5900 if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
5901 SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
5902 SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
5903 SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
5904 SDValue Zero = DAG.getConstant(0, DL, OpVT);
5905 return DAG.getSetCC(DL, VT, Or, Zero, CC1);
5906 }
5907
5908 // Turn compare of constants whose difference is 1 bit into add+and+setcc.
5909 if ((IsAnd && CC1 == ISD::SETNE) || (!IsAnd && CC1 == ISD::SETEQ)) {
5910 // Match a shared variable operand and 2 non-opaque constant operands.
5911 auto MatchDiffPow2 = [&](ConstantSDNode *C0, ConstantSDNode *C1) {
5912 // The difference of the constants must be a single bit.
5913 const APInt &CMax =
5914 APIntOps::umax(C0->getAPIntValue(), C1->getAPIntValue());
5915 const APInt &CMin =
5916 APIntOps::umin(C0->getAPIntValue(), C1->getAPIntValue());
5917 return !C0->isOpaque() && !C1->isOpaque() && (CMax - CMin).isPowerOf2();
5918 };
5919 if (LL == RL && ISD::matchBinaryPredicate(LR, RR, MatchDiffPow2)) {
5920 // and/or (setcc X, CMax, ne), (setcc X, CMin, ne/eq) -->
5921 // setcc ((sub X, CMin), ~(CMax - CMin)), 0, ne/eq
5922 SDValue Max = DAG.getNode(ISD::UMAX, DL, OpVT, LR, RR);
5923 SDValue Min = DAG.getNode(ISD::UMIN, DL, OpVT, LR, RR);
5924 SDValue Offset = DAG.getNode(ISD::SUB, DL, OpVT, LL, Min);
5925 SDValue Diff = DAG.getNode(ISD::SUB, DL, OpVT, Max, Min);
5926 SDValue Mask = DAG.getNOT(DL, Diff, OpVT);
5927 SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Offset, Mask);
5928 SDValue Zero = DAG.getConstant(0, DL, OpVT);
5929 return DAG.getSetCC(DL, VT, And, Zero, CC0);
5930 }
5931 }
5932 }
5933
5934 // Canonicalize equivalent operands to LL == RL.
5935 if (LL == RR && LR == RL) {
5936 CC1 = ISD::getSetCCSwappedOperands(CC1);
5937 std::swap(RL, RR);
5938 }
5939
5940 // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
5941 // (or (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
5942 if (LL == RL && LR == RR) {
5943 ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, OpVT)
5944 : ISD::getSetCCOrOperation(CC0, CC1, OpVT);
5945 if (NewCC != ISD::SETCC_INVALID &&
5946 (!LegalOperations ||
5947 (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
5948 TLI.isOperationLegal(ISD::SETCC, OpVT))))
5949 return DAG.getSetCC(DL, VT, LL, LR, NewCC);
5950 }
5951
5952 return SDValue();
5953}
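// Illustrative worked example (editorial addition, not part of the LLVM
// source): the one-bit-difference fold above turns, e.g.,
//   (X != 8) & (X != 12)  -->  setne (and (sub X, 8), ~4), 0
// a single compare, because the constants 8 and 12 differ only in bit 2.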
5954
5955static bool arebothOperandsNotSNan(SDValue Operand1, SDValue Operand2,
5956 SelectionDAG &DAG) {
5957 return DAG.isKnownNeverSNaN(Operand2) && DAG.isKnownNeverSNaN(Operand1);
5958}
5959
5960static bool arebothOperandsNotNan(SDValue Operand1, SDValue Operand2,
5961 SelectionDAG &DAG) {
5962 return DAG.isKnownNeverNaN(Operand2) && DAG.isKnownNeverNaN(Operand1);
5963}
5964
5965static unsigned getMinMaxOpcodeForFP(SDValue Operand1, SDValue Operand2,
5966 ISD::CondCode CC, unsigned OrAndOpcode,
5967 SelectionDAG &DAG,
5968 bool isFMAXNUMFMINNUM_IEEE,
5969 bool isFMAXNUMFMINNUM) {
5970 // The optimization cannot be applied for all the predicates because
5971 // of the way FMINNUM/FMAXNUM and FMINNUM_IEEE/FMAXNUM_IEEE handle
5972 // NaNs. For FMINNUM_IEEE/FMAXNUM_IEEE, the optimization cannot be
5973 // applied at all if one of the operands is a signaling NaN.
5974
5975 // It is safe to use FMINNUM_IEEE/FMAXNUM_IEEE if all the operands
5976 // are non NaN values.
5977 if (((CC == ISD::SETLT || CC == ISD::SETLE) && (OrAndOpcode == ISD::OR)) ||
5978 ((CC == ISD::SETGT || CC == ISD::SETGE) && (OrAndOpcode == ISD::AND)))
5979 return arebothOperandsNotNan(Operand1, Operand2, DAG) &&
5980 isFMAXNUMFMINNUM_IEEE
5981 ? ISD::FMINNUM_IEEE
5982 : ISD::DELETED_NODE;
5983 else if (((CC == ISD::SETGT || CC == ISD::SETGE) &&
5984 (OrAndOpcode == ISD::OR)) ||
5985 ((CC == ISD::SETLT || CC == ISD::SETLE) &&
5986 (OrAndOpcode == ISD::AND)))
5987 return arebothOperandsNotNan(Operand1, Operand2, DAG) &&
5988 isFMAXNUMFMINNUM_IEEE
5989 ? ISD::FMAXNUM_IEEE
5990 : ISD::DELETED_NODE;
5991 // Both FMINNUM/FMAXNUM and FMINNUM_IEEE/FMAXNUM_IEEE handle quiet
5992 // NaNs in the same way. But, FMINNUM/FMAXNUM and FMINNUM_IEEE/
5993 // FMAXNUM_IEEE handle signaling NaNs differently. If we cannot prove
5994 // that there are not any sNaNs, then the optimization is not valid
5995 // for FMINNUM_IEEE/FMAXNUM_IEEE. In the presence of sNaNs, we apply
5996 // the optimization using FMINNUM/FMAXNUM for the following cases. If
5997 // we can prove that we do not have any sNaNs, then we can do the
5998 // optimization using FMINNUM_IEEE/FMAXNUM_IEEE for the following
5999 // cases.
6000 else if (((CC == ISD::SETOLT || CC == ISD::SETOLE) &&
6001 (OrAndOpcode == ISD::OR)) ||
6002 ((CC == ISD::SETUGT || CC == ISD::SETUGE) &&
6003 (OrAndOpcode == ISD::AND)))
6004 return isFMAXNUMFMINNUM ? ISD::FMINNUM
6005 : arebothOperandsNotSNan(Operand1, Operand2, DAG) &&
6006 isFMAXNUMFMINNUM_IEEE
6007 ? ISD::FMINNUM_IEEE
6008 : ISD::DELETED_NODE;
6009 else if (((CC == ISD::SETOGT || CC == ISD::SETOGE) &&
6010 (OrAndOpcode == ISD::OR)) ||
6011 ((CC == ISD::SETULT || CC == ISD::SETULE) &&
6012 (OrAndOpcode == ISD::AND)))
6013 return isFMAXNUMFMINNUM ? ISD::FMAXNUM
6014 : arebothOperandsNotSNan(Operand1, Operand2, DAG) &&
6015 isFMAXNUMFMINNUM_IEEE
6016 ? ISD::FMAXNUM_IEEE
6017 : ISD::DELETED_NODE;
6018 return ISD::DELETED_NODE;
6019}
6020
6021static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG) {
6022 using AndOrSETCCFoldKind = TargetLowering::AndOrSETCCFoldKind;
6023 assert(
6024 (LogicOp->getOpcode() == ISD::AND || LogicOp->getOpcode() == ISD::OR) &&
6025 "Invalid Op to combine SETCC with");
6026
6027 // TODO: Search past casts/truncates.
6028 SDValue LHS = LogicOp->getOperand(0);
6029 SDValue RHS = LogicOp->getOperand(1);
6030 if (LHS->getOpcode() != ISD::SETCC || RHS->getOpcode() != ISD::SETCC ||
6031 !LHS->hasOneUse() || !RHS->hasOneUse())
6032 return SDValue();
6033
6034 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6035 AndOrSETCCFoldKind TargetPreference = TLI.isDesirableToCombineLogicOpOfSETCC(
6036 LogicOp, LHS.getNode(), RHS.getNode());
6037
6038 SDValue LHS0 = LHS->getOperand(0);
6039 SDValue RHS0 = RHS->getOperand(0);
6040 SDValue LHS1 = LHS->getOperand(1);
6041 SDValue RHS1 = RHS->getOperand(1);
6042 // TODO: We don't actually need a splat here, for vectors we just need the
6043 // invariants to hold for each element.
6044 auto *LHS1C = isConstOrConstSplat(LHS1);
6045 auto *RHS1C = isConstOrConstSplat(RHS1);
6046 ISD::CondCode CCL = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
6047 ISD::CondCode CCR = cast<CondCodeSDNode>(RHS.getOperand(2))->get();
6048 EVT VT = LogicOp->getValueType(0);
6049 EVT OpVT = LHS0.getValueType();
6050 SDLoc DL(LogicOp);
6051
6052 // Check if the operands of an and/or operation are comparisons and if they
6053 // compare against the same value. Replace the and/or-cmp-cmp sequence with
6054 // min/max cmp sequence. If LHS1 is equal to RHS1, then the or-cmp-cmp
6055 // sequence will be replaced with min-cmp sequence:
6056 // (LHS0 < LHS1) | (RHS0 < RHS1) -> min(LHS0, RHS0) < LHS1
6057 // and and-cmp-cmp will be replaced with max-cmp sequence:
6058 // (LHS0 < LHS1) & (RHS0 < RHS1) -> max(LHS0, RHS0) < LHS1
6059 // The optimization does not work for `==` or `!=` .
6060 // The two comparisons should have either the same predicate or the
6061 // predicate of one of the comparisons is the opposite of the other one.
6062 bool isFMAXNUMFMINNUM_IEEE = TLI.isOperationLegal(ISD::FMAXNUM_IEEE, OpVT) &&
6063 TLI.isOperationLegal(ISD::FMINNUM_IEEE, OpVT);
6064 bool isFMAXNUMFMINNUM = TLI.isOperationLegalOrCustom(ISD::FMAXNUM, OpVT) &&
6065 TLI.isOperationLegalOrCustom(ISD::FMINNUM, OpVT);
6066 if (((OpVT.isInteger() && TLI.isOperationLegal(ISD::UMAX, OpVT) &&
6067 TLI.isOperationLegal(ISD::SMAX, OpVT) &&
6068 TLI.isOperationLegal(ISD::UMIN, OpVT) &&
6069 TLI.isOperationLegal(ISD::SMIN, OpVT)) ||
6070 (OpVT.isFloatingPoint() &&
6071 (isFMAXNUMFMINNUM_IEEE || isFMAXNUMFMINNUM))) &&
6072 !ISD::isIntEqualitySetCC(CCL) && !ISD::isFPEqualitySetCC(CCL) &&
6073 CCL != ISD::SETFALSE && CCL != ISD::SETO && CCL != ISD::SETUO &&
6074 CCL != ISD::SETTRUE &&
6075 (CCL == CCR || CCL == ISD::getSetCCSwappedOperands(CCR))) {
6076
6077 SDValue CommonValue, Operand1, Operand2;
6078 ISD::CondCode CC = ISD::SETCC_INVALID;
6079 if (CCL == CCR) {
6080 if (LHS0 == RHS0) {
6081 CommonValue = LHS0;
6082 Operand1 = LHS1;
6083 Operand2 = RHS1;
6084 CC = CCL;
6085 } else if (LHS1 == RHS1) {
6086 CommonValue = LHS1;
6087 Operand1 = LHS0;
6088 Operand2 = RHS0;
6089 CC = CCL;
6090 }
6091 } else {
6092 assert(CCL == ISD::getSetCCSwappedOperands(CCR) && "Unexpected CC");
6093 if (LHS0 == RHS1) {
6094 CommonValue = LHS0;
6095 Operand1 = LHS1;
6096 Operand2 = RHS0;
6097 CC = CCR;
6098 } else if (RHS0 == LHS1) {
6099 CommonValue = LHS1;
6100 Operand1 = LHS0;
6101 Operand2 = RHS1;
6102 CC = CCL;
6103 }
6104 }
6105
6106 // Don't do this transform for sign bit tests. Let foldLogicOfSetCCs
6107 // handle it using OR/AND.
6108 if (CC == ISD::SETLT && isNullOrNullSplat(CommonValue))
6109 CC = ISD::SETCC_INVALID;
6110 else if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CommonValue))
6111 CC = ISD::SETCC_INVALID;
6112
6113 if (CC != ISD::SETCC_INVALID) {
6114 unsigned NewOpcode = ISD::DELETED_NODE;
6115 bool IsSigned = isSignedIntSetCC(CC);
6116 if (OpVT.isInteger()) {
6117 bool IsLess = (CC == ISD::SETLE || CC == ISD::SETULE ||
6118 CC == ISD::SETLT || CC == ISD::SETULT);
6119 bool IsOr = (LogicOp->getOpcode() == ISD::OR);
6120 if (IsLess == IsOr)
6121 NewOpcode = IsSigned ? ISD::SMIN : ISD::UMIN;
6122 else
6123 NewOpcode = IsSigned ? ISD::SMAX : ISD::UMAX;
6124 } else if (OpVT.isFloatingPoint())
6125 NewOpcode =
6126 getMinMaxOpcodeForFP(Operand1, Operand2, CC, LogicOp->getOpcode(),
6127 DAG, isFMAXNUMFMINNUM_IEEE, isFMAXNUMFMINNUM);
6128
6129 if (NewOpcode != ISD::DELETED_NODE) {
6130 SDValue MinMaxValue =
6131 DAG.getNode(NewOpcode, DL, OpVT, Operand1, Operand2);
6132 return DAG.getSetCC(DL, VT, MinMaxValue, CommonValue, CC);
6133 }
6134 }
6135 }
6136
6137 if (TargetPreference == AndOrSETCCFoldKind::None)
6138 return SDValue();
6139
6140 if (CCL == CCR &&
6141 CCL == (LogicOp->getOpcode() == ISD::AND ? ISD::SETNE : ISD::SETEQ) &&
6142 LHS0 == RHS0 && LHS1C && RHS1C && OpVT.isInteger()) {
6143 const APInt &APLhs = LHS1C->getAPIntValue();
6144 const APInt &APRhs = RHS1C->getAPIntValue();
6145
6146 // Preference is to use ISD::ABS or we already have an ISD::ABS (in which
6147 // case this is just a compare).
6148 if (APLhs == (-APRhs) &&
6149 ((TargetPreference & AndOrSETCCFoldKind::ABS) ||
6150 DAG.doesNodeExist(ISD::ABS, DAG.getVTList(OpVT), {LHS0}))) {
6151 const APInt &C = APLhs.isNegative() ? APRhs : APLhs;
6152 // (icmp eq A, C) | (icmp eq A, -C)
6153 // -> (icmp eq Abs(A), C)
6154 // (icmp ne A, C) & (icmp ne A, -C)
6155 // -> (icmp ne Abs(A), C)
6156 SDValue AbsOp = DAG.getNode(ISD::ABS, DL, OpVT, LHS0);
6157 return DAG.getNode(ISD::SETCC, DL, VT, AbsOp,
6158 DAG.getConstant(C, DL, OpVT), LHS.getOperand(2));
6159 } else if (TargetPreference &
6160 (AndOrSETCCFoldKind::AddAnd | AndOrSETCCFoldKind::NotAnd)) {
6161
6162 // AndOrSETCCFoldKind::AddAnd:
6163 // A == C0 | A == C1
6164 // IF IsPow2(smax(C0, C1)-smin(C0, C1))
6165 // -> ((A - smin(C0, C1)) & ~(smax(C0, C1)-smin(C0, C1))) == 0
6166 // A != C0 & A != C1
6167 // IF IsPow2(smax(C0, C1)-smin(C0, C1))
6168 // -> ((A - smin(C0, C1)) & ~(smax(C0, C1)-smin(C0, C1))) != 0
6169
6170 // AndOrSETCCFoldKind::NotAnd:
6171 // A == C0 | A == C1
6172 // IF smax(C0, C1) == -1 AND IsPow2(smax(C0, C1) - smin(C0, C1))
6173 // -> ~A & smin(C0, C1) == 0
6174 // A != C0 & A != C1
6175 // IF smax(C0, C1) == -1 AND IsPow2(smax(C0, C1) - smin(C0, C1))
6176 // -> ~A & smin(C0, C1) != 0
6177
6178 const APInt &MaxC = APIntOps::smax(APRhs, APLhs);
6179 const APInt &MinC = APIntOps::smin(APRhs, APLhs);
6180 APInt Dif = MaxC - MinC;
6181 if (!Dif.isZero() && Dif.isPowerOf2()) {
6182 if (MaxC.isAllOnes() &&
6183 (TargetPreference & AndOrSETCCFoldKind::NotAnd)) {
6184 SDValue NotOp = DAG.getNOT(DL, LHS0, OpVT);
6185 SDValue AndOp = DAG.getNode(ISD::AND, DL, OpVT, NotOp,
6186 DAG.getConstant(MinC, DL, OpVT));
6187 return DAG.getNode(ISD::SETCC, DL, VT, AndOp,
6188 DAG.getConstant(0, DL, OpVT), LHS.getOperand(2));
6189 } else if (TargetPreference & AndOrSETCCFoldKind::AddAnd) {
6190
6191 SDValue AddOp = DAG.getNode(ISD::ADD, DL, OpVT, LHS0,
6192 DAG.getConstant(-MinC, DL, OpVT));
6193 SDValue AndOp = DAG.getNode(ISD::AND, DL, OpVT, AddOp,
6194 DAG.getConstant(~Dif, DL, OpVT));
6195 return DAG.getNode(ISD::SETCC, DL, VT, AndOp,
6196 DAG.getConstant(0, DL, OpVT), LHS.getOperand(2));
6197 }
6198 }
6199 }
6200 }
6201
6202 return SDValue();
6203}
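// Illustrative examples (editorial addition, not part of the LLVM source):
// two instances of the folds above, when the target supports or prefers them:
//   (X == 4) | (X == -4)  -->  (abs(X) == 4)
//   (X < 10) | (Y < 10)   -->  (smin(X, Y) < 10)   ; signed smin for setlt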
6204
6205// Combine `(select c, (X & 1), 0)` -> `(and (zext c), X)`.
6206// We canonicalize to the `select` form in the middle end, but the `and` form
6207// gets better codegen and all tested targets (arm, x86, riscv) prefer it.
6208static SDValue combineSelectAsExtAnd(SDValue Cond, SDValue T, SDValue F,
6209 const SDLoc &DL, SelectionDAG &DAG) {
6210 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6211 if (!isNullConstant(F))
6212 return SDValue();
6213
6214 EVT CondVT = Cond.getValueType();
6215 if (TLI.getBooleanContents(CondVT) !=
6216 TargetLowering::ZeroOrOneBooleanContent)
6217 return SDValue();
6218
6219 if (T.getOpcode() != ISD::AND)
6220 return SDValue();
6221
6222 if (!isOneConstant(T.getOperand(1)))
6223 return SDValue();
6224
6225 EVT OpVT = T.getValueType();
6226
6227 SDValue CondMask =
6228 OpVT == CondVT ? Cond : DAG.getBoolExtOrTrunc(Cond, DL, OpVT, CondVT);
6229 return DAG.getNode(ISD::AND, DL, OpVT, CondMask, T.getOperand(0));
6230}
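// Illustrative example (editorial addition, not part of the LLVM source):
// with a zero-or-one boolean, "select C, (and X, 1), 0" becomes
// "and (zext C), X": the zero-extended condition is 0 or 1, so the AND yields
// X & 1 when C is true and 0 otherwise.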
6231
6232/// This contains all DAGCombine rules which reduce two values combined by
6233/// an And operation to a single value. This makes them reusable in the context
6234/// of visitSELECT(). Rules involving constants are not included as
6235/// visitSELECT() already handles those cases.
6236SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
6237 EVT VT = N1.getValueType();
6238 SDLoc DL(N);
6239
6240 // fold (and x, undef) -> 0
6241 if (N0.isUndef() || N1.isUndef())
6242 return DAG.getConstant(0, DL, VT);
6243
6244 if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
6245 return V;
6246
6247 // Canonicalize:
6248 // and(x, add) -> and(add, x)
6249 if (N1.getOpcode() == ISD::ADD)
6250 std::swap(N0, N1);
6251
6252 // TODO: Rewrite this to return a new 'AND' instead of using CombineTo.
6253 if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
6254 VT.isScalarInteger() && VT.getSizeInBits() <= 64 && N0->hasOneUse()) {
6255 if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
6256 if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
6257 // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
6258 // immediate for an add, but it is legal if its top c2 bits are set,
6259 // transform the ADD so the immediate doesn't need to be materialized
6260 // in a register.
6261 APInt ADDC = ADDI->getAPIntValue();
6262 APInt SRLC = SRLI->getAPIntValue();
6263 if (ADDC.getSignificantBits() <= 64 && SRLC.ult(VT.getSizeInBits()) &&
6264 !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
6265 APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
6266 SRLC.getZExtValue());
6267 if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
6268 ADDC |= Mask;
6269 if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
6270 SDLoc DL0(N0);
6271 SDValue NewAdd =
6272 DAG.getNode(ISD::ADD, DL0, VT,
6273 N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
6274 CombineTo(N0.getNode(), NewAdd);
6275 // Return N so it doesn't get rechecked!
6276 return SDValue(N, 0);
6277 }
6278 }
6279 }
6280 }
6281 }
6282 }
6283
6284 return SDValue();
6285}
6286
6287bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
6288 EVT LoadResultTy, EVT &ExtVT) {
6289 if (!AndC->getAPIntValue().isMask())
6290 return false;
6291
6292 unsigned ActiveBits = AndC->getAPIntValue().countr_one();
6293
6294 ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
6295 EVT LoadedVT = LoadN->getMemoryVT();
6296
6297 if (ExtVT == LoadedVT &&
6298 (!LegalOperations ||
6299 TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
6300 // ZEXTLOAD will match without needing to change the size of the value being
6301 // loaded.
6302 return true;
6303 }
6304
6305 // Do not change the width of volatile or atomic loads.
6306 if (!LoadN->isSimple())
6307 return false;
6308
6309 // Do not generate loads of non-round integer types since these can
6310 // be expensive (and would be wrong if the type is not byte sized).
6311 if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
6312 return false;
6313
6314 if (LegalOperations &&
6315 !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
6316 return false;
6317
6318 if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
6319 return false;
6320
6321 return true;
6322}
6323
6324bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
6325 ISD::LoadExtType ExtType, EVT &MemVT,
6326 unsigned ShAmt) {
6327 if (!LDST)
6328 return false;
6329 // Only allow byte offsets.
6330 if (ShAmt % 8)
6331 return false;
6332
6333 // Do not generate loads of non-round integer types since these can
6334 // be expensive (and would be wrong if the type is not byte sized).
6335 if (!MemVT.isRound())
6336 return false;
6337
6338 // Don't change the width of volatile or atomic loads.
6339 if (!LDST->isSimple())
6340 return false;
6341
6342 EVT LdStMemVT = LDST->getMemoryVT();
6343
6344 // Bail out when changing the scalable property, since we can't be sure that
6345 // we're actually narrowing here.
6346 if (LdStMemVT.isScalableVector() != MemVT.isScalableVector())
6347 return false;
6348
6349 // Verify that we are actually reducing a load width here.
6350 if (LdStMemVT.bitsLT(MemVT))
6351 return false;
6352
6353 // Ensure that this isn't going to produce an unsupported memory access.
6354 if (ShAmt) {
6355 assert(ShAmt % 8 == 0 && "ShAmt is byte offset");
6356 const unsigned ByteShAmt = ShAmt / 8;
6357 const Align LDSTAlign = LDST->getAlign();
6358 const Align NarrowAlign = commonAlignment(LDSTAlign, ByteShAmt);
6359 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
6360 LDST->getAddressSpace(), NarrowAlign,
6361 LDST->getMemOperand()->getFlags()))
6362 return false;
6363 }
6364
6365 // It's not possible to generate a constant of extended or untyped type.
6366 EVT PtrType = LDST->getBasePtr().getValueType();
6367 if (PtrType == MVT::Untyped || PtrType.isExtended())
6368 return false;
6369
6370 if (isa<LoadSDNode>(LDST)) {
6371 LoadSDNode *Load = cast<LoadSDNode>(LDST);
6372 // Don't transform one with multiple uses; this would require adding a new
6373 // load.
6374 if (!SDValue(Load, 0).hasOneUse())
6375 return false;
6376
6377 if (LegalOperations &&
6378 !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
6379 return false;
6380
6381 // For the transform to be legal, the load must produce only two values
6382 // (the value loaded and the chain). Don't transform a pre-increment
6383 // load, for example, which produces an extra value. Otherwise the
6384 // transformation is not equivalent, and the downstream logic to replace
6385 // uses gets things wrong.
6386 if (Load->getNumValues() > 2)
6387 return false;
6388
6389 // If the load that we're shrinking is an extload and we're not just
6390 // discarding the extension we can't simply shrink the load. Bail.
6391 // TODO: It would be possible to merge the extensions in some cases.
6392 if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
6393 Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
6394 return false;
6395
6396 if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT))
6397 return false;
6398 } else {
6399 assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
6400 StoreSDNode *Store = cast<StoreSDNode>(LDST);
6401 // Can't write outside the original store
6402 if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
6403 return false;
6404
6405 if (LegalOperations &&
6406 !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
6407 return false;
6408 }
6409 return true;
6410}
6411
6412bool DAGCombiner::SearchForAndLoads(SDNode *N,
6413 SmallVectorImpl<LoadSDNode*> &Loads,
6414 SmallPtrSetImpl<SDNode*> &NodesWithConsts,
6415 ConstantSDNode *Mask,
6416 SDNode *&NodeToMask) {
6417 // Recursively search for the operands, looking for loads which can be
6418 // narrowed.
6419 for (SDValue Op : N->op_values()) {
6420 if (Op.getValueType().isVector())
6421 return false;
6422
6423 // Some constants may need fixing up later if they are too large.
6424 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
6425 if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
6426 (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
6427 NodesWithConsts.insert(N);
6428 continue;
6429 }
6430
6431 if (!Op.hasOneUse())
6432 return false;
6433
6434 switch(Op.getOpcode()) {
6435 case ISD::LOAD: {
6436 auto *Load = cast<LoadSDNode>(Op);
6437 EVT ExtVT;
6438 if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
6439 isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {
6440
6441 // ZEXTLOAD is already small enough.
6442 if (Load->getExtensionType() == ISD::ZEXTLOAD &&
6443 ExtVT.bitsGE(Load->getMemoryVT()))
6444 continue;
6445
6446 // Use LE to convert equal sized loads to zext.
6447 if (ExtVT.bitsLE(Load->getMemoryVT()))
6448 Loads.push_back(Load);
6449
6450 continue;
6451 }
6452 return false;
6453 }
6454 case ISD::ZERO_EXTEND:
6455 case ISD::AssertZext: {
6456 unsigned ActiveBits = Mask->getAPIntValue().countr_one();
6457 EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
6458 EVT VT = Op.getOpcode() == ISD::AssertZext ?
6459 cast<VTSDNode>(Op.getOperand(1))->getVT() :
6460 Op.getOperand(0).getValueType();
6461
6462 // We can accept extending nodes if the mask is wider than or equal in
6463 // width to the original type.
6464 if (ExtVT.bitsGE(VT))
6465 continue;
6466 break;
6467 }
6468 case ISD::OR:
6469 case ISD::XOR:
6470 case ISD::AND:
6471 if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
6472 NodeToMask))
6473 return false;
6474 continue;
6475 }
6476
6477 // Allow one node which will be masked along with any loads found.
6478 if (NodeToMask)
6479 return false;
6480
6481 // Also ensure that the node to be masked only produces one data result.
6482 NodeToMask = Op.getNode();
6483 if (NodeToMask->getNumValues() > 1) {
6484 bool HasValue = false;
6485 for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
6486 MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
6487 if (VT != MVT::Glue && VT != MVT::Other) {
6488 if (HasValue) {
6489 NodeToMask = nullptr;
6490 return false;
6491 }
6492 HasValue = true;
6493 }
6494 }
6495 assert(HasValue && "Node to be masked has no data result?");
6496 }
6497 }
6498 return true;
6499}
6500
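/// Try to push the low-bit mask of this 'and' back through a tree of
/// OR/XOR/AND operands to the loads feeding it, so that each load can be
/// rewritten as a narrower zero-extending load and the 'and' itself removed.
/// Returns true if the DAG was changed.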
6501bool DAGCombiner::BackwardsPropagateMask(SDNode *N) {
6502 auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
6503 if (!Mask)
6504 return false;
6505
6506 if (!Mask->getAPIntValue().isMask())
6507 return false;
6508
6509 // No need to do anything if the and directly uses a load.
6510 if (isa<LoadSDNode>(N->getOperand(0)))
6511 return false;
6512
6513 SmallVector<LoadSDNode*, 8> Loads;
6514 SmallPtrSet<SDNode*, 2> NodesWithConsts;
6515 SDNode *FixupNode = nullptr;
6516 if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
6517 if (Loads.empty())
6518 return false;
6519
6520 LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
6521 SDValue MaskOp = N->getOperand(1);
6522
6523 // If it exists, fix up the single node we allow in the tree that needs
6524 // masking.
6525 if (FixupNode) {
6526 LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
6527 SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
6528 FixupNode->getValueType(0),
6529 SDValue(FixupNode, 0), MaskOp);
6530 DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
6531 if (And.getOpcode() == ISD::AND)
6532 DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
6533 }
6534
6535 // Narrow any constants that need it.
6536 for (auto *LogicN : NodesWithConsts) {
6537 SDValue Op0 = LogicN->getOperand(0);
6538 SDValue Op1 = LogicN->getOperand(1);
6539
6540 if (isa<ConstantSDNode>(Op0))
6541 Op0 =
6542 DAG.getNode(ISD::AND, SDLoc(Op0), Op0.getValueType(), Op0, MaskOp);
6543
6544 if (isa<ConstantSDNode>(Op1))
6545 Op1 =
6546 DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(), Op1, MaskOp);
6547
6548 if (isa<ConstantSDNode>(Op0) && !isa<ConstantSDNode>(Op1))
6549 std::swap(Op0, Op1);
6550
6551 DAG.UpdateNodeOperands(LogicN, Op0, Op1);
6552 }
6553
6554 // Create narrow loads.
6555 for (auto *Load : Loads) {
6556 LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
6557 SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
6558 SDValue(Load, 0), MaskOp);
6559 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
6560 if (And.getOpcode() == ISD::AND)
6561 And = SDValue(
6562 DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
6563 SDValue NewLoad = reduceLoadWidth(And.getNode());
6564 assert(NewLoad &&
6565 "Shouldn't be masking the load if it can't be narrowed");
6566 CombineTo(Load, NewLoad, NewLoad.getValue(1));
6567 }
6568 DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
6569 return true;
6570 }
6571 return false;
6572}
6573
6574// Unfold
6575// x & (-1 'logical shift' y)
6576// To
6577// (x 'opposite logical shift' y) 'logical shift' y
6578// if it is better for performance.
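// For example:  x & (-1 << y)  -->  (x >> y) << y
//               x & (-1 >> y)  -->  (x << y) >> y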
6579SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
6580 assert(N->getOpcode() == ISD::AND);
6581
6582 SDValue N0 = N->getOperand(0);
6583 SDValue N1 = N->getOperand(1);
6584
6585 // Do we actually prefer shifts over a mask?
6586 if (!TLI.shouldFoldMaskToVariableShiftPair(N0))
6587 return SDValue();
6588
6589 // Try to match (-1 '[outer] logical shift' y)
6590 unsigned OuterShift;
6591 unsigned InnerShift; // The opposite direction to the OuterShift.
6592 SDValue Y; // Shift amount.
6593 auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
6594 if (!M.hasOneUse())
6595 return false;
6596 OuterShift = M->getOpcode();
6597 if (OuterShift == ISD::SHL)
6598 InnerShift = ISD::SRL;
6599 else if (OuterShift == ISD::SRL)
6600 InnerShift = ISD::SHL;
6601 else
6602 return false;
6603 if (!isAllOnesConstant(M->getOperand(0)))
6604 return false;
6605 Y = M->getOperand(1);
6606 return true;
6607 };
6608
6609 SDValue X;
6610 if (matchMask(N1))
6611 X = N0;
6612 else if (matchMask(N0))
6613 X = N1;
6614 else
6615 return SDValue();
6616
6617 SDLoc DL(N);
6618 EVT VT = N->getValueType(0);
6619
6620 // tmp = x 'opposite logical shift' y
6621 SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
6622 // ret = tmp 'logical shift' y
6623 SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
6624
6625 return T1;
6626}
6627
6628/// Try to replace shift/logic that tests if a bit is clear with mask + setcc.
6629/// For a target with a bit test, this is expected to become test + set and save
6630/// at least 1 instruction.
6631static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) {
6632 assert(And->getOpcode() == ISD::AND && "Expected an 'and' op");
6633
6634 // Look through an optional extension.
6635 SDValue And0 = And->getOperand(0), And1 = And->getOperand(1);
6636 if (And0.getOpcode() == ISD::ANY_EXTEND && And0.hasOneUse())
6637 And0 = And0.getOperand(0);
6638 if (!isOneConstant(And1) || !And0.hasOneUse())
6639 return SDValue();
6640
6641 SDValue Src = And0;
6642
6643 // Attempt to find a 'not' op.
6644 // TODO: Should we favor test+set even without the 'not' op?
6645 bool FoundNot = false;
6646 if (isBitwiseNot(Src)) {
6647 FoundNot = true;
6648 Src = Src.getOperand(0);
6649
6650 // Look through an optional truncation. The source operand may not be the
6651 // same type as the original 'and', but that is ok because we are masking
6652 // off everything but the low bit.
6653 if (Src.getOpcode() == ISD::TRUNCATE && Src.hasOneUse())
6654 Src = Src.getOperand(0);
6655 }
6656
6657 // Match a shift-right by constant.
6658 if (Src.getOpcode() != ISD::SRL || !Src.hasOneUse())
6659 return SDValue();
6660
6661 // This is probably not worthwhile without a supported type.
6662 EVT SrcVT = Src.getValueType();
6663 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6664 if (!TLI.isTypeLegal(SrcVT))
6665 return SDValue();
6666
6667 // We might have looked through casts that make this transform invalid.
6668 unsigned BitWidth = SrcVT.getScalarSizeInBits();
6669 SDValue ShiftAmt = Src.getOperand(1);
6670 auto *ShiftAmtC = dyn_cast<ConstantSDNode>(ShiftAmt);
6671 if (!ShiftAmtC || !ShiftAmtC->getAPIntValue().ult(BitWidth))
6672 return SDValue();
6673
6674 // Set source to shift source.
6675 Src = Src.getOperand(0);
6676
6677 // Try again to find a 'not' op.
6678 // TODO: Should we favor test+set even with two 'not' ops?
6679 if (!FoundNot) {
6680 if (!isBitwiseNot(Src))
6681 return SDValue();
6682 Src = Src.getOperand(0);
6683 }
6684
6685 if (!TLI.hasBitTest(Src, ShiftAmt))
6686 return SDValue();
6687
6688 // Turn this into a bit-test pattern using mask op + setcc:
6689 // and (not (srl X, C)), 1 --> (and X, 1<<C) == 0
6690 // and (srl (not X), C)), 1 --> (and X, 1<<C) == 0
6691 SDLoc DL(And);
6692 SDValue X = DAG.getZExtOrTrunc(Src, DL, SrcVT);
6693 EVT CCVT =
6694 TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
6695 SDValue Mask = DAG.getConstant(
6696 APInt::getOneBitSet(BitWidth, ShiftAmtC->getZExtValue()), DL, SrcVT);
6697 SDValue NewAnd = DAG.getNode(ISD::AND, DL, SrcVT, X, Mask);
6698 SDValue Zero = DAG.getConstant(0, DL, SrcVT);
6699 SDValue Setcc = DAG.getSetCC(DL, CCVT, NewAnd, Zero, ISD::SETEQ);
6700 return DAG.getZExtOrTrunc(Setcc, DL, And->getValueType(0));
6701}
6702
6703/// For targets that support usubsat, match a bit-hack form of that operation
6704/// that ends in 'and' and convert it.
6705static SDValue foldAndToUsubsat(SDNode *N, SelectionDAG &DAG, const SDLoc &DL) {
6706 EVT VT = N->getValueType(0);
6707 unsigned BitWidth = VT.getScalarSizeInBits();
6708 APInt SignMask = APInt::getSignMask(BitWidth);
6709
6710 // (i8 X ^ 128) & (i8 X s>> 7) --> usubsat X, 128
6711 // (i8 X + 128) & (i8 X s>> 7) --> usubsat X, 128
6712 // xor/add with SMIN (signmask) are logically equivalent.
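  // Why this is usubsat (i8 shown): (X s>> 7) is all-ones exactly when
  // X >= 128; in that case (X ^ 128) == X - 128, so the AND produces
  // X - 128 when X >= 128 and 0 otherwise, i.e. usubsat(X, 128).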
6713 SDValue X;
6714 if (!sd_match(N, m_And(m_OneUse(m_Xor(m_Value(X), m_SpecificInt(SignMask))),
6715 m_OneUse(m_Sra(m_Deferred(X),
6716 m_SpecificInt(BitWidth - 1))))) &&
6717 !sd_match(N, m_And(m_OneUse(m_Add(m_Value(X), m_SpecificInt(SignMask))),
6718 m_OneUse(m_Sra(m_Deferred(X),
6719 m_SpecificInt(BitWidth - 1))))))
6720 return SDValue();
6721
6722 return DAG.getNode(ISD::USUBSAT, DL, VT, X,
6723 DAG.getConstant(SignMask, DL, VT));
6724}
6725
6726/// Given a bitwise logic operation N with a matching bitwise logic operand,
6727/// fold a pattern where 2 of the source operands are identically shifted
6728/// values. For example:
6729/// ((X0 << Y) | Z) | (X1 << Y) --> ((X0 | X1) << Y) | Z
6730static SDValue foldLogicOfShifts(SDNode *N, SDValue LogicOp, SDValue ShiftOp,
6731 SelectionDAG &DAG) {
6732 unsigned LogicOpcode = N->getOpcode();
6733 assert(ISD::isBitwiseLogicOp(LogicOpcode) &&
6734 "Expected bitwise logic operation");
6735
6736 if (!LogicOp.hasOneUse() || !ShiftOp.hasOneUse())
6737 return SDValue();
6738
6739 // Match another bitwise logic op and a shift.
6740 unsigned ShiftOpcode = ShiftOp.getOpcode();
6741 if (LogicOp.getOpcode() != LogicOpcode ||
6742 !(ShiftOpcode == ISD::SHL || ShiftOpcode == ISD::SRL ||
6743 ShiftOpcode == ISD::SRA))
6744 return SDValue();
6745
6746 // Match another shift op inside the first logic operand. Handle both commuted
6747 // possibilities.
6748 // LOGIC (LOGIC (SH X0, Y), Z), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z
6749 // LOGIC (LOGIC Z, (SH X0, Y)), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z
6750 SDValue X1 = ShiftOp.getOperand(0);
6751 SDValue Y = ShiftOp.getOperand(1);
6752 SDValue X0, Z;
6753 if (LogicOp.getOperand(0).getOpcode() == ShiftOpcode &&
6754 LogicOp.getOperand(0).getOperand(1) == Y) {
6755 X0 = LogicOp.getOperand(0).getOperand(0);
6756 Z = LogicOp.getOperand(1);
6757 } else if (LogicOp.getOperand(1).getOpcode() == ShiftOpcode &&
6758 LogicOp.getOperand(1).getOperand(1) == Y) {
6759 X0 = LogicOp.getOperand(1).getOperand(0);
6760 Z = LogicOp.getOperand(0);
6761 } else {
6762 return SDValue();
6763 }
6764
6765 EVT VT = N->getValueType(0);
6766 SDLoc DL(N);
6767 SDValue LogicX = DAG.getNode(LogicOpcode, DL, VT, X0, X1);
6768 SDValue NewShift = DAG.getNode(ShiftOpcode, DL, VT, LogicX, Y);
6769 return DAG.getNode(LogicOpcode, DL, VT, NewShift, Z);
6770}
6771
6772/// Given a tree of logic operations with shape like
6773/// (LOGIC (LOGIC (X, Y), LOGIC (Z, Y)))
6774/// try to match and fold shift operations with the same shift amount.
6775/// For example:
6776/// LOGIC (LOGIC (SH X0, Y), Z), (LOGIC (SH X1, Y), W) -->
6777/// --> LOGIC (SH (LOGIC X0, X1), Y), (LOGIC Z, W)
6778static SDValue foldLogicTreeOfShifts(SDNode *N, SDValue LeftHand,
6779 SDValue RightHand, SelectionDAG &DAG) {
6780 unsigned LogicOpcode = N->getOpcode();
6781 assert(ISD::isBitwiseLogicOp(LogicOpcode) &&
6782 "Expected bitwise logic operation");
6783 if (LeftHand.getOpcode() != LogicOpcode ||
6784 RightHand.getOpcode() != LogicOpcode)
6785 return SDValue();
6786 if (!LeftHand.hasOneUse() || !RightHand.hasOneUse())
6787 return SDValue();
6788
6789 // Try to match one of following patterns:
6790 // LOGIC (LOGIC (SH X0, Y), Z), (LOGIC (SH X1, Y), W)
6791 // LOGIC (LOGIC (SH X0, Y), Z), (LOGIC W, (SH X1, Y))
6792 // Note that foldLogicOfShifts will handle commuted versions of the left hand
6793 // itself.
6794 SDValue CombinedShifts, W;
6795 SDValue R0 = RightHand.getOperand(0);
6796 SDValue R1 = RightHand.getOperand(1);
6797 if ((CombinedShifts = foldLogicOfShifts(N, LeftHand, R0, DAG)))
6798 W = R1;
6799 else if ((CombinedShifts = foldLogicOfShifts(N, LeftHand, R1, DAG)))
6800 W = R0;
6801 else
6802 return SDValue();
6803
6804 EVT VT = N->getValueType(0);
6805 SDLoc DL(N);
6806 return DAG.getNode(LogicOpcode, DL, VT, CombinedShifts, W);
6807}
6808
6809SDValue DAGCombiner::visitAND(SDNode *N) {
6810 SDValue N0 = N->getOperand(0);
6811 SDValue N1 = N->getOperand(1);
6812 EVT VT = N1.getValueType();
6813 SDLoc DL(N);
6814
6815 // x & x --> x
6816 if (N0 == N1)
6817 return N0;
6818
6819 // fold (and c1, c2) -> c1&c2
6820 if (SDValue C = DAG.FoldConstantArithmetic(ISD::AND, DL, VT, {N0, N1}))
6821 return C;
6822
6823 // canonicalize constant to RHS
6824 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
6825 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
6826 return DAG.getNode(ISD::AND, DL, VT, N1, N0);
6827
6828 if (areBitwiseNotOfEachother(N0, N1))
6829 return DAG.getConstant(APInt::getZero(VT.getScalarSizeInBits()), DL, VT);
6830
6831 // fold vector ops
6832 if (VT.isVector()) {
6833 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
6834 return FoldedVOp;
6835
6836 // fold (and x, 0) -> 0, vector edition
6837 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
6838 // do not return N1, because undef node may exist in N1
6839 return DAG.getConstant(APInt::getZero(N1.getScalarValueSizeInBits()), DL,
6840 N1.getValueType());
6841
6842 // fold (and x, -1) -> x, vector edition
6843 if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
6844 return N0;
6845
6846 // fold (and (masked_load) (splat_vec (x, ...))) to zext_masked_load
6847 auto *MLoad = dyn_cast<MaskedLoadSDNode>(N0);
6848 ConstantSDNode *Splat = isConstOrConstSplat(N1, true, true);
6849 if (MLoad && MLoad->getExtensionType() == ISD::EXTLOAD && Splat &&
6850 N1.hasOneUse()) {
6851 EVT LoadVT = MLoad->getMemoryVT();
6852 EVT ExtVT = VT;
6853 if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT, LoadVT)) {
6854 // For this AND to be a zero extension of the masked load the elements
6855 // of the BuildVec must mask the bottom bits of the extended element
6856 // type
6857 uint64_t ElementSize =
6859 if (Splat->getAPIntValue().isMask(ElementSize)) {
6860 SDValue NewLoad = DAG.getMaskedLoad(
6861 ExtVT, DL, MLoad->getChain(), MLoad->getBasePtr(),
6862 MLoad->getOffset(), MLoad->getMask(), MLoad->getPassThru(),
6863 LoadVT, MLoad->getMemOperand(), MLoad->getAddressingMode(),
6864 ISD::ZEXTLOAD, MLoad->isExpandingLoad());
6865 bool LoadHasOtherUsers = !N0.hasOneUse();
6866 CombineTo(N, NewLoad);
6867 if (LoadHasOtherUsers)
6868 CombineTo(MLoad, NewLoad.getValue(0), NewLoad.getValue(1));
6869 return SDValue(N, 0);
6870 }
6871 }
6872 }
6873 }
6874
6875 // fold (and x, -1) -> x
6876 if (isAllOnesConstant(N1))
6877 return N0;
6878
6879 // if (and x, c) is known to be zero, return 0
6880 unsigned BitWidth = VT.getScalarSizeInBits();
6881 ConstantSDNode *N1C = isConstOrConstSplat(N1);
6882 if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(BitWidth)))
6883 return DAG.getConstant(0, DL, VT);
6884
6885 if (SDValue R = foldAndOrOfSETCC(N, DAG))
6886 return R;
6887
6888 if (SDValue NewSel = foldBinOpIntoSelect(N))
6889 return NewSel;
6890
6891 // reassociate and
6892 if (SDValue RAND = reassociateOps(ISD::AND, DL, N0, N1, N->getFlags()))
6893 return RAND;
6894
6895 // Fold and(vecreduce(x), vecreduce(y)) -> vecreduce(and(x, y))
6896 if (SDValue SD =
6897 reassociateReduction(ISD::VECREDUCE_AND, ISD::AND, DL, VT, N0, N1))
6898 return SD;
6899
6900 // fold (and (or x, C), D) -> D if (C & D) == D
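  // e.g. (and (or x, 0xF0), 0x30) --> 0x30, because the OR guarantees that
  // every bit of 0x30 is already set before the AND.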
6901 auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
6902 return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
6903 };
6904 if (N0.getOpcode() == ISD::OR &&
6905 ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
6906 return N1;
6907
6908 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
6909 SDValue N0Op0 = N0.getOperand(0);
6910 EVT SrcVT = N0Op0.getValueType();
6911 unsigned SrcBitWidth = SrcVT.getScalarSizeInBits();
6912 APInt Mask = ~N1C->getAPIntValue();
6913 Mask = Mask.trunc(SrcBitWidth);
6914
6915 // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
6916 if (DAG.MaskedValueIsZero(N0Op0, Mask))
6917 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0Op0);
6918
6919 // fold (and (any_ext V), c) -> (zero_ext (and (trunc V), c)) if profitable.
6920 if (N1C->getAPIntValue().countLeadingZeros() >= (BitWidth - SrcBitWidth) &&
6921 TLI.isTruncateFree(VT, SrcVT) && TLI.isZExtFree(SrcVT, VT) &&
6922 TLI.isTypeDesirableForOp(ISD::AND, SrcVT) &&
6923 TLI.isNarrowingProfitable(VT, SrcVT))
6924 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT,
6925 DAG.getNode(ISD::AND, DL, SrcVT, N0Op0,
6926 DAG.getZExtOrTrunc(N1, DL, SrcVT)));
6927 }
6928
6929 // fold (and (ext (and V, c1)), c2) -> (and (ext V), (and c1, (ext c2)))
6930 if (ISD::isExtOpcode(N0.getOpcode())) {
6931 unsigned ExtOpc = N0.getOpcode();
6932 SDValue N0Op0 = N0.getOperand(0);
6933 if (N0Op0.getOpcode() == ISD::AND &&
6934 (ExtOpc != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0Op0, VT)) &&
6937 N0->hasOneUse() && N0Op0->hasOneUse()) {
6938 SDValue NewMask =
6939 DAG.getNode(ISD::AND, DL, VT, N1,
6940 DAG.getNode(ExtOpc, DL, VT, N0Op0.getOperand(1)));
6941 return DAG.getNode(ISD::AND, DL, VT,
6942 DAG.getNode(ExtOpc, DL, VT, N0Op0.getOperand(0)),
6943 NewMask);
6944 }
6945 }
6946
6947 // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
6948 // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
6949 // already be zero by virtue of the width of the base type of the load.
6950 //
6951 // the 'X' node here can either be nothing or an extract_vector_elt to catch
6952 // more cases.
6953 if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
6955 N0.getOperand(0).getOpcode() == ISD::LOAD &&
6956 N0.getOperand(0).getResNo() == 0) ||
6957 (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
6958 auto *Load =
6959 cast<LoadSDNode>((N0.getOpcode() == ISD::LOAD) ? N0 : N0.getOperand(0));
6960
6961 // Get the constant (if applicable) the zero'th operand is being ANDed with.
6962 // This can be a pure constant or a vector splat, in which case we treat the
6963 // vector as a scalar and use the splat value.
6964 APInt Constant = APInt::getZero(1);
6965 if (const ConstantSDNode *C = isConstOrConstSplat(
6966 N1, /*AllowUndef=*/false, /*AllowTruncation=*/true)) {
6967 Constant = C->getAPIntValue();
6968 } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
6969 unsigned EltBitWidth = Vector->getValueType(0).getScalarSizeInBits();
6970 APInt SplatValue, SplatUndef;
6971 unsigned SplatBitSize;
6972 bool HasAnyUndefs;
6973 // Endianness should not matter here. Code below makes sure that we only
6974 // use the result if the SplatBitSize is a multiple of the vector element
6975 // size. And after that we AND all element sized parts of the splat
6976 // together. So the end result should be the same regardless of in which
6977 // order we do those operations.
6978 const bool IsBigEndian = false;
6979 bool IsSplat =
6980 Vector->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
6981 HasAnyUndefs, EltBitWidth, IsBigEndian);
6982
6983 // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
6984 // multiple of 'EltBitWidth'. Otherwise, we could propagate a wrong value.
6985 if (IsSplat && (SplatBitSize % EltBitWidth) == 0) {
6986 // Undef bits can contribute to a possible optimisation if set, so
6987 // set them.
6988 SplatValue |= SplatUndef;
6989
6990 // The splat value may be something like "0x00FFFFFF", which means 0 for
6991 // the first vector value and FF for the rest, repeating. We need a mask
6992 // that will apply equally to all members of the vector, so AND all the
6993 // lanes of the constant together.
6994 Constant = APInt::getAllOnes(EltBitWidth);
6995 for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i)
6996 Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth);
6997 }
6998 }
6999
7000 // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
7001 // actually legal and isn't going to get expanded, else this is a false
7002 // optimisation.
7003 bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
7004 Load->getValueType(0),
7005 Load->getMemoryVT());
7006
7007 // Resize the constant to the same size as the original memory access before
7008 // extension. If it is still the AllOnesValue then this AND is completely
7009 // unneeded.
7010 Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
7011
7012 bool B;
7013 switch (Load->getExtensionType()) {
7014 default: B = false; break;
7015 case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
7016 case ISD::ZEXTLOAD:
7017 case ISD::NON_EXTLOAD: B = true; break;
7018 }
7019
7020 if (B && Constant.isAllOnes()) {
7021 // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
7022 // preserve semantics once we get rid of the AND.
7023 SDValue NewLoad(Load, 0);
7024
7025 // Fold the AND away. NewLoad may get replaced immediately.
7026 CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
7027
7028 if (Load->getExtensionType() == ISD::EXTLOAD) {
7029 NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
7030 Load->getValueType(0), SDLoc(Load),
7031 Load->getChain(), Load->getBasePtr(),
7032 Load->getOffset(), Load->getMemoryVT(),
7033 Load->getMemOperand());
7034 // Replace uses of the EXTLOAD with the new ZEXTLOAD.
7035 if (Load->getNumValues() == 3) {
7036 // PRE/POST_INC loads have 3 values.
7037 SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
7038 NewLoad.getValue(2) };
7039 CombineTo(Load, To, 3, true);
7040 } else {
7041 CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
7042 }
7043 }
7044
7045 return SDValue(N, 0); // Return N so it doesn't get rechecked!
7046 }
7047 }
7048
7049 // Try to convert a constant mask AND into a shuffle clear mask.
7050 if (VT.isVector())
7051 if (SDValue Shuffle = XformToShuffleWithZero(N))
7052 return Shuffle;
7053
7054 if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
7055 return Combined;
7056
7057 if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR && N0.hasOneUse() && N1C &&
7059 SDValue Ext = N0.getOperand(0);
7060 EVT ExtVT = Ext->getValueType(0);
7061 SDValue Extendee = Ext->getOperand(0);
7062
7063 unsigned ScalarWidth = Extendee.getValueType().getScalarSizeInBits();
7064 if (N1C->getAPIntValue().isMask(ScalarWidth) &&
7065 (!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, ExtVT))) {
7066 // (and (extract_subvector (zext|anyext|sext v) _) iN_mask)
7067 // => (extract_subvector (iN_zeroext v))
7068 SDValue ZeroExtExtendee =
7069 DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVT, Extendee);
7070
7071 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ZeroExtExtendee,
7072 N0.getOperand(1));
7073 }
7074 }
7075
7076 // fold (and (masked_gather x)) -> (zext_masked_gather x)
7077 if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
7078 EVT MemVT = GN0->getMemoryVT();
7079 EVT ScalarVT = MemVT.getScalarType();
7080
7081 if (SDValue(GN0, 0).hasOneUse() &&
7082 isConstantSplatVectorMaskForType(N1.getNode(), ScalarVT) &&
7084 SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
7085 GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
7086
7087 SDValue ZExtLoad = DAG.getMaskedGather(
7088 DAG.getVTList(VT, MVT::Other), MemVT, DL, Ops, GN0->getMemOperand(),
7089 GN0->getIndexType(), ISD::ZEXTLOAD);
7090
7091 CombineTo(N, ZExtLoad);
7092 AddToWorklist(ZExtLoad.getNode());
7093 // Avoid recheck of N.
7094 return SDValue(N, 0);
7095 }
7096 }
7097
7098 // fold (and (load x), 255) -> (zextload x, i8)
7099 // fold (and (extload x, i16), 255) -> (zextload x, i8)
7100 if (N1C && N0.getOpcode() == ISD::LOAD && !VT.isVector())
7101 if (SDValue Res = reduceLoadWidth(N))
7102 return Res;
7103
7104 if (LegalTypes) {
7105 // Attempt to propagate the AND back up to the leaves which, if they're
7106 // loads, can be combined to narrow loads and the AND node can be removed.
7107 // Perform after legalization so that extend nodes will already be
7108 // combined into the loads.
7109 if (BackwardsPropagateMask(N))
7110 return SDValue(N, 0);
7111 }
7112
7113 if (SDValue Combined = visitANDLike(N0, N1, N))
7114 return Combined;
7115
7116 // Simplify: (and (op x...), (op y...)) -> (op (and x, y))
7117 if (N0.getOpcode() == N1.getOpcode())
7118 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
7119 return V;
7120
7121 if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
7122 return R;
7123 if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG))
7124 return R;
7125
7126 // Masking the negated extension of a boolean is just the zero-extended
7127 // boolean:
7128 // and (sub 0, zext(bool X)), 1 --> zext(bool X)
7129 // and (sub 0, sext(bool X)), 1 --> zext(bool X)
7130 //
7131 // Note: the SimplifyDemandedBits fold below can make an information-losing
7132 // transform, and then we have no way to find this better fold.
7133 if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
7134 if (isNullOrNullSplat(N0.getOperand(0))) {
7135 SDValue SubRHS = N0.getOperand(1);
7136 if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
7137 SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
7138 return SubRHS;
7139 if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
7140 SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
7141 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, SubRHS.getOperand(0));
7142 }
7143 }
7144
7145 // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
7146 // fold (and (sra)) -> (and (srl)) when possible.
7147 if (SimplifyDemandedBits(SDValue(N, 0)))
7148 return SDValue(N, 0);
7149
7150 // fold (zext_inreg (extload x)) -> (zextload x)
7151 // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
7152 if (ISD::isUNINDEXEDLoad(N0.getNode()) &&
7153 (ISD::isEXTLoad(N0.getNode()) ||
7154 (ISD::isSEXTLoad(N0.getNode()) && N0.hasOneUse()))) {
7155 auto *LN0 = cast<LoadSDNode>(N0);
7156 EVT MemVT = LN0->getMemoryVT();
7157 // If we zero all the possible extended bits, then we can turn this into
7158 // a zextload if we are running before legalize or the operation is legal.
7159 unsigned ExtBitSize = N1.getScalarValueSizeInBits();
7160 unsigned MemBitSize = MemVT.getScalarSizeInBits();
7161 APInt ExtBits = APInt::getHighBitsSet(ExtBitSize, ExtBitSize - MemBitSize);
7162 if (DAG.MaskedValueIsZero(N1, ExtBits) &&
7163 ((!LegalOperations && LN0->isSimple()) ||
7164 TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
7165 SDValue ExtLoad =
7166 DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(),
7167 LN0->getBasePtr(), MemVT, LN0->getMemOperand());
7168 AddToWorklist(N);
7169 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
7170 return SDValue(N, 0); // Return N so it doesn't get rechecked!
7171 }
7172 }
7173
7174 // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
7175 if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
7176 if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
7177 N0.getOperand(1), false))
7178 return BSwap;
7179 }
7180
7181 if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
7182 return Shifts;
7183
7184 if (SDValue V = combineShiftAnd1ToBitTest(N, DAG))
7185 return V;
7186
7187 // Recognize the following pattern:
7188 //
7189 // AndVT = (and (sign_extend NarrowVT to AndVT) #bitmask)
7190 //
7191 // where bitmask is a mask that clears the upper bits of AndVT. The
7192 // number of bits in bitmask must be a power of two.
7193 auto IsAndZeroExtMask = [](SDValue LHS, SDValue RHS) {
7194 if (LHS->getOpcode() != ISD::SIGN_EXTEND)
7195 return false;
7196
7197 auto *C = dyn_cast<ConstantSDNode>(RHS);
7198 if (!C)
7199 return false;
7200
7201 if (!C->getAPIntValue().isMask(
7202 LHS.getOperand(0).getValueType().getFixedSizeInBits()))
7203 return false;
7204
7205 return true;
7206 };
7207
7208 // Replace (and (sign_extend ...) #bitmask) with (zero_extend ...).
7209 if (IsAndZeroExtMask(N0, N1))
7210 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
7211
7212 if (hasOperation(ISD::USUBSAT, VT))
7213 if (SDValue V = foldAndToUsubsat(N, DAG, DL))
7214 return V;
7215
7216 // Postpone until legalization has completed to avoid interference with bswap
7217 // folding
7218 if (LegalOperations || VT.isVector())
7219 if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
7220 return R;
7221
7222 return SDValue();
7223}
7224
7225/// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
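/// For i32 a = [b3 b2 b1 b0] (bytes, most significant first), the matched
/// expression ((a & 0xff) << 8) | ((a >> 8) & 0xff) produces [0 0 b0 b1],
/// which is exactly (bswap a) = [b0 b1 b2 b3] shifted right by 16.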
7226SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
7227 bool DemandHighBits) {
7228 if (!LegalOperations)
7229 return SDValue();
7230
7231 EVT VT = N->getValueType(0);
7232 if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
7233 return SDValue();
7234 if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
7235 return SDValue();
7236
7237 // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
7238 bool LookPassAnd0 = false;
7239 bool LookPassAnd1 = false;
7240 if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
7241 std::swap(N0, N1);
7242 if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
7243 std::swap(N0, N1);
7244 if (N0.getOpcode() == ISD::AND) {
7245 if (!N0->hasOneUse())
7246 return SDValue();
7247 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7248 // Also handle 0xffff since the LHS is guaranteed to have zeros there.
7249 // This is needed for X86.
7250 if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
7251 N01C->getZExtValue() != 0xFFFF))
7252 return SDValue();
7253 N0 = N0.getOperand(0);
7254 LookPassAnd0 = true;
7255 }
7256
7257 if (N1.getOpcode() == ISD::AND) {
7258 if (!N1->hasOneUse())
7259 return SDValue();
7260 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
7261 if (!N11C || N11C->getZExtValue() != 0xFF)
7262 return SDValue();
7263 N1 = N1.getOperand(0);
7264 LookPassAnd1 = true;
7265 }
7266
7267 if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
7268 std::swap(N0, N1);
7269 if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
7270 return SDValue();
7271 if (!N0->hasOneUse() || !N1->hasOneUse())
7272 return SDValue();
7273
7274 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7275 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
7276 if (!N01C || !N11C)
7277 return SDValue();
7278 if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
7279 return SDValue();
7280
7281 // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
7282 SDValue N00 = N0->getOperand(0);
7283 if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
7284 if (!N00->hasOneUse())
7285 return SDValue();
7286 ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
7287 if (!N001C || N001C->getZExtValue() != 0xFF)
7288 return SDValue();
7289 N00 = N00.getOperand(0);
7290 LookPassAnd0 = true;
7291 }
7292
7293 SDValue N10 = N1->getOperand(0);
7294 if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
7295 if (!N10->hasOneUse())
7296 return SDValue();
7297 ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
7298 // Also allow 0xFFFF since the bits will be shifted out. This is needed
7299 // for X86.
7300 if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
7301 N101C->getZExtValue() != 0xFFFF))
7302 return SDValue();
7303 N10 = N10.getOperand(0);
7304 LookPassAnd1 = true;
7305 }
7306
7307 if (N00 != N10)
7308 return SDValue();
7309
7310 // Make sure everything beyond the low halfword gets set to zero since the SRL
7311 // 16 will clear the top bits.
7312 unsigned OpSizeInBits = VT.getSizeInBits();
7313 if (OpSizeInBits > 16) {
7314 // If the left-shift isn't masked out then the only way this is a bswap is
7315 // if all bits beyond the low 8 are 0. In that case the entire pattern
7316 // reduces to a left shift anyway: leave it for other parts of the combiner.
7317 if (DemandHighBits && !LookPassAnd0)
7318 return SDValue();
7319
7320 // However, if the right shift isn't masked out then it might be because
7321 // it's not needed. See if we can spot that too. If the high bits aren't
7322 // demanded, we only need bits 23:16 to be zero. Otherwise, we need all
7323 // upper bits to be zero.
7324 if (!LookPassAnd1) {
7325 unsigned HighBit = DemandHighBits ? OpSizeInBits : 24;
7326 if (!DAG.MaskedValueIsZero(N10,
7327 APInt::getBitsSet(OpSizeInBits, 16, HighBit)))
7328 return SDValue();
7329 }
7330 }
7331
7332 SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
7333 if (OpSizeInBits > 16) {
7334 SDLoc DL(N);
7335 Res = DAG.getNode(ISD::SRL, DL, VT, Res,
7336 DAG.getConstant(OpSizeInBits - 16, DL,
7337 getShiftAmountTy(VT)));
7338 }
7339 return Res;
7340}
7341
7342/// Return true if the specified node is an element that makes up a 32-bit
7343/// packed halfword byteswap.
7344/// ((x & 0x000000ff) << 8) |
7345/// ((x & 0x0000ff00) >> 8) |
7346/// ((x & 0x00ff0000) << 8) |
7347/// ((x & 0xff000000) >> 8)
7348static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
7349 if (!N->hasOneUse())
7350 return false;
7351
7352 unsigned Opc = N.getOpcode();
7353 if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
7354 return false;
7355
7356 SDValue N0 = N.getOperand(0);
7357 unsigned Opc0 = N0.getOpcode();
7358 if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
7359 return false;
7360
7361 ConstantSDNode *N1C = nullptr;
7362 // SHL or SRL: look upstream for AND mask operand
7363 if (Opc == ISD::AND)
7364 N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
7365 else if (Opc0 == ISD::AND)
7366 N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7367 if (!N1C)
7368 return false;
7369
7370 unsigned MaskByteOffset;
7371 switch (N1C->getZExtValue()) {
7372 default:
7373 return false;
7374 case 0xFF: MaskByteOffset = 0; break;
7375 case 0xFF00: MaskByteOffset = 1; break;
7376 case 0xFFFF:
7377 // In case demanded bits didn't clear the bits that will be shifted out.
7378 // This is needed for X86.
7379 if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) {
7380 MaskByteOffset = 1;
7381 break;
7382 }
7383 return false;
7384 case 0xFF0000: MaskByteOffset = 2; break;
7385 case 0xFF000000: MaskByteOffset = 3; break;
7386 }
7387
7388 // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
7389 if (Opc == ISD::AND) {
7390 if (MaskByteOffset == 0 || MaskByteOffset == 2) {
7391 // (x >> 8) & 0xff
7392 // (x >> 8) & 0xff0000
7393 if (Opc0 != ISD::SRL)
7394 return false;
7395 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7396 if (!C || C->getZExtValue() != 8)
7397 return false;
7398 } else {
7399 // (x << 8) & 0xff00
7400 // (x << 8) & 0xff000000
7401 if (Opc0 != ISD::SHL)
7402 return false;
7403 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7404 if (!C || C->getZExtValue() != 8)
7405 return false;
7406 }
7407 } else if (Opc == ISD::SHL) {
7408 // (x & 0xff) << 8
7409 // (x & 0xff0000) << 8
7410 if (MaskByteOffset != 0 && MaskByteOffset != 2)
7411 return false;
7412 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
7413 if (!C || C->getZExtValue() != 8)
7414 return false;
7415 } else { // Opc == ISD::SRL
7416 // (x & 0xff00) >> 8
7417 // (x & 0xff000000) >> 8
7418 if (MaskByteOffset != 1 && MaskByteOffset != 3)
7419 return false;
7420 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
7421 if (!C || C->getZExtValue() != 8)
7422 return false;
7423 }
7424
7425 if (Parts[MaskByteOffset])
7426 return false;
7427
7428 Parts[MaskByteOffset] = N0.getOperand(0).getNode();
7429 return true;
7430}
7431
7432// Match 2 elements of a packed halfword bswap.
7433static bool isBSwapHWordPair(SDValue N, MutableArrayRef<SDNode *> Parts) {
7434 if (N.getOpcode() == ISD::OR)
7435 return isBSwapHWordElement(N.getOperand(0), Parts) &&
7436 isBSwapHWordElement(N.getOperand(1), Parts);
7437
7438 if (N.getOpcode() == ISD::SRL && N.getOperand(0).getOpcode() == ISD::BSWAP) {
7439 ConstantSDNode *C = isConstOrConstSplat(N.getOperand(1));
7440 if (!C || C->getAPIntValue() != 16)
7441 return false;
7442 Parts[0] = Parts[1] = N.getOperand(0).getOperand(0).getNode();
7443 return true;
7444 }
7445
7446 return false;
7447}
7448
7449// Match this pattern:
7450// (or (and (shl (A, 8)), 0xff00ff00), (and (srl (A, 8)), 0x00ff00ff))
7451// And rewrite this to:
7452// (rotr (bswap A), 16)
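// For A = [b3 b2 b1 b0] (bytes, most significant first): the OR of the two
// masked shifts is [b2 b3 b0 b1], and (bswap A) = [b0 b1 b2 b3] rotated
// right by 16 bits gives the same [b2 b3 b0 b1].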
7453static SDValue matchBSwapHWordOrAndAnd(const TargetLowering &TLI,
7454 SelectionDAG &DAG, SDNode *N, SDValue N0,
7455 SDValue N1, EVT VT, EVT ShiftAmountTy) {
7456 assert(N->getOpcode() == ISD::OR && VT == MVT::i32 &&
7457 "MatchBSwapHWordOrAndAnd: expecting i32");
7458 if (!TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
7459 return SDValue();
7460 if (N0.getOpcode() != ISD::AND || N1.getOpcode() != ISD::AND)
7461 return SDValue();
7462 // TODO: this is too restrictive; lifting this restriction requires more tests
7463 if (!N0->hasOneUse() || !N1->hasOneUse())
7464 return SDValue();
7465 ConstantSDNode *Mask0 = isConstOrConstSplat(N0.getOperand(1));
7466 ConstantSDNode *Mask1 = isConstOrConstSplat(N1.getOperand(1));
7467 if (!Mask0 || !Mask1)
7468 return SDValue();
7469 if (Mask0->getAPIntValue() != 0xff00ff00 ||
7470 Mask1->getAPIntValue() != 0x00ff00ff)
7471 return SDValue();
7472 SDValue Shift0 = N0.getOperand(0);
7473 SDValue Shift1 = N1.getOperand(0);
7474 if (Shift0.getOpcode() != ISD::SHL || Shift1.getOpcode() != ISD::SRL)
7475 return SDValue();
7476 ConstantSDNode *ShiftAmt0 = isConstOrConstSplat(Shift0.getOperand(1));
7477 ConstantSDNode *ShiftAmt1 = isConstOrConstSplat(Shift1.getOperand(1));
7478 if (!ShiftAmt0 || !ShiftAmt1)
7479 return SDValue();
7480 if (ShiftAmt0->getAPIntValue() != 8 || ShiftAmt1->getAPIntValue() != 8)
7481 return SDValue();
7482 if (Shift0.getOperand(0) != Shift1.getOperand(0))
7483 return SDValue();
7484
7485 SDLoc DL(N);
7486 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Shift0.getOperand(0));
7487 SDValue ShAmt = DAG.getConstant(16, DL, ShiftAmountTy);
7488 return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
7489}
7490
7491/// Match a 32-bit packed halfword bswap. That is
7492/// ((x & 0x000000ff) << 8) |
7493/// ((x & 0x0000ff00) >> 8) |
7494/// ((x & 0x00ff0000) << 8) |
7495/// ((x & 0xff000000) >> 8)
7496/// => (rotl (bswap x), 16)
7497SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
7498 if (!LegalOperations)
7499 return SDValue();
7500
7501 EVT VT = N->getValueType(0);
7502 if (VT != MVT::i32)
7503 return SDValue();
7504 if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
7505 return SDValue();
7506
7507 if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N0, N1, VT,
7508 getShiftAmountTy(VT)))
7509 return BSwap;
7510
7511 // Try again with commuted operands.
7512 if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N1, N0, VT,
7513 getShiftAmountTy(VT)))
7514 return BSwap;
7515
7516
7517 // Look for either
7518 // (or (bswaphpair), (bswaphpair))
7519 // (or (or (bswaphpair), (and)), (and))
7520 // (or (or (and), (bswaphpair)), (and))
7521 SDNode *Parts[4] = {};
7522
7523 if (isBSwapHWordPair(N0, Parts)) {
7524 // (or (or (and), (and)), (or (and), (and)))
7525 if (!isBSwapHWordPair(N1, Parts))
7526 return SDValue();
7527 } else if (N0.getOpcode() == ISD::OR) {
7528 // (or (or (or (and), (and)), (and)), (and))
7529 if (!isBSwapHWordElement(N1, Parts))
7530 return SDValue();
7531 SDValue N00 = N0.getOperand(0);
7532 SDValue N01 = N0.getOperand(1);
7533 if (!(isBSwapHWordElement(N01, Parts) && isBSwapHWordPair(N00, Parts)) &&
7534 !(isBSwapHWordElement(N00, Parts) && isBSwapHWordPair(N01, Parts)))
7535 return SDValue();
7536 } else {
7537 return SDValue();
7538 }
7539
7540 // Make sure the parts are all coming from the same node.
7541 if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
7542 return SDValue();
7543
7544 SDLoc DL(N);
7545 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
7546 SDValue(Parts[0], 0));
7547
7548 // Result of the bswap should be rotated by 16. If it's not legal, then
7549 // do (x << 16) | (x >> 16).
7550 SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
7552 return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
7554 return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
7555 return DAG.getNode(ISD::OR, DL, VT,
7556 DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
7557 DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
7558}
7559
7560/// This contains all DAGCombine rules which reduce two values combined by
7561/// an Or operation to a single value \see visitANDLike().
7562SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
7563 EVT VT = N1.getValueType();
7564 SDLoc DL(N);
7565
7566 // fold (or x, undef) -> -1
7567 if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
7568 return DAG.getAllOnesConstant(DL, VT);
7569
7570 if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
7571 return V;
7572
7573 // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
7574 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
7575 // Don't increase # computations.
7576 (N0->hasOneUse() || N1->hasOneUse())) {
7577 // We can only do this xform if we know that bits from X that are set in C2
7578 // but not in C1 are already zero. Likewise for Y.
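    // e.g. with C1 = 0xFF00 and C2 = 0x00FF: if X's low byte and Y's high
    // byte are known zero, then (or (and X, 0xFF00), (and Y, 0x00FF)) is
    // simply (and (or X, Y), 0xFFFF).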
7579 if (const ConstantSDNode *N0O1C =
7581 if (const ConstantSDNode *N1O1C =
7583 // We can only do this xform if we know that bits from X that are set in
7584 // C2 but not in C1 are already zero. Likewise for Y.
7585 const APInt &LHSMask = N0O1C->getAPIntValue();
7586 const APInt &RHSMask = N1O1C->getAPIntValue();
7587
7588 if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
7589 DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
7590 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
7591 N0.getOperand(0), N1.getOperand(0));
7592 return DAG.getNode(ISD::AND, DL, VT, X,
7593 DAG.getConstant(LHSMask | RHSMask, DL, VT));
7594 }
7595 }
7596 }
7597 }
7598
7599 // (or (and X, M), (and X, N)) -> (and X, (or M, N))
7600 if (N0.getOpcode() == ISD::AND &&
7601 N1.getOpcode() == ISD::AND &&
7602 N0.getOperand(0) == N1.getOperand(0) &&
7603 // Don't increase # computations.
7604 (N0->hasOneUse() || N1->hasOneUse())) {
7605 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
7606 N0.getOperand(1), N1.getOperand(1));
7607 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
7608 }
7609
7610 return SDValue();
7611}
7612
7613/// OR combines for which the commuted variant will be tried as well.
7615 SDNode *N) {
7616 EVT VT = N0.getValueType();
7617
7618 auto peekThroughResize = [](SDValue V) {
7619 if (V->getOpcode() == ISD::ZERO_EXTEND || V->getOpcode() == ISD::TRUNCATE)
7620 return V->getOperand(0);
7621 return V;
7622 };
7623
7624 SDValue N0Resized = peekThroughResize(N0);
7625 if (N0Resized.getOpcode() == ISD::AND) {
7626 SDValue N1Resized = peekThroughResize(N1);
7627 SDValue N00 = N0Resized.getOperand(0);
7628 SDValue N01 = N0Resized.getOperand(1);
7629
7630 // fold or (and x, y), x --> x
7631 if (N00 == N1Resized || N01 == N1Resized)
7632 return N1;
7633
7634 // fold (or (and X, (xor Y, -1)), Y) -> (or X, Y)
7635 // TODO: Set AllowUndefs = true.
7636 if (SDValue NotOperand = getBitwiseNotOperand(N01, N00,
7637 /* AllowUndefs */ false)) {
7638 if (peekThroughResize(NotOperand) == N1Resized)
7639 return DAG.getNode(ISD::OR, SDLoc(N), VT,
7640 DAG.getZExtOrTrunc(N00, SDLoc(N), VT), N1);
7641 }
7642
7643 // fold (or (and (xor Y, -1), X), Y) -> (or X, Y)
7644 if (SDValue NotOperand = getBitwiseNotOperand(N00, N01,
7645 /* AllowUndefs */ false)) {
7646 if (peekThroughResize(NotOperand) == N1Resized)
7647 return DAG.getNode(ISD::OR, SDLoc(N), VT,
7648 DAG.getZExtOrTrunc(N01, SDLoc(N), VT), N1);
7649 }
7650 }
7651
7652 if (N0.getOpcode() == ISD::XOR) {
7653 // fold or (xor x, y), x --> or x, y
7654 // or (xor x, y), (x and/or y) --> or x, y
7655 SDValue N00 = N0.getOperand(0);
7656 SDValue N01 = N0.getOperand(1);
7657 if (N00 == N1)
7658 return DAG.getNode(ISD::OR, SDLoc(N), VT, N01, N1);
7659 if (N01 == N1)
7660 return DAG.getNode(ISD::OR, SDLoc(N), VT, N00, N1);
7661
7662 if (N1.getOpcode() == ISD::AND || N1.getOpcode() == ISD::OR) {
7663 SDValue N10 = N1.getOperand(0);
7664 SDValue N11 = N1.getOperand(1);
7665 if ((N00 == N10 && N01 == N11) || (N00 == N11 && N01 == N10))
7666 return DAG.getNode(ISD::OR, SDLoc(N), VT, N00, N01);
7667 }
7668 }
7669
7670 if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
7671 return R;
7672
7673 auto peekThroughZext = [](SDValue V) {
7674 if (V->getOpcode() == ISD::ZERO_EXTEND)
7675 return V->getOperand(0);
7676 return V;
7677 };
7678
7679 // (fshl X, ?, Y) | (shl X, Y) --> fshl X, ?, Y
7680 if (N0.getOpcode() == ISD::FSHL && N1.getOpcode() == ISD::SHL &&
7681 N0.getOperand(0) == N1.getOperand(0) &&
7682 peekThroughZext(N0.getOperand(2)) == peekThroughZext(N1.getOperand(1)))
7683 return N0;
7684
7685 // (fshr ?, X, Y) | (srl X, Y) --> fshr ?, X, Y
7686 if (N0.getOpcode() == ISD::FSHR && N1.getOpcode() == ISD::SRL &&
7687 N0.getOperand(1) == N1.getOperand(0) &&
7688 peekThroughZext(N0.getOperand(2)) == peekThroughZext(N1.getOperand(1)))
7689 return N0;
7690
7691 return SDValue();
7692}
7693
7694SDValue DAGCombiner::visitOR(SDNode *N) {
7695 SDValue N0 = N->getOperand(0);
7696 SDValue N1 = N->getOperand(1);
7697 EVT VT = N1.getValueType();
7698
7699 // x | x --> x
7700 if (N0 == N1)
7701 return N0;
7702
7703 // fold (or c1, c2) -> c1|c2
7704 if (SDValue C = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, {N0, N1}))
7705 return C;
7706
7707 // canonicalize constant to RHS
7708 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
7709 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
7710 return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
7711
7712 // fold vector ops
7713 if (VT.isVector()) {
7714 if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
7715 return FoldedVOp;
7716
7717 // fold (or x, 0) -> x, vector edition
7718 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
7719 return N0;
7720
7721 // fold (or x, -1) -> -1, vector edition
7722 if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
7723 // do not return N1, because undef node may exist in N1
7724 return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType());
7725
7726 // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
7727 // Do this only if the resulting type / shuffle is legal.
7728 auto *SV0 = dyn_cast<ShuffleVectorSDNode>(N0);
7729 auto *SV1 = dyn_cast<ShuffleVectorSDNode>(N1);
7730 if (SV0 && SV1 && TLI.isTypeLegal(VT)) {
7731 bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
7732 bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
7733 bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
7734 bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
7735 // Ensure both shuffles have a zero input.
7736 if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
7737 assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
7738 assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
7739 bool CanFold = true;
7740 int NumElts = VT.getVectorNumElements();
7741 SmallVector<int, 4> Mask(NumElts, -1);
7742
7743 for (int i = 0; i != NumElts; ++i) {
7744 int M0 = SV0->getMaskElt(i);
7745 int M1 = SV1->getMaskElt(i);
7746
7747 // Determine if either index is pointing to a zero vector.
7748 bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
7749 bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));
7750
7751 // If one element is zero and the other side is undef, keep undef.
7752 // This also handles the case that both are undef.
7753 if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0))
7754 continue;
7755
7756 // Make sure only one of the elements is zero.
7757 if (M0Zero == M1Zero) {
7758 CanFold = false;
7759 break;
7760 }
7761
7762 assert((M0 >= 0 || M1 >= 0) && "Undef index!");
7763
7764 // We have a zero and non-zero element. If the non-zero came from
7765 // SV0 make the index a LHS index. If it came from SV1, make it
7766 // a RHS index. We need to mod by NumElts because we don't care
7767 // which operand it came from in the original shuffles.
7768 Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
7769 }
7770
7771 if (CanFold) {
7772 SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
7773 SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
7774
7775 SDValue LegalShuffle =
7776 TLI.buildLegalVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS,
7777 Mask, DAG);
7778 if (LegalShuffle)
7779 return LegalShuffle;
7780 }
7781 }
7782 }
7783 }
7784
7785 // fold (or x, 0) -> x
7786 if (isNullConstant(N1))
7787 return N0;
7788
7789 // fold (or x, -1) -> -1
7790 if (isAllOnesConstant(N1))
7791 return N1;
7792
7793 if (SDValue NewSel = foldBinOpIntoSelect(N))
7794 return NewSel;
7795
7796 // fold (or x, c) -> c iff (x & ~c) == 0
7797 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
7798 if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
7799 return N1;
7800
7801 if (SDValue R = foldAndOrOfSETCC(N, DAG))
7802 return R;
7803
7804 if (SDValue Combined = visitORLike(N0, N1, N))
7805 return Combined;
7806
7807 if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
7808 return Combined;
7809
7810 // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
7811 if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
7812 return BSwap;
7813 if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
7814 return BSwap;
7815
7816 // reassociate or
7817 if (SDValue ROR = reassociateOps(ISD::OR, SDLoc(N), N0, N1, N->getFlags()))
7818 return ROR;
7819
7820 // Fold or(vecreduce(x), vecreduce(y)) -> vecreduce(or(x, y))
7821 if (SDValue SD = reassociateReduction(ISD::VECREDUCE_OR, ISD::OR, SDLoc(N),
7822 VT, N0, N1))
7823 return SD;
7824
7825 // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
7826 // iff (c1 & c2) != 0 or c1/c2 are undef.
7827 auto MatchIntersect = [](ConstantSDNode *C1, ConstantSDNode *C2) {
7828 return !C1 || !C2 || C1->getAPIntValue().intersects(C2->getAPIntValue());
7829 };
7830 if (N0.getOpcode() == ISD::AND && N0->hasOneUse() &&
7831 ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect, true)) {
7832 if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT,
7833 {N1, N0.getOperand(1)})) {
7834 SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
7835 AddToWorklist(IOR.getNode());
7836 return DAG.getNode(ISD::AND, SDLoc(N), VT, COR, IOR);
7837 }
7838 }
7839
7840 if (SDValue Combined = visitORCommutative(DAG, N0, N1, N))
7841 return Combined;
7842 if (SDValue Combined = visitORCommutative(DAG, N1, N0, N))
7843 return Combined;
7844
7845 // Simplify: (or (op x...), (op y...)) -> (op (or x, y))
7846 if (N0.getOpcode() == N1.getOpcode())
7847 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
7848 return V;
7849
7850 // See if this is some rotate idiom.
7851 if (SDValue Rot = MatchRotate(N0, N1, SDLoc(N)))
7852 return Rot;
7853
7854 if (SDValue Load = MatchLoadCombine(N))
7855 return Load;
7856
7857 // Simplify the operands using demanded-bits information.
7858 if (SimplifyDemandedBits(SDValue(N, 0)))
7859 return SDValue(N, 0);
7860
7861 // If OR can be rewritten into ADD, try combines based on ADD.
7862 if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
7863 DAG.isADDLike(SDValue(N, 0)))
7864 if (SDValue Combined = visitADDLike(N))
7865 return Combined;
7866
7867 // Postpone until legalization has completed to avoid interference with bswap
7868 // folding
7869 if (LegalOperations || VT.isVector())
7870 if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
7871 return R;
7872
7873 return SDValue();
7874}
7875
7876static SDValue stripConstantMask(const SelectionDAG &DAG, SDValue Op,
7877 SDValue &Mask) {
7878 if (Op.getOpcode() == ISD::AND &&
7879 DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
7880 Mask = Op.getOperand(1);
7881 return Op.getOperand(0);
7882 }
7883 return Op;
7884}
7885
7886/// Match "(X shl/srl V1) & V2" where V2 may not be present.
7887static bool matchRotateHalf(const SelectionDAG &DAG, SDValue Op, SDValue &Shift,
7888 SDValue &Mask) {
7889 Op = stripConstantMask(DAG, Op, Mask);
7890 if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
7891 Shift = Op;
7892 return true;
7893 }
7894 return false;
7895}
7896
7897/// Helper function for visitOR to extract the needed side of a rotate idiom
7898/// from a shl/srl/mul/udiv. This is meant to handle cases where
7899/// InstCombine merged some outside op with one of the shifts from
7900/// the rotate pattern.
7901/// \returns An empty \c SDValue if the needed shift couldn't be extracted.
7902/// Otherwise, returns an expansion of \p ExtractFrom based on the following
7903/// patterns:
7904///
7905/// (or (add v v) (shrl v bitwidth-1)):
7906/// expands (add v v) -> (shl v 1)
7907///
7908/// (or (mul v c0) (shrl (mul v c1) c2)):
7909/// expands (mul v c0) -> (shl (mul v c1) c3)
7910///
7911/// (or (udiv v c0) (shl (udiv v c1) c2)):
7912/// expands (udiv v c0) -> (shrl (udiv v c1) c3)
7913///
7914/// (or (shl v c0) (shrl (shl v c1) c2)):
7915/// expands (shl v c0) -> (shl (shl v c1) c3)
7916///
7917/// (or (shrl v c0) (shl (shrl v c1) c2)):
7918/// expands (shrl v c0) -> (shrl (shrl v c1) c3)
7919///
7920/// Such that in all cases, c3+c2==bitwidth(op v c1).
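///
/// For example, for a 32-bit v, given
///   (or (mul v, 16), (srl (mul v, 2), 29))
/// the (mul v, 16) side expands to (shl (mul v, 2), 3), since 16 == 2 << 3
/// and 3 + 29 == 32.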
7921static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
7922 SDValue ExtractFrom, SDValue &Mask,
7923 const SDLoc &DL) {
7924 assert(OppShift && ExtractFrom && "Empty SDValue");
7925 if (OppShift.getOpcode() != ISD::SHL && OppShift.getOpcode() != ISD::SRL)
7926 return SDValue();
7927
7928 ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask);
7929
7930 // Value and Type of the shift.
7931 SDValue OppShiftLHS = OppShift.getOperand(0);
7932 EVT ShiftedVT = OppShiftLHS.getValueType();
7933
7934 // Amount of the existing shift.
7935 ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));
7936
7937 // (add v v) -> (shl v 1)
7938 // TODO: Should this be a general DAG canonicalization?
7939 if (OppShift.getOpcode() == ISD::SRL && OppShiftCst &&
7940 ExtractFrom.getOpcode() == ISD::ADD &&
7941 ExtractFrom.getOperand(0) == ExtractFrom.getOperand(1) &&
7942 ExtractFrom.getOperand(0) == OppShiftLHS &&
7943 OppShiftCst->getAPIntValue() == ShiftedVT.getScalarSizeInBits() - 1)
7944 return DAG.getNode(ISD::SHL, DL, ShiftedVT, OppShiftLHS,
7945 DAG.getShiftAmountConstant(1, ShiftedVT, DL));
7946
7947 // Preconditions:
7948 // (or (op0 v c0) (shiftl/r (op0 v c1) c2))
7949 //
7950 // Find opcode of the needed shift to be extracted from (op0 v c0).
7951 unsigned Opcode = ISD::DELETED_NODE;
7952 bool IsMulOrDiv = false;
7953 // Set Opcode and IsMulOrDiv if the extract opcode matches the needed shift
7954 // opcode or its arithmetic (mul or udiv) variant.
7955 auto SelectOpcode = [&](unsigned NeededShift, unsigned MulOrDivVariant) {
7956 IsMulOrDiv = ExtractFrom.getOpcode() == MulOrDivVariant;
7957 if (!IsMulOrDiv && ExtractFrom.getOpcode() != NeededShift)
7958 return false;
7959 Opcode = NeededShift;
7960 return true;
7961 };
7962 // op0 must be either the needed shift opcode or the mul/udiv equivalent
7963 // that the needed shift can be extracted from.
7964 if ((OppShift.getOpcode() != ISD::SRL || !SelectOpcode(ISD::SHL, ISD::MUL)) &&
7965 (OppShift.getOpcode() != ISD::SHL || !SelectOpcode(ISD::SRL, ISD::UDIV)))
7966 return SDValue();
7967
7968 // op0 must be the same opcode on both sides, have the same LHS argument,
7969 // and produce the same value type.
7970 if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() ||
7971 OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) ||
7972 ShiftedVT != ExtractFrom.getValueType())
7973 return SDValue();
7974
7975 // Constant mul/udiv/shift amount from the RHS of the shift's LHS op.
7976 ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1));
7977 // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op.
7978 ConstantSDNode *ExtractFromCst =
7979 isConstOrConstSplat(ExtractFrom.getOperand(1));
7980 // TODO: We should be able to handle non-uniform constant vectors for these values
7981 // Check that we have constant values.
7982 if (!OppShiftCst || !OppShiftCst->getAPIntValue() ||
7983 !OppLHSCst || !OppLHSCst->getAPIntValue() ||
7984 !ExtractFromCst || !ExtractFromCst->getAPIntValue())
7985 return SDValue();
7986
7987 // Compute the shift amount we need to extract to complete the rotate.
7988 const unsigned VTWidth = ShiftedVT.getScalarSizeInBits();
7989 if (OppShiftCst->getAPIntValue().ugt(VTWidth))
7990 return SDValue();
7991 APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
7992 // Normalize the bitwidth of the two mul/udiv/shift constant operands.
7993 APInt ExtractFromAmt = ExtractFromCst->getAPIntValue();
7994 APInt OppLHSAmt = OppLHSCst->getAPIntValue();
7995 zeroExtendToMatch(ExtractFromAmt, OppLHSAmt);
7996
7997 // Now try extract the needed shift from the ExtractFrom op and see if the
7998 // result matches up with the existing shift's LHS op.
7999 if (IsMulOrDiv) {
8000 // Op to extract from is a mul or udiv by a constant.
8001 // Check:
8002 // c2 / (1 << (bitwidth(op0 v c0) - c1)) == c0
8003 // c2 % (1 << (bitwidth(op0 v c0) - c1)) == 0
8004 const APInt ExtractDiv = APInt::getOneBitSet(ExtractFromAmt.getBitWidth(),
8005 NeededShiftAmt.getZExtValue());
8006 APInt ResultAmt;
8007 APInt Rem;
8008 APInt::udivrem(ExtractFromAmt, ExtractDiv, ResultAmt, Rem);
8009 if (Rem != 0 || ResultAmt != OppLHSAmt)
8010 return SDValue();
8011 } else {
8012 // Op to extract from is a shift by a constant.
8013 // Check:
8014 // c2 - (bitwidth(op0 v c0) - c1) == c0
8015 if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc(
8016 ExtractFromAmt.getBitWidth()))
8017 return SDValue();
8018 }
8019
8020 // Return the expanded shift op that should allow a rotate to be formed.
8021 EVT ShiftVT = OppShift.getOperand(1).getValueType();
8022 EVT ResVT = ExtractFrom.getValueType();
8023 SDValue NewShiftNode = DAG.getConstant(NeededShiftAmt, DL, ShiftVT);
8024 return DAG.getNode(Opcode, DL, ResVT, OppShiftLHS, NewShiftNode);
8025}
8026
8027// Return true if we can prove that, whenever Neg and Pos are both in the
8028// range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos). This means that
8029// for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
8030//
8031// (or (shift1 X, Neg), (shift2 X, Pos))
8032//
8033// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
8034// in direction shift1 by Neg. The range [0, EltSize) means that we only need
8035// to consider shift amounts with defined behavior.
8036//
8037// The IsRotate flag should be set when the LHS of both shifts is the same.
8038// Otherwise if matching a general funnel shift, it should be clear.
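//
// For example, with EltSize == 32, Neg == (and (sub 0, Pos), 31) is accepted
// because (0 - Pos) & 31 == (32 - Pos) & 31 for every Pos.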
8039static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
8040 SelectionDAG &DAG, bool IsRotate) {
8041 const auto &TLI = DAG.getTargetLoweringInfo();
8042 // If EltSize is a power of 2 then:
8043 //
8044 // (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
8045 // (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
8046 //
8047 // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
8048 // for the stronger condition:
8049 //
8050 // Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1) [A]
8051 //
8052 // for all Neg and Pos. Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
8053 // we can just replace Neg with Neg' for the rest of the function.
8054 //
8055 // In other cases we check for the even stronger condition:
8056 //
8057 // Neg == EltSize - Pos [B]
8058 //
8059 // for all Neg and Pos. Note that the (or ...) then invokes undefined
8060 // behavior if Pos == 0 (and consequently Neg == EltSize).
8061 //
8062 // We could actually use [A] whenever EltSize is a power of 2, but the
8063 // only extra cases that it would match are those uninteresting ones
8064 // where Neg and Pos are never in range at the same time. E.g. for
8065 // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
8066 // as well as (sub 32, Pos), but:
8067 //
8068 // (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
8069 //
8070 // always invokes undefined behavior for 32-bit X.
8071 //
8072 // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
8073 // This allows us to peek through any operations that only affect Mask's
8074 // un-demanded bits.
8075 //
8076 // NOTE: We can only do this when matching operations which won't modify the
8077 // least Log2(EltSize) significant bits and not a general funnel shift.
8078 unsigned MaskLoBits = 0;
8079 if (IsRotate && isPowerOf2_64(EltSize)) {
8080 unsigned Bits = Log2_64(EltSize);
8081 unsigned NegBits = Neg.getScalarValueSizeInBits();
8082 if (NegBits >= Bits) {
8083 APInt DemandedBits = APInt::getLowBitsSet(NegBits, Bits);
8084 if (SDValue Inner =
8085 TLI.SimplifyMultipleUseDemandedBits(Neg, DemandedBits, DAG)) {
8086 Neg = Inner;
8087 MaskLoBits = Bits;
8088 }
8089 }
8090 }
8091
8092 // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
8093 if (Neg.getOpcode() != ISD::SUB)
8094 return false;
8095 ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
8096 if (!NegC)
8097 return false;
8098 SDValue NegOp1 = Neg.getOperand(1);
8099
8100 // On the RHS of [A], if Pos is the result of operation on Pos' that won't
8101 // affect Mask's demanded bits, just replace Pos with Pos'. These operations
8102 // are redundant for the purpose of the equality.
8103 if (MaskLoBits) {
8104 unsigned PosBits = Pos.getScalarValueSizeInBits();
8105 if (PosBits >= MaskLoBits) {
8106 APInt DemandedBits = APInt::getLowBitsSet(PosBits, MaskLoBits);
8107 if (SDValue Inner =
8108 TLI.SimplifyMultipleUseDemandedBits(Pos, DemandedBits, DAG)) {
8109 Pos = Inner;
8110 }
8111 }
8112 }
8113
8114 // The condition we need is now:
8115 //
8116 // (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
8117 //
8118 // If NegOp1 == Pos then we need:
8119 //
8120 // EltSize & Mask == NegC & Mask
8121 //
8122 // (because "x & Mask" is a truncation and distributes through subtraction).
8123 //
8124 // We also need to account for a potential truncation of NegOp1 if the amount
8125 // has already been legalized to a shift amount type.
8126 APInt Width;
8127 if ((Pos == NegOp1) ||
8128 (NegOp1.getOpcode() == ISD::TRUNCATE && Pos == NegOp1.getOperand(0)))
8129 Width = NegC->getAPIntValue();
8130
8131 // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
8132 // Then the condition we want to prove becomes:
8133 //
8134 // (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
8135 //
8136 // which, again because "x & Mask" is a truncation, becomes:
8137 //
8138 // NegC & Mask == (EltSize - PosC) & Mask
8139 // EltSize & Mask == (NegC + PosC) & Mask
8140 else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
8141 if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
8142 Width = PosC->getAPIntValue() + NegC->getAPIntValue();
8143 else
8144 return false;
8145 } else
8146 return false;
8147
8148 // Now we just need to check that EltSize & Mask == Width & Mask.
8149 if (MaskLoBits)
8150 // EltSize & Mask is 0 since Mask is EltSize - 1.
8151 return Width.getLoBits(MaskLoBits) == 0;
8152 return Width == EltSize;
8153}
8154
8155// A subroutine of MatchRotate used once we have found an OR of two opposite
8156// shifts of Shifted. If Neg == <operand size> - Pos then the OR reduces
8157// to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
8158// former being preferred if supported. InnerPos and InnerNeg are Pos and
8159// Neg with outer conversions stripped away.
8160SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
8161 SDValue Neg, SDValue InnerPos,
8162 SDValue InnerNeg, bool HasPos,
8163 unsigned PosOpcode, unsigned NegOpcode,
8164 const SDLoc &DL) {
8165 // fold (or (shl x, (*ext y)),
8166 // (srl x, (*ext (sub 32, y)))) ->
8167 // (rotl x, y) or (rotr x, (sub 32, y))
8168 //
8169 // fold (or (shl x, (*ext (sub 32, y))),
8170 // (srl x, (*ext y))) ->
8171 // (rotr x, y) or (rotl x, (sub 32, y))
8172 EVT VT = Shifted.getValueType();
8173 if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG,
8174 /*IsRotate*/ true)) {
8175 return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
8176 HasPos ? Pos : Neg);
8177 }
8178
8179 return SDValue();
8180}
8181
8182// A subroutine of MatchRotate used once we have found an OR of two opposite
8183// shifts of N0 + N1. If Neg == <operand size> - Pos then the OR reduces
8184// to both (PosOpcode N0, N1, Pos) and (NegOpcode N0, N1, Neg), with the
8185// former being preferred if supported. InnerPos and InnerNeg are Pos and
8186// Neg with outer conversions stripped away.
8187// TODO: Merge with MatchRotatePosNeg.
8188SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos,
8189 SDValue Neg, SDValue InnerPos,
8190 SDValue InnerNeg, bool HasPos,
8191 unsigned PosOpcode, unsigned NegOpcode,
8192 const SDLoc &DL) {
8193 EVT VT = N0.getValueType();
8194 unsigned EltBits = VT.getScalarSizeInBits();
8195
8196 // fold (or (shl x0, (*ext y)),
8197 // (srl x1, (*ext (sub 32, y)))) ->
8198 // (fshl x0, x1, y) or (fshr x0, x1, (sub 32, y))
8199 //
8200 // fold (or (shl x0, (*ext (sub 32, y))),
8201 // (srl x1, (*ext y))) ->
8202 // (fshr x0, x1, y) or (fshl x0, x1, (sub 32, y))
8203 if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG, /*IsRotate*/ N0 == N1)) {
8204 return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, N0, N1,
8205 HasPos ? Pos : Neg);
8206 }
8207
8208 // Matching the shift+xor cases, we can't easily use the xor'd shift amount
8209 // so for now just use the PosOpcode case if it's legal.
8210 // TODO: When can we use the NegOpcode case?
8211 if (PosOpcode == ISD::FSHL && isPowerOf2_32(EltBits)) {
8212 auto IsBinOpImm = [](SDValue Op, unsigned BinOpc, unsigned Imm) {
8213 if (Op.getOpcode() != BinOpc)
8214 return false;
8215 ConstantSDNode *Cst = isConstOrConstSplat(Op.getOperand(1));
8216 return Cst && (Cst->getAPIntValue() == Imm);
8217 };
8218
8219 // fold (or (shl x0, y), (srl (srl x1, 1), (xor y, 31)))
8220 // -> (fshl x0, x1, y)
8221 if (IsBinOpImm(N1, ISD::SRL, 1) &&
8222 IsBinOpImm(InnerNeg, ISD::XOR, EltBits - 1) &&
8223 InnerPos == InnerNeg.getOperand(0) &&
8224 TLI.isOperationLegalOrCustom(ISD::FSHL, VT)) {
8225 return DAG.getNode(ISD::FSHL, DL, VT, N0, N1.getOperand(0), Pos);
8226 }
8227
8228 // fold (or (shl (shl x0, 1), (xor y, 31)), (srl x1, y))
8229 // -> (fshr x0, x1, y)
8230 if (IsBinOpImm(N0, ISD::SHL, 1) &&
8231 IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
8232 InnerNeg == InnerPos.getOperand(0) &&
8233 TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) {
8234 return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
8235 }
8236
8237 // fold (or (shl (add x0, x0), (xor y, 31)), (srl x1, y))
8238 // -> (fshr x0, x1, y)
8239 // TODO: Should add(x,x) -> shl(x,1) be a general DAG canonicalization?
8240 if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N0.getOperand(1) &&
8241 IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
8242 InnerNeg == InnerPos.getOperand(0) &&
8243 TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) {
8244 return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
8245 }
8246 }
8247
8248 return SDValue();
8249}
8250
8251// MatchRotate - Handle an 'or' of two operands. If this is one of the many
8252// idioms for rotate, and if the target supports rotation instructions, generate
8253// a rot[lr]. This also matches funnel shift patterns, similar to rotation but
8254// with different shifted sources.
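// E.g. for i32, (or (shl x, 8), (srl x, 24)) can be matched as (rotl x, 8),
// and (or (shl x0, 8), (srl x1, 24)) as (fshl x0, x1, 8).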
8255SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
8256 EVT VT = LHS.getValueType();
8257
8258 // The target must have at least one rotate/funnel flavor.
8259 // We still try to match rotate by constant pre-legalization.
8260 // TODO: Support pre-legalization funnel-shift by constant.
8261 bool HasROTL = hasOperation(ISD::ROTL, VT);
8262 bool HasROTR = hasOperation(ISD::ROTR, VT);
8263 bool HasFSHL = hasOperation(ISD::FSHL, VT);
8264 bool HasFSHR = hasOperation(ISD::FSHR, VT);
8265
8266 // If the type is going to be promoted and the target has enabled custom
8267 // lowering for rotate, allow matching rotate by non-constants. Only allow
8268 // this for scalar types.
8269 if (VT.isScalarInteger() && TLI.getTypeAction(*DAG.getContext(), VT) ==
8270 TargetLowering::TypePromoteInteger) {
8271 HasROTL |= TLI.getOperationAction(ISD::ROTL, VT) == TargetLowering::Custom;
8272 HasROTR |= TLI.getOperationAction(ISD::ROTR, VT) == TargetLowering::Custom;
8273 }
8274
8275 if (LegalOperations && !HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
8276 return SDValue();
8277
8278 // Check for truncated rotate.
8279 if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
8280 LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
8281 assert(LHS.getValueType() == RHS.getValueType());
8282 if (SDValue Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
8283 return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(), Rot);
8284 }
8285 }
8286
8287 // Match "(X shl/srl V1) & V2" where V2 may not be present.
8288 SDValue LHSShift; // The shift.
8289 SDValue LHSMask; // AND value if any.
8290 matchRotateHalf(DAG, LHS, LHSShift, LHSMask);
8291
8292 SDValue RHSShift; // The shift.
8293 SDValue RHSMask; // AND value if any.
8294 matchRotateHalf(DAG, RHS, RHSShift, RHSMask);
8295
8296 // If neither side matched a rotate half, bail
8297 if (!LHSShift && !RHSShift)
8298 return SDValue();
8299
8300 // InstCombine may have combined a constant shl, srl, mul, or udiv with one
8301 // side of the rotate, so try to handle that here. In all cases we need to
8302 // pass the matched shift from the opposite side to compute the opcode and
8303 // needed shift amount to extract. We still want to do this if both sides
8304 // matched a rotate half because one half may be a potential overshift that
8305 // can be broken down (ie if InstCombine merged two shl or srl ops into a
8306 // single one).
8307
8308 // Have LHS side of the rotate, try to extract the needed shift from the RHS.
8309 if (LHSShift)
8310 if (SDValue NewRHSShift =
8311 extractShiftForRotate(DAG, LHSShift, RHS, RHSMask, DL))
8312 RHSShift = NewRHSShift;
8313 // Have RHS side of the rotate, try to extract the needed shift from the LHS.
8314 if (RHSShift)
8315 if (SDValue NewLHSShift =
8316 extractShiftForRotate(DAG, RHSShift, LHS, LHSMask, DL))
8317 LHSShift = NewLHSShift;
8318
8319 // If a side is still missing, nothing else we can do.
8320 if (!RHSShift || !LHSShift)
8321 return SDValue();
8322
8323 // At this point we've matched or extracted a shift op on each side.
8324
8325 if (LHSShift.getOpcode() == RHSShift.getOpcode())
8326 return SDValue(); // Shifts must disagree.
8327
8328 // Canonicalize shl to left side in a shl/srl pair.
8329 if (RHSShift.getOpcode() == ISD::SHL) {
8330 std::swap(LHS, RHS);
8331 std::swap(LHSShift, RHSShift);
8332 std::swap(LHSMask, RHSMask);
8333 }
8334
8335 // Something has gone wrong - we've lost the shl/srl pair - bail.
8336 if (LHSShift.getOpcode() != ISD::SHL || RHSShift.getOpcode() != ISD::SRL)
8337 return SDValue();
8338
8339 unsigned EltSizeInBits = VT.getScalarSizeInBits();
8340 SDValue LHSShiftArg = LHSShift.getOperand(0);
8341 SDValue LHSShiftAmt = LHSShift.getOperand(1);
8342 SDValue RHSShiftArg = RHSShift.getOperand(0);
8343 SDValue RHSShiftAmt = RHSShift.getOperand(1);
8344
8345 auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
8346 ConstantSDNode *RHS) {
8347 return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
8348 };
8349
8350 auto ApplyMasks = [&](SDValue Res) {
8351 // If there is an AND of either shifted operand, apply it to the result.
8352 if (LHSMask.getNode() || RHSMask.getNode()) {
8353 SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
8354 SDValue Mask = AllOnes;
8355
8355
8356 if (LHSMask.getNode()) {
8357 SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
8358 Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
8359 DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
8360 }
8361 if (RHSMask.getNode()) {
8362 SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
8363 Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
8364 DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
8365 }
8366
8367 Res = DAG.getNode(ISD::AND, DL, VT, Res, Mask);
8368 }
8369
8370 return Res;
8371 };
8372
8373 // TODO: Support pre-legalization funnel-shift by constant.
8374 bool IsRotate = LHSShiftArg == RHSShiftArg;
8375 if (!IsRotate && !(HasFSHL || HasFSHR)) {
8376 if (TLI.isTypeLegal(VT) && LHS.hasOneUse() && RHS.hasOneUse() &&
8377 ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
8378 // Look for a disguised rotate by constant.
8379 // The common shifted operand X may be hidden inside another 'or'.
8380 SDValue X, Y;
8381 auto matchOr = [&X, &Y](SDValue Or, SDValue CommonOp) {
8382 if (!Or.hasOneUse() || Or.getOpcode() != ISD::OR)
8383 return false;
8384 if (CommonOp == Or.getOperand(0)) {
8385 X = CommonOp;
8386 Y = Or.getOperand(1);
8387 return true;
8388 }
8389 if (CommonOp == Or.getOperand(1)) {
8390 X = CommonOp;
8391 Y = Or.getOperand(0);
8392 return true;
8393 }
8394 return false;
8395 };
8396
8397 SDValue Res;
8398 if (matchOr(LHSShiftArg, RHSShiftArg)) {
8399 // (shl (X | Y), C1) | (srl X, C2) --> (rotl X, C1) | (shl Y, C1)
8400 SDValue RotX = DAG.getNode(ISD::ROTL, DL, VT, X, LHSShiftAmt);
8401 SDValue ShlY = DAG.getNode(ISD::SHL, DL, VT, Y, LHSShiftAmt);
8402 Res = DAG.getNode(ISD::OR, DL, VT, RotX, ShlY);
8403 } else if (matchOr(RHSShiftArg, LHSShiftArg)) {
8404 // (shl X, C1) | (srl (X | Y), C2) --> (rotl X, C1) | (srl Y, C2)
8405 SDValue RotX = DAG.getNode(ISD::ROTL, DL, VT, X, LHSShiftAmt);
8406 SDValue SrlY = DAG.getNode(ISD::SRL, DL, VT, Y, RHSShiftAmt);
8407 Res = DAG.getNode(ISD::OR, DL, VT, RotX, SrlY);
8408 } else {
8409 return SDValue();
8410 }
8411
8412 return ApplyMasks(Res);
8413 }
8414
8415 return SDValue(); // Requires funnel shift support.
8416 }
8417
8418 // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
8419 // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
8420 // fold (or (shl x, C1), (srl y, C2)) -> (fshl x, y, C1)
8421 // fold (or (shl x, C1), (srl y, C2)) -> (fshr x, y, C2)
8422 // iff C1+C2 == EltSizeInBits
8423 if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
8424 SDValue Res;
8425 if (IsRotate && (HasROTL || HasROTR || !(HasFSHL || HasFSHR))) {
8426 bool UseROTL = !LegalOperations || HasROTL;
8427 Res = DAG.getNode(UseROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
8428 UseROTL ? LHSShiftAmt : RHSShiftAmt);
8429 } else {
8430 bool UseFSHL = !LegalOperations || HasFSHL;
8431 Res = DAG.getNode(UseFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg,
8432 RHSShiftArg, UseFSHL ? LHSShiftAmt : RHSShiftAmt);
8433 }
8434
8435 return ApplyMasks(Res);
8436 }
8437
8438 // Even pre-legalization, we can't easily rotate/funnel-shift by a variable
8439 // shift.
8440 if (!HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
8441 return SDValue();
8442
8443 // If there is a mask here, and we have a variable shift, we can't be sure
8444 // that we're masking out the right stuff.
8445 if (LHSMask.getNode() || RHSMask.getNode())
8446 return SDValue();
8447
8448 // If the shift amount is sign/zext/any-extended just peel it off.
8449 SDValue LExtOp0 = LHSShiftAmt;
8450 SDValue RExtOp0 = RHSShiftAmt;
8451 if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
8452 LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
8453 LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
8454 LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
8455 (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
8456 RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
8457 RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
8458 RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
8459 LExtOp0 = LHSShiftAmt.getOperand(0);
8460 RExtOp0 = RHSShiftAmt.getOperand(0);
8461 }
8462
8463 if (IsRotate && (HasROTL || HasROTR)) {
8464 SDValue TryL =
8465 MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt, LExtOp0,
8466 RExtOp0, HasROTL, ISD::ROTL, ISD::ROTR, DL);
8467 if (TryL)
8468 return TryL;
8469
8470 SDValue TryR =
8471 MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt, RExtOp0,
8472 LExtOp0, HasROTR, ISD::ROTR, ISD::ROTL, DL);
8473 if (TryR)
8474 return TryR;
8475 }
8476
8477 SDValue TryL =
8478 MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, LHSShiftAmt, RHSShiftAmt,
8479 LExtOp0, RExtOp0, HasFSHL, ISD::FSHL, ISD::FSHR, DL);
8480 if (TryL)
8481 return TryL;
8482
8483 SDValue TryR =
8484 MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
8485 RExtOp0, LExtOp0, HasFSHR, ISD::FSHR, ISD::FSHL, DL);
8486 if (TryR)
8487 return TryR;
8488
8489 return SDValue();
8490}
8491
8492/// Recursively traverses the expression calculating the origin of the requested
8493/// byte of the given value. Returns std::nullopt if the provider can't be
8494/// calculated.
8495///
8496/// For all the values except the root of the expression, we verify that the
8497/// value has exactly one use and if not then return std::nullopt. This way if
8498/// the origin of the byte is returned it's guaranteed that the values which
8499/// contribute to the byte are not used outside of this expression.
8500
8501/// However, there is a special case when dealing with vector loads -- we allow
8502/// more than one use if the load is a vector type. Since the values that
8503/// contribute to the byte ultimately come from the ExtractVectorElements of the
8504/// Load, we don't care if the Load has uses other than ExtractVectorElements,
8505/// because those operations are independent from the pattern to be combined.
8506/// For vector loads, we simply care that the ByteProviders are adjacent
8507/// positions of the same vector, and their index matches the byte that is being
8508/// provided. This is captured by the \p VectorIndex algorithm. \p VectorIndex
8509/// is the index used in an ExtractVectorElement, and \p StartingIndex is the
8510/// byte position we are trying to provide for the LoadCombine. If these do
8511/// not match, then we can not combine the vector loads. \p Index uses the
8512/// byte position we are trying to provide for and is matched against the
8513/// shl and load size. The \p Index algorithm ensures the requested byte is
8514/// provided for by the pattern, and the pattern does not over provide bytes.
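/// As a scalar example, requesting byte 1 of (or (zext i8 a), (shl (zext i8 b), 8))
/// resolves to whatever provides byte 0 of b: the zext of a contributes a
/// known zero for that byte, and the shl by one byte redirects the request to
/// byte 0 of its operand.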
8515///
8516///
8517/// The supported LoadCombine pattern for vector loads is as follows
8518/// or
8519/// / \
8520/// or shl
8521/// / \ |
8522/// or shl zext
8523/// / \ | |
8524/// shl zext zext EVE*
8525/// | | | |
8526/// zext EVE* EVE* LOAD
8527/// | | |
8528/// EVE* LOAD LOAD
8529/// |
8530/// LOAD
8531///
8532/// *ExtractVectorElement
8533using SDByteProvider = ByteProvider<SDNode *>;
8534
8535static std::optional<SDByteProvider>
8536calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
8537 std::optional<uint64_t> VectorIndex,
8538 unsigned StartingIndex = 0) {
8539
8540 // Typical i64 by i8 pattern requires recursion up to 8 calls depth
8541 if (Depth == 10)
8542 return std::nullopt;
8543
8544 // Only allow multiple uses if the instruction is a vector load (in which
8545 // case we will use the load for every ExtractVectorElement)
8546 if (Depth && !Op.hasOneUse() &&
8547 (Op.getOpcode() != ISD::LOAD || !Op.getValueType().isVector()))
8548 return std::nullopt;
8549
8550 // Fail to combine if we have encountered anything but a LOAD after handling
8551 // an ExtractVectorElement.
8552 if (Op.getOpcode() != ISD::LOAD && VectorIndex.has_value())
8553 return std::nullopt;
8554
8555 unsigned BitWidth = Op.getValueSizeInBits();
8556 if (BitWidth % 8 != 0)
8557 return std::nullopt;
8558 unsigned ByteWidth = BitWidth / 8;
8559 assert(Index < ByteWidth && "invalid index requested");
8560 (void) ByteWidth;
8561
8562 switch (Op.getOpcode()) {
8563 case ISD::OR: {
8564 auto LHS =
8565 calculateByteProvider(Op->getOperand(0), Index, Depth + 1, VectorIndex);
8566 if (!LHS)
8567 return std::nullopt;
8568 auto RHS =
8569 calculateByteProvider(Op->getOperand(1), Index, Depth + 1, VectorIndex);
8570 if (!RHS)
8571 return std::nullopt;
8572
8573 if (LHS->isConstantZero())
8574 return RHS;
8575 if (RHS->isConstantZero())
8576 return LHS;
8577 return std::nullopt;
8578 }
8579 case ISD::SHL: {
8580 auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
8581 if (!ShiftOp)
8582 return std::nullopt;
8583
8584 uint64_t BitShift = ShiftOp->getZExtValue();
8585
8586 if (BitShift % 8 != 0)
8587 return std::nullopt;
8588 uint64_t ByteShift = BitShift / 8;
8589
8590 // If we are shifting by an amount greater than the index we are trying to
8591 // provide, then do not provide anything. Otherwise, subtract the index by
8592 // the amount we shifted by.
8593 return Index < ByteShift
8594 ? SDByteProvider::getConstantZero()
8595 : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
8596 Depth + 1, VectorIndex, Index);
8597 }
8598 case ISD::ANY_EXTEND:
8599 case ISD::SIGN_EXTEND:
8600 case ISD::ZERO_EXTEND: {
8601 SDValue NarrowOp = Op->getOperand(0);
8602 unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
8603 if (NarrowBitWidth % 8 != 0)
8604 return std::nullopt;
8605 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
8606
8607 if (Index >= NarrowByteWidth)
8608 return Op.getOpcode() == ISD::ZERO_EXTEND
8609 ? std::optional<SDByteProvider>(
8610 SDByteProvider::getConstantZero())
8611 : std::nullopt;
8612 return calculateByteProvider(NarrowOp, Index, Depth + 1, VectorIndex,
8613 StartingIndex);
8614 }
8615 case ISD::BSWAP:
8616 return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
8617 Depth + 1, VectorIndex, StartingIndex);
8618 case ISD::EXTRACT_VECTOR_ELT: {
8619 auto OffsetOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
8620 if (!OffsetOp)
8621 return std::nullopt;
8622
8623 VectorIndex = OffsetOp->getZExtValue();
8624
8625 SDValue NarrowOp = Op->getOperand(0);
8626 unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
8627 if (NarrowBitWidth % 8 != 0)
8628 return std::nullopt;
8629 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
8630 // EXTRACT_VECTOR_ELT can extend the element type to the width of the return
8631 // type, leaving the high bits undefined.
8632 if (Index >= NarrowByteWidth)
8633 return std::nullopt;
8634
8635 // Check to see if the position of the element in the vector corresponds
8636 // with the byte we are trying to provide for. In the case of a vector of
8637 // i8, this simply means the VectorIndex == StartingIndex. For non i8 cases,
8638 // the element will provide a range of bytes. For example, if we have a
8639 // vector of i16s, each element provides two bytes (V[1] provides byte 2 and
8640 // 3).
8641 if (*VectorIndex * NarrowByteWidth > StartingIndex)
8642 return std::nullopt;
8643 if ((*VectorIndex + 1) * NarrowByteWidth <= StartingIndex)
8644 return std::nullopt;
8645
8646 return calculateByteProvider(Op->getOperand(0), Index, Depth + 1,
8647 VectorIndex, StartingIndex);
8648 }
8649 case ISD::LOAD: {
8650 auto L = cast<LoadSDNode>(Op.getNode());
8651 if (!L->isSimple() || L->isIndexed())
8652 return std::nullopt;
8653
8654 unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
8655 if (NarrowBitWidth % 8 != 0)
8656 return std::nullopt;
8657 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
8658
8659 // If the width of the load does not reach the byte we are trying to provide for
8660 // and it is not a ZEXTLOAD, then the load does not provide for the byte in
8661 // question
8662 if (Index >= NarrowByteWidth)
8663 return L->getExtensionType() == ISD::ZEXTLOAD
8664 ? std::optional<SDByteProvider>(
8665 SDByteProvider::getConstantZero())
8666 : std::nullopt;
8667
8668 unsigned BPVectorIndex = VectorIndex.value_or(0U);
8669 return SDByteProvider::getSrc(L, Index, BPVectorIndex);
8670 }
8671 }
8672
8673 return std::nullopt;
8674}
8675
8676static unsigned littleEndianByteAt(unsigned BW, unsigned i) {
8677 return i;
8678}
8679
8680static unsigned bigEndianByteAt(unsigned BW, unsigned i) {
8681 return BW - i - 1;
8682}
8683
8684// Check if the bytes offsets we are looking at match with either big or
8685// little endian value loaded. Return true for big endian, false for little
8686// endian, and std::nullopt if match failed.
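// E.g. for a 4-byte value, offsets {0, 1, 2, 3} relative to FirstOffset match
// little endian (returns false) and {3, 2, 1, 0} match big endian (returns true).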
8687static std::optional<bool> isBigEndian(const ArrayRef<int64_t> ByteOffsets,
8688 int64_t FirstOffset) {
8689 // The endian can be decided only when it is 2 bytes at least.
8690 unsigned Width = ByteOffsets.size();
8691 if (Width < 2)
8692 return std::nullopt;
8693
8694 bool BigEndian = true, LittleEndian = true;
8695 for (unsigned i = 0; i < Width; i++) {
8696 int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
8697 LittleEndian &= CurrentByteOffset == littleEndianByteAt(Width, i);
8698 BigEndian &= CurrentByteOffset == bigEndianByteAt(Width, i);
8699 if (!BigEndian && !LittleEndian)
8700 return std::nullopt;
8701 }
8702
8703 assert((BigEndian != LittleEndian) && "It should be either big endian or "
8704 "little endian");
8705 return BigEndian;
8706}
8707
8708static SDValue stripTruncAndExt(SDValue Value) {
8709 switch (Value.getOpcode()) {
8710 case ISD::TRUNCATE:
8711 case ISD::ZERO_EXTEND:
8712 case ISD::SIGN_EXTEND:
8713 case ISD::ANY_EXTEND:
8714 return stripTruncAndExt(Value.getOperand(0));
8715 }
8716 return Value;
8717}
8718
8719/// Match a pattern where a wide type scalar value is stored by several narrow
8720/// stores. Fold it into a single store or a BSWAP and a store if the target
8721/// supports it.
8722///
8723/// Assuming little endian target:
8724/// i8 *p = ...
8725/// i32 val = ...
8726/// p[0] = (val >> 0) & 0xFF;
8727/// p[1] = (val >> 8) & 0xFF;
8728/// p[2] = (val >> 16) & 0xFF;
8729/// p[3] = (val >> 24) & 0xFF;
8730/// =>
8731/// *((i32)p) = val;
8732///
8733/// i8 *p = ...
8734/// i32 val = ...
8735/// p[0] = (val >> 24) & 0xFF;
8736/// p[1] = (val >> 16) & 0xFF;
8737/// p[2] = (val >> 8) & 0xFF;
8738/// p[3] = (val >> 0) & 0xFF;
8739/// =>
8740/// *((i32)p) = BSWAP(val);
8741SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
8742 // The matching looks for "store (trunc x)" patterns that appear early but are
8743 // likely to be replaced by truncating store nodes during combining.
8744 // TODO: If there is evidence that running this later would help, this
8745 // limitation could be removed. Legality checks may need to be added
8746 // for the created store and optional bswap/rotate.
8747 if (LegalOperations || OptLevel == CodeGenOptLevel::None)
8748 return SDValue();
8749
8750 // We only handle merging simple stores of 1-4 bytes.
8751 // TODO: Allow unordered atomics when wider type is legal (see D66309)
8752 EVT MemVT = N->getMemoryVT();
8753 if (!(MemVT == MVT::i8 || MemVT == MVT::i16 || MemVT == MVT::i32) ||
8754 !N->isSimple() || N->isIndexed())
8755 return SDValue();
8756
8757 // Collect all of the stores in the chain, up to the maximum store width (i64).
8758 SDValue Chain = N->getChain();
8759 SmallVector<StoreSDNode *, 8> Stores = {N};
8760 unsigned NarrowNumBits = MemVT.getScalarSizeInBits();
8761 unsigned MaxWideNumBits = 64;
8762 unsigned MaxStores = MaxWideNumBits / NarrowNumBits;
8763 while (auto *Store = dyn_cast<StoreSDNode>(Chain)) {
8764 // All stores must be the same size to ensure that we are writing all of the
8765 // bytes in the wide value.
8766 // This store should have exactly one use as a chain operand for another
8767 // store in the merging set. If there are other chain uses, then the
8768 // transform may not be safe because order of loads/stores outside of this
8769 // set may not be preserved.
8770 // TODO: We could allow multiple sizes by tracking each stored byte.
8771 if (Store->getMemoryVT() != MemVT || !Store->isSimple() ||
8772 Store->isIndexed() || !Store->hasOneUse())
8773 return SDValue();
8774 Stores.push_back(Store);
8775 Chain = Store->getChain();
8776 if (MaxStores < Stores.size())
8777 return SDValue();
8778 }
8779 // There is no reason to continue if we do not have at least a pair of stores.
8780 if (Stores.size() < 2)
8781 return SDValue();
8782
8783 // Handle simple types only.
8784 LLVMContext &Context = *DAG.getContext();
8785 unsigned NumStores = Stores.size();
8786 unsigned WideNumBits = NumStores * NarrowNumBits;
8787 EVT WideVT = EVT::getIntegerVT(Context, WideNumBits);
8788 if (WideVT != MVT::i16 && WideVT != MVT::i32 && WideVT != MVT::i64)
8789 return SDValue();
8790
8791 // Check if all bytes of the source value that we are looking at are stored
8792 // to the same base address. Collect offsets from Base address into OffsetMap.
8793 SDValue SourceValue;
8794 SmallVector<int64_t, 8> OffsetMap(NumStores, INT64_MAX);
8795 int64_t FirstOffset = INT64_MAX;
8796 StoreSDNode *FirstStore = nullptr;
8797 std::optional<BaseIndexOffset> Base;
8798 for (auto *Store : Stores) {
8799 // All the stores store different parts of the CombinedValue. A truncate is
8800 // required to get the partial value.
8801 SDValue Trunc = Store->getValue();
8802 if (Trunc.getOpcode() != ISD::TRUNCATE)
8803 return SDValue();
8804 // Other than the first/last part, a shift operation is required to get the
8805 // offset.
8806 int64_t Offset = 0;
8807 SDValue WideVal = Trunc.getOperand(0);
8808 if ((WideVal.getOpcode() == ISD::SRL || WideVal.getOpcode() == ISD::SRA) &&
8809 isa<ConstantSDNode>(WideVal.getOperand(1))) {
8810 // The shift amount must be a constant multiple of the narrow type.
8811 // It is translated to the offset address in the wide source value "y".
8812 //
8813 // x = srl y, ShiftAmtC
8814 // i8 z = trunc x
8815 // store z, ...
8816 uint64_t ShiftAmtC = WideVal.getConstantOperandVal(1);
8817 if (ShiftAmtC % NarrowNumBits != 0)
8818 return SDValue();
8819
8820 Offset = ShiftAmtC / NarrowNumBits;
8821 WideVal = WideVal.getOperand(0);
8822 }
8823
8824 // Stores must share the same source value with different offsets.
8825 // Truncate and extends should be stripped to get the single source value.
8826 if (!SourceValue)
8827 SourceValue = WideVal;
8828 else if (stripTruncAndExt(SourceValue) != stripTruncAndExt(WideVal))
8829 return SDValue();
8830 else if (SourceValue.getValueType() != WideVT) {
8831 if (WideVal.getValueType() == WideVT ||
8832 WideVal.getScalarValueSizeInBits() >
8833 SourceValue.getScalarValueSizeInBits())
8834 SourceValue = WideVal;
8835 // Give up if the source value type is smaller than the store size.
8836 if (SourceValue.getScalarValueSizeInBits() < WideVT.getScalarSizeInBits())
8837 return SDValue();
8838 }
8839
8840 // Stores must share the same base address.
8841 BaseIndexOffset Ptr = BaseIndexOffset::match(Store, DAG);
8842 int64_t ByteOffsetFromBase = 0;
8843 if (!Base)
8844 Base = Ptr;
8845 else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
8846 return SDValue();
8847
8848 // Remember the first store.
8849 if (ByteOffsetFromBase < FirstOffset) {
8850 FirstStore = Store;
8851 FirstOffset = ByteOffsetFromBase;
8852 }
8853 // Map the offset in the store and the offset in the combined value, and
8854 // early return if it has been set before.
8855 if (Offset < 0 || Offset >= NumStores || OffsetMap[Offset] != INT64_MAX)
8856 return SDValue();
8857 OffsetMap[Offset] = ByteOffsetFromBase;
8858 }
8859
8860 assert(FirstOffset != INT64_MAX && "First byte offset must be set");
8861 assert(FirstStore && "First store must be set");
8862
8863 // Check that a store of the wide type is both allowed and fast on the target
8864 const DataLayout &Layout = DAG.getDataLayout();
8865 unsigned Fast = 0;
8866 bool Allowed = TLI.allowsMemoryAccess(Context, Layout, WideVT,
8867 *FirstStore->getMemOperand(), &Fast);
8868 if (!Allowed || !Fast)
8869 return SDValue();
8870
8871 // Check if the pieces of the value are going to the expected places in memory
8872 // to merge the stores.
8873 auto checkOffsets = [&](bool MatchLittleEndian) {
8874 if (MatchLittleEndian) {
8875 for (unsigned i = 0; i != NumStores; ++i)
8876 if (OffsetMap[i] != i * (NarrowNumBits / 8) + FirstOffset)
8877 return false;
8878 } else { // MatchBigEndian by reversing loop counter.
8879 for (unsigned i = 0, j = NumStores - 1; i != NumStores; ++i, --j)
8880 if (OffsetMap[j] != i * (NarrowNumBits / 8) + FirstOffset)
8881 return false;
8882 }
8883 return true;
8884 };
8885
8886 // Check if the offsets line up for the native data layout of this target.
8887 bool NeedBswap = false;
8888 bool NeedRotate = false;
8889 if (!checkOffsets(Layout.isLittleEndian())) {
8890 // Special-case: check if byte offsets line up for the opposite endian.
8891 if (NarrowNumBits == 8 && checkOffsets(Layout.isBigEndian()))
8892 NeedBswap = true;
8893 else if (NumStores == 2 && checkOffsets(Layout.isBigEndian()))
8894 NeedRotate = true;
8895 else
8896 return SDValue();
8897 }
8898
8899 SDLoc DL(N);
8900 if (WideVT != SourceValue.getValueType()) {
8901 assert(SourceValue.getValueType().getScalarSizeInBits() > WideNumBits &&
8902 "Unexpected store value to merge");
8903 SourceValue = DAG.getNode(ISD::TRUNCATE, DL, WideVT, SourceValue);
8904 }
8905
8906 // Before legalize we can introduce illegal bswaps/rotates which will be later
8907 // converted to an explicit bswap sequence. This way we end up with a single
8908 // store and byte shuffling instead of several stores and byte shuffling.
8909 if (NeedBswap) {
8910 SourceValue = DAG.getNode(ISD::BSWAP, DL, WideVT, SourceValue);
8911 } else if (NeedRotate) {
8912 assert(WideNumBits % 2 == 0 && "Unexpected type for rotate");
8913 SDValue RotAmt = DAG.getConstant(WideNumBits / 2, DL, WideVT);
8914 SourceValue = DAG.getNode(ISD::ROTR, DL, WideVT, SourceValue, RotAmt);
8915 }
8916
8917 SDValue NewStore =
8918 DAG.getStore(Chain, DL, SourceValue, FirstStore->getBasePtr(),
8919 FirstStore->getPointerInfo(), FirstStore->getAlign());
8920
8921 // Rely on other DAG combine rules to remove the other individual stores.
8922 DAG.ReplaceAllUsesWith(N, NewStore.getNode());
8923 return NewStore;
8924}
8925
8926/// Match a pattern where a wide type scalar value is loaded by several narrow
8927/// loads and combined by shifts and ors. Fold it into a single load or a load
8928/// and a BSWAP if the target supports it.
8929///
8930/// Assuming little endian target:
8931/// i8 *a = ...
8932/// i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
8933/// =>
8934/// i32 val = *((i32)a)
8935///
8936/// i8 *a = ...
8937/// i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
8938/// =>
8939/// i32 val = BSWAP(*((i32)a))
8940///
8941/// TODO: This rule matches complex patterns with OR node roots and doesn't
8942/// interact well with the worklist mechanism. When a part of the pattern is
8943/// updated (e.g. one of the loads) its direct users are put into the worklist,
8944/// but the root node of the pattern which triggers the load combine is not
8945/// necessarily a direct user of the changed node. For example, once the address
8946/// of the t28 load is reassociated, load combine won't be triggered:
8947/// t25: i32 = add t4, Constant:i32<2>
8948/// t26: i64 = sign_extend t25
8949/// t27: i64 = add t2, t26
8950/// t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
8951/// t29: i32 = zero_extend t28
8952/// t32: i32 = shl t29, Constant:i8<8>
8953/// t33: i32 = or t23, t32
8954/// As a possible fix visitLoad can check if the load can be a part of a load
8955/// combine pattern and add corresponding OR roots to the worklist.
8956SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
8957 assert(N->getOpcode() == ISD::OR &&
8958 "Can only match load combining against OR nodes");
8959
8960 // Handles simple types only
8961 EVT VT = N->getValueType(0);
8962 if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
8963 return SDValue();
8964 unsigned ByteWidth = VT.getSizeInBits() / 8;
8965
8966 bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
8967 auto MemoryByteOffset = [&](SDByteProvider P) {
8968 assert(P.hasSrc() && "Must be a memory byte provider");
8969 auto *Load = cast<LoadSDNode>(P.Src.value());
8970
8971 unsigned LoadBitWidth = Load->getMemoryVT().getScalarSizeInBits();
8972
8973 assert(LoadBitWidth % 8 == 0 &&
8974 "can only analyze providers for individual bytes not bit");
8975 unsigned LoadByteWidth = LoadBitWidth / 8;
8976 return IsBigEndianTarget ? bigEndianByteAt(LoadByteWidth, P.DestOffset)
8977 : littleEndianByteAt(LoadByteWidth, P.DestOffset);
8978 };
8979
8980 std::optional<BaseIndexOffset> Base;
8981 SDValue Chain;
8982
8983 SmallPtrSet<LoadSDNode *, 8> Loads;
8984 std::optional<SDByteProvider> FirstByteProvider;
8985 int64_t FirstOffset = INT64_MAX;
8986
8987 // Check if all the bytes of the OR we are looking at are loaded from the same
8988 // base address. Collect bytes offsets from Base address in ByteOffsets.
8989 SmallVector<int64_t, 8> ByteOffsets(ByteWidth);
8990 unsigned ZeroExtendedBytes = 0;
8991 for (int i = ByteWidth - 1; i >= 0; --i) {
8992 auto P =
8993 calculateByteProvider(SDValue(N, 0), i, 0, /*VectorIndex*/ std::nullopt,
8994 /*StartingIndex*/ i);
8995 if (!P)
8996 return SDValue();
8997
8998 if (P->isConstantZero()) {
8999 // It's OK for the N most significant bytes to be 0, we can just
9000 // zero-extend the load.
9001 if (++ZeroExtendedBytes != (ByteWidth - static_cast<unsigned>(i)))
9002 return SDValue();
9003 continue;
9004 }
9005 assert(P->hasSrc() && "provenance should either be memory or zero");
9006 auto *L = cast<LoadSDNode>(P->Src.value());
9007
9008 // All loads must share the same chain
9009 SDValue LChain = L->getChain();
9010 if (!Chain)
9011 Chain = LChain;
9012 else if (Chain != LChain)
9013 return SDValue();
9014
9015 // Loads must share the same base address
9016 BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG);
9017 int64_t ByteOffsetFromBase = 0;
9018
9019 // For vector loads, the expected load combine pattern will have an
9020 // ExtractElement for each index in the vector. While each of these
9021 // ExtractElements will be accessing the same base address as determined
9022 // by the load instruction, the actual bytes they interact with will differ
9023 // due to different ExtractElement indices. To accurately determine the
9024 // byte position of an ExtractElement, we offset the base load ptr with
9025 // the index multiplied by the byte size of each element in the vector.
9026 if (L->getMemoryVT().isVector()) {
9027 unsigned LoadWidthInBit = L->getMemoryVT().getScalarSizeInBits();
9028 if (LoadWidthInBit % 8 != 0)
9029 return SDValue();
9030 unsigned ByteOffsetFromVector = P->SrcOffset * LoadWidthInBit / 8;
9031 Ptr.addToOffset(ByteOffsetFromVector);
9032 }
9033
9034 if (!Base)
9035 Base = Ptr;
9036
9037 else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
9038 return SDValue();
9039
9040 // Calculate the offset of the current byte from the base address
9041 ByteOffsetFromBase += MemoryByteOffset(*P);
9042 ByteOffsets[i] = ByteOffsetFromBase;
9043
9044 // Remember the first byte load
9045 if (ByteOffsetFromBase < FirstOffset) {
9046 FirstByteProvider = P;
9047 FirstOffset = ByteOffsetFromBase;
9048 }
9049
9050 Loads.insert(L);
9051 }
9052
9053 assert(!Loads.empty() && "All the bytes of the value must be loaded from "
9054 "memory, so there must be at least one load which produces the value");
9055 assert(Base && "Base address of the accessed memory location must be set");
9056 assert(FirstOffset != INT64_MAX && "First byte offset must be set");
9057
9058 bool NeedsZext = ZeroExtendedBytes > 0;
9059
9060 EVT MemVT =
9061 EVT::getIntegerVT(*DAG.getContext(), (ByteWidth - ZeroExtendedBytes) * 8);
9062
9063 if (!MemVT.isSimple())
9064 return SDValue();
9065
9066 // Before legalize we can introduce too wide illegal loads which will be later
9067 // split into legal sized loads. This enables us to combine i64 load by i8
9068 // patterns to a couple of i32 loads on 32 bit targets.
9069 if (LegalOperations &&
9070 !TLI.isOperationLegal(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD,
9071 MemVT))
9072 return SDValue();
9073
9074 // Check if the bytes of the OR we are looking at match with either big or
9075 // little endian value load
9076 std::optional<bool> IsBigEndian = isBigEndian(
9077 ArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), FirstOffset);
9078 if (!IsBigEndian)
9079 return SDValue();
9080
9081 assert(FirstByteProvider && "must be set");
9082
9083 // Ensure that the first byte is loaded from zero offset of the first load.
9084 // So the combined value can be loaded from the first load address.
9085 if (MemoryByteOffset(*FirstByteProvider) != 0)
9086 return SDValue();
9087 auto *FirstLoad = cast<LoadSDNode>(FirstByteProvider->Src.value());
9088
9089 // The node we are looking at matches with the pattern, check if we can
9090 // replace it with a single (possibly zero-extended) load and bswap + shift if
9091 // needed.
9092
9093 // If the load needs byte swap check if the target supports it
9094 bool NeedsBswap = IsBigEndianTarget != *IsBigEndian;
9095
9096 // Before legalize we can introduce illegal bswaps which will be later
9097 // converted to an explicit bswap sequence. This way we end up with a single
9098 // load and byte shuffling instead of several loads and byte shuffling.
9099 // We do not introduce illegal bswaps when zero-extending as this tends to
9100 // introduce too many arithmetic instructions.
9101 if (NeedsBswap && (LegalOperations || NeedsZext) &&
9102 !TLI.isOperationLegal(ISD::BSWAP, VT))
9103 return SDValue();
9104
9105 // If we need to bswap and zero extend, we have to insert a shift. Check that
9106 // it is legal.
9107 if (NeedsBswap && NeedsZext && LegalOperations &&
9108 !TLI.isOperationLegal(ISD::SHL, VT))
9109 return SDValue();
9110
9111 // Check that a load of the wide type is both allowed and fast on the target
9112 unsigned Fast = 0;
9113 bool Allowed =
9114 TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
9115 *FirstLoad->getMemOperand(), &Fast);
9116 if (!Allowed || !Fast)
9117 return SDValue();
9118
9119 SDValue NewLoad =
9120 DAG.getExtLoad(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, SDLoc(N), VT,
9121 Chain, FirstLoad->getBasePtr(),
9122 FirstLoad->getPointerInfo(), MemVT, FirstLoad->getAlign());
9123
9124 // Transfer chain users from old loads to the new load.
9125 for (LoadSDNode *L : Loads)
9126 DAG.makeEquivalentMemoryOrdering(L, NewLoad);
9127
9128 if (!NeedsBswap)
9129 return NewLoad;
9130
9131 SDValue ShiftedLoad =
9132 NeedsZext
9133 ? DAG.getNode(ISD::SHL, SDLoc(N), VT, NewLoad,
9134 DAG.getShiftAmountConstant(ZeroExtendedBytes * 8, VT,
9135 SDLoc(N), LegalOperations))
9136 : NewLoad;
9137 return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, ShiftedLoad);
9138}
9139
9140// If the target has andn, bsl, or a similar bit-select instruction,
9141// we want to unfold masked merge, with canonical pattern of:
9142// | A | |B|
9143// ((x ^ y) & m) ^ y
9144// | D |
9145// Into:
9146// (x & m) | (y & ~m)
9147// If y is a constant, m is not a 'not', and the 'andn' does not work with
9148// immediates, we unfold into a different pattern:
9149// ~(~x & m) & (m | y)
9150// If x is a constant, m is a 'not', and the 'andn' does not work with
9151// immediates, we unfold into a different pattern:
9152// (x | ~m) & ~(~m & ~y)
9153// NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at
9154// the very least that breaks andnpd / andnps patterns, and because those
9155// patterns are simplified in IR and shouldn't be created in the DAG
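// Checking a single bit of the canonical pattern: when m is 1 the result is
// ((x ^ y) & 1) ^ y == x, and when m is 0 it is (0 ^ y) == y, i.e. exactly
// (x & m) | (y & ~m).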
9156SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
9157 assert(N->getOpcode() == ISD::XOR);
9158
9159 // Don't touch 'not' (i.e. where y = -1).
9160 if (isAllOnesOrAllOnesSplat(N->getOperand(1)))
9161 return SDValue();
9162
9163 EVT VT = N->getValueType(0);
9164
9165 // There are 3 commutable operators in the pattern,
9166 // so we have to deal with 8 possible variants of the basic pattern.
9167 SDValue X, Y, M;
9168 auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) {
9169 if (And.getOpcode() != ISD::AND || !And.hasOneUse())
9170 return false;
9171 SDValue Xor = And.getOperand(XorIdx);
9172 if (Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
9173 return false;
9174 SDValue Xor0 = Xor.getOperand(0);
9175 SDValue Xor1 = Xor.getOperand(1);
9176 // Don't touch 'not' (i.e. where y = -1).
9177 if (isAllOnesOrAllOnesSplat(Xor1))
9178 return false;
9179 if (Other == Xor0)
9180 std::swap(Xor0, Xor1);
9181 if (Other != Xor1)
9182 return false;
9183 X = Xor0;
9184 Y = Xor1;
9185 M = And.getOperand(XorIdx ? 0 : 1);
9186 return true;
9187 };
9188
9189 SDValue N0 = N->getOperand(0);
9190 SDValue N1 = N->getOperand(1);
9191 if (!matchAndXor(N0, 0, N1) && !matchAndXor(N0, 1, N1) &&
9192 !matchAndXor(N1, 0, N0) && !matchAndXor(N1, 1, N0))
9193 return SDValue();
9194
9195 // Don't do anything if the mask is constant. This should not be reachable.
9196 // InstCombine should have already unfolded this pattern, and DAGCombiner
9197 // probably shouldn't produce it, too.
9198 if (isa<ConstantSDNode>(M.getNode()))
9199 return SDValue();
9200
9201 // We can transform if the target has AndNot
9202 if (!TLI.hasAndNot(M))
9203 return SDValue();
9204
9205 SDLoc DL(N);
9206
9207 // If Y is a constant, check that 'andn' works with immediates. Unless M is
9208 // a bitwise not that would already allow ANDN to be used.
9209 if (!TLI.hasAndNot(Y) && !isBitwiseNot(M)) {
9210 assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");
9211 // If not, we need to do a bit more work to make sure andn is still used.
9212 SDValue NotX = DAG.getNOT(DL, X, VT);
9213 SDValue LHS = DAG.getNode(ISD::AND, DL, VT, NotX, M);
9214 SDValue NotLHS = DAG.getNOT(DL, LHS, VT);
9215 SDValue RHS = DAG.getNode(ISD::OR, DL, VT, M, Y);
9216 return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS);
9217 }
9218
9219 // If X is a constant and M is a bitwise not, check that 'andn' works with
9220 // immediates.
9221 if (!TLI.hasAndNot(X) && isBitwiseNot(M)) {
9222 assert(TLI.hasAndNot(Y) && "Only mask is a variable? Unreachable.");
9223 // If not, we need to do a bit more work to make sure andn is still used.
9224 SDValue NotM = M.getOperand(0);
9225 SDValue LHS = DAG.getNode(ISD::OR, DL, VT, X, NotM);
9226 SDValue NotY = DAG.getNOT(DL, Y, VT);
9227 SDValue RHS = DAG.getNode(ISD::AND, DL, VT, NotM, NotY);
9228 SDValue NotRHS = DAG.getNOT(DL, RHS, VT);
9229 return DAG.getNode(ISD::AND, DL, VT, LHS, NotRHS);
9230 }
9231
9232 SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);
9233 SDValue NotM = DAG.getNOT(DL, M, VT);
9234 SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);
9235
9236 return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
9237}
9238
9239SDValue DAGCombiner::visitXOR(SDNode *N) {
9240 SDValue N0 = N->getOperand(0);
9241 SDValue N1 = N->getOperand(1);
9242 EVT VT = N0.getValueType();
9243 SDLoc DL(N);
9244
9245 // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
9246 if (N0.isUndef() && N1.isUndef())
9247 return DAG.getConstant(0, DL, VT);
9248
9249 // fold (xor x, undef) -> undef
9250 if (N0.isUndef())
9251 return N0;
9252 if (N1.isUndef())
9253 return N1;
9254
9255 // fold (xor c1, c2) -> c1^c2
9256 if (SDValue C = DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, {N0, N1}))
9257 return C;
9258
9259 // canonicalize constant to RHS
9260 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
9261 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
9262 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
9263
9264 // fold vector ops
9265 if (VT.isVector()) {
9266 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
9267 return FoldedVOp;
9268
9269 // fold (xor x, 0) -> x, vector edition
9270 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
9271 return N0;
9272 }
9273
9274 // fold (xor x, 0) -> x
9275 if (isNullConstant(N1))
9276 return N0;
9277
9278 if (SDValue NewSel = foldBinOpIntoSelect(N))
9279 return NewSel;
9280
9281 // reassociate xor
9282 if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
9283 return RXOR;
9284
9285 // Fold xor(vecreduce(x), vecreduce(y)) -> vecreduce(xor(x, y))
9286 if (SDValue SD =
9287 reassociateReduction(ISD::VECREDUCE_XOR, ISD::XOR, DL, VT, N0, N1))
9288 return SD;
9289
9290 // fold (a^b) -> (a|b) iff a and b share no bits.
9291 if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
9292 DAG.haveNoCommonBitsSet(N0, N1))
9293 return DAG.getNode(ISD::OR, DL, VT, N0, N1);
9294
9295 // look for 'add-like' folds:
9296 // XOR(N0,MIN_SIGNED_VALUE) == ADD(N0,MIN_SIGNED_VALUE)
9297 if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
9298 isMinSignedConstant(N1))
9299 if (SDValue Combined = visitADDLike(N))
9300 return Combined;
9301
9302 // fold !(x cc y) -> (x !cc y)
9303 unsigned N0Opcode = N0.getOpcode();
9304 SDValue LHS, RHS, CC;
9305 if (TLI.isConstTrueVal(N1) &&
9306 isSetCCEquivalent(N0, LHS, RHS, CC, /*MatchStrict*/ true)) {
9307 ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
9308 LHS.getValueType());
9309 if (!LegalOperations ||
9310 TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
9311 switch (N0Opcode) {
9312 default:
9313 llvm_unreachable("Unhandled SetCC Equivalent!");
9314 case ISD::SETCC:
9315 return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
9316 case ISD::SELECT_CC:
9317 return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
9318 N0.getOperand(3), NotCC);
9319 case ISD::STRICT_FSETCC:
9320 case ISD::STRICT_FSETCCS: {
9321 if (N0.hasOneUse()) {
9322 // FIXME Can we handle multiple uses? Could we token factor the chain
9323 // results from the new/old setcc?
9324 SDValue SetCC =
9325 DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC,
9326 N0.getOperand(0), N0Opcode == ISD::STRICT_FSETCCS);
9327 CombineTo(N, SetCC);
9328 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), SetCC.getValue(1));
9329 recursivelyDeleteUnusedNodes(N0.getNode());
9330 return SDValue(N, 0); // Return N so it doesn't get rechecked!
9331 }
9332 break;
9333 }
9334 }
9335 }
9336 }
9337
9338 // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
9339 if (isOneConstant(N1) && N0Opcode == ISD::ZERO_EXTEND && N0.hasOneUse() &&
9340 isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
9341 SDValue V = N0.getOperand(0);
9342 SDLoc DL0(N0);
9343 V = DAG.getNode(ISD::XOR, DL0, V.getValueType(), V,
9344 DAG.getConstant(1, DL0, V.getValueType()));
9345 AddToWorklist(V.getNode());
9346 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, V);
9347 }
9348
9349 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
9350 if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
9351 (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
9352 SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
9353 if (isOneUseSetCC(N01) || isOneUseSetCC(N00)) {
9354 unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
9355 N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
9356 N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
9357 AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
9358 return DAG.getNode(NewOpcode, DL, VT, N00, N01);
9359 }
9360 }
9361 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
9362 if (isAllOnesConstant(N1) && N0.hasOneUse() &&
9363 (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
9364 SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
9365 if (isa<ConstantSDNode>(N01) || isa<ConstantSDNode>(N00)) {
9366 unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
9367 N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
9368 N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
9369 AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
9370 return DAG.getNode(NewOpcode, DL, VT, N00, N01);
9371 }
9372 }
9373
9374 // fold (not (neg x)) -> (add X, -1)
9375 // FIXME: This can be generalized to (not (sub Y, X)) -> (add X, ~Y) if
9376 // Y is a constant or the subtract has a single use.
9377 if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::SUB &&
9378 isNullConstant(N0.getOperand(0))) {
9379 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
9380 DAG.getAllOnesConstant(DL, VT));
9381 }
9382
9383 // fold (not (add X, -1)) -> (neg X)
9384 if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::ADD &&
9385 isAllOnesOrAllOnesSplat(N0.getOperand(1), /*AllowUndefs*/ true)) {
9386 return DAG.getNegative(N0.getOperand(0), DL, VT);
9387 }
9388
9389 // fold (xor (and x, y), y) -> (and (not x), y)
9390 if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) {
9391 SDValue X = N0.getOperand(0);
9392 SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
9393 AddToWorklist(NotX.getNode());
9394 return DAG.getNode(ISD::AND, DL, VT, NotX, N1);
9395 }
9396
9397 // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
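// (Y is 0 when X is non-negative and all-ones when X is negative, so
// (X + Y) ^ Y yields X or ~(X - 1) == -X respectively, i.e. abs(X).)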
9398 if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
9399 SDValue A = N0Opcode == ISD::ADD ? N0 : N1;
9400 SDValue S = N0Opcode == ISD::SRA ? N0 : N1;
9401 if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
9402 SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
9403 SDValue S0 = S.getOperand(0);
9404 if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0))
9405 if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1)))
9406 if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
9407 return DAG.getNode(ISD::ABS, DL, VT, S0);
9408 }
9409 }
9410
9411 // fold (xor x, x) -> 0
9412 if (N0 == N1)
9413 return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
9414
9415 // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
9416 // Here is a concrete example of this equivalence:
9417 // i16 x == 14
9418 // i16 shl == 1 << 14 == 16384 == 0b0100000000000000
9419 // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
9420 //
9421 // =>
9422 //
9423 // i16 ~1 == 0b1111111111111110
9424 // i16 rol(~1, 14) == 0b1011111111111111
9425 //
9426 // Some additional tips to help conceptualize this transform:
9427 // - Try to see the operation as placing a single zero in a value of all ones.
9428 // - There exists no value for x which would allow the result to contain zero.
9429 // - Values of x larger than the bitwidth are undefined and do not require a
9430 // consistent result.
9431 // - Pushing the zero left requires shifting one-bits in from the right.
9432 // A rotate left of ~1 is a nice way of achieving the desired result.
9433 if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0Opcode == ISD::SHL &&
9434 isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
9435 return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
9436 N0.getOperand(1));
9437 }
9438
9439 // Simplify: xor (op x...), (op y...) -> (op (xor x, y))
9440 if (N0Opcode == N1.getOpcode())
9441 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
9442 return V;
9443
9444 if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
9445 return R;
9446 if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG))
9447 return R;
9448 if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
9449 return R;
9450
9451 // Unfold ((x ^ y) & m) ^ y into (x & m) | (y & ~m) if profitable
9452 if (SDValue MM = unfoldMaskedMerge(N))
9453 return MM;
9454
9455 // Simplify the expression using non-local knowledge.
9456 if (SimplifyDemandedBits(SDValue(N, 0)))
9457 return SDValue(N, 0);
9458
9459 if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
9460 return Combined;
9461
9462 return SDValue();
9463}
9464
9465/// If we have a shift-by-constant of a bitwise logic op that itself has a
9466/// shift-by-constant operand with identical opcode, we may be able to convert
9467/// that into 2 independent shifts followed by the logic op. This is a
9468 /// throughput improvement.
9469 static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG) {
9470 // Match a one-use bitwise logic op.
9471 SDValue LogicOp = Shift->getOperand(0);
9472 if (!LogicOp.hasOneUse())
9473 return SDValue();
9474
9475 unsigned LogicOpcode = LogicOp.getOpcode();
9476 if (LogicOpcode != ISD::AND && LogicOpcode != ISD::OR &&
9477 LogicOpcode != ISD::XOR)
9478 return SDValue();
9479
9480 // Find a matching one-use shift by constant.
9481 unsigned ShiftOpcode = Shift->getOpcode();
9482 SDValue C1 = Shift->getOperand(1);
9483 ConstantSDNode *C1Node = isConstOrConstSplat(C1);
9484 assert(C1Node && "Expected a shift with constant operand");
9485 const APInt &C1Val = C1Node->getAPIntValue();
9486 auto matchFirstShift = [&](SDValue V, SDValue &ShiftOp,
9487 const APInt *&ShiftAmtVal) {
9488 if (V.getOpcode() != ShiftOpcode || !V.hasOneUse())
9489 return false;
9490
9491 ConstantSDNode *ShiftCNode = isConstOrConstSplat(V.getOperand(1));
9492 if (!ShiftCNode)
9493 return false;
9494
9495 // Capture the shifted operand and shift amount value.
9496 ShiftOp = V.getOperand(0);
9497 ShiftAmtVal = &ShiftCNode->getAPIntValue();
9498
9499 // Shift amount types do not have to match their operand type, so check that
9500 // the constants are the same width.
9501 if (ShiftAmtVal->getBitWidth() != C1Val.getBitWidth())
9502 return false;
9503
9504 // The fold is not valid if the sum of the shift values exceeds bitwidth.
9505 if ((*ShiftAmtVal + C1Val).uge(V.getScalarValueSizeInBits()))
9506 return false;
9507
9508 return true;
9509 };
9510
9511 // Logic ops are commutative, so check each operand for a match.
9512 SDValue X, Y;
9513 const APInt *C0Val;
9514 if (matchFirstShift(LogicOp.getOperand(0), X, C0Val))
9515 Y = LogicOp.getOperand(1);
9516 else if (matchFirstShift(LogicOp.getOperand(1), X, C0Val))
9517 Y = LogicOp.getOperand(0);
9518 else
9519 return SDValue();
9520
9521 // shift (logic (shift X, C0), Y), C1 -> logic (shift X, C0+C1), (shift Y, C1)
9522 SDLoc DL(Shift);
9523 EVT VT = Shift->getValueType(0);
9524 EVT ShiftAmtVT = Shift->getOperand(1).getValueType();
9525 SDValue ShiftSumC = DAG.getConstant(*C0Val + C1Val, DL, ShiftAmtVT);
9526 SDValue NewShift1 = DAG.getNode(ShiftOpcode, DL, VT, X, ShiftSumC);
9527 SDValue NewShift2 = DAG.getNode(ShiftOpcode, DL, VT, Y, C1);
9528 return DAG.getNode(LogicOpcode, DL, VT, NewShift1, NewShift2);
9529}
9530
9531/// Handle transforms common to the three shifts, when the shift amount is a
9532/// constant.
9533/// We are looking for: (shift being one of shl/sra/srl)
9534/// shift (binop X, C0), C1
9535/// And want to transform into:
9536/// binop (shift X, C1), (shift C0, C1)
9537SDValue DAGCombiner::visitShiftByConstant(SDNode *N) {
9538 assert(isConstOrConstSplat(N->getOperand(1)) && "Expected constant operand");
9539
9540 // Do not turn a 'not' into a regular xor.
9541 if (isBitwiseNot(N->getOperand(0)))
9542 return SDValue();
9543
9544 // The inner binop must be one-use, since we want to replace it.
9545 SDValue LHS = N->getOperand(0);
9546 if (!LHS.hasOneUse() || !TLI.isDesirableToCommuteWithShift(N, Level))
9547 return SDValue();
9548
9549 // Fold shift(bitop(shift(x,c1),y), c2) -> bitop(shift(x,c1+c2),shift(y,c2)).
9550 if (SDValue R = combineShiftOfShiftedLogic(N, DAG))
9551 return R;
9552
9553 // We want to pull some binops through shifts, so that we have (and (shift))
9554 // instead of (shift (and)), likewise for add, or, xor, etc. This sort of
9555 // thing happens with address calculations, so it's important to canonicalize
9556 // it.
9557 switch (LHS.getOpcode()) {
9558 default:
9559 return SDValue();
9560 case ISD::OR:
9561 case ISD::XOR:
9562 case ISD::AND:
9563 break;
9564 case ISD::ADD:
9565 if (N->getOpcode() != ISD::SHL)
9566 return SDValue(); // only shl(add) not sr[al](add).
9567 break;
9568 }
9569
9570 // FIXME: disable this unless the input to the binop is a shift by a constant
9571 // or is a copy/select. Enable this in other cases once we have determined that
9572 // it is profitable.
9573 SDValue BinOpLHSVal = LHS.getOperand(0);
9574 bool IsShiftByConstant = (BinOpLHSVal.getOpcode() == ISD::SHL ||
9575 BinOpLHSVal.getOpcode() == ISD::SRA ||
9576 BinOpLHSVal.getOpcode() == ISD::SRL) &&
9577 isa<ConstantSDNode>(BinOpLHSVal.getOperand(1));
9578 bool IsCopyOrSelect = BinOpLHSVal.getOpcode() == ISD::CopyFromReg ||
9579 BinOpLHSVal.getOpcode() == ISD::SELECT;
9580
9581 if (!IsShiftByConstant && !IsCopyOrSelect)
9582 return SDValue();
9583
9584 if (IsCopyOrSelect && N->hasOneUse())
9585 return SDValue();
9586
9587 // Attempt to fold the constants, shifting the binop RHS by the shift amount.
9588 SDLoc DL(N);
9589 EVT VT = N->getValueType(0);
9590 if (SDValue NewRHS = DAG.FoldConstantArithmetic(
9591 N->getOpcode(), DL, VT, {LHS.getOperand(1), N->getOperand(1)})) {
9592 SDValue NewShift = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(0),
9593 N->getOperand(1));
9594 return DAG.getNode(LHS.getOpcode(), DL, VT, NewShift, NewRHS);
9595 }
9596
9597 return SDValue();
9598}
9599
9600SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
9601 assert(N->getOpcode() == ISD::TRUNCATE);
9602 assert(N->getOperand(0).getOpcode() == ISD::AND);
9603
9604 // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
9605 EVT TruncVT = N->getValueType(0);
9606 if (N->hasOneUse() && N->getOperand(0).hasOneUse() &&
9607 TLI.isTypeDesirableForOp(ISD::AND, TruncVT)) {
9608 SDValue N01 = N->getOperand(0).getOperand(1);
9609 if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
9610 SDLoc DL(N);
9611 SDValue N00 = N->getOperand(0).getOperand(0);
9612 SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
9613 SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
9614 AddToWorklist(Trunc00.getNode());
9615 AddToWorklist(Trunc01.getNode());
9616 return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
9617 }
9618 }
9619
9620 return SDValue();
9621}
9622
9623SDValue DAGCombiner::visitRotate(SDNode *N) {
9624 SDLoc dl(N);
9625 SDValue N0 = N->getOperand(0);
9626 SDValue N1 = N->getOperand(1);
9627 EVT VT = N->getValueType(0);
9628 unsigned Bitsize = VT.getScalarSizeInBits();
9629
9630 // fold (rot x, 0) -> x
9631 if (isNullOrNullSplat(N1))
9632 return N0;
9633
9634 // fold (rot x, c) -> x iff (c % BitSize) == 0
9635 if (isPowerOf2_32(Bitsize) && Bitsize > 1) {
9636 APInt ModuloMask(N1.getScalarValueSizeInBits(), Bitsize - 1);
9637 if (DAG.MaskedValueIsZero(N1, ModuloMask))
9638 return N0;
9639 }
9640
9641 // fold (rot x, c) -> (rot x, c % BitSize)
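// (Match every constant element, but remember whether any rotate amount is
// >= BitSize so the node is only rebuilt when a urem normalization is needed.)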
9642 bool OutOfRange = false;
9643 auto MatchOutOfRange = [Bitsize, &OutOfRange](ConstantSDNode *C) {
9644 OutOfRange |= C->getAPIntValue().uge(Bitsize);
9645 return true;
9646 };
9647 if (ISD::matchUnaryPredicate(N1, MatchOutOfRange) && OutOfRange) {
9648 EVT AmtVT = N1.getValueType();
9649 SDValue Bits = DAG.getConstant(Bitsize, dl, AmtVT);
9650 if (SDValue Amt =
9651 DAG.FoldConstantArithmetic(ISD::UREM, dl, AmtVT, {N1, Bits}))
9652 return DAG.getNode(N->getOpcode(), dl, VT, N0, Amt);
9653 }
9654
9655 // rot i16 X, 8 --> bswap X
9656 auto *RotAmtC = isConstOrConstSplat(N1);
9657 if (RotAmtC && RotAmtC->getAPIntValue() == 8 &&
9658 VT.getScalarSizeInBits() == 16 && hasOperation(ISD::BSWAP, VT))
9659 return DAG.getNode(ISD::BSWAP, dl, VT, N0);
9660
9661 // Simplify the operands using demanded-bits information.
9662 if (SimplifyDemandedBits(SDValue(N, 0)))
9663 return SDValue(N, 0);
9664
9665 // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
9666 if (N1.getOpcode() == ISD::TRUNCATE &&
9667 N1.getOperand(0).getOpcode() == ISD::AND) {
9668 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
9669 return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
9670 }
9671
9672 unsigned NextOp = N0.getOpcode();
9673
9674 // fold (rot* (rot* x, c2), c1)
9675 // -> (rot* x, ((c1 % bitsize) +- (c2 % bitsize) + bitsize) % bitsize)
9676 if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
9677 SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
9678 SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
9679 if (C1 && C2 && C1->getValueType(0) == C2->getValueType(0)) {
9680 EVT ShiftVT = C1->getValueType(0);
9681 bool SameSide = (N->getOpcode() == NextOp);
9682 unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
9683 SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
9684 SDValue Norm1 = DAG.FoldConstantArithmetic(ISD::UREM, dl, ShiftVT,
9685 {N1, BitsizeC});
9686 SDValue Norm2 = DAG.FoldConstantArithmetic(ISD::UREM, dl, ShiftVT,
9687 {N0.getOperand(1), BitsizeC});
9688 if (Norm1 && Norm2)
9689 if (SDValue CombinedShift = DAG.FoldConstantArithmetic(
9690 CombineOp, dl, ShiftVT, {Norm1, Norm2})) {
9691 CombinedShift = DAG.FoldConstantArithmetic(ISD::ADD, dl, ShiftVT,
9692 {CombinedShift, BitsizeC});
9693 SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
9694 ISD::UREM, dl, ShiftVT, {CombinedShift, BitsizeC});
9695 return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
9696 CombinedShiftNorm);
9697 }
9698 }
9699 }
9700 return SDValue();
9701}
9702
9703SDValue DAGCombiner::visitSHL(SDNode *N) {
9704 SDValue N0 = N->getOperand(0);
9705 SDValue N1 = N->getOperand(1);
9706 if (SDValue V = DAG.simplifyShift(N0, N1))
9707 return V;
9708
9709 EVT VT = N0.getValueType();
9710 EVT ShiftVT = N1.getValueType();
9711 unsigned OpSizeInBits = VT.getScalarSizeInBits();
9712
9713 // fold (shl c1, c2) -> c1<<c2
9714 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N0, N1}))
9715 return C;
9716
9717 // fold vector ops
9718 if (VT.isVector()) {
9719 if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
9720 return FoldedVOp;
9721
9722 BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
9723 // If setcc produces all-one true value then:
9724 // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
9725 if (N1CV && N1CV->isConstant()) {
9726 if (N0.getOpcode() == ISD::AND) {
9727 SDValue N00 = N0->getOperand(0);
9728 SDValue N01 = N0->getOperand(1);
9729 BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
9730
9731 if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
9732 TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
9733 TargetLowering::ZeroOrNegativeOneBooleanContent) {
9734 if (SDValue C =
9735 DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N01, N1}))
9736 return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
9737 }
9738 }
9739 }
9740 }
9741
9742 if (SDValue NewSel = foldBinOpIntoSelect(N))
9743 return NewSel;
9744
9745 // if (shl x, c) is known to be zero, return 0
9746 if (DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))
9747 return DAG.getConstant(0, SDLoc(N), VT);
9748
9749 // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
9750 if (N1.getOpcode() == ISD::TRUNCATE &&
9751 N1.getOperand(0).getOpcode() == ISD::AND) {
9752 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
9753 return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
9754 }
9755
9756 // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
9757 if (N0.getOpcode() == ISD::SHL) {
9758 auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
9759 ConstantSDNode *RHS) {
9760 APInt c1 = LHS->getAPIntValue();
9761 APInt c2 = RHS->getAPIntValue();
9762 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
9763 return (c1 + c2).uge(OpSizeInBits);
9764 };
9765 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
9766 return DAG.getConstant(0, SDLoc(N), VT);
9767
9768 auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
9769 ConstantSDNode *RHS) {
9770 APInt c1 = LHS->getAPIntValue();
9771 APInt c2 = RHS->getAPIntValue();
9772 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
9773 return (c1 + c2).ult(OpSizeInBits);
9774 };
9775 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
9776 SDLoc DL(N);
9777 SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
9778 return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
9779 }
9780 }
9781
9782 // fold (shl (ext (shl x, c1)), c2) -> (shl (ext x), (add c1, c2))
9783 // For this to be valid, the second form must not preserve any of the bits
9784 // that are shifted out by the inner shift in the first form. This means
9785 // the outer shift size must be >= the number of bits added by the ext.
9786 // As a corollary, we don't care what kind of ext it is.
9787 if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
9788 N0.getOpcode() == ISD::ANY_EXTEND ||
9789 N0.getOpcode() == ISD::SIGN_EXTEND) &&
9790 N0.getOperand(0).getOpcode() == ISD::SHL) {
9791 SDValue N0Op0 = N0.getOperand(0);
9792 SDValue InnerShiftAmt = N0Op0.getOperand(1);
9793 EVT InnerVT = N0Op0.getValueType();
9794 uint64_t InnerBitwidth = InnerVT.getScalarSizeInBits();
9795
9796 auto MatchOutOfRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
9797 ConstantSDNode *RHS) {
9798 APInt c1 = LHS->getAPIntValue();
9799 APInt c2 = RHS->getAPIntValue();
9800 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
9801 return c2.uge(OpSizeInBits - InnerBitwidth) &&
9802 (c1 + c2).uge(OpSizeInBits);
9803 };
9804 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchOutOfRange,
9805 /*AllowUndefs*/ false,
9806 /*AllowTypeMismatch*/ true))
9807 return DAG.getConstant(0, SDLoc(N), VT);
9808
9809 auto MatchInRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
9810 ConstantSDNode *RHS) {
9811 APInt c1 = LHS->getAPIntValue();
9812 APInt c2 = RHS->getAPIntValue();
9813 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
9814 return c2.uge(OpSizeInBits - InnerBitwidth) &&
9815 (c1 + c2).ult(OpSizeInBits);
9816 };
9817 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchInRange,
9818 /*AllowUndefs*/ false,
9819 /*AllowTypeMismatch*/ true)) {
9820 SDLoc DL(N);
9821 SDValue Ext = DAG.getNode(N0.getOpcode(), DL, VT, N0Op0.getOperand(0));
9822 SDValue Sum = DAG.getZExtOrTrunc(InnerShiftAmt, DL, ShiftVT);
9823 Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, Sum, N1);
9824 return DAG.getNode(ISD::SHL, DL, VT, Ext, Sum);
9825 }
9826 }
9827
9828 // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
9829 // Only fold this if the inner zext has no other uses to avoid increasing
9830 // the total number of instructions.
9831 if (N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
9832 N0.getOperand(0).getOpcode() == ISD::SRL) {
9833 SDValue N0Op0 = N0.getOperand(0);
9834 SDValue InnerShiftAmt = N0Op0.getOperand(1);
9835
9836 auto MatchEqual = [VT](ConstantSDNode *LHS, ConstantSDNode *RHS) {
9837 APInt c1 = LHS->getAPIntValue();
9838 APInt c2 = RHS->getAPIntValue();
9839 zeroExtendToMatch(c1, c2);
9840 return c1.ult(VT.getScalarSizeInBits()) && (c1 == c2);
9841 };
9842 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchEqual,
9843 /*AllowUndefs*/ false,
9844 /*AllowTypeMismatch*/ true)) {
9845 SDLoc DL(N);
9846 EVT InnerShiftAmtVT = N0Op0.getOperand(1).getValueType();
9847 SDValue NewSHL = DAG.getZExtOrTrunc(N1, DL, InnerShiftAmtVT);
9848 NewSHL = DAG.getNode(ISD::SHL, DL, N0Op0.getValueType(), N0Op0, NewSHL);
9849 AddToWorklist(NewSHL.getNode());
9850 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
9851 }
9852 }
9853
9854 if (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) {
9855 auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS,
9856 ConstantSDNode *RHS) {
9857 const APInt &LHSC = LHS->getAPIntValue();
9858 const APInt &RHSC = RHS->getAPIntValue();
9859 return LHSC.ult(OpSizeInBits) && RHSC.ult(OpSizeInBits) &&
9860 LHSC.getZExtValue() <= RHSC.getZExtValue();
9861 };
9862
9863 SDLoc DL(N);
9864
9865 // fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2
9866 // fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C1-C2)) if C1 >= C2
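// (The 'exact' flag guarantees that no set bits were shifted out by the first
// shift, so both cases reduce to one shift by the difference of the amounts.)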
9867 if (N0->getFlags().hasExact()) {
9868 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
9869 /*AllowUndefs*/ false,
9870 /*AllowTypeMismatch*/ true)) {
9871 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
9872 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
9873 return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
9874 }
9875 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
9876 /*AllowUndefs*/ false,
9877 /*AllowTypeMismatch*/ true)) {
9878 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
9879 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
9880 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0), Diff);
9881 }
9882 }
9883
9884 // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1)), MASK) or
9885 // (and (srl x, (sub c1, c2)), MASK)
9886 // Only fold this if the inner shift has no other uses -- if it does,
9887 // folding this will increase the total number of instructions.
9888 if (N0.getOpcode() == ISD::SRL &&
9889 (N0.getOperand(1) == N1 || N0.hasOneUse()) &&
9890 TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
9891 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
9892 /*AllowUndefs*/ false,
9893 /*AllowTypeMismatch*/ true)) {
9894 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
9895 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
9896 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
9897 Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, N01);
9898 Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, Diff);
9899 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Diff);
9900 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
9901 }
9902 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
9903 /*AllowUndefs*/ false,
9904 /*AllowTypeMismatch*/ true)) {
9905 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
9906 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
9907 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
9908 Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, N1);
9909 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
9910 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
9911 }
9912 }
9913 }
9914
9915 // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
9916 if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
9917 isConstantOrConstantVector(N1, /* No Opaques */ true)) {
9918 SDLoc DL(N);
9919 SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
9920 SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
9921 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
9922 }
9923
9924 // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
9925 // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
9926 // Variant of version done on multiply, except mul by a power of 2 is turned
9927 // into a shift.
9928 if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
9929 N0->hasOneUse() && TLI.isDesirableToCommuteWithShift(N, Level)) {
9930 SDValue N01 = N0.getOperand(1);
9931 if (SDValue Shl1 =
9932 DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, {N01, N1})) {
9933 SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
9934 AddToWorklist(Shl0.getNode());
9935 SDNodeFlags Flags;
9936 // Preserve the disjoint flag for Or.
9937 if (N0.getOpcode() == ISD::OR && N0->getFlags().hasDisjoint())
9938 Flags.setDisjoint(true);
9939 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1, Flags);
9940 }
9941 }
9942
9943 // fold (shl (sext (add_nsw x, c1)), c2) -> (add (shl (sext x), c2), c1 << c2)
9944 // TODO: Add zext/add_nuw variant with suitable test coverage
9945 // TODO: Should we limit this with isLegalAddImmediate?
9946 if (N0.getOpcode() == ISD::SIGN_EXTEND &&
9947 N0.getOperand(0).getOpcode() == ISD::ADD &&
9948 N0.getOperand(0)->getFlags().hasNoSignedWrap() && N0->hasOneUse() &&
9949 N0.getOperand(0)->hasOneUse() &&
9950 TLI.isDesirableToCommuteWithShift(N, Level)) {
9951 SDValue Add = N0.getOperand(0);
9952 SDLoc DL(N0);
9953 if (SDValue ExtC = DAG.FoldConstantArithmetic(N0.getOpcode(), DL, VT,
9954 {Add.getOperand(1)})) {
9955 if (SDValue ShlC =
9956 DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {ExtC, N1})) {
9957 SDValue ExtX = DAG.getNode(N0.getOpcode(), DL, VT, Add.getOperand(0));
9958 SDValue ShlX = DAG.getNode(ISD::SHL, DL, VT, ExtX, N1);
9959 return DAG.getNode(ISD::ADD, DL, VT, ShlX, ShlC);
9960 }
9961 }
9962 }
9963
9964 // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
9965 if (N0.getOpcode() == ISD::MUL && N0->hasOneUse()) {
9966 SDValue N01 = N0.getOperand(1);
9967 if (SDValue Shl =
9968 DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, {N01, N1}))
9969 return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl);
9970 }
9971
9972 ConstantSDNode *N1C = isConstOrConstSplat(N1);
9973 if (N1C && !N1C->isOpaque())
9974 if (SDValue NewSHL = visitShiftByConstant(N))
9975 return NewSHL;
9976
9977 if (SimplifyDemandedBits(SDValue(N, 0)))
9978 return SDValue(N, 0);
9979
9980 // Fold (shl (vscale * C0), C1) to (vscale * (C0 << C1)).
9981 if (N0.getOpcode() == ISD::VSCALE && N1C) {
9982 const APInt &C0 = N0.getConstantOperandAPInt(0);
9983 const APInt &C1 = N1C->getAPIntValue();
9984 return DAG.getVScale(SDLoc(N), VT, C0 << C1);
9985 }
9986
9987 // Fold (shl step_vector(C0), C1) to (step_vector(C0 << C1)).
9988 APInt ShlVal;
9989 if (N0.getOpcode() == ISD::STEP_VECTOR &&
9990 ISD::isConstantSplatVector(N1.getNode(), ShlVal)) {
9991 const APInt &C0 = N0.getConstantOperandAPInt(0);
9992 if (ShlVal.ult(C0.getBitWidth())) {
9993 APInt NewStep = C0 << ShlVal;
9994 return DAG.getStepVector(SDLoc(N), VT, NewStep);
9995 }
9996 }
9997
9998 return SDValue();
9999}
10000
10001// Transform a right shift of a multiply into a multiply-high.
10002 // Examples:
10003 // (srl (mul (zext i32:$a to i64), (zext i32:$b to i64)), 32) -> (mulhu $a, $b)
10004 // (sra (mul (sext i32:$a to i64), (sext i32:$b to i64)), 32) -> (mulhs $a, $b)
10005 static SDValue combineShiftToMULH(SDNode *N, SelectionDAG &DAG,
10006 const TargetLowering &TLI) {
10007 assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
10008 "SRL or SRA node is required here!");
10009
10010 // Check the shift amount. Proceed with the transformation if the shift
10011 // amount is constant.
10012 ConstantSDNode *ShiftAmtSrc = isConstOrConstSplat(N->getOperand(1));
10013 if (!ShiftAmtSrc)
10014 return SDValue();
10015
10016 SDLoc DL(N);
10017
10018 // The operation feeding into the shift must be a multiply.
10019 SDValue ShiftOperand = N->getOperand(0);
10020 if (ShiftOperand.getOpcode() != ISD::MUL)
10021 return SDValue();
10022
10023 // Both operands must be equivalent extend nodes.
10024 SDValue LeftOp = ShiftOperand.getOperand(0);
10025 SDValue RightOp = ShiftOperand.getOperand(1);
10026
10027 bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND;
10028 bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND;
10029
10030 if (!IsSignExt && !IsZeroExt)
10031 return SDValue();
10032
10033 EVT NarrowVT = LeftOp.getOperand(0).getValueType();
10034 unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits();
10035
10036 // return true if U may use the lower bits of its operands
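// (Only a srl/sra by a constant of at least NarrowVTSize bits is guaranteed to
// ignore the low half of the widened product.)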
10037 auto UserOfLowerBits = [NarrowVTSize](SDNode *U) {
10038 if (U->getOpcode() != ISD::SRL && U->getOpcode() != ISD::SRA) {
10039 return true;
10040 }
10041 ConstantSDNode *UShiftAmtSrc = isConstOrConstSplat(U->getOperand(1));
10042 if (!UShiftAmtSrc) {
10043 return true;
10044 }
10045 unsigned UShiftAmt = UShiftAmtSrc->getZExtValue();
10046 return UShiftAmt < NarrowVTSize;
10047 };
10048
10049 // If the lower part of the MUL is also used and MUL_LOHI is supported,
10050 // do not introduce the MULH in favor of MUL_LOHI.
10051 unsigned MulLoHiOp = IsSignExt ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
10052 if (!ShiftOperand.hasOneUse() &&
10053 TLI.isOperationLegalOrCustom(MulLoHiOp, NarrowVT) &&
10054 llvm::any_of(ShiftOperand->uses(), UserOfLowerBits)) {
10055 return SDValue();
10056 }
10057
10058 SDValue MulhRightOp;
10059 if (ConstantSDNode *Constant = isConstOrConstSplat(RightOp)) {
10060 unsigned ActiveBits = IsSignExt
10061 ? Constant->getAPIntValue().getSignificantBits()
10062 : Constant->getAPIntValue().getActiveBits();
10063 if (ActiveBits > NarrowVTSize)
10064 return SDValue();
10065 MulhRightOp = DAG.getConstant(
10066 Constant->getAPIntValue().trunc(NarrowVT.getScalarSizeInBits()), DL,
10067 NarrowVT);
10068 } else {
10069 if (LeftOp.getOpcode() != RightOp.getOpcode())
10070 return SDValue();
10071 // Check that the two extend nodes are the same type.
10072 if (NarrowVT != RightOp.getOperand(0).getValueType())
10073 return SDValue();
10074 MulhRightOp = RightOp.getOperand(0);
10075 }
10076
10077 EVT WideVT = LeftOp.getValueType();
10078 // Proceed with the transformation if the wide types match.
10079 assert((WideVT == RightOp.getValueType()) &&
10080 "Cannot have a multiply node with two different operand types.");
10081
10082 // Proceed with the transformation if the wide type is twice as large
10083 // as the narrow type.
10084 if (WideVT.getScalarSizeInBits() != 2 * NarrowVTSize)
10085 return SDValue();
10086
10087 // Check the shift amount with the narrow type size.
10088 // Proceed with the transformation if the shift amount is the width
10089 // of the narrow type.
10090 unsigned ShiftAmt = ShiftAmtSrc->getZExtValue();
10091 if (ShiftAmt != NarrowVTSize)
10092 return SDValue();
10093
10094 // If the operation feeding into the MUL is a sign extend (sext),
10095 // we use mulhs. Otherwise, zero extends (zext) use mulhu.
10096 unsigned MulhOpcode = IsSignExt ? ISD::MULHS : ISD::MULHU;
10097
10098 // Combine to mulh if mulh is legal/custom for the narrow type on the target.
10099 // If it is a vector type, we can transform to an acceptable type and
10100 // rely on legalization to split/combine the result.
10101 if (NarrowVT.isVector()) {
10102 EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), NarrowVT);
10103 if (TransformVT.getVectorElementType() != NarrowVT.getVectorElementType() ||
10104 !TLI.isOperationLegalOrCustom(MulhOpcode, TransformVT))
10105 return SDValue();
10106 } else {
10107 if (!TLI.isOperationLegalOrCustom(MulhOpcode, NarrowVT))
10108 return SDValue();
10109 }
10110
10111 SDValue Result =
10112 DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0), MulhRightOp);
10113 bool IsSigned = N->getOpcode() == ISD::SRA;
10114 return DAG.getExtOrTrunc(IsSigned, Result, DL, WideVT);
10115}
10116
10117// fold (bswap (logic_op(bswap(x),y))) -> logic_op(x,bswap(y))
10118 // This helper function accepts SDNodes with opcode ISD::BSWAP and ISD::BITREVERSE.
10119 static SDValue foldBitOrderCrossLogicOp(SDNode *N, SelectionDAG &DAG) {
10120 unsigned Opcode = N->getOpcode();
10121 if (Opcode != ISD::BSWAP && Opcode != ISD::BITREVERSE)
10122 return SDValue();
10123
10124 SDValue N0 = N->getOperand(0);
10125 EVT VT = N->getValueType(0);
10126 SDLoc DL(N);
10127 if (ISD::isBitwiseLogicOp(N0.getOpcode()) && N0.hasOneUse()) {
10128 SDValue OldLHS = N0.getOperand(0);
10129 SDValue OldRHS = N0.getOperand(1);
10130
10131 // If both operands are bswap/bitreverse, ignore the multiuse
10132 // Otherwise need to ensure logic_op and bswap/bitreverse(x) have one use.
10133 if (OldLHS.getOpcode() == Opcode && OldRHS.getOpcode() == Opcode) {
10134 return DAG.getNode(N0.getOpcode(), DL, VT, OldLHS.getOperand(0),
10135 OldRHS.getOperand(0));
10136 }
10137
10138 if (OldLHS.getOpcode() == Opcode && OldLHS.hasOneUse()) {
10139 SDValue NewBitReorder = DAG.getNode(Opcode, DL, VT, OldRHS);
10140 return DAG.getNode(N0.getOpcode(), DL, VT, OldLHS.getOperand(0),
10141 NewBitReorder);
10142 }
10143
10144 if (OldRHS.getOpcode() == Opcode && OldRHS.hasOneUse()) {
10145 SDValue NewBitReorder = DAG.getNode(Opcode, DL, VT, OldLHS);
10146 return DAG.getNode(N0.getOpcode(), DL, VT, NewBitReorder,
10147 OldRHS.getOperand(0));
10148 }
10149 }
10150 return SDValue();
10151}
10152
10153SDValue DAGCombiner::visitSRA(SDNode *N) {
10154 SDValue N0 = N->getOperand(0);
10155 SDValue N1 = N->getOperand(1);
10156 if (SDValue V = DAG.simplifyShift(N0, N1))
10157 return V;
10158
10159 EVT VT = N0.getValueType();
10160 unsigned OpSizeInBits = VT.getScalarSizeInBits();
10161
10162 // fold (sra c1, c2) -> c1>>c2
10163 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, {N0, N1}))
10164 return C;
10165
10166 // Arithmetic shifting an all-sign-bit value is a no-op.
10167 // fold (sra 0, x) -> 0
10168 // fold (sra -1, x) -> -1
10169 if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
10170 return N0;
10171
10172 // fold vector ops
10173 if (VT.isVector())
10174 if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
10175 return FoldedVOp;
10176
10177 if (SDValue NewSel = foldBinOpIntoSelect(N))
10178 return NewSel;
10179
10180 ConstantSDNode *N1C = isConstOrConstSplat(N1);
10181
10182 // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
10183 // clamp (add c1, c2) to max shift.
10184 if (N0.getOpcode() == ISD::SRA) {
10185 SDLoc DL(N);
10186 EVT ShiftVT = N1.getValueType();
10187 EVT ShiftSVT = ShiftVT.getScalarType();
10188 SmallVector<SDValue, 16> ShiftValues;
10189
10190 auto SumOfShifts = [&](ConstantSDNode *LHS, ConstantSDNode *RHS) {
10191 APInt c1 = LHS->getAPIntValue();
10192 APInt c2 = RHS->getAPIntValue();
10193 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10194 APInt Sum = c1 + c2;
10195 unsigned ShiftSum =
10196 Sum.uge(OpSizeInBits) ? (OpSizeInBits - 1) : Sum.getZExtValue();
10197 ShiftValues.push_back(DAG.getConstant(ShiftSum, DL, ShiftSVT));
10198 return true;
10199 };
10200 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), SumOfShifts)) {
10201 SDValue ShiftValue;
10202 if (N1.getOpcode() == ISD::BUILD_VECTOR)
10203 ShiftValue = DAG.getBuildVector(ShiftVT, DL, ShiftValues);
10204 else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
10205 assert(ShiftValues.size() == 1 &&
10206 "Expected matchBinaryPredicate to return one element for "
10207 "SPLAT_VECTORs");
10208 ShiftValue = DAG.getSplatVector(ShiftVT, DL, ShiftValues[0]);
10209 } else
10210 ShiftValue = ShiftValues[0];
10211 return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), ShiftValue);
10212 }
10213 }
10214
10215 // fold (sra (shl X, m), (sub result_size, n))
10216 // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
10217 // result_size - n != m.
10218 // If truncate is free for the target, sext(shl) is likely to result in better
10219 // code.
10220 if (N0.getOpcode() == ISD::SHL && N1C) {
10221 // Get the two constants of the shifts, CN0 = m, CN = n.
10222 const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
10223 if (N01C) {
10224 LLVMContext &Ctx = *DAG.getContext();
10225 // Determine what the truncate's result bitsize and type would be.
10226 EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
10227
10228 if (VT.isVector())
10229 TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
10230
10231 // Determine the residual right-shift amount.
10232 int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
10233
10234 // If the shift is not a no-op (in which case this should be just a sign
10235 // extend already), the truncated to type is legal, sign_extend is legal
10236 // on that type, and the truncate to that type is both legal and free,
10237 // perform the transform.
10238 if ((ShiftAmt > 0) &&
10239 TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
10240 TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
10241 TLI.isTruncateFree(VT, TruncVT)) {
10242 SDLoc DL(N);
10243 SDValue Amt = DAG.getConstant(ShiftAmt, DL,
10244 getShiftAmountTy(N0.getOperand(0).getValueType()));
10245 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
10246 N0.getOperand(0), Amt);
10247 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
10248 Shift);
10249 return DAG.getNode(ISD::SIGN_EXTEND, DL,
10250 N->getValueType(0), Trunc);
10251 }
10252 }
10253 }
10254
10255 // We convert trunc/ext to opposing shifts in IR, but casts may be cheaper.
10256 // sra (add (shl X, N1C), AddC), N1C -->
10257 // sext (add (trunc X to (width - N1C)), AddC')
10258 // sra (sub AddC, (shl X, N1C)), N1C -->
10259 // sext (sub AddC1',(trunc X to (width - N1C)))
10260 if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB) && N1C &&
10261 N0.hasOneUse()) {
10262 bool IsAdd = N0.getOpcode() == ISD::ADD;
10263 SDValue Shl = N0.getOperand(IsAdd ? 0 : 1);
10264 if (Shl.getOpcode() == ISD::SHL && Shl.getOperand(1) == N1 &&
10265 Shl.hasOneUse()) {
10266 // TODO: AddC does not need to be a splat.
10267 if (ConstantSDNode *AddC =
10268 isConstOrConstSplat(N0.getOperand(IsAdd ? 1 : 0))) {
10269 // Determine what the truncate's type would be and ask the target if
10270 // that is a free operation.
10271 LLVMContext &Ctx = *DAG.getContext();
10272 unsigned ShiftAmt = N1C->getZExtValue();
10273 EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt);
10274 if (VT.isVector())
10275 TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
10276
10277 // TODO: The simple type check probably belongs in the default hook
10278 // implementation and/or target-specific overrides (because
10279 // non-simple types likely require masking when legalized), but
10280 // that restriction may conflict with other transforms.
10281 if (TruncVT.isSimple() && isTypeLegal(TruncVT) &&
10282 TLI.isTruncateFree(VT, TruncVT)) {
10283 SDLoc DL(N);
10284 SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT);
10285 SDValue ShiftC =
10286 DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).trunc(
10287 TruncVT.getScalarSizeInBits()),
10288 DL, TruncVT);
10289 SDValue Add;
10290 if (IsAdd)
10291 Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC);
10292 else
10293 Add = DAG.getNode(ISD::SUB, DL, TruncVT, ShiftC, Trunc);
10294 return DAG.getSExtOrTrunc(Add, DL, VT);
10295 }
10296 }
10297 }
10298 }
10299
10300 // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
10301 if (N1.getOpcode() == ISD::TRUNCATE &&
10302 N1.getOperand(0).getOpcode() == ISD::AND) {
10303 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
10304 return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
10305 }
10306
10307 // fold (sra (trunc (sra x, c1)), c2) -> (trunc (sra x, c1 + c2))
10308 // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
10309 // if c1 is equal to the number of bits the trunc removes
10310 // TODO - support non-uniform vector shift amounts.
10311 if (N0.getOpcode() == ISD::TRUNCATE &&
10312 (N0.getOperand(0).getOpcode() == ISD::SRL ||
10313 N0.getOperand(0).getOpcode() == ISD::SRA) &&
10314 N0.getOperand(0).hasOneUse() &&
10315 N0.getOperand(0).getOperand(1).hasOneUse() && N1C) {
10316 SDValue N0Op0 = N0.getOperand(0);
10317 if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
10318 EVT LargeVT = N0Op0.getValueType();
10319 unsigned TruncBits = LargeVT.getScalarSizeInBits() - OpSizeInBits;
10320 if (LargeShift->getAPIntValue() == TruncBits) {
10321 SDLoc DL(N);
10322 EVT LargeShiftVT = getShiftAmountTy(LargeVT);
10323 SDValue Amt = DAG.getZExtOrTrunc(N1, DL, LargeShiftVT);
10324 Amt = DAG.getNode(ISD::ADD, DL, LargeShiftVT, Amt,
10325 DAG.getConstant(TruncBits, DL, LargeShiftVT));
10326 SDValue SRA =
10327 DAG.getNode(ISD::SRA, DL, LargeVT, N0Op0.getOperand(0), Amt);
10328 return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
10329 }
10330 }
10331 }
10332
10333 // Simplify, based on bits shifted out of the LHS.
10334 if (SimplifyDemandedBits(SDValue(N, 0)))
10335 return SDValue(N, 0);
10336
10337 // If the sign bit is known to be zero, switch this to a SRL.
10338 if (DAG.SignBitIsZero(N0))
10339 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);
10340
10341 if (N1C && !N1C->isOpaque())
10342 if (SDValue NewSRA = visitShiftByConstant(N))
10343 return NewSRA;
10344
10345 // Try to transform this shift into a multiply-high if
10346 // it matches the appropriate pattern detected in combineShiftToMULH.
10347 if (SDValue MULH = combineShiftToMULH(N, DAG, TLI))
10348 return MULH;
10349
10350 // Attempt to convert a sra of a load into a narrower sign-extending load.
10351 if (SDValue NarrowLoad = reduceLoadWidth(N))
10352 return NarrowLoad;
10353
10354 return SDValue();
10355}
10356
10357SDValue DAGCombiner::visitSRL(SDNode *N) {
10358 SDValue N0 = N->getOperand(0);
10359 SDValue N1 = N->getOperand(1);
10360 if (SDValue V = DAG.simplifyShift(N0, N1))
10361 return V;
10362
10363 EVT VT = N0.getValueType();
10364 EVT ShiftVT = N1.getValueType();
10365 unsigned OpSizeInBits = VT.getScalarSizeInBits();
10366
10367 // fold (srl c1, c2) -> c1 >>u c2
10368 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, {N0, N1}))
10369 return C;
10370
10371 // fold vector ops
10372 if (VT.isVector())
10373 if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
10374 return FoldedVOp;
10375
10376 if (SDValue NewSel = foldBinOpIntoSelect(N))
10377 return NewSel;
10378
10379 // if (srl x, c) is known to be zero, return 0
10380 ConstantSDNode *N1C = isConstOrConstSplat(N1);
10381 if (N1C &&
10382 DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))
10383 return DAG.getConstant(0, SDLoc(N), VT);
10384
10385 // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
10386 if (N0.getOpcode() == ISD::SRL) {
10387 auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
10388 ConstantSDNode *RHS) {
10389 APInt c1 = LHS->getAPIntValue();
10390 APInt c2 = RHS->getAPIntValue();
10391 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10392 return (c1 + c2).uge(OpSizeInBits);
10393 };
10394 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
10395 return DAG.getConstant(0, SDLoc(N), VT);
10396
10397 auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
10398 ConstantSDNode *RHS) {
10399 APInt c1 = LHS->getAPIntValue();
10400 APInt c2 = RHS->getAPIntValue();
10401 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10402 return (c1 + c2).ult(OpSizeInBits);
10403 };
10404 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
10405 SDLoc DL(N);
10406 SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
10407 return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
10408 }
10409 }
10410
10411 if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
10412 N0.getOperand(0).getOpcode() == ISD::SRL) {
10413 SDValue InnerShift = N0.getOperand(0);
10414 // TODO - support non-uniform vector shift amounts.
10415 if (auto *N001C = isConstOrConstSplat(InnerShift.getOperand(1))) {
10416 uint64_t c1 = N001C->getZExtValue();
10417 uint64_t c2 = N1C->getZExtValue();
10418 EVT InnerShiftVT = InnerShift.getValueType();
10419 EVT ShiftAmtVT = InnerShift.getOperand(1).getValueType();
10420 uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
10421 // srl (trunc (srl x, c1)), c2 --> 0 or (trunc (srl x, (add c1, c2)))
10422 // This is only valid if the OpSizeInBits + c1 = size of inner shift.
10423 if (c1 + OpSizeInBits == InnerShiftSize) {
10424 SDLoc DL(N);
10425 if (c1 + c2 >= InnerShiftSize)
10426 return DAG.getConstant(0, DL, VT);
10427 SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
10428 SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
10429 InnerShift.getOperand(0), NewShiftAmt);
10430 return DAG.getNode(ISD::TRUNCATE, DL, VT, NewShift);
10431 }
10432 // In the more general case, we can clear the high bits after the shift:
10433 // srl (trunc (srl x, c1)), c2 --> trunc (and (srl x, (c1+c2)), Mask)
10434 if (N0.hasOneUse() && InnerShift.hasOneUse() &&
10435 c1 + c2 < InnerShiftSize) {
10436 SDLoc DL(N);
10437 SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
10438 SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
10439 InnerShift.getOperand(0), NewShiftAmt);
10440 SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(InnerShiftSize,
10441 OpSizeInBits - c2),
10442 DL, InnerShiftVT);
10443 SDValue And = DAG.getNode(ISD::AND, DL, InnerShiftVT, NewShift, Mask);
10444 return DAG.getNode(ISD::TRUNCATE, DL, VT, And);
10445 }
10446 }
10447 }
10448
10449 // fold (srl (shl x, c1), c2) -> (and (shl x, (sub c1, c2)), MASK) or
10450 // (and (srl x, (sub c2, c1)), MASK)
10451 if (N0.getOpcode() == ISD::SHL &&
10452 (N0.getOperand(1) == N1 || N0->hasOneUse()) &&
10453 TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
10454 auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS,
10455 ConstantSDNode *RHS) {
10456 const APInt &LHSC = LHS->getAPIntValue();
10457 const APInt &RHSC = RHS->getAPIntValue();
10458 return LHSC.ult(OpSizeInBits) && RHSC.ult(OpSizeInBits) &&
10459 LHSC.getZExtValue() <= RHSC.getZExtValue();
10460 };
10461 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
10462 /*AllowUndefs*/ false,
10463 /*AllowTypeMismatch*/ true)) {
10464 SDLoc DL(N);
10465 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10466 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
10467 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
10468 Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N01);
10469 Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, Diff);
10470 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
10471 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
10472 }
10473 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
10474 /*AllowUndefs*/ false,
10475 /*AllowTypeMismatch*/ true)) {
10476 SDLoc DL(N);
10477 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10478 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
10479 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
10480 Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N1);
10481 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Diff);
10482 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
10483 }
10484 }
10485
10486 // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
10487 // TODO - support non-uniform vector shift amounts.
10488 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
10489 // Shifting in all undef bits?
10490 EVT SmallVT = N0.getOperand(0).getValueType();
10491 unsigned BitSize = SmallVT.getScalarSizeInBits();
10492 if (N1C->getAPIntValue().uge(BitSize))
10493 return DAG.getUNDEF(VT);
10494
10495 if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
10496 uint64_t ShiftAmt = N1C->getZExtValue();
10497 SDLoc DL0(N0);
10498 SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
10499 N0.getOperand(0),
10500 DAG.getConstant(ShiftAmt, DL0,
10501 getShiftAmountTy(SmallVT)));
10502 AddToWorklist(SmallShift.getNode());
10503 APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
10504 SDLoc DL(N);
10505 return DAG.getNode(ISD::AND, DL, VT,
10506 DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
10507 DAG.getConstant(Mask, DL, VT));
10508 }
10509 }
10510
10511 // fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign
10512 // bit, which is unmodified by sra.
10513 if (N1C && N1C->getAPIntValue() == (OpSizeInBits - 1)) {
10514 if (N0.getOpcode() == ISD::SRA)
10515 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
10516 }
10517
10518 // fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit), and x has a power
10519 // of two bitwidth. The "5" represents (log2 (bitwidth x)).
10520 if (N1C && N0.getOpcode() == ISD::CTLZ &&
10521 isPowerOf2_32(OpSizeInBits) &&
10522 N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
10523 KnownBits Known = DAG.computeKnownBits(N0.getOperand(0));
10524
10525 // If any of the input bits are KnownOne, then the input couldn't be all
10526 // zeros, thus the result of the srl will always be zero.
10527 if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
10528
10529 // If all of the bits input to the ctlz node are known to be zero, then
10530 // the result of the ctlz is "32" and the result of the shift is one.
10531 APInt UnknownBits = ~Known.Zero;
10532 if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
10533
10534 // Otherwise, check to see if there is exactly one bit input to the ctlz.
10535 if (UnknownBits.isPowerOf2()) {
10536 // Okay, we know that only the single bit specified by UnknownBits
10537 // could be set on input to the CTLZ node. If this bit is set, the SRL
10538 // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
10539 // to an SRL/XOR pair, which is likely to simplify more.
10540 unsigned ShAmt = UnknownBits.countr_zero();
10541 SDValue Op = N0.getOperand(0);
10542
10543 if (ShAmt) {
10544 SDLoc DL(N0);
10545 Op = DAG.getNode(ISD::SRL, DL, VT, Op,
10546 DAG.getConstant(ShAmt, DL,
10547 getShiftAmountTy(Op.getValueType())));
10548 AddToWorklist(Op.getNode());
10549 }
10550
10551 SDLoc DL(N);
10552 return DAG.getNode(ISD::XOR, DL, VT,
10553 Op, DAG.getConstant(1, DL, VT));
10554 }
10555 }
10556
10557 // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
10558 if (N1.getOpcode() == ISD::TRUNCATE &&
10559 N1.getOperand(0).getOpcode() == ISD::AND) {
10560 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
10561 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
10562 }
10563
10564 // fold operands of srl based on knowledge that the low bits are not
10565 // demanded.
10566 if (SimplifyDemandedBits(SDValue(N, 0)))
10567 return SDValue(N, 0);
10568
10569 if (N1C && !N1C->isOpaque())
10570 if (SDValue NewSRL = visitShiftByConstant(N))
10571 return NewSRL;
10572
10573 // Attempt to convert a srl of a load into a narrower zero-extending load.
10574 if (SDValue NarrowLoad = reduceLoadWidth(N))
10575 return NarrowLoad;
10576
10577 // Here is a common situation. We want to optimize:
10578 //
10579 // %a = ...
10580 // %b = and i32 %a, 2
10581 // %c = srl i32 %b, 1
10582 // brcond i32 %c ...
10583 //
10584 // into
10585 //
10586 // %a = ...
10587 // %b = and %a, 2
10588 // %c = setcc eq %b, 0
10589 // brcond %c ...
10590 //
10591 // However, after the source operand of the SRL is optimized into an AND, the SRL
10592 // itself may not be optimized further. Look for it and add the BRCOND into
10593 // the worklist.
10594 //
10595 // This also tends to happen for binary operations when SimplifyDemandedBits
10596 // is involved.
10597 //
10598 // FIXME: This is unnecessary if we process the DAG in topological order,
10599 // which we plan to do. This workaround can be removed once the DAG is
10600 // processed in topological order.
10601 if (N->hasOneUse()) {
10602 SDNode *Use = *N->use_begin();
10603
10604 // Look past the truncate.
10605 if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse())
10606 Use = *Use->use_begin();
10607
10608 if (Use->getOpcode() == ISD::BRCOND || Use->getOpcode() == ISD::AND ||
10609 Use->getOpcode() == ISD::OR || Use->getOpcode() == ISD::XOR)
10610 AddToWorklist(Use);
10611 }
10612
10613 // Try to transform this shift into a multiply-high if
10614 // it matches the appropriate pattern detected in combineShiftToMULH.
10615 if (SDValue MULH = combineShiftToMULH(N, DAG, TLI))
10616 return MULH;
10617
10618 return SDValue();
10619}
10620
10621SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
10622 EVT VT = N->getValueType(0);
10623 SDValue N0 = N->getOperand(0);
10624 SDValue N1 = N->getOperand(1);
10625 SDValue N2 = N->getOperand(2);
10626 bool IsFSHL = N->getOpcode() == ISD::FSHL;
10627 unsigned BitWidth = VT.getScalarSizeInBits();
10628
10629 // fold (fshl N0, N1, 0) -> N0
10630 // fold (fshr N0, N1, 0) -> N1
10631 if (isPowerOf2_32(BitWidth))
10632 if (DAG.MaskedValueIsZero(
10633 N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
10634 return IsFSHL ? N0 : N1;
10635
10636 auto IsUndefOrZero = [](SDValue V) {
10637 return V.isUndef() || isNullOrNullSplat(V, /*AllowUndefs*/ true);
10638 };
10639
10640 // TODO - support non-uniform vector shift amounts.
10641 if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) {
10642 EVT ShAmtTy = N2.getValueType();
10643
10644 // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
10645 if (Cst->getAPIntValue().uge(BitWidth)) {
10646 uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth);
10647 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0, N1,
10648 DAG.getConstant(RotAmt, SDLoc(N), ShAmtTy));
10649 }
10650
10651 unsigned ShAmt = Cst->getZExtValue();
10652 if (ShAmt == 0)
10653 return IsFSHL ? N0 : N1;
10654
10655 // fold fshl(undef_or_zero, N1, C) -> lshr(N1, BW-C)
10656 // fold fshr(undef_or_zero, N1, C) -> lshr(N1, C)
10657 // fold fshl(N0, undef_or_zero, C) -> shl(N0, C)
10658 // fold fshr(N0, undef_or_zero, C) -> shl(N0, BW-C)
10659 if (IsUndefOrZero(N0))
10660 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1,
10661 DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt,
10662 SDLoc(N), ShAmtTy));
10663 if (IsUndefOrZero(N1))
10664 return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
10665 DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt,
10666 SDLoc(N), ShAmtTy));
10667
10668 // fold (fshl ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
10669 // fold (fshr ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
10670 // TODO - bigendian support once we have test coverage.
10671 // TODO - can we merge this with CombineConsecutiveLoads/MatchLoadCombine?
10672 // TODO - permit LHS EXTLOAD if extensions are shifted out.
10673 if ((BitWidth % 8) == 0 && (ShAmt % 8) == 0 && !VT.isVector() &&
10674 !DAG.getDataLayout().isBigEndian()) {
10675 auto *LHS = dyn_cast<LoadSDNode>(N0);
10676 auto *RHS = dyn_cast<LoadSDNode>(N1);
10677 if (LHS && RHS && LHS->isSimple() && RHS->isSimple() &&
10678 LHS->getAddressSpace() == RHS->getAddressSpace() &&
10679 (LHS->hasOneUse() || RHS->hasOneUse()) && ISD::isNON_EXTLoad(RHS) &&
10680 ISD::isNON_EXTLoad(LHS)) {
10681 if (DAG.areNonVolatileConsecutiveLoads(LHS, RHS, BitWidth / 8, 1)) {
10682 SDLoc DL(RHS);
10683 uint64_t PtrOff =
10684 IsFSHL ? (((BitWidth - ShAmt) % BitWidth) / 8) : (ShAmt / 8);
10685 Align NewAlign = commonAlignment(RHS->getAlign(), PtrOff);
10686 unsigned Fast = 0;
10687 if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
10688 RHS->getAddressSpace(), NewAlign,
10689 RHS->getMemOperand()->getFlags(), &Fast) &&
10690 Fast) {
10691 SDValue NewPtr = DAG.getMemBasePlusOffset(
10692 RHS->getBasePtr(), TypeSize::getFixed(PtrOff), DL);
10693 AddToWorklist(NewPtr.getNode());
10694 SDValue Load = DAG.getLoad(
10695 VT, DL, RHS->getChain(), NewPtr,
10696 RHS->getPointerInfo().getWithOffset(PtrOff), NewAlign,
10697 RHS->getMemOperand()->getFlags(), RHS->getAAInfo());
10698 // Replace the old load's chain with the new load's chain.
10699 WorklistRemover DeadNodes(*this);
10700 DAG.ReplaceAllUsesOfValueWith(N1.getValue(1), Load.getValue(1));
10701 return Load;
10702 }
10703 }
10704 }
10705 }
10706 }
10707
10708 // fold fshr(undef_or_zero, N1, N2) -> lshr(N1, N2)
10709 // fold fshl(N0, undef_or_zero, N2) -> shl(N0, N2)
10710 // iff we know the shift amount is in range.
10711 // TODO: when is it worth doing SUB(BW, N2) as well?
10712 if (isPowerOf2_32(BitWidth)) {
10713 APInt ModuloBits(N2.getScalarValueSizeInBits(), BitWidth - 1);
10714 if (IsUndefOrZero(N0) && !IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
10715 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1, N2);
10716 if (IsUndefOrZero(N1) && IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
10717 return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N2);
10718 }
10719
10720 // fold (fshl N0, N0, N2) -> (rotl N0, N2)
10721 // fold (fshr N0, N0, N2) -> (rotr N0, N2)
10722 // TODO: Investigate flipping this rotate if only one is legal; if the funnel
10723 // shift is legal as well, we might be better off avoiding a non-constant (BW - N2).
10724 unsigned RotOpc = IsFSHL ? ISD::ROTL : ISD::ROTR;
10725 if (N0 == N1 && hasOperation(RotOpc, VT))
10726 return DAG.getNode(RotOpc, SDLoc(N), VT, N0, N2);
10727
10728 // Simplify, based on bits shifted out of N0/N1.
10729 if (SimplifyDemandedBits(SDValue(N, 0)))
10730 return SDValue(N, 0);
10731
10732 return SDValue();
10733}
10734
10735SDValue DAGCombiner::visitSHLSAT(SDNode *N) {
10736 SDValue N0 = N->getOperand(0);
10737 SDValue N1 = N->getOperand(1);
10738 if (SDValue V = DAG.simplifyShift(N0, N1))
10739 return V;
10740
10741 EVT VT = N0.getValueType();
10742
10743 // fold (*shlsat c1, c2) -> c1<<c2
10744 if (SDValue C =
10745 DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, {N0, N1}))
10746 return C;
10747
10748 ConstantSDNode *N1C = isConstOrConstSplat(N1);
10749
10750 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) {
10751 // fold (sshlsat x, c) -> (shl x, c)
10752 if (N->getOpcode() == ISD::SSHLSAT && N1C &&
10753 N1C->getAPIntValue().ult(DAG.ComputeNumSignBits(N0)))
10754 return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N1);
10755
10756 // fold (ushlsat x, c) -> (shl x, c)
10757 if (N->getOpcode() == ISD::USHLSAT && N1C &&
10758 N1C->getAPIntValue().ule(
10759 DAG.computeKnownBits(N0).countMinLeadingZeros()))
10760 return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N1);
10761 }
10762
10763 return SDValue();
10764}
10765
10766 // Given an ABS node, detect the following patterns:
10767 // (ABS (SUB (EXTEND a), (EXTEND b))).
10768 // (TRUNC (ABS (SUB (EXTEND a), (EXTEND b)))).
10769 // Generates a UABD/SABD instruction.
10770SDValue DAGCombiner::foldABSToABD(SDNode *N, const SDLoc &DL) {
10771 EVT SrcVT = N->getValueType(0);
10772
10773 if (N->getOpcode() == ISD::TRUNCATE)
10774 N = N->getOperand(0).getNode();
10775
10776 if (N->getOpcode() != ISD::ABS)
10777 return SDValue();
10778
10779 EVT VT = N->getValueType(0);
10780 SDValue AbsOp1 = N->getOperand(0);
10781 SDValue Op0, Op1;
10782
10783 if (AbsOp1.getOpcode() != ISD::SUB)
10784 return SDValue();
10785
10786 Op0 = AbsOp1.getOperand(0);
10787 Op1 = AbsOp1.getOperand(1);
10788
10789 unsigned Opc0 = Op0.getOpcode();
10790
10791 // Check if the operands of the sub are (zero|sign)-extended.
10792 // TODO: Should we use ValueTracking instead?
10793 if (Opc0 != Op1.getOpcode() ||
10794 (Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND &&
10795 Opc0 != ISD::SIGN_EXTEND_INREG)) {
10796 // fold (abs (sub nsw x, y)) -> abds(x, y)
10797 if (AbsOp1->getFlags().hasNoSignedWrap() && hasOperation(ISD::ABDS, VT) &&
10798 TLI.preferABDSToABSWithNSW(VT)) {
10799 SDValue ABD = DAG.getNode(ISD::ABDS, DL, VT, Op0, Op1);
10800 return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
10801 }
10802 return SDValue();
10803 }
10804
10805 EVT VT0, VT1;
10806 if (Opc0 == ISD::SIGN_EXTEND_INREG) {
10807 VT0 = cast<VTSDNode>(Op0.getOperand(1))->getVT();
10808 VT1 = cast<VTSDNode>(Op1.getOperand(1))->getVT();
10809 } else {
10810 VT0 = Op0.getOperand(0).getValueType();
10811 VT1 = Op1.getOperand(0).getValueType();
10812 }
10813 unsigned ABDOpcode = (Opc0 == ISD::ZERO_EXTEND) ? ISD::ABDU : ISD::ABDS;
10814
10815 // fold abs(sext(x) - sext(y)) -> zext(abds(x, y))
10816 // fold abs(zext(x) - zext(y)) -> zext(abdu(x, y))
10817 EVT MaxVT = VT0.bitsGT(VT1) ? VT0 : VT1;
10818 if ((VT0 == MaxVT || Op0->hasOneUse()) &&
10819 (VT1 == MaxVT || Op1->hasOneUse()) && hasOperation(ABDOpcode, MaxVT)) {
10820 SDValue ABD = DAG.getNode(ABDOpcode, DL, MaxVT,
10821 DAG.getNode(ISD::TRUNCATE, DL, MaxVT, Op0),
10822 DAG.getNode(ISD::TRUNCATE, DL, MaxVT, Op1));
10823 ABD = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, ABD);
10824 return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
10825 }
10826
10827 // fold abs(sext(x) - sext(y)) -> abds(sext(x), sext(y))
10828 // fold abs(zext(x) - zext(y)) -> abdu(zext(x), zext(y))
10829 if (hasOperation(ABDOpcode, VT)) {
10830 SDValue ABD = DAG.getNode(ABDOpcode, DL, VT, Op0, Op1);
10831 return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
10832 }
10833
10834 return SDValue();
10835}
10836
10837SDValue DAGCombiner::visitABS(SDNode *N) {
10838 SDValue N0 = N->getOperand(0);
10839 EVT VT = N->getValueType(0);
10840 SDLoc DL(N);
10841
10842 // fold (abs c1) -> c2
10843 if (SDValue C = DAG.FoldConstantArithmetic(ISD::ABS, DL, VT, {N0}))
10844 return C;
10845 // fold (abs (abs x)) -> (abs x)
10846 if (N0.getOpcode() == ISD::ABS)
10847 return N0;
10848 // fold (abs x) -> x iff not-negative
10849 if (DAG.SignBitIsZero(N0))
10850 return N0;
10851
10852 if (SDValue ABD = foldABSToABD(N, DL))
10853 return ABD;
10854
10855 // fold (abs (sign_extend_inreg x)) -> (zero_extend (abs (truncate x)))
10856 // iff zero_extend/truncate are free.
10857 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
10858 EVT ExtVT = cast<VTSDNode>(N0.getOperand(1))->getVT();
10859 if (TLI.isTruncateFree(VT, ExtVT) && TLI.isZExtFree(ExtVT, VT) &&
10860 TLI.isTypeDesirableForOp(ISD::ABS, ExtVT) &&
10861 hasOperation(ISD::ABS, ExtVT)) {
10862 return DAG.getNode(
10863 ISD::ZERO_EXTEND, DL, VT,
10864 DAG.getNode(ISD::ABS, DL, ExtVT,
10865 DAG.getNode(ISD::TRUNCATE, DL, ExtVT, N0.getOperand(0))));
10866 }
10867 }
10868
10869 return SDValue();
10870}
10871
10872SDValue DAGCombiner::visitBSWAP(SDNode *N) {
10873 SDValue N0 = N->getOperand(0);
10874 EVT VT = N->getValueType(0);
10875 SDLoc DL(N);
10876
10877 // fold (bswap c1) -> c2
10878 if (SDValue C = DAG.FoldConstantArithmetic(ISD::BSWAP, DL, VT, {N0}))
10879 return C;
10880 // fold (bswap (bswap x)) -> x
10881 if (N0.getOpcode() == ISD::BSWAP)
10882 return N0.getOperand(0);
10883
10884 // Canonicalize bswap(bitreverse(x)) -> bitreverse(bswap(x)). If bitreverse
10885 // isn't supported, it will be expanded to bswap followed by a manual reversal
10886 // of bits in each byte. By placing bswaps before bitreverse, we can remove
10887 // the two bswaps if the bitreverse gets expanded.
10888 if (N0.getOpcode() == ISD::BITREVERSE && N0.hasOneUse()) {
10889 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, N0.getOperand(0));
10890 return DAG.getNode(ISD::BITREVERSE, DL, VT, BSwap);
10891 }
10892
10893 // fold (bswap shl(x,c)) -> (zext(bswap(trunc(shl(x,sub(c,bw/2))))))
10894 // iff c >= bw/2 (i.e. the lower half of the shifted value is known zero)
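// e.g. for i32 with c == 16: bswap(x << 16) == zext(bswap(trunc(x) to i16) to i32),
// since the low 16 bits of (x << 16) are known to be zero.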
10895 unsigned BW = VT.getScalarSizeInBits();
10896 if (BW >= 32 && N0.getOpcode() == ISD::SHL && N0.hasOneUse()) {
10897 auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
10898 EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), BW / 2);
10899 if (ShAmt && ShAmt->getAPIntValue().ult(BW) &&
10900 ShAmt->getZExtValue() >= (BW / 2) &&
10901 (ShAmt->getZExtValue() % 16) == 0 && TLI.isTypeLegal(HalfVT) &&
10902 TLI.isTruncateFree(VT, HalfVT) &&
10903 (!LegalOperations || hasOperation(ISD::BSWAP, HalfVT))) {
10904 SDValue Res = N0.getOperand(0);
10905 if (uint64_t NewShAmt = (ShAmt->getZExtValue() - (BW / 2)))
10906 Res = DAG.getNode(ISD::SHL, DL, VT, Res,
10907 DAG.getConstant(NewShAmt, DL, getShiftAmountTy(VT)));
10908 Res = DAG.getZExtOrTrunc(Res, DL, HalfVT);
10909 Res = DAG.getNode(ISD::BSWAP, DL, HalfVT, Res);
10910 return DAG.getZExtOrTrunc(Res, DL, VT);
10911 }
10912 }
10913
10914 // Try to canonicalize bswap-of-logical-shift-by-8-bit-multiple as
10915 // inverse-shift-of-bswap:
10916 // bswap (X u<< C) --> (bswap X) u>> C
10917 // bswap (X u>> C) --> (bswap X) u<< C
10918 if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
10919 N0.hasOneUse()) {
10920 auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
10921 if (ShAmt && ShAmt->getAPIntValue().ult(BW) &&
10922 ShAmt->getZExtValue() % 8 == 0) {
10923 SDValue NewSwap = DAG.getNode(ISD::BSWAP, DL, VT, N0.getOperand(0));
10924 unsigned InverseShift = N0.getOpcode() == ISD::SHL ? ISD::SRL : ISD::SHL;
10925 return DAG.getNode(InverseShift, DL, VT, NewSwap, N0.getOperand(1));
10926 }
10927 }
10928
10929 if (SDValue V = foldBitOrderCrossLogicOp(N, DAG))
10930 return V;
10931
10932 return SDValue();
10933}
10934
10935SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
10936 SDValue N0 = N->getOperand(0);
10937 EVT VT = N->getValueType(0);
10938 SDLoc DL(N);
10939
10940 // fold (bitreverse c1) -> c2
10941 if (SDValue C = DAG.FoldConstantArithmetic(ISD::BITREVERSE, DL, VT, {N0}))
10942 return C;
10943 // fold (bitreverse (bitreverse x)) -> x
10944 if (N0.getOpcode() == ISD::BITREVERSE)
10945 return N0.getOperand(0);
10946 return SDValue();
10947}
10948
10949SDValue DAGCombiner::visitCTLZ(SDNode *N) {
10950 SDValue N0 = N->getOperand(0);
10951 EVT VT = N->getValueType(0);
10952 SDLoc DL(N);
10953
10954 // fold (ctlz c1) -> c2
10955 if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTLZ, DL, VT, {N0}))
10956 return C;
10957
10958 // If the value is known never to be zero, switch to the undef version.
10959 if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT))
10960 if (DAG.isKnownNeverZero(N0))
10961 return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, DL, VT, N0);
10962
10963 return SDValue();
10964}
10965
10966SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
10967 SDValue N0 = N->getOperand(0);
10968 EVT VT = N->getValueType(0);
10969 SDLoc DL(N);
10970
10971 // fold (ctlz_zero_undef c1) -> c2
10972 if (SDValue C =
10973 DAG.FoldConstantArithmetic(ISD::CTLZ_ZERO_UNDEF, DL, VT, {N0}))
10974 return C;
10975 return SDValue();
10976}
10977
10978SDValue DAGCombiner::visitCTTZ(SDNode *N) {
10979 SDValue N0 = N->getOperand(0);
10980 EVT VT = N->getValueType(0);
10981 SDLoc DL(N);
10982
10983 // fold (cttz c1) -> c2
10984 if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTTZ, DL, VT, {N0}))
10985 return C;
10986
10987 // If the value is known never to be zero, switch to the undef version.
10988 if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT))
10989 if (DAG.isKnownNeverZero(N0))
10990 return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, DL, VT, N0);
10991
10992 return SDValue();
10993}
10994
10995SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
10996 SDValue N0 = N->getOperand(0);
10997 EVT VT = N->getValueType(0);
10998 SDLoc DL(N);
10999
11000 // fold (cttz_zero_undef c1) -> c2
11001 if (SDValue C =
11002 DAG.FoldConstantArithmetic(ISD::CTTZ_ZERO_UNDEF, DL, VT, {N0}))
11003 return C;
11004 return SDValue();
11005}
11006
11007SDValue DAGCombiner::visitCTPOP(SDNode *N) {
11008 SDValue N0 = N->getOperand(0);
11009 EVT VT = N->getValueType(0);
11010 unsigned NumBits = VT.getScalarSizeInBits();
11011 SDLoc DL(N);
11012
11013 // fold (ctpop c1) -> c2
11014 if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTPOP, DL, VT, {N0}))
11015 return C;
11016
11017 // If the source is being shifted, but the shift doesn't affect any active
11018 // bits, then we can call CTPOP on the shift source directly.
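// e.g. if the low 4 bits of x are known zero, ctpop(x >> 4) == ctpop(x): the
// shift only discards bits that are already zero.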
11019 if (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SHL) {
11020 if (ConstantSDNode *AmtC = isConstOrConstSplat(N0.getOperand(1))) {
11021 const APInt &Amt = AmtC->getAPIntValue();
11022 if (Amt.ult(NumBits)) {
11023 KnownBits KnownSrc = DAG.computeKnownBits(N0.getOperand(0));
11024 if ((N0.getOpcode() == ISD::SRL &&
11025 Amt.ule(KnownSrc.countMinTrailingZeros())) ||
11026 (N0.getOpcode() == ISD::SHL &&
11027 Amt.ule(KnownSrc.countMinLeadingZeros()))) {
11028 return DAG.getNode(ISD::CTPOP, DL, VT, N0.getOperand(0));
11029 }
11030 }
11031 }
11032 }
11033
11034 // If the upper bits are known to be zero, then see if it's profitable to
11035 // only count the lower bits.
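// e.g. for an i64 value whose top 32 bits are known zero:
// ctpop(x) == zext(ctpop(trunc(x) to i32) to i64).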
11036 if (VT.isScalarInteger() && NumBits > 8 && (NumBits & 1) == 0) {
11037 EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), NumBits / 2);
11038 if (hasOperation(ISD::CTPOP, HalfVT) &&
11039 TLI.isTypeDesirableForOp(ISD::CTPOP, HalfVT) &&
11040 TLI.isTruncateFree(N0, HalfVT) && TLI.isZExtFree(HalfVT, VT)) {
11041 APInt UpperBits = APInt::getHighBitsSet(NumBits, NumBits / 2);
11042 if (DAG.MaskedValueIsZero(N0, UpperBits)) {
11043 SDValue PopCnt = DAG.getNode(ISD::CTPOP, DL, HalfVT,
11044 DAG.getZExtOrTrunc(N0, DL, HalfVT));
11045 return DAG.getZExtOrTrunc(PopCnt, DL, VT);
11046 }
11047 }
11048 }
11049
11050 return SDValue();
11051}
11052
11053// FIXME: This should be checking for no signed zeros on individual operands, as
11054// well as no nans.
11055 static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS,
11056 SDValue RHS,
11057 const TargetLowering &TLI) {
11058 const TargetOptions &Options = DAG.getTarget().Options;
11059 EVT VT = LHS.getValueType();
11060
11061 return Options.NoSignedZerosFPMath && VT.isFloatingPoint() &&
11064}
11065
11066 static SDValue combineMinNumMaxNumImpl(const SDLoc &DL, EVT VT, SDValue LHS,
11067 SDValue RHS, SDValue True, SDValue False,
11068 ISD::CondCode CC,
11069 const TargetLowering &TLI,
11070 SelectionDAG &DAG) {
11071 EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
11072 switch (CC) {
11073 case ISD::SETOLT:
11074 case ISD::SETOLE:
11075 case ISD::SETLT:
11076 case ISD::SETLE:
11077 case ISD::SETULT:
11078 case ISD::SETULE: {
11079 // Since the operands are already known never to be NaN here, either fminnum
11080 // or fminnum_ieee is OK. Try the IEEE version first, since fminnum is
11081 // expanded in terms of it.
11082 unsigned IEEEOpcode = (LHS == True) ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
11083 if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
11084 return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
11085
11086 unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
11087 if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
11088 return DAG.getNode(Opcode, DL, VT, LHS, RHS);
11089 return SDValue();
11090 }
11091 case ISD::SETOGT:
11092 case ISD::SETOGE:
11093 case ISD::SETGT:
11094 case ISD::SETGE:
11095 case ISD::SETUGT:
11096 case ISD::SETUGE: {
11097 unsigned IEEEOpcode = (LHS == True) ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
11098 if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
11099 return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
11100
11101 unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
11102 if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
11103 return DAG.getNode(Opcode, DL, VT, LHS, RHS);
11104 return SDValue();
11105 }
11106 default:
11107 return SDValue();
11108 }
11109}
11110
11111/// Generate Min/Max node
11112SDValue DAGCombiner::combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
11113 SDValue RHS, SDValue True,
11114 SDValue False, ISD::CondCode CC) {
11115 if ((LHS == True && RHS == False) || (LHS == False && RHS == True))
11116 return combineMinNumMaxNumImpl(DL, VT, LHS, RHS, True, False, CC, TLI, DAG);
11117
11118 // If we can't directly match this, try to see if we can pull an fneg out of
11119 // the select.
11121 True, DAG, LegalOperations, ForCodeSize);
11122 if (!NegTrue)
11123 return SDValue();
11124
11125 HandleSDNode NegTrueHandle(NegTrue);
11126
11127 // Try to unfold an fneg from the select if we are comparing the negated
11128 // constant.
11129 //
11130 // select (setcc x, K) (fneg x), -K -> fneg(minnum(x, K))
11131 //
11132 // TODO: Handle fabs
11133 if (LHS == NegTrue) {
11134 // If we can't directly match this, try to see if we can pull an fneg out of
11135 // the select.
11137 RHS, DAG, LegalOperations, ForCodeSize);
11138 if (NegRHS) {
11139 HandleSDNode NegRHSHandle(NegRHS);
11140 if (NegRHS == False) {
11141 SDValue Combined = combineMinNumMaxNumImpl(DL, VT, LHS, RHS, NegTrue,
11142 False, CC, TLI, DAG);
11143 if (Combined)
11144 return DAG.getNode(ISD::FNEG, DL, VT, Combined);
11145 }
11146 }
11147 }
11148
11149 return SDValue();
11150}
11151
11152/// If a (v)select has a condition value that is a sign-bit test, try to smear
11153/// the condition operand sign-bit across the value width and use it as a mask.
11155 SDValue Cond = N->getOperand(0);
11156 SDValue C1 = N->getOperand(1);
11157 SDValue C2 = N->getOperand(2);
11159 return SDValue();
11160
11161 EVT VT = N->getValueType(0);
11162 if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse() ||
11163 VT != Cond.getOperand(0).getValueType())
11164 return SDValue();
11165
11166 // The inverted-condition + commuted-select variants of these patterns are
11167 // canonicalized to these forms in IR.
11168 SDValue X = Cond.getOperand(0);
11169 SDValue CondC = Cond.getOperand(1);
11170 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
11171 if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CondC) &&
11172 isAllOnesOrAllOnesSplat(C2)) {
11173 // i32 X > -1 ? C1 : -1 --> (X >>s 31) | C1
11174 SDLoc DL(N);
11175 SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
11176 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
11177 return DAG.getNode(ISD::OR, DL, VT, Sra, C1);
11178 }
11179 if (CC == ISD::SETLT && isNullOrNullSplat(CondC) && isNullOrNullSplat(C2)) {
11180 // i8 X < 0 ? C1 : 0 --> (X >>s 7) & C1
11181 SDLoc DL(N);
11182 SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
11183 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
11184 return DAG.getNode(ISD::AND, DL, VT, Sra, C1);
11185 }
11186 return SDValue();
11187}
11188
11190 const TargetLowering &TLI) {
11191 if (!TLI.convertSelectOfConstantsToMath(VT))
11192 return false;
11193
11194 if (Cond.getOpcode() != ISD::SETCC || !Cond->hasOneUse())
11195 return true;
11197 return true;
11198
11199 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
11200 if (CC == ISD::SETLT && isNullOrNullSplat(Cond.getOperand(1)))
11201 return true;
11202 if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(Cond.getOperand(1)))
11203 return true;
11204
11205 return false;
11206}
11207
11208SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
11209 SDValue Cond = N->getOperand(0);
11210 SDValue N1 = N->getOperand(1);
11211 SDValue N2 = N->getOperand(2);
11212 EVT VT = N->getValueType(0);
11213 EVT CondVT = Cond.getValueType();
11214 SDLoc DL(N);
11215
11216 if (!VT.isInteger())
11217 return SDValue();
11218
11219 auto *C1 = dyn_cast<ConstantSDNode>(N1);
11220 auto *C2 = dyn_cast<ConstantSDNode>(N2);
11221 if (!C1 || !C2)
11222 return SDValue();
11223
11224 if (CondVT != MVT::i1 || LegalOperations) {
11225 // fold (select Cond, 0, 1) -> (xor Cond, 1)
11226 // We can't do this reliably if integer-based booleans have different contents
11227 // from floating-point-based booleans. This is because we can't tell whether we
11228 // have an integer-based boolean or a floating-point-based boolean unless we
11229 // can find the SETCC that produced it and inspect its operands. This is
11230 // fairly easy if Cond is the SETCC node, but it can potentially be
11231 // undiscoverable (or not reasonably discoverable). For example, it could be
11232 // in another basic block or it could require searching a complicated
11233 // expression.
11234 if (CondVT.isInteger() &&
11235 TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==
11237 TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
11239 C1->isZero() && C2->isOne()) {
11240 SDValue NotCond =
11241 DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
11242 if (VT.bitsEq(CondVT))
11243 return NotCond;
11244 return DAG.getZExtOrTrunc(NotCond, DL, VT);
11245 }
11246
11247 return SDValue();
11248 }
11249
11250 // Only do this before legalization to avoid conflicting with target-specific
11251 // transforms in the other direction (create a select from a zext/sext). There
11252 // is also a target-independent combine here in DAGCombiner in the other
11253 // direction for (select Cond, -1, 0) when the condition is not i1.
11254 assert(CondVT == MVT::i1 && !LegalOperations);
11255
11256 // select Cond, 1, 0 --> zext (Cond)
11257 if (C1->isOne() && C2->isZero())
11258 return DAG.getZExtOrTrunc(Cond, DL, VT);
11259
11260 // select Cond, -1, 0 --> sext (Cond)
11261 if (C1->isAllOnes() && C2->isZero())
11262 return DAG.getSExtOrTrunc(Cond, DL, VT);
11263
11264 // select Cond, 0, 1 --> zext (!Cond)
11265 if (C1->isZero() && C2->isOne()) {
11266 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
11267 NotCond = DAG.getZExtOrTrunc(NotCond, DL, VT);
11268 return NotCond;
11269 }
11270
11271 // select Cond, 0, -1 --> sext (!Cond)
11272 if (C1->isZero() && C2->isAllOnes()) {
11273 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
11274 NotCond = DAG.getSExtOrTrunc(NotCond, DL, VT);
11275 return NotCond;
11276 }
11277
11278 // Use a target hook because some targets may prefer to transform in the
11279 // other direction.
11281 return SDValue();
11282
11283 // For any constants that differ by 1, we can transform the select into
11284 // an extend and add.
11285 const APInt &C1Val = C1->getAPIntValue();
11286 const APInt &C2Val = C2->getAPIntValue();
11287
11288 // select Cond, C1, C1-1 --> add (zext Cond), C1-1
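// e.g. select Cond, 5, 4 --> add (zext Cond), 4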
11289 if (C1Val - 1 == C2Val) {
11290 Cond = DAG.getZExtOrTrunc(Cond, DL, VT);
11291 return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
11292 }
11293
11294 // select Cond, C1, C1+1 --> add (sext Cond), C1+1
11295 if (C1Val + 1 == C2Val) {
11296 Cond = DAG.getSExtOrTrunc(Cond, DL, VT);
11297 return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
11298 }
11299
11300 // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
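// e.g. select Cond, 8, 0 --> (zext Cond) << 3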
11301 if (C1Val.isPowerOf2() && C2Val.isZero()) {
11302 Cond = DAG.getZExtOrTrunc(Cond, DL, VT);
11303 SDValue ShAmtC =
11304 DAG.getShiftAmountConstant(C1Val.exactLogBase2(), VT, DL);
11305 return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC);
11306 }
11307
11308 // select Cond, -1, C --> or (sext Cond), C
11309 if (C1->isAllOnes()) {
11310 Cond = DAG.getSExtOrTrunc(Cond, DL, VT);
11311 return DAG.getNode(ISD::OR, DL, VT, Cond, N2);
11312 }
11313
11314 // select Cond, C, -1 --> or (sext (not Cond)), C
11315 if (C2->isAllOnes()) {
11316 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
11317 NotCond = DAG.getSExtOrTrunc(NotCond, DL, VT);
11318 return DAG.getNode(ISD::OR, DL, VT, NotCond, N1);
11319 }
11320
11322 return V;
11323
11324 return SDValue();
11325}
11326
11327template <class MatchContextClass>
11329 assert((N->getOpcode() == ISD::SELECT || N->getOpcode() == ISD::VSELECT ||
11330 N->getOpcode() == ISD::VP_SELECT) &&
11331 "Expected a (v)(vp.)select");
11332 SDValue Cond = N->getOperand(0);
11333 SDValue T = N->getOperand(1), F = N->getOperand(2);
11334 EVT VT = N->getValueType(0);
11335 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11336 MatchContextClass matcher(DAG, TLI, N);
11337
11338 if (VT != Cond.getValueType() || VT.getScalarSizeInBits() != 1)
11339 return SDValue();
11340
11341 // select Cond, Cond, F --> or Cond, F
11342 // select Cond, 1, F --> or Cond, F
11343 if (Cond == T || isOneOrOneSplat(T, /* AllowUndefs */ true))
11344 return matcher.getNode(ISD::OR, SDLoc(N), VT, Cond, F);
11345
11346 // select Cond, T, Cond --> and Cond, T
11347 // select Cond, T, 0 --> and Cond, T
11348 if (Cond == F || isNullOrNullSplat(F, /* AllowUndefs */ true))
11349 return matcher.getNode(ISD::AND, SDLoc(N), VT, Cond, T);
11350
11351 // select Cond, T, 1 --> or (not Cond), T
11352 if (isOneOrOneSplat(F, /* AllowUndefs */ true)) {
11353 SDValue NotCond = matcher.getNode(ISD::XOR, SDLoc(N), VT, Cond,
11354 DAG.getAllOnesConstant(SDLoc(N), VT));
11355 return matcher.getNode(ISD::OR, SDLoc(N), VT, NotCond, T);
11356 }
11357
11358 // select Cond, 0, F --> and (not Cond), F
11359 if (isNullOrNullSplat(T, /* AllowUndefs */ true)) {
11360 SDValue NotCond = matcher.getNode(ISD::XOR, SDLoc(N), VT, Cond,
11361 DAG.getAllOnesConstant(SDLoc(N), VT));
11362 return matcher.getNode(ISD::AND, SDLoc(N), VT, NotCond, F);
11363 }
11364
11365 return SDValue();
11366}
11367
11369 SDValue N0 = N->getOperand(0);
11370 SDValue N1 = N->getOperand(1);
11371 SDValue N2 = N->getOperand(2);
11372 EVT VT = N->getValueType(0);
11373 if (N0.getOpcode() != ISD::SETCC || !N0.hasOneUse())
11374 return SDValue();
11375
11376 SDValue Cond0 = N0.getOperand(0);
11377 SDValue Cond1 = N0.getOperand(1);
11378 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
11379 if (VT != Cond0.getValueType())
11380 return SDValue();
11381
11382 // Match a signbit check of Cond0 as "Cond0 s<0". Swap select operands if the
11383 // compare is inverted from that pattern ("Cond0 s> -1").
11384 if (CC == ISD::SETLT && isNullOrNullSplat(Cond1))
11385 ; // This is the pattern we are looking for.
11386 else if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(Cond1))
11387 std::swap(N1, N2);
11388 else
11389 return SDValue();
11390
11391 // (Cond0 s< 0) ? N1 : 0 --> (Cond0 s>> BW-1) & N1
11392 if (isNullOrNullSplat(N2)) {
11393 SDLoc DL(N);
11394 SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
11395 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
11396 return DAG.getNode(ISD::AND, DL, VT, Sra, N1);
11397 }
11398
11399 // (Cond0 s< 0) ? -1 : N2 --> (Cond0 s>> BW-1) | N2
11400 if (isAllOnesOrAllOnesSplat(N1)) {
11401 SDLoc DL(N);
11402 SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
11403 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
11404 return DAG.getNode(ISD::OR, DL, VT, Sra, N2);
11405 }
11406
11407 // If we have to invert the sign bit mask, only do that transform if the
11408 // target has a bitwise 'and not' instruction (the invert is free).
11409 // (Cond0 s< 0) ? 0 : N2 --> ~(Cond0 s>> BW-1) & N2
11410 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11411 if (isNullOrNullSplat(N1) && TLI.hasAndNot(N1)) {
11412 SDLoc DL(N);
11413 SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
11414 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
11415 SDValue Not = DAG.getNOT(DL, Sra, VT);
11416 return DAG.getNode(ISD::AND, DL, VT, Not, N2);
11417 }
11418
11419 // TODO: There's another pattern in this family, but it may require
11420 // implementing hasOrNot() to check for profitability:
11421 // (Cond0 s> -1) ? -1 : N2 --> ~(Cond0 s>> BW-1) | N2
11422
11423 return SDValue();
11424}
11425
11426SDValue DAGCombiner::visitSELECT(SDNode *N) {
11427 SDValue N0 = N->getOperand(0);
11428 SDValue N1 = N->getOperand(1);
11429 SDValue N2 = N->getOperand(2);
11430 EVT VT = N->getValueType(0);
11431 EVT VT0 = N0.getValueType();
11432 SDLoc DL(N);
11433 SDNodeFlags Flags = N->getFlags();
11434
11435 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
11436 return V;
11437
11438 if (SDValue V = foldBoolSelectToLogic<EmptyMatchContext>(N, DAG))
11439 return V;
11440
11441 // select (not Cond), N1, N2 -> select Cond, N2, N1
11442 if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) {
11443 SDValue SelectOp = DAG.getSelect(DL, VT, F, N2, N1);
11444 SelectOp->setFlags(Flags);
11445 return SelectOp;
11446 }
11447
11448 if (SDValue V = foldSelectOfConstants(N))
11449 return V;
11450
11451 // If we can fold this based on the true/false value, do so.
11452 if (SimplifySelectOps(N, N1, N2))
11453 return SDValue(N, 0); // Don't revisit N.
11454
11455 if (VT0 == MVT::i1) {
11456 // The code in this block deals with the following 2 equivalences:
11457 // select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
11458 // select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
11459 // The target can specify its preferred form with the
11460 // shouldNormalizeToSelectSequence() callback. However, we always transform
11461 // to the right-hand form if the inner select already exists in the DAG,
11462 // and we always transform to the left-hand form if we know that we can
11463 // further optimize the combination of the conditions.
11464 bool normalizeToSequence =
11465 TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
11466 // select (and Cond0, Cond1), X, Y
11467 // -> select Cond0, (select Cond1, X, Y), Y
11468 if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
11469 SDValue Cond0 = N0->getOperand(0);
11470 SDValue Cond1 = N0->getOperand(1);
11471 SDValue InnerSelect =
11472 DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2, Flags);
11473 if (normalizeToSequence || !InnerSelect.use_empty())
11474 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
11475 InnerSelect, N2, Flags);
11476 // Cleanup on failure.
11477 if (InnerSelect.use_empty())
11478 recursivelyDeleteUnusedNodes(InnerSelect.getNode());
11479 }
11480 // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
11481 if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
11482 SDValue Cond0 = N0->getOperand(0);
11483 SDValue Cond1 = N0->getOperand(1);
11484 SDValue InnerSelect = DAG.getNode(ISD::SELECT, DL, N1.getValueType(),
11485 Cond1, N1, N2, Flags);
11486 if (normalizeToSequence || !InnerSelect.use_empty())
11487 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
11488 InnerSelect, Flags);
11489 // Cleanup on failure.
11490 if (InnerSelect.use_empty())
11491 recursivelyDeleteUnusedNodes(InnerSelect.getNode());
11492 }
11493
11494 // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
11495 if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
11496 SDValue N1_0 = N1->getOperand(0);
11497 SDValue N1_1 = N1->getOperand(1);
11498 SDValue N1_2 = N1->getOperand(2);
11499 if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
11500 // Create the actual and node if we can generate good code for it.
11501 if (!normalizeToSequence) {
11502 SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
11503 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1,
11504 N2, Flags);
11505 }
11506 // Otherwise see if we can optimize the "and" to a better pattern.
11507 if (SDValue Combined = visitANDLike(N0, N1_0, N)) {
11508 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
11509 N2, Flags);
11510 }
11511 }
11512 }
11513 // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
11514 if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
11515 SDValue N2_0 = N2->getOperand(0);
11516 SDValue N2_1 = N2->getOperand(1);
11517 SDValue N2_2 = N2->getOperand(2);
11518 if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
11519 // Create the actual or node if we can generate good code for it.
11520 if (!normalizeToSequence) {
11521 SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
11522 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1,
11523 N2_2, Flags);
11524 }
11525 // Otherwise see if we can optimize to a better pattern.
11526 if (SDValue Combined = visitORLike(N0, N2_0, N))
11527 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
11528 N2_2, Flags);
11529 }
11530 }
11531 }
11532
11533 // Fold selects based on a setcc into other things, such as min/max/abs.
11534 if (N0.getOpcode() == ISD::SETCC) {
11535 SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1);
11536 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
11537
11538 // select (fcmp lt x, y), x, y -> fminnum x, y
11539 // select (fcmp gt x, y), x, y -> fmaxnum x, y
11540 //
11541 // This is OK if we don't care what happens if either operand is a NaN.
11542 if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, TLI))
11543 if (SDValue FMinMax =
11544 combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2, CC))
11545 return FMinMax;
11546
11547 // Use 'unsigned add with overflow' to optimize an unsigned saturating add.
11548 // This is conservatively limited to pre-legal-operations to give targets
11549 // a chance to reverse the transform if they want to do that. Also, it is
11550 // unlikely that the pattern would be formed late, so it's probably not
11551 // worth going through the other checks.
11552 if (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::UADDO, VT) &&
11553 CC == ISD::SETUGT && N0.hasOneUse() && isAllOnesConstant(N1) &&
11554 N2.getOpcode() == ISD::ADD && Cond0 == N2.getOperand(0)) {
11555 auto *C = dyn_cast<ConstantSDNode>(N2.getOperand(1));
11556 auto *NotC = dyn_cast<ConstantSDNode>(Cond1);
11557 if (C && NotC && C->getAPIntValue() == ~NotC->getAPIntValue()) {
11558 // select (setcc Cond0, ~C, ugt), -1, (add Cond0, C) -->
11559 // uaddo Cond0, C; select uaddo.1, -1, uaddo.0
11560 //
11561 // The IR equivalent of this transform would have this form:
11562 // %a = add %x, C
11563 // %c = icmp ugt %x, ~C
11564 // %r = select %c, -1, %a
11565 // =>
11566 // %u = call {iN,i1} llvm.uadd.with.overflow(%x, C)
11567 // %u0 = extractvalue %u, 0
11568 // %u1 = extractvalue %u, 1
11569 // %r = select %u1, -1, %u0
11570 SDVTList VTs = DAG.getVTList(VT, VT0);
11571 SDValue UAO = DAG.getNode(ISD::UADDO, DL, VTs, Cond0, N2.getOperand(1));
11572 return DAG.getSelect(DL, VT, UAO.getValue(1), N1, UAO.getValue(0));
11573 }
11574 }
11575
11576 if (TLI.isOperationLegal(ISD::SELECT_CC, VT) ||
11577 (!LegalOperations &&
11578 TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))) {
11579 // Any flags available in a select/setcc fold will be on the setcc as they
11580 // migrated from fcmp
11581 Flags = N0->getFlags();
11582 SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1,
11583 N2, N0.getOperand(2));
11584 SelectNode->setFlags(Flags);
11585 return SelectNode;
11586 }
11587
11588 if (SDValue NewSel = SimplifySelect(DL, N0, N1, N2))
11589 return NewSel;
11590 }
11591
11592 if (!VT.isVector())
11593 if (SDValue BinOp = foldSelectOfBinops(N))
11594 return BinOp;
11595
11596 if (SDValue R = combineSelectAsExtAnd(N0, N1, N2, DL, DAG))
11597 return R;
11598
11599 return SDValue();
11600}
11601
11602// This function assumes all the vselect's arguments are CONCAT_VECTOR
11603// nodes and that the condition is a BV of ConstantSDNodes (or undefs).
11605 SDLoc DL(N);
11606 SDValue Cond = N->getOperand(0);
11607 SDValue LHS = N->getOperand(1);
11608 SDValue RHS = N->getOperand(2);
11609 EVT VT = N->getValueType(0);
11610 int NumElems = VT.getVectorNumElements();
11611 assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
11612 RHS.getOpcode() == ISD::CONCAT_VECTORS &&
11613 Cond.getOpcode() == ISD::BUILD_VECTOR);
11614
11615 // CONCAT_VECTOR can take an arbitrary number of arguments. We only care about
11616 // binary ones here.
11617 if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
11618 return SDValue();
11619
11620 // We're sure we have an even number of elements due to the
11621 // concat_vectors we have as arguments to vselect.
11622 // Skip BV elements until we find one that's not an UNDEF.
11623 // After we find a non-UNDEF element, keep looping until we get to half the
11624 // length of the BV and check that all the non-undef elements are the same value.
11625 ConstantSDNode *BottomHalf = nullptr;
11626 for (int i = 0; i < NumElems / 2; ++i) {
11627 if (Cond->getOperand(i)->isUndef())
11628 continue;
11629
11630 if (BottomHalf == nullptr)
11631 BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
11632 else if (Cond->getOperand(i).getNode() != BottomHalf)
11633 return SDValue();
11634 }
11635
11636 // Do the same for the second half of the BuildVector
11637 ConstantSDNode *TopHalf = nullptr;
11638 for (int i = NumElems / 2; i < NumElems; ++i) {
11639 if (Cond->getOperand(i)->isUndef())
11640 continue;
11641
11642 if (TopHalf == nullptr)
11643 TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
11644 else if (Cond->getOperand(i).getNode() != TopHalf)
11645 return SDValue();
11646 }
11647
11648 assert(TopHalf && BottomHalf &&
11649 "One half of the selector was all UNDEFs and the other was all the "
11650 "same value. This should have been addressed before this function.");
11651 return DAG.getNode(
11653 BottomHalf->isZero() ? RHS->getOperand(0) : LHS->getOperand(0),
11654 TopHalf->isZero() ? RHS->getOperand(1) : LHS->getOperand(1));
11655}
11656
11657bool refineUniformBase(SDValue &BasePtr, SDValue &Index, bool IndexIsScaled,
11658 SelectionDAG &DAG, const SDLoc &DL) {
11659
11660 // Only perform the transformation when existing operands can be reused.
11661 if (IndexIsScaled)
11662 return false;
11663
11664 if (!isNullConstant(BasePtr) && !Index.hasOneUse())
11665 return false;
11666
11667 EVT VT = BasePtr.getValueType();
11668
11669 if (SDValue SplatVal = DAG.getSplatValue(Index);
11670 SplatVal && !isNullConstant(SplatVal) &&
11671 SplatVal.getValueType() == VT) {
11672 BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
11673 Index = DAG.getSplat(Index.getValueType(), DL, DAG.getConstant(0, DL, VT));
11674 return true;
11675 }
11676
11677 if (Index.getOpcode() != ISD::ADD)
11678 return false;
11679
11680 if (SDValue SplatVal = DAG.getSplatValue(Index.getOperand(0));
11681 SplatVal && SplatVal.getValueType() == VT) {
11682 BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
11683 Index = Index.getOperand(1);
11684 return true;
11685 }
11686 if (SDValue SplatVal = DAG.getSplatValue(Index.getOperand(1));
11687 SplatVal && SplatVal.getValueType() == VT) {
11688 BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
11689 Index = Index.getOperand(0);
11690 return true;
11691 }
11692 return false;
11693}
11694
11695// Fold sext/zext of index into index type.
11697 SelectionDAG &DAG) {
11698 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11699
11700 // It's always safe to look through zero extends.
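// A zero-extended index is never negative, so a signed index type can simply
// be re-marked as unsigned; if the target says the extend is redundant, the
// extend itself can be stripped as well.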
11701 if (Index.getOpcode() == ISD::ZERO_EXTEND) {
11702 if (TLI.shouldRemoveExtendFromGSIndex(Index, DataVT)) {
11703 IndexType = ISD::UNSIGNED_SCALED;
11704 Index = Index.getOperand(0);
11705 return true;
11706 }
11707 if (ISD::isIndexTypeSigned(IndexType)) {
11708 IndexType = ISD::UNSIGNED_SCALED;
11709 return true;
11710 }
11711 }
11712
11713 // It's only safe to look through sign extends when Index is signed.
11714 if (Index.getOpcode() == ISD::SIGN_EXTEND &&
11715 ISD::isIndexTypeSigned(IndexType) &&
11716 TLI.shouldRemoveExtendFromGSIndex(Index, DataVT)) {
11717 Index = Index.getOperand(0);
11718 return true;
11719 }
11720
11721 return false;
11722}
11723
11724SDValue DAGCombiner::visitVPSCATTER(SDNode *N) {
11725 VPScatterSDNode *MSC = cast<VPScatterSDNode>(N);
11726 SDValue Mask = MSC->getMask();
11727 SDValue Chain = MSC->getChain();
11728 SDValue Index = MSC->getIndex();
11729 SDValue Scale = MSC->getScale();
11730 SDValue StoreVal = MSC->getValue();
11731 SDValue BasePtr = MSC->getBasePtr();
11732 SDValue VL = MSC->getVectorLength();
11733 ISD::MemIndexType IndexType = MSC->getIndexType();
11734 SDLoc DL(N);
11735
11736 // Zap scatters with a zero mask.
11737 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
11738 return Chain;
11739
11740 if (refineUniformBase(BasePtr, Index, MSC->isIndexScaled(), DAG, DL)) {
11741 SDValue Ops[] = {Chain, StoreVal, BasePtr, Index, Scale, Mask, VL};
11742 return DAG.getScatterVP(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
11743 DL, Ops, MSC->getMemOperand(), IndexType);
11744 }
11745
11746 if (refineIndexType(Index, IndexType, StoreVal.getValueType(), DAG)) {
11747 SDValue Ops[] = {Chain, StoreVal, BasePtr, Index, Scale, Mask, VL};
11748 return DAG.getScatterVP(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
11749 DL, Ops, MSC->getMemOperand(), IndexType);
11750 }
11751
11752 return SDValue();
11753}
11754
11755SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
11756 MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
11757 SDValue Mask = MSC->getMask();
11758 SDValue Chain = MSC->getChain();
11759 SDValue Index = MSC->getIndex();
11760 SDValue Scale = MSC->getScale();
11761 SDValue StoreVal = MSC->getValue();
11762 SDValue BasePtr = MSC->getBasePtr();
11763 ISD::MemIndexType IndexType = MSC->getIndexType();
11764 SDLoc DL(N);
11765
11766 // Zap scatters with a zero mask.
11767 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
11768 return Chain;
11769
11770 if (refineUniformBase(BasePtr, Index, MSC->isIndexScaled(), DAG, DL)) {
11771 SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
11772 return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
11773 DL, Ops, MSC->getMemOperand(), IndexType,
11774 MSC->isTruncatingStore());
11775 }
11776
11777 if (refineIndexType(Index, IndexType, StoreVal.getValueType(), DAG)) {
11778 SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
11779 return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
11780 DL, Ops, MSC->getMemOperand(), IndexType,
11781 MSC->isTruncatingStore());
11782 }
11783
11784 return SDValue();
11785}
11786
11787SDValue DAGCombiner::visitMSTORE(SDNode *N) {
11788 MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
11789 SDValue Mask = MST->getMask();
11790 SDValue Chain = MST->getChain();
11791 SDValue Value = MST->getValue();
11792 SDValue Ptr = MST->getBasePtr();
11793 SDLoc DL(N);
11794
11795 // Zap masked stores with a zero mask.
11796 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
11797 return Chain;
11798
11799 // Remove a masked store if base pointers and masks are equal.
11800 if (MaskedStoreSDNode *MST1 = dyn_cast<MaskedStoreSDNode>(Chain)) {
11801 if (MST->isUnindexed() && MST->isSimple() && MST1->isUnindexed() &&
11802 MST1->isSimple() && MST1->getBasePtr() == Ptr &&
11803 !MST->getBasePtr().isUndef() &&
11804 ((Mask == MST1->getMask() && MST->getMemoryVT().getStoreSize() ==
11805 MST1->getMemoryVT().getStoreSize()) ||
11807 TypeSize::isKnownLE(MST1->getMemoryVT().getStoreSize(),
11808 MST->getMemoryVT().getStoreSize())) {
11809 CombineTo(MST1, MST1->getChain());
11810 if (N->getOpcode() != ISD::DELETED_NODE)
11811 AddToWorklist(N);
11812 return SDValue(N, 0);
11813 }
11814 }
11815
11816 // If this is a masked store with an all-ones mask, we can use an unmasked store.
11817 // FIXME: Can we do this for indexed, compressing, or truncating stores?
11818 if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MST->isUnindexed() &&
11819 !MST->isCompressingStore() && !MST->isTruncatingStore())
11820 return DAG.getStore(MST->getChain(), SDLoc(N), MST->getValue(),
11821 MST->getBasePtr(), MST->getPointerInfo(),
11823 MST->getAAInfo());
11824
11825 // Try transforming N to an indexed store.
11826 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
11827 return SDValue(N, 0);
11828
11829 if (MST->isTruncatingStore() && MST->isUnindexed() &&
11830 Value.getValueType().isInteger() &&
11831 (!isa<ConstantSDNode>(Value) ||
11832 !cast<ConstantSDNode>(Value)->isOpaque())) {
11833 APInt TruncDemandedBits =
11834 APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
11836
11837 // See if we can simplify the operation with
11838 // SimplifyDemandedBits, which only works if the value has a single use.
11839 if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
11840 // Re-visit the store if anything changed and the store hasn't been merged
11841 // with another node (N is deleted). SimplifyDemandedBits will add Value's
11842 // node back to the worklist if necessary, but we also need to re-visit
11843 // the Store node itself.
11844 if (N->getOpcode() != ISD::DELETED_NODE)
11845 AddToWorklist(N);
11846 return SDValue(N, 0);
11847 }
11848 }
11849
11850 // If this is a TRUNC followed by a masked store, fold this into a masked
11851 // truncating store. We can do this even if this is already a masked
11852 // truncstore.
11853 // TODO: Try combining to a masked compress store if possible.
11854 if ((Value.getOpcode() == ISD::TRUNCATE) && Value->hasOneUse() &&
11855 MST->isUnindexed() && !MST->isCompressingStore() &&
11856 TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
11857 MST->getMemoryVT(), LegalOperations)) {
11858 auto Mask = TLI.promoteTargetBoolean(DAG, MST->getMask(),
11859 Value.getOperand(0).getValueType());
11860 return DAG.getMaskedStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
11861 MST->getOffset(), Mask, MST->getMemoryVT(),
11862 MST->getMemOperand(), MST->getAddressingMode(),
11863 /*IsTruncating=*/true);
11864 }
11865
11866 return SDValue();
11867}
11868
11869SDValue DAGCombiner::visitVP_STRIDED_STORE(SDNode *N) {
11870 auto *SST = cast<VPStridedStoreSDNode>(N);
11871 EVT EltVT = SST->getValue().getValueType().getVectorElementType();
11872 // Combine strided stores with unit-stride to a regular VP store.
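// e.g. a strided store of i32 elements with a byte stride of 4 writes
// consecutive addresses, so it is equivalent to a contiguous VP store.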
11873 if (auto *CStride = dyn_cast<ConstantSDNode>(SST->getStride());
11874 CStride && CStride->getZExtValue() == EltVT.getStoreSize()) {
11875 return DAG.getStoreVP(SST->getChain(), SDLoc(N), SST->getValue(),
11876 SST->getBasePtr(), SST->getOffset(), SST->getMask(),
11877 SST->getVectorLength(), SST->getMemoryVT(),
11878 SST->getMemOperand(), SST->getAddressingMode(),
11879 SST->isTruncatingStore(), SST->isCompressingStore());
11880 }
11881 return SDValue();
11882}
11883
11884SDValue DAGCombiner::visitVPGATHER(SDNode *N) {
11885 VPGatherSDNode *MGT = cast<VPGatherSDNode>(N);
11886 SDValue Mask = MGT->getMask();
11887 SDValue Chain = MGT->getChain();
11888 SDValue Index = MGT->getIndex();
11889 SDValue Scale = MGT->getScale();
11890 SDValue BasePtr = MGT->getBasePtr();
11891 SDValue VL = MGT->getVectorLength();
11892 ISD::MemIndexType IndexType = MGT->getIndexType();
11893 SDLoc DL(N);
11894
11895 if (refineUniformBase(BasePtr, Index, MGT->isIndexScaled(), DAG, DL)) {
11896 SDValue Ops[] = {Chain, BasePtr, Index, Scale, Mask, VL};
11897 return DAG.getGatherVP(
11898 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
11899 Ops, MGT->getMemOperand(), IndexType);
11900 }
11901
11902 if (refineIndexType(Index, IndexType, N->getValueType(0), DAG)) {
11903 SDValue Ops[] = {Chain, BasePtr, Index, Scale, Mask, VL};
11904 return DAG.getGatherVP(
11905 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
11906 Ops, MGT->getMemOperand(), IndexType);
11907 }
11908
11909 return SDValue();
11910}
11911
11912SDValue DAGCombiner::visitMGATHER(SDNode *N) {
11913 MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
11914 SDValue Mask = MGT->getMask();
11915 SDValue Chain = MGT->getChain();
11916 SDValue Index = MGT->getIndex();
11917 SDValue Scale = MGT->getScale();
11918 SDValue PassThru = MGT->getPassThru();
11919 SDValue BasePtr = MGT->getBasePtr();
11920 ISD::MemIndexType IndexType = MGT->getIndexType();
11921 SDLoc DL(N);
11922
11923 // Zap gathers with a zero mask.
11925 return CombineTo(N, PassThru, MGT->getChain());
11926
11927 if (refineUniformBase(BasePtr, Index, MGT->isIndexScaled(), DAG, DL)) {
11928 SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
11929 return DAG.getMaskedGather(
11930 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
11931 Ops, MGT->getMemOperand(), IndexType, MGT->getExtensionType());
11932 }
11933
11934 if (refineIndexType(Index, IndexType, N->getValueType(0), DAG)) {
11935 SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
11936 return DAG.getMaskedGather(
11937 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
11938 Ops, MGT->getMemOperand(), IndexType, MGT->getExtensionType());
11939 }
11940
11941 return SDValue();
11942}
11943
11944SDValue DAGCombiner::visitMLOAD(SDNode *N) {
11945 MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
11946 SDValue Mask = MLD->getMask();
11947 SDLoc DL(N);
11948
11949 // Zap masked loads with a zero mask.
11950 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
11951 return CombineTo(N, MLD->getPassThru(), MLD->getChain());
11952
11953 // If this is a masked load with an all-ones mask, we can use an unmasked load.
11954 // FIXME: Can we do this for indexed, expanding, or extending loads?
11955 if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MLD->isUnindexed() &&
11956 !MLD->isExpandingLoad() && MLD->getExtensionType() == ISD::NON_EXTLOAD) {
11957 SDValue NewLd = DAG.getLoad(
11958 N->getValueType(0), SDLoc(N), MLD->getChain(), MLD->getBasePtr(),
11959 MLD->getPointerInfo(), MLD->getOriginalAlign(),
11961 return CombineTo(N, NewLd, NewLd.getValue(1));
11962 }
11963
11964 // Try transforming N to an indexed load.
11965 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
11966 return SDValue(N, 0);
11967
11968 return SDValue();
11969}
11970
11971SDValue DAGCombiner::visitVP_STRIDED_LOAD(SDNode *N) {
11972 auto *SLD = cast<VPStridedLoadSDNode>(N);
11973 EVT EltVT = SLD->getValueType(0).getVectorElementType();
11974 // Combine strided loads with unit-stride to a regular VP load.
11975 if (auto *CStride = dyn_cast<ConstantSDNode>(SLD->getStride());
11976 CStride && CStride->getZExtValue() == EltVT.getStoreSize()) {
11977 SDValue NewLd = DAG.getLoadVP(
11978 SLD->getAddressingMode(), SLD->getExtensionType(), SLD->getValueType(0),
11979 SDLoc(N), SLD->getChain(), SLD->getBasePtr(), SLD->getOffset(),
11980 SLD->getMask(), SLD->getVectorLength(), SLD->getMemoryVT(),
11981 SLD->getMemOperand(), SLD->isExpandingLoad());
11982 return CombineTo(N, NewLd, NewLd.getValue(1));
11983 }
11984 return SDValue();
11985}
11986
11987/// A vector select of 2 constant vectors can be simplified to math/logic to
11988/// avoid a variable select instruction and possibly avoid constant loads.
11989SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
11990 SDValue Cond = N->getOperand(0);
11991 SDValue N1 = N->getOperand(1);
11992 SDValue N2 = N->getOperand(2);
11993 EVT VT = N->getValueType(0);
11994 if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
11998 return SDValue();
11999
12000 // Check if we can use the condition value to increment/decrement a single
12001 // constant value. This simplifies a select to an add and removes a constant
12002 // load/materialization from the general case.
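// e.g. vselect <4 x i1> Cond, <4,4,4,4>, <3,3,3,3> --> add (zext Cond), <3,3,3,3>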
12003 bool AllAddOne = true;
12004 bool AllSubOne = true;
12005 unsigned Elts = VT.getVectorNumElements();
12006 for (unsigned i = 0; i != Elts; ++i) {
12007 SDValue N1Elt = N1.getOperand(i);
12008 SDValue N2Elt = N2.getOperand(i);
12009 if (N1Elt.isUndef() || N2Elt.isUndef())
12010 continue;
12011 if (N1Elt.getValueType() != N2Elt.getValueType())
12012 continue;
12013
12014 const APInt &C1 = N1Elt->getAsAPIntVal();
12015 const APInt &C2 = N2Elt->getAsAPIntVal();
12016 if (C1 != C2 + 1)
12017 AllAddOne = false;
12018 if (C1 != C2 - 1)
12019 AllSubOne = false;
12020 }
12021
12022 // Further simplifications for the extra-special cases where the constants are
12023 // all 0 or all -1 should be implemented as folds of these patterns.
12024 SDLoc DL(N);
12025 if (AllAddOne || AllSubOne) {
12026 // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
12027 // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
12028 auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
12029 SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
12030 return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
12031 }
12032
12033 // select Cond, Pow2C, 0 --> (zext Cond) << log2(Pow2C)
12034 APInt Pow2C;
12035 if (ISD::isConstantSplatVector(N1.getNode(), Pow2C) && Pow2C.isPowerOf2() &&
12036 isNullOrNullSplat(N2)) {
12037 SDValue ZextCond = DAG.getZExtOrTrunc(Cond, DL, VT);
12038 SDValue ShAmtC = DAG.getConstant(Pow2C.exactLogBase2(), DL, VT);
12039 return DAG.getNode(ISD::SHL, DL, VT, ZextCond, ShAmtC);
12040 }
12041
12043 return V;
12044
12045 // The general case for select-of-constants:
12046 // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
12047 // ...but that only makes sense if a vselect is slower than 2 logic ops, so
12048 // leave that to a machine-specific pass.
12049 return SDValue();
12050}
12051
12052SDValue DAGCombiner::visitVP_SELECT(SDNode *N) {
12053 if (SDValue V = foldBoolSelectToLogic<VPMatchContext>(N, DAG))
12054 return V;
12055
12056 return SDValue();
12057}
12058
12059SDValue DAGCombiner::visitVSELECT(SDNode *N) {
12060 SDValue N0 = N->getOperand(0);
12061 SDValue N1 = N->getOperand(1);
12062 SDValue N2 = N->getOperand(2);
12063 EVT VT = N->getValueType(0);
12064 SDLoc DL(N);
12065
12066 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
12067 return V;
12068
12069 if (SDValue V = foldBoolSelectToLogic<EmptyMatchContext>(N, DAG))
12070 return V;
12071
12072 // vselect (not Cond), N1, N2 -> vselect Cond, N2, N1
12073 if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false))
12074 return DAG.getSelect(DL, VT, F, N2, N1);
12075
12076 // select (sext m), (add X, C), X --> (add X, (and C, (sext m))))
12077 if (N1.getOpcode() == ISD::ADD && N1.getOperand(0) == N2 && N1->hasOneUse() &&
12080 TLI.getBooleanContents(N0.getValueType()) ==
12082 return DAG.getNode(
12083 ISD::ADD, DL, N1.getValueType(), N2,
12084 DAG.getNode(ISD::AND, DL, N0.getValueType(), N1.getOperand(1), N0));
12085 }
12086
12087 // Canonicalize integer abs.
12088 // vselect (setg[te] X, 0), X, -X ->
12089 // vselect (setgt X, -1), X, -X ->
12090 // vselect (setl[te] X, 0), -X, X ->
12091 // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
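// e.g. for i32 X: Y is 0 when X >= 0 and -1 when X < 0, so (X + Y) ^ Y yields
// X in the first case and ~(X - 1) == -X in the second, i.e. |X|.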
12092 if (N0.getOpcode() == ISD::SETCC) {
12093 SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
12094 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
12095 bool isAbs = false;
12096 bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
12097
12098 if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
12099 (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
12100 N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
12102 else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
12103 N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
12105
12106 if (isAbs) {
12108 return DAG.getNode(ISD::ABS, DL, VT, LHS);
12109
12110 SDValue Shift = DAG.getNode(ISD::SRA, DL, VT, LHS,
12111 DAG.getConstant(VT.getScalarSizeInBits() - 1,
12112 DL, getShiftAmountTy(VT)));
12113 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
12114 AddToWorklist(Shift.getNode());
12115 AddToWorklist(Add.getNode());
12116 return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
12117 }
12118
12119 // vselect x, y (fcmp lt x, y) -> fminnum x, y
12120 // vselect x, y (fcmp gt x, y) -> fmaxnum x, y
12121 //
12122 // This is OK if we don't care about what happens if either operand is a
12123 // NaN.
12124 //
12125 if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, TLI)) {
12126 if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC))
12127 return FMinMax;
12128 }
12129
12130 if (SDValue S = PerformMinMaxFpToSatCombine(LHS, RHS, N1, N2, CC, DAG))
12131 return S;
12132 if (SDValue S = PerformUMinFpToSatCombine(LHS, RHS, N1, N2, CC, DAG))
12133 return S;
12134
12135 // If this select has a condition (setcc) with narrower operands than the
12136 // select, try to widen the compare to match the select width.
12137 // TODO: This should be extended to handle any constant.
12138 // TODO: This could be extended to handle non-loading patterns, but that
12139 // requires thorough testing to avoid regressions.
12140 if (isNullOrNullSplat(RHS)) {
12141 EVT NarrowVT = LHS.getValueType();
12143 EVT SetCCVT = getSetCCResultType(LHS.getValueType());
12144 unsigned SetCCWidth = SetCCVT.getScalarSizeInBits();
12145 unsigned WideWidth = WideVT.getScalarSizeInBits();
12146 bool IsSigned = isSignedIntSetCC(CC);
12147 auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
12148 if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() &&
12149 SetCCWidth != 1 && SetCCWidth < WideWidth &&
12150 TLI.isLoadExtLegalOrCustom(LoadExtOpcode, WideVT, NarrowVT) &&
12151 TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)) {
12152 // Both compare operands can be widened for free. The LHS can use an
12153 // extended load, and the RHS is a constant:
12154 // vselect (ext (setcc load(X), C)), N1, N2 -->
12155 // vselect (setcc extload(X), C'), N1, N2
12156 auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
12157 SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS);
12158 SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS);
12159 EVT WideSetCCVT = getSetCCResultType(WideVT);
12160 SDValue WideSetCC = DAG.getSetCC(DL, WideSetCCVT, WideLHS, WideRHS, CC);
12161 return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2);
12162 }
12163 }
12164
12165 // Match VSELECTs with absolute difference patterns.
12166 // (vselect (setcc a, b, set?gt), (sub a, b), (sub b, a)) --> (abd? a, b)
12167 // (vselect (setcc a, b, set?ge), (sub a, b), (sub b, a)) --> (abd? a, b)
12168 // (vselect (setcc a, b, set?lt), (sub b, a), (sub a, b)) --> (abd? a, b)
12169 // (vselect (setcc a, b, set?le), (sub b, a), (sub a, b)) --> (abd? a, b)
12170 if (N1.getOpcode() == ISD::SUB && N2.getOpcode() == ISD::SUB &&
12171 N1.getOperand(0) == N2.getOperand(1) &&
12172 N1.getOperand(1) == N2.getOperand(0)) {
12173 bool IsSigned = isSignedIntSetCC(CC);
12174 unsigned ABDOpc = IsSigned ? ISD::ABDS : ISD::ABDU;
12175 if (hasOperation(ABDOpc, VT)) {
12176 switch (CC) {
12177 case ISD::SETGT:
12178 case ISD::SETGE:
12179 case ISD::SETUGT:
12180 case ISD::SETUGE:
12181 if (LHS == N1.getOperand(0) && RHS == N1.getOperand(1))
12182 return DAG.getNode(ABDOpc, DL, VT, LHS, RHS);
12183 break;
12184 case ISD::SETLT:
12185 case ISD::SETLE:
12186 case ISD::SETULT:
12187 case ISD::SETULE:
12188 if (RHS == N1.getOperand(0) && LHS == N1.getOperand(1) )
12189 return DAG.getNode(ABDOpc, DL, VT, LHS, RHS);
12190 break;
12191 default:
12192 break;
12193 }
12194 }
12195 }
12196
12197 // Match VSELECTs into add with unsigned saturation.
12198 if (hasOperation(ISD::UADDSAT, VT)) {
12199 // Check if one of the arms of the VSELECT is a vector with all bits set.
12200 // If it's on the left side invert the predicate to simplify logic below.
12201 SDValue Other;
12202 ISD::CondCode SatCC = CC;
12204 Other = N2;
12205 SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
12206 } else if (ISD::isConstantSplatVectorAllOnes(N2.getNode())) {
12207 Other = N1;
12208 }
12209
12210 if (Other && Other.getOpcode() == ISD::ADD) {
12211 SDValue CondLHS = LHS, CondRHS = RHS;
12212 SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
12213
12214 // Canonicalize condition operands.
12215 if (SatCC == ISD::SETUGE) {
12216 std::swap(CondLHS, CondRHS);
12217 SatCC = ISD::SETULE;
12218 }
12219
12220 // We can test against either of the addition operands.
12221 // x <= x+y ? x+y : ~0 --> uaddsat x, y
12222 // x+y >= x ? x+y : ~0 --> uaddsat x, y
12223 if (SatCC == ISD::SETULE && Other == CondRHS &&
12224 (OpLHS == CondLHS || OpRHS == CondLHS))
12225 return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
12226
12227 if (OpRHS.getOpcode() == CondRHS.getOpcode() &&
12228 (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
12229 OpRHS.getOpcode() == ISD::SPLAT_VECTOR) &&
12230 CondLHS == OpLHS) {
12231 // If the RHS is a constant we have to reverse the const
12232 // canonicalization.
12233 // x >= ~C ? x+C : ~0 --> uaddsat x, C
12234 auto MatchUADDSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
12235 return Cond->getAPIntValue() == ~Op->getAPIntValue();
12236 };
12237 if (SatCC == ISD::SETULE &&
12238 ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUADDSAT))
12239 return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
12240 }
12241 }
12242 }
12243
12244 // Match VSELECTs into sub with unsigned saturation.
12245 if (hasOperation(ISD::USUBSAT, VT)) {
12246 // Check if one of the arms of the VSELECT is a zero vector. If it's on
12247 // the left side invert the predicate to simplify logic below.
12248 SDValue Other;
12249 ISD::CondCode SatCC = CC;
12251 Other = N2;
12252 SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
12254 Other = N1;
12255 }
12256
12257 // zext(x) >= y ? trunc(zext(x) - y) : 0
12258 // --> usubsat(trunc(zext(x)),trunc(umin(y,SatLimit)))
12259 // zext(x) > y ? trunc(zext(x) - y) : 0
12260 // --> usubsat(trunc(zext(x)),trunc(umin(y,SatLimit)))
12261 if (Other && Other.getOpcode() == ISD::TRUNCATE &&
12262 Other.getOperand(0).getOpcode() == ISD::SUB &&
12263 (SatCC == ISD::SETUGE || SatCC == ISD::SETUGT)) {
12264 SDValue OpLHS = Other.getOperand(0).getOperand(0);
12265 SDValue OpRHS = Other.getOperand(0).getOperand(1);
12266 if (LHS == OpLHS && RHS == OpRHS && LHS.getOpcode() == ISD::ZERO_EXTEND)
12267 if (SDValue R = getTruncatedUSUBSAT(VT, LHS.getValueType(), LHS, RHS,
12268 DAG, DL))
12269 return R;
12270 }
12271
12272 if (Other && Other.getNumOperands() == 2) {
12273 SDValue CondRHS = RHS;
12274 SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
12275
12276 if (OpLHS == LHS) {
12277 // Look for a general sub with unsigned saturation first.
12278 // x >= y ? x-y : 0 --> usubsat x, y
12279 // x > y ? x-y : 0 --> usubsat x, y
12280 if ((SatCC == ISD::SETUGE || SatCC == ISD::SETUGT) &&
12281 Other.getOpcode() == ISD::SUB && OpRHS == CondRHS)
12282 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
12283
12284 if (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
12285 OpRHS.getOpcode() == ISD::SPLAT_VECTOR) {
12286 if (CondRHS.getOpcode() == ISD::BUILD_VECTOR ||
12287 CondRHS.getOpcode() == ISD::SPLAT_VECTOR) {
12288 // If the RHS is a constant we have to reverse the const
12289 // canonicalization.
12290 // x > C-1 ? x+-C : 0 --> usubsat x, C
12291 auto MatchUSUBSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
12292 return (!Op && !Cond) ||
12293 (Op && Cond &&
12294 Cond->getAPIntValue() == (-Op->getAPIntValue() - 1));
12295 };
12296 if (SatCC == ISD::SETUGT && Other.getOpcode() == ISD::ADD &&
12297 ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUSUBSAT,
12298 /*AllowUndefs*/ true)) {
12299 OpRHS = DAG.getNegative(OpRHS, DL, VT);
12300 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
12301 }
12302
12303 // Another special case: If C was a sign bit, the sub has been
12304 // canonicalized into a xor.
12305 // FIXME: Would it be better to use computeKnownBits to
12306 // determine whether it's safe to decanonicalize the xor?
12307 // x s< 0 ? x^C : 0 --> usubsat x, C
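// e.g. with C = 0x80 on i8: when the sign bit of x is set, x^0x80 == x-0x80,
// and when it is clear, usubsat x, 0x80 == 0, matching the select's 0 arm.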
12308 APInt SplatValue;
12309 if (SatCC == ISD::SETLT && Other.getOpcode() == ISD::XOR &&
12310 ISD::isConstantSplatVector(OpRHS.getNode(), SplatValue) &&
12312 SplatValue.isSignMask()) {
12313 // Note that we have to rebuild the RHS constant here to
12314 // ensure we don't rely on particular values of undef lanes.
12315 OpRHS = DAG.getConstant(SplatValue, DL, VT);
12316 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
12317 }
12318 }
12319 }
12320 }
12321 }
12322 }
12323 }
12324
12325 if (SimplifySelectOps(N, N1, N2))
12326 return SDValue(N, 0); // Don't revisit N.
12327
12328 // Fold (vselect all_ones, N1, N2) -> N1
12329 if (ISD::isConstantSplatVectorAllOnes(N0.getNode()))
12330 return N1;
12331 // Fold (vselect all_zeros, N1, N2) -> N2
12332 if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
12333 return N2;
12334
12335 // The ConvertSelectToConcatVector function assumes both of the above
12336 // checks for (vselect (build_vector all{ones,zeros}) ...) have been made
12337 // and addressed.
12338 if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
12341 if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
12342 return CV;
12343 }
12344
12345 if (SDValue V = foldVSelectOfConstants(N))
12346 return V;
12347
12348 if (hasOperation(ISD::SRA, VT))
12350 return V;
12351
12353 return SDValue(N, 0);
12354
12355 return SDValue();
12356}
12357
12358SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
12359 SDValue N0 = N->getOperand(0);
12360 SDValue N1 = N->getOperand(1);
12361 SDValue N2 = N->getOperand(2);
12362 SDValue N3 = N->getOperand(3);
12363 SDValue N4 = N->getOperand(4);
12364 ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
12365
12366 // fold select_cc lhs, rhs, x, x, cc -> x
12367 if (N2 == N3)
12368 return N2;
12369
12370 // select_cc bool, 0, x, y, seteq -> select bool, y, x
12371 if (CC == ISD::SETEQ && !LegalTypes && N0.getValueType() == MVT::i1 &&
12372 isNullConstant(N1))
12373 return DAG.getSelect(SDLoc(N), N2.getValueType(), N0, N3, N2);
12374
12375 // Determine if the condition we're dealing with is constant
12376 if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
12377 CC, SDLoc(N), false)) {
12378 AddToWorklist(SCC.getNode());
12379
12380 // cond always true -> true val
12381 // cond always false -> false val
12382 if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode()))
12383 return SCCC->isZero() ? N3 : N2;
12384
12385 // When the condition is UNDEF, just return the first operand. This is
12386 // consistent with DAG creation: no setcc node is created in this case.
12387 if (SCC->isUndef())
12388 return N2;
12389
12390 // Fold to a simpler select_cc
12391 if (SCC.getOpcode() == ISD::SETCC) {
12392 SDValue SelectOp = DAG.getNode(
12393 ISD::SELECT_CC, SDLoc(N), N2.getValueType(), SCC.getOperand(0),
12394 SCC.getOperand(1), N2, N3, SCC.getOperand(2));
12395 SelectOp->setFlags(SCC->getFlags());
12396 return SelectOp;
12397 }
12398 }
12399
12400 // If we can fold this based on the true/false value, do so.
12401 if (SimplifySelectOps(N, N2, N3))
12402 return SDValue(N, 0); // Don't revisit N.
12403
12404 // fold select_cc into other things, such as min/max/abs
12405 return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
12406}
12407
12408SDValue DAGCombiner::visitSETCC(SDNode *N) {
12409 // setcc is very commonly used as an argument to brcond. This pattern
12410 // also lends itself to numerous combines and, as a result, it is desirable
12411 // to keep the argument to a brcond as a setcc as much as possible.
12412 bool PreferSetCC =
12413 N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BRCOND;
12414
12415 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
12416 EVT VT = N->getValueType(0);
12417 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
12418
12419 SDValue Combined = SimplifySetCC(VT, N0, N1, Cond, SDLoc(N), !PreferSetCC);
12420
12421 if (Combined) {
12422 // If we prefer to have a setcc, and we don't, we'll try our best to
12423 // recreate one using rebuildSetCC.
12424 if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
12425 SDValue NewSetCC = rebuildSetCC(Combined);
12426
12427 // We don't have anything interesting to combine to.
12428 if (NewSetCC.getNode() == N)
12429 return SDValue();
12430
12431 if (NewSetCC)
12432 return NewSetCC;
12433 }
12434 return Combined;
12435 }
12436
12437 // Optimize
12438 // 1) (icmp eq/ne (and X, C0), (shift X, C1))
12439 // or
12440 // 2) (icmp eq/ne X, (rotate X, C1))
12441 // If C0 is a mask or shifted mask and the shift amt (C1) isolates the
12442 // remaining bits (i.e. something like `(x64 & UINT32_MAX) == (x64 >> 32)`)
12443 // Then:
12444 // If C1 is a power of 2, then the rotate and shift+and versions are
12445 // equivalent, so we can interchange them depending on target preference.
12446 // Otherwise, if we have the shift+and version we can interchange srl/shl,
12447 // which in turn affects the constant C0. We can use this to get better
12448 // constants again determined by target preference.
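// For example, on i64 with C1 == 32, `(x64 & 0xffffffff) == (x64 >> 32)` holds
// exactly when the two 32-bit halves of x64 are equal, which is the same
// condition as `x64 == (rotl/rotr x64, 32)`.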
12449 if (Cond == ISD::SETNE || Cond == ISD::SETEQ) {
12450 auto IsAndWithShift = [](SDValue A, SDValue B) {
12451 return A.getOpcode() == ISD::AND &&
12452 (B.getOpcode() == ISD::SRL || B.getOpcode() == ISD::SHL) &&
12453 A.getOperand(0) == B.getOperand(0);
12454 };
12455 auto IsRotateWithOp = [](SDValue A, SDValue B) {
12456 return (B.getOpcode() == ISD::ROTL || B.getOpcode() == ISD::ROTR) &&
12457 B.getOperand(0) == A;
12458 };
12459 SDValue AndOrOp = SDValue(), ShiftOrRotate = SDValue();
12460 bool IsRotate = false;
12461
12462 // Find either shift+and or rotate pattern.
12463 if (IsAndWithShift(N0, N1)) {
12464 AndOrOp = N0;
12465 ShiftOrRotate = N1;
12466 } else if (IsAndWithShift(N1, N0)) {
12467 AndOrOp = N1;
12468 ShiftOrRotate = N0;
12469 } else if (IsRotateWithOp(N0, N1)) {
12470 IsRotate = true;
12471 AndOrOp = N0;
12472 ShiftOrRotate = N1;
12473 } else if (IsRotateWithOp(N1, N0)) {
12474 IsRotate = true;
12475 AndOrOp = N1;
12476 ShiftOrRotate = N0;
12477 }
12478
12479 if (AndOrOp && ShiftOrRotate && ShiftOrRotate.hasOneUse() &&
12480 (IsRotate || AndOrOp.hasOneUse())) {
12481 EVT OpVT = N0.getValueType();
12482 // Get the constant shift/rotate amount and possibly the mask (if it's the
12483 // shift+and variant).
12484 auto GetAPIntValue = [](SDValue Op) -> std::optional<APInt> {
12485 ConstantSDNode *CNode = isConstOrConstSplat(Op, /*AllowUndefs*/ false,
12486 /*AllowTrunc*/ false);
12487 if (CNode == nullptr)
12488 return std::nullopt;
12489 return CNode->getAPIntValue();
12490 };
12491 std::optional<APInt> AndCMask =
12492 IsRotate ? std::nullopt : GetAPIntValue(AndOrOp.getOperand(1));
12493 std::optional<APInt> ShiftCAmt =
12494 GetAPIntValue(ShiftOrRotate.getOperand(1));
12495 unsigned NumBits = OpVT.getScalarSizeInBits();
12496
12497 // We found constants.
12498 if (ShiftCAmt && (IsRotate || AndCMask) && ShiftCAmt->ult(NumBits)) {
12499 unsigned ShiftOpc = ShiftOrRotate.getOpcode();
12500 // Check that the constants meet the constraints.
12501 bool CanTransform = IsRotate;
12502 if (!CanTransform) {
12503 // Check that the mask and shift complement each other.
12504 CanTransform = *ShiftCAmt == (~*AndCMask).popcount();
12505 // Check that we are comparing all bits
12506 CanTransform &= (*ShiftCAmt + AndCMask->popcount()) == NumBits;
12507 // Check that the and mask is correct for the shift
12508 CanTransform &=
12509 ShiftOpc == ISD::SHL ? (~*AndCMask).isMask() : AndCMask->isMask();
12510 }
12511
12512 // See if target prefers another shift/rotate opcode.
12513 unsigned NewShiftOpc = TLI.preferedOpcodeForCmpEqPiecesOfOperand(
12514 OpVT, ShiftOpc, ShiftCAmt->isPowerOf2(), *ShiftCAmt, AndCMask);
12515 // Transform is valid and we have a new preference.
12516 if (CanTransform && NewShiftOpc != ShiftOpc) {
12517 SDLoc DL(N);
12518 SDValue NewShiftOrRotate =
12519 DAG.getNode(NewShiftOpc, DL, OpVT, ShiftOrRotate.getOperand(0),
12520 ShiftOrRotate.getOperand(1));
12521 SDValue NewAndOrOp = SDValue();
12522
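// Both variants compare the low (NumBits - C1) bits of x against the high
// (NumBits - C1) bits; switching between SRL and SHL moves the shift to the
// other side of the compare, so the AND mask has to select the opposite end
// of the value.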
12523 if (NewShiftOpc == ISD::SHL || NewShiftOpc == ISD::SRL) {
12524 APInt NewMask =
12525 NewShiftOpc == ISD::SHL
12526 ? APInt::getHighBitsSet(NumBits,
12527 NumBits - ShiftCAmt->getZExtValue())
12528 : APInt::getLowBitsSet(NumBits,
12529 NumBits - ShiftCAmt->getZExtValue());
12530 NewAndOrOp =
12531 DAG.getNode(ISD::AND, DL, OpVT, ShiftOrRotate.getOperand(0),
12532 DAG.getConstant(NewMask, DL, OpVT));
12533 } else {
12534 NewAndOrOp = ShiftOrRotate.getOperand(0);
12535 }
12536
12537 return DAG.getSetCC(DL, VT, NewAndOrOp, NewShiftOrRotate, Cond);
12538 }
12539 }
12540 }
12541 }
12542 return SDValue();
12543}
12544
12545SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
12546 SDValue LHS = N->getOperand(0);
12547 SDValue RHS = N->getOperand(1);
12548 SDValue Carry = N->getOperand(2);
12549 SDValue Cond = N->getOperand(3);
12550
12551 // If Carry is false, fold to a regular SETCC.
12552 if (isNullConstant(Carry))
12553 return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
12554
12555 return SDValue();
12556}
12557
12558/// Check if N satisfies:
12559/// N is used once.
12560/// N is a Load.
12561///   The load is compatible with ExtOpcode: if the load has an explicit
12562///   zero/sign extension, ExtOpcode must use the same extension;
12563///   otherwise any extension is acceptable.
12564/// Returns true if N satisfies all of the above.
12565static bool isCompatibleLoad(SDValue N, unsigned ExtOpcode) {
12566 if (!N.hasOneUse())
12567 return false;
12568
12569 if (!isa<LoadSDNode>(N))
12570 return false;
12571
12572 LoadSDNode *Load = cast<LoadSDNode>(N);
12573 ISD::LoadExtType LoadExt = Load->getExtensionType();
12574 if (LoadExt == ISD::NON_EXTLOAD || LoadExt == ISD::EXTLOAD)
12575 return true;
12576
12577 // Now LoadExt is either SEXTLOAD or ZEXTLOAD, ExtOpcode must have the same
12578 // extension.
12579 if ((LoadExt == ISD::SEXTLOAD && ExtOpcode != ISD::SIGN_EXTEND) ||
12580 (LoadExt == ISD::ZEXTLOAD && ExtOpcode != ISD::ZERO_EXTEND))
12581 return false;
12582
12583 return true;
12584}
12585
12586/// Fold
12587/// (sext (select c, load x, load y)) -> (select c, sextload x, sextload y)
12588/// (zext (select c, load x, load y)) -> (select c, zextload x, zextload y)
12589/// (aext (select c, load x, load y)) -> (select c, extload x, extload y)
12590/// This function is called by the DAGCombiner when visiting sext/zext/aext
12591/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
12593 SelectionDAG &DAG,
12594 CombineLevel Level) {
12595 unsigned Opcode = N->getOpcode();
12596 SDValue N0 = N->getOperand(0);
12597 EVT VT = N->getValueType(0);
12598 SDLoc DL(N);
12599
12600 assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
12601 Opcode == ISD::ANY_EXTEND) &&
12602 "Expected EXTEND dag node in input!");
12603
12604 if (!(N0->getOpcode() == ISD::SELECT || N0->getOpcode() == ISD::VSELECT) ||
12605 !N0.hasOneUse())
12606 return SDValue();
12607
12608 SDValue Op1 = N0->getOperand(1);
12609 SDValue Op2 = N0->getOperand(2);
12610 if (!isCompatibleLoad(Op1, Opcode) || !isCompatibleLoad(Op2, Opcode))
12611 return SDValue();
12612
12613 auto ExtLoadOpcode = ISD::EXTLOAD;
12614 if (Opcode == ISD::SIGN_EXTEND)
12615 ExtLoadOpcode = ISD::SEXTLOAD;
12616 else if (Opcode == ISD::ZERO_EXTEND)
12617 ExtLoadOpcode = ISD::ZEXTLOAD;
12618
12619 // An illegal VSELECT may cause ISel to fail if it appears after
12620 // legalization (DAG Combine2), so conservatively check the OperationAction.
12621 LoadSDNode *Load1 = cast<LoadSDNode>(Op1);
12622 LoadSDNode *Load2 = cast<LoadSDNode>(Op2);
12623 if (!TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load1->getMemoryVT()) ||
12624 !TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load2->getMemoryVT()) ||
12625 (N0->getOpcode() == ISD::VSELECT && Level >= AfterLegalizeTypes &&
12627 return SDValue();
12628
12629 SDValue Ext1 = DAG.getNode(Opcode, DL, VT, Op1);
12630 SDValue Ext2 = DAG.getNode(Opcode, DL, VT, Op2);
12631 return DAG.getSelect(DL, VT, N0->getOperand(0), Ext1, Ext2);
12632}
12633
12634/// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
12635/// a build_vector of constants.
12636/// This function is called by the DAGCombiner when visiting sext/zext/aext
12637/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
12638/// Vector extends are not folded if operations are legal; this is to
12639/// avoid introducing illegal build_vector dag nodes.
12641 const TargetLowering &TLI,
12642 SelectionDAG &DAG, bool LegalTypes) {
12643 unsigned Opcode = N->getOpcode();
12644 SDValue N0 = N->getOperand(0);
12645 EVT VT = N->getValueType(0);
12646
12647 assert((ISD::isExtOpcode(Opcode) || ISD::isExtVecInRegOpcode(Opcode)) &&
12648 "Expected EXTEND dag node in input!");
12649
12650 // fold (sext c1) -> c1
12651 // fold (zext c1) -> c1
12652 // fold (aext c1) -> c1
12653 if (isa<ConstantSDNode>(N0))
12654 return DAG.getNode(Opcode, DL, VT, N0);
12655
12656 // fold (sext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
12657 // fold (zext (select cond, c1, c2)) -> (select cond, zext c1, zext c2)
12658 // fold (aext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
12659 if (N0->getOpcode() == ISD::SELECT) {
12660 SDValue Op1 = N0->getOperand(1);
12661 SDValue Op2 = N0->getOperand(2);
12662 if (isa<ConstantSDNode>(Op1) && isa<ConstantSDNode>(Op2) &&
12663 (Opcode != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0.getValueType(), VT))) {
12664 // For any_extend, choose sign extension of the constants to allow a
12665 // possible further transform to sign_extend_inreg, i.e.:
12666 //
12667 // t1: i8 = select t0, Constant:i8<-1>, Constant:i8<0>
12668 // t2: i64 = any_extend t1
12669 // -->
12670 // t3: i64 = select t0, Constant:i64<-1>, Constant:i64<0>
12671 // -->
12672 // t4: i64 = sign_extend_inreg t3
12673 unsigned FoldOpc = Opcode;
12674 if (FoldOpc == ISD::ANY_EXTEND)
12675 FoldOpc = ISD::SIGN_EXTEND;
12676 return DAG.getSelect(DL, VT, N0->getOperand(0),
12677 DAG.getNode(FoldOpc, DL, VT, Op1),
12678 DAG.getNode(FoldOpc, DL, VT, Op2));
12679 }
12680 }
12681
12682 // fold (sext (build_vector AllConstants) -> (build_vector AllConstants)
12683 // fold (zext (build_vector AllConstants) -> (build_vector AllConstants)
12684 // fold (aext (build_vector AllConstants) -> (build_vector AllConstants)
12685 EVT SVT = VT.getScalarType();
12686 if (!(VT.isVector() && (!LegalTypes || TLI.isTypeLegal(SVT)) &&
12688 return SDValue();
12689
12690 // We can fold this node into a build_vector.
12691 unsigned VTBits = SVT.getSizeInBits();
12692 unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
12694 unsigned NumElts = VT.getVectorNumElements();
12695
12696 for (unsigned i = 0; i != NumElts; ++i) {
12697 SDValue Op = N0.getOperand(i);
12698 if (Op.isUndef()) {
12699 if (Opcode == ISD::ANY_EXTEND || Opcode == ISD::ANY_EXTEND_VECTOR_INREG)
12700 Elts.push_back(DAG.getUNDEF(SVT));
12701 else
12702 Elts.push_back(DAG.getConstant(0, DL, SVT));
12703 continue;
12704 }
12705
12706 SDLoc DL(Op);
12707 // Get the constant value and if needed trunc it to the size of the type.
12708 // Nodes like build_vector might have constants wider than the scalar type.
12709 APInt C = Op->getAsAPIntVal().zextOrTrunc(EVTBits);
12710 if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
12711 Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
12712 else
12713 Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
12714 }
12715
12716 return DAG.getBuildVector(VT, DL, Elts);
12717}
12718
12719// ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
12720// "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
12721// transformation. Returns true if the extensions are possible and the
12722// above-mentioned transformation is profitable.
12724 unsigned ExtOpc,
12725 SmallVectorImpl<SDNode *> &ExtendNodes,
12726 const TargetLowering &TLI) {
12727 bool HasCopyToRegUses = false;
12728 bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType());
12729 for (SDNode::use_iterator UI = N0->use_begin(), UE = N0->use_end(); UI != UE;
12730 ++UI) {
12731 SDNode *User = *UI;
12732 if (User == N)
12733 continue;
12734 if (UI.getUse().getResNo() != N0.getResNo())
12735 continue;
12736 // FIXME: Only extend SETCC N, N and SETCC N, c for now.
12737 if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
12738 ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
12739 if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
12740 // Sign bits will be lost after a zext.
12741 return false;
12742 bool Add = false;
12743 for (unsigned i = 0; i != 2; ++i) {
12744 SDValue UseOp = User->getOperand(i);
12745 if (UseOp == N0)
12746 continue;
12747 if (!isa<ConstantSDNode>(UseOp))
12748 return false;
12749 Add = true;
12750 }
12751 if (Add)
12752 ExtendNodes.push_back(User);
12753 continue;
12754 }
12755 // If truncates aren't free and there are users we can't
12756 // extend, it isn't worthwhile.
12757 if (!isTruncFree)
12758 return false;
12759 // Remember if this value is live-out.
12760 if (User->getOpcode() == ISD::CopyToReg)
12761 HasCopyToRegUses = true;
12762 }
12763
12764 if (HasCopyToRegUses) {
12765 bool BothLiveOut = false;
12766 for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
12767 UI != UE; ++UI) {
12768 SDUse &Use = UI.getUse();
12769 if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
12770 BothLiveOut = true;
12771 break;
12772 }
12773 }
12774 if (BothLiveOut)
12775 // Both unextended and extended values are live out. There had better be
12776 // a good reason for the transformation.
12777 return !ExtendNodes.empty();
12778 }
12779 return true;
12780}
12781
12782void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
12783 SDValue OrigLoad, SDValue ExtLoad,
12784 ISD::NodeType ExtType) {
12785 // Extend SetCC uses if necessary.
12786 SDLoc DL(ExtLoad);
12787 for (SDNode *SetCC : SetCCs) {
12789
12790 for (unsigned j = 0; j != 2; ++j) {
12791 SDValue SOp = SetCC->getOperand(j);
12792 if (SOp == OrigLoad)
12793 Ops.push_back(ExtLoad);
12794 else
12795 Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
12796 }
12797
12798 Ops.push_back(SetCC->getOperand(2));
12799 CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
12800 }
12801}
12802
12803// FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
12804SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
12805 SDValue N0 = N->getOperand(0);
12806 EVT DstVT = N->getValueType(0);
12807 EVT SrcVT = N0.getValueType();
12808
12809 assert((N->getOpcode() == ISD::SIGN_EXTEND ||
12810 N->getOpcode() == ISD::ZERO_EXTEND) &&
12811 "Unexpected node type (not an extend)!");
12812
12813 // fold (sext (load x)) to multiple smaller sextloads; same for zext.
12814 // For example, on a target with legal v4i32, but illegal v8i32, turn:
12815 // (v8i32 (sext (v8i16 (load x))))
12816 // into:
12817 // (v8i32 (concat_vectors (v4i32 (sextload x)),
12818 // (v4i32 (sextload (x + 16)))))
12819 // Where uses of the original load, i.e.:
12820 // (v8i16 (load x))
12821 // are replaced with:
12822 // (v8i16 (truncate
12823 // (v8i32 (concat_vectors (v4i32 (sextload x)),
12824 // (v4i32 (sextload (x + 16)))))))
12825 //
12826 // This combine is only applicable to illegal, but splittable, vectors.
12827 // All legal types, and illegal non-vector types, are handled elsewhere.
12828 // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
12829 //
12830 if (N0->getOpcode() != ISD::LOAD)
12831 return SDValue();
12832
12833 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
12834
12835 if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
12836 !N0.hasOneUse() || !LN0->isSimple() ||
12837 !DstVT.isVector() || !DstVT.isPow2VectorType() ||
12839 return SDValue();
12840
12842 if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI))
12843 return SDValue();
12844
12845 ISD::LoadExtType ExtType =
12846 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
12847
12848 // Try to split the vector types to get down to legal types.
12849 EVT SplitSrcVT = SrcVT;
12850 EVT SplitDstVT = DstVT;
12851 while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
12852 SplitSrcVT.getVectorNumElements() > 1) {
12853 SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
12854 SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
12855 }
12856
12857 if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
12858 return SDValue();
12859
12860 assert(!DstVT.isScalableVector() && "Unexpected scalable vector type");
12861
12862 SDLoc DL(N);
12863 const unsigned NumSplits =
12864 DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
12865 const unsigned Stride = SplitSrcVT.getStoreSize();
12868
12869 SDValue BasePtr = LN0->getBasePtr();
12870 for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
12871 const unsigned Offset = Idx * Stride;
12872
12873 SDValue SplitLoad =
12874 DAG.getExtLoad(ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(),
12875 BasePtr, LN0->getPointerInfo().getWithOffset(Offset),
12876 SplitSrcVT, LN0->getOriginalAlign(),
12877 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
12878
12879 BasePtr = DAG.getMemBasePlusOffset(BasePtr, TypeSize::getFixed(Stride), DL);
12880
12881 Loads.push_back(SplitLoad.getValue(0));
12882 Chains.push_back(SplitLoad.getValue(1));
12883 }
12884
12885 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
12886 SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
12887
12888 // Simplify TF.
12889 AddToWorklist(NewChain.getNode());
12890
12891 CombineTo(N, NewValue);
12892
12893 // Replace uses of the original load (before extension)
12894 // with a truncate of the concatenated sextloaded vectors.
12895 SDValue Trunc =
12896 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
12897 ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode());
12898 CombineTo(N0.getNode(), Trunc, NewChain);
12899 return SDValue(N, 0); // Return N so it doesn't get rechecked!
12900}
12901
12902// fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
12903// (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
12904SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
12905 assert(N->getOpcode() == ISD::ZERO_EXTEND);
12906 EVT VT = N->getValueType(0);
12907 EVT OrigVT = N->getOperand(0).getValueType();
12908 if (TLI.isZExtFree(OrigVT, VT))
12909 return SDValue();
12910
12911 // and/or/xor
12912 SDValue N0 = N->getOperand(0);
12913 if (!ISD::isBitwiseLogicOp(N0.getOpcode()) ||
12914 N0.getOperand(1).getOpcode() != ISD::Constant ||
12915 (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT)))
12916 return SDValue();
12917
12918 // shl/shr
12919 SDValue N1 = N0->getOperand(0);
12920 if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) ||
12921 N1.getOperand(1).getOpcode() != ISD::Constant ||
12922 (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT)))
12923 return SDValue();
12924
12925 // load
12926 if (!isa<LoadSDNode>(N1.getOperand(0)))
12927 return SDValue();
12928 LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0));
12929 EVT MemVT = Load->getMemoryVT();
12930 if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) ||
12931 Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
12932 return SDValue();
12933
12934
12935 // If the shift op is SHL, the logic op must be AND, otherwise the result
12936 // will be wrong.
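// An SHL performed in the wide type can leave nonzero bits above the original
// width; an AND with the zero-extended narrow constant clears them, but an OR
// or XOR would let them leak into the result.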
12937 if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND)
12938 return SDValue();
12939
12940 if (!N0.hasOneUse() || !N1.hasOneUse())
12941 return SDValue();
12942
12944 if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0),
12945 ISD::ZERO_EXTEND, SetCCs, TLI))
12946 return SDValue();
12947
12948 // Actually do the transformation.
12949 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT,
12950 Load->getChain(), Load->getBasePtr(),
12951 Load->getMemoryVT(), Load->getMemOperand());
12952
12953 SDLoc DL1(N1);
12954 SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad,
12955 N1.getOperand(1));
12956
12958 SDLoc DL0(N0);
12959 SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
12960 DAG.getConstant(Mask, DL0, VT));
12961
12962 ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
12963 CombineTo(N, And);
12964 if (SDValue(Load, 0).hasOneUse()) {
12965 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
12966 } else {
12967 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load),
12968 Load->getValueType(0), ExtLoad);
12969 CombineTo(Load, Trunc, ExtLoad.getValue(1));
12970 }
12971
12972 // N0 is dead at this point.
12973 recursivelyDeleteUnusedNodes(N0.getNode());
12974
12975 return SDValue(N,0); // Return N so it doesn't get rechecked!
12976}
12977
12978/// If we're narrowing or widening the result of a vector select and the final
12979/// size is the same size as a setcc (compare) feeding the select, then try to
12980/// apply the cast operation to the select's operands because matching vector
12981/// sizes for a select condition and other operands should be more efficient.
12982SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
12983 unsigned CastOpcode = Cast->getOpcode();
12984 assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
12985 CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
12986 CastOpcode == ISD::FP_ROUND) &&
12987 "Unexpected opcode for vector select narrowing/widening");
12988
12989 // We only do this transform before legal ops because the pattern may be
12990 // obfuscated by target-specific operations after legalization. Do not create
12991 // an illegal select op, however, because that may be difficult to lower.
12992 EVT VT = Cast->getValueType(0);
12993 if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
12994 return SDValue();
12995
12996 SDValue VSel = Cast->getOperand(0);
12997 if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() ||
12998 VSel.getOperand(0).getOpcode() != ISD::SETCC)
12999 return SDValue();
13000
13001 // Does the setcc have the same vector size as the casted select?
13002 SDValue SetCC = VSel.getOperand(0);
13003 EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
13004 if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
13005 return SDValue();
13006
13007 // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
13008 SDValue A = VSel.getOperand(1);
13009 SDValue B = VSel.getOperand(2);
13010 SDValue CastA, CastB;
13011 SDLoc DL(Cast);
13012 if (CastOpcode == ISD::FP_ROUND) {
13013 // FP_ROUND (fptrunc) has an extra flag operand to pass along.
13014 CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
13015 CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
13016 } else {
13017 CastA = DAG.getNode(CastOpcode, DL, VT, A);
13018 CastB = DAG.getNode(CastOpcode, DL, VT, B);
13019 }
13020 return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
13021}
13022
13023// fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
13024// fold ([s|z]ext ( extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
13026 const TargetLowering &TLI, EVT VT,
13027 bool LegalOperations, SDNode *N,
13028 SDValue N0, ISD::LoadExtType ExtLoadType) {
13029 SDNode *N0Node = N0.getNode();
13030 bool isAExtLoad = (ExtLoadType == ISD::SEXTLOAD) ? ISD::isSEXTLoad(N0Node)
13031 : ISD::isZEXTLoad(N0Node);
13032 if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) ||
13033 !ISD::isUNINDEXEDLoad(N0Node) || !N0.hasOneUse())
13034 return SDValue();
13035
13036 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
13037 EVT MemVT = LN0->getMemoryVT();
13038 if ((LegalOperations || !LN0->isSimple() ||
13039 VT.isVector()) &&
13040 !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
13041 return SDValue();
13042
13043 SDValue ExtLoad =
13044 DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
13045 LN0->getBasePtr(), MemVT, LN0->getMemOperand());
13046 Combiner.CombineTo(N, ExtLoad);
13047 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
13048 if (LN0->use_empty())
13049 Combiner.recursivelyDeleteUnusedNodes(LN0);
13050 return SDValue(N, 0); // Return N so it doesn't get rechecked!
13051}
13052
13053// fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x)))
13054// Only generate vector extloads when 1) they're legal, and 2) they are
13055// deemed desirable by the target. NonNegZExt can be set to true if a zero
13056// extend has the nonneg flag to allow use of sextload if profitable.
13058 const TargetLowering &TLI, EVT VT,
13059 bool LegalOperations, SDNode *N, SDValue N0,
13060 ISD::LoadExtType ExtLoadType,
13061 ISD::NodeType ExtOpc,
13062 bool NonNegZExt = false) {
13064 return {};
13065
13066 // If this is zext nneg, see if it would make sense to treat it as a sext.
13067 if (NonNegZExt) {
13068 assert(ExtLoadType == ISD::ZEXTLOAD && ExtOpc == ISD::ZERO_EXTEND &&
13069 "Unexpected load type or opcode");
13070 for (SDNode *User : N0->uses()) {
13071 if (User->getOpcode() == ISD::SETCC) {
13072 ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
13074 ExtLoadType = ISD::SEXTLOAD;
13075 ExtOpc = ISD::SIGN_EXTEND;
13076 break;
13077 }
13078 }
13079 }
13080 }
13081
13082 // TODO: isFixedLengthVector() should be removed; any negative effects on
13083 // code generation should then be handled by the target's implementation of
13084 // isVectorLoadExtDesirable().
13085 if ((LegalOperations || VT.isFixedLengthVector() ||
13086 !cast<LoadSDNode>(N0)->isSimple()) &&
13087 !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType()))
13088 return {};
13089
13090 bool DoXform = true;
13092 if (!N0.hasOneUse())
13093 DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ExtOpc, SetCCs, TLI);
13094 if (VT.isVector())
13095 DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
13096 if (!DoXform)
13097 return {};
13098
13099 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
13100 SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
13101 LN0->getBasePtr(), N0.getValueType(),
13102 LN0->getMemOperand());
13103 Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc);
13104 // If the load value is used only by N, replace it via CombineTo N.
13105 bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
13106 Combiner.CombineTo(N, ExtLoad);
13107 if (NoReplaceTrunc) {
13108 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
13109 Combiner.recursivelyDeleteUnusedNodes(LN0);
13110 } else {
13111 SDValue Trunc =
13112 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
13113 Combiner.CombineTo(LN0, Trunc, ExtLoad.getValue(1));
13114 }
13115 return SDValue(N, 0); // Return N so it doesn't get rechecked!
13116}
13117
13118static SDValue
13120 bool LegalOperations, SDNode *N, SDValue N0,
13121 ISD::LoadExtType ExtLoadType, ISD::NodeType ExtOpc) {
13122 if (!N0.hasOneUse())
13123 return SDValue();
13124
13125 MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0);
13126 if (!Ld || Ld->getExtensionType() != ISD::NON_EXTLOAD)
13127 return SDValue();
13128
13129 if ((LegalOperations || !cast<MaskedLoadSDNode>(N0)->isSimple()) &&
13130 !TLI.isLoadExtLegalOrCustom(ExtLoadType, VT, Ld->getValueType(0)))
13131 return SDValue();
13132
13133 if (!TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
13134 return SDValue();
13135
13136 SDLoc dl(Ld);
13137 SDValue PassThru = DAG.getNode(ExtOpc, dl, VT, Ld->getPassThru());
13138 SDValue NewLoad = DAG.getMaskedLoad(
13139 VT, dl, Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(), Ld->getMask(),
13140 PassThru, Ld->getMemoryVT(), Ld->getMemOperand(), Ld->getAddressingMode(),
13141 ExtLoadType, Ld->isExpandingLoad());
13142 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), SDValue(NewLoad.getNode(), 1));
13143 return NewLoad;
13144}
13145
13147 bool LegalOperations) {
13148 assert((N->getOpcode() == ISD::SIGN_EXTEND ||
13149 N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext");
13150
13151 SDValue SetCC = N->getOperand(0);
13152 if (LegalOperations || SetCC.getOpcode() != ISD::SETCC ||
13153 !SetCC.hasOneUse() || SetCC.getValueType() != MVT::i1)
13154 return SDValue();
13155
13156 SDValue X = SetCC.getOperand(0);
13157 SDValue Ones = SetCC.getOperand(1);
13158 ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
13159 EVT VT = N->getValueType(0);
13160 EVT XVT = X.getValueType();
13161 // setge X, C is canonicalized to setgt, so we do not need to match that
13162 // pattern. The setlt sibling is folded in SimplifySelectCC() because it does
13163 // not require the 'not' op.
13164 if (CC == ISD::SETGT && isAllOnesConstant(Ones) && VT == XVT) {
13165 // Invert and smear/shift the sign bit:
13166 // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
13167 // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
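// e.g. for i8 X = 5: (setgt 5, -1) is true so the sext yields 0xFF, and
// (not 5) = 0xFA shifted arithmetically right by 7 is also 0xFF.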
13168 SDLoc DL(N);
13169 unsigned ShCt = VT.getSizeInBits() - 1;
13170 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13171 if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
13172 SDValue NotX = DAG.getNOT(DL, X, VT);
13173 SDValue ShiftAmount = DAG.getConstant(ShCt, DL, VT);
13174 auto ShiftOpcode =
13175 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL;
13176 return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount);
13177 }
13178 }
13179 return SDValue();
13180}
13181
13182SDValue DAGCombiner::foldSextSetcc(SDNode *N) {
13183 SDValue N0 = N->getOperand(0);
13184 if (N0.getOpcode() != ISD::SETCC)
13185 return SDValue();
13186
13187 SDValue N00 = N0.getOperand(0);
13188 SDValue N01 = N0.getOperand(1);
13189 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
13190 EVT VT = N->getValueType(0);
13191 EVT N00VT = N00.getValueType();
13192 SDLoc DL(N);
13193
13194 // Propagate fast-math-flags.
13195 SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
13196
13197 // On some architectures (such as SSE/NEON/etc) the SETCC result type is
13198 // the same size as the compared operands. Try to optimize sext(setcc())
13199 // if this is the case.
13200 if (VT.isVector() && !LegalOperations &&
13201 TLI.getBooleanContents(N00VT) ==
13203 EVT SVT = getSetCCResultType(N00VT);
13204
13205 // If we already have the desired type, don't change it.
13206 if (SVT != N0.getValueType()) {
13207 // We know that the # elements of the results is the same as the
13208 // # elements of the compare (and the # elements of the compare result
13209 // for that matter). Check to see that they are the same size. If so,
13210 // we know that the element size of the sext'd result matches the
13211 // element size of the compare operands.
13212 if (VT.getSizeInBits() == SVT.getSizeInBits())
13213 return DAG.getSetCC(DL, VT, N00, N01, CC);
13214
13215 // If the desired elements are smaller or larger than the source
13216 // elements, we can use a matching integer vector type and then
13217 // truncate/sign extend.
13218 EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
13219 if (SVT == MatchingVecType) {
13220 SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
13221 return DAG.getSExtOrTrunc(VsetCC, DL, VT);
13222 }
13223 }
13224
13225 // Try to eliminate the sext of a setcc by zexting the compare operands.
13226 if (N0.hasOneUse() && TLI.isOperationLegalOrCustom(ISD::SETCC, VT) &&
13228 bool IsSignedCmp = ISD::isSignedIntSetCC(CC);
13229 unsigned LoadOpcode = IsSignedCmp ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
13230 unsigned ExtOpcode = IsSignedCmp ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
13231
13232 // We have an unsupported narrow vector compare op that would be legal
13233 // if extended to the destination type. See if the compare operands
13234 // can be freely extended to the destination type.
13235 auto IsFreeToExtend = [&](SDValue V) {
13236 if (isConstantOrConstantVector(V, /*NoOpaques*/ true))
13237 return true;
13238 // Match a simple, non-extended load that can be converted to a
13239 // legal {z/s}ext-load.
13240 // TODO: Allow widening of an existing {z/s}ext-load?
13241 if (!(ISD::isNON_EXTLoad(V.getNode()) &&
13242 ISD::isUNINDEXEDLoad(V.getNode()) &&
13243 cast<LoadSDNode>(V)->isSimple() &&
13244 TLI.isLoadExtLegal(LoadOpcode, VT, V.getValueType())))
13245 return false;
13246
13247 // Non-chain users of this value must either be the setcc in this
13248 // sequence or extends that can be folded into the new {z/s}ext-load.
13249 for (SDNode::use_iterator UI = V->use_begin(), UE = V->use_end();
13250 UI != UE; ++UI) {
13251 // Skip uses of the chain and the setcc.
13252 SDNode *User = *UI;
13253 if (UI.getUse().getResNo() != 0 || User == N0.getNode())
13254 continue;
13255 // Extra users must have exactly the same cast we are about to create.
13256 // TODO: This restriction could be eased if ExtendUsesToFormExtLoad()
13257 // is enhanced similarly.
13258 if (User->getOpcode() != ExtOpcode || User->getValueType(0) != VT)
13259 return false;
13260 }
13261 return true;
13262 };
13263
13264 if (IsFreeToExtend(N00) && IsFreeToExtend(N01)) {
13265 SDValue Ext0 = DAG.getNode(ExtOpcode, DL, VT, N00);
13266 SDValue Ext1 = DAG.getNode(ExtOpcode, DL, VT, N01);
13267 return DAG.getSetCC(DL, VT, Ext0, Ext1, CC);
13268 }
13269 }
13270 }
13271
13272 // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
13273 // Here, T can be 1 or -1, depending on the type of the setcc and
13274 // getBooleanContents().
13275 unsigned SetCCWidth = N0.getScalarValueSizeInBits();
13276
13277 // To determine the "true" side of the select, we need to know the high bit
13278 // of the value returned by the setcc if it evaluates to true.
13279 // If the type of the setcc is i1, then the true case of the select is just
13280 // sext(i1 1), that is, -1.
13281 // If the type of the setcc is larger (say, i8) then the value of the high
13282 // bit depends on getBooleanContents(), so ask TLI for a real "true" value
13283 // of the appropriate width.
13284 SDValue ExtTrueVal = (SetCCWidth == 1)
13285 ? DAG.getAllOnesConstant(DL, VT)
13286 : DAG.getBoolConstant(true, DL, VT, N00VT);
13287 SDValue Zero = DAG.getConstant(0, DL, VT);
13288 if (SDValue SCC = SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
13289 return SCC;
13290
13291 if (!VT.isVector() && !shouldConvertSelectOfConstantsToMath(N0, VT, TLI)) {
13292 EVT SetCCVT = getSetCCResultType(N00VT);
13293 // Don't do this transform for i1 because there's a select transform
13294 // that would reverse it.
13295 // TODO: We should not do this transform at all without a target hook
13296 // because a sext is likely cheaper than a select?
13297 if (SetCCVT.getScalarSizeInBits() != 1 &&
13298 (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
13299 SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
13300 return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
13301 }
13302 }
13303
13304 return SDValue();
13305}
13306
13307SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
13308 SDValue N0 = N->getOperand(0);
13309 EVT VT = N->getValueType(0);
13310 SDLoc DL(N);
13311
13312 if (VT.isVector())
13313 if (SDValue FoldedVOp = SimplifyVCastOp(N, DL))
13314 return FoldedVOp;
13315
13316 // sext(undef) = 0 because the top bits will all be the same.
13317 if (N0.isUndef())
13318 return DAG.getConstant(0, DL, VT);
13319
13320 if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
13321 return Res;
13322
13323 // fold (sext (sext x)) -> (sext x)
13324 // fold (sext (aext x)) -> (sext x)
13325 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
13326 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));
13327
13328 // fold (sext (aext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
13329 // fold (sext (sext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
13333 N0.getOperand(0));
13334
13335 // fold (sext (sext_inreg x)) -> (sext (trunc x))
13336 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
13337 SDValue N00 = N0.getOperand(0);
13338 EVT ExtVT = cast<VTSDNode>(N0->getOperand(1))->getVT();
13339 if ((N00.getOpcode() == ISD::TRUNCATE || TLI.isTruncateFree(N00, ExtVT)) &&
13340 (!LegalTypes || TLI.isTypeLegal(ExtVT))) {
13341 SDValue T = DAG.getNode(ISD::TRUNCATE, DL, ExtVT, N00);
13342 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, T);
13343 }
13344 }
13345
13346 if (N0.getOpcode() == ISD::TRUNCATE) {
13347 // fold (sext (truncate (load x))) -> (sext (smaller load x))
13348 // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
13349 if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
13350 SDNode *oye = N0.getOperand(0).getNode();
13351 if (NarrowLoad.getNode() != N0.getNode()) {
13352 CombineTo(N0.getNode(), NarrowLoad);
13353 // CombineTo deleted the truncate, if needed, but not what's under it.
13354 AddToWorklist(oye);
13355 }
13356 return SDValue(N, 0); // Return N so it doesn't get rechecked!
13357 }
13358
13359 // See if the value being truncated is already sign extended. If so, just
13360 // eliminate the trunc/sext pair.
13361 SDValue Op = N0.getOperand(0);
13362 unsigned OpBits = Op.getScalarValueSizeInBits();
13363 unsigned MidBits = N0.getScalarValueSizeInBits();
13364 unsigned DestBits = VT.getScalarSizeInBits();
13365 unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
13366
13367 if (OpBits == DestBits) {
13368 // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
13369 // bits, it is already ready.
13370 if (NumSignBits > DestBits-MidBits)
13371 return Op;
13372 } else if (OpBits < DestBits) {
13373 // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
13374 // bits, just sext from i32.
13375 if (NumSignBits > OpBits-MidBits)
13376 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
13377 } else {
13378 // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
13379 // bits, just truncate to i32.
13380 if (NumSignBits > OpBits-MidBits)
13381 return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
13382 }
13383
13384 // fold (sext (truncate x)) -> (sextinreg x).
13385 if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
13386 N0.getValueType())) {
13387 if (OpBits < DestBits)
13388 Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
13389 else if (OpBits > DestBits)
13390 Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
13391 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
13392 DAG.getValueType(N0.getValueType()));
13393 }
13394 }
13395
13396 // Try to simplify (sext (load x)).
13397 if (SDValue foldedExt =
13398 tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
13400 return foldedExt;
13401
13402 if (SDValue foldedExt =
13403 tryToFoldExtOfMaskedLoad(DAG, TLI, VT, LegalOperations, N, N0,
13405 return foldedExt;
13406
13407 // fold (sext (load x)) to multiple smaller sextloads.
13408 // Only on illegal but splittable vectors.
13409 if (SDValue ExtLoad = CombineExtLoad(N))
13410 return ExtLoad;
13411
13412 // Try to simplify (sext (sextload x)).
13413 if (SDValue foldedExt = tryToFoldExtOfExtload(
13414 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD))
13415 return foldedExt;
13416
13417 // fold (sext (and/or/xor (load x), cst)) ->
13418 // (and/or/xor (sextload x), (sext cst))
13419 if (ISD::isBitwiseLogicOp(N0.getOpcode()) &&
13420 isa<LoadSDNode>(N0.getOperand(0)) &&
13421 N0.getOperand(1).getOpcode() == ISD::Constant &&
13422 (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
13423 LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
13424 EVT MemVT = LN00->getMemoryVT();
13425 if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) &&
13426 LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) {
13428 bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
13429 ISD::SIGN_EXTEND, SetCCs, TLI);
13430 if (DoXform) {
13431 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT,
13432 LN00->getChain(), LN00->getBasePtr(),
13433 LN00->getMemoryVT(),
13434 LN00->getMemOperand());
13436 SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
13437 ExtLoad, DAG.getConstant(Mask, DL, VT));
13438 ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND);
13439 bool NoReplaceTruncAnd = !N0.hasOneUse();
13440 bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
13441 CombineTo(N, And);
13442 // If N0 has multiple uses, change other uses as well.
13443 if (NoReplaceTruncAnd) {
13444 SDValue TruncAnd =
13446 CombineTo(N0.getNode(), TruncAnd);
13447 }
13448 if (NoReplaceTrunc) {
13449 DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
13450 } else {
13451 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
13452 LN00->getValueType(0), ExtLoad);
13453 CombineTo(LN00, Trunc, ExtLoad.getValue(1));
13454 }
13455 return SDValue(N,0); // Return N so it doesn't get rechecked!
13456 }
13457 }
13458 }
13459
13460 if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
13461 return V;
13462
13463 if (SDValue V = foldSextSetcc(N))
13464 return V;
13465
13466 // fold (sext x) -> (zext x) if the sign bit is known zero.
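// When the sign bit is already known zero both extensions produce the same
// value, and the nonneg flag preserves that knowledge for later combines.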
13467 if (!TLI.isSExtCheaperThanZExt(N0.getValueType(), VT) &&
13468 (!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
13469 DAG.SignBitIsZero(N0)) {
13471 Flags.setNonNeg(true);
13472 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0, Flags);
13473 }
13474
13475 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
13476 return NewVSel;
13477
13478 // Eliminate this sign extend by doing a negation in the destination type:
13479 // sext i32 (0 - (zext i8 X to i32)) to i64 --> 0 - (zext i8 X to i64)
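// This is safe because 0 - (zext i8 X to i32) always lies in [-255, 0], so
// the i32 result is equal to its own sign extension to i64.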
13480 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
13484 SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(1).getOperand(0), DL, VT);
13485 return DAG.getNegative(Zext, DL, VT);
13486 }
13487 // Eliminate this sign extend by doing a decrement in the destination type:
13488 // sext i32 ((zext i8 X to i32) + (-1)) to i64 --> (zext i8 X to i64) + (-1)
13489 if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
13493 SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
13494 return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
13495 }
13496
13497 // fold sext (not i1 X) -> add (zext i1 X), -1
13498 // TODO: This could be extended to handle bool vectors.
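// Both forms yield -1 when X == 0 and 0 when X == 1.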
13499 if (N0.getValueType() == MVT::i1 && isBitwiseNot(N0) && N0.hasOneUse() &&
13500 (!LegalOperations || (TLI.isOperationLegal(ISD::ZERO_EXTEND, VT) &&
13501 TLI.isOperationLegal(ISD::ADD, VT)))) {
13502 // If we can eliminate the 'not', the sext form should be better
13503 if (SDValue NewXor = visitXOR(N0.getNode())) {
13504 // Returning N0 is a form of in-visit replacement that may have
13505 // invalidated N0.
13506 if (NewXor.getNode() == N0.getNode()) {
13507 // Return SDValue here as the xor should have already been replaced in
13508 // this sext.
13509 return SDValue();
13510 }
13511
13512 // Return a new sext with the new xor.
13513 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NewXor);
13514 }
13515
13516 SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
13517 return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
13518 }
13519
13520 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, Level))
13521 return Res;
13522
13523 return SDValue();
13524}
13525
13526/// Given an extending node with a pop-count operand, if the target does not
13527/// support a pop-count in the narrow source type but does support it in the
13528/// destination type, widen the pop-count to the destination type.
13529static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG) {
13530 assert((Extend->getOpcode() == ISD::ZERO_EXTEND ||
13531 Extend->getOpcode() == ISD::ANY_EXTEND) && "Expected extend op");
13532
13533 SDValue CtPop = Extend->getOperand(0);
13534 if (CtPop.getOpcode() != ISD::CTPOP || !CtPop.hasOneUse())
13535 return SDValue();
13536
13537 EVT VT = Extend->getValueType(0);
13538 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13541 return SDValue();
13542
13543 // zext (ctpop X) --> ctpop (zext X)
13544 SDLoc DL(Extend);
13545 SDValue NewZext = DAG.getZExtOrTrunc(CtPop.getOperand(0), DL, VT);
13546 return DAG.getNode(ISD::CTPOP, DL, VT, NewZext);
13547}
13548
13549// If we have (zext (abs X)) where X is a type that will be promoted by type
13550// legalization, convert to (abs (sext X)). But don't extend past a legal type.
13551static SDValue widenAbs(SDNode *Extend, SelectionDAG &DAG) {
13552 assert(Extend->getOpcode() == ISD::ZERO_EXTEND && "Expected zero extend.");
13553
13554 EVT VT = Extend->getValueType(0);
13555 if (VT.isVector())
13556 return SDValue();
13557
13558 SDValue Abs = Extend->getOperand(0);
13559 if (Abs.getOpcode() != ISD::ABS || !Abs.hasOneUse())
13560 return SDValue();
13561
13562 EVT AbsVT = Abs.getValueType();
13563 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13564 if (TLI.getTypeAction(*DAG.getContext(), AbsVT) !=
13566 return SDValue();
13567
13568 EVT LegalVT = TLI.getTypeToTransformTo(*DAG.getContext(), AbsVT);
13569
13570 SDValue SExt =
13571 DAG.getNode(ISD::SIGN_EXTEND, SDLoc(Abs), LegalVT, Abs.getOperand(0));
13572 SDValue NewAbs = DAG.getNode(ISD::ABS, SDLoc(Abs), LegalVT, SExt);
13573 return DAG.getZExtOrTrunc(NewAbs, SDLoc(Extend), VT);
13574}
13575
13576SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
13577 SDValue N0 = N->getOperand(0);
13578 EVT VT = N->getValueType(0);
13579 SDLoc DL(N);
13580
13581 if (VT.isVector())
13582 if (SDValue FoldedVOp = SimplifyVCastOp(N, DL))
13583 return FoldedVOp;
13584
13585 // zext(undef) = 0
13586 if (N0.isUndef())
13587 return DAG.getConstant(0, DL, VT);
13588
13589 if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
13590 return Res;
13591
13592 // fold (zext (zext x)) -> (zext x)
13593 // fold (zext (aext x)) -> (zext x)
13594 if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
13596 if (N0.getOpcode() == ISD::ZERO_EXTEND)
13597 Flags.setNonNeg(N0->getFlags().hasNonNeg());
13598 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0), Flags);
13599 }
13600
13601 // fold (zext (aext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
13602 // fold (zext (zext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
13606 N0.getOperand(0));
13607
13608 // fold (zext (truncate x)) -> (zext x) or
13609 // (zext (truncate x)) -> (truncate x)
13610 // This is valid when the truncated bits of x are already zero.
13611 SDValue Op;
13612 KnownBits Known;
13613 if (isTruncateOf(DAG, N0, Op, Known)) {
13614 APInt TruncatedBits =
13615 (Op.getScalarValueSizeInBits() == N0.getScalarValueSizeInBits()) ?
13616 APInt(Op.getScalarValueSizeInBits(), 0) :
13617 APInt::getBitsSet(Op.getScalarValueSizeInBits(),
13619 std::min(Op.getScalarValueSizeInBits(),
13620 VT.getScalarSizeInBits()));
13621 if (TruncatedBits.isSubsetOf(Known.Zero)) {
13622 SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, DL, VT);
13623 DAG.salvageDebugInfo(*N0.getNode());
13624
13625 return ZExtOrTrunc;
13626 }
13627 }
13628
13629 // fold (zext (truncate x)) -> (and x, mask)
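// e.g. (zext (trunc i32 x to i8) back to i32) becomes (and x, 255).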
13630 if (N0.getOpcode() == ISD::TRUNCATE) {
13631 // fold (zext (truncate (load x))) -> (zext (smaller load x))
13632 // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
13633 if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
13634 SDNode *oye = N0.getOperand(0).getNode();
13635 if (NarrowLoad.getNode() != N0.getNode()) {
13636 CombineTo(N0.getNode(), NarrowLoad);
13637 // CombineTo deleted the truncate, if needed, but not what's under it.
13638 AddToWorklist(oye);
13639 }
13640 return SDValue(N, 0); // Return N so it doesn't get rechecked!
13641 }
13642
13643 EVT SrcVT = N0.getOperand(0).getValueType();
13644 EVT MinVT = N0.getValueType();
13645
13646 if (N->getFlags().hasNonNeg()) {
13647 SDValue Op = N0.getOperand(0);
13648 unsigned OpBits = SrcVT.getScalarSizeInBits();
13649 unsigned MidBits = MinVT.getScalarSizeInBits();
13650 unsigned DestBits = VT.getScalarSizeInBits();
13651 unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
13652
13653 if (OpBits == DestBits) {
13654 // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
13655 // bits, it is already ready.
13656 if (NumSignBits > DestBits - MidBits)
13657 return Op;
13658 } else if (OpBits < DestBits) {
13659 // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
13660 // bits, just sext from i32.
13661 // FIXME: This can probably be ZERO_EXTEND nneg?
13662 if (NumSignBits > OpBits - MidBits)
13663 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
13664 } else {
13665 // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
13666 // bits, just truncate to i32.
13667 if (NumSignBits > OpBits - MidBits)
13668 return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
13669 }
13670 }
13671
13672 // Try to mask before the extension to avoid having to generate a larger mask,
13673 // possibly over several sub-vectors.
13674 if (SrcVT.bitsLT(VT) && VT.isVector()) {
13675 if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
13677 SDValue Op = N0.getOperand(0);
13678 Op = DAG.getZeroExtendInReg(Op, DL, MinVT);
13679 AddToWorklist(Op.getNode());
13680 SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, DL, VT);
13681 // Transfer the debug info; the new node is equivalent to N0.
13682 DAG.transferDbgValues(N0, ZExtOrTrunc);
13683 return ZExtOrTrunc;
13684 }
13685 }
13686
13687 if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
13688 SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), DL, VT);
13689 AddToWorklist(Op.getNode());
13690 SDValue And = DAG.getZeroExtendInReg(Op, DL, MinVT);
13691 // We may safely transfer the debug info describing the truncate node over
13692 // to the equivalent and operation.
13693 DAG.transferDbgValues(N0, And);
13694 return And;
13695 }
13696 }
13697
13698 // Fold (zext (and (trunc x), cst)) -> (and x, cst),
13699 // if either of the casts is not free.
13700 if (N0.getOpcode() == ISD::AND &&
13701 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
13702 N0.getOperand(1).getOpcode() == ISD::Constant &&
13703 (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0), N0.getValueType()) ||
13704 !TLI.isZExtFree(N0.getValueType(), VT))) {
13705 SDValue X = N0.getOperand(0).getOperand(0);
13706 X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
13708 return DAG.getNode(ISD::AND, DL, VT,
13709 X, DAG.getConstant(Mask, DL, VT));
13710 }
13711
13712 // Try to simplify (zext (load x)).
13713 if (SDValue foldedExt = tryToFoldExtOfLoad(
13714 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD,
13715 ISD::ZERO_EXTEND, N->getFlags().hasNonNeg()))
13716 return foldedExt;
13717
13718 if (SDValue foldedExt =
13719 tryToFoldExtOfMaskedLoad(DAG, TLI, VT, LegalOperations, N, N0,
13721 return foldedExt;
13722
13723 // fold (zext (load x)) to multiple smaller zextloads.
13724 // Only on illegal but splittable vectors.
13725 if (SDValue ExtLoad = CombineExtLoad(N))
13726 return ExtLoad;
13727
13728 // fold (zext (and/or/xor (load x), cst)) ->
13729 // (and/or/xor (zextload x), (zext cst))
13730 // Unless (and (load x) cst) will match as a zextload already and has
13731 // additional users, or the zext is already free.
13732 if (ISD::isBitwiseLogicOp(N0.getOpcode()) && !TLI.isZExtFree(N0, VT) &&
13733 isa<LoadSDNode>(N0.getOperand(0)) &&
13734 N0.getOperand(1).getOpcode() == ISD::Constant &&
13735 (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
13736 LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
13737 EVT MemVT = LN00->getMemoryVT();
13738 if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) &&
13739 LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
13740 bool DoXform = true;
13742 if (!N0.hasOneUse()) {
13743 if (N0.getOpcode() == ISD::AND) {
13744 auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
13745 EVT LoadResultTy = AndC->getValueType(0);
13746 EVT ExtVT;
13747 if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT))
13748 DoXform = false;
13749 }
13750 }
13751 if (DoXform)
13752 DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
13753 ISD::ZERO_EXTEND, SetCCs, TLI);
13754 if (DoXform) {
13755 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT,
13756 LN00->getChain(), LN00->getBasePtr(),
13757 LN00->getMemoryVT(),
13758 LN00->getMemOperand());
13760 SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
13761 ExtLoad, DAG.getConstant(Mask, DL, VT));
13762 ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
13763 bool NoReplaceTruncAnd = !N0.hasOneUse();
13764 bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
13765 CombineTo(N, And);
13766 // If N0 has multiple uses, change other uses as well.
13767 if (NoReplaceTruncAnd) {
13768 SDValue TruncAnd =
13769 DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
13770 CombineTo(N0.getNode(), TruncAnd);
13771 }
13772 if (NoReplaceTrunc) {
13773 DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
13774 } else {
13775 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
13776 LN00->getValueType(0), ExtLoad);
13777 CombineTo(LN00, Trunc, ExtLoad.getValue(1));
13778 }
13779 return SDValue(N,0); // Return N so it doesn't get rechecked!
13780 }
13781 }
13782 }
13783
13784 // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
13785 // (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
13786 if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
13787 return ZExtLoad;
13788
13789 // Try to simplify (zext (zextload x)).
13790 if (SDValue foldedExt = tryToFoldExtOfExtload(
13791 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD))
13792 return foldedExt;
13793
13794 if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
13795 return V;
13796
13797 if (N0.getOpcode() == ISD::SETCC) {
13798 // Propagate fast-math-flags.
13799 SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
13800
13801 // Only do this before legalize for now.
13802 if (!LegalOperations && VT.isVector() &&
13803 N0.getValueType().getVectorElementType() == MVT::i1) {
13804 EVT N00VT = N0.getOperand(0).getValueType();
13805 if (getSetCCResultType(N00VT) == N0.getValueType())
13806 return SDValue();
13807
13808 // We know that the # elements of the results is the same as the #
13809 // elements of the compare (and the # elements of the compare result for
13810 // that matter). Check to see that they are the same size. If so, we know
13811 // that the element size of the sext'd result matches the element size of
13812 // the compare operands.
13813 if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
13814 // zext(setcc) -> zext_in_reg(vsetcc) for vectors.
13815 SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
13816 N0.getOperand(1), N0.getOperand(2));
13817 return DAG.getZeroExtendInReg(VSetCC, DL, N0.getValueType());
13818 }
13819
13820 // If the desired elements are smaller or larger than the source
13821 // elements we can use a matching integer vector type and then
13822 // truncate/any extend followed by zext_in_reg.
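// E.g. for zext (v4i1 setcc of v4i32 operands) to v4i64: perform the compare
// as a v4i32 setcc, any-extend that result to v4i64, then clear all but bit 0
// of each element with the final zext_in_reg.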
13823 EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
13824 SDValue VsetCC =
13825 DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
13826 N0.getOperand(1), N0.getOperand(2));
13827 return DAG.getZeroExtendInReg(DAG.getAnyExtOrTrunc(VsetCC, DL, VT), DL,
13828 N0.getValueType());
13829 }
13830
13831 // zext(setcc x,y,cc) -> zext(select x, y, true, false, cc)
13832 EVT N0VT = N0.getValueType();
13833 EVT N00VT = N0.getOperand(0).getValueType();
13834 if (SDValue SCC = SimplifySelectCC(
13835 DL, N0.getOperand(0), N0.getOperand(1),
13836 DAG.getBoolConstant(true, DL, N0VT, N00VT),
13837 DAG.getBoolConstant(false, DL, N0VT, N00VT),
13838 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
13839 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, SCC);
13840 }
13841
13842 // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
13843 if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
13844 !TLI.isZExtFree(N0, VT)) {
13845 SDValue ShVal = N0.getOperand(0);
13846 SDValue ShAmt = N0.getOperand(1);
13847 if (auto *ShAmtC = dyn_cast<ConstantSDNode>(ShAmt)) {
13848 if (ShVal.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse()) {
13849 if (N0.getOpcode() == ISD::SHL) {
13850 // If the original shl may be shifting out bits, do not perform this
13851 // transformation.
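// The inner zext guarantees that the top (dest width - source width) bits of
// ShVal are zero, so any shift amount no larger than that cannot shift out
// set bits.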
13852 unsigned KnownZeroBits = ShVal.getValueSizeInBits() -
13853 ShVal.getOperand(0).getValueSizeInBits();
13854 if (ShAmtC->getAPIntValue().ugt(KnownZeroBits)) {
13855 // If the shift is too large, then see if we can deduce that the
13856 // shift is safe anyway.
13857 // Create a mask that has ones for the bits being shifted out.
13858 APInt ShiftOutMask =
13859 APInt::getHighBitsSet(ShVal.getValueSizeInBits(),
13860 ShAmtC->getAPIntValue().getZExtValue());
13861
13862 // Check if the bits being shifted out are known to be zero.
13863 if (!DAG.MaskedValueIsZero(ShVal, ShiftOutMask))
13864 return SDValue();
13865 }
13866 }
13867
13868 // Ensure that the shift amount is wide enough for the shifted value.
13869 if (Log2_32_Ceil(VT.getSizeInBits()) > ShAmt.getValueSizeInBits())
13870 ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
13871
13872 return DAG.getNode(N0.getOpcode(), DL, VT,
13873 DAG.getNode(ISD::ZERO_EXTEND, DL, VT, ShVal), ShAmt);
13874 }
13875 }
13876 }
13877
13878 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
13879 return NewVSel;
13880
13881 if (SDValue NewCtPop = widenCtPop(N, DAG))
13882 return NewCtPop;
13883
13884 if (SDValue V = widenAbs(N, DAG))
13885 return V;
13886
13887 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, Level))
13888 return Res;
13889
13890 // CSE zext nneg with sext if the zext is not free.
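// A zero-extend carrying the nneg flag has a non-negative operand, so it
// yields the same value as a sign-extend of that operand; reusing an existing
// sext node avoids keeping both forms alive.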
13891 if (N->getFlags().hasNonNeg() && !TLI.isZExtFree(N0.getValueType(), VT)) {
13892 SDNode *CSENode = DAG.getNodeIfExists(ISD::SIGN_EXTEND, N->getVTList(), N0);
13893 if (CSENode)
13894 return SDValue(CSENode, 0);
13895 }
13896
13897 return SDValue();
13898}
13899
13900SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
13901 SDValue N0 = N->getOperand(0);
13902 EVT VT = N->getValueType(0);
13903 SDLoc DL(N);
13904
13905 // aext(undef) = undef
13906 if (N0.isUndef())
13907 return DAG.getUNDEF(VT);
13908
13909 if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
13910 return Res;
13911
13912 // fold (aext (aext x)) -> (aext x)
13913 // fold (aext (zext x)) -> (zext x)
13914 // fold (aext (sext x)) -> (sext x)
13915 if (N0.getOpcode() == ISD::ANY_EXTEND || N0.getOpcode() == ISD::ZERO_EXTEND ||
13916 N0.getOpcode() == ISD::SIGN_EXTEND) {
13917 SDNodeFlags Flags;
13918 if (N0.getOpcode() == ISD::ZERO_EXTEND)
13919 Flags.setNonNeg(N0->getFlags().hasNonNeg());
13920 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0), Flags);
13921 }
13922
13923 // fold (aext (aext_extend_vector_inreg x)) -> (aext_extend_vector_inreg x)
13924 // fold (aext (zext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
13925 // fold (aext (sext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
13926 if (N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
13927 N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG ||
13928 N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG)
13929 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0));
13930
13931 // fold (aext (truncate (load x))) -> (aext (smaller load x))
13932 // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
13933 if (N0.getOpcode() == ISD::TRUNCATE) {
13934 if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
13935 SDNode *oye = N0.getOperand(0).getNode();
13936 if (NarrowLoad.getNode() != N0.getNode()) {
13937 CombineTo(N0.getNode(), NarrowLoad);
13938 // CombineTo deleted the truncate, if needed, but not what's under it.
13939 AddToWorklist(oye);
13940 }
13941 return SDValue(N, 0); // Return N so it doesn't get rechecked!
13942 }
13943 }
13944
13945 // fold (aext (truncate x))
13946 if (N0.getOpcode() == ISD::TRUNCATE)
13947 return DAG.getAnyExtOrTrunc(N0.getOperand(0), DL, VT);
13948
13949 // Fold (aext (and (trunc x), cst)) -> (and x, cst)
13950 // if the trunc is not free.
13951 if (N0.getOpcode() == ISD::AND &&
13952 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
13953 N0.getOperand(1).getOpcode() == ISD::Constant &&
13954 !TLI.isTruncateFree(N0.getOperand(0).getOperand(0), N0.getValueType())) {
13955 SDValue X = DAG.getAnyExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
13956 SDValue Y = DAG.getNode(ISD::ANY_EXTEND, DL, VT, N0.getOperand(1));
13957 assert(isa<ConstantSDNode>(Y) && "Expected constant to be folded!");
13958 return DAG.getNode(ISD::AND, DL, VT, X, Y);
13959 }
13960
13961 // fold (aext (load x)) -> (aext (truncate (extload x)))
13962 // None of the supported targets knows how to perform load and any_ext
13963 // on vectors in one instruction, so attempt to fold to zext instead.
13964 if (VT.isVector()) {
13965 // Try to simplify (zext (load x)).
13966 if (SDValue foldedExt =
13967 tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
13968 ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
13969 return foldedExt;
13970 } else if (ISD::isNON_EXTLoad(N0.getNode()) &&
13971 ISD::isUNINDEXEDLoad(N0.getNode()) &&
13972 TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
13973 bool DoXform = true;
13974 SmallVector<SDNode *, 4> SetCCs;
13975 if (!N0.hasOneUse())
13976 DoXform =
13977 ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
13978 if (DoXform) {
13979 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
13980 SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, LN0->getChain(),
13981 LN0->getBasePtr(), N0.getValueType(),
13982 LN0->getMemOperand());
13983 ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);
13984 // If the load value is used only by N, replace it via CombineTo N.
13985 bool NoReplaceTrunc = N0.hasOneUse();
13986 CombineTo(N, ExtLoad);
13987 if (NoReplaceTrunc) {
13988 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
13989 recursivelyDeleteUnusedNodes(LN0);
13990 } else {
13991 SDValue Trunc =
13992 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
13993 CombineTo(LN0, Trunc, ExtLoad.getValue(1));
13994 }
13995 return SDValue(N, 0); // Return N so it doesn't get rechecked!
13996 }
13997 }
13998
13999 // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
14000 // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
14001 // fold (aext ( extload x)) -> (aext (truncate (extload x)))
14002 if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) &&
14003 ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
14004 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14005 ISD::LoadExtType ExtType = LN0->getExtensionType();
14006 EVT MemVT = LN0->getMemoryVT();
14007 if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
14008 SDValue ExtLoad =
14009 DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), LN0->getBasePtr(),
14010 MemVT, LN0->getMemOperand());
14011 CombineTo(N, ExtLoad);
14012 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
14013 recursivelyDeleteUnusedNodes(LN0);
14014 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14015 }
14016 }
14017
14018 if (N0.getOpcode() == ISD::SETCC) {
14019 // Propagate fast-math-flags.
14020 SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
14021
14022 // For vectors:
14023 // aext(setcc) -> vsetcc
14024 // aext(setcc) -> truncate(vsetcc)
14025 // aext(setcc) -> aext(vsetcc)
14026 // Only do this before legalize for now.
14027 if (VT.isVector() && !LegalOperations) {
14028 EVT N00VT = N0.getOperand(0).getValueType();
14029 if (getSetCCResultType(N00VT) == N0.getValueType())
14030 return SDValue();
14031
14032 // We know that the # elements of the results is the same as the
14033 // # elements of the compare (and the # elements of the compare result
14034 // for that matter). Check to see that they are the same size. If so,
14035 // we know that the element size of the sext'd result matches the
14036 // element size of the compare operands.
14037 if (VT.getSizeInBits() == N00VT.getSizeInBits())
14038 return DAG.getSetCC(DL, VT, N0.getOperand(0), N0.getOperand(1),
14039 cast<CondCodeSDNode>(N0.getOperand(2))->get());
14040
14041 // If the desired elements are smaller or larger than the source
14042 // elements we can use a matching integer vector type and then
14043 // truncate/any extend
14044 EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
14045 SDValue VsetCC = DAG.getSetCC(
14046 DL, MatchingVectorType, N0.getOperand(0), N0.getOperand(1),
14047 cast<CondCodeSDNode>(N0.getOperand(2))->get());
14048 return DAG.getAnyExtOrTrunc(VsetCC, DL, VT);
14049 }
14050
14051 // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
14052 if (SDValue SCC = SimplifySelectCC(
14053 DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
14054 DAG.getConstant(0, DL, VT),
14055 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
14056 return SCC;
14057 }
14058
14059 if (SDValue NewCtPop = widenCtPop(N, DAG))
14060 return NewCtPop;
14061
14062 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, Level))
14063 return Res;
14064
14065 return SDValue();
14066}
14067
14068SDValue DAGCombiner::visitAssertExt(SDNode *N) {
14069 unsigned Opcode = N->getOpcode();
14070 SDValue N0 = N->getOperand(0);
14071 SDValue N1 = N->getOperand(1);
14072 EVT AssertVT = cast<VTSDNode>(N1)->getVT();
14073
14074 // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
14075 if (N0.getOpcode() == Opcode &&
14076 AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
14077 return N0;
14078
14079 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
14080 N0.getOperand(0).getOpcode() == Opcode) {
14081 // We have an assert, truncate, assert sandwich. Make one stronger assert
14082 // by applying the smallest asserted type to the larger source value.
14083 // This eliminates the later assert:
14084 // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
14085 // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
14086 SDLoc DL(N);
14087 SDValue BigA = N0.getOperand(0);
14088 EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
14089 EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT;
14090 SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
14091 SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
14092 BigA.getOperand(0), MinAssertVTVal);
14093 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
14094 }
14095
14096 // If we have (AssertZext (truncate (AssertSext X, iX)), iY) and Y is smaller
14097 // than X. Just move the AssertZext in front of the truncate and drop the
14098 // AssertSExt.
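// Example: (AssertZext (trunc (AssertSext X, i32) to i16), i8)
//            -> (trunc (AssertZext X, i8) to i16)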
14099 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
14100 N0.getOperand(0).getOpcode() == ISD::AssertSext &&
14101 Opcode == ISD::AssertZext) {
14102 SDValue BigA = N0.getOperand(0);
14103 EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
14104 if (AssertVT.bitsLT(BigA_AssertVT)) {
14105 SDLoc DL(N);
14106 SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
14107 BigA.getOperand(0), N1);
14108 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
14109 }
14110 }
14111
14112 return SDValue();
14113}
14114
14115SDValue DAGCombiner::visitAssertAlign(SDNode *N) {
14116 SDLoc DL(N);
14117
14118 Align AL = cast<AssertAlignSDNode>(N)->getAlign();
14119 SDValue N0 = N->getOperand(0);
14120
14121 // Fold (assertalign (assertalign x, AL0), AL1) ->
14122 // (assertalign x, max(AL0, AL1))
14123 if (auto *AAN = dyn_cast<AssertAlignSDNode>(N0))
14124 return DAG.getAssertAlign(DL, N0.getOperand(0),
14125 std::max(AL, AAN->getAlign()));
14126
14127 // In rare cases, there are trivial arithmetic ops in source operands. Sink
14128 // this assert down to source operands so that those arithmetic ops could be
14129 // exposed to the DAG combining.
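// If either operand is already known to be at least AL-aligned, attaching the
// assertion to the remaining operand(s) preserves the alignment fact, so the
// add/sub can be rebuilt on top of the asserted values.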
14130 switch (N0.getOpcode()) {
14131 default:
14132 break;
14133 case ISD::ADD:
14134 case ISD::SUB: {
14135 unsigned AlignShift = Log2(AL);
14136 SDValue LHS = N0.getOperand(0);
14137 SDValue RHS = N0.getOperand(1);
14138 unsigned LHSAlignShift = DAG.computeKnownBits(LHS).countMinTrailingZeros();
14139 unsigned RHSAlignShift = DAG.computeKnownBits(RHS).countMinTrailingZeros();
14140 if (LHSAlignShift >= AlignShift || RHSAlignShift >= AlignShift) {
14141 if (LHSAlignShift < AlignShift)
14142 LHS = DAG.getAssertAlign(DL, LHS, AL);
14143 if (RHSAlignShift < AlignShift)
14144 RHS = DAG.getAssertAlign(DL, RHS, AL);
14145 return DAG.getNode(N0.getOpcode(), DL, N0.getValueType(), LHS, RHS);
14146 }
14147 break;
14148 }
14149 }
14150
14151 return SDValue();
14152}
14153
14154/// If the result of a load is shifted/masked/truncated to an effectively
14155/// narrower type, try to transform the load to a narrower type and/or
14156/// use an extending load.
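/// For example, on a little-endian target
///   (i32 (srl (i32 (load p)), 16))
/// can be rewritten as
///   (i32 (zextload (p + 2) from i16)).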
14157SDValue DAGCombiner::reduceLoadWidth(SDNode *N) {
14158 unsigned Opc = N->getOpcode();
14159
14160 ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
14161 SDValue N0 = N->getOperand(0);
14162 EVT VT = N->getValueType(0);
14163 EVT ExtVT = VT;
14164
14165 // This transformation isn't valid for vector loads.
14166 if (VT.isVector())
14167 return SDValue();
14168
14169 // The ShAmt variable is used to indicate that we've consumed a right
14170 // shift, i.e. we want to narrow the width of the load by skipping the ShAmt
14171 // least significant bits, which are not part of the narrowed load.
14172 unsigned ShAmt = 0;
14173 // A special case is when the least significant bits from the load are masked
14174 // away, but using an AND rather than a right shift. ShiftedOffset is used to
14175 // indicate that the narrowed load should be left-shifted ShiftedOffset bits
14176 // to get the result.
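// For example, (and (i32 (load p)), 0xFF00) only needs one byte of the load;
// on a little-endian target the byte at p+1 is loaded and the result is then
// shifted left by 8 to recreate the masked value.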
14177 unsigned ShiftedOffset = 0;
14178 // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
14179 // extended to VT.
14180 if (Opc == ISD::SIGN_EXTEND_INREG) {
14181 ExtType = ISD::SEXTLOAD;
14182 ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
14183 } else if (Opc == ISD::SRL || Opc == ISD::SRA) {
14184 // Another special-case: SRL/SRA is basically zero/sign-extending a narrower
14185 // value, or it may be shifting a higher subword, half or byte into the
14186 // lowest bits.
14187
14188 // Only handle shift with constant shift amount, and the shiftee must be a
14189 // load.
14190 auto *LN = dyn_cast<LoadSDNode>(N0);
14191 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
14192 if (!N1C || !LN)
14193 return SDValue();
14194 // If the shift amount is larger than the memory type then we're not
14195 // accessing any of the loaded bytes.
14196 ShAmt = N1C->getZExtValue();
14197 uint64_t MemoryWidth = LN->getMemoryVT().getScalarSizeInBits();
14198 if (MemoryWidth <= ShAmt)
14199 return SDValue();
14200 // Attempt to fold away the SRL by using ZEXTLOAD and SRA by using SEXTLOAD.
14201 ExtType = Opc == ISD::SRL ? ISD::ZEXTLOAD : ISD::SEXTLOAD;
14202 ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt);
14203 // If original load is a SEXTLOAD then we can't simply replace it by a
14204 // ZEXTLOAD (we could potentially replace it by a more narrow SEXTLOAD
14205 // followed by a ZEXT, but that is not handled at the moment). Similarly if
14206 // the original load is a ZEXTLOAD and we want to use a SEXTLOAD.
14207 if ((LN->getExtensionType() == ISD::SEXTLOAD ||
14208 LN->getExtensionType() == ISD::ZEXTLOAD) &&
14209 LN->getExtensionType() != ExtType)
14210 return SDValue();
14211 } else if (Opc == ISD::AND) {
14212 // An AND with a constant mask is the same as a truncate + zero-extend.
14213 auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
14214 if (!AndC)
14215 return SDValue();
14216
14217 const APInt &Mask = AndC->getAPIntValue();
14218 unsigned ActiveBits = 0;
14219 if (Mask.isMask()) {
14220 ActiveBits = Mask.countr_one();
14221 } else if (Mask.isShiftedMask(ShAmt, ActiveBits)) {
14222 ShiftedOffset = ShAmt;
14223 } else {
14224 return SDValue();
14225 }
14226
14227 ExtType = ISD::ZEXTLOAD;
14228 ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
14229 }
14230
14231 // In case Opc==SRL we've already prepared ExtVT/ExtType/ShAmt based on doing
14232 // a right shift. Here we redo some of those checks, to possibly adjust the
14233 // ExtVT even further based on "a masking AND". We could also end up here for
14234 // other reasons (e.g. based on Opc==TRUNCATE) and that is why some checks
14235 // need to be done here as well.
14236 if (Opc == ISD::SRL || N0.getOpcode() == ISD::SRL) {
14237 SDValue SRL = Opc == ISD::SRL ? SDValue(N, 0) : N0;
14238 // Bail out when the SRL has more than one use. This is done for historical
14239 // (undocumented) reasons. Maybe the intent was to guard the AND-masking
14240 // check below? And maybe it could be non-profitable to do the transform in
14241 // case the SRL has multiple uses and we get here with Opc!=ISD::SRL?
14242 // FIXME: Can't we just skip this check for the Opc==ISD::SRL case?
14243 if (!SRL.hasOneUse())
14244 return SDValue();
14245
14246 // Only handle shift with constant shift amount, and the shiftee must be a
14247 // load.
14248 auto *LN = dyn_cast<LoadSDNode>(SRL.getOperand(0));
14249 auto *SRL1C = dyn_cast<ConstantSDNode>(SRL.getOperand(1));
14250 if (!SRL1C || !LN)
14251 return SDValue();
14252
14253 // If the shift amount is larger than the input type then we're not
14254 // accessing any of the loaded bytes. If the load was a zextload/extload
14255 // then the result of the shift+trunc is zero/undef (handled elsewhere).
14256 ShAmt = SRL1C->getZExtValue();
14257 uint64_t MemoryWidth = LN->getMemoryVT().getSizeInBits();
14258 if (ShAmt >= MemoryWidth)
14259 return SDValue();
14260
14261 // Because a SRL must be assumed to *need* to zero-extend the high bits
14262 // (as opposed to anyext the high bits), we can't combine the zextload
14263 // lowering of SRL and an sextload.
14264 if (LN->getExtensionType() == ISD::SEXTLOAD)
14265 return SDValue();
14266
14267 // Avoid reading outside the memory accessed by the original load (could
14268 // happen if we only adjust the load base pointer by ShAmt). Instead we
14269 // try to narrow the load even further. The typical scenario here is:
14270 // (i64 (truncate (i96 (srl (load x), 64)))) ->
14271 // (i64 (truncate (i96 (zextload (load i32 + offset) from i32))))
14272 if (ExtVT.getScalarSizeInBits() > MemoryWidth - ShAmt) {
14273 // Don't replace sextload by zextload.
14274 if (ExtType == ISD::SEXTLOAD)
14275 return SDValue();
14276 // Narrow the load.
14277 ExtType = ISD::ZEXTLOAD;
14278 ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt);
14279 }
14280
14281 // If the SRL is only used by a masking AND, we may be able to adjust
14282 // the ExtVT to make the AND redundant.
14283 SDNode *Mask = *(SRL->use_begin());
14284 if (SRL.hasOneUse() && Mask->getOpcode() == ISD::AND &&
14285 isa<ConstantSDNode>(Mask->getOperand(1))) {
14286 unsigned Offset, ActiveBits;
14287 const APInt& ShiftMask = Mask->getConstantOperandAPInt(1);
14288 if (ShiftMask.isMask()) {
14289 EVT MaskedVT =
14290 EVT::getIntegerVT(*DAG.getContext(), ShiftMask.countr_one());
14291 // If the mask is smaller, recompute the type.
14292 if ((ExtVT.getScalarSizeInBits() > MaskedVT.getScalarSizeInBits()) &&
14293 TLI.isLoadExtLegal(ExtType, SRL.getValueType(), MaskedVT))
14294 ExtVT = MaskedVT;
14295 } else if (ExtType == ISD::ZEXTLOAD &&
14296 ShiftMask.isShiftedMask(Offset, ActiveBits) &&
14297 (Offset + ShAmt) < VT.getScalarSizeInBits()) {
14298 EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
14299 // If the mask is shifted we can use a narrower load and a shl to insert
14300 // the trailing zeros.
14301 if (((Offset + ActiveBits) <= ExtVT.getScalarSizeInBits()) &&
14302 TLI.isLoadExtLegal(ExtType, SRL.getValueType(), MaskedVT)) {
14303 ExtVT = MaskedVT;
14304 ShAmt = Offset + ShAmt;
14305 ShiftedOffset = Offset;
14306 }
14307 }
14308 }
14309
14310 N0 = SRL.getOperand(0);
14311 }
14312
14313 // If the load is shifted left (and the result isn't shifted back right), we
14314 // can fold a truncate through the shift. The typical scenario is that N
14315 // points at a TRUNCATE here so the attempted fold is:
14316 // (truncate (shl (load x), c)) -> (shl (narrow load x), c)
14317 // ShLeftAmt will indicate how much a narrowed load should be shifted left.
14318 unsigned ShLeftAmt = 0;
14319 if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
14320 ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
14321 if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
14322 ShLeftAmt = N01->getZExtValue();
14323 N0 = N0.getOperand(0);
14324 }
14325 }
14326
14327 // If we haven't found a load, we can't narrow it.
14328 if (!isa<LoadSDNode>(N0))
14329 return SDValue();
14330
14331 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14332 // Reducing the width of a volatile load is illegal. For atomics, we may be
14333 // able to reduce the width provided we never widen again. (see D66309)
14334 if (!LN0->isSimple() ||
14335 !isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
14336 return SDValue();
14337
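// On big-endian targets the bytes holding the least significant bits sit at
// the end of the original access, so the pointer adjustment below has to be
// measured from the opposite end of the loaded value.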
14338 auto AdjustBigEndianShift = [&](unsigned ShAmt) {
14339 unsigned LVTStoreBits =
14340 LN0->getMemoryVT().getStoreSizeInBits().getFixedValue();
14341 unsigned EVTStoreBits = ExtVT.getStoreSizeInBits().getFixedValue();
14342 return LVTStoreBits - EVTStoreBits - ShAmt;
14343 };
14344
14345 // We need to adjust the pointer to the load by ShAmt bits in order to load
14346 // the correct bytes.
14347 unsigned PtrAdjustmentInBits =
14348 DAG.getDataLayout().isBigEndian() ? AdjustBigEndianShift(ShAmt) : ShAmt;
14349
14350 uint64_t PtrOff = PtrAdjustmentInBits / 8;
14351 SDLoc DL(LN0);
14352 // The original load itself didn't wrap, so an offset within it doesn't.
14353 SDNodeFlags Flags;
14354 Flags.setNoUnsignedWrap(true);
14355 SDValue NewPtr = DAG.getMemBasePlusOffset(
14356 LN0->getBasePtr(), TypeSize::getFixed(PtrOff), DL, Flags);
14357 AddToWorklist(NewPtr.getNode());
14358
14359 SDValue Load;
14360 if (ExtType == ISD::NON_EXTLOAD)
14361 Load = DAG.getLoad(VT, DL, LN0->getChain(), NewPtr,
14362 LN0->getPointerInfo().getWithOffset(PtrOff),
14363 LN0->getOriginalAlign(),
14364 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
14365 else
14366 Load = DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), NewPtr,
14367 LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
14368 LN0->getOriginalAlign(),
14369 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
14370
14371 // Replace the old load's chain with the new load's chain.
14372 WorklistRemover DeadNodes(*this);
14373 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
14374
14375 // Shift the result left, if we've swallowed a left shift.
14376 SDValue Result = Load;
14377 if (ShLeftAmt != 0) {
14378 EVT ShImmTy = getShiftAmountTy(Result.getValueType());
14379 if (!isUIntN(ShImmTy.getScalarSizeInBits(), ShLeftAmt))
14380 ShImmTy = VT;
14381 // If the shift amount is as large as the result size (but, presumably,
14382 // no larger than the source) then the useful bits of the result are
14383 // zero; we can't simply return the shortened shift, because the result
14384 // of that operation is undefined.
14385 if (ShLeftAmt >= VT.getScalarSizeInBits())
14386 Result = DAG.getConstant(0, DL, VT);
14387 else
14388 Result = DAG.getNode(ISD::SHL, DL, VT,
14389 Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
14390 }
14391
14392 if (ShiftedOffset != 0) {
14393 // We're using a shifted mask, so the load now has an offset. This means
14394 // the data has been loaded into lower bits of the result than it would have
14395 // been otherwise, so shift the loaded value left to put it back into the
14396 // correct position.
14397 SDValue ShiftC = DAG.getConstant(ShiftedOffset, DL, VT);
14398 Result = DAG.getNode(ISD::SHL, DL, VT, Result, ShiftC);
14399 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
14400 }
14401
14402 // Return the new loaded value.
14403 return Result;
14404}
14405
14406SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
14407 SDValue N0 = N->getOperand(0);
14408 SDValue N1 = N->getOperand(1);
14409 EVT VT = N->getValueType(0);
14410 EVT ExtVT = cast<VTSDNode>(N1)->getVT();
14411 unsigned VTBits = VT.getScalarSizeInBits();
14412 unsigned ExtVTBits = ExtVT.getScalarSizeInBits();
14413
14414 // sext_in_reg(undef) = 0 because the top bits will all be the same.
14415 if (N0.isUndef())
14416 return DAG.getConstant(0, SDLoc(N), VT);
14417
14418 // fold (sext_in_reg c1) -> c1
14419 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
14420 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
14421
14422 // If the input is already sign extended, just drop the extension.
14423 if (ExtVTBits >= DAG.ComputeMaxSignificantBits(N0))
14424 return N0;
14425
14426 // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
14427 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
14428 ExtVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
14429 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0.getOperand(0),
14430 N1);
14431
14432 // fold (sext_in_reg (sext x)) -> (sext x)
14433 // fold (sext_in_reg (aext x)) -> (sext x)
14434 // if x is small enough or if we know that x has more than 1 sign bit and the
14435 // sign_extend_inreg is extending from one of them.
14436 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
14437 SDValue N00 = N0.getOperand(0);
14438 unsigned N00Bits = N00.getScalarValueSizeInBits();
14439 if ((N00Bits <= ExtVTBits ||
14440 DAG.ComputeMaxSignificantBits(N00) <= ExtVTBits) &&
14441 (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
14442 return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
14443 }
14444
14445 // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
14446 // if x is small enough or if we know that x has more than 1 sign bit and the
14447 // sign_extend_inreg is extending from one of them.
14448 if (ISD::isExtVecInRegOpcode(N0.getOpcode())) {
14449 SDValue N00 = N0.getOperand(0);
14450 unsigned N00Bits = N00.getScalarValueSizeInBits();
14451 unsigned DstElts = N0.getValueType().getVectorMinNumElements();
14452 unsigned SrcElts = N00.getValueType().getVectorMinNumElements();
14453 bool IsZext = N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
14454 APInt DemandedSrcElts = APInt::getLowBitsSet(SrcElts, DstElts);
14455 if ((N00Bits == ExtVTBits ||
14456 (!IsZext && (N00Bits < ExtVTBits ||
14457 DAG.ComputeMaxSignificantBits(N00) <= ExtVTBits))) &&
14458 (!LegalOperations ||
14459 TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT)))
14460 return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT, N00);
14461 }
14462
14463 // fold (sext_in_reg (zext x)) -> (sext x)
14464 // iff we are extending the source sign bit.
14465 if (N0.getOpcode() == ISD::ZERO_EXTEND) {
14466 SDValue N00 = N0.getOperand(0);
14467 if (N00.getScalarValueSizeInBits() == ExtVTBits &&
14468 (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
14469 return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
14470 }
14471
14472 // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
14473 if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, ExtVTBits - 1)))
14474 return DAG.getZeroExtendInReg(N0, SDLoc(N), ExtVT);
14475
14476 // fold operands of sext_in_reg based on knowledge that the top bits are not
14477 // demanded.
14478 if (SimplifyDemandedBits(SDValue(N, 0)))
14479 return SDValue(N, 0);
14480
14481 // fold (sext_in_reg (load x)) -> (smaller sextload x)
14482 // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
14483 if (SDValue NarrowLoad = reduceLoadWidth(N))
14484 return NarrowLoad;
14485
14486 // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
14487 // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
14488 // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
14489 if (N0.getOpcode() == ISD::SRL) {
14490 if (auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
14491 if (ShAmt->getAPIntValue().ule(VTBits - ExtVTBits)) {
14492 // We can turn this into an SRA iff the input to the SRL is already sign
14493 // extended enough.
14494 unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
14495 if (((VTBits - ExtVTBits) - ShAmt->getZExtValue()) < InSignBits)
14496 return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0),
14497 N0.getOperand(1));
14498 }
14499 }
14500
14501 // fold (sext_inreg (extload x)) -> (sextload x)
14502 // If sextload is not supported by target, we can only do the combine when
14503 // load has one use. Doing otherwise can block folding the extload with other
14504 // extends that the target does support.
14505 if (ISD::isEXTLoad(N0.getNode()) &&
14506 ISD::isUNINDEXEDLoad(N0.getNode()) &&
14507 ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
14508 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple() &&
14509 N0.hasOneUse()) ||
14510 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
14511 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14512 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
14513 LN0->getChain(),
14514 LN0->getBasePtr(), ExtVT,
14515 LN0->getMemOperand());
14516 CombineTo(N, ExtLoad);
14517 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
14518 AddToWorklist(ExtLoad.getNode());
14519 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14520 }
14521
14522 // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
14523 if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
14524 N0.hasOneUse() &&
14525 ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
14526 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) &&
14527 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
14528 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14529 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
14530 LN0->getChain(),
14531 LN0->getBasePtr(), ExtVT,
14532 LN0->getMemOperand());
14533 CombineTo(N, ExtLoad);
14534 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
14535 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14536 }
14537
14538 // fold (sext_inreg (masked_load x)) -> (sext_masked_load x)
14539 // ignore it if the masked load is already sign extended
14540 if (MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0)) {
14541 if (ExtVT == Ld->getMemoryVT() && N0.hasOneUse() &&
14542 Ld->getExtensionType() != ISD::LoadExtType::NON_EXTLOAD &&
14543 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT)) {
14544 SDValue ExtMaskedLoad = DAG.getMaskedLoad(
14545 VT, SDLoc(N), Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(),
14546 Ld->getMask(), Ld->getPassThru(), ExtVT, Ld->getMemOperand(),
14547 Ld->getAddressingMode(), ISD::SEXTLOAD, Ld->isExpandingLoad());
14548 CombineTo(N, ExtMaskedLoad);
14549 CombineTo(N0.getNode(), ExtMaskedLoad, ExtMaskedLoad.getValue(1));
14550 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14551 }
14552 }
14553
14554 // fold (sext_inreg (masked_gather x)) -> (sext_masked_gather x)
14555 if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
14556 if (SDValue(GN0, 0).hasOneUse() &&
14557 ExtVT == GN0->getMemoryVT() &&
14559 SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
14560 GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
14561
14562 SDValue ExtLoad = DAG.getMaskedGather(
14563 DAG.getVTList(VT, MVT::Other), ExtVT, SDLoc(N), Ops,
14564 GN0->getMemOperand(), GN0->getIndexType(), ISD::SEXTLOAD);
14565
14566 CombineTo(N, ExtLoad);
14567 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
14568 AddToWorklist(ExtLoad.getNode());
14569 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14570 }
14571 }
14572
14573 // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
14574 if (ExtVTBits <= 16 && N0.getOpcode() == ISD::OR) {
14575 if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
14576 N0.getOperand(1), false))
14577 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, BSwap, N1);
14578 }
14579
14580 // Fold (iM_signext_inreg
14581 // (extract_subvector (zext|anyext|sext iN_v to _) _)
14582 // from iN)
14583 // -> (extract_subvector (signext iN_v to iM))
14584 if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR && N0.hasOneUse() &&
14585 ISD::isExtOpcode(N0.getOperand(0).getOpcode())) {
14586 SDValue InnerExt = N0.getOperand(0);
14587 EVT InnerExtVT = InnerExt->getValueType(0);
14588 SDValue Extendee = InnerExt->getOperand(0);
14589
14590 if (ExtVTBits == Extendee.getValueType().getScalarSizeInBits() &&
14591 (!LegalOperations ||
14592 TLI.isOperationLegal(ISD::SIGN_EXTEND, InnerExtVT))) {
14593 SDValue SignExtExtendee =
14594 DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), InnerExtVT, Extendee);
14595 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), VT, SignExtExtendee,
14596 N0.getOperand(1));
14597 }
14598 }
14599
14600 return SDValue();
14601}
14602
14603 static SDValue foldExtendVectorInregToExtendOfSubvector(
14604 SDNode *N, const SDLoc &DL, const TargetLowering &TLI, SelectionDAG &DAG,
14605 bool LegalOperations) {
14606 unsigned InregOpcode = N->getOpcode();
14607 unsigned Opcode = DAG.getOpcode_EXTEND(InregOpcode);
14608
14609 SDValue Src = N->getOperand(0);
14610 EVT VT = N->getValueType(0);
14611 EVT SrcVT = EVT::getVectorVT(*DAG.getContext(),
14612 Src.getValueType().getVectorElementType(),
14613 VT.getVectorElementCount());
14614
14615 assert(ISD::isExtVecInRegOpcode(InregOpcode) &&
14616 "Expected EXTEND_VECTOR_INREG dag node in input!");
14617
14618 // Profitability check: our operand must be a one-use CONCAT_VECTORS.
14619 // FIXME: one-use check may be overly restrictive
14620 if (!Src.hasOneUse() || Src.getOpcode() != ISD::CONCAT_VECTORS)
14621 return SDValue();
14622
14623 // Profitability check: we must be extending exactly one of its operands.
14624 // FIXME: this is probably overly restrictive.
14625 Src = Src.getOperand(0);
14626 if (Src.getValueType() != SrcVT)
14627 return SDValue();
14628
14629 if (LegalOperations && !TLI.isOperationLegal(Opcode, VT))
14630 return SDValue();
14631
14632 return DAG.getNode(Opcode, DL, VT, Src);
14633}
14634
14635SDValue DAGCombiner::visitEXTEND_VECTOR_INREG(SDNode *N) {
14636 SDValue N0 = N->getOperand(0);
14637 EVT VT = N->getValueType(0);
14638 SDLoc DL(N);
14639
14640 if (N0.isUndef()) {
14641 // aext_vector_inreg(undef) = undef because the top bits are undefined.
14642 // {s/z}ext_vector_inreg(undef) = 0 because the top bits must be the same.
14643 return N->getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG
14644 ? DAG.getUNDEF(VT)
14645 : DAG.getConstant(0, DL, VT);
14646 }
14647
14648 if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
14649 return Res;
14650
14651 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
14652 return SDValue(N, 0);
14653
14654 if (SDValue R = foldExtendVectorInregToExtendOfSubvector(N, DL, TLI, DAG,
14655 LegalOperations))
14656 return R;
14657
14658 return SDValue();
14659}
14660
14661SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
14662 SDValue N0 = N->getOperand(0);
14663 EVT VT = N->getValueType(0);
14664 EVT SrcVT = N0.getValueType();
14665 bool isLE = DAG.getDataLayout().isLittleEndian();
14666 SDLoc DL(N);
14667
14668 // trunc(undef) = undef
14669 if (N0.isUndef())
14670 return DAG.getUNDEF(VT);
14671
14672 // fold (truncate (truncate x)) -> (truncate x)
14673 if (N0.getOpcode() == ISD::TRUNCATE)
14674 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
14675
14676 // fold (truncate c1) -> c1
14677 if (SDValue C = DAG.FoldConstantArithmetic(ISD::TRUNCATE, DL, VT, {N0}))
14678 return C;
14679
14680 // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
14681 if (N0.getOpcode() == ISD::ZERO_EXTEND ||
14682 N0.getOpcode() == ISD::SIGN_EXTEND ||
14683 N0.getOpcode() == ISD::ANY_EXTEND) {
14684 // if the source is smaller than the dest, we still need an extend.
14685 if (N0.getOperand(0).getValueType().bitsLT(VT))
14686 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0));
14687 // if the source is larger than the dest, then we just need the truncate.
14688 if (N0.getOperand(0).getValueType().bitsGT(VT))
14689 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
14690 // if the source and dest are the same type, we can drop both the extend
14691 // and the truncate.
14692 return N0.getOperand(0);
14693 }
14694
14695 // Try to narrow a truncate-of-sext_in_reg to the destination type:
14696 // trunc (sign_ext_inreg X, iM) to iN --> sign_ext_inreg (trunc X to iN), iM
14697 if (!LegalTypes && N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
14698 N0.hasOneUse()) {
14699 SDValue X = N0.getOperand(0);
14700 SDValue ExtVal = N0.getOperand(1);
14701 EVT ExtVT = cast<VTSDNode>(ExtVal)->getVT();
14702 if (ExtVT.bitsLT(VT) && TLI.preferSextInRegOfTruncate(VT, SrcVT, ExtVT)) {
14703 SDValue TrX = DAG.getNode(ISD::TRUNCATE, DL, VT, X);
14704 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, TrX, ExtVal);
14705 }
14706 }
14707
14708 // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
14709 if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
14710 return SDValue();
14711
14712 // Fold extract-and-trunc into a narrow extract. For example:
14713 // i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
14714 // i32 y = TRUNCATE(i64 x)
14715 // -- becomes --
14716 // v16i8 b = BITCAST (v2i64 val)
14717 // i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
14718 //
14719 // Note: We only run this optimization after type legalization (which often
14720 // creates this pattern) and before operation legalization after which
14721 // we need to be more careful about the vector instructions that we generate.
14722 if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
14723 LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
14724 EVT VecTy = N0.getOperand(0).getValueType();
14725 EVT ExTy = N0.getValueType();
14726 EVT TrTy = N->getValueType(0);
14727
14728 auto EltCnt = VecTy.getVectorElementCount();
14729 unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
14730 auto NewEltCnt = EltCnt * SizeRatio;
14731
14732 EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, NewEltCnt);
14733 assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
14734
14735 SDValue EltNo = N0->getOperand(1);
14736 if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
14737 int Elt = EltNo->getAsZExtVal();
14738 int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
14739 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
14740 DAG.getBitcast(NVT, N0.getOperand(0)),
14741 DAG.getVectorIdxConstant(Index, DL));
14742 }
14743 }
14744
14745 // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
14746 if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) {
14747 if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
14748 TLI.isTruncateFree(SrcVT, VT)) {
14749 SDLoc SL(N0);
14750 SDValue Cond = N0.getOperand(0);
14751 SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
14752 SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
14753 return DAG.getNode(ISD::SELECT, DL, VT, Cond, TruncOp0, TruncOp1);
14754 }
14755 }
14756
14757 // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
14758 if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
14759 (!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
14760 TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
14761 SDValue Amt = N0.getOperand(1);
14762 KnownBits Known = DAG.computeKnownBits(Amt);
14763 unsigned Size = VT.getScalarSizeInBits();
14764 if (Known.countMaxActiveBits() <= Log2_32(Size)) {
14765 EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
14766 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
14767 if (AmtVT != Amt.getValueType()) {
14768 Amt = DAG.getZExtOrTrunc(Amt, DL, AmtVT);
14769 AddToWorklist(Amt.getNode());
14770 }
14771 return DAG.getNode(ISD::SHL, DL, VT, Trunc, Amt);
14772 }
14773 }
14774
14775 if (SDValue V = foldSubToUSubSat(VT, N0.getNode(), DL))
14776 return V;
14777
14778 if (SDValue ABD = foldABSToABD(N, DL))
14779 return ABD;
14780
14781 // Attempt to pre-truncate BUILD_VECTOR sources.
14782 if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
14783 N0.hasOneUse() &&
14784 TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType()) &&
14785 // Avoid creating illegal types if running after type legalizer.
14786 (!LegalTypes || TLI.isTypeLegal(VT.getScalarType()))) {
14787 EVT SVT = VT.getScalarType();
14788 SmallVector<SDValue, 8> TruncOps;
14789 for (const SDValue &Op : N0->op_values()) {
14790 SDValue TruncOp = DAG.getNode(ISD::TRUNCATE, DL, SVT, Op);
14791 TruncOps.push_back(TruncOp);
14792 }
14793 return DAG.getBuildVector(VT, DL, TruncOps);
14794 }
14795
14796 // trunc (splat_vector x) -> splat_vector (trunc x)
14797 if (N0.getOpcode() == ISD::SPLAT_VECTOR &&
14798 (!LegalTypes || TLI.isTypeLegal(VT.getScalarType())) &&
14799 (!LegalOperations || TLI.isOperationLegal(ISD::SPLAT_VECTOR, VT))) {
14800 EVT SVT = VT.getScalarType();
14801 return DAG.getSplatVector(
14802 VT, DL, DAG.getNode(ISD::TRUNCATE, DL, SVT, N0->getOperand(0)));
14803 }
14804
14805 // Fold a series of buildvector, bitcast, and truncate if possible.
14806 // For example fold
14807 // (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
14808 // (2xi32 (buildvector x, y)).
14809 if (Level == AfterLegalizeVectorOps && VT.isVector() &&
14810 N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
14811 N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
14812 N0.getOperand(0).hasOneUse()) {
14813 SDValue BuildVect = N0.getOperand(0);
14814 EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
14815 EVT TruncVecEltTy = VT.getVectorElementType();
14816
14817 // Check that the element types match.
14818 if (BuildVectEltTy == TruncVecEltTy) {
14819 // Now we only need to compute the offset of the truncated elements.
14820 unsigned BuildVecNumElts = BuildVect.getNumOperands();
14821 unsigned TruncVecNumElts = VT.getVectorNumElements();
14822 unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
14823
14824 assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
14825 "Invalid number of elements");
14826
14827 SmallVector<SDValue, 8> Opnds;
14828 for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
14829 Opnds.push_back(BuildVect.getOperand(i));
14830
14831 return DAG.getBuildVector(VT, DL, Opnds);
14832 }
14833 }
14834
14835 // fold (truncate (load x)) -> (smaller load x)
14836 // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
14837 if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
14838 if (SDValue Reduced = reduceLoadWidth(N))
14839 return Reduced;
14840
14841 // Handle the case where the truncated result is at least as wide as the
14842 // loaded type.
14843 if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
14844 auto *LN0 = cast<LoadSDNode>(N0);
14845 if (LN0->isSimple() && LN0->getMemoryVT().bitsLE(VT)) {
14846 SDValue NewLoad = DAG.getExtLoad(
14847 LN0->getExtensionType(), SDLoc(LN0), VT, LN0->getChain(),
14848 LN0->getBasePtr(), LN0->getMemoryVT(), LN0->getMemOperand());
14849 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
14850 return NewLoad;
14851 }
14852 }
14853 }
14854
14855 // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
14856 // where ... are all 'undef'.
14857 if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
14858 SmallVector<EVT, 8> VTs;
14859 SDValue V;
14860 unsigned Idx = 0;
14861 unsigned NumDefs = 0;
14862
14863 for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
14864 SDValue X = N0.getOperand(i);
14865 if (!X.isUndef()) {
14866 V = X;
14867 Idx = i;
14868 NumDefs++;
14869 }
14870 // Stop if more than one member is non-undef.
14871 if (NumDefs > 1)
14872 break;
14873
14874 VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
14875 VT.getVectorElementType(),
14876 X.getValueType().getVectorElementCount()));
14877 }
14878
14879 if (NumDefs == 0)
14880 return DAG.getUNDEF(VT);
14881
14882 if (NumDefs == 1) {
14883 assert(V.getNode() && "The single defined operand is empty!");
14884 SmallVector<SDValue, 8> Opnds;
14885 for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
14886 if (i != Idx) {
14887 Opnds.push_back(DAG.getUNDEF(VTs[i]));
14888 continue;
14889 }
14890 SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
14891 AddToWorklist(NV.getNode());
14892 Opnds.push_back(NV);
14893 }
14894 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Opnds);
14895 }
14896 }
14897
14898 // Fold truncate of a bitcast of a vector to an extract of the low vector
14899 // element.
14900 //
14901 // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
14902 if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
14903 SDValue VecSrc = N0.getOperand(0);
14904 EVT VecSrcVT = VecSrc.getValueType();
14905 if (VecSrcVT.isVector() && VecSrcVT.getScalarType() == VT &&
14906 (!LegalOperations ||
14907 TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecSrcVT))) {
14908 unsigned Idx = isLE ? 0 : VecSrcVT.getVectorNumElements() - 1;
14909 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, VecSrc,
14910 DAG.getVectorIdxConstant(Idx, DL));
14911 }
14912 }
14913
14914 // Simplify the operands using demanded-bits information.
14915 if (SimplifyDemandedBits(SDValue(N, 0)))
14916 return SDValue(N, 0);
14917
14918 // fold (truncate (extract_subvector(ext x))) ->
14919 // (extract_subvector x)
14920 // TODO: This can be generalized to cover cases where the truncate and extract
14921 // do not fully cancel each other out.
14922 if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
14923 SDValue N00 = N0.getOperand(0);
14924 if (N00.getOpcode() == ISD::SIGN_EXTEND ||
14925 N00.getOpcode() == ISD::ZERO_EXTEND ||
14926 N00.getOpcode() == ISD::ANY_EXTEND) {
14927 if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
14928 VT.getVectorElementType())
14929 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
14930 N00.getOperand(0), N0.getOperand(1));
14931 }
14932 }
14933
14934 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
14935 return NewVSel;
14936
14937 // Narrow a suitable binary operation with a non-opaque constant operand by
14938 // moving it ahead of the truncate. This is limited to pre-legalization
14939 // because targets may prefer a wider type during later combines and invert
14940 // this transform.
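// Example: (i16 (trunc (add (i32 x), 42))) -> (add (i16 (trunc x)), 42)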
14941 switch (N0.getOpcode()) {
14942 case ISD::ADD:
14943 case ISD::SUB:
14944 case ISD::MUL:
14945 case ISD::AND:
14946 case ISD::OR:
14947 case ISD::XOR:
14948 if (!LegalOperations && N0.hasOneUse() &&
14949 (isConstantOrConstantVector(N0.getOperand(0), true) ||
14950 isConstantOrConstantVector(N0.getOperand(1), true))) {
14951 // TODO: We already restricted this to pre-legalization, but for vectors
14952 // we are extra cautious to not create an unsupported operation.
14953 // Target-specific changes are likely needed to avoid regressions here.
14954 if (VT.isScalarInteger() || TLI.isOperationLegal(N0.getOpcode(), VT)) {
14955 SDValue NarrowL = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
14956 SDValue NarrowR = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
14957 return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR);
14958 }
14959 }
14960 break;
14961 case ISD::ADDE:
14962 case ISD::UADDO_CARRY:
14963 // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
14964 // (trunc uaddo_carry(X, Y, Carry)) ->
14965 // (uaddo_carry trunc(X), trunc(Y), Carry)
14966 // When the adde's carry is not used.
14967 // We only do this for uaddo_carry before operation legalization.
14968 if (((!LegalOperations && N0.getOpcode() == ISD::UADDO_CARRY) ||
14969 TLI.isOperationLegal(N0.getOpcode(), VT)) &&
14970 N0.hasOneUse() && !N0->hasAnyUseOfValue(1)) {
14971 SDValue X = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
14972 SDValue Y = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
14973 SDVTList VTs = DAG.getVTList(VT, N0->getValueType(1));
14974 return DAG.getNode(N0.getOpcode(), DL, VTs, X, Y, N0.getOperand(2));
14975 }
14976 break;
14977 case ISD::USUBSAT:
14978 // Truncate the USUBSAT only if LHS is a known zero-extension; it's not
14979 // enough to know that the upper bits are zero, we must also ensure that we
14980 // don't introduce an extra truncate.
14981 if (!LegalOperations && N0.hasOneUse() &&
14982 N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
14983 N0.getOperand(0).getOperand(0).getScalarValueSizeInBits() <=
14984 VT.getScalarSizeInBits() &&
14985 hasOperation(N0.getOpcode(), VT)) {
14986 return getTruncatedUSUBSAT(VT, SrcVT, N0.getOperand(0), N0.getOperand(1),
14987 DAG, DL);
14988 }
14989 break;
14990 }
14991
14992 return SDValue();
14993}
14994
14995static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
14996 SDValue Elt = N->getOperand(i);
14997 if (Elt.getOpcode() != ISD::MERGE_VALUES)
14998 return Elt.getNode();
14999 return Elt.getOperand(Elt.getResNo()).getNode();
15000}
15001
15002/// build_pair (load, load) -> load
15003/// if load locations are consecutive.
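/// For example, two consecutive i32 loads feeding a BUILD_PAIR can be replaced
/// by a single i64 load from the lower address, taking endianness into account
/// when deciding which element supplies the low half.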
15004SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
15005 assert(N->getOpcode() == ISD::BUILD_PAIR);
15006
15007 auto *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
15008 auto *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
15009
15010 // A BUILD_PAIR always has the least significant part in elt 0 and the
15011 // most significant part in elt 1. So when combining into one large load, we
15012 // need to consider the endianness.
15013 if (DAG.getDataLayout().isBigEndian())
15014 std::swap(LD1, LD2);
15015
15016 if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !ISD::isNON_EXTLoad(LD2) ||
15017 !LD1->hasOneUse() || !LD2->hasOneUse() ||
15018 LD1->getAddressSpace() != LD2->getAddressSpace())
15019 return SDValue();
15020
15021 unsigned LD1Fast = 0;
15022 EVT LD1VT = LD1->getValueType(0);
15023 unsigned LD1Bytes = LD1VT.getStoreSize();
15024 if ((!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
15025 DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1) &&
15026 TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
15027 *LD1->getMemOperand(), &LD1Fast) && LD1Fast)
15028 return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
15029 LD1->getPointerInfo(), LD1->getAlign());
15030
15031 return SDValue();
15032}
15033
15034static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
15035 // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
15036 // and Lo parts; on big-endian machines it doesn't.
15037 return DAG.getDataLayout().isBigEndian() ? 1 : 0;
15038}
15039
15040SDValue DAGCombiner::foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
15041 const TargetLowering &TLI) {
15042 // If this is not a bitcast to an FP type or if the target doesn't have
15043 // IEEE754-compliant FP logic, we're done.
15044 EVT VT = N->getValueType(0);
15045 SDValue N0 = N->getOperand(0);
15046 EVT SourceVT = N0.getValueType();
15047
15048 if (!VT.isFloatingPoint())
15049 return SDValue();
15050
15051 // TODO: Handle cases where the integer constant is a different scalar
15052 // bitwidth to the FP.
15053 if (VT.getScalarSizeInBits() != SourceVT.getScalarSizeInBits())
15054 return SDValue();
15055
15056 unsigned FPOpcode;
15057 APInt SignMask;
15058 switch (N0.getOpcode()) {
15059 case ISD::AND:
15060 FPOpcode = ISD::FABS;
15061 SignMask = ~APInt::getSignMask(SourceVT.getScalarSizeInBits());
15062 break;
15063 case ISD::XOR:
15064 FPOpcode = ISD::FNEG;
15065 SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
15066 break;
15067 case ISD::OR:
15068 FPOpcode = ISD::FABS;
15069 SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
15070 break;
15071 default:
15072 return SDValue();
15073 }
15074
15075 if (LegalOperations && !TLI.isOperationLegal(FPOpcode, VT))
15076 return SDValue();
15077
15078 // This needs to be the inverse of logic in foldSignChangeInBitcast.
15079 // FIXME: I don't think looking for bitcast intrinsically makes sense, but
15080 // removing this would require more changes.
15081 auto IsBitCastOrFree = [&TLI, FPOpcode](SDValue Op, EVT VT) {
15082 if (Op.getOpcode() == ISD::BITCAST && Op.getOperand(0).getValueType() == VT)
15083 return true;
15084
15085 return FPOpcode == ISD::FABS ? TLI.isFAbsFree(VT) : TLI.isFNegFree(VT);
15086 };
15087
15088 // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
15089 // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
15090 // Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) ->
15091 // fneg (fabs X)
15092 SDValue LogicOp0 = N0.getOperand(0);
15093 ConstantSDNode *LogicOp1 = isConstOrConstSplat(N0.getOperand(1), true);
15094 if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
15095 IsBitCastOrFree(LogicOp0, VT)) {
15096 SDValue CastOp0 = DAG.getNode(ISD::BITCAST, SDLoc(N), VT, LogicOp0);
15097 SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, CastOp0);
15098 NumFPLogicOpsConv++;
15099 if (N0.getOpcode() == ISD::OR)
15100 return DAG.getNode(ISD::FNEG, SDLoc(N), VT, FPOp);
15101 return FPOp;
15102 }
15103
15104 return SDValue();
15105}
15106
15107SDValue DAGCombiner::visitBITCAST(SDNode *N) {
15108 SDValue N0 = N->getOperand(0);
15109 EVT VT = N->getValueType(0);
15110
15111 if (N0.isUndef())
15112 return DAG.getUNDEF(VT);
15113
15114 // If the input is a BUILD_VECTOR with all constant elements, fold this now.
15115 // Only do this before legalize types, unless both types are integer and the
15116 // scalar type is legal. Only do this before legalize ops, since the target
15117 // maybe depending on the bitcast.
15118 // First check to see if this is all constant.
15119 // TODO: Support FP bitcasts after legalize types.
15120 if (VT.isVector() &&
15121 (!LegalTypes ||
15122 (!LegalOperations && VT.isInteger() && N0.getValueType().isInteger() &&
15123 TLI.isTypeLegal(VT.getVectorElementType()))) &&
15124 N0.getOpcode() == ISD::BUILD_VECTOR && N0->hasOneUse() &&
15125 cast<BuildVectorSDNode>(N0)->isConstant())
15126 return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(),
15127 VT.getVectorElementType());
15128
15129 // If the input is a constant, let getNode fold it.
15130 if (isIntOrFPConstant(N0)) {
15131 // If we can't allow illegal operations, we need to check that this is just
15132 // an fp -> int or int -> fp conversion and that the resulting operation will
15133 // be legal.
15134 if (!LegalOperations ||
15135 (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
15136 TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
15137 (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
15138 TLI.isOperationLegal(ISD::Constant, VT))) {
15139 SDValue C = DAG.getBitcast(VT, N0);
15140 if (C.getNode() != N)
15141 return C;
15142 }
15143 }
15144
15145 // (conv (conv x, t1), t2) -> (conv x, t2)
15146 if (N0.getOpcode() == ISD::BITCAST)
15147 return DAG.getBitcast(VT, N0.getOperand(0));
15148
15149 // fold (conv (logicop (conv x), (c))) -> (logicop x, (conv c))
15150 // iff the current bitwise logicop type isn't legal
15151 if (ISD::isBitwiseLogicOp(N0.getOpcode()) && VT.isInteger() &&
15152 !TLI.isTypeLegal(N0.getOperand(0).getValueType())) {
15153 auto IsFreeBitcast = [VT](SDValue V) {
15154 return (V.getOpcode() == ISD::BITCAST &&
15155 V.getOperand(0).getValueType() == VT) ||
15156 (ISD::isBuildVectorOfConstantSDNodes(V.getNode()) &&
15157 V->hasOneUse());
15158 };
15159 if (IsFreeBitcast(N0.getOperand(0)) && IsFreeBitcast(N0.getOperand(1)))
15160 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
15161 DAG.getBitcast(VT, N0.getOperand(0)),
15162 DAG.getBitcast(VT, N0.getOperand(1)));
15163 }
15164
15165 // fold (conv (load x)) -> (load (conv*)x)
15166 // If the resultant load doesn't need a higher alignment than the original!
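// For example, (i32 (bitcast (f32 (load p)))) can become (i32 (load p)) when
// the target considers the integer load at least as cheap, avoiding a
// separate FP-to-int move after the load.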
15167 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
15168 // Do not remove the cast if the types differ in endian layout.
15169 TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
15170 TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
15171 // If the load is volatile, we only want to change the load type if the
15172 // resulting load is legal. Otherwise we might increase the number of
15173 // memory accesses. We don't care if the original type was legal or not
15174 // as we assume software couldn't rely on the number of accesses of an
15175 // illegal type.
15176 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) ||
15177 TLI.isOperationLegal(ISD::LOAD, VT))) {
15178 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
15179
15180 if (TLI.isLoadBitCastBeneficial(N0.getValueType(), VT, DAG,
15181 *LN0->getMemOperand())) {
15182 SDValue Load =
15183 DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
15184 LN0->getMemOperand());
15185 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
15186 return Load;
15187 }
15188 }
15189
15190 if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
15191 return V;
15192
15193 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
15194 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
15195 //
15196 // For ppc_fp128:
15197 // fold (bitcast (fneg x)) ->
15198 // flipbit = signbit
15199 // (xor (bitcast x) (build_pair flipbit, flipbit))
15200 //
15201 // fold (bitcast (fabs x)) ->
15202 // flipbit = (and (extract_element (bitcast x), 0), signbit)
15203 // (xor (bitcast x) (build_pair flipbit, flipbit))
15204 // This often reduces constant pool loads.
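// As a concrete illustration for f64 bitcast to i64:
//   (bitcast (fneg X)) -> (xor (bitcast X), 0x8000000000000000)
//   (bitcast (fabs X)) -> (and (bitcast X), 0x7fffffffffffffff)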
15205 if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
15206 (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
15207 N0->hasOneUse() && VT.isInteger() && !VT.isVector() &&
15208 !N0.getValueType().isVector()) {
15209 SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
15210 AddToWorklist(NewConv.getNode());
15211
15212 SDLoc DL(N);
15213 if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
15214 assert(VT.getSizeInBits() == 128);
15215 SDValue SignBit = DAG.getConstant(
15216 APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
15217 SDValue FlipBit;
15218 if (N0.getOpcode() == ISD::FNEG) {
15219 FlipBit = SignBit;
15220 AddToWorklist(FlipBit.getNode());
15221 } else {
15222 assert(N0.getOpcode() == ISD::FABS);
15223 SDValue Hi =
15224 DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
15225 DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
15226 SDLoc(NewConv)));
15227 AddToWorklist(Hi.getNode());
15228 FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
15229 AddToWorklist(FlipBit.getNode());
15230 }
15231 SDValue FlipBits =
15232 DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
15233 AddToWorklist(FlipBits.getNode());
15234 return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
15235 }
15236 APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
15237 if (N0.getOpcode() == ISD::FNEG)
15238 return DAG.getNode(ISD::XOR, DL, VT,
15239 NewConv, DAG.getConstant(SignBit, DL, VT));
15240 assert(N0.getOpcode() == ISD::FABS);
15241 return DAG.getNode(ISD::AND, DL, VT,
15242 NewConv, DAG.getConstant(~SignBit, DL, VT));
15243 }
15244
15245 // fold (bitconvert (fcopysign cst, x)) ->
15246 // (or (and (bitconvert x), sign), (and cst, (not sign)))
15247 // Note that we don't handle (copysign x, cst) because this can always be
15248 // folded to an fneg or fabs.
15249 //
15250 // For ppc_fp128:
15251 // fold (bitcast (fcopysign cst, x)) ->
15252 // flipbit = (and (extract_element
15253 // (xor (bitcast cst), (bitcast x)), 0),
15254 // signbit)
15255 // (xor (bitcast cst) (build_pair flipbit, flipbit))
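// As a concrete illustration for f32 (assuming x already has the result
// width):
//   (bitcast (fcopysign cst, x)) ->
//     (or (and (bitcast x), 0x80000000), (and (bitcast cst), 0x7fffffff))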
15256 if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse() &&
15257 isa<ConstantFPSDNode>(N0.getOperand(0)) && VT.isInteger() &&
15258 !VT.isVector()) {
15259 unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
15260 EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
15261 if (isTypeLegal(IntXVT)) {
15262 SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
15263 AddToWorklist(X.getNode());
15264
15265 // If X has a different width than the result/lhs, sext it or truncate it.
15266 unsigned VTWidth = VT.getSizeInBits();
15267 if (OrigXWidth < VTWidth) {
15268 X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
15269 AddToWorklist(X.getNode());
15270 } else if (OrigXWidth > VTWidth) {
15271 // To get the sign bit in the right place, we have to shift it right
15272 // before truncating.
15273 SDLoc DL(X);
15274 X = DAG.getNode(ISD::SRL, DL,
15275 X.getValueType(), X,
15276 DAG.getConstant(OrigXWidth-VTWidth, DL,
15277 X.getValueType()));
15278 AddToWorklist(X.getNode());
15279 X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
15280 AddToWorklist(X.getNode());
15281 }
15282
15283 if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
15284 APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
15285 SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
15286 AddToWorklist(Cst.getNode());
15287 SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
15288 AddToWorklist(X.getNode());
15289 SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
15290 AddToWorklist(XorResult.getNode());
15291 SDValue XorResult64 = DAG.getNode(
15292 ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
15293 DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
15294 SDLoc(XorResult)));
15295 AddToWorklist(XorResult64.getNode());
15296 SDValue FlipBit =
15297 DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
15298 DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
15299 AddToWorklist(FlipBit.getNode());
15300 SDValue FlipBits =
15301 DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
15302 AddToWorklist(FlipBits.getNode());
15303 return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
15304 }
15305 APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
15306 X = DAG.getNode(ISD::AND, SDLoc(X), VT,
15307 X, DAG.getConstant(SignBit, SDLoc(X), VT));
15308 AddToWorklist(X.getNode());
15309
15310 SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
15311 Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
15312 Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
15313 AddToWorklist(Cst.getNode());
15314
15315 return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
15316 }
15317 }
15318
15319 // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
15320 if (N0.getOpcode() == ISD::BUILD_PAIR)
15321 if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
15322 return CombineLD;
15323
15324 // Remove double bitcasts from shuffles - this is often a legacy of
15325 // XformToShuffleWithZero being used to combine bitmaskings (of
15326 // float vectors bitcast to integer vectors) into shuffles.
15327 // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
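// For example, if s0 and s1 are v4i32, then
//   (v4i32 (bitcast (v2i64 shuffle (bitcast s0), (bitcast s1), <1,0>)))
// can be rebuilt as a v4i32 shuffle of s0 and s1 with the mask scaled to
// <2,3,0,1>, removing all three bitcasts.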
15328 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
15329 N0->getOpcode() == ISD::VECTOR_SHUFFLE && N0.hasOneUse() &&
15330 VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
15331 !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
15332 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
15333
15334 // If operands are a bitcast, peek through if it casts the original VT.
15335 // If operands are a constant, just bitcast back to original VT.
15336 auto PeekThroughBitcast = [&](SDValue Op) {
15337 if (Op.getOpcode() == ISD::BITCAST &&
15338 Op.getOperand(0).getValueType() == VT)
15339 return SDValue(Op.getOperand(0));
15340 if (Op.isUndef() || isAnyConstantBuildVector(Op))
15341 return DAG.getBitcast(VT, Op);
15342 return SDValue();
15343 };
15344
15345 // FIXME: If either input vector is bitcast, try to convert the shuffle to
15346 // the result type of this bitcast. This would eliminate at least one
15347 // bitcast. See the transform in InstCombine.
15348 SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
15349 SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
15350 if (!(SV0 && SV1))
15351 return SDValue();
15352
15353 int MaskScale =
15354 VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
15355 SmallVector<int, 8> NewMask;
15356 for (int M : SVN->getMask())
15357 for (int i = 0; i != MaskScale; ++i)
15358 NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
15359
15360 SDValue LegalShuffle =
15361 TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask, DAG);
15362 if (LegalShuffle)
15363 return LegalShuffle;
15364 }
15365
15366 return SDValue();
15367}
15368
15369SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
15370 EVT VT = N->getValueType(0);
15371 return CombineConsecutiveLoads(N, VT);
15372}
15373
15374SDValue DAGCombiner::visitFREEZE(SDNode *N) {
15375 SDValue N0 = N->getOperand(0);
15376
15377 if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, /*PoisonOnly*/ false))
15378 return N0;
15379
15380 // Fold freeze(op(x, ...)) -> op(freeze(x), ...).
15381 // Try to push freeze through instructions that propagate but don't produce
15382 // poison as far as possible. If an operand of freeze follows three
15383 // conditions 1) one-use, 2) does not produce poison, and 3) has all but one
15384 // guaranteed-non-poison operands (or is a BUILD_VECTOR or similar) then push
15385 // the freeze through to the operands that are not guaranteed non-poison.
15386 // NOTE: we will strip poison-generating flags, so ignore them here.
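// For example, freeze (add x, y) where only y may be poison can become
// (add x, (freeze y)) once the add's poison-generating flags (e.g. nsw/nuw)
// are dropped, since add itself propagates but does not create poison.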
15387 if (DAG.canCreateUndefOrPoison(N0, /*PoisonOnly*/ false,
15388 /*ConsiderFlags*/ false) ||
15389 N0->getNumValues() != 1 || !N0->hasOneUse())
15390 return SDValue();
15391
15392 bool AllowMultipleMaybePoisonOperands = N0.getOpcode() == ISD::BUILD_VECTOR ||
15393 N0.getOpcode() == ISD::BUILD_PAIR ||
15394 N0.getOpcode() == ISD::CONCAT_VECTORS;
15395
15396 SmallSetVector<SDValue, 8> MaybePoisonOperands;
15397 for (SDValue Op : N0->ops()) {
15398 if (DAG.isGuaranteedNotToBeUndefOrPoison(Op, /*PoisonOnly*/ false,
15399 /*Depth*/ 1))
15400 continue;
15401 bool HadMaybePoisonOperands = !MaybePoisonOperands.empty();
15402 bool IsNewMaybePoisonOperand = MaybePoisonOperands.insert(Op);
15403 if (!HadMaybePoisonOperands)
15404 continue;
15405 if (IsNewMaybePoisonOperand && !AllowMultipleMaybePoisonOperands) {
15406 // Multiple maybe-poison ops when not allowed - bail out.
15407 return SDValue();
15408 }
15409 }
15410 // NOTE: the whole op may not be guaranteed to not be undef or poison because
15411 // it could create undef or poison due to its poison-generating flags.
15412 // So not finding any maybe-poison operands is fine.
15413
15414 for (SDValue MaybePoisonOperand : MaybePoisonOperands) {
15415 // Don't replace every single UNDEF everywhere with frozen UNDEF, though.
15416 if (MaybePoisonOperand.getOpcode() == ISD::UNDEF)
15417 continue;
15418 // First, freeze each offending operand.
15419 SDValue FrozenMaybePoisonOperand = DAG.getFreeze(MaybePoisonOperand);
15420 // Then, change all other uses of unfrozen operand to use frozen operand.
15421 DAG.ReplaceAllUsesOfValueWith(MaybePoisonOperand, FrozenMaybePoisonOperand);
15422 if (FrozenMaybePoisonOperand.getOpcode() == ISD::FREEZE &&
15423 FrozenMaybePoisonOperand.getOperand(0) == FrozenMaybePoisonOperand) {
15424 // But, that also updated the use in the freeze we just created, thus
15425 // creating a cycle in a DAG. Let's undo that by mutating the freeze.
15426 DAG.UpdateNodeOperands(FrozenMaybePoisonOperand.getNode(),
15427 MaybePoisonOperand);
15428 }
15429 }
15430
15431 // This node has been merged with another.
15432 if (N->getOpcode() == ISD::DELETED_NODE)
15433 return SDValue(N, 0);
15434
15435 // The whole node may have been updated, so the value we were holding
15436 // may no longer be valid. Re-fetch the operand we're `freeze`ing.
15437 N0 = N->getOperand(0);
15438
15439 // Finally, recreate the node; its operands were updated to use
15440 // frozen operands, so we just need to use its "original" operands.
15441 SmallVector<SDValue> Ops(N0->op_begin(), N0->op_end());
15442 // Special-handle ISD::UNDEF; each single one of them can be its own thing.
15443 for (SDValue &Op : Ops) {
15444 if (Op.getOpcode() == ISD::UNDEF)
15445 Op = DAG.getFreeze(Op);
15446 }
15447 // NOTE: this strips poison generating flags.
15448 SDValue R = DAG.getNode(N0.getOpcode(), SDLoc(N0), N0->getVTList(), Ops);
15449 assert(DAG.isGuaranteedNotToBeUndefOrPoison(R, /*PoisonOnly*/ false) &&
15450 "Can't create node that may be undef/poison!");
15451 return R;
15452}
15453
15454/// We know that BV is a build_vector node with Constant, ConstantFP or Undef
15455/// operands. DstEltVT indicates the destination element value type.
15456SDValue DAGCombiner::
15457ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
15458 EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
15459
15460 // If this is already the right type, we're done.
15461 if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
15462
15463 unsigned SrcBitSize = SrcEltVT.getSizeInBits();
15464 unsigned DstBitSize = DstEltVT.getSizeInBits();
15465
15466 // If this is a conversion of N elements of one type to N elements of another
15467 // type, convert each element. This handles FP<->INT cases.
15468 if (SrcBitSize == DstBitSize) {
15469 SmallVector<SDValue, 8> Ops;
15470 for (SDValue Op : BV->op_values()) {
15471 // If the vector element type is not legal, the BUILD_VECTOR operands
15472 // are promoted and implicitly truncated. Make that explicit here.
15473 if (Op.getValueType() != SrcEltVT)
15474 Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
15475 Ops.push_back(DAG.getBitcast(DstEltVT, Op));
15476 AddToWorklist(Ops.back().getNode());
15477 }
15478 EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
15479 BV->getValueType(0).getVectorNumElements());
15480 return DAG.getBuildVector(VT, SDLoc(BV), Ops);
15481 }
15482
15483 // Otherwise, we're growing or shrinking the elements. To avoid having to
15484 // handle annoying details of growing/shrinking FP values, we convert them to
15485 // int first.
15486 if (SrcEltVT.isFloatingPoint()) {
15487 // Convert the input float vector to an int vector where the elements are
15488 // the same size.
15489 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
15490 BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
15491 SrcEltVT = IntVT;
15492 }
15493
15494 // Now we know the input is an integer vector. If the output is a FP type,
15495 // convert to integer first, then to FP of the right size.
15496 if (DstEltVT.isFloatingPoint()) {
15497 EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
15498 SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
15499
15500 // Next, convert to FP elements of the same size.
15501 return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
15502 }
15503
15504 // Okay, we know the src/dst types are both integers of differing types.
15505 assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
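// For example, on a little-endian target the constant vector
// (v2i32 build_vector 0x11111111, 0x22222222) bitcast to v1i64 repacks the
// raw bits into the single element 0x2222222211111111.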
15506
15507 // TODO: Should ConstantFoldBITCASTofBUILD_VECTOR always take a
15508 // BuildVectorSDNode?
15509 auto *BVN = cast<BuildVectorSDNode>(BV);
15510
15511 // Extract the constant raw bit data.
15512 BitVector UndefElements;
15513 SmallVector<APInt> RawBits;
15514 bool IsLE = DAG.getDataLayout().isLittleEndian();
15515 if (!BVN->getConstantRawBits(IsLE, DstBitSize, RawBits, UndefElements))
15516 return SDValue();
15517
15518 SDLoc DL(BV);
15519 SmallVector<SDValue, 8> Ops;
15520 for (unsigned I = 0, E = RawBits.size(); I != E; ++I) {
15521 if (UndefElements[I])
15522 Ops.push_back(DAG.getUNDEF(DstEltVT));
15523 else
15524 Ops.push_back(DAG.getConstant(RawBits[I], DL, DstEltVT));
15525 }
15526
15527 EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
15528 return DAG.getBuildVector(VT, DL, Ops);
15529}
15530
15531// Returns true if floating point contraction is allowed on the FMUL-SDValue
15532 // `N`
15533 static bool isContractableFMUL(const TargetOptions &Options, SDValue N) {
15534 assert(N.getOpcode() == ISD::FMUL);
15535
15536 return Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath ||
15537 N->getFlags().hasAllowContract();
15538}
15539
15540 // Returns true if `N` can assume no infinities involved in its computation.
15541 static bool hasNoInfs(const TargetOptions &Options, SDValue N) {
15542 return Options.NoInfsFPMath || N->getFlags().hasNoInfs();
15543}
15544
15545/// Try to perform FMA combining on a given FADD node.
15546template <class MatchContextClass>
15547SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
15548 SDValue N0 = N->getOperand(0);
15549 SDValue N1 = N->getOperand(1);
15550 EVT VT = N->getValueType(0);
15551 SDLoc SL(N);
15552 MatchContextClass matcher(DAG, TLI, N);
15553 const TargetOptions &Options = DAG.getTarget().Options;
15554
15555 bool UseVP = std::is_same_v<MatchContextClass, VPMatchContext>;
15556
15557 // Floating-point multiply-add with intermediate rounding.
15558 // FIXME: Make isFMADLegal have specific behavior when using VPMatchContext.
15559 // FIXME: Add VP_FMAD opcode.
15560 bool HasFMAD = !UseVP && (LegalOperations && TLI.isFMADLegal(DAG, N));
15561
15562 // Floating-point multiply-add without intermediate rounding.
15563 bool HasFMA =
15564 TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
15565 (!LegalOperations || matcher.isOperationLegalOrCustom(ISD::FMA, VT));
15566
15567 // No valid opcode, do not combine.
15568 if (!HasFMAD && !HasFMA)
15569 return SDValue();
15570
15571 bool CanReassociate =
15572 Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
15573 bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
15574 Options.UnsafeFPMath || HasFMAD);
15575 // If the addition is not contractable, do not combine.
15576 if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
15577 return SDValue();
15578
15579 // Folding fadd (fmul x, y), (fmul x, y) -> fma x, y, (fmul x, y) is never
15580 // beneficial. It does not reduce latency. It increases register pressure. It
15581 // replaces an fadd with an fma which is a more complex instruction, so is
15582 // likely to have a larger encoding, use more functional units, etc.
15583 if (N0 == N1)
15584 return SDValue();
15585
15586 if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
15587 return SDValue();
15588
15589 // Always prefer FMAD to FMA for precision.
15590 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
15591 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
15592
15593 auto isFusedOp = [&](SDValue N) {
15594 return matcher.match(N, ISD::FMA) || matcher.match(N, ISD::FMAD);
15595 };
15596
15597 // Is the node an FMUL and contractable either due to global flags or
15598 // SDNodeFlags.
15599 auto isContractableFMUL = [AllowFusionGlobally, &matcher](SDValue N) {
15600 if (!matcher.match(N, ISD::FMUL))
15601 return false;
15602 return AllowFusionGlobally || N->getFlags().hasAllowContract();
15603 };
15604 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
15605 // prefer to fold the multiply with fewer uses.
15606 if (isContractableFMUL(N0) && isContractableFMUL(N1)) {
15607 if (N0->use_size() > N1->use_size())
15608 std::swap(N0, N1);
15609 }
15610
15611 // fold (fadd (fmul x, y), z) -> (fma x, y, z)
15612 if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
15613 return matcher.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
15614 N0.getOperand(1), N1);
15615 }
15616
15617 // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
15618 // Note: Commutes FADD operands.
15619 if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
15620 return matcher.getNode(PreferredFusedOpcode, SL, VT, N1.getOperand(0),
15621 N1.getOperand(1), N0);
15622 }
15623
15624 // fadd (fma A, B, (fmul C, D)), E --> fma A, B, (fma C, D, E)
15625 // fadd E, (fma A, B, (fmul C, D)) --> fma A, B, (fma C, D, E)
15626 // This also works with nested fma instructions:
15627 // fadd (fma A, B, (fma (C, D, (fmul (E, F))))), G -->
15628 // fma A, B, (fma C, D, fma (E, F, G))
15629 // fadd (G, (fma A, B, (fma (C, D, (fmul (E, F)))))) -->
15630 // fma A, B, (fma C, D, fma (E, F, G)).
15631 // This requires reassociation because it changes the order of operations.
15632 if (CanReassociate) {
15633 SDValue FMA, E;
15634 if (isFusedOp(N0) && N0.hasOneUse()) {
15635 FMA = N0;
15636 E = N1;
15637 } else if (isFusedOp(N1) && N1.hasOneUse()) {
15638 FMA = N1;
15639 E = N0;
15640 }
15641
15642 SDValue TmpFMA = FMA;
15643 while (E && isFusedOp(TmpFMA) && TmpFMA.hasOneUse()) {
15644 SDValue FMul = TmpFMA->getOperand(2);
15645 if (matcher.match(FMul, ISD::FMUL) && FMul.hasOneUse()) {
15646 SDValue C = FMul.getOperand(0);
15647 SDValue D = FMul.getOperand(1);
15648 SDValue CDE = matcher.getNode(PreferredFusedOpcode, SL, VT, C, D, E);
15649 DAG.ReplaceAllUsesOfValueWith(FMul, CDE);
15650 // Replacing the inner FMul could cause the outer FMA to be simplified
15651 // away.
15652 return FMA.getOpcode() == ISD::DELETED_NODE ? SDValue(N, 0) : FMA;
15653 }
15654
15655 TmpFMA = TmpFMA->getOperand(2);
15656 }
15657 }
15658
15659 // Look through FP_EXTEND nodes to do more combining.
15660
15661 // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
15662 if (matcher.match(N0, ISD::FP_EXTEND)) {
15663 SDValue N00 = N0.getOperand(0);
15664 if (isContractableFMUL(N00) &&
15665 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
15666 N00.getValueType())) {
15667 return matcher.getNode(
15668 PreferredFusedOpcode, SL, VT,
15669 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
15670 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)), N1);
15671 }
15672 }
15673
15674 // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
15675 // Note: Commutes FADD operands.
15676 if (matcher.match(N1, ISD::FP_EXTEND)) {
15677 SDValue N10 = N1.getOperand(0);
15678 if (isContractableFMUL(N10) &&
15679 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
15680 N10.getValueType())) {
15681 return matcher.getNode(
15682 PreferredFusedOpcode, SL, VT,
15683 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0)),
15684 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0);
15685 }
15686 }
15687
15688 // More folding opportunities when target permits.
15689 if (Aggressive) {
15690 // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
15691 // -> (fma x, y, (fma (fpext u), (fpext v), z))
15692 auto FoldFAddFMAFPExtFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
15693 SDValue Z) {
15694 return matcher.getNode(
15695 PreferredFusedOpcode, SL, VT, X, Y,
15696 matcher.getNode(PreferredFusedOpcode, SL, VT,
15697 matcher.getNode(ISD::FP_EXTEND, SL, VT, U),
15698 matcher.getNode(ISD::FP_EXTEND, SL, VT, V), Z));
15699 };
15700 if (isFusedOp(N0)) {
15701 SDValue N02 = N0.getOperand(2);
15702 if (matcher.match(N02, ISD::FP_EXTEND)) {
15703 SDValue N020 = N02.getOperand(0);
15704 if (isContractableFMUL(N020) &&
15705 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
15706 N020.getValueType())) {
15707 return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
15708 N020.getOperand(0), N020.getOperand(1),
15709 N1);
15710 }
15711 }
15712 }
15713
15714 // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
15715 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
15716 // FIXME: This turns two single-precision and one double-precision
15717 // operation into two double-precision operations, which might not be
15718 // interesting for all targets, especially GPUs.
15719 auto FoldFAddFPExtFMAFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
15720 SDValue Z) {
15721 return matcher.getNode(
15722 PreferredFusedOpcode, SL, VT,
15723 matcher.getNode(ISD::FP_EXTEND, SL, VT, X),
15724 matcher.getNode(ISD::FP_EXTEND, SL, VT, Y),
15725 matcher.getNode(PreferredFusedOpcode, SL, VT,
15726 matcher.getNode(ISD::FP_EXTEND, SL, VT, U),
15727 matcher.getNode(ISD::FP_EXTEND, SL, VT, V), Z));
15728 };
15729 if (N0.getOpcode() == ISD::FP_EXTEND) {
15730 SDValue N00 = N0.getOperand(0);
15731 if (isFusedOp(N00)) {
15732 SDValue N002 = N00.getOperand(2);
15733 if (isContractableFMUL(N002) &&
15734 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
15735 N00.getValueType())) {
15736 return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
15737 N002.getOperand(0), N002.getOperand(1),
15738 N1);
15739 }
15740 }
15741 }
15742
15743 // fold (fadd x, (fma y, z, (fpext (fmul u, v)))
15744 // -> (fma y, z, (fma (fpext u), (fpext v), x))
15745 if (isFusedOp(N1)) {
15746 SDValue N12 = N1.getOperand(2);
15747 if (N12.getOpcode() == ISD::FP_EXTEND) {
15748 SDValue N120 = N12.getOperand(0);
15749 if (isContractableFMUL(N120) &&
15750 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
15751 N120.getValueType())) {
15752 return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
15753 N120.getOperand(0), N120.getOperand(1),
15754 N0);
15755 }
15756 }
15757 }
15758
15759 // fold (fadd x, (fpext (fma y, z, (fmul u, v)))
15760 // -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
15761 // FIXME: This turns two single-precision and one double-precision
15762 // operation into two double-precision operations, which might not be
15763 // interesting for all targets, especially GPUs.
15764 if (N1.getOpcode() == ISD::FP_EXTEND) {
15765 SDValue N10 = N1.getOperand(0);
15766 if (isFusedOp(N10)) {
15767 SDValue N102 = N10.getOperand(2);
15768 if (isContractableFMUL(N102) &&
15769 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
15770 N10.getValueType())) {
15771 return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
15772 N102.getOperand(0), N102.getOperand(1),
15773 N0);
15774 }
15775 }
15776 }
15777 }
15778
15779 return SDValue();
15780}
15781
15782/// Try to perform FMA combining on a given FSUB node.
15783template <class MatchContextClass>
15784SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
15785 SDValue N0 = N->getOperand(0);
15786 SDValue N1 = N->getOperand(1);
15787 EVT VT = N->getValueType(0);
15788 SDLoc SL(N);
15789 MatchContextClass matcher(DAG, TLI, N);
15790 const TargetOptions &Options = DAG.getTarget().Options;
15791
15792 bool UseVP = std::is_same_v<MatchContextClass, VPMatchContext>;
15793
15794 // Floating-point multiply-add with intermediate rounding.
15795 // FIXME: Make isFMADLegal have specific behavior when using VPMatchContext.
15796 // FIXME: Add VP_FMAD opcode.
15797 bool HasFMAD = !UseVP && (LegalOperations && TLI.isFMADLegal(DAG, N));
15798
15799 // Floating-point multiply-add without intermediate rounding.
15800 bool HasFMA =
15801 TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
15802 (!LegalOperations || matcher.isOperationLegalOrCustom(ISD::FMA, VT));
15803
15804 // No valid opcode, do not combine.
15805 if (!HasFMAD && !HasFMA)
15806 return SDValue();
15807
15808 const SDNodeFlags Flags = N->getFlags();
15809 bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
15810 Options.UnsafeFPMath || HasFMAD);
15811
15812 // If the subtraction is not contractable, do not combine.
15813 if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
15814 return SDValue();
15815
15816 if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
15817 return SDValue();
15818
15819 // Always prefer FMAD to FMA for precision.
15820 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
15821 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
15822 bool NoSignedZero = Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros();
15823
15824 // Is the node an FMUL and contractable either due to global flags or
15825 // SDNodeFlags.
15826 auto isContractableFMUL = [AllowFusionGlobally, &matcher](SDValue N) {
15827 if (!matcher.match(N, ISD::FMUL))
15828 return false;
15829 return AllowFusionGlobally || N->getFlags().hasAllowContract();
15830 };
15831
15832 // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
15833 auto tryToFoldXYSubZ = [&](SDValue XY, SDValue Z) {
15834 if (isContractableFMUL(XY) && (Aggressive || XY->hasOneUse())) {
15835 return matcher.getNode(PreferredFusedOpcode, SL, VT, XY.getOperand(0),
15836 XY.getOperand(1),
15837 matcher.getNode(ISD::FNEG, SL, VT, Z));
15838 }
15839 return SDValue();
15840 };
15841
15842 // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
15843 // Note: Commutes FSUB operands.
15844 auto tryToFoldXSubYZ = [&](SDValue X, SDValue YZ) {
15845 if (isContractableFMUL(YZ) && (Aggressive || YZ->hasOneUse())) {
15846 return matcher.getNode(
15847 PreferredFusedOpcode, SL, VT,
15848 matcher.getNode(ISD::FNEG, SL, VT, YZ.getOperand(0)),
15849 YZ.getOperand(1), X);
15850 }
15851 return SDValue();
15852 };
15853
15854 // If we have two choices trying to fold (fsub (fmul u, v), (fmul x, y)),
15855 // prefer to fold the multiply with fewer uses.
15856 if (isContractableFMUL(N0) && isContractableFMUL(N1) &&
15857 (N0->use_size() > N1->use_size())) {
15858 // fold (fsub (fmul a, b), (fmul c, d)) -> (fma (fneg c), d, (fmul a, b))
15859 if (SDValue V = tryToFoldXSubYZ(N0, N1))
15860 return V;
15861 // fold (fsub (fmul a, b), (fmul c, d)) -> (fma a, b, (fneg (fmul c, d)))
15862 if (SDValue V = tryToFoldXYSubZ(N0, N1))
15863 return V;
15864 } else {
15865 // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
15866 if (SDValue V = tryToFoldXYSubZ(N0, N1))
15867 return V;
15868 // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
15869 if (SDValue V = tryToFoldXSubYZ(N0, N1))
15870 return V;
15871 }
15872
15873 // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
15874 if (matcher.match(N0, ISD::FNEG) && isContractableFMUL(N0.getOperand(0)) &&
15875 (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
15876 SDValue N00 = N0.getOperand(0).getOperand(0);
15877 SDValue N01 = N0.getOperand(0).getOperand(1);
15878 return matcher.getNode(PreferredFusedOpcode, SL, VT,
15879 matcher.getNode(ISD::FNEG, SL, VT, N00), N01,
15880 matcher.getNode(ISD::FNEG, SL, VT, N1));
15881 }
15882
15883 // Look through FP_EXTEND nodes to do more combining.
15884
15885 // fold (fsub (fpext (fmul x, y)), z)
15886 // -> (fma (fpext x), (fpext y), (fneg z))
15887 if (matcher.match(N0, ISD::FP_EXTEND)) {
15888 SDValue N00 = N0.getOperand(0);
15889 if (isContractableFMUL(N00) &&
15890 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
15891 N00.getValueType())) {
15892 return matcher.getNode(
15893 PreferredFusedOpcode, SL, VT,
15894 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
15895 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
15896 matcher.getNode(ISD::FNEG, SL, VT, N1));
15897 }
15898 }
15899
15900 // fold (fsub x, (fpext (fmul y, z)))
15901 // -> (fma (fneg (fpext y)), (fpext z), x)
15902 // Note: Commutes FSUB operands.
15903 if (matcher.match(N1, ISD::FP_EXTEND)) {
15904 SDValue N10 = N1.getOperand(0);
15905 if (isContractableFMUL(N10) &&
15906 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
15907 N10.getValueType())) {
15908 return matcher.getNode(
15909 PreferredFusedOpcode, SL, VT,
15910 matcher.getNode(
15911 ISD::FNEG, SL, VT,
15912 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0))),
15913 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0);
15914 }
15915 }
15916
15917 // fold (fsub (fpext (fneg (fmul, x, y))), z)
15918 // -> (fneg (fma (fpext x), (fpext y), z))
15919 // Note: This could be removed with appropriate canonicalization of the
15920 // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
15921 // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent us
15922 // from implementing the canonicalization in visitFSUB.
15923 if (matcher.match(N0, ISD::FP_EXTEND)) {
15924 SDValue N00 = N0.getOperand(0);
15925 if (matcher.match(N00, ISD::FNEG)) {
15926 SDValue N000 = N00.getOperand(0);
15927 if (isContractableFMUL(N000) &&
15928 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
15929 N00.getValueType())) {
15930 return matcher.getNode(
15931 ISD::FNEG, SL, VT,
15932 matcher.getNode(
15933 PreferredFusedOpcode, SL, VT,
15934 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
15935 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
15936 N1));
15937 }
15938 }
15939 }
15940
15941 // fold (fsub (fneg (fpext (fmul, x, y))), z)
15942 // -> (fneg (fma (fpext x)), (fpext y), z)
15943 // Note: This could be removed with appropriate canonicalization of the
15944 // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
15945 // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent us
15946 // from implementing the canonicalization in visitFSUB.
15947 if (matcher.match(N0, ISD::FNEG)) {
15948 SDValue N00 = N0.getOperand(0);
15949 if (matcher.match(N00, ISD::FP_EXTEND)) {
15950 SDValue N000 = N00.getOperand(0);
15951 if (isContractableFMUL(N000) &&
15952 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
15953 N000.getValueType())) {
15954 return matcher.getNode(
15955 ISD::FNEG, SL, VT,
15956 matcher.getNode(
15957 PreferredFusedOpcode, SL, VT,
15958 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
15959 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
15960 N1));
15961 }
15962 }
15963 }
15964
15965 auto isReassociable = [&Options](SDNode *N) {
15966 return Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
15967 };
15968
15969 auto isContractableAndReassociableFMUL = [&isContractableFMUL,
15970 &isReassociable](SDValue N) {
15971 return isContractableFMUL(N) && isReassociable(N.getNode());
15972 };
15973
15974 auto isFusedOp = [&](SDValue N) {
15975 return matcher.match(N, ISD::FMA) || matcher.match(N, ISD::FMAD);
15976 };
15977
15978 // More folding opportunities when target permits.
15979 if (Aggressive && isReassociable(N)) {
15980 bool CanFuse = Options.UnsafeFPMath || N->getFlags().hasAllowContract();
15981 // fold (fsub (fma x, y, (fmul u, v)), z)
15982 // -> (fma x, y, (fma u, v, (fneg z)))
15983 if (CanFuse && isFusedOp(N0) &&
15984 isContractableAndReassociableFMUL(N0.getOperand(2)) &&
15985 N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
15986 return matcher.getNode(
15987 PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1),
15988 matcher.getNode(PreferredFusedOpcode, SL, VT,
15989 N0.getOperand(2).getOperand(0),
15990 N0.getOperand(2).getOperand(1),
15991 matcher.getNode(ISD::FNEG, SL, VT, N1)));
15992 }
15993
15994 // fold (fsub x, (fma y, z, (fmul u, v)))
15995 // -> (fma (fneg y), z, (fma (fneg u), v, x))
15996 if (CanFuse && isFusedOp(N1) &&
15997 isContractableAndReassociableFMUL(N1.getOperand(2)) &&
15998 N1->hasOneUse() && NoSignedZero) {
15999 SDValue N20 = N1.getOperand(2).getOperand(0);
16000 SDValue N21 = N1.getOperand(2).getOperand(1);
16001 return matcher.getNode(
16002 PreferredFusedOpcode, SL, VT,
16003 matcher.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
16004 N1.getOperand(1),
16005 matcher.getNode(PreferredFusedOpcode, SL, VT,
16006 matcher.getNode(ISD::FNEG, SL, VT, N20), N21, N0));
16007 }
16008
16009 // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
16010 // -> (fma x, y, (fma (fpext u), (fpext v), (fneg z)))
16011 if (isFusedOp(N0) && N0->hasOneUse()) {
16012 SDValue N02 = N0.getOperand(2);
16013 if (matcher.match(N02, ISD::FP_EXTEND)) {
16014 SDValue N020 = N02.getOperand(0);
16015 if (isContractableAndReassociableFMUL(N020) &&
16016 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16017 N020.getValueType())) {
16018 return matcher.getNode(
16019 PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1),
16020 matcher.getNode(
16021 PreferredFusedOpcode, SL, VT,
16022 matcher.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(0)),
16023 matcher.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(1)),
16024 matcher.getNode(ISD::FNEG, SL, VT, N1)));
16025 }
16026 }
16027 }
16028
16029 // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
16030 // -> (fma (fpext x), (fpext y),
16031 // (fma (fpext u), (fpext v), (fneg z)))
16032 // FIXME: This turns two single-precision and one double-precision
16033 // operation into two double-precision operations, which might not be
16034 // interesting for all targets, especially GPUs.
16035 if (matcher.match(N0, ISD::FP_EXTEND)) {
16036 SDValue N00 = N0.getOperand(0);
16037 if (isFusedOp(N00)) {
16038 SDValue N002 = N00.getOperand(2);
16039 if (isContractableAndReassociableFMUL(N002) &&
16040 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16041 N00.getValueType())) {
16042 return matcher.getNode(
16043 PreferredFusedOpcode, SL, VT,
16044 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
16045 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
16046 matcher.getNode(
16047 PreferredFusedOpcode, SL, VT,
16048 matcher.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(0)),
16049 matcher.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(1)),
16050 matcher.getNode(ISD::FNEG, SL, VT, N1)));
16051 }
16052 }
16053 }
16054
16055 // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
16056 // -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
16057 if (isFusedOp(N1) && matcher.match(N1.getOperand(2), ISD::FP_EXTEND) &&
16058 N1->hasOneUse()) {
16059 SDValue N120 = N1.getOperand(2).getOperand(0);
16060 if (isContractableAndReassociableFMUL(N120) &&
16061 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16062 N120.getValueType())) {
16063 SDValue N1200 = N120.getOperand(0);
16064 SDValue N1201 = N120.getOperand(1);
16065 return matcher.getNode(
16066 PreferredFusedOpcode, SL, VT,
16067 matcher.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
16068 N1.getOperand(1),
16069 matcher.getNode(
16070 PreferredFusedOpcode, SL, VT,
16071 matcher.getNode(ISD::FNEG, SL, VT,
16072 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1200)),
16073 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1201), N0));
16074 }
16075 }
16076
16077 // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
16078 // -> (fma (fneg (fpext y)), (fpext z),
16079 // (fma (fneg (fpext u)), (fpext v), x))
16080 // FIXME: This turns two single-precision and one double-precision
16081 // operation into two double-precision operations, which might not be
16082 // interesting for all targets, especially GPUs.
16083 if (matcher.match(N1, ISD::FP_EXTEND) && isFusedOp(N1.getOperand(0))) {
16084 SDValue CvtSrc = N1.getOperand(0);
16085 SDValue N100 = CvtSrc.getOperand(0);
16086 SDValue N101 = CvtSrc.getOperand(1);
16087 SDValue N102 = CvtSrc.getOperand(2);
16088 if (isContractableAndReassociableFMUL(N102) &&
16089 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16090 CvtSrc.getValueType())) {
16091 SDValue N1020 = N102.getOperand(0);
16092 SDValue N1021 = N102.getOperand(1);
16093 return matcher.getNode(
16094 PreferredFusedOpcode, SL, VT,
16095 matcher.getNode(ISD::FNEG, SL, VT,
16096 matcher.getNode(ISD::FP_EXTEND, SL, VT, N100)),
16097 matcher.getNode(ISD::FP_EXTEND, SL, VT, N101),
16098 matcher.getNode(
16099 PreferredFusedOpcode, SL, VT,
16100 matcher.getNode(ISD::FNEG, SL, VT,
16101 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1020)),
16102 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1021), N0));
16103 }
16104 }
16105 }
16106
16107 return SDValue();
16108}
16109
16110/// Try to perform FMA combining on a given FMUL node based on the distributive
16111/// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
16112/// subtraction instead of addition).
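/// For example, (fmul (fadd x, 1.0), y) becomes (fma x, y, y) and
/// (fmul (fsub x, 1.0), y) becomes (fma x, y, (fneg y)).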
16113SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
16114 SDValue N0 = N->getOperand(0);
16115 SDValue N1 = N->getOperand(1);
16116 EVT VT = N->getValueType(0);
16117 SDLoc SL(N);
16118
16119 assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
16120
16121 const TargetOptions &Options = DAG.getTarget().Options;
16122
16123 // The transforms below are incorrect when x == 0 and y == inf, because the
16124 // intermediate multiplication produces a nan.
16125 SDValue FAdd = N0.getOpcode() == ISD::FADD ? N0 : N1;
16126 if (!hasNoInfs(Options, FAdd))
16127 return SDValue();
16128
16129 // Floating-point multiply-add without intermediate rounding.
16130 bool HasFMA =
16131 isContractableFMUL(Options, SDValue(N, 0)) &&
16132 TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
16133 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
16134
16135 // Floating-point multiply-add with intermediate rounding. This can result
16136 // in a less precise result due to the changed rounding order.
16137 bool HasFMAD = Options.UnsafeFPMath &&
16138 (LegalOperations && TLI.isFMADLegal(DAG, N));
16139
16140 // No valid opcode, do not combine.
16141 if (!HasFMAD && !HasFMA)
16142 return SDValue();
16143
16144 // Always prefer FMAD to FMA for precision.
16145 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
16146 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
16147
16148 // fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y)
16149 // fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y))
16150 auto FuseFADD = [&](SDValue X, SDValue Y) {
16151 if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
16152 if (auto *C = isConstOrConstSplatFP(X.getOperand(1), true)) {
16153 if (C->isExactlyValue(+1.0))
16154 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
16155 Y);
16156 if (C->isExactlyValue(-1.0))
16157 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
16158 DAG.getNode(ISD::FNEG, SL, VT, Y));
16159 }
16160 }
16161 return SDValue();
16162 };
16163
16164 if (SDValue FMA = FuseFADD(N0, N1))
16165 return FMA;
16166 if (SDValue FMA = FuseFADD(N1, N0))
16167 return FMA;
16168
16169 // fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y)
16170 // fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y))
16171 // fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y))
16172 // fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y)
16173 auto FuseFSUB = [&](SDValue X, SDValue Y) {
16174 if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
16175 if (auto *C0 = isConstOrConstSplatFP(X.getOperand(0), true)) {
16176 if (C0->isExactlyValue(+1.0))
16177 return DAG.getNode(PreferredFusedOpcode, SL, VT,
16178 DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
16179 Y);
16180 if (C0->isExactlyValue(-1.0))
16181 return DAG.getNode(PreferredFusedOpcode, SL, VT,
16182 DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
16183 DAG.getNode(ISD::FNEG, SL, VT, Y));
16184 }
16185 if (auto *C1 = isConstOrConstSplatFP(X.getOperand(1), true)) {
16186 if (C1->isExactlyValue(+1.0))
16187 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
16188 DAG.getNode(ISD::FNEG, SL, VT, Y));
16189 if (C1->isExactlyValue(-1.0))
16190 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
16191 Y);
16192 }
16193 }
16194 return SDValue();
16195 };
16196
16197 if (SDValue FMA = FuseFSUB(N0, N1))
16198 return FMA;
16199 if (SDValue FMA = FuseFSUB(N1, N0))
16200 return FMA;
16201
16202 return SDValue();
16203}
16204
16205SDValue DAGCombiner::visitVP_FADD(SDNode *N) {
16206 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
16207
16208 // FADD -> FMA combines:
16209 if (SDValue Fused = visitFADDForFMACombine<VPMatchContext>(N)) {
16210 if (Fused.getOpcode() != ISD::DELETED_NODE)
16211 AddToWorklist(Fused.getNode());
16212 return Fused;
16213 }
16214 return SDValue();
16215}
16216
16217SDValue DAGCombiner::visitFADD(SDNode *N) {
16218 SDValue N0 = N->getOperand(0);
16219 SDValue N1 = N->getOperand(1);
16220 bool N0CFP = DAG.isConstantFPBuildVectorOrConstantFP(N0);
16221 bool N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N1);
16222 EVT VT = N->getValueType(0);
16223 SDLoc DL(N);
16224 const TargetOptions &Options = DAG.getTarget().Options;
16225 SDNodeFlags Flags = N->getFlags();
16226 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
16227
16228 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
16229 return R;
16230
16231 // fold (fadd c1, c2) -> c1 + c2
16232 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FADD, DL, VT, {N0, N1}))
16233 return C;
16234
16235 // canonicalize constant to RHS
16236 if (N0CFP && !N1CFP)
16237 return DAG.getNode(ISD::FADD, DL, VT, N1, N0);
16238
16239 // fold vector ops
16240 if (VT.isVector())
16241 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
16242 return FoldedVOp;
16243
16244 // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
16245 ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
16246 if (N1C && N1C->isZero())
16247 if (N1C->isNegative() || Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())
16248 return N0;
16249
16250 if (SDValue NewSel = foldBinOpIntoSelect(N))
16251 return NewSel;
16252
16253 // fold (fadd A, (fneg B)) -> (fsub A, B)
16254 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
16255 if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
16256 N1, DAG, LegalOperations, ForCodeSize))
16257 return DAG.getNode(ISD::FSUB, DL, VT, N0, NegN1);
16258
16259 // fold (fadd (fneg A), B) -> (fsub B, A)
16260 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
16261 if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
16262 N0, DAG, LegalOperations, ForCodeSize))
16263 return DAG.getNode(ISD::FSUB, DL, VT, N1, NegN0);
16264
16265 auto isFMulNegTwo = [](SDValue FMul) {
16266 if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
16267 return false;
16268 auto *C = isConstOrConstSplatFP(FMul.getOperand(1), true);
16269 return C && C->isExactlyValue(-2.0);
16270 };
16271
16272 // fadd (fmul B, -2.0), A --> fsub A, (fadd B, B)
16273 if (isFMulNegTwo(N0)) {
16274 SDValue B = N0.getOperand(0);
16275 SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
16276 return DAG.getNode(ISD::FSUB, DL, VT, N1, Add);
16277 }
16278 // fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B)
16279 if (isFMulNegTwo(N1)) {
16280 SDValue B = N1.getOperand(0);
16281 SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
16282 return DAG.getNode(ISD::FSUB, DL, VT, N0, Add);
16283 }
16284
16285 // No FP constant should be created after legalization as the Instruction
16286 // Selection pass has a hard time dealing with FP constants.
16287 bool AllowNewConst = (Level < AfterLegalizeDAG);
16288
16289 // If nnan is enabled, fold lots of things.
16290 if ((Options.NoNaNsFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
16291 // If allowed, fold (fadd (fneg x), x) -> 0.0
16292 if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
16293 return DAG.getConstantFP(0.0, DL, VT);
16294
16295 // If allowed, fold (fadd x, (fneg x)) -> 0.0
16296 if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
16297 return DAG.getConstantFP(0.0, DL, VT);
16298 }
16299
16300 // If 'unsafe math' or reassoc and nsz, fold lots of things.
16301 // TODO: break out portions of the transformations below for which Unsafe is
16302 // considered and which do not require both nsz and reassoc
16303 if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
16304 (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
16305 AllowNewConst) {
16306 // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
16307 if (N1CFP && N0.getOpcode() == ISD::FADD &&
16308 DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
16309 SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1);
16310 return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC);
16311 }
16312
16313 // We can fold chains of FADD's of the same value into multiplications.
16314 // This transform is not safe in general because we are reducing the number
16315 // of rounding steps.
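// For example, under these flags (fadd (fadd x, x), x) becomes (fmul x, 3.0)
// and (fadd (fadd x, x), (fadd x, x)) becomes (fmul x, 4.0).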
16316 if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
16317 if (N0.getOpcode() == ISD::FMUL) {
16318 SDNode *CFP00 =
16319 DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
16320 SDNode *CFP01 =
16321 DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
16322
16323 // (fadd (fmul x, c), x) -> (fmul x, c+1)
16324 if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
16325 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
16326 DAG.getConstantFP(1.0, DL, VT));
16327 return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP);
16328 }
16329
16330 // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
16331 if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
16332 N1.getOperand(0) == N1.getOperand(1) &&
16333 N0.getOperand(0) == N1.getOperand(0)) {
16334 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
16335 DAG.getConstantFP(2.0, DL, VT));
16336 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP);
16337 }
16338 }
16339
16340 if (N1.getOpcode() == ISD::FMUL) {
16341 SDNode *CFP10 =
16342 DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
16343 SDNode *CFP11 =
16344 DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
16345
16346 // (fadd x, (fmul x, c)) -> (fmul x, c+1)
16347 if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
16348 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
16349 DAG.getConstantFP(1.0, DL, VT));
16350 return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP);
16351 }
16352
16353 // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
16354 if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
16355 N0.getOperand(0) == N0.getOperand(1) &&
16356 N1.getOperand(0) == N0.getOperand(0)) {
16357 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
16358 DAG.getConstantFP(2.0, DL, VT));
16359 return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP);
16360 }
16361 }
16362
16363 if (N0.getOpcode() == ISD::FADD) {
16364 SDNode *CFP00 =
16365 DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
16366 // (fadd (fadd x, x), x) -> (fmul x, 3.0)
16367 if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
16368 (N0.getOperand(0) == N1)) {
16369 return DAG.getNode(ISD::FMUL, DL, VT, N1,
16370 DAG.getConstantFP(3.0, DL, VT));
16371 }
16372 }
16373
16374 if (N1.getOpcode() == ISD::FADD) {
16375 SDNode *CFP10 =
16376 DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
16377 // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
16378 if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
16379 N1.getOperand(0) == N0) {
16380 return DAG.getNode(ISD::FMUL, DL, VT, N0,
16381 DAG.getConstantFP(3.0, DL, VT));
16382 }
16383 }
16384
16385 // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
16386 if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
16387 N0.getOperand(0) == N0.getOperand(1) &&
16388 N1.getOperand(0) == N1.getOperand(1) &&
16389 N0.getOperand(0) == N1.getOperand(0)) {
16390 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
16391 DAG.getConstantFP(4.0, DL, VT));
16392 }
16393 }
16394
16395 // Fold fadd(vecreduce(x), vecreduce(y)) -> vecreduce(fadd(x, y))
16396 if (SDValue SD = reassociateReduction(ISD::VECREDUCE_FADD, ISD::FADD, DL,
16397 VT, N0, N1, Flags))
16398 return SD;
16399 } // enable-unsafe-fp-math
16400
16401 // FADD -> FMA combines:
16402 if (SDValue Fused = visitFADDForFMACombine<EmptyMatchContext>(N)) {
16403 if (Fused.getOpcode() != ISD::DELETED_NODE)
16404 AddToWorklist(Fused.getNode());
16405 return Fused;
16406 }
16407 return SDValue();
16408}
16409
16410SDValue DAGCombiner::visitSTRICT_FADD(SDNode *N) {
16411 SDValue Chain = N->getOperand(0);
16412 SDValue N0 = N->getOperand(1);
16413 SDValue N1 = N->getOperand(2);
16414 EVT VT = N->getValueType(0);
16415 EVT ChainVT = N->getValueType(1);
16416 SDLoc DL(N);
16417 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
16418
16419 // fold (strict_fadd A, (fneg B)) -> (strict_fsub A, B)
16420 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
16421 if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
16422 N1, DAG, LegalOperations, ForCodeSize)) {
16423 return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
16424 {Chain, N0, NegN1});
16425 }
16426
16427 // fold (strict_fadd (fneg A), B) -> (strict_fsub B, A)
16428 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
16429 if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
16430 N0, DAG, LegalOperations, ForCodeSize)) {
16431 return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
16432 {Chain, N1, NegN0});
16433 }
16434 return SDValue();
16435}
16436
16437SDValue DAGCombiner::visitFSUB(SDNode *N) {
16438 SDValue N0 = N->getOperand(0);
16439 SDValue N1 = N->getOperand(1);
16440 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
16441 ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
16442 EVT VT = N->getValueType(0);
16443 SDLoc DL(N);
16444 const TargetOptions &Options = DAG.getTarget().Options;
16445 const SDNodeFlags Flags = N->getFlags();
16446 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
16447
16448 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
16449 return R;
16450
16451 // fold (fsub c1, c2) -> c1-c2
16452 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FSUB, DL, VT, {N0, N1}))
16453 return C;
16454
16455 // fold vector ops
16456 if (VT.isVector())
16457 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
16458 return FoldedVOp;
16459
16460 if (SDValue NewSel = foldBinOpIntoSelect(N))
16461 return NewSel;
16462
16463 // (fsub A, 0) -> A
16464 if (N1CFP && N1CFP->isZero()) {
16465 if (!N1CFP->isNegative() || Options.NoSignedZerosFPMath ||
16466 Flags.hasNoSignedZeros()) {
16467 return N0;
16468 }
16469 }
16470
16471 if (N0 == N1) {
16472 // (fsub x, x) -> 0.0
16473 if (Options.NoNaNsFPMath || Flags.hasNoNaNs())
16474 return DAG.getConstantFP(0.0f, DL, VT);
16475 }
16476
16477 // (fsub -0.0, N1) -> -N1
16478 if (N0CFP && N0CFP->isZero()) {
16479 if (N0CFP->isNegative() ||
16480 (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
16481 // We cannot replace an FSUB(+-0.0,X) with FNEG(X) when denormals are
16482 // flushed to zero, unless all users treat denorms as zero (DAZ).
16483 // FIXME: This transform will change the sign of a NaN and the behavior
16484 // of a signaling NaN. It is only valid when a NoNaN flag is present.
16485 DenormalMode DenormMode = DAG.getDenormalMode(VT);
16486 if (DenormMode == DenormalMode::getIEEE()) {
16487 if (SDValue NegN1 =
16488 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
16489 return NegN1;
16490 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
16491 return DAG.getNode(ISD::FNEG, DL, VT, N1);
16492 }
16493 }
16494 }
16495
16496 if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
16497 (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
16498 N1.getOpcode() == ISD::FADD) {
16499 // X - (X + Y) -> -Y
16500 if (N0 == N1->getOperand(0))
16501 return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1));
16502 // X - (Y + X) -> -Y
16503 if (N0 == N1->getOperand(1))
16504 return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0));
16505 }
16506
16507 // fold (fsub A, (fneg B)) -> (fadd A, B)
16508 if (SDValue NegN1 =
16509 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
16510 return DAG.getNode(ISD::FADD, DL, VT, N0, NegN1);
16511
16512 // FSUB -> FMA combines:
16513 if (SDValue Fused = visitFSUBForFMACombine<EmptyMatchContext>(N)) {
16514 AddToWorklist(Fused.getNode());
16515 return Fused;
16516 }
16517
16518 return SDValue();
16519}
16520
16521// Transform IEEE Floats:
16522// (fmul C, (uitofp Pow2))
16523// -> (bitcast_to_FP (add (bitcast_to_INT C), Log2(Pow2) << mantissa))
16524// (fdiv C, (uitofp Pow2))
16525// -> (bitcast_to_FP (sub (bitcast_to_INT C), Log2(Pow2) << mantissa))
16526//
16527 // The rationale is that fmul/fdiv by a power of 2 just changes the exponent,
16528 // so there is no need for more than an add/sub.
16529//
16530// This is valid under the following circumstances:
16531// 1) We are dealing with IEEE floats
16532// 2) C is normal
16533// 3) The fmul/fdiv add/sub will not go outside of min/max exponent bounds.
16534 // TODO: Much of this could also be used for generating `ldexp` on targets
16535 // that prefer it.
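// As a worked example for f32: with C = 8.0 (bits 0x41000000) and Pow2 = 4
// (Log2 = 2), adding (2 << 23) to the bits gives 0x42000000 == 32.0, which is
// exactly 8.0 * 4.0, so the fmul reduces to an integer add into the exponent.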
16536SDValue DAGCombiner::combineFMulOrFDivWithIntPow2(SDNode *N) {
16537 EVT VT = N->getValueType(0);
16538 SDValue ConstOp, Pow2Op;
16539
16540 std::optional<int> Mantissa;
16541 auto GetConstAndPow2Ops = [&](unsigned ConstOpIdx) {
16542 if (ConstOpIdx == 1 && N->getOpcode() == ISD::FDIV)
16543 return false;
16544
16545 ConstOp = peekThroughBitcasts(N->getOperand(ConstOpIdx));
16546 Pow2Op = N->getOperand(1 - ConstOpIdx);
16547 if (Pow2Op.getOpcode() != ISD::UINT_TO_FP &&
16548 (Pow2Op.getOpcode() != ISD::SINT_TO_FP ||
16549 !DAG.computeKnownBits(Pow2Op).isNonNegative()))
16550 return false;
16551
16552 Pow2Op = Pow2Op.getOperand(0);
16553
16554 // `Log2(Pow2Op) < Pow2Op.getScalarSizeInBits()`.
16555 // TODO: We could use knownbits to make this bound more precise.
16556 int MaxExpChange = Pow2Op.getValueType().getScalarSizeInBits();
16557
16558 auto IsFPConstValid = [N, MaxExpChange, &Mantissa](ConstantFPSDNode *CFP) {
16559 if (CFP == nullptr)
16560 return false;
16561
16562 const APFloat &APF = CFP->getValueAPF();
16563
16564 // Make sure we have a normal/IEEE constant.
16565 if (!APF.isNormal() || !APF.isIEEE())
16566 return false;
16567
16568 // Make sure the float's exponent is within the bounds for which this
16569 // transform produces a bitwise-equal value.
16570 int CurExp = ilogb(APF);
16571 // FMul by pow2 will only increase exponent.
16572 int MinExp =
16573 N->getOpcode() == ISD::FMUL ? CurExp : (CurExp - MaxExpChange);
16574 // FDiv by pow2 will only decrease exponent.
16575 int MaxExp =
16576 N->getOpcode() == ISD::FDIV ? CurExp : (CurExp + MaxExpChange);
16577 if (MinExp <= APFloat::semanticsMinExponent(APF.getSemantics()) ||
16578 MaxExp >= APFloat::semanticsMaxExponent(APF.getSemantics()))
16579 return false;
16580
16581 // Finally make sure we actually know the mantissa for the float type.
16582 int ThisMantissa = APFloat::semanticsPrecision(APF.getSemantics()) - 1;
16583 if (!Mantissa)
16584 Mantissa = ThisMantissa;
16585
16586 return *Mantissa == ThisMantissa && ThisMantissa > 0;
16587 };
16588
16589 // TODO: We may be able to include undefs.
16590 return ISD::matchUnaryFpPredicate(ConstOp, IsFPConstValid);
16591 };
16592
16593 if (!GetConstAndPow2Ops(0) && !GetConstAndPow2Ops(1))
16594 return SDValue();
16595
16596 if (!TLI.optimizeFMulOrFDivAsShiftAddBitcast(N, ConstOp, Pow2Op))
16597 return SDValue();
16598
16599 // Get log2 after all other checks have taken place. This is because
16600 // BuildLogBase2 may create a new node.
16601 SDLoc DL(N);
16602 // Get Log2 type with same bitwidth as the float type (VT).
16603 EVT NewIntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits());
16604 if (VT.isVector())
16605 NewIntVT = EVT::getVectorVT(*DAG.getContext(), NewIntVT,
16606 VT.getVectorElementCount());
16607
16608 SDValue Log2 = BuildLogBase2(Pow2Op, DL, DAG.isKnownNeverZero(Pow2Op),
16609 /*InexpensiveOnly*/ true, NewIntVT);
16610 if (!Log2)
16611 return SDValue();
16612
16613 // Perform actual transform.
16614 SDValue MantissaShiftCnt =
16615 DAG.getConstant(*Mantissa, DL, getShiftAmountTy(NewIntVT));
16616 // TODO: Sometimes Log2 is of form `(X + C)`. `(X + C) << C1` should fold to
16617 // `(X << C1) + (C << C1)`, but that isn't always the case because of the
16618 // cast. We could implement that here by handling the casts as well.
16619 SDValue Shift = DAG.getNode(ISD::SHL, DL, NewIntVT, Log2, MantissaShiftCnt);
16620 SDValue ResAsInt =
16621 DAG.getNode(N->getOpcode() == ISD::FMUL ? ISD::ADD : ISD::SUB, DL,
16622 NewIntVT, DAG.getBitcast(NewIntVT, ConstOp), Shift);
16623 SDValue ResAsFP = DAG.getBitcast(VT, ResAsInt);
16624 return ResAsFP;
16625}
16626
16627SDValue DAGCombiner::visitFMUL(SDNode *N) {
16628 SDValue N0 = N->getOperand(0);
16629 SDValue N1 = N->getOperand(1);
16630 ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
16631 EVT VT = N->getValueType(0);
16632 SDLoc DL(N);
16633 const TargetOptions &Options = DAG.getTarget().Options;
16634 const SDNodeFlags Flags = N->getFlags();
16635 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
16636
16637 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
16638 return R;
16639
16640 // fold (fmul c1, c2) -> c1*c2
16641 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FMUL, DL, VT, {N0, N1}))
16642 return C;
16643
16644 // canonicalize constant to RHS
16645 if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
16646 !DAG.isConstantFPBuildVectorOrConstantFP(N1))
16647 return DAG.getNode(ISD::FMUL, DL, VT, N1, N0);
16648
16649 // fold vector ops
16650 if (VT.isVector())
16651 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
16652 return FoldedVOp;
16653
16654 if (SDValue NewSel = foldBinOpIntoSelect(N))
16655 return NewSel;
16656
16657 if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) {
16658 // fmul (fmul X, C1), C2 -> fmul X, C1 * C2
16659 if (DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
16660 N0.getOpcode() == ISD::FMUL) {
16661 SDValue N00 = N0.getOperand(0);
16662 SDValue N01 = N0.getOperand(1);
16663 // Avoid an infinite loop by making sure that N00 is not a constant
16664 // (the inner multiply has not been constant folded yet).
16665 if (DAG.isConstantFPBuildVectorOrConstantFP(N01) &&
16666 !DAG.isConstantFPBuildVectorOrConstantFP(N00)) {
16667 SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1);
16668 return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts);
16669 }
16670 }
16671
16672 // Match a special-case: we convert X * 2.0 into fadd.
16673 // fmul (fadd X, X), C -> fmul X, 2.0 * C
16674 if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() &&
16675 N0.getOperand(0) == N0.getOperand(1)) {
16676 const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
16677 SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1);
16678 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts);
16679 }
16680
16681 // Fold fmul(vecreduce(x), vecreduce(y)) -> vecreduce(fmul(x, y))
16682 if (SDValue SD = reassociateReduction(ISD::VECREDUCE_FMUL, ISD::FMUL, DL,
16683 VT, N0, N1, Flags))
16684 return SD;
16685 }
16686
16687 // fold (fmul X, 2.0) -> (fadd X, X)
16688 if (N1CFP && N1CFP->isExactlyValue(+2.0))
16689 return DAG.getNode(ISD::FADD, DL, VT, N0, N0);
16690
16691 // fold (fmul X, -1.0) -> (fsub -0.0, X)
16692 if (N1CFP && N1CFP->isExactlyValue(-1.0)) {
16693 if (!LegalOperations || TLI.isOperationLegal(ISD::FSUB, VT)) {
16694 return DAG.getNode(ISD::FSUB, DL, VT,
16695 DAG.getConstantFP(-0.0, DL, VT), N0, Flags);
16696 }
16697 }
16698
16699 // -N0 * -N1 --> N0 * N1
16700 TargetLowering::NegatibleCost CostN0 =
16701 TargetLowering::NegatibleCost::Expensive;
16702 TargetLowering::NegatibleCost CostN1 =
16703 TargetLowering::NegatibleCost::Expensive;
16704 SDValue NegN0 =
16705 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
16706 if (NegN0) {
16707 HandleSDNode NegN0Handle(NegN0);
16708 SDValue NegN1 =
16709 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
16710 if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
16711 CostN1 == TargetLowering::NegatibleCost::Cheaper))
16712 return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1);
16713 }
16714
16715 // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
16716 // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
16717 if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
16718 (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
16719 TLI.isOperationLegal(ISD::FABS, VT)) {
16720 SDValue Select = N0, X = N1;
16721 if (Select.getOpcode() != ISD::SELECT)
16722 std::swap(Select, X);
16723
16724 SDValue Cond = Select.getOperand(0);
16725 auto TrueOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
16726 auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));
16727
16728 if (TrueOpnd && FalseOpnd &&
16729 Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
16730 isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
16731 cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
16732 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
16733 switch (CC) {
16734 default: break;
16735 case ISD::SETOLT:
16736 case ISD::SETULT:
16737 case ISD::SETOLE:
16738 case ISD::SETULE:
16739 case ISD::SETLT:
16740 case ISD::SETLE:
16741 std::swap(TrueOpnd, FalseOpnd);
16742 [[fallthrough]];
16743 case ISD::SETOGT:
16744 case ISD::SETUGT:
16745 case ISD::SETOGE:
16746 case ISD::SETUGE:
16747 case ISD::SETGT:
16748 case ISD::SETGE:
16749 if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
16750 TLI.isOperationLegal(ISD::FNEG, VT))
16751 return DAG.getNode(ISD::FNEG, DL, VT,
16752 DAG.getNode(ISD::FABS, DL, VT, X));
16753 if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
16754 return DAG.getNode(ISD::FABS, DL, VT, X);
16755
16756 break;
16757 }
16758 }
16759 }
16760
16761 // FMUL -> FMA combines:
16762 if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
16763 AddToWorklist(Fused.getNode());
16764 return Fused;
16765 }
16766
16767 // Don't do `combineFMulOrFDivWithIntPow2` until after FMUL -> FMA has been
16768 // able to run.
16769 if (SDValue R = combineFMulOrFDivWithIntPow2(N))
16770 return R;
16771
16772 return SDValue();
16773}
16774
16775template <class MatchContextClass> SDValue DAGCombiner::visitFMA(SDNode *N) {
16776 SDValue N0 = N->getOperand(0);
16777 SDValue N1 = N->getOperand(1);
16778 SDValue N2 = N->getOperand(2);
16779 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
16780 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
16781 EVT VT = N->getValueType(0);
16782 SDLoc DL(N);
16783 const TargetOptions &Options = DAG.getTarget().Options;
16784 // FMA nodes have flags that propagate to the created nodes.
16785 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
16786 MatchContextClass matcher(DAG, TLI, N);
16787
16788 bool CanReassociate =
16789 Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
16790
16791 // Constant fold FMA.
16792 if (isa<ConstantFPSDNode>(N0) &&
16793 isa<ConstantFPSDNode>(N1) &&
16794 isa<ConstantFPSDNode>(N2)) {
16795 return matcher.getNode(ISD::FMA, DL, VT, N0, N1, N2);
16796 }
16797
16798 // (-N0 * -N1) + N2 --> (N0 * N1) + N2
16799 TargetLowering::NegatibleCost CostN0 =
16800 TargetLowering::NegatibleCost::Expensive;
16801 TargetLowering::NegatibleCost CostN1 =
16802 TargetLowering::NegatibleCost::Expensive;
16803 SDValue NegN0 =
16804 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
16805 if (NegN0) {
16806 HandleSDNode NegN0Handle(NegN0);
16807 SDValue NegN1 =
16808 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
16809 if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
16810 CostN1 == TargetLowering::NegatibleCost::Cheaper))
16811 return matcher.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2);
16812 }
16813
16814 // FIXME: use fast math flags instead of Options.UnsafeFPMath
16815 if (Options.UnsafeFPMath) {
16816 if (N0CFP && N0CFP->isZero())
16817 return N2;
16818 if (N1CFP && N1CFP->isZero())
16819 return N2;
16820 }
16821
16822 // FIXME: Support splat of constant.
16823 if (N0CFP && N0CFP->isExactlyValue(1.0))
16824 return matcher.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
16825 if (N1CFP && N1CFP->isExactlyValue(1.0))
16826 return matcher.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);
16827
16828 // Canonicalize (fma c, x, y) -> (fma x, c, y)
16829 if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
16830 !DAG.isConstantFPBuildVectorOrConstantFP(N1))
16831 return matcher.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
16832
16833 if (CanReassociate) {
16834 // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
16835 if (matcher.match(N2, ISD::FMUL) && N0 == N2.getOperand(0) &&
16836 DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
16837 DAG.isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
16838 return matcher.getNode(
16839 ISD::FMUL, DL, VT, N0,
16840 matcher.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1)));
16841 }
16842
16843 // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
16844 if (matcher.match(N0, ISD::FMUL) &&
16845 DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
16846 DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
16847 return matcher.getNode(
16848 ISD::FMA, DL, VT, N0.getOperand(0),
16849 matcher.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1)), N2);
16850 }
16851 }
16852
16853 // (fma x, -1, y) -> (fadd (fneg x), y)
16854 // FIXME: Support splat of constant.
16855 if (N1CFP) {
16856 if (N1CFP->isExactlyValue(1.0))
16857 return matcher.getNode(ISD::FADD, DL, VT, N0, N2);
16858
16859 if (N1CFP->isExactlyValue(-1.0) &&
16860 (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
16861 SDValue RHSNeg = matcher.getNode(ISD::FNEG, DL, VT, N0);
16862 AddToWorklist(RHSNeg.getNode());
16863 return matcher.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
16864 }
16865
16866 // fma (fneg x), K, y -> fma x, -K, y
16867 if (matcher.match(N0, ISD::FNEG) &&
16868 (TLI.isOperationLegal(ISD::ConstantFP, VT) ||
16869 (N1.hasOneUse() &&
16870 !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT, ForCodeSize)))) {
16871 return matcher.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
16872 matcher.getNode(ISD::FNEG, DL, VT, N1), N2);
16873 }
16874 }
16875
16876 // FIXME: Support splat of constant.
16877 if (CanReassociate) {
16878 // (fma x, c, x) -> (fmul x, (c+1))
16879 if (N1CFP && N0 == N2) {
16880 return matcher.getNode(ISD::FMUL, DL, VT, N0,
16881 matcher.getNode(ISD::FADD, DL, VT, N1,
16882 DAG.getConstantFP(1.0, DL, VT)));
16883 }
16884
16885 // (fma x, c, (fneg x)) -> (fmul x, (c-1))
16886 if (N1CFP && matcher.match(N2, ISD::FNEG) && N2.getOperand(0) == N0) {
16887 return matcher.getNode(ISD::FMUL, DL, VT, N0,
16888 matcher.getNode(ISD::FADD, DL, VT, N1,
16889 DAG.getConstantFP(-1.0, DL, VT)));
16890 }
16891 }
16892
16893 // fold ((fma (fneg X), Y, (fneg Z)) -> fneg (fma X, Y, Z))
16894 // fold ((fma X, (fneg Y), (fneg Z)) -> fneg (fma X, Y, Z))
16895 if (!TLI.isFNegFree(VT))
16896 if (SDValue Neg = TLI.getCheaperNegatedExpression(
16897 SDValue(N, 0), DAG, LegalOperations, ForCodeSize))
16898 return matcher.getNode(ISD::FNEG, DL, VT, Neg);
16899 return SDValue();
16900}
16901
16902SDValue DAGCombiner::visitFMAD(SDNode *N) {
16903 SDValue N0 = N->getOperand(0);
16904 SDValue N1 = N->getOperand(1);
16905 SDValue N2 = N->getOperand(2);
16906 EVT VT = N->getValueType(0);
16907 SDLoc DL(N);
16908
16909 // Constant fold FMAD.
16910 if (isa<ConstantFPSDNode>(N0) && isa<ConstantFPSDNode>(N1) &&
16911 isa<ConstantFPSDNode>(N2))
16912 return DAG.getNode(ISD::FMAD, DL, VT, N0, N1, N2);
16913
16914 return SDValue();
16915}
16916
16917// Combine multiple FDIVs with the same divisor into multiple FMULs by the
16918// reciprocal.
16919// E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
16920 // Notice that this is not always beneficial. One reason is that different targets
16921 // may have different costs for FDIV and FMUL, so sometimes the cost of two
16922 // FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason is
16923 // that the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
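// A sketch with hypothetical numbers: if the target's
// combineRepeatedFPDivisors() threshold is 3, then "a/D; b/D; c/D" becomes
// "t = 1.0/D; a*t; b*t; c*t" (one divide plus three multiplies), while only
// two divisions by D would be left untouched.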
16924SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
16925 // TODO: Limit this transform based on optsize/minsize - it always creates at
16926 // least 1 extra instruction. But the perf win may be substantial enough
16927 // that only minsize should restrict this.
16928 bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
16929 const SDNodeFlags Flags = N->getFlags();
16930 if (LegalDAG || (!UnsafeMath && !Flags.hasAllowReciprocal()))
16931 return SDValue();
16932
16933 // Skip if current node is a reciprocal/fneg-reciprocal.
16934 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
16935 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, /* AllowUndefs */ true);
16936 if (N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0)))
16937 return SDValue();
16938
16939 // Exit early if the target does not want this transform or if there can't
16940 // possibly be enough uses of the divisor to make the transform worthwhile.
16941 unsigned MinUses = TLI.combineRepeatedFPDivisors();
16942
16943 // For splat vectors, scale the number of uses by the splat factor. If we can
16944 // convert the division into a scalar op, that will likely be much faster.
16945 unsigned NumElts = 1;
16946 EVT VT = N->getValueType(0);
16947 if (VT.isVector() && DAG.isSplatValue(N1))
16948 NumElts = VT.getVectorMinNumElements();
16949
16950 if (!MinUses || (N1->use_size() * NumElts) < MinUses)
16951 return SDValue();
16952
16953 // Find all FDIV users of the same divisor.
16954 // Use a set because duplicates may be present in the user list.
16955 SetVector<SDNode *> Users;
16956 for (auto *U : N1->uses()) {
16957 if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
16958 // Skip X/sqrt(X) that has not been simplified to sqrt(X) yet.
16959 if (U->getOperand(1).getOpcode() == ISD::FSQRT &&
16960 U->getOperand(0) == U->getOperand(1).getOperand(0) &&
16961 U->getFlags().hasAllowReassociation() &&
16962 U->getFlags().hasNoSignedZeros())
16963 continue;
16964
16965 // This division is eligible for optimization only if global unsafe math
16966 // is enabled or if this division allows reciprocal formation.
16967 if (UnsafeMath || U->getFlags().hasAllowReciprocal())
16968 Users.insert(U);
16969 }
16970 }
16971
16972 // Now that we have the actual number of divisor uses, make sure it meets
16973 // the minimum threshold specified by the target.
16974 if ((Users.size() * NumElts) < MinUses)
16975 return SDValue();
16976
16977 SDLoc DL(N);
16978 SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
16979 SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
16980
16981 // Dividend / Divisor -> Dividend * Reciprocal
16982 for (auto *U : Users) {
16983 SDValue Dividend = U->getOperand(0);
16984 if (Dividend != FPOne) {
16985 SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
16986 Reciprocal, Flags);
16987 CombineTo(U, NewNode);
16988 } else if (U != Reciprocal.getNode()) {
16989 // In the absence of fast-math-flags, this user node is always the
16990 // same node as Reciprocal, but with FMF they may be different nodes.
16991 CombineTo(U, Reciprocal);
16992 }
16993 }
16994 return SDValue(N, 0); // N was replaced.
16995}
16996
16997SDValue DAGCombiner::visitFDIV(SDNode *N) {
16998 SDValue N0 = N->getOperand(0);
16999 SDValue N1 = N->getOperand(1);
17000 EVT VT = N->getValueType(0);
17001 SDLoc DL(N);
17002 const TargetOptions &Options = DAG.getTarget().Options;
17003 SDNodeFlags Flags = N->getFlags();
17004 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17005
17006 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
17007 return R;
17008
17009 // fold (fdiv c1, c2) -> c1/c2
17010 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FDIV, DL, VT, {N0, N1}))
17011 return C;
17012
17013 // fold vector ops
17014 if (VT.isVector())
17015 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
17016 return FoldedVOp;
17017
17018 if (SDValue NewSel = foldBinOpIntoSelect(N))
17019 return NewSel;
17020
17021 if (SDValue V = combineRepeatedFPDivisors(N))
17022 return V;
17023
17024 if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
17025 // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
17026 if (auto *N1CFP = dyn_cast<ConstantFPSDNode>(N1)) {
17027 // Compute the reciprocal 1.0 / c2.
17028 const APFloat &N1APF = N1CFP->getValueAPF();
17029 APFloat Recip(N1APF.getSemantics(), 1); // 1.0
17030 APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
17031 // Only do the transform if the reciprocal is a legal fp immediate that
17032 // isn't too nasty (eg NaN, denormal, ...).
17033 if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
17034 (!LegalOperations ||
17035 // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
17036 // backend)... we should handle this gracefully after Legalize.
17037 // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
17038 TLI.isOperationLegal(ISD::ConstantFP, VT) ||
17039 TLI.isFPImmLegal(Recip, VT, ForCodeSize)))
17040 return DAG.getNode(ISD::FMUL, DL, VT, N0,
17041 DAG.getConstantFP(Recip, DL, VT));
17042 }
17043
17044 // If this FDIV is part of a reciprocal square root, it may be folded
17045 // into a target-specific square root estimate instruction.
17046 if (N1.getOpcode() == ISD::FSQRT) {
17047 if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags))
17048 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
17049 } else if (N1.getOpcode() == ISD::FP_EXTEND &&
17050 N1.getOperand(0).getOpcode() == ISD::FSQRT) {
17051 if (SDValue RV =
17052 buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
17053 RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
17054 AddToWorklist(RV.getNode());
17055 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
17056 }
17057 } else if (N1.getOpcode() == ISD::FP_ROUND &&
17058 N1.getOperand(0).getOpcode() == ISD::FSQRT) {
17059 if (SDValue RV =
17060 buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
17061 RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
17062 AddToWorklist(RV.getNode());
17063 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
17064 }
17065 } else if (N1.getOpcode() == ISD::FMUL) {
17066 // Look through an FMUL. Even though this won't remove the FDIV directly,
17067 // it's still worthwhile to get rid of the FSQRT if possible.
17068 SDValue Sqrt, Y;
17069 if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
17070 Sqrt = N1.getOperand(0);
17071 Y = N1.getOperand(1);
17072 } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
17073 Sqrt = N1.getOperand(1);
17074 Y = N1.getOperand(0);
17075 }
17076 if (Sqrt.getNode()) {
17077 // If the other multiply operand is known positive, pull it into the
17078 // sqrt. That will eliminate the division if we convert to an estimate.
17079 if (Flags.hasAllowReassociation() && N1.hasOneUse() &&
17080 N1->getFlags().hasAllowReassociation() && Sqrt.hasOneUse()) {
17081 SDValue A;
17082 if (Y.getOpcode() == ISD::FABS && Y.hasOneUse())
17083 A = Y.getOperand(0);
17084 else if (Y == Sqrt.getOperand(0))
17085 A = Y;
17086 if (A) {
17087 // X / (fabs(A) * sqrt(Z)) --> X / sqrt(A*A*Z) --> X * rsqrt(A*A*Z)
17088 // X / (A * sqrt(A)) --> X / sqrt(A*A*A) --> X * rsqrt(A*A*A)
17089 SDValue AA = DAG.getNode(ISD::FMUL, DL, VT, A, A);
17090 SDValue AAZ =
17091 DAG.getNode(ISD::FMUL, DL, VT, AA, Sqrt.getOperand(0));
17092 if (SDValue Rsqrt = buildRsqrtEstimate(AAZ, Flags))
17093 return DAG.getNode(ISD::FMUL, DL, VT, N0, Rsqrt);
17094
17095 // Estimate creation failed. Clean up speculatively created nodes.
17096 recursivelyDeleteUnusedNodes(AAZ.getNode());
17097 }
17098 }
17099
17100 // We found a FSQRT, so try to make this fold:
17101 // X / (Y * sqrt(Z)) -> X * (rsqrt(Z) / Y)
17102 if (SDValue Rsqrt = buildRsqrtEstimate(Sqrt.getOperand(0), Flags)) {
17103 SDValue Div = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, Rsqrt, Y);
17104 AddToWorklist(Div.getNode());
17105 return DAG.getNode(ISD::FMUL, DL, VT, N0, Div);
17106 }
17107 }
17108 }
17109
17110 // Fold into a reciprocal estimate and multiply instead of a real divide.
17111 if (Options.NoInfsFPMath || Flags.hasNoInfs())
17112 if (SDValue RV = BuildDivEstimate(N0, N1, Flags))
17113 return RV;
17114 }
17115
17116 // Fold X/Sqrt(X) -> Sqrt(X)
17117 if ((Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) &&
17118 (Options.UnsafeFPMath || Flags.hasAllowReassociation()))
17119 if (N1.getOpcode() == ISD::FSQRT && N0 == N1.getOperand(0))
17120 return N1;
17121
17122 // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
17123 TargetLowering::NegatibleCost CostN0 =
17124 TargetLowering::NegatibleCost::Expensive;
17125 TargetLowering::NegatibleCost CostN1 =
17126 TargetLowering::NegatibleCost::Expensive;
17127 SDValue NegN0 =
17128 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
17129 if (NegN0) {
17130 HandleSDNode NegN0Handle(NegN0);
17131 SDValue NegN1 =
17132 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
17133 if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
17134 CostN1 == TargetLowering::NegatibleCost::Cheaper))
17135 return DAG.getNode(ISD::FDIV, SDLoc(N), VT, NegN0, NegN1);
17136 }
17137
17138 if (SDValue R = combineFMulOrFDivWithIntPow2(N))
17139 return R;
17140
17141 return SDValue();
17142}
17143
17144SDValue DAGCombiner::visitFREM(SDNode *N) {
17145 SDValue N0 = N->getOperand(0);
17146 SDValue N1 = N->getOperand(1);
17147 EVT VT = N->getValueType(0);
17148 SDNodeFlags Flags = N->getFlags();
17149 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17150
17151 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
17152 return R;
17153
17154 // fold (frem c1, c2) -> fmod(c1,c2)
17155 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FREM, SDLoc(N), VT, {N0, N1}))
17156 return C;
17157
17158 if (SDValue NewSel = foldBinOpIntoSelect(N))
17159 return NewSel;
17160
17161 return SDValue();
17162}
17163
17164SDValue DAGCombiner::visitFSQRT(SDNode *N) {
17165 SDNodeFlags Flags = N->getFlags();
17166 const TargetOptions &Options = DAG.getTarget().Options;
17167
17168 // Require 'ninf' flag since sqrt(+Inf) = +Inf, but the estimation goes as:
17169 // sqrt(+Inf) == rsqrt(+Inf) * +Inf = 0 * +Inf = NaN
17170 if (!Flags.hasApproximateFuncs() ||
17171 (!Options.NoInfsFPMath && !Flags.hasNoInfs()))
17172 return SDValue();
17173
17174 SDValue N0 = N->getOperand(0);
17175 if (TLI.isFsqrtCheap(N0, DAG))
17176 return SDValue();
17177
17178 // FSQRT nodes have flags that propagate to the created nodes.
17179 // TODO: If this is N0/sqrt(N0), and we reach this node before trying to
17180 // transform the fdiv, we may produce a sub-optimal estimate sequence
17181 // because the reciprocal calculation may not have to filter out a
17182 // 0.0 input.
17183 return buildSqrtEstimate(N0, Flags);
17184}
17185
17186/// copysign(x, fp_extend(y)) -> copysign(x, y)
17187/// copysign(x, fp_round(y)) -> copysign(x, y)
17188/// Operands to the functions are the type of X and Y respectively.
17189static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(EVT XTy, EVT YTy) {
17190 // Always fold no-op FP casts.
17191 if (XTy == YTy)
17192 return true;
17193
17194 // Do not optimize out type conversion of f128 type yet.
17195 // For some targets like x86_64, configuration is changed to keep one f128
17196 // value in one SSE register, but instruction selection cannot handle
17197 // FCOPYSIGN on SSE registers yet.
17198 if (YTy == MVT::f128)
17199 return false;
17200
17202}
17203
17204 static bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
17205 SDValue N1 = N->getOperand(1);
17206 if (N1.getOpcode() != ISD::FP_EXTEND &&
17207 N1.getOpcode() != ISD::FP_ROUND)
17208 return false;
17209 EVT N1VT = N1->getValueType(0);
17210 EVT N1Op0VT = N1->getOperand(0).getValueType();
17211 return CanCombineFCOPYSIGN_EXTEND_ROUND(N1VT, N1Op0VT);
17212}
17213
17214SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
17215 SDValue N0 = N->getOperand(0);
17216 SDValue N1 = N->getOperand(1);
17217 EVT VT = N->getValueType(0);
17218
17219 // fold (fcopysign c1, c2) -> fcopysign(c1,c2)
17220 if (SDValue C =
17221 DAG.FoldConstantArithmetic(ISD::FCOPYSIGN, SDLoc(N), VT, {N0, N1}))
17222 return C;
17223
17224 if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N->getOperand(1))) {
17225 const APFloat &V = N1C->getValueAPF();
17226 // copysign(x, c1) -> fabs(x) iff ispos(c1)
17227 // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
17228 if (!V.isNegative()) {
17229 if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
17230 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
17231 } else {
17232 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
17233 return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
17234 DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
17235 }
17236 }
17237
17238 // copysign(fabs(x), y) -> copysign(x, y)
17239 // copysign(fneg(x), y) -> copysign(x, y)
17240 // copysign(copysign(x,z), y) -> copysign(x, y)
17241 if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
17242 N0.getOpcode() == ISD::FCOPYSIGN)
17243 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1);
17244
17245 // copysign(x, abs(y)) -> abs(x)
17246 if (N1.getOpcode() == ISD::FABS)
17247 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
17248
17249 // copysign(x, copysign(y,z)) -> copysign(x, z)
17250 if (N1.getOpcode() == ISD::FCOPYSIGN)
17251 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1));
17252
17253 // copysign(x, fp_extend(y)) -> copysign(x, y)
17254 // copysign(x, fp_round(y)) -> copysign(x, y)
17255 if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
17256 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));
17257
17258 return SDValue();
17259}
17260
17261SDValue DAGCombiner::visitFPOW(SDNode *N) {
17262 ConstantFPSDNode *ExponentC = isConstOrConstSplatFP(N->getOperand(1));
17263 if (!ExponentC)
17264 return SDValue();
17265 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17266
17267 // Try to convert x ** (1/3) into cube root.
17268 // TODO: Handle the various flavors of long double.
17269 // TODO: Since we're approximating, we don't need an exact 1/3 exponent.
17270 // Some range near 1/3 should be fine.
17271 EVT VT = N->getValueType(0);
17272 if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(1.0f/3.0f)) ||
17273 (VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(1.0/3.0))) {
17274 // pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0.
17275 // pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf.
17276 // pow(-val, 1/3) = nan; cbrt(-val) = -cbrt(val).
17277 // For regular numbers, rounding may cause the results to differ.
17278 // Therefore, we require { nsz ninf nnan afn } for this transform.
17279 // TODO: We could select out the special cases if we don't have nsz/ninf.
17280 SDNodeFlags Flags = N->getFlags();
17281 if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || !Flags.hasNoNaNs() ||
17282 !Flags.hasApproximateFuncs())
17283 return SDValue();
17284
17285 // Do not create a cbrt() libcall if the target does not have it, and do not
17286 // turn a pow that has lowering support into a cbrt() libcall.
17287 if (!DAG.getLibInfo().has(LibFunc_cbrt) ||
17290 return SDValue();
17291
17292 return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0));
17293 }
17294
17295 // Try to convert x ** (1/4) and x ** (3/4) into square roots.
17296 // x ** (1/2) is canonicalized to sqrt, so we do not bother with that case.
17297 // TODO: This could be extended (using a target hook) to handle smaller
17298 // power-of-2 fractional exponents.
17299 bool ExponentIs025 = ExponentC->getValueAPF().isExactlyValue(0.25);
17300 bool ExponentIs075 = ExponentC->getValueAPF().isExactlyValue(0.75);
17301 if (ExponentIs025 || ExponentIs075) {
17302 // pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0.
17303 // pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) = NaN.
17304 // pow(-0.0, 0.75) = +0.0; sqrt(-0.0) * sqrt(sqrt(-0.0)) = +0.0.
17305 // pow(-inf, 0.75) = +inf; sqrt(-inf) * sqrt(sqrt(-inf)) = NaN.
17306 // For regular numbers, rounding may cause the results to differ.
17307 // Therefore, we require { nsz ninf afn } for this transform.
17308 // TODO: We could select out the special cases if we don't have nsz/ninf.
17309 SDNodeFlags Flags = N->getFlags();
17310
17311 // We only need no signed zeros for the 0.25 case.
17312 if ((!Flags.hasNoSignedZeros() && ExponentIs025) || !Flags.hasNoInfs() ||
17313 !Flags.hasApproximateFuncs())
17314 return SDValue();
17315
17316 // Don't double the number of libcalls. We are trying to inline fast code.
17317 if (!TLI.isOperationLegalOrCustom(ISD::FSQRT, VT))
17318 return SDValue();
17319
17320 // Assume that libcalls are the smallest code.
17321 // TODO: This restriction should probably be lifted for vectors.
17322 if (ForCodeSize)
17323 return SDValue();
17324
17325 // pow(X, 0.25) --> sqrt(sqrt(X))
17326 SDLoc DL(N);
17327 SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0));
17328 SDValue SqrtSqrt = DAG.getNode(ISD::FSQRT, DL, VT, Sqrt);
17329 if (ExponentIs025)
17330 return SqrtSqrt;
17331 // pow(X, 0.75) --> sqrt(X) * sqrt(sqrt(X))
17332 return DAG.getNode(ISD::FMUL, DL, VT, Sqrt, SqrtSqrt);
17333 }
17334
17335 return SDValue();
17336}
17337
17338 static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG,
17339 const TargetLowering &TLI) {
17340 // We only do this if the target has legal ftrunc. Otherwise, we'd likely be
17341 // replacing casts with a libcall. We also must be allowed to ignore -0.0
17342 // because FTRUNC will return -0.0 for (-1.0, -0.0), but using integer
17343 // conversions would return +0.0.
17344 // FIXME: We should be able to use node-level FMF here.
17345 // TODO: If strict math, should we use FABS (+ range check for signed cast)?
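// For example, with illustrative values: ftrunc(-0.5) is -0.0 while
// (sitofp (fptosi -0.5)) is +0.0, whereas for -1.5 both give -1.0; the sign
// of zero is the only discrepancy, which is why NoSignedZerosFPMath is
// required below.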
17346 EVT VT = N->getValueType(0);
17347 if (!TLI.isOperationLegal(ISD::FTRUNC, VT) ||
17348 !DAG.getTarget().Options.NoSignedZerosFPMath)
17349 return SDValue();
17350
17351 // fptosi/fptoui round towards zero, so converting from FP to integer and
17352 // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X
17353 SDValue N0 = N->getOperand(0);
17354 if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT &&
17355 N0.getOperand(0).getValueType() == VT)
17356 return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
17357
17358 if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT &&
17359 N0.getOperand(0).getValueType() == VT)
17360 return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
17361
17362 return SDValue();
17363}
17364
17365SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
17366 SDValue N0 = N->getOperand(0);
17367 EVT VT = N->getValueType(0);
17368 EVT OpVT = N0.getValueType();
17369
17370 // [us]itofp(undef) = 0, because the result value is bounded.
17371 if (N0.isUndef())
17372 return DAG.getConstantFP(0.0, SDLoc(N), VT);
17373
17374 // fold (sint_to_fp c1) -> c1fp
17375 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
17376 // ...but only if the target supports immediate floating-point values
17377 (!LegalOperations ||
17378 TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
17379 return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
17380
17381 // If the input is a legal type, and SINT_TO_FP is not legal on this target,
17382 // but UINT_TO_FP is legal on this target, try to convert.
17383 if (!hasOperation(ISD::SINT_TO_FP, OpVT) &&
17384 hasOperation(ISD::UINT_TO_FP, OpVT)) {
17385 // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
17386 if (DAG.SignBitIsZero(N0))
17387 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
17388 }
17389
17390 // The next optimizations are desirable only if SELECT_CC can be lowered.
17391 // fold (sint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), -1.0, 0.0)
17392 if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
17393 !VT.isVector() &&
17394 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
17395 SDLoc DL(N);
17396 return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(-1.0, DL, VT),
17397 DAG.getConstantFP(0.0, DL, VT));
17398 }
17399
17400 // fold (sint_to_fp (zext (setcc x, y, cc))) ->
17401 // (select (setcc x, y, cc), 1.0, 0.0)
17402 if (N0.getOpcode() == ISD::ZERO_EXTEND &&
17403 N0.getOperand(0).getOpcode() == ISD::SETCC && !VT.isVector() &&
17404 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
17405 SDLoc DL(N);
17406 return DAG.getSelect(DL, VT, N0.getOperand(0),
17407 DAG.getConstantFP(1.0, DL, VT),
17408 DAG.getConstantFP(0.0, DL, VT));
17409 }
17410
17411 if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
17412 return FTrunc;
17413
17414 return SDValue();
17415}
17416
17417SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
17418 SDValue N0 = N->getOperand(0);
17419 EVT VT = N->getValueType(0);
17420 EVT OpVT = N0.getValueType();
17421
17422 // [us]itofp(undef) = 0, because the result value is bounded.
17423 if (N0.isUndef())
17424 return DAG.getConstantFP(0.0, SDLoc(N), VT);
17425
17426 // fold (uint_to_fp c1) -> c1fp
17427 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
17428 // ...but only if the target supports immediate floating-point values
17429 (!LegalOperations ||
17430 TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
17431 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
17432
17433 // If the input is a legal type, and UINT_TO_FP is not legal on this target,
17434 // but SINT_TO_FP is legal on this target, try to convert.
17435 if (!hasOperation(ISD::UINT_TO_FP, OpVT) &&
17436 hasOperation(ISD::SINT_TO_FP, OpVT)) {
17437 // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
17438 if (DAG.SignBitIsZero(N0))
17439 return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
17440 }
17441
17442 // fold (uint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), 1.0, 0.0)
17443 if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
17444 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
17445 SDLoc DL(N);
17446 return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(1.0, DL, VT),
17447 DAG.getConstantFP(0.0, DL, VT));
17448 }
17449
17450 if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
17451 return FTrunc;
17452
17453 return SDValue();
17454}
17455
17456 // Fold (fp_to_{s/u}int ({s/u}int_to_fp x)) -> zext x, sext x, trunc x, or x
17457 static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
17458 SDValue N0 = N->getOperand(0);
17459 EVT VT = N->getValueType(0);
17460
17461 if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
17462 return SDValue();
17463
17464 SDValue Src = N0.getOperand(0);
17465 EVT SrcVT = Src.getValueType();
17466 bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
17467 bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
17468
17469 // We can safely assume the conversion won't overflow the output range,
17470 // because (for example) (uint8_t)18293.f is undefined behavior.
17471
17472 // Since we can assume the conversion won't overflow, our decision as to
17473 // whether the input will fit in the float should depend on the minimum
17474 // of the input range and output range.
17475
17476 // This means this is also safe for a signed input and unsigned output, since
17477 // a negative input would lead to undefined behavior.
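// For instance, a sketch with f32 (24 bits of precision): for
// (fp_to_sint i32 (sint_to_fp f32 (i16 x))), the signed i16 input needs at
// most 15 significant bits, so the round-trip is exact and folds to
// (sign_extend x); with an i32 input (up to 31 significant bits) the fold
// is rejected.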
17478 unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
17479 unsigned OutputSize = (int)VT.getScalarSizeInBits();
17480 unsigned ActualSize = std::min(InputSize, OutputSize);
17481 const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());
17482
17483 // We can only fold away the float conversion if the input range can be
17484 // represented exactly in the float range.
17485 if (APFloat::semanticsPrecision(sem) >= ActualSize) {
17486 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
17487 unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
17488 : ISD::ZERO_EXTEND;
17489 return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
17490 }
17491 if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
17492 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
17493 return DAG.getBitcast(VT, Src);
17494 }
17495 return SDValue();
17496}
17497
17498SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
17499 SDValue N0 = N->getOperand(0);
17500 EVT VT = N->getValueType(0);
17501
17502 // fold (fp_to_sint undef) -> undef
17503 if (N0.isUndef())
17504 return DAG.getUNDEF(VT);
17505
17506 // fold (fp_to_sint c1fp) -> c1
17507 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17508 return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
17509
17510 return FoldIntToFPToInt(N, DAG);
17511}
17512
17513SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
17514 SDValue N0 = N->getOperand(0);
17515 EVT VT = N->getValueType(0);
17516
17517 // fold (fp_to_uint undef) -> undef
17518 if (N0.isUndef())
17519 return DAG.getUNDEF(VT);
17520
17521 // fold (fp_to_uint c1fp) -> c1
17522 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17523 return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
17524
17525 return FoldIntToFPToInt(N, DAG);
17526}
17527
17528SDValue DAGCombiner::visitXRINT(SDNode *N) {
17529 SDValue N0 = N->getOperand(0);
17530 EVT VT = N->getValueType(0);
17531
17532 // fold (lrint|llrint undef) -> undef
17533 if (N0.isUndef())
17534 return DAG.getUNDEF(VT);
17535
17536 // fold (lrint|llrint c1fp) -> c1
17537 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17538 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0);
17539
17540 return SDValue();
17541}
17542
17543SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
17544 SDValue N0 = N->getOperand(0);
17545 SDValue N1 = N->getOperand(1);
17546 EVT VT = N->getValueType(0);
17547
17548 // fold (fp_round c1fp) -> c1fp
17549 if (SDValue C =
17550 DAG.FoldConstantArithmetic(ISD::FP_ROUND, SDLoc(N), VT, {N0, N1}))
17551 return C;
17552
17553 // fold (fp_round (fp_extend x)) -> x
17554 if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
17555 return N0.getOperand(0);
17556
17557 // fold (fp_round (fp_round x)) -> (fp_round x)
17558 if (N0.getOpcode() == ISD::FP_ROUND) {
17559 const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
17560 const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;
17561
17562 // Avoid folding legal fp_rounds into non-legal ones.
17563 if (!hasOperation(ISD::FP_ROUND, VT))
17564 return SDValue();
17565
17566 // Skip this folding if it results in an fp_round from f80 to f16.
17567 //
17568 // f80 to f16 always generates an expensive (and as yet, unimplemented)
17569 // libcall to __truncxfhf2 instead of selecting native f16 conversion
17570 // instructions from f32 or f64. Moreover, the first (value-preserving)
17571 // fp_round from f80 to either f32 or f64 may become a NOP in platforms like
17572 // x86.
17573 if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
17574 return SDValue();
17575
17576 // If the first fp_round isn't a value preserving truncation, it might
17577 // introduce a tie in the second fp_round, that wouldn't occur in the
17578 // single-step fp_round we want to fold to.
17579 // In other words, double rounding isn't the same as rounding.
17580 // Also, this is a value preserving truncation iff both fp_round's are.
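// A sketch of the hazard with made-up values: for x = 1 + 2^-11 + 2^-30 in
// f64, rounding directly to f16 gives 1 + 2^-10 (x lies above the halfway
// point), but rounding to f32 first gives exactly 1 + 2^-11, the halfway
// point, which then ties-to-even down to 1.0 in f16.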
17581 if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
17582 SDLoc DL(N);
17583 return DAG.getNode(
17584 ISD::FP_ROUND, DL, VT, N0.getOperand(0),
17585 DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL, /*isTarget=*/true));
17586 }
17587 }
17588
17589 // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
17590 // Note: From a legality perspective, this is a two step transform. First,
17591 // we duplicate the fp_round to the arguments of the copysign, then we
17592 // eliminate the fp_round on Y. The second step requires an additional
17593 // predicate to match the implementation above.
17594 if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse() &&
17595 CanCombineFCOPYSIGN_EXTEND_ROUND(VT,
17596 N0.getValueType())) {
17597 SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
17598 N0.getOperand(0), N1);
17599 AddToWorklist(Tmp.getNode());
17600 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
17601 Tmp, N0.getOperand(1));
17602 }
17603
17604 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
17605 return NewVSel;
17606
17607 return SDValue();
17608}
17609
17610SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
17611 SDValue N0 = N->getOperand(0);
17612 EVT VT = N->getValueType(0);
17613
17614 if (VT.isVector())
17615 if (SDValue FoldedVOp = SimplifyVCastOp(N, SDLoc(N)))
17616 return FoldedVOp;
17617
17618 // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
17619 if (N->hasOneUse() &&
17620 N->use_begin()->getOpcode() == ISD::FP_ROUND)
17621 return SDValue();
17622
17623 // fold (fp_extend c1fp) -> c1fp
17624 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17625 return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);
17626
17627 // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
17628 if (N0.getOpcode() == ISD::FP16_TO_FP &&
17629 TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
17630 return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));
17631
17632 // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
17633 // value of X.
17634 if (N0.getOpcode() == ISD::FP_ROUND
17635 && N0.getConstantOperandVal(1) == 1) {
17636 SDValue In = N0.getOperand(0);
17637 if (In.getValueType() == VT) return In;
17638 if (VT.bitsLT(In.getValueType()))
17639 return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
17640 In, N0.getOperand(1));
17641 return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
17642 }
17643
17644 // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
17645 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
17646 TLI.isLoadExtLegalOrCustom(ISD::EXTLOAD, VT, N0.getValueType())) {
17647 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
17648 SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
17649 LN0->getChain(),
17650 LN0->getBasePtr(), N0.getValueType(),
17651 LN0->getMemOperand());
17652 CombineTo(N, ExtLoad);
17653 CombineTo(
17654 N0.getNode(),
17655 DAG.getNode(ISD::FP_ROUND, SDLoc(N0), N0.getValueType(), ExtLoad,
17656 DAG.getIntPtrConstant(1, SDLoc(N0), /*isTarget=*/true)),
17657 ExtLoad.getValue(1));
17658 return SDValue(N, 0); // Return N so it doesn't get rechecked!
17659 }
17660
17661 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
17662 return NewVSel;
17663
17664 return SDValue();
17665}
17666
17667SDValue DAGCombiner::visitFCEIL(SDNode *N) {
17668 SDValue N0 = N->getOperand(0);
17669 EVT VT = N->getValueType(0);
17670
17671 // fold (fceil c1) -> fceil(c1)
17672 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17673 return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
17674
17675 return SDValue();
17676}
17677
17678SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
17679 SDValue N0 = N->getOperand(0);
17680 EVT VT = N->getValueType(0);
17681
17682 // fold (ftrunc c1) -> ftrunc(c1)
17683 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17684 return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
17685
17686 // fold ftrunc (known rounded int x) -> x
17687 // ftrunc is a part of fptosi/fptoui expansion on some targets, so this is
17688 // likely to be generated to extract integer from a rounded floating value.
17689 switch (N0.getOpcode()) {
17690 default: break;
17691 case ISD::FRINT:
17692 case ISD::FTRUNC:
17693 case ISD::FNEARBYINT:
17694 case ISD::FROUNDEVEN:
17695 case ISD::FFLOOR:
17696 case ISD::FCEIL:
17697 return N0;
17698 }
17699
17700 return SDValue();
17701}
17702
17703SDValue DAGCombiner::visitFFREXP(SDNode *N) {
17704 SDValue N0 = N->getOperand(0);
17705
17706 // fold (ffrexp c1) -> ffrexp(c1)
17707 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17708 return DAG.getNode(ISD::FFREXP, SDLoc(N), N->getVTList(), N0);
17709 return SDValue();
17710}
17711
17712SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
17713 SDValue N0 = N->getOperand(0);
17714 EVT VT = N->getValueType(0);
17715
17716 // fold (ffloor c1) -> ffloor(c1)
17717 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17718 return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
17719
17720 return SDValue();
17721}
17722
17723SDValue DAGCombiner::visitFNEG(SDNode *N) {
17724 SDValue N0 = N->getOperand(0);
17725 EVT VT = N->getValueType(0);
17726 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17727
17728 // Constant fold FNEG.
17729 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17730 return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
17731
17732 if (SDValue NegN0 =
17733 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize))
17734 return NegN0;
17735
17736 // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
17737 // FIXME: This is duplicated in getNegatibleCost, but getNegatibleCost doesn't
17738 // know it was called from a context with a nsz flag if the input fsub does
17739 // not.
17740 if (N0.getOpcode() == ISD::FSUB &&
17741 (DAG.getTarget().Options.NoSignedZerosFPMath ||
17742 N->getFlags().hasNoSignedZeros()) && N0.hasOneUse()) {
17743 return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0.getOperand(1),
17744 N0.getOperand(0));
17745 }
17746
17747 if (SDValue Cast = foldSignChangeInBitcast(N))
17748 return Cast;
17749
17750 return SDValue();
17751}
17752
17753SDValue DAGCombiner::visitFMinMax(SDNode *N) {
17754 SDValue N0 = N->getOperand(0);
17755 SDValue N1 = N->getOperand(1);
17756 EVT VT = N->getValueType(0);
17757 const SDNodeFlags Flags = N->getFlags();
17758 unsigned Opc = N->getOpcode();
17759 bool PropagatesNaN = Opc == ISD::FMINIMUM || Opc == ISD::FMAXIMUM;
17760 bool IsMin = Opc == ISD::FMINNUM || Opc == ISD::FMINIMUM;
17761 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17762
17763 // Constant fold.
17764 if (SDValue C = DAG.FoldConstantArithmetic(Opc, SDLoc(N), VT, {N0, N1}))
17765 return C;
17766
17767 // Canonicalize to constant on RHS.
17768 if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
17769 !DAG.isConstantFPBuildVectorOrConstantFP(N1))
17770 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
17771
17772 if (const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1)) {
17773 const APFloat &AF = N1CFP->getValueAPF();
17774
17775 // minnum(X, nan) -> X
17776 // maxnum(X, nan) -> X
17777 // minimum(X, nan) -> nan
17778 // maximum(X, nan) -> nan
17779 if (AF.isNaN())
17780 return PropagatesNaN ? N->getOperand(1) : N->getOperand(0);
17781
17782 // In the following folds, inf can be replaced with the largest finite
17783 // float, if the ninf flag is set.
17784 if (AF.isInfinity() || (Flags.hasNoInfs() && AF.isLargest())) {
17785 // minnum(X, -inf) -> -inf
17786 // maxnum(X, +inf) -> +inf
17787 // minimum(X, -inf) -> -inf if nnan
17788 // maximum(X, +inf) -> +inf if nnan
17789 if (IsMin == AF.isNegative() && (!PropagatesNaN || Flags.hasNoNaNs()))
17790 return N->getOperand(1);
17791
17792 // minnum(X, +inf) -> X if nnan
17793 // maxnum(X, -inf) -> X if nnan
17794 // minimum(X, +inf) -> X
17795 // maximum(X, -inf) -> X
17796 if (IsMin != AF.isNegative() && (PropagatesNaN || Flags.hasNoNaNs()))
17797 return N->getOperand(0);
17798 }
17799 }
17800
17801 if (SDValue SD = reassociateReduction(
17802 PropagatesNaN
17803 ? (IsMin ? ISD::VECREDUCE_FMINIMUM : ISD::VECREDUCE_FMAXIMUM)
17804 : (IsMin ? ISD::VECREDUCE_FMIN : ISD::VECREDUCE_FMAX),
17805 Opc, SDLoc(N), VT, N0, N1, Flags))
17806 return SD;
17807
17808 return SDValue();
17809}
17810
17811SDValue DAGCombiner::visitFABS(SDNode *N) {
17812 SDValue N0 = N->getOperand(0);
17813 EVT VT = N->getValueType(0);
17814
17815 // fold (fabs c1) -> fabs(c1)
17816 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17817 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
17818
17819 // fold (fabs (fabs x)) -> (fabs x)
17820 if (N0.getOpcode() == ISD::FABS)
17821 return N->getOperand(0);
17822
17823 // fold (fabs (fneg x)) -> (fabs x)
17824 // fold (fabs (fcopysign x, y)) -> (fabs x)
17825 if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
17826 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
17827
17828 if (SDValue Cast = foldSignChangeInBitcast(N))
17829 return Cast;
17830
17831 return SDValue();
17832}
17833
17834SDValue DAGCombiner::visitBRCOND(SDNode *N) {
17835 SDValue Chain = N->getOperand(0);
17836 SDValue N1 = N->getOperand(1);
17837 SDValue N2 = N->getOperand(2);
17838
17839 // BRCOND(FREEZE(cond)) is equivalent to BRCOND(cond) (both are
17840 // nondeterministic jumps).
17841 if (N1->getOpcode() == ISD::FREEZE && N1.hasOneUse()) {
17842 return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain,
17843 N1->getOperand(0), N2);
17844 }
17845
17846 // Variant of the previous fold where there is a SETCC in between:
17847 // BRCOND(SETCC(FREEZE(X), CONST, Cond))
17848 // =>
17849 // BRCOND(FREEZE(SETCC(X, CONST, Cond)))
17850 // =>
17851 // BRCOND(SETCC(X, CONST, Cond))
17852 // This is correct if FREEZE(X) has one use and SETCC(FREEZE(X), CONST, Cond)
17853 // isn't equivalent to true or false.
17854 // For example, SETCC(FREEZE(X), -128, SETULT) cannot be folded to
17855 // FREEZE(SETCC(X, -128, SETULT)) because X can be poison.
17856 if (N1->getOpcode() == ISD::SETCC && N1.hasOneUse()) {
17857 SDValue S0 = N1->getOperand(0), S1 = N1->getOperand(1);
17858 ISD::CondCode Cond = cast<CondCodeSDNode>(N1->getOperand(2))->get();
17859 ConstantSDNode *S0C = dyn_cast<ConstantSDNode>(S0);
17860 ConstantSDNode *S1C = dyn_cast<ConstantSDNode>(S1);
17861 bool Updated = false;
17862
17863 // Is 'X Cond C' always true or false?
17864 auto IsAlwaysTrueOrFalse = [](ISD::CondCode Cond, ConstantSDNode *C) {
17865 bool False = (Cond == ISD::SETULT && C->isZero()) ||
17866 (Cond == ISD::SETLT && C->isMinSignedValue()) ||
17867 (Cond == ISD::SETUGT && C->isAllOnes()) ||
17868 (Cond == ISD::SETGT && C->isMaxSignedValue());
17869 bool True = (Cond == ISD::SETULE && C->isAllOnes()) ||
17870 (Cond == ISD::SETLE && C->isMaxSignedValue()) ||
17871 (Cond == ISD::SETUGE && C->isZero()) ||
17872 (Cond == ISD::SETGE && C->isMinSignedValue());
17873 return True || False;
17874 };
17875
17876 if (S0->getOpcode() == ISD::FREEZE && S0.hasOneUse() && S1C) {
17877 if (!IsAlwaysTrueOrFalse(Cond, S1C)) {
17878 S0 = S0->getOperand(0);
17879 Updated = true;
17880 }
17881 }
17882 if (S1->getOpcode() == ISD::FREEZE && S1.hasOneUse() && S0C) {
17883 if (!IsAlwaysTrueOrFalse(ISD::getSetCCSwappedOperands(Cond), S0C)) {
17884 S1 = S1->getOperand(0);
17885 Updated = true;
17886 }
17887 }
17888
17889 if (Updated)
17890 return DAG.getNode(
17891 ISD::BRCOND, SDLoc(N), MVT::Other, Chain,
17892 DAG.getSetCC(SDLoc(N1), N1->getValueType(0), S0, S1, Cond), N2);
17893 }
17894
17895 // If N is a constant we could fold this into a fallthrough or unconditional
17896 // branch. However that doesn't happen very often in normal code, because
17897 // Instcombine/SimplifyCFG should have handled the available opportunities.
17898 // If we did this folding here, it would be necessary to update the
17899 // MachineBasicBlock CFG, which is awkward.
17900
17901 // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
17902 // on the target.
17903 if (N1.getOpcode() == ISD::SETCC &&
17904 TLI.isOperationLegalOrCustom(ISD::BR_CC,
17905 N1.getOperand(0).getValueType())) {
17906 return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
17907 Chain, N1.getOperand(2),
17908 N1.getOperand(0), N1.getOperand(1), N2);
17909 }
17910
17911 if (N1.hasOneUse()) {
17912 // rebuildSetCC calls visitXor which may change the Chain when there is a
17913 // STRICT_FSETCC/STRICT_FSETCCS involved. Use a handle to track changes.
17914 HandleSDNode ChainHandle(Chain);
17915 if (SDValue NewN1 = rebuildSetCC(N1))
17916 return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other,
17917 ChainHandle.getValue(), NewN1, N2);
17918 }
17919
17920 return SDValue();
17921}
17922
17923SDValue DAGCombiner::rebuildSetCC(SDValue N) {
17924 if (N.getOpcode() == ISD::SRL ||
17925 (N.getOpcode() == ISD::TRUNCATE &&
17926 (N.getOperand(0).hasOneUse() &&
17927 N.getOperand(0).getOpcode() == ISD::SRL))) {
17928 // Look past the truncate.
17929 if (N.getOpcode() == ISD::TRUNCATE)
17930 N = N.getOperand(0);
17931
17932 // Match this pattern so that we can generate simpler code:
17933 //
17934 // %a = ...
17935 // %b = and i32 %a, 2
17936 // %c = srl i32 %b, 1
17937 // brcond i32 %c ...
17938 //
17939 // into
17940 //
17941 // %a = ...
17942 // %b = and i32 %a, 2
17943 // %c = setcc eq %b, 0
17944 // brcond %c ...
17945 //
17946 // This applies only when the AND constant value has one bit set and the
17947 // SRL constant is equal to the log2 of the AND constant. The back-end is
17948 // smart enough to convert the result into a TEST/JMP sequence.
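// As a concrete instance of the pattern above (hypothetical operands):
// "brcond (srl (and %a, 8), 3)" becomes "brcond (setcc ne (and %a, 8), 0)",
// since the AND mask 8 has a single bit set and 3 == log2(8).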
17949 SDValue Op0 = N.getOperand(0);
17950 SDValue Op1 = N.getOperand(1);
17951
17952 if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
17953 SDValue AndOp1 = Op0.getOperand(1);
17954
17955 if (AndOp1.getOpcode() == ISD::Constant) {
17956 const APInt &AndConst = AndOp1->getAsAPIntVal();
17957
17958 if (AndConst.isPowerOf2() &&
17959 Op1->getAsAPIntVal() == AndConst.logBase2()) {
17960 SDLoc DL(N);
17961 return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
17962 Op0, DAG.getConstant(0, DL, Op0.getValueType()),
17963 ISD::SETNE);
17964 }
17965 }
17966 }
17967 }
17968
17969 // Transform (brcond (xor x, y)) -> (brcond (setcc, x, y, ne))
17970 // Transform (brcond (xor (xor x, y), -1)) -> (brcond (setcc, x, y, eq))
17971 if (N.getOpcode() == ISD::XOR) {
17972 // Because we may call this on a speculatively constructed
17973 // SimplifiedSetCC Node, we need to simplify this node first.
17974 // Ideally this should be folded into SimplifySetCC and not
17975 // here. For now, grab a handle to N so we don't lose it from
17976 // replacements internal to the visit.
17977 HandleSDNode XORHandle(N);
17978 while (N.getOpcode() == ISD::XOR) {
17979 SDValue Tmp = visitXOR(N.getNode());
17980 // No simplification done.
17981 if (!Tmp.getNode())
17982 break;
17983 // Returning N is a form of in-visit replacement that may have invalidated
17984 // N. Grab the value from the handle.
17985 if (Tmp.getNode() == N.getNode())
17986 N = XORHandle.getValue();
17987 else // Node simplified. Try simplifying again.
17988 N = Tmp;
17989 }
17990
17991 if (N.getOpcode() != ISD::XOR)
17992 return N;
17993
17994 SDValue Op0 = N->getOperand(0);
17995 SDValue Op1 = N->getOperand(1);
17996
17997 if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
17998 bool Equal = false;
17999 // (brcond (xor (xor x, y), -1)) -> (brcond (setcc x, y, eq))
18000 if (isBitwiseNot(N) && Op0.hasOneUse() && Op0.getOpcode() == ISD::XOR &&
18001 Op0.getValueType() == MVT::i1) {
18002 N = Op0;
18003 Op0 = N->getOperand(0);
18004 Op1 = N->getOperand(1);
18005 Equal = true;
18006 }
18007
18008 EVT SetCCVT = N.getValueType();
18009 if (LegalTypes)
18010 SetCCVT = getSetCCResultType(SetCCVT);
18011 // Replace the uses of XOR with SETCC
18012 return DAG.getSetCC(SDLoc(N), SetCCVT, Op0, Op1,
18013 Equal ? ISD::SETEQ : ISD::SETNE);
18014 }
18015 }
18016
18017 return SDValue();
18018}
18019
18020// Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
18021//
18022SDValue DAGCombiner::visitBR_CC(SDNode *N) {
18023 CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
18024 SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
18025
18026 // If N is a constant we could fold this into a fallthrough or unconditional
18027 // branch. However that doesn't happen very often in normal code, because
18028 // Instcombine/SimplifyCFG should have handled the available opportunities.
18029 // If we did this folding here, it would be necessary to update the
18030 // MachineBasicBlock CFG, which is awkward.
18031
18032 // Use SimplifySetCC to simplify SETCC's.
18033 SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
18034 CondLHS, CondRHS, CC->get(), SDLoc(N),
18035 false);
18036 if (Simp.getNode()) AddToWorklist(Simp.getNode());
18037
18038 // fold to a simpler setcc
18039 if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
18040 return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
18041 N->getOperand(0), Simp.getOperand(2),
18042 Simp.getOperand(0), Simp.getOperand(1),
18043 N->getOperand(4));
18044
18045 return SDValue();
18046}
18047
18048static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec,
18049 bool &IsLoad, bool &IsMasked, SDValue &Ptr,
18050 const TargetLowering &TLI) {
18051 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
18052 if (LD->isIndexed())
18053 return false;
18054 EVT VT = LD->getMemoryVT();
18055 if (!TLI.isIndexedLoadLegal(Inc, VT) && !TLI.isIndexedLoadLegal(Dec, VT))
18056 return false;
18057 Ptr = LD->getBasePtr();
18058 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
18059 if (ST->isIndexed())
18060 return false;
18061 EVT VT = ST->getMemoryVT();
18062 if (!TLI.isIndexedStoreLegal(Inc, VT) && !TLI.isIndexedStoreLegal(Dec, VT))
18063 return false;
18064 Ptr = ST->getBasePtr();
18065 IsLoad = false;
18066 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
18067 if (LD->isIndexed())
18068 return false;
18069 EVT VT = LD->getMemoryVT();
18070 if (!TLI.isIndexedMaskedLoadLegal(Inc, VT) &&
18071 !TLI.isIndexedMaskedLoadLegal(Dec, VT))
18072 return false;
18073 Ptr = LD->getBasePtr();
18074 IsMasked = true;
18075 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
18076 if (ST->isIndexed())
18077 return false;
18078 EVT VT = ST->getMemoryVT();
18079 if (!TLI.isIndexedMaskedStoreLegal(Inc, VT) &&
18080 !TLI.isIndexedMaskedStoreLegal(Dec, VT))
18081 return false;
18082 Ptr = ST->getBasePtr();
18083 IsLoad = false;
18084 IsMasked = true;
18085 } else {
18086 return false;
18087 }
18088 return true;
18089}
18090
18091/// Try turning a load/store into a pre-indexed load/store when the base
18092/// pointer is an add or subtract and it has other uses besides the load/store.
18093/// After the transformation, the new indexed load/store has effectively folded
18094/// the add/subtract in and all of its other uses are redirected to the
18095/// new load/store.
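/// A sketch of the intent on a target with a legal pre-indexed form
/// (hypothetical pseudo-code): for "p2 = add p, 16; x = load p2; ...uses of
/// p2...", the add is folded into the access as a pre-indexed load that
/// produces both the loaded value and the updated pointer, and the other
/// uses of p2 are redirected to that updated pointer.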
18096bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
18097 if (Level < AfterLegalizeDAG)
18098 return false;
18099
18100 bool IsLoad = true;
18101 bool IsMasked = false;
18102 SDValue Ptr;
18103 if (!getCombineLoadStoreParts(N, ISD::PRE_INC, ISD::PRE_DEC, IsLoad, IsMasked,
18104 Ptr, TLI))
18105 return false;
18106
18107 // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
18108 // out. There is no reason to make this a preinc/predec.
18109 if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
18110 Ptr->hasOneUse())
18111 return false;
18112
18113 // Ask the target to do addressing mode selection.
18114 SDValue BasePtr;
18115 SDValue Offset;
18116 ISD::MemIndexedMode AM = ISD::UNINDEXED;
18117 if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
18118 return false;
18119
18120 // Backends without true r+i pre-indexed forms may need to pass a
18121 // constant base with a variable offset so that constant coercion
18122 // will work with the patterns in canonical form.
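// In that case (a sketch of the flow below): the constant and variable
// operands are temporarily swapped so the checks that assume a variable base
// and constant offset still apply, and the original order is restored after
// the scan over the base pointer's other uses.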
18123 bool Swapped = false;
18124 if (isa<ConstantSDNode>(BasePtr)) {
18125 std::swap(BasePtr, Offset);
18126 Swapped = true;
18127 }
18128
18129 // Don't create an indexed load / store with zero offset.
18130 if (isNullConstant(Offset))
18131 return false;
18132
18133 // Try turning it into a pre-indexed load / store except when:
18134 // 1) The new base ptr is a frame index.
18135 // 2) If N is a store and the new base ptr is either the same as or is a
18136 // predecessor of the value being stored.
18137 // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
18138 // that would create a cycle.
18139 // 4) All uses are load / store ops that use it as old base ptr.
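  // Regarding #3: if another user of the old base pointer feeds, directly or
  // transitively, into N's address or stored value, folding the add/sub into
  // N would make the new node both a predecessor and a successor of that
  // user, i.e. it would create a cycle in the DAG.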
18140
18141 // Check #1. Preinc'ing a frame index would require copying the stack pointer
18142 // (plus the implicit offset) to a register to preinc anyway.
18143 if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
18144 return false;
18145
18146 // Check #2.
18147 if (!IsLoad) {
18148 SDValue Val = IsMasked ? cast<MaskedStoreSDNode>(N)->getValue()
18149 : cast<StoreSDNode>(N)->getValue();
18150
18151 // Would require a copy.
18152 if (Val == BasePtr)
18153 return false;
18154
18155 // Would create a cycle.
18156 if (Val == Ptr || Ptr->isPredecessorOf(Val.getNode()))
18157 return false;
18158 }
18159
18160  // Caches for hasPredecessorHelper.
18161  SmallPtrSet<const SDNode *, 32> Visited;
18162  SmallVector<const SDNode *, 16> Worklist;
18163 Worklist.push_back(N);
18164
18165 // If the offset is a constant, there may be other adds of constants that
18166 // can be folded with this one. We should do this to avoid having to keep
18167 // a copy of the original base pointer.
18168 SmallVector<SDNode *, 16> OtherUses;
18169 constexpr unsigned int MaxSteps = 8192;
18170 if (isa<ConstantSDNode>(Offset))
18171 for (SDNode::use_iterator UI = BasePtr->use_begin(),
18172 UE = BasePtr->use_end();
18173 UI != UE; ++UI) {
18174 SDUse &Use = UI.getUse();
18175 // Skip the use that is Ptr and uses of other results from BasePtr's
18176 // node (important for nodes that return multiple results).
18177 if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
18178 continue;
18179
18180 if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist,
18181 MaxSteps))
18182 continue;
18183
18184 if (Use.getUser()->getOpcode() != ISD::ADD &&
18185 Use.getUser()->getOpcode() != ISD::SUB) {
18186 OtherUses.clear();
18187 break;
18188 }
18189
18190 SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
18191 if (!isa<ConstantSDNode>(Op1)) {
18192 OtherUses.clear();
18193 break;
18194 }
18195
18196 // FIXME: In some cases, we can be smarter about this.
18197 if (Op1.getValueType() != Offset.getValueType()) {
18198 OtherUses.clear();
18199 break;
18200 }
18201
18202 OtherUses.push_back(Use.getUser());
18203 }
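  // At this point OtherUses holds only ADD/SUB users of BasePtr whose other
  // operand is a constant of the same type as Offset; encountering any other
  // kind of user cleared the list above and disabled this extra rewrite.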
18204
18205 if (Swapped)
18206 std::swap(BasePtr, Offset);
18207
18208 // Now check for #3 and #4.
18209 bool RealUse = false;
18210
18211 for (SDNode *Use : Ptr->uses()) {
18212 if (Use == N)
18213 continue;
18214 if (SDNode::hasPredecessorHelper(Use, Visited, Worklist, MaxSteps))
18215 return false;
18216
18217    // If Ptr can be folded into the addressing mode of another use, that use
18218    // is not a "real" use and does not make this transformation profitable.
18219 if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
18220 RealUse = true;
18221 }
18222
18223 if (!RealUse)
18224 return false;
18225
18226  SDValue Result;
18227  if (!IsMasked) {
18228 if (IsLoad)
18229 Result = DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
18230 else
18231 Result =
18232 DAG.getIndexedStore(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
18233 } else {
18234 if (IsLoad)
18235 Result = DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
18236 Offset, AM);
18237 else
18238 Result = DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N), BasePtr,
18239 Offset, AM);
18240 }
18241 ++PreIndexedNodes;
18242 ++NodesCombined;
18243 LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
18244 Result.dump(&DAG); dbgs() << '\n');
18245 WorklistRemover DeadNodes(*this);
18246 if (IsLoad) {
18247 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
18248 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
18249 } else {
18250 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
18251 }
18252
18253 // Finally, since the node is now dead, remove it from the graph.
18254 deleteAndRecombine(N);
18255
18256 if (Swapped)
18257 std::swap(BasePtr, Offset);
18258
18259 // Replace other uses of BasePtr that can be updated to use Ptr
18260 for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
18261 unsigned OffsetIdx = 1;
18262 if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
18263 OffsetIdx = 0;
18264 assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
18265 BasePtr.getNode() && "Expected BasePtr operand");
18266
18267 // We need to replace ptr0 in the following expression:
18268 // x0 * offset0 + y0 * ptr0 = t0
18269 // knowing that
18270 // x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
18271 //
18272 // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
18273 // indexed load/store and the expression that needs to be re-written.
18274 //
18275 // Therefore, we have:
18276    //    t0 = (x0 * offset0 - x1 * y0 * y1 * offset1) + (y0 * y1) * t1
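    // Concretely: for a pre-increment by 4 (t1 = ptr0 + 4, so x1 = y1 = 1)
    // and another use t0 = ptr0 + 8 (x0 = y0 = 1, offset0 = 8), this yields
    // t0 = (8 - 4) + t1, i.e. an ADD of the new pointer value and 4.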
18277
18278 auto *CN = cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
18279 const APInt &Offset0 = CN->getAPIntValue();
18280 const APInt &Offset1 = Offset->getAsAPIntVal();
18281 int X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
18282 int Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
18283 int X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
18284 int Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
18285
18286 unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
18287
18288 APInt CNV = Offset0;
18289 if (X0 < 0) CNV = -CNV;
18290 if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
18291 else CNV = CNV - Offset1;
18292
18293 SDLoc DL(OtherUses[i]);
18294
18295 // We can now generate the new expression.
18296 SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
18297 SDValue NewOp2 = Result.getValue(IsLoad ? 1 : 0);
18298
18299 SDValue NewUse = DAG.getNode(Opcode,
18300 DL,
18301 OtherUses[i]->getValueType(0), NewOp1, NewOp2);
18302 DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
18303 deleteAndRecombine(OtherUses[i]);
18304 }
18305
18306 // Replace the uses of Ptr with uses of the updated base value.
18307 DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(IsLoad ? 1 : 0));
18308 deleteAndRecombine(Ptr.getNode());
18309 AddToWorklist(Result.getNode());
18310
18311 return true;
18312}
18313
18314 static bool shouldCombineToPostInc(SDNode *N, SDValue Ptr, SDNode *PtrUse,
18315                                    SDValue &BasePtr, SDValue &Offset,
18316                                    ISD::MemIndexedMode &AM,
18317 SelectionDAG &DAG,
18318 const TargetLowering &TLI) {
18319 if (PtrUse == N ||
18320 (PtrUse->getOpcode() != ISD::ADD && PtrUse->getOpcode() != ISD::SUB))
18321 return false;
18322
18323 if (!TLI.getPostIndexedAddressParts(N, PtrUse, BasePtr, Offset, AM, DAG))
18324 return false;
18325
18326  // Don't create an indexed load / store with zero offset.
18327  if (isNullConstant(Offset))
18328    return false;
18329
18330 if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
18331 return false;
18332
18333  SmallPtrSet<const SDNode *, 32> Visited;
18334  for (SDNode *Use : BasePtr->uses()) {
18335 if (Use == Ptr.getNode())
18336 continue;
18337
18338    // Reject if there's a later user which could perform the indexing instead.
18339 if (isa<MemSDNode>(Use)) {
18340 bool IsLoad = true;
18341 bool IsMasked = false;
18342 SDValue OtherPtr;
18343      if (getCombineLoadStoreParts(Use, ISD::POST_INC, ISD::POST_DEC, IsLoad,
18344                                   IsMasked, OtherPtr, TLI)) {
18345        SmallVector<const SDNode *, 2> Worklist;
18346 Worklist.push_back(Use);
18347 if (SDNode::hasPredecessorHelper(N, Visited, Worklist))
18348 return false;
18349 }
18350 }
18351
18352 // If all the uses are load / store addresses, then don't do the
18353 // transformation.
18354 if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB) {
18355 for (SDNode *UseUse : Use->uses())
18356 if (canFoldInAddressingMode(Use, UseUse, DAG, TLI))
18357 return false;
18358 }
18359 }
18360 return true;
18361}
18362
18363 static SDNode *getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad,
18364                                          bool &IsMasked, SDValue &Ptr,
18365                                          SDValue &BasePtr, SDValue &Offset,
18366                                          ISD::MemIndexedMode &AM,
18367                                          SelectionDAG &DAG,
18368 const TargetLowering &TLI) {
18369  if (!getCombineLoadStoreParts(N, ISD::POST_INC, ISD::POST_DEC, IsLoad,
18370                                IsMasked, Ptr, TLI) ||
18371 Ptr->hasOneUse())
18372 return nullptr;
18373
18374 // Try turning it into a post-indexed load / store except when
18375 // 1) All uses are load / store ops that use it as base ptr (and
18376  //    it may be folded as addressing mode).
18377 // 2) Op must be independent of N, i.e. Op is neither a predecessor
18378 // nor a successor of N. Otherwise, if Op is folded that would
18379 // create a cycle.
18380 for (SDNode *Op : Ptr->uses()) {
18381 // Check for #1.
18382 if (!shouldCombineToPostInc(N, Ptr, Op, BasePtr, Offset, AM, DAG, TLI))
18383 continue;
18384
18385 // Check for #2.
18386    SmallPtrSet<const SDNode *, 32> Visited;
18387    SmallVector<const SDNode *, 8> Worklist;
18388    constexpr unsigned int MaxSteps = 8192;
18389 // Ptr is predecessor to both N and Op.
18390 Visited.insert(Ptr.getNode());
18391 Worklist.push_back(N);
18392 Worklist.push_back(Op);
18393 if (!SDNode::hasPredecessorHelper(N, Visited, Worklist, MaxSteps) &&
18394 !SDNode::hasPredecessorHelper(Op, Visited, Worklist, MaxSteps))
18395 return Op;
18396 }
18397 return nullptr;
18398}
18399
18400/// Try to combine a load/store with an add/sub of the base pointer node into a
18401/// post-indexed load/store. The transformation effectively folds the add/subtract
18402/// into the new indexed load/store, and all other uses of the add/sub are
18403/// redirected to the new load/store.
18404bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
18405 if (Level < AfterLegalizeDAG)
18406 return false;
18407
18408 bool IsLoad = true;
18409 bool IsMasked = false;
18410  SDValue Ptr;
18411  SDValue BasePtr;
18412  SDValue Offset;
18413  ISD::MemIndexedMode AM = ISD::UNINDEXED;
18414 SDNode *Op = getPostIndexedLoadStoreOp(N, IsLoad, IsMasked, Ptr, BasePtr,
18415 Offset, AM, DAG, TLI);
18416 if (!Op)
18417 return false;
18418
18419  SDValue Result;
18420  if (!IsMasked)
18421 Result = IsLoad ? DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
18422 Offset, AM)
18423 : DAG.getIndexedStore(SDValue(N, 0), SDLoc(N),
18424 BasePtr, Offset, AM);
18425 else
18426 Result = IsLoad ? DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N),
18427 BasePtr, Offset, AM)
18428                    : DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N),
18429                                                BasePtr, Offset, AM);
18430 ++PostIndexedNodes;
18431 ++NodesCombined;
18432 LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG); dbgs() << "\nWith: ";
18433 Result.dump(&DAG); dbgs() << '\n');
18434 WorklistRemover DeadNodes(*this);
18435 if (IsLoad) {
18436 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
18437 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
18438 } else {
18439 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
18440 }
18441
18442 // Finally, since the node is now dead, remove it from the graph.
18443 deleteAndRecombine(N);
18444
18445  // Replace the uses of Op with uses of the updated base value.
18446  DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
18447                                Result.getValue(IsLoad ? 1 : 0));
18448 deleteAndRecombine(Op);
18449 return true;
18450}
18451
18452/// Return the base-pointer arithmetic from an indexed \p LD.
18453SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
18454 ISD::MemIndexedMode AM = LD->getAddressingMode();
18455 assert(AM != ISD::UNINDEXED);
18456 SDValue BP = LD->getOperand(1);
18457 SDValue Inc = LD->getOperand(2);
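  // An indexed load's operands are (chain, base pointer, offset), so BP and
  // Inc are the base and the increment that were folded into the load.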
18458
18459 // Some backends use TargetConstants for load offsets, but don't expect
18460 // TargetConstants in general ADD nodes. We can convert these constants into
18461 // regular Constants (if the constant is not opaque).
18462  assert((Inc.getOpcode() != ISD::TargetConstant ||
18463          !cast<ConstantSDNode>(Inc)->isOpaque()) &&
18464 "Cannot split out indexing using opaque target constants");
18465 if (Inc.getOpcode() == ISD::TargetConstant) {
18466 ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
18467 Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
18468 ConstInc->getValueType(0));
18469 }
18470
18471 unsigned Opc =
18472 (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
18473 return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
18474}
18475
18477 return T.isVector() ? T.getVectorElementCount() : ElementCount::getFixed(0);
18478}
18479
18480bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) {
18481 EVT STType = Val.getValueType();
18482 EVT STMemType = ST->getMemoryVT();
18483 if (STType == STMemType)
18484 return true;
18485 if (isTypeLegal(STMemType))
18486 return false; // fail.
18487 if (STType.isFloatingPoint() && STMemType.isFloatingPoint() &&
18488 TLI.isOperationLegal(ISD::FTRUNC, STMemType)) {
18489 Val = DAG.getNode(ISD::FTRUNC, SDLoc(ST), STMemType, Val);
18490 return true;
18491 }
18492 if (numVectorEltsOrZero(STType) == numVectorEltsOrZero(STMemType) &&
18493 STType.isInteger() && STMemType.isInteger()) {
18494 Val = DAG.getNode(ISD::TRUNCATE, SDLoc(ST), STMemType, Val);
18495 return true;
18496 }
18497 if (STType.getSizeInBits() == STMemType.getSizeInBits()) {
18498 Val = DAG.getBitcast(STMemType, Val);
18499 return true;
18500 }
18501 return false; // fail.
18502}
18503
18504bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) {
18505 EVT LDMemType = LD->getMemoryVT();
18506 EVT LDType = LD->getValueType(0);
18507 assert(Val.getValueType() == LDMemType &&
18508 "Attempting to extend value of non-matching type");
18509 if (LDType == LDMemType)
18510 return true;
18511 if (LDMemType.isInteger() && LDType.isInteger()) {
18512 switch (LD->getExtensionType()) {
18513 case ISD::NON_EXTLOAD:
18514 Val = DAG.getBitcast(LDType, Val);
18515 return true;
18516 case ISD::EXTLOAD:
18517 Val = DAG.getNode(ISD::ANY_EXTEND, SDLoc(LD), LDType, Val);
18518 return true;
18519 case ISD::SEXTLOAD:
18520 Val = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(LD), LDType, Val);
18521 return true;
18522 case ISD::ZEXTLOAD:
18523 Val = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(LD), LDType, Val);
18524 return true;
18525 }
18526 }
18527 return false;
18528}
18529
18530StoreSDNode *DAGCombiner::getUniqueStoreFeeding(LoadSDNode *LD,
18531 int64_t &Offset) {
18532 SDValue Chain = LD->getOperand(0);
18533
18534 // Look through CALLSEQ_START.
18535 if (Chain.getOpcode() == ISD::CALLSEQ_START)
18536 Chain = Chain->getOperand(0);
18537
18538 StoreSDNode *ST = nullptr;
18539  SmallVector<SDValue, 8> Aliases;
18540  if (Chain.getOpcode() == ISD::TokenFactor) {
18541 // Look for unique store within the TokenFactor.
18542 for (SDValue Op : Chain->ops()) {
18543 StoreSDNode *Store = dyn_cast<StoreSDNode>(Op.getNode());
18544 if (!Store)
18545 continue;
18546 BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
18547 BaseIndexOffset BasePtrST = BaseIndexOffset::match(Store, DAG);
18548 if (!BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
18549 continue;
18550 // Make sure the store is not aliased with any nodes in TokenFactor.
18551 GatherAllAliases(Store, Chain, Aliases);
18552 if (Aliases.empty() ||
18553 (Aliases.size() == 1 && Aliases.front().getNode() == Store))
18554 ST = Store;
18555 break;
18556 }
18557 } else {
18558 StoreSDNode *Store = dyn_cast<StoreSDNode>(Chain.getNode());
18559 if (Store) {
18560 BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
18561 BaseIndexOffset BasePtrST = BaseIndexOffset::match(Store, DAG);
18562 if (BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
18563 ST = Store;
18564 }
18565 }
18566
18567 return ST;
18568}
18569
18570SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
18571 if (OptLevel == CodeGenOptLevel::None || !LD->isSimple())
18572 return SDValue();
18573 SDValue Chain = LD->getOperand(0);
18574 int64_t Offset;
18575
18576 StoreSDNode *ST = getUniqueStoreFeeding(LD, Offset);
18577 // TODO: Relax this restriction for unordered atomics (see D66309)
18578 if (!ST || !ST->isSimple() || ST->getAddressSpace() != LD->getAddressSpace())
18579 return SDValue();
18580
18581 EVT LDType = LD->getValueType(0);
18582 EVT LDMemType = LD->getMemoryVT();
18583 EVT STMemType = ST->getMemoryVT();
18584 EVT STType = ST->getValue().getValueType();
18585
18586 // There are two cases to consider here:
18587 // 1. The store is fixed width and the load is scalable. In this case we
18588 // don't know at compile time if the store completely envelops the load
18589 // so we abandon the optimisation.
18590 // 2. The store is scalable and the load is fixed width. We could
18591 // potentially support a limited number of cases here, but there has been
18592 // no cost-benefit analysis to prove it's worth it.
18593 bool LdStScalable = LDMemType.isScalableVT();
18594 if (LdStScalable != STMemType.isScalableVT())
18595 return SDValue();
18596
18597 // If we are dealing with scalable vectors on a big endian platform the
18598 // calculation of offsets below becomes trickier, since we do not know at
18599 // compile time the absolute size of the vector. Until we've done more
18600 // analysis on big-endian platforms it seems better to bail out for now.
18601 if (LdStScalable && DAG.getDataLayout().isBigEndian())
18602 return SDValue();
18603
18604  // Normalize for Endianness. After this Offset=0 will denote that the least
18605  // significant bit in the loaded value maps to the least significant bit in
18606  // the stored value. With Offset=n (for n > 0) the loaded value starts at the
18607  // n-th least significant byte of the stored value.
18608 int64_t OrigOffset = Offset;
18609 if (DAG.getDataLayout().isBigEndian())
18610 Offset = ((int64_t)STMemType.getStoreSizeInBits().getFixedValue() -
18611 (int64_t)LDMemType.getStoreSizeInBits().getFixedValue()) /
18612 8 -
18613 Offset;
18614
18615  // Check that the stored value covers all bits that are loaded.
18616 bool STCoversLD;
18617
18618 TypeSize LdMemSize = LDMemType.getSizeInBits();
18619 TypeSize StMemSize = STMemType.getSizeInBits();
18620 if (LdStScalable)
18621 STCoversLD = (Offset == 0) && LdMemSize == StMemSize;
18622 else
18623 STCoversLD = (Offset >= 0) && (Offset * 8 + LdMemSize.getFixedValue() <=
18624 StMemSize.getFixedValue());
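  // For fixed-width types this is a byte-range containment check, e.g. an i8
  // load at Offset == 2 is covered by an i32 store (2*8 + 8 <= 32), while an
  // i32 load at Offset == 1 is not (1*8 + 32 > 32).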
18625
18626 auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue {
18627 if (LD->isIndexed()) {
18628 // Cannot handle opaque target constants and we must respect the user's
18629 // request not to split indexes from loads.
18630 if (!canSplitIdx(LD))
18631 return SDValue();
18632 SDValue Idx = SplitIndexingFromLoad(LD);
18633 SDValue Ops[] = {Val, Idx, Chain};
18634 return CombineTo(LD, Ops, 3);
18635 }
18636 return CombineTo(LD, Val, Chain);
18637 };
18638
18639 if (!STCoversLD)
18640 return SDValue();
18641
18642 // Memory as copy space (potentially masked).
18643 if (Offset == 0 && LDType == STType && STMemType == LDMemType) {
18644 // Simple case: Direct non-truncating forwarding
18645 if (LDType.getSizeInBits() == LdMemSize)
18646 return ReplaceLd(LD, ST->getValue(), Chain);
18647 // Can we model the truncate and extension with an and mask?
18648 if (STType.isInteger() && LDMemType.isInteger() && !STType.isVector() &&
18649 !LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) {
18650 // Mask to size of LDMemType
18651 auto Mask =
18652          DAG.getConstant(APInt::getLowBitsSet(STType.getFixedSizeInBits(),
18653                                               StMemSize.getFixedValue()),
18654 SDLoc(ST), STType);
18655 auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask);
18656 return ReplaceLd(LD, Val, Chain);
18657 }
18658 }
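  // The AND-mask path above covers, e.g., an i32 value truncstored as i8 and
  // reloaded by a zero- or any-extending i8 load of the same address: the
  // loaded bits are the low 8 bits of the stored value, so masking the
  // original i32 value with 0xFF reproduces what the load would return.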
18659
18660  // Handle some big-endian cases that would have Offset == 0 (and thus be
18661  // handled above) on a little-endian target.
18662 SDValue Val = ST->getValue();
18663 if (DAG.getDataLayout().isBigEndian() && Offset > 0 && OrigOffset == 0) {
18664 if (STType.isInteger() && !STType.isVector() && LDType.isInteger() &&
18665 !LDType.isVector() && isTypeLegal(STType) &&
18666 TLI.isOperationLegal(ISD::SRL, STType)) {
18667 Val = DAG.getNode(ISD::SRL, SDLoc(LD), STType, Val,
18668 DAG.getConstant(Offset * 8, SDLoc(LD), STType));
18669 Offset = 0;
18670 }
18671 }
18672
18673 // TODO: Deal with nonzero offset.
18674 if (LD->getBasePtr().isUndef() || Offset != 0)
18675 return SDValue();
18676  // Model necessary truncations / extensions.
18677 // Truncate Value To Stored Memory Size.
18678 do {
18679 if (!getTruncatedStoreValue(ST, Val))
18680 continue;
18681 if (!isTypeLegal(LDMemType))
18682 continue;
18683 if (STMemType != LDMemType) {
18684 // TODO: Support vectors? This requires extract_subvector/bitcast.
18685 if (!STMemType.isVector() && !LDMemType.isVector() &&
18686 STMemType.isInteger() && LDMemType.isInteger())
18687 Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val);
18688 else
18689 continue;
18690 }
18691 if (!extendLoadedValueToExtension(LD, Val))
18692 continue;
18693 return ReplaceLd(LD, Val, Chain);
18694 } while (false);
18695
18696 // On failure, cleanup dead nodes we may have created.
18697 if (Val->use_empty())
18698 deleteAndRecombine(Val.getNode());
18699 return SDValue();
18700}
18701
18702SDValue DAGCombiner::visitLOAD(SDNode *N) {
18703 LoadSDNode *LD = cast<LoadSDNode>(N);
18704 SDValue Chain = LD->getChain();
18705 SDValue Ptr = LD->getBasePtr();
18706
18707 // If load is not volatile and there are no uses of the loaded value (and
18708 // the updated indexed value in case of indexed loads), change uses of the
18709 // chain value into uses of the chain input (i.e. delete the dead load).
18710 // TODO: Allow this for unordered atomics (see D66309)
18711 if (LD->isSimple()) {
18712 if (N->getValueType(1) == MVT::Other) {
18713 // Unindexed loads.
18714 if (!N->hasAnyUseOfValue(0)) {
18715 // It's not safe to use the two value CombineTo variant here. e.g.
18716 // v1, chain2 = load chain1, loc
18717 // v2, chain3 = load chain2, loc
18718 // v3 = add v2, c
18719 // Now we replace use of chain2 with chain1. This makes the second load
18720 // isomorphic to the one we are deleting, and thus makes this load live.
18721 LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG);
18722 dbgs() << "\nWith chain: "; Chain.dump(&DAG);
18723 dbgs() << "\n");
18724 WorklistRemover DeadNodes(*this);
18725 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
18726 AddUsersToWorklist(Chain.getNode());
18727 if (N->use_empty())
18728 deleteAndRecombine(N);
18729
18730 return SDValue(N, 0); // Return N so it doesn't get rechecked!
18731 }
18732 } else {
18733 // Indexed loads.
18734 assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
18735
18736 // If this load has an opaque TargetConstant offset, then we cannot split
18737 // the indexing into an add/sub directly (that TargetConstant may not be
18738 // valid for a different type of node, and we cannot convert an opaque
18739 // target constant into a regular constant).
18740 bool CanSplitIdx = canSplitIdx(LD);
18741
18742 if (!N->hasAnyUseOfValue(0) && (CanSplitIdx || !N->hasAnyUseOfValue(1))) {
18743 SDValue Undef = DAG.getUNDEF(N->getValueType(0));
18744 SDValue Index;
18745 if (N->hasAnyUseOfValue(1) && CanSplitIdx) {
18746 Index = SplitIndexingFromLoad(LD);
18747 // Try to fold the base pointer arithmetic into subsequent loads and
18748 // stores.
18749 AddUsersToWorklist(N);
18750 } else
18751 Index = DAG.getUNDEF(N->getValueType(1));
18752 LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG);
18753 dbgs() << "\nWith: "; Undef.dump(&DAG);
18754 dbgs() << " and 2 other values\n");
18755 WorklistRemover DeadNodes(*this);
18756 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
18757        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
18758        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
18759 deleteAndRecombine(N);
18760 return SDValue(N, 0); // Return N so it doesn't get rechecked!
18761 }
18762 }
18763 }
18764
18765 // If this load is directly stored, replace the load value with the stored
18766 // value.
18767 if (auto V = ForwardStoreValueToDirectLoad(LD))
18768 return V;
18769
18770 // Try to infer better alignment information than the load already has.
18771 if (OptLevel != CodeGenOptLevel::None && LD->isUnindexed() &&
18772 !LD->isAtomic()) {
18773 if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
18774 if (*Alignment > LD->getAlign() &&
18775 isAligned(*Alignment, LD->getSrcValueOffset())) {
18776 SDValue NewLoad = DAG.getExtLoad(
18777 LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
18778 LD->getPointerInfo(), LD->getMemoryVT(), *Alignment,
18779 LD->getMemOperand()->getFlags(), LD->getAAInfo());
18780 // NewLoad will always be N as we are only refining the alignment
18781 assert(NewLoad.getNode() == N);
18782 (void)NewLoad;
18783 }
18784 }
18785 }
18786
18787 if (LD->isUnindexed()) {
18788 // Walk up chain skipping non-aliasing memory nodes.
18789 SDValue BetterChain = FindBetterChain(LD, Chain);
18790
18791 // If there is a better chain.
18792 if (Chain != BetterChain) {
18793 SDValue ReplLoad;
18794
18795      // Replace the chain to avoid a dependency on the old chain.
18796 if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
18797 ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
18798 BetterChain, Ptr, LD->getMemOperand());
18799 } else {
18800 ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
18801 LD->getValueType(0),
18802 BetterChain, Ptr, LD->getMemoryVT(),
18803 LD->getMemOperand());
18804 }
18805
18806 // Create token factor to keep old chain connected.
18807 SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
18808 MVT::Other, Chain, ReplLoad.getValue(1));
18809
18810 // Replace uses with load result and token factor
18811 return CombineTo(N, ReplLoad.getValue(0), Token);
18812 }
18813 }
18814
18815 // Try transforming N to an indexed load.
18816 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
18817 return SDValue(N, 0);
18818
18819 // Try to slice up N to more direct loads if the slices are mapped to
18820 // different register banks or pairing can take place.
18821 if (SliceUpLoad(N))
18822 return SDValue(N, 0);
18823
18824 return SDValue();
18825}
18826
18827namespace {
18828
18829/// Helper structure used to slice a load in smaller loads.
18830/// Basically a slice is obtained from the following sequence:
18831/// Origin = load Ty1, Base
18832/// Shift = srl Ty1 Origin, CstTy Amount
18833/// Inst = trunc Shift to Ty2
18834///
18835/// Then, it will be rewritten into:
18836/// Slice = load SliceTy, Base + SliceOffset
18837/// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
18838///
18839/// SliceTy is deduced from the number of bits that are actually used to
18840/// build Inst.
18841struct LoadedSlice {
18842 /// Helper structure used to compute the cost of a slice.
18843 struct Cost {
18844 /// Are we optimizing for code size.
18845 bool ForCodeSize = false;
18846
18847    /// Various costs.
18848 unsigned Loads = 0;
18849 unsigned Truncates = 0;
18850 unsigned CrossRegisterBanksCopies = 0;
18851 unsigned ZExts = 0;
18852 unsigned Shift = 0;
18853
18854 explicit Cost(bool ForCodeSize) : ForCodeSize(ForCodeSize) {}
18855
18856 /// Get the cost of one isolated slice.
18857 Cost(const LoadedSlice &LS, bool ForCodeSize)
18858 : ForCodeSize(ForCodeSize), Loads(1) {
18859 EVT TruncType = LS.Inst->getValueType(0);
18860 EVT LoadedType = LS.getLoadedType();
18861 if (TruncType != LoadedType &&
18862 !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
18863 ZExts = 1;
18864 }
18865
18866 /// Account for slicing gain in the current cost.
18867    /// Slicing provides a few gains, like removing a shift or a
18868    /// truncate. This method grows the cost of the original
18869    /// load by the gain from this slice.
18870 void addSliceGain(const LoadedSlice &LS) {
18871 // Each slice saves a truncate.
18872 const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
18873 if (!TLI.isTruncateFree(LS.Inst->getOperand(0), LS.Inst->getValueType(0)))
18874 ++Truncates;
18875 // If there is a shift amount, this slice gets rid of it.
18876 if (LS.Shift)
18877 ++Shift;
18878 // If this slice can merge a cross register bank copy, account for it.
18879 if (LS.canMergeExpensiveCrossRegisterBankCopy())
18880 ++CrossRegisterBanksCopies;
18881 }
18882
18883 Cost &operator+=(const Cost &RHS) {
18884 Loads += RHS.Loads;
18885 Truncates += RHS.Truncates;
18886 CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
18887 ZExts += RHS.ZExts;
18888 Shift += RHS.Shift;
18889 return *this;
18890 }
18891
18892 bool operator==(const Cost &RHS) const {
18893 return Loads == RHS.Loads && Truncates == RHS.Truncates &&
18894 CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
18895 ZExts == RHS.ZExts && Shift == RHS.Shift;
18896 }
18897
18898 bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
18899
18900 bool operator<(const Cost &RHS) const {
18901 // Assume cross register banks copies are as expensive as loads.
18902 // FIXME: Do we want some more target hooks?
18903 unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
18904 unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
18905 // Unless we are optimizing for code size, consider the
18906 // expensive operation first.
18907 if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
18908 return ExpensiveOpsLHS < ExpensiveOpsRHS;
18909 return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
18910 (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
18911 }
18912
18913 bool operator>(const Cost &RHS) const { return RHS < *this; }
18914
18915 bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
18916
18917 bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
18918 };
18919
18920  // The last instruction that represents the slice. This should be a
18921 // truncate instruction.
18922 SDNode *Inst;
18923
18924 // The original load instruction.
18925 LoadSDNode *Origin;
18926
18927 // The right shift amount in bits from the original load.
18928 unsigned Shift;
18929
18930  // The DAG from which Origin came.
18931 // This is used to get some contextual information about legal types, etc.
18932 SelectionDAG *DAG;
18933
18934 LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
18935 unsigned Shift = 0, SelectionDAG *DAG = nullptr)
18936 : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
18937
18938 /// Get the bits used in a chunk of bits \p BitWidth large.
18939  /// \return Result is \p BitWidth bits wide, with used bits set to 1 and
18940  /// unused bits set to 0.
18941 APInt getUsedBits() const {
18942 // Reproduce the trunc(lshr) sequence:
18943 // - Start from the truncated value.
18944 // - Zero extend to the desired bit width.
18945 // - Shift left.
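    // E.g. for i8 = trunc (srl i32 %x, 16): BitWidth is 32 and the slice uses
    // 8 bits, so after the zext and shift UsedBits == 0x00FF0000.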
18946 assert(Origin && "No original load to compare against.");
18947 unsigned BitWidth = Origin->getValueSizeInBits(0);
18948 assert(Inst && "This slice is not bound to an instruction");
18949 assert(Inst->getValueSizeInBits(0) <= BitWidth &&
18950 "Extracted slice is bigger than the whole type!");
18951 APInt UsedBits(Inst->getValueSizeInBits(0), 0);
18952 UsedBits.setAllBits();
18953 UsedBits = UsedBits.zext(BitWidth);
18954 UsedBits <<= Shift;
18955 return UsedBits;
18956 }
18957
18958 /// Get the size of the slice to be loaded in bytes.
18959 unsigned getLoadedSize() const {
18960 unsigned SliceSize = getUsedBits().popcount();
18961 assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
18962 return SliceSize / 8;
18963 }
18964
18965 /// Get the type that will be loaded for this slice.
18966 /// Note: This may not be the final type for the slice.
18967 EVT getLoadedType() const {
18968 assert(DAG && "Missing context");
18969 LLVMContext &Ctxt = *DAG->getContext();
18970 return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
18971 }
18972
18973 /// Get the alignment of the load used for this slice.
18974 Align getAlign() const {
18975 Align Alignment = Origin->getAlign();
18976 uint64_t Offset = getOffsetFromBase();
18977 if (Offset != 0)
18978 Alignment = commonAlignment(Alignment, Alignment.value() + Offset);
18979 return Alignment;
18980 }
18981
18982 /// Check if this slice can be rewritten with legal operations.
18983 bool isLegal() const {
18984 // An invalid slice is not legal.
18985 if (!Origin || !Inst || !DAG)
18986 return false;
18987
18988 // Offsets are for indexed load only, we do not handle that.
18989 if (!Origin->getOffset().isUndef())
18990 return false;
18991
18992 const TargetLowering &TLI = DAG->getTargetLoweringInfo();
18993
18994 // Check that the type is legal.
18995 EVT SliceType = getLoadedType();
18996 if (!TLI.isTypeLegal(SliceType))
18997 return false;
18998
18999 // Check that the load is legal for this type.
19000 if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
19001 return false;
19002
19003 // Check that the offset can be computed.
19004 // 1. Check its type.
19005 EVT PtrType = Origin->getBasePtr().getValueType();
19006 if (PtrType == MVT::Untyped || PtrType.isExtended())
19007 return false;
19008
19009 // 2. Check that it fits in the immediate.
19010 if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
19011 return false;
19012
19013 // 3. Check that the computation is legal.
19014 if (!TLI.isOperationLegal(ISD::ADD, PtrType))
19015 return false;
19016
19017 // Check that the zext is legal if it needs one.
19018 EVT TruncateType = Inst->getValueType(0);
19019 if (TruncateType != SliceType &&
19020 !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
19021 return false;
19022
19023 return true;
19024 }
19025
19026 /// Get the offset in bytes of this slice in the original chunk of
19027 /// bits.
19028 /// \pre DAG != nullptr.
19029 uint64_t getOffsetFromBase() const {
19030 assert(DAG && "Missing context.");
19031 bool IsBigEndian = DAG->getDataLayout().isBigEndian();
19032 assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
19033 uint64_t Offset = Shift / 8;
19034 unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
19035 assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
19036 "The size of the original loaded type is not a multiple of a"
19037 " byte.");
19038 // If Offset is bigger than TySizeInBytes, it means we are loading all
19039 // zeros. This should have been optimized before in the process.
19040 assert(TySizeInBytes > Offset &&
19041 "Invalid shift amount for given loaded size");
19042 if (IsBigEndian)
19043 Offset = TySizeInBytes - Offset - getLoadedSize();
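    // E.g. for a 4-byte origin load, a 2-byte slice with Shift == 16 sits at
    // byte offset 2 on little-endian but at 4 - 2 - 2 == 0 on big-endian.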
19044 return Offset;
19045 }
19046
19047 /// Generate the sequence of instructions to load the slice
19048 /// represented by this object and redirect the uses of this slice to
19049 /// this new sequence of instructions.
19050 /// \pre this->Inst && this->Origin are valid Instructions and this
19051 /// object passed the legal check: LoadedSlice::isLegal returned true.
19052 /// \return The last instruction of the sequence used to load the slice.
19053 SDValue loadSlice() const {
19054 assert(Inst && Origin && "Unable to replace a non-existing slice.");
19055 const SDValue &OldBaseAddr = Origin->getBasePtr();
19056 SDValue BaseAddr = OldBaseAddr;
19057 // Get the offset in that chunk of bytes w.r.t. the endianness.
19058 int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
19059 assert(Offset >= 0 && "Offset too big to fit in int64_t!");
19060 if (Offset) {
19061 // BaseAddr = BaseAddr + Offset.
19062 EVT ArithType = BaseAddr.getValueType();
19063 SDLoc DL(Origin);
19064 BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
19065 DAG->getConstant(Offset, DL, ArithType));
19066 }
19067
19068 // Create the type of the loaded slice according to its size.
19069 EVT SliceType = getLoadedType();
19070
19071 // Create the load for the slice.
19072 SDValue LastInst =
19073 DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
19074                     Origin->getPointerInfo().getWithOffset(Offset), getAlign(),
19075                     Origin->getMemOperand()->getFlags());
19076 // If the final type is not the same as the loaded type, this means that
19077 // we have to pad with zero. Create a zero extend for that.
19078 EVT FinalType = Inst->getValueType(0);
19079 if (SliceType != FinalType)
19080 LastInst =
19081 DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
19082 return LastInst;
19083 }
19084
19085 /// Check if this slice can be merged with an expensive cross register
19086 /// bank copy. E.g.,
19087 /// i = load i32
19088 /// f = bitcast i32 i to float
19089 bool canMergeExpensiveCrossRegisterBankCopy() const {
19090 if (!Inst || !Inst->hasOneUse())
19091 return false;
19092 SDNode *Use = *Inst->use_begin();
19093 if (Use->getOpcode() != ISD::BITCAST)
19094 return false;
19095 assert(DAG && "Missing context");
19096 const TargetLowering &TLI = DAG->getTargetLoweringInfo();
19097 EVT ResVT = Use->getValueType(0);
19098 const TargetRegisterClass *ResRC =
19099 TLI.getRegClassFor(ResVT.getSimpleVT(), Use->isDivergent());
19100 const TargetRegisterClass *ArgRC =
19101 TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT(),
19102 Use->getOperand(0)->isDivergent());
19103 if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
19104 return false;
19105
19106 // At this point, we know that we perform a cross-register-bank copy.
19107 // Check if it is expensive.
19108    const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
19109    // Assume bitcasts are cheap, unless both register classes do not
19110 // explicitly share a common sub class.
19111 if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
19112 return false;
19113
19114 // Check if it will be merged with the load.
19115 // 1. Check the alignment / fast memory access constraint.
19116 unsigned IsFast = 0;
19117 if (!TLI.allowsMemoryAccess(*DAG->getContext(), DAG->getDataLayout(), ResVT,
19118 Origin->getAddressSpace(), getAlign(),
19119 Origin->getMemOperand()->getFlags(), &IsFast) ||
19120 !IsFast)
19121 return false;
19122
19123 // 2. Check that the load is a legal operation for that type.
19124 if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
19125 return false;
19126
19127 // 3. Check that we do not have a zext in the way.
19128 if (Inst->getValueType(0) != getLoadedType())
19129 return false;
19130
19131 return true;
19132 }
19133};
19134
19135} // end anonymous namespace
19136
19137/// Check that all bits set in \p UsedBits form a dense region, i.e.,
19138/// \p UsedBits looks like 0..0 1..1 0..0.
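/// For instance, 0x00FF0000 is dense while 0x00FF00FF is not.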
19139static bool areUsedBitsDense(const APInt &UsedBits) {
19140 // If all the bits are one, this is dense!
19141 if (UsedBits.isAllOnes())
19142 return true;
19143
19144 // Get rid of the unused bits on the right.
19145 APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countr_zero());
19146 // Get rid of the unused bits on the left.
19147 if (NarrowedUsedBits.countl_zero())
19148 NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
19149 // Check that the chunk of bits is completely used.
19150 return NarrowedUsedBits.isAllOnes();
19151}
19152
19153/// Check whether or not \p First and \p Second are next to each other
19154/// in memory. This means that there is no hole between the bits loaded
19155/// by \p First and the bits loaded by \p Second.
19156static bool areSlicesNextToEachOther(const LoadedSlice &First,
19157 const LoadedSlice &Second) {
19158 assert(First.Origin == Second.Origin && First.Origin &&
19159 "Unable to match different memory origins.");
19160 APInt UsedBits = First.getUsedBits();
19161 assert((UsedBits & Second.getUsedBits()) == 0 &&
19162 "Slices are not supposed to overlap.");
19163 UsedBits |= Second.getUsedBits();
19164 return areUsedBitsDense(UsedBits);
19165}
19166
19167/// Adjust the \p GlobalLSCost according to the target
19168/// pairing capabilities and the layout of the slices.
19169/// \pre \p GlobalLSCost should account for at least as many loads as
19170/// there are in the slices in \p LoadedSlices.
19171 static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
19172                                  LoadedSlice::Cost &GlobalLSCost) {
19173 unsigned NumberOfSlices = LoadedSlices.size();
19174  // If there are fewer than 2 elements, no pairing is possible.
19175 if (NumberOfSlices < 2)
19176 return;
19177
19178 // Sort the slices so that elements that are likely to be next to each
19179 // other in memory are next to each other in the list.
19180 llvm::sort(LoadedSlices, [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
19181 assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
19182 return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
19183 });
19184 const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
19185  // First (resp. Second) is the first (resp. second) potential candidate
19186  // to be placed in a paired load.
19187 const LoadedSlice *First = nullptr;
19188 const LoadedSlice *Second = nullptr;
19189 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
19190 // Set the beginning of the pair.
19191 First = Second) {
19192 Second = &LoadedSlices[CurrSlice];
19193
19194 // If First is NULL, it means we start a new pair.
19195 // Get to the next slice.
19196 if (!First)
19197 continue;
19198
19199 EVT LoadedType = First->getLoadedType();
19200
19201 // If the types of the slices are different, we cannot pair them.
19202 if (LoadedType != Second->getLoadedType())
19203 continue;
19204
19205 // Check if the target supplies paired loads for this type.
19206 Align RequiredAlignment;
19207 if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
19208 // move to the next pair, this type is hopeless.
19209 Second = nullptr;
19210 continue;
19211 }
19212 // Check if we meet the alignment requirement.
19213 if (First->getAlign() < RequiredAlignment)
19214 continue;
19215
19216 // Check that both loads are next to each other in memory.
19217 if (!areSlicesNextToEachOther(*First, *Second))
19218 continue;
19219
19220 assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
19221 --GlobalLSCost.Loads;
19222 // Move to the next pair.
19223 Second = nullptr;
19224 }
19225}
19226
19227/// Check the profitability of all involved LoadedSlice.
19228/// Currently, it is considered profitable if there are exactly two
19229/// involved slices (1) which are (2) next to each other in memory, and
19230/// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
19231///
19232/// Note: The order of the elements in \p LoadedSlices may be modified, but not
19233/// the elements themselves.
19234///
19235/// FIXME: When the cost model is mature enough, we can relax
19236/// constraints (1) and (2).
19237 static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
19238                                 const APInt &UsedBits, bool ForCodeSize) {
19239 unsigned NumberOfSlices = LoadedSlices.size();
19240  if (StressLoadSlicing)
19241    return NumberOfSlices > 1;
19242
19243 // Check (1).
19244 if (NumberOfSlices != 2)
19245 return false;
19246
19247 // Check (2).
19248 if (!areUsedBitsDense(UsedBits))
19249 return false;
19250
19251 // Check (3).
19252 LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
19253 // The original code has one big load.
19254 OrigCost.Loads = 1;
19255 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
19256 const LoadedSlice &LS = LoadedSlices[CurrSlice];
19257 // Accumulate the cost of all the slices.
19258 LoadedSlice::Cost SliceCost(LS, ForCodeSize);
19259 GlobalSlicingCost += SliceCost;
19260
19261 // Account as cost in the original configuration the gain obtained
19262 // with the current slices.
19263 OrigCost.addSliceGain(LS);
19264 }
19265
19266 // If the target supports paired load, adjust the cost accordingly.
19267 adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
19268 return OrigCost > GlobalSlicingCost;
19269}
19270
19271/// If the given load, \p LI, is used only by trunc or trunc(lshr)
19272/// operations, split it into the various pieces being extracted.
19273///
19274/// This sort of thing is introduced by SROA.
19275/// This slicing takes care not to insert overlapping loads.
19276/// \pre LI is a simple load (i.e., not an atomic or volatile load).
19277bool DAGCombiner::SliceUpLoad(SDNode *N) {
19278 if (Level < AfterLegalizeDAG)
19279 return false;
19280
19281 LoadSDNode *LD = cast<LoadSDNode>(N);
19282 if (!LD->isSimple() || !ISD::isNormalLoad(LD) ||
19283 !LD->getValueType(0).isInteger())
19284 return false;
19285
19286 // The algorithm to split up a load of a scalable vector into individual
19287 // elements currently requires knowing the length of the loaded type,
19288 // so will need adjusting to work on scalable vectors.
19289 if (LD->getValueType(0).isScalableVector())
19290 return false;
19291
19292 // Keep track of already used bits to detect overlapping values.
19293 // In that case, we will just abort the transformation.
19294 APInt UsedBits(LD->getValueSizeInBits(0), 0);
19295
19296 SmallVector<LoadedSlice, 4> LoadedSlices;
19297
19298 // Check if this load is used as several smaller chunks of bits.
19299 // Basically, look for uses in trunc or trunc(lshr) and record a new chain
19300 // of computation for each trunc.
19301 for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
19302 UI != UIEnd; ++UI) {
19303 // Skip the uses of the chain.
19304 if (UI.getUse().getResNo() != 0)
19305 continue;
19306
19307 SDNode *User = *UI;
19308 unsigned Shift = 0;
19309
19310 // Check if this is a trunc(lshr).
19311 if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
19312 isa<ConstantSDNode>(User->getOperand(1))) {
19313 Shift = User->getConstantOperandVal(1);
19314 User = *User->use_begin();
19315 }
19316
19317    // At this point, User is a Truncate iff we encountered trunc or
19318    // trunc(lshr).
19319 if (User->getOpcode() != ISD::TRUNCATE)
19320 return false;
19321
19322    // The width of the type must be a power of 2 and at least 8 bits.
19323    // Otherwise the load cannot be represented in LLVM IR.
19324    // Moreover, if we shifted by an amount that is not a multiple of 8 bits,
19325    // the slice would span several bytes. We do not support that.
19326 unsigned Width = User->getValueSizeInBits(0);
19327 if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
19328 return false;
19329
19330 // Build the slice for this chain of computations.
19331 LoadedSlice LS(User, LD, Shift, &DAG);
19332 APInt CurrentUsedBits = LS.getUsedBits();
19333
19334 // Check if this slice overlaps with another.
19335 if ((CurrentUsedBits & UsedBits) != 0)
19336 return false;
19337 // Update the bits used globally.
19338 UsedBits |= CurrentUsedBits;
19339
19340 // Check if the new slice would be legal.
19341 if (!LS.isLegal())
19342 return false;
19343
19344 // Record the slice.
19345 LoadedSlices.push_back(LS);
19346 }
19347
19348 // Abort slicing if it does not seem to be profitable.
19349 if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
19350 return false;
19351
19352 ++SlicedLoads;
19353
19354 // Rewrite each chain to use an independent load.
19355 // By construction, each chain can be represented by a unique load.
19356
19357 // Prepare the argument for the new token factor for all the slices.
19358 SmallVector<SDValue, 8> ArgChains;
19359 for (const LoadedSlice &LS : LoadedSlices) {
19360 SDValue SliceInst = LS.loadSlice();
19361 CombineTo(LS.Inst, SliceInst, true);
19362 if (SliceInst.getOpcode() != ISD::LOAD)
19363 SliceInst = SliceInst.getOperand(0);
19364 assert(SliceInst->getOpcode() == ISD::LOAD &&
19365 "It takes more than a zext to get to the loaded slice!!");
19366 ArgChains.push_back(SliceInst.getValue(1));
19367 }
19368
19369 SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
19370 ArgChains);
19371 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
19372 AddToWorklist(Chain.getNode());
19373 return true;
19374}
19375
19376/// Check to see if V is (and load (ptr), imm), where the load has
19377/// specific bytes cleared out. If so, return the byte size being masked out
19378/// and the shift amount.
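/// For example, (and (load i32 p), 0xFFFF00FF) clears exactly byte 1 of the
/// loaded value, so (assuming the chain checks pass) the result is {1, 1}:
/// one byte masked out, starting at byte offset 1 from the pointer.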
19379static std::pair<unsigned, unsigned>
19380 CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
19381   std::pair<unsigned, unsigned> Result(0, 0);
19382
19383 // Check for the structure we're looking for.
19384 if (V->getOpcode() != ISD::AND ||
19385 !isa<ConstantSDNode>(V->getOperand(1)) ||
19386 !ISD::isNormalLoad(V->getOperand(0).getNode()))
19387 return Result;
19388
19389 // Check the chain and pointer.
19390 LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
19391 if (LD->getBasePtr() != Ptr) return Result; // Not from same pointer.
19392
19393 // This only handles simple types.
19394 if (V.getValueType() != MVT::i16 &&
19395 V.getValueType() != MVT::i32 &&
19396 V.getValueType() != MVT::i64)
19397 return Result;
19398
19399  // Check the constant mask. Invert it so that the bits being cleared by the
19400  // AND become 1 and the bits being kept become 0. Use getSExtValue so that
19401  // leading bits follow the sign bit for uniformity.
19402 uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
19403 unsigned NotMaskLZ = llvm::countl_zero(NotMask);
19404 if (NotMaskLZ & 7) return Result; // Must be multiple of a byte.
19405 unsigned NotMaskTZ = llvm::countr_zero(NotMask);
19406 if (NotMaskTZ & 7) return Result; // Must be multiple of a byte.
19407 if (NotMaskLZ == 64) return Result; // All zero mask.
19408
19409 // See if we have a continuous run of bits. If so, we have 0*1+0*
19410 if (llvm::countr_one(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
19411 return Result;
19412
19413 // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
19414 if (V.getValueType() != MVT::i64 && NotMaskLZ)
19415 NotMaskLZ -= 64-V.getValueSizeInBits();
19416
19417 unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
19418 switch (MaskedBytes) {
19419 case 1:
19420 case 2:
19421 case 4: break;
19422 default: return Result; // All one mask, or 5-byte mask.
19423 }
19424
19425  // Verify that the masked region starts at a multiple of its width so that
19426  // the access is aligned the same as the access width.
19427 if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
19428
19429  // For narrowing to be valid, it must be the case that the load is the
19430  // memory operation immediately preceding the store.
19431 if (LD == Chain.getNode())
19432 ; // ok.
19433 else if (Chain->getOpcode() == ISD::TokenFactor &&
19434 SDValue(LD, 1).hasOneUse()) {
19435    // LD has only 1 chain use, so there are no indirect dependencies.
19436 if (!LD->isOperandOf(Chain.getNode()))
19437 return Result;
19438 } else
19439 return Result; // Fail.
19440
19441 Result.first = MaskedBytes;
19442 Result.second = NotMaskTZ/8;
19443 return Result;
19444}
19445
19446/// Check to see if IVal is something that provides a value as specified by
19447/// MaskInfo. If so, replace the specified store with a narrower store of
19448/// truncated IVal.
19449static SDValue
19450ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
19451 SDValue IVal, StoreSDNode *St,
19452 DAGCombiner *DC) {
19453 unsigned NumBytes = MaskInfo.first;
19454 unsigned ByteShift = MaskInfo.second;
19455 SelectionDAG &DAG = DC->getDAG();
19456
19457 // Check to see if IVal is all zeros in the part being masked in by the 'or'
19458 // that uses this. If not, this is not a replacement.
19459 APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
19460 ByteShift*8, (ByteShift+NumBytes)*8);
19461 if (!DAG.MaskedValueIsZero(IVal, Mask)) return SDValue();
19462
19463 // Check that it is legal on the target to do this. It is legal if the new
19464 // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
19465 // legalization. If the source type is legal, but the store type isn't, see
19466 // if we can use a truncating store.
19467 MVT VT = MVT::getIntegerVT(NumBytes * 8);
19468 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19469 bool UseTruncStore;
19470 if (DC->isTypeLegal(VT))
19471 UseTruncStore = false;
19472 else if (TLI.isTypeLegal(IVal.getValueType()) &&
19473 TLI.isTruncStoreLegal(IVal.getValueType(), VT))
19474 UseTruncStore = true;
19475 else
19476 return SDValue();
19477
19478 // Can't do this for indexed stores.
19479 if (St->isIndexed())
19480 return SDValue();
19481
19482 // Check that the target doesn't think this is a bad idea.
19483 if (St->getMemOperand() &&
19484 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
19485 *St->getMemOperand()))
19486 return SDValue();
19487
19488 // Okay, we can do this! Replace the 'St' store with a store of IVal that is
19489 // shifted by ByteShift and truncated down to NumBytes.
19490 if (ByteShift) {
19491 SDLoc DL(IVal);
19492 IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
19493 DAG.getConstant(ByteShift*8, DL,
19494 DC->getShiftAmountTy(IVal.getValueType())));
19495 }
19496
19497 // Figure out the offset for the store and the alignment of the access.
19498 unsigned StOffset;
19499 if (DAG.getDataLayout().isLittleEndian())
19500 StOffset = ByteShift;
19501 else
19502 StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
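  // E.g. for a 4-byte IVal and a 1-byte narrowed store of byte 1, StOffset is
  // 1 on little-endian and 4 - 1 - 1 == 2 on big-endian.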
19503
19504 SDValue Ptr = St->getBasePtr();
19505 if (StOffset) {
19506 SDLoc DL(IVal);
19507    Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(StOffset), DL);
19508  }
19509
19510 ++OpsNarrowed;
19511 if (UseTruncStore)
19512 return DAG.getTruncStore(St->getChain(), SDLoc(St), IVal, Ptr,
19513 St->getPointerInfo().getWithOffset(StOffset),
19514 VT, St->getOriginalAlign());
19515
19516 // Truncate down to the new size.
19517 IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
19518
19519 return DAG
19520 .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
19521 St->getPointerInfo().getWithOffset(StOffset),
19522 St->getOriginalAlign());
19523}
19524
19525/// Look for sequence of load / op / store where op is one of 'or', 'xor', and
19526/// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
19527/// narrowing the load and store if it would end up being a win for performance
19528/// or code size.
19529SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
19530 StoreSDNode *ST = cast<StoreSDNode>(N);
19531 if (!ST->isSimple())
19532 return SDValue();
19533
19534 SDValue Chain = ST->getChain();
19535 SDValue Value = ST->getValue();
19536 SDValue Ptr = ST->getBasePtr();
19537 EVT VT = Value.getValueType();
19538
19539 if (ST->isTruncatingStore() || VT.isVector())
19540 return SDValue();
19541
19542 unsigned Opc = Value.getOpcode();
19543
19544 if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
19545 !Value.hasOneUse())
19546 return SDValue();
19547
19548 // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
19549 // is a byte mask indicating a consecutive number of bytes, check to see if
19550 // Y is known to provide just those bytes. If so, we try to replace the
19551 // load + replace + store sequence with a single (narrower) store, which makes
19552 // the load dead.
19553  if (Opc == ISD::OR && EnableShrinkLoadReplaceStoreWithStore) {
19554    std::pair<unsigned, unsigned> MaskedLoad;
19555 MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
19556 if (MaskedLoad.first)
19557 if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
19558 Value.getOperand(1), ST,this))
19559 return NewST;
19560
19561 // Or is commutative, so try swapping X and Y.
19562 MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
19563 if (MaskedLoad.first)
19564 if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
19565 Value.getOperand(0), ST,this))
19566 return NewST;
19567 }
19568
19569  if (!EnableReduceLoadOpStoreWidth)
19570    return SDValue();
19571
19572 if (Value.getOperand(1).getOpcode() != ISD::Constant)
19573 return SDValue();
19574
19575 SDValue N0 = Value.getOperand(0);
19576 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
19577 Chain == SDValue(N0.getNode(), 1)) {
19578 LoadSDNode *LD = cast<LoadSDNode>(N0);
19579 if (LD->getBasePtr() != Ptr ||
19580 LD->getPointerInfo().getAddrSpace() !=
19581 ST->getPointerInfo().getAddrSpace())
19582 return SDValue();
19583
19584 // Find the type to narrow it the load / op / store to.
19585 SDValue N1 = Value.getOperand(1);
19586 unsigned BitWidth = N1.getValueSizeInBits();
19587 APInt Imm = N1->getAsAPIntVal();
19588 if (Opc == ISD::AND)
19589      Imm ^= APInt::getAllOnes(BitWidth);
19590    if (Imm == 0 || Imm.isAllOnes())
19591 return SDValue();
19592 unsigned ShAmt = Imm.countr_zero();
19593 unsigned MSB = BitWidth - Imm.countl_zero() - 1;
19594 unsigned NewBW = NextPowerOf2(MSB - ShAmt);
19595 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
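    // E.g. an i32 'or' with 0x00FF0000 gives ShAmt == 16 and MSB == 23, so the
    // first candidate width is NextPowerOf2(23 - 16) == 8 bits.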
19596 // The narrowing should be profitable, the load/store operation should be
19597 // legal (or custom) and the store size should be equal to the NewVT width.
19598 while (NewBW < BitWidth &&
19599 (NewVT.getStoreSizeInBits() != NewBW ||
19600 !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
19601 !TLI.isNarrowingProfitable(VT, NewVT))) {
19602 NewBW = NextPowerOf2(NewBW);
19603 NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
19604 }
19605 if (NewBW >= BitWidth)
19606 return SDValue();
19607
19608    // If the lowest changed bit does not sit at a NewBW-bit boundary,
19609    // start at the previous boundary.
19610 if (ShAmt % NewBW)
19611 ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
19612    APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
19613                                   std::min(BitWidth, ShAmt + NewBW));
19614 if ((Imm & Mask) == Imm) {
19615 APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
19616 if (Opc == ISD::AND)
19617 NewImm ^= APInt::getAllOnes(NewBW);
19618 uint64_t PtrOff = ShAmt / 8;
19619 // For big endian targets, we need to adjust the offset to the pointer to
19620 // load the correct bytes.
19621 if (DAG.getDataLayout().isBigEndian())
19622 PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
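      // E.g. narrowing a 32-bit value to the 8 bits at ShAmt == 8: PtrOff is 1
      // on little-endian and (32 + 7 - 8) / 8 - 1 == 2 on big-endian.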
19623
19624 unsigned IsFast = 0;
19625 Align NewAlign = commonAlignment(LD->getAlign(), PtrOff);
19626 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), NewVT,
19627 LD->getAddressSpace(), NewAlign,
19628 LD->getMemOperand()->getFlags(), &IsFast) ||
19629 !IsFast)
19630 return SDValue();
19631
19632 SDValue NewPtr =
19633        DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(PtrOff), SDLoc(LD));
19634    SDValue NewLD =
19635 DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
19636 LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
19637 LD->getMemOperand()->getFlags(), LD->getAAInfo());
19638 SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
19639 DAG.getConstant(NewImm, SDLoc(Value),
19640 NewVT));
19641 SDValue NewST =
19642 DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
19643 ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);
19644
19645 AddToWorklist(NewPtr.getNode());
19646 AddToWorklist(NewLD.getNode());
19647 AddToWorklist(NewVal.getNode());
19648 WorklistRemover DeadNodes(*this);
19649 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
19650 ++OpsNarrowed;
19651 return NewST;
19652 }
19653 }
19654
19655 return SDValue();
19656}
19657
19658/// For a given floating point load / store pair, if the load value isn't used
19659/// by any other operations, then consider transforming the pair to integer
19660/// load / store operations if the target deems the transformation profitable.
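/// For example (illustrative): an f32 value that is loaded only to be stored
/// again can become an i32 load feeding an i32 store, provided the target says
/// i32 loads/stores of that size and alignment are legal and fast, avoiding a
/// round trip through the floating-point registers.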
19661SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
19662 StoreSDNode *ST = cast<StoreSDNode>(N);
19663 SDValue Value = ST->getValue();
19664 if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
19665 Value.hasOneUse()) {
19666 LoadSDNode *LD = cast<LoadSDNode>(Value);
19667 EVT VT = LD->getMemoryVT();
19668 if (!VT.isFloatingPoint() ||
19669 VT != ST->getMemoryVT() ||
19670 LD->isNonTemporal() ||
19671 ST->isNonTemporal() ||
19672 LD->getPointerInfo().getAddrSpace() != 0 ||
19673 ST->getPointerInfo().getAddrSpace() != 0)
19674 return SDValue();
19675
19676 TypeSize VTSize = VT.getSizeInBits();
19677
19678 // We don't know the size of scalable types at compile time so we cannot
19679 // create an integer of the equivalent size.
19680 if (VTSize.isScalable())
19681 return SDValue();
19682
19683 unsigned FastLD = 0, FastST = 0;
19684 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VTSize.getFixedValue());
19685 if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
19686 !TLI.isOperationLegal(ISD::STORE, IntVT) ||
19687 !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
19688 !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT) ||
19689 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,
19690 *LD->getMemOperand(), &FastLD) ||
19691 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,
19692 *ST->getMemOperand(), &FastST) ||
19693 !FastLD || !FastST)
19694 return SDValue();
19695
19696 SDValue NewLD =
19697 DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
19698 LD->getPointerInfo(), LD->getAlign());
19699
19700 SDValue NewST =
19701 DAG.getStore(ST->getChain(), SDLoc(N), NewLD, ST->getBasePtr(),
19702 ST->getPointerInfo(), ST->getAlign());
19703
19704 AddToWorklist(NewLD.getNode());
19705 AddToWorklist(NewST.getNode());
19706 WorklistRemover DeadNodes(*this);
19707 DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
19708 ++LdStFP2Int;
19709 return NewST;
19710 }
19711
19712 return SDValue();
19713}
19714
19715// This is a helper function for visitMUL to check the profitability
19716// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
19717// MulNode is the original multiply, AddNode is (add x, c1),
19718// and ConstNode is c2.
19719//
19720// If the (add x, c1) has multiple uses, we could increase
19721// the number of adds if we make this transformation.
19722// It would only be worth doing this if we can remove a
19723// multiply in the process. Check for that here.
19724// To illustrate:
19725// (A + c1) * c3
19726// (A + c2) * c3
19727// We're checking for cases where we have common "c3 * A" expressions.
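// Concretely: (A + 5) * 3 and (A + 7) * 3 rewrite to (A * 3) + 15 and
// (A * 3) + 21, so the "A * 3" multiply is shared and one multiply disappears.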
19728bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode, SDValue AddNode,
19729 SDValue ConstNode) {
19730 APInt Val;
19731
19732 // If the add only has one use, and the target thinks the folding is
19733 // profitable or does not lead to worse code, this would be OK to do.
19734 if (AddNode->hasOneUse() &&
19735 TLI.isMulAddWithConstProfitable(AddNode, ConstNode))
19736 return true;
19737
19738 // Walk all the users of the constant with which we're multiplying.
19739 for (SDNode *Use : ConstNode->uses()) {
19740 if (Use == MulNode) // This use is the one we're on right now. Skip it.
19741 continue;
19742
19743 if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
19744 SDNode *OtherOp;
19745 SDNode *MulVar = AddNode.getOperand(0).getNode();
19746
19747 // OtherOp is what we're multiplying against the constant.
19748 if (Use->getOperand(0) == ConstNode)
19749 OtherOp = Use->getOperand(1).getNode();
19750 else
19751 OtherOp = Use->getOperand(0).getNode();
19752
19753 // Check to see if multiply is with the same operand of our "add".
19754 //
19755 // ConstNode = CONST
19756 // Use = ConstNode * A <-- visiting Use. OtherOp is A.
19757 // ...
19758 // AddNode = (A + c1) <-- MulVar is A.
19759 // = AddNode * ConstNode <-- current visiting instruction.
19760 //
19761 // If we make this transformation, we will have a common
19762 // multiply (ConstNode * A) that we can save.
19763 if (OtherOp == MulVar)
19764 return true;
19765
19766 // Now check to see if a future expansion will give us a common
19767 // multiply.
19768 //
19769 // ConstNode = CONST
19770 // AddNode = (A + c1)
19771 // ... = AddNode * ConstNode <-- current visiting instruction.
19772 // ...
19773 // OtherOp = (A + c2)
19774 // Use = OtherOp * ConstNode <-- visiting Use.
19775 //
19776 // If we make this transformation, we will have a common
19777 // multiply (CONST * A) after we also do the same transformation
19778 // to the "Use" instruction.
19779 if (OtherOp->getOpcode() == ISD::ADD &&
19780 DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
19781 OtherOp->getOperand(0).getNode() == MulVar)
19782 return true;
19783 }
19784 }
19785
19786 // Didn't find a case where this would be profitable.
19787 return false;
19788}
19789
19790SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
19791 unsigned NumStores) {
19792 SmallVector<SDValue, 8> Chains;
19793 SmallPtrSet<const SDNode *, 8> Visited;
19794 SDLoc StoreDL(StoreNodes[0].MemNode);
19795
19796 for (unsigned i = 0; i < NumStores; ++i) {
19797 Visited.insert(StoreNodes[i].MemNode);
19798 }
19799
19800 // Don't include nodes that are children or repeated nodes.
19801 for (unsigned i = 0; i < NumStores; ++i) {
19802 if (Visited.insert(StoreNodes[i].MemNode->getChain().getNode()).second)
19803 Chains.push_back(StoreNodes[i].MemNode->getChain());
19804 }
19805
19806 assert(!Chains.empty() && "Chain should have generated a chain");
19807 return DAG.getTokenFactor(StoreDL, Chains);
19808}
19809
19810bool DAGCombiner::hasSameUnderlyingObj(ArrayRef<MemOpLink> StoreNodes) {
19811 const Value *UnderlyingObj = nullptr;
19812 for (const auto &MemOp : StoreNodes) {
19813 const MachineMemOperand *MMO = MemOp.MemNode->getMemOperand();
19814 // A pseudo value such as a stack frame has its own frame index and size; we
19815 // should not use the first store's frame index for other frames.
19816 if (MMO->getPseudoValue())
19817 return false;
19818
19819 if (!MMO->getValue())
19820 return false;
19821
19822 const Value *Obj = getUnderlyingObject(MMO->getValue());
19823
19824 if (UnderlyingObj && UnderlyingObj != Obj)
19825 return false;
19826
19827 if (!UnderlyingObj)
19828 UnderlyingObj = Obj;
19829 }
19830
19831 return true;
19832}
19833
19834bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
19835 SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
19836 bool IsConstantSrc, bool UseVector, bool UseTrunc) {
19837 // Make sure we have something to merge.
19838 if (NumStores < 2)
19839 return false;
19840
19841 assert((!UseTrunc || !UseVector) &&
19842 "This optimization cannot emit a vector truncating store");
19843
19844 // The latest Node in the DAG.
19845 SDLoc DL(StoreNodes[0].MemNode);
19846
19847 TypeSize ElementSizeBits = MemVT.getStoreSizeInBits();
19848 unsigned SizeInBits = NumStores * ElementSizeBits;
19849 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
19850
19851 std::optional<MachineMemOperand::Flags> Flags;
19852 AAMDNodes AAInfo;
19853 for (unsigned I = 0; I != NumStores; ++I) {
19854 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
19855 if (!Flags) {
19856 Flags = St->getMemOperand()->getFlags();
19857 AAInfo = St->getAAInfo();
19858 continue;
19859 }
19860 // Skip merging if there's an inconsistent flag.
19861 if (Flags != St->getMemOperand()->getFlags())
19862 return false;
19863 // Concatenate AA metadata.
19864 AAInfo = AAInfo.concat(St->getAAInfo());
19865 }
19866
19867 EVT StoreTy;
19868 if (UseVector) {
19869 unsigned Elts = NumStores * NumMemElts;
19870 // Get the type for the merged vector store.
19871 StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
19872 } else
19873 StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
19874
19875 SDValue StoredVal;
19876 if (UseVector) {
19877 if (IsConstantSrc) {
19878 SmallVector<SDValue, 8> BuildVector;
19879 for (unsigned I = 0; I != NumStores; ++I) {
19880 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
19881 SDValue Val = St->getValue();
19882 // If the constant is of the wrong type, convert it now. This comes up
19883 // when one of our stores was truncating.
19884 if (MemVT != Val.getValueType()) {
19885 Val = peekThroughBitcasts(Val);
19886 // Deal with constants of wrong size.
19887 if (ElementSizeBits != Val.getValueSizeInBits()) {
19888 auto *C = dyn_cast<ConstantSDNode>(Val);
19889 if (!C)
19890 // Not clear how to truncate FP values.
19891 // TODO: Handle truncation of build_vector constants
19892 return false;
19893
19894 EVT IntMemVT =
19895 EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
19896 Val = DAG.getConstant(C->getAPIntValue()
19897 .zextOrTrunc(Val.getValueSizeInBits())
19898 .zextOrTrunc(ElementSizeBits),
19899 SDLoc(C), IntMemVT);
19900 }
19901 // Make sure the correctly sized value has the correct type.
19902 Val = DAG.getBitcast(MemVT, Val);
19903 }
19904 BuildVector.push_back(Val);
19905 }
19906 StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
19907 : ISD::BUILD_VECTOR,
19908 DL, StoreTy, BuildVector);
19909 } else {
19910 SmallVector<SDValue, 8> Ops;
19911 for (unsigned i = 0; i < NumStores; ++i) {
19912 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
19913 SDValue Val = peekThroughBitcasts(St->getValue());
19914 // All operands of BUILD_VECTOR / CONCAT_VECTOR must be of
19915 // type MemVT. If the underlying value is not the correct
19916 // type, but it is an extraction of an appropriate vector we
19917 // can recast Val to be of the correct type. This may require
19918 // converting between EXTRACT_VECTOR_ELT and
19919 // EXTRACT_SUBVECTOR.
19920 if ((MemVT != Val.getValueType()) &&
19921 (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
19922 Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
19923 EVT MemVTScalarTy = MemVT.getScalarType();
19924 // We may need to add a bitcast here to get types to line up.
19925 if (MemVTScalarTy != Val.getValueType().getScalarType()) {
19926 Val = DAG.getBitcast(MemVT, Val);
19927 } else if (MemVT.isVector() &&
19928 Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
19929 Val = DAG.getNode(ISD::BUILD_VECTOR, DL, MemVT, Val);
19930 } else {
19931 unsigned OpC = MemVT.isVector() ? ISD::EXTRACT_SUBVECTOR
19932 : ISD::EXTRACT_VECTOR_ELT;
19933 SDValue Vec = Val.getOperand(0);
19934 SDValue Idx = Val.getOperand(1);
19935 Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Idx);
19936 }
19937 }
19938 Ops.push_back(Val);
19939 }
19940
19941 // Build the extracted vector elements back into a vector.
19942 StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
19943 : ISD::BUILD_VECTOR,
19944 DL, StoreTy, Ops);
19945 }
19946 } else {
19947 // We should always use a vector store when merging extracted vector
19948 // elements, so this path implies a store of constants.
19949 assert(IsConstantSrc && "Merged vector elements should use vector store");
19950
19951 APInt StoreInt(SizeInBits, 0);
19952
19953 // Construct a single integer constant which is made of the smaller
19954 // constant inputs.
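// For example (illustrative): merging four consecutive i8 stores of
// 0x01, 0x02, 0x03, 0x04 produces the i32 constant 0x04030201, which a
// little-endian target can then write with a single i32 store.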
19955 bool IsLE = DAG.getDataLayout().isLittleEndian();
19956 for (unsigned i = 0; i < NumStores; ++i) {
19957 unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
19958 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
19959
19960 SDValue Val = St->getValue();
19961 Val = peekThroughBitcasts(Val);
19962 StoreInt <<= ElementSizeBits;
19963 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
19964 StoreInt |= C->getAPIntValue()
19965 .zextOrTrunc(ElementSizeBits)
19966 .zextOrTrunc(SizeInBits);
19967 } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
19968 StoreInt |= C->getValueAPF()
19969 .bitcastToAPInt()
19970 .zextOrTrunc(ElementSizeBits)
19971 .zextOrTrunc(SizeInBits);
19972 // If fp truncation is necessary give up for now.
19973 if (MemVT.getSizeInBits() != ElementSizeBits)
19974 return false;
19975 } else if (ISD::isBuildVectorOfConstantSDNodes(Val.getNode()) ||
19976 ISD::isBuildVectorOfConstantFPSDNodes(Val.getNode())) {
19977 // Not yet handled
19978 return false;
19979 } else {
19980 llvm_unreachable("Invalid constant element type");
19981 }
19982 }
19983
19984 // Create the new Load and Store operations.
19985 StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
19986 }
19987
19988 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
19989 SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);
19990 bool CanReusePtrInfo = hasSameUnderlyingObj(StoreNodes);
19991
19992 // Make sure we use a truncating store if that is necessary for legality.
19993 // When generating the new widened store, if the first store's pointer info
19994 // cannot be reused, discard the pointer info except for the address space,
19995 // because the widened store can no longer be represented by the original
19996 // pointer info, which describes the narrower memory object.
19997 SDValue NewStore;
19998 if (!UseTrunc) {
19999 NewStore = DAG.getStore(
20000 NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
20001 CanReusePtrInfo
20002 ? FirstInChain->getPointerInfo()
20003 : MachinePointerInfo(FirstInChain->getPointerInfo().getAddrSpace()),
20004 FirstInChain->getAlign(), *Flags, AAInfo);
20005 } else { // Must be realized as a trunc store
20006 EVT LegalizedStoredValTy =
20007 TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
20008 unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits();
20009 ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
20010 SDValue ExtendedStoreVal =
20011 DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
20012 LegalizedStoredValTy);
20013 NewStore = DAG.getTruncStore(
20014 NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
20015 CanReusePtrInfo
20016 ? FirstInChain->getPointerInfo()
20017 : MachinePointerInfo(FirstInChain->getPointerInfo().getAddrSpace()),
20018 StoredVal.getValueType() /*TVT*/, FirstInChain->getAlign(), *Flags,
20019 AAInfo);
20020 }
20021
20022 // Replace all merged stores with the new store.
20023 for (unsigned i = 0; i < NumStores; ++i)
20024 CombineTo(StoreNodes[i].MemNode, NewStore);
20025
20026 AddToWorklist(NewChain.getNode());
20027 return true;
20028}
20029
20030void DAGCombiner::getStoreMergeCandidates(
20031 StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes,
20032 SDNode *&RootNode) {
20033 // This holds the base pointer, index, and the offset in bytes from the base
20034 // pointer. We must have a base and an offset. Do not handle stores to undef
20035 // base pointers.
20036 BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
20037 if (!BasePtr.getBase().getNode() || BasePtr.getBase().isUndef())
20038 return;
20039
20040 SDValue Val = peekThroughBitcasts(St->getValue());
20041 StoreSource StoreSrc = getStoreSource(Val);
20042 assert(StoreSrc != StoreSource::Unknown && "Expected known source for store");
20043
20044 // Match on loadbaseptr if relevant.
20045 EVT MemVT = St->getMemoryVT();
20046 BaseIndexOffset LBasePtr;
20047 EVT LoadVT;
20048 if (StoreSrc == StoreSource::Load) {
20049 auto *Ld = cast<LoadSDNode>(Val);
20050 LBasePtr = BaseIndexOffset::match(Ld, DAG);
20051 LoadVT = Ld->getMemoryVT();
20052 // Load and store should be the same type.
20053 if (MemVT != LoadVT)
20054 return;
20055 // Loads must only have one use.
20056 if (!Ld->hasNUsesOfValue(1, 0))
20057 return;
20058 // The memory operands must not be volatile/indexed/atomic.
20059 // TODO: May be able to relax for unordered atomics (see D66309)
20060 if (!Ld->isSimple() || Ld->isIndexed())
20061 return;
20062 }
20063 auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
20064 int64_t &Offset) -> bool {
20065 // The memory operands must not be volatile/indexed/atomic.
20066 // TODO: May be able to relax for unordered atomics (see D66309)
20067 if (!Other->isSimple() || Other->isIndexed())
20068 return false;
20069 // Don't mix temporal stores with non-temporal stores.
20070 if (St->isNonTemporal() != Other->isNonTemporal())
20071 return false;
20072 if (!TLI.areTwoSDNodeTargetMMOFlagsMergeable(*St, *Other))
20073 return false;
20074 SDValue OtherBC = peekThroughBitcasts(Other->getValue());
20075 // Allow merging constants of different types as integers.
20076 bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
20077 : Other->getMemoryVT() != MemVT;
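// For example, an i32 store of the constant 0 and an adjacent f32 store of
// 0.0f can still be merged, since both constants can be re-expressed as
// integer bytes of the same width.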
20078 switch (StoreSrc) {
20079 case StoreSource::Load: {
20080 if (NoTypeMatch)
20081 return false;
20082 // The Load's Base Ptr must also match.
20083 auto *OtherLd = dyn_cast<LoadSDNode>(OtherBC);
20084 if (!OtherLd)
20085 return false;
20086 BaseIndexOffset LPtr = BaseIndexOffset::match(OtherLd, DAG);
20087 if (LoadVT != OtherLd->getMemoryVT())
20088 return false;
20089 // Loads must only have one use.
20090 if (!OtherLd->hasNUsesOfValue(1, 0))
20091 return false;
20092 // The memory operands must not be volatile/indexed/atomic.
20093 // TODO: May be able to relax for unordered atomics (see D66309)
20094 if (!OtherLd->isSimple() || OtherLd->isIndexed())
20095 return false;
20096 // Don't mix temporal loads with non-temporal loads.
20097 if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal())
20098 return false;
20099 if (!TLI.areTwoSDNodeTargetMMOFlagsMergeable(*cast<LoadSDNode>(Val),
20100 *OtherLd))
20101 return false;
20102 if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
20103 return false;
20104 break;
20105 }
20106 case StoreSource::Constant:
20107 if (NoTypeMatch)
20108 return false;
20109 if (getStoreSource(OtherBC) != StoreSource::Constant)
20110 return false;
20111 break;
20112 case StoreSource::Extract:
20113 // Do not merge truncated stores here.
20114 if (Other->isTruncatingStore())
20115 return false;
20116 if (!MemVT.bitsEq(OtherBC.getValueType()))
20117 return false;
20118 if (OtherBC.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
20119 OtherBC.getOpcode() != ISD::EXTRACT_SUBVECTOR)
20120 return false;
20121 break;
20122 default:
20123 llvm_unreachable("Unhandled store source for merging");
20124 }
20125 Ptr = BaseIndexOffset::match(Other, DAG);
20126 return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
20127 };
20128
20129 // Check whether this pair of StoreNode and RootNode has already bailed out
20130 // of the dependence check more times than the limit allows.
20131 auto OverLimitInDependenceCheck = [&](SDNode *StoreNode,
20132 SDNode *RootNode) -> bool {
20133 auto RootCount = StoreRootCountMap.find(StoreNode);
20134 return RootCount != StoreRootCountMap.end() &&
20135 RootCount->second.first == RootNode &&
20136 RootCount->second.second > StoreMergeDependenceLimit;
20137 };
20138
20139 auto TryToAddCandidate = [&](SDNode::use_iterator UseIter) {
20140 // This must be a chain use.
20141 if (UseIter.getOperandNo() != 0)
20142 return;
20143 if (auto *OtherStore = dyn_cast<StoreSDNode>(*UseIter)) {
20144 BaseIndexOffset Ptr;
20145 int64_t PtrDiff;
20146 if (CandidateMatch(OtherStore, Ptr, PtrDiff) &&
20147 !OverLimitInDependenceCheck(OtherStore, RootNode))
20148 StoreNodes.push_back(MemOpLink(OtherStore, PtrDiff));
20149 }
20150 };
20151
20152 // We are looking for a root node which is an ancestor of all mergeable
20153 // stores. We search up through a load, to our root and then down
20154 // through all children. For instance we will find Store{1,2,3} if
20155 // St is Store1, Store2 or Store3 where the root is not a load,
20156 // which is always true for non-volatile ops. TODO: Expand
20157 // the search to find all valid candidates through multiple layers of loads.
20158 //
20159 // Root
20160 // |-------|-------|
20161 // Load Load Store3
20162 // | |
20163 // Store1 Store2
20164 //
20165 // FIXME: We should be able to climb and
20166 // descend TokenFactors to find candidates as well.
20167
20168 RootNode = St->getChain().getNode();
20169
20170 unsigned NumNodesExplored = 0;
20171 const unsigned MaxSearchNodes = 1024;
20172 if (auto *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
20173 RootNode = Ldn->getChain().getNode();
20174 for (auto I = RootNode->use_begin(), E = RootNode->use_end();
20175 I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored) {
20176 if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) { // walk down chain
20177 for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
20178 TryToAddCandidate(I2);
20179 }
20180 // Check stores that depend on the root (e.g. Store 3 in the chart above).
20181 if (I.getOperandNo() == 0 && isa<StoreSDNode>(*I)) {
20182 TryToAddCandidate(I);
20183 }
20184 }
20185 } else {
20186 for (auto I = RootNode->use_begin(), E = RootNode->use_end();
20187 I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored)
20188 TryToAddCandidate(I);
20189 }
20190}
20191
20192// We need to check that merging these stores does not cause a loop in the
20193// DAG. Any store candidate may depend on another candidate indirectly through
20194// its operands. Check in parallel by searching up from operands of candidates.
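// For example, one candidate store's value may be computed from a load whose
// chain (directly or indirectly) depends on another candidate store; merging
// the two would make the combined store a predecessor of itself.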
20195bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
20196 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
20197 SDNode *RootNode) {
20198 // FIXME: We should be able to truncate a full search of
20199 // predecessors by doing a BFS and keeping tabs on the originating
20200 // stores from which worklist nodes come, in a similar way to
20201 // TokenFactor simplification.
20202
20203 SmallPtrSet<const SDNode *, 32> Visited;
20204 SmallVector<const SDNode *, 8> Worklist;
20205
20206 // RootNode is a predecessor to all candidates so we need not search
20207 // past it. Add RootNode (peeking through TokenFactors). Do not count
20208 // these towards size check.
20209
20210 Worklist.push_back(RootNode);
20211 while (!Worklist.empty()) {
20212 auto N = Worklist.pop_back_val();
20213 if (!Visited.insert(N).second)
20214 continue; // Already present in Visited.
20215 if (N->getOpcode() == ISD::TokenFactor) {
20216 for (SDValue Op : N->ops())
20217 Worklist.push_back(Op.getNode());
20218 }
20219 }
20220
20221 // Don't count pruning nodes towards max.
20222 unsigned int Max = 1024 + Visited.size();
20223 // Search Ops of store candidates.
20224 for (unsigned i = 0; i < NumStores; ++i) {
20225 SDNode *N = StoreNodes[i].MemNode;
20226 // Of the 4 Store Operands:
20227 // * Chain (Op 0) -> We have already considered these
20228 // in candidate selection, but only by following the
20229 // chain dependencies. We could still have a chain
20230 // dependency to a load, that has a non-chain dep to
20231 // another load, that depends on a store, etc. So it is
20232 // possible to have dependencies that consist of a mix
20233 // of chain and non-chain deps, and we need to include
20234 // chain operands in the analysis here.
20235 // * Value (Op 1) -> Cycles may happen (e.g. through load chains)
20236 // * Address (Op 2) -> Merged addresses may only vary by a fixed constant,
20237 // but aren't necessarily from the same base node, so
20238 // cycles possible (e.g. via indexed store).
20239 // * (Op 3) -> Represents the pre or post-indexing offset (or undef for
20240 // non-indexed stores). Not constant on all targets (e.g. ARM)
20241 // and so can participate in a cycle.
20242 for (unsigned j = 0; j < N->getNumOperands(); ++j)
20243 Worklist.push_back(N->getOperand(j).getNode());
20244 }
20245 // Search through DAG. We can stop early if we find a store node.
20246 for (unsigned i = 0; i < NumStores; ++i)
20247 if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
20248 Max)) {
20249 // If the search bails out, record the StoreNode and RootNode in the
20250 // StoreRootCountMap. If we have seen the pair more times than the limit,
20251 // we won't add the StoreNode into the StoreNodes set again.
20252 if (Visited.size() >= Max) {
20253 auto &RootCount = StoreRootCountMap[StoreNodes[i].MemNode];
20254 if (RootCount.first == RootNode)
20255 RootCount.second++;
20256 else
20257 RootCount = {RootNode, 1};
20258 }
20259 return false;
20260 }
20261 return true;
20262}
20263
20264unsigned
20265DAGCombiner::getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
20266 int64_t ElementSizeBytes) const {
20267 while (true) {
20268 // Find a store past the width of the first store.
20269 size_t StartIdx = 0;
20270 while ((StartIdx + 1 < StoreNodes.size()) &&
20271 StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
20272 StoreNodes[StartIdx + 1].OffsetFromBase)
20273 ++StartIdx;
20274
20275 // Bail if we don't have enough candidates to merge.
20276 if (StartIdx + 1 >= StoreNodes.size())
20277 return 0;
20278
20279 // Trim stores that overlapped with the first store.
20280 if (StartIdx)
20281 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
20282
20283 // Scan the memory operations on the chain and find the first
20284 // non-consecutive store memory address.
20285 unsigned NumConsecutiveStores = 1;
20286 int64_t StartAddress = StoreNodes[0].OffsetFromBase;
20287 // Check that the addresses are consecutive starting from the second
20288 // element in the list of stores.
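// For example, with 4-byte elements and sorted offsets {0, 4, 8, 20}, the scan
// stops at offset 20 and reports 3 consecutive stores.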
20289 for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
20290 int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
20291 if (CurrAddress - StartAddress != (ElementSizeBytes * i))
20292 break;
20293 NumConsecutiveStores = i + 1;
20294 }
20295 if (NumConsecutiveStores > 1)
20296 return NumConsecutiveStores;
20297
20298 // There are no consecutive stores at the start of the list.
20299 // Remove the first store and try again.
20300 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
20301 }
20302}
20303
20304bool DAGCombiner::tryStoreMergeOfConstants(
20305 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
20306 EVT MemVT, SDNode *RootNode, bool AllowVectors) {
20307 LLVMContext &Context = *DAG.getContext();
20308 const DataLayout &DL = DAG.getDataLayout();
20309 int64_t ElementSizeBytes = MemVT.getStoreSize();
20310 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
20311 bool MadeChange = false;
20312
20313 // Store the constants into memory as one consecutive store.
20314 while (NumConsecutiveStores >= 2) {
20315 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
20316 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
20317 Align FirstStoreAlign = FirstInChain->getAlign();
20318 unsigned LastLegalType = 1;
20319 unsigned LastLegalVectorType = 1;
20320 bool LastIntegerTrunc = false;
20321 bool NonZero = false;
20322 unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
20323 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
20324 StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
20325 SDValue StoredVal = ST->getValue();
20326 bool IsElementZero = false;
20327 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
20328 IsElementZero = C->isZero();
20329 else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
20330 IsElementZero = C->getConstantFPValue()->isNullValue();
20331 else if (ISD::isBuildVectorAllZeros(StoredVal.getNode()))
20332 IsElementZero = true;
20333 if (IsElementZero) {
20334 if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
20335 FirstZeroAfterNonZero = i;
20336 }
20337 NonZero |= !IsElementZero;
20338
20339 // Find a legal type for the constant store.
20340 unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
20341 EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
20342 unsigned IsFast = 0;
20343
20344 // Break early when size is too large to be legal.
20345 if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
20346 break;
20347
20348 if (TLI.isTypeLegal(StoreTy) &&
20349 TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
20350 DAG.getMachineFunction()) &&
20351 TLI.allowsMemoryAccess(Context, DL, StoreTy,
20352 *FirstInChain->getMemOperand(), &IsFast) &&
20353 IsFast) {
20354 LastIntegerTrunc = false;
20355 LastLegalType = i + 1;
20356 // Or check whether a truncstore is legal.
20357 } else if (TLI.getTypeAction(Context, StoreTy) ==
20358 TargetLowering::TypePromoteInteger) {
20359 EVT LegalizedStoredValTy =
20360 TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
20361 if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
20362 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy,
20363 DAG.getMachineFunction()) &&
20364 TLI.allowsMemoryAccess(Context, DL, StoreTy,
20365 *FirstInChain->getMemOperand(), &IsFast) &&
20366 IsFast) {
20367 LastIntegerTrunc = true;
20368 LastLegalType = i + 1;
20369 }
20370 }
20371
20372 // We only use vectors if the target allows it and the function is not
20373 // marked with the noimplicitfloat attribute.
20374 if (TLI.storeOfVectorConstantIsCheap(!NonZero, MemVT, i + 1, FirstStoreAS) &&
20375 AllowVectors) {
20376 // Find a legal type for the vector store.
20377 unsigned Elts = (i + 1) * NumMemElts;
20378 EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
20379 if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
20380 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&
20381 TLI.allowsMemoryAccess(Context, DL, Ty,
20382 *FirstInChain->getMemOperand(), &IsFast) &&
20383 IsFast)
20384 LastLegalVectorType = i + 1;
20385 }
20386 }
20387
20388 bool UseVector = (LastLegalVectorType > LastLegalType) && AllowVectors;
20389 unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
20390 bool UseTrunc = LastIntegerTrunc && !UseVector;
20391
20392 // Check if we found a legal integer type that creates a meaningful
20393 // merge.
20394 if (NumElem < 2) {
20395 // We know that candidate stores are in order and of correct
20396 // shape. While there is no mergeable sequence from the
20397 // beginning one may start later in the sequence. The only
20398 // reason a merge of size N could have failed where another of
20399 // the same size would not have, is if the alignment has
20400 // improved or we've dropped a non-zero value. Drop as many
20401 // candidates as we can here.
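// For instance, a merge anchored at a 1-byte-aligned first store may be
// rejected where one anchored at a later 4-byte-aligned store would succeed,
// so we stop skipping as soon as a candidate's alignment improves on the
// first store's (or at the first zero value after a non-zero one).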
20402 unsigned NumSkip = 1;
20403 while ((NumSkip < NumConsecutiveStores) &&
20404 (NumSkip < FirstZeroAfterNonZero) &&
20405 (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
20406 NumSkip++;
20407
20408 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
20409 NumConsecutiveStores -= NumSkip;
20410 continue;
20411 }
20412
20413 // Check that we can merge these candidates without causing a cycle.
20414 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
20415 RootNode)) {
20416 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
20417 NumConsecutiveStores -= NumElem;
20418 continue;
20419 }
20420
20421 MadeChange |= mergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
20422 /*IsConstantSrc*/ true,
20423 UseVector, UseTrunc);
20424
20425 // Remove merged stores for next iteration.
20426 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
20427 NumConsecutiveStores -= NumElem;
20428 }
20429 return MadeChange;
20430}
20431
20432bool DAGCombiner::tryStoreMergeOfExtracts(
20433 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
20434 EVT MemVT, SDNode *RootNode) {
20435 LLVMContext &Context = *DAG.getContext();
20436 const DataLayout &DL = DAG.getDataLayout();
20437 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
20438 bool MadeChange = false;
20439
20440 // Loop on Consecutive Stores on success.
20441 while (NumConsecutiveStores >= 2) {
20442 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
20443 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
20444 Align FirstStoreAlign = FirstInChain->getAlign();
20445 unsigned NumStoresToMerge = 1;
20446 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
20447 // Find a legal type for the vector store.
20448 unsigned Elts = (i + 1) * NumMemElts;
20449 EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
20450 unsigned IsFast = 0;
20451
20452 // Break early when size is too large to be legal.
20453 if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
20454 break;
20455
20456 if (TLI.isTypeLegal(Ty) &&
20457 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&
20458 TLI.allowsMemoryAccess(Context, DL, Ty,
20459 *FirstInChain->getMemOperand(), &IsFast) &&
20460 IsFast)
20461 NumStoresToMerge = i + 1;
20462 }
20463
20464 // Check if we found a legal vector type that creates a meaningful
20465 // merge.
20466 if (NumStoresToMerge < 2) {
20467 // We know that candidate stores are in order and of correct
20468 // shape. While there is no mergeable sequence from the
20469 // beginning one may start later in the sequence. The only
20470 // reason a merge of size N could have failed where another of
20471 // the same size would not have, is if the alignment has
20472 // improved. Drop as many candidates as we can here.
20473 unsigned NumSkip = 1;
20474 while ((NumSkip < NumConsecutiveStores) &&
20475 (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
20476 NumSkip++;
20477
20478 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
20479 NumConsecutiveStores -= NumSkip;
20480 continue;
20481 }
20482
20483 // Check that we can merge these candidates without causing a cycle.
20484 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumStoresToMerge,
20485 RootNode)) {
20486 StoreNodes.erase(StoreNodes.begin(),
20487 StoreNodes.begin() + NumStoresToMerge);
20488 NumConsecutiveStores -= NumStoresToMerge;
20489 continue;
20490 }
20491
20492 MadeChange |= mergeStoresOfConstantsOrVecElts(
20493 StoreNodes, MemVT, NumStoresToMerge, /*IsConstantSrc*/ false,
20494 /*UseVector*/ true, /*UseTrunc*/ false);
20495
20496 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumStoresToMerge);
20497 NumConsecutiveStores -= NumStoresToMerge;
20498 }
20499 return MadeChange;
20500}
20501
20502bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
20503 unsigned NumConsecutiveStores, EVT MemVT,
20504 SDNode *RootNode, bool AllowVectors,
20505 bool IsNonTemporalStore,
20506 bool IsNonTemporalLoad) {
20507 LLVMContext &Context = *DAG.getContext();
20508 const DataLayout &DL = DAG.getDataLayout();
20509 int64_t ElementSizeBytes = MemVT.getStoreSize();
20510 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
20511 bool MadeChange = false;
20512
20513 // Look for load nodes which are used by the stored values.
20514 SmallVector<MemOpLink, 8> LoadNodes;
20515
20516 // Find acceptable loads. Loads need to have the same chain (token factor),
20517 // must not be zext, volatile, indexed, and they must be consecutive.
20518 BaseIndexOffset LdBasePtr;
20519
20520 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
20521 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
20522 SDValue Val = peekThroughBitcasts(St->getValue());
20523 LoadSDNode *Ld = cast<LoadSDNode>(Val);
20524
20525 BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
20526 // If this is not the first ptr that we check.
20527 int64_t LdOffset = 0;
20528 if (LdBasePtr.getBase().getNode()) {
20529 // The base ptr must be the same.
20530 if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
20531 break;
20532 } else {
20533 // Check that all other base pointers are the same as this one.
20534 LdBasePtr = LdPtr;
20535 }
20536
20537 // We found a potential memory operand to merge.
20538 LoadNodes.push_back(MemOpLink(Ld, LdOffset));
20539 }
20540
20541 while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
20542 Align RequiredAlignment;
20543 bool NeedRotate = false;
20544 if (LoadNodes.size() == 2) {
20545 // If we have load/store pair instructions and we only have two values,
20546 // don't bother merging.
20547 if (TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
20548 StoreNodes[0].MemNode->getAlign() >= RequiredAlignment) {
20549 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
20550 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
20551 break;
20552 }
20553 // If the loads are reversed, see if we can rotate the halves into place.
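// For example (illustrative, assuming an i64 rotate is available): i32 loads
// from p+4 and p stored to q and q+4 can become a single i64 load from p,
// rotated by 32 bits to swap the halves, followed by one i64 store to q.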
20554 int64_t Offset0 = LoadNodes[0].OffsetFromBase;
20555 int64_t Offset1 = LoadNodes[1].OffsetFromBase;
20556 EVT PairVT = EVT::getIntegerVT(Context, ElementSizeBytes * 8 * 2);
20557 if (Offset0 - Offset1 == ElementSizeBytes &&
20558 (hasOperation(ISD::ROTL, PairVT) ||
20559 hasOperation(ISD::ROTR, PairVT))) {
20560 std::swap(LoadNodes[0], LoadNodes[1]);
20561 NeedRotate = true;
20562 }
20563 }
20564 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
20565 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
20566 Align FirstStoreAlign = FirstInChain->getAlign();
20567 LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
20568
20569 // Scan the memory operations on the chain and find the first
20570 // non-consecutive load memory address. These variables hold the index in
20571 // the store node array.
20572
20573 unsigned LastConsecutiveLoad = 1;
20574
20575 // This variable refers to the size and not the index in the array.
20576 unsigned LastLegalVectorType = 1;
20577 unsigned LastLegalIntegerType = 1;
20578 bool isDereferenceable = true;
20579 bool DoIntegerTruncate = false;
20580 int64_t StartAddress = LoadNodes[0].OffsetFromBase;
20581 SDValue LoadChain = FirstLoad->getChain();
20582 for (unsigned i = 1; i < LoadNodes.size(); ++i) {
20583 // All loads must share the same chain.
20584 if (LoadNodes[i].MemNode->getChain() != LoadChain)
20585 break;
20586
20587 int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
20588 if (CurrAddress - StartAddress != (ElementSizeBytes * i))
20589 break;
20590 LastConsecutiveLoad = i;
20591
20592 if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
20593 isDereferenceable = false;
20594
20595 // Find a legal type for the vector store.
20596 unsigned Elts = (i + 1) * NumMemElts;
20597 EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
20598
20599 // Break early when size is too large to be legal.
20600 if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
20601 break;
20602
20603 unsigned IsFastSt = 0;
20604 unsigned IsFastLd = 0;
20605 // Don't try vector types if we need a rotate. We may still fail the
20606 // legality checks for the integer type, but we can't handle the rotate
20607 // case with vectors.
20608 // FIXME: We could use a shuffle in place of the rotate.
20609 if (!NeedRotate && TLI.isTypeLegal(StoreTy) &&
20610 TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
20611 DAG.getMachineFunction()) &&
20612 TLI.allowsMemoryAccess(Context, DL, StoreTy,
20613 *FirstInChain->getMemOperand(), &IsFastSt) &&
20614 IsFastSt &&
20615 TLI.allowsMemoryAccess(Context, DL, StoreTy,
20616 *FirstLoad->getMemOperand(), &IsFastLd) &&
20617 IsFastLd) {
20618 LastLegalVectorType = i + 1;
20619 }
20620
20621 // Find a legal type for the integer store.
20622 unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
20623 StoreTy = EVT::getIntegerVT(Context, SizeInBits);
20624 if (TLI.isTypeLegal(StoreTy) &&
20625 TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
20626 DAG.getMachineFunction()) &&
20627 TLI.allowsMemoryAccess(Context, DL, StoreTy,
20628 *FirstInChain->getMemOperand(), &IsFastSt) &&
20629 IsFastSt &&
20630 TLI.allowsMemoryAccess(Context, DL, StoreTy,
20631 *FirstLoad->getMemOperand(), &IsFastLd) &&
20632 IsFastLd) {
20633 LastLegalIntegerType = i + 1;
20634 DoIntegerTruncate = false;
20635 // Or check whether a truncstore and extload is legal.
20636 } else if (TLI.getTypeAction(Context, StoreTy) ==
20637 TargetLowering::TypePromoteInteger) {
20638 EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
20639 if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
20640 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy,
20641 DAG.getMachineFunction()) &&
20642 TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy, StoreTy) &&
20643 TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy, StoreTy) &&
20644 TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
20645 TLI.allowsMemoryAccess(Context, DL, StoreTy,
20646 *FirstInChain->getMemOperand(), &IsFastSt) &&
20647 IsFastSt &&
20648 TLI.allowsMemoryAccess(Context, DL, StoreTy,
20649 *FirstLoad->getMemOperand(), &IsFastLd) &&
20650 IsFastLd) {
20651 LastLegalIntegerType = i + 1;
20652 DoIntegerTruncate = true;
20653 }
20654 }
20655 }
20656
20657 // Only use vector types if the vector type is larger than the integer
20658 // type. If they are the same, use integers.
20659 bool UseVectorTy =
20660 LastLegalVectorType > LastLegalIntegerType && AllowVectors;
20661 unsigned LastLegalType =
20662 std::max(LastLegalVectorType, LastLegalIntegerType);
20663
20664 // We add +1 here because the LastXXX variables refer to location while
20665 // the NumElem refers to array/index size.
20666 unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
20667 NumElem = std::min(LastLegalType, NumElem);
20668 Align FirstLoadAlign = FirstLoad->getAlign();
20669
20670 if (NumElem < 2) {
20671 // We know that candidate stores are in order and of correct
20672 // shape. While there is no mergeable sequence from the
20673 // beginning one may start later in the sequence. The only
20674 // reason a merge of size N could have failed where another of
20675 // the same size would not have is if the alignment or either
20676 // the load or store has improved. Drop as many candidates as we
20677 // can here.
20678 unsigned NumSkip = 1;
20679 while ((NumSkip < LoadNodes.size()) &&
20680 (LoadNodes[NumSkip].MemNode->getAlign() <= FirstLoadAlign) &&
20681 (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
20682 NumSkip++;
20683 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
20684 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
20685 NumConsecutiveStores -= NumSkip;
20686 continue;
20687 }
20688
20689 // Check that we can merge these candidates without causing a cycle.
20690 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
20691 RootNode)) {
20692 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
20693 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
20694 NumConsecutiveStores -= NumElem;
20695 continue;
20696 }
20697
20698 // Find if it is better to use vectors or integers to load and store
20699 // to memory.
20700 EVT JointMemOpVT;
20701 if (UseVectorTy) {
20702 // Find a legal type for the vector store.
20703 unsigned Elts = NumElem * NumMemElts;
20704 JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
20705 } else {
20706 unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
20707 JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
20708 }
20709
20710 SDLoc LoadDL(LoadNodes[0].MemNode);
20711 SDLoc StoreDL(StoreNodes[0].MemNode);
20712
20713 // The merged loads are required to have the same incoming chain, so
20714 // using the first's chain is acceptable.
20715
20716 SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
20717 bool CanReusePtrInfo = hasSameUnderlyingObj(StoreNodes);
20718 AddToWorklist(NewStoreChain.getNode());
20719
20720 MachineMemOperand::Flags LdMMOFlags =
20721 isDereferenceable ? MachineMemOperand::MODereferenceable
20722 : MachineMemOperand::MONone;
20723 if (IsNonTemporalLoad)
20724 LdMMOFlags |= MachineMemOperand::MONonTemporal;
20725
20726 LdMMOFlags |= TLI.getTargetMMOFlags(*FirstLoad);
20727
20728 MachineMemOperand::Flags StMMOFlags = IsNonTemporalStore
20729 ? MachineMemOperand::MONonTemporal
20730 : MachineMemOperand::MONone;
20731
20732 StMMOFlags |= TLI.getTargetMMOFlags(*StoreNodes[0].MemNode);
20733
20734 SDValue NewLoad, NewStore;
20735 if (UseVectorTy || !DoIntegerTruncate) {
20736 NewLoad = DAG.getLoad(
20737 JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(),
20738 FirstLoad->getPointerInfo(), FirstLoadAlign, LdMMOFlags);
20739 SDValue StoreOp = NewLoad;
20740 if (NeedRotate) {
20741 unsigned LoadWidth = ElementSizeBytes * 8 * 2;
20742 assert(JointMemOpVT == EVT::getIntegerVT(Context, LoadWidth) &&
20743 "Unexpected type for rotate-able load pair");
20744 SDValue RotAmt =
20745 DAG.getShiftAmountConstant(LoadWidth / 2, JointMemOpVT, LoadDL);
20746 // Target can convert to the identical ROTR if it does not have ROTL.
20747 StoreOp = DAG.getNode(ISD::ROTL, LoadDL, JointMemOpVT, NewLoad, RotAmt);
20748 }
20749 NewStore = DAG.getStore(
20750 NewStoreChain, StoreDL, StoreOp, FirstInChain->getBasePtr(),
20751 CanReusePtrInfo ? FirstInChain->getPointerInfo()
20752 : MachinePointerInfo(FirstStoreAS),
20753 FirstStoreAlign, StMMOFlags);
20754 } else { // This must be the truncstore/extload case
20755 EVT ExtendedTy =
20756 TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
20757 NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
20758 FirstLoad->getChain(), FirstLoad->getBasePtr(),
20759 FirstLoad->getPointerInfo(), JointMemOpVT,
20760 FirstLoadAlign, LdMMOFlags);
20761 NewStore = DAG.getTruncStore(
20762 NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
20763 CanReusePtrInfo ? FirstInChain->getPointerInfo()
20764 : MachinePointerInfo(FirstStoreAS),
20765 JointMemOpVT, FirstInChain->getAlign(),
20766 FirstInChain->getMemOperand()->getFlags());
20767 }
20768
20769 // Transfer chain users from old loads to the new load.
20770 for (unsigned i = 0; i < NumElem; ++i) {
20771 LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
20772 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
20773 SDValue(NewLoad.getNode(), 1));
20774 }
20775
20776 // Replace all stores with the new store. Recursively remove corresponding
20777 // values if they are no longer used.
20778 for (unsigned i = 0; i < NumElem; ++i) {
20779 SDValue Val = StoreNodes[i].MemNode->getOperand(1);
20780 CombineTo(StoreNodes[i].MemNode, NewStore);
20781 if (Val->use_empty())
20782 recursivelyDeleteUnusedNodes(Val.getNode());
20783 }
20784
20785 MadeChange = true;
20786 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
20787 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
20788 NumConsecutiveStores -= NumElem;
20789 }
20790 return MadeChange;
20791}
20792
20793bool DAGCombiner::mergeConsecutiveStores(StoreSDNode *St) {
20794 if (OptLevel == CodeGenOptLevel::None || !EnableStoreMerging)
20795 return false;
20796
20797 // TODO: Extend this function to merge stores of scalable vectors.
20798 // (i.e. two <vscale x 8 x i8> stores can be merged to one <vscale x 16 x i8>
20799 // store since we know <vscale x 16 x i8> is exactly twice as large as
20800 // <vscale x 8 x i8>). Until then, bail out for scalable vectors.
20801 EVT MemVT = St->getMemoryVT();
20802 if (MemVT.isScalableVT())
20803 return false;
20804 if (!MemVT.isSimple() || MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
20805 return false;
20806
20807 // This function cannot currently deal with non-byte-sized memory sizes.
20808 int64_t ElementSizeBytes = MemVT.getStoreSize();
20809 if (ElementSizeBytes * 8 != (int64_t)MemVT.getSizeInBits())
20810 return false;
20811
20812 // Do not bother looking at stored values that are not constants, loads, or
20813 // extracted vector elements.
20814 SDValue StoredVal = peekThroughBitcasts(St->getValue());
20815 const StoreSource StoreSrc = getStoreSource(StoredVal);
20816 if (StoreSrc == StoreSource::Unknown)
20817 return false;
20818
20819 SmallVector<MemOpLink, 8> StoreNodes;
20820 SDNode *RootNode;
20821 // Find potential store merge candidates by searching through chain sub-DAG
20822 getStoreMergeCandidates(St, StoreNodes, RootNode);
20823
20824 // Check if there is anything to merge.
20825 if (StoreNodes.size() < 2)
20826 return false;
20827
20828 // Sort the memory operands according to their distance from the
20829 // base pointer.
20830 llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) {
20831 return LHS.OffsetFromBase < RHS.OffsetFromBase;
20832 });
20833
20834 bool AllowVectors = !DAG.getMachineFunction().getFunction().hasFnAttribute(
20835 Attribute::NoImplicitFloat);
20836 bool IsNonTemporalStore = St->isNonTemporal();
20837 bool IsNonTemporalLoad = StoreSrc == StoreSource::Load &&
20838 cast<LoadSDNode>(StoredVal)->isNonTemporal();
20839
20840 // Store Merge attempts to merge the lowest stores first. This generally
20841 // works out: if a merge succeeds, the remaining stores are re-checked
20842 // after the first collection of stores has been merged. However, in the
20843 // case that a non-mergeable store is found first, e.g., {p[-2],
20844 // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent
20845 // mergeable cases. To prevent this, we prune such stores from the
20846 // front of StoreNodes here.
20847 bool MadeChange = false;
20848 while (StoreNodes.size() > 1) {
20849 unsigned NumConsecutiveStores =
20850 getConsecutiveStores(StoreNodes, ElementSizeBytes);
20851 // There are no more stores in the list to examine.
20852 if (NumConsecutiveStores == 0)
20853 return MadeChange;
20854
20855 // We have at least 2 consecutive stores. Try to merge them.
20856 assert(NumConsecutiveStores >= 2 && "Expected at least 2 stores");
20857 switch (StoreSrc) {
20858 case StoreSource::Constant:
20859 MadeChange |= tryStoreMergeOfConstants(StoreNodes, NumConsecutiveStores,
20860 MemVT, RootNode, AllowVectors);
20861 break;
20862
20863 case StoreSource::Extract:
20864 MadeChange |= tryStoreMergeOfExtracts(StoreNodes, NumConsecutiveStores,
20865 MemVT, RootNode);
20866 break;
20867
20868 case StoreSource::Load:
20869 MadeChange |= tryStoreMergeOfLoads(StoreNodes, NumConsecutiveStores,
20870 MemVT, RootNode, AllowVectors,
20871 IsNonTemporalStore, IsNonTemporalLoad);
20872 break;
20873
20874 default:
20875 llvm_unreachable("Unhandled store source type");
20876 }
20877 }
20878 return MadeChange;
20879}
20880
20881SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
20882 SDLoc SL(ST);
20883 SDValue ReplStore;
20884
20885 // Replace the chain to avoid dependency.
20886 if (ST->isTruncatingStore()) {
20887 ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
20888 ST->getBasePtr(), ST->getMemoryVT(),
20889 ST->getMemOperand());
20890 } else {
20891 ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
20892 ST->getMemOperand());
20893 }
20894
20895 // Create token to keep both nodes around.
20896 SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
20897 MVT::Other, ST->getChain(), ReplStore);
20898
20899 // Make sure the new and old chains are cleaned up.
20900 AddToWorklist(Token.getNode());
20901
20902 // Don't add users to work list.
20903 return CombineTo(ST, Token, false);
20904}
20905
20906SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
20907 SDValue Value = ST->getValue();
20908 if (Value.getOpcode() == ISD::TargetConstantFP)
20909 return SDValue();
20910
20911 if (!ISD::isNormalStore(ST))
20912 return SDValue();
20913
20914 SDLoc DL(ST);
20915
20916 SDValue Chain = ST->getChain();
20917 SDValue Ptr = ST->getBasePtr();
20918
20919 const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
20920
20921 // NOTE: If the original store is volatile, this transform must not increase
20922 // the number of stores. For example, on x86-32 an f64 can be stored in one
20923 // processor operation but an i64 (which is not legal) requires two. So the
20924 // transform should not be done in this case.
20925
20926 SDValue Tmp;
20927 switch (CFP->getSimpleValueType(0).SimpleTy) {
20928 default:
20929 llvm_unreachable("Unknown FP type");
20930 case MVT::f16: // We don't do this for these yet.
20931 case MVT::bf16:
20932 case MVT::f80:
20933 case MVT::f128:
20934 case MVT::ppcf128:
20935 return SDValue();
20936 case MVT::f32:
20937 if ((isTypeLegal(MVT::i32) && !LegalOperations && ST->isSimple()) ||
20938 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
20939 Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
20940 bitcastToAPInt().getZExtValue(), SDLoc(CFP),
20941 MVT::i32);
20942 return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
20943 }
20944
20945 return SDValue();
20946 case MVT::f64:
20947 if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
20948 ST->isSimple()) ||
20949 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
20950 Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
20951 getZExtValue(), SDLoc(CFP), MVT::i64);
20952 return DAG.getStore(Chain, DL, Tmp,
20953 Ptr, ST->getMemOperand());
20954 }
20955
20956 if (ST->isSimple() && TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32) &&
20957 !TLI.isFPImmLegal(CFP->getValueAPF(), MVT::f64)) {
20958 // Many FP stores are not made apparent until after legalize, e.g. for
20959 // argument passing. Since this is so common, custom legalize the
20960 // 64-bit integer store into two 32-bit stores.
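// For example (illustrative): storing the f64 constant 1.0 (bit pattern
// 0x3FF0000000000000) becomes an i32 store of 0x00000000 at Ptr and an i32
// store of 0x3FF00000 at Ptr+4 on a little-endian target.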
20961 uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
20962 SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
20963 SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
20964 if (DAG.getDataLayout().isBigEndian())
20965 std::swap(Lo, Hi);
20966
20967 MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
20968 AAMDNodes AAInfo = ST->getAAInfo();
20969
20970 SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
20971 ST->getOriginalAlign(), MMOFlags, AAInfo);
20972 Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(4), DL);
20973 SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
20974 ST->getPointerInfo().getWithOffset(4),
20975 ST->getOriginalAlign(), MMOFlags, AAInfo);
20976 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
20977 St0, St1);
20978 }
20979
20980 return SDValue();
20981 }
20982}
20983
20984// (store (insert_vector_elt (load p), x, i), p) -> (store x, p+offset)
20985//
20986// If a store of a load with an element inserted into it has no other
20987// uses in between the chain, then we can consider the vector store
20988// dead and replace it with just the single scalar element store.
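// For example (illustrative): for a v4i32 load and store of the same address,
//   (store (insert_vector_elt (load p), x, 2), p)
// can become a single i32 store of x to p+8, leaving the vector load unused.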
20989SDValue DAGCombiner::replaceStoreOfInsertLoad(StoreSDNode *ST) {
20990 SDLoc DL(ST);
20991 SDValue Value = ST->getValue();
20992 SDValue Ptr = ST->getBasePtr();
20993 SDValue Chain = ST->getChain();
20994 if (Value.getOpcode() != ISD::INSERT_VECTOR_ELT || !Value.hasOneUse())
20995 return SDValue();
20996
20997 SDValue Elt = Value.getOperand(1);
20998 SDValue Idx = Value.getOperand(2);
20999
21000 // If the element isn't byte sized or is implicitly truncated then we can't
21001 // compute an offset.
21002 EVT EltVT = Elt.getValueType();
21003 if (!EltVT.isByteSized() ||
21004 EltVT != Value.getOperand(0).getValueType().getVectorElementType())
21005 return SDValue();
21006
21007 auto *Ld = dyn_cast<LoadSDNode>(Value.getOperand(0));
21008 if (!Ld || Ld->getBasePtr() != Ptr ||
21009 ST->getMemoryVT() != Ld->getMemoryVT() || !ST->isSimple() ||
21010 !ISD::isNormalStore(ST) ||
21011 Ld->getAddressSpace() != ST->getAddressSpace() ||
21012 !Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1)))
21013 return SDValue();
21014
21015 unsigned IsFast;
21016 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
21017 Elt.getValueType(), ST->getAddressSpace(),
21018 ST->getAlign(), ST->getMemOperand()->getFlags(),
21019 &IsFast) ||
21020 !IsFast)
21021 return SDValue();
21022
21023 MachinePointerInfo PointerInfo(ST->getAddressSpace());
21024
21025 // If the offset is a known constant then try to recover the pointer
21026 // info
21027 SDValue NewPtr;
21028 if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx)) {
21029 unsigned COffset = CIdx->getSExtValue() * EltVT.getSizeInBits() / 8;
21030 NewPtr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(COffset), DL);
21031 PointerInfo = ST->getPointerInfo().getWithOffset(COffset);
21032 } else {
21033 NewPtr = TLI.getVectorElementPointer(DAG, Ptr, Value.getValueType(), Idx);
21034 }
21035
21036 return DAG.getStore(Chain, DL, Elt, NewPtr, PointerInfo, ST->getAlign(),
21037 ST->getMemOperand()->getFlags());
21038}
21039
21040SDValue DAGCombiner::visitSTORE(SDNode *N) {
21041 StoreSDNode *ST = cast<StoreSDNode>(N);
21042 SDValue Chain = ST->getChain();
21043 SDValue Value = ST->getValue();
21044 SDValue Ptr = ST->getBasePtr();
21045
21046 // If this is a store of a bit convert, store the input value if the
21047 // resultant store does not need a higher alignment than the original.
21048 if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
21049 ST->isUnindexed()) {
21050 EVT SVT = Value.getOperand(0).getValueType();
21051 // If the store is volatile, we only want to change the store type if the
21052 // resulting store is legal. Otherwise we might increase the number of
21053 // memory accesses. We don't care if the original type was legal or not
21054 // as we assume software couldn't rely on the number of accesses of an
21055 // illegal type.
21056 // TODO: May be able to relax for unordered atomics (see D66309)
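// For example (illustrative): (store (bitcast x:v2f32 to i64), p) can become
// (store x, p) when the v2f32 store is legal (or we are before legalization
// and the store is simple) and the target reports the bitcast-free store as
// beneficial.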
21057 if (((!LegalOperations && ST->isSimple()) ||
21058 TLI.isOperationLegal(ISD::STORE, SVT)) &&
21059 TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT,
21060 DAG, *ST->getMemOperand())) {
21061 return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
21062 ST->getMemOperand());
21063 }
21064 }
21065
21066 // Turn 'store undef, Ptr' -> nothing.
21067 if (Value.isUndef() && ST->isUnindexed())
21068 return Chain;
21069
21070 // Try to infer better alignment information than the store already has.
21071 if (OptLevel != CodeGenOptLevel::None && ST->isUnindexed() &&
21072 !ST->isAtomic()) {
21073 if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
21074 if (*Alignment > ST->getAlign() &&
21075 isAligned(*Alignment, ST->getSrcValueOffset())) {
21076 SDValue NewStore =
21077 DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
21078 ST->getMemoryVT(), *Alignment,
21079 ST->getMemOperand()->getFlags(), ST->getAAInfo());
21080 // NewStore will always be N as we are only refining the alignment
21081 assert(NewStore.getNode() == N);
21082 (void)NewStore;
21083 }
21084 }
21085 }
21086
21087 // Try transforming a pair of floating point load / store ops to integer
21088 // load / store ops.
21089 if (SDValue NewST = TransformFPLoadStorePair(N))
21090 return NewST;
21091
21092 // Try transforming several stores into STORE (BSWAP).
21093 if (SDValue Store = mergeTruncStores(ST))
21094 return Store;
21095
21096 if (ST->isUnindexed()) {
21097 // Walk up chain skipping non-aliasing memory nodes, on this store and any
21098 // adjacent stores.
21099 if (findBetterNeighborChains(ST)) {
21100 // replaceStoreChain uses CombineTo, which handled all of the worklist
21101 // manipulation. Return the original node to not do anything else.
21102 return SDValue(ST, 0);
21103 }
21104 Chain = ST->getChain();
21105 }
21106
21107 // FIXME: is there such a thing as a truncating indexed store?
21108 if (ST->isTruncatingStore() && ST->isUnindexed() &&
21109 Value.getValueType().isInteger() &&
21110 (!isa<ConstantSDNode>(Value) ||
21111 !cast<ConstantSDNode>(Value)->isOpaque())) {
21112 // Convert a truncating store of an extension into a standard store.
21113 if ((Value.getOpcode() == ISD::ZERO_EXTEND ||
21114 Value.getOpcode() == ISD::SIGN_EXTEND ||
21115 Value.getOpcode() == ISD::ANY_EXTEND) &&
21116 Value.getOperand(0).getValueType() == ST->getMemoryVT() &&
21117 TLI.isOperationLegalOrCustom(ISD::STORE, ST->getMemoryVT()))
21118 return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
21119 ST->getMemOperand());
21120
21121 APInt TruncDemandedBits =
21122 APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
21123 ST->getMemoryVT().getScalarSizeInBits());
21124
21125 // See if we can simplify the operation with SimplifyDemandedBits, which
21126 // only works if the value has a single use.
21127 AddToWorklist(Value.getNode());
21128 if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
21129 // Re-visit the store if anything changed and the store hasn't been merged
21130 // with another node (N is deleted). SimplifyDemandedBits will add Value's
21131 // node back to the worklist if necessary, but we also need to re-visit
21132 // the Store node itself.
21133 if (N->getOpcode() != ISD::DELETED_NODE)
21134 AddToWorklist(N);
21135 return SDValue(N, 0);
21136 }
21137
21138 // Otherwise, see if we can simplify the input to this truncstore with
21139 // knowledge that only the low bits are being used. For example:
21140 // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
21141 if (SDValue Shorter =
21142 TLI.SimplifyMultipleUseDemandedBits(Value, TruncDemandedBits, DAG))
21143 return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(),
21144 ST->getMemOperand());
21145
21146 // If we're storing a truncated constant, see if we can simplify it.
21147 // TODO: Move this to targetShrinkDemandedConstant?
21148 if (auto *Cst = dyn_cast<ConstantSDNode>(Value))
21149 if (!Cst->isOpaque()) {
21150 const APInt &CValue = Cst->getAPIntValue();
21151 APInt NewVal = CValue & TruncDemandedBits;
21152 if (NewVal != CValue) {
21153 SDValue Shorter =
21154 DAG.getConstant(NewVal, SDLoc(N), Value.getValueType());
21155 return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr,
21156 ST->getMemoryVT(), ST->getMemOperand());
21157 }
21158 }
21159 }
21160
21161 // If this is a load followed by a store to the same location, then the store
21162 // is dead/noop. Peek through any truncates if canCombineTruncStore failed.
21163 // TODO: Add big-endian truncate support with test coverage.
21164 // TODO: Can relax for unordered atomics (see D66309)
21165 SDValue TruncVal = DAG.getDataLayout().isLittleEndian()
21166 ? peekThroughTruncates(Value)
21167 : Value;
21168 if (auto *Ld = dyn_cast<LoadSDNode>(TruncVal)) {
21169 if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
21170 ST->isUnindexed() && ST->isSimple() &&
21171 Ld->getAddressSpace() == ST->getAddressSpace() &&
21172 // There can't be any side effects between the load and store, such as
21173 // a call or store.
21174 Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
21175 // The store is dead, remove it.
21176 return Chain;
21177 }
21178 }
21179
21180 // Try scalarizing vector stores of loads where we only change one element
21181 if (SDValue NewST = replaceStoreOfInsertLoad(ST))
21182 return NewST;
21183
21184 // TODO: Can relax for unordered atomics (see D66309)
21185 if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
21186 if (ST->isUnindexed() && ST->isSimple() &&
21187 ST1->isUnindexed() && ST1->isSimple()) {
21188 if (OptLevel != CodeGenOptLevel::None && ST1->getBasePtr() == Ptr &&
21189 ST1->getValue() == Value && ST->getMemoryVT() == ST1->getMemoryVT() &&
21190 ST->getAddressSpace() == ST1->getAddressSpace()) {
21191 // If this is a store followed by a store with the same value to the
21192 // same location, then the store is dead/noop.
21193 return Chain;
21194 }
21195
21196 if (OptLevel != CodeGenOptLevel::None && ST1->hasOneUse() &&
21197 !ST1->getBasePtr().isUndef() &&
21198 ST->getAddressSpace() == ST1->getAddressSpace()) {
21199 // If either store has a scalable vector type, the two sizes cannot be
21200 // compared directly because the scalable store's final size is unknown
21201 // until runtime. Only remove the earlier store when the base pointers
21202 // match and its size is known to be no larger than the later store's.
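// E.g. (illustrative): a fixed <4 x i32> store followed on the chain by a
// <vscale x 4 x i32> store to the same base pointer allows the earlier
// fixed store to be removed, since vscale >= 1 guarantees the scalable
// store covers at least as many bytes.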
21203 if (ST->getMemoryVT().isScalableVector() ||
21204 ST1->getMemoryVT().isScalableVector()) {
21205 if (ST1->getBasePtr() == Ptr &&
21206 TypeSize::isKnownLE(ST1->getMemoryVT().getStoreSize(),
21207 ST->getMemoryVT().getStoreSize())) {
21208 CombineTo(ST1, ST1->getChain());
21209 return SDValue(N, 0);
21210 }
21211 } else {
21212 const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG);
21213 const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG);
21214 // If the preceding store writes to a subset of the current store's
21215 // location and no other node is chained to that store, we can
21216 // effectively drop the preceding store. Do not remove stores to undef
21217 // as they may be used as data sinks.
21218 if (STBase.contains(DAG, ST->getMemoryVT().getFixedSizeInBits(),
21219 ChainBase,
21220 ST1->getMemoryVT().getFixedSizeInBits())) {
21221 CombineTo(ST1, ST1->getChain());
21222 return SDValue(N, 0);
21223 }
21224 }
21225 }
21226 }
21227 }
21228
21229 // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
21230 // truncating store. We can do this even if this is already a truncstore.
21231 if ((Value.getOpcode() == ISD::FP_ROUND ||
21232 Value.getOpcode() == ISD::TRUNCATE) &&
21233 Value->hasOneUse() && ST->isUnindexed() &&
21234 TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
21235 ST->getMemoryVT(), LegalOperations)) {
21236 return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
21237 Ptr, ST->getMemoryVT(), ST->getMemOperand());
21238 }
21239
21240 // Always perform this optimization before types are legal. If the target
21241 // prefers, also try this after legalization to catch stores that were created
21242 // by intrinsics or other nodes.
21243 if (!LegalTypes || (TLI.mergeStoresAfterLegalization(ST->getMemoryVT()))) {
21244 while (true) {
21245 // There can be multiple store sequences on the same chain.
21246 // Keep trying to merge store sequences until we are unable to do so
21247 // or until we merge the last store on the chain.
21248 bool Changed = mergeConsecutiveStores(ST);
21249 if (!Changed) break;
21250 // Return N as merge only uses CombineTo and no worklist clean
21251 // up is necessary.
21252 if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
21253 return SDValue(N, 0);
21254 }
21255 }
21256
21257 // Try transforming N to an indexed store.
21258 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
21259 return SDValue(N, 0);
21260
21261 // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
21262 //
21263 // Make sure to do this only after attempting to merge stores in order to
21264 // avoid changing the types of some subset of stores due to visit order,
21265 // preventing their merging.
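// For example, 'store float 1.0' becomes 'store i32 0x3F800000' (the
// IEEE-754 bit pattern of 1.0f), which is often cheaper than materializing
// the floating-point constant.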
21266 if (isa<ConstantFPSDNode>(ST->getValue())) {
21267 if (SDValue NewSt = replaceStoreOfFPConstant(ST))
21268 return NewSt;
21269 }
21270
21271 if (SDValue NewSt = splitMergedValStore(ST))
21272 return NewSt;
21273
21274 return ReduceLoadOpStoreWidth(N);
21275}
21276
21277SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
21278 const auto *LifetimeEnd = cast<LifetimeSDNode>(N);
21279 if (!LifetimeEnd->hasOffset())
21280 return SDValue();
21281
21282 const BaseIndexOffset LifetimeEndBase(N->getOperand(1), SDValue(),
21283 LifetimeEnd->getOffset(), false);
21284
21285 // We walk up the chains to find stores.
21286 SmallVector<SDValue, 8> Chains = {N->getOperand(0)};
21287 while (!Chains.empty()) {
21288 SDValue Chain = Chains.pop_back_val();
21289 if (!Chain.hasOneUse())
21290 continue;
21291 switch (Chain.getOpcode()) {
21292 case ISD::TokenFactor:
21293 for (unsigned Nops = Chain.getNumOperands(); Nops;)
21294 Chains.push_back(Chain.getOperand(--Nops));
21295 break;
21296 case ISD::LIFETIME_START:
21297 case ISD::LIFETIME_END:
21298 // We can forward past any lifetime start/end that can be proven not to
21299 // alias the node.
21300 if (!mayAlias(Chain.getNode(), N))
21301 Chains.push_back(Chain.getOperand(0));
21302 break;
21303 case ISD::STORE: {
21304 StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain);
21305 // TODO: Can relax for unordered atomics (see D66309)
21306 if (!ST->isSimple() || ST->isIndexed())
21307 continue;
21308 const TypeSize StoreSize = ST->getMemoryVT().getStoreSize();
21309 // The bounds of a scalable store are not known until runtime, so this
21310 // store cannot be elided.
21311 if (StoreSize.isScalable())
21312 continue;
21313 const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG);
21314 // If we store purely within object bounds just before its lifetime ends,
21315 // we can remove the store.
21316 if (LifetimeEndBase.contains(DAG, LifetimeEnd->getSize() * 8, StoreBase,
21317 StoreSize.getFixedValue() * 8)) {
21318 LLVM_DEBUG(dbgs() << "\nRemoving store:"; StoreBase.dump();
21319 dbgs() << "\nwithin LIFETIME_END of : ";
21320 LifetimeEndBase.dump(); dbgs() << "\n");
21321 CombineTo(ST, ST->getChain());
21322 return SDValue(N, 0);
21323 }
21324 }
21325 }
21326 }
21327 return SDValue();
21328}
21329
21330/// For the instruction sequence of store below, F and I values
21331/// are bundled together as an i64 value before being stored into memory.
21332/// Sometimes it is more efficient to generate separate stores for F and I,
21333/// which can remove the bitwise instructions or sink them to colder places.
21334///
21335/// (store (or (zext (bitcast F to i32) to i64),
21336/// (shl (zext I to i64), 32)), addr) -->
21337/// (store F, addr) and (store I, addr+4)
21338///
21339/// Similarly, splitting for other merged stores can also be beneficial, like:
21340/// For pair of {i32, i32}, i64 store --> two i32 stores.
21341/// For pair of {i32, i16}, i64 store --> two i32 stores.
21342/// For pair of {i16, i16}, i32 store --> two i16 stores.
21343/// For pair of {i16, i8}, i32 store --> two i16 stores.
21344/// For pair of {i8, i8}, i16 store --> two i8 stores.
21345///
21346/// We allow each target to determine specifically which kind of splitting is
21347/// supported.
21348///
21349/// The store patterns are commonly seen from the simple code snippet below
21350/// if only std::make_pair(...) is SROA-transformed before being inlined into hoo.
21351/// void goo(const std::pair<int, float> &);
21352/// hoo() {
21353/// ...
21354/// goo(std::make_pair(tmp, ftmp));
21355/// ...
21356/// }
21357///
21358SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
21359 if (OptLevel == CodeGenOptLevel::None)
21360 return SDValue();
21361
21362 // Can't change the number of memory accesses for a volatile store or break
21363 // atomicity for an atomic one.
21364 if (!ST->isSimple())
21365 return SDValue();
21366
21367 SDValue Val = ST->getValue();
21368 SDLoc DL(ST);
21369
21370 // Match OR operand.
21371 if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
21372 return SDValue();
21373
21374 // Match SHL operand and get Lower and Higher parts of Val.
21375 SDValue Op1 = Val.getOperand(0);
21376 SDValue Op2 = Val.getOperand(1);
21377 SDValue Lo, Hi;
21378 if (Op1.getOpcode() != ISD::SHL) {
21379 std::swap(Op1, Op2);
21380 if (Op1.getOpcode() != ISD::SHL)
21381 return SDValue();
21382 }
21383 Lo = Op2;
21384 Hi = Op1.getOperand(0);
21385 if (!Op1.hasOneUse())
21386 return SDValue();
21387
21388 // Match shift amount to HalfValBitSize.
21389 unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
21390 ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
21391 if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
21392 return SDValue();
21393
21394 // Lo and Hi must be zero-extended from integer types whose width is at
21395 // most HalfValBitSize.
21396 if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
21397 !Lo.getOperand(0).getValueType().isScalarInteger() ||
21398 Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
21399 Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
21400 !Hi.getOperand(0).getValueType().isScalarInteger() ||
21401 Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
21402 return SDValue();
21403
21404 // Use the EVT of low and high parts before bitcast as the input
21405 // of target query.
21406 EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
21407 ? Lo.getOperand(0).getValueType()
21408 : Lo.getValueType();
21409 EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
21410 ? Hi.getOperand(0).getValueType()
21411 : Hi.getValueType();
21412 if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
21413 return SDValue();
21414
21415 // Start to split store.
21416 MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
21417 AAMDNodes AAInfo = ST->getAAInfo();
21418
21419 // Change the sizes of Lo and Hi's value types to HalfValBitSize.
21420 EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
21421 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
21422 Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
21423
21424 SDValue Chain = ST->getChain();
21425 SDValue Ptr = ST->getBasePtr();
21426 // Lower value store.
21427 SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
21428 ST->getOriginalAlign(), MMOFlags, AAInfo);
21429 Ptr =
21430 DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(HalfValBitSize / 8), DL);
21431 // Higher value store.
21432 SDValue St1 = DAG.getStore(
21433 St0, DL, Hi, Ptr, ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
21434 ST->getOriginalAlign(), MMOFlags, AAInfo);
21435 return St1;
21436}
21437
21438// Merge an insertion into an existing shuffle:
21439// (insert_vector_elt (vector_shuffle X, Y, Mask),
21440 //                    (extract_vector_elt X, N), InsIndex)
21441// --> (vector_shuffle X, Y, NewMask)
21442// and variations where shuffle operands may be CONCAT_VECTORS.
21443 static bool mergeEltWithShuffle(SDValue &X, SDValue &Y, ArrayRef<int> Mask,
21444 SmallVectorImpl<int> &NewMask, SDValue Elt,
21445 unsigned InsIndex) {
21446 if (Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
21447 !isa<ConstantSDNode>(Elt.getOperand(1)))
21448 return false;
21449
21450 // Vec's operand 0 is using indices from 0 to N-1 and
21451 // operand 1 from N to 2N - 1, where N is the number of
21452 // elements in the vectors.
21453 SDValue InsertVal0 = Elt.getOperand(0);
21454 int ElementOffset = -1;
21455
21456 // We explore the inputs of the shuffle in order to see if we find the
21457 // source of the extract_vector_elt. If so, we can use it to modify the
21458 // shuffle rather than perform an insert_vector_elt.
21459 SmallVector<std::pair<int, SDValue>> ArgWorkList;
21460 ArgWorkList.emplace_back(Mask.size(), Y);
21461 ArgWorkList.emplace_back(0, X);
21462
21463 while (!ArgWorkList.empty()) {
21464 int ArgOffset;
21465 SDValue ArgVal;
21466 std::tie(ArgOffset, ArgVal) = ArgWorkList.pop_back_val();
21467
21468 if (ArgVal == InsertVal0) {
21469 ElementOffset = ArgOffset;
21470 break;
21471 }
21472
21473 // Peek through concat_vector.
21474 if (ArgVal.getOpcode() == ISD::CONCAT_VECTORS) {
21475 int CurrentArgOffset =
21476 ArgOffset + ArgVal.getValueType().getVectorNumElements();
21477 int Step = ArgVal.getOperand(0).getValueType().getVectorNumElements();
21478 for (SDValue Op : reverse(ArgVal->ops())) {
21479 CurrentArgOffset -= Step;
21480 ArgWorkList.emplace_back(CurrentArgOffset, Op);
21481 }
21482
21483 // Make sure we went through all the elements and did not screw up index
21484 // computation.
21485 assert(CurrentArgOffset == ArgOffset);
21486 }
21487 }
21488
21489 // If we failed to find a match, see if we can replace an UNDEF shuffle
21490 // operand.
21491 if (ElementOffset == -1) {
21492 if (!Y.isUndef() || InsertVal0.getValueType() != Y.getValueType())
21493 return false;
21494 ElementOffset = Mask.size();
21495 Y = InsertVal0;
21496 }
21497
21498 NewMask.assign(Mask.begin(), Mask.end());
21499 NewMask[InsIndex] = ElementOffset + Elt.getConstantOperandVal(1);
21500 assert(NewMask[InsIndex] < (int)(2 * Mask.size()) && NewMask[InsIndex] >= 0 &&
21501 "NewMask[InsIndex] is out of bound");
21502 return true;
21503}
21504
21505// Merge an insertion into an existing shuffle:
21506// (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N),
21507// InsIndex)
21508// --> (vector_shuffle X, Y) and variations where shuffle operands may be
21509// CONCAT_VECTORS.
21510SDValue DAGCombiner::mergeInsertEltWithShuffle(SDNode *N, unsigned InsIndex) {
21511 assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
21512 "Expected extract_vector_elt");
21513 SDValue InsertVal = N->getOperand(1);
21514 SDValue Vec = N->getOperand(0);
21515
21516 auto *SVN = dyn_cast<ShuffleVectorSDNode>(Vec);
21517 if (!SVN || !Vec.hasOneUse())
21518 return SDValue();
21519
21520 ArrayRef<int> Mask = SVN->getMask();
21521 SDValue X = Vec.getOperand(0);
21522 SDValue Y = Vec.getOperand(1);
21523
21524 SmallVector<int, 16> NewMask(Mask);
21525 if (mergeEltWithShuffle(X, Y, Mask, NewMask, InsertVal, InsIndex)) {
21526 SDValue LegalShuffle = TLI.buildLegalVectorShuffle(
21527 Vec.getValueType(), SDLoc(N), X, Y, NewMask, DAG);
21528 if (LegalShuffle)
21529 return LegalShuffle;
21530 }
21531
21532 return SDValue();
21533}
21534
21535// Convert a disguised subvector insertion into a shuffle:
21536// insert_vector_elt V, (bitcast X from vector type), IdxC -->
21537// bitcast(shuffle (bitcast V), (extended X), Mask)
21538// Note: We do not use an insert_subvector node because that requires a
21539// legal subvector type.
21540SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
21541 assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
21542 "Expected extract_vector_elt");
21543 SDValue InsertVal = N->getOperand(1);
21544
21545 if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
21546 !InsertVal.getOperand(0).getValueType().isVector())
21547 return SDValue();
21548
21549 SDValue SubVec = InsertVal.getOperand(0);
21550 SDValue DestVec = N->getOperand(0);
21551 EVT SubVecVT = SubVec.getValueType();
21552 EVT VT = DestVec.getValueType();
21553 unsigned NumSrcElts = SubVecVT.getVectorNumElements();
21554 // If the source only has a single vector element, the cost of building a
21555 // vector from it is likely to exceed the cost of an insert_vector_elt.
21556 if (NumSrcElts == 1)
21557 return SDValue();
21558 unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits();
21559 unsigned NumMaskVals = ExtendRatio * NumSrcElts;
21560
21561 // Step 1: Create a shuffle mask that implements this insert operation. The
21562 // vector that we are inserting into will be operand 0 of the shuffle, so
21563 // those elements are just 'i'. The inserted subvector is in the first
21564 // positions of operand 1 of the shuffle. Example:
21565 // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7}
21566 SmallVector<int, 16> Mask(NumMaskVals);
21567 for (unsigned i = 0; i != NumMaskVals; ++i) {
21568 if (i / NumSrcElts == InsIndex)
21569 Mask[i] = (i % NumSrcElts) + NumMaskVals;
21570 else
21571 Mask[i] = i;
21572 }
21573
21574 // Bail out if the target can not handle the shuffle we want to create.
21575 EVT SubVecEltVT = SubVecVT.getVectorElementType();
21576 EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals);
21577 if (!TLI.isShuffleMaskLegal(Mask, ShufVT))
21578 return SDValue();
21579
21580 // Step 2: Create a wide vector from the inserted source vector by appending
21581 // undefined elements. This is the same size as our destination vector.
21582 SDLoc DL(N);
21583 SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT));
21584 ConcatOps[0] = SubVec;
21585 SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps);
21586
21587 // Step 3: Shuffle in the padded subvector.
21588 SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec);
21589 SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask);
21590 AddToWorklist(PaddedSubV.getNode());
21591 AddToWorklist(DestVecBC.getNode());
21592 AddToWorklist(Shuf.getNode());
21593 return DAG.getBitcast(VT, Shuf);
21594}
21595
21596// Combine insert(shuffle(load, <u,0,1,2>), load, 0) into a single load if
21597 // possible and the new load will be quick. We use more loads but fewer shuffles
21598// and inserts.
21599SDValue DAGCombiner::combineInsertEltToLoad(SDNode *N, unsigned InsIndex) {
21600 EVT VT = N->getValueType(0);
21601
21602 // InsIndex is expected to be the first or last lane.
21603 if (!VT.isFixedLengthVector() ||
21604 (InsIndex != 0 && InsIndex != VT.getVectorNumElements() - 1))
21605 return SDValue();
21606
21607 // Look for a shuffle with the mask u,0,1,2,3,4,5,6 or 1,2,3,4,5,6,7,u
21608 // depending on the InsIndex.
21609 auto *Shuffle = dyn_cast<ShuffleVectorSDNode>(N->getOperand(0));
21610 SDValue Scalar = N->getOperand(1);
21611 if (!Shuffle || !all_of(enumerate(Shuffle->getMask()), [&](auto P) {
21612 return InsIndex == P.index() || P.value() < 0 ||
21613 (InsIndex == 0 && P.value() == (int)P.index() - 1) ||
21614 (InsIndex == VT.getVectorNumElements() - 1 &&
21615 P.value() == (int)P.index() + 1);
21616 }))
21617 return SDValue();
21618
21619 // We optionally skip over an extend so long as both loads are extended in the
21620 // same way from the same type.
21621 unsigned Extend = 0;
21622 if (Scalar.getOpcode() == ISD::ZERO_EXTEND ||
21623 Scalar.getOpcode() == ISD::SIGN_EXTEND ||
21624 Scalar.getOpcode() == ISD::ANY_EXTEND) {
21625 Extend = Scalar.getOpcode();
21626 Scalar = Scalar.getOperand(0);
21627 }
21628
21629 auto *ScalarLoad = dyn_cast<LoadSDNode>(Scalar);
21630 if (!ScalarLoad)
21631 return SDValue();
21632
21633 SDValue Vec = Shuffle->getOperand(0);
21634 if (Extend) {
21635 if (Vec.getOpcode() != Extend)
21636 return SDValue();
21637 Vec = Vec.getOperand(0);
21638 }
21639 auto *VecLoad = dyn_cast<LoadSDNode>(Vec);
21640 if (!VecLoad || Vec.getValueType().getScalarType() != Scalar.getValueType())
21641 return SDValue();
21642
21643 int EltSize = ScalarLoad->getValueType(0).getScalarSizeInBits();
21644 if (EltSize == 0 || EltSize % 8 != 0 || !ScalarLoad->isSimple() ||
21645 !VecLoad->isSimple() || VecLoad->getExtensionType() != ISD::NON_EXTLOAD ||
21646 ScalarLoad->getExtensionType() != ISD::NON_EXTLOAD ||
21647 ScalarLoad->getAddressSpace() != VecLoad->getAddressSpace())
21648 return SDValue();
21649
21650 // Check that the offset between the pointers produces a single contiguous
21651 // load.
21652 if (InsIndex == 0) {
21653 if (!DAG.areNonVolatileConsecutiveLoads(ScalarLoad, VecLoad, EltSize / 8,
21654 -1))
21655 return SDValue();
21656 } else {
21657 if (!DAG.areNonVolatileConsecutiveLoads(
21658 VecLoad, ScalarLoad, VT.getVectorNumElements() * EltSize / 8, -1))
21659 return SDValue();
21660 }
21661
21662 // And that the new unaligned load will be fast.
21663 unsigned IsFast = 0;
21664 Align NewAlign = commonAlignment(VecLoad->getAlign(), EltSize / 8);
21665 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
21666 Vec.getValueType(), VecLoad->getAddressSpace(),
21667 NewAlign, VecLoad->getMemOperand()->getFlags(),
21668 &IsFast) ||
21669 !IsFast)
21670 return SDValue();
21671
21672 // Calculate the new Ptr and create the new load.
21673 SDLoc DL(N);
21674 SDValue Ptr = ScalarLoad->getBasePtr();
21675 if (InsIndex != 0)
21676 Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), VecLoad->getBasePtr(),
21677 DAG.getConstant(EltSize / 8, DL, Ptr.getValueType()));
21678 MachinePointerInfo PtrInfo =
21679 InsIndex == 0 ? ScalarLoad->getPointerInfo()
21680 : VecLoad->getPointerInfo().getWithOffset(EltSize / 8);
21681
21682 SDValue Load = DAG.getLoad(VecLoad->getValueType(0), DL,
21683 ScalarLoad->getChain(), Ptr, PtrInfo, NewAlign);
21684 DAG.makeEquivalentMemoryOrdering(ScalarLoad, Load.getValue(1));
21685 DAG.makeEquivalentMemoryOrdering(VecLoad, Load.getValue(1));
21686 return Extend ? DAG.getNode(Extend, DL, VT, Load) : Load;
21687}
21688
21689SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
21690 SDValue InVec = N->getOperand(0);
21691 SDValue InVal = N->getOperand(1);
21692 SDValue EltNo = N->getOperand(2);
21693 SDLoc DL(N);
21694
21695 EVT VT = InVec.getValueType();
21696 auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
21697
21698 // Insert into out-of-bounds element is undefined.
21699 if (IndexC && VT.isFixedLengthVector() &&
21700 IndexC->getZExtValue() >= VT.getVectorNumElements())
21701 return DAG.getUNDEF(VT);
21702
21703 // Remove redundant insertions:
21704 // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
21705 if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
21706 InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
21707 return InVec;
21708
21709 if (!IndexC) {
21710 // If this is variable insert to undef vector, it might be better to splat:
21711 // inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
21712 if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT))
21713 return DAG.getSplat(VT, DL, InVal);
21714 return SDValue();
21715 }
21716
21717 if (VT.isScalableVector())
21718 return SDValue();
21719
21720 unsigned NumElts = VT.getVectorNumElements();
21721
21722 // We must know which element is being inserted for folds below here.
21723 unsigned Elt = IndexC->getZExtValue();
21724
21725 // Handle <1 x ???> vector insertion special cases.
21726 if (NumElts == 1) {
21727 // insert_vector_elt(x, extract_vector_elt(y, 0), 0) -> y
21728 if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
21729 InVal.getOperand(0).getValueType() == VT &&
21730 isNullConstant(InVal.getOperand(1)))
21731 return InVal.getOperand(0);
21732 }
21733
21734 // Canonicalize insert_vector_elt dag nodes.
21735 // Example:
21736 // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
21737 // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
21738 //
21739 // Do this only if the child insert_vector node has one use; also
21740 // do this only if indices are both constants and Idx1 < Idx0.
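// E.g. (illustrative): (insert_vector_elt (insert_vector_elt A, x, 3), y, 1)
// is canonicalized to (insert_vector_elt (insert_vector_elt A, y, 1), x, 3).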
21741 if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
21742 && isa<ConstantSDNode>(InVec.getOperand(2))) {
21743 unsigned OtherElt = InVec.getConstantOperandVal(2);
21744 if (Elt < OtherElt) {
21745 // Swap nodes.
21746 SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
21747 InVec.getOperand(0), InVal, EltNo);
21748 AddToWorklist(NewOp.getNode());
21749 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
21750 VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
21751 }
21752 }
21753
21754 if (SDValue Shuf = mergeInsertEltWithShuffle(N, Elt))
21755 return Shuf;
21756
21757 if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
21758 return Shuf;
21759
21760 if (SDValue Shuf = combineInsertEltToLoad(N, Elt))
21761 return Shuf;
21762
21763 // Attempt to convert an insert_vector_elt chain into a legal build_vector.
21764 if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) {
21765 // Single-element vector - we don't need to recurse.
21766 if (NumElts == 1)
21767 return DAG.getBuildVector(VT, DL, {InVal});
21768
21769 // If we haven't already collected the element, insert into the op list.
21770 EVT MaxEltVT = InVal.getValueType();
21771 auto AddBuildVectorOp = [&](SmallVectorImpl<SDValue> &Ops, SDValue Elt,
21772 unsigned Idx) {
21773 if (!Ops[Idx]) {
21774 Ops[Idx] = Elt;
21775 if (VT.isInteger()) {
21776 EVT EltVT = Elt.getValueType();
21777 MaxEltVT = MaxEltVT.bitsGE(EltVT) ? MaxEltVT : EltVT;
21778 }
21779 }
21780 };
21781
21782 // Ensure all the operands are the same value type, fill any missing
21783 // operands with UNDEF and create the BUILD_VECTOR.
21784 auto CanonicalizeBuildVector = [&](SmallVectorImpl<SDValue> &Ops) {
21785 assert(Ops.size() == NumElts && "Unexpected vector size");
21786 for (SDValue &Op : Ops) {
21787 if (Op)
21788 Op = VT.isInteger() ? DAG.getAnyExtOrTrunc(Op, DL, MaxEltVT) : Op;
21789 else
21790 Op = DAG.getUNDEF(MaxEltVT);
21791 }
21792 return DAG.getBuildVector(VT, DL, Ops);
21793 };
21794
21795 SmallVector<SDValue, 8> Ops(NumElts, SDValue());
21796 Ops[Elt] = InVal;
21797
21798 // Recurse up a INSERT_VECTOR_ELT chain to build a BUILD_VECTOR.
21799 for (SDValue CurVec = InVec; CurVec;) {
21800 // UNDEF - build new BUILD_VECTOR from already inserted operands.
21801 if (CurVec.isUndef())
21802 return CanonicalizeBuildVector(Ops);
21803
21804 // BUILD_VECTOR - insert unused operands and build new BUILD_VECTOR.
21805 if (CurVec.getOpcode() == ISD::BUILD_VECTOR && CurVec.hasOneUse()) {
21806 for (unsigned I = 0; I != NumElts; ++I)
21807 AddBuildVectorOp(Ops, CurVec.getOperand(I), I);
21808 return CanonicalizeBuildVector(Ops);
21809 }
21810
21811 // SCALAR_TO_VECTOR - insert unused scalar and build new BUILD_VECTOR.
21812 if (CurVec.getOpcode() == ISD::SCALAR_TO_VECTOR && CurVec.hasOneUse()) {
21813 AddBuildVectorOp(Ops, CurVec.getOperand(0), 0);
21814 return CanonicalizeBuildVector(Ops);
21815 }
21816
21817 // INSERT_VECTOR_ELT - insert operand and continue up the chain.
21818 if (CurVec.getOpcode() == ISD::INSERT_VECTOR_ELT && CurVec.hasOneUse())
21819 if (auto *CurIdx = dyn_cast<ConstantSDNode>(CurVec.getOperand(2)))
21820 if (CurIdx->getAPIntValue().ult(NumElts)) {
21821 unsigned Idx = CurIdx->getZExtValue();
21822 AddBuildVectorOp(Ops, CurVec.getOperand(1), Idx);
21823
21824 // Found entire BUILD_VECTOR.
21825 if (all_of(Ops, [](SDValue Op) { return !!Op; }))
21826 return CanonicalizeBuildVector(Ops);
21827
21828 CurVec = CurVec->getOperand(0);
21829 continue;
21830 }
21831
21832 // VECTOR_SHUFFLE - if all the operands match the shuffle's sources,
21833 // update the shuffle mask (and second operand if we started with unary
21834 // shuffle) and create a new legal shuffle.
21835 if (CurVec.getOpcode() == ISD::VECTOR_SHUFFLE && CurVec.hasOneUse()) {
21836 auto *SVN = cast<ShuffleVectorSDNode>(CurVec);
21837 SDValue LHS = SVN->getOperand(0);
21838 SDValue RHS = SVN->getOperand(1);
21840 bool Merged = true;
21841 for (auto I : enumerate(Ops)) {
21842 SDValue &Op = I.value();
21843 if (Op) {
21844 SmallVector<int, 16> NewMask;
21845 if (!mergeEltWithShuffle(LHS, RHS, Mask, NewMask, Op, I.index())) {
21846 Merged = false;
21847 break;
21848 }
21849 Mask = std::move(NewMask);
21850 }
21851 }
21852 if (Merged)
21853 if (SDValue NewShuffle =
21854 TLI.buildLegalVectorShuffle(VT, DL, LHS, RHS, Mask, DAG))
21855 return NewShuffle;
21856 }
21857
21858 // If all insertions are zero value, try to convert to AND mask.
21859 // TODO: Do this for -1 with OR mask?
21860 if (!LegalOperations && llvm::isNullConstant(InVal) &&
21861 all_of(Ops, [InVal](SDValue Op) { return !Op || Op == InVal; }) &&
21862 count_if(Ops, [InVal](SDValue Op) { return Op == InVal; }) >= 2) {
21863 SDValue Zero = DAG.getConstant(0, DL, MaxEltVT);
21864 SDValue AllOnes = DAG.getAllOnesConstant(DL, MaxEltVT);
21866 for (unsigned I = 0; I != NumElts; ++I)
21867 Mask[I] = Ops[I] ? Zero : AllOnes;
21868 return DAG.getNode(ISD::AND, DL, VT, CurVec,
21869 DAG.getBuildVector(VT, DL, Mask));
21870 }
21871
21872 // Failed to find a match in the chain - bail.
21873 break;
21874 }
21875
21876 // See if we can fill in the missing constant elements as zeros.
21877 // TODO: Should we do this for any constant?
21878 APInt DemandedZeroElts = APInt::getZero(NumElts);
21879 for (unsigned I = 0; I != NumElts; ++I)
21880 if (!Ops[I])
21881 DemandedZeroElts.setBit(I);
21882
21883 if (DAG.MaskedVectorIsZero(InVec, DemandedZeroElts)) {
21884 SDValue Zero = VT.isInteger() ? DAG.getConstant(0, DL, MaxEltVT)
21885 : DAG.getConstantFP(0, DL, MaxEltVT);
21886 for (unsigned I = 0; I != NumElts; ++I)
21887 if (!Ops[I])
21888 Ops[I] = Zero;
21889
21890 return CanonicalizeBuildVector(Ops);
21891 }
21892 }
21893
21894 return SDValue();
21895}
21896
21897SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
21898 SDValue EltNo,
21899 LoadSDNode *OriginalLoad) {
21900 assert(OriginalLoad->isSimple());
21901
21902 EVT ResultVT = EVE->getValueType(0);
21903 EVT VecEltVT = InVecVT.getVectorElementType();
21904
21905 // If the vector element type is not a multiple of a byte then we are unable
21906 // to correctly compute an address to load only the extracted element as a
21907 // scalar.
21908 if (!VecEltVT.isByteSized())
21909 return SDValue();
21910
21911 ISD::LoadExtType ExtTy =
21912 ResultVT.bitsGT(VecEltVT) ? ISD::NON_EXTLOAD : ISD::EXTLOAD;
21913 if (!TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT) ||
21914 !TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
21915 return SDValue();
21916
21917 Align Alignment = OriginalLoad->getAlign();
21919 SDLoc DL(EVE);
21920 if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
21921 int Elt = ConstEltNo->getZExtValue();
21922 unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
21923 MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
21924 Alignment = commonAlignment(Alignment, PtrOff);
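// E.g. (illustrative): extracting element 3 of a loaded <4 x float> gives
// PtrOff = 32 * 3 / 8 = 12 bytes, and the new alignment is the common
// alignment of the original load's alignment and 12.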
21925 } else {
21926 // Discard the pointer info except the address space because the memory
21927 // operand can't represent this new access since the offset is variable.
21928 MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
21929 Alignment = commonAlignment(Alignment, VecEltVT.getSizeInBits() / 8);
21930 }
21931
21932 unsigned IsFast = 0;
21933 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VecEltVT,
21934 OriginalLoad->getAddressSpace(), Alignment,
21935 OriginalLoad->getMemOperand()->getFlags(),
21936 &IsFast) ||
21937 !IsFast)
21938 return SDValue();
21939
21940 SDValue NewPtr = TLI.getVectorElementPointer(DAG, OriginalLoad->getBasePtr(),
21941 InVecVT, EltNo);
21942
21943 // We are replacing a vector load with a scalar load. The new load must have
21944 // identical memory op ordering to the original.
21945 SDValue Load;
21946 if (ResultVT.bitsGT(VecEltVT)) {
21947 // If the result type of vextract is wider than the load, then issue an
21948 // extending load instead.
21949 ISD::LoadExtType ExtType =
21950 TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT, VecEltVT) ? ISD::ZEXTLOAD
21951 : ISD::EXTLOAD;
21952 Load = DAG.getExtLoad(ExtType, DL, ResultVT, OriginalLoad->getChain(),
21953 NewPtr, MPI, VecEltVT, Alignment,
21954 OriginalLoad->getMemOperand()->getFlags(),
21955 OriginalLoad->getAAInfo());
21956 DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
21957 } else {
21958 // The result type is narrower or the same width as the vector element
21959 Load = DAG.getLoad(VecEltVT, DL, OriginalLoad->getChain(), NewPtr, MPI,
21960 Alignment, OriginalLoad->getMemOperand()->getFlags(),
21961 OriginalLoad->getAAInfo());
21962 DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
21963 if (ResultVT.bitsLT(VecEltVT))
21964 Load = DAG.getNode(ISD::TRUNCATE, DL, ResultVT, Load);
21965 else
21966 Load = DAG.getBitcast(ResultVT, Load);
21967 }
21968 ++OpsNarrowed;
21969 return Load;
21970}
21971
21972/// Transform a vector binary operation into a scalar binary operation by moving
21973/// the math/logic after an extract element of a vector.
21975 bool LegalOperations) {
21976 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
21977 SDValue Vec = ExtElt->getOperand(0);
21978 SDValue Index = ExtElt->getOperand(1);
21979 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
21980 if (!IndexC || !TLI.isBinOp(Vec.getOpcode()) || !Vec.hasOneUse() ||
21981 Vec->getNumValues() != 1)
21982 return SDValue();
21983
21984 // Targets may want to avoid this to prevent an expensive register transfer.
21985 if (!TLI.shouldScalarizeBinop(Vec))
21986 return SDValue();
21987
21988 // Extracting an element of a vector constant is constant-folded, so this
21989 // transform is just replacing a vector op with a scalar op while moving the
21990 // extract.
21991 SDValue Op0 = Vec.getOperand(0);
21992 SDValue Op1 = Vec.getOperand(1);
21993 APInt SplatVal;
21994 if (isAnyConstantBuildVector(Op0, true) ||
21995 ISD::isConstantSplatVector(Op0.getNode(), SplatVal) ||
21996 isAnyConstantBuildVector(Op1, true) ||
21997 ISD::isConstantSplatVector(Op1.getNode(), SplatVal)) {
21998 // extractelt (binop X, C), IndexC --> binop (extractelt X, IndexC), C'
21999 // extractelt (binop C, X), IndexC --> binop C', (extractelt X, IndexC)
22000 SDLoc DL(ExtElt);
22001 EVT VT = ExtElt->getValueType(0);
22002 SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op0, Index);
22003 SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op1, Index);
22004 return DAG.getNode(Vec.getOpcode(), DL, VT, Ext0, Ext1);
22005 }
22006
22007 return SDValue();
22008}
22009
22010 // Given an ISD::EXTRACT_VECTOR_ELT, which is a glorified bit sequence extract,
22011 // recursively analyse all of its users and try to model them as
22012// bit sequence extractions. If all of them agree on the new, narrower element
22013// type, and all of them can be modelled as ISD::EXTRACT_VECTOR_ELT's of that
22014// new element type, do so now.
22015// This is mainly useful to recover from legalization that scalarized
22016// the vector as wide elements, but tries to rebuild it with narrower elements.
22017//
22018// Some more nodes could be modelled if that helps cover interesting patterns.
22019bool DAGCombiner::refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(
22020 SDNode *N) {
22021 // We perform this optimization post type-legalization because
22022 // the type-legalizer often scalarizes integer-promoted vectors.
22023 // Performing this optimization earlier may cause legalization cycles.
22024 if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
22025 return false;
22026
22027 // TODO: Add support for big-endian.
22028 if (DAG.getDataLayout().isBigEndian())
22029 return false;
22030
22031 SDValue VecOp = N->getOperand(0);
22032 EVT VecVT = VecOp.getValueType();
22033 assert(!VecVT.isScalableVector() && "Only for fixed vectors.");
22034
22035 // We must start with a constant extraction index.
22036 auto *IndexC = dyn_cast<ConstantSDNode>(N->getOperand(1));
22037 if (!IndexC)
22038 return false;
22039
22040 assert(IndexC->getZExtValue() < VecVT.getVectorNumElements() &&
22041 "Original ISD::EXTRACT_VECTOR_ELT is undefinend?");
22042
22043 // TODO: deal with the case of implicit anyext of the extraction.
22044 unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
22045 EVT ScalarVT = N->getValueType(0);
22046 if (VecVT.getScalarType() != ScalarVT)
22047 return false;
22048
22049 // TODO: deal with the cases other than everything being integer-typed.
22050 if (!ScalarVT.isScalarInteger())
22051 return false;
22052
22053 struct Entry {
22054 SDNode *Producer;
22055
22056 // Which bits of VecOp does it contain?
22057 unsigned BitPos;
22058 int NumBits;
22059 // NOTE: the actual width of \p Producer may be wider than NumBits!
22060
22061 Entry(Entry &&) = default;
22062 Entry(SDNode *Producer_, unsigned BitPos_, int NumBits_)
22063 : Producer(Producer_), BitPos(BitPos_), NumBits(NumBits_) {}
22064
22065 Entry() = delete;
22066 Entry(const Entry &) = delete;
22067 Entry &operator=(const Entry &) = delete;
22068 Entry &operator=(Entry &&) = delete;
22069 };
22070 SmallVector<Entry, 32> Worklist;
22071 SmallVector<Entry, 32> Leafs;
22072
22073 // We start at the "root" ISD::EXTRACT_VECTOR_ELT.
22074 Worklist.emplace_back(N, /*BitPos=*/VecEltBitWidth * IndexC->getZExtValue(),
22075 /*NumBits=*/VecEltBitWidth);
22076
22077 while (!Worklist.empty()) {
22078 Entry E = Worklist.pop_back_val();
22079 // Does the node not even use any of the VecOp bits?
22080 if (!(E.NumBits > 0 && E.BitPos < VecVT.getSizeInBits() &&
22081 E.BitPos + E.NumBits <= VecVT.getSizeInBits()))
22082 return false; // Let's allow the other combines clean this up first.
22083 // Did we fail to model any of the users of the Producer?
22084 bool ProducerIsLeaf = false;
22085 // Look at each user of this Producer.
22086 for (SDNode *User : E.Producer->uses()) {
22087 switch (User->getOpcode()) {
22088 // TODO: support ISD::BITCAST
22089 // TODO: support ISD::ANY_EXTEND
22090 // TODO: support ISD::ZERO_EXTEND
22091 // TODO: support ISD::SIGN_EXTEND
22092 case ISD::TRUNCATE:
22093 // Truncation simply means we keep position, but extract less bits.
22094 Worklist.emplace_back(User, E.BitPos,
22095 /*NumBits=*/User->getValueSizeInBits(0));
22096 break;
22097 // TODO: support ISD::SRA
22098 // TODO: support ISD::SHL
22099 case ISD::SRL:
22100 // We should be shifting the Producer by a constant amount.
22101 if (auto *ShAmtC = dyn_cast<ConstantSDNode>(User->getOperand(1));
22102 User->getOperand(0).getNode() == E.Producer && ShAmtC) {
22103 // Logical right-shift means that we start extraction later,
22104 // but stop it at the same position we did previously.
22105 unsigned ShAmt = ShAmtC->getZExtValue();
22106 Worklist.emplace_back(User, E.BitPos + ShAmt, E.NumBits - ShAmt);
22107 break;
22108 }
22109 [[fallthrough]];
22110 default:
22111 // We can not model this user of the Producer.
22112 // Which means the current Producer will be a ISD::EXTRACT_VECTOR_ELT.
22113 ProducerIsLeaf = true;
22114 // Profitability check: all users that we can not model
22115 // must be ISD::BUILD_VECTOR's.
22116 if (User->getOpcode() != ISD::BUILD_VECTOR)
22117 return false;
22118 break;
22119 }
22120 }
22121 if (ProducerIsLeaf)
22122 Leafs.emplace_back(std::move(E));
22123 }
22124
22125 unsigned NewVecEltBitWidth = Leafs.front().NumBits;
22126
22127 // If we are still at the same element granularity, give up.
22128 if (NewVecEltBitWidth == VecEltBitWidth)
22129 return false;
22130
22131 // The vector width must be a multiple of the new element width.
22132 if (VecVT.getSizeInBits() % NewVecEltBitWidth != 0)
22133 return false;
22134
22135 // All leafs must agree on the new element width.
22136 // All leafs must not expect any "padding" bits on top of that width.
22137 // All leafs must start extraction from multiple of that width.
22138 if (!all_of(Leafs, [NewVecEltBitWidth](const Entry &E) {
22139 return (unsigned)E.NumBits == NewVecEltBitWidth &&
22140 E.Producer->getValueSizeInBits(0) == NewVecEltBitWidth &&
22141 E.BitPos % NewVecEltBitWidth == 0;
22142 }))
22143 return false;
22144
22145 EVT NewScalarVT = EVT::getIntegerVT(*DAG.getContext(), NewVecEltBitWidth);
22146 EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewScalarVT,
22147 VecVT.getSizeInBits() / NewVecEltBitWidth);
22148
22149 if (LegalTypes &&
22150 !(TLI.isTypeLegal(NewScalarVT) && TLI.isTypeLegal(NewVecVT)))
22151 return false;
22152
22153 if (LegalOperations &&
22154 !(TLI.isOperationLegalOrCustom(ISD::BITCAST, NewVecVT) &&
22155 TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, NewVecVT)))
22156 return false;
22157
22158 SDValue NewVecOp = DAG.getBitcast(NewVecVT, VecOp);
22159 for (const Entry &E : Leafs) {
22160 SDLoc DL(E.Producer);
22161 unsigned NewIndex = E.BitPos / NewVecEltBitWidth;
22162 assert(NewIndex < NewVecVT.getVectorNumElements() &&
22163 "Creating out-of-bounds ISD::EXTRACT_VECTOR_ELT?");
22164 SDValue V = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, NewScalarVT, NewVecOp,
22165 DAG.getVectorIdxConstant(NewIndex, DL));
22166 CombineTo(E.Producer, V);
22167 }
22168
22169 return true;
22170}
22171
22172SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
22173 SDValue VecOp = N->getOperand(0);
22174 SDValue Index = N->getOperand(1);
22175 EVT ScalarVT = N->getValueType(0);
22176 EVT VecVT = VecOp.getValueType();
22177 if (VecOp.isUndef())
22178 return DAG.getUNDEF(ScalarVT);
22179
22180 // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
22181 //
22182 // This only really matters if the index is non-constant since other combines
22183 // on the constant elements already work.
22184 SDLoc DL(N);
22185 if (VecOp.getOpcode() == ISD::INSERT_VECTOR_ELT &&
22186 Index == VecOp.getOperand(2)) {
22187 SDValue Elt = VecOp.getOperand(1);
22188 return VecVT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, DL, ScalarVT) : Elt;
22189 }
22190
22191 // (vextract (scalar_to_vector val, 0) -> val
22192 if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR) {
22193 // Only 0'th element of SCALAR_TO_VECTOR is defined.
22194 if (DAG.isKnownNeverZero(Index))
22195 return DAG.getUNDEF(ScalarVT);
22196
22197 // Check if the result type doesn't match the inserted element type.
22198 // The inserted element and extracted element may have mismatched bitwidth.
22199 // As a result, EXTRACT_VECTOR_ELT may extend or truncate the extracted value.
22200 SDValue InOp = VecOp.getOperand(0);
22201 if (InOp.getValueType() != ScalarVT) {
22202 assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
22203 if (InOp.getValueType().bitsGT(ScalarVT))
22204 return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, InOp);
22205 return DAG.getNode(ISD::ANY_EXTEND, DL, ScalarVT, InOp);
22206 }
22207 return InOp;
22208 }
22209
22210 // extract_vector_elt of out-of-bounds element -> UNDEF
22211 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
22212 if (IndexC && VecVT.isFixedLengthVector() &&
22213 IndexC->getAPIntValue().uge(VecVT.getVectorNumElements()))
22214 return DAG.getUNDEF(ScalarVT);
22215
22216 // extract_vector_elt(freeze(x)), idx -> freeze(extract_vector_elt(x)), idx
22217 if (VecOp.hasOneUse() && VecOp.getOpcode() == ISD::FREEZE) {
22218 return DAG.getFreeze(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT,
22219 VecOp.getOperand(0), Index));
22220 }
22221
22222 // extract_vector_elt (build_vector x, y), 1 -> y
22223 if (((IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR) ||
22224 VecOp.getOpcode() == ISD::SPLAT_VECTOR) &&
22225 TLI.isTypeLegal(VecVT)) {
22226 assert((VecOp.getOpcode() != ISD::BUILD_VECTOR ||
22227 VecVT.isFixedLengthVector()) &&
22228 "BUILD_VECTOR used for scalable vectors");
22229 unsigned IndexVal =
22230 VecOp.getOpcode() == ISD::BUILD_VECTOR ? IndexC->getZExtValue() : 0;
22231 SDValue Elt = VecOp.getOperand(IndexVal);
22232 EVT InEltVT = Elt.getValueType();
22233
22234 if (VecOp.hasOneUse() || TLI.aggressivelyPreferBuildVectorSources(VecVT) ||
22235 isNullConstant(Elt)) {
22236 // Sometimes build_vector's scalar input types do not match result type.
22237 if (ScalarVT == InEltVT)
22238 return Elt;
22239
22240 // TODO: It may be useful to truncate if free if the build_vector
22241 // implicitly converts.
22242 }
22243 }
22244
22245 if (SDValue BO = scalarizeExtractedBinop(N, DAG, LegalOperations))
22246 return BO;
22247
22248 if (VecVT.isScalableVector())
22249 return SDValue();
22250
22251 // All the code from this point onwards assumes fixed width vectors, but it's
22252 // possible that some of the combinations could be made to work for scalable
22253 // vectors too.
22254 unsigned NumElts = VecVT.getVectorNumElements();
22255 unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
22256
22257 // See if the extracted element is constant, in which case fold it if it's
22258 // a legal fp immediate.
22259 if (IndexC && ScalarVT.isFloatingPoint()) {
22260 APInt EltMask = APInt::getOneBitSet(NumElts, IndexC->getZExtValue());
22261 KnownBits KnownElt = DAG.computeKnownBits(VecOp, EltMask);
22262 if (KnownElt.isConstant()) {
22263 APFloat CstFP =
22264 APFloat(DAG.EVTToAPFloatSemantics(ScalarVT), KnownElt.getConstant());
22265 if (TLI.isFPImmLegal(CstFP, ScalarVT))
22266 return DAG.getConstantFP(CstFP, DL, ScalarVT);
22267 }
22268 }
22269
22270 // TODO: These transforms should not require the 'hasOneUse' restriction, but
22271 // there are regressions on multiple targets without it. We can end up with a
22272 // mess of scalar and vector code if we reduce only part of the DAG to scalar.
22273 if (IndexC && VecOp.getOpcode() == ISD::BITCAST && VecVT.isInteger() &&
22274 VecOp.hasOneUse()) {
22275 // The vector index of the LSBs of the source depends on the endianness.
22276 bool IsLE = DAG.getDataLayout().isLittleEndian();
22277 unsigned ExtractIndex = IndexC->getZExtValue();
22278 // extract_elt (v2i32 (bitcast i64:x)), BCTruncElt -> i32 (trunc i64:x)
22279 unsigned BCTruncElt = IsLE ? 0 : NumElts - 1;
22280 SDValue BCSrc = VecOp.getOperand(0);
22281 if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger())
22282 return DAG.getAnyExtOrTrunc(BCSrc, DL, ScalarVT);
22283
22284 if (LegalTypes && BCSrc.getValueType().isInteger() &&
22285 BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR) {
22286 // ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt -->
22287 // trunc i64 X to i32
22288 SDValue X = BCSrc.getOperand(0);
22289 assert(X.getValueType().isScalarInteger() && ScalarVT.isScalarInteger() &&
22290 "Extract element and scalar to vector can't change element type "
22291 "from FP to integer.");
22292 unsigned XBitWidth = X.getValueSizeInBits();
22293 BCTruncElt = IsLE ? 0 : XBitWidth / VecEltBitWidth - 1;
22294
22295 // An extract element return value type can be wider than its vector
22296 // operand element type. In that case, the high bits are undefined, so
22297 // it's possible that we may need to extend rather than truncate.
22298 if (ExtractIndex == BCTruncElt && XBitWidth > VecEltBitWidth) {
22299 assert(XBitWidth % VecEltBitWidth == 0 &&
22300 "Scalar bitwidth must be a multiple of vector element bitwidth");
22301 return DAG.getAnyExtOrTrunc(X, DL, ScalarVT);
22302 }
22303 }
22304 }
22305
22306 // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
22307 // We only perform this optimization before the op legalization phase because
22308 // we may introduce new vector instructions which are not backed by TD
22309 // patterns. For example on AVX, extracting elements from a wide vector
22310 // without using extract_subvector. However, if we can find an underlying
22311 // scalar value, then we can always use that.
22312 if (IndexC && VecOp.getOpcode() == ISD::VECTOR_SHUFFLE) {
22313 auto *Shuf = cast<ShuffleVectorSDNode>(VecOp);
22314 // Find the new index to extract from.
22315 int OrigElt = Shuf->getMaskElt(IndexC->getZExtValue());
22316
22317 // Extracting an undef index is undef.
22318 if (OrigElt == -1)
22319 return DAG.getUNDEF(ScalarVT);
22320
22321 // Select the right vector half to extract from.
22322 SDValue SVInVec;
22323 if (OrigElt < (int)NumElts) {
22324 SVInVec = VecOp.getOperand(0);
22325 } else {
22326 SVInVec = VecOp.getOperand(1);
22327 OrigElt -= NumElts;
22328 }
22329
22330 if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
22331 SDValue InOp = SVInVec.getOperand(OrigElt);
22332 if (InOp.getValueType() != ScalarVT) {
22333 assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
22334 InOp = DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
22335 }
22336
22337 return InOp;
22338 }
22339
22340 // FIXME: We should handle recursing on other vector shuffles and
22341 // scalar_to_vector here as well.
22342
22343 if (!LegalOperations ||
22344 // FIXME: Should really be just isOperationLegalOrCustom.
22345 TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecVT) ||
22346 TLI.isOperationCustom(ISD::EXTRACT_VECTOR_ELT, VecVT)) {
22347 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, SVInVec,
22348 DAG.getVectorIdxConstant(OrigElt, DL));
22349 }
22350 }
22351
22352 // If only EXTRACT_VECTOR_ELT nodes use the source vector we can
22353 // simplify it based on the (valid) extraction indices.
22354 if (llvm::all_of(VecOp->uses(), [&](SDNode *Use) {
22355 return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
22356 Use->getOperand(0) == VecOp &&
22357 isa<ConstantSDNode>(Use->getOperand(1));
22358 })) {
22359 APInt DemandedElts = APInt::getZero(NumElts);
22360 for (SDNode *Use : VecOp->uses()) {
22361 auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1));
22362 if (CstElt->getAPIntValue().ult(NumElts))
22363 DemandedElts.setBit(CstElt->getZExtValue());
22364 }
22365 if (SimplifyDemandedVectorElts(VecOp, DemandedElts, true)) {
22366 // We simplified the vector operand of this extract element. If this
22367 // extract is not dead, visit it again so it is folded properly.
22368 if (N->getOpcode() != ISD::DELETED_NODE)
22369 AddToWorklist(N);
22370 return SDValue(N, 0);
22371 }
22372 APInt DemandedBits = APInt::getAllOnes(VecEltBitWidth);
22373 if (SimplifyDemandedBits(VecOp, DemandedBits, DemandedElts, true)) {
22374 // We simplified the vector operand of this extract element. If this
22375 // extract is not dead, visit it again so it is folded properly.
22376 if (N->getOpcode() != ISD::DELETED_NODE)
22377 AddToWorklist(N);
22378 return SDValue(N, 0);
22379 }
22380 }
22381
22382 if (refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(N))
22383 return SDValue(N, 0);
22384
22385 // Everything under here is trying to match an extract of a loaded value.
22386 // If the result of load has to be truncated, then it's not necessarily
22387 // profitable.
22388 bool BCNumEltsChanged = false;
22389 EVT ExtVT = VecVT.getVectorElementType();
22390 EVT LVT = ExtVT;
22391 if (ScalarVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, ScalarVT))
22392 return SDValue();
22393
22394 if (VecOp.getOpcode() == ISD::BITCAST) {
22395 // Don't duplicate a load with other uses.
22396 if (!VecOp.hasOneUse())
22397 return SDValue();
22398
22399 EVT BCVT = VecOp.getOperand(0).getValueType();
22400 if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
22401 return SDValue();
22402 if (NumElts != BCVT.getVectorNumElements())
22403 BCNumEltsChanged = true;
22404 VecOp = VecOp.getOperand(0);
22405 ExtVT = BCVT.getVectorElementType();
22406 }
22407
22408 // extract (vector load $addr), i --> load $addr + i * size
22409 if (!LegalOperations && !IndexC && VecOp.hasOneUse() &&
22410 ISD::isNormalLoad(VecOp.getNode()) &&
22411 !Index->hasPredecessor(VecOp.getNode())) {
22412 auto *VecLoad = dyn_cast<LoadSDNode>(VecOp);
22413 if (VecLoad && VecLoad->isSimple())
22414 return scalarizeExtractedVectorLoad(N, VecVT, Index, VecLoad);
22415 }
22416
22417 // Perform only after legalization to ensure build_vector / vector_shuffle
22418 // optimizations have already been done.
22419 if (!LegalOperations || !IndexC)
22420 return SDValue();
22421
22422 // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
22423 // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
22424 // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
22425 int Elt = IndexC->getZExtValue();
22426 LoadSDNode *LN0 = nullptr;
22427 if (ISD::isNormalLoad(VecOp.getNode())) {
22428 LN0 = cast<LoadSDNode>(VecOp);
22429 } else if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
22430 VecOp.getOperand(0).getValueType() == ExtVT &&
22431 ISD::isNormalLoad(VecOp.getOperand(0).getNode())) {
22432 // Don't duplicate a load with other uses.
22433 if (!VecOp.hasOneUse())
22434 return SDValue();
22435
22436 LN0 = cast<LoadSDNode>(VecOp.getOperand(0));
22437 }
22438 if (auto *Shuf = dyn_cast<ShuffleVectorSDNode>(VecOp)) {
22439 // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
22440 // =>
22441 // (load $addr+1*size)
22442
22443 // Don't duplicate a load with other uses.
22444 if (!VecOp.hasOneUse())
22445 return SDValue();
22446
22447 // If the bit convert changed the number of elements, it is unsafe
22448 // to examine the mask.
22449 if (BCNumEltsChanged)
22450 return SDValue();
22451
22452 // Select the input vector, guarding against an out of range extract index.
22453 int Idx = (Elt > (int)NumElts) ? -1 : Shuf->getMaskElt(Elt);
22454 VecOp = (Idx < (int)NumElts) ? VecOp.getOperand(0) : VecOp.getOperand(1);
22455
22456 if (VecOp.getOpcode() == ISD::BITCAST) {
22457 // Don't duplicate a load with other uses.
22458 if (!VecOp.hasOneUse())
22459 return SDValue();
22460
22461 VecOp = VecOp.getOperand(0);
22462 }
22463 if (ISD::isNormalLoad(VecOp.getNode())) {
22464 LN0 = cast<LoadSDNode>(VecOp);
22465 Elt = (Idx < (int)NumElts) ? Idx : Idx - (int)NumElts;
22466 Index = DAG.getConstant(Elt, DL, Index.getValueType());
22467 }
22468 } else if (VecOp.getOpcode() == ISD::CONCAT_VECTORS && !BCNumEltsChanged &&
22469 VecVT.getVectorElementType() == ScalarVT &&
22470 (!LegalTypes ||
22471 TLI.isTypeLegal(
22472 VecOp.getOperand(0).getValueType().getVectorElementType()))) {
22473 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 0
22474 // -> extract_vector_elt a, 0
22475 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 1
22476 // -> extract_vector_elt a, 1
22477 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 2
22478 // -> extract_vector_elt b, 0
22479 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 3
22480 // -> extract_vector_elt b, 1
22481 EVT ConcatVT = VecOp.getOperand(0).getValueType();
22482 unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
22483 SDValue NewIdx = DAG.getConstant(Elt % ConcatNumElts, DL,
22484 Index.getValueType());
22485
22486 SDValue ConcatOp = VecOp.getOperand(Elt / ConcatNumElts);
22487 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
22488 ConcatVT.getVectorElementType(),
22489 ConcatOp, NewIdx);
22490 return DAG.getNode(ISD::BITCAST, DL, ScalarVT, Elt);
22491 }
22492
22493 // Make sure we found a non-volatile load and the extractelement is
22494 // the only use.
22495 if (!LN0 || !LN0->hasNUsesOfValue(1,0) || !LN0->isSimple())
22496 return SDValue();
22497
22498 // If Idx was -1 above, Elt is going to be -1, so just return undef.
22499 if (Elt == -1)
22500 return DAG.getUNDEF(LVT);
22501
22502 return scalarizeExtractedVectorLoad(N, VecVT, Index, LN0);
22503}
22504
22505// Simplify (build_vec (ext )) to (bitcast (build_vec ))
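// Illustrative example (not from the original source): on a little-endian
// target, assuming i16 inputs a, b, c, d,
//   (v4i32 build_vector (zext a), (zext b), (zext c), (zext d))
// becomes
//   (v4i32 bitcast (v8i16 build_vector a, 0, b, 0, c, 0, d, 0)),
// where the zero filler is undef instead when every input is an any_extend.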
22506SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
22507 // We perform this optimization post type-legalization because
22508 // the type-legalizer often scalarizes integer-promoted vectors.
22509 // Performing this optimization before may create bit-casts which
22510 // will be type-legalized to complex code sequences.
22511 // We perform this optimization only before the operation legalizer because we
22512 // may introduce illegal operations.
22513 if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
22514 return SDValue();
22515
22516 unsigned NumInScalars = N->getNumOperands();
22517 SDLoc DL(N);
22518 EVT VT = N->getValueType(0);
22519
22520 // Check to see if this is a BUILD_VECTOR of a bunch of values
22521 // which come from any_extend or zero_extend nodes. If so, we can create
22522 // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
22523 // optimizations. We do not handle sign-extend because we can't fill the sign
22524 // using shuffles.
22525 EVT SourceType = MVT::Other;
22526 bool AllAnyExt = true;
22527
22528 for (unsigned i = 0; i != NumInScalars; ++i) {
22529 SDValue In = N->getOperand(i);
22530 // Ignore undef inputs.
22531 if (In.isUndef()) continue;
22532
22533 bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND;
22534 bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
22535
22536 // Abort if the element is not an extension.
22537 if (!ZeroExt && !AnyExt) {
22538 SourceType = MVT::Other;
22539 break;
22540 }
22541
22542 // The input is a ZeroExt or AnyExt. Check the original type.
22543 EVT InTy = In.getOperand(0).getValueType();
22544
22545 // Check that all of the widened source types are the same.
22546 if (SourceType == MVT::Other)
22547 // First time.
22548 SourceType = InTy;
22549 else if (InTy != SourceType) {
22550 // Multiple incoming types. Abort.
22551 SourceType = MVT::Other;
22552 break;
22553 }
22554
22555 // Check if all of the extends are ANY_EXTENDs.
22556 AllAnyExt &= AnyExt;
22557 }
22558
22559 // In order to have valid types, all of the inputs must be extended from the
22560 // same source type and all of the inputs must be any or zero extend.
22561 // Scalar sizes must be a power of two.
22562 EVT OutScalarTy = VT.getScalarType();
22563 bool ValidTypes =
22564 SourceType != MVT::Other &&
22565 llvm::has_single_bit<uint32_t>(OutScalarTy.getSizeInBits()) &&
22566 llvm::has_single_bit<uint32_t>(SourceType.getSizeInBits());
22567
22568 // Create a new simpler BUILD_VECTOR sequence which other optimizations can
22569 // turn into a single shuffle instruction.
22570 if (!ValidTypes)
22571 return SDValue();
22572
22573 // If we already have a splat buildvector, then don't fold it if it means
22574 // introducing zeros.
22575 if (!AllAnyExt && DAG.isSplatValue(SDValue(N, 0), /*AllowUndefs*/ true))
22576 return SDValue();
22577
22578 bool isLE = DAG.getDataLayout().isLittleEndian();
22579 unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
22580 assert(ElemRatio > 1 && "Invalid element size ratio");
22581 SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
22582 DAG.getConstant(0, DL, SourceType);
22583
22584 unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
22585 SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
22586
22587 // Populate the new build_vector
22588 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
22589 SDValue Cast = N->getOperand(i);
22590 assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
22591 Cast.getOpcode() == ISD::ZERO_EXTEND ||
22592 Cast.isUndef()) && "Invalid cast opcode");
22593 SDValue In;
22594 if (Cast.isUndef())
22595 In = DAG.getUNDEF(SourceType);
22596 else
22597 In = Cast->getOperand(0);
22598 unsigned Index = isLE ? (i * ElemRatio) :
22599 (i * ElemRatio + (ElemRatio - 1));
22600
22601 assert(Index < Ops.size() && "Invalid index");
22602 Ops[Index] = In;
22603 }
22604
22605 // The type of the new BUILD_VECTOR node.
22606 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
22607 assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
22608 "Invalid vector size");
22609 // Check if the new vector type is legal.
22610 if (!isTypeLegal(VecVT) ||
22611 (!TLI.isOperationLegal(ISD::BUILD_VECTOR, VecVT) &&
22612 TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)))
22613 return SDValue();
22614
22615 // Make the new BUILD_VECTOR.
22616 SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
22617
22618 // The new BUILD_VECTOR node has the potential to be further optimized.
22619 AddToWorklist(BV.getNode());
22620 // Bitcast to the desired type.
22621 return DAG.getBitcast(VT, BV);
22622}
22623
22624// Simplify (build_vec (trunc $1)
22625// (trunc (srl $1 half-width))
22626// (trunc (srl $1 (2 * half-width))))
22627// to (bitcast $1)
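// Illustrative example (not from the original source): assuming $1 is an i64
// and the result type is v4i16,
//   (v4i16 build_vector (trunc $1), (trunc (srl $1, 16)),
//                       (trunc (srl $1, 32)), (trunc (srl $1, 48)))
// is folded to (v4i16 bitcast $1) on little-endian targets.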
22628SDValue DAGCombiner::reduceBuildVecTruncToBitCast(SDNode *N) {
22629 assert(N->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
22630
22631 EVT VT = N->getValueType(0);
22632
22633 // Don't run this before LegalizeTypes if VT is legal.
22634 // Targets may have other preferences.
22635 if (Level < AfterLegalizeTypes && TLI.isTypeLegal(VT))
22636 return SDValue();
22637
22638 // Only for little endian
22639 if (!DAG.getDataLayout().isLittleEndian())
22640 return SDValue();
22641
22642 SDLoc DL(N);
22643 EVT OutScalarTy = VT.getScalarType();
22644 uint64_t ScalarTypeBitsize = OutScalarTy.getSizeInBits();
22645
22646 // Only for power of two types to be sure that bitcast works well
22647 if (!isPowerOf2_64(ScalarTypeBitsize))
22648 return SDValue();
22649
22650 unsigned NumInScalars = N->getNumOperands();
22651
22652 // Look through bitcasts
22653 auto PeekThroughBitcast = [](SDValue Op) {
22654 if (Op.getOpcode() == ISD::BITCAST)
22655 return Op.getOperand(0);
22656 return Op;
22657 };
22658
22659 // The source value where all the parts are extracted.
22660 SDValue Src;
22661 for (unsigned i = 0; i != NumInScalars; ++i) {
22662 SDValue In = PeekThroughBitcast(N->getOperand(i));
22663 // Ignore undef inputs.
22664 if (In.isUndef()) continue;
22665
22666 if (In.getOpcode() != ISD::TRUNCATE)
22667 return SDValue();
22668
22669 In = PeekThroughBitcast(In.getOperand(0));
22670
22671 if (In.getOpcode() != ISD::SRL) {
22672 // For now, only handle build_vec without shuffling; handle shifts here
22673 // in the future.
22674 if (i != 0)
22675 return SDValue();
22676
22677 Src = In;
22678 } else {
22679 // In is SRL
22680 SDValue part = PeekThroughBitcast(In.getOperand(0));
22681
22682 if (!Src) {
22683 Src = part;
22684 } else if (Src != part) {
22685 // Vector parts do not stem from the same variable
22686 return SDValue();
22687 }
22688
22689 SDValue ShiftAmtVal = In.getOperand(1);
22690 if (!isa<ConstantSDNode>(ShiftAmtVal))
22691 return SDValue();
22692
22693 uint64_t ShiftAmt = In.getConstantOperandVal(1);
22694
22695 // The extracted value is not extracted at the right position
22696 if (ShiftAmt != i * ScalarTypeBitsize)
22697 return SDValue();
22698 }
22699 }
22700
22701 // Only cast if the size is the same
22702 if (!Src || Src.getValueType().getSizeInBits() != VT.getSizeInBits())
22703 return SDValue();
22704
22705 return DAG.getBitcast(VT, Src);
22706}
22707
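// Illustrative sketch (not from the original source): given the two source
// vectors chosen by reduceBuildVecToShuffle, this builds one shuffle per pair,
// e.g. with v4i32 inputs t1, t2 and LeftIdx selecting them,
//   (build_vector (extractelt t1, 0), (extractelt t2, 3), undef, undef)
// contributes (v4i32 vector_shuffle<0,7,u,u> t1, t2).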
22708SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
22709 ArrayRef<int> VectorMask,
22710 SDValue VecIn1, SDValue VecIn2,
22711 unsigned LeftIdx, bool DidSplitVec) {
22712 SDValue ZeroIdx = DAG.getVectorIdxConstant(0, DL);
22713
22714 EVT VT = N->getValueType(0);
22715 EVT InVT1 = VecIn1.getValueType();
22716 EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
22717
22718 unsigned NumElems = VT.getVectorNumElements();
22719 unsigned ShuffleNumElems = NumElems;
22720
22721 // If we artificially split a vector in two already, then the offsets in the
22722 // operands will all be based off of VecIn1, even those in VecIn2.
22723 unsigned Vec2Offset = DidSplitVec ? 0 : InVT1.getVectorNumElements();
22724
22725 uint64_t VTSize = VT.getFixedSizeInBits();
22726 uint64_t InVT1Size = InVT1.getFixedSizeInBits();
22727 uint64_t InVT2Size = InVT2.getFixedSizeInBits();
22728
22729 assert(InVT2Size <= InVT1Size &&
22730 "Inputs must be sorted to be in non-increasing vector size order.");
22731
22732 // We can't generate a shuffle node with mismatched input and output types.
22733 // Try to make the types match the type of the output.
22734 if (InVT1 != VT || InVT2 != VT) {
22735 if ((VTSize % InVT1Size == 0) && InVT1 == InVT2) {
22736 // If the output vector length is a multiple of both input lengths,
22737 // we can concatenate them and pad the rest with undefs.
22738 unsigned NumConcats = VTSize / InVT1Size;
22739 assert(NumConcats >= 2 && "Concat needs at least two inputs!");
22740 SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
22741 ConcatOps[0] = VecIn1;
22742 ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
22743 VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
22744 VecIn2 = SDValue();
22745 } else if (InVT1Size == VTSize * 2) {
22746 if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
22747 return SDValue();
22748
22749 if (!VecIn2.getNode()) {
22750 // If we only have one input vector, and it's twice the size of the
22751 // output, split it in two.
22752 VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
22753 DAG.getVectorIdxConstant(NumElems, DL));
22754 VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
22755 // Since we now have shorter input vectors, adjust the offset of the
22756 // second vector's start.
22757 Vec2Offset = NumElems;
22758 } else {
22759 assert(InVT2Size <= InVT1Size &&
22760 "Second input is not going to be larger than the first one.");
22761
22762 // VecIn1 is wider than the output, and we have another, possibly
22763 // smaller input. Pad the smaller input with undefs, shuffle at the
22764 // input vector width, and extract the output.
22765 // The shuffle type is different than VT, so check legality again.
22766 if (LegalOperations &&
22767 !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
22768 return SDValue();
22769
22770 // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
22771 // lower it back into a BUILD_VECTOR. So if the inserted type is
22772 // illegal, don't even try.
22773 if (InVT1 != InVT2) {
22774 if (!TLI.isTypeLegal(InVT2))
22775 return SDValue();
22776 VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
22777 DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
22778 }
22779 ShuffleNumElems = NumElems * 2;
22780 }
22781 } else if (InVT2Size * 2 == VTSize && InVT1Size == VTSize) {
22782 SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
22783 ConcatOps[0] = VecIn2;
22784 VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
22785 } else if (InVT1Size / VTSize > 1 && InVT1Size % VTSize == 0) {
22786 if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems) ||
22787 !TLI.isTypeLegal(InVT1) || !TLI.isTypeLegal(InVT2))
22788 return SDValue();
22789 // If the dest vector has fewer than two elements, then using a shuffle and
22790 // an extract from larger registers will cost even more.
22791 if (VT.getVectorNumElements() <= 2 || !VecIn2.getNode())
22792 return SDValue();
22793 assert(InVT2Size <= InVT1Size &&
22794 "Second input is not going to be larger than the first one.");
22795
22796 // VecIn1 is wider than the output, and we have another, possibly
22797 // smaller input. Pad the smaller input with undefs, shuffle at the
22798 // input vector width, and extract the output.
22799 // The shuffle type is different than VT, so check legality again.
22800 if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
22801 return SDValue();
22802
22803 if (InVT1 != InVT2) {
22804 VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
22805 DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
22806 }
22807 ShuffleNumElems = InVT1Size / VTSize * NumElems;
22808 } else {
22809 // TODO: Support cases where the length mismatch isn't exactly by a
22810 // factor of 2.
22811 // TODO: Move this check upwards, so that if we have bad type
22812 // mismatches, we don't create any DAG nodes.
22813 return SDValue();
22814 }
22815 }
22816
22817 // Initialize mask to undef.
22818 SmallVector<int, 8> Mask(ShuffleNumElems, -1);
22819
22820 // Only need to run up to the number of elements actually used, not the
22821 // total number of elements in the shuffle - if we are shuffling a wider
22822 // vector, the high lanes should be set to undef.
22823 for (unsigned i = 0; i != NumElems; ++i) {
22824 if (VectorMask[i] <= 0)
22825 continue;
22826
22827 unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
22828 if (VectorMask[i] == (int)LeftIdx) {
22829 Mask[i] = ExtIndex;
22830 } else if (VectorMask[i] == (int)LeftIdx + 1) {
22831 Mask[i] = Vec2Offset + ExtIndex;
22832 }
22833 }
22834
22835 // The types of the input vectors may have changed above.
22836 InVT1 = VecIn1.getValueType();
22837
22838 // If we already have a VecIn2, it should have the same type as VecIn1.
22839 // If we don't, get an undef/zero vector of the appropriate type.
22840 VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
22841 assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");
22842
22843 SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
22844 if (ShuffleNumElems > NumElems)
22845 Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);
22846
22847 return Shuffle;
22848}
22849
22850 static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
22851 assert(BV->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
22852
22853 // First, determine where the build vector is not undef.
22854 // TODO: We could extend this to handle zero elements as well as undefs.
22855 int NumBVOps = BV->getNumOperands();
22856 int ZextElt = -1;
22857 for (int i = 0; i != NumBVOps; ++i) {
22858 SDValue Op = BV->getOperand(i);
22859 if (Op.isUndef())
22860 continue;
22861 if (ZextElt == -1)
22862 ZextElt = i;
22863 else
22864 return SDValue();
22865 }
22866 // Bail out if there's no non-undef element.
22867 if (ZextElt == -1)
22868 return SDValue();
22869
22870 // The build vector contains some number of undef elements and exactly
22871 // one other element. That other element must be a zero-extended scalar
22872 // extracted from a vector at a constant index to turn this into a shuffle.
22873 // Also, require that the build vector does not implicitly truncate/extend
22874 // its elements.
22875 // TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND.
22876 EVT VT = BV->getValueType(0);
22877 SDValue Zext = BV->getOperand(ZextElt);
22878 if (Zext.getOpcode() != ISD::ZERO_EXTEND || !Zext.hasOneUse() ||
22879 Zext.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
22880 !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)) ||
22881 Zext.getValueSizeInBits() != VT.getScalarSizeInBits())
22882 return SDValue();
22883
22884 // The zero-extend must be a multiple of the source size, and we must be
22885 // building a vector of the same size as the source of the extract element.
22886 SDValue Extract = Zext.getOperand(0);
22887 unsigned DestSize = Zext.getValueSizeInBits();
22888 unsigned SrcSize = Extract.getValueSizeInBits();
22889 if (DestSize % SrcSize != 0 ||
22890 Extract.getOperand(0).getValueSizeInBits() != VT.getSizeInBits())
22891 return SDValue();
22892
22893 // Create a shuffle mask that will combine the extracted element with zeros
22894 // and undefs.
22895 int ZextRatio = DestSize / SrcSize;
22896 int NumMaskElts = NumBVOps * ZextRatio;
22897 SmallVector<int, 32> ShufMask(NumMaskElts, -1);
22898 for (int i = 0; i != NumMaskElts; ++i) {
22899 if (i / ZextRatio == ZextElt) {
22900 // The low bits of the (potentially translated) extracted element map to
22901 // the source vector. The high bits map to zero. We will use a zero vector
22902 // as the 2nd source operand of the shuffle, so use the 1st element of
22903 // that vector (mask value is number-of-elements) for the high bits.
22904 int Low = DAG.getDataLayout().isBigEndian() ? (ZextRatio - 1) : 0;
22905 ShufMask[i] = (i % ZextRatio == Low) ? Extract.getConstantOperandVal(1)
22906 : NumMaskElts;
22907 }
22908
22909 // Undef elements of the build vector remain undef because we initialize
22910 // the shuffle mask with -1.
22911 }
22912
22913 // buildvec undef, ..., (zext (extractelt V, IndexC)), undef... -->
22914 // bitcast (shuffle V, ZeroVec, VectorMask)
22915 SDLoc DL(BV);
22916 EVT VecVT = Extract.getOperand(0).getValueType();
22917 SDValue ZeroVec = DAG.getConstant(0, DL, VecVT);
22918 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
22919 SDValue Shuf = TLI.buildLegalVectorShuffle(VecVT, DL, Extract.getOperand(0),
22920 ZeroVec, ShufMask, DAG);
22921 if (!Shuf)
22922 return SDValue();
22923 return DAG.getBitcast(VT, Shuf);
22924}
22925
22926// FIXME: promote to STLExtras.
22927template <typename R, typename T>
22928static auto getFirstIndexOf(R &&Range, const T &Val) {
22929 auto I = find(Range, Val);
22930 if (I == Range.end())
22931 return static_cast<decltype(std::distance(Range.begin(), I))>(-1);
22932 return std::distance(Range.begin(), I);
22933}
22934
22935// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
22936// operations. If the types of the vectors we're extracting from allow it,
22937// turn this into a vector_shuffle node.
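// Illustrative example (not from the original source): assuming v4i32 inputs
// t1 and t2,
//   (v4i32 build_vector (extractelt t1, 0), (extractelt t2, 0),
//                       (extractelt t1, 1), (extractelt t2, 1))
// can become (v4i32 vector_shuffle<0,4,1,5> t1, t2).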
22938SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
22939 SDLoc DL(N);
22940 EVT VT = N->getValueType(0);
22941
22942 // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
22943 if (!isTypeLegal(VT))
22944 return SDValue();
22945
22946 if (SDValue V = reduceBuildVecToShuffleWithZero(N, DAG))
22947 return V;
22948
22949 // May only combine to shuffle after legalize if shuffle is legal.
22950 if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
22951 return SDValue();
22952
22953 bool UsesZeroVector = false;
22954 unsigned NumElems = N->getNumOperands();
22955
22956 // Record, for each element of the newly built vector, which input vector
22957 // that element comes from. -1 stands for undef, 0 for the zero vector,
22958 // and positive values for the input vectors.
22959 // VectorMask maps each element to its vector number, and VecIn maps vector
22960 // numbers to their initial SDValues.
22961
22962 SmallVector<int, 8> VectorMask(NumElems, -1);
22963 SmallVector<SDValue, 8> VecIn;
22964 VecIn.push_back(SDValue());
22965
22966 for (unsigned i = 0; i != NumElems; ++i) {
22967 SDValue Op = N->getOperand(i);
22968
22969 if (Op.isUndef())
22970 continue;
22971
22972 // See if we can use a blend with a zero vector.
22973 // TODO: Should we generalize this to a blend with an arbitrary constant
22974 // vector?
22975 if (isNullConstant(Op) || isNullFPConstant(Op)) {
22976 UsesZeroVector = true;
22977 VectorMask[i] = 0;
22978 continue;
22979 }
22980
22981 // Not an undef or zero. If the input is something other than an
22982 // EXTRACT_VECTOR_ELT with an in-range constant index, bail out.
22983 if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
22984 !isa<ConstantSDNode>(Op.getOperand(1)))
22985 return SDValue();
22986 SDValue ExtractedFromVec = Op.getOperand(0);
22987
22988 if (ExtractedFromVec.getValueType().isScalableVector())
22989 return SDValue();
22990
22991 const APInt &ExtractIdx = Op.getConstantOperandAPInt(1);
22992 if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements()))
22993 return SDValue();
22994
22995 // All inputs must have the same element type as the output.
22996 if (VT.getVectorElementType() !=
22997 ExtractedFromVec.getValueType().getVectorElementType())
22998 return SDValue();
22999
23000 // Have we seen this input vector before?
23001 // The vectors are expected to be tiny (usually 1 or 2 elements), so using
23002 // a map back from SDValues to numbers isn't worth it.
23003 int Idx = getFirstIndexOf(VecIn, ExtractedFromVec);
23004 if (Idx == -1) { // A new source vector?
23005 Idx = VecIn.size();
23006 VecIn.push_back(ExtractedFromVec);
23007 }
23008
23009 VectorMask[i] = Idx;
23010 }
23011
23012 // If we didn't find at least one input vector, bail out.
23013 if (VecIn.size() < 2)
23014 return SDValue();
23015
23016 // If all the operands of the BUILD_VECTOR extract from the same
23017 // vector, then split the vector efficiently based on the maximum
23018 // vector access index and adjust the VectorMask and
23019 // VecIn accordingly.
23020 bool DidSplitVec = false;
23021 if (VecIn.size() == 2) {
23022 unsigned MaxIndex = 0;
23023 unsigned NearestPow2 = 0;
23024 SDValue Vec = VecIn.back();
23025 EVT InVT = Vec.getValueType();
23026 SmallVector<unsigned, 8> IndexVec(NumElems, 0);
23027
23028 for (unsigned i = 0; i < NumElems; i++) {
23029 if (VectorMask[i] <= 0)
23030 continue;
23031 unsigned Index = N->getOperand(i).getConstantOperandVal(1);
23032 IndexVec[i] = Index;
23033 MaxIndex = std::max(MaxIndex, Index);
23034 }
23035
23036 NearestPow2 = PowerOf2Ceil(MaxIndex);
23037 if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 &&
23038 NumElems * 2 < NearestPow2) {
23039 unsigned SplitSize = NearestPow2 / 2;
23040 EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
23041 InVT.getVectorElementType(), SplitSize);
23042 if (TLI.isTypeLegal(SplitVT) &&
23043 SplitSize + SplitVT.getVectorNumElements() <=
23044 InVT.getVectorNumElements()) {
23045 SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
23046 DAG.getVectorIdxConstant(SplitSize, DL));
23047 SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
23048 DAG.getVectorIdxConstant(0, DL));
23049 VecIn.pop_back();
23050 VecIn.push_back(VecIn1);
23051 VecIn.push_back(VecIn2);
23052 DidSplitVec = true;
23053
23054 for (unsigned i = 0; i < NumElems; i++) {
23055 if (VectorMask[i] <= 0)
23056 continue;
23057 VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2;
23058 }
23059 }
23060 }
23061 }
23062
23063 // Sort input vectors by decreasing vector element count,
23064 // while preserving the relative order of equally-sized vectors.
23065 // Note that we keep the first "implicit" zero vector as-is.
23066 SmallVector<SDValue, 8> SortedVecIn(VecIn);
23067 llvm::stable_sort(MutableArrayRef<SDValue>(SortedVecIn).drop_front(),
23068 [](const SDValue &a, const SDValue &b) {
23069 return a.getValueType().getVectorNumElements() >
23070 b.getValueType().getVectorNumElements();
23071 });
23072
23073 // We now also need to rebuild the VectorMask, because it referenced element
23074 // order in VecIn, and we just sorted them.
23075 for (int &SourceVectorIndex : VectorMask) {
23076 if (SourceVectorIndex <= 0)
23077 continue;
23078 unsigned Idx = getFirstIndexOf(SortedVecIn, VecIn[SourceVectorIndex]);
23079 assert(Idx > 0 && Idx < SortedVecIn.size() &&
23080 VecIn[SourceVectorIndex] == SortedVecIn[Idx] && "Remapping failure");
23081 SourceVectorIndex = Idx;
23082 }
23083
23084 VecIn = std::move(SortedVecIn);
23085
23086 // TODO: Should this fire if some of the input vectors has illegal type (like
23087 // it does now), or should we let legalization run its course first?
23088
23089 // Shuffle phase:
23090 // Take pairs of vectors, and shuffle them so that the result has elements
23091 // from these vectors in the correct places.
23092 // For example, given:
23093 // t10: i32 = extract_vector_elt t1, Constant:i64<0>
23094 // t11: i32 = extract_vector_elt t2, Constant:i64<0>
23095 // t12: i32 = extract_vector_elt t3, Constant:i64<0>
23096 // t13: i32 = extract_vector_elt t1, Constant:i64<1>
23097 // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
23098 // We will generate:
23099 // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
23100 // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
23101 SmallVector<SDValue, 4> Shuffles;
23102 for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
23103 unsigned LeftIdx = 2 * In + 1;
23104 SDValue VecLeft = VecIn[LeftIdx];
23105 SDValue VecRight =
23106 (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
23107
23108 if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
23109 VecRight, LeftIdx, DidSplitVec))
23110 Shuffles.push_back(Shuffle);
23111 else
23112 return SDValue();
23113 }
23114
23115 // If we need the zero vector as an "ingredient" in the blend tree, add it
23116 // to the list of shuffles.
23117 if (UsesZeroVector)
23118 Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
23119 : DAG.getConstantFP(0.0, DL, VT));
23120
23121 // If we only have one shuffle, we're done.
23122 if (Shuffles.size() == 1)
23123 return Shuffles[0];
23124
23125 // Update the vector mask to point to the post-shuffle vectors.
23126 for (int &Vec : VectorMask)
23127 if (Vec == 0)
23128 Vec = Shuffles.size() - 1;
23129 else
23130 Vec = (Vec - 1) / 2;
23131
23132 // More than one shuffle. Generate a binary tree of blends, e.g. if from
23133 // the previous step we got the set of shuffles t10, t11, t12, t13, we will
23134 // generate:
23135 // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
23136 // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
23137 // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
23138 // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
23139 // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
23140 // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
23141 // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21
23142
23143 // Make sure the initial size of the shuffle list is even.
23144 if (Shuffles.size() % 2)
23145 Shuffles.push_back(DAG.getUNDEF(VT));
23146
23147 for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
23148 if (CurSize % 2) {
23149 Shuffles[CurSize] = DAG.getUNDEF(VT);
23150 CurSize++;
23151 }
23152 for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
23153 int Left = 2 * In;
23154 int Right = 2 * In + 1;
23155 SmallVector<int, 8> Mask(NumElems, -1);
23156 SDValue L = Shuffles[Left];
23157 ArrayRef<int> LMask;
23158 bool IsLeftShuffle = L.getOpcode() == ISD::VECTOR_SHUFFLE &&
23159 L.use_empty() && L.getOperand(1).isUndef() &&
23160 L.getOperand(0).getValueType() == L.getValueType();
23161 if (IsLeftShuffle) {
23162 LMask = cast<ShuffleVectorSDNode>(L.getNode())->getMask();
23163 L = L.getOperand(0);
23164 }
23165 SDValue R = Shuffles[Right];
23166 ArrayRef<int> RMask;
23167 bool IsRightShuffle = R.getOpcode() == ISD::VECTOR_SHUFFLE &&
23168 R.use_empty() && R.getOperand(1).isUndef() &&
23169 R.getOperand(0).getValueType() == R.getValueType();
23170 if (IsRightShuffle) {
23171 RMask = cast<ShuffleVectorSDNode>(R.getNode())->getMask();
23172 R = R.getOperand(0);
23173 }
23174 for (unsigned I = 0; I != NumElems; ++I) {
23175 if (VectorMask[I] == Left) {
23176 Mask[I] = I;
23177 if (IsLeftShuffle)
23178 Mask[I] = LMask[I];
23179 VectorMask[I] = In;
23180 } else if (VectorMask[I] == Right) {
23181 Mask[I] = I + NumElems;
23182 if (IsRightShuffle)
23183 Mask[I] = RMask[I] + NumElems;
23184 VectorMask[I] = In;
23185 }
23186 }
23187
23188 Shuffles[In] = DAG.getVectorShuffle(VT, DL, L, R, Mask);
23189 }
23190 }
23191 return Shuffles[0];
23192}
23193
23194 // Try to turn a build vector of zero extends of extract vector elts into a
23195 // vector zero extend and possibly an extract subvector.
23196// TODO: Support sign extend?
23197// TODO: Allow undef elements?
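// Illustrative example (not from the original source): assuming a v4i32
// result and a v8i16 source X,
//   (v4i32 build_vector (zext (extractelt X, 4)), (zext (extractelt X, 5)),
//                       (zext (extractelt X, 6)), (zext (extractelt X, 7)))
// becomes (v4i32 zero_extend (v4i16 extract_subvector X, 4)).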
23198SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
23199 if (LegalOperations)
23200 return SDValue();
23201
23202 EVT VT = N->getValueType(0);
23203
23204 bool FoundZeroExtend = false;
23205 SDValue Op0 = N->getOperand(0);
23206 auto checkElem = [&](SDValue Op) -> int64_t {
23207 unsigned Opc = Op.getOpcode();
23208 FoundZeroExtend |= (Opc == ISD::ZERO_EXTEND);
23209 if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND) &&
23210 Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
23211 Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
23212 if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
23213 return C->getZExtValue();
23214 return -1;
23215 };
23216
23217 // Make sure the first element matches
23218 // (zext (extract_vector_elt X, C))
23219 // Offset must be a constant multiple of the
23220 // known-minimum vector length of the result type.
23221 int64_t Offset = checkElem(Op0);
23222 if (Offset < 0 || (Offset % VT.getVectorNumElements()) != 0)
23223 return SDValue();
23224
23225 unsigned NumElems = N->getNumOperands();
23226 SDValue In = Op0.getOperand(0).getOperand(0);
23227 EVT InSVT = In.getValueType().getScalarType();
23228 EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems);
23229
23230 // Don't create an illegal input type after type legalization.
23231 if (LegalTypes && !TLI.isTypeLegal(InVT))
23232 return SDValue();
23233
23234 // Ensure all the elements come from the same vector and are adjacent.
23235 for (unsigned i = 1; i != NumElems; ++i) {
23236 if ((Offset + i) != checkElem(N->getOperand(i)))
23237 return SDValue();
23238 }
23239
23240 SDLoc DL(N);
23241 In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
23242 Op0.getOperand(0).getOperand(1));
23243 return DAG.getNode(FoundZeroExtend ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND, DL,
23244 VT, In);
23245}
23246
23247 // If this is a very simple BUILD_VECTOR whose first element is a ZERO_EXTEND,
23248 // and all other elements are constant zeros, granularize the BUILD_VECTOR's
23249 // element width, absorbing the ZERO_EXTEND and turning it into a constant zero op.
23250 // This pattern can appear during legalization.
23251//
23252 // NOTE: This can be generalized to allow more than a single
23253 // non-constant-zero op, UNDEFs, and to be KnownBits-based.
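// Illustrative example (not from the original source): assuming a legal i32
// type,
//   (v2i64 build_vector (i64 zero_extend (i32 x)), (i64 0))
// can be rebuilt as
//   (v2i64 bitcast (v4i32 build_vector x, 0, 0, 0)),
// so the zero_extend is absorbed into a wider, finer-grained build_vector.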
23254SDValue DAGCombiner::convertBuildVecZextToBuildVecWithZeros(SDNode *N) {
23255 // Don't run this after legalization. Targets may have other preferences.
23256 if (Level >= AfterLegalizeDAG)
23257 return SDValue();
23258
23259 // FIXME: support big-endian.
23260 if (DAG.getDataLayout().isBigEndian())
23261 return SDValue();
23262
23263 EVT VT = N->getValueType(0);
23264 EVT OpVT = N->getOperand(0).getValueType();
23265 assert(!VT.isScalableVector() && "Encountered scalable BUILD_VECTOR?");
23266
23267 EVT OpIntVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
23268
23269 if (!TLI.isTypeLegal(OpIntVT) ||
23270 (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::BITCAST, OpIntVT)))
23271 return SDValue();
23272
23273 unsigned EltBitwidth = VT.getScalarSizeInBits();
23274 // NOTE: the actual width of operands may be wider than that!
23275
23276 // Analyze all operands of this BUILD_VECTOR. What is the largest number of
23277 // active bits they all have? We'll want to truncate them all to that width.
23278 unsigned ActiveBits = 0;
23279 APInt KnownZeroOps(VT.getVectorNumElements(), 0);
23280 for (auto I : enumerate(N->ops())) {
23281 SDValue Op = I.value();
23282 // FIXME: support UNDEF elements?
23283 if (auto *Cst = dyn_cast<ConstantSDNode>(Op)) {
23284 unsigned OpActiveBits =
23285 Cst->getAPIntValue().trunc(EltBitwidth).getActiveBits();
23286 if (OpActiveBits == 0) {
23287 KnownZeroOps.setBit(I.index());
23288 continue;
23289 }
23290 // Profitability check: don't allow non-zero constant operands.
23291 return SDValue();
23292 }
23293 // Profitability check: there must only be a single non-zero operand,
23294 // and it must be the first operand of the BUILD_VECTOR.
23295 if (I.index() != 0)
23296 return SDValue();
23297 // The operand must be a zero-extension itself.
23298 // FIXME: this could be generalized to known leading zeros check.
23299 if (Op.getOpcode() != ISD::ZERO_EXTEND)
23300 return SDValue();
23301 unsigned CurrActiveBits =
23302 Op.getOperand(0).getValueSizeInBits().getFixedValue();
23303 assert(!ActiveBits && "Already encountered non-constant-zero operand?");
23304 ActiveBits = CurrActiveBits;
23305 // We want to at least halve the element size.
23306 if (2 * ActiveBits > EltBitwidth)
23307 return SDValue();
23308 }
23309
23310 // This BUILD_VECTOR must have at least one non-constant-zero operand.
23311 if (ActiveBits == 0)
23312 return SDValue();
23313
23314 // We have EltBitwidth bits, the *minimal* chunk size is ActiveBits,
23315 // into how many chunks can we split our element width?
23316 EVT NewScalarIntVT, NewIntVT;
23317 std::optional<unsigned> Factor;
23318 // We can split the element into at least two chunks, but not into more
23319 // than |_ EltBitwidth / ActiveBits _| chunks. Find a largest split factor
23320 // for which the element width is a multiple of it,
23321 // and the resulting types/operations on that chunk width are legal.
23322 assert(2 * ActiveBits <= EltBitwidth &&
23323 "We know that half or less bits of the element are active.");
23324 for (unsigned Scale = EltBitwidth / ActiveBits; Scale >= 2; --Scale) {
23325 if (EltBitwidth % Scale != 0)
23326 continue;
23327 unsigned ChunkBitwidth = EltBitwidth / Scale;
23328 assert(ChunkBitwidth >= ActiveBits && "As per starting point.");
23329 NewScalarIntVT = EVT::getIntegerVT(*DAG.getContext(), ChunkBitwidth);
23330 NewIntVT = EVT::getVectorVT(*DAG.getContext(), NewScalarIntVT,
23331 Scale * N->getNumOperands());
23332 if (!TLI.isTypeLegal(NewScalarIntVT) || !TLI.isTypeLegal(NewIntVT) ||
23333 (LegalOperations &&
23334 !(TLI.isOperationLegalOrCustom(ISD::TRUNCATE, NewScalarIntVT) &&
23336 continue;
23337 Factor = Scale;
23338 break;
23339 }
23340 if (!Factor)
23341 return SDValue();
23342
23343 SDLoc DL(N);
23344 SDValue ZeroOp = DAG.getConstant(0, DL, NewScalarIntVT);
23345
23346 // Recreate the BUILD_VECTOR, with elements now being Factor times smaller.
23348 NewOps.reserve(NewIntVT.getVectorNumElements());
23349 for (auto I : enumerate(N->ops())) {
23350 SDValue Op = I.value();
23351 assert(!Op.isUndef() && "FIXME: after allowing UNDEF's, handle them here.");
23352 unsigned SrcOpIdx = I.index();
23353 if (KnownZeroOps[SrcOpIdx]) {
23354 NewOps.append(*Factor, ZeroOp);
23355 continue;
23356 }
23357 Op = DAG.getBitcast(OpIntVT, Op);
23358 Op = DAG.getNode(ISD::TRUNCATE, DL, NewScalarIntVT, Op);
23359 NewOps.emplace_back(Op);
23360 NewOps.append(*Factor - 1, ZeroOp);
23361 }
23362 assert(NewOps.size() == NewIntVT.getVectorNumElements());
23363 SDValue NewBV = DAG.getBuildVector(NewIntVT, DL, NewOps);
23364 NewBV = DAG.getBitcast(VT, NewBV);
23365 return NewBV;
23366}
23367
23368SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
23369 EVT VT = N->getValueType(0);
23370
23371 // A vector built entirely of undefs is undef.
23372 if (ISD::allOperandsUndef(N))
23373 return DAG.getUNDEF(VT);
23374
23375 // If this is a splat of a bitcast from another vector, change to a
23376 // concat_vector.
23377 // For example:
23378 // (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) ->
23379 // (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X))))
23380 //
23381 // If X is a build_vector itself, the concat can become a larger build_vector.
23382 // TODO: Maybe this is useful for non-splat too?
23383 if (!LegalOperations) {
23384 if (SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue()) {
23385 Splat = peekThroughBitcasts(Splat);
23386 EVT SrcVT = Splat.getValueType();
23387 if (SrcVT.isVector()) {
23388 unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
23389 EVT NewVT = EVT::getVectorVT(*DAG.getContext(),
23390 SrcVT.getVectorElementType(), NumElts);
23391 if (!LegalTypes || TLI.isTypeLegal(NewVT)) {
23392 SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat);
23393 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N),
23394 NewVT, Ops);
23395 return DAG.getBitcast(VT, Concat);
23396 }
23397 }
23398 }
23399 }
23400
23401 // Check if we can express BUILD VECTOR via subvector extract.
23402 if (!LegalTypes && (N->getNumOperands() > 1)) {
23403 SDValue Op0 = N->getOperand(0);
23404 auto checkElem = [&](SDValue Op) -> uint64_t {
23405 if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
23406 (Op0.getOperand(0) == Op.getOperand(0)))
23407 if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
23408 return CNode->getZExtValue();
23409 return -1;
23410 };
23411
23412 int Offset = checkElem(Op0);
23413 for (unsigned i = 0; i < N->getNumOperands(); ++i) {
23414 if (Offset + i != checkElem(N->getOperand(i))) {
23415 Offset = -1;
23416 break;
23417 }
23418 }
23419
23420 if ((Offset == 0) &&
23421 (Op0.getOperand(0).getValueType() == N->getValueType(0)))
23422 return Op0.getOperand(0);
23423 if ((Offset != -1) &&
23424 ((Offset % N->getValueType(0).getVectorNumElements()) ==
23425 0)) // IDX must be multiple of output size.
23426 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
23427 Op0.getOperand(0), Op0.getOperand(1));
23428 }
23429
23430 if (SDValue V = convertBuildVecZextToZext(N))
23431 return V;
23432
23433 if (SDValue V = convertBuildVecZextToBuildVecWithZeros(N))
23434 return V;
23435
23436 if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
23437 return V;
23438
23439 if (SDValue V = reduceBuildVecTruncToBitCast(N))
23440 return V;
23441
23442 if (SDValue V = reduceBuildVecToShuffle(N))
23443 return V;
23444
23445 // A splat of a single element is a SPLAT_VECTOR if supported on the target.
23446 // Do this late as some of the above may replace the splat.
23447 if (TLI.getOperationAction(ISD::SPLAT_VECTOR, VT) == TargetLowering::Legal)
23448 if (SDValue V = cast<BuildVectorSDNode>(N)->getSplatValue()) {
23449 assert(!V.isUndef() && "Splat of undef should have been handled earlier");
23450 return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, V);
23451 }
23452
23453 return SDValue();
23454}
23455
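// Illustrative example (not from the original source): assuming i64 scalars
// x and y that were bitcast into vectors,
//   concat_vectors (v2i32 bitcast i64:x), (v2i32 bitcast i64:y)
// can be rebuilt as (v4i32 bitcast (v2i64 build_vector x, y)).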
23456 static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
23457 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
23458 EVT OpVT = N->getOperand(0).getValueType();
23459
23460 // If the operands are legal vectors, leave them alone.
23461 if (TLI.isTypeLegal(OpVT) || OpVT.isScalableVector())
23462 return SDValue();
23463
23464 SDLoc DL(N);
23465 EVT VT = N->getValueType(0);
23466 SmallVector<SDValue, 8> Ops;
23467 EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
23468
23469 // Keep track of what we encounter.
23470 bool AnyInteger = false;
23471 bool AnyFP = false;
23472 for (const SDValue &Op : N->ops()) {
23473 if (ISD::BITCAST == Op.getOpcode() &&
23474 !Op.getOperand(0).getValueType().isVector())
23475 Ops.push_back(Op.getOperand(0));
23476 else if (ISD::UNDEF == Op.getOpcode())
23477 Ops.push_back(DAG.getNode(ISD::UNDEF, DL, SVT));
23478 else
23479 return SDValue();
23480
23481 // Note whether we encounter an integer or floating point scalar.
23482 // If it's neither, bail out, it could be something weird like x86mmx.
23483 EVT LastOpVT = Ops.back().getValueType();
23484 if (LastOpVT.isFloatingPoint())
23485 AnyFP = true;
23486 else if (LastOpVT.isInteger())
23487 AnyInteger = true;
23488 else
23489 return SDValue();
23490 }
23491
23492 // If any of the operands is a floating point scalar bitcast to a vector,
23493 // use floating point types throughout, and bitcast everything.
23494 // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
23495 if (AnyFP) {
23496 SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
23497 if (AnyInteger) {
23498 for (SDValue &Op : Ops) {
23499 if (Op.getValueType() == SVT)
23500 continue;
23501 if (Op.isUndef())
23502 Op = DAG.getNode(ISD::UNDEF, DL, SVT);
23503 else
23504 Op = DAG.getBitcast(SVT, Op);
23505 }
23506 }
23507 }
23508
23509 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
23510 VT.getSizeInBits() / SVT.getSizeInBits());
23511 return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
23512}
23513
23514// Attempt to merge nested concat_vectors/undefs.
23515// Fold concat_vectors(concat_vectors(x,y,z,w),u,u,concat_vectors(a,b,c,d))
23516// --> concat_vectors(x,y,z,w,u,u,u,u,u,u,u,u,a,b,c,d)
23517 static SDValue combineConcatVectorOfConcatVectors(SDNode *N,
23518 SelectionDAG &DAG) {
23519 EVT VT = N->getValueType(0);
23520
23521 // Ensure we're concatenating UNDEF and CONCAT_VECTORS nodes of similar types.
23522 EVT SubVT;
23523 SDValue FirstConcat;
23524 for (const SDValue &Op : N->ops()) {
23525 if (Op.isUndef())
23526 continue;
23527 if (Op.getOpcode() != ISD::CONCAT_VECTORS)
23528 return SDValue();
23529 if (!FirstConcat) {
23530 SubVT = Op.getOperand(0).getValueType();
23531 if (!DAG.getTargetLoweringInfo().isTypeLegal(SubVT))
23532 return SDValue();
23533 FirstConcat = Op;
23534 continue;
23535 }
23536 if (SubVT != Op.getOperand(0).getValueType())
23537 return SDValue();
23538 }
23539 assert(FirstConcat && "Concat of all-undefs found");
23540
23541 SmallVector<SDValue> ConcatOps;
23542 for (const SDValue &Op : N->ops()) {
23543 if (Op.isUndef()) {
23544 ConcatOps.append(FirstConcat->getNumOperands(), DAG.getUNDEF(SubVT));
23545 continue;
23546 }
23547 ConcatOps.append(Op->op_begin(), Op->op_end());
23548 }
23549 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, ConcatOps);
23550}
23551
23552// Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
23553// operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
23554// most two distinct vectors the same size as the result, attempt to turn this
23555// into a legal shuffle.
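// Illustrative example (not from the original source): assuming v4i32 sources
// A and B that are the same size as the result,
//   concat_vectors (extract_subvector A, 0), (extract_subvector B, 2)
// can be rebuilt as (v4i32 vector_shuffle<0,1,6,7> A, B).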
23556 static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
23557 EVT VT = N->getValueType(0);
23558 EVT OpVT = N->getOperand(0).getValueType();
23559
23560 // We currently can't generate an appropriate shuffle for a scalable vector.
23561 if (VT.isScalableVector())
23562 return SDValue();
23563
23564 int NumElts = VT.getVectorNumElements();
23565 int NumOpElts = OpVT.getVectorNumElements();
23566
23567 SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
23568 SmallVector<int, 8> Mask;
23569
23570 for (SDValue Op : N->ops()) {
23571 Op = peekThroughBitcasts(Op);
23572
23573 // UNDEF nodes convert to UNDEF shuffle mask values.
23574 if (Op.isUndef()) {
23575 Mask.append((unsigned)NumOpElts, -1);
23576 continue;
23577 }
23578
23579 if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
23580 return SDValue();
23581
23582 // What vector are we extracting the subvector from and at what index?
23583 SDValue ExtVec = Op.getOperand(0);
23584 int ExtIdx = Op.getConstantOperandVal(1);
23585
23586 // We want the EVT of the original extraction to correctly scale the
23587 // extraction index.
23588 EVT ExtVT = ExtVec.getValueType();
23589 ExtVec = peekThroughBitcasts(ExtVec);
23590
23591 // UNDEF nodes convert to UNDEF shuffle mask values.
23592 if (ExtVec.isUndef()) {
23593 Mask.append((unsigned)NumOpElts, -1);
23594 continue;
23595 }
23596
23597 // Ensure that we are extracting a subvector from a vector the same
23598 // size as the result.
23599 if (ExtVT.getSizeInBits() != VT.getSizeInBits())
23600 return SDValue();
23601
23602 // Scale the subvector index to account for any bitcast.
23603 int NumExtElts = ExtVT.getVectorNumElements();
23604 if (0 == (NumExtElts % NumElts))
23605 ExtIdx /= (NumExtElts / NumElts);
23606 else if (0 == (NumElts % NumExtElts))
23607 ExtIdx *= (NumElts / NumExtElts);
23608 else
23609 return SDValue();
23610
23611 // At most we can reference 2 inputs in the final shuffle.
23612 if (SV0.isUndef() || SV0 == ExtVec) {
23613 SV0 = ExtVec;
23614 for (int i = 0; i != NumOpElts; ++i)
23615 Mask.push_back(i + ExtIdx);
23616 } else if (SV1.isUndef() || SV1 == ExtVec) {
23617 SV1 = ExtVec;
23618 for (int i = 0; i != NumOpElts; ++i)
23619 Mask.push_back(i + ExtIdx + NumElts);
23620 } else {
23621 return SDValue();
23622 }
23623 }
23624
23625 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
23626 return TLI.buildLegalVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
23627 DAG.getBitcast(VT, SV1), Mask, DAG);
23628}
23629
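// Illustrative example (not from the original source): assuming the wider
// operation is legal or custom on the target,
//   concat_vectors (v2f32 sint_to_fp v2i32:a), (v2f32 sint_to_fp v2i32:b)
// can be rewritten as (v4f32 sint_to_fp (v4i32 concat_vectors a, b)).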
23630 static SDValue combineConcatVectorOfCasts(SDNode *N, SelectionDAG &DAG) {
23631 unsigned CastOpcode = N->getOperand(0).getOpcode();
23632 switch (CastOpcode) {
23633 case ISD::SINT_TO_FP:
23634 case ISD::UINT_TO_FP:
23635 case ISD::FP_TO_SINT:
23636 case ISD::FP_TO_UINT:
23637 // TODO: Allow more opcodes?
23638 // case ISD::BITCAST:
23639 // case ISD::TRUNCATE:
23640 // case ISD::ZERO_EXTEND:
23641 // case ISD::SIGN_EXTEND:
23642 // case ISD::FP_EXTEND:
23643 break;
23644 default:
23645 return SDValue();
23646 }
23647
23648 EVT SrcVT = N->getOperand(0).getOperand(0).getValueType();
23649 if (!SrcVT.isVector())
23650 return SDValue();
23651
23652 // All operands of the concat must be the same kind of cast from the same
23653 // source type.
23654 SmallVector<SDValue, 4> SrcOps;
23655 for (SDValue Op : N->ops()) {
23656 if (Op.getOpcode() != CastOpcode || !Op.hasOneUse() ||
23657 Op.getOperand(0).getValueType() != SrcVT)
23658 return SDValue();
23659 SrcOps.push_back(Op.getOperand(0));
23660 }
23661
23662 // The wider cast must be supported by the target. This is unusual because
23663 // the operation support type parameter depends on the opcode. In addition,
23664 // check the other type in the cast to make sure this is really legal.
23665 EVT VT = N->getValueType(0);
23666 EVT SrcEltVT = SrcVT.getVectorElementType();
23667 ElementCount NumElts = SrcVT.getVectorElementCount() * N->getNumOperands();
23668 EVT ConcatSrcVT = EVT::getVectorVT(*DAG.getContext(), SrcEltVT, NumElts);
23669 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
23670 switch (CastOpcode) {
23671 case ISD::SINT_TO_FP:
23672 case ISD::UINT_TO_FP:
23673 if (!TLI.isOperationLegalOrCustom(CastOpcode, ConcatSrcVT) ||
23674 !TLI.isTypeLegal(VT))
23675 return SDValue();
23676 break;
23677 case ISD::FP_TO_SINT:
23678 case ISD::FP_TO_UINT:
23679 if (!TLI.isOperationLegalOrCustom(CastOpcode, VT) ||
23680 !TLI.isTypeLegal(ConcatSrcVT))
23681 return SDValue();
23682 break;
23683 default:
23684 llvm_unreachable("Unexpected cast opcode");
23685 }
23686
23687 // concat (cast X), (cast Y)... -> cast (concat X, Y...)
23688 SDLoc DL(N);
23689 SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatSrcVT, SrcOps);
23690 return DAG.getNode(CastOpcode, DL, VT, NewConcat);
23691}
23692
23693// See if this is a simple CONCAT_VECTORS with no UNDEF operands, and if one of
23694// the operands is a SHUFFLE_VECTOR, and all other operands are also operands
23695// to that SHUFFLE_VECTOR, create wider SHUFFLE_VECTOR.
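// Illustrative example (not from the original source): assuming v4i32 operands
// and a unary shuffle of a,
//   concat_vectors (vector_shuffle<3,2,1,0> a, undef), a
// can become
//   (v8i32 vector_shuffle<3,2,1,0,0,1,2,3> (concat_vectors a, undef), undef).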
23696 static SDValue combineConcatVectorOfShuffleAndItsOperands(
23697 SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes,
23698 bool LegalOperations) {
23699 EVT VT = N->getValueType(0);
23700 EVT OpVT = N->getOperand(0).getValueType();
23701 if (VT.isScalableVector())
23702 return SDValue();
23703
23704 // For now, only allow simple 2-operand concatenations.
23705 if (N->getNumOperands() != 2)
23706 return SDValue();
23707
23708 // Don't create illegal types/shuffles when not allowed to.
23709 if ((LegalTypes && !TLI.isTypeLegal(VT)) ||
23710 (LegalOperations &&
23711 !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, VT)))
23712 return SDValue();
23713
23714 // Analyze all of the operands of the CONCAT_VECTORS. Out of all of them,
23715 // we want to find one that is: (1) a SHUFFLE_VECTOR (2) only used by us,
23716 // and (3) all operands of CONCAT_VECTORS must be either that SHUFFLE_VECTOR,
23717 // or one of the operands of that SHUFFLE_VECTOR (but not UNDEF!).
23718 // (4) and for now, the SHUFFLE_VECTOR must be unary.
23719 ShuffleVectorSDNode *SVN = nullptr;
23720 for (SDValue Op : N->ops()) {
23721 if (auto *CurSVN = dyn_cast<ShuffleVectorSDNode>(Op);
23722 CurSVN && CurSVN->getOperand(1).isUndef() && N->isOnlyUserOf(CurSVN) &&
23723 all_of(N->ops(), [CurSVN](SDValue Op) {
23724 // FIXME: can we allow UNDEF operands?
23725 return !Op.isUndef() &&
23726 (Op.getNode() == CurSVN || is_contained(CurSVN->ops(), Op));
23727 })) {
23728 SVN = CurSVN;
23729 break;
23730 }
23731 }
23732 if (!SVN)
23733 return SDValue();
23734
23735 // We are going to pad the shuffle operands, so any index that was picking
23736 // from the second operand must be adjusted.
23737 SmallVector<int, 16> AdjustedMask;
23738 AdjustedMask.reserve(SVN->getMask().size());
23739 assert(SVN->getOperand(1).isUndef() && "Expected unary shuffle!");
23740 append_range(AdjustedMask, SVN->getMask());
23741
23742 // Identity masks for the operands of the (padded) shuffle.
23743 SmallVector<int, 32> IdentityMask(2 * OpVT.getVectorNumElements());
23744 MutableArrayRef<int> FirstShufOpIdentityMask =
23745 MutableArrayRef<int>(IdentityMask)
23746 .take_front(OpVT.getVectorNumElements());
23747 MutableArrayRef<int> SecondShufOpIdentityMask =
23748 MutableArrayRef<int>(IdentityMask).take_back(OpVT.getVectorNumElements());
23749 std::iota(FirstShufOpIdentityMask.begin(), FirstShufOpIdentityMask.end(), 0);
23750 std::iota(SecondShufOpIdentityMask.begin(), SecondShufOpIdentityMask.end(),
23751 VT.getVectorNumElements());
23752
23753 // New combined shuffle mask.
23754 SmallVector<int, 32> Mask;
23755 Mask.reserve(VT.getVectorNumElements());
23756 for (SDValue Op : N->ops()) {
23757 assert(!Op.isUndef() && "Not expecting to concatenate UNDEF.");
23758 if (Op.getNode() == SVN) {
23759 append_range(Mask, AdjustedMask);
23760 continue;
23761 }
23762 if (Op == SVN->getOperand(0)) {
23763 append_range(Mask, FirstShufOpIdentityMask);
23764 continue;
23765 }
23766 if (Op == SVN->getOperand(1)) {
23767 append_range(Mask, SecondShufOpIdentityMask);
23768 continue;
23769 }
23770 llvm_unreachable("Unexpected operand!");
23771 }
23772
23773 // Don't create illegal shuffle masks.
23774 if (!TLI.isShuffleMaskLegal(Mask, VT))
23775 return SDValue();
23776
23777 // Pad the shuffle operands with UNDEF.
23778 SDLoc dl(N);
23779 std::array<SDValue, 2> ShufOps;
23780 for (auto I : zip(SVN->ops(), ShufOps)) {
23781 SDValue ShufOp = std::get<0>(I);
23782 SDValue &NewShufOp = std::get<1>(I);
23783 if (ShufOp.isUndef())
23784 NewShufOp = DAG.getUNDEF(VT);
23785 else {
23786 SmallVector<SDValue, 2> ShufOpParts(N->getNumOperands(),
23787 DAG.getUNDEF(OpVT));
23788 ShufOpParts[0] = ShufOp;
23789 NewShufOp = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, ShufOpParts);
23790 }
23791 }
23792 // Finally, create the new wide shuffle.
23793 return DAG.getVectorShuffle(VT, dl, ShufOps[0], ShufOps[1], Mask);
23794}
23795
23796SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
23797 // If we only have one input vector, we don't need to do any concatenation.
23798 if (N->getNumOperands() == 1)
23799 return N->getOperand(0);
23800
23801 // Check if all of the operands are undefs.
23802 EVT VT = N->getValueType(0);
23803 if (ISD::allOperandsUndef(N))
23804 return DAG.getUNDEF(VT);
23805
23806 // Optimize concat_vectors where all but the first of the vectors are undef.
23807 if (all_of(drop_begin(N->ops()),
23808 [](const SDValue &Op) { return Op.isUndef(); })) {
23809 SDValue In = N->getOperand(0);
23810 assert(In.getValueType().isVector() && "Must concat vectors");
23811
23812 // If the input is a concat_vectors, just make a larger concat by padding
23813 // with smaller undefs.
23814 //
23815 // Legalizing in AArch64TargetLowering::LowerCONCAT_VECTORS() and combining
23816 // here could cause an infinite loop. That legalizing happens when LegalDAG
23817 // is true and input of AArch64TargetLowering::LowerCONCAT_VECTORS() is
23818 // scalable.
23819 if (In.getOpcode() == ISD::CONCAT_VECTORS && In.hasOneUse() &&
23820 !(LegalDAG && In.getValueType().isScalableVector())) {
23821 unsigned NumOps = N->getNumOperands() * In.getNumOperands();
23822 SmallVector<SDValue, 4> Ops(In->op_begin(), In->op_end());
23823 Ops.resize(NumOps, DAG.getUNDEF(Ops[0].getValueType()));
23824 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
23825 }
23826
23827 SDValue Scalar = peekThroughOneUseBitcasts(In);
23828
23829 // concat_vectors(scalar_to_vector(scalar), undef) ->
23830 // scalar_to_vector(scalar)
23831 if (!LegalOperations && Scalar.getOpcode() == ISD::SCALAR_TO_VECTOR &&
23832 Scalar.hasOneUse()) {
23833 EVT SVT = Scalar.getValueType().getVectorElementType();
23834 if (SVT == Scalar.getOperand(0).getValueType())
23835 Scalar = Scalar.getOperand(0);
23836 }
23837
23838 // concat_vectors(scalar, undef) -> scalar_to_vector(scalar)
23839 if (!Scalar.getValueType().isVector() && In.hasOneUse()) {
23840 // If the bitcast type isn't legal, it might be a trunc of a legal type;
23841 // look through the trunc so we can still do the transform:
23842 // concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
23843 if (Scalar->getOpcode() == ISD::TRUNCATE &&
23844 !TLI.isTypeLegal(Scalar.getValueType()) &&
23845 TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
23846 Scalar = Scalar->getOperand(0);
23847
23848 EVT SclTy = Scalar.getValueType();
23849
23850 if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
23851 return SDValue();
23852
23853 // Bail out if the vector size is not a multiple of the scalar size.
23854 if (VT.getSizeInBits() % SclTy.getSizeInBits())
23855 return SDValue();
23856
23857 unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
23858 if (VNTNumElms < 2)
23859 return SDValue();
23860
23861 EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
23862 if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
23863 return SDValue();
23864
23865 SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
23866 return DAG.getBitcast(VT, Res);
23867 }
23868 }
23869
23870 // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
23871 // We have already tested above for an UNDEF only concatenation.
23872 // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
23873 // -> (BUILD_VECTOR A, B, ..., C, D, ...)
23874 auto IsBuildVectorOrUndef = [](const SDValue &Op) {
23875 return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
23876 };
23877 if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
23878 SmallVector<SDValue, 8> Opnds;
23879 EVT SVT = VT.getScalarType();
23880
23881 EVT MinVT = SVT;
23882 if (!SVT.isFloatingPoint()) {
23883 // If the BUILD_VECTORs are built from integers, they may have different
23884 // operand types. Get the smallest type and truncate all operands to it.
23885 bool FoundMinVT = false;
23886 for (const SDValue &Op : N->ops())
23887 if (ISD::BUILD_VECTOR == Op.getOpcode()) {
23888 EVT OpSVT = Op.getOperand(0).getValueType();
23889 MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
23890 FoundMinVT = true;
23891 }
23892 assert(FoundMinVT && "Concat vector type mismatch");
23893 }
23894
23895 for (const SDValue &Op : N->ops()) {
23896 EVT OpVT = Op.getValueType();
23897 unsigned NumElts = OpVT.getVectorNumElements();
23898
23899 if (ISD::UNDEF == Op.getOpcode())
23900 Opnds.append(NumElts, DAG.getUNDEF(MinVT));
23901
23902 if (ISD::BUILD_VECTOR == Op.getOpcode()) {
23903 if (SVT.isFloatingPoint()) {
23904 assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
23905 Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
23906 } else {
23907 for (unsigned i = 0; i != NumElts; ++i)
23908 Opnds.push_back(
23909 DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
23910 }
23911 }
23912 }
23913
23914 assert(VT.getVectorNumElements() == Opnds.size() &&
23915 "Concat vector type mismatch");
23916 return DAG.getBuildVector(VT, SDLoc(N), Opnds);
23917 }
23918
23919 // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
23920 // FIXME: Add support for concat_vectors(bitcast(vec0),bitcast(vec1),...).
23921 if (SDValue V = combineConcatVectorOfScalars(N, DAG))
23922 return V;
23923
23924 if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) {
23925 // Fold CONCAT_VECTORS of CONCAT_VECTORS (or undef) to VECTOR_SHUFFLE.
23926 if (SDValue V = combineConcatVectorOfConcatVectors(N, DAG))
23927 return V;
23928
23929 // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
23930 if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
23931 return V;
23932 }
23933
23934 if (SDValue V = combineConcatVectorOfCasts(N, DAG))
23935 return V;
23936
23937 if (SDValue V = combineConcatVectorOfShuffleAndItsOperands(
23938 N, DAG, TLI, LegalTypes, LegalOperations))
23939 return V;
23940
23941 // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
23942 // nodes often generate nop CONCAT_VECTOR nodes. Scan the CONCAT_VECTOR
23943 // operands and look for CONCAT operations that place the incoming vectors
23944 // at the exact same location.
23945 //
23946 // For scalable vectors, EXTRACT_SUBVECTOR indexes are implicitly scaled.
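// Illustrative example (not from the original source): with X of type v8i32,
//   concat_vectors (extract_subvector X, 0), (extract_subvector X, 4)
// is recognized as a nop and replaced by X itself.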
23947 SDValue SingleSource = SDValue();
23948 unsigned PartNumElem =
23949 N->getOperand(0).getValueType().getVectorMinNumElements();
23950
23951 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
23952 SDValue Op = N->getOperand(i);
23953
23954 if (Op.isUndef())
23955 continue;
23956
23957 // Check if this is the identity extract:
23958 if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
23959 return SDValue();
23960
23961 // Find the single incoming vector for the extract_subvector.
23962 if (SingleSource.getNode()) {
23963 if (Op.getOperand(0) != SingleSource)
23964 return SDValue();
23965 } else {
23966 SingleSource = Op.getOperand(0);
23967
23968 // Check the source type is the same as the type of the result.
23969 // If not, this concat may extend the vector, so we can not
23970 // optimize it away.
23971 if (SingleSource.getValueType() != N->getValueType(0))
23972 return SDValue();
23973 }
23974
23975 // Check that we are reading from the identity index.
23976 unsigned IdentityIndex = i * PartNumElem;
23977 if (Op.getConstantOperandAPInt(1) != IdentityIndex)
23978 return SDValue();
23979 }
23980
23981 if (SingleSource.getNode())
23982 return SingleSource;
23983
23984 return SDValue();
23985}
23986
23987// Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find
23988// if the subvector can be sourced for free.
23989 static SDValue getSubVectorSrc(SDValue V, SDValue Index, EVT SubVT) {
23990 if (V.getOpcode() == ISD::INSERT_SUBVECTOR &&
23991 V.getOperand(1).getValueType() == SubVT && V.getOperand(2) == Index) {
23992 return V.getOperand(1);
23993 }
23994 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
23995 if (IndexC && V.getOpcode() == ISD::CONCAT_VECTORS &&
23996 V.getOperand(0).getValueType() == SubVT &&
23997 (IndexC->getZExtValue() % SubVT.getVectorMinNumElements()) == 0) {
23998 uint64_t SubIdx = IndexC->getZExtValue() / SubVT.getVectorMinNumElements();
23999 return V.getOperand(SubIdx);
24000 }
24001 return SDValue();
24002}
24003
24004 static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract,
24005 SelectionDAG &DAG,
24006 bool LegalOperations) {
24007 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
24008 SDValue BinOp = Extract->getOperand(0);
24009 unsigned BinOpcode = BinOp.getOpcode();
24010 if (!TLI.isBinOp(BinOpcode) || BinOp->getNumValues() != 1)
24011 return SDValue();
24012
24013 EVT VecVT = BinOp.getValueType();
24014 SDValue Bop0 = BinOp.getOperand(0), Bop1 = BinOp.getOperand(1);
24015 if (VecVT != Bop0.getValueType() || VecVT != Bop1.getValueType())
24016 return SDValue();
24017
24018 SDValue Index = Extract->getOperand(1);
24019 EVT SubVT = Extract->getValueType(0);
24020 if (!TLI.isOperationLegalOrCustom(BinOpcode, SubVT, LegalOperations))
24021 return SDValue();
24022
24023 SDValue Sub0 = getSubVectorSrc(Bop0, Index, SubVT);
24024 SDValue Sub1 = getSubVectorSrc(Bop1, Index, SubVT);
24025
24026 // TODO: We could handle the case where only 1 operand is being inserted by
24027 // creating an extract of the other operand, but that requires checking
24028 // number of uses and/or costs.
24029 if (!Sub0 || !Sub1)
24030 return SDValue();
24031
24032 // We are inserting both operands of the wide binop only to extract back
24033 // to the narrow vector size. Eliminate all of the insert/extract:
24034 // ext (binop (ins ?, X, Index), (ins ?, Y, Index)), Index --> binop X, Y
24035 return DAG.getNode(BinOpcode, SDLoc(Extract), SubVT, Sub0, Sub1,
24036 BinOp->getFlags());
24037}
24038
24039/// If we are extracting a subvector produced by a wide binary operator try
24040/// to use a narrow binary operator and/or avoid concatenation and extraction.
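/// A concrete sketch of one pattern this handles (illustrative):
///   v4i32 extract_subvector (v8i32 and (concat X, Y), (concat W, Z)), 4
///     --> v4i32 and Y, Z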
24041 static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG,
24042 bool LegalOperations) {
24043 // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
24044 // some of these bailouts with other transforms.
24045
24046 if (SDValue V = narrowInsertExtractVectorBinOp(Extract, DAG, LegalOperations))
24047 return V;
24048
24049 // The extract index must be a constant, so we can map it to a concat operand.
24050 auto *ExtractIndexC = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
24051 if (!ExtractIndexC)
24052 return SDValue();
24053
24054 // We are looking for an optionally bitcasted wide vector binary operator
24055 // feeding an extract subvector.
24056 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
24057 SDValue BinOp = peekThroughBitcasts(Extract->getOperand(0));
24058 unsigned BOpcode = BinOp.getOpcode();
24059 if (!TLI.isBinOp(BOpcode) || BinOp->getNumValues() != 1)
24060 return SDValue();
24061
24062 // Exclude the fake form of fneg (fsub -0.0, x) because that is likely to be
24063 // reduced to the unary fneg when it is visited, and we probably want to deal
24064 // with fneg in a target-specific way.
24065 if (BOpcode == ISD::FSUB) {
24066 auto *C = isConstOrConstSplatFP(BinOp.getOperand(0), /*AllowUndefs*/ true);
24067 if (C && C->getValueAPF().isNegZero())
24068 return SDValue();
24069 }
24070
24071 // The binop must be a vector type, so we can extract some fraction of it.
24072 EVT WideBVT = BinOp.getValueType();
24073 // The optimisations below currently assume we are dealing with fixed length
24074 // vectors. It is possible to add support for scalable vectors, but at the
24075 // moment we've done no analysis to prove whether they are profitable or not.
24076 if (!WideBVT.isFixedLengthVector())
24077 return SDValue();
24078
24079 EVT VT = Extract->getValueType(0);
24080 unsigned ExtractIndex = ExtractIndexC->getZExtValue();
24081 assert(ExtractIndex % VT.getVectorNumElements() == 0 &&
24082 "Extract index is not a multiple of the vector length.");
24083
24084 // Bail out if this is not a proper multiple width extraction.
24085 unsigned WideWidth = WideBVT.getSizeInBits();
24086 unsigned NarrowWidth = VT.getSizeInBits();
24087 if (WideWidth % NarrowWidth != 0)
24088 return SDValue();
24089
24090 // Bail out if we are extracting a fraction of a single operation. This can
24091 // occur because we potentially looked through a bitcast of the binop.
24092 unsigned NarrowingRatio = WideWidth / NarrowWidth;
24093 unsigned WideNumElts = WideBVT.getVectorNumElements();
24094 if (WideNumElts % NarrowingRatio != 0)
24095 return SDValue();
24096
24097 // Bail out if the target does not support a narrower version of the binop.
24098 EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
24099 WideNumElts / NarrowingRatio);
24100 if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT,
24101 LegalOperations))
24102 return SDValue();
24103
24104 // If extraction is cheap, we don't need to look at the binop operands
24105 // for concat ops. The narrow binop alone makes this transform profitable.
24106 // We can't just reuse the original extract index operand because we may have
24107 // bitcasted.
24108 unsigned ConcatOpNum = ExtractIndex / VT.getVectorNumElements();
24109 unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
24110 if (TLI.isExtractSubvectorCheap(NarrowBVT, WideBVT, ExtBOIdx) &&
24111 BinOp.hasOneUse() && Extract->getOperand(0)->hasOneUse()) {
24112 // extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N)
24113 SDLoc DL(Extract);
24114 SDValue NewExtIndex = DAG.getVectorIdxConstant(ExtBOIdx, DL);
24115 SDValue X = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
24116 BinOp.getOperand(0), NewExtIndex);
24117 SDValue Y = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
24118 BinOp.getOperand(1), NewExtIndex);
24119 SDValue NarrowBinOp =
24120 DAG.getNode(BOpcode, DL, NarrowBVT, X, Y, BinOp->getFlags());
24121 return DAG.getBitcast(VT, NarrowBinOp);
24122 }
24123
24124 // Only handle the case where we are doubling and then halving. A larger ratio
24125 // may require more than two narrow binops to replace the wide binop.
24126 if (NarrowingRatio != 2)
24127 return SDValue();
24128
24129 // TODO: The motivating case for this transform is an x86 AVX1 target. That
24130 // target has temptingly almost legal versions of bitwise logic ops in 256-bit
24131 // flavors, but no other 256-bit integer support. This could be extended to
24132 // handle any binop, but that may require fixing/adding other folds to avoid
24133 // codegen regressions.
24134 if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
24135 return SDValue();
24136
24137 // We need at least one concatenation operation of a binop operand to make
24138 // this transform worthwhile. The concat must double the input vector sizes.
24139 auto GetSubVector = [ConcatOpNum](SDValue V) -> SDValue {
24140 if (V.getOpcode() == ISD::CONCAT_VECTORS && V.getNumOperands() == 2)
24141 return V.getOperand(ConcatOpNum);
24142 return SDValue();
24143 };
24144 SDValue SubVecL = GetSubVector(peekThroughBitcasts(BinOp.getOperand(0)));
24145 SDValue SubVecR = GetSubVector(peekThroughBitcasts(BinOp.getOperand(1)));
24146
24147 if (SubVecL || SubVecR) {
24148 // If a binop operand was not the result of a concat, we must extract a
24149 // half-sized operand for our new narrow binop:
24150 // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
24151 // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, IndexC)
24152 // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, IndexC), YN
24153 SDLoc DL(Extract);
24154 SDValue IndexC = DAG.getVectorIdxConstant(ExtBOIdx, DL);
24155 SDValue X = SubVecL ? DAG.getBitcast(NarrowBVT, SubVecL)
24156 : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
24157 BinOp.getOperand(0), IndexC);
24158
24159 SDValue Y = SubVecR ? DAG.getBitcast(NarrowBVT, SubVecR)
24160 : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
24161 BinOp.getOperand(1), IndexC);
24162
24163 SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
24164 return DAG.getBitcast(VT, NarrowBinOp);
24165 }
24166
24167 return SDValue();
24168}
24169
24170/// If we are extracting a subvector from a wide vector load, convert to a
24171/// narrow load to eliminate the extraction:
24172/// (extract_subvector (load wide vector)) --> (load narrow vector)
24173 static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
24174 // TODO: Add support for big-endian. The offset calculation must be adjusted.
24175 if (DAG.getDataLayout().isBigEndian())
24176 return SDValue();
24177
24178 auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
24179 if (!Ld || Ld->getExtensionType() || !Ld->isSimple())
24180 return SDValue();
24181
24182 // Allow targets to opt-out.
24183 EVT VT = Extract->getValueType(0);
24184
24185 // We can only create byte sized loads.
24186 if (!VT.isByteSized())
24187 return SDValue();
24188
24189 unsigned Index = Extract->getConstantOperandVal(1);
24190 unsigned NumElts = VT.getVectorMinNumElements();
24191 // A fixed length vector being extracted from a scalable vector
24192 // may not be any *smaller* than the scalable one.
24193 if (Index == 0 && NumElts >= Ld->getValueType(0).getVectorMinNumElements())
24194 return SDValue();
24195
24196 // The definition of EXTRACT_SUBVECTOR states that the index must be a
24197 // multiple of the minimum number of elements in the result type.
24198 assert(Index % NumElts == 0 && "The extract subvector index is not a "
24199 "multiple of the result's element count");
24200
24201 // It's fine to use TypeSize here as we know the offset will not be negative.
24202 TypeSize Offset = VT.getStoreSize() * (Index / NumElts);
24203
24204 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
24205 if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT))
24206 return SDValue();
24207
24208 // The narrow load will be offset from the base address of the old load if
24209 // we are extracting from something besides index 0 (little-endian).
24210 SDLoc DL(Extract);
24211
24212 // TODO: Use "BaseIndexOffset" to make this more effective.
24213 SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(), Offset, DL);
24214
24215 LocationSize StoreSize = LocationSize::precise(VT.getStoreSize());
24216 MachineFunction &MF = DAG.getMachineFunction();
24217 MachineMemOperand *MMO;
24218 if (Offset.isScalable()) {
24219 MachinePointerInfo MPI =
24220 MachinePointerInfo(Ld->getPointerInfo().getAddrSpace());
24221 MMO = MF.getMachineMemOperand(Ld->getMemOperand(), MPI, StoreSize);
24222 } else
24223 MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset.getFixedValue(),
24224 StoreSize);
24225
24226 SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
24227 DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
24228 return NewLd;
24229}
24230
24231/// Given EXTRACT_SUBVECTOR(VECTOR_SHUFFLE(Op0, Op1, Mask)),
24232/// try to produce VECTOR_SHUFFLE(EXTRACT_SUBVECTOR(Op?, ?),
24233/// EXTRACT_SUBVECTOR(Op?, ?),
24234 /// Mask')
24235/// iff it is legal and profitable to do so. Notably, the trimmed mask
24236/// (containing only the elements that are extracted)
24237/// must reference at most two subvectors.
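/// For example (illustrative):
///   v4i32 extract_subvector (v8i32 shuffle X, Y, <8,9,0,1,u,u,u,u>), 0
///     --> v4i32 shuffle (extract_subvector Y, 0), (extract_subvector X, 0),
///                       <0,1,4,5>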
24238 static SDValue foldExtractSubvectorFromShuffleVector(SDNode *N,
24239 SelectionDAG &DAG,
24240 const TargetLowering &TLI,
24241 bool LegalOperations) {
24242 assert(N->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
24243 "Must only be called on EXTRACT_SUBVECTOR's");
24244
24245 SDValue N0 = N->getOperand(0);
24246
24247 // Only deal with non-scalable vectors.
24248 EVT NarrowVT = N->getValueType(0);
24249 EVT WideVT = N0.getValueType();
24250 if (!NarrowVT.isFixedLengthVector() || !WideVT.isFixedLengthVector())
24251 return SDValue();
24252
24253 // The operand must be a shufflevector.
24254 auto *WideShuffleVector = dyn_cast<ShuffleVectorSDNode>(N0);
24255 if (!WideShuffleVector)
24256 return SDValue();
24257
24258 // The old shuffle needs to go away.
24259 if (!WideShuffleVector->hasOneUse())
24260 return SDValue();
24261
24262 // And the narrow shufflevector that we'll form must be legal.
24263 if (LegalOperations &&
24264 !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, NarrowVT))
24265 return SDValue();
24266
24267 uint64_t FirstExtractedEltIdx = N->getConstantOperandVal(1);
24268 int NumEltsExtracted = NarrowVT.getVectorNumElements();
24269 assert((FirstExtractedEltIdx % NumEltsExtracted) == 0 &&
24270 "Extract index is not a multiple of the output vector length.");
24271
24272 int WideNumElts = WideVT.getVectorNumElements();
24273
24274 SmallVector<int, 16> NewMask;
24275 NewMask.reserve(NumEltsExtracted);
24276 SmallSetVector<std::pair<SDValue /*Op*/, int /*SubvectorIndex*/>, 2>
24277 DemandedSubvectors;
24278
24279 // Try to decode the wide mask into narrow mask from at most two subvectors.
24280 for (int M : WideShuffleVector->getMask().slice(FirstExtractedEltIdx,
24281 NumEltsExtracted)) {
24282 assert((M >= -1) && (M < (2 * WideNumElts)) &&
24283 "Out-of-bounds shuffle mask?");
24284
24285 if (M < 0) {
24286 // Does not depend on operands, does not require adjustment.
24287 NewMask.emplace_back(M);
24288 continue;
24289 }
24290
24291 // From which operand of the shuffle does this shuffle mask element pick?
24292 int WideShufOpIdx = M / WideNumElts;
24293 // Which element of that operand is picked?
24294 int OpEltIdx = M % WideNumElts;
24295
24296 assert((OpEltIdx + WideShufOpIdx * WideNumElts) == M &&
24297 "Shuffle mask vector decomposition failure.");
24298
24299 // And which NumEltsExtracted-sized subvector of that operand is that?
24300 int OpSubvecIdx = OpEltIdx / NumEltsExtracted;
24301 // And which element within that subvector of that operand is that?
24302 int OpEltIdxInSubvec = OpEltIdx % NumEltsExtracted;
24303
24304 assert((OpEltIdxInSubvec + OpSubvecIdx * NumEltsExtracted) == OpEltIdx &&
24305 "Shuffle mask subvector decomposition failure.");
24306
24307 assert((OpEltIdxInSubvec + OpSubvecIdx * NumEltsExtracted +
24308 WideShufOpIdx * WideNumElts) == M &&
24309 "Shuffle mask full decomposition failure.");
24310
24311 SDValue Op = WideShuffleVector->getOperand(WideShufOpIdx);
24312
24313 if (Op.isUndef()) {
24314 // Picking from an undef operand. Let's adjust mask instead.
24315 NewMask.emplace_back(-1);
24316 continue;
24317 }
24318
24319 const std::pair<SDValue, int> DemandedSubvector =
24320 std::make_pair(Op, OpSubvecIdx);
24321
24322 if (DemandedSubvectors.insert(DemandedSubvector)) {
24323 if (DemandedSubvectors.size() > 2)
24324 return SDValue(); // We can't handle more than two subvectors.
24325 // How many elements into the WideVT does this subvector start?
24326 int Index = NumEltsExtracted * OpSubvecIdx;
24327 // Bail out if the extraction isn't going to be cheap.
24328 if (!TLI.isExtractSubvectorCheap(NarrowVT, WideVT, Index))
24329 return SDValue();
24330 }
24331
24332 // Ok, but from which operand of the new shuffle will this element pick?
24333 int NewOpIdx =
24334 getFirstIndexOf(DemandedSubvectors.getArrayRef(), DemandedSubvector);
24335 assert((NewOpIdx == 0 || NewOpIdx == 1) && "Unexpected operand index.");
24336
24337 int AdjM = OpEltIdxInSubvec + NewOpIdx * NumEltsExtracted;
24338 NewMask.emplace_back(AdjM);
24339 }
24340 assert(NewMask.size() == (unsigned)NumEltsExtracted && "Produced bad mask.");
24341 assert(DemandedSubvectors.size() <= 2 &&
24342 "Should have ended up demanding at most two subvectors.");
24343
24344 // Did we discover that the shuffle does not actually depend on operands?
24345 if (DemandedSubvectors.empty())
24346 return DAG.getUNDEF(NarrowVT);
24347
24348 // Profitability check: only deal with extractions from the first subvector
24349 // unless the mask becomes an identity mask.
24350 if (!ShuffleVectorInst::isIdentityMask(NewMask, NewMask.size()) ||
24351 any_of(NewMask, [](int M) { return M < 0; }))
24352 for (auto &DemandedSubvector : DemandedSubvectors)
24353 if (DemandedSubvector.second != 0)
24354 return SDValue();
24355
24356 // We still perform the exact same EXTRACT_SUBVECTOR, just on different
24357 // operand[s]/index[es], so there is no point in checking for its legality.
24358
24359 // Do not turn a legal shuffle into an illegal one.
24360 if (TLI.isShuffleMaskLegal(WideShuffleVector->getMask(), WideVT) &&
24361 !TLI.isShuffleMaskLegal(NewMask, NarrowVT))
24362 return SDValue();
24363
24364 SDLoc DL(N);
24365
24366 SmallVector<SDValue, 2> NewOps;
24367 for (const std::pair<SDValue /*Op*/, int /*SubvectorIndex*/>
24368 &DemandedSubvector : DemandedSubvectors) {
24369 // How many elements into the WideVT does this subvector start?
24370 int Index = NumEltsExtracted * DemandedSubvector.second;
24371 SDValue IndexC = DAG.getVectorIdxConstant(Index, DL);
24372 NewOps.emplace_back(DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowVT,
24373 DemandedSubvector.first, IndexC));
24374 }
24375 assert((NewOps.size() == 1 || NewOps.size() == 2) &&
24376 "Should end up with either one or two ops");
24377
24378 // If we ended up with only one operand, pad with an undef.
24379 if (NewOps.size() == 1)
24380 NewOps.emplace_back(DAG.getUNDEF(NarrowVT));
24381
24382 return DAG.getVectorShuffle(NarrowVT, DL, NewOps[0], NewOps[1], NewMask);
24383}
24384
24385SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
24386 EVT NVT = N->getValueType(0);
24387 SDValue V = N->getOperand(0);
24388 uint64_t ExtIdx = N->getConstantOperandVal(1);
24389
24390 // Extract from UNDEF is UNDEF.
24391 if (V.isUndef())
24392 return DAG.getUNDEF(NVT);
24393
24394 if (TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, NVT))
24395 if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG))
24396 return NarrowLoad;
24397
24398 // Combine an extract of an extract into a single extract_subvector.
24399 // ext (ext X, C), 0 --> ext X, C
24400 if (ExtIdx == 0 && V.getOpcode() == ISD::EXTRACT_SUBVECTOR && V.hasOneUse()) {
24401 if (TLI.isExtractSubvectorCheap(NVT, V.getOperand(0).getValueType(),
24402 V.getConstantOperandVal(1)) &&
24403 TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NVT)) {
24404 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT, V.getOperand(0),
24405 V.getOperand(1));
24406 }
24407 }
24408
24409 // ty1 extract_vector(ty2 splat(V))) -> ty1 splat(V)
24410 if (V.getOpcode() == ISD::SPLAT_VECTOR)
24411 if (DAG.isConstantValueOfAnyType(V.getOperand(0)) || V.hasOneUse())
24412 if (!LegalOperations || TLI.isOperationLegal(ISD::SPLAT_VECTOR, NVT))
24413 return DAG.getSplatVector(NVT, SDLoc(N), V.getOperand(0));
24414
24415 // Try to move vector bitcast after extract_subv by scaling extraction index:
24416 // extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
24417 if (V.getOpcode() == ISD::BITCAST &&
24418 V.getOperand(0).getValueType().isVector() &&
24419 (!LegalOperations || TLI.isOperationLegal(ISD::BITCAST, NVT))) {
24420 SDValue SrcOp = V.getOperand(0);
24421 EVT SrcVT = SrcOp.getValueType();
24422 unsigned SrcNumElts = SrcVT.getVectorMinNumElements();
24423 unsigned DestNumElts = V.getValueType().getVectorMinNumElements();
24424 if ((SrcNumElts % DestNumElts) == 0) {
24425 unsigned SrcDestRatio = SrcNumElts / DestNumElts;
24426 ElementCount NewExtEC = NVT.getVectorElementCount() * SrcDestRatio;
24427 EVT NewExtVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
24428 NewExtEC);
24429 if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
24430 SDLoc DL(N);
24431 SDValue NewIndex = DAG.getVectorIdxConstant(ExtIdx * SrcDestRatio, DL);
24432 SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
24433 V.getOperand(0), NewIndex);
24434 return DAG.getBitcast(NVT, NewExtract);
24435 }
24436 }
24437 if ((DestNumElts % SrcNumElts) == 0) {
24438 unsigned DestSrcRatio = DestNumElts / SrcNumElts;
24439 if (NVT.getVectorElementCount().isKnownMultipleOf(DestSrcRatio)) {
24440 ElementCount NewExtEC =
24441 NVT.getVectorElementCount().divideCoefficientBy(DestSrcRatio);
24442 EVT ScalarVT = SrcVT.getScalarType();
24443 if ((ExtIdx % DestSrcRatio) == 0) {
24444 SDLoc DL(N);
24445 unsigned IndexValScaled = ExtIdx / DestSrcRatio;
24446 EVT NewExtVT =
24447 EVT::getVectorVT(*DAG.getContext(), ScalarVT, NewExtEC);
24448 if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
24449 SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
24450 SDValue NewExtract =
24451 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
24452 V.getOperand(0), NewIndex);
24453 return DAG.getBitcast(NVT, NewExtract);
24454 }
24455 if (NewExtEC.isScalar() &&
24456 TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, ScalarVT)) {
24457 SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
24458 SDValue NewExtract =
24459 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT,
24460 V.getOperand(0), NewIndex);
24461 return DAG.getBitcast(NVT, NewExtract);
24462 }
24463 }
24464 }
24465 }
24466 }
24467
24468 if (V.getOpcode() == ISD::CONCAT_VECTORS) {
24469 unsigned ExtNumElts = NVT.getVectorMinNumElements();
24470 EVT ConcatSrcVT = V.getOperand(0).getValueType();
24471 assert(ConcatSrcVT.getVectorElementType() == NVT.getVectorElementType() &&
24472 "Concat and extract subvector do not change element type");
24473 assert((ExtIdx % ExtNumElts) == 0 &&
24474 "Extract index is not a multiple of the input vector length.");
24475
24476 unsigned ConcatSrcNumElts = ConcatSrcVT.getVectorMinNumElements();
24477 unsigned ConcatOpIdx = ExtIdx / ConcatSrcNumElts;
24478
24479 // If the concatenated source types match this extract, it's a direct
24480 // simplification:
24481 // extract_subvec (concat V1, V2, ...), i --> Vi
24482 if (NVT.getVectorElementCount() == ConcatSrcVT.getVectorElementCount())
24483 return V.getOperand(ConcatOpIdx);
24484
24485 // If the concatenated source vectors are a multiple length of this extract,
24486 // then extract a fraction of one of those source vectors directly from a
24487 // concat operand. Example:
24488 // v2i8 extract_subvec (v16i8 concat (v8i8 X), (v8i8 Y)), 14 -->
24489 // v2i8 extract_subvec v8i8 Y, 6
24490 if (NVT.isFixedLengthVector() && ConcatSrcVT.isFixedLengthVector() &&
24491 ConcatSrcNumElts % ExtNumElts == 0) {
24492 SDLoc DL(N);
24493 unsigned NewExtIdx = ExtIdx - ConcatOpIdx * ConcatSrcNumElts;
24494 assert(NewExtIdx + ExtNumElts <= ConcatSrcNumElts &&
24495 "Trying to extract from >1 concat operand?");
24496 assert(NewExtIdx % ExtNumElts == 0 &&
24497 "Extract index is not a multiple of the input vector length.");
24498 SDValue NewIndexC = DAG.getVectorIdxConstant(NewExtIdx, DL);
24499 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT,
24500 V.getOperand(ConcatOpIdx), NewIndexC);
24501 }
24502 }
24503
24504 if (SDValue V =
24505 foldExtractSubvectorFromShuffleVector(N, DAG, TLI, LegalOperations))
24506 return V;
24507
24508 V = peekThroughBitcasts(V);
24509
24510 // If the input is a build vector, try to make a smaller build vector.
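  // e.g. (illustrative)
  //   v2i32 extract_subvector (v4i32 build_vector A, B, C, D), 2
  //     --> v2i32 build_vector C, D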
24511 if (V.getOpcode() == ISD::BUILD_VECTOR) {
24512 EVT InVT = V.getValueType();
24513 unsigned ExtractSize = NVT.getSizeInBits();
24514 unsigned EltSize = InVT.getScalarSizeInBits();
24515 // Only do this if we won't split any elements.
24516 if (ExtractSize % EltSize == 0) {
24517 unsigned NumElems = ExtractSize / EltSize;
24518 EVT EltVT = InVT.getVectorElementType();
24519 EVT ExtractVT =
24520 NumElems == 1 ? EltVT
24521 : EVT::getVectorVT(*DAG.getContext(), EltVT, NumElems);
24522 if ((Level < AfterLegalizeDAG ||
24523 (NumElems == 1 ||
24524 TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&
24525 (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
24526 unsigned IdxVal = (ExtIdx * NVT.getScalarSizeInBits()) / EltSize;
24527
24528 if (NumElems == 1) {
24529 SDValue Src = V->getOperand(IdxVal);
24530 if (EltVT != Src.getValueType())
24531 Src = DAG.getNode(ISD::TRUNCATE, SDLoc(N), EltVT, Src);
24532 return DAG.getBitcast(NVT, Src);
24533 }
24534
24535 // Extract the pieces from the original build_vector.
24536 SDValue BuildVec = DAG.getBuildVector(ExtractVT, SDLoc(N),
24537 V->ops().slice(IdxVal, NumElems));
24538 return DAG.getBitcast(NVT, BuildVec);
24539 }
24540 }
24541 }
24542
24543 if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
24544 // Handle only simple case where vector being inserted and vector
24545 // being extracted are of same size.
24546 EVT SmallVT = V.getOperand(1).getValueType();
24547 if (!NVT.bitsEq(SmallVT))
24548 return SDValue();
24549
24550 // Combine:
24551 // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
24552 // Into:
24553 // indices are equal or bit offsets are equal => V1
24554 // otherwise => (extract_subvec V1, ExtIdx)
24555 uint64_t InsIdx = V.getConstantOperandVal(2);
24556 if (InsIdx * SmallVT.getScalarSizeInBits() ==
24557 ExtIdx * NVT.getScalarSizeInBits()) {
24558 if (LegalOperations && !TLI.isOperationLegal(ISD::BITCAST, NVT))
24559 return SDValue();
24560
24561 return DAG.getBitcast(NVT, V.getOperand(1));
24562 }
24563 return DAG.getNode(
24564 ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,
24565 DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
24566 N->getOperand(1));
24567 }
24568
24569 if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG, LegalOperations))
24570 return NarrowBOp;
24571
24572 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
24573 return SDValue(N, 0);
24574
24575 return SDValue();
24576}
24577
24578/// Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles
24579/// followed by concatenation. Narrow vector ops may have better performance
24580/// than wide ops, and this can unlock further narrowing of other vector ops.
24581/// Targets can invert this transform later if it is not profitable.
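/// For example (illustrative), with v2i32 X and Y:
///   v4i32 shuffle (concat X, undef), (concat Y, undef), <0,4,1,5>
///     --> concat (shuffle X, Y, <0,2>), (shuffle X, Y, <1,3>)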
24582 static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf,
24583 SelectionDAG &DAG) {
24584 SDValue N0 = Shuf->getOperand(0), N1 = Shuf->getOperand(1);
24585 if (N0.getOpcode() != ISD::CONCAT_VECTORS || N0.getNumOperands() != 2 ||
24586 N1.getOpcode() != ISD::CONCAT_VECTORS || N1.getNumOperands() != 2 ||
24587 !N0.getOperand(1).isUndef() || !N1.getOperand(1).isUndef())
24588 return SDValue();
24589
24590 // Split the wide shuffle mask into halves. Any mask element that is accessing
24591 // operand 1 is offset down to account for narrowing of the vectors.
24592 ArrayRef<int> Mask = Shuf->getMask();
24593 EVT VT = Shuf->getValueType(0);
24594 unsigned NumElts = VT.getVectorNumElements();
24595 unsigned HalfNumElts = NumElts / 2;
24596 SmallVector<int, 16> Mask0(HalfNumElts, -1);
24597 SmallVector<int, 16> Mask1(HalfNumElts, -1);
24598 for (unsigned i = 0; i != NumElts; ++i) {
24599 if (Mask[i] == -1)
24600 continue;
24601 // If we reference the upper (undef) subvector then the element is undef.
24602 if ((Mask[i] % NumElts) >= HalfNumElts)
24603 continue;
24604 int M = Mask[i] < (int)NumElts ? Mask[i] : Mask[i] - (int)HalfNumElts;
24605 if (i < HalfNumElts)
24606 Mask0[i] = M;
24607 else
24608 Mask1[i - HalfNumElts] = M;
24609 }
24610
24611 // Ask the target if this is a valid transform.
24612 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
24613 EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
24614 HalfNumElts);
24615 if (!TLI.isShuffleMaskLegal(Mask0, HalfVT) ||
24616 !TLI.isShuffleMaskLegal(Mask1, HalfVT))
24617 return SDValue();
24618
24619 // shuffle (concat X, undef), (concat Y, undef), Mask -->
24620 // concat (shuffle X, Y, Mask0), (shuffle X, Y, Mask1)
24621 SDValue X = N0.getOperand(0), Y = N1.getOperand(0);
24622 SDLoc DL(Shuf);
24623 SDValue Shuf0 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask0);
24624 SDValue Shuf1 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask1);
24625 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Shuf0, Shuf1);
24626}
24627
24628// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
24629// or turn a shuffle of a single concat into simpler shuffle then concat.
24630 static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
24631 EVT VT = N->getValueType(0);
24632 unsigned NumElts = VT.getVectorNumElements();
24633
24634 SDValue N0 = N->getOperand(0);
24635 SDValue N1 = N->getOperand(1);
24636 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
24637 ArrayRef<int> Mask = SVN->getMask();
24638
24639 SmallVector<SDValue, 4> Ops;
24640 EVT ConcatVT = N0.getOperand(0).getValueType();
24641 unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
24642 unsigned NumConcats = NumElts / NumElemsPerConcat;
24643
24644 auto IsUndefMaskElt = [](int i) { return i == -1; };
24645
24646 // Special case: shuffle(concat(A,B)) can be more efficiently represented
24647 // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
24648 // half vector elements.
24649 if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
24650 llvm::all_of(Mask.slice(NumElemsPerConcat, NumElemsPerConcat),
24651 IsUndefMaskElt)) {
24652 N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0),
24653 N0.getOperand(1),
24654 Mask.slice(0, NumElemsPerConcat));
24655 N1 = DAG.getUNDEF(ConcatVT);
24656 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
24657 }
24658
24659 // Look at every vector that's inserted. We're looking for exact
24660 // subvector-sized copies from a concatenated vector
24661 for (unsigned I = 0; I != NumConcats; ++I) {
24662 unsigned Begin = I * NumElemsPerConcat;
24663 ArrayRef<int> SubMask = Mask.slice(Begin, NumElemsPerConcat);
24664
24665 // Make sure we're dealing with a copy.
24666 if (llvm::all_of(SubMask, IsUndefMaskElt)) {
24667 Ops.push_back(DAG.getUNDEF(ConcatVT));
24668 continue;
24669 }
24670
24671 int OpIdx = -1;
24672 for (int i = 0; i != (int)NumElemsPerConcat; ++i) {
24673 if (IsUndefMaskElt(SubMask[i]))
24674 continue;
24675 if ((SubMask[i] % (int)NumElemsPerConcat) != i)
24676 return SDValue();
24677 int EltOpIdx = SubMask[i] / NumElemsPerConcat;
24678 if (0 <= OpIdx && EltOpIdx != OpIdx)
24679 return SDValue();
24680 OpIdx = EltOpIdx;
24681 }
24682 assert(0 <= OpIdx && "Unknown concat_vectors op");
24683
24684 if (OpIdx < (int)N0.getNumOperands())
24685 Ops.push_back(N0.getOperand(OpIdx));
24686 else
24687 Ops.push_back(N1.getOperand(OpIdx - N0.getNumOperands()));
24688 }
24689
24690 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
24691}
24692
24693// Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
24694// BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
24695//
24696// SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
24697// a simplification in some sense, but it isn't appropriate in general: some
24698// BUILD_VECTORs are substantially cheaper than others. The general case
24699// of a BUILD_VECTOR requires inserting each element individually (or
24700// performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
24701// all constants is a single constant pool load. A BUILD_VECTOR where each
24702// element is identical is a splat. A BUILD_VECTOR where most of the operands
24703// are undef lowers to a small number of element insertions.
24704//
24705// To deal with this, we currently use a bunch of mostly arbitrary heuristics.
24706// We don't fold shuffles where one side is a non-zero constant, and we don't
24707// fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
24708// non-constant operands. This seems to work out reasonably well in practice.
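// A minimal example of the combine below (illustrative):
//   shuffle (build_vector A, B), (build_vector C, D), <0,3> --> build_vector A, D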
24709 static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
24710 SelectionDAG &DAG,
24711 const TargetLowering &TLI) {
24712 EVT VT = SVN->getValueType(0);
24713 unsigned NumElts = VT.getVectorNumElements();
24714 SDValue N0 = SVN->getOperand(0);
24715 SDValue N1 = SVN->getOperand(1);
24716
24717 if (!N0->hasOneUse())
24718 return SDValue();
24719
24720 // If only one of N1,N2 is constant, bail out if it is not ALL_ZEROS as
24721 // discussed above.
24722 if (!N1.isUndef()) {
24723 if (!N1->hasOneUse())
24724 return SDValue();
24725
24726 bool N0AnyConst = isAnyConstantBuildVector(N0);
24727 bool N1AnyConst = isAnyConstantBuildVector(N1);
24728 if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
24729 return SDValue();
24730 if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
24731 return SDValue();
24732 }
24733
24734 // If both inputs are splats of the same value then we can safely merge this
24735 // to a single BUILD_VECTOR with undef elements based on the shuffle mask.
24736 bool IsSplat = false;
24737 auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
24738 auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
24739 if (BV0 && BV1)
24740 if (SDValue Splat0 = BV0->getSplatValue())
24741 IsSplat = (Splat0 == BV1->getSplatValue());
24742
24743 SmallVector<SDValue, 8> Ops;
24744 SmallSet<SDValue, 16> DuplicateOps;
24745 for (int M : SVN->getMask()) {
24746 SDValue Op = DAG.getUNDEF(VT.getScalarType());
24747 if (M >= 0) {
24748 int Idx = M < (int)NumElts ? M : M - NumElts;
24749 SDValue &S = (M < (int)NumElts ? N0 : N1);
24750 if (S.getOpcode() == ISD::BUILD_VECTOR) {
24751 Op = S.getOperand(Idx);
24752 } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
24753 SDValue Op0 = S.getOperand(0);
24754 Op = Idx == 0 ? Op0 : DAG.getUNDEF(Op0.getValueType());
24755 } else {
24756 // Operand can't be combined - bail out.
24757 return SDValue();
24758 }
24759 }
24760
24761 // Don't duplicate a non-constant BUILD_VECTOR operand unless we're
24762 // generating a splat; semantically, this is fine, but it's likely to
24763 // generate low-quality code if the target can't reconstruct an appropriate
24764 // shuffle.
24765 if (!Op.isUndef() && !isIntOrFPConstant(Op))
24766 if (!IsSplat && !DuplicateOps.insert(Op).second)
24767 return SDValue();
24768
24769 Ops.push_back(Op);
24770 }
24771
24772 // BUILD_VECTOR requires all inputs to be of the same type, find the
24773 // maximum type and extend them all.
24774 EVT SVT = VT.getScalarType();
24775 if (SVT.isInteger())
24776 for (SDValue &Op : Ops)
24777 SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
24778 if (SVT != VT.getScalarType())
24779 for (SDValue &Op : Ops)
24780 Op = Op.isUndef() ? DAG.getUNDEF(SVT)
24781 : (TLI.isZExtFree(Op.getValueType(), SVT)
24782 ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
24783 : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT));
24784 return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
24785}
24786
24787// Match shuffles that can be converted to *_vector_extend_in_reg.
24788// This is often generated during legalization.
24789// e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src)),
24790// and returns the EVT to which the extension should be performed.
24791// NOTE: this assumes that the src is the first operand of the shuffle.
24792 static std::optional<EVT> canCombineShuffleToExtendVectorInreg(
24793 unsigned Opcode, EVT VT, std::function<bool(unsigned)> Match,
24794 SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes,
24795 bool LegalOperations) {
24796 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
24797
24798 // TODO Add support for big-endian when we have a test case.
24799 if (!VT.isInteger() || IsBigEndian)
24800 return std::nullopt;
24801
24802 unsigned NumElts = VT.getVectorNumElements();
24803 unsigned EltSizeInBits = VT.getScalarSizeInBits();
24804
24805 // Attempt to match a '*_extend_vector_inreg' shuffle, we just search for
24806 // power-of-2 extensions as they are the most likely.
24807 // FIXME: should try Scale == NumElts case too,
24808 for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
24809 // The vector width must be a multiple of Scale.
24810 if (NumElts % Scale != 0)
24811 continue;
24812
24813 EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
24814 EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
24815
24816 if ((LegalTypes && !TLI.isTypeLegal(OutVT)) ||
24817 (LegalOperations && !TLI.isOperationLegalOrCustom(Opcode, OutVT)))
24818 continue;
24819
24820 if (Match(Scale))
24821 return OutVT;
24822 }
24823
24824 return std::nullopt;
24825}
24826
24827// Match shuffles that can be converted to any_vector_extend_in_reg.
24828// This is often generated during legalization.
24829// e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
24830 static SDValue combineShuffleToAnyExtendVectorInreg(ShuffleVectorSDNode *SVN,
24831 SelectionDAG &DAG,
24832 const TargetLowering &TLI,
24833 bool LegalOperations) {
24834 EVT VT = SVN->getValueType(0);
24835 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
24836
24837 // TODO Add support for big-endian when we have a test case.
24838 if (!VT.isInteger() || IsBigEndian)
24839 return SDValue();
24840
24841 // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
24842 auto isAnyExtend = [NumElts = VT.getVectorNumElements(),
24843 Mask = SVN->getMask()](unsigned Scale) {
24844 for (unsigned i = 0; i != NumElts; ++i) {
24845 if (Mask[i] < 0)
24846 continue;
24847 if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
24848 continue;
24849 return false;
24850 }
24851 return true;
24852 };
24853
24854 unsigned Opcode = ISD::ANY_EXTEND_VECTOR_INREG;
24855 SDValue N0 = SVN->getOperand(0);
24856 // Never create an illegal type. Only create unsupported operations if we
24857 // are pre-legalization.
24858 std::optional<EVT> OutVT = canCombineShuffleToExtendVectorInreg(
24859 Opcode, VT, isAnyExtend, DAG, TLI, /*LegalTypes=*/true, LegalOperations);
24860 if (!OutVT)
24861 return SDValue();
24862 return DAG.getBitcast(VT, DAG.getNode(Opcode, SDLoc(SVN), *OutVT, N0));
24863}
24864
24865// Match shuffles that can be converted to zero_extend_vector_inreg.
24866// This is often generated during legalization.
24867// e.g. v4i32 <0,z,1,u> -> (v2i64 zero_extend_vector_inreg(v4i32 src))
24868 static SDValue combineShuffleToZeroExtendVectorInReg(ShuffleVectorSDNode *SVN,
24869 SelectionDAG &DAG,
24870 const TargetLowering &TLI,
24871 bool LegalOperations) {
24872 bool LegalTypes = true;
24873 EVT VT = SVN->getValueType(0);
24874 assert(!VT.isScalableVector() && "Encountered scalable shuffle?");
24875 unsigned NumElts = VT.getVectorNumElements();
24876 unsigned EltSizeInBits = VT.getScalarSizeInBits();
24877
24878 // TODO: add support for big-endian when we have a test case.
24879 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
24880 if (!VT.isInteger() || IsBigEndian)
24881 return SDValue();
24882
24883 SmallVector<int, 16> Mask(SVN->getMask().begin(), SVN->getMask().end());
24884 auto ForEachDecomposedIndice = [NumElts, &Mask](auto Fn) {
24885 for (int &Indice : Mask) {
24886 if (Indice < 0)
24887 continue;
24888 int OpIdx = (unsigned)Indice < NumElts ? 0 : 1;
24889 int OpEltIdx = (unsigned)Indice < NumElts ? Indice : Indice - NumElts;
24890 Fn(Indice, OpIdx, OpEltIdx);
24891 }
24892 };
24893
24894 // Which elements of which operand does this shuffle demand?
24895 std::array<APInt, 2> OpsDemandedElts;
24896 for (APInt &OpDemandedElts : OpsDemandedElts)
24897 OpDemandedElts = APInt::getZero(NumElts);
24898 ForEachDecomposedIndice(
24899 [&OpsDemandedElts](int &Indice, int OpIdx, int OpEltIdx) {
24900 OpsDemandedElts[OpIdx].setBit(OpEltIdx);
24901 });
24902
24903 // Element-wise(!), which of these demanded elements are known to be zero?
24904 std::array<APInt, 2> OpsKnownZeroElts;
24905 for (auto I : zip(SVN->ops(), OpsDemandedElts, OpsKnownZeroElts))
24906 std::get<2>(I) =
24907 DAG.computeVectorKnownZeroElements(std::get<0>(I), std::get<1>(I));
24908
24909 // Manifest zeroable element knowledge in the shuffle mask.
24910 // NOTE: we don't have 'zeroable' sentinel value in generic DAG,
24911 // this is a local invention, but it won't leak into DAG.
24912 // FIXME: should we not manifest them, but just check when matching?
24913 bool HadZeroableElts = false;
24914 ForEachDecomposedIndice([&OpsKnownZeroElts, &HadZeroableElts](
24915 int &Indice, int OpIdx, int OpEltIdx) {
24916 if (OpsKnownZeroElts[OpIdx][OpEltIdx]) {
24917 Indice = -2; // Zeroable element.
24918 HadZeroableElts = true;
24919 }
24920 });
24921
24922 // Don't proceed unless we've refined at least one zeroable mask indice.
24923 // If we didn't, then we are still trying to match the same shuffle mask
24924 // we previously tried to match as ISD::ANY_EXTEND_VECTOR_INREG,
24925 // and evidently failed. Proceeding will lead to endless combine loops.
24926 if (!HadZeroableElts)
24927 return SDValue();
24928
24929 // The shuffle may be more fine-grained than we want. Widen elements first.
24930 // FIXME: should we do this before manifesting zeroable shuffle mask indices?
24931 SmallVector<int, 16> ScaledMask;
24932 getShuffleMaskWithWidestElts(Mask, ScaledMask);
24933 assert(Mask.size() >= ScaledMask.size() &&
24934 Mask.size() % ScaledMask.size() == 0 && "Unexpected mask widening.");
24935 int Prescale = Mask.size() / ScaledMask.size();
24936
24937 NumElts = ScaledMask.size();
24938 EltSizeInBits *= Prescale;
24939
24940 EVT PrescaledVT = EVT::getVectorVT(
24941 *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits),
24942 NumElts);
24943
24944 if (LegalTypes && !TLI.isTypeLegal(PrescaledVT) && TLI.isTypeLegal(VT))
24945 return SDValue();
24946
24947 // For example,
24948 // shuffle<0,z,1,-1> == (v2i64 zero_extend_vector_inreg(v4i32))
24949 // But not shuffle<z,z,1,-1> and not shuffle<0,z,z,-1> ! (for same types)
24950 auto isZeroExtend = [NumElts, &ScaledMask](unsigned Scale) {
24951 assert(Scale >= 2 && Scale <= NumElts && NumElts % Scale == 0 &&
24952 "Unexpected mask scaling factor.");
24953 ArrayRef<int> Mask = ScaledMask;
24954 for (unsigned SrcElt = 0, NumSrcElts = NumElts / Scale;
24955 SrcElt != NumSrcElts; ++SrcElt) {
24956 // Analyze the shuffle mask in Scale-sized chunks.
24957 ArrayRef<int> MaskChunk = Mask.take_front(Scale);
24958 assert(MaskChunk.size() == Scale && "Unexpected mask size.");
24959 Mask = Mask.drop_front(MaskChunk.size());
24960 // The first indice in this chunk must be SrcElt, but not zero!
24961 // FIXME: undef should be fine, but that results in more-defined result.
24962 if (int FirstIndice = MaskChunk[0]; (unsigned)FirstIndice != SrcElt)
24963 return false;
24964 // The rest of the indices in this chunk must be zeros.
24965 // FIXME: undef should be fine, but that results in more-defined result.
24966 if (!all_of(MaskChunk.drop_front(1),
24967 [](int Indice) { return Indice == -2; }))
24968 return false;
24969 }
24970 assert(Mask.empty() && "Did not process the whole mask?");
24971 return true;
24972 };
24973
24974 unsigned Opcode = ISD::ZERO_EXTEND_VECTOR_INREG;
24975 for (bool Commuted : {false, true}) {
24976 SDValue Op = SVN->getOperand(!Commuted ? 0 : 1);
24977 if (Commuted)
24978 ShuffleVectorSDNode::commuteMask(ScaledMask);
24979 std::optional<EVT> OutVT = canCombineShuffleToExtendVectorInreg(
24980 Opcode, PrescaledVT, isZeroExtend, DAG, TLI, LegalTypes,
24981 LegalOperations);
24982 if (OutVT)
24983 return DAG.getBitcast(VT, DAG.getNode(Opcode, SDLoc(SVN), *OutVT,
24984 DAG.getBitcast(PrescaledVT, Op)));
24985 }
24986 return SDValue();
24987}
24988
24989// Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
24990// each source element of a large type into the lowest elements of a smaller
24991// destination type. This is often generated during legalization.
24992// If the source node itself was a '*_extend_vector_inreg' node then we should
24993// then be able to remove it.
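// For example (illustrative):
//   v4i32 shuffle (v4i32 bitcast (v2i64 zero_extend_vector_inreg (v4i32 X))),
//                 undef, <0,2,u,u> --> X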
24994 static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
24995 SelectionDAG &DAG) {
24996 EVT VT = SVN->getValueType(0);
24997 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
24998
24999 // TODO Add support for big-endian when we have a test case.
25000 if (!VT.isInteger() || IsBigEndian)
25001 return SDValue();
25002
25003 SDValue N0 = peekThroughBitcasts(SVN->getOperand(0));
25004
25005 unsigned Opcode = N0.getOpcode();
25006 if (!ISD::isExtVecInRegOpcode(Opcode))
25007 return SDValue();
25008
25009 SDValue N00 = N0.getOperand(0);
25010 ArrayRef<int> Mask = SVN->getMask();
25011 unsigned NumElts = VT.getVectorNumElements();
25012 unsigned EltSizeInBits = VT.getScalarSizeInBits();
25013 unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
25014 unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();
25015
25016 if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
25017 return SDValue();
25018 unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;
25019
25020 // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2-1,-1>
25021 // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
25022 // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
25023 auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
25024 for (unsigned i = 0; i != NumElts; ++i) {
25025 if (Mask[i] < 0)
25026 continue;
25027 if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
25028 continue;
25029 return false;
25030 }
25031 return true;
25032 };
25033
25034 // At the moment we just handle the case where we've truncated back to the
25035 // same size as before the extension.
25036 // TODO: handle more extension/truncation cases as cases arise.
25037 if (EltSizeInBits != ExtSrcSizeInBits)
25038 return SDValue();
25039
25040 // We can remove *extend_vector_inreg only if the truncation happens at
25041 // the same scale as the extension.
25042 if (isTruncate(ExtScale))
25043 return DAG.getBitcast(VT, N00);
25044
25045 return SDValue();
25046}
25047
25048// Combine shuffles of splat-shuffles of the form:
25049// shuffle (shuffle V, undef, splat-mask), undef, M
25050// If splat-mask contains undef elements, we need to be careful about
25051// introducing undef's in the folded mask which are not the result of composing
25052// the masks of the shuffles.
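// e.g. (illustrative, assuming element 1 of V is not undef):
//   shuffle (shuffle V, undef, <1,1,1,1>), undef, <0,0,0,0>
//     --> shuffle V, undef, <1,1,1,1>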
25053 static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf,
25054 SelectionDAG &DAG) {
25055 EVT VT = Shuf->getValueType(0);
25056 unsigned NumElts = VT.getVectorNumElements();
25057
25058 if (!Shuf->getOperand(1).isUndef())
25059 return SDValue();
25060
25061 // See if this unary non-splat shuffle actually *is* a splat shuffle,
25062 // in disguise, with all demanded elements being identical.
25063 // FIXME: this can be done per-operand.
25064 if (!Shuf->isSplat()) {
25065 APInt DemandedElts(NumElts, 0);
25066 for (int Idx : Shuf->getMask()) {
25067 if (Idx < 0)
25068 continue; // Ignore sentinel indices.
25069 assert((unsigned)Idx < NumElts && "Out-of-bounds shuffle indice?");
25070 DemandedElts.setBit(Idx);
25071 }
25072 assert(DemandedElts.popcount() > 1 && "Is a splat shuffle already?");
25073 APInt UndefElts;
25074 if (DAG.isSplatValue(Shuf->getOperand(0), DemandedElts, UndefElts)) {
25075 // Even if all demanded elements are splat, some of them could be undef.
25076 // Which lowest demanded element is *not* known-undef?
25077 std::optional<unsigned> MinNonUndefIdx;
25078 for (int Idx : Shuf->getMask()) {
25079 if (Idx < 0 || UndefElts[Idx])
25080 continue; // Ignore sentinel indices, and undef elements.
25081 MinNonUndefIdx = std::min<unsigned>(Idx, MinNonUndefIdx.value_or(~0U));
25082 }
25083 if (!MinNonUndefIdx)
25084 return DAG.getUNDEF(VT); // All undef - result is undef.
25085 assert(*MinNonUndefIdx < NumElts && "Expected valid element index.");
25086 SmallVector<int, 8> SplatMask(Shuf->getMask().begin(),
25087 Shuf->getMask().end());
25088 for (int &Idx : SplatMask) {
25089 if (Idx < 0)
25090 continue; // Passthrough sentinel indices.
25091 // Otherwise, just pick the lowest demanded non-undef element.
25092 // Or sentinel undef, if we know we'd pick a known-undef element.
25093 Idx = UndefElts[Idx] ? -1 : *MinNonUndefIdx;
25094 }
25095 assert(SplatMask != Shuf->getMask() && "Expected mask to change!");
25096 return DAG.getVectorShuffle(VT, SDLoc(Shuf), Shuf->getOperand(0),
25097 Shuf->getOperand(1), SplatMask);
25098 }
25099 }
25100
25101 // If the inner operand is a known splat with no undefs, just return that directly.
25102 // TODO: Create DemandedElts mask from Shuf's mask.
25103 // TODO: Allow undef elements and merge with the shuffle code below.
25104 if (DAG.isSplatValue(Shuf->getOperand(0), /*AllowUndefs*/ false))
25105 return Shuf->getOperand(0);
25106
25107 auto *Splat = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
25108 if (!Splat || !Splat->isSplat())
25109 return SDValue();
25110
25111 ArrayRef<int> ShufMask = Shuf->getMask();
25112 ArrayRef<int> SplatMask = Splat->getMask();
25113 assert(ShufMask.size() == SplatMask.size() && "Mask length mismatch");
25114
25115 // Prefer simplifying to the splat-shuffle, if possible. This is legal if
25116 // every undef mask element in the splat-shuffle has a corresponding undef
25117 // element in the user-shuffle's mask or if the composition of mask elements
25118 // would result in undef.
25119 // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
25120 // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
25121 // In this case it is not legal to simplify to the splat-shuffle because we
25122 // may be exposing the users of the shuffle to an undef element at index 1
25123 // which was not there before the combine.
25124 // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
25125 // In this case the composition of masks yields SplatMask, so it's ok to
25126 // simplify to the splat-shuffle.
25127 // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
25128 // In this case the composed mask includes all undef elements of SplatMask
25129 // and in addition sets element zero to undef. It is safe to simplify to
25130 // the splat-shuffle.
25131 auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
25132 ArrayRef<int> SplatMask) {
25133 for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
25134 if (UserMask[i] != -1 && SplatMask[i] == -1 &&
25135 SplatMask[UserMask[i]] != -1)
25136 return false;
25137 return true;
25138 };
25139 if (CanSimplifyToExistingSplat(ShufMask, SplatMask))
25140 return Shuf->getOperand(0);
25141
25142 // Create a new shuffle with a mask that is composed of the two shuffles'
25143 // masks.
25144 SmallVector<int, 32> NewMask;
25145 for (int Idx : ShufMask)
25146 NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
25147
25148 return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
25149 Splat->getOperand(0), Splat->getOperand(1),
25150 NewMask);
25151}
25152
25153// Combine shuffles of bitcasts into a shuffle of the bitcast type, providing
25154// the mask can be treated as a larger type.
25155 static SDValue combineShuffleOfBitcast(ShuffleVectorSDNode *SVN,
25156 SelectionDAG &DAG,
25157 const TargetLowering &TLI,
25158 bool LegalOperations) {
25159 SDValue Op0 = SVN->getOperand(0);
25160 SDValue Op1 = SVN->getOperand(1);
25161 EVT VT = SVN->getValueType(0);
25162 if (Op0.getOpcode() != ISD::BITCAST)
25163 return SDValue();
25164 EVT InVT = Op0.getOperand(0).getValueType();
25165 if (!InVT.isVector() ||
25166 (!Op1.isUndef() && (Op1.getOpcode() != ISD::BITCAST ||
25167 Op1.getOperand(0).getValueType() != InVT)))
25168 return SDValue();
25169 if (isAnyConstantBuildVector(Op0.getOperand(0)) &&
25170 (Op1.isUndef() || isAnyConstantBuildVector(Op1.getOperand(0))))
25171 return SDValue();
25172
25173 int VTLanes = VT.getVectorNumElements();
25174 int InLanes = InVT.getVectorNumElements();
25175 if (VTLanes <= InLanes || VTLanes % InLanes != 0 ||
25176 (LegalOperations &&
25177 !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT)))
25178 return SDValue();
25179 int Factor = VTLanes / InLanes;
25180
25181 // Check that each group of lanes in the mask are either undef or make a valid
25182 // mask for the wider lane type.
25183 ArrayRef<int> Mask = SVN->getMask();
25184 SmallVector<int> NewMask;
25185 if (!widenShuffleMaskElts(Factor, Mask, NewMask))
25186 return SDValue();
25187
25188 if (!TLI.isShuffleMaskLegal(NewMask, InVT))
25189 return SDValue();
25190
25191 // Create the new shuffle with the new mask and bitcast it back to the
25192 // original type.
25193 SDLoc DL(SVN);
25194 Op0 = Op0.getOperand(0);
25195 Op1 = Op1.isUndef() ? DAG.getUNDEF(InVT) : Op1.getOperand(0);
25196 SDValue NewShuf = DAG.getVectorShuffle(InVT, DL, Op0, Op1, NewMask);
25197 return DAG.getBitcast(VT, NewShuf);
25198}
25199
25200/// Combine shuffle of shuffle of the form:
25201/// shuf (shuf X, undef, InnerMask), undef, OuterMask --> splat X
25202 static SDValue formSplatFromShuffles(ShuffleVectorSDNode *OuterShuf,
25203 SelectionDAG &DAG) {
25204 if (!OuterShuf->getOperand(1).isUndef())
25205 return SDValue();
25206 auto *InnerShuf = dyn_cast<ShuffleVectorSDNode>(OuterShuf->getOperand(0));
25207 if (!InnerShuf || !InnerShuf->getOperand(1).isUndef())
25208 return SDValue();
25209
25210 ArrayRef<int> OuterMask = OuterShuf->getMask();
25211 ArrayRef<int> InnerMask = InnerShuf->getMask();
25212 unsigned NumElts = OuterMask.size();
25213 assert(NumElts == InnerMask.size() && "Mask length mismatch");
25214 SmallVector<int, 32> CombinedMask(NumElts, -1);
25215 int SplatIndex = -1;
25216 for (unsigned i = 0; i != NumElts; ++i) {
25217 // Undef lanes remain undef.
25218 int OuterMaskElt = OuterMask[i];
25219 if (OuterMaskElt == -1)
25220 continue;
25221
25222 // Peek through the shuffle masks to get the underlying source element.
25223 int InnerMaskElt = InnerMask[OuterMaskElt];
25224 if (InnerMaskElt == -1)
25225 continue;
25226
25227 // Initialize the splatted element.
25228 if (SplatIndex == -1)
25229 SplatIndex = InnerMaskElt;
25230
25231 // Non-matching index - this is not a splat.
25232 if (SplatIndex != InnerMaskElt)
25233 return SDValue();
25234
25235 CombinedMask[i] = InnerMaskElt;
25236 }
25237 assert((all_of(CombinedMask, [](int M) { return M == -1; }) ||
25238 getSplatIndex(CombinedMask) != -1) &&
25239 "Expected a splat mask");
25240
25241 // TODO: The transform may be a win even if the mask is not legal.
25242 EVT VT = OuterShuf->getValueType(0);
25243 assert(VT == InnerShuf->getValueType(0) && "Expected matching shuffle types");
25244 if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(CombinedMask, VT))
25245 return SDValue();
25246
25247 return DAG.getVectorShuffle(VT, SDLoc(OuterShuf), InnerShuf->getOperand(0),
25248 InnerShuf->getOperand(1), CombinedMask);
25249}
25250
25251/// If the shuffle mask is taking exactly one element from the first vector
25252/// operand and passing through all other elements from the second vector
25253/// operand, return the index of the mask element that is choosing an element
25254/// from the first operand. Otherwise, return -1.
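/// For example (illustrative): <0,5,6,7> returns 0, while <0,1,6,7> returns -1
/// because two elements would be taken from the first operand.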
25255 static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) {
25256 int MaskSize = Mask.size();
25257 int EltFromOp0 = -1;
25258 // TODO: This does not match if there are undef elements in the shuffle mask.
25259 // Should we ignore undefs in the shuffle mask instead? The trade-off is
25260 // removing an instruction (a shuffle), but losing the knowledge that some
25261 // vector lanes are not needed.
25262 for (int i = 0; i != MaskSize; ++i) {
25263 if (Mask[i] >= 0 && Mask[i] < MaskSize) {
25264 // We're looking for a shuffle of exactly one element from operand 0.
25265 if (EltFromOp0 != -1)
25266 return -1;
25267 EltFromOp0 = i;
25268 } else if (Mask[i] != i + MaskSize) {
25269 // Nothing from operand 1 can change lanes.
25270 return -1;
25271 }
25272 }
25273 return EltFromOp0;
25274}
25275
25276/// If a shuffle inserts exactly one element from a source vector operand into
25277/// another vector operand and we can access the specified element as a scalar,
25278/// then we can eliminate the shuffle.
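/// For example (illustrative):
///   shuffle (insertelt v1, x, 0), v2, <0,5,6,7> --> insertelt v2, x, 0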
25279 static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
25280 SelectionDAG &DAG) {
25281 // First, check if we are taking one element of a vector and shuffling that
25282 // element into another vector.
25283 ArrayRef<int> Mask = Shuf->getMask();
25284 SmallVector<int, 16> CommutedMask(Mask);
25285 SDValue Op0 = Shuf->getOperand(0);
25286 SDValue Op1 = Shuf->getOperand(1);
25287 int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask);
25288 if (ShufOp0Index == -1) {
25289 // Commute mask and check again.
25290 ShuffleVectorSDNode::commuteMask(CommutedMask);
25291 ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask);
25292 if (ShufOp0Index == -1)
25293 return SDValue();
25294 // Commute operands to match the commuted shuffle mask.
25295 std::swap(Op0, Op1);
25296 Mask = CommutedMask;
25297 }
25298
25299 // The shuffle inserts exactly one element from operand 0 into operand 1.
25300 // Now see if we can access that element as a scalar via a real insert element
25301 // instruction.
25302 // TODO: We can try harder to locate the element as a scalar. Examples: it
25303 // could be an operand of SCALAR_TO_VECTOR, BUILD_VECTOR, or a constant.
25304 assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&
25305 "Shuffle mask value must be from operand 0");
25306 if (Op0.getOpcode() != ISD::INSERT_VECTOR_ELT)
25307 return SDValue();
25308
25309 auto *InsIndexC = dyn_cast<ConstantSDNode>(Op0.getOperand(2));
25310 if (!InsIndexC || InsIndexC->getSExtValue() != Mask[ShufOp0Index])
25311 return SDValue();
25312
25313 // There's an existing insertelement with constant insertion index, so we
25314 // don't need to check the legality/profitability of a replacement operation
25315 // that differs at most in the constant value. The target should be able to
25316 // lower any of those in a similar way. If not, legalization will expand this
25317 // to a scalar-to-vector plus shuffle.
25318 //
25319 // Note that the shuffle may move the scalar from the position that the insert
25320 // element used. Therefore, our new insert element occurs at the shuffle's
25321 // mask index value, not the insert's index value.
25322 // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
25323 SDValue NewInsIndex = DAG.getVectorIdxConstant(ShufOp0Index, SDLoc(Shuf));
25324 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
25325 Op1, Op0.getOperand(1), NewInsIndex);
25326}
25327
25328/// If we have a unary shuffle of a shuffle, see if it can be folded away
25329/// completely. This has the potential to lose undef knowledge because the first
25330/// shuffle may not have an undef mask element where the second one does. So
25331/// only call this after doing simplifications based on demanded elements.
25332 static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf) {
25333 // shuf (shuf0 X, Y, Mask0), undef, Mask
25334 auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
25335 if (!Shuf0 || !Shuf->getOperand(1).isUndef())
25336 return SDValue();
25337
25338 ArrayRef<int> Mask = Shuf->getMask();
25339 ArrayRef<int> Mask0 = Shuf0->getMask();
25340 for (int i = 0, e = (int)Mask.size(); i != e; ++i) {
25341 // Ignore undef elements.
25342 if (Mask[i] == -1)
25343 continue;
25344 assert(Mask[i] >= 0 && Mask[i] < e && "Unexpected shuffle mask value");
25345
25346 // Is the element of the shuffle operand chosen by this shuffle the same as
25347 // the element chosen by the shuffle operand itself?
25348 if (Mask0[Mask[i]] != Mask0[i])
25349 return SDValue();
25350 }
25351 // Every element of this shuffle is identical to the result of the previous
25352 // shuffle, so we can replace this value.
25353 return Shuf->getOperand(0);
25354}
25355
25356SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
25357 EVT VT = N->getValueType(0);
25358 unsigned NumElts = VT.getVectorNumElements();
25359
25360 SDValue N0 = N->getOperand(0);
25361 SDValue N1 = N->getOperand(1);
25362
25363 assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
25364
25365 // Canonicalize shuffle undef, undef -> undef
25366 if (N0.isUndef() && N1.isUndef())
25367 return DAG.getUNDEF(VT);
25368
25369 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
25370
25371 // Canonicalize shuffle v, v -> v, undef
25372 if (N0 == N1)
25373 return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT),
25374 createUnaryMask(SVN->getMask(), NumElts));
25375
25376 // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
25377 if (N0.isUndef())
25378 return DAG.getCommutedVectorShuffle(*SVN);
25379
25380 // Remove references to rhs if it is undef
25381 if (N1.isUndef()) {
25382 bool Changed = false;
25383 SmallVector<int, 8> NewMask;
25384 for (unsigned i = 0; i != NumElts; ++i) {
25385 int Idx = SVN->getMaskElt(i);
25386 if (Idx >= (int)NumElts) {
25387 Idx = -1;
25388 Changed = true;
25389 }
25390 NewMask.push_back(Idx);
25391 }
25392 if (Changed)
25393 return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
25394 }
25395
25396 if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG))
25397 return InsElt;
25398
25399 // A shuffle of a single vector that is a splatted value can always be folded.
25400 if (SDValue V = combineShuffleOfSplatVal(SVN, DAG))
25401 return V;
25402
25403 if (SDValue V = formSplatFromShuffles(SVN, DAG))
25404 return V;
25405
25406 // If it is a splat, check if the argument vector is another splat or a
25407 // build_vector.
25408 if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
25409 int SplatIndex = SVN->getSplatIndex();
25410 if (N0.hasOneUse() && TLI.isExtractVecEltCheap(VT, SplatIndex) &&
25411 TLI.isBinOp(N0.getOpcode()) && N0->getNumValues() == 1) {
25412 // splat (vector_bo L, R), Index -->
25413 // splat (scalar_bo (extelt L, Index), (extelt R, Index))
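// The scalar result is rebroadcast below via SCALAR_TO_VECTOR followed by a
// shuffle with an all-zero mask (i.e. a splat of lane 0).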
25414 SDValue L = N0.getOperand(0), R = N0.getOperand(1);
25415 SDLoc DL(N);
25416 EVT EltVT = VT.getScalarType();
25417 SDValue Index = DAG.getVectorIdxConstant(SplatIndex, DL);
25418 SDValue ExtL = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, L, Index);
25419 SDValue ExtR = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, R, Index);
25420 SDValue NewBO =
25421 DAG.getNode(N0.getOpcode(), DL, EltVT, ExtL, ExtR, N0->getFlags());
25422 SDValue Insert = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, NewBO);
25423 SmallVector<int, 16> ZeroMask(VT.getVectorNumElements(), 0);
25424 return DAG.getVectorShuffle(VT, DL, Insert, DAG.getUNDEF(VT), ZeroMask);
25425 }
25426
25427 // splat(scalar_to_vector(x), 0) -> build_vector(x,...,x)
25428 // splat(insert_vector_elt(v, x, c), c) -> build_vector(x,...,x)
25429 if ((!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) &&
25430 N0.hasOneUse()) {
25431 if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && SplatIndex == 0)
25432 return DAG.getSplatBuildVector(VT, SDLoc(N), N0.getOperand(0));
25433
25434 if (N0.getOpcode() == ISD::INSERT_VECTOR_ELT)
25435 if (auto *Idx = dyn_cast<ConstantSDNode>(N0.getOperand(2)))
25436 if (Idx->getAPIntValue() == SplatIndex)
25437 return DAG.getSplatBuildVector(VT, SDLoc(N), N0.getOperand(1));
25438
25439 // Look through a bitcast if LE and splatting lane 0, through to a
25440 // scalar_to_vector or a build_vector.
25441 if (N0.getOpcode() == ISD::BITCAST && N0.getOperand(0).hasOneUse() &&
25442 SplatIndex == 0 && DAG.getDataLayout().isLittleEndian() &&
25443 (N0.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR ||
25444 N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR)) {
25445 EVT N00VT = N0.getOperand(0).getValueType();
25446 if (VT.getScalarSizeInBits() <= N00VT.getScalarSizeInBits() &&
25447 VT.isInteger() && N00VT.isInteger()) {
25448 EVT InVT =
25449 TLI.getTypeToTransformTo(*DAG.getContext(), VT.getScalarType());
25450 SDValue Op = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0),
25451 SDLoc(N), InVT);
25452 return DAG.getSplatBuildVector(VT, SDLoc(N), Op);
25453 }
25454 }
25455 }
25456
25457 // If this is a bit convert that changes the element type of the vector but
25458 // not the number of vector elements, look through it. Be careful not to
25459 // look through conversions that change things like v4f32 to v2f64.
25460 SDNode *V = N0.getNode();
25461 if (V->getOpcode() == ISD::BITCAST) {
25462 SDValue ConvInput = V->getOperand(0);
25463 if (ConvInput.getValueType().isVector() &&
25464 ConvInput.getValueType().getVectorNumElements() == NumElts)
25465 V = ConvInput.getNode();
25466 }
25467
25468 if (V->getOpcode() == ISD::BUILD_VECTOR) {
25469 assert(V->getNumOperands() == NumElts &&
25470 "BUILD_VECTOR has wrong number of operands");
25471 SDValue Base;
25472 bool AllSame = true;
25473 for (unsigned i = 0; i != NumElts; ++i) {
25474 if (!V->getOperand(i).isUndef()) {
25475 Base = V->getOperand(i);
25476 break;
25477 }
25478 }
25479 // Splat of <u, u, u, u>, return <u, u, u, u>
25480 if (!Base.getNode())
25481 return N0;
25482 for (unsigned i = 0; i != NumElts; ++i) {
25483 if (V->getOperand(i) != Base) {
25484 AllSame = false;
25485 break;
25486 }
25487 }
25488 // Splat of <x, x, x, x>, return <x, x, x, x>
25489 if (AllSame)
25490 return N0;
25491
25492 // Canonicalize any other splat as a build_vector.
25493 SDValue Splatted = V->getOperand(SplatIndex);
25494 SmallVector<SDValue, 8> Ops(NumElts, Splatted);
25495 SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
25496
25497 // We may have jumped through bitcasts, so the type of the
25498 // BUILD_VECTOR may not match the type of the shuffle.
25499 if (V->getValueType(0) != VT)
25500 NewBV = DAG.getBitcast(VT, NewBV);
25501 return NewBV;
25502 }
25503 }
25504
25505 // Simplify source operands based on shuffle mask.
25506 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
25507 return SDValue(N, 0);
25508
25509 // This is intentionally placed after demanded elements simplification because
25510 // it could eliminate knowledge of undef elements created by this shuffle.
25511 if (SDValue ShufOp = simplifyShuffleOfShuffle(SVN))
25512 return ShufOp;
25513
25514 // Match shuffles that can be converted to any_vector_extend_in_reg.
25515 if (SDValue V =
25516 combineShuffleToAnyExtendVectorInreg(SVN, DAG, TLI, LegalOperations))
25517 return V;
25518
25519 // Combine "truncate_vector_in_reg" style shuffles.
25520 if (SDValue V = combineTruncationShuffle(SVN, DAG))
25521 return V;
25522
25523 if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
25524 Level < AfterLegalizeVectorOps &&
25525 (N1.isUndef() ||
25526 (N1.getOpcode() == ISD::CONCAT_VECTORS &&
25527 N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
25528 if (SDValue V = partitionShuffleOfConcats(N, DAG))
25529 return V;
25530 }
25531
25532 // A shuffle of a concat of the same narrow vector can be reduced to use
25533 // only low-half elements of a concat with undef:
25534 // shuf (concat X, X), undef, Mask --> shuf (concat X, undef), undef, Mask'
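// For example (illustrative only): with X of type v2f32, a v4f32 mask of
// <2, 3, 0, 1> on (concat X, X) selects the same data as <0, 1, 0, 1> on
// (concat X, undef), because both halves of the concat are identical.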
25535 if (N0.getOpcode() == ISD::CONCAT_VECTORS && N1.isUndef() &&
25536 N0.getNumOperands() == 2 &&
25537 N0.getOperand(0) == N0.getOperand(1)) {
25538 int HalfNumElts = (int)NumElts / 2;
25539 SmallVector<int, 8> NewMask;
25540 for (unsigned i = 0; i != NumElts; ++i) {
25541 int Idx = SVN->getMaskElt(i);
25542 if (Idx >= HalfNumElts) {
25543 assert(Idx < (int)NumElts && "Shuffle mask chooses undef op");
25544 Idx -= HalfNumElts;
25545 }
25546 NewMask.push_back(Idx);
25547 }
25548 if (TLI.isShuffleMaskLegal(NewMask, VT)) {
25549 SDValue UndefVec = DAG.getUNDEF(N0.getOperand(0).getValueType());
25550 SDValue NewCat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
25551 N0.getOperand(0), UndefVec);
25552 return DAG.getVectorShuffle(VT, SDLoc(N), NewCat, N1, NewMask);
25553 }
25554 }
25555
25556 // See if we can replace a shuffle with an insert_subvector.
25557 // e.g. v2i32 into v8i32:
25558 // shuffle(lhs,concat(rhs0,rhs1,rhs2,rhs3),0,1,2,3,10,11,6,7).
25559 // --> insert_subvector(lhs,rhs1,4).
25560 if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT) &&
25561 TLI.isOperationLegalOrCustom(ISD::INSERT_SUBVECTOR, VT)) {
25562 auto ShuffleToInsert = [&](SDValue LHS, SDValue RHS, ArrayRef<int> Mask) {
25563 // Ensure RHS subvectors are legal.
25564 assert(RHS.getOpcode() == ISD::CONCAT_VECTORS && "Can't find subvectors");
25565 EVT SubVT = RHS.getOperand(0).getValueType();
25566 int NumSubVecs = RHS.getNumOperands();
25567 int NumSubElts = SubVT.getVectorNumElements();
25568 assert((NumElts % NumSubElts) == 0 && "Subvector mismatch");
25569 if (!TLI.isTypeLegal(SubVT))
25570 return SDValue();
25571
25572 // Don't bother if we have a unary shuffle (matches undef + LHS elts).
25573 if (all_of(Mask, [NumElts](int M) { return M < (int)NumElts; }))
25574 return SDValue();
25575
25576 // Search [NumSubElts] spans for RHS sequence.
25577 // TODO: Can we avoid nested loops to increase performance?
25578 SmallVector<int> InsertionMask(NumElts);
25579 for (int SubVec = 0; SubVec != NumSubVecs; ++SubVec) {
25580 for (int SubIdx = 0; SubIdx != (int)NumElts; SubIdx += NumSubElts) {
25581 // Reset mask to identity.
25582 std::iota(InsertionMask.begin(), InsertionMask.end(), 0);
25583
25584 // Add subvector insertion.
25585 std::iota(InsertionMask.begin() + SubIdx,
25586 InsertionMask.begin() + SubIdx + NumSubElts,
25587 NumElts + (SubVec * NumSubElts));
25588
25589 // See if the shuffle mask matches the reference insertion mask.
25590 bool MatchingShuffle = true;
25591 for (int i = 0; i != (int)NumElts; ++i) {
25592 int ExpectIdx = InsertionMask[i];
25593 int ActualIdx = Mask[i];
25594 if (0 <= ActualIdx && ExpectIdx != ActualIdx) {
25595 MatchingShuffle = false;
25596 break;
25597 }
25598 }
25599
25600 if (MatchingShuffle)
25601 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, LHS,
25602 RHS.getOperand(SubVec),
25603 DAG.getVectorIdxConstant(SubIdx, SDLoc(N)));
25604 }
25605 }
25606 return SDValue();
25607 };
25608 ArrayRef<int> Mask = SVN->getMask();
25609 if (N1.getOpcode() == ISD::CONCAT_VECTORS)
25610 if (SDValue InsertN1 = ShuffleToInsert(N0, N1, Mask))
25611 return InsertN1;
25612 if (N0.getOpcode() == ISD::CONCAT_VECTORS) {
25613 SmallVector<int> CommuteMask(Mask);
25614 ShuffleVectorSDNode::commuteMask(CommuteMask);
25615 if (SDValue InsertN0 = ShuffleToInsert(N1, N0, CommuteMask))
25616 return InsertN0;
25617 }
25618 }
25619
25620 // If we're not performing a select/blend shuffle, see if we can convert the
25621 // shuffle into an AND node, where all the out-of-lane elements are known to be zero.
25622 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
25623 bool IsInLaneMask = true;
25624 ArrayRef<int> Mask = SVN->getMask();
25625 SmallVector<int, 16> ClearMask(NumElts, -1);
25626 APInt DemandedLHS = APInt::getZero(NumElts);
25627 APInt DemandedRHS = APInt::getZero(NumElts);
25628 for (int I = 0; I != (int)NumElts; ++I) {
25629 int M = Mask[I];
25630 if (M < 0)
25631 continue;
25632 ClearMask[I] = M == I ? I : (I + NumElts);
25633 IsInLaneMask &= (M == I) || (M == (int)(I + NumElts));
25634 if (M != I) {
25635 APInt &Demanded = M < (int)NumElts ? DemandedLHS : DemandedRHS;
25636 Demanded.setBit(M % NumElts);
25637 }
25638 }
25639 // TODO: Should we try to mask with N1 as well?
25640 if (!IsInLaneMask && (!DemandedLHS.isZero() || !DemandedRHS.isZero()) &&
25641 (DemandedLHS.isZero() || DAG.MaskedVectorIsZero(N0, DemandedLHS)) &&
25642 (DemandedRHS.isZero() || DAG.MaskedVectorIsZero(N1, DemandedRHS))) {
25643 SDLoc DL(N);
25644 EVT IntVT = VT.changeVectorElementTypeToInteger();
25645 EVT IntSVT = VT.getVectorElementType().changeTypeToInteger();
25646 // Transform the type to a legal type so that the buildvector constant
25647 // elements are not illegal. Make sure that the result is larger than the
25648 // original type, in case the value is split into two (e.g. i64->i32).
25649 if (!TLI.isTypeLegal(IntSVT) && LegalTypes)
25650 IntSVT = TLI.getTypeToTransformTo(*DAG.getContext(), IntSVT);
25651 if (IntSVT.getSizeInBits() >= IntVT.getScalarSizeInBits()) {
25652 SDValue ZeroElt = DAG.getConstant(0, DL, IntSVT);
25653 SDValue AllOnesElt = DAG.getAllOnesConstant(DL, IntSVT);
25654 SmallVector<SDValue, 16> AndMask(NumElts, DAG.getUNDEF(IntSVT));
25655 for (int I = 0; I != (int)NumElts; ++I)
25656 if (0 <= Mask[I])
25657 AndMask[I] = Mask[I] == I ? AllOnesElt : ZeroElt;
25658
25659 // See if a clear mask is legal instead of going via
25660 // XformToShuffleWithZero which loses UNDEF mask elements.
25661 if (TLI.isVectorClearMaskLegal(ClearMask, IntVT))
25662 return DAG.getBitcast(
25663 VT, DAG.getVectorShuffle(IntVT, DL, DAG.getBitcast(IntVT, N0),
25664 DAG.getConstant(0, DL, IntVT), ClearMask));
25665
25666 if (TLI.isOperationLegalOrCustom(ISD::AND, IntVT))
25667 return DAG.getBitcast(
25668 VT, DAG.getNode(ISD::AND, DL, IntVT, DAG.getBitcast(IntVT, N0),
25669 DAG.getBuildVector(IntVT, DL, AndMask)));
25670 }
25671 }
25672 }
25673
25674 // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
25675 // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
25676 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT))
25677 if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
25678 return Res;
25679
25680 // If this shuffle only has a single input that is a bitcasted shuffle,
25681 // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
25682 // back to their original types.
25683 if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
25684 N1.isUndef() && Level < AfterLegalizeVectorOps &&
25685 TLI.isTypeLegal(VT)) {
25686
25687 SDValue BC0 = peekThroughOneUseBitcasts(N0);
25688 if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
25689 EVT SVT = VT.getScalarType();
25690 EVT InnerVT = BC0->getValueType(0);
25691 EVT InnerSVT = InnerVT.getScalarType();
25692
25693 // Determine which shuffle works with the smaller scalar type.
25694 EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
25695 EVT ScaleSVT = ScaleVT.getScalarType();
25696
25697 if (TLI.isTypeLegal(ScaleVT) &&
25698 0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
25699 0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
25700 int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
25701 int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
25702
25703 // Scale the shuffle masks to the smaller scalar type.
25704 ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
25705 SmallVector<int, 8> InnerMask;
25706 SmallVector<int, 8> OuterMask;
25707 narrowShuffleMaskElts(InnerScale, InnerSVN->getMask(), InnerMask);
25708 narrowShuffleMaskElts(OuterScale, SVN->getMask(), OuterMask);
25709
25710 // Merge the shuffle masks.
25711 SmallVector<int, 8> NewMask;
25712 for (int M : OuterMask)
25713 NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
25714
25715 // Test for shuffle mask legality over both commutations.
25716 SDValue SV0 = BC0->getOperand(0);
25717 SDValue SV1 = BC0->getOperand(1);
25718 bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
25719 if (!LegalMask) {
25720 std::swap(SV0, SV1);
25721 ShuffleVectorSDNode::commuteMask(NewMask);
25722 LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
25723 }
25724
25725 if (LegalMask) {
25726 SV0 = DAG.getBitcast(ScaleVT, SV0);
25727 SV1 = DAG.getBitcast(ScaleVT, SV1);
25728 return DAG.getBitcast(
25729 VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
25730 }
25731 }
25732 }
25733 }
25734
25735 // Match shuffles of bitcasts, so long as the mask can be treated as the
25736 // larger type.
25737 if (SDValue V = combineShuffleOfBitcast(SVN, DAG, TLI, LegalOperations))
25738 return V;
25739
25740 // Compute the combined shuffle mask for a shuffle with SV0 as the first
25741 // operand, and SV1 as the second operand.
25742 // i.e. Merge SVN(OtherSVN, N1) -> shuffle(SV0, SV1, Mask) iff Commute = false
25743 // Merge SVN(N1, OtherSVN) -> shuffle(SV0, SV1, Mask') iff Commute = true
25744 auto MergeInnerShuffle =
25745 [NumElts, &VT](bool Commute, ShuffleVectorSDNode *SVN,
25746 ShuffleVectorSDNode *OtherSVN, SDValue N1,
25747 const TargetLowering &TLI, SDValue &SV0, SDValue &SV1,
25748 SmallVectorImpl<int> &Mask) -> bool {
25749 // Don't try to fold splats; they're likely to simplify somehow, or they
25750 // might be free.
25751 if (OtherSVN->isSplat())
25752 return false;
25753
25754 SV0 = SV1 = SDValue();
25755 Mask.clear();
25756
25757 for (unsigned i = 0; i != NumElts; ++i) {
25758 int Idx = SVN->getMaskElt(i);
25759 if (Idx < 0) {
25760 // Propagate Undef.
25761 Mask.push_back(Idx);
25762 continue;
25763 }
25764
25765 if (Commute)
25766 Idx = (Idx < (int)NumElts) ? (Idx + NumElts) : (Idx - NumElts);
25767
25768 SDValue CurrentVec;
25769 if (Idx < (int)NumElts) {
25770 // This shuffle index refers to the inner shuffle N0. Lookup the inner
25771 // shuffle mask to identify which vector is actually referenced.
25772 Idx = OtherSVN->getMaskElt(Idx);
25773 if (Idx < 0) {
25774 // Propagate Undef.
25775 Mask.push_back(Idx);
25776 continue;
25777 }
25778 CurrentVec = (Idx < (int)NumElts) ? OtherSVN->getOperand(0)
25779 : OtherSVN->getOperand(1);
25780 } else {
25781 // This shuffle index references an element within N1.
25782 CurrentVec = N1;
25783 }
25784
25785 // Simple case where 'CurrentVec' is UNDEF.
25786 if (CurrentVec.isUndef()) {
25787 Mask.push_back(-1);
25788 continue;
25789 }
25790
25791 // Canonicalize the shuffle index. We don't know yet if CurrentVec
25792 // will be the first or second operand of the combined shuffle.
25793 Idx = Idx % NumElts;
25794 if (!SV0.getNode() || SV0 == CurrentVec) {
25795 // Ok. CurrentVec is the left hand side.
25796 // Update the mask accordingly.
25797 SV0 = CurrentVec;
25798 Mask.push_back(Idx);
25799 continue;
25800 }
25801 if (!SV1.getNode() || SV1 == CurrentVec) {
25802 // Ok. CurrentVec is the right hand side.
25803 // Update the mask accordingly.
25804 SV1 = CurrentVec;
25805 Mask.push_back(Idx + NumElts);
25806 continue;
25807 }
25808
25809 // Last chance - see if the vector is another shuffle and if it
25810 // uses one of the existing candidate shuffle ops.
25811 if (auto *CurrentSVN = dyn_cast<ShuffleVectorSDNode>(CurrentVec)) {
25812 int InnerIdx = CurrentSVN->getMaskElt(Idx);
25813 if (InnerIdx < 0) {
25814 Mask.push_back(-1);
25815 continue;
25816 }
25817 SDValue InnerVec = (InnerIdx < (int)NumElts)
25818 ? CurrentSVN->getOperand(0)
25819 : CurrentSVN->getOperand(1);
25820 if (InnerVec.isUndef()) {
25821 Mask.push_back(-1);
25822 continue;
25823 }
25824 InnerIdx %= NumElts;
25825 if (InnerVec == SV0) {
25826 Mask.push_back(InnerIdx);
25827 continue;
25828 }
25829 if (InnerVec == SV1) {
25830 Mask.push_back(InnerIdx + NumElts);
25831 continue;
25832 }
25833 }
25834
25835 // Bail out if we cannot convert the shuffle pair into a single shuffle.
25836 return false;
25837 }
25838
25839 if (llvm::all_of(Mask, [](int M) { return M < 0; }))
25840 return true;
25841
25842 // Avoid introducing shuffles with illegal mask.
25843 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
25844 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
25845 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
25846 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
25847 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
25848 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
25849 if (TLI.isShuffleMaskLegal(Mask, VT))
25850 return true;
25851
25852 std::swap(SV0, SV1);
25853 ShuffleVectorSDNode::commuteMask(Mask);
25854 return TLI.isShuffleMaskLegal(Mask, VT);
25855 };
25856
25857 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
25858 // Canonicalize shuffles according to rules:
25859 // shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
25860 // shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
25861 // shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
25862 if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
25863 N0.getOpcode() != ISD::VECTOR_SHUFFLE) {
25864 // The incoming shuffle must be of the same type as the result of the
25865 // current shuffle.
25866 assert(N1->getOperand(0).getValueType() == VT &&
25867 "Shuffle types don't match");
25868
25869 SDValue SV0 = N1->getOperand(0);
25870 SDValue SV1 = N1->getOperand(1);
25871 bool HasSameOp0 = N0 == SV0;
25872 bool IsSV1Undef = SV1.isUndef();
25873 if (HasSameOp0 || IsSV1Undef || N0 == SV1)
25874 // Commute the operands of this shuffle so merging below will trigger.
25875 return DAG.getCommutedVectorShuffle(*SVN);
25876 }
25877
25878 // Canonicalize splat shuffles to the RHS to improve merging below.
25879 // shuffle(splat(A,u), shuffle(C,D)) -> shuffle'(shuffle(C,D), splat(A,u))
25880 if (N0.getOpcode() == ISD::VECTOR_SHUFFLE &&
25881 N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
25882 cast<ShuffleVectorSDNode>(N0)->isSplat() &&
25883 !cast<ShuffleVectorSDNode>(N1)->isSplat()) {
25884 return DAG.getCommutedVectorShuffle(*SVN);
25885 }
25886
25887 // Try to fold according to rules:
25888 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
25889 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
25890 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
25891 // Don't try to fold shuffles with illegal type.
25892 // Only fold if this shuffle is the only user of the other shuffle.
25893 // Try matching shuffle(C,shuffle(A,B)) commuted patterns as well.
25894 for (int i = 0; i != 2; ++i) {
25895 if (N->getOperand(i).getOpcode() == ISD::VECTOR_SHUFFLE &&
25896 N->isOnlyUserOf(N->getOperand(i).getNode())) {
25897 // The incoming shuffle must be of the same type as the result of the
25898 // current shuffle.
25899 auto *OtherSV = cast<ShuffleVectorSDNode>(N->getOperand(i));
25900 assert(OtherSV->getOperand(0).getValueType() == VT &&
25901 "Shuffle types don't match");
25902
25903 SDValue SV0, SV1;
25904 SmallVector<int> Mask;
25905 if (MergeInnerShuffle(i != 0, SVN, OtherSV, N->getOperand(1 - i), TLI,
25906 SV0, SV1, Mask)) {
25907 // Check if all indices in Mask are Undef. If so, propagate Undef.
25908 if (llvm::all_of(Mask, [](int M) { return M < 0; }))
25909 return DAG.getUNDEF(VT);
25910
25911 return DAG.getVectorShuffle(VT, SDLoc(N),
25912 SV0 ? SV0 : DAG.getUNDEF(VT),
25913 SV1 ? SV1 : DAG.getUNDEF(VT), Mask);
25914 }
25915 }
25916 }
25917
25918 // Merge shuffles through binops if we are able to merge them with at least
25919 // one other shuffle.
25920 // shuffle(bop(shuffle(x,y),shuffle(z,w)),undef)
25921 // shuffle(bop(shuffle(x,y),shuffle(z,w)),bop(shuffle(a,b),shuffle(c,d)))
25922 unsigned SrcOpcode = N0.getOpcode();
25923 if (TLI.isBinOp(SrcOpcode) && N->isOnlyUserOf(N0.getNode()) &&
25924 (N1.isUndef() ||
25925 (SrcOpcode == N1.getOpcode() && N->isOnlyUserOf(N1.getNode())))) {
25926 // Get binop source ops, or just pass on the undef.
25927 SDValue Op00 = N0.getOperand(0);
25928 SDValue Op01 = N0.getOperand(1);
25929 SDValue Op10 = N1.isUndef() ? N1 : N1.getOperand(0);
25930 SDValue Op11 = N1.isUndef() ? N1 : N1.getOperand(1);
25931 // TODO: We might be able to relax the VT check but we don't currently
25932 // have any isBinOp() that has different result/ops VTs so play safe until
25933 // we have test coverage.
25934 if (Op00.getValueType() == VT && Op10.getValueType() == VT &&
25935 Op01.getValueType() == VT && Op11.getValueType() == VT &&
25936 (Op00.getOpcode() == ISD::VECTOR_SHUFFLE ||
25937 Op10.getOpcode() == ISD::VECTOR_SHUFFLE ||
25938 Op01.getOpcode() == ISD::VECTOR_SHUFFLE ||
25939 Op11.getOpcode() == ISD::VECTOR_SHUFFLE)) {
25940 auto CanMergeInnerShuffle = [&](SDValue &SV0, SDValue &SV1,
25941 SmallVectorImpl<int> &Mask, bool LeftOp,
25942 bool Commute) {
25943 SDValue InnerN = Commute ? N1 : N0;
25944 SDValue Op0 = LeftOp ? Op00 : Op01;
25945 SDValue Op1 = LeftOp ? Op10 : Op11;
25946 if (Commute)
25947 std::swap(Op0, Op1);
25948 // Only accept the merged shuffle if we don't introduce undef elements,
25949 // or the inner shuffle already contained undef elements.
25950 auto *SVN0 = dyn_cast<ShuffleVectorSDNode>(Op0);
25951 return SVN0 && InnerN->isOnlyUserOf(SVN0) &&
25952 MergeInnerShuffle(Commute, SVN, SVN0, Op1, TLI, SV0, SV1,
25953 Mask) &&
25954 (llvm::any_of(SVN0->getMask(), [](int M) { return M < 0; }) ||
25955 llvm::none_of(Mask, [](int M) { return M < 0; }));
25956 };
25957
25958 // Ensure we don't increase the number of shuffles - we must merge a
25959 // shuffle from at least one of the LHS and RHS ops.
25960 bool MergedLeft = false;
25961 SDValue LeftSV0, LeftSV1;
25962 SmallVector<int, 4> LeftMask;
25963 if (CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, false) ||
25964 CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, true)) {
25965 MergedLeft = true;
25966 } else {
25967 LeftMask.assign(SVN->getMask().begin(), SVN->getMask().end());
25968 LeftSV0 = Op00, LeftSV1 = Op10;
25969 }
25970
25971 bool MergedRight = false;
25972 SDValue RightSV0, RightSV1;
25973 SmallVector<int, 4> RightMask;
25974 if (CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, false) ||
25975 CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, true)) {
25976 MergedRight = true;
25977 } else {
25978 RightMask.assign(SVN->getMask().begin(), SVN->getMask().end());
25979 RightSV0 = Op01, RightSV1 = Op11;
25980 }
25981
25982 if (MergedLeft || MergedRight) {
25983 SDLoc DL(N);
25984 SDValue LHS = DAG.getVectorShuffle(
25985 VT, DL, LeftSV0 ? LeftSV0 : DAG.getUNDEF(VT),
25986 LeftSV1 ? LeftSV1 : DAG.getUNDEF(VT), LeftMask);
25987 SDValue RHS = DAG.getVectorShuffle(
25988 VT, DL, RightSV0 ? RightSV0 : DAG.getUNDEF(VT),
25989 RightSV1 ? RightSV1 : DAG.getUNDEF(VT), RightMask);
25990 return DAG.getNode(SrcOpcode, DL, VT, LHS, RHS);
25991 }
25992 }
25993 }
25994 }
25995
25996 if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG))
25997 return V;
25998
25999 // Match shuffles that can be converted to ISD::ZERO_EXTEND_VECTOR_INREG.
26000 // Perform this really late, because it could eliminate knowledge
26001 // of undef elements created by this shuffle.
26002 if (Level < AfterLegalizeTypes)
26003 if (SDValue V = combineShuffleToZeroExtendVectorInReg(SVN, DAG, TLI,
26004 LegalOperations))
26005 return V;
26006
26007 return SDValue();
26008}
26009
26010SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
26011 EVT VT = N->getValueType(0);
26012 if (!VT.isFixedLengthVector())
26013 return SDValue();
26014
26015 // Try to convert a scalar binop with an extracted vector element to a vector
26016 // binop. This is intended to reduce potentially expensive register moves.
26017 // TODO: Check if both operands are extracted.
26018 // TODO: How to prefer scalar/vector ops with multiple uses of the extract?
26019 // TODO: Generalize this, so it can be called from visitINSERT_VECTOR_ELT().
26020 SDValue Scalar = N->getOperand(0);
26021 unsigned Opcode = Scalar.getOpcode();
26022 EVT VecEltVT = VT.getScalarType();
26023 if (Scalar.hasOneUse() && Scalar->getNumValues() == 1 &&
26024 TLI.isBinOp(Opcode) && Scalar.getValueType() == VecEltVT &&
26025 Scalar.getOperand(0).getValueType() == VecEltVT &&
26026 Scalar.getOperand(1).getValueType() == VecEltVT &&
26027 Scalar->isOnlyUserOf(Scalar.getOperand(0).getNode()) &&
26028 Scalar->isOnlyUserOf(Scalar.getOperand(1).getNode()) &&
26029 DAG.isSafeToSpeculativelyExecute(Opcode) && hasOperation(Opcode, VT)) {
26030 // Match an extract element and get a shuffle mask equivalent.
26031 SmallVector<int, 8> ShufMask(VT.getVectorNumElements(), -1);
26032
26033 for (int i : {0, 1}) {
26034 // s2v (bo (extelt V, Idx), C) --> shuffle (bo V, C'), {Idx, -1, -1...}
26035 // s2v (bo C, (extelt V, Idx)) --> shuffle (bo C', V), {Idx, -1, -1...}
26036 SDValue EE = Scalar.getOperand(i);
26037 auto *C = dyn_cast<ConstantSDNode>(Scalar.getOperand(i ? 0 : 1));
26038 if (C && EE.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
26039 EE.getOperand(0).getValueType() == VT &&
26040 isa<ConstantSDNode>(EE.getOperand(1))) {
26041 // Mask = {ExtractIndex, undef, undef....}
26042 ShufMask[0] = EE.getConstantOperandVal(1);
26043 // Make sure the shuffle is legal if we are crossing lanes.
26044 if (TLI.isShuffleMaskLegal(ShufMask, VT)) {
26045 SDLoc DL(N);
26046 SDValue V[] = {EE.getOperand(0),
26047 DAG.getConstant(C->getAPIntValue(), DL, VT)};
26048 SDValue VecBO = DAG.getNode(Opcode, DL, VT, V[i], V[1 - i]);
26049 return DAG.getVectorShuffle(VT, DL, VecBO, DAG.getUNDEF(VT),
26050 ShufMask);
26051 }
26052 }
26053 }
26054 }
26055
26056 // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
26057 // with a VECTOR_SHUFFLE and possible truncate.
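// Illustrative sketch (not from the original source): for v4i32,
//   scalar_to_vector (extractelt V, 2)
// can become
//   shuffle V, undef, <2, -1, -1, -1>
// which places the extracted element in lane 0 and leaves the remaining lanes
// undefined, matching the semantics of scalar_to_vector.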
26058 if (Opcode != ISD::EXTRACT_VECTOR_ELT ||
26059 !Scalar.getOperand(0).getValueType().isFixedLengthVector())
26060 return SDValue();
26061
26062 // If we have an implicit truncate, truncate here if it is legal.
26063 if (VecEltVT != Scalar.getValueType() &&
26064 Scalar.getValueType().isScalarInteger() && isTypeLegal(VecEltVT)) {
26065 SDValue Val = DAG.getNode(ISD::TRUNCATE, SDLoc(Scalar), VecEltVT, Scalar);
26066 return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
26067 }
26068
26069 auto *ExtIndexC = dyn_cast<ConstantSDNode>(Scalar.getOperand(1));
26070 if (!ExtIndexC)
26071 return SDValue();
26072
26073 SDValue SrcVec = Scalar.getOperand(0);
26074 EVT SrcVT = SrcVec.getValueType();
26075 unsigned SrcNumElts = SrcVT.getVectorNumElements();
26076 unsigned VTNumElts = VT.getVectorNumElements();
26077 if (VecEltVT == SrcVT.getScalarType() && VTNumElts <= SrcNumElts) {
26078 // Create a shuffle equivalent for scalar-to-vector: {ExtIndex, -1, -1, ...}
26079 SmallVector<int, 8> Mask(SrcNumElts, -1);
26080 Mask[0] = ExtIndexC->getZExtValue();
26081 SDValue LegalShuffle = TLI.buildLegalVectorShuffle(
26082 SrcVT, SDLoc(N), SrcVec, DAG.getUNDEF(SrcVT), Mask, DAG);
26083 if (!LegalShuffle)
26084 return SDValue();
26085
26086 // If the initial vector is the same size, the shuffle is the result.
26087 if (VT == SrcVT)
26088 return LegalShuffle;
26089
26090 // If not, shorten the shuffled vector.
26091 if (VTNumElts != SrcNumElts) {
26092 SDValue ZeroIdx = DAG.getVectorIdxConstant(0, SDLoc(N));
26093 EVT SubVT = EVT::getVectorVT(*DAG.getContext(),
26094 SrcVT.getVectorElementType(), VTNumElts);
26095 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT, LegalShuffle,
26096 ZeroIdx);
26097 }
26098 }
26099
26100 return SDValue();
26101}
26102
26103SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
26104 EVT VT = N->getValueType(0);
26105 SDValue N0 = N->getOperand(0);
26106 SDValue N1 = N->getOperand(1);
26107 SDValue N2 = N->getOperand(2);
26108 uint64_t InsIdx = N->getConstantOperandVal(2);
26109
26110 // If inserting an UNDEF, just return the original vector.
26111 if (N1.isUndef())
26112 return N0;
26113
26114 // If this is an insert of an extracted vector into an undef vector, we can
26115 // just use the input to the extract if the types match, and can simplify
26116 // in some cases even if they don't.
26117 if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
26118 N1.getOperand(1) == N2) {
26119 EVT SrcVT = N1.getOperand(0).getValueType();
26120 if (SrcVT == VT)
26121 return N1.getOperand(0);
26122 // TODO: To remove the zero check, need to adjust the offset to
26123 // a multiple of the new src type.
26124 if (isNullConstant(N2) &&
26125 VT.isScalableVector() == SrcVT.isScalableVector()) {
26126 if (VT.getVectorMinNumElements() >= SrcVT.getVectorMinNumElements())
26127 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
26128 VT, N0, N1.getOperand(0), N2);
26129 else
26130 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N),
26131 VT, N1.getOperand(0), N2);
26132 }
26133 }
26134
26135 // Handle case where we've ended up inserting back into the source vector
26136 // we extracted the subvector from.
26137 // insert_subvector(N0, extract_subvector(N0, N2), N2) --> N0
26138 if (N1.getOpcode() == ISD::EXTRACT_SUBVECTOR && N1.getOperand(0) == N0 &&
26139 N1.getOperand(1) == N2)
26140 return N0;
26141
26142 // Simplify scalar inserts into an undef vector:
26143 // insert_subvector undef, (splat X), N2 -> splat X
26144 if (N0.isUndef() && N1.getOpcode() == ISD::SPLAT_VECTOR)
26145 if (DAG.isConstantValueOfAnyType(N1.getOperand(0)) || N1.hasOneUse())
26146 return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, N1.getOperand(0));
26147
26148 // If we are inserting a bitcast value into an undef, with the same
26149 // number of elements, just use the bitcast input of the extract.
26150 // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
26151 // BITCAST (INSERT_SUBVECTOR UNDEF N1 N2)
26152 if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
26153 N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
26154 N1.getOperand(0).getOperand(1) == N2 &&
26155 N1.getOperand(0).getOperand(0).getValueType().getVectorElementCount() ==
26156 VT.getVectorElementCount() &&
26157 N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() ==
26158 VT.getSizeInBits()) {
26159 return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
26160 }
26161
26162 // If both N0 and N1 are bitcast values on which insert_subvector
26163 // would make sense, pull the bitcast through.
26164 // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
26165 // BITCAST (INSERT_SUBVECTOR N0 N1 N2)
26166 if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
26167 SDValue CN0 = N0.getOperand(0);
26168 SDValue CN1 = N1.getOperand(0);
26169 EVT CN0VT = CN0.getValueType();
26170 EVT CN1VT = CN1.getValueType();
26171 if (CN0VT.isVector() && CN1VT.isVector() &&
26172 CN0VT.getVectorElementType() == CN1VT.getVectorElementType() &&
26173 CN0VT.getVectorElementCount() == VT.getVectorElementCount()) {
26174 SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
26175 CN0.getValueType(), CN0, CN1, N2);
26176 return DAG.getBitcast(VT, NewINSERT);
26177 }
26178 }
26179
26180 // Combine INSERT_SUBVECTORs where we are inserting to the same index.
26181 // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
26182 // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
26183 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
26184 N0.getOperand(1).getValueType() == N1.getValueType() &&
26185 N0.getOperand(2) == N2)
26186 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
26187 N1, N2);
26188
26189 // Eliminate an intermediate insert into an undef vector:
26190 // insert_subvector undef, (insert_subvector undef, X, 0), 0 -->
26191 // insert_subvector undef, X, 0
26192 if (N0.isUndef() && N1.getOpcode() == ISD::INSERT_SUBVECTOR &&
26193 N1.getOperand(0).isUndef() && isNullConstant(N1.getOperand(2)) &&
26194 isNullConstant(N2))
26195 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0,
26196 N1.getOperand(1), N2);
26197
26198 // Push subvector bitcasts to the output, adjusting the index as we go.
26199 // insert_subvector(bitcast(v), bitcast(s), c1)
26200 // -> bitcast(insert_subvector(v, s, c2))
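// Illustrative sketch (not from the original source): inserting a v2i32
// subvector (a bitcast of v1i64) into a v8i32 vector (a bitcast of v4i64) at
// index 2 can instead insert the v1i64 into the v4i64 at index 1 and bitcast
// the result, since two i32 lanes correspond to one i64 lane.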
26201 if ((N0.isUndef() || N0.getOpcode() == ISD::BITCAST) &&
26202 N1.getOpcode() == ISD::BITCAST) {
26203 SDValue N0Src = peekThroughBitcasts(N0);
26204 SDValue N1Src = peekThroughBitcasts(N1);
26205 EVT N0SrcSVT = N0Src.getValueType().getScalarType();
26206 EVT N1SrcSVT = N1Src.getValueType().getScalarType();
26207 if ((N0.isUndef() || N0SrcSVT == N1SrcSVT) &&
26208 N0Src.getValueType().isVector() && N1Src.getValueType().isVector()) {
26209 EVT NewVT;
26210 SDLoc DL(N);
26211 SDValue NewIdx;
26212 LLVMContext &Ctx = *DAG.getContext();
26213 ElementCount NumElts = VT.getVectorElementCount();
26214 unsigned EltSizeInBits = VT.getScalarSizeInBits();
26215 if ((EltSizeInBits % N1SrcSVT.getSizeInBits()) == 0) {
26216 unsigned Scale = EltSizeInBits / N1SrcSVT.getSizeInBits();
26217 NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts * Scale);
26218 NewIdx = DAG.getVectorIdxConstant(InsIdx * Scale, DL);
26219 } else if ((N1SrcSVT.getSizeInBits() % EltSizeInBits) == 0) {
26220 unsigned Scale = N1SrcSVT.getSizeInBits() / EltSizeInBits;
26221 if (NumElts.isKnownMultipleOf(Scale) && (InsIdx % Scale) == 0) {
26222 NewVT = EVT::getVectorVT(Ctx, N1SrcSVT,
26223 NumElts.divideCoefficientBy(Scale));
26224 NewIdx = DAG.getVectorIdxConstant(InsIdx / Scale, DL);
26225 }
26226 }
26227 if (NewIdx && hasOperation(ISD::INSERT_SUBVECTOR, NewVT)) {
26228 SDValue Res = DAG.getBitcast(NewVT, N0Src);
26229 Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, NewVT, Res, N1Src, NewIdx);
26230 return DAG.getBitcast(VT, Res);
26231 }
26232 }
26233 }
26234
26235 // Canonicalize insert_subvector dag nodes.
26236 // Example:
26237 // (insert_subvector (insert_subvector A, Idx0), Idx1)
26238 // -> (insert_subvector (insert_subvector A, Idx1), Idx0)
26239 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
26240 N1.getValueType() == N0.getOperand(1).getValueType()) {
26241 unsigned OtherIdx = N0.getConstantOperandVal(2);
26242 if (InsIdx < OtherIdx) {
26243 // Swap nodes.
26244 SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
26245 N0.getOperand(0), N1, N2);
26246 AddToWorklist(NewOp.getNode());
26247 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
26248 VT, NewOp, N0.getOperand(1), N0.getOperand(2));
26249 }
26250 }
26251
26252 // If the input vector is a concatenation, and the insert replaces
26253 // one of the pieces, we can optimize into a single concat_vectors.
26254 if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
26255 N0.getOperand(0).getValueType() == N1.getValueType() &&
26256 N0.getOperand(0).getValueType().isScalableVector() ==
26257 N1.getValueType().isScalableVector()) {
26258 unsigned Factor = N1.getValueType().getVectorMinNumElements();
26259 SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
26260 Ops[InsIdx / Factor] = N1;
26261 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
26262 }
26263
26264 // Simplify source operands based on insertion.
26265 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
26266 return SDValue(N, 0);
26267
26268 return SDValue();
26269}
26270
26271SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
26272 SDValue N0 = N->getOperand(0);
26273
26274 // fold (fp_to_fp16 (fp16_to_fp op)) -> op
26275 if (N0->getOpcode() == ISD::FP16_TO_FP)
26276 return N0->getOperand(0);
26277
26278 return SDValue();
26279}
26280
26281SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
26282 auto Op = N->getOpcode();
26284 "opcode should be FP16_TO_FP or BF16_TO_FP.");
26285 SDValue N0 = N->getOperand(0);
26286
26287 // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op) or
26288 // fold bf16_to_fp(op & 0xffff) -> bf16_to_fp(op)
26289 if (!TLI.shouldKeepZExtForFP16Conv() && N0->getOpcode() == ISD::AND) {
26290 ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
26291 if (AndConst && AndConst->getAPIntValue() == 0xffff) {
26292 return DAG.getNode(Op, SDLoc(N), N->getValueType(0), N0.getOperand(0));
26293 }
26294 }
26295
26296 return SDValue();
26297}
26298
26299SDValue DAGCombiner::visitFP_TO_BF16(SDNode *N) {
26300 SDValue N0 = N->getOperand(0);
26301
26302 // fold (fp_to_bf16 (bf16_to_fp op)) -> op
26303 if (N0->getOpcode() == ISD::BF16_TO_FP)
26304 return N0->getOperand(0);
26305
26306 return SDValue();
26307}
26308
26309SDValue DAGCombiner::visitBF16_TO_FP(SDNode *N) {
26310 // fold bf16_to_fp(op & 0xffff) -> bf16_to_fp(op)
26311 return visitFP16_TO_FP(N);
26312}
26313
26314SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
26315 SDValue N0 = N->getOperand(0);
26316 EVT VT = N0.getValueType();
26317 unsigned Opcode = N->getOpcode();
26318
26319 // VECREDUCE over 1-element vector is just an extract.
26320 if (VT.getVectorElementCount().isScalar()) {
26321 SDLoc dl(N);
26322 SDValue Res =
26323 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT.getVectorElementType(), N0,
26324 DAG.getVectorIdxConstant(0, dl));
26325 if (Res.getValueType() != N->getValueType(0))
26326 Res = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Res);
26327 return Res;
26328 }
26329
26330 // On a boolean vector an and/or reduction is the same as a umin/umax
26331 // reduction. Convert them if the latter is legal while the former isn't.
26332 if (Opcode == ISD::VECREDUCE_AND || Opcode == ISD::VECREDUCE_OR) {
26333 unsigned NewOpcode = Opcode == ISD::VECREDUCE_AND
26334 ? ISD::VECREDUCE_UMIN : ISD::VECREDUCE_UMAX;
26335 if (!TLI.isOperationLegalOrCustom(Opcode, VT) &&
26336 TLI.isOperationLegalOrCustom(NewOpcode, VT) &&
26337 DAG.ComputeNumSignBits(N0) == VT.getScalarSizeInBits())
26338 return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), N0);
26339 }
26340
26341 // vecreduce_or(insert_subvector(zero or undef, val)) -> vecreduce_or(val)
26342 // vecreduce_and(insert_subvector(ones or undef, val)) -> vecreduce_and(val)
26343 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
26344 TLI.isTypeLegal(N0.getOperand(1).getValueType())) {
26345 SDValue Vec = N0.getOperand(0);
26346 SDValue Subvec = N0.getOperand(1);
26347 if ((Opcode == ISD::VECREDUCE_OR &&
26348 (N0.getOperand(0).isUndef() || isNullOrNullSplat(Vec))) ||
26349 (Opcode == ISD::VECREDUCE_AND &&
26350 (N0.getOperand(0).isUndef() || isAllOnesOrAllOnesSplat(Vec))))
26351 return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), Subvec);
26352 }
26353
26354 return SDValue();
26355}
26356
26357SDValue DAGCombiner::visitVP_FSUB(SDNode *N) {
26358 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
26359
26360 // FSUB -> FMA combines:
26361 if (SDValue Fused = visitFSUBForFMACombine<VPMatchContext>(N)) {
26362 AddToWorklist(Fused.getNode());
26363 return Fused;
26364 }
26365 return SDValue();
26366}
26367
26368SDValue DAGCombiner::visitVPOp(SDNode *N) {
26369
26370 if (N->getOpcode() == ISD::VP_GATHER)
26371 if (SDValue SD = visitVPGATHER(N))
26372 return SD;
26373
26374 if (N->getOpcode() == ISD::VP_SCATTER)
26375 if (SDValue SD = visitVPSCATTER(N))
26376 return SD;
26377
26378 if (N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_LOAD)
26379 if (SDValue SD = visitVP_STRIDED_LOAD(N))
26380 return SD;
26381
26382 if (N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_STORE)
26383 if (SDValue SD = visitVP_STRIDED_STORE(N))
26384 return SD;
26385
26386 // VP operations in which all vector elements are disabled - either by
26387 // determining that the mask is all false or that the EVL is 0 - can be
26388 // eliminated.
26389 bool AreAllEltsDisabled = false;
26390 if (auto EVLIdx = ISD::getVPExplicitVectorLengthIdx(N->getOpcode()))
26391 AreAllEltsDisabled |= isNullConstant(N->getOperand(*EVLIdx));
26392 if (auto MaskIdx = ISD::getVPMaskIdx(N->getOpcode()))
26393 AreAllEltsDisabled |=
26394 ISD::isConstantSplatVectorAllZeros(N->getOperand(*MaskIdx).getNode());
26395
26396 // This is the only generic VP combine we support for now.
26397 if (!AreAllEltsDisabled) {
26398 switch (N->getOpcode()) {
26399 case ISD::VP_FADD:
26400 return visitVP_FADD(N);
26401 case ISD::VP_FSUB:
26402 return visitVP_FSUB(N);
26403 case ISD::VP_FMA:
26404 return visitFMA<VPMatchContext>(N);
26405 case ISD::VP_SELECT:
26406 return visitVP_SELECT(N);
26407 }
26408 return SDValue();
26409 }
26410
26411 // Binary operations can be replaced by UNDEF.
26412 if (ISD::isVPBinaryOp(N->getOpcode()))
26413 return DAG.getUNDEF(N->getValueType(0));
26414
26415 // VP Memory operations can be replaced by either the chain (stores) or the
26416 // chain + undef (loads).
26417 if (const auto *MemSD = dyn_cast<MemSDNode>(N)) {
26418 if (MemSD->writeMem())
26419 return MemSD->getChain();
26420 return CombineTo(N, DAG.getUNDEF(N->getValueType(0)), MemSD->getChain());
26421 }
26422
26423 // Reduction operations return the start operand when no elements are active.
26424 if (ISD::isVPReduction(N->getOpcode()))
26425 return N->getOperand(0);
26426
26427 return SDValue();
26428}
26429
26430SDValue DAGCombiner::visitGET_FPENV_MEM(SDNode *N) {
26431 SDValue Chain = N->getOperand(0);
26432 SDValue Ptr = N->getOperand(1);
26433 EVT MemVT = cast<FPStateAccessSDNode>(N)->getMemoryVT();
26434
26435 // Check if the memory where the FP state is written is used only in a single
26436 // load operation.
26437 LoadSDNode *LdNode = nullptr;
26438 for (auto *U : Ptr->uses()) {
26439 if (U == N)
26440 continue;
26441 if (auto *Ld = dyn_cast<LoadSDNode>(U)) {
26442 if (LdNode && LdNode != Ld)
26443 return SDValue();
26444 LdNode = Ld;
26445 continue;
26446 }
26447 return SDValue();
26448 }
26449 if (!LdNode || !LdNode->isSimple() || LdNode->isIndexed() ||
26450 !LdNode->getOffset().isUndef() || LdNode->getMemoryVT() != MemVT ||
26451 !LdNode->getChain().reachesChainWithoutSideEffects(SDValue(N, 0)))
26452 return SDValue();
26453
26454 // Check if the loaded value is used only in a store operation.
26455 StoreSDNode *StNode = nullptr;
26456 for (auto I = LdNode->use_begin(), E = LdNode->use_end(); I != E; ++I) {
26457 SDUse &U = I.getUse();
26458 if (U.getResNo() == 0) {
26459 if (auto *St = dyn_cast<StoreSDNode>(U.getUser())) {
26460 if (StNode)
26461 return SDValue();
26462 StNode = St;
26463 } else {
26464 return SDValue();
26465 }
26466 }
26467 }
26468 if (!StNode || !StNode->isSimple() || StNode->isIndexed() ||
26469 !StNode->getOffset().isUndef() || StNode->getMemoryVT() != MemVT ||
26470 !StNode->getChain().reachesChainWithoutSideEffects(SDValue(LdNode, 1)))
26471 return SDValue();
26472
26473 // Create new node GET_FPENV_MEM, which uses the store address to write the FP
26474 // environment.
26475 SDValue Res = DAG.getGetFPEnv(Chain, SDLoc(N), StNode->getBasePtr(), MemVT,
26476 StNode->getMemOperand());
26477 CombineTo(StNode, Res, false);
26478 return Res;
26479}
26480
26481SDValue DAGCombiner::visitSET_FPENV_MEM(SDNode *N) {
26482 SDValue Chain = N->getOperand(0);
26483 SDValue Ptr = N->getOperand(1);
26484 EVT MemVT = cast<FPStateAccessSDNode>(N)->getMemoryVT();
26485
26486 // Check if the address of the FP state is also used only in a store operation.
26487 StoreSDNode *StNode = nullptr;
26488 for (auto *U : Ptr->uses()) {
26489 if (U == N)
26490 continue;
26491 if (auto *St = dyn_cast<StoreSDNode>(U)) {
26492 if (StNode && StNode != St)
26493 return SDValue();
26494 StNode = St;
26495 continue;
26496 }
26497 return SDValue();
26498 }
26499 if (!StNode || !StNode->isSimple() || StNode->isIndexed() ||
26500 !StNode->getOffset().isUndef() || StNode->getMemoryVT() != MemVT ||
26501 !Chain.reachesChainWithoutSideEffects(SDValue(StNode, 0)))
26502 return SDValue();
26503
26504 // Check if the stored value is loaded from some location and the loaded
26505 // value is used only in the store operation.
26506 SDValue StValue = StNode->getValue();
26507 auto *LdNode = dyn_cast<LoadSDNode>(StValue);
26508 if (!LdNode || !LdNode->isSimple() || LdNode->isIndexed() ||
26509 !LdNode->getOffset().isUndef() || LdNode->getMemoryVT() != MemVT ||
26510 !StNode->getChain().reachesChainWithoutSideEffects(SDValue(LdNode, 1)))
26511 return SDValue();
26512
26513 // Create new node SET_FPENV_MEM, which uses the load address to read the FP
26514 // environment.
26515 SDValue Res =
26516 DAG.getSetFPEnv(LdNode->getChain(), SDLoc(N), LdNode->getBasePtr(), MemVT,
26517 LdNode->getMemOperand());
26518 return Res;
26519}
26520
26521 /// Returns a vector_shuffle if it is able to transform an AND to a vector_shuffle
26522/// with the destination vector and a zero vector.
26523/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
26524/// vector_shuffle V, Zero, <0, 4, 2, 4>
26525SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
26526 assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");
26527
26528 EVT VT = N->getValueType(0);
26529 SDValue LHS = N->getOperand(0);
26530 SDValue RHS = peekThroughBitcasts(N->getOperand(1));
26531 SDLoc DL(N);
26532
26533 // Make sure we're not running after operation legalization where it
26534 // may have custom lowered the vector shuffles.
26535 if (LegalOperations)
26536 return SDValue();
26537
26538 if (RHS.getOpcode() != ISD::BUILD_VECTOR)
26539 return SDValue();
26540
26541 EVT RVT = RHS.getValueType();
26542 unsigned NumElts = RHS.getNumOperands();
26543
26544 // Attempt to create a valid clear mask, splitting the mask into
26545 // sub elements and checking to see if each is
26546 // all zeros or all ones - suitable for shuffle masking.
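// For example (illustrative only, little-endian): an AND of a v2i64 value with
// <0x00000000FFFFFFFF, 0x00000000FFFFFFFF> splits into i32 sub-elements that
// are each all-ones or all-zeros, giving a v4i32 shuffle against a zero vector
// with mask <0, 5, 2, 7>.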
26547 auto BuildClearMask = [&](int Split) {
26548 int NumSubElts = NumElts * Split;
26549 int NumSubBits = RVT.getScalarSizeInBits() / Split;
26550
26551 SmallVector<int, 8> Indices;
26552 for (int i = 0; i != NumSubElts; ++i) {
26553 int EltIdx = i / Split;
26554 int SubIdx = i % Split;
26555 SDValue Elt = RHS.getOperand(EltIdx);
26556 // X & undef --> 0 (not undef). So this lane must be converted to choose
26557 // from the zero constant vector (same as if the element had all 0-bits).
26558 if (Elt.isUndef()) {
26559 Indices.push_back(i + NumSubElts);
26560 continue;
26561 }
26562
26563 APInt Bits;
26564 if (auto *Cst = dyn_cast<ConstantSDNode>(Elt))
26565 Bits = Cst->getAPIntValue();
26566 else if (auto *CstFP = dyn_cast<ConstantFPSDNode>(Elt))
26567 Bits = CstFP->getValueAPF().bitcastToAPInt();
26568 else
26569 return SDValue();
26570
26571 // Extract the sub element from the constant bit mask.
26572 if (DAG.getDataLayout().isBigEndian())
26573 Bits = Bits.extractBits(NumSubBits, (Split - SubIdx - 1) * NumSubBits);
26574 else
26575 Bits = Bits.extractBits(NumSubBits, SubIdx * NumSubBits);
26576
26577 if (Bits.isAllOnes())
26578 Indices.push_back(i);
26579 else if (Bits == 0)
26580 Indices.push_back(i + NumSubElts);
26581 else
26582 return SDValue();
26583 }
26584
26585 // Let's see if the target supports this vector_shuffle.
26586 EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
26587 EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
26588 if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
26589 return SDValue();
26590
26591 SDValue Zero = DAG.getConstant(0, DL, ClearVT);
26592 return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
26593 DAG.getBitcast(ClearVT, LHS),
26594 Zero, Indices));
26595 };
26596
26597 // Determine maximum split level (byte level masking).
26598 int MaxSplit = 1;
26599 if (RVT.getScalarSizeInBits() % 8 == 0)
26600 MaxSplit = RVT.getScalarSizeInBits() / 8;
26601
26602 for (int Split = 1; Split <= MaxSplit; ++Split)
26603 if (RVT.getScalarSizeInBits() % Split == 0)
26604 if (SDValue S = BuildClearMask(Split))
26605 return S;
26606
26607 return SDValue();
26608}
26609
26610/// If a vector binop is performed on splat values, it may be profitable to
26611/// extract, scalarize, and insert/splat.
26612 static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG,
26613 const SDLoc &DL) {
26614 SDValue N0 = N->getOperand(0);
26615 SDValue N1 = N->getOperand(1);
26616 unsigned Opcode = N->getOpcode();
26617 EVT VT = N->getValueType(0);
26618 EVT EltVT = VT.getVectorElementType();
26619 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
26620
26621 // TODO: Remove/replace the extract cost check? If the elements are available
26622 // as scalars, then there may be no extract cost. Should we ask if
26623 // inserting a scalar back into a vector is cheap instead?
26624 int Index0, Index1;
26625 SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
26626 SDValue Src1 = DAG.getSplatSourceVector(N1, Index1);
26627 // Extract element from splat_vector should be free.
26628 // TODO: use DAG.isSplatValue instead?
26629 bool IsBothSplatVector = N0.getOpcode() == ISD::SPLAT_VECTOR &&
26630 N1.getOpcode() == ISD::SPLAT_VECTOR;
26631 if (!Src0 || !Src1 || Index0 != Index1 ||
26632 Src0.getValueType().getVectorElementType() != EltVT ||
26633 Src1.getValueType().getVectorElementType() != EltVT ||
26634 !(IsBothSplatVector || TLI.isExtractVecEltCheap(VT, Index0)) ||
26635 !TLI.isOperationLegalOrCustom(Opcode, EltVT))
26636 return SDValue();
26637
26638 SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
26639 SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src0, IndexC);
26640 SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src1, IndexC);
26641 SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags());
26642
26643 // If all lanes but 1 are undefined, no need to splat the scalar result.
26644 // TODO: Keep track of undefs and use that info in the general case.
26645 if (N0.getOpcode() == ISD::BUILD_VECTOR && N0.getOpcode() == N1.getOpcode() &&
26646 count_if(N0->ops(), [](SDValue V) { return !V.isUndef(); }) == 1 &&
26647 count_if(N1->ops(), [](SDValue V) { return !V.isUndef(); }) == 1) {
26648 // bo (build_vec ..undef, X, undef...), (build_vec ..undef, Y, undef...) -->
26649 // build_vec ..undef, (bo X, Y), undef...
26650 SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), DAG.getUNDEF(EltVT));
26651 Ops[Index0] = ScalarBO;
26652 return DAG.getBuildVector(VT, DL, Ops);
26653 }
26654
26655 // bo (splat X, Index), (splat Y, Index) --> splat (bo X, Y), Index
26656 return DAG.getSplat(VT, DL, ScalarBO);
26657}
26658
26659/// Visit a vector cast operation, like FP_EXTEND.
26660SDValue DAGCombiner::SimplifyVCastOp(SDNode *N, const SDLoc &DL) {
26661 EVT VT = N->getValueType(0);
26662 assert(VT.isVector() && "SimplifyVCastOp only works on vectors!");
26663 EVT EltVT = VT.getVectorElementType();
26664 unsigned Opcode = N->getOpcode();
26665
26666 SDValue N0 = N->getOperand(0);
26667 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
26668
26669 // TODO: a promote operation might also be good here?
26670 int Index0;
26671 SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
26672 if (Src0 &&
26673 (N0.getOpcode() == ISD::SPLAT_VECTOR ||
26674 TLI.isExtractVecEltCheap(VT, Index0)) &&
26675 TLI.isOperationLegalOrCustom(Opcode, EltVT) &&
26676 TLI.preferScalarizeSplat(N)) {
26677 EVT SrcVT = N0.getValueType();
26678 EVT SrcEltVT = SrcVT.getVectorElementType();
26679 SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
26680 SDValue Elt =
26681 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, SrcEltVT, Src0, IndexC);
26682 SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, Elt, N->getFlags());
26683 if (VT.isScalableVector())
26684 return DAG.getSplatVector(VT, DL, ScalarBO);
26685 SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), ScalarBO);
26686 return DAG.getBuildVector(VT, DL, Ops);
26687 }
26688
26689 return SDValue();
26690}
26691
26692/// Visit a binary vector operation, like ADD.
26693SDValue DAGCombiner::SimplifyVBinOp(SDNode *N, const SDLoc &DL) {
26694 EVT VT = N->getValueType(0);
26695 assert(VT.isVector() && "SimplifyVBinOp only works on vectors!");
26696
26697 SDValue LHS = N->getOperand(0);
26698 SDValue RHS = N->getOperand(1);
26699 unsigned Opcode = N->getOpcode();
26700 SDNodeFlags Flags = N->getFlags();
26701
26702 // Move unary shuffles with identical masks after a vector binop:
26703 // VBinOp (shuffle A, Undef, Mask), (shuffle B, Undef, Mask)
26704 // --> shuffle (VBinOp A, B), Undef, Mask
26705 // This does not require type legality checks because we are creating the
26706 // same types of operations that are in the original sequence. We do have to
26707 // restrict ops like integer div that have immediate UB (e.g., div-by-zero)
26708 // though. This code is adapted from the identical transform in instcombine.
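// A small illustration (not from the original source):
//   add (shuffle A, undef, <1,1,3,3>), (shuffle B, undef, <1,1,3,3>)
// becomes
//   shuffle (add A, B), undef, <1,1,3,3>
// because applying the same lane permutation before or after a lane-wise op
// yields the same result.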
26709 if (DAG.isSafeToSpeculativelyExecute(Opcode)) {
26710 auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS);
26711 auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS);
26712 if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
26713 LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&
26714 (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
26715 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS.getOperand(0),
26716 RHS.getOperand(0), Flags);
26717 SDValue UndefV = LHS.getOperand(1);
26718 return DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
26719 }
26720
26721 // Try to sink a splat shuffle after a binop with a uniform constant.
26722 // This is limited to cases where neither the shuffle nor the constant have
26723 // undefined elements because that could be poison-unsafe or inhibit
26724 // demanded elements analysis. It is further limited to not change a splat
26725 // of an inserted scalar because that may be optimized better by
26726 // load-folding or other target-specific behaviors.
26727 if (isConstOrConstSplat(RHS) && Shuf0 && all_equal(Shuf0->getMask()) &&
26728 Shuf0->hasOneUse() && Shuf0->getOperand(1).isUndef() &&
26729 Shuf0->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
26730 // binop (splat X), (splat C) --> splat (binop X, C)
26731 SDValue X = Shuf0->getOperand(0);
26732 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, X, RHS, Flags);
26733 return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
26734 Shuf0->getMask());
26735 }
26736 if (isConstOrConstSplat(LHS) && Shuf1 && all_equal(Shuf1->getMask()) &&
26737 Shuf1->hasOneUse() && Shuf1->getOperand(1).isUndef() &&
26738 Shuf1->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
26739 // binop (splat C), (splat X) --> splat (binop C, X)
26740 SDValue X = Shuf1->getOperand(0);
26741 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS, X, Flags);
26742 return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
26743 Shuf1->getMask());
26744 }
26745 }
26746
26747 // The following pattern is likely to emerge with vector reduction ops. Moving
26748 // the binary operation ahead of insertion may allow using a narrower vector
26749 // instruction that has better performance than the wide version of the op:
26750 // VBinOp (ins undef, X, Z), (ins undef, Y, Z) --> ins VecC, (VBinOp X, Y), Z
26751 if (LHS.getOpcode() == ISD::INSERT_SUBVECTOR && LHS.getOperand(0).isUndef() &&
26752 RHS.getOpcode() == ISD::INSERT_SUBVECTOR && RHS.getOperand(0).isUndef() &&
26753 LHS.getOperand(2) == RHS.getOperand(2) &&
26754 (LHS.hasOneUse() || RHS.hasOneUse())) {
26755 SDValue X = LHS.getOperand(1);
26756 SDValue Y = RHS.getOperand(1);
26757 SDValue Z = LHS.getOperand(2);
26758 EVT NarrowVT = X.getValueType();
26759 if (NarrowVT == Y.getValueType() &&
26760 TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT,
26761 LegalOperations)) {
26762 // (binop undef, undef) may not return undef, so compute that result.
26763 SDValue VecC =
26764 DAG.getNode(Opcode, DL, VT, DAG.getUNDEF(VT), DAG.getUNDEF(VT));
26765 SDValue NarrowBO = DAG.getNode(Opcode, DL, NarrowVT, X, Y);
26766 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, VecC, NarrowBO, Z);
26767 }
26768 }
26769
26770 // Make sure all but the first op are undef or constant.
26771 auto ConcatWithConstantOrUndef = [](SDValue Concat) {
26772 return Concat.getOpcode() == ISD::CONCAT_VECTORS &&
26773 all_of(drop_begin(Concat->ops()), [](const SDValue &Op) {
26774 return Op.isUndef() ||
26775 ISD::isBuildVectorOfConstantSDNodes(Op.getNode());
26776 });
26777 };
26778
26779 // The following pattern is likely to emerge with vector reduction ops. Moving
26780 // the binary operation ahead of the concat may allow using a narrower vector
26781 // instruction that has better performance than the wide version of the op:
26782 // VBinOp (concat X, undef/constant), (concat Y, undef/constant) -->
26783 // concat (VBinOp X, Y), VecC
26784 if (ConcatWithConstantOrUndef(LHS) && ConcatWithConstantOrUndef(RHS) &&
26785 (LHS.hasOneUse() || RHS.hasOneUse())) {
26786 EVT NarrowVT = LHS.getOperand(0).getValueType();
26787 if (NarrowVT == RHS.getOperand(0).getValueType() &&
26788 TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
26789 unsigned NumOperands = LHS.getNumOperands();
26790 SmallVector<SDValue, 4> ConcatOps;
26791 for (unsigned i = 0; i != NumOperands; ++i) {
26792 // This constant folds for operands 1 and up.
26793 ConcatOps.push_back(DAG.getNode(Opcode, DL, NarrowVT, LHS.getOperand(i),
26794 RHS.getOperand(i)));
26795 }
26796
26797 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
26798 }
26799 }
26800
26801 if (SDValue V = scalarizeBinOpOfSplats(N, DAG, DL))
26802 return V;
26803
26804 return SDValue();
26805}
26806
26807SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
26808 SDValue N2) {
26809 assert(N0.getOpcode() == ISD::SETCC &&
26810 "First argument must be a SetCC node!");
26811
26812 SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
26813 cast<CondCodeSDNode>(N0.getOperand(2))->get());
26814
26815 // If we got a simplified select_cc node back from SimplifySelectCC, then
26816 // break it down into a new SETCC node, and a new SELECT node, and then return
26817 // the SELECT node, since we were called with a SELECT node.
26818 if (SCC.getNode()) {
26819 // Check to see if we got a select_cc back (to turn into setcc/select).
26820 // Otherwise, just return whatever node we got back, like fabs.
26821 if (SCC.getOpcode() == ISD::SELECT_CC) {
26822 const SDNodeFlags Flags = N0->getFlags();
26823 SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
26824 N0.getValueType(),
26825 SCC.getOperand(0), SCC.getOperand(1),
26826 SCC.getOperand(4), Flags);
26827 AddToWorklist(SETCC.getNode());
26828 SDValue SelectNode = DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
26829 SCC.getOperand(2), SCC.getOperand(3));
26830 SelectNode->setFlags(Flags);
26831 return SelectNode;
26832 }
26833
26834 return SCC;
26835 }
26836 return SDValue();
26837}
26838
26839/// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
26840/// being selected between, see if we can simplify the select. Callers of this
26841/// should assume that TheSelect is deleted if this returns true. As such, they
26842/// should return the appropriate thing (e.g. the node) back to the top-level of
26843/// the DAG combiner loop to avoid it being looked at.
26844bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
26845 SDValue RHS) {
26846 // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
26847 // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
26848 if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
26849 if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
26850 // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
26851 SDValue Sqrt = RHS;
26852 ISD::CondCode CC;
26853 SDValue CmpLHS;
26854 const ConstantFPSDNode *Zero = nullptr;
26855
26856 if (TheSelect->getOpcode() == ISD::SELECT_CC) {
26857 CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
26858 CmpLHS = TheSelect->getOperand(0);
26859 Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
26860 } else {
26861 // SELECT or VSELECT
26862 SDValue Cmp = TheSelect->getOperand(0);
26863 if (Cmp.getOpcode() == ISD::SETCC) {
26864 CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
26865 CmpLHS = Cmp.getOperand(0);
26866 Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
26867 }
26868 }
26869 if (Zero && Zero->isZero() &&
26870 Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
26871 CC == ISD::SETULT || CC == ISD::SETLT)) {
26872 // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
26873 CombineTo(TheSelect, Sqrt);
26874 return true;
26875 }
26876 }
26877 }
26878 // Cannot simplify select with vector condition
26879 if (TheSelect->getOperand(0).getValueType().isVector()) return false;
26880
26881 // If this is a select from two identical things, try to pull the operation
26882 // through the select.
26883 if (LHS.getOpcode() != RHS.getOpcode() ||
26884 !LHS.hasOneUse() || !RHS.hasOneUse())
26885 return false;
26886
26887 // If this is a load and the token chain is identical, replace the select
26888 // of two loads with a load through a select of the address to load from.
26889 // This triggers in things like "select bool X, 10.0, 123.0" after the FP
26890 // constants have been dropped into the constant pool.
26891 if (LHS.getOpcode() == ISD::LOAD) {
26892 LoadSDNode *LLD = cast<LoadSDNode>(LHS);
26893 LoadSDNode *RLD = cast<LoadSDNode>(RHS);
26894
26895 // Token chains must be identical.
26896 if (LHS.getOperand(0) != RHS.getOperand(0) ||
26897 // Do not let this transformation reduce the number of volatile loads.
26898 // Be conservative for atomics for the moment
26899 // TODO: This does appear to be legal for unordered atomics (see D66309)
26900 !LLD->isSimple() || !RLD->isSimple() ||
26901 // FIXME: If either is a pre/post inc/dec load,
26902 // we'd need to split out the address adjustment.
26903 LLD->isIndexed() || RLD->isIndexed() ||
26904 // If this is an EXTLOAD, the VT's must match.
26905 LLD->getMemoryVT() != RLD->getMemoryVT() ||
26906 // If this is an EXTLOAD, the kind of extension must match.
26907 (LLD->getExtensionType() != RLD->getExtensionType() &&
26908 // The only exception is if one of the extensions is anyext.
26909 LLD->getExtensionType() != ISD::EXTLOAD &&
26910 RLD->getExtensionType() != ISD::EXTLOAD) ||
26911 // FIXME: this discards src value information. This is
26912 // over-conservative. It would be beneficial to be able to remember
26913 // both potential memory locations. Since we are discarding
26914 // src value info, don't do the transformation if the memory
26915 // locations are not in the default address space.
26916 LLD->getPointerInfo().getAddrSpace() != 0 ||
26917 RLD->getPointerInfo().getAddrSpace() != 0 ||
26918 // We can't produce a CMOV of a TargetFrameIndex since we won't
26919 // generate the address generation required.
26920 LLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
26921 RLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
26922 !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
26923 LLD->getBasePtr().getValueType()))
26924 return false;
26925
26926 // The loads must not depend on one another.
26927 if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD))
26928 return false;
26929
26930 // Check that the select condition doesn't reach either load. If so,
26931 // folding this will induce a cycle into the DAG. If not, this is safe to
26932 // xform, so create a select of the addresses.
26933
26934 SmallPtrSet<const SDNode *, 32> Visited;
26935 SmallVector<const SDNode *, 16> Worklist;
26936
26937 // Always fail if LLD and RLD are not independent. TheSelect is a
26938 // predecessor to all Nodes in question so we need not search past it.
26939
26940 Visited.insert(TheSelect);
26941 Worklist.push_back(LLD);
26942 Worklist.push_back(RLD);
26943
26944 if (SDNode::hasPredecessorHelper(LLD, Visited, Worklist) ||
26945 SDNode::hasPredecessorHelper(RLD, Visited, Worklist))
26946 return false;
26947
26948 SDValue Addr;
26949 if (TheSelect->getOpcode() == ISD::SELECT) {
26950 // We cannot do this optimization if any pair of {RLD, LLD} is a
26951 // predecessor to {RLD, LLD, CondNode}. As we've already compared the
26952 // Loads, we only need to check if CondNode is a successor to one of the
26953 // loads. We can further avoid this if there's no use of their chain
26954 // value.
26955 SDNode *CondNode = TheSelect->getOperand(0).getNode();
26956 Worklist.push_back(CondNode);
26957
26958 if ((LLD->hasAnyUseOfValue(1) &&
26959 SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
26960 (RLD->hasAnyUseOfValue(1) &&
26961 SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
26962 return false;
26963
26964 Addr = DAG.getSelect(SDLoc(TheSelect),
26965 LLD->getBasePtr().getValueType(),
26966 TheSelect->getOperand(0), LLD->getBasePtr(),
26967 RLD->getBasePtr());
26968 } else { // Otherwise SELECT_CC
26969 // We cannot do this optimization if any pair of {RLD, LLD} is a
26970 // predecessor to {RLD, LLD, CondLHS, CondRHS}. As we've already compared
26971 // the Loads, we only need to check if CondLHS/CondRHS is a successor to
26972 // one of the loads. We can further avoid this if there's no use of their
26973 // chain value.
26974
26975 SDNode *CondLHS = TheSelect->getOperand(0).getNode();
26976 SDNode *CondRHS = TheSelect->getOperand(1).getNode();
26977 Worklist.push_back(CondLHS);
26978 Worklist.push_back(CondRHS);
26979
26980 if ((LLD->hasAnyUseOfValue(1) &&
26981 SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
26982 (RLD->hasAnyUseOfValue(1) &&
26983 SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
26984 return false;
26985
26986 Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
26987 LLD->getBasePtr().getValueType(),
26988 TheSelect->getOperand(0),
26989 TheSelect->getOperand(1),
26990 LLD->getBasePtr(), RLD->getBasePtr(),
26991 TheSelect->getOperand(4));
26992 }
26993
26994 SDValue Load;
26995 // It is safe to replace the two loads if they have different alignments,
26996 // but the new load must use the minimum (most restrictive) alignment of the
26997 // inputs.
26998 Align Alignment = std::min(LLD->getAlign(), RLD->getAlign());
26999 MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
27000 if (!RLD->isInvariant())
27001 MMOFlags &= ~MachineMemOperand::MOInvariant;
27002 if (!RLD->isDereferenceable())
27003 MMOFlags &= ~MachineMemOperand::MODereferenceable;
27004 if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
27005 // FIXME: Discards pointer and AA info.
27006 Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
27007 LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
27008 MMOFlags);
27009 } else {
27010 // FIXME: Discards pointer and AA info.
27011 Load = DAG.getExtLoad(
27012 LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
27013 : LLD->getExtensionType(),
27014 SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
27015 MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
27016 }
27017
27018 // Users of the select now use the result of the load.
27019 CombineTo(TheSelect, Load);
27020
27021 // Users of the old loads now use the new load's chain. We know the
27022 // old-load value is dead now.
27023 CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
27024 CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
27025 return true;
27026 }
27027
27028 return false;
27029}
27030
27031/// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
27032/// bitwise 'and'.
27033SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
27034 SDValue N1, SDValue N2, SDValue N3,
27035 ISD::CondCode CC) {
27036 // If this is a select where the false operand is zero and the compare is a
27037 // check of the sign bit, see if we can perform the "gzip trick":
27038 // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
27039 // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
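// For example, with i32 operands, (sra X, 31) is all-ones exactly when X is
// negative and zero otherwise, so "select_cc setlt X, 0, A, 0" becomes
// "and (sra X, 31), A" with no select or branch.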
27040 EVT XType = N0.getValueType();
27041 EVT AType = N2.getValueType();
27042 if (!isNullConstant(N3) || !XType.bitsGE(AType))
27043 return SDValue();
27044
27045 // If the comparison is testing for a positive value, we have to invert
27046 // the sign bit mask, so only do that transform if the target has a bitwise
27047 // 'and not' instruction (the invert is free).
27048 if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
27049 // (X > -1) ? A : 0
27050 // (X > 0) ? X : 0 <-- This is canonical signed max.
27051 if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
27052 return SDValue();
27053 } else if (CC == ISD::SETLT) {
27054 // (X < 0) ? A : 0
27055 // (X < 1) ? X : 0 <-- This is un-canonicalized signed min.
27056 if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
27057 return SDValue();
27058 } else {
27059 return SDValue();
27060 }
27061
27062 // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
27063 // constant.
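// (A & (A - 1)) == 0 means A has at most one bit set; in that case a logical
// shift right that moves X's sign bit into A's bit position replaces the
// sign-extending shift + mask.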
27064 EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
27065 auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
27066 if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
27067 unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
27068 if (!TLI.shouldAvoidTransformToShift(XType, ShCt)) {
27069 SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
27070 SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
27071 AddToWorklist(Shift.getNode());
27072
27073 if (XType.bitsGT(AType)) {
27074 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
27075 AddToWorklist(Shift.getNode());
27076 }
27077
27078 if (CC == ISD::SETGT)
27079 Shift = DAG.getNOT(DL, Shift, AType);
27080
27081 return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
27082 }
27083 }
27084
27085 unsigned ShCt = XType.getSizeInBits() - 1;
27086 if (TLI.shouldAvoidTransformToShift(XType, ShCt))
27087 return SDValue();
27088
27089 SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
27090 SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
27091 AddToWorklist(Shift.getNode());
27092
27093 if (XType.bitsGT(AType)) {
27094 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
27095 AddToWorklist(Shift.getNode());
27096 }
27097
27098 if (CC == ISD::SETGT)
27099 Shift = DAG.getNOT(DL, Shift, AType);
27100
27101 return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
27102}
27103
27104// Fold select(cc, binop(), binop()) -> binop(select(), select()) etc.
27105SDValue DAGCombiner::foldSelectOfBinops(SDNode *N) {
27106 SDValue N0 = N->getOperand(0);
27107 SDValue N1 = N->getOperand(1);
27108 SDValue N2 = N->getOperand(2);
27109 SDLoc DL(N);
27110
27111 unsigned BinOpc = N1.getOpcode();
27112 if (!TLI.isBinOp(BinOpc) || (N2.getOpcode() != BinOpc) ||
27113 (N1.getResNo() != N2.getResNo()))
27114 return SDValue();
27115
27116 // The use checks are intentionally on SDNode because we may be dealing
27117 // with opcodes that produce more than one SDValue.
27118 // TODO: Do we really need to check N0 (the condition operand of the select)?
27119 // But removing that clause could cause an infinite loop...
27120 if (!N0->hasOneUse() || !N1->hasOneUse() || !N2->hasOneUse())
27121 return SDValue();
27122
27123 // Binops may include opcodes that return multiple values, so all values
27124 // must be created/propagated from the newly created binops below.
27125 SDVTList OpVTs = N1->getVTList();
27126
27127 // Fold select(cond, binop(x, y), binop(z, y))
27128 // --> binop(select(cond, x, z), y)
27129 if (N1.getOperand(1) == N2.getOperand(1)) {
27130 SDValue N10 = N1.getOperand(0);
27131 SDValue N20 = N2.getOperand(0);
27132 SDValue NewSel = DAG.getSelect(DL, N10.getValueType(), N0, N10, N20);
27133 SDValue NewBinOp = DAG.getNode(BinOpc, DL, OpVTs, NewSel, N1.getOperand(1));
27134 NewBinOp->setFlags(N1->getFlags());
27135 NewBinOp->intersectFlagsWith(N2->getFlags());
27136 return SDValue(NewBinOp.getNode(), N1.getResNo());
27137 }
27138
27139 // Fold select(cond, binop(x, y), binop(x, z))
27140 // --> binop(x, select(cond, y, z))
27141 if (N1.getOperand(0) == N2.getOperand(0)) {
27142 SDValue N11 = N1.getOperand(1);
27143 SDValue N21 = N2.getOperand(1);
27144 // Second op VT might be different (e.g. shift amount type)
27145 if (N11.getValueType() == N21.getValueType()) {
27146 SDValue NewSel = DAG.getSelect(DL, N11.getValueType(), N0, N11, N21);
27147 SDValue NewBinOp =
27148 DAG.getNode(BinOpc, DL, OpVTs, N1.getOperand(0), NewSel);
27149 NewBinOp->setFlags(N1->getFlags());
27150 NewBinOp->intersectFlagsWith(N2->getFlags());
27151 return SDValue(NewBinOp.getNode(), N1.getResNo());
27152 }
27153 }
27154
27155 // TODO: Handle isCommutativeBinOp patterns as well?
27156 return SDValue();
27157}
27158
27159// Transform (fneg/fabs (bitconvert x)) to avoid loading constant pool values.
27160SDValue DAGCombiner::foldSignChangeInBitcast(SDNode *N) {
27161 SDValue N0 = N->getOperand(0);
27162 EVT VT = N->getValueType(0);
27163 bool IsFabs = N->getOpcode() == ISD::FABS;
27164 bool IsFree = IsFabs ? TLI.isFAbsFree(VT) : TLI.isFNegFree(VT);
27165
27166 if (IsFree || N0.getOpcode() != ISD::BITCAST || !N0.hasOneUse())
27167 return SDValue();
27168
27169 SDValue Int = N0.getOperand(0);
27170 EVT IntVT = Int.getValueType();
27171
27172 // The operand to cast should be integer.
27173 if (!IntVT.isInteger() || IntVT.isVector())
27174 return SDValue();
27175
27176 // (fneg (bitconvert x)) -> (bitconvert (xor x sign))
27177 // (fabs (bitconvert x)) -> (bitconvert (and x ~sign))
27178 APInt SignMask;
27179 if (N0.getValueType().isVector()) {
27180 // For vector, create a sign mask (0x80...) or its inverse (for fabs,
27181 // 0x7f...) per element and splat it.
27182 SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
27183 if (IsFabs)
27184 SignMask = ~SignMask;
27185 SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
27186 } else {
27187 // For scalar, just use the sign mask (0x80... or the inverse, 0x7f...)
27188 SignMask = APInt::getSignMask(IntVT.getSizeInBits());
27189 if (IsFabs)
27190 SignMask = ~SignMask;
27191 }
27192 SDLoc DL(N0);
27193 Int = DAG.getNode(IsFabs ? ISD::AND : ISD::XOR, DL, IntVT, Int,
27194 DAG.getConstant(SignMask, DL, IntVT));
27195 AddToWorklist(Int.getNode());
27196 return DAG.getBitcast(VT, Int);
27197}
27198
27199/// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)"
27200/// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
27201/// in it. This may be a win when the constant is not otherwise available
27202/// because it replaces two constant pool loads with one.
27203SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset(
27204 const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
27205 ISD::CondCode CC) {
27206 if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType()))
27207 return SDValue();
27208
27209 // If we are before legalize types, we want the other legalization to happen
27210 // first (for example, to avoid messing with soft float).
27211 auto *TV = dyn_cast<ConstantFPSDNode>(N2);
27212 auto *FV = dyn_cast<ConstantFPSDNode>(N3);
27213 EVT VT = N2.getValueType();
27214 if (!TV || !FV || !TLI.isTypeLegal(VT))
27215 return SDValue();
27216
27217 // If a constant can be materialized without loads, this does not make sense.
27218 if (TLI.getTargetMachine().getOptLevel() == CodeGenOptLevel::None ||
27219 TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0), ForCodeSize) ||
27220 TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0), ForCodeSize))
27221 return SDValue();
27222
27223 // If both constants have multiple uses, then we won't need to do an extra
27224 // load. The values are likely around in registers for other users.
27225 if (!TV->hasOneUse() && !FV->hasOneUse())
27226 return SDValue();
27227
27228 Constant *Elts[] = { const_cast<ConstantFP*>(FV->getConstantFPValue()),
27229 const_cast<ConstantFP*>(TV->getConstantFPValue()) };
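// The array is laid out as { FV, TV }, so a true condition selects byte
// offset EltSize (element 1, the 'true' constant) and a false condition
// selects offset 0 (element 0, the 'false' constant).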
27230 Type *FPTy = Elts[0]->getType();
27231 const DataLayout &TD = DAG.getDataLayout();
27232
27233 // Create a ConstantArray of the two constants.
27234 Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
27235 SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
27236 TD.getPrefTypeAlign(FPTy));
27237 Align Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlign();
27238
27239 // Get offsets to the 0 and 1 elements of the array, so we can select between
27240 // them.
27241 SDValue Zero = DAG.getIntPtrConstant(0, DL);
27242 unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
27243 SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
27244 SDValue Cond =
27245 DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC);
27246 AddToWorklist(Cond.getNode());
27247 SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), Cond, One, Zero);
27248 AddToWorklist(CstOffset.getNode());
27249 CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset);
27250 AddToWorklist(CPIdx.getNode());
27251 return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
27252 MachinePointerInfo::getConstantPool(
27253 DAG.getMachineFunction()), Alignment);
27254}
27255
27256/// Simplify an expression of the form (N0 cond N1) ? N2 : N3
27257/// where 'cond' is the comparison specified by CC.
27258SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
27259 SDValue N2, SDValue N3, ISD::CondCode CC,
27260 bool NotExtCompare) {
27261 // (x ? y : y) -> y.
27262 if (N2 == N3) return N2;
27263
27264 EVT CmpOpVT = N0.getValueType();
27265 EVT CmpResVT = getSetCCResultType(CmpOpVT);
27266 EVT VT = N2.getValueType();
27267 auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
27268 auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
27269 auto *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
27270
27271 // Determine if the condition we're dealing with is constant.
27272 if (SDValue SCC = DAG.FoldSetCC(CmpResVT, N0, N1, CC, DL)) {
27273 AddToWorklist(SCC.getNode());
27274 if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC)) {
27275 // fold select_cc true, x, y -> x
27276 // fold select_cc false, x, y -> y
27277 return !(SCCC->isZero()) ? N2 : N3;
27278 }
27279 }
27280
27281 if (SDValue V =
27282 convertSelectOfFPConstantsToLoadOffset(DL, N0, N1, N2, N3, CC))
27283 return V;
27284
27285 if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
27286 return V;
27287
27288 // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (sra (shl x)) A)
27289 // where y has a single bit set.
27290 // A plaintext description would be: we can turn the SELECT_CC into an AND
27291 // when the condition can be materialized as an all-ones register. Any
27292 // single bit-test can be materialized as an all-ones register with
27293 // shift-left and shift-right-arith.
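// For example, with i32 x and y == (1 << k): shifting x left by (31 - k)
// moves the tested bit into the sign bit, and an arithmetic shift right by 31
// then produces all-ones when the bit was set and zero otherwise, so the
// final AND yields A or 0 as required.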
27294 if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
27295 N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
27296 SDValue AndLHS = N0->getOperand(0);
27297 auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
27298 if (ConstAndRHS && ConstAndRHS->getAPIntValue().popcount() == 1) {
27299 // Shift the tested bit over the sign bit.
27300 const APInt &AndMask = ConstAndRHS->getAPIntValue();
27301 if (TLI.shouldFoldSelectWithSingleBitTest(VT, AndMask)) {
27302 unsigned ShCt = AndMask.getBitWidth() - 1;
27303 SDValue ShlAmt =
27304 DAG.getConstant(AndMask.countl_zero(), SDLoc(AndLHS),
27305 getShiftAmountTy(AndLHS.getValueType()));
27306 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
27307
27308 // Now arithmetic right shift it all the way over, so the result is
27309 // either all-ones, or zero.
27310 SDValue ShrAmt =
27311 DAG.getConstant(ShCt, SDLoc(Shl),
27312 getShiftAmountTy(Shl.getValueType()));
27313 SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
27314
27315 return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
27316 }
27317 }
27318 }
27319
27320 // fold select C, 16, 0 -> shl C, 4
27321 bool Fold = N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2();
27322 bool Swap = N3C && isNullConstant(N2) && N3C->getAPIntValue().isPowerOf2();
27323
27324 if ((Fold || Swap) &&
27325 TLI.getBooleanContents(CmpOpVT) ==
27326 TargetLowering::ZeroOrOneBooleanContent &&
27327 (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT))) {
27328
27329 if (Swap) {
27330 CC = ISD::getSetCCInverse(CC, CmpOpVT);
27331 std::swap(N2C, N3C);
27332 }
27333
27334 // If the caller doesn't want us to simplify this into a zext of a compare,
27335 // don't do it.
27336 if (NotExtCompare && N2C->isOne())
27337 return SDValue();
27338
27339 SDValue Temp, SCC;
27340 // zext (setcc n0, n1)
27341 if (LegalTypes) {
27342 SCC = DAG.getSetCC(DL, CmpResVT, N0, N1, CC);
27343 Temp = DAG.getZExtOrTrunc(SCC, SDLoc(N2), VT);
27344 } else {
27345 SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
27346 Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
27347 }
27348
27349 AddToWorklist(SCC.getNode());
27350 AddToWorklist(Temp.getNode());
27351
27352 if (N2C->isOne())
27353 return Temp;
27354
27355 unsigned ShCt = N2C->getAPIntValue().logBase2();
27356 if (TLI.shouldAvoidTransformToShift(VT, ShCt))
27357 return SDValue();
27358
27359 // shl setcc result by log2 n2c
27360 return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
27361 DAG.getConstant(ShCt, SDLoc(Temp),
27362 getShiftAmountTy(Temp.getValueType())));
27363 }
27364
27365 // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
27366 // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
27367 // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
27368 // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
27369 // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
27370 // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
27371 // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
27372 // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
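// These folds rely on ISD::CTLZ/ISD::CTTZ being defined to return the bit
// width for a zero input, so the explicit compare against 0 is redundant; the
// *_ZERO_UNDEF variants are rewritten to the plain forms so the zero case
// stays well-defined.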
27373 if (N1C && N1C->isZero() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
27374 SDValue ValueOnZero = N2;
27375 SDValue Count = N3;
27376 // If the condition is NE instead of EQ, swap the operands.
27377 if (CC == ISD::SETNE)
27378 std::swap(ValueOnZero, Count);
27379 // Check if the value on zero is a constant equal to the bits in the type.
27380 if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
27381 if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
27382 // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
27383 // legal, combine to just cttz.
27384 if ((Count.getOpcode() == ISD::CTTZ ||
27385 Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
27386 N0 == Count.getOperand(0) &&
27387 (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
27388 return DAG.getNode(ISD::CTTZ, DL, VT, N0);
27389 // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
27390 // legal, combine to just ctlz.
27391 if ((Count.getOpcode() == ISD::CTLZ ||
27392 Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
27393 N0 == Count.getOperand(0) &&
27394 (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
27395 return DAG.getNode(ISD::CTLZ, DL, VT, N0);
27396 }
27397 }
27398 }
27399
27400 // Fold select_cc setgt X, -1, C, ~C -> xor (ashr X, BW-1), C
27401 // Fold select_cc setlt X, 0, C, ~C -> xor (ashr X, BW-1), ~C
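// In both patterns (ashr X, BW-1) is 0 for non-negative X and all-ones for
// negative X; xor with an all-ones mask flips the constant to its complement,
// so a single shift + xor reproduces the select of C and ~C.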
27402 if (!NotExtCompare && N1C && N2C && N3C &&
27403 N2C->getAPIntValue() == ~N3C->getAPIntValue() &&
27404 ((N1C->isAllOnes() && CC == ISD::SETGT) ||
27405 (N1C->isZero() && CC == ISD::SETLT)) &&
27406 !TLI.shouldAvoidTransformToShift(VT, CmpOpVT.getScalarSizeInBits() - 1)) {
27407 SDValue ASR = DAG.getNode(
27408 ISD::SRA, DL, CmpOpVT, N0,
27409 DAG.getConstant(CmpOpVT.getScalarSizeInBits() - 1, DL, CmpOpVT));
27410 return DAG.getNode(ISD::XOR, DL, VT, DAG.getSExtOrTrunc(ASR, DL, VT),
27411 DAG.getSExtOrTrunc(CC == ISD::SETLT ? N3 : N2, DL, VT));
27412 }
27413
27414 if (SDValue S = PerformMinMaxFpToSatCombine(N0, N1, N2, N3, CC, DAG))
27415 return S;
27416 if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N2, N3, CC, DAG))
27417 return S;
27418
27419 return SDValue();
27420}
27421
27422/// This is a stub for TargetLowering::SimplifySetCC.
27423SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
27424 ISD::CondCode Cond, const SDLoc &DL,
27425 bool foldBooleans) {
27426 TargetLowering::DAGCombinerInfo
27427 DagCombineInfo(DAG, Level, false, this);
27428 return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
27429}
27430
27431/// Given an ISD::SDIV node expressing a divide by constant, return
27432/// a DAG expression to select that will generate the same value by multiplying
27433/// by a magic number.
27434/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
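/// For example, a signed i32 divide by 10 can be lowered roughly as
/// hi32(x * 0x66666667) >> 2 plus a sign-bit correction; TLI.BuildSDIV picks
/// the magic constant and shift amounts for the actual divisor.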
27435SDValue DAGCombiner::BuildSDIV(SDNode *N) {
27436 // when optimising for minimum size, we don't want to expand a div to a mul
27437 // and a shift.
27438 if (DAG.getMachineFunction().getFunction().hasMinSize())
27439 return SDValue();
27440
27441 SmallVector<SDNode *, 8> Built;
27442 if (SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, Built)) {
27443 for (SDNode *N : Built)
27444 AddToWorklist(N);
27445 return S;
27446 }
27447
27448 return SDValue();
27449}
27450
27451/// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
27452/// DAG expression that will generate the same value by right shifting.
27453SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
27454 ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
27455 if (!C)
27456 return SDValue();
27457
27458 // Avoid division by zero.
27459 if (C->isZero())
27460 return SDValue();
27461
27462 SmallVector<SDNode *, 8> Built;
27463 if (SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built)) {
27464 for (SDNode *N : Built)
27465 AddToWorklist(N);
27466 return S;
27467 }
27468
27469 return SDValue();
27470}
27471
27472/// Given an ISD::UDIV node expressing a divide by constant, return a DAG
27473/// expression that will generate the same value by multiplying by a magic
27474/// number.
27475/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
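/// For example, an unsigned i32 divide by 10 becomes roughly
/// hi32(x * 0xCCCCCCCD) >> 3; TLI.BuildUDIV chooses the magic constant and
/// any pre/post shifts for the actual divisor.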
27476SDValue DAGCombiner::BuildUDIV(SDNode *N) {
27477 // when optimising for minimum size, we don't want to expand a div to a mul
27478 // and a shift.
27479 if (DAG.getMachineFunction().getFunction().hasMinSize())
27480 return SDValue();
27481
27482 SmallVector<SDNode *, 8> Built;
27483 if (SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, Built)) {
27484 for (SDNode *N : Built)
27485 AddToWorklist(N);
27486 return S;
27487 }
27488
27489 return SDValue();
27490}
27491
27492/// Given an ISD::SREM node expressing a remainder by constant power of 2,
27493/// return a DAG expression that will generate the same value.
27494SDValue DAGCombiner::BuildSREMPow2(SDNode *N) {
27495 ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
27496 if (!C)
27497 return SDValue();
27498
27499 // Avoid division by zero.
27500 if (C->isZero())
27501 return SDValue();
27502
27503 SmallVector<SDNode *, 8> Built;
27504 if (SDValue S = TLI.BuildSREMPow2(N, C->getAPIntValue(), DAG, Built)) {
27505 for (SDNode *N : Built)
27506 AddToWorklist(N);
27507 return S;
27508 }
27509
27510 return SDValue();
27511}
27512
27513// This is basically just a port of takeLog2 from InstCombineMulDivRem.cpp
27514//
27515// Returns the node that represents `Log2(Op)`. This may create a new node. If
27516 // we are unable to compute `Log2(Op)`, it returns `SDValue()`.
27517//
27518// All nodes will be created at `DL` and the output will be of type `VT`.
27519//
27520// This will only return `Log2(Op)` if we can prove `Op` is non-zero. Set
27521 // `AssumeNonZero` if this function should simply assume (rather than require
27522 // a proof) that `Op` is non-zero.
27523 static SDValue takeInexpensiveLog2(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
27524 SDValue Op, unsigned Depth,
27525 bool AssumeNonZero) {
27526 assert(VT.isInteger() && "Only integer types are supported!");
27527
27528 auto PeekThroughCastsAndTrunc = [](SDValue V) {
27529 while (true) {
27530 switch (V.getOpcode()) {
27531 case ISD::TRUNCATE:
27532 case ISD::ZERO_EXTEND:
27533 V = V.getOperand(0);
27534 break;
27535 default:
27536 return V;
27537 }
27538 }
27539 };
27540
27541 if (VT.isScalableVector())
27542 return SDValue();
27543
27544 Op = PeekThroughCastsAndTrunc(Op);
27545
27546 // Helper for determining whether a value is a power-2 constant scalar or a
27547 // vector of such elements.
27548 SmallVector<APInt> Pow2Constants;
27549 auto IsPowerOfTwo = [&Pow2Constants](ConstantSDNode *C) {
27550 if (C->isZero() || C->isOpaque())
27551 return false;
27552 // TODO: We may also be able to support negative powers of 2 here.
27553 if (C->getAPIntValue().isPowerOf2()) {
27554 Pow2Constants.emplace_back(C->getAPIntValue());
27555 return true;
27556 }
27557 return false;
27558 };
27559
27560 if (ISD::matchUnaryPredicate(Op, IsPowerOfTwo)) {
27561 if (!VT.isVector())
27562 return DAG.getConstant(Pow2Constants.back().logBase2(), DL, VT);
27563 // We need to create a build vector
27564 SmallVector<SDValue> Log2Ops;
27565 for (const APInt &Pow2 : Pow2Constants)
27566 Log2Ops.emplace_back(
27567 DAG.getConstant(Pow2.logBase2(), DL, VT.getScalarType()));
27568 return DAG.getBuildVector(VT, DL, Log2Ops);
27569 }
27570
27571 if (Depth >= DAG.MaxRecursionDepth)
27572 return SDValue();
27573
27574 auto CastToVT = [&](EVT NewVT, SDValue ToCast) {
27575 ToCast = PeekThroughCastsAndTrunc(ToCast);
27576 EVT CurVT = ToCast.getValueType();
27577 if (NewVT == CurVT)
27578 return ToCast;
27579
27580 if (NewVT.getSizeInBits() == CurVT.getSizeInBits())
27581 return DAG.getBitcast(NewVT, ToCast);
27582
27583 return DAG.getZExtOrTrunc(ToCast, DL, NewVT);
27584 };
27585
27586 // log2(X << Y) -> log2(X) + Y
27587 if (Op.getOpcode() == ISD::SHL) {
27588 // 1 << Y and X nuw/nsw << Y are all non-zero.
27589 if (AssumeNonZero || Op->getFlags().hasNoUnsignedWrap() ||
27590 Op->getFlags().hasNoSignedWrap() || isOneConstant(Op.getOperand(0)))
27591 if (SDValue LogX = takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(0),
27592 Depth + 1, AssumeNonZero))
27593 return DAG.getNode(ISD::ADD, DL, VT, LogX,
27594 CastToVT(VT, Op.getOperand(1)));
27595 }
27596
27597 // c ? X : Y -> c ? Log2(X) : Log2(Y)
27598 if ((Op.getOpcode() == ISD::SELECT || Op.getOpcode() == ISD::VSELECT) &&
27599 Op.hasOneUse()) {
27600 if (SDValue LogX = takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(1),
27601 Depth + 1, AssumeNonZero))
27602 if (SDValue LogY = takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(2),
27603 Depth + 1, AssumeNonZero))
27604 return DAG.getSelect(DL, VT, Op.getOperand(0), LogX, LogY);
27605 }
27606
27607 // log2(umin(X, Y)) -> umin(log2(X), log2(Y))
27608 // log2(umax(X, Y)) -> umax(log2(X), log2(Y))
27609 if ((Op.getOpcode() == ISD::UMIN || Op.getOpcode() == ISD::UMAX) &&
27610 Op.hasOneUse()) {
27611 // Use AssumeNonZero as false here. Otherwise we can hit a case where
27612 // log2(umax(X, Y)) != umax(log2(X), log2(Y)) (because of overflow).
27613 if (SDValue LogX =
27614 takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(0), Depth + 1,
27615 /*AssumeNonZero*/ false))
27616 if (SDValue LogY =
27617 takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(1), Depth + 1,
27618 /*AssumeNonZero*/ false))
27619 return DAG.getNode(Op.getOpcode(), DL, VT, LogX, LogY);
27620 }
27621
27622 return SDValue();
27623}
27624
27625/// Determines the LogBase2 value for a non-null input value using the
27626/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
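/// For example, for i32 V = 16: ctlz(16) = 27, so LogBase2(16) = 31 - 27 = 4.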
27627SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL,
27628 bool KnownNonZero, bool InexpensiveOnly,
27629 std::optional<EVT> OutVT) {
27630 EVT VT = OutVT ? *OutVT : V.getValueType();
27631 SDValue InexpensiveLogBase2 =
27632 takeInexpensiveLog2(DAG, DL, VT, V, /*Depth*/ 0, KnownNonZero);
27633 if (InexpensiveLogBase2 || InexpensiveOnly || !DAG.isKnownToBeAPowerOfTwo(V))
27634 return InexpensiveLogBase2;
27635
27636 SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
27637 SDValue Base = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
27638 SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
27639 return LogBase2;
27640}
27641
27642/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
27643/// For the reciprocal, we need to find the zero of the function:
27644/// F(X) = 1/X - A [which has a zero at X = 1/A]
27645/// =>
27646/// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
27647/// does not require additional intermediate precision]
27648/// For the last iteration, put numerator N into it to gain more precision:
27649/// Result = N X_i + X_i (N - N A X_i)
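/// (This is the plain step X_{i+1} = X_i + X_i (1 - A X_i) scaled by N, so the
/// final multiply by the numerator is folded into the last refinement step.)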
27650SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op,
27651 SDNodeFlags Flags) {
27652 if (LegalDAG)
27653 return SDValue();
27654
27655 // TODO: Handle extended types?
27656 EVT VT = Op.getValueType();
27657 if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 &&
27658 VT.getScalarType() != MVT::f64)
27659 return SDValue();
27660
27661 // If estimates are explicitly disabled for this function, we're done.
27662 MachineFunction &MF = DAG.getMachineFunction();
27663 int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
27664 if (Enabled == TLI.ReciprocalEstimate::Disabled)
27665 return SDValue();
27666
27667 // Estimates may be explicitly enabled for this type with a custom number of
27668 // refinement steps.
27669 int Iterations = TLI.getDivRefinementSteps(VT, MF);
27670 if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
27671 AddToWorklist(Est.getNode());
27672
27673 SDLoc DL(Op);
27674 if (Iterations) {
27675 SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
27676
27677 // Newton iterations: Est = Est + Est (N - Arg * Est)
27678 // If this is the last iteration, also multiply by the numerator.
27679 for (int i = 0; i < Iterations; ++i) {
27680 SDValue MulEst = Est;
27681
27682 if (i == Iterations - 1) {
27683 MulEst = DAG.getNode(ISD::FMUL, DL, VT, N, Est, Flags);
27684 AddToWorklist(MulEst.getNode());
27685 }
27686
27687 SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, MulEst, Flags);
27688 AddToWorklist(NewEst.getNode());
27689
27690 NewEst = DAG.getNode(ISD::FSUB, DL, VT,
27691 (i == Iterations - 1 ? N : FPOne), NewEst, Flags);
27692 AddToWorklist(NewEst.getNode());
27693
27694 NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
27695 AddToWorklist(NewEst.getNode());
27696
27697 Est = DAG.getNode(ISD::FADD, DL, VT, MulEst, NewEst, Flags);
27698 AddToWorklist(Est.getNode());
27699 }
27700 } else {
27701 // If no iterations are available, multiply with N.
27702 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, N, Flags);
27703 AddToWorklist(Est.getNode());
27704 }
27705
27706 return Est;
27707 }
27708
27709 return SDValue();
27710}
27711
27712/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
27713/// For the reciprocal sqrt, we need to find the zero of the function:
27714/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
27715/// =>
27716/// X_{i+1} = X_i (1.5 - A X_i^2 / 2)
27717/// As a result, we precompute A/2 prior to the iteration loop.
27718SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
27719 unsigned Iterations,
27720 SDNodeFlags Flags, bool Reciprocal) {
27721 EVT VT = Arg.getValueType();
27722 SDLoc DL(Arg);
27723 SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
27724
27725 // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
27726 // this entire sequence requires only one FP constant.
27727 SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
27728 HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
27729
27730 // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
27731 for (unsigned i = 0; i < Iterations; ++i) {
27732 SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
27733 NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
27734 NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
27735 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
27736 }
27737
27738 // If non-reciprocal square root is requested, multiply the result by Arg.
27739 if (!Reciprocal)
27740 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
27741
27742 return Est;
27743}
27744
27745/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
27746/// For the reciprocal sqrt, we need to find the zero of the function:
27747/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
27748/// =>
27749/// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
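/// Algebraically this is the same update as X_i * (1.5 - 0.5 * A * X_i^2),
/// refactored to use only the constants -0.5 and -3.0 and to expose the
/// (A * X_i) product that the final non-reciprocal step reuses.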
27750SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
27751 unsigned Iterations,
27752 SDNodeFlags Flags, bool Reciprocal) {
27753 EVT VT = Arg.getValueType();
27754 SDLoc DL(Arg);
27755 SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
27756 SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
27757
27758 // This routine must enter the loop below to work correctly
27759 // when (Reciprocal == false).
27760 assert(Iterations > 0);
27761
27762 // Newton iterations for reciprocal square root:
27763 // E = (E * -0.5) * ((A * E) * E + -3.0)
27764 for (unsigned i = 0; i < Iterations; ++i) {
27765 SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
27766 SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
27767 SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
27768
27769 // When calculating a square root at the last iteration build:
27770 // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
27771 // (notice a common subexpression)
27772 SDValue LHS;
27773 if (Reciprocal || (i + 1) < Iterations) {
27774 // RSQRT: LHS = (E * -0.5)
27775 LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
27776 } else {
27777 // SQRT: LHS = (A * E) * -0.5
27778 LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
27779 }
27780
27781 Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
27782 }
27783
27784 return Est;
27785}
27786
27787/// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
27788/// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
27789/// Op can be zero.
27790SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
27791 bool Reciprocal) {
27792 if (LegalDAG)
27793 return SDValue();
27794
27795 // TODO: Handle extended types?
27796 EVT VT = Op.getValueType();
27797 if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 &&
27798 VT.getScalarType() != MVT::f64)
27799 return SDValue();
27800
27801 // If estimates are explicitly disabled for this function, we're done.
27802 MachineFunction &MF = DAG.getMachineFunction();
27803 int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
27804 if (Enabled == TLI.ReciprocalEstimate::Disabled)
27805 return SDValue();
27806
27807 // Estimates may be explicitly enabled for this type with a custom number of
27808 // refinement steps.
27809 int Iterations = TLI.getSqrtRefinementSteps(VT, MF);
27810
27811 bool UseOneConstNR = false;
27812 if (SDValue Est =
27813 TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
27814 Reciprocal)) {
27815 AddToWorklist(Est.getNode());
27816
27817 if (Iterations > 0)
27818 Est = UseOneConstNR
27819 ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
27820 : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
27821 if (!Reciprocal) {
27822 SDLoc DL(Op);
27823 // Try the target specific test first.
27824 SDValue Test = TLI.getSqrtInputTest(Op, DAG, DAG.getDenormalMode(VT));
27825
27826 // The estimate is now completely wrong if the input was exactly 0.0 or
27827 // possibly a denormal. Force the answer to 0.0 or the value provided by the
27828 // target for those cases.
27829 Est = DAG.getNode(
27830 Test.getValueType().isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT,
27831 Test, TLI.getSqrtResultForDenormInput(Op, DAG), Est);
27832 }
27833 return Est;
27834 }
27835
27836 return SDValue();
27837}
27838
27839SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
27840 return buildSqrtEstimateImpl(Op, Flags, true);
27841}
27842
27843SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
27844 return buildSqrtEstimateImpl(Op, Flags, false);
27845}
27846
27847/// Return true if there is any possibility that the two addresses overlap.
27848bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const {
27849
27850 struct MemUseCharacteristics {
27851 bool IsVolatile;
27852 bool IsAtomic;
27853 SDValue BasePtr;
27854 int64_t Offset;
27855 LocationSize NumBytes;
27856 MachineMemOperand *MMO;
27857 };
27858
27859 auto getCharacteristics = [](SDNode *N) -> MemUseCharacteristics {
27860 if (const auto *LSN = dyn_cast<LSBaseSDNode>(N)) {
27861 int64_t Offset = 0;
27862 if (auto *C = dyn_cast<ConstantSDNode>(LSN->getOffset()))
27863 Offset = (LSN->getAddressingMode() == ISD::PRE_INC) ? C->getSExtValue()
27864 : (LSN->getAddressingMode() == ISD::PRE_DEC)
27865 ? -1 * C->getSExtValue()
27866 : 0;
27867 TypeSize Size = LSN->getMemoryVT().getStoreSize();
27868 return {LSN->isVolatile(), LSN->isAtomic(),
27869 LSN->getBasePtr(), Offset /*base offset*/,
27870 LocationSize::precise(Size), LSN->getMemOperand()};
27871 }
27872 if (const auto *LN = cast<LifetimeSDNode>(N))
27873 return {false /*isVolatile*/,
27874 /*isAtomic*/ false,
27875 LN->getOperand(1),
27876 (LN->hasOffset()) ? LN->getOffset() : 0,
27877 (LN->hasOffset()) ? LocationSize::precise(LN->getSize())
27878 : LocationSize::beforeOrAfterPointer(),
27879 (MachineMemOperand *)nullptr};
27880 // Default.
27881 return {false /*isvolatile*/,
27882 /*isAtomic*/ false,
27883 SDValue(),
27884 (int64_t)0 /*offset*/,
27885 LocationSize::beforeOrAfterPointer() /*size*/,
27886 (MachineMemOperand *)nullptr};
27887 };
27888
27889 MemUseCharacteristics MUC0 = getCharacteristics(Op0),
27890 MUC1 = getCharacteristics(Op1);
27891
27892 // If they are to the same address, then they must be aliases.
27893 if (MUC0.BasePtr.getNode() && MUC0.BasePtr == MUC1.BasePtr &&
27894 MUC0.Offset == MUC1.Offset)
27895 return true;
27896
27897 // If they are both volatile then they cannot be reordered.
27898 if (MUC0.IsVolatile && MUC1.IsVolatile)
27899 return true;
27900
27901 // Be conservative about atomics for the moment
27902 // TODO: This is way overconservative for unordered atomics (see D66309)
27903 if (MUC0.IsAtomic && MUC1.IsAtomic)
27904 return true;
27905
27906 if (MUC0.MMO && MUC1.MMO) {
27907 if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
27908 (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
27909 return false;
27910 }
27911
27912 // If NumBytes is scalable and offset is not 0, conservatively return may
27913 // alias
27914 if ((MUC0.NumBytes.hasValue() && MUC0.NumBytes.isScalable() &&
27915 MUC0.Offset != 0) ||
27916 (MUC1.NumBytes.hasValue() && MUC1.NumBytes.isScalable() &&
27917 MUC1.Offset != 0))
27918 return true;
27919 // Try to prove that there is aliasing, or that there is no aliasing. Either
27920 // way, we can return now. If nothing can be proved, proceed with more tests.
27921 bool IsAlias;
27922 if (BaseIndexOffset::computeAliasing(Op0, MUC0.NumBytes, Op1, MUC1.NumBytes,
27923 DAG, IsAlias))
27924 return IsAlias;
27925
27926 // The following all rely on MMO0 and MMO1 being valid. Fail conservatively if
27927 // either are not known.
27928 if (!MUC0.MMO || !MUC1.MMO)
27929 return true;
27930
27931 // If one operation reads from invariant memory, and the other may store, they
27932 // cannot alias. These should really be checking the equivalent of mayWrite,
27933 // but it only matters for memory nodes other than load/store.
27934 if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
27935 (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
27936 return false;
27937
27938 // If we know required SrcValue1 and SrcValue2 have relatively large
27939 // alignment compared to the size and offset of the access, we may be able
27940 // to prove they do not alias. This check is conservative for now to catch
27941 // cases created by splitting vector types; it only works when the offsets are
27942 // multiples of the size of the data.
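// For example, two 4-byte accesses with 8-byte base alignment at offsets 0
// and 4 have offsets mod 8 of 0 and 4, so they occupy disjoint halves of an
// aligned 8-byte window and cannot overlap.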
27943 int64_t SrcValOffset0 = MUC0.MMO->getOffset();
27944 int64_t SrcValOffset1 = MUC1.MMO->getOffset();
27945 Align OrigAlignment0 = MUC0.MMO->getBaseAlign();
27946 Align OrigAlignment1 = MUC1.MMO->getBaseAlign();
27947 LocationSize Size0 = MUC0.NumBytes;
27948 LocationSize Size1 = MUC1.NumBytes;
27949
27950 if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
27951 Size0.hasValue() && Size1.hasValue() && !Size0.isScalable() &&
27952 !Size1.isScalable() && Size0 == Size1 &&
27953 OrigAlignment0 > Size0.getValue().getKnownMinValue() &&
27954 SrcValOffset0 % Size0.getValue().getKnownMinValue() == 0 &&
27955 SrcValOffset1 % Size1.getValue().getKnownMinValue() == 0) {
27956 int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0.value();
27957 int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1.value();
27958
27959 // There is no overlap between these relatively aligned accesses of
27960 // similar size. Return no alias.
27961 if ((OffAlign0 + static_cast<int64_t>(
27962 Size0.getValue().getKnownMinValue())) <= OffAlign1 ||
27963 (OffAlign1 + static_cast<int64_t>(
27964 Size1.getValue().getKnownMinValue())) <= OffAlign0)
27965 return false;
27966 }
27967
27968 bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
27969 ? CombinerGlobalAA
27970 : DAG.getSubtarget().useAA();
27971#ifndef NDEBUG
27972 if (CombinerAAOnlyFunc.getNumOccurrences() &&
27973 CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
27974 UseAA = false;
27975#endif
27976
27977 if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() &&
27978 Size0.hasValue() && Size1.hasValue()) {
27979 // Use alias analysis information.
27980 int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
27981 int64_t Overlap0 =
27982 Size0.getValue().getKnownMinValue() + SrcValOffset0 - MinOffset;
27983 int64_t Overlap1 =
27984 Size1.getValue().getKnownMinValue() + SrcValOffset1 - MinOffset;
27985 LocationSize Loc0 =
27986 Size0.isScalable() ? Size0 : LocationSize::precise(Overlap0);
27987 LocationSize Loc1 =
27988 Size1.isScalable() ? Size1 : LocationSize::precise(Overlap1);
27989 if (AA->isNoAlias(
27990 MemoryLocation(MUC0.MMO->getValue(), Loc0,
27991 UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()),
27992 MemoryLocation(MUC1.MMO->getValue(), Loc1,
27993 UseTBAA ? MUC1.MMO->getAAInfo() : AAMDNodes())))
27994 return false;
27995 }
27996
27997 // Otherwise we have to assume they alias.
27998 return true;
27999}
28000
28001/// Walk up chain skipping non-aliasing memory nodes,
28002/// looking for aliasing nodes and adding them to the Aliases vector.
28003void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
28004 SmallVectorImpl<SDValue> &Aliases) {
28005 SmallVector<SDValue, 8> Chains; // List of chains to visit.
28006 SmallPtrSet<SDNode *, 16> Visited; // Visited node set.
28007
28008 // Get alias information for node.
28009 // TODO: relax aliasing for unordered atomics (see D66309)
28010 const bool IsLoad = isa<LoadSDNode>(N) && cast<LoadSDNode>(N)->isSimple();
28011
28012 // Starting off.
28013 Chains.push_back(OriginalChain);
28014 unsigned Depth = 0;
28015
28016 // Attempt to improve chain by a single step
28017 auto ImproveChain = [&](SDValue &C) -> bool {
28018 switch (C.getOpcode()) {
28019 case ISD::EntryToken:
28020 // No need to mark EntryToken.
28021 C = SDValue();
28022 return true;
28023 case ISD::LOAD:
28024 case ISD::STORE: {
28025 // Get alias information for C.
28026 // TODO: Relax aliasing for unordered atomics (see D66309)
28027 bool IsOpLoad = isa<LoadSDNode>(C.getNode()) &&
28028 cast<LSBaseSDNode>(C.getNode())->isSimple();
28029 if ((IsLoad && IsOpLoad) || !mayAlias(N, C.getNode())) {
28030 // Look further up the chain.
28031 C = C.getOperand(0);
28032 return true;
28033 }
28034 // Alias, so stop here.
28035 return false;
28036 }
28037
28038 case ISD::CopyFromReg:
28039 // Always forward past CopyFromReg.
28040 C = C.getOperand(0);
28041 return true;
28042
28043 case ISD::LIFETIME_START:
28044 case ISD::LIFETIME_END: {
28045 // We can forward past any lifetime start/end that can be proven not to
28046 // alias the memory access.
28047 if (!mayAlias(N, C.getNode())) {
28048 // Look further up the chain.
28049 C = C.getOperand(0);
28050 return true;
28051 }
28052 return false;
28053 }
28054 default:
28055 return false;
28056 }
28057 };
28058
28059 // Look at each chain and determine if it is an alias. If so, add it to the
28060 // aliases list. If not, then continue up the chain looking for the next
28061 // candidate.
28062 while (!Chains.empty()) {
28063 SDValue Chain = Chains.pop_back_val();
28064
28065 // Don't bother if we've seen Chain before.
28066 if (!Visited.insert(Chain.getNode()).second)
28067 continue;
28068
28069 // For TokenFactor nodes, look at each operand and only continue up the
28070 // chain until we reach the depth limit.
28071 //
28072 // FIXME: The depth check could be made to return the last non-aliasing
28073 // chain we found before we hit a tokenfactor rather than the original
28074 // chain.
28075 if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
28076 Aliases.clear();
28077 Aliases.push_back(OriginalChain);
28078 return;
28079 }
28080
28081 if (Chain.getOpcode() == ISD::TokenFactor) {
28082 // We have to check each of the operands of the token factor for "small"
28083 // token factors, so we queue them up. Adding the operands to the queue
28084 // (stack) in reverse order maintains the original order and increases the
28085 // likelihood that getNode will find a matching token factor (CSE).
28086 if (Chain.getNumOperands() > 16) {
28087 Aliases.push_back(Chain);
28088 continue;
28089 }
28090 for (unsigned n = Chain.getNumOperands(); n;)
28091 Chains.push_back(Chain.getOperand(--n));
28092 ++Depth;
28093 continue;
28094 }
28095 // Everything else
28096 if (ImproveChain(Chain)) {
28097 // Updated Chain Found, Consider new chain if one exists.
28098 if (Chain.getNode())
28099 Chains.push_back(Chain);
28100 ++Depth;
28101 continue;
28102 }
28103 // No Improved Chain Possible, treat as Alias.
28104 Aliases.push_back(Chain);
28105 }
28106}
28107
28108/// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
28109/// (aliasing node.)
28110SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
28111 if (OptLevel == CodeGenOptLevel::None)
28112 return OldChain;
28113
28114 // Ops for replacing token factor.
28115 SmallVector<SDValue, 8> Aliases;
28116
28117 // Accumulate all the aliases to this node.
28118 GatherAllAliases(N, OldChain, Aliases);
28119
28120 // If no operands then chain to entry token.
28121 if (Aliases.empty())
28122 return DAG.getEntryNode();
28123
28124 // If a single operand then chain to it. We don't need to revisit it.
28125 if (Aliases.size() == 1)
28126 return Aliases[0];
28127
28128 // Construct a custom tailored token factor.
28129 return DAG.getTokenFactor(SDLoc(N), Aliases);
28130}
28131
28132// This function tries to collect a bunch of potentially interesting
28133// nodes to improve the chains of, all at once. This might seem
28134// redundant, as this function gets called when visiting every store
28135// node, so why not let the work be done on each store as it's visited?
28136//
28137// I believe this is mainly important because mergeConsecutiveStores
28138// is unable to deal with merging stores of different sizes, so unless
28139// we improve the chains of all the potential candidates up-front
28140// before running mergeConsecutiveStores, it might only see some of
28141// the nodes that will eventually be candidates, and then not be able
28142// to go from a partially-merged state to the desired final
28143// fully-merged state.
28144
28145bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
28146 SmallVector<StoreSDNode *, 8> ChainedStores;
28147 StoreSDNode *STChain = St;
28148 // Intervals records which offsets from BaseIndex have been covered. In
28149 // the common case, every store writes to the immediately preceding address
28150 // and is thus merged with the previous interval at insertion time.
28151
28152 using IMap = llvm::IntervalMap<int64_t, std::monostate, 8,
28153 IntervalMapHalfOpenInfo<int64_t>>;
28154 IMap::Allocator A;
28155 IMap Intervals(A);
28156
28157 // This holds the base pointer, index, and the offset in bytes from the base
28158 // pointer.
28159 const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
28160
28161 // We must have a base and an offset.
28162 if (!BasePtr.getBase().getNode())
28163 return false;
28164
28165 // Do not handle stores to undef base pointers.
28166 if (BasePtr.getBase().isUndef())
28167 return false;
28168
28169 // Do not handle stores to opaque types
28170 if (St->getMemoryVT().isZeroSized())
28171 return false;
28172
28173 // BaseIndexOffset assumes that offsets are fixed-size, which
28174 // is not valid for scalable vectors where the offsets are
28175 // scaled by `vscale`, so bail out early.
28176 if (St->getMemoryVT().isScalableVT())
28177 return false;
28178
28179 // Add ST's interval.
28180 Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8,
28181 std::monostate{});
28182
28183 while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {
28184 if (Chain->getMemoryVT().isScalableVector())
28185 return false;
28186
28187 // If the chain has more than one use, then we can't reorder the mem ops.
28188 if (!SDValue(Chain, 0)->hasOneUse())
28189 break;
28190 // TODO: Relax for unordered atomics (see D66309)
28191 if (!Chain->isSimple() || Chain->isIndexed())
28192 break;
28193
28194 // Find the base pointer and offset for this memory node.
28195 const BaseIndexOffset Ptr = BaseIndexOffset::match(Chain, DAG);
28196 // Check that the base pointer is the same as the original one.
28197 int64_t Offset;
28198 if (!BasePtr.equalBaseIndex(Ptr, DAG, Offset))
28199 break;
28200 int64_t Length = (Chain->getMemoryVT().getSizeInBits() + 7) / 8;
28201 // Make sure we don't overlap with other intervals by checking the ones to
28202 // the left or right before inserting.
28203 auto I = Intervals.find(Offset);
28204 // If there's a next interval, we should end before it.
28205 if (I != Intervals.end() && I.start() < (Offset + Length))
28206 break;
28207 // If there's a previous interval, we should start after it.
28208 if (I != Intervals.begin() && (--I).stop() <= Offset)
28209 break;
28210 Intervals.insert(Offset, Offset + Length, std::monostate{});
28211
28212 ChainedStores.push_back(Chain);
28213 STChain = Chain;
28214 }
28215
28216 // If we didn't find a chained store, exit.
28217 if (ChainedStores.empty())
28218 return false;
28219
28220 // Improve all chained stores (St and ChainedStores members) starting from
28221 // where the store chain ended and return single TokenFactor.
28222 SDValue NewChain = STChain->getChain();
28223 SmallVector<SDValue, 8> TFOps;
28224 for (unsigned I = ChainedStores.size(); I;) {
28225 StoreSDNode *S = ChainedStores[--I];
28226 SDValue BetterChain = FindBetterChain(S, NewChain);
28227 S = cast<StoreSDNode>(DAG.UpdateNodeOperands(
28228 S, BetterChain, S->getOperand(1), S->getOperand(2), S->getOperand(3)));
28229 TFOps.push_back(SDValue(S, 0));
28230 ChainedStores[I] = S;
28231 }
28232
28233 // Improve St's chain. Use a new node to avoid creating a loop from CombineTo.
28234 SDValue BetterChain = FindBetterChain(St, NewChain);
28235 SDValue NewST;
28236 if (St->isTruncatingStore())
28237 NewST = DAG.getTruncStore(BetterChain, SDLoc(St), St->getValue(),
28238 St->getBasePtr(), St->getMemoryVT(),
28239 St->getMemOperand());
28240 else
28241 NewST = DAG.getStore(BetterChain, SDLoc(St), St->getValue(),
28242 St->getBasePtr(), St->getMemOperand());
28243
28244 TFOps.push_back(NewST);
28245
28246 // If we improved every element of TFOps, then we've lost the dependence on
28247 // NewChain to successors of St and we need to add it back to TFOps. Do so at
28248 // the beginning to keep relative order consistent with FindBetterChains.
28249 auto hasImprovedChain = [&](SDValue ST) -> bool {
28250 return ST->getOperand(0) != NewChain;
28251 };
28252 bool AddNewChain = llvm::all_of(TFOps, hasImprovedChain);
28253 if (AddNewChain)
28254 TFOps.insert(TFOps.begin(), NewChain);
28255
28256 SDValue TF = DAG.getTokenFactor(SDLoc(STChain), TFOps);
28257 CombineTo(St, TF);
28258
28259 // Add TF and its operands to the worklist.
28260 AddToWorklist(TF.getNode());
28261 for (const SDValue &Op : TF->ops())
28262 AddToWorklist(Op.getNode());
28263 AddToWorklist(STChain);
28264 return true;
28265}
28266
28267bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
28268 if (OptLevel == CodeGenOptLevel::None)
28269 return false;
28270
28272
28273 // We must have a base and an offset.
28274 if (!BasePtr.getBase().getNode())
28275 return false;
28276
28277 // Do not handle stores to undef base pointers.
28278 if (BasePtr.getBase().isUndef())
28279 return false;
28280
28281 // Directly improve a chain of disjoint stores starting at St.
28282 if (parallelizeChainedStores(St))
28283 return true;
28284
28285 // Improve St's chain.
28286 SDValue BetterChain = FindBetterChain(St, St->getChain());
28287 if (St->getChain() != BetterChain) {
28288 replaceStoreChain(St, BetterChain);
28289 return true;
28290 }
28291 return false;
28292}
28293
28294/// This is the entry point for the file.
28295void SelectionDAG::Combine(CombineLevel Level, AAResults *AA,
28296 CodeGenOptLevel OptLevel) {
28297 /// This is the main entry point to this class.
28298 DAGCombiner(*this, AA, OptLevel).Run(Level);
28299}
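
The interval bookkeeping in parallelizeChainedStores above records a half-open byte range [Offset, Offset + Length) for each chained store and only accepts a candidate whose range is disjoint from every range recorded so far. The following standalone sketch is illustrative only and is not part of DAGCombiner.cpp: it substitutes a plain std::map for llvm::IntervalMap, and the DisjointIntervals name is made up for this example.

#include <cstdint>
#include <iterator>
#include <map>

// Simplified stand-in for the IntervalMap-based bookkeeping: record
// half-open byte ranges [Start, End) and reject any range that overlaps
// one already recorded. Touching ranges (End == next Start) are allowed,
// matching the half-open interval convention used above.
class DisjointIntervals {
  std::map<int64_t, int64_t> Ranges; // Start -> End.

public:
  bool insertIfDisjoint(int64_t Start, int64_t End) {
    // First recorded range that starts at or after Start.
    auto Next = Ranges.lower_bound(Start);
    // The new range must end at or before the next one begins.
    if (Next != Ranges.end() && Next->first < End)
      return false;
    // The new range must begin at or after the previous one ends.
    if (Next != Ranges.begin() && std::prev(Next)->second > Start)
      return false;
    Ranges.emplace(Start, End);
    return true;
  }
};

int main() {
  DisjointIntervals Intervals;
  bool A = Intervals.insertIfDisjoint(0, 4);  // 4-byte store at offset 0.
  bool B = Intervals.insertIfDisjoint(4, 8);  // Adjacent store: accepted.
  bool C = Intervals.insertIfDisjoint(6, 10); // Overlaps [4, 8): rejected.
  return (A && B && !C) ? 0 : 1;
}

In the DAG combiner itself the same check is done with llvm::IntervalMap, whose insertion coalesces adjacent ranges carrying equal values, which is why the comment above notes that a store to the next address is merged with the previous interval at insertion time.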
static bool mayAlias(MachineInstr &MIa, SmallVectorImpl< MachineInstr * > &MemInsns, AliasAnalysis *AA)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< bool > UseAA("aarch64-use-aa", cl::init(true), cl::desc("Enable the use of AA during codegen."))
static const LLT S1
amdgpu AMDGPU Register Bank Select
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
This file contains the simple types necessary to represent the attributes associated with functions a...
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL, const TargetLowering &TLI)
For the instruction sequence of store below, F and I values are bundled together as an i64 value befo...
static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I)
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
static bool canFoldInAddressingMode(GLoadStore *MI, const TargetLowering &TLI, MachineRegisterInfo &MRI)
Return true if 'MI' is a load or a store that may be fold it's address operand into the load / store ...
static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I)
static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques=false)
static cl::opt< bool > EnableShrinkLoadReplaceStoreWithStore("combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true), cl::desc("DAG combiner enable load/<replace bytes>/store with " "a narrower store"))
static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0, unsigned ExtOpc, SmallVectorImpl< SDNode * > &ExtendNodes, const TargetLowering &TLI)
static cl::opt< unsigned > TokenFactorInlineLimit("combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048), cl::desc("Limit the number of operands to inline for Token Factors"))
static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner, const TargetLowering &TLI, EVT VT, bool LegalOperations, SDNode *N, SDValue N0, ISD::LoadExtType ExtLoadType, ISD::NodeType ExtOpc, bool NonNegZExt=false)
static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG)
static SDNode * getBuildPairElt(SDNode *N, unsigned i)
static SDValue tryToFoldExtendSelectLoad(SDNode *N, const TargetLowering &TLI, SelectionDAG &DAG, CombineLevel Level)
Fold (sext (select c, load x, load y)) -> (select c, sextload x, sextload y) (zext (select c,...
static SDValue foldBitOrderCrossLogicOp(SDNode *N, SelectionDAG &DAG)
static SDValue tryToFoldExtendOfConstant(SDNode *N, const SDLoc &DL, const TargetLowering &TLI, SelectionDAG &DAG, bool LegalTypes)
Try to fold a sext/zext/aext dag node into a ConstantSDNode or a build_vector of constants.
static SDValue foldBoolSelectToLogic(SDNode *N, SelectionDAG &DAG)
static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG, const SDLoc &DL)
If a vector binop is performed on splat values, it may be profitable to extract, scalarize,...
static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift, SDValue ExtractFrom, SDValue &Mask, const SDLoc &DL)
Helper function for visitOR to extract the needed side of a rotate idiom from a shl/srl/mul/udiv.
static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec, bool &IsLoad, bool &IsMasked, SDValue &Ptr, const TargetLowering &TLI)
bool refineUniformBase(SDValue &BasePtr, SDValue &Index, bool IndexIsScaled, SelectionDAG &DAG, const SDLoc &DL)
static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned, const TargetLowering &TLI)
Return true if divmod libcall is available.
static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG)
static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1, SelectionDAG &DAG, const SDLoc &DL)
Given the operands of an add/sub operation, see if the 2nd operand is a masked 0/1 whose source opera...
static bool mergeEltWithShuffle(SDValue &X, SDValue &Y, ArrayRef< int > Mask, SmallVectorImpl< int > &NewMask, SDValue Elt, unsigned InsIndex)
static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf)
If we have a unary shuffle of a shuffle, see if it can be folded away completely.
static bool canSplitIdx(LoadSDNode *LD)
static SDValue ShrinkLoadReplaceStoreWithStore(const std::pair< unsigned, unsigned > &MaskInfo, SDValue IVal, StoreSDNode *St, DAGCombiner *DC)
Check to see if IVal is something that provides a value as specified by MaskInfo.
static cl::opt< bool > StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden, cl::desc("Bypass the profitability model of load slicing"), cl::init(false))
Hidden option to stress test load slicing, i.e., when this option is enabled, load slicing bypasses m...
static cl::opt< bool > UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true), cl::desc("Enable DAG combiner's use of TBAA"))
static void adjustCostForPairing(SmallVectorImpl< LoadedSlice > &LoadedSlices, LoadedSlice::Cost &GlobalLSCost)
Adjust the GlobalLSCost according to the target paring capabilities and the layout of the slices.
static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract, SelectionDAG &DAG, bool LegalOperations)
static SDValue combineCarryDiamond(SelectionDAG &DAG, const TargetLowering &TLI, SDValue N0, SDValue N1, SDNode *N)
static SDValue foldExtendVectorInregToExtendOfSubvector(SDNode *N, const SDLoc &DL, const TargetLowering &TLI, SelectionDAG &DAG, bool LegalOperations)
static bool isCompatibleLoad(SDValue N, unsigned ExtOpcode)
Check if N satisfies: N is used once.
static SDValue foldLogicTreeOfShifts(SDNode *N, SDValue LeftHand, SDValue RightHand, SelectionDAG &DAG)
Given a tree of logic operations with shape like (LOGIC (LOGIC (X, Y), LOGIC (Z, Y))) try to match an...
static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG)
static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG, bool LegalOperations)
If we are extracting a subvector produced by a wide binary operator try to use a narrow binary operat...
static SDValue takeInexpensiveLog2(SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Op, unsigned Depth, bool AssumeNonZero)
static SDValue combineSelectAsExtAnd(SDValue Cond, SDValue T, SDValue F, const SDLoc &DL, SelectionDAG &DAG)
static bool areUsedBitsDense(const APInt &UsedBits)
Check that all bits set in UsedBits form a dense region, i.e., UsedBits looks like 0....
static SDValue getInputChainForNode(SDNode *N)
Given a node, return its input chain if it has one, otherwise return a null sd operand.
static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG)
If we are extracting a subvector from a wide vector load, convert to a narrow load to eliminate the e...
static ElementCount numVectorEltsOrZero(EVT T)
static SDValue foldSelectWithIdentityConstant(SDNode *N, SelectionDAG &DAG, bool ShouldCommuteOperands)
This inverts a canonicalization in IR that replaces a variable select arm with an identity constant.
static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG)
Given an extending node with a pop-count operand, if the target does not support a pop-count in the n...
static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG)
static SDValue foldSelectOfConstantsUsingSra(SDNode *N, SelectionDAG &DAG)
If a (v)select has a condition value that is a sign-bit test, try to smear the condition operand sign...
static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG)
If a shuffle inserts exactly one element from a source vector operand into another vector operand and...
static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner, const TargetLowering &TLI, EVT VT, bool LegalOperations, SDNode *N, SDValue N0, ISD::LoadExtType ExtLoadType)
static SDValue foldAndToUsubsat(SDNode *N, SelectionDAG &DAG, const SDLoc &DL)
For targets that support usubsat, match a bit-hack form of that operation that ends in 'and' and conv...
static cl::opt< bool > CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden, cl::desc("Enable DAG combiner's use of IR alias analysis"))
static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy)
static SDValue formSplatFromShuffles(ShuffleVectorSDNode *OuterShuf, SelectionDAG &DAG)
Combine shuffle of shuffle of the form: shuf (shuf X, undef, InnerMask), undef, OuterMask --> splat X...
static bool isDivisorPowerOfTwo(SDValue Divisor)
static bool matchRotateHalf(const SelectionDAG &DAG, SDValue Op, SDValue &Shift, SDValue &Mask)
Match "(X shl/srl V1) & V2" where V2 may not be present.
static SDValue foldExtractSubvectorFromShuffleVector(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalOperations)
Given EXTRACT_SUBVECTOR(VECTOR_SHUFFLE(Op0, Op1, Mask)), try to produce VECTOR_SHUFFLE(EXTRACT_SUBVEC...
static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG)
static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG, bool LegalOperations)
Transform a vector binary operation into a scalar binary operation by moving the math/logic after an ...
static bool hasNoInfs(const TargetOptions &Options, SDValue N)
static SDValue combineShuffleOfBitcast(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalOperations)
static std::optional< EVT > canCombineShuffleToExtendVectorInreg(unsigned Opcode, EVT VT, std::function< bool(unsigned)> Match, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes, bool LegalOperations)
static SDValue PerformUMinFpToSatCombine(SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, SelectionDAG &DAG)
static SDValue combineShuffleToAnyExtendVectorInreg(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalOperations)
static SDValue foldAddSubOfSignBit(SDNode *N, const SDLoc &DL, SelectionDAG &DAG)
Try to fold a 'not' shifted sign-bit with add/sub with constant operand into a shift and add with a d...
static SDValue stripTruncAndExt(SDValue Value)
static SDValue combineUADDO_CARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG, SDValue X, SDValue Carry0, SDValue Carry1, SDNode *N)
If we are facing some sort of diamond carry propapagtion pattern try to break it up to generate somet...
static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG)
Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles followed by concatenatio...
static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG)
static auto getFirstIndexOf(R &&Range, const T &Val)
static std::pair< unsigned, unsigned > CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain)
Check to see if V is (and load (ptr), imm), where the load is having specific bytes cleared out.
static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef< int > Mask)
If the shuffle mask is taking exactly one element from the first vector operand and passing through a...
static bool shouldConvertSelectOfConstantsToMath(const SDValue &Cond, EVT VT, const TargetLowering &TLI)
static cl::opt< bool > EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true), cl::desc("DAG combiner enable merging multiple stores " "into a wider store"))
static bool isContractableFMUL(const TargetOptions &Options, SDValue N)
static cl::opt< bool > MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true), cl::desc("DAG combiner may split indexing from loads"))
static bool areSlicesNextToEachOther(const LoadedSlice &First, const LoadedSlice &Second)
Check whether or not First and Second are next to each other in memory.
static SDValue stripConstantMask(const SelectionDAG &DAG, SDValue Op, SDValue &Mask)
static bool arebothOperandsNotSNan(SDValue Operand1, SDValue Operand2, SelectionDAG &DAG)
static bool isBSwapHWordPair(SDValue N, MutableArrayRef< SDNode * > Parts)
static bool CanCombineFCOPYSIGN_EXTEND_ROUND(EVT XTy, EVT YTy)
copysign(x, fp_extend(y)) -> copysign(x, y) copysign(x, fp_round(y)) -> copysign(x,...
static unsigned getMinMaxOpcodeForFP(SDValue Operand1, SDValue Operand2, ISD::CondCode CC, unsigned OrAndOpcode, SelectionDAG &DAG, bool isFMAXNUMFMINNUM_IEEE, bool isFMAXNUMFMINNUM)
static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI)
static SDValue getTruncatedUSUBSAT(EVT DstVT, EVT SrcVT, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &DL)
static SDNode * getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad, bool &IsMasked, SDValue &Ptr, SDValue &BasePtr, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG, const TargetLowering &TLI)
static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS, SDValue RHS, const TargetLowering &TLI)
static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG, const TargetLowering &TLI, bool Force)
Flips a boolean if it is cheaper to compute.
static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op, KnownBits &Known)
static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG, const TargetLowering &TLI, EVT VT, bool LegalOperations, SDNode *N, SDValue N0, ISD::LoadExtType ExtLoadType, ISD::NodeType ExtOpc)
static SDValue getSubVectorSrc(SDValue V, SDValue Index, EVT SubVT)
static SDValue combineConcatVectorOfShuffleAndItsOperands(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes, bool LegalOperations)
bool refineIndexType(SDValue &Index, ISD::MemIndexType &IndexType, EVT DataVT, SelectionDAG &DAG)
static cl::opt< bool > EnableVectorFCopySignExtendRound("combiner-vector-fcopysign-extend-round", cl::Hidden, cl::init(false), cl::desc("Enable merging extends and rounds into FCOPYSIGN on vector types"))
static SDValue combineMinNumMaxNumImpl(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode CC, const TargetLowering &TLI, SelectionDAG &DAG)
static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG)
If we have a shift-by-constant of a bitwise logic op that itself has a shift-by-constant operand with...
static SDValue widenAbs(SDNode *Extend, SelectionDAG &DAG)
static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset=0)
static ConstantSDNode * getAsNonOpaqueConstant(SDValue N)
If N is a ConstantSDNode with isOpaque() == false return it casted to a ConstantSDNode pointer else n...
static bool arebothOperandsNotNan(SDValue Operand1, SDValue Operand2, SelectionDAG &DAG)
static SDValue combineShiftToMULH(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI)
static SDValue PerformMinMaxFpToSatCombine(SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, SelectionDAG &DAG)
static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize, SelectionDAG &DAG, bool IsRotate)
static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1, SDNode *N)
OR combines for which the commuted variant will be tried as well.
static SDValue combineShuffleToZeroExtendVectorInReg(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalOperations)
static cl::opt< bool > EnableReduceLoadOpStoreWidth("combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true), cl::desc("DAG combiner enable reducing the width of load/op/store " "sequence"))
static bool shouldCombineToPostInc(SDNode *N, SDValue Ptr, SDNode *PtrUse, SDValue &BasePtr, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG, const TargetLowering &TLI)
static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG, bool LegalOperations)
static SDValue combineConcatVectorOfCasts(SDNode *N, SelectionDAG &DAG)
static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG)
Try to replace shift/logic that tests if a bit is clear with mask + setcc.
static SDValue matchBSwapHWordOrAndAnd(const TargetLowering &TLI, SelectionDAG &DAG, SDNode *N, SDValue N0, SDValue N1, EVT VT, EVT ShiftAmountTy)
static bool areBitwiseNotOfEachother(SDValue Op0, SDValue Op1)
static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI)
static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG)
static SDValue foldVSelectToSignBitSplatMask(SDNode *N, SelectionDAG &DAG)
static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, const SDLoc &DL, SelectionDAG &DAG)
static SDValue combineConcatVectorOfConcatVectors(SDNode *N, SelectionDAG &DAG)
static SDValue getAsCarry(const TargetLowering &TLI, SDValue V, bool ForceCarryReconstruction=false)
static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG)
static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG)
static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT, SelectionDAG &DAG, bool LegalOperations)
static cl::opt< unsigned > StoreMergeDependenceLimit("combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10), cl::desc("Limit the number of times for the same StoreNode and RootNode " "to bail out in store merging dependence check"))
static cl::opt< std::string > CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden, cl::desc("Only use DAG-combiner alias analysis in this" " function"))
static SDValue foldLogicOfShifts(SDNode *N, SDValue LogicOp, SDValue ShiftOp, SelectionDAG &DAG)
Given a bitwise logic operation N with a matching bitwise logic operand, fold a pattern where 2 of th...
static bool isSlicingProfitable(SmallVectorImpl< LoadedSlice > &LoadedSlices, const APInt &UsedBits, bool ForCodeSize)
Check the profitability of all involved LoadedSlice.
static bool isBSwapHWordElement(SDValue N, MutableArrayRef< SDNode * > Parts)
Return true if the specified node is an element that makes up a 32-bit packed halfword byteswap.
static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, unsigned &BW, bool &Unsigned, SelectionDAG &DAG)
static std::optional< SDByteProvider > calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth, std::optional< uint64_t > VectorIndex, unsigned StartingIndex=0)
static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG)
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file provides an implementation of debug counters.
#define DEBUG_COUNTER(VARNAME, COUNTERNAME, DESC)
Definition: DebugCounter.h:182
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseMap class.
uint64_t Addr
uint64_t Size
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
static bool isSigned(unsigned int Opcode)
static bool isUndef(ArrayRef< int > Mask)
static MaybeAlign getAlign(Value *Ptr)
Definition: IRBuilder.cpp:530
iv Induction Variable Users
Definition: IVUsers.cpp:48
static Value * simplifyDivRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, const SimplifyQuery &Q, unsigned MaxRecurse)
Check for common or similar folds of integer division or integer remainder.
This file implements a coalescing interval map for small objects.
static LVOptions Options
Definition: LVOptions.cpp:25
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
This file provides utility analysis objects describing memory locations.
This file contains the declarations for metadata subclasses.
#define T1
LLVMContext & Context
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
#define P(N)
const SmallVectorImpl< MachineOperand > & Cond
Contains matchers for matching SelectionDAG nodes and values.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isSimple(Instruction *I)
This file contains some templates that are useful if you are working with the STL at all.
static cl::opt< bool > UseTBAA("use-tbaa-in-sched-mi", cl::Hidden, cl::init(true), cl::desc("Enable use of TBAA during MI DAG construction"))
This file implements a set that has insertion order iteration characteristics.
This file implements the SmallBitVector class.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
This file describes how to lower LLVM code to machine code.
static constexpr int Concat[]
Value * RHS
Value * LHS
bool isNegative() const
Definition: APFloat.h:1295
bool isNormal() const
Definition: APFloat.h:1299
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
Definition: APFloat.h:1278
const fltSemantics & getSemantics() const
Definition: APFloat.h:1303
bool isNaN() const
Definition: APFloat.h:1293
APInt bitcastToAPInt() const
Definition: APFloat.h:1210
bool isLargest() const
Definition: APFloat.h:1311
bool isIEEE() const
Definition: APFloat.h:1313
bool isInfinity() const
Definition: APFloat.h:1292
Class for arbitrary precision integers.
Definition: APInt.h:76
APInt umul_ov(const APInt &RHS, bool &Overflow) const
Definition: APInt.cpp:1977
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:212
static void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
Dual division/remainder interface.
Definition: APInt.cpp:1764
APInt getLoBits(unsigned numBits) const
Compute an APInt containing numBits lowbits from this APInt.
Definition: APInt.cpp:613
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition: APInt.h:427
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:981
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition: APInt.h:207
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1491
unsigned popcount() const
Count the number of bits set.
Definition: APInt.h:1620
APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition: APInt.cpp:1002
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1463
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:906
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1308
APInt abs() const
Get the absolute value.
Definition: APInt.h:1737
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition: APInt.h:349
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1160
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition: APInt.h:236
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:358
bool isSignMask() const
Check if the APInt's value is returned by getSignMask.
Definition: APInt.h:444
APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition: APInt.cpp:1672
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1439
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition: APInt.h:1089
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:307
bool intersects(const APInt &RHS) const
This operation tests if there are any pairs of corresponding bits between this APInt and RHS that are...
Definition: APInt.h:1227
int32_t exactLogBase2() const
Definition: APInt.h:1725
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition: APInt.h:1589
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition: APInt.h:1548
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition: APInt.cpp:620
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition: APInt.h:1482
unsigned countLeadingZeros() const
Definition: APInt.h:1556
unsigned logBase2() const
Definition: APInt.h:1703
bool isShiftedMask() const
Return true if this APInt value contains a non-empty sequence of ones with the remainder zero.
Definition: APInt.h:488
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition: APInt.h:453
bool getBoolValue() const
Convert APInt to a boolean value.
Definition: APInt.h:449
APInt smul_ov(const APInt &RHS, bool &Overflow) const
Definition: APInt.cpp:1966
bool isMask(unsigned numBits) const
Definition: APInt.h:466
bool ule(const APInt &RHS) const
Unsigned less or equal comparison.
Definition: APInt.h:1128
APInt sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:954
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1235
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:418
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:284
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:274
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:178
APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
Definition: APInt.cpp:453
bool isOne() const
Determine if this is a value of 1.
Definition: APInt.h:367
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition: APInt.h:264
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:217
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1513
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:836
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:829
unsigned countr_one() const
Count the number of trailing one bits.
Definition: APInt.h:1606
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition: APInt.h:1199
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
ArrayRef< T > drop_front(size_t N=1) const
Drop the first N elements of the array.
Definition: ArrayRef.h:204
iterator end() const
Definition: ArrayRef.h:154
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
iterator begin() const
Definition: ArrayRef.h:153
static ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
Definition: Type.cpp:647
static BaseIndexOffset match(const SDNode *N, const SelectionDAG &DAG)
Parses tree in N for base, index, offset addresses.
static bool computeAliasing(const SDNode *Op0, const LocationSize NumBytes0, const SDNode *Op1, const LocationSize NumBytes1, const SelectionDAG &DAG, bool &IsAlias)
A "pseudo-class" with methods for operating on BUILD_VECTORs.
Represents known origin of an individual byte in combine pattern.
Definition: ByteProvider.h:30
static ByteProvider getConstantZero()
Definition: ByteProvider.h:73
static ByteProvider getSrc(std::optional< ISelOp > Val, int64_t ByteOffset, int64_t VectorOffset)
Definition: ByteProvider.h:66
Combiner implementation.
Definition: Combiner.h:34
static Constant * get(ArrayType *T, ArrayRef< Constant * > V)
Definition: Constants.cpp:1291
const APFloat & getValueAPF() const
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
bool isNegative() const
Return true if the value is negative.
bool isZero() const
Return true if the value is positive or negative zero.
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:268
const ConstantInt * getConstantIntValue() const
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
This is an important base class in LLVM.
Definition: Constant.h:41
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
bool isLittleEndian() const
Layout endianness...
Definition: DataLayout.h:238
bool isBigEndian() const
Definition: DataLayout.h:239
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:504
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:874
static bool shouldExecute(unsigned CounterName)
Definition: DebugCounter.h:72
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
bool erase(const KeyT &Val)
Definition: DenseMap.h:329
iterator end()
Definition: DenseMap.h:84
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:220
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition: TypeSize.h:296
constexpr bool isScalar() const
Exactly one element.
Definition: TypeSize.h:307
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:677
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:338
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:677
This class is used to form a handle around another node that is persistent and is updated across invo...
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
Base class for LoadSDNode and StoreSDNode.
bool isUnindexed() const
Return true if this is NOT a pre/post inc/dec load/store.
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
const SDValue & getOffset() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
bool hasValue() const
static LocationSize precise(uint64_t Value)
static constexpr LocationSize beforeOrAfterPointer()
Any location before or after the base pointer (but still within the underlying object).
bool isScalable() const
TypeSize getValue() const
Machine Value Type.
SimpleValueType SimpleTy
static auto all_valuetypes()
SimpleValueType Iteration.
static MVT getIntegerVT(unsigned BitWidth)
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
Function & getFunction()
Return the LLVM function that this machine code represents.
A description of a memory reference used in the backend.
const PseudoSourceValue * getPseudoValue() const
Flags
Flags values. These may be or'd together.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MONonTemporal
The memory access is non-temporal.
@ MOStore
The memory access writes data.
Flags getFlags() const
Return the raw flags of the source value,.
const Value * getValue() const
Return the base address of the memory access.
This class is used to represent an MGATHER node.
const SDValue & getPassThru() const
ISD::LoadExtType getExtensionType() const
const SDValue & getIndex() const
const SDValue & getScale() const
const SDValue & getBasePtr() const
const SDValue & getMask() const
ISD::MemIndexType getIndexType() const
How is Index applied to BasePtr when computing addresses.
This class is used to represent an MLOAD node.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
const SDValue & getMask() const
const SDValue & getPassThru() const
const SDValue & getOffset() const
bool isUnindexed() const
Return true if this is NOT a pre/post inc/dec load/store.
ISD::MemIndexedMode getAddressingMode() const
Return the addressing mode for this load or store: unindexed, pre-inc, pre-dec, post-inc,...
This class is used to represent an MSCATTER node.
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
This class is used to represent an MSTORE node.
bool isCompressingStore() const
Returns true if the op does a compression to the vector before storing.
const SDValue & getOffset() const
const SDValue & getBasePtr() const
const SDValue & getMask() const
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
unsigned getAddressSpace() const
Return the address space for the associated pointer.
const MDNode * getRanges() const
Returns the Ranges that describes the dereference.
Align getAlign() const
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
Align getOriginalAlign() const
Returns alignment and volatility of the memory access.
bool isSimple() const
Returns true if the memory operation is neither atomic or volatile.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getBasePtr() const
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
bool isNonTemporal() const
bool isInvariant() const
bool isDereferenceable() const
EVT getMemoryVT() const
Return the type of the in-memory value.
Representation for a specific memory location.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:307
MutableArrayRef< T > take_back(size_t N=1) const
Return a copy of *this with only the last N elements.
Definition: ArrayRef.h:419
iterator end() const
Definition: ArrayRef.h:357
iterator begin() const
Definition: ArrayRef.h:356
MutableArrayRef< T > take_front(size_t N=1) const
Return a copy of *this with only the first N elements.
Definition: ArrayRef.h:412
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
void dump() const
Dump this node, for debugging.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
iterator_range< value_op_iterator > op_values() const
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
size_t use_size() const
Return the number of uses of this node.
void intersectFlagsWith(const SDNodeFlags Flags)
Clear any flags in this node that aren't also set in Flags.
TypeSize getValueSizeInBits(unsigned ResNo) const
Returns MVT::getSizeInBits(getValueType(ResNo)).
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
bool use_empty() const
Return true if there are no uses of this node.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
unsigned getNumOperands() const
Return the number of values used by this operation.
SDVTList getVTList() const
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
bool isOperandOf(const SDNode *N) const
Return true if this node is an operand of N.
const APInt & getConstantOperandAPInt(unsigned Num) const
Helper method returns the APInt of a ConstantSDNode operand.
bool isPredecessorOf(const SDNode *N) const
Return true if this node is a predecessor of N.
bool hasAnyUseOfValue(unsigned Value) const
Return true if there are any use of the indicated value.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
void setFlags(SDNodeFlags NewFlags)
op_iterator op_end() const
op_iterator op_begin() const
static use_iterator use_end()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
bool reachesChainWithoutSideEffects(SDValue Dest, unsigned Depth=2) const
Return true if this operand (which must be a chain) reaches the specified operand without crossing an...
SDValue getValue(unsigned R) const
void dump() const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
bool use_empty() const
Return true if there are no nodes using value ResNo of Node.
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
unsigned getResNo() const
get the index which selects a specific result in the SDNode
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
Targets can subclass this to parameterize the SelectionDAG lowering and instruction selection process...
virtual bool disableGenericCombines(CodeGenOptLevel OptLevel) const
Help to insert SDNodeFlags automatically in transforming.
Definition: SelectionDAG.h:361
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:225
bool willNotOverflowAdd(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the addition of 2 nodes can never overflow.
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT, unsigned Opcode)
Convert Op, which must be of integer type, to the integer type VT, by either any/sign/zero-extending ...
Definition: SelectionDAG.h:954
SDValue getSplatSourceVector(SDValue V, int &SplatIndex)
If V is a splatted value, return the source vector and its splat index.
unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
const SDValue & getRoot() const
Return the root tag of the SelectionDAG.
Definition: SelectionDAG.h:551
SDValue getMaskedGather(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, ISD::LoadExtType ExtTy)
bool isKnownNeverSNaN(SDValue Op, unsigned Depth=0) const
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS)
Helper function to make it easier to build Select's if you just have operands and don't want to check...
const TargetSubtargetInfo & getSubtarget() const
Definition: SelectionDAG.h:474
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
SDValue getSplatValue(SDValue V, bool LegalTypes=false)
If V is a splat vector, return its scalar source operand by extracting that element from the source v...
SDValue FoldSetCC(EVT VT, SDValue N1, SDValue N2, ISD::CondCode Cond, const SDLoc &dl)
Constant fold a setcc to true or false.
SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm, bool ConstantFold=true)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offs=0, bool isT=false, unsigned TargetFlags=0)
SDNode * isConstantIntBuildVectorOrConstantInt(SDValue N) const
Test whether the given value is a constant int or similar node.
SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-...
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
bool isADDLike(SDValue Op) const
Return true if the specified operand is an ISD::OR or ISD::XOR node that can be treated as an ISD::AD...
bool haveNoCommonBitsSet(SDValue A, SDValue B) const
Return true if A and B have no common bits set.
SDValue getGetFPEnv(SDValue Chain, const SDLoc &dl, SDValue Ptr, EVT MemVT, MachineMemOperand *MMO)
SDValue getAssertAlign(const SDLoc &DL, SDValue V, Align A)
Return an AssertAlignSDNode.
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getStepVector(const SDLoc &DL, EVT ResVT, const APInt &StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
bool willNotOverflowSub(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the sub of 2 nodes can never overflow.
bool shouldOptForSize() const
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:478
static constexpr unsigned MaxRecursionDepth
Definition: SelectionDAG.h:448
SDValue getIndexedMaskedLoad(SDValue OrigLoad, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
APInt computeVectorKnownZeroElements(SDValue Op, const APInt &DemandedElts, unsigned Depth=0) const
For each demanded element of a vector, see if it is known to be zero.
std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
void salvageDebugInfo(SDNode &N)
To be invoked on an SDNode that is slated to be erased.
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:828
bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
void DeleteNode(SDNode *N)
Remove the specified node from the system.
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
SDValue simplifySelect(SDValue Cond, SDValue TVal, SDValue FVal)
Try to simplify a select/vselect into 1 of its operands or a constant.
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:472
SDNode * isConstantFPBuildVectorOrConstantFP(SDValue N) const
Test whether the given value is a constant FP or similar node.
SDValue getTokenFactor(const SDLoc &DL, SmallVectorImpl< SDValue > &Vals)
Creates a new TokenFactor containing Vals.
bool LegalizeOp(SDNode *N, SmallSetVector< SDNode *, 16 > &UpdatedNodes)
Transforms a SelectionDAG node and any operands to it into a node that is compatible with the target ...
bool doesNodeExist(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops)
Check if a node exists without modifying its flags.
void Combine(CombineLevel Level, AAResults *AA, CodeGenOptLevel OptLevel)
This iterates over the nodes in the SelectionDAG, folding certain types of nodes together,...
bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base, unsigned Bytes, int Dist) const
Return true if loads are next to each other and can be merged.
SDValue getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
bool willNotOverflowMul(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the mul of 2 nodes can never overflow.
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
Definition: SelectionDAG.h:659
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
bool isGuaranteedNotToBeUndefOrPoison(SDValue Op, bool PoisonOnly=false, unsigned Depth=0) const
Return true if this function can prove that Op is never poison and, if PoisonOnly is false,...
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
Definition: SelectionDAG.h:862
MaybeAlign InferPtrAlign(SDValue Ptr) const
Infer alignment of a load / store address.
bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
void RemoveDeadNodes()
This method deletes all unreachable nodes in the SelectionDAG.
bool isConstantValueOfAnyType(SDValue N) const
SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
bool isKnownToBeAPowerOfTwo(SDValue Val, unsigned Depth=0) const
Test if the given value is known to have exactly one bit set.
bool isKnownNeverZero(SDValue Op, unsigned Depth=0) const
Test whether the given SDValue is known to contain non-zero value(s).
SDValue getIndexedStore(SDValue OrigStore, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
SDValue getSetFPEnv(SDValue Chain, const SDLoc &dl, SDValue Ptr, EVT MemVT, MachineMemOperand *MMO)
SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Base, SDValue Offset, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
static const fltSemantics & EVTToAPFloatSemantics(EVT VT)
Returns an APFloat semantics tag appropriate for the given type.
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:473
SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
iterator_range< allnodes_iterator > allnodes()
Definition: SelectionDAG.h:543
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
SDValue getLoadVP(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL, MachinePointerInfo PtrInfo, EVT MemVT, Align Alignment, MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo, const MDNode *Ranges=nullptr, bool IsExpanding=false)
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getScatterVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
bool isKnownNeverNaN(SDValue Op, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN.
SDValue getIndexedMaskedStore(SDValue OrigStore, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
const TargetLibraryInfo & getLibInfo() const
Definition: SelectionDAG.h:479
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
bool MaskedVectorIsZero(SDValue Op, const APInt &DemandedElts, unsigned Depth=0) const
Return true if 'Op' is known to be zero in DemandedElts.
SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:469
bool canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts, bool PoisonOnly=false, bool ConsiderFlags=true, unsigned Depth=0) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
OverflowKind computeOverflowForUnsignedAdd(SDValue N0, SDValue N1) const
Determine if the result of the unsigned addition of 2 nodes can overflow.
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
Definition: SelectionDAG.h:845
SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops)
bool isSafeToSpeculativelyExecuteNode(const SDNode *N) const
Check if the provided node is save to speculatively executed given its current arguments.
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
LLVMContext * getContext() const
Definition: SelectionDAG.h:485
SDValue simplifyFPBinop(unsigned Opcode, SDValue X, SDValue Y, SDNodeFlags Flags)
Try to simplify a floating-point binary operation into 1 of its operands or a constant.
const SDValue & setRoot(SDValue N)
Set the current root tag of the SelectionDAG.
Definition: SelectionDAG.h:560
SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL, bool LegalTypes=true)
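Shift amounts use a target-specific type, so combines build them through this helper rather than a plain getConstant. A minimal sketch; the wrapper function is hypothetical.
SDValue shiftLeftByImm(SelectionDAG &DAG, const SDLoc &DL, SDValue X,
                       uint64_t Amt) {
  EVT VT = X.getValueType();
  // The amount is created in the target's preferred shift-amount type.
  return DAG.getNode(ISD::SHL, DL, VT, X,
                     DAG.getShiftAmountConstant(Amt, VT, DL));
}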
bool isUndef(unsigned Opcode, ArrayRef< SDValue > Ops)
Return true if the result of this operation is always undefined.
SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
SDNode * getNodeIfExists(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops, const SDNodeFlags Flags)
Get the specified node if it's already available, or else return NULL.
SDValue getIndexedLoad(SDValue OrigLoad, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:554
SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
DenormalMode getDenormalMode(EVT VT) const
Return the current function's default denormal handling kind for the given floating point type.
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
Definition: SelectionDAG.h:878
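A minimal sketch of splatting an immediate across a (possibly scalable) vector type with this helper; the wrapper function is hypothetical.
SDValue splatImm(SelectionDAG &DAG, const SDLoc &DL, EVT VecVT, uint64_t Imm) {
  SDValue Scalar = DAG.getConstant(Imm, DL, VecVT.getVectorElementType());
  // getSplat emits BUILD_VECTOR or SPLAT_VECTOR as appropriate for VecVT.
  return DAG.getSplat(VecVT, DL, Scalar);
}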
static unsigned getOpcode_EXTEND(unsigned Opcode)
Convert *_EXTEND_VECTOR_INREG to *_EXTEND opcode.
Definition: SelectionDAG.h:908
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
SDValue simplifyShift(SDValue X, SDValue Y)
Try to simplify a shift into 1 of its operands or a constant.
void transferDbgValues(SDValue From, SDValue To, unsigned OffsetInBits=0, unsigned SizeInBits=0, bool InvalidateDbg=true)
Transfer debug values from one node to another, while optionally generating fragment expressions for ...
SDValue getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a logical NOT operation as (XOR Val, BooleanOne).
SDValue getMaskedScatter(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, bool IsTruncating=false)
A vector that has set insertion semantics.
Definition: SetVector.h:57
bool remove(const value_type &X)
Remove an item from the set vector.
Definition: SetVector.h:188
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:93
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
value_type pop_back_val()
Definition: SetVector.h:285
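These set-with-order semantics are exactly what a combiner-style worklist needs: a node can be re-queued many times but is visited once per removal. A self-contained sketch of the idiom, with hypothetical names.
#include "llvm/ADT/SetVector.h"

// Drain a de-duplicating LIFO worklist; inserting an element that is already
// queued is a no-op thanks to the set semantics.
template <typename NodeT>
void drainWorklist(llvm::SmallSetVector<NodeT *, 16> &Worklist,
                   void (*Visit)(NodeT *)) {
  while (!Worklist.empty())
    Visit(Worklist.pop_back_val());
}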
static bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
int getMaskElt(unsigned Idx) const
ArrayRef< int > getMask() const
static void commuteMask(MutableArrayRef< int > Mask)
Change values in a shuffle permute mask assuming the two vector operands have swapped position.
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
void push_back(bool Val)
void reserve(unsigned N)
size_type size() const
Definition: SmallPtrSet.h:94
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:321
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false.
Definition: SmallPtrSet.h:356
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:360
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:342
bool contains(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:366
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:427
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void assign(size_type NumElts, ValueParamT Elt)
Definition: SmallVector.h:717
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void reserve(size_type N)
Definition: SmallVector.h:676
iterator erase(const_iterator CI)
Definition: SmallVector.h:750
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:696
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:818
void resize(size_type N)
Definition: SmallVector.h:651
void push_back(const T &Elt)
Definition: SmallVector.h:426
pointer data()
Return a pointer to the vector's buffer, even if empty().
Definition: SmallVector.h:299
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
const SDValue & getOffset() const
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
bool has(LibFunc F) const
Tests whether a library function is available.
virtual bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT) const
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
bool isOperationExpand(unsigned Op, EVT VT) const
Return true if the specified operation is illegal on this target or unlikely to be made legal with cu...
virtual bool preferSextInRegOfTruncate(EVT TruncVT, EVT VT, EVT ExtVT) const
virtual bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
virtual bool hasAndNot(SDValue X) const
Return true if the target has a bitwise and-not operation: X = ~A & B This can be used to simplify se...
virtual bool isShuffleMaskLegal(ArrayRef< int >, EVT) const
Targets can use this to indicate that they only support some VECTOR_SHUFFLE operations,...
virtual bool enableAggressiveFMAFusion(EVT VT) const
Return true if target always benefits from combining into FMA for a given value type.
bool isIndexedStoreLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed store is legal on this target.
SDValue promoteTargetBoolean(SelectionDAG &DAG, SDValue Bool, EVT ValVT) const
Promote the given target boolean to a target boolean of the given type.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual bool canCombineTruncStore(EVT ValVT, EVT MemVT, bool LegalOnly) const
virtual bool convertSetCCLogicToBitwiseLogic(EVT VT) const
Use bitwise logic to make pairs of compares more efficient.
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
virtual bool isVectorLoadExtDesirable(SDValue ExtVal) const
Return true if folding a vector load into ExtVal (a sign, zero, or any extend node) is profitable.
int getRecipEstimateSqrtEnabled(EVT VT, MachineFunction &MF) const
Return a ReciprocalEstimate enum value for a square root of the given type based on the function's at...
virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
virtual MachineMemOperand::Flags getTargetMMOFlags(const Instruction &I) const
This callback is used to inspect load/store instructions and add target-specific MachineMemOperand fl...
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual bool isFPExtFoldable(const MachineInstr &MI, unsigned Opcode, LLT DestTy, LLT SrcTy) const
Return true if an fpext operation input to an Opcode operation is free (for instance,...
virtual bool hasBitTest(SDValue X, SDValue Y) const
Return true if the target has a bit-test instruction: (X & (1 << Y)) ==/!= 0 This knowledge can be us...
bool isTruncStoreLegal(EVT ValVT, EVT MemVT) const
Return true if the specified store with truncation is legal on this target.
virtual bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT, const SelectionDAG &DAG, const MachineMemOperand &MMO) const
Return true if the following transform is beneficial: fold (conv (load x)) -> (load (conv*)x) On arch...
virtual bool areTwoSDNodeTargetMMOFlagsMergeable(const MemSDNode &NodeX, const MemSDNode &NodeY) const
Return true if it is valid to merge the TargetMMOFlags in two SDNodes.
virtual bool isCommutativeBinOp(unsigned Opcode) const
Returns true if the opcode is a commutative binary operation.
virtual bool isFPImmLegal(const APFloat &, EVT, bool ForCodeSize=false) const
Returns true if the target can instruction select the specified FP immediate natively.
virtual bool isExtractVecEltCheap(EVT VT, unsigned Index) const
Return true if extraction of a scalar element from the given vector type at the given index is cheap.
virtual bool optimizeFMulOrFDivAsShiftAddBitcast(SDNode *N, SDValue FPConst, SDValue IntPow2) const
virtual bool shouldNormalizeToSelectSequence(LLVMContext &Context, EVT VT) const
Returns true if we should normalize select(N0&N1, X, Y) => select(N0, select(N1, X,...
virtual bool preferScalarizeSplat(SDNode *N) const
bool isIndexedMaskedLoadLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed masked load is legal on this target.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
virtual bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const
Return true if it is profitable to convert a select of FP constants into a constant pool load whose a...
bool hasBigEndianPartOrdering(EVT VT, const DataLayout &DL) const
When splitting a value of the specified type into parts, does the Lo or Hi part come first?...
virtual bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const
Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type from this source type with ...
virtual bool isMulAddWithConstProfitable(SDValue AddNode, SDValue ConstNode) const
Return true if it may be profitable to transform (mul (add x, c1), c2) -> (add (mul x,...
virtual bool isFsqrtCheap(SDValue X, SelectionDAG &DAG) const
Return true if SQRT(X) shouldn't be replaced with X*RSQRT(X).
int getDivRefinementSteps(EVT VT, MachineFunction &MF) const
Return the refinement step count for a division of the given type based on the function's attributes.
virtual bool shouldFoldConstantShiftPairToMask(const SDNode *N, CombineLevel Level) const
Return true if it is profitable to fold a pair of shifts into a mask.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual bool shouldAvoidTransformToShift(EVT VT, unsigned Amount) const
Return true if creating a shift of the type by the given amount is not profitable.
virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const
Return the ValueType of the result of SETCC operations.
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL, bool LegalTypes=true) const
Returns the type for the shift amount of a shift opcode.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
virtual bool shouldFoldSelectWithSingleBitTest(EVT VT, const APInt &AndMask) const
BooleanContent getBooleanContents(bool isVec, bool isFloat) const
For targets without i1 registers, this gives the nature of the high-bits of boolean values held in ty...
virtual bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT) const
Return true if pulling a binary operation into a select with an identity constant is profitable.
virtual bool shouldReassociateReduction(unsigned RedOpc, EVT VT) const
bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal on this target.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
int getRecipEstimateDivEnabled(EVT VT, MachineFunction &MF) const
Return a ReciprocalEstimate enum value for a division of the given type based on the function's attri...
virtual bool preferIncOfAddToSubOfNot(EVT VT) const
These two forms are equivalent: (sub y, (xor x, -1)) and (add (add x, 1), y). The variant with two add's is IR...
virtual bool isNarrowingProfitable(EVT SrcVT, EVT DestVT) const
Return true if it's profitable to narrow operations of type SrcVT to DestVT.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
virtual bool isLegalAddImmediate(int64_t) const
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT) const
Return true if it is profitable to reduce a load to a smaller type.
virtual bool isProfitableToCombineMinNumMaxNum(EVT VT) const
virtual bool isFNegFree(EVT VT) const
Return true if an fneg operation is free to the point where it is never worthwhile to replace it with...
virtual bool isIntDivCheap(EVT VT, AttributeList Attr) const
Return true if integer divide is usually cheaper than a sequence of several shifts,...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
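A typical guard before creating a new node in a combine, mirroring the LegalOperations flag used throughout this file. A minimal sketch; the helper name and the idea of passing the opcode in are illustrative only.
bool canEmitNode(const TargetLowering &TLI, bool LegalOperations, unsigned Opc,
                 EVT VT) {
  // Before legalization anything goes; afterwards the opcode must be legal
  // or have custom lowering for VT.
  return !LegalOperations || TLI.isOperationLegalOrCustom(Opc, VT);
}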
virtual bool mergeStoresAfterLegalization(EVT MemVT) const
Allow store merging for the specified type after legalization in addition to before legalization.
virtual bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
Return true if the target supports a memory access of this type for the given address space and align...
unsigned getGatherAllAliasesMaxDepth() const
virtual bool storeOfVectorConstantIsCheap(bool IsZero, EVT MemVT, unsigned NumElem, unsigned AddrSpace) const
Return true if it is expected to be cheaper to do a store of vector constant with the given size and ...
virtual bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const
Return true if it is cheaper to split the store of a merged int val from a pair of smaller values int...
bool isLoadExtLegalOrCustom(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal or custom on this target.
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
virtual bool shouldFoldMaskToVariableShiftPair(SDValue X) const
There are two ways to clear extreme bits (either low or high): Mask: x & (-1 << y) (the instcombine c...
bool isIndexedLoadLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed load is legal on this target.
virtual bool canMergeStoresTo(unsigned AS, EVT MemVT, const MachineFunction &MF) const
Returns if it's reasonable to merge stores to MemVT size.
virtual bool preferABDSToABSWithNSW(EVT VT) const
bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal on this target.
AndOrSETCCFoldKind
Enum of different potentially desirable ways to fold (and/or (setcc ...), (setcc ....
virtual bool shouldScalarizeBinop(SDValue VecOp) const
Try to convert an extract element of a vector binary operation into an extract element followed by a ...
virtual bool isStoreBitCastBeneficial(EVT StoreVT, EVT BitcastVT, const SelectionDAG &DAG, const MachineMemOperand &MMO) const
Return true if the following transform is beneficial: (store (y (conv x)), y*)) -> (store x,...
bool isIndexedMaskedStoreLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed masked store is legal on this target.
virtual bool isVectorClearMaskLegal(ArrayRef< int >, EVT) const
Similar to isShuffleMaskLegal.
bool hasTargetDAGCombine(ISD::NodeType NT) const
If true, the target has custom DAG combine transformations that it can perform for the specified node...
virtual bool shouldSplatInsEltVarIndex(EVT) const
Return true if inserting a scalar into a variable element of an undef vector is more efficiently hand...
NegatibleCost
Enum that specifies when a float negation is beneficial.
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
int getSqrtRefinementSteps(EVT VT, MachineFunction &MF) const
Return the refinement step count for a square root of the given type based on the function's attribut...
virtual unsigned preferedOpcodeForCmpEqPiecesOfOperand(EVT VT, unsigned ShiftOpc, bool MayTransformRotate, const APInt &ShiftOrRotateAmt, const std::optional< APInt > &AndMask) const
virtual bool isFMADLegal(const MachineInstr &MI, LLT Ty) const
Returns true if MI can be combined with another instruction to form TargetOpcode::G_FMAD.
const char * getLibcallName(RTLIB::Libcall Call) const
Get the libcall routine name for the specified libcall.
virtual bool aggressivelyPreferBuildVectorSources(EVT VecVT) const
virtual bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const
virtual bool isFAbsFree(EVT VT) const
Return true if an fabs operation is free to the point where it is never worthwhile to replace it with...
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
virtual bool generateFMAsInMachineCombiner(EVT VT, CodeGenOptLevel OptLevel) const
virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AddrSpace, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
virtual bool hasPairedLoad(EVT, Align &) const
Return true if the target supplies and combines to a paired load two loaded values of type LoadedType...
virtual bool convertSelectOfConstantsToMath(EVT VT) const
Return true if a select of constants (select Cond, C1, C2) should be transformed into simple math ops...
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const
Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT from min(max(fptoi)) satur...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps, bool &UseOneConstNR, bool Reciprocal) const
Hooks for building estimates in place of slower divisions and square roots.
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
virtual bool isReassocProfitable(SelectionDAG &DAG, SDValue N0, SDValue N1) const
SDValue getCheaperOrNeutralNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, const NegatibleCost CostThreshold=NegatibleCost::Neutral, unsigned Depth=0) const
SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, unsigned Depth=0) const
This is the helper function to return the newly negated expression only when the cost is cheaper.
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
More limited version of SimplifyDemandedBits that can be used to "look through" ops that don't contri...
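A minimal sketch, assuming a scalar operand of at least 8 bits, of asking for a simpler value when only the low byte of Op is consumed; the wrapper function is hypothetical.
SDValue lookThroughForLowByte(const TargetLowering &TLI, SelectionDAG &DAG,
                              SDValue Op) {
  unsigned BW = Op.getScalarValueSizeInBits(); // assumed >= 8
  APInt DemandedBits = APInt::getLowBitsSet(BW, 8);
  APInt DemandedElts = APInt(1, 1); // scalar value: a single "element"
  // Returns a replacement value if one exists, or SDValue() otherwise.
  return TLI.SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts,
                                             DAG);
}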
SDValue expandABS(SDNode *N, SelectionDAG &DAG, bool IsNegative=false) const
Expand ABS nodes.
virtual bool IsDesirableToPromoteOp(SDValue, EVT &) const
This method queries the target whether it is beneficial for the dag combiner to promote the specified node.
virtual bool isTypeDesirableForOp(unsigned, EVT VT) const
Return true if the target has native support for the specified value type and it is 'desirable' to us...
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, const DenormalMode &Mode) const
Return a target-dependent comparison result if the input operand is suitable for use with a square ro...
SDValue buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0, SDValue N1, MutableArrayRef< int > Mask, SelectionDAG &DAG) const
Tries to build a legal vector shuffle using the provided parameters or equivalent variations.
virtual SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps) const
Return a reciprocal estimate value for the input operand.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
bool isConstFalseVal(SDValue N) const
Return true if N is a constant or constant vector equal to the false value from getBooleanContents().
SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::UDIV node expressing a divide by constant, return a DAG expression to select that will ...
virtual SDValue getSqrtResultForDenormInput(SDValue Operand, SelectionDAG &DAG) const
Return a target-dependent result if the input operand is not suitable for use with a square root esti...
virtual bool getPostIndexedAddressParts(SDNode *, SDNode *, SDValue &, SDValue &, ISD::MemIndexedMode &, SelectionDAG &) const
Returns true by value, base pointer and offset pointer and addressing mode by reference if this node ...
SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, bool foldBooleans, DAGCombinerInfo &DCI, const SDLoc &dl) const
Try to simplify a setcc built with the specified operands and cc.
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const
Return true if folding a constant offset with the given GlobalAddress is legal.
bool isConstTrueVal(SDValue N) const
Return true if N is a constant or constant vector equal to the true value from getBooleanContents().
SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, SDValue Index) const
Get a pointer to vector element Idx located in memory for a vector of type VecVT starting at a base a...
virtual bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const
Return true if it is profitable to move this shift by a constant amount through its operand,...
virtual unsigned combineRepeatedFPDivisors() const
Indicate whether this target prefers to combine FDIVs with the same divisor.
virtual AndOrSETCCFoldKind isDesirableToCombineLogicOpOfSETCC(const SDNode *LogicOp, const SDNode *SETCC0, const SDNode *SETCC1) const
virtual bool getPreIndexedAddressParts(SDNode *, SDValue &, SDValue &, ISD::MemIndexedMode &, SelectionDAG &) const
Returns true by value, base pointer and offset pointer and addressing mode by reference if the node's...
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::SDIV node expressing a divide by constant, return a DAG expression to select that will ...
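When a combine decides a constant divisor is worth expanding, the call pattern looks roughly like the sketch below; the new intermediate nodes come back through Created so the caller can add them to its worklist. The wrapper name is hypothetical.
SDValue expandDivideByConstant(const TargetLowering &TLI, SDNode *N,
                               SelectionDAG &DAG, bool AfterLegalization) {
  SmallVector<SDNode *, 8> Created;
  SDValue Result = TLI.BuildSDIV(N, DAG, AfterLegalization, Created);
  // Created now holds the multiply/shift nodes that make up the expansion.
  return Result;
}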
virtual SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
virtual SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SREM lowering for power-of-2 denominators.
virtual bool isDesirableToTransformToIntegerOp(unsigned, EVT) const
Return true if it is profitable for dag combiner to transform a floating point op of specified opcode...
TargetOptions Options
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
unsigned NoSignedZerosFPMath
NoSignedZerosFPMath - This flag is enabled when the -enable-no-signed-zeros-fp-math is specified on t...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual bool useAA() const
Enable use of alias analysis during code generation (during MI scheduling, DAGCombine,...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:330
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
const fltSemantics & getFltSemantics() const
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:72
Value * getOperand(unsigned i) const
Definition: User.h:169
This class is used to represent an VP_GATHER node.
const SDValue & getScale() const
ISD::MemIndexType getIndexType() const
How is Index applied to BasePtr when computing addresses.
const SDValue & getVectorLength() const
const SDValue & getIndex() const
const SDValue & getBasePtr() const
const SDValue & getMask() const
This class is used to represent an VP_SCATTER node.
const SDValue & getValue() const
This class is used to represent EVT's, which are used to parameterize some operations.
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
use_iterator use_begin()
Definition: Value.h:360
bool use_empty() const
Definition: Value.h:344
iterator_range< use_iterator > uses()
Definition: Value.h:376
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
Definition: TypeSize.h:180
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:187
static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition: TypeSize.h:217
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:171
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition: TypeSize.h:239
#define INT64_MAX
Definition: DataTypes.h:71
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char IsVolatile[]
Key for Kernel::Arg::Metadata::mIsVolatile.
const APInt & smin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be signed.
Definition: APInt.h:2178
const APInt & smax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be signed.
Definition: APInt.h:2183
const APInt & umin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be unsigned.
Definition: APInt.h:2188
const APInt & umax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be unsigned.
Definition: APInt.h:2193
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
CondCode getSetCCAndOperation(CondCode Op1, CondCode Op2, EVT Type)
Return the result of a logical AND between different comparisons of identical values: ((X op1 Y) & (X...
bool isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are ~0 ...
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:750
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:236
@ CTLZ_ZERO_UNDEF
Definition: ISDOpcodes.h:723
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:476
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition: ISDOpcodes.h:44
@ VECREDUCE_SMIN
Definition: ISDOpcodes.h:1370
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:250
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition: ISDOpcodes.h:559
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:714
@ SMULFIX
RESULT = [US]MULFIX(LHS, RHS, SCALE) - Perform fixed point multiplication on 2 integers with the same...
Definition: ISDOpcodes.h:367
@ ConstantFP
Definition: ISDOpcodes.h:77
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:269
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
Definition: ISDOpcodes.h:487
@ FMAXNUM_IEEE
Definition: ISDOpcodes.h:979
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:239
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1031
@ SMULFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
Definition: ISDOpcodes.h:373
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:783
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:483
@ GlobalAddress
Definition: ISDOpcodes.h:78
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:790
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:543
@ VECREDUCE_FMAX
FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.
Definition: ISDOpcodes.h:1355
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:390
@ VECREDUCE_FMAXIMUM
FMINIMUM/FMAXIMUM nodes propagate NaNs and signed zeroes using the llvm.minimum and llvm....
Definition: ISDOpcodes.h:1359
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:688
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition: ISDOpcodes.h:820
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:255
@ VECREDUCE_SMAX
Definition: ISDOpcodes.h:1369
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:477
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition: ISDOpcodes.h:913
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:903
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:229
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:1400
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:774
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition: ISDOpcodes.h:662
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:620
@ VECREDUCE_FADD
These reductions have relaxed evaluation order semantics, and have a single vector operand.
Definition: ISDOpcodes.h:1352
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:722
@ VECREDUCE_FMIN
Definition: ISDOpcodes.h:1356
@ SETCCCARRY
Like SetCC, ops #0 and #1 are the LHS and RHS operands to compare, but op #2 is a boolean indicating ...
Definition: ISDOpcodes.h:758
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:930
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1077
@ SSUBO
Same for subtraction.
Definition: ISDOpcodes.h:327
@ STEP_VECTOR
STEP_VECTOR(IMM) - Returns a scalable vector whose lanes are comprised of a linear sequence of unsign...
Definition: ISDOpcodes.h:646
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition: ISDOpcodes.h:349
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:727
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:211
@ VECREDUCE_UMAX
Definition: ISDOpcodes.h:1371
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition: ISDOpcodes.h:222
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition: ISDOpcodes.h:627
@ AssertAlign
AssertAlign - These nodes record if a register contains a value that has a known alignment and the tr...
Definition: ISDOpcodes.h:68
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition: ISDOpcodes.h:208
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition: ISDOpcodes.h:323
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
Definition: ISDOpcodes.h:1364
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:651
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:705
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:600
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:573
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimum or maximum on two values,...
Definition: ISDOpcodes.h:978
@ EntryToken
EntryToken - This is the marker used to indicate the start of a region.
Definition: ISDOpcodes.h:47
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:535
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition: ISDOpcodes.h:203
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:780
@ TargetConstantFP
Definition: ISDOpcodes.h:159
@ FP_TO_UINT_SAT
Definition: ISDOpcodes.h:856
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:742
@ VSCALE
VSCALE(IMM) - Returns the runtime scaling factor used to calculate the number of elements within a sc...
Definition: ISDOpcodes.h:1329
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:971
@ SSHLSAT
RESULT = [US]SHLSAT(LHS, RHS) - Perform saturation left shift.
Definition: ISDOpcodes.h:359
@ SMULO
Same for multiplication.
Definition: ISDOpcodes.h:331
@ TargetFrameIndex
Definition: ISDOpcodes.h:166
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition: ISDOpcodes.h:809
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:798
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:674
@ LIFETIME_START
This corresponds to the llvm.lifetime.
Definition: ISDOpcodes.h:1304
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:888
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:736
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:303
@ HANDLENODE
HANDLENODE node - Used as a handle for various purposes.
Definition: ISDOpcodes.h:1191
@ VECREDUCE_UMIN
Definition: ISDOpcodes.h:1372
@ BF16_TO_FP
BF16_TO_FP, FP_TO_BF16 - These operators are used to perform promotions and truncation for bfloat16.
Definition: ISDOpcodes.h:922
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
Definition: ISDOpcodes.h:984
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:836
@ TargetConstant
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification,...
Definition: ISDOpcodes.h:158
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:680
@ GET_FPENV_MEM
Gets the current floating-point environment.
Definition: ISDOpcodes.h:1007
@ CARRY_FALSE
CARRY_FALSE - This node is used when folding other nodes, like ADDC/SUBC, which indicate the carry re...
Definition: ISDOpcodes.h:260
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition: ISDOpcodes.h:657
@ VECREDUCE_FMUL
Definition: ISDOpcodes.h:1353
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:279
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition: ISDOpcodes.h:400
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:524
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ FFREXP
FFREXP - frexp, extract fractional and exponent component of a floating-point value.
Definition: ISDOpcodes.h:944
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:869
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition: ISDOpcodes.h:831
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition: ISDOpcodes.h:855
@ VECREDUCE_FMINIMUM
Definition: ISDOpcodes.h:1360
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:786
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:1070
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:61
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:493
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition: ISDOpcodes.h:340
@ AssertZext
Definition: ISDOpcodes.h:62
@ CALLSEQ_START
CALLSEQ_START/CALLSEQ_END - These operators mark the beginning and end of a call sequence,...
Definition: ISDOpcodes.h:1140
@ SET_FPENV_MEM
Sets the current floating point environment.
Definition: ISDOpcodes.h:1012
@ SADDO_CARRY
Carry-using overflow-aware nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:313
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:515
bool isIndexTypeSigned(MemIndexType IndexType)
Definition: ISDOpcodes.h:1489
bool isExtVecInRegOpcode(unsigned Opcode)
Definition: ISDOpcodes.h:1599
bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool matchUnaryPredicate(SDValue Op, std::function< bool(ConstantSDNode *)> Match, bool AllowUndefs=false)
Hook for matching ConstantSDNode predicate.
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
bool matchUnaryFpPredicate(SDValue Op, std::function< bool(ConstantFPSDNode *)> Match, bool AllowUndefs=false)
Hook for matching ConstantFPSDNode predicate.
bool isFPEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with floati...
Definition: ISDOpcodes.h:1574
bool isExtOpcode(unsigned Opcode)
Definition: ISDOpcodes.h:1594
bool isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are 0 o...
bool isVPBinaryOp(unsigned Opcode)
Whether this is a vector-predicated binary operation opcode.
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
bool isBitwiseLogicOp(unsigned Opcode)
Whether this is bitwise logic opcode.
Definition: ISDOpcodes.h:1415
std::optional< unsigned > getVPMaskIdx(unsigned Opcode)
The operand position of the vector mask.
bool isUNINDEXEDLoad(const SDNode *N)
Returns true if the specified node is an unindexed load.
std::optional< unsigned > getVPExplicitVectorLengthIdx(unsigned Opcode)
The operand position of the explicit vector length parameter.
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is a EXTLOAD.
bool allOperandsUndef(const SDNode *N)
Return true if the node has at least one operand and all operands of the specified node are ISD::UNDE...
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
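These two helpers compose when a combine rewrites (X cc Y) into an inverted comparison with swapped operands. A minimal sketch; the function name is hypothetical.
// For setcc (X cc Y): first invert the predicate (giving !(X cc Y)), then
// express it with the operands swapped, i.e. as (Y cc'' X).
ISD::CondCode invertedSwappedCC(ISD::CondCode CC, EVT OperandVT) {
  ISD::CondCode Inverted = ISD::getSetCCInverse(CC, OperandVT);
  return ISD::getSetCCSwappedOperands(Inverted);
}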
MemIndexType
MemIndexType enum - This enum defines how to interpret MGATHER/SCATTER's index parameter when calcula...
Definition: ISDOpcodes.h:1485
@ UNSIGNED_SCALED
Definition: ISDOpcodes.h:1485
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
Definition: ISDOpcodes.h:1556
bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
bool matchBinaryPredicate(SDValue LHS, SDValue RHS, std::function< bool(ConstantSDNode *, ConstantSDNode *)> Match, bool AllowUndefs=false, bool AllowTypeMismatch=false)
Attempt to match a binary predicate against a pair of scalar/splat constants or every element of a pa...
bool isVPReduction(unsigned Opcode)
Whether this is a vector-predicated reduction opcode.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1472
bool isBuildVectorOfConstantFPSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantFPSDNode or undef.
bool isSEXTLoad(const SDNode *N)
Returns true if the specified node is a SEXTLOAD.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1523
bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1503
CondCode getSetCCOrOperation(CondCode Op1, CondCode Op2, EVT Type)
Return the result of a logical OR between different comparisons of identical values: ((X op1 Y) | (X ...
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
Definition: ISDOpcodes.h:1568
@ VecLoad
Definition: NVPTX.h:93
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
Definition: PatternMatch.h:918
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:821
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
Definition: PatternMatch.h:541
MaxMin_match< ICmpInst, LHS, RHS, smin_pred_ty > m_SMin(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
deferredval_ty< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
Definition: PatternMatch.h:839
BinaryOp_match< cst_pred_ty< is_zero_int >, ValTy, Instruction::Sub > m_Neg(const ValTy &V)
Matches a 'Neg' as 'sub 0, V'.
MaxMin_match< ICmpInst, LHS, RHS, umax_pred_ty > m_UMax(const LHS &L, const RHS &R)
MaxMin_match< ICmpInst, LHS, RHS, smax_pred_ty > m_SMax(const LHS &L, const RHS &R)
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
MaxMin_match< ICmpInst, LHS, RHS, umin_pred_ty > m_UMin(const LHS &L, const RHS &R)
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
@ Undef
Value of the register doesn't matter.
BinaryOpc_match< LHS, RHS, false > m_Sra(const LHS &L, const RHS &R)
BinaryOpc_match< LHS, RHS, false > m_Srl(const LHS &L, const RHS &R)
Or< Preds... > m_AnyOf(Preds &&...preds)
bool sd_match(SDNode *N, const SelectionDAG *DAG, Pattern &&P)
NUses_match< 1, Value_match > m_OneUse()
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
int ilogb(const IEEEFloat &Arg)
Definition: APFloat.cpp:4504
constexpr double e
Definition: MathExtras.h:31
DiagnosticInfoOptimizationBase::Argument NV
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
Definition: MathExtras.h:326
@ Offset
Definition: DWP.cpp:456
@ Length
Definition: DWP.cpp:456
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
Definition: STLExtras.h:862
bool operator<(int64_t V1, const APSInt &V2)
Definition: APSInt.h:361
void stable_sort(R &&Range)
Definition: STLExtras.h:2004
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1751
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1731
int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition: bit.h:385
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:228
bool isAllOnesOrAllOnesSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant -1 integer or a splatted vector of a constant -1 integer (with...
Definition: Utils.cpp:1457
SDValue getBitwiseNotOperand(SDValue V, SDValue Mask, bool AllowUndefs)
If V is a bitwise not, returns the inverted operand.
SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
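Many combines in this file strip bitcasts before pattern matching. A minimal sketch; the predicate name is hypothetical.
bool isBuildVectorBehindBitcasts(SDValue V) {
  // Walk through any chain of BITCAST nodes and inspect the real producer.
  return peekThroughBitcasts(V).getOpcode() == ISD::BUILD_VECTOR;
}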
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A,...
Definition: STLExtras.h:2415
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition: bit.h:307
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition: Alignment.h:145
llvm::SmallVector< int, 16 > createUnaryMask(ArrayRef< int > Mask, unsigned NumElts)
Given a shuffle mask for a binary shuffle, create the equivalent shuffle mask assuming both operands ...
bool isIntOrFPConstant(SDValue V)
Return true if V is either a integer or FP constant.
bool operator!=(uint64_t V1, const APInt &V2)
Definition: APInt.h:2043
bool operator>=(int64_t V1, const APSInt &V2)
Definition: APSInt.h:360
std::string & operator+=(std::string &buffer, StringRef string)
Definition: StringRef.h:899
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2082
const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)
This method strips off any GEP address adjustments and pointer casts from the specified value,...
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:269
bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition: Utils.cpp:1439
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:319
bool isMinSignedConstant(SDValue V)
Returns true if V is a constant min signed integer value.
ConstantFPSDNode * isConstOrConstSplatFP(SDValue N, bool AllowUndefs=false)
Returns the SDNode if it is a constant splat BuildVector or constant float.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition: MathExtras.h:361
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition: bit.h:215
unsigned M1(unsigned Val)
Definition: VE.h:376
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1738
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:313
bool isConstantOrConstantVector(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowFP=true, bool AllowOpaqueConstants=true)
Return true if the specified instruction is known to be a constant, or a vector of constants.
Definition: Utils.cpp:1407
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition: bit.h:281
bool operator>(int64_t V1, const APSInt &V2)
Definition: APSInt.h:362
bool isBitwiseNot(SDValue V, bool AllowUndefs=false)
Returns true if V is a bitwise not operation.
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:428
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:264
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1656
detail::ValueMatchesPoly< M > HasValue(M Matcher)
Definition: Error.h:221
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
SDValue peekThroughTruncates(SDValue V)
Return the non-truncated source operand of V if it exists.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1745
SDValue peekThroughOneUseBitcasts(SDValue V)
Return the non-bitcasted and one-use source operand of V if it exists.
CodeGenOptLevel
Code generation optimization level.
Definition: CodeGen.h:54
bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
@ Other
Any other memory.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
CombineLevel
Definition: DAGCombine.h:15
@ AfterLegalizeDAG
Definition: DAGCombine.h:19
@ AfterLegalizeVectorOps
Definition: DAGCombine.h:18
@ BeforeLegalizeTypes
Definition: DAGCombine.h:16
@ AfterLegalizeTypes
Definition: DAGCombine.h:17
void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed...
@ Or
Bitwise or logical OR of integers.
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ FMul
Product of floats.
@ And
Bitwise or logical AND of integers.
@ Add
Sum of integers.
@ FAdd
Sum of floats.
DWARFExpression::Operation Op
unsigned M0(unsigned Val)
Definition: VE.h:375
ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
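The usual way a combine tests an operand for a (possibly splatted) constant with some property; the power-of-two check and the helper name are only an example.
bool isPowerOf2Constant(SDValue V) {
  if (ConstantSDNode *C = isConstOrConstSplat(V, /*AllowUndefs=*/true))
    return C->getAPIntValue().isPowerOf2();
  return false;
}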
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1930
bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
void getShuffleMaskWithWidestElts(ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Repetitively apply widenShuffleMaskElts() for as long as it succeeds, to get the shuffle mask with wi...
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1888
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list is empty.
Definition: STLExtras.h:2048
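A hedged sketch of the counting/membership helpers above (count_if, is_contained, all_equal from llvm/ADT/STLExtras.h); the mask values are illustrative only.

  #include "llvm/ADT/STLExtras.h"
  #include "llvm/ADT/SmallVector.h"
  #include <cassert>

  int main() {
    llvm::SmallVector<int, 8> Mask = {0, -1, 2, -1};
    // Number of "undef" (-1) lanes in a shuffle-style mask.
    auto NumUndef = llvm::count_if(Mask, [](int M) { return M < 0; });
    assert(NumUndef == 2);
    assert(llvm::is_contained(Mask, 2));
    // all_equal is handy for "is this a splat?"-style questions.
    assert(!llvm::all_equal({Mask[0], Mask[2]}));
    return 0;
  }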
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition: Alignment.h:208
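A hedged sketch of the alignment helpers above (Align, commonAlignment, Log2 from llvm/Support/Alignment.h); the offsets are illustrative.

  #include "llvm/Support/Alignment.h"
  #include <cassert>

  int main() {
    llvm::Align Base(16);
    // An access at Base + 4 can only be assumed 4-byte aligned.
    llvm::Align AtOffset = llvm::commonAlignment(Base, /*Offset=*/4);
    assert(AtOffset.value() == 4);
    assert(llvm::Log2(AtOffset) == 2u); // log2 of the alignment in bytes
    return 0;
  }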
bool isNeutralConstant(unsigned Opc, SDNodeFlags Flags, SDValue V, unsigned OperandNo)
Returns true if V is a neutral element of Opc with Flags.
bool operator<=(int64_t V1, const APSInt &V2)
Definition: APSInt.h:359
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition: MathExtras.h:349
int getSplatIndex(ArrayRef< int > Mask)
If all non-negative Mask elements are the same value, return that value.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
Definition: Metadata.h:760
AAMDNodes concat(const AAMDNodes &Other) const
Determine the best AAMDNodes after concatenating two different locations together.
static ExponentType semanticsMinExponent(const fltSemantics &)
Definition: APFloat.cpp:300
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:230
static ExponentType semanticsMaxExponent(const fltSemantics &)
Definition: APFloat.cpp:296
static unsigned int semanticsPrecision(const fltSemantics &)
Definition: APFloat.cpp:292
opStatus
IEEE-754R 7: Default exception handling.
Definition: APFloat.h:246
static unsigned int semanticsIntSizeInBits(const fltSemantics &, bool)
Definition: APFloat.cpp:306
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Represent subnormal handling kind for floating point instruction inputs and outputs.
static constexpr DenormalMode getIEEE()
Extended Value Type.
Definition: ValueTypes.h:34
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:93
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:380
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:136
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:73
EVT changeTypeToInteger() const
Return the type converted to an equivalently sized integer or vector with integer element type.
Definition: ValueTypes.h:120
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:274
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition: ValueTypes.h:290
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:146
ElementCount getVectorElementCount() const
Definition: ValueTypes.h:340
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:358
bool isByteSized() const
Return true if the bit size is a multiple of 8.
Definition: ValueTypes.h:233
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition: ValueTypes.h:349
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:370
bool isPow2VectorType() const
Returns true if the vector's number of elements is a power of 2.
Definition: ValueTypes.h:455
TypeSize getStoreSizeInBits() const
Return the number of bits overwritten by a store of the specified value type.
Definition: ValueTypes.h:397
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:306
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:64
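A hedged sketch of constructing and comparing EVTs with the factory functions and queries documented in this block (llvm/CodeGen/ValueTypes.h); the concrete types chosen are only examples.

  #include "llvm/CodeGen/ValueTypes.h"
  #include "llvm/IR/LLVMContext.h"
  #include <cassert>

  int main() {
    llvm::LLVMContext Ctx;
    llvm::EVT I32 = llvm::EVT::getIntegerVT(Ctx, 32);
    llvm::EVT V4I32 = llvm::EVT::getVectorVT(Ctx, I32, 4);
    assert(I32.isSimple() && V4I32.isVector());
    assert(V4I32.getFixedSizeInBits() == 128);
    assert(V4I32.getVectorElementType() == I32);
    assert(V4I32.bitsGT(I32)); // 128 bits vs. 32 bits
    return 0;
  }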
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:366
bool isScalableVT() const
Return true if the type is a scalable type.
Definition: ValueTypes.h:183
bool isFixedLengthVector() const
Definition: ValueTypes.h:177
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
Definition: ValueTypes.h:58
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:167
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:313
bool bitsGE(EVT VT) const
Return true if this has no less bits than VT.
Definition: ValueTypes.h:282
bool bitsEq(EVT VT) const
Return true if this has the same number of bits as VT.
Definition: ValueTypes.h:246
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:202
bool isRound() const
Return true if the size is a power-of-two number of bytes.
Definition: ValueTypes.h:238
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition: ValueTypes.h:173
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:318
bool isExtended() const
Test if the given EVT is extended (as opposed to being simple).
Definition: ValueTypes.h:141
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:156
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:326
bool isZeroSized() const
Test if the given EVT has zero size; this will fail if called on a scalable type.
Definition: ValueTypes.h:131
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition: ValueTypes.h:298
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:151
Helper struct to store a base, index and offset that together form an address.
Definition: LoadStoreOpt.h:38
bool isNonNegative() const
Returns true if this value is known to be non-negative.
Definition: KnownBits.h:104
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition: KnownBits.h:238
bool isConstant() const
Returns true if we know the value of all bits.
Definition: KnownBits.h:50
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
Definition: KnownBits.h:292
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition: KnownBits.h:244
bool isAllOnes() const
Returns true if the value is all one bits.
Definition: KnownBits.h:83
const APInt & getConstant() const
Returns the value when all bits have a known value.
Definition: KnownBits.h:57
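A hedged sketch of the KnownBits queries above (llvm/Support/KnownBits.h), building a KnownBits value by hand through its public Zero/One APInt members; the bit pattern is illustrative.

  #include "llvm/Support/KnownBits.h"
  #include <cassert>

  int main() {
    llvm::KnownBits Known(/*BitWidth=*/8);
    Known.Zero.setHighBits(5);  // the top five bits are known to be 0
    Known.One.setBit(2);        // bit 2 is known to be 1
    assert(Known.isNonNegative());            // sign bit is known zero
    assert(Known.countMinLeadingZeros() == 5);
    assert(Known.countMaxActiveBits() == 3);  // the value fits in 3 bits
    assert(!Known.isConstant());              // bits 0 and 1 are still unknown
    return 0;
  }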
This class contains a discriminated union of information about pointers in memory operands,...
unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
These are IR-level optimization flags that may be propagated to SDNodes.
bool hasNoUnsignedWrap() const
bool hasDisjoint() const
bool hasNoSignedWrap() const
bool hasNonNeg() const
bool hasAllowReassociation() const
void setNoUnsignedWrap(bool b)
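A hedged sketch of combining the flags above (SDNodeFlags is assumed to live in llvm/CodeGen/SelectionDAGNodes.h); the helper is hypothetical and only illustrates the usual "intersect flags when reassociating" pattern.

  #include "llvm/CodeGen/SelectionDAGNodes.h"

  static llvm::SDNodeFlags combineAddFlags(llvm::SDNodeFlags A,
                                           llvm::SDNodeFlags B) {
    llvm::SDNodeFlags Out;
    // A reassociated add is only nuw if both original adds were nuw.
    Out.setNoUnsignedWrap(A.hasNoUnsignedWrap() && B.hasNoUnsignedWrap());
    return Out;
  }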
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
Clients of various APIs that cause global effects on the DAG can optionally implement this interface.
Definition: SelectionDAG.h:307
virtual void NodeDeleted(SDNode *N, SDNode *E)
The node N that was deleted and, if E is not null, an equivalent node E that replaced it.
virtual void NodeInserted(SDNode *N)
The node N that was inserted.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...