LLVM 19.0.0git
DAGCombiner.cpp
1//===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
10// both before and after the DAG is legalized.
11//
12// This pass is not a substitute for the LLVM IR instcombine pass. This pass is
13// primarily intended to handle simplification opportunities that are implicit
14// in the LLVM IR and exposed by the various codegen lowering phases.
15//
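// As a small illustration of the kind of rewrite performed here (a simplified
// sketch, not an exhaustive list), nodes are replaced with cheaper
// equivalents, e.g.:
//   (add x, 0) -> x
//   (mul x, 8) -> (shl x, 3)
//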
16//===----------------------------------------------------------------------===//
17
18#include "llvm/ADT/APFloat.h"
19#include "llvm/ADT/APInt.h"
20#include "llvm/ADT/ArrayRef.h"
21#include "llvm/ADT/DenseMap.h"
23#include "llvm/ADT/STLExtras.h"
24#include "llvm/ADT/SetVector.h"
27#include "llvm/ADT/SmallSet.h"
29#include "llvm/ADT/Statistic.h"
51#include "llvm/IR/Attributes.h"
52#include "llvm/IR/Constant.h"
53#include "llvm/IR/DataLayout.h"
55#include "llvm/IR/Function.h"
56#include "llvm/IR/Metadata.h"
61#include "llvm/Support/Debug.h"
69#include <algorithm>
70#include <cassert>
71#include <cstdint>
72#include <functional>
73#include <iterator>
74#include <optional>
75#include <string>
76#include <tuple>
77#include <utility>
78#include <variant>
79
80#include "MatchContext.h"
81
82using namespace llvm;
83using namespace llvm::SDPatternMatch;
84
85#define DEBUG_TYPE "dagcombine"
86
87STATISTIC(NodesCombined , "Number of dag nodes combined");
88STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
89STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
90STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
91STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
92STATISTIC(SlicedLoads, "Number of load sliced");
93STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");
94
95DEBUG_COUNTER(DAGCombineCounter, "dagcombine",
96 "Controls whether a DAG combine is performed for a node");
97
98static cl::opt<bool>
99CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
100 cl::desc("Enable DAG combiner's use of IR alias analysis"));
101
102static cl::opt<bool>
103UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
104 cl::desc("Enable DAG combiner's use of TBAA"));
105
106#ifndef NDEBUG
107static cl::opt<std::string>
108CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
109 cl::desc("Only use DAG-combiner alias analysis in this"
110 " function"));
111#endif
112
113/// Hidden option to stress test load slicing, i.e., when this option
114/// is enabled, load slicing bypasses most of its profitability guards.
115static cl::opt<bool>
116StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
117 cl::desc("Bypass the profitability model of load slicing"),
118 cl::init(false));
119
120static cl::opt<bool>
121 MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
122 cl::desc("DAG combiner may split indexing from loads"));
123
124static cl::opt<bool>
125 EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true),
126 cl::desc("DAG combiner enable merging multiple stores "
127 "into a wider store"));
128
129static cl::opt<unsigned> TokenFactorInlineLimit(
130 "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
131 cl::desc("Limit the number of operands to inline for Token Factors"));
132
133static cl::opt<unsigned> StoreMergeDependenceLimit(
134 "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10),
135 cl::desc("Limit the number of times for the same StoreNode and RootNode "
136 "to bail out in store merging dependence check"));
137
138static cl::opt<bool> EnableReduceLoadOpStoreWidth(
139 "combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true),
140 cl::desc("DAG combiner enable reducing the width of load/op/store "
141 "sequence"));
142
143static cl::opt<bool> EnableShrinkLoadReplaceStoreWithStore(
144 "combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true),
145 cl::desc("DAG combiner enable load/<replace bytes>/store with "
146 "a narrower store"));
147
148static cl::opt<bool> EnableVectorFCopySignExtendRound(
149 "combiner-vector-fcopysign-extend-round", cl::Hidden, cl::init(false),
150 cl::desc(
151 "Enable merging extends and rounds into FCOPYSIGN on vector types"));
152
153namespace {
154
155 class DAGCombiner {
156 SelectionDAG &DAG;
157 const TargetLowering &TLI;
158 const SelectionDAGTargetInfo *STI;
159 CombineLevel Level = BeforeLegalizeTypes;
160 CodeGenOptLevel OptLevel;
161 bool LegalDAG = false;
162 bool LegalOperations = false;
163 bool LegalTypes = false;
164 bool ForCodeSize;
165 bool DisableGenericCombines;
166
167 /// Worklist of all of the nodes that need to be simplified.
168 ///
169 /// This must behave as a stack -- new nodes to process are pushed onto the
170 /// back and when processing we pop off of the back.
171 ///
172 /// The worklist will not contain duplicates but may contain null entries
173 /// due to nodes being deleted from the underlying DAG.
174 SmallVector<SDNode *, 64> Worklist;
175
176 /// Mapping from an SDNode to its position on the worklist.
177 ///
178 /// This is used to find and remove nodes from the worklist (by nulling
179 /// them) when they are deleted from the underlying DAG. It relies on
180 /// stable indices of nodes within the worklist.
181 DenseMap<SDNode *, unsigned> WorklistMap;
182
183 /// This records all nodes attempted to be added to the worklist since we
184 /// considered a new worklist entry. As we do not add duplicate nodes
185 /// to the worklist, this is different from the tail of the worklist.
186 SmallSetVector<SDNode *, 32> PruningList;
187
188 /// Set of nodes which have been combined (at least once).
189 ///
190 /// This is used to allow us to reliably add any operands of a DAG node
191 /// which have not yet been combined to the worklist.
192 SmallPtrSet<SDNode *, 32> CombinedNodes;
193
194 /// Map from candidate StoreNode to the pair of RootNode and count.
195 /// The count is used to track how many times we have seen the StoreNode
196 /// with the same RootNode bail out in dependence check. If we have seen
197 /// the bail out for the same pair many times over a limit, we won't
198 /// consider the StoreNode with the same RootNode as store merging
199 /// candidate again.
200 DenseMap<StoreSDNode *, std::pair<SDNode *, unsigned>> StoreRootCountMap;
201
202 // AA - Used for DAG load/store alias analysis.
203 AliasAnalysis *AA;
204
205 /// When an instruction is simplified, add all users of the instruction to
206 /// the work lists because they might get more simplified now.
207 void AddUsersToWorklist(SDNode *N) {
208 for (SDNode *Node : N->uses())
209 AddToWorklist(Node);
210 }
211
212 /// Convenient shorthand to add a node and all of its users to the worklist.
213 void AddToWorklistWithUsers(SDNode *N) {
214 AddUsersToWorklist(N);
215 AddToWorklist(N);
216 }
217
218 // Prune potentially dangling nodes. This is called after
219 // any visit to a node, but should also be called during a visit after any
220 // failed combine which may have created a DAG node.
221 void clearAddedDanglingWorklistEntries() {
222 // Check any nodes added to the worklist to see if they are prunable.
223 while (!PruningList.empty()) {
224 auto *N = PruningList.pop_back_val();
225 if (N->use_empty())
226 recursivelyDeleteUnusedNodes(N);
227 }
228 }
229
230 SDNode *getNextWorklistEntry() {
231 // Before we do any work, remove nodes that are not in use.
232 clearAddedDanglingWorklistEntries();
233 SDNode *N = nullptr;
234 // The Worklist holds the SDNodes in order, but it may contain null
235 // entries.
236 while (!N && !Worklist.empty()) {
237 N = Worklist.pop_back_val();
238 }
239
240 if (N) {
241 bool GoodWorklistEntry = WorklistMap.erase(N);
242 (void)GoodWorklistEntry;
243 assert(GoodWorklistEntry &&
244 "Found a worklist entry without a corresponding map entry!");
245 }
246 return N;
247 }
248
249 /// Call the node-specific routine that folds each particular type of node.
250 SDValue visit(SDNode *N);
251
252 public:
253 DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOptLevel OL)
254 : DAG(D), TLI(D.getTargetLoweringInfo()),
255 STI(D.getSubtarget().getSelectionDAGInfo()), OptLevel(OL), AA(AA) {
256 ForCodeSize = DAG.shouldOptForSize();
257 DisableGenericCombines = STI && STI->disableGenericCombines(OptLevel);
258
259 MaximumLegalStoreInBits = 0;
260 // We use the minimum store size here, since that's all we can guarantee
261 // for the scalable vector types.
262 for (MVT VT : MVT::all_valuetypes())
263 if (EVT(VT).isSimple() && VT != MVT::Other &&
264 TLI.isTypeLegal(EVT(VT)) &&
265 VT.getSizeInBits().getKnownMinValue() >= MaximumLegalStoreInBits)
266 MaximumLegalStoreInBits = VT.getSizeInBits().getKnownMinValue();
267 }
268
269 void ConsiderForPruning(SDNode *N) {
270 // Mark this for potential pruning.
271 PruningList.insert(N);
272 }
273
274 /// Add to the worklist, making sure its instance is at the back (next to
275 /// be processed).
276 void AddToWorklist(SDNode *N, bool IsCandidateForPruning = true) {
277 assert(N->getOpcode() != ISD::DELETED_NODE &&
278 "Deleted Node added to Worklist");
279
280 // Skip handle nodes as they can't usefully be combined and confuse the
281 // zero-use deletion strategy.
282 if (N->getOpcode() == ISD::HANDLENODE)
283 return;
284
285 if (IsCandidateForPruning)
286 ConsiderForPruning(N);
287
288 if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
289 Worklist.push_back(N);
290 }
291
292 /// Remove all instances of N from the worklist.
293 void removeFromWorklist(SDNode *N) {
294 CombinedNodes.erase(N);
295 PruningList.remove(N);
296 StoreRootCountMap.erase(N);
297
298 auto It = WorklistMap.find(N);
299 if (It == WorklistMap.end())
300 return; // Not in the worklist.
301
302 // Null out the entry rather than erasing it to avoid a linear operation.
303 Worklist[It->second] = nullptr;
304 WorklistMap.erase(It);
305 }
306
307 void deleteAndRecombine(SDNode *N);
308 bool recursivelyDeleteUnusedNodes(SDNode *N);
309
310 /// Replaces all uses of the results of one DAG node with new values.
311 SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
312 bool AddTo = true);
313
314 /// Replaces all uses of the results of one DAG node with new values.
315 SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
316 return CombineTo(N, &Res, 1, AddTo);
317 }
318
319 /// Replaces all uses of the results of one DAG node with new values.
320 SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
321 bool AddTo = true) {
322 SDValue To[] = { Res0, Res1 };
323 return CombineTo(N, To, 2, AddTo);
324 }
325
326 void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
327
328 private:
329 unsigned MaximumLegalStoreInBits;
330
331 /// Check the specified integer node value to see if it can be simplified or
332 /// if things it uses can be simplified by bit propagation.
333 /// If so, return true.
334 bool SimplifyDemandedBits(SDValue Op) {
335 unsigned BitWidth = Op.getScalarValueSizeInBits();
336 APInt DemandedBits = APInt::getAllOnes(BitWidth);
337 return SimplifyDemandedBits(Op, DemandedBits);
338 }
339
340 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
341 EVT VT = Op.getValueType();
342 APInt DemandedElts = VT.isFixedLengthVector()
343 ? APInt::getAllOnes(VT.getVectorNumElements())
344 : APInt(1, 1);
345 return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, false);
346 }
347
348 /// Check the specified vector node value to see if it can be simplified or
349 /// if things it uses can be simplified as it only uses some of the
350 /// elements. If so, return true.
351 bool SimplifyDemandedVectorElts(SDValue Op) {
352 // TODO: For now just pretend it cannot be simplified.
353 if (Op.getValueType().isScalableVector())
354 return false;
355
356 unsigned NumElts = Op.getValueType().getVectorNumElements();
357 APInt DemandedElts = APInt::getAllOnes(NumElts);
358 return SimplifyDemandedVectorElts(Op, DemandedElts);
359 }
360
361 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
362 const APInt &DemandedElts,
363 bool AssumeSingleUse = false);
364 bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
365 bool AssumeSingleUse = false);
366
367 bool CombineToPreIndexedLoadStore(SDNode *N);
368 bool CombineToPostIndexedLoadStore(SDNode *N);
369 SDValue SplitIndexingFromLoad(LoadSDNode *LD);
370 bool SliceUpLoad(SDNode *N);
371
372 // Looks up the chain to find a unique (unaliased) store feeding the passed
373 // load. If no such store is found, returns a nullptr.
374 // Note: This will look past a CALLSEQ_START if the load is chained to it,
375 // so that it can find stack stores for byval params.
376 StoreSDNode *getUniqueStoreFeeding(LoadSDNode *LD, int64_t &Offset);
377 // Scalars have size 0 to distinguish from singleton vectors.
378 SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
379 bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
380 bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);
381
382 /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
383 /// load.
384 ///
385 /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
386 /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
387 /// \param EltNo index of the vector element to load.
388 /// \param OriginalLoad load that EVE came from to be replaced.
389 /// \returns EVE on success, SDValue() on failure.
390 SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
391 SDValue EltNo,
392 LoadSDNode *OriginalLoad);
393 void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
394 SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
395 SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
396 SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
397 SDValue PromoteIntBinOp(SDValue Op);
398 SDValue PromoteIntShiftOp(SDValue Op);
399 SDValue PromoteExtend(SDValue Op);
400 bool PromoteLoad(SDValue Op);
401
402 SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
403 SDValue RHS, SDValue True, SDValue False,
404 ISD::CondCode CC);
405
406 /// Call the node-specific routine that knows how to fold each
407 /// particular type of node. If that doesn't do anything, try the
408 /// target-specific DAG combines.
409 SDValue combine(SDNode *N);
410
411 // Visitation implementation - Implement dag node combining for different
412 // node types. The semantics are as follows:
413 // Return Value:
414 // SDValue.getNode() == 0 - No change was made
415 // SDValue.getNode() == N - N was replaced, is dead and has been handled.
416 // otherwise - N should be replaced by the returned Operand.
417 //
418 SDValue visitTokenFactor(SDNode *N);
419 SDValue visitMERGE_VALUES(SDNode *N);
420 SDValue visitADD(SDNode *N);
421 SDValue visitADDLike(SDNode *N);
422 SDValue visitADDLikeCommutative(SDValue N0, SDValue N1, SDNode *LocReference);
423 SDValue visitSUB(SDNode *N);
424 SDValue visitADDSAT(SDNode *N);
425 SDValue visitSUBSAT(SDNode *N);
426 SDValue visitADDC(SDNode *N);
427 SDValue visitADDO(SDNode *N);
428 SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
429 SDValue visitSUBC(SDNode *N);
430 SDValue visitSUBO(SDNode *N);
431 SDValue visitADDE(SDNode *N);
432 SDValue visitUADDO_CARRY(SDNode *N);
433 SDValue visitSADDO_CARRY(SDNode *N);
434 SDValue visitUADDO_CARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
435 SDNode *N);
436 SDValue visitSADDO_CARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
437 SDNode *N);
438 SDValue visitSUBE(SDNode *N);
439 SDValue visitUSUBO_CARRY(SDNode *N);
440 SDValue visitSSUBO_CARRY(SDNode *N);
441 SDValue visitMUL(SDNode *N);
442 SDValue visitMULFIX(SDNode *N);
443 SDValue useDivRem(SDNode *N);
444 SDValue visitSDIV(SDNode *N);
445 SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
446 SDValue visitUDIV(SDNode *N);
447 SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
448 SDValue visitREM(SDNode *N);
449 SDValue visitMULHU(SDNode *N);
450 SDValue visitMULHS(SDNode *N);
451 SDValue visitAVG(SDNode *N);
452 SDValue visitABD(SDNode *N);
453 SDValue visitSMUL_LOHI(SDNode *N);
454 SDValue visitUMUL_LOHI(SDNode *N);
455 SDValue visitMULO(SDNode *N);
456 SDValue visitIMINMAX(SDNode *N);
457 SDValue visitAND(SDNode *N);
458 SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
459 SDValue visitOR(SDNode *N);
460 SDValue visitORLike(SDValue N0, SDValue N1, const SDLoc &DL);
461 SDValue visitXOR(SDNode *N);
462 SDValue SimplifyVCastOp(SDNode *N, const SDLoc &DL);
463 SDValue SimplifyVBinOp(SDNode *N, const SDLoc &DL);
464 SDValue visitSHL(SDNode *N);
465 SDValue visitSRA(SDNode *N);
466 SDValue visitSRL(SDNode *N);
467 SDValue visitFunnelShift(SDNode *N);
468 SDValue visitSHLSAT(SDNode *N);
469 SDValue visitRotate(SDNode *N);
470 SDValue visitABS(SDNode *N);
471 SDValue visitBSWAP(SDNode *N);
472 SDValue visitBITREVERSE(SDNode *N);
473 SDValue visitCTLZ(SDNode *N);
474 SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
475 SDValue visitCTTZ(SDNode *N);
476 SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
477 SDValue visitCTPOP(SDNode *N);
478 SDValue visitSELECT(SDNode *N);
479 SDValue visitVSELECT(SDNode *N);
480 SDValue visitVP_SELECT(SDNode *N);
481 SDValue visitSELECT_CC(SDNode *N);
482 SDValue visitSETCC(SDNode *N);
483 SDValue visitSETCCCARRY(SDNode *N);
484 SDValue visitSIGN_EXTEND(SDNode *N);
485 SDValue visitZERO_EXTEND(SDNode *N);
486 SDValue visitANY_EXTEND(SDNode *N);
487 SDValue visitAssertExt(SDNode *N);
488 SDValue visitAssertAlign(SDNode *N);
489 SDValue visitSIGN_EXTEND_INREG(SDNode *N);
490 SDValue visitEXTEND_VECTOR_INREG(SDNode *N);
491 SDValue visitTRUNCATE(SDNode *N);
492 SDValue visitBITCAST(SDNode *N);
493 SDValue visitFREEZE(SDNode *N);
494 SDValue visitBUILD_PAIR(SDNode *N);
495 SDValue visitFADD(SDNode *N);
496 SDValue visitVP_FADD(SDNode *N);
497 SDValue visitVP_FSUB(SDNode *N);
498 SDValue visitSTRICT_FADD(SDNode *N);
499 SDValue visitFSUB(SDNode *N);
500 SDValue visitFMUL(SDNode *N);
501 template <class MatchContextClass> SDValue visitFMA(SDNode *N);
502 SDValue visitFMAD(SDNode *N);
503 SDValue visitFDIV(SDNode *N);
504 SDValue visitFREM(SDNode *N);
505 SDValue visitFSQRT(SDNode *N);
506 SDValue visitFCOPYSIGN(SDNode *N);
507 SDValue visitFPOW(SDNode *N);
508 SDValue visitSINT_TO_FP(SDNode *N);
509 SDValue visitUINT_TO_FP(SDNode *N);
510 SDValue visitFP_TO_SINT(SDNode *N);
511 SDValue visitFP_TO_UINT(SDNode *N);
512 SDValue visitXRINT(SDNode *N);
513 SDValue visitFP_ROUND(SDNode *N);
514 SDValue visitFP_EXTEND(SDNode *N);
515 SDValue visitFNEG(SDNode *N);
516 SDValue visitFABS(SDNode *N);
517 SDValue visitFCEIL(SDNode *N);
518 SDValue visitFTRUNC(SDNode *N);
519 SDValue visitFFREXP(SDNode *N);
520 SDValue visitFFLOOR(SDNode *N);
521 SDValue visitFMinMax(SDNode *N);
522 SDValue visitBRCOND(SDNode *N);
523 SDValue visitBR_CC(SDNode *N);
524 SDValue visitLOAD(SDNode *N);
525
526 SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
527 SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
528 SDValue replaceStoreOfInsertLoad(StoreSDNode *ST);
529
530 bool refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(SDNode *N);
531
532 SDValue visitSTORE(SDNode *N);
533 SDValue visitATOMIC_STORE(SDNode *N);
534 SDValue visitLIFETIME_END(SDNode *N);
535 SDValue visitINSERT_VECTOR_ELT(SDNode *N);
536 SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
537 SDValue visitBUILD_VECTOR(SDNode *N);
538 SDValue visitCONCAT_VECTORS(SDNode *N);
539 SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
540 SDValue visitVECTOR_SHUFFLE(SDNode *N);
541 SDValue visitSCALAR_TO_VECTOR(SDNode *N);
542 SDValue visitINSERT_SUBVECTOR(SDNode *N);
543 SDValue visitMLOAD(SDNode *N);
544 SDValue visitMSTORE(SDNode *N);
545 SDValue visitMGATHER(SDNode *N);
546 SDValue visitMSCATTER(SDNode *N);
547 SDValue visitVPGATHER(SDNode *N);
548 SDValue visitVPSCATTER(SDNode *N);
549 SDValue visitVP_STRIDED_LOAD(SDNode *N);
550 SDValue visitVP_STRIDED_STORE(SDNode *N);
551 SDValue visitFP_TO_FP16(SDNode *N);
552 SDValue visitFP16_TO_FP(SDNode *N);
553 SDValue visitFP_TO_BF16(SDNode *N);
554 SDValue visitBF16_TO_FP(SDNode *N);
555 SDValue visitVECREDUCE(SDNode *N);
556 SDValue visitVPOp(SDNode *N);
557 SDValue visitGET_FPENV_MEM(SDNode *N);
558 SDValue visitSET_FPENV_MEM(SDNode *N);
559
560 template <class MatchContextClass>
561 SDValue visitFADDForFMACombine(SDNode *N);
562 template <class MatchContextClass>
563 SDValue visitFSUBForFMACombine(SDNode *N);
564 SDValue visitFMULForFMADistributiveCombine(SDNode *N);
565
566 SDValue XformToShuffleWithZero(SDNode *N);
567 bool reassociationCanBreakAddressingModePattern(unsigned Opc,
568 const SDLoc &DL,
569 SDNode *N,
570 SDValue N0,
571 SDValue N1);
572 SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
573 SDValue N1, SDNodeFlags Flags);
574 SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
575 SDValue N1, SDNodeFlags Flags);
576 SDValue reassociateReduction(unsigned RedOpc, unsigned Opc, const SDLoc &DL,
577 EVT VT, SDValue N0, SDValue N1,
578 SDNodeFlags Flags = SDNodeFlags());
579
580 SDValue visitShiftByConstant(SDNode *N);
581
582 SDValue foldSelectOfConstants(SDNode *N);
583 SDValue foldVSelectOfConstants(SDNode *N);
584 SDValue foldBinOpIntoSelect(SDNode *BO);
585 bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
586 SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
587 SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
588 SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
589 SDValue N2, SDValue N3, ISD::CondCode CC,
590 bool NotExtCompare = false);
591 SDValue convertSelectOfFPConstantsToLoadOffset(
592 const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
593 ISD::CondCode CC);
594 SDValue foldSignChangeInBitcast(SDNode *N);
595 SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
596 SDValue N2, SDValue N3, ISD::CondCode CC);
597 SDValue foldSelectOfBinops(SDNode *N);
598 SDValue foldSextSetcc(SDNode *N);
599 SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
600 const SDLoc &DL);
601 SDValue foldSubToUSubSat(EVT DstVT, SDNode *N, const SDLoc &DL);
602 SDValue foldABSToABD(SDNode *N, const SDLoc &DL);
603 SDValue unfoldMaskedMerge(SDNode *N);
604 SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
605 SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
606 const SDLoc &DL, bool foldBooleans);
607 SDValue rebuildSetCC(SDValue N);
608
609 bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
610 SDValue &CC, bool MatchStrict = false) const;
611 bool isOneUseSetCC(SDValue N) const;
612
613 SDValue foldAddToAvg(SDNode *N, const SDLoc &DL);
614 SDValue foldSubToAvg(SDNode *N, const SDLoc &DL);
615
616 SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
617 unsigned HiOp);
618 SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
619 SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
620 const TargetLowering &TLI);
621
622 SDValue CombineExtLoad(SDNode *N);
623 SDValue CombineZExtLogicopShiftLoad(SDNode *N);
624 SDValue combineRepeatedFPDivisors(SDNode *N);
625 SDValue combineFMulOrFDivWithIntPow2(SDNode *N);
626 SDValue mergeInsertEltWithShuffle(SDNode *N, unsigned InsIndex);
627 SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
628 SDValue combineInsertEltToLoad(SDNode *N, unsigned InsIndex);
629 SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
630 SDValue BuildSDIV(SDNode *N);
631 SDValue BuildSDIVPow2(SDNode *N);
632 SDValue BuildUDIV(SDNode *N);
633 SDValue BuildSREMPow2(SDNode *N);
634 SDValue buildOptimizedSREM(SDValue N0, SDValue N1, SDNode *N);
635 SDValue BuildLogBase2(SDValue V, const SDLoc &DL,
636 bool KnownNeverZero = false,
637 bool InexpensiveOnly = false,
638 std::optional<EVT> OutVT = std::nullopt);
639 SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags);
640 SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
641 SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
642 SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
643 SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
644 SDNodeFlags Flags, bool Reciprocal);
645 SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
646 SDNodeFlags Flags, bool Reciprocal);
647 SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
648 bool DemandHighBits = true);
649 SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
650 SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
651 SDValue InnerPos, SDValue InnerNeg, bool HasPos,
652 unsigned PosOpcode, unsigned NegOpcode,
653 const SDLoc &DL);
654 SDValue MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, SDValue Neg,
655 SDValue InnerPos, SDValue InnerNeg, bool HasPos,
656 unsigned PosOpcode, unsigned NegOpcode,
657 const SDLoc &DL);
658 SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
659 SDValue MatchLoadCombine(SDNode *N);
660 SDValue mergeTruncStores(StoreSDNode *N);
661 SDValue reduceLoadWidth(SDNode *N);
662 SDValue ReduceLoadOpStoreWidth(SDNode *N);
664 SDValue TransformFPLoadStorePair(SDNode *N);
665 SDValue convertBuildVecZextToZext(SDNode *N);
666 SDValue convertBuildVecZextToBuildVecWithZeros(SDNode *N);
667 SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
668 SDValue reduceBuildVecTruncToBitCast(SDNode *N);
669 SDValue reduceBuildVecToShuffle(SDNode *N);
670 SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
671 ArrayRef<int> VectorMask, SDValue VecIn1,
672 SDValue VecIn2, unsigned LeftIdx,
673 bool DidSplitVec);
674 SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
675
676 /// Walk up chain skipping non-aliasing memory nodes,
677 /// looking for aliasing nodes and adding them to the Aliases vector.
678 void GatherAllAliases(SDNode *N, SDValue OriginalChain,
679 SmallVectorImpl<SDValue> &Aliases);
680
681 /// Return true if there is any possibility that the two addresses overlap.
682 bool mayAlias(SDNode *Op0, SDNode *Op1) const;
683
684 /// Walk up chain skipping non-aliasing memory nodes, looking for a better
685 /// chain (aliasing node).
686 SDValue FindBetterChain(SDNode *N, SDValue Chain);
687
688 /// Try to replace a store and any possibly adjacent stores on
689 /// consecutive chains with better chains. Return true only if St is
690 /// replaced.
691 ///
692 /// Notice that other chains may still be replaced even if the function
693 /// returns false.
694 bool findBetterNeighborChains(StoreSDNode *St);
695
696 // Helper for findBetterNeighborChains. Walk up the store chain and add
697 // additional chained stores that do not overlap and can be parallelized.
698 bool parallelizeChainedStores(StoreSDNode *St);
699
700 /// Holds a pointer to an LSBaseSDNode as well as information on where it
701 /// is located in a sequence of memory operations connected by a chain.
702 struct MemOpLink {
703 // Ptr to the mem node.
704 LSBaseSDNode *MemNode;
705
706 // Offset from the base ptr.
707 int64_t OffsetFromBase;
708
709 MemOpLink(LSBaseSDNode *N, int64_t Offset)
710 : MemNode(N), OffsetFromBase(Offset) {}
711 };
712
713 // Classify the origin of a stored value.
714 enum class StoreSource { Unknown, Constant, Extract, Load };
715 StoreSource getStoreSource(SDValue StoreVal) {
716 switch (StoreVal.getOpcode()) {
717 case ISD::Constant:
718 case ISD::ConstantFP:
719 return StoreSource::Constant;
720 case ISD::BUILD_VECTOR:
721 if (ISD::isBuildVectorOfConstantSDNodes(StoreVal.getNode()) ||
722 ISD::isBuildVectorOfConstantFPSDNodes(StoreVal.getNode()))
723 return StoreSource::Constant;
724 return StoreSource::Unknown;
725 case ISD::EXTRACT_VECTOR_ELT:
726 case ISD::EXTRACT_SUBVECTOR:
727 return StoreSource::Extract;
728 case ISD::LOAD:
729 return StoreSource::Load;
730 default:
731 return StoreSource::Unknown;
732 }
733 }
734
735 /// This is a helper function for visitMUL to check the profitability
736 /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
737 /// MulNode is the original multiply, AddNode is (add x, c1),
738 /// and ConstNode is c2.
739 bool isMulAddWithConstProfitable(SDNode *MulNode, SDValue AddNode,
740 SDValue ConstNode);
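 // Worked example of the fold guarded by the helper above (illustrative
 // constants, not taken from this file): with c1 = 4 and c2 = 8,
 //   (mul (add x, 4), 8) -> (add (mul x, 8), 32)
 // i.e. c1*c2 is folded into a new constant (32) at compile time.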
741
742 /// This is a helper function for visitAND and visitZERO_EXTEND. Returns
743 /// true if the (and (load x) c) pattern matches an extload. ExtVT returns
744 /// the type of the loaded value to be extended.
745 bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
746 EVT LoadResultTy, EVT &ExtVT);
747
748 /// Helper function to calculate whether the given Load/Store can have its
749 /// width reduced to ExtVT.
750 bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
751 EVT &MemVT, unsigned ShAmt = 0);
752
753 /// Used by BackwardsPropagateMask to find suitable loads.
754 bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
755 SmallPtrSetImpl<SDNode*> &NodesWithConsts,
756 ConstantSDNode *Mask, SDNode *&NodeToMask);
757 /// Attempt to propagate a given AND node back to load leaves so that they
758 /// can be combined into narrow loads.
759 bool BackwardsPropagateMask(SDNode *N);
760
761 /// Helper function for mergeConsecutiveStores which merges the component
762 /// store chains.
763 SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
764 unsigned NumStores);
765
766 /// Helper function for mergeConsecutiveStores which checks if all the store
767 /// nodes have the same underlying object. We can still reuse the first
768 /// store's pointer info if all the stores are from the same object.
769 bool hasSameUnderlyingObj(ArrayRef<MemOpLink> StoreNodes);
770
771 /// This is a helper function for mergeConsecutiveStores. When the source
772 /// elements of the consecutive stores are all constants or all extracted
773 /// vector elements, try to merge them into one larger store introducing
774 /// bitcasts if necessary. \return True if a merged store was created.
775 bool mergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
776 EVT MemVT, unsigned NumStores,
777 bool IsConstantSrc, bool UseVector,
778 bool UseTrunc);
779
780 /// This is a helper function for mergeConsecutiveStores. Stores that
781 /// potentially may be merged with St are placed in StoreNodes. RootNode is
782 /// a chain predecessor to all store candidates.
783 void getStoreMergeCandidates(StoreSDNode *St,
784 SmallVectorImpl<MemOpLink> &StoreNodes,
785 SDNode *&Root);
786
787 /// Helper function for mergeConsecutiveStores. Checks if candidate stores
788 /// have indirect dependency through their operands. RootNode is the
789 /// predecessor to all stores calculated by getStoreMergeCandidates and is
790 /// used to prune the dependency check. \return True if safe to merge.
791 bool checkMergeStoreCandidatesForDependencies(
792 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
793 SDNode *RootNode);
794
795 /// This is a helper function for mergeConsecutiveStores. Given a list of
796 /// store candidates, find the first N that are consecutive in memory.
797 /// Returns 0 if there are not at least 2 consecutive stores to try merging.
798 unsigned getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
799 int64_t ElementSizeBytes) const;
800
801 /// This is a helper function for mergeConsecutiveStores. It is used for
802 /// store chains that are composed entirely of constant values.
803 bool tryStoreMergeOfConstants(SmallVectorImpl<MemOpLink> &StoreNodes,
804 unsigned NumConsecutiveStores,
805 EVT MemVT, SDNode *Root, bool AllowVectors);
806
807 /// This is a helper function for mergeConsecutiveStores. It is used for
808 /// store chains that are composed entirely of extracted vector elements.
809 /// When extracting multiple vector elements, try to store them in one
810 /// vector store rather than a sequence of scalar stores.
811 bool tryStoreMergeOfExtracts(SmallVectorImpl<MemOpLink> &StoreNodes,
812 unsigned NumConsecutiveStores, EVT MemVT,
813 SDNode *Root);
814
815 /// This is a helper function for mergeConsecutiveStores. It is used for
816 /// store chains that are composed entirely of loaded values.
817 bool tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
818 unsigned NumConsecutiveStores, EVT MemVT,
819 SDNode *Root, bool AllowVectors,
820 bool IsNonTemporalStore, bool IsNonTemporalLoad);
821
822 /// Merge consecutive store operations into a wide store.
823 /// This optimization uses wide integers or vectors when possible.
824 /// \return true if stores were merged.
825 bool mergeConsecutiveStores(StoreSDNode *St);
826
827 /// Try to transform a truncation where C is a constant:
828 /// (trunc (and X, C)) -> (and (trunc X), (trunc C))
829 ///
830 /// \p N needs to be a truncation and its first operand an AND. Other
831 /// requirements are checked by the function (e.g. that the trunc is
832 /// single-use); if they are not met, an empty SDValue is returned.
833 SDValue distributeTruncateThroughAnd(SDNode *N);
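 // Illustrative instance of the fold above (the types are an example only):
 //   (trunc:i16 (and:i32 X, 0x0000FFF0)) -> (and:i16 (trunc:i16 X), 0xFFF0)
 // so the mask constant is truncated together with the value.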
834
835 /// Helper function to determine whether the target supports operation
836 /// given by \p Opcode for type \p VT, that is, whether the operation
837 /// is legal or custom before legalizing operations, and whether it is
838 /// legal (but not custom) after legalization.
839 bool hasOperation(unsigned Opcode, EVT VT) {
840 return TLI.isOperationLegalOrCustom(Opcode, VT, LegalOperations);
841 }
842
843 public:
844 /// Runs the dag combiner on all nodes in the work list
845 void Run(CombineLevel AtLevel);
846
847 SelectionDAG &getDAG() const { return DAG; }
848
849 /// Returns a type large enough to hold any valid shift amount - before type
850 /// legalization these can be huge.
851 EVT getShiftAmountTy(EVT LHSTy) {
852 assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
853 return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
854 }
855
856 /// This method returns true if we are running before type legalization or
857 /// if the specified VT is legal.
858 bool isTypeLegal(const EVT &VT) {
859 if (!LegalTypes) return true;
860 return TLI.isTypeLegal(VT);
861 }
862
863 /// Convenience wrapper around TargetLowering::getSetCCResultType
864 EVT getSetCCResultType(EVT VT) const {
865 return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
866 }
867
868 void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
869 SDValue OrigLoad, SDValue ExtLoad,
870 ISD::NodeType ExtType);
871 };
872
873/// This class is a DAGUpdateListener that removes any deleted
874/// nodes from the worklist.
875class WorklistRemover : public SelectionDAG::DAGUpdateListener {
876 DAGCombiner &DC;
877
878public:
879 explicit WorklistRemover(DAGCombiner &dc)
880 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
881
882 void NodeDeleted(SDNode *N, SDNode *E) override {
883 DC.removeFromWorklist(N);
884 }
885};
886
887class WorklistInserter : public SelectionDAG::DAGUpdateListener {
888 DAGCombiner &DC;
889
890public:
891 explicit WorklistInserter(DAGCombiner &dc)
892 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
893
894 // FIXME: Ideally we could add N to the worklist, but this causes exponential
895 // compile time costs in large DAGs, e.g. Halide.
896 void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
897};
898
899} // end anonymous namespace
900
901//===----------------------------------------------------------------------===//
902// TargetLowering::DAGCombinerInfo implementation
903//===----------------------------------------------------------------------===//
904
905void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
906 ((DAGCombiner*)DC)->AddToWorklist(N);
907}
908
909SDValue TargetLowering::DAGCombinerInfo::
910CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
911 return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
912}
913
914SDValue TargetLowering::DAGCombinerInfo::
915CombineTo(SDNode *N, SDValue Res, bool AddTo) {
916 return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
917}
918
919SDValue TargetLowering::DAGCombinerInfo::
920CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
921 return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
922}
923
924bool TargetLowering::DAGCombinerInfo::
925recursivelyDeleteUnusedNodes(SDNode *N) {
926 return ((DAGCombiner*)DC)->recursivelyDeleteUnusedNodes(N);
927}
928
929void TargetLowering::DAGCombinerInfo::
930CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
931 return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
932}
933
934//===----------------------------------------------------------------------===//
935// Helper Functions
936//===----------------------------------------------------------------------===//
937
938void DAGCombiner::deleteAndRecombine(SDNode *N) {
939 removeFromWorklist(N);
940
941 // If the operands of this node are only used by the node, they will now be
942 // dead. Make sure to re-visit them and recursively delete dead nodes.
943 for (const SDValue &Op : N->ops())
944 // For an operand generating multiple values, one of the values may
945 // become dead allowing further simplification (e.g. split index
946 // arithmetic from an indexed load).
947 if (Op->hasOneUse() || Op->getNumValues() > 1)
948 AddToWorklist(Op.getNode());
949
950 DAG.DeleteNode(N);
951}
952
953// APInts must be the same size for most operations; this helper
954// function zero extends the shorter of the pair so that they match.
955// We provide an Offset so that we can create bitwidths that won't overflow.
956static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
957 unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
958 LHS = LHS.zext(Bits);
959 RHS = RHS.zext(Bits);
960}
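// Worked example for zeroExtendToMatch (hypothetical widths): an 8-bit LHS and
// a 16-bit RHS with Offset = 1 are both zero-extended to 17 bits, so the pair
// matches in width and the extra Offset bit leaves headroom against overflow.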
961
962// Return true if this node is a setcc, or is a select_cc
963// that selects between the target values used for true and false, making it
964// equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
965// the appropriate nodes based on the type of node we are checking. This
966// simplifies life a bit for the callers.
967bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
968 SDValue &CC, bool MatchStrict) const {
969 if (N.getOpcode() == ISD::SETCC) {
970 LHS = N.getOperand(0);
971 RHS = N.getOperand(1);
972 CC = N.getOperand(2);
973 return true;
974 }
975
976 if (MatchStrict &&
977 (N.getOpcode() == ISD::STRICT_FSETCC ||
978 N.getOpcode() == ISD::STRICT_FSETCCS)) {
979 LHS = N.getOperand(1);
980 RHS = N.getOperand(2);
981 CC = N.getOperand(3);
982 return true;
983 }
984
985 if (N.getOpcode() != ISD::SELECT_CC || !TLI.isConstTrueVal(N.getOperand(2)) ||
986 !TLI.isConstFalseVal(N.getOperand(3)))
987 return false;
988
989 if (TLI.getBooleanContents(N.getValueType()) ==
990 TargetLowering::UndefinedBooleanContent)
991 return false;
992
993 LHS = N.getOperand(0);
994 RHS = N.getOperand(1);
995 CC = N.getOperand(4);
996 return true;
997}
998
999/// Return true if this is a SetCC-equivalent operation with only one use.
1000/// If this is true, it allows the users to invert the operation for free when
1001/// it is profitable to do so.
1002bool DAGCombiner::isOneUseSetCC(SDValue N) const {
1003 SDValue N0, N1, N2;
1004 if (isSetCCEquivalent(N, N0, N1, N2) && N->hasOneUse())
1005 return true;
1006 return false;
1007}
1008
1009static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy) {
1010 if (!ScalarTy.isSimple())
1011 return false;
1012
1013 uint64_t MaskForTy = 0ULL;
1014 switch (ScalarTy.getSimpleVT().SimpleTy) {
1015 case MVT::i8:
1016 MaskForTy = 0xFFULL;
1017 break;
1018 case MVT::i16:
1019 MaskForTy = 0xFFFFULL;
1020 break;
1021 case MVT::i32:
1022 MaskForTy = 0xFFFFFFFFULL;
1023 break;
1024 default:
1025 return false;
1026 break;
1027 }
1028
1029 APInt Val;
1030 if (ISD::isConstantSplatVector(N, Val))
1031 return Val.getLimitedValue() == MaskForTy;
1032
1033 return false;
1034}
1035
1036// Determines if it is a constant integer or a splat/build vector of constant
1037// integers (and undefs).
1038// Do not permit build vector implicit truncation.
1039static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
1040 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
1041 return !(Const->isOpaque() && NoOpaques);
1042 if (N.getOpcode() != ISD::BUILD_VECTOR && N.getOpcode() != ISD::SPLAT_VECTOR)
1043 return false;
1044 unsigned BitWidth = N.getScalarValueSizeInBits();
1045 for (const SDValue &Op : N->op_values()) {
1046 if (Op.isUndef())
1047 continue;
1048 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
1049 if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
1050 (Const->isOpaque() && NoOpaques))
1051 return false;
1052 }
1053 return true;
1054}
1055
1056// Determines if a BUILD_VECTOR is composed of all constants, possibly mixed
1057// with undefs.
1058static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
1059 if (V.getOpcode() != ISD::BUILD_VECTOR)
1060 return false;
1061 return isConstantOrConstantVector(V, NoOpaques) ||
1062 ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
1063}
1064
1065// Determine if this is an indexed load with an opaque target constant index.
1066static bool canSplitIdx(LoadSDNode *LD) {
1067 return MaySplitLoadIndex &&
1068 (LD->getOperand(2).getOpcode() != ISD::TargetConstant ||
1069 !cast<ConstantSDNode>(LD->getOperand(2))->isOpaque());
1070}
1071
1072bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
1073 const SDLoc &DL,
1074 SDNode *N,
1075 SDValue N0,
1076 SDValue N1) {
1077 // Currently this only tries to ensure we don't undo the GEP splits done by
1078 // CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this,
1079 // we check if the following transformation would be problematic:
1080 // (load/store (add, (add, x, offset1), offset2)) ->
1081 // (load/store (add, x, offset1+offset2)).
1082
1083 // (load/store (add, (add, x, y), offset2)) ->
1084 // (load/store (add, (add, x, offset2), y)).
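 // Illustrative scenario (constants are hypothetical): if CodeGenPrepare split
 // a large GEP offset so that (add (add x, 4096), 16) keeps the +16 within the
 // target's load/store immediate range, re-folding it to (add x, 4112) could
 // push the whole offset out of that range; this check rejects such cases.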
1085
1086 if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD)
1087 return false;
1088
1089 auto *C2 = dyn_cast<ConstantSDNode>(N1);
1090 if (!C2)
1091 return false;
1092
1093 const APInt &C2APIntVal = C2->getAPIntValue();
1094 if (C2APIntVal.getSignificantBits() > 64)
1095 return false;
1096
1097 if (auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
1098 if (N0.hasOneUse())
1099 return false;
1100
1101 const APInt &C1APIntVal = C1->getAPIntValue();
1102 const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
1103 if (CombinedValueIntVal.getSignificantBits() > 64)
1104 return false;
1105 const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();
1106
1107 for (SDNode *Node : N->uses()) {
1108 if (auto *LoadStore = dyn_cast<MemSDNode>(Node)) {
1109 // Is x[offset2] already not a legal addressing mode? If so then
1110 // reassociating the constants breaks nothing (we test offset2 because
1111 // that's the one we hope to fold into the load or store).
1112 TargetLoweringBase::AddrMode AM;
1113 AM.HasBaseReg = true;
1114 AM.BaseOffs = C2APIntVal.getSExtValue();
1115 EVT VT = LoadStore->getMemoryVT();
1116 unsigned AS = LoadStore->getAddressSpace();
1117 Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1118 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1119 continue;
1120
1121 // Would x[offset1+offset2] still be a legal addressing mode?
1122 AM.BaseOffs = CombinedValue;
1123 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1124 return true;
1125 }
1126 }
1127 } else {
1128 if (auto *GA = dyn_cast<GlobalAddressSDNode>(N0.getOperand(1)))
1129 if (GA->getOpcode() == ISD::GlobalAddress && TLI.isOffsetFoldingLegal(GA))
1130 return false;
1131
1132 for (SDNode *Node : N->uses()) {
1133 auto *LoadStore = dyn_cast<MemSDNode>(Node);
1134 if (!LoadStore)
1135 return false;
1136
1137 // Is x[offset2] a legal addressing mode? If so then
1138 // reassociating the constants breaks the address pattern.
1139 TargetLoweringBase::AddrMode AM;
1140 AM.HasBaseReg = true;
1141 AM.BaseOffs = C2APIntVal.getSExtValue();
1142 EVT VT = LoadStore->getMemoryVT();
1143 unsigned AS = LoadStore->getAddressSpace();
1144 Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1145 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1146 return false;
1147 }
1148 return true;
1149 }
1150
1151 return false;
1152}
1153
1154/// Helper for DAGCombiner::reassociateOps. Try to reassociate (Opc N0, N1) if
1155/// \p N0 is the same kind of operation as \p Opc.
1156SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
1157 SDValue N0, SDValue N1,
1158 SDNodeFlags Flags) {
1159 EVT VT = N0.getValueType();
1160
1161 if (N0.getOpcode() != Opc)
1162 return SDValue();
1163
1164 SDValue N00 = N0.getOperand(0);
1165 SDValue N01 = N0.getOperand(1);
1166
1168 SDNodeFlags NewFlags;
1169 if (N0.getOpcode() == ISD::ADD && N0->getFlags().hasNoUnsignedWrap() &&
1170 Flags.hasNoUnsignedWrap())
1171 NewFlags.setNoUnsignedWrap(true);
1172
1173 if (DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
1174 // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
1175 if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, {N01, N1}))
1176 return DAG.getNode(Opc, DL, VT, N00, OpNode, NewFlags);
1177 return SDValue();
1178 }
1179 if (TLI.isReassocProfitable(DAG, N0, N1)) {
1180 // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
1181 // iff (op x, c1) has one use
1182 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1, NewFlags);
1183 return DAG.getNode(Opc, DL, VT, OpNode, N01, NewFlags);
1184 }
1185 }
1186
1187 // Check for repeated operand logic simplifications.
1188 if (Opc == ISD::AND || Opc == ISD::OR) {
1189 // (N00 & N01) & N00 --> N00 & N01
1190 // (N00 & N01) & N01 --> N00 & N01
1191 // (N00 | N01) | N00 --> N00 | N01
1192 // (N00 | N01) | N01 --> N00 | N01
1193 if (N1 == N00 || N1 == N01)
1194 return N0;
1195 }
1196 if (Opc == ISD::XOR) {
1197 // (N00 ^ N01) ^ N00 --> N01
1198 if (N1 == N00)
1199 return N01;
1200 // (N00 ^ N01) ^ N01 --> N00
1201 if (N1 == N01)
1202 return N00;
1203 }
1204
1205 if (TLI.isReassocProfitable(DAG, N0, N1)) {
1206 if (N1 != N01) {
1207 // Reassociate if (op N00, N1) already exist
1208 if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N00, N1})) {
1209 // If (Op (Op N00, N1), N01) already exists, we need to stop
1210 // reassociating to avoid an infinite loop.
1211 if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N01}))
1212 return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N01);
1213 }
1214 }
1215
1216 if (N1 != N00) {
1217 // Reassociate if (op N01, N1) already exist
1218 if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N01, N1})) {
1219 // If (Op (Op N01, N1), N00) already exists, we need to stop
1220 // reassociating to avoid an infinite loop.
1221 if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N00}))
1222 return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N00);
1223 }
1224 }
1225
1226 // Reassociate the operands from (OR/AND (OR/AND(N00, N01)), N1) to (OR/AND
1227 // (OR/AND(N00, N1)), N01) when N00 and N1 are comparisons with the same
1228 // predicate, or to (OR/AND (OR/AND(N1, N01)), N00) when N01 and N1 are
1229 // comparisons with the same predicate. This enables optimizations such as
1230 // the following one:
1231 // CMP(A,C)||CMP(B,C) => CMP(MIN/MAX(A,B), C)
1232 // CMP(A,C)&&CMP(B,C) => CMP(MIN/MAX(A,B), C)
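 // For instance (an illustrative sketch, assuming like-signed compares):
 //   (A < C) || (B < C) -> (min(A, B) < C)
 //   (A > C) && (B > C) -> (min(A, B) > C)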
1233 if (Opc == ISD::AND || Opc == ISD::OR) {
1234 if (N1->getOpcode() == ISD::SETCC && N00->getOpcode() == ISD::SETCC &&
1235 N01->getOpcode() == ISD::SETCC) {
1236 ISD::CondCode CC1 = cast<CondCodeSDNode>(N1.getOperand(2))->get();
1237 ISD::CondCode CC00 = cast<CondCodeSDNode>(N00.getOperand(2))->get();
1238 ISD::CondCode CC01 = cast<CondCodeSDNode>(N01.getOperand(2))->get();
1239 if (CC1 == CC00 && CC1 != CC01) {
1240 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1, Flags);
1241 return DAG.getNode(Opc, DL, VT, OpNode, N01, Flags);
1242 }
1243 if (CC1 == CC01 && CC1 != CC00) {
1244 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N01, N1, Flags);
1245 return DAG.getNode(Opc, DL, VT, OpNode, N00, Flags);
1246 }
1247 }
1248 }
1249 }
1250
1251 return SDValue();
1252}
1253
1254/// Try to reassociate commutative (Opc N0, N1) if either \p N0 or \p N1 is the
1255/// same kind of operation as \p Opc.
1256SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
1257 SDValue N1, SDNodeFlags Flags) {
1258 assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");
1259
1260 // Floating-point reassociation is not allowed without loose FP math.
1261 if (N0.getValueType().isFloatingPoint() ||
1262 N1.getValueType().isFloatingPoint())
1263 if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros())
1264 return SDValue();
1265
1266 if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1, Flags))
1267 return Combined;
1268 if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0, Flags))
1269 return Combined;
1270 return SDValue();
1271}
1272
1273// Try to fold Opc(vecreduce(x), vecreduce(y)) -> vecreduce(Opc(x, y))
1274// Note that we only expect Flags to be passed from FP operations. For integer
1275// operations they need to be dropped.
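// E.g., a sketch of the shape of this fold using integer add:
//   (add (vecreduce_add x), (vecreduce_add y)) -> (vecreduce_add (add x, y))
// which replaces two reductions with one when the target reports this as
// profitable.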
1276SDValue DAGCombiner::reassociateReduction(unsigned RedOpc, unsigned Opc,
1277 const SDLoc &DL, EVT VT, SDValue N0,
1278 SDValue N1, SDNodeFlags Flags) {
1279 if (N0.getOpcode() == RedOpc && N1.getOpcode() == RedOpc &&
1280 N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
1281 N0->hasOneUse() && N1->hasOneUse() &&
1283 TLI.shouldReassociateReduction(RedOpc, N0.getOperand(0).getValueType())) {
1284 SelectionDAG::FlagInserter FlagsInserter(DAG, Flags);
1285 return DAG.getNode(RedOpc, DL, VT,
1286 DAG.getNode(Opc, DL, N0.getOperand(0).getValueType(),
1287 N0.getOperand(0), N1.getOperand(0)));
1288 }
1289 return SDValue();
1290}
1291
1292SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
1293 bool AddTo) {
1294 assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
1295 ++NodesCombined;
1296 LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
1297 To[0].dump(&DAG);
1298 dbgs() << " and " << NumTo - 1 << " other values\n");
1299 for (unsigned i = 0, e = NumTo; i != e; ++i)
1300 assert((!To[i].getNode() ||
1301 N->getValueType(i) == To[i].getValueType()) &&
1302 "Cannot combine value to value of different type!");
1303
1304 WorklistRemover DeadNodes(*this);
1305 DAG.ReplaceAllUsesWith(N, To);
1306 if (AddTo) {
1307 // Push the new nodes and any users onto the worklist
1308 for (unsigned i = 0, e = NumTo; i != e; ++i) {
1309 if (To[i].getNode())
1310 AddToWorklistWithUsers(To[i].getNode());
1311 }
1312 }
1313
1314 // Finally, if the node is now dead, remove it from the graph. The node
1315 // may not be dead if the replacement process recursively simplified to
1316 // something else needing this node.
1317 if (N->use_empty())
1318 deleteAndRecombine(N);
1319 return SDValue(N, 0);
1320}
1321
1322void DAGCombiner::
1323CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
1324 // Replace the old value with the new one.
1325 ++NodesCombined;
1326 LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.dump(&DAG);
1327 dbgs() << "\nWith: "; TLO.New.dump(&DAG); dbgs() << '\n');
1328
1329 // Replace all uses.
1330 DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
1331
1332 // Push the new node and any (possibly new) users onto the worklist.
1333 AddToWorklistWithUsers(TLO.New.getNode());
1334
1335 // Finally, if the node is now dead, remove it from the graph.
1336 recursivelyDeleteUnusedNodes(TLO.Old.getNode());
1337}
1338
1339/// Check the specified integer node value to see if it can be simplified or if
1340/// things it uses can be simplified by bit propagation. If so, return true.
1341bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
1342 const APInt &DemandedElts,
1343 bool AssumeSingleUse) {
1344 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1345 KnownBits Known;
1346 if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, 0,
1347 AssumeSingleUse))
1348 return false;
1349
1350 // Revisit the node.
1351 AddToWorklist(Op.getNode());
1352
1353 CommitTargetLoweringOpt(TLO);
1354 return true;
1355}
1356
1357/// Check the specified vector node value to see if it can be simplified or
1358/// if things it uses can be simplified as it only uses some of the elements.
1359/// If so, return true.
1360bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
1361 const APInt &DemandedElts,
1362 bool AssumeSingleUse) {
1363 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1364 APInt KnownUndef, KnownZero;
1365 if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
1366 TLO, 0, AssumeSingleUse))
1367 return false;
1368
1369 // Revisit the node.
1370 AddToWorklist(Op.getNode());
1371
1372 CommitTargetLoweringOpt(TLO);
1373 return true;
1374}
1375
1376void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
1377 SDLoc DL(Load);
1378 EVT VT = Load->getValueType(0);
1379 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
1380
1381 LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
1382 Trunc.dump(&DAG); dbgs() << '\n');
1383
1384 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
1385 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
1386
1387 AddToWorklist(Trunc.getNode());
1388 recursivelyDeleteUnusedNodes(Load);
1389}
1390
1391SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
1392 Replace = false;
1393 SDLoc DL(Op);
1394 if (ISD::isUNINDEXEDLoad(Op.getNode())) {
1395 LoadSDNode *LD = cast<LoadSDNode>(Op);
1396 EVT MemVT = LD->getMemoryVT();
1397 ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1398 : LD->getExtensionType();
1399 Replace = true;
1400 return DAG.getExtLoad(ExtType, DL, PVT,
1401 LD->getChain(), LD->getBasePtr(),
1402 MemVT, LD->getMemOperand());
1403 }
1404
1405 unsigned Opc = Op.getOpcode();
1406 switch (Opc) {
1407 default: break;
1408 case ISD::AssertSext:
1409 if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
1410 return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
1411 break;
1412 case ISD::AssertZext:
1413 if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
1414 return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
1415 break;
1416 case ISD::Constant: {
1417 unsigned ExtOpc =
1418 Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1419 return DAG.getNode(ExtOpc, DL, PVT, Op);
1420 }
1421 }
1422
1423 if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
1424 return SDValue();
1425 return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
1426}
1427
1428SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1429 if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
1430 return SDValue();
1431 EVT OldVT = Op.getValueType();
1432 SDLoc DL(Op);
1433 bool Replace = false;
1434 SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1435 if (!NewOp.getNode())
1436 return SDValue();
1437 AddToWorklist(NewOp.getNode());
1438
1439 if (Replace)
1440 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1441 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1442 DAG.getValueType(OldVT));
1443}
1444
1445SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1446 EVT OldVT = Op.getValueType();
1447 SDLoc DL(Op);
1448 bool Replace = false;
1449 SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1450 if (!NewOp.getNode())
1451 return SDValue();
1452 AddToWorklist(NewOp.getNode());
1453
1454 if (Replace)
1455 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1456 return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1457}
1458
1459/// Promote the specified integer binary operation if the target indicates it is
1460/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1461/// i32 since i16 instructions are longer.
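/// Illustrative shape of the rewrite (the i16/i32 types are an example only):
///   (add:i16 a, b)
///     -> (trunc:i16 (add:i32 (any_extend:i32 a), (any_extend:i32 b)))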
1462SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1463 if (!LegalOperations)
1464 return SDValue();
1465
1466 EVT VT = Op.getValueType();
1467 if (VT.isVector() || !VT.isInteger())
1468 return SDValue();
1469
1470 // If operation type is 'undesirable', e.g. i16 on x86, consider
1471 // promoting it.
1472 unsigned Opc = Op.getOpcode();
1473 if (TLI.isTypeDesirableForOp(Opc, VT))
1474 return SDValue();
1475
1476 EVT PVT = VT;
1477 // Consult target whether it is a good idea to promote this operation and
1478 // what's the right type to promote it to.
1479 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1480 assert(PVT != VT && "Don't know what type to promote to!");
1481
1482 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1483
1484 bool Replace0 = false;
1485 SDValue N0 = Op.getOperand(0);
1486 SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1487
1488 bool Replace1 = false;
1489 SDValue N1 = Op.getOperand(1);
1490 SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
1491 SDLoc DL(Op);
1492
1493 SDValue RV =
1494 DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
1495
1496 // We are always replacing N0/N1's use in N and only need additional
1497 // replacements if there are additional uses.
1498 // Note: We are checking uses of the *nodes* (SDNode) rather than values
1499 // (SDValue) here because the node may reference multiple values
1500 // (for example, the chain value of a load node).
1501 Replace0 &= !N0->hasOneUse();
1502 Replace1 &= (N0 != N1) && !N1->hasOneUse();
1503
1504 // Combine Op here so it is preserved past replacements.
1505 CombineTo(Op.getNode(), RV);
1506
1507 // If operands have a use ordering, make sure we deal with
1508 // predecessor first.
1509 if (Replace0 && Replace1 && N0->isPredecessorOf(N1.getNode())) {
1510 std::swap(N0, N1);
1511 std::swap(NN0, NN1);
1512 }
1513
1514 if (Replace0) {
1515 AddToWorklist(NN0.getNode());
1516 ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
1517 }
1518 if (Replace1) {
1519 AddToWorklist(NN1.getNode());
1520 ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
1521 }
1522 return Op;
1523 }
1524 return SDValue();
1525}
1526
1527/// Promote the specified integer shift operation if the target indicates it is
1528/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1529/// i32 since i16 instructions are longer.
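/// The value being shifted is promoted with a sign-extend for SRA and a
/// zero-extend for SRL (an any-extend otherwise), so the bits shifted into the
/// low part match what the original narrow shift would have produced.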
1530SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1531 if (!LegalOperations)
1532 return SDValue();
1533
1534 EVT VT = Op.getValueType();
1535 if (VT.isVector() || !VT.isInteger())
1536 return SDValue();
1537
1538 // If operation type is 'undesirable', e.g. i16 on x86, consider
1539 // promoting it.
1540 unsigned Opc = Op.getOpcode();
1541 if (TLI.isTypeDesirableForOp(Opc, VT))
1542 return SDValue();
1543
1544 EVT PVT = VT;
1545 // Consult target whether it is a good idea to promote this operation and
1546 // what's the right type to promote it to.
1547 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1548 assert(PVT != VT && "Don't know what type to promote to!");
1549
1550 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1551
1552 bool Replace = false;
1553 SDValue N0 = Op.getOperand(0);
1554 if (Opc == ISD::SRA)
1555 N0 = SExtPromoteOperand(N0, PVT);
1556 else if (Opc == ISD::SRL)
1557 N0 = ZExtPromoteOperand(N0, PVT);
1558 else
1559 N0 = PromoteOperand(N0, PVT, Replace);
1560
1561 if (!N0.getNode())
1562 return SDValue();
1563
1564 SDLoc DL(Op);
1565 SDValue N1 = Op.getOperand(1);
1566 SDValue RV =
1567 DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
1568
1569 if (Replace)
1570 ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1571
1572 // Deal with Op being deleted.
1573 if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1574 return RV;
1575 }
1576 return SDValue();
1577}
1578
1579SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1580 if (!LegalOperations)
1581 return SDValue();
1582
1583 EVT VT = Op.getValueType();
1584 if (VT.isVector() || !VT.isInteger())
1585 return SDValue();
1586
1587 // If operation type is 'undesirable', e.g. i16 on x86, consider
1588 // promoting it.
1589 unsigned Opc = Op.getOpcode();
1590 if (TLI.isTypeDesirableForOp(Opc, VT))
1591 return SDValue();
1592
1593 EVT PVT = VT;
1594 // Consult target whether it is a good idea to promote this operation and
1595 // what's the right type to promote it to.
1596 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1597 assert(PVT != VT && "Don't know what type to promote to!");
1598 // fold (aext (aext x)) -> (aext x)
1599 // fold (aext (zext x)) -> (zext x)
1600 // fold (aext (sext x)) -> (sext x)
1601 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1602 return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1603 }
1604 return SDValue();
1605}
1606
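/// Promote a load of an undesirable integer type by re-issuing it as an
/// extending load of the promoted type and truncating the result back to the
/// original type; returns true if the load was replaced.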
1607bool DAGCombiner::PromoteLoad(SDValue Op) {
1608 if (!LegalOperations)
1609 return false;
1610
1611 if (!ISD::isUNINDEXEDLoad(Op.getNode()))
1612 return false;
1613
1614 EVT VT = Op.getValueType();
1615 if (VT.isVector() || !VT.isInteger())
1616 return false;
1617
1618 // If operation type is 'undesirable', e.g. i16 on x86, consider
1619 // promoting it.
1620 unsigned Opc = Op.getOpcode();
1621 if (TLI.isTypeDesirableForOp(Opc, VT))
1622 return false;
1623
1624 EVT PVT = VT;
1625 // Consult target whether it is a good idea to promote this operation and
1626 // what's the right type to promote it to.
1627 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1628 assert(PVT != VT && "Don't know what type to promote to!");
1629
1630 SDLoc DL(Op);
1631 SDNode *N = Op.getNode();
1632 LoadSDNode *LD = cast<LoadSDNode>(N);
1633 EVT MemVT = LD->getMemoryVT();
1634 ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1635 : LD->getExtensionType();
1636 SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
1637 LD->getChain(), LD->getBasePtr(),
1638 MemVT, LD->getMemOperand());
1639 SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
1640
1641 LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
1642 Result.dump(&DAG); dbgs() << '\n');
1643
1644 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1645 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1646
1647 AddToWorklist(Result.getNode());
1648 recursivelyDeleteUnusedNodes(N);
1649 return true;
1650 }
1651
1652 return false;
1653}
1654
1655/// Recursively delete a node which has no uses and any operands for
1656/// which it is the only use.
1657///
1658/// Note that this both deletes the nodes and removes them from the worklist.
1659 /// It also adds any nodes that have had a user deleted to the worklist, as they
1660 /// may now have only one use and be subject to other combines.
1661bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1662 if (!N->use_empty())
1663 return false;
1664
1665 SmallSetVector<SDNode *, 16> Nodes;
1666 Nodes.insert(N);
1667 do {
1668 N = Nodes.pop_back_val();
1669 if (!N)
1670 continue;
1671
1672 if (N->use_empty()) {
1673 for (const SDValue &ChildN : N->op_values())
1674 Nodes.insert(ChildN.getNode());
1675
1676 removeFromWorklist(N);
1677 DAG.DeleteNode(N);
1678 } else {
1679 AddToWorklist(N);
1680 }
1681 } while (!Nodes.empty());
1682 return true;
1683}
1684
1685//===----------------------------------------------------------------------===//
1686// Main DAG Combiner implementation
1687//===----------------------------------------------------------------------===//
1688
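// Top-level driver: seed the worklist with every node in the DAG, then
// repeatedly pop a node, delete it if it is dead, re-legalize it if needed,
// and try generic and target-specific combines until the worklist is empty.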
1689void DAGCombiner::Run(CombineLevel AtLevel) {
1690 // set the instance variables, so that the various visit routines may use it.
1691 Level = AtLevel;
1692 LegalDAG = Level >= AfterLegalizeDAG;
1693 LegalOperations = Level >= AfterLegalizeVectorOps;
1694 LegalTypes = Level >= AfterLegalizeTypes;
1695
1696 WorklistInserter AddNodes(*this);
1697
1698 // Add all the dag nodes to the worklist.
1699 //
1700 // Note: Not all nodes are added to PruningList here; this is because the only
1701 // nodes which can be deleted are those which have no uses, and all other nodes
1702 // which would otherwise be added to the worklist by the first call to
1703 // getNextWorklistEntry are already present in it.
1704 for (SDNode &Node : DAG.allnodes())
1705 AddToWorklist(&Node, /* IsCandidateForPruning */ Node.use_empty());
1706
1707 // Create a dummy node (which is not added to allnodes), that adds a reference
1708 // to the root node, preventing it from being deleted, and tracking any
1709 // changes of the root.
1710 HandleSDNode Dummy(DAG.getRoot());
1711
1712 // While we have a valid worklist entry node, try to combine it.
1713 while (SDNode *N = getNextWorklistEntry()) {
1714 // If N has no uses, it is dead. Make sure to revisit all N's operands once
1715 // N is deleted from the DAG, since they too may now be dead or may have a
1716 // reduced number of uses, allowing other xforms.
1717 if (recursivelyDeleteUnusedNodes(N))
1718 continue;
1719
1720 WorklistRemover DeadNodes(*this);
1721
1722 // If this combine is running after legalizing the DAG, re-legalize any
1723 // nodes pulled off the worklist.
1724 if (LegalDAG) {
1725 SmallSetVector<SDNode *, 16> UpdatedNodes;
1726 bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
1727
1728 for (SDNode *LN : UpdatedNodes)
1729 AddToWorklistWithUsers(LN);
1730
1731 if (!NIsValid)
1732 continue;
1733 }
1734
1735 LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
1736
1737 // Add any operands of the new node which have not yet been combined to the
1738 // worklist as well. Because the worklist uniques things already, this
1739 // won't repeatedly process the same operand.
1740 for (const SDValue &ChildN : N->op_values())
1741 if (!CombinedNodes.count(ChildN.getNode()))
1742 AddToWorklist(ChildN.getNode());
1743
1744 CombinedNodes.insert(N);
1745 SDValue RV = combine(N);
1746
1747 if (!RV.getNode())
1748 continue;
1749
1750 ++NodesCombined;
1751
1752 // If we get back the same node we passed in, rather than a new node or
1753 // zero, we know that the node must have defined multiple values and
1754 // CombineTo was used. Since CombineTo takes care of the worklist
1755 // mechanics for us, we have no work to do in this case.
1756 if (RV.getNode() == N)
1757 continue;
1758
1759 assert(N->getOpcode() != ISD::DELETED_NODE &&
1760 RV.getOpcode() != ISD::DELETED_NODE &&
1761 "Node was deleted but visit returned new node!");
1762
1763 LLVM_DEBUG(dbgs() << " ... into: "; RV.dump(&DAG));
1764
1765 if (N->getNumValues() == RV->getNumValues())
1766 DAG.ReplaceAllUsesWith(N, RV.getNode());
1767 else {
1768 assert(N->getValueType(0) == RV.getValueType() &&
1769 N->getNumValues() == 1 && "Type mismatch");
1770 DAG.ReplaceAllUsesWith(N, &RV);
1771 }
1772
1773 // Push the new node and any users onto the worklist. Omit this if the
1774 // new node is the EntryToken (e.g. if a store managed to get optimized
1775 // out), because re-visiting the EntryToken and its users will not uncover
1776 // any additional opportunities, but there may be a large number of such
1777 // users, potentially causing compile time explosion.
1778 if (RV.getOpcode() != ISD::EntryToken)
1779 AddToWorklistWithUsers(RV.getNode());
1780
1781 // Finally, if the node is now dead, remove it from the graph. The node
1782 // may not be dead if the replacement process recursively simplified to
1783 // something else needing this node. This will also take care of adding any
1784 // operands which have lost a user to the worklist.
1785 recursivelyDeleteUnusedNodes(N);
1786 }
1787
1788 // If the root changed (e.g. it was a dead load), update the root.
1789 DAG.setRoot(Dummy.getValue());
1790 DAG.RemoveDeadNodes();
1791}
1792
1793SDValue DAGCombiner::visit(SDNode *N) {
1794 // clang-format off
1795 switch (N->getOpcode()) {
1796 default: break;
1797 case ISD::TokenFactor: return visitTokenFactor(N);
1798 case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
1799 case ISD::ADD: return visitADD(N);
1800 case ISD::SUB: return visitSUB(N);
1801 case ISD::SADDSAT:
1802 case ISD::UADDSAT: return visitADDSAT(N);
1803 case ISD::SSUBSAT:
1804 case ISD::USUBSAT: return visitSUBSAT(N);
1805 case ISD::ADDC: return visitADDC(N);
1806 case ISD::SADDO:
1807 case ISD::UADDO: return visitADDO(N);
1808 case ISD::SUBC: return visitSUBC(N);
1809 case ISD::SSUBO:
1810 case ISD::USUBO: return visitSUBO(N);
1811 case ISD::ADDE: return visitADDE(N);
1812 case ISD::UADDO_CARRY: return visitUADDO_CARRY(N);
1813 case ISD::SADDO_CARRY: return visitSADDO_CARRY(N);
1814 case ISD::SUBE: return visitSUBE(N);
1815 case ISD::USUBO_CARRY: return visitUSUBO_CARRY(N);
1816 case ISD::SSUBO_CARRY: return visitSSUBO_CARRY(N);
1817 case ISD::SMULFIX:
1818 case ISD::SMULFIXSAT:
1819 case ISD::UMULFIX:
1820 case ISD::UMULFIXSAT: return visitMULFIX(N);
1821 case ISD::MUL: return visitMUL(N);
1822 case ISD::SDIV: return visitSDIV(N);
1823 case ISD::UDIV: return visitUDIV(N);
1824 case ISD::SREM:
1825 case ISD::UREM: return visitREM(N);
1826 case ISD::MULHU: return visitMULHU(N);
1827 case ISD::MULHS: return visitMULHS(N);
1828 case ISD::AVGFLOORS:
1829 case ISD::AVGFLOORU:
1830 case ISD::AVGCEILS:
1831 case ISD::AVGCEILU: return visitAVG(N);
1832 case ISD::ABDS:
1833 case ISD::ABDU: return visitABD(N);
1834 case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
1835 case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
1836 case ISD::SMULO:
1837 case ISD::UMULO: return visitMULO(N);
1838 case ISD::SMIN:
1839 case ISD::SMAX:
1840 case ISD::UMIN:
1841 case ISD::UMAX: return visitIMINMAX(N);
1842 case ISD::AND: return visitAND(N);
1843 case ISD::OR: return visitOR(N);
1844 case ISD::XOR: return visitXOR(N);
1845 case ISD::SHL: return visitSHL(N);
1846 case ISD::SRA: return visitSRA(N);
1847 case ISD::SRL: return visitSRL(N);
1848 case ISD::ROTR:
1849 case ISD::ROTL: return visitRotate(N);
1850 case ISD::FSHL:
1851 case ISD::FSHR: return visitFunnelShift(N);
1852 case ISD::SSHLSAT:
1853 case ISD::USHLSAT: return visitSHLSAT(N);
1854 case ISD::ABS: return visitABS(N);
1855 case ISD::BSWAP: return visitBSWAP(N);
1856 case ISD::BITREVERSE: return visitBITREVERSE(N);
1857 case ISD::CTLZ: return visitCTLZ(N);
1858 case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
1859 case ISD::CTTZ: return visitCTTZ(N);
1860 case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
1861 case ISD::CTPOP: return visitCTPOP(N);
1862 case ISD::SELECT: return visitSELECT(N);
1863 case ISD::VSELECT: return visitVSELECT(N);
1864 case ISD::SELECT_CC: return visitSELECT_CC(N);
1865 case ISD::SETCC: return visitSETCC(N);
1866 case ISD::SETCCCARRY: return visitSETCCCARRY(N);
1867 case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
1868 case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
1869 case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
1870 case ISD::AssertSext:
1871 case ISD::AssertZext: return visitAssertExt(N);
1872 case ISD::AssertAlign: return visitAssertAlign(N);
1873 case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
1874 case ISD::SIGN_EXTEND_VECTOR_INREG:
1875 case ISD::ZERO_EXTEND_VECTOR_INREG:
1876 case ISD::ANY_EXTEND_VECTOR_INREG: return visitEXTEND_VECTOR_INREG(N);
1877 case ISD::TRUNCATE: return visitTRUNCATE(N);
1878 case ISD::BITCAST: return visitBITCAST(N);
1879 case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
1880 case ISD::FADD: return visitFADD(N);
1881 case ISD::STRICT_FADD: return visitSTRICT_FADD(N);
1882 case ISD::FSUB: return visitFSUB(N);
1883 case ISD::FMUL: return visitFMUL(N);
1884 case ISD::FMA: return visitFMA<EmptyMatchContext>(N);
1885 case ISD::FMAD: return visitFMAD(N);
1886 case ISD::FDIV: return visitFDIV(N);
1887 case ISD::FREM: return visitFREM(N);
1888 case ISD::FSQRT: return visitFSQRT(N);
1889 case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
1890 case ISD::FPOW: return visitFPOW(N);
1891 case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
1892 case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
1893 case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
1894 case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
1895 case ISD::LRINT:
1896 case ISD::LLRINT: return visitXRINT(N);
1897 case ISD::FP_ROUND: return visitFP_ROUND(N);
1898 case ISD::FP_EXTEND: return visitFP_EXTEND(N);
1899 case ISD::FNEG: return visitFNEG(N);
1900 case ISD::FABS: return visitFABS(N);
1901 case ISD::FFLOOR: return visitFFLOOR(N);
1902 case ISD::FMINNUM:
1903 case ISD::FMAXNUM:
1904 case ISD::FMINIMUM:
1905 case ISD::FMAXIMUM: return visitFMinMax(N);
1906 case ISD::FCEIL: return visitFCEIL(N);
1907 case ISD::FTRUNC: return visitFTRUNC(N);
1908 case ISD::FFREXP: return visitFFREXP(N);
1909 case ISD::BRCOND: return visitBRCOND(N);
1910 case ISD::BR_CC: return visitBR_CC(N);
1911 case ISD::LOAD: return visitLOAD(N);
1912 case ISD::STORE: return visitSTORE(N);
1913 case ISD::ATOMIC_STORE: return visitATOMIC_STORE(N);
1914 case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
1915 case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
1916 case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
1917 case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
1918 case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
1919 case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
1920 case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N);
1921 case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
1922 case ISD::MGATHER: return visitMGATHER(N);
1923 case ISD::MLOAD: return visitMLOAD(N);
1924 case ISD::MSCATTER: return visitMSCATTER(N);
1925 case ISD::MSTORE: return visitMSTORE(N);
1926 case ISD::LIFETIME_END: return visitLIFETIME_END(N);
1927 case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
1928 case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
1929 case ISD::FP_TO_BF16: return visitFP_TO_BF16(N);
1930 case ISD::BF16_TO_FP: return visitBF16_TO_FP(N);
1931 case ISD::FREEZE: return visitFREEZE(N);
1932 case ISD::GET_FPENV_MEM: return visitGET_FPENV_MEM(N);
1933 case ISD::SET_FPENV_MEM: return visitSET_FPENV_MEM(N);
1934 case ISD::VECREDUCE_FADD:
1935 case ISD::VECREDUCE_FMUL:
1936 case ISD::VECREDUCE_ADD:
1937 case ISD::VECREDUCE_MUL:
1938 case ISD::VECREDUCE_AND:
1939 case ISD::VECREDUCE_OR:
1940 case ISD::VECREDUCE_XOR:
1941 case ISD::VECREDUCE_SMAX:
1942 case ISD::VECREDUCE_SMIN:
1943 case ISD::VECREDUCE_UMAX:
1944 case ISD::VECREDUCE_UMIN:
1945 case ISD::VECREDUCE_FMAX:
1946 case ISD::VECREDUCE_FMIN:
1947 case ISD::VECREDUCE_FMAXIMUM:
1948 case ISD::VECREDUCE_FMINIMUM: return visitVECREDUCE(N);
1949#define BEGIN_REGISTER_VP_SDNODE(SDOPC, ...) case ISD::SDOPC:
1950#include "llvm/IR/VPIntrinsics.def"
1951 return visitVPOp(N);
1952 }
1953 // clang-format on
1954 return SDValue();
1955}
1956
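// Try the generic visit routine first, then target-specific combines, then
// the integer-promotion heuristics; finally, try to CSE a commutative binop
// with an already-existing commuted form.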
1957SDValue DAGCombiner::combine(SDNode *N) {
1958 if (!DebugCounter::shouldExecute(DAGCombineCounter))
1959 return SDValue();
1960
1961 SDValue RV;
1962 if (!DisableGenericCombines)
1963 RV = visit(N);
1964
1965 // If nothing happened, try a target-specific DAG combine.
1966 if (!RV.getNode()) {
1967 assert(N->getOpcode() != ISD::DELETED_NODE &&
1968 "Node was deleted but visit returned NULL!");
1969
1970 if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
1971 TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
1972
1973 // Expose the DAG combiner to the target combiner impls.
1974 TargetLowering::DAGCombinerInfo
1975 DagCombineInfo(DAG, Level, false, this);
1976
1977 RV = TLI.PerformDAGCombine(N, DagCombineInfo);
1978 }
1979 }
1980
1981 // If nothing happened still, try promoting the operation.
1982 if (!RV.getNode()) {
1983 switch (N->getOpcode()) {
1984 default: break;
1985 case ISD::ADD:
1986 case ISD::SUB:
1987 case ISD::MUL:
1988 case ISD::AND:
1989 case ISD::OR:
1990 case ISD::XOR:
1991 RV = PromoteIntBinOp(SDValue(N, 0));
1992 break;
1993 case ISD::SHL:
1994 case ISD::SRA:
1995 case ISD::SRL:
1996 RV = PromoteIntShiftOp(SDValue(N, 0));
1997 break;
1998 case ISD::SIGN_EXTEND:
1999 case ISD::ZERO_EXTEND:
2000 case ISD::ANY_EXTEND:
2001 RV = PromoteExtend(SDValue(N, 0));
2002 break;
2003 case ISD::LOAD:
2004 if (PromoteLoad(SDValue(N, 0)))
2005 RV = SDValue(N, 0);
2006 break;
2007 }
2008 }
2009
2010 // If N is a commutative binary node, try to eliminate it if the commuted
2011 // version is already present in the DAG.
2012 if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode())) {
2013 SDValue N0 = N->getOperand(0);
2014 SDValue N1 = N->getOperand(1);
2015
2016 // Constant operands are canonicalized to RHS.
2017 if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
2018 SDValue Ops[] = {N1, N0};
2019 SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
2020 N->getFlags());
2021 if (CSENode)
2022 return SDValue(CSENode, 0);
2023 }
2024 }
2025
2026 return RV;
2027}
2028
2029/// Given a node, return its input chain if it has one, otherwise return a null
2030/// sd operand.
2031static SDValue getInputChainForNode(SDNode *N) {
2032 if (unsigned NumOps = N->getNumOperands()) {
2033 if (N->getOperand(0).getValueType() == MVT::Other)
2034 return N->getOperand(0);
2035 if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
2036 return N->getOperand(NumOps-1);
2037 for (unsigned i = 1; i < NumOps-1; ++i)
2038 if (N->getOperand(i).getValueType() == MVT::Other)
2039 return N->getOperand(i);
2040 }
2041 return SDValue();
2042}
2043
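// Simplify a TokenFactor by inlining single-use TokenFactor operands,
// dropping duplicate and EntryToken operands, and pruning operands that are
// already reached transitively through another operand's chain.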
2044SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
2045 // If N has two operands, where one has an input chain equal to the other,
2046 // the 'other' chain is redundant.
2047 if (N->getNumOperands() == 2) {
2048 if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
2049 return N->getOperand(0);
2050 if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
2051 return N->getOperand(1);
2052 }
2053
2054 // Don't simplify token factors if optnone.
2055 if (OptLevel == CodeGenOptLevel::None)
2056 return SDValue();
2057
2058 // Don't simplify the token factor if the node itself has too many operands.
2059 if (N->getNumOperands() > TokenFactorInlineLimit)
2060 return SDValue();
2061
2062 // If the sole user is a token factor, we should make sure we have a
2063 // chance to merge them together. This prevents TF chains from inhibiting
2064 // optimizations.
2065 if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::TokenFactor)
2066 AddToWorklist(*(N->use_begin()));
2067
2068 SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
2069 SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
2070 SmallPtrSet<SDNode *, 16> SeenOps;
2071 bool Changed = false; // If we should replace this token factor.
2072
2073 // Start out with this token factor.
2074 TFs.push_back(N);
2075
2076 // Iterate through token factors. The TFs grows when new token factors are
2077 // encountered.
2078 for (unsigned i = 0; i < TFs.size(); ++i) {
2079 // Limit number of nodes to inline, to avoid quadratic compile times.
2080 // We have to add the outstanding Token Factors to Ops, otherwise we might
2081 // drop Ops from the resulting Token Factors.
2082 if (Ops.size() > TokenFactorInlineLimit) {
2083 for (unsigned j = i; j < TFs.size(); j++)
2084 Ops.emplace_back(TFs[j], 0);
2085 // Drop unprocessed Token Factors from TFs, so we do not add them to the
2086 // combiner worklist later.
2087 TFs.resize(i);
2088 break;
2089 }
2090
2091 SDNode *TF = TFs[i];
2092 // Check each of the operands.
2093 for (const SDValue &Op : TF->op_values()) {
2094 switch (Op.getOpcode()) {
2095 case ISD::EntryToken:
2096 // Entry tokens don't need to be added to the list. They are
2097 // redundant.
2098 Changed = true;
2099 break;
2100
2101 case ISD::TokenFactor:
2102 if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
2103 // Queue up for processing.
2104 TFs.push_back(Op.getNode());
2105 Changed = true;
2106 break;
2107 }
2108 [[fallthrough]];
2109
2110 default:
2111 // Only add if it isn't already in the list.
2112 if (SeenOps.insert(Op.getNode()).second)
2113 Ops.push_back(Op);
2114 else
2115 Changed = true;
2116 break;
2117 }
2118 }
2119 }
2120
2121 // Re-visit inlined Token Factors, to clean them up in case they have been
2122 // removed. Skip the first Token Factor, as this is the current node.
2123 for (unsigned i = 1, e = TFs.size(); i < e; i++)
2124 AddToWorklist(TFs[i]);
2125
2126 // Remove Nodes that are chained to another node in the list. Do so
2127 // by walking up chains breadth-first, stopping when we've seen
2128 // another operand. In general we must climb to the EntryNode, but we can exit
2129 // early if we find all remaining work is associated with just one operand as
2130 // no further pruning is possible.
2131
2132 // List of nodes to search through and original Ops from which they originate.
2133 SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
2134 SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
2135 SmallPtrSet<SDNode *, 16> SeenChains;
2136 bool DidPruneOps = false;
2137
2138 unsigned NumLeftToConsider = 0;
2139 for (const SDValue &Op : Ops) {
2140 Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
2141 OpWorkCount.push_back(1);
2142 }
2143
2144 auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
2145 // If this is an Op, we can remove the op from the list. Re-mark any
2146 // search associated with it as coming from the current OpNumber.
2147 if (SeenOps.contains(Op)) {
2148 Changed = true;
2149 DidPruneOps = true;
2150 unsigned OrigOpNumber = 0;
2151 while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
2152 OrigOpNumber++;
2153 assert((OrigOpNumber != Ops.size()) &&
2154 "expected to find TokenFactor Operand");
2155 // Re-mark worklist from OrigOpNumber to OpNumber
2156 for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
2157 if (Worklist[i].second == OrigOpNumber) {
2158 Worklist[i].second = OpNumber;
2159 }
2160 }
2161 OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
2162 OpWorkCount[OrigOpNumber] = 0;
2163 NumLeftToConsider--;
2164 }
2165 // Add if it's a new chain
2166 if (SeenChains.insert(Op).second) {
2167 OpWorkCount[OpNumber]++;
2168 Worklist.push_back(std::make_pair(Op, OpNumber));
2169 }
2170 };
2171
2172 for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
2173 // We need to consider at least 2 Ops to prune.
2174 if (NumLeftToConsider <= 1)
2175 break;
2176 auto CurNode = Worklist[i].first;
2177 auto CurOpNumber = Worklist[i].second;
2178 assert((OpWorkCount[CurOpNumber] > 0) &&
2179 "Node should not appear in worklist");
2180 switch (CurNode->getOpcode()) {
2181 case ISD::EntryToken:
2182 // Hitting EntryToken is the only way for the search to terminate without
2183 // hitting another operand's search.
2184 // Prevent us from marking this operand considered.
2186 NumLeftToConsider++;
2187 break;
2188 case ISD::TokenFactor:
2189 for (const SDValue &Op : CurNode->op_values())
2190 AddToWorklist(i, Op.getNode(), CurOpNumber);
2191 break;
2192 case ISD::LIFETIME_START:
2193 case ISD::LIFETIME_END:
2194 case ISD::CopyFromReg:
2195 case ISD::CopyToReg:
2196 AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
2197 break;
2198 default:
2199 if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
2200 AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
2201 break;
2202 }
2203 OpWorkCount[CurOpNumber]--;
2204 if (OpWorkCount[CurOpNumber] == 0)
2205 NumLeftToConsider--;
2206 }
2207
2208 // If we've changed things around then replace token factor.
2209 if (Changed) {
2210 SDValue Result;
2211 if (Ops.empty()) {
2212 // The entry token is the only possible outcome.
2213 Result = DAG.getEntryNode();
2214 } else {
2215 if (DidPruneOps) {
2216 SmallVector<SDValue, 8> PrunedOps;
2217 //
2218 for (const SDValue &Op : Ops) {
2219 if (SeenChains.count(Op.getNode()) == 0)
2220 PrunedOps.push_back(Op);
2221 }
2222 Result = DAG.getTokenFactor(SDLoc(N), PrunedOps);
2223 } else {
2224 Result = DAG.getTokenFactor(SDLoc(N), Ops);
2225 }
2226 }
2227 return Result;
2228 }
2229 return SDValue();
2230}
2231
2232/// MERGE_VALUES can always be eliminated.
2233SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
2234 WorklistRemover DeadNodes(*this);
2235 // Replacing results may cause a different MERGE_VALUES to suddenly
2236 // be CSE'd with N, and carry its uses with it. Iterate until no
2237 // uses remain, to ensure that the node can be safely deleted.
2238 // First add the users of this node to the work list so that they
2239 // can be tried again once they have new operands.
2240 AddUsersToWorklist(N);
2241 do {
2242 // Do as a single replacement to avoid rewalking use lists.
2243 SmallVector<SDValue, 8> Ops;
2244 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
2245 Ops.push_back(N->getOperand(i));
2246 DAG.ReplaceAllUsesWith(N, Ops.data());
2247 } while (!N->use_empty());
2248 deleteAndRecombine(N);
2249 return SDValue(N, 0); // Return N so it doesn't get rechecked!
2250}
2251
2252/// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a
2253/// ConstantSDNode pointer else nullptr.
2254static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
2255 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
2256 return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
2257}
2258
2259// isTruncateOf - If N is a truncate of some other value, return true, record
2260// the value being truncated in Op and which of Op's bits are zero/one in Known.
2261// This function computes KnownBits to avoid a duplicated call to
2262// computeKnownBits in the caller.
2264 KnownBits &Known) {
2265 if (N->getOpcode() == ISD::TRUNCATE) {
2266 Op = N->getOperand(0);
2267 Known = DAG.computeKnownBits(Op);
2268 return true;
2269 }
2270
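 // Otherwise, match (setcc X, 0, ne) where every bit of X except bit 0 is
 // known zero; such a compare is equivalent to truncating X to i1.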
2271 if (N.getOpcode() != ISD::SETCC ||
2272 N.getValueType().getScalarType() != MVT::i1 ||
2273 cast<CondCodeSDNode>(N.getOperand(2))->get() != ISD::SETNE)
2274 return false;
2275
2276 SDValue Op0 = N->getOperand(0);
2277 SDValue Op1 = N->getOperand(1);
2278 assert(Op0.getValueType() == Op1.getValueType());
2279
2280 if (isNullOrNullSplat(Op0))
2281 Op = Op1;
2282 else if (isNullOrNullSplat(Op1))
2283 Op = Op0;
2284 else
2285 return false;
2286
2287 Known = DAG.computeKnownBits(Op);
2288
2289 return (Known.Zero | 1).isAllOnes();
2290}
2291
2292/// Return true if 'Use' is a load or a store that uses N as its base pointer
2293/// and that N may be folded in the load / store addressing mode.
2294static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, SelectionDAG &DAG,
2295 const TargetLowering &TLI) {
2296 EVT VT;
2297 unsigned AS;
2298
2299 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
2300 if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2301 return false;
2302 VT = LD->getMemoryVT();
2303 AS = LD->getAddressSpace();
2304 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
2305 if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2306 return false;
2307 VT = ST->getMemoryVT();
2308 AS = ST->getAddressSpace();
2309 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(Use)) {
2310 if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2311 return false;
2312 VT = LD->getMemoryVT();
2313 AS = LD->getAddressSpace();
2314 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(Use)) {
2315 if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2316 return false;
2317 VT = ST->getMemoryVT();
2318 AS = ST->getAddressSpace();
2319 } else {
2320 return false;
2321 }
2322
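 // Build an addressing-mode description for N (a base register plus either an
 // immediate offset or a second register) and ask the target whether that mode
 // is legal for this memory type and address space.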
2323 TargetLowering::AddrMode AM;
2324 if (N->getOpcode() == ISD::ADD) {
2325 AM.HasBaseReg = true;
2326 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2327 if (Offset)
2328 // [reg +/- imm]
2329 AM.BaseOffs = Offset->getSExtValue();
2330 else
2331 // [reg +/- reg]
2332 AM.Scale = 1;
2333 } else if (N->getOpcode() == ISD::SUB) {
2334 AM.HasBaseReg = true;
2335 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2336 if (Offset)
2337 // [reg +/- imm]
2338 AM.BaseOffs = -Offset->getSExtValue();
2339 else
2340 // [reg +/- reg]
2341 AM.Scale = 1;
2342 } else {
2343 return false;
2344 }
2345
2346 return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
2347 VT.getTypeForEVT(*DAG.getContext()), AS);
2348}
2349
2350/// This inverts a canonicalization in IR that replaces a variable select arm
2351/// with an identity constant. Codegen improves if we re-use the variable
2352/// operand rather than load a constant. This can also be converted into a
2353/// masked vector operation if the target supports it.
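/// For example: add X, (vselect Cond, 0, Y) --> vselect Cond, X, (add X, Y).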
2354static SDValue foldSelectWithIdentityConstant(SDNode *N, SelectionDAG &DAG,
2355 bool ShouldCommuteOperands) {
2356 // Match a select as operand 1. The identity constant that we are looking for
2357 // is only valid as operand 1 of a non-commutative binop.
2358 SDValue N0 = N->getOperand(0);
2359 SDValue N1 = N->getOperand(1);
2360 if (ShouldCommuteOperands)
2361 std::swap(N0, N1);
2362
2363 // TODO: Should this apply to scalar select too?
2364 if (N1.getOpcode() != ISD::VSELECT || !N1.hasOneUse())
2365 return SDValue();
2366
2367 // We can't hoist all instructions because of immediate UB (not speculatable).
2368 // For example div/rem by zero.
2369 if (!DAG.isSafeToSpeculativelyExecuteNode(N))
2370 return SDValue();
2371
2372 unsigned Opcode = N->getOpcode();
2373 EVT VT = N->getValueType(0);
2374 SDValue Cond = N1.getOperand(0);
2375 SDValue TVal = N1.getOperand(1);
2376 SDValue FVal = N1.getOperand(2);
2377
2378 // This transform increases uses of N0, so freeze it to be safe.
2379 // binop N0, (vselect Cond, IDC, FVal) --> vselect Cond, N0, (binop N0, FVal)
2380 unsigned OpNo = ShouldCommuteOperands ? 0 : 1;
2381 if (isNeutralConstant(Opcode, N->getFlags(), TVal, OpNo)) {
2382 SDValue F0 = DAG.getFreeze(N0);
2383 SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, FVal, N->getFlags());
2384 return DAG.getSelect(SDLoc(N), VT, Cond, F0, NewBO);
2385 }
2386 // binop N0, (vselect Cond, TVal, IDC) --> vselect Cond, (binop N0, TVal), N0
2387 if (isNeutralConstant(Opcode, N->getFlags(), FVal, OpNo)) {
2388 SDValue F0 = DAG.getFreeze(N0);
2389 SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, TVal, N->getFlags());
2390 return DAG.getSelect(SDLoc(N), VT, Cond, NewBO, F0);
2391 }
2392
2393 return SDValue();
2394}
2395
2396SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
2397 assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
2398 "Unexpected binary operator");
2399
2400 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2401 auto BinOpcode = BO->getOpcode();
2402 EVT VT = BO->getValueType(0);
2403 if (TLI.shouldFoldSelectWithIdentityConstant(BinOpcode, VT)) {
2404 if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, false))
2405 return Sel;
2406
2407 if (TLI.isCommutativeBinOp(BO->getOpcode()))
2408 if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, true))
2409 return Sel;
2410 }
2411
2412 // Don't do this unless the old select is going away. We want to eliminate the
2413 // binary operator, not replace a binop with a select.
2414 // TODO: Handle ISD::SELECT_CC.
2415 unsigned SelOpNo = 0;
2416 SDValue Sel = BO->getOperand(0);
2417 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
2418 SelOpNo = 1;
2419 Sel = BO->getOperand(1);
2420
2421 // Peek through trunc to shift amount type.
2422 if ((BinOpcode == ISD::SHL || BinOpcode == ISD::SRA ||
2423 BinOpcode == ISD::SRL) && Sel.hasOneUse()) {
2424 // This is valid when the truncated bits of x are already zero.
2425 SDValue Op;
2426 KnownBits Known;
2427 if (isTruncateOf(DAG, Sel, Op, Known) &&
2428 Known.countMaxActiveBits() <= Sel.getScalarValueSizeInBits())
2429 Sel = Op;
2430 }
2431 }
2432
2433 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
2434 return SDValue();
2435
2436 SDValue CT = Sel.getOperand(1);
2437 if (!isConstantOrConstantVector(CT, true) &&
2438 !DAG.isConstantFPBuildVectorOrConstantFP(CT))
2439 return SDValue();
2440
2441 SDValue CF = Sel.getOperand(2);
2442 if (!isConstantOrConstantVector(CF, true) &&
2443 !DAG.isConstantFPBuildVectorOrConstantFP(CF))
2444 return SDValue();
2445
2446 // Bail out if any constants are opaque because we can't constant fold those.
2447 // The exception is "and" and "or" with either 0 or -1 in which case we can
2448 // propagate non constant operands into select. I.e.:
2449 // and (select Cond, 0, -1), X --> select Cond, 0, X
2450 // or X, (select Cond, -1, 0) --> select Cond, -1, X
2451 bool CanFoldNonConst =
2452 (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
2453 ((isNullOrNullSplat(CT) && isAllOnesOrAllOnesSplat(CF)) ||
2454 (isNullOrNullSplat(CF) && isAllOnesOrAllOnesSplat(CT)));
2455
2456 SDValue CBO = BO->getOperand(SelOpNo ^ 1);
2457 if (!CanFoldNonConst &&
2458 !isConstantOrConstantVector(CBO, true) &&
2459 !DAG.isConstantFPBuildVectorOrConstantFP(CBO))
2460 return SDValue();
2461
2462 SDLoc DL(Sel);
2463 SDValue NewCT, NewCF;
2464
2465 if (CanFoldNonConst) {
2466 // If CBO is an opaque constant, we can't rely on getNode to constant fold.
2467 if ((BinOpcode == ISD::AND && isNullOrNullSplat(CT)) ||
2468 (BinOpcode == ISD::OR && isAllOnesOrAllOnesSplat(CT)))
2469 NewCT = CT;
2470 else
2471 NewCT = CBO;
2472
2473 if ((BinOpcode == ISD::AND && isNullOrNullSplat(CF)) ||
2474 (BinOpcode == ISD::OR && isAllOnesOrAllOnesSplat(CF)))
2475 NewCF = CF;
2476 else
2477 NewCF = CBO;
2478 } else {
2479 // We have a select-of-constants followed by a binary operator with a
2480 // constant. Eliminate the binop by pulling the constant math into the
2481 // select. Example: add (select Cond, CT, CF), CBO --> select Cond, CT +
2482 // CBO, CF + CBO
2483 NewCT = SelOpNo ? DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CBO, CT})
2484 : DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CT, CBO});
2485 if (!NewCT)
2486 return SDValue();
2487
2488 NewCF = SelOpNo ? DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CBO, CF})
2489 : DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CF, CBO});
2490 if (!NewCF)
2491 return SDValue();
2492 }
2493
2494 SDValue SelectOp = DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
2495 SelectOp->setFlags(BO->getFlags());
2496 return SelectOp;
2497}
2498
2499static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, const SDLoc &DL,
2500 SelectionDAG &DAG) {
2501 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2502 "Expecting add or sub");
2503
2504 // Match a constant operand and a zext operand for the math instruction:
2505 // add Z, C
2506 // sub C, Z
2507 bool IsAdd = N->getOpcode() == ISD::ADD;
2508 SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
2509 SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
2510 auto *CN = dyn_cast<ConstantSDNode>(C);
2511 if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
2512 return SDValue();
2513
2514 // Match the zext operand as a setcc of a boolean.
2515 if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
2516 Z.getOperand(0).getValueType() != MVT::i1)
2517 return SDValue();
2518
2519 // Match the compare as: setcc (X & 1), 0, eq.
2520 SDValue SetCC = Z.getOperand(0);
2521 ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
2522 if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
2523 SetCC.getOperand(0).getOpcode() != ISD::AND ||
2524 !isOneConstant(SetCC.getOperand(0).getOperand(1)))
2525 return SDValue();
2526
2527 // We are adding/subtracting a constant and an inverted low bit. Turn that
2528 // into a subtract/add of the low bit with incremented/decremented constant:
2529 // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
2530 // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
2531 EVT VT = C.getValueType();
2532 SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
2533 SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
2534 DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
2535 return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
2536}
2537
2538// Attempt to form avgceil(A, B) from (A | B) - ((A ^ B) >> 1)
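// This uses the identity A + B == (A | B) + (A & B): the rounded-up average
// (A + B + 1) >> 1 equals (A | B) - ((A ^ B) >> 1), with the shift kind
// matching the signedness of the average.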
2539SDValue DAGCombiner::foldSubToAvg(SDNode *N, const SDLoc &DL) {
2540 SDValue N0 = N->getOperand(0);
2541 EVT VT = N0.getValueType();
2542 SDValue A, B;
2543
2544 if (hasOperation(ISD::AVGCEILU, VT) &&
2545 sd_match(N, m_Sub(m_Or(m_Value(A), m_Value(B)),
2546 m_Srl(m_Xor(m_Deferred(A), m_Deferred(B)),
2547 m_SpecificInt(1))))) {
2548 return DAG.getNode(ISD::AVGCEILU, DL, VT, A, B);
2549 }
2550 if (hasOperation(ISD::AVGCEILS, VT) &&
2551 sd_match(N, m_Sub(m_Or(m_Value(A), m_Value(B)),
2552 m_Sra(m_Xor(m_Deferred(A), m_Deferred(B)),
2553 m_SpecificInt(1))))) {
2554 return DAG.getNode(ISD::AVGCEILS, DL, VT, A, B);
2555 }
2556 return SDValue();
2557}
2558
2559/// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
2560/// a shift and add with a different constant.
2561static SDValue foldAddSubOfSignBit(SDNode *N, const SDLoc &DL,
2562 SelectionDAG &DAG) {
2563 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2564 "Expecting add or sub");
2565
2566 // We need a constant operand for the add/sub, and the other operand is a
2567 // logical shift right: add (srl), C or sub C, (srl).
2568 bool IsAdd = N->getOpcode() == ISD::ADD;
2569 SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
2570 SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
2571 if (!DAG.isConstantIntBuildVectorOrConstantInt(ConstantOp) ||
2572 ShiftOp.getOpcode() != ISD::SRL)
2573 return SDValue();
2574
2575 // The shift must be of a 'not' value.
2576 SDValue Not = ShiftOp.getOperand(0);
2577 if (!Not.hasOneUse() || !isBitwiseNot(Not))
2578 return SDValue();
2579
2580 // The shift must be moving the sign bit to the least-significant-bit.
2581 EVT VT = ShiftOp.getValueType();
2582 SDValue ShAmt = ShiftOp.getOperand(1);
2583 ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2584 if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1))
2585 return SDValue();
2586
2587 // Eliminate the 'not' by adjusting the shift and add/sub constant:
2588 // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2589 // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
2590 if (SDValue NewC = DAG.FoldConstantArithmetic(
2591 IsAdd ? ISD::ADD : ISD::SUB, DL, VT,
2592 {ConstantOp, DAG.getConstant(1, DL, VT)})) {
2593 SDValue NewShift = DAG.getNode(IsAdd ? ISD::SRA : ISD::SRL, DL, VT,
2594 Not.getOperand(0), ShAmt);
2595 return DAG.getNode(ISD::ADD, DL, VT, NewShift, NewC);
2596 }
2597
2598 return SDValue();
2599}
2600
2601static bool
2602areBitwiseNotOfEachother(SDValue Op0, SDValue Op1) {
2603 return (isBitwiseNot(Op0) && Op0.getOperand(0) == Op1) ||
2604 (isBitwiseNot(Op1) && Op1.getOperand(0) == Op0);
2605}
2606
2607/// Try to fold a node that behaves like an ADD (note that N isn't necessarily
2608/// an ISD::ADD here, it could for example be an ISD::OR if we know that there
2609/// are no common bits set in the operands).
2610SDValue DAGCombiner::visitADDLike(SDNode *N) {
2611 SDValue N0 = N->getOperand(0);
2612 SDValue N1 = N->getOperand(1);
2613 EVT VT = N0.getValueType();
2614 SDLoc DL(N);
2615
2616 // fold (add x, undef) -> undef
2617 if (N0.isUndef())
2618 return N0;
2619 if (N1.isUndef())
2620 return N1;
2621
2622 // fold (add c1, c2) -> c1+c2
2623 if (SDValue C = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0, N1}))
2624 return C;
2625
2626 // canonicalize constant to RHS
2627 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2628 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2629 return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
2630
2631 if (areBitwiseNotOfEachother(N0, N1))
2632 return DAG.getConstant(APInt::getAllOnes(VT.getScalarSizeInBits()),
2633 SDLoc(N), VT);
2634
2635 // fold vector ops
2636 if (VT.isVector()) {
2637 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
2638 return FoldedVOp;
2639
2640 // fold (add x, 0) -> x, vector edition
2641 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
2642 return N0;
2643 }
2644
2645 // fold (add x, 0) -> x
2646 if (isNullConstant(N1))
2647 return N0;
2648
2649 if (N0.getOpcode() == ISD::SUB) {
2650 SDValue N00 = N0.getOperand(0);
2651 SDValue N01 = N0.getOperand(1);
2652
2653 // fold ((A-c1)+c2) -> (A+(c2-c1))
2654 if (SDValue Sub = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N1, N01}))
2655 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub);
2656
2657 // fold ((c1-A)+c2) -> (c1+c2)-A
2658 if (SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N00}))
2659 return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2660 }
2661
2662 // add (sext i1 X), 1 -> zext (not i1 X)
2663 // We don't transform this pattern:
2664 // add (zext i1 X), -1 -> sext (not i1 X)
2665 // because most (?) targets generate better code for the zext form.
2666 if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
2667 isOneOrOneSplat(N1)) {
2668 SDValue X = N0.getOperand(0);
2669 if ((!LegalOperations ||
2670 (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
2671 TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
2672 X.getScalarValueSizeInBits() == 1) {
2673 SDValue Not = DAG.getNOT(DL, X, X.getValueType());
2674 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
2675 }
2676 }
2677
2678 // Fold (add (or x, c0), c1) -> (add x, (c0 + c1))
2679 // iff (or x, c0) is equivalent to (add x, c0).
2680 // Fold (add (xor x, c0), c1) -> (add x, (c0 + c1))
2681 // iff (xor x, c0) is equivalent to (add x, c0).
2682 if (DAG.isADDLike(N0)) {
2683 SDValue N01 = N0.getOperand(1);
2684 if (SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N01}))
2685 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add);
2686 }
2687
2688 if (SDValue NewSel = foldBinOpIntoSelect(N))
2689 return NewSel;
2690
2691 // reassociate add
2692 if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N, N0, N1)) {
2693 if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
2694 return RADD;
2695
2696 // Reassociate (add (or x, c), y) -> (add add(x, y), c)) if (or x, c) is
2697 // equivalent to (add x, c).
2698 // Reassociate (add (xor x, c), y) -> (add add(x, y), c)) if (xor x, c) is
2699 // equivalent to (add x, c).
2700 // Do this optimization only when adding c does not introduce instructions
2701 // for adding carries.
2702 auto ReassociateAddOr = [&](SDValue N0, SDValue N1) {
2703 if (DAG.isADDLike(N0) && N0.hasOneUse() &&
2704 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
2705 // If N0's type does not split or is a sign mask, it does not introduce
2706 // add carry.
2707 auto TyActn = TLI.getTypeAction(*DAG.getContext(), N0.getValueType());
2708 bool NoAddCarry = TyActn == TargetLoweringBase::TypeLegal ||
2709 TyActn == TargetLoweringBase::TypePromoteInteger ||
2710 isMinSignedConstant(N0.getOperand(1));
2711 if (NoAddCarry)
2712 return DAG.getNode(
2713 ISD::ADD, DL, VT,
2714 DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
2715 N0.getOperand(1));
2716 }
2717 return SDValue();
2718 };
2719 if (SDValue Add = ReassociateAddOr(N0, N1))
2720 return Add;
2721 if (SDValue Add = ReassociateAddOr(N1, N0))
2722 return Add;
2723
2724 // Fold add(vecreduce(x), vecreduce(y)) -> vecreduce(add(x, y))
2725 if (SDValue SD =
2726 reassociateReduction(ISD::VECREDUCE_ADD, ISD::ADD, DL, VT, N0, N1))
2727 return SD;
2728 }
2729
2730 SDValue A, B, C;
2731
2732 // fold ((0-A) + B) -> B-A
2733 if (sd_match(N0, m_Neg(m_Value(A))))
2734 return DAG.getNode(ISD::SUB, DL, VT, N1, A);
2735
2736 // fold (A + (0-B)) -> A-B
2737 if (sd_match(N1, m_Neg(m_Value(B))))
2738 return DAG.getNode(ISD::SUB, DL, VT, N0, B);
2739
2740 // fold (A+(B-A)) -> B
2741 if (sd_match(N1, m_Sub(m_Value(B), m_Specific(N0))))
2742 return B;
2743
2744 // fold ((B-A)+A) -> B
2745 if (sd_match(N0, m_Sub(m_Value(B), m_Specific(N1))))
2746 return B;
2747
2748 // fold ((A-B)+(C-A)) -> (C-B)
2749 if (sd_match(N0, m_Sub(m_Value(A), m_Value(B))) &&
2750 sd_match(N1, m_Sub(m_Value(C), m_Specific(A))))
2751 return DAG.getNode(ISD::SUB, DL, VT, C, B);
2752
2753 // fold ((A-B)+(B-C)) -> (A-C)
2754 if (sd_match(N0, m_Sub(m_Value(A), m_Value(B))) &&
2755 sd_match(N1, m_Sub(m_Specific(B), m_Value(C))))
2756 return DAG.getNode(ISD::SUB, DL, VT, A, C);
2757
2758 // fold (A+(B-(A+C))) to (B-C)
2759 // fold (A+(B-(C+A))) to (B-C)
2760 if (sd_match(N1, m_Sub(m_Value(B), m_Add(m_Specific(N0), m_Value(C)))))
2761 return DAG.getNode(ISD::SUB, DL, VT, B, C);
2762
2763 // fold (A+((B-A)+or-C)) to (B+or-C)
2764 if (sd_match(N1,
2765 m_AnyOf(m_Add(m_Sub(m_Value(B), m_Specific(N0)), m_Value(C)),
2766 m_Sub(m_Sub(m_Value(B), m_Specific(N0)), m_Value(C)))))
2767 return DAG.getNode(N1.getOpcode(), DL, VT, B, C);
2768
2769 // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
2770 if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2771 N0->hasOneUse() && N1->hasOneUse()) {
2772 SDValue N00 = N0.getOperand(0);
2773 SDValue N01 = N0.getOperand(1);
2774 SDValue N10 = N1.getOperand(0);
2775 SDValue N11 = N1.getOperand(1);
2776
2777 if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
2778 return DAG.getNode(ISD::SUB, DL, VT,
2779 DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
2780 DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
2781 }
2782
2783 // fold (add (umax X, C), -C) --> (usubsat X, C)
2784 if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
2785 auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
2786 return (!Max && !Op) ||
2787 (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
2788 };
2789 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
2790 /*AllowUndefs*/ true))
2791 return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
2792 N0.getOperand(1));
2793 }
2794
2795 if (SimplifyDemandedBits(SDValue(N, 0)))
2796 return SDValue(N, 0);
2797
2798 if (isOneOrOneSplat(N1)) {
2799 // fold (add (xor a, -1), 1) -> (sub 0, a)
2800 if (isBitwiseNot(N0))
2801 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
2802 N0.getOperand(0));
2803
2804 // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
2805 if (N0.getOpcode() == ISD::ADD) {
2806 SDValue A, Xor;
2807
2808 if (isBitwiseNot(N0.getOperand(0))) {
2809 A = N0.getOperand(1);
2810 Xor = N0.getOperand(0);
2811 } else if (isBitwiseNot(N0.getOperand(1))) {
2812 A = N0.getOperand(0);
2813 Xor = N0.getOperand(1);
2814 }
2815
2816 if (Xor)
2817 return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
2818 }
2819
2820 // Look for:
2821 // add (add x, y), 1
2822 // And if the target does not like this form then turn into:
2823 // sub y, (xor x, -1)
2824 if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
2825 N0.hasOneUse() &&
2826 // Limit this to after legalization if the add has wrap flags
2827 (Level >= AfterLegalizeDAG || (!N->getFlags().hasNoUnsignedWrap() &&
2828 !N->getFlags().hasNoSignedWrap()))) {
2829 SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
2830 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
2831 }
2832 }
2833
2834 // (x - y) + -1 -> add (xor y, -1), x
2835 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
2836 isAllOnesOrAllOnesSplat(N1, /*AllowUndefs=*/true)) {
2837 SDValue Not = DAG.getNOT(DL, N0.getOperand(1), VT);
2838 return DAG.getNode(ISD::ADD, DL, VT, Not, N0.getOperand(0));
2839 }
2840
2841 if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
2842 return Combined;
2843
2844 if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
2845 return Combined;
2846
2847 return SDValue();
2848}
2849
2850// Attempt to form avgfloor(A, B) from (A & B) + ((A ^ B) >> 1)
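// This uses the identity A + B == (A & B) + (A | B): the rounded-down average
// (A + B) >> 1 equals (A & B) + ((A ^ B) >> 1), with the shift kind matching
// the signedness of the average.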
2851SDValue DAGCombiner::foldAddToAvg(SDNode *N, const SDLoc &DL) {
2852 SDValue N0 = N->getOperand(0);
2853 EVT VT = N0.getValueType();
2854 SDValue A, B;
2855
2856 if (hasOperation(ISD::AVGFLOORU, VT) &&
2857 sd_match(N, m_Add(m_And(m_Value(A), m_Value(B)),
2858 m_Srl(m_Xor(m_Deferred(A), m_Deferred(B)),
2859 m_SpecificInt(1))))) {
2860 return DAG.getNode(ISD::AVGFLOORU, DL, VT, A, B);
2861 }
2862 if (hasOperation(ISD::AVGFLOORS, VT) &&
2863 sd_match(N, m_Add(m_And(m_Value(A), m_Value(B)),
2864 m_Sra(m_Xor(m_Deferred(A), m_Deferred(B)),
2865 m_SpecificInt(1))))) {
2866 return DAG.getNode(ISD::AVGFLOORS, DL, VT, A, B);
2867 }
2868
2869 return SDValue();
2870}
2871
2872SDValue DAGCombiner::visitADD(SDNode *N) {
2873 SDValue N0 = N->getOperand(0);
2874 SDValue N1 = N->getOperand(1);
2875 EVT VT = N0.getValueType();
2876 SDLoc DL(N);
2877
2878 if (SDValue Combined = visitADDLike(N))
2879 return Combined;
2880
2881 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DL, DAG))
2882 return V;
2883
2884 if (SDValue V = foldAddSubOfSignBit(N, DL, DAG))
2885 return V;
2886
2887 // Try to match AVGFLOOR fixedwidth pattern
2888 if (SDValue V = foldAddToAvg(N, DL))
2889 return V;
2890
2891 // fold (a+b) -> (a|b) iff a and b share no bits.
2892 if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
2893 DAG.haveNoCommonBitsSet(N0, N1)) {
2894 SDNodeFlags Flags;
2895 Flags.setDisjoint(true);
2896 return DAG.getNode(ISD::OR, DL, VT, N0, N1, Flags);
2897 }
2898
2899 // Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)).
2900 if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
2901 const APInt &C0 = N0->getConstantOperandAPInt(0);
2902 const APInt &C1 = N1->getConstantOperandAPInt(0);
2903 return DAG.getVScale(DL, VT, C0 + C1);
2904 }
2905
2906 // fold a+vscale(c1)+vscale(c2) -> a+vscale(c1+c2)
2907 if (N0.getOpcode() == ISD::ADD &&
2908 N0.getOperand(1).getOpcode() == ISD::VSCALE &&
2909 N1.getOpcode() == ISD::VSCALE) {
2910 const APInt &VS0 = N0.getOperand(1)->getConstantOperandAPInt(0);
2911 const APInt &VS1 = N1->getConstantOperandAPInt(0);
2912 SDValue VS = DAG.getVScale(DL, VT, VS0 + VS1);
2913 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), VS);
2914 }
2915
2916 // Fold (add step_vector(c1), step_vector(c2) to step_vector(c1+c2))
2917 if (N0.getOpcode() == ISD::STEP_VECTOR &&
2918 N1.getOpcode() == ISD::STEP_VECTOR) {
2919 const APInt &C0 = N0->getConstantOperandAPInt(0);
2920 const APInt &C1 = N1->getConstantOperandAPInt(0);
2921 APInt NewStep = C0 + C1;
2922 return DAG.getStepVector(DL, VT, NewStep);
2923 }
2924
2925 // Fold a + step_vector(c1) + step_vector(c2) to a + step_vector(c1+c2)
2926 if (N0.getOpcode() == ISD::ADD &&
2927 N0.getOperand(1).getOpcode() == ISD::STEP_VECTOR &&
2928 N1.getOpcode() == ISD::STEP_VECTOR) {
2929 const APInt &SV0 = N0.getOperand(1)->getConstantOperandAPInt(0);
2930 const APInt &SV1 = N1->getConstantOperandAPInt(0);
2931 APInt NewStep = SV0 + SV1;
2932 SDValue SV = DAG.getStepVector(DL, VT, NewStep);
2933 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), SV);
2934 }
2935
2936 return SDValue();
2937}
2938
2939SDValue DAGCombiner::visitADDSAT(SDNode *N) {
2940 unsigned Opcode = N->getOpcode();
2941 SDValue N0 = N->getOperand(0);
2942 SDValue N1 = N->getOperand(1);
2943 EVT VT = N0.getValueType();
2944 bool IsSigned = Opcode == ISD::SADDSAT;
2945 SDLoc DL(N);
2946
2947 // fold (add_sat x, undef) -> -1
2948 if (N0.isUndef() || N1.isUndef())
2949 return DAG.getAllOnesConstant(DL, VT);
2950
2951 // fold (add_sat c1, c2) -> c3
2952 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
2953 return C;
2954
2955 // canonicalize constant to RHS
2956 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2957 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2958 return DAG.getNode(Opcode, DL, VT, N1, N0);
2959
2960 // fold vector ops
2961 if (VT.isVector()) {
2962 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
2963 return FoldedVOp;
2964
2965 // fold (add_sat x, 0) -> x, vector edition
2966 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
2967 return N0;
2968 }
2969
2970 // fold (add_sat x, 0) -> x
2971 if (isNullConstant(N1))
2972 return N0;
2973
2974 // If it cannot overflow, transform into an add.
2975 if (DAG.willNotOverflowAdd(IsSigned, N0, N1))
2976 return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
2977
2978 return SDValue();
2979}
2980
2981static SDValue getAsCarry(const TargetLowering &TLI, SDValue V,
2982 bool ForceCarryReconstruction = false) {
2983 bool Masked = false;
2984
2985 // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
2986 while (true) {
2987 if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
2988 V = V.getOperand(0);
2989 continue;
2990 }
2991
2992 if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
2993 if (ForceCarryReconstruction)
2994 return V;
2995
2996 Masked = true;
2997 V = V.getOperand(0);
2998 continue;
2999 }
3000
3001 if (ForceCarryReconstruction && V.getValueType() == MVT::i1)
3002 return V;
3003
3004 break;
3005 }
3006
3007 // If this is not a carry, return.
3008 if (V.getResNo() != 1)
3009 return SDValue();
3010
3011 if (V.getOpcode() != ISD::UADDO_CARRY && V.getOpcode() != ISD::USUBO_CARRY &&
3012 V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
3013 return SDValue();
3014
3015 EVT VT = V->getValueType(0);
3016 if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
3017 return SDValue();
3018
3019 // If the result is masked, then no matter what kind of bool it is we can
3020 // return. If it isn't, then we need to make sure the bool type is either 0 or
3021 // 1 and not other values.
3022 if (Masked ||
3023 TLI.getBooleanContents(V.getValueType()) ==
3024 TargetLowering::ZeroOrOneBooleanContent)
3025 return V;
3026
3027 return SDValue();
3028}
3029
3030/// Given the operands of an add/sub operation, see if the 2nd operand is a
3031/// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
3032/// the opcode and bypass the mask operation.
3033static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
3034 SelectionDAG &DAG, const SDLoc &DL) {
3035 if (N1.getOpcode() == ISD::ZERO_EXTEND)
3036 N1 = N1.getOperand(0);
3037
3038 if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
3039 return SDValue();
3040
3041 EVT VT = N0.getValueType();
3042 SDValue N10 = N1.getOperand(0);
3043 if (N10.getValueType() != VT && N10.getOpcode() == ISD::TRUNCATE)
3044 N10 = N10.getOperand(0);
3045
3046 if (N10.getValueType() != VT)
3047 return SDValue();
3048
3049 if (DAG.ComputeNumSignBits(N10) != VT.getScalarSizeInBits())
3050 return SDValue();
3051
3052 // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
3053 // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
3054 return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N10);
3055}
3056
3057/// Helper for doing combines based on N0 and N1 being added to each other.
3058SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
3059 SDNode *LocReference) {
3060 EVT VT = N0.getValueType();
3061 SDLoc DL(LocReference);
3062
3063 // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
3064 SDValue Y, N;
3065 if (sd_match(N1, m_Shl(m_Neg(m_Value(Y)), m_Value(N))))
3066 return DAG.getNode(ISD::SUB, DL, VT, N0,
3067 DAG.getNode(ISD::SHL, DL, VT, Y, N));
3068
3069 if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
3070 return V;
3071
3072 // Look for:
3073 // add (add x, 1), y
3074 // And if the target does not like this form then turn into:
3075 // sub y, (xor x, -1)
3076 if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
3077 N0.hasOneUse() && isOneOrOneSplat(N0.getOperand(1)) &&
3078 // Limit this to after legalization if the add has wrap flags
3079 (Level >= AfterLegalizeDAG || (!N0->getFlags().hasNoUnsignedWrap() &&
3080 !N0->getFlags().hasNoSignedWrap()))) {
3081 SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
3082 return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
3083 }
3084
3085 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse()) {
3086 // Hoist one-use subtraction by non-opaque constant:
3087 // (x - C) + y -> (x + y) - C
3088 // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
3089 if (isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3090 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
3091 return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
3092 }
3093 // Hoist one-use subtraction from non-opaque constant:
3094 // (C - x) + y -> (y - x) + C
3095 if (isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
3096 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
3097 return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0));
3098 }
3099 }
3100
3101 // add (mul x, C), x -> mul x, C+1
3102 if (N0.getOpcode() == ISD::MUL && N0.getOperand(0) == N1 &&
3103 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true) &&
3104 N0.hasOneUse()) {
3105 SDValue NewC = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
3106 DAG.getConstant(1, DL, VT));
3107 return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), NewC);
3108 }
3109
3110 // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
3111 // rather than 'add 0/-1' (the zext should get folded).
3112 // add (sext i1 Y), X --> sub X, (zext i1 Y)
3113 if (N0.getOpcode() == ISD::SIGN_EXTEND &&
3114 N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
3116 SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
3117 return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
3118 }
3119
3120 // add X, (sextinreg Y i1) -> sub X, (and Y 1)
3121 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
3122 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
3123 if (TN->getVT() == MVT::i1) {
3124 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
3125 DAG.getConstant(1, DL, VT));
3126 return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
3127 }
3128 }
3129
3130 // (add X, (uaddo_carry Y, 0, Carry)) -> (uaddo_carry X, Y, Carry)
3131 if (N1.getOpcode() == ISD::UADDO_CARRY && isNullConstant(N1.getOperand(1)) &&
3132 N1.getResNo() == 0)
3133 return DAG.getNode(ISD::UADDO_CARRY, DL, N1->getVTList(),
3134 N0, N1.getOperand(0), N1.getOperand(2));
3135
3136 // (add X, Carry) -> (uaddo_carry X, 0, Carry)
3137 if (TLI.isOperationLegalOrCustom(ISD::UADDO_CARRY, VT))
3138 if (SDValue Carry = getAsCarry(TLI, N1))
3139 return DAG.getNode(ISD::UADDO_CARRY, DL,
3140 DAG.getVTList(VT, Carry.getValueType()), N0,
3141 DAG.getConstant(0, DL, VT), Carry);
3142
3143 return SDValue();
3144}
3145
3146SDValue DAGCombiner::visitADDC(SDNode *N) {
3147 SDValue N0 = N->getOperand(0);
3148 SDValue N1 = N->getOperand(1);
3149 EVT VT = N0.getValueType();
3150 SDLoc DL(N);
3151
3152 // If the flag result is dead, turn this into an ADD.
3153 if (!N->hasAnyUseOfValue(1))
3154 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3155 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3156
3157 // canonicalize constant to RHS.
3158 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3159 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3160 if (N0C && !N1C)
3161 return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
3162
3163 // fold (addc x, 0) -> x + no carry out
3164 if (isNullConstant(N1))
3165 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
3166 DL, MVT::Glue));
3167
3168 // If it cannot overflow, transform into an add.
3169 if (DAG.computeOverflowForUnsignedAdd(N0, N1) == SelectionDAG::OFK_Never)
3170 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3171 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3172
3173 return SDValue();
3174}
3175
3176/**
3177 * Flips a boolean if it is cheaper to compute. If the Force parameter is set,
3178 * then the flip also occurs if computing the inverse is the same cost.
3179 * This function returns an empty SDValue in case it cannot flip the boolean
3180 * without increasing the cost of the computation. If you want to flip a boolean
3181 * no matter what, use DAG.getLogicalNOT.
3182 */
3183 static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG,
3184 const TargetLowering &TLI,
3185 bool Force) {
3186 if (Force && isa<ConstantSDNode>(V))
3187 return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
3188
3189 if (V.getOpcode() != ISD::XOR)
3190 return SDValue();
3191
3192 ConstantSDNode *Const = isConstOrConstSplat(V.getOperand(1), false);
3193 if (!Const)
3194 return SDValue();
3195
3196 EVT VT = V.getValueType();
3197
3198 bool IsFlip = false;
3199 switch(TLI.getBooleanContents(VT)) {
3200 case TargetLowering::ZeroOrOneBooleanContent:
3201 IsFlip = Const->isOne();
3202 break;
3203 case TargetLowering::ZeroOrNegativeOneBooleanContent:
3204 IsFlip = Const->isAllOnes();
3205 break;
3206 case TargetLowering::UndefinedBooleanContent:
3207 IsFlip = (Const->getAPIntValue() & 0x01) == 1;
3208 break;
3209 }
3210
3211 if (IsFlip)
3212 return V.getOperand(0);
3213 if (Force)
3214 return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
3215 return SDValue();
3216}
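// Illustrative use of extractBooleanFlip: with ZeroOrOneBooleanContent, a value
// V = (xor Cond, 1) is the logical negation of Cond, so the function returns
// Cond as the flipped boolean. Callers such as visitUADDO_CARRYLike rely on
// this to rewrite (uaddo_carry (xor a, -1), b, c) into a usubo_carry on !c.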
3217
3218SDValue DAGCombiner::visitADDO(SDNode *N) {
3219 SDValue N0 = N->getOperand(0);
3220 SDValue N1 = N->getOperand(1);
3221 EVT VT = N0.getValueType();
3222 bool IsSigned = (ISD::SADDO == N->getOpcode());
3223
3224 EVT CarryVT = N->getValueType(1);
3225 SDLoc DL(N);
3226
3227 // If the flag result is dead, turn this into an ADD.
3228 if (!N->hasAnyUseOfValue(1))
3229 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3230 DAG.getUNDEF(CarryVT));
3231
3232 // canonicalize constant to RHS.
3233 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3234 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3235 return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
3236
3237 // fold (addo x, 0) -> x + no carry out
3238 if (isNullOrNullSplat(N1))
3239 return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
3240
3241 // If it cannot overflow, transform into an add.
3242 if (DAG.willNotOverflowAdd(IsSigned, N0, N1))
3243 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3244 DAG.getConstant(0, DL, CarryVT));
3245
3246 if (IsSigned) {
3247 // fold (saddo (xor a, -1), 1) -> (ssub 0, a).
3248 if (isBitwiseNot(N0) && isOneOrOneSplat(N1))
3249 return DAG.getNode(ISD::SSUBO, DL, N->getVTList(),
3250 DAG.getConstant(0, DL, VT), N0.getOperand(0));
3251 } else {
3252 // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
3253 if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
3254 SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
3255 DAG.getConstant(0, DL, VT), N0.getOperand(0));
3256 return CombineTo(
3257 N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
3258 }
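// Worked example of the fold above: if a == 0 then (xor a, -1) + 1 wraps
// around (carry set) while 0 - a does not borrow; for any a != 0 the opposite
// holds, so the usubo overflow bit is the logical NOT of the uaddo carry,
// which is why the carry result is flipped here.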
3259
3260 if (SDValue Combined = visitUADDOLike(N0, N1, N))
3261 return Combined;
3262
3263 if (SDValue Combined = visitUADDOLike(N1, N0, N))
3264 return Combined;
3265 }
3266
3267 return SDValue();
3268}
3269
3270SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
3271 EVT VT = N0.getValueType();
3272 if (VT.isVector())
3273 return SDValue();
3274
3275 // (uaddo X, (uaddo_carry Y, 0, Carry)) -> (uaddo_carry X, Y, Carry)
3276 // If Y + 1 cannot overflow.
3277 if (N1.getOpcode() == ISD::UADDO_CARRY && isNullConstant(N1.getOperand(1))) {
3278 SDValue Y = N1.getOperand(0);
3279 SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
3280 if (DAG.computeOverflowForUnsignedAdd(Y, One) == SelectionDAG::OFK_Never)
3281 return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(), N0, Y,
3282 N1.getOperand(2));
3283 }
3284
3285 // (uaddo X, Carry) -> (uaddo_carry X, 0, Carry)
3286 if (TLI.isOperationLegalOrCustom(ISD::UADDO_CARRY, VT))
3287 if (SDValue Carry = getAsCarry(TLI, N1))
3288 return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(), N0,
3289 DAG.getConstant(0, SDLoc(N), VT), Carry);
3290
3291 return SDValue();
3292}
3293
3294SDValue DAGCombiner::visitADDE(SDNode *N) {
3295 SDValue N0 = N->getOperand(0);
3296 SDValue N1 = N->getOperand(1);
3297 SDValue CarryIn = N->getOperand(2);
3298
3299 // canonicalize constant to RHS
3300 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3301 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3302 if (N0C && !N1C)
3303 return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
3304 N1, N0, CarryIn);
3305
3306 // fold (adde x, y, false) -> (addc x, y)
3307 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
3308 return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
3309
3310 return SDValue();
3311}
3312
3313SDValue DAGCombiner::visitUADDO_CARRY(SDNode *N) {
3314 SDValue N0 = N->getOperand(0);
3315 SDValue N1 = N->getOperand(1);
3316 SDValue CarryIn = N->getOperand(2);
3317 SDLoc DL(N);
3318
3319 // canonicalize constant to RHS
3320 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3321 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3322 if (N0C && !N1C)
3323 return DAG.getNode(ISD::UADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
3324
3325 // fold (uaddo_carry x, y, false) -> (uaddo x, y)
3326 if (isNullConstant(CarryIn)) {
3327 if (!LegalOperations ||
3328 TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
3329 return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
3330 }
3331
3332 // fold (uaddo_carry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
3333 if (isNullConstant(N0) && isNullConstant(N1)) {
3334 EVT VT = N0.getValueType();
3335 EVT CarryVT = CarryIn.getValueType();
3336 SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
3337 AddToWorklist(CarryExt.getNode());
3338 return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
3339 DAG.getConstant(1, DL, VT)),
3340 DAG.getConstant(0, DL, CarryVT));
3341 }
3342
3343 if (SDValue Combined = visitUADDO_CARRYLike(N0, N1, CarryIn, N))
3344 return Combined;
3345
3346 if (SDValue Combined = visitUADDO_CARRYLike(N1, N0, CarryIn, N))
3347 return Combined;
3348
3349 // We want to avoid useless duplication.
3350 // TODO: This is done automatically for binary operations. As UADDO_CARRY is
3351 // not a binary operation, it is not really possible to leverage this
3352 // existing mechanism for it. However, if more operations require the same
3353 // deduplication logic, then it may be worth generalizing.
3354 SDValue Ops[] = {N1, N0, CarryIn};
3355 SDNode *CSENode =
3356 DAG.getNodeIfExists(ISD::UADDO_CARRY, N->getVTList(), Ops, N->getFlags());
3357 if (CSENode)
3358 return SDValue(CSENode, 0);
3359
3360 return SDValue();
3361}
3362
3363/**
3364 * If we are facing some sort of diamond carry propagation pattern try to
3365 * break it up to generate something like:
3366 * (uaddo_carry X, 0, (uaddo_carry A, B, Z):Carry)
3367 *
3368 * The end result is usually an increase in the number of operations required, but because the
3369 * carry is now linearized, other transforms can kick in and optimize the DAG.
3370 *
3371 * Patterns typically look something like
3372 * (uaddo A, B)
3373 * / \
3374 * Carry Sum
3375 * | \
3376 * | (uaddo_carry *, 0, Z)
3377 * | /
3378 * \ Carry
3379 * | /
3380 * (uaddo_carry X, *, *)
3381 *
3382 * But numerous variations exist. Our goal is to identify A, B, X and Z and
3383 * produce a combine with a single path for carry propagation.
3384 */
3385 static SDValue combineUADDO_CARRYDiamond(DAGCombiner &Combiner,
3386 SelectionDAG &DAG, SDValue X,
3387 SDValue Carry0, SDValue Carry1,
3388 SDNode *N) {
3389 if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1)
3390 return SDValue();
3391 if (Carry1.getOpcode() != ISD::UADDO)
3392 return SDValue();
3393
3394 SDValue Z;
3395
3396 /**
3397 * First look for a suitable Z. It will present itself in the form of
3398 * (uaddo_carry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true
3399 */
3400 if (Carry0.getOpcode() == ISD::UADDO_CARRY &&
3401 isNullConstant(Carry0.getOperand(1))) {
3402 Z = Carry0.getOperand(2);
3403 } else if (Carry0.getOpcode() == ISD::UADDO &&
3404 isOneConstant(Carry0.getOperand(1))) {
3405 EVT VT = Carry0->getValueType(1);
3406 Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT);
3407 } else {
3408 // We couldn't find a suitable Z.
3409 return SDValue();
3410 }
3411
3412
3413 auto cancelDiamond = [&](SDValue A,SDValue B) {
3414 SDLoc DL(N);
3415 SDValue NewY =
3416 DAG.getNode(ISD::UADDO_CARRY, DL, Carry0->getVTList(), A, B, Z);
3417 Combiner.AddToWorklist(NewY.getNode());
3418 return DAG.getNode(ISD::UADDO_CARRY, DL, N->getVTList(), X,
3419 DAG.getConstant(0, DL, X.getValueType()),
3420 NewY.getValue(1));
3421 };
3422
3423 /**
3424 * (uaddo A, B)
3425 * |
3426 * Sum
3427 * |
3428 * (uaddo_carry *, 0, Z)
3429 */
3430 if (Carry0.getOperand(0) == Carry1.getValue(0)) {
3431 return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1));
3432 }
3433
3434 /**
3435 * (uaddo_carry A, 0, Z)
3436 * |
3437 * Sum
3438 * |
3439 * (uaddo *, B)
3440 */
3441 if (Carry1.getOperand(0) == Carry0.getValue(0)) {
3442 return cancelDiamond(Carry0.getOperand(0), Carry1.getOperand(1));
3443 }
3444
3445 if (Carry1.getOperand(1) == Carry0.getValue(0)) {
3446 return cancelDiamond(Carry1.getOperand(0), Carry0.getOperand(0));
3447 }
3448
3449 return SDValue();
3450}
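// Illustrative context (an assumption, for exposition only): diamonds like the
// one matched above commonly arise when a wide (e.g. 128-bit) addition is
// lowered into word-sized uaddo/uaddo_carry pieces and the same partial sum
// feeds both a carry producer and a carry consumer. After cancelDiamond the
// carries form a single linear chain, which later combines and ADC-style
// target instructions handle much better.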
3451
3452// If we are facing some sort of diamond carry/borrow in/out pattern try to
3453// match patterns like:
3454//
3455// (uaddo A, B) CarryIn
3456// | \ |
3457// | \ |
3458// PartialSum PartialCarryOutX /
3459// | | /
3460// | ____|____________/
3461// | / |
3462// (uaddo *, *) \________
3463// | \ \
3464// | \ |
3465// | PartialCarryOutY |
3466// | \ |
3467// | \ /
3468// AddCarrySum | ______/
3469// | /
3470// CarryOut = (or *, *)
3471//
3472// And generate UADDO_CARRY (or USUBO_CARRY) with two result values:
3473//
3474// {AddCarrySum, CarryOut} = (uaddo_carry A, B, CarryIn)
3475//
3476// Our goal is to identify A, B, and CarryIn and produce UADDO_CARRY/USUBO_CARRY
3477// with a single path for carry/borrow out propagation.
3479 SDValue N0, SDValue N1, SDNode *N) {
3480 SDValue Carry0 = getAsCarry(TLI, N0);
3481 if (!Carry0)
3482 return SDValue();
3483 SDValue Carry1 = getAsCarry(TLI, N1);
3484 if (!Carry1)
3485 return SDValue();
3486
3487 unsigned Opcode = Carry0.getOpcode();
3488 if (Opcode != Carry1.getOpcode())
3489 return SDValue();
3490 if (Opcode != ISD::UADDO && Opcode != ISD::USUBO)
3491 return SDValue();
3492 // Guarantee identical type of CarryOut
3493 EVT CarryOutType = N->getValueType(0);
3494 if (CarryOutType != Carry0.getValue(1).getValueType() ||
3495 CarryOutType != Carry1.getValue(1).getValueType())
3496 return SDValue();
3497
3498 // Canonicalize the add/sub of A and B (the top node in the above ASCII art)
3499 // as Carry0 and the add/sub of the carry in as Carry1 (the middle node).
3500 if (Carry1.getNode()->isOperandOf(Carry0.getNode()))
3501 std::swap(Carry0, Carry1);
3502
3503 // Check if nodes are connected in expected way.
3504 if (Carry1.getOperand(0) != Carry0.getValue(0) &&
3505 Carry1.getOperand(1) != Carry0.getValue(0))
3506 return SDValue();
3507
3508 // The carry in value must be on the righthand side for subtraction.
3509 unsigned CarryInOperandNum =
3510 Carry1.getOperand(0) == Carry0.getValue(0) ? 1 : 0;
3511 if (Opcode == ISD::USUBO && CarryInOperandNum != 1)
3512 return SDValue();
3513 SDValue CarryIn = Carry1.getOperand(CarryInOperandNum);
3514
3515 unsigned NewOp = Opcode == ISD::UADDO ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
3516 if (!TLI.isOperationLegalOrCustom(NewOp, Carry0.getValue(0).getValueType()))
3517 return SDValue();
3518
3519 // Verify that the carry/borrow in is plausibly a carry/borrow bit.
3520 CarryIn = getAsCarry(TLI, CarryIn, true);
3521 if (!CarryIn)
3522 return SDValue();
3523
3524 SDLoc DL(N);
3525 CarryIn = DAG.getBoolExtOrTrunc(CarryIn, DL, Carry1->getValueType(1),
3526 Carry1->getValueType(0));
3527 SDValue Merged =
3528 DAG.getNode(NewOp, DL, Carry1->getVTList(), Carry0.getOperand(0),
3529 Carry0.getOperand(1), CarryIn);
3530
3531 // Please note that because we have proven that the result of the UADDO/USUBO
3532 // of A and B feeds into the UADDO/USUBO that does the carry/borrow in, we can
3533 // therefore prove that if the first UADDO/USUBO overflows, the second
3534 // UADDO/USUBO cannot. For example consider 8-bit numbers where 0xFF is the
3535 // maximum value.
3536 //
3537 // 0xFF + 0xFF == 0xFE with carry but 0xFE + 1 does not carry
3538 // 0x00 - 0xFF == 1 with a carry/borrow but 1 - 1 == 0 (no carry/borrow)
3539 //
3540 // This is important because it means that OR and XOR can be used to merge
3541 // carry flags; and that AND can return a constant zero.
3542 //
3543 // TODO: match other operations that can merge flags (ADD, etc)
3544 DAG.ReplaceAllUsesOfValueWith(Carry1.getValue(0), Merged.getValue(0));
3545 if (N->getOpcode() == ISD::AND)
3546 return DAG.getConstant(0, DL, CarryOutType);
3547 return Merged.getValue(1);
3548}
3549
3550SDValue DAGCombiner::visitUADDO_CARRYLike(SDValue N0, SDValue N1,
3551 SDValue CarryIn, SDNode *N) {
3552 // fold (uaddo_carry (xor a, -1), b, c) -> (usubo_carry b, a, !c) and flip
3553 // carry.
3554 if (isBitwiseNot(N0))
3555 if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) {
3556 SDLoc DL(N);
3557 SDValue Sub = DAG.getNode(ISD::USUBO_CARRY, DL, N->getVTList(), N1,
3558 N0.getOperand(0), NotC);
3559 return CombineTo(
3560 N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
3561 }
3562
3563 // Iff the flag result is dead:
3564 // (uaddo_carry (add|uaddo X, Y), 0, Carry) -> (uaddo_carry X, Y, Carry)
3565 // Don't do this if the Carry comes from the uaddo. It won't remove the uaddo
3566 // or the dependency between the instructions.
3567 if ((N0.getOpcode() == ISD::ADD ||
3568 (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0 &&
3569 N0.getValue(1) != CarryIn)) &&
3570 isNullConstant(N1) && !N->hasAnyUseOfValue(1))
3571 return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(),
3572 N0.getOperand(0), N0.getOperand(1), CarryIn);
3573
3574 /**
3575 * When one of the uaddo_carry arguments is itself a carry, we may be facing
3576 * a diamond carry propagation. In that case we try to transform the DAG
3577 * to ensure linear carry propagation if that is possible.
3578 */
3579 if (auto Y = getAsCarry(TLI, N1)) {
3580 // Because both are carries, Y and Z can be swapped.
3581 if (auto R = combineUADDO_CARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
3582 return R;
3583 if (auto R = combineUADDO_CARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
3584 return R;
3585 }
3586
3587 return SDValue();
3588}
3589
3590SDValue DAGCombiner::visitSADDO_CARRYLike(SDValue N0, SDValue N1,
3591 SDValue CarryIn, SDNode *N) {
3592 // fold (saddo_carry (xor a, -1), b, c) -> (ssubo_carry b, a, !c)
3593 if (isBitwiseNot(N0)) {
3594 if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true))
3595 return DAG.getNode(ISD::SSUBO_CARRY, SDLoc(N), N->getVTList(), N1,
3596 N0.getOperand(0), NotC);
3597 }
3598
3599 return SDValue();
3600}
3601
3602SDValue DAGCombiner::visitSADDO_CARRY(SDNode *N) {
3603 SDValue N0 = N->getOperand(0);
3604 SDValue N1 = N->getOperand(1);
3605 SDValue CarryIn = N->getOperand(2);
3606 SDLoc DL(N);
3607
3608 // canonicalize constant to RHS
3609 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3610 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3611 if (N0C && !N1C)
3612 return DAG.getNode(ISD::SADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
3613
3614 // fold (saddo_carry x, y, false) -> (saddo x, y)
3615 if (isNullConstant(CarryIn)) {
3616 if (!LegalOperations ||
3617 TLI.isOperationLegalOrCustom(ISD::SADDO, N->getValueType(0)))
3618 return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0, N1);
3619 }
3620
3621 if (SDValue Combined = visitSADDO_CARRYLike(N0, N1, CarryIn, N))
3622 return Combined;
3623
3624 if (SDValue Combined = visitSADDO_CARRYLike(N1, N0, CarryIn, N))
3625 return Combined;
3626
3627 return SDValue();
3628}
3629
3630// Attempt to create a USUBSAT(LHS, RHS) node with DstVT, performing a
3631// clamp/truncation if necessary.
3632static SDValue getTruncatedUSUBSAT(EVT DstVT, EVT SrcVT, SDValue LHS,
3633 SDValue RHS, SelectionDAG &DAG,
3634 const SDLoc &DL) {
3635 assert(DstVT.getScalarSizeInBits() <= SrcVT.getScalarSizeInBits() &&
3636 "Illegal truncation");
3637
3638 if (DstVT == SrcVT)
3639 return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3640
3641 // If the LHS is zero-extended then we can perform the USUBSAT as DstVT by
3642 // clamping RHS.
3643 APInt UpperBits = APInt::getBitsSetFrom(SrcVT.getScalarSizeInBits(),
3644 DstVT.getScalarSizeInBits());
3645 if (!DAG.MaskedValueIsZero(LHS, UpperBits))
3646 return SDValue();
3647
3648 SDValue SatLimit =
3649 DAG.getConstant(APInt::getLowBitsSet(SrcVT.getScalarSizeInBits(),
3650 DstVT.getScalarSizeInBits()),
3651 DL, SrcVT);
3652 RHS = DAG.getNode(ISD::UMIN, DL, SrcVT, RHS, SatLimit);
3653 RHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, RHS);
3654 LHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, LHS);
3655 return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3656}
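// Worked example for getTruncatedUSUBSAT: SrcVT = i32, DstVT = i16, with LHS
// known to fit in 16 bits. Any RHS >= 0x10000 already forces the i32 usubsat
// to 0, and after the UMIN clamp to 0xFFFF plus truncation the i16 usubsat
// also yields 0; for RHS <= 0xFFFF both forms compute the same 16-bit result.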
3657
3658// Try to find umax(a,b) - b or a - umin(a,b) patterns that may be converted to
3659// usubsat(a,b), optionally as a truncated type.
3660SDValue DAGCombiner::foldSubToUSubSat(EVT DstVT, SDNode *N, const SDLoc &DL) {
3661 if (N->getOpcode() != ISD::SUB ||
3662 !(!LegalOperations || hasOperation(ISD::USUBSAT, DstVT)))
3663 return SDValue();
3664
3665 EVT SubVT = N->getValueType(0);
3666 SDValue Op0 = N->getOperand(0);
3667 SDValue Op1 = N->getOperand(1);
3668
3669 // Try to find umax(a,b) - b or a - umin(a,b) patterns
3670 // that may be converted to usubsat(a,b).
3671 if (Op0.getOpcode() == ISD::UMAX && Op0.hasOneUse()) {
3672 SDValue MaxLHS = Op0.getOperand(0);
3673 SDValue MaxRHS = Op0.getOperand(1);
3674 if (MaxLHS == Op1)
3675 return getTruncatedUSUBSAT(DstVT, SubVT, MaxRHS, Op1, DAG, DL);
3676 if (MaxRHS == Op1)
3677 return getTruncatedUSUBSAT(DstVT, SubVT, MaxLHS, Op1, DAG, DL);
3678 }
3679
3680 if (Op1.getOpcode() == ISD::UMIN && Op1.hasOneUse()) {
3681 SDValue MinLHS = Op1.getOperand(0);
3682 SDValue MinRHS = Op1.getOperand(1);
3683 if (MinLHS == Op0)
3684 return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinRHS, DAG, DL);
3685 if (MinRHS == Op0)
3686 return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinLHS, DAG, DL);
3687 }
3688
3689 // sub(a,trunc(umin(zext(a),b))) -> usubsat(a,trunc(umin(b,SatLimit)))
3690 if (Op1.getOpcode() == ISD::TRUNCATE &&
3691 Op1.getOperand(0).getOpcode() == ISD::UMIN &&
3692 Op1.getOperand(0).hasOneUse()) {
3693 SDValue MinLHS = Op1.getOperand(0).getOperand(0);
3694 SDValue MinRHS = Op1.getOperand(0).getOperand(1);
3695 if (MinLHS.getOpcode() == ISD::ZERO_EXTEND && MinLHS.getOperand(0) == Op0)
3696 return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinLHS, MinRHS,
3697 DAG, DL);
3698 if (MinRHS.getOpcode() == ISD::ZERO_EXTEND && MinRHS.getOperand(0) == Op0)
3699 return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinRHS, MinLHS,
3700 DAG, DL);
3701 }
3702
3703 return SDValue();
3704}
3705
3706// Since it may not be valid to emit a fold to zero for vector initializers
3707// check if we can before folding.
3708static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
3709 SelectionDAG &DAG, bool LegalOperations) {
3710 if (!VT.isVector())
3711 return DAG.getConstant(0, DL, VT);
3712 if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
3713 return DAG.getConstant(0, DL, VT);
3714 return SDValue();
3715}
3716
3717SDValue DAGCombiner::visitSUB(SDNode *N) {
3718 SDValue N0 = N->getOperand(0);
3719 SDValue N1 = N->getOperand(1);
3720 EVT VT = N0.getValueType();
3721 unsigned BitWidth = VT.getScalarSizeInBits();
3722 SDLoc DL(N);
3723
3724 auto PeekThroughFreeze = [](SDValue N) {
3725 if (N->getOpcode() == ISD::FREEZE && N.hasOneUse())
3726 return N->getOperand(0);
3727 return N;
3728 };
3729
3730 // fold (sub x, x) -> 0
3731 // FIXME: Refactor this and xor and other similar operations together.
3732 if (PeekThroughFreeze(N0) == PeekThroughFreeze(N1))
3733 return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
3734
3735 // fold (sub c1, c2) -> c3
3736 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N1}))
3737 return C;
3738
3739 // fold vector ops
3740 if (VT.isVector()) {
3741 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
3742 return FoldedVOp;
3743
3744 // fold (sub x, 0) -> x, vector edition
3745 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
3746 return N0;
3747 }
3748
3749 if (SDValue NewSel = foldBinOpIntoSelect(N))
3750 return NewSel;
3751
3752 // fold (sub x, c) -> (add x, -c)
3753 if (ConstantSDNode *N1C = getAsNonOpaqueConstant(N1))
3754 return DAG.getNode(ISD::ADD, DL, VT, N0,
3755 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3756
3757 if (isNullOrNullSplat(N0)) {
3758 // Right-shifting everything out but the sign bit followed by negation is
3759 // the same as flipping arithmetic/logical shift type without the negation:
3760 // -(X >>u 31) -> (X >>s 31)
3761 // -(X >>s 31) -> (X >>u 31)
3762 if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
3763 ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
3764 if (ShiftAmt && ShiftAmt->getAPIntValue() == (BitWidth - 1)) {
3765 auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
3766 if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
3767 return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
3768 }
3769 }
3770
3771 // 0 - X --> 0 if the sub is NUW.
3772 if (N->getFlags().hasNoUnsignedWrap())
3773 return N0;
3774
3775 if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
3776 // N1 is either 0 or the minimum signed value. If the sub is NSW, then
3777 // N1 must be 0 because negating the minimum signed value is undefined.
3778 if (N->getFlags().hasNoSignedWrap())
3779 return N0;
3780
3781 // 0 - X --> X if X is 0 or the minimum signed value.
3782 return N1;
3783 }
3784
3785 // Convert 0 - abs(x).
3786 if (N1.getOpcode() == ISD::ABS && N1.hasOneUse() &&
3787 !TLI.isOperationLegalOrCustom(ISD::ABS, VT))
3788 if (SDValue Result = TLI.expandABS(N1.getNode(), DAG, true))
3789 return Result;
3790
3791 // Fold neg(splat(neg(x)) -> splat(x)
3792 if (VT.isVector()) {
3793 SDValue N1S = DAG.getSplatValue(N1, true);
3794 if (N1S && N1S.getOpcode() == ISD::SUB &&
3795 isNullConstant(N1S.getOperand(0)))
3796 return DAG.getSplat(VT, DL, N1S.getOperand(1));
3797 }
3798 }
3799
3800 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
3801 if (isAllOnesOrAllOnesSplat(N0))
3802 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
3803
3804 // fold (A - (0-B)) -> A+B
3805 if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
3806 return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
3807
3808 // fold A-(A-B) -> B
3809 if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
3810 return N1.getOperand(1);
3811
3812 // fold (A+B)-A -> B
3813 if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
3814 return N0.getOperand(1);
3815
3816 // fold (A+B)-B -> A
3817 if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
3818 return N0.getOperand(0);
3819
3820 // fold (A+C1)-C2 -> A+(C1-C2)
3821 if (N0.getOpcode() == ISD::ADD) {
3822 SDValue N01 = N0.getOperand(1);
3823 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N01, N1}))
3824 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
3825 }
3826
3827 // fold C2-(A+C1) -> (C2-C1)-A
3828 if (N1.getOpcode() == ISD::ADD) {
3829 SDValue N11 = N1.getOperand(1);
3830 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N11}))
3831 return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
3832 }
3833
3834 // fold (A-C1)-C2 -> A-(C1+C2)
3835 if (N0.getOpcode() == ISD::SUB) {
3836 SDValue N01 = N0.getOperand(1);
3837 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N01, N1}))
3838 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
3839 }
3840
3841 // fold (c1-A)-c2 -> (c1-c2)-A
3842 if (N0.getOpcode() == ISD::SUB) {
3843 SDValue N00 = N0.getOperand(0);
3844 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N00, N1}))
3845 return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
3846 }
3847
3848 SDValue A, B, C;
3849
3850 // fold ((A+(B+C))-B) -> A+C
3851 if (sd_match(N0, m_Add(m_Value(A), m_Add(m_Specific(N1), m_Value(C)))))
3852 return DAG.getNode(ISD::ADD, DL, VT, A, C);
3853
3854 // fold ((A+(B-C))-B) -> A-C
3855 if (sd_match(N0, m_Add(m_Value(A), m_Sub(m_Specific(N1), m_Value(C)))))
3856 return DAG.getNode(ISD::SUB, DL, VT, A, C);
3857
3858 // fold ((A-(B-C))-C) -> A-B
3859 if (sd_match(N0, m_Sub(m_Value(A), m_Sub(m_Value(B), m_Specific(N1)))))
3860 return DAG.getNode(ISD::SUB, DL, VT, A, B);
3861
3862 // fold (A-(B-C)) -> A+(C-B)
3863 if (sd_match(N1, m_OneUse(m_Sub(m_Value(B), m_Value(C)))))
3864 return DAG.getNode(ISD::ADD, DL, VT, N0,
3865 DAG.getNode(ISD::SUB, DL, VT, C, B));
3866
3867 // A - (A & B) -> A & (~B)
3868 if (sd_match(N1, m_And(m_Specific(N0), m_Value(B))) &&
3869 (N1.hasOneUse() || isConstantOrConstantVector(B, /*NoOpaques=*/true)))
3870 return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getNOT(DL, B, VT));
3871
3872 // fold (A - (-B * C)) -> (A + (B * C))
3873 if (sd_match(N1, m_OneUse(m_Mul(m_Neg(m_Value(B)), m_Value(C)))))
3874 return DAG.getNode(ISD::ADD, DL, VT, N0,
3875 DAG.getNode(ISD::MUL, DL, VT, B, C));
3876
3877 // If either operand of a sub is undef, the result is undef
3878 if (N0.isUndef())
3879 return N0;
3880 if (N1.isUndef())
3881 return N1;
3882
3883 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DL, DAG))
3884 return V;
3885
3886 if (SDValue V = foldAddSubOfSignBit(N, DL, DAG))
3887 return V;
3888
3889 // Try to match AVGCEIL fixedwidth pattern
3890 if (SDValue V = foldSubToAvg(N, DL))
3891 return V;
3892
3893 if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, DL))
3894 return V;
3895
3896 if (SDValue V = foldSubToUSubSat(VT, N, DL))
3897 return V;
3898
3899 // (A - B) - 1 -> add (xor B, -1), A
3901 return DAG.getNode(ISD::ADD, DL, VT, A, DAG.getNOT(DL, B, VT));
3902
3903 // Look for:
3904 // sub y, (xor x, -1)
3905 // And if the target does not like this form then turn into:
3906 // add (add x, y), 1
3907 if (TLI.preferIncOfAddToSubOfNot(VT) && N1.hasOneUse() && isBitwiseNot(N1)) {
3908 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(0));
3909 return DAG.getNode(ISD::ADD, DL, VT, Add, DAG.getConstant(1, DL, VT));
3910 }
3911
3912 // Hoist one-use addition by non-opaque constant:
3913 // (x + C) - y -> (x - y) + C
3914 if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
3915 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3916 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3917 return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
3918 }
3919 // y - (x + C) -> (y - x) - C
3920 if (N1.getOpcode() == ISD::ADD && N1.hasOneUse() &&
3921 isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) {
3922 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
3923 return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
3924 }
3925 // (x - C) - y -> (x - y) - C
3926 // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
3927 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
3928 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3929 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3930 return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
3931 }
3932 // (C - x) - y -> C - (x + y)
3933 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
3934 isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
3935 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1);
3936 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add);
3937 }
3938
3939 // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
3940 // rather than 'sub 0/1' (the sext should get folded).
3941 // sub X, (zext i1 Y) --> add X, (sext i1 Y)
3942 if (N1.getOpcode() == ISD::ZERO_EXTEND &&
3943 N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
3944 TLI.getBooleanContents(VT) ==
3945 TargetLowering::ZeroOrNegativeOneBooleanContent) {
3946 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
3947 return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
3948 }
3949
3950 // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
3951 if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
3952 if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
3953 SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
3954 SDValue S0 = N1.getOperand(0);
3955 if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0))
3956 if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
3957 if (C->getAPIntValue() == (BitWidth - 1))
3958 return DAG.getNode(ISD::ABS, DL, VT, S0);
3959 }
3960 }
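// Worked example of the abs fold above (i32): for X = -7, Y = sra(X, 31) = -1,
// so (X ^ Y) - Y = (~-7) + 1 = 7; for X >= 0, Y = 0 and the expression is just
// X. Hence the whole pattern computes abs(X).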
3961
3962 // If the relocation model supports it, consider symbol offsets.
3963 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
3964 if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
3965 // fold (sub Sym+c1, Sym+c2) -> c1-c2
3966 if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
3967 if (GA->getGlobal() == GB->getGlobal())
3968 return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
3969 DL, VT);
3970 }
3971
3972 // sub X, (sextinreg Y i1) -> add X, (and Y 1)
3973 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
3974 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
3975 if (TN->getVT() == MVT::i1) {
3976 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
3977 DAG.getConstant(1, DL, VT));
3978 return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
3979 }
3980 }
3981
3982 // canonicalize (sub X, (vscale * C)) to (add X, (vscale * -C))
3983 if (N1.getOpcode() == ISD::VSCALE && N1.hasOneUse()) {
3984 const APInt &IntVal = N1.getConstantOperandAPInt(0);
3985 return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getVScale(DL, VT, -IntVal));
3986 }
3987
3988 // canonicalize (sub X, step_vector(C)) to (add X, step_vector(-C))
3989 if (N1.getOpcode() == ISD::STEP_VECTOR && N1.hasOneUse()) {
3990 APInt NewStep = -N1.getConstantOperandAPInt(0);
3991 return DAG.getNode(ISD::ADD, DL, VT, N0,
3992 DAG.getStepVector(DL, VT, NewStep));
3993 }
3994
3995 // Prefer an add for more folding potential and possibly better codegen:
3996 // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
3997 if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
3998 SDValue ShAmt = N1.getOperand(1);
3999 ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
4000 if (ShAmtC && ShAmtC->getAPIntValue() == (BitWidth - 1)) {
4001 SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
4002 return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
4003 }
4004 }
4005
4006 // As with the previous fold, prefer add for more folding potential.
4007 // Subtracting SMIN/0 is the same as adding SMIN/0:
4008 // N0 - (X << BW-1) --> N0 + (X << BW-1)
4009 if (N1.getOpcode() == ISD::SHL) {
4010 ConstantSDNode *ShlC = isConstOrConstSplat(N1.getOperand(1));
4011 if (ShlC && ShlC->getAPIntValue() == (BitWidth - 1))
4012 return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
4013 }
4014
4015 // (sub (usubo_carry X, 0, Carry), Y) -> (usubo_carry X, Y, Carry)
4016 if (N0.getOpcode() == ISD::USUBO_CARRY && isNullConstant(N0.getOperand(1)) &&
4017 N0.getResNo() == 0 && N0.hasOneUse())
4018 return DAG.getNode(ISD::USUBO_CARRY, DL, N0->getVTList(),
4019 N0.getOperand(0), N1, N0.getOperand(2));
4020
4021 if (TLI.isOperationLegalOrCustom(ISD::UADDO_CARRY, VT)) {
4022 // (sub Carry, X) -> (uaddo_carry (sub 0, X), 0, Carry)
4023 if (SDValue Carry = getAsCarry(TLI, N0)) {
4024 SDValue X = N1;
4025 SDValue Zero = DAG.getConstant(0, DL, VT);
4026 SDValue NegX = DAG.getNode(ISD::SUB, DL, VT, Zero, X);
4027 return DAG.getNode(ISD::UADDO_CARRY, DL,
4028 DAG.getVTList(VT, Carry.getValueType()), NegX, Zero,
4029 Carry);
4030 }
4031 }
4032
4033 // If there's no chance of borrowing from adjacent bits, then sub is xor:
4034 // sub C0, X --> xor X, C0
4035 if (ConstantSDNode *C0 = isConstOrConstSplat(N0)) {
4036 if (!C0->isOpaque()) {
4037 const APInt &C0Val = C0->getAPIntValue();
4038 const APInt &MaybeOnes = ~DAG.computeKnownBits(N1).Zero;
4039 if ((C0Val - MaybeOnes) == (C0Val ^ MaybeOnes))
4040 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
4041 }
4042 }
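// Worked example of the sub-to-xor fold above: C0 = 0b1111 and X known to only
// have bits from 0b0101 set. No bit position can borrow, so e.g. 15 - 5 == 10
// == 15 ^ 5, and the subtraction can be replaced by the (often cheaper) xor.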
4043
4044 // smax(a,b) - smin(a,b) --> abds(a,b)
4045 if (hasOperation(ISD::ABDS, VT) &&
4046 sd_match(N0, m_SMax(m_Value(A), m_Value(B))) &&
4047 sd_match(N1, m_SMin(m_Specific(A), m_Specific(B))))
4048 return DAG.getNode(ISD::ABDS, DL, VT, A, B);
4049
4050 // umax(a,b) - umin(a,b) --> abdu(a,b)
4051 if (hasOperation(ISD::ABDU, VT) &&
4052 sd_match(N0, m_UMax(m_Value(A), m_Value(B))) &&
4053 sd_match(N1, m_UMin(m_Specific(A), m_Specific(B))))
4054 return DAG.getNode(ISD::ABDU, DL, VT, A, B);
4055
4056 return SDValue();
4057}
4058
4059SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
4060 unsigned Opcode = N->getOpcode();
4061 SDValue N0 = N->getOperand(0);
4062 SDValue N1 = N->getOperand(1);
4063 EVT VT = N0.getValueType();
4064 bool IsSigned = Opcode == ISD::SSUBSAT;
4065 SDLoc DL(N);
4066
4067 // fold (sub_sat x, undef) -> 0
4068 if (N0.isUndef() || N1.isUndef())
4069 return DAG.getConstant(0, DL, VT);
4070
4071 // fold (sub_sat x, x) -> 0
4072 if (N0 == N1)
4073 return DAG.getConstant(0, DL, VT);
4074
4075 // fold (sub_sat c1, c2) -> c3
4076 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
4077 return C;
4078
4079 // fold vector ops
4080 if (VT.isVector()) {
4081 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4082 return FoldedVOp;
4083
4084 // fold (sub_sat x, 0) -> x, vector edition
4085 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
4086 return N0;
4087 }
4088
4089 // fold (sub_sat x, 0) -> x
4090 if (isNullConstant(N1))
4091 return N0;
4092
4093 // If it cannot overflow, transform into a sub.
4094 if (DAG.willNotOverflowSub(IsSigned, N0, N1))
4095 return DAG.getNode(ISD::SUB, DL, VT, N0, N1);
4096
4097 return SDValue();
4098}
4099
4100SDValue DAGCombiner::visitSUBC(SDNode *N) {
4101 SDValue N0 = N->getOperand(0);
4102 SDValue N1 = N->getOperand(1);
4103 EVT VT = N0.getValueType();
4104 SDLoc DL(N);
4105
4106 // If the flag result is dead, turn this into an SUB.
4107 if (!N->hasAnyUseOfValue(1))
4108 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
4109 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4110
4111 // fold (subc x, x) -> 0 + no borrow
4112 if (N0 == N1)
4113 return CombineTo(N, DAG.getConstant(0, DL, VT),
4114 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4115
4116 // fold (subc x, 0) -> x + no borrow
4117 if (isNullConstant(N1))
4118 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4119
4120 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
4121 if (isAllOnesConstant(N0))
4122 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
4123 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4124
4125 return SDValue();
4126}
4127
4128SDValue DAGCombiner::visitSUBO(SDNode *N) {
4129 SDValue N0 = N->getOperand(0);
4130 SDValue N1 = N->getOperand(1);
4131 EVT VT = N0.getValueType();
4132 bool IsSigned = (ISD::SSUBO == N->getOpcode());
4133
4134 EVT CarryVT = N->getValueType(1);
4135 SDLoc DL(N);
4136
4137 // If the flag result is dead, turn this into an SUB.
4138 if (!N->hasAnyUseOfValue(1))
4139 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
4140 DAG.getUNDEF(CarryVT));
4141
4142 // fold (subo x, x) -> 0 + no borrow
4143 if (N0 == N1)
4144 return CombineTo(N, DAG.getConstant(0, DL, VT),
4145 DAG.getConstant(0, DL, CarryVT));
4146
4147 // fold (subo x, c) -> (addo x, -c)
4148 if (ConstantSDNode *N1C = getAsNonOpaqueConstant(N1))
4149 if (IsSigned && !N1C->isMinSignedValue())
4150 return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
4151 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
4152
4153 // fold (subo x, 0) -> x + no borrow
4154 if (isNullOrNullSplat(N1))
4155 return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
4156
4157 // If it cannot overflow, transform into a sub.
4158 if (DAG.willNotOverflowSub(IsSigned, N0, N1))
4159 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
4160 DAG.getConstant(0, DL, CarryVT));
4161
4162 // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
4163 if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
4164 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
4165 DAG.getConstant(0, DL, CarryVT));
4166
4167 return SDValue();
4168}
4169
4170SDValue DAGCombiner::visitSUBE(SDNode *N) {
4171 SDValue N0 = N->getOperand(0);
4172 SDValue N1 = N->getOperand(1);
4173 SDValue CarryIn = N->getOperand(2);
4174
4175 // fold (sube x, y, false) -> (subc x, y)
4176 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
4177 return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
4178
4179 return SDValue();
4180}
4181
4182SDValue DAGCombiner::visitUSUBO_CARRY(SDNode *N) {
4183 SDValue N0 = N->getOperand(0);
4184 SDValue N1 = N->getOperand(1);
4185 SDValue CarryIn = N->getOperand(2);
4186
4187 // fold (usubo_carry x, y, false) -> (usubo x, y)
4188 if (isNullConstant(CarryIn)) {
4189 if (!LegalOperations ||
4190 TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
4191 return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
4192 }
4193
4194 return SDValue();
4195}
4196
4197SDValue DAGCombiner::visitSSUBO_CARRY(SDNode *N) {
4198 SDValue N0 = N->getOperand(0);
4199 SDValue N1 = N->getOperand(1);
4200 SDValue CarryIn = N->getOperand(2);
4201
4202 // fold (ssubo_carry x, y, false) -> (ssubo x, y)
4203 if (isNullConstant(CarryIn)) {
4204 if (!LegalOperations ||
4205 TLI.isOperationLegalOrCustom(ISD::SSUBO, N->getValueType(0)))
4206 return DAG.getNode(ISD::SSUBO, SDLoc(N), N->getVTList(), N0, N1);
4207 }
4208
4209 return SDValue();
4210}
4211
4212// Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX and
4213// UMULFIXSAT here.
4214SDValue DAGCombiner::visitMULFIX(SDNode *N) {
4215 SDValue N0 = N->getOperand(0);
4216 SDValue N1 = N->getOperand(1);
4217 SDValue Scale = N->getOperand(2);
4218 EVT VT = N0.getValueType();
4219
4220 // fold (mulfix x, undef, scale) -> 0
4221 if (N0.isUndef() || N1.isUndef())
4222 return DAG.getConstant(0, SDLoc(N), VT);
4223
4224 // Canonicalize constant to RHS (vector doesn't have to splat)
4225 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4226 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4227 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0, Scale);
4228
4229 // fold (mulfix x, 0, scale) -> 0
4230 if (isNullConstant(N1))
4231 return DAG.getConstant(0, SDLoc(N), VT);
4232
4233 return SDValue();
4234}
4235
4236SDValue DAGCombiner::visitMUL(SDNode *N) {
4237 SDValue N0 = N->getOperand(0);
4238 SDValue N1 = N->getOperand(1);
4239 EVT VT = N0.getValueType();
4240 SDLoc DL(N);
4241
4242 // fold (mul x, undef) -> 0
4243 if (N0.isUndef() || N1.isUndef())
4244 return DAG.getConstant(0, DL, VT);
4245
4246 // fold (mul c1, c2) -> c1*c2
4247 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MUL, DL, VT, {N0, N1}))
4248 return C;
4249
4250 // canonicalize constant to RHS (vector doesn't have to splat)
4251 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4252 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4253 return DAG.getNode(ISD::MUL, DL, VT, N1, N0);
4254
4255 bool N1IsConst = false;
4256 bool N1IsOpaqueConst = false;
4257 APInt ConstValue1;
4258
4259 // fold vector ops
4260 if (VT.isVector()) {
4261 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4262 return FoldedVOp;
4263
4264 N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
4265 assert((!N1IsConst ||
4266 ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
4267 "Splat APInt should be element width");
4268 } else {
4269 N1IsConst = isa<ConstantSDNode>(N1);
4270 if (N1IsConst) {
4271 ConstValue1 = N1->getAsAPIntVal();
4272 N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
4273 }
4274 }
4275
4276 // fold (mul x, 0) -> 0
4277 if (N1IsConst && ConstValue1.isZero())
4278 return N1;
4279
4280 // fold (mul x, 1) -> x
4281 if (N1IsConst && ConstValue1.isOne())
4282 return N0;
4283
4284 if (SDValue NewSel = foldBinOpIntoSelect(N))
4285 return NewSel;
4286
4287 // fold (mul x, -1) -> 0-x
4288 if (N1IsConst && ConstValue1.isAllOnes())
4289 return DAG.getNegative(N0, DL, VT);
4290
4291 // fold (mul x, (1 << c)) -> x << c
4292 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
4293 (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
4294 if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
4295 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4296 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
4297 return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
4298 }
4299 }
4300
4301 // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
4302 if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isNegatedPowerOf2()) {
4303 unsigned Log2Val = (-ConstValue1).logBase2();
4304 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4305
4306 // FIXME: If the input is something that is easily negated (e.g. a
4307 // single-use add), we should put the negate there.
4308 return DAG.getNode(ISD::SUB, DL, VT,
4309 DAG.getConstant(0, DL, VT),
4310 DAG.getNode(ISD::SHL, DL, VT, N0,
4311 DAG.getConstant(Log2Val, DL, ShiftVT)));
4312 }
4313
4314 // Attempt to reuse an existing umul_lohi/smul_lohi node, but only if the
4315 // hi result is in use in case we hit this mid-legalization.
4316 for (unsigned LoHiOpc : {ISD::UMUL_LOHI, ISD::SMUL_LOHI}) {
4317 if (!LegalOperations || TLI.isOperationLegalOrCustom(LoHiOpc, VT)) {
4318 SDVTList LoHiVT = DAG.getVTList(VT, VT);
4319 // TODO: Can we match commutable operands with getNodeIfExists?
4320 if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N0, N1}))
4321 if (LoHi->hasAnyUseOfValue(1))
4322 return SDValue(LoHi, 0);
4323 if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N1, N0}))
4324 if (LoHi->hasAnyUseOfValue(1))
4325 return SDValue(LoHi, 0);
4326 }
4327 }
4328
4329 // Try to transform:
4330 // (1) multiply-by-(power-of-2 +/- 1) into shift and add/sub.
4331 // mul x, (2^N + 1) --> add (shl x, N), x
4332 // mul x, (2^N - 1) --> sub (shl x, N), x
4333 // Examples: x * 33 --> (x << 5) + x
4334 // x * 15 --> (x << 4) - x
4335 // x * -33 --> -((x << 5) + x)
4336 // x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
4337 // (2) multiply-by-(power-of-2 +/- power-of-2) into shifts and add/sub.
4338 // mul x, (2^N + 2^M) --> (add (shl x, N), (shl x, M))
4339 // mul x, (2^N - 2^M) --> (sub (shl x, N), (shl x, M))
4340 // Examples: x * 0x8800 --> (x << 15) + (x << 11)
4341 // x * 0xf800 --> (x << 16) - (x << 11)
4342 // x * -0x8800 --> -((x << 15) + (x << 11))
4343 // x * -0xf800 --> -((x << 16) - (x << 11)) ; (x << 11) - (x << 16)
4344 if (N1IsConst && TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
4345 // TODO: We could handle more general decomposition of any constant by
4346 // having the target set a limit on number of ops and making a
4347 // callback to determine that sequence (similar to sqrt expansion).
4348 unsigned MathOp = ISD::DELETED_NODE;
4349 APInt MulC = ConstValue1.abs();
4350 // The constant `2` should be treated as (2^0 + 1).
4351 unsigned TZeros = MulC == 2 ? 0 : MulC.countr_zero();
4352 MulC.lshrInPlace(TZeros);
4353 if ((MulC - 1).isPowerOf2())
4354 MathOp = ISD::ADD;
4355 else if ((MulC + 1).isPowerOf2())
4356 MathOp = ISD::SUB;
4357
4358 if (MathOp != ISD::DELETED_NODE) {
4359 unsigned ShAmt =
4360 MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2();
4361 ShAmt += TZeros;
4362 assert(ShAmt < VT.getScalarSizeInBits() &&
4363 "multiply-by-constant generated out of bounds shift");
4364 SDValue Shl =
4365 DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT));
4366 SDValue R =
4367 TZeros ? DAG.getNode(MathOp, DL, VT, Shl,
4368 DAG.getNode(ISD::SHL, DL, VT, N0,
4369 DAG.getConstant(TZeros, DL, VT)))
4370 : DAG.getNode(MathOp, DL, VT, Shl, N0);
4371 if (ConstValue1.isNegative())
4372 R = DAG.getNegative(R, DL, VT);
4373 return R;
4374 }
4375 }
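// Trace of the decomposition above for x * 0x8800: MulC = 0x8800 has 11
// trailing zeros, so after shifting MulC >> 11 == 17 == 2^4 + 1. MathOp = ADD,
// ShAmt = 4 + 11 = 15, and with TZeros != 0 the result is
// (x << 15) + (x << 11), matching the example in the comment block above.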
4376
4377 // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
4378 if (N0.getOpcode() == ISD::SHL) {
4379 SDValue N01 = N0.getOperand(1);
4380 if (SDValue C3 = DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N1, N01}))
4381 return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), C3);
4382 }
4383
4384 // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
4385 // use.
4386 {
4387 SDValue Sh, Y;
4388
4389 // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
4390 if (N0.getOpcode() == ISD::SHL &&
4391 isConstantOrConstantVector(N0.getOperand(1)) && N0->hasOneUse()) {
4392 Sh = N0; Y = N1;
4393 } else if (N1.getOpcode() == ISD::SHL &&
4394 isConstantOrConstantVector(N1.getOperand(1)) &&
4395 N1->hasOneUse()) {
4396 Sh = N1; Y = N0;
4397 }
4398
4399 if (Sh.getNode()) {
4400 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, Sh.getOperand(0), Y);
4401 return DAG.getNode(ISD::SHL, DL, VT, Mul, Sh.getOperand(1));
4402 }
4403 }
4404
4405 // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
4406 if (N0.getOpcode() == ISD::ADD &&
4410 return DAG.getNode(
4411 ISD::ADD, DL, VT,
4412 DAG.getNode(ISD::MUL, SDLoc(N0), VT, N0.getOperand(0), N1),
4413 DAG.getNode(ISD::MUL, SDLoc(N1), VT, N0.getOperand(1), N1));
4414
4415 // Fold (mul (vscale * C0), C1) to (vscale * (C0 * C1)).
4416 ConstantSDNode *NC1 = isConstOrConstSplat(N1);
4417 if (N0.getOpcode() == ISD::VSCALE && NC1) {
4418 const APInt &C0 = N0.getConstantOperandAPInt(0);
4419 const APInt &C1 = NC1->getAPIntValue();
4420 return DAG.getVScale(DL, VT, C0 * C1);
4421 }
4422
4423 // Fold (mul step_vector(C0), C1) to (step_vector(C0 * C1)).
4424 APInt MulVal;
4425 if (N0.getOpcode() == ISD::STEP_VECTOR &&
4426 ISD::isConstantSplatVector(N1.getNode(), MulVal)) {
4427 const APInt &C0 = N0.getConstantOperandAPInt(0);
4428 APInt NewStep = C0 * MulVal;
4429 return DAG.getStepVector(DL, VT, NewStep);
4430 }
4431
4432 // Fold (mul x, 0/undef) -> 0 and
4433 // (mul x, 1) -> x
4434 // into and(x, mask).
4435 // We can replace vectors with '0' and '1' factors with a clearing mask.
4436 if (VT.isFixedLengthVector()) {
4437 unsigned NumElts = VT.getVectorNumElements();
4438 SmallBitVector ClearMask;
4439 ClearMask.reserve(NumElts);
4440 auto IsClearMask = [&ClearMask](ConstantSDNode *V) {
4441 if (!V || V->isZero()) {
4442 ClearMask.push_back(true);
4443 return true;
4444 }
4445 ClearMask.push_back(false);
4446 return V->isOne();
4447 };
4448 if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::AND, VT)) &&
4449 ISD::matchUnaryPredicate(N1, IsClearMask, /*AllowUndefs*/ true)) {
4450 assert(N1.getOpcode() == ISD::BUILD_VECTOR && "Unknown constant vector");
4451 EVT LegalSVT = N1.getOperand(0).getValueType();
4452 SDValue Zero = DAG.getConstant(0, DL, LegalSVT);
4453 SDValue AllOnes = DAG.getAllOnesConstant(DL, LegalSVT);
4454 SmallVector<SDValue, 16> Mask(N1->op_begin(), N1->op_end());
4455 for (unsigned I = 0; I != NumElts; ++I)
4456 if (ClearMask[I])
4457 Mask[I] = Zero;
4458 return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getBuildVector(VT, DL, Mask));
4459 }
4460 }
4461
4462 // reassociate mul
4463 if (SDValue RMUL = reassociateOps(ISD::MUL, DL, N0, N1, N->getFlags()))
4464 return RMUL;
4465
4466 // Fold mul(vecreduce(x), vecreduce(y)) -> vecreduce(mul(x, y))
4467 if (SDValue SD =
4468 reassociateReduction(ISD::VECREDUCE_MUL, ISD::MUL, DL, VT, N0, N1))
4469 return SD;
4470
4471 // Simplify the operands using demanded-bits information.
4472 if (SimplifyDemandedBits(SDValue(N, 0)))
4473 return SDValue(N, 0);
4474
4475 return SDValue();
4476}
4477
4478/// Return true if divmod libcall is available.
4480 const TargetLowering &TLI) {
4481 RTLIB::Libcall LC;
4482 EVT NodeType = Node->getValueType(0);
4483 if (!NodeType.isSimple())
4484 return false;
4485 switch (NodeType.getSimpleVT().SimpleTy) {
4486 default: return false; // No libcall for vector types.
4487 case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
4488 case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
4489 case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
4490 case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
4491 case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
4492 }
4493
4494 return TLI.getLibcallName(LC) != nullptr;
4495}
4496
4497/// Issue divrem if both quotient and remainder are needed.
4498SDValue DAGCombiner::useDivRem(SDNode *Node) {
4499 if (Node->use_empty())
4500 return SDValue(); // This is a dead node, leave it alone.
4501
4502 unsigned Opcode = Node->getOpcode();
4503 bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
4504 unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
4505
4506 // DivMod lib calls can still work on non-legal types if using lib-calls.
4507 EVT VT = Node->getValueType(0);
4508 if (VT.isVector() || !VT.isInteger())
4509 return SDValue();
4510
4511 if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
4512 return SDValue();
4513
4514 // If DIVREM is going to get expanded into a libcall,
4515 // but there is no libcall available, then don't combine.
4516 if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
4517 !isDivRemLibcallAvailable(Node, isSigned, TLI))
4518 return SDValue();
4519
4520 // If div is legal, it's better to do the normal expansion
4521 unsigned OtherOpcode = 0;
4522 if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
4523 OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
4524 if (TLI.isOperationLegalOrCustom(Opcode, VT))
4525 return SDValue();
4526 } else {
4527 OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
4528 if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
4529 return SDValue();
4530 }
4531
4532 SDValue Op0 = Node->getOperand(0);
4533 SDValue Op1 = Node->getOperand(1);
4534 SDValue combined;
4535 for (SDNode *User : Op0->uses()) {
4536 if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
4537 User->use_empty())
4538 continue;
4539 // Convert the other matching node(s), too;
4540 // otherwise, the DIVREM may get target-legalized into something
4541 // target-specific that we won't be able to recognize.
4542 unsigned UserOpc = User->getOpcode();
4543 if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
4544 User->getOperand(0) == Op0 &&
4545 User->getOperand(1) == Op1) {
4546 if (!combined) {
4547 if (UserOpc == OtherOpcode) {
4548 SDVTList VTs = DAG.getVTList(VT, VT);
4549 combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
4550 } else if (UserOpc == DivRemOpc) {
4551 combined = SDValue(User, 0);
4552 } else {
4553 assert(UserOpc == Opcode);
4554 continue;
4555 }
4556 }
4557 if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
4558 CombineTo(User, combined);
4559 else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
4560 CombineTo(User, combined.getValue(1));
4561 }
4562 }
4563 return combined;
4564}
4565
4567 SDValue N0 = N->getOperand(0);
4568 SDValue N1 = N->getOperand(1);
4569 EVT VT = N->getValueType(0);
4570 SDLoc DL(N);
4571
4572 unsigned Opc = N->getOpcode();
4573 bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
4574 ConstantSDNode *N1C = isConstOrConstSplat(N1);
4575
4576 // X / undef -> undef
4577 // X % undef -> undef
4578 // X / 0 -> undef
4579 // X % 0 -> undef
4580 // NOTE: This includes vectors where any divisor element is zero/undef.
4581 if (DAG.isUndef(Opc, {N0, N1}))
4582 return DAG.getUNDEF(VT);
4583
4584 // undef / X -> 0
4585 // undef % X -> 0
4586 if (N0.isUndef())
4587 return DAG.getConstant(0, DL, VT);
4588
4589 // 0 / X -> 0
4590 // 0 % X -> 0
4591 ConstantSDNode *N0C = isConstOrConstSplat(N0);
4592 if (N0C && N0C->isZero())
4593 return N0;
4594
4595 // X / X -> 1
4596 // X % X -> 0
4597 if (N0 == N1)
4598 return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
4599
4600 // X / 1 -> X
4601 // X % 1 -> 0
4602 // If this is a boolean op (single-bit element type), we can't have
4603 // division-by-zero or remainder-by-zero, so assume the divisor is 1.
4604 // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
4605 // it's a 1.
4606 if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
4607 return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
4608
4609 return SDValue();
4610}
4611
4612SDValue DAGCombiner::visitSDIV(SDNode *N) {
4613 SDValue N0 = N->getOperand(0);
4614 SDValue N1 = N->getOperand(1);
4615 EVT VT = N->getValueType(0);
4616 EVT CCVT = getSetCCResultType(VT);
4617 SDLoc DL(N);
4618
4619 // fold (sdiv c1, c2) -> c1/c2
4620 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, {N0, N1}))
4621 return C;
4622
4623 // fold vector ops
4624 if (VT.isVector())
4625 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4626 return FoldedVOp;
4627
4628 // fold (sdiv X, -1) -> 0-X
4629 ConstantSDNode *N1C = isConstOrConstSplat(N1);
4630 if (N1C && N1C->isAllOnes())
4631 return DAG.getNegative(N0, DL, VT);
4632
4633 // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
4634 if (N1C && N1C->isMinSignedValue())
4635 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4636 DAG.getConstant(1, DL, VT),
4637 DAG.getConstant(0, DL, VT));
4638
4639 if (SDValue V = simplifyDivRem(N, DAG))
4640 return V;
4641
4642 if (SDValue NewSel = foldBinOpIntoSelect(N))
4643 return NewSel;
4644
4645 // If we know the sign bits of both operands are zero, strength reduce to a
4646 // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
4647 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
4648 return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
4649
4650 if (SDValue V = visitSDIVLike(N0, N1, N)) {
4651 // If the corresponding remainder node exists, update its users with
4652 // (Dividend - (Quotient * Divisor)).
4653 if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
4654 { N0, N1 })) {
4655 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
4656 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4657 AddToWorklist(Mul.getNode());
4658 AddToWorklist(Sub.getNode());
4659 CombineTo(RemNode, Sub);
4660 }
4661 return V;
4662 }
4663
4664 // sdiv, srem -> sdivrem
4665 // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
4666 // true. Otherwise, we break the simplification logic in visitREM().
4667 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4668 if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
4669 if (SDValue DivRem = useDivRem(N))
4670 return DivRem;
4671
4672 return SDValue();
4673}
4674
4675static bool isDivisorPowerOfTwo(SDValue Divisor) {
4676 // Helper for determining whether a value is a power-2 constant scalar or a
4677 // vector of such elements.
4678 auto IsPowerOfTwo = [](ConstantSDNode *C) {
4679 if (C->isZero() || C->isOpaque())
4680 return false;
4681 if (C->getAPIntValue().isPowerOf2())
4682 return true;
4683 if (C->getAPIntValue().isNegatedPowerOf2())
4684 return true;
4685 return false;
4686 };
4687
4688 return ISD::matchUnaryPredicate(Divisor, IsPowerOfTwo);
4689}
4690
4691SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
4692 SDLoc DL(N);
4693 EVT VT = N->getValueType(0);
4694 EVT CCVT = getSetCCResultType(VT);
4695 unsigned BitWidth = VT.getScalarSizeInBits();
4696
4697 // fold (sdiv X, pow2) -> simple ops after legalize
4698 // FIXME: We check for the exact bit here because the generic lowering gives
4699 // better results in that case. The target-specific lowering should learn how
4700 // to handle exact sdivs efficiently.
4701 if (!N->getFlags().hasExact() && isDivisorPowerOfTwo(N1)) {
4702 // Target-specific implementation of sdiv x, pow2.
4703 if (SDValue Res = BuildSDIVPow2(N))
4704 return Res;
4705
4706 // Create constants that are functions of the shift amount value.
4707 EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
4708 SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
4709 SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
4710 C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
4711 SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
4712 if (!isConstantOrConstantVector(Inexact))
4713 return SDValue();
4714
4715 // Splat the sign bit into the register
4716 SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
4717 DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
4718 AddToWorklist(Sign.getNode());
4719
4720 // Add (N0 < 0) ? abs2 - 1 : 0;
4721 SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
4722 AddToWorklist(Srl.getNode());
4723 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
4724 AddToWorklist(Add.getNode());
4725 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
4726 AddToWorklist(Sra.getNode());
4727
4728 // Special case: (sdiv X, 1) -> X
4729 // Special Case: (sdiv X, -1) -> 0-X
4730 SDValue One = DAG.getConstant(1, DL, VT);
4731 SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
4732 SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
4733 SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
4734 SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
4735 Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);
4736
4737 // If dividing by a positive value, we're done. Otherwise, the result must
4738 // be negated.
4739 SDValue Zero = DAG.getConstant(0, DL, VT);
4740 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);
4741
4742 // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
4743 SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
4744 SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
4745 return Res;
4746 }
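// Worked example of the pow2 expansion above (i32, X sdiv 4): C1 = cttz(4) = 2
// and Inexact = 30. For X = -7: Sign = -1, Srl = 0xFFFFFFFF >>u 30 = 3,
// Add = -7 + 3 = -4, Sra = -4 >>s 2 = -1, matching sdiv(-7, 4). For X = 7:
// Sign = 0 and Sra = 7 >>s 2 = 1. Negative power-of-two divisors reuse the
// same shifts and are fixed up by the final negate-select.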
4747
4748 // If integer divide is expensive and we satisfy the requirements, emit an
4749 // alternate sequence. Targets may check function attributes for size/speed
4750 // trade-offs.
4751 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4752 if (isConstantOrConstantVector(N1) &&
4753 !TLI.isIntDivCheap(N->getValueType(0), Attr))
4754 if (SDValue Op = BuildSDIV(N))
4755 return Op;
4756
4757 return SDValue();
4758}
4759
4760SDValue DAGCombiner::visitUDIV(SDNode *N) {
4761 SDValue N0 = N->getOperand(0);
4762 SDValue N1 = N->getOperand(1);
4763 EVT VT = N->getValueType(0);
4764 EVT CCVT = getSetCCResultType(VT);
4765 SDLoc DL(N);
4766
4767 // fold (udiv c1, c2) -> c1/c2
4768 if (SDValue C = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, {N0, N1}))
4769 return C;
4770
4771 // fold vector ops
4772 if (VT.isVector())
4773 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4774 return FoldedVOp;
4775
4776 // fold (udiv X, -1) -> select(X == -1, 1, 0)
4777 ConstantSDNode *N1C = isConstOrConstSplat(N1);
4777  ConstantSDNode *N1C = isConstOrConstSplat(N1);
4778  if (N1C && N1C->isAllOnes() && CCVT.isVector() == VT.isVector()) {
4779 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4780 DAG.getConstant(1, DL, VT),
4781 DAG.getConstant(0, DL, VT));
4782 }
4783
4784 if (SDValue V = simplifyDivRem(N, DAG))
4785 return V;
4786
4787 if (SDValue NewSel = foldBinOpIntoSelect(N))
4788 return NewSel;
4789
4790 if (SDValue V = visitUDIVLike(N0, N1, N)) {
4791 // If the corresponding remainder node exists, update its users with
4792    // (Dividend - (Quotient * Divisor)).
4793 if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
4794 { N0, N1 })) {
4795 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
4796 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4797 AddToWorklist(Mul.getNode());
4798 AddToWorklist(Sub.getNode());
4799 CombineTo(RemNode, Sub);
4800 }
4801 return V;
4802 }
4803
4804  // udiv, urem -> udivrem
4805 // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
4806 // true. Otherwise, we break the simplification logic in visitREM().
4807  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4808  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
4809 if (SDValue DivRem = useDivRem(N))
4810 return DivRem;
4811
4812 return SDValue();
4813}
4814
4815SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
4816 SDLoc DL(N);
4817 EVT VT = N->getValueType(0);
4818
4819 // fold (udiv x, (1 << c)) -> x >>u c
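  // For example, (udiv i32 X, 16) becomes (srl X, 4).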
4820 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true)) {
4821 if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
4822 AddToWorklist(LogBase2.getNode());
4823
4824 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4825 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
4826 AddToWorklist(Trunc.getNode());
4827 return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
4828 }
4829 }
4830
4831 // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
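  // For example, (udiv i32 X, (shl 8, Y)) becomes (srl X, (add Y, 3)).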
4832 if (N1.getOpcode() == ISD::SHL) {
4833 SDValue N10 = N1.getOperand(0);
4834 if (isConstantOrConstantVector(N10, /*NoOpaques*/ true)) {
4835 if (SDValue LogBase2 = BuildLogBase2(N10, DL)) {
4836 AddToWorklist(LogBase2.getNode());
4837
4838 EVT ADDVT = N1.getOperand(1).getValueType();
4839 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
4840 AddToWorklist(Trunc.getNode());
4841 SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
4842 AddToWorklist(Add.getNode());
4843 return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
4844 }
4845 }
4846 }
4847
4848 // fold (udiv x, c) -> alternate
4849  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4850  if (isConstantOrConstantVector(N1) &&
4851      !TLI.isIntDivCheap(N->getValueType(0), Attr))
4852 if (SDValue Op = BuildUDIV(N))
4853 return Op;
4854
4855 return SDValue();
4856}
4857
4858SDValue DAGCombiner::buildOptimizedSREM(SDValue N0, SDValue N1, SDNode *N) {
4859 if (!N->getFlags().hasExact() && isDivisorPowerOfTwo(N1) &&
4860 !DAG.doesNodeExist(ISD::SDIV, N->getVTList(), {N0, N1})) {
4861 // Target-specific implementation of srem x, pow2.
4862 if (SDValue Res = BuildSREMPow2(N))
4863 return Res;
4864 }
4865 return SDValue();
4866}
4867
4868// handles ISD::SREM and ISD::UREM
4869SDValue DAGCombiner::visitREM(SDNode *N) {
4870 unsigned Opcode = N->getOpcode();
4871 SDValue N0 = N->getOperand(0);
4872 SDValue N1 = N->getOperand(1);
4873 EVT VT = N->getValueType(0);
4874 EVT CCVT = getSetCCResultType(VT);
4875
4876 bool isSigned = (Opcode == ISD::SREM);
4877 SDLoc DL(N);
4878
4879 // fold (rem c1, c2) -> c1%c2
4880 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
4881 return C;
4882
4883 // fold (urem X, -1) -> select(FX == -1, 0, FX)
4884 // Freeze the numerator to avoid a miscompile with an undefined value.
4885 if (!isSigned && llvm::isAllOnesOrAllOnesSplat(N1, /*AllowUndefs*/ false) &&
4886 CCVT.isVector() == VT.isVector()) {
4887 SDValue F0 = DAG.getFreeze(N0);
4888 SDValue EqualsNeg1 = DAG.getSetCC(DL, CCVT, F0, N1, ISD::SETEQ);
4889 return DAG.getSelect(DL, VT, EqualsNeg1, DAG.getConstant(0, DL, VT), F0);
4890 }
4891
4892 if (SDValue V = simplifyDivRem(N, DAG))
4893 return V;
4894
4895 if (SDValue NewSel = foldBinOpIntoSelect(N))
4896 return NewSel;
4897
4898 if (isSigned) {
4899 // If we know the sign bits of both operands are zero, strength reduce to a
4900 // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
4901 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
4902 return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
4903 } else {
4904 if (DAG.isKnownToBeAPowerOfTwo(N1)) {
4905 // fold (urem x, pow2) -> (and x, pow2-1)
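      // For example, (urem i32 X, 16) becomes (and X, 15).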
4906 SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
4907 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
4908 AddToWorklist(Add.getNode());
4909 return DAG.getNode(ISD::AND, DL, VT, N0, Add);
4910 }
4911 // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
4912 // fold (urem x, (lshr pow2, y)) -> (and x, (add (lshr pow2, y), -1))
4913 // TODO: We should sink the following into isKnownToBePowerOfTwo
4914 // using a OrZero parameter analogous to our handling in ValueTracking.
4915 if ((N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) &&
4916        DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
4917      SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
4918 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
4919 AddToWorklist(Add.getNode());
4920 return DAG.getNode(ISD::AND, DL, VT, N0, Add);
4921 }
4922 }
4923
4924  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4925
4926 // If X/C can be simplified by the division-by-constant logic, lower
4927 // X%C to the equivalent of X-X/C*C.
4928 // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
4929 // speculative DIV must not cause a DIVREM conversion. We guard against this
4930 // by skipping the simplification if isIntDivCheap(). When div is not cheap,
4931 // combine will not return a DIVREM. Regardless, checking cheapness here
4932 // makes sense since the simplification results in fatter code.
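  // For example, (srem X, 7) becomes (sub X, (mul Q, 7)), where Q is the
  // expansion of (sdiv X, 7) built by visitSDIVLike (typically a
  // multiply-by-magic-constant sequence).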
4933 if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
4934 if (isSigned) {
4935 // check if we can build faster implementation for srem
4936 if (SDValue OptimizedRem = buildOptimizedSREM(N0, N1, N))
4937 return OptimizedRem;
4938 }
4939
4940 SDValue OptimizedDiv =
4941 isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
4942 if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != N) {
4943 // If the equivalent Div node also exists, update its users.
4944 unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
4945 if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
4946 { N0, N1 }))
4947 CombineTo(DivNode, OptimizedDiv);
4948 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
4949 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4950 AddToWorklist(OptimizedDiv.getNode());
4951 AddToWorklist(Mul.getNode());
4952 return Sub;
4953 }
4954 }
4955
4956  // srem/urem -> sdivrem/udivrem
4957 if (SDValue DivRem = useDivRem(N))
4958 return DivRem.getValue(1);
4959
4960 return SDValue();
4961}
4962
4963SDValue DAGCombiner::visitMULHS(SDNode *N) {
4964 SDValue N0 = N->getOperand(0);
4965 SDValue N1 = N->getOperand(1);
4966 EVT VT = N->getValueType(0);
4967 SDLoc DL(N);
4968
4969 // fold (mulhs c1, c2)
4970 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHS, DL, VT, {N0, N1}))
4971 return C;
4972
4973 // canonicalize constant to RHS.
4974  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4975      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4976    return DAG.getNode(ISD::MULHS, DL, N->getVTList(), N1, N0);
4977
4978 if (VT.isVector()) {
4979 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4980 return FoldedVOp;
4981
4982 // fold (mulhs x, 0) -> 0
4983    // do not return N1, because an undef node may exist.
4984    if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
4985      return DAG.getConstant(0, DL, VT);
4986 }
4987
4988 // fold (mulhs x, 0) -> 0
4989 if (isNullConstant(N1))
4990 return N1;
4991
4992 // fold (mulhs x, 1) -> (sra x, size(x)-1)
4993 if (isOneConstant(N1))
4994 return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
4995                       DAG.getConstant(N0.getScalarValueSizeInBits() - 1, DL,
4996                                       getShiftAmountTy(N0.getValueType())));
4997
4998 // fold (mulhs x, undef) -> 0
4999 if (N0.isUndef() || N1.isUndef())
5000 return DAG.getConstant(0, DL, VT);
5001
5002 // If the type twice as wide is legal, transform the mulhs to a wider multiply
5003 // plus a shift.
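  // For example, (mulhs i16 X, Y) with a legal i32 multiply becomes
  //   (trunc (srl (mul (sext X), (sext Y)), 16)).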
5004 if (!TLI.isOperationLegalOrCustom(ISD::MULHS, VT) && VT.isSimple() &&
5005 !VT.isVector()) {
5006 MVT Simple = VT.getSimpleVT();
5007 unsigned SimpleSize = Simple.getSizeInBits();
5008 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5009 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5010 N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
5011 N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
5012 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
5013 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
5014 DAG.getConstant(SimpleSize, DL,
5015                                       getShiftAmountTy(N1.getValueType())));
5016      return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
5017 }
5018 }
5019
5020 return SDValue();
5021}
5022
5023SDValue DAGCombiner::visitMULHU(SDNode *N) {
5024 SDValue N0 = N->getOperand(0);
5025 SDValue N1 = N->getOperand(1);
5026 EVT VT = N->getValueType(0);
5027 SDLoc DL(N);
5028
5029 // fold (mulhu c1, c2)
5030 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHU, DL, VT, {N0, N1}))
5031 return C;
5032
5033 // canonicalize constant to RHS.
5034  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5035      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5036    return DAG.getNode(ISD::MULHU, DL, N->getVTList(), N1, N0);
5037
5038 if (VT.isVector()) {
5039 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5040 return FoldedVOp;
5041
5042 // fold (mulhu x, 0) -> 0
5043    // do not return N1, because an undef node may exist.
5044    if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
5045      return DAG.getConstant(0, DL, VT);
5046 }
5047
5048 // fold (mulhu x, 0) -> 0
5049 if (isNullConstant(N1))
5050 return N1;
5051
5052 // fold (mulhu x, 1) -> 0
5053 if (isOneConstant(N1))
5054 return DAG.getConstant(0, DL, N0.getValueType());
5055
5056 // fold (mulhu x, undef) -> 0
5057 if (N0.isUndef() || N1.isUndef())
5058 return DAG.getConstant(0, DL, VT);
5059
5060 // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
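  // For example, (mulhu i32 X, 16) is the high half of the 64-bit product,
  // i.e. (srl X, 28).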
5061 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
5062 hasOperation(ISD::SRL, VT)) {
5063 if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
5064 unsigned NumEltBits = VT.getScalarSizeInBits();
5065 SDValue SRLAmt = DAG.getNode(
5066 ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
5067 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
5068 SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
5069 return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
5070 }
5071 }
5072
5073 // If the type twice as wide is legal, transform the mulhu to a wider multiply
5074 // plus a shift.
5075 if (!TLI.isOperationLegalOrCustom(ISD::MULHU, VT) && VT.isSimple() &&
5076 !VT.isVector()) {
5077 MVT Simple = VT.getSimpleVT();
5078 unsigned SimpleSize = Simple.getSizeInBits();
5079 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5080 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5081 N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
5082 N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
5083 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
5084 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
5085 DAG.getConstant(SimpleSize, DL,
5086                                       getShiftAmountTy(N1.getValueType())));
5087      return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
5088 }
5089 }
5090
5091 // Simplify the operands using demanded-bits information.
5092 // We don't have demanded bits support for MULHU so this just enables constant
5093 // folding based on known bits.
5094  if (SimplifyDemandedBits(SDValue(N, 0)))
5095    return SDValue(N, 0);
5096
5097 return SDValue();
5098}
5099
5100SDValue DAGCombiner::visitAVG(SDNode *N) {
5101 unsigned Opcode = N->getOpcode();
5102 SDValue N0 = N->getOperand(0);
5103 SDValue N1 = N->getOperand(1);
5104 EVT VT = N->getValueType(0);
5105 SDLoc DL(N);
5106
5107 // fold (avg c1, c2)
5108 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5109 return C;
5110
5111 // canonicalize constant to RHS.
5112  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5113      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5114    return DAG.getNode(Opcode, DL, N->getVTList(), N1, N0);
5115
5116 if (VT.isVector()) {
5117 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5118 return FoldedVOp;
5119
5120 // fold (avgfloor x, 0) -> x >> 1
5121    if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) {
5122      if (Opcode == ISD::AVGFLOORS)
5123 return DAG.getNode(ISD::SRA, DL, VT, N0, DAG.getConstant(1, DL, VT));
5124 if (Opcode == ISD::AVGFLOORU)
5125 return DAG.getNode(ISD::SRL, DL, VT, N0, DAG.getConstant(1, DL, VT));
5126 }
5127 }
5128
5129 // fold (avg x, undef) -> x
5130 if (N0.isUndef())
5131 return N1;
5132 if (N1.isUndef())
5133 return N0;
5134
5135 // Fold (avg x, x) --> x
5136 if (N0 == N1 && Level >= AfterLegalizeTypes)
5137 return N0;
5138
5139 // TODO If we use avg for scalars anywhere, we can add (avgfl x, 0) -> x >> 1
5140
5141 return SDValue();
5142}
5143
5144SDValue DAGCombiner::visitABD(SDNode *N) {
5145 unsigned Opcode = N->getOpcode();
5146 SDValue N0 = N->getOperand(0);
5147 SDValue N1 = N->getOperand(1);
5148 EVT VT = N->getValueType(0);
5149 SDLoc DL(N);
5150
5151 // fold (abd c1, c2)
5152 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5153 return C;
5154
5155 // canonicalize constant to RHS.
5156  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5157      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5158    return DAG.getNode(Opcode, DL, N->getVTList(), N1, N0);
5159
5160 if (VT.isVector()) {
5161 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5162 return FoldedVOp;
5163
5164 // fold (abds x, 0) -> abs x
5165 // fold (abdu x, 0) -> x
5166    if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) {
5167      if (Opcode == ISD::ABDS)
5168 return DAG.getNode(ISD::ABS, DL, VT, N0);
5169 if (Opcode == ISD::ABDU)
5170 return N0;
5171 }
5172 }
5173
5174 // fold (abd x, undef) -> 0
5175 if (N0.isUndef() || N1.isUndef())
5176 return DAG.getConstant(0, DL, VT);
5177
5178  // fold (abds x, y) -> (abdu x, y) iff both args are known non-negative
5179 if (Opcode == ISD::ABDS && hasOperation(ISD::ABDU, VT) &&
5180 DAG.SignBitIsZero(N0) && DAG.SignBitIsZero(N1))
5181 return DAG.getNode(ISD::ABDU, DL, VT, N1, N0);
5182
5183 return SDValue();
5184}
5185
5186/// Perform optimizations common to nodes that compute two values. LoOp and HiOp
5187/// give the opcodes for the two computations that are being performed. Return
5188/// the combined value if a simplification was made, or a null SDValue otherwise.
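/// For example, if only the low half of an SMUL_LOHI is used, the whole node
/// can be replaced by a single MUL of its operands.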
5189SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
5190 unsigned HiOp) {
5191 // If the high half is not needed, just compute the low half.
5192 bool HiExists = N->hasAnyUseOfValue(1);
5193 if (!HiExists && (!LegalOperations ||
5194 TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
5195 SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
5196 return CombineTo(N, Res, Res);
5197 }
5198
5199 // If the low half is not needed, just compute the high half.
5200 bool LoExists = N->hasAnyUseOfValue(0);
5201 if (!LoExists && (!LegalOperations ||
5202 TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) {
5203 SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
5204 return CombineTo(N, Res, Res);
5205 }
5206
5207 // If both halves are used, return as it is.
5208 if (LoExists && HiExists)
5209 return SDValue();
5210
5211 // If the two computed results can be simplified separately, separate them.
5212 if (LoExists) {
5213 SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
5214 AddToWorklist(Lo.getNode());
5215 SDValue LoOpt = combine(Lo.getNode());
5216 if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
5217 (!LegalOperations ||
5218 TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType())))
5219 return CombineTo(N, LoOpt, LoOpt);
5220 }
5221
5222 if (HiExists) {
5223 SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
5224 AddToWorklist(Hi.getNode());
5225 SDValue HiOpt = combine(Hi.getNode());
5226 if (HiOpt.getNode() && HiOpt != Hi &&
5227 (!LegalOperations ||
5228 TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType())))
5229 return CombineTo(N, HiOpt, HiOpt);
5230 }
5231
5232 return SDValue();
5233}
5234
5235SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
5236 if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
5237 return Res;
5238
5239 SDValue N0 = N->getOperand(0);
5240 SDValue N1 = N->getOperand(1);
5241 EVT VT = N->getValueType(0);
5242 SDLoc DL(N);
5243
5244 // Constant fold.
5245 if (isa<ConstantSDNode>(N0) && isa<ConstantSDNode>(N1))
5246 return DAG.getNode(ISD::SMUL_LOHI, DL, N->getVTList(), N0, N1);
5247
5248 // canonicalize constant to RHS (vector doesn't have to splat)
5249  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5250      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5251    return DAG.getNode(ISD::SMUL_LOHI, DL, N->getVTList(), N1, N0);
5252
5253  // If the type twice as wide is legal, transform the multiply to a wider
5254  // multiply plus a shift.
5255 if (VT.isSimple() && !VT.isVector()) {
5256 MVT Simple = VT.getSimpleVT();
5257 unsigned SimpleSize = Simple.getSizeInBits();
5258 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5259 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5260 SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
5261 SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
5262 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
5263 // Compute the high part as N1.
5264 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
5265 DAG.getConstant(SimpleSize, DL,
5266 getShiftAmountTy(Lo.getValueType())));
5267 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
5268 // Compute the low part as N0.
5269 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
5270 return CombineTo(N, Lo, Hi);
5271 }
5272 }
5273
5274 return SDValue();
5275}
5276
5277SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
5278 if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
5279 return Res;
5280
5281 SDValue N0 = N->getOperand(0);
5282 SDValue N1 = N->getOperand(1);
5283 EVT VT = N->getValueType(0);
5284 SDLoc DL(N);
5285
5286 // Constant fold.
5287 if (isa<ConstantSDNode>(N0) && isa<ConstantSDNode>(N1))
5288 return DAG.getNode(ISD::UMUL_LOHI, DL, N->getVTList(), N0, N1);
5289
5290 // canonicalize constant to RHS (vector doesn't have to splat)
5291  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5292      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5293    return DAG.getNode(ISD::UMUL_LOHI, DL, N->getVTList(), N1, N0);
5294
5295 // (umul_lohi N0, 0) -> (0, 0)
5296 if (isNullConstant(N1)) {
5297 SDValue Zero = DAG.getConstant(0, DL, VT);
5298 return CombineTo(N, Zero, Zero);
5299 }
5300
5301 // (umul_lohi N0, 1) -> (N0, 0)
5302 if (isOneConstant(N1)) {
5303 SDValue Zero = DAG.getConstant(0, DL, VT);
5304 return CombineTo(N, N0, Zero);
5305 }
5306
5307  // If the type twice as wide is legal, transform the multiply to a wider
5308  // multiply plus a shift.
5309 if (VT.isSimple() && !VT.isVector()) {
5310 MVT Simple = VT.getSimpleVT();
5311 unsigned SimpleSize = Simple.getSizeInBits();
5312 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5313 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5314 SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
5315 SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
5316 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
5317 // Compute the high part as N1.
5318 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
5319 DAG.getConstant(SimpleSize, DL,
5320 getShiftAmountTy(Lo.getValueType())));
5321 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
5322 // Compute the low part as N0.
5323 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
5324 return CombineTo(N, Lo, Hi);
5325 }
5326 }
5327
5328 return SDValue();
5329}
5330
5331SDValue DAGCombiner::visitMULO(SDNode *N) {
5332 SDValue N0 = N->getOperand(0);
5333 SDValue N1 = N->getOperand(1);
5334 EVT VT = N0.getValueType();
5335 bool IsSigned = (ISD::SMULO == N->getOpcode());
5336
5337 EVT CarryVT = N->getValueType(1);
5338 SDLoc DL(N);
5339
5340  ConstantSDNode *N0C = isConstOrConstSplat(N0);
5341  ConstantSDNode *N1C = isConstOrConstSplat(N1);
5342
5343 // fold operation with constant operands.
5344 // TODO: Move this to FoldConstantArithmetic when it supports nodes with
5345 // multiple results.
5346 if (N0C && N1C) {
5347 bool Overflow;
5348 APInt Result =
5349 IsSigned ? N0C->getAPIntValue().smul_ov(N1C->getAPIntValue(), Overflow)
5350 : N0C->getAPIntValue().umul_ov(N1C->getAPIntValue(), Overflow);
5351 return CombineTo(N, DAG.getConstant(Result, DL, VT),
5352 DAG.getBoolConstant(Overflow, DL, CarryVT, CarryVT));
5353 }
5354
5355 // canonicalize constant to RHS.
5356  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5357      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5358    return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
5359
5360 // fold (mulo x, 0) -> 0 + no carry out
5361 if (isNullOrNullSplat(N1))
5362 return CombineTo(N, DAG.getConstant(0, DL, VT),
5363 DAG.getConstant(0, DL, CarryVT));
5364
5365 // (mulo x, 2) -> (addo x, x)
5366 // FIXME: This needs a freeze.
5367 if (N1C && N1C->getAPIntValue() == 2 &&
5368 (!IsSigned || VT.getScalarSizeInBits() > 2))
5369 return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, DL,
5370 N->getVTList(), N0, N0);
5371
5372 // A 1 bit SMULO overflows if both inputs are 1.
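  // The only non-zero 1-bit signed value is -1, and (-1) * (-1) == +1 is not
  // representable, so the truncated product equals (and N0, N1) and overflow
  // occurs exactly when that value is non-zero.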
5373 if (IsSigned && VT.getScalarSizeInBits() == 1) {
5374 SDValue And = DAG.getNode(ISD::AND, DL, VT, N0, N1);
5375 SDValue Cmp = DAG.getSetCC(DL, CarryVT, And,
5376 DAG.getConstant(0, DL, VT), ISD::SETNE);
5377 return CombineTo(N, And, Cmp);
5378 }
5379
5380 // If it cannot overflow, transform into a mul.
5381 if (DAG.willNotOverflowMul(IsSigned, N0, N1))
5382 return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1),
5383 DAG.getConstant(0, DL, CarryVT));
5384 return SDValue();
5385}
5386
5387// Function to calculate whether the Min/Max pair of SDNodes (potentially
5388// swapped around) make a signed saturate pattern, clamping to between a signed
5389// saturate of -2^(BW-1) and 2^(BW-1)-1, or an unsigned saturate of 0 and 2^BW.
5390// Returns the node being clamped and the bitwidth of the clamp in BW. Should
5391// work with both SMIN/SMAX nodes and setcc/select combo. The operands are the
5392// same as SimplifySelectCC. N0<N1 ? N2 : N3.
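// For example, clamping with (smin (smax X, -128), 127) yields BW = 8 and
// Unsigned = false, while (smax (smin X, 255), 0) yields BW = 8 and
// Unsigned = true.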
5393 static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2,
5394                                   SDValue N3, ISD::CondCode CC, unsigned &BW,
5395 bool &Unsigned, SelectionDAG &DAG) {
5396 auto isSignedMinMax = [&](SDValue N0, SDValue N1, SDValue N2, SDValue N3,
5397 ISD::CondCode CC) {
5398 // The compare and select operand should be the same or the select operands
5399 // should be truncated versions of the comparison.
5400 if (N0 != N2 && (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0)))
5401 return 0;
5402 // The constants need to be the same or a truncated version of each other.
5403    ConstantSDNode *N1C = isConstOrConstSplat(N1);
5404    ConstantSDNode *N3C = isConstOrConstSplat(N3);
5405    if (!N1C || !N3C)
5406 return 0;
5407 const APInt &C1 = N1C->getAPIntValue().trunc(N1.getScalarValueSizeInBits());
5408 const APInt &C2 = N3C->getAPIntValue().trunc(N3.getScalarValueSizeInBits());
5409 if (C1.getBitWidth() < C2.getBitWidth() || C1 != C2.sext(C1.getBitWidth()))
5410 return 0;
5411 return CC == ISD::SETLT ? ISD::SMIN : (CC == ISD::SETGT ? ISD::SMAX : 0);
5412 };
5413
5414 // Check the initial value is a SMIN/SMAX equivalent.
5415 unsigned Opcode0 = isSignedMinMax(N0, N1, N2, N3, CC);
5416 if (!Opcode0)
5417 return SDValue();
5418
5419 // We could only need one range check, if the fptosi could never produce
5420 // the upper value.
5421 if (N0.getOpcode() == ISD::FP_TO_SINT && Opcode0 == ISD::SMAX) {
5422 if (isNullOrNullSplat(N3)) {
5423 EVT IntVT = N0.getValueType().getScalarType();
5424 EVT FPVT = N0.getOperand(0).getValueType().getScalarType();
5425 if (FPVT.isSimple()) {
5426 Type *InputTy = FPVT.getTypeForEVT(*DAG.getContext());
5427 const fltSemantics &Semantics = InputTy->getFltSemantics();
5428 uint32_t MinBitWidth =
5429 APFloatBase::semanticsIntSizeInBits(Semantics, /*isSigned*/ true);
5430 if (IntVT.getSizeInBits() >= MinBitWidth) {
5431 Unsigned = true;
5432 BW = PowerOf2Ceil(MinBitWidth);
5433 return N0;
5434 }
5435 }
5436 }
5437 }
5438
5439 SDValue N00, N01, N02, N03;
5440 ISD::CondCode N0CC;
5441 switch (N0.getOpcode()) {
5442 case ISD::SMIN:
5443 case ISD::SMAX:
5444 N00 = N02 = N0.getOperand(0);
5445 N01 = N03 = N0.getOperand(1);
5446 N0CC = N0.getOpcode() == ISD::SMIN ? ISD::SETLT : ISD::SETGT;
5447 break;
5448 case ISD::SELECT_CC:
5449 N00 = N0.getOperand(0);
5450 N01 = N0.getOperand(1);
5451 N02 = N0.getOperand(2);
5452 N03 = N0.getOperand(3);
5453 N0CC = cast<CondCodeSDNode>(N0.getOperand(4))->get();
5454 break;
5455 case ISD::SELECT:
5456 case ISD::VSELECT:
5457 if (N0.getOperand(0).getOpcode() != ISD::SETCC)
5458 return SDValue();
5459 N00 = N0.getOperand(0).getOperand(0);
5460 N01 = N0.getOperand(0).getOperand(1);
5461 N02 = N0.getOperand(1);
5462 N03 = N0.getOperand(2);
5463 N0CC = cast<CondCodeSDNode>(N0.getOperand(0).getOperand(2))->get();
5464 break;
5465 default:
5466 return SDValue();
5467 }
5468
5469 unsigned Opcode1 = isSignedMinMax(N00, N01, N02, N03, N0CC);
5470 if (!Opcode1 || Opcode0 == Opcode1)
5471 return SDValue();
5472
5473 ConstantSDNode *MinCOp = isConstOrConstSplat(Opcode0 == ISD::SMIN ? N1 : N01);
5474 ConstantSDNode *MaxCOp = isConstOrConstSplat(Opcode0 == ISD::SMIN ? N01 : N1);
5475 if (!MinCOp || !MaxCOp || MinCOp->getValueType(0) != MaxCOp->getValueType(0))
5476 return SDValue();
5477
5478 const APInt &MinC = MinCOp->getAPIntValue();
5479 const APInt &MaxC = MaxCOp->getAPIntValue();
5480 APInt MinCPlus1 = MinC + 1;
5481 if (-MaxC == MinCPlus1 && MinCPlus1.isPowerOf2()) {
5482 BW = MinCPlus1.exactLogBase2() + 1;
5483 Unsigned = false;
5484 return N02;
5485 }
5486
5487 if (MaxC == 0 && MinCPlus1.isPowerOf2()) {
5488 BW = MinCPlus1.exactLogBase2();
5489 Unsigned = true;
5490 return N02;
5491 }
5492
5493 return SDValue();
5494}
5495
5496 static SDValue PerformMinMaxFpToSatCombine(SDValue N0, SDValue N1, SDValue N2,
5497                                            SDValue N3, ISD::CondCode CC,
5498                                            SelectionDAG &DAG) {
5499 unsigned BW;
5500 bool Unsigned;
5501 SDValue Fp = isSaturatingMinMax(N0, N1, N2, N3, CC, BW, Unsigned, DAG);
5502 if (!Fp || Fp.getOpcode() != ISD::FP_TO_SINT)
5503 return SDValue();
5504 EVT FPVT = Fp.getOperand(0).getValueType();
5505 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), BW);
5506 if (FPVT.isVector())
5507 NewVT = EVT::getVectorVT(*DAG.getContext(), NewVT,
5508 FPVT.getVectorElementCount());
5509 unsigned NewOpc = Unsigned ? ISD::FP_TO_UINT_SAT : ISD::FP_TO_SINT_SAT;
5510 if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(NewOpc, FPVT, NewVT))
5511 return SDValue();
5512 SDLoc DL(Fp);
5513 SDValue Sat = DAG.getNode(NewOpc, DL, NewVT, Fp.getOperand(0),
5514 DAG.getValueType(NewVT.getScalarType()));
5515 return DAG.getExtOrTrunc(!Unsigned, Sat, DL, N2->getValueType(0));
5516}
5517
5518 static SDValue PerformUMinFpToSatCombine(SDValue N0, SDValue N1, SDValue N2,
5519                                          SDValue N3, ISD::CondCode CC,
5520                                          SelectionDAG &DAG) {
5521 // We are looking for UMIN(FPTOUI(X), (2^n)-1), which may have come via a
5522  // select/vselect/select_cc. The two operand pairs for the select (N2/N3) may
5523 // be truncated versions of the setcc (N0/N1).
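  // For example, (umin (fp_to_uint X), 255) becomes (fp_to_uint_sat X) with an
  // i8 saturation width, zero-extended or truncated to the original type.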
5524 if ((N0 != N2 &&
5525 (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0))) ||
5526      N0.getOpcode() != ISD::FP_TO_UINT)
5527    return SDValue();
5528  ConstantSDNode *N1C = isConstOrConstSplat(N1);
5529  ConstantSDNode *N3C = isConstOrConstSplat(N3);
5530  if (!N1C || !N3C)
5531 return SDValue();
5532 const APInt &C1 = N1C->getAPIntValue();
5533 const APInt &C3 = N3C->getAPIntValue();
5534 if (!(C1 + 1).isPowerOf2() || C1.getBitWidth() < C3.getBitWidth() ||
5535 C1 != C3.zext(C1.getBitWidth()))
5536 return SDValue();
5537
5538 unsigned BW = (C1 + 1).exactLogBase2();
5539 EVT FPVT = N0.getOperand(0).getValueType();
5540 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), BW);
5541 if (FPVT.isVector())
5542 NewVT = EVT::getVectorVT(*DAG.getContext(), NewVT,
5543 FPVT.getVectorElementCount());
5544  if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(ISD::FP_TO_UINT_SAT,
5545                                                        FPVT, NewVT))
5546 return SDValue();
5547
5548 SDValue Sat =
5549 DAG.getNode(ISD::FP_TO_UINT_SAT, SDLoc(N0), NewVT, N0.getOperand(0),
5550 DAG.getValueType(NewVT.getScalarType()));
5551 return DAG.getZExtOrTrunc(Sat, SDLoc(N0), N3.getValueType());
5552}
5553
5554SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
5555 SDValue N0 = N->getOperand(0);
5556 SDValue N1 = N->getOperand(1);
5557 EVT VT = N0.getValueType();
5558 unsigned Opcode = N->getOpcode();
5559 SDLoc DL(N);
5560
5561 // fold operation with constant operands.
5562 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5563 return C;
5564
5565 // If the operands are the same, this is a no-op.
5566 if (N0 == N1)
5567 return N0;
5568
5569 // canonicalize constant to RHS
5570  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5571      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5572    return DAG.getNode(Opcode, DL, VT, N1, N0);
5573
5574 // fold vector ops
5575 if (VT.isVector())
5576 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5577 return FoldedVOp;
5578
5579 // reassociate minmax
5580 if (SDValue RMINMAX = reassociateOps(Opcode, DL, N0, N1, N->getFlags()))
5581 return RMINMAX;
5582
5583  // If sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
5584 // Only do this if:
5585 // 1. The current op isn't legal and the flipped is.
5586 // 2. The saturation pattern is broken by canonicalization in InstCombine.
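  // For example, if both operands are known non-negative, (umin X, Y) and
  // (smin X, Y) produce the same result, so whichever form is legal can be
  // used.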
5587 bool IsOpIllegal = !TLI.isOperationLegal(Opcode, VT);
5588 bool IsSatBroken = Opcode == ISD::UMIN && N0.getOpcode() == ISD::SMAX;
5589 if ((IsSatBroken || IsOpIllegal) && (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
5590 (N1.isUndef() || DAG.SignBitIsZero(N1))) {
5591 unsigned AltOpcode;
5592 switch (Opcode) {
5593 case ISD::SMIN: AltOpcode = ISD::UMIN; break;
5594 case ISD::SMAX: AltOpcode = ISD::UMAX; break;
5595 case ISD::UMIN: AltOpcode = ISD::SMIN; break;
5596 case ISD::UMAX: AltOpcode = ISD::SMAX; break;
5597 default: llvm_unreachable("Unknown MINMAX opcode");
5598 }
5599 if ((IsSatBroken && IsOpIllegal) || TLI.isOperationLegal(AltOpcode, VT))
5600 return DAG.getNode(AltOpcode, DL, VT, N0, N1);
5601 }
5602
5603 if (Opcode == ISD::SMIN || Opcode == ISD::SMAX)
5604    if (SDValue S = PerformMinMaxFpToSatCombine(
5605            N0, N1, N0, N1, Opcode == ISD::SMIN ? ISD::SETLT : ISD::SETGT, DAG))
5606 return S;
5607 if (Opcode == ISD::UMIN)
5608 if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N0, N1, ISD::SETULT, DAG))
5609 return S;
5610
5611 // Fold min/max(vecreduce(x), vecreduce(y)) -> vecreduce(min/max(x, y))
5612 auto ReductionOpcode = [](unsigned Opcode) {
5613 switch (Opcode) {
5614 case ISD::SMIN:
5615 return ISD::VECREDUCE_SMIN;
5616 case ISD::SMAX:
5617 return ISD::VECREDUCE_SMAX;
5618 case ISD::UMIN:
5619 return ISD::VECREDUCE_UMIN;
5620 case ISD::UMAX:
5621 return ISD::VECREDUCE_UMAX;
5622 default:
5623 llvm_unreachable("Unexpected opcode");
5624 }
5625 };
5626 if (SDValue SD = reassociateReduction(ReductionOpcode(Opcode), Opcode,
5627 SDLoc(N), VT, N0, N1))
5628 return SD;
5629
5630 // Simplify the operands using demanded-bits information.
5631  if (SimplifyDemandedBits(SDValue(N, 0)))
5632    return SDValue(N, 0);
5633
5634 return SDValue();
5635}
5636
5637/// If this is a bitwise logic instruction and both operands have the same
5638/// opcode, try to sink the other opcode after the logic instruction.
5639SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
5640 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
5641 EVT VT = N0.getValueType();
5642 unsigned LogicOpcode = N->getOpcode();
5643 unsigned HandOpcode = N0.getOpcode();
5644 assert(ISD::isBitwiseLogicOp(LogicOpcode) && "Expected logic opcode");
5645 assert(HandOpcode == N1.getOpcode() && "Bad input!");
5646
5647 // Bail early if none of these transforms apply.
5648 if (N0.getNumOperands() == 0)
5649 return SDValue();
5650
5651 // FIXME: We should check number of uses of the operands to not increase
5652 // the instruction count for all transforms.
5653
5654 // Handle size-changing casts (or sign_extend_inreg).
5655 SDValue X = N0.getOperand(0);
5656 SDValue Y = N1.getOperand(0);
5657 EVT XVT = X.getValueType();
5658 SDLoc DL(N);
5659 if (ISD::isExtOpcode(HandOpcode) || ISD::isExtVecInRegOpcode(HandOpcode) ||
5660 (HandOpcode == ISD::SIGN_EXTEND_INREG &&
5661 N0.getOperand(1) == N1.getOperand(1))) {
5662 // If both operands have other uses, this transform would create extra
5663 // instructions without eliminating anything.
5664 if (!N0.hasOneUse() && !N1.hasOneUse())
5665 return SDValue();
5666 // We need matching integer source types.
5667 if (XVT != Y.getValueType())
5668 return SDValue();
5669 // Don't create an illegal op during or after legalization. Don't ever
5670 // create an unsupported vector op.
5671 if ((VT.isVector() || LegalOperations) &&
5672 !TLI.isOperationLegalOrCustom(LogicOpcode, XVT))
5673 return SDValue();
5674 // Avoid infinite looping with PromoteIntBinOp.
5675 // TODO: Should we apply desirable/legal constraints to all opcodes?
5676 if ((HandOpcode == ISD::ANY_EXTEND ||
5677 HandOpcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
5678 LegalTypes && !TLI.isTypeDesirableForOp(LogicOpcode, XVT))
5679 return SDValue();
5680 // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
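    // For example, (and (zext i8 X to i32), (zext i8 Y to i32)) becomes
    // (zext (and X, Y) to i32).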
5681 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5682 if (HandOpcode == ISD::SIGN_EXTEND_INREG)
5683 return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
5684 return DAG.getNode(HandOpcode, DL, VT, Logic);
5685 }
5686
5687 // logic_op (truncate x), (truncate y) --> truncate (logic_op x, y)
5688 if (HandOpcode == ISD::TRUNCATE) {
5689 // If both operands have other uses, this transform would create extra
5690 // instructions without eliminating anything.
5691 if (!N0.hasOneUse() && !N1.hasOneUse())
5692 return SDValue();
5693 // We need matching source types.
5694 if (XVT != Y.getValueType())
5695 return SDValue();
5696 // Don't create an illegal op during or after legalization.
5697 if (LegalOperations && !TLI.isOperationLegal(LogicOpcode, XVT))
5698 return SDValue();
5699 // Be extra careful sinking truncate. If it's free, there's no benefit in
5700 // widening a binop. Also, don't create a logic op on an illegal type.
5701 if (TLI.isZExtFree(VT, XVT) && TLI.isTruncateFree(XVT, VT))
5702 return SDValue();
5703 if (!TLI.isTypeLegal(XVT))
5704 return SDValue();
5705 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5706 return DAG.getNode(HandOpcode, DL, VT, Logic);
5707 }
5708
5709 // For binops SHL/SRL/SRA/AND:
5710 // logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
5711 if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL ||
5712 HandOpcode == ISD::SRA || HandOpcode == ISD::AND) &&
5713 N0.getOperand(1) == N1.getOperand(1)) {
5714 // If either operand has other uses, this transform is not an improvement.
5715 if (!N0.hasOneUse() || !N1.hasOneUse())
5716 return SDValue();
5717 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5718 return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
5719 }
5720
5721 // Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
5722 if (HandOpcode == ISD::BSWAP) {
5723 // If either operand has other uses, this transform is not an improvement.
5724 if (!N0.hasOneUse() || !N1.hasOneUse())
5725 return SDValue();
5726 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5727 return DAG.getNode(HandOpcode, DL, VT, Logic);
5728 }
5729
5730 // For funnel shifts FSHL/FSHR:
5731 // logic_op (OP x, x1, s), (OP y, y1, s) -->
5732 // --> OP (logic_op x, y), (logic_op, x1, y1), s
5733 if ((HandOpcode == ISD::FSHL || HandOpcode == ISD::FSHR) &&
5734 N0.getOperand(2) == N1.getOperand(2)) {
5735 if (!N0.hasOneUse() || !N1.hasOneUse())
5736 return SDValue();
5737 SDValue X1 = N0.getOperand(1);
5738 SDValue Y1 = N1.getOperand(1);
5739 SDValue S = N0.getOperand(2);
5740 SDValue Logic0 = DAG.getNode(LogicOpcode, DL, VT, X, Y);
5741 SDValue Logic1 = DAG.getNode(LogicOpcode, DL, VT, X1, Y1);
5742 return DAG.getNode(HandOpcode, DL, VT, Logic0, Logic1, S);
5743 }
5744
5745 // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
5746 // Only perform this optimization up until type legalization, before
5747  // LegalizeVectorOps. LegalizeVectorOps promotes vector operations by
5748 // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
5749 // we don't want to undo this promotion.
5750 // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
5751 // on scalars.
5752 if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) &&
5753 Level <= AfterLegalizeTypes) {
5754 // Input types must be integer and the same.
5755 if (XVT.isInteger() && XVT == Y.getValueType() &&
5756 !(VT.isVector() && TLI.isTypeLegal(VT) &&
5757 !XVT.isVector() && !TLI.isTypeLegal(XVT))) {
5758 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5759 return DAG.getNode(HandOpcode, DL, VT, Logic);
5760 }
5761 }
5762
5763 // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
5764 // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
5765 // If both shuffles use the same mask, and both shuffle within a single
5766 // vector, then it is worthwhile to move the swizzle after the operation.
5767 // The type-legalizer generates this pattern when loading illegal
5768 // vector types from memory. In many cases this allows additional shuffle
5769 // optimizations.
5770 // There are other cases where moving the shuffle after the xor/and/or
5771 // is profitable even if shuffles don't perform a swizzle.
5772 // If both shuffles use the same mask, and both shuffles have the same first
5773 // or second operand, then it might still be profitable to move the shuffle
5774 // after the xor/and/or operation.
5775 if (HandOpcode == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
5776 auto *SVN0 = cast<ShuffleVectorSDNode>(N0);
5777 auto *SVN1 = cast<ShuffleVectorSDNode>(N1);
5778 assert(X.getValueType() == Y.getValueType() &&
5779 "Inputs to shuffles are not the same type");
5780
5781 // Check that both shuffles use the same mask. The masks are known to be of
5782 // the same length because the result vector type is the same.
5783 // Check also that shuffles have only one use to avoid introducing extra
5784 // instructions.
5785 if (!SVN0->hasOneUse() || !SVN1->hasOneUse() ||
5786 !SVN0->getMask().equals(SVN1->getMask()))
5787 return SDValue();
5788
5789 // Don't try to fold this node if it requires introducing a
5790 // build vector of all zeros that might be illegal at this stage.
5791 SDValue ShOp = N0.getOperand(1);
5792 if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
5793 ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
5794
5795 // (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C)
5796 if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
5797 SDValue Logic = DAG.getNode(LogicOpcode, DL, VT,
5798 N0.getOperand(0), N1.getOperand(0));
5799 return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask());
5800 }
5801
5802 // Don't try to fold this node if it requires introducing a
5803 // build vector of all zeros that might be illegal at this stage.
5804 ShOp = N0.getOperand(0);
5805 if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
5806 ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
5807
5808 // (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B))
5809 if (N0.getOperand(0) == N1.getOperand(0) && ShOp.getNode()) {
5810 SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, N0.getOperand(1),
5811 N1.getOperand(1));
5812 return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask());
5813 }
5814 }
5815
5816 return SDValue();
5817}
5818
5819/// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
5820SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
5821 const SDLoc &DL) {
5822 SDValue LL, LR, RL, RR, N0CC, N1CC;
5823 if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
5824 !isSetCCEquivalent(N1, RL, RR, N1CC))
5825 return SDValue();
5826
5827 assert(N0.getValueType() == N1.getValueType() &&
5828 "Unexpected operand types for bitwise logic op");
5829 assert(LL.getValueType() == LR.getValueType() &&
5830 RL.getValueType() == RR.getValueType() &&
5831 "Unexpected operand types for setcc");
5832
5833 // If we're here post-legalization or the logic op type is not i1, the logic
5834 // op type must match a setcc result type. Also, all folds require new
5835 // operations on the left and right operands, so those types must match.
5836 EVT VT = N0.getValueType();
5837 EVT OpVT = LL.getValueType();
5838 if (LegalOperations || VT.getScalarType() != MVT::i1)
5839 if (VT != getSetCCResultType(OpVT))
5840 return SDValue();
5841 if (OpVT != RL.getValueType())
5842 return SDValue();
5843
5844 ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
5845 ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
5846 bool IsInteger = OpVT.isInteger();
5847 if (LR == RR && CC0 == CC1 && IsInteger) {
5848 bool IsZero = isNullOrNullSplat(LR);
5849 bool IsNeg1 = isAllOnesOrAllOnesSplat(LR);
5850
5851 // All bits clear?
5852 bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
5853 // All sign bits clear?
5854 bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
5855 // Any bits set?
5856 bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
5857 // Any sign bits set?
5858 bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
5859
5860 // (and (seteq X, 0), (seteq Y, 0)) --> (seteq (or X, Y), 0)
5861 // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
5862 // (or (setne X, 0), (setne Y, 0)) --> (setne (or X, Y), 0)
5863 // (or (setlt X, 0), (setlt Y, 0)) --> (setlt (or X, Y), 0)
5864 if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
5865 SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
5866 AddToWorklist(Or.getNode());
5867 return DAG.getSetCC(DL, VT, Or, LR, CC1);
5868 }
5869
5870 // All bits set?
5871 bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
5872 // All sign bits set?
5873 bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
5874 // Any bits clear?
5875 bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
5876 // Any sign bits clear?
5877 bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
5878
5879 // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
5880 // (and (setlt X, 0), (setlt Y, 0)) --> (setlt (and X, Y), 0)
5881 // (or (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
5882 // (or (setgt X, -1), (setgt Y -1)) --> (setgt (and X, Y), -1)
5883 if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
5884 SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
5885 AddToWorklist(And.getNode());
5886 return DAG.getSetCC(DL, VT, And, LR, CC1);
5887 }
5888 }
5889
5890 // TODO: What is the 'or' equivalent of this fold?
5891 // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
5892 if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
5893 IsInteger && CC0 == ISD::SETNE &&
5894 ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
5895 (isAllOnesConstant(LR) && isNullConstant(RR)))) {
5896 SDValue One = DAG.getConstant(1, DL, OpVT);
5897 SDValue Two = DAG.getConstant(2, DL, OpVT);
5898 SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
5899 AddToWorklist(Add.getNode());
5900 return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
5901 }
5902
5903 // Try more general transforms if the predicates match and the only user of
5904 // the compares is the 'and' or 'or'.
5905 if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
5906 N0.hasOneUse() && N1.hasOneUse()) {
5907 // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
5908 // or (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
5909 if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
5910 SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
5911 SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
5912 SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
5913 SDValue Zero = DAG.getConstant(0, DL, OpVT);
5914 return DAG.getSetCC(DL, VT, Or, Zero, CC1);
5915 }
5916
5917 // Turn compare of constants whose difference is 1 bit into add+and+setcc.
5918 if ((IsAnd && CC1 == ISD::SETNE) || (!IsAnd && CC1 == ISD::SETEQ)) {
5919 // Match a shared variable operand and 2 non-opaque constant operands.
5920 auto MatchDiffPow2 = [&](ConstantSDNode *C0, ConstantSDNode *C1) {
5921 // The difference of the constants must be a single bit.
5922 const APInt &CMax =
5923 APIntOps::umax(C0->getAPIntValue(), C1->getAPIntValue());
5924 const APInt &CMin =
5925 APIntOps::umin(C0->getAPIntValue(), C1->getAPIntValue());
5926 return !C0->isOpaque() && !C1->isOpaque() && (CMax - CMin).isPowerOf2();
5927 };
5928 if (LL == RL && ISD::matchBinaryPredicate(LR, RR, MatchDiffPow2)) {
5929 // and/or (setcc X, CMax, ne), (setcc X, CMin, ne/eq) -->
5930 // setcc ((sub X, CMin), ~(CMax - CMin)), 0, ne/eq
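        // For example, (and (setcc X, 8, ne), (setcc X, 12, ne)) becomes
        // (setcc (and (sub X, 8), ~4), 0, ne), which is false exactly when X
        // is 8 or 12.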
5931 SDValue Max = DAG.getNode(ISD::UMAX, DL, OpVT, LR, RR);
5932 SDValue Min = DAG.getNode(ISD::UMIN, DL, OpVT, LR, RR);
5933 SDValue Offset = DAG.getNode(ISD::SUB, DL, OpVT, LL, Min);
5934 SDValue Diff = DAG.getNode(ISD::SUB, DL, OpVT, Max, Min);
5935 SDValue Mask = DAG.getNOT(DL, Diff, OpVT);
5936 SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Offset, Mask);
5937 SDValue Zero = DAG.getConstant(0, DL, OpVT);
5938 return DAG.getSetCC(DL, VT, And, Zero, CC0);
5939 }
5940 }
5941 }
5942
5943 // Canonicalize equivalent operands to LL == RL.
5944 if (LL == RR && LR == RL) {
5945    CC1 = ISD::getSetCCSwappedOperands(CC1);
5946    std::swap(RL, RR);
5947 }
5948
5949 // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
5950 // (or (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
5951 if (LL == RL && LR == RR) {
5952 ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, OpVT)
5953 : ISD::getSetCCOrOperation(CC0, CC1, OpVT);
5954 if (NewCC != ISD::SETCC_INVALID &&
5955 (!LegalOperations ||
5956 (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
5957 TLI.isOperationLegal(ISD::SETCC, OpVT))))
5958 return DAG.getSetCC(DL, VT, LL, LR, NewCC);
5959 }
5960
5961 return SDValue();
5962}
5963
5964static bool arebothOperandsNotSNan(SDValue Operand1, SDValue Operand2,
5965 SelectionDAG &DAG) {
5966 return DAG.isKnownNeverSNaN(Operand2) && DAG.isKnownNeverSNaN(Operand1);
5967}
5968
5969static bool arebothOperandsNotNan(SDValue Operand1, SDValue Operand2,
5970 SelectionDAG &DAG) {
5971 return DAG.isKnownNeverNaN(Operand2) && DAG.isKnownNeverNaN(Operand1);
5972}
5973
5974static unsigned getMinMaxOpcodeForFP(SDValue Operand1, SDValue Operand2,
5975 ISD::CondCode CC, unsigned OrAndOpcode,
5976 SelectionDAG &DAG,
5977 bool isFMAXNUMFMINNUM_IEEE,
5978 bool isFMAXNUMFMINNUM) {
5979 // The optimization cannot be applied for all the predicates because
5980 // of the way FMINNUM/FMAXNUM and FMINNUM_IEEE/FMAXNUM_IEEE handle
5981 // NaNs. For FMINNUM_IEEE/FMAXNUM_IEEE, the optimization cannot be
5982 // applied at all if one of the operands is a signaling NaN.
5983
5984 // It is safe to use FMINNUM_IEEE/FMAXNUM_IEEE if all the operands
5985 // are non NaN values.
5986 if (((CC == ISD::SETLT || CC == ISD::SETLE) && (OrAndOpcode == ISD::OR)) ||
5987 ((CC == ISD::SETGT || CC == ISD::SETGE) && (OrAndOpcode == ISD::AND)))
5988 return arebothOperandsNotNan(Operand1, Operand2, DAG) &&
5989                   isFMAXNUMFMINNUM_IEEE
5990               ? ISD::FMINNUM_IEEE
5991               : ISD::DELETED_NODE;
5992 else if (((CC == ISD::SETGT || CC == ISD::SETGE) &&
5993 (OrAndOpcode == ISD::OR)) ||
5994 ((CC == ISD::SETLT || CC == ISD::SETLE) &&
5995 (OrAndOpcode == ISD::AND)))
5996 return arebothOperandsNotNan(Operand1, Operand2, DAG) &&
5997                   isFMAXNUMFMINNUM_IEEE
5998               ? ISD::FMAXNUM_IEEE
5999               : ISD::DELETED_NODE;
6000 // Both FMINNUM/FMAXNUM and FMINNUM_IEEE/FMAXNUM_IEEE handle quiet
6001 // NaNs in the same way. But, FMINNUM/FMAXNUM and FMINNUM_IEEE/
6002 // FMAXNUM_IEEE handle signaling NaNs differently. If we cannot prove
6003 // that there are not any sNaNs, then the optimization is not valid
6004 // for FMINNUM_IEEE/FMAXNUM_IEEE. In the presence of sNaNs, we apply
6005 // the optimization using FMINNUM/FMAXNUM for the following cases. If
6006 // we can prove that we do not have any sNaNs, then we can do the
6007 // optimization using FMINNUM_IEEE/FMAXNUM_IEEE for the following
6008 // cases.
6009 else if (((CC == ISD::SETOLT || CC == ISD::SETOLE) &&
6010 (OrAndOpcode == ISD::OR)) ||
6011 ((CC == ISD::SETUGT || CC == ISD::SETUGE) &&
6012 (OrAndOpcode == ISD::AND)))
6013 return isFMAXNUMFMINNUM ? ISD::FMINNUM
6014 : arebothOperandsNotSNan(Operand1, Operand2, DAG) &&
6015                          isFMAXNUMFMINNUM_IEEE
6016                      ? ISD::FMINNUM_IEEE
6017                      : ISD::DELETED_NODE;
6018 else if (((CC == ISD::SETOGT || CC == ISD::SETOGE) &&
6019 (OrAndOpcode == ISD::OR)) ||
6020 ((CC == ISD::SETULT || CC == ISD::SETULE) &&
6021 (OrAndOpcode == ISD::AND)))
6022 return isFMAXNUMFMINNUM ? ISD::FMAXNUM
6023 : arebothOperandsNotSNan(Operand1, Operand2, DAG) &&
6024                          isFMAXNUMFMINNUM_IEEE
6025                      ? ISD::FMAXNUM_IEEE
6026                      : ISD::DELETED_NODE;
6027 return ISD::DELETED_NODE;
6028}
6029
6030 static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG) {
6031   using AndOrSETCCFoldKind = TargetLowering::AndOrSETCCFoldKind;
6032   assert(
6033 (LogicOp->getOpcode() == ISD::AND || LogicOp->getOpcode() == ISD::OR) &&
6034 "Invalid Op to combine SETCC with");
6035
6036 // TODO: Search past casts/truncates.
6037 SDValue LHS = LogicOp->getOperand(0);
6038 SDValue RHS = LogicOp->getOperand(1);
6039 if (LHS->getOpcode() != ISD::SETCC || RHS->getOpcode() != ISD::SETCC ||
6040 !LHS->hasOneUse() || !RHS->hasOneUse())
6041 return SDValue();
6042
6043 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6044  AndOrSETCCFoldKind TargetPreference = TLI.isDesirableToCombineLogicOpOfSETCC(
6045      LogicOp, LHS.getNode(), RHS.getNode());
6046
6047 SDValue LHS0 = LHS->getOperand(0);
6048 SDValue RHS0 = RHS->getOperand(0);
6049 SDValue LHS1 = LHS->getOperand(1);
6050 SDValue RHS1 = RHS->getOperand(1);
6051 // TODO: We don't actually need a splat here, for vectors we just need the
6052 // invariants to hold for each element.
6053 auto *LHS1C = isConstOrConstSplat(LHS1);
6054 auto *RHS1C = isConstOrConstSplat(RHS1);
6055 ISD::CondCode CCL = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
6056 ISD::CondCode CCR = cast<CondCodeSDNode>(RHS.getOperand(2))->get();
6057 EVT VT = LogicOp->getValueType(0);
6058 EVT OpVT = LHS0.getValueType();
6059 SDLoc DL(LogicOp);
6060
6061 // Check if the operands of an and/or operation are comparisons and if they
6062 // compare against the same value. Replace the and/or-cmp-cmp sequence with
6063 // min/max cmp sequence. If LHS1 is equal to RHS1, then the or-cmp-cmp
6064 // sequence will be replaced with min-cmp sequence:
6065 // (LHS0 < LHS1) | (RHS0 < RHS1) -> min(LHS0, RHS0) < LHS1
6066 // and and-cmp-cmp will be replaced with max-cmp sequence:
6067 // (LHS0 < LHS1) & (RHS0 < RHS1) -> max(LHS0, RHS0) < LHS1
6068 // The optimization does not work for `==` or `!=` .
6069 // The two comparisons should have either the same predicate or the
6070 // predicate of one of the comparisons is the opposite of the other one.
6071 bool isFMAXNUMFMINNUM_IEEE = TLI.isOperationLegal(ISD::FMAXNUM_IEEE, OpVT) &&
6072                               TLI.isOperationLegal(ISD::FMINNUM_IEEE, OpVT);
6073  bool isFMAXNUMFMINNUM = TLI.isOperationLegalOrCustom(ISD::FMAXNUM, OpVT) &&
6074                          TLI.isOperationLegalOrCustom(ISD::FMINNUM, OpVT);
6075 if (((OpVT.isInteger() && TLI.isOperationLegal(ISD::UMAX, OpVT) &&
6076 TLI.isOperationLegal(ISD::SMAX, OpVT) &&
6077 TLI.isOperationLegal(ISD::UMIN, OpVT) &&
6078 TLI.isOperationLegal(ISD::SMIN, OpVT)) ||
6079 (OpVT.isFloatingPoint() &&
6080 (isFMAXNUMFMINNUM_IEEE || isFMAXNUMFMINNUM))) &&
6081      !ISD::isIntEqualitySetCC(CCL) && !ISD::isFPEqualitySetCC(CCL) &&
6082      CCL != ISD::SETFALSE && CCL != ISD::SETO && CCL != ISD::SETUO &&
6083 CCL != ISD::SETTRUE &&
6084 (CCL == CCR || CCL == ISD::getSetCCSwappedOperands(CCR))) {
6085
6086 SDValue CommonValue, Operand1, Operand2;
6087    ISD::CondCode CC = ISD::SETCC_INVALID;
6088    if (CCL == CCR) {
6089 if (LHS0 == RHS0) {
6090 CommonValue = LHS0;
6091 Operand1 = LHS1;
6092 Operand2 = RHS1;
6093        CC = CCL;
6094      } else if (LHS1 == RHS1) {
6095 CommonValue = LHS1;
6096 Operand1 = LHS0;
6097 Operand2 = RHS0;
6098 CC = CCL;
6099 }
6100 } else {
6101 assert(CCL == ISD::getSetCCSwappedOperands(CCR) && "Unexpected CC");
6102 if (LHS0 == RHS1) {
6103 CommonValue = LHS0;
6104 Operand1 = LHS1;
6105 Operand2 = RHS0;
6106 CC = CCR;
6107 } else if (RHS0 == LHS1) {
6108 CommonValue = LHS1;
6109 Operand1 = LHS0;
6110 Operand2 = RHS1;
6111 CC = CCL;
6112 }
6113 }
6114
6115 // Don't do this transform for sign bit tests. Let foldLogicOfSetCCs
6116 // handle it using OR/AND.
6117 if (CC == ISD::SETLT && isNullOrNullSplat(CommonValue))
6118      CC = ISD::SETCC_INVALID;
6119    else if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CommonValue))
6120      CC = ISD::SETCC_INVALID;
6121
6122 if (CC != ISD::SETCC_INVALID) {
6123 unsigned NewOpcode = ISD::DELETED_NODE;
6124 bool IsSigned = isSignedIntSetCC(CC);
6125 if (OpVT.isInteger()) {
6126 bool IsLess = (CC == ISD::SETLE || CC == ISD::SETULE ||
6127 CC == ISD::SETLT || CC == ISD::SETULT);
6128 bool IsOr = (LogicOp->getOpcode() == ISD::OR);
6129 if (IsLess == IsOr)
6130 NewOpcode = IsSigned ? ISD::SMIN : ISD::UMIN;
6131 else
6132 NewOpcode = IsSigned ? ISD::SMAX : ISD::UMAX;
6133 } else if (OpVT.isFloatingPoint())
6134 NewOpcode =
6135 getMinMaxOpcodeForFP(Operand1, Operand2, CC, LogicOp->getOpcode(),
6136 DAG, isFMAXNUMFMINNUM_IEEE, isFMAXNUMFMINNUM);
6137
6138 if (NewOpcode != ISD::DELETED_NODE) {
6139 SDValue MinMaxValue =
6140 DAG.getNode(NewOpcode, DL, OpVT, Operand1, Operand2);
6141 return DAG.getSetCC(DL, VT, MinMaxValue, CommonValue, CC);
6142 }
6143 }
6144 }
6145
6146 if (TargetPreference == AndOrSETCCFoldKind::None)
6147 return SDValue();
6148
6149 if (CCL == CCR &&
6150 CCL == (LogicOp->getOpcode() == ISD::AND ? ISD::SETNE : ISD::SETEQ) &&
6151 LHS0 == RHS0 && LHS1C && RHS1C && OpVT.isInteger()) {
6152 const APInt &APLhs = LHS1C->getAPIntValue();
6153 const APInt &APRhs = RHS1C->getAPIntValue();
6154
6155 // Preference is to use ISD::ABS or we already have an ISD::ABS (in which
6156 // case this is just a compare).
6157 if (APLhs == (-APRhs) &&
6158 ((TargetPreference & AndOrSETCCFoldKind::ABS) ||
6159 DAG.doesNodeExist(ISD::ABS, DAG.getVTList(OpVT), {LHS0}))) {
6160 const APInt &C = APLhs.isNegative() ? APRhs : APLhs;
6161 // (icmp eq A, C) | (icmp eq A, -C)
6162 // -> (icmp eq Abs(A), C)
6163 // (icmp ne A, C) & (icmp ne A, -C)
6164 // -> (icmp ne Abs(A), C)
6165 SDValue AbsOp = DAG.getNode(ISD::ABS, DL, OpVT, LHS0);
6166 return DAG.getNode(ISD::SETCC, DL, VT, AbsOp,
6167 DAG.getConstant(C, DL, OpVT), LHS.getOperand(2));
6168 } else if (TargetPreference &
6169               (AndOrSETCCFoldKind::AddAnd | AndOrSETCCFoldKind::NotAnd)) {
6170
6171 // AndOrSETCCFoldKind::AddAnd:
6172 // A == C0 | A == C1
6173 // IF IsPow2(smax(C0, C1)-smin(C0, C1))
6174 // -> ((A - smin(C0, C1)) & ~(smax(C0, C1)-smin(C0, C1))) == 0
6175 // A != C0 & A != C1
6176 // IF IsPow2(smax(C0, C1)-smin(C0, C1))
6177 // -> ((A - smin(C0, C1)) & ~(smax(C0, C1)-smin(C0, C1))) != 0
6178
6179 // AndOrSETCCFoldKind::NotAnd:
6180 // A == C0 | A == C1
6181 // IF smax(C0, C1) == -1 AND IsPow2(smax(C0, C1) - smin(C0, C1))
6182 // -> ~A & smin(C0, C1) == 0
6183 // A != C0 & A != C1
6184 // IF smax(C0, C1) == -1 AND IsPow2(smax(C0, C1) - smin(C0, C1))
6185 // -> ~A & smin(C0, C1) != 0
6186
6187 const APInt &MaxC = APIntOps::smax(APRhs, APLhs);
6188 const APInt &MinC = APIntOps::smin(APRhs, APLhs);
6189 APInt Dif = MaxC - MinC;
6190 if (!Dif.isZero() && Dif.isPowerOf2()) {
6191 if (MaxC.isAllOnes() &&
6192 (TargetPreference & AndOrSETCCFoldKind::NotAnd)) {
6193 SDValue NotOp = DAG.getNOT(DL, LHS0, OpVT);
6194 SDValue AndOp = DAG.getNode(ISD::AND, DL, OpVT, NotOp,
6195 DAG.getConstant(MinC, DL, OpVT));
6196 return DAG.getNode(ISD::SETCC, DL, VT, AndOp,
6197 DAG.getConstant(0, DL, OpVT), LHS.getOperand(2));
6198 } else if (TargetPreference & AndOrSETCCFoldKind::AddAnd) {
6199
6200 SDValue AddOp = DAG.getNode(ISD::ADD, DL, OpVT, LHS0,
6201 DAG.getConstant(-MinC, DL, OpVT));
6202 SDValue AndOp = DAG.getNode(ISD::AND, DL, OpVT, AddOp,
6203 DAG.getConstant(~Dif, DL, OpVT));
6204 return DAG.getNode(ISD::SETCC, DL, VT, AndOp,
6205 DAG.getConstant(0, DL, OpVT), LHS.getOperand(2));
6206 }
6207 }
6208 }
6209 }
6210
6211 return SDValue();
6212}
6213
6214// Combine `(select c, (X & 1), 0)` -> `(and (zext c), X)`.
6215// We canonicalize to the `select` form in the middle end, but the `and` form
6216 // gets better codegen on all tested targets (arm, x86, riscv).
6217 static SDValue combineSelectAsExtAnd(SDValue Cond, SDValue T, SDValue F,
6218                                      const SDLoc &DL, SelectionDAG &DAG) {
6219 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6220 if (!isNullConstant(F))
6221 return SDValue();
6222
6223 EVT CondVT = Cond.getValueType();
6224 if (TLI.getBooleanContents(CondVT) !=
6225      TargetLowering::ZeroOrOneBooleanContent)
6226    return SDValue();
6227
6228 if (T.getOpcode() != ISD::AND)
6229 return SDValue();
6230
6231 if (!isOneConstant(T.getOperand(1)))
6232 return SDValue();
6233
6234 EVT OpVT = T.getValueType();
6235
6236 SDValue CondMask =
6237 OpVT == CondVT ? Cond : DAG.getBoolExtOrTrunc(Cond, DL, OpVT, CondVT);
6238 return DAG.getNode(ISD::AND, DL, OpVT, CondMask, T.getOperand(0));
6239}
6240
6241/// This contains all DAGCombine rules which reduce two values combined by
6242/// an And operation to a single value. This makes them reusable in the context
6243/// of visitSELECT(). Rules involving constants are not included as
6244/// visitSELECT() already handles those cases.
6245SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
6246 EVT VT = N1.getValueType();
6247 SDLoc DL(N);
6248
6249 // fold (and x, undef) -> 0
6250 if (N0.isUndef() || N1.isUndef())
6251 return DAG.getConstant(0, DL, VT);
6252
6253 if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
6254 return V;
6255
6256 // Canonicalize:
6257 // and(x, add) -> and(add, x)
6258 if (N1.getOpcode() == ISD::ADD)
6259 std::swap(N0, N1);
6260
6261 // TODO: Rewrite this to return a new 'AND' instead of using CombineTo.
6262 if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
6263 VT.isScalarInteger() && VT.getSizeInBits() <= 64 && N0->hasOneUse()) {
6264 if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
6265 if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
6266 // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
6267 // immediate for an add, but it is legal if its top c2 bits are set,
6268 // transform the ADD so the immediate doesn't need to be materialized
6269 // in a register.
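          // Setting the high c2 bits of c1 is safe because the (lshr y, c2)
          // operand has those bits clear, so they are masked away from the
          // final AND result.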
6270 APInt ADDC = ADDI->getAPIntValue();
6271 APInt SRLC = SRLI->getAPIntValue();
6272 if (ADDC.getSignificantBits() <= 64 && SRLC.ult(VT.getSizeInBits()) &&
6273 !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
6274            APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
6275                                               SRLC.getZExtValue());
6276 if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
6277 ADDC |= Mask;
6278 if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
6279 SDLoc DL0(N0);
6280 SDValue NewAdd =
6281 DAG.getNode(ISD::ADD, DL0, VT,
6282 N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
6283 CombineTo(N0.getNode(), NewAdd);
6284 // Return N so it doesn't get rechecked!
6285 return SDValue(N, 0);
6286 }
6287 }
6288 }
6289 }
6290 }
6291 }
6292
6293 return SDValue();
6294}
6295
6296bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
6297 EVT LoadResultTy, EVT &ExtVT) {
6298 if (!AndC->getAPIntValue().isMask())
6299 return false;
6300
6301 unsigned ActiveBits = AndC->getAPIntValue().countr_one();
6302
6303 ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
6304 EVT LoadedVT = LoadN->getMemoryVT();
6305
6306 if (ExtVT == LoadedVT &&
6307 (!LegalOperations ||
6308 TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
6309 // ZEXTLOAD will match without needing to change the size of the value being
6310 // loaded.
6311 return true;
6312 }
6313
6314  // Do not change the width of volatile or atomic loads.
6315 if (!LoadN->isSimple())
6316 return false;
6317
6318 // Do not generate loads of non-round integer types since these can
6319 // be expensive (and would be wrong if the type is not byte sized).
6320 if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
6321 return false;
6322
6323 if (LegalOperations &&
6324 !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
6325 return false;
6326
6327 if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
6328 return false;
6329
6330 return true;
6331}
6332
6333bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
6334 ISD::LoadExtType ExtType, EVT &MemVT,
6335 unsigned ShAmt) {
6336 if (!LDST)
6337 return false;
6338 // Only allow byte offsets.
6339 if (ShAmt % 8)
6340 return false;
6341
6342 // Do not generate loads of non-round integer types since these can
6343 // be expensive (and would be wrong if the type is not byte sized).
6344 if (!MemVT.isRound())
6345 return false;
6346
6347  // Don't change the width of volatile or atomic loads.
6348 if (!LDST->isSimple())
6349 return false;
6350
6351 EVT LdStMemVT = LDST->getMemoryVT();
6352
6353 // Bail out when changing the scalable property, since we can't be sure that
6354 // we're actually narrowing here.
6355 if (LdStMemVT.isScalableVector() != MemVT.isScalableVector())
6356 return false;
6357
6358 // Verify that we are actually reducing a load width here.
6359 if (LdStMemVT.bitsLT(MemVT))
6360 return false;
6361
6362 // Ensure that this isn't going to produce an unsupported memory access.
6363 if (ShAmt) {
6364 assert(ShAmt % 8 == 0 && "ShAmt is byte offset");
6365 const unsigned ByteShAmt = ShAmt / 8;
6366 const Align LDSTAlign = LDST->getAlign();
6367 const Align NarrowAlign = commonAlignment(LDSTAlign, ByteShAmt);
6368 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
6369 LDST->getAddressSpace(), NarrowAlign,
6370 LDST->getMemOperand()->getFlags()))
6371 return false;
6372 }
6373
6374 // It's not possible to generate a constant of extended or untyped type.
6375 EVT PtrType = LDST->getBasePtr().getValueType();
6376 if (PtrType == MVT::Untyped || PtrType.isExtended())
6377 return false;
6378
6379 if (isa<LoadSDNode>(LDST)) {
6380 LoadSDNode *Load = cast<LoadSDNode>(LDST);
6381 // Don't transform one with multiple uses, this would require adding a new
6382 // load.
6383 if (!SDValue(Load, 0).hasOneUse())
6384 return false;
6385
6386 if (LegalOperations &&
6387 !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
6388 return false;
6389
6390 // For the transform to be legal, the load must produce only two values
6391 // (the value loaded and the chain). Don't transform a pre-increment
6392 // load, for example, which produces an extra value. Otherwise the
6393 // transformation is not equivalent, and the downstream logic to replace
6394 // uses gets things wrong.
6395 if (Load->getNumValues() > 2)
6396 return false;
6397
6398 // If the load that we're shrinking is an extload and we're not just
6399 // discarding the extension we can't simply shrink the load. Bail.
6400 // TODO: It would be possible to merge the extensions in some cases.
6401 if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
6402 Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
6403 return false;
6404
6405 if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT))
6406 return false;
6407 } else {
6408 assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
6409 StoreSDNode *Store = cast<StoreSDNode>(LDST);
6410 // Can't write outside the original store
6411 if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
6412 return false;
6413
6414 if (LegalOperations &&
6415 !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
6416 return false;
6417 }
6418 return true;
6419}
6420
6421bool DAGCombiner::SearchForAndLoads(SDNode *N,
6422                                    SmallVectorImpl<LoadSDNode*> &Loads,
6423                                    SmallPtrSetImpl<SDNode*> &NodesWithConsts,
6424 ConstantSDNode *Mask,
6425 SDNode *&NodeToMask) {
6426 // Recursively search for the operands, looking for loads which can be
6427 // narrowed.
6428 for (SDValue Op : N->op_values()) {
6429 if (Op.getValueType().isVector())
6430 return false;
6431
6432 // Some constants may need fixing up later if they are too large.
6433 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
6434 if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
6435 (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
6436 NodesWithConsts.insert(N);
6437 continue;
6438 }
6439
6440 if (!Op.hasOneUse())
6441 return false;
6442
6443 switch(Op.getOpcode()) {
6444 case ISD::LOAD: {
6445 auto *Load = cast<LoadSDNode>(Op);
6446 EVT ExtVT;
6447 if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
6448 isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {
6449
6450 // ZEXTLOAD is already small enough.
6451 if (Load->getExtensionType() == ISD::ZEXTLOAD &&
6452 ExtVT.bitsGE(Load->getMemoryVT()))
6453 continue;
6454
6455 // Use LE to convert equal sized loads to zext.
6456 if (ExtVT.bitsLE(Load->getMemoryVT()))
6457 Loads.push_back(Load);
6458
6459 continue;
6460 }
6461 return false;
6462 }
6463 case ISD::ZERO_EXTEND:
6464 case ISD::AssertZext: {
6465 unsigned ActiveBits = Mask->getAPIntValue().countr_one();
6466 EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
6467 EVT VT = Op.getOpcode() == ISD::AssertZext ?
6468 cast<VTSDNode>(Op.getOperand(1))->getVT() :
6469 Op.getOperand(0).getValueType();
6470
6471 // We can accept extending nodes if the mask is wider or an equal
6472 // width to the original type.
6473 if (ExtVT.bitsGE(VT))
6474 continue;
6475 break;
6476 }
6477 case ISD::OR:
6478 case ISD::XOR:
6479 case ISD::AND:
6480 if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
6481 NodeToMask))
6482 return false;
6483 continue;
6484 }
6485
6486    // Allow one node which will be masked along with any loads found.
6487 if (NodeToMask)
6488 return false;
6489
6490 // Also ensure that the node to be masked only produces one data result.
6491 NodeToMask = Op.getNode();
6492 if (NodeToMask->getNumValues() > 1) {
6493 bool HasValue = false;
6494 for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
6495 MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
6496 if (VT != MVT::Glue && VT != MVT::Other) {
6497 if (HasValue) {
6498 NodeToMask = nullptr;
6499 return false;
6500 }
6501 HasValue = true;
6502 }
6503 }
6504 assert(HasValue && "Node to be masked has no data result?");
6505 }
6506 }
6507 return true;
6508}
6509
6510bool DAGCombiner::BackwardsPropagateMask(SDNode *N) {
6511 auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
6512 if (!Mask)
6513 return false;
6514
6515 if (!Mask->getAPIntValue().isMask())
6516 return false;
6517
6518 // No need to do anything if the and directly uses a load.
6519 if (isa<LoadSDNode>(N->getOperand(0)))
6520 return false;
6521
6522  SmallVector<LoadSDNode*, 8> Loads;
6523  SmallPtrSet<SDNode*, 2> NodesWithConsts;
6524 SDNode *FixupNode = nullptr;
6525 if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
6526 if (Loads.empty())
6527 return false;
6528
6529 LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
6530 SDValue MaskOp = N->getOperand(1);
6531
6532 // If it exists, fixup the single node we allow in the tree that needs
6533 // masking.
6534 if (FixupNode) {
6535 LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
6536 SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
6537 FixupNode->getValueType(0),
6538 SDValue(FixupNode, 0), MaskOp);
6539 DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
6540      if (And.getOpcode() == ISD::AND)
6541 DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
6542 }
6543
6544 // Narrow any constants that need it.
6545 for (auto *LogicN : NodesWithConsts) {
6546 SDValue Op0 = LogicN->getOperand(0);
6547 SDValue Op1 = LogicN->getOperand(1);
6548
6549 if (isa<ConstantSDNode>(Op0))
6550 Op0 =
6551 DAG.getNode(ISD::AND, SDLoc(Op0), Op0.getValueType(), Op0, MaskOp);
6552
6553 if (isa<ConstantSDNode>(Op1))
6554 Op1 =
6555 DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(), Op1, MaskOp);
6556
6557 if (isa<ConstantSDNode>(Op0) && !isa<ConstantSDNode>(Op1))
6558 std::swap(Op0, Op1);
6559
6560 DAG.UpdateNodeOperands(LogicN, Op0, Op1);
6561 }
6562
6563 // Create narrow loads.
6564 for (auto *Load : Loads) {
6565 LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
6566 SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
6567 SDValue(Load, 0), MaskOp);
6568 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
6569      if (And.getOpcode() == ISD::AND)
6570 And = SDValue(
6571 DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
6572 SDValue NewLoad = reduceLoadWidth(And.getNode());
6573 assert(NewLoad &&
6574 "Shouldn't be masking the load if it can't be narrowed");
6575 CombineTo(Load, NewLoad, NewLoad.getValue(1));
6576 }
6577 DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
6578 return true;
6579 }
6580 return false;
6581}
6582
6583// Unfold
6584// x & (-1 'logical shift' y)
6585// To
6586// (x 'opposite logical shift' y) 'logical shift' y
6587// if it is better for performance.
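// E.g. x & (-1 << y) --> (x >> y) << y and x & (-1 >> y) --> (x << y) >> y;
// both clear the same bits without materializing the shifted all-ones mask.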
6588SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
6589 assert(N->getOpcode() == ISD::AND);
6590
6591 SDValue N0 = N->getOperand(0);
6592 SDValue N1 = N->getOperand(1);
6593
6594 // Do we actually prefer shifts over mask?
6595  if (!TLI.shouldFoldMaskToVariableShiftPair(N0))
6596    return SDValue();
6597
6598 // Try to match (-1 '[outer] logical shift' y)
6599 unsigned OuterShift;
6600 unsigned InnerShift; // The opposite direction to the OuterShift.
6601 SDValue Y; // Shift amount.
6602 auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
6603 if (!M.hasOneUse())
6604 return false;
6605 OuterShift = M->getOpcode();
6606 if (OuterShift == ISD::SHL)
6607 InnerShift = ISD::SRL;
6608 else if (OuterShift == ISD::SRL)
6609 InnerShift = ISD::SHL;
6610 else
6611 return false;
6612 if (!isAllOnesConstant(M->getOperand(0)))
6613 return false;
6614 Y = M->getOperand(1);
6615 return true;
6616 };
6617
6618 SDValue X;
6619 if (matchMask(N1))
6620 X = N0;
6621 else if (matchMask(N0))
6622 X = N1;
6623 else
6624 return SDValue();
6625
6626 SDLoc DL(N);
6627 EVT VT = N->getValueType(0);
6628
6629 // tmp = x 'opposite logical shift' y
6630 SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
6631 // ret = tmp 'logical shift' y
6632 SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
6633
6634 return T1;
6635}
6636
6637/// Try to replace shift/logic that tests if a bit is clear with mask + setcc.
6638/// For a target with a bit test, this is expected to become test + set and save
6639/// at least 1 instruction.
6640static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) {
6641  assert(And->getOpcode() == ISD::AND && "Expected an 'and' op");
6642
6643 // Look through an optional extension.
6644 SDValue And0 = And->getOperand(0), And1 = And->getOperand(1);
6645 if (And0.getOpcode() == ISD::ANY_EXTEND && And0.hasOneUse())
6646 And0 = And0.getOperand(0);
6647 if (!isOneConstant(And1) || !And0.hasOneUse())
6648 return SDValue();
6649
6650 SDValue Src = And0;
6651
6652 // Attempt to find a 'not' op.
6653 // TODO: Should we favor test+set even without the 'not' op?
6654 bool FoundNot = false;
6655 if (isBitwiseNot(Src)) {
6656 FoundNot = true;
6657 Src = Src.getOperand(0);
6658
6659    // Look through an optional truncation. The source operand may not be the
6660 // same type as the original 'and', but that is ok because we are masking
6661 // off everything but the low bit.
6662 if (Src.getOpcode() == ISD::TRUNCATE && Src.hasOneUse())
6663 Src = Src.getOperand(0);
6664 }
6665
6666 // Match a shift-right by constant.
6667 if (Src.getOpcode() != ISD::SRL || !Src.hasOneUse())
6668 return SDValue();
6669
6670 // This is probably not worthwhile without a supported type.
6671 EVT SrcVT = Src.getValueType();
6672 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6673 if (!TLI.isTypeLegal(SrcVT))
6674 return SDValue();
6675
6676 // We might have looked through casts that make this transform invalid.
6677 unsigned BitWidth = SrcVT.getScalarSizeInBits();
6678 SDValue ShiftAmt = Src.getOperand(1);
6679 auto *ShiftAmtC = dyn_cast<ConstantSDNode>(ShiftAmt);
6680 if (!ShiftAmtC || !ShiftAmtC->getAPIntValue().ult(BitWidth))
6681 return SDValue();
6682
6683 // Set source to shift source.
6684 Src = Src.getOperand(0);
6685
6686 // Try again to find a 'not' op.
6687 // TODO: Should we favor test+set even with two 'not' ops?
6688 if (!FoundNot) {
6689 if (!isBitwiseNot(Src))
6690 return SDValue();
6691 Src = Src.getOperand(0);
6692 }
6693
6694 if (!TLI.hasBitTest(Src, ShiftAmt))
6695 return SDValue();
6696
6697 // Turn this into a bit-test pattern using mask op + setcc:
6698 // and (not (srl X, C)), 1 --> (and X, 1<<C) == 0
6699 // and (srl (not X), C)), 1 --> (and X, 1<<C) == 0
6700 SDLoc DL(And);
6701 SDValue X = DAG.getZExtOrTrunc(Src, DL, SrcVT);
6702 EVT CCVT =
6703 TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
6704 SDValue Mask = DAG.getConstant(
6705 APInt::getOneBitSet(BitWidth, ShiftAmtC->getZExtValue()), DL, SrcVT);
6706 SDValue NewAnd = DAG.getNode(ISD::AND, DL, SrcVT, X, Mask);
6707 SDValue Zero = DAG.getConstant(0, DL, SrcVT);
6708 SDValue Setcc = DAG.getSetCC(DL, CCVT, NewAnd, Zero, ISD::SETEQ);
6709 return DAG.getZExtOrTrunc(Setcc, DL, And->getValueType(0));
6710}
6711
6712/// For targets that support usubsat, match a bit-hack form of that operation
6713/// that ends in 'and' and convert it.
6714static SDValue foldAndToUsubsat(SDNode *N, SelectionDAG &DAG, const SDLoc &DL) {
6715  EVT VT = N->getValueType(0);
6716 unsigned BitWidth = VT.getScalarSizeInBits();
6717 APInt SignMask = APInt::getSignMask(BitWidth);
6718
6719 // (i8 X ^ 128) & (i8 X s>> 7) --> usubsat X, 128
6720 // (i8 X + 128) & (i8 X s>> 7) --> usubsat X, 128
6721 // xor/add with SMIN (signmask) are logically equivalent.
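  // E.g. for i8: (X s>> 7) is all-ones only when X >= 128, and in that case
  // (X ^ 128) == (X + 128) == X - 128 (mod 256), so the AND computes
  // max(X - 128, 0), i.e. usubsat(X, 128).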
6722 SDValue X;
6723 if (!sd_match(N, m_And(m_OneUse(m_Xor(m_Value(X), m_SpecificInt(SignMask))),
6724                         m_OneUse(m_Sra(m_Deferred(X),
6725                                        m_SpecificInt(BitWidth - 1))))) &&
6726      !sd_match(N, m_And(m_OneUse(m_Add(m_Value(X), m_SpecificInt(SignMask))),
6727                         m_OneUse(m_Sra(m_Deferred(X),
6728                                        m_SpecificInt(BitWidth - 1))))))
6729 return SDValue();
6730
6731 return DAG.getNode(ISD::USUBSAT, DL, VT, X,
6732 DAG.getConstant(SignMask, DL, VT));
6733}
6734
6735/// Given a bitwise logic operation N with a matching bitwise logic operand,
6736/// fold a pattern where 2 of the source operands are identically shifted
6737/// values. For example:
6738/// ((X0 << Y) | Z) | (X1 << Y) --> ((X0 | X1) << Y) | Z
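/// This is valid because shifting by the same amount distributes over the
/// bitwise op: (X0 << Y) | (X1 << Y) == (X0 | X1) << Y, and likewise for
/// srl/sra combined with and/or/xor.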
6739static SDValue foldLogicOfShifts(SDNode *N, SDValue LogicOp, SDValue ShiftOp,
6740                                 SelectionDAG &DAG) {
6741 unsigned LogicOpcode = N->getOpcode();
6742 assert(ISD::isBitwiseLogicOp(LogicOpcode) &&
6743 "Expected bitwise logic operation");
6744
6745 if (!LogicOp.hasOneUse() || !ShiftOp.hasOneUse())
6746 return SDValue();
6747
6748 // Match another bitwise logic op and a shift.
6749 unsigned ShiftOpcode = ShiftOp.getOpcode();
6750 if (LogicOp.getOpcode() != LogicOpcode ||
6751 !(ShiftOpcode == ISD::SHL || ShiftOpcode == ISD::SRL ||
6752 ShiftOpcode == ISD::SRA))
6753 return SDValue();
6754
6755 // Match another shift op inside the first logic operand. Handle both commuted
6756 // possibilities.
6757 // LOGIC (LOGIC (SH X0, Y), Z), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z
6758 // LOGIC (LOGIC Z, (SH X0, Y)), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z
6759 SDValue X1 = ShiftOp.getOperand(0);
6760 SDValue Y = ShiftOp.getOperand(1);
6761 SDValue X0, Z;
6762 if (LogicOp.getOperand(0).getOpcode() == ShiftOpcode &&
6763 LogicOp.getOperand(0).getOperand(1) == Y) {
6764 X0 = LogicOp.getOperand(0).getOperand(0);
6765 Z = LogicOp.getOperand(1);
6766 } else if (LogicOp.getOperand(1).getOpcode() == ShiftOpcode &&
6767 LogicOp.getOperand(1).getOperand(1) == Y) {
6768 X0 = LogicOp.getOperand(1).getOperand(0);
6769 Z = LogicOp.getOperand(0);
6770 } else {
6771 return SDValue();
6772 }
6773
6774 EVT VT = N->getValueType(0);
6775 SDLoc DL(N);
6776 SDValue LogicX = DAG.getNode(LogicOpcode, DL, VT, X0, X1);
6777 SDValue NewShift = DAG.getNode(ShiftOpcode, DL, VT, LogicX, Y);
6778 return DAG.getNode(LogicOpcode, DL, VT, NewShift, Z);
6779}
6780
6781/// Given a tree of logic operations with shape like
6782/// (LOGIC (LOGIC (X, Y), LOGIC (Z, Y)))
6783/// try to match and fold shift operations with the same shift amount.
6784/// For example:
6785/// LOGIC (LOGIC (SH X0, Y), Z), (LOGIC (SH X1, Y), W) -->
6786/// --> LOGIC (SH (LOGIC X0, X1), Y), (LOGIC Z, W)
6787static SDValue foldLogicTreeOfShifts(SDNode *N, SDValue LeftHand,
6788                                     SDValue RightHand, SelectionDAG &DAG) {
6789 unsigned LogicOpcode = N->getOpcode();
6790 assert(ISD::isBitwiseLogicOp(LogicOpcode) &&
6791 "Expected bitwise logic operation");
6792 if (LeftHand.getOpcode() != LogicOpcode ||
6793 RightHand.getOpcode() != LogicOpcode)
6794 return SDValue();
6795 if (!LeftHand.hasOneUse() || !RightHand.hasOneUse())
6796 return SDValue();
6797
6798 // Try to match one of following patterns:
6799 // LOGIC (LOGIC (SH X0, Y), Z), (LOGIC (SH X1, Y), W)
6800 // LOGIC (LOGIC (SH X0, Y), Z), (LOGIC W, (SH X1, Y))
6801 // Note that foldLogicOfShifts will handle commuted versions of the left hand
6802 // itself.
6803 SDValue CombinedShifts, W;
6804 SDValue R0 = RightHand.getOperand(0);
6805 SDValue R1 = RightHand.getOperand(1);
6806 if ((CombinedShifts = foldLogicOfShifts(N, LeftHand, R0, DAG)))
6807 W = R1;
6808 else if ((CombinedShifts = foldLogicOfShifts(N, LeftHand, R1, DAG)))
6809 W = R0;
6810 else
6811 return SDValue();
6812
6813 EVT VT = N->getValueType(0);
6814 SDLoc DL(N);
6815 return DAG.getNode(LogicOpcode, DL, VT, CombinedShifts, W);
6816}
6817
6818SDValue DAGCombiner::visitAND(SDNode *N) {
6819 SDValue N0 = N->getOperand(0);
6820 SDValue N1 = N->getOperand(1);
6821 EVT VT = N1.getValueType();
6822 SDLoc DL(N);
6823
6824 // x & x --> x
6825 if (N0 == N1)
6826 return N0;
6827
6828 // fold (and c1, c2) -> c1&c2
6829 if (SDValue C = DAG.FoldConstantArithmetic(ISD::AND, DL, VT, {N0, N1}))
6830 return C;
6831
6832 // canonicalize constant to RHS
6833  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
6834      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
6835    return DAG.getNode(ISD::AND, DL, VT, N1, N0);
6836
6837 if (areBitwiseNotOfEachother(N0, N1))
6838 return DAG.getConstant(APInt::getZero(VT.getScalarSizeInBits()), DL, VT);
6839
6840 // fold vector ops
6841 if (VT.isVector()) {
6842 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
6843 return FoldedVOp;
6844
6845 // fold (and x, 0) -> 0, vector edition
6846    if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
6847      // do not return N1, because undef node may exist in N1
6848      return DAG.getConstant(APInt::getZero(N1.getScalarValueSizeInBits()), DL,
6849                             N1.getValueType());
6850
6851 // fold (and x, -1) -> x, vector edition
6852    if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
6853      return N0;
6854
6855 // fold (and (masked_load) (splat_vec (x, ...))) to zext_masked_load
6856 auto *MLoad = dyn_cast<MaskedLoadSDNode>(N0);
6857 ConstantSDNode *Splat = isConstOrConstSplat(N1, true, true);
6858 if (MLoad && MLoad->getExtensionType() == ISD::EXTLOAD && Splat &&
6859 N1.hasOneUse()) {
6860 EVT LoadVT = MLoad->getMemoryVT();
6861 EVT ExtVT = VT;
6862 if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT, LoadVT)) {
6863 // For this AND to be a zero extension of the masked load the elements
6864 // of the BuildVec must mask the bottom bits of the extended element
6865 // type
6866 uint64_t ElementSize =
6867            LoadVT.getVectorElementType().getScalarSizeInBits();
6868        if (Splat->getAPIntValue().isMask(ElementSize)) {
6869 SDValue NewLoad = DAG.getMaskedLoad(
6870 ExtVT, DL, MLoad->getChain(), MLoad->getBasePtr(),
6871 MLoad->getOffset(), MLoad->getMask(), MLoad->getPassThru(),
6872 LoadVT, MLoad->getMemOperand(), MLoad->getAddressingMode(),
6873 ISD::ZEXTLOAD, MLoad->isExpandingLoad());
6874 bool LoadHasOtherUsers = !N0.hasOneUse();
6875 CombineTo(N, NewLoad);
6876 if (LoadHasOtherUsers)
6877 CombineTo(MLoad, NewLoad.getValue(0), NewLoad.getValue(1));
6878 return SDValue(N, 0);
6879 }
6880 }
6881 }
6882 }
6883
6884 // fold (and x, -1) -> x
6885 if (isAllOnesConstant(N1))
6886 return N0;
6887
6888 // if (and x, c) is known to be zero, return 0
6889 unsigned BitWidth = VT.getScalarSizeInBits();
6890  ConstantSDNode *N1C = isConstOrConstSplat(N1);
6891  if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(BitWidth)))
6892    return DAG.getConstant(0, DL, VT);
6893
6894 if (SDValue R = foldAndOrOfSETCC(N, DAG))
6895 return R;
6896
6897 if (SDValue NewSel = foldBinOpIntoSelect(N))
6898 return NewSel;
6899
6900 // reassociate and
6901 if (SDValue RAND = reassociateOps(ISD::AND, DL, N0, N1, N->getFlags()))
6902 return RAND;
6903
6904 // Fold and(vecreduce(x), vecreduce(y)) -> vecreduce(and(x, y))
6905 if (SDValue SD =
6906 reassociateReduction(ISD::VECREDUCE_AND, ISD::AND, DL, VT, N0, N1))
6907 return SD;
6908
6909 // fold (and (or x, C), D) -> D if (C & D) == D
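  // E.g. (and (or x, 0xF0), 0x30) --> 0x30, because the OR guarantees that
  // every bit of D is already set before the AND.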
6910 auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
6911 return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
6912 };
6913 if (N0.getOpcode() == ISD::OR &&
6914 ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
6915 return N1;
6916
6917 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
6918 SDValue N0Op0 = N0.getOperand(0);
6919 EVT SrcVT = N0Op0.getValueType();
6920 unsigned SrcBitWidth = SrcVT.getScalarSizeInBits();
6921 APInt Mask = ~N1C->getAPIntValue();
6922 Mask = Mask.trunc(SrcBitWidth);
6923
6924 // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
6925 if (DAG.MaskedValueIsZero(N0Op0, Mask))
6926 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0Op0);
6927
6928 // fold (and (any_ext V), c) -> (zero_ext (and (trunc V), c)) if profitable.
6929 if (N1C->getAPIntValue().countLeadingZeros() >= (BitWidth - SrcBitWidth) &&
6930 TLI.isTruncateFree(VT, SrcVT) && TLI.isZExtFree(SrcVT, VT) &&
6931 TLI.isTypeDesirableForOp(ISD::AND, SrcVT) &&
6932 TLI.isNarrowingProfitable(VT, SrcVT))
6933 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT,
6934 DAG.getNode(ISD::AND, DL, SrcVT, N0Op0,
6935 DAG.getZExtOrTrunc(N1, DL, SrcVT)));
6936 }
6937
6938 // fold (and (ext (and V, c1)), c2) -> (and (ext V), (and c1, (ext c2)))
6939 if (ISD::isExtOpcode(N0.getOpcode())) {
6940 unsigned ExtOpc = N0.getOpcode();
6941 SDValue N0Op0 = N0.getOperand(0);
6942 if (N0Op0.getOpcode() == ISD::AND &&
6943 (ExtOpc != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0Op0, VT)) &&
6944        DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
6945        DAG.isConstantIntBuildVectorOrConstantInt(N0Op0.getOperand(1)) &&
6946        N0->hasOneUse() && N0Op0->hasOneUse()) {
6947 SDValue NewMask =
6948 DAG.getNode(ISD::AND, DL, VT, N1,
6949 DAG.getNode(ExtOpc, DL, VT, N0Op0.getOperand(1)));
6950 return DAG.getNode(ISD::AND, DL, VT,
6951 DAG.getNode(ExtOpc, DL, VT, N0Op0.getOperand(0)),
6952 NewMask);
6953 }
6954 }
6955
6956 // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
6957 // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
6958 // already be zero by virtue of the width of the base type of the load.
6959 //
6960 // the 'X' node here can either be nothing or an extract_vector_elt to catch
6961 // more cases.
6962 if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
6964 N0.getOperand(0).getOpcode() == ISD::LOAD &&
6965 N0.getOperand(0).getResNo() == 0) ||
6966 (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
6967 auto *Load =
6968 cast<LoadSDNode>((N0.getOpcode() == ISD::LOAD) ? N0 : N0.getOperand(0));
6969
6970 // Get the constant (if applicable) the zero'th operand is being ANDed with.
6971 // This can be a pure constant or a vector splat, in which case we treat the
6972 // vector as a scalar and use the splat value.
6973    APInt Constant = APInt::getZero(1);
6974    if (const ConstantSDNode *C = isConstOrConstSplat(
6975            N1, /*AllowUndef=*/false, /*AllowTruncation=*/true)) {
6976 Constant = C->getAPIntValue();
6977 } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
6978 unsigned EltBitWidth = Vector->getValueType(0).getScalarSizeInBits();
6979 APInt SplatValue, SplatUndef;
6980 unsigned SplatBitSize;
6981 bool HasAnyUndefs;
6982 // Endianness should not matter here. Code below makes sure that we only
6983 // use the result if the SplatBitSize is a multiple of the vector element
6984 // size. And after that we AND all element sized parts of the splat
6985 // together. So the end result should be the same regardless of in which
6986 // order we do those operations.
6987 const bool IsBigEndian = false;
6988 bool IsSplat =
6989 Vector->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
6990 HasAnyUndefs, EltBitWidth, IsBigEndian);
6991
6992 // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
6993 // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
6994 if (IsSplat && (SplatBitSize % EltBitWidth) == 0) {
6995 // Undef bits can contribute to a possible optimisation if set, so
6996 // set them.
6997 SplatValue |= SplatUndef;
6998
6999 // The splat value may be something like "0x00FFFFFF", which means 0 for
7000 // the first vector value and FF for the rest, repeating. We need a mask
7001 // that will apply equally to all members of the vector, so AND all the
7002 // lanes of the constant together.
7003 Constant = APInt::getAllOnes(EltBitWidth);
7004 for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i)
7005 Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth);
7006 }
7007 }
7008
7009 // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
7010 // actually legal and isn't going to get expanded, else this is a false
7011 // optimisation.
7012 bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
7013 Load->getValueType(0),
7014 Load->getMemoryVT());
7015
7016 // Resize the constant to the same size as the original memory access before
7017 // extension. If it is still the AllOnesValue then this AND is completely
7018 // unneeded.
7019 Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
7020
7021 bool B;
7022 switch (Load->getExtensionType()) {
7023 default: B = false; break;
7024 case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
7025 case ISD::ZEXTLOAD:
7026 case ISD::NON_EXTLOAD: B = true; break;
7027 }
7028
7029 if (B && Constant.isAllOnes()) {
7030 // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
7031 // preserve semantics once we get rid of the AND.
7032 SDValue NewLoad(Load, 0);
7033
7034 // Fold the AND away. NewLoad may get replaced immediately.
7035 CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
7036
7037 if (Load->getExtensionType() == ISD::EXTLOAD) {
7038 NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
7039 Load->getValueType(0), SDLoc(Load),
7040 Load->getChain(), Load->getBasePtr(),
7041 Load->getOffset(), Load->getMemoryVT(),
7042 Load->getMemOperand());
7043 // Replace uses of the EXTLOAD with the new ZEXTLOAD.
7044 if (Load->getNumValues() == 3) {
7045 // PRE/POST_INC loads have 3 values.
7046 SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
7047 NewLoad.getValue(2) };
7048 CombineTo(Load, To, 3, true);
7049 } else {
7050 CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
7051 }
7052 }
7053
7054 return SDValue(N, 0); // Return N so it doesn't get rechecked!
7055 }
7056 }
7057
7058 // Try to convert a constant mask AND into a shuffle clear mask.
7059 if (VT.isVector())
7060 if (SDValue Shuffle = XformToShuffleWithZero(N))
7061 return Shuffle;
7062
7063 if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
7064 return Combined;
7065
7066 if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR && N0.hasOneUse() && N1C &&
7067      ISD::isExtOpcode(N0.getOperand(0).getOpcode())) {
7068    SDValue Ext = N0.getOperand(0);
7069 EVT ExtVT = Ext->getValueType(0);
7070 SDValue Extendee = Ext->getOperand(0);
7071
7072 unsigned ScalarWidth = Extendee.getValueType().getScalarSizeInBits();
7073 if (N1C->getAPIntValue().isMask(ScalarWidth) &&
7074 (!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, ExtVT))) {
7075 // (and (extract_subvector (zext|anyext|sext v) _) iN_mask)
7076 // => (extract_subvector (iN_zeroext v))
7077 SDValue ZeroExtExtendee =
7078 DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVT, Extendee);
7079
7080 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ZeroExtExtendee,
7081 N0.getOperand(1));
7082 }
7083 }
7084
7085 // fold (and (masked_gather x)) -> (zext_masked_gather x)
7086 if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
7087 EVT MemVT = GN0->getMemoryVT();
7088 EVT ScalarVT = MemVT.getScalarType();
7089
7090 if (SDValue(GN0, 0).hasOneUse() &&
7091 isConstantSplatVectorMaskForType(N1.getNode(), ScalarVT) &&
7092        TLI.isVectorLoadExtDesirable(SDValue(GN0, 0))) {
7093      SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
7094 GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
7095
7096 SDValue ZExtLoad = DAG.getMaskedGather(
7097 DAG.getVTList(VT, MVT::Other), MemVT, DL, Ops, GN0->getMemOperand(),
7098 GN0->getIndexType(), ISD::ZEXTLOAD);
7099
7100 CombineTo(N, ZExtLoad);
7101 AddToWorklist(ZExtLoad.getNode());
7102 // Avoid recheck of N.
7103 return SDValue(N, 0);
7104 }
7105 }
7106
7107 // fold (and (load x), 255) -> (zextload x, i8)
7108 // fold (and (extload x, i16), 255) -> (zextload x, i8)
7109 if (N1C && N0.getOpcode() == ISD::LOAD && !VT.isVector())
7110 if (SDValue Res = reduceLoadWidth(N))
7111 return Res;
7112
7113 if (LegalTypes) {
7114 // Attempt to propagate the AND back up to the leaves which, if they're
7115 // loads, can be combined to narrow loads and the AND node can be removed.
7116 // Perform after legalization so that extend nodes will already be
7117 // combined into the loads.
7118 if (BackwardsPropagateMask(N))
7119 return SDValue(N, 0);
7120 }
7121
7122 if (SDValue Combined = visitANDLike(N0, N1, N))
7123 return Combined;
7124
7125 // Simplify: (and (op x...), (op y...)) -> (op (and x, y))
7126 if (N0.getOpcode() == N1.getOpcode())
7127 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
7128 return V;
7129
7130 if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
7131 return R;
7132 if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG))
7133 return R;
7134
7135 // Masking the negated extension of a boolean is just the zero-extended
7136 // boolean:
7137 // and (sub 0, zext(bool X)), 1 --> zext(bool X)
7138 // and (sub 0, sext(bool X)), 1 --> zext(bool X)
7139 //
7140 // Note: the SimplifyDemandedBits fold below can make an information-losing
7141 // transform, and then we have no way to find this better fold.
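  // Proof sketch: (sub 0, zext(bool X)) is 0 or -1 and (sub 0, sext(bool X))
  // is 0 or 1; in both cases the low bit equals X, so masking by 1 yields
  // zext(X).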
7142 if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
7143 if (isNullOrNullSplat(N0.getOperand(0))) {
7144 SDValue SubRHS = N0.getOperand(1);
7145 if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
7146 SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
7147 return SubRHS;
7148 if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
7149 SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
7150 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, SubRHS.getOperand(0));
7151 }
7152 }
7153
7154 // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
7155 // fold (and (sra)) -> (and (srl)) when possible.
7156  if (SimplifyDemandedBits(SDValue(N, 0)))
7157    return SDValue(N, 0);
7158
7159 // fold (zext_inreg (extload x)) -> (zextload x)
7160 // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
7161 if (ISD::isUNINDEXEDLoad(N0.getNode()) &&
7162 (ISD::isEXTLoad(N0.getNode()) ||
7163 (ISD::isSEXTLoad(N0.getNode()) && N0.hasOneUse()))) {
7164 auto *LN0 = cast<LoadSDNode>(N0);
7165 EVT MemVT = LN0->getMemoryVT();
7166 // If we zero all the possible extended bits, then we can turn this into
7167 // a zextload if we are running before legalize or the operation is legal.
7168 unsigned ExtBitSize = N1.getScalarValueSizeInBits();
7169 unsigned MemBitSize = MemVT.getScalarSizeInBits();
7170 APInt ExtBits = APInt::getHighBitsSet(ExtBitSize, ExtBitSize - MemBitSize);
7171 if (DAG.MaskedValueIsZero(N1, ExtBits) &&
7172 ((!LegalOperations && LN0->isSimple()) ||
7173 TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
7174 SDValue ExtLoad =
7175 DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(),
7176 LN0->getBasePtr(), MemVT, LN0->getMemOperand());
7177 AddToWorklist(N);
7178 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
7179 return SDValue(N, 0); // Return N so it doesn't get rechecked!
7180 }
7181 }
7182
7183 // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
7184 if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
7185 if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
7186 N0.getOperand(1), false))
7187 return BSwap;
7188 }
7189
7190 if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
7191 return Shifts;
7192
7193 if (SDValue V = combineShiftAnd1ToBitTest(N, DAG))
7194 return V;
7195
7196 // Recognize the following pattern:
7197 //
7198 // AndVT = (and (sign_extend NarrowVT to AndVT) #bitmask)
7199 //
7200 // where bitmask is a mask that clears the upper bits of AndVT. The
7201 // number of bits in bitmask must be a power of two.
7202 auto IsAndZeroExtMask = [](SDValue LHS, SDValue RHS) {
7203 if (LHS->getOpcode() != ISD::SIGN_EXTEND)
7204 return false;
7205
7206 auto *C = dyn_cast<ConstantSDNode>(RHS);
7207 if (!C)
7208 return false;
7209
7210 if (!C->getAPIntValue().isMask(
7211 LHS.getOperand(0).getValueType().getFixedSizeInBits()))
7212 return false;
7213
7214 return true;
7215 };
7216
7217 // Replace (and (sign_extend ...) #bitmask) with (zero_extend ...).
7218 if (IsAndZeroExtMask(N0, N1))
7219 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
7220
7221 if (hasOperation(ISD::USUBSAT, VT))
7222 if (SDValue V = foldAndToUsubsat(N, DAG, DL))
7223 return V;
7224
7225 // Postpone until legalization completed to avoid interference with bswap
7226 // folding
7227 if (LegalOperations || VT.isVector())
7228 if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
7229 return R;
7230
7231 return SDValue();
7232}
7233
7234/// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
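/// For types wider than i16 the byte swap leaves the swapped halfword in the
/// top bits, so the result is shifted right by (bitwidth - 16) afterwards.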
7235SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
7236 bool DemandHighBits) {
7237 if (!LegalOperations)
7238 return SDValue();
7239
7240 EVT VT = N->getValueType(0);
7241 if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
7242 return SDValue();
7243  if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
7244    return SDValue();
7245
7246 // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
7247 bool LookPassAnd0 = false;
7248 bool LookPassAnd1 = false;
7249 if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
7250 std::swap(N0, N1);
7251 if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
7252 std::swap(N0, N1);
7253 if (N0.getOpcode() == ISD::AND) {
7254 if (!N0->hasOneUse())
7255 return SDValue();
7256 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7257 // Also handle 0xffff since the LHS is guaranteed to have zeros there.
7258 // This is needed for X86.
7259 if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
7260 N01C->getZExtValue() != 0xFFFF))
7261 return SDValue();
7262 N0 = N0.getOperand(0);
7263 LookPassAnd0 = true;
7264 }
7265
7266 if (N1.getOpcode() == ISD::AND) {
7267 if (!N1->hasOneUse())
7268 return SDValue();
7269 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
7270 if (!N11C || N11C->getZExtValue() != 0xFF)
7271 return SDValue();
7272 N1 = N1.getOperand(0);
7273 LookPassAnd1 = true;
7274 }
7275
7276 if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
7277 std::swap(N0, N1);
7278 if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
7279 return SDValue();
7280 if (!N0->hasOneUse() || !N1->hasOneUse())
7281 return SDValue();
7282
7283 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7284 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
7285 if (!N01C || !N11C)
7286 return SDValue();
7287 if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
7288 return SDValue();
7289
7290 // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
7291 SDValue N00 = N0->getOperand(0);
7292 if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
7293 if (!N00->hasOneUse())
7294 return SDValue();
7295 ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
7296 if (!N001C || N001C->getZExtValue() != 0xFF)
7297 return SDValue();
7298 N00 = N00.getOperand(0);
7299 LookPassAnd0 = true;
7300 }
7301
7302 SDValue N10 = N1->getOperand(0);
7303 if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
7304 if (!N10->hasOneUse())
7305 return SDValue();
7306 ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
7307 // Also allow 0xFFFF since the bits will be shifted out. This is needed
7308 // for X86.
7309 if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
7310 N101C->getZExtValue() != 0xFFFF))
7311 return SDValue();
7312 N10 = N10.getOperand(0);
7313 LookPassAnd1 = true;
7314 }
7315
7316 if (N00 != N10)
7317 return SDValue();
7318
7319 // Make sure everything beyond the low halfword gets set to zero since the SRL
7320 // 16 will clear the top bits.
7321 unsigned OpSizeInBits = VT.getSizeInBits();
7322 if (OpSizeInBits > 16) {
7323 // If the left-shift isn't masked out then the only way this is a bswap is
7324 // if all bits beyond the low 8 are 0. In that case the entire pattern
7325 // reduces to a left shift anyway: leave it for other parts of the combiner.
7326 if (DemandHighBits && !LookPassAnd0)
7327 return SDValue();
7328
7329 // However, if the right shift isn't masked out then it might be because
7330 // it's not needed. See if we can spot that too. If the high bits aren't
7331 // demanded, we only need bits 23:16 to be zero. Otherwise, we need all
7332 // upper bits to be zero.
7333 if (!LookPassAnd1) {
7334 unsigned HighBit = DemandHighBits ? OpSizeInBits : 24;
7335 if (!DAG.MaskedValueIsZero(N10,
7336 APInt::getBitsSet(OpSizeInBits, 16, HighBit)))
7337 return SDValue();
7338 }
7339 }
7340
7341 SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
7342 if (OpSizeInBits > 16) {
7343 SDLoc DL(N);
7344 Res = DAG.getNode(ISD::SRL, DL, VT, Res,
7345 DAG.getConstant(OpSizeInBits - 16, DL,
7346 getShiftAmountTy(VT)));
7347 }
7348 return Res;
7349}
7350
7351/// Return true if the specified node is an element that makes up a 32-bit
7352/// packed halfword byteswap.
7353/// ((x & 0x000000ff) << 8) |
7354/// ((x & 0x0000ff00) >> 8) |
7355/// ((x & 0x00ff0000) << 8) |
7356/// ((x & 0xff000000) >> 8)
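/// Parts[] records, for each mask byte, the node that supplies x; the callers
/// later require all matched elements to name the same node.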
7357static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
7358  if (!N->hasOneUse())
7359 return false;
7360
7361 unsigned Opc = N.getOpcode();
7362 if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
7363 return false;
7364
7365 SDValue N0 = N.getOperand(0);
7366 unsigned Opc0 = N0.getOpcode();
7367 if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
7368 return false;
7369
7370 ConstantSDNode *N1C = nullptr;
7371 // SHL or SRL: look upstream for AND mask operand
7372 if (Opc == ISD::AND)
7373 N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
7374 else if (Opc0 == ISD::AND)
7375 N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7376 if (!N1C)
7377 return false;
7378
7379 unsigned MaskByteOffset;
7380 switch (N1C->getZExtValue()) {
7381 default:
7382 return false;
7383 case 0xFF: MaskByteOffset = 0; break;
7384 case 0xFF00: MaskByteOffset = 1; break;
7385 case 0xFFFF:
7386 // In case demanded bits didn't clear the bits that will be shifted out.
7387 // This is needed for X86.
7388 if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) {
7389 MaskByteOffset = 1;
7390 break;
7391 }
7392 return false;
7393 case 0xFF0000: MaskByteOffset = 2; break;
7394 case 0xFF000000: MaskByteOffset = 3; break;
7395 }
7396
7397 // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
7398 if (Opc == ISD::AND) {
7399 if (MaskByteOffset == 0 || MaskByteOffset == 2) {
7400 // (x >> 8) & 0xff
7401 // (x >> 8) & 0xff0000
7402 if (Opc0 != ISD::SRL)
7403 return false;
7404 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7405 if (!C || C->getZExtValue() != 8)
7406 return false;
7407 } else {
7408 // (x << 8) & 0xff00
7409 // (x << 8) & 0xff000000
7410 if (Opc0 != ISD::SHL)
7411 return false;
7412 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7413 if (!C || C->getZExtValue() != 8)
7414 return false;
7415 }
7416 } else if (Opc == ISD::SHL) {
7417 // (x & 0xff) << 8
7418 // (x & 0xff0000) << 8
7419 if (MaskByteOffset != 0 && MaskByteOffset != 2)
7420 return false;
7421 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
7422 if (!C || C->getZExtValue() != 8)
7423 return false;
7424 } else { // Opc == ISD::SRL
7425 // (x & 0xff00) >> 8
7426 // (x & 0xff000000) >> 8
7427 if (MaskByteOffset != 1 && MaskByteOffset != 3)
7428 return false;
7429 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
7430 if (!C || C->getZExtValue() != 8)
7431 return false;
7432 }
7433
7434 if (Parts[MaskByteOffset])
7435 return false;
7436
7437 Parts[MaskByteOffset] = N0.getOperand(0).getNode();
7438 return true;
7439}
7440
7441// Match 2 elements of a packed halfword bswap.
7442static bool isBSwapHWordPair(SDValue N, MutableArrayRef<SDNode *> Parts) {
7443  if (N.getOpcode() == ISD::OR)
7444 return isBSwapHWordElement(N.getOperand(0), Parts) &&
7445 isBSwapHWordElement(N.getOperand(1), Parts);
7446
7447 if (N.getOpcode() == ISD::SRL && N.getOperand(0).getOpcode() == ISD::BSWAP) {
7448 ConstantSDNode *C = isConstOrConstSplat(N.getOperand(1));
7449 if (!C || C->getAPIntValue() != 16)
7450 return false;
7451 Parts[0] = Parts[1] = N.getOperand(0).getOperand(0).getNode();
7452 return true;
7453 }
7454
7455 return false;
7456}
7457
7458// Match this pattern:
7459// (or (and (shl (A, 8)), 0xff00ff00), (and (srl (A, 8)), 0x00ff00ff))
7460// And rewrite this to:
7461// (rotr (bswap A), 16)
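// Writing A as bytes b3.b2.b1.b0, both sides produce b2.b3.b0.b1: bswap
// reverses all four bytes and the rotate right by 16 swaps the halfwords back.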
7462static SDValue matchBSwapHWordOrAndAnd(const TargetLowering &TLI,
7463                                        SelectionDAG &DAG, SDNode *N, SDValue N0,
7464 SDValue N1, EVT VT, EVT ShiftAmountTy) {
7465 assert(N->getOpcode() == ISD::OR && VT == MVT::i32 &&
7466 "MatchBSwapHWordOrAndAnd: expecting i32");
7467 if (!TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
7468 return SDValue();
7469 if (N0.getOpcode() != ISD::AND || N1.getOpcode() != ISD::AND)
7470 return SDValue();
7471 // TODO: this is too restrictive; lifting this restriction requires more tests
7472 if (!N0->hasOneUse() || !N1->hasOneUse())
7473 return SDValue();
7474  ConstantSDNode *Mask0 = isConstOrConstSplat(N0.getOperand(1));
7475  ConstantSDNode *Mask1 = isConstOrConstSplat(N1.getOperand(1));
7476  if (!Mask0 || !Mask1)
7477 return SDValue();
7478 if (Mask0->getAPIntValue() != 0xff00ff00 ||
7479 Mask1->getAPIntValue() != 0x00ff00ff)
7480 return SDValue();
7481 SDValue Shift0 = N0.getOperand(0);
7482 SDValue Shift1 = N1.getOperand(0);
7483 if (Shift0.getOpcode() != ISD::SHL || Shift1.getOpcode() != ISD::SRL)
7484 return SDValue();
7485 ConstantSDNode *ShiftAmt0 = isConstOrConstSplat(Shift0.getOperand(1));
7486 ConstantSDNode *ShiftAmt1 = isConstOrConstSplat(Shift1.getOperand(1));
7487 if (!ShiftAmt0 || !ShiftAmt1)
7488 return SDValue();
7489 if (ShiftAmt0->getAPIntValue() != 8 || ShiftAmt1->getAPIntValue() != 8)
7490 return SDValue();
7491 if (Shift0.getOperand(0) != Shift1.getOperand(0))
7492 return SDValue();
7493
7494 SDLoc DL(N);
7495 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Shift0.getOperand(0));
7496 SDValue ShAmt = DAG.getConstant(16, DL, ShiftAmountTy);
7497 return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
7498}
7499
7500/// Match a 32-bit packed halfword bswap. That is
7501/// ((x & 0x000000ff) << 8) |
7502/// ((x & 0x0000ff00) >> 8) |
7503/// ((x & 0x00ff0000) << 8) |
7504/// ((x & 0xff000000) >> 8)
7505/// => (rotl (bswap x), 16)
7506SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
7507 if (!LegalOperations)
7508 return SDValue();
7509
7510 EVT VT = N->getValueType(0);
7511 if (VT != MVT::i32)
7512 return SDValue();
7513  if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
7514    return SDValue();
7515
7516 if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N0, N1, VT,
7517 getShiftAmountTy(VT)))
7518 return BSwap;
7519
7520 // Try again with commuted operands.
7521 if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N1, N0, VT,
7522 getShiftAmountTy(VT)))
7523 return BSwap;
7524
7525
7526 // Look for either
7527 // (or (bswaphpair), (bswaphpair))
7528 // (or (or (bswaphpair), (and)), (and))
7529 // (or (or (and), (bswaphpair)), (and))
7530 SDNode *Parts[4] = {};
7531
7532 if (isBSwapHWordPair(N0, Parts)) {
7533 // (or (or (and), (and)), (or (and), (and)))
7534 if (!isBSwapHWordPair(N1, Parts))
7535 return SDValue();
7536 } else if (N0.getOpcode() == ISD::OR) {
7537 // (or (or (or (and), (and)), (and)), (and))
7538 if (!isBSwapHWordElement(N1, Parts))
7539 return SDValue();
7540 SDValue N00 = N0.getOperand(0);
7541 SDValue N01 = N0.getOperand(1);
7542 if (!(isBSwapHWordElement(N01, Parts) && isBSwapHWordPair(N00, Parts)) &&
7543 !(isBSwapHWordElement(N00, Parts) && isBSwapHWordPair(N01, Parts)))
7544 return SDValue();
7545 } else {
7546 return SDValue();
7547 }
7548
7549 // Make sure the parts are all coming from the same node.
7550 if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
7551 return SDValue();
7552
7553 SDLoc DL(N);
7554 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
7555 SDValue(Parts[0], 0));
7556
7557 // Result of the bswap should be rotated by 16. If it's not legal, then
7558 // do (x << 16) | (x >> 16).
7559 SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
7560  if (hasOperation(ISD::ROTL, VT))
7561    return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
7562  if (hasOperation(ISD::ROTR, VT))
7563    return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
7564 return DAG.getNode(ISD::OR, DL, VT,
7565 DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
7566 DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
7567}
7568
7569/// This contains all DAGCombine rules which reduce two values combined by
7570/// an Or operation to a single value \see visitANDLike().
7571SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, const SDLoc &DL) {
7572 EVT VT = N1.getValueType();
7573
7574 // fold (or x, undef) -> -1
7575 if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
7576 return DAG.getAllOnesConstant(DL, VT);
7577
7578 if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
7579 return V;
7580
7581 // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
7582 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
7583 // Don't increase # computations.
7584 (N0->hasOneUse() || N1->hasOneUse())) {
7585 // We can only do this xform if we know that bits from X that are set in C2
7586 // but not in C1 are already zero. Likewise for Y.
7587 if (const ConstantSDNode *N0O1C =
7589 if (const ConstantSDNode *N1O1C =
7591 // We can only do this xform if we know that bits from X that are set in
7592 // C2 but not in C1 are already zero. Likewise for Y.
7593 const APInt &LHSMask = N0O1C->getAPIntValue();
7594 const APInt &RHSMask = N1O1C->getAPIntValue();
7595
7596 if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
7597 DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
7598 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
7599 N0.getOperand(0), N1.getOperand(0));
7600 return DAG.getNode(ISD::AND, DL, VT, X,
7601 DAG.getConstant(LHSMask | RHSMask, DL, VT));
7602 }
7603 }
7604 }
7605 }
7606
7607 // (or (and X, M), (and X, N)) -> (and X, (or M, N))
7608 if (N0.getOpcode() == ISD::AND &&
7609 N1.getOpcode() == ISD::AND &&
7610 N0.getOperand(0) == N1.getOperand(0) &&
7611 // Don't increase # computations.
7612 (N0->hasOneUse() || N1->hasOneUse())) {
7613 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
7614 N0.getOperand(1), N1.getOperand(1));
7615 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
7616 }
7617
7618 return SDValue();
7619}
7620
7621/// OR combines for which the commuted variant will be tried as well.
7623 SDNode *N) {
7624 EVT VT = N0.getValueType();
7625 unsigned BW = VT.getScalarSizeInBits();
7626 SDLoc DL(N);
7627
7628 auto peekThroughResize = [](SDValue V) {
7629 if (V->getOpcode() == ISD::ZERO_EXTEND || V->getOpcode() == ISD::TRUNCATE)
7630 return V->getOperand(0);
7631 return V;
7632 };
7633
7634 SDValue N0Resized = peekThroughResize(N0);
7635 if (N0Resized.getOpcode() == ISD::AND) {
7636 SDValue N1Resized = peekThroughResize(N1);
7637 SDValue N00 = N0Resized.getOperand(0);
7638 SDValue N01 = N0Resized.getOperand(1);
7639
7640 // fold or (and x, y), x --> x
7641 if (N00 == N1Resized || N01 == N1Resized)
7642 return N1;
7643
7644 // fold (or (and X, (xor Y, -1)), Y) -> (or X, Y)
7645 // TODO: Set AllowUndefs = true.
7646 if (SDValue NotOperand = getBitwiseNotOperand(N01, N00,
7647 /* AllowUndefs */ false)) {
7648 if (peekThroughResize(NotOperand) == N1Resized)
7649 return DAG.getNode(ISD::OR, DL, VT, DAG.getZExtOrTrunc(N00, DL, VT),
7650 N1);
7651 }
7652
7653 // fold (or (and (xor Y, -1), X), Y) -> (or X, Y)
7654 if (SDValue NotOperand = getBitwiseNotOperand(N00, N01,
7655 /* AllowUndefs */ false)) {
7656 if (peekThroughResize(NotOperand) == N1Resized)
7657 return DAG.getNode(ISD::OR, DL, VT, DAG.getZExtOrTrunc(N01, DL, VT),
7658 N1);
7659 }
7660 }
7661
7662 SDValue X, Y;
7663
7664 // fold or (xor X, N1), N1 --> or X, N1
7665 if (sd_match(N0, m_Xor(m_Value(X), m_Specific(N1))))
7666 return DAG.getNode(ISD::OR, DL, VT, X, N1);
7667
7668 // fold or (xor x, y), (x and/or y) --> or x, y
7669 if (sd_match(N0, m_Xor(m_Value(X), m_Value(Y))) &&
7670 (sd_match(N1, m_And(m_Specific(X), m_Specific(Y))) ||
7671       sd_match(N1, m_Or(m_Specific(X), m_Specific(Y)))))
7672    return DAG.getNode(ISD::OR, DL, VT, X, Y);
7673
7674 if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
7675 return R;
7676
7677 auto peekThroughZext = [](SDValue V) {
7678 if (V->getOpcode() == ISD::ZERO_EXTEND)
7679 return V->getOperand(0);
7680 return V;
7681 };
7682
7683 // (fshl X, ?, Y) | (shl X, Y) --> fshl X, ?, Y
7684 if (N0.getOpcode() == ISD::FSHL && N1.getOpcode() == ISD::SHL &&
7685 N0.getOperand(0) == N1.getOperand(0) &&
7686 peekThroughZext(N0.getOperand(2)) == peekThroughZext(N1.getOperand(1)))
7687 return N0;
7688
7689 // (fshr ?, X, Y) | (srl X, Y) --> fshr ?, X, Y
7690 if (N0.getOpcode() == ISD::FSHR && N1.getOpcode() == ISD::SRL &&
7691 N0.getOperand(1) == N1.getOperand(0) &&
7692 peekThroughZext(N0.getOperand(2)) == peekThroughZext(N1.getOperand(1)))
7693 return N0;
7694
7695 // Attempt to match a legalized build_pair-esque pattern:
7696 // or(shl(aext(Hi),BW/2),zext(Lo))
7697 SDValue Lo, Hi;
7698 if (sd_match(N0,
7699               m_OneUse(m_Shl(m_AnyExt(m_Value(Hi)), m_SpecificInt(BW / 2)))) &&
7700      sd_match(N1, m_ZExt(m_Value(Lo))) &&
7701 Lo.getScalarValueSizeInBits() == (BW / 2) &&
7702 Lo.getValueType() == Hi.getValueType()) {
7703 // Fold build_pair(not(Lo),not(Hi)) -> not(build_pair(Lo,Hi)).
7704 SDValue NotLo, NotHi;
7705 if (sd_match(Lo, m_OneUse(m_Not(m_Value(NotLo)))) &&
7706 sd_match(Hi, m_OneUse(m_Not(m_Value(NotHi))))) {
7707 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotLo);
7708 Hi = DAG.getNode(ISD::ANY_EXTEND, DL, VT, NotHi);
7709 Hi = DAG.getNode(ISD::SHL, DL, VT, Hi,
7710 DAG.getShiftAmountConstant(BW / 2, VT, DL));
7711 return DAG.getNOT(DL, DAG.getNode(ISD::OR, DL, VT, Lo, Hi), VT);
7712 }
7713 }
7714
7715 return SDValue();
7716}
7717
7718SDValue DAGCombiner::visitOR(SDNode *N) {
7719 SDValue N0 = N->getOperand(0);
7720 SDValue N1 = N->getOperand(1);
7721 EVT VT = N1.getValueType();
7722 SDLoc DL(N);
7723
7724 // x | x --> x
7725 if (N0 == N1)
7726 return N0;
7727
7728 // fold (or c1, c2) -> c1|c2
7729 if (SDValue C = DAG.FoldConstantArithmetic(ISD::OR, DL, VT, {N0, N1}))
7730 return C;
7731
7732 // canonicalize constant to RHS
7733  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
7734      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
7735    return DAG.getNode(ISD::OR, DL, VT, N1, N0);
7736
7737 // fold vector ops
7738 if (VT.isVector()) {
7739 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
7740 return FoldedVOp;
7741
7742 // fold (or x, 0) -> x, vector edition
7743    if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
7744      return N0;
7745
7746 // fold (or x, -1) -> -1, vector edition
7747    if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
7748      // do not return N1, because undef node may exist in N1
7749 return DAG.getAllOnesConstant(DL, N1.getValueType());
7750
7751 // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
7752 // Do this only if the resulting type / shuffle is legal.
7753 auto *SV0 = dyn_cast<ShuffleVectorSDNode>(N0);
7754 auto *SV1 = dyn_cast<ShuffleVectorSDNode>(N1);
7755 if (SV0 && SV1 && TLI.isTypeLegal(VT)) {
7756 bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
7757 bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
7758 bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
7759 bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
7760 // Ensure both shuffles have a zero input.
7761 if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
7762 assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
7763 assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
7764 bool CanFold = true;
7765 int NumElts = VT.getVectorNumElements();
7766 SmallVector<int, 4> Mask(NumElts, -1);
7767
7768 for (int i = 0; i != NumElts; ++i) {
7769 int M0 = SV0->getMaskElt(i);
7770 int M1 = SV1->getMaskElt(i);
7771
7772 // Determine if either index is pointing to a zero vector.
7773 bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
7774 bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));
7775
7776          // If one element is zero and the other side is undef, keep undef.
7777 // This also handles the case that both are undef.
7778 if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0))
7779 continue;
7780
7781 // Make sure only one of the elements is zero.
7782 if (M0Zero == M1Zero) {
7783 CanFold = false;
7784 break;
7785 }
7786
7787 assert((M0 >= 0 || M1 >= 0) && "Undef index!");
7788
7789 // We have a zero and non-zero element. If the non-zero came from
7790 // SV0 make the index a LHS index. If it came from SV1, make it
7791 // a RHS index. We need to mod by NumElts because we don't care
7792 // which operand it came from in the original shuffles.
7793 Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
7794 }
7795
7796 if (CanFold) {
7797 SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
7798 SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
7799 SDValue LegalShuffle =
7800 TLI.buildLegalVectorShuffle(VT, DL, NewLHS, NewRHS, Mask, DAG);
7801 if (LegalShuffle)
7802 return LegalShuffle;
7803 }
7804 }
7805 }
7806 }
7807
7808 // fold (or x, 0) -> x
7809 if (isNullConstant(N1))
7810 return N0;
7811
7812 // fold (or x, -1) -> -1
7813 if (isAllOnesConstant(N1))
7814 return N1;
7815
7816 if (SDValue NewSel = foldBinOpIntoSelect(N))
7817 return NewSel;
7818
7819 // fold (or x, c) -> c iff (x & ~c) == 0
7820 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
7821 if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
7822 return N1;
7823
7824 if (SDValue R = foldAndOrOfSETCC(N, DAG))
7825 return R;
7826
7827 if (SDValue Combined = visitORLike(N0, N1, DL))
7828 return Combined;
7829
7830 if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
7831 return Combined;
7832
7833 // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
7834 if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
7835 return BSwap;
7836 if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
7837 return BSwap;
7838
7839 // reassociate or
7840 if (SDValue ROR = reassociateOps(ISD::OR, DL, N0, N1, N->getFlags()))
7841 return ROR;
7842
7843 // Fold or(vecreduce(x), vecreduce(y)) -> vecreduce(or(x, y))
7844 if (SDValue SD =
7845 reassociateReduction(ISD::VECREDUCE_OR, ISD::OR, DL, VT, N0, N1))
7846 return SD;
7847
7848 // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
7849 // iff (c1 & c2) != 0 or c1/c2 are undef.
7850 auto MatchIntersect = [](ConstantSDNode *C1, ConstantSDNode *C2) {
7851 return !C1 || !C2 || C1->getAPIntValue().intersects(C2->getAPIntValue());
7852 };
7853 if (N0.getOpcode() == ISD::AND && N0->hasOneUse() &&
7854 ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect, true)) {
7855 if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT,
7856 {N1, N0.getOperand(1)})) {
7857 SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
7858 AddToWorklist(IOR.getNode());
7859 return DAG.getNode(ISD::AND, DL, VT, COR, IOR);
7860 }
7861 }
7862
7863 if (SDValue Combined = visitORCommutative(DAG, N0, N1, N))
7864 return Combined;
7865 if (SDValue Combined = visitORCommutative(DAG, N1, N0, N))
7866 return Combined;
7867
7868 // Simplify: (or (op x...), (op y...)) -> (op (or x, y))
7869 if (N0.getOpcode() == N1.getOpcode())
7870 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
7871 return V;
7872
7873 // See if this is some rotate idiom.
7874 if (SDValue Rot = MatchRotate(N0, N1, DL))
7875 return Rot;
7876
7877 if (SDValue Load = MatchLoadCombine(N))
7878 return Load;
7879
7880 // Simplify the operands using demanded-bits information.
7881  if (SimplifyDemandedBits(SDValue(N, 0)))
7882    return SDValue(N, 0);
7883
7884 // If OR can be rewritten into ADD, try combines based on ADD.
7885 if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
7886 DAG.isADDLike(SDValue(N, 0)))
7887 if (SDValue Combined = visitADDLike(N))
7888 return Combined;
7889
7890 // Postpone until legalization completed to avoid interference with bswap
7891 // folding
7892 if (LegalOperations || VT.isVector())
7893 if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
7894 return R;
7895
7896 return SDValue();
7897}
7898
7899static SDValue stripConstantMask(const SelectionDAG &DAG, SDValue Op,
7900                                 SDValue &Mask) {
7901 if (Op.getOpcode() == ISD::AND &&
7902 DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
7903 Mask = Op.getOperand(1);
7904 return Op.getOperand(0);
7905 }
7906 return Op;
7907}
7908
7909/// Match "(X shl/srl V1) & V2" where V2 may not be present.
7910static bool matchRotateHalf(const SelectionDAG &DAG, SDValue Op, SDValue &Shift,
7911 SDValue &Mask) {
7912 Op = stripConstantMask(DAG, Op, Mask);
7913 if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
7914 Shift = Op;
7915 return true;
7916 }
7917 return false;
7918}
7919
7920/// Helper function for visitOR to extract the needed side of a rotate idiom
7921/// from a shl/srl/mul/udiv. This is meant to handle cases where
7922/// InstCombine merged some outside op with one of the shifts from
7923/// the rotate pattern.
7924/// \returns An empty \c SDValue if the needed shift couldn't be extracted.
7925/// Otherwise, returns an expansion of \p ExtractFrom based on the following
7926/// patterns:
7927///
7928/// (or (add v v) (shrl v bitwidth-1)):
7929/// expands (add v v) -> (shl v 1)
7930///
7931/// (or (mul v c0) (shrl (mul v c1) c2)):
7932/// expands (mul v c0) -> (shl (mul v c1) c3)
7933///
7934/// (or (udiv v c0) (shl (udiv v c1) c2)):
7935/// expands (udiv v c0) -> (shrl (udiv v c1) c3)
7936///
7937/// (or (shl v c0) (shrl (shl v c1) c2)):
7938/// expands (shl v c0) -> (shl (shl v c1) c3)
7939///
7940/// (or (shrl v c0) (shl (shrl v c1) c2)):
7941/// expands (shrl v c0) -> (shrl (shrl v c1) c3)
7942///
7943/// Such that in all cases, c3+c2==bitwidth(op v c1).
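/// For illustration, on i32 with c1 == 3 and c2 == 24 we need c3 == 8, so the
/// mul form (or (mul v 768) (shrl (mul v 3) 24)) expands (mul v 768) to
/// (shl (mul v 3) 8), exposing a rotate of (mul v 3) by 8.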
7944 static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
7945 SDValue ExtractFrom, SDValue &Mask,
7946 const SDLoc &DL) {
7947 assert(OppShift && ExtractFrom && "Empty SDValue");
7948 if (OppShift.getOpcode() != ISD::SHL && OppShift.getOpcode() != ISD::SRL)
7949 return SDValue();
7950
7951 ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask);
7952
7953 // Value and Type of the shift.
7954 SDValue OppShiftLHS = OppShift.getOperand(0);
7955 EVT ShiftedVT = OppShiftLHS.getValueType();
7956
7957 // Amount of the existing shift.
7958 ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));
7959
7960 // (add v v) -> (shl v 1)
7961 // TODO: Should this be a general DAG canonicalization?
7962 if (OppShift.getOpcode() == ISD::SRL && OppShiftCst &&
7963 ExtractFrom.getOpcode() == ISD::ADD &&
7964 ExtractFrom.getOperand(0) == ExtractFrom.getOperand(1) &&
7965 ExtractFrom.getOperand(0) == OppShiftLHS &&
7966 OppShiftCst->getAPIntValue() == ShiftedVT.getScalarSizeInBits() - 1)
7967 return DAG.getNode(ISD::SHL, DL, ShiftedVT, OppShiftLHS,
7968 DAG.getShiftAmountConstant(1, ShiftedVT, DL));
7969
7970 // Preconditions:
7971 // (or (op0 v c0) (shiftl/r (op0 v c1) c2))
7972 //
7973 // Find opcode of the needed shift to be extracted from (op0 v c0).
7974 unsigned Opcode = ISD::DELETED_NODE;
7975 bool IsMulOrDiv = false;
7976 // Set Opcode and IsMulOrDiv if the extract opcode matches the needed shift
7977 // opcode or its arithmetic (mul or udiv) variant.
7978 auto SelectOpcode = [&](unsigned NeededShift, unsigned MulOrDivVariant) {
7979 IsMulOrDiv = ExtractFrom.getOpcode() == MulOrDivVariant;
7980 if (!IsMulOrDiv && ExtractFrom.getOpcode() != NeededShift)
7981 return false;
7982 Opcode = NeededShift;
7983 return true;
7984 };
7985 // op0 must be either the needed shift opcode or the mul/udiv equivalent
7986 // that the needed shift can be extracted from.
7987 if ((OppShift.getOpcode() != ISD::SRL || !SelectOpcode(ISD::SHL, ISD::MUL)) &&
7988 (OppShift.getOpcode() != ISD::SHL || !SelectOpcode(ISD::SRL, ISD::UDIV)))
7989 return SDValue();
7990
7991 // op0 must be the same opcode on both sides, have the same LHS argument,
7992 // and produce the same value type.
7993 if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() ||
7994 OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) ||
7995 ShiftedVT != ExtractFrom.getValueType())
7996 return SDValue();
7997
7998 // Constant mul/udiv/shift amount from the RHS of the shift's LHS op.
7999 ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1));
8000 // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op.
8001 ConstantSDNode *ExtractFromCst =
8002 isConstOrConstSplat(ExtractFrom.getOperand(1));
8003 // TODO: We should be able to handle non-uniform constant vectors for these values
8004 // Check that we have constant values.
8005 if (!OppShiftCst || !OppShiftCst->getAPIntValue() ||
8006 !OppLHSCst || !OppLHSCst->getAPIntValue() ||
8007 !ExtractFromCst || !ExtractFromCst->getAPIntValue())
8008 return SDValue();
8009
8010 // Compute the shift amount we need to extract to complete the rotate.
8011 const unsigned VTWidth = ShiftedVT.getScalarSizeInBits();
8012 if (OppShiftCst->getAPIntValue().ugt(VTWidth))
8013 return SDValue();
8014 APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
8015 // Normalize the bitwidth of the two mul/udiv/shift constant operands.
8016 APInt ExtractFromAmt = ExtractFromCst->getAPIntValue();
8017 APInt OppLHSAmt = OppLHSCst->getAPIntValue();
8018 zeroExtendToMatch(ExtractFromAmt, OppLHSAmt);
8019
8020 // Now try extract the needed shift from the ExtractFrom op and see if the
8021 // result matches up with the existing shift's LHS op.
8022 if (IsMulOrDiv) {
8023 // Op to extract from is a mul or udiv by a constant.
8024 // Check:
8025 // c2 / (1 << (bitwidth(op0 v c0) - c1)) == c0
8026 // c2 % (1 << (bitwidth(op0 v c0) - c1)) == 0
8027 const APInt ExtractDiv = APInt::getOneBitSet(ExtractFromAmt.getBitWidth(),
8028 NeededShiftAmt.getZExtValue());
8029 APInt ResultAmt;
8030 APInt Rem;
8031 APInt::udivrem(ExtractFromAmt, ExtractDiv, ResultAmt, Rem);
8032 if (Rem != 0 || ResultAmt != OppLHSAmt)
8033 return SDValue();
8034 } else {
8035 // Op to extract from is a shift by a constant.
8036 // Check:
8037 // c2 - (bitwidth(op0 v c0) - c1) == c0
8038 if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc(
8039 ExtractFromAmt.getBitWidth()))
8040 return SDValue();
8041 }
8042
8043 // Return the expanded shift op that should allow a rotate to be formed.
8044 EVT ShiftVT = OppShift.getOperand(1).getValueType();
8045 EVT ResVT = ExtractFrom.getValueType();
8046 SDValue NewShiftNode = DAG.getConstant(NeededShiftAmt, DL, ShiftVT);
8047 return DAG.getNode(Opcode, DL, ResVT, OppShiftLHS, NewShiftNode);
8048}
8049
8050// Return true if we can prove that, whenever Neg and Pos are both in the
8051// range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos). This means that
8052// for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
8053//
8054// (or (shift1 X, Neg), (shift2 X, Pos))
8055//
8056// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
8057// in direction shift1 by Neg. The range [0, EltSize) means that we only need
8058// to consider shift amounts with defined behavior.
8059//
8060// The IsRotate flag should be set when the LHS of both shifts is the same.
8061// Otherwise if matching a general funnel shift, it should be clear.
8062static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
8063 SelectionDAG &DAG, bool IsRotate) {
8064 const auto &TLI = DAG.getTargetLoweringInfo();
8065 // If EltSize is a power of 2 then:
8066 //
8067 // (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
8068 // (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
8069 //
8070 // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
8071 // for the stronger condition:
8072 //
8073 // Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1) [A]
8074 //
8075 // for all Neg and Pos. Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
8076 // we can just replace Neg with Neg' for the rest of the function.
8077 //
8078 // In other cases we check for the even stronger condition:
8079 //
8080 // Neg == EltSize - Pos [B]
8081 //
8082 // for all Neg and Pos. Note that the (or ...) then invokes undefined
8083 // behavior if Pos == 0 (and consequently Neg == EltSize).
8084 //
8085 // We could actually use [A] whenever EltSize is a power of 2, but the
8086 // only extra cases that it would match are those uninteresting ones
8087 // where Neg and Pos are never in range at the same time. E.g. for
8088 // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
8089 // as well as (sub 32, Pos), but:
8090 //
8091 // (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
8092 //
8093 // always invokes undefined behavior for 32-bit X.
8094 //
8095 // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
8096 // This allows us to peek through any operations that only affect Mask's
8097 // un-demanded bits.
8098 //
8099 // NOTE: We can only do this when matching operations which won't modify the
8100 // least Log2(EltSize) significant bits and not a general funnel shift.
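// For example, with EltSize == 32: Neg == (sub 32, Pos) satisfies [B], while
// Neg == (and (sub 0, Pos), 31) satisfies [A], because
// (32 - Pos) & 31 == (0 - Pos) & 31 for every Pos.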
8101 unsigned MaskLoBits = 0;
8102 if (IsRotate && isPowerOf2_64(EltSize)) {
8103 unsigned Bits = Log2_64(EltSize);
8104 unsigned NegBits = Neg.getScalarValueSizeInBits();
8105 if (NegBits >= Bits) {
8106 APInt DemandedBits = APInt::getLowBitsSet(NegBits, Bits);
8107 if (SDValue Inner =
8108 TLI.SimplifyMultipleUseDemandedBits(Neg, DemandedBits, DAG)) {
8109 Neg = Inner;
8110 MaskLoBits = Bits;
8111 }
8112 }
8113 }
8114
8115 // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
8116 if (Neg.getOpcode() != ISD::SUB)
8117 return false;
8118 ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
8119 if (!NegC)
8120 return false;
8121 SDValue NegOp1 = Neg.getOperand(1);
8122
8123 // On the RHS of [A], if Pos is the result of operation on Pos' that won't
8124 // affect Mask's demanded bits, just replace Pos with Pos'. These operations
8125 // are redundant for the purpose of the equality.
8126 if (MaskLoBits) {
8127 unsigned PosBits = Pos.getScalarValueSizeInBits();
8128 if (PosBits >= MaskLoBits) {
8129 APInt DemandedBits = APInt::getLowBitsSet(PosBits, MaskLoBits);
8130 if (SDValue Inner =
8131 TLI.SimplifyMultipleUseDemandedBits(Pos, DemandedBits, DAG)) {
8132 Pos = Inner;
8133 }
8134 }
8135 }
8136
8137 // The condition we need is now:
8138 //
8139 // (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
8140 //
8141 // If NegOp1 == Pos then we need:
8142 //
8143 // EltSize & Mask == NegC & Mask
8144 //
8145 // (because "x & Mask" is a truncation and distributes through subtraction).
8146 //
8147 // We also need to account for a potential truncation of NegOp1 if the amount
8148 // has already been legalized to a shift amount type.
8149 APInt Width;
8150 if ((Pos == NegOp1) ||
8151 (NegOp1.getOpcode() == ISD::TRUNCATE && Pos == NegOp1.getOperand(0)))
8152 Width = NegC->getAPIntValue();
8153
8154 // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
8155 // Then the condition we want to prove becomes:
8156 //
8157 // (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
8158 //
8159 // which, again because "x & Mask" is a truncation, becomes:
8160 //
8161 // NegC & Mask == (EltSize - PosC) & Mask
8162 // EltSize & Mask == (NegC + PosC) & Mask
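// E.g. with EltSize == 64, Pos == (add y, 16) and Neg == (sub 48, y) match,
// since NegC + PosC == 48 + 16 == 64 == EltSize.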
8163 else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
8164 if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
8165 Width = PosC->getAPIntValue() + NegC->getAPIntValue();
8166 else
8167 return false;
8168 } else
8169 return false;
8170
8171 // Now we just need to check that EltSize & Mask == Width & Mask.
8172 if (MaskLoBits)
8173 // EltSize & Mask is 0 since Mask is EltSize - 1.
8174 return Width.getLoBits(MaskLoBits) == 0;
8175 return Width == EltSize;
8176}
8177
8178// A subroutine of MatchRotate used once we have found an OR of two opposite
8179// shifts of Shifted. If Neg == <operand size> - Pos then the OR reduces
8180// to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
8181// former being preferred if supported. InnerPos and InnerNeg are Pos and
8182// Neg with outer conversions stripped away.
8183SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
8184 SDValue Neg, SDValue InnerPos,
8185 SDValue InnerNeg, bool HasPos,
8186 unsigned PosOpcode, unsigned NegOpcode,
8187 const SDLoc &DL) {
8188 // fold (or (shl x, (*ext y)),
8189 // (srl x, (*ext (sub 32, y)))) ->
8190 // (rotl x, y) or (rotr x, (sub 32, y))
8191 //
8192 // fold (or (shl x, (*ext (sub 32, y))),
8193 // (srl x, (*ext y))) ->
8194 // (rotr x, y) or (rotl x, (sub 32, y))
8195 EVT VT = Shifted.getValueType();
8196 if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG,
8197 /*IsRotate*/ true)) {
8198 return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
8199 HasPos ? Pos : Neg);
8200 }
8201
8202 return SDValue();
8203}
8204
8205// A subroutine of MatchRotate used once we have found an OR of two opposite
8206// shifts of N0 + N1. If Neg == <operand size> - Pos then the OR reduces
8207// to both (PosOpcode N0, N1, Pos) and (NegOpcode N0, N1, Neg), with the
8208// former being preferred if supported. InnerPos and InnerNeg are Pos and
8209// Neg with outer conversions stripped away.
8210// TODO: Merge with MatchRotatePosNeg.
8211SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos,
8212 SDValue Neg, SDValue InnerPos,
8213 SDValue InnerNeg, bool HasPos,
8214 unsigned PosOpcode, unsigned NegOpcode,
8215 const SDLoc &DL) {
8216 EVT VT = N0.getValueType();
8217 unsigned EltBits = VT.getScalarSizeInBits();
8218
8219 // fold (or (shl x0, (*ext y)),
8220 // (srl x1, (*ext (sub 32, y)))) ->
8221 // (fshl x0, x1, y) or (fshr x0, x1, (sub 32, y))
8222 //
8223 // fold (or (shl x0, (*ext (sub 32, y))),
8224 // (srl x1, (*ext y))) ->
8225 // (fshr x0, x1, y) or (fshl x0, x1, (sub 32, y))
8226 if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG, /*IsRotate*/ N0 == N1)) {
8227 return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, N0, N1,
8228 HasPos ? Pos : Neg);
8229 }
8230
8231 // Matching the shift+xor cases, we can't easily use the xor'd shift amount
8232 // so for now just use the PosOpcode case if its legal.
8233 // TODO: When can we use the NegOpcode case?
8234 if (PosOpcode == ISD::FSHL && isPowerOf2_32(EltBits)) {
8235 auto IsBinOpImm = [](SDValue Op, unsigned BinOpc, unsigned Imm) {
8236 if (Op.getOpcode() != BinOpc)
8237 return false;
8238 ConstantSDNode *Cst = isConstOrConstSplat(Op.getOperand(1));
8239 return Cst && (Cst->getAPIntValue() == Imm);
8240 };
8241
8242 // fold (or (shl x0, y), (srl (srl x1, 1), (xor y, 31)))
8243 // -> (fshl x0, x1, y)
8244 if (IsBinOpImm(N1, ISD::SRL, 1) &&
8245 IsBinOpImm(InnerNeg, ISD::XOR, EltBits - 1) &&
8246 InnerPos == InnerNeg.getOperand(0) &&
8248 return DAG.getNode(ISD::FSHL, DL, VT, N0, N1.getOperand(0), Pos);
8249 }
8250
8251 // fold (or (shl (shl x0, 1), (xor y, 31)), (srl x1, y))
8252 // -> (fshr x0, x1, y)
8253 if (IsBinOpImm(N0, ISD::SHL, 1) &&
8254 IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
8255 InnerNeg == InnerPos.getOperand(0) &&
8257 return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
8258 }
8259
8260 // fold (or (shl (add x0, x0), (xor y, 31)), (srl x1, y))
8261 // -> (fshr x0, x1, y)
8262 // TODO: Should add(x,x) -> shl(x,1) be a general DAG canonicalization?
8263 if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N0.getOperand(1) &&
8264 IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
8265 InnerNeg == InnerPos.getOperand(0) &&
8267 return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
8268 }
8269 }
8270
8271 return SDValue();
8272}
8273
8274// MatchRotate - Handle an 'or' of two operands. If this is one of the many
8275// idioms for rotate, and if the target supports rotation instructions, generate
8276// a rot[lr]. This also matches funnel shift patterns, similar to rotation but
8277// with different shifted sources.
8278SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
8279 EVT VT = LHS.getValueType();
8280
8281 // The target must have at least one rotate/funnel flavor.
8282 // We still try to match rotate by constant pre-legalization.
8283 // TODO: Support pre-legalization funnel-shift by constant.
8284 bool HasROTL = hasOperation(ISD::ROTL, VT);
8285 bool HasROTR = hasOperation(ISD::ROTR, VT);
8286 bool HasFSHL = hasOperation(ISD::FSHL, VT);
8287 bool HasFSHR = hasOperation(ISD::FSHR, VT);
8288
8289 // If the type is going to be promoted and the target has enabled custom
8290 // lowering for rotate, allow matching rotate by non-constants. Only allow
8291 // this for scalar types.
8292 if (VT.isScalarInteger() && TLI.getTypeAction(*DAG.getContext(), VT) ==
8293 TargetLowering::TypePromoteInteger) {
8294 HasROTL |= TLI.getOperationAction(ISD::ROTL, VT) == TargetLowering::Custom;
8295 HasROTR |= TLI.getOperationAction(ISD::ROTR, VT) == TargetLowering::Custom;
8296 }
8297
8298 if (LegalOperations && !HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
8299 return SDValue();
8300
8301 // Check for truncated rotate.
8302 if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
8303 LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
8304 assert(LHS.getValueType() == RHS.getValueType());
8305 if (SDValue Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
8306 return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(), Rot);
8307 }
8308 }
8309
8310 // Match "(X shl/srl V1) & V2" where V2 may not be present.
8311 SDValue LHSShift; // The shift.
8312 SDValue LHSMask; // AND value if any.
8313 matchRotateHalf(DAG, LHS, LHSShift, LHSMask);
8314
8315 SDValue RHSShift; // The shift.
8316 SDValue RHSMask; // AND value if any.
8317 matchRotateHalf(DAG, RHS, RHSShift, RHSMask);
8318
8319 // If neither side matched a rotate half, bail
8320 if (!LHSShift && !RHSShift)
8321 return SDValue();
8322
8323 // InstCombine may have combined a constant shl, srl, mul, or udiv with one
8324 // side of the rotate, so try to handle that here. In all cases we need to
8325 // pass the matched shift from the opposite side to compute the opcode and
8326 // needed shift amount to extract. We still want to do this if both sides
8327 // matched a rotate half because one half may be a potential overshift that
8328 // can be broken down (ie if InstCombine merged two shl or srl ops into a
8329 // single one).
8330
8331 // Have LHS side of the rotate, try to extract the needed shift from the RHS.
8332 if (LHSShift)
8333 if (SDValue NewRHSShift =
8334 extractShiftForRotate(DAG, LHSShift, RHS, RHSMask, DL))
8335 RHSShift = NewRHSShift;
8336 // Have RHS side of the rotate, try to extract the needed shift from the LHS.
8337 if (RHSShift)
8338 if (SDValue NewLHSShift =
8339 extractShiftForRotate(DAG, RHSShift, LHS, LHSMask, DL))
8340 LHSShift = NewLHSShift;
8341
8342 // If a side is still missing, nothing else we can do.
8343 if (!RHSShift || !LHSShift)
8344 return SDValue();
8345
8346 // At this point we've matched or extracted a shift op on each side.
8347
8348 if (LHSShift.getOpcode() == RHSShift.getOpcode())
8349 return SDValue(); // Shifts must disagree.
8350
8351 // Canonicalize shl to left side in a shl/srl pair.
8352 if (RHSShift.getOpcode() == ISD::SHL) {
8353 std::swap(LHS, RHS);
8354 std::swap(LHSShift, RHSShift);
8355 std::swap(LHSMask, RHSMask);
8356 }
8357
8358 // Something has gone wrong - we've lost the shl/srl pair - bail.
8359 if (LHSShift.getOpcode() != ISD::SHL || RHSShift.getOpcode() != ISD::SRL)
8360 return SDValue();
8361
8362 unsigned EltSizeInBits = VT.getScalarSizeInBits();
8363 SDValue LHSShiftArg = LHSShift.getOperand(0);
8364 SDValue LHSShiftAmt = LHSShift.getOperand(1);
8365 SDValue RHSShiftArg = RHSShift.getOperand(0);
8366 SDValue RHSShiftAmt = RHSShift.getOperand(1);
8367
8368 auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
8369 ConstantSDNode *RHS) {
8370 return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
8371 };
8372
8373 auto ApplyMasks = [&](SDValue Res) {
8374 // If there is an AND of either shifted operand, apply it to the result.
8375 if (LHSMask.getNode() || RHSMask.getNode()) {
8376 SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
8377 SDValue Mask = AllOnes;
8378
8379 if (LHSMask.getNode()) {
8380 SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
8381 Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
8382 DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
8383 }
8384 if (RHSMask.getNode()) {
8385 SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
8386 Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
8387 DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
8388 }
8389
8390 Res = DAG.getNode(ISD::AND, DL, VT, Res, Mask);
8391 }
8392
8393 return Res;
8394 };
8395
8396 // TODO: Support pre-legalization funnel-shift by constant.
8397 bool IsRotate = LHSShiftArg == RHSShiftArg;
8398 if (!IsRotate && !(HasFSHL || HasFSHR)) {
8399 if (TLI.isTypeLegal(VT) && LHS.hasOneUse() && RHS.hasOneUse() &&
8400 ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
8401 // Look for a disguised rotate by constant.
8402 // The common shifted operand X may be hidden inside another 'or'.
8403 SDValue X, Y;
8404 auto matchOr = [&X, &Y](SDValue Or, SDValue CommonOp) {
8405 if (!Or.hasOneUse() || Or.getOpcode() != ISD::OR)
8406 return false;
8407 if (CommonOp == Or.getOperand(0)) {
8408 X = CommonOp;
8409 Y = Or.getOperand(1);
8410 return true;
8411 }
8412 if (CommonOp == Or.getOperand(1)) {
8413 X = CommonOp;
8414 Y = Or.getOperand(0);
8415 return true;
8416 }
8417 return false;
8418 };
8419
8420 SDValue Res;
8421 if (matchOr(LHSShiftArg, RHSShiftArg)) {
8422 // (shl (X | Y), C1) | (srl X, C2) --> (rotl X, C1) | (shl Y, C1)
8423 SDValue RotX = DAG.getNode(ISD::ROTL, DL, VT, X, LHSShiftAmt);
8424 SDValue ShlY = DAG.getNode(ISD::SHL, DL, VT, Y, LHSShiftAmt);
8425 Res = DAG.getNode(ISD::OR, DL, VT, RotX, ShlY);
8426 } else if (matchOr(RHSShiftArg, LHSShiftArg)) {
8427 // (shl X, C1) | (srl (X | Y), C2) --> (rotl X, C1) | (srl Y, C2)
8428 SDValue RotX = DAG.getNode(ISD::ROTL, DL, VT, X, LHSShiftAmt);
8429 SDValue SrlY = DAG.getNode(ISD::SRL, DL, VT, Y, RHSShiftAmt);
8430 Res = DAG.getNode(ISD::OR, DL, VT, RotX, SrlY);
8431 } else {
8432 return SDValue();
8433 }
8434
8435 return ApplyMasks(Res);
8436 }
8437
8438 return SDValue(); // Requires funnel shift support.
8439 }
8440
8441 // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
8442 // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
8443 // fold (or (shl x, C1), (srl y, C2)) -> (fshl x, y, C1)
8444 // fold (or (shl x, C1), (srl y, C2)) -> (fshr x, y, C2)
8445 // iff C1+C2 == EltSizeInBits
8446 if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
8447 SDValue Res;
8448 if (IsRotate && (HasROTL || HasROTR || !(HasFSHL || HasFSHR))) {
8449 bool UseROTL = !LegalOperations || HasROTL;
8450 Res = DAG.getNode(UseROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
8451 UseROTL ? LHSShiftAmt : RHSShiftAmt);
8452 } else {
8453 bool UseFSHL = !LegalOperations || HasFSHL;
8454 Res = DAG.getNode(UseFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg,
8455 RHSShiftArg, UseFSHL ? LHSShiftAmt : RHSShiftAmt);
8456 }
8457
8458 return ApplyMasks(Res);
8459 }
8460
8461 // Even pre-legalization, we can't easily rotate/funnel-shift by a variable
8462 // shift.
8463 if (!HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
8464 return SDValue();
8465
8466 // If there is a mask here, and we have a variable shift, we can't be sure
8467 // that we're masking out the right stuff.
8468 if (LHSMask.getNode() || RHSMask.getNode())
8469 return SDValue();
8470
8471 // If the shift amount is sign/zext/any-extended just peel it off.
8472 SDValue LExtOp0 = LHSShiftAmt;
8473 SDValue RExtOp0 = RHSShiftAmt;
8474 if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
8475 LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
8476 LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
8477 LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
8478 (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
8479 RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
8480 RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
8481 RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
8482 LExtOp0 = LHSShiftAmt.getOperand(0);
8483 RExtOp0 = RHSShiftAmt.getOperand(0);
8484 }
8485
8486 if (IsRotate && (HasROTL || HasROTR)) {
8487 SDValue TryL =
8488 MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt, LExtOp0,
8489 RExtOp0, HasROTL, ISD::ROTL, ISD::ROTR, DL);
8490 if (TryL)
8491 return TryL;
8492
8493 SDValue TryR =
8494 MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt, RExtOp0,
8495 LExtOp0, HasROTR, ISD::ROTR, ISD::ROTL, DL);
8496 if (TryR)
8497 return TryR;
8498 }
8499
8500 SDValue TryL =
8501 MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, LHSShiftAmt, RHSShiftAmt,
8502 LExtOp0, RExtOp0, HasFSHL, ISD::FSHL, ISD::FSHR, DL);
8503 if (TryL)
8504 return TryL;
8505
8506 SDValue TryR =
8507 MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
8508 RExtOp0, LExtOp0, HasFSHR, ISD::FSHR, ISD::FSHL, DL);
8509 if (TryR)
8510 return TryR;
8511
8512 return SDValue();
8513}
8514
8515/// Recursively traverses the expression calculating the origin of the requested
8516/// byte of the given value. Returns std::nullopt if the provider can't be
8517/// calculated.
8518///
8519/// For all the values except the root of the expression, we verify that the
8520/// value has exactly one use and if not then return std::nullopt. This way if
8521/// the origin of the byte is returned it's guaranteed that the values which
8522/// contribute to the byte are not used outside of this expression.
8523
8524/// However, there is a special case when dealing with vector loads -- we allow
8525/// more than one use if the load is a vector type. Since the values that
8526/// contribute to the byte ultimately come from the ExtractVectorElements of the
8527/// Load, we don't care if the Load has uses other than ExtractVectorElements,
8528/// because those operations are independent from the pattern to be combined.
8529/// For vector loads, we simply care that the ByteProviders are adjacent
8530/// positions of the same vector, and their index matches the byte that is being
8531/// provided. This is captured by the \p VectorIndex algorithm. \p VectorIndex
8532/// is the index used in an ExtractVectorElement, and \p StartingIndex is the
8533/// byte position we are trying to provide for the LoadCombine. If these do
8534/// not match, then we can not combine the vector loads. \p Index uses the
8535/// byte position we are trying to provide for and is matched against the
8536/// shl and load size. The \p Index algorithm ensures the requested byte is
8537/// provided for by the pattern, and the pattern does not over provide bytes.
8538///
8539///
8540/// The supported LoadCombine pattern for vector loads is as follows
8541/// or
8542/// / \
8543/// or shl
8544/// / \ |
8545/// or shl zext
8546/// / \ | |
8547/// shl zext zext EVE*
8548/// | | | |
8549/// zext EVE* EVE* LOAD
8550/// | | |
8551/// EVE* LOAD LOAD
8552/// |
8553/// LOAD
8554///
8555/// *ExtractVectorElement
8556 using SDByteProvider = ByteProvider<SDNode *>;
8557
8558static std::optional<SDByteProvider>
8559 calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
8560 std::optional<uint64_t> VectorIndex,
8561 unsigned StartingIndex = 0) {
8562
8563 // Typical i64 by i8 pattern requires recursion up to 8 calls depth
8564 if (Depth == 10)
8565 return std::nullopt;
8566
8567 // Only allow multiple uses if the instruction is a vector load (in which
8568 // case we will use the load for every ExtractVectorElement)
8569 if (Depth && !Op.hasOneUse() &&
8570 (Op.getOpcode() != ISD::LOAD || !Op.getValueType().isVector()))
8571 return std::nullopt;
8572
8573 // Fail to combine if we have encountered anything but a LOAD after handling
8574 // an ExtractVectorElement.
8575 if (Op.getOpcode() != ISD::LOAD && VectorIndex.has_value())
8576 return std::nullopt;
8577
8578 unsigned BitWidth = Op.getValueSizeInBits();
8579 if (BitWidth % 8 != 0)
8580 return std::nullopt;
8581 unsigned ByteWidth = BitWidth / 8;
8582 assert(Index < ByteWidth && "invalid index requested");
8583 (void) ByteWidth;
8584
8585 switch (Op.getOpcode()) {
8586 case ISD::OR: {
8587 auto LHS =
8588 calculateByteProvider(Op->getOperand(0), Index, Depth + 1, VectorIndex);
8589 if (!LHS)
8590 return std::nullopt;
8591 auto RHS =
8592 calculateByteProvider(Op->getOperand(1), Index, Depth + 1, VectorIndex);
8593 if (!RHS)
8594 return std::nullopt;
8595
8596 if (LHS->isConstantZero())
8597 return RHS;
8598 if (RHS->isConstantZero())
8599 return LHS;
8600 return std::nullopt;
8601 }
8602 case ISD::SHL: {
8603 auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
8604 if (!ShiftOp)
8605 return std::nullopt;
8606
8607 uint64_t BitShift = ShiftOp->getZExtValue();
8608
8609 if (BitShift % 8 != 0)
8610 return std::nullopt;
8611 uint64_t ByteShift = BitShift / 8;
8612
8613 // If we are shifting by an amount greater than the index we are trying to
8614 // provide, then do not provide anything. Otherwise, subtract the index by
8615 // the amount we shifted by.
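// E.g. for (shl x, 16): bytes 0 and 1 of the result are known zero, and byte
// i of the result (i >= 2) is byte i - 2 of x.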
8616 return Index < ByteShift
8617 ? SDByteProvider::getConstantZero()
8618 : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
8619 Depth + 1, VectorIndex, Index);
8620 }
8621 case ISD::ANY_EXTEND:
8622 case ISD::SIGN_EXTEND:
8623 case ISD::ZERO_EXTEND: {
8624 SDValue NarrowOp = Op->getOperand(0);
8625 unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
8626 if (NarrowBitWidth % 8 != 0)
8627 return std::nullopt;
8628 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
8629
8630 if (Index >= NarrowByteWidth)
8631 return Op.getOpcode() == ISD::ZERO_EXTEND
8632 ? std::optional<SDByteProvider>(
8633 SDByteProvider::getConstantZero())
8634 : std::nullopt;
8635 return calculateByteProvider(NarrowOp, Index, Depth + 1, VectorIndex,
8636 StartingIndex);
8637 }
8638 case ISD::BSWAP:
8639 return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
8640 Depth + 1, VectorIndex, StartingIndex);
8641 case ISD::EXTRACT_VECTOR_ELT: {
8642 auto OffsetOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
8643 if (!OffsetOp)
8644 return std::nullopt;
8645
8646 VectorIndex = OffsetOp->getZExtValue();
8647
8648 SDValue NarrowOp = Op->getOperand(0);
8649 unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
8650 if (NarrowBitWidth % 8 != 0)
8651 return std::nullopt;
8652 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
8653 // EXTRACT_VECTOR_ELT can extend the element type to the width of the return
8654 // type, leaving the high bits undefined.
8655 if (Index >= NarrowByteWidth)
8656 return std::nullopt;
8657
8658 // Check to see if the position of the element in the vector corresponds
8659 // with the byte we are trying to provide for. In the case of a vector of
8660 // i8, this simply means the VectorIndex == StartingIndex. For non i8 cases,
8661 // the element will provide a range of bytes. For example, if we have a
8662 // vector of i16s, each element provides two bytes (V[1] provides byte 2 and
8663 // 3).
8664 if (*VectorIndex * NarrowByteWidth > StartingIndex)
8665 return std::nullopt;
8666 if ((*VectorIndex + 1) * NarrowByteWidth <= StartingIndex)
8667 return std::nullopt;
8668
8669 return calculateByteProvider(Op->getOperand(0), Index, Depth + 1,
8670 VectorIndex, StartingIndex);
8671 }
8672 case ISD::LOAD: {
8673 auto L = cast<LoadSDNode>(Op.getNode());
8674 if (!L->isSimple() || L->isIndexed())
8675 return std::nullopt;
8676
8677 unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
8678 if (NarrowBitWidth % 8 != 0)
8679 return std::nullopt;
8680 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
8681
8682 // If the width of the load does not reach the byte we are trying to provide for
8683 // and it is not a ZEXTLOAD, then the load does not provide for the byte in
8684 // question
8685 if (Index >= NarrowByteWidth)
8686 return L->getExtensionType() == ISD::ZEXTLOAD
8687 ? std::optional<SDByteProvider>(
8688 SDByteProvider::getConstantZero())
8689 : std::nullopt;
8690
8691 unsigned BPVectorIndex = VectorIndex.value_or(0U);
8692 return SDByteProvider::getSrc(L, Index, BPVectorIndex);
8693 }
8694 }
8695
8696 return std::nullopt;
8697}
8698
8699static unsigned littleEndianByteAt(unsigned BW, unsigned i) {
8700 return i;
8701}
8702
8703static unsigned bigEndianByteAt(unsigned BW, unsigned i) {
8704 return BW - i - 1;
8705}
8706
8707 // Check if the byte offsets we are looking at match either a big or little
8708 // endian value load. Return true for big endian, false for little endian,
8709 // and std::nullopt if the match failed.
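// E.g. offsets {0, 1, 2, 3} relative to FirstOffset indicate a little endian
// value, and {3, 2, 1, 0} a big endian one.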
8710static std::optional<bool> isBigEndian(const ArrayRef<int64_t> ByteOffsets,
8711 int64_t FirstOffset) {
8712 // The endian can be decided only when it is 2 bytes at least.
8713 unsigned Width = ByteOffsets.size();
8714 if (Width < 2)
8715 return std::nullopt;
8716
8717 bool BigEndian = true, LittleEndian = true;
8718 for (unsigned i = 0; i < Width; i++) {
8719 int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
8720 LittleEndian &= CurrentByteOffset == littleEndianByteAt(Width, i);
8721 BigEndian &= CurrentByteOffset == bigEndianByteAt(Width, i);
8722 if (!BigEndian && !LittleEndian)
8723 return std::nullopt;
8724 }
8725
8726 assert((BigEndian != LittleEndian) && "It should be either big endian or "
8727 "little endian");
8728 return BigEndian;
8729}
8730
8731 static SDValue stripTruncAndExt(SDValue Value) {
8732 switch (Value.getOpcode()) {
8733 case ISD::TRUNCATE:
8734 case ISD::ZERO_EXTEND:
8735 case ISD::SIGN_EXTEND:
8736 case ISD::ANY_EXTEND:
8737 return stripTruncAndExt(Value.getOperand(0));
8738 }
8739 return Value;
8740}
8741
8742/// Match a pattern where a wide type scalar value is stored by several narrow
8743 /// stores. Fold it into a single store or a BSWAP and a store if the target
8744/// supports it.
8745///
8746/// Assuming little endian target:
8747/// i8 *p = ...
8748/// i32 val = ...
8749/// p[0] = (val >> 0) & 0xFF;
8750/// p[1] = (val >> 8) & 0xFF;
8751/// p[2] = (val >> 16) & 0xFF;
8752/// p[3] = (val >> 24) & 0xFF;
8753/// =>
8754/// *((i32)p) = val;
8755///
8756/// i8 *p = ...
8757/// i32 val = ...
8758/// p[0] = (val >> 24) & 0xFF;
8759/// p[1] = (val >> 16) & 0xFF;
8760/// p[2] = (val >> 8) & 0xFF;
8761/// p[3] = (val >> 0) & 0xFF;
8762/// =>
8763/// *((i32)p) = BSWAP(val);
8764SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
8765 // The matching looks for "store (trunc x)" patterns that appear early but are
8766 // likely to be replaced by truncating store nodes during combining.
8767 // TODO: If there is evidence that running this later would help, this
8768 // limitation could be removed. Legality checks may need to be added
8769 // for the created store and optional bswap/rotate.
8770 if (LegalOperations || OptLevel == CodeGenOptLevel::None)
8771 return SDValue();
8772
8773 // We only handle merging simple stores of 1-4 bytes.
8774 // TODO: Allow unordered atomics when wider type is legal (see D66309)
8775 EVT MemVT = N->getMemoryVT();
8776 if (!(MemVT == MVT::i8 || MemVT == MVT::i16 || MemVT == MVT::i32) ||
8777 !N->isSimple() || N->isIndexed())
8778 return SDValue();
8779
8780 // Collect all of the stores in the chain, up to the maximum store width (i64).
8781 SDValue Chain = N->getChain();
8782 SmallVector<StoreSDNode *, 8> Stores = {N};
8783 unsigned NarrowNumBits = MemVT.getScalarSizeInBits();
8784 unsigned MaxWideNumBits = 64;
8785 unsigned MaxStores = MaxWideNumBits / NarrowNumBits;
8786 while (auto *Store = dyn_cast<StoreSDNode>(Chain)) {
8787 // All stores must be the same size to ensure that we are writing all of the
8788 // bytes in the wide value.
8789 // This store should have exactly one use as a chain operand for another
8790 // store in the merging set. If there are other chain uses, then the
8791 // transform may not be safe because order of loads/stores outside of this
8792 // set may not be preserved.
8793 // TODO: We could allow multiple sizes by tracking each stored byte.
8794 if (Store->getMemoryVT() != MemVT || !Store->isSimple() ||
8795 Store->isIndexed() || !Store->hasOneUse())
8796 return SDValue();
8797 Stores.push_back(Store);
8798 Chain = Store->getChain();
8799 if (MaxStores < Stores.size())
8800 return SDValue();
8801 }
8802 // There is no reason to continue if we do not have at least a pair of stores.
8803 if (Stores.size() < 2)
8804 return SDValue();
8805
8806 // Handle simple types only.
8807 LLVMContext &Context = *DAG.getContext();
8808 unsigned NumStores = Stores.size();
8809 unsigned WideNumBits = NumStores * NarrowNumBits;
8810 EVT WideVT = EVT::getIntegerVT(Context, WideNumBits);
8811 if (WideVT != MVT::i16 && WideVT != MVT::i32 && WideVT != MVT::i64)
8812 return SDValue();
8813
8814 // Check if all bytes of the source value that we are looking at are stored
8815 // to the same base address. Collect offsets from Base address into OffsetMap.
8816 SDValue SourceValue;
8817 SmallVector<int64_t, 8> OffsetMap(NumStores, INT64_MAX);
8818 int64_t FirstOffset = INT64_MAX;
8819 StoreSDNode *FirstStore = nullptr;
8820 std::optional<BaseIndexOffset> Base;
8821 for (auto *Store : Stores) {
8822 // All the stores store different parts of the CombinedValue. A truncate is
8823 // required to get the partial value.
8824 SDValue Trunc = Store->getValue();
8825 if (Trunc.getOpcode() != ISD::TRUNCATE)
8826 return SDValue();
8827 // Other than the first/last part, a shift operation is required to get the
8828 // offset.
8829 int64_t Offset = 0;
8830 SDValue WideVal = Trunc.getOperand(0);
8831 if ((WideVal.getOpcode() == ISD::SRL || WideVal.getOpcode() == ISD::SRA) &&
8832 isa<ConstantSDNode>(WideVal.getOperand(1))) {
8833 // The shift amount must be a constant multiple of the narrow type.
8834 // It is translated to the offset address in the wide source value "y".
8835 //
8836 // x = srl y, ShiftAmtC
8837 // i8 z = trunc x
8838 // store z, ...
8839 uint64_t ShiftAmtC = WideVal.getConstantOperandVal(1);
8840 if (ShiftAmtC % NarrowNumBits != 0)
8841 return SDValue();
8842
8843 // Make sure we aren't reading bits that are shifted in.
8844 if (ShiftAmtC > WideVal.getScalarValueSizeInBits() - NarrowNumBits)
8845 return SDValue();
8846
8847 Offset = ShiftAmtC / NarrowNumBits;
8848 WideVal = WideVal.getOperand(0);
8849 }
8850
8851 // Stores must share the same source value with different offsets.
8852 // Truncate and extends should be stripped to get the single source value.
8853 if (!SourceValue)
8854 SourceValue = WideVal;
8855 else if (stripTruncAndExt(SourceValue) != stripTruncAndExt(WideVal))
8856 return SDValue();
8857 else if (SourceValue.getValueType() != WideVT) {
8858 if (WideVal.getValueType() == WideVT ||
8859 WideVal.getScalarValueSizeInBits() >
8860 SourceValue.getScalarValueSizeInBits())
8861 SourceValue = WideVal;
8862 // Give up if the source value type is smaller than the store size.
8863 if (SourceValue.getScalarValueSizeInBits() < WideVT.getScalarSizeInBits())
8864 return SDValue();
8865 }
8866
8867 // Stores must share the same base address.
8868 BaseIndexOffset Ptr = BaseIndexOffset::match(Store, DAG);
8869 int64_t ByteOffsetFromBase = 0;
8870 if (!Base)
8871 Base = Ptr;
8872 else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
8873 return SDValue();
8874
8875 // Remember the first store.
8876 if (ByteOffsetFromBase < FirstOffset) {
8877 FirstStore = Store;
8878 FirstOffset = ByteOffsetFromBase;
8879 }
8880 // Map the offset in the store and the offset in the combined value, and
8881 // early return if it has been set before.
8882 if (Offset < 0 || Offset >= NumStores || OffsetMap[Offset] != INT64_MAX)
8883 return SDValue();
8884 OffsetMap[Offset] = ByteOffsetFromBase;
8885 }
8886
8887 assert(FirstOffset != INT64_MAX && "First byte offset must be set");
8888 assert(FirstStore && "First store must be set");
8889
8890 // Check that a store of the wide type is both allowed and fast on the target
8891 const DataLayout &Layout = DAG.getDataLayout();
8892 unsigned Fast = 0;
8893 bool Allowed = TLI.allowsMemoryAccess(Context, Layout, WideVT,
8894 *FirstStore->getMemOperand(), &Fast);
8895 if (!Allowed || !Fast)
8896 return SDValue();
8897
8898 // Check if the pieces of the value are going to the expected places in memory
8899 // to merge the stores.
8900 auto checkOffsets = [&](bool MatchLittleEndian) {
8901 if (MatchLittleEndian) {
8902 for (unsigned i = 0; i != NumStores; ++i)
8903 if (OffsetMap[i] != i * (NarrowNumBits / 8) + FirstOffset)
8904 return false;
8905 } else { // MatchBigEndian by reversing loop counter.
8906 for (unsigned i = 0, j = NumStores - 1; i != NumStores; ++i, --j)
8907 if (OffsetMap[j] != i * (NarrowNumBits / 8) + FirstOffset)
8908 return false;
8909 }
8910 return true;
8911 };
8912
8913 // Check if the offsets line up for the native data layout of this target.
8914 bool NeedBswap = false;
8915 bool NeedRotate = false;
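// For example, four i8 stores written in reverse byte order need a BSWAP of
// the wide value, while two i16 halves stored swapped only need a rotate by
// half the wide width.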
8916 if (!checkOffsets(Layout.isLittleEndian())) {
8917 // Special-case: check if byte offsets line up for the opposite endian.
8918 if (NarrowNumBits == 8 && checkOffsets(Layout.isBigEndian()))
8919 NeedBswap = true;
8920 else if (NumStores == 2 && checkOffsets(Layout.isBigEndian()))
8921 NeedRotate = true;
8922 else
8923 return SDValue();
8924 }
8925
8926 SDLoc DL(N);
8927 if (WideVT != SourceValue.getValueType()) {
8928 assert(SourceValue.getValueType().getScalarSizeInBits() > WideNumBits &&
8929 "Unexpected store value to merge");
8930 SourceValue = DAG.getNode(ISD::TRUNCATE, DL, WideVT, SourceValue);
8931 }
8932
8933 // Before legalize we can introduce illegal bswaps/rotates which will be later
8934 // converted to an explicit bswap sequence. This way we end up with a single
8935 // store and byte shuffling instead of several stores and byte shuffling.
8936 if (NeedBswap) {
8937 SourceValue = DAG.getNode(ISD::BSWAP, DL, WideVT, SourceValue);
8938 } else if (NeedRotate) {
8939 assert(WideNumBits % 2 == 0 && "Unexpected type for rotate");
8940 SDValue RotAmt = DAG.getConstant(WideNumBits / 2, DL, WideVT);
8941 SourceValue = DAG.getNode(ISD::ROTR, DL, WideVT, SourceValue, RotAmt);
8942 }
8943
8944 SDValue NewStore =
8945 DAG.getStore(Chain, DL, SourceValue, FirstStore->getBasePtr(),
8946 FirstStore->getPointerInfo(), FirstStore->getAlign());
8947
8948 // Rely on other DAG combine rules to remove the other individual stores.
8949 DAG.ReplaceAllUsesWith(N, NewStore.getNode());
8950 return NewStore;
8951}
8952
8953/// Match a pattern where a wide type scalar value is loaded by several narrow
8954/// loads and combined by shifts and ors. Fold it into a single load or a load
8955 /// and a BSWAP if the target supports it.
8956///
8957/// Assuming little endian target:
8958/// i8 *a = ...
8959/// i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
8960/// =>
8961/// i32 val = *((i32)a)
8962///
8963/// i8 *a = ...
8964/// i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
8965/// =>
8966/// i32 val = BSWAP(*((i32)a))
8967///
8968/// TODO: This rule matches complex patterns with OR node roots and doesn't
8969/// interact well with the worklist mechanism. When a part of the pattern is
8970/// updated (e.g. one of the loads) its direct users are put into the worklist,
8971/// but the root node of the pattern which triggers the load combine is not
8972/// necessarily a direct user of the changed node. For example, once the address
8973 /// of the t28 load is reassociated, the load combine won't be triggered:
8974/// t25: i32 = add t4, Constant:i32<2>
8975/// t26: i64 = sign_extend t25
8976/// t27: i64 = add t2, t26
8977/// t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
8978/// t29: i32 = zero_extend t28
8979/// t32: i32 = shl t29, Constant:i8<8>
8980/// t33: i32 = or t23, t32
8981/// As a possible fix visitLoad can check if the load can be a part of a load
8982/// combine pattern and add corresponding OR roots to the worklist.
8983SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
8984 assert(N->getOpcode() == ISD::OR &&
8985 "Can only match load combining against OR nodes");
8986
8987 // Handles simple types only
8988 EVT VT = N->getValueType(0);
8989 if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
8990 return SDValue();
8991 unsigned ByteWidth = VT.getSizeInBits() / 8;
8992
8993 bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
8994 auto MemoryByteOffset = [&](SDByteProvider P) {
8995 assert(P.hasSrc() && "Must be a memory byte provider");
8996 auto *Load = cast<LoadSDNode>(P.Src.value());
8997
8998 unsigned LoadBitWidth = Load->getMemoryVT().getScalarSizeInBits();
8999
9000 assert(LoadBitWidth % 8 == 0 &&
9001 "can only analyze providers for individual bytes not bit");
9002 unsigned LoadByteWidth = LoadBitWidth / 8;
9003 return IsBigEndianTarget ? bigEndianByteAt(LoadByteWidth, P.DestOffset)
9004 : littleEndianByteAt(LoadByteWidth, P.DestOffset);
9005 };
9006
9007 std::optional<BaseIndexOffset> Base;
9008 SDValue Chain;
9009
9010 SmallPtrSet<LoadSDNode *, 8> Loads;
9011 std::optional<SDByteProvider> FirstByteProvider;
9012 int64_t FirstOffset = INT64_MAX;
9013
9014 // Check if all the bytes of the OR we are looking at are loaded from the same
9015 // base address. Collect bytes offsets from Base address in ByteOffsets.
9016 SmallVector<int64_t, 8> ByteOffsets(ByteWidth);
9017 unsigned ZeroExtendedBytes = 0;
9018 for (int i = ByteWidth - 1; i >= 0; --i) {
9019 auto P =
9020 calculateByteProvider(SDValue(N, 0), i, 0, /*VectorIndex*/ std::nullopt,
9021 /*StartingIndex*/ i);
9022 if (!P)
9023 return SDValue();
9024
9025 if (P->isConstantZero()) {
9026 // It's OK for the N most significant bytes to be 0, we can just
9027 // zero-extend the load.
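// E.g. on a little endian target, i32 val = a[0] | (a[1] << 8) leaves the two
// most significant bytes zero and can become (zext i32 (load i16 a)).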
9028 if (++ZeroExtendedBytes != (ByteWidth - static_cast<unsigned>(i)))
9029 return SDValue();
9030 continue;
9031 }
9032 assert(P->hasSrc() && "provenance should either be memory or zero");
9033 auto *L = cast<LoadSDNode>(P->Src.value());
9034
9035 // All loads must share the same chain
9036 SDValue LChain = L->getChain();
9037 if (!Chain)
9038 Chain = LChain;
9039 else if (Chain != LChain)
9040 return SDValue();
9041
9042 // Loads must share the same base address
9043 BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG);
9044 int64_t ByteOffsetFromBase = 0;
9045
9046 // For vector loads, the expected load combine pattern will have an
9047 // ExtractElement for each index in the vector. While each of these
9048 // ExtractElements will be accessing the same base address as determined
9049 // by the load instruction, the actual bytes they interact with will differ
9050 // due to different ExtractElement indices. To accurately determine the
9051 // byte position of an ExtractElement, we offset the base load ptr with
9052 // the index multiplied by the byte size of each element in the vector.
9053 if (L->getMemoryVT().isVector()) {
9054 unsigned LoadWidthInBit = L->getMemoryVT().getScalarSizeInBits();
9055 if (LoadWidthInBit % 8 != 0)
9056 return SDValue();
9057 unsigned ByteOffsetFromVector = P->SrcOffset * LoadWidthInBit / 8;
9058 Ptr.addToOffset(ByteOffsetFromVector);
9059 }
9060
9061 if (!Base)
9062 Base = Ptr;
9063
9064 else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
9065 return SDValue();
9066
9067 // Calculate the offset of the current byte from the base address
9068 ByteOffsetFromBase += MemoryByteOffset(*P);
9069 ByteOffsets[i] = ByteOffsetFromBase;
9070
9071 // Remember the first byte load
9072 if (ByteOffsetFromBase < FirstOffset) {
9073 FirstByteProvider = P;
9074 FirstOffset = ByteOffsetFromBase;
9075 }
9076
9077 Loads.insert(L);
9078 }
9079
9080 assert(!Loads.empty() && "All the bytes of the value must be loaded from "
9081 "memory, so there must be at least one load which produces the value");
9082 assert(Base && "Base address of the accessed memory location must be set");
9083 assert(FirstOffset != INT64_MAX && "First byte offset must be set");
9084
9085 bool NeedsZext = ZeroExtendedBytes > 0;
9086
9087 EVT MemVT =
9088 EVT::getIntegerVT(*DAG.getContext(), (ByteWidth - ZeroExtendedBytes) * 8);
9089
9090 if (!MemVT.isSimple())
9091 return SDValue();
9092
9093 // Before legalize we can introduce too wide illegal loads which will be later
9094 // split into legal sized loads. This enables us to combine i64 load by i8
9095 // patterns to a couple of i32 loads on 32 bit targets.
9096 if (LegalOperations &&
9098 MemVT))
9099 return SDValue();
9100
9101 // Check if the bytes of the OR we are looking at match with either big or
9102 // little endian value load
9103 std::optional<bool> IsBigEndian = isBigEndian(
9104 ArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), FirstOffset);
9105 if (!IsBigEndian)
9106 return SDValue();
9107
9108 assert(FirstByteProvider && "must be set");
9109
9110 // Ensure that the first byte is loaded from the zero offset of the first load,
9111 // so the combined value can be loaded from the first load's address.
9112 if (MemoryByteOffset(*FirstByteProvider) != 0)
9113 return SDValue();
9114 auto *FirstLoad = cast<LoadSDNode>(FirstByteProvider->Src.value());
9115
9116 // The node we are looking at matches with the pattern, check if we can
9117 // replace it with a single (possibly zero-extended) load and bswap + shift if
9118 // needed.
9119
9120 // If the load needs byte swap check if the target supports it
9121 bool NeedsBswap = IsBigEndianTarget != *IsBigEndian;
9122
9123 // Before legalize we can introduce illegal bswaps which will be later
9124 // converted to an explicit bswap sequence. This way we end up with a single
9125 // load and byte shuffling instead of several loads and byte shuffling.
9126 // We do not introduce illegal bswaps when zero-extending as this tends to
9127 // introduce too many arithmetic instructions.
9128 if (NeedsBswap && (LegalOperations || NeedsZext) &&
9129 !TLI.isOperationLegal(ISD::BSWAP, VT))
9130 return SDValue();
9131
9132 // If we need to bswap and zero extend, we have to insert a shift. Check that
9133 // it is legal.
9134 if (NeedsBswap && NeedsZext && LegalOperations &&
9135 !TLI.isOperationLegal(ISD::SHL, VT))
9136 return SDValue();
9137
9138 // Check that a load of the wide type is both allowed and fast on the target
9139 unsigned Fast = 0;
9140 bool Allowed =
9141 TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
9142 *FirstLoad->getMemOperand(), &Fast);
9143 if (!Allowed || !Fast)
9144 return SDValue();
9145
9146 SDValue NewLoad =
9147 DAG.getExtLoad(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, SDLoc(N), VT,
9148 Chain, FirstLoad->getBasePtr(),
9149 FirstLoad->getPointerInfo(), MemVT, FirstLoad->getAlign());
9150
9151 // Transfer chain users from old loads to the new load.
9152 for (LoadSDNode *L : Loads)
9153 DAG.makeEquivalentMemoryOrdering(L, NewLoad);
9154
9155 if (!NeedsBswap)
9156 return NewLoad;
9157
9158 SDValue ShiftedLoad =
9159 NeedsZext
9160 ? DAG.getNode(ISD::SHL, SDLoc(N), VT, NewLoad,
9161 DAG.getShiftAmountConstant(ZeroExtendedBytes * 8, VT,
9162 SDLoc(N), LegalOperations))
9163 : NewLoad;
9164 return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, ShiftedLoad);
9165}
9166
9167// If the target has andn, bsl, or a similar bit-select instruction,
9168// we want to unfold masked merge, with canonical pattern of:
9169// | A | |B|
9170// ((x ^ y) & m) ^ y
9171// | D |
9172// Into:
9173// (x & m) | (y & ~m)
9174// If y is a constant, m is not a 'not', and the 'andn' does not work with
9175// immediates, we unfold into a different pattern:
9176// ~(~x & m) & (m | y)
9177// If x is a constant, m is a 'not', and the 'andn' does not work with
9178// immediates, we unfold into a different pattern:
9179// (x | ~m) & ~(~m & ~y)
9180// NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at
9181// the very least that breaks andnpd / andnps patterns, and because those
9182// patterns are simplified in IR and shouldn't be created in the DAG
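// The equivalence can be checked bit by bit: where a bit of m is 1 both forms
// produce the corresponding bit of x, and where it is 0 they produce the bit
// of y.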
9183SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
9184 assert(N->getOpcode() == ISD::XOR);
9185
9186 // Don't touch 'not' (i.e. where y = -1).
9187 if (isAllOnesOrAllOnesSplat(N->getOperand(1)))
9188 return SDValue();
9189
9190 EVT VT = N->getValueType(0);
9191
9192 // There are 3 commutable operators in the pattern,
9193 // so we have to deal with 8 possible variants of the basic pattern.
9194 SDValue X, Y, M;
9195 auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) {
9196 if (And.getOpcode() != ISD::AND || !And.hasOneUse())
9197 return false;
9198 SDValue Xor = And.getOperand(XorIdx);
9199 if (Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
9200 return false;
9201 SDValue Xor0 = Xor.getOperand(0);
9202 SDValue Xor1 = Xor.getOperand(1);
9203 // Don't touch 'not' (i.e. where y = -1).
9204 if (isAllOnesOrAllOnesSplat(Xor1))
9205 return false;
9206 if (Other == Xor0)
9207 std::swap(Xor0, Xor1);
9208 if (Other != Xor1)
9209 return false;
9210 X = Xor0;
9211 Y = Xor1;
9212 M = And.getOperand(XorIdx ? 0 : 1);
9213 return true;
9214 };
9215
9216 SDValue N0 = N->getOperand(0);
9217 SDValue N1 = N->getOperand(1);
9218 if (!matchAndXor(N0, 0, N1) && !matchAndXor(N0, 1, N1) &&
9219 !matchAndXor(N1, 0, N0) && !matchAndXor(N1, 1, N0))
9220 return SDValue();
9221
9222 // Don't do anything if the mask is constant. This should not be reachable.
9223 // InstCombine should have already unfolded this pattern, and DAGCombiner
9224 // probably shouldn't produce it either.
9225 if (isa<ConstantSDNode>(M.getNode()))
9226 return SDValue();
9227
9228 // We can transform if the target has AndNot
9229 if (!TLI.hasAndNot(M))
9230 return SDValue();
9231
9232 SDLoc DL(N);
9233
9234 // If Y is a constant, check that 'andn' works with immediates, unless M is
9235 // a bitwise not that would already allow ANDN to be used.
9236 if (!TLI.hasAndNot(Y) && !isBitwiseNot(M)) {
9237 assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");
9238 // If not, we need to do a bit more work to make sure andn is still used.
9239 SDValue NotX = DAG.getNOT(DL, X, VT);
9240 SDValue LHS = DAG.getNode(ISD::AND, DL, VT, NotX, M);
9241 SDValue NotLHS = DAG.getNOT(DL, LHS, VT);
9242 SDValue RHS = DAG.getNode(ISD::OR, DL, VT, M, Y);
9243 return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS);
9244 }
9245
9246 // If X is a constant and M is a bitwise not, check that 'andn' works with
9247 // immediates.
9248 if (!TLI.hasAndNot(X) && isBitwiseNot(M)) {
9249 assert(TLI.hasAndNot(Y) && "Only mask is a variable? Unreachable.");
9250 // If not, we need to do a bit more work to make sure andn is still used.
9251 SDValue NotM = M.getOperand(0);
9252 SDValue LHS = DAG.getNode(ISD::OR, DL, VT, X, NotM);
9253 SDValue NotY = DAG.getNOT(DL, Y, VT);
9254 SDValue RHS = DAG.getNode(ISD::AND, DL, VT, NotM, NotY);
9255 SDValue NotRHS = DAG.getNOT(DL, RHS, VT);
9256 return DAG.getNode(ISD::AND, DL, VT, LHS, NotRHS);
9257 }
9258
9259 SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);
9260 SDValue NotM = DAG.getNOT(DL, M, VT);
9261 SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);
9262
9263 return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
9264}
9265
9266SDValue DAGCombiner::visitXOR(SDNode *N) {
9267 SDValue N0 = N->getOperand(0);
9268 SDValue N1 = N->getOperand(1);
9269 EVT VT = N0.getValueType();
9270 SDLoc DL(N);
9271
9272 // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
9273 if (N0.isUndef() && N1.isUndef())
9274 return DAG.getConstant(0, DL, VT);
9275
9276 // fold (xor x, undef) -> undef
9277 if (N0.isUndef())
9278 return N0;
9279 if (N1.isUndef())
9280 return N1;
9281
9282 // fold (xor c1, c2) -> c1^c2
9283 if (SDValue C = DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, {N0, N1}))
9284 return C;
9285
9286 // canonicalize constant to RHS
9287 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
9288 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
9289 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
9290
9291 // fold vector ops
9292 if (VT.isVector()) {
9293 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
9294 return FoldedVOp;
9295
9296 // fold (xor x, 0) -> x, vector edition
9297 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
9298 return N0;
9299 }
9300
9301 // fold (xor x, 0) -> x
9302 if (isNullConstant(N1))
9303 return N0;
9304
9305 if (SDValue NewSel = foldBinOpIntoSelect(N))
9306 return NewSel;
9307
9308 // reassociate xor
9309 if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
9310 return RXOR;
9311
9312 // Fold xor(vecreduce(x), vecreduce(y)) -> vecreduce(xor(x, y))
9313 if (SDValue SD =
9314 reassociateReduction(ISD::VECREDUCE_XOR, ISD::XOR, DL, VT, N0, N1))
9315 return SD;
9316
9317 // fold (a^b) -> (a|b) iff a and b share no bits.
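// E.g. (xor (shl x, 8), (and y, 255)) on i32 has no common bits set, so it
// can be emitted as an OR with the disjoint flag.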
9318 if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
9319 DAG.haveNoCommonBitsSet(N0, N1)) {
9320 SDNodeFlags Flags;
9321 Flags.setDisjoint(true);
9322 return DAG.getNode(ISD::OR, DL, VT, N0, N1, Flags);
9323 }
9324
9325 // look for 'add-like' folds:
9326 // XOR(N0,MIN_SIGNED_VALUE) == ADD(N0,MIN_SIGNED_VALUE)
9327 if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
9328 isMinSignedConstant(N1))
9329 if (SDValue Combined = visitADDLike(N))
9330 return Combined;
9331
9332 // fold !(x cc y) -> (x !cc y)
9333 unsigned N0Opcode = N0.getOpcode();
9334 SDValue LHS, RHS, CC;
9335 if (TLI.isConstTrueVal(N1) &&
9336 isSetCCEquivalent(N0, LHS, RHS, CC, /*MatchStrict*/ true)) {
9337 ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
9338 LHS.getValueType());
9339 if (!LegalOperations ||
9340 TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
9341 switch (N0Opcode) {
9342 default:
9343 llvm_unreachable("Unhandled SetCC Equivalent!");
9344 case ISD::SETCC:
9345 return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
9346 case ISD::SELECT_CC:
9347 return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
9348 N0.getOperand(3), NotCC);
9349 case ISD::STRICT_FSETCC:
9350 case ISD::STRICT_FSETCCS: {
9351 if (N0.hasOneUse()) {
9352 // FIXME Can we handle multiple uses? Could we token factor the chain
9353 // results from the new/old setcc?
9354 SDValue SetCC =
9355 DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC,
9356 N0.getOperand(0), N0Opcode == ISD::STRICT_FSETCCS);
9357 CombineTo(N, SetCC);
9358 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), SetCC.getValue(1));
9359 recursivelyDeleteUnusedNodes(N0.getNode());
9360 return SDValue(N, 0); // Return N so it doesn't get rechecked!
9361 }
9362 break;
9363 }
9364 }
9365 }
9366 }
9367
9368 // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
9369 if (isOneConstant(N1) && N0Opcode == ISD::ZERO_EXTEND && N0.hasOneUse() &&
9370 isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
9371 SDValue V = N0.getOperand(0);
9372 SDLoc DL0(N0);
9373 V = DAG.getNode(ISD::XOR, DL0, V.getValueType(), V,
9374 DAG.getConstant(1, DL0, V.getValueType()));
9375 AddToWorklist(V.getNode());
9376 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, V);
9377 }
9378
9379 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
9380 if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
9381 (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
9382 SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
9383 if (isOneUseSetCC(N01) || isOneUseSetCC(N00)) {
9384 unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
9385 N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
9386 N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
9387 AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
9388 return DAG.getNode(NewOpcode, DL, VT, N00, N01);
9389 }
9390 }
9391 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
9392 if (isAllOnesConstant(N1) && N0.hasOneUse() &&
9393 (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
9394 SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
9395 if (isa<ConstantSDNode>(N01) || isa<ConstantSDNode>(N00)) {
9396 unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
9397 N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
9398 N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
9399 AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
9400 return DAG.getNode(NewOpcode, DL, VT, N00, N01);
9401 }
9402 }
9403
9404 // fold (not (neg x)) -> (add X, -1)
9405 // FIXME: This can be generalized to (not (sub Y, X)) -> (add X, ~Y) if
9406 // Y is a constant or the subtract has a single use.
9407 if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::SUB &&
9408 isNullConstant(N0.getOperand(0))) {
9409 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
9410 DAG.getAllOnesConstant(DL, VT));
9411 }
9412
9413 // fold (not (add X, -1)) -> (neg X)
9414 if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::ADD &&
9415 isAllOnesOrAllOnesSplat(N0.getOperand(1))) {
9416 return DAG.getNegative(N0.getOperand(0), DL, VT);
9417 }
9418
9419 // fold (xor (and x, y), y) -> (and (not x), y)
9420 if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) {
9421 SDValue X = N0.getOperand(0);
9422 SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
9423 AddToWorklist(NotX.getNode());
9424 return DAG.getNode(ISD::AND, DL, VT, NotX, N1);
9425 }
9426
9427 // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
9428 if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
9429 SDValue A = N0Opcode == ISD::ADD ? N0 : N1;
9430 SDValue S = N0Opcode == ISD::SRA ? N0 : N1;
9431 if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
9432 SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
9433 SDValue S0 = S.getOperand(0);
9434 if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0))
9435 if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1)))
9436 if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
9437 return DAG.getNode(ISD::ABS, DL, VT, S0);
9438 }
9439 }
9440
9441 // fold (xor x, x) -> 0
9442 if (N0 == N1)
9443 return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
9444
9445 // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
9446 // Here is a concrete example of this equivalence:
9447 // i16 x == 14
9448 // i16 shl == 1 << 14 == 16384 == 0b0100000000000000
9449 // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
9450 //
9451 // =>
9452 //
9453 // i16 ~1 == 0b1111111111111110
9454 // i16 rol(~1, 14) == 0b1011111111111111
9455 //
9456 // Some additional tips to help conceptualize this transform:
9457 // - Try to see the operation as placing a single zero in a value of all ones.
9458 // - There exists no value for x which would allow the result to contain zero.
9459 // - Values of x larger than the bitwidth are undefined and do not require a
9460 // consistent result.
9461 // - Pushing the zero left requires shifting one-bits in from the right.
9462 // A rotate left of ~1 is a nice way of achieving the desired result.
9463 if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0Opcode == ISD::SHL &&
9464 isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
9465 return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
9466 N0.getOperand(1));
9467 }
9468
9469 // Simplify: xor (op x...), (op y...) -> (op (xor x, y))
9470 if (N0Opcode == N1.getOpcode())
9471 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
9472 return V;
9473
9474 if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
9475 return R;
9476 if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG))
9477 return R;
9478 if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
9479 return R;
9480
9481 // Unfold ((x ^ y) & m) ^ y into (x & m) | (y & ~m) if profitable
9482 if (SDValue MM = unfoldMaskedMerge(N))
9483 return MM;
9484
9485 // Simplify the expression using non-local knowledge.
9486 if (SimplifyDemandedBits(SDValue(N, 0)))
9487 return SDValue(N, 0);
9488
9489 if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
9490 return Combined;
9491
9492 return SDValue();
9493}
9494
9495/// If we have a shift-by-constant of a bitwise logic op that itself has a
9496/// shift-by-constant operand with identical opcode, we may be able to convert
9497/// that into 2 independent shifts followed by the logic op. This is a
9498/// throughput improvement.
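/// A concrete instance of the transform below (illustrative constants only):
///   (shl (and (shl X, 2), Y), 3) --> (and (shl X, 5), (shl Y, 3))
/// Both forms compute the same value, but the two shifts on the right-hand
/// side are independent of each other and can execute in parallel.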
9499 static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG) {
9500 // Match a one-use bitwise logic op.
9501 SDValue LogicOp = Shift->getOperand(0);
9502 if (!LogicOp.hasOneUse())
9503 return SDValue();
9504
9505 unsigned LogicOpcode = LogicOp.getOpcode();
9506 if (LogicOpcode != ISD::AND && LogicOpcode != ISD::OR &&
9507 LogicOpcode != ISD::XOR)
9508 return SDValue();
9509
9510 // Find a matching one-use shift by constant.
9511 unsigned ShiftOpcode = Shift->getOpcode();
9512 SDValue C1 = Shift->getOperand(1);
9513 ConstantSDNode *C1Node = isConstOrConstSplat(C1);
9514 assert(C1Node && "Expected a shift with constant operand");
9515 const APInt &C1Val = C1Node->getAPIntValue();
9516 auto matchFirstShift = [&](SDValue V, SDValue &ShiftOp,
9517 const APInt *&ShiftAmtVal) {
9518 if (V.getOpcode() != ShiftOpcode || !V.hasOneUse())
9519 return false;
9520
9521 ConstantSDNode *ShiftCNode = isConstOrConstSplat(V.getOperand(1));
9522 if (!ShiftCNode)
9523 return false;
9524
9525 // Capture the shifted operand and shift amount value.
9526 ShiftOp = V.getOperand(0);
9527 ShiftAmtVal = &ShiftCNode->getAPIntValue();
9528
9529 // Shift amount types do not have to match their operand type, so check that
9530 // the constants are the same width.
9531 if (ShiftAmtVal->getBitWidth() != C1Val.getBitWidth())
9532 return false;
9533
9534 // The fold is not valid if the sum of the shift values doesn't fit in the
9535 // given shift amount type.
9536 bool Overflow = false;
9537 APInt NewShiftAmt = C1Val.uadd_ov(*ShiftAmtVal, Overflow);
9538 if (Overflow)
9539 return false;
9540
9541 // The fold is not valid if the sum of the shift values exceeds bitwidth.
9542 if (NewShiftAmt.uge(V.getScalarValueSizeInBits()))
9543 return false;
9544
9545 return true;
9546 };
9547
9548 // Logic ops are commutative, so check each operand for a match.
9549 SDValue X, Y;
9550 const APInt *C0Val;
9551 if (matchFirstShift(LogicOp.getOperand(0), X, C0Val))
9552 Y = LogicOp.getOperand(1);
9553 else if (matchFirstShift(LogicOp.getOperand(1), X, C0Val))
9554 Y = LogicOp.getOperand(0);
9555 else
9556 return SDValue();
9557
9558 // shift (logic (shift X, C0), Y), C1 -> logic (shift X, C0+C1), (shift Y, C1)
9559 SDLoc DL(Shift);
9560 EVT VT = Shift->getValueType(0);
9561 EVT ShiftAmtVT = Shift->getOperand(1).getValueType();
9562 SDValue ShiftSumC = DAG.getConstant(*C0Val + C1Val, DL, ShiftAmtVT);
9563 SDValue NewShift1 = DAG.getNode(ShiftOpcode, DL, VT, X, ShiftSumC);
9564 SDValue NewShift2 = DAG.getNode(ShiftOpcode, DL, VT, Y, C1);
9565 return DAG.getNode(LogicOpcode, DL, VT, NewShift1, NewShift2,
9566 LogicOp->getFlags());
9567}
9568
9569/// Handle transforms common to the three shifts, when the shift amount is a
9570/// constant.
9571/// We are looking for: (shift being one of shl/sra/srl)
9572/// shift (binop X, C0), C1
9573/// And want to transform into:
9574/// binop (shift X, C1), (shift C0, C1)
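/// For example (illustrative constants):
///   (shl (and X, 0xF0), 4) --> (and (shl X, 4), 0xF00)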
9575SDValue DAGCombiner::visitShiftByConstant(SDNode *N) {
9576 assert(isConstOrConstSplat(N->getOperand(1)) && "Expected constant operand");
9577
9578 // Do not turn a 'not' into a regular xor.
9579 if (isBitwiseNot(N->getOperand(0)))
9580 return SDValue();
9581
9582 // The inner binop must be one-use, since we want to replace it.
9583 SDValue LHS = N->getOperand(0);
9584 if (!LHS.hasOneUse() || !TLI.isDesirableToCommuteWithShift(N, Level))
9585 return SDValue();
9586
9587 // Fold shift(bitop(shift(x,c1),y), c2) -> bitop(shift(x,c1+c2),shift(y,c2)).
9588 if (SDValue R = combineShiftOfShiftedLogic(N, DAG))
9589 return R;
9590
9591 // We want to pull some binops through shifts, so that we have (and (shift))
9592 // instead of (shift (and)), likewise for add, or, xor, etc. This sort of
9593 // thing happens with address calculations, so it's important to canonicalize
9594 // it.
9595 switch (LHS.getOpcode()) {
9596 default:
9597 return SDValue();
9598 case ISD::OR:
9599 case ISD::XOR:
9600 case ISD::AND:
9601 break;
9602 case ISD::ADD:
9603 if (N->getOpcode() != ISD::SHL)
9604 return SDValue(); // only shl(add) not sr[al](add).
9605 break;
9606 }
9607
9608 // FIXME: disable this unless the input to the binop is a shift by a constant
9609 // or is copy/select. Enable this in other cases when we figure out it's exactly
9610 // profitable.
9611 SDValue BinOpLHSVal = LHS.getOperand(0);
9612 bool IsShiftByConstant = (BinOpLHSVal.getOpcode() == ISD::SHL ||
9613 BinOpLHSVal.getOpcode() == ISD::SRA ||
9614 BinOpLHSVal.getOpcode() == ISD::SRL) &&
9615 isa<ConstantSDNode>(BinOpLHSVal.getOperand(1));
9616 bool IsCopyOrSelect = BinOpLHSVal.getOpcode() == ISD::CopyFromReg ||
9617 BinOpLHSVal.getOpcode() == ISD::SELECT;
9618
9619 if (!IsShiftByConstant && !IsCopyOrSelect)
9620 return SDValue();
9621
9622 if (IsCopyOrSelect && N->hasOneUse())
9623 return SDValue();
9624
9625 // Attempt to fold the constants, shifting the binop RHS by the shift amount.
9626 SDLoc DL(N);
9627 EVT VT = N->getValueType(0);
9628 if (SDValue NewRHS = DAG.FoldConstantArithmetic(
9629 N->getOpcode(), DL, VT, {LHS.getOperand(1), N->getOperand(1)})) {
9630 SDValue NewShift = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(0),
9631 N->getOperand(1));
9632 return DAG.getNode(LHS.getOpcode(), DL, VT, NewShift, NewRHS);
9633 }
9634
9635 return SDValue();
9636}
9637
9638SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
9639 assert(N->getOpcode() == ISD::TRUNCATE);
9640 assert(N->getOperand(0).getOpcode() == ISD::AND);
9641
9642 // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
9643 EVT TruncVT = N->getValueType(0);
9644 if (N->hasOneUse() && N->getOperand(0).hasOneUse() &&
9645 TLI.isTypeDesirableForOp(ISD::AND, TruncVT)) {
9646 SDValue N01 = N->getOperand(0).getOperand(1);
9647 if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
9648 SDLoc DL(N);
9649 SDValue N00 = N->getOperand(0).getOperand(0);
9650 SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
9651 SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
9652 AddToWorklist(Trunc00.getNode());
9653 AddToWorklist(Trunc01.getNode());
9654 return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
9655 }
9656 }
9657
9658 return SDValue();
9659}
9660
9661SDValue DAGCombiner::visitRotate(SDNode *N) {
9662 SDLoc dl(N);
9663 SDValue N0 = N->getOperand(0);
9664 SDValue N1 = N->getOperand(1);
9665 EVT VT = N->getValueType(0);
9666 unsigned Bitsize = VT.getScalarSizeInBits();
9667
9668 // fold (rot x, 0) -> x
9669 if (isNullOrNullSplat(N1))
9670 return N0;
9671
9672 // fold (rot x, c) -> x iff (c % BitSize) == 0
9673 if (isPowerOf2_32(Bitsize) && Bitsize > 1) {
9674 APInt ModuloMask(N1.getScalarValueSizeInBits(), Bitsize - 1);
9675 if (DAG.MaskedValueIsZero(N1, ModuloMask))
9676 return N0;
9677 }
9678
9679 // fold (rot x, c) -> (rot x, c % BitSize)
9680 bool OutOfRange = false;
9681 auto MatchOutOfRange = [Bitsize, &OutOfRange](ConstantSDNode *C) {
9682 OutOfRange |= C->getAPIntValue().uge(Bitsize);
9683 return true;
9684 };
9685 if (ISD::matchUnaryPredicate(N1, MatchOutOfRange) && OutOfRange) {
9686 EVT AmtVT = N1.getValueType();
9687 SDValue Bits = DAG.getConstant(Bitsize, dl, AmtVT);
9688 if (SDValue Amt =
9689 DAG.FoldConstantArithmetic(ISD::UREM, dl, AmtVT, {N1, Bits}))
9690 return DAG.getNode(N->getOpcode(), dl, VT, N0, Amt);
9691 }
9692
9693 // rot i16 X, 8 --> bswap X
9694 auto *RotAmtC = isConstOrConstSplat(N1);
9695 if (RotAmtC && RotAmtC->getAPIntValue() == 8 &&
9696 VT.getScalarSizeInBits() == 16 && hasOperation(ISD::BSWAP, VT))
9697 return DAG.getNode(ISD::BSWAP, dl, VT, N0);
9698
9699 // Simplify the operands using demanded-bits information.
9700 if (SimplifyDemandedBits(SDValue(N, 0)))
9701 return SDValue(N, 0);
9702
9703 // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
9704 if (N1.getOpcode() == ISD::TRUNCATE &&
9705 N1.getOperand(0).getOpcode() == ISD::AND) {
9706 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
9707 return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
9708 }
9709
9710 unsigned NextOp = N0.getOpcode();
9711
9712 // fold (rot* (rot* x, c2), c1)
9713 // -> (rot* x, ((c1 % bitsize) +- (c2 % bitsize) + bitsize) % bitsize)
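// For example, on i8 (illustrative constants):
// (rotl (rotr x, 3), 5) -> (rotl x, ((5 % 8) - (3 % 8) + 8) % 8) = (rotl x, 2)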
9714 if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
9715 SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
9716 SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
9717 if (C1 && C2 && C1->getValueType(0) == C2->getValueType(0)) {
9718 EVT ShiftVT = C1->getValueType(0);
9719 bool SameSide = (N->getOpcode() == NextOp);
9720 unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
9721 SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
9722 SDValue Norm1 = DAG.FoldConstantArithmetic(ISD::UREM, dl, ShiftVT,
9723 {N1, BitsizeC});
9724 SDValue Norm2 = DAG.FoldConstantArithmetic(ISD::UREM, dl, ShiftVT,
9725 {N0.getOperand(1), BitsizeC});
9726 if (Norm1 && Norm2)
9727 if (SDValue CombinedShift = DAG.FoldConstantArithmetic(
9728 CombineOp, dl, ShiftVT, {Norm1, Norm2})) {
9729 CombinedShift = DAG.FoldConstantArithmetic(ISD::ADD, dl, ShiftVT,
9730 {CombinedShift, BitsizeC});
9731 SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
9732 ISD::UREM, dl, ShiftVT, {CombinedShift, BitsizeC});
9733 return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
9734 CombinedShiftNorm);
9735 }
9736 }
9737 }
9738 return SDValue();
9739}
9740
9741SDValue DAGCombiner::visitSHL(SDNode *N) {
9742 SDValue N0 = N->getOperand(0);
9743 SDValue N1 = N->getOperand(1);
9744 if (SDValue V = DAG.simplifyShift(N0, N1))
9745 return V;
9746
9747 SDLoc DL(N);
9748 EVT VT = N0.getValueType();
9749 EVT ShiftVT = N1.getValueType();
9750 unsigned OpSizeInBits = VT.getScalarSizeInBits();
9751
9752 // fold (shl c1, c2) -> c1<<c2
9753 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N0, N1}))
9754 return C;
9755
9756 // fold vector ops
9757 if (VT.isVector()) {
9758 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
9759 return FoldedVOp;
9760
9761 BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
9762 // If setcc produces all-one true value then:
9763 // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
9764 if (N1CV && N1CV->isConstant()) {
9765 if (N0.getOpcode() == ISD::AND) {
9766 SDValue N00 = N0->getOperand(0);
9767 SDValue N01 = N0->getOperand(1);
9768 BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
9769
9770 if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
9771 TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
9772 TargetLowering::ZeroOrNegativeOneBooleanContent) {
9773 if (SDValue C =
9774 DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N01, N1}))
9775 return DAG.getNode(ISD::AND, DL, VT, N00, C);
9776 }
9777 }
9778 }
9779 }
9780
9781 if (SDValue NewSel = foldBinOpIntoSelect(N))
9782 return NewSel;
9783
9784 // if (shl x, c) is known to be zero, return 0
9785 if (DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))
9786 return DAG.getConstant(0, DL, VT);
9787
9788 // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
9789 if (N1.getOpcode() == ISD::TRUNCATE &&
9790 N1.getOperand(0).getOpcode() == ISD::AND) {
9791 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
9792 return DAG.getNode(ISD::SHL, DL, VT, N0, NewOp1);
9793 }
9794
9795 // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
9796 if (N0.getOpcode() == ISD::SHL) {
9797 auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
9798 ConstantSDNode *RHS) {
9799 APInt c1 = LHS->getAPIntValue();
9800 APInt c2 = RHS->getAPIntValue();
9801 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
9802 return (c1 + c2).uge(OpSizeInBits);
9803 };
9804 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
9805 return DAG.getConstant(0, DL, VT);
9806
9807 auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
9808 ConstantSDNode *RHS) {
9809 APInt c1 = LHS->getAPIntValue();
9810 APInt c2 = RHS->getAPIntValue();
9811 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
9812 return (c1 + c2).ult(OpSizeInBits);
9813 };
9814 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
9815 SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
9816 return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
9817 }
9818 }
9819
9820 // fold (shl (ext (shl x, c1)), c2) -> (shl (ext x), (add c1, c2))
9821 // For this to be valid, the second form must not preserve any of the bits
9822 // that are shifted out by the inner shift in the first form. This means
9823 // the outer shift size must be >= the number of bits added by the ext.
9824 // As a corollary, we don't care what kind of ext it is.
9825 if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
9826 N0.getOpcode() == ISD::ANY_EXTEND ||
9827 N0.getOpcode() == ISD::SIGN_EXTEND) &&
9828 N0.getOperand(0).getOpcode() == ISD::SHL) {
9829 SDValue N0Op0 = N0.getOperand(0);
9830 SDValue InnerShiftAmt = N0Op0.getOperand(1);
9831 EVT InnerVT = N0Op0.getValueType();
9832 uint64_t InnerBitwidth = InnerVT.getScalarSizeInBits();
9833
9834 auto MatchOutOfRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
9835 ConstantSDNode *RHS) {
9836 APInt c1 = LHS->getAPIntValue();
9837 APInt c2 = RHS->getAPIntValue();
9838 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
9839 return c2.uge(OpSizeInBits - InnerBitwidth) &&
9840 (c1 + c2).uge(OpSizeInBits);
9841 };
9842 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchOutOfRange,
9843 /*AllowUndefs*/ false,
9844 /*AllowTypeMismatch*/ true))
9845 return DAG.getConstant(0, DL, VT);
9846
9847 auto MatchInRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
9848 ConstantSDNode *RHS) {
9849 APInt c1 = LHS->getAPIntValue();
9850 APInt c2 = RHS->getAPIntValue();
9851 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
9852 return c2.uge(OpSizeInBits - InnerBitwidth) &&
9853 (c1 + c2).ult(OpSizeInBits);
9854 };
9855 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchInRange,
9856 /*AllowUndefs*/ false,
9857 /*AllowTypeMismatch*/ true)) {
9858 SDValue Ext = DAG.getNode(N0.getOpcode(), DL, VT, N0Op0.getOperand(0));
9859 SDValue Sum = DAG.getZExtOrTrunc(InnerShiftAmt, DL, ShiftVT);
9860 Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, Sum, N1);
9861 return DAG.getNode(ISD::SHL, DL, VT, Ext, Sum);
9862 }
9863 }
9864
9865 // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
9866 // Only fold this if the inner zext has no other uses to avoid increasing
9867 // the total number of instructions.
9868 if (N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
9869 N0.getOperand(0).getOpcode() == ISD::SRL) {
9870 SDValue N0Op0 = N0.getOperand(0);
9871 SDValue InnerShiftAmt = N0Op0.getOperand(1);
9872
9873 auto MatchEqual = [VT](ConstantSDNode *LHS, ConstantSDNode *RHS) {
9874 APInt c1 = LHS->getAPIntValue();
9875 APInt c2 = RHS->getAPIntValue();
9876 zeroExtendToMatch(c1, c2);
9877 return c1.ult(VT.getScalarSizeInBits()) && (c1 == c2);
9878 };
9879 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchEqual,
9880 /*AllowUndefs*/ false,
9881 /*AllowTypeMismatch*/ true)) {
9882 EVT InnerShiftAmtVT = N0Op0.getOperand(1).getValueType();
9883 SDValue NewSHL = DAG.getZExtOrTrunc(N1, DL, InnerShiftAmtVT);
9884 NewSHL = DAG.getNode(ISD::SHL, DL, N0Op0.getValueType(), N0Op0, NewSHL);
9885 AddToWorklist(NewSHL.getNode());
9886 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
9887 }
9888 }
9889
9890 if (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) {
9891 auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS,
9892 ConstantSDNode *RHS) {
9893 const APInt &LHSC = LHS->getAPIntValue();
9894 const APInt &RHSC = RHS->getAPIntValue();
9895 return LHSC.ult(OpSizeInBits) && RHSC.ult(OpSizeInBits) &&
9896 LHSC.getZExtValue() <= RHSC.getZExtValue();
9897 };
9898
9899 // fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2
9900 // fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C2-C1)) if C1 >= C2
9901 if (N0->getFlags().hasExact()) {
9902 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
9903 /*AllowUndefs*/ false,
9904 /*AllowTypeMismatch*/ true)) {
9905 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
9906 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
9907 return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
9908 }
9909 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
9910 /*AllowUndefs*/ false,
9911 /*AllowTypeMismatch*/ true)) {
9912 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
9913 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
9914 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0), Diff);
9915 }
9916 }
9917
9918 // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1)), MASK) or
9919 // (and (srl x, (sub c1, c2)), MASK)
9920 // Only fold this if the inner shift has no other uses -- if it does,
9921 // folding this will increase the total number of instructions.
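// For example, on i8 (illustrative constants):
// (shl (srl x, 3), 5) -> (and (shl x, 2), 0xE0)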
9922 if (N0.getOpcode() == ISD::SRL &&
9923 (N0.getOperand(1) == N1 || N0.hasOneUse()) &&
9924 TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
9925 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
9926 /*AllowUndefs*/ false,
9927 /*AllowTypeMismatch*/ true)) {
9928 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
9929 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
9930 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
9931 Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, N01);
9932 Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, Diff);
9933 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Diff);
9934 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
9935 }
9936 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
9937 /*AllowUndefs*/ false,
9938 /*AllowTypeMismatch*/ true)) {
9939 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
9940 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
9941 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
9942 Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, N1);
9943 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
9944 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
9945 }
9946 }
9947 }
9948
9949 // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
9950 if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
9951 isConstantOrConstantVector(N1, /* No Opaques */ true)) {
9952 SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
9953 SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
9954 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
9955 }
9956
9957 // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
9958 // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
9959 // Variant of version done on multiply, except mul by a power of 2 is turned
9960 // into a shift.
9961 if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
9962 N0->hasOneUse() && TLI.isDesirableToCommuteWithShift(N, Level)) {
9963 SDValue N01 = N0.getOperand(1);
9964 if (SDValue Shl1 =
9965 DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, {N01, N1})) {
9966 SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
9967 AddToWorklist(Shl0.getNode());
9968 SDNodeFlags Flags;
9969 // Preserve the disjoint flag for Or.
9970 if (N0.getOpcode() == ISD::OR && N0->getFlags().hasDisjoint())
9971 Flags.setDisjoint(true);
9972 return DAG.getNode(N0.getOpcode(), DL, VT, Shl0, Shl1, Flags);
9973 }
9974 }
9975
9976 // fold (shl (sext (add_nsw x, c1)), c2) -> (add (shl (sext x), c2), c1 << c2)
9977 // TODO: Add zext/add_nuw variant with suitable test coverage
9978 // TODO: Should we limit this with isLegalAddImmediate?
9979 if (N0.getOpcode() == ISD::SIGN_EXTEND &&
9980 N0.getOperand(0).getOpcode() == ISD::ADD &&
9981 N0.getOperand(0)->getFlags().hasNoSignedWrap() && N0->hasOneUse() &&
9982 N0.getOperand(0)->hasOneUse() &&
9983 TLI.isDesirableToCommuteWithShift(N, Level)) {
9984 SDValue Add = N0.getOperand(0);
9985 SDLoc DL(N0);
9986 if (SDValue ExtC = DAG.FoldConstantArithmetic(N0.getOpcode(), DL, VT,
9987 {Add.getOperand(1)})) {
9988 if (SDValue ShlC =
9989 DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {ExtC, N1})) {
9990 SDValue ExtX = DAG.getNode(N0.getOpcode(), DL, VT, Add.getOperand(0));
9991 SDValue ShlX = DAG.getNode(ISD::SHL, DL, VT, ExtX, N1);
9992 return DAG.getNode(ISD::ADD, DL, VT, ShlX, ShlC);
9993 }
9994 }
9995 }
9996
9997 // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
9998 if (N0.getOpcode() == ISD::MUL && N0->hasOneUse()) {
9999 SDValue N01 = N0.getOperand(1);
10000 if (SDValue Shl =
10001 DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, {N01, N1}))
10002 return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), Shl);
10003 }
10004
10005 ConstantSDNode *N1C = isConstOrConstSplat(N1);
10006 if (N1C && !N1C->isOpaque())
10007 if (SDValue NewSHL = visitShiftByConstant(N))
10008 return NewSHL;
10009
10010 if (SimplifyDemandedBits(SDValue(N, 0)))
10011 return SDValue(N, 0);
10012
10013 // Fold (shl (vscale * C0), C1) to (vscale * (C0 << C1)).
10014 if (N0.getOpcode() == ISD::VSCALE && N1C) {
10015 const APInt &C0 = N0.getConstantOperandAPInt(0);
10016 const APInt &C1 = N1C->getAPIntValue();
10017 return DAG.getVScale(DL, VT, C0 << C1);
10018 }
10019
10020 // Fold (shl step_vector(C0), C1) to (step_vector(C0 << C1)).
10021 APInt ShlVal;
10022 if (N0.getOpcode() == ISD::STEP_VECTOR &&
10023 ISD::isConstantSplatVector(N1.getNode(), ShlVal)) {
10024 const APInt &C0 = N0.getConstantOperandAPInt(0);
10025 if (ShlVal.ult(C0.getBitWidth())) {
10026 APInt NewStep = C0 << ShlVal;
10027 return DAG.getStepVector(DL, VT, NewStep);
10028 }
10029 }
10030
10031 return SDValue();
10032}
10033
10034// Transform a right shift of a multiply into a multiply-high.
10035// Examples:
10036 // (srl (mul (zext i32:$a to i64), (zext i32:$b to i64)), 32) -> (mulhu $a, $b)
10037 // (sra (mul (sext i32:$a to i64), (sext i32:$b to i64)), 32) -> (mulhs $a, $b)
10038 static SDValue combineShiftToMULH(SDNode *N, const SDLoc &DL, SelectionDAG &DAG,
10039 const TargetLowering &TLI) {
10040 assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
10041 "SRL or SRA node is required here!");
10042
10043 // Check the shift amount. Proceed with the transformation if the shift
10044 // amount is constant.
10045 ConstantSDNode *ShiftAmtSrc = isConstOrConstSplat(N->getOperand(1));
10046 if (!ShiftAmtSrc)
10047 return SDValue();
10048
10049 // The operation feeding into the shift must be a multiply.
10050 SDValue ShiftOperand = N->getOperand(0);
10051 if (ShiftOperand.getOpcode() != ISD::MUL)
10052 return SDValue();
10053
10054 // Both operands must be equivalent extend nodes.
10055 SDValue LeftOp = ShiftOperand.getOperand(0);
10056 SDValue RightOp = ShiftOperand.getOperand(1);
10057
10058 bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND;
10059 bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND;
10060
10061 if (!IsSignExt && !IsZeroExt)
10062 return SDValue();
10063
10064 EVT NarrowVT = LeftOp.getOperand(0).getValueType();
10065 unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits();
10066
10067 // return true if U may use the lower bits of its operands
10068 auto UserOfLowerBits = [NarrowVTSize](SDNode *U) {
10069 if (U->getOpcode() != ISD::SRL && U->getOpcode() != ISD::SRA) {
10070 return true;
10071 }
10072 ConstantSDNode *UShiftAmtSrc = isConstOrConstSplat(U->getOperand(1));
10073 if (!UShiftAmtSrc) {
10074 return true;
10075 }
10076 unsigned UShiftAmt = UShiftAmtSrc->getZExtValue();
10077 return UShiftAmt < NarrowVTSize;
10078 };
10079
10080 // If the lower part of the MUL is also used and MUL_LOHI is supported
10081 // do not introduce the MULH in favor of MUL_LOHI
10082 unsigned MulLoHiOp = IsSignExt ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
10083 if (!ShiftOperand.hasOneUse() &&
10084 TLI.isOperationLegalOrCustom(MulLoHiOp, NarrowVT) &&
10085 llvm::any_of(ShiftOperand->uses(), UserOfLowerBits)) {
10086 return SDValue();
10087 }
10088
10089 SDValue MulhRightOp;
10090 if (ConstantSDNode *Constant = isConstOrConstSplat(RightOp)) {
10091 unsigned ActiveBits = IsSignExt
10092 ? Constant->getAPIntValue().getSignificantBits()
10093 : Constant->getAPIntValue().getActiveBits();
10094 if (ActiveBits > NarrowVTSize)
10095 return SDValue();
10096 MulhRightOp = DAG.getConstant(
10097 Constant->getAPIntValue().trunc(NarrowVT.getScalarSizeInBits()), DL,
10098 NarrowVT);
10099 } else {
10100 if (LeftOp.getOpcode() != RightOp.getOpcode())
10101 return SDValue();
10102 // Check that the two extend nodes are the same type.
10103 if (NarrowVT != RightOp.getOperand(0).getValueType())
10104 return SDValue();
10105 MulhRightOp = RightOp.getOperand(0);
10106 }
10107
10108 EVT WideVT = LeftOp.getValueType();
10109 // Proceed with the transformation if the wide types match.
10110 assert((WideVT == RightOp.getValueType()) &&
10111 "Cannot have a multiply node with two different operand types.");
10112
10113 // Proceed with the transformation if the wide type is twice as large
10114 // as the narrow type.
10115 if (WideVT.getScalarSizeInBits() != 2 * NarrowVTSize)
10116 return SDValue();
10117
10118 // Check the shift amount with the narrow type size.
10119 // Proceed with the transformation if the shift amount is the width
10120 // of the narrow type.
10121 unsigned ShiftAmt = ShiftAmtSrc->getZExtValue();
10122 if (ShiftAmt != NarrowVTSize)
10123 return SDValue();
10124
10125 // If the operation feeding into the MUL is a sign extend (sext),
10126 // we use mulhs. Otherwise, zero extends (zext) use mulhu.
10127 unsigned MulhOpcode = IsSignExt ? ISD::MULHS : ISD::MULHU;
10128
10129 // Combine to mulh if mulh is legal/custom for the narrow type on the target
10130 // or if it is a vector type then we could transform to an acceptable type and
10131 // rely on legalization to split/combine the result.
10132 if (NarrowVT.isVector()) {
10133 EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), NarrowVT);
10134 if (TransformVT.getVectorElementType() != NarrowVT.getVectorElementType() ||
10135 !TLI.isOperationLegalOrCustom(MulhOpcode, TransformVT))
10136 return SDValue();
10137 } else {
10138 if (!TLI.isOperationLegalOrCustom(MulhOpcode, NarrowVT))
10139 return SDValue();
10140 }
10141
10142 SDValue Result =
10143 DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0), MulhRightOp);
10144 bool IsSigned = N->getOpcode() == ISD::SRA;
10145 return DAG.getExtOrTrunc(IsSigned, Result, DL, WideVT);
10146}
10147
10148// fold (bswap (logic_op(bswap(x),y))) -> logic_op(x,bswap(y))
10149 // This helper function accepts SDNodes with opcode ISD::BSWAP or ISD::BITREVERSE.
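// For example, with xor as the logic op (illustrative):
//   (bswap (xor (bswap X), Y)) -> (xor X, (bswap Y))
// This holds because bswap/bitreverse are involutive bit permutations, so they
// distribute over bitwise logic ops and cancel when applied twice.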
10150 static SDValue foldBitOrderCrossLogicOp(SDNode *N, SelectionDAG &DAG) {
10151 unsigned Opcode = N->getOpcode();
10152 if (Opcode != ISD::BSWAP && Opcode != ISD::BITREVERSE)
10153 return SDValue();
10154
10155 SDValue N0 = N->getOperand(0);
10156 EVT VT = N->getValueType(0);
10157 SDLoc DL(N);
10158 if (ISD::isBitwiseLogicOp(N0.getOpcode()) && N0.hasOneUse()) {
10159 SDValue OldLHS = N0.getOperand(0);
10160 SDValue OldRHS = N0.getOperand(1);
10161
10162 // If both operands are bswap/bitreverse, ignore the multiuse
10163 // Otherwise need to ensure logic_op and bswap/bitreverse(x) have one use.
10164 if (OldLHS.getOpcode() == Opcode && OldRHS.getOpcode() == Opcode) {
10165 return DAG.getNode(N0.getOpcode(), DL, VT, OldLHS.getOperand(0),
10166 OldRHS.getOperand(0));
10167 }
10168
10169 if (OldLHS.getOpcode() == Opcode && OldLHS.hasOneUse()) {
10170 SDValue NewBitReorder = DAG.getNode(Opcode, DL, VT, OldRHS);
10171 return DAG.getNode(N0.getOpcode(), DL, VT, OldLHS.getOperand(0),
10172 NewBitReorder);
10173 }
10174
10175 if (OldRHS.getOpcode() == Opcode && OldRHS.hasOneUse()) {
10176 SDValue NewBitReorder = DAG.getNode(Opcode, DL, VT, OldLHS);
10177 return DAG.getNode(N0.getOpcode(), DL, VT, NewBitReorder,
10178 OldRHS.getOperand(0));
10179 }
10180 }
10181 return SDValue();
10182}
10183
10184SDValue DAGCombiner::visitSRA(SDNode *N) {
10185 SDValue N0 = N->getOperand(0);
10186 SDValue N1 = N->getOperand(1);
10187 if (SDValue V = DAG.simplifyShift(N0, N1))
10188 return V;
10189
10190 SDLoc DL(N);
10191 EVT VT = N0.getValueType();
10192 unsigned OpSizeInBits = VT.getScalarSizeInBits();
10193
10194 // fold (sra c1, c2) -> c1 >>s c2
10195 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRA, DL, VT, {N0, N1}))
10196 return C;
10197
10198 // Arithmetic shifting an all-sign-bit value is a no-op.
10199 // fold (sra 0, x) -> 0
10200 // fold (sra -1, x) -> -1
10201 if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
10202 return N0;
10203
10204 // fold vector ops
10205 if (VT.isVector())
10206 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
10207 return FoldedVOp;
10208
10209 if (SDValue NewSel = foldBinOpIntoSelect(N))
10210 return NewSel;
10211
10212 ConstantSDNode *N1C = isConstOrConstSplat(N1);
10213
10214 // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
10215 // clamp (add c1, c2) to max shift.
10216 if (N0.getOpcode() == ISD::SRA) {
10217 EVT ShiftVT = N1.getValueType();
10218 EVT ShiftSVT = ShiftVT.getScalarType();
10219 SmallVector<SDValue, 16> ShiftValues;
10220
10221 auto SumOfShifts = [&](ConstantSDNode *LHS, ConstantSDNode *RHS) {
10222 APInt c1 = LHS->getAPIntValue();
10223 APInt c2 = RHS->getAPIntValue();
10224 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10225 APInt Sum = c1 + c2;
10226 unsigned ShiftSum =
10227 Sum.uge(OpSizeInBits) ? (OpSizeInBits - 1) : Sum.getZExtValue();
10228 ShiftValues.push_back(DAG.getConstant(ShiftSum, DL, ShiftSVT));
10229 return true;
10230 };
10231 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), SumOfShifts)) {
10232 SDValue ShiftValue;
10233 if (N1.getOpcode() == ISD::BUILD_VECTOR)
10234 ShiftValue = DAG.getBuildVector(ShiftVT, DL, ShiftValues);
10235 else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
10236 assert(ShiftValues.size() == 1 &&
10237 "Expected matchBinaryPredicate to return one element for "
10238 "SPLAT_VECTORs");
10239 ShiftValue = DAG.getSplatVector(ShiftVT, DL, ShiftValues[0]);
10240 } else
10241 ShiftValue = ShiftValues[0];
10242 return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), ShiftValue);
10243 }
10244 }
10245
10246 // fold (sra (shl X, m), (sub result_size, n))
10247 // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
10248 // result_size - n != m.
10249 // If truncate is free for the target, sext(shl) is likely to result in better
10250 // code.
10251 if (N0.getOpcode() == ISD::SHL && N1C) {
10252 // Get the two constants of the shifts, CN0 = m, CN = n.
10253 const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
10254 if (N01C) {
10255 LLVMContext &Ctx = *DAG.getContext();
10256 // Determine what the truncate's result bitsize and type would be.
10257 EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
10258
10259 if (VT.isVector())
10260 TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
10261
10262 // Determine the residual right-shift amount.
10263 int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
10264
10265 // If the shift is not a no-op (in which case this should be just a sign
10266 // extend already), the truncated to type is legal, sign_extend is legal
10267 // on that type, and the truncate to that type is both legal and free,
10268 // perform the transform.
10269 if ((ShiftAmt > 0) &&
10270 TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
10271 TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
10272 TLI.isTruncateFree(VT, TruncVT)) {
10273 SDValue Amt = DAG.getConstant(ShiftAmt, DL,
10274 getShiftAmountTy(N0.getOperand(0).getValueType()));
10275 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
10276 N0.getOperand(0), Amt);
10277 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
10278 Shift);
10279 return DAG.getNode(ISD::SIGN_EXTEND, DL,
10280 N->getValueType(0), Trunc);
10281 }
10282 }
10283 }
10284
10285 // We convert trunc/ext to opposing shifts in IR, but casts may be cheaper.
10286 // sra (add (shl X, N1C), AddC), N1C -->
10287 // sext (add (trunc X to (width - N1C)), AddC')
10288 // sra (sub AddC, (shl X, N1C)), N1C -->
10289 // sext (sub AddC1',(trunc X to (width - N1C)))
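// For example, for i32 with a 16-bit shift (illustrative constants):
//   sra (add (shl X, 16), 0xABCD0000), 16 -->
//   sext (add (trunc X to i16), 0xABCD)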
10290 if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB) && N1C &&
10291 N0.hasOneUse()) {
10292 bool IsAdd = N0.getOpcode() == ISD::ADD;
10293 SDValue Shl = N0.getOperand(IsAdd ? 0 : 1);
10294 if (Shl.getOpcode() == ISD::SHL && Shl.getOperand(1) == N1 &&
10295 Shl.hasOneUse()) {
10296 // TODO: AddC does not need to be a splat.
10297 if (ConstantSDNode *AddC =
10298 isConstOrConstSplat(N0.getOperand(IsAdd ? 1 : 0))) {
10299 // Determine what the truncate's type would be and ask the target if
10300 // that is a free operation.
10301 LLVMContext &Ctx = *DAG.getContext();
10302 unsigned ShiftAmt = N1C->getZExtValue();
10303 EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt);
10304 if (VT.isVector())
10305 TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
10306
10307 // TODO: The simple type check probably belongs in the default hook
10308 // implementation and/or target-specific overrides (because
10309 // non-simple types likely require masking when legalized), but
10310 // that restriction may conflict with other transforms.
10311 if (TruncVT.isSimple() && isTypeLegal(TruncVT) &&
10312 TLI.isTruncateFree(VT, TruncVT)) {
10313 SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT);
10314 SDValue ShiftC =
10315 DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).trunc(
10316 TruncVT.getScalarSizeInBits()),
10317 DL, TruncVT);
10318 SDValue Add;
10319 if (IsAdd)
10320 Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC);
10321 else
10322 Add = DAG.getNode(ISD::SUB, DL, TruncVT, ShiftC, Trunc);
10323 return DAG.getSExtOrTrunc(Add, DL, VT);
10324 }
10325 }
10326 }
10327 }
10328
10329 // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
10330 if (N1.getOpcode() == ISD::TRUNCATE &&
10331 N1.getOperand(0).getOpcode() == ISD::AND) {
10332 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
10333 return DAG.getNode(ISD::SRA, DL, VT, N0, NewOp1);
10334 }
10335
10336 // fold (sra (trunc (sra x, c1)), c2) -> (trunc (sra x, c1 + c2))
10337 // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
10338 // if c1 is equal to the number of bits the trunc removes
10339 // TODO - support non-uniform vector shift amounts.
10340 if (N0.getOpcode() == ISD::TRUNCATE &&
10341 (N0.getOperand(0).getOpcode() == ISD::SRL ||
10342 N0.getOperand(0).getOpcode() == ISD::SRA) &&
10343 N0.getOperand(0).hasOneUse() &&
10344 N0.getOperand(0).getOperand(1).hasOneUse() && N1C) {
10345 SDValue N0Op0 = N0.getOperand(0);
10346 if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
10347 EVT LargeVT = N0Op0.getValueType();
10348 unsigned TruncBits = LargeVT.getScalarSizeInBits() - OpSizeInBits;
10349 if (LargeShift->getAPIntValue() == TruncBits) {
10350 EVT LargeShiftVT = getShiftAmountTy(LargeVT);
10351 SDValue Amt = DAG.getZExtOrTrunc(N1, DL, LargeShiftVT);
10352 Amt = DAG.getNode(ISD::ADD, DL, LargeShiftVT, Amt,
10353 DAG.getConstant(TruncBits, DL, LargeShiftVT));
10354 SDValue SRA =
10355 DAG.getNode(ISD::SRA, DL, LargeVT, N0Op0.getOperand(0), Amt);
10356 return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
10357 }
10358 }
10359 }
10360
10361 // Simplify, based on bits shifted out of the LHS.
10362 if (SimplifyDemandedBits(SDValue(N, 0)))
10363 return SDValue(N, 0);
10364
10365 // If the sign bit is known to be zero, switch this to a SRL.
10366 if (DAG.SignBitIsZero(N0))
10367 return DAG.getNode(ISD::SRL, DL, VT, N0, N1);
10368
10369 if (N1C && !N1C->isOpaque())
10370 if (SDValue NewSRA = visitShiftByConstant(N))
10371 return NewSRA;
10372
10373 // Try to transform this shift into a multiply-high if
10374 // it matches the appropriate pattern detected in combineShiftToMULH.
10375 if (SDValue MULH = combineShiftToMULH(N, DL, DAG, TLI))
10376 return MULH;
10377
10378 // Attempt to convert a sra of a load into a narrower sign-extending load.
10379 if (SDValue NarrowLoad = reduceLoadWidth(N))
10380 return NarrowLoad;
10381
10382 return SDValue();
10383}
10384
10385SDValue DAGCombiner::visitSRL(SDNode *N) {
10386 SDValue N0 = N->getOperand(0);
10387 SDValue N1 = N->getOperand(1);
10388 if (SDValue V = DAG.simplifyShift(N0, N1))
10389 return V;
10390
10391 SDLoc DL(N);
10392 EVT VT = N0.getValueType();
10393 EVT ShiftVT = N1.getValueType();
10394 unsigned OpSizeInBits = VT.getScalarSizeInBits();
10395
10396 // fold (srl c1, c2) -> c1 >>u c2
10397 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRL, DL, VT, {N0, N1}))
10398 return C;
10399
10400 // fold vector ops
10401 if (VT.isVector())
10402 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
10403 return FoldedVOp;
10404
10405 if (SDValue NewSel = foldBinOpIntoSelect(N))
10406 return NewSel;
10407
10408 // if (srl x, c) is known to be zero, return 0
10409 ConstantSDNode *N1C = isConstOrConstSplat(N1);
10410 if (N1C &&
10411 DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))
10412 return DAG.getConstant(0, DL, VT);
10413
10414 // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
10415 if (N0.getOpcode() == ISD::SRL) {
10416 auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
10417 ConstantSDNode *RHS) {
10418 APInt c1 = LHS->getAPIntValue();
10419 APInt c2 = RHS->getAPIntValue();
10420 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10421 return (c1 + c2).uge(OpSizeInBits);
10422 };
10423 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
10424 return DAG.getConstant(0, DL, VT);
10425
10426 auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
10427 ConstantSDNode *RHS) {
10428 APInt c1 = LHS->getAPIntValue();
10429 APInt c2 = RHS->getAPIntValue();
10430 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10431 return (c1 + c2).ult(OpSizeInBits);
10432 };
10433 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
10434 SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
10435 return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
10436 }
10437 }
10438
10439 if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
10440 N0.getOperand(0).getOpcode() == ISD::SRL) {
10441 SDValue InnerShift = N0.getOperand(0);
10442 // TODO - support non-uniform vector shift amounts.
10443 if (auto *N001C = isConstOrConstSplat(InnerShift.getOperand(1))) {
10444 uint64_t c1 = N001C->getZExtValue();
10445 uint64_t c2 = N1C->getZExtValue();
10446 EVT InnerShiftVT = InnerShift.getValueType();
10447 EVT ShiftAmtVT = InnerShift.getOperand(1).getValueType();
10448 uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
10449 // srl (trunc (srl x, c1)), c2 --> 0 or (trunc (srl x, (add c1, c2)))
10450 // This is only valid if the OpSizeInBits + c1 = size of inner shift.
10451 if (c1 + OpSizeInBits == InnerShiftSize) {
10452 if (c1 + c2 >= InnerShiftSize)
10453 return DAG.getConstant(0, DL, VT);
10454 SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
10455 SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
10456 InnerShift.getOperand(0), NewShiftAmt);
10457 return DAG.getNode(ISD::TRUNCATE, DL, VT, NewShift);
10458 }
10459 // In the more general case, we can clear the high bits after the shift:
10460 // srl (trunc (srl x, c1)), c2 --> trunc (and (srl x, (c1+c2)), Mask)
10461 if (N0.hasOneUse() && InnerShift.hasOneUse() &&
10462 c1 + c2 < InnerShiftSize) {
10463 SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
10464 SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
10465 InnerShift.getOperand(0), NewShiftAmt);
10466 SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(InnerShiftSize,
10467 OpSizeInBits - c2),
10468 DL, InnerShiftVT);
10469 SDValue And = DAG.getNode(ISD::AND, DL, InnerShiftVT, NewShift, Mask);
10470 return DAG.getNode(ISD::TRUNCATE, DL, VT, And);
10471 }
10472 }
10473 }
10474
10475 // fold (srl (shl x, c1), c2) -> (and (shl x, (sub c1, c2)), MASK) or
10476 // (and (srl x, (sub c2, c1)), MASK)
10477 if (N0.getOpcode() == ISD::SHL &&
10478 (N0.getOperand(1) == N1 || N0->hasOneUse()) &&
10479 TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
10480 auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS,
10481 ConstantSDNode *RHS) {
10482 const APInt &LHSC = LHS->getAPIntValue();
10483 const APInt &RHSC = RHS->getAPIntValue();
10484 return LHSC.ult(OpSizeInBits) && RHSC.ult(OpSizeInBits) &&
10485 LHSC.getZExtValue() <= RHSC.getZExtValue();
10486 };
10487 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
10488 /*AllowUndefs*/ false,
10489 /*AllowTypeMismatch*/ true)) {
10490 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10491 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
10492 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
10493 Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N01);
10494 Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, Diff);
10495 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
10496 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
10497 }
10498 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
10499 /*AllowUndefs*/ false,
10500 /*AllowTypeMismatch*/ true)) {
10501 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10502 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
10503 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
10504 Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N1);
10505 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Diff);
10506 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
10507 }
10508 }
10509
10510 // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
10511 // TODO - support non-uniform vector shift amounts.
10512 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
10513 // Shifting in all undef bits?
10514 EVT SmallVT = N0.getOperand(0).getValueType();
10515 unsigned BitSize = SmallVT.getScalarSizeInBits();
10516 if (N1C->getAPIntValue().uge(BitSize))
10517 return DAG.getUNDEF(VT);
10518
10519 if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
10520 uint64_t ShiftAmt = N1C->getZExtValue();
10521 SDLoc DL0(N0);
10522 SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
10523 N0.getOperand(0),
10524 DAG.getConstant(ShiftAmt, DL0,
10525 getShiftAmountTy(SmallVT)));
10526 AddToWorklist(SmallShift.getNode());
10527 APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
10528 return DAG.getNode(ISD::AND, DL, VT,
10529 DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
10530 DAG.getConstant(Mask, DL, VT));
10531 }
10532 }
10533
10534 // fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign
10535 // bit, which is unmodified by sra.
10536 if (N1C && N1C->getAPIntValue() == (OpSizeInBits - 1)) {
10537 if (N0.getOpcode() == ISD::SRA)
10538 return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), N1);
10539 }
10540
10541 // fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit), and x has a power
10542 // of two bitwidth. The "5" represents (log2 (bitwidth x)).
10543 if (N1C && N0.getOpcode() == ISD::CTLZ &&
10544 isPowerOf2_32(OpSizeInBits) &&
10545 N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
10546 KnownBits Known = DAG.computeKnownBits(N0.getOperand(0));
10547
10548 // If any of the input bits are KnownOne, then the input couldn't be all
10549 // zeros, thus the result of the srl will always be zero.
10550 if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
10551
10552 // If all of the bits input to the ctlz node are known to be zero, then
10553 // the result of the ctlz is "32" and the result of the shift is one.
10554 APInt UnknownBits = ~Known.Zero;
10555 if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
10556
10557 // Otherwise, check to see if there is exactly one bit input to the ctlz.
10558 if (UnknownBits.isPowerOf2()) {
10559 // Okay, we know that only the single bit specified by UnknownBits
10560 // could be set on input to the CTLZ node. If this bit is set, the SRL
10561 // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
10562 // to an SRL/XOR pair, which is likely to simplify more.
10563 unsigned ShAmt = UnknownBits.countr_zero();
10564 SDValue Op = N0.getOperand(0);
10565
10566 if (ShAmt) {
10567 SDLoc DL(N0);
10568 Op = DAG.getNode(ISD::SRL, DL, VT, Op,
10569 DAG.getConstant(ShAmt, DL,
10570 getShiftAmountTy(Op.getValueType())));
10571 AddToWorklist(Op.getNode());
10572 }
10573 return DAG.getNode(ISD::XOR, DL, VT, Op, DAG.getConstant(1, DL, VT));
10574 }
10575 }
10576
10577 // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
10578 if (N1.getOpcode() == ISD::TRUNCATE &&
10579 N1.getOperand(0).getOpcode() == ISD::AND) {
10580 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
10581 return DAG.getNode(ISD::SRL, DL, VT, N0, NewOp1);
10582 }
10583
10584 // fold operands of srl based on knowledge that the low bits are not
10585 // demanded.
10586 if (SimplifyDemandedBits(SDValue(N, 0)))
10587 return SDValue(N, 0);
10588
10589 if (N1C && !N1C->isOpaque())
10590 if (SDValue NewSRL = visitShiftByConstant(N))
10591 return NewSRL;
10592
10593 // Attempt to convert a srl of a load into a narrower zero-extending load.
10594 if (SDValue NarrowLoad = reduceLoadWidth(N))
10595 return NarrowLoad;
10596
10597 // Here is a common situation. We want to optimize:
10598 //
10599 // %a = ...
10600 // %b = and i32 %a, 2
10601 // %c = srl i32 %b, 1
10602 // brcond i32 %c ...
10603 //
10604 // into
10605 //
10606 // %a = ...
10607 // %b = and %a, 2
10608 // %c = setcc eq %b, 0
10609 // brcond %c ...
10610 //
10611 // However, after the source operand of the SRL is optimized into an AND, the SRL
10612 // itself may not be optimized further. Look for it and add the BRCOND into
10613 // the worklist.
10614 //
10615 // This also tends to happen for binary operations when SimplifyDemandedBits
10616 // is involved.
10617 //
10618 // FIXME: This is unnecessary if we process the DAG in topological order,
10619 // which we plan to do. This workaround can be removed once the DAG is
10620 // processed in topological order.
10621 if (N->hasOneUse()) {
10622 SDNode *Use = *N->use_begin();
10623
10624 // Look past the truncate.
10625 if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse())
10626 Use = *Use->use_begin();
10627
10628 if (Use->getOpcode() == ISD::BRCOND || Use->getOpcode() == ISD::AND ||
10629 Use->getOpcode() == ISD::OR || Use->getOpcode() == ISD::XOR)
10630 AddToWorklist(Use);
10631 }
10632
10633 // Try to transform this shift into a multiply-high if
10634 // it matches the appropriate pattern detected in combineShiftToMULH.
10635 if (SDValue MULH = combineShiftToMULH(N, DL, DAG, TLI))
10636 return MULH;
10637
10638 return SDValue();
10639}
10640
10641SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
10642 EVT VT = N->getValueType(0);
10643 SDValue N0 = N->getOperand(0);
10644 SDValue N1 = N->getOperand(1);
10645 SDValue N2 = N->getOperand(2);
10646 bool IsFSHL = N->getOpcode() == ISD::FSHL;
10647 unsigned BitWidth = VT.getScalarSizeInBits();
10648
10649 // fold (fshl N0, N1, 0) -> N0
10650 // fold (fshr N0, N1, 0) -> N1
10651 if (isPowerOf2_32(BitWidth))
10652 if (DAG.MaskedValueIsZero(
10653 N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
10654 return IsFSHL ? N0 : N1;
10655
10656 auto IsUndefOrZero = [](SDValue V) {
10657 return V.isUndef() || isNullOrNullSplat(V, /*AllowUndefs*/ true);
10658 };
10659
10660 // TODO - support non-uniform vector shift amounts.
10661 if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) {
10662 EVT ShAmtTy = N2.getValueType();
10663
10664 // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
10665 if (Cst->getAPIntValue().uge(BitWidth)) {
10666 uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth);
10667 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0, N1,
10668 DAG.getConstant(RotAmt, SDLoc(N), ShAmtTy));
10669 }
10670
10671 unsigned ShAmt = Cst->getZExtValue();
10672 if (ShAmt == 0)
10673 return IsFSHL ? N0 : N1;
10674
10675 // fold fshl(undef_or_zero, N1, C) -> lshr(N1, BW-C)
10676 // fold fshr(undef_or_zero, N1, C) -> lshr(N1, C)
10677 // fold fshl(N0, undef_or_zero, C) -> shl(N0, C)
10678 // fold fshr(N0, undef_or_zero, C) -> shl(N0, BW-C)
10679 if (IsUndefOrZero(N0))
10680 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1,
10681 DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt,
10682 SDLoc(N), ShAmtTy));
10683 if (IsUndefOrZero(N1))
10684 return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
10685 DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt,
10686 SDLoc(N), ShAmtTy));
10687
10688 // fold (fshl ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
10689 // fold (fshr ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
10690 // TODO - bigendian support once we have test coverage.
10691 // TODO - can we merge this with CombineConsecutiveLoads/MatchLoadCombine?
10692 // TODO - permit LHS EXTLOAD if extensions are shifted out.
10693 if ((BitWidth % 8) == 0 && (ShAmt % 8) == 0 && !VT.isVector() &&
10694 !DAG.getDataLayout().isBigEndian()) {
10695 auto *LHS = dyn_cast<LoadSDNode>(N0);
10696 auto *RHS = dyn_cast<LoadSDNode>(N1);
10697 if (LHS && RHS && LHS->isSimple() && RHS->isSimple() &&
10698 LHS->getAddressSpace() == RHS->getAddressSpace() &&
10699 (LHS->hasOneUse() || RHS->hasOneUse()) && ISD::isNON_EXTLoad(RHS) &&
10700 ISD::isNON_EXTLoad(LHS)) {
10701 if (DAG.areNonVolatileConsecutiveLoads(LHS, RHS, BitWidth / 8, 1)) {
10702 SDLoc DL(RHS);
10703 uint64_t PtrOff =
10704 IsFSHL ? (((BitWidth - ShAmt) % BitWidth) / 8) : (ShAmt / 8);
10705 Align NewAlign = commonAlignment(RHS->getAlign(), PtrOff);
10706 unsigned Fast = 0;
10707 if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
10708 RHS->getAddressSpace(), NewAlign,
10709 RHS->getMemOperand()->getFlags(), &Fast) &&
10710 Fast) {
10711 SDValue NewPtr = DAG.getMemBasePlusOffset(
10712 RHS->getBasePtr(), TypeSize::getFixed(PtrOff), DL);
10713 AddToWorklist(NewPtr.getNode());
10714 SDValue Load = DAG.getLoad(
10715 VT, DL, RHS->getChain(), NewPtr,
10716 RHS->getPointerInfo().getWithOffset(PtrOff), NewAlign,
10717 RHS->getMemOperand()->getFlags(), RHS->getAAInfo());
10718 // Replace the old load's chain with the new load's chain.
10719 WorklistRemover DeadNodes(*this);
10720 DAG.ReplaceAllUsesOfValueWith(N1.getValue(1), Load.getValue(1));
10721 return Load;
10722 }
10723 }
10724 }
10725 }
10726 }
10727
10728 // fold fshr(undef_or_zero, N1, N2) -> lshr(N1, N2)
10729 // fold fshl(N0, undef_or_zero, N2) -> shl(N0, N2)
10730 // iff we know the shift amount is in range.
10731 // TODO: when is it worth doing SUB(BW, N2) as well?
10732 if (isPowerOf2_32(BitWidth)) {
10733 APInt ModuloBits(N2.getScalarValueSizeInBits(), BitWidth - 1);
10734 if (IsUndefOrZero(N0) && !IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
10735 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1, N2);
10736 if (IsUndefOrZero(N1) && IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
10737 return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N2);
10738 }
10739
10740 // fold (fshl N0, N0, N2) -> (rotl N0, N2)
10741 // fold (fshr N0, N0, N2) -> (rotr N0, N2)
10742 // TODO: Investigate flipping this rotate if only one is legal; if the funnel
10743 // shift is legal as well, we might be better off avoiding the non-constant (BW - N2).
10744 unsigned RotOpc = IsFSHL ? ISD::ROTL : ISD::ROTR;
10745 if (N0 == N1 && hasOperation(RotOpc, VT))
10746 return DAG.getNode(RotOpc, SDLoc(N), VT, N0, N2);
10747
10748 // Simplify, based on bits shifted out of N0/N1.
10749 if (SimplifyDemandedBits(SDValue(N, 0)))
10750 return SDValue(N, 0);
10751
10752 return SDValue();
10753}
10754
10755SDValue DAGCombiner::visitSHLSAT(SDNode *N) {
10756 SDValue N0 = N->getOperand(0);
10757 SDValue N1 = N->getOperand(1);
10758 if (SDValue V = DAG.simplifyShift(N0, N1))
10759 return V;
10760
10761 SDLoc DL(N);
10762 EVT VT = N0.getValueType();
10763
10764 // fold (*shlsat c1, c2) -> c1<<c2
10765 if (SDValue C = DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1}))
10766 return C;
10767
10768 ConstantSDNode *N1C = isConstOrConstSplat(N1);
10769
10770 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) {
10771 // fold (sshlsat x, c) -> (shl x, c)
10772 if (N->getOpcode() == ISD::SSHLSAT && N1C &&
10773 N1C->getAPIntValue().ult(DAG.ComputeNumSignBits(N0)))
10774 return DAG.getNode(ISD::SHL, DL, VT, N0, N1);
10775
10776 // fold (ushlsat x, c) -> (shl x, c)
10777 if (N->getOpcode() == ISD::USHLSAT && N1C &&
10778 N1C->getAPIntValue().ule(
10779 DAG.computeKnownBits(N0).countMinLeadingZeros()))
10780 return DAG.getNode(ISD::SHL, DL, VT, N0, N1);
10781 }
10782
10783 return SDValue();
10784}
10785
10786 // Given an ABS node, detect the following patterns:
10787// (ABS (SUB (EXTEND a), (EXTEND b))).
10788// (TRUNC (ABS (SUB (EXTEND a), (EXTEND b)))).
10789// Generates UABD/SABD instruction.
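// For example (illustrative): (ABS (SUB (ZERO_EXTEND i8 a), (ZERO_EXTEND i8 b)))
// becomes (ZERO_EXTEND (ABDU a, b)), since |a - b| for unsigned i8 values always
// fits in 8 bits.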
10790SDValue DAGCombiner::foldABSToABD(SDNode *N, const SDLoc &DL) {
10791 EVT SrcVT = N->getValueType(0);
10792
10793 if (N->getOpcode() == ISD::TRUNCATE)
10794 N = N->getOperand(0).getNode();
10795
10796 if (N->getOpcode() != ISD::ABS)
10797 return SDValue();
10798
10799 EVT VT = N->getValueType(0);
10800 SDValue AbsOp1 = N->getOperand(0);
10801 SDValue Op0, Op1;
10802
10803 if (AbsOp1.getOpcode() != ISD::SUB)
10804 return SDValue();
10805
10806 Op0 = AbsOp1.getOperand(0);
10807 Op1 = AbsOp1.getOperand(1);
10808
10809 unsigned Opc0 = Op0.getOpcode();
10810
10811 // Check if the operands of the sub are (zero|sign)-extended.
10812 // TODO: Should we use ValueTracking instead?
10813 if (Opc0 != Op1.getOpcode() ||
10814 (Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND &&
10815 Opc0 != ISD::SIGN_EXTEND_INREG)) {
10816 // fold (abs (sub nsw x, y)) -> abds(x, y)
10817 if (AbsOp1->getFlags().hasNoSignedWrap() && hasOperation(ISD::ABDS, VT) &&
10818 TLI.preferABDSToABSWithNSW(VT)) {
10819 SDValue ABD = DAG.getNode(ISD::ABDS, DL, VT, Op0, Op1);
10820 return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
10821 }
10822 return SDValue();
10823 }
10824
10825 EVT VT0, VT1;
10826 if (Opc0 == ISD::SIGN_EXTEND_INREG) {
10827 VT0 = cast<VTSDNode>(Op0.getOperand(1))->getVT();
10828 VT1 = cast<VTSDNode>(Op1.getOperand(1))->getVT();
10829 } else {
10830 VT0 = Op0.getOperand(0).getValueType();
10831 VT1 = Op1.getOperand(0).getValueType();
10832 }
10833 unsigned ABDOpcode = (Opc0 == ISD::ZERO_EXTEND) ? ISD::ABDU : ISD::ABDS;
10834
10835 // fold abs(sext(x) - sext(y)) -> zext(abds(x, y))
10836 // fold abs(zext(x) - zext(y)) -> zext(abdu(x, y))
10837 EVT MaxVT = VT0.bitsGT(VT1) ? VT0 : VT1;
10838 if ((VT0 == MaxVT || Op0->hasOneUse()) &&
10839 (VT1 == MaxVT || Op1->hasOneUse()) && hasOperation(ABDOpcode, MaxVT)) {
10840 SDValue ABD = DAG.getNode(ABDOpcode, DL, MaxVT,
10841 DAG.getNode(ISD::TRUNCATE, DL, MaxVT, Op0),
10842 DAG.getNode(ISD::TRUNCATE, DL, MaxVT, Op1));
10843 ABD = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, ABD);
10844 return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
10845 }
10846
10847 // fold abs(sext(x) - sext(y)) -> abds(sext(x), sext(y))
10848 // fold abs(zext(x) - zext(y)) -> abdu(zext(x), zext(y))
10849 if (hasOperation(ABDOpcode, VT)) {
10850 SDValue ABD = DAG.getNode(ABDOpcode, DL, VT, Op0, Op1);
10851 return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
10852 }
10853
10854 return SDValue();
10855}
10856
10857SDValue DAGCombiner::visitABS(SDNode *N) {
10858 SDValue N0 = N->getOperand(0);
10859 EVT VT = N->getValueType(0);
10860 SDLoc DL(N);
10861
10862 // fold (abs c1) -> c2
10863 if (SDValue C = DAG.FoldConstantArithmetic(ISD::ABS, DL, VT, {N0}))
10864 return C;
10865 // fold (abs (abs x)) -> (abs x)
10866 if (N0.getOpcode() == ISD::ABS)
10867 return N0;
10868 // fold (abs x) -> x iff not-negative
10869 if (DAG.SignBitIsZero(N0))
10870 return N0;
10871
10872 if (SDValue ABD = foldABSToABD(N, DL))
10873 return ABD;
10874
10875 // fold (abs (sign_extend_inreg x)) -> (zero_extend (abs (truncate x)))
10876 // iff zero_extend/truncate are free.
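// e.g. on i32: abs(sign_extend_inreg x, i8) --> zext(abs(trunc x to i8)) when i8 ABS
// is available and the truncate/zero_extend are free.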
10877 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
10878 EVT ExtVT = cast<VTSDNode>(N0.getOperand(1))->getVT();
10879 if (TLI.isTruncateFree(VT, ExtVT) && TLI.isZExtFree(ExtVT, VT) &&
10880 TLI.isTypeDesirableForOp(ISD::ABS, ExtVT) &&
10881 hasOperation(ISD::ABS, ExtVT)) {
10882 return DAG.getNode(
10883 ISD::ZERO_EXTEND, DL, VT,
10884 DAG.getNode(ISD::ABS, DL, ExtVT,
10885 DAG.getNode(ISD::TRUNCATE, DL, ExtVT, N0.getOperand(0))));
10886 }
10887 }
10888
10889 return SDValue();
10890}
10891
10892SDValue DAGCombiner::visitBSWAP(SDNode *N) {
10893 SDValue N0 = N->getOperand(0);
10894 EVT VT = N->getValueType(0);
10895 SDLoc DL(N);
10896
10897 // fold (bswap c1) -> c2
10898 if (SDValue C = DAG.FoldConstantArithmetic(ISD::BSWAP, DL, VT, {N0}))
10899 return C;
10900 // fold (bswap (bswap x)) -> x
10901 if (N0.getOpcode() == ISD::BSWAP)
10902 return N0.getOperand(0);
10903
10904 // Canonicalize bswap(bitreverse(x)) -> bitreverse(bswap(x)). If bitreverse
10905 // isn't supported, it will be expanded to bswap followed by a manual reversal
10906 // of bits in each byte. By placing bswaps before bitreverse, we can remove
10907 // the two bswaps if the bitreverse gets expanded.
10908 if (N0.getOpcode() == ISD::BITREVERSE && N0.hasOneUse()) {
10909 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, N0.getOperand(0));
10910 return DAG.getNode(ISD::BITREVERSE, DL, VT, BSwap);
10911 }
10912
10913 // fold (bswap shl(x,c)) -> (zext(bswap(trunc(shl(x,sub(c,bw/2))))))
10914 // iff c >= bw/2 (i.e. the lower half is known zero)
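// e.g. for i64: bswap(x << 32) --> zext(bswap(trunc x to i32) to i64).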
10915 unsigned BW = VT.getScalarSizeInBits();
10916 if (BW >= 32 && N0.getOpcode() == ISD::SHL && N0.hasOneUse()) {
10917 auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
10918 EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), BW / 2);
10919 if (ShAmt && ShAmt->getAPIntValue().ult(BW) &&
10920 ShAmt->getZExtValue() >= (BW / 2) &&
10921 (ShAmt->getZExtValue() % 16) == 0 && TLI.isTypeLegal(HalfVT) &&
10922 TLI.isTruncateFree(VT, HalfVT) &&
10923 (!LegalOperations || hasOperation(ISD::BSWAP, HalfVT))) {
10924 SDValue Res = N0.getOperand(0);
10925 if (uint64_t NewShAmt = (ShAmt->getZExtValue() - (BW / 2)))
10926 Res = DAG.getNode(ISD::SHL, DL, VT, Res,
10927 DAG.getConstant(NewShAmt, DL, getShiftAmountTy(VT)));
10928 Res = DAG.getZExtOrTrunc(Res, DL, HalfVT);
10929 Res = DAG.getNode(ISD::BSWAP, DL, HalfVT, Res);
10930 return DAG.getZExtOrTrunc(Res, DL, VT);
10931 }
10932 }
10933
10934 // Try to canonicalize bswap-of-logical-shift-by-8-bit-multiple as
10935 // inverse-shift-of-bswap:
10936 // bswap (X u<< C) --> (bswap X) u>> C
10937 // bswap (X u>> C) --> (bswap X) u<< C
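// e.g. bswap(x << 16) --> (bswap x) >> 16; shifting by whole bytes commutes with the
// byte reversal, with the shift direction flipped.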
10938 if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
10939 N0.hasOneUse()) {
10940 auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
10941 if (ShAmt && ShAmt->getAPIntValue().ult(BW) &&
10942 ShAmt->getZExtValue() % 8 == 0) {
10943 SDValue NewSwap = DAG.getNode(ISD::BSWAP, DL, VT, N0.getOperand(0));
10944 unsigned InverseShift = N0.getOpcode() == ISD::SHL ? ISD::SRL : ISD::SHL;
10945 return DAG.getNode(InverseShift, DL, VT, NewSwap, N0.getOperand(1));
10946 }
10947 }
10948
10949 if (SDValue V = foldBitOrderCrossLogicOp(N, DAG))
10950 return V;
10951
10952 return SDValue();
10953}
10954
10955SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
10956 SDValue N0 = N->getOperand(0);
10957 EVT VT = N->getValueType(0);
10958 SDLoc DL(N);
10959
10960 // fold (bitreverse c1) -> c2
10961 if (SDValue C = DAG.FoldConstantArithmetic(ISD::BITREVERSE, DL, VT, {N0}))
10962 return C;
10963 // fold (bitreverse (bitreverse x)) -> x
10964 if (N0.getOpcode() == ISD::BITREVERSE)
10965 return N0.getOperand(0);
10966 return SDValue();
10967}
10968
10969SDValue DAGCombiner::visitCTLZ(SDNode *N) {
10970 SDValue N0 = N->getOperand(0);
10971 EVT VT = N->getValueType(0);
10972 SDLoc DL(N);
10973
10974 // fold (ctlz c1) -> c2
10975 if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTLZ, DL, VT, {N0}))
10976 return C;
10977
10978 // If the value is known never to be zero, switch to the undef version.
10979 if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT))
10980 if (DAG.isKnownNeverZero(N0))
10981 return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, DL, VT, N0);
10982
10983 return SDValue();
10984}
10985
10986SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
10987 SDValue N0 = N->getOperand(0);
10988 EVT VT = N->getValueType(0);
10989 SDLoc DL(N);
10990
10991 // fold (ctlz_zero_undef c1) -> c2
10992 if (SDValue C =
10993 DAG.FoldConstantArithmetic(ISD::CTLZ_ZERO_UNDEF, DL, VT, {N0}))
10994 return C;
10995 return SDValue();
10996}
10997
10998SDValue DAGCombiner::visitCTTZ(SDNode *N) {
10999 SDValue N0 = N->getOperand(0);
11000 EVT VT = N->getValueType(0);
11001 SDLoc DL(N);
11002
11003 // fold (cttz c1) -> c2
11004 if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTTZ, DL, VT, {N0}))
11005 return C;
11006
11007 // If the value is known never to be zero, switch to the undef version.
11008 if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT))
11009 if (DAG.isKnownNeverZero(N0))
11010 return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, DL, VT, N0);
11011
11012 return SDValue();
11013}
11014
11015SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
11016 SDValue N0 = N->getOperand(0);
11017 EVT VT = N->getValueType(0);
11018 SDLoc DL(N);
11019
11020 // fold (cttz_zero_undef c1) -> c2
11021 if (SDValue C =
11022 DAG.FoldConstantArithmetic(ISD::CTTZ_ZERO_UNDEF, DL, VT, {N0}))
11023 return C;
11024 return SDValue();
11025}
11026
11027SDValue DAGCombiner::visitCTPOP(SDNode *N) {
11028 SDValue N0 = N->getOperand(0);
11029 EVT VT = N->getValueType(0);
11030 unsigned NumBits = VT.getScalarSizeInBits();
11031 SDLoc DL(N);
11032
11033 // fold (ctpop c1) -> c2
11034 if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTPOP, DL, VT, {N0}))
11035 return C;
11036
11037 // If the source is being shifted, but the shift doesn't affect any active bits,
11038 // then we can call CTPOP on the shift source directly.
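// e.g. ctpop(x >> 8) --> ctpop(x) when the low 8 bits of x are known to be zero.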
11039 if (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SHL) {
11040 if (ConstantSDNode *AmtC = isConstOrConstSplat(N0.getOperand(1))) {
11041 const APInt &Amt = AmtC->getAPIntValue();
11042 if (Amt.ult(NumBits)) {
11043 KnownBits KnownSrc = DAG.computeKnownBits(N0.getOperand(0));
11044 if ((N0.getOpcode() == ISD::SRL &&
11045 Amt.ule(KnownSrc.countMinTrailingZeros())) ||
11046 (N0.getOpcode() == ISD::SHL &&
11047 Amt.ule(KnownSrc.countMinLeadingZeros()))) {
11048 return DAG.getNode(ISD::CTPOP, DL, VT, N0.getOperand(0));
11049 }
11050 }
11051 }
11052 }
11053
11054 // If the upper bits are known to be zero, then see if it's profitable to
11055 // only count the lower bits.
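// e.g. i64 ctpop(x) with the top 32 bits known zero --> zext(ctpop(trunc x to i32) to i64).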
11056 if (VT.isScalarInteger() && NumBits > 8 && (NumBits & 1) == 0) {
11057 EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), NumBits / 2);
11058 if (hasOperation(ISD::CTPOP, HalfVT) &&
11059 TLI.isTypeDesirableForOp(ISD::CTPOP, HalfVT) &&
11060 TLI.isTruncateFree(N0, HalfVT) && TLI.isZExtFree(HalfVT, VT)) {
11061 APInt UpperBits = APInt::getHighBitsSet(NumBits, NumBits / 2);
11062 if (DAG.MaskedValueIsZero(N0, UpperBits)) {
11063 SDValue PopCnt = DAG.getNode(ISD::CTPOP, DL, HalfVT,
11064 DAG.getZExtOrTrunc(N0, DL, HalfVT));
11065 return DAG.getZExtOrTrunc(PopCnt, DL, VT);
11066 }
11067 }
11068 }
11069
11070 return SDValue();
11071}
11072
11073// FIXME: This should be checking for no signed zeros on individual operands, as
11074 // well as no NaNs.
11075 static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS,
11076 SDValue RHS,
11077 const TargetLowering &TLI) {
11078 const TargetOptions &Options = DAG.getTarget().Options;
11079 EVT VT = LHS.getValueType();
11080
11081 return Options.NoSignedZerosFPMath && VT.isFloatingPoint() &&
11082 TLI.isProfitableToCombineMinNumMaxNum(VT) &&
11083 DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS);
11084 }
11085
11086 static SDValue combineMinNumMaxNumImpl(const SDLoc &DL, EVT VT, SDValue LHS,
11087 SDValue RHS, SDValue True, SDValue False,
11088 ISD::CondCode CC,
11089 const TargetLowering &TLI,
11090 SelectionDAG &DAG) {
11091 EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
11092 switch (CC) {
11093 case ISD::SETOLT:
11094 case ISD::SETOLE:
11095 case ISD::SETLT:
11096 case ISD::SETLE:
11097 case ISD::SETULT:
11098 case ISD::SETULE: {
11099 // Since the operands are known never to be NaN at this point, either fminnum
11100 // or fminnum_ieee is OK. Try the IEEE version first, since fminnum is
11101 // expanded in terms of it.
11102 unsigned IEEEOpcode = (LHS == True) ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
11103 if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
11104 return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
11105
11106 unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
11107 if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
11108 return DAG.getNode(Opcode, DL, VT, LHS, RHS);
11109 return SDValue();
11110 }
11111 case ISD::SETOGT:
11112 case ISD::SETOGE:
11113 case ISD::SETGT:
11114 case ISD::SETGE:
11115 case ISD::SETUGT:
11116 case ISD::SETUGE: {
11117 unsigned IEEEOpcode = (LHS == True) ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
11118 if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
11119 return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
11120
11121 unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
11122 if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
11123 return DAG.getNode(Opcode, DL, VT, LHS, RHS);
11124 return SDValue();
11125 }
11126 default:
11127 return SDValue();
11128 }
11129}
11130
11131/// Generate Min/Max node
11132SDValue DAGCombiner::combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
11133 SDValue RHS, SDValue True,
11134 SDValue False, ISD::CondCode CC) {
11135 if ((LHS == True && RHS == False) || (LHS == False && RHS == True))
11136 return combineMinNumMaxNumImpl(DL, VT, LHS, RHS, True, False, CC, TLI, DAG);
11137
11138 // If we can't directly match this, try to see if we can pull an fneg out of
11139 // the select.
11140 SDValue NegTrue = TLI.getCheaperNegatedExpression(
11141 True, DAG, LegalOperations, ForCodeSize);
11142 if (!NegTrue)
11143 return SDValue();
11144
11145 HandleSDNode NegTrueHandle(NegTrue);
11146
11147 // Try to unfold an fneg from the select if we are comparing the negated
11148 // constant.
11149 //
11150 // select (setcc x, K) (fneg x), -K -> fneg(minnum(x, K))
11151 //
11152 // TODO: Handle fabs
11153 if (LHS == NegTrue) {
11154 // If we can't directly match this, try to see if we can pull an fneg out of
11155 // the select.
11156 SDValue NegRHS = TLI.getCheaperNegatedExpression(
11157 RHS, DAG, LegalOperations, ForCodeSize);
11158 if (NegRHS) {
11159 HandleSDNode NegRHSHandle(NegRHS);
11160 if (NegRHS == False) {
11161 SDValue Combined = combineMinNumMaxNumImpl(DL, VT, LHS, RHS, NegTrue,
11162 False, CC, TLI, DAG);
11163 if (Combined)
11164 return DAG.getNode(ISD::FNEG, DL, VT, Combined);
11165 }
11166 }
11167 }
11168
11169 return SDValue();
11170}
11171
11172/// If a (v)select has a condition value that is a sign-bit test, try to smear
11173/// the condition operand sign-bit across the value width and use it as a mask.
11174 static SDValue foldSelectOfConstantsUsingSra(SDNode *N, const SDLoc &DL,
11175 SelectionDAG &DAG) {
11176 SDValue Cond = N->getOperand(0);
11177 SDValue C1 = N->getOperand(1);
11178 SDValue C2 = N->getOperand(2);
11179 if (!isConstantOrConstantVector(C1) || !isConstantOrConstantVector(C2))
11180 return SDValue();
11181
11182 EVT VT = N->getValueType(0);
11183 if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse() ||
11184 VT != Cond.getOperand(0).getValueType())
11185 return SDValue();
11186
11187 // The inverted-condition + commuted-select variants of these patterns are
11188 // canonicalized to these forms in IR.
11189 SDValue X = Cond.getOperand(0);
11190 SDValue CondC = Cond.getOperand(1);
11191 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
11192 if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CondC) &&
11193 isAllOnesOrAllOnesSplat(C2)) {
11194 // i32 X > -1 ? C1 : -1 --> (X >>s 31) | C1
11195 SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
11196 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
11197 return DAG.getNode(ISD::OR, DL, VT, Sra, C1);
11198 }
11199 if (CC == ISD::SETLT && isNullOrNullSplat(CondC) && isNullOrNullSplat(C2)) {
11200 // i8 X < 0 ? C1 : 0 --> (X >>s 7) & C1
11201 SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
11202 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
11203 return DAG.getNode(ISD::AND, DL, VT, Sra, C1);
11204 }
11205 return SDValue();
11206}
11207
11208 static bool shouldConvertSelectOfConstantsToMath(const SDValue &Cond, EVT VT,
11209 const TargetLowering &TLI) {
11210 if (!TLI.convertSelectOfConstantsToMath(VT))
11211 return false;
11212
11213 if (Cond.getOpcode() != ISD::SETCC || !Cond->hasOneUse())
11214 return true;
11215 if (!TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))
11216 return true;
11217
11218 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
11219 if (CC == ISD::SETLT && isNullOrNullSplat(Cond.getOperand(1)))
11220 return true;
11221 if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(Cond.getOperand(1)))
11222 return true;
11223
11224 return false;
11225}
11226
11227SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
11228 SDValue Cond = N->getOperand(0);
11229 SDValue N1 = N->getOperand(1);
11230 SDValue N2 = N->getOperand(2);
11231 EVT VT = N->getValueType(0);
11232 EVT CondVT = Cond.getValueType();
11233 SDLoc DL(N);
11234
11235 if (!VT.isInteger())
11236 return SDValue();
11237
11238 auto *C1 = dyn_cast<ConstantSDNode>(N1);
11239 auto *C2 = dyn_cast<ConstantSDNode>(N2);
11240 if (!C1 || !C2)
11241 return SDValue();
11242
11243 if (CondVT != MVT::i1 || LegalOperations) {
11244 // fold (select Cond, 0, 1) -> (xor Cond, 1)
11245 // We can't do this reliably if integer-based booleans have different contents
11246 // from floating-point-based booleans. This is because we can't tell whether we
11247 // have an integer-based boolean or a floating-point-based boolean unless we
11248 // can find the SETCC that produced it and inspect its operands. This is
11249 // fairly easy if C is the SETCC node, but it can potentially be
11250 // undiscoverable (or not reasonably discoverable). For example, it could be
11251 // in another basic block or it could require searching a complicated
11252 // expression.
11253 if (CondVT.isInteger() &&
11254 TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==
11255 TargetLowering::ZeroOrOneBooleanContent &&
11256 TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
11257 TargetLowering::ZeroOrOneBooleanContent &&
11258 C1->isZero() && C2->isOne()) {
11259 SDValue NotCond =
11260 DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
11261 if (VT.bitsEq(CondVT))
11262 return NotCond;
11263 return DAG.getZExtOrTrunc(NotCond, DL, VT);
11264 }
11265
11266 return SDValue();
11267 }
11268
11269 // Only do this before legalization to avoid conflicting with target-specific
11270 // transforms in the other direction (create a select from a zext/sext). There
11271 // is also a target-independent combine here in DAGCombiner in the other
11272 // direction for (select Cond, -1, 0) when the condition is not i1.
11273 assert(CondVT == MVT::i1 && !LegalOperations);
11274
11275 // select Cond, 1, 0 --> zext (Cond)
11276 if (C1->isOne() && C2->isZero())
11277 return DAG.getZExtOrTrunc(Cond, DL, VT);
11278
11279 // select Cond, -1, 0 --> sext (Cond)
11280 if (C1->isAllOnes() && C2->isZero())
11281 return DAG.getSExtOrTrunc(Cond, DL, VT);
11282
11283 // select Cond, 0, 1 --> zext (!Cond)
11284 if (C1->isZero() && C2->isOne()) {
11285 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
11286 NotCond = DAG.getZExtOrTrunc(NotCond, DL, VT);
11287 return NotCond;
11288 }
11289
11290 // select Cond, 0, -1 --> sext (!Cond)
11291 if (C1->isZero() && C2->isAllOnes()) {
11292 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
11293 NotCond = DAG.getSExtOrTrunc(NotCond, DL, VT);
11294 return NotCond;
11295 }
11296
11297 // Use a target hook because some targets may prefer to transform in the
11298 // other direction.
11299 if (!shouldConvertSelectOfConstantsToMath(Cond, VT, TLI))
11300 return SDValue();
11301
11302 // For any constants that differ by 1, we can transform the select into
11303 // an extend and add.
11304 const APInt &C1Val = C1->getAPIntValue();
11305 const APInt &C2Val = C2->getAPIntValue();
11306
11307 // select Cond, C1, C1-1 --> add (zext Cond), C1-1
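// e.g. select i1 %c, 7, 6 --> add (zext %c), 6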
11308 if (C1Val - 1 == C2Val) {
11309 Cond = DAG.getZExtOrTrunc(Cond, DL, VT);
11310 return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
11311 }
11312
11313 // select Cond, C1, C1+1 --> add (sext Cond), C1+1
11314 if (C1Val + 1 == C2Val) {
11315 Cond = DAG.getSExtOrTrunc(Cond, DL, VT);
11316 return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
11317 }
11318
11319 // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
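// e.g. select i1 %c, 16, 0 --> shl (zext %c), 4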
11320 if (C1Val.isPowerOf2() && C2Val.isZero()) {
11321 Cond = DAG.getZExtOrTrunc(Cond, DL, VT);
11322 SDValue ShAmtC =
11323 DAG.getShiftAmountConstant(C1Val.exactLogBase2(), VT, DL);
11324 return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC);
11325 }
11326
11327 // select Cond, -1, C --> or (sext Cond), C
11328 if (C1->isAllOnes()) {
11329 Cond = DAG.getSExtOrTrunc(Cond, DL, VT);
11330 return DAG.getNode(ISD::OR, DL, VT, Cond, N2);
11331 }
11332
11333 // select Cond, C, -1 --> or (sext (not Cond)), C
11334 if (C2->isAllOnes()) {
11335 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
11336 NotCond = DAG.getSExtOrTrunc(NotCond, DL, VT);
11337 return DAG.getNode(ISD::OR, DL, VT, NotCond, N1);
11338 }
11339
11340 if (SDValue V = foldSelectOfConstantsUsingSra(N, DL, DAG))
11341 return V;
11342
11343 return SDValue();
11344}
11345
11346template <class MatchContextClass>
11347 static SDValue foldBoolSelectToLogic(SDNode *N, SelectionDAG &DAG) {
11348 assert((N->getOpcode() == ISD::SELECT || N->getOpcode() == ISD::VSELECT ||
11349 N->getOpcode() == ISD::VP_SELECT) &&
11350 "Expected a (v)(vp.)select");
11351 SDValue Cond = N->getOperand(0);
11352 SDValue T = N->getOperand(1), F = N->getOperand(2);
11353 EVT VT = N->getValueType(0);
11354 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11355 MatchContextClass matcher(DAG, TLI, N);
11356
11357 if (VT != Cond.getValueType() || VT.getScalarSizeInBits() != 1)
11358 return SDValue();
11359
11360 // select Cond, Cond, F --> or Cond, F
11361 // select Cond, 1, F --> or Cond, F
11362 if (Cond == T || isOneOrOneSplat(T, /* AllowUndefs */ true))
11363 return matcher.getNode(ISD::OR, SDLoc(N), VT, Cond, F);
11364
11365 // select Cond, T, Cond --> and Cond, T
11366 // select Cond, T, 0 --> and Cond, T
11367 if (Cond == F || isNullOrNullSplat(F, /* AllowUndefs */ true))
11368 return matcher.getNode(ISD::AND, SDLoc(N), VT, Cond, T);
11369
11370 // select Cond, T, 1 --> or (not Cond), T
11371 if (isOneOrOneSplat(F, /* AllowUndefs */ true)) {
11372 SDValue NotCond = matcher.getNode(ISD::XOR, SDLoc(N), VT, Cond,
11373 DAG.getAllOnesConstant(SDLoc(N), VT));
11374 return matcher.getNode(ISD::OR, SDLoc(N), VT, NotCond, T);
11375 }
11376
11377 // select Cond, 0, F --> and (not Cond), F
11378 if (isNullOrNullSplat(T, /* AllowUndefs */ true)) {
11379 SDValue NotCond = matcher.getNode(ISD::XOR, SDLoc(N), VT, Cond,
11380 DAG.getAllOnesConstant(SDLoc(N), VT));
11381 return matcher.getNode(ISD::AND, SDLoc(N), VT, NotCond, F);
11382 }
11383
11384 return SDValue();
11385}
11386
11387 static SDValue foldVSelectToSignBitSplatMask(SDNode *N, SelectionDAG &DAG) {
11388 SDValue N0 = N->getOperand(0);
11389 SDValue N1 = N->getOperand(1);
11390 SDValue N2 = N->getOperand(2);
11391 EVT VT = N->getValueType(0);
11392 if (N0.getOpcode() != ISD::SETCC || !N0.hasOneUse())
11393 return SDValue();
11394
11395 SDValue Cond0 = N0.getOperand(0);
11396 SDValue Cond1 = N0.getOperand(1);
11397 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
11398 if (VT != Cond0.getValueType())
11399 return SDValue();
11400
11401 // Match a signbit check of Cond0 as "Cond0 s<0". Swap select operands if the
11402 // compare is inverted from that pattern ("Cond0 s> -1").
11403 if (CC == ISD::SETLT && isNullOrNullSplat(Cond1))
11404 ; // This is the pattern we are looking for.
11405 else if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(Cond1))
11406 std::swap(N1, N2);
11407 else
11408 return SDValue();
11409
11410 // (Cond0 s< 0) ? N1 : 0 --> (Cond0 s>> BW-1) & N1
11411 if (isNullOrNullSplat(N2)) {
11412 SDLoc DL(N);
11413 SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
11414 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
11415 return DAG.getNode(ISD::AND, DL, VT, Sra, N1);
11416 }
11417
11418 // (Cond0 s< 0) ? -1 : N2 --> (Cond0 s>> BW-1) | N2
11419 if (isAllOnesOrAllOnesSplat(N1)) {
11420 SDLoc DL(N);
11421 SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
11422 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
11423 return DAG.getNode(ISD::OR, DL, VT, Sra, N2);
11424 }
11425
11426 // If we have to invert the sign bit mask, only do that transform if the
11427 // target has a bitwise 'and not' instruction (the invert is free).
11428 // (Cond0 s< 0) ? 0 : N2 --> ~(Cond0 s>> BW-1) & N2
11429 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11430 if (isNullOrNullSplat(N1) && TLI.hasAndNot(N1)) {
11431 SDLoc DL(N);
11432 SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
11433 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
11434 SDValue Not = DAG.getNOT(DL, Sra, VT);
11435 return DAG.getNode(ISD::AND, DL, VT, Not, N2);
11436 }
11437
11438 // TODO: There's another pattern in this family, but it may require
11439 // implementing hasOrNot() to check for profitability:
11440 // (Cond0 s> -1) ? -1 : N2 --> ~(Cond0 s>> BW-1) | N2
11441
11442 return SDValue();
11443}
11444
11445SDValue DAGCombiner::visitSELECT(SDNode *N) {
11446 SDValue N0 = N->getOperand(0);
11447 SDValue N1 = N->getOperand(1);
11448 SDValue N2 = N->getOperand(2);
11449 EVT VT = N->getValueType(0);
11450 EVT VT0 = N0.getValueType();
11451 SDLoc DL(N);
11452 SDNodeFlags Flags = N->getFlags();
11453
11454 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
11455 return V;
11456
11457 if (SDValue V = foldBoolSelectToLogic<EmptyMatchContext>(N, DAG))
11458 return V;
11459
11460 // select (not Cond), N1, N2 -> select Cond, N2, N1
11461 if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) {
11462 SDValue SelectOp = DAG.getSelect(DL, VT, F, N2, N1);
11463 SelectOp->setFlags(Flags);
11464 return SelectOp;
11465 }
11466
11467 if (SDValue V = foldSelectOfConstants(N))
11468 return V;
11469
11470 // If we can fold this based on the true/false value, do so.
11471 if (SimplifySelectOps(N, N1, N2))
11472 return SDValue(N, 0); // Don't revisit N.
11473
11474 if (VT0 == MVT::i1) {
11475 // The code in this block deals with the following 2 equivalences:
11476 // select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
11477 // select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
11478 // The target can specify its preferred form with the
11479 // shouldNormalizeToSelectSequence() callback. However, we always transform
11480 // to the right if the inner select already exists in the DAG,
11481 // and we always transform to the left side if we know that we can further
11482 // optimize the combination of the conditions.
11483 bool normalizeToSequence =
11484 TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
11485 // select (and Cond0, Cond1), X, Y
11486 // -> select Cond0, (select Cond1, X, Y), Y
11487 if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
11488 SDValue Cond0 = N0->getOperand(0);
11489 SDValue Cond1 = N0->getOperand(1);
11490 SDValue InnerSelect =
11491 DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2, Flags);
11492 if (normalizeToSequence || !InnerSelect.use_empty())
11493 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
11494 InnerSelect, N2, Flags);
11495 // Cleanup on failure.
11496 if (InnerSelect.use_empty())
11497 recursivelyDeleteUnusedNodes(InnerSelect.getNode());
11498 }
11499 // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
11500 if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
11501 SDValue Cond0 = N0->getOperand(0);
11502 SDValue Cond1 = N0->getOperand(1);
11503 SDValue InnerSelect = DAG.getNode(ISD::SELECT, DL, N1.getValueType(),
11504 Cond1, N1, N2, Flags);
11505 if (normalizeToSequence || !InnerSelect.use_empty())
11506 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
11507 InnerSelect, Flags);
11508 // Cleanup on failure.
11509 if (InnerSelect.use_empty())
11510 recursivelyDeleteUnusedNodes(InnerSelect.getNode());
11511 }
11512
11513 // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
11514 if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
11515 SDValue N1_0 = N1->getOperand(0);
11516 SDValue N1_1 = N1->getOperand(1);
11517 SDValue N1_2 = N1->getOperand(2);
11518 if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
11519 // Create the actual and node if we can generate good code for it.
11520 if (!normalizeToSequence) {
11521 SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
11522 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1,
11523 N2, Flags);
11524 }
11525 // Otherwise see if we can optimize the "and" to a better pattern.
11526 if (SDValue Combined = visitANDLike(N0, N1_0, N)) {
11527 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
11528 N2, Flags);
11529 }
11530 }
11531 }
11532 // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
11533 if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
11534 SDValue N2_0 = N2->getOperand(0);
11535 SDValue N2_1 = N2->getOperand(1);
11536 SDValue N2_2 = N2->getOperand(2);
11537 if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
11538 // Create the actual or node if we can generate good code for it.
11539 if (!normalizeToSequence) {
11540 SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
11541 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1,
11542 N2_2, Flags);
11543 }
11544 // Otherwise see if we can optimize to a better pattern.
11545 if (SDValue Combined = visitORLike(N0, N2_0, DL))
11546 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
11547 N2_2, Flags);
11548 }
11549 }
11550 }
11551
11552 // Fold selects based on a setcc into other things, such as min/max/abs.
11553 if (N0.getOpcode() == ISD::SETCC) {
11554 SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1);
11555 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
11556
11557 // select (fcmp lt x, y), x, y -> fminnum x, y
11558 // select (fcmp gt x, y), x, y -> fmaxnum x, y
11559 //
11560 // This is OK if we don't care what happens if either operand is a NaN.
11561 if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, TLI))
11562 if (SDValue FMinMax =
11563 combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2, CC))
11564 return FMinMax;
11565
11566 // Use 'unsigned add with overflow' to optimize an unsigned saturating add.
11567 // This is conservatively limited to pre-legal-operations to give targets
11568 // a chance to reverse the transform if they want to do that. Also, it is
11569 // unlikely that the pattern would be formed late, so it's probably not
11570 // worth going through the other checks.
11571 if (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::UADDO, VT) &&
11572 CC == ISD::SETUGT && N0.hasOneUse() && isAllOnesConstant(N1) &&
11573 N2.getOpcode() == ISD::ADD && Cond0 == N2.getOperand(0)) {
11574 auto *C = dyn_cast<ConstantSDNode>(N2.getOperand(1));
11575 auto *NotC = dyn_cast<ConstantSDNode>(Cond1);
11576 if (C && NotC && C->getAPIntValue() == ~NotC->getAPIntValue()) {
11577 // select (setcc Cond0, ~C, ugt), -1, (add Cond0, C) -->
11578 // uaddo Cond0, C; select uaddo.1, -1, uaddo.0
11579 //
11580 // The IR equivalent of this transform would have this form:
11581 // %a = add %x, C
11582 // %c = icmp ugt %x, ~C
11583 // %r = select %c, -1, %a
11584 // =>
11585 // %u = call {iN,i1} llvm.uadd.with.overflow(%x, C)
11586 // %u0 = extractvalue %u, 0
11587 // %u1 = extractvalue %u, 1
11588 // %r = select %u1, -1, %u0
11589 SDVTList VTs = DAG.getVTList(VT, VT0);
11590 SDValue UAO = DAG.getNode(ISD::UADDO, DL, VTs, Cond0, N2.getOperand(1));
11591 return DAG.getSelect(DL, VT, UAO.getValue(1), N1, UAO.getValue(0));
11592 }
11593 }
11594
11595 if (TLI.isOperationLegal(ISD::SELECT_CC, VT) ||
11596 (!LegalOperations &&
11597 TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))) {
11598 // Any flags available in a select/setcc fold will be on the setcc as they
11599 // migrated from fcmp.
11600 Flags = N0->getFlags();
11601 SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1,
11602 N2, N0.getOperand(2));
11603 SelectNode->setFlags(Flags);
11604 return SelectNode;
11605 }
11606
11607 if (SDValue NewSel = SimplifySelect(DL, N0, N1, N2))
11608 return NewSel;
11609 }
11610
11611 if (!VT.isVector())
11612 if (SDValue BinOp = foldSelectOfBinops(N))
11613 return BinOp;
11614
11615 if (SDValue R = combineSelectAsExtAnd(N0, N1, N2, DL, DAG))
11616 return R;
11617
11618 return SDValue();
11619}
11620
11621// This function assumes all the vselect's arguments are CONCAT_VECTOR
11622// nodes and that the condition is a BV of ConstantSDNodes (or undefs).
11623 static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
11624 SDLoc DL(N);
11625 SDValue Cond = N->getOperand(0);
11626 SDValue LHS = N->getOperand(1);
11627 SDValue RHS = N->getOperand(2);
11628 EVT VT = N->getValueType(0);
11629 int NumElems = VT.getVectorNumElements();
11630 assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
11631 RHS.getOpcode() == ISD::CONCAT_VECTORS &&
11632 Cond.getOpcode() == ISD::BUILD_VECTOR);
11633
11634 // CONCAT_VECTORS can take an arbitrary number of arguments. We only care about
11635 // binary ones here.
11636 if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
11637 return SDValue();
11638
11639 // We're sure we have an even number of elements due to the
11640 // concat_vectors we have as arguments to vselect.
11641 // Skip BV elements until we find one that's not an UNDEF
11642 // After we find an UNDEF element, keep looping until we get to half the
11643 // length of the BV and see if all the non-undef nodes are the same.
11644 ConstantSDNode *BottomHalf = nullptr;
11645 for (int i = 0; i < NumElems / 2; ++i) {
11646 if (Cond->getOperand(i)->isUndef())
11647 continue;
11648
11649 if (BottomHalf == nullptr)
11650 BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
11651 else if (Cond->getOperand(i).getNode() != BottomHalf)
11652 return SDValue();
11653 }
11654
11655 // Do the same for the second half of the BuildVector
11656 ConstantSDNode *TopHalf = nullptr;
11657 for (int i = NumElems / 2; i < NumElems; ++i) {
11658 if (Cond->getOperand(i)->isUndef())
11659 continue;
11660
11661 if (TopHalf == nullptr)
11662 TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
11663 else if (Cond->getOperand(i).getNode() != TopHalf)
11664 return SDValue();
11665 }
11666
11667 assert(TopHalf && BottomHalf &&
11668 "One half of the selector was all UNDEFs and the other was all the "
11669 "same value. This should have been addressed before this function.");
11670 return DAG.getNode(
11671 ISD::CONCAT_VECTORS, DL, VT,
11672 BottomHalf->isZero() ? RHS->getOperand(0) : LHS->getOperand(0),
11673 TopHalf->isZero() ? RHS->getOperand(1) : LHS->getOperand(1));
11674}
11675
11676bool refineUniformBase(SDValue &BasePtr, SDValue &Index, bool IndexIsScaled,
11677 SelectionDAG &DAG, const SDLoc &DL) {
11678
11679 // Only perform the transformation when existing operands can be reused.
11680 if (IndexIsScaled)
11681 return false;
11682
11683 if (!isNullConstant(BasePtr) && !Index.hasOneUse())
11684 return false;
11685
11686 EVT VT = BasePtr.getValueType();
11687
11688 if (SDValue SplatVal = DAG.getSplatValue(Index);
11689 SplatVal && !isNullConstant(SplatVal) &&
11690 SplatVal.getValueType() == VT) {
11691 BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
11692 Index = DAG.getSplat(Index.getValueType(), DL, DAG.getConstant(0, DL, VT));
11693 return true;
11694 }
11695
11696 if (Index.getOpcode() != ISD::ADD)
11697 return false;
11698
11699 if (SDValue SplatVal = DAG.getSplatValue(Index.getOperand(0));
11700 SplatVal && SplatVal.getValueType() == VT) {
11701 BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
11702 Index = Index.getOperand(1);
11703 return true;
11704 }
11705 if (SDValue SplatVal = DAG.getSplatValue(Index.getOperand(1));
11706 SplatVal && SplatVal.getValueType() == VT) {
11707 BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
11708 Index = Index.getOperand(0);
11709 return true;
11710 }
11711 return false;
11712}
11713
11714// Fold sext/zext of index into index type.
11715 bool refineIndexType(SDValue &Index, ISD::MemIndexType &IndexType, EVT DataVT,
11716 SelectionDAG &DAG) {
11717 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11718
11719 // It's always safe to look through zero extends.
11720 if (Index.getOpcode() == ISD::ZERO_EXTEND) {
11721 if (TLI.shouldRemoveExtendFromGSIndex(Index, DataVT)) {
11722 IndexType = ISD::UNSIGNED_SCALED;
11723 Index = Index.getOperand(0);
11724 return true;
11725 }
11726 if (ISD::isIndexTypeSigned(IndexType)) {
11727 IndexType = ISD::UNSIGNED_SCALED;
11728 return true;
11729 }
11730 }
11731
11732 // It's only safe to look through sign extends when Index is signed.
11733 if (Index.getOpcode() == ISD::SIGN_EXTEND &&
11734 ISD::isIndexTypeSigned(IndexType) &&
11735 TLI.shouldRemoveExtendFromGSIndex(Index, DataVT)) {
11736 Index = Index.getOperand(0);
11737 return true;
11738 }
11739
11740 return false;
11741}
11742
11743SDValue DAGCombiner::visitVPSCATTER(SDNode *N) {
11744 VPScatterSDNode *MSC = cast<VPScatterSDNode>(N);
11745 SDValue Mask = MSC->getMask();
11746 SDValue Chain = MSC->getChain();
11747 SDValue Index = MSC->getIndex();
11748 SDValue Scale = MSC->getScale();
11749 SDValue StoreVal = MSC->getValue();
11750 SDValue BasePtr = MSC->getBasePtr();
11751 SDValue VL = MSC->getVectorLength();
11752 ISD::MemIndexType IndexType = MSC->getIndexType();
11753 SDLoc DL(N);
11754
11755 // Zap scatters with a zero mask.
11756 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
11757 return Chain;
11758
11759 if (refineUniformBase(BasePtr, Index, MSC->isIndexScaled(), DAG, DL)) {
11760 SDValue Ops[] = {Chain, StoreVal, BasePtr, Index, Scale, Mask, VL};
11761 return DAG.getScatterVP(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
11762 DL, Ops, MSC->getMemOperand(), IndexType);
11763 }
11764
11765 if (refineIndexType(Index, IndexType, StoreVal.getValueType(), DAG)) {
11766 SDValue Ops[] = {Chain, StoreVal, BasePtr, Index, Scale, Mask, VL};
11767 return DAG.getScatterVP(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
11768 DL, Ops, MSC->getMemOperand(), IndexType);
11769 }
11770
11771 return SDValue();
11772}
11773
11774SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
11775 MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
11776 SDValue Mask = MSC->getMask();
11777 SDValue Chain = MSC->getChain();
11778 SDValue Index = MSC->getIndex();
11779 SDValue Scale = MSC->getScale();
11780 SDValue StoreVal = MSC->getValue();
11781 SDValue BasePtr = MSC->getBasePtr();
11782 ISD::MemIndexType IndexType = MSC->getIndexType();
11783 SDLoc DL(N);
11784
11785 // Zap scatters with a zero mask.
11786 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
11787 return Chain;
11788
11789 if (refineUniformBase(BasePtr, Index, MSC->isIndexScaled(), DAG, DL)) {
11790 SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
11791 return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
11792 DL, Ops, MSC->getMemOperand(), IndexType,
11793 MSC->isTruncatingStore());
11794 }
11795
11796 if (refineIndexType(Index, IndexType, StoreVal.getValueType(), DAG)) {
11797 SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
11798 return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
11799 DL, Ops, MSC->getMemOperand(), IndexType,
11800 MSC->isTruncatingStore());
11801 }
11802
11803 return SDValue();
11804}
11805
11806SDValue DAGCombiner::visitMSTORE(SDNode *N) {
11807 MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
11808 SDValue Mask = MST->getMask();
11809 SDValue Chain = MST->getChain();
11810 SDValue Value = MST->getValue();
11811 SDValue Ptr = MST->getBasePtr();
11812 SDLoc DL(N);
11813
11814 // Zap masked stores with a zero mask.
11815 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
11816 return Chain;
11817
11818 // Remove a masked store if base pointers and masks are equal.
11819 if (MaskedStoreSDNode *MST1 = dyn_cast<MaskedStoreSDNode>(Chain)) {
11820 if (MST->isUnindexed() && MST->isSimple() && MST1->isUnindexed() &&
11821 MST1->isSimple() && MST1->getBasePtr() == Ptr &&
11822 !MST->getBasePtr().isUndef() &&
11823 ((Mask == MST1->getMask() && MST->getMemoryVT().getStoreSize() ==
11824 MST1->getMemoryVT().getStoreSize()) ||
11825 (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) &&
11826 TypeSize::isKnownLE(MST1->getMemoryVT().getStoreSize(),
11827 MST->getMemoryVT().getStoreSize())) {
11828 CombineTo(MST1, MST1->getChain());
11829 if (N->getOpcode() != ISD::DELETED_NODE)
11830 AddToWorklist(N);
11831 return SDValue(N, 0);
11832 }
11833 }
11834
11835 // If this is a masked store with an all-ones mask, we can use an unmasked store.
11836 // FIXME: Can we do this for indexed, compressing, or truncating stores?
11837 if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MST->isUnindexed() &&
11838 !MST->isCompressingStore() && !MST->isTruncatingStore())
11839 return DAG.getStore(MST->getChain(), SDLoc(N), MST->getValue(),
11840 MST->getBasePtr(), MST->getPointerInfo(),
11841 MST->getOriginalAlign(),
11842 MST->getMemOperand()->getFlags(), MST->getAAInfo());
11843
11844 // Try transforming N to an indexed store.
11845 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
11846 return SDValue(N, 0);
11847
11848 if (MST->isTruncatingStore() && MST->isUnindexed() &&
11849 Value.getValueType().isInteger() &&
11850 (!isa<ConstantSDNode>(Value) ||
11851 !cast<ConstantSDNode>(Value)->isOpaque())) {
11852 APInt TruncDemandedBits =
11853 APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
11854 MST->getMemoryVT().getScalarSizeInBits());
11855
11856 // See if we can simplify the operation with
11857 // SimplifyDemandedBits, which only works if the value has a single use.
11858 if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
11859 // Re-visit the store if anything changed and the store hasn't been merged
11860 // with another node (N is deleted). SimplifyDemandedBits will add Value's
11861 // node back to the worklist if necessary, but we also need to re-visit
11862 // the Store node itself.
11863 if (N->getOpcode() != ISD::DELETED_NODE)
11864 AddToWorklist(N);
11865 return SDValue(N, 0);
11866 }
11867 }
11868
11869 // If this is a TRUNC followed by a masked store, fold this into a masked
11870 // truncating store. We can do this even if this is already a masked
11871 // truncstore.
11872 // TODO: Try combining to a masked compress store if possible.
11873 if ((Value.getOpcode() == ISD::TRUNCATE) && Value->hasOneUse() &&
11874 MST->isUnindexed() && !MST->isCompressingStore() &&
11875 TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
11876 MST->getMemoryVT(), LegalOperations)) {
11877 auto Mask = TLI.promoteTargetBoolean(DAG, MST->getMask(),
11878 Value.getOperand(0).getValueType());
11879 return DAG.getMaskedStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
11880 MST->getOffset(), Mask, MST->getMemoryVT(),
11881 MST->getMemOperand(), MST->getAddressingMode(),
11882 /*IsTruncating=*/true);
11883 }
11884
11885 return SDValue();
11886}
11887
11888SDValue DAGCombiner::visitVP_STRIDED_STORE(SDNode *N) {
11889 auto *SST = cast<VPStridedStoreSDNode>(N);
11890 EVT EltVT = SST->getValue().getValueType().getVectorElementType();
11891 // Combine strided stores with unit-stride to a regular VP store.
11892 if (auto *CStride = dyn_cast<ConstantSDNode>(SST->getStride());
11893 CStride && CStride->getZExtValue() == EltVT.getStoreSize()) {
11894 return DAG.getStoreVP(SST->getChain(), SDLoc(N), SST->getValue(),
11895 SST->getBasePtr(), SST->getOffset(), SST->getMask(),
11896 SST->getVectorLength(), SST->getMemoryVT(),
11897 SST->getMemOperand(), SST->getAddressingMode(),
11898 SST->isTruncatingStore(), SST->isCompressingStore());
11899 }
11900 return SDValue();
11901}
11902
11903SDValue DAGCombiner::visitVPGATHER(SDNode *N) {
11904 VPGatherSDNode *MGT = cast<VPGatherSDNode>(N);
11905 SDValue Mask = MGT->getMask();
11906 SDValue Chain = MGT->getChain();
11907 SDValue Index = MGT->getIndex();
11908 SDValue Scale = MGT->getScale();
11909 SDValue BasePtr = MGT->getBasePtr();
11910 SDValue VL = MGT->getVectorLength();
11911 ISD::MemIndexType IndexType = MGT->getIndexType();
11912 SDLoc DL(N);
11913
11914 if (refineUniformBase(BasePtr, Index, MGT->isIndexScaled(), DAG, DL)) {
11915 SDValue Ops[] = {Chain, BasePtr, Index, Scale, Mask, VL};
11916 return DAG.getGatherVP(
11917 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
11918 Ops, MGT->getMemOperand(), IndexType);
11919 }
11920
11921 if (refineIndexType(Index, IndexType, N->getValueType(0), DAG)) {
11922 SDValue Ops[] = {Chain, BasePtr, Index, Scale, Mask, VL};
11923 return DAG.getGatherVP(
11924 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
11925 Ops, MGT->getMemOperand(), IndexType);
11926 }
11927
11928 return SDValue();
11929}
11930
11931SDValue DAGCombiner::visitMGATHER(SDNode *N) {
11932 MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
11933 SDValue Mask = MGT->getMask();
11934 SDValue Chain = MGT->getChain();
11935 SDValue Index = MGT->getIndex();
11936 SDValue Scale = MGT->getScale();
11937 SDValue PassThru = MGT->getPassThru();
11938 SDValue BasePtr = MGT->getBasePtr();
11939 ISD::MemIndexType IndexType = MGT->getIndexType();
11940 SDLoc DL(N);
11941
11942 // Zap gathers with a zero mask.
11943 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
11944 return CombineTo(N, PassThru, MGT->getChain());
11945
11946 if (refineUniformBase(BasePtr, Index, MGT->isIndexScaled(), DAG, DL)) {
11947 SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
11948 return DAG.getMaskedGather(
11949 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
11950 Ops, MGT->getMemOperand(), IndexType, MGT->getExtensionType());
11951 }
11952
11953 if (refineIndexType(Index, IndexType, N->getValueType(0), DAG)) {
11954 SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
11955 return DAG.getMaskedGather(
11956 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
11957 Ops, MGT->getMemOperand(), IndexType, MGT->getExtensionType());
11958 }
11959
11960 return SDValue();
11961}
11962
11963SDValue DAGCombiner::visitMLOAD(SDNode *N) {
11964 MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
11965 SDValue Mask = MLD->getMask();
11966 SDLoc DL(N);
11967
11968 // Zap masked loads with a zero mask.
11969 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
11970 return CombineTo(N, MLD->getPassThru(), MLD->getChain());
11971
11972 // If this is a masked load with an all-ones mask, we can use an unmasked load.
11973 // FIXME: Can we do this for indexed, expanding, or extending loads?
11974 if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MLD->isUnindexed() &&
11975 !MLD->isExpandingLoad() && MLD->getExtensionType() == ISD::NON_EXTLOAD) {
11976 SDValue NewLd = DAG.getLoad(
11977 N->getValueType(0), SDLoc(N), MLD->getChain(), MLD->getBasePtr(),
11978 MLD->getPointerInfo(), MLD->getOriginalAlign(),
11979 MLD->getMemOperand()->getFlags(), MLD->getAAInfo(), MLD->getRanges());
11980 return CombineTo(N, NewLd, NewLd.getValue(1));
11981 }
11982
11983 // Try transforming N to an indexed load.
11984 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
11985 return SDValue(N, 0);
11986
11987 return SDValue();
11988}
11989
11990SDValue DAGCombiner::visitVP_STRIDED_LOAD(SDNode *N) {
11991 auto *SLD = cast<VPStridedLoadSDNode>(N);
11992 EVT EltVT = SLD->getValueType(0).getVectorElementType();
11993 // Combine strided loads with unit-stride to a regular VP load.
11994 if (auto *CStride = dyn_cast<ConstantSDNode>(SLD->getStride());
11995 CStride && CStride->getZExtValue() == EltVT.getStoreSize()) {
11996 SDValue NewLd = DAG.getLoadVP(
11997 SLD->getAddressingMode(), SLD->getExtensionType(), SLD->getValueType(0),
11998 SDLoc(N), SLD->getChain(), SLD->getBasePtr(), SLD->getOffset(),
11999 SLD->getMask(), SLD->getVectorLength(), SLD->getMemoryVT(),
12000 SLD->getMemOperand(), SLD->isExpandingLoad());
12001 return CombineTo(N, NewLd, NewLd.getValue(1));
12002 }
12003 return SDValue();
12004}
12005
12006/// A vector select of 2 constant vectors can be simplified to math/logic to
12007/// avoid a variable select instruction and possibly avoid constant loads.
12008SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
12009 SDValue Cond = N->getOperand(0);
12010 SDValue N1 = N->getOperand(1);
12011 SDValue N2 = N->getOperand(2);
12012 EVT VT = N->getValueType(0);
12013 if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
12014 !shouldConvertSelectOfConstantsToMath(Cond, VT, TLI) ||
12015 !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) ||
12016 !ISD::isBuildVectorOfConstantSDNodes(N2.getNode()))
12017 return SDValue();
12018
12019 // Check if we can use the condition value to increment/decrement a single
12020 // constant value. This simplifies a select to an add and removes a constant
12021 // load/materialization from the general case.
12022 bool AllAddOne = true;
12023 bool AllSubOne = true;
12024 unsigned Elts = VT.getVectorNumElements();
12025 for (unsigned i = 0; i != Elts; ++i) {
12026 SDValue N1Elt = N1.getOperand(i);
12027 SDValue N2Elt = N2.getOperand(i);
12028 if (N1Elt.isUndef() || N2Elt.isUndef())
12029 continue;
12030 if (N1Elt.getValueType() != N2Elt.getValueType())
12031 continue;
12032
12033 const APInt &C1 = N1Elt->getAsAPIntVal();
12034 const APInt &C2 = N2Elt->getAsAPIntVal();
12035 if (C1 != C2 + 1)
12036 AllAddOne = false;
12037 if (C1 != C2 - 1)
12038 AllSubOne = false;
12039 }
12040
12041 // Further simplifications for the extra-special cases where the constants are
12042 // all 0 or all -1 should be implemented as folds of these patterns.
12043 SDLoc DL(N);
12044 if (AllAddOne || AllSubOne) {
12045 // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
12046 // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
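// e.g. vselect <4 x i1> %c, <5, 5, 5, 5>, <4, 4, 4, 4> --> add (zext %c), <4, 4, 4, 4>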
12047 auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
12048 SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
12049 return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
12050 }
12051
12052 // select Cond, Pow2C, 0 --> (zext Cond) << log2(Pow2C)
12053 APInt Pow2C;
12054 if (ISD::isConstantSplatVector(N1.getNode(), Pow2C) && Pow2C.isPowerOf2() &&
12055 isNullOrNullSplat(N2)) {
12056 SDValue ZextCond = DAG.getZExtOrTrunc(Cond, DL, VT);
12057 SDValue ShAmtC = DAG.getConstant(Pow2C.exactLogBase2(), DL, VT);
12058 return DAG.getNode(ISD::SHL, DL, VT, ZextCond, ShAmtC);
12059 }
12060
12061 if (SDValue V = foldSelectOfConstantsUsingSra(N, DL, DAG))
12062 return V;
12063
12064 // The general case for select-of-constants:
12065 // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
12066 // ...but that only makes sense if a vselect is slower than 2 logic ops, so
12067 // leave that to a machine-specific pass.
12068 return SDValue();
12069}
12070
12071SDValue DAGCombiner::visitVP_SELECT(SDNode *N) {
12072 SDValue N0 = N->getOperand(0);
12073 SDValue N1 = N->getOperand(1);
12074 SDValue N2 = N->getOperand(2);
12075
12076 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
12077 return V;
12078
12079 if (SDValue V = foldBoolSelectToLogic<VPMatchContext>(N, DAG))
12080 return V;
12081
12082 return SDValue();
12083}
12084
12085SDValue DAGCombiner::visitVSELECT(SDNode *N) {
12086 SDValue N0 = N->getOperand(0);
12087 SDValue N1 = N->getOperand(1);
12088 SDValue N2 = N->getOperand(2);
12089 EVT VT = N->getValueType(0);
12090 SDLoc DL(N);
12091
12092 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
12093 return V;
12094
12095 if (SDValue V = foldBoolSelectToLogic<EmptyMatchContext>(N, DAG))
12096 return V;
12097
12098 // vselect (not Cond), N1, N2 -> vselect Cond, N2, N1
12099 if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false))
12100 return DAG.getSelect(DL, VT, F, N2, N1);
12101
12102 // select (sext m), (add X, C), X --> (add X, (and C, (sext m))))
12103 if (N1.getOpcode() == ISD::ADD && N1.getOperand(0) == N2 && N1->hasOneUse() &&
12104 DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1)) &&
12105 N0.getScalarValueSizeInBits() == N1.getScalarValueSizeInBits() &&
12106 TLI.getBooleanContents(N0.getValueType()) ==
12107 TargetLowering::ZeroOrNegativeOneBooleanContent) {
12108 return DAG.getNode(
12109 ISD::ADD, DL, N1.getValueType(), N2,
12110 DAG.getNode(ISD::AND, DL, N0.getValueType(), N1.getOperand(1), N0));
12111 }
12112
12113 // Canonicalize integer abs.
12114 // vselect (setg[te] X, 0), X, -X ->
12115 // vselect (setgt X, -1), X, -X ->
12116 // vselect (setl[te] X, 0), -X, X ->
12117 // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
12118 if (N0.getOpcode() == ISD::SETCC) {
12119 SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
12120 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
12121 bool isAbs = false;
12122 bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
12123
12124 if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
12125 (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
12126 N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
12127 isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
12128 else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
12129 N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
12130 isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
12131
12132 if (isAbs) {
12133 if (TLI.isOperationLegalOrCustom(ISD::ABS, VT))
12134 return DAG.getNode(ISD::ABS, DL, VT, LHS);
12135
12136 SDValue Shift = DAG.getNode(ISD::SRA, DL, VT, LHS,
12137 DAG.getConstant(VT.getScalarSizeInBits() - 1,
12138 DL, getShiftAmountTy(VT)));
12139 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
12140 AddToWorklist(Shift.getNode());
12141 AddToWorklist(Add.getNode());
12142 return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
12143 }
12144
12145 // vselect x, y (fcmp lt x, y) -> fminnum x, y
12146 // vselect x, y (fcmp gt x, y) -> fmaxnum x, y
12147 //
12148 // This is OK if we don't care about what happens if either operand is a
12149 // NaN.
12150 //
12151 if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, TLI)) {
12152 if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC))
12153 return FMinMax;
12154 }
12155
12156 if (SDValue S = PerformMinMaxFpToSatCombine(LHS, RHS, N1, N2, CC, DAG))
12157 return S;
12158 if (SDValue S = PerformUMinFpToSatCombine(LHS, RHS, N1, N2, CC, DAG))
12159 return S;
12160
12161 // If this select has a condition (setcc) with narrower operands than the
12162 // select, try to widen the compare to match the select width.
12163 // TODO: This should be extended to handle any constant.
12164 // TODO: This could be extended to handle non-loading patterns, but that
12165 // requires thorough testing to avoid regressions.
12166 if (isNullOrNullSplat(RHS)) {
12167 EVT NarrowVT = LHS.getValueType();
12168 EVT WideVT = N->getValueType(0).changeVectorElementTypeToInteger();
12169 EVT SetCCVT = getSetCCResultType(LHS.getValueType());
12170 unsigned SetCCWidth = SetCCVT.getScalarSizeInBits();
12171 unsigned WideWidth = WideVT.getScalarSizeInBits();
12172 bool IsSigned = isSignedIntSetCC(CC);
12173 auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
12174 if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() &&
12175 SetCCWidth != 1 && SetCCWidth < WideWidth &&
12176 TLI.isLoadExtLegalOrCustom(LoadExtOpcode, WideVT, NarrowVT) &&
12177 TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)) {
12178 // Both compare operands can be widened for free. The LHS can use an
12179 // extended load, and the RHS is a constant:
12180 // vselect (ext (setcc load(X), C)), N1, N2 -->
12181 // vselect (setcc extload(X), C'), N1, N2
12182 auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
12183 SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS);
12184 SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS);
12185 EVT WideSetCCVT = getSetCCResultType(WideVT);
12186 SDValue WideSetCC = DAG.getSetCC(DL, WideSetCCVT, WideLHS, WideRHS, CC);
12187 return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2);
12188 }
12189 }
12190
12191 // Match VSELECTs with absolute difference patterns.
12192 // (vselect (setcc a, b, set?gt), (sub a, b), (sub b, a)) --> (abd? a, b)
12193 // (vselect (setcc a, b, set?ge), (sub a, b), (sub b, a)) --> (abd? a, b)
12194 // (vselect (setcc a, b, set?lt), (sub b, a), (sub a, b)) --> (abd? a, b)
12195 // (vselect (setcc a, b, set?le), (sub b, a), (sub a, b)) --> (abd? a, b)
12196 if (N1.getOpcode() == ISD::SUB && N2.getOpcode() == ISD::SUB &&
12197 N1.getOperand(0) == N2.getOperand(1) &&
12198 N1.getOperand(1) == N2.getOperand(0)) {
12199 bool IsSigned = isSignedIntSetCC(CC);
12200 unsigned ABDOpc = IsSigned ? ISD::ABDS : ISD::ABDU;
12201 if (hasOperation(ABDOpc, VT)) {
12202 switch (CC) {
12203 case ISD::SETGT:
12204 case ISD::SETGE:
12205 case ISD::SETUGT:
12206 case ISD::SETUGE:
12207 if (LHS == N1.getOperand(0) && RHS == N1.getOperand(1))
12208 return DAG.getNode(ABDOpc, DL, VT, LHS, RHS);
12209 break;
12210 case ISD::SETLT:
12211 case ISD::SETLE:
12212 case ISD::SETULT:
12213 case ISD::SETULE:
12214 if (RHS == N1.getOperand(0) && LHS == N1.getOperand(1))
12215 return DAG.getNode(ABDOpc, DL, VT, LHS, RHS);
12216 break;
12217 default:
12218 break;
12219 }
12220 }
12221 }
12222
12223 // Match VSELECTs into add with unsigned saturation.
12224 if (hasOperation(ISD::UADDSAT, VT)) {
12225 // Check if one of the arms of the VSELECT is a vector with all bits set.
12226 // If it's on the left side, invert the predicate to simplify the logic below.
12227 SDValue Other;
12228 ISD::CondCode SatCC = CC;
12229 if (ISD::isConstantSplatVectorAllOnes(N1.getNode())) {
12230 Other = N2;
12231 SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
12232 } else if (ISD::isConstantSplatVectorAllOnes(N2.getNode())) {
12233 Other = N1;
12234 }
12235
12236 if (Other && Other.getOpcode() == ISD::ADD) {
12237 SDValue CondLHS = LHS, CondRHS = RHS;
12238 SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
12239
12240 // Canonicalize condition operands.
12241 if (SatCC == ISD::SETUGE) {
12242 std::swap(CondLHS, CondRHS);
12243 SatCC = ISD::SETULE;
12244 }
12245
12246 // We can test against either of the addition operands.
12247 // x <= x+y ? x+y : ~0 --> uaddsat x, y
12248 // x+y >= x ? x+y : ~0 --> uaddsat x, y
12249 if (SatCC == ISD::SETULE && Other == CondRHS &&
12250 (OpLHS == CondLHS || OpRHS == CondLHS))
12251 return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
12252
12253 if (OpRHS.getOpcode() == CondRHS.getOpcode() &&
12254 (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
12255 OpRHS.getOpcode() == ISD::SPLAT_VECTOR) &&
12256 CondLHS == OpLHS) {
12257 // If the RHS is a constant we have to reverse the const
12258 // canonicalization.
12259 // x >= ~C ? x+C : ~0 --> uaddsat x, C
12260 auto MatchUADDSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
12261 return Cond->getAPIntValue() == ~Op->getAPIntValue();
12262 };
12263 if (SatCC == ISD::SETULE &&
12264 ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUADDSAT))
12265 return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
12266 }
12267 }
12268 }
12269
12270 // Match VSELECTs into sub with unsigned saturation.
12271 if (hasOperation(ISD::USUBSAT, VT)) {
12272 // Check if one of the arms of the VSELECT is a zero vector. If it's on
12273 // the left side, invert the predicate to simplify the logic below.
12274 SDValue Other;
12275 ISD::CondCode SatCC = CC;
12276 if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) {
12277 Other = N2;
12278 SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
12279 } else if (ISD::isConstantSplatVectorAllZeros(N2.getNode())) {
12280 Other = N1;
12281 }
12282
12283 // zext(x) >= y ? trunc(zext(x) - y) : 0
12284 // --> usubsat(trunc(zext(x)),trunc(umin(y,SatLimit)))
12285 // zext(x) > y ? trunc(zext(x) - y) : 0
12286 // --> usubsat(trunc(zext(x)),trunc(umin(y,SatLimit)))
12287 if (Other && Other.getOpcode() == ISD::TRUNCATE &&
12288 Other.getOperand(0).getOpcode() == ISD::SUB &&
12289 (SatCC == ISD::SETUGE || SatCC == ISD::SETUGT)) {
12290 SDValue OpLHS = Other.getOperand(0).getOperand(0);
12291 SDValue OpRHS = Other.getOperand(0).getOperand(1);
12292 if (LHS == OpLHS && RHS == OpRHS && LHS.getOpcode() == ISD::ZERO_EXTEND)
12293 if (SDValue R = getTruncatedUSUBSAT(VT, LHS.getValueType(), LHS, RHS,
12294 DAG, DL))
12295 return R;
12296 }
12297
12298 if (Other && Other.getNumOperands() == 2) {
12299 SDValue CondRHS = RHS;
12300 SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
12301
12302 if (OpLHS == LHS) {
12303 // Look for a general sub with unsigned saturation first.
12304 // x >= y ? x-y : 0 --> usubsat x, y
12305 // x > y ? x-y : 0 --> usubsat x, y
12306 if ((SatCC == ISD::SETUGE || SatCC == ISD::SETUGT) &&
12307 Other.getOpcode() == ISD::SUB && OpRHS == CondRHS)
12308 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
12309
12310 if (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
12311 OpRHS.getOpcode() == ISD::SPLAT_VECTOR) {
12312 if (CondRHS.getOpcode() == ISD::BUILD_VECTOR ||
12313 CondRHS.getOpcode() == ISD::SPLAT_VECTOR) {
12314 // If the RHS is a constant we have to reverse the const
12315 // canonicalization.
12316 // x > C-1 ? x+-C : 0 --> usubsat x, C
12317 auto MatchUSUBSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
12318 return (!Op && !Cond) ||
12319 (Op && Cond &&
12320 Cond->getAPIntValue() == (-Op->getAPIntValue() - 1));
12321 };
12322 if (SatCC == ISD::SETUGT && Other.getOpcode() == ISD::ADD &&
12323 ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUSUBSAT,
12324 /*AllowUndefs*/ true)) {
12325 OpRHS = DAG.getNegative(OpRHS, DL, VT);
12326 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
12327 }
12328
12329 // Another special case: If C was a sign bit, the sub has been
12330 // canonicalized into a xor.
12331 // FIXME: Would it be better to use computeKnownBits to
12332 // determine whether it's safe to decanonicalize the xor?
12333 // x s< 0 ? x^C : 0 --> usubsat x, C
12334 APInt SplatValue;
12335 if (SatCC == ISD::SETLT && Other.getOpcode() == ISD::XOR &&
12336 ISD::isConstantSplatVector(OpRHS.getNode(), SplatValue) &&
12337 ISD::isConstantSplatVectorAllZeros(CondRHS.getNode()) &&
12338 SplatValue.isSignMask()) {
12339 // Note that we have to rebuild the RHS constant here to
12340 // ensure we don't rely on particular values of undef lanes.
12341 OpRHS = DAG.getConstant(SplatValue, DL, VT);
12342 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
12343 }
12344 }
12345 }
12346 }
12347 }
12348 }
12349 }
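// [Illustrative example, not part of the original source] Worked example of
// the basic usubsat match with i8 lanes: for x = 100, y = 200 the condition
// (x uge y) is false and the vselect yields the zero arm, matching
// usubsat(100, 200) = 0; for x = 200, y = 100 it yields x - y = 100, which is
// usubsat(200, 100).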
12350
12351 if (SimplifySelectOps(N, N1, N2))
12352 return SDValue(N, 0); // Don't revisit N.
12353
12354 // Fold (vselect all_ones, N1, N2) -> N1
12355 if (ISD::isConstantSplatVectorAllOnes(N0.getNode()))
12356 return N1;
12357 // Fold (vselect all_zeros, N1, N2) -> N2
12358 if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
12359 return N2;
12360
12361 // The ConvertSelectToConcatVector function assumes both the above
12362 // checks for (vselect (build_vector all{ones,zeros}) ...) have been made
12363 // and addressed.
12364 if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
12365 N2.getOpcode() == ISD::CONCAT_VECTORS &&
12366 ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
12367 if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
12368 return CV;
12369 }
12370
12371 if (SDValue V = foldVSelectOfConstants(N))
12372 return V;
12373
12374 if (hasOperation(ISD::SRA, VT))
12375 if (SDValue V = foldVSelectToSignBitSplatMask(N, DAG))
12376 return V;
12377
12378 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
12379 return SDValue(N, 0);
12380
12381 return SDValue();
12382}
12383
12384SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
12385 SDValue N0 = N->getOperand(0);
12386 SDValue N1 = N->getOperand(1);
12387 SDValue N2 = N->getOperand(2);
12388 SDValue N3 = N->getOperand(3);
12389 SDValue N4 = N->getOperand(4);
12390 ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
12391
12392 // fold select_cc lhs, rhs, x, x, cc -> x
12393 if (N2 == N3)
12394 return N2;
12395
12396 // select_cc bool, 0, x, y, seteq -> select bool, y, x
12397 if (CC == ISD::SETEQ && !LegalTypes && N0.getValueType() == MVT::i1 &&
12398 isNullConstant(N1))
12399 return DAG.getSelect(SDLoc(N), N2.getValueType(), N0, N3, N2);
12400
12401 // Determine if the condition we're dealing with is constant
12402 if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
12403 CC, SDLoc(N), false)) {
12404 AddToWorklist(SCC.getNode());
12405
12406 // cond always true -> true val
12407 // cond always false -> false val
12408 if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode()))
12409 return SCCC->isZero() ? N3 : N2;
12410
12411 // When the condition is UNDEF, just return the first operand. This is
12412 // coherent with DAG creation: no setcc node is created in this case.
12413 if (SCC->isUndef())
12414 return N2;
12415
12416 // Fold to a simpler select_cc
12417 if (SCC.getOpcode() == ISD::SETCC) {
12418 SDValue SelectOp = DAG.getNode(
12419 ISD::SELECT_CC, SDLoc(N), N2.getValueType(), SCC.getOperand(0),
12420 SCC.getOperand(1), N2, N3, SCC.getOperand(2));
12421 SelectOp->setFlags(SCC->getFlags());
12422 return SelectOp;
12423 }
12424 }
12425
12426 // If we can fold this based on the true/false value, do so.
12427 if (SimplifySelectOps(N, N2, N3))
12428 return SDValue(N, 0); // Don't revisit N.
12429
12430 // fold select_cc into other things, such as min/max/abs
12431 return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
12432}
12433
12434SDValue DAGCombiner::visitSETCC(SDNode *N) {
12435 // setcc is very commonly used as an argument to brcond. This pattern
12436 // also lends itself to numerous combines and, as a result, it is desirable
12437 // to keep the argument to a brcond as a setcc as much as possible.
12438 bool PreferSetCC =
12439 N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BRCOND;
12440
12441 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
12442 EVT VT = N->getValueType(0);
12443 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
12444
12445 SDValue Combined = SimplifySetCC(VT, N0, N1, Cond, SDLoc(N), !PreferSetCC);
12446
12447 if (Combined) {
12448 // If we prefer to have a setcc, and we don't, we'll try our best to
12449 // recreate one using rebuildSetCC.
12450 if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
12451 SDValue NewSetCC = rebuildSetCC(Combined);
12452
12453 // We don't have anything interesting to combine to.
12454 if (NewSetCC.getNode() == N)
12455 return SDValue();
12456
12457 if (NewSetCC)
12458 return NewSetCC;
12459 }
12460 return Combined;
12461 }
12462
12463 // Optimize
12464 // 1) (icmp eq/ne (and X, C0), (shift X, C1))
12465 // or
12466 // 2) (icmp eq/ne X, (rotate X, C1))
12467 // If C0 is a mask or shifted mask and the shift amt (C1) isolates the
12468 // remaining bits (i.e. something like `(x64 & UINT32_MAX) == (x64 >> 32)`),
12469 // then:
12470 // If C1 is a power of 2, then the rotate and shift+and versions are
12471 // equivalent, so we can interchange them depending on target preference.
12472 // Otherwise, if we have the shift+and version we can interchange srl/shl,
12473 // which in turn affects the constant C0. We can use this to get better
12474 // constants, again determined by target preference.
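// [Illustrative example, not part of the original source] For i64 and C1 = 48
// the two shift+and forms test the same thing: (x & 0xFFFF) == (x >> 48) and
// (x & 0xFFFF000000000000) == (x << 48) both hold exactly when the top 16 bits
// of x equal the bottom 16 bits, so swapping srl/shl only changes the mask C0.
// With C1 = 32 (a power of 2) either form is also equivalent to
// x == rotl(x, 32), since rotating by half the width swaps the two halves.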
12475 if (Cond == ISD::SETNE || Cond == ISD::SETEQ) {
12476 auto IsAndWithShift = [](SDValue A, SDValue B) {
12477 return A.getOpcode() == ISD::AND &&
12478 (B.getOpcode() == ISD::SRL || B.getOpcode() == ISD::SHL) &&
12479 A.getOperand(0) == B.getOperand(0);
12480 };
12481 auto IsRotateWithOp = [](SDValue A, SDValue B) {
12482 return (B.getOpcode() == ISD::ROTL || B.getOpcode() == ISD::ROTR) &&
12483 B.getOperand(0) == A;
12484 };
12485 SDValue AndOrOp = SDValue(), ShiftOrRotate = SDValue();
12486 bool IsRotate = false;
12487
12488 // Find either shift+and or rotate pattern.
12489 if (IsAndWithShift(N0, N1)) {
12490 AndOrOp = N0;
12491 ShiftOrRotate = N1;
12492 } else if (IsAndWithShift(N1, N0)) {
12493 AndOrOp = N1;
12494 ShiftOrRotate = N0;
12495 } else if (IsRotateWithOp(N0, N1)) {
12496 IsRotate = true;
12497 AndOrOp = N0;
12498 ShiftOrRotate = N1;
12499 } else if (IsRotateWithOp(N1, N0)) {
12500 IsRotate = true;
12501 AndOrOp = N1;
12502 ShiftOrRotate = N0;
12503 }
12504
12505 if (AndOrOp && ShiftOrRotate && ShiftOrRotate.hasOneUse() &&
12506 (IsRotate || AndOrOp.hasOneUse())) {
12507 EVT OpVT = N0.getValueType();
12508 // Get the constant shift/rotate amount and possibly the mask (if it's the
12509 // shift+and variant).
12510 auto GetAPIntValue = [](SDValue Op) -> std::optional<APInt> {
12511 ConstantSDNode *CNode = isConstOrConstSplat(Op, /*AllowUndefs*/ false,
12512 /*AllowTrunc*/ false);
12513 if (CNode == nullptr)
12514 return std::nullopt;
12515 return CNode->getAPIntValue();
12516 };
12517 std::optional<APInt> AndCMask =
12518 IsRotate ? std::nullopt : GetAPIntValue(AndOrOp.getOperand(1));
12519 std::optional<APInt> ShiftCAmt =
12520 GetAPIntValue(ShiftOrRotate.getOperand(1));
12521 unsigned NumBits = OpVT.getScalarSizeInBits();
12522
12523 // We found constants.
12524 if (ShiftCAmt && (IsRotate || AndCMask) && ShiftCAmt->ult(NumBits)) {
12525 unsigned ShiftOpc = ShiftOrRotate.getOpcode();
12526 // Check that the constants meet the constraints.
12527 bool CanTransform = IsRotate;
12528 if (!CanTransform) {
12529 // Check that the mask and shift complement each other.
12530 CanTransform = *ShiftCAmt == (~*AndCMask).popcount();
12531 // Check that we are comparing all bits
12532 CanTransform &= (*ShiftCAmt + AndCMask->popcount()) == NumBits;
12533 // Check that the and mask is correct for the shift
12534 CanTransform &=
12535 ShiftOpc == ISD::SHL ? (~*AndCMask).isMask() : AndCMask->isMask();
12536 }
12537
12538 // See if target prefers another shift/rotate opcode.
12539 unsigned NewShiftOpc = TLI.preferedOpcodeForCmpEqPiecesOfOperand(
12540 OpVT, ShiftOpc, ShiftCAmt->isPowerOf2(), *ShiftCAmt, AndCMask);
12541 // Transform is valid and we have a new preference.
12542 if (CanTransform && NewShiftOpc != ShiftOpc) {
12543 SDLoc DL(N);
12544 SDValue NewShiftOrRotate =
12545 DAG.getNode(NewShiftOpc, DL, OpVT, ShiftOrRotate.getOperand(0),
12546 ShiftOrRotate.getOperand(1));
12547 SDValue NewAndOrOp = SDValue();
12548
12549 if (NewShiftOpc == ISD::SHL || NewShiftOpc == ISD::SRL) {
12550 APInt NewMask =
12551 NewShiftOpc == ISD::SHL
12552 ? APInt::getHighBitsSet(NumBits,
12553 NumBits - ShiftCAmt->getZExtValue())
12554 : APInt::getLowBitsSet(NumBits,
12555 NumBits - ShiftCAmt->getZExtValue());
12556 NewAndOrOp =
12557 DAG.getNode(ISD::AND, DL, OpVT, ShiftOrRotate.getOperand(0),
12558 DAG.getConstant(NewMask, DL, OpVT));
12559 } else {
12560 NewAndOrOp = ShiftOrRotate.getOperand(0);
12561 }
12562
12563 return DAG.getSetCC(DL, VT, NewAndOrOp, NewShiftOrRotate, Cond);
12564 }
12565 }
12566 }
12567 }
12568 return SDValue();
12569}
12570
12571SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
12572 SDValue LHS = N->getOperand(0);
12573 SDValue RHS = N->getOperand(1);
12574 SDValue Carry = N->getOperand(2);
12575 SDValue Cond = N->getOperand(3);
12576
12577 // If Carry is false, fold to a regular SETCC.
12578 if (isNullConstant(Carry))
12579 return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
12580
12581 return SDValue();
12582}
12583
12584/// Check if N satisfies:
12585/// N is used once.
12586/// N is a load.
12587/// The load is compatible with ExtOpcode, which means:
12588/// If the load has an explicit zero/sign extension, ExtOpcode must have the
12589/// same extension.
12590/// Otherwise any ExtOpcode is compatible.
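// [Illustrative note, not part of the original source] For example, an
// existing SEXTLOAD is only compatible with a SIGN_EXTEND user: folding it
// into a ZERO_EXTEND would have to re-zero high bits that the load has
// already sign-filled.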
12591static bool isCompatibleLoad(SDValue N, unsigned ExtOpcode) {
12592 if (!N.hasOneUse())
12593 return false;
12594
12595 if (!isa<LoadSDNode>(N))
12596 return false;
12597
12598 LoadSDNode *Load = cast<LoadSDNode>(N);
12599 ISD::LoadExtType LoadExt = Load->getExtensionType();
12600 if (LoadExt == ISD::NON_EXTLOAD || LoadExt == ISD::EXTLOAD)
12601 return true;
12602
12603 // Now LoadExt is either SEXTLOAD or ZEXTLOAD, ExtOpcode must have the same
12604 // extension.
12605 if ((LoadExt == ISD::SEXTLOAD && ExtOpcode != ISD::SIGN_EXTEND) ||
12606 (LoadExt == ISD::ZEXTLOAD && ExtOpcode != ISD::ZERO_EXTEND))
12607 return false;
12608
12609 return true;
12610}
12611
12612/// Fold
12613/// (sext (select c, load x, load y)) -> (select c, sextload x, sextload y)
12614/// (zext (select c, load x, load y)) -> (select c, zextload x, zextload y)
12615/// (aext (select c, load x, load y)) -> (select c, extload x, extload y)
12616/// This function is called by the DAGCombiner when visiting sext/zext/aext
12617/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
12618static SDValue tryToFoldExtendSelectLoad(SDNode *N, const TargetLowering &TLI,
12619 SelectionDAG &DAG,
12620 CombineLevel Level) {
12621 unsigned Opcode = N->getOpcode();
12622 SDValue N0 = N->getOperand(0);
12623 EVT VT = N->getValueType(0);
12624 SDLoc DL(N);
12625
12626 assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
12627 Opcode == ISD::ANY_EXTEND) &&
12628 "Expected EXTEND dag node in input!");
12629
12630 if (!(N0->getOpcode() == ISD::SELECT || N0->getOpcode() == ISD::VSELECT) ||
12631 !N0.hasOneUse())
12632 return SDValue();
12633
12634 SDValue Op1 = N0->getOperand(1);
12635 SDValue Op2 = N0->getOperand(2);
12636 if (!isCompatibleLoad(Op1, Opcode) || !isCompatibleLoad(Op2, Opcode))
12637 return SDValue();
12638
12639 auto ExtLoadOpcode = ISD::EXTLOAD;
12640 if (Opcode == ISD::SIGN_EXTEND)
12641 ExtLoadOpcode = ISD::SEXTLOAD;
12642 else if (Opcode == ISD::ZERO_EXTEND)
12643 ExtLoadOpcode = ISD::ZEXTLOAD;
12644
12645 // An illegal VSELECT may fail ISel if it occurs after legalization (DAG
12646 // Combine2), so we should conservatively check the OperationAction.
12647 LoadSDNode *Load1 = cast<LoadSDNode>(Op1);
12648 LoadSDNode *Load2 = cast<LoadSDNode>(Op2);
12649 if (!TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load1->getMemoryVT()) ||
12650 !TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load2->getMemoryVT()) ||
12651 (N0->getOpcode() == ISD::VSELECT && Level >= AfterLegalizeTypes &&
12652 TLI.getOperationAction(ISD::VSELECT, VT) != TargetLowering::Legal))
12653 return SDValue();
12654
12655 SDValue Ext1 = DAG.getNode(Opcode, DL, VT, Op1);
12656 SDValue Ext2 = DAG.getNode(Opcode, DL, VT, Op2);
12657 return DAG.getSelect(DL, VT, N0->getOperand(0), Ext1, Ext2);
12658}
12659
12660/// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
12661/// a build_vector of constants.
12662/// This function is called by the DAGCombiner when visiting sext/zext/aext
12663/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
12664/// Vector extends are not folded if operations are legal; this is to
12665/// avoid introducing illegal build_vector dag nodes.
12666static SDValue tryToFoldExtendOfConstant(SDNode *N, const SDLoc &DL,
12667 const TargetLowering &TLI,
12668 SelectionDAG &DAG, bool LegalTypes) {
12669 unsigned Opcode = N->getOpcode();
12670 SDValue N0 = N->getOperand(0);
12671 EVT VT = N->getValueType(0);
12672
12673 assert((ISD::isExtOpcode(Opcode) || ISD::isExtVecInRegOpcode(Opcode)) &&
12674 "Expected EXTEND dag node in input!");
12675
12676 // fold (sext c1) -> c1
12677 // fold (zext c1) -> c1
12678 // fold (aext c1) -> c1
12679 if (isa<ConstantSDNode>(N0))
12680 return DAG.getNode(Opcode, DL, VT, N0);
12681
12682 // fold (sext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
12683 // fold (zext (select cond, c1, c2)) -> (select cond, zext c1, zext c2)
12684 // fold (aext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
12685 if (N0->getOpcode() == ISD::SELECT) {
12686 SDValue Op1 = N0->getOperand(1);
12687 SDValue Op2 = N0->getOperand(2);
12688 if (isa<ConstantSDNode>(Op1) && isa<ConstantSDNode>(Op2) &&
12689 (Opcode != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0.getValueType(), VT))) {
12690 // For any_extend, choose sign extension of the constants to allow a
12691 // possible further transform to sign_extend_inreg.i.e.
12692 //
12693 // t1: i8 = select t0, Constant:i8<-1>, Constant:i8<0>
12694 // t2: i64 = any_extend t1
12695 // -->
12696 // t3: i64 = select t0, Constant:i64<-1>, Constant:i64<0>
12697 // -->
12698 // t4: i64 = sign_extend_inreg t3
12699 unsigned FoldOpc = Opcode;
12700 if (FoldOpc == ISD::ANY_EXTEND)
12701 FoldOpc = ISD::SIGN_EXTEND;
12702 return DAG.getSelect(DL, VT, N0->getOperand(0),
12703 DAG.getNode(FoldOpc, DL, VT, Op1),
12704 DAG.getNode(FoldOpc, DL, VT, Op2));
12705 }
12706 }
12707
12708 // fold (sext (build_vector AllConstants) -> (build_vector AllConstants)
12709 // fold (zext (build_vector AllConstants) -> (build_vector AllConstants)
12710 // fold (aext (build_vector AllConstants) -> (build_vector AllConstants)
12711 EVT SVT = VT.getScalarType();
12712 if (!(VT.isVector() && (!LegalTypes || TLI.isTypeLegal(SVT)) &&
12713 ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
12714 return SDValue();
12715
12716 // We can fold this node into a build_vector.
12717 unsigned VTBits = SVT.getSizeInBits();
12718 unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
12719 SmallVector<SDValue, 8> Elts;
12720 unsigned NumElts = VT.getVectorNumElements();
12721
12722 for (unsigned i = 0; i != NumElts; ++i) {
12723 SDValue Op = N0.getOperand(i);
12724 if (Op.isUndef()) {
12725 if (Opcode == ISD::ANY_EXTEND || Opcode == ISD::ANY_EXTEND_VECTOR_INREG)
12726 Elts.push_back(DAG.getUNDEF(SVT));
12727 else
12728 Elts.push_back(DAG.getConstant(0, DL, SVT));
12729 continue;
12730 }
12731
12732 SDLoc DL(Op);
12733 // Get the constant value and if needed trunc it to the size of the type.
12734 // Nodes like build_vector might have constants wider than the scalar type.
12735 APInt C = Op->getAsAPIntVal().zextOrTrunc(EVTBits);
12736 if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
12737 Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
12738 else
12739 Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
12740 }
12741
12742 return DAG.getBuildVector(VT, DL, Elts);
12743}
12744
12745// ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
12746// "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
12747 // transformation. Returns true if extensions are possible and the
12748 // above-mentioned transformation is profitable.
12749static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0,
12750 unsigned ExtOpc,
12751 SmallVectorImpl<SDNode *> &ExtendNodes,
12752 const TargetLowering &TLI) {
12753 bool HasCopyToRegUses = false;
12754 bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType());
12755 for (SDNode::use_iterator UI = N0->use_begin(), UE = N0->use_end(); UI != UE;
12756 ++UI) {
12757 SDNode *User = *UI;
12758 if (User == N)
12759 continue;
12760 if (UI.getUse().getResNo() != N0.getResNo())
12761 continue;
12762 // FIXME: Only extend SETCC N, N and SETCC N, c for now.
12763 if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
12764 ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
12765 if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
12766 // Sign bits will be lost after a zext.
12767 return false;
12768 bool Add = false;
12769 for (unsigned i = 0; i != 2; ++i) {
12770 SDValue UseOp = User->getOperand(i);
12771 if (UseOp == N0)
12772 continue;
12773 if (!isa<ConstantSDNode>(UseOp))
12774 return false;
12775 Add = true;
12776 }
12777 if (Add)
12778 ExtendNodes.push_back(User);
12779 continue;
12780 }
12781 // If truncates aren't free and there are users we can't
12782 // extend, it isn't worthwhile.
12783 if (!isTruncFree)
12784 return false;
12785 // Remember if this value is live-out.
12786 if (User->getOpcode() == ISD::CopyToReg)
12787 HasCopyToRegUses = true;
12788 }
12789
12790 if (HasCopyToRegUses) {
12791 bool BothLiveOut = false;
12792 for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
12793 UI != UE; ++UI) {
12794 SDUse &Use = UI.getUse();
12795 if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
12796 BothLiveOut = true;
12797 break;
12798 }
12799 }
12800 if (BothLiveOut)
12801 // Both unextended and extended values are live out. There had better be
12802 // a good reason for the transformation.
12803 return !ExtendNodes.empty();
12804 }
12805 return true;
12806}
12807
12808void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
12809 SDValue OrigLoad, SDValue ExtLoad,
12810 ISD::NodeType ExtType) {
12811 // Extend SetCC uses if necessary.
12812 SDLoc DL(ExtLoad);
12813 for (SDNode *SetCC : SetCCs) {
12814 SmallVector<SDValue, 4> Ops;
12815
12816 for (unsigned j = 0; j != 2; ++j) {
12817 SDValue SOp = SetCC->getOperand(j);
12818 if (SOp == OrigLoad)
12819 Ops.push_back(ExtLoad);
12820 else
12821 Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
12822 }
12823
12824 Ops.push_back(SetCC->getOperand(2));
12825 CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
12826 }
12827}
12828
12829// FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
12830SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
12831 SDValue N0 = N->getOperand(0);
12832 EVT DstVT = N->getValueType(0);
12833 EVT SrcVT = N0.getValueType();
12834
12835 assert((N->getOpcode() == ISD::SIGN_EXTEND ||
12836 N->getOpcode() == ISD::ZERO_EXTEND) &&
12837 "Unexpected node type (not an extend)!");
12838
12839 // fold (sext (load x)) to multiple smaller sextloads; same for zext.
12840 // For example, on a target with legal v4i32, but illegal v8i32, turn:
12841 // (v8i32 (sext (v8i16 (load x))))
12842 // into:
12843 // (v8i32 (concat_vectors (v4i32 (sextload x)),
12844 // (v4i32 (sextload (x + 16)))))
12845 // Where uses of the original load, i.e.:
12846 // (v8i16 (load x))
12847 // are replaced with:
12848 // (v8i16 (truncate
12849 // (v8i32 (concat_vectors (v4i32 (sextload x)),
12850 // (v4i32 (sextload (x + 16)))))))
12851 //
12852 // This combine is only applicable to illegal, but splittable, vectors.
12853 // All legal types, and illegal non-vector types, are handled elsewhere.
12854 // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
12855 //
12856 if (N0->getOpcode() != ISD::LOAD)
12857 return SDValue();
12858
12859 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
12860
12861 if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
12862 !N0.hasOneUse() || !LN0->isSimple() ||
12863 !DstVT.isVector() || !DstVT.isPow2VectorType() ||
12864 !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
12865 return SDValue();
12866
12867 SmallVector<SDNode *, 4> SetCCs;
12868 if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI))
12869 return SDValue();
12870
12871 ISD::LoadExtType ExtType =
12872 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
12873
12874 // Try to split the vector types to get down to legal types.
12875 EVT SplitSrcVT = SrcVT;
12876 EVT SplitDstVT = DstVT;
12877 while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
12878 SplitSrcVT.getVectorNumElements() > 1) {
12879 SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
12880 SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
12881 }
12882
12883 if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
12884 return SDValue();
12885
12886 assert(!DstVT.isScalableVector() && "Unexpected scalable vector type");
12887
12888 SDLoc DL(N);
12889 const unsigned NumSplits =
12890 DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
12891 const unsigned Stride = SplitSrcVT.getStoreSize();
12892 SmallVector<SDValue, 4> Loads;
12893 SmallVector<SDValue, 4> Chains;
12894
12895 SDValue BasePtr = LN0->getBasePtr();
12896 for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
12897 const unsigned Offset = Idx * Stride;
12898
12899 SDValue SplitLoad =
12900 DAG.getExtLoad(ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(),
12901 BasePtr, LN0->getPointerInfo().getWithOffset(Offset),
12902 SplitSrcVT, LN0->getOriginalAlign(),
12903 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
12904
12905 BasePtr = DAG.getMemBasePlusOffset(BasePtr, TypeSize::getFixed(Stride), DL);
12906
12907 Loads.push_back(SplitLoad.getValue(0));
12908 Chains.push_back(SplitLoad.getValue(1));
12909 }
12910
12911 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
12912 SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
12913
12914 // Simplify TF.
12915 AddToWorklist(NewChain.getNode());
12916
12917 CombineTo(N, NewValue);
12918
12919 // Replace uses of the original load (before extension)
12920 // with a truncate of the concatenated sextloaded vectors.
12921 SDValue Trunc =
12922 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
12923 ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode());
12924 CombineTo(N0.getNode(), Trunc, NewChain);
12925 return SDValue(N, 0); // Return N so it doesn't get rechecked!
12926}
12927
12928// fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
12929// (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
12930SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
12931 assert(N->getOpcode() == ISD::ZERO_EXTEND);
12932 EVT VT = N->getValueType(0);
12933 EVT OrigVT = N->getOperand(0).getValueType();
12934 if (TLI.isZExtFree(OrigVT, VT))
12935 return SDValue();
12936
12937 // and/or/xor
12938 SDValue N0 = N->getOperand(0);
12939 if (!ISD::isBitwiseLogicOp(N0.getOpcode()) ||
12940 N0.getOperand(1).getOpcode() != ISD::Constant ||
12941 (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT)))
12942 return SDValue();
12943
12944 // shl/shr
12945 SDValue N1 = N0->getOperand(0);
12946 if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) ||
12947 N1.getOperand(1).getOpcode() != ISD::Constant ||
12948 (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT)))
12949 return SDValue();
12950
12951 // load
12952 if (!isa<LoadSDNode>(N1.getOperand(0)))
12953 return SDValue();
12954 LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0));
12955 EVT MemVT = Load->getMemoryVT();
12956 if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) ||
12957 Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
12958 return SDValue();
12959
12960
12961 // If the shift op is SHL, the logic op must be AND, otherwise the result
12962 // will be wrong.
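// [Illustrative note, not part of the original source] Sketch of why: with an
// i8 load zero-extended to i32, bits that SHL pushes above bit 7 are discarded
// in the original i8 computation but survive in the widened one; an AND with
// the zero-extended constant clears those extra bits again, whereas an OR or
// XOR would let them leak into the result.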
12963 if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND)
12964 return SDValue();
12965
12966 if (!N0.hasOneUse() || !N1.hasOneUse())
12967 return SDValue();
12968
12969 SmallVector<SDNode *, 4> SetCCs;
12970 if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0),
12971 ISD::ZERO_EXTEND, SetCCs, TLI))
12972 return SDValue();
12973
12974 // Actually do the transformation.
12975 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT,
12976 Load->getChain(), Load->getBasePtr(),
12977 Load->getMemoryVT(), Load->getMemOperand());
12978
12979 SDLoc DL1(N1);
12980 SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad,
12981 N1.getOperand(1));
12982
12983 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
12984 SDLoc DL0(N0);
12985 SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
12986 DAG.getConstant(Mask, DL0, VT));
12987
12988 ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
12989 CombineTo(N, And);
12990 if (SDValue(Load, 0).hasOneUse()) {
12991 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
12992 } else {
12993 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load),
12994 Load->getValueType(0), ExtLoad);
12995 CombineTo(Load, Trunc, ExtLoad.getValue(1));
12996 }
12997
12998 // N0 is dead at this point.
12999 recursivelyDeleteUnusedNodes(N0.getNode());
13000
13001 return SDValue(N,0); // Return N so it doesn't get rechecked!
13002}
13003
13004/// If we're narrowing or widening the result of a vector select and the final
13005/// size is the same size as a setcc (compare) feeding the select, then try to
13006/// apply the cast operation to the select's operands because matching vector
13007/// sizes for a select condition and other operands should be more efficient.
13008SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
13009 unsigned CastOpcode = Cast->getOpcode();
13010 assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
13011 CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
13012 CastOpcode == ISD::FP_ROUND) &&
13013 "Unexpected opcode for vector select narrowing/widening");
13014
13015 // We only do this transform before legal ops because the pattern may be
13016 // obfuscated by target-specific operations after legalization. Do not create
13017 // an illegal select op, however, because that may be difficult to lower.
13018 EVT VT = Cast->getValueType(0);
13019 if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
13020 return SDValue();
13021
13022 SDValue VSel = Cast->getOperand(0);
13023 if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() ||
13024 VSel.getOperand(0).getOpcode() != ISD::SETCC)
13025 return SDValue();
13026
13027 // Does the setcc have the same vector size as the casted select?
13028 SDValue SetCC = VSel.getOperand(0);
13029 EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
13030 if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
13031 return SDValue();
13032
13033 // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
13034 SDValue A = VSel.getOperand(1);
13035 SDValue B = VSel.getOperand(2);
13036 SDValue CastA, CastB;
13037 SDLoc DL(Cast);
13038 if (CastOpcode == ISD::FP_ROUND) {
13039 // FP_ROUND (fptrunc) has an extra flag operand to pass along.
13040 CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
13041 CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
13042 } else {
13043 CastA = DAG.getNode(CastOpcode, DL, VT, A);
13044 CastB = DAG.getNode(CastOpcode, DL, VT, B);
13045 }
13046 return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
13047}
13048
13049// fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
13050// fold ([s|z]ext ( extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
13051static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner,
13052 const TargetLowering &TLI, EVT VT,
13053 bool LegalOperations, SDNode *N,
13054 SDValue N0, ISD::LoadExtType ExtLoadType) {
13055 SDNode *N0Node = N0.getNode();
13056 bool isAExtLoad = (ExtLoadType == ISD::SEXTLOAD) ? ISD::isSEXTLoad(N0Node)
13057 : ISD::isZEXTLoad(N0Node);
13058 if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) ||
13059 !ISD::isUNINDEXEDLoad(N0Node) || !N0.hasOneUse())
13060 return SDValue();
13061
13062 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
13063 EVT MemVT = LN0->getMemoryVT();
13064 if ((LegalOperations || !LN0->isSimple() ||
13065 VT.isVector()) &&
13066 !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
13067 return SDValue();
13068
13069 SDValue ExtLoad =
13070 DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
13071 LN0->getBasePtr(), MemVT, LN0->getMemOperand());
13072 Combiner.CombineTo(N, ExtLoad);
13073 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
13074 if (LN0->use_empty())
13075 Combiner.recursivelyDeleteUnusedNodes(LN0);
13076 return SDValue(N, 0); // Return N so it doesn't get rechecked!
13077}
13078
13079// fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x)))
13080// Only generate vector extloads when 1) they're legal, and 2) they are
13081// deemed desirable by the target. NonNegZExt can be set to true if a zero
13082// extend has the nonneg flag to allow use of sextload if profitable.
13083static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
13084 const TargetLowering &TLI, EVT VT,
13085 bool LegalOperations, SDNode *N, SDValue N0,
13086 ISD::LoadExtType ExtLoadType,
13087 ISD::NodeType ExtOpc,
13088 bool NonNegZExt = false) {
13089 if (!ISD::isNON_EXTLoad(N0.getNode()) || !ISD::isUNINDEXEDLoad(N0.getNode()))
13090 return {};
13091
13092 // If this is zext nneg, see if it would make sense to treat it as a sext.
13093 if (NonNegZExt) {
13094 assert(ExtLoadType == ISD::ZEXTLOAD && ExtOpc == ISD::ZERO_EXTEND &&
13095 "Unexpected load type or opcode");
13096 for (SDNode *User : N0->uses()) {
13097 if (User->getOpcode() == ISD::SETCC) {
13098 ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
13099 if (ISD::isSignedIntSetCC(CC)) {
13100 ExtLoadType = ISD::SEXTLOAD;
13101 ExtOpc = ISD::SIGN_EXTEND;
13102 break;
13103 }
13104 }
13105 }
13106 }
13107
13108 // TODO: isFixedLengthVector() should be removed, with any negative effects
13109 // on code generation being addressed through that target's implementation of
13110 // isVectorLoadExtDesirable().
13111 if ((LegalOperations || VT.isFixedLengthVector() ||
13112 !cast<LoadSDNode>(N0)->isSimple()) &&
13113 !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType()))
13114 return {};
13115
13116 bool DoXform = true;
13117 SmallVector<SDNode *, 4> SetCCs;
13118 if (!N0.hasOneUse())
13119 DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ExtOpc, SetCCs, TLI);
13120 if (VT.isVector())
13121 DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
13122 if (!DoXform)
13123 return {};
13124
13125 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
13126 SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
13127 LN0->getBasePtr(), N0.getValueType(),
13128 LN0->getMemOperand());
13129 Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc);
13130 // If the load value is used only by N, replace it via CombineTo N.
13131 bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
13132 Combiner.CombineTo(N, ExtLoad);
13133 if (NoReplaceTrunc) {
13134 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
13135 Combiner.recursivelyDeleteUnusedNodes(LN0);
13136 } else {
13137 SDValue Trunc =
13138 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
13139 Combiner.CombineTo(LN0, Trunc, ExtLoad.getValue(1));
13140 }
13141 return SDValue(N, 0); // Return N so it doesn't get rechecked!
13142}
13143
13144static SDValue
13145tryToFoldExtOfMaskedLoad(SelectionDAG &DAG, const TargetLowering &TLI, EVT VT,
13146 bool LegalOperations, SDNode *N, SDValue N0,
13147 ISD::LoadExtType ExtLoadType, ISD::NodeType ExtOpc) {
13148 if (!N0.hasOneUse())
13149 return SDValue();
13150
13151 MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0);
13152 if (!Ld || Ld->getExtensionType() != ISD::NON_EXTLOAD)
13153 return SDValue();
13154
13155 if ((LegalOperations || !cast<MaskedLoadSDNode>(N0)->isSimple()) &&
13156 !TLI.isLoadExtLegalOrCustom(ExtLoadType, VT, Ld->getValueType(0)))
13157 return SDValue();
13158
13159 if (!TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
13160 return SDValue();
13161
13162 SDLoc dl(Ld);
13163 SDValue PassThru = DAG.getNode(ExtOpc, dl, VT, Ld->getPassThru());
13164 SDValue NewLoad = DAG.getMaskedLoad(
13165 VT, dl, Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(), Ld->getMask(),
13166 PassThru, Ld->getMemoryVT(), Ld->getMemOperand(), Ld->getAddressingMode(),
13167 ExtLoadType, Ld->isExpandingLoad());
13168 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), SDValue(NewLoad.getNode(), 1));
13169 return NewLoad;
13170}
13171
13172// fold ([s|z]ext (atomic_load)) -> ([s|z]ext (truncate ([s|z]ext atomic_load)))
13173static SDValue tryToFoldExtOfAtomicLoad(SelectionDAG &DAG,
13174 const TargetLowering &TLI, EVT VT,
13175 SDValue N0,
13176 ISD::LoadExtType ExtLoadType) {
13177 auto *ALoad = dyn_cast<AtomicSDNode>(N0);
13178 if (!ALoad || ALoad->getOpcode() != ISD::ATOMIC_LOAD)
13179 return {};
13180 EVT MemoryVT = ALoad->getMemoryVT();
13181 if (!TLI.isAtomicLoadExtLegal(ExtLoadType, VT, MemoryVT))
13182 return {};
13183 // Can't fold into ALoad if it is already extending differently.
13184 ISD::LoadExtType ALoadExtTy = ALoad->getExtensionType();
13185 if ((ALoadExtTy == ISD::ZEXTLOAD && ExtLoadType == ISD::SEXTLOAD) ||
13186 (ALoadExtTy == ISD::SEXTLOAD && ExtLoadType == ISD::ZEXTLOAD))
13187 return {};
13188
13189 EVT OrigVT = ALoad->getValueType(0);
13190 assert(OrigVT.getSizeInBits() < VT.getSizeInBits() && "VT should be wider.");
13191 auto *NewALoad = cast<AtomicSDNode>(DAG.getAtomic(
13192 ISD::ATOMIC_LOAD, SDLoc(ALoad), MemoryVT, VT, ALoad->getChain(),
13193 ALoad->getBasePtr(), ALoad->getMemOperand()));
13194 NewALoad->setExtensionType(ExtLoadType);
13195 DAG.ReplaceAllUsesOfValueWith(
13196 SDValue(ALoad, 0),
13197 DAG.getNode(ISD::TRUNCATE, SDLoc(ALoad), OrigVT, SDValue(NewALoad, 0)));
13198 // Update the chain uses.
13199 DAG.ReplaceAllUsesOfValueWith(SDValue(ALoad, 1), SDValue(NewALoad, 1));
13200 return SDValue(NewALoad, 0);
13201}
13202
13203static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG,
13204 bool LegalOperations) {
13205 assert((N->getOpcode() == ISD::SIGN_EXTEND ||
13206 N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext");
13207
13208 SDValue SetCC = N->getOperand(0);
13209 if (LegalOperations || SetCC.getOpcode() != ISD::SETCC ||
13210 !SetCC.hasOneUse() || SetCC.getValueType() != MVT::i1)
13211 return SDValue();
13212
13213 SDValue X = SetCC.getOperand(0);
13214 SDValue Ones = SetCC.getOperand(1);
13215 ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
13216 EVT VT = N->getValueType(0);
13217 EVT XVT = X.getValueType();
13218 // setge X, C is canonicalized to setgt, so we do not need to match that
13219 // pattern. The setlt sibling is folded in SimplifySelectCC() because it does
13220 // not require the 'not' op.
13221 if (CC == ISD::SETGT && isAllOnesConstant(Ones) && VT == XVT) {
13222 // Invert and smear/shift the sign bit:
13223 // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
13224 // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
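// [Illustrative example, not part of the original source] For i8: X = 5 gives
// (not X) = 0xFA, and sra by 7 yields -1 while srl by 7 yields 1, matching
// sext/zext of (5 > -1) = true; X = -3 gives (not X) = 0x02, and either shift
// yields 0, matching the false case.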
13225 SDLoc DL(N);
13226 unsigned ShCt = VT.getSizeInBits() - 1;
13227 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13228 if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
13229 SDValue NotX = DAG.getNOT(DL, X, VT);
13230 SDValue ShiftAmount = DAG.getConstant(ShCt, DL, VT);
13231 auto ShiftOpcode =
13232 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL;
13233 return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount);
13234 }
13235 }
13236 return SDValue();
13237}
13238
13239SDValue DAGCombiner::foldSextSetcc(SDNode *N) {
13240 SDValue N0 = N->getOperand(0);
13241 if (N0.getOpcode() != ISD::SETCC)
13242 return SDValue();
13243
13244 SDValue N00 = N0.getOperand(0);
13245 SDValue N01 = N0.getOperand(1);
13246 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
13247 EVT VT = N->getValueType(0);
13248 EVT N00VT = N00.getValueType();
13249 SDLoc DL(N);
13250
13251 // Propagate fast-math-flags.
13252 SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
13253
13254 // On some architectures (such as SSE/NEON/etc) the SETCC result type is
13255 // the same size as the compared operands. Try to optimize sext(setcc())
13256 // if this is the case.
13257 if (VT.isVector() && !LegalOperations &&
13258 TLI.getBooleanContents(N00VT) ==
13259 TargetLowering::ZeroOrNegativeOneBooleanContent) {
13260 EVT SVT = getSetCCResultType(N00VT);
13261
13262 // If we already have the desired type, don't change it.
13263 if (SVT != N0.getValueType()) {
13264 // We know that the # elements of the results is the same as the
13265 // # elements of the compare (and the # elements of the compare result
13266 // for that matter). Check to see that they are the same size. If so,
13267 // we know that the element size of the sext'd result matches the
13268 // element size of the compare operands.
13269 if (VT.getSizeInBits() == SVT.getSizeInBits())
13270 return DAG.getSetCC(DL, VT, N00, N01, CC);
13271
13272 // If the desired elements are smaller or larger than the source
13273 // elements, we can use a matching integer vector type and then
13274 // truncate/sign extend.
13275 EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
13276 if (SVT == MatchingVecType) {
13277 SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
13278 return DAG.getSExtOrTrunc(VsetCC, DL, VT);
13279 }
13280 }
13281
13282 // Try to eliminate the sext of a setcc by zexting the compare operands.
13283 if (N0.hasOneUse() && TLI.isOperationLegalOrCustom(ISD::SETCC, VT) &&
13284 !TLI.isOperationLegalOrCustom(ISD::SETCC, SVT)) {
13285 bool IsSignedCmp = ISD::isSignedIntSetCC(CC);
13286 unsigned LoadOpcode = IsSignedCmp ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
13287 unsigned ExtOpcode = IsSignedCmp ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
13288
13289 // We have an unsupported narrow vector compare op that would be legal
13290 // if extended to the destination type. See if the compare operands
13291 // can be freely extended to the destination type.
13292 auto IsFreeToExtend = [&](SDValue V) {
13293 if (isConstantOrConstantVector(V, /*NoOpaques*/ true))
13294 return true;
13295 // Match a simple, non-extended load that can be converted to a
13296 // legal {z/s}ext-load.
13297 // TODO: Allow widening of an existing {z/s}ext-load?
13298 if (!(ISD::isNON_EXTLoad(V.getNode()) &&
13299 ISD::isUNINDEXEDLoad(V.getNode()) &&
13300 cast<LoadSDNode>(V)->isSimple() &&
13301 TLI.isLoadExtLegal(LoadOpcode, VT, V.getValueType())))
13302 return false;
13303
13304 // Non-chain users of this value must either be the setcc in this
13305 // sequence or extends that can be folded into the new {z/s}ext-load.
13306 for (SDNode::use_iterator UI = V->use_begin(), UE = V->use_end();
13307 UI != UE; ++UI) {
13308 // Skip uses of the chain and the setcc.
13309 SDNode *User = *UI;
13310 if (UI.getUse().getResNo() != 0 || User == N0.getNode())
13311 continue;
13312 // Extra users must have exactly the same cast we are about to create.
13313 // TODO: This restriction could be eased if ExtendUsesToFormExtLoad()
13314 // is enhanced similarly.
13315 if (User->getOpcode() != ExtOpcode || User->getValueType(0) != VT)
13316 return false;
13317 }
13318 return true;
13319 };
13320
13321 if (IsFreeToExtend(N00) && IsFreeToExtend(N01)) {
13322 SDValue Ext0 = DAG.getNode(ExtOpcode, DL, VT, N00);
13323 SDValue Ext1 = DAG.getNode(ExtOpcode, DL, VT, N01);
13324 return DAG.getSetCC(DL, VT, Ext0, Ext1, CC);
13325 }
13326 }
13327 }
13328
13329 // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
13330 // Here, T can be 1 or -1, depending on the type of the setcc and
13331 // getBooleanContents().
13332 unsigned SetCCWidth = N0.getScalarValueSizeInBits();
13333
13334 // To determine the "true" side of the select, we need to know the high bit
13335 // of the value returned by the setcc if it evaluates to true.
13336 // If the type of the setcc is i1, then the true case of the select is just
13337 // sext(i1 1), that is, -1.
13338 // If the type of the setcc is larger (say, i8) then the value of the high
13339 // bit depends on getBooleanContents(), so ask TLI for a real "true" value
13340 // of the appropriate width.
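// [Illustrative note, not part of the original source] E.g. for a scalar i1
// setcc extended to i32, ExtTrueVal is -1 (all ones), so the select produces
// 0 or 0xFFFFFFFF; for a wider setcc type, the target's getBooleanContents()
// "true" value is used instead.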
13341 SDValue ExtTrueVal = (SetCCWidth == 1)
13342 ? DAG.getAllOnesConstant(DL, VT)
13343 : DAG.getBoolConstant(true, DL, VT, N00VT);
13344 SDValue Zero = DAG.getConstant(0, DL, VT);
13345 if (SDValue SCC = SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
13346 return SCC;
13347
13348 if (!VT.isVector() && !shouldConvertSelectOfConstantsToMath(N0, VT, TLI)) {
13349 EVT SetCCVT = getSetCCResultType(N00VT);
13350 // Don't do this transform for i1 because there's a select transform
13351 // that would reverse it.
13352 // TODO: We should not do this transform at all without a target hook
13353 // because a sext is likely cheaper than a select?
13354 if (SetCCVT.getScalarSizeInBits() != 1 &&
13355 (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
13356 SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
13357 return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
13358 }
13359 }
13360
13361 return SDValue();
13362}
13363
13364SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
13365 SDValue N0 = N->getOperand(0);
13366 EVT VT = N->getValueType(0);
13367 SDLoc DL(N);
13368
13369 if (VT.isVector())
13370 if (SDValue FoldedVOp = SimplifyVCastOp(N, DL))
13371 return FoldedVOp;
13372
13373 // sext(undef) = 0 because the top bit will all be the same.
13374 if (N0.isUndef())
13375 return DAG.getConstant(0, DL, VT);
13376
13377 if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
13378 return Res;
13379
13380 // fold (sext (sext x)) -> (sext x)
13381 // fold (sext (aext x)) -> (sext x)
13382 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
13383 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));
13384
13385 // fold (sext (aext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
13386 // fold (sext (sext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
13387 if (N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
13388 N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG)
13389 return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, DL, VT,
13390 N0.getOperand(0));
13391
13392 // fold (sext (sext_inreg x)) -> (sext (trunc x))
13393 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
13394 SDValue N00 = N0.getOperand(0);
13395 EVT ExtVT = cast<VTSDNode>(N0->getOperand(1))->getVT();
13396 if ((N00.getOpcode() == ISD::TRUNCATE || TLI.isTruncateFree(N00, ExtVT)) &&
13397 (!LegalTypes || TLI.isTypeLegal(ExtVT))) {
13398 SDValue T = DAG.getNode(ISD::TRUNCATE, DL, ExtVT, N00);
13399 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, T);
13400 }
13401 }
13402
13403 if (N0.getOpcode() == ISD::TRUNCATE) {
13404 // fold (sext (truncate (load x))) -> (sext (smaller load x))
13405 // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
13406 if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
13407 SDNode *oye = N0.getOperand(0).getNode();
13408 if (NarrowLoad.getNode() != N0.getNode()) {
13409 CombineTo(N0.getNode(), NarrowLoad);
13410 // CombineTo deleted the truncate, if needed, but not what's under it.
13411 AddToWorklist(oye);
13412 }
13413 return SDValue(N, 0); // Return N so it doesn't get rechecked!
13414 }
13415
13416 // See if the value being truncated is already sign extended. If so, just
13417 // eliminate the trunc/sext pair.
13418 SDValue Op = N0.getOperand(0);
13419 unsigned OpBits = Op.getScalarValueSizeInBits();
13420 unsigned MidBits = N0.getScalarValueSizeInBits();
13421 unsigned DestBits = VT.getScalarSizeInBits();
13422 unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
13423
13424 if (OpBits == DestBits) {
13425 // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
13426 // bits, it is already in the desired form.
13427 if (NumSignBits > DestBits-MidBits)
13428 return Op;
13429 } else if (OpBits < DestBits) {
13430 // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
13431 // bits, just sext from i32.
13432 if (NumSignBits > OpBits-MidBits)
13433 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
13434 } else {
13435 // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
13436 // bits, just truncate to i32.
13437 if (NumSignBits > OpBits-MidBits)
13438 return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
13439 }
13440
13441 // fold (sext (truncate x)) -> (sextinreg x).
13442 if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
13443 N0.getValueType())) {
13444 if (OpBits < DestBits)
13445 Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
13446 else if (OpBits > DestBits)
13447 Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
13448 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
13449 DAG.getValueType(N0.getValueType()));
13450 }
13451 }
13452
13453 // Try to simplify (sext (load x)).
13454 if (SDValue foldedExt =
13455 tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
13456 ISD::SEXTLOAD, ISD::SIGN_EXTEND))
13457 return foldedExt;
13458
13459 if (SDValue foldedExt =
13460 tryToFoldExtOfMaskedLoad(DAG, TLI, VT, LegalOperations, N, N0,
13461 ISD::SEXTLOAD, ISD::SIGN_EXTEND))
13462 return foldedExt;
13463
13464 // fold (sext (load x)) to multiple smaller sextloads.
13465 // Only on illegal but splittable vectors.
13466 if (SDValue ExtLoad = CombineExtLoad(N))
13467 return ExtLoad;
13468
13469 // Try to simplify (sext (sextload x)).
13470 if (SDValue foldedExt = tryToFoldExtOfExtload(
13471 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD))
13472 return foldedExt;
13473
13474 // Try to simplify (sext (atomic_load x)).
13475 if (SDValue foldedExt =
13476 tryToFoldExtOfAtomicLoad(DAG, TLI, VT, N0, ISD::SEXTLOAD))
13477 return foldedExt;
13478
13479 // fold (sext (and/or/xor (load x), cst)) ->
13480 // (and/or/xor (sextload x), (sext cst))
13481 if (ISD::isBitwiseLogicOp(N0.getOpcode()) &&
13482 isa<LoadSDNode>(N0.getOperand(0)) &&
13483 N0.getOperand(1).getOpcode() == ISD::Constant &&
13484 (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
13485 LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
13486 EVT MemVT = LN00->getMemoryVT();
13487 if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) &&
13488 LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) {
13489 SmallVector<SDNode *, 4> SetCCs;
13490 bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
13491 ISD::SIGN_EXTEND, SetCCs, TLI);
13492 if (DoXform) {
13493 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT,
13494 LN00->getChain(), LN00->getBasePtr(),
13495 LN00->getMemoryVT(),
13496 LN00->getMemOperand());
13497 APInt Mask = N0.getConstantOperandAPInt(1).sext(VT.getSizeInBits());
13498 SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
13499 ExtLoad, DAG.getConstant(Mask, DL, VT));
13500 ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND);
13501 bool NoReplaceTruncAnd = !N0.hasOneUse();
13502 bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
13503 CombineTo(N, And);
13504 // If N0 has multiple uses, change other uses as well.
13505 if (NoReplaceTruncAnd) {
13506 SDValue TruncAnd =
13507 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), And);
13508 CombineTo(N0.getNode(), TruncAnd);
13509 }
13510 if (NoReplaceTrunc) {
13511 DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
13512 } else {
13513 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
13514 LN00->getValueType(0), ExtLoad);
13515 CombineTo(LN00, Trunc, ExtLoad.getValue(1));
13516 }
13517 return SDValue(N,0); // Return N so it doesn't get rechecked!
13518 }
13519 }
13520 }
13521
13522 if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
13523 return V;
13524
13525 if (SDValue V = foldSextSetcc(N))
13526 return V;
13527
13528 // fold (sext x) -> (zext x) if the sign bit is known zero.
13529 if (!TLI.isSExtCheaperThanZExt(N0.getValueType(), VT) &&
13530 (!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
13531 DAG.SignBitIsZero(N0)) {
13532 SDNodeFlags Flags;
13533 Flags.setNonNeg(true);
13534 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0, Flags);
13535 }
13536
13537 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
13538 return NewVSel;
13539
13540 // Eliminate this sign extend by doing a negation in the destination type:
13541 // sext i32 (0 - (zext i8 X to i32)) to i64 --> 0 - (zext i8 X to i64)
13542 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
13543 isNullOrNullSplat(N0.getOperand(0)) &&
13544 N0.getOperand(1).getOpcode() == ISD::ZERO_EXTEND &&
13545 TLI.isOperationLegalOrCustom(ISD::SUB, VT)) {
13546 SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(1).getOperand(0), DL, VT);
13547 return DAG.getNegative(Zext, DL, VT);
13548 }
13549 // Eliminate this sign extend by doing a decrement in the destination type:
13550 // sext i32 ((zext i8 X to i32) + (-1)) to i64 --> (zext i8 X to i64) + (-1)
13551 if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
13552 isAllOnesOrAllOnesSplat(N0.getOperand(1)) &&
13553 N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
13554 TLI.isOperationLegalOrCustom(ISD::ADD, VT)) {
13555 SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
13556 return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
13557 }
13558
13559 // fold sext (not i1 X) -> add (zext i1 X), -1
13560 // TODO: This could be extended to handle bool vectors.
13561 if (N0.getValueType() == MVT::i1 && isBitwiseNot(N0) && N0.hasOneUse() &&
13562 (!LegalOperations || (TLI.isOperationLegal(ISD::ZERO_EXTEND, VT) &&
13563 TLI.isOperationLegal(ISD::ADD, VT)))) {
13564 // If we can eliminate the 'not', the sext form should be better
13565 if (SDValue NewXor = visitXOR(N0.getNode())) {
13566 // Returning N0 is a form of in-visit replacement that may have
13567 // invalidated N0.
13568 if (NewXor.getNode() == N0.getNode()) {
13569 // Return SDValue here as the xor should have already been replaced in
13570 // this sext.
13571 return SDValue();
13572 }
13573
13574 // Return a new sext with the new xor.
13575 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NewXor);
13576 }
13577
13578 SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
13579 return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
13580 }
13581
13582 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, Level))
13583 return Res;
13584
13585 return SDValue();
13586}
13587
13588/// Given an extending node with a pop-count operand, if the target does not
13589/// support a pop-count in the narrow source type but does support it in the
13590/// destination type, widen the pop-count to the destination type.
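// [Illustrative note, not part of the original source] The rewrite is sound
// because the widened operand below is zero-extended, which only appends zero
// bits, so the population count is unchanged by extending first.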
13591static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG) {
13592 assert((Extend->getOpcode() == ISD::ZERO_EXTEND ||
13593 Extend->getOpcode() == ISD::ANY_EXTEND) && "Expected extend op");
13594
13595 SDValue CtPop = Extend->getOperand(0);
13596 if (CtPop.getOpcode() != ISD::CTPOP || !CtPop.hasOneUse())
13597 return SDValue();
13598
13599 EVT VT = Extend->getValueType(0);
13600 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13601 if (TLI.isOperationLegalOrCustom(ISD::CTPOP, CtPop.getValueType()) ||
13602 !TLI.isOperationLegalOrCustom(ISD::CTPOP, VT))
13603 return SDValue();
13604
13605 // zext (ctpop X) --> ctpop (zext X)
13606 SDLoc DL(Extend);
13607 SDValue NewZext = DAG.getZExtOrTrunc(CtPop.getOperand(0), DL, VT);
13608 return DAG.getNode(ISD::CTPOP, DL, VT, NewZext);
13609}
13610
13611// If we have (zext (abs X)) where X is a type that will be promoted by type
13612// legalization, convert to (abs (sext X)). But don't extend past a legal type.
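// [Illustrative note, not part of the original source] E.g. (zext i32 (abs
// i16 X)) on a target that promotes i16 to i32: sign-extending X first
// preserves its sign, so abs computes the same magnitude, whereas zero
// extension would turn small negative values into large positive ones.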
13613static SDValue widenAbs(SDNode *Extend, SelectionDAG &DAG) {
13614 assert(Extend->getOpcode() == ISD::ZERO_EXTEND && "Expected zero extend.");
13615
13616 EVT VT = Extend->getValueType(0);
13617 if (VT.isVector())
13618 return SDValue();
13619
13620 SDValue Abs = Extend->getOperand(0);
13621 if (Abs.getOpcode() != ISD::ABS || !Abs.hasOneUse())
13622 return SDValue();
13623
13624 EVT AbsVT = Abs.getValueType();
13625 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13626 if (TLI.getTypeAction(*DAG.getContext(), AbsVT) !=
13627 TargetLowering::TypePromoteInteger)
13628 return SDValue();
13629
13630 EVT LegalVT = TLI.getTypeToTransformTo(*DAG.getContext(), AbsVT);
13631
13632 SDValue SExt =
13633 DAG.getNode(ISD::SIGN_EXTEND, SDLoc(Abs), LegalVT, Abs.getOperand(0));
13634 SDValue NewAbs = DAG.getNode(ISD::ABS, SDLoc(Abs), LegalVT, SExt);
13635 return DAG.getZExtOrTrunc(NewAbs, SDLoc(Extend), VT);
13636}
13637
13638SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
13639 SDValue N0 = N->getOperand(0);
13640 EVT VT = N->getValueType(0);
13641 SDLoc DL(N);
13642
13643 if (VT.isVector())
13644 if (SDValue FoldedVOp = SimplifyVCastOp(N, DL))
13645 return FoldedVOp;
13646
13647 // zext(undef) = 0
13648 if (N0.isUndef())
13649 return DAG.getConstant(0, DL, VT);
13650
13651 if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
13652 return Res;
13653
13654 // fold (zext (zext x)) -> (zext x)
13655 // fold (zext (aext x)) -> (zext x)
13656 if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
13657 SDNodeFlags Flags;
13658 if (N0.getOpcode() == ISD::ZERO_EXTEND)
13659 Flags.setNonNeg(N0->getFlags().hasNonNeg());
13660 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0), Flags);
13661 }
13662
13663 // fold (zext (aext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
13664 // fold (zext (zext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
13665 if (N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG ||
13666 N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG)
13667 return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, DL, VT,
13668 N0.getOperand(0));
13669
13670 // fold (zext (truncate x)) -> (zext x) or
13671 // (zext (truncate x)) -> (truncate x)
13672 // This is valid when the truncated bits of x are already zero.
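// [Illustrative example, not part of the original source] E.g. if x is an i64
// whose top 32 bits are known zero, then (zext i64 (trunc x to i32))
// reproduces x bit-for-bit, so the truncate/extend pair can be dropped.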
13673 SDValue Op;
13674 KnownBits Known;
13675 if (isTruncateOf(DAG, N0, Op, Known)) {
13676 APInt TruncatedBits =
13677 (Op.getScalarValueSizeInBits() == N0.getScalarValueSizeInBits()) ?
13678 APInt(Op.getScalarValueSizeInBits(), 0) :
13679 APInt::getBitsSet(Op.getScalarValueSizeInBits(),
13680 N0.getScalarValueSizeInBits(),
13681 std::min(Op.getScalarValueSizeInBits(),
13682 VT.getScalarSizeInBits()));
13683 if (TruncatedBits.isSubsetOf(Known.Zero)) {
13684 SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, DL, VT);
13685 DAG.salvageDebugInfo(*N0.getNode());
13686
13687 return ZExtOrTrunc;
13688 }
13689 }
13690
13691 // fold (zext (truncate x)) -> (and x, mask)
13692 if (N0.getOpcode() == ISD::TRUNCATE) {
13693 // fold (zext (truncate (load x))) -> (zext (smaller load x))
13694 // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
13695 if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
13696 SDNode *oye = N0.getOperand(0).getNode();
13697 if (NarrowLoad.getNode() != N0.getNode()) {
13698 CombineTo(N0.getNode(), NarrowLoad);
13699 // CombineTo deleted the truncate, if needed, but not what's under it.
13700 AddToWorklist(oye);
13701 }
13702 return SDValue(N, 0); // Return N so it doesn't get rechecked!
13703 }
13704
13705 EVT SrcVT = N0.getOperand(0).getValueType();
13706 EVT MinVT = N0.getValueType();
13707
13708 if (N->getFlags().hasNonNeg()) {
13709 SDValue Op = N0.getOperand(0);
13710 unsigned OpBits = SrcVT.getScalarSizeInBits();
13711 unsigned MidBits = MinVT.getScalarSizeInBits();
13712 unsigned DestBits = VT.getScalarSizeInBits();
13713 unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
13714
13715 if (OpBits == DestBits) {
13716 // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
13717 // bits, it is already in the desired form.
13718 if (NumSignBits > DestBits - MidBits)
13719 return Op;
13720 } else if (OpBits < DestBits) {
13721 // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
13722 // bits, just sext from i32.
13723 // FIXME: This can probably be ZERO_EXTEND nneg?
13724 if (NumSignBits > OpBits - MidBits)
13725 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
13726 } else {
13727 // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
13728 // bits, just truncate to i32.
13729 if (NumSignBits > OpBits - MidBits)
13730 return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
13731 }
13732 }
13733
13734 // Try to mask before the extension to avoid having to generate a larger mask,
13735 // possibly over several sub-vectors.
13736 if (SrcVT.bitsLT(VT) && VT.isVector()) {
13737 if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
13738 TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
13739 SDValue Op = N0.getOperand(0);
13740 Op = DAG.getZeroExtendInReg(Op, DL, MinVT);
13741 AddToWorklist(Op.getNode());
13742 SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, DL, VT);
13743 // Transfer the debug info; the new node is equivalent to N0.
13744 DAG.transferDbgValues(N0, ZExtOrTrunc);
13745 return ZExtOrTrunc;
13746 }
13747 }
13748
13749 if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
13750 SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), DL, VT);
13751 AddToWorklist(Op.getNode());
13752 SDValue And = DAG.getZeroExtendInReg(Op, DL, MinVT);
13753 // We may safely transfer the debug info describing the truncate node over
13754 // to the equivalent and operation.
13755 DAG.transferDbgValues(N0, And);
13756 return And;
13757 }
13758 }
13759
13760 // Fold (zext (and (trunc x), cst)) -> (and x, cst),
13761 // if either of the casts is not free.
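// E.g. with x:i64, (zext (and (trunc x to i32), 42) to i64) -> (and x, 42),
// removing the trunc/zext pair.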
13762 if (N0.getOpcode() == ISD::AND &&
13763 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
13764 N0.getOperand(1).getOpcode() == ISD::Constant &&
13765 (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0), N0.getValueType()) ||
13766 !TLI.isZExtFree(N0.getValueType(), VT))) {
13767 SDValue X = N0.getOperand(0).getOperand(0);
13768 X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
13769 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
13770 return DAG.getNode(ISD::AND, DL, VT,
13771 X, DAG.getConstant(Mask, DL, VT));
13772 }
13773
13774 // Try to simplify (zext (load x)).
13775 if (SDValue foldedExt = tryToFoldExtOfLoad(
13776 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD,
13777 ISD::ZERO_EXTEND, N->getFlags().hasNonNeg()))
13778 return foldedExt;
13779
13780 if (SDValue foldedExt =
13781 tryToFoldExtOfMaskedLoad(DAG, TLI, VT, LegalOperations, N, N0,
13782 ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
13783 return foldedExt;
13784
13785 // fold (zext (load x)) to multiple smaller zextloads.
13786 // Only on illegal but splittable vectors.
13787 if (SDValue ExtLoad = CombineExtLoad(N))
13788 return ExtLoad;
13789
13790 // Try to simplify (zext (atomic_load x)).
13791 if (SDValue foldedExt =
13792 tryToFoldExtOfAtomicLoad(DAG, TLI, VT, N0, ISD::ZEXTLOAD))
13793 return foldedExt;
13794
13795 // fold (zext (and/or/xor (load x), cst)) ->
13796 // (and/or/xor (zextload x), (zext cst))
13797 // Unless (and (load x) cst) will match as a zextload already and has
13798 // additional users, or the zext is already free.
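// E.g. (zext (and (load p):i8, 7) to i32) -> (and (zextload p):i32, 7), so the
// load and the extension can be performed by a single extending load.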
13799 if (ISD::isBitwiseLogicOp(N0.getOpcode()) && !TLI.isZExtFree(N0, VT) &&
13800 isa<LoadSDNode>(N0.getOperand(0)) &&
13801 N0.getOperand(1).getOpcode() == ISD::Constant &&
13802 (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
13803 LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
13804 EVT MemVT = LN00->getMemoryVT();
13805 if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) &&
13806 LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
13807 bool DoXform = true;
13808 SmallVector<SDNode *, 4> SetCCs;
13809 if (!N0.hasOneUse()) {
13810 if (N0.getOpcode() == ISD::AND) {
13811 auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
13812 EVT LoadResultTy = AndC->getValueType(0);
13813 EVT ExtVT;
13814 if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT))
13815 DoXform = false;
13816 }
13817 }
13818 if (DoXform)
13819 DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
13820 ISD::ZERO_EXTEND, SetCCs, TLI);
13821 if (DoXform) {
13822 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT,
13823 LN00->getChain(), LN00->getBasePtr(),
13824 LN00->getMemoryVT(),
13825 LN00->getMemOperand());
13826 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
13827 SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
13828 ExtLoad, DAG.getConstant(Mask, DL, VT));
13829 ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
13830 bool NoReplaceTruncAnd = !N0.hasOneUse();
13831 bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
13832 CombineTo(N, And);
13833 // If N0 has multiple uses, change other uses as well.
13834 if (NoReplaceTruncAnd) {
13835 SDValue TruncAnd =
13836 DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
13837 CombineTo(N0.getNode(), TruncAnd);
13838 }
13839 if (NoReplaceTrunc) {
13840 DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
13841 } else {
13842 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
13843 LN00->getValueType(0), ExtLoad);
13844 CombineTo(LN00, Trunc, ExtLoad.getValue(1));
13845 }
13846 return SDValue(N,0); // Return N so it doesn't get rechecked!
13847 }
13848 }
13849 }
13850
13851 // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
13852 // (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
13853 if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
13854 return ZExtLoad;
13855
13856 // Try to simplify (zext (zextload x)).
13857 if (SDValue foldedExt = tryToFoldExtOfExtload(
13858 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD))
13859 return foldedExt;
13860
13861 if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
13862 return V;
13863
13864 if (N0.getOpcode() == ISD::SETCC) {
13865 // Propagate fast-math-flags.
13866 SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
13867
13868 // Only do this before legalize for now.
13869 if (!LegalOperations && VT.isVector() &&
13870 N0.getValueType().getVectorElementType() == MVT::i1) {
13871 EVT N00VT = N0.getOperand(0).getValueType();
13872 if (getSetCCResultType(N00VT) == N0.getValueType())
13873 return SDValue();
13874
13875 // We know that the # elements of the result is the same as the #
13876 // elements of the compare (and the # elements of the compare result for
13877 // that matter). Check to see that they are the same size. If so, we know
13878 // that the element size of the extended result matches the element size of
13879 // the compare operands.
13880 if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
13881 // zext(setcc) -> zext_in_reg(vsetcc) for vectors.
13882 SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
13883 N0.getOperand(1), N0.getOperand(2));
13884 return DAG.getZeroExtendInReg(VSetCC, DL, N0.getValueType());
13885 }
13886
13887 // If the desired elements are smaller or larger than the source
13888 // elements we can use a matching integer vector type and then
13889 // truncate/any extend followed by zext_in_reg.
13890 EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
13891 SDValue VsetCC =
13892 DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
13893 N0.getOperand(1), N0.getOperand(2));
13894 return DAG.getZeroExtendInReg(DAG.getAnyExtOrTrunc(VsetCC, DL, VT), DL,
13895 N0.getValueType());
13896 }
13897
13898 // zext(setcc x,y,cc) -> zext(select x, y, true, false, cc)
13899 EVT N0VT = N0.getValueType();
13900 EVT N00VT = N0.getOperand(0).getValueType();
13901 if (SDValue SCC = SimplifySelectCC(
13902 DL, N0.getOperand(0), N0.getOperand(1),
13903 DAG.getBoolConstant(true, DL, N0VT, N00VT),
13904 DAG.getBoolConstant(false, DL, N0VT, N00VT),
13905 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
13906 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, SCC);
13907 }
13908
13909 // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
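// E.g. (zext (shl (zext x:i8 to i16), 3) to i32) -> (shl (zext x to i32), 3),
// provided the shift cannot drop any bits that originate from x.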
13910 if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
13911 !TLI.isZExtFree(N0, VT)) {
13912 SDValue ShVal = N0.getOperand(0);
13913 SDValue ShAmt = N0.getOperand(1);
13914 if (auto *ShAmtC = dyn_cast<ConstantSDNode>(ShAmt)) {
13915 if (ShVal.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse()) {
13916 if (N0.getOpcode() == ISD::SHL) {
13917 // If the original shl may be shifting out bits, do not perform this
13918 // transformation.
13919 unsigned KnownZeroBits = ShVal.getValueSizeInBits() -
13920 ShVal.getOperand(0).getValueSizeInBits();
13921 if (ShAmtC->getAPIntValue().ugt(KnownZeroBits)) {
13922 // If the shift is too large, then see if we can deduce that the
13923 // shift is safe anyway.
13924 // Create a mask that has ones for the bits being shifted out.
13925 APInt ShiftOutMask =
13926 APInt::getHighBitsSet(ShVal.getValueSizeInBits(),
13927 ShAmtC->getAPIntValue().getZExtValue());
13928
13929 // Check if the bits being shifted out are known to be zero.
13930 if (!DAG.MaskedValueIsZero(ShVal, ShiftOutMask))
13931 return SDValue();
13932 }
13933 }
13934
13935 // Ensure that the shift amount is wide enough for the shifted value.
13936 if (Log2_32_Ceil(VT.getSizeInBits()) > ShAmt.getValueSizeInBits())
13937 ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
13938
13939 return DAG.getNode(N0.getOpcode(), DL, VT,
13940 DAG.getNode(ISD::ZERO_EXTEND, DL, VT, ShVal), ShAmt);
13941 }
13942 }
13943 }
13944
13945 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
13946 return NewVSel;
13947
13948 if (SDValue NewCtPop = widenCtPop(N, DAG))
13949 return NewCtPop;
13950
13951 if (SDValue V = widenAbs(N, DAG))
13952 return V;
13953
13954 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, Level))
13955 return Res;
13956
13957 // CSE zext nneg with sext if the zext is not free.
13958 if (N->getFlags().hasNonNeg() && !TLI.isZExtFree(N0.getValueType(), VT)) {
13959 SDNode *CSENode = DAG.getNodeIfExists(ISD::SIGN_EXTEND, N->getVTList(), N0);
13960 if (CSENode)
13961 return SDValue(CSENode, 0);
13962 }
13963
13964 return SDValue();
13965}
13966
13967SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
13968 SDValue N0 = N->getOperand(0);
13969 EVT VT = N->getValueType(0);
13970 SDLoc DL(N);
13971
13972 // aext(undef) = undef
13973 if (N0.isUndef())
13974 return DAG.getUNDEF(VT);
13975
13976 if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
13977 return Res;
13978
13979 // fold (aext (aext x)) -> (aext x)
13980 // fold (aext (zext x)) -> (zext x)
13981 // fold (aext (sext x)) -> (sext x)
13982 if (N0.getOpcode() == ISD::ANY_EXTEND || N0.getOpcode() == ISD::ZERO_EXTEND ||
13983 N0.getOpcode() == ISD::SIGN_EXTEND) {
13984 SDNodeFlags Flags;
13985 if (N0.getOpcode() == ISD::ZERO_EXTEND)
13986 Flags.setNonNeg(N0->getFlags().hasNonNeg());
13987 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0), Flags);
13988 }
13989
13990 // fold (aext (aext_extend_vector_inreg x)) -> (aext_extend_vector_inreg x)
13991 // fold (aext (zext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
13992 // fold (aext (sext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
13993 if (N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
13994 N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG ||
13995 N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG)
13996 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0));
13997
13998 // fold (aext (truncate (load x))) -> (aext (smaller load x))
13999 // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
14000 if (N0.getOpcode() == ISD::TRUNCATE) {
14001 if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
14002 SDNode *oye = N0.getOperand(0).getNode();
14003 if (NarrowLoad.getNode() != N0.getNode()) {
14004 CombineTo(N0.getNode(), NarrowLoad);
14005 // CombineTo deleted the truncate, if needed, but not what's under it.
14006 AddToWorklist(oye);
14007 }
14008 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14009 }
14010 }
14011
14012 // fold (aext (truncate x))
14013 if (N0.getOpcode() == ISD::TRUNCATE)
14014 return DAG.getAnyExtOrTrunc(N0.getOperand(0), DL, VT);
14015
14016 // Fold (aext (and (trunc x), cst)) -> (and x, cst)
14017 // if the trunc is not free.
14018 if (N0.getOpcode() == ISD::AND &&
14019 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
14020 N0.getOperand(1).getOpcode() == ISD::Constant &&
14021 !TLI.isTruncateFree(N0.getOperand(0).getOperand(0), N0.getValueType())) {
14022 SDValue X = DAG.getAnyExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
14023 SDValue Y = DAG.getNode(ISD::ANY_EXTEND, DL, VT, N0.getOperand(1));
14024 assert(isa<ConstantSDNode>(Y) && "Expected constant to be folded!");
14025 return DAG.getNode(ISD::AND, DL, VT, X, Y);
14026 }
14027
14028 // fold (aext (load x)) -> (aext (truncate (extload x)))
14029 // None of the supported targets knows how to perform load and any_ext
14030 // on vectors in one instruction, so attempt to fold to zext instead.
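// E.g. (aext (load p):v4i16 to v4i32) is handled as if it were
// (zext (load p) to v4i32), so that it can become a single extending vector
// load.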
14031 if (VT.isVector()) {
14032 // Try to simplify (zext (load x)).
14033 if (SDValue foldedExt =
14034 tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
14035 ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
14036 return foldedExt;
14037 } else if (ISD::isNON_EXTLoad(N0.getNode()) &&
14038 ISD::isUNINDEXEDLoad(N0.getNode()) &&
14039 TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
14040 bool DoXform = true;
14041 SmallVector<SDNode *, 4> SetCCs;
14042 if (!N0.hasOneUse())
14043 DoXform =
14044 ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
14045 if (DoXform) {
14046 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14047 SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, LN0->getChain(),
14048 LN0->getBasePtr(), N0.getValueType(),
14049 LN0->getMemOperand());
14050 ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);
14051 // If the load value is used only by N, replace it via CombineTo N.
14052 bool NoReplaceTrunc = N0.hasOneUse();
14053 CombineTo(N, ExtLoad);
14054 if (NoReplaceTrunc) {
14055 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
14056 recursivelyDeleteUnusedNodes(LN0);
14057 } else {
14058 SDValue Trunc =
14059 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
14060 CombineTo(LN0, Trunc, ExtLoad.getValue(1));
14061 }
14062 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14063 }
14064 }
14065
14066 // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
14067 // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
14068 // fold (aext ( extload x)) -> (aext (truncate (extload x)))
14069 if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) &&
14070 ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
14071 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14072 ISD::LoadExtType ExtType = LN0->getExtensionType();
14073 EVT MemVT = LN0->getMemoryVT();
14074 if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
14075 SDValue ExtLoad =
14076 DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), LN0->getBasePtr(),
14077 MemVT, LN0->getMemOperand());
14078 CombineTo(N, ExtLoad);
14079 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
14080 recursivelyDeleteUnusedNodes(LN0);
14081 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14082 }
14083 }
14084
14085 if (N0.getOpcode() == ISD::SETCC) {
14086 // Propagate fast-math-flags.
14087 SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
14088
14089 // For vectors:
14090 // aext(setcc) -> vsetcc
14091 // aext(setcc) -> truncate(vsetcc)
14092 // aext(setcc) -> aext(vsetcc)
14093 // Only do this before legalize for now.
14094 if (VT.isVector() && !LegalOperations) {
14095 EVT N00VT = N0.getOperand(0).getValueType();
14096 if (getSetCCResultType(N00VT) == N0.getValueType())
14097 return SDValue();
14098
14099 // We know that the # elements of the result is the same as the
14100 // # elements of the compare (and the # elements of the compare result
14101 // for that matter). Check to see that they are the same size. If so,
14102 // we know that the element size of the extended result matches the
14103 // element size of the compare operands.
14104 if (VT.getSizeInBits() == N00VT.getSizeInBits())
14105 return DAG.getSetCC(DL, VT, N0.getOperand(0), N0.getOperand(1),
14106 cast<CondCodeSDNode>(N0.getOperand(2))->get());
14107
14108 // If the desired elements are smaller or larger than the source
14109 // elements we can use a matching integer vector type and then
14110 // truncate/any extend
14111 EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
14112 SDValue VsetCC = DAG.getSetCC(
14113 DL, MatchingVectorType, N0.getOperand(0), N0.getOperand(1),
14114 cast<CondCodeSDNode>(N0.getOperand(2))->get());
14115 return DAG.getAnyExtOrTrunc(VsetCC, DL, VT);
14116 }
14117
14118 // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
14119 if (SDValue SCC = SimplifySelectCC(
14120 DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
14121 DAG.getConstant(0, DL, VT),
14122 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
14123 return SCC;
14124 }
14125
14126 if (SDValue NewCtPop = widenCtPop(N, DAG))
14127 return NewCtPop;
14128
14129 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, Level))
14130 return Res;
14131
14132 return SDValue();
14133}
14134
14135SDValue DAGCombiner::visitAssertExt(SDNode *N) {
14136 unsigned Opcode = N->getOpcode();
14137 SDValue N0 = N->getOperand(0);
14138 SDValue N1 = N->getOperand(1);
14139 EVT AssertVT = cast<VTSDNode>(N1)->getVT();
14140
14141 // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
14142 if (N0.getOpcode() == Opcode &&
14143 AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
14144 return N0;
14145
14146 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
14147 N0.getOperand(0).getOpcode() == Opcode) {
14148 // We have an assert, truncate, assert sandwich. Make one stronger assert
14149 // by applying the smaller of the two asserted types to the larger source
14150 // value. This eliminates the later assert:
14151 // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
14152 // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
14153 SDLoc DL(N);
14154 SDValue BigA = N0.getOperand(0);
14155 EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
14156 EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT;
14157 SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
14158 SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
14159 BigA.getOperand(0), MinAssertVTVal);
14160 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
14161 }
14162
14163 // If we have (AssertZext (truncate (AssertSext X, iX)), iY) and Y is smaller
14164 // than X, just move the AssertZext in front of the truncate and drop the
14165 // AssertSext.
14166 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
14167 N0.getOperand(0).getOpcode() == ISD::AssertSext &&
14168 Opcode == ISD::AssertZext) {
14169 SDValue BigA = N0.getOperand(0);
14170 EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
14171 if (AssertVT.bitsLT(BigA_AssertVT)) {
14172 SDLoc DL(N);
14173 SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
14174 BigA.getOperand(0), N1);
14175 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
14176 }
14177 }
14178
14179 return SDValue();
14180}
14181
14182SDValue DAGCombiner::visitAssertAlign(SDNode *N) {
14183 SDLoc DL(N);
14184
14185 Align AL = cast<AssertAlignSDNode>(N)->getAlign();
14186 SDValue N0 = N->getOperand(0);
14187
14188 // Fold (assertalign (assertalign x, AL0), AL1) ->
14189 // (assertalign x, max(AL0, AL1))
14190 if (auto *AAN = dyn_cast<AssertAlignSDNode>(N0))
14191 return DAG.getAssertAlign(DL, N0.getOperand(0),
14192 std::max(AL, AAN->getAlign()));
14193
14194 // In rare cases, there are trivial arithmetic ops in source operands. Sink
14195 // this assert down to source operands so that those arithmetic ops could be
14196 // exposed to the DAG combining.
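// E.g. (assertalign (add x, 8), align 8) -> (add (assertalign x, align 8), 8):
// the constant operand is already 8-byte aligned, so the alignment assertion
// can be sunk onto x.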
14197 switch (N0.getOpcode()) {
14198 default:
14199 break;
14200 case ISD::ADD:
14201 case ISD::SUB: {
14202 unsigned AlignShift = Log2(AL);
14203 SDValue LHS = N0.getOperand(0);
14204 SDValue RHS = N0.getOperand(1);
14205 unsigned LHSAlignShift = DAG.computeKnownBits(LHS).countMinTrailingZeros();
14206 unsigned RHSAlignShift = DAG.computeKnownBits(RHS).countMinTrailingZeros();
14207 if (LHSAlignShift >= AlignShift || RHSAlignShift >= AlignShift) {
14208 if (LHSAlignShift < AlignShift)
14209 LHS = DAG.getAssertAlign(DL, LHS, AL);
14210 if (RHSAlignShift < AlignShift)
14211 RHS = DAG.getAssertAlign(DL, RHS, AL);
14212 return DAG.getNode(N0.getOpcode(), DL, N0.getValueType(), LHS, RHS);
14213 }
14214 break;
14215 }
14216 }
14217
14218 return SDValue();
14219}
14220
14221/// If the result of a load is shifted/masked/truncated to an effectively
14222/// narrower type, try to transform the load to a narrower type and/or
14223/// use an extending load.
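/// For example, (i32 (truncate (i64 (srl (load p), 32)))) can become a single
/// i32 load from p+4 on a little-endian target.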
14224SDValue DAGCombiner::reduceLoadWidth(SDNode *N) {
14225 unsigned Opc = N->getOpcode();
14226
14227 ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
14228 SDValue N0 = N->getOperand(0);
14229 EVT VT = N->getValueType(0);
14230 EVT ExtVT = VT;
14231
14232 // This transformation isn't valid for vector loads.
14233 if (VT.isVector())
14234 return SDValue();
14235
14236 // The ShAmt variable is used to indicate that we've consumed a right
14237 // shift, i.e. we want to narrow the width of the load by skipping the
14238 // ShAmt least significant bits.
14239 unsigned ShAmt = 0;
14240 // A special case is when the least significant bits from the load are masked
14241 // away, but using an AND rather than a right shift. ShiftedOffset is used to
14242 // indicate that the narrowed load should be left-shifted by that many bits to
14243 // reconstruct the result.
14244 unsigned ShiftedOffset = 0;
14245 // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
14246 // extended to VT.
14247 if (Opc == ISD::SIGN_EXTEND_INREG) {
14248 ExtType = ISD::SEXTLOAD;
14249 ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
14250 } else if (Opc == ISD::SRL || Opc == ISD::SRA) {
14251 // Another special-case: SRL/SRA is basically zero/sign-extending a narrower
14252 // value, or it may be shifting a higher subword, half or byte into the
14253 // lowest bits.
14254
14255 // Only handle shift with constant shift amount, and the shiftee must be a
14256 // load.
14257 auto *LN = dyn_cast<LoadSDNode>(N0);
14258 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
14259 if (!N1C || !LN)
14260 return SDValue();
14261 // If the shift amount is larger than the memory type then we're not
14262 // accessing any of the loaded bytes.
14263 ShAmt = N1C->getZExtValue();
14264 uint64_t MemoryWidth = LN->getMemoryVT().getScalarSizeInBits();
14265 if (MemoryWidth <= ShAmt)
14266 return SDValue();
14267 // Attempt to fold away the SRL by using ZEXTLOAD and SRA by using SEXTLOAD.
14268 ExtType = Opc == ISD::SRL ? ISD::ZEXTLOAD : ISD::SEXTLOAD;
14269 ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt);
14270 // If original load is a SEXTLOAD then we can't simply replace it by a
14271 // ZEXTLOAD (we could potentially replace it by a more narrow SEXTLOAD
14272 // followed by a ZEXT, but that is not handled at the moment). Similarly if
14273 // the original load is a ZEXTLOAD and we want to use a SEXTLOAD.
14274 if ((LN->getExtensionType() == ISD::SEXTLOAD ||
14275 LN->getExtensionType() == ISD::ZEXTLOAD) &&
14276 LN->getExtensionType() != ExtType)
14277 return SDValue();
14278 } else if (Opc == ISD::AND) {
14279 // An AND with a constant mask is the same as a truncate + zero-extend.
14280 auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
14281 if (!AndC)
14282 return SDValue();
14283
14284 const APInt &Mask = AndC->getAPIntValue();
14285 unsigned ActiveBits = 0;
14286 if (Mask.isMask()) {
14287 ActiveBits = Mask.countr_one();
14288 } else if (Mask.isShiftedMask(ShAmt, ActiveBits)) {
14289 ShiftedOffset = ShAmt;
14290 } else {
14291 return SDValue();
14292 }
14293
14294 ExtType = ISD::ZEXTLOAD;
14295 ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
14296 }
14297
14298 // In case Opc==SRL we've already prepared ExtVT/ExtType/ShAmt based on doing
14299 // a right shift. Here we redo some of those checks, to possibly adjust the
14300 // ExtVT even further based on "a masking AND". We could also end up here for
14301 // other reasons (e.g. based on Opc==TRUNCATE) and that is why some checks
14302 // need to be done here as well.
14303 if (Opc == ISD::SRL || N0.getOpcode() == ISD::SRL) {
14304 SDValue SRL = Opc == ISD::SRL ? SDValue(N, 0) : N0;
14305 // Bail out when the SRL has more than one use. This is done for historical
14306 // (undocumented) reasons. Maybe the intent was to guard the AND-masking
14307 // check below? And maybe it is unprofitable to do the transform when the
14308 // SRL has multiple uses and we get here with Opc != ISD::SRL?
14309 // FIXME: Can't we just skip this check for the Opc == ISD::SRL case?
14310 if (!SRL.hasOneUse())
14311 return SDValue();
14312
14313 // Only handle shift with constant shift amount, and the shiftee must be a
14314 // load.
14315 auto *LN = dyn_cast<LoadSDNode>(SRL.getOperand(0));
14316 auto *SRL1C = dyn_cast<ConstantSDNode>(SRL.getOperand(1));
14317 if (!SRL1C || !LN)
14318 return SDValue();
14319
14320 // If the shift amount is larger than the input type then we're not
14321 // accessing any of the loaded bytes. If the load was a zextload/extload
14322 // then the result of the shift+trunc is zero/undef (handled elsewhere).
14323 ShAmt = SRL1C->getZExtValue();
14324 uint64_t MemoryWidth = LN->getMemoryVT().getSizeInBits();
14325 if (ShAmt >= MemoryWidth)
14326 return SDValue();
14327
14328 // Because a SRL must be assumed to *need* to zero-extend the high bits
14329 // (as opposed to anyext the high bits), we can't combine the zextload
14330 // lowering of SRL and an sextload.
14331 if (LN->getExtensionType() == ISD::SEXTLOAD)
14332 return SDValue();
14333
14334 // Avoid reading outside the memory accessed by the original load (could
14335 // happen if we only adjusted the load base pointer by ShAmt). Instead we
14336 // try to narrow the load even further. The typical scenario here is:
14337 // (i64 (truncate (i96 (srl (load x), 64)))) ->
14338 // (i64 (truncate (i96 (zextload (load i32 + offset) from i32))))
14339 if (ExtVT.getScalarSizeInBits() > MemoryWidth - ShAmt) {
14340 // Don't replace sextload by zextload.
14341 if (ExtType == ISD::SEXTLOAD)
14342 return SDValue();
14343 // Narrow the load.
14344 ExtType = ISD::ZEXTLOAD;
14345 ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt);
14346 }
14347
14348 // If the SRL is only used by a masking AND, we may be able to adjust
14349 // the ExtVT to make the AND redundant.
14350 SDNode *Mask = *(SRL->use_begin());
14351 if (SRL.hasOneUse() && Mask->getOpcode() == ISD::AND &&
14352 isa<ConstantSDNode>(Mask->getOperand(1))) {
14353 unsigned Offset, ActiveBits;
14354 const APInt& ShiftMask = Mask->getConstantOperandAPInt(1);
14355 if (ShiftMask.isMask()) {
14356 EVT MaskedVT =
14357 EVT::getIntegerVT(*DAG.getContext(), ShiftMask.countr_one());
14358 // If the mask is smaller, recompute the type.
14359 if ((ExtVT.getScalarSizeInBits() > MaskedVT.getScalarSizeInBits()) &&
14360 TLI.isLoadExtLegal(ExtType, SRL.getValueType(), MaskedVT))
14361 ExtVT = MaskedVT;
14362 } else if (ExtType == ISD::ZEXTLOAD &&
14363 ShiftMask.isShiftedMask(Offset, ActiveBits) &&
14364 (Offset + ShAmt) < VT.getScalarSizeInBits()) {
14365 EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
14366 // If the mask is shifted we can use a narrower load and a shl to insert
14367 // the trailing zeros.
14368 if (((Offset + ActiveBits) <= ExtVT.getScalarSizeInBits()) &&
14369 TLI.isLoadExtLegal(ExtType, SRL.getValueType(), MaskedVT)) {
14370 ExtVT = MaskedVT;
14371 ShAmt = Offset + ShAmt;
14372 ShiftedOffset = Offset;
14373 }
14374 }
14375 }
14376
14377 N0 = SRL.getOperand(0);
14378 }
14379
14380 // If the load is shifted left (and the result isn't shifted back right), we
14381 // can fold a truncate through the shift. The typical scenario is that N
14382 // points at a TRUNCATE here so the attempted fold is:
14383 // (truncate (shl (load x), c))) -> (shl (narrow load x), c)
14384 // ShLeftAmt will indicate how much a narrowed load should be shifted left.
14385 unsigned ShLeftAmt = 0;
14386 if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
14387 ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
14388 if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
14389 ShLeftAmt = N01->getZExtValue();
14390 N0 = N0.getOperand(0);
14391 }
14392 }
14393
14394 // If we haven't found a load, we can't narrow it.
14395 if (!isa<LoadSDNode>(N0))
14396 return SDValue();
14397
14398 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14399 // Reducing the width of a volatile load is illegal. For atomics, we may be
14400 // able to reduce the width provided we never widen again. (see D66309)
14401 if (!LN0->isSimple() ||
14402 !isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
14403 return SDValue();
14404
14405 auto AdjustBigEndianShift = [&](unsigned ShAmt) {
14406 unsigned LVTStoreBits =
14407 LN0->getMemoryVT().getStoreSizeInBits().getFixedValue();
14408 unsigned EVTStoreBits = ExtVT.getStoreSizeInBits().getFixedValue();
14409 return LVTStoreBits - EVTStoreBits - ShAmt;
14410 };
14411
14412 // We need to adjust the pointer to the load by ShAmt bits in order to load
14413 // the correct bytes.
14414 unsigned PtrAdjustmentInBits =
14415 DAG.getDataLayout().isBigEndian() ? AdjustBigEndianShift(ShAmt) : ShAmt;
14416
14417 uint64_t PtrOff = PtrAdjustmentInBits / 8;
14418 SDLoc DL(LN0);
14419 // The original load itself didn't wrap, so an offset within it doesn't.
14420 SDNodeFlags Flags;
14421 Flags.setNoUnsignedWrap(true);
14422 SDValue NewPtr = DAG.getMemBasePlusOffset(
14423 LN0->getBasePtr(), TypeSize::getFixed(PtrOff), DL, Flags);
14424 AddToWorklist(NewPtr.getNode());
14425
14426 SDValue Load;
14427 if (ExtType == ISD::NON_EXTLOAD)
14428 Load = DAG.getLoad(VT, DL, LN0->getChain(), NewPtr,
14429 LN0->getPointerInfo().getWithOffset(PtrOff),
14430 LN0->getOriginalAlign(),
14431 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
14432 else
14433 Load = DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), NewPtr,
14434 LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
14435 LN0->getOriginalAlign(),
14436 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
14437
14438 // Replace the old load's chain with the new load's chain.
14439 WorklistRemover DeadNodes(*this);
14440 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
14441
14442 // Shift the result left, if we've swallowed a left shift.
14443 SDValue Result = Load;
14444 if (ShLeftAmt != 0) {
14445 EVT ShImmTy = getShiftAmountTy(Result.getValueType());
14446 if (!isUIntN(ShImmTy.getScalarSizeInBits(), ShLeftAmt))
14447 ShImmTy = VT;
14448 // If the shift amount is as large as the result size (but, presumably,
14449 // no larger than the source) then the useful bits of the result are
14450 // zero; we can't simply return the shortened shift, because the result
14451 // of that operation is undefined.
14452 if (ShLeftAmt >= VT.getScalarSizeInBits())
14453 Result = DAG.getConstant(0, DL, VT);
14454 else
14455 Result = DAG.getNode(ISD::SHL, DL, VT,
14456 Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
14457 }
14458
14459 if (ShiftedOffset != 0) {
14460 // We're using a shifted mask, so the load now has an offset. This means
14461 // the data has been loaded into lower bits of the register than it would
14462 // have occupied originally, so we need to shl the loaded value back into
14463 // the correct position.
14464 SDValue ShiftC = DAG.getConstant(ShiftedOffset, DL, VT);
14465 Result = DAG.getNode(ISD::SHL, DL, VT, Result, ShiftC);
14466 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
14467 }
14468
14469 // Return the new loaded value.
14470 return Result;
14471}
14472
14473SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
14474 SDValue N0 = N->getOperand(0);
14475 SDValue N1 = N->getOperand(1);
14476 EVT VT = N->getValueType(0);
14477 EVT ExtVT = cast<VTSDNode>(N1)->getVT();
14478 unsigned VTBits = VT.getScalarSizeInBits();
14479 unsigned ExtVTBits = ExtVT.getScalarSizeInBits();
14480
14481 // sext_in_reg(undef) = 0 because the top bits will all be the same.
14482 if (N0.isUndef())
14483 return DAG.getConstant(0, SDLoc(N), VT);
14484
14485 // fold (sext_in_reg c1) -> c1
14486 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
14487 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
14488
14489 // If the input is already sign extended, just drop the extension.
14490 if (ExtVTBits >= DAG.ComputeMaxSignificantBits(N0))
14491 return N0;
14492
14493 // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
14494 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
14495 ExtVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
14496 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0.getOperand(0),
14497 N1);
14498
14499 // fold (sext_in_reg (sext x)) -> (sext x)
14500 // fold (sext_in_reg (aext x)) -> (sext x)
14501 // if x is small enough or if we know that x has more than 1 sign bit and the
14502 // sign_extend_inreg is extending from one of them.
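// E.g. (sext_in_reg (sext x:i8 to i32), i16) -> (sext x to i32): the value is
// already sign-extended from bit 7, hence also from bit 15.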
14503 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
14504 SDValue N00 = N0.getOperand(0);
14505 unsigned N00Bits = N00.getScalarValueSizeInBits();
14506 if ((N00Bits <= ExtVTBits ||
14507 DAG.ComputeMaxSignificantBits(N00) <= ExtVTBits) &&
14508 (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
14509 return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
14510 }
14511
14512 // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
14513 // if x is small enough or if we know that x has more than 1 sign bit and the
14514 // sign_extend_inreg is extending from one of them.
14515 if (ISD::isExtVecInRegOpcode(N0.getOpcode())) {
14516 SDValue N00 = N0.getOperand(0);
14517 unsigned N00Bits = N00.getScalarValueSizeInBits();
14518 unsigned DstElts = N0.getValueType().getVectorMinNumElements();
14519 unsigned SrcElts = N00.getValueType().getVectorMinNumElements();
14520 bool IsZext = N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
14521 APInt DemandedSrcElts = APInt::getLowBitsSet(SrcElts, DstElts);
14522 if ((N00Bits == ExtVTBits ||
14523 (!IsZext && (N00Bits < ExtVTBits ||
14524 DAG.ComputeMaxSignificantBits(N00) <= ExtVTBits))) &&
14525 (!LegalOperations ||
14526 TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT)))
14527 return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT, N00);
14528 }
14529
14530 // fold (sext_in_reg (zext x)) -> (sext x)
14531 // iff we are extending the source sign bit.
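// E.g. (sext_in_reg (zext x:i8 to i32), i8) -> (sext x to i32).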
14532 if (N0.getOpcode() == ISD::ZERO_EXTEND) {
14533 SDValue N00 = N0.getOperand(0);
14534 if (N00.getScalarValueSizeInBits() == ExtVTBits &&
14535 (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
14536 return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
14537 }
14538
14539 // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
14540 if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, ExtVTBits - 1)))
14541 return DAG.getZeroExtendInReg(N0, SDLoc(N), ExtVT);
14542
14543 // fold operands of sext_in_reg based on knowledge that the top bits are not
14544 // demanded.
14545 if (SimplifyDemandedBits(SDValue(N, 0)))
14546 return SDValue(N, 0);
14547
14548 // fold (sext_in_reg (load x)) -> (smaller sextload x)
14549 // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
14550 if (SDValue NarrowLoad = reduceLoadWidth(N))
14551 return NarrowLoad;
14552
14553 // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
14554 // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
14555 // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
14556 if (N0.getOpcode() == ISD::SRL) {
14557 if (auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
14558 if (ShAmt->getAPIntValue().ule(VTBits - ExtVTBits)) {
14559 // We can turn this into an SRA iff the input to the SRL is already sign
14560 // extended enough.
14561 unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
14562 if (((VTBits - ExtVTBits) - ShAmt->getZExtValue()) < InSignBits)
14563 return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0),
14564 N0.getOperand(1));
14565 }
14566 }
14567
14568 // fold (sext_inreg (extload x)) -> (sextload x)
14569 // If sextload is not supported by target, we can only do the combine when
14570 // load has one use. Doing otherwise can block folding the extload with other
14571 // extends that the target does support.
14572 if (ISD::isEXTLoad(N0.getNode()) &&
14573 ISD::isUNINDEXEDLoad(N0.getNode()) &&
14574 ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
14575 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple() &&
14576 N0.hasOneUse()) ||
14577 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
14578 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14579 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
14580 LN0->getChain(),
14581 LN0->getBasePtr(), ExtVT,
14582 LN0->getMemOperand());
14583 CombineTo(N, ExtLoad);
14584 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
14585 AddToWorklist(ExtLoad.getNode());
14586 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14587 }
14588
14589 // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
14590 if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
14591 N0.hasOneUse() &&
14592 ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
14593 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) &&
14594 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
14595 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14596 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
14597 LN0->getChain(),
14598 LN0->getBasePtr(), ExtVT,
14599 LN0->getMemOperand());
14600 CombineTo(N, ExtLoad);
14601 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
14602 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14603 }
14604
14605 // fold (sext_inreg (masked_load x)) -> (sext_masked_load x)
14606 // ignore it if the masked load is already sign extended
14607 if (MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0)) {
14608 if (ExtVT == Ld->getMemoryVT() && N0.hasOneUse() &&
14609 Ld->getExtensionType() != ISD::LoadExtType::NON_EXTLOAD &&
14610 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT)) {
14611 SDValue ExtMaskedLoad = DAG.getMaskedLoad(
14612 VT, SDLoc(N), Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(),
14613 Ld->getMask(), Ld->getPassThru(), ExtVT, Ld->getMemOperand(),
14614 Ld->getAddressingMode(), ISD::SEXTLOAD, Ld->isExpandingLoad());
14615 CombineTo(N, ExtMaskedLoad);
14616 CombineTo(N0.getNode(), ExtMaskedLoad, ExtMaskedLoad.getValue(1));
14617 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14618 }
14619 }
14620
14621 // fold (sext_inreg (masked_gather x)) -> (sext_masked_gather x)
14622 if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
14623 if (SDValue(GN0, 0).hasOneUse() &&
14624 ExtVT == GN0->getMemoryVT() &&
14625 TLI.isVectorLoadExtDesirable(SDValue(SDValue(GN0, 0)))) {
14626 SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
14627 GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
14628
14629 SDValue ExtLoad = DAG.getMaskedGather(
14630 DAG.getVTList(VT, MVT::Other), ExtVT, SDLoc(N), Ops,
14631 GN0->getMemOperand(), GN0->getIndexType(), ISD::SEXTLOAD);
14632
14633 CombineTo(N, ExtLoad);
14634 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
14635 AddToWorklist(ExtLoad.getNode());
14636 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14637 }
14638 }
14639
14640 // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
14641 if (ExtVTBits <= 16 && N0.getOpcode() == ISD::OR) {
14642 if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
14643 N0.getOperand(1), false))
14644 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, BSwap, N1);
14645 }
14646
14647 // Fold (iM_signext_inreg
14648 // (extract_subvector (zext|anyext|sext iN_v to _) _)
14649 // from iN)
14650 // -> (extract_subvector (signext iN_v to iM))
14651 if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR && N0.hasOneUse() &&
14652 ISD::isExtOpcode(N0.getOperand(0).getOpcode())) {
14653 SDValue InnerExt = N0.getOperand(0);
14654 EVT InnerExtVT = InnerExt->getValueType(0);
14655 SDValue Extendee = InnerExt->getOperand(0);
14656
14657 if (ExtVTBits == Extendee.getValueType().getScalarSizeInBits() &&
14658 (!LegalOperations ||
14659 TLI.isOperationLegal(ISD::SIGN_EXTEND, InnerExtVT))) {
14660 SDValue SignExtExtendee =
14661 DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), InnerExtVT, Extendee);
14662 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), VT, SignExtExtendee,
14663 N0.getOperand(1));
14664 }
14665 }
14666
14667 return SDValue();
14668}
14669
14670 static SDValue foldExtendVectorInregToExtendOfSubvector(
14671 SDNode *N, const SDLoc &DL, const TargetLowering &TLI, SelectionDAG &DAG,
14672 bool LegalOperations) {
14673 unsigned InregOpcode = N->getOpcode();
14674 unsigned Opcode = DAG.getOpcode_EXTEND(InregOpcode);
14675
14676 SDValue Src = N->getOperand(0);
14677 EVT VT = N->getValueType(0);
14678 EVT SrcVT = EVT::getVectorVT(*DAG.getContext(),
14679 Src.getValueType().getVectorElementType(),
14680 VT.getVectorElementCount());
14681
14682 assert(ISD::isExtVecInRegOpcode(InregOpcode) &&
14683 "Expected EXTEND_VECTOR_INREG dag node in input!");
14684
14685 // Profitability check: our operand must be a one-use CONCAT_VECTORS.
14686 // FIXME: one-use check may be overly restrictive
14687 if (!Src.hasOneUse() || Src.getOpcode() != ISD::CONCAT_VECTORS)
14688 return SDValue();
14689
14690 // Profitability check: we must be extending exactly one of its operands.
14691 // FIXME: this is probably overly restrictive.
14692 Src = Src.getOperand(0);
14693 if (Src.getValueType() != SrcVT)
14694 return SDValue();
14695
14696 if (LegalOperations && !TLI.isOperationLegal(Opcode, VT))
14697 return SDValue();
14698
14699 return DAG.getNode(Opcode, DL, VT, Src);
14700}
14701
14702SDValue DAGCombiner::visitEXTEND_VECTOR_INREG(SDNode *N) {
14703 SDValue N0 = N->getOperand(0);
14704 EVT VT = N->getValueType(0);
14705 SDLoc DL(N);
14706
14707 if (N0.isUndef()) {
14708 // aext_vector_inreg(undef) = undef because the top bits are undefined.
14709 // {s/z}ext_vector_inreg(undef) = 0 because the top bits must be the same.
14710 return N->getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG
14711 ? DAG.getUNDEF(VT)
14712 : DAG.getConstant(0, DL, VT);
14713 }
14714
14715 if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
14716 return Res;
14717
14718 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
14719 return SDValue(N, 0);
14720
14721 if (SDValue R = foldExtendVectorInregToExtendOfSubvector(N, DL, TLI, DAG,
14722 LegalOperations))
14723 return R;
14724
14725 return SDValue();
14726}
14727
14728SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
14729 SDValue N0 = N->getOperand(0);
14730 EVT VT = N->getValueType(0);
14731 EVT SrcVT = N0.getValueType();
14732 bool isLE = DAG.getDataLayout().isLittleEndian();
14733 SDLoc DL(N);
14734
14735 // trunc(undef) = undef
14736 if (N0.isUndef())
14737 return DAG.getUNDEF(VT);
14738
14739 // fold (truncate (truncate x)) -> (truncate x)
14740 if (N0.getOpcode() == ISD::TRUNCATE)
14741 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
14742
14743 // fold (truncate c1) -> c1
14744 if (SDValue C = DAG.FoldConstantArithmetic(ISD::TRUNCATE, DL, VT, {N0}))
14745 return C;
14746
14747 // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
14748 if (N0.getOpcode() == ISD::ZERO_EXTEND ||
14749 N0.getOpcode() == ISD::SIGN_EXTEND ||
14750 N0.getOpcode() == ISD::ANY_EXTEND) {
14751 // if the source is smaller than the dest, we still need an extend.
14752 if (N0.getOperand(0).getValueType().bitsLT(VT))
14753 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0));
14754 // if the source is larger than the dest, then we just need the truncate.
14755 if (N0.getOperand(0).getValueType().bitsGT(VT))
14756 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
14757 // if the source and dest are the same type, we can drop both the extend
14758 // and the truncate.
14759 return N0.getOperand(0);
14760 }
14761
14762 // Try to narrow a truncate-of-sext_in_reg to the destination type:
14763 // trunc (sign_ext_inreg X, iM) to iN --> sign_ext_inreg (trunc X to iN), iM
14764 if (!LegalTypes && N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
14765 N0.hasOneUse()) {
14766 SDValue X = N0.getOperand(0);
14767 SDValue ExtVal = N0.getOperand(1);
14768 EVT ExtVT = cast<VTSDNode>(ExtVal)->getVT();
14769 if (ExtVT.bitsLT(VT) && TLI.preferSextInRegOfTruncate(VT, SrcVT, ExtVT)) {
14770 SDValue TrX = DAG.getNode(ISD::TRUNCATE, DL, VT, X);
14771 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, TrX, ExtVal);
14772 }
14773 }
14774
14775 // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
14776 if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
14777 return SDValue();
14778
14779 // Fold extract-and-trunc into a narrow extract. For example:
14780 // i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
14781 // i32 y = TRUNCATE(i64 x)
14782 // -- becomes --
14783 // v16i8 b = BITCAST (v2i64 val)
14784 // i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
14785 //
14786 // Note: We only run this optimization after type legalization (which often
14787 // creates this pattern) and before operation legalization after which
14788 // we need to be more careful about the vector instructions that we generate.
14789 if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
14790 LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
14791 EVT VecTy = N0.getOperand(0).getValueType();
14792 EVT ExTy = N0.getValueType();
14793 EVT TrTy = N->getValueType(0);
14794
14795 auto EltCnt = VecTy.getVectorElementCount();
14796 unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
14797 auto NewEltCnt = EltCnt * SizeRatio;
14798
14799 EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, NewEltCnt);
14800 assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
14801
14802 SDValue EltNo = N0->getOperand(1);
14803 if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
14804 int Elt = EltNo->getAsZExtVal();
14805 int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
14806 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
14807 DAG.getBitcast(NVT, N0.getOperand(0)),
14808 DAG.getVectorIdxConstant(Index, DL));
14809 }
14810 }
14811
14812 // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
14813 if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) {
14814 if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
14815 TLI.isTruncateFree(SrcVT, VT)) {
14816 SDLoc SL(N0);
14817 SDValue Cond = N0.getOperand(0);
14818 SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
14819 SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
14820 return DAG.getNode(ISD::SELECT, DL, VT, Cond, TruncOp0, TruncOp1);
14821 }
14822 }
14823
14824 // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
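// E.g. (trunc (shl x:i64, 3) to i32) -> (shl (trunc x to i32), 3), valid
// because the shift amount is known to be smaller than 32.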
14825 if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
14826 (!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
14827 TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
14828 SDValue Amt = N0.getOperand(1);
14829 KnownBits Known = DAG.computeKnownBits(Amt);
14830 unsigned Size = VT.getScalarSizeInBits();
14831 if (Known.countMaxActiveBits() <= Log2_32(Size)) {
14832 EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
14833 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
14834 if (AmtVT != Amt.getValueType()) {
14835 Amt = DAG.getZExtOrTrunc(Amt, DL, AmtVT);
14836 AddToWorklist(Amt.getNode());
14837 }
14838 return DAG.getNode(ISD::SHL, DL, VT, Trunc, Amt);
14839 }
14840 }
14841
14842 if (SDValue V = foldSubToUSubSat(VT, N0.getNode(), DL))
14843 return V;
14844
14845 if (SDValue ABD = foldABSToABD(N, DL))
14846 return ABD;
14847
14848 // Attempt to pre-truncate BUILD_VECTOR sources.
14849 if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
14850 N0.hasOneUse() &&
14851 TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType()) &&
14852 // Avoid creating illegal types if running after type legalizer.
14853 (!LegalTypes || TLI.isTypeLegal(VT.getScalarType()))) {
14854 EVT SVT = VT.getScalarType();
14855 SmallVector<SDValue, 8> TruncOps;
14856 for (const SDValue &Op : N0->op_values()) {
14857 SDValue TruncOp = DAG.getNode(ISD::TRUNCATE, DL, SVT, Op);
14858 TruncOps.push_back(TruncOp);
14859 }
14860 return DAG.getBuildVector(VT, DL, TruncOps);
14861 }
14862
14863 // trunc (splat_vector x) -> splat_vector (trunc x)
14864 if (N0.getOpcode() == ISD::SPLAT_VECTOR &&
14865 (!LegalTypes || TLI.isTypeLegal(VT.getScalarType())) &&
14866 (!LegalOperations || TLI.isOperationLegal(ISD::SPLAT_VECTOR, VT))) {
14867 EVT SVT = VT.getScalarType();
14868 return DAG.getSplatVector(
14869 VT, DL, DAG.getNode(ISD::TRUNCATE, DL, SVT, N0->getOperand(0)));
14870 }
14871
14872 // Fold a series of buildvector, bitcast, and truncate if possible.
14873 // For example fold
14874 // (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
14875 // (2xi32 (buildvector x, y)).
14876 if (Level == AfterLegalizeVectorOps && VT.isVector() &&
14877 N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
14878 N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
14879 N0.getOperand(0).hasOneUse()) {
14880 SDValue BuildVect = N0.getOperand(0);
14881 EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
14882 EVT TruncVecEltTy = VT.getVectorElementType();
14883
14884 // Check that the element types match.
14885 if (BuildVectEltTy == TruncVecEltTy) {
14886 // Now we only need to compute the offset of the truncated elements.
14887 unsigned BuildVecNumElts = BuildVect.getNumOperands();
14888 unsigned TruncVecNumElts = VT.getVectorNumElements();
14889 unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
14890
14891 assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
14892 "Invalid number of elements");
14893
14894 SmallVector<SDValue, 8> Opnds;
14895 for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
14896 Opnds.push_back(BuildVect.getOperand(i));
14897
14898 return DAG.getBuildVector(VT, DL, Opnds);
14899 }
14900 }
14901
14902 // fold (truncate (load x)) -> (smaller load x)
14903 // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
14904 if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
14905 if (SDValue Reduced = reduceLoadWidth(N))
14906 return Reduced;
14907
14908 // Handle the case where the truncated result is at least as wide as the
14909 // loaded type.
14910 if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
14911 auto *LN0 = cast<LoadSDNode>(N0);
14912 if (LN0->isSimple() && LN0->getMemoryVT().bitsLE(VT)) {
14913 SDValue NewLoad = DAG.getExtLoad(
14914 LN0->getExtensionType(), SDLoc(LN0), VT, LN0->getChain(),
14915 LN0->getBasePtr(), LN0->getMemoryVT(), LN0->getMemOperand());
14916 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
14917 return NewLoad;
14918 }
14919 }
14920 }
14921
14922 // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
14923 // where ... are all 'undef'.
14924 if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
14925 SmallVector<EVT, 8> VTs;
14926 SDValue V;
14927 unsigned Idx = 0;
14928 unsigned NumDefs = 0;
14929
14930 for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
14931 SDValue X = N0.getOperand(i);
14932 if (!X.isUndef()) {
14933 V = X;
14934 Idx = i;
14935 NumDefs++;
14936 }
14937 // Stop if more than one member is non-undef.
14938 if (NumDefs > 1)
14939 break;
14940
14941 VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
14942 X.getValueType().getVectorElementType(),
14943 X.getValueType().getVectorElementCount()));
14944 }
14945
14946 if (NumDefs == 0)
14947 return DAG.getUNDEF(VT);
14948
14949 if (NumDefs == 1) {
14950 assert(V.getNode() && "The single defined operand is empty!");
14951 SmallVector<SDValue, 8> Opnds;
14952 for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
14953 if (i != Idx) {
14954 Opnds.push_back(DAG.getUNDEF(VTs[i]));
14955 continue;
14956 }
14957 SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
14958 AddToWorklist(NV.getNode());
14959 Opnds.push_back(NV);
14960 }
14961 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Opnds);
14962 }
14963 }
14964
14965 // Fold truncate of a bitcast of a vector to an extract of the low vector
14966 // element.
14967 //
14968 // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
14969 if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
14970 SDValue VecSrc = N0.getOperand(0);
14971 EVT VecSrcVT = VecSrc.getValueType();
14972 if (VecSrcVT.isVector() && VecSrcVT.getScalarType() == VT &&
14973 (!LegalOperations ||
14974 TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecSrcVT))) {
14975 unsigned Idx = isLE ? 0 : VecSrcVT.getVectorNumElements() - 1;
14976 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, VecSrc,
14977 DAG.getVectorIdxConstant(Idx, DL));
14978 }
14979 }
14980
14981 // Simplify the operands using demanded-bits information.
14982 if (SimplifyDemandedBits(SDValue(N, 0)))
14983 return SDValue(N, 0);
14984
14985 // fold (truncate (extract_subvector(ext x))) ->
14986 // (extract_subvector x)
14987 // TODO: This can be generalized to cover cases where the truncate and extract
14988 // do not fully cancel each other out.
14989 if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
14990 SDValue N00 = N0.getOperand(0);
14991 if (N00.getOpcode() == ISD::SIGN_EXTEND ||
14992 N00.getOpcode() == ISD::ZERO_EXTEND ||
14993 N00.getOpcode() == ISD::ANY_EXTEND) {
14994 if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
14995 VT.getVectorElementType())
14996 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
14997 N00.getOperand(0), N0.getOperand(1));
14998 }
14999 }
15000
15001 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
15002 return NewVSel;
15003
15004 // Narrow a suitable binary operation with a non-opaque constant operand by
15005 // moving it ahead of the truncate. This is limited to pre-legalization
15006 // because targets may prefer a wider type during later combines and invert
15007 // this transform.
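// E.g. (trunc (add x:i64, 17) to i32) -> (add (trunc x to i32), 17); the
// constant can be truncated without changing the low 32 bits of the result.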
15008 switch (N0.getOpcode()) {
15009 case ISD::ADD:
15010 case ISD::SUB:
15011 case ISD::MUL:
15012 case ISD::AND:
15013 case ISD::OR:
15014 case ISD::XOR:
15015 if (!LegalOperations && N0.hasOneUse() &&
15016 (isConstantOrConstantVector(N0.getOperand(0), true) ||
15017 isConstantOrConstantVector(N0.getOperand(1), true))) {
15018 // TODO: We already restricted this to pre-legalization, but for vectors
15019 // we are extra cautious to not create an unsupported operation.
15020 // Target-specific changes are likely needed to avoid regressions here.
15021 if (VT.isScalarInteger() || TLI.isOperationLegal(N0.getOpcode(), VT)) {
15022 SDValue NarrowL = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
15023 SDValue NarrowR = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
15024 return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR);
15025 }
15026 }
15027 break;
15028 case ISD::ADDE:
15029 case ISD::UADDO_CARRY:
15030 // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
15031 // (trunc uaddo_carry(X, Y, Carry)) ->
15032 // (uaddo_carry trunc(X), trunc(Y), Carry)
15033 // When the adde's carry is not used.
15034 // We only do for uaddo_carry before legalize operation
15035 if (((!LegalOperations && N0.getOpcode() == ISD::UADDO_CARRY) ||
15036 TLI.isOperationLegal(N0.getOpcode(), VT)) &&
15037 N0.hasOneUse() && !N0->hasAnyUseOfValue(1)) {
15038 SDValue X = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
15039 SDValue Y = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
15040 SDVTList VTs = DAG.getVTList(VT, N0->getValueType(1));
15041 return DAG.getNode(N0.getOpcode(), DL, VTs, X, Y, N0.getOperand(2));
15042 }
15043 break;
15044 case ISD::USUBSAT:
15045 // Truncate the USUBSAT only if LHS is a known zero-extension; it's not
15046 // enough to know that the upper bits are zero, we must also ensure that we
15047 // don't introduce an extra truncate.
15048 if (!LegalOperations && N0.hasOneUse() &&
15049 N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
15050 N0.getOperand(0).getOperand(0).getScalarValueSizeInBits() <=
15051 VT.getScalarSizeInBits() &&
15052 hasOperation(N0.getOpcode(), VT)) {
15053 return getTruncatedUSUBSAT(VT, SrcVT, N0.getOperand(0), N0.getOperand(1),
15054 DAG, DL);
15055 }
15056 break;
15057 }
15058
15059 return SDValue();
15060}
15061
15062static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
15063 SDValue Elt = N->getOperand(i);
15064 if (Elt.getOpcode() != ISD::MERGE_VALUES)
15065 return Elt.getNode();
15066 return Elt.getOperand(Elt.getResNo()).getNode();
15067}
15068
15069/// build_pair (load, load) -> load
15070/// if load locations are consecutive.
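/// E.g. (i64 build_pair (i32 load p), (i32 load p+4)) -> (i64 load p) on a
/// little-endian target, when the two loads are consecutive and a fast i64
/// load is available.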
15071SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
15072 assert(N->getOpcode() == ISD::BUILD_PAIR);
15073
15074 auto *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
15075 auto *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
15076
15077 // A BUILD_PAIR always has the least significant part in elt 0 and the
15078 // most significant part in elt 1. So when combining into one large load, we
15079 // need to consider the endianness.
15080 if (DAG.getDataLayout().isBigEndian())
15081 std::swap(LD1, LD2);
15082
15083 if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !ISD::isNON_EXTLoad(LD2) ||
15084 !LD1->hasOneUse() || !LD2->hasOneUse() ||
15085 LD1->getAddressSpace() != LD2->getAddressSpace())
15086 return SDValue();
15087
15088 unsigned LD1Fast = 0;
15089 EVT LD1VT = LD1->getValueType(0);
15090 unsigned LD1Bytes = LD1VT.getStoreSize();
15091 if ((!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
15092 DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1) &&
15093 TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
15094 *LD1->getMemOperand(), &LD1Fast) && LD1Fast)
15095 return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
15096 LD1->getPointerInfo(), LD1->getAlign());
15097
15098 return SDValue();
15099}
15100
15101static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
15102 // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
15103 // and Lo parts; on big-endian machines it doesn't.
15104 return DAG.getDataLayout().isBigEndian() ? 1 : 0;
15105}
15106
15107SDValue DAGCombiner::foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
15108 const TargetLowering &TLI) {
15109 // If this is not a bitcast to an FP type or if the target doesn't have
15110 // IEEE754-compliant FP logic, we're done.
15111 EVT VT = N->getValueType(0);
15112 SDValue N0 = N->getOperand(0);
15113 EVT SourceVT = N0.getValueType();
15114
15115 if (!VT.isFloatingPoint())
15116 return SDValue();
15117
15118 // TODO: Handle cases where the integer constant is a different scalar
15119 // bitwidth to the FP.
15120 if (VT.getScalarSizeInBits() != SourceVT.getScalarSizeInBits())
15121 return SDValue();
15122
15123 unsigned FPOpcode;
15124 APInt SignMask;
15125 switch (N0.getOpcode()) {
15126 case ISD::AND:
15127 FPOpcode = ISD::FABS;
15128 SignMask = ~APInt::getSignMask(SourceVT.getScalarSizeInBits());
15129 break;
15130 case ISD::XOR:
15131 FPOpcode = ISD::FNEG;
15132 SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
15133 break;
15134 case ISD::OR:
15135 FPOpcode = ISD::FABS;
15136 SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
15137 break;
15138 default:
15139 return SDValue();
15140 }
15141
15142 if (LegalOperations && !TLI.isOperationLegal(FPOpcode, VT))
15143 return SDValue();
15144
15145 // This needs to be the inverse of logic in foldSignChangeInBitcast.
15146 // FIXME: I don't think looking for bitcast intrinsically makes sense, but
15147 // removing this would require more changes.
15148 auto IsBitCastOrFree = [&TLI, FPOpcode](SDValue Op, EVT VT) {
15149 if (Op.getOpcode() == ISD::BITCAST && Op.getOperand(0).getValueType() == VT)
15150 return true;
15151
15152 return FPOpcode == ISD::FABS ? TLI.isFAbsFree(VT) : TLI.isFNegFree(VT);
15153 };
15154
15155 // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
15156 // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
15157 // Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) ->
15158 // fneg (fabs X)
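// Illustrative f32 case: here SourceVT is i32, so SignMask is 0x7fffffff for
// the AND/FABS case and 0x80000000 for the XOR/FNEG and OR cases; the integer
// logic op plus both bitcasts collapse into a single FP sign-bit operation.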
15159 SDValue LogicOp0 = N0.getOperand(0);
15160 ConstantSDNode *LogicOp1 = isConstOrConstSplat(N0.getOperand(1), true);
15161 if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
15162 IsBitCastOrFree(LogicOp0, VT)) {
15163 SDValue CastOp0 = DAG.getNode(ISD::BITCAST, SDLoc(N), VT, LogicOp0);
15164 SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, CastOp0);
15165 NumFPLogicOpsConv++;
15166 if (N0.getOpcode() == ISD::OR)
15167 return DAG.getNode(ISD::FNEG, SDLoc(N), VT, FPOp);
15168 return FPOp;
15169 }
15170
15171 return SDValue();
15172}
15173
15174SDValue DAGCombiner::visitBITCAST(SDNode *N) {
15175 SDValue N0 = N->getOperand(0);
15176 EVT VT = N->getValueType(0);
15177
15178 if (N0.isUndef())
15179 return DAG.getUNDEF(VT);
15180
15181 // If the input is a BUILD_VECTOR with all constant elements, fold this now.
15182 // Only do this before legalize types, unless both types are integer and the
15183 // scalar type is legal. Only do this before legalize ops, since the target
15184 // may depend on the bitcast.
15185 // First check to see if this is all constant.
15186 // TODO: Support FP bitcasts after legalize types.
15187 if (VT.isVector() &&
15188 (!LegalTypes ||
15189 (!LegalOperations && VT.isInteger() && N0.getValueType().isInteger() &&
15190 TLI.isTypeLegal(VT.getVectorElementType()))) &&
15191 N0.getOpcode() == ISD::BUILD_VECTOR && N0->hasOneUse() &&
15192 cast<BuildVectorSDNode>(N0)->isConstant())
15193 return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(),
15194 VT.getVectorElementType());
15195
15196 // If the input is a constant, let getNode fold it.
15197 if (isIntOrFPConstant(N0)) {
15198 // If we can't allow illegal operations, we need to check that this is just
15199 // an fp -> int or int -> fp conversion and that the resulting operation will
15200 // be legal.
15201 if (!LegalOperations ||
15202 (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
15203 TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
15204 (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
15205 TLI.isOperationLegal(ISD::Constant, VT))) {
15206 SDValue C = DAG.getBitcast(VT, N0);
15207 if (C.getNode() != N)
15208 return C;
15209 }
15210 }
15211
15212 // (conv (conv x, t1), t2) -> (conv x, t2)
15213 if (N0.getOpcode() == ISD::BITCAST)
15214 return DAG.getBitcast(VT, N0.getOperand(0));
15215
15216 // fold (conv (logicop (conv x), (c))) -> (logicop x, (conv c))
15217 // iff the current bitwise logicop type isn't legal
15218 if (ISD::isBitwiseLogicOp(N0.getOpcode()) && VT.isInteger() &&
15219 !TLI.isTypeLegal(N0.getOperand(0).getValueType())) {
15220 auto IsFreeBitcast = [VT](SDValue V) {
15221 return (V.getOpcode() == ISD::BITCAST &&
15222 V.getOperand(0).getValueType() == VT) ||
15223 (ISD::isBuildVectorOfConstantSDNodes(V.getNode()) &&
15224 V->hasOneUse());
15225 };
15226 if (IsFreeBitcast(N0.getOperand(0)) && IsFreeBitcast(N0.getOperand(1)))
15227 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
15228 DAG.getBitcast(VT, N0.getOperand(0)),
15229 DAG.getBitcast(VT, N0.getOperand(1)));
15230 }
15231
15232 // fold (conv (load x)) -> (load (conv*)x)
15233 // If the resultant load doesn't need a higher alignment than the original!
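// E.g. (illustrative): (f64 (bitcast (i64 (load p)))) -> (f64 (load p)),
// assuming the part orderings match and the target reports the f64 load as
// legal/beneficial.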
15234 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
15235 // Do not remove the cast if the types differ in endian layout.
15236 TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
15237 TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
15238 // If the load is volatile, we only want to change the load type if the
15239 // resulting load is legal. Otherwise we might increase the number of
15240 // memory accesses. We don't care if the original type was legal or not
15241 // as we assume software couldn't rely on the number of accesses of an
15242 // illegal type.
15243 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) ||
15244 TLI.isOperationLegal(ISD::LOAD, VT))) {
15245 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
15246
15247 if (TLI.isLoadBitCastBeneficial(N0.getValueType(), VT, DAG,
15248 *LN0->getMemOperand())) {
15249 SDValue Load =
15250 DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
15251 LN0->getMemOperand());
15252 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
15253 return Load;
15254 }
15255 }
15256
15257 if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
15258 return V;
15259
15260 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
15261 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
15262 //
15263 // For ppc_fp128:
15264 // fold (bitcast (fneg x)) ->
15265 // flipbit = signbit
15266 // (xor (bitcast x) (build_pair flipbit, flipbit))
15267 //
15268 // fold (bitcast (fabs x)) ->
15269 // flipbit = (and (extract_element (bitcast x), 0), signbit)
15270 // (xor (bitcast x) (build_pair flipbit, flipbit))
15271 // This often reduces constant pool loads.
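// Illustrative f32 case: (i32 (bitcast (fneg x))) becomes
// (xor (i32 (bitcast x)), 0x80000000), and (i32 (bitcast (fabs x))) becomes
// (and (i32 (bitcast x)), 0x7fffffff).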
15272 if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
15273 (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
15274 N0->hasOneUse() && VT.isInteger() && !VT.isVector() &&
15275 !N0.getValueType().isVector()) {
15276 SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
15277 AddToWorklist(NewConv.getNode());
15278
15279 SDLoc DL(N);
15280 if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
15281 assert(VT.getSizeInBits() == 128);
15282 SDValue SignBit = DAG.getConstant(
15283 APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
15284 SDValue FlipBit;
15285 if (N0.getOpcode() == ISD::FNEG) {
15286 FlipBit = SignBit;
15287 AddToWorklist(FlipBit.getNode());
15288 } else {
15289 assert(N0.getOpcode() == ISD::FABS);
15290 SDValue Hi =
15291 DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
15292 DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
15293 SDLoc(NewConv)));
15294 AddToWorklist(Hi.getNode());
15295 FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
15296 AddToWorklist(FlipBit.getNode());
15297 }
15298 SDValue FlipBits =
15299 DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
15300 AddToWorklist(FlipBits.getNode());
15301 return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
15302 }
15303 APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
15304 if (N0.getOpcode() == ISD::FNEG)
15305 return DAG.getNode(ISD::XOR, DL, VT,
15306 NewConv, DAG.getConstant(SignBit, DL, VT));
15307 assert(N0.getOpcode() == ISD::FABS);
15308 return DAG.getNode(ISD::AND, DL, VT,
15309 NewConv, DAG.getConstant(~SignBit, DL, VT));
15310 }
15311
15312 // fold (bitconvert (fcopysign cst, x)) ->
15313 // (or (and (bitconvert x), sign), (and cst, (not sign)))
15314 // Note that we don't handle (copysign x, cst) because this can always be
15315 // folded to an fneg or fabs.
15316 //
15317 // For ppc_fp128:
15318 // fold (bitcast (fcopysign cst, x)) ->
15319 // flipbit = (and (extract_element
15320 // (xor (bitcast cst), (bitcast x)), 0),
15321 // signbit)
15322 // (xor (bitcast cst) (build_pair flipbit, flipbit))
15323 if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse() &&
15324 isa<ConstantFPSDNode>(N0.getOperand(0)) && VT.isInteger() &&
15325 !VT.isVector()) {
15326 unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
15327 EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
15328 if (isTypeLegal(IntXVT)) {
15329 SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
15330 AddToWorklist(X.getNode());
15331
15332 // If X has a different width than the result/lhs, sext it or truncate it.
15333 unsigned VTWidth = VT.getSizeInBits();
15334 if (OrigXWidth < VTWidth) {
15335 X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
15336 AddToWorklist(X.getNode());
15337 } else if (OrigXWidth > VTWidth) {
15338 // To get the sign bit in the right place, we have to shift it right
15339 // before truncating.
15340 SDLoc DL(X);
15341 X = DAG.getNode(ISD::SRL, DL,
15342 X.getValueType(), X,
15343 DAG.getConstant(OrigXWidth-VTWidth, DL,
15344 X.getValueType()));
15345 AddToWorklist(X.getNode());
15346 X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
15347 AddToWorklist(X.getNode());
15348 }
15349
15350 if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
15351 APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
15352 SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
15353 AddToWorklist(Cst.getNode());
15354 SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
15355 AddToWorklist(X.getNode());
15356 SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
15357 AddToWorklist(XorResult.getNode());
15358 SDValue XorResult64 = DAG.getNode(
15359 ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
15360 DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
15361 SDLoc(XorResult)));
15362 AddToWorklist(XorResult64.getNode());
15363 SDValue FlipBit =
15364 DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
15365 DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
15366 AddToWorklist(FlipBit.getNode());
15367 SDValue FlipBits =
15368 DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
15369 AddToWorklist(FlipBits.getNode());
15370 return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
15371 }
15372 APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
15373 X = DAG.getNode(ISD::AND, SDLoc(X), VT,
15374 X, DAG.getConstant(SignBit, SDLoc(X), VT));
15375 AddToWorklist(X.getNode());
15376
15377 SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
15378 Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
15379 Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
15380 AddToWorklist(Cst.getNode());
15381
15382 return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
15383 }
15384 }
15385
15386 // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
15387 if (N0.getOpcode() == ISD::BUILD_PAIR)
15388 if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
15389 return CombineLD;
15390
15391 // Remove double bitcasts from shuffles - this is often a legacy of
15392 // XformToShuffleWithZero being used to combine bitmaskings (of
15393 // float vectors bitcast to integer vectors) into shuffles.
15394 // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
15395 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
15396 N0->getOpcode() == ISD::VECTOR_SHUFFLE && N0.hasOneUse() &&
15397 VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
15398 !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
15399 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
15400
15401 // If operands are a bitcast, peek through if it casts the original VT.
15402 // If operands are a constant, just bitcast back to original VT.
15403 auto PeekThroughBitcast = [&](SDValue Op) {
15404 if (Op.getOpcode() == ISD::BITCAST &&
15405 Op.getOperand(0).getValueType() == VT)
15406 return SDValue(Op.getOperand(0));
15407 if (Op.isUndef() || isAnyConstantBuildVector(Op))
15408 return DAG.getBitcast(VT, Op);
15409 return SDValue();
15410 };
15411
15412 // FIXME: If either input vector is bitcast, try to convert the shuffle to
15413 // the result type of this bitcast. This would eliminate at least one
15414 // bitcast. See the transform in InstCombine.
15415 SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
15416 SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
15417 if (!(SV0 && SV1))
15418 return SDValue();
15419
15420 int MaskScale =
15421 VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
15422 SmallVector<int, 8> NewMask;
15423 for (int M : SVN->getMask())
15424 for (int i = 0; i != MaskScale; ++i)
15425 NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
15426
15427 SDValue LegalShuffle =
15428 TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask, DAG);
15429 if (LegalShuffle)
15430 return LegalShuffle;
15431 }
15432
15433 return SDValue();
15434}
15435
15436SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
15437 EVT VT = N->getValueType(0);
15438 return CombineConsecutiveLoads(N, VT);
15439}
15440
15441SDValue DAGCombiner::visitFREEZE(SDNode *N) {
15442 SDValue N0 = N->getOperand(0);
15443
15444 if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, /*PoisonOnly*/ false))
15445 return N0;
15446
15447 // We currently avoid folding freeze over SRA/SRL, due to the problems seen
15448 // with (freeze (assert ext)) blocking simplifications of SRA/SRL. See for
15449 // example https://reviews.llvm.org/D136529#4120959.
15450 if (N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::SRL)
15451 return SDValue();
15452
15453 // Fold freeze(op(x, ...)) -> op(freeze(x), ...).
15454 // Try to push freeze through instructions that propagate but don't produce
15455 // poison as far as possible. If the freeze's operand meets three conditions:
15456 // 1) one use, 2) it does not produce poison, and 3) all but one of its
15457 // operands are guaranteed non-poison (or it is a BUILD_VECTOR or similar),
15458 // then push the freeze through to the operands that are not guaranteed non-poison.
15459 // NOTE: we will strip poison-generating flags, so ignore them here.
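// E.g. (illustrative): freeze (add nsw x, 1) -> add (freeze x), 1 with the
// nsw flag dropped; an add with two maybe-poison operands is not handled here
// because ADD does not allow multiple maybe-poison operands below.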
15460 if (DAG.canCreateUndefOrPoison(N0, /*PoisonOnly*/ false,
15461 /*ConsiderFlags*/ false) ||
15462 N0->getNumValues() != 1 || !N0->hasOneUse())
15463 return SDValue();
15464
15465 bool AllowMultipleMaybePoisonOperands =
15466 N0.getOpcode() == ISD::BUILD_VECTOR ||
15467 N0.getOpcode() == ISD::BUILD_PAIR ||
15468 N0.getOpcode() == ISD::VECTOR_SHUFFLE ||
15469 N0.getOpcode() == ISD::CONCAT_VECTORS;
15470
15471 // Avoid turning a BUILD_VECTOR that can be recognized as "all zeros", "all
15472 // ones" or "constant" into something that depends on FrozenUndef. We can
15473 // instead pick undef values to keep those properties, while at the same time
15474 // folding away the freeze.
15475 // If we implement a more general solution for folding away freeze(undef) in
15476 // the future, then this special handling can be removed.
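// E.g. (illustrative): freeze (build_vector i32 0, 1, undef, 3) becomes
// build_vector i32 0, 1, 0, 3, keeping the node constant instead of
// introducing a frozen undef element.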
15477 if (N0.getOpcode() == ISD::BUILD_VECTOR) {
15478 SDLoc DL(N0);
15479 EVT VT = N0.getValueType();
15480 if (ISD::isBuildVectorAllOnes(N0.getNode()))
15481 return DAG.getAllOnesConstant(DL, VT);
15482 if (ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
15483 SmallVector<SDValue, 8> NewVecC;
15484 for (const SDValue &Op : N0->op_values())
15485 NewVecC.push_back(
15486 Op.isUndef() ? DAG.getConstant(0, DL, Op.getValueType()) : Op);
15487 return DAG.getBuildVector(VT, DL, NewVecC);
15488 }
15489 }
15490
15491 SmallSetVector<SDValue, 8> MaybePoisonOperands;
15492 for (SDValue Op : N0->ops()) {
15493 if (DAG.isGuaranteedNotToBeUndefOrPoison(Op, /*PoisonOnly*/ false,
15494 /*Depth*/ 1))
15495 continue;
15496 bool HadMaybePoisonOperands = !MaybePoisonOperands.empty();
15497 bool IsNewMaybePoisonOperand = MaybePoisonOperands.insert(Op);
15498 if (!HadMaybePoisonOperands)
15499 continue;
15500 if (IsNewMaybePoisonOperand && !AllowMultipleMaybePoisonOperands) {
15501 // Multiple maybe-poison ops when not allowed - bail out.
15502 return SDValue();
15503 }
15504 }
15505 // NOTE: the whole op may still not be guaranteed not to be undef or poison,
15506 // because it could create undef or poison through its own poison-generating
15507 // flags. So not finding any maybe-poison operands is fine.
15508
15509 for (SDValue MaybePoisonOperand : MaybePoisonOperands) {
15510 // Don't replace every single UNDEF everywhere with frozen UNDEF, though.
15511 if (MaybePoisonOperand.getOpcode() == ISD::UNDEF)
15512 continue;
15513 // First, freeze each offending operand.
15514 SDValue FrozenMaybePoisonOperand = DAG.getFreeze(MaybePoisonOperand);
15515 // Then, change all other uses of unfrozen operand to use frozen operand.
15516 DAG.ReplaceAllUsesOfValueWith(MaybePoisonOperand, FrozenMaybePoisonOperand);
15517 if (FrozenMaybePoisonOperand.getOpcode() == ISD::FREEZE &&
15518 FrozenMaybePoisonOperand.getOperand(0) == FrozenMaybePoisonOperand) {
15519 // But, that also updated the use in the freeze we just created, thus
15520 // creating a cycle in a DAG. Let's undo that by mutating the freeze.
15521 DAG.UpdateNodeOperands(FrozenMaybePoisonOperand.getNode(),
15522 MaybePoisonOperand);
15523 }
15524 }
15525
15526 // This node has been merged with another.
15527 if (N->getOpcode() == ISD::DELETED_NODE)
15528 return SDValue(N, 0);
15529
15530 // The whole node may have been updated, so the value we were holding
15531 // may no longer be valid. Re-fetch the operand we're `freeze`ing.
15532 N0 = N->getOperand(0);
15533
15534 // Finally, recreate the node; its operands were updated to use frozen
15535 // operands, so we just need to use its "original" operands.
15536 SmallVector<SDValue> Ops(N0->op_begin(), N0->op_end());
15537 // Special-handle ISD::UNDEF, each single one of them can be its own thing.
15538 for (SDValue &Op : Ops) {
15539 if (Op.getOpcode() == ISD::UNDEF)
15540 Op = DAG.getFreeze(Op);
15541 }
15542
15543 SDValue R;
15544 if (auto *SVN = dyn_cast<ShuffleVectorSDNode>(N0)) {
15545 // Special case handling for ShuffleVectorSDNode nodes.
15546 R = DAG.getVectorShuffle(N0.getValueType(), SDLoc(N0), Ops[0], Ops[1],
15547 SVN->getMask());
15548 } else {
15549 // NOTE: this strips poison generating flags.
15550 R = DAG.getNode(N0.getOpcode(), SDLoc(N0), N0->getVTList(), Ops);
15551 }
15552 assert(DAG.isGuaranteedNotToBeUndefOrPoison(R, /*PoisonOnly*/ false) &&
15553 "Can't create node that may be undef/poison!");
15554 return R;
15555}
15556
15557/// We know that BV is a build_vector node with Constant, ConstantFP or Undef
15558/// operands. DstEltVT indicates the destination element value type.
15559SDValue DAGCombiner::
15560ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
15561 EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
15562
15563 // If this is already the right type, we're done.
15564 if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
15565
15566 unsigned SrcBitSize = SrcEltVT.getSizeInBits();
15567 unsigned DstBitSize = DstEltVT.getSizeInBits();
15568
15569 // If this is a conversion of N elements of one type to N elements of another
15570 // type, convert each element. This handles FP<->INT cases.
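// E.g. (illustrative): v2f32 -> v2i32 simply bitcasts each f32 element to an
// i32 element, preserving the raw bits of every lane.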
15571 if (SrcBitSize == DstBitSize) {
15572 SmallVector<SDValue, 8> Ops;
15573 for (SDValue Op : BV->op_values()) {
15574 // If the vector element type is not legal, the BUILD_VECTOR operands
15575 // are promoted and implicitly truncated. Make that explicit here.
15576 if (Op.getValueType() != SrcEltVT)
15577 Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
15578 Ops.push_back(DAG.getBitcast(DstEltVT, Op));
15579 AddToWorklist(Ops.back().getNode());
15580 }
15581 EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
15582 BV->getValueType(0).getVectorNumElements());
15583 return DAG.getBuildVector(VT, SDLoc(BV), Ops);
15584 }
15585
15586 // Otherwise, we're growing or shrinking the elements. To avoid having to
15587 // handle annoying details of growing/shrinking FP values, we convert them to
15588 // int first.
15589 if (SrcEltVT.isFloatingPoint()) {
15590 // Convert the input float vector to an int vector where the elements are the
15591 // same size.
15592 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
15593 BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
15594 SrcEltVT = IntVT;
15595 }
15596
15597 // Now we know the input is an integer vector. If the output is an FP type,
15598 // convert to integer first, then to FP of the right size.
15599 if (DstEltVT.isFloatingPoint()) {
15600 EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
15601 SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
15602
15603 // Next, convert to FP elements of the same size.
15604 return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
15605 }
15606
15607 // Okay, we know the src/dst types are both integers of differing types.
15608 assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
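// E.g. (illustrative, little-endian): v4i16 -> v2i32 packs each pair of i16
// constants into one i32 constant using the raw bit data gathered below.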
15609
15610 // TODO: Should ConstantFoldBITCASTofBUILD_VECTOR always take a
15611 // BuildVectorSDNode?
15612 auto *BVN = cast<BuildVectorSDNode>(BV);
15613
15614 // Extract the constant raw bit data.
15615 BitVector UndefElements;
15616 SmallVector<APInt> RawBits;
15617 bool IsLE = DAG.getDataLayout().isLittleEndian();
15618 if (!BVN->getConstantRawBits(IsLE, DstBitSize, RawBits, UndefElements))
15619 return SDValue();
15620
15621 SDLoc DL(BV);
15622 SmallVector<SDValue, 8> Ops;
15623 for (unsigned I = 0, E = RawBits.size(); I != E; ++I) {
15624 if (UndefElements[I])
15625 Ops.push_back(DAG.getUNDEF(DstEltVT));
15626 else
15627 Ops.push_back(DAG.getConstant(RawBits[I], DL, DstEltVT));
15628 }
15629
15630 EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
15631 return DAG.getBuildVector(VT, DL, Ops);
15632}
15633
15634// Returns true if floating point contraction is allowed on the FMUL-SDValue
15635 // `N`.
15636 static bool isContractableFMUL(const TargetOptions &Options, SDValue N) {
15637 assert(N.getOpcode() == ISD::FMUL);
15638
15639 return Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath ||
15640 N->getFlags().hasAllowContract();
15641}
15642
15643 // Returns true if `N` can assume no infinities involved in its computation.
15644 static bool hasNoInfs(const TargetOptions &Options, SDValue N) {
15645 return Options.NoInfsFPMath || N->getFlags().hasNoInfs();
15646}
15647
15648/// Try to perform FMA combining on a given FADD node.
15649template <class MatchContextClass>
15650SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
15651 SDValue N0 = N->getOperand(0);
15652 SDValue N1 = N->getOperand(1);
15653 EVT VT = N->getValueType(0);
15654 SDLoc SL(N);
15655 MatchContextClass matcher(DAG, TLI, N);
15656 const TargetOptions &Options = DAG.getTarget().Options;
15657
15658 bool UseVP = std::is_same_v<MatchContextClass, VPMatchContext>;
15659
15660 // Floating-point multiply-add with intermediate rounding.
15661 // FIXME: Make isFMADLegal have specific behavior when using VPMatchContext.
15662 // FIXME: Add VP_FMAD opcode.
15663 bool HasFMAD = !UseVP && (LegalOperations && TLI.isFMADLegal(DAG, N));
15664
15665 // Floating-point multiply-add without intermediate rounding.
15666 bool HasFMA =
15667 TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
15668 (!LegalOperations || matcher.isOperationLegalOrCustom(ISD::FMA, VT));
15669
15670 // No valid opcode, do not combine.
15671 if (!HasFMAD && !HasFMA)
15672 return SDValue();
15673
15674 bool CanReassociate =
15675 Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
15676 bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
15677 Options.UnsafeFPMath || HasFMAD);
15678 // If the addition is not contractable, do not combine.
15679 if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
15680 return SDValue();
15681
15682 // Folding fadd (fmul x, y), (fmul x, y) -> fma x, y, (fmul x, y) is never
15683 // beneficial. It does not reduce latency. It increases register pressure. It
15684 // replaces an fadd with an fma which is a more complex instruction, so is
15685 // likely to have a larger encoding, use more functional units, etc.
15686 if (N0 == N1)
15687 return SDValue();
15688
15689 if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
15690 return SDValue();
15691
15692 // Always prefer FMAD to FMA for precision.
15693 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
15694 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
15695
15696 auto isFusedOp = [&](SDValue N) {
15697 return matcher.match(N, ISD::FMA) || matcher.match(N, ISD::FMAD);
15698 };
15699
15700 // Is the node an FMUL and contractable either due to global flags or
15701 // SDNodeFlags.
15702 auto isContractableFMUL = [AllowFusionGlobally, &matcher](SDValue N) {
15703 if (!matcher.match(N, ISD::FMUL))
15704 return false;
15705 return AllowFusionGlobally || N->getFlags().hasAllowContract();
15706 };
15707 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
15708 // prefer to fold the multiply with fewer uses.
15709 if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) {
15710 if (N0->use_size() > N1->use_size())
15711 std::swap(N0, N1);
15712 }
15713
15714 // fold (fadd (fmul x, y), z) -> (fma x, y, z)
15715 if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
15716 return matcher.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
15717 N0.getOperand(1), N1);
15718 }
15719
15720 // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
15721 // Note: Commutes FADD operands.
15722 if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
15723 return matcher.getNode(PreferredFusedOpcode, SL, VT, N1.getOperand(0),
15724 N1.getOperand(1), N0);
15725 }
15726
15727 // fadd (fma A, B, (fmul C, D)), E --> fma A, B, (fma C, D, E)
15728 // fadd E, (fma A, B, (fmul C, D)) --> fma A, B, (fma C, D, E)
15729 // This also works with nested fma instructions:
15730 // fadd (fma A, B, (fma C, D, (fmul E, F))), G -->
15731 // fma A, B, (fma C, D, (fma E, F, G))
15732 // fadd G, (fma A, B, (fma C, D, (fmul E, F))) -->
15733 // fma A, B, (fma C, D, (fma E, F, G)).
15734 // This requires reassociation because it changes the order of operations.
15735 if (CanReassociate) {
15736 SDValue FMA, E;
15737 if (isFusedOp(N0) && N0.hasOneUse()) {
15738 FMA = N0;
15739 E = N1;
15740 } else if (isFusedOp(N1) && N1.hasOneUse()) {
15741 FMA = N1;
15742 E = N0;
15743 }
15744
15745 SDValue TmpFMA = FMA;
15746 while (E && isFusedOp(TmpFMA) && TmpFMA.hasOneUse()) {
15747 SDValue FMul = TmpFMA->getOperand(2);
15748 if (matcher.match(FMul, ISD::FMUL) && FMul.hasOneUse()) {
15749 SDValue C = FMul.getOperand(0);
15750 SDValue D = FMul.getOperand(1);
15751 SDValue CDE = matcher.getNode(PreferredFusedOpcode, SL, VT, C, D, E);
15752 DAG.ReplaceAllUsesOfValueWith(FMul, CDE);
15753 // Replacing the inner FMul could cause the outer FMA to be simplified
15754 // away.
15755 return FMA.getOpcode() == ISD::DELETED_NODE ? SDValue(N, 0) : FMA;
15756 }
15757
15758 TmpFMA = TmpFMA->getOperand(2);
15759 }
15760 }
15761
15762 // Look through FP_EXTEND nodes to do more combining.
15763
15764 // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
15765 if (matcher.match(N0, ISD::FP_EXTEND)) {
15766 SDValue N00 = N0.getOperand(0);
15767 if (isContractableFMUL(N00) &&
15768 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
15769 N00.getValueType())) {
15770 return matcher.getNode(
15771 PreferredFusedOpcode, SL, VT,
15772 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
15773 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)), N1);
15774 }
15775 }
15776
15777 // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
15778 // Note: Commutes FADD operands.
15779 if (matcher.match(N1, ISD::FP_EXTEND)) {
15780 SDValue N10 = N1.getOperand(0);
15781 if (isContractableFMUL(N10) &&
15782 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
15783 N10.getValueType())) {
15784 return matcher.getNode(
15785 PreferredFusedOpcode, SL, VT,
15786 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0)),
15787 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0);
15788 }
15789 }
15790
15791 // More folding opportunities when target permits.
15792 if (Aggressive) {
15793 // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
15794 // -> (fma x, y, (fma (fpext u), (fpext v), z))
15795 auto FoldFAddFMAFPExtFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
15796 SDValue Z) {
15797 return matcher.getNode(
15798 PreferredFusedOpcode, SL, VT, X, Y,
15799 matcher.getNode(PreferredFusedOpcode, SL, VT,
15800 matcher.getNode(ISD::FP_EXTEND, SL, VT, U),
15801 matcher.getNode(ISD::FP_EXTEND, SL, VT, V), Z));
15802 };
15803 if (isFusedOp(N0)) {
15804 SDValue N02 = N0.getOperand(2);
15805 if (matcher.match(N02, ISD::FP_EXTEND)) {
15806 SDValue N020 = N02.getOperand(0);
15807 if (isContractableFMUL(N020) &&
15808 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
15809 N020.getValueType())) {
15810 return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
15811 N020.getOperand(0), N020.getOperand(1),
15812 N1);
15813 }
15814 }
15815 }
15816
15817 // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
15818 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
15819 // FIXME: This turns two single-precision and one double-precision
15820 // operation into two double-precision operations, which might not be
15821 // interesting for all targets, especially GPUs.
15822 auto FoldFAddFPExtFMAFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
15823 SDValue Z) {
15824 return matcher.getNode(
15825 PreferredFusedOpcode, SL, VT,
15826 matcher.getNode(ISD::FP_EXTEND, SL, VT, X),
15827 matcher.getNode(ISD::FP_EXTEND, SL, VT, Y),
15828 matcher.getNode(PreferredFusedOpcode, SL, VT,
15829 matcher.getNode(ISD::FP_EXTEND, SL, VT, U),
15830 matcher.getNode(ISD::FP_EXTEND, SL, VT, V), Z));
15831 };
15832 if (N0.getOpcode() == ISD::FP_EXTEND) {
15833 SDValue N00 = N0.getOperand(0);
15834 if (isFusedOp(N00)) {
15835 SDValue N002 = N00.getOperand(2);
15836 if (isContractableFMUL(N002) &&
15837 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
15838 N00.getValueType())) {
15839 return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
15840 N002.getOperand(0), N002.getOperand(1),
15841 N1);
15842 }
15843 }
15844 }
15845
15846 // fold (fadd x, (fma y, z, (fpext (fmul u, v))))
15847 // -> (fma y, z, (fma (fpext u), (fpext v), x))
15848 if (isFusedOp(N1)) {
15849 SDValue N12 = N1.getOperand(2);
15850 if (N12.getOpcode() == ISD::FP_EXTEND) {
15851 SDValue N120 = N12.getOperand(0);
15852 if (isContractableFMUL(N120) &&
15853 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
15854 N120.getValueType())) {
15855 return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
15856 N120.getOperand(0), N120.getOperand(1),
15857 N0);
15858 }
15859 }
15860 }
15861
15862 // fold (fadd x, (fpext (fma y, z, (fmul u, v))))
15863 // -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
15864 // FIXME: This turns two single-precision and one double-precision
15865 // operation into two double-precision operations, which might not be
15866 // interesting for all targets, especially GPUs.
15867 if (N1.getOpcode() == ISD::FP_EXTEND) {
15868 SDValue N10 = N1.getOperand(0);
15869 if (isFusedOp(N10)) {
15870 SDValue N102 = N10.getOperand(2);
15871 if (isContractableFMUL(N102) &&
15872 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
15873 N10.getValueType())) {
15874 return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
15875 N102.getOperand(0), N102.getOperand(1),
15876 N0);
15877 }
15878 }
15879 }
15880 }
15881
15882 return SDValue();
15883}
15884
15885/// Try to perform FMA combining on a given FSUB node.
15886template <class MatchContextClass>
15887SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
15888 SDValue N0 = N->getOperand(0);
15889 SDValue N1 = N->getOperand(1);
15890 EVT VT = N->getValueType(0);
15891 SDLoc SL(N);
15892 MatchContextClass matcher(DAG, TLI, N);
15893 const TargetOptions &Options = DAG.getTarget().Options;
15894
15895 bool UseVP = std::is_same_v<MatchContextClass, VPMatchContext>;
15896
15897 // Floating-point multiply-add with intermediate rounding.
15898 // FIXME: Make isFMADLegal have specific behavior when using VPMatchContext.
15899 // FIXME: Add VP_FMAD opcode.
15900 bool HasFMAD = !UseVP && (LegalOperations && TLI.isFMADLegal(DAG, N));
15901
15902 // Floating-point multiply-add without intermediate rounding.
15903 bool HasFMA =
15904 TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
15905 (!LegalOperations || matcher.isOperationLegalOrCustom(ISD::FMA, VT));
15906
15907 // No valid opcode, do not combine.
15908 if (!HasFMAD && !HasFMA)
15909 return SDValue();
15910
15911 const SDNodeFlags Flags = N->getFlags();
15912 bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
15913 Options.UnsafeFPMath || HasFMAD);
15914
15915 // If the subtraction is not contractable, do not combine.
15916 if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
15917 return SDValue();
15918
15919 if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
15920 return SDValue();
15921
15922 // Always prefer FMAD to FMA for precision.
15923 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
15924 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
15925 bool NoSignedZero = Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros();
15926
15927 // Is the node an FMUL and contractable either due to global flags or
15928 // SDNodeFlags.
15929 auto isContractableFMUL = [AllowFusionGlobally, &matcher](SDValue N) {
15930 if (!matcher.match(N, ISD::FMUL))
15931 return false;
15932 return AllowFusionGlobally || N->getFlags().hasAllowContract();
15933 };
15934
15935 // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
15936 auto tryToFoldXYSubZ = [&](SDValue XY, SDValue Z) {
15937 if (isContractableFMUL(XY) && (Aggressive || XY->hasOneUse())) {
15938 return matcher.getNode(PreferredFusedOpcode, SL, VT, XY.getOperand(0),
15939 XY.getOperand(1),
15940 matcher.getNode(ISD::FNEG, SL, VT, Z));
15941 }
15942 return SDValue();
15943 };
15944
15945 // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
15946 // Note: Commutes FSUB operands.
15947 auto tryToFoldXSubYZ = [&](SDValue X, SDValue YZ) {
15948 if (isContractableFMUL(YZ) && (Aggressive || YZ->hasOneUse())) {
15949 return matcher.getNode(
15950 PreferredFusedOpcode, SL, VT,
15951 matcher.getNode(ISD::FNEG, SL, VT, YZ.getOperand(0)),
15952 YZ.getOperand(1), X);
15953 }
15954 return SDValue();
15955 };
15956
15957 // If we have two choices trying to fold (fsub (fmul u, v), (fmul x, y)),
15958 // prefer to fold the multiply with fewer uses.
15959 if (isContractableFMUL(N0) && isContractableFMUL(N1) &&
15960 (N0->use_size() > N1->use_size())) {
15961 // fold (fsub (fmul a, b), (fmul c, d)) -> (fma (fneg c), d, (fmul a, b))
15962 if (SDValue V = tryToFoldXSubYZ(N0, N1))
15963 return V;
15964 // fold (fsub (fmul a, b), (fmul c, d)) -> (fma a, b, (fneg (fmul c, d)))
15965 if (SDValue V = tryToFoldXYSubZ(N0, N1))
15966 return V;
15967 } else {
15968 // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
15969 if (SDValue V = tryToFoldXYSubZ(N0, N1))
15970 return V;
15971 // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
15972 if (SDValue V = tryToFoldXSubYZ(N0, N1))
15973 return V;
15974 }
15975
15976 // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
15977 if (matcher.match(N0, ISD::FNEG) && isContractableFMUL(N0.getOperand(0)) &&
15978 (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
15979 SDValue N00 = N0.getOperand(0).getOperand(0);
15980 SDValue N01 = N0.getOperand(0).getOperand(1);
15981 return matcher.getNode(PreferredFusedOpcode, SL, VT,
15982 matcher.getNode(ISD::FNEG, SL, VT, N00), N01,
15983 matcher.getNode(ISD::FNEG, SL, VT, N1));
15984 }
15985
15986 // Look through FP_EXTEND nodes to do more combining.
15987
15988 // fold (fsub (fpext (fmul x, y)), z)
15989 // -> (fma (fpext x), (fpext y), (fneg z))
15990 if (matcher.match(N0, ISD::FP_EXTEND)) {
15991 SDValue N00 = N0.getOperand(0);
15992 if (isContractableFMUL(N00) &&
15993 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
15994 N00.getValueType())) {
15995 return matcher.getNode(
15996 PreferredFusedOpcode, SL, VT,
15997 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
15998 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
15999 matcher.getNode(ISD::FNEG, SL, VT, N1));
16000 }
16001 }
16002
16003 // fold (fsub x, (fpext (fmul y, z)))
16004 // -> (fma (fneg (fpext y)), (fpext z), x)
16005 // Note: Commutes FSUB operands.
16006 if (matcher.match(N1, ISD::FP_EXTEND)) {
16007 SDValue N10 = N1.getOperand(0);
16008 if (isContractableFMUL(N10) &&
16009 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16010 N10.getValueType())) {
16011 return matcher.getNode(
16012 PreferredFusedOpcode, SL, VT,
16013 matcher.getNode(
16014 ISD::FNEG, SL, VT,
16015 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0))),
16016 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0);
16017 }
16018 }
16019
16020 // fold (fsub (fpext (fneg (fmul x, y))), z)
16021 // -> (fneg (fma (fpext x), (fpext y), z))
16022 // Note: This could be removed with appropriate canonicalization of the
16023 // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
16024 // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent us
16025 // from implementing the canonicalization in visitFSUB.
16026 if (matcher.match(N0, ISD::FP_EXTEND)) {
16027 SDValue N00 = N0.getOperand(0);
16028 if (matcher.match(N00, ISD::FNEG)) {
16029 SDValue N000 = N00.getOperand(0);
16030 if (isContractableFMUL(N000) &&
16031 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16032 N00.getValueType())) {
16033 return matcher.getNode(
16034 ISD::FNEG, SL, VT,
16035 matcher.getNode(
16036 PreferredFusedOpcode, SL, VT,
16037 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
16038 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
16039 N1));
16040 }
16041 }
16042 }
16043
16044 // fold (fsub (fneg (fpext (fmul x, y))), z)
16045 // -> (fneg (fma (fpext x), (fpext y), z))
16046 // Note: This could be removed with appropriate canonicalization of the
16047 // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
16048 // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent us
16049 // from implementing the canonicalization in visitFSUB.
16050 if (matcher.match(N0, ISD::FNEG)) {
16051 SDValue N00 = N0.getOperand(0);
16052 if (matcher.match(N00, ISD::FP_EXTEND)) {
16053 SDValue N000 = N00.getOperand(0);
16054 if (isContractableFMUL(N000) &&
16055 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16056 N000.getValueType())) {
16057 return matcher.getNode(
16058 ISD::FNEG, SL, VT,
16059 matcher.getNode(
16060 PreferredFusedOpcode, SL, VT,
16061 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
16062 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
16063 N1));
16064 }
16065 }
16066 }
16067
16068 auto isReassociable = [&Options](SDNode *N) {
16069 return Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
16070 };
16071
16072 auto isContractableAndReassociableFMUL = [&isContractableFMUL,
16073 &isReassociable](SDValue N) {
16074 return isContractableFMUL(N) && isReassociable(N.getNode());
16075 };
16076
16077 auto isFusedOp = [&](SDValue N) {
16078 return matcher.match(N, ISD::FMA) || matcher.match(N, ISD::FMAD);
16079 };
16080
16081 // More folding opportunities when target permits.
16082 if (Aggressive && isReassociable(N)) {
16083 bool CanFuse = Options.UnsafeFPMath || N->getFlags().hasAllowContract();
16084 // fold (fsub (fma x, y, (fmul u, v)), z)
16085 // -> (fma x, y, (fma u, v, (fneg z)))
16086 if (CanFuse && isFusedOp(N0) &&
16087 isContractableAndReassociableFMUL(N0.getOperand(2)) &&
16088 N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
16089 return matcher.getNode(
16090 PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1),
16091 matcher.getNode(PreferredFusedOpcode, SL, VT,
16092 N0.getOperand(2).getOperand(0),
16093 N0.getOperand(2).getOperand(1),
16094 matcher.getNode(ISD::FNEG, SL, VT, N1)));
16095 }
16096
16097 // fold (fsub x, (fma y, z, (fmul u, v)))
16098 // -> (fma (fneg y), z, (fma (fneg u), v, x))
16099 if (CanFuse && isFusedOp(N1) &&
16100 isContractableAndReassociableFMUL(N1.getOperand(2)) &&
16101 N1->hasOneUse() && NoSignedZero) {
16102 SDValue N20 = N1.getOperand(2).getOperand(0);
16103 SDValue N21 = N1.getOperand(2).getOperand(1);
16104 return matcher.getNode(
16105 PreferredFusedOpcode, SL, VT,
16106 matcher.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
16107 N1.getOperand(1),
16108 matcher.getNode(PreferredFusedOpcode, SL, VT,
16109 matcher.getNode(ISD::FNEG, SL, VT, N20), N21, N0));
16110 }
16111
16112 // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
16113 // -> (fma x, y, (fma (fpext u), (fpext v), (fneg z)))
16114 if (isFusedOp(N0) && N0->hasOneUse()) {
16115 SDValue N02 = N0.getOperand(2);
16116 if (matcher.match(N02, ISD::FP_EXTEND)) {
16117 SDValue N020 = N02.getOperand(0);
16118 if (isContractableAndReassociableFMUL(N020) &&
16119 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16120 N020.getValueType())) {
16121 return matcher.getNode(
16122 PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1),
16123 matcher.getNode(
16124 PreferredFusedOpcode, SL, VT,
16125 matcher.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(0)),
16126 matcher.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(1)),
16127 matcher.getNode(ISD::FNEG, SL, VT, N1)));
16128 }
16129 }
16130 }
16131
16132 // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
16133 // -> (fma (fpext x), (fpext y),
16134 // (fma (fpext u), (fpext v), (fneg z)))
16135 // FIXME: This turns two single-precision and one double-precision
16136 // operation into two double-precision operations, which might not be
16137 // interesting for all targets, especially GPUs.
16138 if (matcher.match(N0, ISD::FP_EXTEND)) {
16139 SDValue N00 = N0.getOperand(0);
16140 if (isFusedOp(N00)) {
16141 SDValue N002 = N00.getOperand(2);
16142 if (isContractableAndReassociableFMUL(N002) &&
16143 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16144 N00.getValueType())) {
16145 return matcher.getNode(
16146 PreferredFusedOpcode, SL, VT,
16147 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
16148 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
16149 matcher.getNode(
16150 PreferredFusedOpcode, SL, VT,
16151 matcher.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(0)),
16152 matcher.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(1)),
16153 matcher.getNode(ISD::FNEG, SL, VT, N1)));
16154 }
16155 }
16156 }
16157
16158 // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
16159 // -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
16160 if (isFusedOp(N1) && matcher.match(N1.getOperand(2), ISD::FP_EXTEND) &&
16161 N1->hasOneUse()) {
16162 SDValue N120 = N1.getOperand(2).getOperand(0);
16163 if (isContractableAndReassociableFMUL(N120) &&
16164 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16165 N120.getValueType())) {
16166 SDValue N1200 = N120.getOperand(0);
16167 SDValue N1201 = N120.getOperand(1);
16168 return matcher.getNode(
16169 PreferredFusedOpcode, SL, VT,
16170 matcher.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
16171 N1.getOperand(1),
16172 matcher.getNode(
16173 PreferredFusedOpcode, SL, VT,
16174 matcher.getNode(ISD::FNEG, SL, VT,
16175 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1200)),
16176 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1201), N0));
16177 }
16178 }
16179
16180 // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
16181 // -> (fma (fneg (fpext y)), (fpext z),
16182 // (fma (fneg (fpext u)), (fpext v), x))
16183 // FIXME: This turns two single-precision and one double-precision
16184 // operation into two double-precision operations, which might not be
16185 // interesting for all targets, especially GPUs.
16186 if (matcher.match(N1, ISD::FP_EXTEND) && isFusedOp(N1.getOperand(0))) {
16187 SDValue CvtSrc = N1.getOperand(0);
16188 SDValue N100 = CvtSrc.getOperand(0);
16189 SDValue N101 = CvtSrc.getOperand(1);
16190 SDValue N102 = CvtSrc.getOperand(2);
16191 if (isContractableAndReassociableFMUL(N102) &&
16192 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16193 CvtSrc.getValueType())) {
16194 SDValue N1020 = N102.getOperand(0);
16195 SDValue N1021 = N102.getOperand(1);
16196 return matcher.getNode(
16197 PreferredFusedOpcode, SL, VT,
16198 matcher.getNode(ISD::FNEG, SL, VT,
16199 matcher.getNode(ISD::FP_EXTEND, SL, VT, N100)),
16200 matcher.getNode(ISD::FP_EXTEND, SL, VT, N101),
16201 matcher.getNode(
16202 PreferredFusedOpcode, SL, VT,
16203 matcher.getNode(ISD::FNEG, SL, VT,
16204 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1020)),
16205 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1021), N0));
16206 }
16207 }
16208 }
16209
16210 return SDValue();
16211}
16212
16213/// Try to perform FMA combining on a given FMUL node based on the distributive
16214/// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
16215/// subtraction instead of addition).
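/// For example (illustrative): (fmul (fadd x, 1.0), y) = (x + 1) * y
/// = x * y + y, which can be emitted as (fma x, y, y).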
16216SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
16217 SDValue N0 = N->getOperand(0);
16218 SDValue N1 = N->getOperand(1);
16219 EVT VT = N->getValueType(0);
16220 SDLoc SL(N);
16221
16222 assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
16223
16224 const TargetOptions &Options = DAG.getTarget().Options;
16225
16226 // The transforms below are incorrect when x == 0 and y == inf, because the
16227 // intermediate multiplication produces a nan.
16228 SDValue FAdd = N0.getOpcode() == ISD::FADD ? N0 : N1;
16229 if (!hasNoInfs(Options, FAdd))
16230 return SDValue();
16231
16232 // Floating-point multiply-add without intermediate rounding.
16233 bool HasFMA =
16234 isContractableFMUL(Options, SDValue(N, 0)) &&
16235 TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
16236 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
16237
16238 // Floating-point multiply-add with intermediate rounding. This can result
16239 // in a less precise result due to the changed rounding order.
16240 bool HasFMAD = Options.UnsafeFPMath &&
16241 (LegalOperations && TLI.isFMADLegal(DAG, N));
16242
16243 // No valid opcode, do not combine.
16244 if (!HasFMAD && !HasFMA)
16245 return SDValue();
16246
16247 // Always prefer FMAD to FMA for precision.
16248 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
16249 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
16250
16251 // fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y)
16252 // fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y))
16253 auto FuseFADD = [&](SDValue X, SDValue Y) {
16254 if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
16255 if (auto *C = isConstOrConstSplatFP(X.getOperand(1), true)) {
16256 if (C->isExactlyValue(+1.0))
16257 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
16258 Y);
16259 if (C->isExactlyValue(-1.0))
16260 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
16261 DAG.getNode(ISD::FNEG, SL, VT, Y));
16262 }
16263 }
16264 return SDValue();
16265 };
16266
16267 if (SDValue FMA = FuseFADD(N0, N1))
16268 return FMA;
16269 if (SDValue FMA = FuseFADD(N1, N0))
16270 return FMA;
16271
16272 // fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y)
16273 // fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y))
16274 // fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y))
16275 // fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y)
16276 auto FuseFSUB = [&](SDValue X, SDValue Y) {
16277 if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
16278 if (auto *C0 = isConstOrConstSplatFP(X.getOperand(0), true)) {
16279 if (C0->isExactlyValue(+1.0))
16280 return DAG.getNode(PreferredFusedOpcode, SL, VT,
16281 DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
16282 Y);
16283 if (C0->isExactlyValue(-1.0))
16284 return DAG.getNode(PreferredFusedOpcode, SL, VT,
16285 DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
16286 DAG.getNode(ISD::FNEG, SL, VT, Y));
16287 }
16288 if (auto *C1 = isConstOrConstSplatFP(X.getOperand(1), true)) {
16289 if (C1->isExactlyValue(+1.0))
16290 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
16291 DAG.getNode(ISD::FNEG, SL, VT, Y));
16292 if (C1->isExactlyValue(-1.0))
16293 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
16294 Y);
16295 }
16296 }
16297 return SDValue();
16298 };
16299
16300 if (SDValue FMA = FuseFSUB(N0, N1))
16301 return FMA;
16302 if (SDValue FMA = FuseFSUB(N1, N0))
16303 return FMA;
16304
16305 return SDValue();
16306}
16307
16308SDValue DAGCombiner::visitVP_FADD(SDNode *N) {
16309 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
16310
16311 // FADD -> FMA combines:
16312 if (SDValue Fused = visitFADDForFMACombine<VPMatchContext>(N)) {
16313 if (Fused.getOpcode() != ISD::DELETED_NODE)
16314 AddToWorklist(Fused.getNode());
16315 return Fused;
16316 }
16317 return SDValue();
16318}
16319
16320SDValue DAGCombiner::visitFADD(SDNode *N) {
16321 SDValue N0 = N->getOperand(0);
16322 SDValue N1 = N->getOperand(1);
16323 bool N0CFP = DAG.isConstantFPBuildVectorOrConstantFP(N0);
16324 bool N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N1);
16325 EVT VT = N->getValueType(0);
16326 SDLoc DL(N);
16327 const TargetOptions &Options = DAG.getTarget().Options;
16328 SDNodeFlags Flags = N->getFlags();
16329 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
16330
16331 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
16332 return R;
16333
16334 // fold (fadd c1, c2) -> c1 + c2
16335 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FADD, DL, VT, {N0, N1}))
16336 return C;
16337
16338 // canonicalize constant to RHS
16339 if (N0CFP && !N1CFP)
16340 return DAG.getNode(ISD::FADD, DL, VT, N1, N0);
16341
16342 // fold vector ops
16343 if (VT.isVector())
16344 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
16345 return FoldedVOp;
16346
16347 // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
16348 ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
16349 if (N1C && N1C->isZero())
16350 if (N1C->isNegative() || Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())
16351 return N0;
16352
16353 if (SDValue NewSel = foldBinOpIntoSelect(N))
16354 return NewSel;
16355
16356 // fold (fadd A, (fneg B)) -> (fsub A, B)
16357 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
16358 if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
16359 N1, DAG, LegalOperations, ForCodeSize))
16360 return DAG.getNode(ISD::FSUB, DL, VT, N0, NegN1);
16361
16362 // fold (fadd (fneg A), B) -> (fsub B, A)
16363 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
16364 if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
16365 N0, DAG, LegalOperations, ForCodeSize))
16366 return DAG.getNode(ISD::FSUB, DL, VT, N1, NegN0);
16367
16368 auto isFMulNegTwo = [](SDValue FMul) {
16369 if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
16370 return false;
16371 auto *C = isConstOrConstSplatFP(FMul.getOperand(1), true);
16372 return C && C->isExactlyValue(-2.0);
16373 };
16374
16375 // fadd (fmul B, -2.0), A --> fsub A, (fadd B, B)
16376 if (isFMulNegTwo(N0)) {
16377 SDValue B = N0.getOperand(0);
16378 SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
16379 return DAG.getNode(ISD::FSUB, DL, VT, N1, Add);
16380 }
16381 // fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B)
16382 if (isFMulNegTwo(N1)) {
16383 SDValue B = N1.getOperand(0);
16384 SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
16385 return DAG.getNode(ISD::FSUB, DL, VT, N0, Add);
16386 }
16387
16388 // No FP constant should be created after legalization as the Instruction
16389 // Selection pass has a hard time dealing with FP constants.
16390 bool AllowNewConst = (Level < AfterLegalizeDAG);
16391
16392 // If nnan is enabled, fold lots of things.
16393 if ((Options.NoNaNsFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
16394 // If allowed, fold (fadd (fneg x), x) -> 0.0
16395 if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
16396 return DAG.getConstantFP(0.0, DL, VT);
16397
16398 // If allowed, fold (fadd x, (fneg x)) -> 0.0
16399 if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
16400 return DAG.getConstantFP(0.0, DL, VT);
16401 }
16402
16403 // If 'unsafe math' or reassoc and nsz, fold lots of things.
16404 // TODO: break out portions of the transformations below for which Unsafe is
16405 // considered and which do not require both nsz and reassoc
16406 if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
16407 (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
16408 AllowNewConst) {
16409 // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
16410 if (N1CFP && N0.getOpcode() == ISD::FADD &&
16411 DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
16412 SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1);
16413 return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC);
16414 }
16415
16416 // We can fold chains of FADD's of the same value into multiplications.
16417 // This transform is not safe in general because we are reducing the number
16418 // of rounding steps.
16419 if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
16420 if (N0.getOpcode() == ISD::FMUL) {
16421 SDNode *CFP00 =
16422 DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
16423 SDNode *CFP01 =
16424 DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
16425
16426 // (fadd (fmul x, c), x) -> (fmul x, c+1)
16427 if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
16428 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
16429 DAG.getConstantFP(1.0, DL, VT));
16430 return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP);
16431 }
16432
16433 // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
16434 if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
16435 N1.getOperand(0) == N1.getOperand(1) &&
16436 N0.getOperand(0) == N1.getOperand(0)) {
16437 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
16438 DAG.getConstantFP(2.0, DL, VT));
16439 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP);
16440 }
16441 }
16442
16443 if (N1.getOpcode() == ISD::FMUL) {
16444 SDNode *CFP10 =
16445 DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
16446 SDNode *CFP11 =
16447 DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
16448
16449 // (fadd x, (fmul x, c)) -> (fmul x, c+1)
16450 if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
16451 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
16452 DAG.getConstantFP(1.0, DL, VT));
16453 return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP);
16454 }
16455
16456 // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
16457 if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
16458 N0.getOperand(0) == N0.getOperand(1) &&
16459 N1.getOperand(0) == N0.getOperand(0)) {
16460 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
16461 DAG.getConstantFP(2.0, DL, VT));
16462 return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP);
16463 }
16464 }
16465
16466 if (N0.getOpcode() == ISD::FADD) {
16467 SDNode *CFP00 =
16468 DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
16469 // (fadd (fadd x, x), x) -> (fmul x, 3.0)
16470 if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
16471 (N0.getOperand(0) == N1)) {
16472 return DAG.getNode(ISD::FMUL, DL, VT, N1,
16473 DAG.getConstantFP(3.0, DL, VT));
16474 }
16475 }
16476
16477 if (N1.getOpcode() == ISD::FADD) {
16478 SDNode *CFP10 =
16479 DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
16480 // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
16481 if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
16482 N1.getOperand(0) == N0) {
16483 return DAG.getNode(ISD::FMUL, DL, VT, N0,
16484 DAG.getConstantFP(3.0, DL, VT));
16485 }
16486 }
16487
16488 // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
16489 if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
16490 N0.getOperand(0) == N0.getOperand(1) &&
16491 N1.getOperand(0) == N1.getOperand(1) &&
16492 N0.getOperand(0) == N1.getOperand(0)) {
16493 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
16494 DAG.getConstantFP(4.0, DL, VT));
16495 }
16496 }
16497
16498 // Fold fadd(vecreduce(x), vecreduce(y)) -> vecreduce(fadd(x, y))
16499 if (SDValue SD = reassociateReduction(ISD::VECREDUCE_FADD, ISD::FADD, DL,
16500 VT, N0, N1, Flags))
16501 return SD;
16502 } // enable-unsafe-fp-math
16503
16504 // FADD -> FMA combines:
16505 if (SDValue Fused = visitFADDForFMACombine<EmptyMatchContext>(N)) {
16506 if (Fused.getOpcode() != ISD::DELETED_NODE)
16507 AddToWorklist(Fused.getNode());
16508 return Fused;
16509 }
16510 return SDValue();
16511}
16512
16513SDValue DAGCombiner::visitSTRICT_FADD(SDNode *N) {
16514 SDValue Chain = N->getOperand(0);
16515 SDValue N0 = N->getOperand(1);
16516 SDValue N1 = N->getOperand(2);
16517 EVT VT = N->getValueType(0);
16518 EVT ChainVT = N->getValueType(1);
16519 SDLoc DL(N);
16520 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
16521
16522 // fold (strict_fadd A, (fneg B)) -> (strict_fsub A, B)
16523 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
16524 if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
16525 N1, DAG, LegalOperations, ForCodeSize)) {
16526 return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
16527 {Chain, N0, NegN1});
16528 }
16529
16530 // fold (strict_fadd (fneg A), B) -> (strict_fsub B, A)
16531 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
16532 if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
16533 N0, DAG, LegalOperations, ForCodeSize)) {
16534 return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
16535 {Chain, N1, NegN0});
16536 }
16537 return SDValue();
16538}
16539
16540SDValue DAGCombiner::visitFSUB(SDNode *N) {
16541 SDValue N0 = N->getOperand(0);
16542 SDValue N1 = N->getOperand(1);
16543 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
16544 ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
16545 EVT VT = N->getValueType(0);
16546 SDLoc DL(N);
16547 const TargetOptions &Options = DAG.getTarget().Options;
16548 const SDNodeFlags Flags = N->getFlags();
16549 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
16550
16551 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
16552 return R;
16553
16554 // fold (fsub c1, c2) -> c1-c2
16555 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FSUB, DL, VT, {N0, N1}))
16556 return C;
16557
16558 // fold vector ops
16559 if (VT.isVector())
16560 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
16561 return FoldedVOp;
16562
16563 if (SDValue NewSel = foldBinOpIntoSelect(N))
16564 return NewSel;
16565
16566 // (fsub A, 0) -> A
16567 if (N1CFP && N1CFP->isZero()) {
16568 if (!N1CFP->isNegative() || Options.NoSignedZerosFPMath ||
16569 Flags.hasNoSignedZeros()) {
16570 return N0;
16571 }
16572 }
16573
16574 if (N0 == N1) {
16575 // (fsub x, x) -> 0.0
16576 if (Options.NoNaNsFPMath || Flags.hasNoNaNs())
16577 return DAG.getConstantFP(0.0f, DL, VT);
16578 }
16579
16580 // (fsub -0.0, N1) -> -N1
16581 if (N0CFP && N0CFP->isZero()) {
16582 if (N0CFP->isNegative() ||
16583 (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
16584 // We cannot replace an FSUB(+-0.0,X) with FNEG(X) when denormals are
16585 // flushed to zero, unless all users treat denorms as zero (DAZ).
16586 // FIXME: This transform will change the sign of a NaN and the behavior
16587 // of a signaling NaN. It is only valid when a NoNaN flag is present.
16588 DenormalMode DenormMode = DAG.getDenormalMode(VT);
16589 if (DenormMode == DenormalMode::getIEEE()) {
16590 if (SDValue NegN1 =
16591 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
16592 return NegN1;
16593 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
16594 return DAG.getNode(ISD::FNEG, DL, VT, N1);
16595 }
16596 }
16597 }
16598
16599 if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
16600 (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
16601 N1.getOpcode() == ISD::FADD) {
16602 // X - (X + Y) -> -Y
16603 if (N0 == N1->getOperand(0))
16604 return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1));
16605 // X - (Y + X) -> -Y
16606 if (N0 == N1->getOperand(1))
16607 return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0));
16608 }
16609
16610 // fold (fsub A, (fneg B)) -> (fadd A, B)
16611 if (SDValue NegN1 =
16612 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
16613 return DAG.getNode(ISD::FADD, DL, VT, N0, NegN1);
16614
16615 // FSUB -> FMA combines:
16616 if (SDValue Fused = visitFSUBForFMACombine<EmptyMatchContext>(N)) {
16617 AddToWorklist(Fused.getNode());
16618 return Fused;
16619 }
16620
16621 return SDValue();
16622}
16623
16624// Transform IEEE Floats:
16625// (fmul C, (uitofp Pow2))
16626// -> (bitcast_to_FP (add (bitcast_to_INT C), Log2(Pow2) << mantissa))
16627// (fdiv C, (uitofp Pow2))
16628// -> (bitcast_to_FP (sub (bitcast_to_INT C), Log2(Pow2) << mantissa))
16629//
16630 // The rationale is that fmul/fdiv by a power of 2 just changes the exponent, so
16631// there is no need for more than an add/sub.
16632//
16633// This is valid under the following circumstances:
16634// 1) We are dealing with IEEE floats
16635// 2) C is normal
16636// 3) The fmul/fdiv add/sub will not go outside of min/max exponent bounds.
16637 // TODO: Much of this could also be used for generating `ldexp` on targets that
16638// prefer it.
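//
// A concrete IEEE binary32 sketch of the bit trick (illustrative only; the
// helper below is hypothetical and not part of this file): multiplying a
// normal float by 2^k only moves the biased exponent, so the whole operation
// is one integer add on the bit pattern.
//
//   #include <bit>
//   #include <cstdint>
//   // Assumes 'c' is normal and the resulting exponent stays in range,
//   // which is exactly what the checks in the function below verify.
//   float mulByPow2(float c, uint32_t k) {
//     uint32_t Bits = std::bit_cast<uint32_t>(c);
//     Bits += k << 23;                    // 23 = binary32 mantissa width
//     return std::bit_cast<float>(Bits);  // e.g. mulByPow2(8.0f, 2) == 32.0f
//   }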
16639SDValue DAGCombiner::combineFMulOrFDivWithIntPow2(SDNode *N) {
16640 EVT VT = N->getValueType(0);
16641 SDValue ConstOp, Pow2Op;
16642
16643 std::optional<int> Mantissa;
16644 auto GetConstAndPow2Ops = [&](unsigned ConstOpIdx) {
16645 if (ConstOpIdx == 1 && N->getOpcode() == ISD::FDIV)
16646 return false;
16647
16648 ConstOp = peekThroughBitcasts(N->getOperand(ConstOpIdx));
16649 Pow2Op = N->getOperand(1 - ConstOpIdx);
16650 if (Pow2Op.getOpcode() != ISD::UINT_TO_FP &&
16651 (Pow2Op.getOpcode() != ISD::SINT_TO_FP ||
16652 !DAG.computeKnownBits(Pow2Op).isNonNegative()))
16653 return false;
16654
16655 Pow2Op = Pow2Op.getOperand(0);
16656
16657 // `Log2(Pow2Op) < Pow2Op.getScalarSizeInBits()`.
16658 // TODO: We could use knownbits to make this bound more precise.
16659 int MaxExpChange = Pow2Op.getValueType().getScalarSizeInBits();
16660
16661 auto IsFPConstValid = [N, MaxExpChange, &Mantissa](ConstantFPSDNode *CFP) {
16662 if (CFP == nullptr)
16663 return false;
16664
16665 const APFloat &APF = CFP->getValueAPF();
16666
16667       // Make sure we have a normal/IEEE constant.
16668 if (!APF.isNormal() || !APF.isIEEE())
16669 return false;
16670
16671       // Make sure the float's exponent is within the bounds for which this
16672       // transform produces a bitwise-equal value.
16673 int CurExp = ilogb(APF);
16674 // FMul by pow2 will only increase exponent.
16675 int MinExp =
16676 N->getOpcode() == ISD::FMUL ? CurExp : (CurExp - MaxExpChange);
16677 // FDiv by pow2 will only decrease exponent.
16678 int MaxExp =
16679 N->getOpcode() == ISD::FDIV ? CurExp : (CurExp + MaxExpChange);
16680       if (MinExp <= APFloat::semanticsMinExponent(APF.getSemantics()) ||
16681           MaxExp >= APFloat::semanticsMaxExponent(APF.getSemantics()))
16682         return false;
16683
16684 // Finally make sure we actually know the mantissa for the float type.
16685 int ThisMantissa = APFloat::semanticsPrecision(APF.getSemantics()) - 1;
16686 if (!Mantissa)
16687 Mantissa = ThisMantissa;
16688
16689 return *Mantissa == ThisMantissa && ThisMantissa > 0;
16690 };
16691
16692 // TODO: We may be able to include undefs.
16693 return ISD::matchUnaryFpPredicate(ConstOp, IsFPConstValid);
16694 };
16695
16696 if (!GetConstAndPow2Ops(0) && !GetConstAndPow2Ops(1))
16697 return SDValue();
16698
16699 if (!TLI.optimizeFMulOrFDivAsShiftAddBitcast(N, ConstOp, Pow2Op))
16700 return SDValue();
16701
16702 // Get log2 after all other checks have taken place. This is because
16703 // BuildLogBase2 may create a new node.
16704 SDLoc DL(N);
16705 // Get Log2 type with same bitwidth as the float type (VT).
16706 EVT NewIntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits());
16707 if (VT.isVector())
16708 NewIntVT = EVT::getVectorVT(*DAG.getContext(), NewIntVT,
16709                                 VT.getVectorElementCount());
16710
16711 SDValue Log2 = BuildLogBase2(Pow2Op, DL, DAG.isKnownNeverZero(Pow2Op),
16712 /*InexpensiveOnly*/ true, NewIntVT);
16713 if (!Log2)
16714 return SDValue();
16715
16716 // Perform actual transform.
16717 SDValue MantissaShiftCnt =
16718 DAG.getConstant(*Mantissa, DL, getShiftAmountTy(NewIntVT));
16719 // TODO: Sometimes Log2 is of form `(X + C)`. `(X + C) << C1` should fold to
16720 // `(X << C1) + (C << C1)`, but that isn't always the case because of the
16721   // cast. We could implement that fold here to handle the casts.
16722 SDValue Shift = DAG.getNode(ISD::SHL, DL, NewIntVT, Log2, MantissaShiftCnt);
16723 SDValue ResAsInt =
16724 DAG.getNode(N->getOpcode() == ISD::FMUL ? ISD::ADD : ISD::SUB, DL,
16725 NewIntVT, DAG.getBitcast(NewIntVT, ConstOp), Shift);
16726 SDValue ResAsFP = DAG.getBitcast(VT, ResAsInt);
16727 return ResAsFP;
16728}
16729
16730SDValue DAGCombiner::visitFMUL(SDNode *N) {
16731 SDValue N0 = N->getOperand(0);
16732 SDValue N1 = N->getOperand(1);
16733 ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
16734 EVT VT = N->getValueType(0);
16735 SDLoc DL(N);
16736 const TargetOptions &Options = DAG.getTarget().Options;
16737 const SDNodeFlags Flags = N->getFlags();
16738 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
16739
16740 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
16741 return R;
16742
16743 // fold (fmul c1, c2) -> c1*c2
16744 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FMUL, DL, VT, {N0, N1}))
16745 return C;
16746
16747 // canonicalize constant to RHS
16748   if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
16749       !DAG.isConstantFPBuildVectorOrConstantFP(N1))
16750     return DAG.getNode(ISD::FMUL, DL, VT, N1, N0);
16751
16752 // fold vector ops
16753 if (VT.isVector())
16754 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
16755 return FoldedVOp;
16756
16757 if (SDValue NewSel = foldBinOpIntoSelect(N))
16758 return NewSel;
16759
16760 if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) {
16761 // fmul (fmul X, C1), C2 -> fmul X, C1 * C2
16762     if (DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
16763         N0.getOpcode() == ISD::FMUL) {
16764 SDValue N00 = N0.getOperand(0);
16765 SDValue N01 = N0.getOperand(1);
16766 // Avoid an infinite loop by making sure that N00 is not a constant
16767 // (the inner multiply has not been constant folded yet).
16768       if (DAG.isConstantFPBuildVectorOrConstantFP(N01) &&
16769           !DAG.isConstantFPBuildVectorOrConstantFP(N00)) {
16770         SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1);
16771 return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts);
16772 }
16773 }
16774
16775 // Match a special-case: we convert X * 2.0 into fadd.
16776 // fmul (fadd X, X), C -> fmul X, 2.0 * C
16777 if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() &&
16778 N0.getOperand(0) == N0.getOperand(1)) {
16779 const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
16780 SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1);
16781 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts);
16782 }
16783
16784 // Fold fmul(vecreduce(x), vecreduce(y)) -> vecreduce(fmul(x, y))
16785 if (SDValue SD = reassociateReduction(ISD::VECREDUCE_FMUL, ISD::FMUL, DL,
16786 VT, N0, N1, Flags))
16787 return SD;
16788 }
16789
16790 // fold (fmul X, 2.0) -> (fadd X, X)
16791 if (N1CFP && N1CFP->isExactlyValue(+2.0))
16792 return DAG.getNode(ISD::FADD, DL, VT, N0, N0);
16793
16794 // fold (fmul X, -1.0) -> (fsub -0.0, X)
16795 if (N1CFP && N1CFP->isExactlyValue(-1.0)) {
16796 if (!LegalOperations || TLI.isOperationLegal(ISD::FSUB, VT)) {
16797 return DAG.getNode(ISD::FSUB, DL, VT,
16798 DAG.getConstantFP(-0.0, DL, VT), N0, Flags);
16799 }
16800 }
16801
16802 // -N0 * -N1 --> N0 * N1
16803   TargetLowering::NegatibleCost CostN0 =
16804       TargetLowering::NegatibleCost::Expensive;
16805   TargetLowering::NegatibleCost CostN1 =
16806       TargetLowering::NegatibleCost::Expensive;
16807   SDValue NegN0 =
16808 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
16809 if (NegN0) {
16810 HandleSDNode NegN0Handle(NegN0);
16811 SDValue NegN1 =
16812 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
16813 if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
16814                 CostN1 == TargetLowering::NegatibleCost::Cheaper))
16815     return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1);
16816 }
16817
16818 // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
16819 // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
16820 if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
16821 (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
16822 TLI.isOperationLegal(ISD::FABS, VT)) {
16823 SDValue Select = N0, X = N1;
16824 if (Select.getOpcode() != ISD::SELECT)
16825 std::swap(Select, X);
16826
16827 SDValue Cond = Select.getOperand(0);
16828 auto TrueOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
16829 auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));
16830
16831 if (TrueOpnd && FalseOpnd &&
16832 Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
16833 isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
16834 cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
16835 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
16836 switch (CC) {
16837 default: break;
16838 case ISD::SETOLT:
16839 case ISD::SETULT:
16840 case ISD::SETOLE:
16841 case ISD::SETULE:
16842 case ISD::SETLT:
16843 case ISD::SETLE:
16844 std::swap(TrueOpnd, FalseOpnd);
16845 [[fallthrough]];
16846 case ISD::SETOGT:
16847 case ISD::SETUGT:
16848 case ISD::SETOGE:
16849 case ISD::SETUGE:
16850 case ISD::SETGT:
16851 case ISD::SETGE:
16852 if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
16853 TLI.isOperationLegal(ISD::FNEG, VT))
16854 return DAG.getNode(ISD::FNEG, DL, VT,
16855 DAG.getNode(ISD::FABS, DL, VT, X));
16856 if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
16857 return DAG.getNode(ISD::FABS, DL, VT, X);
16858
16859 break;
16860 }
16861 }
16862 }
16863
16864 // FMUL -> FMA combines:
16865 if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
16866 AddToWorklist(Fused.getNode());
16867 return Fused;
16868 }
16869
16870 // Don't do `combineFMulOrFDivWithIntPow2` until after FMUL -> FMA has been
16871 // able to run.
16872 if (SDValue R = combineFMulOrFDivWithIntPow2(N))
16873 return R;
16874
16875 return SDValue();
16876}
16877
16878template <class MatchContextClass> SDValue DAGCombiner::visitFMA(SDNode *N) {
16879 SDValue N0 = N->getOperand(0);
16880 SDValue N1 = N->getOperand(1);
16881 SDValue N2 = N->getOperand(2);
16882 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
16883 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
16884 EVT VT = N->getValueType(0);
16885 SDLoc DL(N);
16886 const TargetOptions &Options = DAG.getTarget().Options;
16887 // FMA nodes have flags that propagate to the created nodes.
16888 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
16889 MatchContextClass matcher(DAG, TLI, N);
16890
16891 bool CanReassociate =
16892 Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
16893
16894 // Constant fold FMA.
16895 if (isa<ConstantFPSDNode>(N0) &&
16896 isa<ConstantFPSDNode>(N1) &&
16897 isa<ConstantFPSDNode>(N2)) {
16898 return matcher.getNode(ISD::FMA, DL, VT, N0, N1, N2);
16899 }
16900
16901 // (-N0 * -N1) + N2 --> (N0 * N1) + N2
16902   TargetLowering::NegatibleCost CostN0 =
16903       TargetLowering::NegatibleCost::Expensive;
16904   TargetLowering::NegatibleCost CostN1 =
16905       TargetLowering::NegatibleCost::Expensive;
16906   SDValue NegN0 =
16907 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
16908 if (NegN0) {
16909 HandleSDNode NegN0Handle(NegN0);
16910 SDValue NegN1 =
16911 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
16912 if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
16913                 CostN1 == TargetLowering::NegatibleCost::Cheaper))
16914     return matcher.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2);
16915 }
16916
16917 // FIXME: use fast math flags instead of Options.UnsafeFPMath
16918 if (Options.UnsafeFPMath) {
16919 if (N0CFP && N0CFP->isZero())
16920 return N2;
16921 if (N1CFP && N1CFP->isZero())
16922 return N2;
16923 }
16924
16925 // FIXME: Support splat of constant.
16926 if (N0CFP && N0CFP->isExactlyValue(1.0))
16927 return matcher.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
16928 if (N1CFP && N1CFP->isExactlyValue(1.0))
16929 return matcher.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);
16930
16931 // Canonicalize (fma c, x, y) -> (fma x, c, y)
16932   if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
16933       !DAG.isConstantFPBuildVectorOrConstantFP(N1))
16934     return matcher.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
16935
16936 if (CanReassociate) {
16937 // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
16938 if (matcher.match(N2, ISD::FMUL) && N0 == N2.getOperand(0) &&
16939         DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
16940         DAG.isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
16941       return matcher.getNode(
16942 ISD::FMUL, DL, VT, N0,
16943 matcher.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1)));
16944 }
16945
16946 // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
16947 if (matcher.match(N0, ISD::FMUL) &&
16948         DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
16949         DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
16950       return matcher.getNode(
16951 ISD::FMA, DL, VT, N0.getOperand(0),
16952 matcher.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1)), N2);
16953 }
16954 }
16955
16956 // (fma x, -1, y) -> (fadd (fneg x), y)
16957 // FIXME: Support splat of constant.
16958 if (N1CFP) {
16959 if (N1CFP->isExactlyValue(1.0))
16960 return matcher.getNode(ISD::FADD, DL, VT, N0, N2);
16961
16962 if (N1CFP->isExactlyValue(-1.0) &&
16963 (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
16964 SDValue RHSNeg = matcher.getNode(ISD::FNEG, DL, VT, N0);
16965 AddToWorklist(RHSNeg.getNode());
16966 return matcher.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
16967 }
16968
16969     // fma (fneg x), K, y -> fma x, -K, y
16970 if (matcher.match(N0, ISD::FNEG) &&
16971         (TLI.isOperationLegal(ISD::ConstantFP, VT) ||
16972          (N1.hasOneUse() &&
16973 !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT, ForCodeSize)))) {
16974 return matcher.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
16975 matcher.getNode(ISD::FNEG, DL, VT, N1), N2);
16976 }
16977 }
16978
16979 // FIXME: Support splat of constant.
16980 if (CanReassociate) {
16981 // (fma x, c, x) -> (fmul x, (c+1))
16982 if (N1CFP && N0 == N2) {
16983 return matcher.getNode(ISD::FMUL, DL, VT, N0,
16984 matcher.getNode(ISD::FADD, DL, VT, N1,
16985 DAG.getConstantFP(1.0, DL, VT)));
16986 }
16987
16988 // (fma x, c, (fneg x)) -> (fmul x, (c-1))
16989 if (N1CFP && matcher.match(N2, ISD::FNEG) && N2.getOperand(0) == N0) {
16990 return matcher.getNode(ISD::FMUL, DL, VT, N0,
16991 matcher.getNode(ISD::FADD, DL, VT, N1,
16992 DAG.getConstantFP(-1.0, DL, VT)));
16993 }
16994 }
16995
16996 // fold ((fma (fneg X), Y, (fneg Z)) -> fneg (fma X, Y, Z))
16997 // fold ((fma X, (fneg Y), (fneg Z)) -> fneg (fma X, Y, Z))
16998 if (!TLI.isFNegFree(VT))
16999     if (SDValue Neg = TLI.getCheaperNegatedExpression(
17000             SDValue(N, 0), DAG, LegalOperations, ForCodeSize))
17001 return matcher.getNode(ISD::FNEG, DL, VT, Neg);
17002 return SDValue();
17003}
17004
17005SDValue DAGCombiner::visitFMAD(SDNode *N) {
17006 SDValue N0 = N->getOperand(0);
17007 SDValue N1 = N->getOperand(1);
17008 SDValue N2 = N->getOperand(2);
17009 EVT VT = N->getValueType(0);
17010 SDLoc DL(N);
17011
17012 // Constant fold FMAD.
17013 if (isa<ConstantFPSDNode>(N0) && isa<ConstantFPSDNode>(N1) &&
17014 isa<ConstantFPSDNode>(N2))
17015 return DAG.getNode(ISD::FMAD, DL, VT, N0, N1, N2);
17016
17017 return SDValue();
17018}
17019
17020// Combine multiple FDIVs with the same divisor into multiple FMULs by the
17021// reciprocal.
17022// E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
17023 // Notice that this is not always beneficial. One reason is that different targets
17024 // may have different costs for FDIV and FMUL, so sometimes the cost of two
17025 // FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
17026 // is that the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
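//
// As an illustrative source-level sketch (not part of this file), the
// transform rewrites
//
//   double f(double a, double b, double d) { return a / d + b / d; }
//
// into the equivalent (under the reassociation/reciprocal fast-math
// assumptions this combine checks for)
//
//   double g(double a, double b, double d) {
//     double Recip = 1.0 / d;          // one division
//     return a * Recip + b * Recip;    // one multiply per former division
//   }
//
// which only pays off when enough users share the divisor (see the
// combineRepeatedFPDivisors() threshold queried below).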
17027SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
17028 // TODO: Limit this transform based on optsize/minsize - it always creates at
17029 // least 1 extra instruction. But the perf win may be substantial enough
17030 // that only minsize should restrict this.
17031 bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
17032 const SDNodeFlags Flags = N->getFlags();
17033 if (LegalDAG || (!UnsafeMath && !Flags.hasAllowReciprocal()))
17034 return SDValue();
17035
17036 // Skip if current node is a reciprocal/fneg-reciprocal.
17037 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
17038 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, /* AllowUndefs */ true);
17039 if (N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0)))
17040 return SDValue();
17041
17042 // Exit early if the target does not want this transform or if there can't
17043 // possibly be enough uses of the divisor to make the transform worthwhile.
17044 unsigned MinUses = TLI.combineRepeatedFPDivisors();
17045
17046 // For splat vectors, scale the number of uses by the splat factor. If we can
17047 // convert the division into a scalar op, that will likely be much faster.
17048 unsigned NumElts = 1;
17049 EVT VT = N->getValueType(0);
17050 if (VT.isVector() && DAG.isSplatValue(N1))
17051 NumElts = VT.getVectorMinNumElements();
17052
17053 if (!MinUses || (N1->use_size() * NumElts) < MinUses)
17054 return SDValue();
17055
17056 // Find all FDIV users of the same divisor.
17057 // Use a set because duplicates may be present in the user list.
17058   SetVector<SDNode *> Users;
17059   for (auto *U : N1->uses()) {
17060 if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
17061 // Skip X/sqrt(X) that has not been simplified to sqrt(X) yet.
17062 if (U->getOperand(1).getOpcode() == ISD::FSQRT &&
17063 U->getOperand(0) == U->getOperand(1).getOperand(0) &&
17064 U->getFlags().hasAllowReassociation() &&
17065 U->getFlags().hasNoSignedZeros())
17066 continue;
17067
17068 // This division is eligible for optimization only if global unsafe math
17069 // is enabled or if this division allows reciprocal formation.
17070 if (UnsafeMath || U->getFlags().hasAllowReciprocal())
17071 Users.insert(U);
17072 }
17073 }
17074
17075 // Now that we have the actual number of divisor uses, make sure it meets
17076 // the minimum threshold specified by the target.
17077 if ((Users.size() * NumElts) < MinUses)
17078 return SDValue();
17079
17080 SDLoc DL(N);
17081 SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
17082 SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
17083
17084 // Dividend / Divisor -> Dividend * Reciprocal
17085 for (auto *U : Users) {
17086 SDValue Dividend = U->getOperand(0);
17087 if (Dividend != FPOne) {
17088 SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
17089 Reciprocal, Flags);
17090 CombineTo(U, NewNode);
17091 } else if (U != Reciprocal.getNode()) {
17092 // In the absence of fast-math-flags, this user node is always the
17093 // same node as Reciprocal, but with FMF they may be different nodes.
17094 CombineTo(U, Reciprocal);
17095 }
17096 }
17097 return SDValue(N, 0); // N was replaced.
17098}
17099
17100SDValue DAGCombiner::visitFDIV(SDNode *N) {
17101 SDValue N0 = N->getOperand(0);
17102 SDValue N1 = N->getOperand(1);
17103 EVT VT = N->getValueType(0);
17104 SDLoc DL(N);
17105 const TargetOptions &Options = DAG.getTarget().Options;
17106 SDNodeFlags Flags = N->getFlags();
17107 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17108
17109 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
17110 return R;
17111
17112 // fold (fdiv c1, c2) -> c1/c2
17113 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FDIV, DL, VT, {N0, N1}))
17114 return C;
17115
17116 // fold vector ops
17117 if (VT.isVector())
17118 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
17119 return FoldedVOp;
17120
17121 if (SDValue NewSel = foldBinOpIntoSelect(N))
17122 return NewSel;
17123
17124   if (SDValue V = combineRepeatedFPDivisors(N))
17125     return V;
17126
17127 if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
17128 // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
17129 if (auto *N1CFP = dyn_cast<ConstantFPSDNode>(N1)) {
17130 // Compute the reciprocal 1.0 / c2.
17131 const APFloat &N1APF = N1CFP->getValueAPF();
17132 APFloat Recip(N1APF.getSemantics(), 1); // 1.0
17133 APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
17134 // Only do the transform if the reciprocal is a legal fp immediate that
17135 // isn't too nasty (eg NaN, denormal, ...).
17136 if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
17137 (!LegalOperations ||
17138 // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
17139 // backend)... we should handle this gracefully after Legalize.
17140 // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
17141            TLI.isOperationLegal(ISD::ConstantFP, VT) ||
17142            TLI.isFPImmLegal(Recip, VT, ForCodeSize)))
17143 return DAG.getNode(ISD::FMUL, DL, VT, N0,
17144 DAG.getConstantFP(Recip, DL, VT));
17145 }
17146
17147 // If this FDIV is part of a reciprocal square root, it may be folded
17148 // into a target-specific square root estimate instruction.
17149 if (N1.getOpcode() == ISD::FSQRT) {
17150 if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags))
17151 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
17152 } else if (N1.getOpcode() == ISD::FP_EXTEND &&
17153 N1.getOperand(0).getOpcode() == ISD::FSQRT) {
17154 if (SDValue RV =
17155 buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
17156 RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
17157 AddToWorklist(RV.getNode());
17158 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
17159 }
17160 } else if (N1.getOpcode() == ISD::FP_ROUND &&
17161 N1.getOperand(0).getOpcode() == ISD::FSQRT) {
17162 if (SDValue RV =
17163 buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
17164 RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
17165 AddToWorklist(RV.getNode());
17166 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
17167 }
17168 } else if (N1.getOpcode() == ISD::FMUL) {
17169 // Look through an FMUL. Even though this won't remove the FDIV directly,
17170 // it's still worthwhile to get rid of the FSQRT if possible.
17171 SDValue Sqrt, Y;
17172 if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
17173 Sqrt = N1.getOperand(0);
17174 Y = N1.getOperand(1);
17175 } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
17176 Sqrt = N1.getOperand(1);
17177 Y = N1.getOperand(0);
17178 }
17179 if (Sqrt.getNode()) {
17180 // If the other multiply operand is known positive, pull it into the
17181 // sqrt. That will eliminate the division if we convert to an estimate.
17182 if (Flags.hasAllowReassociation() && N1.hasOneUse() &&
17183 N1->getFlags().hasAllowReassociation() && Sqrt.hasOneUse()) {
17184 SDValue A;
17185 if (Y.getOpcode() == ISD::FABS && Y.hasOneUse())
17186 A = Y.getOperand(0);
17187 else if (Y == Sqrt.getOperand(0))
17188 A = Y;
17189 if (A) {
17190 // X / (fabs(A) * sqrt(Z)) --> X / sqrt(A*A*Z) --> X * rsqrt(A*A*Z)
17191 // X / (A * sqrt(A)) --> X / sqrt(A*A*A) --> X * rsqrt(A*A*A)
17192 SDValue AA = DAG.getNode(ISD::FMUL, DL, VT, A, A);
17193 SDValue AAZ =
17194 DAG.getNode(ISD::FMUL, DL, VT, AA, Sqrt.getOperand(0));
17195 if (SDValue Rsqrt = buildRsqrtEstimate(AAZ, Flags))
17196 return DAG.getNode(ISD::FMUL, DL, VT, N0, Rsqrt);
17197
17198 // Estimate creation failed. Clean up speculatively created nodes.
17199 recursivelyDeleteUnusedNodes(AAZ.getNode());
17200 }
17201 }
17202
17203 // We found a FSQRT, so try to make this fold:
17204 // X / (Y * sqrt(Z)) -> X * (rsqrt(Z) / Y)
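        // Algebraically (valid under the reciprocal/reassociation fast-math
        // assumptions that guard this block):
        //   X / (Y * sqrt(Z)) == X * (1 / sqrt(Z)) * (1 / Y)
        //                     == X * (rsqrt(Z) / Y)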
17205 if (SDValue Rsqrt = buildRsqrtEstimate(Sqrt.getOperand(0), Flags)) {
17206 SDValue Div = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, Rsqrt, Y);
17207 AddToWorklist(Div.getNode());
17208 return DAG.getNode(ISD::FMUL, DL, VT, N0, Div);
17209 }
17210 }
17211 }
17212
17213 // Fold into a reciprocal estimate and multiply instead of a real divide.
17214 if (Options.NoInfsFPMath || Flags.hasNoInfs())
17215 if (SDValue RV = BuildDivEstimate(N0, N1, Flags))
17216 return RV;
17217 }
17218
17219 // Fold X/Sqrt(X) -> Sqrt(X)
17220 if ((Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) &&
17221 (Options.UnsafeFPMath || Flags.hasAllowReassociation()))
17222 if (N1.getOpcode() == ISD::FSQRT && N0 == N1.getOperand(0))
17223 return N1;
17224
17225 // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
17226   TargetLowering::NegatibleCost CostN0 =
17227       TargetLowering::NegatibleCost::Expensive;
17228   TargetLowering::NegatibleCost CostN1 =
17229       TargetLowering::NegatibleCost::Expensive;
17230   SDValue NegN0 =
17231 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
17232 if (NegN0) {
17233 HandleSDNode NegN0Handle(NegN0);
17234 SDValue NegN1 =
17235 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
17236 if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
17237                 CostN1 == TargetLowering::NegatibleCost::Cheaper))
17238     return DAG.getNode(ISD::FDIV, SDLoc(N), VT, NegN0, NegN1);
17239 }
17240
17241 if (SDValue R = combineFMulOrFDivWithIntPow2(N))
17242 return R;
17243
17244 return SDValue();
17245}
17246
17247SDValue DAGCombiner::visitFREM(SDNode *N) {
17248 SDValue N0 = N->getOperand(0);
17249 SDValue N1 = N->getOperand(1);
17250 EVT VT = N->getValueType(0);
17251 SDNodeFlags Flags = N->getFlags();
17252 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17253
17254 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
17255 return R;
17256
17257 // fold (frem c1, c2) -> fmod(c1,c2)
17258 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FREM, SDLoc(N), VT, {N0, N1}))
17259 return C;
17260
17261 if (SDValue NewSel = foldBinOpIntoSelect(N))
17262 return NewSel;
17263
17264 return SDValue();
17265}
17266
17267SDValue DAGCombiner::visitFSQRT(SDNode *N) {
17268 SDNodeFlags Flags = N->getFlags();
17269 const TargetOptions &Options = DAG.getTarget().Options;
17270
17271 // Require 'ninf' flag since sqrt(+Inf) = +Inf, but the estimation goes as:
17272 // sqrt(+Inf) == rsqrt(+Inf) * +Inf = 0 * +Inf = NaN
17273 if (!Flags.hasApproximateFuncs() ||
17274 (!Options.NoInfsFPMath && !Flags.hasNoInfs()))
17275 return SDValue();
17276
17277 SDValue N0 = N->getOperand(0);
17278 if (TLI.isFsqrtCheap(N0, DAG))
17279 return SDValue();
17280
17281 // FSQRT nodes have flags that propagate to the created nodes.
17282 // TODO: If this is N0/sqrt(N0), and we reach this node before trying to
17283 // transform the fdiv, we may produce a sub-optimal estimate sequence
17284 // because the reciprocal calculation may not have to filter out a
17285 // 0.0 input.
17286 return buildSqrtEstimate(N0, Flags);
17287}
17288
17289/// copysign(x, fp_extend(y)) -> copysign(x, y)
17290/// copysign(x, fp_round(y)) -> copysign(x, y)
17291/// Operands to the functions are the type of X and Y respectively.
17292static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(EVT XTy, EVT YTy) {
17293 // Always fold no-op FP casts.
17294 if (XTy == YTy)
17295 return true;
17296
17297 // Do not optimize out type conversion of f128 type yet.
17298 // For some targets like x86_64, configuration is changed to keep one f128
17299 // value in one SSE register, but instruction selection cannot handle
17300 // FCOPYSIGN on SSE registers yet.
17301 if (YTy == MVT::f128)
17302 return false;
17303
17304   return !YTy.isVector() || EnableVectorFCopySignExtendRound;
17305 }
17306
17307 static bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
17308   SDValue N1 = N->getOperand(1);
17309 if (N1.getOpcode() != ISD::FP_EXTEND &&
17310 N1.getOpcode() != ISD::FP_ROUND)
17311 return false;
17312 EVT N1VT = N1->getValueType(0);
17313 EVT N1Op0VT = N1->getOperand(0).getValueType();
17314 return CanCombineFCOPYSIGN_EXTEND_ROUND(N1VT, N1Op0VT);
17315}
17316
17317SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
17318 SDValue N0 = N->getOperand(0);
17319 SDValue N1 = N->getOperand(1);
17320 EVT VT = N->getValueType(0);
17321
17322 // fold (fcopysign c1, c2) -> fcopysign(c1,c2)
17323 if (SDValue C =
17324 DAG.FoldConstantArithmetic(ISD::FCOPYSIGN, SDLoc(N), VT, {N0, N1}))
17325 return C;
17326
17327 if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N->getOperand(1))) {
17328 const APFloat &V = N1C->getValueAPF();
17329 // copysign(x, c1) -> fabs(x) iff ispos(c1)
17330 // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
17331 if (!V.isNegative()) {
17332 if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
17333 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
17334 } else {
17335 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
17336 return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
17337 DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
17338 }
17339 }
17340
17341 // copysign(fabs(x), y) -> copysign(x, y)
17342 // copysign(fneg(x), y) -> copysign(x, y)
17343 // copysign(copysign(x,z), y) -> copysign(x, y)
17344 if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
17345 N0.getOpcode() == ISD::FCOPYSIGN)
17346 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1);
17347
17348 // copysign(x, abs(y)) -> abs(x)
17349 if (N1.getOpcode() == ISD::FABS)
17350 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
17351
17352 // copysign(x, copysign(y,z)) -> copysign(x, z)
17353 if (N1.getOpcode() == ISD::FCOPYSIGN)
17354 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1));
17355
17356 // copysign(x, fp_extend(y)) -> copysign(x, y)
17357 // copysign(x, fp_round(y)) -> copysign(x, y)
17358   if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
17359     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));
17360
17361 return SDValue();
17362}
17363
17364SDValue DAGCombiner::visitFPOW(SDNode *N) {
17365 ConstantFPSDNode *ExponentC = isConstOrConstSplatFP(N->getOperand(1));
17366 if (!ExponentC)
17367 return SDValue();
17368 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17369
17370 // Try to convert x ** (1/3) into cube root.
17371 // TODO: Handle the various flavors of long double.
17372 // TODO: Since we're approximating, we don't need an exact 1/3 exponent.
17373 // Some range near 1/3 should be fine.
17374 EVT VT = N->getValueType(0);
17375 if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(1.0f/3.0f)) ||
17376 (VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(1.0/3.0))) {
17377 // pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0.
17378 // pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf.
17379 // pow(-val, 1/3) = nan; cbrt(-val) = -num.
17380 // For regular numbers, rounding may cause the results to differ.
17381 // Therefore, we require { nsz ninf nnan afn } for this transform.
17382 // TODO: We could select out the special cases if we don't have nsz/ninf.
17383 SDNodeFlags Flags = N->getFlags();
17384 if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || !Flags.hasNoNaNs() ||
17385 !Flags.hasApproximateFuncs())
17386 return SDValue();
17387
17388 // Do not create a cbrt() libcall if the target does not have it, and do not
17389 // turn a pow that has lowering support into a cbrt() libcall.
17390 if (!DAG.getLibInfo().has(LibFunc_cbrt) ||
17391         (!DAG.getTargetLoweringInfo().isOperationExpand(ISD::FPOW, VT) &&
17392          DAG.getTargetLoweringInfo().isOperationExpand(ISD::FCBRT, VT)))
17393       return SDValue();
17394
17395 return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0));
17396 }
17397
17398 // Try to convert x ** (1/4) and x ** (3/4) into square roots.
17399 // x ** (1/2) is canonicalized to sqrt, so we do not bother with that case.
17400 // TODO: This could be extended (using a target hook) to handle smaller
17401 // power-of-2 fractional exponents.
17402 bool ExponentIs025 = ExponentC->getValueAPF().isExactlyValue(0.25);
17403 bool ExponentIs075 = ExponentC->getValueAPF().isExactlyValue(0.75);
17404 if (ExponentIs025 || ExponentIs075) {
17405 // pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0.
17406 // pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) = NaN.
17407 // pow(-0.0, 0.75) = +0.0; sqrt(-0.0) * sqrt(sqrt(-0.0)) = +0.0.
17408 // pow(-inf, 0.75) = +inf; sqrt(-inf) * sqrt(sqrt(-inf)) = NaN.
17409 // For regular numbers, rounding may cause the results to differ.
17410 // Therefore, we require { nsz ninf afn } for this transform.
17411 // TODO: We could select out the special cases if we don't have nsz/ninf.
17412 SDNodeFlags Flags = N->getFlags();
17413
17414 // We only need no signed zeros for the 0.25 case.
17415 if ((!Flags.hasNoSignedZeros() && ExponentIs025) || !Flags.hasNoInfs() ||
17416 !Flags.hasApproximateFuncs())
17417 return SDValue();
17418
17419 // Don't double the number of libcalls. We are trying to inline fast code.
17420     if (!DAG.getTargetLoweringInfo().isOperationLegalOrCustom(ISD::FSQRT, VT))
17421       return SDValue();
17422
17423 // Assume that libcalls are the smallest code.
17424 // TODO: This restriction should probably be lifted for vectors.
17425 if (ForCodeSize)
17426 return SDValue();
17427
17428 // pow(X, 0.25) --> sqrt(sqrt(X))
17429 SDLoc DL(N);
17430 SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0));
17431 SDValue SqrtSqrt = DAG.getNode(ISD::FSQRT, DL, VT, Sqrt);
17432 if (ExponentIs025)
17433 return SqrtSqrt;
17434 // pow(X, 0.75) --> sqrt(X) * sqrt(sqrt(X))
17435 return DAG.getNode(ISD::FMUL, DL, VT, Sqrt, SqrtSqrt);
17436 }
17437
17438 return SDValue();
17439}
17440
17441 static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG,
17442                                const TargetLowering &TLI) {
17443 // We only do this if the target has legal ftrunc. Otherwise, we'd likely be
17444 // replacing casts with a libcall. We also must be allowed to ignore -0.0
17445 // because FTRUNC will return -0.0 for (-1.0, -0.0), but using integer
17446 // conversions would return +0.0.
17447 // FIXME: We should be able to use node-level FMF here.
17448 // TODO: If strict math, should we use FABS (+ range check for signed cast)?
17449 EVT VT = N->getValueType(0);
17450 if (!TLI.isOperationLegal(ISD::FTRUNC, VT) ||
17451       !DAG.getTarget().Options.NoSignedZerosFPMath)
17452     return SDValue();
17453
17454 // fptosi/fptoui round towards zero, so converting from FP to integer and
17455 // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X
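  // For example (illustrative): truncf(2.7f) == 2.0f == (float)(int)2.7f, and
  // truncf(-2.7f) == -2.0f == (float)(int)-2.7f, because both the fp-to-int
  // and int-to-fp steps round toward zero. The -0.0 caveat above is why
  // NoSignedZerosFPMath is required: truncf(-0.5f) is -0.0f, while
  // (float)(int)-0.5f is +0.0f.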
17456 SDValue N0 = N->getOperand(0);
17457 if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT &&
17458 N0.getOperand(0).getValueType() == VT)
17459 return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
17460
17461 if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT &&
17462 N0.getOperand(0).getValueType() == VT)
17463 return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
17464
17465 return SDValue();
17466}
17467
17468SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
17469 SDValue N0 = N->getOperand(0);
17470 EVT VT = N->getValueType(0);
17471 EVT OpVT = N0.getValueType();
17472
17473 // [us]itofp(undef) = 0, because the result value is bounded.
17474 if (N0.isUndef())
17475 return DAG.getConstantFP(0.0, SDLoc(N), VT);
17476
17477 // fold (sint_to_fp c1) -> c1fp
17478   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
17479       // ...but only if the target supports immediate floating-point values
17480       (!LegalOperations ||
17481        TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
17482     return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
17483
17484 // If the input is a legal type, and SINT_TO_FP is not legal on this target,
17485 // but UINT_TO_FP is legal on this target, try to convert.
17486 if (!hasOperation(ISD::SINT_TO_FP, OpVT) &&
17487 hasOperation(ISD::UINT_TO_FP, OpVT)) {
17488 // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
17489 if (DAG.SignBitIsZero(N0))
17490 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
17491 }
17492
17493 // The next optimizations are desirable only if SELECT_CC can be lowered.
17494 // fold (sint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), -1.0, 0.0)
17495 if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
17496 !VT.isVector() &&
17497 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
17498 SDLoc DL(N);
17499 return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(-1.0, DL, VT),
17500 DAG.getConstantFP(0.0, DL, VT));
17501 }
17502
17503 // fold (sint_to_fp (zext (setcc x, y, cc))) ->
17504 // (select (setcc x, y, cc), 1.0, 0.0)
17505 if (N0.getOpcode() == ISD::ZERO_EXTEND &&
17506 N0.getOperand(0).getOpcode() == ISD::SETCC && !VT.isVector() &&
17507 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
17508 SDLoc DL(N);
17509 return DAG.getSelect(DL, VT, N0.getOperand(0),
17510 DAG.getConstantFP(1.0, DL, VT),
17511 DAG.getConstantFP(0.0, DL, VT));
17512 }
17513
17514 if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
17515 return FTrunc;
17516
17517 return SDValue();
17518}
17519
17520SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
17521 SDValue N0 = N->getOperand(0);
17522 EVT VT = N->getValueType(0);
17523 EVT OpVT = N0.getValueType();
17524
17525 // [us]itofp(undef) = 0, because the result value is bounded.
17526 if (N0.isUndef())
17527 return DAG.getConstantFP(0.0, SDLoc(N), VT);
17528
17529 // fold (uint_to_fp c1) -> c1fp
17530   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
17531       // ...but only if the target supports immediate floating-point values
17532       (!LegalOperations ||
17533        TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
17534     return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
17535
17536 // If the input is a legal type, and UINT_TO_FP is not legal on this target,
17537 // but SINT_TO_FP is legal on this target, try to convert.
17538 if (!hasOperation(ISD::UINT_TO_FP, OpVT) &&
17539 hasOperation(ISD::SINT_TO_FP, OpVT)) {
17540 // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
17541 if (DAG.SignBitIsZero(N0))
17542 return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
17543 }
17544
17545 // fold (uint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), 1.0, 0.0)
17546 if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
17547 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
17548 SDLoc DL(N);
17549 return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(1.0, DL, VT),
17550 DAG.getConstantFP(0.0, DL, VT));
17551 }
17552
17553 if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
17554 return FTrunc;
17555
17556 return SDValue();
17557}
17558
17559 // Fold (fp_to_{s/u}int ({s/u}int_to_fp x)) -> zext x, sext x, trunc x, or x
17560 static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
17561   SDValue N0 = N->getOperand(0);
17562 EVT VT = N->getValueType(0);
17563
17564 if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
17565 return SDValue();
17566
17567 SDValue Src = N0.getOperand(0);
17568 EVT SrcVT = Src.getValueType();
17569 bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
17570 bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
17571
17572 // We can safely assume the conversion won't overflow the output range,
17573 // because (for example) (uint8_t)18293.f is undefined behavior.
17574
17575 // Since we can assume the conversion won't overflow, our decision as to
17576 // whether the input will fit in the float should depend on the minimum
17577 // of the input range and output range.
17578
17579 // This means this is also safe for a signed input and unsigned output, since
17580 // a negative input would lead to undefined behavior.
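  // For example (illustrative): folding (fp_to_sint (sint_to_fp (i16 x))) to
  // an i32 result through f32 gives ActualSize = min(16 - 1, 32) = 15 bits,
  // which fits in f32's 24-bit significand, so the round trip is exact and
  // the pair folds to (sext x).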
17581 unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
17582 unsigned OutputSize = (int)VT.getScalarSizeInBits();
17583 unsigned ActualSize = std::min(InputSize, OutputSize);
17584 const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());
17585
17586 // We can only fold away the float conversion if the input range can be
17587 // represented exactly in the float range.
17588 if (APFloat::semanticsPrecision(sem) >= ActualSize) {
17589 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
17590 unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
17591                                                  : ISD::ZERO_EXTEND;
17592       return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
17593 }
17594 if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
17595 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
17596 return DAG.getBitcast(VT, Src);
17597 }
17598 return SDValue();
17599}
17600
17601SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
17602 SDValue N0 = N->getOperand(0);
17603 EVT VT = N->getValueType(0);
17604
17605 // fold (fp_to_sint undef) -> undef
17606 if (N0.isUndef())
17607 return DAG.getUNDEF(VT);
17608
17609 // fold (fp_to_sint c1fp) -> c1
17610   if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17611     return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
17612
17613 return FoldIntToFPToInt(N, DAG);
17614}
17615
17616SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
17617 SDValue N0 = N->getOperand(0);
17618 EVT VT = N->getValueType(0);
17619
17620 // fold (fp_to_uint undef) -> undef
17621 if (N0.isUndef())
17622 return DAG.getUNDEF(VT);
17623
17624 // fold (fp_to_uint c1fp) -> c1
17625   if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17626     return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
17627
17628 return FoldIntToFPToInt(N, DAG);
17629}
17630
17631SDValue DAGCombiner::visitXRINT(SDNode *N) {
17632 SDValue N0 = N->getOperand(0);
17633 EVT VT = N->getValueType(0);
17634
17635 // fold (lrint|llrint undef) -> undef
17636 if (N0.isUndef())
17637 return DAG.getUNDEF(VT);
17638
17639 // fold (lrint|llrint c1fp) -> c1
17640   if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17641     return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0);
17642
17643 return SDValue();
17644}
17645
17646SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
17647 SDValue N0 = N->getOperand(0);
17648 SDValue N1 = N->getOperand(1);
17649 EVT VT = N->getValueType(0);
17650
17651 // fold (fp_round c1fp) -> c1fp
17652 if (SDValue C =
17653 DAG.FoldConstantArithmetic(ISD::FP_ROUND, SDLoc(N), VT, {N0, N1}))
17654 return C;
17655
17656 // fold (fp_round (fp_extend x)) -> x
17657 if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
17658 return N0.getOperand(0);
17659
17660 // fold (fp_round (fp_round x)) -> (fp_round x)
17661 if (N0.getOpcode() == ISD::FP_ROUND) {
17662 const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
17663 const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;
17664
17665 // Avoid folding legal fp_rounds into non-legal ones.
17666 if (!hasOperation(ISD::FP_ROUND, VT))
17667 return SDValue();
17668
17669 // Skip this folding if it results in an fp_round from f80 to f16.
17670 //
17671 // f80 to f16 always generates an expensive (and as yet, unimplemented)
17672 // libcall to __truncxfhf2 instead of selecting native f16 conversion
17673 // instructions from f32 or f64. Moreover, the first (value-preserving)
17674 // fp_round from f80 to either f32 or f64 may become a NOP in platforms like
17675 // x86.
17676 if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
17677 return SDValue();
17678
17679 // If the first fp_round isn't a value preserving truncation, it might
17680 // introduce a tie in the second fp_round, that wouldn't occur in the
17681 // single-step fp_round we want to fold to.
17682 // In other words, double rounding isn't the same as rounding.
17683 // Also, this is a value preserving truncation iff both fp_round's are.
17684 if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
17685 SDLoc DL(N);
17686 return DAG.getNode(
17687 ISD::FP_ROUND, DL, VT, N0.getOperand(0),
17688 DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL, /*isTarget=*/true));
17689 }
17690 }
17691
17692 // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
17693 // Note: From a legality perspective, this is a two step transform. First,
17694 // we duplicate the fp_round to the arguments of the copysign, then we
17695 // eliminate the fp_round on Y. The second step requires an additional
17696 // predicate to match the implementation above.
17697 if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse() &&
17698       CanCombineFCOPYSIGN_EXTEND_ROUND(VT,
17699                                        N0.getValueType())) {
17700 SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
17701 N0.getOperand(0), N1);
17702 AddToWorklist(Tmp.getNode());
17703 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
17704 Tmp, N0.getOperand(1));
17705 }
17706
17707 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
17708 return NewVSel;
17709
17710 return SDValue();
17711}
17712
17713SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
17714 SDValue N0 = N->getOperand(0);
17715 EVT VT = N->getValueType(0);
17716
17717 if (VT.isVector())
17718 if (SDValue FoldedVOp = SimplifyVCastOp(N, SDLoc(N)))
17719 return FoldedVOp;
17720
17721 // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
17722 if (N->hasOneUse() &&
17723 N->use_begin()->getOpcode() == ISD::FP_ROUND)
17724 return SDValue();
17725
17726 // fold (fp_extend c1fp) -> c1fp
17727   if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17728     return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);
17729
17730 // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
17731 if (N0.getOpcode() == ISD::FP16_TO_FP &&
17732       TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
17733     return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));
17734
17735 // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
17736 // value of X.
17737 if (N0.getOpcode() == ISD::FP_ROUND
17738 && N0.getConstantOperandVal(1) == 1) {
17739 SDValue In = N0.getOperand(0);
17740 if (In.getValueType() == VT) return In;
17741 if (VT.bitsLT(In.getValueType()))
17742 return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
17743 In, N0.getOperand(1));
17744 return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
17745 }
17746
17747 // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
17748 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
17749       TLI.isLoadExtLegalOrCustom(ISD::EXTLOAD, VT, N0.getValueType())) {
17750     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
17751 SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
17752 LN0->getChain(),
17753 LN0->getBasePtr(), N0.getValueType(),
17754 LN0->getMemOperand());
17755 CombineTo(N, ExtLoad);
17756 CombineTo(
17757 N0.getNode(),
17758 DAG.getNode(ISD::FP_ROUND, SDLoc(N0), N0.getValueType(), ExtLoad,
17759 DAG.getIntPtrConstant(1, SDLoc(N0), /*isTarget=*/true)),
17760 ExtLoad.getValue(1));
17761 return SDValue(N, 0); // Return N so it doesn't get rechecked!
17762 }
17763
17764 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
17765 return NewVSel;
17766
17767 return SDValue();
17768}
17769
17770SDValue DAGCombiner::visitFCEIL(SDNode *N) {
17771 SDValue N0 = N->getOperand(0);
17772 EVT VT = N->getValueType(0);
17773
17774 // fold (fceil c1) -> fceil(c1)
17775   if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17776     return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
17777
17778 return SDValue();
17779}
17780
17781SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
17782 SDValue N0 = N->getOperand(0);
17783 EVT VT = N->getValueType(0);
17784
17785 // fold (ftrunc c1) -> ftrunc(c1)
17786   if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17787     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
17788
17789 // fold ftrunc (known rounded int x) -> x
17790 // ftrunc is a part of fptosi/fptoui expansion on some targets, so this is
17791 // likely to be generated to extract integer from a rounded floating value.
17792 switch (N0.getOpcode()) {
17793 default: break;
17794 case ISD::FRINT:
17795 case ISD::FTRUNC:
17796 case ISD::FNEARBYINT:
17797 case ISD::FROUNDEVEN:
17798 case ISD::FFLOOR:
17799 case ISD::FCEIL:
17800 return N0;
17801 }
17802
17803 return SDValue();
17804}
17805
17806SDValue DAGCombiner::visitFFREXP(SDNode *N) {
17807 SDValue N0 = N->getOperand(0);
17808
17809 // fold (ffrexp c1) -> ffrexp(c1)
17810   if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17811     return DAG.getNode(ISD::FFREXP, SDLoc(N), N->getVTList(), N0);
17812 return SDValue();
17813}
17814
17815SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
17816 SDValue N0 = N->getOperand(0);
17817 EVT VT = N->getValueType(0);
17818
17819 // fold (ffloor c1) -> ffloor(c1)
17820   if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17821     return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
17822
17823 return SDValue();
17824}
17825
17826SDValue DAGCombiner::visitFNEG(SDNode *N) {
17827 SDValue N0 = N->getOperand(0);
17828 EVT VT = N->getValueType(0);
17829 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17830
17831 // Constant fold FNEG.
17832   if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17833     return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
17834
17835 if (SDValue NegN0 =
17836 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize))
17837 return NegN0;
17838
17839 // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
17840 // FIXME: This is duplicated in getNegatibleCost, but getNegatibleCost doesn't
17841 // know it was called from a context with a nsz flag if the input fsub does
17842 // not.
17843 if (N0.getOpcode() == ISD::FSUB &&
17844       (DAG.getTarget().Options.NoSignedZerosFPMath ||
17845        N->getFlags().hasNoSignedZeros()) && N0.hasOneUse()) {
17846 return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0.getOperand(1),
17847 N0.getOperand(0));
17848 }
17849
17850 if (SDValue Cast = foldSignChangeInBitcast(N))
17851 return Cast;
17852
17853 return SDValue();
17854}
17855
17856SDValue DAGCombiner::visitFMinMax(SDNode *N) {
17857 SDValue N0 = N->getOperand(0);
17858 SDValue N1 = N->getOperand(1);
17859 EVT VT = N->getValueType(0);
17860 const SDNodeFlags Flags = N->getFlags();
17861 unsigned Opc = N->getOpcode();
17862 bool PropagatesNaN = Opc == ISD::FMINIMUM || Opc == ISD::FMAXIMUM;
17863 bool IsMin = Opc == ISD::FMINNUM || Opc == ISD::FMINIMUM;
17864 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17865
17866 // Constant fold.
17867 if (SDValue C = DAG.FoldConstantArithmetic(Opc, SDLoc(N), VT, {N0, N1}))
17868 return C;
17869
17870 // Canonicalize to constant on RHS.
17871   if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
17872       !DAG.isConstantFPBuildVectorOrConstantFP(N1))
17873     return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
17874
17875 if (const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1)) {
17876 const APFloat &AF = N1CFP->getValueAPF();
17877
17878 // minnum(X, nan) -> X
17879 // maxnum(X, nan) -> X
17880 // minimum(X, nan) -> nan
17881 // maximum(X, nan) -> nan
17882 if (AF.isNaN())
17883 return PropagatesNaN ? N->getOperand(1) : N->getOperand(0);
17884
17885 // In the following folds, inf can be replaced with the largest finite
17886 // float, if the ninf flag is set.
17887 if (AF.isInfinity() || (Flags.hasNoInfs() && AF.isLargest())) {
17888 // minnum(X, -inf) -> -inf
17889 // maxnum(X, +inf) -> +inf
17890 // minimum(X, -inf) -> -inf if nnan
17891 // maximum(X, +inf) -> +inf if nnan
17892 if (IsMin == AF.isNegative() && (!PropagatesNaN || Flags.hasNoNaNs()))
17893 return N->getOperand(1);
17894
17895 // minnum(X, +inf) -> X if nnan
17896 // maxnum(X, -inf) -> X if nnan
17897 // minimum(X, +inf) -> X
17898 // maximum(X, -inf) -> X
17899 if (IsMin != AF.isNegative() && (PropagatesNaN || Flags.hasNoNaNs()))
17900 return N->getOperand(0);
17901 }
17902 }
17903
17904 if (SDValue SD = reassociateReduction(
17905 PropagatesNaN
17906               ? (IsMin ? ISD::VECREDUCE_FMINIMUM : ISD::VECREDUCE_FMAXIMUM)
17907               : (IsMin ? ISD::VECREDUCE_FMIN : ISD::VECREDUCE_FMAX),
17908           Opc, SDLoc(N), VT, N0, N1, Flags))
17909 return SD;
17910
17911 return SDValue();
17912}
17913
17914SDValue DAGCombiner::visitFABS(SDNode *N) {
17915 SDValue N0 = N->getOperand(0);
17916 EVT VT = N->getValueType(0);
17917
17918 // fold (fabs c1) -> fabs(c1)
17919   if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17920     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
17921
17922 // fold (fabs (fabs x)) -> (fabs x)
17923 if (N0.getOpcode() == ISD::FABS)
17924 return N->getOperand(0);
17925
17926 // fold (fabs (fneg x)) -> (fabs x)
17927 // fold (fabs (fcopysign x, y)) -> (fabs x)
17928 if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
17929 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
17930
17931 if (SDValue Cast = foldSignChangeInBitcast(N))
17932 return Cast;
17933
17934 return SDValue();
17935}
17936
17937SDValue DAGCombiner::visitBRCOND(SDNode *N) {
17938 SDValue Chain = N->getOperand(0);
17939 SDValue N1 = N->getOperand(1);
17940 SDValue N2 = N->getOperand(2);
17941
17942 // BRCOND(FREEZE(cond)) is equivalent to BRCOND(cond) (both are
17943 // nondeterministic jumps).
17944 if (N1->getOpcode() == ISD::FREEZE && N1.hasOneUse()) {
17945 return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain,
17946 N1->getOperand(0), N2);
17947 }
17948
17949 // Variant of the previous fold where there is a SETCC in between:
17950 // BRCOND(SETCC(FREEZE(X), CONST, Cond))
17951 // =>
17952 // BRCOND(FREEZE(SETCC(X, CONST, Cond)))
17953 // =>
17954 // BRCOND(SETCC(X, CONST, Cond))
17955 // This is correct if FREEZE(X) has one use and SETCC(FREEZE(X), CONST, Cond)
17956 // isn't equivalent to true or false.
17957 // For example, SETCC(FREEZE(X), -128, SETULT) cannot be folded to
17958 // FREEZE(SETCC(X, -128, SETULT)) because X can be poison.
17959 if (N1->getOpcode() == ISD::SETCC && N1.hasOneUse()) {
17960 SDValue S0 = N1->getOperand(0), S1 = N1->getOperand(1);
17961 ISD::CondCode Cond = cast<CondCodeSDNode>(N1->getOperand(2))->get();
17962 ConstantSDNode *S0C = dyn_cast<ConstantSDNode>(S0);
17963 ConstantSDNode *S1C = dyn_cast<ConstantSDNode>(S1);
17964 bool Updated = false;
17965
17966 // Is 'X Cond C' always true or false?
17967 auto IsAlwaysTrueOrFalse = [](ISD::CondCode Cond, ConstantSDNode *C) {
17968 bool False = (Cond == ISD::SETULT && C->isZero()) ||
17969 (Cond == ISD::SETLT && C->isMinSignedValue()) ||
17970 (Cond == ISD::SETUGT && C->isAllOnes()) ||
17971 (Cond == ISD::SETGT && C->isMaxSignedValue());
17972 bool True = (Cond == ISD::SETULE && C->isAllOnes()) ||
17973 (Cond == ISD::SETLE && C->isMaxSignedValue()) ||
17974 (Cond == ISD::SETUGE && C->isZero()) ||
17975 (Cond == ISD::SETGE && C->isMinSignedValue());
17976 return True || False;
17977 };
17978
17979 if (S0->getOpcode() == ISD::FREEZE && S0.hasOneUse() && S1C) {
17980 if (!IsAlwaysTrueOrFalse(Cond, S1C)) {
17981 S0 = S0->getOperand(0);
17982 Updated = true;
17983 }
17984 }
17985 if (S1->getOpcode() == ISD::FREEZE && S1.hasOneUse() && S0C) {
17986 if (!IsAlwaysTrueOrFalse(ISD::getSetCCSwappedOperands(Cond), S0C)) {
17987 S1 = S1->getOperand(0);
17988 Updated = true;
17989 }
17990 }
17991
17992 if (Updated)
17993 return DAG.getNode(
17994 ISD::BRCOND, SDLoc(N), MVT::Other, Chain,
17995 DAG.getSetCC(SDLoc(N1), N1->getValueType(0), S0, S1, Cond), N2);
17996 }
17997
17998 // If N is a constant we could fold this into a fallthrough or unconditional
17999 // branch. However that doesn't happen very often in normal code, because
18000 // Instcombine/SimplifyCFG should have handled the available opportunities.
18001 // If we did this folding here, it would be necessary to update the
18002 // MachineBasicBlock CFG, which is awkward.
18003
18004 // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
18005 // on the target.
18006 if (N1.getOpcode() == ISD::SETCC &&
18007       TLI.isOperationLegalOrCustom(ISD::BR_CC,
18008                                    N1.getOperand(0).getValueType())) {
18009 return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
18010 Chain, N1.getOperand(2),
18011 N1.getOperand(0), N1.getOperand(1), N2);
18012 }
18013
18014 if (N1.hasOneUse()) {
18015 // rebuildSetCC calls visitXor which may change the Chain when there is a
18016 // STRICT_FSETCC/STRICT_FSETCCS involved. Use a handle to track changes.
18017 HandleSDNode ChainHandle(Chain);
18018 if (SDValue NewN1 = rebuildSetCC(N1))
18019 return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other,
18020 ChainHandle.getValue(), NewN1, N2);
18021 }
18022
18023 return SDValue();
18024}
18025
18026SDValue DAGCombiner::rebuildSetCC(SDValue N) {
18027 if (N.getOpcode() == ISD::SRL ||
18028 (N.getOpcode() == ISD::TRUNCATE &&
18029 (N.getOperand(0).hasOneUse() &&
18030 N.getOperand(0).getOpcode() == ISD::SRL))) {
18031     // Look past the truncate.
18032 if (N.getOpcode() == ISD::TRUNCATE)
18033 N = N.getOperand(0);
18034
18035 // Match this pattern so that we can generate simpler code:
18036 //
18037 // %a = ...
18038 // %b = and i32 %a, 2
18039 // %c = srl i32 %b, 1
18040 // brcond i32 %c ...
18041 //
18042 // into
18043 //
18044 // %a = ...
18045 // %b = and i32 %a, 2
18046 // %c = setcc eq %b, 0
18047 // brcond %c ...
18048 //
18049 // This applies only when the AND constant value has one bit set and the
18050 // SRL constant is equal to the log2 of the AND constant. The back-end is
18051 // smart enough to convert the result into a TEST/JMP sequence.
18052 SDValue Op0 = N.getOperand(0);
18053 SDValue Op1 = N.getOperand(1);
18054
18055 if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
18056 SDValue AndOp1 = Op0.getOperand(1);
18057
18058 if (AndOp1.getOpcode() == ISD::Constant) {
18059 const APInt &AndConst = AndOp1->getAsAPIntVal();
18060
18061 if (AndConst.isPowerOf2() &&
18062 Op1->getAsAPIntVal() == AndConst.logBase2()) {
18063 SDLoc DL(N);
18064 return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
18065 Op0, DAG.getConstant(0, DL, Op0.getValueType()),
18066 ISD::SETNE);
18067 }
18068 }
18069 }
18070 }
18071
18072 // Transform (brcond (xor x, y)) -> (brcond (setcc, x, y, ne))
18073 // Transform (brcond (xor (xor x, y), -1)) -> (brcond (setcc, x, y, eq))
18074 if (N.getOpcode() == ISD::XOR) {
18075 // Because we may call this on a speculatively constructed
18076 // SimplifiedSetCC Node, we need to simplify this node first.
18077 // Ideally this should be folded into SimplifySetCC and not
18078 // here. For now, grab a handle to N so we don't lose it from
18079     // replacements internal to the visit.
18080 HandleSDNode XORHandle(N);
18081 while (N.getOpcode() == ISD::XOR) {
18082 SDValue Tmp = visitXOR(N.getNode());
18083 // No simplification done.
18084 if (!Tmp.getNode())
18085 break;
18086       // Returning N is a form of in-visit replacement that may invalidate
18087       // N. Grab the value from the handle.
18088 if (Tmp.getNode() == N.getNode())
18089 N = XORHandle.getValue();
18090 else // Node simplified. Try simplifying again.
18091 N = Tmp;
18092 }
18093
18094 if (N.getOpcode() != ISD::XOR)
18095 return N;
18096
18097 SDValue Op0 = N->getOperand(0);
18098 SDValue Op1 = N->getOperand(1);
18099
18100 if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
18101 bool Equal = false;
18102 // (brcond (xor (xor x, y), -1)) -> (brcond (setcc x, y, eq))
18103 if (isBitwiseNot(N) && Op0.hasOneUse() && Op0.getOpcode() == ISD::XOR &&
18104 Op0.getValueType() == MVT::i1) {
18105 N = Op0;
18106 Op0 = N->getOperand(0);
18107 Op1 = N->getOperand(1);
18108 Equal = true;
18109 }
18110
18111 EVT SetCCVT = N.getValueType();
18112 if (LegalTypes)
18113 SetCCVT = getSetCCResultType(SetCCVT);
18114 // Replace the uses of XOR with SETCC
18115 return DAG.getSetCC(SDLoc(N), SetCCVT, Op0, Op1,
18116 Equal ? ISD::SETEQ : ISD::SETNE);
18117 }
18118 }
18119
18120 return SDValue();
18121}
18122
18123// Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
18124//
18125SDValue DAGCombiner::visitBR_CC(SDNode *N) {
18126 CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
18127 SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
18128
18129 // If N is a constant we could fold this into a fallthrough or unconditional
18130 // branch. However that doesn't happen very often in normal code, because
18131 // Instcombine/SimplifyCFG should have handled the available opportunities.
18132 // If we did this folding here, it would be necessary to update the
18133 // MachineBasicBlock CFG, which is awkward.
18134
18135 // Use SimplifySetCC to simplify SETCC's.
18136 SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
18137 CondLHS, CondRHS, CC->get(), SDLoc(N),
18138 false);
18139 if (Simp.getNode()) AddToWorklist(Simp.getNode());
18140
18141 // fold to a simpler setcc
18142 if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
18143 return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
18144 N->getOperand(0), Simp.getOperand(2),
18145 Simp.getOperand(0), Simp.getOperand(1),
18146 N->getOperand(4));
18147
18148 return SDValue();
18149}
18150
18151static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec,
18152 bool &IsLoad, bool &IsMasked, SDValue &Ptr,
18153 const TargetLowering &TLI) {
18154 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
18155 if (LD->isIndexed())
18156 return false;
18157 EVT VT = LD->getMemoryVT();
18158 if (!TLI.isIndexedLoadLegal(Inc, VT) && !TLI.isIndexedLoadLegal(Dec, VT))
18159 return false;
18160 Ptr = LD->getBasePtr();
18161 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
18162 if (ST->isIndexed())
18163 return false;
18164 EVT VT = ST->getMemoryVT();
18165 if (!TLI.isIndexedStoreLegal(Inc, VT) && !TLI.isIndexedStoreLegal(Dec, VT))
18166 return false;
18167 Ptr = ST->getBasePtr();
18168 IsLoad = false;
18169 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
18170 if (LD->isIndexed())
18171 return false;
18172 EVT VT = LD->getMemoryVT();
18173 if (!TLI.isIndexedMaskedLoadLegal(Inc, VT) &&
18174 !TLI.isIndexedMaskedLoadLegal(Dec, VT))
18175 return false;
18176 Ptr = LD->getBasePtr();
18177 IsMasked = true;
18178 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
18179 if (ST->isIndexed())
18180 return false;
18181 EVT VT = ST->getMemoryVT();
18182 if (!TLI.isIndexedMaskedStoreLegal(Inc, VT) &&
18183 !TLI.isIndexedMaskedStoreLegal(Dec, VT))
18184 return false;
18185 Ptr = ST->getBasePtr();
18186 IsLoad = false;
18187 IsMasked = true;
18188 } else {
18189 return false;
18190 }
18191 return true;
18192}
18193
18194/// Try turning a load/store into a pre-indexed load/store when the base
18195/// pointer is an add or subtract and it has other uses besides the load/store.
18196/// After the transformation, the new indexed load/store has effectively folded
18197/// the add/subtract in and all of its other uses are redirected to the
18198/// new load/store.
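/// For example (illustrative sketch, not tied to a particular target):
///   t0 = add x, 4        ; t0 also has other users
///   v  = load t0
/// becomes a pre-indexed load that produces both the loaded value and the
/// updated pointer,
///   v, t0' = pre_inc_load x, 4
/// and the other users of t0 are rewritten to use t0'.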
18199bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
18200 if (Level < AfterLegalizeDAG)
18201 return false;
18202
18203 bool IsLoad = true;
18204 bool IsMasked = false;
18205 SDValue Ptr;
18206 if (!getCombineLoadStoreParts(N, ISD::PRE_INC, ISD::PRE_DEC, IsLoad, IsMasked,
18207 Ptr, TLI))
18208 return false;
18209
18210 // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
18211 // out. There is no reason to make this a preinc/predec.
18212 if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
18213 Ptr->hasOneUse())
18214 return false;
18215
18216 // Ask the target to do addressing mode selection.
18217 SDValue BasePtr;
18218 SDValue Offset;
18219 ISD::MemIndexedMode AM = ISD::UNINDEXED;
18220 if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
18221 return false;
18222
18223 // Backends without true r+i pre-indexed forms may need to pass a
18224 // constant base with a variable offset so that constant coercion
18225 // will work with the patterns in canonical form.
18226 bool Swapped = false;
18227 if (isa<ConstantSDNode>(BasePtr)) {
18228 std::swap(BasePtr, Offset);
18229 Swapped = true;
18230 }
18231
18232 // Don't create an indexed load / store with zero offset.
18233 if (isNullConstant(Offset))
18234 return false;
18235
18236 // Try turning it into a pre-indexed load / store except when:
18237 // 1) The new base ptr is a frame index.
18238 // 2) If N is a store and the new base ptr is either the same as or is a
18239 // predecessor of the value being stored.
18240 // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
18241 // that would create a cycle.
18242 // 4) All uses are load / store ops that use it as old base ptr.
18243
18244 // Check #1. Preinc'ing a frame index would require copying the stack pointer
18245 // (plus the implicit offset) to a register to preinc anyway.
18246 if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
18247 return false;
18248
18249 // Check #2.
18250 if (!IsLoad) {
18251 SDValue Val = IsMasked ? cast<MaskedStoreSDNode>(N)->getValue()
18252 : cast<StoreSDNode>(N)->getValue();
18253
18254 // Would require a copy.
18255 if (Val == BasePtr)
18256 return false;
18257
18258 // Would create a cycle.
18259 if (Val == Ptr || Ptr->isPredecessorOf(Val.getNode()))
18260 return false;
18261 }
18262
18263 // Caches for hasPredecessorHelper.
18264 SmallPtrSet<const SDNode *, 32> Visited;
18265 SmallVector<const SDNode *, 16> Worklist;
18266 Worklist.push_back(N);
18267
18268 // If the offset is a constant, there may be other adds of constants that
18269 // can be folded with this one. We should do this to avoid having to keep
18270 // a copy of the original base pointer.
18271 SmallVector<SDNode *, 16> OtherUses;
18272 constexpr unsigned int MaxSteps = 8192;
18273 if (isa<ConstantSDNode>(Offset))
18274 for (SDNode::use_iterator UI = BasePtr->use_begin(),
18275 UE = BasePtr->use_end();
18276 UI != UE; ++UI) {
18277 SDUse &Use = UI.getUse();
18278 // Skip the use that is Ptr and uses of other results from BasePtr's
18279 // node (important for nodes that return multiple results).
18280 if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
18281 continue;
18282
18283 if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist,
18284 MaxSteps))
18285 continue;
18286
18287 if (Use.getUser()->getOpcode() != ISD::ADD &&
18288 Use.getUser()->getOpcode() != ISD::SUB) {
18289 OtherUses.clear();
18290 break;
18291 }
18292
18293 SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
18294 if (!isa<ConstantSDNode>(Op1)) {
18295 OtherUses.clear();
18296 break;
18297 }
18298
18299 // FIXME: In some cases, we can be smarter about this.
18300 if (Op1.getValueType() != Offset.getValueType()) {
18301 OtherUses.clear();
18302 break;
18303 }
18304
18305 OtherUses.push_back(Use.getUser());
18306 }
18307
18308 if (Swapped)
18309 std::swap(BasePtr, Offset);
18310
18311 // Now check for #3 and #4.
18312 bool RealUse = false;
18313
18314 for (SDNode *Use : Ptr->uses()) {
18315 if (Use == N)
18316 continue;
18317 if (SDNode::hasPredecessorHelper(Use, Visited, Worklist, MaxSteps))
18318 return false;
18319
18320 // If Ptr may be folded in addressing mode of other use, then it's
18321 // not profitable to do this transformation.
18322 if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
18323 RealUse = true;
18324 }
18325
18326 if (!RealUse)
18327 return false;
18328
18330 if (!IsMasked) {
18331 if (IsLoad)
18332 Result = DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
18333 else
18334 Result =
18335 DAG.getIndexedStore(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
18336 } else {
18337 if (IsLoad)
18338 Result = DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
18339 Offset, AM);
18340 else
18341 Result = DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N), BasePtr,
18342 Offset, AM);
18343 }
18344 ++PreIndexedNodes;
18345 ++NodesCombined;
18346 LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
18347 Result.dump(&DAG); dbgs() << '\n');
18348 WorklistRemover DeadNodes(*this);
18349 if (IsLoad) {
18350 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
18351 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
18352 } else {
18353 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
18354 }
18355
18356 // Finally, since the node is now dead, remove it from the graph.
18357 deleteAndRecombine(N);
18358
18359 if (Swapped)
18360 std::swap(BasePtr, Offset);
18361
18362 // Replace other uses of BasePtr that can be updated to use Ptr
18363 for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
18364 unsigned OffsetIdx = 1;
18365 if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
18366 OffsetIdx = 0;
18367 assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
18368 BasePtr.getNode() && "Expected BasePtr operand");
18369
18370 // We need to replace ptr0 in the following expression:
18371 // x0 * offset0 + y0 * ptr0 = t0
18372 // knowing that
18373 // x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
18374 //
18375 // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
18376 // indexed load/store and the expression that needs to be re-written.
18377 //
18378 // Therefore, we have:
18379 // t0 = (x0 * offset0 - x1 * y0 * y1 * offset1) + (y0 * y1) * t1
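// Worked example (pre_inc, nothing swapped, so x0 = y0 = x1 = y1 = 1):
//   t1 = ptr0 + offset1 is produced by the new indexed load/store, and
//   t0 = ptr0 + offset0 is the other user, so it can be rebuilt as
//   t0 = (offset0 - offset1) + t1.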
18380
18381 auto *CN = cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
18382 const APInt &Offset0 = CN->getAPIntValue();
18383 const APInt &Offset1 = Offset->getAsAPIntVal();
18384 int X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
18385 int Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
18386 int X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
18387 int Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
18388
18389 unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
18390
18391 APInt CNV = Offset0;
18392 if (X0 < 0) CNV = -CNV;
18393 if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
18394 else CNV = CNV - Offset1;
18395
18396 SDLoc DL(OtherUses[i]);
18397
18398 // We can now generate the new expression.
18399 SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
18400 SDValue NewOp2 = Result.getValue(IsLoad ? 1 : 0);
18401
18402 SDValue NewUse = DAG.getNode(Opcode,
18403 DL,
18404 OtherUses[i]->getValueType(0), NewOp1, NewOp2);
18405 DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
18406 deleteAndRecombine(OtherUses[i]);
18407 }
18408
18409 // Replace the uses of Ptr with uses of the updated base value.
18410 DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(IsLoad ? 1 : 0));
18411 deleteAndRecombine(Ptr.getNode());
18412 AddToWorklist(Result.getNode());
18413
18414 return true;
18415}
18416
18417 static bool shouldCombineToPostInc(SDNode *N, SDValue Ptr, SDNode *PtrUse,
18418 SDValue &BasePtr, SDValue &Offset,
18419 ISD::MemIndexedMode &AM,
18420 SelectionDAG &DAG,
18421 const TargetLowering &TLI) {
18422 if (PtrUse == N ||
18423 (PtrUse->getOpcode() != ISD::ADD && PtrUse->getOpcode() != ISD::SUB))
18424 return false;
18425
18426 if (!TLI.getPostIndexedAddressParts(N, PtrUse, BasePtr, Offset, AM, DAG))
18427 return false;
18428
18429 // Don't create an indexed load / store with zero offset.
18430 if (isNullConstant(Offset))
18431 return false;
18432
18433 if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
18434 return false;
18435
18436 SmallPtrSet<const SDNode *, 32> Visited;
18437 for (SDNode *Use : BasePtr->uses()) {
18438 if (Use == Ptr.getNode())
18439 continue;
18440
18441 // Say no if there's a later user which could perform the indexing instead.
18442 if (isa<MemSDNode>(Use)) {
18443 bool IsLoad = true;
18444 bool IsMasked = false;
18445 SDValue OtherPtr;
18446 if (getCombineLoadStoreParts(Use, ISD::POST_INC, ISD::POST_DEC, IsLoad,
18447 IsMasked, OtherPtr, TLI)) {
18448 SmallVector<const SDNode *, 2> Worklist;
18449 Worklist.push_back(Use);
18450 if (SDNode::hasPredecessorHelper(N, Visited, Worklist))
18451 return false;
18452 }
18453 }
18454
18455 // If all the uses are load / store addresses, then don't do the
18456 // transformation.
18457 if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB) {
18458 for (SDNode *UseUse : Use->uses())
18459 if (canFoldInAddressingMode(Use, UseUse, DAG, TLI))
18460 return false;
18461 }
18462 }
18463 return true;
18464}
18465
18466 static SDNode *getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad,
18467 bool &IsMasked, SDValue &Ptr,
18468 SDValue &BasePtr, SDValue &Offset,
18469 ISD::MemIndexedMode &AM,
18470 SelectionDAG &DAG,
18471 const TargetLowering &TLI) {
18472 if (!getCombineLoadStoreParts(N, ISD::POST_INC, ISD::POST_DEC, IsLoad,
18473 IsMasked, Ptr, TLI) ||
18474 Ptr->hasOneUse())
18475 return nullptr;
18476
18477 // Try turning it into a post-indexed load / store except when
18478 // 1) All uses are load / store ops that use it as base ptr (and
18479 // it may be folded into the addressing mode).
18480 // 2) Op must be independent of N, i.e. Op is neither a predecessor
18481 // nor a successor of N. Otherwise, if Op is folded that would
18482 // create a cycle.
18483 for (SDNode *Op : Ptr->uses()) {
18484 // Check for #1.
18485 if (!shouldCombineToPostInc(N, Ptr, Op, BasePtr, Offset, AM, DAG, TLI))
18486 continue;
18487
18488 // Check for #2.
18489 SmallPtrSet<const SDNode *, 32> Visited;
18490 SmallVector<const SDNode *, 8> Worklist;
18491 constexpr unsigned int MaxSteps = 8192;
18492 // Ptr is predecessor to both N and Op.
18493 Visited.insert(Ptr.getNode());
18494 Worklist.push_back(N);
18495 Worklist.push_back(Op);
18496 if (!SDNode::hasPredecessorHelper(N, Visited, Worklist, MaxSteps) &&
18497 !SDNode::hasPredecessorHelper(Op, Visited, Worklist, MaxSteps))
18498 return Op;
18499 }
18500 return nullptr;
18501}
18502
18503 /// Try to combine a load/store with an add/sub of the base pointer node into a
18504 /// post-indexed load/store. The transformation effectively folds the add/subtract
18505 /// into the new indexed load/store, and all of its uses are redirected to the
18506 /// new load/store.
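/// For example (illustrative sketch, not tied to a particular target):
///   v  = load x
///   t0 = add x, 4        ; independent of v
/// becomes a post-indexed load that also produces the updated pointer,
///   v, t0' = post_inc_load x, 4
/// and the users of t0 are redirected to t0'.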
18507bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
18508 if (Level < AfterLegalizeDAG)
18509 return false;
18510
18511 bool IsLoad = true;
18512 bool IsMasked = false;
18513 SDValue Ptr;
18514 SDValue BasePtr;
18515 SDValue Offset;
18516 ISD::MemIndexedMode AM = ISD::UNINDEXED;
18517 SDNode *Op = getPostIndexedLoadStoreOp(N, IsLoad, IsMasked, Ptr, BasePtr,
18518 Offset, AM, DAG, TLI);
18519 if (!Op)
18520 return false;
18521
18522 SDValue Result;
18523 if (!IsMasked)
18524 Result = IsLoad ? DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
18525 Offset, AM)
18526 : DAG.getIndexedStore(SDValue(N, 0), SDLoc(N),
18527 BasePtr, Offset, AM);
18528 else
18529 Result = IsLoad ? DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N),
18530 BasePtr, Offset, AM)
18531 : DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N),
18532 BasePtr, Offset, AM);
18533 ++PostIndexedNodes;
18534 ++NodesCombined;
18535 LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG); dbgs() << "\nWith: ";
18536 Result.dump(&DAG); dbgs() << '\n');
18537 WorklistRemover DeadNodes(*this);
18538 if (IsLoad) {
18539 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
18540 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
18541 } else {
18542 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
18543 }
18544
18545 // Finally, since the node is now dead, remove it from the graph.
18546 deleteAndRecombine(N);
18547
18548 // Replace the uses of Op with uses of the updated base value.
18549 DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
18550 Result.getValue(IsLoad ? 1 : 0));
18551 deleteAndRecombine(Op);
18552 return true;
18553}
18554
18555/// Return the base-pointer arithmetic from an indexed \p LD.
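/// E.g. for a pre_inc/post_inc load with base BP and increment Inc this
/// rebuilds (add BP, Inc); for the *_dec modes it rebuilds (sub BP, Inc).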
18556SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
18557 ISD::MemIndexedMode AM = LD->getAddressingMode();
18558 assert(AM != ISD::UNINDEXED);
18559 SDValue BP = LD->getOperand(1);
18560 SDValue Inc = LD->getOperand(2);
18561
18562 // Some backends use TargetConstants for load offsets, but don't expect
18563 // TargetConstants in general ADD nodes. We can convert these constants into
18564 // regular Constants (if the constant is not opaque).
18565 assert((Inc.getOpcode() != ISD::TargetConstant ||
18566 !cast<ConstantSDNode>(Inc)->isOpaque()) &&
18567 "Cannot split out indexing using opaque target constants");
18568 if (Inc.getOpcode() == ISD::TargetConstant) {
18569 ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
18570 Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
18571 ConstInc->getValueType(0));
18572 }
18573
18574 unsigned Opc =
18575 (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
18576 return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
18577}
18578
18580 return T.isVector() ? T.getVectorElementCount() : ElementCount::getFixed(0);
18581}
18582
18583bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) {
18584 EVT STType = Val.getValueType();
18585 EVT STMemType = ST->getMemoryVT();
18586 if (STType == STMemType)
18587 return true;
18588 if (isTypeLegal(STMemType))
18589 return false; // fail.
18590 if (STType.isFloatingPoint() && STMemType.isFloatingPoint() &&
18591 TLI.isOperationLegal(ISD::FTRUNC, STMemType)) {
18592 Val = DAG.getNode(ISD::FTRUNC, SDLoc(ST), STMemType, Val);
18593 return true;
18594 }
18595 if (numVectorEltsOrZero(STType) == numVectorEltsOrZero(STMemType) &&
18596 STType.isInteger() && STMemType.isInteger()) {
18597 Val = DAG.getNode(ISD::TRUNCATE, SDLoc(ST), STMemType, Val);
18598 return true;
18599 }
18600 if (STType.getSizeInBits() == STMemType.getSizeInBits()) {
18601 Val = DAG.getBitcast(STMemType, Val);
18602 return true;
18603 }
18604 return false; // fail.
18605}
18606
18607bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) {
18608 EVT LDMemType = LD->getMemoryVT();
18609 EVT LDType = LD->getValueType(0);
18610 assert(Val.getValueType() == LDMemType &&
18611 "Attempting to extend value of non-matching type");
18612 if (LDType == LDMemType)
18613 return true;
18614 if (LDMemType.isInteger() && LDType.isInteger()) {
18615 switch (LD->getExtensionType()) {
18616 case ISD::NON_EXTLOAD:
18617 Val = DAG.getBitcast(LDType, Val);
18618 return true;
18619 case ISD::EXTLOAD:
18620 Val = DAG.getNode(ISD::ANY_EXTEND, SDLoc(LD), LDType, Val);
18621 return true;
18622 case ISD::SEXTLOAD:
18623 Val = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(LD), LDType, Val);
18624 return true;
18625 case ISD::ZEXTLOAD:
18626 Val = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(LD), LDType, Val);
18627 return true;
18628 }
18629 }
18630 return false;
18631}
18632
18633StoreSDNode *DAGCombiner::getUniqueStoreFeeding(LoadSDNode *LD,
18634 int64_t &Offset) {
18635 SDValue Chain = LD->getOperand(0);
18636
18637 // Look through CALLSEQ_START.
18638 if (Chain.getOpcode() == ISD::CALLSEQ_START)
18639 Chain = Chain->getOperand(0);
18640
18641 StoreSDNode *ST = nullptr;
18642 SmallVector<SDValue, 8> Aliases;
18643 if (Chain.getOpcode() == ISD::TokenFactor) {
18644 // Look for unique store within the TokenFactor.
18645 for (SDValue Op : Chain->ops()) {
18646 StoreSDNode *Store = dyn_cast<StoreSDNode>(Op.getNode());
18647 if (!Store)
18648 continue;
18649 BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
18650 BaseIndexOffset BasePtrST = BaseIndexOffset::match(Store, DAG);
18651 if (!BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
18652 continue;
18653 // Make sure the store is not aliased with any nodes in TokenFactor.
18654 GatherAllAliases(Store, Chain, Aliases);
18655 if (Aliases.empty() ||
18656 (Aliases.size() == 1 && Aliases.front().getNode() == Store))
18657 ST = Store;
18658 break;
18659 }
18660 } else {
18661 StoreSDNode *Store = dyn_cast<StoreSDNode>(Chain.getNode());
18662 if (Store) {
18663 BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
18664 BaseIndexOffset BasePtrST = BaseIndexOffset::match(Store, DAG);
18665 if (BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
18666 ST = Store;
18667 }
18668 }
18669
18670 return ST;
18671}
18672
18673SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
18674 if (OptLevel == CodeGenOptLevel::None || !LD->isSimple())
18675 return SDValue();
18676 SDValue Chain = LD->getOperand(0);
18677 int64_t Offset;
18678
18679 StoreSDNode *ST = getUniqueStoreFeeding(LD, Offset);
18680 // TODO: Relax this restriction for unordered atomics (see D66309)
18681 if (!ST || !ST->isSimple() || ST->getAddressSpace() != LD->getAddressSpace())
18682 return SDValue();
18683
18684 EVT LDType = LD->getValueType(0);
18685 EVT LDMemType = LD->getMemoryVT();
18686 EVT STMemType = ST->getMemoryVT();
18687 EVT STType = ST->getValue().getValueType();
18688
18689 // There are two cases to consider here:
18690 // 1. The store is fixed width and the load is scalable. In this case we
18691 // don't know at compile time if the store completely envelops the load
18692 // so we abandon the optimisation.
18693 // 2. The store is scalable and the load is fixed width. We could
18694 // potentially support a limited number of cases here, but there has been
18695 // no cost-benefit analysis to prove it's worth it.
18696 bool LdStScalable = LDMemType.isScalableVT();
18697 if (LdStScalable != STMemType.isScalableVT())
18698 return SDValue();
18699
18700 // If we are dealing with scalable vectors on a big endian platform the
18701 // calculation of offsets below becomes trickier, since we do not know at
18702 // compile time the absolute size of the vector. Until we've done more
18703 // analysis on big-endian platforms it seems better to bail out for now.
18704 if (LdStScalable && DAG.getDataLayout().isBigEndian())
18705 return SDValue();
18706
18707 // Normalize for Endianness. After this Offset=0 will denote that the least
18708 // significant bit in the loaded value maps to the least significant bit in
18709 // the stored value. With Offset=n (for n > 0) the loaded value starts at the
18710 // n:th least significant byte of the stored value.
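// For example, a 2-byte load at the same address as a 4-byte store reads the
// two most significant bytes on a big-endian target, so the raw Offset of 0
// is normalized to (4 - 2) - 0 = 2 here.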
18711 int64_t OrigOffset = Offset;
18712 if (DAG.getDataLayout().isBigEndian())
18713 Offset = ((int64_t)STMemType.getStoreSizeInBits().getFixedValue() -
18714 (int64_t)LDMemType.getStoreSizeInBits().getFixedValue()) /
18715 8 -
18716 Offset;
18717
18718 // Check that the stored value covers all bits that are loaded.
18719 bool STCoversLD;
18720
18721 TypeSize LdMemSize = LDMemType.getSizeInBits();
18722 TypeSize StMemSize = STMemType.getSizeInBits();
18723 if (LdStScalable)
18724 STCoversLD = (Offset == 0) && LdMemSize == StMemSize;
18725 else
18726 STCoversLD = (Offset >= 0) && (Offset * 8 + LdMemSize.getFixedValue() <=
18727 StMemSize.getFixedValue());
18728
18729 auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue {
18730 if (LD->isIndexed()) {
18731 // Cannot handle opaque target constants and we must respect the user's
18732 // request not to split indexes from loads.
18733 if (!canSplitIdx(LD))
18734 return SDValue();
18735 SDValue Idx = SplitIndexingFromLoad(LD);
18736 SDValue Ops[] = {Val, Idx, Chain};
18737 return CombineTo(LD, Ops, 3);
18738 }
18739 return CombineTo(LD, Val, Chain);
18740 };
18741
18742 if (!STCoversLD)
18743 return SDValue();
18744
18745 // Memory as copy space (potentially masked).
18746 if (Offset == 0 && LDType == STType && STMemType == LDMemType) {
18747 // Simple case: Direct non-truncating forwarding
18748 if (LDType.getSizeInBits() == LdMemSize)
18749 return ReplaceLd(LD, ST->getValue(), Chain);
18750 // Can we model the truncate and extension with an and mask?
18751 if (STType.isInteger() && LDMemType.isInteger() && !STType.isVector() &&
18752 !LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) {
18753 // Mask to size of LDMemType
18754 auto Mask =
18755 DAG.getConstant(APInt::getLowBitsSet(STType.getFixedSizeInBits(),
18756 StMemSize.getFixedValue()),
18757 SDLoc(ST), STType);
18758 auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask);
18759 return ReplaceLd(LD, Val, Chain);
18760 }
18761 }
18762
18763 // Handle some big-endian cases that would have Offset 0 (and thus be
18764 // handled above) on a little-endian target.
18765 SDValue Val = ST->getValue();
18766 if (DAG.getDataLayout().isBigEndian() && Offset > 0 && OrigOffset == 0) {
18767 if (STType.isInteger() && !STType.isVector() && LDType.isInteger() &&
18768 !LDType.isVector() && isTypeLegal(STType) &&
18769 TLI.isOperationLegal(ISD::SRL, STType)) {
18770 Val = DAG.getNode(ISD::SRL, SDLoc(LD), STType, Val,
18771 DAG.getConstant(Offset * 8, SDLoc(LD), STType));
18772 Offset = 0;
18773 }
18774 }
18775
18776 // TODO: Deal with nonzero offset.
18777 if (LD->getBasePtr().isUndef() || Offset != 0)
18778 return SDValue();
18779 // Model necessary truncations / extensions.
18780 // Truncate the value to the stored memory size.
18781 do {
18782 if (!getTruncatedStoreValue(ST, Val))
18783 continue;
18784 if (!isTypeLegal(LDMemType))
18785 continue;
18786 if (STMemType != LDMemType) {
18787 // TODO: Support vectors? This requires extract_subvector/bitcast.
18788 if (!STMemType.isVector() && !LDMemType.isVector() &&
18789 STMemType.isInteger() && LDMemType.isInteger())
18790 Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val);
18791 else
18792 continue;
18793 }
18794 if (!extendLoadedValueToExtension(LD, Val))
18795 continue;
18796 return ReplaceLd(LD, Val, Chain);
18797 } while (false);
18798
18799 // On failure, cleanup dead nodes we may have created.
18800 if (Val->use_empty())
18801 deleteAndRecombine(Val.getNode());
18802 return SDValue();
18803}
18804
18805SDValue DAGCombiner::visitLOAD(SDNode *N) {
18806 LoadSDNode *LD = cast<LoadSDNode>(N);
18807 SDValue Chain = LD->getChain();
18808 SDValue Ptr = LD->getBasePtr();
18809
18810 // If load is not volatile and there are no uses of the loaded value (and
18811 // the updated indexed value in case of indexed loads), change uses of the
18812 // chain value into uses of the chain input (i.e. delete the dead load).
18813 // TODO: Allow this for unordered atomics (see D66309)
18814 if (LD->isSimple()) {
18815 if (N->getValueType(1) == MVT::Other) {
18816 // Unindexed loads.
18817 if (!N->hasAnyUseOfValue(0)) {
18818 // It's not safe to use the two value CombineTo variant here. e.g.
18819 // v1, chain2 = load chain1, loc
18820 // v2, chain3 = load chain2, loc
18821 // v3 = add v2, c
18822 // Now we replace use of chain2 with chain1. This makes the second load
18823 // isomorphic to the one we are deleting, and thus makes this load live.
18824 LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG);
18825 dbgs() << "\nWith chain: "; Chain.dump(&DAG);
18826 dbgs() << "\n");
18827 WorklistRemover DeadNodes(*this);
18828 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
18829 AddUsersToWorklist(Chain.getNode());
18830 if (N->use_empty())
18831 deleteAndRecombine(N);
18832
18833 return SDValue(N, 0); // Return N so it doesn't get rechecked!
18834 }
18835 } else {
18836 // Indexed loads.
18837 assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
18838
18839 // If this load has an opaque TargetConstant offset, then we cannot split
18840 // the indexing into an add/sub directly (that TargetConstant may not be
18841 // valid for a different type of node, and we cannot convert an opaque
18842 // target constant into a regular constant).
18843 bool CanSplitIdx = canSplitIdx(LD);
18844
18845 if (!N->hasAnyUseOfValue(0) && (CanSplitIdx || !N->hasAnyUseOfValue(1))) {
18846 SDValue Undef = DAG.getUNDEF(N->getValueType(0));
18847 SDValue Index;
18848 if (N->hasAnyUseOfValue(1) && CanSplitIdx) {
18849 Index = SplitIndexingFromLoad(LD);
18850 // Try to fold the base pointer arithmetic into subsequent loads and
18851 // stores.
18852 AddUsersToWorklist(N);
18853 } else
18854 Index = DAG.getUNDEF(N->getValueType(1));
18855 LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG);
18856 dbgs() << "\nWith: "; Undef.dump(&DAG);
18857 dbgs() << " and 2 other values\n");
18858 WorklistRemover DeadNodes(*this);
18859 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
18860 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
18861 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
18862 deleteAndRecombine(N);
18863 return SDValue(N, 0); // Return N so it doesn't get rechecked!
18864 }
18865 }
18866 }
18867
18868 // If this load is directly stored, replace the load value with the stored
18869 // value.
18870 if (auto V = ForwardStoreValueToDirectLoad(LD))
18871 return V;
18872
18873 // Try to infer better alignment information than the load already has.
18874 if (OptLevel != CodeGenOptLevel::None && LD->isUnindexed() &&
18875 !LD->isAtomic()) {
18876 if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
18877 if (*Alignment > LD->getAlign() &&
18878 isAligned(*Alignment, LD->getSrcValueOffset())) {
18879 SDValue NewLoad = DAG.getExtLoad(
18880 LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
18881 LD->getPointerInfo(), LD->getMemoryVT(), *Alignment,
18882 LD->getMemOperand()->getFlags(), LD->getAAInfo());
18883 // NewLoad will always be N as we are only refining the alignment
18884 assert(NewLoad.getNode() == N);
18885 (void)NewLoad;
18886 }
18887 }
18888 }
18889
18890 if (LD->isUnindexed()) {
18891 // Walk up chain skipping non-aliasing memory nodes.
18892 SDValue BetterChain = FindBetterChain(LD, Chain);
18893
18894 // If there is a better chain.
18895 if (Chain != BetterChain) {
18896 SDValue ReplLoad;
18897
18898 // Replace the chain to avoid the dependency.
18899 if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
18900 ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
18901 BetterChain, Ptr, LD->getMemOperand());
18902 } else {
18903 ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
18904 LD->getValueType(0),
18905 BetterChain, Ptr, LD->getMemoryVT(),
18906 LD->getMemOperand());
18907 }
18908
18909 // Create token factor to keep old chain connected.
18910 SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
18911 MVT::Other, Chain, ReplLoad.getValue(1));
18912
18913 // Replace uses with load result and token factor
18914 return CombineTo(N, ReplLoad.getValue(0), Token);
18915 }
18916 }
18917
18918 // Try transforming N to an indexed load.
18919 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
18920 return SDValue(N, 0);
18921
18922 // Try to slice up N to more direct loads if the slices are mapped to
18923 // different register banks or pairing can take place.
18924 if (SliceUpLoad(N))
18925 return SDValue(N, 0);
18926
18927 return SDValue();
18928}
18929
18930namespace {
18931
18932/// Helper structure used to slice a load in smaller loads.
18933/// Basically a slice is obtained from the following sequence:
18934/// Origin = load Ty1, Base
18935/// Shift = srl Ty1 Origin, CstTy Amount
18936/// Inst = trunc Shift to Ty2
18937///
18938/// Then, it will be rewritten into:
18939/// Slice = load SliceTy, Base + SliceOffset
18940/// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
18941///
18942/// SliceTy is deduced from the number of bits that are actually used to
18943/// build Inst.
18944struct LoadedSlice {
18945 /// Helper structure used to compute the cost of a slice.
18946 struct Cost {
18947 /// Are we optimizing for code size.
18948 bool ForCodeSize = false;
18949
18950 /// Various cost.
18951 unsigned Loads = 0;
18952 unsigned Truncates = 0;
18953 unsigned CrossRegisterBanksCopies = 0;
18954 unsigned ZExts = 0;
18955 unsigned Shift = 0;
18956
18957 explicit Cost(bool ForCodeSize) : ForCodeSize(ForCodeSize) {}
18958
18959 /// Get the cost of one isolated slice.
18960 Cost(const LoadedSlice &LS, bool ForCodeSize)
18961 : ForCodeSize(ForCodeSize), Loads(1) {
18962 EVT TruncType = LS.Inst->getValueType(0);
18963 EVT LoadedType = LS.getLoadedType();
18964 if (TruncType != LoadedType &&
18965 !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
18966 ZExts = 1;
18967 }
18968
18969 /// Account for slicing gain in the current cost.
18970 /// Slicing provides a few gains, like removing a shift or a
18971 /// truncate. This method allows growing the cost of the original
18972 /// load with the gain from this slice.
18973 void addSliceGain(const LoadedSlice &LS) {
18974 // Each slice saves a truncate.
18975 const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
18976 if (!TLI.isTruncateFree(LS.Inst->getOperand(0), LS.Inst->getValueType(0)))
18977 ++Truncates;
18978 // If there is a shift amount, this slice gets rid of it.
18979 if (LS.Shift)
18980 ++Shift;
18981 // If this slice can merge a cross register bank copy, account for it.
18982 if (LS.canMergeExpensiveCrossRegisterBankCopy())
18983 ++CrossRegisterBanksCopies;
18984 }
18985
18986 Cost &operator+=(const Cost &RHS) {
18987 Loads += RHS.Loads;
18988 Truncates += RHS.Truncates;
18989 CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
18990 ZExts += RHS.ZExts;
18991 Shift += RHS.Shift;
18992 return *this;
18993 }
18994
18995 bool operator==(const Cost &RHS) const {
18996 return Loads == RHS.Loads && Truncates == RHS.Truncates &&
18997 CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
18998 ZExts == RHS.ZExts && Shift == RHS.Shift;
18999 }
19000
19001 bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
19002
19003 bool operator<(const Cost &RHS) const {
19004 // Assume cross register banks copies are as expensive as loads.
19005 // FIXME: Do we want some more target hooks?
19006 unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
19007 unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
19008 // Unless we are optimizing for code size, consider the
19009 // expensive operation first.
19010 if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
19011 return ExpensiveOpsLHS < ExpensiveOpsRHS;
19012 return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
19013 (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
19014 }
19015
19016 bool operator>(const Cost &RHS) const { return RHS < *this; }
19017
19018 bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
19019
19020 bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
19021 };
19022
19023 // The last instruction that represents the slice. This should be a
19024 // truncate instruction.
19025 SDNode *Inst;
19026
19027 // The original load instruction.
19028 LoadSDNode *Origin;
19029
19030 // The right shift amount in bits from the original load.
19031 unsigned Shift;
19032
19033 // The DAG that Origin comes from.
19034 // This is used to get some contextual information about legal types, etc.
19035 SelectionDAG *DAG;
19036
19037 LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
19038 unsigned Shift = 0, SelectionDAG *DAG = nullptr)
19039 : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
19040
19041 /// Get the bits used in a chunk of bits \p BitWidth large.
19042 /// \return Result is \p BitWidth bits wide and has used bits set to 1 and
19043 /// unused bits set to 0.
19044 APInt getUsedBits() const {
19045 // Reproduce the trunc(lshr) sequence:
19046 // - Start from the truncated value.
19047 // - Zero extend to the desired bit width.
19048 // - Shift left.
19049 assert(Origin && "No original load to compare against.");
19050 unsigned BitWidth = Origin->getValueSizeInBits(0);
19051 assert(Inst && "This slice is not bound to an instruction");
19052 assert(Inst->getValueSizeInBits(0) <= BitWidth &&
19053 "Extracted slice is bigger than the whole type!");
19054 APInt UsedBits(Inst->getValueSizeInBits(0), 0);
19055 UsedBits.setAllBits();
19056 UsedBits = UsedBits.zext(BitWidth);
19057 UsedBits <<= Shift;
19058 return UsedBits;
19059 }
19060
19061 /// Get the size of the slice to be loaded in bytes.
19062 unsigned getLoadedSize() const {
19063 unsigned SliceSize = getUsedBits().popcount();
19064 assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
19065 return SliceSize / 8;
19066 }
19067
19068 /// Get the type that will be loaded for this slice.
19069 /// Note: This may not be the final type for the slice.
19070 EVT getLoadedType() const {
19071 assert(DAG && "Missing context");
19072 LLVMContext &Ctxt = *DAG->getContext();
19073 return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
19074 }
19075
19076 /// Get the alignment of the load used for this slice.
19077 Align getAlign() const {
19078 Align Alignment = Origin->getAlign();
19079 uint64_t Offset = getOffsetFromBase();
19080 if (Offset != 0)
19081 Alignment = commonAlignment(Alignment, Alignment.value() + Offset);
19082 return Alignment;
19083 }
19084
19085 /// Check if this slice can be rewritten with legal operations.
19086 bool isLegal() const {
19087 // An invalid slice is not legal.
19088 if (!Origin || !Inst || !DAG)
19089 return false;
19090
19091 // Offsets are only present for indexed loads; we do not handle those.
19092 if (!Origin->getOffset().isUndef())
19093 return false;
19094
19095 const TargetLowering &TLI = DAG->getTargetLoweringInfo();
19096
19097 // Check that the type is legal.
19098 EVT SliceType = getLoadedType();
19099 if (!TLI.isTypeLegal(SliceType))
19100 return false;
19101
19102 // Check that the load is legal for this type.
19103 if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
19104 return false;
19105
19106 // Check that the offset can be computed.
19107 // 1. Check its type.
19108 EVT PtrType = Origin->getBasePtr().getValueType();
19109 if (PtrType == MVT::Untyped || PtrType.isExtended())
19110 return false;
19111
19112 // 2. Check that it fits in the immediate.
19113 if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
19114 return false;
19115
19116 // 3. Check that the computation is legal.
19117 if (!TLI.isOperationLegal(ISD::ADD, PtrType))
19118 return false;
19119
19120 // Check that the zext is legal if it needs one.
19121 EVT TruncateType = Inst->getValueType(0);
19122 if (TruncateType != SliceType &&
19123 !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
19124 return false;
19125
19126 return true;
19127 }
19128
19129 /// Get the offset in bytes of this slice in the original chunk of
19130 /// bits.
19131 /// \pre DAG != nullptr.
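/// E.g. with Shift = 16 and a 4-byte origin load of which 1 byte is used,
/// the offset is 2 on a little-endian target and 4 - 2 - 1 = 1 on a
/// big-endian target.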
19132 uint64_t getOffsetFromBase() const {
19133 assert(DAG && "Missing context.");
19134 bool IsBigEndian = DAG->getDataLayout().isBigEndian();
19135 assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
19136 uint64_t Offset = Shift / 8;
19137 unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
19138 assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
19139 "The size of the original loaded type is not a multiple of a"
19140 " byte.");
19141 // If Offset is bigger than TySizeInBytes, it means we are loading all
19142 // zeros. This should have been optimized before in the process.
19143 assert(TySizeInBytes > Offset &&
19144 "Invalid shift amount for given loaded size");
19145 if (IsBigEndian)
19146 Offset = TySizeInBytes - Offset - getLoadedSize();
19147 return Offset;
19148 }
19149
19150 /// Generate the sequence of instructions to load the slice
19151 /// represented by this object and redirect the uses of this slice to
19152 /// this new sequence of instructions.
19153 /// \pre this->Inst && this->Origin are valid Instructions and this
19154 /// object passed the legal check: LoadedSlice::isLegal returned true.
19155 /// \return The last instruction of the sequence used to load the slice.
19156 SDValue loadSlice() const {
19157 assert(Inst && Origin && "Unable to replace a non-existing slice.");
19158 const SDValue &OldBaseAddr = Origin->getBasePtr();
19159 SDValue BaseAddr = OldBaseAddr;
19160 // Get the offset in that chunk of bytes w.r.t. the endianness.
19161 int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
19162 assert(Offset >= 0 && "Offset too big to fit in int64_t!");
19163 if (Offset) {
19164 // BaseAddr = BaseAddr + Offset.
19165 EVT ArithType = BaseAddr.getValueType();
19166 SDLoc DL(Origin);
19167 BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
19168 DAG->getConstant(Offset, DL, ArithType));
19169 }
19170
19171 // Create the type of the loaded slice according to its size.
19172 EVT SliceType = getLoadedType();
19173
19174 // Create the load for the slice.
19175 SDValue LastInst =
19176 DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
19177 Origin->getPointerInfo().getWithOffset(Offset), getAlign(),
19178 Origin->getMemOperand()->getFlags());
19179 // If the final type is not the same as the loaded type, this means that
19180 // we have to pad with zero. Create a zero extend for that.
19181 EVT FinalType = Inst->getValueType(0);
19182 if (SliceType != FinalType)
19183 LastInst =
19184 DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
19185 return LastInst;
19186 }
19187
19188 /// Check if this slice can be merged with an expensive cross register
19189 /// bank copy. E.g.,
19190 /// i = load i32
19191 /// f = bitcast i32 i to float
19192 bool canMergeExpensiveCrossRegisterBankCopy() const {
19193 if (!Inst || !Inst->hasOneUse())
19194 return false;
19195 SDNode *Use = *Inst->use_begin();
19196 if (Use->getOpcode() != ISD::BITCAST)
19197 return false;
19198 assert(DAG && "Missing context");
19199 const TargetLowering &TLI = DAG->getTargetLoweringInfo();
19200 EVT ResVT = Use->getValueType(0);
19201 const TargetRegisterClass *ResRC =
19202 TLI.getRegClassFor(ResVT.getSimpleVT(), Use->isDivergent());
19203 const TargetRegisterClass *ArgRC =
19204 TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT(),
19205 Use->getOperand(0)->isDivergent());
19206 if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
19207 return false;
19208
19209 // At this point, we know that we perform a cross-register-bank copy.
19210 // Check if it is expensive.
19211 const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
19212 // Assume bitcasts are cheap, unless both register classes do not
19213 // explicitly share a common sub class.
19214 if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
19215 return false;
19216
19217 // Check if it will be merged with the load.
19218 // 1. Check the alignment / fast memory access constraint.
19219 unsigned IsFast = 0;
19220 if (!TLI.allowsMemoryAccess(*DAG->getContext(), DAG->getDataLayout(), ResVT,
19221 Origin->getAddressSpace(), getAlign(),
19222 Origin->getMemOperand()->getFlags(), &IsFast) ||
19223 !IsFast)
19224 return false;
19225
19226 // 2. Check that the load is a legal operation for that type.
19227 if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
19228 return false;
19229
19230 // 3. Check that we do not have a zext in the way.
19231 if (Inst->getValueType(0) != getLoadedType())
19232 return false;
19233
19234 return true;
19235 }
19236};
19237
19238} // end anonymous namespace
19239
19240/// Check that all bits set in \p UsedBits form a dense region, i.e.,
19241/// \p UsedBits looks like 0..0 1..1 0..0.
19242static bool areUsedBitsDense(const APInt &UsedBits) {
19243 // If all the bits are one, this is dense!
19244 if (UsedBits.isAllOnes())
19245 return true;
19246
19247 // Get rid of the unused bits on the right.
19248 APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countr_zero());
19249 // Get rid of the unused bits on the left.
19250 if (NarrowedUsedBits.countl_zero())
19251 NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
19252 // Check that the chunk of bits is completely used.
19253 return NarrowedUsedBits.isAllOnes();
19254}
19255
19256/// Check whether or not \p First and \p Second are next to each other
19257/// in memory. This means that there is no hole between the bits loaded
19258/// by \p First and the bits loaded by \p Second.
19259static bool areSlicesNextToEachOther(const LoadedSlice &First,
19260 const LoadedSlice &Second) {
19261 assert(First.Origin == Second.Origin && First.Origin &&
19262 "Unable to match different memory origins.");
19263 APInt UsedBits = First.getUsedBits();
19264 assert((UsedBits & Second.getUsedBits()) == 0 &&
19265 "Slices are not supposed to overlap.");
19266 UsedBits |= Second.getUsedBits();
19267 return areUsedBitsDense(UsedBits);
19268}
19269
19270/// Adjust the \p GlobalLSCost according to the target
19271 /// pairing capabilities and the layout of the slices.
19272 /// \pre \p GlobalLSCost should account for at least as many loads as
19273 /// there are in the slices in \p LoadedSlices.
19274 static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
19275 LoadedSlice::Cost &GlobalLSCost) {
19276 unsigned NumberOfSlices = LoadedSlices.size();
19277 // If there are fewer than 2 elements, no pairing is possible.
19278 if (NumberOfSlices < 2)
19279 return;
19280
19281 // Sort the slices so that elements that are likely to be next to each
19282 // other in memory are next to each other in the list.
19283 llvm::sort(LoadedSlices, [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
19284 assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
19285 return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
19286 });
19287 const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
19288 // First (resp. Second) is the first (resp. second) potential candidate
19289 // to be placed in a paired load.
19290 const LoadedSlice *First = nullptr;
19291 const LoadedSlice *Second = nullptr;
19292 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
19293 // Set the beginning of the pair.
19294 First = Second) {
19295 Second = &LoadedSlices[CurrSlice];
19296
19297 // If First is NULL, it means we start a new pair.
19298 // Get to the next slice.
19299 if (!First)
19300 continue;
19301
19302 EVT LoadedType = First->getLoadedType();
19303
19304 // If the types of the slices are different, we cannot pair them.
19305 if (LoadedType != Second->getLoadedType())
19306 continue;
19307
19308 // Check if the target supplies paired loads for this type.
19309 Align RequiredAlignment;
19310 if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
19311 // move to the next pair, this type is hopeless.
19312 Second = nullptr;
19313 continue;
19314 }
19315 // Check if we meet the alignment requirement.
19316 if (First->getAlign() < RequiredAlignment)
19317 continue;
19318
19319 // Check that both loads are next to each other in memory.
19320 if (!areSlicesNextToEachOther(*First, *Second))
19321 continue;
19322
19323 assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
19324 --GlobalLSCost.Loads;
19325 // Move to the next pair.
19326 Second = nullptr;
19327 }
19328}
19329
19330/// Check the profitability of all involved LoadedSlice.
19331 /// Currently, it is considered profitable if there are exactly two
19332/// involved slices (1) which are (2) next to each other in memory, and
19333/// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
19334///
19335/// Note: The order of the elements in \p LoadedSlices may be modified, but not
19336/// the elements themselves.
19337///
19338/// FIXME: When the cost model will be mature enough, we can relax
19339/// constraints (1) and (2).
19340 static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
19341 const APInt &UsedBits, bool ForCodeSize) {
19342 unsigned NumberOfSlices = LoadedSlices.size();
19343 if (StressLoadSlicing)
19344 return NumberOfSlices > 1;
19345
19346 // Check (1).
19347 if (NumberOfSlices != 2)
19348 return false;
19349
19350 // Check (2).
19351 if (!areUsedBitsDense(UsedBits))
19352 return false;
19353
19354 // Check (3).
19355 LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
19356 // The original code has one big load.
19357 OrigCost.Loads = 1;
19358 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
19359 const LoadedSlice &LS = LoadedSlices[CurrSlice];
19360 // Accumulate the cost of all the slices.
19361 LoadedSlice::Cost SliceCost(LS, ForCodeSize);
19362 GlobalSlicingCost += SliceCost;
19363
19364 // Account as cost in the original configuration the gain obtained
19365 // with the current slices.
19366 OrigCost.addSliceGain(LS);
19367 }
19368
19369 // If the target supports paired load, adjust the cost accordingly.
19370 adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
19371 return OrigCost > GlobalSlicingCost;
19372}
19373
19374/// If the given load, \p LI, is used only by trunc or trunc(lshr)
19375 /// operations, split it into the various pieces being extracted.
19376///
19377/// This sort of thing is introduced by SROA.
19378/// This slicing takes care not to insert overlapping loads.
19379/// \pre LI is a simple load (i.e., not an atomic or volatile load).
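/// For example, an i64 load whose only uses are (trunc i32) and
/// (trunc i32 (lshr 32)) can be sliced into two independent i32 loads at
/// offsets 0 and 4 (on a little-endian target).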
19380bool DAGCombiner::SliceUpLoad(SDNode *N) {
19381 if (Level < AfterLegalizeDAG)
19382 return false;
19383
19384 LoadSDNode *LD = cast<LoadSDNode>(N);
19385 if (!LD->isSimple() || !ISD::isNormalLoad(LD) ||
19386 !LD->getValueType(0).isInteger())
19387 return false;
19388
19389 // The algorithm to split up a load of a scalable vector into individual
19390 // elements currently requires knowing the length of the loaded type,
19391 // so will need adjusting to work on scalable vectors.
19392 if (LD->getValueType(0).isScalableVector())
19393 return false;
19394
19395 // Keep track of already used bits to detect overlapping values.
19396 // In that case, we will just abort the transformation.
19397 APInt UsedBits(LD->getValueSizeInBits(0), 0);
19398
19399 SmallVector<LoadedSlice, 4> LoadedSlices;
19400
19401 // Check if this load is used as several smaller chunks of bits.
19402 // Basically, look for uses in trunc or trunc(lshr) and record a new chain
19403 // of computation for each trunc.
19404 for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
19405 UI != UIEnd; ++UI) {
19406 // Skip the uses of the chain.
19407 if (UI.getUse().getResNo() != 0)
19408 continue;
19409
19410 SDNode *User = *UI;
19411 unsigned Shift = 0;
19412
19413 // Check if this is a trunc(lshr).
19414 if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
19415 isa<ConstantSDNode>(User->getOperand(1))) {
19416 Shift = User->getConstantOperandVal(1);
19417 User = *User->use_begin();
19418 }
19419
19420 // At this point, User is a TRUNCATE iff we encountered trunc or
19421 // trunc(lshr).
19422 if (User->getOpcode() != ISD::TRUNCATE)
19423 return false;
19424
19425 // The width of the type must be a power of 2 and at least 8 bits.
19426 // Otherwise the load cannot be represented in LLVM IR.
19427 // Moreover, if we shifted by an amount that is not a multiple of 8 bits,
19428 // the slice would straddle byte boundaries. We do not support that.
19429 unsigned Width = User->getValueSizeInBits(0);
19430 if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
19431 return false;
19432
19433 // Build the slice for this chain of computations.
19434 LoadedSlice LS(User, LD, Shift, &DAG);
19435 APInt CurrentUsedBits = LS.getUsedBits();
19436
19437 // Check if this slice overlaps with another.
19438 if ((CurrentUsedBits & UsedBits) != 0)
19439 return false;
19440 // Update the bits used globally.
19441 UsedBits |= CurrentUsedBits;
19442
19443 // Check if the new slice would be legal.
19444 if (!LS.isLegal())
19445 return false;
19446
19447 // Record the slice.
19448 LoadedSlices.push_back(LS);
19449 }
19450
19451 // Abort slicing if it does not seem to be profitable.
19452 if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
19453 return false;
19454
19455 ++SlicedLoads;
19456
19457 // Rewrite each chain to use an independent load.
19458 // By construction, each chain can be represented by a unique load.
19459
19460 // Prepare the argument for the new token factor for all the slices.
19461 SmallVector<SDValue, 8> ArgChains;
19462 for (const LoadedSlice &LS : LoadedSlices) {
19463 SDValue SliceInst = LS.loadSlice();
19464 CombineTo(LS.Inst, SliceInst, true);
19465 if (SliceInst.getOpcode() != ISD::LOAD)
19466 SliceInst = SliceInst.getOperand(0);
19467 assert(SliceInst->getOpcode() == ISD::LOAD &&
19468 "It takes more than a zext to get to the loaded slice!!");
19469 ArgChains.push_back(SliceInst.getValue(1));
19470 }
19471
19472 SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
19473 ArgChains);
19474 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
19475 AddToWorklist(Chain.getNode());
19476 return true;
19477}
19478
19479 /// Check to see if V is (and (load ptr), imm), where the load has
19480 /// specific bytes cleared out. If so, return the number of bytes being masked
19481 /// out and the shift amount (in bytes).
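/// E.g. on an i32 value, a mask of 0xFFFF0000 clears the low two bytes and
/// yields {2, 0}, while a mask of 0x0000FFFF clears the two bytes starting
/// at byte offset 2 and yields {2, 2}.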
19482static std::pair<unsigned, unsigned>
19484 std::pair<unsigned, unsigned> Result(0, 0);
19485
19486 // Check for the structure we're looking for.
19487 if (V->getOpcode() != ISD::AND ||
19488 !isa<ConstantSDNode>(V->getOperand(1)) ||
19489 !ISD::isNormalLoad(V->getOperand(0).getNode()))
19490 return Result;
19491
19492 // Check the chain and pointer.
19493 LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
19494 if (LD->getBasePtr() != Ptr) return Result; // Not from same pointer.
19495
19496 // This only handles simple types.
19497 if (V.getValueType() != MVT::i16 &&
19498 V.getValueType() != MVT::i32 &&
19499 V.getValueType() != MVT::i64)
19500 return Result;
19501
19502 // Check the constant mask. Invert it so that the bits being masked out are
19503 // 0 and the bits being kept are 1. Use getSExtValue so that leading bits
19504 // follow the sign bit for uniformity.
19505 uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
19506 unsigned NotMaskLZ = llvm::countl_zero(NotMask);
19507 if (NotMaskLZ & 7) return Result; // Must be multiple of a byte.
19508 unsigned NotMaskTZ = llvm::countr_zero(NotMask);
19509 if (NotMaskTZ & 7) return Result; // Must be multiple of a byte.
19510 if (NotMaskLZ == 64) return Result; // All zero mask.
19511
19512 // See if we have a continuous run of bits. If so, we have 0*1+0*
19513 if (llvm::countr_one(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
19514 return Result;
19515
19516 // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
19517 if (V.getValueType() != MVT::i64 && NotMaskLZ)
19518 NotMaskLZ -= 64-V.getValueSizeInBits();
19519
19520 unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
19521 switch (MaskedBytes) {
19522 case 1:
19523 case 2:
19524 case 4: break;
19525 default: return Result; // All one mask, or 5-byte mask.
19526 }
19527
19528 // Verify that the masked bits start at a multiple of the mask width so that
19529 // the access is aligned to the access width.
19530 if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
19531
19532 // For narrowing to be valid, it must be the case that the load is the
19533 // memory operation immediately preceding the store.
19534 if (LD == Chain.getNode())
19535 ; // ok.
19536 else if (Chain->getOpcode() == ISD::TokenFactor &&
19537 SDValue(LD, 1).hasOneUse()) {
19538 // LD has only 1 chain use, so there are no indirect dependencies.
19539 if (!LD->isOperandOf(Chain.getNode()))
19540 return Result;
19541 } else
19542 return Result; // Fail.
19543
19544 Result.first = MaskedBytes;
19545 Result.second = NotMaskTZ/8;
19546 return Result;
19547}
19548
19549/// Check to see if IVal is something that provides a value as specified by
19550/// MaskInfo. If so, replace the specified store with a narrower store of
19551/// truncated IVal.
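/// E.g. with MaskInfo = {2, 0} (low two bytes masked out of an i32) and an
/// IVal whose upper bytes are known zero, the original i32 store can be
/// replaced by an i16 store of (trunc IVal) at the same address on a
/// little-endian target.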
19552static SDValue
19553ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
19554 SDValue IVal, StoreSDNode *St,
19555 DAGCombiner *DC) {
19556 unsigned NumBytes = MaskInfo.first;
19557 unsigned ByteShift = MaskInfo.second;
19558 SelectionDAG &DAG = DC->getDAG();
19559
19560 // Check to see if IVal is all zeros in the part being masked in by the 'or'
19561 // that uses this. If not, this is not a replacement.
19562 APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
19563 ByteShift*8, (ByteShift+NumBytes)*8);
19564 if (!DAG.MaskedValueIsZero(IVal, Mask)) return SDValue();
19565
19566 // Check that it is legal on the target to do this. It is legal if the new
19567 // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
19568 // legalization. If the source type is legal, but the store type isn't, see
19569 // if we can use a truncating store.
19570 MVT VT = MVT::getIntegerVT(NumBytes * 8);
19571 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19572 bool UseTruncStore;
19573 if (DC->isTypeLegal(VT))
19574 UseTruncStore = false;
19575 else if (TLI.isTypeLegal(IVal.getValueType()) &&
19576 TLI.isTruncStoreLegal(IVal.getValueType(), VT))
19577 UseTruncStore = true;
19578 else
19579 return SDValue();
19580
19581 // Can't do this for indexed stores.
19582 if (St->isIndexed())
19583 return SDValue();
19584
19585 // Check that the target doesn't think this is a bad idea.
19586 if (St->getMemOperand() &&
19587 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
19588 *St->getMemOperand()))
19589 return SDValue();
19590
19591 // Okay, we can do this! Replace the 'St' store with a store of IVal that is
19592 // shifted by ByteShift and truncated down to NumBytes.
19593 if (ByteShift) {
19594 SDLoc DL(IVal);
19595 IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
19596 DAG.getConstant(ByteShift*8, DL,
19597 DC->getShiftAmountTy(IVal.getValueType())));
19598 }
19599
19600 // Figure out the offset for the store and the alignment of the access.
19601 unsigned StOffset;
19602 if (DAG.getDataLayout().isLittleEndian())
19603 StOffset = ByteShift;
19604 else
19605 StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
19606
19607 SDValue Ptr = St->getBasePtr();
19608 if (StOffset) {
19609 SDLoc DL(IVal);
19610 Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(StOffset), DL);
19611 }
19612
19613 ++OpsNarrowed;
19614 if (UseTruncStore)
19615 return DAG.getTruncStore(St->getChain(), SDLoc(St), IVal, Ptr,
19616 St->getPointerInfo().getWithOffset(StOffset),
19617 VT, St->getOriginalAlign());
19618
19619 // Truncate down to the new size.
19620 IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
19621
19622 return DAG
19623 .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
19624 St->getPointerInfo().getWithOffset(StOffset),
19625 St->getOriginalAlign());
19626}
19627
19628/// Look for sequence of load / op / store where op is one of 'or', 'xor', and
19629/// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
19630/// narrowing the load and store if it would end up being a win for performance
19631/// or code size.
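//
// For example (illustrative only; assumes a little-endian target where the
// narrow type is legal and the target considers the narrowing profitable):
//   (store (or (load i32 p), 0x0000FF00), p)
// only changes byte 1 of the loaded value, so it can be rewritten as
//   (store (or (load i8 p+1), 0xFF), p+1)
// turning the 32-bit load/op/store sequence into an 8-bit one.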
19632SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
19633 StoreSDNode *ST = cast<StoreSDNode>(N);
19634 if (!ST->isSimple())
19635 return SDValue();
19636
19637 SDValue Chain = ST->getChain();
19638 SDValue Value = ST->getValue();
19639 SDValue Ptr = ST->getBasePtr();
19640 EVT VT = Value.getValueType();
19641
19642 if (ST->isTruncatingStore() || VT.isVector())
19643 return SDValue();
19644
19645 unsigned Opc = Value.getOpcode();
19646
19647 if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
19648 !Value.hasOneUse())
19649 return SDValue();
19650
19651 // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
19652 // is a byte mask indicating a consecutive number of bytes, check to see if
19653 // Y is known to provide just those bytes. If so, we try to replace the
19654 // load + replace + store sequence with a single (narrower) store, which makes
19655 // the load dead.
19656 if (Opc == ISD::OR && EnableShrinkLoadReplaceStoreWithStore) {
19657 std::pair<unsigned, unsigned> MaskedLoad;
19658 MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
19659 if (MaskedLoad.first)
19660 if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
19661 Value.getOperand(1), ST,this))
19662 return NewST;
19663
19664 // Or is commutative, so try swapping X and Y.
19665 MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
19666 if (MaskedLoad.first)
19667 if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
19668 Value.getOperand(0), ST,this))
19669 return NewST;
19670 }
19671
19671
19672 if (!EnableReduceLoadOpStoreWidth)
19673 return SDValue();
19674
19675 if (Value.getOperand(1).getOpcode() != ISD::Constant)
19676 return SDValue();
19677
19678 SDValue N0 = Value.getOperand(0);
19679 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
19680 Chain == SDValue(N0.getNode(), 1)) {
19681 LoadSDNode *LD = cast<LoadSDNode>(N0);
19682 if (LD->getBasePtr() != Ptr ||
19683 LD->getPointerInfo().getAddrSpace() !=
19684 ST->getPointerInfo().getAddrSpace())
19685 return SDValue();
19686
19687 // Find the type to narrow the load / op / store to.
19688 SDValue N1 = Value.getOperand(1);
19689 unsigned BitWidth = N1.getValueSizeInBits();
19690 APInt Imm = N1->getAsAPIntVal();
19691 if (Opc == ISD::AND)
19692 Imm ^= APInt::getAllOnes(BitWidth);
19693 if (Imm == 0 || Imm.isAllOnes())
19694 return SDValue();
19695 unsigned ShAmt = Imm.countr_zero();
19696 unsigned MSB = BitWidth - Imm.countl_zero() - 1;
19697 unsigned NewBW = NextPowerOf2(MSB - ShAmt);
19698 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
19699 // The narrowing should be profitable, the load/store operation should be
19700 // legal (or custom) and the store size should be equal to the NewVT width.
19701 while (NewBW < BitWidth &&
19702 (NewVT.getStoreSizeInBits() != NewBW ||
19703 !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
19704 !TLI.isNarrowingProfitable(VT, NewVT))) {
19705 NewBW = NextPowerOf2(NewBW);
19706 NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
19707 }
19708 if (NewBW >= BitWidth)
19709 return SDValue();
19710
19711 // If the lsb of the changed bits does not start on a boundary of the new
19712 // type's bitwidth, start at the previous boundary.
19713 if (ShAmt % NewBW)
19714 ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
19715 APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
19716 std::min(BitWidth, ShAmt + NewBW));
19717 if ((Imm & Mask) == Imm) {
19718 APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
19719 if (Opc == ISD::AND)
19720 NewImm ^= APInt::getAllOnes(NewBW);
19721 uint64_t PtrOff = ShAmt / 8;
19722 // For big endian targets, we need to adjust the offset to the pointer to
19723 // load the correct bytes.
19724 if (DAG.getDataLayout().isBigEndian())
19725 PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
19726
19727 unsigned IsFast = 0;
19728 Align NewAlign = commonAlignment(LD->getAlign(), PtrOff);
19729 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), NewVT,
19730 LD->getAddressSpace(), NewAlign,
19731 LD->getMemOperand()->getFlags(), &IsFast) ||
19732 !IsFast)
19733 return SDValue();
19734
19735 SDValue NewPtr =
19736 DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(PtrOff), SDLoc(LD));
19737 SDValue NewLD =
19738 DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
19739 LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
19740 LD->getMemOperand()->getFlags(), LD->getAAInfo());
19741 SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
19742 DAG.getConstant(NewImm, SDLoc(Value),
19743 NewVT));
19744 SDValue NewST =
19745 DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
19746 ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);
19747
19748 AddToWorklist(NewPtr.getNode());
19749 AddToWorklist(NewLD.getNode());
19750 AddToWorklist(NewVal.getNode());
19751 WorklistRemover DeadNodes(*this);
19752 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
19753 ++OpsNarrowed;
19754 return NewST;
19755 }
19756 }
19757
19758 return SDValue();
19759}
19760
19761/// For a given floating point load / store pair, if the load value isn't used
19762/// by any other operations, then consider transforming the pair to integer
19763/// load / store operations if the target deems the transformation profitable.
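//
// For example (illustrative only; applies when the target reports the integer
// load and store as legal, desirable and fast), an f32 copy of the form
//   (store f32 (load f32 p1), p2)
// whose loaded value has no other users can be rewritten as
//   (store i32 (load i32 p1), p2)
// avoiding an unnecessary round trip through the floating-point registers.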
19764SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
19765 StoreSDNode *ST = cast<StoreSDNode>(N);
19766 SDValue Value = ST->getValue();
19767 if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
19768 Value.hasOneUse()) {
19769 LoadSDNode *LD = cast<LoadSDNode>(Value);
19770 EVT VT = LD->getMemoryVT();
19771 if (!VT.isFloatingPoint() ||
19772 VT != ST->getMemoryVT() ||
19773 LD->isNonTemporal() ||
19774 ST->isNonTemporal() ||
19775 LD->getPointerInfo().getAddrSpace() != 0 ||
19776 ST->getPointerInfo().getAddrSpace() != 0)
19777 return SDValue();
19778
19779 TypeSize VTSize = VT.getSizeInBits();
19780
19781 // We don't know the size of scalable types at compile time so we cannot
19782 // create an integer of the equivalent size.
19783 if (VTSize.isScalable())
19784 return SDValue();
19785
19786 unsigned FastLD = 0, FastST = 0;
19787 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VTSize.getFixedValue());
19788 if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
19789 !TLI.isOperationLegal(ISD::STORE, IntVT) ||
19790 !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
19791 !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT) ||
19792 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,
19793 *LD->getMemOperand(), &FastLD) ||
19794 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,
19795 *ST->getMemOperand(), &FastST) ||
19796 !FastLD || !FastST)
19797 return SDValue();
19798
19799 SDValue NewLD =
19800 DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
19801 LD->getPointerInfo(), LD->getAlign());
19802
19803 SDValue NewST =
19804 DAG.getStore(ST->getChain(), SDLoc(N), NewLD, ST->getBasePtr(),
19805 ST->getPointerInfo(), ST->getAlign());
19806
19807 AddToWorklist(NewLD.getNode());
19808 AddToWorklist(NewST.getNode());
19809 WorklistRemover DeadNodes(*this);
19810 DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
19811 ++LdStFP2Int;
19812 return NewST;
19813 }
19814
19815 return SDValue();
19816}
19817
19818// This is a helper function for visitMUL to check the profitability
19819// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
19820// MulNode is the original multiply, AddNode is (add x, c1),
19821// and ConstNode is c2.
19822//
19823// If the (add x, c1) has multiple uses, we could increase
19824// the number of adds if we make this transformation.
19825// It would only be worth doing this if we can remove a
19826// multiply in the process. Check for that here.
19827// To illustrate:
19828// (A + c1) * c3
19829// (A + c2) * c3
19830// We're checking for cases where we have common "c3 * A" expressions.
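// Concretely (illustrative only), with c1 = 1, c2 = 2 and c3 = 5:
//   (A + 1) * 5 --> (A * 5) + 5
//   (A + 2) * 5 --> (A * 5) + 10
// so the common multiply (A * 5) can be computed once, removing one multiply.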
19831bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode, SDValue AddNode,
19832 SDValue ConstNode) {
19833 APInt Val;
19834
19835 // If the add only has one use, and the target thinks the folding is
19836 // profitable or does not lead to worse code, this would be OK to do.
19837 if (AddNode->hasOneUse() &&
19838 TLI.isMulAddWithConstProfitable(AddNode, ConstNode))
19839 return true;
19840
19841 // Walk all the users of the constant with which we're multiplying.
19842 for (SDNode *Use : ConstNode->uses()) {
19843 if (Use == MulNode) // This use is the one we're on right now. Skip it.
19844 continue;
19845
19846 if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
19847 SDNode *OtherOp;
19848 SDNode *MulVar = AddNode.getOperand(0).getNode();
19849
19850 // OtherOp is what we're multiplying against the constant.
19851 if (Use->getOperand(0) == ConstNode)
19852 OtherOp = Use->getOperand(1).getNode();
19853 else
19854 OtherOp = Use->getOperand(0).getNode();
19855
19856 // Check to see if multiply is with the same operand of our "add".
19857 //
19858 // ConstNode = CONST
19859 // Use = ConstNode * A <-- visiting Use. OtherOp is A.
19860 // ...
19861 // AddNode = (A + c1) <-- MulVar is A.
19862 // = AddNode * ConstNode <-- current visiting instruction.
19863 //
19864 // If we make this transformation, we will have a common
19865 // multiply (ConstNode * A) that we can save.
19866 if (OtherOp == MulVar)
19867 return true;
19868
19869 // Now check to see if a future expansion will give us a common
19870 // multiply.
19871 //
19872 // ConstNode = CONST
19873 // AddNode = (A + c1)
19874 // ... = AddNode * ConstNode <-- current visiting instruction.
19875 // ...
19876 // OtherOp = (A + c2)
19877 // Use = OtherOp * ConstNode <-- visiting Use.
19878 //
19879 // If we make this transformation, we will have a common
19880 // multiply (CONST * A) after we also do the same transformation
19881 // to the "Use" instruction.
19882 if (OtherOp->getOpcode() == ISD::ADD &&
19883 DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
19884 OtherOp->getOperand(0).getNode() == MulVar)
19885 return true;
19886 }
19887 }
19888
19889 // Didn't find a case where this would be profitable.
19890 return false;
19891}
19892
19893SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
19894 unsigned NumStores) {
19895 SmallVector<SDValue, 8> Chains;
19896 SmallPtrSet<const SDNode *, 8> Visited;
19897 SDLoc StoreDL(StoreNodes[0].MemNode);
19898
19899 for (unsigned i = 0; i < NumStores; ++i) {
19900 Visited.insert(StoreNodes[i].MemNode);
19901 }
19902
19903 // don't include nodes that are children or repeated nodes.
19904 for (unsigned i = 0; i < NumStores; ++i) {
19905 if (Visited.insert(StoreNodes[i].MemNode->getChain().getNode()).second)
19906 Chains.push_back(StoreNodes[i].MemNode->getChain());
19907 }
19908
19909 assert(!Chains.empty() && "Chain should have generated a chain");
19910 return DAG.getTokenFactor(StoreDL, Chains);
19911}
19912
19913bool DAGCombiner::hasSameUnderlyingObj(ArrayRef<MemOpLink> StoreNodes) {
19914 const Value *UnderlyingObj = nullptr;
19915 for (const auto &MemOp : StoreNodes) {
19916 const MachineMemOperand *MMO = MemOp.MemNode->getMemOperand();
19917 // A pseudo value such as a stack frame object has its own frame index and
19918 // size; we should not use the first store's frame index for other frames.
19919 if (MMO->getPseudoValue())
19920 return false;
19921
19922 if (!MMO->getValue())
19923 return false;
19924
19925 const Value *Obj = getUnderlyingObject(MMO->getValue());
19926
19927 if (UnderlyingObj && UnderlyingObj != Obj)
19928 return false;
19929
19930 if (!UnderlyingObj)
19931 UnderlyingObj = Obj;
19932 }
19933
19934 return true;
19935}
19936
19937bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
19938 SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
19939 bool IsConstantSrc, bool UseVector, bool UseTrunc) {
19940 // Make sure we have something to merge.
19941 if (NumStores < 2)
19942 return false;
19943
19944 assert((!UseTrunc || !UseVector) &&
19945 "This optimization cannot emit a vector truncating store");
19946
19947 // The latest Node in the DAG.
19948 SDLoc DL(StoreNodes[0].MemNode);
19949
19950 TypeSize ElementSizeBits = MemVT.getStoreSizeInBits();
19951 unsigned SizeInBits = NumStores * ElementSizeBits;
19952 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
19953
19954 std::optional<MachineMemOperand::Flags> Flags;
19955 AAMDNodes AAInfo;
19956 for (unsigned I = 0; I != NumStores; ++I) {
19957 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
19958 if (!Flags) {
19959 Flags = St->getMemOperand()->getFlags();
19960 AAInfo = St->getAAInfo();
19961 continue;
19962 }
19963 // Skip merging if there's an inconsistent flag.
19964 if (Flags != St->getMemOperand()->getFlags())
19965 return false;
19966 // Concatenate AA metadata.
19967 AAInfo = AAInfo.concat(St->getAAInfo());
19968 }
19969
19970 EVT StoreTy;
19971 if (UseVector) {
19972 unsigned Elts = NumStores * NumMemElts;
19973 // Get the type for the merged vector store.
19974 StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
19975 } else
19976 StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
19977
19978 SDValue StoredVal;
19979 if (UseVector) {
19980 if (IsConstantSrc) {
19981 SmallVector<SDValue, 8> BuildVector;
19982 for (unsigned I = 0; I != NumStores; ++I) {
19983 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
19984 SDValue Val = St->getValue();
19985 // If constant is of the wrong type, convert it now. This comes up
19986 // when one of our stores was truncating.
19987 if (MemVT != Val.getValueType()) {
19988 Val = peekThroughBitcasts(Val);
19989 // Deal with constants of wrong size.
19990 if (ElementSizeBits != Val.getValueSizeInBits()) {
19991 auto *C = dyn_cast<ConstantSDNode>(Val);
19992 if (!C)
19993 // Not clear how to truncate FP values.
19994 // TODO: Handle truncation of build_vector constants
19995 return false;
19996
19997 EVT IntMemVT =
19998 EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
19999 Val = DAG.getConstant(C->getAPIntValue()
20000 .zextOrTrunc(Val.getValueSizeInBits())
20001 .zextOrTrunc(ElementSizeBits),
20002 SDLoc(C), IntMemVT);
20003 }
20004 // Bitcast the correctly sized value to the expected memory type.
20005 Val = DAG.getBitcast(MemVT, Val);
20006 }
20007 BuildVector.push_back(Val);
20008 }
20009 StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
20010 : ISD::BUILD_VECTOR,
20011 DL, StoreTy, BuildVector);
20012 } else {
20013 SmallVector<SDValue, 8> Ops;
20014 for (unsigned i = 0; i < NumStores; ++i) {
20015 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
20016 SDValue Val = peekThroughBitcasts(St->getValue());
20017 // All operands of BUILD_VECTOR / CONCAT_VECTOR must be of
20018 // type MemVT. If the underlying value is not the correct
20019 // type, but it is an extraction of an appropriate vector we
20020 // can recast Val to be of the correct type. This may require
20021 // converting between EXTRACT_VECTOR_ELT and
20022 // EXTRACT_SUBVECTOR.
20023 if ((MemVT != Val.getValueType()) &&
20024 (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
20025 Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
20026 EVT MemVTScalarTy = MemVT.getScalarType();
20027 // We may need to add a bitcast here to get types to line up.
20028 if (MemVTScalarTy != Val.getValueType().getScalarType()) {
20029 Val = DAG.getBitcast(MemVT, Val);
20030 } else if (MemVT.isVector() &&
20031 Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
20032 Val = DAG.getNode(ISD::BUILD_VECTOR, DL, MemVT, Val);
20033 } else {
20034 unsigned OpC = MemVT.isVector() ? ISD::EXTRACT_SUBVECTOR
20035 : ISD::EXTRACT_VECTOR_ELT;
20036 SDValue Vec = Val.getOperand(0);
20037 SDValue Idx = Val.getOperand(1);
20038 Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Idx);
20039 }
20040 }
20041 Ops.push_back(Val);
20042 }
20043
20044 // Build the extracted vector elements back into a vector.
20045 StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
20046 : ISD::BUILD_VECTOR,
20047 DL, StoreTy, Ops);
20048 }
20049 } else {
20050 // We should always use a vector store when merging extracted vector
20051 // elements, so this path implies a store of constants.
20052 assert(IsConstantSrc && "Merged vector elements should use vector store");
20053
20054 APInt StoreInt(SizeInBits, 0);
20055
20056 // Construct a single integer constant which is made of the smaller
20057 // constant inputs.
20058 bool IsLE = DAG.getDataLayout().isLittleEndian();
20059 for (unsigned i = 0; i < NumStores; ++i) {
20060 unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
20061 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
20062
20063 SDValue Val = St->getValue();
20064 Val = peekThroughBitcasts(Val);
20065 StoreInt <<= ElementSizeBits;
20066 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
20067 StoreInt |= C->getAPIntValue()
20068 .zextOrTrunc(ElementSizeBits)
20069 .zextOrTrunc(SizeInBits);
20070 } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
20071 StoreInt |= C->getValueAPF()
20072 .bitcastToAPInt()
20073 .zextOrTrunc(ElementSizeBits)
20074 .zextOrTrunc(SizeInBits);
20075 // If fp truncation is necessary give up for now.
20076 if (MemVT.getSizeInBits() != ElementSizeBits)
20077 return false;
20078 } else if (ISD::isBuildVectorOfConstantSDNodes(Val.getNode()) ||
20079 ISD::isBuildVectorOfConstantFPSDNodes(Val.getNode())) {
20080 // Not yet handled
20081 return false;
20082 } else {
20083 llvm_unreachable("Invalid constant element type");
20084 }
20085 }
20086
20087 // Create the new Load and Store operations.
20088 StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
20089 }
20090
20091 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
20092 SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);
20093 bool CanReusePtrInfo = hasSameUnderlyingObj(StoreNodes);
20094
20095 // Make sure we use a truncating store if that is necessary for legality.
20096 // When generating the new widened store, if the first store's pointer info
20097 // cannot be reused, discard the pointer info except for the address space,
20098 // because the widened store can no longer be represented by the original
20099 // pointer info, which describes only the narrower memory object.
20100 SDValue NewStore;
20101 if (!UseTrunc) {
20102 NewStore = DAG.getStore(
20103 NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
20104 CanReusePtrInfo
20105 ? FirstInChain->getPointerInfo()
20106 : MachinePointerInfo(FirstInChain->getPointerInfo().getAddrSpace()),
20107 FirstInChain->getAlign(), *Flags, AAInfo);
20108 } else { // Must be realized as a trunc store
20109 EVT LegalizedStoredValTy =
20110 TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
20111 unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits();
20112 ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
20113 SDValue ExtendedStoreVal =
20114 DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
20115 LegalizedStoredValTy);
20116 NewStore = DAG.getTruncStore(
20117 NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
20118 CanReusePtrInfo
20119 ? FirstInChain->getPointerInfo()
20120 : MachinePointerInfo(FirstInChain->getPointerInfo().getAddrSpace()),
20121 StoredVal.getValueType() /*TVT*/, FirstInChain->getAlign(), *Flags,
20122 AAInfo);
20123 }
20124
20125 // Replace all merged stores with the new store.
20126 for (unsigned i = 0; i < NumStores; ++i)
20127 CombineTo(StoreNodes[i].MemNode, NewStore);
20128
20129 AddToWorklist(NewChain.getNode());
20130 return true;
20131}
20132
20133void DAGCombiner::getStoreMergeCandidates(
20134 StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes,
20135 SDNode *&RootNode) {
20136 // This holds the base pointer, index, and the offset in bytes from the base
20137 // pointer. We must have a base and an offset. Do not handle stores to undef
20138 // base pointers.
20139 BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
20140 if (!BasePtr.getBase().getNode() || BasePtr.getBase().isUndef())
20141 return;
20142
20143 SDValue Val = peekThroughBitcasts(St->getValue());
20144 StoreSource StoreSrc = getStoreSource(Val);
20145 assert(StoreSrc != StoreSource::Unknown && "Expected known source for store");
20146
20147 // Match on loadbaseptr if relevant.
20148 EVT MemVT = St->getMemoryVT();
20149 BaseIndexOffset LBasePtr;
20150 EVT LoadVT;
20151 if (StoreSrc == StoreSource::Load) {
20152 auto *Ld = cast<LoadSDNode>(Val);
20153 LBasePtr = BaseIndexOffset::match(Ld, DAG);
20154 LoadVT = Ld->getMemoryVT();
20155 // Load and store should be the same type.
20156 if (MemVT != LoadVT)
20157 return;
20158 // Loads must only have one use.
20159 if (!Ld->hasNUsesOfValue(1, 0))
20160 return;
20161 // The memory operands must not be volatile/indexed/atomic.
20162 // TODO: May be able to relax for unordered atomics (see D66309)
20163 if (!Ld->isSimple() || Ld->isIndexed())
20164 return;
20165 }
20166 auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
20167 int64_t &Offset) -> bool {
20168 // The memory operands must not be volatile/indexed/atomic.
20169 // TODO: May be able to relax for unordered atomics (see D66309)
20170 if (!Other->isSimple() || Other->isIndexed())
20171 return false;
20172 // Don't mix temporal stores with non-temporal stores.
20173 if (St->isNonTemporal() != Other->isNonTemporal())
20174 return false;
20175 if (!TLI.areTwoSDNodeTargetMMOFlagsMergeable(*St, *Other))
20176 return false;
20177 SDValue OtherBC = peekThroughBitcasts(Other->getValue());
20178 // Allow merging constants of different types as integers.
20179 bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
20180 : Other->getMemoryVT() != MemVT;
20181 switch (StoreSrc) {
20182 case StoreSource::Load: {
20183 if (NoTypeMatch)
20184 return false;
20185 // The Load's Base Ptr must also match.
20186 auto *OtherLd = dyn_cast<LoadSDNode>(OtherBC);
20187 if (!OtherLd)
20188 return false;
20189 BaseIndexOffset LPtr = BaseIndexOffset::match(OtherLd, DAG);
20190 if (LoadVT != OtherLd->getMemoryVT())
20191 return false;
20192 // Loads must only have one use.
20193 if (!OtherLd->hasNUsesOfValue(1, 0))
20194 return false;
20195 // The memory operands must not be volatile/indexed/atomic.
20196 // TODO: May be able to relax for unordered atomics (see D66309)
20197 if (!OtherLd->isSimple() || OtherLd->isIndexed())
20198 return false;
20199 // Don't mix temporal loads with non-temporal loads.
20200 if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal())
20201 return false;
20202 if (!TLI.areTwoSDNodeTargetMMOFlagsMergeable(*cast<LoadSDNode>(Val),
20203 *OtherLd))
20204 return false;
20205 if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
20206 return false;
20207 break;
20208 }
20209 case StoreSource::Constant:
20210 if (NoTypeMatch)
20211 return false;
20212 if (getStoreSource(OtherBC) != StoreSource::Constant)
20213 return false;
20214 break;
20215 case StoreSource::Extract:
20216 // Do not merge truncated stores here.
20217 if (Other->isTruncatingStore())
20218 return false;
20219 if (!MemVT.bitsEq(OtherBC.getValueType()))
20220 return false;
20221 if (OtherBC.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
20222 OtherBC.getOpcode() != ISD::EXTRACT_SUBVECTOR)
20223 return false;
20224 break;
20225 default:
20226 llvm_unreachable("Unhandled store source for merging");
20227 }
20228 Ptr = BaseIndexOffset::match(Other, DAG);
20229 return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
20230 };
20231
20232 // Check if the pair of StoreNode and the RootNode already bail out many
20233 // times which is over the limit in dependence check.
20234 auto OverLimitInDependenceCheck = [&](SDNode *StoreNode,
20235 SDNode *RootNode) -> bool {
20236 auto RootCount = StoreRootCountMap.find(StoreNode);
20237 return RootCount != StoreRootCountMap.end() &&
20238 RootCount->second.first == RootNode &&
20239 RootCount->second.second > StoreMergeDependenceLimit;
20240 };
20241
20242 auto TryToAddCandidate = [&](SDNode::use_iterator UseIter) {
20243 // This must be a chain use.
20244 if (UseIter.getOperandNo() != 0)
20245 return;
20246 if (auto *OtherStore = dyn_cast<StoreSDNode>(*UseIter)) {
20247 BaseIndexOffset Ptr;
20248 int64_t PtrDiff;
20249 if (CandidateMatch(OtherStore, Ptr, PtrDiff) &&
20250 !OverLimitInDependenceCheck(OtherStore, RootNode))
20251 StoreNodes.push_back(MemOpLink(OtherStore, PtrDiff));
20252 }
20253 };
20254
20255 // We are looking for a root node which is an ancestor of all mergeable
20256 // stores. We search up through a load, to our root and then down
20257 // through all children. For instance we will find Store{1,2,3} if
20258 // St is Store1, Store2, or Store3 where the root is not a load,
20259 // which is always true for non-volatile ops. TODO: Expand
20260 // the search to find all valid candidates through multiple layers of loads.
20261 //
20262 // Root
20263 // |-------|-------|
20264 // Load Load Store3
20265 // | |
20266 // Store1 Store2
20267 //
20268 // FIXME: We should be able to climb and
20269 // descend TokenFactors to find candidates as well.
20270
20271 RootNode = St->getChain().getNode();
20272
20273 unsigned NumNodesExplored = 0;
20274 const unsigned MaxSearchNodes = 1024;
20275 if (auto *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
20276 RootNode = Ldn->getChain().getNode();
20277 for (auto I = RootNode->use_begin(), E = RootNode->use_end();
20278 I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored) {
20279 if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) { // walk down chain
20280 for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
20281 TryToAddCandidate(I2);
20282 }
20283 // Check stores that depend on the root (e.g. Store 3 in the chart above).
20284 if (I.getOperandNo() == 0 && isa<StoreSDNode>(*I)) {
20285 TryToAddCandidate(I);
20286 }
20287 }
20288 } else {
20289 for (auto I = RootNode->use_begin(), E = RootNode->use_end();
20290 I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored)
20291 TryToAddCandidate(I);
20292 }
20293}
20294
20295// We need to check that merging these stores does not cause a loop in the
20296// DAG. Any store candidate may depend on another candidate indirectly through
20297// its operands. Check in parallel by searching up from operands of candidates.
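// For example (illustrative only): if candidate store St2 stores a value
// computed from a load Ld, and Ld is chained after another candidate St1,
// then a node merging St1 and St2 would both use Ld (through St2's stored
// value) and be a chain predecessor of Ld (through St1's position), forming
// a cycle.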
20298bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
20299 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
20300 SDNode *RootNode) {
20301 // FIXME: We should be able to truncate a full search of
20302 // predecessors by doing a BFS and keeping tabs on the originating
20303 // stores from which worklist nodes come, in a similar way to
20304 // TokenFactor simplification.
20305
20306 SmallPtrSet<const SDNode *, 32> Visited;
20307 SmallVector<const SDNode *, 8> Worklist;
20308
20309 // RootNode is a predecessor to all candidates so we need not search
20310 // past it. Add RootNode (peeking through TokenFactors). Do not count
20311 // these towards size check.
20312
20313 Worklist.push_back(RootNode);
20314 while (!Worklist.empty()) {
20315 auto N = Worklist.pop_back_val();
20316 if (!Visited.insert(N).second)
20317 continue; // Already present in Visited.
20318 if (N->getOpcode() == ISD::TokenFactor) {
20319 for (SDValue Op : N->ops())
20320 Worklist.push_back(Op.getNode());
20321 }
20322 }
20323
20324 // Don't count pruning nodes towards max.
20325 unsigned int Max = 1024 + Visited.size();
20326 // Search Ops of store candidates.
20327 for (unsigned i = 0; i < NumStores; ++i) {
20328 SDNode *N = StoreNodes[i].MemNode;
20329 // Of the 4 Store Operands:
20330 // * Chain (Op 0) -> We have already considered these
20331 // in candidate selection, but only by following the
20332 // chain dependencies. We could still have a chain
20333 // dependency to a load, that has a non-chain dep to
20334 // another load, that depends on a store, etc. So it is
20335 // possible to have dependencies that consist of a mix
20336 // of chain and non-chain deps, and we need to include
20337 // chain operands in the analysis here.
20338 // * Value (Op 1) -> Cycles may happen (e.g. through load chains)
20339 // * Address (Op 2) -> Merged addresses may only vary by a fixed constant,
20340 // but aren't necessarily from the same base node, so
20341 // cycles possible (e.g. via indexed store).
20342 // * (Op 3) -> Represents the pre or post-indexing offset (or undef for
20343 // non-indexed stores). Not constant on all targets (e.g. ARM)
20344 // and so can participate in a cycle.
20345 for (unsigned j = 0; j < N->getNumOperands(); ++j)
20346 Worklist.push_back(N->getOperand(j).getNode());
20347 }
20348 // Search through DAG. We can stop early if we find a store node.
20349 for (unsigned i = 0; i < NumStores; ++i)
20350 if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
20351 Max)) {
20352 // If the search bails out, record the StoreNode and RootNode in the
20353 // StoreRootCountMap. If we have seen the pair many times over a limit,
20354 // we won't add the StoreNode into StoreNodes set again.
20355 if (Visited.size() >= Max) {
20356 auto &RootCount = StoreRootCountMap[StoreNodes[i].MemNode];
20357 if (RootCount.first == RootNode)
20358 RootCount.second++;
20359 else
20360 RootCount = {RootNode, 1};
20361 }
20362 return false;
20363 }
20364 return true;
20365}
20366
20367unsigned
20368DAGCombiner::getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
20369 int64_t ElementSizeBytes) const {
20370 while (true) {
20371 // Find a store past the width of the first store.
20372 size_t StartIdx = 0;
20373 while ((StartIdx + 1 < StoreNodes.size()) &&
20374 StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
20375 StoreNodes[StartIdx + 1].OffsetFromBase)
20376 ++StartIdx;
20377
20378 // Bail if we don't have enough candidates to merge.
20379 if (StartIdx + 1 >= StoreNodes.size())
20380 return 0;
20381
20382 // Trim stores that overlapped with the first store.
20383 if (StartIdx)
20384 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
20385
20386 // Scan the memory operations on the chain and find the first
20387 // non-consecutive store memory address.
20388 unsigned NumConsecutiveStores = 1;
20389 int64_t StartAddress = StoreNodes[0].OffsetFromBase;
20390 // Check that the addresses are consecutive starting from the second
20391 // element in the list of stores.
20392 for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
20393 int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
20394 if (CurrAddress - StartAddress != (ElementSizeBytes * i))
20395 break;
20396 NumConsecutiveStores = i + 1;
20397 }
20398 if (NumConsecutiveStores > 1)
20399 return NumConsecutiveStores;
20400
20401 // There are no consecutive stores at the start of the list.
20402 // Remove the first store and try again.
20403 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
20404 }
20405}
20406
20407bool DAGCombiner::tryStoreMergeOfConstants(
20408 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
20409 EVT MemVT, SDNode *RootNode, bool AllowVectors) {
20410 LLVMContext &Context = *DAG.getContext();
20411 const DataLayout &DL = DAG.getDataLayout();
20412 int64_t ElementSizeBytes = MemVT.getStoreSize();
20413 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
20414 bool MadeChange = false;
20415
20416 // Store the constants into memory as one consecutive store.
20417 while (NumConsecutiveStores >= 2) {
20418 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
20419 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
20420 Align FirstStoreAlign = FirstInChain->getAlign();
20421 unsigned LastLegalType = 1;
20422 unsigned LastLegalVectorType = 1;
20423 bool LastIntegerTrunc = false;
20424 bool NonZero = false;
20425 unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
20426 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
20427 StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
20428 SDValue StoredVal = ST->getValue();
20429 bool IsElementZero = false;
20430 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
20431 IsElementZero = C->isZero();
20432 else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
20433 IsElementZero = C->getConstantFPValue()->isNullValue();
20434 else if (ISD::isBuildVectorAllZeros(StoredVal.getNode()))
20435 IsElementZero = true;
20436 if (IsElementZero) {
20437 if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
20438 FirstZeroAfterNonZero = i;
20439 }
20440 NonZero |= !IsElementZero;
20441
20442 // Find a legal type for the constant store.
20443 unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
20444 EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
20445 unsigned IsFast = 0;
20446
20447 // Break early when size is too large to be legal.
20448 if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
20449 break;
20450
20451 if (TLI.isTypeLegal(StoreTy) &&
20452 TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
20453 DAG.getMachineFunction()) &&
20454 TLI.allowsMemoryAccess(Context, DL, StoreTy,
20455 *FirstInChain->getMemOperand(), &IsFast) &&
20456 IsFast) {
20457 LastIntegerTrunc = false;
20458 LastLegalType = i + 1;
20459 // Or check whether a truncstore is legal.
20460 } else if (TLI.getTypeAction(Context, StoreTy) ==
20461 TargetLowering::TypePromoteInteger) {
20462 EVT LegalizedStoredValTy =
20463 TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
20464 if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
20465 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy,
20466 DAG.getMachineFunction()) &&
20467 TLI.allowsMemoryAccess(Context, DL, StoreTy,
20468 *FirstInChain->getMemOperand(), &IsFast) &&
20469 IsFast) {
20470 LastIntegerTrunc = true;
20471 LastLegalType = i + 1;
20472 }
20473 }
20474
20475 // We only use vectors if the target allows it and the function is not
20476 // marked with the noimplicitfloat attribute.
20477 if (TLI.storeOfVectorConstantIsCheap(!NonZero, MemVT, i + 1, FirstStoreAS) &&
20478 AllowVectors) {
20479 // Find a legal type for the vector store.
20480 unsigned Elts = (i + 1) * NumMemElts;
20481 EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
20482 if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
20483 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&
20484 TLI.allowsMemoryAccess(Context, DL, Ty,
20485 *FirstInChain->getMemOperand(), &IsFast) &&
20486 IsFast)
20487 LastLegalVectorType = i + 1;
20488 }
20489 }
20490
20491 bool UseVector = (LastLegalVectorType > LastLegalType) && AllowVectors;
20492 unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
20493 bool UseTrunc = LastIntegerTrunc && !UseVector;
20494
20495 // Check if we found a legal integer type that creates a meaningful
20496 // merge.
20497 if (NumElem < 2) {
20498 // We know that candidate stores are in order and of correct
20499 // shape. While there is no mergeable sequence from the
20500 // beginning one may start later in the sequence. The only
20501 // reason a merge of size N could have failed where another of
20502 // the same size would not have, is if the alignment has
20503 // improved or we've dropped a non-zero value. Drop as many
20504 // candidates as we can here.
20505 unsigned NumSkip = 1;
20506 while ((NumSkip < NumConsecutiveStores) &&
20507 (NumSkip < FirstZeroAfterNonZero) &&
20508 (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
20509 NumSkip++;
20510
20511 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
20512 NumConsecutiveStores -= NumSkip;
20513 continue;
20514 }
20515
20516 // Check that we can merge these candidates without causing a cycle.
20517 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
20518 RootNode)) {
20519 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
20520 NumConsecutiveStores -= NumElem;
20521 continue;
20522 }
20523
20524 MadeChange |= mergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
20525 /*IsConstantSrc*/ true,
20526 UseVector, UseTrunc);
20527
20528 // Remove merged stores for next iteration.
20529 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
20530 NumConsecutiveStores -= NumElem;
20531 }
20532 return MadeChange;
20533}
20534
20535bool DAGCombiner::tryStoreMergeOfExtracts(
20536 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
20537 EVT MemVT, SDNode *RootNode) {
20538 LLVMContext &Context = *DAG.getContext();
20539 const DataLayout &DL = DAG.getDataLayout();
20540 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
20541 bool MadeChange = false;
20542
20543 // Loop on Consecutive Stores on success.
20544 while (NumConsecutiveStores >= 2) {
20545 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
20546 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
20547 Align FirstStoreAlign = FirstInChain->getAlign();
20548 unsigned NumStoresToMerge = 1;
20549 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
20550 // Find a legal type for the vector store.
20551 unsigned Elts = (i + 1) * NumMemElts;
20552 EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
20553 unsigned IsFast = 0;
20554
20555 // Break early when size is too large to be legal.
20556 if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
20557 break;
20558
20559 if (TLI.isTypeLegal(Ty) &&
20560 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&
20561 TLI.allowsMemoryAccess(Context, DL, Ty,
20562 *FirstInChain->getMemOperand(), &IsFast) &&
20563 IsFast)
20564 NumStoresToMerge = i + 1;
20565 }
20566
20567 // Check if we found a legal vector type that creates a meaningful
20568 // merge.
20569 if (NumStoresToMerge < 2) {
20570 // We know that candidate stores are in order and of correct
20571 // shape. While there is no mergeable sequence from the
20572 // beginning one may start later in the sequence. The only
20573 // reason a merge of size N could have failed where another of
20574 // the same size would not have, is if the alignment has
20575 // improved. Drop as many candidates as we can here.
20576 unsigned NumSkip = 1;
20577 while ((NumSkip < NumConsecutiveStores) &&
20578 (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
20579 NumSkip++;
20580
20581 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
20582 NumConsecutiveStores -= NumSkip;
20583 continue;
20584 }
20585
20586 // Check that we can merge these candidates without causing a cycle.
20587 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumStoresToMerge,
20588 RootNode)) {
20589 StoreNodes.erase(StoreNodes.begin(),
20590 StoreNodes.begin() + NumStoresToMerge);
20591 NumConsecutiveStores -= NumStoresToMerge;
20592 continue;
20593 }
20594
20595 MadeChange |= mergeStoresOfConstantsOrVecElts(
20596 StoreNodes, MemVT, NumStoresToMerge, /*IsConstantSrc*/ false,
20597 /*UseVector*/ true, /*UseTrunc*/ false);
20598
20599 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumStoresToMerge);
20600 NumConsecutiveStores -= NumStoresToMerge;
20601 }
20602 return MadeChange;
20603}
20604
20605bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
20606 unsigned NumConsecutiveStores, EVT MemVT,
20607 SDNode *RootNode, bool AllowVectors,
20608 bool IsNonTemporalStore,
20609 bool IsNonTemporalLoad) {
20610 LLVMContext &Context = *DAG.getContext();
20611 const DataLayout &DL = DAG.getDataLayout();
20612 int64_t ElementSizeBytes = MemVT.getStoreSize();
20613 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
20614 bool MadeChange = false;
20615
20616 // Look for load nodes which are used by the stored values.
20617 SmallVector<MemOpLink, 8> LoadNodes;
20618
20619 // Find acceptable loads. Loads need to have the same chain (token factor),
20620 // must not be zext, volatile, indexed, and they must be consecutive.
20621 BaseIndexOffset LdBasePtr;
20622
20623 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
20624 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
20625 SDValue Val = peekThroughBitcasts(St->getValue());
20626 LoadSDNode *Ld = cast<LoadSDNode>(Val);
20627
20628 BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
20629 // If this is not the first ptr that we check.
20630 int64_t LdOffset = 0;
20631 if (LdBasePtr.getBase().getNode()) {
20632 // The base ptr must be the same.
20633 if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
20634 break;
20635 } else {
20636 // Check that all other base pointers are the same as this one.
20637 LdBasePtr = LdPtr;
20638 }
20639
20640 // We found a potential memory operand to merge.
20641 LoadNodes.push_back(MemOpLink(Ld, LdOffset));
20642 }
20643
20644 while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
20645 Align RequiredAlignment;
20646 bool NeedRotate = false;
20647 if (LoadNodes.size() == 2) {
20648 // If we have load/store pair instructions and we only have two values,
20649 // don't bother merging.
20650 if (TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
20651 StoreNodes[0].MemNode->getAlign() >= RequiredAlignment) {
20652 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
20653 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
20654 break;
20655 }
20656 // If the loads are reversed, see if we can rotate the halves into place.
20657 int64_t Offset0 = LoadNodes[0].OffsetFromBase;
20658 int64_t Offset1 = LoadNodes[1].OffsetFromBase;
20659 EVT PairVT = EVT::getIntegerVT(Context, ElementSizeBytes * 8 * 2);
20660 if (Offset0 - Offset1 == ElementSizeBytes &&
20661 (hasOperation(ISD::ROTL, PairVT) ||
20662 hasOperation(ISD::ROTR, PairVT))) {
20663 std::swap(LoadNodes[0], LoadNodes[1]);
20664 NeedRotate = true;
20665 }
20666 }
20667 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
20668 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
20669 Align FirstStoreAlign = FirstInChain->getAlign();
20670 LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
20671
20672 // Scan the memory operations on the chain and find the first
20673 // non-consecutive load memory address. These variables hold the index in
20674 // the store node array.
20675
20676 unsigned LastConsecutiveLoad = 1;
20677
20678 // This variable refers to the size and not index in the array.
20679 unsigned LastLegalVectorType = 1;
20680 unsigned LastLegalIntegerType = 1;
20681 bool isDereferenceable = true;
20682 bool DoIntegerTruncate = false;
20683 int64_t StartAddress = LoadNodes[0].OffsetFromBase;
20684 SDValue LoadChain = FirstLoad->getChain();
20685 for (unsigned i = 1; i < LoadNodes.size(); ++i) {
20686 // All loads must share the same chain.
20687 if (LoadNodes[i].MemNode->getChain() != LoadChain)
20688 break;
20689
20690 int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
20691 if (CurrAddress - StartAddress != (ElementSizeBytes * i))
20692 break;
20693 LastConsecutiveLoad = i;
20694
20695 if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
20696 isDereferenceable = false;
20697
20698 // Find a legal type for the vector store.
20699 unsigned Elts = (i + 1) * NumMemElts;
20700 EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
20701
20702 // Break early when size is too large to be legal.
20703 if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
20704 break;
20705
20706 unsigned IsFastSt = 0;
20707 unsigned IsFastLd = 0;
20708 // Don't try vector types if we need a rotate. We may still fail the
20709 // legality checks for the integer type, but we can't handle the rotate
20710 // case with vectors.
20711 // FIXME: We could use a shuffle in place of the rotate.
20712 if (!NeedRotate && TLI.isTypeLegal(StoreTy) &&
20713 TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
20714 DAG.getMachineFunction()) &&
20715 TLI.allowsMemoryAccess(Context, DL, StoreTy,
20716 *FirstInChain->getMemOperand(), &IsFastSt) &&
20717 IsFastSt &&
20718 TLI.allowsMemoryAccess(Context, DL, StoreTy,
20719 *FirstLoad->getMemOperand(), &IsFastLd) &&
20720 IsFastLd) {
20721 LastLegalVectorType = i + 1;
20722 }
20723
20724 // Find a legal type for the integer store.
20725 unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
20726 StoreTy = EVT::getIntegerVT(Context, SizeInBits);
20727 if (TLI.isTypeLegal(StoreTy) &&
20728 TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
20729 DAG.getMachineFunction()) &&
20730 TLI.allowsMemoryAccess(Context, DL, StoreTy,
20731 *FirstInChain->getMemOperand(), &IsFastSt) &&
20732 IsFastSt &&
20733 TLI.allowsMemoryAccess(Context, DL, StoreTy,
20734 *FirstLoad->getMemOperand(), &IsFastLd) &&
20735 IsFastLd) {
20736 LastLegalIntegerType = i + 1;
20737 DoIntegerTruncate = false;
20738 // Or check whether a truncstore and extload is legal.
20739 } else if (TLI.getTypeAction(Context, StoreTy) ==
20740 TargetLowering::TypePromoteInteger) {
20741 EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
20742 if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
20743 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy,
20744 DAG.getMachineFunction()) &&
20745 TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy, StoreTy) &&
20746 TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy, StoreTy) &&
20747 TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
20748 TLI.allowsMemoryAccess(Context, DL, StoreTy,
20749 *FirstInChain->getMemOperand(), &IsFastSt) &&
20750 IsFastSt &&
20751 TLI.allowsMemoryAccess(Context, DL, StoreTy,
20752 *FirstLoad->getMemOperand(), &IsFastLd) &&
20753 IsFastLd) {
20754 LastLegalIntegerType = i + 1;
20755 DoIntegerTruncate = true;
20756 }
20757 }
20758 }
20759
20760 // Only use vector types if the vector type is larger than the integer
20761 // type. If they are the same, use integers.
20762 bool UseVectorTy =
20763 LastLegalVectorType > LastLegalIntegerType && AllowVectors;
20764 unsigned LastLegalType =
20765 std::max(LastLegalVectorType, LastLegalIntegerType);
20766
20767 // We add +1 here because the LastXXX variables refer to a location (index)
20768 // while NumElem refers to the number of elements (array size).
20769 unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
20770 NumElem = std::min(LastLegalType, NumElem);
20771 Align FirstLoadAlign = FirstLoad->getAlign();
20772
20773 if (NumElem < 2) {
20774 // We know that candidate stores are in order and of correct
20775 // shape. While there is no mergeable sequence from the
20776 // beginning one may start later in the sequence. The only
20777 // reason a merge of size N could have failed where another of
20778 // the same size would not have is if the alignment or either
20779 // the load or store has improved. Drop as many candidates as we
20780 // can here.
20781 unsigned NumSkip = 1;
20782 while ((NumSkip < LoadNodes.size()) &&
20783 (LoadNodes[NumSkip].MemNode->getAlign() <= FirstLoadAlign) &&
20784 (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
20785 NumSkip++;
20786 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
20787 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
20788 NumConsecutiveStores -= NumSkip;
20789 continue;
20790 }
20791
20792 // Check that we can merge these candidates without causing a cycle.
20793 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
20794 RootNode)) {
20795 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
20796 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
20797 NumConsecutiveStores -= NumElem;
20798 continue;
20799 }
20800
20801 // Find if it is better to use vectors or integers to load and store
20802 // to memory.
20803 EVT JointMemOpVT;
20804 if (UseVectorTy) {
20805 // Find a legal type for the vector store.
20806 unsigned Elts = NumElem * NumMemElts;
20807 JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
20808 } else {
20809 unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
20810 JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
20811 }
20812
20813 SDLoc LoadDL(LoadNodes[0].MemNode);
20814 SDLoc StoreDL(StoreNodes[0].MemNode);
20815
20816 // The merged loads are required to have the same incoming chain, so
20817 // using the first's chain is acceptable.
20818
20819 SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
20820 bool CanReusePtrInfo = hasSameUnderlyingObj(StoreNodes);
20821 AddToWorklist(NewStoreChain.getNode());
20822
20823 MachineMemOperand::Flags LdMMOFlags =
20824 isDereferenceable ? MachineMemOperand::MODereferenceable
20825 : MachineMemOperand::MONone;
20826 if (IsNonTemporalLoad)
20827 LdMMOFlags |= MachineMemOperand::MONonTemporal;
20828
20829 LdMMOFlags |= TLI.getTargetMMOFlags(*FirstLoad);
20830
20831 MachineMemOperand::Flags StMMOFlags = IsNonTemporalStore
20832 ? MachineMemOperand::MONonTemporal
20833 : MachineMemOperand::MONone;
20834
20835 StMMOFlags |= TLI.getTargetMMOFlags(*StoreNodes[0].MemNode);
20836
20837 SDValue NewLoad, NewStore;
20838 if (UseVectorTy || !DoIntegerTruncate) {
20839 NewLoad = DAG.getLoad(
20840 JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(),
20841 FirstLoad->getPointerInfo(), FirstLoadAlign, LdMMOFlags);
20842 SDValue StoreOp = NewLoad;
20843 if (NeedRotate) {
20844 unsigned LoadWidth = ElementSizeBytes * 8 * 2;
20845 assert(JointMemOpVT == EVT::getIntegerVT(Context, LoadWidth) &&
20846 "Unexpected type for rotate-able load pair");
20847 SDValue RotAmt =
20848 DAG.getShiftAmountConstant(LoadWidth / 2, JointMemOpVT, LoadDL);
20849 // Target can convert to the identical ROTR if it does not have ROTL.
20850 StoreOp = DAG.getNode(ISD::ROTL, LoadDL, JointMemOpVT, NewLoad, RotAmt);
20851 }
20852 NewStore = DAG.getStore(
20853 NewStoreChain, StoreDL, StoreOp, FirstInChain->getBasePtr(),
20854 CanReusePtrInfo ? FirstInChain->getPointerInfo()
20855 : MachinePointerInfo(FirstStoreAS),
20856 FirstStoreAlign, StMMOFlags);
20857 } else { // This must be the truncstore/extload case
20858 EVT ExtendedTy =
20859 TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
20860 NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
20861 FirstLoad->getChain(), FirstLoad->getBasePtr(),
20862 FirstLoad->getPointerInfo(), JointMemOpVT,
20863 FirstLoadAlign, LdMMOFlags);
20864 NewStore = DAG.getTruncStore(
20865 NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
20866 CanReusePtrInfo ? FirstInChain->getPointerInfo()
20867 : MachinePointerInfo(FirstStoreAS),
20868 JointMemOpVT, FirstInChain->getAlign(),
20869 FirstInChain->getMemOperand()->getFlags());
20870 }
20871
20872 // Transfer chain users from old loads to the new load.
20873 for (unsigned i = 0; i < NumElem; ++i) {
20874 LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
20875 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
20876 SDValue(NewLoad.getNode(), 1));
20877 }
20878
20879 // Replace all stores with the new store. Recursively remove corresponding
20880 // values if they are no longer used.
20881 for (unsigned i = 0; i < NumElem; ++i) {
20882 SDValue Val = StoreNodes[i].MemNode->getOperand(1);
20883 CombineTo(StoreNodes[i].MemNode, NewStore);
20884 if (Val->use_empty())
20885 recursivelyDeleteUnusedNodes(Val.getNode());
20886 }
20887
20888 MadeChange = true;
20889 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
20890 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
20891 NumConsecutiveStores -= NumElem;
20892 }
20893 return MadeChange;
20894}
20895
20896bool DAGCombiner::mergeConsecutiveStores(StoreSDNode *St) {
20897 if (OptLevel == CodeGenOptLevel::None || !EnableStoreMerging)
20898 return false;
20899
20900 // TODO: Extend this function to merge stores of scalable vectors.
20901 // (i.e. two <vscale x 8 x i8> stores can be merged to one <vscale x 16 x i8>
20902 // store since we know <vscale x 16 x i8> is exactly twice as large as
20903 // <vscale x 8 x i8>). Until then, bail out for scalable vectors.
20904 EVT MemVT = St->getMemoryVT();
20905 if (MemVT.isScalableVT())
20906 return false;
20907 if (!MemVT.isSimple() || MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
20908 return false;
20909
20910 // This function cannot currently deal with non-byte-sized memory sizes.
20911 int64_t ElementSizeBytes = MemVT.getStoreSize();
20912 if (ElementSizeBytes * 8 != (int64_t)MemVT.getSizeInBits())
20913 return false;
20914
20915 // Do not bother looking at stored values that are not constants, loads, or
20916 // extracted vector elements.
20917 SDValue StoredVal = peekThroughBitcasts(St->getValue());
20918 const StoreSource StoreSrc = getStoreSource(StoredVal);
20919 if (StoreSrc == StoreSource::Unknown)
20920 return false;
20921
20922 SmallVector<MemOpLink, 8> StoreNodes;
20923 SDNode *RootNode;
20924 // Find potential store merge candidates by searching through chain sub-DAG
20925 getStoreMergeCandidates(St, StoreNodes, RootNode);
20926
20927 // Check if there is anything to merge.
20928 if (StoreNodes.size() < 2)
20929 return false;
20930
20931 // Sort the memory operands according to their distance from the
20932 // base pointer.
20933 llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) {
20934 return LHS.OffsetFromBase < RHS.OffsetFromBase;
20935 });
20936
20937 bool AllowVectors = !DAG.getMachineFunction().getFunction().hasFnAttribute(
20938 Attribute::NoImplicitFloat);
20939 bool IsNonTemporalStore = St->isNonTemporal();
20940 bool IsNonTemporalLoad = StoreSrc == StoreSource::Load &&
20941 cast<LoadSDNode>(StoredVal)->isNonTemporal();
20942
20943 // Store Merge attempts to merge the lowest stores first. This generally
20944 // works out well: if the merge succeeds, the remaining stores are checked
20945 // after the first collection of stores is merged. However, in the
20946 // case that a non-mergeable store is found first, e.g., {p[-2],
20947 // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent
20948 // mergeable cases. To prevent this, we prune such stores from the
20949 // front of StoreNodes here.
20950 bool MadeChange = false;
20951 while (StoreNodes.size() > 1) {
20952 unsigned NumConsecutiveStores =
20953 getConsecutiveStores(StoreNodes, ElementSizeBytes);
20954 // There are no more stores in the list to examine.
20955 if (NumConsecutiveStores == 0)
20956 return MadeChange;
20957
20958 // We have at least 2 consecutive stores. Try to merge them.
20959 assert(NumConsecutiveStores >= 2 && "Expected at least 2 stores");
20960 switch (StoreSrc) {
20961 case StoreSource::Constant:
20962 MadeChange |= tryStoreMergeOfConstants(StoreNodes, NumConsecutiveStores,
20963 MemVT, RootNode, AllowVectors);
20964 break;
20965
20966 case StoreSource::Extract:
20967 MadeChange |= tryStoreMergeOfExtracts(StoreNodes, NumConsecutiveStores,
20968 MemVT, RootNode);
20969 break;
20970
20971 case StoreSource::Load:
20972 MadeChange |= tryStoreMergeOfLoads(StoreNodes, NumConsecutiveStores,
20973 MemVT, RootNode, AllowVectors,
20974 IsNonTemporalStore, IsNonTemporalLoad);
20975 break;
20976
20977 default:
20978 llvm_unreachable("Unhandled store source type");
20979 }
20980 }
20981 return MadeChange;
20982}
20983
20984SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
20985 SDLoc SL(ST);
20986 SDValue ReplStore;
20987
20988 // Replace the chain to avoid dependency.
20989 if (ST->isTruncatingStore()) {
20990 ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
20991 ST->getBasePtr(), ST->getMemoryVT(),
20992 ST->getMemOperand());
20993 } else {
20994 ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
20995 ST->getMemOperand());
20996 }
20997
20998 // Create token to keep both nodes around.
20999 SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
21000 MVT::Other, ST->getChain(), ReplStore);
21001
21002 // Make sure the new and old chains are cleaned up.
21003 AddToWorklist(Token.getNode());
21004
21005 // Don't add users to work list.
21006 return CombineTo(ST, Token, false);
21007}
21008
21009SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
21010 SDValue Value = ST->getValue();
21011 if (Value.getOpcode() == ISD::TargetConstantFP)
21012 return SDValue();
21013
21014 if (!ISD::isNormalStore(ST))
21015 return SDValue();
21016
21017 SDLoc DL(ST);
21018
21019 SDValue Chain = ST->getChain();
21020 SDValue Ptr = ST->getBasePtr();
21021
21022 const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
21023
21024 // NOTE: If the original store is volatile, this transform must not increase
21025 // the number of stores. For example, on x86-32 an f64 can be stored in one
21026 // processor operation but an i64 (which is not legal) requires two. So the
21027 // transform should not be done in this case.
21028
21029 SDValue Tmp;
21030 switch (CFP->getSimpleValueType(0).SimpleTy) {
21031 default:
21032 llvm_unreachable("Unknown FP type");
21033 case MVT::f16: // We don't do this for these yet.
21034 case MVT::bf16:
21035 case MVT::f80:
21036 case MVT::f128:
21037 case MVT::ppcf128:
21038 return SDValue();
21039 case MVT::f32:
21040 if ((isTypeLegal(MVT::i32) && !LegalOperations && ST->isSimple()) ||
21041 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
21042 Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
21043 bitcastToAPInt().getZExtValue(), SDLoc(CFP),
21044 MVT::i32);
21045 return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
21046 }
21047
21048 return SDValue();
21049 case MVT::f64:
21050 if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
21051 ST->isSimple()) ||
21052 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
21053 Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
21054 getZExtValue(), SDLoc(CFP), MVT::i64);
21055 return DAG.getStore(Chain, DL, Tmp,
21056 Ptr, ST->getMemOperand());
21057 }
21058
21059 if (ST->isSimple() && TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32) &&
21060 !TLI.isFPImmLegal(CFP->getValueAPF(), MVT::f64)) {
21061 // Many FP stores are not made apparent until after legalize, e.g. for
21062 // argument passing. Since this is so common, custom legalize the
21063 // 64-bit integer store into two 32-bit stores.
21064 uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
21065 SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
21066 SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
21067 if (DAG.getDataLayout().isBigEndian())
21068 std::swap(Lo, Hi);
21069
21070 MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
21071 AAMDNodes AAInfo = ST->getAAInfo();
21072
21073 SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
21074 ST->getOriginalAlign(), MMOFlags, AAInfo);
21075 Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(4), DL);
21076 SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
21077 ST->getPointerInfo().getWithOffset(4),
21078 ST->getOriginalAlign(), MMOFlags, AAInfo);
21079 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
21080 St0, St1);
21081 }
21082
21083 return SDValue();
21084 }
21085}
21086
21087// (store (insert_vector_elt (load p), x, i), p) -> (store x, p+offset)
21088//
21089// If a store of a load with an element inserted into it has no other
21090// uses in between the chain, then we can consider the vector store
21091// dead and replace it with just the single scalar element store.
21092SDValue DAGCombiner::replaceStoreOfInsertLoad(StoreSDNode *ST) {
21093 SDLoc DL(ST);
21094 SDValue Value = ST->getValue();
21095 SDValue Ptr = ST->getBasePtr();
21096 SDValue Chain = ST->getChain();
21097 if (Value.getOpcode() != ISD::INSERT_VECTOR_ELT || !Value.hasOneUse())
21098 return SDValue();
21099
21100 SDValue Elt = Value.getOperand(1);
21101 SDValue Idx = Value.getOperand(2);
21102
21103 // If the element isn't byte sized or is implicitly truncated then we can't
21104 // compute an offset.
21105 EVT EltVT = Elt.getValueType();
21106 if (!EltVT.isByteSized() ||
21107 EltVT != Value.getOperand(0).getValueType().getVectorElementType())
21108 return SDValue();
21109
21110 auto *Ld = dyn_cast<LoadSDNode>(Value.getOperand(0));
21111 if (!Ld || Ld->getBasePtr() != Ptr ||
21112 ST->getMemoryVT() != Ld->getMemoryVT() || !ST->isSimple() ||
21113 !ISD::isNormalStore(ST) ||
21114 Ld->getAddressSpace() != ST->getAddressSpace() ||
21115 !Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1)))
21116 return SDValue();
21117
21118 unsigned IsFast;
21119 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
21120 Elt.getValueType(), ST->getAddressSpace(),
21121 ST->getAlign(), ST->getMemOperand()->getFlags(),
21122 &IsFast) ||
21123 !IsFast)
21124 return SDValue();
21125
21126 MachinePointerInfo PointerInfo(ST->getAddressSpace());
21127
21128 // If the offset is a known constant then try to recover the pointer
21129 // info.
21130 SDValue NewPtr;
21131 if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx)) {
21132 unsigned COffset = CIdx->getSExtValue() * EltVT.getSizeInBits() / 8;
21133 NewPtr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(COffset), DL);
21134 PointerInfo = ST->getPointerInfo().getWithOffset(COffset);
21135 } else {
21136 NewPtr = TLI.getVectorElementPointer(DAG, Ptr, Value.getValueType(), Idx);
21137 }
21138
21139 return DAG.getStore(Chain, DL, Elt, NewPtr, PointerInfo, ST->getAlign(),
21140 ST->getMemOperand()->getFlags());
21141}
21142
21143SDValue DAGCombiner::visitATOMIC_STORE(SDNode *N) {
21144 AtomicSDNode *ST = cast<AtomicSDNode>(N);
21145 SDValue Val = ST->getVal();
21146 EVT VT = Val.getValueType();
21147 EVT MemVT = ST->getMemoryVT();
21148
21149 if (MemVT.bitsLT(VT)) { // Is truncating store
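// Only the low MemVT bits of Val are actually stored, so the high bits are
// free to be simplified; e.g. an i32 value stored as an i16 atomic store
// demands only its low 16 bits.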
21150 APInt TruncDemandedBits = APInt::getLowBitsSet(VT.getScalarSizeInBits(),
21151 MemVT.getScalarSizeInBits());
21152 // See if we can simplify the operation with SimplifyDemandedBits, which
21153 // only works if the value has a single use.
21154 if (SimplifyDemandedBits(Val, TruncDemandedBits))
21155 return SDValue(N, 0);
21156 }
21157
21158 return SDValue();
21159}
21160
21161SDValue DAGCombiner::visitSTORE(SDNode *N) {
21162 StoreSDNode *ST = cast<StoreSDNode>(N);
21163 SDValue Chain = ST->getChain();
21164 SDValue Value = ST->getValue();
21165 SDValue Ptr = ST->getBasePtr();
21166
21167 // If this is a store of a bit convert, store the input value if the
21168 // resultant store does not need a higher alignment than the original.
21169 if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
21170 ST->isUnindexed()) {
21171 EVT SVT = Value.getOperand(0).getValueType();
21172 // If the store is volatile, we only want to change the store type if the
21173 // resulting store is legal. Otherwise we might increase the number of
21174 // memory accesses. We don't care if the original type was legal or not
21175 // as we assume software couldn't rely on the number of accesses of an
21176 // illegal type.
21177 // TODO: May be able to relax for unordered atomics (see D66309)
21178 if (((!LegalOperations && ST->isSimple()) ||
21179 TLI.isOperationLegal(ISD::STORE, SVT)) &&
21180 TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT,
21181 DAG, *ST->getMemOperand())) {
21182 return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
21183 ST->getMemOperand());
21184 }
21185 }
21186
21187 // Turn 'store undef, Ptr' -> nothing.
21188 if (Value.isUndef() && ST->isUnindexed())
21189 return Chain;
21190
21191 // Try to infer better alignment information than the store already has.
21192 if (OptLevel != CodeGenOptLevel::None && ST->isUnindexed() &&
21193 !ST->isAtomic()) {
21194 if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
21195 if (*Alignment > ST->getAlign() &&
21196 isAligned(*Alignment, ST->getSrcValueOffset())) {
21197 SDValue NewStore =
21198 DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
21199 ST->getMemoryVT(), *Alignment,
21200 ST->getMemOperand()->getFlags(), ST->getAAInfo());
21201 // NewStore will always be N as we are only refining the alignment
21202 assert(NewStore.getNode() == N);
21203 (void)NewStore;
21204 }
21205 }
21206 }
21207
21208 // Try transforming a pair floating point load / store ops to integer
21209 // load / store ops.
21210 if (SDValue NewST = TransformFPLoadStorePair(N))
21211 return NewST;
21212
21213 // Try transforming several stores into STORE (BSWAP).
21214 if (SDValue Store = mergeTruncStores(ST))
21215 return Store;
21216
21217 if (ST->isUnindexed()) {
21218 // Walk up chain skipping non-aliasing memory nodes, on this store and any
21219 // adjacent stores.
21220 if (findBetterNeighborChains(ST)) {
21221 // replaceStoreChain uses CombineTo, which handled all of the worklist
21222 // manipulation. Return the original node to not do anything else.
21223 return SDValue(ST, 0);
21224 }
21225 Chain = ST->getChain();
21226 }
21227
21228 // FIXME: is there such a thing as a truncating indexed store?
21229 if (ST->isTruncatingStore() && ST->isUnindexed() &&
21230 Value.getValueType().isInteger() &&
21231 (!isa<ConstantSDNode>(Value) ||
21232 !cast<ConstantSDNode>(Value)->isOpaque())) {
21233 // Convert a truncating store of an extension into a standard store.
21234 if ((Value.getOpcode() == ISD::ZERO_EXTEND ||
21235 Value.getOpcode() == ISD::SIGN_EXTEND ||
21236 Value.getOpcode() == ISD::ANY_EXTEND) &&
21237 Value.getOperand(0).getValueType() == ST->getMemoryVT() &&
21238 TLI.isOperationLegalOrCustom(ISD::STORE, ST->getMemoryVT()))
21239 return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
21240 ST->getMemOperand());
21241
21242 APInt TruncDemandedBits =
21243 APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
21244 ST->getMemoryVT().getScalarSizeInBits());
21245
21246 // See if we can simplify the operation with SimplifyDemandedBits, which
21247 // only works if the value has a single use.
21248 AddToWorklist(Value.getNode());
21249 if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
21250 // Re-visit the store if anything changed and the store hasn't been merged
21251 // with another node (N is deleted). SimplifyDemandedBits will add Value's
21252 // node back to the worklist if necessary, but we also need to re-visit
21253 // the Store node itself.
21254 if (N->getOpcode() != ISD::DELETED_NODE)
21255 AddToWorklist(N);
21256 return SDValue(N, 0);
21257 }
21258
21259 // Otherwise, see if we can simplify the input to this truncstore with
21260 // knowledge that only the low bits are being used. For example:
21261 // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
21262 if (SDValue Shorter =
21263 TLI.SimplifyMultipleUseDemandedBits(Value, TruncDemandedBits, DAG))
21264 return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(),
21265 ST->getMemOperand());
21266
21267 // If we're storing a truncated constant, see if we can simplify it.
21268 // TODO: Move this to targetShrinkDemandedConstant?
21269 if (auto *Cst = dyn_cast<ConstantSDNode>(Value))
21270 if (!Cst->isOpaque()) {
21271 const APInt &CValue = Cst->getAPIntValue();
21272 APInt NewVal = CValue & TruncDemandedBits;
21273 if (NewVal != CValue) {
21274 SDValue Shorter =
21275 DAG.getConstant(NewVal, SDLoc(N), Value.getValueType());
21276 return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr,
21277 ST->getMemoryVT(), ST->getMemOperand());
21278 }
21279 }
21280 }
21281
21282 // If this is a load followed by a store to the same location, then the store
21283 // is dead/noop. Peek through any truncates if canCombineTruncStore failed.
21284 // TODO: Add big-endian truncate support with test coverage.
21285 // TODO: Can relax for unordered atomics (see D66309)
21286 SDValue TruncVal = DAG.getDataLayout().isLittleEndian()
21287 ? peekThroughTruncates(Value)
21288 : Value;
21289 if (auto *Ld = dyn_cast<LoadSDNode>(TruncVal)) {
21290 if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
21291 ST->isUnindexed() && ST->isSimple() &&
21292 Ld->getAddressSpace() == ST->getAddressSpace() &&
21293 // There can't be any side effects between the load and store, such as
21294 // a call or store.
21295 Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
21296 // The store is dead, remove it.
21297 return Chain;
21298 }
21299 }
21300
21301 // Try scalarizing vector stores of loads where we only change one element
21302 if (SDValue NewST = replaceStoreOfInsertLoad(ST))
21303 return NewST;
21304
21305 // TODO: Can relax for unordered atomics (see D66309)
21306 if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
21307 if (ST->isUnindexed() && ST->isSimple() &&
21308 ST1->isUnindexed() && ST1->isSimple()) {
21309 if (OptLevel != CodeGenOptLevel::None && ST1->getBasePtr() == Ptr &&
21310 ST1->getValue() == Value && ST->getMemoryVT() == ST1->getMemoryVT() &&
21311 ST->getAddressSpace() == ST1->getAddressSpace()) {
21312 // If this is a store followed by a store with the same value to the
21313 // same location, then the store is dead/noop.
21314 return Chain;
21315 }
21316
21317 if (OptLevel != CodeGenOptLevel::None && ST1->hasOneUse() &&
21318 !ST1->getBasePtr().isUndef() &&
21319 ST->getAddressSpace() == ST1->getAddressSpace()) {
21320 // If either store has a scalable vector type, the generic containment
21321 // check below cannot be used because a scalable store's size is not known
21322 // at compile time. Only remove the earlier store when it provably writes
21323 // to the same base pointer and no more bytes than the current store.
21324 if (ST->getMemoryVT().isScalableVector() ||
21325 ST1->getMemoryVT().isScalableVector()) {
21326 if (ST1->getBasePtr() == Ptr &&
21327 TypeSize::isKnownLE(ST1->getMemoryVT().getStoreSize(),
21328 ST->getMemoryVT().getStoreSize())) {
21329 CombineTo(ST1, ST1->getChain());
21330 return SDValue(N, 0);
21331 }
21332 } else {
21333 const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG);
21334 const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG);
21335 // If the preceding store writes to a subset of the current store's
21336 // location and no other node is chained to that store, we can
21337 // effectively drop it. Do not remove stores to undef as they
21338 // may be used as data sinks.
21339 if (STBase.contains(DAG, ST->getMemoryVT().getFixedSizeInBits(),
21340 ChainBase,
21341 ST1->getMemoryVT().getFixedSizeInBits())) {
21342 CombineTo(ST1, ST1->getChain());
21343 return SDValue(N, 0);
21344 }
21345 }
21346 }
21347 }
21348 }
21349
21350 // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
21351 // truncating store. We can do this even if this is already a truncstore.
21352 if ((Value.getOpcode() == ISD::FP_ROUND ||
21353 Value.getOpcode() == ISD::TRUNCATE) &&
21354 Value->hasOneUse() && ST->isUnindexed() &&
21355 TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
21356 ST->getMemoryVT(), LegalOperations)) {
21357 return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
21358 Ptr, ST->getMemoryVT(), ST->getMemOperand());
21359 }
21360
21361 // Always perform this optimization before types are legal. If the target
21362 // prefers, also try this after legalization to catch stores that were created
21363 // by intrinsics or other nodes.
21364 if (!LegalTypes || (TLI.mergeStoresAfterLegalization(ST->getMemoryVT()))) {
21365 while (true) {
21366 // There can be multiple store sequences on the same chain.
21367 // Keep trying to merge store sequences until we are unable to do so
21368 // or until we merge the last store on the chain.
21369 bool Changed = mergeConsecutiveStores(ST);
21370 if (!Changed) break;
21371 // Return N as merge only uses CombineTo and no worklist clean
21372 // up is necessary.
21373 if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
21374 return SDValue(N, 0);
21375 }
21376 }
21377
21378 // Try transforming N to an indexed store.
21379 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
21380 return SDValue(N, 0);
21381
21382 // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
21383 //
21384 // Make sure to do this only after attempting to merge stores in order to
21385 // avoid changing the types of some subset of stores due to visit order,
21386 // preventing their merging.
21387 if (isa<ConstantFPSDNode>(ST->getValue())) {
21388 if (SDValue NewSt = replaceStoreOfFPConstant(ST))
21389 return NewSt;
21390 }
21391
21392 if (SDValue NewSt = splitMergedValStore(ST))
21393 return NewSt;
21394
21395 return ReduceLoadOpStoreWidth(N);
21396}
21397
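// Look for stores that lie entirely within the object whose lifetime ends at
// this node and are therefore not observable afterwards; such stores are dead
// and can be removed.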
21398SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
21399 const auto *LifetimeEnd = cast<LifetimeSDNode>(N);
21400 if (!LifetimeEnd->hasOffset())
21401 return SDValue();
21402
21403 const BaseIndexOffset LifetimeEndBase(N->getOperand(1), SDValue(),
21404 LifetimeEnd->getOffset(), false);
21405
21406 // We walk up the chains to find stores.
21407 SmallVector<SDValue, 8> Chains = {N->getOperand(0)};
21408 while (!Chains.empty()) {
21409 SDValue Chain = Chains.pop_back_val();
21410 if (!Chain.hasOneUse())
21411 continue;
21412 switch (Chain.getOpcode()) {
21413 case ISD::TokenFactor:
21414 for (unsigned Nops = Chain.getNumOperands(); Nops;)
21415 Chains.push_back(Chain.getOperand(--Nops));
21416 break;
21417 case ISD::LIFETIME_START:
21418 case ISD::LIFETIME_END:
21419 // We can forward past any lifetime start/end that can be proven not to
21420 // alias the node.
21421 if (!mayAlias(Chain.getNode(), N))
21422 Chains.push_back(Chain.getOperand(0));
21423 break;
21424 case ISD::STORE: {
21425 StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain);
21426 // TODO: Can relax for unordered atomics (see D66309)
21427 if (!ST->isSimple() || ST->isIndexed())
21428 continue;
21429 const TypeSize StoreSize = ST->getMemoryVT().getStoreSize();
21430 // The bounds of a scalable store are not known until runtime, so this
21431 // store cannot be elided.
21432 if (StoreSize.isScalable())
21433 continue;
21434 const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG);
21435 // If we store purely within object bounds just before its lifetime ends,
21436 // we can remove the store.
21437 if (LifetimeEndBase.contains(DAG, LifetimeEnd->getSize() * 8, StoreBase,
21438 StoreSize.getFixedValue() * 8)) {
21439 LLVM_DEBUG(dbgs() << "\nRemoving store:"; StoreBase.dump();
21440 dbgs() << "\nwithin LIFETIME_END of : ";
21441 LifetimeEndBase.dump(); dbgs() << "\n");
21442 CombineTo(ST, ST->getChain());
21443 return SDValue(N, 0);
21444 }
21445 }
21446 }
21447 }
21448 return SDValue();
21449}
21450
21451/// For the instruction sequence of store below, F and I values
21452/// are bundled together as an i64 value before being stored into memory.
21453 /// Sometimes it is more efficient to generate separate stores for F and I,
21454/// which can remove the bitwise instructions or sink them to colder places.
21455///
21456/// (store (or (zext (bitcast F to i32) to i64),
21457/// (shl (zext I to i64), 32)), addr) -->
21458/// (store F, addr) and (store I, addr+4)
21459///
21460 /// Similarly, splitting for other merged stores can also be beneficial, like:
21461/// For pair of {i32, i32}, i64 store --> two i32 stores.
21462/// For pair of {i32, i16}, i64 store --> two i32 stores.
21463/// For pair of {i16, i16}, i32 store --> two i16 stores.
21464/// For pair of {i16, i8}, i32 store --> two i16 stores.
21465/// For pair of {i8, i8}, i16 store --> two i8 stores.
21466///
21467/// We allow each target to determine specifically which kind of splitting is
21468/// supported.
21469///
21470/// The store patterns are commonly seen from the simple code snippet below
21471 /// if only std::make_pair(...) is SROA-transformed before being inlined into hoo.
21472/// void goo(const std::pair<int, float> &);
21473/// hoo() {
21474/// ...
21475/// goo(std::make_pair(tmp, ftmp));
21476/// ...
21477/// }
21478///
21479SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
21480 if (OptLevel == CodeGenOptLevel::None)
21481 return SDValue();
21482
21483 // Can't change the number of memory accesses for a volatile store or break
21484 // atomicity for an atomic one.
21485 if (!ST->isSimple())
21486 return SDValue();
21487
21488 SDValue Val = ST->getValue();
21489 SDLoc DL(ST);
21490
21491 // Match OR operand.
21492 if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
21493 return SDValue();
21494
21495 // Match SHL operand and get Lower and Higher parts of Val.
21496 SDValue Op1 = Val.getOperand(0);
21497 SDValue Op2 = Val.getOperand(1);
21498 SDValue Lo, Hi;
21499 if (Op1.getOpcode() != ISD::SHL) {
21500 std::swap(Op1, Op2);
21501 if (Op1.getOpcode() != ISD::SHL)
21502 return SDValue();
21503 }
21504 Lo = Op2;
21505 Hi = Op1.getOperand(0);
21506 if (!Op1.hasOneUse())
21507 return SDValue();
21508
21509 // Match shift amount to HalfValBitSize.
21510 unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
21511 ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
21512 if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
21513 return SDValue();
21514
21515 // Lo and Hi must be zero-extended from integer types no wider than
21516 // HalfValBitSize.
21517 if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
21518 !Lo.getOperand(0).getValueType().isScalarInteger() ||
21519 Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
21520 Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
21521 !Hi.getOperand(0).getValueType().isScalarInteger() ||
21522 Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
21523 return SDValue();
21524
21525 // Use the EVT of low and high parts before bitcast as the input
21526 // of target query.
21527 EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
21528 ? Lo.getOperand(0).getValueType()
21529 : Lo.getValueType();
21530 EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
21531 ? Hi.getOperand(0).getValueType()
21532 : Hi.getValueType();
21533 if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
21534 return SDValue();
21535
21536 // Start to split store.
21537 MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
21538 AAMDNodes AAInfo = ST->getAAInfo();
21539
21540 // Change the sizes of Lo and Hi's value types to HalfValBitSize.
21541 EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
21542 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
21543 Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
21544
21545 SDValue Chain = ST->getChain();
21546 SDValue Ptr = ST->getBasePtr();
21547 // Lower value store.
21548 SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
21549 ST->getOriginalAlign(), MMOFlags, AAInfo);
21550 Ptr =
21551 DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(HalfValBitSize / 8), DL);
21552 // Higher value store.
21553 SDValue St1 = DAG.getStore(
21554 St0, DL, Hi, Ptr, ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
21555 ST->getOriginalAlign(), MMOFlags, AAInfo);
21556 return St1;
21557}
21558
21559// Merge an insertion into an existing shuffle:
21560// (insert_vector_elt (vector_shuffle X, Y, Mask),
21561 // (extract_vector_elt X, N), InsIndex)
21562// --> (vector_shuffle X, Y, NewMask)
21563 // and variations where shuffle operands may be CONCAT_VECTORS.
21564 static bool mergeEltWithShuffle(SDValue &X, SDValue &Y, ArrayRef<int> Mask,
21565 SmallVectorImpl<int> &NewMask, SDValue Elt,
21566 unsigned InsIndex) {
21567 if (Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
21568 !isa<ConstantSDNode>(Elt.getOperand(1)))
21569 return false;
21570
21571 // Vec's operand 0 is using indices from 0 to N-1 and
21572 // operand 1 from N to 2N - 1, where N is the number of
21573 // elements in the vectors.
21574 SDValue InsertVal0 = Elt.getOperand(0);
21575 int ElementOffset = -1;
21576
21577 // We explore the inputs of the shuffle in order to see if we find the
21578 // source of the extract_vector_elt. If so, we can use it to modify the
21579 // shuffle rather than perform an insert_vector_elt.
21580 SmallVector<std::pair<int, SDValue>, 8> ArgWorkList;
21581 ArgWorkList.emplace_back(Mask.size(), Y);
21582 ArgWorkList.emplace_back(0, X);
21583
21584 while (!ArgWorkList.empty()) {
21585 int ArgOffset;
21586 SDValue ArgVal;
21587 std::tie(ArgOffset, ArgVal) = ArgWorkList.pop_back_val();
21588
21589 if (ArgVal == InsertVal0) {
21590 ElementOffset = ArgOffset;
21591 break;
21592 }
21593
21594 // Peek through concat_vector.
21595 if (ArgVal.getOpcode() == ISD::CONCAT_VECTORS) {
21596 int CurrentArgOffset =
21597 ArgOffset + ArgVal.getValueType().getVectorNumElements();
21598 int Step = ArgVal.getOperand(0).getValueType().getVectorNumElements();
21599 for (SDValue Op : reverse(ArgVal->ops())) {
21600 CurrentArgOffset -= Step;
21601 ArgWorkList.emplace_back(CurrentArgOffset, Op);
21602 }
21603
21604 // Make sure we went through all the elements and did not screw up index
21605 // computation.
21606 assert(CurrentArgOffset == ArgOffset);
21607 }
21608 }
21609
21610 // If we failed to find a match, see if we can replace an UNDEF shuffle
21611 // operand.
21612 if (ElementOffset == -1) {
21613 if (!Y.isUndef() || InsertVal0.getValueType() != Y.getValueType())
21614 return false;
21615 ElementOffset = Mask.size();
21616 Y = InsertVal0;
21617 }
21618
21619 NewMask.assign(Mask.begin(), Mask.end());
21620 NewMask[InsIndex] = ElementOffset + Elt.getConstantOperandVal(1);
21621 assert(NewMask[InsIndex] < (int)(2 * Mask.size()) && NewMask[InsIndex] >= 0 &&
21622 "NewMask[InsIndex] is out of bound");
21623 return true;
21624}
21625
21626// Merge an insertion into an existing shuffle:
21627// (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N),
21628// InsIndex)
21629// --> (vector_shuffle X, Y) and variations where shuffle operands may be
21630// CONCAT_VECTORS.
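// For example, inserting (extract_vector_elt X, 3) at index 1 of
// (vector_shuffle X, Y, <0,4,2,6>) only needs the mask rewritten to <0,3,2,6>.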
21631SDValue DAGCombiner::mergeInsertEltWithShuffle(SDNode *N, unsigned InsIndex) {
21632 assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
21633 "Expected extract_vector_elt");
21634 SDValue InsertVal = N->getOperand(1);
21635 SDValue Vec = N->getOperand(0);
21636
21637 auto *SVN = dyn_cast<ShuffleVectorSDNode>(Vec);
21638 if (!SVN || !Vec.hasOneUse())
21639 return SDValue();
21640
21641 ArrayRef<int> Mask = SVN->getMask();
21642 SDValue X = Vec.getOperand(0);
21643 SDValue Y = Vec.getOperand(1);
21644
21645 SmallVector<int, 16> NewMask(Mask);
21646 if (mergeEltWithShuffle(X, Y, Mask, NewMask, InsertVal, InsIndex)) {
21647 SDValue LegalShuffle = TLI.buildLegalVectorShuffle(
21648 Vec.getValueType(), SDLoc(N), X, Y, NewMask, DAG);
21649 if (LegalShuffle)
21650 return LegalShuffle;
21651 }
21652
21653 return SDValue();
21654}
21655
21656// Convert a disguised subvector insertion into a shuffle:
21657// insert_vector_elt V, (bitcast X from vector type), IdxC -->
21658// bitcast(shuffle (bitcast V), (extended X), Mask)
21659// Note: We do not use an insert_subvector node because that requires a
21660// legal subvector type.
21661SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
21662 assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
21663 "Expected extract_vector_elt");
21664 SDValue InsertVal = N->getOperand(1);
21665
21666 if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
21667 !InsertVal.getOperand(0).getValueType().isVector())
21668 return SDValue();
21669
21670 SDValue SubVec = InsertVal.getOperand(0);
21671 SDValue DestVec = N->getOperand(0);
21672 EVT SubVecVT = SubVec.getValueType();
21673 EVT VT = DestVec.getValueType();
21674 unsigned NumSrcElts = SubVecVT.getVectorNumElements();
21675 // If the source has only a single vector element, the cost of creating and
21676 // adding it to a vector is likely to exceed the cost of an insert_vector_elt.
21677 if (NumSrcElts == 1)
21678 return SDValue();
21679 unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits();
21680 unsigned NumMaskVals = ExtendRatio * NumSrcElts;
21681
21682 // Step 1: Create a shuffle mask that implements this insert operation. The
21683 // vector that we are inserting into will be operand 0 of the shuffle, so
21684 // those elements are just 'i'. The inserted subvector is in the first
21685 // positions of operand 1 of the shuffle. Example:
21686 // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7}
21687 SmallVector<int, 16> Mask(NumMaskVals);
21688 for (unsigned i = 0; i != NumMaskVals; ++i) {
21689 if (i / NumSrcElts == InsIndex)
21690 Mask[i] = (i % NumSrcElts) + NumMaskVals;
21691 else
21692 Mask[i] = i;
21693 }
21694
21695 // Bail out if the target can not handle the shuffle we want to create.
21696 EVT SubVecEltVT = SubVecVT.getVectorElementType();
21697 EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals);
21698 if (!TLI.isShuffleMaskLegal(Mask, ShufVT))
21699 return SDValue();
21700
21701 // Step 2: Create a wide vector from the inserted source vector by appending
21702 // undefined elements. This is the same size as our destination vector.
21703 SDLoc DL(N);
21704 SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT));
21705 ConcatOps[0] = SubVec;
21706 SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps);
21707
21708 // Step 3: Shuffle in the padded subvector.
21709 SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec);
21710 SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask);
21711 AddToWorklist(PaddedSubV.getNode());
21712 AddToWorklist(DestVecBC.getNode());
21713 AddToWorklist(Shuf.getNode());
21714 return DAG.getBitcast(VT, Shuf);
21715}
21716
21717 // Combine insert(shuffle(load, <u,0,1,2>), load, 0) into a single load if
21718 // possible and the new load will be fast. We use more loads but fewer
21719 // shuffles and inserts.
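// For example, with InsIndex == 0 the scalar load must sit one element before
// the vector load in memory, so the combined value can be produced by a single
// vector load starting at the scalar's address.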
21720SDValue DAGCombiner::combineInsertEltToLoad(SDNode *N, unsigned InsIndex) {
21721 EVT VT = N->getValueType(0);
21722
21723 // InsIndex is expected to be the first or last lane.
21724 if (!VT.isFixedLengthVector() ||
21725 (InsIndex != 0 && InsIndex != VT.getVectorNumElements() - 1))
21726 return SDValue();
21727
21728 // Look for a shuffle with the mask u,0,1,2,3,4,5,6 or 1,2,3,4,5,6,7,u
21729 // depending on the InsIndex.
21730 auto *Shuffle = dyn_cast<ShuffleVectorSDNode>(N->getOperand(0));
21731 SDValue Scalar = N->getOperand(1);
21732 if (!Shuffle || !all_of(enumerate(Shuffle->getMask()), [&](auto P) {
21733 return InsIndex == P.index() || P.value() < 0 ||
21734 (InsIndex == 0 && P.value() == (int)P.index() - 1) ||
21735 (InsIndex == VT.getVectorNumElements() - 1 &&
21736 P.value() == (int)P.index() + 1);
21737 }))
21738 return SDValue();
21739
21740 // We optionally skip over an extend so long as both loads are extended in the
21741 // same way from the same type.
21742 unsigned Extend = 0;
21743 if (Scalar.getOpcode() == ISD::ZERO_EXTEND ||
21744 Scalar.getOpcode() == ISD::SIGN_EXTEND ||
21745 Scalar.getOpcode() == ISD::ANY_EXTEND) {
21746 Extend = Scalar.getOpcode();
21747 Scalar = Scalar.getOperand(0);
21748 }
21749
21750 auto *ScalarLoad = dyn_cast<LoadSDNode>(Scalar);
21751 if (!ScalarLoad)
21752 return SDValue();
21753
21754 SDValue Vec = Shuffle->getOperand(0);
21755 if (Extend) {
21756 if (Vec.getOpcode() != Extend)
21757 return SDValue();
21758 Vec = Vec.getOperand(0);
21759 }
21760 auto *VecLoad = dyn_cast<LoadSDNode>(Vec);
21761 if (!VecLoad || Vec.getValueType().getScalarType() != Scalar.getValueType())
21762 return SDValue();
21763
21764 int EltSize = ScalarLoad->getValueType(0).getScalarSizeInBits();
21765 if (EltSize == 0 || EltSize % 8 != 0 || !ScalarLoad->isSimple() ||
21766 !VecLoad->isSimple() || VecLoad->getExtensionType() != ISD::NON_EXTLOAD ||
21767 ScalarLoad->getExtensionType() != ISD::NON_EXTLOAD ||
21768 ScalarLoad->getAddressSpace() != VecLoad->getAddressSpace())
21769 return SDValue();
21770
21771 // Check that the offset between the pointers lets the two loads be
21772 // combined into a single contiguous load.
21773 if (InsIndex == 0) {
21774 if (!DAG.areNonVolatileConsecutiveLoads(ScalarLoad, VecLoad, EltSize / 8,
21775 -1))
21776 return SDValue();
21777 } else {
21778 if (!DAG.areNonVolatileConsecutiveLoads(
21779 VecLoad, ScalarLoad, VT.getVectorNumElements() * EltSize / 8, -1))
21780 return SDValue();
21781 }
21782
21783 // And that the new unaligned load will be fast.
21784 unsigned IsFast = 0;
21785 Align NewAlign = commonAlignment(VecLoad->getAlign(), EltSize / 8);
21786 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
21787 Vec.getValueType(), VecLoad->getAddressSpace(),
21788 NewAlign, VecLoad->getMemOperand()->getFlags(),
21789 &IsFast) ||
21790 !IsFast)
21791 return SDValue();
21792
21793 // Calculate the new Ptr and create the new load.
21794 SDLoc DL(N);
21795 SDValue Ptr = ScalarLoad->getBasePtr();
21796 if (InsIndex != 0)
21797 Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), VecLoad->getBasePtr(),
21798 DAG.getConstant(EltSize / 8, DL, Ptr.getValueType()));
21799 MachinePointerInfo PtrInfo =
21800 InsIndex == 0 ? ScalarLoad->getPointerInfo()
21801 : VecLoad->getPointerInfo().getWithOffset(EltSize / 8);
21802
21803 SDValue Load = DAG.getLoad(VecLoad->getValueType(0), DL,
21804 ScalarLoad->getChain(), Ptr, PtrInfo, NewAlign);
21805 DAG.makeEquivalentMemoryOrdering(ScalarLoad, Load.getValue(1));
21806 DAG.makeEquivalentMemoryOrdering(VecLoad, Load.getValue(1));
21807 return Extend ? DAG.getNode(Extend, DL, VT, Load) : Load;
21808}
21809
21810SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
21811 SDValue InVec = N->getOperand(0);
21812 SDValue InVal = N->getOperand(1);
21813 SDValue EltNo = N->getOperand(2);
21814 SDLoc DL(N);
21815
21816 EVT VT = InVec.getValueType();
21817 auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
21818
21819 // Insert into out-of-bounds element is undefined.
21820 if (IndexC && VT.isFixedLengthVector() &&
21821 IndexC->getZExtValue() >= VT.getVectorNumElements())
21822 return DAG.getUNDEF(VT);
21823
21824 // Remove redundant insertions:
21825 // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
21826 if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
21827 InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
21828 return InVec;
21829
21830 if (!IndexC) {
21831 // If this is variable insert to undef vector, it might be better to splat:
21832 // inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
21833 if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT))
21834 return DAG.getSplat(VT, DL, InVal);
21835 return SDValue();
21836 }
21837
21838 if (VT.isScalableVector())
21839 return SDValue();
21840
21841 unsigned NumElts = VT.getVectorNumElements();
21842
21843 // We must know which element is being inserted for folds below here.
21844 unsigned Elt = IndexC->getZExtValue();
21845
21846 // Handle <1 x ???> vector insertion special cases.
21847 if (NumElts == 1) {
21848 // insert_vector_elt(x, extract_vector_elt(y, 0), 0) -> y
21849 if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
21850 InVal.getOperand(0).getValueType() == VT &&
21851 isNullConstant(InVal.getOperand(1)))
21852 return InVal.getOperand(0);
21853 }
21854
21855 // Canonicalize insert_vector_elt dag nodes.
21856 // Example:
21857 // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
21858 // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
21859 //
21860 // Do this only if the child insert_vector node has one use; also
21861 // do this only if indices are both constants and Idx1 < Idx0.
21862 if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
21863 && isa<ConstantSDNode>(InVec.getOperand(2))) {
21864 unsigned OtherElt = InVec.getConstantOperandVal(2);
21865 if (Elt < OtherElt) {
21866 // Swap nodes.
21867 SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
21868 InVec.getOperand(0), InVal, EltNo);
21869 AddToWorklist(NewOp.getNode());
21870 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
21871 VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
21872 }
21873 }
21874
21875 if (SDValue Shuf = mergeInsertEltWithShuffle(N, Elt))
21876 return Shuf;
21877
21878 if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
21879 return Shuf;
21880
21881 if (SDValue Shuf = combineInsertEltToLoad(N, Elt))
21882 return Shuf;
21883
21884 // Attempt to convert an insert_vector_elt chain into a legal build_vector.
21885 if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) {
21886 // vXi1 vector - we don't need to recurse.
21887 if (NumElts == 1)
21888 return DAG.getBuildVector(VT, DL, {InVal});
21889
21890 // If we haven't already collected the element, insert into the op list.
21891 EVT MaxEltVT = InVal.getValueType();
21892 auto AddBuildVectorOp = [&](SmallVectorImpl<SDValue> &Ops, SDValue Elt,
21893 unsigned Idx) {
21894 if (!Ops[Idx]) {
21895 Ops[Idx] = Elt;
21896 if (VT.isInteger()) {
21897 EVT EltVT = Elt.getValueType();
21898 MaxEltVT = MaxEltVT.bitsGE(EltVT) ? MaxEltVT : EltVT;
21899 }
21900 }
21901 };
21902
21903 // Ensure all the operands are the same value type, fill any missing
21904 // operands with UNDEF and create the BUILD_VECTOR.
21905 auto CanonicalizeBuildVector = [&](SmallVectorImpl<SDValue> &Ops) {
21906 assert(Ops.size() == NumElts && "Unexpected vector size");
21907 for (SDValue &Op : Ops) {
21908 if (Op)
21909 Op = VT.isInteger() ? DAG.getAnyExtOrTrunc(Op, DL, MaxEltVT) : Op;
21910 else
21911 Op = DAG.getUNDEF(MaxEltVT);
21912 }
21913 return DAG.getBuildVector(VT, DL, Ops);
21914 };
21915
21916 SmallVector<SDValue, 8> Ops(NumElts, SDValue());
21917 Ops[Elt] = InVal;
21918
21919 // Recurse up an INSERT_VECTOR_ELT chain to build a BUILD_VECTOR.
21920 for (SDValue CurVec = InVec; CurVec;) {
21921 // UNDEF - build new BUILD_VECTOR from already inserted operands.
21922 if (CurVec.isUndef())
21923 return CanonicalizeBuildVector(Ops);
21924
21925 // BUILD_VECTOR - insert unused operands and build new BUILD_VECTOR.
21926 if (CurVec.getOpcode() == ISD::BUILD_VECTOR && CurVec.hasOneUse()) {
21927 for (unsigned I = 0; I != NumElts; ++I)
21928 AddBuildVectorOp(Ops, CurVec.getOperand(I), I);
21929 return CanonicalizeBuildVector(Ops);
21930 }
21931
21932 // SCALAR_TO_VECTOR - insert unused scalar and build new BUILD_VECTOR.
21933 if (CurVec.getOpcode() == ISD::SCALAR_TO_VECTOR && CurVec.hasOneUse()) {
21934 AddBuildVectorOp(Ops, CurVec.getOperand(0), 0);
21935 return CanonicalizeBuildVector(Ops);
21936 }
21937
21938 // INSERT_VECTOR_ELT - insert operand and continue up the chain.
21939 if (CurVec.getOpcode() == ISD::INSERT_VECTOR_ELT && CurVec.hasOneUse())
21940 if (auto *CurIdx = dyn_cast<ConstantSDNode>(CurVec.getOperand(2)))
21941 if (CurIdx->getAPIntValue().ult(NumElts)) {
21942 unsigned Idx = CurIdx->getZExtValue();
21943 AddBuildVectorOp(Ops, CurVec.getOperand(1), Idx);
21944
21945 // Found entire BUILD_VECTOR.
21946 if (all_of(Ops, [](SDValue Op) { return !!Op; }))
21947 return CanonicalizeBuildVector(Ops);
21948
21949 CurVec = CurVec->getOperand(0);
21950 continue;
21951 }
21952
21953 // VECTOR_SHUFFLE - if all the operands match the shuffle's sources,
21954 // update the shuffle mask (and second operand if we started with unary
21955 // shuffle) and create a new legal shuffle.
21956 if (CurVec.getOpcode() == ISD::VECTOR_SHUFFLE && CurVec.hasOneUse()) {
21957 auto *SVN = cast<ShuffleVectorSDNode>(CurVec);
21958 SDValue LHS = SVN->getOperand(0);
21959 SDValue RHS = SVN->getOperand(1);
21960 SmallVector<int, 16> Mask(SVN->getMask());
21961 bool Merged = true;
21962 for (auto I : enumerate(Ops)) {
21963 SDValue &Op = I.value();
21964 if (Op) {
21965 SmallVector<int, 16> NewMask;
21966 if (!mergeEltWithShuffle(LHS, RHS, Mask, NewMask, Op, I.index())) {
21967 Merged = false;
21968 break;
21969 }
21970 Mask = std::move(NewMask);
21971 }
21972 }
21973 if (Merged)
21974 if (SDValue NewShuffle =
21975 TLI.buildLegalVectorShuffle(VT, DL, LHS, RHS, Mask, DAG))
21976 return NewShuffle;
21977 }
21978
21979 // If all insertions are zero value, try to convert to AND mask.
21980 // TODO: Do this for -1 with OR mask?
21981 if (!LegalOperations && llvm::isNullConstant(InVal) &&
21982 all_of(Ops, [InVal](SDValue Op) { return !Op || Op == InVal; }) &&
21983 count_if(Ops, [InVal](SDValue Op) { return Op == InVal; }) >= 2) {
21984 SDValue Zero = DAG.getConstant(0, DL, MaxEltVT);
21985 SDValue AllOnes = DAG.getAllOnesConstant(DL, MaxEltVT);
21986 SmallVector<SDValue, 8> Mask(NumElts);
21987 for (unsigned I = 0; I != NumElts; ++I)
21988 Mask[I] = Ops[I] ? Zero : AllOnes;
21989 return DAG.getNode(ISD::AND, DL, VT, CurVec,
21990 DAG.getBuildVector(VT, DL, Mask));
21991 }
21992
21993 // Failed to find a match in the chain - bail.
21994 break;
21995 }
21996
21997 // See if we can fill in the missing constant elements as zeros.
21998 // TODO: Should we do this for any constant?
21999 APInt DemandedZeroElts = APInt::getZero(NumElts);
22000 for (unsigned I = 0; I != NumElts; ++I)
22001 if (!Ops[I])
22002 DemandedZeroElts.setBit(I);
22003
22004 if (DAG.MaskedVectorIsZero(InVec, DemandedZeroElts)) {
22005 SDValue Zero = VT.isInteger() ? DAG.getConstant(0, DL, MaxEltVT)
22006 : DAG.getConstantFP(0, DL, MaxEltVT);
22007 for (unsigned I = 0; I != NumElts; ++I)
22008 if (!Ops[I])
22009 Ops[I] = Zero;
22010
22011 return CanonicalizeBuildVector(Ops);
22012 }
22013 }
22014
22015 return SDValue();
22016}
22017
22018SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
22019 SDValue EltNo,
22020 LoadSDNode *OriginalLoad) {
22021 assert(OriginalLoad->isSimple());
22022
22023 EVT ResultVT = EVE->getValueType(0);
22024 EVT VecEltVT = InVecVT.getVectorElementType();
22025
22026 // If the vector element type is not a multiple of a byte then we are unable
22027 // to correctly compute an address to load only the extracted element as a
22028 // scalar.
22029 if (!VecEltVT.isByteSized())
22030 return SDValue();
22031
22032 ISD::LoadExtType ExtTy =
22033 ResultVT.bitsGT(VecEltVT) ? ISD::NON_EXTLOAD : ISD::EXTLOAD;
22034 if (!TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT) ||
22035 !TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
22036 return SDValue();
22037
22038 Align Alignment = OriginalLoad->getAlign();
22039 MachinePointerInfo MPI;
22040 SDLoc DL(EVE);
22041 if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
22042 int Elt = ConstEltNo->getZExtValue();
22043 unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
22044 MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
22045 Alignment = commonAlignment(Alignment, PtrOff);
22046 } else {
22047 // Discard the pointer info except the address space because the memory
22048 // operand can't represent this new access since the offset is variable.
22049 MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
22050 Alignment = commonAlignment(Alignment, VecEltVT.getSizeInBits() / 8);
22051 }
22052
22053 unsigned IsFast = 0;
22054 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VecEltVT,
22055 OriginalLoad->getAddressSpace(), Alignment,
22056 OriginalLoad->getMemOperand()->getFlags(),
22057 &IsFast) ||
22058 !IsFast)
22059 return SDValue();
22060
22061 SDValue NewPtr = TLI.getVectorElementPointer(DAG, OriginalLoad->getBasePtr(),
22062 InVecVT, EltNo);
22063
22064 // We are replacing a vector load with a scalar load. The new load must have
22065 // identical memory op ordering to the original.
22066 SDValue Load;
22067 if (ResultVT.bitsGT(VecEltVT)) {
22068 // If the result type of vextract is wider than the load, then issue an
22069 // extending load instead.
22070 ISD::LoadExtType ExtType =
22071 TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT, VecEltVT) ? ISD::ZEXTLOAD
22072 : ISD::EXTLOAD;
22073 Load = DAG.getExtLoad(ExtType, DL, ResultVT, OriginalLoad->getChain(),
22074 NewPtr, MPI, VecEltVT, Alignment,
22075 OriginalLoad->getMemOperand()->getFlags(),
22076 OriginalLoad->getAAInfo());
22077 DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
22078 } else {
22079 // The result type is narrower or the same width as the vector element
22080 Load = DAG.getLoad(VecEltVT, DL, OriginalLoad->getChain(), NewPtr, MPI,
22081 Alignment, OriginalLoad->getMemOperand()->getFlags(),
22082 OriginalLoad->getAAInfo());
22083 DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
22084 if (ResultVT.bitsLT(VecEltVT))
22085 Load = DAG.getNode(ISD::TRUNCATE, DL, ResultVT, Load);
22086 else
22087 Load = DAG.getBitcast(ResultVT, Load);
22088 }
22089 ++OpsNarrowed;
22090 return Load;
22091}
22092
22093/// Transform a vector binary operation into a scalar binary operation by moving
22094/// the math/logic after an extract element of a vector.
22095 static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG,
22096 bool LegalOperations) {
22097 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
22098 SDValue Vec = ExtElt->getOperand(0);
22099 SDValue Index = ExtElt->getOperand(1);
22100 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
22101 if (!IndexC || !TLI.isBinOp(Vec.getOpcode()) || !Vec.hasOneUse() ||
22102 Vec->getNumValues() != 1)
22103 return SDValue();
22104
22105 // Targets may want to avoid this to prevent an expensive register transfer.
22106 if (!TLI.shouldScalarizeBinop(Vec))
22107 return SDValue();
22108
22109 // Extracting an element of a vector constant is constant-folded, so this
22110 // transform is just replacing a vector op with a scalar op while moving the
22111 // extract.
22112 SDValue Op0 = Vec.getOperand(0);
22113 SDValue Op1 = Vec.getOperand(1);
22114 APInt SplatVal;
22115 if (isAnyConstantBuildVector(Op0, true) ||
22116 ISD::isConstantSplatVector(Op0.getNode(), SplatVal) ||
22117 isAnyConstantBuildVector(Op1, true) ||
22118 ISD::isConstantSplatVector(Op1.getNode(), SplatVal)) {
22119 // extractelt (binop X, C), IndexC --> binop (extractelt X, IndexC), C'
22120 // extractelt (binop C, X), IndexC --> binop C', (extractelt X, IndexC)
22121 SDLoc DL(ExtElt);
22122 EVT VT = ExtElt->getValueType(0);
22123 SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op0, Index);
22124 SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op1, Index);
22125 return DAG.getNode(Vec.getOpcode(), DL, VT, Ext0, Ext1);
22126 }
22127
22128 return SDValue();
22129}
22130
22131 // Given an ISD::EXTRACT_VECTOR_ELT, which is a glorified bit sequence extract,
22132 // recursively analyze all of its users and try to model them as
22133 // bit sequence extractions. If all of them agree on the new, narrower element
22134// type, and all of them can be modelled as ISD::EXTRACT_VECTOR_ELT's of that
22135// new element type, do so now.
22136// This is mainly useful to recover from legalization that scalarized
22137// the vector as wide elements, but tries to rebuild it with narrower elements.
22138//
22139// Some more nodes could be modelled if that helps cover interesting patterns.
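// For example (little-endian only), an i64 element extracted from a v2i64
// whose users only truncate to i32 or shift right by 32 can typically be
// rebuilt as i32 extracts of the same vector bitcast to v4i32.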
22140bool DAGCombiner::refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(
22141 SDNode *N) {
22142 // We perform this optimization post type-legalization because
22143 // the type-legalizer often scalarizes integer-promoted vectors.
22144 // Performing this optimization earlier may cause legalization cycles.
22145 if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
22146 return false;
22147
22148 // TODO: Add support for big-endian.
22149 if (DAG.getDataLayout().isBigEndian())
22150 return false;
22151
22152 SDValue VecOp = N->getOperand(0);
22153 EVT VecVT = VecOp.getValueType();
22154 assert(!VecVT.isScalableVector() && "Only for fixed vectors.");
22155
22156 // We must start with a constant extraction index.
22157 auto *IndexC = dyn_cast<ConstantSDNode>(N->getOperand(1));
22158 if (!IndexC)
22159 return false;
22160
22161 assert(IndexC->getZExtValue() < VecVT.getVectorNumElements() &&
22162 "Original ISD::EXTRACT_VECTOR_ELT is undefinend?");
22163
22164 // TODO: deal with the case of implicit anyext of the extraction.
22165 unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
22166 EVT ScalarVT = N->getValueType(0);
22167 if (VecVT.getScalarType() != ScalarVT)
22168 return false;
22169
22170 // TODO: deal with the cases other than everything being integer-typed.
22171 if (!ScalarVT.isScalarInteger())
22172 return false;
22173
22174 struct Entry {
22175 SDNode *Producer;
22176
22177 // Which bits of VecOp does it contain?
22178 unsigned BitPos;
22179 int NumBits;
22180 // NOTE: the actual width of \p Producer may be wider than NumBits!
22181
22182 Entry(Entry &&) = default;
22183 Entry(SDNode *Producer_, unsigned BitPos_, int NumBits_)
22184 : Producer(Producer_), BitPos(BitPos_), NumBits(NumBits_) {}
22185
22186 Entry() = delete;
22187 Entry(const Entry &) = delete;
22188 Entry &operator=(const Entry &) = delete;
22189 Entry &operator=(Entry &&) = delete;
22190 };
22191 SmallVector<Entry, 32> Worklist;
22192 SmallVector<Entry, 32> Leafs;
22193
22194 // We start at the "root" ISD::EXTRACT_VECTOR_ELT.
22195 Worklist.emplace_back(N, /*BitPos=*/VecEltBitWidth * IndexC->getZExtValue(),
22196 /*NumBits=*/VecEltBitWidth);
22197
22198 while (!Worklist.empty()) {
22199 Entry E = Worklist.pop_back_val();
22200 // Does the node not even use any of the VecOp bits?
22201 if (!(E.NumBits > 0 && E.BitPos < VecVT.getSizeInBits() &&
22202 E.BitPos + E.NumBits <= VecVT.getSizeInBits()))
22203 return false; // Let's allow the other combines clean this up first.
22204 // Did we fail to model any of the users of the Producer?
22205 bool ProducerIsLeaf = false;
22206 // Look at each user of this Producer.
22207 for (SDNode *User : E.Producer->uses()) {
22208 switch (User->getOpcode()) {
22209 // TODO: support ISD::BITCAST
22210 // TODO: support ISD::ANY_EXTEND
22211 // TODO: support ISD::ZERO_EXTEND
22212 // TODO: support ISD::SIGN_EXTEND
22213 case ISD::TRUNCATE:
22214 // Truncation simply means we keep position, but extract less bits.
22215 Worklist.emplace_back(User, E.BitPos,
22216 /*NumBits=*/User->getValueSizeInBits(0));
22217 break;
22218 // TODO: support ISD::SRA
22219 // TODO: support ISD::SHL
22220 case ISD::SRL:
22221 // We should be shifting the Producer by a constant amount.
22222 if (auto *ShAmtC = dyn_cast<ConstantSDNode>(User->getOperand(1));
22223 User->getOperand(0).getNode() == E.Producer && ShAmtC) {
22224 // Logical right-shift means that we start extraction later,
22225 // but stop it at the same position we did previously.
22226 unsigned ShAmt = ShAmtC->getZExtValue();
22227 Worklist.emplace_back(User, E.BitPos + ShAmt, E.NumBits - ShAmt);
22228 break;
22229 }
22230 [[fallthrough]];
22231 default:
22232 // We can not model this user of the Producer.
22233 // Which means the current Producer will be a ISD::EXTRACT_VECTOR_ELT.
22234 ProducerIsLeaf = true;
22235 // Profitability check: all users that we can not model
22236 // must be ISD::BUILD_VECTOR's.
22237 if (User->getOpcode() != ISD::BUILD_VECTOR)
22238 return false;
22239 break;
22240 }
22241 }
22242 if (ProducerIsLeaf)
22243 Leafs.emplace_back(std::move(E));
22244 }
22245
22246 unsigned NewVecEltBitWidth = Leafs.front().NumBits;
22247
22248 // If we are still at the same element granularity, give up.
22249 if (NewVecEltBitWidth == VecEltBitWidth)
22250 return false;
22251
22252 // The vector width must be a multiple of the new element width.
22253 if (VecVT.getSizeInBits() % NewVecEltBitWidth != 0)
22254 return false;
22255
22256 // All leafs must agree on the new element width.
22257 // All leafs must not expect any "padding" bits on top of that width.
22258 // All leafs must start extraction from multiple of that width.
22259 if (!all_of(Leafs, [NewVecEltBitWidth](const Entry &E) {
22260 return (unsigned)E.NumBits == NewVecEltBitWidth &&
22261 E.Producer->getValueSizeInBits(0) == NewVecEltBitWidth &&
22262 E.BitPos % NewVecEltBitWidth == 0;
22263 }))
22264 return false;
22265
22266 EVT NewScalarVT = EVT::getIntegerVT(*DAG.getContext(), NewVecEltBitWidth);
22267 EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewScalarVT,
22268 VecVT.getSizeInBits() / NewVecEltBitWidth);
22269
22270 if (LegalTypes &&
22271 !(TLI.isTypeLegal(NewScalarVT) && TLI.isTypeLegal(NewVecVT)))
22272 return false;
22273
22274 if (LegalOperations &&
22275 !(TLI.isOperationLegalOrCustom(ISD::BITCAST, NewVecVT) &&
22276 TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, NewVecVT)))
22277 return false;
22278
22279 SDValue NewVecOp = DAG.getBitcast(NewVecVT, VecOp);
22280 for (const Entry &E : Leafs) {
22281 SDLoc DL(E.Producer);
22282 unsigned NewIndex = E.BitPos / NewVecEltBitWidth;
22283 assert(NewIndex < NewVecVT.getVectorNumElements() &&
22284 "Creating out-of-bounds ISD::EXTRACT_VECTOR_ELT?");
22285 SDValue V = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, NewScalarVT, NewVecOp,
22286 DAG.getVectorIdxConstant(NewIndex, DL));
22287 CombineTo(E.Producer, V);
22288 }
22289
22290 return true;
22291}
22292
22293SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
22294 SDValue VecOp = N->getOperand(0);
22295 SDValue Index = N->getOperand(1);
22296 EVT ScalarVT = N->getValueType(0);
22297 EVT VecVT = VecOp.getValueType();
22298 if (VecOp.isUndef())
22299 return DAG.getUNDEF(ScalarVT);
22300
22301 // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
22302 //
22303 // This only really matters if the index is non-constant since other combines
22304 // on the constant elements already work.
22305 SDLoc DL(N);
22306 if (VecOp.getOpcode() == ISD::INSERT_VECTOR_ELT &&
22307 Index == VecOp.getOperand(2)) {
22308 SDValue Elt = VecOp.getOperand(1);
22309 return VecVT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, DL, ScalarVT) : Elt;
22310 }
22311
22312 // (vextract (scalar_to_vector val), 0) -> val
22313 if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR) {
22314 // Only 0'th element of SCALAR_TO_VECTOR is defined.
22315 if (DAG.isKnownNeverZero(Index))
22316 return DAG.getUNDEF(ScalarVT);
22317
22318 // Check if the result type doesn't match the inserted element type.
22319 // The inserted element and extracted element may have mismatched bitwidth.
22320 // As a result, EXTRACT_VECTOR_ELT may extend or truncate the extracted value.
22321 SDValue InOp = VecOp.getOperand(0);
22322 if (InOp.getValueType() != ScalarVT) {
22323 assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
22324 if (InOp.getValueType().bitsGT(ScalarVT))
22325 return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, InOp);
22326 return DAG.getNode(ISD::ANY_EXTEND, DL, ScalarVT, InOp);
22327 }
22328 return InOp;
22329 }
22330
22331 // extract_vector_elt of out-of-bounds element -> UNDEF
22332 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
22333 if (IndexC && VecVT.isFixedLengthVector() &&
22334 IndexC->getAPIntValue().uge(VecVT.getVectorNumElements()))
22335 return DAG.getUNDEF(ScalarVT);
22336
22337 // extract_vector_elt (build_vector x, y), 1 -> y
22338 if (((IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR) ||
22339 VecOp.getOpcode() == ISD::SPLAT_VECTOR) &&
22340 TLI.isTypeLegal(VecVT)) {
22341 assert((VecOp.getOpcode() != ISD::BUILD_VECTOR ||
22342 VecVT.isFixedLengthVector()) &&
22343 "BUILD_VECTOR used for scalable vectors");
22344 unsigned IndexVal =
22345 VecOp.getOpcode() == ISD::BUILD_VECTOR ? IndexC->getZExtValue() : 0;
22346 SDValue Elt = VecOp.getOperand(IndexVal);
22347 EVT InEltVT = Elt.getValueType();
22348
22349 if (VecOp.hasOneUse() || TLI.aggressivelyPreferBuildVectorSources(VecVT) ||
22350 isNullConstant(Elt)) {
22351 // Sometimes build_vector's scalar input types do not match result type.
22352 if (ScalarVT == InEltVT)
22353 return Elt;
22354
22355 // TODO: It may be useful to truncate if free if the build_vector
22356 // implicitly converts.
22357 }
22358 }
22359
22360 if (SDValue BO = scalarizeExtractedBinop(N, DAG, LegalOperations))
22361 return BO;
22362
22363 if (VecVT.isScalableVector())
22364 return SDValue();
22365
22366 // All the code from this point onwards assumes fixed width vectors, but it's
22367 // possible that some of the combinations could be made to work for scalable
22368 // vectors too.
22369 unsigned NumElts = VecVT.getVectorNumElements();
22370 unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
22371
22372 // See if the extracted element is constant, in which case fold it if it's
22373 // a legal fp immediate.
22374 if (IndexC && ScalarVT.isFloatingPoint()) {
22375 APInt EltMask = APInt::getOneBitSet(NumElts, IndexC->getZExtValue());
22376 KnownBits KnownElt = DAG.computeKnownBits(VecOp, EltMask);
22377 if (KnownElt.isConstant()) {
22378 APFloat CstFP =
22379 APFloat(DAG.EVTToAPFloatSemantics(ScalarVT), KnownElt.getConstant());
22380 if (TLI.isFPImmLegal(CstFP, ScalarVT))
22381 return DAG.getConstantFP(CstFP, DL, ScalarVT);
22382 }
22383 }
22384
22385 // TODO: These transforms should not require the 'hasOneUse' restriction, but
22386 // there are regressions on multiple targets without it. We can end up with a
22387 // mess of scalar and vector code if we reduce only part of the DAG to scalar.
22388 if (IndexC && VecOp.getOpcode() == ISD::BITCAST && VecVT.isInteger() &&
22389 VecOp.hasOneUse()) {
22390 // The vector index of the LSBs of the source depends on the endianness.
22391 bool IsLE = DAG.getDataLayout().isLittleEndian();
22392 unsigned ExtractIndex = IndexC->getZExtValue();
22393 // extract_elt (v2i32 (bitcast i64:x)), BCTruncElt -> i32 (trunc i64:x)
22394 unsigned BCTruncElt = IsLE ? 0 : NumElts - 1;
22395 SDValue BCSrc = VecOp.getOperand(0);
22396 if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger())
22397 return DAG.getAnyExtOrTrunc(BCSrc, DL, ScalarVT);
22398
22399 if (LegalTypes && BCSrc.getValueType().isInteger() &&
22400 BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR) {
22401 // ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt -->
22402 // trunc i64 X to i32
22403 SDValue X = BCSrc.getOperand(0);
22404 assert(X.getValueType().isScalarInteger() && ScalarVT.isScalarInteger() &&
22405 "Extract element and scalar to vector can't change element type "
22406 "from FP to integer.");
22407 unsigned XBitWidth = X.getValueSizeInBits();
22408 BCTruncElt = IsLE ? 0 : XBitWidth / VecEltBitWidth - 1;
22409
22410 // An extract element return value type can be wider than its vector
22411 // operand element type. In that case, the high bits are undefined, so
22412 // it's possible that we may need to extend rather than truncate.
22413 if (ExtractIndex == BCTruncElt && XBitWidth > VecEltBitWidth) {
22414 assert(XBitWidth % VecEltBitWidth == 0 &&
22415 "Scalar bitwidth must be a multiple of vector element bitwidth");
22416 return DAG.getAnyExtOrTrunc(X, DL, ScalarVT);
22417 }
22418 }
22419 }
22420
22421 // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
22422 // We only perform this optimization before the op legalization phase because
22423 // we may introduce new vector instructions which are not backed by TD
22424 // patterns. For example on AVX, extracting elements from a wide vector
22425 // without using extract_subvector. However, if we can find an underlying
22426 // scalar value, then we can always use that.
22427 if (IndexC && VecOp.getOpcode() == ISD::VECTOR_SHUFFLE) {
22428 auto *Shuf = cast<ShuffleVectorSDNode>(VecOp);
22429 // Find the new index to extract from.
22430 int OrigElt = Shuf->getMaskElt(IndexC->getZExtValue());
22431
22432 // Extracting an undef index is undef.
22433 if (OrigElt == -1)
22434 return DAG.getUNDEF(ScalarVT);
22435
22436 // Select the right vector half to extract from.
22437 SDValue SVInVec;
22438 if (OrigElt < (int)NumElts) {
22439 SVInVec = VecOp.getOperand(0);
22440 } else {
22441 SVInVec = VecOp.getOperand(1);
22442 OrigElt -= NumElts;
22443 }
22444
22445 if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
22446 SDValue InOp = SVInVec.getOperand(OrigElt);
22447 if (InOp.getValueType() != ScalarVT) {
22448 assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
22449 InOp = DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
22450 }
22451
22452 return InOp;
22453 }
22454
22455 // FIXME: We should handle recursing on other vector shuffles and
22456 // scalar_to_vector here as well.
22457
22458 if (!LegalOperations ||
22459 // FIXME: Should really be just isOperationLegalOrCustom.
22462 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, SVInVec,
22463 DAG.getVectorIdxConstant(OrigElt, DL));
22464 }
22465 }
22466
22467 // If only EXTRACT_VECTOR_ELT nodes use the source vector we can
22468 // simplify it based on the (valid) extraction indices.
22469 if (llvm::all_of(VecOp->uses(), [&](SDNode *Use) {
22470 return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
22471 Use->getOperand(0) == VecOp &&
22472 isa<ConstantSDNode>(Use->getOperand(1));
22473 })) {
22474 APInt DemandedElts = APInt::getZero(NumElts);
22475 for (SDNode *Use : VecOp->uses()) {
22476 auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1));
22477 if (CstElt->getAPIntValue().ult(NumElts))
22478 DemandedElts.setBit(CstElt->getZExtValue());
22479 }
22480 if (SimplifyDemandedVectorElts(VecOp, DemandedElts, true)) {
22481 // We simplified the vector operand of this extract element. If this
22482 // extract is not dead, visit it again so it is folded properly.
22483 if (N->getOpcode() != ISD::DELETED_NODE)
22484 AddToWorklist(N);
22485 return SDValue(N, 0);
22486 }
22487 APInt DemandedBits = APInt::getAllOnes(VecEltBitWidth);
22488 if (SimplifyDemandedBits(VecOp, DemandedBits, DemandedElts, true)) {
22489 // We simplified the vector operand of this extract element. If this
22490 // extract is not dead, visit it again so it is folded properly.
22491 if (N->getOpcode() != ISD::DELETED_NODE)
22492 AddToWorklist(N);
22493 return SDValue(N, 0);
22494 }
22495 }
22496
22497 if (refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(N))
22498 return SDValue(N, 0);
22499
22500 // Everything under here is trying to match an extract of a loaded value.
22501 // If the result of the load has to be truncated, then it's not necessarily
22502 // profitable.
22503 bool BCNumEltsChanged = false;
22504 EVT ExtVT = VecVT.getVectorElementType();
22505 EVT LVT = ExtVT;
22506 if (ScalarVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, ScalarVT))
22507 return SDValue();
22508
22509 if (VecOp.getOpcode() == ISD::BITCAST) {
22510 // Don't duplicate a load with other uses.
22511 if (!VecOp.hasOneUse())
22512 return SDValue();
22513
22514 EVT BCVT = VecOp.getOperand(0).getValueType();
22515 if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
22516 return SDValue();
22517 if (NumElts != BCVT.getVectorNumElements())
22518 BCNumEltsChanged = true;
22519 VecOp = VecOp.getOperand(0);
22520 ExtVT = BCVT.getVectorElementType();
22521 }
22522
22523 // extract (vector load $addr), i --> load $addr + i * size
22524 if (!LegalOperations && !IndexC && VecOp.hasOneUse() &&
22525 ISD::isNormalLoad(VecOp.getNode()) &&
22526 !Index->hasPredecessor(VecOp.getNode())) {
22527 auto *VecLoad = dyn_cast<LoadSDNode>(VecOp);
22528 if (VecLoad && VecLoad->isSimple())
22529 return scalarizeExtractedVectorLoad(N, VecVT, Index, VecLoad);
22530 }
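// Worked example (illustrative note, not part of the upstream source):
//   i32 (extract_vector_elt (v4i32 load $addr), i)
// can become a scalar load of $addr + i * 4 bytes when the vector load is
// simple and has no other users, e.g. extracting element 2 reads $addr + 8.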
22531
22532 // Perform only after legalization to ensure build_vector / vector_shuffle
22533 // optimizations have already been done.
22534 if (!LegalOperations || !IndexC)
22535 return SDValue();
22536
22537 // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
22538 // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
22539 // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
22540 int Elt = IndexC->getZExtValue();
22541 LoadSDNode *LN0 = nullptr;
22542 if (ISD::isNormalLoad(VecOp.getNode())) {
22543 LN0 = cast<LoadSDNode>(VecOp);
22544 } else if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
22545 VecOp.getOperand(0).getValueType() == ExtVT &&
22546 ISD::isNormalLoad(VecOp.getOperand(0).getNode())) {
22547 // Don't duplicate a load with other uses.
22548 if (!VecOp.hasOneUse())
22549 return SDValue();
22550
22551 LN0 = cast<LoadSDNode>(VecOp.getOperand(0));
22552 }
22553 if (auto *Shuf = dyn_cast<ShuffleVectorSDNode>(VecOp)) {
22554 // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
22555 // =>
22556 // (load $addr+1*size)
22557
22558 // Don't duplicate a load with other uses.
22559 if (!VecOp.hasOneUse())
22560 return SDValue();
22561
22562 // If the bit convert changed the number of elements, it is unsafe
22563 // to examine the mask.
22564 if (BCNumEltsChanged)
22565 return SDValue();
22566
22567 // Select the input vector, guarding against out of range extract vector.
22568 int Idx = (Elt > (int)NumElts) ? -1 : Shuf->getMaskElt(Elt);
22569 VecOp = (Idx < (int)NumElts) ? VecOp.getOperand(0) : VecOp.getOperand(1);
22570
22571 if (VecOp.getOpcode() == ISD::BITCAST) {
22572 // Don't duplicate a load with other uses.
22573 if (!VecOp.hasOneUse())
22574 return SDValue();
22575
22576 VecOp = VecOp.getOperand(0);
22577 }
22578 if (ISD::isNormalLoad(VecOp.getNode())) {
22579 LN0 = cast<LoadSDNode>(VecOp);
22580 Elt = (Idx < (int)NumElts) ? Idx : Idx - (int)NumElts;
22581 Index = DAG.getConstant(Elt, DL, Index.getValueType());
22582 }
22583 } else if (VecOp.getOpcode() == ISD::CONCAT_VECTORS && !BCNumEltsChanged &&
22584 VecVT.getVectorElementType() == ScalarVT &&
22585 (!LegalTypes ||
22586 TLI.isTypeLegal(
22587 VecOp.getOperand(0).getValueType().getVectorElementType()))) {
22588 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 0
22589 // -> extract_vector_elt a, 0
22590 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 1
22591 // -> extract_vector_elt a, 1
22592 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 2
22593 // -> extract_vector_elt b, 0
22594 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 3
22595 // -> extract_vector_elt b, 1
22596 EVT ConcatVT = VecOp.getOperand(0).getValueType();
22597 unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
22598 SDValue NewIdx = DAG.getConstant(Elt % ConcatNumElts, DL,
22599 Index.getValueType());
22600
22601 SDValue ConcatOp = VecOp.getOperand(Elt / ConcatNumElts);
22602 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
22603 ConcatVT.getVectorElementType(),
22604 ConcatOp, NewIdx);
22605 return DAG.getNode(ISD::BITCAST, DL, ScalarVT, Elt);
22606 }
22607
22608 // Make sure we found a non-volatile load and the extractelement is
22609 // the only use.
22610 if (!LN0 || !LN0->hasNUsesOfValue(1,0) || !LN0->isSimple())
22611 return SDValue();
22612
22613 // If Idx was -1 above, Elt is going to be -1, so just return undef.
22614 if (Elt == -1)
22615 return DAG.getUNDEF(LVT);
22616
22617 return scalarizeExtractedVectorLoad(N, VecVT, Index, LN0);
22618}
22619
22620// Simplify (build_vec (ext )) to (bitcast (build_vec ))
22621SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
22622 // We perform this optimization post type-legalization because
22623 // the type-legalizer often scalarizes integer-promoted vectors.
22624 // Performing this optimization before may create bit-casts which
22625 // will be type-legalized to complex code sequences.
22626 // We perform this optimization only before the operation legalizer because we
22627 // may introduce illegal operations.
22628 if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
22629 return SDValue();
22630
22631 unsigned NumInScalars = N->getNumOperands();
22632 SDLoc DL(N);
22633 EVT VT = N->getValueType(0);
22634
22635 // Check to see if this is a BUILD_VECTOR of a bunch of values
22636 // which come from any_extend or zero_extend nodes. If so, we can create
22637 // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
22638 // optimizations. We do not handle sign-extend because we can't fill the sign
22639 // using shuffles.
22640 EVT SourceType = MVT::Other;
22641 bool AllAnyExt = true;
22642
22643 for (unsigned i = 0; i != NumInScalars; ++i) {
22644 SDValue In = N->getOperand(i);
22645 // Ignore undef inputs.
22646 if (In.isUndef()) continue;
22647
22648 bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND;
22649 bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
22650
22651 // Abort if the element is not an extension.
22652 if (!ZeroExt && !AnyExt) {
22653 SourceType = MVT::Other;
22654 break;
22655 }
22656
22657 // The input is a ZeroExt or AnyExt. Check the original type.
22658 EVT InTy = In.getOperand(0).getValueType();
22659
22660 // Check that all of the widened source types are the same.
22661 if (SourceType == MVT::Other)
22662 // First time.
22663 SourceType = InTy;
22664 else if (InTy != SourceType) {
22665 // Multiple incoming source types. Abort.
22666 SourceType = MVT::Other;
22667 break;
22668 }
22669
22670 // Check if all of the extends are ANY_EXTENDs.
22671 AllAnyExt &= AnyExt;
22672 }
22673
22674 // In order to have valid types, all of the inputs must be extended from the
22675 // same source type and all of the inputs must be any or zero extend.
22676 // Scalar sizes must be a power of two.
22677 EVT OutScalarTy = VT.getScalarType();
22678 bool ValidTypes =
22679 SourceType != MVT::Other &&
22680 llvm::has_single_bit<uint32_t>(OutScalarTy.getSizeInBits()) &&
22681 llvm::has_single_bit<uint32_t>(SourceType.getSizeInBits());
22682
22683 // Create a new simpler BUILD_VECTOR sequence which other optimizations can
22684 // turn into a single shuffle instruction.
22685 if (!ValidTypes)
22686 return SDValue();
22687
22688 // If we already have a splat buildvector, then don't fold it if it means
22689 // introducing zeros.
22690 if (!AllAnyExt && DAG.isSplatValue(SDValue(N, 0), /*AllowUndefs*/ true))
22691 return SDValue();
22692
22693 bool isLE = DAG.getDataLayout().isLittleEndian();
22694 unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
22695 assert(ElemRatio > 1 && "Invalid element size ratio");
22696 SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
22697 DAG.getConstant(0, DL, SourceType);
22698
22699 unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
22700 SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
22701
22702 // Populate the new build_vector
22703 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
22704 SDValue Cast = N->getOperand(i);
22705 assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
22706 Cast.getOpcode() == ISD::ZERO_EXTEND ||
22707 Cast.isUndef()) && "Invalid cast opcode");
22708 SDValue In;
22709 if (Cast.isUndef())
22710 In = DAG.getUNDEF(SourceType);
22711 else
22712 In = Cast->getOperand(0);
22713 unsigned Index = isLE ? (i * ElemRatio) :
22714 (i * ElemRatio + (ElemRatio - 1));
22715
22716 assert(Index < Ops.size() && "Invalid index");
22717 Ops[Index] = In;
22718 }
22719
22720 // The type of the new BUILD_VECTOR node.
22721 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
22722 assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
22723 "Invalid vector size");
22724 // Check if the new vector type is legal.
22725 if (!isTypeLegal(VecVT) ||
22726 (!TLI.isOperationLegal(ISD::BUILD_VECTOR, VecVT) &&
22727 TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)))
22728 return SDValue();
22729
22730 // Make the new BUILD_VECTOR.
22731 SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
22732
22733 // The new BUILD_VECTOR node has the potential to be further optimized.
22734 AddToWorklist(BV.getNode());
22735 // Bitcast to the desired type.
22736 return DAG.getBitcast(VT, BV);
22737}
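// Worked example (illustrative note, not part of the upstream source): on a
// little-endian target,
//   v4i32 = BUILD_VECTOR (zext i16:a), (zext i16:b), (zext i16:c), (zext i16:d)
// has ElemRatio = 2, so it becomes
//   v4i32 = bitcast (v8i16 BUILD_VECTOR a, 0, b, 0, c, 0, d, 0)
// using the zero filler; an all-ANY_EXTEND input would use undef instead.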
22738
22739// Simplify (build_vec (trunc $1)
22740// (trunc (srl $1 half-width))
22741// (trunc (srl $1 (2 * half-width))))
22742// to (bitcast $1)
22743SDValue DAGCombiner::reduceBuildVecTruncToBitCast(SDNode *N) {
22744 assert(N->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
22745
22746 EVT VT = N->getValueType(0);
22747
22748 // Don't run this before LegalizeTypes if VT is legal.
22749 // Targets may have other preferences.
22750 if (Level < AfterLegalizeTypes && TLI.isTypeLegal(VT))
22751 return SDValue();
22752
22753 // Only for little endian
22754 if (!DAG.getDataLayout().isLittleEndian())
22755 return SDValue();
22756
22757 SDLoc DL(N);
22758 EVT OutScalarTy = VT.getScalarType();
22759 uint64_t ScalarTypeBitsize = OutScalarTy.getSizeInBits();
22760
22761 // Only for power of two types to be sure that bitcast works well
22762 if (!isPowerOf2_64(ScalarTypeBitsize))
22763 return SDValue();
22764
22765 unsigned NumInScalars = N->getNumOperands();
22766
22767 // Look through bitcasts
22768 auto PeekThroughBitcast = [](SDValue Op) {
22769 if (Op.getOpcode() == ISD::BITCAST)
22770 return Op.getOperand(0);
22771 return Op;
22772 };
22773
22774 // The source value from which all the parts are extracted.
22775 SDValue Src;
22776 for (unsigned i = 0; i != NumInScalars; ++i) {
22777 SDValue In = PeekThroughBitcast(N->getOperand(i));
22778 // Ignore undef inputs.
22779 if (In.isUndef()) continue;
22780
22781 if (In.getOpcode() != ISD::TRUNCATE)
22782 return SDValue();
22783
22784 In = PeekThroughBitcast(In.getOperand(0));
22785
22786 if (In.getOpcode() != ISD::SRL) {
22787 // For now only build_vec without shuffling, handle shifts here in the
22788 // future.
22789 if (i != 0)
22790 return SDValue();
22791
22792 Src = In;
22793 } else {
22794 // In is SRL
22795 SDValue part = PeekThroughBitcast(In.getOperand(0));
22796
22797 if (!Src) {
22798 Src = part;
22799 } else if (Src != part) {
22800 // Vector parts do not stem from the same variable
22801 return SDValue();
22802 }
22803
22804 SDValue ShiftAmtVal = In.getOperand(1);
22805 if (!isa<ConstantSDNode>(ShiftAmtVal))
22806 return SDValue();
22807
22808 uint64_t ShiftAmt = In.getConstantOperandVal(1);
22809
22810 // The extracted value is not extracted at the right position
22811 if (ShiftAmt != i * ScalarTypeBitsize)
22812 return SDValue();
22813 }
22814 }
22815
22816 // Only cast if the size is the same
22817 if (!Src || Src.getValueType().getSizeInBits() != VT.getSizeInBits())
22818 return SDValue();
22819
22820 return DAG.getBitcast(VT, Src);
22821}
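// Worked example (illustrative note, not part of the upstream source): on a
// little-endian target with i64 $1,
//   v4i16 = BUILD_VECTOR (trunc $1), (trunc (srl $1, 16)),
//                        (trunc (srl $1, 32)), (trunc (srl $1, 48))
// matches the pattern (shift amount i * 16 for element i), so it is replaced by
//   v4i16 = bitcast $1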
22822
22823SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
22824 ArrayRef<int> VectorMask,
22825 SDValue VecIn1, SDValue VecIn2,
22826 unsigned LeftIdx, bool DidSplitVec) {
22827 SDValue ZeroIdx = DAG.getVectorIdxConstant(0, DL);
22828
22829 EVT VT = N->getValueType(0);
22830 EVT InVT1 = VecIn1.getValueType();
22831 EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
22832
22833 unsigned NumElems = VT.getVectorNumElements();
22834 unsigned ShuffleNumElems = NumElems;
22835
22836 // If we artificially split a vector in two already, then the offsets in the
22837 // operands will all be based off of VecIn1, even those in VecIn2.
22838 unsigned Vec2Offset = DidSplitVec ? 0 : InVT1.getVectorNumElements();
22839
22840 uint64_t VTSize = VT.getFixedSizeInBits();
22841 uint64_t InVT1Size = InVT1.getFixedSizeInBits();
22842 uint64_t InVT2Size = InVT2.getFixedSizeInBits();
22843
22844 assert(InVT2Size <= InVT1Size &&
22845 "Inputs must be sorted to be in non-increasing vector size order.");
22846
22847 // We can't generate a shuffle node with mismatched input and output types.
22848 // Try to make the types match the type of the output.
22849 if (InVT1 != VT || InVT2 != VT) {
22850 if ((VTSize % InVT1Size == 0) && InVT1 == InVT2) {
22851 // If the output vector length is a multiple of both input lengths,
22852 // we can concatenate them and pad the rest with undefs.
22853 unsigned NumConcats = VTSize / InVT1Size;
22854 assert(NumConcats >= 2 && "Concat needs at least two inputs!");
22855 SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
22856 ConcatOps[0] = VecIn1;
22857 ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
22858 VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
22859 VecIn2 = SDValue();
22860 } else if (InVT1Size == VTSize * 2) {
22861 if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
22862 return SDValue();
22863
22864 if (!VecIn2.getNode()) {
22865 // If we only have one input vector, and it's twice the size of the
22866 // output, split it in two.
22867 VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
22868 DAG.getVectorIdxConstant(NumElems, DL));
22869 VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
22870 // Since we now have shorter input vectors, adjust the offset of the
22871 // second vector's start.
22872 Vec2Offset = NumElems;
22873 } else {
22874 assert(InVT2Size <= InVT1Size &&
22875 "Second input is not going to be larger than the first one.");
22876
22877 // VecIn1 is wider than the output, and we have another, possibly
22878 // smaller input. Pad the smaller input with undefs, shuffle at the
22879 // input vector width, and extract the output.
22880 // The shuffle type is different than VT, so check legality again.
22881 if (LegalOperations &&
22882 !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
22883 return SDValue();
22884
22885 // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
22886 // lower it back into a BUILD_VECTOR. So if the inserted type is
22887 // illegal, don't even try.
22888 if (InVT1 != InVT2) {
22889 if (!TLI.isTypeLegal(InVT2))
22890 return SDValue();
22891 VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
22892 DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
22893 }
22894 ShuffleNumElems = NumElems * 2;
22895 }
22896 } else if (InVT2Size * 2 == VTSize && InVT1Size == VTSize) {
22897 SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
22898 ConcatOps[0] = VecIn2;
22899 VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
22900 } else if (InVT1Size / VTSize > 1 && InVT1Size % VTSize == 0) {
22901 if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems) ||
22902 !TLI.isTypeLegal(InVT1) || !TLI.isTypeLegal(InVT2))
22903 return SDValue();
22904 // If the destination vector has fewer than two elements, then using a shuffle
22905 // and extracting from larger registers will cost even more.
22906 if (VT.getVectorNumElements() <= 2 || !VecIn2.getNode())
22907 return SDValue();
22908 assert(InVT2Size <= InVT1Size &&
22909 "Second input is not going to be larger than the first one.");
22910
22911 // VecIn1 is wider than the output, and we have another, possibly
22912 // smaller input. Pad the smaller input with undefs, shuffle at the
22913 // input vector width, and extract the output.
22914 // The shuffle type is different than VT, so check legality again.
22915 if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
22916 return SDValue();
22917
22918 if (InVT1 != InVT2) {
22919 VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
22920 DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
22921 }
22922 ShuffleNumElems = InVT1Size / VTSize * NumElems;
22923 } else {
22924 // TODO: Support cases where the length mismatch isn't exactly by a
22925 // factor of 2.
22926 // TODO: Move this check upwards, so that if we have bad type
22927 // mismatches, we don't create any DAG nodes.
22928 return SDValue();
22929 }
22930 }
22931
22932 // Initialize mask to undef.
22933 SmallVector<int, 8> Mask(ShuffleNumElems, -1);
22934
22935 // Only need to run up to the number of elements actually used, not the
22936 // total number of elements in the shuffle - if we are shuffling a wider
22937 // vector, the high lanes should be set to undef.
22938 for (unsigned i = 0; i != NumElems; ++i) {
22939 if (VectorMask[i] <= 0)
22940 continue;
22941
22942 unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
22943 if (VectorMask[i] == (int)LeftIdx) {
22944 Mask[i] = ExtIndex;
22945 } else if (VectorMask[i] == (int)LeftIdx + 1) {
22946 Mask[i] = Vec2Offset + ExtIndex;
22947 }
22948 }
22949
22950 // The type the input vectors may have changed above.
22951 InVT1 = VecIn1.getValueType();
22952
22953 // If we already have a VecIn2, it should have the same type as VecIn1.
22954 // If we don't, get an undef/zero vector of the appropriate type.
22955 VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
22956 assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");
22957
22958 SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
22959 if (ShuffleNumElems > NumElems)
22960 Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);
22961
22962 return Shuffle;
22963}
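// Illustrative note (not part of the upstream source): if VT is v4i32 and the
// only input is a single v8i32, InVT1Size == 2 * VTSize, so the input is split
// into two v4i32 halves with EXTRACT_SUBVECTOR; an original extract index of 5
// then naturally addresses element 1 of the upper half in the resulting
// two-operand shuffle.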
22964
22965 static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
22966 assert(BV->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
22967
22968 // First, determine where the build vector is not undef.
22969 // TODO: We could extend this to handle zero elements as well as undefs.
22970 int NumBVOps = BV->getNumOperands();
22971 int ZextElt = -1;
22972 for (int i = 0; i != NumBVOps; ++i) {
22973 SDValue Op = BV->getOperand(i);
22974 if (Op.isUndef())
22975 continue;
22976 if (ZextElt == -1)
22977 ZextElt = i;
22978 else
22979 return SDValue();
22980 }
22981 // Bail out if there's no non-undef element.
22982 if (ZextElt == -1)
22983 return SDValue();
22984
22985 // The build vector contains some number of undef elements and exactly
22986 // one other element. That other element must be a zero-extended scalar
22987 // extracted from a vector at a constant index to turn this into a shuffle.
22988 // Also, require that the build vector does not implicitly truncate/extend
22989 // its elements.
22990 // TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND.
22991 EVT VT = BV->getValueType(0);
22992 SDValue Zext = BV->getOperand(ZextElt);
22993 if (Zext.getOpcode() != ISD::ZERO_EXTEND || !Zext.hasOneUse() ||
22994 Zext.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
22995 !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)) ||
22996 Zext.getValueSizeInBits() != VT.getScalarSizeInBits())
22997 return SDValue();
22998
22999 // The zero-extend must be a multiple of the source size, and we must be
23000 // building a vector of the same size as the source of the extract element.
23001 SDValue Extract = Zext.getOperand(0);
23002 unsigned DestSize = Zext.getValueSizeInBits();
23003 unsigned SrcSize = Extract.getValueSizeInBits();
23004 if (DestSize % SrcSize != 0 ||
23005 Extract.getOperand(0).getValueSizeInBits() != VT.getSizeInBits())
23006 return SDValue();
23007
23008 // Create a shuffle mask that will combine the extracted element with zeros
23009 // and undefs.
23010 int ZextRatio = DestSize / SrcSize;
23011 int NumMaskElts = NumBVOps * ZextRatio;
23012 SmallVector<int, 32> ShufMask(NumMaskElts, -1);
23013 for (int i = 0; i != NumMaskElts; ++i) {
23014 if (i / ZextRatio == ZextElt) {
23015 // The low bits of the (potentially translated) extracted element map to
23016 // the source vector. The high bits map to zero. We will use a zero vector
23017 // as the 2nd source operand of the shuffle, so use the 1st element of
23018 // that vector (mask value is number-of-elements) for the high bits.
23019 int Low = DAG.getDataLayout().isBigEndian() ? (ZextRatio - 1) : 0;
23020 ShufMask[i] = (i % ZextRatio == Low) ? Extract.getConstantOperandVal(1)
23021 : NumMaskElts;
23022 }
23023
23024 // Undef elements of the build vector remain undef because we initialize
23025 // the shuffle mask with -1.
23026 }
23027
23028 // buildvec undef, ..., (zext (extractelt V, IndexC)), undef... -->
23029 // bitcast (shuffle V, ZeroVec, VectorMask)
23030 SDLoc DL(BV);
23031 EVT VecVT = Extract.getOperand(0).getValueType();
23032 SDValue ZeroVec = DAG.getConstant(0, DL, VecVT);
23033 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
23034 SDValue Shuf = TLI.buildLegalVectorShuffle(VecVT, DL, Extract.getOperand(0),
23035 ZeroVec, ShufMask, DAG);
23036 if (!Shuf)
23037 return SDValue();
23038 return DAG.getBitcast(VT, Shuf);
23039}
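// Worked example (illustrative note, not part of the upstream source):
//   v2i64 = BUILD_VECTOR (zext (i32 extract_vector_elt v4i32:V, 2)), undef
// has ZextRatio = 64 / 32 = 2 and NumMaskElts = 4, giving the mask
// <2, 4, u, u> on little-endian: lane 0 takes source element 2, lane 1 takes
// element 0 of the all-zeros operand, and the shuffle result is bitcast back
// to v2i64.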
23040
23041// FIXME: promote to STLExtras.
23042template <typename R, typename T>
23043static auto getFirstIndexOf(R &&Range, const T &Val) {
23044 auto I = find(Range, Val);
23045 if (I == Range.end())
23046 return static_cast<decltype(std::distance(Range.begin(), I))>(-1);
23047 return std::distance(Range.begin(), I);
23048}
23049
23050// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
23051// operations. If the types of the vectors we're extracting from allow it,
23052// turn this into a vector_shuffle node.
23053SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
23054 SDLoc DL(N);
23055 EVT VT = N->getValueType(0);
23056
23057 // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
23058 if (!isTypeLegal(VT))
23059 return SDValue();
23060
23061 if (SDValue V = reduceBuildVecToShuffleWithZero(N, DAG))
23062 return V;
23063
23064 // May only combine to shuffle after legalize if shuffle is legal.
23065 if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
23066 return SDValue();
23067
23068 bool UsesZeroVector = false;
23069 unsigned NumElems = N->getNumOperands();
23070
23071 // Record, for each element of the newly built vector, which input vector
23072 // that element comes from. -1 stands for undef, 0 for the zero vector,
23073 // and positive values for the input vectors.
23074 // VectorMask maps each element to its vector number, and VecIn maps vector
23075 // numbers to their initial SDValues.
23076
23077 SmallVector<int, 8> VectorMask(NumElems, -1);
23078 SmallVector<SDValue, 8> VecIn;
23079 VecIn.push_back(SDValue());
23080
23081 for (unsigned i = 0; i != NumElems; ++i) {
23082 SDValue Op = N->getOperand(i);
23083
23084 if (Op.isUndef())
23085 continue;
23086
23087 // See if we can use a blend with a zero vector.
23088 // TODO: Should we generalize this to a blend with an arbitrary constant
23089 // vector?
23090 if (isNullConstant(Op) || isNullFPConstant(Op)) {
23091 UsesZeroVector = true;
23092 VectorMask[i] = 0;
23093 continue;
23094 }
23095
23096 // Not an undef or zero. If the input is something other than an
23097 // EXTRACT_VECTOR_ELT with an in-range constant index, bail out.
23098 if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
23099 !isa<ConstantSDNode>(Op.getOperand(1)))
23100 return SDValue();
23101 SDValue ExtractedFromVec = Op.getOperand(0);
23102
23103 if (ExtractedFromVec.getValueType().isScalableVector())
23104 return SDValue();
23105
23106 const APInt &ExtractIdx = Op.getConstantOperandAPInt(1);
23107 if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements()))
23108 return SDValue();
23109
23110 // All inputs must have the same element type as the output.
23111 if (VT.getVectorElementType() !=
23112 ExtractedFromVec.getValueType().getVectorElementType())
23113 return SDValue();
23114
23115 // Have we seen this input vector before?
23116 // The vectors are expected to be tiny (usually 1 or 2 elements), so using
23117 // a map back from SDValues to numbers isn't worth it.
23118 int Idx = getFirstIndexOf(VecIn, ExtractedFromVec);
23119 if (Idx == -1) { // A new source vector?
23120 Idx = VecIn.size();
23121 VecIn.push_back(ExtractedFromVec);
23122 }
23123
23124 VectorMask[i] = Idx;
23125 }
23126
23127 // If we didn't find at least one input vector, bail out.
23128 if (VecIn.size() < 2)
23129 return SDValue();
23130
23131 // If all the operands of the BUILD_VECTOR extract from the same
23132 // vector, then split the vector efficiently based on the maximum
23133 // vector access index and adjust the VectorMask and
23134 // VecIn accordingly.
23135 bool DidSplitVec = false;
23136 if (VecIn.size() == 2) {
23137 unsigned MaxIndex = 0;
23138 unsigned NearestPow2 = 0;
23139 SDValue Vec = VecIn.back();
23140 EVT InVT = Vec.getValueType();
23141 SmallVector<unsigned, 8> IndexVec(NumElems, 0);
23142
23143 for (unsigned i = 0; i < NumElems; i++) {
23144 if (VectorMask[i] <= 0)
23145 continue;
23146 unsigned Index = N->getOperand(i).getConstantOperandVal(1);
23147 IndexVec[i] = Index;
23148 MaxIndex = std::max(MaxIndex, Index);
23149 }
23150
23151 NearestPow2 = PowerOf2Ceil(MaxIndex);
23152 if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 &&
23153 NumElems * 2 < NearestPow2) {
23154 unsigned SplitSize = NearestPow2 / 2;
23155 EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
23156 InVT.getVectorElementType(), SplitSize);
23157 if (TLI.isTypeLegal(SplitVT) &&
23158 SplitSize + SplitVT.getVectorNumElements() <=
23159 InVT.getVectorNumElements()) {
23160 SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
23161 DAG.getVectorIdxConstant(SplitSize, DL));
23162 SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
23163 DAG.getVectorIdxConstant(0, DL));
23164 VecIn.pop_back();
23165 VecIn.push_back(VecIn1);
23166 VecIn.push_back(VecIn2);
23167 DidSplitVec = true;
23168
23169 for (unsigned i = 0; i < NumElems; i++) {
23170 if (VectorMask[i] <= 0)
23171 continue;
23172 VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2;
23173 }
23174 }
23175 }
23176 }
23177
23178 // Sort input vectors by decreasing vector element count,
23179 // while preserving the relative order of equally-sized vectors.
23180 // Note that we keep the first "implicit" zero vector as-is.
23181 SmallVector<SDValue, 8> SortedVecIn(VecIn);
23182 llvm::stable_sort(MutableArrayRef<SDValue>(SortedVecIn).drop_front(),
23183 [](const SDValue &a, const SDValue &b) {
23184 return a.getValueType().getVectorNumElements() >
23185 b.getValueType().getVectorNumElements();
23186 });
23187
23188 // We now also need to rebuild the VectorMask, because it referenced element
23189 // order in VecIn, and we just sorted them.
23190 for (int &SourceVectorIndex : VectorMask) {
23191 if (SourceVectorIndex <= 0)
23192 continue;
23193 unsigned Idx = getFirstIndexOf(SortedVecIn, VecIn[SourceVectorIndex]);
23194 assert(Idx > 0 && Idx < SortedVecIn.size() &&
23195 VecIn[SourceVectorIndex] == SortedVecIn[Idx] && "Remapping failure");
23196 SourceVectorIndex = Idx;
23197 }
23198
23199 VecIn = std::move(SortedVecIn);
23200
23201 // TODO: Should this fire if some of the input vectors has illegal type (like
23202 // it does now), or should we let legalization run its course first?
23203
23204 // Shuffle phase:
23205 // Take pairs of vectors, and shuffle them so that the result has elements
23206 // from these vectors in the correct places.
23207 // For example, given:
23208 // t10: i32 = extract_vector_elt t1, Constant:i64<0>
23209 // t11: i32 = extract_vector_elt t2, Constant:i64<0>
23210 // t12: i32 = extract_vector_elt t3, Constant:i64<0>
23211 // t13: i32 = extract_vector_elt t1, Constant:i64<1>
23212 // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
23213 // We will generate:
23214 // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
23215 // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
23216 SmallVector<SDValue, 4> Shuffles;
23217 for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
23218 unsigned LeftIdx = 2 * In + 1;
23219 SDValue VecLeft = VecIn[LeftIdx];
23220 SDValue VecRight =
23221 (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
23222
23223 if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
23224 VecRight, LeftIdx, DidSplitVec))
23225 Shuffles.push_back(Shuffle);
23226 else
23227 return SDValue();
23228 }
23229
23230 // If we need the zero vector as an "ingredient" in the blend tree, add it
23231 // to the list of shuffles.
23232 if (UsesZeroVector)
23233 Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
23234 : DAG.getConstantFP(0.0, DL, VT));
23235
23236 // If we only have one shuffle, we're done.
23237 if (Shuffles.size() == 1)
23238 return Shuffles[0];
23239
23240 // Update the vector mask to point to the post-shuffle vectors.
23241 for (int &Vec : VectorMask)
23242 if (Vec == 0)
23243 Vec = Shuffles.size() - 1;
23244 else
23245 Vec = (Vec - 1) / 2;
23246
23247 // More than one shuffle. Generate a binary tree of blends, e.g. if from
23248 // the previous step we got the set of shuffles t10, t11, t12, t13, we will
23249 // generate:
23250 // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
23251 // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
23252 // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
23253 // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
23254 // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
23255 // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
23256 // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21
23257
23258 // Make sure the initial size of the shuffle list is even.
23259 if (Shuffles.size() % 2)
23260 Shuffles.push_back(DAG.getUNDEF(VT));
23261
23262 for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
23263 if (CurSize % 2) {
23264 Shuffles[CurSize] = DAG.getUNDEF(VT);
23265 CurSize++;
23266 }
23267 for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
23268 int Left = 2 * In;
23269 int Right = 2 * In + 1;
23270 SmallVector<int, 8> Mask(NumElems, -1);
23271 SDValue L = Shuffles[Left];
23272 ArrayRef<int> LMask;
23273 bool IsLeftShuffle = L.getOpcode() == ISD::VECTOR_SHUFFLE &&
23274 L.use_empty() && L.getOperand(1).isUndef() &&
23275 L.getOperand(0).getValueType() == L.getValueType();
23276 if (IsLeftShuffle) {
23277 LMask = cast<ShuffleVectorSDNode>(L.getNode())->getMask();
23278 L = L.getOperand(0);
23279 }
23280 SDValue R = Shuffles[Right];
23281 ArrayRef<int> RMask;
23282 bool IsRightShuffle = R.getOpcode() == ISD::VECTOR_SHUFFLE &&
23283 R.use_empty() && R.getOperand(1).isUndef() &&
23284 R.getOperand(0).getValueType() == R.getValueType();
23285 if (IsRightShuffle) {
23286 RMask = cast<ShuffleVectorSDNode>(R.getNode())->getMask();
23287 R = R.getOperand(0);
23288 }
23289 for (unsigned I = 0; I != NumElems; ++I) {
23290 if (VectorMask[I] == Left) {
23291 Mask[I] = I;
23292 if (IsLeftShuffle)
23293 Mask[I] = LMask[I];
23294 VectorMask[I] = In;
23295 } else if (VectorMask[I] == Right) {
23296 Mask[I] = I + NumElems;
23297 if (IsRightShuffle)
23298 Mask[I] = RMask[I] + NumElems;
23299 VectorMask[I] = In;
23300 }
23301 }
23302
23303 Shuffles[In] = DAG.getVectorShuffle(VT, DL, L, R, Mask);
23304 }
23305 }
23306 return Shuffles[0];
23307}
23308
23309// Try to turn a build vector of zero extends of extract vector elts into a
23310 // vector zero extend and possibly an extract subvector.
23311// TODO: Support sign extend?
23312// TODO: Allow undef elements?
23313SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
23314 if (LegalOperations)
23315 return SDValue();
23316
23317 EVT VT = N->getValueType(0);
23318
23319 bool FoundZeroExtend = false;
23320 SDValue Op0 = N->getOperand(0);
23321 auto checkElem = [&](SDValue Op) -> int64_t {
23322 unsigned Opc = Op.getOpcode();
23323 FoundZeroExtend |= (Opc == ISD::ZERO_EXTEND);
23324 if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND) &&
23325 Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
23326 Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
23327 if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
23328 return C->getZExtValue();
23329 return -1;
23330 };
23331
23332 // Make sure the first element matches
23333 // (zext (extract_vector_elt X, C))
23334 // Offset must be a constant multiple of the
23335 // known-minimum vector length of the result type.
23336 int64_t Offset = checkElem(Op0);
23337 if (Offset < 0 || (Offset % VT.getVectorNumElements()) != 0)
23338 return SDValue();
23339
23340 unsigned NumElems = N->getNumOperands();
23341 SDValue In = Op0.getOperand(0).getOperand(0);
23342 EVT InSVT = In.getValueType().getScalarType();
23343 EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems);
23344
23345 // Don't create an illegal input type after type legalization.
23346 if (LegalTypes && !TLI.isTypeLegal(InVT))
23347 return SDValue();
23348
23349 // Ensure all the elements come from the same vector and are adjacent.
23350 for (unsigned i = 1; i != NumElems; ++i) {
23351 if ((Offset + i) != checkElem(N->getOperand(i)))
23352 return SDValue();
23353 }
23354
23355 SDLoc DL(N);
23356 In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
23357 Op0.getOperand(0).getOperand(1));
23358 return DAG.getNode(FoundZeroExtend ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND, DL,
23359 VT, In);
23360}
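// Worked example (illustrative note, not part of the upstream source):
//   v2i64 = BUILD_VECTOR (zext (extract_vector_elt v4i32:X, 2)),
//                        (zext (extract_vector_elt v4i32:X, 3))
// has Offset = 2 (a multiple of the 2 result elements) and adjacent indices,
// so it becomes
//   v2i64 = zero_extend (v2i32 extract_subvector X, 2)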
23361
23362 // If this is a very simple BUILD_VECTOR with the first element being a
23363 // ZERO_EXTEND, and all other elements being constant zeros, granularize the
23364 // BUILD_VECTOR's element width, absorbing the ZERO_EXTEND, turning it into a
23365 // constant zero op. This pattern can appear during legalization.
23366//
23367// NOTE: This can be generalized to allow more than a single
23368 // non-constant-zero op, UNDEFs, and to be KnownBits-based.
23369SDValue DAGCombiner::convertBuildVecZextToBuildVecWithZeros(SDNode *N) {
23370 // Don't run this after legalization. Targets may have other preferences.
23371 if (Level >= AfterLegalizeDAG)
23372 return SDValue();
23373
23374 // FIXME: support big-endian.
23375 if (DAG.getDataLayout().isBigEndian())
23376 return SDValue();
23377
23378 EVT VT = N->getValueType(0);
23379 EVT OpVT = N->getOperand(0).getValueType();
23380 assert(!VT.isScalableVector() && "Encountered scalable BUILD_VECTOR?");
23381
23382 EVT OpIntVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
23383
23384 if (!TLI.isTypeLegal(OpIntVT) ||
23385 (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::BITCAST, OpIntVT)))
23386 return SDValue();
23387
23388 unsigned EltBitwidth = VT.getScalarSizeInBits();
23389 // NOTE: the actual width of operands may be wider than that!
23390
23391 // Analyze all operands of this BUILD_VECTOR. What is the largest number of
23392 // active bits they all have? We'll want to truncate them all to that width.
23393 unsigned ActiveBits = 0;
23394 APInt KnownZeroOps(VT.getVectorNumElements(), 0);
23395 for (auto I : enumerate(N->ops())) {
23396 SDValue Op = I.value();
23397 // FIXME: support UNDEF elements?
23398 if (auto *Cst = dyn_cast<ConstantSDNode>(Op)) {
23399 unsigned OpActiveBits =
23400 Cst->getAPIntValue().trunc(EltBitwidth).getActiveBits();
23401 if (OpActiveBits == 0) {
23402 KnownZeroOps.setBit(I.index());
23403 continue;
23404 }
23405 // Profitability check: don't allow non-zero constant operands.
23406 return SDValue();
23407 }
23408 // Profitability check: there must only be a single non-zero operand,
23409 // and it must be the first operand of the BUILD_VECTOR.
23410 if (I.index() != 0)
23411 return SDValue();
23412 // The operand must be a zero-extension itself.
23413 // FIXME: this could be generalized to known leading zeros check.
23414 if (Op.getOpcode() != ISD::ZERO_EXTEND)
23415 return SDValue();
23416 unsigned CurrActiveBits =
23417 Op.getOperand(0).getValueSizeInBits().getFixedValue();
23418 assert(!ActiveBits && "Already encountered non-constant-zero operand?");
23419 ActiveBits = CurrActiveBits;
23420 // We want to at least halve the element size.
23421 if (2 * ActiveBits > EltBitwidth)
23422 return SDValue();
23423 }
23424
23425 // This BUILD_VECTOR must have at least one non-constant-zero operand.
23426 if (ActiveBits == 0)
23427 return SDValue();
23428
23429 // We have EltBitwidth bits, the *minimal* chunk size is ActiveBits,
23430 // into how many chunks can we split our element width?
23431 EVT NewScalarIntVT, NewIntVT;
23432 std::optional<unsigned> Factor;
23433 // We can split the element into at least two chunks, but not into more
23434 // than |_ EltBitwidth / ActiveBits _| chunks. Find a largest split factor
23435 // for which the element width is a multiple of it,
23436 // and the resulting types/operations on that chunk width are legal.
23437 assert(2 * ActiveBits <= EltBitwidth &&
23438 "We know that half or less bits of the element are active.");
23439 for (unsigned Scale = EltBitwidth / ActiveBits; Scale >= 2; --Scale) {
23440 if (EltBitwidth % Scale != 0)
23441 continue;
23442 unsigned ChunkBitwidth = EltBitwidth / Scale;
23443 assert(ChunkBitwidth >= ActiveBits && "As per starting point.");
23444 NewScalarIntVT = EVT::getIntegerVT(*DAG.getContext(), ChunkBitwidth);
23445 NewIntVT = EVT::getVectorVT(*DAG.getContext(), NewScalarIntVT,
23446 Scale * N->getNumOperands());
23447 if (!TLI.isTypeLegal(NewScalarIntVT) || !TLI.isTypeLegal(NewIntVT) ||
23448 (LegalOperations &&
23449 !(TLI.isOperationLegalOrCustom(ISD::TRUNCATE, NewScalarIntVT) &&
23450 TLI.isOperationLegalOrCustom(ISD::BUILD_VECTOR, NewIntVT))))
23451 continue;
23452 Factor = Scale;
23453 break;
23454 }
23455 if (!Factor)
23456 return SDValue();
23457
23458 SDLoc DL(N);
23459 SDValue ZeroOp = DAG.getConstant(0, DL, NewScalarIntVT);
23460
23461 // Recreate the BUILD_VECTOR, with elements now being Factor times smaller.
23462 SmallVector<SDValue, 16> NewOps;
23463 NewOps.reserve(NewIntVT.getVectorNumElements());
23464 for (auto I : enumerate(N->ops())) {
23465 SDValue Op = I.value();
23466 assert(!Op.isUndef() && "FIXME: after allowing UNDEF's, handle them here.");
23467 unsigned SrcOpIdx = I.index();
23468 if (KnownZeroOps[SrcOpIdx]) {
23469 NewOps.append(*Factor, ZeroOp);
23470 continue;
23471 }
23472 Op = DAG.getBitcast(OpIntVT, Op);
23473 Op = DAG.getNode(ISD::TRUNCATE, DL, NewScalarIntVT, Op);
23474 NewOps.emplace_back(Op);
23475 NewOps.append(*Factor - 1, ZeroOp);
23476 }
23477 assert(NewOps.size() == NewIntVT.getVectorNumElements());
23478 SDValue NewBV = DAG.getBuildVector(NewIntVT, DL, NewOps);
23479 NewBV = DAG.getBitcast(VT, NewBV);
23480 return NewBV;
23481}
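// Worked example (illustrative note, not part of the upstream source): with
// legal i32 and v4i32 on a little-endian target,
//   v2i64 = BUILD_VECTOR (i64 zero_extend i32:Y), (i64 0)
// has EltBitwidth = 64 and ActiveBits = 32, so Factor = 2 and the node is
// rebuilt as
//   v2i64 = bitcast (v4i32 BUILD_VECTOR (i32 trunc (i64 zero_extend Y)), 0, 0, 0)
// where the remaining chunks become explicit zeros.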
23482
23483SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
23484 EVT VT = N->getValueType(0);
23485
23486 // A vector built entirely of undefs is undef.
23487 if (ISD::allOperandsUndef(N))
23488 return DAG.getUNDEF(VT);
23489
23490 // If this is a splat of a bitcast from another vector, change to a
23491 // concat_vector.
23492 // For example:
23493 // (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) ->
23494 // (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X))))
23495 //
23496 // If X is a build_vector itself, the concat can become a larger build_vector.
23497 // TODO: Maybe this is useful for non-splat too?
23498 if (!LegalOperations) {
23499 SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue();
23500 // Only change build_vector to a concat_vector if the splat value type is
23501 // same as the vector element type.
23502 if (Splat && Splat.getValueType() == VT.getVectorElementType()) {
23503 Splat = peekThroughBitcasts(Splat);
23504 EVT SrcVT = Splat.getValueType();
23505 if (SrcVT.isVector()) {
23506 unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
23507 EVT NewVT = EVT::getVectorVT(*DAG.getContext(),
23508 SrcVT.getVectorElementType(), NumElts);
23509 if (!LegalTypes || TLI.isTypeLegal(NewVT)) {
23510 SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat);
23511 SDValue Concat =
23512 DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), NewVT, Ops);
23513 return DAG.getBitcast(VT, Concat);
23514 }
23515 }
23516 }
23517 }
23518
23519 // Check if we can express BUILD VECTOR via subvector extract.
23520 if (!LegalTypes && (N->getNumOperands() > 1)) {
23521 SDValue Op0 = N->getOperand(0);
23522 auto checkElem = [&](SDValue Op) -> uint64_t {
23523 if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
23524 (Op0.getOperand(0) == Op.getOperand(0)))
23525 if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
23526 return CNode->getZExtValue();
23527 return -1;
23528 };
23529
23530 int Offset = checkElem(Op0);
23531 for (unsigned i = 0; i < N->getNumOperands(); ++i) {
23532 if (Offset + i != checkElem(N->getOperand(i))) {
23533 Offset = -1;
23534 break;
23535 }
23536 }
23537
23538 if ((Offset == 0) &&
23539 (Op0.getOperand(0).getValueType() == N->getValueType(0)))
23540 return Op0.getOperand(0);
23541 if ((Offset != -1) &&
23542 ((Offset % N->getValueType(0).getVectorNumElements()) ==
23543 0)) // IDX must be multiple of output size.
23544 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
23545 Op0.getOperand(0), Op0.getOperand(1));
23546 }
23547
23548 if (SDValue V = convertBuildVecZextToZext(N))
23549 return V;
23550
23551 if (SDValue V = convertBuildVecZextToBuildVecWithZeros(N))
23552 return V;
23553
23554 if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
23555 return V;
23556
23557 if (SDValue V = reduceBuildVecTruncToBitCast(N))
23558 return V;
23559
23560 if (SDValue V = reduceBuildVecToShuffle(N))
23561 return V;
23562
23563 // A splat of a single element is a SPLAT_VECTOR if supported on the target.
23564 // Do this late as some of the above may replace the splat.
23565 if (TLI.getOperationAction(ISD::SPLAT_VECTOR, VT) != TargetLowering::Expand)
23566 if (SDValue V = cast<BuildVectorSDNode>(N)->getSplatValue()) {
23567 assert(!V.isUndef() && "Splat of undef should have been handled earlier");
23568 return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, V);
23569 }
23570
23571 return SDValue();
23572}
23573
23574 static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
23575 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
23576 EVT OpVT = N->getOperand(0).getValueType();
23577
23578 // If the operands are legal vectors, leave them alone.
23579 if (TLI.isTypeLegal(OpVT) || OpVT.isScalableVector())
23580 return SDValue();
23581
23582 SDLoc DL(N);
23583 EVT VT = N->getValueType(0);
23584 SmallVector<SDValue, 8> Ops;
23585 EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
23586
23587 // Keep track of what we encounter.
23588 bool AnyInteger = false;
23589 bool AnyFP = false;
23590 for (const SDValue &Op : N->ops()) {
23591 if (ISD::BITCAST == Op.getOpcode() &&
23592 !Op.getOperand(0).getValueType().isVector())
23593 Ops.push_back(Op.getOperand(0));
23594 else if (ISD::UNDEF == Op.getOpcode())
23595 Ops.push_back(DAG.getNode(ISD::UNDEF, DL, SVT));
23596 else
23597 return SDValue();
23598
23599 // Note whether we encounter an integer or floating point scalar.
23600 // If it's neither, bail out, it could be something weird like x86mmx.
23601 EVT LastOpVT = Ops.back().getValueType();
23602 if (LastOpVT.isFloatingPoint())
23603 AnyFP = true;
23604 else if (LastOpVT.isInteger())
23605 AnyInteger = true;
23606 else
23607 return SDValue();
23608 }
23609
23610 // If any of the operands is a floating point scalar bitcast to a vector,
23611 // use floating point types throughout, and bitcast everything.
23612 // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
23613 if (AnyFP) {
23614 SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
23615 if (AnyInteger) {
23616 for (SDValue &Op : Ops) {
23617 if (Op.getValueType() == SVT)
23618 continue;
23619 if (Op.isUndef())
23620 Op = DAG.getNode(ISD::UNDEF, DL, SVT);
23621 else
23622 Op = DAG.getBitcast(SVT, Op);
23623 }
23624 }
23625 }
23626
23627 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
23628 VT.getSizeInBits() / SVT.getSizeInBits());
23629 return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
23630}
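// Worked example (illustrative note, not part of the upstream source):
// assuming v2i32 is not a legal type on the target,
//   v4i32 = concat_vectors (v2i32 bitcast i64:A), (v2i32 bitcast i64:B)
// becomes
//   v4i32 = bitcast (v2i64 BUILD_VECTOR A, B)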
23631
23632// Attempt to merge nested concat_vectors/undefs.
23633// Fold concat_vectors(concat_vectors(x,y,z,w),u,u,concat_vectors(a,b,c,d))
23634// --> concat_vectors(x,y,z,w,u,u,u,u,u,u,u,u,a,b,c,d)
23635 static SDValue combineConcatVectorOfConcatVectors(SDNode *N,
23636 SelectionDAG &DAG) {
23637 EVT VT = N->getValueType(0);
23638
23639 // Ensure we're concatenating UNDEF and CONCAT_VECTORS nodes of similar types.
23640 EVT SubVT;
23641 SDValue FirstConcat;
23642 for (const SDValue &Op : N->ops()) {
23643 if (Op.isUndef())
23644 continue;
23645 if (Op.getOpcode() != ISD::CONCAT_VECTORS)
23646 return SDValue();
23647 if (!FirstConcat) {
23648 SubVT = Op.getOperand(0).getValueType();
23649 if (!DAG.getTargetLoweringInfo().isTypeLegal(SubVT))
23650 return SDValue();
23651 FirstConcat = Op;
23652 continue;
23653 }
23654 if (SubVT != Op.getOperand(0).getValueType())
23655 return SDValue();
23656 }
23657 assert(FirstConcat && "Concat of all-undefs found");
23658
23659 SmallVector<SDValue> ConcatOps;
23660 for (const SDValue &Op : N->ops()) {
23661 if (Op.isUndef()) {
23662 ConcatOps.append(FirstConcat->getNumOperands(), DAG.getUNDEF(SubVT));
23663 continue;
23664 }
23665 ConcatOps.append(Op->op_begin(), Op->op_end());
23666 }
23667 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, ConcatOps);
23668}
23669
23670// Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
23671// operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
23672// most two distinct vectors the same size as the result, attempt to turn this
23673// into a legal shuffle.
23674 static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
23675 EVT VT = N->getValueType(0);
23676 EVT OpVT = N->getOperand(0).getValueType();
23677
23678 // We currently can't generate an appropriate shuffle for a scalable vector.
23679 if (VT.isScalableVector())
23680 return SDValue();
23681
23682 int NumElts = VT.getVectorNumElements();
23683 int NumOpElts = OpVT.getVectorNumElements();
23684
23685 SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
23686 SmallVector<int, 8> Mask;
23687
23688 for (SDValue Op : N->ops()) {
23690
23691 // UNDEF nodes convert to UNDEF shuffle mask values.
23692 if (Op.isUndef()) {
23693 Mask.append((unsigned)NumOpElts, -1);
23694 continue;
23695 }
23696
23697 if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
23698 return SDValue();
23699
23700 // What vector are we extracting the subvector from and at what index?
23701 SDValue ExtVec = Op.getOperand(0);
23702 int ExtIdx = Op.getConstantOperandVal(1);
23703
23704 // We want the EVT of the original extraction to correctly scale the
23705 // extraction index.
23706 EVT ExtVT = ExtVec.getValueType();
23707 ExtVec = peekThroughBitcasts(ExtVec);
23708
23709 // UNDEF nodes convert to UNDEF shuffle mask values.
23710 if (ExtVec.isUndef()) {
23711 Mask.append((unsigned)NumOpElts, -1);
23712 continue;
23713 }
23714
23715 // Ensure that we are extracting a subvector from a vector the same
23716 // size as the result.
23717 if (ExtVT.getSizeInBits() != VT.getSizeInBits())
23718 return SDValue();
23719
23720 // Scale the subvector index to account for any bitcast.
23721 int NumExtElts = ExtVT.getVectorNumElements();
23722 if (0 == (NumExtElts % NumElts))
23723 ExtIdx /= (NumExtElts / NumElts);
23724 else if (0 == (NumElts % NumExtElts))
23725 ExtIdx *= (NumElts / NumExtElts);
23726 else
23727 return SDValue();
23728
23729 // At most we can reference 2 inputs in the final shuffle.
23730 if (SV0.isUndef() || SV0 == ExtVec) {
23731 SV0 = ExtVec;
23732 for (int i = 0; i != NumOpElts; ++i)
23733 Mask.push_back(i + ExtIdx);
23734 } else if (SV1.isUndef() || SV1 == ExtVec) {
23735 SV1 = ExtVec;
23736 for (int i = 0; i != NumOpElts; ++i)
23737 Mask.push_back(i + ExtIdx + NumElts);
23738 } else {
23739 return SDValue();
23740 }
23741 }
23742
23743 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
23744 return TLI.buildLegalVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
23745 DAG.getBitcast(VT, SV1), Mask, DAG);
23746}
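// Worked example (illustrative note, not part of the upstream source): with
// VT = v8i32,
//   concat_vectors (extract_subvector v8i32:A, 4), (extract_subvector v8i32:B, 0)
// maps to the two-input shuffle
//   v8i32 = vector_shuffle<4,5,6,7,8,9,10,11> A, B
// since each source vector is the same size as the result.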
23747
23748 static SDValue combineConcatVectorOfCasts(SDNode *N, SelectionDAG &DAG) {
23749 unsigned CastOpcode = N->getOperand(0).getOpcode();
23750 switch (CastOpcode) {
23751 case ISD::SINT_TO_FP:
23752 case ISD::UINT_TO_FP:
23753 case ISD::FP_TO_SINT:
23754 case ISD::FP_TO_UINT:
23755 // TODO: Allow more opcodes?
23756 // case ISD::BITCAST:
23757 // case ISD::TRUNCATE:
23758 // case ISD::ZERO_EXTEND:
23759 // case ISD::SIGN_EXTEND:
23760 // case ISD::FP_EXTEND:
23761 break;
23762 default:
23763 return SDValue();
23764 }
23765
23766 EVT SrcVT = N->getOperand(0).getOperand(0).getValueType();
23767 if (!SrcVT.isVector())
23768 return SDValue();
23769
23770 // All operands of the concat must be the same kind of cast from the same
23771 // source type.
23772 SmallVector<SDValue, 4> SrcOps;
23773 for (SDValue Op : N->ops()) {
23774 if (Op.getOpcode() != CastOpcode || !Op.hasOneUse() ||
23775 Op.getOperand(0).getValueType() != SrcVT)
23776 return SDValue();
23777 SrcOps.push_back(Op.getOperand(0));
23778 }
23779
23780 // The wider cast must be supported by the target. This is unusual because
23781 // the operation support type parameter depends on the opcode. In addition,
23782 // check the other type in the cast to make sure this is really legal.
23783 EVT VT = N->getValueType(0);
23784 EVT SrcEltVT = SrcVT.getVectorElementType();
23785 ElementCount NumElts = SrcVT.getVectorElementCount() * N->getNumOperands();
23786 EVT ConcatSrcVT = EVT::getVectorVT(*DAG.getContext(), SrcEltVT, NumElts);
23787 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
23788 switch (CastOpcode) {
23789 case ISD::SINT_TO_FP:
23790 case ISD::UINT_TO_FP:
23791 if (!TLI.isOperationLegalOrCustom(CastOpcode, ConcatSrcVT) ||
23792 !TLI.isTypeLegal(VT))
23793 return SDValue();
23794 break;
23795 case ISD::FP_TO_SINT:
23796 case ISD::FP_TO_UINT:
23797 if (!TLI.isOperationLegalOrCustom(CastOpcode, VT) ||
23798 !TLI.isTypeLegal(ConcatSrcVT))
23799 return SDValue();
23800 break;
23801 default:
23802 llvm_unreachable("Unexpected cast opcode");
23803 }
23804
23805 // concat (cast X), (cast Y)... -> cast (concat X, Y...)
23806 SDLoc DL(N);
23807 SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatSrcVT, SrcOps);
23808 return DAG.getNode(CastOpcode, DL, VT, NewConcat);
23809}
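// Worked example (illustrative note, not part of the upstream source):
// assuming v8i32 -> v8f32 sint_to_fp is legal or custom,
//   v8f32 = concat_vectors (sint_to_fp v4i32:X), (sint_to_fp v4i32:Y)
// is rewritten as
//   v8f32 = sint_to_fp (v8i32 concat_vectors X, Y)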
23810
23811// See if this is a simple CONCAT_VECTORS with no UNDEF operands, and if one of
23812// the operands is a SHUFFLE_VECTOR, and all other operands are also operands
23813// to that SHUFFLE_VECTOR, create wider SHUFFLE_VECTOR.
23814 static SDValue combineConcatVectorOfShuffleAndItsOperands(
23815 SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes,
23816 bool LegalOperations) {
23817 EVT VT = N->getValueType(0);
23818 EVT OpVT = N->getOperand(0).getValueType();
23819 if (VT.isScalableVector())
23820 return SDValue();
23821
23822 // For now, only allow simple 2-operand concatenations.
23823 if (N->getNumOperands() != 2)
23824 return SDValue();
23825
23826 // Don't create illegal types/shuffles when not allowed to.
23827 if ((LegalTypes && !TLI.isTypeLegal(VT)) ||
23828 (LegalOperations &&
23829 !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, VT)))
23830 return SDValue();
23831
23832 // Analyze all of the operands of the CONCAT_VECTORS. Out of all of them,
23833 // we want to find one that is: (1) a SHUFFLE_VECTOR (2) only used by us,
23834 // and (3) all operands of CONCAT_VECTORS must be either that SHUFFLE_VECTOR,
23835 // or one of the operands of that SHUFFLE_VECTOR (but not UNDEF!).
23836 // (4) and for now, the SHUFFLE_VECTOR must be unary.
23837 ShuffleVectorSDNode *SVN = nullptr;
23838 for (SDValue Op : N->ops()) {
23839 if (auto *CurSVN = dyn_cast<ShuffleVectorSDNode>(Op);
23840 CurSVN && CurSVN->getOperand(1).isUndef() && N->isOnlyUserOf(CurSVN) &&
23841 all_of(N->ops(), [CurSVN](SDValue Op) {
23842 // FIXME: can we allow UNDEF operands?
23843 return !Op.isUndef() &&
23844 (Op.getNode() == CurSVN || is_contained(CurSVN->ops(), Op));
23845 })) {
23846 SVN = CurSVN;
23847 break;
23848 }
23849 }
23850 if (!SVN)
23851 return SDValue();
23852
23853 // We are going to pad the shuffle operands, so any index that was picking
23854 // from the second operand must be adjusted.
23855 SmallVector<int, 16> AdjustedMask;
23856 AdjustedMask.reserve(SVN->getMask().size());
23857 assert(SVN->getOperand(1).isUndef() && "Expected unary shuffle!");
23858 append_range(AdjustedMask, SVN->getMask());
23859
23860 // Identity masks for the operands of the (padded) shuffle.
23861 SmallVector<int, 32> IdentityMask(2 * OpVT.getVectorNumElements());
23862 MutableArrayRef<int> FirstShufOpIdentityMask =
23863 MutableArrayRef<int>(IdentityMask)
23864 .take_front(OpVT.getVectorNumElements());
23865 MutableArrayRef<int> SecondShufOpIdentityMask =
23866 MutableArrayRef<int>(IdentityMask).take_back(OpVT.getVectorNumElements());
23867 std::iota(FirstShufOpIdentityMask.begin(), FirstShufOpIdentityMask.end(), 0);
23868 std::iota(SecondShufOpIdentityMask.begin(), SecondShufOpIdentityMask.end(),
23869 OpVT.getVectorNumElements());
23870
23871 // New combined shuffle mask.
23872 SmallVector<int, 32> Mask;
23873 Mask.reserve(VT.getVectorNumElements());
23874 for (SDValue Op : N->ops()) {
23875 assert(!Op.isUndef() && "Not expecting to concatenate UNDEF.");
23876 if (Op.getNode() == SVN) {
23877 append_range(Mask, AdjustedMask);
23878 continue;
23879 }
23880 if (Op == SVN->getOperand(0)) {
23881 append_range(Mask, FirstShufOpIdentityMask);
23882 continue;
23883 }
23884 if (Op == SVN->getOperand(1)) {
23885 append_range(Mask, SecondShufOpIdentityMask);
23886 continue;
23887 }
23888 llvm_unreachable("Unexpected operand!");
23889 }
23890
23891 // Don't create illegal shuffle masks.
23892 if (!TLI.isShuffleMaskLegal(Mask, VT))
23893 return SDValue();
23894
23895 // Pad the shuffle operands with UNDEF.
23896 SDLoc dl(N);
23897 std::array<SDValue, 2> ShufOps;
23898 for (auto I : zip(SVN->ops(), ShufOps)) {
23899 SDValue ShufOp = std::get<0>(I);
23900 SDValue &NewShufOp = std::get<1>(I);
23901 if (ShufOp.isUndef())
23902 NewShufOp = DAG.getUNDEF(VT);
23903 else {
23904 SmallVector<SDValue, 2> ShufOpParts(N->getNumOperands(),
23905 DAG.getUNDEF(OpVT));
23906 ShufOpParts[0] = ShufOp;
23907 NewShufOp = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, ShufOpParts);
23908 }
23909 }
23910 // Finally, create the new wide shuffle.
23911 return DAG.getVectorShuffle(VT, dl, ShufOps[0], ShufOps[1], Mask);
23912}
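// Worked example (illustrative note, not part of the upstream source): for
//   v8i32 = concat_vectors (v4i32 vector_shuffle<3,2,1,0> A, undef), A
// the shuffle operand A is padded to v8i32 with undef, and the whole concat
// becomes
//   v8i32 = vector_shuffle<3,2,1,0,0,1,2,3> (concat_vectors A, undef), undef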
23913
23914SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
23915 // If we only have one input vector, we don't need to do any concatenation.
23916 if (N->getNumOperands() == 1)
23917 return N->getOperand(0);
23918
23919 // Check if all of the operands are undefs.
23920 EVT VT = N->getValueType(0);
23921 if (ISD::allOperandsUndef(N))
23922 return DAG.getUNDEF(VT);
23923
23924 // Optimize concat_vectors where all but the first of the vectors are undef.
23925 if (all_of(drop_begin(N->ops()),
23926 [](const SDValue &Op) { return Op.isUndef(); })) {
23927 SDValue In = N->getOperand(0);
23928 assert(In.getValueType().isVector() && "Must concat vectors");
23929
23930 // If the input is a concat_vectors, just make a larger concat by padding
23931 // with smaller undefs.
23932 //
23933 // Legalizing in AArch64TargetLowering::LowerCONCAT_VECTORS() and combining
23934 // here could cause an infinite loop. That legalizing happens when LegalDAG
23935 // is true and input of AArch64TargetLowering::LowerCONCAT_VECTORS() is
23936 // scalable.
23937 if (In.getOpcode() == ISD::CONCAT_VECTORS && In.hasOneUse() &&
23938 !(LegalDAG && In.getValueType().isScalableVector())) {
23939 unsigned NumOps = N->getNumOperands() * In.getNumOperands();
23940 SmallVector<SDValue, 4> Ops(In->op_begin(), In->op_end());
23941 Ops.resize(NumOps, DAG.getUNDEF(Ops[0].getValueType()));
23942 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
23943 }
23944
23945 SDValue Scalar = peekThroughOneUseBitcasts(In);
23946
23947 // concat_vectors(scalar_to_vector(scalar), undef) ->
23948 // scalar_to_vector(scalar)
23949 if (!LegalOperations && Scalar.getOpcode() == ISD::SCALAR_TO_VECTOR &&
23950 Scalar.hasOneUse()) {
23951 EVT SVT = Scalar.getValueType().getVectorElementType();
23952 if (SVT == Scalar.getOperand(0).getValueType())
23953 Scalar = Scalar.getOperand(0);
23954 }
23955
23956 // concat_vectors(scalar, undef) -> scalar_to_vector(scalar)
23957 if (!Scalar.getValueType().isVector() && In.hasOneUse()) {
23958 // If the bitcast type isn't legal, it might be a trunc of a legal type;
23959 // look through the trunc so we can still do the transform:
23960 // concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
23961 if (Scalar->getOpcode() == ISD::TRUNCATE &&
23962 !TLI.isTypeLegal(Scalar.getValueType()) &&
23963 TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
23964 Scalar = Scalar->getOperand(0);
23965
23966 EVT SclTy = Scalar.getValueType();
23967
23968 if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
23969 return SDValue();
23970
23971 // Bail out if the vector size is not a multiple of the scalar size.
23972 if (VT.getSizeInBits() % SclTy.getSizeInBits())
23973 return SDValue();
23974
23975 unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
23976 if (VNTNumElms < 2)
23977 return SDValue();
23978
23979 EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
23980 if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
23981 return SDValue();
23982
23983 SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
23984 return DAG.getBitcast(VT, Res);
23985 }
23986 }
23987
23988 // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
23989 // We have already tested above for an UNDEF only concatenation.
23990 // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
23991 // -> (BUILD_VECTOR A, B, ..., C, D, ...)
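 // For example (types chosen only for illustration): concatenating two
 // v2i16 BUILD_VECTORs whose scalar operands happen to be i32 and i16 first
 // truncates the wider operands to the common (smallest) operand type:
 // (concat_vectors (BUILD_VECTOR i32 A, i32 B), (BUILD_VECTOR i16 C, i16 D))
 // -> (BUILD_VECTOR i16 (trunc A), i16 (trunc B), i16 C, i16 D)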
23992 auto IsBuildVectorOrUndef = [](const SDValue &Op) {
23993 return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
23994 };
23995 if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
23996 SmallVector<SDValue, 8> Opnds;
23997 EVT SVT = VT.getScalarType();
23998
23999 EVT MinVT = SVT;
24000 if (!SVT.isFloatingPoint()) {
24001 // If the BUILD_VECTORs are built from integers, they may have different
24002 // operand types. Get the smallest type and truncate all operands to it.
24003 bool FoundMinVT = false;
24004 for (const SDValue &Op : N->ops())
24005 if (ISD::BUILD_VECTOR == Op.getOpcode()) {
24006 EVT OpSVT = Op.getOperand(0).getValueType();
24007 MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
24008 FoundMinVT = true;
24009 }
24010 assert(FoundMinVT && "Concat vector type mismatch");
24011 }
24012
24013 for (const SDValue &Op : N->ops()) {
24014 EVT OpVT = Op.getValueType();
24015 unsigned NumElts = OpVT.getVectorNumElements();
24016
24017 if (ISD::UNDEF == Op.getOpcode())
24018 Opnds.append(NumElts, DAG.getUNDEF(MinVT));
24019
24020 if (ISD::BUILD_VECTOR == Op.getOpcode()) {
24021 if (SVT.isFloatingPoint()) {
24022 assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
24023 Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
24024 } else {
24025 for (unsigned i = 0; i != NumElts; ++i)
24026 Opnds.push_back(
24027 DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
24028 }
24029 }
24030 }
24031
24032 assert(VT.getVectorNumElements() == Opnds.size() &&
24033 "Concat vector type mismatch");
24034 return DAG.getBuildVector(VT, SDLoc(N), Opnds);
24035 }
24036
24037 // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
24038 // FIXME: Add support for concat_vectors(bitcast(vec0),bitcast(vec1),...).
24039 if (SDValue V = combineConcatVectorOfScalars(N, DAG))
24040 return V;
24041
24042 if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) {
24043 // Fold CONCAT_VECTORS of CONCAT_VECTORS (or undef) to VECTOR_SHUFFLE.
24044 if (SDValue V = combineConcatVectorOfConcatVectors(N, DAG))
24045 return V;
24046
24047 // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
24048 if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
24049 return V;
24050 }
24051
24052 if (SDValue V = combineConcatVectorOfCasts(N, DAG))
24053 return V;
24054
24055 if (SDValue V = combineConcatVectorOfShuffleAndItsOperands(
24056 N, DAG, TLI, LegalTypes, LegalOperations))
24057 return V;
24058
24059 // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
24060 // nodes often generate nop CONCAT_VECTOR nodes. Scan the CONCAT_VECTOR
24061 // operands and look for a CONCAT operations that place the incoming vectors
24062 // at the exact same location.
24063 //
24064 // For scalable vectors, EXTRACT_SUBVECTOR indexes are implicitly scaled.
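 // For example: (concat_vectors (extract_subvector X:v8i32, 0),
 //                               (extract_subvector X:v8i32, 4)) producing a
 // v8i32 result is a nop concat and folds to X, because each operand reads
 // its part of X from the identity index.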
24065 SDValue SingleSource = SDValue();
24066 unsigned PartNumElem =
24067 N->getOperand(0).getValueType().getVectorMinNumElements();
24068
24069 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
24070 SDValue Op = N->getOperand(i);
24071
24072 if (Op.isUndef())
24073 continue;
24074
24075 // Check if this is the identity extract:
24076 if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
24077 return SDValue();
24078
24079 // Find the single incoming vector for the extract_subvector.
24080 if (SingleSource.getNode()) {
24081 if (Op.getOperand(0) != SingleSource)
24082 return SDValue();
24083 } else {
24084 SingleSource = Op.getOperand(0);
24085
24086 // Check the source type is the same as the type of the result.
24087 // If not, this concat may extend the vector, so we can not
24088 // optimize it away.
24089 if (SingleSource.getValueType() != N->getValueType(0))
24090 return SDValue();
24091 }
24092
24093 // Check that we are reading from the identity index.
24094 unsigned IdentityIndex = i * PartNumElem;
24095 if (Op.getConstantOperandAPInt(1) != IdentityIndex)
24096 return SDValue();
24097 }
24098
24099 if (SingleSource.getNode())
24100 return SingleSource;
24101
24102 return SDValue();
24103}
24104
24105// Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find
24106// if the subvector can be sourced for free.
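// For example (purely illustrative): with SubVT = v4i32 and Index = 4,
// (concat_vectors A:v4i32, B:v4i32) yields B, and
// (insert_subvector X, Y:v4i32, 4) yields Y; anything else yields SDValue().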
24107static SDValue getSubVectorSrc(SDValue V, SDValue Index, EVT SubVT) {
24108 if (V.getOpcode() == ISD::INSERT_SUBVECTOR &&
24109 V.getOperand(1).getValueType() == SubVT && V.getOperand(2) == Index) {
24110 return V.getOperand(1);
24111 }
24112 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
24113 if (IndexC && V.getOpcode() == ISD::CONCAT_VECTORS &&
24114 V.getOperand(0).getValueType() == SubVT &&
24115 (IndexC->getZExtValue() % SubVT.getVectorMinNumElements()) == 0) {
24116 uint64_t SubIdx = IndexC->getZExtValue() / SubVT.getVectorMinNumElements();
24117 return V.getOperand(SubIdx);
24118 }
24119 return SDValue();
24120}
24121
24122static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract,
24123 SelectionDAG &DAG,
24124 bool LegalOperations) {
24125 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
24126 SDValue BinOp = Extract->getOperand(0);
24127 unsigned BinOpcode = BinOp.getOpcode();
24128 if (!TLI.isBinOp(BinOpcode) || BinOp->getNumValues() != 1)
24129 return SDValue();
24130
24131 EVT VecVT = BinOp.getValueType();
24132 SDValue Bop0 = BinOp.getOperand(0), Bop1 = BinOp.getOperand(1);
24133 if (VecVT != Bop0.getValueType() || VecVT != Bop1.getValueType())
24134 return SDValue();
24135
24136 SDValue Index = Extract->getOperand(1);
24137 EVT SubVT = Extract->getValueType(0);
24138 if (!TLI.isOperationLegalOrCustom(BinOpcode, SubVT, LegalOperations))
24139 return SDValue();
24140
24141 SDValue Sub0 = getSubVectorSrc(Bop0, Index, SubVT);
24142 SDValue Sub1 = getSubVectorSrc(Bop1, Index, SubVT);
24143
24144 // TODO: We could handle the case where only 1 operand is being inserted by
24145 // creating an extract of the other operand, but that requires checking
24146 // number of uses and/or costs.
24147 if (!Sub0 || !Sub1)
24148 return SDValue();
24149
24150 // We are inserting both operands of the wide binop only to extract back
24151 // to the narrow vector size. Eliminate all of the insert/extract:
24152 // ext (binop (ins ?, X, Index), (ins ?, Y, Index)), Index --> binop X, Y
24153 return DAG.getNode(BinOpcode, SDLoc(Extract), SubVT, Sub0, Sub1,
24154 BinOp->getFlags());
24155}
24156
24157/// If we are extracting a subvector produced by a wide binary operator try
24158/// to use a narrow binary operator and/or avoid concatenation and extraction.
24159static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG,
24160 bool LegalOperations) {
24161 // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
24162 // some of these bailouts with other transforms.
24163
24164 if (SDValue V = narrowInsertExtractVectorBinOp(Extract, DAG, LegalOperations))
24165 return V;
24166
24167 // The extract index must be a constant, so we can map it to a concat operand.
24168 auto *ExtractIndexC = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
24169 if (!ExtractIndexC)
24170 return SDValue();
24171
24172 // We are looking for an optionally bitcasted wide vector binary operator
24173 // feeding an extract subvector.
24174 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
24175 SDValue BinOp = peekThroughBitcasts(Extract->getOperand(0));
24176 unsigned BOpcode = BinOp.getOpcode();
24177 if (!TLI.isBinOp(BOpcode) || BinOp->getNumValues() != 1)
24178 return SDValue();
24179
24180 // Exclude the fake form of fneg (fsub -0.0, x) because that is likely to be
24181 // reduced to the unary fneg when it is visited, and we probably want to deal
24182 // with fneg in a target-specific way.
24183 if (BOpcode == ISD::FSUB) {
24184 auto *C = isConstOrConstSplatFP(BinOp.getOperand(0), /*AllowUndefs*/ true);
24185 if (C && C->getValueAPF().isNegZero())
24186 return SDValue();
24187 }
24188
24189 // The binop must be a vector type, so we can extract some fraction of it.
24190 EVT WideBVT = BinOp.getValueType();
24191 // The optimisations below currently assume we are dealing with fixed length
24192 // vectors. It is possible to add support for scalable vectors, but at the
24193 // moment we've done no analysis to prove whether they are profitable or not.
24194 if (!WideBVT.isFixedLengthVector())
24195 return SDValue();
24196
24197 EVT VT = Extract->getValueType(0);
24198 unsigned ExtractIndex = ExtractIndexC->getZExtValue();
24199 assert(ExtractIndex % VT.getVectorNumElements() == 0 &&
24200 "Extract index is not a multiple of the vector length.");
24201
24202 // Bail out if this is not a proper multiple width extraction.
24203 unsigned WideWidth = WideBVT.getSizeInBits();
24204 unsigned NarrowWidth = VT.getSizeInBits();
24205 if (WideWidth % NarrowWidth != 0)
24206 return SDValue();
24207
24208 // Bail out if we are extracting a fraction of a single operation. This can
24209 // occur because we potentially looked through a bitcast of the binop.
24210 unsigned NarrowingRatio = WideWidth / NarrowWidth;
24211 unsigned WideNumElts = WideBVT.getVectorNumElements();
24212 if (WideNumElts % NarrowingRatio != 0)
24213 return SDValue();
24214
24215 // Bail out if the target does not support a narrower version of the binop.
24216 EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
24217 WideNumElts / NarrowingRatio);
24218 if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT,
24219 LegalOperations))
24220 return SDValue();
24221
24222 // If extraction is cheap, we don't need to look at the binop operands
24223 // for concat ops. The narrow binop alone makes this transform profitable.
24224 // We can't just reuse the original extract index operand because we may have
24225 // bitcasted.
24226 unsigned ConcatOpNum = ExtractIndex / VT.getVectorNumElements();
24227 unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
24228 if (TLI.isExtractSubvectorCheap(NarrowBVT, WideBVT, ExtBOIdx) &&
24229 BinOp.hasOneUse() && Extract->getOperand(0)->hasOneUse()) {
24230 // extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N)
24231 SDLoc DL(Extract);
24232 SDValue NewExtIndex = DAG.getVectorIdxConstant(ExtBOIdx, DL);
24233 SDValue X = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
24234 BinOp.getOperand(0), NewExtIndex);
24235 SDValue Y = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
24236 BinOp.getOperand(1), NewExtIndex);
24237 SDValue NarrowBinOp =
24238 DAG.getNode(BOpcode, DL, NarrowBVT, X, Y, BinOp->getFlags());
24239 return DAG.getBitcast(VT, NarrowBinOp);
24240 }
24241
24242 // Only handle the case where we are doubling and then halving. A larger ratio
24243 // may require more than two narrow binops to replace the wide binop.
24244 if (NarrowingRatio != 2)
24245 return SDValue();
24246
24247 // TODO: The motivating case for this transform is an x86 AVX1 target. That
24248 // target has temptingly almost legal versions of bitwise logic ops in 256-bit
24249 // flavors, but no other 256-bit integer support. This could be extended to
24250 // handle any binop, but that may require fixing/adding other folds to avoid
24251 // codegen regressions.
24252 if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
24253 return SDValue();
24254
24255 // We need at least one concatenation operation of a binop operand to make
24256 // this transform worthwhile. The concat must double the input vector sizes.
24257 auto GetSubVector = [ConcatOpNum](SDValue V) -> SDValue {
24258 if (V.getOpcode() == ISD::CONCAT_VECTORS && V.getNumOperands() == 2)
24259 return V.getOperand(ConcatOpNum);
24260 return SDValue();
24261 };
24262 SDValue SubVecL = GetSubVector(peekThroughBitcasts(BinOp.getOperand(0)));
24263 SDValue SubVecR = GetSubVector(peekThroughBitcasts(BinOp.getOperand(1)));
24264
24265 if (SubVecL || SubVecR) {
24266 // If a binop operand was not the result of a concat, we must extract a
24267 // half-sized operand for our new narrow binop:
24268 // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
24269 // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, IndexC)
24270 // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, IndexC), YN
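 // For example, on an AVX1-like target where the wide integer binop is not
 // legal but the narrow one is (illustrative only):
 // v4i32 extract (xor (v8i32 concat X, Y), (v8i32 concat W, Z)), 4
 //   --> v4i32 xor Y, Z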
24271 SDLoc DL(Extract);
24272 SDValue IndexC = DAG.getVectorIdxConstant(ExtBOIdx, DL);
24273 SDValue X = SubVecL ? DAG.getBitcast(NarrowBVT, SubVecL)
24274 : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
24275 BinOp.getOperand(0), IndexC);
24276
24277 SDValue Y = SubVecR ? DAG.getBitcast(NarrowBVT, SubVecR)
24278 : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
24279 BinOp.getOperand(1), IndexC);
24280
24281 SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
24282 return DAG.getBitcast(VT, NarrowBinOp);
24283 }
24284
24285 return SDValue();
24286}
24287
24288/// If we are extracting a subvector from a wide vector load, convert to a
24289/// narrow load to eliminate the extraction:
24290/// (extract_subvector (load wide vector)) --> (load narrow vector)
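/// For example, on a little-endian target (indices and types illustrative):
/// (v4i32 extract_subvector (v8i32 load p), 4) --> (v4i32 load p+16 bytes),
/// provided the target agrees to reduce the load width.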
24291static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
24292 // TODO: Add support for big-endian. The offset calculation must be adjusted.
24293 if (DAG.getDataLayout().isBigEndian())
24294 return SDValue();
24295
24296 auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
24297 if (!Ld || Ld->getExtensionType() || !Ld->isSimple())
24298 return SDValue();
24299
24300 // Allow targets to opt-out.
24301 EVT VT = Extract->getValueType(0);
24302
24303 // We can only create byte sized loads.
24304 if (!VT.isByteSized())
24305 return SDValue();
24306
24307 unsigned Index = Extract->getConstantOperandVal(1);
24308 unsigned NumElts = VT.getVectorMinNumElements();
24309 // A fixed length vector being extracted from a scalable vector
24310 // may not be any *smaller* than the scalable one.
24311 if (Index == 0 && NumElts >= Ld->getValueType(0).getVectorMinNumElements())
24312 return SDValue();
24313
24314 // The definition of EXTRACT_SUBVECTOR states that the index must be a
24315 // multiple of the minimum number of elements in the result type.
24316 assert(Index % NumElts == 0 && "The extract subvector index is not a "
24317 "multiple of the result's element count");
24318
24319 // It's fine to use TypeSize here as we know the offset will not be negative.
24320 TypeSize Offset = VT.getStoreSize() * (Index / NumElts);
24321
24322 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
24323 if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT))
24324 return SDValue();
24325
24326 // The narrow load will be offset from the base address of the old load if
24327 // we are extracting from something besides index 0 (little-endian).
24328 SDLoc DL(Extract);
24329
24330 // TODO: Use "BaseIndexOffset" to make this more effective.
24331 SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(), Offset, DL);
24332
24333 LocationSize StoreSize = LocationSize::precise(VT.getStoreSize());
24334 MachineFunction &MF = DAG.getMachineFunction();
24335 MachineMemOperand *MMO;
24336 if (Offset.isScalable()) {
24337 MachinePointerInfo MPI =
24338 MachinePointerInfo(Ld->getPointerInfo().getAddrSpace());
24339 MMO = MF.getMachineMemOperand(Ld->getMemOperand(), MPI, StoreSize);
24340 } else
24341 MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset.getFixedValue(),
24342 StoreSize);
24343
24344 SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
24345 DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
24346 return NewLd;
24347}
24348
24349/// Given EXTRACT_SUBVECTOR(VECTOR_SHUFFLE(Op0, Op1, Mask)),
24350/// try to produce VECTOR_SHUFFLE(EXTRACT_SUBVECTOR(Op?, ?),
24351/// EXTRACT_SUBVECTOR(Op?, ?),
24352/// Mask'))
24353/// iff it is legal and profitable to do so. Notably, the trimmed mask
24354/// (containing only the elements that are extracted)
24355/// must reference at most two subvectors.
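/// For example (illustrative): extracting the high half of a wide shuffle that
/// only reads the high half of its second operand,
/// (v4i32 extract_subvector (vector_shuffle<0,1,2,3,12,13,14,15> A:v8i32, B:v8i32), 4)
/// --> (vector_shuffle<0,1,2,3> (v4i32 extract_subvector B, 4), undef),
/// assuming the narrow mask and the subvector extraction are legal and cheap.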
24356static SDValue foldExtractSubvectorFromShuffleVector(SDNode *N,
24357 SelectionDAG &DAG,
24358 const TargetLowering &TLI,
24359 bool LegalOperations) {
24360 assert(N->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
24361 "Must only be called on EXTRACT_SUBVECTOR's");
24362
24363 SDValue N0 = N->getOperand(0);
24364
24365 // Only deal with non-scalable vectors.
24366 EVT NarrowVT = N->getValueType(0);
24367 EVT WideVT = N0.getValueType();
24368 if (!NarrowVT.isFixedLengthVector() || !WideVT.isFixedLengthVector())
24369 return SDValue();
24370
24371 // The operand must be a shufflevector.
24372 auto *WideShuffleVector = dyn_cast<ShuffleVectorSDNode>(N0);
24373 if (!WideShuffleVector)
24374 return SDValue();
24375
24376 // The old shuffle needs to go away.
24377 if (!WideShuffleVector->hasOneUse())
24378 return SDValue();
24379
24380 // And the narrow shufflevector that we'll form must be legal.
24381 if (LegalOperations &&
24382 !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, NarrowVT))
24383 return SDValue();
24384
24385 uint64_t FirstExtractedEltIdx = N->getConstantOperandVal(1);
24386 int NumEltsExtracted = NarrowVT.getVectorNumElements();
24387 assert((FirstExtractedEltIdx % NumEltsExtracted) == 0 &&
24388 "Extract index is not a multiple of the output vector length.");
24389
24390 int WideNumElts = WideVT.getVectorNumElements();
24391
24392 SmallVector<int, 16> NewMask;
24393 NewMask.reserve(NumEltsExtracted);
24394 SmallSetVector<std::pair<SDValue /*Op*/, int /*SubvectorIndex*/>, 2>
24395 DemandedSubvectors;
24396
24397 // Try to decode the wide mask into narrow mask from at most two subvectors.
24398 for (int M : WideShuffleVector->getMask().slice(FirstExtractedEltIdx,
24399 NumEltsExtracted)) {
24400 assert((M >= -1) && (M < (2 * WideNumElts)) &&
24401 "Out-of-bounds shuffle mask?");
24402
24403 if (M < 0) {
24404 // Does not depend on operands, does not require adjustment.
24405 NewMask.emplace_back(M);
24406 continue;
24407 }
24408
24409 // From which operand of the shuffle does this shuffle mask element pick?
24410 int WideShufOpIdx = M / WideNumElts;
24411 // Which element of that operand is picked?
24412 int OpEltIdx = M % WideNumElts;
24413
24414 assert((OpEltIdx + WideShufOpIdx * WideNumElts) == M &&
24415 "Shuffle mask vector decomposition failure.");
24416
24417 // And which NumEltsExtracted-sized subvector of that operand is that?
24418 int OpSubvecIdx = OpEltIdx / NumEltsExtracted;
24419 // And which element within that subvector of that operand is that?
24420 int OpEltIdxInSubvec = OpEltIdx % NumEltsExtracted;
24421
24422 assert((OpEltIdxInSubvec + OpSubvecIdx * NumEltsExtracted) == OpEltIdx &&
24423 "Shuffle mask subvector decomposition failure.");
24424
24425 assert((OpEltIdxInSubvec + OpSubvecIdx * NumEltsExtracted +
24426 WideShufOpIdx * WideNumElts) == M &&
24427 "Shuffle mask full decomposition failure.");
24428
24429 SDValue Op = WideShuffleVector->getOperand(WideShufOpIdx);
24430
24431 if (Op.isUndef()) {
24432 // Picking from an undef operand. Let's adjust mask instead.
24433 NewMask.emplace_back(-1);
24434 continue;
24435 }
24436
24437 const std::pair<SDValue, int> DemandedSubvector =
24438 std::make_pair(Op, OpSubvecIdx);
24439
24440 if (DemandedSubvectors.insert(DemandedSubvector)) {
24441 if (DemandedSubvectors.size() > 2)
24442 return SDValue(); // We can't handle more than two subvectors.
24443 // How many elements into the WideVT does this subvector start?
24444 int Index = NumEltsExtracted * OpSubvecIdx;
24445 // Bail out if the extraction isn't going to be cheap.
24446 if (!TLI.isExtractSubvectorCheap(NarrowVT, WideVT, Index))
24447 return SDValue();
24448 }
24449
24450 // Ok, but from which operand of the new shuffle will this element pick?
24451 int NewOpIdx =
24452 getFirstIndexOf(DemandedSubvectors.getArrayRef(), DemandedSubvector);
24453 assert((NewOpIdx == 0 || NewOpIdx == 1) && "Unexpected operand index.");
24454
24455 int AdjM = OpEltIdxInSubvec + NewOpIdx * NumEltsExtracted;
24456 NewMask.emplace_back(AdjM);
24457 }
24458 assert(NewMask.size() == (unsigned)NumEltsExtracted && "Produced bad mask.");
24459 assert(DemandedSubvectors.size() <= 2 &&
24460 "Should have ended up demanding at most two subvectors.");
24461
24462 // Did we discover that the shuffle does not actually depend on operands?
24463 if (DemandedSubvectors.empty())
24464 return DAG.getUNDEF(NarrowVT);
24465
24466 // Profitability check: only deal with extractions from the first subvector
24467 // unless the mask becomes an identity mask.
24468 if (!ShuffleVectorInst::isIdentityMask(NewMask, NewMask.size()) ||
24469 any_of(NewMask, [](int M) { return M < 0; }))
24470 for (auto &DemandedSubvector : DemandedSubvectors)
24471 if (DemandedSubvector.second != 0)
24472 return SDValue();
24473
24474 // We still perform the exact same EXTRACT_SUBVECTOR, just on different
24475 // operand[s]/index[es], so there is no point in checking for its legality.
24476
24477 // Do not turn a legal shuffle into an illegal one.
24478 if (TLI.isShuffleMaskLegal(WideShuffleVector->getMask(), WideVT) &&
24479 !TLI.isShuffleMaskLegal(NewMask, NarrowVT))
24480 return SDValue();
24481
24482 SDLoc DL(N);
24483
24484 SmallVector<SDValue, 2> NewOps;
24485 for (const std::pair<SDValue /*Op*/, int /*SubvectorIndex*/>
24486 &DemandedSubvector : DemandedSubvectors) {
24487 // How many elements into the WideVT does this subvector start?
24488 int Index = NumEltsExtracted * DemandedSubvector.second;
24489 SDValue IndexC = DAG.getVectorIdxConstant(Index, DL);
24490 NewOps.emplace_back(DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowVT,
24491 DemandedSubvector.first, IndexC));
24492 }
24493 assert((NewOps.size() == 1 || NewOps.size() == 2) &&
24494 "Should end up with either one or two ops");
24495
24496 // If we ended up with only one operand, pad with an undef.
24497 if (NewOps.size() == 1)
24498 NewOps.emplace_back(DAG.getUNDEF(NarrowVT));
24499
24500 return DAG.getVectorShuffle(NarrowVT, DL, NewOps[0], NewOps[1], NewMask);
24501}
24502
24503SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
24504 EVT NVT = N->getValueType(0);
24505 SDValue V = N->getOperand(0);
24506 uint64_t ExtIdx = N->getConstantOperandVal(1);
24507 SDLoc DL(N);
24508
24509 // Extract from UNDEF is UNDEF.
24510 if (V.isUndef())
24511 return DAG.getUNDEF(NVT);
24512
24513 if (TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, NVT))
24514 if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG))
24515 return NarrowLoad;
24516
24517 // Combine an extract of an extract into a single extract_subvector.
24518 // ext (ext X, C), 0 --> ext X, C
24519 if (ExtIdx == 0 && V.getOpcode() == ISD::EXTRACT_SUBVECTOR && V.hasOneUse()) {
24520 if (TLI.isExtractSubvectorCheap(NVT, V.getOperand(0).getValueType(),
24521 V.getConstantOperandVal(1)) &&
24522 TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NVT)) {
24523 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT, V.getOperand(0),
24524 V.getOperand(1));
24525 }
24526 }
24527
24528 // ty1 extract_vector(ty2 splat(V))) -> ty1 splat(V)
24529 if (V.getOpcode() == ISD::SPLAT_VECTOR)
24530 if (DAG.isConstantValueOfAnyType(V.getOperand(0)) || V.hasOneUse())
24531 if (!LegalOperations || TLI.isOperationLegal(ISD::SPLAT_VECTOR, NVT))
24532 return DAG.getSplatVector(NVT, DL, V.getOperand(0));
24533
24534 // extract_subvector(insert_subvector(x,y,c1),c2)
24535 // --> extract_subvector(y,c2-c1)
24536 // iff we're just extracting from the inserted subvector.
24537 if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
24538 SDValue InsSub = V.getOperand(1);
24539 EVT InsSubVT = InsSub.getValueType();
24540 unsigned NumInsElts = InsSubVT.getVectorMinNumElements();
24541 unsigned InsIdx = V.getConstantOperandVal(2);
24542 unsigned NumSubElts = NVT.getVectorMinNumElements();
24543 if (InsIdx <= ExtIdx && (ExtIdx + NumSubElts) <= (InsIdx + NumInsElts) &&
24544 TLI.isExtractSubvectorCheap(NVT, InsSubVT, ExtIdx - InsIdx) &&
24545 InsSubVT.isFixedLengthVector() && NVT.isFixedLengthVector() &&
24546 V.getValueType().isFixedLengthVector())
24547 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT, InsSub,
24548 DAG.getVectorIdxConstant(ExtIdx - InsIdx, DL));
24549 }
24550
24551 // Try to move vector bitcast after extract_subv by scaling extraction index:
24552 // extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
24553 if (V.getOpcode() == ISD::BITCAST &&
24554 V.getOperand(0).getValueType().isVector() &&
24555 (!LegalOperations || TLI.isOperationLegal(ISD::BITCAST, NVT))) {
24556 SDValue SrcOp = V.getOperand(0);
24557 EVT SrcVT = SrcOp.getValueType();
24558 unsigned SrcNumElts = SrcVT.getVectorMinNumElements();
24559 unsigned DestNumElts = V.getValueType().getVectorMinNumElements();
24560 if ((SrcNumElts % DestNumElts) == 0) {
24561 unsigned SrcDestRatio = SrcNumElts / DestNumElts;
24562 ElementCount NewExtEC = NVT.getVectorElementCount() * SrcDestRatio;
24563 EVT NewExtVT =
24564 EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(), NewExtEC);
24565 if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
24566 SDValue NewIndex = DAG.getVectorIdxConstant(ExtIdx * SrcDestRatio, DL);
24567 SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
24568 V.getOperand(0), NewIndex);
24569 return DAG.getBitcast(NVT, NewExtract);
24570 }
24571 }
24572 if ((DestNumElts % SrcNumElts) == 0) {
24573 unsigned DestSrcRatio = DestNumElts / SrcNumElts;
24574 if (NVT.getVectorElementCount().isKnownMultipleOf(DestSrcRatio)) {
24575 ElementCount NewExtEC =
24576 NVT.getVectorElementCount().divideCoefficientBy(DestSrcRatio);
24577 EVT ScalarVT = SrcVT.getScalarType();
24578 if ((ExtIdx % DestSrcRatio) == 0) {
24579 unsigned IndexValScaled = ExtIdx / DestSrcRatio;
24580 EVT NewExtVT =
24581 EVT::getVectorVT(*DAG.getContext(), ScalarVT, NewExtEC);
24582 if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
24583 SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
24584 SDValue NewExtract =
24585 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
24586 V.getOperand(0), NewIndex);
24587 return DAG.getBitcast(NVT, NewExtract);
24588 }
24589 if (NewExtEC.isScalar() &&
24590 TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, ScalarVT)) {
24591 SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
24592 SDValue NewExtract =
24593 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT,
24594 V.getOperand(0), NewIndex);
24595 return DAG.getBitcast(NVT, NewExtract);
24596 }
24597 }
24598 }
24599 }
24600 }
24601
24602 if (V.getOpcode() == ISD::CONCAT_VECTORS) {
24603 unsigned ExtNumElts = NVT.getVectorMinNumElements();
24604 EVT ConcatSrcVT = V.getOperand(0).getValueType();
24605 assert(ConcatSrcVT.getVectorElementType() == NVT.getVectorElementType() &&
24606 "Concat and extract subvector do not change element type");
24607 assert((ExtIdx % ExtNumElts) == 0 &&
24608 "Extract index is not a multiple of the input vector length.");
24609
24610 unsigned ConcatSrcNumElts = ConcatSrcVT.getVectorMinNumElements();
24611 unsigned ConcatOpIdx = ExtIdx / ConcatSrcNumElts;
24612
24613 // If the concatenated source types match this extract, it's a direct
24614 // simplification:
24615 // extract_subvec (concat V1, V2, ...), i --> Vi
24616 if (NVT.getVectorElementCount() == ConcatSrcVT.getVectorElementCount())
24617 return V.getOperand(ConcatOpIdx);
24618
24619 // If the concatenated source vectors are a multiple length of this extract,
24620 // then extract a fraction of one of those source vectors directly from a
24621 // concat operand. Example:
24622 // v2i8 extract_subvec (v16i8 concat (v8i8 X), (v8i8 Y)), 14 -->
24623 // v2i8 extract_subvec v8i8 Y, 6
24624 if (NVT.isFixedLengthVector() && ConcatSrcVT.isFixedLengthVector() &&
24625 ConcatSrcNumElts % ExtNumElts == 0) {
24626 unsigned NewExtIdx = ExtIdx - ConcatOpIdx * ConcatSrcNumElts;
24627 assert(NewExtIdx + ExtNumElts <= ConcatSrcNumElts &&
24628 "Trying to extract from >1 concat operand?");
24629 assert(NewExtIdx % ExtNumElts == 0 &&
24630 "Extract index is not a multiple of the input vector length.");
24631 SDValue NewIndexC = DAG.getVectorIdxConstant(NewExtIdx, DL);
24632 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT,
24633 V.getOperand(ConcatOpIdx), NewIndexC);
24634 }
24635 }
24636
24637 if (SDValue V =
24638 foldExtractSubvectorFromShuffleVector(N, DAG, TLI, LegalOperations))
24639 return V;
24640
24641
24642 V = peekThroughBitcasts(V);
24643 // If the input is a build vector, try to make a smaller build vector.
24644 if (V.getOpcode() == ISD::BUILD_VECTOR) {
24645 EVT InVT = V.getValueType();
24646 unsigned ExtractSize = NVT.getSizeInBits();
24647 unsigned EltSize = InVT.getScalarSizeInBits();
24648 // Only do this if we won't split any elements.
24649 if (ExtractSize % EltSize == 0) {
24650 unsigned NumElems = ExtractSize / EltSize;
24651 EVT EltVT = InVT.getVectorElementType();
24652 EVT ExtractVT =
24653 NumElems == 1 ? EltVT
24654 : EVT::getVectorVT(*DAG.getContext(), EltVT, NumElems);
24655 if ((Level < AfterLegalizeDAG ||
24656 (NumElems == 1 ||
24657 TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&
24658 (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
24659 unsigned IdxVal = (ExtIdx * NVT.getScalarSizeInBits()) / EltSize;
24660
24661 if (NumElems == 1) {
24662 SDValue Src = V->getOperand(IdxVal);
24663 if (EltVT != Src.getValueType())
24664 Src = DAG.getNode(ISD::TRUNCATE, DL, EltVT, Src);
24665 return DAG.getBitcast(NVT, Src);
24666 }
24667
24668 // Extract the pieces from the original build_vector.
24669 SDValue BuildVec =
24670 DAG.getBuildVector(ExtractVT, DL, V->ops().slice(IdxVal, NumElems));
24671 return DAG.getBitcast(NVT, BuildVec);
24672 }
24673 }
24674 }
24675
24676 if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
24677 // Handle only simple case where vector being inserted and vector
24678 // being extracted are of same size.
24679 EVT SmallVT = V.getOperand(1).getValueType();
24680 if (!NVT.bitsEq(SmallVT))
24681 return SDValue();
24682
24683 // Combine:
24684 // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
24685 // Into:
24686 // indices are equal or bit offsets are equal => V1
24687 // otherwise => (extract_subvec V1, ExtIdx)
24688 uint64_t InsIdx = V.getConstantOperandVal(2);
24689 if (InsIdx * SmallVT.getScalarSizeInBits() ==
24690 ExtIdx * NVT.getScalarSizeInBits()) {
24691 if (LegalOperations && !TLI.isOperationLegal(ISD::BITCAST, NVT))
24692 return SDValue();
24693
24694 return DAG.getBitcast(NVT, V.getOperand(1));
24695 }
24696 return DAG.getNode(
24697 ISD::EXTRACT_SUBVECTOR, DL, NVT,
24698 DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
24699 N->getOperand(1));
24700 }
24701
24702 if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG, LegalOperations))
24703 return NarrowBOp;
24704
24705 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
24706 return SDValue(N, 0);
24707
24708 return SDValue();
24709}
24710
24711/// Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles
24712/// followed by concatenation. Narrow vector ops may have better performance
24713/// than wide ops, and this can unlock further narrowing of other vector ops.
24714/// Targets can invert this transform later if it is not profitable.
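/// For example (types illustrative), an interleave of two half-width inputs:
/// shuffle (concat X:v4i32, undef), (concat Y:v4i32, undef), <0,8,1,9,2,10,3,11>
/// --> concat (shuffle X, Y, <0,4,1,5>), (shuffle X, Y, <2,6,3,7>),
/// assuming both half-width masks are legal for the target.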
24715static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf,
24716 SelectionDAG &DAG) {
24717 SDValue N0 = Shuf->getOperand(0), N1 = Shuf->getOperand(1);
24718 if (N0.getOpcode() != ISD::CONCAT_VECTORS || N0.getNumOperands() != 2 ||
24719 N1.getOpcode() != ISD::CONCAT_VECTORS || N1.getNumOperands() != 2 ||
24720 !N0.getOperand(1).isUndef() || !N1.getOperand(1).isUndef())
24721 return SDValue();
24722
24723 // Split the wide shuffle mask into halves. Any mask element that is accessing
24724 // operand 1 is offset down to account for narrowing of the vectors.
24725 ArrayRef<int> Mask = Shuf->getMask();
24726 EVT VT = Shuf->getValueType(0);
24727 unsigned NumElts = VT.getVectorNumElements();
24728 unsigned HalfNumElts = NumElts / 2;
24729 SmallVector<int, 16> Mask0(HalfNumElts, -1);
24730 SmallVector<int, 16> Mask1(HalfNumElts, -1);
24731 for (unsigned i = 0; i != NumElts; ++i) {
24732 if (Mask[i] == -1)
24733 continue;
24734 // If we reference the upper (undef) subvector then the element is undef.
24735 if ((Mask[i] % NumElts) >= HalfNumElts)
24736 continue;
24737 int M = Mask[i] < (int)NumElts ? Mask[i] : Mask[i] - (int)HalfNumElts;
24738 if (i < HalfNumElts)
24739 Mask0[i] = M;
24740 else
24741 Mask1[i - HalfNumElts] = M;
24742 }
24743
24744 // Ask the target if this is a valid transform.
24745 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
24746 EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
24747 HalfNumElts);
24748 if (!TLI.isShuffleMaskLegal(Mask0, HalfVT) ||
24749 !TLI.isShuffleMaskLegal(Mask1, HalfVT))
24750 return SDValue();
24751
24752 // shuffle (concat X, undef), (concat Y, undef), Mask -->
24753 // concat (shuffle X, Y, Mask0), (shuffle X, Y, Mask1)
24754 SDValue X = N0.getOperand(0), Y = N1.getOperand(0);
24755 SDLoc DL(Shuf);
24756 SDValue Shuf0 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask0);
24757 SDValue Shuf1 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask1);
24758 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Shuf0, Shuf1);
24759}
24760
24761// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
24762// or turn a shuffle of a single concat into simpler shuffle then concat.
24763static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
24764 EVT VT = N->getValueType(0);
24765 unsigned NumElts = VT.getVectorNumElements();
24766
24767 SDValue N0 = N->getOperand(0);
24768 SDValue N1 = N->getOperand(1);
24769 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
24770 ArrayRef<int> Mask = SVN->getMask();
24771
24772 SmallVector<SDValue, 4> Ops;
24773 EVT ConcatVT = N0.getOperand(0).getValueType();
24774 unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
24775 unsigned NumConcats = NumElts / NumElemsPerConcat;
24776
24777 auto IsUndefMaskElt = [](int i) { return i == -1; };
24778
24779 // Special case: shuffle(concat(A,B)) can be more efficiently represented
24780 // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
24781 // half vector elements.
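 // For example: shuffle (concat A:v4i16, B:v4i16), undef, <1,4,2,6,u,u,u,u>
 // --> concat (shuffle A, B, <1,4,2,6>), undef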
24782 if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
24783 llvm::all_of(Mask.slice(NumElemsPerConcat, NumElemsPerConcat),
24784 IsUndefMaskElt)) {
24785 N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0),
24786 N0.getOperand(1),
24787 Mask.slice(0, NumElemsPerConcat));
24788 N1 = DAG.getUNDEF(ConcatVT);
24789 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
24790 }
24791
24792 // Look at every vector that's inserted. We're looking for exact
24793 // subvector-sized copies from a concatenated vector
24794 for (unsigned I = 0; I != NumConcats; ++I) {
24795 unsigned Begin = I * NumElemsPerConcat;
24796 ArrayRef<int> SubMask = Mask.slice(Begin, NumElemsPerConcat);
24797
24798 // Make sure we're dealing with a copy.
24799 if (llvm::all_of(SubMask, IsUndefMaskElt)) {
24800 Ops.push_back(DAG.getUNDEF(ConcatVT));
24801 continue;
24802 }
24803
24804 int OpIdx = -1;
24805 for (int i = 0; i != (int)NumElemsPerConcat; ++i) {
24806 if (IsUndefMaskElt(SubMask[i]))
24807 continue;
24808 if ((SubMask[i] % (int)NumElemsPerConcat) != i)
24809 return SDValue();
24810 int EltOpIdx = SubMask[i] / NumElemsPerConcat;
24811 if (0 <= OpIdx && EltOpIdx != OpIdx)
24812 return SDValue();
24813 OpIdx = EltOpIdx;
24814 }
24815 assert(0 <= OpIdx && "Unknown concat_vectors op");
24816
24817 if (OpIdx < (int)N0.getNumOperands())
24818 Ops.push_back(N0.getOperand(OpIdx));
24819 else
24820 Ops.push_back(N1.getOperand(OpIdx - N0.getNumOperands()));
24821 }
24822
24823 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
24824}
24825
24826// Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
24827// BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
24828//
24829// SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
24830// a simplification in some sense, but it isn't appropriate in general: some
24831// BUILD_VECTORs are substantially cheaper than others. The general case
24832// of a BUILD_VECTOR requires inserting each element individually (or
24833// performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
24834// all constants is a single constant pool load. A BUILD_VECTOR where each
24835// element is identical is a splat. A BUILD_VECTOR where most of the operands
24836// are undef lowers to a small number of element insertions.
24837//
24838// To deal with this, we currently use a bunch of mostly arbitrary heuristics.
24839// We don't fold shuffles where one side is a non-zero constant, and we don't
24840// fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
24841// non-constant operands. This seems to work out reasonably well in practice.
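// For example, shuffle (BUILD_VECTOR A, B), (BUILD_VECTOR C, D), <0,2> folds
// to (BUILD_VECTOR A, C), whereas a mask such as <0,0> with a non-constant A
// is rejected because the non-splat result would duplicate A.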
24842static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
24843 SelectionDAG &DAG,
24844 const TargetLowering &TLI) {
24845 EVT VT = SVN->getValueType(0);
24846 unsigned NumElts = VT.getVectorNumElements();
24847 SDValue N0 = SVN->getOperand(0);
24848 SDValue N1 = SVN->getOperand(1);
24849
24850 if (!N0->hasOneUse())
24851 return SDValue();
24852
24853 // If only one of N0,N1 is constant, bail out if it is not ALL_ZEROS as
24854 // discussed above.
24855 if (!N1.isUndef()) {
24856 if (!N1->hasOneUse())
24857 return SDValue();
24858
24859 bool N0AnyConst = isAnyConstantBuildVector(N0);
24860 bool N1AnyConst = isAnyConstantBuildVector(N1);
24861 if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
24862 return SDValue();
24863 if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
24864 return SDValue();
24865 }
24866
24867 // If both inputs are splats of the same value then we can safely merge this
24868 // to a single BUILD_VECTOR with undef elements based on the shuffle mask.
24869 bool IsSplat = false;
24870 auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
24871 auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
24872 if (BV0 && BV1)
24873 if (SDValue Splat0 = BV0->getSplatValue())
24874 IsSplat = (Splat0 == BV1->getSplatValue());
24875
24876 SmallVector<SDValue, 8> Ops;
24877 SmallSet<SDValue, 16> DuplicateOps;
24878 for (int M : SVN->getMask()) {
24879 SDValue Op = DAG.getUNDEF(VT.getScalarType());
24880 if (M >= 0) {
24881 int Idx = M < (int)NumElts ? M : M - NumElts;
24882 SDValue &S = (M < (int)NumElts ? N0 : N1);
24883 if (S.getOpcode() == ISD::BUILD_VECTOR) {
24884 Op = S.getOperand(Idx);
24885 } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
24886 SDValue Op0 = S.getOperand(0);
24887 Op = Idx == 0 ? Op0 : DAG.getUNDEF(Op0.getValueType());
24888 } else {
24889 // Operand can't be combined - bail out.
24890 return SDValue();
24891 }
24892 }
24893
24894 // Don't duplicate a non-constant BUILD_VECTOR operand unless we're
24895 // generating a splat; semantically, this is fine, but it's likely to
24896 // generate low-quality code if the target can't reconstruct an appropriate
24897 // shuffle.
24898 if (!Op.isUndef() && !isIntOrFPConstant(Op))
24899 if (!IsSplat && !DuplicateOps.insert(Op).second)
24900 return SDValue();
24901
24902 Ops.push_back(Op);
24903 }
24904
24905 // BUILD_VECTOR requires all inputs to be of the same type, find the
24906 // maximum type and extend them all.
24907 EVT SVT = VT.getScalarType();
24908 if (SVT.isInteger())
24909 for (SDValue &Op : Ops)
24910 SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
24911 if (SVT != VT.getScalarType())
24912 for (SDValue &Op : Ops)
24913 Op = Op.isUndef() ? DAG.getUNDEF(SVT)
24914 : (TLI.isZExtFree(Op.getValueType(), SVT)
24915 ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
24916 : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT));
24917 return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
24918}
24919
24920// Match shuffles that can be converted to *_vector_extend_in_reg.
24921// This is often generated during legalization.
24922// e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src)),
24923// and returns the EVT to which the extension should be performed.
24924// NOTE: this assumes that the src is the first operand of the shuffle.
24925static std::optional<EVT> canCombineShuffleToExtendVectorInreg(
24926 unsigned Opcode, EVT VT, std::function<bool(unsigned)> Match,
24927 SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes,
24928 bool LegalOperations) {
24929 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
24930
24931 // TODO Add support for big-endian when we have a test case.
24932 if (!VT.isInteger() || IsBigEndian)
24933 return std::nullopt;
24934
24935 unsigned NumElts = VT.getVectorNumElements();
24936 unsigned EltSizeInBits = VT.getScalarSizeInBits();
24937
24938 // Attempt to match a '*_extend_vector_inreg' shuffle, we just search for
24939 // power-of-2 extensions as they are the most likely.
24940 // FIXME: should try Scale == NumElts case too,
24941 for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
24942 // The vector width must be a multiple of Scale.
24943 if (NumElts % Scale != 0)
24944 continue;
24945
24946 EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
24947 EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
24948
24949 if ((LegalTypes && !TLI.isTypeLegal(OutVT)) ||
24950 (LegalOperations && !TLI.isOperationLegalOrCustom(Opcode, OutVT)))
24951 continue;
24952
24953 if (Match(Scale))
24954 return OutVT;
24955 }
24956
24957 return std::nullopt;
24958}
24959
24960// Match shuffles that can be converted to any_vector_extend_in_reg.
24961// This is often generated during legalization.
24962// e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
24963static SDValue combineShuffleToAnyExtendVectorInreg(ShuffleVectorSDNode *SVN,
24964 SelectionDAG &DAG,
24965 const TargetLowering &TLI,
24966 bool LegalOperations) {
24967 EVT VT = SVN->getValueType(0);
24968 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
24969
24970 // TODO Add support for big-endian when we have a test case.
24971 if (!VT.isInteger() || IsBigEndian)
24972 return SDValue();
24973
24974 // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
24975 auto isAnyExtend = [NumElts = VT.getVectorNumElements(),
24976 Mask = SVN->getMask()](unsigned Scale) {
24977 for (unsigned i = 0; i != NumElts; ++i) {
24978 if (Mask[i] < 0)
24979 continue;
24980 if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
24981 continue;
24982 return false;
24983 }
24984 return true;
24985 };
24986
24987 unsigned Opcode = ISD::ANY_EXTEND_VECTOR_INREG;
24988 SDValue N0 = SVN->getOperand(0);
24989 // Never create an illegal type. Only create unsupported operations if we
24990 // are pre-legalization.
24991 std::optional<EVT> OutVT = canCombineShuffleToExtendVectorInreg(
24992 Opcode, VT, isAnyExtend, DAG, TLI, /*LegalTypes=*/true, LegalOperations);
24993 if (!OutVT)
24994 return SDValue();
24995 return DAG.getBitcast(VT, DAG.getNode(Opcode, SDLoc(SVN), *OutVT, N0));
24996}
24997
24998// Match shuffles that can be converted to zero_extend_vector_inreg.
24999// This is often generated during legalization.
25000// e.g. v4i32 <0,z,1,u> -> (v2i64 zero_extend_vector_inreg(v4i32 src))
25001static SDValue combineShuffleToZeroExtendVectorInReg(ShuffleVectorSDNode *SVN,
25002 SelectionDAG &DAG,
25003 const TargetLowering &TLI,
25004 bool LegalOperations) {
25005 bool LegalTypes = true;
25006 EVT VT = SVN->getValueType(0);
25007 assert(!VT.isScalableVector() && "Encountered scalable shuffle?");
25008 unsigned NumElts = VT.getVectorNumElements();
25009 unsigned EltSizeInBits = VT.getScalarSizeInBits();
25010
25011 // TODO: add support for big-endian when we have a test case.
25012 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
25013 if (!VT.isInteger() || IsBigEndian)
25014 return SDValue();
25015
25016 SmallVector<int, 16> Mask(SVN->getMask().begin(), SVN->getMask().end());
25017 auto ForEachDecomposedIndice = [NumElts, &Mask](auto Fn) {
25018 for (int &Indice : Mask) {
25019 if (Indice < 0)
25020 continue;
25021 int OpIdx = (unsigned)Indice < NumElts ? 0 : 1;
25022 int OpEltIdx = (unsigned)Indice < NumElts ? Indice : Indice - NumElts;
25023 Fn(Indice, OpIdx, OpEltIdx);
25024 }
25025 };
25026
25027 // Which elements of which operand does this shuffle demand?
25028 std::array<APInt, 2> OpsDemandedElts;
25029 for (APInt &OpDemandedElts : OpsDemandedElts)
25030 OpDemandedElts = APInt::getZero(NumElts);
25031 ForEachDecomposedIndice(
25032 [&OpsDemandedElts](int &Indice, int OpIdx, int OpEltIdx) {
25033 OpsDemandedElts[OpIdx].setBit(OpEltIdx);
25034 });
25035
25036 // Element-wise(!), which of these demanded elements are known to be zero?
25037 std::array<APInt, 2> OpsKnownZeroElts;
25038 for (auto I : zip(SVN->ops(), OpsDemandedElts, OpsKnownZeroElts))
25039 std::get<2>(I) =
25040 DAG.computeVectorKnownZeroElements(std::get<0>(I), std::get<1>(I));
25041
25042 // Manifest zeroable element knowledge in the shuffle mask.
25043 // NOTE: we don't have 'zeroable' sentinel value in generic DAG,
25044 // this is a local invention, but it won't leak into DAG.
25045 // FIXME: should we not manifest them, but just check when matching?
25046 bool HadZeroableElts = false;
25047 ForEachDecomposedIndice([&OpsKnownZeroElts, &HadZeroableElts](
25048 int &Indice, int OpIdx, int OpEltIdx) {
25049 if (OpsKnownZeroElts[OpIdx][OpEltIdx]) {
25050 Indice = -2; // Zeroable element.
25051 HadZeroableElts = true;
25052 }
25053 });
25054
25055 // Don't proceed unless we've refined at least one zeroable mask indice.
25056 // If we didn't, then we are still trying to match the same shuffle mask
25057 // we previously tried to match as ISD::ANY_EXTEND_VECTOR_INREG,
25058 // and evidently failed. Proceeding will lead to endless combine loops.
25059 if (!HadZeroableElts)
25060 return SDValue();
25061
25062 // The shuffle may be more fine-grained than we want. Widen elements first.
25063 // FIXME: should we do this before manifesting zeroable shuffle mask indices?
25064 SmallVector<int, 16> ScaledMask;
25065 getShuffleMaskWithWidestElts(Mask, ScaledMask);
25066 assert(Mask.size() >= ScaledMask.size() &&
25067 Mask.size() % ScaledMask.size() == 0 && "Unexpected mask widening.");
25068 int Prescale = Mask.size() / ScaledMask.size();
25069
25070 NumElts = ScaledMask.size();
25071 EltSizeInBits *= Prescale;
25072
25073 EVT PrescaledVT = EVT::getVectorVT(
25074 *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits),
25075 NumElts);
25076
25077 if (LegalTypes && !TLI.isTypeLegal(PrescaledVT) && TLI.isTypeLegal(VT))
25078 return SDValue();
25079
25080 // For example,
25081 // shuffle<0,z,1,-1> == (v2i64 zero_extend_vector_inreg(v4i32))
25082 // But not shuffle<z,z,1,-1> and not shuffle<0,z,z,-1> ! (for same types)
25083 auto isZeroExtend = [NumElts, &ScaledMask](unsigned Scale) {
25084 assert(Scale >= 2 && Scale <= NumElts && NumElts % Scale == 0 &&
25085 "Unexpected mask scaling factor.");
25086 ArrayRef<int> Mask = ScaledMask;
25087 for (unsigned SrcElt = 0, NumSrcElts = NumElts / Scale;
25088 SrcElt != NumSrcElts; ++SrcElt) {
25089 // Analyze the shuffle mask in Scale-sized chunks.
25090 ArrayRef<int> MaskChunk = Mask.take_front(Scale);
25091 assert(MaskChunk.size() == Scale && "Unexpected mask size.");
25092 Mask = Mask.drop_front(MaskChunk.size());
25093 // The first indice in this chunk must be SrcElt, but not zero!
25094 // FIXME: undef should be fine, but that results in more-defined result.
25095 if (int FirstIndice = MaskChunk[0]; (unsigned)FirstIndice != SrcElt)
25096 return false;
25097 // The rest of the indices in this chunk must be zeros.
25098 // FIXME: undef should be fine, but that results in more-defined result.
25099 if (!all_of(MaskChunk.drop_front(1),
25100 [](int Indice) { return Indice == -2; }))
25101 return false;
25102 }
25103 assert(Mask.empty() && "Did not process the whole mask?");
25104 return true;
25105 };
25106
25107 unsigned Opcode = ISD::ZERO_EXTEND_VECTOR_INREG;
25108 for (bool Commuted : {false, true}) {
25109 SDValue Op = SVN->getOperand(!Commuted ? 0 : 1);
25110 if (Commuted)
25111 ShuffleVectorSDNode::commuteMask(ScaledMask);
25112 std::optional<EVT> OutVT = canCombineShuffleToExtendVectorInreg(
25113 Opcode, PrescaledVT, isZeroExtend, DAG, TLI, LegalTypes,
25114 LegalOperations);
25115 if (OutVT)
25116 return DAG.getBitcast(VT, DAG.getNode(Opcode, SDLoc(SVN), *OutVT,
25117 DAG.getBitcast(PrescaledVT, Op)));
25118 }
25119 return SDValue();
25120}
25121
25122// Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
25123// each source element of a large type into the lowest elements of a smaller
25124// destination type. This is often generated during legalization.
25125// If the source node itself was a '*_extend_vector_inreg' node then we should
25126// then be able to remove it.
25127static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
25128 SelectionDAG &DAG) {
25129 EVT VT = SVN->getValueType(0);
25130 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
25131
25132 // TODO Add support for big-endian when we have a test case.
25133 if (!VT.isInteger() || IsBigEndian)
25134 return SDValue();
25135
25136 SDValue N0 = peekThroughBitcasts(SVN->getOperand(0));
25137
25138 unsigned Opcode = N0.getOpcode();
25139 if (!ISD::isExtVecInRegOpcode(Opcode))
25140 return SDValue();
25141
25142 SDValue N00 = N0.getOperand(0);
25143 ArrayRef<int> Mask = SVN->getMask();
25144 unsigned NumElts = VT.getVectorNumElements();
25145 unsigned EltSizeInBits = VT.getScalarSizeInBits();
25146 unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
25147 unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();
25148
25149 if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
25150 return SDValue();
25151 unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;
25152
25153 // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2-1,-1>
25154 // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
25155 // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
25156 auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
25157 for (unsigned i = 0; i != NumElts; ++i) {
25158 if (Mask[i] < 0)
25159 continue;
25160 if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
25161 continue;
25162 return false;
25163 }
25164 return true;
25165 };
25166
25167 // At the moment we just handle the case where we've truncated back to the
25168 // same size as before the extension.
25169 // TODO: handle more extension/truncation cases as cases arise.
25170 if (EltSizeInBits != ExtSrcSizeInBits)
25171 return SDValue();
25172
25173 // We can remove *extend_vector_inreg only if the truncation happens at
25174 // the same scale as the extension.
25175 if (isTruncate(ExtScale))
25176 return DAG.getBitcast(VT, N00);
25177
25178 return SDValue();
25179}
25180
25181// Combine shuffles of splat-shuffles of the form:
25182// shuffle (shuffle V, undef, splat-mask), undef, M
25183// If splat-mask contains undef elements, we need to be careful about
25184// introducing undefs in the folded mask that are not the result of composing
25185// the masks of the shuffles.
25186static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf,
25187 SelectionDAG &DAG) {
25188 EVT VT = Shuf->getValueType(0);
25189 unsigned NumElts = VT.getVectorNumElements();
25190
25191 if (!Shuf->getOperand(1).isUndef())
25192 return SDValue();
25193
25194 // See if this unary non-splat shuffle actually *is* a splat shuffle,
25195 // in disguise, with all demanded elements being identical.
25196 // FIXME: this can be done per-operand.
25197 if (!Shuf->isSplat()) {
25198 APInt DemandedElts(NumElts, 0);
25199 for (int Idx : Shuf->getMask()) {
25200 if (Idx < 0)
25201 continue; // Ignore sentinel indices.
25202 assert((unsigned)Idx < NumElts && "Out-of-bounds shuffle indice?");
25203 DemandedElts.setBit(Idx);
25204 }
25205 assert(DemandedElts.popcount() > 1 && "Is a splat shuffle already?");
25206 APInt UndefElts;
25207 if (DAG.isSplatValue(Shuf->getOperand(0), DemandedElts, UndefElts)) {
25208 // Even if all demanded elements are splat, some of them could be undef.
25209 // Which lowest demanded element is *not* known-undef?
25210 std::optional<unsigned> MinNonUndefIdx;
25211 for (int Idx : Shuf->getMask()) {
25212 if (Idx < 0 || UndefElts[Idx])
25213 continue; // Ignore sentinel indices, and undef elements.
25214 MinNonUndefIdx = std::min<unsigned>(Idx, MinNonUndefIdx.value_or(~0U));
25215 }
25216 if (!MinNonUndefIdx)
25217 return DAG.getUNDEF(VT); // All undef - result is undef.
25218 assert(*MinNonUndefIdx < NumElts && "Expected valid element index.");
25219 SmallVector<int, 8> SplatMask(Shuf->getMask().begin(),
25220 Shuf->getMask().end());
25221 for (int &Idx : SplatMask) {
25222 if (Idx < 0)
25223 continue; // Passthrough sentinel indices.
25224 // Otherwise, just pick the lowest demanded non-undef element.
25225 // Or sentinel undef, if we know we'd pick a known-undef element.
25226 Idx = UndefElts[Idx] ? -1 : *MinNonUndefIdx;
25227 }
25228 assert(SplatMask != Shuf->getMask() && "Expected mask to change!");
25229 return DAG.getVectorShuffle(VT, SDLoc(Shuf), Shuf->getOperand(0),
25230 Shuf->getOperand(1), SplatMask);
25231 }
25232 }
25233
25234 // If the inner operand is a known splat with no undefs, just return that directly.
25235 // TODO: Create DemandedElts mask from Shuf's mask.
25236 // TODO: Allow undef elements and merge with the shuffle code below.
25237 if (DAG.isSplatValue(Shuf->getOperand(0), /*AllowUndefs*/ false))
25238 return Shuf->getOperand(0);
25239
25240 auto *Splat = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
25241 if (!Splat || !Splat->isSplat())
25242 return SDValue();
25243
25244 ArrayRef<int> ShufMask = Shuf->getMask();
25245 ArrayRef<int> SplatMask = Splat->getMask();
25246 assert(ShufMask.size() == SplatMask.size() && "Mask length mismatch");
25247
25248 // Prefer simplifying to the splat-shuffle, if possible. This is legal if
25249 // every undef mask element in the splat-shuffle has a corresponding undef
25250 // element in the user-shuffle's mask or if the composition of mask elements
25251 // would result in undef.
25252 // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
25253 // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
25254 // In this case it is not legal to simplify to the splat-shuffle because we
25255 // may be exposing to the users of the shuffle an undef element at index 1
25256 // which was not there before the combine.
25257 // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
25258 // In this case the composition of masks yields SplatMask, so it's ok to
25259 // simplify to the splat-shuffle.
25260 // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
25261 // In this case the composed mask includes all undef elements of SplatMask
25262 // and in addition sets element zero to undef. It is safe to simplify to
25263 // the splat-shuffle.
25264 auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
25265 ArrayRef<int> SplatMask) {
25266 for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
25267 if (UserMask[i] != -1 && SplatMask[i] == -1 &&
25268 SplatMask[UserMask[i]] != -1)
25269 return false;
25270 return true;
25271 };
25272 if (CanSimplifyToExistingSplat(ShufMask, SplatMask))
25273 return Shuf->getOperand(0);
25274
25275 // Create a new shuffle with a mask that is composed of the two shuffles'
25276 // masks.
25277 SmallVector<int, 32> NewMask;
25278 for (int Idx : ShufMask)
25279 NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
25280
25281 return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
25282 Splat->getOperand(0), Splat->getOperand(1),
25283 NewMask);
25284}
25285
25286// Combine shuffles of bitcasts into a shuffle of the bitcast type, providing
25287// the mask can be treated as a larger type.
25288static SDValue combineShuffleOfBitcast(ShuffleVectorSDNode *SVN,
25289 SelectionDAG &DAG,
25290 const TargetLowering &TLI,
25291 bool LegalOperations) {
25292 SDValue Op0 = SVN->getOperand(0);
25293 SDValue Op1 = SVN->getOperand(1);
25294 EVT VT = SVN->getValueType(0);
25295 if (Op0.getOpcode() != ISD::BITCAST)
25296 return SDValue();
25297 EVT InVT = Op0.getOperand(0).getValueType();
25298 if (!InVT.isVector() ||
25299 (!Op1.isUndef() && (Op1.getOpcode() != ISD::BITCAST ||
25300 Op1.getOperand(0).getValueType() != InVT)))
25301 return SDValue();
25302 if (isAnyConstantBuildVector(Op0.getOperand(0)) &&
25303 (Op1.isUndef() || isAnyConstantBuildVector(Op1.getOperand(0))))
25304 return SDValue();
25305
25306 int VTLanes = VT.getVectorNumElements();
25307 int InLanes = InVT.getVectorNumElements();
25308 if (VTLanes <= InLanes || VTLanes % InLanes != 0 ||
25309 (LegalOperations &&
25310 !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, InVT)))
25311 return SDValue();
25312 int Factor = VTLanes / InLanes;
25313
25314 // Check that each group of lanes in the mask are either undef or make a valid
25315 // mask for the wider lane type.
25316 ArrayRef<int> Mask = SVN->getMask();
25317 SmallVector<int> NewMask;
25318 if (!widenShuffleMaskElts(Factor, Mask, NewMask))
25319 return SDValue();
25320
25321 if (!TLI.isShuffleMaskLegal(NewMask, InVT))
25322 return SDValue();
25323
25324 // Create the new shuffle with the new mask and bitcast it back to the
25325 // original type.
25326 SDLoc DL(SVN);
25327 Op0 = Op0.getOperand(0);
25328 Op1 = Op1.isUndef() ? DAG.getUNDEF(InVT) : Op1.getOperand(0);
25329 SDValue NewShuf = DAG.getVectorShuffle(InVT, DL, Op0, Op1, NewMask);
25330 return DAG.getBitcast(VT, NewShuf);
25331}
25332
25333/// Combine shuffle of shuffle of the form:
25334/// shuf (shuf X, undef, InnerMask), undef, OuterMask --> splat X
25335static SDValue formSplatFromShuffles(ShuffleVectorSDNode *OuterShuf,
25336 SelectionDAG &DAG) {
25337 if (!OuterShuf->getOperand(1).isUndef())
25338 return SDValue();
25339 auto *InnerShuf = dyn_cast<ShuffleVectorSDNode>(OuterShuf->getOperand(0));
25340 if (!InnerShuf || !InnerShuf->getOperand(1).isUndef())
25341 return SDValue();
25342
25343 ArrayRef<int> OuterMask = OuterShuf->getMask();
25344 ArrayRef<int> InnerMask = InnerShuf->getMask();
25345 unsigned NumElts = OuterMask.size();
25346 assert(NumElts == InnerMask.size() && "Mask length mismatch");
25347 SmallVector<int, 32> CombinedMask(NumElts, -1);
25348 int SplatIndex = -1;
25349 for (unsigned i = 0; i != NumElts; ++i) {
25350 // Undef lanes remain undef.
25351 int OuterMaskElt = OuterMask[i];
25352 if (OuterMaskElt == -1)
25353 continue;
25354
25355 // Peek through the shuffle masks to get the underlying source element.
25356 int InnerMaskElt = InnerMask[OuterMaskElt];
25357 if (InnerMaskElt == -1)
25358 continue;
25359
25360 // Initialize the splatted element.
25361 if (SplatIndex == -1)
25362 SplatIndex = InnerMaskElt;
25363
25364 // Non-matching index - this is not a splat.
25365 if (SplatIndex != InnerMaskElt)
25366 return SDValue();
25367
25368 CombinedMask[i] = InnerMaskElt;
25369 }
25370 assert((all_of(CombinedMask, [](int M) { return M == -1; }) ||
25371 getSplatIndex(CombinedMask) != -1) &&
25372 "Expected a splat mask");
25373
25374 // TODO: The transform may be a win even if the mask is not legal.
25375 EVT VT = OuterShuf->getValueType(0);
25376 assert(VT == InnerShuf->getValueType(0) && "Expected matching shuffle types");
25377 if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(CombinedMask, VT))
25378 return SDValue();
25379
25380 return DAG.getVectorShuffle(VT, SDLoc(OuterShuf), InnerShuf->getOperand(0),
25381 InnerShuf->getOperand(1), CombinedMask);
25382}
25383
25384/// If the shuffle mask is taking exactly one element from the first vector
25385/// operand and passing through all other elements from the second vector
25386/// operand, return the index of the mask element that is choosing an element
25387/// from the first operand. Otherwise, return -1.
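/// e.g. with 4 elements, Mask = [4,0,6,7] returns 1: only mask element 1 takes
/// a value from operand 0 (element 0), and all other lanes pass through the
/// corresponding lane of operand 1. Mask = [0,5,2,7] returns -1 because two
/// mask elements read from operand 0.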
25388static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) {
25389 int MaskSize = Mask.size();
25390 int EltFromOp0 = -1;
25391 // TODO: This does not match if there are undef elements in the shuffle mask.
25392 // Should we ignore undefs in the shuffle mask instead? The trade-off is
25393 // removing an instruction (a shuffle), but losing the knowledge that some
25394 // vector lanes are not needed.
25395 for (int i = 0; i != MaskSize; ++i) {
25396 if (Mask[i] >= 0 && Mask[i] < MaskSize) {
25397 // We're looking for a shuffle of exactly one element from operand 0.
25398 if (EltFromOp0 != -1)
25399 return -1;
25400 EltFromOp0 = i;
25401 } else if (Mask[i] != i + MaskSize) {
25402 // Nothing from operand 1 can change lanes.
25403 return -1;
25404 }
25405 }
25406 return EltFromOp0;
25407}
25408
25409/// If a shuffle inserts exactly one element from a source vector operand into
25410/// another vector operand and we can access the specified element as a scalar,
25411/// then we can eliminate the shuffle.
25412static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
25413 SelectionDAG &DAG) {
25414 // First, check if we are taking one element of a vector and shuffling that
25415 // element into another vector.
25416 ArrayRef<int> Mask = Shuf->getMask();
25417 SmallVector<int, 16> CommutedMask(Mask);
25418 SDValue Op0 = Shuf->getOperand(0);
25419 SDValue Op1 = Shuf->getOperand(1);
25420 int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask);
25421 if (ShufOp0Index == -1) {
25422 // Commute mask and check again.
25423 ShuffleVectorSDNode::commuteMask(CommutedMask);
25424 ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask);
25425 if (ShufOp0Index == -1)
25426 return SDValue();
25427 // Commute operands to match the commuted shuffle mask.
25428 std::swap(Op0, Op1);
25429 Mask = CommutedMask;
25430 }
25431
25432 // The shuffle inserts exactly one element from operand 0 into operand 1.
25433 // Now see if we can access that element as a scalar via a real insert element
25434 // instruction.
25435 // TODO: We can try harder to locate the element as a scalar. Examples: it
25436 // could be an operand of SCALAR_TO_VECTOR, BUILD_VECTOR, or a constant.
25437 assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&
25438 "Shuffle mask value must be from operand 0");
25439 if (Op0.getOpcode() != ISD::INSERT_VECTOR_ELT)
25440 return SDValue();
25441
25442 auto *InsIndexC = dyn_cast<ConstantSDNode>(Op0.getOperand(2));
25443 if (!InsIndexC || InsIndexC->getSExtValue() != Mask[ShufOp0Index])
25444 return SDValue();
25445
25446 // There's an existing insertelement with constant insertion index, so we
25447 // don't need to check the legality/profitability of a replacement operation
25448 // that differs at most in the constant value. The target should be able to
25449 // lower any of those in a similar way. If not, legalization will expand this
25450 // to a scalar-to-vector plus shuffle.
25451 //
25452 // Note that the shuffle may move the scalar from the position that the insert
25453 // element used. Therefore, our new insert element occurs at the shuffle's
25454 // mask index value, not the insert's index value.
25455 // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
25456 SDValue NewInsIndex = DAG.getVectorIdxConstant(ShufOp0Index, SDLoc(Shuf));
25457 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
25458 Op1, Op0.getOperand(1), NewInsIndex);
25459}
25460
25461/// If we have a unary shuffle of a shuffle, see if it can be folded away
25462/// completely. This has the potential to lose undef knowledge because the first
25463/// shuffle may not have an undef mask element where the second one does. So
25464/// only call this after doing simplifications based on demanded elements.
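/// e.g. shuf (shuf0 X, Y, [1,1,6,6]), undef, [1,0,3,2] can be replaced by the
/// inner shuffle, because for every lane i we have Mask0[Mask[i]] == Mask0[i]:
/// the outer shuffle only permutes lanes that already hold identical values.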
25465static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf) {
25466 // shuf (shuf0 X, Y, Mask0), undef, Mask
25467 auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
25468 if (!Shuf0 || !Shuf->getOperand(1).isUndef())
25469 return SDValue();
25470
25471 ArrayRef<int> Mask = Shuf->getMask();
25472 ArrayRef<int> Mask0 = Shuf0->getMask();
25473 for (int i = 0, e = (int)Mask.size(); i != e; ++i) {
25474 // Ignore undef elements.
25475 if (Mask[i] == -1)
25476 continue;
25477 assert(Mask[i] >= 0 && Mask[i] < e && "Unexpected shuffle mask value");
25478
25479 // Is the element of the shuffle operand chosen by this shuffle the same as
25480 // the element chosen by the shuffle operand itself?
25481 if (Mask0[Mask[i]] != Mask0[i])
25482 return SDValue();
25483 }
25484 // Every element of this shuffle is identical to the result of the previous
25485 // shuffle, so we can replace this value.
25486 return Shuf->getOperand(0);
25487}
25488
25489SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
25490 EVT VT = N->getValueType(0);
25491 unsigned NumElts = VT.getVectorNumElements();
25492
25493 SDValue N0 = N->getOperand(0);
25494 SDValue N1 = N->getOperand(1);
25495
25496 assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
25497
25498 // Canonicalize shuffle undef, undef -> undef
25499 if (N0.isUndef() && N1.isUndef())
25500 return DAG.getUNDEF(VT);
25501
25502 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
25503
25504 // Canonicalize shuffle v, v -> v, undef
25505 if (N0 == N1)
25506 return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT),
25507 createUnaryMask(SVN->getMask(), NumElts));
25508
25509 // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
25510 if (N0.isUndef())
25511 return DAG.getCommutedVectorShuffle(*SVN);
25512
25513 // Remove references to rhs if it is undef
25514 if (N1.isUndef()) {
25515 bool Changed = false;
25516 SmallVector<int, 8> NewMask;
25517 for (unsigned i = 0; i != NumElts; ++i) {
25518 int Idx = SVN->getMaskElt(i);
25519 if (Idx >= (int)NumElts) {
25520 Idx = -1;
25521 Changed = true;
25522 }
25523 NewMask.push_back(Idx);
25524 }
25525 if (Changed)
25526 return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
25527 }
25528
25529 if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG))
25530 return InsElt;
25531
25532 // A shuffle of a single vector that is a splatted value can always be folded.
25533 if (SDValue V = combineShuffleOfSplatVal(SVN, DAG))
25534 return V;
25535
25536 if (SDValue V = formSplatFromShuffles(SVN, DAG))
25537 return V;
25538
25539 // If it is a splat, check if the argument vector is another splat or a
25540 // build_vector.
25541 if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
25542 int SplatIndex = SVN->getSplatIndex();
25543 if (N0.hasOneUse() && TLI.isExtractVecEltCheap(VT, SplatIndex) &&
25544 TLI.isBinOp(N0.getOpcode()) && N0->getNumValues() == 1) {
25545 // splat (vector_bo L, R), Index -->
25546 // splat (scalar_bo (extelt L, Index), (extelt R, Index))
25547 SDValue L = N0.getOperand(0), R = N0.getOperand(1);
25548 SDLoc DL(N);
25549 EVT EltVT = VT.getScalarType();
25550 SDValue Index = DAG.getVectorIdxConstant(SplatIndex, DL);
25551 SDValue ExtL = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, L, Index);
25552 SDValue ExtR = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, R, Index);
25553 SDValue NewBO =
25554 DAG.getNode(N0.getOpcode(), DL, EltVT, ExtL, ExtR, N0->getFlags());
25555 SDValue Insert = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, NewBO);
25556 SmallVector<int, 16> ZeroMask(VT.getVectorNumElements(), 0);
25557 return DAG.getVectorShuffle(VT, DL, Insert, DAG.getUNDEF(VT), ZeroMask);
25558 }
25559
25560 // splat(scalar_to_vector(x), 0) -> build_vector(x,...,x)
25561 // splat(insert_vector_elt(v, x, c), c) -> build_vector(x,...,x)
25562 if ((!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) &&
25563 N0.hasOneUse()) {
25564 if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && SplatIndex == 0)
25565 return DAG.getSplatBuildVector(VT, SDLoc(N), N0.getOperand(0));
25566
25567 if (N0.getOpcode() == ISD::INSERT_VECTOR_ELT)
25568 if (auto *Idx = dyn_cast<ConstantSDNode>(N0.getOperand(2)))
25569 if (Idx->getAPIntValue() == SplatIndex)
25570 return DAG.getSplatBuildVector(VT, SDLoc(N), N0.getOperand(1));
25571
25572 // Look through a bitcast if LE and splatting lane 0, through to a
25573 // scalar_to_vector or a build_vector.
25574 if (N0.getOpcode() == ISD::BITCAST && N0.getOperand(0).hasOneUse() &&
25575 SplatIndex == 0 && DAG.getDataLayout().isLittleEndian() &&
25576 (N0.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR ||
25577 N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR)) {
25578 EVT N00VT = N0.getOperand(0).getValueType();
25579 if (VT.getScalarSizeInBits() <= N00VT.getScalarSizeInBits() &&
25580 VT.isInteger() && N00VT.isInteger()) {
25581 EVT InVT =
25582 TLI.getTypeToTransformTo(*DAG.getContext(), VT.getScalarType());
25583 SDValue Op = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0),
25584 SDLoc(N), InVT);
25585 return DAG.getSplatBuildVector(VT, SDLoc(N), Op);
25586 }
25587 }
25588 }
25589
25590 // If this is a bit convert that changes the element type of the vector but
25591 // not the number of vector elements, look through it. Be careful not to
25592 // look through conversions that change things like v4f32 to v2f64.
25593 SDNode *V = N0.getNode();
25594 if (V->getOpcode() == ISD::BITCAST) {
25595 SDValue ConvInput = V->getOperand(0);
25596 if (ConvInput.getValueType().isVector() &&
25597 ConvInput.getValueType().getVectorNumElements() == NumElts)
25598 V = ConvInput.getNode();
25599 }
25600
25601 if (V->getOpcode() == ISD::BUILD_VECTOR) {
25602 assert(V->getNumOperands() == NumElts &&
25603 "BUILD_VECTOR has wrong number of operands");
25604 SDValue Base;
25605 bool AllSame = true;
25606 for (unsigned i = 0; i != NumElts; ++i) {
25607 if (!V->getOperand(i).isUndef()) {
25608 Base = V->getOperand(i);
25609 break;
25610 }
25611 }
25612 // Splat of <u, u, u, u>, return <u, u, u, u>
25613 if (!Base.getNode())
25614 return N0;
25615 for (unsigned i = 0; i != NumElts; ++i) {
25616 if (V->getOperand(i) != Base) {
25617 AllSame = false;
25618 break;
25619 }
25620 }
25621 // Splat of <x, x, x, x>, return <x, x, x, x>
25622 if (AllSame)
25623 return N0;
25624
25625 // Canonicalize any other splat as a build_vector.
25626 SDValue Splatted = V->getOperand(SplatIndex);
25627 SmallVector<SDValue, 8> Ops(NumElts, Splatted);
25628 SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
25629
25630 // We may have jumped through bitcasts, so the type of the
25631 // BUILD_VECTOR may not match the type of the shuffle.
25632 if (V->getValueType(0) != VT)
25633 NewBV = DAG.getBitcast(VT, NewBV);
25634 return NewBV;
25635 }
25636 }
25637
25638 // Simplify source operands based on shuffle mask.
25639 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
25640 return SDValue(N, 0);
25641
25642 // This is intentionally placed after demanded elements simplification because
25643 // it could eliminate knowledge of undef elements created by this shuffle.
25644 if (SDValue ShufOp = simplifyShuffleOfShuffle(SVN))
25645 return ShufOp;
25646
25647 // Match shuffles that can be converted to any_vector_extend_in_reg.
25648 if (SDValue V =
25649 combineShuffleToAnyExtendVectorInreg(SVN, DAG, TLI, LegalOperations))
25650 return V;
25651
25652 // Combine "truncate_vector_in_reg" style shuffles.
25653 if (SDValue V = combineTruncationShuffle(SVN, DAG))
25654 return V;
25655
25656 if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
25657 Level < AfterLegalizeVectorOps &&
25658 (N1.isUndef() ||
25659 (N1.getOpcode() == ISD::CONCAT_VECTORS &&
25660 N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
25661 if (SDValue V = partitionShuffleOfConcats(N, DAG))
25662 return V;
25663 }
25664
25665 // A shuffle of a concat of the same narrow vector can be reduced to use
25666 // only low-half elements of a concat with undef:
25667 // shuf (concat X, X), undef, Mask --> shuf (concat X, undef), undef, Mask'
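// e.g. with X = v2f32, shuf (concat X, X), undef, [0,3,1,2] becomes
// shuf (concat X, undef), undef, [0,1,1,0], since lanes 2 and 3 of the concat
// duplicate lanes 0 and 1.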
25668 if (N0.getOpcode() == ISD::CONCAT_VECTORS && N1.isUndef() &&
25669 N0.getNumOperands() == 2 &&
25670 N0.getOperand(0) == N0.getOperand(1)) {
25671 int HalfNumElts = (int)NumElts / 2;
25672 SmallVector<int, 8> NewMask;
25673 for (unsigned i = 0; i != NumElts; ++i) {
25674 int Idx = SVN->getMaskElt(i);
25675 if (Idx >= HalfNumElts) {
25676 assert(Idx < (int)NumElts && "Shuffle mask chooses undef op");
25677 Idx -= HalfNumElts;
25678 }
25679 NewMask.push_back(Idx);
25680 }
25681 if (TLI.isShuffleMaskLegal(NewMask, VT)) {
25682 SDValue UndefVec = DAG.getUNDEF(N0.getOperand(0).getValueType());
25683 SDValue NewCat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
25684 N0.getOperand(0), UndefVec);
25685 return DAG.getVectorShuffle(VT, SDLoc(N), NewCat, N1, NewMask);
25686 }
25687 }
25688
25689 // See if we can replace a shuffle with an insert_subvector.
25690 // e.g. v2i32 into v8i32:
25691 // shuffle(lhs,concat(rhs0,rhs1,rhs2,rhs3),0,1,2,3,10,11,6,7).
25692 // --> insert_subvector(lhs,rhs1,4).
25693 if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT) &&
25694 TLI.isOperationLegalOrCustom(ISD::INSERT_SUBVECTOR, VT)) {
25695 auto ShuffleToInsert = [&](SDValue LHS, SDValue RHS, ArrayRef<int> Mask) {
25696 // Ensure RHS subvectors are legal.
25697 assert(RHS.getOpcode() == ISD::CONCAT_VECTORS && "Can't find subvectors");
25698 EVT SubVT = RHS.getOperand(0).getValueType();
25699 int NumSubVecs = RHS.getNumOperands();
25700 int NumSubElts = SubVT.getVectorNumElements();
25701 assert((NumElts % NumSubElts) == 0 && "Subvector mismatch");
25702 if (!TLI.isTypeLegal(SubVT))
25703 return SDValue();
25704
25705 // Don't bother if we have a unary shuffle (matches undef + LHS elts).
25706 if (all_of(Mask, [NumElts](int M) { return M < (int)NumElts; }))
25707 return SDValue();
25708
25709 // Search [NumSubElts] spans for RHS sequence.
25710 // TODO: Can we avoid nested loops to increase performance?
25711 SmallVector<int> InsertionMask(NumElts);
25712 for (int SubVec = 0; SubVec != NumSubVecs; ++SubVec) {
25713 for (int SubIdx = 0; SubIdx != (int)NumElts; SubIdx += NumSubElts) {
25714 // Reset mask to identity.
25715 std::iota(InsertionMask.begin(), InsertionMask.end(), 0);
25716
25717 // Add subvector insertion.
25718 std::iota(InsertionMask.begin() + SubIdx,
25719 InsertionMask.begin() + SubIdx + NumSubElts,
25720 NumElts + (SubVec * NumSubElts));
25721
25722 // See if the shuffle mask matches the reference insertion mask.
25723 bool MatchingShuffle = true;
25724 for (int i = 0; i != (int)NumElts; ++i) {
25725 int ExpectIdx = InsertionMask[i];
25726 int ActualIdx = Mask[i];
25727 if (0 <= ActualIdx && ExpectIdx != ActualIdx) {
25728 MatchingShuffle = false;
25729 break;
25730 }
25731 }
25732
25733 if (MatchingShuffle)
25734 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, LHS,
25735 RHS.getOperand(SubVec),
25736 DAG.getVectorIdxConstant(SubIdx, SDLoc(N)));
25737 }
25738 }
25739 return SDValue();
25740 };
25741 ArrayRef<int> Mask = SVN->getMask();
25742 if (N1.getOpcode() == ISD::CONCAT_VECTORS)
25743 if (SDValue InsertN1 = ShuffleToInsert(N0, N1, Mask))
25744 return InsertN1;
25745 if (N0.getOpcode() == ISD::CONCAT_VECTORS) {
25746 SmallVector<int> CommuteMask(Mask);
25747 ShuffleVectorSDNode::commuteMask(CommuteMask);
25748 if (SDValue InsertN0 = ShuffleToInsert(N1, N0, CommuteMask))
25749 return InsertN0;
25750 }
25751 }
25752
25753 // If we're not performing a select/blend shuffle, see if we can convert the
25754 // shuffle into an AND node, where all the out-of-lane elements are known zero.
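// e.g. shuffle X, Y, [0,6,2,7] where elements 2 and 3 of Y are known zero
// produces [X0, 0, X2, 0], which is equivalent to X & {-1, 0, -1, 0}.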
25755 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
25756 bool IsInLaneMask = true;
25757 ArrayRef<int> Mask = SVN->getMask();
25758 SmallVector<int, 16> ClearMask(NumElts, -1);
25759 APInt DemandedLHS = APInt::getZero(NumElts);
25760 APInt DemandedRHS = APInt::getZero(NumElts);
25761 for (int I = 0; I != (int)NumElts; ++I) {
25762 int M = Mask[I];
25763 if (M < 0)
25764 continue;
25765 ClearMask[I] = M == I ? I : (I + NumElts);
25766 IsInLaneMask &= (M == I) || (M == (int)(I + NumElts));
25767 if (M != I) {
25768 APInt &Demanded = M < (int)NumElts ? DemandedLHS : DemandedRHS;
25769 Demanded.setBit(M % NumElts);
25770 }
25771 }
25772 // TODO: Should we try to mask with N1 as well?
25773 if (!IsInLaneMask && (!DemandedLHS.isZero() || !DemandedRHS.isZero()) &&
25774 (DemandedLHS.isZero() || DAG.MaskedVectorIsZero(N0, DemandedLHS)) &&
25775 (DemandedRHS.isZero() || DAG.MaskedVectorIsZero(N1, DemandedRHS))) {
25776 SDLoc DL(N);
25777 EVT IntVT = VT.changeVectorElementTypeToInteger();
25778 EVT IntSVT = VT.getVectorElementType().changeTypeToInteger();
25779 // Transform the type to a legal type so that the buildvector constant
25780 // elements are not illegal. Make sure that the result is larger than the
25781 // original type, in case the value is split into two (e.g. i64->i32).
25782 if (!TLI.isTypeLegal(IntSVT) && LegalTypes)
25783 IntSVT = TLI.getTypeToTransformTo(*DAG.getContext(), IntSVT);
25784 if (IntSVT.getSizeInBits() >= IntVT.getScalarSizeInBits()) {
25785 SDValue ZeroElt = DAG.getConstant(0, DL, IntSVT);
25786 SDValue AllOnesElt = DAG.getAllOnesConstant(DL, IntSVT);
25787 SmallVector<SDValue, 16> AndMask(NumElts, DAG.getUNDEF(IntSVT));
25788 for (int I = 0; I != (int)NumElts; ++I)
25789 if (0 <= Mask[I])
25790 AndMask[I] = Mask[I] == I ? AllOnesElt : ZeroElt;
25791
25792 // See if a clear mask is legal instead of going via
25793 // XformToShuffleWithZero which loses UNDEF mask elements.
25794 if (TLI.isVectorClearMaskLegal(ClearMask, IntVT))
25795 return DAG.getBitcast(
25796 VT, DAG.getVectorShuffle(IntVT, DL, DAG.getBitcast(IntVT, N0),
25797 DAG.getConstant(0, DL, IntVT), ClearMask));
25798
25799 if (TLI.isOperationLegalOrCustom(ISD::AND, IntVT))
25800 return DAG.getBitcast(
25801 VT, DAG.getNode(ISD::AND, DL, IntVT, DAG.getBitcast(IntVT, N0),
25802 DAG.getBuildVector(IntVT, DL, AndMask)));
25803 }
25804 }
25805 }
25806
25807 // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
25808 // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
25809 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT))
25810 if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
25811 return Res;
25812
25813 // If this shuffle only has a single input that is a bitcasted shuffle,
25814 // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
25815 // back to their original types.
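// e.g. shuffle (bitcast (shuffle X:v4i32, Y:v4i32, [0,4,1,5]) to v2i64), undef, [1,u]
// rescales the outer mask to v4i32 as [2,3,u,u], composes it with the inner mask
// to get [1,5,u,u], and becomes bitcast (shuffle X, Y, [1,5,u,u]) to v2i64.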
25816 if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
25817 N1.isUndef() && Level < AfterLegalizeVectorOps &&
25818 TLI.isTypeLegal(VT)) {
25819
25820 SDValue BC0 = peekThroughOneUseBitcasts(N0);
25821 if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
25822 EVT SVT = VT.getScalarType();
25823 EVT InnerVT = BC0->getValueType(0);
25824 EVT InnerSVT = InnerVT.getScalarType();
25825
25826 // Determine which shuffle works with the smaller scalar type.
25827 EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
25828 EVT ScaleSVT = ScaleVT.getScalarType();
25829
25830 if (TLI.isTypeLegal(ScaleVT) &&
25831 0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
25832 0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
25833 int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
25834 int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
25835
25836 // Scale the shuffle masks to the smaller scalar type.
25837 ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
25838 SmallVector<int, 8> InnerMask;
25839 SmallVector<int, 8> OuterMask;
25840 narrowShuffleMaskElts(InnerScale, InnerSVN->getMask(), InnerMask);
25841 narrowShuffleMaskElts(OuterScale, SVN->getMask(), OuterMask);
25842
25843 // Merge the shuffle masks.
25844 SmallVector<int, 8> NewMask;
25845 for (int M : OuterMask)
25846 NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
25847
25848 // Test for shuffle mask legality over both commutations.
25849 SDValue SV0 = BC0->getOperand(0);
25850 SDValue SV1 = BC0->getOperand(1);
25851 bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
25852 if (!LegalMask) {
25853 std::swap(SV0, SV1);
25854 ShuffleVectorSDNode::commuteMask(NewMask);
25855 LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
25856 }
25857
25858 if (LegalMask) {
25859 SV0 = DAG.getBitcast(ScaleVT, SV0);
25860 SV1 = DAG.getBitcast(ScaleVT, SV1);
25861 return DAG.getBitcast(
25862 VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
25863 }
25864 }
25865 }
25866 }
25867
25868 // Match shuffles of bitcasts, so long as the mask can be treated as the
25869 // larger type.
25870 if (SDValue V = combineShuffleOfBitcast(SVN, DAG, TLI, LegalOperations))
25871 return V;
25872
25873 // Compute the combined shuffle mask for a shuffle with SV0 as the first
25874 // operand, and SV1 as the second operand.
25875 // i.e. Merge SVN(OtherSVN, N1) -> shuffle(SV0, SV1, Mask) iff Commute = false
25876 // Merge SVN(N1, OtherSVN) -> shuffle(SV0, SV1, Mask') iff Commute = true
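// e.g. with 4 elements, merging shuffle(shuffle(A, B, [0,5,1,6]), C, [0,2,4,6])
// resolves lanes 0 and 1 through the inner mask to A[0] and A[1], and lanes 2
// and 3 directly to C[0] and C[2], giving SV0 = A, SV1 = C, Mask = [0,1,4,6].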
25877 auto MergeInnerShuffle =
25878 [NumElts, &VT](bool Commute, ShuffleVectorSDNode *SVN,
25879 ShuffleVectorSDNode *OtherSVN, SDValue N1,
25880 const TargetLowering &TLI, SDValue &SV0, SDValue &SV1,
25881 SmallVectorImpl<int> &Mask) -> bool {
25882 // Don't try to fold splats; they're likely to simplify somehow, or they
25883 // might be free.
25884 if (OtherSVN->isSplat())
25885 return false;
25886
25887 SV0 = SV1 = SDValue();
25888 Mask.clear();
25889
25890 for (unsigned i = 0; i != NumElts; ++i) {
25891 int Idx = SVN->getMaskElt(i);
25892 if (Idx < 0) {
25893 // Propagate Undef.
25894 Mask.push_back(Idx);
25895 continue;
25896 }
25897
25898 if (Commute)
25899 Idx = (Idx < (int)NumElts) ? (Idx + NumElts) : (Idx - NumElts);
25900
25901 SDValue CurrentVec;
25902 if (Idx < (int)NumElts) {
25903 // This shuffle index refers to the inner shuffle N0. Lookup the inner
25904 // shuffle mask to identify which vector is actually referenced.
25905 Idx = OtherSVN->getMaskElt(Idx);
25906 if (Idx < 0) {
25907 // Propagate Undef.
25908 Mask.push_back(Idx);
25909 continue;
25910 }
25911 CurrentVec = (Idx < (int)NumElts) ? OtherSVN->getOperand(0)
25912 : OtherSVN->getOperand(1);
25913 } else {
25914 // This shuffle index references an element within N1.
25915 CurrentVec = N1;
25916 }
25917
25918 // Simple case where 'CurrentVec' is UNDEF.
25919 if (CurrentVec.isUndef()) {
25920 Mask.push_back(-1);
25921 continue;
25922 }
25923
25924 // Canonicalize the shuffle index. We don't know yet if CurrentVec
25925 // will be the first or second operand of the combined shuffle.
25926 Idx = Idx % NumElts;
25927 if (!SV0.getNode() || SV0 == CurrentVec) {
25928 // Ok. CurrentVec is the left hand side.
25929 // Update the mask accordingly.
25930 SV0 = CurrentVec;
25931 Mask.push_back(Idx);
25932 continue;
25933 }
25934 if (!SV1.getNode() || SV1 == CurrentVec) {
25935 // Ok. CurrentVec is the right hand side.
25936 // Update the mask accordingly.
25937 SV1 = CurrentVec;
25938 Mask.push_back(Idx + NumElts);
25939 continue;
25940 }
25941
25942 // Last chance - see if the vector is another shuffle and if it
25943 // uses one of the existing candidate shuffle ops.
25944 if (auto *CurrentSVN = dyn_cast<ShuffleVectorSDNode>(CurrentVec)) {
25945 int InnerIdx = CurrentSVN->getMaskElt(Idx);
25946 if (InnerIdx < 0) {
25947 Mask.push_back(-1);
25948 continue;
25949 }
25950 SDValue InnerVec = (InnerIdx < (int)NumElts)
25951 ? CurrentSVN->getOperand(0)
25952 : CurrentSVN->getOperand(1);
25953 if (InnerVec.isUndef()) {
25954 Mask.push_back(-1);
25955 continue;
25956 }
25957 InnerIdx %= NumElts;
25958 if (InnerVec == SV0) {
25959 Mask.push_back(InnerIdx);
25960 continue;
25961 }
25962 if (InnerVec == SV1) {
25963 Mask.push_back(InnerIdx + NumElts);
25964 continue;
25965 }
25966 }
25967
25968 // Bail out if we cannot convert the shuffle pair into a single shuffle.
25969 return false;
25970 }
25971
25972 if (llvm::all_of(Mask, [](int M) { return M < 0; }))
25973 return true;
25974
25975 // Avoid introducing shuffles with illegal mask.
25976 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
25977 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
25978 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
25979 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
25980 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
25981 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
25982 if (TLI.isShuffleMaskLegal(Mask, VT))
25983 return true;
25984
25985 std::swap(SV0, SV1);
25986 ShuffleVectorSDNode::commuteMask(Mask);
25987 return TLI.isShuffleMaskLegal(Mask, VT);
25988 };
25989
25990 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
25991 // Canonicalize shuffles according to rules:
25992 // shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
25993 // shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
25994 // shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
25995 if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
25996 N0.getOpcode() != ISD::VECTOR_SHUFFLE) {
25997 // The incoming shuffle must be of the same type as the result of the
25998 // current shuffle.
25999 assert(N1->getOperand(0).getValueType() == VT &&
26000 "Shuffle types don't match");
26001
26002 SDValue SV0 = N1->getOperand(0);
26003 SDValue SV1 = N1->getOperand(1);
26004 bool HasSameOp0 = N0 == SV0;
26005 bool IsSV1Undef = SV1.isUndef();
26006 if (HasSameOp0 || IsSV1Undef || N0 == SV1)
26007 // Commute the operands of this shuffle so merging below will trigger.
26008 return DAG.getCommutedVectorShuffle(*SVN);
26009 }
26010
26011 // Canonicalize splat shuffles to the RHS to improve merging below.
26012 // shuffle(splat(A,u), shuffle(C,D)) -> shuffle'(shuffle(C,D), splat(A,u))
26013 if (N0.getOpcode() == ISD::VECTOR_SHUFFLE &&
26014 N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
26015 cast<ShuffleVectorSDNode>(N0)->isSplat() &&
26016 !cast<ShuffleVectorSDNode>(N1)->isSplat()) {
26017 return DAG.getCommutedVectorShuffle(*SVN);
26018 }
26019
26020 // Try to fold according to rules:
26021 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
26022 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
26023 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
26024 // Don't try to fold shuffles with illegal type.
26025 // Only fold if this shuffle is the only user of the other shuffle.
26026 // Try matching shuffle(C,shuffle(A,B)) commuted patterns as well.
26027 for (int i = 0; i != 2; ++i) {
26028 if (N->getOperand(i).getOpcode() == ISD::VECTOR_SHUFFLE &&
26029 N->isOnlyUserOf(N->getOperand(i).getNode())) {
26030 // The incoming shuffle must be of the same type as the result of the
26031 // current shuffle.
26032 auto *OtherSV = cast<ShuffleVectorSDNode>(N->getOperand(i));
26033 assert(OtherSV->getOperand(0).getValueType() == VT &&
26034 "Shuffle types don't match");
26035
26036 SDValue SV0, SV1;
26037 SmallVector<int, 4> Mask;
26038 if (MergeInnerShuffle(i != 0, SVN, OtherSV, N->getOperand(1 - i), TLI,
26039 SV0, SV1, Mask)) {
26040 // Check if all indices in Mask are Undef. In case, propagate Undef.
26041 if (llvm::all_of(Mask, [](int M) { return M < 0; }))
26042 return DAG.getUNDEF(VT);
26043
26044 return DAG.getVectorShuffle(VT, SDLoc(N),
26045 SV0 ? SV0 : DAG.getUNDEF(VT),
26046 SV1 ? SV1 : DAG.getUNDEF(VT), Mask);
26047 }
26048 }
26049 }
26050
26051 // Merge shuffles through binops if we are able to merge them with at least
26052 // one other shuffle.
26053 // shuffle(bop(shuffle(x,y),shuffle(z,w)),undef)
26054 // shuffle(bop(shuffle(x,y),shuffle(z,w)),bop(shuffle(a,b),shuffle(c,d)))
26055 unsigned SrcOpcode = N0.getOpcode();
26056 if (TLI.isBinOp(SrcOpcode) && N->isOnlyUserOf(N0.getNode()) &&
26057 (N1.isUndef() ||
26058 (SrcOpcode == N1.getOpcode() && N->isOnlyUserOf(N1.getNode())))) {
26059 // Get binop source ops, or just pass on the undef.
26060 SDValue Op00 = N0.getOperand(0);
26061 SDValue Op01 = N0.getOperand(1);
26062 SDValue Op10 = N1.isUndef() ? N1 : N1.getOperand(0);
26063 SDValue Op11 = N1.isUndef() ? N1 : N1.getOperand(1);
26064 // TODO: We might be able to relax the VT check but we don't currently
26065 // have any isBinOp() that has different result/ops VTs so play safe until
26066 // we have test coverage.
26067 if (Op00.getValueType() == VT && Op10.getValueType() == VT &&
26068 Op01.getValueType() == VT && Op11.getValueType() == VT &&
26069 (Op00.getOpcode() == ISD::VECTOR_SHUFFLE ||
26070 Op10.getOpcode() == ISD::VECTOR_SHUFFLE ||
26071 Op01.getOpcode() == ISD::VECTOR_SHUFFLE ||
26072 Op11.getOpcode() == ISD::VECTOR_SHUFFLE)) {
26073 auto CanMergeInnerShuffle = [&](SDValue &SV0, SDValue &SV1,
26074 SmallVectorImpl<int> &Mask, bool LeftOp,
26075 bool Commute) {
26076 SDValue InnerN = Commute ? N1 : N0;
26077 SDValue Op0 = LeftOp ? Op00 : Op01;
26078 SDValue Op1 = LeftOp ? Op10 : Op11;
26079 if (Commute)
26080 std::swap(Op0, Op1);
26081 // Only accept the merged shuffle if we don't introduce undef elements,
26082 // or the inner shuffle already contained undef elements.
26083 auto *SVN0 = dyn_cast<ShuffleVectorSDNode>(Op0);
26084 return SVN0 && InnerN->isOnlyUserOf(SVN0) &&
26085 MergeInnerShuffle(Commute, SVN, SVN0, Op1, TLI, SV0, SV1,
26086 Mask) &&
26087 (llvm::any_of(SVN0->getMask(), [](int M) { return M < 0; }) ||
26088 llvm::none_of(Mask, [](int M) { return M < 0; }));
26089 };
26090
26091 // Ensure we don't increase the number of shuffles - we must merge a
26092 // shuffle from at least one of the LHS and RHS ops.
26093 bool MergedLeft = false;
26094 SDValue LeftSV0, LeftSV1;
26095 SmallVector<int, 4> LeftMask;
26096 if (CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, false) ||
26097 CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, true)) {
26098 MergedLeft = true;
26099 } else {
26100 LeftMask.assign(SVN->getMask().begin(), SVN->getMask().end());
26101 LeftSV0 = Op00, LeftSV1 = Op10;
26102 }
26103
26104 bool MergedRight = false;
26105 SDValue RightSV0, RightSV1;
26106 SmallVector<int, 4> RightMask;
26107 if (CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, false) ||
26108 CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, true)) {
26109 MergedRight = true;
26110 } else {
26111 RightMask.assign(SVN->getMask().begin(), SVN->getMask().end());
26112 RightSV0 = Op01, RightSV1 = Op11;
26113 }
26114
26115 if (MergedLeft || MergedRight) {
26116 SDLoc DL(N);
26117 SDValue LHS = DAG.getVectorShuffle(
26118 VT, DL, LeftSV0 ? LeftSV0 : DAG.getUNDEF(VT),
26119 LeftSV1 ? LeftSV1 : DAG.getUNDEF(VT), LeftMask);
26120 SDValue RHS = DAG.getVectorShuffle(
26121 VT, DL, RightSV0 ? RightSV0 : DAG.getUNDEF(VT),
26122 RightSV1 ? RightSV1 : DAG.getUNDEF(VT), RightMask);
26123 return DAG.getNode(SrcOpcode, DL, VT, LHS, RHS);
26124 }
26125 }
26126 }
26127 }
26128
26129 if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG))
26130 return V;
26131
26132 // Match shuffles that can be converted to ISD::ZERO_EXTEND_VECTOR_INREG.
26133 // Perform this really late, because it could eliminate knowledge
26134 // of undef elements created by this shuffle.
26135 if (Level < AfterLegalizeTypes)
26136 if (SDValue V = combineShuffleToZeroExtendVectorInReg(SVN, DAG, TLI,
26137 LegalOperations))
26138 return V;
26139
26140 return SDValue();
26141}
26142
26143SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
26144 EVT VT = N->getValueType(0);
26145 if (!VT.isFixedLengthVector())
26146 return SDValue();
26147
26148 // Try to convert a scalar binop with an extracted vector element to a vector
26149 // binop. This is intended to reduce potentially expensive register moves.
26150 // TODO: Check if both operands are extracted.
26151 // TODO: How to prefer scalar/vector ops with multiple uses of the extract?
26152 // TODO: Generalize this, so it can be called from visitINSERT_VECTOR_ELT().
26153 SDValue Scalar = N->getOperand(0);
26154 unsigned Opcode = Scalar.getOpcode();
26155 EVT VecEltVT = VT.getScalarType();
26156 if (Scalar.hasOneUse() && Scalar->getNumValues() == 1 &&
26157 TLI.isBinOp(Opcode) && Scalar.getValueType() == VecEltVT &&
26158 Scalar.getOperand(0).getValueType() == VecEltVT &&
26159 Scalar.getOperand(1).getValueType() == VecEltVT &&
26160 Scalar->isOnlyUserOf(Scalar.getOperand(0).getNode()) &&
26161 Scalar->isOnlyUserOf(Scalar.getOperand(1).getNode()) &&
26162 DAG.isSafeToSpeculativelyExecute(Opcode) && hasOperation(Opcode, VT)) {
26163 // Match an extract element and get a shuffle mask equivalent.
26164 SmallVector<int, 8> ShufMask(VT.getVectorNumElements(), -1);
26165
26166 for (int i : {0, 1}) {
26167 // s2v (bo (extelt V, Idx), C) --> shuffle (bo V, C'), {Idx, -1, -1...}
26168 // s2v (bo C, (extelt V, Idx)) --> shuffle (bo C', V), {Idx, -1, -1...}
26169 SDValue EE = Scalar.getOperand(i);
26170 auto *C = dyn_cast<ConstantSDNode>(Scalar.getOperand(i ? 0 : 1));
26171 if (C && EE.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
26172 EE.getOperand(0).getValueType() == VT &&
26173 isa<ConstantSDNode>(EE.getOperand(1))) {
26174 // Mask = {ExtractIndex, undef, undef....}
26175 ShufMask[0] = EE.getConstantOperandVal(1);
26176 // Make sure the shuffle is legal if we are crossing lanes.
26177 if (TLI.isShuffleMaskLegal(ShufMask, VT)) {
26178 SDLoc DL(N);
26179 SDValue V[] = {EE.getOperand(0),
26180 DAG.getConstant(C->getAPIntValue(), DL, VT)};
26181 SDValue VecBO = DAG.getNode(Opcode, DL, VT, V[i], V[1 - i]);
26182 return DAG.getVectorShuffle(VT, DL, VecBO, DAG.getUNDEF(VT),
26183 ShufMask);
26184 }
26185 }
26186 }
26187 }
26188
26189 // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
26190 // with a VECTOR_SHUFFLE and possible truncate.
26191 if (Opcode != ISD::EXTRACT_VECTOR_ELT ||
26192 !Scalar.getOperand(0).getValueType().isFixedLengthVector())
26193 return SDValue();
26194
26195 // If we have an implicit truncate, truncate here if it is legal.
26196 if (VecEltVT != Scalar.getValueType() &&
26197 Scalar.getValueType().isScalarInteger() && isTypeLegal(VecEltVT)) {
26198 SDValue Val = DAG.getNode(ISD::TRUNCATE, SDLoc(Scalar), VecEltVT, Scalar);
26199 return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
26200 }
26201
26202 auto *ExtIndexC = dyn_cast<ConstantSDNode>(Scalar.getOperand(1));
26203 if (!ExtIndexC)
26204 return SDValue();
26205
26206 SDValue SrcVec = Scalar.getOperand(0);
26207 EVT SrcVT = SrcVec.getValueType();
26208 unsigned SrcNumElts = SrcVT.getVectorNumElements();
26209 unsigned VTNumElts = VT.getVectorNumElements();
26210 if (VecEltVT == SrcVT.getScalarType() && VTNumElts <= SrcNumElts) {
26211 // Create a shuffle equivalent for scalar-to-vector: {ExtIndex, -1, -1, ...}
26212 SmallVector<int, 8> Mask(SrcNumElts, -1);
26213 Mask[0] = ExtIndexC->getZExtValue();
26214 SDValue LegalShuffle = TLI.buildLegalVectorShuffle(
26215 SrcVT, SDLoc(N), SrcVec, DAG.getUNDEF(SrcVT), Mask, DAG);
26216 if (!LegalShuffle)
26217 return SDValue();
26218
26219 // If the initial vector is the same size, the shuffle is the result.
26220 if (VT == SrcVT)
26221 return LegalShuffle;
26222
26223 // If not, shorten the shuffled vector.
26224 if (VTNumElts != SrcNumElts) {
26225 SDValue ZeroIdx = DAG.getVectorIdxConstant(0, SDLoc(N));
26226 EVT SubVT = EVT::getVectorVT(*DAG.getContext(),
26227 SrcVT.getVectorElementType(), VTNumElts);
26228 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT, LegalShuffle,
26229 ZeroIdx);
26230 }
26231 }
26232
26233 return SDValue();
26234}
26235
26236SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
26237 EVT VT = N->getValueType(0);
26238 SDValue N0 = N->getOperand(0);
26239 SDValue N1 = N->getOperand(1);
26240 SDValue N2 = N->getOperand(2);
26241 uint64_t InsIdx = N->getConstantOperandVal(2);
26242
26243 // If inserting an UNDEF, just return the original vector.
26244 if (N1.isUndef())
26245 return N0;
26246
26247 // If this is an insert of an extracted vector into an undef vector, we can
26248 // just use the input to the extract if the types match, and can simplify
26249 // in some cases even if they don't.
26250 if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
26251 N1.getOperand(1) == N2) {
26252 EVT SrcVT = N1.getOperand(0).getValueType();
26253 if (SrcVT == VT)
26254 return N1.getOperand(0);
26255 // TODO: To remove the zero check, need to adjust the offset to
26256 // a multiple of the new src type.
26257 if (isNullConstant(N2) &&
26258 VT.isScalableVector() == SrcVT.isScalableVector()) {
26259 if (VT.getVectorMinNumElements() >= SrcVT.getVectorMinNumElements())
26260 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
26261 VT, N0, N1.getOperand(0), N2);
26262 else
26263 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N),
26264 VT, N1.getOperand(0), N2);
26265 }
26266 }
26267
26268 // Handle case where we've ended up inserting back into the source vector
26269 // we extracted the subvector from.
26270 // insert_subvector(N0, extract_subvector(N0, N2), N2) --> N0
26271 if (N1.getOpcode() == ISD::EXTRACT_SUBVECTOR && N1.getOperand(0) == N0 &&
26272 N1.getOperand(1) == N2)
26273 return N0;
26274
26275 // Simplify scalar inserts into an undef vector:
26276 // insert_subvector undef, (splat X), N2 -> splat X
26277 if (N0.isUndef() && N1.getOpcode() == ISD::SPLAT_VECTOR)
26278 if (DAG.isConstantValueOfAnyType(N1.getOperand(0)) || N1.hasOneUse())
26279 return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, N1.getOperand(0));
26280
26281 // If we are inserting a bitcast value into an undef, with the same
26282 // number of elements, just use the bitcast input of the extract.
26283 // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
26284 // BITCAST (INSERT_SUBVECTOR UNDEF N1 N2)
26285 if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
26286 N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
26287 N1.getOperand(0).getOperand(1) == N2 &&
26288 N1.getOperand(0).getOperand(0).getValueType().getVectorElementCount() ==
26289 VT.getVectorElementCount() &&
26290 N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() ==
26291 VT.getSizeInBits()) {
26292 return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
26293 }
26294
26295 // If both N0 and N1 are bitcast values on which insert_subvector
26296 // would make sense, pull the bitcast through.
26297 // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
26298 // BITCAST (INSERT_SUBVECTOR N0 N1 N2)
26299 if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
26300 SDValue CN0 = N0.getOperand(0);
26301 SDValue CN1 = N1.getOperand(0);
26302 EVT CN0VT = CN0.getValueType();
26303 EVT CN1VT = CN1.getValueType();
26304 if (CN0VT.isVector() && CN1VT.isVector() &&
26305 CN0VT.getVectorElementType() == CN1VT.getVectorElementType() &&
26306 CN0VT.getVectorElementCount() == VT.getVectorElementCount()) {
26307 SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
26308 CN0.getValueType(), CN0, CN1, N2);
26309 return DAG.getBitcast(VT, NewINSERT);
26310 }
26311 }
26312
26313 // Combine INSERT_SUBVECTORs where we are inserting to the same index.
26314 // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
26315 // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
26316 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
26317 N0.getOperand(1).getValueType() == N1.getValueType() &&
26318 N0.getOperand(2) == N2)
26319 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
26320 N1, N2);
26321
26322 // Eliminate an intermediate insert into an undef vector:
26323 // insert_subvector undef, (insert_subvector undef, X, 0), 0 -->
26324 // insert_subvector undef, X, 0
26325 if (N0.isUndef() && N1.getOpcode() == ISD::INSERT_SUBVECTOR &&
26326 N1.getOperand(0).isUndef() && isNullConstant(N1.getOperand(2)) &&
26327 isNullConstant(N2))
26328 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0,
26329 N1.getOperand(1), N2);
26330
26331 // Push subvector bitcasts to the output, adjusting the index as we go.
26332 // insert_subvector(bitcast(v), bitcast(s), c1)
26333 // -> bitcast(insert_subvector(v, s, c2))
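// e.g. insert_subvector (bitcast v8i16 V to v4i32), (bitcast v2i16 S to v1i32), 2
// --> bitcast (insert_subvector V, S, 4) to v4i32, scaling the index by the
// ratio of the element sizes (32 / 16 = 2).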
26334 if ((N0.isUndef() || N0.getOpcode() == ISD::BITCAST) &&
26335 N1.getOpcode() == ISD::BITCAST) {
26336 SDValue N0Src = peekThroughBitcasts(N0);
26337 SDValue N1Src = peekThroughBitcasts(N1);
26338 EVT N0SrcSVT = N0Src.getValueType().getScalarType();
26339 EVT N1SrcSVT = N1Src.getValueType().getScalarType();
26340 if ((N0.isUndef() || N0SrcSVT == N1SrcSVT) &&
26341 N0Src.getValueType().isVector() && N1Src.getValueType().isVector()) {
26342 EVT NewVT;
26343 SDLoc DL(N);
26344 SDValue NewIdx;
26345 LLVMContext &Ctx = *DAG.getContext();
26346 ElementCount NumElts = VT.getVectorElementCount();
26347 unsigned EltSizeInBits = VT.getScalarSizeInBits();
26348 if ((EltSizeInBits % N1SrcSVT.getSizeInBits()) == 0) {
26349 unsigned Scale = EltSizeInBits / N1SrcSVT.getSizeInBits();
26350 NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts * Scale);
26351 NewIdx = DAG.getVectorIdxConstant(InsIdx * Scale, DL);
26352 } else if ((N1SrcSVT.getSizeInBits() % EltSizeInBits) == 0) {
26353 unsigned Scale = N1SrcSVT.getSizeInBits() / EltSizeInBits;
26354 if (NumElts.isKnownMultipleOf(Scale) && (InsIdx % Scale) == 0) {
26355 NewVT = EVT::getVectorVT(Ctx, N1SrcSVT,
26356 NumElts.divideCoefficientBy(Scale));
26357 NewIdx = DAG.getVectorIdxConstant(InsIdx / Scale, DL);
26358 }
26359 }
26360 if (NewIdx && hasOperation(ISD::INSERT_SUBVECTOR, NewVT)) {
26361 SDValue Res = DAG.getBitcast(NewVT, N0Src);
26362 Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, NewVT, Res, N1Src, NewIdx);
26363 return DAG.getBitcast(VT, Res);
26364 }
26365 }
26366 }
26367
26368 // Canonicalize insert_subvector dag nodes.
26369 // Example:
26370 // (insert_subvector (insert_subvector A, Idx0), Idx1)
26371 // -> (insert_subvector (insert_subvector A, Idx1), Idx0)
26372 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
26373 N1.getValueType() == N0.getOperand(1).getValueType()) {
26374 unsigned OtherIdx = N0.getConstantOperandVal(2);
26375 if (InsIdx < OtherIdx) {
26376 // Swap nodes.
26377 SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
26378 N0.getOperand(0), N1, N2);
26379 AddToWorklist(NewOp.getNode());
26380 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
26381 VT, NewOp, N0.getOperand(1), N0.getOperand(2));
26382 }
26383 }
26384
26385 // If the input vector is a concatenation, and the insert replaces
26386 // one of the pieces, we can optimize into a single concat_vectors.
26387 if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
26388 N0.getOperand(0).getValueType() == N1.getValueType() &&
26391 unsigned Factor = N1.getValueType().getVectorMinNumElements();
26392 SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
26393 Ops[InsIdx / Factor] = N1;
26394 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
26395 }
26396
26397 // Simplify source operands based on insertion.
26398 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
26399 return SDValue(N, 0);
26400
26401 return SDValue();
26402}
26403
26404SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
26405 SDValue N0 = N->getOperand(0);
26406
26407 // fold (fp_to_fp16 (fp16_to_fp op)) -> op
26408 if (N0->getOpcode() == ISD::FP16_TO_FP)
26409 return N0->getOperand(0);
26410
26411 return SDValue();
26412}
26413
26414SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
26415 auto Op = N->getOpcode();
26417 "opcode should be FP16_TO_FP or BF16_TO_FP.");
26418 SDValue N0 = N->getOperand(0);
26419
26420 // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op) or
26421 // fold bf16_to_fp(op & 0xffff) -> bf16_to_fp(op)
26422 if (!TLI.shouldKeepZExtForFP16Conv() && N0->getOpcode() == ISD::AND) {
26423 ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
26424 if (AndConst && AndConst->getAPIntValue() == 0xffff) {
26425 return DAG.getNode(Op, SDLoc(N), N->getValueType(0), N0.getOperand(0));
26426 }
26427 }
26428
26429 return SDValue();
26430}
26431
26432SDValue DAGCombiner::visitFP_TO_BF16(SDNode *N) {
26433 SDValue N0 = N->getOperand(0);
26434
26435 // fold (fp_to_bf16 (bf16_to_fp op)) -> op
26436 if (N0->getOpcode() == ISD::BF16_TO_FP)
26437 return N0->getOperand(0);
26438
26439 return SDValue();
26440}
26441
26442SDValue DAGCombiner::visitBF16_TO_FP(SDNode *N) {
26443 // fold bf16_to_fp(op & 0xffff) -> bf16_to_fp(op)
26444 return visitFP16_TO_FP(N);
26445}
26446
26447SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
26448 SDValue N0 = N->getOperand(0);
26449 EVT VT = N0.getValueType();
26450 unsigned Opcode = N->getOpcode();
26451
26452 // VECREDUCE over 1-element vector is just an extract.
26453 if (VT.getVectorElementCount().isScalar()) {
26454 SDLoc dl(N);
26455 SDValue Res =
26456 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT.getVectorElementType(), N0,
26457 DAG.getVectorIdxConstant(0, dl));
26458 if (Res.getValueType() != N->getValueType(0))
26459 Res = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Res);
26460 return Res;
26461 }
26462
26463 // On a boolean vector an and/or reduction is the same as a umin/umax
26464 // reduction. Convert them if the latter is legal while the former isn't.
26465 if (Opcode == ISD::VECREDUCE_AND || Opcode == ISD::VECREDUCE_OR) {
26466 unsigned NewOpcode = Opcode == ISD::VECREDUCE_AND
26467 ? ISD::VECREDUCE_UMIN : ISD::VECREDUCE_UMAX;
26468 if (!TLI.isOperationLegalOrCustom(Opcode, VT) &&
26469 TLI.isOperationLegalOrCustom(NewOpcode, VT) &&
26470 DAG.ComputeNumSignBits(N0) == VT.getScalarSizeInBits())
26471 return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), N0);
26472 }
26473
26474 // vecreduce_or(insert_subvector(zero or undef, val)) -> vecreduce_or(val)
26475 // vecreduce_and(insert_subvector(ones or undef, val)) -> vecreduce_and(val)
26476 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
26477 TLI.isTypeLegal(N0.getOperand(1).getValueType())) {
26478 SDValue Vec = N0.getOperand(0);
26479 SDValue Subvec = N0.getOperand(1);
26480 if ((Opcode == ISD::VECREDUCE_OR &&
26481 (N0.getOperand(0).isUndef() || isNullOrNullSplat(Vec))) ||
26482 (Opcode == ISD::VECREDUCE_AND &&
26483 (N0.getOperand(0).isUndef() || isAllOnesOrAllOnesSplat(Vec))))
26484 return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), Subvec);
26485 }
26486
26487 return SDValue();
26488}
26489
26490SDValue DAGCombiner::visitVP_FSUB(SDNode *N) {
26491 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
26492
26493 // FSUB -> FMA combines:
26494 if (SDValue Fused = visitFSUBForFMACombine<VPMatchContext>(N)) {
26495 AddToWorklist(Fused.getNode());
26496 return Fused;
26497 }
26498 return SDValue();
26499}
26500
26501SDValue DAGCombiner::visitVPOp(SDNode *N) {
26502
26503 if (N->getOpcode() == ISD::VP_GATHER)
26504 if (SDValue SD = visitVPGATHER(N))
26505 return SD;
26506
26507 if (N->getOpcode() == ISD::VP_SCATTER)
26508 if (SDValue SD = visitVPSCATTER(N))
26509 return SD;
26510
26511 if (N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_LOAD)
26512 if (SDValue SD = visitVP_STRIDED_LOAD(N))
26513 return SD;
26514
26515 if (N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_STORE)
26516 if (SDValue SD = visitVP_STRIDED_STORE(N))
26517 return SD;
26518
26519 // VP operations in which all vector elements are disabled - either by
26520 // determining that the mask is all false or that the EVL is 0 - can be
26521 // eliminated.
26522 bool AreAllEltsDisabled = false;
26523 if (auto EVLIdx = ISD::getVPExplicitVectorLengthIdx(N->getOpcode()))
26524 AreAllEltsDisabled |= isNullConstant(N->getOperand(*EVLIdx));
26525 if (auto MaskIdx = ISD::getVPMaskIdx(N->getOpcode()))
26526 AreAllEltsDisabled |=
26527 ISD::isConstantSplatVectorAllZeros(N->getOperand(*MaskIdx).getNode());
26528
26529 // This is the only generic VP combine we support for now.
26530 if (!AreAllEltsDisabled) {
26531 switch (N->getOpcode()) {
26532 case ISD::VP_FADD:
26533 return visitVP_FADD(N);
26534 case ISD::VP_FSUB:
26535 return visitVP_FSUB(N);
26536 case ISD::VP_FMA:
26537 return visitFMA<VPMatchContext>(N);
26538 case ISD::VP_SELECT:
26539 return visitVP_SELECT(N);
26540 }
26541 return SDValue();
26542 }
26543
26544 // Binary operations can be replaced by UNDEF.
26545 if (ISD::isVPBinaryOp(N->getOpcode()))
26546 return DAG.getUNDEF(N->getValueType(0));
26547
26548 // VP Memory operations can be replaced by either the chain (stores) or the
26549 // chain + undef (loads).
26550 if (const auto *MemSD = dyn_cast<MemSDNode>(N)) {
26551 if (MemSD->writeMem())
26552 return MemSD->getChain();
26553 return CombineTo(N, DAG.getUNDEF(N->getValueType(0)), MemSD->getChain());
26554 }
26555
26556 // Reduction operations return the start operand when no elements are active.
26557 if (ISD::isVPReduction(N->getOpcode()))
26558 return N->getOperand(0);
26559
26560 return SDValue();
26561}
26562
26563SDValue DAGCombiner::visitGET_FPENV_MEM(SDNode *N) {
26564 SDValue Chain = N->getOperand(0);
26565 SDValue Ptr = N->getOperand(1);
26566 EVT MemVT = cast<FPStateAccessSDNode>(N)->getMemoryVT();
26567
26568 // Check if the memory where the FP state is written is used only in a single
26569 // load operation.
26570 LoadSDNode *LdNode = nullptr;
26571 for (auto *U : Ptr->uses()) {
26572 if (U == N)
26573 continue;
26574 if (auto *Ld = dyn_cast<LoadSDNode>(U)) {
26575 if (LdNode && LdNode != Ld)
26576 return SDValue();
26577 LdNode = Ld;
26578 continue;
26579 }
26580 return SDValue();
26581 }
26582 if (!LdNode || !LdNode->isSimple() || LdNode->isIndexed() ||
26583 !LdNode->getOffset().isUndef() || LdNode->getMemoryVT() != MemVT ||
26584 !LdNode->getChain().reachesChainWithoutSideEffects(SDValue(N, 0)))
26585 return SDValue();
26586
26587 // Check if the loaded value is used only in a store operation.
26588 StoreSDNode *StNode = nullptr;
26589 for (auto I = LdNode->use_begin(), E = LdNode->use_end(); I != E; ++I) {
26590 SDUse &U = I.getUse();
26591 if (U.getResNo() == 0) {
26592 if (auto *St = dyn_cast<StoreSDNode>(U.getUser())) {
26593 if (StNode)
26594 return SDValue();
26595 StNode = St;
26596 } else {
26597 return SDValue();
26598 }
26599 }
26600 }
26601 if (!StNode || !StNode->isSimple() || StNode->isIndexed() ||
26602 !StNode->getOffset().isUndef() || StNode->getMemoryVT() != MemVT ||
26603 !StNode->getChain().reachesChainWithoutSideEffects(SDValue(LdNode, 1)))
26604 return SDValue();
26605
26606 // Create new node GET_FPENV_MEM, which uses the store address to write FP
26607 // environment.
26608 SDValue Res = DAG.getGetFPEnv(Chain, SDLoc(N), StNode->getBasePtr(), MemVT,
26609 StNode->getMemOperand());
26610 CombineTo(StNode, Res, false);
26611 return Res;
26612}
26613
26614SDValue DAGCombiner::visitSET_FPENV_MEM(SDNode *N) {
26615 SDValue Chain = N->getOperand(0);
26616 SDValue Ptr = N->getOperand(1);
26617 EVT MemVT = cast<FPStateAccessSDNode>(N)->getMemoryVT();
26618
26619 // Check if the address of the FP state is also used only in a store operation.
26620 StoreSDNode *StNode = nullptr;
26621 for (auto *U : Ptr->uses()) {
26622 if (U == N)
26623 continue;
26624 if (auto *St = dyn_cast<StoreSDNode>(U)) {
26625 if (StNode && StNode != St)
26626 return SDValue();
26627 StNode = St;
26628 continue;
26629 }
26630 return SDValue();
26631 }
26632 if (!StNode || !StNode->isSimple() || StNode->isIndexed() ||
26633 !StNode->getOffset().isUndef() || StNode->getMemoryVT() != MemVT ||
26634 !Chain.reachesChainWithoutSideEffects(SDValue(StNode, 0)))
26635 return SDValue();
26636
26637 // Check if the stored value is loaded from some location and the loaded
26638 // value is used only in the store operation.
26639 SDValue StValue = StNode->getValue();
26640 auto *LdNode = dyn_cast<LoadSDNode>(StValue);
26641 if (!LdNode || !LdNode->isSimple() || LdNode->isIndexed() ||
26642 !LdNode->getOffset().isUndef() || LdNode->getMemoryVT() != MemVT ||
26643 !StNode->getChain().reachesChainWithoutSideEffects(SDValue(LdNode, 1)))
26644 return SDValue();
26645
26646 // Create new node SET_FPENV_MEM, which uses the load address to read FP
26647 // environment.
26648 SDValue Res =
26649 DAG.getSetFPEnv(LdNode->getChain(), SDLoc(N), LdNode->getBasePtr(), MemVT,
26650 LdNode->getMemOperand());
26651 return Res;
26652}
26653
26654/// Returns a vector_shuffle if it is able to transform an AND to a vector_shuffle
26655/// with the destination vector and a zero vector.
26656/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
26657/// vector_shuffle V, Zero, <0, 4, 2, 4>
26658SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
26659 assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");
26660
26661 EVT VT = N->getValueType(0);
26662 SDValue LHS = N->getOperand(0);
26663 SDValue RHS = peekThroughBitcasts(N->getOperand(1));
26664 SDLoc DL(N);
26665
26666 // Make sure we're not running after operation legalization where it
26667 // may have custom lowered the vector shuffles.
26668 if (LegalOperations)
26669 return SDValue();
26670
26671 if (RHS.getOpcode() != ISD::BUILD_VECTOR)
26672 return SDValue();
26673
26674 EVT RVT = RHS.getValueType();
26675 unsigned NumElts = RHS.getNumOperands();
26676
26677 // Attempt to create a valid clear mask, splitting the mask into
26678 // sub elements and checking to see if each is
26679 // all zeros or all ones - suitable for shuffle masking.
26680 auto BuildClearMask = [&](int Split) {
26681 int NumSubElts = NumElts * Split;
26682 int NumSubBits = RVT.getScalarSizeInBits() / Split;
26683
26684 SmallVector<int, 8> Indices;
26685 for (int i = 0; i != NumSubElts; ++i) {
26686 int EltIdx = i / Split;
26687 int SubIdx = i % Split;
26688 SDValue Elt = RHS.getOperand(EltIdx);
26689 // X & undef --> 0 (not undef). So this lane must be converted to choose
26690 // from the zero constant vector (same as if the element had all 0-bits).
26691 if (Elt.isUndef()) {
26692 Indices.push_back(i + NumSubElts);
26693 continue;
26694 }
26695
26696 APInt Bits;
26697 if (auto *Cst = dyn_cast<ConstantSDNode>(Elt))
26698 Bits = Cst->getAPIntValue();
26699 else if (auto *CstFP = dyn_cast<ConstantFPSDNode>(Elt))
26700 Bits = CstFP->getValueAPF().bitcastToAPInt();
26701 else
26702 return SDValue();
26703
26704 // Extract the sub element from the constant bit mask.
26705 if (DAG.getDataLayout().isBigEndian())
26706 Bits = Bits.extractBits(NumSubBits, (Split - SubIdx - 1) * NumSubBits);
26707 else
26708 Bits = Bits.extractBits(NumSubBits, SubIdx * NumSubBits);
26709
26710 if (Bits.isAllOnes())
26711 Indices.push_back(i);
26712 else if (Bits == 0)
26713 Indices.push_back(i + NumSubElts);
26714 else
26715 return SDValue();
26716 }
26717
26718 // Let's see if the target supports this vector_shuffle.
26719 EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
26720 EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
26721 if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
26722 return SDValue();
26723
26724 SDValue Zero = DAG.getConstant(0, DL, ClearVT);
26725 return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
26726 DAG.getBitcast(ClearVT, LHS),
26727 Zero, Indices));
26728 };
26729
26730 // Determine maximum split level (byte level masking).
26731 int MaxSplit = 1;
26732 if (RVT.getScalarSizeInBits() % 8 == 0)
26733 MaxSplit = RVT.getScalarSizeInBits() / 8;
26734
26735 for (int Split = 1; Split <= MaxSplit; ++Split)
26736 if (RVT.getScalarSizeInBits() % Split == 0)
26737 if (SDValue S = BuildClearMask(Split))
26738 return S;
26739
26740 return SDValue();
26741}
26742
26743/// If a vector binop is performed on splat values, it may be profitable to
26744/// extract, scalarize, and insert/splat.
26745static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG,
26746 const SDLoc &DL) {
26747 SDValue N0 = N->getOperand(0);
26748 SDValue N1 = N->getOperand(1);
26749 unsigned Opcode = N->getOpcode();
26750 EVT VT = N->getValueType(0);
26751 EVT EltVT = VT.getVectorElementType();
26752 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
26753
26754 // TODO: Remove/replace the extract cost check? If the elements are available
26755 // as scalars, then there may be no extract cost. Should we ask if
26756 // inserting a scalar back into a vector is cheap instead?
26757 int Index0, Index1;
26758 SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
26759 SDValue Src1 = DAG.getSplatSourceVector(N1, Index1);
26760 // Extract element from splat_vector should be free.
26761 // TODO: use DAG.isSplatValue instead?
26762 bool IsBothSplatVector = N0.getOpcode() == ISD::SPLAT_VECTOR &&
26763 N1.getOpcode() == ISD::SPLAT_VECTOR;
26764 if (!Src0 || !Src1 || Index0 != Index1 ||
26765 Src0.getValueType().getVectorElementType() != EltVT ||
26766 Src1.getValueType().getVectorElementType() != EltVT ||
26767 !(IsBothSplatVector || TLI.isExtractVecEltCheap(VT, Index0)) ||
26768 !TLI.isOperationLegalOrCustom(Opcode, EltVT))
26769 return SDValue();
26770
26771 SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
26772 SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src0, IndexC);
26773 SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src1, IndexC);
26774 SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags());
26775
26776 // If all lanes but 1 are undefined, no need to splat the scalar result.
26777 // TODO: Keep track of undefs and use that info in the general case.
26778 if (N0.getOpcode() == ISD::BUILD_VECTOR && N0.getOpcode() == N1.getOpcode() &&
26779 count_if(N0->ops(), [](SDValue V) { return !V.isUndef(); }) == 1 &&
26780 count_if(N1->ops(), [](SDValue V) { return !V.isUndef(); }) == 1) {
26781 // bo (build_vec ..undef, X, undef...), (build_vec ..undef, Y, undef...) -->
26782 // build_vec ..undef, (bo X, Y), undef...
26783 SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
26784 Ops[Index0] = ScalarBO;
26785 return DAG.getBuildVector(VT, DL, Ops);
26786 }
26787
26788 // bo (splat X, Index), (splat Y, Index) --> splat (bo X, Y), Index
26789 return DAG.getSplat(VT, DL, ScalarBO);
26790}
26791
26792/// Visit a vector cast operation, like FP_EXTEND.
26793SDValue DAGCombiner::SimplifyVCastOp(SDNode *N, const SDLoc &DL) {
26794 EVT VT = N->getValueType(0);
26795 assert(VT.isVector() && "SimplifyVCastOp only works on vectors!");
26796 EVT EltVT = VT.getVectorElementType();
26797 unsigned Opcode = N->getOpcode();
26798
26799 SDValue N0 = N->getOperand(0);
26800 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
26801
26802 // TODO: promote operation might be also good here?
26803 int Index0;
26804 SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
26805 if (Src0 &&
26806 (N0.getOpcode() == ISD::SPLAT_VECTOR ||
26807 TLI.isExtractVecEltCheap(VT, Index0)) &&
26808 TLI.isOperationLegalOrCustom(Opcode, EltVT) &&
26809 TLI.preferScalarizeSplat(N)) {
26810 EVT SrcVT = N0.getValueType();
26811 EVT SrcEltVT = SrcVT.getVectorElementType();
26812 SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
26813 SDValue Elt =
26814 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, SrcEltVT, Src0, IndexC);
26815 SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, Elt, N->getFlags());
26816 if (VT.isScalableVector())
26817 return DAG.getSplatVector(VT, DL, ScalarBO);
26818 SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), ScalarBO);
26819 return DAG.getBuildVector(VT, DL, Ops);
26820 }
26821
26822 return SDValue();
26823}
26824
26825/// Visit a binary vector operation, like ADD.
26826SDValue DAGCombiner::SimplifyVBinOp(SDNode *N, const SDLoc &DL) {
26827 EVT VT = N->getValueType(0);
26828 assert(VT.isVector() && "SimplifyVBinOp only works on vectors!");
26829
26830 SDValue LHS = N->getOperand(0);
26831 SDValue RHS = N->getOperand(1);
26832 unsigned Opcode = N->getOpcode();
26833 SDNodeFlags Flags = N->getFlags();
26834
26835 // Move unary shuffles with identical masks after a vector binop:
26836 // VBinOp (shuffle A, Undef, Mask), (shuffle B, Undef, Mask)
26837 // --> shuffle (VBinOp A, B), Undef, Mask
26838 // This does not require type legality checks because we are creating the
26839 // same types of operations that are in the original sequence. We do have to
26840 // restrict ops like integer div that have immediate UB (e.g., div-by-zero)
26841 // though. This code is adapted from the identical transform in instcombine.
26842 if (DAG.isSafeToSpeculativelyExecute(Opcode)) {
26843 auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS);
26844 auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS);
26845 if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
26846 LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&
26847 (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
26848 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS.getOperand(0),
26849 RHS.getOperand(0), Flags);
26850 SDValue UndefV = LHS.getOperand(1);
26851 return DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
26852 }
26853
26854 // Try to sink a splat shuffle after a binop with a uniform constant.
26855 // This is limited to cases where neither the shuffle nor the constant have
26856 // undefined elements because that could be poison-unsafe or inhibit
26857 // demanded elements analysis. It is further limited to not change a splat
26858 // of an inserted scalar because that may be optimized better by
26859 // load-folding or other target-specific behaviors.
26860 if (isConstOrConstSplat(RHS) && Shuf0 && all_equal(Shuf0->getMask()) &&
26861 Shuf0->hasOneUse() && Shuf0->getOperand(1).isUndef() &&
26862 Shuf0->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
26863 // binop (splat X), (splat C) --> splat (binop X, C)
26864 SDValue X = Shuf0->getOperand(0);
26865 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, X, RHS, Flags);
26866 return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
26867 Shuf0->getMask());
26868 }
26869 if (isConstOrConstSplat(LHS) && Shuf1 && all_equal(Shuf1->getMask()) &&
26870 Shuf1->hasOneUse() && Shuf1->getOperand(1).isUndef() &&
26871 Shuf1->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
26872 // binop (splat C), (splat X) --> splat (binop C, X)
26873 SDValue X = Shuf1->getOperand(0);
26874 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS, X, Flags);
26875 return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
26876 Shuf1->getMask());
26877 }
26878 }
26879
26880 // The following pattern is likely to emerge with vector reduction ops. Moving
26881 // the binary operation ahead of insertion may allow using a narrower vector
26882 // instruction that has better performance than the wide version of the op:
26883 // VBinOp (ins undef, X, Z), (ins undef, Y, Z) --> ins VecC, (VBinOp X, Y), Z
26884 if (LHS.getOpcode() == ISD::INSERT_SUBVECTOR && LHS.getOperand(0).isUndef() &&
26885 RHS.getOpcode() == ISD::INSERT_SUBVECTOR && RHS.getOperand(0).isUndef() &&
26886 LHS.getOperand(2) == RHS.getOperand(2) &&
26887 (LHS.hasOneUse() || RHS.hasOneUse())) {
26888 SDValue X = LHS.getOperand(1);
26889 SDValue Y = RHS.getOperand(1);
26890 SDValue Z = LHS.getOperand(2);
26891 EVT NarrowVT = X.getValueType();
26892 if (NarrowVT == Y.getValueType() &&
26893 TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT,
26894 LegalOperations)) {
26895 // (binop undef, undef) may not return undef, so compute that result.
26896 SDValue VecC =
26897 DAG.getNode(Opcode, DL, VT, DAG.getUNDEF(VT), DAG.getUNDEF(VT));
26898 SDValue NarrowBO = DAG.getNode(Opcode, DL, NarrowVT, X, Y);
26899 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, VecC, NarrowBO, Z);
26900 }
26901 }
26902
26903 // Make sure all but the first op are undef or constant.
26904 auto ConcatWithConstantOrUndef = [](SDValue Concat) {
26905 return Concat.getOpcode() == ISD::CONCAT_VECTORS &&
26906 all_of(drop_begin(Concat->ops()), [](const SDValue &Op) {
26907 return Op.isUndef() ||
26908 ISD::isBuildVectorOfConstantSDNodes(Op.getNode());
26909 });
26910 };
26911
26912 // The following pattern is likely to emerge with vector reduction ops. Moving
26913 // the binary operation ahead of the concat may allow using a narrower vector
26914 // instruction that has better performance than the wide version of the op:
26915 // VBinOp (concat X, undef/constant), (concat Y, undef/constant) -->
26916 // concat (VBinOp X, Y), VecC
26917 if (ConcatWithConstantOrUndef(LHS) && ConcatWithConstantOrUndef(RHS) &&
26918 (LHS.hasOneUse() || RHS.hasOneUse())) {
26919 EVT NarrowVT = LHS.getOperand(0).getValueType();
26920 if (NarrowVT == RHS.getOperand(0).getValueType() &&
26921 TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
26922 unsigned NumOperands = LHS.getNumOperands();
26923 SmallVector<SDValue, 4> ConcatOps;
26924 for (unsigned i = 0; i != NumOperands; ++i) {
26925 // This constant folds for operands 1 and up.
26926 ConcatOps.push_back(DAG.getNode(Opcode, DL, NarrowVT, LHS.getOperand(i),
26927 RHS.getOperand(i)));
26928 }
26929
26930 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
26931 }
26932 }
26933
26934 if (SDValue V = scalarizeBinOpOfSplats(N, DAG, DL))
26935 return V;
26936
26937 return SDValue();
26938}
26939
26940SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
26941 SDValue N2) {
26942 assert(N0.getOpcode() == ISD::SETCC &&
26943 "First argument must be a SetCC node!");
26944
26945 SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
26946 cast<CondCodeSDNode>(N0.getOperand(2))->get());
26947
26948 // If we got a simplified select_cc node back from SimplifySelectCC, then
26949 // break it down into a new SETCC node, and a new SELECT node, and then return
26950 // the SELECT node, since we were called with a SELECT node.
26951 if (SCC.getNode()) {
26952 // Check to see if we got a select_cc back (to turn into setcc/select).
26953 // Otherwise, just return whatever node we got back, like fabs.
26954 if (SCC.getOpcode() == ISD::SELECT_CC) {
26955 const SDNodeFlags Flags = N0->getFlags();
26956 SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
26957 N0.getValueType(),
26958 SCC.getOperand(0), SCC.getOperand(1),
26959 SCC.getOperand(4), Flags);
26960 AddToWorklist(SETCC.getNode());
26961 SDValue SelectNode = DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
26962 SCC.getOperand(2), SCC.getOperand(3));
26963 SelectNode->setFlags(Flags);
26964 return SelectNode;
26965 }
26966
26967 return SCC;
26968 }
26969 return SDValue();
26970}
26971
26972/// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
26973/// being selected between, see if we can simplify the select. Callers of this
26974/// should assume that TheSelect is deleted if this returns true. As such, they
26975/// should return the appropriate thing (e.g. the node) back to the top-level of
26976/// the DAG combiner loop to avoid it being looked at.
26977bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
26978 SDValue RHS) {
26979 // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
26980 // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
26981 if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
26982 if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
26983 // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
26984 SDValue Sqrt = RHS;
26985 ISD::CondCode CC;
26986 SDValue CmpLHS;
26987 const ConstantFPSDNode *Zero = nullptr;
26988
26989 if (TheSelect->getOpcode() == ISD::SELECT_CC) {
26990 CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
26991 CmpLHS = TheSelect->getOperand(0);
26992 Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
26993 } else {
26994 // SELECT or VSELECT
26995 SDValue Cmp = TheSelect->getOperand(0);
26996 if (Cmp.getOpcode() == ISD::SETCC) {
26997 CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
26998 CmpLHS = Cmp.getOperand(0);
26999 Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
27000 }
27001 }
27002 if (Zero && Zero->isZero() &&
27003 Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
27004 CC == ISD::SETULT || CC == ISD::SETLT)) {
27005 // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
27006 CombineTo(TheSelect, Sqrt);
27007 return true;
27008 }
27009 }
27010 }
27011 // Cannot simplify select with vector condition
27012 if (TheSelect->getOperand(0).getValueType().isVector()) return false;
27013
27014 // If this is a select from two identical things, try to pull the operation
27015 // through the select.
27016 if (LHS.getOpcode() != RHS.getOpcode() ||
27017 !LHS.hasOneUse() || !RHS.hasOneUse())
27018 return false;
27019
27020 // If this is a load and the token chain is identical, replace the select
27021 // of two loads with a load through a select of the address to load from.
27022 // This triggers in things like "select bool X, 10.0, 123.0" after the FP
27023 // constants have been dropped into the constant pool.
27024 if (LHS.getOpcode() == ISD::LOAD) {
27025 LoadSDNode *LLD = cast<LoadSDNode>(LHS);
27026 LoadSDNode *RLD = cast<LoadSDNode>(RHS);
27027
27028 // Token chains must be identical.
27029 if (LHS.getOperand(0) != RHS.getOperand(0) ||
27030 // Do not let this transformation reduce the number of volatile loads.
27031 // Be conservative for atomics for the moment
27032 // TODO: This does appear to be legal for unordered atomics (see D66309)
27033 !LLD->isSimple() || !RLD->isSimple() ||
27034 // FIXME: If either is a pre/post inc/dec load,
27035 // we'd need to split out the address adjustment.
27036 LLD->isIndexed() || RLD->isIndexed() ||
27037 // If this is an EXTLOAD, the VT's must match.
27038 LLD->getMemoryVT() != RLD->getMemoryVT() ||
27039 // If this is an EXTLOAD, the kind of extension must match.
27040 (LLD->getExtensionType() != RLD->getExtensionType() &&
27041 // The only exception is if one of the extensions is anyext.
27042 LLD->getExtensionType() != ISD::EXTLOAD &&
27043 RLD->getExtensionType() != ISD::EXTLOAD) ||
27044 // FIXME: this discards src value information. This is
27045 // over-conservative. It would be beneficial to be able to remember
27046 // both potential memory locations. Since we are discarding
27047 // src value info, don't do the transformation if the memory
27048 // locations are not in the default address space.
27049 LLD->getPointerInfo().getAddrSpace() != 0 ||
27050 RLD->getPointerInfo().getAddrSpace() != 0 ||
27051 // We can't produce a CMOV of a TargetFrameIndex since we won't
27052 // generate the address generation required.
27053 LLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
27054 RLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
27055 !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
27056 LLD->getBasePtr().getValueType()))
27057 return false;
27058
27059 // The loads must not depend on one another.
27060 if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD))
27061 return false;
27062
27063 // Check that the select condition doesn't reach either load. If so,
27064 // folding this will induce a cycle into the DAG. If not, this is safe to
27065 // xform, so create a select of the addresses.
27066
27067 SmallPtrSet<const SDNode *, 32> Visited;
27068 SmallVector<const SDNode *, 16> Worklist;
27069
27070 // Always fail if LLD and RLD are not independent. TheSelect is a
27071 // predecessor to all Nodes in question so we need not search past it.
27072
27073 Visited.insert(TheSelect);
27074 Worklist.push_back(LLD);
27075 Worklist.push_back(RLD);
27076
27077 if (SDNode::hasPredecessorHelper(LLD, Visited, Worklist) ||
27078 SDNode::hasPredecessorHelper(RLD, Visited, Worklist))
27079 return false;
27080
27081 SDValue Addr;
27082 if (TheSelect->getOpcode() == ISD::SELECT) {
27083 // We cannot do this optimization if any pair of {RLD, LLD} is a
27084 // predecessor to {RLD, LLD, CondNode}. As we've already compared the
27085 // Loads, we only need to check if CondNode is a successor to one of the
27086 // loads. We can further avoid this if there's no use of their chain
27087 // value.
27088 SDNode *CondNode = TheSelect->getOperand(0).getNode();
27089 Worklist.push_back(CondNode);
27090
27091 if ((LLD->hasAnyUseOfValue(1) &&
27092 SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
27093 (RLD->hasAnyUseOfValue(1) &&
27094 SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
27095 return false;
27096
27097 Addr = DAG.getSelect(SDLoc(TheSelect),
27098 LLD->getBasePtr().getValueType(),
27099 TheSelect->getOperand(0), LLD->getBasePtr(),
27100 RLD->getBasePtr());
27101 } else { // Otherwise SELECT_CC
27102 // We cannot do this optimization if any pair of {RLD, LLD} is a
27103 // predecessor to {RLD, LLD, CondLHS, CondRHS}. As we've already compared
27104 // the Loads, we only need to check if CondLHS/CondRHS is a successor to
27105 // one of the loads. We can further avoid this if there's no use of their
27106 // chain value.
27107
27108 SDNode *CondLHS = TheSelect->getOperand(0).getNode();
27109 SDNode *CondRHS = TheSelect->getOperand(1).getNode();
27110 Worklist.push_back(CondLHS);
27111 Worklist.push_back(CondRHS);
27112
27113 if ((LLD->hasAnyUseOfValue(1) &&
27114 SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
27115 (RLD->hasAnyUseOfValue(1) &&
27116 SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
27117 return false;
27118
27119 Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
27120 LLD->getBasePtr().getValueType(),
27121 TheSelect->getOperand(0),
27122 TheSelect->getOperand(1),
27123 LLD->getBasePtr(), RLD->getBasePtr(),
27124 TheSelect->getOperand(4));
27125 }
27126
27127 SDValue Load;
27128 // It is safe to replace the two loads if they have different alignments,
27129 // but the new load must be the minimum (most restrictive) alignment of the
27130 // inputs.
27131 Align Alignment = std::min(LLD->getAlign(), RLD->getAlign());
27132 MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
27133 if (!RLD->isInvariant())
27134 MMOFlags &= ~MachineMemOperand::MOInvariant;
27135 if (!RLD->isDereferenceable())
27136 MMOFlags &= ~MachineMemOperand::MODereferenceable;
27137 if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
27138 // FIXME: Discards pointer and AA info.
27139 Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
27140 LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
27141 MMOFlags);
27142 } else {
27143 // FIXME: Discards pointer and AA info.
27144 Load = DAG.getExtLoad(
27145 LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
27146 : LLD->getExtensionType(),
27147 SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
27148 MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
27149 }
27150
27151 // Users of the select now use the result of the load.
27152 CombineTo(TheSelect, Load);
27153
27154 // Users of the old loads now use the new load's chain. We know the
27155 // old-load value is dead now.
27156 CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
27157 CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
27158 return true;
27159 }
27160
27161 return false;
27162}
27163
27164/// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
27165/// bitwise 'and'.
27166SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
27167 SDValue N1, SDValue N2, SDValue N3,
27168 ISD::CondCode CC) {
27169 // If this is a select where the false operand is zero and the compare is a
27170 // check of the sign bit, see if we can perform the "gzip trick":
27171 // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
27172 // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
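  // For example, with i32 X: select_cc setlt X, 0, A, 0 becomes (sra X, 31) & A,
  // since the arithmetic shift yields all-ones when X is negative and zero otherwise.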
27173 EVT XType = N0.getValueType();
27174 EVT AType = N2.getValueType();
27175 if (!isNullConstant(N3) || !XType.bitsGE(AType))
27176 return SDValue();
27177
27178 // If the comparison is testing for a positive value, we have to invert
27179 // the sign bit mask, so only do that transform if the target has a bitwise
27180 // 'and not' instruction (the invert is free).
27181 if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
27182 // (X > -1) ? A : 0
27183 // (X > 0) ? X : 0 <-- This is canonical signed max.
27184 if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
27185 return SDValue();
27186 } else if (CC == ISD::SETLT) {
27187 // (X < 0) ? A : 0
27188 // (X < 1) ? X : 0 <-- This is un-canonicalized signed min.
27189 if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
27190 return SDValue();
27191 } else {
27192 return SDValue();
27193 }
27194
27195 // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
27196 // constant.
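  // For example, with i32 and A = 4 (bit 2 set): (sra X, 31) & 4 == (srl X, 29) & 4,
  // because the logical shift moves the sign bit directly into bit 2.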
27197 EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
27198 auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
27199 if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
27200 unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
27201 if (!TLI.shouldAvoidTransformToShift(XType, ShCt)) {
27202 SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
27203 SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
27204 AddToWorklist(Shift.getNode());
27205
27206 if (XType.bitsGT(AType)) {
27207 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
27208 AddToWorklist(Shift.getNode());
27209 }
27210
27211 if (CC == ISD::SETGT)
27212 Shift = DAG.getNOT(DL, Shift, AType);
27213
27214 return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
27215 }
27216 }
27217
27218 unsigned ShCt = XType.getSizeInBits() - 1;
27219 if (TLI.shouldAvoidTransformToShift(XType, ShCt))
27220 return SDValue();
27221
27222 SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
27223 SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
27224 AddToWorklist(Shift.getNode());
27225
27226 if (XType.bitsGT(AType)) {
27227 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
27228 AddToWorklist(Shift.getNode());
27229 }
27230
27231 if (CC == ISD::SETGT)
27232 Shift = DAG.getNOT(DL, Shift, AType);
27233
27234 return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
27235}
27236
27237// Fold select(cc, binop(), binop()) -> binop(select(), select()) etc.
27238SDValue DAGCombiner::foldSelectOfBinops(SDNode *N) {
27239 SDValue N0 = N->getOperand(0);
27240 SDValue N1 = N->getOperand(1);
27241 SDValue N2 = N->getOperand(2);
27242 SDLoc DL(N);
27243
27244 unsigned BinOpc = N1.getOpcode();
27245 if (!TLI.isBinOp(BinOpc) || (N2.getOpcode() != BinOpc) ||
27246 (N1.getResNo() != N2.getResNo()))
27247 return SDValue();
27248
27249 // The use checks are intentionally on SDNode because we may be dealing
27250 // with opcodes that produce more than one SDValue.
27251 // TODO: Do we really need to check N0 (the condition operand of the select)?
27252 // But removing that clause could cause an infinite loop...
27253 if (!N0->hasOneUse() || !N1->hasOneUse() || !N2->hasOneUse())
27254 return SDValue();
27255
27256 // Binops may include opcodes that return multiple values, so all values
27257 // must be created/propagated from the newly created binops below.
27258 SDVTList OpVTs = N1->getVTList();
27259
27260 // Fold select(cond, binop(x, y), binop(z, y))
27261 // --> binop(select(cond, x, z), y)
27262 if (N1.getOperand(1) == N2.getOperand(1)) {
27263 SDValue N10 = N1.getOperand(0);
27264 SDValue N20 = N2.getOperand(0);
27265 SDValue NewSel = DAG.getSelect(DL, N10.getValueType(), N0, N10, N20);
27266 SDValue NewBinOp = DAG.getNode(BinOpc, DL, OpVTs, NewSel, N1.getOperand(1));
27267 NewBinOp->setFlags(N1->getFlags());
27268 NewBinOp->intersectFlagsWith(N2->getFlags());
27269 return SDValue(NewBinOp.getNode(), N1.getResNo());
27270 }
27271
27272 // Fold select(cond, binop(x, y), binop(x, z))
27273 // --> binop(x, select(cond, y, z))
27274 if (N1.getOperand(0) == N2.getOperand(0)) {
27275 SDValue N11 = N1.getOperand(1);
27276 SDValue N21 = N2.getOperand(1);
27277 // Second op VT might be different (e.g. shift amount type)
27278 if (N11.getValueType() == N21.getValueType()) {
27279 SDValue NewSel = DAG.getSelect(DL, N11.getValueType(), N0, N11, N21);
27280 SDValue NewBinOp =
27281 DAG.getNode(BinOpc, DL, OpVTs, N1.getOperand(0), NewSel);
27282 NewBinOp->setFlags(N1->getFlags());
27283 NewBinOp->intersectFlagsWith(N2->getFlags());
27284 return SDValue(NewBinOp.getNode(), N1.getResNo());
27285 }
27286 }
27287
27288 // TODO: Handle isCommutativeBinOp patterns as well?
27289 return SDValue();
27290}
27291
27292// Transform (fneg/fabs (bitconvert x)) to avoid loading constant pool values.
27293SDValue DAGCombiner::foldSignChangeInBitcast(SDNode *N) {
27294 SDValue N0 = N->getOperand(0);
27295 EVT VT = N->getValueType(0);
27296 bool IsFabs = N->getOpcode() == ISD::FABS;
27297 bool IsFree = IsFabs ? TLI.isFAbsFree(VT) : TLI.isFNegFree(VT);
27298
27299 if (IsFree || N0.getOpcode() != ISD::BITCAST || !N0.hasOneUse())
27300 return SDValue();
27301
27302 SDValue Int = N0.getOperand(0);
27303 EVT IntVT = Int.getValueType();
27304
27305 // The operand to cast should be integer.
27306 if (!IntVT.isInteger() || IntVT.isVector())
27307 return SDValue();
27308
27309 // (fneg (bitconvert x)) -> (bitconvert (xor x sign))
27310 // (fabs (bitconvert x)) -> (bitconvert (and x ~sign))
27311 APInt SignMask;
27312 if (N0.getValueType().isVector()) {
27313 // For vector, create a sign mask (0x80...) or its inverse (for fabs,
27314 // 0x7f...) per element and splat it.
27315 SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
27316 if (IsFabs)
27317 SignMask = ~SignMask;
27318 SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
27319 } else {
27320 // For scalar, just use the sign mask (0x80... or the inverse, 0x7f...)
27321 SignMask = APInt::getSignMask(IntVT.getSizeInBits());
27322 if (IsFabs)
27323 SignMask = ~SignMask;
27324 }
27325 SDLoc DL(N0);
27326 Int = DAG.getNode(IsFabs ? ISD::AND : ISD::XOR, DL, IntVT, Int,
27327 DAG.getConstant(SignMask, DL, IntVT));
27328 AddToWorklist(Int.getNode());
27329 return DAG.getBitcast(VT, Int);
27330}
27331
27332/// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)"
27333/// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
27334/// in it. This may be a win when the constant is not otherwise available
27335/// because it replaces two constant pool loads with one.
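/// (The 4 in the example above is the allocation size of float; in general the
/// second array element sits one element's allocation size past the first.)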
27336SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset(
27337 const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
27338 ISD::CondCode CC) {
27339 if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType()))
27340 return SDValue();
27341
27342 // If we are before legalize types, we want the other legalization to happen
27343 // first (for example, to avoid messing with soft float).
27344 auto *TV = dyn_cast<ConstantFPSDNode>(N2);
27345 auto *FV = dyn_cast<ConstantFPSDNode>(N3);
27346 EVT VT = N2.getValueType();
27347 if (!TV || !FV || !TLI.isTypeLegal(VT))
27348 return SDValue();
27349
27350 // If a constant can be materialized without loads, this does not make sense.
27351 if (TLI.getOperationAction(ISD::ConstantFP, VT) == TargetLowering::Legal ||
27352 TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0), ForCodeSize) ||
27353 TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0), ForCodeSize))
27354 return SDValue();
27355
27356 // If both constants have multiple uses, then we won't need to do an extra
27357 // load. The values are likely around in registers for other users.
27358 if (!TV->hasOneUse() && !FV->hasOneUse())
27359 return SDValue();
27360
27361 Constant *Elts[] = { const_cast<ConstantFP*>(FV->getConstantFPValue()),
27362 const_cast<ConstantFP*>(TV->getConstantFPValue()) };
27363 Type *FPTy = Elts[0]->getType();
27364 const DataLayout &TD = DAG.getDataLayout();
27365
27366 // Create a ConstantArray of the two constants.
27367 Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
27368 SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
27369 TD.getPrefTypeAlign(FPTy));
27370 Align Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlign();
27371
27372 // Get offsets to the 0 and 1 elements of the array, so we can select between
27373 // them.
27374 SDValue Zero = DAG.getIntPtrConstant(0, DL);
27375 unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
27376 SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
27377 SDValue Cond =
27378 DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC);
27379 AddToWorklist(Cond.getNode());
27380 SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), Cond, One, Zero);
27381 AddToWorklist(CstOffset.getNode());
27382 CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset);
27383 AddToWorklist(CPIdx.getNode());
27384 return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
27385 MachinePointerInfo::getConstantPool(
27386 DAG.getMachineFunction()), Alignment);
27387}
27388
27389/// Simplify an expression of the form (N0 cond N1) ? N2 : N3
27390/// where 'cond' is the comparison specified by CC.
27391SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
27392 SDValue N2, SDValue N3, ISD::CondCode CC,
27393 bool NotExtCompare) {
27394 // (x ? y : y) -> y.
27395 if (N2 == N3) return N2;
27396
27397 EVT CmpOpVT = N0.getValueType();
27398 EVT CmpResVT = getSetCCResultType(CmpOpVT);
27399 EVT VT = N2.getValueType();
27400 auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
27401 auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
27402 auto *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
27403
27404 // Determine if the condition we're dealing with is constant.
27405 if (SDValue SCC = DAG.FoldSetCC(CmpResVT, N0, N1, CC, DL)) {
27406 AddToWorklist(SCC.getNode());
27407 if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC)) {
27408 // fold select_cc true, x, y -> x
27409 // fold select_cc false, x, y -> y
27410 return !(SCCC->isZero()) ? N2 : N3;
27411 }
27412 }
27413
27414 if (SDValue V =
27415 convertSelectOfFPConstantsToLoadOffset(DL, N0, N1, N2, N3, CC))
27416 return V;
27417
27418 if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
27419 return V;
27420
27421 // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (sra (shl x)) A)
27422 // where y has a single bit set.
27423 // In plain terms, we can turn the SELECT_CC into an AND
27424 // when the condition can be materialized as an all-ones register. Any
27425 // single bit-test can be materialized as an all-ones register with
27426 // shift-left and shift-right-arith.
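  // For example, select_cc seteq (and X, 8), 0, 0, A becomes
  // (sra (shl X, 28), 31) & A for i32: the shifts produce all-ones exactly when
  // bit 3 of X is set, so the result is A in that case and 0 otherwise.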
27427 if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
27428 N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
27429 SDValue AndLHS = N0->getOperand(0);
27430 auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
27431 if (ConstAndRHS && ConstAndRHS->getAPIntValue().popcount() == 1) {
27432 // Shift the tested bit over the sign bit.
27433 const APInt &AndMask = ConstAndRHS->getAPIntValue();
27434 if (TLI.shouldFoldSelectWithSingleBitTest(VT, AndMask)) {
27435 unsigned ShCt = AndMask.getBitWidth() - 1;
27436 SDValue ShlAmt =
27437 DAG.getConstant(AndMask.countl_zero(), SDLoc(AndLHS),
27438 getShiftAmountTy(AndLHS.getValueType()));
27439 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
27440
27441 // Now arithmetic right shift it all the way over, so the result is
27442 // either all-ones, or zero.
27443 SDValue ShrAmt =
27444 DAG.getConstant(ShCt, SDLoc(Shl),
27445 getShiftAmountTy(Shl.getValueType()));
27446 SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
27447
27448 return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
27449 }
27450 }
27451 }
27452
27453 // fold select C, 16, 0 -> shl C, 4
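  // i.e. when the true value is a power of two and the false value is zero, the
  // zero-or-one boolean can simply be shifted left by log2 of the constant.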
27454 bool Fold = N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2();
27455 bool Swap = N3C && isNullConstant(N2) && N3C->getAPIntValue().isPowerOf2();
27456
27457 if ((Fold || Swap) &&
27458 TLI.getBooleanContents(CmpOpVT) ==
27459 TargetLowering::ZeroOrOneBooleanContent &&
27460 (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT))) {
27461
27462 if (Swap) {
27463 CC = ISD::getSetCCInverse(CC, CmpOpVT);
27464 std::swap(N2C, N3C);
27465 }
27466
27467 // If the caller doesn't want us to simplify this into a zext of a compare,
27468 // don't do it.
27469 if (NotExtCompare && N2C->isOne())
27470 return SDValue();
27471
27472 SDValue Temp, SCC;
27473 // zext (setcc n0, n1)
27474 if (LegalTypes) {
27475 SCC = DAG.getSetCC(DL, CmpResVT, N0, N1, CC);
27476 Temp = DAG.getZExtOrTrunc(SCC, SDLoc(N2), VT);
27477 } else {
27478 SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
27479 Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
27480 }
27481
27482 AddToWorklist(SCC.getNode());
27483 AddToWorklist(Temp.getNode());
27484
27485 if (N2C->isOne())
27486 return Temp;
27487
27488 unsigned ShCt = N2C->getAPIntValue().logBase2();
27489 if (TLI.shouldAvoidTransformToShift(VT, ShCt))
27490 return SDValue();
27491
27492 // shl setcc result by log2 n2c
27493 return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
27494 DAG.getConstant(ShCt, SDLoc(Temp),
27495 getShiftAmountTy(Temp.getValueType())));
27496 }
27497
27498 // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
27499 // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
27500 // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
27501 // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
27502 // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
27503 // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
27504 // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
27505 // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
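  // These match the guarded expansions of ctlz/cttz where a zero input is
  // special-cased to the bit width; the non-ZERO_UNDEF nodes already produce
  // that value for zero, so the select is redundant.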
27506 if (N1C && N1C->isZero() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
27507 SDValue ValueOnZero = N2;
27508 SDValue Count = N3;
27509 // If the condition is NE instead of EQ, swap the operands.
27510 if (CC == ISD::SETNE)
27511 std::swap(ValueOnZero, Count);
27512 // Check if the value on zero is a constant equal to the bits in the type.
27513 if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
27514 if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
27515 // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
27516 // legal, combine to just cttz.
27517 if ((Count.getOpcode() == ISD::CTTZ ||
27518 Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
27519 N0 == Count.getOperand(0) &&
27520 (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
27521 return DAG.getNode(ISD::CTTZ, DL, VT, N0);
27522 // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
27523 // legal, combine to just ctlz.
27524 if ((Count.getOpcode() == ISD::CTLZ ||
27525 Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
27526 N0 == Count.getOperand(0) &&
27527 (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
27528 return DAG.getNode(ISD::CTLZ, DL, VT, N0);
27529 }
27530 }
27531 }
27532
27533 // Fold select_cc setgt X, -1, C, ~C -> xor (ashr X, BW-1), C
27534 // Fold select_cc setlt X, 0, C, ~C -> xor (ashr X, BW-1), ~C
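  // The arithmetic shift produces 0 or all-ones, and xor with C then yields C or ~C.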
27535 if (!NotExtCompare && N1C && N2C && N3C &&
27536 N2C->getAPIntValue() == ~N3C->getAPIntValue() &&
27537 ((N1C->isAllOnes() && CC == ISD::SETGT) ||
27538 (N1C->isZero() && CC == ISD::SETLT)) &&
27539 !TLI.shouldAvoidTransformToShift(VT, CmpOpVT.getScalarSizeInBits() - 1)) {
27540 SDValue ASR = DAG.getNode(
27541 ISD::SRA, DL, CmpOpVT, N0,
27542 DAG.getConstant(CmpOpVT.getScalarSizeInBits() - 1, DL, CmpOpVT));
27543 return DAG.getNode(ISD::XOR, DL, VT, DAG.getSExtOrTrunc(ASR, DL, VT),
27544 DAG.getSExtOrTrunc(CC == ISD::SETLT ? N3 : N2, DL, VT));
27545 }
27546
27547 if (SDValue S = PerformMinMaxFpToSatCombine(N0, N1, N2, N3, CC, DAG))
27548 return S;
27549 if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N2, N3, CC, DAG))
27550 return S;
27551
27552 return SDValue();
27553}
27554
27555/// This is a stub for TargetLowering::SimplifySetCC.
27556SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
27557 ISD::CondCode Cond, const SDLoc &DL,
27558 bool foldBooleans) {
27559 TargetLowering::DAGCombinerInfo
27560 DagCombineInfo(DAG, Level, false, this);
27561 return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
27562}
27563
27564/// Given an ISD::SDIV node expressing a divide by constant, return
27565/// a DAG expression to select that will generate the same value by multiplying
27566/// by a magic number.
27567/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
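/// For example, a signed divide by a non-power-of-2 constant becomes a MULHS by
/// a magic constant followed by shift and add fix-ups; TargetLowering::BuildSDIV
/// produces the actual node sequence.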
27568SDValue DAGCombiner::BuildSDIV(SDNode *N) {
27569 // when optimising for minimum size, we don't want to expand a div to a mul
27570 // and a shift.
27571 if (DAG.getMachineFunction().getFunction().hasMinSize())
27572 return SDValue();
27573
27574 SmallVector<SDNode *, 8> Built;
27575 if (SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, Built)) {
27576 for (SDNode *N : Built)
27577 AddToWorklist(N);
27578 return S;
27579 }
27580
27581 return SDValue();
27582}
27583
27584/// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
27585/// DAG expression that will generate the same value by right shifting.
27586SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
27587 ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
27588 if (!C)
27589 return SDValue();
27590
27591 // Avoid division by zero.
27592 if (C->isZero())
27593 return SDValue();
27594
27595 SmallVector<SDNode *, 8> Built;
27596 if (SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built)) {
27597 for (SDNode *N : Built)
27598 AddToWorklist(N);
27599 return S;
27600 }
27601
27602 return SDValue();
27603}
27604
27605/// Given an ISD::UDIV node expressing a divide by constant, return a DAG
27606/// expression that will generate the same value by multiplying by a magic
27607/// number.
27608/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
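/// For example, udiv X, 10 on i32 can be computed as (mulhu X, 0xCCCCCCCD) >> 3.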
27609SDValue DAGCombiner::BuildUDIV(SDNode *N) {
27610 // when optimising for minimum size, we don't want to expand a div to a mul
27611 // and a shift.
27612 if (DAG.getMachineFunction().getFunction().hasMinSize())
27613 return SDValue();
27614
27615 SmallVector<SDNode *, 8> Built;
27616 if (SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, Built)) {
27617 for (SDNode *N : Built)
27618 AddToWorklist(N);
27619 return S;
27620 }
27621
27622 return SDValue();
27623}
27624
27625/// Given an ISD::SREM node expressing a remainder by constant power of 2,
27626/// return a DAG expression that will generate the same value.
27627SDValue DAGCombiner::BuildSREMPow2(SDNode *N) {
27628 ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
27629 if (!C)
27630 return SDValue();
27631
27632 // Avoid division by zero.
27633 if (C->isZero())
27634 return SDValue();
27635
27636 SmallVector<SDNode *, 8> Built;
27637 if (SDValue S = TLI.BuildSREMPow2(N, C->getAPIntValue(), DAG, Built)) {
27638 for (SDNode *N : Built)
27639 AddToWorklist(N);
27640 return S;
27641 }
27642
27643 return SDValue();
27644}
27645
27646// This is basically just a port of takeLog2 from InstCombineMulDivRem.cpp
27647//
27648// Returns the node that represents `Log2(Op)`. This may create a new node. If
27649// we are unable to compute `Log2(Op)`, it returns `SDValue()`.
27650//
27651// All nodes will be created at `DL` and the output will be of type `VT`.
27652//
27653// This will only return `Log2(Op)` if we can prove `Op` is non-zero. Set
27654// `AssumeNonZero` if this function should simply assume (rather than prove) that
27655// `Op` is non-zero.
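// For example, log2(1 << n) folds to n, and log2(select c, 16, 4) folds to
// select c, 4, 2.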
27656static SDValue takeInexpensiveLog2(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
27657 SDValue Op, unsigned Depth,
27658 bool AssumeNonZero) {
27659 assert(VT.isInteger() && "Only integer types are supported!");
27660
27661 auto PeekThroughCastsAndTrunc = [](SDValue V) {
27662 while (true) {
27663 switch (V.getOpcode()) {
27664 case ISD::TRUNCATE:
27665 case ISD::ZERO_EXTEND:
27666 V = V.getOperand(0);
27667 break;
27668 default:
27669 return V;
27670 }
27671 }
27672 };
27673
27674 if (VT.isScalableVector())
27675 return SDValue();
27676
27677 Op = PeekThroughCastsAndTrunc(Op);
27678
27679 // Helper for determining whether a value is a power-2 constant scalar or a
27680 // vector of such elements.
27681 SmallVector<APInt> Pow2Constants;
27682 auto IsPowerOfTwo = [&Pow2Constants](ConstantSDNode *C) {
27683 if (C->isZero() || C->isOpaque())
27684 return false;
27685 // TODO: We may also be able to support negative powers of 2 here.
27686 if (C->getAPIntValue().isPowerOf2()) {
27687 Pow2Constants.emplace_back(C->getAPIntValue());
27688 return true;
27689 }
27690 return false;
27691 };
27692
27693 if (ISD::matchUnaryPredicate(Op, IsPowerOfTwo)) {
27694 if (!VT.isVector())
27695 return DAG.getConstant(Pow2Constants.back().logBase2(), DL, VT);
27696 // We need to create a build vector
27697 SmallVector<SDValue> Log2Ops;
27698 for (const APInt &Pow2 : Pow2Constants)
27699 Log2Ops.emplace_back(
27700 DAG.getConstant(Pow2.logBase2(), DL, VT.getScalarType()));
27701 return DAG.getBuildVector(VT, DL, Log2Ops);
27702 }
27703
27704 if (Depth >= DAG.MaxRecursionDepth)
27705 return SDValue();
27706
27707 auto CastToVT = [&](EVT NewVT, SDValue ToCast) {
27708 ToCast = PeekThroughCastsAndTrunc(ToCast);
27709 EVT CurVT = ToCast.getValueType();
27710 if (NewVT == CurVT)
27711 return ToCast;
27712
27713 if (NewVT.getSizeInBits() == CurVT.getSizeInBits())
27714 return DAG.getBitcast(NewVT, ToCast);
27715
27716 return DAG.getZExtOrTrunc(ToCast, DL, NewVT);
27717 };
27718
27719 // log2(X << Y) -> log2(X) + Y
27720 if (Op.getOpcode() == ISD::SHL) {
27721 // 1 << Y and X nuw/nsw << Y are all non-zero.
27722 if (AssumeNonZero || Op->getFlags().hasNoUnsignedWrap() ||
27723 Op->getFlags().hasNoSignedWrap() || isOneConstant(Op.getOperand(0)))
27724 if (SDValue LogX = takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(0),
27725 Depth + 1, AssumeNonZero))
27726 return DAG.getNode(ISD::ADD, DL, VT, LogX,
27727 CastToVT(VT, Op.getOperand(1)));
27728 }
27729
27730 // c ? X : Y -> c ? Log2(X) : Log2(Y)
27731 if ((Op.getOpcode() == ISD::SELECT || Op.getOpcode() == ISD::VSELECT) &&
27732 Op.hasOneUse()) {
27733 if (SDValue LogX = takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(1),
27734 Depth + 1, AssumeNonZero))
27735 if (SDValue LogY = takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(2),
27736 Depth + 1, AssumeNonZero))
27737 return DAG.getSelect(DL, VT, Op.getOperand(0), LogX, LogY);
27738 }
27739
27740 // log2(umin(X, Y)) -> umin(log2(X), log2(Y))
27741 // log2(umax(X, Y)) -> umax(log2(X), log2(Y))
27742 if ((Op.getOpcode() == ISD::UMIN || Op.getOpcode() == ISD::UMAX) &&
27743 Op.hasOneUse()) {
27744 // Use AssumeNonZero as false here. Otherwise we can hit a case where
27745 // log2(umax(X, Y)) != umax(log2(X), log2(Y)) (because of overflow).
27746 if (SDValue LogX =
27747 takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(0), Depth + 1,
27748 /*AssumeNonZero*/ false))
27749 if (SDValue LogY =
27750 takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(1), Depth + 1,
27751 /*AssumeNonZero*/ false))
27752 return DAG.getNode(Op.getOpcode(), DL, VT, LogX, LogY);
27753 }
27754
27755 return SDValue();
27756}
27757
27758/// Determines the LogBase2 value for a non-null input value using the
27759/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
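/// For example, with a 32-bit V = 8: ctlz(8) = 28, so LogBase2(8) = 31 - 28 = 3.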
27760SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL,
27761 bool KnownNonZero, bool InexpensiveOnly,
27762 std::optional<EVT> OutVT) {
27763 EVT VT = OutVT ? *OutVT : V.getValueType();
27764 SDValue InexpensiveLogBase2 =
27765 takeInexpensiveLog2(DAG, DL, VT, V, /*Depth*/ 0, KnownNonZero);
27766 if (InexpensiveLogBase2 || InexpensiveOnly || !DAG.isKnownToBeAPowerOfTwo(V))
27767 return InexpensiveLogBase2;
27768
27769 SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
27770 SDValue Base = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
27771 SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
27772 return LogBase2;
27773}
27774
27775/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
27776/// For the reciprocal, we need to find the zero of the function:
27777/// F(X) = 1/X - A [which has a zero at X = 1/A]
27778/// =>
27779/// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
27780/// does not require additional intermediate precision]
27781/// For the last iteration, put numerator N into it to gain more precision:
27782/// Result = N X_i + X_i (N - N A X_i)
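/// For example, with A = 3 and an initial estimate X_0 = 0.3, one step gives
/// X_1 = 0.3 * (2 - 3 * 0.3) = 0.33, converging quadratically towards 1/3.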
27783SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op,
27784 SDNodeFlags Flags) {
27785 if (LegalDAG)
27786 return SDValue();
27787
27788 // TODO: Handle extended types?
27789 EVT VT = Op.getValueType();
27790 if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 &&
27791 VT.getScalarType() != MVT::f64)
27792 return SDValue();
27793
27794 // If estimates are explicitly disabled for this function, we're done.
27795 MachineFunction &MF = DAG.getMachineFunction();
27796 int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
27797 if (Enabled == TLI.ReciprocalEstimate::Disabled)
27798 return SDValue();
27799
27800 // Estimates may be explicitly enabled for this type with a custom number of
27801 // refinement steps.
27802 int Iterations = TLI.getDivRefinementSteps(VT, MF);
27803 if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
27804 AddToWorklist(Est.getNode());
27805
27806 SDLoc DL(Op);
27807 if (Iterations) {
27808 SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
27809
27810 // Newton iterations: Est = Est + Est (N - Arg * Est)
27811 // If this is the last iteration, also multiply by the numerator.
27812 for (int i = 0; i < Iterations; ++i) {
27813 SDValue MulEst = Est;
27814
27815 if (i == Iterations - 1) {
27816 MulEst = DAG.getNode(ISD::FMUL, DL, VT, N, Est, Flags);
27817 AddToWorklist(MulEst.getNode());
27818 }
27819
27820 SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, MulEst, Flags);
27821 AddToWorklist(NewEst.getNode());
27822
27823 NewEst = DAG.getNode(ISD::FSUB, DL, VT,
27824 (i == Iterations - 1 ? N : FPOne), NewEst, Flags);
27825 AddToWorklist(NewEst.getNode());
27826
27827 NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
27828 AddToWorklist(NewEst.getNode());
27829
27830 Est = DAG.getNode(ISD::FADD, DL, VT, MulEst, NewEst, Flags);
27831 AddToWorklist(Est.getNode());
27832 }
27833 } else {
27834 // If no iterations are available, multiply with N.
27835 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, N, Flags);
27836 AddToWorklist(Est.getNode());
27837 }
27838
27839 return Est;
27840 }
27841
27842 return SDValue();
27843}
27844
27845/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
27846/// For the reciprocal sqrt, we need to find the zero of the function:
27847/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
27848/// =>
27849/// X_{i+1} = X_i (1.5 - A X_i^2 / 2)
27850/// As a result, we precompute A/2 prior to the iteration loop.
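/// For example, with A = 4 and X_0 = 0.6: X_1 = 0.6 * (1.5 - 2 * 0.36) = 0.468,
/// moving towards 1/sqrt(4) = 0.5.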
27851SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
27852 unsigned Iterations,
27853 SDNodeFlags Flags, bool Reciprocal) {
27854 EVT VT = Arg.getValueType();
27855 SDLoc DL(Arg);
27856 SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
27857
27858 // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
27859 // this entire sequence requires only one FP constant.
27860 SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
27861 HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
27862
27863 // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
27864 for (unsigned i = 0; i < Iterations; ++i) {
27865 SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
27866 NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
27867 NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
27868 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
27869 }
27870
27871 // If non-reciprocal square root is requested, multiply the result by Arg.
27872 if (!Reciprocal)
27873 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
27874
27875 return Est;
27876}
27877
27878/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
27879/// For the reciprocal sqrt, we need to find the zero of the function:
27880/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
27881/// =>
27882/// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
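/// This is algebraically the same step as the one-constant form above, refactored
/// so that only the two constants -0.5 and -3.0 are needed.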
27883SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
27884 unsigned Iterations,
27885 SDNodeFlags Flags, bool Reciprocal) {
27886 EVT VT = Arg.getValueType();
27887 SDLoc DL(Arg);
27888 SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
27889 SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
27890
27891 // This routine must enter the loop below to work correctly
27892 // when (Reciprocal == false).
27893 assert(Iterations > 0);
27894
27895 // Newton iterations for reciprocal square root:
27896 // E = (E * -0.5) * ((A * E) * E + -3.0)
27897 for (unsigned i = 0; i < Iterations; ++i) {
27898 SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
27899 SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
27900 SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
27901
27902 // When calculating a square root at the last iteration build:
27903 // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
27904 // (notice a common subexpression)
27905 SDValue LHS;
27906 if (Reciprocal || (i + 1) < Iterations) {
27907 // RSQRT: LHS = (E * -0.5)
27908 LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
27909 } else {
27910 // SQRT: LHS = (A * E) * -0.5
27911 LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
27912 }
27913
27914 Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
27915 }
27916
27917 return Est;
27918}
27919
27920/// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
27921/// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
27922/// Op can be zero.
27923SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
27924 bool Reciprocal) {
27925 if (LegalDAG)
27926 return SDValue();
27927
27928 // TODO: Handle extended types?
27929 EVT VT = Op.getValueType();
27930 if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 &&
27931 VT.getScalarType() != MVT::f64)
27932 return SDValue();
27933
27934 // If estimates are explicitly disabled for this function, we're done.
27935 MachineFunction &MF = DAG.getMachineFunction();
27936 int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
27937 if (Enabled == TLI.ReciprocalEstimate::Disabled)
27938 return SDValue();
27939
27940 // Estimates may be explicitly enabled for this type with a custom number of
27941 // refinement steps.
27942 int Iterations = TLI.getSqrtRefinementSteps(VT, MF);
27943
27944 bool UseOneConstNR = false;
27945 if (SDValue Est =
27946 TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
27947 Reciprocal)) {
27948 AddToWorklist(Est.getNode());
27949
27950 if (Iterations > 0)
27951 Est = UseOneConstNR
27952 ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
27953 : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
27954 if (!Reciprocal) {
27955 SDLoc DL(Op);
27956 // Try the target specific test first.
27957 SDValue Test = TLI.getSqrtInputTest(Op, DAG, DAG.getDenormalMode(VT));
27958
27959 // The estimate is now completely wrong if the input was exactly 0.0 or
27960 // possibly a denormal. Force the answer to 0.0 or the value provided by the
27961 // target for those cases.
27962 Est = DAG.getNode(
27963 Test.getValueType().isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT,
27964 Test, TLI.getSqrtResultForDenormInput(Op, DAG), Est);
27965 }
27966 return Est;
27967 }
27968
27969 return SDValue();
27970}
27971
27972SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
27973 return buildSqrtEstimateImpl(Op, Flags, true);
27974}
27975
27976SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
27977 return buildSqrtEstimateImpl(Op, Flags, false);
27978}
27979
27980/// Return true if there is any possibility that the two addresses overlap.
27981bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const {
27982
27983 struct MemUseCharacteristics {
27984 bool IsVolatile;
27985 bool IsAtomic;
27986 SDValue BasePtr;
27987 int64_t Offset;
27988 LocationSize NumBytes;
27989 MachineMemOperand *MMO;
27990 };
27991
27992 auto getCharacteristics = [](SDNode *N) -> MemUseCharacteristics {
27993 if (const auto *LSN = dyn_cast<LSBaseSDNode>(N)) {
27994 int64_t Offset = 0;
27995 if (auto *C = dyn_cast<ConstantSDNode>(LSN->getOffset()))
27996 Offset = (LSN->getAddressingMode() == ISD::PRE_INC) ? C->getSExtValue()
27997 : (LSN->getAddressingMode() == ISD::PRE_DEC)
27998 ? -1 * C->getSExtValue()
27999 : 0;
28000 TypeSize Size = LSN->getMemoryVT().getStoreSize();
28001 return {LSN->isVolatile(), LSN->isAtomic(),
28002 LSN->getBasePtr(), Offset /*base offset*/,
28003 LocationSize::precise(Size), LSN->getMemOperand()};
28004 }
28005 if (const auto *LN = cast<LifetimeSDNode>(N))
28006 return {false /*isVolatile*/,
28007 /*isAtomic*/ false,
28008 LN->getOperand(1),
28009 (LN->hasOffset()) ? LN->getOffset() : 0,
28010 (LN->hasOffset()) ? LocationSize::precise(LN->getSize())
28011 : LocationSize::beforeOrAfterPointer(),
28012 (MachineMemOperand *)nullptr};
28013 // Default.
28014 return {false /*isvolatile*/,
28015 /*isAtomic*/ false,
28016 SDValue(),
28017 (int64_t)0 /*offset*/,
28018 LocationSize::beforeOrAfterPointer() /*size*/,
28019 (MachineMemOperand *)nullptr};
28020 };
28021
28022 MemUseCharacteristics MUC0 = getCharacteristics(Op0),
28023 MUC1 = getCharacteristics(Op1);
28024
28025 // If they are to the same address, then they must be aliases.
28026 if (MUC0.BasePtr.getNode() && MUC0.BasePtr == MUC1.BasePtr &&
28027 MUC0.Offset == MUC1.Offset)
28028 return true;
28029
28030 // If they are both volatile then they cannot be reordered.
28031 if (MUC0.IsVolatile && MUC1.IsVolatile)
28032 return true;
28033
28034 // Be conservative about atomics for the moment
28035 // TODO: This is way overconservative for unordered atomics (see D66309)
28036 if (MUC0.IsAtomic && MUC1.IsAtomic)
28037 return true;
28038
28039 if (MUC0.MMO && MUC1.MMO) {
28040 if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
28041 (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
28042 return false;
28043 }
28044
28045 // If NumBytes is scalable and offset is not 0, conservatively return may
28046 // alias
28047 if ((MUC0.NumBytes.hasValue() && MUC0.NumBytes.isScalable() &&
28048 MUC0.Offset != 0) ||
28049 (MUC1.NumBytes.hasValue() && MUC1.NumBytes.isScalable() &&
28050 MUC1.Offset != 0))
28051 return true;
28052 // Try to prove that there is aliasing, or that there is no aliasing. Either
28053 // way, we can return now. If nothing can be proved, proceed with more tests.
28054 bool IsAlias;
28055 if (BaseIndexOffset::computeAliasing(Op0, MUC0.NumBytes, Op1, MUC1.NumBytes,
28056 DAG, IsAlias))
28057 return IsAlias;
28058
28059 // The following all rely on MMO0 and MMO1 being valid. Fail conservatively if
28060 // either are not known.
28061 if (!MUC0.MMO || !MUC1.MMO)
28062 return true;
28063
28064 // If one operation reads from invariant memory, and the other may store, they
28065 // cannot alias. These should really be checking the equivalent of mayWrite,
28066 // but it only matters for memory nodes other than load/store.
28067 if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
28068 (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
28069 return false;
28070
28071 // If we know required SrcValue1 and SrcValue2 have relatively large
28072 // alignment compared to the size and offset of the access, we may be able
28073 // to prove they do not alias. This check is conservative for now to catch
28074 // cases created by splitting vector types, it only works when the offsets are
28075 // multiples of the size of the data.
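  // For example, two 4-byte accesses from an 8-byte-aligned base at offsets 0 and 4
  // cannot overlap, so they do not alias.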
28076 int64_t SrcValOffset0 = MUC0.MMO->getOffset();
28077 int64_t SrcValOffset1 = MUC1.MMO->getOffset();
28078 Align OrigAlignment0 = MUC0.MMO->getBaseAlign();
28079 Align OrigAlignment1 = MUC1.MMO->getBaseAlign();
28080 LocationSize Size0 = MUC0.NumBytes;
28081 LocationSize Size1 = MUC1.NumBytes;
28082
28083 if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
28084 Size0.hasValue() && Size1.hasValue() && !Size0.isScalable() &&
28085 !Size1.isScalable() && Size0 == Size1 &&
28086 OrigAlignment0 > Size0.getValue().getKnownMinValue() &&
28087 SrcValOffset0 % Size0.getValue().getKnownMinValue() == 0 &&
28088 SrcValOffset1 % Size1.getValue().getKnownMinValue() == 0) {
28089 int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0.value();
28090 int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1.value();
28091
28092 // There is no overlap between these relatively aligned accesses of
28093 // similar size. Return no alias.
28094 if ((OffAlign0 + static_cast<int64_t>(
28095 Size0.getValue().getKnownMinValue())) <= OffAlign1 ||
28096 (OffAlign1 + static_cast<int64_t>(
28097 Size1.getValue().getKnownMinValue())) <= OffAlign0)
28098 return false;
28099 }
28100
28101 bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
28102 ? CombinerGlobalAA
28103 : DAG.getSubtarget().useAA();
28104#ifndef NDEBUG
28105 if (CombinerAAOnlyFunc.getNumOccurrences() &&
28106 CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
28107 UseAA = false;
28108#endif
28109
28110 if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() &&
28111 Size0.hasValue() && Size1.hasValue() &&
28112 // Can't represent a scalable size + fixed offset in LocationSize
28113 (!Size0.isScalable() || SrcValOffset0 == 0) &&
28114 (!Size1.isScalable() || SrcValOffset1 == 0)) {
28115 // Use alias analysis information.
28116 int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
28117 int64_t Overlap0 =
28118 Size0.getValue().getKnownMinValue() + SrcValOffset0 - MinOffset;
28119 int64_t Overlap1 =
28120 Size1.getValue().getKnownMinValue() + SrcValOffset1 - MinOffset;
28121 LocationSize Loc0 =
28122 Size0.isScalable() ? Size0 : LocationSize::precise(Overlap0);
28123 LocationSize Loc1 =
28124 Size1.isScalable() ? Size1 : LocationSize::precise(Overlap1);
28125 if (AA->isNoAlias(
28126 MemoryLocation(MUC0.MMO->getValue(), Loc0,
28127 UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()),
28128 MemoryLocation(MUC1.MMO->getValue(), Loc1,
28129 UseTBAA ? MUC1.MMO->getAAInfo() : AAMDNodes())))
28130 return false;
28131 }
28132
28133 // Otherwise we have to assume they alias.
28134 return true;
28135}
28136
28137/// Walk up chain skipping non-aliasing memory nodes,
28138/// looking for aliasing nodes and adding them to the Aliases vector.
28139void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
28140 SmallVectorImpl<SDValue> &Aliases) {
28141 SmallVector<SDValue, 8> Chains; // List of chains to visit.
28142 SmallPtrSet<SDNode *, 16> Visited; // Visited node set.
28143
28144 // Get alias information for node.
28145 // TODO: relax aliasing for unordered atomics (see D66309)
28146 const bool IsLoad = isa<LoadSDNode>(N) && cast<LoadSDNode>(N)->isSimple();
28147
28148 // Starting off.
28149 Chains.push_back(OriginalChain);
28150 unsigned Depth = 0;
28151
28152 // Attempt to improve chain by a single step
28153 auto ImproveChain = [&](SDValue &C) -> bool {
28154 switch (C.getOpcode()) {
28155 case ISD::EntryToken:
28156 // No need to mark EntryToken.
28157 C = SDValue();
28158 return true;
28159 case ISD::LOAD:
28160 case ISD::STORE: {
28161 // Get alias information for C.
28162 // TODO: Relax aliasing for unordered atomics (see D66309)
28163 bool IsOpLoad = isa<LoadSDNode>(C.getNode()) &&
28164 cast<LSBaseSDNode>(C.getNode())->isSimple();
28165 if ((IsLoad && IsOpLoad) || !mayAlias(N, C.getNode())) {
28166 // Look further up the chain.
28167 C = C.getOperand(0);
28168 return true;
28169 }
28170 // Alias, so stop here.
28171 return false;
28172 }
28173
28174 case ISD::CopyFromReg:
28175 // Always forward past CopyFromReg.
28176 C = C.getOperand(0);
28177 return true;
28178
28179 case ISD::LIFETIME_START:
28180 case ISD::LIFETIME_END: {
28181 // We can forward past any lifetime start/end that can be proven not to
28182 // alias the memory access.
28183 if (!mayAlias(N, C.getNode())) {
28184 // Look further up the chain.
28185 C = C.getOperand(0);
28186 return true;
28187 }
28188 return false;
28189 }
28190 default:
28191 return false;
28192 }
28193 };
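// Illustrative chain walk (hypothetical nodes, not from the file): let N be a
// load of C whose chain is t2 (store to B), t2's chain is t1 (store to A), and
// t1's chain is the EntryToken. If C is disjoint from A and B, ImproveChain
// hops from t2 to t1 to the EntryToken and then clears the value, so nothing
// is added to Aliases and FindBetterChain can chain N straight to the entry.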
28194
28195 // Look at each chain and determine if it is an alias. If so, add it to the
28196 // aliases list. If not, then continue up the chain looking for the next
28197 // candidate.
28198 while (!Chains.empty()) {
28199 SDValue Chain = Chains.pop_back_val();
28200
28201 // Don't bother if we've seen Chain before.
28202 if (!Visited.insert(Chain.getNode()).second)
28203 continue;
28204
28205 // For TokenFactor nodes, look at each operand and only continue up the
28206 // chain until we reach the depth limit.
28207 //
28208 // FIXME: The depth check could be made to return the last non-aliasing
28209 // chain we found before we hit a tokenfactor rather than the original
28210 // chain.
28211 if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
28212 Aliases.clear();
28213 Aliases.push_back(OriginalChain);
28214 return;
28215 }
28216
28217 if (Chain.getOpcode() == ISD::TokenFactor) {
28218 // We have to check each of the operands of the token factor for "small"
28219 // token factors, so we queue them up. Adding the operands to the queue
28220 // (stack) in reverse order maintains the original order and increases the
28221 // likelihood that getNode will find a matching token factor (CSE.)
28222 if (Chain.getNumOperands() > 16) {
28223 Aliases.push_back(Chain);
28224 continue;
28225 }
28226 for (unsigned n = Chain.getNumOperands(); n;)
28227 Chains.push_back(Chain.getOperand(--n));
28228 ++Depth;
28229 continue;
28230 }
28231 // Everything else
28232 if (ImproveChain(Chain)) {
28233 // Updated Chain Found, Consider new chain if one exists.
28234 if (Chain.getNode())
28235 Chains.push_back(Chain);
28236 ++Depth;
28237 continue;
28238 }
28239 // No Improved Chain Possible, treat as Alias.
28240 Aliases.push_back(Chain);
28241 }
28242}
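// Sketch of a TokenFactor step (assumed shapes): if the walk reaches
// t5 = TokenFactor(t1, t2, t3), each operand is queued and improved on its
// own, so Aliases may end up holding only the operands that still may alias N;
// a TokenFactor with more than 16 operands is conservatively recorded as an
// alias wholesale to keep the search cheap.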
28243
28244/// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
28245/// (aliasing node.)
28246SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
28247 if (OptLevel == CodeGenOptLevel::None)
28248 return OldChain;
28249
28250 // Ops for replacing token factor.
28251 SmallVector<SDValue, 8> Aliases;
28252
28253 // Accumulate all the aliases to this node.
28254 GatherAllAliases(N, OldChain, Aliases);
28255
28256 // If no operands then chain to entry token.
28257 if (Aliases.empty())
28258 return DAG.getEntryNode();
28259
28260 // If a single operand then chain to it. We don't need to revisit it.
28261 if (Aliases.size() == 1)
28262 return Aliases[0];
28263
28264 // Construct a custom tailored token factor.
28265 return DAG.getTokenFactor(SDLoc(N), Aliases);
28266}
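// Example outcomes (illustrative): an empty Aliases set re-chains the node to
// the entry token, a single entry {t7} chains it directly to t7, and {t7, t9}
// yields TokenFactor(t7, t9), so the node still orders after every chain it
// genuinely depends on while skipping the non-aliasing ones.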
28267
28268// This function tries to collect a bunch of potentially interesting
28269// nodes to improve the chains of, all at once. This might seem
28270// redundant, as this function gets called when visiting every store
28271// node, so why not let the work be done on each store as it's visited?
28272//
28273// I believe this is mainly important because mergeConsecutiveStores
28274// is unable to deal with merging stores of different sizes, so unless
28275// we improve the chains of all the potential candidates up-front
28276// before running mergeConsecutiveStores, it might only see some of
28277// the nodes that will eventually be candidates, and then not be able
28278// to go from a partially-merged state to the desired final
28279// fully-merged state.
28280
28281bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
28282 SmallVector<StoreSDNode *, 8> ChainedStores;
28283 StoreSDNode *STChain = St;
28284 // Intervals records which offsets from BaseIndex have been covered. In
28285 // the common case, every store writes to the address range immediately before
28286 // the one already covered and is thus merged with the previous interval at insertion time.
28287
28288 using IMap = llvm::IntervalMap<int64_t, std::monostate, 8,
28289 IntervalMapHalfOpenInfo<int64_t>>;
28290 IMap::Allocator A;
28291 IMap Intervals(A);
28292
28293 // This holds the base pointer, index, and the offset in bytes from the base
28294 // pointer.
28295 const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
28296
28297 // We must have a base and an offset.
28298 if (!BasePtr.getBase().getNode())
28299 return false;
28300
28301 // Do not handle stores to undef base pointers.
28302 if (BasePtr.getBase().isUndef())
28303 return false;
28304
28305 // Do not handle stores to opaque types
28306 if (St->getMemoryVT().isZeroSized())
28307 return false;
28308
28309 // BaseIndexOffset assumes that offsets are fixed-size, which
28310 // is not valid for scalable vectors where the offsets are
28311 // scaled by `vscale`, so bail out early.
28312 if (St->getMemoryVT().isScalableVT())
28313 return false;
28314
28315 // Add ST's interval.
28316 Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8,
28317 std::monostate{});
28318
28319 while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {
28320 if (Chain->getMemoryVT().isScalableVector())
28321 return false;
28322
28323 // If the chain has more than one use, then we can't reorder the mem ops.
28324 if (!SDValue(Chain, 0)->hasOneUse())
28325 break;
28326 // TODO: Relax for unordered atomics (see D66309)
28327 if (!Chain->isSimple() || Chain->isIndexed())
28328 break;
28329
28330 // Find the base pointer and offset for this memory node.
28331 const BaseIndexOffset Ptr = BaseIndexOffset::match(Chain, DAG);
28332 // Check that the base pointer is the same as the original one.
28333 int64_t Offset;
28334 if (!BasePtr.equalBaseIndex(Ptr, DAG, Offset))
28335 break;
28336 int64_t Length = (Chain->getMemoryVT().getSizeInBits() + 7) / 8;
28337 // Make sure we don't overlap with other intervals by checking the ones to
28338 // the left or right before inserting.
28339 auto I = Intervals.find(Offset);
28340 // If there's a next interval, we should end before it.
28341 if (I != Intervals.end() && I.start() < (Offset + Length))
28342 break;
28343 // If there's a previous interval, we should start after it.
28344 if (I != Intervals.begin() && (--I).stop() <= Offset)
28345 break;
28346 Intervals.insert(Offset, Offset + Length, std::monostate{});
28347
28348 ChainedStores.push_back(Chain);
28349 STChain = Chain;
28350 }
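// Standalone sketch of the interval bookkeeping above (illustrative only,
// mirrors the IMap typedef in this function):
//   IMap::Allocator Alloc;
//   IMap Covered(Alloc);
//   Covered.insert(0, 4, std::monostate{}); // St covers bytes [0,4)
//   Covered.insert(4, 8, std::monostate{}); // next store covers [4,8)
// With half-open intervals and equal mapped values the two entries coalesce
// into [0,8), so a later candidate at offset 6 fails the overlap checks and
// ends the scan of the store chain.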
28351
28352 // If we didn't find a chained store, exit.
28353 if (ChainedStores.empty())
28354 return false;
28355
28356 // Improve all chained stores (St and ChainedStores members) starting from
28357 // where the store chain ended and return single TokenFactor.
28358 SDValue NewChain = STChain->getChain();
28359 SmallVector<SDValue, 8> TFOps;
28360 for (unsigned I = ChainedStores.size(); I;) {
28361 StoreSDNode *S = ChainedStores[--I];
28362 SDValue BetterChain = FindBetterChain(S, NewChain);
28363 S = cast<StoreSDNode>(DAG.UpdateNodeOperands(
28364 S, BetterChain, S->getOperand(1), S->getOperand(2), S->getOperand(3)));
28365 TFOps.push_back(SDValue(S, 0));
28366 ChainedStores[I] = S;
28367 }
28368
28369 // Improve St's chain. Use a new node to avoid creating a loop from CombineTo.
28370 SDValue BetterChain = FindBetterChain(St, NewChain);
28371 SDValue NewST;
28372 if (St->isTruncatingStore())
28373 NewST = DAG.getTruncStore(BetterChain, SDLoc(St), St->getValue(),
28374 St->getBasePtr(), St->getMemoryVT(),
28375 St->getMemOperand());
28376 else
28377 NewST = DAG.getStore(BetterChain, SDLoc(St), St->getValue(),
28378 St->getBasePtr(), St->getMemOperand());
28379
28380 TFOps.push_back(NewST);
28381
28382 // If we improved every element of TFOps, then we've lost the dependence on
28383 // NewChain to successors of St and we need to add it back to TFOps. Do so at
28384 // the beginning to keep relative order consistent with FindBetterChains.
28385 auto hasImprovedChain = [&](SDValue ST) -> bool {
28386 return ST->getOperand(0) != NewChain;
28387 };
28388 bool AddNewChain = llvm::all_of(TFOps, hasImprovedChain);
28389 if (AddNewChain)
28390 TFOps.insert(TFOps.begin(), NewChain);
28391
28392 SDValue TF = DAG.getTokenFactor(SDLoc(STChain), TFOps);
28393 CombineTo(St, TF);
28394
28395 // Add TF and its operands to the worklist.
28396 AddToWorklist(TF.getNode());
28397 for (const SDValue &Op : TF->ops())
28398 AddToWorklist(Op.getNode());
28399 AddToWorklist(STChain);
28400 return true;
28401}
28402
28403bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
28404 if (OptLevel == CodeGenOptLevel::None)
28405 return false;
28406
28407 const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
28408
28409 // We must have a base and an offset.
28410 if (!BasePtr.getBase().getNode())
28411 return false;
28412
28413 // Do not handle stores to undef base pointers.
28414 if (BasePtr.getBase().isUndef())
28415 return false;
28416
28417 // Directly improve a chain of disjoint stores starting at St.
28418 if (parallelizeChainedStores(St))
28419 return true;
28420
28421 // Improve St's chain.
28422 SDValue BetterChain = FindBetterChain(St, St->getChain());
28423 if (St->getChain() != BetterChain) {
28424 replaceStoreChain(St, BetterChain);
28425 return true;
28426 }
28427 return false;
28428}
28429
28430/// This is the entry point for the file.
28431 void SelectionDAG::Combine(CombineLevel Level, AAResults *AA,
28432 CodeGenOptLevel OptLevel) {
28433 /// This is the main entry point to this class.
28434 DAGCombiner(*this, AA, OptLevel).Run(Level);
28435}
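// Typical driver (approximate; the exact call-site shape varies by release):
// SelectionDAGISel invokes this entry point once per combine phase, roughly as
//   CurDAG->Combine(BeforeLegalizeTypes, AA, OptLevel);
// and again later with AfterLegalizeTypes, AfterLegalizeVectorOps and
// AfterLegalizeDAG as the Level argument.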
static bool mayAlias(MachineInstr &MIa, SmallVectorImpl< MachineInstr * > &MemInsns, AliasAnalysis *AA)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< bool > UseAA("aarch64-use-aa", cl::init(true), cl::desc("Enable the use of AA during codegen."))
static const LLT S1
amdgpu AMDGPU Register Bank Select
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
This file contains the simple types necessary to represent the attributes associated with functions a...
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL, const TargetLowering &TLI)
For the instruction sequence of store below, F and I values are bundled together as an i64 value befo...
static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I)
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
static bool canFoldInAddressingMode(GLoadStore *MI, const TargetLowering &TLI, MachineRegisterInfo &MRI)
Return true if 'MI' is a load or a store that may be fold it's address operand into the load / store ...
static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I)
static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques=false)
static cl::opt< bool > EnableShrinkLoadReplaceStoreWithStore("combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true), cl::desc("DAG combiner enable load/<replace bytes>/store with " "a narrower store"))
static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0, unsigned ExtOpc, SmallVectorImpl< SDNode * > &ExtendNodes, const TargetLowering &TLI)
static cl::opt< unsigned > TokenFactorInlineLimit("combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048), cl::desc("Limit the number of operands to inline for Token Factors"))
static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner, const TargetLowering &TLI, EVT VT, bool LegalOperations, SDNode *N, SDValue N0, ISD::LoadExtType ExtLoadType, ISD::NodeType ExtOpc, bool NonNegZExt=false)
static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG)
static SDNode * getBuildPairElt(SDNode *N, unsigned i)
static SDValue tryToFoldExtendSelectLoad(SDNode *N, const TargetLowering &TLI, SelectionDAG &DAG, CombineLevel Level)
Fold (sext (select c, load x, load y)) -> (select c, sextload x, sextload y) (zext (select c,...
static SDValue foldBitOrderCrossLogicOp(SDNode *N, SelectionDAG &DAG)
static SDValue tryToFoldExtendOfConstant(SDNode *N, const SDLoc &DL, const TargetLowering &TLI, SelectionDAG &DAG, bool LegalTypes)
Try to fold a sext/zext/aext dag node into a ConstantSDNode or a build_vector of constants.
static SDValue foldBoolSelectToLogic(SDNode *N, SelectionDAG &DAG)
static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG, const SDLoc &DL)
If a vector binop is performed on splat values, it may be profitable to extract, scalarize,...
static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift, SDValue ExtractFrom, SDValue &Mask, const SDLoc &DL)
Helper function for visitOR to extract the needed side of a rotate idiom from a shl/srl/mul/udiv.
static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec, bool &IsLoad, bool &IsMasked, SDValue &Ptr, const TargetLowering &TLI)
bool refineUniformBase(SDValue &BasePtr, SDValue &Index, bool IndexIsScaled, SelectionDAG &DAG, const SDLoc &DL)
static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned, const TargetLowering &TLI)
Return true if divmod libcall is available.
static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG)
static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1, SelectionDAG &DAG, const SDLoc &DL)
Given the operands of an add/sub operation, see if the 2nd operand is a masked 0/1 whose source opera...
static bool mergeEltWithShuffle(SDValue &X, SDValue &Y, ArrayRef< int > Mask, SmallVectorImpl< int > &NewMask, SDValue Elt, unsigned InsIndex)
static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf)
If we have a unary shuffle of a shuffle, see if it can be folded away completely.
static bool canSplitIdx(LoadSDNode *LD)
static SDValue ShrinkLoadReplaceStoreWithStore(const std::pair< unsigned, unsigned > &MaskInfo, SDValue IVal, StoreSDNode *St, DAGCombiner *DC)
Check to see if IVal is something that provides a value as specified by MaskInfo.
static cl::opt< bool > StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden, cl::desc("Bypass the profitability model of load slicing"), cl::init(false))
Hidden option to stress test load slicing, i.e., when this option is enabled, load slicing bypasses m...
static cl::opt< bool > UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true), cl::desc("Enable DAG combiner's use of TBAA"))
static void adjustCostForPairing(SmallVectorImpl< LoadedSlice > &LoadedSlices, LoadedSlice::Cost &GlobalLSCost)
Adjust the GlobalLSCost according to the target paring capabilities and the layout of the slices.
static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract, SelectionDAG &DAG, bool LegalOperations)
static SDValue combineCarryDiamond(SelectionDAG &DAG, const TargetLowering &TLI, SDValue N0, SDValue N1, SDNode *N)
static SDValue foldExtendVectorInregToExtendOfSubvector(SDNode *N, const SDLoc &DL, const TargetLowering &TLI, SelectionDAG &DAG, bool LegalOperations)
static bool isCompatibleLoad(SDValue N, unsigned ExtOpcode)
Check if N satisfies: N is used once.
static SDValue foldLogicTreeOfShifts(SDNode *N, SDValue LeftHand, SDValue RightHand, SelectionDAG &DAG)
Given a tree of logic operations with shape like (LOGIC (LOGIC (X, Y), LOGIC (Z, Y))) try to match an...
static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG)
static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG, bool LegalOperations)
If we are extracting a subvector produced by a wide binary operator try to use a narrow binary operat...
static SDValue takeInexpensiveLog2(SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Op, unsigned Depth, bool AssumeNonZero)
static SDValue combineSelectAsExtAnd(SDValue Cond, SDValue T, SDValue F, const SDLoc &DL, SelectionDAG &DAG)
static bool areUsedBitsDense(const APInt &UsedBits)
Check that all bits set in UsedBits form a dense region, i.e., UsedBits looks like 0....
static SDValue getInputChainForNode(SDNode *N)
Given a node, return its input chain if it has one, otherwise return a null sd operand.
static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG)
If we are extracting a subvector from a wide vector load, convert to a narrow load to eliminate the e...
static ElementCount numVectorEltsOrZero(EVT T)
static SDValue foldSelectWithIdentityConstant(SDNode *N, SelectionDAG &DAG, bool ShouldCommuteOperands)
This inverts a canonicalization in IR that replaces a variable select arm with an identity constant.
static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG)
Given an extending node with a pop-count operand, if the target does not support a pop-count in the n...
static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG)
static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG)
If a shuffle inserts exactly one element from a source vector operand into another vector operand and...
static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner, const TargetLowering &TLI, EVT VT, bool LegalOperations, SDNode *N, SDValue N0, ISD::LoadExtType ExtLoadType)
static SDValue foldAndToUsubsat(SDNode *N, SelectionDAG &DAG, const SDLoc &DL)
For targets that support usubsat, match a bit-hack form of that operation that ends in 'and' and conv...
static cl::opt< bool > CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden, cl::desc("Enable DAG combiner's use of IR alias analysis"))
static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy)
static SDValue formSplatFromShuffles(ShuffleVectorSDNode *OuterShuf, SelectionDAG &DAG)
Combine shuffle of shuffle of the form: shuf (shuf X, undef, InnerMask), undef, OuterMask --> splat X...
static bool isDivisorPowerOfTwo(SDValue Divisor)
static bool matchRotateHalf(const SelectionDAG &DAG, SDValue Op, SDValue &Shift, SDValue &Mask)
Match "(X shl/srl V1) & V2" where V2 may not be present.
static SDValue foldExtractSubvectorFromShuffleVector(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalOperations)
Given EXTRACT_SUBVECTOR(VECTOR_SHUFFLE(Op0, Op1, Mask)), try to produce VECTOR_SHUFFLE(EXTRACT_SUBVEC...
static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG)
static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG, bool LegalOperations)
Transform a vector binary operation into a scalar binary operation by moving the math/logic after an ...
static bool hasNoInfs(const TargetOptions &Options, SDValue N)
static SDValue combineShuffleOfBitcast(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalOperations)
static std::optional< EVT > canCombineShuffleToExtendVectorInreg(unsigned Opcode, EVT VT, std::function< bool(unsigned)> Match, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes, bool LegalOperations)
static SDValue PerformUMinFpToSatCombine(SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, SelectionDAG &DAG)
static SDValue combineShuffleToAnyExtendVectorInreg(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalOperations)
static SDValue foldAddSubOfSignBit(SDNode *N, const SDLoc &DL, SelectionDAG &DAG)
Try to fold a 'not' shifted sign-bit with add/sub with constant operand into a shift and add with a d...
static SDValue stripTruncAndExt(SDValue Value)
static SDValue combineUADDO_CARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG, SDValue X, SDValue Carry0, SDValue Carry1, SDNode *N)
If we are facing some sort of diamond carry propagation pattern try to break it up to generate someth...
static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG)
Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles followed by concatenatio...
static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG)
static auto getFirstIndexOf(R &&Range, const T &Val)
static std::pair< unsigned, unsigned > CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain)
Check to see if V is (and load (ptr), imm), where the load is having specific bytes cleared out.
static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef< int > Mask)
If the shuffle mask is taking exactly one element from the first vector operand and passing through a...
static bool shouldConvertSelectOfConstantsToMath(const SDValue &Cond, EVT VT, const TargetLowering &TLI)
static cl::opt< bool > EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true), cl::desc("DAG combiner enable merging multiple stores " "into a wider store"))
static bool isContractableFMUL(const TargetOptions &Options, SDValue N)
static cl::opt< bool > MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true), cl::desc("DAG combiner may split indexing from loads"))
static bool areSlicesNextToEachOther(const LoadedSlice &First, const LoadedSlice &Second)
Check whether or not First and Second are next to each other in memory.
static SDValue stripConstantMask(const SelectionDAG &DAG, SDValue Op, SDValue &Mask)
static bool arebothOperandsNotSNan(SDValue Operand1, SDValue Operand2, SelectionDAG &DAG)
static bool isBSwapHWordPair(SDValue N, MutableArrayRef< SDNode * > Parts)
static bool CanCombineFCOPYSIGN_EXTEND_ROUND(EVT XTy, EVT YTy)
copysign(x, fp_extend(y)) -> copysign(x, y) copysign(x, fp_round(y)) -> copysign(x,...
static unsigned getMinMaxOpcodeForFP(SDValue Operand1, SDValue Operand2, ISD::CondCode CC, unsigned OrAndOpcode, SelectionDAG &DAG, bool isFMAXNUMFMINNUM_IEEE, bool isFMAXNUMFMINNUM)
static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI)
static SDValue getTruncatedUSUBSAT(EVT DstVT, EVT SrcVT, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &DL)
static SDNode * getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad, bool &IsMasked, SDValue &Ptr, SDValue &BasePtr, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG, const TargetLowering &TLI)
static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS, SDValue RHS, const TargetLowering &TLI)
static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG, const TargetLowering &TLI, bool Force)
Flips a boolean if it is cheaper to compute.
static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op, KnownBits &Known)
static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG, const TargetLowering &TLI, EVT VT, bool LegalOperations, SDNode *N, SDValue N0, ISD::LoadExtType ExtLoadType, ISD::NodeType ExtOpc)
static SDValue getSubVectorSrc(SDValue V, SDValue Index, EVT SubVT)
static SDValue combineConcatVectorOfShuffleAndItsOperands(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes, bool LegalOperations)
bool refineIndexType(SDValue &Index, ISD::MemIndexType &IndexType, EVT DataVT, SelectionDAG &DAG)
static cl::opt< bool > EnableVectorFCopySignExtendRound("combiner-vector-fcopysign-extend-round", cl::Hidden, cl::init(false), cl::desc("Enable merging extends and rounds into FCOPYSIGN on vector types"))
static SDValue combineMinNumMaxNumImpl(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode CC, const TargetLowering &TLI, SelectionDAG &DAG)
static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG)
If we have a shift-by-constant of a bitwise logic op that itself has a shift-by-constant operand with...
static SDValue widenAbs(SDNode *Extend, SelectionDAG &DAG)
static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset=0)
static SDValue combineShiftToMULH(SDNode *N, const SDLoc &DL, SelectionDAG &DAG, const TargetLowering &TLI)
static ConstantSDNode * getAsNonOpaqueConstant(SDValue N)
If N is a ConstantSDNode with isOpaque() == false return it casted to a ConstantSDNode pointer else n...
static bool arebothOperandsNotNan(SDValue Operand1, SDValue Operand2, SelectionDAG &DAG)
static SDValue PerformMinMaxFpToSatCombine(SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, SelectionDAG &DAG)
static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize, SelectionDAG &DAG, bool IsRotate)
static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1, SDNode *N)
OR combines for which the commuted variant will be tried as well.
static SDValue combineShuffleToZeroExtendVectorInReg(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalOperations)
static cl::opt< bool > EnableReduceLoadOpStoreWidth("combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true), cl::desc("DAG combiner enable reducing the width of load/op/store " "sequence"))
static bool shouldCombineToPostInc(SDNode *N, SDValue Ptr, SDNode *PtrUse, SDValue &BasePtr, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG, const TargetLowering &TLI)
static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG, bool LegalOperations)
static SDValue combineConcatVectorOfCasts(SDNode *N, SelectionDAG &DAG)
static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG)
Try to replace shift/logic that tests if a bit is clear with mask + setcc.
static SDValue matchBSwapHWordOrAndAnd(const TargetLowering &TLI, SelectionDAG &DAG, SDNode *N, SDValue N0, SDValue N1, EVT VT, EVT ShiftAmountTy)
static bool areBitwiseNotOfEachother(SDValue Op0, SDValue Op1)
static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI)
static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG)
static SDValue foldVSelectToSignBitSplatMask(SDNode *N, SelectionDAG &DAG)
static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, const SDLoc &DL, SelectionDAG &DAG)
static SDValue combineConcatVectorOfConcatVectors(SDNode *N, SelectionDAG &DAG)
static SDValue tryToFoldExtOfAtomicLoad(SelectionDAG &DAG, const TargetLowering &TLI, EVT VT, SDValue N0, ISD::LoadExtType ExtLoadType)
static SDValue getAsCarry(const TargetLowering &TLI, SDValue V, bool ForceCarryReconstruction=false)
static SDValue foldSelectOfConstantsUsingSra(SDNode *N, const SDLoc &DL, SelectionDAG &DAG)
If a (v)select has a condition value that is a sign-bit test, try to smear the condition operand sign...
static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG)
static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG)
static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT, SelectionDAG &DAG, bool LegalOperations)
static cl::opt< unsigned > StoreMergeDependenceLimit("combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10), cl::desc("Limit the number of times for the same StoreNode and RootNode " "to bail out in store merging dependence check"))
static cl::opt< std::string > CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden, cl::desc("Only use DAG-combiner alias analysis in this" " function"))
static SDValue foldLogicOfShifts(SDNode *N, SDValue LogicOp, SDValue ShiftOp, SelectionDAG &DAG)
Given a bitwise logic operation N with a matching bitwise logic operand, fold a pattern where 2 of th...
static bool isSlicingProfitable(SmallVectorImpl< LoadedSlice > &LoadedSlices, const APInt &UsedBits, bool ForCodeSize)
Check the profitability of all involved LoadedSlice.
static bool isBSwapHWordElement(SDValue N, MutableArrayRef< SDNode * > Parts)
Return true if the specified node is an element that makes up a 32-bit packed halfword byteswap.
static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, unsigned &BW, bool &Unsigned, SelectionDAG &DAG)
static std::optional< SDByteProvider > calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth, std::optional< uint64_t > VectorIndex, unsigned StartingIndex=0)
static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG)
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file provides an implementation of debug counters.
#define DEBUG_COUNTER(VARNAME, COUNTERNAME, DESC)
Definition: DebugCounter.h:182
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseMap class.
uint64_t Addr
uint64_t Size
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
static bool isSigned(unsigned int Opcode)
static bool isUndef(ArrayRef< int > Mask)
static MaybeAlign getAlign(Value *Ptr)
Definition: IRBuilder.cpp:530
iv Induction Variable Users
Definition: IVUsers.cpp:48
static Value * simplifyDivRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, const SimplifyQuery &Q, unsigned MaxRecurse)
Check for common or similar folds of integer division or integer remainder.
This file implements a coalescing interval map for small objects.
static LVOptions Options
Definition: LVOptions.cpp:25
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
This file provides utility analysis objects describing memory locations.
This file contains the declarations for metadata subclasses.
#define T1
LLVMContext & Context
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
#define P(N)
const SmallVectorImpl< MachineOperand > & Cond
Contains matchers for matching SelectionDAG nodes and values.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isSimple(Instruction *I)
This file contains some templates that are useful if you are working with the STL at all.
static cl::opt< bool > UseTBAA("use-tbaa-in-sched-mi", cl::Hidden, cl::init(true), cl::desc("Enable use of TBAA during MI DAG construction"))
This file implements a set that has insertion order iteration characteristics.
This file implements the SmallBitVector class.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
This file describes how to lower LLVM code to machine code.
static constexpr int Concat[]
Value * RHS
Value * LHS
bool isNegative() const
Definition: APFloat.h:1295
bool isNormal() const
Definition: APFloat.h:1299
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
Definition: APFloat.h:1278
const fltSemantics & getSemantics() const
Definition: APFloat.h:1303
bool isNaN() const
Definition: APFloat.h:1293
APInt bitcastToAPInt() const
Definition: APFloat.h:1210
bool isLargest() const
Definition: APFloat.h:1311
bool isIEEE() const
Definition: APFloat.h:1313
bool isInfinity() const
Definition: APFloat.h:1292
Class for arbitrary precision integers.
Definition: APInt.h:76
APInt umul_ov(const APInt &RHS, bool &Overflow) const
Definition: APInt.cpp:1941
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:212
static void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
Dual division/remainder interface.
Definition: APInt.cpp:1728
APInt getLoBits(unsigned numBits) const
Compute an APInt containing numBits lowbits from this APInt.
Definition: APInt.cpp:613
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition: APInt.h:427
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:981
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition: APInt.h:207
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1491
unsigned popcount() const
Count the number of bits set.
Definition: APInt.h:1620
APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition: APInt.cpp:1002
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1463
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:906
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1308
APInt abs() const
Get the absolute value.
Definition: APInt.h:1737
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition: APInt.h:349
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1160
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition: APInt.h:236
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:358
bool isSignMask() const
Check if the APInt's value is returned by getSignMask.
Definition: APInt.h:444
APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition: APInt.cpp:1636
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1439
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition: APInt.h:1089
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:307
bool intersects(const APInt &RHS) const
This operation tests if there are any pairs of corresponding bits between this APInt and RHS that are...
Definition: APInt.h:1227
int32_t exactLogBase2() const
Definition: APInt.h:1725
APInt uadd_ov(const APInt &RHS, bool &Overflow) const
Definition: APInt.cpp:1905
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition: APInt.h:1589
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition: APInt.h:1548
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition: APInt.cpp:620
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition: APInt.h:1482
unsigned countLeadingZeros() const
Definition: APInt.h:1556
unsigned logBase2() const
Definition: APInt.h:1703
bool isShiftedMask() const
Return true if this APInt value contains a non-empty sequence of ones with the remainder zero.
Definition: APInt.h:488
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition: APInt.h:453
bool getBoolValue() const
Convert APInt to a boolean value.
Definition: APInt.h:449
APInt smul_ov(const APInt &RHS, bool &Overflow) const
Definition: APInt.cpp:1930
bool isMask(unsigned numBits) const
Definition: APInt.h:466
bool ule(const APInt &RHS) const
Unsigned less or equal comparison.
Definition: APInt.h:1128
APInt sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:954
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1235
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:418
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:284
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:274
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:178
APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
Definition: APInt.cpp:453
bool isOne() const
Determine if this is a value of 1.
Definition: APInt.h:367
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition: APInt.h:264
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:217
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1513
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:836
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:829
unsigned countr_one() const
Count the number of trailing one bits.
Definition: APInt.h:1606
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition: APInt.h:1199
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
ArrayRef< T > drop_front(size_t N=1) const
Drop the first N elements of the array.
Definition: ArrayRef.h:204
iterator end() const
Definition: ArrayRef.h:154
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
iterator begin() const
Definition: ArrayRef.h:153
static ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
Definition: Type.cpp:647
This is an SDNode representing atomic operations.
static BaseIndexOffset match(const SDNode *N, const SelectionDAG &DAG)
Parses tree in N for base, index, offset addresses.
static bool computeAliasing(const SDNode *Op0, const LocationSize NumBytes0, const SDNode *Op1, const LocationSize NumBytes1, const SelectionDAG &DAG, bool &IsAlias)
A "pseudo-class" with methods for operating on BUILD_VECTORs.
Represents known origin of an individual byte in combine pattern.
Definition: ByteProvider.h:30
static ByteProvider getConstantZero()
Definition: ByteProvider.h:73
static ByteProvider getSrc(std::optional< ISelOp > Val, int64_t ByteOffset, int64_t VectorOffset)
Definition: ByteProvider.h:66
Combiner implementation.
Definition: Combiner.h:34
static Constant * get(ArrayType *T, ArrayRef< Constant * > V)
Definition: Constants.cpp:1291
const APFloat & getValueAPF() const
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
bool isNegative() const
Return true if the value is negative.
bool isZero() const
Return true if the value is positive or negative zero.
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:268
const ConstantInt * getConstantIntValue() const
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
This is an important base class in LLVM.
Definition: Constant.h:41
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
bool isLittleEndian() const
Layout endianness...
Definition: DataLayout.h:238
bool isBigEndian() const
Definition: DataLayout.h:239
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:504
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:874
static bool shouldExecute(unsigned CounterName)
Definition: DebugCounter.h:72
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
bool erase(const KeyT &Val)
Definition: DenseMap.h:329
iterator end()
Definition: DenseMap.h:84
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:220
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition: TypeSize.h:308
constexpr bool isScalar() const
Exactly one element.
Definition: TypeSize.h:319
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:682
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:340
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:677
Helper struct to store a base, index and offset that forms an address.
Definition: LoadStoreOpt.h:38
This class is used to form a handle around another node that is persistent and is updated across invo...
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
Base class for LoadSDNode and StoreSDNode.
bool isUnindexed() const
Return true if this is NOT a pre/post inc/dec load/store.
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
const SDValue & getOffset() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
bool hasValue() const
static LocationSize precise(uint64_t Value)
static constexpr LocationSize beforeOrAfterPointer()
Any location before or after the base pointer (but still within the underlying object).
bool isScalable() const
TypeSize getValue() const
Machine Value Type.
SimpleValueType SimpleTy
static auto all_valuetypes()
SimpleValueType Iteration.
static MVT getIntegerVT(unsigned BitWidth)
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
Function & getFunction()
Return the LLVM function that this machine code represents.
A description of a memory reference used in the backend.
const PseudoSourceValue * getPseudoValue() const
Flags
Flags values. These may be or'd together.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MONonTemporal
The memory access is non-temporal.
Flags getFlags() const
Return the raw flags of the source value,.
const Value * getValue() const
Return the base address of the memory access.
This class is used to represent an MGATHER node.
const SDValue & getPassThru() const
ISD::LoadExtType getExtensionType() const
const SDValue & getIndex() const
const SDValue & getScale() const
const SDValue & getBasePtr() const
const SDValue & getMask() const
ISD::MemIndexType getIndexType() const
How is Index applied to BasePtr when computing addresses.
This class is used to represent an MLOAD node.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
const SDValue & getMask() const
const SDValue & getPassThru() const
const SDValue & getOffset() const
bool isUnindexed() const
Return true if this is NOT a pre/post inc/dec load/store.
ISD::MemIndexedMode getAddressingMode() const
Return the addressing mode for this load or store: unindexed, pre-inc, pre-dec, post-inc,...
This class is used to represent an MSCATTER node.
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
This class is used to represent an MSTORE node.
bool isCompressingStore() const
Returns true if the op does a compression to the vector before storing.
const SDValue & getOffset() const
const SDValue & getBasePtr() const
const SDValue & getMask() const
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
unsigned getAddressSpace() const
Return the address space for the associated pointer.
const MDNode * getRanges() const
Returns the Ranges that describes the dereference.
Align getAlign() const
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
Align getOriginalAlign() const
Returns alignment and volatility of the memory access.
bool isSimple() const
Returns true if the memory operation is neither atomic or volatile.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getBasePtr() const
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
bool isNonTemporal() const
bool isInvariant() const
bool isDereferenceable() const
EVT getMemoryVT() const
Return the type of the in-memory value.
Representation for a specific memory location.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:307
MutableArrayRef< T > take_back(size_t N=1) const
Return a copy of *this with only the last N elements.
Definition: ArrayRef.h:419
iterator end() const
Definition: ArrayRef.h:357
iterator begin() const
Definition: ArrayRef.h:356
MutableArrayRef< T > take_front(size_t N=1) const
Return a copy of *this with only the first N elements.
Definition: ArrayRef.h:412
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
void dump() const
Dump this node, for debugging.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
iterator_range< value_op_iterator > op_values() const
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
size_t use_size() const
Return the number of uses of this node.
void intersectFlagsWith(const SDNodeFlags Flags)
Clear any flags in this node that aren't also set in Flags.
TypeSize getValueSizeInBits(unsigned ResNo) const
Returns MVT::getSizeInBits(getValueType(ResNo)).
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
bool use_empty() const
Return true if there are no uses of this node.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
unsigned getNumOperands() const
Return the number of values used by this operation.
SDVTList getVTList() const
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
bool isOperandOf(const SDNode *N) const
Return true if this node is an operand of N.
const APInt & getConstantOperandAPInt(unsigned Num) const
Helper method returns the APInt of a ConstantSDNode operand.
bool isPredecessorOf(const SDNode *N) const
Return true if this node is a predecessor of N.
bool hasAnyUseOfValue(unsigned Value) const
Return true if there are any use of the indicated value.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
void setFlags(SDNodeFlags NewFlags)
op_iterator op_end() const
op_iterator op_begin() const
static use_iterator use_end()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
bool reachesChainWithoutSideEffects(SDValue Dest, unsigned Depth=2) const
Return true if this operand (which must be a chain) reaches the specified operand without crossing an...
SDValue getValue(unsigned R) const
void dump() const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
bool use_empty() const
Return true if there are no nodes using value ResNo of Node.
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
unsigned getResNo() const
get the index which selects a specific result in the SDNode
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
Targets can subclass this to parameterize the SelectionDAG lowering and instruction selection process...
virtual bool disableGenericCombines(CodeGenOptLevel OptLevel) const
Help to insert SDNodeFlags automatically in transforming.
Definition: SelectionDAG.h:361
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:225
bool willNotOverflowAdd(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the addition of 2 nodes can never overflow.
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT, unsigned Opcode)
Convert Op, which must be of integer type, to the integer type VT, by either any/sign/zero-extending ...
Definition: SelectionDAG.h:954
SDValue getSplatSourceVector(SDValue V, int &SplatIndex)
If V is a splatted value, return the source vector and its splat index.
unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
const SDValue & getRoot() const
Return the root tag of the SelectionDAG.
Definition: SelectionDAG.h:551
SDValue getMaskedGather(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, ISD::LoadExtType ExtTy)
bool isKnownNeverSNaN(SDValue Op, unsigned Depth=0) const
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS)
Helper function to make it easier to build Select's if you just have operands and don't want to check...
const TargetSubtargetInfo & getSubtarget() const
Definition: SelectionDAG.h:474
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
SDValue getSplatValue(SDValue V, bool LegalTypes=false)
If V is a splat vector, return its scalar source operand by extracting that element from the source v...
SDValue FoldSetCC(EVT VT, SDValue N1, SDValue N2, ISD::CondCode Cond, const SDLoc &dl)
Constant fold a setcc to true or false.
SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm, bool ConstantFold=true)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offs=0, bool isT=false, unsigned TargetFlags=0)
SDNode * isConstantIntBuildVectorOrConstantInt(SDValue N) const
Test whether the given value is a constant int or similar node.
SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-...
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
bool haveNoCommonBitsSet(SDValue A, SDValue B) const
Return true if A and B have no common bits set.
SDValue getGetFPEnv(SDValue Chain, const SDLoc &dl, SDValue Ptr, EVT MemVT, MachineMemOperand *MMO)
SDValue getAssertAlign(const SDLoc &DL, SDValue V, Align A)
Return an AssertAlignSDNode.
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getStepVector(const SDLoc &DL, EVT ResVT, const APInt &StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
bool willNotOverflowSub(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the sub of 2 nodes can never overflow.
SDValue getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Val, MachineMemOperand *MMO)
Gets a node for an atomic op, produces result (if relevant) and chain and takes 2 operands.
bool shouldOptForSize() const
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:478
static constexpr unsigned MaxRecursionDepth
Definition: SelectionDAG.h:448
SDValue getIndexedMaskedLoad(SDValue OrigLoad, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
APInt computeVectorKnownZeroElements(SDValue Op, const APInt &DemandedElts, unsigned Depth=0) const
For each demanded element of a vector, see if it is known to be zero.
std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
void salvageDebugInfo(SDNode &N)
To be invoked on an SDNode that is slated to be erased.
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:828
bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
void DeleteNode(SDNode *N)
Remove the specified node from the system.
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
SDValue simplifySelect(SDValue Cond, SDValue TVal, SDValue FVal)
Try to simplify a select/vselect into 1 of its operands or a constant.
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:472
SDNode * isConstantFPBuildVectorOrConstantFP(SDValue N) const
Test whether the given value is a constant FP or similar node.
SDValue getTokenFactor(const SDLoc &DL, SmallVectorImpl< SDValue > &Vals)
Creates a new TokenFactor containing Vals.
bool LegalizeOp(SDNode *N, SmallSetVector< SDNode *, 16 > &UpdatedNodes)
Transforms a SelectionDAG node and any operands to it into a node that is compatible with the target ...
bool doesNodeExist(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops)
Check if a node exists without modifying its flags.
void Combine(CombineLevel Level, AAResults *AA, CodeGenOptLevel OptLevel)
This iterates over the nodes in the SelectionDAG, folding certain types of nodes together,...
bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base, unsigned Bytes, int Dist) const
Return true if loads are next to each other and can be merged.
SDValue getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
bool willNotOverflowMul(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the mul of 2 nodes can never overflow.
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
Definition: SelectionDAG.h:659
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
bool isGuaranteedNotToBeUndefOrPoison(SDValue Op, bool PoisonOnly=false, unsigned Depth=0) const
Return true if this function can prove that Op is never poison and, if PoisonOnly is false,...
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
Definition: SelectionDAG.h:862
MaybeAlign InferPtrAlign(SDValue Ptr) const
Infer alignment of a load / store address.
bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
void RemoveDeadNodes()
This method deletes all unreachable nodes in the SelectionDAG.
bool isConstantValueOfAnyType(SDValue N) const
SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
bool isKnownToBeAPowerOfTwo(SDValue Val, unsigned Depth=0) const
Test if the given value is known to have exactly one bit set.
bool isKnownNeverZero(SDValue Op, unsigned Depth=0) const
Test whether the given SDValue is known to contain non-zero value(s).
SDValue getIndexedStore(SDValue OrigStore, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
SDValue getSetFPEnv(SDValue Chain, const SDLoc &dl, SDValue Ptr, EVT MemVT, MachineMemOperand *MMO)
SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Base, SDValue Offset, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
static const fltSemantics & EVTToAPFloatSemantics(EVT VT)
Returns an APFloat semantics tag appropriate for the given type.
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:473
SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
iterator_range< allnodes_iterator > allnodes()
Definition: SelectionDAG.h:543
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
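For example, a combine that wants smax(X, 0) without first constructing an explicit SETCC node could use this helper (sketch; assumes a SelectionDAG &DAG, an SDLoc DL, and an integer-typed SDValue X):

// select_cc (X > 0), X, 0  ==  smax(X, 0)
EVT VT = X.getValueType();
SDValue Zero = DAG.getConstant(0, DL, VT);
SDValue Max0 = DAG.getSelectCC(DL, X, Zero, X, Zero, ISD::SETGT);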
SDValue getLoadVP(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL, MachinePointerInfo PtrInfo, EVT MemVT, Align Alignment, MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo, const MDNode *Ranges=nullptr, bool IsExpanding=false)
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getScatterVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
bool isKnownNeverNaN(SDValue Op, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN.
SDValue getIndexedMaskedStore(SDValue OrigStore, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
const TargetLibraryInfo & getLibInfo() const
Definition: SelectionDAG.h:479
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
bool MaskedVectorIsZero(SDValue Op, const APInt &DemandedElts, unsigned Depth=0) const
Return true if 'Op' is known to be zero in DemandedElts.
SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:469
bool canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts, bool PoisonOnly=false, bool ConsiderFlags=true, unsigned Depth=0) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
OverflowKind computeOverflowForUnsignedAdd(SDValue N0, SDValue N1) const
Determine if the result of the unsigned addition of 2 nodes can overflow.
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
Definition: SelectionDAG.h:845
SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops)
bool isSafeToSpeculativelyExecuteNode(const SDNode *N) const
Check if the provided node is safe to speculatively execute given its current arguments.
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
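A sketch of the usual query pattern, assuming a SelectionDAG &DAG and an i32-typed SDValue Op: either ask for the full KnownBits and inspect it, or use MaskedValueIsZero when only specific bits matter:

KnownBits Known = DAG.computeKnownBits(Op);
if (Known.isNonNegative()) {
  // Sign bit is known zero: e.g. an SRA of Op could be rewritten as SRL.
}
// Single-bit form of the same question, for the sign bit of an i32 value:
if (DAG.MaskedValueIsZero(Op, APInt::getSignMask(32))) {
  // ...
}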
LLVMContext * getContext() const
Definition: SelectionDAG.h:485
SDValue simplifyFPBinop(unsigned Opcode, SDValue X, SDValue Y, SDNodeFlags Flags)
Try to simplify a floating-point binary operation into 1 of its operands or a constant.
const SDValue & setRoot(SDValue N)
Set the current root tag of the SelectionDAG.
Definition: SelectionDAG.h:560
SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL, bool LegalTypes=true)
bool isUndef(unsigned Opcode, ArrayRef< SDValue > Ops)
Return true if the result of this operation is always undefined.
SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
SDNode * getNodeIfExists(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops, const SDNodeFlags Flags)
Get the specified node if it's already available, or else return NULL.
SDValue getIndexedLoad(SDValue OrigLoad, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:554
SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
DenormalMode getDenormalMode(EVT VT) const
Return the current function's default denormal handling kind for the given floating point type.
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
Definition: SelectionDAG.h:878
static unsigned getOpcode_EXTEND(unsigned Opcode)
Convert *_EXTEND_VECTOR_INREG to *_EXTEND opcode.
Definition: SelectionDAG.h:908
bool isADDLike(SDValue Op, bool NoWrap=false) const
Return true if the specified operand is an ISD::OR or ISD::XOR node that can be treated as an ISD::AD...
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
SDValue simplifyShift(SDValue X, SDValue Y)
Try to simplify a shift into 1 of its operands or a constant.
void transferDbgValues(SDValue From, SDValue To, unsigned OffsetInBits=0, unsigned SizeInBits=0, bool InvalidateDbg=true)
Transfer debug values from one node to another, while optionally generating fragment expressions for ...
SDValue getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a logical NOT operation as (XOR Val, BooleanOne).
SDValue getMaskedScatter(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, bool IsTruncating=false)
A vector that has set insertion semantics.
Definition: SetVector.h:57
bool remove(const value_type &X)
Remove an item from the set vector.
Definition: SetVector.h:188
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:93
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
value_type pop_back_val()
Definition: SetVector.h:285
static bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
int getMaskElt(unsigned Idx) const
ArrayRef< int > getMask() const
static void commuteMask(MutableArrayRef< int > Mask)
Change values in a shuffle permute mask assuming the two vector operands have swapped position.
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
void push_back(bool Val)
void reserve(unsigned N)
size_type size() const
Definition: SmallPtrSet.h:94
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:321
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false.
Definition: SmallPtrSet.h:356
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:360
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:342
bool contains(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:366
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:427
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void assign(size_type NumElts, ValueParamT Elt)
Definition: SmallVector.h:717
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void reserve(size_type N)
Definition: SmallVector.h:676
iterator erase(const_iterator CI)
Definition: SmallVector.h:750
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:696
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:818
void resize(size_type N)
Definition: SmallVector.h:651
void push_back(const T &Elt)
Definition: SmallVector.h:426
pointer data()
Return a pointer to the vector's buffer, even if empty().
Definition: SmallVector.h:299
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
const SDValue & getOffset() const
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
bool has(LibFunc F) const
Tests whether a library function is available.
virtual bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT) const
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
bool isOperationExpand(unsigned Op, EVT VT) const
Return true if the specified operation is illegal on this target or unlikely to be made legal with cu...
virtual bool preferSextInRegOfTruncate(EVT TruncVT, EVT VT, EVT ExtVT) const
virtual bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
virtual bool hasAndNot(SDValue X) const
Return true if the target has a bitwise and-not operation: X = ~A & B This can be used to simplify se...
virtual bool isShuffleMaskLegal(ArrayRef< int >, EVT) const
Targets can use this to indicate that they only support some VECTOR_SHUFFLE operations,...
virtual bool enableAggressiveFMAFusion(EVT VT) const
Return true if target always benefits from combining into FMA for a given value type.
bool isIndexedStoreLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed store is legal on this target.
SDValue promoteTargetBoolean(SelectionDAG &DAG, SDValue Bool, EVT ValVT) const
Promote the given target boolean to a target boolean of the given type.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual bool canCombineTruncStore(EVT ValVT, EVT MemVT, bool LegalOnly) const
virtual bool convertSetCCLogicToBitwiseLogic(EVT VT) const
Use bitwise logic to make pairs of compares more efficient.
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
virtual bool isVectorLoadExtDesirable(SDValue ExtVal) const
Return true if folding a vector load into ExtVal (a sign, zero, or any extend node) is profitable.
int getRecipEstimateSqrtEnabled(EVT VT, MachineFunction &MF) const
Return a ReciprocalEstimate enum value for a square root of the given type based on the function's at...
virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
virtual MachineMemOperand::Flags getTargetMMOFlags(const Instruction &I) const
This callback is used to inspect load/store instructions and add target-specific MachineMemOperand fl...
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual bool isFPExtFoldable(const MachineInstr &MI, unsigned Opcode, LLT DestTy, LLT SrcTy) const
Return true if an fpext operation input to an Opcode operation is free (for instance,...
virtual bool hasBitTest(SDValue X, SDValue Y) const
Return true if the target has a bit-test instruction: (X & (1 << Y)) ==/!= 0 This knowledge can be us...
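The shape this hook is asked about is the single-bit test below; when a target reports such an instruction, combines prefer to keep the shift on the mask side rather than shifting X:

// Illustrative source-level form of (X & (1 << Y)) ==/!= 0.
bool bitIsSet(unsigned X, unsigned Y) {
  return (X & (1u << Y)) != 0;
}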
bool isTruncStoreLegal(EVT ValVT, EVT MemVT) const
Return true if the specified store with truncation is legal on this target.
virtual bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT, const SelectionDAG &DAG, const MachineMemOperand &MMO) const
Return true if the following transform is beneficial: fold (conv (load x)) -> (load (conv*)x) On arch...
virtual bool areTwoSDNodeTargetMMOFlagsMergeable(const MemSDNode &NodeX, const MemSDNode &NodeY) const
Return true if it is valid to merge the TargetMMOFlags in two SDNodes.
virtual bool isCommutativeBinOp(unsigned Opcode) const
Returns true if the opcode is a commutative binary operation.
virtual bool isFPImmLegal(const APFloat &, EVT, bool ForCodeSize=false) const
Returns true if the target can instruction select the specified FP immediate natively.
virtual bool isExtractVecEltCheap(EVT VT, unsigned Index) const
Return true if extraction of a scalar element from the given vector type at the given index is cheap.
virtual bool optimizeFMulOrFDivAsShiftAddBitcast(SDNode *N, SDValue FPConst, SDValue IntPow2) const
virtual bool shouldNormalizeToSelectSequence(LLVMContext &Context, EVT VT) const
Returns true if we should normalize select(N0&N1, X, Y) => select(N0, select(N1, X,...
virtual bool preferScalarizeSplat(SDNode *N) const
bool isIndexedMaskedLoadLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed masked load is legal on this target.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
virtual bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const
Return true if it is profitable to convert a select of FP constants into a constant pool load whose a...
bool hasBigEndianPartOrdering(EVT VT, const DataLayout &DL) const
When splitting a value of the specified type into parts, does the Lo or Hi part come first?...
virtual bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const
Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type from this source type with ...
virtual bool isMulAddWithConstProfitable(SDValue AddNode, SDValue ConstNode) const
Return true if it may be profitable to transform (mul (add x, c1), c2) -> (add (mul x,...
virtual bool isFsqrtCheap(SDValue X, SelectionDAG &DAG) const
Return true if SQRT(X) shouldn't be replaced with X*RSQRT(X).
int getDivRefinementSteps(EVT VT, MachineFunction &MF) const
Return the refinement step count for a division of the given type based on the function's attributes.
virtual bool shouldFoldConstantShiftPairToMask(const SDNode *N, CombineLevel Level) const
Return true if it is profitable to fold a pair of shifts into a mask.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual bool shouldAvoidTransformToShift(EVT VT, unsigned Amount) const
Return true if creating a shift of the type by the given amount is not profitable.
virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const
Return the ValueType of the result of SETCC operations.
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL, bool LegalTypes=true) const
Returns the type for the shift amount of a shift opcode.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
virtual bool shouldFoldSelectWithSingleBitTest(EVT VT, const APInt &AndMask) const
BooleanContent getBooleanContents(bool isVec, bool isFloat) const
For targets without i1 registers, this gives the nature of the high-bits of boolean values held in ty...
virtual bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT) const
Return true if pulling a binary operation into a select with an identity constant is profitable.
virtual bool shouldReassociateReduction(unsigned RedOpc, EVT VT) const
bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal on this target.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
int getRecipEstimateDivEnabled(EVT VT, MachineFunction &MF) const
Return a ReciprocalEstimate enum value for a division of the given type based on the function's attri...
virtual bool preferIncOfAddToSubOfNot(EVT VT) const
These two forms are equivalent: sub y, (xor x, -1) add (add x, 1), y The variant with two add's is IR...
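The equivalence follows from two's complement, where ~x == -x - 1, so y - ~x == y + x + 1 == (x + 1) + y; a small sketch of both forms:

// Both functions compute the same value for any X, Y (wrapping arithmetic).
unsigned viaSubOfNot(unsigned X, unsigned Y) { return Y - (X ^ ~0u); } // sub y, (xor x, -1)
unsigned viaIncOfAdd(unsigned X, unsigned Y) { return (X + 1) + Y; }   // add (add x, 1), y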
virtual bool isNarrowingProfitable(EVT SrcVT, EVT DestVT) const
Return true if it's profitable to narrow operations of type SrcVT to DestVT.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
virtual bool isLegalAddImmediate(int64_t) const
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT) const
Return true if it is profitable to reduce a load to a smaller type.
virtual bool isProfitableToCombineMinNumMaxNum(EVT VT) const
virtual bool isFNegFree(EVT VT) const
Return true if an fneg operation is free to the point where it is never worthwhile to replace it with...
virtual bool isIntDivCheap(EVT VT, AttributeList Attr) const
Return true if integer divide is usually cheaper than a sequence of several shifts,...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool mergeStoresAfterLegalization(EVT MemVT) const
Allow store merging for the specified type after legalization in addition to before legalization.
virtual bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
Return true if the target supports a memory access of this type for the given address space and align...
unsigned getGatherAllAliasesMaxDepth() const
virtual bool storeOfVectorConstantIsCheap(bool IsZero, EVT MemVT, unsigned NumElem, unsigned AddrSpace) const
Return true if it is expected to be cheaper to do a store of vector constant with the given size and ...
virtual bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const
Return true if it is cheaper to split the store of a merged int val from a pair of smaller values int...
bool isLoadExtLegalOrCustom(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal or custom on this target.
bool isAtomicLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified atomic load with extension is legal on this target.
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
virtual bool shouldFoldMaskToVariableShiftPair(SDValue X) const
There are two ways to clear extreme bits (either low or high): Mask: x & (-1 << y) (the instcombine c...
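Both forms below clear the low Y bits of X; which of the two is cheaper on a given target is what this hook decides (sketch, with the shift pair shown as the assumed alternative to the mask form named in the description):

unsigned clearLowViaMask(unsigned X, unsigned Y)   { return X & (~0u << Y); } // x & (-1 << y)
unsigned clearLowViaShifts(unsigned X, unsigned Y) { return (X >> Y) << Y; }  // shift pair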
bool isIndexedLoadLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed load is legal on this target.
virtual bool canMergeStoresTo(unsigned AS, EVT MemVT, const MachineFunction &MF) const
Returns if it's reasonable to merge stores to MemVT size.
virtual bool preferABDSToABSWithNSW(EVT VT) const
bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal on this target.
AndOrSETCCFoldKind
Enum of different potentially desirable ways to fold (and/or (setcc ...), (setcc ....
virtual bool shouldScalarizeBinop(SDValue VecOp) const
Try to convert an extract element of a vector binary operation into an extract element followed by a ...
virtual bool isStoreBitCastBeneficial(EVT StoreVT, EVT BitcastVT, const SelectionDAG &DAG, const MachineMemOperand &MMO) const
Return true if the following transform is beneficial: (store (y (conv x)), y*)) -> (store x,...
bool isIndexedMaskedStoreLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed masked store is legal on this target.
virtual bool isVectorClearMaskLegal(ArrayRef< int >, EVT) const
Similar to isShuffleMaskLegal.
bool hasTargetDAGCombine(ISD::NodeType NT) const
If true, the target has custom DAG combine transformations that it can perform for the specified node...
virtual bool shouldSplatInsEltVarIndex(EVT) const
Return true if inserting a scalar into a variable element of an undef vector is more efficiently hand...
NegatibleCost
Enum that specifies when a float negation is beneficial.
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
int getSqrtRefinementSteps(EVT VT, MachineFunction &MF) const
Return the refinement step count for a square root of the given type based on the function's attribut...
virtual unsigned preferedOpcodeForCmpEqPiecesOfOperand(EVT VT, unsigned ShiftOpc, bool MayTransformRotate, const APInt &ShiftOrRotateAmt, const std::optional< APInt > &AndMask) const
virtual bool isFMADLegal(const MachineInstr &MI, LLT Ty) const
Returns true if MI can be combined with another instruction to form TargetOpcode::G_FMAD.
const char * getLibcallName(RTLIB::Libcall Call) const
Get the libcall routine name for the specified libcall.
virtual bool aggressivelyPreferBuildVectorSources(EVT VecVT) const
virtual bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const
virtual bool isFAbsFree(EVT VT) const
Return true if an fabs operation is free to the point where it is never worthwhile to replace it with...
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
virtual bool generateFMAsInMachineCombiner(EVT VT, CodeGenOptLevel OptLevel) const
virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AddrSpace, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
virtual bool hasPairedLoad(EVT, Align &) const
Return true if the target supplies and combines to a paired load two loaded values of type LoadedType...
virtual bool convertSelectOfConstantsToMath(EVT VT) const
Return true if a select of constants (select Cond, C1, C2) should be transformed into simple math ops...
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const
Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT from min(max(fptoi)) satur...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps, bool &UseOneConstNR, bool Reciprocal) const
Hooks for building estimates in place of slower divisions and square roots.
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
virtual bool isReassocProfitable(SelectionDAG &DAG, SDValue N0, SDValue N1) const
SDValue getCheaperOrNeutralNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, const NegatibleCost CostThreshold=NegatibleCost::Neutral, unsigned Depth=0) const
SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, unsigned Depth=0) const
This is the helper function to return the newly negated expression only when the cost is cheaper.
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
More limited version of SimplifyDemandedBits that can be used to "look through" ops that don't contri...
SDValue expandABS(SDNode *N, SelectionDAG &DAG, bool IsNegative=false) const
Expand ABS nodes.
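One common integer ABS expansion, shown here only as a sketch of the kind of sequence such an expansion can produce (assuming an arithmetic right shift), uses a sign mask:

// abs(X) == (X + Sign) ^ Sign, where Sign is 0 for X >= 0 and -1 for X < 0.
int absViaSignMask(int X) {
  int Sign = X >> 31;        // arithmetic shift: 0 or -1 (abs(INT_MIN) is UB as usual)
  return (X + Sign) ^ Sign;
}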
virtual bool IsDesirableToPromoteOp(SDValue, EVT &) const
This method queries the target as to whether it is beneficial for the dag combiner to promote the specified node.
virtual bool isTypeDesirableForOp(unsigned, EVT VT) const
Return true if the target has native support for the specified value type and it is 'desirable' to us...
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, const DenormalMode &Mode) const
Return a target-dependent comparison result if the input operand is suitable for use with a square ro...
SDValue buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0, SDValue N1, MutableArrayRef< int > Mask, SelectionDAG &DAG) const
Tries to build a legal vector shuffle using the provided parameters or equivalent variations.
virtual SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps) const
Return a reciprocal estimate value for the input operand.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
bool isConstFalseVal(SDValue N) const
Return if the N is a constant or constant vector equal to the false value from getBooleanContents().
SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::UDIV node expressing a divide by constant, return a DAG expression to select that will ...
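The underlying trick replaces the division with a multiply by a precomputed "magic" constant plus shifts; a self-contained sketch for the specific case x / 10 on 32-bit unsigned values (using the well-known u32 reciprocal constant for 10):

#include <cassert>
#include <cstdint>

// x / 10 == (x * 0xCCCCCCCD) >> 35 for every 32-bit unsigned x.
uint32_t udiv10(uint32_t X) {
  return (uint32_t)(((uint64_t)X * 0xCCCCCCCDull) >> 35);
}

int main() {
  for (uint32_t X : {0u, 9u, 10u, 99u, 1000000007u, 0xFFFFFFFFu})
    assert(udiv10(X) == X / 10);
}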
virtual SDValue getSqrtResultForDenormInput(SDValue Operand, SelectionDAG &DAG) const
Return a target-dependent result if the input operand is not suitable for use with a square root esti...
virtual bool getPostIndexedAddressParts(SDNode *, SDNode *, SDValue &, SDValue &, ISD::MemIndexedMode &, SelectionDAG &) const
Returns true by value, base pointer and offset pointer and addressing mode by reference if this node ...
SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, bool foldBooleans, DAGCombinerInfo &DCI, const SDLoc &dl) const
Try to simplify a setcc built with the specified operands and cc.
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const
Return true if folding a constant offset with the given GlobalAddress is legal.
bool isConstTrueVal(SDValue N) const
Return if the N is a constant or constant vector equal to the true value from getBooleanContents().
SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, SDValue Index) const
Get a pointer to vector element Idx located in memory for a vector of type VecVT starting at a base a...
virtual bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const
Return true if it is profitable to move this shift by a constant amount through its operand,...
virtual unsigned combineRepeatedFPDivisors() const
Indicate whether this target prefers to combine FDIVs with the same divisor.
virtual AndOrSETCCFoldKind isDesirableToCombineLogicOpOfSETCC(const SDNode *LogicOp, const SDNode *SETCC0, const SDNode *SETCC1) const
virtual bool getPreIndexedAddressParts(SDNode *, SDValue &, SDValue &, ISD::MemIndexedMode &, SelectionDAG &) const
Returns true by value, base pointer and offset pointer and addressing mode by reference if the node's...
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::SDIV node expressing a divide by constant, return a DAG expression to select that will ...
virtual SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
virtual SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SREM lowering for power-of-2 denominators.
virtual bool isDesirableToTransformToIntegerOp(unsigned, EVT) const
Return true if it is profitable for dag combiner to transform a floating point op of specified opcode...
TargetOptions Options
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
unsigned NoSignedZerosFPMath
NoSignedZerosFPMath - This flag is enabled when the -enable-no-signed-zeros-fp-math is specified on t...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual bool useAA() const
Enable use of alias analysis during code generation (during MI scheduling, DAGCombine,...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:342
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
const fltSemantics & getFltSemantics() const
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:72
Value * getOperand(unsigned i) const
Definition: User.h:169
This class is used to represent an VP_GATHER node.
const SDValue & getScale() const
ISD::MemIndexType getIndexType() const
How is Index applied to BasePtr when computing addresses.
const SDValue & getVectorLength() const
const SDValue & getIndex() const
const SDValue & getBasePtr() const
const SDValue & getMask() const
This class is used to represent an VP_SCATTER node.
const SDValue & getValue() const
This class is used to represent EVT's, which are used to parameterize some operations.
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
use_iterator use_begin()
Definition: Value.h:360
bool use_empty() const
Definition: Value.h:344
iterator_range< use_iterator > uses()
Definition: Value.h:376
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
Definition: TypeSize.h:180
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:199
static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition: TypeSize.h:229
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:171
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition: TypeSize.h:251
#define INT64_MAX
Definition: DataTypes.h:71
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char IsVolatile[]
Key for Kernel::Arg::Metadata::mIsVolatile.
const APInt & smin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be signed.
Definition: APInt.h:2178
const APInt & smax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be signed.
Definition: APInt.h:2183
const APInt & umin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be unsigned.
Definition: APInt.h:2188
const APInt & umax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be unsigned.
Definition: APInt.h:2193
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
CondCode getSetCCAndOperation(CondCode Op1, CondCode Op2, EVT Type)
Return the result of a logical AND between different comparisons of identical values: ((X op1 Y) & (X...
bool isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are ~0 ...
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:751
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:237
@ CTLZ_ZERO_UNDEF
Definition: ISDOpcodes.h:724
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:477
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition: ISDOpcodes.h:44
@ VECREDUCE_SMIN
Definition: ISDOpcodes.h:1377
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:251
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition: ISDOpcodes.h:560
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:715
@ SMULFIX
RESULT = [US]MULFIX(LHS, RHS, SCALE) - Perform fixed point multiplication on 2 integers with the same...
Definition: ISDOpcodes.h:368
@ ConstantFP
Definition: ISDOpcodes.h:77
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:1248
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:270
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
Definition: ISDOpcodes.h:488
@ FMAXNUM_IEEE
Definition: ISDOpcodes.h:986
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:240
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1038
@ SMULFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
Definition: ISDOpcodes.h:374
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:784
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:484
@ GlobalAddress
Definition: ISDOpcodes.h:78
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:791
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:544
@ VECREDUCE_FMAX
FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.
Definition: ISDOpcodes.h:1362
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:391
@ VECREDUCE_FMAXIMUM
FMINIMUM/FMAXIMUM nodes propagate NaNs and signed zeroes using the llvm.minimum and llvm....
Definition: ISDOpcodes.h:1366
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:689
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition: ISDOpcodes.h:821
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:256
@ VECREDUCE_SMAX
Definition: ISDOpcodes.h:1376
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:478
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition: ISDOpcodes.h:914
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:904
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:230
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:1407
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:775
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition: ISDOpcodes.h:663
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:621
@ VECREDUCE_FADD
These reductions have relaxed evaluation order semantics, and have a single vector operand.
Definition: ISDOpcodes.h:1359
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:723
@ VECREDUCE_FMIN
Definition: ISDOpcodes.h:1363
@ SETCCCARRY
Like SetCC, ops #0 and #1 are the LHS and RHS operands to compare, but op #2 is a boolean indicating ...
Definition: ISDOpcodes.h:759
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:931
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1084
@ SSUBO
Same for subtraction.
Definition: ISDOpcodes.h:328
@ STEP_VECTOR
STEP_VECTOR(IMM) - Returns a scalable vector whose lanes are comprised of a linear sequence of unsign...
Definition: ISDOpcodes.h:647
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition: ISDOpcodes.h:350
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:728
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:1244
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:212
@ VECREDUCE_UMAX
Definition: ISDOpcodes.h:1378
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition: ISDOpcodes.h:223
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition: ISDOpcodes.h:628
@ AssertAlign
AssertAlign - These nodes record if a register contains a value that has a known alignment and the tr...
Definition: ISDOpcodes.h:68
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition: ISDOpcodes.h:209
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition: ISDOpcodes.h:324
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
Definition: ISDOpcodes.h:1371
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:652
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:706
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:601
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:574
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
Definition: ISDOpcodes.h:985
@ EntryToken
EntryToken - This is the marker used to indicate the start of a region.
Definition: ISDOpcodes.h:47
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:536
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition: ISDOpcodes.h:203
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:781
@ TargetConstantFP
Definition: ISDOpcodes.h:159
@ FP_TO_UINT_SAT
Definition: ISDOpcodes.h:857
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:743
@ VSCALE
VSCALE(IMM) - Returns the runtime scaling factor used to calculate the number of elements within a sc...
Definition: ISDOpcodes.h:1336
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:972
@ SSHLSAT
RESULT = [US]SHLSAT(LHS, RHS) - Perform saturation left shift.
Definition: ISDOpcodes.h:360
@ SMULO
Same for multiplication.
Definition: ISDOpcodes.h:332
@ TargetFrameIndex
Definition: ISDOpcodes.h:166
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition: ISDOpcodes.h:810
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:799
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:675
@ LIFETIME_START
This corresponds to the llvm.lifetime.
Definition: ISDOpcodes.h:1311
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:889
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:737
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:304
@ HANDLENODE
HANDLENODE node - Used as a handle for various purposes.
Definition: ISDOpcodes.h:1198
@ VECREDUCE_UMIN
Definition: ISDOpcodes.h:1379
@ BF16_TO_FP
BF16_TO_FP, FP_TO_BF16 - These operators are used to perform promotions and truncation for bfloat16.
Definition: ISDOpcodes.h:923
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
Definition: ISDOpcodes.h:991
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:837
@ TargetConstant
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification,...
Definition: ISDOpcodes.h:158
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:681
@ GET_FPENV_MEM
Gets the current floating-point environment.
Definition: ISDOpcodes.h:1014
@ CARRY_FALSE
CARRY_FALSE - This node is used when folding other nodes, like ADDC/SUBC, which indicate the carry re...
Definition: ISDOpcodes.h:261
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition: ISDOpcodes.h:658
@ VECREDUCE_FMUL
Definition: ISDOpcodes.h:1360
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:280
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition: ISDOpcodes.h:401
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:525
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ FFREXP
FFREXP - frexp, extract fractional and exponent component of a floating-point value.
Definition: ISDOpcodes.h:945
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:870
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition: ISDOpcodes.h:832
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition: ISDOpcodes.h:856
@ VECREDUCE_FMINIMUM
Definition: ISDOpcodes.h:1367
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:787
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:1077
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:61
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:494
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition: ISDOpcodes.h:341
@ AssertZext
Definition: ISDOpcodes.h:62
@ CALLSEQ_START
CALLSEQ_START/CALLSEQ_END - These operators mark the beginning and end of a call sequence,...
Definition: ISDOpcodes.h:1147
@ SET_FPENV_MEM
Sets the current floating point environment.
Definition: ISDOpcodes.h:1019
@ SADDO_CARRY
Carry-using overflow-aware nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:314
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:516
bool isIndexTypeSigned(MemIndexType IndexType)
Definition: ISDOpcodes.h:1496
bool isExtVecInRegOpcode(unsigned Opcode)
Definition: ISDOpcodes.h:1606
bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool matchUnaryPredicate(SDValue Op, std::function< bool(ConstantSDNode *)> Match, bool AllowUndefs=false)
Hook for matching ConstantSDNode predicate.
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
bool matchUnaryFpPredicate(SDValue Op, std::function< bool(ConstantFPSDNode *)> Match, bool AllowUndefs=false)
Hook for matching ConstantFPSDNode predicate.
bool isFPEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with floati...
Definition: ISDOpcodes.h:1581
bool isExtOpcode(unsigned Opcode)
Definition: ISDOpcodes.h:1601
bool isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are 0 o...
bool isVPBinaryOp(unsigned Opcode)
Whether this is a vector-predicated binary operation opcode.
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
bool isBitwiseLogicOp(unsigned Opcode)
Whether this is bitwise logic opcode.
Definition: ISDOpcodes.h:1422
std::optional< unsigned > getVPMaskIdx(unsigned Opcode)
The operand position of the vector mask.
bool isUNINDEXEDLoad(const SDNode *N)
Returns true if the specified node is an unindexed load.
std::optional< unsigned > getVPExplicitVectorLengthIdx(unsigned Opcode)
The operand position of the explicit vector length parameter.
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is a EXTLOAD.
bool allOperandsUndef(const SDNode *N)
Return true if the node has at least one operand and all operands of the specified node are ISD::UNDE...
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
MemIndexType
MemIndexType enum - This enum defines how to interpret MGATHER/SCATTER's index parameter when calcula...
Definition: ISDOpcodes.h:1492
@ UNSIGNED_SCALED
Definition: ISDOpcodes.h:1492
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
Definition: ISDOpcodes.h:1563
bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
bool matchBinaryPredicate(SDValue LHS, SDValue RHS, std::function< bool(ConstantSDNode *, ConstantSDNode *)> Match, bool AllowUndefs=false, bool AllowTypeMismatch=false)
Attempt to match a binary predicate against a pair of scalar/splat constants or every element of a pa...
bool isVPReduction(unsigned Opcode)
Whether this is a vector-predicated reduction opcode.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1479
bool isBuildVectorOfConstantFPSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantFPSDNode or undef.
bool isSEXTLoad(const SDNode *N)
Returns true if the specified node is a SEXTLOAD.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1530
bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1510
CondCode getSetCCOrOperation(CondCode Op1, CondCode Op2, EVT Type)
Return the result of a logical OR between different comparisons of identical values: ((X op1 Y) | (X ...
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
Definition: ISDOpcodes.h:1575
@ VecLoad
Definition: NVPTX.h:93
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
Definition: PatternMatch.h:966
specificval_ty m_Specific(const Value *V)
Match only the given, specific value.
Definition: PatternMatch.h:869
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
Definition: PatternMatch.h:586
MaxMin_match< ICmpInst, LHS, RHS, smin_pred_ty > m_SMin(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
deferredval_ty< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
Definition: PatternMatch.h:887
BinaryOp_match< cst_pred_ty< is_zero_int >, ValTy, Instruction::Sub > m_Neg(const ValTy &V)
Matches a 'Neg' as 'sub 0, V'.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
MaxMin_match< ICmpInst, LHS, RHS, umax_pred_ty > m_UMax(const LHS &L, const RHS &R)
MaxMin_match< ICmpInst, LHS, RHS, smax_pred_ty > m_SMax(const LHS &L, const RHS &R)
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
BinaryOp_match< cst_pred_ty< is_all_ones >, ValTy, Instruction::Xor, true > m_Not(const ValTy &V)
Matches a 'Not' as 'xor V, -1' or 'xor -1, V'.
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
MaxMin_match< ICmpInst, LHS, RHS, umin_pred_ty > m_UMin(const LHS &L, const RHS &R)
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
@ Undef
Value of the register doesn't matter.
BinaryOpc_match< LHS, RHS, false > m_Sra(const LHS &L, const RHS &R)
BinaryOpc_match< LHS, RHS, false > m_Srl(const LHS &L, const RHS &R)
UnaryOpc_match< Opnd > m_AnyExt(const Opnd &Op)
Or< Preds... > m_AnyOf(Preds &&...preds)
bool sd_match(SDNode *N, const SelectionDAG *DAG, Pattern &&P)
NUses_match< 1, Value_match > m_OneUse()
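A sketch of how these SelectionDAG matchers compose in the usual combiner context, assuming an SDNode *N and a SelectionDAG &DAG are in scope, and assuming the m_Value() capture helper from the same SDPatternMatch namespace:

using namespace llvm::SDPatternMatch;

SDValue X, Y;
// Match either (srl X, Y) or (sra X, Y) and capture the operands.
if (sd_match(N, &DAG, m_AnyOf(m_Srl(m_Value(X), m_Value(Y)),
                              m_Sra(m_Value(X), m_Value(Y))))) {
  // X and Y are now bound to the shift operands.
}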
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
int ilogb(const IEEEFloat &Arg)
Definition: APFloat.cpp:4504
constexpr double e
Definition: MathExtras.h:31
DiagnosticInfoOptimizationBase::Argument NV
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
Definition: MathExtras.h:337
@ Offset
Definition: DWP.cpp:456
@ Length
Definition: DWP.cpp:456
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iterable types.
Definition: STLExtras.h:853
bool operator<(int64_t V1, const APSInt &V2)
Definition: APSInt.h:361
void stable_sort(R &&Range)
Definition: STLExtras.h:1995
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1742
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition: bit.h:385
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:239
bool isAllOnesOrAllOnesSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant -1 integer or a splatted vector of a constant -1 integer (with no undefs).
Definition: Utils.cpp:1527
SDValue getBitwiseNotOperand(SDValue V, SDValue Mask, bool AllowUndefs)
If V is a bitwise not, returns the inverted operand.
SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B, C, ...), such that A is the 0-based index of the item in the sequence, and B, C, ..., are the values from the original input ranges.
Definition: STLExtras.h:2406
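A small illustrative sketch (not code from this file) of how the range helpers referenced above, all_of, zip, and enumerate, are typically combined when reasoning about shuffle masks; the predicate and the notion of "agreeing" masks are assumptions made for the example:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"

using namespace llvm;

// True if every pair of corresponding mask elements matches or is undef (-1).
static bool masksAgree(ArrayRef<int> A, ArrayRef<int> B) {
  return A.size() == B.size() &&
         all_of(zip(A, B), [](auto P) {
           auto [L, R] = P;
           return L == R || L < 0 || R < 0;
         });
}

// Index of the first defined mask element, or -1 if there is none.
static int firstDefinedElt(ArrayRef<int> Mask) {
  for (auto [Idx, Elt] : enumerate(Mask))
    if (Elt >= 0)
      return static_cast<int>(Idx);
  return -1;
}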
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition: bit.h:307
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition: Alignment.h:145
llvm::SmallVector< int, 16 > createUnaryMask(ArrayRef< int > Mask, unsigned NumElts)
Given a shuffle mask for a binary shuffle, create the equivalent shuffle mask assuming both operands are the same.
bool isIntOrFPConstant(SDValue V)
Return true if V is either an integer or FP constant.
bool operator!=(uint64_t V1, const APInt &V2)
Definition: APInt.h:2043
bool operator>=(int64_t V1, const APSInt &V2)
Definition: APSInt.h:360
std::string & operator+=(std::string &buffer, StringRef string)
Definition: StringRef.h:895
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2073
const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal.address from the specified value, returning the original object being addressed.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:280
bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of widened elements.
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with no undefs).
Definition: Utils.cpp:1509
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:330
bool isMinSignedConstant(SDValue V)
Returns true if V is a constant min signed integer value.
ConstantFPSDNode * isConstOrConstSplatFP(SDValue N, bool AllowUndefs=false)
Returns the SDNode if it is a constant splat BuildVector or constant float.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition: MathExtras.h:372
int countr_zero(T Val)
Count the number of 0s from the least significant bit to the most significant, stopping at the first 1.
Definition: bit.h:215
unsigned M1(unsigned Val)
Definition: VE.h:376
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:324
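As a quick illustration of how the logarithm helpers above relate to each other (a sketch with made-up values, not code from this file):

#include <cassert>
#include <cstdint>
#include "llvm/Support/MathExtras.h"

using namespace llvm;

// For a power of two the floor and ceiling logarithms agree; otherwise the
// ceiling is exactly one larger than the floor.
static unsigned bitsToIndex(uint32_t NumElts) {
  assert(NumElts != 0 && "expected a non-zero count");
  if (isPowerOf2_32(NumElts))
    return Log2_32(NumElts);    // e.g. 8 -> 3
  return Log2_32_Ceil(NumElts); // e.g. 5 -> 3
}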
bool isConstantOrConstantVector(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowFP=true, bool AllowOpaqueConstants=true)
Return true if the specified instruction is known to be a constant, or a vector of constants.
Definition: Utils.cpp:1477
int countl_zero(T Val)
Count the number of 0s from the most significant bit to the least significant, stopping at the first 1.
Definition: bit.h:281
bool operator>(int64_t V1, const APSInt &V2)
Definition: APSInt.h:362
bool isBitwiseNot(SDValue V, bool AllowUndefs=false)
Returns true if V is a bitwise not operation.
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:419
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:275
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1647
detail::ValueMatchesPoly< M > HasValue(M Matcher)
Definition: Error.h:221
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
SDValue peekThroughTruncates(SDValue V)
Return the non-truncated source operand of V if it exists.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1736
SDValue peekThroughOneUseBitcasts(SDValue V)
Return the non-bitcasted and one-use source operand of V if it exists.
CodeGenOptLevel
Code generation optimization level.
Definition: CodeGen.h:54
bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with no undefs).
@ Other
Any other memory.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
CombineLevel
Definition: DAGCombine.h:15
@ AfterLegalizeDAG
Definition: DAGCombine.h:19
@ AfterLegalizeVectorOps
Definition: DAGCombine.h:18
@ BeforeLegalizeTypes
Definition: DAGCombine.h:16
@ AfterLegalizeTypes
Definition: DAGCombine.h:17
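CombineLevel records where in legalization the combiner is currently running; many transforms in this file are gated on it. A hypothetical sketch of such a gate (the helper name is invented for illustration):

#include "llvm/CodeGen/DAGCombine.h"

using namespace llvm;

// Before type legalization a combine may still introduce illegal types,
// because the type legalizer runs afterwards and cleans them up.
static bool mayCreateIllegalTypes(CombineLevel Level) {
  return Level == BeforeLegalizeTypes;
}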
void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed elements.
@ Or
Bitwise or logical OR of integers.
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ FMul
Product of floats.
@ And
Bitwise or logical AND of integers.
@ Add
Sum of integers.
@ FAdd
Sum of floats.
DWARFExpression::Operation Op
unsigned M0(unsigned Val)
Definition: VE.h:375
ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
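A sketch of the kind of guard the constant-splat helpers above enable; the transform it would protect is hypothetical, only the helper calls mirror real usage:

#include "llvm/CodeGen/SelectionDAGNodes.h"

using namespace llvm;

// True if Op is (possibly a bitcast of) the scalar constant 1 or a splat of 1,
// i.e. a multiplicative identity a combine could exploit.
static bool isIdentityForMul(SDValue Op) {
  SDValue Inner = peekThroughBitcasts(Op);
  if (ConstantSDNode *C = isConstOrConstSplat(Inner, /*AllowUndefs=*/true))
    return C->isOne();
  return false;
}

For this particular predicate isOneOrOneSplat already exists; the sketch only demonstrates how the lower-level helpers compose.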
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given predicate occurs in a range.
Definition: STLExtras.h:1921
bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
void getShuffleMaskWithWidestElts(ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Repetitively apply widenShuffleMaskElts() for as long as it succeeds, to get the shuffle mask with widest possible elements.
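The mask-scaling helpers above are easiest to see with a concrete mask; a sketch whose mask values are purely illustrative:

#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/VectorUtils.h"

using namespace llvm;

static void demoWidenMask() {
  SmallVector<int, 4> Wide;
  int Mask[] = {0, 1, 4, 5};
  // Each contiguous pair collapses to one wide element: {0, 1, 4, 5} -> {0, 2}.
  bool Ok = widenShuffleMaskElts(/*Scale=*/2, Mask, Wide);
  (void)Ok; // A mask such as {0, 2, 4, 5} fails, since its leading pair is not contiguous.
}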
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1879
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list is empty.
Definition: STLExtras.h:2039
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition: Alignment.h:208
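A short sketch of how the alignment helpers above are used when an access is split at a byte offset; the concrete values in the comment are illustrative:

#include "llvm/Support/Alignment.h"

using namespace llvm;

// Alignment that can be claimed for the piece of an access that starts Off
// bytes into a base known to be aligned to Base.
static Align pieceAlignment(Align Base, uint64_t Off) {
  // e.g. commonAlignment(Align(16), 4) == Align(4).
  return commonAlignment(Base, Off);
}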
bool isNeutralConstant(unsigned Opc, SDNodeFlags Flags, SDValue V, unsigned OperandNo)
Returns true if V is a neutral element of Opc with Flags.
bool operator<=(int64_t V1, const APSInt &V2)
Definition: APSInt.h:359
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition: MathExtras.h:360
int getSplatIndex(ArrayRef< int > Mask)
If all non-negative Mask elements are the same value, return that value.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
A collection of metadata nodes that might be associated with a memory access used by the alias-analysis infrastructure.
Definition: Metadata.h:760
AAMDNodes concat(const AAMDNodes &Other) const
Determine the best AAMDNodes after concatenating two different locations together.
static ExponentType semanticsMinExponent(const fltSemantics &)
Definition: APFloat.cpp:300
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:230
static ExponentType semanticsMaxExponent(const fltSemantics &)
Definition: APFloat.cpp:296
static unsigned int semanticsPrecision(const fltSemantics &)
Definition: APFloat.cpp:292
opStatus
IEEE-754R 7: Default exception handling.
Definition: APFloat.h:246
static unsigned int semanticsIntSizeInBits(const fltSemantics &, bool)
Definition: APFloat.cpp:306
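The fltSemantics accessors above are what the FP combines consult when deciding whether an integer is exactly representable in a given FP type; a sketch under that assumption:

#include "llvm/ADT/APFloat.h"

using namespace llvm;

// True if any unsigned integer needing at most ActiveBits significant bits
// converts to the FP type described by Sem without rounding.
static bool convertsExactly(const fltSemantics &Sem, unsigned ActiveBits) {
  // e.g. IEEEdouble has 53 bits of precision.
  return ActiveBits <= APFloatBase::semanticsPrecision(Sem);
}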
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Represent subnormal handling kind for floating point instruction inputs and outputs.
static constexpr DenormalMode getIEEE()
Extended Value Type.
Definition: ValueTypes.h:34
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted to an integer type with the same bitwidth.
Definition: ValueTypes.h:93
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:380
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:136
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:73
EVT changeTypeToInteger() const
Return the type converted to an equivalently sized integer or vector with integer element type.
Definition: ValueTypes.h:120
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:274
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition: ValueTypes.h:290
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:146
ElementCount getVectorElementCount() const
Definition: ValueTypes.h:340
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:358
bool isByteSized() const
Return true if the bit size is a multiple of 8.
Definition: ValueTypes.h:233
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition: ValueTypes.h:349
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:370
bool isPow2VectorType() const
Returns true if the vector's number of elements is a power of 2.
Definition: ValueTypes.h:455
TypeSize getStoreSizeInBits() const
Return the number of bits overwritten by a store of the specified value type.
Definition: ValueTypes.h:397
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:306
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:64
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:366
bool isScalableVT() const
Return true if the type is a scalable type.
Definition: ValueTypes.h:183
bool isFixedLengthVector() const
Definition: ValueTypes.h:177
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
Definition: ValueTypes.h:58
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:167
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:313
bool bitsGE(EVT VT) const
Return true if this has no less bits than VT.
Definition: ValueTypes.h:282
bool bitsEq(EVT VT) const
Return true if this has the same number of bits as VT.
Definition: ValueTypes.h:246
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:202
bool isRound() const
Return true if the size is a power-of-two number of bytes.
Definition: ValueTypes.h:238
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition: ValueTypes.h:173
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:318
bool isExtended() const
Test if the given EVT is extended (as opposed to being simple).
Definition: ValueTypes.h:141
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:156
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:326
bool isZeroSized() const
Test if the given EVT has zero size; this will fail if called on a scalable type.
Definition: ValueTypes.h:131
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition: ValueTypes.h:298
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:151
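A sketch tying a few of the EVT queries above together: building the integer type for half of a scalar integer value, the way narrowing combines reason about types. Ctx is assumed to come from the DAG's LLVMContext:

#include <cassert>
#include "llvm/CodeGen/ValueTypes.h"

using namespace llvm;

static EVT getHalfWidthIntVT(LLVMContext &Ctx, EVT VT) {
  assert(VT.isScalarInteger() && "expected a scalar integer EVT");
  // Fixed-width scalar, so the bit count is a plain integer.
  unsigned HalfBits = VT.getFixedSizeInBits() / 2;
  return EVT::getIntegerVT(Ctx, HalfBits);
}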
bool isNonNegative() const
Returns true if this value is known to be non-negative.
Definition: KnownBits.h:104
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition: KnownBits.h:238
bool isConstant() const
Returns true if we know the value of all bits.
Definition: KnownBits.h:50
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known bits.
Definition: KnownBits.h:292
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition: KnownBits.h:244
bool isAllOnes() const
Returns true if value is all one bits.
Definition: KnownBits.h:83
const APInt & getConstant() const
Returns the value when all bits have a known value.
Definition: KnownBits.h:57
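A sketch of a typical KnownBits query; Known would normally come from SelectionDAG::computeKnownBits on the value being combined:

#include "llvm/Support/KnownBits.h"

using namespace llvm;

// True if the value provably fits in Width unsigned bits: countMaxActiveBits()
// is the bit width minus the guaranteed leading zeros.
static bool fitsInUnsignedBits(const KnownBits &Known, unsigned Width) {
  return Known.countMaxActiveBits() <= Width;
}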
This class contains a discriminated union of information about pointers in memory operands, relating them back to LLVM IR or to virtual locations (such as frame indices) that are exposed during codegen.
unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
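A sketch of getWithOffset in the context it is usually used here: tracking the pointer info for the upper part of a split memory access (the split size is whatever the caller chose):

#include "llvm/CodeGen/MachineMemOperand.h"

using namespace llvm;

// Pointer info for the piece of the original access starting LoBytes further on.
static MachinePointerInfo highPartPtrInfo(const MachinePointerInfo &PtrInfo,
                                          int64_t LoBytes) {
  return PtrInfo.getWithOffset(LoBytes);
}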
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
These are IR-level optimization flags that may be propagated to SDNodes.
bool hasNoUnsignedWrap() const
bool hasDisjoint() const
bool hasNoSignedWrap() const
bool hasNonNeg() const
bool hasAllowReassociation() const
void setNoUnsignedWrap(bool b)
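When a combine rebuilds a node from two existing ones, the SDNodeFlags accessors above are used to propagate only the flags both inputs can justify; a sketch for the no-unsigned-wrap bit:

#include "llvm/CodeGen/SelectionDAGNodes.h"

using namespace llvm;

static SDNodeFlags intersectNUW(SDNodeFlags A, SDNodeFlags B) {
  SDNodeFlags Out;
  // The result may only claim no-unsigned-wrap if both inputs did.
  Out.setNoUnsignedWrap(A.hasNoUnsignedWrap() && B.hasNoUnsignedWrap());
  return Out;
}

The class also provides an intersectWith helper that performs this intersection across all flags at once.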
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
Clients of various APIs that cause global effects on the DAG can optionally implement this interface.
Definition: SelectionDAG.h:307
virtual void NodeDeleted(SDNode *N, SDNode *E)
The node N that was deleted and, if E is not null, an equivalent node E that replaced it.
virtual void NodeInserted(SDNode *N)
The node N that was inserted.
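DAGUpdateListener is how the combiner keeps its worklist consistent while the DAG mutates under it. A hypothetical subclass sketch (the class name and its bookkeeping are invented for illustration; only the listener API is real):

#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/CodeGen/SelectionDAG.h"

namespace {
// Records nodes deleted from the DAG so stale worklist entries can be skipped.
struct DeletedNodeTracker : public llvm::SelectionDAG::DAGUpdateListener {
  llvm::SmallPtrSet<llvm::SDNode *, 16> Deleted;

  explicit DeletedNodeTracker(llvm::SelectionDAG &DAG)
      : llvm::SelectionDAG::DAGUpdateListener(DAG) {}

  void NodeDeleted(llvm::SDNode *N, llvm::SDNode * /*E*/) override {
    Deleted.insert(N);
  }
};
} // end anonymous namespace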
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*vscale.
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetLowering to its clients that want to combine.