1//===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
10// both before and after the DAG is legalized.
11//
12// This pass is not a substitute for the LLVM IR instcombine pass. This pass is
13// primarily intended to handle simplification opportunities that are implicit
14// in the LLVM IR and exposed by the various codegen lowering phases.
15//
16//===----------------------------------------------------------------------===//
17
18#include "llvm/ADT/APFloat.h"
19#include "llvm/ADT/APInt.h"
20#include "llvm/ADT/ArrayRef.h"
21#include "llvm/ADT/DenseMap.h"
23#include "llvm/ADT/STLExtras.h"
24#include "llvm/ADT/SetVector.h"
27#include "llvm/ADT/SmallSet.h"
29#include "llvm/ADT/Statistic.h"
51#include "llvm/IR/Attributes.h"
52#include "llvm/IR/Constant.h"
53#include "llvm/IR/DataLayout.h"
55#include "llvm/IR/Function.h"
56#include "llvm/IR/Metadata.h"
61#include "llvm/Support/Debug.h"
69#include <algorithm>
70#include <cassert>
71#include <cstdint>
72#include <functional>
73#include <iterator>
74#include <optional>
75#include <string>
76#include <tuple>
77#include <utility>
78#include <variant>
79
80#include "MatchContext.h"
81
82using namespace llvm;
83using namespace llvm::SDPatternMatch;
84
85#define DEBUG_TYPE "dagcombine"
86
87STATISTIC(NodesCombined , "Number of dag nodes combined");
88STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
89STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
90STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
91STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
92STATISTIC(SlicedLoads, "Number of load sliced");
93STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");
94
95DEBUG_COUNTER(DAGCombineCounter, "dagcombine",
96 "Controls whether a DAG combine is performed for a node");
97
98static cl::opt<bool>
99CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
100 cl::desc("Enable DAG combiner's use of IR alias analysis"));
101
102static cl::opt<bool>
103UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
104 cl::desc("Enable DAG combiner's use of TBAA"));
105
106#ifndef NDEBUG
107static cl::opt<std::string>
108CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
109 cl::desc("Only use DAG-combiner alias analysis in this"
110 " function"));
111#endif
112
113/// Hidden option to stress test load slicing, i.e., when this option
114/// is enabled, load slicing bypasses most of its profitability guards.
115static cl::opt<bool>
116StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
117 cl::desc("Bypass the profitability model of load slicing"),
118 cl::init(false));
119
120static cl::opt<bool>
121 MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
122 cl::desc("DAG combiner may split indexing from loads"));
123
124static cl::opt<bool>
125 EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true),
126 cl::desc("DAG combiner enable merging multiple stores "
127 "into a wider store"));
128
130 "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
131 cl::desc("Limit the number of operands to inline for Token Factors"));
132
134 "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10),
135 cl::desc("Limit the number of times for the same StoreNode and RootNode "
136 "to bail out in store merging dependence check"));
137
139 "combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true),
140 cl::desc("DAG combiner enable reducing the width of load/op/store "
141 "sequence"));
143 "combiner-reduce-load-op-store-width-force-narrowing-profitable",
144 cl::Hidden, cl::init(false),
145 cl::desc("DAG combiner force override the narrowing profitable check when "
146 "reducing the width of load/op/store sequences"));
147
149 "combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true),
150 cl::desc("DAG combiner enable load/<replace bytes>/store with "
151 "a narrower store"));
152
153static cl::opt<bool> DisableCombines("combiner-disabled", cl::Hidden,
154 cl::init(false),
155 cl::desc("Disable the DAG combiner"));
156
157namespace {
158
159 class DAGCombiner {
160 SelectionDAG &DAG;
161 const TargetLowering &TLI;
162 const SelectionDAGTargetInfo *STI;
163 CombineLevel Level = BeforeLegalizeTypes;
164 CodeGenOptLevel OptLevel;
165 bool LegalDAG = false;
166 bool LegalOperations = false;
167 bool LegalTypes = false;
168 bool ForCodeSize;
169 bool DisableGenericCombines;
170
171 /// Worklist of all of the nodes that need to be simplified.
172 ///
173 /// This must behave as a stack -- new nodes to process are pushed onto the
174 /// back and when processing we pop off of the back.
175 ///
176 /// The worklist will not contain duplicates but may contain null entries
177 /// due to nodes being deleted from the underlying DAG. For fast lookup and
178 /// deduplication, the index of the node in this vector is stored in the
179 /// node in SDNode::CombinerWorklistIndex.
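 /// (A CombinerWorklistIndex of -1 means the node is not currently in the
 /// worklist; -2 means it has already been combined, as set in
 /// getNextWorklistEntry below.)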
180 SmallVector<SDNode *, 64> Worklist;
181
182 /// This records all nodes attempted to be added to the worklist since we
183 /// considered a new worklist entry. Since we do not add duplicate nodes
184 /// to the worklist, this is different from the tail of the worklist.
185 SmallSetVector<SDNode *, 32> PruningList;
186
187 /// Map from candidate StoreNode to the pair of RootNode and count.
188 /// The count is used to track how many times we have seen the StoreNode
189 /// with the same RootNode bail out in dependence check. If we have seen
190 /// the bail out for the same pair many times over a limit, we won't
191 /// consider the StoreNode with the same RootNode as store merging
192 /// candidate again.
193 DenseMap<SDNode *, std::pair<SDNode *, unsigned>> StoreRootCountMap;
194
195 // BatchAA - Used for DAG load/store alias analysis.
196 BatchAAResults *BatchAA;
197
198 /// This caches all chains that have already been processed in
199 /// DAGCombiner::getStoreMergeCandidates() and found to have no mergeable
200 /// stores candidates.
201 SmallPtrSet<SDNode *, 4> ChainsWithoutMergeableStores;
202
203 /// When an instruction is simplified, add all users of the instruction to
204 /// the work lists because they might get more simplified now.
205 void AddUsersToWorklist(SDNode *N) {
206 for (SDNode *Node : N->users())
207 AddToWorklist(Node);
208 }
209
210 /// Convenient shorthand to add a node and all of its users to the worklist.
211 void AddToWorklistWithUsers(SDNode *N) {
212 AddUsersToWorklist(N);
213 AddToWorklist(N);
214 }
215
216 // Prune potentially dangling nodes. This is called after
217 // any visit to a node, but should also be called during a visit after any
218 // failed combine which may have created a DAG node.
219 void clearAddedDanglingWorklistEntries() {
220 // Check any nodes added to the worklist to see if they are prunable.
221 while (!PruningList.empty()) {
222 auto *N = PruningList.pop_back_val();
223 if (N->use_empty())
224 recursivelyDeleteUnusedNodes(N);
225 }
226 }
227
228 SDNode *getNextWorklistEntry() {
229 // Before we do any work, remove nodes that are not in use.
230 clearAddedDanglingWorklistEntries();
231 SDNode *N = nullptr;
232 // The Worklist holds the SDNodes in order, but it may contain null
233 // entries.
234 while (!N && !Worklist.empty()) {
235 N = Worklist.pop_back_val();
236 }
237
238 if (N) {
239 assert(N->getCombinerWorklistIndex() >= 0 &&
240 "Found a worklist entry without a corresponding map entry!");
241 // Set to -2 to indicate that we combined the node.
242 N->setCombinerWorklistIndex(-2);
243 }
244 return N;
245 }
246
247 /// Call the node-specific routine that folds each particular type of node.
248 SDValue visit(SDNode *N);
249
250 public:
251 DAGCombiner(SelectionDAG &D, BatchAAResults *BatchAA, CodeGenOptLevel OL)
252 : DAG(D), TLI(D.getTargetLoweringInfo()),
253 STI(D.getSubtarget().getSelectionDAGInfo()), OptLevel(OL),
254 BatchAA(BatchAA) {
255 ForCodeSize = DAG.shouldOptForSize();
256 DisableGenericCombines =
257 DisableCombines || (STI && STI->disableGenericCombines(OptLevel));
258
259 MaximumLegalStoreInBits = 0;
260 // We use the minimum store size here, since that's all we can guarantee
261 // for the scalable vector types.
262 for (MVT VT : MVT::all_valuetypes())
263 if (EVT(VT).isSimple() && VT != MVT::Other &&
264 TLI.isTypeLegal(EVT(VT)) &&
265 VT.getSizeInBits().getKnownMinValue() >= MaximumLegalStoreInBits)
266 MaximumLegalStoreInBits = VT.getSizeInBits().getKnownMinValue();
267 }
268
269 void ConsiderForPruning(SDNode *N) {
270 // Mark this for potential pruning.
271 PruningList.insert(N);
272 }
273
274 /// Add to the worklist making sure its instance is at the back (next to be
275 /// processed.)
276 void AddToWorklist(SDNode *N, bool IsCandidateForPruning = true,
277 bool SkipIfCombinedBefore = false) {
278 assert(N->getOpcode() != ISD::DELETED_NODE &&
279 "Deleted Node added to Worklist");
280
281 // Skip handle nodes as they can't usefully be combined and confuse the
282 // zero-use deletion strategy.
283 if (N->getOpcode() == ISD::HANDLENODE)
284 return;
285
286 if (SkipIfCombinedBefore && N->getCombinerWorklistIndex() == -2)
287 return;
288
289 if (IsCandidateForPruning)
290 ConsiderForPruning(N);
291
292 if (N->getCombinerWorklistIndex() < 0) {
293 N->setCombinerWorklistIndex(Worklist.size());
294 Worklist.push_back(N);
295 }
296 }
297
298 /// Remove all instances of N from the worklist.
299 void removeFromWorklist(SDNode *N) {
300 PruningList.remove(N);
301 StoreRootCountMap.erase(N);
302
303 int WorklistIndex = N->getCombinerWorklistIndex();
304 // If not in the worklist, the index might be -1 or -2 (was combined
305 // before). As the node gets deleted anyway, there's no need to update
306 // the index.
307 if (WorklistIndex < 0)
308 return; // Not in the worklist.
309
310 // Null out the entry rather than erasing it to avoid a linear operation.
311 Worklist[WorklistIndex] = nullptr;
312 N->setCombinerWorklistIndex(-1);
313 }
314
315 void deleteAndRecombine(SDNode *N);
316 bool recursivelyDeleteUnusedNodes(SDNode *N);
317
318 /// Replaces all uses of the results of one DAG node with new values.
319 SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
320 bool AddTo = true);
321
322 /// Replaces all uses of the results of one DAG node with new values.
323 SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
324 return CombineTo(N, &Res, 1, AddTo);
325 }
326
327 /// Replaces all uses of the results of one DAG node with new values.
328 SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
329 bool AddTo = true) {
330 SDValue To[] = { Res0, Res1 };
331 return CombineTo(N, To, 2, AddTo);
332 }
333
334 SDValue CombineTo(SDNode *N, SmallVectorImpl<SDValue> *To,
335 bool AddTo = true) {
336 return CombineTo(N, To->data(), To->size(), AddTo);
337 }
338
339 void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
340
341 private:
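 /// Widest legal store width in bits (using the known-minimum size for
 /// scalable vector types); computed from the legal value types in the
 /// constructor above.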
342 unsigned MaximumLegalStoreInBits;
343
344 /// Check the specified integer node value to see if it can be simplified or
345 /// if things it uses can be simplified by bit propagation.
346 /// If so, return true.
347 bool SimplifyDemandedBits(SDValue Op) {
348 unsigned BitWidth = Op.getScalarValueSizeInBits();
349 APInt DemandedBits = APInt::getAllOnes(BitWidth);
350 return SimplifyDemandedBits(Op, DemandedBits);
351 }
352
353 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
354 EVT VT = Op.getValueType();
355 APInt DemandedElts = VT.isFixedLengthVector()
356 ? APInt::getAllOnes(VT.getVectorNumElements())
357 : APInt(1, 1);
358 return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, false);
359 }
360
361 /// Check the specified vector node value to see if it can be simplified or
362 /// if things it uses can be simplified as it only uses some of the
363 /// elements. If so, return true.
364 bool SimplifyDemandedVectorElts(SDValue Op) {
365 // TODO: For now just pretend it cannot be simplified.
366 if (Op.getValueType().isScalableVector())
367 return false;
368
369 unsigned NumElts = Op.getValueType().getVectorNumElements();
370 APInt DemandedElts = APInt::getAllOnes(NumElts);
371 return SimplifyDemandedVectorElts(Op, DemandedElts);
372 }
373
374 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
375 const APInt &DemandedElts,
376 bool AssumeSingleUse = false);
377 bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
378 bool AssumeSingleUse = false);
379
380 bool CombineToPreIndexedLoadStore(SDNode *N);
381 bool CombineToPostIndexedLoadStore(SDNode *N);
382 SDValue SplitIndexingFromLoad(LoadSDNode *LD);
383 bool SliceUpLoad(SDNode *N);
384
385 // Looks up the chain to find a unique (unaliased) store feeding the passed
386 // load. If no such store is found, returns a nullptr.
387 // Note: This will look past a CALLSEQ_START if the load is chained to it
388 // so that it can find stack stores for byval params.
389 StoreSDNode *getUniqueStoreFeeding(LoadSDNode *LD, int64_t &Offset);
390 // Scalars have size 0 to distinguish from singleton vectors.
391 SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
392 bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
393 bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);
394
395 void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
396 SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
397 SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
398 SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
399 SDValue PromoteIntBinOp(SDValue Op);
400 SDValue PromoteIntShiftOp(SDValue Op);
401 SDValue PromoteExtend(SDValue Op);
402 bool PromoteLoad(SDValue Op);
403
404 SDValue foldShiftToAvg(SDNode *N, const SDLoc &DL);
405 // Fold `a bitwiseop (~b +/- c)` -> `a bitwiseop ~(b -/+ c)`
406 SDValue foldBitwiseOpWithNeg(SDNode *N, const SDLoc &DL, EVT VT);
407
408 SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
409 SDValue RHS, SDValue True, SDValue False,
410 ISD::CondCode CC);
411
412 /// Call the node-specific routine that knows how to fold each
413 /// particular type of node. If that doesn't do anything, try the
414 /// target-specific DAG combines.
415 SDValue combine(SDNode *N);
416
417 // Visitation implementation - Implement dag node combining for different
418 // node types. The semantics are as follows:
419 // Return Value:
420 // SDValue.getNode() == 0 - No change was made
421 // SDValue.getNode() == N - N was replaced, is dead and has been handled.
422 // otherwise - N should be replaced by the returned Operand.
423 //
424 SDValue visitTokenFactor(SDNode *N);
425 SDValue visitMERGE_VALUES(SDNode *N);
426 SDValue visitADD(SDNode *N);
427 SDValue visitADDLike(SDNode *N);
428 SDValue visitADDLikeCommutative(SDValue N0, SDValue N1,
429 SDNode *LocReference);
430 SDValue visitPTRADD(SDNode *N);
431 SDValue visitSUB(SDNode *N);
432 SDValue visitADDSAT(SDNode *N);
433 SDValue visitSUBSAT(SDNode *N);
434 SDValue visitADDC(SDNode *N);
435 SDValue visitADDO(SDNode *N);
436 SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
437 SDValue visitSUBC(SDNode *N);
438 SDValue visitSUBO(SDNode *N);
439 SDValue visitADDE(SDNode *N);
440 SDValue visitUADDO_CARRY(SDNode *N);
441 SDValue visitSADDO_CARRY(SDNode *N);
442 SDValue visitUADDO_CARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
443 SDNode *N);
444 SDValue visitSADDO_CARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
445 SDNode *N);
446 SDValue visitSUBE(SDNode *N);
447 SDValue visitUSUBO_CARRY(SDNode *N);
448 SDValue visitSSUBO_CARRY(SDNode *N);
449 template <class MatchContextClass> SDValue visitMUL(SDNode *N);
450 SDValue visitMULFIX(SDNode *N);
451 SDValue useDivRem(SDNode *N);
452 SDValue visitSDIV(SDNode *N);
453 SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
454 SDValue visitUDIV(SDNode *N);
455 SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
456 SDValue visitREM(SDNode *N);
457 SDValue visitMULHU(SDNode *N);
458 SDValue visitMULHS(SDNode *N);
459 SDValue visitAVG(SDNode *N);
460 SDValue visitABD(SDNode *N);
461 SDValue visitSMUL_LOHI(SDNode *N);
462 SDValue visitUMUL_LOHI(SDNode *N);
463 SDValue visitMULO(SDNode *N);
464 SDValue visitIMINMAX(SDNode *N);
465 SDValue visitAND(SDNode *N);
466 SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
467 SDValue visitOR(SDNode *N);
468 SDValue visitORLike(SDValue N0, SDValue N1, const SDLoc &DL);
469 SDValue visitXOR(SDNode *N);
470 SDValue SimplifyVCastOp(SDNode *N, const SDLoc &DL);
471 SDValue SimplifyVBinOp(SDNode *N, const SDLoc &DL);
472 SDValue visitSHL(SDNode *N);
473 SDValue visitSRA(SDNode *N);
474 SDValue visitSRL(SDNode *N);
475 SDValue visitFunnelShift(SDNode *N);
476 SDValue visitSHLSAT(SDNode *N);
477 SDValue visitRotate(SDNode *N);
478 SDValue visitABS(SDNode *N);
479 SDValue visitBSWAP(SDNode *N);
480 SDValue visitBITREVERSE(SDNode *N);
481 SDValue visitCTLZ(SDNode *N);
482 SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
483 SDValue visitCTTZ(SDNode *N);
484 SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
485 SDValue visitCTPOP(SDNode *N);
486 SDValue visitSELECT(SDNode *N);
487 SDValue visitVSELECT(SDNode *N);
488 SDValue visitVP_SELECT(SDNode *N);
489 SDValue visitSELECT_CC(SDNode *N);
490 SDValue visitSETCC(SDNode *N);
491 SDValue visitSETCCCARRY(SDNode *N);
492 SDValue visitSIGN_EXTEND(SDNode *N);
493 SDValue visitZERO_EXTEND(SDNode *N);
494 SDValue visitANY_EXTEND(SDNode *N);
495 SDValue visitAssertExt(SDNode *N);
496 SDValue visitAssertAlign(SDNode *N);
497 SDValue visitSIGN_EXTEND_INREG(SDNode *N);
498 SDValue visitEXTEND_VECTOR_INREG(SDNode *N);
499 SDValue visitTRUNCATE(SDNode *N);
500 SDValue visitTRUNCATE_USAT_U(SDNode *N);
501 SDValue visitBITCAST(SDNode *N);
502 SDValue visitFREEZE(SDNode *N);
503 SDValue visitBUILD_PAIR(SDNode *N);
504 SDValue visitFADD(SDNode *N);
505 SDValue visitVP_FADD(SDNode *N);
506 SDValue visitVP_FSUB(SDNode *N);
507 SDValue visitSTRICT_FADD(SDNode *N);
508 SDValue visitFSUB(SDNode *N);
509 SDValue visitFMUL(SDNode *N);
510 template <class MatchContextClass> SDValue visitFMA(SDNode *N);
511 SDValue visitFMAD(SDNode *N);
512 SDValue visitFMULADD(SDNode *N);
513 SDValue visitFDIV(SDNode *N);
514 SDValue visitFREM(SDNode *N);
515 SDValue visitFSQRT(SDNode *N);
516 SDValue visitFCOPYSIGN(SDNode *N);
517 SDValue visitFPOW(SDNode *N);
518 SDValue visitFCANONICALIZE(SDNode *N);
519 SDValue visitSINT_TO_FP(SDNode *N);
520 SDValue visitUINT_TO_FP(SDNode *N);
521 SDValue visitFP_TO_SINT(SDNode *N);
522 SDValue visitFP_TO_UINT(SDNode *N);
523 SDValue visitXROUND(SDNode *N);
524 SDValue visitFP_ROUND(SDNode *N);
525 SDValue visitFP_EXTEND(SDNode *N);
526 SDValue visitFNEG(SDNode *N);
527 SDValue visitFABS(SDNode *N);
528 SDValue visitFCEIL(SDNode *N);
529 SDValue visitFTRUNC(SDNode *N);
530 SDValue visitFFREXP(SDNode *N);
531 SDValue visitFFLOOR(SDNode *N);
532 SDValue visitFMinMax(SDNode *N);
533 SDValue visitBRCOND(SDNode *N);
534 SDValue visitBR_CC(SDNode *N);
535 SDValue visitLOAD(SDNode *N);
536
537 SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
538 SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
539 SDValue replaceStoreOfInsertLoad(StoreSDNode *ST);
540
541 bool refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(SDNode *N);
542
543 SDValue visitSTORE(SDNode *N);
544 SDValue visitATOMIC_STORE(SDNode *N);
545 SDValue visitLIFETIME_END(SDNode *N);
546 SDValue visitINSERT_VECTOR_ELT(SDNode *N);
547 SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
548 SDValue visitBUILD_VECTOR(SDNode *N);
549 SDValue visitCONCAT_VECTORS(SDNode *N);
550 SDValue visitVECTOR_INTERLEAVE(SDNode *N);
551 SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
552 SDValue visitVECTOR_SHUFFLE(SDNode *N);
553 SDValue visitSCALAR_TO_VECTOR(SDNode *N);
554 SDValue visitINSERT_SUBVECTOR(SDNode *N);
555 SDValue visitVECTOR_COMPRESS(SDNode *N);
556 SDValue visitMLOAD(SDNode *N);
557 SDValue visitMSTORE(SDNode *N);
558 SDValue visitMGATHER(SDNode *N);
559 SDValue visitMSCATTER(SDNode *N);
560 SDValue visitMHISTOGRAM(SDNode *N);
561 SDValue visitPARTIAL_REDUCE_MLA(SDNode *N);
562 SDValue visitVPGATHER(SDNode *N);
563 SDValue visitVPSCATTER(SDNode *N);
564 SDValue visitVP_STRIDED_LOAD(SDNode *N);
565 SDValue visitVP_STRIDED_STORE(SDNode *N);
566 SDValue visitFP_TO_FP16(SDNode *N);
567 SDValue visitFP16_TO_FP(SDNode *N);
568 SDValue visitFP_TO_BF16(SDNode *N);
569 SDValue visitBF16_TO_FP(SDNode *N);
570 SDValue visitVECREDUCE(SDNode *N);
571 SDValue visitVPOp(SDNode *N);
572 SDValue visitGET_FPENV_MEM(SDNode *N);
573 SDValue visitSET_FPENV_MEM(SDNode *N);
574
575 template <class MatchContextClass>
576 SDValue visitFADDForFMACombine(SDNode *N);
577 template <class MatchContextClass>
578 SDValue visitFSUBForFMACombine(SDNode *N);
579 SDValue visitFMULForFMADistributiveCombine(SDNode *N);
580
581 SDValue XformToShuffleWithZero(SDNode *N);
582 bool reassociationCanBreakAddressingModePattern(unsigned Opc,
583 const SDLoc &DL,
584 SDNode *N,
585 SDValue N0,
586 SDValue N1);
587 SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
588 SDValue N1, SDNodeFlags Flags);
589 SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
590 SDValue N1, SDNodeFlags Flags);
591 SDValue reassociateReduction(unsigned RedOpc, unsigned Opc, const SDLoc &DL,
592 EVT VT, SDValue N0, SDValue N1,
593 SDNodeFlags Flags = SDNodeFlags());
594
595 SDValue visitShiftByConstant(SDNode *N);
596
597 SDValue foldSelectOfConstants(SDNode *N);
598 SDValue foldVSelectOfConstants(SDNode *N);
599 SDValue foldBinOpIntoSelect(SDNode *BO);
600 bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
601 SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
602 SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
603 SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
604 SDValue N2, SDValue N3, ISD::CondCode CC,
605 bool NotExtCompare = false);
606 SDValue convertSelectOfFPConstantsToLoadOffset(
607 const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
608 ISD::CondCode CC);
609 SDValue foldSignChangeInBitcast(SDNode *N);
610 SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
611 SDValue N2, SDValue N3, ISD::CondCode CC);
612 SDValue foldSelectOfBinops(SDNode *N);
613 SDValue foldSextSetcc(SDNode *N);
614 SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
615 const SDLoc &DL);
616 SDValue foldSubToUSubSat(EVT DstVT, SDNode *N, const SDLoc &DL);
617 SDValue foldABSToABD(SDNode *N, const SDLoc &DL);
618 SDValue foldSelectToABD(SDValue LHS, SDValue RHS, SDValue True,
619 SDValue False, ISD::CondCode CC, const SDLoc &DL);
620 SDValue foldSelectToUMin(SDValue LHS, SDValue RHS, SDValue True,
621 SDValue False, ISD::CondCode CC, const SDLoc &DL);
622 SDValue unfoldMaskedMerge(SDNode *N);
623 SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
624 SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
625 const SDLoc &DL, bool foldBooleans);
626 SDValue rebuildSetCC(SDValue N);
627
628 bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
629 SDValue &CC, bool MatchStrict = false) const;
630 bool isOneUseSetCC(SDValue N) const;
631
632 SDValue foldAddToAvg(SDNode *N, const SDLoc &DL);
633 SDValue foldSubToAvg(SDNode *N, const SDLoc &DL);
634
635 SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
636 unsigned HiOp);
637 SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
638 SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
639 const TargetLowering &TLI);
640 SDValue foldPartialReduceMLAMulOp(SDNode *N);
641 SDValue foldPartialReduceAdd(SDNode *N);
642
643 SDValue CombineExtLoad(SDNode *N);
644 SDValue CombineZExtLogicopShiftLoad(SDNode *N);
645 SDValue combineRepeatedFPDivisors(SDNode *N);
646 SDValue combineFMulOrFDivWithIntPow2(SDNode *N);
647 SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf);
648 SDValue mergeInsertEltWithShuffle(SDNode *N, unsigned InsIndex);
649 SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
650 SDValue combineInsertEltToLoad(SDNode *N, unsigned InsIndex);
651 SDValue BuildSDIV(SDNode *N);
652 SDValue BuildSDIVPow2(SDNode *N);
653 SDValue BuildUDIV(SDNode *N);
654 SDValue BuildSREMPow2(SDNode *N);
655 SDValue buildOptimizedSREM(SDValue N0, SDValue N1, SDNode *N);
656 SDValue BuildLogBase2(SDValue V, const SDLoc &DL,
657 bool KnownNeverZero = false,
658 bool InexpensiveOnly = false,
659 std::optional<EVT> OutVT = std::nullopt);
660 SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags);
661 SDValue buildRsqrtEstimate(SDValue Op);
662 SDValue buildSqrtEstimate(SDValue Op);
663 SDValue buildSqrtEstimateImpl(SDValue Op, bool Recip);
664 SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
665 bool Reciprocal);
666 SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
667 bool Reciprocal);
668 SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
669 bool DemandHighBits = true);
670 SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
671 SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
672 SDValue InnerPos, SDValue InnerNeg, bool FromAdd,
673 bool HasPos, unsigned PosOpcode,
674 unsigned NegOpcode, const SDLoc &DL);
675 SDValue MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, SDValue Neg,
676 SDValue InnerPos, SDValue InnerNeg, bool FromAdd,
677 bool HasPos, unsigned PosOpcode,
678 unsigned NegOpcode, const SDLoc &DL);
679 SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL,
680 bool FromAdd);
681 SDValue MatchLoadCombine(SDNode *N);
682 SDValue mergeTruncStores(StoreSDNode *N);
683 SDValue reduceLoadWidth(SDNode *N);
684 SDValue ReduceLoadOpStoreWidth(SDNode *N);
685 SDValue splitMergedValStore(StoreSDNode *ST);
686 SDValue TransformFPLoadStorePair(SDNode *N);
687 SDValue convertBuildVecZextToZext(SDNode *N);
688 SDValue convertBuildVecZextToBuildVecWithZeros(SDNode *N);
689 SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
690 SDValue reduceBuildVecTruncToBitCast(SDNode *N);
691 SDValue reduceBuildVecToShuffle(SDNode *N);
692 SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
693 ArrayRef<int> VectorMask, SDValue VecIn1,
694 SDValue VecIn2, unsigned LeftIdx,
695 bool DidSplitVec);
696 SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
697
698 /// Walk up chain skipping non-aliasing memory nodes,
699 /// looking for aliasing nodes and adding them to the Aliases vector.
700 void GatherAllAliases(SDNode *N, SDValue OriginalChain,
701 SmallVectorImpl<SDValue> &Aliases);
702
703 /// Return true if there is any possibility that the two addresses overlap.
704 bool mayAlias(SDNode *Op0, SDNode *Op1) const;
705
706 /// Walk up chain skipping non-aliasing memory nodes, looking for a better
707 /// chain (aliasing node.)
708 SDValue FindBetterChain(SDNode *N, SDValue Chain);
709
710 /// Try to replace a store and any possibly adjacent stores on
711 /// consecutive chains with better chains. Return true only if St is
712 /// replaced.
713 ///
714 /// Notice that other chains may still be replaced even if the function
715 /// returns false.
716 bool findBetterNeighborChains(StoreSDNode *St);
717
718 // Helper for findBetterNeighborChains. Walk up the store chain and add
719 // additional chained stores that do not overlap and can be parallelized.
720 bool parallelizeChainedStores(StoreSDNode *St);
721
722 /// Holds a pointer to an LSBaseSDNode as well as information on where it
723 /// is located in a sequence of memory operations connected by a chain.
724 struct MemOpLink {
725 // Ptr to the mem node.
726 LSBaseSDNode *MemNode;
727
728 // Offset from the base ptr.
729 int64_t OffsetFromBase;
730
731 MemOpLink(LSBaseSDNode *N, int64_t Offset)
732 : MemNode(N), OffsetFromBase(Offset) {}
733 };
734
735 // Classify the origin of a stored value.
736 enum class StoreSource { Unknown, Constant, Extract, Load };
737 StoreSource getStoreSource(SDValue StoreVal) {
738 switch (StoreVal.getOpcode()) {
739 case ISD::Constant:
740 case ISD::ConstantFP:
741 return StoreSource::Constant;
742 case ISD::BUILD_VECTOR:
743 if (ISD::isBuildVectorOfConstantSDNodes(StoreVal.getNode()) ||
744 ISD::isBuildVectorOfConstantFPSDNodes(StoreVal.getNode()))
745 return StoreSource::Constant;
746 return StoreSource::Unknown;
747 case ISD::EXTRACT_VECTOR_ELT:
748 case ISD::EXTRACT_SUBVECTOR:
749 return StoreSource::Extract;
750 case ISD::LOAD:
751 return StoreSource::Load;
752 default:
753 return StoreSource::Unknown;
754 }
755 }
756
757 /// This is a helper function for visitMUL to check the profitability
758 /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
759 /// MulNode is the original multiply, AddNode is (add x, c1),
760 /// and ConstNode is c2.
761 bool isMulAddWithConstProfitable(SDNode *MulNode, SDValue AddNode,
762 SDValue ConstNode);
763
764 /// This is a helper function for visitAND and visitZERO_EXTEND. Returns
765 /// true if the (and (load x) c) pattern matches an extload. ExtVT returns
766 /// the type of the loaded value to be extended.
767 bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
768 EVT LoadResultTy, EVT &ExtVT);
769
770 /// Helper function to calculate whether the given Load/Store can have its
771 /// width reduced to ExtVT.
772 bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
773 EVT &MemVT, unsigned ShAmt = 0);
774
775 /// Used by BackwardsPropagateMask to find suitable loads.
776 bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
777 SmallPtrSetImpl<SDNode*> &NodesWithConsts,
778 ConstantSDNode *Mask, SDNode *&NodeToMask);
779 /// Attempt to propagate a given AND node back to load leaves so that they
780 /// can be combined into narrow loads.
781 bool BackwardsPropagateMask(SDNode *N);
782
783 /// Helper function for mergeConsecutiveStores which merges the component
784 /// store chains.
785 SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
786 unsigned NumStores);
787
788 /// Helper function for mergeConsecutiveStores which checks if all the store
789 /// nodes have the same underlying object. We can still reuse the first
790 /// store's pointer info if all the stores are from the same object.
791 bool hasSameUnderlyingObj(ArrayRef<MemOpLink> StoreNodes);
792
793 /// This is a helper function for mergeConsecutiveStores. When the source
794 /// elements of the consecutive stores are all constants or all extracted
795 /// vector elements, try to merge them into one larger store introducing
796 /// bitcasts if necessary. \return True if a merged store was created.
797 bool mergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
798 EVT MemVT, unsigned NumStores,
799 bool IsConstantSrc, bool UseVector,
800 bool UseTrunc);
801
802 /// This is a helper function for mergeConsecutiveStores. Stores that
803 /// potentially may be merged with St are placed in StoreNodes. On success,
804 /// returns a chain predecessor to all store candidates.
805 SDNode *getStoreMergeCandidates(StoreSDNode *St,
806 SmallVectorImpl<MemOpLink> &StoreNodes);
807
808 /// Helper function for mergeConsecutiveStores. Checks if candidate stores
809 /// have indirect dependency through their operands. RootNode is the
810 /// predecessor to all stores calculated by getStoreMergeCandidates and is
811 /// used to prune the dependency check. \return True if safe to merge.
812 bool checkMergeStoreCandidatesForDependencies(
813 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
814 SDNode *RootNode);
815
816 /// Helper function for tryStoreMergeOfLoads. Checks if the load/store
817 /// chain has a call in it. \return True if a call is found.
818 bool hasCallInLdStChain(StoreSDNode *St, LoadSDNode *Ld);
819
820 /// This is a helper function for mergeConsecutiveStores. Given a list of
821 /// store candidates, find the first N that are consecutive in memory.
822 /// Returns 0 if there are not at least 2 consecutive stores to try merging.
823 unsigned getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
824 int64_t ElementSizeBytes) const;
825
826 /// This is a helper function for mergeConsecutiveStores. It is used for
827 /// store chains that are composed entirely of constant values.
828 bool tryStoreMergeOfConstants(SmallVectorImpl<MemOpLink> &StoreNodes,
829 unsigned NumConsecutiveStores,
830 EVT MemVT, SDNode *Root, bool AllowVectors);
831
832 /// This is a helper function for mergeConsecutiveStores. It is used for
833 /// store chains that are composed entirely of extracted vector elements.
834 /// When extracting multiple vector elements, try to store them in one
835 /// vector store rather than a sequence of scalar stores.
836 bool tryStoreMergeOfExtracts(SmallVectorImpl<MemOpLink> &StoreNodes,
837 unsigned NumConsecutiveStores, EVT MemVT,
838 SDNode *Root);
839
840 /// This is a helper function for mergeConsecutiveStores. It is used for
841 /// store chains that are composed entirely of loaded values.
842 bool tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
843 unsigned NumConsecutiveStores, EVT MemVT,
844 SDNode *Root, bool AllowVectors,
845 bool IsNonTemporalStore, bool IsNonTemporalLoad);
846
847 /// Merge consecutive store operations into a wide store.
848 /// This optimization uses wide integers or vectors when possible.
849 /// \return true if stores were merged.
850 bool mergeConsecutiveStores(StoreSDNode *St);
851
852 /// Try to transform a truncation where C is a constant:
853 /// (trunc (and X, C)) -> (and (trunc X), (trunc C))
854 ///
855 /// \p N needs to be a truncation and its first operand an AND. Other
856 /// requirements are checked by the function (e.g. that trunc is
857 /// single-use) and, if they are not met, an empty SDValue is returned.
858 SDValue distributeTruncateThroughAnd(SDNode *N);
859
860 /// Helper function to determine whether the target supports operation
861 /// given by \p Opcode for type \p VT, that is, whether the operation
862 /// is legal or custom before legalizing operations, and whether it is
863 /// legal (but not custom) after legalization.
864 bool hasOperation(unsigned Opcode, EVT VT) {
865 return TLI.isOperationLegalOrCustom(Opcode, VT, LegalOperations);
866 }
867
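 /// Return true if ISD::UMIN on \p VT is, or after type legalization will
 /// be, legal or custom on the target.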
868 bool hasUMin(EVT VT) const {
869 auto LK = TLI.getTypeConversion(*DAG.getContext(), VT);
870 return (LK.first == TargetLoweringBase::TypeLegal ||
871 LK.first == TargetLoweringBase::TypePromoteInteger) &&
872 TLI.isOperationLegalOrCustom(ISD::UMIN, LK.second);
873 }
874
875 public:
876 /// Runs the dag combiner on all nodes in the work list
877 void Run(CombineLevel AtLevel);
878
879 SelectionDAG &getDAG() const { return DAG; }
880
881 /// Convenience wrapper around TargetLowering::getShiftAmountTy.
882 EVT getShiftAmountTy(EVT LHSTy) {
883 return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout());
884 }
885
886 /// This method returns true if we are running before type legalization or
887 /// if the specified VT is legal.
888 bool isTypeLegal(const EVT &VT) {
889 if (!LegalTypes) return true;
890 return TLI.isTypeLegal(VT);
891 }
892
893 /// Convenience wrapper around TargetLowering::getSetCCResultType
894 EVT getSetCCResultType(EVT VT) const {
895 return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
896 }
897
898 void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
899 SDValue OrigLoad, SDValue ExtLoad,
900 ISD::NodeType ExtType);
901 };
902
903/// This class is a DAGUpdateListener that removes any deleted
904/// nodes from the worklist.
905class WorklistRemover : public SelectionDAG::DAGUpdateListener {
906 DAGCombiner &DC;
907
908public:
909 explicit WorklistRemover(DAGCombiner &dc)
910 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
911
912 void NodeDeleted(SDNode *N, SDNode *E) override {
913 DC.removeFromWorklist(N);
914 }
915};
916
917class WorklistInserter : public SelectionDAG::DAGUpdateListener {
918 DAGCombiner &DC;
919
920public:
921 explicit WorklistInserter(DAGCombiner &dc)
922 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
923
924 // FIXME: Ideally we could add N to the worklist, but this causes exponential
925 // compile time costs in large DAGs, e.g. Halide.
926 void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
927};
928
929} // end anonymous namespace
930
931//===----------------------------------------------------------------------===//
932// TargetLowering::DAGCombinerInfo implementation
933//===----------------------------------------------------------------------===//
934
935void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
936 ((DAGCombiner*)DC)->AddToWorklist(N);
937}
938
939SDValue TargetLowering::DAGCombinerInfo::
940CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
941 return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
942}
943
944SDValue TargetLowering::DAGCombinerInfo::
945CombineTo(SDNode *N, SDValue Res, bool AddTo) {
946 return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
947}
948
949SDValue TargetLowering::DAGCombinerInfo::
950CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
951 return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
952}
953
954bool TargetLowering::DAGCombinerInfo::
955recursivelyDeleteUnusedNodes(SDNode *N) {
956 return ((DAGCombiner*)DC)->recursivelyDeleteUnusedNodes(N);
957}
958
959void TargetLowering::DAGCombinerInfo::
960CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
961 return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
962}
963
964//===----------------------------------------------------------------------===//
965// Helper Functions
966//===----------------------------------------------------------------------===//
967
968void DAGCombiner::deleteAndRecombine(SDNode *N) {
969 removeFromWorklist(N);
970
971 // If the operands of this node are only used by the node, they will now be
972 // dead. Make sure to re-visit them and recursively delete dead nodes.
973 for (const SDValue &Op : N->ops())
974 // For an operand generating multiple values, one of the values may
975 // become dead allowing further simplification (e.g. split index
976 // arithmetic from an indexed load).
977 if (Op->hasOneUse() || Op->getNumValues() > 1)
978 AddToWorklist(Op.getNode());
979
980 DAG.DeleteNode(N);
981}
982
983// APInts must be the same size for most operations; this helper
984// function zero extends the shorter of the pair so that they match.
985// We provide an Offset so that we can create bitwidths that won't overflow.
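// For example, an 8-bit LHS and a 16-bit RHS with Offset == 1 are both
// zero-extended to 17 bits, leaving headroom so a following operation on the
// pair cannot overflow.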
986static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
987 unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
988 LHS = LHS.zext(Bits);
989 RHS = RHS.zext(Bits);
990}
991
992// Return true if this node is a setcc, or is a select_cc
993// that selects between the target values used for true and false, making it
994// equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
995// the appropriate nodes based on the type of node we are checking. This
996// simplifies life a bit for the callers.
997bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
998 SDValue &CC, bool MatchStrict) const {
999 if (N.getOpcode() == ISD::SETCC) {
1000 LHS = N.getOperand(0);
1001 RHS = N.getOperand(1);
1002 CC = N.getOperand(2);
1003 return true;
1004 }
1005
1006 if (MatchStrict &&
1007 (N.getOpcode() == ISD::STRICT_FSETCC ||
1008 N.getOpcode() == ISD::STRICT_FSETCCS)) {
1009 LHS = N.getOperand(1);
1010 RHS = N.getOperand(2);
1011 CC = N.getOperand(3);
1012 return true;
1013 }
1014
1015 if (N.getOpcode() != ISD::SELECT_CC || !TLI.isConstTrueVal(N.getOperand(2)) ||
1016 !TLI.isConstFalseVal(N.getOperand(3)))
1017 return false;
1018
1019 if (TLI.getBooleanContents(N.getValueType()) ==
1020 TargetLowering::UndefinedBooleanContent)
1021 return false;
1022
1023 LHS = N.getOperand(0);
1024 RHS = N.getOperand(1);
1025 CC = N.getOperand(4);
1026 return true;
1027}
1028
1029/// Return true if this is a SetCC-equivalent operation with only one use.
1030/// If this is true, it allows the users to invert the operation for free when
1031/// it is profitable to do so.
1032bool DAGCombiner::isOneUseSetCC(SDValue N) const {
1033 SDValue N0, N1, N2;
1034 if (isSetCCEquivalent(N, N0, N1, N2) && N->hasOneUse())
1035 return true;
1036 return false;
1037}
1038
1039static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy) {
1040 if (!ScalarTy.isSimple())
1041 return false;
1042
1043 uint64_t MaskForTy = 0ULL;
1044 switch (ScalarTy.getSimpleVT().SimpleTy) {
1045 case MVT::i8:
1046 MaskForTy = 0xFFULL;
1047 break;
1048 case MVT::i16:
1049 MaskForTy = 0xFFFFULL;
1050 break;
1051 case MVT::i32:
1052 MaskForTy = 0xFFFFFFFFULL;
1053 break;
1054 default:
1055 return false;
1056 break;
1057 }
1058
1059 APInt Val;
1060 if (ISD::isConstantSplatVector(N, Val))
1061 return Val.getLimitedValue() == MaskForTy;
1062
1063 return false;
1064}
1065
1066// Determines if it is a constant integer or a splat/build vector of constant
1067// integers (and undefs).
1068// Do not permit build vector implicit truncation.
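// For example, a v4i32 (build_vector 1, undef, 3, 4) whose operands are i32
// constants qualifies; operands whose constant width differs from the
// vector's scalar size (an implicit truncation) do not.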
1069static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
1070 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
1071 return !(Const->isOpaque() && NoOpaques);
1072 if (N.getOpcode() != ISD::BUILD_VECTOR && N.getOpcode() != ISD::SPLAT_VECTOR)
1073 return false;
1074 unsigned BitWidth = N.getScalarValueSizeInBits();
1075 for (const SDValue &Op : N->op_values()) {
1076 if (Op.isUndef())
1077 continue;
1078 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
1079 if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
1080 (Const->isOpaque() && NoOpaques))
1081 return false;
1082 }
1083 return true;
1084}
1085
1086// Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
1087// undef's.
1088static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
1089 if (V.getOpcode() != ISD::BUILD_VECTOR)
1090 return false;
1091 return isConstantOrConstantVector(V, NoOpaques) ||
1092 ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
1093}
1094
1095// Determine if this is an indexed load with an opaque target constant index.
1096static bool canSplitIdx(LoadSDNode *LD) {
1097 return MaySplitLoadIndex &&
1098 (LD->getOperand(2).getOpcode() != ISD::TargetConstant ||
1099 !cast<ConstantSDNode>(LD->getOperand(2))->isOpaque());
1100}
1101
1102bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
1103 const SDLoc &DL,
1104 SDNode *N,
1105 SDValue N0,
1106 SDValue N1) {
1107 // Currently this only tries to ensure we don't undo the GEP splits done by
1108 // CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this,
1109 // we check if the following transformation would be problematic:
1110 // (load/store (add, (add, x, offset1), offset2)) ->
1111 // (load/store (add, x, offset1+offset2)).
1112
1113 // (load/store (add, (add, x, y), offset2)) ->
1114 // (load/store (add, (add, x, offset2), y)).
1115
1116 if (!N0.isAnyAdd())
1117 return false;
1118
1119 // Check for vscale addressing modes.
1120 // (load/store (add/sub (add x, y), vscale))
1121 // (load/store (add/sub (add x, y), (lsl vscale, C)))
1122 // (load/store (add/sub (add x, y), (mul vscale, C)))
1123 if ((N1.getOpcode() == ISD::VSCALE ||
1124 ((N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::MUL) &&
1125 N1.getOperand(0).getOpcode() == ISD::VSCALE &&
1126 isa<ConstantSDNode>(N1.getOperand(1)))) &&
1127 N1.getValueType().getFixedSizeInBits() <= 64) {
1128 int64_t ScalableOffset = N1.getOpcode() == ISD::VSCALE
1129 ? N1.getConstantOperandVal(0)
1130 : (N1.getOperand(0).getConstantOperandVal(0) *
1131 (N1.getOpcode() == ISD::SHL
1132 ? (1LL << N1.getConstantOperandVal(1))
1133 : N1.getConstantOperandVal(1)));
1134 if (Opc == ISD::SUB)
1135 ScalableOffset = -ScalableOffset;
1136 if (all_of(N->users(), [&](SDNode *Node) {
1137 if (auto *LoadStore = dyn_cast<MemSDNode>(Node);
1138 LoadStore && LoadStore->getBasePtr().getNode() == N) {
1139 TargetLoweringBase::AddrMode AM;
1140 AM.HasBaseReg = true;
1141 AM.ScalableOffset = ScalableOffset;
1142 EVT VT = LoadStore->getMemoryVT();
1143 unsigned AS = LoadStore->getAddressSpace();
1144 Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1145 return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy,
1146 AS);
1147 }
1148 return false;
1149 }))
1150 return true;
1151 }
1152
1153 if (Opc != ISD::ADD && Opc != ISD::PTRADD)
1154 return false;
1155
1156 auto *C2 = dyn_cast<ConstantSDNode>(N1);
1157 if (!C2)
1158 return false;
1159
1160 const APInt &C2APIntVal = C2->getAPIntValue();
1161 if (C2APIntVal.getSignificantBits() > 64)
1162 return false;
1163
1164 if (auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
1165 if (N0.hasOneUse())
1166 return false;
1167
1168 const APInt &C1APIntVal = C1->getAPIntValue();
1169 const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
1170 if (CombinedValueIntVal.getSignificantBits() > 64)
1171 return false;
1172 const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();
1173
1174 for (SDNode *Node : N->users()) {
1175 if (auto *LoadStore = dyn_cast<MemSDNode>(Node)) {
1176 // Is x[offset2] already not a legal addressing mode? If so then
1177 // reassociating the constants breaks nothing (we test offset2 because
1178 // that's the one we hope to fold into the load or store).
1179 TargetLoweringBase::AddrMode AM;
1180 AM.HasBaseReg = true;
1181 AM.BaseOffs = C2APIntVal.getSExtValue();
1182 EVT VT = LoadStore->getMemoryVT();
1183 unsigned AS = LoadStore->getAddressSpace();
1184 Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1185 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1186 continue;
1187
1188 // Would x[offset1+offset2] still be a legal addressing mode?
1189 AM.BaseOffs = CombinedValue;
1190 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1191 return true;
1192 }
1193 }
1194 } else {
1195 if (auto *GA = dyn_cast<GlobalAddressSDNode>(N0.getOperand(1)))
1196 if (GA->getOpcode() == ISD::GlobalAddress && TLI.isOffsetFoldingLegal(GA))
1197 return false;
1198
1199 for (SDNode *Node : N->users()) {
1200 auto *LoadStore = dyn_cast<MemSDNode>(Node);
1201 if (!LoadStore)
1202 return false;
1203
1204 // Is x[offset2] a legal addressing mode? If so then
1205 // reassociating the constants breaks the address pattern.
1206 TargetLoweringBase::AddrMode AM;
1207 AM.HasBaseReg = true;
1208 AM.BaseOffs = C2APIntVal.getSExtValue();
1209 EVT VT = LoadStore->getMemoryVT();
1210 unsigned AS = LoadStore->getAddressSpace();
1211 Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1212 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1213 return false;
1214 }
1215 return true;
1216 }
1217
1218 return false;
1219}
1220
1221/// Helper for DAGCombiner::reassociateOps. Try to reassociate (Opc N0, N1) if
1222/// \p N0 is the same kind of operation as \p Opc.
1223SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
1224 SDValue N0, SDValue N1,
1225 SDNodeFlags Flags) {
1226 EVT VT = N0.getValueType();
1227
1228 if (N0.getOpcode() != Opc)
1229 return SDValue();
1230
1231 SDValue N00 = N0.getOperand(0);
1232 SDValue N01 = N0.getOperand(1);
1233
1234 if (DAG.isConstantIntBuildVectorOrConstantInt(N01)) {
1235 SDNodeFlags NewFlags;
1236 if (N0.getOpcode() == ISD::ADD && N0->getFlags().hasNoUnsignedWrap() &&
1237 Flags.hasNoUnsignedWrap())
1238 NewFlags |= SDNodeFlags::NoUnsignedWrap;
1239
1240 if (DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
1241 // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
1242 if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, {N01, N1})) {
1243 NewFlags.setDisjoint(Flags.hasDisjoint() &&
1244 N0->getFlags().hasDisjoint());
1245 return DAG.getNode(Opc, DL, VT, N00, OpNode, NewFlags);
1246 }
1247 return SDValue();
1248 }
1249 if (TLI.isReassocProfitable(DAG, N0, N1)) {
1250 // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
1251 // iff (op x, c1) has one use
1252 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1, NewFlags);
1253 return DAG.getNode(Opc, DL, VT, OpNode, N01, NewFlags);
1254 }
1255 }
1256
1257 // Check for repeated operand logic simplifications.
1258 if (Opc == ISD::AND || Opc == ISD::OR) {
1259 // (N00 & N01) & N00 --> N00 & N01
1260 // (N00 & N01) & N01 --> N00 & N01
1261 // (N00 | N01) | N00 --> N00 | N01
1262 // (N00 | N01) | N01 --> N00 | N01
1263 if (N1 == N00 || N1 == N01)
1264 return N0;
1265 }
1266 if (Opc == ISD::XOR) {
1267 // (N00 ^ N01) ^ N00 --> N01
1268 if (N1 == N00)
1269 return N01;
1270 // (N00 ^ N01) ^ N01 --> N00
1271 if (N1 == N01)
1272 return N00;
1273 }
1274
1275 if (TLI.isReassocProfitable(DAG, N0, N1)) {
1276 if (N1 != N01) {
1277 // Reassociate if (op N00, N1) already exist
1278 if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N00, N1})) {
1279 // If (Op (Op N00, N1), N01) already exists,
1280 // we need to stop reassociating to avoid an infinite loop.
1281 if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N01}))
1282 return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N01);
1283 }
1284 }
1285
1286 if (N1 != N00) {
1287 // Reassociate if (op N01, N1) already exist
1288 if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N01, N1})) {
1289 // If (Op (Op N01, N1), N00) already exists,
1290 // we need to stop reassociating to avoid an infinite loop.
1291 if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N00}))
1292 return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N00);
1293 }
1294 }
1295
1296 // Reassociate the operands from (OR/AND (OR/AND(N00, N01)), N1) to (OR/AND
1297 // (OR/AND(N00, N1)), N01) when N00 and N1 are comparisons with the same
1298 // predicate or to (OR/AND (OR/AND(N1, N01)), N00) when N01 and N1 are
1299 // comparisons with the same predicate. This enables optimizations as the
1300 // following one:
1301 // CMP(A,C)||CMP(B,C) => CMP(MIN/MAX(A,B), C)
1302 // CMP(A,C)&&CMP(B,C) => CMP(MIN/MAX(A,B), C)
1303 if (Opc == ISD::AND || Opc == ISD::OR) {
1304 if (N1->getOpcode() == ISD::SETCC && N00->getOpcode() == ISD::SETCC &&
1305 N01->getOpcode() == ISD::SETCC) {
1306 ISD::CondCode CC1 = cast<CondCodeSDNode>(N1.getOperand(2))->get();
1307 ISD::CondCode CC00 = cast<CondCodeSDNode>(N00.getOperand(2))->get();
1308 ISD::CondCode CC01 = cast<CondCodeSDNode>(N01.getOperand(2))->get();
1309 if (CC1 == CC00 && CC1 != CC01) {
1310 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1, Flags);
1311 return DAG.getNode(Opc, DL, VT, OpNode, N01, Flags);
1312 }
1313 if (CC1 == CC01 && CC1 != CC00) {
1314 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N01, N1, Flags);
1315 return DAG.getNode(Opc, DL, VT, OpNode, N00, Flags);
1316 }
1317 }
1318 }
1319 }
1320
1321 return SDValue();
1322}
1323
1324/// Try to reassociate commutative (Opc N0, N1) if either \p N0 or \p N1 is the
1325/// same kind of operation as \p Opc.
1326SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
1327 SDValue N1, SDNodeFlags Flags) {
1328 assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");
1329
1330 // Floating-point reassociation is not allowed without loose FP math.
1331 if (N0.getValueType().isFloatingPoint() ||
1332 N1.getValueType().isFloatingPoint())
1333 if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros())
1334 return SDValue();
1335
1336 if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1, Flags))
1337 return Combined;
1338 if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0, Flags))
1339 return Combined;
1340 return SDValue();
1341}
1342
1343// Try to fold Opc(vecreduce(x), vecreduce(y)) -> vecreduce(Opc(x, y))
1344// Note that we only expect Flags to be passed from FP operations. For integer
1345// operations they need to be dropped.
1346SDValue DAGCombiner::reassociateReduction(unsigned RedOpc, unsigned Opc,
1347 const SDLoc &DL, EVT VT, SDValue N0,
1348 SDValue N1, SDNodeFlags Flags) {
1349 if (N0.getOpcode() == RedOpc && N1.getOpcode() == RedOpc &&
1350 N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
1351 N0->hasOneUse() && N1->hasOneUse() &&
1352 hasOperation(Opc, N0.getOperand(0).getValueType()) &&
1353 TLI.shouldReassociateReduction(RedOpc, N0.getOperand(0).getValueType())) {
1354 SelectionDAG::FlagInserter FlagsInserter(DAG, Flags);
1355 return DAG.getNode(RedOpc, DL, VT,
1356 DAG.getNode(Opc, DL, N0.getOperand(0).getValueType(),
1357 N0.getOperand(0), N1.getOperand(0)));
1358 }
1359
1360 // Reassociate op(op(vecreduce(a), b), op(vecreduce(c), d)) into
1361 // op(vecreduce(op(a, c)), op(b, d)), to combine the reductions into a
1362 // single node.
1363 SDValue A, B, C, D, RedA, RedB;
1364 if (sd_match(N0, m_OneUse(m_c_BinOp(
1365 Opc,
1366 m_AllOf(m_OneUse(m_UnaryOp(RedOpc, m_Value(A))),
1367 m_Value(RedA)),
1368 m_Value(B)))) &&
1369 sd_match(N1, m_OneUse(m_c_BinOp(
1370 Opc,
1371 m_AllOf(m_OneUse(m_UnaryOp(RedOpc, m_Value(C))),
1372 m_Value(RedB)),
1373 m_Value(D)))) &&
1374 !sd_match(B, m_UnaryOp(RedOpc, m_Value())) &&
1375 !sd_match(D, m_UnaryOp(RedOpc, m_Value())) &&
1376 A.getValueType() == C.getValueType() &&
1377 hasOperation(Opc, A.getValueType()) &&
1378 TLI.shouldReassociateReduction(RedOpc, VT)) {
1379 if ((Opc == ISD::FADD || Opc == ISD::FMUL) &&
1380 (!N0->getFlags().hasAllowReassociation() ||
1381 !N1->getFlags().hasAllowReassociation() ||
1382 !RedA->getFlags().hasAllowReassociation() ||
1383 !RedB->getFlags().hasAllowReassociation()))
1384 return SDValue();
1385 SelectionDAG::FlagInserter FlagsInserter(
1386 DAG, Flags & N0->getFlags() & N1->getFlags() & RedA->getFlags() &
1387 RedB->getFlags());
1388 SDValue Op = DAG.getNode(Opc, DL, A.getValueType(), A, C);
1389 SDValue Red = DAG.getNode(RedOpc, DL, VT, Op);
1390 SDValue Op2 = DAG.getNode(Opc, DL, VT, B, D);
1391 return DAG.getNode(Opc, DL, VT, Red, Op2);
1392 }
1393 return SDValue();
1394}
1395
1396SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
1397 bool AddTo) {
1398 assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
1399 ++NodesCombined;
1400 LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
1401 To[0].dump(&DAG);
1402 dbgs() << " and " << NumTo - 1 << " other values\n");
1403 for (unsigned i = 0, e = NumTo; i != e; ++i)
1404 assert((!To[i].getNode() ||
1405 N->getValueType(i) == To[i].getValueType()) &&
1406 "Cannot combine value to value of different type!");
1407
1408 WorklistRemover DeadNodes(*this);
1409 DAG.ReplaceAllUsesWith(N, To);
1410 if (AddTo) {
1411 // Push the new nodes and any users onto the worklist
1412 for (unsigned i = 0, e = NumTo; i != e; ++i) {
1413 if (To[i].getNode())
1414 AddToWorklistWithUsers(To[i].getNode());
1415 }
1416 }
1417
1418 // Finally, if the node is now dead, remove it from the graph. The node
1419 // may not be dead if the replacement process recursively simplified to
1420 // something else needing this node.
1421 if (N->use_empty())
1422 deleteAndRecombine(N);
1423 return SDValue(N, 0);
1424}
1425
1426void DAGCombiner::
1427CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
1428 // Replace the old value with the new one.
1429 ++NodesCombined;
1430 LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.dump(&DAG);
1431 dbgs() << "\nWith: "; TLO.New.dump(&DAG); dbgs() << '\n');
1432
1433 // Replace all uses.
1434 DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
1435
1436 // Push the new node and any (possibly new) users onto the worklist.
1437 AddToWorklistWithUsers(TLO.New.getNode());
1438
1439 // Finally, if the node is now dead, remove it from the graph.
1440 recursivelyDeleteUnusedNodes(TLO.Old.getNode());
1441}
1442
1443/// Check the specified integer node value to see if it can be simplified or if
1444/// things it uses can be simplified by bit propagation. If so, return true.
1445bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
1446 const APInt &DemandedElts,
1447 bool AssumeSingleUse) {
1448 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1449 KnownBits Known;
1450 if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, 0,
1451 AssumeSingleUse))
1452 return false;
1453
1454 // Revisit the node.
1455 AddToWorklist(Op.getNode());
1456
1457 CommitTargetLoweringOpt(TLO);
1458 return true;
1459}
1460
1461/// Check the specified vector node value to see if it can be simplified or
1462/// if things it uses can be simplified as it only uses some of the elements.
1463/// If so, return true.
1464bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
1465 const APInt &DemandedElts,
1466 bool AssumeSingleUse) {
1467 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1468 APInt KnownUndef, KnownZero;
1469 if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
1470 TLO, 0, AssumeSingleUse))
1471 return false;
1472
1473 // Revisit the node.
1474 AddToWorklist(Op.getNode());
1475
1476 CommitTargetLoweringOpt(TLO);
1477 return true;
1478}
1479
1480void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
1481 SDLoc DL(Load);
1482 EVT VT = Load->getValueType(0);
1483 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
1484
1485 LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
1486 Trunc.dump(&DAG); dbgs() << '\n');
1487
1488 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
1489 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
1490
1491 AddToWorklist(Trunc.getNode());
1492 recursivelyDeleteUnusedNodes(Load);
1493}
1494
1495SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
1496 Replace = false;
1497 SDLoc DL(Op);
1498 if (ISD::isUNINDEXEDLoad(Op.getNode())) {
1499 LoadSDNode *LD = cast<LoadSDNode>(Op);
1500 EVT MemVT = LD->getMemoryVT();
1501 ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1502 : LD->getExtensionType();
1503 Replace = true;
1504 return DAG.getExtLoad(ExtType, DL, PVT,
1505 LD->getChain(), LD->getBasePtr(),
1506 MemVT, LD->getMemOperand());
1507 }
1508
1509 unsigned Opc = Op.getOpcode();
1510 switch (Opc) {
1511 default: break;
1512 case ISD::AssertSext:
1513 if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
1514 return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
1515 break;
1516 case ISD::AssertZext:
1517 if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
1518 return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
1519 break;
1520 case ISD::Constant: {
1521 unsigned ExtOpc =
1522 Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1523 return DAG.getNode(ExtOpc, DL, PVT, Op);
1524 }
1525 }
1526
1527 if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
1528 return SDValue();
1529 return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
1530}
1531
1532SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1533 if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
1534 return SDValue();
1535 EVT OldVT = Op.getValueType();
1536 SDLoc DL(Op);
1537 bool Replace = false;
1538 SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1539 if (!NewOp.getNode())
1540 return SDValue();
1541 AddToWorklist(NewOp.getNode());
1542
1543 if (Replace)
1544 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1545 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1546 DAG.getValueType(OldVT));
1547}
1548
1549SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1550 EVT OldVT = Op.getValueType();
1551 SDLoc DL(Op);
1552 bool Replace = false;
1553 SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1554 if (!NewOp.getNode())
1555 return SDValue();
1556 AddToWorklist(NewOp.getNode());
1557
1558 if (Replace)
1559 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1560 return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1561}
1562
1563/// Promote the specified integer binary operation if the target indicates it is
1564/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1565/// i32 since i16 instructions are longer.
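// Illustrative sketch (an assumed example, not taken from the source): for an
// i16 ADD that the target reports as undesirable, the code below builds
//   (i16 truncate (i32 add (i32 any_extend x), (i32 any_extend y)))
// where the extension used for each operand is chosen by PromoteOperand,
// SExtPromoteOperand or ZExtPromoteOperand above.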
1566SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1567 if (!LegalOperations)
1568 return SDValue();
1569
1570 EVT VT = Op.getValueType();
1571 if (VT.isVector() || !VT.isInteger())
1572 return SDValue();
1573
1574 // If operation type is 'undesirable', e.g. i16 on x86, consider
1575 // promoting it.
1576 unsigned Opc = Op.getOpcode();
1577 if (TLI.isTypeDesirableForOp(Opc, VT))
1578 return SDValue();
1579
1580 EVT PVT = VT;
1581 // Consult target whether it is a good idea to promote this operation and
1582 // what's the right type to promote it to.
1583 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1584 assert(PVT != VT && "Don't know what type to promote to!");
1585
1586 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1587
1588 bool Replace0 = false;
1589 SDValue N0 = Op.getOperand(0);
1590 SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1591
1592 bool Replace1 = false;
1593 SDValue N1 = Op.getOperand(1);
1594 SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
1595 SDLoc DL(Op);
1596
1597 SDValue RV =
1598 DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
1599
1600 // We are always replacing N0/N1's use in N and only need additional
1601 // replacements if there are additional uses.
1602 // Note: We are checking uses of the *nodes* (SDNode) rather than values
1603 // (SDValue) here because the node may reference multiple values
1604 // (for example, the chain value of a load node).
1605 Replace0 &= !N0->hasOneUse();
1606 Replace1 &= (N0 != N1) && !N1->hasOneUse();
1607
1608 // Combine Op here so it is preserved past replacements.
1609 CombineTo(Op.getNode(), RV);
1610
1611 // If operands have a use ordering, make sure we deal with
1612 // predecessor first.
1613 if (Replace0 && Replace1 && N0->isPredecessorOf(N1.getNode())) {
1614 std::swap(N0, N1);
1615 std::swap(NN0, NN1);
1616 }
1617
1618 if (Replace0) {
1619 AddToWorklist(NN0.getNode());
1620 ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
1621 }
1622 if (Replace1) {
1623 AddToWorklist(NN1.getNode());
1624 ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
1625 }
1626 return Op;
1627 }
1628 return SDValue();
1629}
1630
1631/// Promote the specified integer shift operation if the target indicates it is
1632/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1633/// i32 since i16 instructions are longer.
1634SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1635 if (!LegalOperations)
1636 return SDValue();
1637
1638 EVT VT = Op.getValueType();
1639 if (VT.isVector() || !VT.isInteger())
1640 return SDValue();
1641
1642 // If operation type is 'undesirable', e.g. i16 on x86, consider
1643 // promoting it.
1644 unsigned Opc = Op.getOpcode();
1645 if (TLI.isTypeDesirableForOp(Opc, VT))
1646 return SDValue();
1647
1648 EVT PVT = VT;
1649 // Consult target whether it is a good idea to promote this operation and
1650 // what's the right type to promote it to.
1651 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1652 assert(PVT != VT && "Don't know what type to promote to!");
1653
1654 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1655
1656 bool Replace = false;
1657 SDValue N0 = Op.getOperand(0);
1658 if (Opc == ISD::SRA)
1659 N0 = SExtPromoteOperand(N0, PVT);
1660 else if (Opc == ISD::SRL)
1661 N0 = ZExtPromoteOperand(N0, PVT);
1662 else
1663 N0 = PromoteOperand(N0, PVT, Replace);
1664
1665 if (!N0.getNode())
1666 return SDValue();
1667
1668 SDLoc DL(Op);
1669 SDValue N1 = Op.getOperand(1);
1670 SDValue RV =
1671 DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
1672
1673 if (Replace)
1674 ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1675
1676 // Deal with Op being deleted.
1677 if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1678 return RV;
1679 }
1680 return SDValue();
1681}
1682
1683SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1684 if (!LegalOperations)
1685 return SDValue();
1686
1687 EVT VT = Op.getValueType();
1688 if (VT.isVector() || !VT.isInteger())
1689 return SDValue();
1690
1691 // If operation type is 'undesirable', e.g. i16 on x86, consider
1692 // promoting it.
1693 unsigned Opc = Op.getOpcode();
1694 if (TLI.isTypeDesirableForOp(Opc, VT))
1695 return SDValue();
1696
1697 EVT PVT = VT;
1698 // Consult target whether it is a good idea to promote this operation and
1699 // what's the right type to promote it to.
1700 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1701 assert(PVT != VT && "Don't know what type to promote to!");
1702 // fold (aext (aext x)) -> (aext x)
1703 // fold (aext (zext x)) -> (zext x)
1704 // fold (aext (sext x)) -> (sext x)
1705 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1706 return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1707 }
1708 return SDValue();
1709}
1710
1711bool DAGCombiner::PromoteLoad(SDValue Op) {
1712 if (!LegalOperations)
1713 return false;
1714
1715 if (!ISD::isUNINDEXEDLoad(Op.getNode()))
1716 return false;
1717
1718 EVT VT = Op.getValueType();
1719 if (VT.isVector() || !VT.isInteger())
1720 return false;
1721
1722 // If operation type is 'undesirable', e.g. i16 on x86, consider
1723 // promoting it.
1724 unsigned Opc = Op.getOpcode();
1725 if (TLI.isTypeDesirableForOp(Opc, VT))
1726 return false;
1727
1728 EVT PVT = VT;
1729 // Consult target whether it is a good idea to promote this operation and
1730 // what's the right type to promote it to.
1731 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1732 assert(PVT != VT && "Don't know what type to promote to!");
1733
1734 SDLoc DL(Op);
1735 SDNode *N = Op.getNode();
1736 LoadSDNode *LD = cast<LoadSDNode>(N);
1737 EVT MemVT = LD->getMemoryVT();
1738 ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1739 : LD->getExtensionType();
1740 SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
1741 LD->getChain(), LD->getBasePtr(),
1742 MemVT, LD->getMemOperand());
1743 SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
1744
1745 LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
1746 Result.dump(&DAG); dbgs() << '\n');
1747
1748 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1749 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1750
1751 AddToWorklist(Result.getNode());
1752 recursivelyDeleteUnusedNodes(N);
1753 return true;
1754 }
1755
1756 return false;
1757}
1758
1759/// Recursively delete a node which has no uses and any operands for
1760/// which it is the only use.
1761///
1762/// Note that this both deletes the nodes and removes them from the worklist.
1763/// It also adds any nodes that have had a user deleted to the worklist, as they
1764/// may now have only one use and be subject to other combines.
1765bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1766 if (!N->use_empty())
1767 return false;
1768
1769 SmallSetVector<SDNode *, 16> Nodes;
1770 Nodes.insert(N);
1771 do {
1772 N = Nodes.pop_back_val();
1773 if (!N)
1774 continue;
1775
1776 if (N->use_empty()) {
1777 for (const SDValue &ChildN : N->op_values())
1778 Nodes.insert(ChildN.getNode());
1779
1780 removeFromWorklist(N);
1781 DAG.DeleteNode(N);
1782 } else {
1783 AddToWorklist(N);
1784 }
1785 } while (!Nodes.empty());
1786 return true;
1787}
1788
1789//===----------------------------------------------------------------------===//
1790// Main DAG Combiner implementation
1791//===----------------------------------------------------------------------===//
1792
1793void DAGCombiner::Run(CombineLevel AtLevel) {
1794 // set the instance variables, so that the various visit routines may use it.
1795 Level = AtLevel;
1796 LegalDAG = Level >= AfterLegalizeDAG;
1797 LegalOperations = Level >= AfterLegalizeVectorOps;
1798 LegalTypes = Level >= AfterLegalizeTypes;
1799
1800 WorklistInserter AddNodes(*this);
1801
1802 // Add all the dag nodes to the worklist.
1803 //
1804 // Note: Not all nodes are added to the PruningList here, because the only
1805 // nodes which can be deleted are those which have no uses, and all other nodes
1806 // which would otherwise be added to the worklist by the first call to
1807 // getNextWorklistEntry are already present in it.
1808 for (SDNode &Node : DAG.allnodes())
1809 AddToWorklist(&Node, /* IsCandidateForPruning */ Node.use_empty());
1810
1811 // Create a dummy node (which is not added to allnodes), that adds a reference
1812 // to the root node, preventing it from being deleted, and tracking any
1813 // changes of the root.
1814 HandleSDNode Dummy(DAG.getRoot());
1815
1816 // While we have a valid worklist entry node, try to combine it.
1817 while (SDNode *N = getNextWorklistEntry()) {
1818 // If N has no uses, it is dead. Make sure to revisit all N's operands once
1819 // N is deleted from the DAG, since they too may now be dead or may have a
1820 // reduced number of uses, allowing other xforms.
1821 if (recursivelyDeleteUnusedNodes(N))
1822 continue;
1823
1824 WorklistRemover DeadNodes(*this);
1825
1826 // If this combine is running after legalizing the DAG, re-legalize any
1827 // nodes pulled off the worklist.
1828 if (LegalDAG) {
1829 SmallSetVector<SDNode *, 16> UpdatedNodes;
1830 bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
1831
1832 for (SDNode *LN : UpdatedNodes)
1833 AddToWorklistWithUsers(LN);
1834
1835 if (!NIsValid)
1836 continue;
1837 }
1838
1839 LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
1840
1841 // Add any operands of the new node which have not yet been combined to the
1842 // worklist as well. getNextWorklistEntry flags nodes that have been
1843 // combined before. Because the worklist uniques things already, this won't
1844 // repeatedly process the same operand.
1845 for (const SDValue &ChildN : N->op_values())
1846 AddToWorklist(ChildN.getNode(), /*IsCandidateForPruning=*/true,
1847 /*SkipIfCombinedBefore=*/true);
1848
1849 SDValue RV = combine(N);
1850
1851 if (!RV.getNode())
1852 continue;
1853
1854 ++NodesCombined;
1855
1856 // Invalidate cached info.
1857 ChainsWithoutMergeableStores.clear();
1858
1859 // If we get back the same node we passed in, rather than a new node or
1860 // zero, we know that the node must have defined multiple values and
1861 // CombineTo was used. Since CombineTo takes care of the worklist
1862 // mechanics for us, we have no work to do in this case.
1863 if (RV.getNode() == N)
1864 continue;
1865
1866 assert(N->getOpcode() != ISD::DELETED_NODE &&
1867 RV.getOpcode() != ISD::DELETED_NODE &&
1868 "Node was deleted but visit returned new node!");
1869
1870 LLVM_DEBUG(dbgs() << " ... into: "; RV.dump(&DAG));
1871
1872 if (N->getNumValues() == RV->getNumValues())
1873 DAG.ReplaceAllUsesWith(N, RV.getNode());
1874 else {
1875 assert(N->getValueType(0) == RV.getValueType() &&
1876 N->getNumValues() == 1 && "Type mismatch");
1877 DAG.ReplaceAllUsesWith(N, &RV);
1878 }
1879
1880 // Push the new node and any users onto the worklist. Omit this if the
1881 // new node is the EntryToken (e.g. if a store managed to get optimized
1882 // out), because re-visiting the EntryToken and its users will not uncover
1883 // any additional opportunities, but there may be a large number of such
1884 // users, potentially causing compile time explosion.
1885 if (RV.getOpcode() != ISD::EntryToken)
1886 AddToWorklistWithUsers(RV.getNode());
1887
1888 // Finally, if the node is now dead, remove it from the graph. The node
1889 // may not be dead if the replacement process recursively simplified to
1890 // something else needing this node. This will also take care of adding any
1891 // operands which have lost a user to the worklist.
1892 recursivelyDeleteUnusedNodes(N);
1893 }
1894
1895 // If the root changed (e.g. it was a dead load), update the root.
1896 DAG.setRoot(Dummy.getValue());
1897 DAG.RemoveDeadNodes();
1898}
1899
1900SDValue DAGCombiner::visit(SDNode *N) {
1901 // clang-format off
1902 switch (N->getOpcode()) {
1903 default: break;
1904 case ISD::TokenFactor: return visitTokenFactor(N);
1905 case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
1906 case ISD::ADD: return visitADD(N);
1907 case ISD::PTRADD: return visitPTRADD(N);
1908 case ISD::SUB: return visitSUB(N);
1909 case ISD::SADDSAT:
1910 case ISD::UADDSAT: return visitADDSAT(N);
1911 case ISD::SSUBSAT:
1912 case ISD::USUBSAT: return visitSUBSAT(N);
1913 case ISD::ADDC: return visitADDC(N);
1914 case ISD::SADDO:
1915 case ISD::UADDO: return visitADDO(N);
1916 case ISD::SUBC: return visitSUBC(N);
1917 case ISD::SSUBO:
1918 case ISD::USUBO: return visitSUBO(N);
1919 case ISD::ADDE: return visitADDE(N);
1920 case ISD::UADDO_CARRY: return visitUADDO_CARRY(N);
1921 case ISD::SADDO_CARRY: return visitSADDO_CARRY(N);
1922 case ISD::SUBE: return visitSUBE(N);
1923 case ISD::USUBO_CARRY: return visitUSUBO_CARRY(N);
1924 case ISD::SSUBO_CARRY: return visitSSUBO_CARRY(N);
1925 case ISD::SMULFIX:
1926 case ISD::SMULFIXSAT:
1927 case ISD::UMULFIX:
1928 case ISD::UMULFIXSAT: return visitMULFIX(N);
1929 case ISD::MUL: return visitMUL<EmptyMatchContext>(N);
1930 case ISD::SDIV: return visitSDIV(N);
1931 case ISD::UDIV: return visitUDIV(N);
1932 case ISD::SREM:
1933 case ISD::UREM: return visitREM(N);
1934 case ISD::MULHU: return visitMULHU(N);
1935 case ISD::MULHS: return visitMULHS(N);
1936 case ISD::AVGFLOORS:
1937 case ISD::AVGFLOORU:
1938 case ISD::AVGCEILS:
1939 case ISD::AVGCEILU: return visitAVG(N);
1940 case ISD::ABDS:
1941 case ISD::ABDU: return visitABD(N);
1942 case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
1943 case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
1944 case ISD::SMULO:
1945 case ISD::UMULO: return visitMULO(N);
1946 case ISD::SMIN:
1947 case ISD::SMAX:
1948 case ISD::UMIN:
1949 case ISD::UMAX: return visitIMINMAX(N);
1950 case ISD::AND: return visitAND(N);
1951 case ISD::OR: return visitOR(N);
1952 case ISD::XOR: return visitXOR(N);
1953 case ISD::SHL: return visitSHL(N);
1954 case ISD::SRA: return visitSRA(N);
1955 case ISD::SRL: return visitSRL(N);
1956 case ISD::ROTR:
1957 case ISD::ROTL: return visitRotate(N);
1958 case ISD::FSHL:
1959 case ISD::FSHR: return visitFunnelShift(N);
1960 case ISD::SSHLSAT:
1961 case ISD::USHLSAT: return visitSHLSAT(N);
1962 case ISD::ABS: return visitABS(N);
1963 case ISD::BSWAP: return visitBSWAP(N);
1964 case ISD::BITREVERSE: return visitBITREVERSE(N);
1965 case ISD::CTLZ: return visitCTLZ(N);
1966 case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
1967 case ISD::CTTZ: return visitCTTZ(N);
1968 case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
1969 case ISD::CTPOP: return visitCTPOP(N);
1970 case ISD::SELECT: return visitSELECT(N);
1971 case ISD::VSELECT: return visitVSELECT(N);
1972 case ISD::SELECT_CC: return visitSELECT_CC(N);
1973 case ISD::SETCC: return visitSETCC(N);
1974 case ISD::SETCCCARRY: return visitSETCCCARRY(N);
1975 case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
1976 case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
1977 case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
1978 case ISD::AssertSext:
1979 case ISD::AssertZext: return visitAssertExt(N);
1980 case ISD::AssertAlign: return visitAssertAlign(N);
1981 case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
1982 case ISD::SIGN_EXTEND_VECTOR_INREG:
1983 case ISD::ZERO_EXTEND_VECTOR_INREG:
1984 case ISD::ANY_EXTEND_VECTOR_INREG: return visitEXTEND_VECTOR_INREG(N);
1985 case ISD::TRUNCATE: return visitTRUNCATE(N);
1986 case ISD::TRUNCATE_USAT_U: return visitTRUNCATE_USAT_U(N);
1987 case ISD::BITCAST: return visitBITCAST(N);
1988 case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
1989 case ISD::FADD: return visitFADD(N);
1990 case ISD::STRICT_FADD: return visitSTRICT_FADD(N);
1991 case ISD::FSUB: return visitFSUB(N);
1992 case ISD::FMUL: return visitFMUL(N);
1993 case ISD::FMA: return visitFMA<EmptyMatchContext>(N);
1994 case ISD::FMAD: return visitFMAD(N);
1995 case ISD::FMULADD: return visitFMULADD(N);
1996 case ISD::FDIV: return visitFDIV(N);
1997 case ISD::FREM: return visitFREM(N);
1998 case ISD::FSQRT: return visitFSQRT(N);
1999 case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
2000 case ISD::FPOW: return visitFPOW(N);
2001 case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
2002 case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
2003 case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
2004 case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
2005 case ISD::LROUND:
2006 case ISD::LLROUND:
2007 case ISD::LRINT:
2008 case ISD::LLRINT: return visitXROUND(N);
2009 case ISD::FP_ROUND: return visitFP_ROUND(N);
2010 case ISD::FP_EXTEND: return visitFP_EXTEND(N);
2011 case ISD::FNEG: return visitFNEG(N);
2012 case ISD::FABS: return visitFABS(N);
2013 case ISD::FFLOOR: return visitFFLOOR(N);
2014 case ISD::FMINNUM:
2015 case ISD::FMAXNUM:
2016 case ISD::FMINIMUM:
2017 case ISD::FMAXIMUM:
2018 case ISD::FMINIMUMNUM:
2019 case ISD::FMAXIMUMNUM: return visitFMinMax(N);
2020 case ISD::FCEIL: return visitFCEIL(N);
2021 case ISD::FTRUNC: return visitFTRUNC(N);
2022 case ISD::FFREXP: return visitFFREXP(N);
2023 case ISD::BRCOND: return visitBRCOND(N);
2024 case ISD::BR_CC: return visitBR_CC(N);
2025 case ISD::LOAD: return visitLOAD(N);
2026 case ISD::STORE: return visitSTORE(N);
2027 case ISD::ATOMIC_STORE: return visitATOMIC_STORE(N);
2028 case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
2029 case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
2030 case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
2031 case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
2032 case ISD::VECTOR_INTERLEAVE: return visitVECTOR_INTERLEAVE(N);
2033 case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
2034 case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
2035 case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N);
2036 case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
2037 case ISD::MGATHER: return visitMGATHER(N);
2038 case ISD::MLOAD: return visitMLOAD(N);
2039 case ISD::MSCATTER: return visitMSCATTER(N);
2040 case ISD::MSTORE: return visitMSTORE(N);
2041 case ISD::EXPERIMENTAL_VECTOR_HISTOGRAM: return visitMHISTOGRAM(N);
2042 case ISD::PARTIAL_REDUCE_SMLA:
2043 case ISD::PARTIAL_REDUCE_UMLA:
2044 case ISD::PARTIAL_REDUCE_SUMLA:
2045 return visitPARTIAL_REDUCE_MLA(N);
2046 case ISD::VECTOR_COMPRESS: return visitVECTOR_COMPRESS(N);
2047 case ISD::LIFETIME_END: return visitLIFETIME_END(N);
2048 case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
2049 case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
2050 case ISD::FP_TO_BF16: return visitFP_TO_BF16(N);
2051 case ISD::BF16_TO_FP: return visitBF16_TO_FP(N);
2052 case ISD::FREEZE: return visitFREEZE(N);
2053 case ISD::GET_FPENV_MEM: return visitGET_FPENV_MEM(N);
2054 case ISD::SET_FPENV_MEM: return visitSET_FPENV_MEM(N);
2055 case ISD::FCANONICALIZE: return visitFCANONICALIZE(N);
2056 case ISD::VECREDUCE_FADD:
2057 case ISD::VECREDUCE_FMUL:
2058 case ISD::VECREDUCE_ADD:
2059 case ISD::VECREDUCE_MUL:
2060 case ISD::VECREDUCE_AND:
2061 case ISD::VECREDUCE_OR:
2062 case ISD::VECREDUCE_XOR:
2063 case ISD::VECREDUCE_SMAX:
2064 case ISD::VECREDUCE_SMIN:
2065 case ISD::VECREDUCE_UMAX:
2066 case ISD::VECREDUCE_UMIN:
2067 case ISD::VECREDUCE_FMAX:
2068 case ISD::VECREDUCE_FMIN:
2069 case ISD::VECREDUCE_FMAXIMUM:
2070 case ISD::VECREDUCE_FMINIMUM: return visitVECREDUCE(N);
2071#define BEGIN_REGISTER_VP_SDNODE(SDOPC, ...) case ISD::SDOPC:
2072#include "llvm/IR/VPIntrinsics.def"
2073 return visitVPOp(N);
2074 }
2075 // clang-format on
2076 return SDValue();
2077}
2078
2079SDValue DAGCombiner::combine(SDNode *N) {
2080 if (!DebugCounter::shouldExecute(DAGCombineCounter))
2081 return SDValue();
2082
2083 SDValue RV;
2084 if (!DisableGenericCombines)
2085 RV = visit(N);
2086
2087 // If nothing happened, try a target-specific DAG combine.
2088 if (!RV.getNode()) {
2089 assert(N->getOpcode() != ISD::DELETED_NODE &&
2090 "Node was deleted but visit returned NULL!");
2091
2092 if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
2093 TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
2094
2095 // Expose the DAG combiner to the target combiner impls.
2096 TargetLowering::DAGCombinerInfo
2097 DagCombineInfo(DAG, Level, false, this);
2098
2099 RV = TLI.PerformDAGCombine(N, DagCombineInfo);
2100 }
2101 }
2102
2103 // If nothing happened still, try promoting the operation.
2104 if (!RV.getNode()) {
2105 switch (N->getOpcode()) {
2106 default: break;
2107 case ISD::ADD:
2108 case ISD::SUB:
2109 case ISD::MUL:
2110 case ISD::AND:
2111 case ISD::OR:
2112 case ISD::XOR:
2113 RV = PromoteIntBinOp(SDValue(N, 0));
2114 break;
2115 case ISD::SHL:
2116 case ISD::SRA:
2117 case ISD::SRL:
2118 RV = PromoteIntShiftOp(SDValue(N, 0));
2119 break;
2120 case ISD::SIGN_EXTEND:
2121 case ISD::ZERO_EXTEND:
2122 case ISD::ANY_EXTEND:
2123 RV = PromoteExtend(SDValue(N, 0));
2124 break;
2125 case ISD::LOAD:
2126 if (PromoteLoad(SDValue(N, 0)))
2127 RV = SDValue(N, 0);
2128 break;
2129 }
2130 }
2131
2132 // If N is a commutative binary node, try to eliminate it if the commuted
2133 // version is already present in the DAG.
2134 if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode())) {
2135 SDValue N0 = N->getOperand(0);
2136 SDValue N1 = N->getOperand(1);
2137
2138 // Constant operands are canonicalized to RHS.
2139 if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
2140 SDValue Ops[] = {N1, N0};
2141 SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
2142 N->getFlags());
2143 if (CSENode)
2144 return SDValue(CSENode, 0);
2145 }
2146 }
2147
2148 return RV;
2149}
2150
2151/// Given a node, return its input chain if it has one, otherwise return a null
2152/// sd operand.
2153static SDValue getInputChainForNode(SDNode *N) {
2154 if (unsigned NumOps = N->getNumOperands()) {
2155 if (N->getOperand(0).getValueType() == MVT::Other)
2156 return N->getOperand(0);
2157 if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
2158 return N->getOperand(NumOps-1);
2159 for (unsigned i = 1; i < NumOps-1; ++i)
2160 if (N->getOperand(i).getValueType() == MVT::Other)
2161 return N->getOperand(i);
2162 }
2163 return SDValue();
2164}
2165
2166SDValue DAGCombiner::visitFCANONICALIZE(SDNode *N) {
2167 SDValue Operand = N->getOperand(0);
2168 EVT VT = Operand.getValueType();
2169 SDLoc dl(N);
2170
2171 // Canonicalize undef to quiet NaN.
2172 if (Operand.isUndef()) {
2173 APFloat CanonicalQNaN = APFloat::getQNaN(VT.getFltSemantics());
2174 return DAG.getConstantFP(CanonicalQNaN, dl, VT);
2175 }
2176 return SDValue();
2177}
2178
2179SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
2180 // If N has two operands, where one has an input chain equal to the other,
2181 // the 'other' chain is redundant.
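// For example (illustrative sketch): in TokenFactor(t, ch), where t is a
// chain value whose own input chain is ch, every ordering provided by ch is
// already implied by t, so the token factor can simply be replaced by t.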
2182 if (N->getNumOperands() == 2) {
2183 if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
2184 return N->getOperand(0);
2185 if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
2186 return N->getOperand(1);
2187 }
2188
2189 // Don't simplify token factors if optnone.
2190 if (OptLevel == CodeGenOptLevel::None)
2191 return SDValue();
2192
2193 // Don't simplify the token factor if the node itself has too many operands.
2194 if (N->getNumOperands() > TokenFactorInlineLimit)
2195 return SDValue();
2196
2197 // If the sole user is a token factor, we should make sure we have a
2198 // chance to merge them together. This prevents TF chains from inhibiting
2199 // optimizations.
2200 if (N->hasOneUse() && N->user_begin()->getOpcode() == ISD::TokenFactor)
2201 AddToWorklist(*(N->user_begin()));
2202
2203 SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
2204 SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
2205 SmallPtrSet<SDNode*, 16> SeenOps;
2206 bool Changed = false; // If we should replace this token factor.
2207
2208 // Start out with this token factor.
2209 TFs.push_back(N);
2210
2211 // Iterate through token factors. The TFs list grows when new token factors
2212 // are encountered.
2213 for (unsigned i = 0; i < TFs.size(); ++i) {
2214 // Limit number of nodes to inline, to avoid quadratic compile times.
2215 // We have to add the outstanding Token Factors to Ops, otherwise we might
2216 // drop Ops from the resulting Token Factors.
2217 if (Ops.size() > TokenFactorInlineLimit) {
2218 for (unsigned j = i; j < TFs.size(); j++)
2219 Ops.emplace_back(TFs[j], 0);
2220 // Drop unprocessed Token Factors from TFs, so we do not add them to the
2221 // combiner worklist later.
2222 TFs.resize(i);
2223 break;
2224 }
2225
2226 SDNode *TF = TFs[i];
2227 // Check each of the operands.
2228 for (const SDValue &Op : TF->op_values()) {
2229 switch (Op.getOpcode()) {
2230 case ISD::EntryToken:
2231 // Entry tokens don't need to be added to the list. They are
2232 // redundant.
2233 Changed = true;
2234 break;
2235
2236 case ISD::TokenFactor:
2237 if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
2238 // Queue up for processing.
2239 TFs.push_back(Op.getNode());
2240 Changed = true;
2241 break;
2242 }
2243 [[fallthrough]];
2244
2245 default:
2246 // Only add if it isn't already in the list.
2247 if (SeenOps.insert(Op.getNode()).second)
2248 Ops.push_back(Op);
2249 else
2250 Changed = true;
2251 break;
2252 }
2253 }
2254 }
2255
2256 // Re-visit inlined Token Factors, to clean them up in case they have been
2257 // removed. Skip the first Token Factor, as this is the current node.
2258 for (unsigned i = 1, e = TFs.size(); i < e; i++)
2259 AddToWorklist(TFs[i]);
2260
2261 // Remove Nodes that are chained to another node in the list. Do so
2262 // by walking up chains breadth-first, stopping when we've seen
2263 // another operand. In general we must climb to the EntryNode, but we can exit
2264 // early if we find all remaining work is associated with just one operand as
2265 // no further pruning is possible.
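// Rough example: in TokenFactor(a, b), if the breadth-first walk up a's chain
// operands reaches b, then b's ordering is already enforced through a and b
// can be pruned from Ops.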
2266
2267 // List of nodes to search through and original Ops from which they originate.
2268 SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
2269 SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
2270 SmallPtrSet<SDNode *, 16> SeenChains;
2271 bool DidPruneOps = false;
2272
2273 unsigned NumLeftToConsider = 0;
2274 for (const SDValue &Op : Ops) {
2275 Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
2276 OpWorkCount.push_back(1);
2277 }
2278
2279 auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
2280 // If this is an Op, we can remove the op from the list. Re-mark any
2281 // search associated with it as from the current OpNumber.
2282 if (SeenOps.contains(Op)) {
2283 Changed = true;
2284 DidPruneOps = true;
2285 unsigned OrigOpNumber = 0;
2286 while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
2287 OrigOpNumber++;
2288 assert((OrigOpNumber != Ops.size()) &&
2289 "expected to find TokenFactor Operand");
2290 // Re-mark worklist from OrigOpNumber to OpNumber
2291 for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
2292 if (Worklist[i].second == OrigOpNumber) {
2293 Worklist[i].second = OpNumber;
2294 }
2295 }
2296 OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
2297 OpWorkCount[OrigOpNumber] = 0;
2298 NumLeftToConsider--;
2299 }
2300 // Add if it's a new chain
2301 if (SeenChains.insert(Op).second) {
2302 OpWorkCount[OpNumber]++;
2303 Worklist.push_back(std::make_pair(Op, OpNumber));
2304 }
2305 };
2306
2307 for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
2308 // We need to consider at least 2 Ops to prune.
2309 if (NumLeftToConsider <= 1)
2310 break;
2311 auto CurNode = Worklist[i].first;
2312 auto CurOpNumber = Worklist[i].second;
2313 assert((OpWorkCount[CurOpNumber] > 0) &&
2314 "Node should not appear in worklist");
2315 switch (CurNode->getOpcode()) {
2316 case ISD::EntryToken:
2317 // Hitting EntryToken is the only way for the search to terminate without
2318 // hitting
2319 // another operand's search. Prevent us from marking this operand
2320 // considered.
2321 NumLeftToConsider++;
2322 break;
2323 case ISD::TokenFactor:
2324 for (const SDValue &Op : CurNode->op_values())
2325 AddToWorklist(i, Op.getNode(), CurOpNumber);
2326 break;
2327 case ISD::LIFETIME_START:
2328 case ISD::LIFETIME_END:
2329 case ISD::CopyFromReg:
2330 case ISD::CopyToReg:
2331 AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
2332 break;
2333 default:
2334 if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
2335 AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
2336 break;
2337 }
2338 OpWorkCount[CurOpNumber]--;
2339 if (OpWorkCount[CurOpNumber] == 0)
2340 NumLeftToConsider--;
2341 }
2342
2343 // If we've changed things around then replace token factor.
2344 if (Changed) {
2345 SDValue Result;
2346 if (Ops.empty()) {
2347 // The entry token is the only possible outcome.
2348 Result = DAG.getEntryNode();
2349 } else {
2350 if (DidPruneOps) {
2351 SmallVector<SDValue, 8> PrunedOps;
2352 //
2353 for (const SDValue &Op : Ops) {
2354 if (SeenChains.count(Op.getNode()) == 0)
2355 PrunedOps.push_back(Op);
2356 }
2357 Result = DAG.getTokenFactor(SDLoc(N), PrunedOps);
2358 } else {
2359 Result = DAG.getTokenFactor(SDLoc(N), Ops);
2360 }
2361 }
2362 return Result;
2363 }
2364 return SDValue();
2365}
2366
2367/// MERGE_VALUES can always be eliminated.
2368SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
2369 WorklistRemover DeadNodes(*this);
2370 // Replacing results may cause a different MERGE_VALUES to suddenly
2371 // be CSE'd with N, and carry its uses with it. Iterate until no
2372 // uses remain, to ensure that the node can be safely deleted.
2373 // First add the users of this node to the work list so that they
2374 // can be tried again once they have new operands.
2375 AddUsersToWorklist(N);
2376 do {
2377 // Do as a single replacement to avoid rewalking use lists.
2378 SmallVector<SDValue, 8> Ops(N->ops());
2379 DAG.ReplaceAllUsesWith(N, Ops.data());
2380 } while (!N->use_empty());
2381 deleteAndRecombine(N);
2382 return SDValue(N, 0); // Return N so it doesn't get rechecked!
2383}
2384
2385/// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a
2386/// ConstantSDNode pointer else nullptr.
2387static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
2388 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
2389 return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
2390}
2391
2392// isTruncateOf - If N is a truncate of some other value, return true, record
2393// the value being truncated in Op and which of Op's bits are zero/one in Known.
2394// This function computes KnownBits to avoid a duplicated call to
2395// computeKnownBits in the caller.
2396static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
2397 KnownBits &Known) {
2398 if (N->getOpcode() == ISD::TRUNCATE) {
2399 Op = N->getOperand(0);
2400 Known = DAG.computeKnownBits(Op);
2401 if (N->getFlags().hasNoUnsignedWrap())
2402 Known.Zero.setBitsFrom(N.getScalarValueSizeInBits());
2403 return true;
2404 }
2405
2406 if (N.getValueType().getScalarType() != MVT::i1 ||
2407 !sd_match(
2408 N, m_c_SetCC(m_Value(Op), m_Zero(), m_SpecificCondCode(ISD::SETNE))))
2409 return false;
2410
2411 Known = DAG.computeKnownBits(Op);
2412 return (Known.Zero | 1).isAllOnes();
2413}
2414
2415/// Return true if 'Use' is a load or a store that uses N as its base pointer
2416/// and that N may be folded in the load / store addressing mode.
2417static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, SelectionDAG &DAG,
2418 const TargetLowering &TLI) {
2419 EVT VT;
2420 unsigned AS;
2421
2422 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
2423 if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2424 return false;
2425 VT = LD->getMemoryVT();
2426 AS = LD->getAddressSpace();
2427 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
2428 if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2429 return false;
2430 VT = ST->getMemoryVT();
2431 AS = ST->getAddressSpace();
2432 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(Use)) {
2433 if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2434 return false;
2435 VT = LD->getMemoryVT();
2436 AS = LD->getAddressSpace();
2437 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(Use)) {
2438 if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2439 return false;
2440 VT = ST->getMemoryVT();
2441 AS = ST->getAddressSpace();
2442 } else {
2443 return false;
2444 }
2445
2446 TargetLowering::AddrMode AM;
2447 if (N->isAnyAdd()) {
2448 AM.HasBaseReg = true;
2449 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2450 if (Offset)
2451 // [reg +/- imm]
2452 AM.BaseOffs = Offset->getSExtValue();
2453 else
2454 // [reg +/- reg]
2455 AM.Scale = 1;
2456 } else if (N->getOpcode() == ISD::SUB) {
2457 AM.HasBaseReg = true;
2458 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2459 if (Offset)
2460 // [reg +/- imm]
2461 AM.BaseOffs = -Offset->getSExtValue();
2462 else
2463 // [reg +/- reg]
2464 AM.Scale = 1;
2465 } else {
2466 return false;
2467 }
2468
2469 return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
2470 VT.getTypeForEVT(*DAG.getContext()), AS);
2471}
2472
2473/// This inverts a canonicalization in IR that replaces a variable select arm
2474/// with an identity constant. Codegen improves if we re-use the variable
2475/// operand rather than load a constant. This can also be converted into a
2476/// masked vector operation if the target supports it.
2477static SDValue foldSelectWithIdentityConstant(SDNode *N, SelectionDAG &DAG,
2478 bool ShouldCommuteOperands) {
2479 SDValue N0 = N->getOperand(0);
2480 SDValue N1 = N->getOperand(1);
2481
2482 // Match a select as operand 1. The identity constant that we are looking for
2483 // is only valid as operand 1 of a non-commutative binop.
2484 if (ShouldCommuteOperands)
2485 std::swap(N0, N1);
2486
2487 SDValue Cond, TVal, FVal;
2489 m_Value(FVal)))))
2490 return SDValue();
2491
2492 // We can't hoist all instructions because of immediate UB (not speculatable).
2493 // For example div/rem by zero.
2494 if (!DAG.isSafeToSpeculativelyExecuteNode(N))
2495 return SDValue();
2496
2497 unsigned SelOpcode = N1.getOpcode();
2498 unsigned Opcode = N->getOpcode();
2499 EVT VT = N->getValueType(0);
2500 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2501
2502 // This transform increases uses of N0, so freeze it to be safe.
2503 // binop N0, (vselect Cond, IDC, FVal) --> vselect Cond, N0, (binop N0, FVal)
2504 unsigned OpNo = ShouldCommuteOperands ? 0 : 1;
2505 if (isNeutralConstant(Opcode, N->getFlags(), TVal, OpNo) &&
2506 TLI.shouldFoldSelectWithIdentityConstant(Opcode, VT, SelOpcode, N0,
2507 FVal)) {
2508 SDValue F0 = DAG.getFreeze(N0);
2509 SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, FVal, N->getFlags());
2510 return DAG.getSelect(SDLoc(N), VT, Cond, F0, NewBO);
2511 }
2512 // binop N0, (vselect Cond, TVal, IDC) --> vselect Cond, (binop N0, TVal), N0
2513 if (isNeutralConstant(Opcode, N->getFlags(), FVal, OpNo) &&
2514 TLI.shouldFoldSelectWithIdentityConstant(Opcode, VT, SelOpcode, N0,
2515 TVal)) {
2516 SDValue F0 = DAG.getFreeze(N0);
2517 SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, TVal, N->getFlags());
2518 return DAG.getSelect(SDLoc(N), VT, Cond, NewBO, F0);
2519 }
2520
2521 return SDValue();
2522}
2523
2524SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
2525 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2526 assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
2527 "Unexpected binary operator");
2528
2529 if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, false))
2530 return Sel;
2531
2532 if (TLI.isCommutativeBinOp(BO->getOpcode()))
2533 if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, true))
2534 return Sel;
2535
2536 // Don't do this unless the old select is going away. We want to eliminate the
2537 // binary operator, not replace a binop with a select.
2538 // TODO: Handle ISD::SELECT_CC.
2539 unsigned SelOpNo = 0;
2540 SDValue Sel = BO->getOperand(0);
2541 auto BinOpcode = BO->getOpcode();
2542 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
2543 SelOpNo = 1;
2544 Sel = BO->getOperand(1);
2545
2546 // Peek through trunc to shift amount type.
2547 if ((BinOpcode == ISD::SHL || BinOpcode == ISD::SRA ||
2548 BinOpcode == ISD::SRL) && Sel.hasOneUse()) {
2549 // This is valid when the truncated bits of x are already zero.
2550 SDValue Op;
2551 KnownBits Known;
2552 if (isTruncateOf(DAG, Sel, Op, Known) &&
2553 Known.countMaxActiveBits() <= Sel.getScalarValueSizeInBits())
2554 Sel = Op;
2555 }
2556 }
2557
2558 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
2559 return SDValue();
2560
2561 SDValue CT = Sel.getOperand(1);
2562 if (!isConstantOrConstantVector(CT, true) &&
2563 !DAG.isConstantFPBuildVectorOrConstantFP(CT))
2564 return SDValue();
2565
2566 SDValue CF = Sel.getOperand(2);
2567 if (!isConstantOrConstantVector(CF, true) &&
2568 !DAG.isConstantFPBuildVectorOrConstantFP(CF))
2569 return SDValue();
2570
2571 // Bail out if any constants are opaque because we can't constant fold those.
2572 // The exception is "and" and "or" with either 0 or -1 in which case we can
2573 // propagate non constant operands into select. I.e.:
2574 // and (select Cond, 0, -1), X --> select Cond, 0, X
2575 // or X, (select Cond, -1, 0) --> select Cond, -1, X
2576 bool CanFoldNonConst =
2577 (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
2578 ((isNullOrNullSplat(CT) && isAllOnesOrAllOnesSplat(CF)) ||
2579 (isNullOrNullSplat(CF) && isAllOnesOrAllOnesSplat(CT)));
2580
2581 SDValue CBO = BO->getOperand(SelOpNo ^ 1);
2582 if (!CanFoldNonConst &&
2583 !isConstantOrConstantVector(CBO, true) &&
2584 !DAG.isConstantFPBuildVectorOrConstantFP(CBO))
2585 return SDValue();
2586
2587 SDLoc DL(Sel);
2588 SDValue NewCT, NewCF;
2589 EVT VT = BO->getValueType(0);
2590
2591 if (CanFoldNonConst) {
2592 // If CBO is an opaque constant, we can't rely on getNode to constant fold.
2593 if ((BinOpcode == ISD::AND && isNullOrNullSplat(CT)) ||
2594 (BinOpcode == ISD::OR && isAllOnesOrAllOnesSplat(CT)))
2595 NewCT = CT;
2596 else
2597 NewCT = CBO;
2598
2599 if ((BinOpcode == ISD::AND && isNullOrNullSplat(CF)) ||
2600 (BinOpcode == ISD::OR && isAllOnesOrAllOnesSplat(CF)))
2601 NewCF = CF;
2602 else
2603 NewCF = CBO;
2604 } else {
2605 // We have a select-of-constants followed by a binary operator with a
2606 // constant. Eliminate the binop by pulling the constant math into the
2607 // select. Example: add (select Cond, CT, CF), CBO --> select Cond, CT +
2608 // CBO, CF + CBO
2609 NewCT = SelOpNo ? DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CBO, CT})
2610 : DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CT, CBO});
2611 if (!NewCT)
2612 return SDValue();
2613
2614 NewCF = SelOpNo ? DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CBO, CF})
2615 : DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CF, CBO});
2616 if (!NewCF)
2617 return SDValue();
2618 }
2619
2620 return DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF, BO->getFlags());
2621}
2622
2623static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, const SDLoc &DL,
2624 SelectionDAG &DAG) {
2625 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2626 "Expecting add or sub");
2627
2628 // Match a constant operand and a zext operand for the math instruction:
2629 // add Z, C
2630 // sub C, Z
2631 bool IsAdd = N->getOpcode() == ISD::ADD;
2632 SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
2633 SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
2634 auto *CN = dyn_cast<ConstantSDNode>(C);
2635 if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
2636 return SDValue();
2637
2638 // Match the zext operand as a setcc of a boolean.
2639 if (Z.getOperand(0).getValueType() != MVT::i1)
2640 return SDValue();
2641
2642 // Match the compare as: setcc (X & 1), 0, eq.
2643 if (!sd_match(Z.getOperand(0), m_SetCC(m_And(m_Value(), m_One()), m_Zero(),
2644 m_SpecificCondCode(ISD::SETEQ))))
2645 return SDValue();
2646
2647 // We are adding/subtracting a constant and an inverted low bit. Turn that
2648 // into a subtract/add of the low bit with incremented/decremented constant:
2649 // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
2650 // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
2651 EVT VT = C.getValueType();
2652 SDValue LowBit = DAG.getZExtOrTrunc(Z.getOperand(0).getOperand(0), DL, VT);
2653 SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT)
2654 : DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
2655 return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
2656}
2657
2658// Attempt to form avgceil(A, B) from (A | B) - ((A ^ B) >> 1)
2659SDValue DAGCombiner::foldSubToAvg(SDNode *N, const SDLoc &DL) {
2660 SDValue N0 = N->getOperand(0);
2661 EVT VT = N0.getValueType();
2662 SDValue A, B;
2663
2664 if ((!LegalOperations || hasOperation(ISD::AVGCEILU, VT)) &&
2665 sd_match(N, m_Sub(m_Or(m_Value(A), m_Value(B)),
2666 m_Srl(m_Xor(m_Deferred(A), m_Deferred(B)), m_One())))) {
2667 return DAG.getNode(ISD::AVGCEILU, DL, VT, A, B);
2668 }
2669 if ((!LegalOperations || hasOperation(ISD::AVGCEILS, VT)) &&
2670 sd_match(N, m_Sub(m_Or(m_Value(A), m_Value(B)),
2671 m_Sra(m_Xor(m_Deferred(A), m_Deferred(B)), m_One())))) {
2672 return DAG.getNode(ISD::AVGCEILS, DL, VT, A, B);
2673 }
2674 return SDValue();
2675}
2676
2677/// Try to fold a pointer arithmetic node.
2678/// This needs to be done separately from normal addition, because pointer
2679/// addition is not commutative.
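// For example (illustrative): in (ptradd p, 16) the first operand carries the
// pointer value and its provenance while the second is a plain byte offset,
// so the two operands cannot be swapped the way ADD operands can.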
2680SDValue DAGCombiner::visitPTRADD(SDNode *N) {
2681 SDValue N0 = N->getOperand(0);
2682 SDValue N1 = N->getOperand(1);
2683 EVT PtrVT = N0.getValueType();
2684 EVT IntVT = N1.getValueType();
2685 SDLoc DL(N);
2686
2687 // This is already ensured by an assert in SelectionDAG::getNode(). Several
2688 // combines here depend on this assumption.
2689 assert(PtrVT == IntVT &&
2690 "PTRADD with different operand types is not supported");
2691
2692 // fold (ptradd x, 0) -> x
2693 if (isNullConstant(N1))
2694 return N0;
2695
2696 // fold (ptradd 0, x) -> x
2697 if (PtrVT == IntVT && isNullConstant(N0))
2698 return N1;
2699
2700 if (N0.getOpcode() == ISD::PTRADD &&
2701 !reassociationCanBreakAddressingModePattern(ISD::PTRADD, DL, N, N0, N1)) {
2702 SDValue X = N0.getOperand(0);
2703 SDValue Y = N0.getOperand(1);
2704 SDValue Z = N1;
2705 bool N0OneUse = N0.hasOneUse();
2706 bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Y);
2707 bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z);
2708
2709 // (ptradd (ptradd x, y), z) -> (ptradd x, (add y, z)) if:
2710 // * y is a constant and (ptradd x, y) has one use; or
2711 // * y and z are both constants.
2712 if ((YIsConstant && N0OneUse) || (YIsConstant && ZIsConstant)) {
2713 // If both additions in the original were NUW, the new ones are as well.
2714 SDNodeFlags Flags =
2715 (N->getFlags() & N0->getFlags()) & SDNodeFlags::NoUnsignedWrap;
2716 SDValue Add = DAG.getNode(ISD::ADD, DL, IntVT, {Y, Z}, Flags);
2717 AddToWorklist(Add.getNode());
2718 // We can't set InBounds even if both original ptradds were InBounds and
2719 // NUW: SDAG usually represents pointers as integers, therefore, the
2720 // matched pattern behaves as if it had implicit casts:
2721 // (ptradd inbounds (inttoptr (ptrtoint (ptradd inbounds x, y))), z)
2722 // The outer inbounds ptradd might therefore rely on a provenance that x
2723 // does not have.
2724 return DAG.getMemBasePlusOffset(X, Add, DL, Flags);
2725 }
2726 }
2727
2728 // The following combines can turn in-bounds pointer arithmetic out of bounds.
2729 // That is problematic for settings like AArch64's CPA, which checks that
2730 // intermediate results of pointer arithmetic remain in bounds. The target
2731 // therefore needs to opt-in to enable them.
2733 DAG.getMachineFunction().getFunction(), PtrVT))
2734 return SDValue();
2735
2736 if (N0.getOpcode() == ISD::PTRADD && isa<ConstantSDNode>(N1)) {
2737 // Fold (ptradd (ptradd GA, v), c) -> (ptradd (ptradd GA, c) v) with
2738 // global address GA and constant c, such that c can be folded into GA.
2739 // TODO: Support constant vector splats.
2740 SDValue GAValue = N0.getOperand(0);
2741 if (const GlobalAddressSDNode *GA =
2742 dyn_cast<GlobalAddressSDNode>(GAValue)) {
2743 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2744 if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
2745 // If both additions in the original were NUW, reassociation preserves
2746 // that.
2747 SDNodeFlags Flags =
2748 (N->getFlags() & N0->getFlags()) & SDNodeFlags::NoUnsignedWrap;
2749 // We can't set InBounds even if both original ptradds were InBounds and
2750 // NUW: SDAG usually represents pointers as integers, therefore, the
2751 // matched pattern behaves as if it had implicit casts:
2752 // (ptradd inbounds (inttoptr (ptrtoint (ptradd inbounds GA, v))), c)
2753 // The outer inbounds ptradd might therefore rely on a provenance that
2754 // GA does not have.
2755 SDValue Inner = DAG.getMemBasePlusOffset(GAValue, N1, DL, Flags);
2756 AddToWorklist(Inner.getNode());
2757 return DAG.getMemBasePlusOffset(Inner, N0.getOperand(1), DL, Flags);
2758 }
2759 }
2760 }
2761
2762 if (N1.getOpcode() == ISD::ADD && N1.hasOneUse()) {
2763 // (ptradd x, (add y, z)) -> (ptradd (ptradd x, y), z) if z is a constant,
2764 // y is not, and (add y, z) is used only once.
2765 // (ptradd x, (add y, z)) -> (ptradd (ptradd x, z), y) if y is a constant,
2766 // z is not, and (add y, z) is used only once.
2767 // The goal is to move constant offsets to the outermost ptradd, to create
2768 // more opportunities to fold offsets into memory instructions.
2769 // Together with another combine above, this also implements
2770 // (ptradd (ptradd x, y), z) -> (ptradd (ptradd x, z), y).
2771 SDValue X = N0;
2772 SDValue Y = N1.getOperand(0);
2773 SDValue Z = N1.getOperand(1);
2774 bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Y);
2775 bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z);
2776
2777 // If both additions in the original were NUW, reassociation preserves that.
2778 SDNodeFlags CommonFlags = N->getFlags() & N1->getFlags();
2779 SDNodeFlags ReassocFlags = CommonFlags & SDNodeFlags::NoUnsignedWrap;
2780 if (CommonFlags.hasNoUnsignedWrap()) {
2781 // If both operations are NUW and the PTRADD is inbounds, the offsets are
2782 // both non-negative, so the reassociated PTRADDs are also inbounds.
2783 ReassocFlags |= N->getFlags() & SDNodeFlags::InBounds;
2784 }
2785
2786 if (ZIsConstant != YIsConstant) {
2787 if (YIsConstant)
2788 std::swap(Y, Z);
2789 SDValue Inner = DAG.getMemBasePlusOffset(X, Y, DL, ReassocFlags);
2790 AddToWorklist(Inner.getNode());
2791 return DAG.getMemBasePlusOffset(Inner, Z, DL, ReassocFlags);
2792 }
2793 }
2794
2795 // Transform (ptradd a, b) -> (or disjoint a, b) if it is equivalent and if
2796 // that transformation can't block an offset folding at any use of the ptradd.
2797 // This should be done late, after legalization, so that it doesn't block
2798 // other ptradd combines that could enable more offset folding.
2799 if (LegalOperations && DAG.haveNoCommonBitsSet(N0, N1)) {
2800 bool TransformCannotBreakAddrMode = none_of(N->users(), [&](SDNode *User) {
2801 return canFoldInAddressingMode(N, User, DAG, TLI);
2802 });
2803
2804 if (TransformCannotBreakAddrMode)
2805 return DAG.getNode(ISD::OR, DL, PtrVT, N0, N1, SDNodeFlags::Disjoint);
2806 }
2807
2808 return SDValue();
2809}
2810
2811/// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
2812/// a shift and add with a different constant.
2813static SDValue foldAddSubOfSignBit(SDNode *N, const SDLoc &DL,
2814 SelectionDAG &DAG) {
2815 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2816 "Expecting add or sub");
2817
2818 // We need a constant operand for the add/sub, and the other operand is a
2819 // logical shift right: add (srl), C or sub C, (srl).
2820 bool IsAdd = N->getOpcode() == ISD::ADD;
2821 SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
2822 SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
2823 if (!DAG.isConstantIntBuildVectorOrConstantInt(ConstantOp) ||
2824 ShiftOp.getOpcode() != ISD::SRL)
2825 return SDValue();
2826
2827 // The shift must be of a 'not' value.
2828 SDValue Not = ShiftOp.getOperand(0);
2829 if (!Not.hasOneUse() || !isBitwiseNot(Not))
2830 return SDValue();
2831
2832 // The shift must be moving the sign bit to the least-significant-bit.
2833 EVT VT = ShiftOp.getValueType();
2834 SDValue ShAmt = ShiftOp.getOperand(1);
2835 ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2836 if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1))
2837 return SDValue();
2838
2839 // Eliminate the 'not' by adjusting the shift and add/sub constant:
2840 // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2841 // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
2842 if (SDValue NewC = DAG.FoldConstantArithmetic(
2843 IsAdd ? ISD::ADD : ISD::SUB, DL, VT,
2844 {ConstantOp, DAG.getConstant(1, DL, VT)})) {
2845 SDValue NewShift = DAG.getNode(IsAdd ? ISD::SRA : ISD::SRL, DL, VT,
2846 Not.getOperand(0), ShAmt);
2847 return DAG.getNode(ISD::ADD, DL, VT, NewShift, NewC);
2848 }
2849
2850 return SDValue();
2851}
2852
2853static bool
2854areBitwiseNotOfEachother(SDValue Op0, SDValue Op1) {
2855 return (isBitwiseNot(Op0) && Op0.getOperand(0) == Op1) ||
2856 (isBitwiseNot(Op1) && Op1.getOperand(0) == Op0);
2857}
2858
2859/// Try to fold a node that behaves like an ADD (note that N isn't necessarily
2860/// an ISD::ADD here, it could for example be an ISD::OR if we know that there
2861/// are no common bits set in the operands).
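// For example (illustrative): (or x, 4) with bit 2 of x known to be zero
// computes the same value as (add x, 4), so the ADD folds below apply to it
// as well.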
2862SDValue DAGCombiner::visitADDLike(SDNode *N) {
2863 SDValue N0 = N->getOperand(0);
2864 SDValue N1 = N->getOperand(1);
2865 EVT VT = N0.getValueType();
2866 SDLoc DL(N);
2867
2868 // fold (add x, undef) -> undef
2869 if (N0.isUndef())
2870 return N0;
2871 if (N1.isUndef())
2872 return N1;
2873
2874 // fold (add c1, c2) -> c1+c2
2875 if (SDValue C = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0, N1}))
2876 return C;
2877
2878 // canonicalize constant to RHS
2881 return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
2882
2883 if (areBitwiseNotOfEachother(N0, N1))
2884 return DAG.getConstant(APInt::getAllOnes(VT.getScalarSizeInBits()), DL, VT);
2885
2886 // fold vector ops
2887 if (VT.isVector()) {
2888 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
2889 return FoldedVOp;
2890
2891 // fold (add x, 0) -> x, vector edition
2892 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
2893 return N0;
2894 }
2895
2896 // fold (add x, 0) -> x
2897 if (isNullConstant(N1))
2898 return N0;
2899
2900 if (N0.getOpcode() == ISD::SUB) {
2901 SDValue N00 = N0.getOperand(0);
2902 SDValue N01 = N0.getOperand(1);
2903
2904 // fold ((A-c1)+c2) -> (A+(c2-c1))
2905 if (SDValue Sub = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N1, N01}))
2906 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub);
2907
2908 // fold ((c1-A)+c2) -> (c1+c2)-A
2909 if (SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N00}))
2910 return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2911 }
2912
2913 // add (sext i1 X), 1 -> zext (not i1 X)
2914 // We don't transform this pattern:
2915 // add (zext i1 X), -1 -> sext (not i1 X)
2916 // because most (?) targets generate better code for the zext form.
2917 if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
2918 isOneOrOneSplat(N1)) {
2919 SDValue X = N0.getOperand(0);
2920 if ((!LegalOperations ||
2921 (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
2922 TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
2923 X.getScalarValueSizeInBits() == 1) {
2924 SDValue Not = DAG.getNOT(DL, X, X.getValueType());
2925 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
2926 }
2927 }
2928
2929 // Fold (add (or x, c0), c1) -> (add x, (c0 + c1))
2930 // iff (or x, c0) is equivalent to (add x, c0).
2931 // Fold (add (xor x, c0), c1) -> (add x, (c0 + c1))
2932 // iff (xor x, c0) is equivalent to (add x, c0).
2933 if (DAG.isADDLike(N0)) {
2934 SDValue N01 = N0.getOperand(1);
2935 if (SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N01}))
2936 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add);
2937 }
2938
2939 if (SDValue NewSel = foldBinOpIntoSelect(N))
2940 return NewSel;
2941
2942 // reassociate add
2943 if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N, N0, N1)) {
2944 if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
2945 return RADD;
2946
2947 // Reassociate (add (or x, c), y) -> (add add(x, y), c)) if (or x, c) is
2948 // equivalent to (add x, c).
2949 // Reassociate (add (xor x, c), y) -> (add add(x, y), c)) if (xor x, c) is
2950 // equivalent to (add x, c).
2951 // Do this optimization only when adding c does not introduce instructions
2952 // for adding carries.
2953 auto ReassociateAddOr = [&](SDValue N0, SDValue N1) {
2954 if (DAG.isADDLike(N0) && N0.hasOneUse() &&
2955 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
2956 // If N0's type does not split or is a sign mask, it does not introduce
2957 // add carry.
2958 auto TyActn = TLI.getTypeAction(*DAG.getContext(), N0.getValueType());
2959 bool NoAddCarry = TyActn == TargetLoweringBase::TypeLegal ||
2960 TyActn == TargetLoweringBase::TypePromoteInteger ||
2961 isMinSignedConstant(N0.getOperand(1));
2962 if (NoAddCarry)
2963 return DAG.getNode(
2964 ISD::ADD, DL, VT,
2965 DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
2966 N0.getOperand(1));
2967 }
2968 return SDValue();
2969 };
2970 if (SDValue Add = ReassociateAddOr(N0, N1))
2971 return Add;
2972 if (SDValue Add = ReassociateAddOr(N1, N0))
2973 return Add;
2974
2975 // Fold add(vecreduce(x), vecreduce(y)) -> vecreduce(add(x, y))
2976 if (SDValue SD =
2977 reassociateReduction(ISD::VECREDUCE_ADD, ISD::ADD, DL, VT, N0, N1))
2978 return SD;
2979 }
2980
2981 SDValue A, B, C, D;
2982
2983 // fold ((0-A) + B) -> B-A
2984 if (sd_match(N0, m_Neg(m_Value(A))))
2985 return DAG.getNode(ISD::SUB, DL, VT, N1, A);
2986
2987 // fold (A + (0-B)) -> A-B
2988 if (sd_match(N1, m_Neg(m_Value(B))))
2989 return DAG.getNode(ISD::SUB, DL, VT, N0, B);
2990
2991 // fold (A+(B-A)) -> B
2992 if (sd_match(N1, m_Sub(m_Value(B), m_Specific(N0))))
2993 return B;
2994
2995 // fold ((B-A)+A) -> B
2996 if (sd_match(N0, m_Sub(m_Value(B), m_Specific(N1))))
2997 return B;
2998
2999 // fold ((A-B)+(C-A)) -> (C-B)
3000 if (sd_match(N0, m_Sub(m_Value(A), m_Value(B))) &&
3001 sd_match(N1, m_Sub(m_Value(C), m_Specific(A))))
3002 return DAG.getNode(ISD::SUB, DL, VT, C, B);
3003
3004 // fold ((A-B)+(B-C)) -> (A-C)
3005 if (sd_match(N0, m_Sub(m_Value(A), m_Value(B))) &&
3006 sd_match(N1, m_Sub(m_Specific(B), m_Value(C))))
3007 return DAG.getNode(ISD::SUB, DL, VT, A, C);
3008
3009 // fold (A+(B-(A+C))) to (B-C)
3010 // fold (A+(B-(C+A))) to (B-C)
3011 if (sd_match(N1, m_Sub(m_Value(B), m_Add(m_Specific(N0), m_Value(C)))))
3012 return DAG.getNode(ISD::SUB, DL, VT, B, C);
3013
3014 // fold (A+((B-A)+or-C)) to (B+or-C)
3015 if (sd_match(N1,
3016 m_AnyOf(m_Add(m_Sub(m_Value(B), m_Specific(N0)), m_Value(C)),
3017 m_Sub(m_Sub(m_Value(B), m_Specific(N0)), m_Value(C)))))
3018 return DAG.getNode(N1.getOpcode(), DL, VT, B, C);
3019
3020 // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
3021 if (sd_match(N0, m_OneUse(m_Sub(m_Value(A), m_Value(B)))) &&
3022 sd_match(N1, m_OneUse(m_Sub(m_Value(C), m_Value(D)))) &&
3023 (isConstantOrConstantVector(A) || isConstantOrConstantVector(C)))
3024 return DAG.getNode(ISD::SUB, DL, VT,
3025 DAG.getNode(ISD::ADD, SDLoc(N0), VT, A, C),
3026 DAG.getNode(ISD::ADD, SDLoc(N1), VT, B, D));
3027
3028 // fold (add (umax X, C), -C) --> (usubsat X, C)
3029 if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
3030 auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
3031 return (!Max && !Op) ||
3032 (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
3033 };
3034 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
3035 /*AllowUndefs*/ true))
3036 return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
3037 N0.getOperand(1));
3038 }
3039
3040 if (SimplifyDemandedBits(SDValue(N, 0)))
3041 return SDValue(N, 0);
3042
3043 if (isOneOrOneSplat(N1)) {
3044 // fold (add (xor a, -1), 1) -> (sub 0, a)
3045 if (isBitwiseNot(N0))
3046 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
3047 N0.getOperand(0));
3048
3049 // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
3050 if (N0.getOpcode() == ISD::ADD) {
3051 SDValue A, Xor;
3052
3053 if (isBitwiseNot(N0.getOperand(0))) {
3054 A = N0.getOperand(1);
3055 Xor = N0.getOperand(0);
3056 } else if (isBitwiseNot(N0.getOperand(1))) {
3057 A = N0.getOperand(0);
3058 Xor = N0.getOperand(1);
3059 }
3060
3061 if (Xor)
3062 return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
3063 }
3064
3065 // Look for:
3066 // add (add x, y), 1
3067 // And if the target does not like this form then turn into:
3068 // sub y, (xor x, -1)
3069 if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
3070 N0.hasOneUse() &&
3071 // Limit this to after legalization if the add has wrap flags
3072 (Level >= AfterLegalizeDAG || (!N->getFlags().hasNoUnsignedWrap() &&
3073 !N->getFlags().hasNoSignedWrap()))) {
3074 SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
3075 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
3076 }
3077 }
3078
3079 // (x - y) + -1 -> add (xor y, -1), x
3080 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
3081 isAllOnesOrAllOnesSplat(N1, /*AllowUndefs=*/true)) {
3082 SDValue Not = DAG.getNOT(DL, N0.getOperand(1), VT);
3083 return DAG.getNode(ISD::ADD, DL, VT, Not, N0.getOperand(0));
3084 }
3085
3086 // Fold add(mul(add(A, CA), CM), CB) -> add(mul(A, CM), CM*CA+CB).
3087 // This can help if the inner add has multiple uses.
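// Illustrative example with CA = 2, CM = 3, CB = 5:
//   (A + 2) * 3 + 5 == 3 * A + 11 == A * 3 + (3 * 2 + 5),
// so the inner add can stay intact for its other uses while this use becomes
// a mul by CM plus a single folded constant.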
3088 APInt CM, CA;
3089 if (ConstantSDNode *CB = dyn_cast<ConstantSDNode>(N1)) {
3090 if (VT.getScalarSizeInBits() <= 64) {
3091 if (sd_match(N0, m_OneUse(m_Mul(m_Add(m_Value(A), m_ConstInt(CA)),
3092 m_ConstInt(CM)))) &&
3093 TLI.isLegalAddImmediate(
3094 (CA * CM + CB->getAPIntValue()).getSExtValue())) {
3095 SDNodeFlags Flags;
3096 // If all the inputs are nuw, the outputs can be nuw. If all the inputs
3097 // are _also_ nsw, the outputs can be too.
3098 if (N->getFlags().hasNoUnsignedWrap() &&
3099 N0->getFlags().hasNoUnsignedWrap() &&
3100 N0.getOperand(0)->getFlags().hasNoUnsignedWrap()) {
3101 Flags.setNoUnsignedWrap(true);
3102 if (N->getFlags().hasNoSignedWrap() &&
3103 N0->getFlags().hasNoSignedWrap() &&
3104 N0.getOperand(0)->getFlags().hasNoSignedWrap())
3105 Flags.setNoSignedWrap(true);
3106 }
3107 SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N1), VT, A,
3108 DAG.getConstant(CM, DL, VT), Flags);
3109 return DAG.getNode(
3110 ISD::ADD, DL, VT, Mul,
3111 DAG.getConstant(CA * CM + CB->getAPIntValue(), DL, VT), Flags);
3112 }
3113 // Also look in case there is an intermediate add.
3114 if (sd_match(N0, m_OneUse(m_Add(
3115 m_OneUse(m_Mul(m_Add(m_Value(A), m_ConstInt(CA)),
3116 m_ConstInt(CM))),
3117 m_Value(B)))) &&
3118 TLI.isLegalAddImmediate(
3119 (CA * CM + CB->getAPIntValue()).getSExtValue())) {
3120 SDNodeFlags Flags;
3121 // If all the inputs are nuw, the outputs can be nuw. If all the inputs
3122 // are _also_ nsw, the outputs can be too.
3123 SDValue OMul =
3124 N0.getOperand(0) == B ? N0.getOperand(1) : N0.getOperand(0);
3125 if (N->getFlags().hasNoUnsignedWrap() &&
3126 N0->getFlags().hasNoUnsignedWrap() &&
3127 OMul->getFlags().hasNoUnsignedWrap() &&
3128 OMul.getOperand(0)->getFlags().hasNoUnsignedWrap()) {
3129 Flags.setNoUnsignedWrap(true);
3130 if (N->getFlags().hasNoSignedWrap() &&
3131 N0->getFlags().hasNoSignedWrap() &&
3132 OMul->getFlags().hasNoSignedWrap() &&
3133 OMul.getOperand(0)->getFlags().hasNoSignedWrap())
3134 Flags.setNoSignedWrap(true);
3135 }
3136 SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N1), VT, A,
3137 DAG.getConstant(CM, DL, VT), Flags);
3138 SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N1), VT, Mul, B, Flags);
3139 return DAG.getNode(
3140 ISD::ADD, DL, VT, Add,
3141 DAG.getConstant(CA * CM + CB->getAPIntValue(), DL, VT), Flags);
3142 }
3143 }
3144 }
3145
3146 if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
3147 return Combined;
3148
3149 if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
3150 return Combined;
3151
3152 return SDValue();
3153}
3154
3155// Attempt to form avgfloor(A, B) from (A & B) + ((A ^ B) >> 1)
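// The match relies on the identity A + B == (A ^ B) + 2 * (A & B), so
// (A & B) + ((A ^ B) >> 1) equals floor((A + B) / 2) without overflowing the
// intermediate sum. Illustrative unsigned example: A = 6, B = 3 gives
// (6 & 3) + ((6 ^ 3) >> 1) = 2 + 2 = 4 = floor(9 / 2).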
3156SDValue DAGCombiner::foldAddToAvg(SDNode *N, const SDLoc &DL) {
3157 SDValue N0 = N->getOperand(0);
3158 EVT VT = N0.getValueType();
3159 SDValue A, B;
3160
3161 if ((!LegalOperations || hasOperation(ISD::AVGFLOORU, VT)) &&
3162 sd_match(N, m_Add(m_And(m_Value(A), m_Value(B)),
3163 m_Srl(m_Xor(m_Deferred(A), m_Deferred(B)), m_One())))) {
3164 return DAG.getNode(ISD::AVGFLOORU, DL, VT, A, B);
3165 }
3166 if ((!LegalOperations || hasOperation(ISD::AVGFLOORS, VT)) &&
3167 sd_match(N, m_Add(m_And(m_Value(A), m_Value(B)),
3168 m_Sra(m_Xor(m_Deferred(A), m_Deferred(B)), m_One())))) {
3169 return DAG.getNode(ISD::AVGFLOORS, DL, VT, A, B);
3170 }
3171
3172 return SDValue();
3173}
3174
3175SDValue DAGCombiner::visitADD(SDNode *N) {
3176 SDValue N0 = N->getOperand(0);
3177 SDValue N1 = N->getOperand(1);
3178 EVT VT = N0.getValueType();
3179 SDLoc DL(N);
3180
3181 if (SDValue Combined = visitADDLike(N))
3182 return Combined;
3183
3184 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DL, DAG))
3185 return V;
3186
3187 if (SDValue V = foldAddSubOfSignBit(N, DL, DAG))
3188 return V;
3189
3190 if (SDValue V = MatchRotate(N0, N1, SDLoc(N), /*FromAdd=*/true))
3191 return V;
3192
3193 // Try to match AVGFLOOR fixedwidth pattern
3194 if (SDValue V = foldAddToAvg(N, DL))
3195 return V;
3196
3197 // fold (a+b) -> (a|b) iff a and b share no bits.
3198 if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
3199 DAG.haveNoCommonBitsSet(N0, N1))
3200 return DAG.getNode(ISD::OR, DL, VT, N0, N1, SDNodeFlags::Disjoint);
3201
3202 // Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)).
3203 if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
3204 const APInt &C0 = N0->getConstantOperandAPInt(0);
3205 const APInt &C1 = N1->getConstantOperandAPInt(0);
3206 return DAG.getVScale(DL, VT, C0 + C1);
3207 }
3208
3209 // fold a+vscale(c1)+vscale(c2) -> a+vscale(c1+c2)
3210 if (N0.getOpcode() == ISD::ADD &&
3211 N0.getOperand(1).getOpcode() == ISD::VSCALE &&
3212 N1.getOpcode() == ISD::VSCALE) {
3213 const APInt &VS0 = N0.getOperand(1)->getConstantOperandAPInt(0);
3214 const APInt &VS1 = N1->getConstantOperandAPInt(0);
3215 SDValue VS = DAG.getVScale(DL, VT, VS0 + VS1);
3216 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), VS);
3217 }
3218
3219 // Fold (add step_vector(c1), step_vector(c2) to step_vector(c1+c2))
3220 if (N0.getOpcode() == ISD::STEP_VECTOR &&
3221 N1.getOpcode() == ISD::STEP_VECTOR) {
3222 const APInt &C0 = N0->getConstantOperandAPInt(0);
3223 const APInt &C1 = N1->getConstantOperandAPInt(0);
3224 APInt NewStep = C0 + C1;
3225 return DAG.getStepVector(DL, VT, NewStep);
3226 }
3227
3228 // Fold a + step_vector(c1) + step_vector(c2) to a + step_vector(c1+c2)
3229 if (N0.getOpcode() == ISD::ADD &&
3230 N0.getOperand(1).getOpcode() == ISD::STEP_VECTOR &&
3231 N1.getOpcode() == ISD::STEP_VECTOR) {
3232 const APInt &SV0 = N0.getOperand(1)->getConstantOperandAPInt(0);
3233 const APInt &SV1 = N1->getConstantOperandAPInt(0);
3234 APInt NewStep = SV0 + SV1;
3235 SDValue SV = DAG.getStepVector(DL, VT, NewStep);
3236 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), SV);
3237 }
3238
3239 return SDValue();
3240}
3241
3242SDValue DAGCombiner::visitADDSAT(SDNode *N) {
3243 unsigned Opcode = N->getOpcode();
3244 SDValue N0 = N->getOperand(0);
3245 SDValue N1 = N->getOperand(1);
3246 EVT VT = N0.getValueType();
3247 bool IsSigned = Opcode == ISD::SADDSAT;
3248 SDLoc DL(N);
3249
3250 // fold (add_sat x, undef) -> -1
3251 if (N0.isUndef() || N1.isUndef())
3252 return DAG.getAllOnesConstant(DL, VT);
3253
3254 // fold (add_sat c1, c2) -> c3
3255 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
3256 return C;
3257
3258 // canonicalize constant to RHS
3259 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3260 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3261 return DAG.getNode(Opcode, DL, VT, N1, N0);
3262
3263 // fold vector ops
3264 if (VT.isVector()) {
3265 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
3266 return FoldedVOp;
3267
3268 // fold (add_sat x, 0) -> x, vector edition
3269 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
3270 return N0;
3271 }
3272
3273 // fold (add_sat x, 0) -> x
3274 if (isNullConstant(N1))
3275 return N0;
3276
3277 // If it cannot overflow, transform into an add.
3278 if (DAG.willNotOverflowAdd(IsSigned, N0, N1))
3279 return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
3280
3281 return SDValue();
3282}
3283
3284static SDValue getAsCarry(const TargetLowering &TLI, SDValue V,
3285 bool ForceCarryReconstruction = false) {
3286 bool Masked = false;
3287
3288 // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
3289 while (true) {
3290 if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
3291 V = V.getOperand(0);
3292 continue;
3293 }
3294
3295 if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
3296 if (ForceCarryReconstruction)
3297 return V;
3298
3299 Masked = true;
3300 V = V.getOperand(0);
3301 continue;
3302 }
3303
3304 if (ForceCarryReconstruction && V.getValueType() == MVT::i1)
3305 return V;
3306
3307 break;
3308 }
3309
3310 // If this is not a carry, return.
3311 if (V.getResNo() != 1)
3312 return SDValue();
3313
3314 if (V.getOpcode() != ISD::UADDO_CARRY && V.getOpcode() != ISD::USUBO_CARRY &&
3315 V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
3316 return SDValue();
3317
3318 EVT VT = V->getValueType(0);
3319 if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
3320 return SDValue();
3321
3322 // If the result is masked, then no matter what kind of bool it is we can
3323 // return. If it isn't, then we need to make sure the bool type is either 0 or
3324 // 1 and not other values.
3325 if (Masked ||
3326 TLI.getBooleanContents(V.getValueType()) ==
3327 TargetLowering::ZeroOrOneBooleanContent)
3328 return V;
3329
3330 return SDValue();
3331}
3332
3333/// Given the operands of an add/sub operation, see if the 2nd operand is a
3334/// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
3335/// the opcode and bypass the mask operation.
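/// Illustrative example: if X is known to be all-sign-bits (0 or -1), then
/// (and X, 1) is 0 or 1 and equals -X, so N0 + (and X, 1) == N0 - X and
/// N0 - (and X, 1) == N0 + X.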
3336static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
3337 SelectionDAG &DAG, const SDLoc &DL) {
3338 if (N1.getOpcode() == ISD::ZERO_EXTEND)
3339 N1 = N1.getOperand(0);
3340
3341 if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
3342 return SDValue();
3343
3344 EVT VT = N0.getValueType();
3345 SDValue N10 = N1.getOperand(0);
3346 if (N10.getValueType() != VT && N10.getOpcode() == ISD::TRUNCATE)
3347 N10 = N10.getOperand(0);
3348
3349 if (N10.getValueType() != VT)
3350 return SDValue();
3351
3352 if (DAG.ComputeNumSignBits(N10) != VT.getScalarSizeInBits())
3353 return SDValue();
3354
3355 // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
3356 // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
3357 return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N10);
3358}
3359
3360/// Helper for doing combines based on N0 and N1 being added to each other.
3361SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
3362 SDNode *LocReference) {
3363 EVT VT = N0.getValueType();
3364 SDLoc DL(LocReference);
3365
3366 // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
3367 SDValue Y, N;
3368 if (sd_match(N1, m_Shl(m_Neg(m_Value(Y)), m_Value(N))))
3369 return DAG.getNode(ISD::SUB, DL, VT, N0,
3370 DAG.getNode(ISD::SHL, DL, VT, Y, N));
3371
3372 if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
3373 return V;
3374
3375 // Look for:
3376 // add (add x, 1), y
3377 // And if the target does not like this form then turn into:
3378 // sub y, (xor x, -1)
3379 if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
3380 N0.hasOneUse() && isOneOrOneSplat(N0.getOperand(1)) &&
3381 // Limit this to after legalization if the add has wrap flags
3382 (Level >= AfterLegalizeDAG || (!N0->getFlags().hasNoUnsignedWrap() &&
3383 !N0->getFlags().hasNoSignedWrap()))) {
3384 SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
3385 return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
3386 }
3387
3388 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse()) {
3389 // Hoist one-use subtraction by non-opaque constant:
3390 // (x - C) + y -> (x + y) - C
3391 // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
3392 if (isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3393 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
3394 return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
3395 }
3396 // Hoist one-use subtraction from non-opaque constant:
3397 // (C - x) + y -> (y - x) + C
3398 if (isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
3399 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
3400 return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0));
3401 }
3402 }
3403
3404 // add (mul x, C), x -> mul x, C+1
3405 if (N0.getOpcode() == ISD::MUL && N0.getOperand(0) == N1 &&
3406 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true) &&
3407 N0.hasOneUse()) {
3408 SDValue NewC = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
3409 DAG.getConstant(1, DL, VT));
3410 return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), NewC);
3411 }
3412
3413 // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
3414 // rather than 'add 0/-1' (the zext should get folded).
3415 // add (sext i1 Y), X --> sub X, (zext i1 Y)
3416 if (N0.getOpcode() == ISD::SIGN_EXTEND &&
3417 N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
3418 TLI.getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent) {
3419 SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
3420 return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
3421 }
3422
3423 // add X, (sextinreg Y i1) -> sub X, (and Y 1)
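// sign_extend_inreg(Y, i1) replicates bit 0 of Y, yielding 0 or -1, while
// (and Y, 1) yields 0 or 1; adding the former is the same as subtracting the
// latter (illustrative: Y odd -> X + (-1) == X - 1; Y even -> X + 0 == X - 0).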
3424 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
3425 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
3426 if (TN->getVT() == MVT::i1) {
3427 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
3428 DAG.getConstant(1, DL, VT));
3429 return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
3430 }
3431 }
3432
3433 // (add X, (uaddo_carry Y, 0, Carry)) -> (uaddo_carry X, Y, Carry)
3434 if (N1.getOpcode() == ISD::UADDO_CARRY && isNullConstant(N1.getOperand(1)) &&
3435 N1.getResNo() == 0)
3436 return DAG.getNode(ISD::UADDO_CARRY, DL, N1->getVTList(),
3437 N0, N1.getOperand(0), N1.getOperand(2));
3438
3439 // (add X, Carry) -> (uaddo_carry X, 0, Carry)
3440 if (TLI.isOperationLegalOrCustom(ISD::UADDO_CARRY, VT))
3441 if (SDValue Carry = getAsCarry(TLI, N1))
3442 return DAG.getNode(ISD::UADDO_CARRY, DL,
3443 DAG.getVTList(VT, Carry.getValueType()), N0,
3444 DAG.getConstant(0, DL, VT), Carry);
3445
3446 return SDValue();
3447}
3448
3449SDValue DAGCombiner::visitADDC(SDNode *N) {
3450 SDValue N0 = N->getOperand(0);
3451 SDValue N1 = N->getOperand(1);
3452 EVT VT = N0.getValueType();
3453 SDLoc DL(N);
3454
3455 // If the flag result is dead, turn this into an ADD.
3456 if (!N->hasAnyUseOfValue(1))
3457 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3458 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3459
3460 // canonicalize constant to RHS.
3461 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3462 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3463 if (N0C && !N1C)
3464 return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
3465
3466 // fold (addc x, 0) -> x + no carry out
3467 if (isNullConstant(N1))
3468 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
3469 DL, MVT::Glue));
3470
3471 // If it cannot overflow, transform into an add.
3472 if (DAG.computeOverflowForUnsignedAdd(N0, N1) == SelectionDAG::OFK_Never)
3473 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3474 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3475
3476 return SDValue();
3477}
3478
3479/**
3480 * Flips a boolean if it is cheaper to compute. If the Force parameter is set,
3481 * then the flip also occurs if computing the inverse is the same cost.
3482 * This function returns an empty SDValue in case it cannot flip the boolean
3483 * without increasing the cost of the computation. If you want to flip a boolean
3484 * no matter what, use DAG.getLogicalNOT.
3485 */
3486static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG,
3487 const TargetLowering &TLI,
3488 bool Force) {
3489 if (Force && isa<ConstantSDNode>(V))
3490 return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
3491
3492 if (V.getOpcode() != ISD::XOR)
3493 return SDValue();
3494
3495 if (DAG.isBoolConstant(V.getOperand(1)) == true)
3496 return V.getOperand(0);
3497 if (Force && isConstOrConstSplat(V.getOperand(1), false))
3498 return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
3499 return SDValue();
3500}
3501
3502SDValue DAGCombiner::visitADDO(SDNode *N) {
3503 SDValue N0 = N->getOperand(0);
3504 SDValue N1 = N->getOperand(1);
3505 EVT VT = N0.getValueType();
3506 bool IsSigned = (ISD::SADDO == N->getOpcode());
3507
3508 EVT CarryVT = N->getValueType(1);
3509 SDLoc DL(N);
3510
3511 // If the flag result is dead, turn this into an ADD.
3512 if (!N->hasAnyUseOfValue(1))
3513 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3514 DAG.getUNDEF(CarryVT));
3515
3516 // canonicalize constant to RHS.
3517 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3518 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3519 return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
3520
3521 // fold (addo x, 0) -> x + no carry out
3522 if (isNullOrNullSplat(N1))
3523 return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
3524
3525 // If it cannot overflow, transform into an add.
3526 if (DAG.willNotOverflowAdd(IsSigned, N0, N1))
3527 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3528 DAG.getConstant(0, DL, CarryVT));
3529
3530 if (IsSigned) {
3531 // fold (saddo (xor a, -1), 1) -> (ssub 0, a).
3532 if (isBitwiseNot(N0) && isOneOrOneSplat(N1))
3533 return DAG.getNode(ISD::SSUBO, DL, N->getVTList(),
3534 DAG.getConstant(0, DL, VT), N0.getOperand(0));
3535 } else {
3536 // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
3537 if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
3538 SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
3539 DAG.getConstant(0, DL, VT), N0.getOperand(0));
3540 return CombineTo(
3541 N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
3542 }
3543
3544 if (SDValue Combined = visitUADDOLike(N0, N1, N))
3545 return Combined;
3546
3547 if (SDValue Combined = visitUADDOLike(N1, N0, N))
3548 return Combined;
3549 }
3550
3551 return SDValue();
3552}
3553
3554SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
3555 EVT VT = N0.getValueType();
3556 if (VT.isVector())
3557 return SDValue();
3558
3559 // (uaddo X, (uaddo_carry Y, 0, Carry)) -> (uaddo_carry X, Y, Carry)
3560 // If Y + 1 cannot overflow.
3561 if (N1.getOpcode() == ISD::UADDO_CARRY && isNullConstant(N1.getOperand(1))) {
3562 SDValue Y = N1.getOperand(0);
3563 SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
3564 if (DAG.computeOverflowForUnsignedAdd(Y, One) == SelectionDAG::OFK_Never)
3565 return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(), N0, Y,
3566 N1.getOperand(2));
3567 }
3568
3569 // (uaddo X, Carry) -> (uaddo_carry X, 0, Carry)
3570 if (TLI.isOperationLegalOrCustom(ISD::UADDO_CARRY, VT))
3571 if (SDValue Carry = getAsCarry(TLI, N1))
3572 return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(), N0,
3573 DAG.getConstant(0, SDLoc(N), VT), Carry);
3574
3575 return SDValue();
3576}
3577
3578SDValue DAGCombiner::visitADDE(SDNode *N) {
3579 SDValue N0 = N->getOperand(0);
3580 SDValue N1 = N->getOperand(1);
3581 SDValue CarryIn = N->getOperand(2);
3582
3583 // canonicalize constant to RHS
3584 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3585 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3586 if (N0C && !N1C)
3587 return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
3588 N1, N0, CarryIn);
3589
3590 // fold (adde x, y, false) -> (addc x, y)
3591 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
3592 return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
3593
3594 return SDValue();
3595}
3596
3597SDValue DAGCombiner::visitUADDO_CARRY(SDNode *N) {
3598 SDValue N0 = N->getOperand(0);
3599 SDValue N1 = N->getOperand(1);
3600 SDValue CarryIn = N->getOperand(2);
3601 SDLoc DL(N);
3602
3603 // canonicalize constant to RHS
3604 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3605 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3606 if (N0C && !N1C)
3607 return DAG.getNode(ISD::UADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
3608
3609 // fold (uaddo_carry x, y, false) -> (uaddo x, y)
3610 if (isNullConstant(CarryIn)) {
3611 if (!LegalOperations ||
3612 TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
3613 return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
3614 }
3615
3616 // fold (uaddo_carry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
3617 if (isNullConstant(N0) && isNullConstant(N1)) {
3618 EVT VT = N0.getValueType();
3619 EVT CarryVT = CarryIn.getValueType();
3620 SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
3621 AddToWorklist(CarryExt.getNode());
3622 return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
3623 DAG.getConstant(1, DL, VT)),
3624 DAG.getConstant(0, DL, CarryVT));
3625 }
3626
3627 if (SDValue Combined = visitUADDO_CARRYLike(N0, N1, CarryIn, N))
3628 return Combined;
3629
3630 if (SDValue Combined = visitUADDO_CARRYLike(N1, N0, CarryIn, N))
3631 return Combined;
3632
3633 // We want to avoid useless duplication.
3634 // TODO: This is done automatically for binary operations. As UADDO_CARRY is
3635 // not a binary operation, it is not really possible to leverage the
3636 // existing mechanism for it. However, if more operations require the same
3637 // deduplication logic, then it may be worth generalizing.
3638 SDValue Ops[] = {N1, N0, CarryIn};
3639 SDNode *CSENode =
3640 DAG.getNodeIfExists(ISD::UADDO_CARRY, N->getVTList(), Ops, N->getFlags());
3641 if (CSENode)
3642 return SDValue(CSENode, 0);
3643
3644 return SDValue();
3645}
3646
3647/**
3648 * If we are facing some sort of diamond carry propagation pattern try to
3649 * break it up to generate something like:
3650 * (uaddo_carry X, 0, (uaddo_carry A, B, Z):Carry)
3651 *
3652 * The end result is usually an increase in the operations required, but because the
3653 * carry is now linearized, other transforms can kick in and optimize the DAG.
3654 *
3655 * Patterns typically look something like
3656 * (uaddo A, B)
3657 * / \
3658 * Carry Sum
3659 * | \
3660 * | (uaddo_carry *, 0, Z)
3661 * | /
3662 * \ Carry
3663 * | /
3664 * (uaddo_carry X, *, *)
3665 *
3666 * But numerous variations exist. Our goal is to identify A, B, X and Z and
3667 * produce a combine with a single path for carry propagation.
3668 */
3669static SDValue combineUADDO_CARRYDiamond(DAGCombiner &Combiner,
3670 SelectionDAG &DAG, SDValue X,
3671 SDValue Carry0, SDValue Carry1,
3672 SDNode *N) {
3673 if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1)
3674 return SDValue();
3675 if (Carry1.getOpcode() != ISD::UADDO)
3676 return SDValue();
3677
3678 SDValue Z;
3679
3680 /**
3681 * First look for a suitable Z. It will present itself in the form of
3682 * (uaddo_carry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true
3683 */
3684 if (Carry0.getOpcode() == ISD::UADDO_CARRY &&
3685 isNullConstant(Carry0.getOperand(1))) {
3686 Z = Carry0.getOperand(2);
3687 } else if (Carry0.getOpcode() == ISD::UADDO &&
3688 isOneConstant(Carry0.getOperand(1))) {
3689 EVT VT = Carry0->getValueType(1);
3690 Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT);
3691 } else {
3692 // We couldn't find a suitable Z.
3693 return SDValue();
3694 }
3695
3696
3697 auto cancelDiamond = [&](SDValue A,SDValue B) {
3698 SDLoc DL(N);
3699 SDValue NewY =
3700 DAG.getNode(ISD::UADDO_CARRY, DL, Carry0->getVTList(), A, B, Z);
3701 Combiner.AddToWorklist(NewY.getNode());
3702 return DAG.getNode(ISD::UADDO_CARRY, DL, N->getVTList(), X,
3703 DAG.getConstant(0, DL, X.getValueType()),
3704 NewY.getValue(1));
3705 };
3706
3707 /**
3708 * (uaddo A, B)
3709 * |
3710 * Sum
3711 * |
3712 * (uaddo_carry *, 0, Z)
3713 */
3714 if (Carry0.getOperand(0) == Carry1.getValue(0)) {
3715 return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1));
3716 }
3717
3718 /**
3719 * (uaddo_carry A, 0, Z)
3720 * |
3721 * Sum
3722 * |
3723 * (uaddo *, B)
3724 */
3725 if (Carry1.getOperand(0) == Carry0.getValue(0)) {
3726 return cancelDiamond(Carry0.getOperand(0), Carry1.getOperand(1));
3727 }
3728
3729 if (Carry1.getOperand(1) == Carry0.getValue(0)) {
3730 return cancelDiamond(Carry1.getOperand(0), Carry0.getOperand(0));
3731 }
3732
3733 return SDValue();
3734}
3735
3736// If we are facing some sort of diamond carry/borrow in/out pattern try to
3737// match patterns like:
3738//
3739// (uaddo A, B) CarryIn
3740// | \ |
3741// | \ |
3742// PartialSum PartialCarryOutX /
3743// | | /
3744// | ____|____________/
3745// | / |
3746// (uaddo *, *) \________
3747// | \ \
3748// | \ |
3749// | PartialCarryOutY |
3750// | \ |
3751// | \ /
3752// AddCarrySum | ______/
3753// | /
3754// CarryOut = (or *, *)
3755//
3756// And generate UADDO_CARRY (or USUBO_CARRY) with two result values:
3757//
3758// {AddCarrySum, CarryOut} = (uaddo_carry A, B, CarryIn)
3759//
3760// Our goal is to identify A, B, and CarryIn and produce UADDO_CARRY/USUBO_CARRY
3761// with a single path for carry/borrow out propagation.
3762static SDValue combineCarryDiamond(SelectionDAG &DAG, const TargetLowering &TLI,
3763 SDValue N0, SDValue N1, SDNode *N) {
3764 SDValue Carry0 = getAsCarry(TLI, N0);
3765 if (!Carry0)
3766 return SDValue();
3767 SDValue Carry1 = getAsCarry(TLI, N1);
3768 if (!Carry1)
3769 return SDValue();
3770
3771 unsigned Opcode = Carry0.getOpcode();
3772 if (Opcode != Carry1.getOpcode())
3773 return SDValue();
3774 if (Opcode != ISD::UADDO && Opcode != ISD::USUBO)
3775 return SDValue();
3776 // Guarantee identical type of CarryOut
3777 EVT CarryOutType = N->getValueType(0);
3778 if (CarryOutType != Carry0.getValue(1).getValueType() ||
3779 CarryOutType != Carry1.getValue(1).getValueType())
3780 return SDValue();
3781
3782 // Canonicalize the add/sub of A and B (the top node in the above ASCII art)
3783 // as Carry0 and the add/sub of the carry in as Carry1 (the middle node).
3784 if (Carry1.getNode()->isOperandOf(Carry0.getNode()))
3785 std::swap(Carry0, Carry1);
3786
3787 // Check if nodes are connected in expected way.
3788 if (Carry1.getOperand(0) != Carry0.getValue(0) &&
3789 Carry1.getOperand(1) != Carry0.getValue(0))
3790 return SDValue();
3791
3792 // The carry-in value must be on the right-hand side for subtraction.
3793 unsigned CarryInOperandNum =
3794 Carry1.getOperand(0) == Carry0.getValue(0) ? 1 : 0;
3795 if (Opcode == ISD::USUBO && CarryInOperandNum != 1)
3796 return SDValue();
3797 SDValue CarryIn = Carry1.getOperand(CarryInOperandNum);
3798
3799 unsigned NewOp = Opcode == ISD::UADDO ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
3800 if (!TLI.isOperationLegalOrCustom(NewOp, Carry0.getValue(0).getValueType()))
3801 return SDValue();
3802
3803 // Verify that the carry/borrow in is plausibly a carry/borrow bit.
3804 CarryIn = getAsCarry(TLI, CarryIn, true);
3805 if (!CarryIn)
3806 return SDValue();
3807
3808 SDLoc DL(N);
3809 CarryIn = DAG.getBoolExtOrTrunc(CarryIn, DL, Carry1->getValueType(1),
3810 Carry1->getValueType(0));
3811 SDValue Merged =
3812 DAG.getNode(NewOp, DL, Carry1->getVTList(), Carry0.getOperand(0),
3813 Carry0.getOperand(1), CarryIn);
3814
3815 // Please note that because we have proven that the result of the UADDO/USUBO
3816 // of A and B feeds into the UADDO/USUBO that does the carry/borrow in, we can
3817 // therefore prove that if the first UADDO/USUBO overflows, the second
3818 // UADDO/USUBO cannot. For example consider 8-bit numbers where 0xFF is the
3819 // maximum value.
3820 //
3821 // 0xFF + 0xFF == 0xFE with carry but 0xFE + 1 does not carry
3822 // 0x00 - 0xFF == 1 with a carry/borrow but 1 - 1 == 0 (no carry/borrow)
3823 //
3824 // This is important because it means that OR and XOR can be used to merge
3825 // carry flags; and that AND can return a constant zero.
3826 //
3827 // TODO: match other operations that can merge flags (ADD, etc)
3828 DAG.ReplaceAllUsesOfValueWith(Carry1.getValue(0), Merged.getValue(0));
3829 if (N->getOpcode() == ISD::AND)
3830 return DAG.getConstant(0, DL, CarryOutType);
3831 return Merged.getValue(1);
3832}
3833
3834SDValue DAGCombiner::visitUADDO_CARRYLike(SDValue N0, SDValue N1,
3835 SDValue CarryIn, SDNode *N) {
3836 // fold (uaddo_carry (xor a, -1), b, c) -> (usubo_carry b, a, !c) and flip
3837 // carry.
3838 if (isBitwiseNot(N0))
3839 if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) {
3840 SDLoc DL(N);
3841 SDValue Sub = DAG.getNode(ISD::USUBO_CARRY, DL, N->getVTList(), N1,
3842 N0.getOperand(0), NotC);
3843 return CombineTo(
3844 N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
3845 }
3846
3847 // Iff the flag result is dead:
3848 // (uaddo_carry (add|uaddo X, Y), 0, Carry) -> (uaddo_carry X, Y, Carry)
3849 // Don't do this if the Carry comes from the uaddo. It won't remove the uaddo
3850 // or the dependency between the instructions.
3851 if ((N0.getOpcode() == ISD::ADD ||
3852 (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0 &&
3853 N0.getValue(1) != CarryIn)) &&
3854 isNullConstant(N1) && !N->hasAnyUseOfValue(1))
3855 return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(),
3856 N0.getOperand(0), N0.getOperand(1), CarryIn);
3857
3858 /**
3859 * When one of the uaddo_carry arguments is itself a carry, we may be facing
3860 * a diamond carry propagation, in which case we try to transform the DAG
3861 * to ensure linear carry propagation if that is possible.
3862 */
3863 if (auto Y = getAsCarry(TLI, N1)) {
3864 // Because both are carries, Y and Z can be swapped.
3865 if (auto R = combineUADDO_CARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
3866 return R;
3867 if (auto R = combineUADDO_CARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
3868 return R;
3869 }
3870
3871 return SDValue();
3872}
3873
3874SDValue DAGCombiner::visitSADDO_CARRYLike(SDValue N0, SDValue N1,
3875 SDValue CarryIn, SDNode *N) {
3876 // fold (saddo_carry (xor a, -1), b, c) -> (ssubo_carry b, a, !c)
3877 if (isBitwiseNot(N0)) {
3878 if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true))
3879 return DAG.getNode(ISD::SSUBO_CARRY, SDLoc(N), N->getVTList(), N1,
3880 N0.getOperand(0), NotC);
3881 }
3882
3883 return SDValue();
3884}
3885
3886SDValue DAGCombiner::visitSADDO_CARRY(SDNode *N) {
3887 SDValue N0 = N->getOperand(0);
3888 SDValue N1 = N->getOperand(1);
3889 SDValue CarryIn = N->getOperand(2);
3890 SDLoc DL(N);
3891
3892 // canonicalize constant to RHS
3893 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3894 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3895 if (N0C && !N1C)
3896 return DAG.getNode(ISD::SADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
3897
3898 // fold (saddo_carry x, y, false) -> (saddo x, y)
3899 if (isNullConstant(CarryIn)) {
3900 if (!LegalOperations ||
3901 TLI.isOperationLegalOrCustom(ISD::SADDO, N->getValueType(0)))
3902 return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0, N1);
3903 }
3904
3905 if (SDValue Combined = visitSADDO_CARRYLike(N0, N1, CarryIn, N))
3906 return Combined;
3907
3908 if (SDValue Combined = visitSADDO_CARRYLike(N1, N0, CarryIn, N))
3909 return Combined;
3910
3911 return SDValue();
3912}
3913
3914// Attempt to create a USUBSAT(LHS, RHS) node with DstVT, performing a
3915// clamp/truncation if necessary.
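// Illustrative example (DstVT = i8, SrcVT = i16, LHS zero-extended from i8):
//   usubsat i16 (zext X), Y  ==  zext (usubsat i8 X, trunc (umin Y, 255))
// e.g. X = 10, Y = 300: umin(300, 255) = 255 and usubsat8(10, 255) = 0, matching
// usubsat16(10, 300) = 0; with Y = 3 both sides give 7.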
3916static SDValue getTruncatedUSUBSAT(EVT DstVT, EVT SrcVT, SDValue LHS,
3917 SDValue RHS, SelectionDAG &DAG,
3918 const SDLoc &DL) {
3919 assert(DstVT.getScalarSizeInBits() <= SrcVT.getScalarSizeInBits() &&
3920 "Illegal truncation");
3921
3922 if (DstVT == SrcVT)
3923 return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3924
3925 // If the LHS is zero-extended then we can perform the USUBSAT as DstVT by
3926 // clamping RHS.
3927 APInt UpperBits = APInt::getBitsSetFrom(SrcVT.getScalarSizeInBits(),
3928 DstVT.getScalarSizeInBits());
3929 if (!DAG.MaskedValueIsZero(LHS, UpperBits))
3930 return SDValue();
3931
3932 SDValue SatLimit =
3933 DAG.getConstant(APInt::getLowBitsSet(SrcVT.getScalarSizeInBits(),
3934 DstVT.getScalarSizeInBits()),
3935 DL, SrcVT);
3936 RHS = DAG.getNode(ISD::UMIN, DL, SrcVT, RHS, SatLimit);
3937 RHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, RHS);
3938 LHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, LHS);
3939 return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3940}
3941
3942// Try to find umax(a,b) - b or a - umin(a,b) patterns that may be converted to
3943// usubsat(a,b), optionally as a truncated type.
3944SDValue DAGCombiner::foldSubToUSubSat(EVT DstVT, SDNode *N, const SDLoc &DL) {
3945 if (N->getOpcode() != ISD::SUB ||
3946 !(!LegalOperations || hasOperation(ISD::USUBSAT, DstVT)))
3947 return SDValue();
3948
3949 EVT SubVT = N->getValueType(0);
3950 SDValue Op0 = N->getOperand(0);
3951 SDValue Op1 = N->getOperand(1);
3952
3953 // Try to find umax(a,b) - b or a - umin(a,b) patterns
3954 // that may be converted to usubsat(a,b).
3955 if (Op0.getOpcode() == ISD::UMAX && Op0.hasOneUse()) {
3956 SDValue MaxLHS = Op0.getOperand(0);
3957 SDValue MaxRHS = Op0.getOperand(1);
3958 if (MaxLHS == Op1)
3959 return getTruncatedUSUBSAT(DstVT, SubVT, MaxRHS, Op1, DAG, DL);
3960 if (MaxRHS == Op1)
3961 return getTruncatedUSUBSAT(DstVT, SubVT, MaxLHS, Op1, DAG, DL);
3962 }
3963
3964 if (Op1.getOpcode() == ISD::UMIN && Op1.hasOneUse()) {
3965 SDValue MinLHS = Op1.getOperand(0);
3966 SDValue MinRHS = Op1.getOperand(1);
3967 if (MinLHS == Op0)
3968 return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinRHS, DAG, DL);
3969 if (MinRHS == Op0)
3970 return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinLHS, DAG, DL);
3971 }
3972
3973 // sub(a,trunc(umin(zext(a),b))) -> usubsat(a,trunc(umin(b,SatLimit)))
3974 if (Op1.getOpcode() == ISD::TRUNCATE &&
3975 Op1.getOperand(0).getOpcode() == ISD::UMIN &&
3976 Op1.getOperand(0).hasOneUse()) {
3977 SDValue MinLHS = Op1.getOperand(0).getOperand(0);
3978 SDValue MinRHS = Op1.getOperand(0).getOperand(1);
3979 if (MinLHS.getOpcode() == ISD::ZERO_EXTEND && MinLHS.getOperand(0) == Op0)
3980 return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinLHS, MinRHS,
3981 DAG, DL);
3982 if (MinRHS.getOpcode() == ISD::ZERO_EXTEND && MinRHS.getOperand(0) == Op0)
3983 return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinRHS, MinLHS,
3984 DAG, DL);
3985 }
3986
3987 return SDValue();
3988}
3989
3990// Refinement of DAG/Type Legalisation (promotion) when CTLZ is used for
3991// counting leading ones. Broadly, it replaces the subtraction with a left
3992// shift.
3993//
3994// * DAG Legalisation Pattern:
3995//
3996// (sub (ctlz (zeroextend (not Src)))
3997// BitWidthDiff)
3998//
3999// if BitWidthDiff == BitWidth(Node) - BitWidth(Src)
4000// -->
4001//
4002// (ctlz_zero_undef (not (shl (anyextend Src)
4003// BitWidthDiff)))
4004//
4005// * Type Legalisation Pattern:
4006//
4007// (sub (ctlz (and (xor Src XorMask)
4008// AndMask))
4009// BitWidthDiff)
4010//
4011// if AndMask has only trailing ones
4012// and MaskBitWidth(AndMask) == BitWidth(Node) - BitWidthDiff
4013// and XorMask has more trailing ones than AndMask
4014// -->
4015//
4016// (ctlz_zero_undef (not (shl Src BitWidthDiff)))
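// Illustrative i8 -> i32 example for the DAG legalisation pattern: counting the
// leading ones of Src = 0xC3 (two of them) is ctlz(zext i32 (not 0xC3)) - 24 =
// ctlz(0x0000003C) - 24 = 26 - 24 = 2. The rewrite shifts Src into the top bits
// first: not (shl (anyext 0xC3), 24) = not 0xC3000000 = 0x3CFFFFFF, whose ctlz
// is also 2; the low bits filled with ones keep the value nonzero, which is why
// CTLZ_ZERO_UNDEF is safe here.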
4017template <class MatchContextClass>
4018static SDValue foldSubCtlzNot(SDNode *N, SelectionDAG &DAG) {
4019 const SDLoc DL(N);
4020 SDValue N0 = N->getOperand(0);
4021 EVT VT = N0.getValueType();
4022 unsigned BitWidth = VT.getScalarSizeInBits();
4023
4024 MatchContextClass Matcher(DAG, DAG.getTargetLoweringInfo(), N);
4025
4026 APInt AndMask;
4027 APInt XorMask;
4028 APInt BitWidthDiff;
4029
4030 SDValue CtlzOp;
4031 SDValue Src;
4032
4033 if (!sd_context_match(
4034 N, Matcher, m_Sub(m_Ctlz(m_Value(CtlzOp)), m_ConstInt(BitWidthDiff))))
4035 return SDValue();
4036
4037 if (sd_context_match(CtlzOp, Matcher, m_ZExt(m_Not(m_Value(Src))))) {
4038 // DAG Legalisation Pattern:
4039 // (sub (ctlz (zero_extend (not Op)) BitWidthDiff))
4040 if ((BitWidth - Src.getValueType().getScalarSizeInBits()) != BitWidthDiff)
4041 return SDValue();
4042
4043 Src = DAG.getNode(ISD::ANY_EXTEND, DL, VT, Src);
4044 } else if (sd_context_match(CtlzOp, Matcher,
4045 m_And(m_Xor(m_Value(Src), m_ConstInt(XorMask)),
4046 m_ConstInt(AndMask)))) {
4047 // Type Legalisation Pattern:
4048 // (sub (ctlz (and (xor Op XorMask) AndMask)) BitWidthDiff)
4049 unsigned AndMaskWidth = BitWidth - BitWidthDiff.getZExtValue();
4050 if (!(AndMask.isMask(AndMaskWidth) && XorMask.countr_one() >= AndMaskWidth))
4051 return SDValue();
4052 } else
4053 return SDValue();
4054
4055 SDValue ShiftConst = DAG.getShiftAmountConstant(BitWidthDiff, VT, DL);
4056 SDValue LShift = Matcher.getNode(ISD::SHL, DL, VT, Src, ShiftConst);
4057 SDValue Not =
4058 Matcher.getNode(ISD::XOR, DL, VT, LShift, DAG.getAllOnesConstant(DL, VT));
4059
4060 return Matcher.getNode(ISD::CTLZ_ZERO_UNDEF, DL, VT, Not);
4061}
4062
4063// Fold sub(x, mul(divrem(x,y)[0], y)) to divrem(x, y)[1]
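// Illustrative: x = 17, y = 5 gives divrem(17, 5) = {3, 2}, and the expression
// 17 - (3 * 5) = 2 is exactly the remainder, so the divrem node's second result
// can be reused instead of emitting the mul and sub.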
4064static SDValue foldRemainderIdiom(SDNode *N, SelectionDAG &DAG,
4065 const SDLoc &DL) {
4066 assert(N->getOpcode() == ISD::SUB && "Node must be a SUB");
4067 SDValue Sub0 = N->getOperand(0);
4068 SDValue Sub1 = N->getOperand(1);
4069
4070 auto CheckAndFoldMulCase = [&](SDValue DivRem, SDValue MaybeY) -> SDValue {
4071 if ((DivRem.getOpcode() == ISD::SDIVREM ||
4072 DivRem.getOpcode() == ISD::UDIVREM) &&
4073 DivRem.getResNo() == 0 && DivRem.getOperand(0) == Sub0 &&
4074 DivRem.getOperand(1) == MaybeY) {
4075 return SDValue(DivRem.getNode(), 1);
4076 }
4077 return SDValue();
4078 };
4079
4080 if (Sub1.getOpcode() == ISD::MUL) {
4081 // (sub x, (mul divrem(x,y)[0], y))
4082 SDValue Mul0 = Sub1.getOperand(0);
4083 SDValue Mul1 = Sub1.getOperand(1);
4084
4085 if (SDValue Res = CheckAndFoldMulCase(Mul0, Mul1))
4086 return Res;
4087
4088 if (SDValue Res = CheckAndFoldMulCase(Mul1, Mul0))
4089 return Res;
4090
4091 } else if (Sub1.getOpcode() == ISD::SHL) {
4092 // Handle (sub x, (shl divrem(x,y)[0], C)) where y = 1 << C
4093 SDValue Shl0 = Sub1.getOperand(0);
4094 SDValue Shl1 = Sub1.getOperand(1);
4095 // Check if Shl0 is divrem(x, Y)[0]
4096 if ((Shl0.getOpcode() == ISD::SDIVREM ||
4097 Shl0.getOpcode() == ISD::UDIVREM) &&
4098 Shl0.getResNo() == 0 && Shl0.getOperand(0) == Sub0) {
4099
4100 SDValue Divisor = Shl0.getOperand(1);
4101
4102 ConstantSDNode *DivC = isConstOrConstSplat(Divisor);
4103 ConstantSDNode *ShC = isConstOrConstSplat(Shl1);
4104 if (!DivC || !ShC)
4105 return SDValue();
4106
4107 if (DivC->getAPIntValue().isPowerOf2() &&
4108 DivC->getAPIntValue().logBase2() == ShC->getAPIntValue())
4109 return SDValue(Shl0.getNode(), 1);
4110 }
4111 }
4112 return SDValue();
4113}
4114
4115// Since it may not be valid to emit a fold to zero for vector initializers,
4116// check if we can before folding.
4117static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
4118 SelectionDAG &DAG, bool LegalOperations) {
4119 if (!VT.isVector())
4120 return DAG.getConstant(0, DL, VT);
4121 if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
4122 return DAG.getConstant(0, DL, VT);
4123 return SDValue();
4124}
4125
4126SDValue DAGCombiner::visitSUB(SDNode *N) {
4127 SDValue N0 = N->getOperand(0);
4128 SDValue N1 = N->getOperand(1);
4129 EVT VT = N0.getValueType();
4130 unsigned BitWidth = VT.getScalarSizeInBits();
4131 SDLoc DL(N);
4132
4133 if (SDValue V = foldSubCtlzNot<EmptyMatchContext>(N, DAG))
4134 return V;
4135
4136 // fold (sub x, x) -> 0
4137 if (N0 == N1)
4138 return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
4139
4140 // fold (sub c1, c2) -> c3
4141 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N1}))
4142 return C;
4143
4144 // fold vector ops
4145 if (VT.isVector()) {
4146 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4147 return FoldedVOp;
4148
4149 // fold (sub x, 0) -> x, vector edition
4150 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
4151 return N0;
4152 }
4153
4154 // (sub x, ([v]select (ult x, y), 0, y)) -> (umin x, (sub x, y))
4155 // (sub x, ([v]select (uge x, y), y, 0)) -> (umin x, (sub x, y))
4156 if (N1.hasOneUse() && hasUMin(VT)) {
4157 SDValue Y;
4158 auto MS0 = m_Specific(N0);
4159 auto MVY = m_Value(Y);
4160 auto MZ = m_Zero();
4161 auto MCC1 = m_SpecificCondCode(ISD::SETULT);
4162 auto MCC2 = m_SpecificCondCode(ISD::SETUGE);
4163
4164 if (sd_match(N1, m_SelectCCLike(MS0, MVY, MZ, m_Deferred(Y), MCC1)) ||
4165 sd_match(N1, m_SelectCCLike(MS0, MVY, m_Deferred(Y), MZ, MCC2)) ||
4166 sd_match(N1, m_VSelect(m_SetCC(MS0, MVY, MCC1), MZ, m_Deferred(Y))) ||
4167 sd_match(N1, m_VSelect(m_SetCC(MS0, MVY, MCC2), m_Deferred(Y), MZ)))
4168
4169 return DAG.getNode(ISD::UMIN, DL, VT, N0,
4170 DAG.getNode(ISD::SUB, DL, VT, N0, Y));
4171 }
4172
4173 if (SDValue NewSel = foldBinOpIntoSelect(N))
4174 return NewSel;
4175
4176 // fold (sub x, c) -> (add x, -c)
4177 if (ConstantSDNode *N1C = getAsNonOpaqueConstant(N1))
4178 return DAG.getNode(ISD::ADD, DL, VT, N0,
4179 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
4180
4181 if (isNullOrNullSplat(N0)) {
4182 // Right-shifting everything out but the sign bit followed by negation is
4183 // the same as flipping arithmetic/logical shift type without the negation:
4184 // -(X >>u 31) -> (X >>s 31)
4185 // -(X >>s 31) -> (X >>u 31)
4186 if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
4187 ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
4188 if (ShiftAmt && ShiftAmt->getAPIntValue() == (BitWidth - 1)) {
4189 auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
4190 if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
4191 return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
4192 }
4193 }
4194
4195 // 0 - X --> 0 if the sub is NUW.
4196 if (N->getFlags().hasNoUnsignedWrap())
4197 return N0;
4198
4199 if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
4200 // N1 is either 0 or the minimum signed value. If the sub is NSW, then
4201 // N1 must be 0 because negating the minimum signed value is undefined.
4202 if (N->getFlags().hasNoSignedWrap())
4203 return N0;
4204
4205 // 0 - X --> X if X is 0 or the minimum signed value.
4206 return N1;
4207 }
4208
4209 // Convert 0 - abs(x).
4210 if (N1.getOpcode() == ISD::ABS && N1.hasOneUse() &&
4211 !TLI.isOperationLegalOrCustom(ISD::ABS, VT))
4212 if (SDValue Result = TLI.expandABS(N1.getNode(), DAG, true))
4213 return Result;
4214
4215 // Similar to the previous rule, but this time targeting an expanded abs.
4216 // (sub 0, (max X, (sub 0, X))) --> (min X, (sub 0, X))
4217 // as well as
4218 // (sub 0, (min X, (sub 0, X))) --> (max X, (sub 0, X))
4219 // Note that these two are applicable to both signed and unsigned min/max.
4220 SDValue X;
4221 SDValue S0;
4222 auto NegPat = m_AllOf(m_Neg(m_Deferred(X)), m_Value(S0));
4223 if (sd_match(N1, m_OneUse(m_AnyOf(m_SMax(m_Value(X), NegPat),
4224 m_UMax(m_Value(X), NegPat),
4225 m_SMin(m_Value(X), NegPat),
4226 m_UMin(m_Value(X), NegPat))))) {
4227 unsigned NewOpc = ISD::getInverseMinMaxOpcode(N1->getOpcode());
4228 if (hasOperation(NewOpc, VT))
4229 return DAG.getNode(NewOpc, DL, VT, X, S0);
4230 }
4231
4232 // Fold neg(splat(neg(x)) -> splat(x)
4233 if (VT.isVector()) {
4234 SDValue N1S = DAG.getSplatValue(N1, true);
4235 if (N1S && N1S.getOpcode() == ISD::SUB &&
4236 isNullConstant(N1S.getOperand(0)))
4237 return DAG.getSplat(VT, DL, N1S.getOperand(1));
4238 }
4239
4240 // sub 0, (and x, 1) --> SIGN_EXTEND_INREG x, i1
4241 if (N1.getOpcode() == ISD::AND && N1.hasOneUse() &&
4242 isOneOrOneSplat(N1->getOperand(1))) {
4243 EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), 1);
4244 if (VT.isVector())
4245 ExtVT = EVT::getVectorVT(*DAG.getContext(), ExtVT,
4246 VT.getVectorElementCount());
4247 if (TLI.getOperationAction(ISD::SIGN_EXTEND_INREG, ExtVT) ==
4248 TargetLowering::Legal)
4249 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, N1->getOperand(0),
4250 DAG.getValueType(ExtVT));
4251 }
4252 }
4253 }
4254
4255 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
4256 if (isAllOnesOrAllOnesSplat(N0))
4257 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
4258
4259 // fold (A - (0-B)) -> A+B
4260 if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
4261 return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
4262
4263 // fold A-(A-B) -> B
4264 if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
4265 return N1.getOperand(1);
4266
4267 // fold (A+B)-A -> B
4268 if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
4269 return N0.getOperand(1);
4270
4271 // fold (A+B)-B -> A
4272 if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
4273 return N0.getOperand(0);
4274
4275 // fold (A+C1)-C2 -> A+(C1-C2)
4276 if (N0.getOpcode() == ISD::ADD) {
4277 SDValue N01 = N0.getOperand(1);
4278 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N01, N1}))
4279 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
4280 }
4281
4282 // fold C2-(A+C1) -> (C2-C1)-A
4283 if (N1.getOpcode() == ISD::ADD) {
4284 SDValue N11 = N1.getOperand(1);
4285 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N11}))
4286 return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
4287 }
4288
4289 // fold (A-C1)-C2 -> A-(C1+C2)
4290 if (N0.getOpcode() == ISD::SUB) {
4291 SDValue N01 = N0.getOperand(1);
4292 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N01, N1}))
4293 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
4294 }
4295
4296 // fold (c1-A)-c2 -> (c1-c2)-A
4297 if (N0.getOpcode() == ISD::SUB) {
4298 SDValue N00 = N0.getOperand(0);
4299 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N00, N1}))
4300 return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
4301 }
4302
4303 SDValue A, B, C;
4304
4305 // fold ((A+(B+C))-B) -> A+C
4306 if (sd_match(N0, m_Add(m_Value(A), m_Add(m_Specific(N1), m_Value(C)))))
4307 return DAG.getNode(ISD::ADD, DL, VT, A, C);
4308
4309 // fold ((A+(B-C))-B) -> A-C
4310 if (sd_match(N0, m_Add(m_Value(A), m_Sub(m_Specific(N1), m_Value(C)))))
4311 return DAG.getNode(ISD::SUB, DL, VT, A, C);
4312
4313 // fold ((A-(B-C))-C) -> A-B
4314 if (sd_match(N0, m_Sub(m_Value(A), m_Sub(m_Value(B), m_Specific(N1)))))
4315 return DAG.getNode(ISD::SUB, DL, VT, A, B);
4316
4317 // fold (A-(B-C)) -> A+(C-B)
4318 if (sd_match(N1, m_OneUse(m_Sub(m_Value(B), m_Value(C)))))
4319 return DAG.getNode(ISD::ADD, DL, VT, N0,
4320 DAG.getNode(ISD::SUB, DL, VT, C, B));
4321
4322 // A - (A & B) -> A & (~B)
4323 if (sd_match(N1, m_And(m_Specific(N0), m_Value(B))) &&
4324 (N1.hasOneUse() || isConstantOrConstantVector(B, /*NoOpaques=*/true)))
4325 return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getNOT(DL, B, VT));
4326
4327 // fold (A - (-B * C)) -> (A + (B * C))
4328 if (sd_match(N1, m_OneUse(m_Mul(m_Neg(m_Value(B)), m_Value(C)))))
4329 return DAG.getNode(ISD::ADD, DL, VT, N0,
4330 DAG.getNode(ISD::MUL, DL, VT, B, C));
4331
4332 // If either operand of a sub is undef, the result is undef
4333 if (N0.isUndef())
4334 return N0;
4335 if (N1.isUndef())
4336 return N1;
4337
4338 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DL, DAG))
4339 return V;
4340
4341 if (SDValue V = foldAddSubOfSignBit(N, DL, DAG))
4342 return V;
4343
4344 // Try to match AVGCEIL fixedwidth pattern
4345 if (SDValue V = foldSubToAvg(N, DL))
4346 return V;
4347
4348 if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, DL))
4349 return V;
4350
4351 if (SDValue V = foldSubToUSubSat(VT, N, DL))
4352 return V;
4353
4354 if (SDValue V = foldRemainderIdiom(N, DAG, DL))
4355 return V;
4356
4357 // (A - B) - 1 -> add (xor B, -1), A
4358 if (sd_match(N, m_Sub(m_OneUse(m_Sub(m_Value(A), m_Value(B))),
4359 m_One(/*AllowUndefs=*/true))))
4360 return DAG.getNode(ISD::ADD, DL, VT, A, DAG.getNOT(DL, B, VT));
4361
4362 // Look for:
4363 // sub y, (xor x, -1)
4364 // And if the target does not like this form then turn into:
4365 // add (add x, y), 1
4366 if (TLI.preferIncOfAddToSubOfNot(VT) && N1.hasOneUse() && isBitwiseNot(N1)) {
4367 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(0));
4368 return DAG.getNode(ISD::ADD, DL, VT, Add, DAG.getConstant(1, DL, VT));
4369 }
4370
4371 // Hoist one-use addition by non-opaque constant:
4372 // (x + C) - y -> (x - y) + C
4373 if (!reassociationCanBreakAddressingModePattern(ISD::SUB, DL, N, N0, N1) &&
4374 N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
4375 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
4376 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
4377 return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
4378 }
4379 // y - (x + C) -> (y - x) - C
4380 if (N1.getOpcode() == ISD::ADD && N1.hasOneUse() &&
4381 isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) {
4382 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
4383 return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
4384 }
4385 // (x - C) - y -> (x - y) - C
4386 // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
4387 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
4388 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
4389 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
4390 return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
4391 }
4392 // (C - x) - y -> C - (x + y)
4393 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
4394 isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
4395 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1);
4396 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add);
4397 }
4398
4399 // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
4400 // rather than 'sub 0/1' (the sext should get folded).
4401 // sub X, (zext i1 Y) --> add X, (sext i1 Y)
4402 if (N1.getOpcode() == ISD::ZERO_EXTEND &&
4403 N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
4404 TLI.getBooleanContents(VT) ==
4406 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
4407 return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
4408 }
4409
4410 // fold B = sra (A, size(A)-1); sub (xor (A, B), B) -> (abs A)
4411 if ((!LegalOperations || hasOperation(ISD::ABS, VT)) &&
4412 sd_match(N1, m_Sra(m_Value(A), m_SpecificInt(BitWidth - 1))) &&
4413 sd_match(N0, m_Xor(m_Specific(A), m_Specific(N1))))
4414 return DAG.getNode(ISD::ABS, DL, VT, A);
4415
4416 // If the relocation model supports it, consider symbol offsets.
4417 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
4418 if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
4419 // fold (sub Sym+c1, Sym+c2) -> c1-c2
4420 if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
4421 if (GA->getGlobal() == GB->getGlobal())
4422 return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
4423 DL, VT);
4424 }
4425
4426 // sub X, (sextinreg Y i1) -> add X, (and Y 1)
4427 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
4428 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
4429 if (TN->getVT() == MVT::i1) {
4430 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
4431 DAG.getConstant(1, DL, VT));
4432 return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
4433 }
4434 }
4435
4436 // canonicalize (sub X, (vscale * C)) to (add X, (vscale * -C))
4437 if (N1.getOpcode() == ISD::VSCALE && N1.hasOneUse()) {
4438 const APInt &IntVal = N1.getConstantOperandAPInt(0);
4439 return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getVScale(DL, VT, -IntVal));
4440 }
4441
4442 // canonicalize (sub X, step_vector(C)) to (add X, step_vector(-C))
4443 if (N1.getOpcode() == ISD::STEP_VECTOR && N1.hasOneUse()) {
4444 APInt NewStep = -N1.getConstantOperandAPInt(0);
4445 return DAG.getNode(ISD::ADD, DL, VT, N0,
4446 DAG.getStepVector(DL, VT, NewStep));
4447 }
4448
4449 // Prefer an add for more folding potential and possibly better codegen:
4450 // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
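// Both shifts extract the sign bit of N10: lshr yields 0 or 1 and ashr yields
// 0 or -1, so subtracting the former equals adding the latter (illustrative i8:
// N10 = 0x80 gives N0 - 1 == N0 + (-1); N10 = 0x7F gives N0 - 0 == N0 + 0).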
4451 if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
4452 SDValue ShAmt = N1.getOperand(1);
4453 ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
4454 if (ShAmtC && ShAmtC->getAPIntValue() == (BitWidth - 1)) {
4455 SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
4456 return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
4457 }
4458 }
4459
4460 // As with the previous fold, prefer add for more folding potential.
4461 // Subtracting SMIN/0 is the same as adding SMIN/0:
4462 // N0 - (X << BW-1) --> N0 + (X << BW-1)
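// (X << BW-1) is either 0 or the sign-bit-only value, and that value is its own
// negation in two's complement (illustrative i8: -0x80 == 0x80 mod 256), so the
// sub and the add produce the same result.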
4463 if (N1.getOpcode() == ISD::SHL) {
4464 ConstantSDNode *ShlC = isConstOrConstSplat(N1.getOperand(1));
4465 if (ShlC && ShlC->getAPIntValue() == (BitWidth - 1))
4466 return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
4467 }
4468
4469 // (sub (usubo_carry X, 0, Carry), Y) -> (usubo_carry X, Y, Carry)
4470 if (N0.getOpcode() == ISD::USUBO_CARRY && isNullConstant(N0.getOperand(1)) &&
4471 N0.getResNo() == 0 && N0.hasOneUse())
4472 return DAG.getNode(ISD::USUBO_CARRY, DL, N0->getVTList(),
4473 N0.getOperand(0), N1, N0.getOperand(2));
4474
4475 if (TLI.isOperationLegalOrCustom(ISD::UADDO_CARRY, VT)) {
4476 // (sub Carry, X) -> (uaddo_carry (sub 0, X), 0, Carry)
4477 if (SDValue Carry = getAsCarry(TLI, N0)) {
4478 SDValue X = N1;
4479 SDValue Zero = DAG.getConstant(0, DL, VT);
4480 SDValue NegX = DAG.getNode(ISD::SUB, DL, VT, Zero, X);
4481 return DAG.getNode(ISD::UADDO_CARRY, DL,
4482 DAG.getVTList(VT, Carry.getValueType()), NegX, Zero,
4483 Carry);
4484 }
4485 }
4486
4487 // If there's no chance of borrowing from adjacent bits, then sub is xor:
4488 // sub C0, X --> xor X, C0
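// Illustrative i8 example: C0 = 0x0F and X known to have only its low two bits
// possibly set (MaybeOnes = 0x03): 0x0F - 0x03 == 0x0F ^ 0x03 == 0x0C, and the
// same holds for every X covered by MaybeOnes because no bit needs to borrow.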
4489 if (ConstantSDNode *C0 = isConstOrConstSplat(N0)) {
4490 if (!C0->isOpaque()) {
4491 const APInt &C0Val = C0->getAPIntValue();
4492 const APInt &MaybeOnes = ~DAG.computeKnownBits(N1).Zero;
4493 if ((C0Val - MaybeOnes) == (C0Val ^ MaybeOnes))
4494 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
4495 }
4496 }
4497
4498 // smax(a,b) - smin(a,b) --> abds(a,b)
4499 if ((!LegalOperations || hasOperation(ISD::ABDS, VT)) &&
4500 sd_match(N0, m_SMaxLike(m_Value(A), m_Value(B))) &&
4501 sd_match(N1, m_SMinLike(m_Specific(A), m_Specific(B))))
4502 return DAG.getNode(ISD::ABDS, DL, VT, A, B);
4503
4504 // smin(a,b) - smax(a,b) --> neg(abds(a,b))
4505 if (hasOperation(ISD::ABDS, VT) &&
4506 sd_match(N0, m_SMinLike(m_Value(A), m_Value(B))) &&
4507 sd_match(N1, m_SMaxLike(m_Specific(A), m_Specific(B))))
4508 return DAG.getNegative(DAG.getNode(ISD::ABDS, DL, VT, A, B), DL, VT);
4509
4510 // umax(a,b) - umin(a,b) --> abdu(a,b)
4511 if ((!LegalOperations || hasOperation(ISD::ABDU, VT)) &&
4512 sd_match(N0, m_UMaxLike(m_Value(A), m_Value(B))) &&
4513 sd_match(N1, m_UMinLike(m_Specific(A), m_Specific(B))))
4514 return DAG.getNode(ISD::ABDU, DL, VT, A, B);
4515
4516 // umin(a,b) - umax(a,b) --> neg(abdu(a,b))
4517 if (hasOperation(ISD::ABDU, VT) &&
4518 sd_match(N0, m_UMinLike(m_Value(A), m_Value(B))) &&
4519 sd_match(N1, m_UMaxLike(m_Specific(A), m_Specific(B))))
4520 return DAG.getNegative(DAG.getNode(ISD::ABDU, DL, VT, A, B), DL, VT);
4521
4522 return SDValue();
4523}
4524
4525SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
4526 unsigned Opcode = N->getOpcode();
4527 SDValue N0 = N->getOperand(0);
4528 SDValue N1 = N->getOperand(1);
4529 EVT VT = N0.getValueType();
4530 bool IsSigned = Opcode == ISD::SSUBSAT;
4531 SDLoc DL(N);
4532
4533 // fold (sub_sat x, undef) -> 0
4534 if (N0.isUndef() || N1.isUndef())
4535 return DAG.getConstant(0, DL, VT);
4536
4537 // fold (sub_sat x, x) -> 0
4538 if (N0 == N1)
4539 return DAG.getConstant(0, DL, VT);
4540
4541 // fold (sub_sat c1, c2) -> c3
4542 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
4543 return C;
4544
4545 // fold vector ops
4546 if (VT.isVector()) {
4547 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4548 return FoldedVOp;
4549
4550 // fold (sub_sat x, 0) -> x, vector edition
4551 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
4552 return N0;
4553 }
4554
4555 // fold (sub_sat x, 0) -> x
4556 if (isNullConstant(N1))
4557 return N0;
4558
4559 // If it cannot overflow, transform into a sub.
4560 if (DAG.willNotOverflowSub(IsSigned, N0, N1))
4561 return DAG.getNode(ISD::SUB, DL, VT, N0, N1);
4562
4563 return SDValue();
4564}
4565
4566SDValue DAGCombiner::visitSUBC(SDNode *N) {
4567 SDValue N0 = N->getOperand(0);
4568 SDValue N1 = N->getOperand(1);
4569 EVT VT = N0.getValueType();
4570 SDLoc DL(N);
4571
4572 // If the flag result is dead, turn this into an SUB.
4573 if (!N->hasAnyUseOfValue(1))
4574 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
4575 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4576
4577 // fold (subc x, x) -> 0 + no borrow
4578 if (N0 == N1)
4579 return CombineTo(N, DAG.getConstant(0, DL, VT),
4580 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4581
4582 // fold (subc x, 0) -> x + no borrow
4583 if (isNullConstant(N1))
4584 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4585
4586 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
4587 if (isAllOnesConstant(N0))
4588 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
4589 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4590
4591 return SDValue();
4592}
4593
4594SDValue DAGCombiner::visitSUBO(SDNode *N) {
4595 SDValue N0 = N->getOperand(0);
4596 SDValue N1 = N->getOperand(1);
4597 EVT VT = N0.getValueType();
4598 bool IsSigned = (ISD::SSUBO == N->getOpcode());
4599
4600 EVT CarryVT = N->getValueType(1);
4601 SDLoc DL(N);
4602
4603 // If the flag result is dead, turn this into an SUB.
4604 if (!N->hasAnyUseOfValue(1))
4605 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
4606 DAG.getUNDEF(CarryVT));
4607
4608 // fold (subo x, x) -> 0 + no borrow
4609 if (N0 == N1)
4610 return CombineTo(N, DAG.getConstant(0, DL, VT),
4611 DAG.getConstant(0, DL, CarryVT));
4612
4613 // fold (subo x, c) -> (addo x, -c)
4614 if (ConstantSDNode *N1C = getAsNonOpaqueConstant(N1))
4615 if (IsSigned && !N1C->isMinSignedValue())
4616 return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
4617 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
4618
4619 // fold (subo x, 0) -> x + no borrow
4620 if (isNullOrNullSplat(N1))
4621 return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
4622
4623 // If it cannot overflow, transform into a sub.
4624 if (DAG.willNotOverflowSub(IsSigned, N0, N1))
4625 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
4626 DAG.getConstant(0, DL, CarryVT));
4627
4628 // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
4629 if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
4630 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
4631 DAG.getConstant(0, DL, CarryVT));
4632
4633 return SDValue();
4634}
4635
4636SDValue DAGCombiner::visitSUBE(SDNode *N) {
4637 SDValue N0 = N->getOperand(0);
4638 SDValue N1 = N->getOperand(1);
4639 SDValue CarryIn = N->getOperand(2);
4640
4641 // fold (sube x, y, false) -> (subc x, y)
4642 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
4643 return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
4644
4645 return SDValue();
4646}
4647
4648SDValue DAGCombiner::visitUSUBO_CARRY(SDNode *N) {
4649 SDValue N0 = N->getOperand(0);
4650 SDValue N1 = N->getOperand(1);
4651 SDValue CarryIn = N->getOperand(2);
4652
4653 // fold (usubo_carry x, y, false) -> (usubo x, y)
4654 if (isNullConstant(CarryIn)) {
4655 if (!LegalOperations ||
4656 TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
4657 return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
4658 }
4659
4660 return SDValue();
4661}
4662
4663SDValue DAGCombiner::visitSSUBO_CARRY(SDNode *N) {
4664 SDValue N0 = N->getOperand(0);
4665 SDValue N1 = N->getOperand(1);
4666 SDValue CarryIn = N->getOperand(2);
4667
4668 // fold (ssubo_carry x, y, false) -> (ssubo x, y)
4669 if (isNullConstant(CarryIn)) {
4670 if (!LegalOperations ||
4671 TLI.isOperationLegalOrCustom(ISD::SSUBO, N->getValueType(0)))
4672 return DAG.getNode(ISD::SSUBO, SDLoc(N), N->getVTList(), N0, N1);
4673 }
4674
4675 return SDValue();
4676}
4677
4678// Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX and
4679// UMULFIXSAT here.
4680SDValue DAGCombiner::visitMULFIX(SDNode *N) {
4681 SDValue N0 = N->getOperand(0);
4682 SDValue N1 = N->getOperand(1);
4683 SDValue Scale = N->getOperand(2);
4684 EVT VT = N0.getValueType();
4685
4686 // fold (mulfix x, undef, scale) -> 0
4687 if (N0.isUndef() || N1.isUndef())
4688 return DAG.getConstant(0, SDLoc(N), VT);
4689
4690 // Canonicalize constant to RHS (vector doesn't have to splat)
4693 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0, Scale);
4694
4695 // fold (mulfix x, 0, scale) -> 0
4696 if (isNullConstant(N1))
4697 return DAG.getConstant(0, SDLoc(N), VT);
4698
4699 return SDValue();
4700}
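
// Illustrative sketch, not part of this file: what the "mulfix x, 0, scale -> 0"
// fold relies on. An unsigned fixed-point multiply is (x * y) >> scale, so a zero
// operand yields zero for every scale. Invented name; models UMULFIX on 32-bit
// values with a 64-bit intermediate and assumes Scale < 32.
static inline unsigned umulfix32(unsigned X, unsigned Y, unsigned Scale) {
  return (unsigned)(((unsigned long long)X * Y) >> Scale);
}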
4701
4702template <class MatchContextClass> SDValue DAGCombiner::visitMUL(SDNode *N) {
4703 SDValue N0 = N->getOperand(0);
4704 SDValue N1 = N->getOperand(1);
4705 EVT VT = N0.getValueType();
4706 unsigned BitWidth = VT.getScalarSizeInBits();
4707 SDLoc DL(N);
4708 bool UseVP = std::is_same_v<MatchContextClass, VPMatchContext>;
4709 MatchContextClass Matcher(DAG, TLI, N);
4710
4711 // fold (mul x, undef) -> 0
4712 if (N0.isUndef() || N1.isUndef())
4713 return DAG.getConstant(0, DL, VT);
4714
4715 // fold (mul c1, c2) -> c1*c2
4716 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MUL, DL, VT, {N0, N1}))
4717 return C;
4718
4719 // canonicalize constant to RHS (vector doesn't have to splat)
4722 return Matcher.getNode(ISD::MUL, DL, VT, N1, N0);
4723
4724 bool N1IsConst = false;
4725 bool N1IsOpaqueConst = false;
4726 APInt ConstValue1;
4727
4728 // fold vector ops
4729 if (VT.isVector()) {
4730 // TODO: Change this to use SimplifyVBinOp when it supports VP op.
4731 if (!UseVP)
4732 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4733 return FoldedVOp;
4734
4735 N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
4736 assert((!N1IsConst || ConstValue1.getBitWidth() == BitWidth) &&
4737 "Splat APInt should be element width");
4738 } else {
4739 N1IsConst = isa<ConstantSDNode>(N1);
4740 if (N1IsConst) {
4741 ConstValue1 = N1->getAsAPIntVal();
4742 N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
4743 }
4744 }
4745
4746 // fold (mul x, 0) -> 0
4747 if (N1IsConst && ConstValue1.isZero())
4748 return N1;
4749
4750 // fold (mul x, 1) -> x
4751 if (N1IsConst && ConstValue1.isOne())
4752 return N0;
4753
4754 if (!UseVP)
4755 if (SDValue NewSel = foldBinOpIntoSelect(N))
4756 return NewSel;
4757
4758 // fold (mul x, -1) -> 0-x
4759 if (N1IsConst && ConstValue1.isAllOnes())
4760 return Matcher.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
4761
4762 // fold (mul x, (1 << c)) -> x << c
4763 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
4764 (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
4765 if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
4766 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4767 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
4768 SDNodeFlags Flags;
4769 Flags.setNoUnsignedWrap(N->getFlags().hasNoUnsignedWrap());
4770 // TODO: Preserve setNoSignedWrap if LogBase2 isn't BitWidth - 1.
4771 return Matcher.getNode(ISD::SHL, DL, VT, N0, Trunc, Flags);
4772 }
4773 }
4774
4775 // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
4776 if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isNegatedPowerOf2()) {
4777 unsigned Log2Val = (-ConstValue1).logBase2();
4778
4779 // FIXME: If the input is something that is easily negated (e.g. a
4780 // single-use add), we should put the negate there.
4781 return Matcher.getNode(
4782 ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
4783 Matcher.getNode(ISD::SHL, DL, VT, N0,
4784 DAG.getShiftAmountConstant(Log2Val, VT, DL)));
4785 }
4786
4787 // Attempt to reuse an existing umul_lohi/smul_lohi node, but only if the
4788 // hi result is in use in case we hit this mid-legalization.
4789 if (!UseVP) {
4790 for (unsigned LoHiOpc : {ISD::UMUL_LOHI, ISD::SMUL_LOHI}) {
4791 if (!LegalOperations || TLI.isOperationLegalOrCustom(LoHiOpc, VT)) {
4792 SDVTList LoHiVT = DAG.getVTList(VT, VT);
4793 // TODO: Can we match commutable operands with getNodeIfExists?
4794 if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N0, N1}))
4795 if (LoHi->hasAnyUseOfValue(1))
4796 return SDValue(LoHi, 0);
4797 if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N1, N0}))
4798 if (LoHi->hasAnyUseOfValue(1))
4799 return SDValue(LoHi, 0);
4800 }
4801 }
4802 }
4803
4804 // Try to transform:
4805 // (1) multiply-by-(power-of-2 +/- 1) into shift and add/sub.
4806 // mul x, (2^N + 1) --> add (shl x, N), x
4807 // mul x, (2^N - 1) --> sub (shl x, N), x
4808 // Examples: x * 33 --> (x << 5) + x
4809 // x * 15 --> (x << 4) - x
4810 // x * -33 --> -((x << 5) + x)
4811 // x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
4812 // (2) multiply-by-(power-of-2 +/- power-of-2) into shifts and add/sub.
4813 // mul x, (2^N + 2^M) --> (add (shl x, N), (shl x, M))
4814 // mul x, (2^N - 2^M) --> (sub (shl x, N), (shl x, M))
4815 // Examples: x * 0x8800 --> (x << 15) + (x << 11)
4816 // x * 0xf800 --> (x << 16) - (x << 11)
4817 // x * -0x8800 --> -((x << 15) + (x << 11))
4818 // x * -0xf800 --> -((x << 16) - (x << 11)) ; (x << 11) - (x << 16)
4819 if (!UseVP && N1IsConst &&
4820 TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
4821 // TODO: We could handle more general decomposition of any constant by
4822 // having the target set a limit on number of ops and making a
4823 // callback to determine that sequence (similar to sqrt expansion).
4824 unsigned MathOp = ISD::DELETED_NODE;
4825 APInt MulC = ConstValue1.abs();
4826 // The constant `2` should be treated as (2^0 + 1).
4827 unsigned TZeros = MulC == 2 ? 0 : MulC.countr_zero();
4828 MulC.lshrInPlace(TZeros);
4829 if ((MulC - 1).isPowerOf2())
4830 MathOp = ISD::ADD;
4831 else if ((MulC + 1).isPowerOf2())
4832 MathOp = ISD::SUB;
4833
4834 if (MathOp != ISD::DELETED_NODE) {
4835 unsigned ShAmt =
4836 MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2();
4837 ShAmt += TZeros;
4838 assert(ShAmt < BitWidth &&
4839 "multiply-by-constant generated out of bounds shift");
4840 SDValue Shl =
4841 DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT));
4842 SDValue R =
4843 TZeros ? DAG.getNode(MathOp, DL, VT, Shl,
4844 DAG.getNode(ISD::SHL, DL, VT, N0,
4845 DAG.getConstant(TZeros, DL, VT)))
4846 : DAG.getNode(MathOp, DL, VT, Shl, N0);
4847 if (ConstValue1.isNegative())
4848 R = DAG.getNegative(R, DL, VT);
4849 return R;
4850 }
4851 }
4852
4853 // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
4854 if (sd_context_match(N0, Matcher, m_Opc(ISD::SHL))) {
4855 SDValue N01 = N0.getOperand(1);
4856 if (SDValue C3 = DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N1, N01}))
4857 return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), C3);
4858 }
4859
4860 // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
4861 // use.
4862 {
4863 SDValue Sh, Y;
4864
4865 // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
4866 if (sd_context_match(N0, Matcher, m_OneUse(m_Opc(ISD::SHL))) &&
4868 Sh = N0; Y = N1;
4869 } else if (sd_context_match(N1, Matcher, m_OneUse(m_Opc(ISD::SHL))) &&
4871 Sh = N1; Y = N0;
4872 }
4873
4874 if (Sh.getNode()) {
4875 SDValue Mul = Matcher.getNode(ISD::MUL, DL, VT, Sh.getOperand(0), Y);
4876 return Matcher.getNode(ISD::SHL, DL, VT, Mul, Sh.getOperand(1));
4877 }
4878 }
4879
4880 // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
4881 if (sd_context_match(N0, Matcher, m_Opc(ISD::ADD)) &&
4885 return Matcher.getNode(
4886 ISD::ADD, DL, VT,
4887 Matcher.getNode(ISD::MUL, SDLoc(N0), VT, N0.getOperand(0), N1),
4888 Matcher.getNode(ISD::MUL, SDLoc(N1), VT, N0.getOperand(1), N1));
4889
4890 // Fold (mul (vscale * C0), C1) to (vscale * (C0 * C1)).
4891 ConstantSDNode *NC1 = isConstOrConstSplat(N1);
4892 if (!UseVP && N0.getOpcode() == ISD::VSCALE && NC1) {
4893 const APInt &C0 = N0.getConstantOperandAPInt(0);
4894 const APInt &C1 = NC1->getAPIntValue();
4895 return DAG.getVScale(DL, VT, C0 * C1);
4896 }
4897
4898 // Fold (mul step_vector(C0), C1) to (step_vector(C0 * C1)).
4899 APInt MulVal;
4900 if (!UseVP && N0.getOpcode() == ISD::STEP_VECTOR &&
4901 ISD::isConstantSplatVector(N1.getNode(), MulVal)) {
4902 const APInt &C0 = N0.getConstantOperandAPInt(0);
4903 APInt NewStep = C0 * MulVal;
4904 return DAG.getStepVector(DL, VT, NewStep);
4905 }
4906
4907 // Fold Y = sra (X, size(X)-1); mul (or (Y, 1), X) -> (abs X)
4908 SDValue X;
4909 if (!UseVP && (!LegalOperations || hasOperation(ISD::ABS, VT)) &&
4911 N, Matcher,
4913 m_Deferred(X)))) {
4914 return Matcher.getNode(ISD::ABS, DL, VT, X);
4915 }
4916
4917  // Fold element-wise (mul x, 0/undef) -> 0 and
4918  //                   (mul x, 1) -> x
4919  // into a single and(x, mask).
4920 // We can replace vectors with '0' and '1' factors with a clearing mask.
4921 if (VT.isFixedLengthVector()) {
4922 unsigned NumElts = VT.getVectorNumElements();
4923 SmallBitVector ClearMask;
4924 ClearMask.reserve(NumElts);
4925 auto IsClearMask = [&ClearMask](ConstantSDNode *V) {
4926 if (!V || V->isZero()) {
4927 ClearMask.push_back(true);
4928 return true;
4929 }
4930 ClearMask.push_back(false);
4931 return V->isOne();
4932 };
4933 if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::AND, VT)) &&
4934 ISD::matchUnaryPredicate(N1, IsClearMask, /*AllowUndefs*/ true)) {
4935 assert(N1.getOpcode() == ISD::BUILD_VECTOR && "Unknown constant vector");
4936 EVT LegalSVT = N1.getOperand(0).getValueType();
4937 SDValue Zero = DAG.getConstant(0, DL, LegalSVT);
4938 SDValue AllOnes = DAG.getAllOnesConstant(DL, LegalSVT);
4940 for (unsigned I = 0; I != NumElts; ++I)
4941 if (ClearMask[I])
4942 Mask[I] = Zero;
4943 return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getBuildVector(VT, DL, Mask));
4944 }
4945 }
4946
4947 // reassociate mul
4948 // TODO: Change reassociateOps to support vp ops.
4949 if (!UseVP)
4950 if (SDValue RMUL = reassociateOps(ISD::MUL, DL, N0, N1, N->getFlags()))
4951 return RMUL;
4952
4953 // Fold mul(vecreduce(x), vecreduce(y)) -> vecreduce(mul(x, y))
4954 // TODO: Change reassociateReduction to support vp ops.
4955 if (!UseVP)
4956 if (SDValue SD =
4957 reassociateReduction(ISD::VECREDUCE_MUL, ISD::MUL, DL, VT, N0, N1))
4958 return SD;
4959
4960 // Simplify the operands using demanded-bits information.
4962 return SDValue(N, 0);
4963
4964 return SDValue();
4965}
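
// Illustrative sketch, not part of this file: the shift-and-add decomposition
// used above for multiplies by (2^N +/- 1) and (2^N +/- 2^M), written as plain
// 32-bit unsigned arithmetic. The helper names are invented.
static inline unsigned mulBy33(unsigned X) { return (X << 5) + X; } // 2^5 + 1
static inline unsigned mulBy15(unsigned X) { return (X << 4) - X; } // 2^4 - 1
static inline unsigned mulBy0x8800(unsigned X) {
  return (X << 15) + (X << 11);                                     // 2^15 + 2^11
}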
4966
4967 /// Return true if a divmod libcall is available.
4969 const TargetLowering &TLI) {
4970 RTLIB::Libcall LC;
4971 EVT NodeType = Node->getValueType(0);
4972 if (!NodeType.isSimple())
4973 return false;
4974 switch (NodeType.getSimpleVT().SimpleTy) {
4975 default: return false; // No libcall for vector types.
4976 case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
4977 case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
4978 case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
4979 case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
4980 case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
4981 }
4982
4983 return TLI.getLibcallName(LC) != nullptr;
4984}
4985
4986/// Issue divrem if both quotient and remainder are needed.
4987SDValue DAGCombiner::useDivRem(SDNode *Node) {
4988 if (Node->use_empty())
4989 return SDValue(); // This is a dead node, leave it alone.
4990
4991 unsigned Opcode = Node->getOpcode();
4992 bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
4993 unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
4994
4995 // DivMod lib calls can still work on non-legal types if using lib-calls.
4996 EVT VT = Node->getValueType(0);
4997 if (VT.isVector() || !VT.isInteger())
4998 return SDValue();
4999
5000 if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
5001 return SDValue();
5002
5003 // If DIVREM is going to get expanded into a libcall,
5004 // but there is no libcall available, then don't combine.
5005 if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
5007 return SDValue();
5008
5009 // If div is legal, it's better to do the normal expansion
5010 unsigned OtherOpcode = 0;
5011 if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
5012 OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
5013 if (TLI.isOperationLegalOrCustom(Opcode, VT))
5014 return SDValue();
5015 } else {
5016 OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
5017 if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
5018 return SDValue();
5019 }
5020
5021 SDValue Op0 = Node->getOperand(0);
5022 SDValue Op1 = Node->getOperand(1);
5023 SDValue combined;
5024 for (SDNode *User : Op0->users()) {
5025 if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
5026 User->use_empty())
5027 continue;
5028 // Convert the other matching node(s), too;
5029 // otherwise, the DIVREM may get target-legalized into something
5030 // target-specific that we won't be able to recognize.
5031 unsigned UserOpc = User->getOpcode();
5032 if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
5033 User->getOperand(0) == Op0 &&
5034 User->getOperand(1) == Op1) {
5035 if (!combined) {
5036 if (UserOpc == OtherOpcode) {
5037 SDVTList VTs = DAG.getVTList(VT, VT);
5038 combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
5039 } else if (UserOpc == DivRemOpc) {
5040 combined = SDValue(User, 0);
5041 } else {
5042 assert(UserOpc == Opcode);
5043 continue;
5044 }
5045 }
5046 if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
5047 CombineTo(User, combined);
5048 else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
5049 CombineTo(User, combined.getValue(1));
5050 }
5051 }
5052 return combined;
5053}
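
// Illustrative sketch, not part of this file: the point of the DIVREM reuse
// above. When both x / y and x % y are live, a single divrem-style computation
// can feed both users. The struct and function names below are invented, and a
// non-zero divisor (with no signed-overflow case) is assumed.
struct SDivRemParts { int Quot, Rem; };
static inline SDivRemParts sdivremScalar(int A, int B) {
  return {A / B, A % B}; // one division conceptually produces both results
}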
5054
5056 SDValue N0 = N->getOperand(0);
5057 SDValue N1 = N->getOperand(1);
5058 EVT VT = N->getValueType(0);
5059 SDLoc DL(N);
5060
5061 unsigned Opc = N->getOpcode();
5062 bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
5063
5064 // X / undef -> undef
5065 // X % undef -> undef
5066 // X / 0 -> undef
5067 // X % 0 -> undef
5068 // NOTE: This includes vectors where any divisor element is zero/undef.
5069 if (DAG.isUndef(Opc, {N0, N1}))
5070 return DAG.getUNDEF(VT);
5071
5072 // undef / X -> 0
5073 // undef % X -> 0
5074 if (N0.isUndef())
5075 return DAG.getConstant(0, DL, VT);
5076
5077 // 0 / X -> 0
5078 // 0 % X -> 0
5080 if (N0C && N0C->isZero())
5081 return N0;
5082
5083 // X / X -> 1
5084 // X % X -> 0
5085 if (N0 == N1)
5086 return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
5087
5088 // X / 1 -> X
5089 // X % 1 -> 0
5090 // If this is a boolean op (single-bit element type), we can't have
5091 // division-by-zero or remainder-by-zero, so assume the divisor is 1.
5092 // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
5093 // it's a 1.
5094 if (isOneOrOneSplat(N1) || (VT.getScalarType() == MVT::i1))
5095 return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
5096
5097 return SDValue();
5098}
5099
5100SDValue DAGCombiner::visitSDIV(SDNode *N) {
5101 SDValue N0 = N->getOperand(0);
5102 SDValue N1 = N->getOperand(1);
5103 EVT VT = N->getValueType(0);
5104 EVT CCVT = getSetCCResultType(VT);
5105 SDLoc DL(N);
5106
5107 // fold (sdiv c1, c2) -> c1/c2
5108 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, {N0, N1}))
5109 return C;
5110
5111 // fold vector ops
5112 if (VT.isVector())
5113 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5114 return FoldedVOp;
5115
5116 // fold (sdiv X, -1) -> 0-X
5117 ConstantSDNode *N1C = isConstOrConstSplat(N1);
5118 if (N1C && N1C->isAllOnes())
5119 return DAG.getNegative(N0, DL, VT);
5120
5121 // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
5122 if (N1C && N1C->isMinSignedValue())
5123 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
5124 DAG.getConstant(1, DL, VT),
5125 DAG.getConstant(0, DL, VT));
5126
5127 if (SDValue V = simplifyDivRem(N, DAG))
5128 return V;
5129
5130 if (SDValue NewSel = foldBinOpIntoSelect(N))
5131 return NewSel;
5132
5133 // If we know the sign bits of both operands are zero, strength reduce to a
5134 // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
5135 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
5136 return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
5137
5138 if (SDValue V = visitSDIVLike(N0, N1, N)) {
5139 // If the corresponding remainder node exists, update its users with
5140    // (Dividend - (Quotient * Divisor)).
5141 if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
5142 { N0, N1 })) {
5143 // If the sdiv has the exact flag we shouldn't propagate it to the
5144 // remainder node.
5145 if (!N->getFlags().hasExact()) {
5146 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
5147 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
5148 AddToWorklist(Mul.getNode());
5149 AddToWorklist(Sub.getNode());
5150 CombineTo(RemNode, Sub);
5151 }
5152 }
5153 return V;
5154 }
5155
5156 // sdiv, srem -> sdivrem
5157 // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
5158 // true. Otherwise, we break the simplification logic in visitREM().
5159 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
5160 if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
5161 if (SDValue DivRem = useDivRem(N))
5162 return DivRem;
5163
5164 return SDValue();
5165}
5166
5167static bool isDivisorPowerOfTwo(SDValue Divisor) {
5168 // Helper for determining whether a value is a power-2 constant scalar or a
5169 // vector of such elements.
5170 auto IsPowerOfTwo = [](ConstantSDNode *C) {
5171 if (C->isZero() || C->isOpaque())
5172 return false;
5173 if (C->getAPIntValue().isPowerOf2())
5174 return true;
5175 if (C->getAPIntValue().isNegatedPowerOf2())
5176 return true;
5177 return false;
5178 };
5179
5180 return ISD::matchUnaryPredicate(Divisor, IsPowerOfTwo);
5181}
5182
5183SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
5184 SDLoc DL(N);
5185 EVT VT = N->getValueType(0);
5186 EVT CCVT = getSetCCResultType(VT);
5187 unsigned BitWidth = VT.getScalarSizeInBits();
5188
5189 // fold (sdiv X, pow2) -> simple ops after legalize
5190 // FIXME: We check for the exact bit here because the generic lowering gives
5191 // better results in that case. The target-specific lowering should learn how
5192 // to handle exact sdivs efficiently.
5193 if (!N->getFlags().hasExact() && isDivisorPowerOfTwo(N1)) {
5194 // Target-specific implementation of sdiv x, pow2.
5195 if (SDValue Res = BuildSDIVPow2(N))
5196 return Res;
5197
5198 // Create constants that are functions of the shift amount value.
5199 EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
5200 SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
5201 SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
5202 C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
5203 SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
5204 if (!isConstantOrConstantVector(Inexact))
5205 return SDValue();
5206
5207 // Splat the sign bit into the register
5208 SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
5209 DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
5210 AddToWorklist(Sign.getNode());
5211
5212 // Add (N0 < 0) ? abs2 - 1 : 0;
5213 SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
5214 AddToWorklist(Srl.getNode());
5215 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
5216 AddToWorklist(Add.getNode());
5217 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
5218 AddToWorklist(Sra.getNode());
5219
5220 // Special case: (sdiv X, 1) -> X
5221    // Special case: (sdiv X, -1) -> 0-X
5222 SDValue One = DAG.getConstant(1, DL, VT);
5224 SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
5225 SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
5226 SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
5227 Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);
5228
5229 // If dividing by a positive value, we're done. Otherwise, the result must
5230 // be negated.
5231 SDValue Zero = DAG.getConstant(0, DL, VT);
5232 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);
5233
5234 // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
5235 SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
5236 SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
5237 return Res;
5238 }
5239
5240 // If integer divide is expensive and we satisfy the requirements, emit an
5241 // alternate sequence. Targets may check function attributes for size/speed
5242 // trade-offs.
5243 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
5245 !TLI.isIntDivCheap(N->getValueType(0), Attr))
5246 if (SDValue Op = BuildSDIV(N))
5247 return Op;
5248
5249 return SDValue();
5250}
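
// Illustrative sketch, not part of this file: the scalar sequence the
// power-of-two path above builds for a 32-bit sdiv by 2^K with K > 0. A bias of
// (2^K - 1) is added only when the dividend is negative so that the final
// arithmetic shift rounds toward zero. Invented name; assumes 0 < K < 32 and an
// arithmetic right shift for signed values.
static inline int sdivByPow2(int X, unsigned K) {
  int Sign = X >> 31;                         // 0 or -1: the sign splat
  unsigned Bias = (unsigned)Sign >> (32 - K); // (X < 0) ? 2^K - 1 : 0
  return (int)((unsigned)X + Bias) >> K;      // the shift now truncates toward zero
}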
5251
5252SDValue DAGCombiner::visitUDIV(SDNode *N) {
5253 SDValue N0 = N->getOperand(0);
5254 SDValue N1 = N->getOperand(1);
5255 EVT VT = N->getValueType(0);
5256 EVT CCVT = getSetCCResultType(VT);
5257 SDLoc DL(N);
5258
5259 // fold (udiv c1, c2) -> c1/c2
5260 if (SDValue C = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, {N0, N1}))
5261 return C;
5262
5263 // fold vector ops
5264 if (VT.isVector())
5265 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5266 return FoldedVOp;
5267
5268 // fold (udiv X, -1) -> select(X == -1, 1, 0)
5269 ConstantSDNode *N1C = isConstOrConstSplat(N1);
5270 if (N1C && N1C->isAllOnes() && CCVT.isVector() == VT.isVector()) {
5271 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
5272 DAG.getConstant(1, DL, VT),
5273 DAG.getConstant(0, DL, VT));
5274 }
5275
5276 if (SDValue V = simplifyDivRem(N, DAG))
5277 return V;
5278
5279 if (SDValue NewSel = foldBinOpIntoSelect(N))
5280 return NewSel;
5281
5282 if (SDValue V = visitUDIVLike(N0, N1, N)) {
5283 // If the corresponding remainder node exists, update its users with
5284    // (Dividend - (Quotient * Divisor)).
5285 if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
5286 { N0, N1 })) {
5287 // If the udiv has the exact flag we shouldn't propagate it to the
5288 // remainder node.
5289 if (!N->getFlags().hasExact()) {
5290 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
5291 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
5292 AddToWorklist(Mul.getNode());
5293 AddToWorklist(Sub.getNode());
5294 CombineTo(RemNode, Sub);
5295 }
5296 }
5297 return V;
5298 }
5299
5300  // udiv, urem -> udivrem
5301 // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
5302 // true. Otherwise, we break the simplification logic in visitREM().
5303 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
5304 if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
5305 if (SDValue DivRem = useDivRem(N))
5306 return DivRem;
5307
5308 // Simplify the operands using demanded-bits information.
5309 // We don't have demanded bits support for UDIV so this just enables constant
5310 // folding based on known bits.
5312 return SDValue(N, 0);
5313
5314 return SDValue();
5315}
5316
5317SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
5318 SDLoc DL(N);
5319 EVT VT = N->getValueType(0);
5320
5321 // fold (udiv x, (1 << c)) -> x >>u c
5322 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true)) {
5323 if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
5324 AddToWorklist(LogBase2.getNode());
5325
5326 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
5327 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
5328 AddToWorklist(Trunc.getNode());
5329 return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
5330 }
5331 }
5332
5333 // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
5334 if (N1.getOpcode() == ISD::SHL) {
5335 SDValue N10 = N1.getOperand(0);
5336 if (isConstantOrConstantVector(N10, /*NoOpaques*/ true)) {
5337 if (SDValue LogBase2 = BuildLogBase2(N10, DL)) {
5338 AddToWorklist(LogBase2.getNode());
5339
5340 EVT ADDVT = N1.getOperand(1).getValueType();
5341 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
5342 AddToWorklist(Trunc.getNode());
5343 SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
5344 AddToWorklist(Add.getNode());
5345 return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
5346 }
5347 }
5348 }
5349
5350 // fold (udiv x, c) -> alternate
5351 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
5353 !TLI.isIntDivCheap(N->getValueType(0), Attr))
5354 if (SDValue Op = BuildUDIV(N))
5355 return Op;
5356
5357 return SDValue();
5358}
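
// Illustrative sketch, not part of this file: the two udiv folds above in scalar
// form. Dividing by an exact power of two is a logical right shift, and when the
// divisor is that power of two shifted left by y the shift amounts simply add.
// Invented names; assumes C and C + Y stay below the bit width.
static inline unsigned udivByPow2(unsigned X, unsigned C) { return X >> C; }
static inline unsigned udivByPow2Shl(unsigned X, unsigned C, unsigned Y) {
  return X >> (C + Y); // x / ((1u << C) << Y)
}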
5359
5360SDValue DAGCombiner::buildOptimizedSREM(SDValue N0, SDValue N1, SDNode *N) {
5361 if (!N->getFlags().hasExact() && isDivisorPowerOfTwo(N1) &&
5362 !DAG.doesNodeExist(ISD::SDIV, N->getVTList(), {N0, N1})) {
5363 // Target-specific implementation of srem x, pow2.
5364 if (SDValue Res = BuildSREMPow2(N))
5365 return Res;
5366 }
5367 return SDValue();
5368}
5369
5370// handles ISD::SREM and ISD::UREM
5371SDValue DAGCombiner::visitREM(SDNode *N) {
5372 unsigned Opcode = N->getOpcode();
5373 SDValue N0 = N->getOperand(0);
5374 SDValue N1 = N->getOperand(1);
5375 EVT VT = N->getValueType(0);
5376 EVT CCVT = getSetCCResultType(VT);
5377
5378 bool isSigned = (Opcode == ISD::SREM);
5379 SDLoc DL(N);
5380
5381 // fold (rem c1, c2) -> c1%c2
5382 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5383 return C;
5384
5385 // fold (urem X, -1) -> select(FX == -1, 0, FX)
5386 // Freeze the numerator to avoid a miscompile with an undefined value.
5387 if (!isSigned && llvm::isAllOnesOrAllOnesSplat(N1, /*AllowUndefs*/ false) &&
5388 CCVT.isVector() == VT.isVector()) {
5389 SDValue F0 = DAG.getFreeze(N0);
5390 SDValue EqualsNeg1 = DAG.getSetCC(DL, CCVT, F0, N1, ISD::SETEQ);
5391 return DAG.getSelect(DL, VT, EqualsNeg1, DAG.getConstant(0, DL, VT), F0);
5392 }
5393
5394 if (SDValue V = simplifyDivRem(N, DAG))
5395 return V;
5396
5397 if (SDValue NewSel = foldBinOpIntoSelect(N))
5398 return NewSel;
5399
5400 if (isSigned) {
5401 // If we know the sign bits of both operands are zero, strength reduce to a
5402 // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
5403 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
5404 return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
5405 } else {
5406 if (DAG.isKnownToBeAPowerOfTwo(N1)) {
5407 // fold (urem x, pow2) -> (and x, pow2-1)
5408 SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
5409 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
5410 AddToWorklist(Add.getNode());
5411 return DAG.getNode(ISD::AND, DL, VT, N0, Add);
5412 }
5413 // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
5414 // fold (urem x, (lshr pow2, y)) -> (and x, (add (lshr pow2, y), -1))
5415 // TODO: We should sink the following into isKnownToBePowerOfTwo
5416    // using an OrZero parameter analogous to our handling in ValueTracking.
5417 if ((N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) &&
5419 SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
5420 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
5421 AddToWorklist(Add.getNode());
5422 return DAG.getNode(ISD::AND, DL, VT, N0, Add);
5423 }
5424 }
5425
5426 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
5427
5428 // If X/C can be simplified by the division-by-constant logic, lower
5429 // X%C to the equivalent of X-X/C*C.
5430 // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
5431 // speculative DIV must not cause a DIVREM conversion. We guard against this
5432 // by skipping the simplification if isIntDivCheap(). When div is not cheap,
5433 // combine will not return a DIVREM. Regardless, checking cheapness here
5434 // makes sense since the simplification results in fatter code.
5435 if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
5436 if (isSigned) {
5437 // check if we can build faster implementation for srem
5438 if (SDValue OptimizedRem = buildOptimizedSREM(N0, N1, N))
5439 return OptimizedRem;
5440 }
5441
5442 SDValue OptimizedDiv =
5443 isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
5444 if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != N) {
5445 // If the equivalent Div node also exists, update its users.
5446 unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
5447 if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
5448 { N0, N1 }))
5449 CombineTo(DivNode, OptimizedDiv);
5450 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
5451 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
5452 AddToWorklist(OptimizedDiv.getNode());
5453 AddToWorklist(Mul.getNode());
5454 return Sub;
5455 }
5456 }
5457
5458  // srem -> sdivrem, urem -> udivrem
5459 if (SDValue DivRem = useDivRem(N))
5460 return DivRem.getValue(1);
5461
5462 // fold urem(urem(A, BCst), Op1Cst) -> urem(A, Op1Cst)
5463 // iff urem(BCst, Op1Cst) == 0
5464 SDValue A;
5465 APInt Op1Cst, BCst;
5466 if (sd_match(N, m_URem(m_URem(m_Value(A), m_ConstInt(BCst)),
5467 m_ConstInt(Op1Cst))) &&
5468 BCst.urem(Op1Cst).isZero()) {
5469 return DAG.getNode(ISD::UREM, DL, VT, A, DAG.getConstant(Op1Cst, DL, VT));
5470 }
5471
5472 // fold srem(srem(A, BCst), Op1Cst) -> srem(A, Op1Cst)
5473  // iff srem(BCst, Op1Cst) == 0 && Op1Cst != -1
5474 if (sd_match(N, m_SRem(m_SRem(m_Value(A), m_ConstInt(BCst)),
5475 m_ConstInt(Op1Cst))) &&
5476 BCst.srem(Op1Cst).isZero() && !Op1Cst.isAllOnes()) {
5477 return DAG.getNode(ISD::SREM, DL, VT, A, DAG.getConstant(Op1Cst, DL, VT));
5478 }
5479
5480 return SDValue();
5481}
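
// Illustrative sketch, not part of this file: the "urem x, pow2 -> and x, pow2-1"
// fold above as scalar code. Invented name; assumes Pow2 is a non-zero power of
// two.
static inline unsigned uremByPow2(unsigned X, unsigned Pow2) {
  return X & (Pow2 - 1); // equal to X % Pow2 for any power-of-two Pow2
}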
5482
5483SDValue DAGCombiner::visitMULHS(SDNode *N) {
5484 SDValue N0 = N->getOperand(0);
5485 SDValue N1 = N->getOperand(1);
5486 EVT VT = N->getValueType(0);
5487 SDLoc DL(N);
5488
5489 // fold (mulhs c1, c2)
5490 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHS, DL, VT, {N0, N1}))
5491 return C;
5492
5493 // canonicalize constant to RHS.
5496 return DAG.getNode(ISD::MULHS, DL, N->getVTList(), N1, N0);
5497
5498 if (VT.isVector()) {
5499 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5500 return FoldedVOp;
5501
5502 // fold (mulhs x, 0) -> 0
5503    // do not return N1, because it may contain undef elements.
5505 return DAG.getConstant(0, DL, VT);
5506 }
5507
5508 // fold (mulhs x, 0) -> 0
5509 if (isNullConstant(N1))
5510 return N1;
5511
5512 // fold (mulhs x, 1) -> (sra x, size(x)-1)
5513 if (isOneConstant(N1))
5514 return DAG.getNode(
5515 ISD::SRA, DL, VT, N0,
5517
5518 // fold (mulhs x, undef) -> 0
5519 if (N0.isUndef() || N1.isUndef())
5520 return DAG.getConstant(0, DL, VT);
5521
5522 // If the type twice as wide is legal, transform the mulhs to a wider multiply
5523 // plus a shift.
5524 if (!TLI.isOperationLegalOrCustom(ISD::MULHS, VT) && VT.isSimple() &&
5525 !VT.isVector()) {
5526 MVT Simple = VT.getSimpleVT();
5527 unsigned SimpleSize = Simple.getSizeInBits();
5528 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5529 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5530 N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
5531 N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
5532 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
5533 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
5534 DAG.getShiftAmountConstant(SimpleSize, NewVT, DL));
5535 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
5536 }
5537 }
5538
5539 return SDValue();
5540}
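
// Illustrative sketch, not part of this file: the widening rewrite above for a
// 32-bit MULHS when a 64-bit multiply is legal. Sign-extend, multiply, then take
// the high half with a logical shift and truncate, mirroring the SRL + TRUNCATE
// above. Invented name; two's-complement wrap is assumed on the final narrowing.
#include <cstdint>
static inline int32_t mulhs32(int32_t A, int32_t B) {
  uint64_t Wide = (uint64_t)((int64_t)A * (int64_t)B); // sign-extended product
  return (int32_t)(Wide >> 32);                        // high 32 bits
}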
5541
5542SDValue DAGCombiner::visitMULHU(SDNode *N) {
5543 SDValue N0 = N->getOperand(0);
5544 SDValue N1 = N->getOperand(1);
5545 EVT VT = N->getValueType(0);
5546 SDLoc DL(N);
5547
5548 // fold (mulhu c1, c2)
5549 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHU, DL, VT, {N0, N1}))
5550 return C;
5551
5552 // canonicalize constant to RHS.
5555 return DAG.getNode(ISD::MULHU, DL, N->getVTList(), N1, N0);
5556
5557 if (VT.isVector()) {
5558 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5559 return FoldedVOp;
5560
5561 // fold (mulhu x, 0) -> 0
5562    // do not return N1, because it may contain undef elements.
5564 return DAG.getConstant(0, DL, VT);
5565 }
5566
5567 // fold (mulhu x, 0) -> 0
5568 if (isNullConstant(N1))
5569 return N1;
5570
5571 // fold (mulhu x, 1) -> 0
5572 if (isOneConstant(N1))
5573 return DAG.getConstant(0, DL, VT);
5574
5575 // fold (mulhu x, undef) -> 0
5576 if (N0.isUndef() || N1.isUndef())
5577 return DAG.getConstant(0, DL, VT);
5578
5579 // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
5580 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
5581 hasOperation(ISD::SRL, VT)) {
5582 if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
5583 unsigned NumEltBits = VT.getScalarSizeInBits();
5584 SDValue SRLAmt = DAG.getNode(
5585 ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
5586 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
5587 SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
5588 return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
5589 }
5590 }
5591
5592 // If the type twice as wide is legal, transform the mulhu to a wider multiply
5593 // plus a shift.
5594 if (!TLI.isOperationLegalOrCustom(ISD::MULHU, VT) && VT.isSimple() &&
5595 !VT.isVector()) {
5596 MVT Simple = VT.getSimpleVT();
5597 unsigned SimpleSize = Simple.getSizeInBits();
5598 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5599 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5600 N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
5601 N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
5602 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
5603 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
5604 DAG.getShiftAmountConstant(SimpleSize, NewVT, DL));
5605 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
5606 }
5607 }
5608
5609 // Simplify the operands using demanded-bits information.
5610 // We don't have demanded bits support for MULHU so this just enables constant
5611 // folding based on known bits.
5613 return SDValue(N, 0);
5614
5615 return SDValue();
5616}
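
// Illustrative sketch, not part of this file: the "mulhu x, (1 << c) ->
// x >> (bitwidth - c)" fold above, for 32-bit values. The high half of x * 2^c
// is just the top c bits of x. Invented name; assumes a 32-bit unsigned type and
// 0 < C < 32.
static inline unsigned mulhuByPow2(unsigned X, unsigned C) {
  return X >> (32 - C); // == high 32 bits of ((unsigned long long)X << C)
}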
5617
5618SDValue DAGCombiner::visitAVG(SDNode *N) {
5619 unsigned Opcode = N->getOpcode();
5620 SDValue N0 = N->getOperand(0);
5621 SDValue N1 = N->getOperand(1);
5622 EVT VT = N->getValueType(0);
5623 SDLoc DL(N);
5624 bool IsSigned = Opcode == ISD::AVGCEILS || Opcode == ISD::AVGFLOORS;
5625
5626 // fold (avg c1, c2)
5627 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5628 return C;
5629
5630 // canonicalize constant to RHS.
5633 return DAG.getNode(Opcode, DL, N->getVTList(), N1, N0);
5634
5635 if (VT.isVector())
5636 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5637 return FoldedVOp;
5638
5639 // fold (avg x, undef) -> x
5640 if (N0.isUndef())
5641 return N1;
5642 if (N1.isUndef())
5643 return N0;
5644
5645 // fold (avg x, x) --> x
5646 if (N0 == N1 && Level >= AfterLegalizeTypes)
5647 return N0;
5648
5649 // fold (avgfloor x, 0) -> x >> 1
5650 SDValue X, Y;
5652 return DAG.getNode(ISD::SRA, DL, VT, X,
5653 DAG.getShiftAmountConstant(1, VT, DL));
5655 return DAG.getNode(ISD::SRL, DL, VT, X,
5656 DAG.getShiftAmountConstant(1, VT, DL));
5657
5658 // fold avgu(zext(x), zext(y)) -> zext(avgu(x, y))
5659 // fold avgs(sext(x), sext(y)) -> sext(avgs(x, y))
5660 if (!IsSigned &&
5661 sd_match(N, m_BinOp(Opcode, m_ZExt(m_Value(X)), m_ZExt(m_Value(Y)))) &&
5662 X.getValueType() == Y.getValueType() &&
5663 hasOperation(Opcode, X.getValueType())) {
5664 SDValue AvgU = DAG.getNode(Opcode, DL, X.getValueType(), X, Y);
5665 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, AvgU);
5666 }
5667 if (IsSigned &&
5668 sd_match(N, m_BinOp(Opcode, m_SExt(m_Value(X)), m_SExt(m_Value(Y)))) &&
5669 X.getValueType() == Y.getValueType() &&
5670 hasOperation(Opcode, X.getValueType())) {
5671 SDValue AvgS = DAG.getNode(Opcode, DL, X.getValueType(), X, Y);
5672 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, AvgS);
5673 }
5674
5675 // Fold avgflooru(x,y) -> avgceilu(x,y-1) iff y != 0
5676 // Fold avgflooru(x,y) -> avgceilu(x-1,y) iff x != 0
5677 // Check if avgflooru isn't legal/custom but avgceilu is.
5678 if (Opcode == ISD::AVGFLOORU && !hasOperation(ISD::AVGFLOORU, VT) &&
5679 (!LegalOperations || hasOperation(ISD::AVGCEILU, VT))) {
5680 if (DAG.isKnownNeverZero(N1))
5681 return DAG.getNode(
5682 ISD::AVGCEILU, DL, VT, N0,
5683 DAG.getNode(ISD::ADD, DL, VT, N1, DAG.getAllOnesConstant(DL, VT)));
5684 if (DAG.isKnownNeverZero(N0))
5685 return DAG.getNode(
5686 ISD::AVGCEILU, DL, VT, N1,
5687 DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getAllOnesConstant(DL, VT)));
5688 }
5689
5690 // Fold avgfloor((add nw x,y), 1) -> avgceil(x,y)
5691 // Fold avgfloor((add nw x,1), y) -> avgceil(x,y)
5692 if ((Opcode == ISD::AVGFLOORU && hasOperation(ISD::AVGCEILU, VT)) ||
5693 (Opcode == ISD::AVGFLOORS && hasOperation(ISD::AVGCEILS, VT))) {
5694 SDValue Add;
5695 if (sd_match(N,
5696 m_c_BinOp(Opcode,
5698 m_One())) ||
5699 sd_match(N, m_c_BinOp(Opcode,
5701 m_Value(Y)))) {
5702
5703 if (IsSigned && Add->getFlags().hasNoSignedWrap())
5704 return DAG.getNode(ISD::AVGCEILS, DL, VT, X, Y);
5705
5706 if (!IsSigned && Add->getFlags().hasNoUnsignedWrap())
5707 return DAG.getNode(ISD::AVGCEILU, DL, VT, X, Y);
5708 }
5709 }
5710
5711 // Fold avgfloors(x,y) -> avgflooru(x,y) if both x and y are non-negative
5712 if (Opcode == ISD::AVGFLOORS && hasOperation(ISD::AVGFLOORU, VT)) {
5713 if (DAG.SignBitIsZero(N0) && DAG.SignBitIsZero(N1))
5714 return DAG.getNode(ISD::AVGFLOORU, DL, VT, N0, N1);
5715 }
5716
5717 return SDValue();
5718}
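
// Illustrative sketch, not part of this file: overflow-free scalar forms of the
// unsigned averaging nodes handled above. AVGFLOORU is floor((x + y) / 2) and
// AVGCEILU is ceil((x + y) / 2), computed here without widening the addition.
// Invented names.
static inline unsigned avgFloorU(unsigned X, unsigned Y) {
  return (X & Y) + ((X ^ Y) >> 1);
}
static inline unsigned avgCeilU(unsigned X, unsigned Y) {
  return (X | Y) - ((X ^ Y) >> 1);
}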
5719
5720SDValue DAGCombiner::visitABD(SDNode *N) {
5721 unsigned Opcode = N->getOpcode();
5722 SDValue N0 = N->getOperand(0);
5723 SDValue N1 = N->getOperand(1);
5724 EVT VT = N->getValueType(0);
5725 SDLoc DL(N);
5726
5727 // fold (abd c1, c2)
5728 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5729 return C;
5730
5731 // canonicalize constant to RHS.
5734 return DAG.getNode(Opcode, DL, N->getVTList(), N1, N0);
5735
5736 if (VT.isVector())
5737 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5738 return FoldedVOp;
5739
5740 // fold (abd x, undef) -> 0
5741 if (N0.isUndef() || N1.isUndef())
5742 return DAG.getConstant(0, DL, VT);
5743
5744 // fold (abd x, x) -> 0
5745 if (N0 == N1)
5746 return DAG.getConstant(0, DL, VT);
5747
5748 SDValue X;
5749
5750 // fold (abds x, 0) -> abs x
5752 (!LegalOperations || hasOperation(ISD::ABS, VT)))
5753 return DAG.getNode(ISD::ABS, DL, VT, X);
5754
5755 // fold (abdu x, 0) -> x
5757 return X;
5758
5759 // fold (abds x, y) -> (abdu x, y) iff both args are known positive
5760 if (Opcode == ISD::ABDS && hasOperation(ISD::ABDU, VT) &&
5761 DAG.SignBitIsZero(N0) && DAG.SignBitIsZero(N1))
5762 return DAG.getNode(ISD::ABDU, DL, VT, N1, N0);
5763
5764 return SDValue();
5765}
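
// Illustrative sketch, not part of this file: the scalar meaning of ABDU that
// the folds above rely on (abdu x, 0 -> x and abdu x, x -> 0). Invented name.
static inline unsigned abduScalar(unsigned X, unsigned Y) {
  return X > Y ? X - Y : Y - X; // |x - y| without widening or overflow
}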
5766
5767/// Perform optimizations common to nodes that compute two values. LoOp and HiOp
5768/// give the opcodes for the two computations that are being performed. Return
5769 /// the simplified value if a simplification was made.
5770SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
5771 unsigned HiOp) {
5772 // If the high half is not needed, just compute the low half.
5773 bool HiExists = N->hasAnyUseOfValue(1);
5774 if (!HiExists && (!LegalOperations ||
5775 TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
5776 SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
5777 return CombineTo(N, Res, Res);
5778 }
5779
5780 // If the low half is not needed, just compute the high half.
5781 bool LoExists = N->hasAnyUseOfValue(0);
5782 if (!LoExists && (!LegalOperations ||
5783 TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) {
5784 SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
5785 return CombineTo(N, Res, Res);
5786 }
5787
5788 // If both halves are used, return as it is.
5789 if (LoExists && HiExists)
5790 return SDValue();
5791
5792 // If the two computed results can be simplified separately, separate them.
5793 if (LoExists) {
5794 SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
5795 AddToWorklist(Lo.getNode());
5796 SDValue LoOpt = combine(Lo.getNode());
5797 if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
5798 (!LegalOperations ||
5799 TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType())))
5800 return CombineTo(N, LoOpt, LoOpt);
5801 }
5802
5803 if (HiExists) {
5804 SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
5805 AddToWorklist(Hi.getNode());
5806 SDValue HiOpt = combine(Hi.getNode());
5807 if (HiOpt.getNode() && HiOpt != Hi &&
5808 (!LegalOperations ||
5809 TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType())))
5810 return CombineTo(N, HiOpt, HiOpt);
5811 }
5812
5813 return SDValue();
5814}
5815
5816SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
5817 if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
5818 return Res;
5819
5820 SDValue N0 = N->getOperand(0);
5821 SDValue N1 = N->getOperand(1);
5822 EVT VT = N->getValueType(0);
5823 SDLoc DL(N);
5824
5825 // Constant fold.
5827 return DAG.getNode(ISD::SMUL_LOHI, DL, N->getVTList(), N0, N1);
5828
5829 // canonicalize constant to RHS (vector doesn't have to splat)
5832 return DAG.getNode(ISD::SMUL_LOHI, DL, N->getVTList(), N1, N0);
5833
5834  // If the type twice as wide is legal, transform the smul_lohi to a wider
5835 // multiply plus a shift.
5836 if (VT.isSimple() && !VT.isVector()) {
5837 MVT Simple = VT.getSimpleVT();
5838 unsigned SimpleSize = Simple.getSizeInBits();
5839 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5840 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5841 SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
5842 SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
5843 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
5844      // Extract the high half of the widened product.
5845 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
5846 DAG.getShiftAmountConstant(SimpleSize, NewVT, DL));
5847 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
5848      // Extract the low half of the widened product.
5849 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
5850 return CombineTo(N, Lo, Hi);
5851 }
5852 }
5853
5854 return SDValue();
5855}
5856
5857SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
5858 if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
5859 return Res;
5860
5861 SDValue N0 = N->getOperand(0);
5862 SDValue N1 = N->getOperand(1);
5863 EVT VT = N->getValueType(0);
5864 SDLoc DL(N);
5865
5866 // Constant fold.
5868 return DAG.getNode(ISD::UMUL_LOHI, DL, N->getVTList(), N0, N1);
5869
5870 // canonicalize constant to RHS (vector doesn't have to splat)
5873 return DAG.getNode(ISD::UMUL_LOHI, DL, N->getVTList(), N1, N0);
5874
5875 // (umul_lohi N0, 0) -> (0, 0)
5876 if (isNullConstant(N1)) {
5877 SDValue Zero = DAG.getConstant(0, DL, VT);
5878 return CombineTo(N, Zero, Zero);
5879 }
5880
5881 // (umul_lohi N0, 1) -> (N0, 0)
5882 if (isOneConstant(N1)) {
5883 SDValue Zero = DAG.getConstant(0, DL, VT);
5884 return CombineTo(N, N0, Zero);
5885 }
5886
5887  // If the type twice as wide is legal, transform the umul_lohi to a wider
5888 // multiply plus a shift.
5889 if (VT.isSimple() && !VT.isVector()) {
5890 MVT Simple = VT.getSimpleVT();
5891 unsigned SimpleSize = Simple.getSizeInBits();
5892 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5893 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5894 SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
5895 SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
5896 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
5897      // Extract the high half of the widened product.
5898 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
5899 DAG.getShiftAmountConstant(SimpleSize, NewVT, DL));
5900 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
5901      // Extract the low half of the widened product.
5902 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
5903 return CombineTo(N, Lo, Hi);
5904 }
5905 }
5906
5907 return SDValue();
5908}
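
// Illustrative sketch, not part of this file: the widening expansion above for a
// 32-bit UMUL_LOHI when a 64-bit multiply is legal. One wide multiply yields
// both halves of the result. Invented name.
#include <cstdint>
static inline void umulLoHi32(uint32_t A, uint32_t B, uint32_t &Lo, uint32_t &Hi) {
  uint64_t Wide = (uint64_t)A * B;
  Lo = (uint32_t)Wide;         // low half, as a plain MUL would produce
  Hi = (uint32_t)(Wide >> 32); // high half, as MULHU would produce
}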
5909
5910SDValue DAGCombiner::visitMULO(SDNode *N) {
5911 SDValue N0 = N->getOperand(0);
5912 SDValue N1 = N->getOperand(1);
5913 EVT VT = N0.getValueType();
5914 bool IsSigned = (ISD::SMULO == N->getOpcode());
5915
5916 EVT CarryVT = N->getValueType(1);
5917 SDLoc DL(N);
5918
5919 ConstantSDNode *N0C = isConstOrConstSplat(N0);
5920 ConstantSDNode *N1C = isConstOrConstSplat(N1);
5921
5922 // fold operation with constant operands.
5923 // TODO: Move this to FoldConstantArithmetic when it supports nodes with
5924 // multiple results.
5925 if (N0C && N1C) {
5926 bool Overflow;
5927 APInt Result =
5928 IsSigned ? N0C->getAPIntValue().smul_ov(N1C->getAPIntValue(), Overflow)
5929 : N0C->getAPIntValue().umul_ov(N1C->getAPIntValue(), Overflow);
5930 return CombineTo(N, DAG.getConstant(Result, DL, VT),
5931 DAG.getBoolConstant(Overflow, DL, CarryVT, CarryVT));
5932 }
5933
5934 // canonicalize constant to RHS.
5937 return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
5938
5939 // fold (mulo x, 0) -> 0 + no carry out
5940 if (isNullOrNullSplat(N1))
5941 return CombineTo(N, DAG.getConstant(0, DL, VT),
5942 DAG.getConstant(0, DL, CarryVT));
5943
5944 // (mulo x, 2) -> (addo x, x)
5945 // FIXME: This needs a freeze.
5946 if (N1C && N1C->getAPIntValue() == 2 &&
5947 (!IsSigned || VT.getScalarSizeInBits() > 2))
5948 return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, DL,
5949 N->getVTList(), N0, N0);
5950
5951 // A 1 bit SMULO overflows if both inputs are 1.
5952 if (IsSigned && VT.getScalarSizeInBits() == 1) {
5953 SDValue And = DAG.getNode(ISD::AND, DL, VT, N0, N1);
5954 SDValue Cmp = DAG.getSetCC(DL, CarryVT, And,
5955 DAG.getConstant(0, DL, VT), ISD::SETNE);
5956 return CombineTo(N, And, Cmp);
5957 }
5958
5959 // If it cannot overflow, transform into a mul.
5960 if (DAG.willNotOverflowMul(IsSigned, N0, N1))
5961 return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1),
5962 DAG.getConstant(0, DL, CarryVT));
5963 return SDValue();
5964}
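
// Illustrative sketch, not part of this file: the overflow-aware multiply that
// the constant fold above models, shown for 32-bit unsigned values via a 64-bit
// intermediate. Invented name.
#include <cstdint>
static inline bool umulo32(uint32_t A, uint32_t B, uint32_t &Result) {
  uint64_t Wide = (uint64_t)A * B;
  Result = (uint32_t)Wide;
  return Wide > UINT32_MAX; // true iff the 32-bit multiply overflowed
}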
5965
5966// Function to calculate whether the Min/Max pair of SDNodes (potentially
5967 // swapped around) makes a signed saturate pattern, clamping to between a signed
5968 // saturate of -2^(BW-1) and 2^(BW-1)-1, or an unsigned saturate of 0 and 2^BW-1.
5969// Returns the node being clamped and the bitwidth of the clamp in BW. Should
5970// work with both SMIN/SMAX nodes and setcc/select combo. The operands are the
5971// same as SimplifySelectCC. N0<N1 ? N2 : N3.
5973 SDValue N3, ISD::CondCode CC, unsigned &BW,
5974 bool &Unsigned, SelectionDAG &DAG) {
5975 auto isSignedMinMax = [&](SDValue N0, SDValue N1, SDValue N2, SDValue N3,
5976 ISD::CondCode CC) {
5977 // The compare and select operand should be the same or the select operands
5978 // should be truncated versions of the comparison.
5979 if (N0 != N2 && (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0)))
5980 return 0;
5981 // The constants need to be the same or a truncated version of each other.
5984 if (!N1C || !N3C)
5985 return 0;
5986 const APInt &C1 = N1C->getAPIntValue().trunc(N1.getScalarValueSizeInBits());
5987 const APInt &C2 = N3C->getAPIntValue().trunc(N3.getScalarValueSizeInBits());
5988 if (C1.getBitWidth() < C2.getBitWidth() || C1 != C2.sext(C1.getBitWidth()))
5989 return 0;
5990 return CC == ISD::SETLT ? ISD::SMIN : (CC == ISD::SETGT ? ISD::SMAX : 0);
5991 };
5992
5993 // Check the initial value is a SMIN/SMAX equivalent.
5994 unsigned Opcode0 = isSignedMinMax(N0, N1, N2, N3, CC);
5995 if (!Opcode0)
5996 return SDValue();
5997
5998  // We may only need one range check, if the fptosi could never produce
5999 // the upper value.
6000 if (N0.getOpcode() == ISD::FP_TO_SINT && Opcode0 == ISD::SMAX) {
6001 if (isNullOrNullSplat(N3)) {
6002 EVT IntVT = N0.getValueType().getScalarType();
6003 EVT FPVT = N0.getOperand(0).getValueType().getScalarType();
6004 if (FPVT.isSimple()) {
6005 Type *InputTy = FPVT.getTypeForEVT(*DAG.getContext());
6006 const fltSemantics &Semantics = InputTy->getFltSemantics();
6007 uint32_t MinBitWidth =
6008 APFloatBase::semanticsIntSizeInBits(Semantics, /*isSigned*/ true);
6009 if (IntVT.getSizeInBits() >= MinBitWidth) {
6010 Unsigned = true;
6011 BW = PowerOf2Ceil(MinBitWidth);
6012 return N0;
6013 }
6014 }
6015 }
6016 }
6017
6018 SDValue N00, N01, N02, N03;
6019 ISD::CondCode N0CC;
6020 switch (N0.getOpcode()) {
6021 case ISD::SMIN:
6022 case ISD::SMAX:
6023 N00 = N02 = N0.getOperand(0);
6024 N01 = N03 = N0.getOperand(1);
6025 N0CC = N0.getOpcode() == ISD::SMIN ? ISD::SETLT : ISD::SETGT;
6026 break;
6027 case ISD::SELECT_CC:
6028 N00 = N0.getOperand(0);
6029 N01 = N0.getOperand(1);
6030 N02 = N0.getOperand(2);
6031 N03 = N0.getOperand(3);
6032 N0CC = cast<CondCodeSDNode>(N0.getOperand(4))->get();
6033 break;
6034 case ISD::SELECT:
6035 case ISD::VSELECT:
6036 if (N0.getOperand(0).getOpcode() != ISD::SETCC)
6037 return SDValue();
6038 N00 = N0.getOperand(0).getOperand(0);
6039 N01 = N0.getOperand(0).getOperand(1);
6040 N02 = N0.getOperand(1);
6041 N03 = N0.getOperand(2);
6042 N0CC = cast<CondCodeSDNode>(N0.getOperand(0).getOperand(2))->get();
6043 break;
6044 default:
6045 return SDValue();
6046 }
6047
6048 unsigned Opcode1 = isSignedMinMax(N00, N01, N02, N03, N0CC);
6049 if (!Opcode1 || Opcode0 == Opcode1)
6050 return SDValue();
6051
6052 ConstantSDNode *MinCOp = isConstOrConstSplat(Opcode0 == ISD::SMIN ? N1 : N01);
6053 ConstantSDNode *MaxCOp = isConstOrConstSplat(Opcode0 == ISD::SMIN ? N01 : N1);
6054 if (!MinCOp || !MaxCOp || MinCOp->getValueType(0) != MaxCOp->getValueType(0))
6055 return SDValue();
6056
6057 const APInt &MinC = MinCOp->getAPIntValue();
6058 const APInt &MaxC = MaxCOp->getAPIntValue();
6059 APInt MinCPlus1 = MinC + 1;
6060 if (-MaxC == MinCPlus1 && MinCPlus1.isPowerOf2()) {
6061 BW = MinCPlus1.exactLogBase2() + 1;
6062 Unsigned = false;
6063 return N02;
6064 }
6065
6066 if (MaxC == 0 && MinC != 0 && MinCPlus1.isPowerOf2()) {
6067 BW = MinCPlus1.exactLogBase2();
6068 Unsigned = true;
6069 return N02;
6070 }
6071
6072 return SDValue();
6073}
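
// Illustrative sketch, not part of this file: the clamp shape this helper
// recognizes, spelled out for BW == 8. A signed saturate to i8 is
// smax(smin(x, 127), -128) (or the two operations swapped); an unsigned
// saturate to i8 is smax(smin(x, 255), 0). Invented name for the signed case.
static inline int clampToI8(int X) {
  int Hi = X < 127 ? X : 127;   // smin(x, 2^(BW-1) - 1)
  return Hi > -128 ? Hi : -128; // smax(..., -2^(BW-1))
}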
6074
6076 SDValue N3, ISD::CondCode CC,
6077 SelectionDAG &DAG) {
6078 unsigned BW;
6079 bool Unsigned;
6080 SDValue Fp = isSaturatingMinMax(N0, N1, N2, N3, CC, BW, Unsigned, DAG);
6081 if (!Fp || Fp.getOpcode() != ISD::FP_TO_SINT)
6082 return SDValue();
6083 EVT FPVT = Fp.getOperand(0).getValueType();
6084 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), BW);
6085 if (FPVT.isVector())
6086 NewVT = EVT::getVectorVT(*DAG.getContext(), NewVT,
6087 FPVT.getVectorElementCount());
6088 unsigned NewOpc = Unsigned ? ISD::FP_TO_UINT_SAT : ISD::FP_TO_SINT_SAT;
6089 if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(NewOpc, FPVT, NewVT))
6090 return SDValue();
6091 SDLoc DL(Fp);
6092 SDValue Sat = DAG.getNode(NewOpc, DL, NewVT, Fp.getOperand(0),
6093 DAG.getValueType(NewVT.getScalarType()));
6094 return DAG.getExtOrTrunc(!Unsigned, Sat, DL, N2->getValueType(0));
6095}
6096
6098 SDValue N3, ISD::CondCode CC,
6099 SelectionDAG &DAG) {
6100 // We are looking for UMIN(FPTOUI(X), (2^n)-1), which may have come via a
6101 // select/vselect/select_cc. The two operand pairs for the select (N2/N3) may
6102 // be truncated versions of the setcc (N0/N1).
6103 if ((N0 != N2 &&
6104 (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0))) ||
6105 N0.getOpcode() != ISD::FP_TO_UINT || CC != ISD::SETULT)
6106 return SDValue();
6109 if (!N1C || !N3C)
6110 return SDValue();
6111 const APInt &C1 = N1C->getAPIntValue();
6112 const APInt &C3 = N3C->getAPIntValue();
6113 if (!(C1 + 1).isPowerOf2() || C1.getBitWidth() < C3.getBitWidth() ||
6114 C1 != C3.zext(C1.getBitWidth()))
6115 return SDValue();
6116
6117 unsigned BW = (C1 + 1).exactLogBase2();
6118 EVT FPVT = N0.getOperand(0).getValueType();
6119 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), BW);
6120 if (FPVT.isVector())
6121 NewVT = EVT::getVectorVT(*DAG.getContext(), NewVT,
6122 FPVT.getVectorElementCount());
6124 FPVT, NewVT))
6125 return SDValue();
6126
6127 SDValue Sat =
6128 DAG.getNode(ISD::FP_TO_UINT_SAT, SDLoc(N0), NewVT, N0.getOperand(0),
6129 DAG.getValueType(NewVT.getScalarType()));
6130 return DAG.getZExtOrTrunc(Sat, SDLoc(N0), N3.getValueType());
6131}
6132
6133SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
6134 SDValue N0 = N->getOperand(0);
6135 SDValue N1 = N->getOperand(1);
6136 EVT VT = N0.getValueType();
6137 unsigned Opcode = N->getOpcode();
6138 SDLoc DL(N);
6139
6140 // fold operation with constant operands.
6141 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
6142 return C;
6143
6144 // If the operands are the same, this is a no-op.
6145 if (N0 == N1)
6146 return N0;
6147
6148 // Fold operation with vscale operands.
6149 if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
6150 uint64_t C0 = N0->getConstantOperandVal(0);
6151 uint64_t C1 = N1->getConstantOperandVal(0);
6152 if (Opcode == ISD::UMAX)
6153 return C0 > C1 ? N0 : N1;
6154 else if (Opcode == ISD::UMIN)
6155 return C0 > C1 ? N1 : N0;
6156 }
6157
6158 // canonicalize constant to RHS
6161 return DAG.getNode(Opcode, DL, VT, N1, N0);
6162
6163 // fold vector ops
6164 if (VT.isVector())
6165 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
6166 return FoldedVOp;
6167
6168 // reassociate minmax
6169 if (SDValue RMINMAX = reassociateOps(Opcode, DL, N0, N1, N->getFlags()))
6170 return RMINMAX;
6171
6172  // If the sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
6173 // Only do this if:
6174 // 1. The current op isn't legal and the flipped is.
6175 // 2. The saturation pattern is broken by canonicalization in InstCombine.
6176 bool IsOpIllegal = !TLI.isOperationLegal(Opcode, VT);
6177 bool IsSatBroken = Opcode == ISD::UMIN && N0.getOpcode() == ISD::SMAX;
6178 if ((IsSatBroken || IsOpIllegal) && (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
6179 (N1.isUndef() || DAG.SignBitIsZero(N1))) {
6180 unsigned AltOpcode;
6181 switch (Opcode) {
6182 case ISD::SMIN: AltOpcode = ISD::UMIN; break;
6183 case ISD::SMAX: AltOpcode = ISD::UMAX; break;
6184 case ISD::UMIN: AltOpcode = ISD::SMIN; break;
6185 case ISD::UMAX: AltOpcode = ISD::SMAX; break;
6186 default: llvm_unreachable("Unknown MINMAX opcode");
6187 }
6188 if ((IsSatBroken && IsOpIllegal) || TLI.isOperationLegal(AltOpcode, VT))
6189 return DAG.getNode(AltOpcode, DL, VT, N0, N1);
6190 }
6191
6192 if (Opcode == ISD::SMIN || Opcode == ISD::SMAX)
6194 N0, N1, N0, N1, Opcode == ISD::SMIN ? ISD::SETLT : ISD::SETGT, DAG))
6195 return S;
6196 if (Opcode == ISD::UMIN)
6197 if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N0, N1, ISD::SETULT, DAG))
6198 return S;
6199
6200 // Fold min/max(vecreduce(x), vecreduce(y)) -> vecreduce(min/max(x, y))
6201 auto ReductionOpcode = [](unsigned Opcode) {
6202 switch (Opcode) {
6203 case ISD::SMIN:
6204 return ISD::VECREDUCE_SMIN;
6205 case ISD::SMAX:
6206 return ISD::VECREDUCE_SMAX;
6207 case ISD::UMIN:
6208 return ISD::VECREDUCE_UMIN;
6209 case ISD::UMAX:
6210 return ISD::VECREDUCE_UMAX;
6211 default:
6212 llvm_unreachable("Unexpected opcode");
6213 }
6214 };
6215 if (SDValue SD = reassociateReduction(ReductionOpcode(Opcode), Opcode,
6216 SDLoc(N), VT, N0, N1))
6217 return SD;
6218
6219 // Simplify the operands using demanded-bits information.
6221 return SDValue(N, 0);
6222
6223 return SDValue();
6224}
6225
6226/// If this is a bitwise logic instruction and both operands have the same
6227/// opcode, try to sink the other opcode after the logic instruction.
6228SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
6229 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
6230 EVT VT = N0.getValueType();
6231 unsigned LogicOpcode = N->getOpcode();
6232 unsigned HandOpcode = N0.getOpcode();
6233 assert(ISD::isBitwiseLogicOp(LogicOpcode) && "Expected logic opcode");
6234 assert(HandOpcode == N1.getOpcode() && "Bad input!");
6235
6236 // Bail early if none of these transforms apply.
6237 if (N0.getNumOperands() == 0)
6238 return SDValue();
6239
6240 // FIXME: We should check number of uses of the operands to not increase
6241 // the instruction count for all transforms.
6242
6243 // Handle size-changing casts (or sign_extend_inreg).
6244 SDValue X = N0.getOperand(0);
6245 SDValue Y = N1.getOperand(0);
6246 EVT XVT = X.getValueType();
6247 SDLoc DL(N);
6248 if (ISD::isExtOpcode(HandOpcode) || ISD::isExtVecInRegOpcode(HandOpcode) ||
6249 (HandOpcode == ISD::SIGN_EXTEND_INREG &&
6250 N0.getOperand(1) == N1.getOperand(1))) {
6251 // If both operands have other uses, this transform would create extra
6252 // instructions without eliminating anything.
6253 if (!N0.hasOneUse() && !N1.hasOneUse())
6254 return SDValue();
6255 // We need matching integer source types.
6256 if (XVT != Y.getValueType())
6257 return SDValue();
6258 // Don't create an illegal op during or after legalization. Don't ever
6259 // create an unsupported vector op.
6260 if ((VT.isVector() || LegalOperations) &&
6261 !TLI.isOperationLegalOrCustom(LogicOpcode, XVT))
6262 return SDValue();
6263 // Avoid infinite looping with PromoteIntBinOp.
6264 // TODO: Should we apply desirable/legal constraints to all opcodes?
6265 if ((HandOpcode == ISD::ANY_EXTEND ||
6266 HandOpcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
6267 LegalTypes && !TLI.isTypeDesirableForOp(LogicOpcode, XVT))
6268 return SDValue();
6269 // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
6270 SDNodeFlags LogicFlags;
6271 LogicFlags.setDisjoint(N->getFlags().hasDisjoint() &&
6272 ISD::isExtOpcode(HandOpcode));
6273 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y, LogicFlags);
6274 if (HandOpcode == ISD::SIGN_EXTEND_INREG)
6275 return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
6276 return DAG.getNode(HandOpcode, DL, VT, Logic);
6277 }
6278
6279 // logic_op (truncate x), (truncate y) --> truncate (logic_op x, y)
6280 if (HandOpcode == ISD::TRUNCATE) {
6281 // If both operands have other uses, this transform would create extra
6282 // instructions without eliminating anything.
6283 if (!N0.hasOneUse() && !N1.hasOneUse())
6284 return SDValue();
6285 // We need matching source types.
6286 if (XVT != Y.getValueType())
6287 return SDValue();
6288 // Don't create an illegal op during or after legalization.
6289 if (LegalOperations && !TLI.isOperationLegal(LogicOpcode, XVT))
6290 return SDValue();
6291 // Be extra careful sinking truncate. If it's free, there's no benefit in
6292 // widening a binop. Also, don't create a logic op on an illegal type.
6293 if (TLI.isZExtFree(VT, XVT) && TLI.isTruncateFree(XVT, VT))
6294 return SDValue();
6295 if (!TLI.isTypeLegal(XVT))
6296 return SDValue();
6297 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
6298 return DAG.getNode(HandOpcode, DL, VT, Logic);
6299 }
6300
6301 // For binops SHL/SRL/SRA/AND:
6302 // logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
6303 if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL ||
6304 HandOpcode == ISD::SRA || HandOpcode == ISD::AND) &&
6305 N0.getOperand(1) == N1.getOperand(1)) {
6306 // If either operand has other uses, this transform is not an improvement.
6307 if (!N0.hasOneUse() || !N1.hasOneUse())
6308 return SDValue();
6309 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
6310 return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
6311 }
6312
6313 // Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
6314 if (HandOpcode == ISD::BSWAP) {
6315 // If either operand has other uses, this transform is not an improvement.
6316 if (!N0.hasOneUse() || !N1.hasOneUse())
6317 return SDValue();
6318 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
6319 return DAG.getNode(HandOpcode, DL, VT, Logic);
6320 }
6321
6322 // For funnel shifts FSHL/FSHR:
6323 // logic_op (OP x, x1, s), (OP y, y1, s) -->
6324 // --> OP (logic_op x, y), (logic_op, x1, y1), s
6325 if ((HandOpcode == ISD::FSHL || HandOpcode == ISD::FSHR) &&
6326 N0.getOperand(2) == N1.getOperand(2)) {
6327 if (!N0.hasOneUse() || !N1.hasOneUse())
6328 return SDValue();
6329 SDValue X1 = N0.getOperand(1);
6330 SDValue Y1 = N1.getOperand(1);
6331 SDValue S = N0.getOperand(2);
6332 SDValue Logic0 = DAG.getNode(LogicOpcode, DL, VT, X, Y);
6333 SDValue Logic1 = DAG.getNode(LogicOpcode, DL, VT, X1, Y1);
6334 return DAG.getNode(HandOpcode, DL, VT, Logic0, Logic1, S);
6335 }
6336
6337 // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
6338 // Only perform this optimization up until type legalization, before
6339 // LegalizeVectorOps. LegalizeVectorOps promotes vector operations by
6340 // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
6341 // we don't want to undo this promotion.
6342 // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
6343 // on scalars.
6344 if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) &&
6345 Level <= AfterLegalizeTypes) {
6346 // Input types must be integer and the same.
6347 if (XVT.isInteger() && XVT == Y.getValueType() &&
6348 !(VT.isVector() && TLI.isTypeLegal(VT) &&
6349 !XVT.isVector() && !TLI.isTypeLegal(XVT))) {
6350 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
6351 return DAG.getNode(HandOpcode, DL, VT, Logic);
6352 }
6353 }
6354
6355 // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
6356 // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
6357 // If both shuffles use the same mask, and both shuffle within a single
6358 // vector, then it is worthwhile to move the swizzle after the operation.
6359 // The type-legalizer generates this pattern when loading illegal
6360 // vector types from memory. In many cases this allows additional shuffle
6361 // optimizations.
6362 // There are other cases where moving the shuffle after the xor/and/or
6363 // is profitable even if shuffles don't perform a swizzle.
6364 // If both shuffles use the same mask, and both shuffles have the same first
6365 // or second operand, then it might still be profitable to move the shuffle
6366 // after the xor/and/or operation.
6367 if (HandOpcode == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
6368 auto *SVN0 = cast<ShuffleVectorSDNode>(N0);
6369 auto *SVN1 = cast<ShuffleVectorSDNode>(N1);
6370 assert(X.getValueType() == Y.getValueType() &&
6371 "Inputs to shuffles are not the same type");
6372
6373 // Check that both shuffles use the same mask. The masks are known to be of
6374 // the same length because the result vector type is the same.
6375 // Check also that shuffles have only one use to avoid introducing extra
6376 // instructions.
6377 if (!SVN0->hasOneUse() || !SVN1->hasOneUse() ||
6378 !SVN0->getMask().equals(SVN1->getMask()))
6379 return SDValue();
6380
6381 // Don't try to fold this node if it requires introducing a
6382 // build vector of all zeros that might be illegal at this stage.
6383 SDValue ShOp = N0.getOperand(1);
6384 if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
6385 ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
6386
6387 // (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C)
6388 if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
6389 SDValue Logic = DAG.getNode(LogicOpcode, DL, VT,
6390 N0.getOperand(0), N1.getOperand(0));
6391 return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask());
6392 }
6393
6394 // Don't try to fold this node if it requires introducing a
6395 // build vector of all zeros that might be illegal at this stage.
6396 ShOp = N0.getOperand(0);
6397 if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
6398 ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
6399
6400 // (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B))
6401 if (N0.getOperand(0) == N1.getOperand(0) && ShOp.getNode()) {
6402 SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, N0.getOperand(1),
6403 N1.getOperand(1));
6404 return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask());
6405 }
6406 }
6407
6408 return SDValue();
6409}
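// [Editor's illustration - not part of DAGCombiner.cpp.] A minimal standalone
// check of the hand-op hoisting above on concrete values: applying the logic
// op to the truncated operands equals truncating the logic op of the wide
// operands, which is why the cast can be sunk past the logic op.
static_assert(((0x1234u & 0xFFu) & (0xABCDu & 0xFFu)) ==
                  ((0x1234u & 0xABCDu) & 0xFFu),
              "logic_op (trunc x), (trunc y) == trunc (logic_op x, y)");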
6410
6411/// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
6412SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
6413 const SDLoc &DL) {
6414 SDValue LL, LR, RL, RR, N0CC, N1CC;
6415 if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
6416 !isSetCCEquivalent(N1, RL, RR, N1CC))
6417 return SDValue();
6418
6419 assert(N0.getValueType() == N1.getValueType() &&
6420 "Unexpected operand types for bitwise logic op");
6421 assert(LL.getValueType() == LR.getValueType() &&
6422 RL.getValueType() == RR.getValueType() &&
6423 "Unexpected operand types for setcc");
6424
6425 // If we're here post-legalization or the logic op type is not i1, the logic
6426 // op type must match a setcc result type. Also, all folds require new
6427 // operations on the left and right operands, so those types must match.
6428 EVT VT = N0.getValueType();
6429 EVT OpVT = LL.getValueType();
6430 if (LegalOperations || VT.getScalarType() != MVT::i1)
6431 if (VT != getSetCCResultType(OpVT))
6432 return SDValue();
6433 if (OpVT != RL.getValueType())
6434 return SDValue();
6435
6436 ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
6437 ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
6438 bool IsInteger = OpVT.isInteger();
6439 if (LR == RR && CC0 == CC1 && IsInteger) {
6440 bool IsZero = isNullOrNullSplat(LR);
6441 bool IsNeg1 = isAllOnesOrAllOnesSplat(LR);
6442
6443 // All bits clear?
6444 bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
6445 // All sign bits clear?
6446 bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
6447 // Any bits set?
6448 bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
6449 // Any sign bits set?
6450 bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
6451
6452 // (and (seteq X, 0), (seteq Y, 0)) --> (seteq (or X, Y), 0)
6453 // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
6454 // (or (setne X, 0), (setne Y, 0)) --> (setne (or X, Y), 0)
6455 // (or (setlt X, 0), (setlt Y, 0)) --> (setlt (or X, Y), 0)
6456 if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
6457 SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
6458 AddToWorklist(Or.getNode());
6459 return DAG.getSetCC(DL, VT, Or, LR, CC1);
6460 }
6461
6462 // All bits set?
6463 bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
6464 // All sign bits set?
6465 bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
6466 // Any bits clear?
6467 bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
6468 // Any sign bits clear?
6469 bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
6470
6471 // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
6472 // (and (setlt X, 0), (setlt Y, 0)) --> (setlt (and X, Y), 0)
6473 // (or (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
6474 // (or (setgt X, -1), (setgt Y, -1)) --> (setgt (and X, Y), -1)
6475 if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
6476 SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
6477 AddToWorklist(And.getNode());
6478 return DAG.getSetCC(DL, VT, And, LR, CC1);
6479 }
6480 }
6481
6482 // TODO: What is the 'or' equivalent of this fold?
6483 // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
6484 if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
6485 IsInteger && CC0 == ISD::SETNE &&
6486 ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
6487 (isAllOnesConstant(LR) && isNullConstant(RR)))) {
6488 SDValue One = DAG.getConstant(1, DL, OpVT);
6489 SDValue Two = DAG.getConstant(2, DL, OpVT);
6490 SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
6491 AddToWorklist(Add.getNode());
6492 return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
6493 }
6494
6495 // Try more general transforms if the predicates match and the only user of
6496 // the compares is the 'and' or 'or'.
6497 if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
6498 N0.hasOneUse() && N1.hasOneUse()) {
6499 // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
6500 // or (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
6501 if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
6502 SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
6503 SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
6504 SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
6505 SDValue Zero = DAG.getConstant(0, DL, OpVT);
6506 return DAG.getSetCC(DL, VT, Or, Zero, CC1);
6507 }
6508
6509 // Turn compare of constants whose difference is 1 bit into add+and+setcc.
6510 if ((IsAnd && CC1 == ISD::SETNE) || (!IsAnd && CC1 == ISD::SETEQ)) {
6511 // Match a shared variable operand and 2 non-opaque constant operands.
6512 auto MatchDiffPow2 = [&](ConstantSDNode *C0, ConstantSDNode *C1) {
6513 // The difference of the constants must be a single bit.
6514 const APInt &CMax =
6515 APIntOps::umax(C0->getAPIntValue(), C1->getAPIntValue());
6516 const APInt &CMin =
6517 APIntOps::umin(C0->getAPIntValue(), C1->getAPIntValue());
6518 return !C0->isOpaque() && !C1->isOpaque() && (CMax - CMin).isPowerOf2();
6519 };
6520 if (LL == RL && ISD::matchBinaryPredicate(LR, RR, MatchDiffPow2)) {
6521 // and/or (setcc X, CMax, ne), (setcc X, CMin, ne/eq) -->
6522 // setcc ((sub X, CMin), ~(CMax - CMin)), 0, ne/eq
6523 SDValue Max = DAG.getNode(ISD::UMAX, DL, OpVT, LR, RR);
6524 SDValue Min = DAG.getNode(ISD::UMIN, DL, OpVT, LR, RR);
6525 SDValue Offset = DAG.getNode(ISD::SUB, DL, OpVT, LL, Min);
6526 SDValue Diff = DAG.getNode(ISD::SUB, DL, OpVT, Max, Min);
6527 SDValue Mask = DAG.getNOT(DL, Diff, OpVT);
6528 SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Offset, Mask);
6529 SDValue Zero = DAG.getConstant(0, DL, OpVT);
6530 return DAG.getSetCC(DL, VT, And, Zero, CC0);
6531 }
6532 }
6533 }
6534
6535 // Canonicalize equivalent operands to LL == RL.
6536 if (LL == RR && LR == RL) {
6537 CC1 = ISD::getSetCCSwappedOperands(CC1);
6538 std::swap(RL, RR);
6539 }
6540
6541 // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
6542 // (or (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
6543 if (LL == RL && LR == RR) {
6544 ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, OpVT)
6545 : ISD::getSetCCOrOperation(CC0, CC1, OpVT);
6546 if (NewCC != ISD::SETCC_INVALID &&
6547 (!LegalOperations ||
6548 (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
6549 TLI.isOperationLegal(ISD::SETCC, OpVT))))
6550 return DAG.getSetCC(DL, VT, LL, LR, NewCC);
6551 }
6552
6553 return SDValue();
6554}
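// [Editor's illustration - not part of DAGCombiner.cpp.] Spot checks of two of
// the setcc identities used above, written in plain 32-bit unsigned
// arithmetic (the +1 wraps modulo 2^32, matching the DAG's integer math):
static_assert(((0u == 0u) && (3u == 0u)) == ((0u | 3u) == 0u),
              "(and (seteq X, 0), (seteq Y, 0)) == (seteq (or X, Y), 0)");
static_assert(((0xFFFFFFFFu != 0u) && (0xFFFFFFFFu != 0xFFFFFFFFu)) ==
                  ((0xFFFFFFFFu + 1u) >= 2u),
              "(and (setne X, 0), (setne X, -1)) == (setuge (add X, 1), 2)");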
6555
6556static bool arebothOperandsNotSNan(SDValue Operand1, SDValue Operand2,
6557 SelectionDAG &DAG) {
6558 return DAG.isKnownNeverSNaN(Operand2) && DAG.isKnownNeverSNaN(Operand1);
6559}
6560
6561static bool arebothOperandsNotNan(SDValue Operand1, SDValue Operand2,
6562 SelectionDAG &DAG) {
6563 return DAG.isKnownNeverNaN(Operand2) && DAG.isKnownNeverNaN(Operand1);
6564}
6565
6566// FIXME: use FMINIMUMNUM if possible, such as for RISC-V.
6567static unsigned getMinMaxOpcodeForFP(SDValue Operand1, SDValue Operand2,
6568 ISD::CondCode CC, unsigned OrAndOpcode,
6569 SelectionDAG &DAG,
6570 bool isFMAXNUMFMINNUM_IEEE,
6571 bool isFMAXNUMFMINNUM) {
6572 // The optimization cannot be applied for all the predicates because
6573 // of the way FMINNUM/FMAXNUM and FMINNUM_IEEE/FMAXNUM_IEEE handle
6574 // NaNs. For FMINNUM_IEEE/FMAXNUM_IEEE, the optimization cannot be
6575 // applied at all if one of the operands is a signaling NaN.
6576
6577 // It is safe to use FMINNUM_IEEE/FMAXNUM_IEEE if all the operands
6578 // are non NaN values.
6579 if (((CC == ISD::SETLT || CC == ISD::SETLE) && (OrAndOpcode == ISD::OR)) ||
6580 ((CC == ISD::SETGT || CC == ISD::SETGE) && (OrAndOpcode == ISD::AND))) {
6581 return arebothOperandsNotNan(Operand1, Operand2, DAG) &&
6582 isFMAXNUMFMINNUM_IEEE
6583 ? ISD::FMINNUM_IEEE
6584 : ISD::DELETED_NODE;
6585 }
6586
6587 if (((CC == ISD::SETGT || CC == ISD::SETGE) && (OrAndOpcode == ISD::OR)) ||
6588 ((CC == ISD::SETLT || CC == ISD::SETLE) && (OrAndOpcode == ISD::AND))) {
6589 return arebothOperandsNotNan(Operand1, Operand2, DAG) &&
6590 isFMAXNUMFMINNUM_IEEE
6591 ? ISD::FMAXNUM_IEEE
6592 : ISD::DELETED_NODE;
6593 }
6594
6595 // Both FMINNUM/FMAXNUM and FMINNUM_IEEE/FMAXNUM_IEEE handle quiet
6596 // NaNs in the same way. But, FMINNUM/FMAXNUM and FMINNUM_IEEE/
6597 // FMAXNUM_IEEE handle signaling NaNs differently. If we cannot prove
6598 // that there are not any sNaNs, then the optimization is not valid
6599 // for FMINNUM_IEEE/FMAXNUM_IEEE. In the presence of sNaNs, we apply
6600 // the optimization using FMINNUM/FMAXNUM for the following cases. If
6601 // we can prove that we do not have any sNaNs, then we can do the
6602 // optimization using FMINNUM_IEEE/FMAXNUM_IEEE for the following
6603 // cases.
6604 if (((CC == ISD::SETOLT || CC == ISD::SETOLE) && (OrAndOpcode == ISD::OR)) ||
6605 ((CC == ISD::SETUGT || CC == ISD::SETUGE) && (OrAndOpcode == ISD::AND))) {
6606 return isFMAXNUMFMINNUM ? ISD::FMINNUM
6607 : arebothOperandsNotSNan(Operand1, Operand2, DAG) &&
6608 isFMAXNUMFMINNUM_IEEE
6609 ? ISD::FMINNUM_IEEE
6610 : ISD::DELETED_NODE;
6611 }
6612
6613 if (((CC == ISD::SETOGT || CC == ISD::SETOGE) && (OrAndOpcode == ISD::OR)) ||
6614 ((CC == ISD::SETULT || CC == ISD::SETULE) && (OrAndOpcode == ISD::AND))) {
6615 return isFMAXNUMFMINNUM ? ISD::FMAXNUM
6616 : arebothOperandsNotSNan(Operand1, Operand2, DAG) &&
6617 isFMAXNUMFMINNUM_IEEE
6618 ? ISD::FMAXNUM_IEEE
6619 : ISD::DELETED_NODE;
6620 }
6621
6622 return ISD::DELETED_NODE;
6623}
6624
6625 static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG) {
6626 using AndOrSETCCFoldKind = TargetLowering::AndOrSETCCFoldKind;
6627 assert(
6628 (LogicOp->getOpcode() == ISD::AND || LogicOp->getOpcode() == ISD::OR) &&
6629 "Invalid Op to combine SETCC with");
6630
6631 // TODO: Search past casts/truncates.
6632 SDValue LHS = LogicOp->getOperand(0);
6633 SDValue RHS = LogicOp->getOperand(1);
6634 if (LHS->getOpcode() != ISD::SETCC || RHS->getOpcode() != ISD::SETCC ||
6635 !LHS->hasOneUse() || !RHS->hasOneUse())
6636 return SDValue();
6637
6638 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6639 AndOrSETCCFoldKind TargetPreference = TLI.isDesirableToCombineLogicOpOfSETCC(
6640 LogicOp, LHS.getNode(), RHS.getNode());
6641
6642 SDValue LHS0 = LHS->getOperand(0);
6643 SDValue RHS0 = RHS->getOperand(0);
6644 SDValue LHS1 = LHS->getOperand(1);
6645 SDValue RHS1 = RHS->getOperand(1);
6646 // TODO: We don't actually need a splat here, for vectors we just need the
6647 // invariants to hold for each element.
6648 auto *LHS1C = isConstOrConstSplat(LHS1);
6649 auto *RHS1C = isConstOrConstSplat(RHS1);
6650 ISD::CondCode CCL = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
6651 ISD::CondCode CCR = cast<CondCodeSDNode>(RHS.getOperand(2))->get();
6652 EVT VT = LogicOp->getValueType(0);
6653 EVT OpVT = LHS0.getValueType();
6654 SDLoc DL(LogicOp);
6655
6656 // Check if the operands of an and/or operation are comparisons and if they
6657 // compare against the same value. Replace the and/or-cmp-cmp sequence with
6658 // min/max cmp sequence. If LHS1 is equal to RHS1, then the or-cmp-cmp
6659 // sequence will be replaced with min-cmp sequence:
6660 // (LHS0 < LHS1) | (RHS0 < RHS1) -> min(LHS0, RHS0) < LHS1
6661 // and and-cmp-cmp will be replaced with max-cmp sequence:
6662 // (LHS0 < LHS1) & (RHS0 < RHS1) -> max(LHS0, RHS0) < LHS1
6663 // The optimization does not work for `==` or `!=` .
6664 // The two comparisons should have either the same predicate or the
6665 // predicate of one of the comparisons is the opposite of the other one.
6666 bool isFMAXNUMFMINNUM_IEEE = TLI.isOperationLegal(ISD::FMAXNUM_IEEE, OpVT) &&
6667 TLI.isOperationLegal(ISD::FMINNUM_IEEE, OpVT);
6668 bool isFMAXNUMFMINNUM = TLI.isOperationLegalOrCustom(ISD::FMAXNUM, OpVT) &&
6669 TLI.isOperationLegalOrCustom(ISD::FMINNUM, OpVT);
6670 if (((OpVT.isInteger() && TLI.isOperationLegal(ISD::UMAX, OpVT) &&
6671 TLI.isOperationLegal(ISD::SMAX, OpVT) &&
6672 TLI.isOperationLegal(ISD::UMIN, OpVT) &&
6673 TLI.isOperationLegal(ISD::SMIN, OpVT)) ||
6674 (OpVT.isFloatingPoint() &&
6675 (isFMAXNUMFMINNUM_IEEE || isFMAXNUMFMINNUM))) &&
6676 !ISD::isIntEqualitySetCC(CCL) && !ISD::isFPEqualitySetCC(CCL) &&
6677 CCL != ISD::SETFALSE && CCL != ISD::SETO && CCL != ISD::SETUO &&
6678 CCL != ISD::SETTRUE &&
6679 (CCL == CCR || CCL == ISD::getSetCCSwappedOperands(CCR))) {
6680
6681 SDValue CommonValue, Operand1, Operand2;
6682 ISD::CondCode CC = ISD::SETCC_INVALID;
6683 if (CCL == CCR) {
6684 if (LHS0 == RHS0) {
6685 CommonValue = LHS0;
6686 Operand1 = LHS1;
6687 Operand2 = RHS1;
6688 CC = CCL;
6689 } else if (LHS1 == RHS1) {
6690 CommonValue = LHS1;
6691 Operand1 = LHS0;
6692 Operand2 = RHS0;
6693 CC = CCL;
6694 }
6695 } else {
6696 assert(CCL == ISD::getSetCCSwappedOperands(CCR) && "Unexpected CC");
6697 if (LHS0 == RHS1) {
6698 CommonValue = LHS0;
6699 Operand1 = LHS1;
6700 Operand2 = RHS0;
6701 CC = CCR;
6702 } else if (RHS0 == LHS1) {
6703 CommonValue = LHS1;
6704 Operand1 = LHS0;
6705 Operand2 = RHS1;
6706 CC = CCL;
6707 }
6708 }
6709
6710 // Don't do this transform for sign bit tests. Let foldLogicOfSetCCs
6711 // handle it using OR/AND.
6712 if (CC == ISD::SETLT && isNullOrNullSplat(CommonValue))
6713 CC = ISD::SETCC_INVALID;
6714 else if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CommonValue))
6715 CC = ISD::SETCC_INVALID;
6716
6717 if (CC != ISD::SETCC_INVALID) {
6718 unsigned NewOpcode = ISD::DELETED_NODE;
6719 bool IsSigned = isSignedIntSetCC(CC);
6720 if (OpVT.isInteger()) {
6721 bool IsLess = (CC == ISD::SETLE || CC == ISD::SETULE ||
6722 CC == ISD::SETLT || CC == ISD::SETULT);
6723 bool IsOr = (LogicOp->getOpcode() == ISD::OR);
6724 if (IsLess == IsOr)
6725 NewOpcode = IsSigned ? ISD::SMIN : ISD::UMIN;
6726 else
6727 NewOpcode = IsSigned ? ISD::SMAX : ISD::UMAX;
6728 } else if (OpVT.isFloatingPoint())
6729 NewOpcode =
6730 getMinMaxOpcodeForFP(Operand1, Operand2, CC, LogicOp->getOpcode(),
6731 DAG, isFMAXNUMFMINNUM_IEEE, isFMAXNUMFMINNUM);
6732
6733 if (NewOpcode != ISD::DELETED_NODE) {
6734 SDValue MinMaxValue =
6735 DAG.getNode(NewOpcode, DL, OpVT, Operand1, Operand2);
6736 return DAG.getSetCC(DL, VT, MinMaxValue, CommonValue, CC);
6737 }
6738 }
6739 }
6740
6741 if (LHS0 == LHS1 && RHS0 == RHS1 && CCL == CCR &&
6742 LHS0.getValueType() == RHS0.getValueType() &&
6743 ((LogicOp->getOpcode() == ISD::AND && CCL == ISD::SETO) ||
6744 (LogicOp->getOpcode() == ISD::OR && CCL == ISD::SETUO)))
6745 return DAG.getSetCC(DL, VT, LHS0, RHS0, CCL);
6746
6747 if (TargetPreference == AndOrSETCCFoldKind::None)
6748 return SDValue();
6749
6750 if (CCL == CCR &&
6751 CCL == (LogicOp->getOpcode() == ISD::AND ? ISD::SETNE : ISD::SETEQ) &&
6752 LHS0 == RHS0 && LHS1C && RHS1C && OpVT.isInteger()) {
6753 const APInt &APLhs = LHS1C->getAPIntValue();
6754 const APInt &APRhs = RHS1C->getAPIntValue();
6755
6756 // Preference is to use ISD::ABS or we already have an ISD::ABS (in which
6757 // case this is just a compare).
6758 if (APLhs == (-APRhs) &&
6759 ((TargetPreference & AndOrSETCCFoldKind::ABS) ||
6760 DAG.doesNodeExist(ISD::ABS, DAG.getVTList(OpVT), {LHS0}))) {
6761 const APInt &C = APLhs.isNegative() ? APRhs : APLhs;
6762 // (icmp eq A, C) | (icmp eq A, -C)
6763 // -> (icmp eq Abs(A), C)
6764 // (icmp ne A, C) & (icmp ne A, -C)
6765 // -> (icmp ne Abs(A), C)
6766 SDValue AbsOp = DAG.getNode(ISD::ABS, DL, OpVT, LHS0);
6767 return DAG.getNode(ISD::SETCC, DL, VT, AbsOp,
6768 DAG.getConstant(C, DL, OpVT), LHS.getOperand(2));
6769 } else if (TargetPreference &
6770 (AndOrSETCCFoldKind::AddAnd | AndOrSETCCFoldKind::NotAnd)) {
6771
6772 // AndOrSETCCFoldKind::AddAnd:
6773 // A == C0 | A == C1
6774 // IF IsPow2(smax(C0, C1)-smin(C0, C1))
6775 // -> ((A - smin(C0, C1)) & ~(smax(C0, C1)-smin(C0, C1))) == 0
6776 // A != C0 & A != C1
6777 // IF IsPow2(smax(C0, C1)-smin(C0, C1))
6778 // -> ((A - smin(C0, C1)) & ~(smax(C0, C1)-smin(C0, C1))) != 0
6779
6780 // AndOrSETCCFoldKind::NotAnd:
6781 // A == C0 | A == C1
6782 // IF smax(C0, C1) == -1 AND IsPow2(smax(C0, C1) - smin(C0, C1))
6783 // -> ~A & smin(C0, C1) == 0
6784 // A != C0 & A != C1
6785 // IF smax(C0, C1) == -1 AND IsPow2(smax(C0, C1) - smin(C0, C1))
6786 // -> ~A & smin(C0, C1) != 0
6787
6788 const APInt &MaxC = APIntOps::smax(APRhs, APLhs);
6789 const APInt &MinC = APIntOps::smin(APRhs, APLhs);
6790 APInt Dif = MaxC - MinC;
6791 if (!Dif.isZero() && Dif.isPowerOf2()) {
6792 if (MaxC.isAllOnes() &&
6793 (TargetPreference & AndOrSETCCFoldKind::NotAnd)) {
6794 SDValue NotOp = DAG.getNOT(DL, LHS0, OpVT);
6795 SDValue AndOp = DAG.getNode(ISD::AND, DL, OpVT, NotOp,
6796 DAG.getConstant(MinC, DL, OpVT));
6797 return DAG.getNode(ISD::SETCC, DL, VT, AndOp,
6798 DAG.getConstant(0, DL, OpVT), LHS.getOperand(2));
6799 } else if (TargetPreference & AndOrSETCCFoldKind::AddAnd) {
6800
6801 SDValue AddOp = DAG.getNode(ISD::ADD, DL, OpVT, LHS0,
6802 DAG.getConstant(-MinC, DL, OpVT));
6803 SDValue AndOp = DAG.getNode(ISD::AND, DL, OpVT, AddOp,
6804 DAG.getConstant(~Dif, DL, OpVT));
6805 return DAG.getNode(ISD::SETCC, DL, VT, AndOp,
6806 DAG.getConstant(0, DL, OpVT), LHS.getOperand(2));
6807 }
6808 }
6809 }
6810 }
6811
6812 return SDValue();
6813}
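// [Editor's illustration - not part of DAGCombiner.cpp.] The core or/and to
// min/max rewrite above, checked on concrete unsigned values:
static_assert(((3u < 5u) || (7u < 5u)) == ((3u < 7u ? 3u : 7u) < 5u),
              "(a < c) | (b < c) == min(a, b) < c");
static_assert(((3u < 5u) && (7u < 5u)) == ((3u > 7u ? 3u : 7u) < 5u),
              "(a < c) & (b < c) == max(a, b) < c");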
6814
6815// Combine `(select c, (X & 1), 0)` -> `(and (zext c), X)`.
6816// We canonicalize to the `select` form in the middle end, but the `and` form
6817// gets better codegen and all tested targets (arm, x86, riscv)
6818 static SDValue combineSelectAsExtAnd(SDValue Cond, SDValue T, SDValue F,
6819 const SDLoc &DL, SelectionDAG &DAG) {
6820 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6821 if (!isNullConstant(F))
6822 return SDValue();
6823
6824 EVT CondVT = Cond.getValueType();
6825 if (TLI.getBooleanContents(CondVT) !=
6826 TargetLowering::ZeroOrOneBooleanContent)
6827 return SDValue();
6828
6829 if (T.getOpcode() != ISD::AND)
6830 return SDValue();
6831
6832 if (!isOneConstant(T.getOperand(1)))
6833 return SDValue();
6834
6835 EVT OpVT = T.getValueType();
6836
6837 SDValue CondMask =
6838 OpVT == CondVT ? Cond : DAG.getBoolExtOrTrunc(Cond, DL, OpVT, CondVT);
6839 return DAG.getNode(ISD::AND, DL, OpVT, CondMask, T.getOperand(0));
6840}
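// [Editor's illustration - not part of DAGCombiner.cpp.] The select-to-and
// rewrite above on a zero-or-one condition, checked on concrete values:
static_assert((true ? (0xBu & 1u) : 0u) == (1u & 0xBu),
              "(select c, (X & 1), 0) == (and (zext c), X), c = 1");
static_assert((false ? (0xBu & 1u) : 0u) == (0u & 0xBu),
              "(select c, (X & 1), 0) == (and (zext c), X), c = 0");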
6841
6842/// This contains all DAGCombine rules which reduce two values combined by
6843/// an And operation to a single value. This makes them reusable in the context
6844/// of visitSELECT(). Rules involving constants are not included as
6845/// visitSELECT() already handles those cases.
6846SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
6847 EVT VT = N1.getValueType();
6848 SDLoc DL(N);
6849
6850 // fold (and x, undef) -> 0
6851 if (N0.isUndef() || N1.isUndef())
6852 return DAG.getConstant(0, DL, VT);
6853
6854 if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
6855 return V;
6856
6857 // Canonicalize:
6858 // and(x, add) -> and(add, x)
6859 if (N1.getOpcode() == ISD::ADD)
6860 std::swap(N0, N1);
6861
6862 // TODO: Rewrite this to return a new 'AND' instead of using CombineTo.
6863 if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
6864 VT.isScalarInteger() && VT.getSizeInBits() <= 64 && N0->hasOneUse()) {
6865 if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
6866 if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
6867 // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
6868 // immediate for an add, but it is legal if its top c2 bits are set,
6869 // transform the ADD so the immediate doesn't need to be materialized
6870 // in a register.
6871 APInt ADDC = ADDI->getAPIntValue();
6872 APInt SRLC = SRLI->getAPIntValue();
6873 if (ADDC.getSignificantBits() <= 64 && SRLC.ult(VT.getSizeInBits()) &&
6874 !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
6875 APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
6876 SRLC.getZExtValue());
6877 if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
6878 ADDC |= Mask;
6879 if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
6880 SDLoc DL0(N0);
6881 SDValue NewAdd =
6882 DAG.getNode(ISD::ADD, DL0, VT,
6883 N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
6884 CombineTo(N0.getNode(), NewAdd);
6885 // Return N so it doesn't get rechecked!
6886 return SDValue(N, 0);
6887 }
6888 }
6889 }
6890 }
6891 }
6892 }
6893
6894 return SDValue();
6895}
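// [Editor's illustration - not part of DAGCombiner.cpp.] The add-immediate
// rewrite above relies on this bit-level fact: when the final AND only keeps
// the low bits (here the low 4, because the other operand is a right shift by
// 4), setting the top bits of the add constant cannot change the result,
// since carries only propagate upward.
static_assert(((0x57u + 0x03u) & 0x0Fu) == ((0x57u + 0xF3u) & 0x0Fu),
              "high bits or'ed into the add immediate are masked away");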
6896
6897bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
6898 EVT LoadResultTy, EVT &ExtVT) {
6899 if (!AndC->getAPIntValue().isMask())
6900 return false;
6901
6902 unsigned ActiveBits = AndC->getAPIntValue().countr_one();
6903
6904 ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
6905 EVT LoadedVT = LoadN->getMemoryVT();
6906
6907 if (ExtVT == LoadedVT &&
6908 (!LegalOperations ||
6909 TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
6910 // ZEXTLOAD will match without needing to change the size of the value being
6911 // loaded.
6912 return true;
6913 }
6914
6915 // Do not change the width of volatile or atomic loads.
6916 if (!LoadN->isSimple())
6917 return false;
6918
6919 // Do not generate loads of non-round integer types since these can
6920 // be expensive (and would be wrong if the type is not byte sized).
6921 if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
6922 return false;
6923
6924 if (LegalOperations &&
6925 !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
6926 return false;
6927
6928 if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT, /*ByteOffset=*/0))
6929 return false;
6930
6931 return true;
6932}
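// [Editor's illustration - not part of DAGCombiner.cpp.] The narrowing above
// works because an AND with a low-bit mask keeps exactly the bits that a
// smaller zero-extending load of the same location would produce:
static_assert((0x12345678u & 0xFFu) == 0x78u,
              "and with 0xFF == zero-extended low byte");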
6933
6934bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
6935 ISD::LoadExtType ExtType, EVT &MemVT,
6936 unsigned ShAmt) {
6937 if (!LDST)
6938 return false;
6939
6940 // Only allow byte offsets.
6941 if (ShAmt % 8)
6942 return false;
6943 const unsigned ByteShAmt = ShAmt / 8;
6944
6945 // Do not generate loads of non-round integer types since these can
6946 // be expensive (and would be wrong if the type is not byte sized).
6947 if (!MemVT.isRound())
6948 return false;
6949
6950 // Don't change the width of volatile or atomic loads.
6951 if (!LDST->isSimple())
6952 return false;
6953
6954 EVT LdStMemVT = LDST->getMemoryVT();
6955
6956 // Bail out when changing the scalable property, since we can't be sure that
6957 // we're actually narrowing here.
6958 if (LdStMemVT.isScalableVector() != MemVT.isScalableVector())
6959 return false;
6960
6961 // Verify that we are actually reducing a load width here.
6962 if (LdStMemVT.bitsLT(MemVT))
6963 return false;
6964
6965 // Ensure that this isn't going to produce an unsupported memory access.
6966 if (ShAmt) {
6967 const Align LDSTAlign = LDST->getAlign();
6968 const Align NarrowAlign = commonAlignment(LDSTAlign, ByteShAmt);
6969 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
6970 LDST->getAddressSpace(), NarrowAlign,
6971 LDST->getMemOperand()->getFlags()))
6972 return false;
6973 }
6974
6975 // It's not possible to generate a constant of extended or untyped type.
6976 EVT PtrType = LDST->getBasePtr().getValueType();
6977 if (PtrType == MVT::Untyped || PtrType.isExtended())
6978 return false;
6979
6980 if (isa<LoadSDNode>(LDST)) {
6981 LoadSDNode *Load = cast<LoadSDNode>(LDST);
6982 // Don't transform one with multiple uses, this would require adding a new
6983 // load.
6984 if (!SDValue(Load, 0).hasOneUse())
6985 return false;
6986
6987 if (LegalOperations &&
6988 !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
6989 return false;
6990
6991 // For the transform to be legal, the load must produce only two values
6992 // (the value loaded and the chain). Don't transform a pre-increment
6993 // load, for example, which produces an extra value. Otherwise the
6994 // transformation is not equivalent, and the downstream logic to replace
6995 // uses gets things wrong.
6996 if (Load->getNumValues() > 2)
6997 return false;
6998
6999 // If the load that we're shrinking is an extload and we're not just
7000 // discarding the extension we can't simply shrink the load. Bail.
7001 // TODO: It would be possible to merge the extensions in some cases.
7002 if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
7003 Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
7004 return false;
7005
7006 if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT, ByteShAmt))
7007 return false;
7008 } else {
7009 assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
7010 StoreSDNode *Store = cast<StoreSDNode>(LDST);
7011 // Can't write outside the original store
7012 if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
7013 return false;
7014
7015 if (LegalOperations &&
7016 !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
7017 return false;
7018 }
7019 return true;
7020}
7021
7022bool DAGCombiner::SearchForAndLoads(SDNode *N,
7023 SmallVectorImpl<LoadSDNode*> &Loads,
7024 SmallPtrSetImpl<SDNode*> &NodesWithConsts,
7025 ConstantSDNode *Mask,
7026 SDNode *&NodeToMask) {
7027 // Recursively search for the operands, looking for loads which can be
7028 // narrowed.
7029 for (SDValue Op : N->op_values()) {
7030 if (Op.getValueType().isVector())
7031 return false;
7032
7033 // Some constants may need fixing up later if they are too large.
7034 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
7035 assert(ISD::isBitwiseLogicOp(N->getOpcode()) &&
7036 "Expected bitwise logic operation");
7037 if (!C->getAPIntValue().isSubsetOf(Mask->getAPIntValue()))
7038 NodesWithConsts.insert(N);
7039 continue;
7040 }
7041
7042 if (!Op.hasOneUse())
7043 return false;
7044
7045 switch(Op.getOpcode()) {
7046 case ISD::LOAD: {
7047 auto *Load = cast<LoadSDNode>(Op);
7048 EVT ExtVT;
7049 if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
7050 isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {
7051
7052 // ZEXTLOAD is already small enough.
7053 if (Load->getExtensionType() == ISD::ZEXTLOAD &&
7054 ExtVT.bitsGE(Load->getMemoryVT()))
7055 continue;
7056
7057 // Use LE to convert equal sized loads to zext.
7058 if (ExtVT.bitsLE(Load->getMemoryVT()))
7059 Loads.push_back(Load);
7060
7061 continue;
7062 }
7063 return false;
7064 }
7065 case ISD::ZERO_EXTEND:
7066 case ISD::AssertZext: {
7067 unsigned ActiveBits = Mask->getAPIntValue().countr_one();
7068 EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
7069 EVT VT = Op.getOpcode() == ISD::AssertZext ?
7070 cast<VTSDNode>(Op.getOperand(1))->getVT() :
7071 Op.getOperand(0).getValueType();
7072
7073 // We can accept extending nodes if the mask is wider or an equal
7074 // width to the original type.
7075 if (ExtVT.bitsGE(VT))
7076 continue;
7077 break;
7078 }
7079 case ISD::OR:
7080 case ISD::XOR:
7081 case ISD::AND:
7082 if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
7083 NodeToMask))
7084 return false;
7085 continue;
7086 }
7087
7088 // Allow one node which will be masked along with any loads found.
7089 if (NodeToMask)
7090 return false;
7091
7092 // Also ensure that the node to be masked only produces one data result.
7093 NodeToMask = Op.getNode();
7094 if (NodeToMask->getNumValues() > 1) {
7095 bool HasValue = false;
7096 for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
7097 MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
7098 if (VT != MVT::Glue && VT != MVT::Other) {
7099 if (HasValue) {
7100 NodeToMask = nullptr;
7101 return false;
7102 }
7103 HasValue = true;
7104 }
7105 }
7106 assert(HasValue && "Node to be masked has no data result?");
7107 }
7108 }
7109 return true;
7110}
7111
7112bool DAGCombiner::BackwardsPropagateMask(SDNode *N) {
7113 auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
7114 if (!Mask)
7115 return false;
7116
7117 if (!Mask->getAPIntValue().isMask())
7118 return false;
7119
7120 // No need to do anything if the and directly uses a load.
7121 if (isa<LoadSDNode>(N->getOperand(0)))
7122 return false;
7123
7124 SmallVector<LoadSDNode*, 8> Loads;
7125 SmallPtrSet<SDNode*, 2> NodesWithConsts;
7126 SDNode *FixupNode = nullptr;
7127 if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
7128 if (Loads.empty())
7129 return false;
7130
7131 LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
7132 SDValue MaskOp = N->getOperand(1);
7133
7134 // If it exists, fixup the single node we allow in the tree that needs
7135 // masking.
7136 if (FixupNode) {
7137 LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
7138 SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
7139 FixupNode->getValueType(0),
7140 SDValue(FixupNode, 0), MaskOp);
7141 DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
7142 if (And.getOpcode() == ISD::AND)
7143 DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
7144 }
7145
7146 // Narrow any constants that need it.
7147 for (auto *LogicN : NodesWithConsts) {
7148 SDValue Op0 = LogicN->getOperand(0);
7149 SDValue Op1 = LogicN->getOperand(1);
7150
7151 // We only need to fix AND if both inputs are constants. And we only need
7152 // to fix one of the constants.
7153 if (LogicN->getOpcode() == ISD::AND &&
7154 (!isa<ConstantSDNode>(Op0) || !isa<ConstantSDNode>(Op1)))
7155 continue;
7156
7157 if (isa<ConstantSDNode>(Op0) && LogicN->getOpcode() != ISD::AND)
7158 Op0 =
7159 DAG.getNode(ISD::AND, SDLoc(Op0), Op0.getValueType(), Op0, MaskOp);
7160
7161 if (isa<ConstantSDNode>(Op1))
7162 Op1 =
7163 DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(), Op1, MaskOp);
7164
7165 if (isa<ConstantSDNode>(Op0) && !isa<ConstantSDNode>(Op1))
7166 std::swap(Op0, Op1);
7167
7168 DAG.UpdateNodeOperands(LogicN, Op0, Op1);
7169 }
7170
7171 // Create narrow loads.
7172 for (auto *Load : Loads) {
7173 LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
7174 SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
7175 SDValue(Load, 0), MaskOp);
7176 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
7177 if (And.getOpcode() == ISD::AND)
7178 And = SDValue(
7179 DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
7180 SDValue NewLoad = reduceLoadWidth(And.getNode());
7181 assert(NewLoad &&
7182 "Shouldn't be masking the load if it can't be narrowed");
7183 CombineTo(Load, NewLoad, NewLoad.getValue(1));
7184 }
7185 DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
7186 return true;
7187 }
7188 return false;
7189}
7190
7191// Unfold
7192// x & (-1 'logical shift' y)
7193// To
7194// (x 'opposite logical shift' y) 'logical shift' y
7195// if it is better for performance.
7196SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
7197 assert(N->getOpcode() == ISD::AND);
7198
7199 SDValue N0 = N->getOperand(0);
7200 SDValue N1 = N->getOperand(1);
7201
7202 // Do we actually prefer shifts over mask?
7203 if (!TLI.shouldFoldMaskToVariableShiftPair(N0))
7204 return SDValue();
7205
7206 // Try to match (-1 '[outer] logical shift' y)
7207 unsigned OuterShift;
7208 unsigned InnerShift; // The opposite direction to the OuterShift.
7209 SDValue Y; // Shift amount.
7210 auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
7211 if (!M.hasOneUse())
7212 return false;
7213 OuterShift = M->getOpcode();
7214 if (OuterShift == ISD::SHL)
7215 InnerShift = ISD::SRL;
7216 else if (OuterShift == ISD::SRL)
7217 InnerShift = ISD::SHL;
7218 else
7219 return false;
7220 if (!isAllOnesConstant(M->getOperand(0)))
7221 return false;
7222 Y = M->getOperand(1);
7223 return true;
7224 };
7225
7226 SDValue X;
7227 if (matchMask(N1))
7228 X = N0;
7229 else if (matchMask(N0))
7230 X = N1;
7231 else
7232 return SDValue();
7233
7234 SDLoc DL(N);
7235 EVT VT = N->getValueType(0);
7236
7237 // tmp = x 'opposite logical shift' y
7238 SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
7239 // ret = tmp 'logical shift' y
7240 SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
7241
7242 return T1;
7243}
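// [Editor's illustration - not part of DAGCombiner.cpp.] The unfold above in
// both shift directions, checked on concrete 32-bit values:
static_assert((0xDEADBEEFu & (~0u << 8)) == ((0xDEADBEEFu >> 8) << 8),
              "x & (-1 << y) == (x >> y) << y");
static_assert((0xDEADBEEFu & (~0u >> 8)) == ((0xDEADBEEFu << 8) >> 8),
              "x & (-1 >> y) == (x << y) >> y");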
7244
7245/// Try to replace shift/logic that tests if a bit is clear with mask + setcc.
7246/// For a target with a bit test, this is expected to become test + set and save
7247/// at least 1 instruction.
7248 static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) {
7249 assert(And->getOpcode() == ISD::AND && "Expected an 'and' op");
7250
7251 // Look through an optional extension.
7252 SDValue And0 = And->getOperand(0), And1 = And->getOperand(1);
7253 if (And0.getOpcode() == ISD::ANY_EXTEND && And0.hasOneUse())
7254 And0 = And0.getOperand(0);
7255 if (!isOneConstant(And1) || !And0.hasOneUse())
7256 return SDValue();
7257
7258 SDValue Src = And0;
7259
7260 // Attempt to find a 'not' op.
7261 // TODO: Should we favor test+set even without the 'not' op?
7262 bool FoundNot = false;
7263 if (isBitwiseNot(Src)) {
7264 FoundNot = true;
7265 Src = Src.getOperand(0);
7266
7267 // Look through an optional truncation. The source operand may not be the
7268 // same type as the original 'and', but that is ok because we are masking
7269 // off everything but the low bit.
7270 if (Src.getOpcode() == ISD::TRUNCATE && Src.hasOneUse())
7271 Src = Src.getOperand(0);
7272 }
7273
7274 // Match a shift-right by constant.
7275 if (Src.getOpcode() != ISD::SRL || !Src.hasOneUse())
7276 return SDValue();
7277
7278 // This is probably not worthwhile without a supported type.
7279 EVT SrcVT = Src.getValueType();
7280 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
7281 if (!TLI.isTypeLegal(SrcVT))
7282 return SDValue();
7283
7284 // We might have looked through casts that make this transform invalid.
7285 unsigned BitWidth = SrcVT.getScalarSizeInBits();
7286 SDValue ShiftAmt = Src.getOperand(1);
7287 auto *ShiftAmtC = dyn_cast<ConstantSDNode>(ShiftAmt);
7288 if (!ShiftAmtC || !ShiftAmtC->getAPIntValue().ult(BitWidth))
7289 return SDValue();
7290
7291 // Set source to shift source.
7292 Src = Src.getOperand(0);
7293
7294 // Try again to find a 'not' op.
7295 // TODO: Should we favor test+set even with two 'not' ops?
7296 if (!FoundNot) {
7297 if (!isBitwiseNot(Src))
7298 return SDValue();
7299 Src = Src.getOperand(0);
7300 }
7301
7302 if (!TLI.hasBitTest(Src, ShiftAmt))
7303 return SDValue();
7304
7305 // Turn this into a bit-test pattern using mask op + setcc:
7306 // and (not (srl X, C)), 1 --> (and X, 1<<C) == 0
7307 // and (srl (not X), C)), 1 --> (and X, 1<<C) == 0
7308 SDLoc DL(And);
7309 SDValue X = DAG.getZExtOrTrunc(Src, DL, SrcVT);
7310 EVT CCVT =
7311 TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
7312 SDValue Mask = DAG.getConstant(
7313 APInt::getOneBitSet(BitWidth, ShiftAmtC->getZExtValue()), DL, SrcVT);
7314 SDValue NewAnd = DAG.getNode(ISD::AND, DL, SrcVT, X, Mask);
7315 SDValue Zero = DAG.getConstant(0, DL, SrcVT);
7316 SDValue Setcc = DAG.getSetCC(DL, CCVT, NewAnd, Zero, ISD::SETEQ);
7317 return DAG.getZExtOrTrunc(Setcc, DL, And->getValueType(0));
7318}
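// [Editor's illustration - not part of DAGCombiner.cpp.] The bit-test rewrite
// above, checked for one clear and one set bit of X = 0b1010:
static_assert(((~(0xAu >> 2)) & 1u) == ((0xAu & (1u << 2)) == 0u ? 1u : 0u),
              "and (not (srl X, C)), 1 == ((X & (1 << C)) == 0), bit clear");
static_assert(((~(0xAu >> 1)) & 1u) == ((0xAu & (1u << 1)) == 0u ? 1u : 0u),
              "and (not (srl X, C)), 1 == ((X & (1 << C)) == 0), bit set");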
7319
7320/// For targets that support usubsat, match a bit-hack form of that operation
7321/// that ends in 'and' and convert it.
7322 static SDValue foldAndToUsubsat(SDNode *N, SelectionDAG &DAG, const SDLoc &DL) {
7323 EVT VT = N->getValueType(0);
7324 unsigned BitWidth = VT.getScalarSizeInBits();
7325 APInt SignMask = APInt::getSignMask(BitWidth);
7326
7327 // (i8 X ^ 128) & (i8 X s>> 7) --> usubsat X, 128
7328 // (i8 X + 128) & (i8 X s>> 7) --> usubsat X, 128
7329 // xor/add with SMIN (signmask) are logically equivalent.
7330 SDValue X;
7331 if (!sd_match(N, m_And(m_OneUse(m_Xor(m_Value(X), m_SpecificInt(SignMask))),
7332 m_OneUse(m_Sra(m_Deferred(X),
7333 m_SpecificInt(BitWidth - 1))))) &&
7334 !sd_match(N, m_And(m_OneUse(m_Add(m_Value(X), m_SpecificInt(SignMask))),
7335 m_OneUse(m_Sra(m_Deferred(X),
7336 m_SpecificInt(BitWidth - 1))))))
7337 return SDValue();
7338
7339 return DAG.getNode(ISD::USUBSAT, DL, VT, X,
7340 DAG.getConstant(SignMask, DL, VT));
7341}
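// [Editor's illustration - not part of DAGCombiner.cpp.] For an 8-bit value,
// usubsat(X, 128) is X - 128 when the sign bit is set and 0 otherwise. The
// pattern above computes the same thing: X ^ 128 flips the sign bit
// (subtracting 128 when it was set), and the arithmetic shift by 7 broadcasts
// the sign bit into a 0x00/0xFF mask, written below as a ternary:
static_assert((((200u ^ 128u) & 0xFFu) & (200u >= 128u ? 0xFFu : 0x00u)) ==
                  (200u >= 128u ? 200u - 128u : 0u),
              "sign bit set: result is X - 128");
static_assert((((57u ^ 128u) & 0xFFu) & (57u >= 128u ? 0xFFu : 0x00u)) ==
                  (57u >= 128u ? 57u - 128u : 0u),
              "sign bit clear: result is 0");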
7342
7343/// Given a bitwise logic operation N with a matching bitwise logic operand,
7344/// fold a pattern where 2 of the source operands are identically shifted
7345/// values. For example:
7346/// ((X0 << Y) | Z) | (X1 << Y) --> ((X0 | X1) << Y) | Z
7347 static SDValue foldLogicOfShifts(SDNode *N, SDValue LogicOp, SDValue ShiftOp,
7348 SelectionDAG &DAG) {
7349 unsigned LogicOpcode = N->getOpcode();
7350 assert(ISD::isBitwiseLogicOp(LogicOpcode) &&
7351 "Expected bitwise logic operation");
7352
7353 if (!LogicOp.hasOneUse() || !ShiftOp.hasOneUse())
7354 return SDValue();
7355
7356 // Match another bitwise logic op and a shift.
7357 unsigned ShiftOpcode = ShiftOp.getOpcode();
7358 if (LogicOp.getOpcode() != LogicOpcode ||
7359 !(ShiftOpcode == ISD::SHL || ShiftOpcode == ISD::SRL ||
7360 ShiftOpcode == ISD::SRA))
7361 return SDValue();
7362
7363 // Match another shift op inside the first logic operand. Handle both commuted
7364 // possibilities.
7365 // LOGIC (LOGIC (SH X0, Y), Z), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z
7366 // LOGIC (LOGIC Z, (SH X0, Y)), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z
7367 SDValue X1 = ShiftOp.getOperand(0);
7368 SDValue Y = ShiftOp.getOperand(1);
7369 SDValue X0, Z;
7370 if (LogicOp.getOperand(0).getOpcode() == ShiftOpcode &&
7371 LogicOp.getOperand(0).getOperand(1) == Y) {
7372 X0 = LogicOp.getOperand(0).getOperand(0);
7373 Z = LogicOp.getOperand(1);
7374 } else if (LogicOp.getOperand(1).getOpcode() == ShiftOpcode &&
7375 LogicOp.getOperand(1).getOperand(1) == Y) {
7376 X0 = LogicOp.getOperand(1).getOperand(0);
7377 Z = LogicOp.getOperand(0);
7378 } else {
7379 return SDValue();
7380 }
7381
7382 EVT VT = N->getValueType(0);
7383 SDLoc DL(N);
7384 SDValue LogicX = DAG.getNode(LogicOpcode, DL, VT, X0, X1);
7385 SDValue NewShift = DAG.getNode(ShiftOpcode, DL, VT, LogicX, Y);
7386 return DAG.getNode(LogicOpcode, DL, VT, NewShift, Z);
7387}
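// [Editor's illustration - not part of DAGCombiner.cpp.] The identity behind
// the fold above, checked on concrete values (X0 = 3, X1 = 9, Y = 4, Z = 5):
static_assert((((0x3u << 4) | 0x5u) | (0x9u << 4)) ==
                  (((0x3u | 0x9u) << 4) | 0x5u),
              "((X0 << Y) | Z) | (X1 << Y) == ((X0 | X1) << Y) | Z");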
7388
7389/// Given a tree of logic operations with shape like
7390/// (LOGIC (LOGIC (X, Y), LOGIC (Z, Y)))
7391/// try to match and fold shift operations with the same shift amount.
7392/// For example:
7393/// LOGIC (LOGIC (SH X0, Y), Z), (LOGIC (SH X1, Y), W) -->
7394/// --> LOGIC (SH (LOGIC X0, X1), Y), (LOGIC Z, W)
7395 static SDValue foldLogicTreeOfShifts(SDNode *N, SDValue LeftHand,
7396 SDValue RightHand, SelectionDAG &DAG) {
7397 unsigned LogicOpcode = N->getOpcode();
7398 assert(ISD::isBitwiseLogicOp(LogicOpcode) &&
7399 "Expected bitwise logic operation");
7400 if (LeftHand.getOpcode() != LogicOpcode ||
7401 RightHand.getOpcode() != LogicOpcode)
7402 return SDValue();
7403 if (!LeftHand.hasOneUse() || !RightHand.hasOneUse())
7404 return SDValue();
7405
7406 // Try to match one of following patterns:
7407 // LOGIC (LOGIC (SH X0, Y), Z), (LOGIC (SH X1, Y), W)
7408 // LOGIC (LOGIC (SH X0, Y), Z), (LOGIC W, (SH X1, Y))
7409 // Note that foldLogicOfShifts will handle commuted versions of the left hand
7410 // itself.
7411 SDValue CombinedShifts, W;
7412 SDValue R0 = RightHand.getOperand(0);
7413 SDValue R1 = RightHand.getOperand(1);
7414 if ((CombinedShifts = foldLogicOfShifts(N, LeftHand, R0, DAG)))
7415 W = R1;
7416 else if ((CombinedShifts = foldLogicOfShifts(N, LeftHand, R1, DAG)))
7417 W = R0;
7418 else
7419 return SDValue();
7420
7421 EVT VT = N->getValueType(0);
7422 SDLoc DL(N);
7423 return DAG.getNode(LogicOpcode, DL, VT, CombinedShifts, W);
7424}
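// [Editor's illustration - not part of DAGCombiner.cpp.] The tree-shaped
// variant handled above, checked on concrete values (W = 2 added on the
// right-hand side of the previous example):
static_assert((((0x3u << 4) | 0x5u) | ((0x9u << 4) | 0x2u)) ==
                  ((((0x3u | 0x9u) << 4)) | (0x5u | 0x2u)),
              "LOGIC ((SH X0,Y)|Z), ((SH X1,Y)|W) == (SH (X0|X1),Y) | (Z|W)");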
7425
7426/// Fold "masked merge" expressions like `(m & x) | (~m & y)` and its DeMorgan
7427/// variant `(~m | x) & (m | y)` into the equivalent `((x ^ y) & m) ^ y)`
7428/// pattern. This is typically a better representation for targets without a
7429/// fused "and-not" operation.
7430 static SDValue foldMaskedMerge(SDNode *Node, SelectionDAG &DAG,
7431 const TargetLowering &TLI, const SDLoc &DL) {
7432 // Note that masked-merge variants using XOR or ADD expressions are
7433 // normalized to OR by InstCombine so we only check for OR or AND.
7434 assert((Node->getOpcode() == ISD::OR || Node->getOpcode() == ISD::AND) &&
7435 "Must be called with ISD::OR or ISD::AND node");
7436
7437 // If the target supports and-not, don't fold this.
7438 if (TLI.hasAndNot(SDValue(Node, 0)))
7439 return SDValue();
7440
7441 SDValue M, X, Y;
7442
7443 if (sd_match(Node,
7444 m_Or(m_OneUse(m_And(m_OneUse(m_Not(m_Value(M))), m_Value(Y))),
7445 m_OneUse(m_And(m_Deferred(M), m_Value(X))))) ||
7446 sd_match(Node,
7447 m_And(m_OneUse(m_Or(m_OneUse(m_Not(m_Value(M))), m_Value(X))),
7448 m_OneUse(m_Or(m_Deferred(M), m_Value(Y)))))) {
7449 EVT VT = M.getValueType();
7450 SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, X, Y);
7451 SDValue And = DAG.getNode(ISD::AND, DL, VT, Xor, M);
7452 return DAG.getNode(ISD::XOR, DL, VT, And, Y);
7453 }
7454 return SDValue();
7455}
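// [Editor's illustration - not part of DAGCombiner.cpp.] Both masked-merge
// forms named above really are the same function of (m, x, y), checked on
// concrete 32-bit values:
static_assert(((0x0F0F0F0Fu & 0x12345678u) | (~0x0F0F0F0Fu & 0x9ABCDEF0u)) ==
                  (((0x12345678u ^ 0x9ABCDEF0u) & 0x0F0F0F0Fu) ^ 0x9ABCDEF0u),
              "(m & x) | (~m & y) == ((x ^ y) & m) ^ y");
static_assert(((~0x0F0F0F0Fu | 0x12345678u) & (0x0F0F0F0Fu | 0x9ABCDEF0u)) ==
                  (((0x12345678u ^ 0x9ABCDEF0u) & 0x0F0F0F0Fu) ^ 0x9ABCDEF0u),
              "(~m | x) & (m | y) == ((x ^ y) & m) ^ y");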
7456
7457SDValue DAGCombiner::visitAND(SDNode *N) {
7458 SDValue N0 = N->getOperand(0);
7459 SDValue N1 = N->getOperand(1);
7460 EVT VT = N1.getValueType();
7461 SDLoc DL(N);
7462
7463 // x & x --> x
7464 if (N0 == N1)
7465 return N0;
7466
7467 // fold (and c1, c2) -> c1&c2
7468 if (SDValue C = DAG.FoldConstantArithmetic(ISD::AND, DL, VT, {N0, N1}))
7469 return C;
7470
7471 // canonicalize constant to RHS
7472 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
7473 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
7474 return DAG.getNode(ISD::AND, DL, VT, N1, N0);
7475
7476 if (areBitwiseNotOfEachother(N0, N1))
7477 return DAG.getConstant(APInt::getZero(VT.getScalarSizeInBits()), DL, VT);
7478
7479 // fold vector ops
7480 if (VT.isVector()) {
7481 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
7482 return FoldedVOp;
7483
7484 // fold (and x, 0) -> 0, vector edition
7485 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
7486 // do not return N1, because undef node may exist in N1
7487 return DAG.getConstant(APInt::getZero(N1.getScalarValueSizeInBits()), DL,
7488 N1.getValueType());
7489
7490 // fold (and x, -1) -> x, vector edition
7491 if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
7492 return N0;
7493
7494 // fold (and (masked_load) (splat_vec (x, ...))) to zext_masked_load
7495 auto *MLoad = dyn_cast<MaskedLoadSDNode>(N0);
7496 ConstantSDNode *Splat = isConstOrConstSplat(N1, true, true);
7497 if (MLoad && MLoad->getExtensionType() == ISD::EXTLOAD && Splat) {
7498 EVT LoadVT = MLoad->getMemoryVT();
7499 EVT ExtVT = VT;
7500 if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT, LoadVT)) {
7501 // For this AND to be a zero extension of the masked load the elements
7502 // of the BuildVec must mask the bottom bits of the extended element
7503 // type
7504 uint64_t ElementSize =
7505 LoadVT.getVectorElementType().getScalarSizeInBits();
7506 if (Splat->getAPIntValue().isMask(ElementSize)) {
7507 SDValue NewLoad = DAG.getMaskedLoad(
7508 ExtVT, DL, MLoad->getChain(), MLoad->getBasePtr(),
7509 MLoad->getOffset(), MLoad->getMask(), MLoad->getPassThru(),
7510 LoadVT, MLoad->getMemOperand(), MLoad->getAddressingMode(),
7511 ISD::ZEXTLOAD, MLoad->isExpandingLoad());
7512 bool LoadHasOtherUsers = !N0.hasOneUse();
7513 CombineTo(N, NewLoad);
7514 if (LoadHasOtherUsers)
7515 CombineTo(MLoad, NewLoad.getValue(0), NewLoad.getValue(1));
7516 return SDValue(N, 0);
7517 }
7518 }
7519 }
7520 }
7521
7522 // fold (and x, -1) -> x
7523 if (isAllOnesConstant(N1))
7524 return N0;
7525
7526 // if (and x, c) is known to be zero, return 0
7527 unsigned BitWidth = VT.getScalarSizeInBits();
7528 ConstantSDNode *N1C = isConstOrConstSplat(N1);
7529 if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(BitWidth)))
7530 return DAG.getConstant(0, DL, VT);
7531
7532 if (SDValue R = foldAndOrOfSETCC(N, DAG))
7533 return R;
7534
7535 if (SDValue NewSel = foldBinOpIntoSelect(N))
7536 return NewSel;
7537
7538 // reassociate and
7539 if (SDValue RAND = reassociateOps(ISD::AND, DL, N0, N1, N->getFlags()))
7540 return RAND;
7541
7542 // Fold and(vecreduce(x), vecreduce(y)) -> vecreduce(and(x, y))
7543 if (SDValue SD =
7544 reassociateReduction(ISD::VECREDUCE_AND, ISD::AND, DL, VT, N0, N1))
7545 return SD;
7546
7547 // fold (and (or x, C), D) -> D if (C & D) == D
7548 auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
7549 return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
7550 };
7551 if (N0.getOpcode() == ISD::OR &&
7552 ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
7553 return N1;
7554
7555 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
7556 SDValue N0Op0 = N0.getOperand(0);
7557 EVT SrcVT = N0Op0.getValueType();
7558 unsigned SrcBitWidth = SrcVT.getScalarSizeInBits();
7559 APInt Mask = ~N1C->getAPIntValue();
7560 Mask = Mask.trunc(SrcBitWidth);
7561
7562 // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
7563 if (DAG.MaskedValueIsZero(N0Op0, Mask))
7564 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0Op0);
7565
7566 // fold (and (any_ext V), c) -> (zero_ext (and (trunc V), c)) if profitable.
7567 if (N1C->getAPIntValue().countLeadingZeros() >= (BitWidth - SrcBitWidth) &&
7568 TLI.isTruncateFree(VT, SrcVT) && TLI.isZExtFree(SrcVT, VT) &&
7569 TLI.isTypeDesirableForOp(ISD::AND, SrcVT) &&
7570 TLI.isNarrowingProfitable(N, VT, SrcVT))
7571 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT,
7572 DAG.getNode(ISD::AND, DL, SrcVT, N0Op0,
7573 DAG.getZExtOrTrunc(N1, DL, SrcVT)));
7574 }
7575
7576 // fold (and (ext (and V, c1)), c2) -> (and (ext V), (and c1, (ext c2)))
7577 if (ISD::isExtOpcode(N0.getOpcode())) {
7578 unsigned ExtOpc = N0.getOpcode();
7579 SDValue N0Op0 = N0.getOperand(0);
7580 if (N0Op0.getOpcode() == ISD::AND &&
7581 (ExtOpc != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0Op0, VT)) &&
7582 N0->hasOneUse() && N0Op0->hasOneUse()) {
7583 if (SDValue NewExt = DAG.FoldConstantArithmetic(ExtOpc, DL, VT,
7584 {N0Op0.getOperand(1)})) {
7585 if (SDValue NewMask =
7586 DAG.FoldConstantArithmetic(ISD::AND, DL, VT, {N1, NewExt})) {
7587 return DAG.getNode(ISD::AND, DL, VT,
7588 DAG.getNode(ExtOpc, DL, VT, N0Op0.getOperand(0)),
7589 NewMask);
7590 }
7591 }
7592 }
7593 }
7594
7595 // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
7596 // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
7597 // already be zero by virtue of the width of the base type of the load.
7598 //
7599 // the 'X' node here can either be nothing or an extract_vector_elt to catch
7600 // more cases.
7601 if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7603 N0.getOperand(0).getOpcode() == ISD::LOAD &&
7604 N0.getOperand(0).getResNo() == 0) ||
7605 (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
7606 auto *Load =
7607 cast<LoadSDNode>((N0.getOpcode() == ISD::LOAD) ? N0 : N0.getOperand(0));
7608
7609 // Get the constant (if applicable) the zero'th operand is being ANDed with.
7610 // This can be a pure constant or a vector splat, in which case we treat the
7611 // vector as a scalar and use the splat value.
7612 APInt Constant = APInt::getZero(1);
7613 if (const ConstantSDNode *C = isConstOrConstSplat(
7614 N1, /*AllowUndefs=*/false, /*AllowTruncation=*/true)) {
7615 Constant = C->getAPIntValue();
7616 } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
7617 unsigned EltBitWidth = Vector->getValueType(0).getScalarSizeInBits();
7618 APInt SplatValue, SplatUndef;
7619 unsigned SplatBitSize;
7620 bool HasAnyUndefs;
7621 // Endianness should not matter here. Code below makes sure that we only
7622 // use the result if the SplatBitSize is a multiple of the vector element
7623 // size. And after that we AND all element sized parts of the splat
7624 // together. So the end result should be the same regardless of in which
7625 // order we do those operations.
7626 const bool IsBigEndian = false;
7627 bool IsSplat =
7628 Vector->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
7629 HasAnyUndefs, EltBitWidth, IsBigEndian);
7630
7631 // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
7632 // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
7633 if (IsSplat && (SplatBitSize % EltBitWidth) == 0) {
7634 // Undef bits can contribute to a possible optimisation if set, so
7635 // set them.
7636 SplatValue |= SplatUndef;
7637
7638 // The splat value may be something like "0x00FFFFFF", which means 0 for
7639 // the first vector value and FF for the rest, repeating. We need a mask
7640 // that will apply equally to all members of the vector, so AND all the
7641 // lanes of the constant together.
7642 Constant = APInt::getAllOnes(EltBitWidth);
7643 for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i)
7644 Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth);
7645 }
7646 }
7647
7648 // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
7649 // actually legal and isn't going to get expanded, else this is a false
7650 // optimisation.
7651 bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
7652 Load->getValueType(0),
7653 Load->getMemoryVT());
7654
7655 // Resize the constant to the same size as the original memory access before
7656 // extension. If it is still the AllOnesValue then this AND is completely
7657 // unneeded.
7658 Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
7659
7660 bool B;
7661 switch (Load->getExtensionType()) {
7662 default: B = false; break;
7663 case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
7664 case ISD::ZEXTLOAD:
7665 case ISD::NON_EXTLOAD: B = true; break;
7666 }
7667
7668 if (B && Constant.isAllOnes()) {
7669 // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
7670 // preserve semantics once we get rid of the AND.
7671 SDValue NewLoad(Load, 0);
7672
7673 // Fold the AND away. NewLoad may get replaced immediately.
7674 CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
7675
7676 if (Load->getExtensionType() == ISD::EXTLOAD) {
7677 NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
7678 Load->getValueType(0), SDLoc(Load),
7679 Load->getChain(), Load->getBasePtr(),
7680 Load->getOffset(), Load->getMemoryVT(),
7681 Load->getMemOperand());
7682 // Replace uses of the EXTLOAD with the new ZEXTLOAD.
7683 if (Load->getNumValues() == 3) {
7684 // PRE/POST_INC loads have 3 values.
7685 SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
7686 NewLoad.getValue(2) };
7687 CombineTo(Load, To, 3, true);
7688 } else {
7689 CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
7690 }
7691 }
7692
7693 return SDValue(N, 0); // Return N so it doesn't get rechecked!
7694 }
7695 }
7696
7697 // Try to convert a constant mask AND into a shuffle clear mask.
7698 if (VT.isVector())
7699 if (SDValue Shuffle = XformToShuffleWithZero(N))
7700 return Shuffle;
7701
7702 if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
7703 return Combined;
7704
7705 if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR && N0.hasOneUse() && N1C &&
7706 ISD::isExtOpcode(N0.getOperand(0).getOpcode())) {
7707 SDValue Ext = N0.getOperand(0);
7708 EVT ExtVT = Ext->getValueType(0);
7709 SDValue Extendee = Ext->getOperand(0);
7710
7711 unsigned ScalarWidth = Extendee.getValueType().getScalarSizeInBits();
7712 if (N1C->getAPIntValue().isMask(ScalarWidth) &&
7713 (!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, ExtVT))) {
7714 // (and (extract_subvector (zext|anyext|sext v) _) iN_mask)
7715 // => (extract_subvector (iN_zeroext v))
7716 SDValue ZeroExtExtendee =
7717 DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVT, Extendee);
7718
7719 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ZeroExtExtendee,
7720 N0.getOperand(1));
7721 }
7722 }
7723
7724 // fold (and (masked_gather x)) -> (zext_masked_gather x)
7725 if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
7726 EVT MemVT = GN0->getMemoryVT();
7727 EVT ScalarVT = MemVT.getScalarType();
7728
7729 if (SDValue(GN0, 0).hasOneUse() &&
7730 isConstantSplatVectorMaskForType(N1.getNode(), ScalarVT) &&
7731 TLI.isVectorLoadExtDesirable(SDValue(GN0, 0))) {
7732 SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
7733 GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
7734
7735 SDValue ZExtLoad = DAG.getMaskedGather(
7736 DAG.getVTList(VT, MVT::Other), MemVT, DL, Ops, GN0->getMemOperand(),
7737 GN0->getIndexType(), ISD::ZEXTLOAD);
7738
7739 CombineTo(N, ZExtLoad);
7740 AddToWorklist(ZExtLoad.getNode());
7741 // Avoid recheck of N.
7742 return SDValue(N, 0);
7743 }
7744 }
7745
7746 // fold (and (load x), 255) -> (zextload x, i8)
7747 // fold (and (extload x, i16), 255) -> (zextload x, i8)
7748 if (N1C && N0.getOpcode() == ISD::LOAD && !VT.isVector())
7749 if (SDValue Res = reduceLoadWidth(N))
7750 return Res;
7751
7752 if (LegalTypes) {
7753 // Attempt to propagate the AND back up to the leaves which, if they're
7754 // loads, can be combined to narrow loads and the AND node can be removed.
7755 // Perform after legalization so that extend nodes will already be
7756 // combined into the loads.
7757 if (BackwardsPropagateMask(N))
7758 return SDValue(N, 0);
7759 }
7760
7761 if (SDValue Combined = visitANDLike(N0, N1, N))
7762 return Combined;
7763
7764 // Simplify: (and (op x...), (op y...)) -> (op (and x, y))
7765 if (N0.getOpcode() == N1.getOpcode())
7766 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
7767 return V;
7768
7769 if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
7770 return R;
7771 if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG))
7772 return R;
7773
7774 // Fold (and X, (bswap (not Y))) -> (and X, (not (bswap Y)))
7775 // Fold (and X, (bitreverse (not Y))) -> (and X, (not (bitreverse Y)))
7776 SDValue X, Y, Z, NotY;
7777 for (unsigned Opc : {ISD::BSWAP, ISD::BITREVERSE})
7778 if (sd_match(N,
7779 m_And(m_Value(X), m_OneUse(m_UnaryOp(Opc, m_Value(NotY))))) &&
7780 sd_match(NotY, m_Not(m_Value(Y))) &&
7781 (TLI.hasAndNot(SDValue(N, 0)) || NotY->hasOneUse()))
7782 return DAG.getNode(ISD::AND, DL, VT, X,
7783 DAG.getNOT(DL, DAG.getNode(Opc, DL, VT, Y), VT));
7784
7785 // Fold (and X, (rot (not Y), Z)) -> (and X, (not (rot Y, Z)))
7786 for (unsigned Opc : {ISD::ROTL, ISD::ROTR})
7787 if (sd_match(N, m_And(m_Value(X),
7788 m_OneUse(m_BinOp(Opc, m_Value(NotY), m_Value(Z))))) &&
7789 sd_match(NotY, m_Not(m_Value(Y))) &&
7790 (TLI.hasAndNot(SDValue(N, 0)) || NotY->hasOneUse()))
7791 return DAG.getNode(ISD::AND, DL, VT, X,
7792 DAG.getNOT(DL, DAG.getNode(Opc, DL, VT, Y, Z), VT));
7793
7794 // Fold (and X, (add (not Y), Z)) -> (and X, (not (sub Y, Z)))
7795 // Fold (and X, (sub (not Y), Z)) -> (and X, (not (add Y, Z)))
7796 if (TLI.hasAndNot(SDValue(N, 0)))
7797 if (SDValue Folded = foldBitwiseOpWithNeg(N, DL, VT))
7798 return Folded;
7799
7800 // Fold (and (srl X, C), 1) -> (srl X, BW-1) for signbit extraction
7801 // If we are shifting down an extended sign bit, see if we can simplify
7802 // this to shifting the MSB directly to expose further simplifications.
7803 // This pattern often appears after sext_inreg legalization.
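 // For example, on i32 with DAG.ComputeNumSignBits(X) >= 4, (and (srl X, 28), 1)
 // reads bit 28, which is a copy of the sign bit, so it can be rewritten as
 // (srl X, 31).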
7804 APInt Amt;
7805 if (sd_match(N, m_And(m_Srl(m_Value(X), m_ConstInt(Amt)), m_One())) &&
7806 Amt.ult(BitWidth - 1) && Amt.uge(BitWidth - DAG.ComputeNumSignBits(X)))
7807 return DAG.getNode(ISD::SRL, DL, VT, X,
7808 DAG.getShiftAmountConstant(BitWidth - 1, VT, DL));
7809
7810 // Masking the negated extension of a boolean is just the zero-extended
7811 // boolean:
7812 // and (sub 0, zext(bool X)), 1 --> zext(bool X)
7813 // and (sub 0, sext(bool X)), 1 --> zext(bool X)
7814 //
7815 // Note: the SimplifyDemandedBits fold below can make an information-losing
7816 // transform, and then we have no way to find this better fold.
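 // For example, on i32: (sub 0, (zext i1 X)) is either 0 or 0xFFFFFFFF, so
 // masking it with 1 yields exactly (zext i1 X).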
7817 if (sd_match(N, m_And(m_Sub(m_Zero(), m_Value(X)), m_One()))) {
7818 if (X.getOpcode() == ISD::ZERO_EXTEND &&
7819 X.getOperand(0).getScalarValueSizeInBits() == 1)
7820 return X;
7821 if (X.getOpcode() == ISD::SIGN_EXTEND &&
7822 X.getOperand(0).getScalarValueSizeInBits() == 1)
7823 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, X.getOperand(0));
7824 }
7825
7826 // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
7827 // fold (and (sra)) -> (and (srl)) when possible.
7828 if (SimplifyDemandedBits(SDValue(N, 0)))
7829 return SDValue(N, 0);
7830
7831 // fold (zext_inreg (extload x)) -> (zextload x)
7832 // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
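 // For example, (and (extload i8 -> i32, [p]), 255) is a zext_inreg to 8 bits,
 // so it can be implemented with a single (zextload i8 -> i32, [p]): the mask
 // only clears bits that the zero-extending load already zeroes.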
7833 if (ISD::isUNINDEXEDLoad(N0.getNode()) &&
7834 (ISD::isEXTLoad(N0.getNode()) ||
7835 (ISD::isSEXTLoad(N0.getNode()) && N0.hasOneUse()))) {
7836 auto *LN0 = cast<LoadSDNode>(N0);
7837 EVT MemVT = LN0->getMemoryVT();
7838 // If we zero all the possible extended bits, then we can turn this into
7839 // a zextload if we are running before legalize or the operation is legal.
7840 unsigned ExtBitSize = N1.getScalarValueSizeInBits();
7841 unsigned MemBitSize = MemVT.getScalarSizeInBits();
7842 APInt ExtBits = APInt::getHighBitsSet(ExtBitSize, ExtBitSize - MemBitSize);
7843 if (DAG.MaskedValueIsZero(N1, ExtBits) &&
7844 ((!LegalOperations && LN0->isSimple()) ||
7845 TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
7846 SDValue ExtLoad =
7847 DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(),
7848 LN0->getBasePtr(), MemVT, LN0->getMemOperand());
7849 AddToWorklist(N);
7850 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
7851 return SDValue(N, 0); // Return N so it doesn't get rechecked!
7852 }
7853 }
7854
7855 // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
7856 if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
7857 if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
7858 N0.getOperand(1), false))
7859 return BSwap;
7860 }
7861
7862 if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
7863 return Shifts;
7864
7865 if (SDValue V = combineShiftAnd1ToBitTest(N, DAG))
7866 return V;
7867
7868 // Recognize the following pattern:
7869 //
7870 // AndVT = (and (sign_extend NarrowVT to AndVT) #bitmask)
7871 //
7872 // where bitmask is a mask that clears the upper bits of AndVT. The
7873 // number of bits in bitmask must be a power of two.
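 // For example, (and (sign_extend i8 X to i32), 255) is recognized here and
 // replaced below with (zero_extend i8 X to i32).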
7874 auto IsAndZeroExtMask = [](SDValue LHS, SDValue RHS) {
7875 if (LHS->getOpcode() != ISD::SIGN_EXTEND)
7876 return false;
7877
7878 auto *C = dyn_cast<ConstantSDNode>(RHS);
7879 if (!C)
7880 return false;
7881
7882 if (!C->getAPIntValue().isMask(
7883 LHS.getOperand(0).getValueType().getFixedSizeInBits()))
7884 return false;
7885
7886 return true;
7887 };
7888
7889 // Replace (and (sign_extend ...) #bitmask) with (zero_extend ...).
7890 if (IsAndZeroExtMask(N0, N1))
7891 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
7892
7893 if (hasOperation(ISD::USUBSAT, VT))
7894 if (SDValue V = foldAndToUsubsat(N, DAG, DL))
7895 return V;
7896
7897 // Postpone until legalization completed to avoid interference with bswap
7898 // folding
7899 if (LegalOperations || VT.isVector())
7900 if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
7901 return R;
7902
7903 if (VT.isScalarInteger() && VT != MVT::i1)
7904 if (SDValue R = foldMaskedMerge(N, DAG, TLI, DL))
7905 return R;
7906
7907 return SDValue();
7908}
7909
7910/// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
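/// For example, with i32 a = 0xAABBCCDD:
///   ((a >> 8) & 0xFF) | ((a << 8) & 0xFF00) == 0x0000DDCC,
/// and (bswap a) >> 16 == 0xDDCCBBAA >> 16 == 0x0000DDCC.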
7911SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
7912 bool DemandHighBits) {
7913 if (!LegalOperations)
7914 return SDValue();
7915
7916 EVT VT = N->getValueType(0);
7917 if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
7918 return SDValue();
7919 if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
7920 return SDValue();
7921
7922 // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
7923 bool LookPassAnd0 = false;
7924 bool LookPassAnd1 = false;
7925 if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
7926 std::swap(N0, N1);
7927 if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
7928 std::swap(N0, N1);
7929 if (N0.getOpcode() == ISD::AND) {
7930 if (!N0->hasOneUse())
7931 return SDValue();
7932 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7933 // Also handle 0xffff since the LHS is guaranteed to have zeros there.
7934 // This is needed for X86.
7935 if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
7936 N01C->getZExtValue() != 0xFFFF))
7937 return SDValue();
7938 N0 = N0.getOperand(0);
7939 LookPassAnd0 = true;
7940 }
7941
7942 if (N1.getOpcode() == ISD::AND) {
7943 if (!N1->hasOneUse())
7944 return SDValue();
7945 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
7946 if (!N11C || N11C->getZExtValue() != 0xFF)
7947 return SDValue();
7948 N1 = N1.getOperand(0);
7949 LookPassAnd1 = true;
7950 }
7951
7952 if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
7953 std::swap(N0, N1);
7954 if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
7955 return SDValue();
7956 if (!N0->hasOneUse() || !N1->hasOneUse())
7957 return SDValue();
7958
7959 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7960 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
7961 if (!N01C || !N11C)
7962 return SDValue();
7963 if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
7964 return SDValue();
7965
7966 // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
7967 SDValue N00 = N0->getOperand(0);
7968 if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
7969 if (!N00->hasOneUse())
7970 return SDValue();
7971 ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
7972 if (!N001C || N001C->getZExtValue() != 0xFF)
7973 return SDValue();
7974 N00 = N00.getOperand(0);
7975 LookPassAnd0 = true;
7976 }
7977
7978 SDValue N10 = N1->getOperand(0);
7979 if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
7980 if (!N10->hasOneUse())
7981 return SDValue();
7982 ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
7983 // Also allow 0xFFFF since the bits will be shifted out. This is needed
7984 // for X86.
7985 if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
7986 N101C->getZExtValue() != 0xFFFF))
7987 return SDValue();
7988 N10 = N10.getOperand(0);
7989 LookPassAnd1 = true;
7990 }
7991
7992 if (N00 != N10)
7993 return SDValue();
7994
7995 // Make sure everything beyond the low halfword gets set to zero since the SRL
7996 // 16 will clear the top bits.
7997 unsigned OpSizeInBits = VT.getSizeInBits();
7998 if (OpSizeInBits > 16) {
7999 // If the left-shift isn't masked out then the only way this is a bswap is
8000 // if all bits beyond the low 8 are 0. In that case the entire pattern
8001 // reduces to a left shift anyway: leave it for other parts of the combiner.
8002 if (DemandHighBits && !LookPassAnd0)
8003 return SDValue();
8004
8005 // However, if the right shift isn't masked out then it might be because
8006 // it's not needed. See if we can spot that too. If the high bits aren't
8007 // demanded, we only need bits 23:16 to be zero. Otherwise, we need all
8008 // upper bits to be zero.
8009 if (!LookPassAnd1) {
8010 unsigned HighBit = DemandHighBits ? OpSizeInBits : 24;
8011 if (!DAG.MaskedValueIsZero(N10,
8012 APInt::getBitsSet(OpSizeInBits, 16, HighBit)))
8013 return SDValue();
8014 }
8015 }
8016
8017 SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
8018 if (OpSizeInBits > 16) {
8019 SDLoc DL(N);
8020 Res = DAG.getNode(ISD::SRL, DL, VT, Res,
8021 DAG.getShiftAmountConstant(OpSizeInBits - 16, VT, DL));
8022 }
8023 return Res;
8024}
8025
8026/// Return true if the specified node is an element that makes up a 32-bit
8027/// packed halfword byteswap.
8028/// ((x & 0x000000ff) << 8) |
8029/// ((x & 0x0000ff00) >> 8) |
8030/// ((x & 0x00ff0000) << 8) |
8031/// ((x & 0xff000000) >> 8)
8032static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
8033 if (!N->hasOneUse())
8034 return false;
8035
8036 unsigned Opc = N.getOpcode();
8037 if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
8038 return false;
8039
8040 SDValue N0 = N.getOperand(0);
8041 unsigned Opc0 = N0.getOpcode();
8042 if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
8043 return false;
8044
8045 ConstantSDNode *N1C = nullptr;
8046 // SHL or SRL: look upstream for AND mask operand
8047 if (Opc == ISD::AND)
8048 N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
8049 else if (Opc0 == ISD::AND)
8050 N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
8051 if (!N1C)
8052 return false;
8053
8054 unsigned MaskByteOffset;
8055 switch (N1C->getZExtValue()) {
8056 default:
8057 return false;
8058 case 0xFF: MaskByteOffset = 0; break;
8059 case 0xFF00: MaskByteOffset = 1; break;
8060 case 0xFFFF:
8061 // In case demanded bits didn't clear the bits that will be shifted out.
8062 // This is needed for X86.
8063 if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) {
8064 MaskByteOffset = 1;
8065 break;
8066 }
8067 return false;
8068 case 0xFF0000: MaskByteOffset = 2; break;
8069 case 0xFF000000: MaskByteOffset = 3; break;
8070 }
8071
8072 // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
8073 if (Opc == ISD::AND) {
8074 if (MaskByteOffset == 0 || MaskByteOffset == 2) {
8075 // (x >> 8) & 0xff
8076 // (x >> 8) & 0xff0000
8077 if (Opc0 != ISD::SRL)
8078 return false;
8079 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
8080 if (!C || C->getZExtValue() != 8)
8081 return false;
8082 } else {
8083 // (x << 8) & 0xff00
8084 // (x << 8) & 0xff000000
8085 if (Opc0 != ISD::SHL)
8086 return false;
8087 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
8088 if (!C || C->getZExtValue() != 8)
8089 return false;
8090 }
8091 } else if (Opc == ISD::SHL) {
8092 // (x & 0xff) << 8
8093 // (x & 0xff0000) << 8
8094 if (MaskByteOffset != 0 && MaskByteOffset != 2)
8095 return false;
8096 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
8097 if (!C || C->getZExtValue() != 8)
8098 return false;
8099 } else { // Opc == ISD::SRL
8100 // (x & 0xff00) >> 8
8101 // (x & 0xff000000) >> 8
8102 if (MaskByteOffset != 1 && MaskByteOffset != 3)
8103 return false;
8104 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
8105 if (!C || C->getZExtValue() != 8)
8106 return false;
8107 }
8108
8109 if (Parts[MaskByteOffset])
8110 return false;
8111
8112 Parts[MaskByteOffset] = N0.getOperand(0).getNode();
8113 return true;
8114}
8115
8116// Match 2 elements of a packed halfword bswap.
8117static bool isBSwapHWordPair(SDValue N, MutableArrayRef<SDNode *> Parts) {
8118 if (N.getOpcode() == ISD::OR)
8119 return isBSwapHWordElement(N.getOperand(0), Parts) &&
8120 isBSwapHWordElement(N.getOperand(1), Parts);
8121
8122 if (N.getOpcode() == ISD::SRL && N.getOperand(0).getOpcode() == ISD::BSWAP) {
8123 ConstantSDNode *C = isConstOrConstSplat(N.getOperand(1));
8124 if (!C || C->getAPIntValue() != 16)
8125 return false;
8126 Parts[0] = Parts[1] = N.getOperand(0).getOperand(0).getNode();
8127 return true;
8128 }
8129
8130 return false;
8131}
8132
8133// Match this pattern:
8134// (or (and (shl (A, 8)), 0xff00ff00), (and (srl (A, 8)), 0x00ff00ff))
8135// And rewrite this to:
8136// (rotr (bswap A), 16)
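// For example, with i32 A = 0xAABBCCDD:
//   ((A << 8) & 0xff00ff00) | ((A >> 8) & 0x00ff00ff) == 0xBBAADDCC,
// which equals (rotr (bswap A), 16) == (rotr 0xDDCCBBAA, 16).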
8137static SDValue matchBSwapHWordOrAndAnd(const TargetLowering &TLI,
8138 SelectionDAG &DAG, SDNode *N, SDValue N0,
8139 SDValue N1, EVT VT) {
8140 assert(N->getOpcode() == ISD::OR && VT == MVT::i32 &&
8141 "MatchBSwapHWordOrAndAnd: expecting i32");
8142 if (!TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
8143 return SDValue();
8144 if (N0.getOpcode() != ISD::AND || N1.getOpcode() != ISD::AND)
8145 return SDValue();
8146 // TODO: this is too restrictive; lifting this restriction requires more tests
8147 if (!N0->hasOneUse() || !N1->hasOneUse())
8148 return SDValue();
8149 ConstantSDNode *Mask0 = isConstOrConstSplat(N0.getOperand(1));
8150 ConstantSDNode *Mask1 = isConstOrConstSplat(N1.getOperand(1));
8151 if (!Mask0 || !Mask1)
8152 return SDValue();
8153 if (Mask0->getAPIntValue() != 0xff00ff00 ||
8154 Mask1->getAPIntValue() != 0x00ff00ff)
8155 return SDValue();
8156 SDValue Shift0 = N0.getOperand(0);
8157 SDValue Shift1 = N1.getOperand(0);
8158 if (Shift0.getOpcode() != ISD::SHL || Shift1.getOpcode() != ISD::SRL)
8159 return SDValue();
8160 ConstantSDNode *ShiftAmt0 = isConstOrConstSplat(Shift0.getOperand(1));
8161 ConstantSDNode *ShiftAmt1 = isConstOrConstSplat(Shift1.getOperand(1));
8162 if (!ShiftAmt0 || !ShiftAmt1)
8163 return SDValue();
8164 if (ShiftAmt0->getAPIntValue() != 8 || ShiftAmt1->getAPIntValue() != 8)
8165 return SDValue();
8166 if (Shift0.getOperand(0) != Shift1.getOperand(0))
8167 return SDValue();
8168
8169 SDLoc DL(N);
8170 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Shift0.getOperand(0));
8171 SDValue ShAmt = DAG.getShiftAmountConstant(16, VT, DL);
8172 return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
8173}
8174
8175/// Match a 32-bit packed halfword bswap. That is
8176/// ((x & 0x000000ff) << 8) |
8177/// ((x & 0x0000ff00) >> 8) |
8178/// ((x & 0x00ff0000) << 8) |
8179/// ((x & 0xff000000) >> 8)
8180/// => (rotl (bswap x), 16)
8181SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
8182 if (!LegalOperations)
8183 return SDValue();
8184
8185 EVT VT = N->getValueType(0);
8186 if (VT != MVT::i32)
8187 return SDValue();
8188 if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
8189 return SDValue();
8190
8191 if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N0, N1, VT))
8192 return BSwap;
8193
8194 // Try again with commuted operands.
8195 if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N1, N0, VT))
8196 return BSwap;
8197
8198
8199 // Look for either
8200 // (or (bswaphpair), (bswaphpair))
8201 // (or (or (bswaphpair), (and)), (and))
8202 // (or (or (and), (bswaphpair)), (and))
8203 SDNode *Parts[4] = {};
8204
8205 if (isBSwapHWordPair(N0, Parts)) {
8206 // (or (or (and), (and)), (or (and), (and)))
8207 if (!isBSwapHWordPair(N1, Parts))
8208 return SDValue();
8209 } else if (N0.getOpcode() == ISD::OR) {
8210 // (or (or (or (and), (and)), (and)), (and))
8211 if (!isBSwapHWordElement(N1, Parts))
8212 return SDValue();
8213 SDValue N00 = N0.getOperand(0);
8214 SDValue N01 = N0.getOperand(1);
8215 if (!(isBSwapHWordElement(N01, Parts) && isBSwapHWordPair(N00, Parts)) &&
8216 !(isBSwapHWordElement(N00, Parts) && isBSwapHWordPair(N01, Parts)))
8217 return SDValue();
8218 } else {
8219 return SDValue();
8220 }
8221
8222 // Make sure the parts are all coming from the same node.
8223 if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
8224 return SDValue();
8225
8226 SDLoc DL(N);
8227 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
8228 SDValue(Parts[0], 0));
8229
8230 // Result of the bswap should be rotated by 16. If it's not legal, then
8231 // do (x << 16) | (x >> 16).
8232 SDValue ShAmt = DAG.getShiftAmountConstant(16, VT, DL);
8233 if (hasOperation(ISD::ROTL, VT))
8234 return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
8235 if (hasOperation(ISD::ROTR, VT))
8236 return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
8237 return DAG.getNode(ISD::OR, DL, VT,
8238 DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
8239 DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
8240}
8241
8242/// This contains all DAGCombine rules which reduce two values combined by
8243/// an Or operation to a single value \see visitANDLike().
8244SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, const SDLoc &DL) {
8245 EVT VT = N1.getValueType();
8246
8247 // fold (or x, undef) -> -1
8248 if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
8249 return DAG.getAllOnesConstant(DL, VT);
8250
8251 if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
8252 return V;
8253
8254 // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
8255 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
8256 // Don't increase # computations.
8257 (N0->hasOneUse() || N1->hasOneUse())) {
8258 // We can only do this xform if we know that bits from X that are set in C2
8259 // but not in C1 are already zero. Likewise for Y.
8260 if (const ConstantSDNode *N0O1C =
8261 getAsNonOpaqueConstant(N0.getOperand(1)))
8262 if (const ConstantSDNode *N1O1C =
8263 getAsNonOpaqueConstant(N1.getOperand(1))) {
8264 // We can only do this xform if we know that bits from X that are set in
8265 // C2 but not in C1 are already zero. Likewise for Y.
8266 const APInt &LHSMask = N0O1C->getAPIntValue();
8267 const APInt &RHSMask = N1O1C->getAPIntValue();
8268
8269 if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
8270 DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
8271 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
8272 N0.getOperand(0), N1.getOperand(0));
8273 return DAG.getNode(ISD::AND, DL, VT, X,
8274 DAG.getConstant(LHSMask | RHSMask, DL, VT));
8275 }
8276 }
8277 }
8278 }
8279
8280 // (or (and X, M), (and X, N)) -> (and X, (or M, N))
8281 if (N0.getOpcode() == ISD::AND &&
8282 N1.getOpcode() == ISD::AND &&
8283 N0.getOperand(0) == N1.getOperand(0) &&
8284 // Don't increase # computations.
8285 (N0->hasOneUse() || N1->hasOneUse())) {
8286 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
8287 N0.getOperand(1), N1.getOperand(1));
8288 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
8289 }
8290
8291 return SDValue();
8292}
8293
8294/// OR combines for which the commuted variant will be tried as well.
8295static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1,
8296 SDNode *N) {
8297 EVT VT = N0.getValueType();
8298 unsigned BW = VT.getScalarSizeInBits();
8299 SDLoc DL(N);
8300
8301 auto peekThroughResize = [](SDValue V) {
8302 if (V->getOpcode() == ISD::ZERO_EXTEND || V->getOpcode() == ISD::TRUNCATE)
8303 return V->getOperand(0);
8304 return V;
8305 };
8306
8307 SDValue N0Resized = peekThroughResize(N0);
8308 if (N0Resized.getOpcode() == ISD::AND) {
8309 SDValue N1Resized = peekThroughResize(N1);
8310 SDValue N00 = N0Resized.getOperand(0);
8311 SDValue N01 = N0Resized.getOperand(1);
8312
8313 // fold or (and x, y), x --> x
8314 if (N00 == N1Resized || N01 == N1Resized)
8315 return N1;
8316
8317 // fold (or (and X, (xor Y, -1)), Y) -> (or X, Y)
8318 // TODO: Set AllowUndefs = true.
8319 if (SDValue NotOperand = getBitwiseNotOperand(N01, N00,
8320 /* AllowUndefs */ false)) {
8321 if (peekThroughResize(NotOperand) == N1Resized)
8322 return DAG.getNode(ISD::OR, DL, VT, DAG.getZExtOrTrunc(N00, DL, VT),
8323 N1);
8324 }
8325
8326 // fold (or (and (xor Y, -1), X), Y) -> (or X, Y)
8327 if (SDValue NotOperand = getBitwiseNotOperand(N00, N01,
8328 /* AllowUndefs */ false)) {
8329 if (peekThroughResize(NotOperand) == N1Resized)
8330 return DAG.getNode(ISD::OR, DL, VT, DAG.getZExtOrTrunc(N01, DL, VT),
8331 N1);
8332 }
8333 }
8334
8335 SDValue X, Y;
8336
8337 // fold or (xor X, N1), N1 --> or X, N1
8338 if (sd_match(N0, m_Xor(m_Value(X), m_Specific(N1))))
8339 return DAG.getNode(ISD::OR, DL, VT, X, N1);
8340
8341 // fold or (xor x, y), (x and/or y) --> or x, y
8342 if (sd_match(N0, m_Xor(m_Value(X), m_Value(Y))) &&
8343 (sd_match(N1, m_And(m_Specific(X), m_Specific(Y))) ||
8344 sd_match(N1, m_Or(m_Specific(X), m_Specific(Y)))))
8345 return DAG.getNode(ISD::OR, DL, VT, X, Y);
8346
8347 if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
8348 return R;
8349
8350 auto peekThroughZext = [](SDValue V) {
8351 if (V->getOpcode() == ISD::ZERO_EXTEND)
8352 return V->getOperand(0);
8353 return V;
8354 };
8355
8356 // (fshl X, ?, Y) | (shl X, Y) --> fshl X, ?, Y
8357 if (N0.getOpcode() == ISD::FSHL && N1.getOpcode() == ISD::SHL &&
8358 N0.getOperand(0) == N1.getOperand(0) &&
8359 peekThroughZext(N0.getOperand(2)) == peekThroughZext(N1.getOperand(1)))
8360 return N0;
8361
8362 // (fshr ?, X, Y) | (srl X, Y) --> fshr ?, X, Y
8363 if (N0.getOpcode() == ISD::FSHR && N1.getOpcode() == ISD::SRL &&
8364 N0.getOperand(1) == N1.getOperand(0) &&
8365 peekThroughZext(N0.getOperand(2)) == peekThroughZext(N1.getOperand(1)))
8366 return N0;
8367
8368 // Attempt to match a legalized build_pair-esque pattern:
8369 // or(shl(aext(Hi),BW/2),zext(Lo))
8370 SDValue Lo, Hi;
8371 if (sd_match(N0,
8372 m_OneUse(m_Shl(m_AnyExt(m_Value(Hi)), m_SpecificInt(BW / 2)))) &&
8373 sd_match(N1, m_ZExt(m_Value(Lo))) &&
8374 Lo.getScalarValueSizeInBits() == (BW / 2) &&
8375 Lo.getValueType() == Hi.getValueType()) {
8376 // Fold build_pair(not(Lo),not(Hi)) -> not(build_pair(Lo,Hi)).
8377 SDValue NotLo, NotHi;
8378 if (sd_match(Lo, m_OneUse(m_Not(m_Value(NotLo)))) &&
8379 sd_match(Hi, m_OneUse(m_Not(m_Value(NotHi))))) {
8380 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotLo);
8381 Hi = DAG.getNode(ISD::ANY_EXTEND, DL, VT, NotHi);
8382 Hi = DAG.getNode(ISD::SHL, DL, VT, Hi,
8383 DAG.getShiftAmountConstant(BW / 2, VT, DL));
8384 return DAG.getNOT(DL, DAG.getNode(ISD::OR, DL, VT, Lo, Hi), VT);
8385 }
8386 }
8387
8388 return SDValue();
8389}
8390
8391SDValue DAGCombiner::visitOR(SDNode *N) {
8392 SDValue N0 = N->getOperand(0);
8393 SDValue N1 = N->getOperand(1);
8394 EVT VT = N1.getValueType();
8395 SDLoc DL(N);
8396
8397 // x | x --> x
8398 if (N0 == N1)
8399 return N0;
8400
8401 // fold (or c1, c2) -> c1|c2
8402 if (SDValue C = DAG.FoldConstantArithmetic(ISD::OR, DL, VT, {N0, N1}))
8403 return C;
8404
8405 // canonicalize constant to RHS
8406 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
8407 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
8408 return DAG.getNode(ISD::OR, DL, VT, N1, N0);
8409
8410 // fold vector ops
8411 if (VT.isVector()) {
8412 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
8413 return FoldedVOp;
8414
8415 // fold (or x, 0) -> x, vector edition
8416 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
8417 return N0;
8418
8419 // fold (or x, -1) -> -1, vector edition
8420 if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
8421 // do not return N1, because undef node may exist in N1
8422 return DAG.getAllOnesConstant(DL, N1.getValueType());
8423
8424 // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
8425 // Do this only if the resulting type / shuffle is legal.
8426 auto *SV0 = dyn_cast<ShuffleVectorSDNode>(N0);
8427 auto *SV1 = dyn_cast<ShuffleVectorSDNode>(N1);
8428 if (SV0 && SV1 && TLI.isTypeLegal(VT)) {
8429 bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
8430 bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
8431 bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
8432 bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
8433 // Ensure both shuffles have a zero input.
8434 if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
8435 assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
8436 assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
8437 bool CanFold = true;
8438 int NumElts = VT.getVectorNumElements();
8439 SmallVector<int, 4> Mask(NumElts, -1);
8440
8441 for (int i = 0; i != NumElts; ++i) {
8442 int M0 = SV0->getMaskElt(i);
8443 int M1 = SV1->getMaskElt(i);
8444
8445 // Determine if either index is pointing to a zero vector.
8446 bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
8447 bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));
8448
8449 // If one element is zero and the other side is undef, keep undef.
8450 // This also handles the case that both are undef.
8451 if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0))
8452 continue;
8453
8454 // Make sure only one of the elements is zero.
8455 if (M0Zero == M1Zero) {
8456 CanFold = false;
8457 break;
8458 }
8459
8460 assert((M0 >= 0 || M1 >= 0) && "Undef index!");
8461
8462 // We have a zero and non-zero element. If the non-zero came from
8463 // SV0 make the index a LHS index. If it came from SV1, make it
8464 // a RHS index. We need to mod by NumElts because we don't care
8465 // which operand it came from in the original shuffles.
8466 Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
8467 }
8468
8469 if (CanFold) {
8470 SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
8471 SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
8472 SDValue LegalShuffle =
8473 TLI.buildLegalVectorShuffle(VT, DL, NewLHS, NewRHS, Mask, DAG);
8474 if (LegalShuffle)
8475 return LegalShuffle;
8476 }
8477 }
8478 }
8479 }
8480
8481 // fold (or x, 0) -> x
8482 if (isNullConstant(N1))
8483 return N0;
8484
8485 // fold (or x, -1) -> -1
8486 if (isAllOnesConstant(N1))
8487 return N1;
8488
8489 if (SDValue NewSel = foldBinOpIntoSelect(N))
8490 return NewSel;
8491
8492 // fold (or x, c) -> c iff (x & ~c) == 0
8493 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
8494 if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
8495 return N1;
8496
8497 if (SDValue R = foldAndOrOfSETCC(N, DAG))
8498 return R;
8499
8500 if (SDValue Combined = visitORLike(N0, N1, DL))
8501 return Combined;
8502
8503 if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
8504 return Combined;
8505
8506 // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
8507 if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
8508 return BSwap;
8509 if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
8510 return BSwap;
8511
8512 // reassociate or
8513 if (SDValue ROR = reassociateOps(ISD::OR, DL, N0, N1, N->getFlags()))
8514 return ROR;
8515
8516 // Fold or(vecreduce(x), vecreduce(y)) -> vecreduce(or(x, y))
8517 if (SDValue SD =
8518 reassociateReduction(ISD::VECREDUCE_OR, ISD::OR, DL, VT, N0, N1))
8519 return SD;
8520
8521 // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
8522 // iff (c1 & c2) != 0 or c1/c2 are undef.
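 // For example, (or (and X, 0x0F), 0x0C) -> (and (or X, 0x0C), 0x0F): the masks
 // intersect, so OR-ing in c2 first and masking with c1|c2 afterwards produces
 // the same bits.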
8523 auto MatchIntersect = [](ConstantSDNode *C1, ConstantSDNode *C2) {
8524 return !C1 || !C2 || C1->getAPIntValue().intersects(C2->getAPIntValue());
8525 };
8526 if (N0.getOpcode() == ISD::AND && N0->hasOneUse() &&
8527 ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect, true)) {
8528 if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT,
8529 {N1, N0.getOperand(1)})) {
8530 SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
8531 AddToWorklist(IOR.getNode());
8532 return DAG.getNode(ISD::AND, DL, VT, COR, IOR);
8533 }
8534 }
8535
8536 if (SDValue Combined = visitORCommutative(DAG, N0, N1, N))
8537 return Combined;
8538 if (SDValue Combined = visitORCommutative(DAG, N1, N0, N))
8539 return Combined;
8540
8541 // Simplify: (or (op x...), (op y...)) -> (op (or x, y))
8542 if (N0.getOpcode() == N1.getOpcode())
8543 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
8544 return V;
8545
8546 // See if this is some rotate idiom.
8547 if (SDValue Rot = MatchRotate(N0, N1, DL, /*FromAdd=*/false))
8548 return Rot;
8549
8550 if (SDValue Load = MatchLoadCombine(N))
8551 return Load;
8552
8553 // Simplify the operands using demanded-bits information.
8554 if (SimplifyDemandedBits(SDValue(N, 0)))
8555 return SDValue(N, 0);
8556
8557 // If OR can be rewritten into ADD, try combines based on ADD.
8558 if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
8559 DAG.isADDLike(SDValue(N, 0)))
8560 if (SDValue Combined = visitADDLike(N))
8561 return Combined;
8562
8563 // Postpone until legalization completed to avoid interference with bswap
8564 // folding
8565 if (LegalOperations || VT.isVector())
8566 if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
8567 return R;
8568
8569 if (VT.isScalarInteger() && VT != MVT::i1)
8570 if (SDValue R = foldMaskedMerge(N, DAG, TLI, DL))
8571 return R;
8572
8573 return SDValue();
8574}
8575
8576static SDValue stripConstantMask(const SelectionDAG &DAG, SDValue Op,
8577 SDValue &Mask) {
8578 if (Op.getOpcode() == ISD::AND &&
8579 DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
8580 Mask = Op.getOperand(1);
8581 return Op.getOperand(0);
8582 }
8583 return Op;
8584}
8585
8586/// Match "(X shl/srl V1) & V2" where V2 may not be present.
8587static bool matchRotateHalf(const SelectionDAG &DAG, SDValue Op, SDValue &Shift,
8588 SDValue &Mask) {
8589 Op = stripConstantMask(DAG, Op, Mask);
8590 if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
8591 Shift = Op;
8592 return true;
8593 }
8594 return false;
8595}
8596
8597/// Helper function for visitOR to extract the needed side of a rotate idiom
8598/// from a shl/srl/mul/udiv. This is meant to handle cases where
8599/// InstCombine merged some outside op with one of the shifts from
8600/// the rotate pattern.
8601/// \returns An empty \c SDValue if the needed shift couldn't be extracted.
8602/// Otherwise, returns an expansion of \p ExtractFrom based on the following
8603/// patterns:
8604///
8605/// (or (add v v) (shrl v bitwidth-1)):
8606/// expands (add v v) -> (shl v 1)
8607///
8608/// (or (mul v c0) (shrl (mul v c1) c2)):
8609/// expands (mul v c0) -> (shl (mul v c1) c3)
8610///
8611/// (or (udiv v c0) (shl (udiv v c1) c2)):
8612/// expands (udiv v c0) -> (shrl (udiv v c1) c3)
8613///
8614/// (or (shl v c0) (shrl (shl v c1) c2)):
8615/// expands (shl v c0) -> (shl (shl v c1) c3)
8616///
8617/// (or (shrl v c0) (shl (shrl v c1) c2)):
8618/// expands (shrl v c0) -> (shrl (shrl v c1) c3)
8619///
8620/// Such that in all cases, c3+c2==bitwidth(op v c1).
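/// For example, on i32 with c1 == 3, c2 == 27 and c0 == 96 (== 3 << 5):
///   (or (mul v 96) (srl (mul v 3) 27))
/// expands (mul v 96) to (shl (mul v 3) 5), and since 5 + 27 == 32 the caller
/// can then form (rotl (mul v 3) 5).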
8621static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
8622 SDValue ExtractFrom, SDValue &Mask,
8623 const SDLoc &DL) {
8624 assert(OppShift && ExtractFrom && "Empty SDValue");
8625 if (OppShift.getOpcode() != ISD::SHL && OppShift.getOpcode() != ISD::SRL)
8626 return SDValue();
8627
8628 ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask);
8629
8630 // Value and Type of the shift.
8631 SDValue OppShiftLHS = OppShift.getOperand(0);
8632 EVT ShiftedVT = OppShiftLHS.getValueType();
8633
8634 // Amount of the existing shift.
8635 ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));
8636
8637 // (add v v) -> (shl v 1)
8638 // TODO: Should this be a general DAG canonicalization?
8639 if (OppShift.getOpcode() == ISD::SRL && OppShiftCst &&
8640 ExtractFrom.getOpcode() == ISD::ADD &&
8641 ExtractFrom.getOperand(0) == ExtractFrom.getOperand(1) &&
8642 ExtractFrom.getOperand(0) == OppShiftLHS &&
8643 OppShiftCst->getAPIntValue() == ShiftedVT.getScalarSizeInBits() - 1)
8644 return DAG.getNode(ISD::SHL, DL, ShiftedVT, OppShiftLHS,
8645 DAG.getShiftAmountConstant(1, ShiftedVT, DL));
8646
8647 // Preconditions:
8648 // (or (op0 v c0) (shiftl/r (op0 v c1) c2))
8649 //
8650 // Find opcode of the needed shift to be extracted from (op0 v c0).
8651 unsigned Opcode = ISD::DELETED_NODE;
8652 bool IsMulOrDiv = false;
8653 // Set Opcode and IsMulOrDiv if the extract opcode matches the needed shift
8654 // opcode or its arithmetic (mul or udiv) variant.
8655 auto SelectOpcode = [&](unsigned NeededShift, unsigned MulOrDivVariant) {
8656 IsMulOrDiv = ExtractFrom.getOpcode() == MulOrDivVariant;
8657 if (!IsMulOrDiv && ExtractFrom.getOpcode() != NeededShift)
8658 return false;
8659 Opcode = NeededShift;
8660 return true;
8661 };
8662 // op0 must be either the needed shift opcode or the mul/udiv equivalent
8663 // that the needed shift can be extracted from.
8664 if ((OppShift.getOpcode() != ISD::SRL || !SelectOpcode(ISD::SHL, ISD::MUL)) &&
8665 (OppShift.getOpcode() != ISD::SHL || !SelectOpcode(ISD::SRL, ISD::UDIV)))
8666 return SDValue();
8667
8668 // op0 must be the same opcode on both sides, have the same LHS argument,
8669 // and produce the same value type.
8670 if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() ||
8671 OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) ||
8672 ShiftedVT != ExtractFrom.getValueType())
8673 return SDValue();
8674
8675 // Constant mul/udiv/shift amount from the RHS of the shift's LHS op.
8676 ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1));
8677 // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op.
8678 ConstantSDNode *ExtractFromCst =
8679 isConstOrConstSplat(ExtractFrom.getOperand(1));
8680 // TODO: We should be able to handle non-uniform constant vectors for these values
8681 // Check that we have constant values.
8682 if (!OppShiftCst || !OppShiftCst->getAPIntValue() ||
8683 !OppLHSCst || !OppLHSCst->getAPIntValue() ||
8684 !ExtractFromCst || !ExtractFromCst->getAPIntValue())
8685 return SDValue();
8686
8687 // Compute the shift amount we need to extract to complete the rotate.
8688 const unsigned VTWidth = ShiftedVT.getScalarSizeInBits();
8689 if (OppShiftCst->getAPIntValue().ugt(VTWidth))
8690 return SDValue();
8691 APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
8692 // Normalize the bitwidth of the two mul/udiv/shift constant operands.
8693 APInt ExtractFromAmt = ExtractFromCst->getAPIntValue();
8694 APInt OppLHSAmt = OppLHSCst->getAPIntValue();
8695 zeroExtendToMatch(ExtractFromAmt, OppLHSAmt);
8696
8697 // Now try extract the needed shift from the ExtractFrom op and see if the
8698 // result matches up with the existing shift's LHS op.
8699 if (IsMulOrDiv) {
8700 // Op to extract from is a mul or udiv by a constant.
8701 // Check:
8702 // c2 / (1 << (bitwidth(op0 v c0) - c1)) == c0
8703 // c2 % (1 << (bitwidth(op0 v c0) - c1)) == 0
8704 const APInt ExtractDiv = APInt::getOneBitSet(ExtractFromAmt.getBitWidth(),
8705 NeededShiftAmt.getZExtValue());
8706 APInt ResultAmt;
8707 APInt Rem;
8708 APInt::udivrem(ExtractFromAmt, ExtractDiv, ResultAmt, Rem);
8709 if (Rem != 0 || ResultAmt != OppLHSAmt)
8710 return SDValue();
8711 } else {
8712 // Op to extract from is a shift by a constant.
8713 // Check:
8714 // c2 - (bitwidth(op0 v c0) - c1) == c0
8715 if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc(
8716 ExtractFromAmt.getBitWidth()))
8717 return SDValue();
8718 }
8719
8720 // Return the expanded shift op that should allow a rotate to be formed.
8721 EVT ShiftVT = OppShift.getOperand(1).getValueType();
8722 EVT ResVT = ExtractFrom.getValueType();
8723 SDValue NewShiftNode = DAG.getConstant(NeededShiftAmt, DL, ShiftVT);
8724 return DAG.getNode(Opcode, DL, ResVT, OppShiftLHS, NewShiftNode);
8725}
8726
8727// Return true if we can prove that, whenever Neg and Pos are both in the
8728// range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos). This means that
8729// for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
8730//
8731// (or (shift1 X, Neg), (shift2 X, Pos))
8732//
8733// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
8734// in direction shift1 by Neg. The range [0, EltSize) means that we only need
8735// to consider shift amounts with defined behavior.
8736//
8737// The IsRotate flag should be set when the LHS of both shifts is the same.
8738// Otherwise if matching a general funnel shift, it should be clear.
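// For example, with EltSize == 32 and Neg == (sub 32, Pos), the stronger
// condition [B] below holds, so (or (shl X, Pos), (srl X, Neg)) can be turned
// into (rotl X, Pos).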
8739static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
8740 SelectionDAG &DAG, bool IsRotate, bool FromAdd) {
8741 const auto &TLI = DAG.getTargetLoweringInfo();
8742 // If EltSize is a power of 2 then:
8743 //
8744 // (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
8745 // (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
8746 //
8747 // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
8748 // for the stronger condition:
8749 //
8750 // Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1) [A]
8751 //
8752 // for all Neg and Pos. Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
8753 // we can just replace Neg with Neg' for the rest of the function.
8754 //
8755 // In other cases we check for the even stronger condition:
8756 //
8757 // Neg == EltSize - Pos [B]
8758 //
8759 // for all Neg and Pos. Note that the (or ...) then invokes undefined
8760 // behavior if Pos == 0 (and consequently Neg == EltSize).
8761 //
8762 // We could actually use [A] whenever EltSize is a power of 2, but the
8763 // only extra cases that it would match are those uninteresting ones
8764 // where Neg and Pos are never in range at the same time. E.g. for
8765 // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
8766 // as well as (sub 32, Pos), but:
8767 //
8768 // (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
8769 //
8770 // always invokes undefined behavior for 32-bit X.
8771 //
8772 // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
8773 // This allows us to peek through any operations that only affect Mask's
8774 // un-demanded bits.
8775 //
8776 // NOTE: We can only do this when matching operations which won't modify the
8777 // least Log2(EltSize) significant bits and not a general funnel shift.
8778 unsigned MaskLoBits = 0;
8779 if (IsRotate && !FromAdd && isPowerOf2_64(EltSize)) {
8780 unsigned Bits = Log2_64(EltSize);
8781 unsigned NegBits = Neg.getScalarValueSizeInBits();
8782 if (NegBits >= Bits) {
8783 APInt DemandedBits = APInt::getLowBitsSet(NegBits, Bits);
8784 if (SDValue Inner =
8785 TLI.SimplifyMultipleUseDemandedBits(Neg, DemandedBits, DAG)) {
8786 Neg = Inner;
8787 MaskLoBits = Bits;
8788 }
8789 }
8790 }
8791
8792 // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
8793 if (Neg.getOpcode() != ISD::SUB)
8794 return false;
8795 ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
8796 if (!NegC)
8797 return false;
8798 SDValue NegOp1 = Neg.getOperand(1);
8799
8800 // On the RHS of [A], if Pos is the result of operation on Pos' that won't
8801 // affect Mask's demanded bits, just replace Pos with Pos'. These operations
8802 // are redundant for the purpose of the equality.
8803 if (MaskLoBits) {
8804 unsigned PosBits = Pos.getScalarValueSizeInBits();
8805 if (PosBits >= MaskLoBits) {
8806 APInt DemandedBits = APInt::getLowBitsSet(PosBits, MaskLoBits);
8807 if (SDValue Inner =
8808 TLI.SimplifyMultipleUseDemandedBits(Pos, DemandedBits, DAG)) {
8809 Pos = Inner;
8810 }
8811 }
8812 }
8813
8814 // The condition we need is now:
8815 //
8816 // (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
8817 //
8818 // If NegOp1 == Pos then we need:
8819 //
8820 // EltSize & Mask == NegC & Mask
8821 //
8822 // (because "x & Mask" is a truncation and distributes through subtraction).
8823 //
8824 // We also need to account for a potential truncation of NegOp1 if the amount
8825 // has already been legalized to a shift amount type.
8826 APInt Width;
8827 if ((Pos == NegOp1) ||
8828 (NegOp1.getOpcode() == ISD::TRUNCATE && Pos == NegOp1.getOperand(0)))
8829 Width = NegC->getAPIntValue();
8830
8831 // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
8832 // Then the condition we want to prove becomes:
8833 //
8834 // (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
8835 //
8836 // which, again because "x & Mask" is a truncation, becomes:
8837 //
8838 // NegC & Mask == (EltSize - PosC) & Mask
8839 // EltSize & Mask == (NegC + PosC) & Mask
8840 else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
8841 if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
8842 Width = PosC->getAPIntValue() + NegC->getAPIntValue();
8843 else
8844 return false;
8845 } else
8846 return false;
8847
8848 // Now we just need to check that EltSize & Mask == Width & Mask.
8849 if (MaskLoBits)
8850 // EltSize & Mask is 0 since Mask is EltSize - 1.
8851 return Width.getLoBits(MaskLoBits) == 0;
8852 return Width == EltSize;
8853}
8854
8855// A subroutine of MatchRotate used once we have found an OR of two opposite
8856// shifts of Shifted. If Neg == <operand size> - Pos then the OR reduces
8857// to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
8858// former being preferred if supported. InnerPos and InnerNeg are Pos and
8859// Neg with outer conversions stripped away.
8860SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
8861 SDValue Neg, SDValue InnerPos,
8862 SDValue InnerNeg, bool FromAdd,
8863 bool HasPos, unsigned PosOpcode,
8864 unsigned NegOpcode, const SDLoc &DL) {
8865 // fold (or/add (shl x, (*ext y)),
8866 // (srl x, (*ext (sub 32, y)))) ->
8867 // (rotl x, y) or (rotr x, (sub 32, y))
8868 //
8869 // fold (or/add (shl x, (*ext (sub 32, y))),
8870 // (srl x, (*ext y))) ->
8871 // (rotr x, y) or (rotl x, (sub 32, y))
8872 EVT VT = Shifted.getValueType();
8873 if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG,
8874 /*IsRotate*/ true, FromAdd))
8875 return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
8876 HasPos ? Pos : Neg);
8877
8878 return SDValue();
8879}
8880
8881// A subroutine of MatchRotate used once we have found an OR of two opposite
8882// shifts of N0 + N1. If Neg == <operand size> - Pos then the OR reduces
8883// to both (PosOpcode N0, N1, Pos) and (NegOpcode N0, N1, Neg), with the
8884// former being preferred if supported. InnerPos and InnerNeg are Pos and
8885// Neg with outer conversions stripped away.
8886// TODO: Merge with MatchRotatePosNeg.
8887SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos,
8888 SDValue Neg, SDValue InnerPos,
8889 SDValue InnerNeg, bool FromAdd,
8890 bool HasPos, unsigned PosOpcode,
8891 unsigned NegOpcode, const SDLoc &DL) {
8892 EVT VT = N0.getValueType();
8893 unsigned EltBits = VT.getScalarSizeInBits();
8894
8895 // fold (or/add (shl x0, (*ext y)),
8896 // (srl x1, (*ext (sub 32, y)))) ->
8897 // (fshl x0, x1, y) or (fshr x0, x1, (sub 32, y))
8898 //
8899 // fold (or/add (shl x0, (*ext (sub 32, y))),
8900 // (srl x1, (*ext y))) ->
8901 // (fshr x0, x1, y) or (fshl x0, x1, (sub 32, y))
8902 if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG, /*IsRotate*/ N0 == N1,
8903 FromAdd))
8904 return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, N0, N1,
8905 HasPos ? Pos : Neg);
8906
8907 // Matching the shift+xor cases, we can't easily use the xor'd shift amount
8908 // so for now just use the PosOpcode case if its legal.
8909 // TODO: When can we use the NegOpcode case?
8910 if (PosOpcode == ISD::FSHL && isPowerOf2_32(EltBits)) {
8911 SDValue X;
8912 // fold (or/add (shl x0, y), (srl (srl x1, 1), (xor y, 31)))
8913 // -> (fshl x0, x1, y)
8914 if (sd_match(N1, m_Srl(m_Value(X), m_One())) &&
8915 sd_match(InnerNeg,
8916 m_Xor(m_Specific(InnerPos), m_SpecificInt(EltBits - 1))) &&
8918 return DAG.getNode(ISD::FSHL, DL, VT, N0, X, Pos);
8919 }
8920
8921 // fold (or/add (shl (shl x0, 1), (xor y, 31)), (srl x1, y))
8922 // -> (fshr x0, x1, y)
8923 if (sd_match(N0, m_Shl(m_Value(X), m_One())) &&
8924 sd_match(InnerPos,
8925 m_Xor(m_Specific(InnerNeg), m_SpecificInt(EltBits - 1))) &&
8927 return DAG.getNode(ISD::FSHR, DL, VT, X, N1, Neg);
8928 }
8929
8930 // fold (or/add (shl (add x0, x0), (xor y, 31)), (srl x1, y))
8931 // -> (fshr x0, x1, y)
8932 // TODO: Should add(x,x) -> shl(x,1) be a general DAG canonicalization?
8933 if (sd_match(N0, m_Add(m_Value(X), m_Deferred(X))) &&
8934 sd_match(InnerPos,
8935 m_Xor(m_Specific(InnerNeg), m_SpecificInt(EltBits - 1))) &&
8937 return DAG.getNode(ISD::FSHR, DL, VT, X, N1, Neg);
8938 }
8939 }
8940
8941 return SDValue();
8942}
8943
8944// MatchRotate - Handle an 'or' or 'add' of two operands. If this is one of the
8945// many idioms for rotate, and if the target supports rotation instructions,
8946// generate a rot[lr]. This also matches funnel shift patterns, similar to
8947// rotation but with different shifted sources.
8948SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL,
8949 bool FromAdd) {
8950 EVT VT = LHS.getValueType();
8951
8952 // The target must have at least one rotate/funnel flavor.
8953 // We still try to match rotate by constant pre-legalization.
8954 // TODO: Support pre-legalization funnel-shift by constant.
8955 bool HasROTL = hasOperation(ISD::ROTL, VT);
8956 bool HasROTR = hasOperation(ISD::ROTR, VT);
8957 bool HasFSHL = hasOperation(ISD::FSHL, VT);
8958 bool HasFSHR = hasOperation(ISD::FSHR, VT);
8959
8960 // If the type is going to be promoted and the target has enabled custom
8961 // lowering for rotate, allow matching rotate by non-constants. Only allow
8962 // this for scalar types.
8963 if (VT.isScalarInteger() && TLI.getTypeAction(*DAG.getContext(), VT) ==
8964 TargetLowering::TypePromoteInteger) {
8965 HasROTL |= TLI.getOperationAction(ISD::ROTL, VT) == TargetLowering::Custom;
8966 HasROTR |= TLI.getOperationAction(ISD::ROTR, VT) == TargetLowering::Custom;
8967 }
8968
8969 if (LegalOperations && !HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
8970 return SDValue();
8971
8972 // Check for truncated rotate.
8973 if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
8974 LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
8975 assert(LHS.getValueType() == RHS.getValueType());
8976 if (SDValue Rot =
8977 MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL, FromAdd))
8978 return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(), Rot);
8979 }
8980
8981 // Match "(X shl/srl V1) & V2" where V2 may not be present.
8982 SDValue LHSShift; // The shift.
8983 SDValue LHSMask; // AND value if any.
8984 matchRotateHalf(DAG, LHS, LHSShift, LHSMask);
8985
8986 SDValue RHSShift; // The shift.
8987 SDValue RHSMask; // AND value if any.
8988 matchRotateHalf(DAG, RHS, RHSShift, RHSMask);
8989
8990 // If neither side matched a rotate half, bail
8991 if (!LHSShift && !RHSShift)
8992 return SDValue();
8993
8994 // InstCombine may have combined a constant shl, srl, mul, or udiv with one
8995 // side of the rotate, so try to handle that here. In all cases we need to
8996 // pass the matched shift from the opposite side to compute the opcode and
8997 // needed shift amount to extract. We still want to do this if both sides
8998 // matched a rotate half because one half may be a potential overshift that
8999 // can be broken down (ie if InstCombine merged two shl or srl ops into a
9000 // single one).
9001
9002 // Have LHS side of the rotate, try to extract the needed shift from the RHS.
9003 if (LHSShift)
9004 if (SDValue NewRHSShift =
9005 extractShiftForRotate(DAG, LHSShift, RHS, RHSMask, DL))
9006 RHSShift = NewRHSShift;
9007 // Have RHS side of the rotate, try to extract the needed shift from the LHS.
9008 if (RHSShift)
9009 if (SDValue NewLHSShift =
9010 extractShiftForRotate(DAG, RHSShift, LHS, LHSMask, DL))
9011 LHSShift = NewLHSShift;
9012
9013 // If a side is still missing, nothing else we can do.
9014 if (!RHSShift || !LHSShift)
9015 return SDValue();
9016
9017 // At this point we've matched or extracted a shift op on each side.
9018
9019 if (LHSShift.getOpcode() == RHSShift.getOpcode())
9020 return SDValue(); // Shifts must disagree.
9021
9022 // Canonicalize shl to left side in a shl/srl pair.
9023 if (RHSShift.getOpcode() == ISD::SHL) {
9024 std::swap(LHS, RHS);
9025 std::swap(LHSShift, RHSShift);
9026 std::swap(LHSMask, RHSMask);
9027 }
9028
9029 // Something has gone wrong - we've lost the shl/srl pair - bail.
9030 if (LHSShift.getOpcode() != ISD::SHL || RHSShift.getOpcode() != ISD::SRL)
9031 return SDValue();
9032
9033 unsigned EltSizeInBits = VT.getScalarSizeInBits();
9034 SDValue LHSShiftArg = LHSShift.getOperand(0);
9035 SDValue LHSShiftAmt = LHSShift.getOperand(1);
9036 SDValue RHSShiftArg = RHSShift.getOperand(0);
9037 SDValue RHSShiftAmt = RHSShift.getOperand(1);
9038
9039 auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
9040 ConstantSDNode *RHS) {
9041 return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
9042 };
9043
9044 auto ApplyMasks = [&](SDValue Res) {
9045 // If there is an AND of either shifted operand, apply it to the result.
9046 if (LHSMask.getNode() || RHSMask.getNode()) {
9047 SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
9048 SDValue Mask = AllOnes;
9049
9050 if (LHSMask.getNode()) {
9051 SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
9052 Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
9053 DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
9054 }
9055 if (RHSMask.getNode()) {
9056 SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
9057 Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
9058 DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
9059 }
9060
9061 Res = DAG.getNode(ISD::AND, DL, VT, Res, Mask);
9062 }
9063
9064 return Res;
9065 };
9066
9067 // TODO: Support pre-legalization funnel-shift by constant.
9068 bool IsRotate = LHSShiftArg == RHSShiftArg;
9069 if (!IsRotate && !(HasFSHL || HasFSHR)) {
9070 if (TLI.isTypeLegal(VT) && LHS.hasOneUse() && RHS.hasOneUse() &&
9071 ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
9072 // Look for a disguised rotate by constant.
9073 // The common shifted operand X may be hidden inside another 'or'.
9074 SDValue X, Y;
9075 auto matchOr = [&X, &Y](SDValue Or, SDValue CommonOp) {
9076 if (!Or.hasOneUse() || Or.getOpcode() != ISD::OR)
9077 return false;
9078 if (CommonOp == Or.getOperand(0)) {
9079 X = CommonOp;
9080 Y = Or.getOperand(1);
9081 return true;
9082 }
9083 if (CommonOp == Or.getOperand(1)) {
9084 X = CommonOp;
9085 Y = Or.getOperand(0);
9086 return true;
9087 }
9088 return false;
9089 };
9090
9091 SDValue Res;
9092 if (matchOr(LHSShiftArg, RHSShiftArg)) {
9093 // (shl (X | Y), C1) | (srl X, C2) --> (rotl X, C1) | (shl Y, C1)
9094 SDValue RotX = DAG.getNode(ISD::ROTL, DL, VT, X, LHSShiftAmt);
9095 SDValue ShlY = DAG.getNode(ISD::SHL, DL, VT, Y, LHSShiftAmt);
9096 Res = DAG.getNode(ISD::OR, DL, VT, RotX, ShlY);
9097 } else if (matchOr(RHSShiftArg, LHSShiftArg)) {
9098 // (shl X, C1) | (srl (X | Y), C2) --> (rotl X, C1) | (srl Y, C2)
9099 SDValue RotX = DAG.getNode(ISD::ROTL, DL, VT, X, LHSShiftAmt);
9100 SDValue SrlY = DAG.getNode(ISD::SRL, DL, VT, Y, RHSShiftAmt);
9101 Res = DAG.getNode(ISD::OR, DL, VT, RotX, SrlY);
9102 } else {
9103 return SDValue();
9104 }
9105
9106 return ApplyMasks(Res);
9107 }
9108
9109 return SDValue(); // Requires funnel shift support.
9110 }
9111
9112 // fold (or/add (shl x, C1), (srl x, C2)) -> (rotl x, C1)
9113 // fold (or/add (shl x, C1), (srl x, C2)) -> (rotr x, C2)
9114 // fold (or/add (shl x, C1), (srl y, C2)) -> (fshl x, y, C1)
9115 // fold (or/add (shl x, C1), (srl y, C2)) -> (fshr x, y, C2)
9116 // iff C1+C2 == EltSizeInBits
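 // For example, on i32: (or (shl x, 24), (srl x, 8)) becomes (rotl x, 24),
 // which is the same value as (rotr x, 8).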
9117 if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
9118 SDValue Res;
9119 if (IsRotate && (HasROTL || HasROTR || !(HasFSHL || HasFSHR))) {
9120 bool UseROTL = !LegalOperations || HasROTL;
9121 Res = DAG.getNode(UseROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
9122 UseROTL ? LHSShiftAmt : RHSShiftAmt);
9123 } else {
9124 bool UseFSHL = !LegalOperations || HasFSHL;
9125 Res = DAG.getNode(UseFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg,
9126 RHSShiftArg, UseFSHL ? LHSShiftAmt : RHSShiftAmt);
9127 }
9128
9129 return ApplyMasks(Res);
9130 }
9131
9132 // Even pre-legalization, we can't easily rotate/funnel-shift by a variable
9133 // shift.
9134 if (!HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
9135 return SDValue();
9136
9137 // If there is a mask here, and we have a variable shift, we can't be sure
9138 // that we're masking out the right stuff.
9139 if (LHSMask.getNode() || RHSMask.getNode())
9140 return SDValue();
9141
9142 // If the shift amount is sign/zext/any-extended just peel it off.
9143 SDValue LExtOp0 = LHSShiftAmt;
9144 SDValue RExtOp0 = RHSShiftAmt;
9145 if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
9146 LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
9147 LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
9148 LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
9149 (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
9150 RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
9151 RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
9152 RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
9153 LExtOp0 = LHSShiftAmt.getOperand(0);
9154 RExtOp0 = RHSShiftAmt.getOperand(0);
9155 }
9156
9157 if (IsRotate && (HasROTL || HasROTR)) {
9158 if (SDValue TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt,
9159 LExtOp0, RExtOp0, FromAdd, HasROTL,
9160 ISD::ROTL, ISD::ROTR, DL))
9161 return TryL;
9162
9163 if (SDValue TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
9164 RExtOp0, LExtOp0, FromAdd, HasROTR,
9165 ISD::ROTR, ISD::ROTL, DL))
9166 return TryR;
9167 }
9168
9169 if (SDValue TryL = MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, LHSShiftAmt,
9170 RHSShiftAmt, LExtOp0, RExtOp0, FromAdd,
9171 HasFSHL, ISD::FSHL, ISD::FSHR, DL))
9172 return TryL;
9173
9174 if (SDValue TryR = MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, RHSShiftAmt,
9175 LHSShiftAmt, RExtOp0, LExtOp0, FromAdd,
9176 HasFSHR, ISD::FSHR, ISD::FSHL, DL))
9177 return TryR;
9178
9179 return SDValue();
9180}
9181
9182/// Recursively traverses the expression calculating the origin of the requested
9183/// byte of the given value. Returns std::nullopt if the provider can't be
9184/// calculated.
9185///
9186/// For all the values except the root of the expression, we verify that the
9187/// value has exactly one use and if not then return std::nullopt. This way if
9188/// the origin of the byte is returned it's guaranteed that the values which
9189/// contribute to the byte are not used outside of this expression.
9190
9191/// However, there is a special case when dealing with vector loads -- we allow
9192/// more than one use if the load is a vector type. Since the values that
9193/// contribute to the byte ultimately come from the ExtractVectorElements of the
9194/// Load, we don't care if the Load has uses other than ExtractVectorElements,
9195/// because those operations are independent from the pattern to be combined.
9196/// For vector loads, we simply care that the ByteProviders are adjacent
9197/// positions of the same vector, and their index matches the byte that is being
9198/// provided. This is captured by the \p VectorIndex algorithm. \p VectorIndex
9199/// is the index used in an ExtractVectorElement, and \p StartingIndex is the
9200/// byte position we are trying to provide for the LoadCombine. If these do
9201/// not match, then we can not combine the vector loads. \p Index uses the
9202/// byte position we are trying to provide for and is matched against the
9203/// shl and load size. The \p Index algorithm ensures the requested byte is
9204/// provided for by the pattern, and the pattern does not over provide bytes.
9205///
9206///
9207/// The supported LoadCombine pattern for vector loads is as follows
9208/// or
9209/// / \
9210/// or shl
9211/// / \ |
9212/// or shl zext
9213/// / \ | |
9214/// shl zext zext EVE*
9215/// | | | |
9216/// zext EVE* EVE* LOAD
9217/// | | |
9218/// EVE* LOAD LOAD
9219/// |
9220/// LOAD
9221///
9222/// *ExtractVectorElement
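/// For example, when an i32 is assembled from a (load v4i8), the provider for
/// StartingIndex == 2 must be (extract_vector_elt (load v4i8), 2): with i8
/// elements, VectorIndex has to equal StartingIndex.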
9223using SDByteProvider = ByteProvider<SDNode *>;
9224
9225static std::optional<SDByteProvider>
9226calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
9227 std::optional<uint64_t> VectorIndex,
9228 unsigned StartingIndex = 0) {
9229
9230 // Typical i64 by i8 pattern requires recursion up to 8 calls depth
9231 if (Depth == 10)
9232 return std::nullopt;
9233
9234 // Only allow multiple uses if the instruction is a vector load (in which
9235 // case we will use the load for every ExtractVectorElement)
9236 if (Depth && !Op.hasOneUse() &&
9237 (Op.getOpcode() != ISD::LOAD || !Op.getValueType().isVector()))
9238 return std::nullopt;
9239
9240 // Fail to combine if we have encountered anything but a LOAD after handling
9241 // an ExtractVectorElement.
9242 if (Op.getOpcode() != ISD::LOAD && VectorIndex.has_value())
9243 return std::nullopt;
9244
9245 unsigned BitWidth = Op.getScalarValueSizeInBits();
9246 if (BitWidth % 8 != 0)
9247 return std::nullopt;
9248 unsigned ByteWidth = BitWidth / 8;
9249 assert(Index < ByteWidth && "invalid index requested");
9250 (void) ByteWidth;
9251
9252 switch (Op.getOpcode()) {
9253 case ISD::OR: {
9254 auto LHS =
9255 calculateByteProvider(Op->getOperand(0), Index, Depth + 1, VectorIndex);
9256 if (!LHS)
9257 return std::nullopt;
9258 auto RHS =
9259 calculateByteProvider(Op->getOperand(1), Index, Depth + 1, VectorIndex);
9260 if (!RHS)
9261 return std::nullopt;
9262
9263 if (LHS->isConstantZero())
9264 return RHS;
9265 if (RHS->isConstantZero())
9266 return LHS;
9267 return std::nullopt;
9268 }
9269 case ISD::SHL: {
9270 auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
9271 if (!ShiftOp)
9272 return std::nullopt;
9273
9274 uint64_t BitShift = ShiftOp->getZExtValue();
9275
9276 if (BitShift % 8 != 0)
9277 return std::nullopt;
9278 uint64_t ByteShift = BitShift / 8;
9279
9280 // If we are shifting by an amount greater than the index we are trying to
9281 // provide, then do not provide anything. Otherwise, subtract the index by
9282 // the amount we shifted by.
9283 return Index < ByteShift
9284 ? SDByteProvider::getConstantZero()
9285 : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
9286 Depth + 1, VectorIndex, Index);
9287 }
9288 case ISD::ANY_EXTEND:
9289 case ISD::SIGN_EXTEND:
9290 case ISD::ZERO_EXTEND: {
9291 SDValue NarrowOp = Op->getOperand(0);
9292 unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
9293 if (NarrowBitWidth % 8 != 0)
9294 return std::nullopt;
9295 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
9296
9297 if (Index >= NarrowByteWidth)
9298 return Op.getOpcode() == ISD::ZERO_EXTEND
9299               ? std::optional<SDByteProvider>(
9300                     SDByteProvider::getConstantZero())
9301               : std::nullopt;
9302 return calculateByteProvider(NarrowOp, Index, Depth + 1, VectorIndex,
9303 StartingIndex);
9304 }
9305 case ISD::BSWAP:
9306 return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
9307                                 Depth + 1, VectorIndex, StartingIndex);
9308  case ISD::EXTRACT_VECTOR_ELT: {
9309    auto OffsetOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
9310 if (!OffsetOp)
9311 return std::nullopt;
9312
9313 VectorIndex = OffsetOp->getZExtValue();
9314
9315 SDValue NarrowOp = Op->getOperand(0);
9316 unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
9317 if (NarrowBitWidth % 8 != 0)
9318 return std::nullopt;
9319 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
9320 // EXTRACT_VECTOR_ELT can extend the element type to the width of the return
9321 // type, leaving the high bits undefined.
9322 if (Index >= NarrowByteWidth)
9323 return std::nullopt;
9324
9325 // Check to see if the position of the element in the vector corresponds
9326 // with the byte we are trying to provide for. In the case of a vector of
9327 // i8, this simply means the VectorIndex == StartingIndex. For non i8 cases,
9328 // the element will provide a range of bytes. For example, if we have a
9329 // vector of i16s, each element provides two bytes (V[1] provides byte 2 and
9330 // 3).
9331 if (*VectorIndex * NarrowByteWidth > StartingIndex)
9332 return std::nullopt;
9333 if ((*VectorIndex + 1) * NarrowByteWidth <= StartingIndex)
9334 return std::nullopt;
9335
9336 return calculateByteProvider(Op->getOperand(0), Index, Depth + 1,
9337 VectorIndex, StartingIndex);
9338 }
9339 case ISD::LOAD: {
9340 auto L = cast<LoadSDNode>(Op.getNode());
9341 if (!L->isSimple() || L->isIndexed())
9342 return std::nullopt;
9343
9344 unsigned NarrowBitWidth = L->getMemoryVT().getScalarSizeInBits();
9345 if (NarrowBitWidth % 8 != 0)
9346 return std::nullopt;
9347 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
9348
9349    // If the width of the load does not cover the byte we are trying to
9350    // provide for and it is not a ZEXTLOAD, then the load does not provide the
9351    // byte in question.
9352 if (Index >= NarrowByteWidth)
9353 return L->getExtensionType() == ISD::ZEXTLOAD
9354               ? std::optional<SDByteProvider>(
9355                     SDByteProvider::getConstantZero())
9356               : std::nullopt;
9357
9358 unsigned BPVectorIndex = VectorIndex.value_or(0U);
9359 return SDByteProvider::getSrc(L, Index, BPVectorIndex);
9360 }
9361 }
9362
9363 return std::nullopt;
9364}
9365
9366static unsigned littleEndianByteAt(unsigned BW, unsigned i) {
9367 return i;
9368}
9369
9370static unsigned bigEndianByteAt(unsigned BW, unsigned i) {
9371 return BW - i - 1;
9372}
9373
9374// Check if the byte offsets we are looking at match either a big or a little
9375// endian value load. Return true for big endian, false for little endian,
9376// and std::nullopt if the match failed.
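// For example, relative offsets {0, 1, 2, 3} match a little-endian layout and
// {3, 2, 1, 0} match a big-endian layout, while {0, 2, 1, 3} matches neither
// and the combine is rejected.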
9377static std::optional<bool> isBigEndian(ArrayRef<int64_t> ByteOffsets,
9378 int64_t FirstOffset) {
9379  // Endianness can only be determined when there are at least 2 bytes.
9380 unsigned Width = ByteOffsets.size();
9381 if (Width < 2)
9382 return std::nullopt;
9383
9384 bool BigEndian = true, LittleEndian = true;
9385 for (unsigned i = 0; i < Width; i++) {
9386 int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
9387 LittleEndian &= CurrentByteOffset == littleEndianByteAt(Width, i);
9388 BigEndian &= CurrentByteOffset == bigEndianByteAt(Width, i);
9389 if (!BigEndian && !LittleEndian)
9390 return std::nullopt;
9391 }
9392
9393  assert((BigEndian != LittleEndian) && "It should be either big endian or "
9394                                        "little endian");
9395 return BigEndian;
9396}
9397
9398// Look through one layer of truncate or extend.
9399static SDValue stripTruncAndExt(SDValue Value) {
9400  switch (Value.getOpcode()) {
9401 case ISD::TRUNCATE:
9402 case ISD::ZERO_EXTEND:
9403 case ISD::SIGN_EXTEND:
9404 case ISD::ANY_EXTEND:
9405 return Value.getOperand(0);
9406 }
9407 return SDValue();
9408}
9409
9410/// Match a pattern where a wide type scalar value is stored by several narrow
9411/// stores. Fold it into a single store or a BSWAP and a store if the target
9412/// supports it.
9413///
9414/// Assuming little endian target:
9415/// i8 *p = ...
9416/// i32 val = ...
9417/// p[0] = (val >> 0) & 0xFF;
9418/// p[1] = (val >> 8) & 0xFF;
9419/// p[2] = (val >> 16) & 0xFF;
9420/// p[3] = (val >> 24) & 0xFF;
9421/// =>
9422/// *((i32)p) = val;
9423///
9424/// i8 *p = ...
9425/// i32 val = ...
9426/// p[0] = (val >> 24) & 0xFF;
9427/// p[1] = (val >> 16) & 0xFF;
9428/// p[2] = (val >> 8) & 0xFF;
9429/// p[3] = (val >> 0) & 0xFF;
9430/// =>
9431/// *((i32)p) = BSWAP(val);
9432SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
9433 // The matching looks for "store (trunc x)" patterns that appear early but are
9434 // likely to be replaced by truncating store nodes during combining.
9435 // TODO: If there is evidence that running this later would help, this
9436 // limitation could be removed. Legality checks may need to be added
9437 // for the created store and optional bswap/rotate.
9438 if (LegalOperations || OptLevel == CodeGenOptLevel::None)
9439 return SDValue();
9440
9441 // We only handle merging simple stores of 1-4 bytes.
9442 // TODO: Allow unordered atomics when wider type is legal (see D66309)
9443 EVT MemVT = N->getMemoryVT();
9444 if (!(MemVT == MVT::i8 || MemVT == MVT::i16 || MemVT == MVT::i32) ||
9445 !N->isSimple() || N->isIndexed())
9446 return SDValue();
9447
9448  // Collect all of the stores in the chain, up to the maximum store width (i64).
9449  SDValue Chain = N->getChain();
9450  SmallVector<StoreSDNode *, 8> Stores = {N};
9451  unsigned NarrowNumBits = MemVT.getScalarSizeInBits();
9452 unsigned MaxWideNumBits = 64;
9453 unsigned MaxStores = MaxWideNumBits / NarrowNumBits;
9454 while (auto *Store = dyn_cast<StoreSDNode>(Chain)) {
9455 // All stores must be the same size to ensure that we are writing all of the
9456 // bytes in the wide value.
9457 // This store should have exactly one use as a chain operand for another
9458 // store in the merging set. If there are other chain uses, then the
9459 // transform may not be safe because order of loads/stores outside of this
9460 // set may not be preserved.
9461 // TODO: We could allow multiple sizes by tracking each stored byte.
9462 if (Store->getMemoryVT() != MemVT || !Store->isSimple() ||
9463 Store->isIndexed() || !Store->hasOneUse())
9464 return SDValue();
9465 Stores.push_back(Store);
9466 Chain = Store->getChain();
9467 if (MaxStores < Stores.size())
9468 return SDValue();
9469 }
9470 // There is no reason to continue if we do not have at least a pair of stores.
9471 if (Stores.size() < 2)
9472 return SDValue();
9473
9474 // Handle simple types only.
9475 LLVMContext &Context = *DAG.getContext();
9476 unsigned NumStores = Stores.size();
9477 unsigned WideNumBits = NumStores * NarrowNumBits;
9478 if (WideNumBits != 16 && WideNumBits != 32 && WideNumBits != 64)
9479 return SDValue();
9480
9481 // Check if all bytes of the source value that we are looking at are stored
9482 // to the same base address. Collect offsets from Base address into OffsetMap.
9483 SDValue SourceValue;
9484 SmallVector<int64_t, 8> OffsetMap(NumStores, INT64_MAX);
9485 int64_t FirstOffset = INT64_MAX;
9486 StoreSDNode *FirstStore = nullptr;
9487 std::optional<BaseIndexOffset> Base;
9488 for (auto *Store : Stores) {
9489    // All of the stores store different parts of the combined wide value. A
9490    // truncate is required to get each partial value.
9491 SDValue Trunc = Store->getValue();
9492 if (Trunc.getOpcode() != ISD::TRUNCATE)
9493 return SDValue();
9494 // Other than the first/last part, a shift operation is required to get the
9495 // offset.
9496 int64_t Offset = 0;
9497 SDValue WideVal = Trunc.getOperand(0);
9498 if ((WideVal.getOpcode() == ISD::SRL || WideVal.getOpcode() == ISD::SRA) &&
9499 isa<ConstantSDNode>(WideVal.getOperand(1))) {
9500 // The shift amount must be a constant multiple of the narrow type.
9501 // It is translated to the offset address in the wide source value "y".
9502 //
9503 // x = srl y, ShiftAmtC
9504 // i8 z = trunc x
9505 // store z, ...
9506 uint64_t ShiftAmtC = WideVal.getConstantOperandVal(1);
9507 if (ShiftAmtC % NarrowNumBits != 0)
9508 return SDValue();
9509
9510 // Make sure we aren't reading bits that are shifted in.
9511 if (ShiftAmtC > WideVal.getScalarValueSizeInBits() - NarrowNumBits)
9512 return SDValue();
9513
9514 Offset = ShiftAmtC / NarrowNumBits;
9515 WideVal = WideVal.getOperand(0);
9516 }
9517
9518 // Stores must share the same source value with different offsets.
9519 if (!SourceValue)
9520 SourceValue = WideVal;
9521 else if (SourceValue != WideVal) {
9522      // Truncates and extends can be stripped to see if the values are related.
9523 if (stripTruncAndExt(SourceValue) != WideVal &&
9524 stripTruncAndExt(WideVal) != SourceValue)
9525 return SDValue();
9526
9527 if (WideVal.getScalarValueSizeInBits() >
9528 SourceValue.getScalarValueSizeInBits())
9529 SourceValue = WideVal;
9530
9531 // Give up if the source value type is smaller than the store size.
9532 if (SourceValue.getScalarValueSizeInBits() < WideNumBits)
9533 return SDValue();
9534 }
9535
9536 // Stores must share the same base address.
9537 BaseIndexOffset Ptr = BaseIndexOffset::match(Store, DAG);
9538 int64_t ByteOffsetFromBase = 0;
9539 if (!Base)
9540 Base = Ptr;
9541 else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
9542 return SDValue();
9543
9544 // Remember the first store.
9545 if (ByteOffsetFromBase < FirstOffset) {
9546 FirstStore = Store;
9547 FirstOffset = ByteOffsetFromBase;
9548 }
9549 // Map the offset in the store and the offset in the combined value, and
9550 // early return if it has been set before.
9551 if (Offset < 0 || Offset >= NumStores || OffsetMap[Offset] != INT64_MAX)
9552 return SDValue();
9553 OffsetMap[Offset] = ByteOffsetFromBase;
9554 }
9555
9556 EVT WideVT = EVT::getIntegerVT(Context, WideNumBits);
9557
9558 assert(FirstOffset != INT64_MAX && "First byte offset must be set");
9559 assert(FirstStore && "First store must be set");
9560
9561 // Check that a store of the wide type is both allowed and fast on the target
9562 const DataLayout &Layout = DAG.getDataLayout();
9563 unsigned Fast = 0;
9564 bool Allowed = TLI.allowsMemoryAccess(Context, Layout, WideVT,
9565 *FirstStore->getMemOperand(), &Fast);
9566 if (!Allowed || !Fast)
9567 return SDValue();
9568
9569 // Check if the pieces of the value are going to the expected places in memory
9570 // to merge the stores.
9571 auto checkOffsets = [&](bool MatchLittleEndian) {
9572 if (MatchLittleEndian) {
9573 for (unsigned i = 0; i != NumStores; ++i)
9574 if (OffsetMap[i] != i * (NarrowNumBits / 8) + FirstOffset)
9575 return false;
9576 } else { // MatchBigEndian by reversing loop counter.
9577 for (unsigned i = 0, j = NumStores - 1; i != NumStores; ++i, --j)
9578 if (OffsetMap[j] != i * (NarrowNumBits / 8) + FirstOffset)
9579 return false;
9580 }
9581 return true;
9582 };
9583
9584 // Check if the offsets line up for the native data layout of this target.
9585 bool NeedBswap = false;
9586 bool NeedRotate = false;
9587 if (!checkOffsets(Layout.isLittleEndian())) {
9588 // Special-case: check if byte offsets line up for the opposite endian.
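    // E.g. an i32 assembled from two i16 pieces stored in reversed order can
    // be fixed up with a rotate by 16, whereas four reversed i8 stores need a
    // full byte swap.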
9589 if (NarrowNumBits == 8 && checkOffsets(Layout.isBigEndian()))
9590 NeedBswap = true;
9591 else if (NumStores == 2 && checkOffsets(Layout.isBigEndian()))
9592 NeedRotate = true;
9593 else
9594 return SDValue();
9595 }
9596
9597 SDLoc DL(N);
9598 if (WideVT != SourceValue.getValueType()) {
9599 assert(SourceValue.getValueType().getScalarSizeInBits() > WideNumBits &&
9600 "Unexpected store value to merge");
9601 SourceValue = DAG.getNode(ISD::TRUNCATE, DL, WideVT, SourceValue);
9602 }
9603
9604 // Before legalize we can introduce illegal bswaps/rotates which will be later
9605 // converted to an explicit bswap sequence. This way we end up with a single
9606 // store and byte shuffling instead of several stores and byte shuffling.
9607 if (NeedBswap) {
9608 SourceValue = DAG.getNode(ISD::BSWAP, DL, WideVT, SourceValue);
9609 } else if (NeedRotate) {
9610 assert(WideNumBits % 2 == 0 && "Unexpected type for rotate");
9611 SDValue RotAmt = DAG.getConstant(WideNumBits / 2, DL, WideVT);
9612 SourceValue = DAG.getNode(ISD::ROTR, DL, WideVT, SourceValue, RotAmt);
9613 }
9614
9615 SDValue NewStore =
9616 DAG.getStore(Chain, DL, SourceValue, FirstStore->getBasePtr(),
9617 FirstStore->getPointerInfo(), FirstStore->getAlign());
9618
9619 // Rely on other DAG combine rules to remove the other individual stores.
9620 DAG.ReplaceAllUsesWith(N, NewStore.getNode());
9621 return NewStore;
9622}
9623
9624/// Match a pattern where a wide type scalar value is loaded by several narrow
9625/// loads and combined by shifts and ors. Fold it into a single load or a load
9626/// and a BSWAP if the targets supports it.
9627///
9628/// Assuming little endian target:
9629/// i8 *a = ...
9630/// i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
9631/// =>
9632/// i32 val = *((i32)a)
9633///
9634/// i8 *a = ...
9635/// i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
9636/// =>
9637/// i32 val = BSWAP(*((i32)a))
9638///
9639/// TODO: This rule matches complex patterns with OR node roots and doesn't
9640/// interact well with the worklist mechanism. When a part of the pattern is
9641/// updated (e.g. one of the loads) its direct users are put into the worklist,
9642/// but the root node of the pattern which triggers the load combine is not
9643/// necessarily a direct user of the changed node. For example, once the address
9644/// of the t28 load is reassociated, the load combine won't be triggered:
9645/// t25: i32 = add t4, Constant:i32<2>
9646/// t26: i64 = sign_extend t25
9647/// t27: i64 = add t2, t26
9648/// t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
9649/// t29: i32 = zero_extend t28
9650/// t32: i32 = shl t29, Constant:i8<8>
9651/// t33: i32 = or t23, t32
9652/// As a possible fix, visitLoad could check if the load can be part of a load
9653/// combine pattern and add corresponding OR roots to the worklist.
9654SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
9655 assert(N->getOpcode() == ISD::OR &&
9656 "Can only match load combining against OR nodes");
9657
9658 // Handles simple types only
9659 EVT VT = N->getValueType(0);
9660 if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
9661 return SDValue();
9662 unsigned ByteWidth = VT.getSizeInBits() / 8;
9663
9664 bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
9665 auto MemoryByteOffset = [&](SDByteProvider P) {
9666 assert(P.hasSrc() && "Must be a memory byte provider");
9667 auto *Load = cast<LoadSDNode>(P.Src.value());
9668
9669 unsigned LoadBitWidth = Load->getMemoryVT().getScalarSizeInBits();
9670
9671 assert(LoadBitWidth % 8 == 0 &&
9672           "can only analyze providers for individual bytes, not bits");
9673 unsigned LoadByteWidth = LoadBitWidth / 8;
9674 return IsBigEndianTarget ? bigEndianByteAt(LoadByteWidth, P.DestOffset)
9675 : littleEndianByteAt(LoadByteWidth, P.DestOffset);
9676 };
9677
9678 std::optional<BaseIndexOffset> Base;
9679 SDValue Chain;
9680
9681 SmallPtrSet<LoadSDNode *, 8> Loads;
9682 std::optional<SDByteProvider> FirstByteProvider;
9683 int64_t FirstOffset = INT64_MAX;
9684
9685 // Check if all the bytes of the OR we are looking at are loaded from the same
9686  // base address. Collect byte offsets from the Base address in ByteOffsets.
9687 SmallVector<int64_t, 8> ByteOffsets(ByteWidth);
9688 unsigned ZeroExtendedBytes = 0;
9689 for (int i = ByteWidth - 1; i >= 0; --i) {
9690 auto P =
9691 calculateByteProvider(SDValue(N, 0), i, 0, /*VectorIndex*/ std::nullopt,
9692 /*StartingIndex*/ i);
9693 if (!P)
9694 return SDValue();
9695
9696 if (P->isConstantZero()) {
9697 // It's OK for the N most significant bytes to be 0, we can just
9698 // zero-extend the load.
9699 if (++ZeroExtendedBytes != (ByteWidth - static_cast<unsigned>(i)))
9700 return SDValue();
9701 continue;
9702 }
9703 assert(P->hasSrc() && "provenance should either be memory or zero");
9704 auto *L = cast<LoadSDNode>(P->Src.value());
9705
9706 // All loads must share the same chain
9707 SDValue LChain = L->getChain();
9708 if (!Chain)
9709 Chain = LChain;
9710 else if (Chain != LChain)
9711 return SDValue();
9712
9713 // Loads must share the same base address
9714 BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG);
9715 int64_t ByteOffsetFromBase = 0;
9716
9717 // For vector loads, the expected load combine pattern will have an
9718 // ExtractElement for each index in the vector. While each of these
9719 // ExtractElements will be accessing the same base address as determined
9720 // by the load instruction, the actual bytes they interact with will differ
9721 // due to different ExtractElement indices. To accurately determine the
9722 // byte position of an ExtractElement, we offset the base load ptr with
9723 // the index multiplied by the byte size of each element in the vector.
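    // E.g. for a load of <4 x i16>, the element extracted at index 2 starts
    // 4 bytes past the load's base pointer (index 2 * 2 bytes per element).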
9724 if (L->getMemoryVT().isVector()) {
9725 unsigned LoadWidthInBit = L->getMemoryVT().getScalarSizeInBits();
9726 if (LoadWidthInBit % 8 != 0)
9727 return SDValue();
9728 unsigned ByteOffsetFromVector = P->SrcOffset * LoadWidthInBit / 8;
9729 Ptr.addToOffset(ByteOffsetFromVector);
9730 }
9731
9732 if (!Base)
9733 Base = Ptr;
9734
9735 else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
9736 return SDValue();
9737
9738 // Calculate the offset of the current byte from the base address
9739 ByteOffsetFromBase += MemoryByteOffset(*P);
9740 ByteOffsets[i] = ByteOffsetFromBase;
9741
9742 // Remember the first byte load
9743 if (ByteOffsetFromBase < FirstOffset) {
9744 FirstByteProvider = P;
9745 FirstOffset = ByteOffsetFromBase;
9746 }
9747
9748 Loads.insert(L);
9749 }
9750
9751 assert(!Loads.empty() && "All the bytes of the value must be loaded from "
9752 "memory, so there must be at least one load which produces the value");
9753 assert(Base && "Base address of the accessed memory location must be set");
9754 assert(FirstOffset != INT64_MAX && "First byte offset must be set");
9755
9756 bool NeedsZext = ZeroExtendedBytes > 0;
9757
9758 EVT MemVT =
9759 EVT::getIntegerVT(*DAG.getContext(), (ByteWidth - ZeroExtendedBytes) * 8);
9760
9761 if (!MemVT.isSimple())
9762 return SDValue();
9763
9764 // Before legalize we can introduce too wide illegal loads which will be later
9765 // split into legal sized loads. This enables us to combine i64 load by i8
9766 // patterns to a couple of i32 loads on 32 bit targets.
9767 if (LegalOperations &&
9768 !TLI.isLoadExtLegal(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, VT,
9769 MemVT))
9770 return SDValue();
9771
9772  // Check if the bytes of the OR we are looking at match either a big or a
9773  // little endian value load.
9774 std::optional<bool> IsBigEndian = isBigEndian(
9775 ArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), FirstOffset);
9776 if (!IsBigEndian)
9777 return SDValue();
9778
9779 assert(FirstByteProvider && "must be set");
9780
9781  // Ensure that the first byte is loaded from the zero offset of the first
9782  // load, so the combined value can be loaded from the first load's address.
9783 if (MemoryByteOffset(*FirstByteProvider) != 0)
9784 return SDValue();
9785 auto *FirstLoad = cast<LoadSDNode>(FirstByteProvider->Src.value());
9786
9787 // The node we are looking at matches with the pattern, check if we can
9788 // replace it with a single (possibly zero-extended) load and bswap + shift if
9789 // needed.
9790
9791 // If the load needs byte swap check if the target supports it
9792 bool NeedsBswap = IsBigEndianTarget != *IsBigEndian;
9793
9794 // Before legalize we can introduce illegal bswaps which will be later
9795 // converted to an explicit bswap sequence. This way we end up with a single
9796 // load and byte shuffling instead of several loads and byte shuffling.
9797 // We do not introduce illegal bswaps when zero-extending as this tends to
9798 // introduce too many arithmetic instructions.
9799 if (NeedsBswap && (LegalOperations || NeedsZext) &&
9800 !TLI.isOperationLegal(ISD::BSWAP, VT))
9801 return SDValue();
9802
9803 // If we need to bswap and zero extend, we have to insert a shift. Check that
9804 // it is legal.
9805 if (NeedsBswap && NeedsZext && LegalOperations &&
9806 !TLI.isOperationLegal(ISD::SHL, VT))
9807 return SDValue();
9808
9809 // Check that a load of the wide type is both allowed and fast on the target
9810 unsigned Fast = 0;
9811 bool Allowed =
9812 TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
9813 *FirstLoad->getMemOperand(), &Fast);
9814 if (!Allowed || !Fast)
9815 return SDValue();
9816
9817 SDValue NewLoad =
9818 DAG.getExtLoad(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, SDLoc(N), VT,
9819 Chain, FirstLoad->getBasePtr(),
9820 FirstLoad->getPointerInfo(), MemVT, FirstLoad->getAlign());
9821
9822 // Transfer chain users from old loads to the new load.
9823 for (LoadSDNode *L : Loads)
9824 DAG.makeEquivalentMemoryOrdering(L, NewLoad);
9825
9826 if (!NeedsBswap)
9827 return NewLoad;
9828
9829 SDValue ShiftedLoad =
9830 NeedsZext ? DAG.getNode(ISD::SHL, SDLoc(N), VT, NewLoad,
9831 DAG.getShiftAmountConstant(ZeroExtendedBytes * 8,
9832 VT, SDLoc(N)))
9833 : NewLoad;
9834 return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, ShiftedLoad);
9835}
9836
9837// If the target has andn, bsl, or a similar bit-select instruction,
9838// we want to unfold masked merge, with canonical pattern of:
9839// | A | |B|
9840// ((x ^ y) & m) ^ y
9841// | D |
9842// Into:
9843// (x & m) | (y & ~m)
9844// If y is a constant, m is not a 'not', and the 'andn' does not work with
9845// immediates, we unfold into a different pattern:
9846// ~(~x & m) & (m | y)
9847// If x is a constant, m is a 'not', and the 'andn' does not work with
9848// immediates, we unfold into a different pattern:
9849// (x | ~m) & ~(~m & ~y)
9850// NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at
9851// the very least that breaks andnpd / andnps patterns, and because those
9852// patterns are simplified in IR and shouldn't be created in the DAG
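// A small worked example of the main unfold (purely illustrative): with
// x = 0b1010, y = 0b0110, m = 0b1100,
//   ((x ^ y) & m) ^ y = (0b1100 & 0b1100) ^ 0b0110 = 0b1010
//   (x & m) | (y & ~m) = 0b1000 | 0b0010 = 0b1010
// i.e. both forms select the bits of x where m is set and the bits of y
// elsewhere.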
9853SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
9854 assert(N->getOpcode() == ISD::XOR);
9855
9856 // Don't touch 'not' (i.e. where y = -1).
9857 if (isAllOnesOrAllOnesSplat(N->getOperand(1)))
9858 return SDValue();
9859
9860 EVT VT = N->getValueType(0);
9861
9862 // There are 3 commutable operators in the pattern,
9863 // so we have to deal with 8 possible variants of the basic pattern.
9864 SDValue X, Y, M;
9865 auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) {
9866 if (And.getOpcode() != ISD::AND || !And.hasOneUse())
9867 return false;
9868 SDValue Xor = And.getOperand(XorIdx);
9869 if (Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
9870 return false;
9871 SDValue Xor0 = Xor.getOperand(0);
9872 SDValue Xor1 = Xor.getOperand(1);
9873 // Don't touch 'not' (i.e. where y = -1).
9874 if (isAllOnesOrAllOnesSplat(Xor1))
9875 return false;
9876 if (Other == Xor0)
9877 std::swap(Xor0, Xor1);
9878 if (Other != Xor1)
9879 return false;
9880 X = Xor0;
9881 Y = Xor1;
9882 M = And.getOperand(XorIdx ? 0 : 1);
9883 return true;
9884 };
9885
9886 SDValue N0 = N->getOperand(0);
9887 SDValue N1 = N->getOperand(1);
9888 if (!matchAndXor(N0, 0, N1) && !matchAndXor(N0, 1, N1) &&
9889 !matchAndXor(N1, 0, N0) && !matchAndXor(N1, 1, N0))
9890 return SDValue();
9891
9892 // Don't do anything if the mask is constant. This should not be reachable.
9893 // InstCombine should have already unfolded this pattern, and DAGCombiner
9894  // probably shouldn't produce it, either.
9895 if (isa<ConstantSDNode>(M.getNode()))
9896 return SDValue();
9897
9898 // We can transform if the target has AndNot
9899 if (!TLI.hasAndNot(M))
9900 return SDValue();
9901
9902 SDLoc DL(N);
9903
9904  // If Y is a constant, check that 'andn' works with immediates, unless M is
9905  // a bitwise not that would already allow ANDN to be used.
9906 if (!TLI.hasAndNot(Y) && !isBitwiseNot(M)) {
9907 assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");
9908 // If not, we need to do a bit more work to make sure andn is still used.
9909 SDValue NotX = DAG.getNOT(DL, X, VT);
9910 SDValue LHS = DAG.getNode(ISD::AND, DL, VT, NotX, M);
9911 SDValue NotLHS = DAG.getNOT(DL, LHS, VT);
9912 SDValue RHS = DAG.getNode(ISD::OR, DL, VT, M, Y);
9913 return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS);
9914 }
9915
9916 // If X is a constant and M is a bitwise not, check that 'andn' works with
9917 // immediates.
9918 if (!TLI.hasAndNot(X) && isBitwiseNot(M)) {
9919 assert(TLI.hasAndNot(Y) && "Only mask is a variable? Unreachable.");
9920 // If not, we need to do a bit more work to make sure andn is still used.
9921 SDValue NotM = M.getOperand(0);
9922 SDValue LHS = DAG.getNode(ISD::OR, DL, VT, X, NotM);
9923 SDValue NotY = DAG.getNOT(DL, Y, VT);
9924 SDValue RHS = DAG.getNode(ISD::AND, DL, VT, NotM, NotY);
9925 SDValue NotRHS = DAG.getNOT(DL, RHS, VT);
9926 return DAG.getNode(ISD::AND, DL, VT, LHS, NotRHS);
9927 }
9928
9929 SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);
9930 SDValue NotM = DAG.getNOT(DL, M, VT);
9931 SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);
9932
9933 return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
9934}
9935
9936SDValue DAGCombiner::visitXOR(SDNode *N) {
9937 SDValue N0 = N->getOperand(0);
9938 SDValue N1 = N->getOperand(1);
9939 EVT VT = N0.getValueType();
9940 SDLoc DL(N);
9941
9942 // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
9943 if (N0.isUndef() && N1.isUndef())
9944 return DAG.getConstant(0, DL, VT);
9945
9946 // fold (xor x, undef) -> undef
9947 if (N0.isUndef())
9948 return N0;
9949 if (N1.isUndef())
9950 return N1;
9951
9952 // fold (xor c1, c2) -> c1^c2
9953 if (SDValue C = DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, {N0, N1}))
9954 return C;
9955
9956  // canonicalize constant to RHS
9957  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
9958      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
9959    return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
9960
9961 // fold vector ops
9962 if (VT.isVector()) {
9963 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
9964 return FoldedVOp;
9965
9966    // fold (xor x, 0) -> x, vector edition
9967    if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
9968      return N0;
9969 }
9970
9971 // fold (xor x, 0) -> x
9972 if (isNullConstant(N1))
9973 return N0;
9974
9975 if (SDValue NewSel = foldBinOpIntoSelect(N))
9976 return NewSel;
9977
9978 // reassociate xor
9979 if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
9980 return RXOR;
9981
9982 // Fold xor(vecreduce(x), vecreduce(y)) -> vecreduce(xor(x, y))
9983 if (SDValue SD =
9984 reassociateReduction(ISD::VECREDUCE_XOR, ISD::XOR, DL, VT, N0, N1))
9985 return SD;
9986
9987 // fold (a^b) -> (a|b) iff a and b share no bits.
9988 if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
9989 DAG.haveNoCommonBitsSet(N0, N1))
9990 return DAG.getNode(ISD::OR, DL, VT, N0, N1, SDNodeFlags::Disjoint);
9991
9992 // look for 'add-like' folds:
9993 // XOR(N0,MIN_SIGNED_VALUE) == ADD(N0,MIN_SIGNED_VALUE)
9994  if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
9995      isMinSignedConstant(N1))
9996    if (SDValue Combined = visitADDLike(N))
9997 return Combined;
9998
9999 // fold not (setcc x, y, cc) -> setcc x y !cc
10000 // Avoid breaking: and (not(setcc x, y, cc), z) -> andn for vec
10001 unsigned N0Opcode = N0.getOpcode();
10002 SDValue LHS, RHS, CC;
10003 if (TLI.isConstTrueVal(N1) &&
10004 isSetCCEquivalent(N0, LHS, RHS, CC, /*MatchStrict*/ true) &&
10005 !(VT.isVector() && TLI.hasAndNot(SDValue(N, 0)) && N->hasOneUse() &&
10006      N->use_begin()->getUser()->getOpcode() == ISD::AND)) {
10007    ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
10008                                               LHS.getValueType());
10009 if (!LegalOperations ||
10010 TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
10011 switch (N0Opcode) {
10012 default:
10013 llvm_unreachable("Unhandled SetCC Equivalent!");
10014 case ISD::SETCC:
10015 return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
10016 case ISD::SELECT_CC:
10017 return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
10018 N0.getOperand(3), NotCC);
10019 case ISD::STRICT_FSETCC:
10020 case ISD::STRICT_FSETCCS: {
10021 if (N0.hasOneUse()) {
10022 // FIXME Can we handle multiple uses? Could we token factor the chain
10023 // results from the new/old setcc?
10024 SDValue SetCC =
10025 DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC,
10026 N0.getOperand(0), N0Opcode == ISD::STRICT_FSETCCS);
10027 CombineTo(N, SetCC);
10028 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), SetCC.getValue(1));
10029 recursivelyDeleteUnusedNodes(N0.getNode());
10030 return SDValue(N, 0); // Return N so it doesn't get rechecked!
10031 }
10032 break;
10033 }
10034 }
10035 }
10036 }
10037
10038 // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
10039 if (isOneConstant(N1) && N0Opcode == ISD::ZERO_EXTEND && N0.hasOneUse() &&
10040 isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
10041 SDValue V = N0.getOperand(0);
10042 SDLoc DL0(N0);
10043 V = DAG.getNode(ISD::XOR, DL0, V.getValueType(), V,
10044 DAG.getConstant(1, DL0, V.getValueType()));
10045 AddToWorklist(V.getNode());
10046 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, V);
10047 }
10048
10049 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
10050 // fold (not (and x, y)) -> (or (not x), (not y)) iff x or y are setcc
10051 if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
10052 (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
10053 SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
10054 if (isOneUseSetCC(N01) || isOneUseSetCC(N00)) {
10055 unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
10056 N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
10057 N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
10058 AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
10059 return DAG.getNode(NewOpcode, DL, VT, N00, N01);
10060 }
10061 }
10062 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
10063 // fold (not (and x, y)) -> (or (not x), (not y)) iff x or y are constants
10064 if (isAllOnesConstant(N1) && N0.hasOneUse() &&
10065 (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
10066 SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
10067 if (isa<ConstantSDNode>(N01) || isa<ConstantSDNode>(N00)) {
10068 unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
10069 N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
10070 N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
10071 AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
10072 return DAG.getNode(NewOpcode, DL, VT, N00, N01);
10073 }
10074 }
10075
10076 // fold (not (sub Y, X)) -> (add X, ~Y) if Y is a constant
10077 if (N0.getOpcode() == ISD::SUB && isAllOnesConstant(N1)) {
10078 SDValue Y = N0.getOperand(0);
10079 SDValue X = N0.getOperand(1);
10080
10081 if (auto *YConst = dyn_cast<ConstantSDNode>(Y)) {
10082 APInt NotYValue = ~YConst->getAPIntValue();
10083 SDValue NotY = DAG.getConstant(NotYValue, DL, VT);
10084 return DAG.getNode(ISD::ADD, DL, VT, X, NotY, N->getFlags());
10085 }
10086 }
10087
10088 // fold (not (add X, -1)) -> (neg X)
10089  if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() && isAllOnesConstant(N1) &&
10090      isAllOnesOrAllOnesSplat(N0.getOperand(1))) {
10091    return DAG.getNegative(N0.getOperand(0), DL, VT);
10092 }
10093
10094 // fold (xor (and x, y), y) -> (and (not x), y)
10095 if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) {
10096 SDValue X = N0.getOperand(0);
10097 SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
10098 AddToWorklist(NotX.getNode());
10099 return DAG.getNode(ISD::AND, DL, VT, NotX, N1);
10100 }
10101
10102 // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
10103 if (!LegalOperations || hasOperation(ISD::ABS, VT)) {
10104 SDValue A = N0Opcode == ISD::ADD ? N0 : N1;
10105 SDValue S = N0Opcode == ISD::SRA ? N0 : N1;
10106 if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
10107 SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
10108 SDValue S0 = S.getOperand(0);
10109 if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0))
10110 if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1)))
10111 if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
10112 return DAG.getNode(ISD::ABS, DL, VT, S0);
10113 }
10114 }
10115
10116 // fold (xor x, x) -> 0
10117 if (N0 == N1)
10118 return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
10119
10120 // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
10121 // Here is a concrete example of this equivalence:
10122 // i16 x == 14
10123 // i16 shl == 1 << 14 == 16384 == 0b0100000000000000
10124 // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
10125 //
10126 // =>
10127 //
10128 // i16 ~1 == 0b1111111111111110
10129 // i16 rol(~1, 14) == 0b1011111111111111
10130 //
10131 // Some additional tips to help conceptualize this transform:
10132 // - Try to see the operation as placing a single zero in a value of all ones.
10133 // - There exists no value for x which would allow the result to contain zero.
10134 // - Values of x larger than the bitwidth are undefined and do not require a
10135 // consistent result.
10136  // - Pushing the zero left requires shifting one-bits in from the right.
10137 // A rotate left of ~1 is a nice way of achieving the desired result.
10138  if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0Opcode == ISD::SHL &&
10139      isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
10140    return DAG.getNode(ISD::ROTL, DL, VT, DAG.getSignedConstant(~1, DL, VT),
10141 N0.getOperand(1));
10142 }
10143
10144 // Simplify: xor (op x...), (op y...) -> (op (xor x, y))
10145 if (N0Opcode == N1.getOpcode())
10146 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
10147 return V;
10148
10149 if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
10150 return R;
10151 if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG))
10152 return R;
10153 if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
10154 return R;
10155
10156 // Unfold ((x ^ y) & m) ^ y into (x & m) | (y & ~m) if profitable
10157 if (SDValue MM = unfoldMaskedMerge(N))
10158 return MM;
10159
10160  // Simplify the expression using non-local knowledge.
10161  if (SimplifyDemandedBits(SDValue(N, 0)))
10162    return SDValue(N, 0);
10163
10164 if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
10165 return Combined;
10166
10167 // fold (xor (smin(x, C), C)) -> select (x < C), xor(x, C), 0
10168 // fold (xor (smax(x, C), C)) -> select (x > C), xor(x, C), 0
10169 // fold (xor (umin(x, C), C)) -> select (x < C), xor(x, C), 0
10170 // fold (xor (umax(x, C), C)) -> select (x > C), xor(x, C), 0
10171 SDValue Op0;
10172 if (sd_match(N0, m_OneUse(m_AnyOf(m_SMin(m_Value(Op0), m_Specific(N1)),
10173 m_SMax(m_Value(Op0), m_Specific(N1)),
10174 m_UMin(m_Value(Op0), m_Specific(N1)),
10175 m_UMax(m_Value(Op0), m_Specific(N1)))))) {
10176
10177    if (isa<ConstantSDNode>(N1) ||
10178        ISD::isBuildVectorOfConstantSDNodes(N1.getNode())) {
10179      // For vectors, only optimize when the constant is zero or all-ones to
10180 // avoid generating more instructions
10181 if (VT.isVector()) {
10182 ConstantSDNode *N1C = isConstOrConstSplat(N1);
10183 if (!N1C || (!N1C->isZero() && !N1C->isAllOnes()))
10184 return SDValue();
10185 }
10186
10187 // Avoid the fold if the minmax operation is legal and select is expensive
10188      if (TLI.isOperationLegal(N0.getOpcode(), VT) &&
10189          TLI.isPredictableSelectExpensive())
10190        return SDValue();
10191
10192 EVT CCVT = getSetCCResultType(VT);
10193 ISD::CondCode CC;
10194 switch (N0.getOpcode()) {
10195 case ISD::SMIN:
10196 CC = ISD::SETLT;
10197 break;
10198 case ISD::SMAX:
10199 CC = ISD::SETGT;
10200 break;
10201 case ISD::UMIN:
10202 CC = ISD::SETULT;
10203 break;
10204 case ISD::UMAX:
10205 CC = ISD::SETUGT;
10206 break;
10207 }
10208 SDValue FN1 = DAG.getFreeze(N1);
10209 SDValue Cmp = DAG.getSetCC(DL, CCVT, Op0, FN1, CC);
10210 SDValue XorXC = DAG.getNode(ISD::XOR, DL, VT, Op0, FN1);
10211 SDValue Zero = DAG.getConstant(0, DL, VT);
10212 return DAG.getSelect(DL, VT, Cmp, XorXC, Zero);
10213 }
10214 }
10215
10216 return SDValue();
10217}
10218
10219/// If we have a shift-by-constant of a bitwise logic op that itself has a
10220/// shift-by-constant operand with identical opcode, we may be able to convert
10221/// that into 2 independent shifts followed by the logic op. This is a
10222/// throughput improvement.
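/// For example (illustrative): (shl (xor (shl X, 2), Y), 3) can be rewritten
/// as (xor (shl X, 5), (shl Y, 3)), so the two shifts no longer form a serial
/// dependence chain.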
10223static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG) {
10224  // Match a one-use bitwise logic op.
10225 SDValue LogicOp = Shift->getOperand(0);
10226 if (!LogicOp.hasOneUse())
10227 return SDValue();
10228
10229 unsigned LogicOpcode = LogicOp.getOpcode();
10230 if (LogicOpcode != ISD::AND && LogicOpcode != ISD::OR &&
10231 LogicOpcode != ISD::XOR)
10232 return SDValue();
10233
10234 // Find a matching one-use shift by constant.
10235 unsigned ShiftOpcode = Shift->getOpcode();
10236 SDValue C1 = Shift->getOperand(1);
10237 ConstantSDNode *C1Node = isConstOrConstSplat(C1);
10238 assert(C1Node && "Expected a shift with constant operand");
10239 const APInt &C1Val = C1Node->getAPIntValue();
10240 auto matchFirstShift = [&](SDValue V, SDValue &ShiftOp,
10241 const APInt *&ShiftAmtVal) {
10242 if (V.getOpcode() != ShiftOpcode || !V.hasOneUse())
10243 return false;
10244
10245 ConstantSDNode *ShiftCNode = isConstOrConstSplat(V.getOperand(1));
10246 if (!ShiftCNode)
10247 return false;
10248
10249 // Capture the shifted operand and shift amount value.
10250 ShiftOp = V.getOperand(0);
10251 ShiftAmtVal = &ShiftCNode->getAPIntValue();
10252
10253 // Shift amount types do not have to match their operand type, so check that
10254 // the constants are the same width.
10255 if (ShiftAmtVal->getBitWidth() != C1Val.getBitWidth())
10256 return false;
10257
10258 // The fold is not valid if the sum of the shift values doesn't fit in the
10259 // given shift amount type.
10260 bool Overflow = false;
10261 APInt NewShiftAmt = C1Val.uadd_ov(*ShiftAmtVal, Overflow);
10262 if (Overflow)
10263 return false;
10264
10265 // The fold is not valid if the sum of the shift values exceeds bitwidth.
10266 if (NewShiftAmt.uge(V.getScalarValueSizeInBits()))
10267 return false;
10268
10269 return true;
10270 };
10271
10272 // Logic ops are commutative, so check each operand for a match.
10273 SDValue X, Y;
10274 const APInt *C0Val;
10275 if (matchFirstShift(LogicOp.getOperand(0), X, C0Val))
10276 Y = LogicOp.getOperand(1);
10277 else if (matchFirstShift(LogicOp.getOperand(1), X, C0Val))
10278 Y = LogicOp.getOperand(0);
10279 else
10280 return SDValue();
10281
10282 // shift (logic (shift X, C0), Y), C1 -> logic (shift X, C0+C1), (shift Y, C1)
10283 SDLoc DL(Shift);
10284 EVT VT = Shift->getValueType(0);
10285 EVT ShiftAmtVT = Shift->getOperand(1).getValueType();
10286 SDValue ShiftSumC = DAG.getConstant(*C0Val + C1Val, DL, ShiftAmtVT);
10287 SDValue NewShift1 = DAG.getNode(ShiftOpcode, DL, VT, X, ShiftSumC);
10288 SDValue NewShift2 = DAG.getNode(ShiftOpcode, DL, VT, Y, C1);
10289 return DAG.getNode(LogicOpcode, DL, VT, NewShift1, NewShift2,
10290 LogicOp->getFlags());
10291}
10292
10293/// Handle transforms common to the three shifts, when the shift amount is a
10294/// constant.
10295/// We are looking for: (shift being one of shl/sra/srl)
10296/// shift (binop X, C0), C1
10297/// And want to transform into:
10298/// binop (shift X, C1), (shift C0, C1)
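/// For example, with the shift being shl, C0 = 255 and C1 = 8:
///   shl (or X, 255), 8 --> or (shl X, 8), 0xFF00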
10299SDValue DAGCombiner::visitShiftByConstant(SDNode *N) {
10300 assert(isConstOrConstSplat(N->getOperand(1)) && "Expected constant operand");
10301
10302 // Do not turn a 'not' into a regular xor.
10303 if (isBitwiseNot(N->getOperand(0)))
10304 return SDValue();
10305
10306 // The inner binop must be one-use, since we want to replace it.
10307 SDValue LHS = N->getOperand(0);
10308 if (!LHS.hasOneUse() || !TLI.isDesirableToCommuteWithShift(N, Level))
10309 return SDValue();
10310
10311 // Fold shift(bitop(shift(x,c1),y), c2) -> bitop(shift(x,c1+c2),shift(y,c2)).
10312 if (SDValue R = combineShiftOfShiftedLogic(N, DAG))
10313 return R;
10314
10315 // We want to pull some binops through shifts, so that we have (and (shift))
10316 // instead of (shift (and)), likewise for add, or, xor, etc. This sort of
10317 // thing happens with address calculations, so it's important to canonicalize
10318 // it.
10319 switch (LHS.getOpcode()) {
10320 default:
10321 return SDValue();
10322 case ISD::OR:
10323 case ISD::XOR:
10324 case ISD::AND:
10325 break;
10326 case ISD::ADD:
10327 if (N->getOpcode() != ISD::SHL)
10328 return SDValue(); // only shl(add) not sr[al](add).
10329 break;
10330 }
10331
10332 // FIXME: disable this unless the input to the binop is a shift by a constant
10333  // or is a copy/select. Enable this in other cases once we figure out when
10334  // exactly it is profitable.
10335 SDValue BinOpLHSVal = LHS.getOperand(0);
10336 bool IsShiftByConstant = (BinOpLHSVal.getOpcode() == ISD::SHL ||
10337 BinOpLHSVal.getOpcode() == ISD::SRA ||
10338 BinOpLHSVal.getOpcode() == ISD::SRL) &&
10339 isa<ConstantSDNode>(BinOpLHSVal.getOperand(1));
10340 bool IsCopyOrSelect = BinOpLHSVal.getOpcode() == ISD::CopyFromReg ||
10341 BinOpLHSVal.getOpcode() == ISD::SELECT;
10342
10343 if (!IsShiftByConstant && !IsCopyOrSelect)
10344 return SDValue();
10345
10346 if (IsCopyOrSelect && N->hasOneUse())
10347 return SDValue();
10348
10349 // Attempt to fold the constants, shifting the binop RHS by the shift amount.
10350 SDLoc DL(N);
10351 EVT VT = N->getValueType(0);
10352 if (SDValue NewRHS = DAG.FoldConstantArithmetic(
10353 N->getOpcode(), DL, VT, {LHS.getOperand(1), N->getOperand(1)})) {
10354 SDValue NewShift = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(0),
10355 N->getOperand(1));
10356 return DAG.getNode(LHS.getOpcode(), DL, VT, NewShift, NewRHS);
10357 }
10358
10359 return SDValue();
10360}
10361
10362SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
10363 assert(N->getOpcode() == ISD::TRUNCATE);
10364 assert(N->getOperand(0).getOpcode() == ISD::AND);
10365
10366 // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
10367 EVT TruncVT = N->getValueType(0);
10368 if (N->hasOneUse() && N->getOperand(0).hasOneUse() &&
10369 TLI.isTypeDesirableForOp(ISD::AND, TruncVT)) {
10370 SDValue N01 = N->getOperand(0).getOperand(1);
10371 if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
10372 SDLoc DL(N);
10373 SDValue N00 = N->getOperand(0).getOperand(0);
10374 SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
10375 SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
10376 AddToWorklist(Trunc00.getNode());
10377 AddToWorklist(Trunc01.getNode());
10378 return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
10379 }
10380 }
10381
10382 return SDValue();
10383}
10384
10385SDValue DAGCombiner::visitRotate(SDNode *N) {
10386 SDLoc dl(N);
10387 SDValue N0 = N->getOperand(0);
10388 SDValue N1 = N->getOperand(1);
10389 EVT VT = N->getValueType(0);
10390 unsigned Bitsize = VT.getScalarSizeInBits();
10391
10392 // fold (rot x, 0) -> x
10393 if (isNullOrNullSplat(N1))
10394 return N0;
10395
10396 // fold (rot x, c) -> x iff (c % BitSize) == 0
10397 if (isPowerOf2_32(Bitsize) && Bitsize > 1) {
10398 APInt ModuloMask(N1.getScalarValueSizeInBits(), Bitsize - 1);
10399 if (DAG.MaskedValueIsZero(N1, ModuloMask))
10400 return N0;
10401 }
10402
10403 // fold (rot x, c) -> (rot x, c % BitSize)
10404 bool OutOfRange = false;
10405 auto MatchOutOfRange = [Bitsize, &OutOfRange](ConstantSDNode *C) {
10406 OutOfRange |= C->getAPIntValue().uge(Bitsize);
10407 return true;
10408 };
10409 if (ISD::matchUnaryPredicate(N1, MatchOutOfRange) && OutOfRange) {
10410 EVT AmtVT = N1.getValueType();
10411 SDValue Bits = DAG.getConstant(Bitsize, dl, AmtVT);
10412 if (SDValue Amt =
10413 DAG.FoldConstantArithmetic(ISD::UREM, dl, AmtVT, {N1, Bits}))
10414 return DAG.getNode(N->getOpcode(), dl, VT, N0, Amt);
10415 }
10416
10417 // rot i16 X, 8 --> bswap X
10418 auto *RotAmtC = isConstOrConstSplat(N1);
10419 if (RotAmtC && RotAmtC->getAPIntValue() == 8 &&
10420 VT.getScalarSizeInBits() == 16 && hasOperation(ISD::BSWAP, VT))
10421 return DAG.getNode(ISD::BSWAP, dl, VT, N0);
10422
10423  // Simplify the operands using demanded-bits information.
10424  if (SimplifyDemandedBits(SDValue(N, 0)))
10425    return SDValue(N, 0);
10426
10427 // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
10428 if (N1.getOpcode() == ISD::TRUNCATE &&
10429 N1.getOperand(0).getOpcode() == ISD::AND) {
10430 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
10431 return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
10432 }
10433
10434 unsigned NextOp = N0.getOpcode();
10435
10436 // fold (rot* (rot* x, c2), c1)
10437 // -> (rot* x, ((c1 % bitsize) +- (c2 % bitsize) + bitsize) % bitsize)
10438 if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
10439    bool C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
10440    bool C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
10441    if (C1 && C2 && N1.getValueType() == N0.getOperand(1).getValueType()) {
10442 EVT ShiftVT = N1.getValueType();
10443 bool SameSide = (N->getOpcode() == NextOp);
10444 unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
10445 SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
10446 SDValue Norm1 = DAG.FoldConstantArithmetic(ISD::UREM, dl, ShiftVT,
10447 {N1, BitsizeC});
10448 SDValue Norm2 = DAG.FoldConstantArithmetic(ISD::UREM, dl, ShiftVT,
10449 {N0.getOperand(1), BitsizeC});
10450 if (Norm1 && Norm2)
10451 if (SDValue CombinedShift = DAG.FoldConstantArithmetic(
10452 CombineOp, dl, ShiftVT, {Norm1, Norm2})) {
10453 CombinedShift = DAG.FoldConstantArithmetic(ISD::ADD, dl, ShiftVT,
10454 {CombinedShift, BitsizeC});
10455 SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
10456 ISD::UREM, dl, ShiftVT, {CombinedShift, BitsizeC});
10457 return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
10458 CombinedShiftNorm);
10459 }
10460 }
10461 }
10462 return SDValue();
10463}
10464
10465SDValue DAGCombiner::visitSHL(SDNode *N) {
10466 SDValue N0 = N->getOperand(0);
10467 SDValue N1 = N->getOperand(1);
10468 if (SDValue V = DAG.simplifyShift(N0, N1))
10469 return V;
10470
10471 SDLoc DL(N);
10472 EVT VT = N0.getValueType();
10473 EVT ShiftVT = N1.getValueType();
10474 unsigned OpSizeInBits = VT.getScalarSizeInBits();
10475
10476 // fold (shl c1, c2) -> c1<<c2
10477 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N0, N1}))
10478 return C;
10479
10480 // fold vector ops
10481 if (VT.isVector()) {
10482 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
10483 return FoldedVOp;
10484
10485 BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
10486 // If setcc produces all-one true value then:
10487 // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
10488 if (N1CV && N1CV->isConstant()) {
10489 if (N0.getOpcode() == ISD::AND) {
10490 SDValue N00 = N0->getOperand(0);
10491 SDValue N01 = N0->getOperand(1);
10492 BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
10493
10494      if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
10495          TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
10496              TargetLowering::ZeroOrNegativeOneBooleanContent) {
10497        if (SDValue C =
10498 DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N01, N1}))
10499 return DAG.getNode(ISD::AND, DL, VT, N00, C);
10500 }
10501 }
10502 }
10503 }
10504
10505 if (SDValue NewSel = foldBinOpIntoSelect(N))
10506 return NewSel;
10507
10508 // if (shl x, c) is known to be zero, return 0
10509 if (DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))
10510 return DAG.getConstant(0, DL, VT);
10511
10512 // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
10513 if (N1.getOpcode() == ISD::TRUNCATE &&
10514 N1.getOperand(0).getOpcode() == ISD::AND) {
10515 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
10516 return DAG.getNode(ISD::SHL, DL, VT, N0, NewOp1);
10517 }
10518
10519 // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
10520 if (N0.getOpcode() == ISD::SHL) {
10521 auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
10522 ConstantSDNode *RHS) {
10523 APInt c1 = LHS->getAPIntValue();
10524 APInt c2 = RHS->getAPIntValue();
10525 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10526 return (c1 + c2).uge(OpSizeInBits);
10527 };
10528 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
10529 return DAG.getConstant(0, DL, VT);
10530
10531 auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
10532 ConstantSDNode *RHS) {
10533 APInt c1 = LHS->getAPIntValue();
10534 APInt c2 = RHS->getAPIntValue();
10535 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10536 return (c1 + c2).ult(OpSizeInBits);
10537 };
10538 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
10539 SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
10540 return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
10541 }
10542 }
10543
10544 // fold (shl (ext (shl x, c1)), c2) -> (shl (ext x), (add c1, c2))
10545 // For this to be valid, the second form must not preserve any of the bits
10546 // that are shifted out by the inner shift in the first form. This means
10547 // the outer shift size must be >= the number of bits added by the ext.
10548 // As a corollary, we don't care what kind of ext it is.
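  // E.g. (shl (zext i16 (shl X, 12) to i32), 16) -> (shl (zext i16 X to i32), 28):
  // the outer shift by 16 discards every bit the inner i16 shift could have
  // dropped, so the two forms agree.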
10549 if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
10550 N0.getOpcode() == ISD::ANY_EXTEND ||
10551 N0.getOpcode() == ISD::SIGN_EXTEND) &&
10552 N0.getOperand(0).getOpcode() == ISD::SHL) {
10553 SDValue N0Op0 = N0.getOperand(0);
10554 SDValue InnerShiftAmt = N0Op0.getOperand(1);
10555 EVT InnerVT = N0Op0.getValueType();
10556 uint64_t InnerBitwidth = InnerVT.getScalarSizeInBits();
10557
10558 auto MatchOutOfRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
10559 ConstantSDNode *RHS) {
10560 APInt c1 = LHS->getAPIntValue();
10561 APInt c2 = RHS->getAPIntValue();
10562 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10563 return c2.uge(OpSizeInBits - InnerBitwidth) &&
10564 (c1 + c2).uge(OpSizeInBits);
10565 };
10566 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchOutOfRange,
10567 /*AllowUndefs*/ false,
10568 /*AllowTypeMismatch*/ true))
10569 return DAG.getConstant(0, DL, VT);
10570
10571 auto MatchInRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
10572 ConstantSDNode *RHS) {
10573 APInt c1 = LHS->getAPIntValue();
10574 APInt c2 = RHS->getAPIntValue();
10575 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10576 return c2.uge(OpSizeInBits - InnerBitwidth) &&
10577 (c1 + c2).ult(OpSizeInBits);
10578 };
10579 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchInRange,
10580 /*AllowUndefs*/ false,
10581 /*AllowTypeMismatch*/ true)) {
10582 SDValue Ext = DAG.getNode(N0.getOpcode(), DL, VT, N0Op0.getOperand(0));
10583 SDValue Sum = DAG.getZExtOrTrunc(InnerShiftAmt, DL, ShiftVT);
10584 Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, Sum, N1);
10585 return DAG.getNode(ISD::SHL, DL, VT, Ext, Sum);
10586 }
10587 }
10588
10589 // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
10590 // Only fold this if the inner zext has no other uses to avoid increasing
10591 // the total number of instructions.
10592 if (N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
10593 N0.getOperand(0).getOpcode() == ISD::SRL) {
10594 SDValue N0Op0 = N0.getOperand(0);
10595 SDValue InnerShiftAmt = N0Op0.getOperand(1);
10596
10597 auto MatchEqual = [VT](ConstantSDNode *LHS, ConstantSDNode *RHS) {
10598 APInt c1 = LHS->getAPIntValue();
10599 APInt c2 = RHS->getAPIntValue();
10600 zeroExtendToMatch(c1, c2);
10601 return c1.ult(VT.getScalarSizeInBits()) && (c1 == c2);
10602 };
10603 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchEqual,
10604 /*AllowUndefs*/ false,
10605 /*AllowTypeMismatch*/ true)) {
10606 EVT InnerShiftAmtVT = N0Op0.getOperand(1).getValueType();
10607 SDValue NewSHL = DAG.getZExtOrTrunc(N1, DL, InnerShiftAmtVT);
10608 NewSHL = DAG.getNode(ISD::SHL, DL, N0Op0.getValueType(), N0Op0, NewSHL);
10609 AddToWorklist(NewSHL.getNode());
10610 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
10611 }
10612 }
10613
10614 if (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) {
10615 auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS,
10616 ConstantSDNode *RHS) {
10617 const APInt &LHSC = LHS->getAPIntValue();
10618 const APInt &RHSC = RHS->getAPIntValue();
10619 return LHSC.ult(OpSizeInBits) && RHSC.ult(OpSizeInBits) &&
10620 LHSC.getZExtValue() <= RHSC.getZExtValue();
10621 };
10622
10623 // fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2
10624 // fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C2-C1)) if C1 >= C2
10625 if (N0->getFlags().hasExact()) {
10626 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
10627 /*AllowUndefs*/ false,
10628 /*AllowTypeMismatch*/ true)) {
10629 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10630 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
10631 return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
10632 }
10633 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
10634 /*AllowUndefs*/ false,
10635 /*AllowTypeMismatch*/ true)) {
10636 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10637 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
10638 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0), Diff);
10639 }
10640 }
10641
10642    // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1)), MASK) or
10643    //                               (and (srl x, (sub c1, c2)), MASK)
10644 // Only fold this if the inner shift has no other uses -- if it does,
10645 // folding this will increase the total number of instructions.
10646 if (N0.getOpcode() == ISD::SRL &&
10647        (N0.getOperand(1) == N1 || N0.hasOneUse()) &&
10648        TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
10649      if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
10650 /*AllowUndefs*/ false,
10651 /*AllowTypeMismatch*/ true)) {
10652 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10653 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
10654 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
10655 Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, N01);
10656 Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, Diff);
10657 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Diff);
10658 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
10659 }
10660 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
10661 /*AllowUndefs*/ false,
10662 /*AllowTypeMismatch*/ true)) {
10663 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10664 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
10665 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
10666 Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, N1);
10667 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
10668 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
10669 }
10670 }
10671 }
10672
10673 // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
10674 if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
10675 isConstantOrConstantVector(N1, /* No Opaques */ true)) {
10676 SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
10677 SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
10678 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
10679 }
10680
10681 // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
10682 // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
10683  // This is a variant of the fold done on multiply, except that a mul by a
10684  // power of 2 has already been turned into a shift.
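  // E.g. (shl (add x, 5), 2) -> (add (shl x, 2), 20) and
  //      (shl (or x, 5), 2) -> (or (shl x, 2), 20).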
10685 if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
10686 TLI.isDesirableToCommuteWithShift(N, Level)) {
10687 SDValue N01 = N0.getOperand(1);
10688 if (SDValue Shl1 =
10689 DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, {N01, N1})) {
10690 SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
10691 AddToWorklist(Shl0.getNode());
10692 SDNodeFlags Flags;
10693 // Preserve the disjoint flag for Or.
10694      if (N0.getOpcode() == ISD::OR && N0->getFlags().hasDisjoint())
10695        Flags.setDisjoint(true);
10696      return DAG.getNode(N0.getOpcode(), DL, VT, Shl0, Shl1, Flags);
10697 }
10698 }
10699
10700 // fold (shl (sext (add_nsw x, c1)), c2) -> (add (shl (sext x), c2), c1 << c2)
10701 // TODO: Add zext/add_nuw variant with suitable test coverage
10702 // TODO: Should we limit this with isLegalAddImmediate?
10703 if (N0.getOpcode() == ISD::SIGN_EXTEND &&
10704 N0.getOperand(0).getOpcode() == ISD::ADD &&
10705 N0.getOperand(0)->getFlags().hasNoSignedWrap() &&
10706 TLI.isDesirableToCommuteWithShift(N, Level)) {
10707 SDValue Add = N0.getOperand(0);
10708 SDLoc DL(N0);
10709 if (SDValue ExtC = DAG.FoldConstantArithmetic(N0.getOpcode(), DL, VT,
10710 {Add.getOperand(1)})) {
10711 if (SDValue ShlC =
10712 DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {ExtC, N1})) {
10713 SDValue ExtX = DAG.getNode(N0.getOpcode(), DL, VT, Add.getOperand(0));
10714 SDValue ShlX = DAG.getNode(ISD::SHL, DL, VT, ExtX, N1);
10715 return DAG.getNode(ISD::ADD, DL, VT, ShlX, ShlC);
10716 }
10717 }
10718 }
10719
10720 // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
10721 if (N0.getOpcode() == ISD::MUL && N0->hasOneUse()) {
10722 SDValue N01 = N0.getOperand(1);
10723 if (SDValue Shl =
10724 DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, {N01, N1}))
10725 return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), Shl);
10726 }
10727
10728 ConstantSDNode *N1C = isConstOrConstSplat(N1);
10729 if (N1C && !N1C->isOpaque())
10730 if (SDValue NewSHL = visitShiftByConstant(N))
10731 return NewSHL;
10732
10733 // fold (shl X, cttz(Y)) -> (mul (Y & -Y), X) if cttz is unsupported on the
10734 // target.
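// (Y & -Y) isolates the lowest set bit of Y, i.e. (1 << cttz(Y)), so the
// multiply performs the same shift without materializing the cttz.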
10735 if (((N1.getOpcode() == ISD::CTTZ &&
10736 VT.getScalarSizeInBits() <= ShiftVT.getScalarSizeInBits()) ||
10737 N1.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
10738 N1.hasOneUse() && !TLI.isOperationLegalOrCustom(ISD::CTTZ, ShiftVT) &&
10739 TLI.isOperationLegalOrCustom(ISD::MUL, VT)) {
10740 SDValue Y = N1.getOperand(0);
10741 SDLoc DL(N);
10742 SDValue NegY = DAG.getNegative(Y, DL, ShiftVT);
10743 SDValue And =
10744 DAG.getZExtOrTrunc(DAG.getNode(ISD::AND, DL, ShiftVT, Y, NegY), DL, VT);
10745 return DAG.getNode(ISD::MUL, DL, VT, And, N0);
10746 }
10747
10748 if (SimplifyDemandedBits(SDValue(N, 0)))
10749 return SDValue(N, 0);
10750
10751 // Fold (shl (vscale * C0), C1) to (vscale * (C0 << C1)).
10752 if (N0.getOpcode() == ISD::VSCALE && N1C) {
10753 const APInt &C0 = N0.getConstantOperandAPInt(0);
10754 const APInt &C1 = N1C->getAPIntValue();
10755 return DAG.getVScale(DL, VT, C0 << C1);
10756 }
10757
10758 SDValue X;
10759 APInt VS0;
10760
10761 // fold (shl (X * vscale(VS0)), C1) -> (X * vscale(VS0 << C1))
10762 if (N1C && sd_match(N0, m_Mul(m_Value(X), m_VScale(m_ConstInt(VS0))))) {
10763 SDNodeFlags Flags;
10764 Flags.setNoUnsignedWrap(N->getFlags().hasNoUnsignedWrap() &&
10765 N0->getFlags().hasNoUnsignedWrap());
10766
10767 SDValue VScale = DAG.getVScale(DL, VT, VS0 << N1C->getAPIntValue());
10768 return DAG.getNode(ISD::MUL, DL, VT, X, VScale, Flags);
10769 }
10770
10771 // Fold (shl step_vector(C0), C1) to (step_vector(C0 << C1)).
10772 APInt ShlVal;
10773 if (N0.getOpcode() == ISD::STEP_VECTOR &&
10774 ISD::isConstantSplatVector(N1.getNode(), ShlVal)) {
10775 const APInt &C0 = N0.getConstantOperandAPInt(0);
10776 if (ShlVal.ult(C0.getBitWidth())) {
10777 APInt NewStep = C0 << ShlVal;
10778 return DAG.getStepVector(DL, VT, NewStep);
10779 }
10780 }
10781
10782 return SDValue();
10783}
10784
10785// Transform a right shift of a multiply into a multiply-high.
10786// Examples:
10787 // (srl (mul (zext i32:$a to i64), (zext i32:$b to i64)), 32) -> (mulhu $a, $b)
10788 // (sra (mul (sext i32:$a to i64), (sext i32:$b to i64)), 32) -> (mulhs $a, $b)
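// The product of two values extended from N bits to 2N bits always fits in
// 2N bits, so its high half is exactly mulhu/mulhs of the narrow operands.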
10789 static SDValue combineShiftToMULH(SDNode *N, const SDLoc &DL, SelectionDAG &DAG,
10790 const TargetLowering &TLI) {
10791 assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
10792 "SRL or SRA node is required here!");
10793
10794 // Check the shift amount. Proceed with the transformation if the shift
10795 // amount is constant.
10796 ConstantSDNode *ShiftAmtSrc = isConstOrConstSplat(N->getOperand(1));
10797 if (!ShiftAmtSrc)
10798 return SDValue();
10799
10800 // The operation feeding into the shift must be a multiply.
10801 SDValue ShiftOperand = N->getOperand(0);
10802 if (ShiftOperand.getOpcode() != ISD::MUL)
10803 return SDValue();
10804
10805 // Both operands must be equivalent extend nodes.
10806 SDValue LeftOp = ShiftOperand.getOperand(0);
10807 SDValue RightOp = ShiftOperand.getOperand(1);
10808
10809 bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND;
10810 bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND;
10811
10812 if (!IsSignExt && !IsZeroExt)
10813 return SDValue();
10814
10815 EVT NarrowVT = LeftOp.getOperand(0).getValueType();
10816 unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits();
10817
10818 // return true if U may use the lower bits of its operands
10819 auto UserOfLowerBits = [NarrowVTSize](SDNode *U) {
10820 if (U->getOpcode() != ISD::SRL && U->getOpcode() != ISD::SRA) {
10821 return true;
10822 }
10823 ConstantSDNode *UShiftAmtSrc = isConstOrConstSplat(U->getOperand(1));
10824 if (!UShiftAmtSrc) {
10825 return true;
10826 }
10827 unsigned UShiftAmt = UShiftAmtSrc->getZExtValue();
10828 return UShiftAmt < NarrowVTSize;
10829 };
10830
10831 // If the lower part of the MUL is also used and MUL_LOHI is supported,
10832 // do not introduce the MULH in favor of MUL_LOHI.
10833 unsigned MulLoHiOp = IsSignExt ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
10834 if (!ShiftOperand.hasOneUse() &&
10835 TLI.isOperationLegalOrCustom(MulLoHiOp, NarrowVT) &&
10836 llvm::any_of(ShiftOperand->users(), UserOfLowerBits)) {
10837 return SDValue();
10838 }
10839
10840 SDValue MulhRightOp;
10841 if (ConstantSDNode *Constant = isConstOrConstSplat(RightOp)) {
10842 unsigned ActiveBits = IsSignExt
10843 ? Constant->getAPIntValue().getSignificantBits()
10844 : Constant->getAPIntValue().getActiveBits();
10845 if (ActiveBits > NarrowVTSize)
10846 return SDValue();
10847 MulhRightOp = DAG.getConstant(
10848 Constant->getAPIntValue().trunc(NarrowVT.getScalarSizeInBits()), DL,
10849 NarrowVT);
10850 } else {
10851 if (LeftOp.getOpcode() != RightOp.getOpcode())
10852 return SDValue();
10853 // Check that the two extend nodes are the same type.
10854 if (NarrowVT != RightOp.getOperand(0).getValueType())
10855 return SDValue();
10856 MulhRightOp = RightOp.getOperand(0);
10857 }
10858
10859 EVT WideVT = LeftOp.getValueType();
10860 // Proceed with the transformation if the wide types match.
10861 assert((WideVT == RightOp.getValueType()) &&
10862 "Cannot have a multiply node with two different operand types.");
10863
10864 // Proceed with the transformation if the wide type is twice as large
10865 // as the narrow type.
10866 if (WideVT.getScalarSizeInBits() != 2 * NarrowVTSize)
10867 return SDValue();
10868
10869 // Check the shift amount with the narrow type size.
10870 // Proceed with the transformation if the shift amount is the width
10871 // of the narrow type.
10872 unsigned ShiftAmt = ShiftAmtSrc->getZExtValue();
10873 if (ShiftAmt != NarrowVTSize)
10874 return SDValue();
10875
10876 // If the operation feeding into the MUL is a sign extend (sext),
10877 // we use mulhs. Otherwise, zero extends (zext) use mulhu.
10878 unsigned MulhOpcode = IsSignExt ? ISD::MULHS : ISD::MULHU;
10879
10880 // Combine to mulh if mulh is legal/custom for the narrow type on the target
10881 // or if it is a vector type then we could transform to an acceptable type and
10882 // rely on legalization to split/combine the result.
10883 if (NarrowVT.isVector()) {
10884 EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), NarrowVT);
10885 if (TransformVT.getVectorElementType() != NarrowVT.getVectorElementType() ||
10886 !TLI.isOperationLegalOrCustom(MulhOpcode, TransformVT))
10887 return SDValue();
10888 } else {
10889 if (!TLI.isOperationLegalOrCustom(MulhOpcode, NarrowVT))
10890 return SDValue();
10891 }
10892
10893 SDValue Result =
10894 DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0), MulhRightOp);
10895 bool IsSigned = N->getOpcode() == ISD::SRA;
10896 return DAG.getExtOrTrunc(IsSigned, Result, DL, WideVT);
10897}
10898
10899// fold (bswap (logic_op(bswap(x),y))) -> logic_op(x,bswap(y))
10900 // This helper function accepts SDNodes with opcode ISD::BSWAP or ISD::BITREVERSE.
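// e.g. (bswap (xor (bswap a), b)) -> (xor a, (bswap b))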
10901 static SDValue foldBitOrderCrossLogicOp(SDNode *N, SelectionDAG &DAG) {
10902 unsigned Opcode = N->getOpcode();
10903 if (Opcode != ISD::BSWAP && Opcode != ISD::BITREVERSE)
10904 return SDValue();
10905
10906 SDValue N0 = N->getOperand(0);
10907 EVT VT = N->getValueType(0);
10908 SDLoc DL(N);
10909 SDValue X, Y;
10910
10911 // If both operands are bswap/bitreverse, ignore the multiuse
10912 if (sd_match(N0, m_OneUse(m_BitwiseLogic(m_UnaryOp(Opcode, m_Value(X)),
10913 m_UnaryOp(Opcode, m_Value(Y))))))
10914 return DAG.getNode(N0.getOpcode(), DL, VT, X, Y);
10915
10916 // Otherwise need to ensure logic_op and bswap/bitreverse(x) have one use.
10917 if (sd_match(N0, m_OneUse(m_BitwiseLogic(
10918 m_OneUse(m_UnaryOp(Opcode, m_Value(X))), m_Value(Y))))) {
10919 SDValue NewBitReorder = DAG.getNode(Opcode, DL, VT, Y);
10920 return DAG.getNode(N0.getOpcode(), DL, VT, X, NewBitReorder);
10921 }
10922
10923 return SDValue();
10924}
10925
10926SDValue DAGCombiner::visitSRA(SDNode *N) {
10927 SDValue N0 = N->getOperand(0);
10928 SDValue N1 = N->getOperand(1);
10929 if (SDValue V = DAG.simplifyShift(N0, N1))
10930 return V;
10931
10932 SDLoc DL(N);
10933 EVT VT = N0.getValueType();
10934 unsigned OpSizeInBits = VT.getScalarSizeInBits();
10935
10936 // fold (sra c1, c2) -> c1 >>s c2
10937 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRA, DL, VT, {N0, N1}))
10938 return C;
10939
10940 // Arithmetic shifting an all-sign-bit value is a no-op.
10941 // fold (sra 0, x) -> 0
10942 // fold (sra -1, x) -> -1
10943 if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
10944 return N0;
10945
10946 // fold vector ops
10947 if (VT.isVector())
10948 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
10949 return FoldedVOp;
10950
10951 if (SDValue NewSel = foldBinOpIntoSelect(N))
10952 return NewSel;
10953
10954 ConstantSDNode *N1C = isConstOrConstSplat(N1);
10955
10956 // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
10957 // clamp (add c1, c2) to max shift.
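// Shifting arithmetically by (bitwidth - 1) already yields a splat of the
// sign bit, so any larger combined amount can be clamped to that value.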
10958 if (N0.getOpcode() == ISD::SRA) {
10959 EVT ShiftVT = N1.getValueType();
10960 EVT ShiftSVT = ShiftVT.getScalarType();
10961 SmallVector<SDValue, 16> ShiftValues;
10962
10963 auto SumOfShifts = [&](ConstantSDNode *LHS, ConstantSDNode *RHS) {
10964 APInt c1 = LHS->getAPIntValue();
10965 APInt c2 = RHS->getAPIntValue();
10966 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10967 APInt Sum = c1 + c2;
10968 unsigned ShiftSum =
10969 Sum.uge(OpSizeInBits) ? (OpSizeInBits - 1) : Sum.getZExtValue();
10970 ShiftValues.push_back(DAG.getConstant(ShiftSum, DL, ShiftSVT));
10971 return true;
10972 };
10973 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), SumOfShifts)) {
10974 SDValue ShiftValue;
10975 if (N1.getOpcode() == ISD::BUILD_VECTOR)
10976 ShiftValue = DAG.getBuildVector(ShiftVT, DL, ShiftValues);
10977 else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
10978 assert(ShiftValues.size() == 1 &&
10979 "Expected matchBinaryPredicate to return one element for "
10980 "SPLAT_VECTORs");
10981 ShiftValue = DAG.getSplatVector(ShiftVT, DL, ShiftValues[0]);
10982 } else
10983 ShiftValue = ShiftValues[0];
10984 return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), ShiftValue);
10985 }
10986 }
10987
10988 // fold (sra (shl X, m), (sub result_size, n))
10989 // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
10990 // result_size - n != m.
10991 // If truncate is free for the target, sext(shl) is likely to result in better
10992 // code.
10993 if (N0.getOpcode() == ISD::SHL && N1C) {
10994 // Get the two constants of the shifts, CN0 = m, CN = n.
10995 const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
10996 if (N01C) {
10997 LLVMContext &Ctx = *DAG.getContext();
10998 // Determine what the truncate's result bitsize and type would be.
10999 EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
11000
11001 if (VT.isVector())
11002 TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
11003
11004 // Determine the residual right-shift amount.
11005 int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
11006
11007 // If the shift is not a no-op (in which case this should be just a sign
11008 // extend already), the truncated to type is legal, sign_extend is legal
11009 // on that type, and the truncate to that type is both legal and free,
11010 // perform the transform.
11011 if ((ShiftAmt > 0) &&
11012 TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
11013 TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
11014 TLI.isTruncateFree(VT, TruncVT)) {
11015 SDValue Amt = DAG.getShiftAmountConstant(ShiftAmt, VT, DL);
11016 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
11017 N0.getOperand(0), Amt);
11018 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
11019 Shift);
11020 return DAG.getNode(ISD::SIGN_EXTEND, DL,
11021 N->getValueType(0), Trunc);
11022 }
11023 }
11024 }
11025
11026 // We convert trunc/ext to opposing shifts in IR, but casts may be cheaper.
11027 // sra (add (shl X, N1C), AddC), N1C -->
11028 // sext (add (trunc X to (width - N1C)), AddC')
11029 // sra (sub AddC, (shl X, N1C)), N1C -->
11030 // sext (sub AddC1',(trunc X to (width - N1C)))
11031 if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB) && N1C &&
11032 N0.hasOneUse()) {
11033 bool IsAdd = N0.getOpcode() == ISD::ADD;
11034 SDValue Shl = N0.getOperand(IsAdd ? 0 : 1);
11035 if (Shl.getOpcode() == ISD::SHL && Shl.getOperand(1) == N1 &&
11036 Shl.hasOneUse()) {
11037 // TODO: AddC does not need to be a splat.
11038 if (ConstantSDNode *AddC =
11039 isConstOrConstSplat(N0.getOperand(IsAdd ? 1 : 0))) {
11040 // Determine what the truncate's type would be and ask the target if
11041 // that is a free operation.
11042 LLVMContext &Ctx = *DAG.getContext();
11043 unsigned ShiftAmt = N1C->getZExtValue();
11044 EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt);
11045 if (VT.isVector())
11046 TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
11047
11048 // TODO: The simple type check probably belongs in the default hook
11049 // implementation and/or target-specific overrides (because
11050 // non-simple types likely require masking when legalized), but
11051 // that restriction may conflict with other transforms.
11052 if (TruncVT.isSimple() && isTypeLegal(TruncVT) &&
11053 TLI.isTruncateFree(VT, TruncVT)) {
11054 SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT);
11055 SDValue ShiftC =
11056 DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).trunc(
11057 TruncVT.getScalarSizeInBits()),
11058 DL, TruncVT);
11059 SDValue Add;
11060 if (IsAdd)
11061 Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC);
11062 else
11063 Add = DAG.getNode(ISD::SUB, DL, TruncVT, ShiftC, Trunc);
11064 return DAG.getSExtOrTrunc(Add, DL, VT);
11065 }
11066 }
11067 }
11068 }
11069
11070 // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
11071 if (N1.getOpcode() == ISD::TRUNCATE &&
11072 N1.getOperand(0).getOpcode() == ISD::AND) {
11073 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
11074 return DAG.getNode(ISD::SRA, DL, VT, N0, NewOp1);
11075 }
11076
11077 // fold (sra (trunc (sra x, c1)), c2) -> (trunc (sra x, c1 + c2))
11078 // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
11079 // if c1 is equal to the number of bits the trunc removes
11080 // TODO - support non-uniform vector shift amounts.
11081 if (N0.getOpcode() == ISD::TRUNCATE &&
11082 (N0.getOperand(0).getOpcode() == ISD::SRL ||
11083 N0.getOperand(0).getOpcode() == ISD::SRA) &&
11084 N0.getOperand(0).hasOneUse() &&
11085 N0.getOperand(0).getOperand(1).hasOneUse() && N1C) {
11086 SDValue N0Op0 = N0.getOperand(0);
11087 if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
11088 EVT LargeVT = N0Op0.getValueType();
11089 unsigned TruncBits = LargeVT.getScalarSizeInBits() - OpSizeInBits;
11090 if (LargeShift->getAPIntValue() == TruncBits) {
11091 EVT LargeShiftVT = getShiftAmountTy(LargeVT);
11092 SDValue Amt = DAG.getZExtOrTrunc(N1, DL, LargeShiftVT);
11093 Amt = DAG.getNode(ISD::ADD, DL, LargeShiftVT, Amt,
11094 DAG.getConstant(TruncBits, DL, LargeShiftVT));
11095 SDValue SRA =
11096 DAG.getNode(ISD::SRA, DL, LargeVT, N0Op0.getOperand(0), Amt);
11097 return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
11098 }
11099 }
11100 }
11101
11102 // Simplify, based on bits shifted out of the LHS.
11103 if (SimplifyDemandedBits(SDValue(N, 0)))
11104 return SDValue(N, 0);
11105
11106 // If the sign bit is known to be zero, switch this to a SRL.
11107 if (DAG.SignBitIsZero(N0))
11108 return DAG.getNode(ISD::SRL, DL, VT, N0, N1);
11109
11110 if (N1C && !N1C->isOpaque())
11111 if (SDValue NewSRA = visitShiftByConstant(N))
11112 return NewSRA;
11113
11114 // Try to transform this shift into a multiply-high if
11115 // it matches the appropriate pattern detected in combineShiftToMULH.
11116 if (SDValue MULH = combineShiftToMULH(N, DL, DAG, TLI))
11117 return MULH;
11118
11119 // Attempt to convert a sra of a load into a narrower sign-extending load.
11120 if (SDValue NarrowLoad = reduceLoadWidth(N))
11121 return NarrowLoad;
11122
11123 if (SDValue AVG = foldShiftToAvg(N, DL))
11124 return AVG;
11125
11126 return SDValue();
11127}
11128
11129SDValue DAGCombiner::visitSRL(SDNode *N) {
11130 SDValue N0 = N->getOperand(0);
11131 SDValue N1 = N->getOperand(1);
11132 if (SDValue V = DAG.simplifyShift(N0, N1))
11133 return V;
11134
11135 SDLoc DL(N);
11136 EVT VT = N0.getValueType();
11137 EVT ShiftVT = N1.getValueType();
11138 unsigned OpSizeInBits = VT.getScalarSizeInBits();
11139
11140 // fold (srl c1, c2) -> c1 >>u c2
11141 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRL, DL, VT, {N0, N1}))
11142 return C;
11143
11144 // fold vector ops
11145 if (VT.isVector())
11146 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
11147 return FoldedVOp;
11148
11149 if (SDValue NewSel = foldBinOpIntoSelect(N))
11150 return NewSel;
11151
11152 // if (srl x, c) is known to be zero, return 0
11153 ConstantSDNode *N1C = isConstOrConstSplat(N1);
11154 if (N1C &&
11155 DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))
11156 return DAG.getConstant(0, DL, VT);
11157
11158 // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
11159 if (N0.getOpcode() == ISD::SRL) {
11160 auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
11161 ConstantSDNode *RHS) {
11162 APInt c1 = LHS->getAPIntValue();
11163 APInt c2 = RHS->getAPIntValue();
11164 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
11165 return (c1 + c2).uge(OpSizeInBits);
11166 };
11167 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
11168 return DAG.getConstant(0, DL, VT);
11169
11170 auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
11171 ConstantSDNode *RHS) {
11172 APInt c1 = LHS->getAPIntValue();
11173 APInt c2 = RHS->getAPIntValue();
11174 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
11175 return (c1 + c2).ult(OpSizeInBits);
11176 };
11177 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
11178 SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
11179 return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
11180 }
11181 }
11182
11183 if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
11184 N0.getOperand(0).getOpcode() == ISD::SRL) {
11185 SDValue InnerShift = N0.getOperand(0);
11186 // TODO - support non-uniform vector shift amounts.
11187 if (auto *N001C = isConstOrConstSplat(InnerShift.getOperand(1))) {
11188 uint64_t c1 = N001C->getZExtValue();
11189 uint64_t c2 = N1C->getZExtValue();
11190 EVT InnerShiftVT = InnerShift.getValueType();
11191 EVT ShiftAmtVT = InnerShift.getOperand(1).getValueType();
11192 uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
11193 // srl (trunc (srl x, c1)), c2 --> 0 or (trunc (srl x, (add c1, c2)))
11194 // This is only valid if the OpSizeInBits + c1 = size of inner shift.
11195 if (c1 + OpSizeInBits == InnerShiftSize) {
11196 if (c1 + c2 >= InnerShiftSize)
11197 return DAG.getConstant(0, DL, VT);
11198 SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
11199 SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
11200 InnerShift.getOperand(0), NewShiftAmt);
11201 return DAG.getNode(ISD::TRUNCATE, DL, VT, NewShift);
11202 }
11203 // In the more general case, we can clear the high bits after the shift:
11204 // srl (trunc (srl x, c1)), c2 --> trunc (and (srl x, (c1+c2)), Mask)
11205 if (N0.hasOneUse() && InnerShift.hasOneUse() &&
11206 c1 + c2 < InnerShiftSize) {
11207 SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
11208 SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
11209 InnerShift.getOperand(0), NewShiftAmt);
11210 SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(InnerShiftSize,
11211 OpSizeInBits - c2),
11212 DL, InnerShiftVT);
11213 SDValue And = DAG.getNode(ISD::AND, DL, InnerShiftVT, NewShift, Mask);
11214 return DAG.getNode(ISD::TRUNCATE, DL, VT, And);
11215 }
11216 }
11217 }
11218
11219 if (N0.getOpcode() == ISD::SHL) {
11220 // fold (srl (shl nuw x, c), c) -> x
11221 if (N0.getOperand(1) == N1 && N0->getFlags().hasNoUnsignedWrap())
11222 return N0.getOperand(0);
11223
11224 // fold (srl (shl x, c1), c2) -> (and (shl x, (sub c1, c2)), MASK) or
11225 // (and (srl x, (sub c2, c1)), MASK)
11226 if ((N0.getOperand(1) == N1 || N0->hasOneUse()) &&
11227 TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
11228 auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS,
11229 ConstantSDNode *RHS) {
11230 const APInt &LHSC = LHS->getAPIntValue();
11231 const APInt &RHSC = RHS->getAPIntValue();
11232 return LHSC.ult(OpSizeInBits) && RHSC.ult(OpSizeInBits) &&
11233 LHSC.getZExtValue() <= RHSC.getZExtValue();
11234 };
11235 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
11236 /*AllowUndefs*/ false,
11237 /*AllowTypeMismatch*/ true)) {
11238 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
11239 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
11240 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
11241 Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N01);
11242 Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, Diff);
11243 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
11244 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
11245 }
11246 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
11247 /*AllowUndefs*/ false,
11248 /*AllowTypeMismatch*/ true)) {
11249 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
11250 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
11251 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
11252 Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N1);
11253 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Diff);
11254 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
11255 }
11256 }
11257 }
11258
11259 // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
11260 // TODO - support non-uniform vector shift amounts.
11261 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
11262 // Shifting in all undef bits?
11263 EVT SmallVT = N0.getOperand(0).getValueType();
11264 unsigned BitSize = SmallVT.getScalarSizeInBits();
11265 if (N1C->getAPIntValue().uge(BitSize))
11266 return DAG.getUNDEF(VT);
11267
11268 if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
11269 uint64_t ShiftAmt = N1C->getZExtValue();
11270 SDLoc DL0(N0);
11271 SDValue SmallShift =
11272 DAG.getNode(ISD::SRL, DL0, SmallVT, N0.getOperand(0),
11273 DAG.getShiftAmountConstant(ShiftAmt, SmallVT, DL0));
11274 AddToWorklist(SmallShift.getNode());
11275 APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
11276 return DAG.getNode(ISD::AND, DL, VT,
11277 DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
11278 DAG.getConstant(Mask, DL, VT));
11279 }
11280 }
11281
11282 // fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign
11283 // bit, which is unmodified by sra.
11284 if (N1C && N1C->getAPIntValue() == (OpSizeInBits - 1)) {
11285 if (N0.getOpcode() == ISD::SRA)
11286 return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), N1);
11287 }
11288
11289 // fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit), and the
11290 // bitwidth of x is a power of two. The "5" represents (log2 (bitwidth of x)).
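// e.g. for i32, (srl (ctlz x), 5) is 1 when x == 0 and 0 for any other value,
// which is why the code below can rewrite the pair as an SRL/XOR.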
11291 if (N1C && N0.getOpcode() == ISD::CTLZ &&
11292 isPowerOf2_32(OpSizeInBits) &&
11293 N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
11294 KnownBits Known = DAG.computeKnownBits(N0.getOperand(0));
11295
11296 // If any of the input bits are KnownOne, then the input couldn't be all
11297 // zeros, thus the result of the srl will always be zero.
11298 if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
11299
11300 // If all of the bits input to the ctlz node are known to be zero, then
11301 // the result of the ctlz is "32" and the result of the shift is one.
11302 APInt UnknownBits = ~Known.Zero;
11303 if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
11304
11305 // Otherwise, check to see if there is exactly one bit input to the ctlz.
11306 if (UnknownBits.isPowerOf2()) {
11307 // Okay, we know that only the single bit specified by UnknownBits
11308 // could be set on input to the CTLZ node. If this bit is set, the SRL
11309 // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
11310 // to an SRL/XOR pair, which is likely to simplify more.
11311 unsigned ShAmt = UnknownBits.countr_zero();
11312 SDValue Op = N0.getOperand(0);
11313
11314 if (ShAmt) {
11315 SDLoc DL(N0);
11316 Op = DAG.getNode(ISD::SRL, DL, VT, Op,
11317 DAG.getShiftAmountConstant(ShAmt, VT, DL));
11318 AddToWorklist(Op.getNode());
11319 }
11320 return DAG.getNode(ISD::XOR, DL, VT, Op, DAG.getConstant(1, DL, VT));
11321 }
11322 }
11323
11324 // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
11325 if (N1.getOpcode() == ISD::TRUNCATE &&
11326 N1.getOperand(0).getOpcode() == ISD::AND) {
11327 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
11328 return DAG.getNode(ISD::SRL, DL, VT, N0, NewOp1);
11329 }
11330
11331 // fold (srl (logic_op x, (shl (zext y), c1)), c1)
11332 // -> (logic_op (srl x, c1), (zext y))
11333 // c1 <= leadingzeros(zext(y))
11334 SDValue X, ZExtY;
11335 if (N1C && sd_match(N0, m_OneUse(m_BitwiseLogic(
11336 m_Value(X),
11339 m_Specific(N1))))))) {
11340 unsigned NumLeadingZeros = ZExtY.getScalarValueSizeInBits() -
11341 ZExtY.getOperand(0).getScalarValueSizeInBits();
11342 if (N1C->getZExtValue() <= NumLeadingZeros)
11343 return DAG.getNode(N0.getOpcode(), SDLoc(N0), VT,
11344 DAG.getNode(ISD::SRL, SDLoc(N0), VT, X, N1), ZExtY);
11345 }
11346
11347 // fold operands of srl based on knowledge that the low bits are not
11348 // demanded.
11349 if (SimplifyDemandedBits(SDValue(N, 0)))
11350 return SDValue(N, 0);
11351
11352 if (N1C && !N1C->isOpaque())
11353 if (SDValue NewSRL = visitShiftByConstant(N))
11354 return NewSRL;
11355
11356 // Attempt to convert a srl of a load into a narrower zero-extending load.
11357 if (SDValue NarrowLoad = reduceLoadWidth(N))
11358 return NarrowLoad;
11359
11360 // Here is a common situation. We want to optimize:
11361 //
11362 // %a = ...
11363 // %b = and i32 %a, 2
11364 // %c = srl i32 %b, 1
11365 // brcond i32 %c ...
11366 //
11367 // into
11368 //
11369 // %a = ...
11370 // %b = and %a, 2
11371 // %c = setcc eq %b, 0
11372 // brcond %c ...
11373 //
11374 // However, after the source operand of the SRL is optimized into an AND, the SRL
11375 // itself may not be optimized further. Look for it and add the BRCOND into
11376 // the worklist.
11377 //
11378 // This also tends to happen for binary operations when SimplifyDemandedBits
11379 // is involved.
11380 //
11381 // FIXME: This is unnecessary if we process the DAG in topological order,
11382 // which we plan to do. This workaround can be removed once the DAG is
11383 // processed in topological order.
11384 if (N->hasOneUse()) {
11385 SDNode *User = *N->user_begin();
11386
11387 // Look past the truncate.
11388 if (User->getOpcode() == ISD::TRUNCATE && User->hasOneUse())
11389 User = *User->user_begin();
11390
11391 if (User->getOpcode() == ISD::BRCOND || User->getOpcode() == ISD::AND ||
11392 User->getOpcode() == ISD::OR || User->getOpcode() == ISD::XOR)
11393 AddToWorklist(User);
11394 }
11395
11396 // Try to transform this shift into a multiply-high if
11397 // it matches the appropriate pattern detected in combineShiftToMULH.
11398 if (SDValue MULH = combineShiftToMULH(N, DL, DAG, TLI))
11399 return MULH;
11400
11401 if (SDValue AVG = foldShiftToAvg(N, DL))
11402 return AVG;
11403
11404 return SDValue();
11405}
11406
11407SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
11408 EVT VT = N->getValueType(0);
11409 SDValue N0 = N->getOperand(0);
11410 SDValue N1 = N->getOperand(1);
11411 SDValue N2 = N->getOperand(2);
11412 bool IsFSHL = N->getOpcode() == ISD::FSHL;
11413 unsigned BitWidth = VT.getScalarSizeInBits();
11414 SDLoc DL(N);
11415
11416 // fold (fshl/fshr C0, C1, C2) -> C3
11417 if (SDValue C =
11418 DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1, N2}))
11419 return C;
11420
11421 // fold (fshl N0, N1, 0) -> N0
11422 // fold (fshr N0, N1, 0) -> N1
11423 if (isPowerOf2_32(BitWidth))
11424 if (DAG.MaskedValueIsZero(
11425 N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
11426 return IsFSHL ? N0 : N1;
11427
11428 auto IsUndefOrZero = [](SDValue V) {
11429 return V.isUndef() || isNullOrNullSplat(V, /*AllowUndefs*/ true);
11430 };
11431
11432 // TODO - support non-uniform vector shift amounts.
11433 if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) {
11434 EVT ShAmtTy = N2.getValueType();
11435
11436 // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
11437 if (Cst->getAPIntValue().uge(BitWidth)) {
11438 uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth);
11439 return DAG.getNode(N->getOpcode(), DL, VT, N0, N1,
11440 DAG.getConstant(RotAmt, DL, ShAmtTy));
11441 }
11442
11443 unsigned ShAmt = Cst->getZExtValue();
11444 if (ShAmt == 0)
11445 return IsFSHL ? N0 : N1;
11446
11447 // fold fshl(undef_or_zero, N1, C) -> lshr(N1, BW-C)
11448 // fold fshr(undef_or_zero, N1, C) -> lshr(N1, C)
11449 // fold fshl(N0, undef_or_zero, C) -> shl(N0, C)
11450 // fold fshr(N0, undef_or_zero, C) -> shl(N0, BW-C)
11451 if (IsUndefOrZero(N0))
11452 return DAG.getNode(
11453 ISD::SRL, DL, VT, N1,
11454 DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt, DL, ShAmtTy));
11455 if (IsUndefOrZero(N1))
11456 return DAG.getNode(
11457 ISD::SHL, DL, VT, N0,
11458 DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt, DL, ShAmtTy));
11459
11460 // fold (fshl ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
11461 // fold (fshr ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
11462 // TODO - bigendian support once we have test coverage.
11463 // TODO - can we merge this with CombineConsecutiveLoads/MatchLoadCombine?
11464 // TODO - permit LHS EXTLOAD if extensions are shifted out.
11465 if ((BitWidth % 8) == 0 && (ShAmt % 8) == 0 && !VT.isVector() &&
11466 !DAG.getDataLayout().isBigEndian()) {
11467 auto *LHS = dyn_cast<LoadSDNode>(N0);
11468 auto *RHS = dyn_cast<LoadSDNode>(N1);
11469 if (LHS && RHS && LHS->isSimple() && RHS->isSimple() &&
11470 LHS->getAddressSpace() == RHS->getAddressSpace() &&
11471 (LHS->hasNUsesOfValue(1, 0) || RHS->hasNUsesOfValue(1, 0)) &&
11472 ISD::isNON_EXTLoad(RHS) && ISD::isNON_EXTLoad(LHS)) {
11473 if (DAG.areNonVolatileConsecutiveLoads(LHS, RHS, BitWidth / 8, 1)) {
11474 SDLoc DL(RHS);
11475 uint64_t PtrOff =
11476 IsFSHL ? (((BitWidth - ShAmt) % BitWidth) / 8) : (ShAmt / 8);
11477 Align NewAlign = commonAlignment(RHS->getAlign(), PtrOff);
11478 unsigned Fast = 0;
11479 if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
11480 RHS->getAddressSpace(), NewAlign,
11481 RHS->getMemOperand()->getFlags(), &Fast) &&
11482 Fast) {
11483 SDValue NewPtr = DAG.getMemBasePlusOffset(
11484 RHS->getBasePtr(), TypeSize::getFixed(PtrOff), DL);
11485 AddToWorklist(NewPtr.getNode());
11486 SDValue Load = DAG.getLoad(
11487 VT, DL, RHS->getChain(), NewPtr,
11488 RHS->getPointerInfo().getWithOffset(PtrOff), NewAlign,
11489 RHS->getMemOperand()->getFlags(), RHS->getAAInfo());
11490 DAG.makeEquivalentMemoryOrdering(LHS, Load.getValue(1));
11491 DAG.makeEquivalentMemoryOrdering(RHS, Load.getValue(1));
11492 return Load;
11493 }
11494 }
11495 }
11496 }
11497 }
11498
11499 // fold fshr(undef_or_zero, N1, N2) -> lshr(N1, N2)
11500 // fold fshl(N0, undef_or_zero, N2) -> shl(N0, N2)
11501 // iff we know the shift amount is in range.
11502 // TODO: when is it worth doing SUB(BW, N2) as well?
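// Because the funnel-shift amount is taken modulo the bit width, knowing that
// all bits above the power-of-2 modulo mask are zero guarantees the amount is
// already in range.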
11503 if (isPowerOf2_32(BitWidth)) {
11504 APInt ModuloBits(N2.getScalarValueSizeInBits(), BitWidth - 1);
11505 if (IsUndefOrZero(N0) && !IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
11506 return DAG.getNode(ISD::SRL, DL, VT, N1, N2);
11507 if (IsUndefOrZero(N1) && IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
11508 return DAG.getNode(ISD::SHL, DL, VT, N0, N2);
11509 }
11510
11511 // fold (fshl N0, N0, N2) -> (rotl N0, N2)
11512 // fold (fshr N0, N0, N2) -> (rotr N0, N2)
11513 // TODO: Investigate flipping this rotate if only one is legal.
11514 // If funnel shift is legal as well we might be better off avoiding
11515 // non-constant (BW - N2).
11516 unsigned RotOpc = IsFSHL ? ISD::ROTL : ISD::ROTR;
11517 if (N0 == N1 && hasOperation(RotOpc, VT))
11518 return DAG.getNode(RotOpc, DL, VT, N0, N2);
11519
11520 // Simplify, based on bits shifted out of N0/N1.
11521 if (SimplifyDemandedBits(SDValue(N, 0)))
11522 return SDValue(N, 0);
11523
11524 return SDValue();
11525}
11526
11527SDValue DAGCombiner::visitSHLSAT(SDNode *N) {
11528 SDValue N0 = N->getOperand(0);
11529 SDValue N1 = N->getOperand(1);
11530 if (SDValue V = DAG.simplifyShift(N0, N1))
11531 return V;
11532
11533 SDLoc DL(N);
11534 EVT VT = N0.getValueType();
11535
11536 // fold (*shlsat c1, c2) -> c1<<c2
11537 if (SDValue C = DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1}))
11538 return C;
11539
11540 ConstantSDNode *N1C = isConstOrConstSplat(N1);
11541
11542 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) {
11543 // fold (sshlsat x, c) -> (shl x, c)
11544 if (N->getOpcode() == ISD::SSHLSAT && N1C &&
11545 N1C->getAPIntValue().ult(DAG.ComputeNumSignBits(N0)))
11546 return DAG.getNode(ISD::SHL, DL, VT, N0, N1);
11547
11548 // fold (ushlsat x, c) -> (shl x, c)
11549 if (N->getOpcode() == ISD::USHLSAT && N1C &&
11550 N1C->getAPIntValue().ule(
11551 DAG.computeKnownBits(N0).countMinLeadingZeros()))
11552 return DAG.getNode(ISD::SHL, DL, VT, N0, N1);
11553 }
11554
11555 return SDValue();
11556}
11557
11558 // Given an ABS node, detect the following patterns:
11559// (ABS (SUB (EXTEND a), (EXTEND b))).
11560// (TRUNC (ABS (SUB (EXTEND a), (EXTEND b)))).
11561 // Generates a UABD/SABD instruction.
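// e.g. (abs (sub (zext i8 a to i32), (zext i8 b to i32)))
// -> (zext (abdu a, b) to i32), when ABDU is available.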
11562SDValue DAGCombiner::foldABSToABD(SDNode *N, const SDLoc &DL) {
11563 EVT SrcVT = N->getValueType(0);
11564
11565 if (N->getOpcode() == ISD::TRUNCATE)
11566 N = N->getOperand(0).getNode();
11567
11568 EVT VT = N->getValueType(0);
11569 SDValue Op0, Op1;
11570
11571 if (!sd_match(N, m_Abs(m_Sub(m_Value(Op0), m_Value(Op1)))))
11572 return SDValue();
11573
11574 SDValue AbsOp0 = N->getOperand(0);
11575 unsigned Opc0 = Op0.getOpcode();
11576
11577 // Check if the operands of the sub are (zero|sign)-extended, otherwise
11578 // fallback to ValueTracking.
11579 if (Opc0 != Op1.getOpcode() ||
11580 (Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND &&
11581 Opc0 != ISD::SIGN_EXTEND_INREG)) {
11582 // fold (abs (sub nsw x, y)) -> abds(x, y)
11583 // Don't fold this for unsupported types as we lose the NSW handling.
11584 if (hasOperation(ISD::ABDS, VT) && TLI.preferABDSToABSWithNSW(VT) &&
11585 (AbsOp0->getFlags().hasNoSignedWrap() ||
11586 DAG.willNotOverflowSub(/*IsSigned=*/true, Op0, Op1))) {
11587 SDValue ABD = DAG.getNode(ISD::ABDS, DL, VT, Op0, Op1);
11588 return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
11589 }
11590 // fold (abs (sub x, y)) -> abdu(x, y)
11591 if (hasOperation(ISD::ABDU, VT) && DAG.SignBitIsZero(Op0) &&
11592 DAG.SignBitIsZero(Op1)) {
11593 SDValue ABD = DAG.getNode(ISD::ABDU, DL, VT, Op0, Op1);
11594 return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
11595 }
11596 return SDValue();
11597 }
11598
11599 EVT VT0, VT1;
11600 if (Opc0 == ISD::SIGN_EXTEND_INREG) {
11601 VT0 = cast<VTSDNode>(Op0.getOperand(1))->getVT();
11602 VT1 = cast<VTSDNode>(Op1.getOperand(1))->getVT();
11603 } else {
11604 VT0 = Op0.getOperand(0).getValueType();
11605 VT1 = Op1.getOperand(0).getValueType();
11606 }
11607 unsigned ABDOpcode = (Opc0 == ISD::ZERO_EXTEND) ? ISD::ABDU : ISD::ABDS;
11608
11609 // fold abs(sext(x) - sext(y)) -> zext(abds(x, y))
11610 // fold abs(zext(x) - zext(y)) -> zext(abdu(x, y))
11611 EVT MaxVT = VT0.bitsGT(VT1) ? VT0 : VT1;
11612 if ((VT0 == MaxVT || Op0->hasOneUse()) &&
11613 (VT1 == MaxVT || Op1->hasOneUse()) &&
11614 (!LegalTypes || hasOperation(ABDOpcode, MaxVT))) {
11615 SDValue ABD = DAG.getNode(ABDOpcode, DL, MaxVT,
11616 DAG.getNode(ISD::TRUNCATE, DL, MaxVT, Op0),
11617 DAG.getNode(ISD::TRUNCATE, DL, MaxVT, Op1));
11618 ABD = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, ABD);
11619 return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
11620 }
11621
11622 // fold abs(sext(x) - sext(y)) -> abds(sext(x), sext(y))
11623 // fold abs(zext(x) - zext(y)) -> abdu(zext(x), zext(y))
11624 if (!LegalOperations || hasOperation(ABDOpcode, VT)) {
11625 SDValue ABD = DAG.getNode(ABDOpcode, DL, VT, Op0, Op1);
11626 return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
11627 }
11628
11629 return SDValue();
11630}
11631
11632SDValue DAGCombiner::visitABS(SDNode *N) {
11633 SDValue N0 = N->getOperand(0);
11634 EVT VT = N->getValueType(0);
11635 SDLoc DL(N);
11636
11637 // fold (abs c1) -> c2
11638 if (SDValue C = DAG.FoldConstantArithmetic(ISD::ABS, DL, VT, {N0}))
11639 return C;
11640 // fold (abs (abs x)) -> (abs x)
11641 if (N0.getOpcode() == ISD::ABS)
11642 return N0;
11643 // fold (abs x) -> x iff not-negative
11644 if (DAG.SignBitIsZero(N0))
11645 return N0;
11646
11647 if (SDValue ABD = foldABSToABD(N, DL))
11648 return ABD;
11649
11650 // fold (abs (sign_extend_inreg x)) -> (zero_extend (abs (truncate x)))
11651 // iff zero_extend/truncate are free.
11652 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
11653 EVT ExtVT = cast<VTSDNode>(N0.getOperand(1))->getVT();
11654 if (TLI.isTruncateFree(VT, ExtVT) && TLI.isZExtFree(ExtVT, VT) &&
11655 TLI.isTypeDesirableForOp(ISD::ABS, ExtVT) &&
11656 hasOperation(ISD::ABS, ExtVT)) {
11657 return DAG.getNode(
11658 ISD::ZERO_EXTEND, DL, VT,
11659 DAG.getNode(ISD::ABS, DL, ExtVT,
11660 DAG.getNode(ISD::TRUNCATE, DL, ExtVT, N0.getOperand(0))));
11661 }
11662 }
11663
11664 return SDValue();
11665}
11666
11667SDValue DAGCombiner::visitBSWAP(SDNode *N) {
11668 SDValue N0 = N->getOperand(0);
11669 EVT VT = N->getValueType(0);
11670 SDLoc DL(N);
11671
11672 // fold (bswap c1) -> c2
11673 if (SDValue C = DAG.FoldConstantArithmetic(ISD::BSWAP, DL, VT, {N0}))
11674 return C;
11675 // fold (bswap (bswap x)) -> x
11676 if (N0.getOpcode() == ISD::BSWAP)
11677 return N0.getOperand(0);
11678
11679 // Canonicalize bswap(bitreverse(x)) -> bitreverse(bswap(x)). If bitreverse
11680 // isn't supported, it will be expanded to bswap followed by a manual reversal
11681 // of bits in each byte. By placing bswaps before bitreverse, we can remove
11682 // the two bswaps if the bitreverse gets expanded.
11683 if (N0.getOpcode() == ISD::BITREVERSE && N0.hasOneUse()) {
11684 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, N0.getOperand(0));
11685 return DAG.getNode(ISD::BITREVERSE, DL, VT, BSwap);
11686 }
11687
11688 // fold (bswap shl(x,c)) -> (zext(bswap(trunc(shl(x,sub(c,bw/2))))))
11689 // iff c >= bw/2 (i.e. lower half is known zero)
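// With the low half known zero, every nonzero byte of the bswap result lands
// in the low half, so the swap can be done at half width and zero-extended.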
11690 unsigned BW = VT.getScalarSizeInBits();
11691 if (BW >= 32 && N0.getOpcode() == ISD::SHL && N0.hasOneUse()) {
11692 auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
11693 EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), BW / 2);
11694 if (ShAmt && ShAmt->getAPIntValue().ult(BW) &&
11695 ShAmt->getZExtValue() >= (BW / 2) &&
11696 (ShAmt->getZExtValue() % 16) == 0 && TLI.isTypeLegal(HalfVT) &&
11697 TLI.isTruncateFree(VT, HalfVT) &&
11698 (!LegalOperations || hasOperation(ISD::BSWAP, HalfVT))) {
11699 SDValue Res = N0.getOperand(0);
11700 if (uint64_t NewShAmt = (ShAmt->getZExtValue() - (BW / 2)))
11701 Res = DAG.getNode(ISD::SHL, DL, VT, Res,
11702 DAG.getShiftAmountConstant(NewShAmt, VT, DL));
11703 Res = DAG.getZExtOrTrunc(Res, DL, HalfVT);
11704 Res = DAG.getNode(ISD::BSWAP, DL, HalfVT, Res);
11705 return DAG.getZExtOrTrunc(Res, DL, VT);
11706 }
11707 }
11708
11709 // Try to canonicalize bswap-of-logical-shift-by-8-bit-multiple as
11710 // inverse-shift-of-bswap:
11711 // bswap (X u<< C) --> (bswap X) u>> C
11712 // bswap (X u>> C) --> (bswap X) u<< C
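// A shift by a whole number of bytes commutes with bswap, with the shift
// direction reversed, so the bswap can be hoisted above the shift.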
11713 if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
11714 N0.hasOneUse()) {
11715 auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
11716 if (ShAmt && ShAmt->getAPIntValue().ult(BW) &&
11717 ShAmt->getZExtValue() % 8 == 0) {
11718 SDValue NewSwap = DAG.getNode(ISD::BSWAP, DL, VT, N0.getOperand(0));
11719 unsigned InverseShift = N0.getOpcode() == ISD::SHL ? ISD::SRL : ISD::SHL;
11720 return DAG.getNode(InverseShift, DL, VT, NewSwap, N0.getOperand(1));
11721 }
11722 }
11723
11724 if (SDValue V = foldBitOrderCrossLogicOp(N, DAG))
11725 return V;
11726
11727 return SDValue();
11728}
11729
11730SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
11731 SDValue N0 = N->getOperand(0);
11732 EVT VT = N->getValueType(0);
11733 SDLoc DL(N);
11734
11735 // fold (bitreverse c1) -> c2
11736 if (SDValue C = DAG.FoldConstantArithmetic(ISD::BITREVERSE, DL, VT, {N0}))
11737 return C;
11738
11739 // fold (bitreverse (bitreverse x)) -> x
11740 if (N0.getOpcode() == ISD::BITREVERSE)
11741 return N0.getOperand(0);
11742
11743 SDValue X, Y;
11744
11745 // fold (bitreverse (lshr (bitreverse x), y)) -> (shl x, y)
11746 if ((!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
11748 return DAG.getNode(ISD::SHL, DL, VT, X, Y);
11749
11750 // fold (bitreverse (shl (bitreverse x), y)) -> (lshr x, y)
11751 if ((!LegalOperations || TLI.isOperationLegal(ISD::SRL, VT)) &&
11753 return DAG.getNode(ISD::SRL, DL, VT, X, Y);
11754
11755 return SDValue();
11756}
11757
11758SDValue DAGCombiner::visitCTLZ(SDNode *N) {
11759 SDValue N0 = N->getOperand(0);
11760 EVT VT = N->getValueType(0);
11761 SDLoc DL(N);
11762
11763 // fold (ctlz c1) -> c2
11764 if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTLZ, DL, VT, {N0}))
11765 return C;
11766
11767 // If the value is known never to be zero, switch to the undef version.
11768 if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT))
11769 if (DAG.isKnownNeverZero(N0))
11770 return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, DL, VT, N0);
11771
11772 return SDValue();
11773}
11774
11775SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
11776 SDValue N0 = N->getOperand(0);
11777 EVT VT = N->getValueType(0);
11778 SDLoc DL(N);
11779
11780 // fold (ctlz_zero_undef c1) -> c2
11781 if (SDValue C =
11782 DAG.FoldConstantArithmetic(ISD::CTLZ_ZERO_UNDEF, DL, VT, {N0}))
11783 return C;
11784 return SDValue();
11785}
11786
11787SDValue DAGCombiner::visitCTTZ(SDNode *N) {
11788 SDValue N0 = N->getOperand(0);
11789 EVT VT = N->getValueType(0);
11790 SDLoc DL(N);
11791
11792 // fold (cttz c1) -> c2
11793 if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTTZ, DL, VT, {N0}))
11794 return C;
11795
11796 // If the value is known never to be zero, switch to the undef version.
11797 if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT))
11798 if (DAG.isKnownNeverZero(N0))
11799 return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, DL, VT, N0);
11800
11801 return SDValue();
11802}
11803
11804SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
11805 SDValue N0 = N->getOperand(0);
11806 EVT VT = N->getValueType(0);
11807 SDLoc DL(N);
11808
11809 // fold (cttz_zero_undef c1) -> c2
11810 if (SDValue C =
11811 DAG.FoldConstantArithmetic(ISD::CTTZ_ZERO_UNDEF, DL, VT, {N0}))
11812 return C;
11813 return SDValue();
11814}
11815
11816SDValue DAGCombiner::visitCTPOP(SDNode *N) {
11817 SDValue N0 = N->getOperand(0);
11818 EVT VT = N->getValueType(0);
11819 unsigned NumBits = VT.getScalarSizeInBits();
11820 SDLoc DL(N);
11821
11822 // fold (ctpop c1) -> c2
11823 if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTPOP, DL, VT, {N0}))
11824 return C;
11825
11826 // If the source is being shifted, but doesn't affect any active bits,
11827 // then we can call CTPOP on the shift source directly.
11828 if (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SHL) {
11829 if (ConstantSDNode *AmtC = isConstOrConstSplat(N0.getOperand(1))) {
11830 const APInt &Amt = AmtC->getAPIntValue();
11831 if (Amt.ult(NumBits)) {
11832 KnownBits KnownSrc = DAG.computeKnownBits(N0.getOperand(0));
11833 if ((N0.getOpcode() == ISD::SRL &&
11834 Amt.ule(KnownSrc.countMinTrailingZeros())) ||
11835 (N0.getOpcode() == ISD::SHL &&
11836 Amt.ule(KnownSrc.countMinLeadingZeros()))) {
11837 return DAG.getNode(ISD::CTPOP, DL, VT, N0.getOperand(0));
11838 }
11839 }
11840 }
11841 }
11842
11843 // If the upper bits are known to be zero, then see if it's profitable to
11844 // only count the lower bits.
11845 if (VT.isScalarInteger() && NumBits > 8 && (NumBits & 1) == 0) {
11846 EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), NumBits / 2);
11847 if (hasOperation(ISD::CTPOP, HalfVT) &&
11848 TLI.isTypeDesirableForOp(ISD::CTPOP, HalfVT) &&
11849 TLI.isTruncateFree(N0, HalfVT) && TLI.isZExtFree(HalfVT, VT)) {
11850 APInt UpperBits = APInt::getHighBitsSet(NumBits, NumBits / 2);
11851 if (DAG.MaskedValueIsZero(N0, UpperBits)) {
11852 SDValue PopCnt = DAG.getNode(ISD::CTPOP, DL, HalfVT,
11853 DAG.getZExtOrTrunc(N0, DL, HalfVT));
11854 return DAG.getZExtOrTrunc(PopCnt, DL, VT);
11855 }
11856 }
11857 }
11858
11859 return SDValue();
11860}
11861
11862 static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS,
11863 SDValue RHS, const SDNodeFlags Flags,
11864 const TargetLowering &TLI) {
11865 EVT VT = LHS.getValueType();
11866 if (!VT.isFloatingPoint())
11867 return false;
11868
11869 return Flags.hasNoSignedZeros() &&
11871 (Flags.hasNoNaNs() ||
11872 (DAG.isKnownNeverNaN(RHS) && DAG.isKnownNeverNaN(LHS)));
11873}
11874
11875 static SDValue combineMinNumMaxNumImpl(const SDLoc &DL, EVT VT, SDValue LHS,
11876 SDValue RHS, SDValue True, SDValue False,
11877 ISD::CondCode CC,
11878 const TargetLowering &TLI,
11879 SelectionDAG &DAG) {
11880 EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
11881 switch (CC) {
11882 case ISD::SETOLT:
11883 case ISD::SETOLE:
11884 case ISD::SETLT:
11885 case ISD::SETLE:
11886 case ISD::SETULT:
11887 case ISD::SETULE: {
11888 // Since it's known never nan to get here already, either fminnum or
11889 // fminnum_ieee are OK. Try the ieee version first, since fminnum is
11890 // expanded in terms of it.
11891 unsigned IEEEOpcode = (LHS == True) ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
11892 if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
11893 return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
11894
11895 unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
11896 if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
11897 return DAG.getNode(Opcode, DL, VT, LHS, RHS);
11898 return SDValue();
11899 }
11900 case ISD::SETOGT:
11901 case ISD::SETOGE:
11902 case ISD::SETGT:
11903 case ISD::SETGE:
11904 case ISD::SETUGT:
11905 case ISD::SETUGE: {
11906 unsigned IEEEOpcode = (LHS == True) ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
11907 if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
11908 return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
11909
11910 unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
11911 if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
11912 return DAG.getNode(Opcode, DL, VT, LHS, RHS);
11913 return SDValue();
11914 }
11915 default:
11916 return SDValue();
11917 }
11918}
11919
11920 // Convert (sr[al] (add n[su]w x, y), 1) -> (avgfloor[su] x, y)
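// A non-wrapping add followed by a shift right by one computes the exact
// floor average of the two operands.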
11921SDValue DAGCombiner::foldShiftToAvg(SDNode *N, const SDLoc &DL) {
11922 const unsigned Opcode = N->getOpcode();
11923 if (Opcode != ISD::SRA && Opcode != ISD::SRL)
11924 return SDValue();
11925
11926 EVT VT = N->getValueType(0);
11927 bool IsUnsigned = Opcode == ISD::SRL;
11928
11929 // Captured values.
11930 SDValue A, B, Add;
11931
11932 // Match floor average as it is common to both floor/ceil avgs.
11933 if (sd_match(N, m_BinOp(Opcode,
11934 m_AllOf(m_Value(Add), m_Add(m_Value(A), m_Value(B))),
11935 m_One()))) {
11936 // Decide whether signed or unsigned.
11937 unsigned FloorISD = IsUnsigned ? ISD::AVGFLOORU : ISD::AVGFLOORS;
11938 if (!hasOperation(FloorISD, VT))
11939 return SDValue();
11940
11941 // Can't optimize adds that may wrap.
11942 if ((IsUnsigned && !Add->getFlags().hasNoUnsignedWrap()) ||
11943 (!IsUnsigned && !Add->getFlags().hasNoSignedWrap()))
11944 return SDValue();
11945
11946 return DAG.getNode(FloorISD, DL, N->getValueType(0), {A, B});
11947 }
11948
11949 return SDValue();
11950}
11951
11952SDValue DAGCombiner::foldBitwiseOpWithNeg(SDNode *N, const SDLoc &DL, EVT VT) {
11953 unsigned Opc = N->getOpcode();
11954 SDValue X, Y, Z;
11955 if (sd_match(
11957 return DAG.getNode(Opc, DL, VT, X,
11958 DAG.getNOT(DL, DAG.getNode(ISD::SUB, DL, VT, Y, Z), VT));
11959
11961 m_Value(Z)))))
11962 return DAG.getNode(Opc, DL, VT, X,
11963 DAG.getNOT(DL, DAG.getNode(ISD::ADD, DL, VT, Y, Z), VT));
11964
11965 return SDValue();
11966}
11967
11968/// Generate Min/Max node
11969SDValue DAGCombiner::combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
11970 SDValue RHS, SDValue True,
11971 SDValue False, ISD::CondCode CC) {
11972 if ((LHS == True && RHS == False) || (LHS == False && RHS == True))
11973 return combineMinNumMaxNumImpl(DL, VT, LHS, RHS, True, False, CC, TLI, DAG);
11974
11975 // If we can't directly match this, try to see if we can pull an fneg out of
11976 // the select.
11977 SDValue NegTrue = TLI.getCheaperNegatedExpression(
11978 True, DAG, LegalOperations, ForCodeSize);
11979 if (!NegTrue)
11980 return SDValue();
11981
11982 HandleSDNode NegTrueHandle(NegTrue);
11983
11984 // Try to unfold an fneg from the select if we are comparing the negated
11985 // constant.
11986 //
11987 // select (setcc x, K) (fneg x), -K -> fneg(minnum(x, K))
11988 //
11989 // TODO: Handle fabs
11990 if (LHS == NegTrue) {
11991 // If we can't directly match this, try to see if we can pull an fneg out of
11992 // the select.
11993 SDValue NegRHS = TLI.getCheaperNegatedExpression(
11994 RHS, DAG, LegalOperations, ForCodeSize);
11995 if (NegRHS) {
11996 HandleSDNode NegRHSHandle(NegRHS);
11997 if (NegRHS == False) {
11998 SDValue Combined = combineMinNumMaxNumImpl(DL, VT, LHS, RHS, NegTrue,
11999 False, CC, TLI, DAG);
12000 if (Combined)
12001 return DAG.getNode(ISD::FNEG, DL, VT, Combined);
12002 }
12003 }
12004 }
12005
12006 return SDValue();
12007}
12008
12009/// If a (v)select has a condition value that is a sign-bit test, try to smear
12010/// the condition operand sign-bit across the value width and use it as a mask.
12011 static SDValue foldSelectOfConstantsUsingSra(SDNode *N, const SDLoc &DL,
12012 SelectionDAG &DAG) {
12013 SDValue Cond = N->getOperand(0);
12014 SDValue C1 = N->getOperand(1);
12015 SDValue C2 = N->getOperand(2);
12017 return SDValue();
12018
12019 EVT VT = N->getValueType(0);
12020 if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse() ||
12021 VT != Cond.getOperand(0).getValueType())
12022 return SDValue();
12023
12024 // The inverted-condition + commuted-select variants of these patterns are
12025 // canonicalized to these forms in IR.
12026 SDValue X = Cond.getOperand(0);
12027 SDValue CondC = Cond.getOperand(1);
12028 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
12029 if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CondC) &&
12030 isAllOnesOrAllOnesSplat(C2)) {
12031 // i32 X > -1 ? C1 : -1 --> (X >>s 31) | C1
12032 SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
12033 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
12034 return DAG.getNode(ISD::OR, DL, VT, Sra, C1);
12035 }
12036 if (CC == ISD::SETLT && isNullOrNullSplat(CondC) && isNullOrNullSplat(C2)) {
12037 // i8 X < 0 ? C1 : 0 --> (X >>s 7) & C1
12038 SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
12039 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
12040 return DAG.getNode(ISD::AND, DL, VT, Sra, C1);
12041 }
12042 return SDValue();
12043}
12044
12045 static bool shouldConvertSelectOfConstantsToMath(const SDValue &Cond, EVT VT,
12046 const TargetLowering &TLI) {
12047 if (!TLI.convertSelectOfConstantsToMath(VT))
12048 return false;
12049
12050 if (Cond.getOpcode() != ISD::SETCC || !Cond->hasOneUse())
12051 return true;
12053 return true;
12054
12055 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
12056 if (CC == ISD::SETLT && isNullOrNullSplat(Cond.getOperand(1)))
12057 return true;
12058 if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(Cond.getOperand(1)))
12059 return true;
12060
12061 return false;
12062}
12063
12064SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
12065 SDValue Cond = N->getOperand(0);
12066 SDValue N1 = N->getOperand(1);
12067 SDValue N2 = N->getOperand(2);
12068 EVT VT = N->getValueType(0);
12069 EVT CondVT = Cond.getValueType();
12070 SDLoc DL(N);
12071
12072 if (!VT.isInteger())
12073 return SDValue();
12074
12075 auto *C1 = dyn_cast<ConstantSDNode>(N1);
12076 auto *C2 = dyn_cast<ConstantSDNode>(N2);
12077 if (!C1 || !C2)
12078 return SDValue();
12079
12080 if (CondVT != MVT::i1 || LegalOperations) {
12081 // fold (select Cond, 0, 1) -> (xor Cond, 1)
12082 // We can't do this reliably if integer based booleans have different contents
12083 // to floating point based booleans. This is because we can't tell whether we
12084 // have an integer-based boolean or a floating-point-based boolean unless we
12085 // can find the SETCC that produced it and inspect its operands. This is
12086 // fairly easy if C is the SETCC node, but it can potentially be
12087 // undiscoverable (or not reasonably discoverable). For example, it could be
12088 // in another basic block or it could require searching a complicated
12089 // expression.
12090 if (CondVT.isInteger() &&
12091 TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==
12092 TargetLowering::ZeroOrOneBooleanContent &&
12093 TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
12094 TargetLowering::ZeroOrOneBooleanContent &&
12095 C1->isZero() && C2->isOne()) {
12096 SDValue NotCond =
12097 DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
12098 if (VT.bitsEq(CondVT))
12099 return NotCond;
12100 return DAG.getZExtOrTrunc(NotCond, DL, VT);
12101 }
12102
12103 return SDValue();
12104 }
12105
12106 // Only do this before legalization to avoid conflicting with target-specific
12107 // transforms in the other direction (create a select from a zext/sext). There
12108 // is also a target-independent combine here in DAGCombiner in the other
12109 // direction for (select Cond, -1, 0) when the condition is not i1.
12110 assert(CondVT == MVT::i1 && !LegalOperations);
12111
12112 // select Cond, 1, 0 --> zext (Cond)
12113 if (C1->isOne() && C2->isZero())
12114 return DAG.getZExtOrTrunc(Cond, DL, VT);
12115
12116 // select Cond, -1, 0 --> sext (Cond)
12117 if (C1->isAllOnes() && C2->isZero())
12118 return DAG.getSExtOrTrunc(Cond, DL, VT);
12119
12120 // select Cond, 0, 1 --> zext (!Cond)
12121 if (C1->isZero() && C2->isOne()) {
12122 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
12123 NotCond = DAG.getZExtOrTrunc(NotCond, DL, VT);
12124 return NotCond;
12125 }
12126
12127 // select Cond, 0, -1 --> sext (!Cond)
12128 if (C1->isZero() && C2->isAllOnes()) {
12129 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
12130 NotCond = DAG.getSExtOrTrunc(NotCond, DL, VT);
12131 return NotCond;
12132 }
12133
12134 // Use a target hook because some targets may prefer to transform in the
12135 // other direction.
12136 if (!shouldConvertSelectOfConstantsToMath(Cond, VT, TLI))
12137 return SDValue();
12138
12139 // For any constants that differ by 1, we can transform the select into
12140 // an extend and add.
12141 const APInt &C1Val = C1->getAPIntValue();
12142 const APInt &C2Val = C2->getAPIntValue();
12143
12144 // select Cond, C1, C1-1 --> add (zext Cond), C1-1
12145 if (C1Val - 1 == C2Val) {
12146 Cond = DAG.getZExtOrTrunc(Cond, DL, VT);
12147 return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
12148 }
12149
12150 // select Cond, C1, C1+1 --> add (sext Cond), C1+1
12151 if (C1Val + 1 == C2Val) {
12152 Cond = DAG.getSExtOrTrunc(Cond, DL, VT);
12153 return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
12154 }
12155
12156 // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
12157 if (C1Val.isPowerOf2() && C2Val.isZero()) {
12158 Cond = DAG.getZExtOrTrunc(Cond, DL, VT);
12159 SDValue ShAmtC =
12160 DAG.getShiftAmountConstant(C1Val.exactLogBase2(), VT, DL);
12161 return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC);
12162 }
12163
12164 // select Cond, -1, C --> or (sext Cond), C
12165 if (C1->isAllOnes()) {
12166 Cond = DAG.getSExtOrTrunc(Cond, DL, VT);
12167 return DAG.getNode(ISD::OR, DL, VT, Cond, N2);
12168 }
12169
12170 // select Cond, C, -1 --> or (sext (not Cond)), C
12171 if (C2->isAllOnes()) {
12172 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
12173 NotCond = DAG.getSExtOrTrunc(NotCond, DL, VT);
12174 return DAG.getNode(ISD::OR, DL, VT, NotCond, N1);
12175 }
12176
12177 if (SDValue V = foldSelectOfConstantsUsingSra(N, DL, DAG))
12178 return V;
12179
12180 return SDValue();
12181}
12182
12183template <class MatchContextClass>
12185 SelectionDAG &DAG) {
12186 assert((N->getOpcode() == ISD::SELECT || N->getOpcode() == ISD::VSELECT ||
12187 N->getOpcode() == ISD::VP_SELECT) &&
12188 "Expected a (v)(vp.)select");
12189 SDValue Cond = N->getOperand(0);
12190 SDValue T = N->getOperand(1), F = N->getOperand(2);
12191 EVT VT = N->getValueType(0);
12192 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12193 MatchContextClass matcher(DAG, TLI, N);
12194
12195 if (VT != Cond.getValueType() || VT.getScalarSizeInBits() != 1)
12196 return SDValue();
12197
12198 // select Cond, Cond, F --> or Cond, freeze(F)
12199 // select Cond, 1, F --> or Cond, freeze(F)
12200 if (Cond == T || isOneOrOneSplat(T, /* AllowUndefs */ true))
12201 return matcher.getNode(ISD::OR, DL, VT, Cond, DAG.getFreeze(F));
12202
12203 // select Cond, T, Cond --> and Cond, freeze(T)
12204 // select Cond, T, 0 --> and Cond, freeze(T)
12205 if (Cond == F || isNullOrNullSplat(F, /* AllowUndefs */ true))
12206 return matcher.getNode(ISD::AND, DL, VT, Cond, DAG.getFreeze(T));
12207
12208 // select Cond, T, 1 --> or (not Cond), freeze(T)
12209 if (isOneOrOneSplat(F, /* AllowUndefs */ true)) {
12210 SDValue NotCond =
12211 matcher.getNode(ISD::XOR, DL, VT, Cond, DAG.getAllOnesConstant(DL, VT));
12212 return matcher.getNode(ISD::OR, DL, VT, NotCond, DAG.getFreeze(T));
12213 }
12214
12215 // select Cond, 0, F --> and (not Cond), freeze(F)
12216 if (isNullOrNullSplat(T, /* AllowUndefs */ true)) {
12217 SDValue NotCond =
12218 matcher.getNode(ISD::XOR, DL, VT, Cond, DAG.getAllOnesConstant(DL, VT));
12219 return matcher.getNode(ISD::AND, DL, VT, NotCond, DAG.getFreeze(F));
12220 }
12221
12222 return SDValue();
12223}
12224
12225 static SDValue foldVSelectToSignBitSplatMask(SDNode *N, SelectionDAG &DAG) {
12226 SDValue N0 = N->getOperand(0);
12227 SDValue N1 = N->getOperand(1);
12228 SDValue N2 = N->getOperand(2);
12229 EVT VT = N->getValueType(0);
12230 unsigned EltSizeInBits = VT.getScalarSizeInBits();
12231
12232 SDValue Cond0, Cond1;
12233 ISD::CondCode CC;
12234 if (!sd_match(N0, m_OneUse(m_SetCC(m_Value(Cond0), m_Value(Cond1),
12235 m_CondCode(CC)))) ||
12236 VT != Cond0.getValueType())
12237 return SDValue();
12238
12239 // Match a signbit check of Cond0 as "Cond0 s<0". Swap select operands if the
12240 // compare is inverted from that pattern ("Cond0 s> -1").
12241 if (CC == ISD::SETLT && isNullOrNullSplat(Cond1))
12242 ; // This is the pattern we are looking for.
12243 else if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(Cond1))
12244 std::swap(N1, N2);
12245 else
12246 return SDValue();
12247
12248 // (Cond0 s< 0) ? N1 : 0 --> (Cond0 s>> BW-1) & freeze(N1)
12249 if (isNullOrNullSplat(N2)) {
12250 SDLoc DL(N);
12251 SDValue ShiftAmt = DAG.getShiftAmountConstant(EltSizeInBits - 1, VT, DL);
12252 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
12253 return DAG.getNode(ISD::AND, DL, VT, Sra, DAG.getFreeze(N1));
12254 }
12255
12256 // (Cond0 s< 0) ? -1 : N2 --> (Cond0 s>> BW-1) | freeze(N2)
12257 if (isAllOnesOrAllOnesSplat(N1)) {
12258 SDLoc DL(N);
12259 SDValue ShiftAmt = DAG.getShiftAmountConstant(EltSizeInBits - 1, VT, DL);
12260 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
12261 return DAG.getNode(ISD::OR, DL, VT, Sra, DAG.getFreeze(N2));
12262 }
12263
12264 // If we have to invert the sign bit mask, only do that transform if the
12265 // target has a bitwise 'and not' instruction (the invert is free).
12266   // (Cond0 s< 0) ? 0 : N2 --> ~(Cond0 s>> BW-1) & freeze(N2)
12267 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12268 if (isNullOrNullSplat(N1) && TLI.hasAndNot(N1)) {
12269 SDLoc DL(N);
12270 SDValue ShiftAmt = DAG.getShiftAmountConstant(EltSizeInBits - 1, VT, DL);
12271 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
12272 SDValue Not = DAG.getNOT(DL, Sra, VT);
12273 return DAG.getNode(ISD::AND, DL, VT, Not, DAG.getFreeze(N2));
12274 }
12275
12276 // TODO: There's another pattern in this family, but it may require
12277 // implementing hasOrNot() to check for profitability:
12278 // (Cond0 s> -1) ? -1 : N2 --> ~(Cond0 s>> BW-1) | freeze(N2)
12279
12280 return SDValue();
12281}
12282
12283// Match SELECTs with absolute difference patterns.
12284// (select (setcc a, b, set?gt), (sub a, b), (sub b, a)) --> (abd? a, b)
12285// (select (setcc a, b, set?ge), (sub a, b), (sub b, a)) --> (abd? a, b)
12286// (select (setcc a, b, set?lt), (sub b, a), (sub a, b)) --> (abd? a, b)
12287// (select (setcc a, b, set?le), (sub b, a), (sub a, b)) --> (abd? a, b)
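// Illustrative example (unsigned case, values arbitrary): with a = 3, b = 7,
// (select (setcc 3, 7, setugt), (sub 3, 7), (sub 7, 3)) picks sub 7, 3 = 4,
// which is exactly abdu(3, 7) = |3 - 7| = 4.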
12288SDValue DAGCombiner::foldSelectToABD(SDValue LHS, SDValue RHS, SDValue True,
12289 SDValue False, ISD::CondCode CC,
12290 const SDLoc &DL) {
12291 bool IsSigned = isSignedIntSetCC(CC);
12292 unsigned ABDOpc = IsSigned ? ISD::ABDS : ISD::ABDU;
12293 EVT VT = LHS.getValueType();
12294
12295 if (LegalOperations && !hasOperation(ABDOpc, VT))
12296 return SDValue();
12297
12298 switch (CC) {
12299 case ISD::SETGT:
12300 case ISD::SETGE:
12301 case ISD::SETUGT:
12302 case ISD::SETUGE:
12303 if (sd_match(True, m_Sub(m_Specific(LHS), m_Specific(RHS))) &&
12304         sd_match(False, m_Sub(m_Specific(RHS), m_Specific(LHS))))
12305       return DAG.getNode(ABDOpc, DL, VT, LHS, RHS);
12306 if (sd_match(True, m_Sub(m_Specific(RHS), m_Specific(LHS))) &&
12307 sd_match(False, m_Sub(m_Specific(LHS), m_Specific(RHS))) &&
12308 hasOperation(ABDOpc, VT))
12309 return DAG.getNegative(DAG.getNode(ABDOpc, DL, VT, LHS, RHS), DL, VT);
12310 break;
12311 case ISD::SETLT:
12312 case ISD::SETLE:
12313 case ISD::SETULT:
12314 case ISD::SETULE:
12315 if (sd_match(True, m_Sub(m_Specific(RHS), m_Specific(LHS))) &&
12316         sd_match(False, m_Sub(m_Specific(LHS), m_Specific(RHS))))
12317       return DAG.getNode(ABDOpc, DL, VT, LHS, RHS);
12318 if (sd_match(True, m_Sub(m_Specific(LHS), m_Specific(RHS))) &&
12319 sd_match(False, m_Sub(m_Specific(RHS), m_Specific(LHS))) &&
12320 hasOperation(ABDOpc, VT))
12321 return DAG.getNegative(DAG.getNode(ABDOpc, DL, VT, LHS, RHS), DL, VT);
12322 break;
12323 default:
12324 break;
12325 }
12326
12327 return SDValue();
12328}
12329
12330// ([v]select (ugt x, C), (add x, ~C), x) -> (umin (add x, ~C), x)
12331// ([v]select (ult x, C), x, (add x, -C)) -> (umin x, (add x, -C))
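// Worked example for the first pattern (illustrative, i8 with C = 200, so
// ~C = 55): if x = 250 the add wraps to 49 and umin(49, 250) = 49, matching
// the select's true arm; if x = 100 the add gives 155 without wrapping and
// umin(155, 100) = 100, matching the false arm.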
12332SDValue DAGCombiner::foldSelectToUMin(SDValue LHS, SDValue RHS, SDValue True,
12333 SDValue False, ISD::CondCode CC,
12334 const SDLoc &DL) {
12335 APInt C;
12336 EVT VT = True.getValueType();
12337 if (sd_match(RHS, m_ConstInt(C)) && hasUMin(VT)) {
12338 if (CC == ISD::SETUGT && LHS == False &&
12339 sd_match(True, m_Add(m_Specific(False), m_SpecificInt(~C)))) {
12340 SDValue AddC = DAG.getConstant(~C, DL, VT);
12341 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, False, AddC);
12342 return DAG.getNode(ISD::UMIN, DL, VT, Add, False);
12343 }
12344 if (CC == ISD::SETULT && LHS == True &&
12345 sd_match(False, m_Add(m_Specific(True), m_SpecificInt(-C)))) {
12346 SDValue AddC = DAG.getConstant(-C, DL, VT);
12347 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, True, AddC);
12348 return DAG.getNode(ISD::UMIN, DL, VT, True, Add);
12349 }
12350 }
12351 return SDValue();
12352}
12353
12354SDValue DAGCombiner::visitSELECT(SDNode *N) {
12355 SDValue N0 = N->getOperand(0);
12356 SDValue N1 = N->getOperand(1);
12357 SDValue N2 = N->getOperand(2);
12358 EVT VT = N->getValueType(0);
12359 EVT VT0 = N0.getValueType();
12360 SDLoc DL(N);
12361 SDNodeFlags Flags = N->getFlags();
12362
12363 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
12364 return V;
12365
12366   if (SDValue V = foldBoolSelectToLogic<EmptyMatchContext>(N, DL, DAG))
12367     return V;
12368
12369 // select (not Cond), N1, N2 -> select Cond, N2, N1
12370 if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false))
12371 return DAG.getSelect(DL, VT, F, N2, N1, Flags);
12372
12373 if (SDValue V = foldSelectOfConstants(N))
12374 return V;
12375
12376 // If we can fold this based on the true/false value, do so.
12377 if (SimplifySelectOps(N, N1, N2))
12378 return SDValue(N, 0); // Don't revisit N.
12379
12380 if (VT0 == MVT::i1) {
12381 // The code in this block deals with the following 2 equivalences:
12382 // select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
12383 // select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
12384 // The target can specify its preferred form with the
12385     // shouldNormalizeToSelectSequence() callback. However, we always transform
12386     // to the right-hand form if the inner select already exists in the DAG,
12387     // and we always transform to the left-hand form if we know that we can
12388     // further optimize the combination of the conditions.
12389     bool normalizeToSequence =
12390         TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
12391 // select (and Cond0, Cond1), X, Y
12392 // -> select Cond0, (select Cond1, X, Y), Y
12393 if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
12394 SDValue Cond0 = N0->getOperand(0);
12395 SDValue Cond1 = N0->getOperand(1);
12396 SDValue InnerSelect =
12397 DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2, Flags);
12398 if (normalizeToSequence || !InnerSelect.use_empty())
12399 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
12400 InnerSelect, N2, Flags);
12401 // Cleanup on failure.
12402 if (InnerSelect.use_empty())
12403 recursivelyDeleteUnusedNodes(InnerSelect.getNode());
12404 }
12405 // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
12406 if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
12407 SDValue Cond0 = N0->getOperand(0);
12408 SDValue Cond1 = N0->getOperand(1);
12409 SDValue InnerSelect = DAG.getNode(ISD::SELECT, DL, N1.getValueType(),
12410 Cond1, N1, N2, Flags);
12411 if (normalizeToSequence || !InnerSelect.use_empty())
12412 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
12413 InnerSelect, Flags);
12414 // Cleanup on failure.
12415 if (InnerSelect.use_empty())
12416 recursivelyDeleteUnusedNodes(InnerSelect.getNode());
12417 }
12418
12419 // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
12420 if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
12421 SDValue N1_0 = N1->getOperand(0);
12422 SDValue N1_1 = N1->getOperand(1);
12423 SDValue N1_2 = N1->getOperand(2);
12424 if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
12425 // Create the actual and node if we can generate good code for it.
12426 if (!normalizeToSequence) {
12427 SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
12428 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1,
12429 N2, Flags);
12430 }
12431 // Otherwise see if we can optimize the "and" to a better pattern.
12432 if (SDValue Combined = visitANDLike(N0, N1_0, N)) {
12433 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
12434 N2, Flags);
12435 }
12436 }
12437 }
12438 // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
12439 if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
12440 SDValue N2_0 = N2->getOperand(0);
12441 SDValue N2_1 = N2->getOperand(1);
12442 SDValue N2_2 = N2->getOperand(2);
12443 if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
12444 // Create the actual or node if we can generate good code for it.
12445 if (!normalizeToSequence) {
12446 SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
12447 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1,
12448 N2_2, Flags);
12449 }
12450 // Otherwise see if we can optimize to a better pattern.
12451 if (SDValue Combined = visitORLike(N0, N2_0, DL))
12452 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
12453 N2_2, Flags);
12454 }
12455 }
12456
12457 // select usubo(x, y).overflow, (sub y, x), (usubo x, y) -> abdu(x, y)
12458 if (N0.getOpcode() == ISD::USUBO && N0.getResNo() == 1 &&
12459 N2.getNode() == N0.getNode() && N2.getResNo() == 0 &&
12460 N1.getOpcode() == ISD::SUB && N2.getOperand(0) == N1.getOperand(1) &&
12461 N2.getOperand(1) == N1.getOperand(0) &&
12462 (!LegalOperations || TLI.isOperationLegal(ISD::ABDU, VT)))
12463 return DAG.getNode(ISD::ABDU, DL, VT, N0.getOperand(0), N0.getOperand(1));
12464
12465 // select usubo(x, y).overflow, (usubo x, y), (sub y, x) -> neg (abdu x, y)
12466 if (N0.getOpcode() == ISD::USUBO && N0.getResNo() == 1 &&
12467 N1.getNode() == N0.getNode() && N1.getResNo() == 0 &&
12468 N2.getOpcode() == ISD::SUB && N2.getOperand(0) == N1.getOperand(1) &&
12469 N2.getOperand(1) == N1.getOperand(0) &&
12470 (!LegalOperations || TLI.isOperationLegal(ISD::ABDU, VT)))
12471 return DAG.getNegative(
12472 DAG.getNode(ISD::ABDU, DL, VT, N0.getOperand(0), N0.getOperand(1)),
12473 DL, VT);
12474 }
12475
12476 // Fold selects based on a setcc into other things, such as min/max/abs.
12477 if (N0.getOpcode() == ISD::SETCC) {
12478 SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1);
12479     ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
12480
12481 // select (fcmp lt x, y), x, y -> fminnum x, y
12482 // select (fcmp gt x, y), x, y -> fmaxnum x, y
12483 //
12484 // This is OK if we don't care what happens if either operand is a NaN.
12485 if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, Flags, TLI))
12486 if (SDValue FMinMax =
12487 combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2, CC))
12488 return FMinMax;
12489
12490 // Use 'unsigned add with overflow' to optimize an unsigned saturating add.
12491 // This is conservatively limited to pre-legal-operations to give targets
12492 // a chance to reverse the transform if they want to do that. Also, it is
12493 // unlikely that the pattern would be formed late, so it's probably not
12494 // worth going through the other checks.
12495 if (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::UADDO, VT) &&
12496 CC == ISD::SETUGT && N0.hasOneUse() && isAllOnesConstant(N1) &&
12497 N2.getOpcode() == ISD::ADD && Cond0 == N2.getOperand(0)) {
12498 auto *C = dyn_cast<ConstantSDNode>(N2.getOperand(1));
12499 auto *NotC = dyn_cast<ConstantSDNode>(Cond1);
12500 if (C && NotC && C->getAPIntValue() == ~NotC->getAPIntValue()) {
12501 // select (setcc Cond0, ~C, ugt), -1, (add Cond0, C) -->
12502 // uaddo Cond0, C; select uaddo.1, -1, uaddo.0
12503 //
12504 // The IR equivalent of this transform would have this form:
12505 // %a = add %x, C
12506 // %c = icmp ugt %x, ~C
12507 // %r = select %c, -1, %a
12508 // =>
12509 // %u = call {iN,i1} llvm.uadd.with.overflow(%x, C)
12510 // %u0 = extractvalue %u, 0
12511 // %u1 = extractvalue %u, 1
12512 // %r = select %u1, -1, %u0
12513 SDVTList VTs = DAG.getVTList(VT, VT0);
12514 SDValue UAO = DAG.getNode(ISD::UADDO, DL, VTs, Cond0, N2.getOperand(1));
12515 return DAG.getSelect(DL, VT, UAO.getValue(1), N1, UAO.getValue(0));
12516 }
12517 }
12518
12519 if (TLI.isOperationLegal(ISD::SELECT_CC, VT) ||
12520 (!LegalOperations &&
12521          TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))) {
12522       // Any flags available in a select/setcc fold will be on the setcc as they
12523 // migrated from fcmp
12524 return DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1, N2,
12525 N0.getOperand(2), N0->getFlags());
12526 }
12527
12528 if (SDValue ABD = foldSelectToABD(Cond0, Cond1, N1, N2, CC, DL))
12529 return ABD;
12530
12531 if (SDValue NewSel = SimplifySelect(DL, N0, N1, N2))
12532 return NewSel;
12533
12534 // (select (ugt x, C), (add x, ~C), x) -> (umin (add x, ~C), x)
12535 // (select (ult x, C), x, (add x, -C)) -> (umin x, (add x, -C))
12536 if (SDValue UMin = foldSelectToUMin(Cond0, Cond1, N1, N2, CC, DL))
12537 return UMin;
12538 }
12539
12540 if (!VT.isVector())
12541 if (SDValue BinOp = foldSelectOfBinops(N))
12542 return BinOp;
12543
12544 if (SDValue R = combineSelectAsExtAnd(N0, N1, N2, DL, DAG))
12545 return R;
12546
12547 return SDValue();
12548}
12549
12550// This function assumes all the vselect's arguments are CONCAT_VECTOR
12551// nodes and that the condition is a BV of ConstantSDNodes (or undefs).
12552 static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
12553   SDLoc DL(N);
12554 SDValue Cond = N->getOperand(0);
12555 SDValue LHS = N->getOperand(1);
12556 SDValue RHS = N->getOperand(2);
12557 EVT VT = N->getValueType(0);
12558 int NumElems = VT.getVectorNumElements();
12559 assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
12560 RHS.getOpcode() == ISD::CONCAT_VECTORS &&
12561 Cond.getOpcode() == ISD::BUILD_VECTOR);
12562
12563   // CONCAT_VECTORS can take an arbitrary number of operands. We only care
12564   // about binary ones here.
12565 if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
12566 return SDValue();
12567
12568 // We're sure we have an even number of elements due to the
12569 // concat_vectors we have as arguments to vselect.
12570   // Skip BV elements until we find one that's not an UNDEF.
12571   // After we find a non-UNDEF element, keep looping until we get to half the
12572   // length of the BV and see if all the non-undef nodes are the same.
12573 ConstantSDNode *BottomHalf = nullptr;
12574 for (int i = 0; i < NumElems / 2; ++i) {
12575 if (Cond->getOperand(i)->isUndef())
12576 continue;
12577
12578 if (BottomHalf == nullptr)
12579 BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
12580 else if (Cond->getOperand(i).getNode() != BottomHalf)
12581 return SDValue();
12582 }
12583
12584 // Do the same for the second half of the BuildVector
12585 ConstantSDNode *TopHalf = nullptr;
12586 for (int i = NumElems / 2; i < NumElems; ++i) {
12587 if (Cond->getOperand(i)->isUndef())
12588 continue;
12589
12590 if (TopHalf == nullptr)
12591 TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
12592 else if (Cond->getOperand(i).getNode() != TopHalf)
12593 return SDValue();
12594 }
12595
12596 assert(TopHalf && BottomHalf &&
12597 "One half of the selector was all UNDEFs and the other was all the "
12598 "same value. This should have been addressed before this function.");
12599 return DAG.getNode(
12600       ISD::CONCAT_VECTORS, DL, VT,
12601       BottomHalf->isZero() ? RHS->getOperand(0) : LHS->getOperand(0),
12602 TopHalf->isZero() ? RHS->getOperand(1) : LHS->getOperand(1));
12603}
12604
12605bool refineUniformBase(SDValue &BasePtr, SDValue &Index, bool IndexIsScaled,
12606 SelectionDAG &DAG, const SDLoc &DL) {
12607
12608 // Only perform the transformation when existing operands can be reused.
12609 if (IndexIsScaled)
12610 return false;
12611
12612 if (!isNullConstant(BasePtr) && !Index.hasOneUse())
12613 return false;
12614
12615 EVT VT = BasePtr.getValueType();
12616
12617 if (SDValue SplatVal = DAG.getSplatValue(Index);
12618 SplatVal && !isNullConstant(SplatVal) &&
12619 SplatVal.getValueType() == VT) {
12620 BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
12621 Index = DAG.getSplat(Index.getValueType(), DL, DAG.getConstant(0, DL, VT));
12622 return true;
12623 }
12624
12625 if (Index.getOpcode() != ISD::ADD)
12626 return false;
12627
12628 if (SDValue SplatVal = DAG.getSplatValue(Index.getOperand(0));
12629 SplatVal && SplatVal.getValueType() == VT) {
12630 BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
12631 Index = Index.getOperand(1);
12632 return true;
12633 }
12634 if (SDValue SplatVal = DAG.getSplatValue(Index.getOperand(1));
12635 SplatVal && SplatVal.getValueType() == VT) {
12636 BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
12637 Index = Index.getOperand(0);
12638 return true;
12639 }
12640 return false;
12641}
12642
12643// Fold sext/zext of index into index type.
12644bool refineIndexType(SDValue &Index, ISD::MemIndexType &IndexType, EVT DataVT,
12645 SelectionDAG &DAG) {
12646 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12647
12648 // It's always safe to look through zero extends.
12649 if (Index.getOpcode() == ISD::ZERO_EXTEND) {
12650 if (TLI.shouldRemoveExtendFromGSIndex(Index, DataVT)) {
12651 IndexType = ISD::UNSIGNED_SCALED;
12652 Index = Index.getOperand(0);
12653 return true;
12654 }
12655 if (ISD::isIndexTypeSigned(IndexType)) {
12656 IndexType = ISD::UNSIGNED_SCALED;
12657 return true;
12658 }
12659 }
12660
12661 // It's only safe to look through sign extends when Index is signed.
12662 if (Index.getOpcode() == ISD::SIGN_EXTEND &&
12663 ISD::isIndexTypeSigned(IndexType) &&
12664 TLI.shouldRemoveExtendFromGSIndex(Index, DataVT)) {
12665 Index = Index.getOperand(0);
12666 return true;
12667 }
12668
12669 return false;
12670}
12671
12672SDValue DAGCombiner::visitVPSCATTER(SDNode *N) {
12673 VPScatterSDNode *MSC = cast<VPScatterSDNode>(N);
12674 SDValue Mask = MSC->getMask();
12675 SDValue Chain = MSC->getChain();
12676 SDValue Index = MSC->getIndex();
12677 SDValue Scale = MSC->getScale();
12678 SDValue StoreVal = MSC->getValue();
12679 SDValue BasePtr = MSC->getBasePtr();
12680 SDValue VL = MSC->getVectorLength();
12681 ISD::MemIndexType IndexType = MSC->getIndexType();
12682 SDLoc DL(N);
12683
12684 // Zap scatters with a zero mask.
12685   if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
12686     return Chain;
12687
12688 if (refineUniformBase(BasePtr, Index, MSC->isIndexScaled(), DAG, DL)) {
12689 SDValue Ops[] = {Chain, StoreVal, BasePtr, Index, Scale, Mask, VL};
12690 return DAG.getScatterVP(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
12691 DL, Ops, MSC->getMemOperand(), IndexType);
12692 }
12693
12694 if (refineIndexType(Index, IndexType, StoreVal.getValueType(), DAG)) {
12695 SDValue Ops[] = {Chain, StoreVal, BasePtr, Index, Scale, Mask, VL};
12696 return DAG.getScatterVP(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
12697 DL, Ops, MSC->getMemOperand(), IndexType);
12698 }
12699
12700 return SDValue();
12701}
12702
12703SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
12704 MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
12705 SDValue Mask = MSC->getMask();
12706 SDValue Chain = MSC->getChain();
12707 SDValue Index = MSC->getIndex();
12708 SDValue Scale = MSC->getScale();
12709 SDValue StoreVal = MSC->getValue();
12710 SDValue BasePtr = MSC->getBasePtr();
12711 ISD::MemIndexType IndexType = MSC->getIndexType();
12712 SDLoc DL(N);
12713
12714 // Zap scatters with a zero mask.
12715   if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
12716     return Chain;
12717
12718 if (refineUniformBase(BasePtr, Index, MSC->isIndexScaled(), DAG, DL)) {
12719 SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
12720 return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
12721 DL, Ops, MSC->getMemOperand(), IndexType,
12722 MSC->isTruncatingStore());
12723 }
12724
12725 if (refineIndexType(Index, IndexType, StoreVal.getValueType(), DAG)) {
12726 SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
12727 return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
12728 DL, Ops, MSC->getMemOperand(), IndexType,
12729 MSC->isTruncatingStore());
12730 }
12731
12732 return SDValue();
12733}
12734
12735SDValue DAGCombiner::visitMSTORE(SDNode *N) {
12736 MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
12737 SDValue Mask = MST->getMask();
12738 SDValue Chain = MST->getChain();
12739 SDValue Value = MST->getValue();
12740 SDValue Ptr = MST->getBasePtr();
12741
12742 // Zap masked stores with a zero mask.
12743   if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
12744     return Chain;
12745
12746 // Remove a masked store if base pointers and masks are equal.
12747 if (MaskedStoreSDNode *MST1 = dyn_cast<MaskedStoreSDNode>(Chain)) {
12748 if (MST->isUnindexed() && MST->isSimple() && MST1->isUnindexed() &&
12749 MST1->isSimple() && MST1->getBasePtr() == Ptr &&
12750 !MST->getBasePtr().isUndef() &&
12751 ((Mask == MST1->getMask() && MST->getMemoryVT().getStoreSize() ==
12752 MST1->getMemoryVT().getStoreSize()) ||
12753          (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) &&
12754           TypeSize::isKnownLE(MST1->getMemoryVT().getStoreSize(),
12755 MST->getMemoryVT().getStoreSize())) {
12756 CombineTo(MST1, MST1->getChain());
12757 if (N->getOpcode() != ISD::DELETED_NODE)
12758 AddToWorklist(N);
12759 return SDValue(N, 0);
12760 }
12761 }
12762
12763   // If this is a masked store with an all-ones mask, we can use an unmasked store.
12764 // FIXME: Can we do this for indexed, compressing, or truncating stores?
12765 if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MST->isUnindexed() &&
12766 !MST->isCompressingStore() && !MST->isTruncatingStore())
12767 return DAG.getStore(MST->getChain(), SDLoc(N), MST->getValue(),
12768 MST->getBasePtr(), MST->getPointerInfo(),
12769 MST->getBaseAlign(), MST->getMemOperand()->getFlags(),
12770 MST->getAAInfo());
12771
12772 // Try transforming N to an indexed store.
12773 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
12774 return SDValue(N, 0);
12775
12776 if (MST->isTruncatingStore() && MST->isUnindexed() &&
12777 Value.getValueType().isInteger() &&
12778       (!isa<ConstantSDNode>(Value) ||
12779        !cast<ConstantSDNode>(Value)->isOpaque())) {
12780 APInt TruncDemandedBits =
12781 APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
12782                              MST->getMemoryVT().getScalarSizeInBits());
12783
12784 // See if we can simplify the operation with
12785 // SimplifyDemandedBits, which only works if the value has a single use.
12786 if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
12787       // Re-visit the store if anything changed and the store hasn't been merged
12788       // with another node (in which case N is deleted). SimplifyDemandedBits will
12789       // add Value's node back to the worklist if necessary, but we also need to
12790       // re-visit the store node itself.
12791 if (N->getOpcode() != ISD::DELETED_NODE)
12792 AddToWorklist(N);
12793 return SDValue(N, 0);
12794 }
12795 }
12796
12797 // If this is a TRUNC followed by a masked store, fold this into a masked
12798 // truncating store. We can do this even if this is already a masked
12799 // truncstore.
12800   // TODO: Try combining to a masked compress store if possible.
12801 if ((Value.getOpcode() == ISD::TRUNCATE) && Value->hasOneUse() &&
12802 MST->isUnindexed() && !MST->isCompressingStore() &&
12803 TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
12804 MST->getMemoryVT(), LegalOperations)) {
12805 auto Mask = TLI.promoteTargetBoolean(DAG, MST->getMask(),
12806 Value.getOperand(0).getValueType());
12807 return DAG.getMaskedStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
12808 MST->getOffset(), Mask, MST->getMemoryVT(),
12809 MST->getMemOperand(), MST->getAddressingMode(),
12810 /*IsTruncating=*/true);
12811 }
12812
12813 return SDValue();
12814}
12815
12816SDValue DAGCombiner::visitVP_STRIDED_STORE(SDNode *N) {
12817 auto *SST = cast<VPStridedStoreSDNode>(N);
12818 EVT EltVT = SST->getValue().getValueType().getVectorElementType();
12819 // Combine strided stores with unit-stride to a regular VP store.
12820 if (auto *CStride = dyn_cast<ConstantSDNode>(SST->getStride());
12821 CStride && CStride->getZExtValue() == EltVT.getStoreSize()) {
12822 return DAG.getStoreVP(SST->getChain(), SDLoc(N), SST->getValue(),
12823 SST->getBasePtr(), SST->getOffset(), SST->getMask(),
12824 SST->getVectorLength(), SST->getMemoryVT(),
12825 SST->getMemOperand(), SST->getAddressingMode(),
12826 SST->isTruncatingStore(), SST->isCompressingStore());
12827 }
12828 return SDValue();
12829}
12830
12831SDValue DAGCombiner::visitVECTOR_COMPRESS(SDNode *N) {
12832 SDLoc DL(N);
12833 SDValue Vec = N->getOperand(0);
12834 SDValue Mask = N->getOperand(1);
12835 SDValue Passthru = N->getOperand(2);
12836 EVT VecVT = Vec.getValueType();
12837
12838 bool HasPassthru = !Passthru.isUndef();
12839
12840 APInt SplatVal;
12841 if (ISD::isConstantSplatVector(Mask.getNode(), SplatVal))
12842 return TLI.isConstTrueVal(Mask) ? Vec : Passthru;
12843
12844 if (Vec.isUndef() || Mask.isUndef())
12845 return Passthru;
12846
12847 // No need for potentially expensive compress if the mask is constant.
12848   if (ISD::isBuildVectorOfConstantSDNodes(Mask.getNode())) {
12849     SmallVector<SDValue, 16> Ops;
12850     EVT ScalarVT = VecVT.getVectorElementType();
12851 unsigned NumSelected = 0;
12852 unsigned NumElmts = VecVT.getVectorNumElements();
12853 for (unsigned I = 0; I < NumElmts; ++I) {
12854 SDValue MaskI = Mask.getOperand(I);
12855 // We treat undef mask entries as "false".
12856 if (MaskI.isUndef())
12857 continue;
12858
12859 if (TLI.isConstTrueVal(MaskI)) {
12860 SDValue VecI = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, Vec,
12861 DAG.getVectorIdxConstant(I, DL));
12862 Ops.push_back(VecI);
12863 NumSelected++;
12864 }
12865 }
12866 for (unsigned Rest = NumSelected; Rest < NumElmts; ++Rest) {
12867 SDValue Val =
12868 HasPassthru
12869 ? DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, Passthru,
12870 DAG.getVectorIdxConstant(Rest, DL))
12871 : DAG.getUNDEF(ScalarVT);
12872 Ops.push_back(Val);
12873 }
12874 return DAG.getBuildVector(VecVT, DL, Ops);
12875 }
12876
12877 return SDValue();
12878}
12879
12880SDValue DAGCombiner::visitVPGATHER(SDNode *N) {
12881 VPGatherSDNode *MGT = cast<VPGatherSDNode>(N);
12882 SDValue Mask = MGT->getMask();
12883 SDValue Chain = MGT->getChain();
12884 SDValue Index = MGT->getIndex();
12885 SDValue Scale = MGT->getScale();
12886 SDValue BasePtr = MGT->getBasePtr();
12887 SDValue VL = MGT->getVectorLength();
12888 ISD::MemIndexType IndexType = MGT->getIndexType();
12889 SDLoc DL(N);
12890
12891 if (refineUniformBase(BasePtr, Index, MGT->isIndexScaled(), DAG, DL)) {
12892 SDValue Ops[] = {Chain, BasePtr, Index, Scale, Mask, VL};
12893 return DAG.getGatherVP(
12894 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
12895 Ops, MGT->getMemOperand(), IndexType);
12896 }
12897
12898 if (refineIndexType(Index, IndexType, N->getValueType(0), DAG)) {
12899 SDValue Ops[] = {Chain, BasePtr, Index, Scale, Mask, VL};
12900 return DAG.getGatherVP(
12901 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
12902 Ops, MGT->getMemOperand(), IndexType);
12903 }
12904
12905 return SDValue();
12906}
12907
12908SDValue DAGCombiner::visitMGATHER(SDNode *N) {
12909 MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
12910 SDValue Mask = MGT->getMask();
12911 SDValue Chain = MGT->getChain();
12912 SDValue Index = MGT->getIndex();
12913 SDValue Scale = MGT->getScale();
12914 SDValue PassThru = MGT->getPassThru();
12915 SDValue BasePtr = MGT->getBasePtr();
12916 ISD::MemIndexType IndexType = MGT->getIndexType();
12917 SDLoc DL(N);
12918
12919 // Zap gathers with a zero mask.
12920   if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
12921     return CombineTo(N, PassThru, MGT->getChain());
12922
12923 if (refineUniformBase(BasePtr, Index, MGT->isIndexScaled(), DAG, DL)) {
12924 SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
12925 return DAG.getMaskedGather(
12926 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
12927 Ops, MGT->getMemOperand(), IndexType, MGT->getExtensionType());
12928 }
12929
12930 if (refineIndexType(Index, IndexType, N->getValueType(0), DAG)) {
12931 SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
12932 return DAG.getMaskedGather(
12933 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
12934 Ops, MGT->getMemOperand(), IndexType, MGT->getExtensionType());
12935 }
12936
12937 return SDValue();
12938}
12939
12940SDValue DAGCombiner::visitMLOAD(SDNode *N) {
12941 MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
12942 SDValue Mask = MLD->getMask();
12943
12944 // Zap masked loads with a zero mask.
12945   if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
12946     return CombineTo(N, MLD->getPassThru(), MLD->getChain());
12947
12948   // If this is a masked load with an all-ones mask, we can use an unmasked load.
12949 // FIXME: Can we do this for indexed, expanding, or extending loads?
12950 if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MLD->isUnindexed() &&
12951 !MLD->isExpandingLoad() && MLD->getExtensionType() == ISD::NON_EXTLOAD) {
12952 SDValue NewLd = DAG.getLoad(
12953 N->getValueType(0), SDLoc(N), MLD->getChain(), MLD->getBasePtr(),
12954 MLD->getPointerInfo(), MLD->getBaseAlign(),
12955 MLD->getMemOperand()->getFlags(), MLD->getAAInfo(), MLD->getRanges());
12956 return CombineTo(N, NewLd, NewLd.getValue(1));
12957 }
12958
12959 // Try transforming N to an indexed load.
12960 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
12961 return SDValue(N, 0);
12962
12963 return SDValue();
12964}
12965
12966SDValue DAGCombiner::visitMHISTOGRAM(SDNode *N) {
12967 MaskedHistogramSDNode *HG = cast<MaskedHistogramSDNode>(N);
12968 SDValue Chain = HG->getChain();
12969 SDValue Inc = HG->getInc();
12970 SDValue Mask = HG->getMask();
12971 SDValue BasePtr = HG->getBasePtr();
12972 SDValue Index = HG->getIndex();
12973 SDLoc DL(HG);
12974
12975 EVT MemVT = HG->getMemoryVT();
12976 EVT DataVT = Index.getValueType();
12977 MachineMemOperand *MMO = HG->getMemOperand();
12978 ISD::MemIndexType IndexType = HG->getIndexType();
12979
12980   if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
12981     return Chain;
12982
12983 if (refineUniformBase(BasePtr, Index, HG->isIndexScaled(), DAG, DL) ||
12984 refineIndexType(Index, IndexType, DataVT, DAG)) {
12985 SDValue Ops[] = {Chain, Inc, Mask, BasePtr, Index,
12986 HG->getScale(), HG->getIntID()};
12987 return DAG.getMaskedHistogram(DAG.getVTList(MVT::Other), MemVT, DL, Ops,
12988 MMO, IndexType);
12989 }
12990
12991 return SDValue();
12992}
12993
12994SDValue DAGCombiner::visitPARTIAL_REDUCE_MLA(SDNode *N) {
12995 if (SDValue Res = foldPartialReduceMLAMulOp(N))
12996 return Res;
12997 if (SDValue Res = foldPartialReduceAdd(N))
12998 return Res;
12999 return SDValue();
13000}
13001
13002// partial_reduce_*mla(acc, mul(ext(a), ext(b)), splat(1))
13003// -> partial_reduce_*mla(acc, a, b)
13004//
13005// partial_reduce_*mla(acc, mul(ext(x), splat(C)), splat(1))
13006// -> partial_reduce_*mla(acc, x, C)
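//
// Illustrative example (types chosen arbitrarily): with a, b : v16i8 and
// acc : v4i32, partial_reduce_umla(acc, mul(zext a, zext b), splat(1)) can be
// rewritten as partial_reduce_umla(acc, a, b), letting the target fold the
// zero-extends into the widening multiply-accumulate.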
13007SDValue DAGCombiner::foldPartialReduceMLAMulOp(SDNode *N) {
13008 SDLoc DL(N);
13009 auto *Context = DAG.getContext();
13010 SDValue Acc = N->getOperand(0);
13011 SDValue Op1 = N->getOperand(1);
13012 SDValue Op2 = N->getOperand(2);
13013
13014 unsigned Opc = Op1->getOpcode();
13015 if (Opc != ISD::MUL && Opc != ISD::SHL)
13016 return SDValue();
13017
13018 SDValue LHS = Op1->getOperand(0);
13019 SDValue RHS = Op1->getOperand(1);
13020
13021 // Try to treat (shl %a, %c) as (mul %a, (1 << %c)) for constant %c.
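  // E.g. (illustrative) shl %a, splat(3) is rewritten as mul %a, splat(8), so
  // the constant-splat multiply handling below also covers shifts.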
13022 if (Opc == ISD::SHL) {
13023 APInt C;
13024 if (!ISD::isConstantSplatVector(RHS.getNode(), C))
13025 return SDValue();
13026
13027 RHS =
13028 DAG.getSplatVector(RHS.getValueType(), DL,
13029 DAG.getConstant(APInt(C.getBitWidth(), 1).shl(C), DL,
13030 RHS.getValueType().getScalarType()));
13031 Opc = ISD::MUL;
13032 }
13033
13034 APInt C;
13035 if (Opc != ISD::MUL || !ISD::isConstantSplatVector(Op2.getNode(), C) ||
13036 !C.isOne())
13037 return SDValue();
13038
13039 unsigned LHSOpcode = LHS->getOpcode();
13040 if (!ISD::isExtOpcode(LHSOpcode))
13041 return SDValue();
13042
13043 SDValue LHSExtOp = LHS->getOperand(0);
13044 EVT LHSExtOpVT = LHSExtOp.getValueType();
13045
13046 // partial_reduce_*mla(acc, mul(ext(x), splat(C)), splat(1))
13047 // -> partial_reduce_*mla(acc, x, C)
13048 if (ISD::isConstantSplatVector(RHS.getNode(), C)) {
13049 // TODO: Make use of partial_reduce_sumla here
13050 APInt CTrunc = C.trunc(LHSExtOpVT.getScalarSizeInBits());
13051 unsigned LHSBits = LHS.getValueType().getScalarSizeInBits();
13052 if ((LHSOpcode != ISD::ZERO_EXTEND || CTrunc.zext(LHSBits) != C) &&
13053 (LHSOpcode != ISD::SIGN_EXTEND || CTrunc.sext(LHSBits) != C))
13054 return SDValue();
13055
13056 unsigned NewOpcode = LHSOpcode == ISD::SIGN_EXTEND
13057 ? ISD::PARTIAL_REDUCE_SMLA
13058 : ISD::PARTIAL_REDUCE_UMLA;
13059
13060 // Only perform these combines if the target supports folding
13061 // the extends into the operation.
13062     if (!TLI.isPartialReduceMLALegalOrCustom(
13063             NewOpcode, TLI.getTypeToTransformTo(*Context, N->getValueType(0)),
13064 TLI.getTypeToTransformTo(*Context, LHSExtOpVT)))
13065 return SDValue();
13066
13067 return DAG.getNode(NewOpcode, DL, N->getValueType(0), Acc, LHSExtOp,
13068 DAG.getConstant(CTrunc, DL, LHSExtOpVT));
13069 }
13070
13071 unsigned RHSOpcode = RHS->getOpcode();
13072 if (!ISD::isExtOpcode(RHSOpcode))
13073 return SDValue();
13074
13075 SDValue RHSExtOp = RHS->getOperand(0);
13076 if (LHSExtOpVT != RHSExtOp.getValueType())
13077 return SDValue();
13078
13079 unsigned NewOpc;
13080 if (LHSOpcode == ISD::SIGN_EXTEND && RHSOpcode == ISD::SIGN_EXTEND)
13081 NewOpc = ISD::PARTIAL_REDUCE_SMLA;
13082 else if (LHSOpcode == ISD::ZERO_EXTEND && RHSOpcode == ISD::ZERO_EXTEND)
13083 NewOpc = ISD::PARTIAL_REDUCE_UMLA;
13084 else if (LHSOpcode == ISD::SIGN_EXTEND && RHSOpcode == ISD::ZERO_EXTEND)
13085 NewOpc = ISD::PARTIAL_REDUCE_SUMLA;
13086 else if (LHSOpcode == ISD::ZERO_EXTEND && RHSOpcode == ISD::SIGN_EXTEND) {
13087 NewOpc = ISD::PARTIAL_REDUCE_SUMLA;
13088 std::swap(LHSExtOp, RHSExtOp);
13089 } else
13090 return SDValue();
13091   // For a 2-stage extend, the signedness of both of the extends must match.
13092   // If the mul has the same type, there is no outer extend, and thus we
13093   // can simply use the inner extends to pick the result node.
13094 // TODO: extend to handle nonneg zext as sext
13095 EVT AccElemVT = Acc.getValueType().getVectorElementType();
13096 if (Op1.getValueType().getVectorElementType() != AccElemVT &&
13097 NewOpc != N->getOpcode())
13098 return SDValue();
13099
13100 // Only perform these combines if the target supports folding
13101 // the extends into the operation.
13102   if (!TLI.isPartialReduceMLALegalOrCustom(
13103           NewOpc, TLI.getTypeToTransformTo(*Context, N->getValueType(0)),
13104 TLI.getTypeToTransformTo(*Context, LHSExtOpVT)))
13105 return SDValue();
13106
13107 return DAG.getNode(NewOpc, DL, N->getValueType(0), Acc, LHSExtOp, RHSExtOp);
13108}
13109
13110// partial.reduce.umla(acc, zext(op), splat(1))
13111// -> partial.reduce.umla(acc, op, splat(trunc(1)))
13112// partial.reduce.smla(acc, sext(op), splat(1))
13113// -> partial.reduce.smla(acc, op, splat(trunc(1)))
13114// partial.reduce.sumla(acc, sext(op), splat(1))
13115// -> partial.reduce.smla(acc, op, splat(trunc(1)))
13116SDValue DAGCombiner::foldPartialReduceAdd(SDNode *N) {
13117 SDLoc DL(N);
13118 SDValue Acc = N->getOperand(0);
13119 SDValue Op1 = N->getOperand(1);
13120 SDValue Op2 = N->getOperand(2);
13121
13122 APInt ConstantOne;
13123 if (!ISD::isConstantSplatVector(Op2.getNode(), ConstantOne) ||
13124 !ConstantOne.isOne())
13125 return SDValue();
13126
13127 unsigned Op1Opcode = Op1.getOpcode();
13128 if (!ISD::isExtOpcode(Op1Opcode))
13129 return SDValue();
13130
13131 bool Op1IsSigned = Op1Opcode == ISD::SIGN_EXTEND;
13132 bool NodeIsSigned = N->getOpcode() != ISD::PARTIAL_REDUCE_UMLA;
13133 EVT AccElemVT = Acc.getValueType().getVectorElementType();
13134 if (Op1IsSigned != NodeIsSigned &&
13135 Op1.getValueType().getVectorElementType() != AccElemVT)
13136 return SDValue();
13137
13138 unsigned NewOpcode =
13139 Op1IsSigned ? ISD::PARTIAL_REDUCE_SMLA : ISD::PARTIAL_REDUCE_UMLA;
13140
13141 SDValue UnextOp1 = Op1.getOperand(0);
13142 EVT UnextOp1VT = UnextOp1.getValueType();
13143 auto *Context = DAG.getContext();
13144   if (!TLI.isPartialReduceMLALegalOrCustom(
13145           NewOpcode, TLI.getTypeToTransformTo(*Context, N->getValueType(0)),
13146 TLI.getTypeToTransformTo(*Context, UnextOp1VT)))
13147 return SDValue();
13148
13149 return DAG.getNode(NewOpcode, DL, N->getValueType(0), Acc, UnextOp1,
13150 DAG.getConstant(1, DL, UnextOp1VT));
13151}
13152
13153SDValue DAGCombiner::visitVP_STRIDED_LOAD(SDNode *N) {
13154 auto *SLD = cast<VPStridedLoadSDNode>(N);
13155 EVT EltVT = SLD->getValueType(0).getVectorElementType();
13156 // Combine strided loads with unit-stride to a regular VP load.
13157 if (auto *CStride = dyn_cast<ConstantSDNode>(SLD->getStride());
13158 CStride && CStride->getZExtValue() == EltVT.getStoreSize()) {
13159 SDValue NewLd = DAG.getLoadVP(
13160 SLD->getAddressingMode(), SLD->getExtensionType(), SLD->getValueType(0),
13161 SDLoc(N), SLD->getChain(), SLD->getBasePtr(), SLD->getOffset(),
13162 SLD->getMask(), SLD->getVectorLength(), SLD->getMemoryVT(),
13163 SLD->getMemOperand(), SLD->isExpandingLoad());
13164 return CombineTo(N, NewLd, NewLd.getValue(1));
13165 }
13166 return SDValue();
13167}
13168
13169/// A vector select of 2 constant vectors can be simplified to math/logic to
13170/// avoid a variable select instruction and possibly avoid constant loads.
13171SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
13172 SDValue Cond = N->getOperand(0);
13173 SDValue N1 = N->getOperand(1);
13174 SDValue N2 = N->getOperand(2);
13175 EVT VT = N->getValueType(0);
13176 if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
13177       !TLI.convertSelectOfConstantsToMath(VT) ||
13178       !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) ||
13179       !ISD::isBuildVectorOfConstantSDNodes(N2.getNode()))
13180     return SDValue();
13181
13182 // Check if we can use the condition value to increment/decrement a single
13183 // constant value. This simplifies a select to an add and removes a constant
13184 // load/materialization from the general case.
13185 bool AllAddOne = true;
13186 bool AllSubOne = true;
13187 unsigned Elts = VT.getVectorNumElements();
13188 for (unsigned i = 0; i != Elts; ++i) {
13189 SDValue N1Elt = N1.getOperand(i);
13190 SDValue N2Elt = N2.getOperand(i);
13191 if (N1Elt.isUndef())
13192 continue;
13193 // N2 should not contain undef values since it will be reused in the fold.
13194 if (N2Elt.isUndef() || N1Elt.getValueType() != N2Elt.getValueType()) {
13195 AllAddOne = false;
13196 AllSubOne = false;
13197 break;
13198 }
13199
13200 const APInt &C1 = N1Elt->getAsAPIntVal();
13201 const APInt &C2 = N2Elt->getAsAPIntVal();
13202 if (C1 != C2 + 1)
13203 AllAddOne = false;
13204 if (C1 != C2 - 1)
13205 AllSubOne = false;
13206 }
13207
13208 // Further simplifications for the extra-special cases where the constants are
13209 // all 0 or all -1 should be implemented as folds of these patterns.
13210 SDLoc DL(N);
13211 if (AllAddOne || AllSubOne) {
13212 // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
13213 // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
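    // Illustrative example: vselect %c, <5,5,5,5>, <4,4,4,4> becomes
    // add (zext %c), <4,4,4,4>; lanes where %c is true yield 5 and the
    // remaining lanes yield 4.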
13214 auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
13215 SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
13216 return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
13217 }
13218
13219 // select Cond, Pow2C, 0 --> (zext Cond) << log2(Pow2C)
13220 APInt Pow2C;
13221 if (ISD::isConstantSplatVector(N1.getNode(), Pow2C) && Pow2C.isPowerOf2() &&
13222 isNullOrNullSplat(N2)) {
13223 SDValue ZextCond = DAG.getZExtOrTrunc(Cond, DL, VT);
13224 SDValue ShAmtC = DAG.getConstant(Pow2C.exactLogBase2(), DL, VT);
13225 return DAG.getNode(ISD::SHL, DL, VT, ZextCond, ShAmtC);
13226 }
13227
13229 return V;
13230
13231 // The general case for select-of-constants:
13232 // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
13233 // ...but that only makes sense if a vselect is slower than 2 logic ops, so
13234 // leave that to a machine-specific pass.
13235 return SDValue();
13236}
13237
13238SDValue DAGCombiner::visitVP_SELECT(SDNode *N) {
13239 SDValue N0 = N->getOperand(0);
13240 SDValue N1 = N->getOperand(1);
13241 SDValue N2 = N->getOperand(2);
13242 SDLoc DL(N);
13243
13244 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
13245 return V;
13246
13247   if (SDValue V = foldBoolSelectToLogic<VPMatchContext>(N, DL, DAG))
13248     return V;
13249
13250 return SDValue();
13251}
13252
13253 static SDValue combineVSelectWithAllOnesOrZeros(SDValue Cond, SDValue TVal,
13254                                                 SDValue FVal,
13255 const TargetLowering &TLI,
13256 SelectionDAG &DAG,
13257 const SDLoc &DL) {
13258 EVT VT = TVal.getValueType();
13259 if (!TLI.isTypeLegal(VT))
13260 return SDValue();
13261
13262 EVT CondVT = Cond.getValueType();
13263 assert(CondVT.isVector() && "Vector select expects a vector selector!");
13264
13265 bool IsTAllZero = ISD::isConstantSplatVectorAllZeros(TVal.getNode());
13266 bool IsTAllOne = ISD::isConstantSplatVectorAllOnes(TVal.getNode());
13267 bool IsFAllZero = ISD::isConstantSplatVectorAllZeros(FVal.getNode());
13268 bool IsFAllOne = ISD::isConstantSplatVectorAllOnes(FVal.getNode());
13269
13270   // If neither arm is an all-zeros or all-ones splat, there is nothing to fold.
13271 if (!IsTAllZero && !IsTAllOne && !IsFAllZero && !IsFAllOne)
13272 return SDValue();
13273
13274 // select Cond, 0, 0 → 0
13275 if (IsTAllZero && IsFAllZero) {
13276 return VT.isFloatingPoint() ? DAG.getConstantFP(0.0, DL, VT)
13277 : DAG.getConstant(0, DL, VT);
13278 }
13279
13280 // check select(setgt lhs, -1), 1, -1 --> or (sra lhs, bitwidth - 1), 1
13281 APInt TValAPInt;
13282 if (Cond.getOpcode() == ISD::SETCC &&
13283 Cond.getOperand(2) == DAG.getCondCode(ISD::SETGT) &&
13284 Cond.getOperand(0).getValueType() == VT && VT.isSimple() &&
13285 ISD::isConstantSplatVector(TVal.getNode(), TValAPInt) &&
13286 TValAPInt.isOne() &&
13287 ISD::isConstantSplatVectorAllOnes(Cond.getOperand(1).getNode()) &&
13288       ISD::isConstantSplatVectorAllOnes(FVal.getNode())) {
13289     return SDValue();
13290 }
13291
13292   // To use the condition operand as a bitwise mask, it must have elements that
13293   // are the same size as the select elements, i.e., the condition operand must
13294 // have already been promoted from the IR select condition type <N x i1>.
13295 // Don't check if the types themselves are equal because that excludes
13296 // vector floating-point selects.
13297 if (CondVT.getScalarSizeInBits() != VT.getScalarSizeInBits())
13298 return SDValue();
13299
13300 // Cond value must be 'sign splat' to be converted to a logical op.
13301 if (DAG.ComputeNumSignBits(Cond) != CondVT.getScalarSizeInBits())
13302 return SDValue();
13303
13304 // Try inverting Cond and swapping T/F if it gives all-ones/all-zeros form
13305 if (!IsTAllOne && !IsFAllZero && Cond.hasOneUse() &&
13306 Cond.getOpcode() == ISD::SETCC &&
13307 TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT) ==
13308 CondVT) {
13309 if (IsTAllZero || IsFAllOne) {
13310 SDValue CC = Cond.getOperand(2);
13311       ISD::CondCode InverseCC = ISD::getSetCCInverse(
13312           cast<CondCodeSDNode>(CC)->get(), Cond.getOperand(0).getValueType());
13313 Cond = DAG.getSetCC(DL, CondVT, Cond.getOperand(0), Cond.getOperand(1),
13314 InverseCC);
13315 std::swap(TVal, FVal);
13316 std::swap(IsTAllOne, IsFAllOne);
13317 std::swap(IsTAllZero, IsFAllZero);
13318 }
13319 }
13320
13321   assert(DAG.ComputeNumSignBits(Cond) == CondVT.getScalarSizeInBits() &&
13322          "Select condition no longer all-sign bits");
13323
13324 // select Cond, -1, 0 → bitcast Cond
13325 if (IsTAllOne && IsFAllZero)
13326 return DAG.getBitcast(VT, Cond);
13327
13328 // select Cond, -1, x → or Cond, x
13329 if (IsTAllOne) {
13330 SDValue X = DAG.getBitcast(CondVT, DAG.getFreeze(FVal));
13331 SDValue Or = DAG.getNode(ISD::OR, DL, CondVT, Cond, X);
13332 return DAG.getBitcast(VT, Or);
13333 }
13334
13335 // select Cond, x, 0 → and Cond, x
13336 if (IsFAllZero) {
13337 SDValue X = DAG.getBitcast(CondVT, DAG.getFreeze(TVal));
13338 SDValue And = DAG.getNode(ISD::AND, DL, CondVT, Cond, X);
13339 return DAG.getBitcast(VT, And);
13340 }
13341
13342 // select Cond, 0, x -> and not(Cond), x
13343 if (IsTAllZero &&
13344       (isBitwiseNot(peekThroughBitcasts(Cond)) || TLI.hasAndNot(Cond))) {
13345     SDValue X = DAG.getBitcast(CondVT, DAG.getFreeze(FVal));
13346 SDValue And =
13347 DAG.getNode(ISD::AND, DL, CondVT, DAG.getNOT(DL, Cond, CondVT), X);
13348 return DAG.getBitcast(VT, And);
13349 }
13350
13351 return SDValue();
13352}
13353
13354SDValue DAGCombiner::visitVSELECT(SDNode *N) {
13355 SDValue N0 = N->getOperand(0);
13356 SDValue N1 = N->getOperand(1);
13357 SDValue N2 = N->getOperand(2);
13358 EVT VT = N->getValueType(0);
13359 SDLoc DL(N);
13360
13361 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
13362 return V;
13363
13364   if (SDValue V = foldBoolSelectToLogic<EmptyMatchContext>(N, DL, DAG))
13365     return V;
13366
13367 // vselect (not Cond), N1, N2 -> vselect Cond, N2, N1
13368 if (!TLI.isTargetCanonicalSelect(N))
13369 if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false))
13370 return DAG.getSelect(DL, VT, F, N2, N1);
13371
13372 // select (sext m), (add X, C), X --> (add X, (and C, (sext m))))
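  // This holds because sext of a true lane is all-ones, so (and C, -1) == C
  // and the add produces X + C; for a false lane the mask is 0 and X is
  // returned unchanged.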
13373 if (N1.getOpcode() == ISD::ADD && N1.getOperand(0) == N2 && N1->hasOneUse() &&
13376 TLI.getBooleanContents(N0.getValueType()) ==
13377           TargetLowering::ZeroOrNegativeOneBooleanContent) {
13378     return DAG.getNode(
13379 ISD::ADD, DL, N1.getValueType(), N2,
13380 DAG.getNode(ISD::AND, DL, N0.getValueType(), N1.getOperand(1), N0));
13381 }
13382
13383 // Canonicalize integer abs.
13384 // vselect (setg[te] X, 0), X, -X ->
13385 // vselect (setgt X, -1), X, -X ->
13386 // vselect (setl[te] X, 0), -X, X ->
13387 // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
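  // Worked example (illustrative, i32): for X = -5, Y = X >> 31 = -1, the add
  // gives -6 and xor(-6, -1) = 5 = |X|; for non-negative X, Y = 0 and the
  // add/xor leave X unchanged.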
13388 if (N0.getOpcode() == ISD::SETCC) {
13389 SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
13390     ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
13391     bool isAbs = false;
13392 bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
13393
13394 if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
13395 (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
13396 N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
13397       isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
13398     else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
13399 N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
13400       isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
13401
13402 if (isAbs) {
13403       if (TLI.isOperationLegalOrCustom(ISD::ABS, VT))
13404         return DAG.getNode(ISD::ABS, DL, VT, LHS);
13405
13406 SDValue Shift = DAG.getNode(
13407 ISD::SRA, DL, VT, LHS,
13408 DAG.getShiftAmountConstant(VT.getScalarSizeInBits() - 1, VT, DL));
13409 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
13410 AddToWorklist(Shift.getNode());
13411 AddToWorklist(Add.getNode());
13412 return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
13413 }
13414
13415 // vselect x, y (fcmp lt x, y) -> fminnum x, y
13416 // vselect x, y (fcmp gt x, y) -> fmaxnum x, y
13417 //
13418 // This is OK if we don't care about what happens if either operand is a
13419 // NaN.
13420 //
13421 if (N0.hasOneUse() &&
13422 isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, N->getFlags(), TLI)) {
13423 if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC))
13424 return FMinMax;
13425 }
13426
13427 if (SDValue S = PerformMinMaxFpToSatCombine(LHS, RHS, N1, N2, CC, DAG))
13428 return S;
13429 if (SDValue S = PerformUMinFpToSatCombine(LHS, RHS, N1, N2, CC, DAG))
13430 return S;
13431
13432 // If this select has a condition (setcc) with narrower operands than the
13433 // select, try to widen the compare to match the select width.
13434 // TODO: This should be extended to handle any constant.
13435 // TODO: This could be extended to handle non-loading patterns, but that
13436 // requires thorough testing to avoid regressions.
13437 if (isNullOrNullSplat(RHS)) {
13438 EVT NarrowVT = LHS.getValueType();
13440 EVT SetCCVT = getSetCCResultType(LHS.getValueType());
13441 unsigned SetCCWidth = SetCCVT.getScalarSizeInBits();
13442 unsigned WideWidth = WideVT.getScalarSizeInBits();
13443 bool IsSigned = isSignedIntSetCC(CC);
13444 auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
13445 if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() &&
13446 SetCCWidth != 1 && SetCCWidth < WideWidth &&
13447 TLI.isLoadExtLegalOrCustom(LoadExtOpcode, WideVT, NarrowVT) &&
13448 TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)) {
13449 // Both compare operands can be widened for free. The LHS can use an
13450 // extended load, and the RHS is a constant:
13451 // vselect (ext (setcc load(X), C)), N1, N2 -->
13452 // vselect (setcc extload(X), C'), N1, N2
13453 auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
13454 SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS);
13455 SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS);
13456 EVT WideSetCCVT = getSetCCResultType(WideVT);
13457 SDValue WideSetCC = DAG.getSetCC(DL, WideSetCCVT, WideLHS, WideRHS, CC);
13458 return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2);
13459 }
13460 }
13461
13462 if (SDValue ABD = foldSelectToABD(LHS, RHS, N1, N2, CC, DL))
13463 return ABD;
13464
13465 // Match VSELECTs into add with unsigned saturation.
13466 if (hasOperation(ISD::UADDSAT, VT)) {
13467     // Check if one of the arms of the VSELECT is a vector with all bits set.
13468     // If it's on the left side, invert the predicate to simplify the logic below.
13469 SDValue Other;
13470 ISD::CondCode SatCC = CC;
13471     if (ISD::isConstantSplatVectorAllOnes(N1.getNode())) {
13472       Other = N2;
13473 SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
13474 } else if (ISD::isConstantSplatVectorAllOnes(N2.getNode())) {
13475 Other = N1;
13476 }
13477
13478 if (Other && Other.getOpcode() == ISD::ADD) {
13479 SDValue CondLHS = LHS, CondRHS = RHS;
13480 SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
13481
13482 // Canonicalize condition operands.
13483 if (SatCC == ISD::SETUGE) {
13484 std::swap(CondLHS, CondRHS);
13485 SatCC = ISD::SETULE;
13486 }
13487
13488 // We can test against either of the addition operands.
13489 // x <= x+y ? x+y : ~0 --> uaddsat x, y
13490 // x+y >= x ? x+y : ~0 --> uaddsat x, y
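      // This is sound because an unsigned add wraps exactly when x+y u< x; in
      // that case the select picks ~0, which is what uaddsat returns on
      // overflow.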
13491 if (SatCC == ISD::SETULE && Other == CondRHS &&
13492 (OpLHS == CondLHS || OpRHS == CondLHS))
13493 return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
13494
13495 if (OpRHS.getOpcode() == CondRHS.getOpcode() &&
13496 (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
13497 OpRHS.getOpcode() == ISD::SPLAT_VECTOR) &&
13498 CondLHS == OpLHS) {
13499 // If the RHS is a constant we have to reverse the const
13500 // canonicalization.
13501 // x >= ~C ? x+C : ~0 --> uaddsat x, C
13502 auto MatchUADDSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
13503 return Cond->getAPIntValue() == ~Op->getAPIntValue();
13504 };
13505 if (SatCC == ISD::SETULE &&
13506 ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUADDSAT))
13507 return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
13508 }
13509 }
13510 }
13511
13512 // Match VSELECTs into sub with unsigned saturation.
13513 if (hasOperation(ISD::USUBSAT, VT)) {
13514     // Check if one of the arms of the VSELECT is a zero vector. If it's on
13515     // the left side, invert the predicate to simplify the logic below.
13516 SDValue Other;
13517 ISD::CondCode SatCC = CC;
13518     if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) {
13519       Other = N2;
13520 SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
13521     } else if (ISD::isConstantSplatVectorAllZeros(N2.getNode())) {
13522       Other = N1;
13523 }
13524
13525 // zext(x) >= y ? trunc(zext(x) - y) : 0
13526 // --> usubsat(trunc(zext(x)),trunc(umin(y,SatLimit)))
13527 // zext(x) > y ? trunc(zext(x) - y) : 0
13528 // --> usubsat(trunc(zext(x)),trunc(umin(y,SatLimit)))
13529 if (Other && Other.getOpcode() == ISD::TRUNCATE &&
13530 Other.getOperand(0).getOpcode() == ISD::SUB &&
13531 (SatCC == ISD::SETUGE || SatCC == ISD::SETUGT)) {
13532 SDValue OpLHS = Other.getOperand(0).getOperand(0);
13533 SDValue OpRHS = Other.getOperand(0).getOperand(1);
13534 if (LHS == OpLHS && RHS == OpRHS && LHS.getOpcode() == ISD::ZERO_EXTEND)
13535 if (SDValue R = getTruncatedUSUBSAT(VT, LHS.getValueType(), LHS, RHS,
13536 DAG, DL))
13537 return R;
13538 }
13539
13540 if (Other && Other.getNumOperands() == 2) {
13541 SDValue CondRHS = RHS;
13542 SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
13543
13544 if (OpLHS == LHS) {
13545 // Look for a general sub with unsigned saturation first.
13546 // x >= y ? x-y : 0 --> usubsat x, y
13547 // x > y ? x-y : 0 --> usubsat x, y
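        // E.g. (illustrative) x = 9, y = 5 gives 4 on both sides, while
        // x = 5, y = 9 gives 0, matching usubsat's clamping behaviour.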
13548 if ((SatCC == ISD::SETUGE || SatCC == ISD::SETUGT) &&
13549 Other.getOpcode() == ISD::SUB && OpRHS == CondRHS)
13550 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
13551
13552 if (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
13553 OpRHS.getOpcode() == ISD::SPLAT_VECTOR) {
13554 if (CondRHS.getOpcode() == ISD::BUILD_VECTOR ||
13555 CondRHS.getOpcode() == ISD::SPLAT_VECTOR) {
13556 // If the RHS is a constant we have to reverse the const
13557 // canonicalization.
13558 // x > C-1 ? x+-C : 0 --> usubsat x, C
13559 auto MatchUSUBSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
13560 return (!Op && !Cond) ||
13561 (Op && Cond &&
13562 Cond->getAPIntValue() == (-Op->getAPIntValue() - 1));
13563 };
13564 if (SatCC == ISD::SETUGT && Other.getOpcode() == ISD::ADD &&
13565 ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUSUBSAT,
13566 /*AllowUndefs*/ true)) {
13567 OpRHS = DAG.getNegative(OpRHS, DL, VT);
13568 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
13569 }
13570
13571 // Another special case: If C was a sign bit, the sub has been
13572 // canonicalized into a xor.
13573 // FIXME: Would it be better to use computeKnownBits to
13574 // determine whether it's safe to decanonicalize the xor?
13575 // x s< 0 ? x^C : 0 --> usubsat x, C
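          // Illustrative: for i8 with C = 0x80, "x s< 0" means the sign bit of
          // x is set, so x ^ 0x80 just clears it, i.e. computes x - 128, which
          // equals usubsat(x, 128); when x s>= 0, usubsat(x, 128) is 0 as well.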
13576 APInt SplatValue;
13577 if (SatCC == ISD::SETLT && Other.getOpcode() == ISD::XOR &&
13578 ISD::isConstantSplatVector(OpRHS.getNode(), SplatValue) &&
13580 SplatValue.isSignMask()) {
13581 // Note that we have to rebuild the RHS constant here to
13582 // ensure we don't rely on particular values of undef lanes.
13583 OpRHS = DAG.getConstant(SplatValue, DL, VT);
13584 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
13585 }
13586 }
13587 }
13588 }
13589 }
13590 }
13591
13592 // (vselect (ugt x, C), (add x, ~C), x) -> (umin (add x, ~C), x)
13593 // (vselect (ult x, C), x, (add x, -C)) -> (umin x, (add x, -C))
13594 if (SDValue UMin = foldSelectToUMin(LHS, RHS, N1, N2, CC, DL))
13595 return UMin;
13596 }
13597
13598 if (SimplifySelectOps(N, N1, N2))
13599 return SDValue(N, 0); // Don't revisit N.
13600
13601 // Fold (vselect all_ones, N1, N2) -> N1
13602   if (ISD::isConstantSplatVectorAllOnes(N0.getNode()))
13603     return N1;
13604 // Fold (vselect all_zeros, N1, N2) -> N2
13605   if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
13606     return N2;
13607
13608   // The ConvertSelectToConcatVector function assumes both of the above
13609   // checks for (vselect (build_vector all{ones,zeros}) ...) have been made
13610   // and addressed.
13611 if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
13612       N2.getOpcode() == ISD::CONCAT_VECTORS &&
13613       ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
13614     if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
13615 return CV;
13616 }
13617
13618 if (SDValue V = foldVSelectOfConstants(N))
13619 return V;
13620
13621 if (hasOperation(ISD::SRA, VT))
13622     if (SDValue V = foldVSelectToSignBitSplatMask(N, DAG))
13623       return V;
13624
13625   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
13626     return SDValue(N, 0);
13627
13628 if (SDValue V = combineVSelectWithAllOnesOrZeros(N0, N1, N2, TLI, DAG, DL))
13629 return V;
13630
13631 return SDValue();
13632}
13633
13634SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
13635 SDValue N0 = N->getOperand(0);
13636 SDValue N1 = N->getOperand(1);
13637 SDValue N2 = N->getOperand(2);
13638 SDValue N3 = N->getOperand(3);
13639 SDValue N4 = N->getOperand(4);
13640 ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
13641 SDLoc DL(N);
13642
13643 // fold select_cc lhs, rhs, x, x, cc -> x
13644 if (N2 == N3)
13645 return N2;
13646
13647 // select_cc bool, 0, x, y, seteq -> select bool, y, x
13648 if (CC == ISD::SETEQ && !LegalTypes && N0.getValueType() == MVT::i1 &&
13649 isNullConstant(N1))
13650 return DAG.getSelect(DL, N2.getValueType(), N0, N3, N2);
13651
13652 // Determine if the condition we're dealing with is constant
13653 if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
13654 CC, DL, false)) {
13655 AddToWorklist(SCC.getNode());
13656
13657 // cond always true -> true val
13658 // cond always false -> false val
13659 if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode()))
13660 return SCCC->isZero() ? N3 : N2;
13661
13662     // When the condition is UNDEF, just return the first operand. This is
13663     // consistent with DAG creation: no setcc node is created in this case.
13664 if (SCC->isUndef())
13665 return N2;
13666
13667 // Fold to a simpler select_cc
13668 if (SCC.getOpcode() == ISD::SETCC) {
13669 return DAG.getNode(ISD::SELECT_CC, DL, N2.getValueType(),
13670 SCC.getOperand(0), SCC.getOperand(1), N2, N3,
13671 SCC.getOperand(2), SCC->getFlags());
13672 }
13673 }
13674
13675 // If we can fold this based on the true/false value, do so.
13676 if (SimplifySelectOps(N, N2, N3))
13677 return SDValue(N, 0); // Don't revisit N.
13678
13679 // fold select_cc into other things, such as min/max/abs
13680 return SimplifySelectCC(DL, N0, N1, N2, N3, CC);
13681}
13682
13683SDValue DAGCombiner::visitSETCC(SDNode *N) {
13684   // setcc is very commonly used as an argument to brcond. This pattern
13685   // also lends itself to numerous combines and, as a result, it is desirable
13686   // to keep the argument to a brcond as a setcc as much as possible.
13687 bool PreferSetCC =
13688 N->hasOneUse() && N->user_begin()->getOpcode() == ISD::BRCOND;
13689
13690 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
13691 EVT VT = N->getValueType(0);
13692 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
13693 SDLoc DL(N);
13694
13695 if (SDValue Combined = SimplifySetCC(VT, N0, N1, Cond, DL, !PreferSetCC)) {
13696 // If we prefer to have a setcc, and we don't, we'll try our best to
13697 // recreate one using rebuildSetCC.
13698 if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
13699 SDValue NewSetCC = rebuildSetCC(Combined);
13700
13701 // We don't have anything interesting to combine to.
13702 if (NewSetCC.getNode() == N)
13703 return SDValue();
13704
13705 if (NewSetCC)
13706 return NewSetCC;
13707 }
13708 return Combined;
13709 }
13710
13711 // Optimize
13712 // 1) (icmp eq/ne (and X, C0), (shift X, C1))
13713 // or
13714 // 2) (icmp eq/ne X, (rotate X, C1))
13715 // If C0 is a mask or shifted mask and the shift amt (C1) isolates the
13716 // remaining bits (i.e. something like `(x64 & UINT32_MAX) == (x64 >> 32)`)
13717 // Then:
13718 // If C1 is a power of 2, then the rotate and shift+and versions are
13719 // equivalent, so we can interchange them depending on target preference.
13720 // Otherwise, if we have the shift+and version we can interchange srl/shl
13721 // which in turn affects the constant C0. We can use this to get better
13722 // constants again determined by target preference.
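// For example, with a 64-bit value x, `(x & UINT32_MAX) == (x >> 32)` and
// `x == rotl(x, 32)` both test that the low and high halves of x are equal,
// so whichever of the two forms the target prefers can be used.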
13723 if (Cond == ISD::SETNE || Cond == ISD::SETEQ) {
13724 auto IsAndWithShift = [](SDValue A, SDValue B) {
13725 return A.getOpcode() == ISD::AND &&
13726 (B.getOpcode() == ISD::SRL || B.getOpcode() == ISD::SHL) &&
13727 A.getOperand(0) == B.getOperand(0);
13728 };
13729 auto IsRotateWithOp = [](SDValue A, SDValue B) {
13730 return (B.getOpcode() == ISD::ROTL || B.getOpcode() == ISD::ROTR) &&
13731 B.getOperand(0) == A;
13732 };
13733 SDValue AndOrOp = SDValue(), ShiftOrRotate = SDValue();
13734 bool IsRotate = false;
13735
13736 // Find either shift+and or rotate pattern.
13737 if (IsAndWithShift(N0, N1)) {
13738 AndOrOp = N0;
13739 ShiftOrRotate = N1;
13740 } else if (IsAndWithShift(N1, N0)) {
13741 AndOrOp = N1;
13742 ShiftOrRotate = N0;
13743 } else if (IsRotateWithOp(N0, N1)) {
13744 IsRotate = true;
13745 AndOrOp = N0;
13746 ShiftOrRotate = N1;
13747 } else if (IsRotateWithOp(N1, N0)) {
13748 IsRotate = true;
13749 AndOrOp = N1;
13750 ShiftOrRotate = N0;
13751 }
13752
13753 if (AndOrOp && ShiftOrRotate && ShiftOrRotate.hasOneUse() &&
13754 (IsRotate || AndOrOp.hasOneUse())) {
13755 EVT OpVT = N0.getValueType();
13756 // Get constant shift/rotate amount and possibly mask (if it's the shift+and
13757 // variant).
13758 auto GetAPIntValue = [](SDValue Op) -> std::optional<APInt> {
13759 ConstantSDNode *CNode = isConstOrConstSplat(Op, /*AllowUndefs*/ false,
13760 /*AllowTrunc*/ false);
13761 if (CNode == nullptr)
13762 return std::nullopt;
13763 return CNode->getAPIntValue();
13764 };
13765 std::optional<APInt> AndCMask =
13766 IsRotate ? std::nullopt : GetAPIntValue(AndOrOp.getOperand(1));
13767 std::optional<APInt> ShiftCAmt =
13768 GetAPIntValue(ShiftOrRotate.getOperand(1));
13769 unsigned NumBits = OpVT.getScalarSizeInBits();
13770
13771 // We found constants.
13772 if (ShiftCAmt && (IsRotate || AndCMask) && ShiftCAmt->ult(NumBits)) {
13773 unsigned ShiftOpc = ShiftOrRotate.getOpcode();
13774 // Check that the constants meet the constraints.
13775 bool CanTransform = IsRotate;
13776 if (!CanTransform) {
13777 // Check that the mask and shift complement each other
13778 CanTransform = *ShiftCAmt == (~*AndCMask).popcount();
13779 // Check that we are comparing all bits
13780 CanTransform &= (*ShiftCAmt + AndCMask->popcount()) == NumBits;
13781 // Check that the and mask is correct for the shift
13782 CanTransform &=
13783 ShiftOpc == ISD::SHL ? (~*AndCMask).isMask() : AndCMask->isMask();
13784 }
13785
13786 // See if target prefers another shift/rotate opcode.
13787 unsigned NewShiftOpc = TLI.preferedOpcodeForCmpEqPiecesOfOperand(
13788 OpVT, ShiftOpc, ShiftCAmt->isPowerOf2(), *ShiftCAmt, AndCMask);
13789 // Transform is valid and we have a new preference.
13790 if (CanTransform && NewShiftOpc != ShiftOpc) {
13791 SDValue NewShiftOrRotate =
13792 DAG.getNode(NewShiftOpc, DL, OpVT, ShiftOrRotate.getOperand(0),
13793 ShiftOrRotate.getOperand(1));
13794 SDValue NewAndOrOp = SDValue();
13795
13796 if (NewShiftOpc == ISD::SHL || NewShiftOpc == ISD::SRL) {
13797 APInt NewMask =
13798 NewShiftOpc == ISD::SHL
13799 ? APInt::getHighBitsSet(NumBits,
13800 NumBits - ShiftCAmt->getZExtValue())
13801 : APInt::getLowBitsSet(NumBits,
13802 NumBits - ShiftCAmt->getZExtValue());
13803 NewAndOrOp =
13804 DAG.getNode(ISD::AND, DL, OpVT, ShiftOrRotate.getOperand(0),
13805 DAG.getConstant(NewMask, DL, OpVT));
13806 } else {
13807 NewAndOrOp = ShiftOrRotate.getOperand(0);
13808 }
13809
13810 return DAG.getSetCC(DL, VT, NewAndOrOp, NewShiftOrRotate, Cond);
13811 }
13812 }
13813 }
13814 }
13815 return SDValue();
13816}
13817
13818SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
13819 SDValue LHS = N->getOperand(0);
13820 SDValue RHS = N->getOperand(1);
13821 SDValue Carry = N->getOperand(2);
13822 SDValue Cond = N->getOperand(3);
13823
13824 // If Carry is false, fold to a regular SETCC.
13825 if (isNullConstant(Carry))
13826 return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
13827
13828 return SDValue();
13829}
13830
13831/// Check if N satisfies:
13832/// N is used once.
13833/// N is a Load.
13834/// The load is compatible with ExtOpcode: if the load already has an explicit
13835/// zero/sign extension, ExtOpcode must perform the same kind of extension;
13836/// otherwise any extension is compatible.
13837/// Returns true only if all of the above hold.
13838static bool isCompatibleLoad(SDValue N, unsigned ExtOpcode) {
13839 if (!N.hasOneUse())
13840 return false;
13841
13842 if (!isa<LoadSDNode>(N))
13843 return false;
13844
13845 LoadSDNode *Load = cast<LoadSDNode>(N);
13846 ISD::LoadExtType LoadExt = Load->getExtensionType();
13847 if (LoadExt == ISD::NON_EXTLOAD || LoadExt == ISD::EXTLOAD)
13848 return true;
13849
13850 // Now LoadExt is either SEXTLOAD or ZEXTLOAD, ExtOpcode must have the same
13851 // extension.
13852 if ((LoadExt == ISD::SEXTLOAD && ExtOpcode != ISD::SIGN_EXTEND) ||
13853 (LoadExt == ISD::ZEXTLOAD && ExtOpcode != ISD::ZERO_EXTEND))
13854 return false;
13855
13856 return true;
13857}
13858
13859/// Fold
13860/// (sext (select c, load x, load y)) -> (select c, sextload x, sextload y)
13861/// (zext (select c, load x, load y)) -> (select c, zextload x, zextload y)
13862/// (aext (select c, load x, load y)) -> (select c, extload x, extload y)
13863/// This function is called by the DAGCombiner when visiting sext/zext/aext
13864/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
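// Doing this lets each arm of the select become a single extending load,
// instead of keeping the original loads plus a separate extend of the select.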
13865static SDValue tryToFoldExtendSelectLoad(SDNode *N, const TargetLowering &TLI,
13866 SelectionDAG &DAG, const SDLoc &DL,
13867 CombineLevel Level) {
13868 unsigned Opcode = N->getOpcode();
13869 SDValue N0 = N->getOperand(0);
13870 EVT VT = N->getValueType(0);
13871 assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
13872 Opcode == ISD::ANY_EXTEND) &&
13873 "Expected EXTEND dag node in input!");
13874
13875 SDValue Cond, Op1, Op2;
13877 m_Value(Op2)))))
13878 return SDValue();
13879
13880 if (!isCompatibleLoad(Op1, Opcode) || !isCompatibleLoad(Op2, Opcode))
13881 return SDValue();
13882
13883 auto ExtLoadOpcode = ISD::EXTLOAD;
13884 if (Opcode == ISD::SIGN_EXTEND)
13885 ExtLoadOpcode = ISD::SEXTLOAD;
13886 else if (Opcode == ISD::ZERO_EXTEND)
13887 ExtLoadOpcode = ISD::ZEXTLOAD;
13888
13889 // An illegal VSELECT may cause ISel to fail if it appears after legalization
13890 // (DAG Combine2), so we should conservatively check the OperationAction.
13891 LoadSDNode *Load1 = cast<LoadSDNode>(Op1);
13892 LoadSDNode *Load2 = cast<LoadSDNode>(Op2);
13893 if (!TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load1->getMemoryVT()) ||
13894 !TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load2->getMemoryVT()) ||
13895 (N0->getOpcode() == ISD::VSELECT && Level >= AfterLegalizeTypes &&
13896 TLI.getOperationAction(ISD::VSELECT, VT) != TargetLowering::Legal))
13897 return SDValue();
13898
13899 SDValue Ext1 = DAG.getNode(Opcode, DL, VT, Op1);
13900 SDValue Ext2 = DAG.getNode(Opcode, DL, VT, Op2);
13901 return DAG.getSelect(DL, VT, Cond, Ext1, Ext2);
13902}
13903
13904/// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
13905/// a build_vector of constants.
13906/// This function is called by the DAGCombiner when visiting sext/zext/aext
13907/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
13908/// Vector extends are not folded if operations are legal; this is to
13909/// avoid introducing illegal build_vector dag nodes.
13910static SDValue tryToFoldExtendOfConstant(SDNode *N, const SDLoc &DL,
13911 const TargetLowering &TLI,
13912 SelectionDAG &DAG, bool LegalTypes) {
13913 unsigned Opcode = N->getOpcode();
13914 SDValue N0 = N->getOperand(0);
13915 EVT VT = N->getValueType(0);
13916
13917 assert((ISD::isExtOpcode(Opcode) || ISD::isExtVecInRegOpcode(Opcode)) &&
13918 "Expected EXTEND dag node in input!");
13919
13920 // fold (sext c1) -> c1
13921 // fold (zext c1) -> c1
13922 // fold (aext c1) -> c1
13923 if (isa<ConstantSDNode>(N0))
13924 return DAG.getNode(Opcode, DL, VT, N0);
13925
13926 // fold (sext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
13927 // fold (zext (select cond, c1, c2)) -> (select cond, zext c1, zext c2)
13928 // fold (aext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
13929 if (N0->getOpcode() == ISD::SELECT) {
13930 SDValue Op1 = N0->getOperand(1);
13931 SDValue Op2 = N0->getOperand(2);
13932 if (isa<ConstantSDNode>(Op1) && isa<ConstantSDNode>(Op2) &&
13933 (Opcode != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0.getValueType(), VT))) {
13934 // For any_extend, choose sign extension of the constants to allow a
13935 // possible further transform to sign_extend_inreg, i.e.:
13936 //
13937 // t1: i8 = select t0, Constant:i8<-1>, Constant:i8<0>
13938 // t2: i64 = any_extend t1
13939 // -->
13940 // t3: i64 = select t0, Constant:i64<-1>, Constant:i64<0>
13941 // -->
13942 // t4: i64 = sign_extend_inreg t3
13943 unsigned FoldOpc = Opcode;
13944 if (FoldOpc == ISD::ANY_EXTEND)
13945 FoldOpc = ISD::SIGN_EXTEND;
13946 return DAG.getSelect(DL, VT, N0->getOperand(0),
13947 DAG.getNode(FoldOpc, DL, VT, Op1),
13948 DAG.getNode(FoldOpc, DL, VT, Op2));
13949 }
13950 }
13951
13952 // fold (sext (build_vector AllConstants)) -> (build_vector AllConstants)
13953 // fold (zext (build_vector AllConstants)) -> (build_vector AllConstants)
13954 // fold (aext (build_vector AllConstants)) -> (build_vector AllConstants)
13955 EVT SVT = VT.getScalarType();
13956 if (!(VT.isVector() && (!LegalTypes || TLI.isTypeLegal(SVT)) &&
13957 ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
13958 return SDValue();
13959
13960 // We can fold this node into a build_vector.
13961 unsigned VTBits = SVT.getSizeInBits();
13962 unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
13963 SmallVector<SDValue, 8> Elts;
13964 unsigned NumElts = VT.getVectorNumElements();
13965
13966 for (unsigned i = 0; i != NumElts; ++i) {
13967 SDValue Op = N0.getOperand(i);
13968 if (Op.isUndef()) {
13969 if (Opcode == ISD::ANY_EXTEND || Opcode == ISD::ANY_EXTEND_VECTOR_INREG)
13970 Elts.push_back(DAG.getUNDEF(SVT));
13971 else
13972 Elts.push_back(DAG.getConstant(0, DL, SVT));
13973 continue;
13974 }
13975
13976 SDLoc DL(Op);
13977 // Get the constant value and if needed trunc it to the size of the type.
13978 // Nodes like build_vector might have constants wider than the scalar type.
13979 APInt C = Op->getAsAPIntVal().zextOrTrunc(EVTBits);
13980 if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
13981 Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
13982 else
13983 Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
13984 }
13985
13986 return DAG.getBuildVector(VT, DL, Elts);
13987}
13988
13989// ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
13990// "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
13991// transformation. Returns true if the extensions are possible and the above
13992// mentioned transformation is profitable.
13993static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0,
13994 unsigned ExtOpc,
13995 SmallVectorImpl<SDNode *> &ExtendNodes,
13996 const TargetLowering &TLI) {
13997 bool HasCopyToRegUses = false;
13998 bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType());
13999 for (SDUse &Use : N0->uses()) {
14000 SDNode *User = Use.getUser();
14001 if (User == N)
14002 continue;
14003 if (Use.getResNo() != N0.getResNo())
14004 continue;
14005 // FIXME: Only extend SETCC N, N and SETCC N, c for now.
14006 if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
14007 ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
14008 if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
14009 // Sign bits will be lost after a zext.
14010 return false;
14011 bool Add = false;
14012 for (unsigned i = 0; i != 2; ++i) {
14013 SDValue UseOp = User->getOperand(i);
14014 if (UseOp == N0)
14015 continue;
14016 if (!isa<ConstantSDNode>(UseOp))
14017 return false;
14018 Add = true;
14019 }
14020 if (Add)
14021 ExtendNodes.push_back(User);
14022 continue;
14023 }
14024 // If truncates aren't free and there are users we can't
14025 // extend, it isn't worthwhile.
14026 if (!isTruncFree)
14027 return false;
14028 // Remember if this value is live-out.
14029 if (User->getOpcode() == ISD::CopyToReg)
14030 HasCopyToRegUses = true;
14031 }
14032
14033 if (HasCopyToRegUses) {
14034 bool BothLiveOut = false;
14035 for (SDUse &Use : N->uses()) {
14036 if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
14037 BothLiveOut = true;
14038 break;
14039 }
14040 }
14041 if (BothLiveOut)
14042 // Both unextended and extended values are live out. There had better be
14043 // a good reason for the transformation.
14044 return !ExtendNodes.empty();
14045 }
14046 return true;
14047}
14048
14049void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
14050 SDValue OrigLoad, SDValue ExtLoad,
14051 ISD::NodeType ExtType) {
14052 // Extend SetCC uses if necessary.
14053 SDLoc DL(ExtLoad);
14054 for (SDNode *SetCC : SetCCs) {
14055 SmallVector<SDValue, 4> Ops;
14056
14057 for (unsigned j = 0; j != 2; ++j) {
14058 SDValue SOp = SetCC->getOperand(j);
14059 if (SOp == OrigLoad)
14060 Ops.push_back(ExtLoad);
14061 else
14062 Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
14063 }
14064
14065 Ops.push_back(SetCC->getOperand(2));
14066 CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
14067 }
14068}
14069
14070// FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
14071SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
14072 SDValue N0 = N->getOperand(0);
14073 EVT DstVT = N->getValueType(0);
14074 EVT SrcVT = N0.getValueType();
14075
14076 assert((N->getOpcode() == ISD::SIGN_EXTEND ||
14077 N->getOpcode() == ISD::ZERO_EXTEND) &&
14078 "Unexpected node type (not an extend)!");
14079
14080 // fold (sext (load x)) to multiple smaller sextloads; same for zext.
14081 // For example, on a target with legal v4i32, but illegal v8i32, turn:
14082 // (v8i32 (sext (v8i16 (load x))))
14083 // into:
14084 // (v8i32 (concat_vectors (v4i32 (sextload x)),
14085 // (v4i32 (sextload (x + 16)))))
14086 // Where uses of the original load, i.e.:
14087 // (v8i16 (load x))
14088 // are replaced with:
14089 // (v8i16 (truncate
14090 // (v8i32 (concat_vectors (v4i32 (sextload x)),
14091 // (v4i32 (sextload (x + 16)))))))
14092 //
14093 // This combine is only applicable to illegal, but splittable, vectors.
14094 // All legal types, and illegal non-vector types, are handled elsewhere.
14095 // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
14096 //
14097 if (N0->getOpcode() != ISD::LOAD)
14098 return SDValue();
14099
14100 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14101
14102 if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
14103 !N0.hasOneUse() || !LN0->isSimple() ||
14104 !DstVT.isVector() || !DstVT.isPow2VectorType() ||
14105 !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
14106 return SDValue();
14107
14108 SmallVector<SDNode *, 4> SetCCs;
14109 if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI))
14110 return SDValue();
14111
14112 ISD::LoadExtType ExtType =
14113 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
14114
14115 // Try to split the vector types to get down to legal types.
14116 EVT SplitSrcVT = SrcVT;
14117 EVT SplitDstVT = DstVT;
14118 while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
14119 SplitSrcVT.getVectorNumElements() > 1) {
14120 SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
14121 SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
14122 }
14123
14124 if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
14125 return SDValue();
14126
14127 assert(!DstVT.isScalableVector() && "Unexpected scalable vector type");
14128
14129 SDLoc DL(N);
14130 const unsigned NumSplits =
14131 DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
14132 const unsigned Stride = SplitSrcVT.getStoreSize();
14133 SmallVector<SDValue, 4> Loads;
14134 SmallVector<SDValue, 4> Chains;
14135
14136 SDValue BasePtr = LN0->getBasePtr();
14137 for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
14138 const unsigned Offset = Idx * Stride;
14139
14140 SDValue SplitLoad =
14141 DAG.getExtLoad(ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(),
14142 BasePtr, LN0->getPointerInfo().getWithOffset(Offset),
14143 SplitSrcVT, LN0->getBaseAlign(),
14144 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
14145
14146 BasePtr = DAG.getMemBasePlusOffset(BasePtr, TypeSize::getFixed(Stride), DL);
14147
14148 Loads.push_back(SplitLoad.getValue(0));
14149 Chains.push_back(SplitLoad.getValue(1));
14150 }
14151
14152 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
14153 SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
14154
14155 // Simplify TF.
14156 AddToWorklist(NewChain.getNode());
14157
14158 CombineTo(N, NewValue);
14159
14160 // Replace uses of the original load (before extension)
14161 // with a truncate of the concatenated sextloaded vectors.
14162 SDValue Trunc =
14163 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
14164 ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode());
14165 CombineTo(N0.getNode(), Trunc, NewChain);
14166 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14167}
14168
14169// fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
14170// (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
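// For example (with illustrative types):
//   (i64 (zext (and (srl (i32 (load x)), 8), 0xFF)))
//     -> (and (srl (i64 (zextload x)), 8), 0xFF)
// so the zero-extension is folded into the load instead of remaining a
// separate node after the logic/shift ops.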
14171SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
14172 assert(N->getOpcode() == ISD::ZERO_EXTEND);
14173 EVT VT = N->getValueType(0);
14174 EVT OrigVT = N->getOperand(0).getValueType();
14175 if (TLI.isZExtFree(OrigVT, VT))
14176 return SDValue();
14177
14178 // and/or/xor
14179 SDValue N0 = N->getOperand(0);
14180 if (!ISD::isBitwiseLogicOp(N0.getOpcode()) ||
14181 N0.getOperand(1).getOpcode() != ISD::Constant ||
14182 (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT)))
14183 return SDValue();
14184
14185 // shl/shr
14186 SDValue N1 = N0->getOperand(0);
14187 if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) ||
14188 N1.getOperand(1).getOpcode() != ISD::Constant ||
14189 (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT)))
14190 return SDValue();
14191
14192 // load
14193 if (!isa<LoadSDNode>(N1.getOperand(0)))
14194 return SDValue();
14195 LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0));
14196 EVT MemVT = Load->getMemoryVT();
14197 if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) ||
14198 Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
14199 return SDValue();
14200
14201
14202 // If the shift op is SHL, the logic op must be AND, otherwise the result
14203 // will be wrong.
14204 if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND)
14205 return SDValue();
14206
14207 if (!N0.hasOneUse() || !N1.hasOneUse())
14208 return SDValue();
14209
14210 SmallVector<SDNode*, 4> SetCCs;
14211 if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0),
14212 ISD::ZERO_EXTEND, SetCCs, TLI))
14213 return SDValue();
14214
14215 // Actually do the transformation.
14216 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT,
14217 Load->getChain(), Load->getBasePtr(),
14218 Load->getMemoryVT(), Load->getMemOperand());
14219
14220 SDLoc DL1(N1);
14221 SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad,
14222 N1.getOperand(1));
14223
14224 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
14225 SDLoc DL0(N0);
14226 SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
14227 DAG.getConstant(Mask, DL0, VT));
14228
14229 ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
14230 CombineTo(N, And);
14231 if (SDValue(Load, 0).hasOneUse()) {
14232 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
14233 } else {
14234 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load),
14235 Load->getValueType(0), ExtLoad);
14236 CombineTo(Load, Trunc, ExtLoad.getValue(1));
14237 }
14238
14239 // N0 is dead at this point.
14240 recursivelyDeleteUnusedNodes(N0.getNode());
14241
14242 return SDValue(N,0); // Return N so it doesn't get rechecked!
14243}
14244
14245/// If we're narrowing or widening the result of a vector select and the final
14246/// size is the same size as a setcc (compare) feeding the select, then try to
14247/// apply the cast operation to the select's operands because matching vector
14248/// sizes for a select condition and other operands should be more efficient.
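// For example (illustrative):
//   (sext (vselect (setcc v4i32 a, b), v4i16 X, v4i16 Y) to v4i32)
//     -> (vselect (setcc v4i32 a, b), (sext X), (sext Y))
// so the select's data operands match the width of its compare condition.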
14249SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
14250 unsigned CastOpcode = Cast->getOpcode();
14251 assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
14252 CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
14253 CastOpcode == ISD::FP_ROUND) &&
14254 "Unexpected opcode for vector select narrowing/widening");
14255
14256 // We only do this transform before legal ops because the pattern may be
14257 // obfuscated by target-specific operations after legalization. Do not create
14258 // an illegal select op, however, because that may be difficult to lower.
14259 EVT VT = Cast->getValueType(0);
14260 if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
14261 return SDValue();
14262
14263 SDValue VSel = Cast->getOperand(0);
14264 if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() ||
14265 VSel.getOperand(0).getOpcode() != ISD::SETCC)
14266 return SDValue();
14267
14268 // Does the setcc have the same vector size as the casted select?
14269 SDValue SetCC = VSel.getOperand(0);
14270 EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
14271 if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
14272 return SDValue();
14273
14274 // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
14275 SDValue A = VSel.getOperand(1);
14276 SDValue B = VSel.getOperand(2);
14277 SDValue CastA, CastB;
14278 SDLoc DL(Cast);
14279 if (CastOpcode == ISD::FP_ROUND) {
14280 // FP_ROUND (fptrunc) has an extra flag operand to pass along.
14281 CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
14282 CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
14283 } else {
14284 CastA = DAG.getNode(CastOpcode, DL, VT, A);
14285 CastB = DAG.getNode(CastOpcode, DL, VT, B);
14286 }
14287 return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
14288}
14289
14290// fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
14291// fold ([s|z]ext ( extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
14292static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner,
14293 const TargetLowering &TLI, EVT VT,
14294 bool LegalOperations, SDNode *N,
14295 SDValue N0, ISD::LoadExtType ExtLoadType) {
14296 SDNode *N0Node = N0.getNode();
14297 bool isAExtLoad = (ExtLoadType == ISD::SEXTLOAD) ? ISD::isSEXTLoad(N0Node)
14298 : ISD::isZEXTLoad(N0Node);
14299 if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) ||
14300 !ISD::isUNINDEXEDLoad(N0Node) || !N0.hasOneUse())
14301 return SDValue();
14302
14303 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14304 EVT MemVT = LN0->getMemoryVT();
14305 if ((LegalOperations || !LN0->isSimple() ||
14306 VT.isVector()) &&
14307 !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
14308 return SDValue();
14309
14310 SDValue ExtLoad =
14311 DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
14312 LN0->getBasePtr(), MemVT, LN0->getMemOperand());
14313 Combiner.CombineTo(N, ExtLoad);
14314 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
14315 if (LN0->use_empty())
14316 Combiner.recursivelyDeleteUnusedNodes(LN0);
14317 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14318}
14319
14320// fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x)))
14321// Only generate vector extloads when 1) they're legal, and 2) they are
14322// deemed desirable by the target. NonNegZExt can be set to true if a zero
14323// extend has the nonneg flag to allow use of sextload if profitable.
14324static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
14325 const TargetLowering &TLI, EVT VT,
14326 bool LegalOperations, SDNode *N, SDValue N0,
14327 ISD::LoadExtType ExtLoadType,
14328 ISD::NodeType ExtOpc,
14329 bool NonNegZExt = false) {
14330 if (!ISD::isNON_EXTLoad(N0.getNode()) || !ISD::isUNINDEXEDLoad(N0.getNode()))
14331 return {};
14332
14333 // If this is zext nneg, see if it would make sense to treat it as a sext.
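// For a zext with the nneg flag the sign- and zero-extended results are the
// same, so if another user of the loaded value is a signed comparison, a
// sextload lets that compare consume the extended value directly.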
14334 if (NonNegZExt) {
14335 assert(ExtLoadType == ISD::ZEXTLOAD && ExtOpc == ISD::ZERO_EXTEND &&
14336 "Unexpected load type or opcode");
14337 for (SDNode *User : N0->users()) {
14338 if (User->getOpcode() == ISD::SETCC) {
14339 ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
14340 if (ISD::isSignedIntSetCC(CC)) {
14341 ExtLoadType = ISD::SEXTLOAD;
14342 ExtOpc = ISD::SIGN_EXTEND;
14343 break;
14344 }
14345 }
14346 }
14347 }
14348
14349 // TODO: isFixedLengthVector() should be removed, with any negative effects
14350 // on code generation being handled by the target's implementation of
14351 // isVectorLoadExtDesirable().
14352 if ((LegalOperations || VT.isFixedLengthVector() ||
14353 !cast<LoadSDNode>(N0)->isSimple()) &&
14354 !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType()))
14355 return {};
14356
14357 bool DoXform = true;
14358 SmallVector<SDNode *, 4> SetCCs;
14359 if (!N0.hasOneUse())
14360 DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ExtOpc, SetCCs, TLI);
14361 if (VT.isVector())
14362 DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
14363 if (!DoXform)
14364 return {};
14365
14366 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14367 SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
14368 LN0->getBasePtr(), N0.getValueType(),
14369 LN0->getMemOperand());
14370 Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc);
14371 // If the load value is used only by N, replace it via CombineTo N.
14372 bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
14373 Combiner.CombineTo(N, ExtLoad);
14374 if (NoReplaceTrunc) {
14375 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
14376 Combiner.recursivelyDeleteUnusedNodes(LN0);
14377 } else {
14378 SDValue Trunc =
14379 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
14380 Combiner.CombineTo(LN0, Trunc, ExtLoad.getValue(1));
14381 }
14382 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14383}
14384
14385static SDValue
14386tryToFoldExtOfMaskedLoad(SelectionDAG &DAG, const TargetLowering &TLI, EVT VT,
14387 bool LegalOperations, SDNode *N, SDValue N0,
14388 ISD::LoadExtType ExtLoadType, ISD::NodeType ExtOpc) {
14389 if (!N0.hasOneUse())
14390 return SDValue();
14391
14392 auto *Ld = dyn_cast<MaskedLoadSDNode>(N0);
14393 if (!Ld || Ld->getExtensionType() != ISD::NON_EXTLOAD)
14394 return SDValue();
14395
14396 if ((LegalOperations || !cast<MaskedLoadSDNode>(N0)->isSimple()) &&
14397 !TLI.isLoadExtLegalOrCustom(ExtLoadType, VT, Ld->getValueType(0)))
14398 return SDValue();
14399
14400 if (!TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
14401 return SDValue();
14402
14403 SDLoc dl(Ld);
14404 SDValue PassThru = DAG.getNode(ExtOpc, dl, VT, Ld->getPassThru());
14405 SDValue NewLoad = DAG.getMaskedLoad(
14406 VT, dl, Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(), Ld->getMask(),
14407 PassThru, Ld->getMemoryVT(), Ld->getMemOperand(), Ld->getAddressingMode(),
14408 ExtLoadType, Ld->isExpandingLoad());
14409 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), SDValue(NewLoad.getNode(), 1));
14410 return NewLoad;
14411}
14412
14413// fold ([s|z]ext (atomic_load)) -> ([s|z]ext (truncate ([s|z]ext atomic_load)))
14414static SDValue tryToFoldExtOfAtomicLoad(SelectionDAG &DAG,
14415 const TargetLowering &TLI, EVT VT,
14416 SDValue N0,
14417 ISD::LoadExtType ExtLoadType) {
14418 auto *ALoad = dyn_cast<AtomicSDNode>(N0);
14419 if (!ALoad || ALoad->getOpcode() != ISD::ATOMIC_LOAD)
14420 return {};
14421 EVT MemoryVT = ALoad->getMemoryVT();
14422 if (!TLI.isAtomicLoadExtLegal(ExtLoadType, VT, MemoryVT))
14423 return {};
14424 // Can't fold into ALoad if it is already extending differently.
14425 ISD::LoadExtType ALoadExtTy = ALoad->getExtensionType();
14426 if ((ALoadExtTy == ISD::ZEXTLOAD && ExtLoadType == ISD::SEXTLOAD) ||
14427 (ALoadExtTy == ISD::SEXTLOAD && ExtLoadType == ISD::ZEXTLOAD))
14428 return {};
14429
14430 EVT OrigVT = ALoad->getValueType(0);
14431 assert(OrigVT.getSizeInBits() < VT.getSizeInBits() && "VT should be wider.");
14432 auto *NewALoad = cast<AtomicSDNode>(DAG.getAtomicLoad(
14433 ExtLoadType, SDLoc(ALoad), MemoryVT, VT, ALoad->getChain(),
14434 ALoad->getBasePtr(), ALoad->getMemOperand()));
14435 DAG.ReplaceAllUsesOfValueWith(
14436 SDValue(ALoad, 0),
14437 DAG.getNode(ISD::TRUNCATE, SDLoc(ALoad), OrigVT, SDValue(NewALoad, 0)));
14438 // Update the chain uses.
14439 DAG.ReplaceAllUsesOfValueWith(SDValue(ALoad, 1), SDValue(NewALoad, 1));
14440 return SDValue(NewALoad, 0);
14441}
14442
14443static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG,
14444 bool LegalOperations) {
14445 assert((N->getOpcode() == ISD::SIGN_EXTEND ||
14446 N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext");
14447
14448 SDValue SetCC = N->getOperand(0);
14449 if (LegalOperations || SetCC.getOpcode() != ISD::SETCC ||
14450 !SetCC.hasOneUse() || SetCC.getValueType() != MVT::i1)
14451 return SDValue();
14452
14453 SDValue X = SetCC.getOperand(0);
14454 SDValue Ones = SetCC.getOperand(1);
14455 ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
14456 EVT VT = N->getValueType(0);
14457 EVT XVT = X.getValueType();
14458 // setge X, C is canonicalized to setgt, so we do not need to match that
14459 // pattern. The setlt sibling is folded in SimplifySelectCC() because it does
14460 // not require the 'not' op.
14461 if (CC == ISD::SETGT && isAllOnesConstant(Ones) && VT == XVT) {
14462 // Invert and smear/shift the sign bit:
14463 // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
14464 // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
14465 SDLoc DL(N);
14466 unsigned ShCt = VT.getSizeInBits() - 1;
14467 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14468 if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
14469 SDValue NotX = DAG.getNOT(DL, X, VT);
14470 SDValue ShiftAmount = DAG.getConstant(ShCt, DL, VT);
14471 auto ShiftOpcode =
14472 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL;
14473 return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount);
14474 }
14475 }
14476 return SDValue();
14477}
14478
14479SDValue DAGCombiner::foldSextSetcc(SDNode *N) {
14480 SDValue N0 = N->getOperand(0);
14481 if (N0.getOpcode() != ISD::SETCC)
14482 return SDValue();
14483
14484 SDValue N00 = N0.getOperand(0);
14485 SDValue N01 = N0.getOperand(1);
14486 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
14487 EVT VT = N->getValueType(0);
14488 EVT N00VT = N00.getValueType();
14489 SDLoc DL(N);
14490
14491 // Propagate fast-math-flags.
14492 SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
14493
14494 // On some architectures (such as SSE/NEON/etc) the SETCC result type is
14495 // the same size as the compared operands. Try to optimize sext(setcc())
14496 // if this is the case.
14497 if (VT.isVector() && !LegalOperations &&
14498 TLI.getBooleanContents(N00VT) ==
14499 TargetLowering::ZeroOrNegativeOneBooleanContent) {
14500 EVT SVT = getSetCCResultType(N00VT);
14501
14502 // If we already have the desired type, don't change it.
14503 if (SVT != N0.getValueType()) {
14504 // We know that the # elements of the results is the same as the
14505 // # elements of the compare (and the # elements of the compare result
14506 // for that matter). Check to see that they are the same size. If so,
14507 // we know that the element size of the sext'd result matches the
14508 // element size of the compare operands.
14509 if (VT.getSizeInBits() == SVT.getSizeInBits())
14510 return DAG.getSetCC(DL, VT, N00, N01, CC);
14511
14512 // If the desired elements are smaller or larger than the source
14513 // elements, we can use a matching integer vector type and then
14514 // truncate/sign extend.
14515 EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
14516 if (SVT == MatchingVecType) {
14517 SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
14518 return DAG.getSExtOrTrunc(VsetCC, DL, VT);
14519 }
14520 }
14521
14522 // Try to eliminate the sext of a setcc by zexting the compare operands.
14523 if (N0.hasOneUse() && TLI.isOperationLegalOrCustom(ISD::SETCC, VT) &&
14524 !TLI.isOperationLegalOrCustom(ISD::SETCC, SVT)) {
14525 bool IsSignedCmp = ISD::isSignedIntSetCC(CC);
14526 unsigned LoadOpcode = IsSignedCmp ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
14527 unsigned ExtOpcode = IsSignedCmp ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
14528
14529 // We have an unsupported narrow vector compare op that would be legal
14530 // if extended to the destination type. See if the compare operands
14531 // can be freely extended to the destination type.
14532 auto IsFreeToExtend = [&](SDValue V) {
14533 if (isConstantOrConstantVector(V, /*NoOpaques*/ true))
14534 return true;
14535 // Match a simple, non-extended load that can be converted to a
14536 // legal {z/s}ext-load.
14537 // TODO: Allow widening of an existing {z/s}ext-load?
14538 if (!(ISD::isNON_EXTLoad(V.getNode()) &&
14539 ISD::isUNINDEXEDLoad(V.getNode()) &&
14540 cast<LoadSDNode>(V)->isSimple() &&
14541 TLI.isLoadExtLegal(LoadOpcode, VT, V.getValueType())))
14542 return false;
14543
14544 // Non-chain users of this value must either be the setcc in this
14545 // sequence or extends that can be folded into the new {z/s}ext-load.
14546 for (SDUse &Use : V->uses()) {
14547 // Skip uses of the chain and the setcc.
14548 SDNode *User = Use.getUser();
14549 if (Use.getResNo() != 0 || User == N0.getNode())
14550 continue;
14551 // Extra users must have exactly the same cast we are about to create.
14552 // TODO: This restriction could be eased if ExtendUsesToFormExtLoad()
14553 // is enhanced similarly.
14554 if (User->getOpcode() != ExtOpcode || User->getValueType(0) != VT)
14555 return false;
14556 }
14557 return true;
14558 };
14559
14560 if (IsFreeToExtend(N00) && IsFreeToExtend(N01)) {
14561 SDValue Ext0 = DAG.getNode(ExtOpcode, DL, VT, N00);
14562 SDValue Ext1 = DAG.getNode(ExtOpcode, DL, VT, N01);
14563 return DAG.getSetCC(DL, VT, Ext0, Ext1, CC);
14564 }
14565 }
14566 }
14567
14568 // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
14569 // Here, T can be 1 or -1, depending on the type of the setcc and
14570 // getBooleanContents().
14571 unsigned SetCCWidth = N0.getScalarValueSizeInBits();
14572
14573 // To determine the "true" side of the select, we need to know the high bit
14574 // of the value returned by the setcc if it evaluates to true.
14575 // If the type of the setcc is i1, then the true case of the select is just
14576 // sext(i1 1), that is, -1.
14577 // If the type of the setcc is larger (say, i8) then the value of the high
14578 // bit depends on getBooleanContents(), so ask TLI for a real "true" value
14579 // of the appropriate width.
14580 SDValue ExtTrueVal = (SetCCWidth == 1)
14581 ? DAG.getAllOnesConstant(DL, VT)
14582 : DAG.getBoolConstant(true, DL, VT, N00VT);
14583 SDValue Zero = DAG.getConstant(0, DL, VT);
14584 if (SDValue SCC = SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
14585 return SCC;
14586
14587 if (!VT.isVector() && !shouldConvertSelectOfConstantsToMath(N0, VT, TLI)) {
14588 EVT SetCCVT = getSetCCResultType(N00VT);
14589 // Don't do this transform for i1 because there's a select transform
14590 // that would reverse it.
14591 // TODO: We should not do this transform at all without a target hook
14592 // because a sext is likely cheaper than a select?
14593 if (SetCCVT.getScalarSizeInBits() != 1 &&
14594 (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
14595 SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
14596 return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
14597 }
14598 }
14599
14600 return SDValue();
14601}
14602
14603SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
14604 SDValue N0 = N->getOperand(0);
14605 EVT VT = N->getValueType(0);
14606 SDLoc DL(N);
14607
14608 if (VT.isVector())
14609 if (SDValue FoldedVOp = SimplifyVCastOp(N, DL))
14610 return FoldedVOp;
14611
14612 // sext(undef) = 0 because the top bits will all be the same.
14613 if (N0.isUndef())
14614 return DAG.getConstant(0, DL, VT);
14615
14616 if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
14617 return Res;
14618
14619 // fold (sext (sext x)) -> (sext x)
14620 // fold (sext (aext x)) -> (sext x)
14621 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
14622 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));
14623
14624 // fold (sext (aext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
14625 // fold (sext (sext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
14626 if (N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
14627 N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG)
14628 return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT,
14629 N0.getOperand(0));
14630
14631 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
14632 SDValue N00 = N0.getOperand(0);
14633 EVT ExtVT = cast<VTSDNode>(N0->getOperand(1))->getVT();
14634 if (N00.getOpcode() == ISD::TRUNCATE || TLI.isTruncateFree(N00, ExtVT)) {
14635 // fold (sext (sext_inreg x)) -> (sext (trunc x))
14636 if ((!LegalTypes || TLI.isTypeLegal(ExtVT))) {
14637 SDValue T = DAG.getNode(ISD::TRUNCATE, DL, ExtVT, N00);
14638 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, T);
14639 }
14640
14641 // If the trunc wasn't legal, try to fold to (sext_inreg (anyext x))
14642 if (!LegalTypes || TLI.isTypeLegal(VT)) {
14643 SDValue ExtSrc = DAG.getAnyExtOrTrunc(N00, DL, VT);
14644 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, ExtSrc,
14645 N0->getOperand(1));
14646 }
14647 }
14648 }
14649
14650 if (N0.getOpcode() == ISD::TRUNCATE) {
14651 // fold (sext (truncate (load x))) -> (sext (smaller load x))
14652 // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
14653 if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
14654 SDNode *oye = N0.getOperand(0).getNode();
14655 if (NarrowLoad.getNode() != N0.getNode()) {
14656 CombineTo(N0.getNode(), NarrowLoad);
14657 // CombineTo deleted the truncate, if needed, but not what's under it.
14658 AddToWorklist(oye);
14659 }
14660 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14661 }
14662
14663 // See if the value being truncated is already sign extended. If so, just
14664 // eliminate the trunc/sext pair.
14665 SDValue Op = N0.getOperand(0);
14666 unsigned OpBits = Op.getScalarValueSizeInBits();
14667 unsigned MidBits = N0.getScalarValueSizeInBits();
14668 unsigned DestBits = VT.getScalarSizeInBits();
14669
14670 if (N0->getFlags().hasNoSignedWrap() ||
14671 DAG.ComputeNumSignBits(Op) > OpBits - MidBits) {
14672 if (OpBits == DestBits) {
14673 // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
14674 // bits, it is already the value we need.
14675 return Op;
14676 }
14677
14678 if (OpBits < DestBits) {
14679 // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
14680 // bits, just sext from i32.
14681 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
14682 }
14683
14684 // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
14685 // bits, just truncate to i32.
14686 SDNodeFlags Flags;
14687 Flags.setNoSignedWrap(true);
14688 Flags.setNoUnsignedWrap(N0->getFlags().hasNoUnsignedWrap());
14689 return DAG.getNode(ISD::TRUNCATE, DL, VT, Op, Flags);
14690 }
14691
14692 // fold (sext (truncate x)) -> (sextinreg x).
14693 if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
14694 N0.getValueType())) {
14695 if (OpBits < DestBits)
14696 Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
14697 else if (OpBits > DestBits)
14698 Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
14699 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
14700 DAG.getValueType(N0.getValueType()));
14701 }
14702 }
14703
14704 // Try to simplify (sext (load x)).
14705 if (SDValue foldedExt =
14706 tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
14707 ISD::SEXTLOAD, ISD::SIGN_EXTEND))
14708 return foldedExt;
14709
14710 if (SDValue foldedExt =
14711 tryToFoldExtOfMaskedLoad(DAG, TLI, VT, LegalOperations, N, N0,
14712 ISD::SEXTLOAD, ISD::SIGN_EXTEND))
14713 return foldedExt;
14714
14715 // fold (sext (load x)) to multiple smaller sextloads.
14716 // Only on illegal but splittable vectors.
14717 if (SDValue ExtLoad = CombineExtLoad(N))
14718 return ExtLoad;
14719
14720 // Try to simplify (sext (sextload x)).
14721 if (SDValue foldedExt = tryToFoldExtOfExtload(
14722 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD))
14723 return foldedExt;
14724
14725 // Try to simplify (sext (atomic_load x)).
14726 if (SDValue foldedExt =
14727 tryToFoldExtOfAtomicLoad(DAG, TLI, VT, N0, ISD::SEXTLOAD))
14728 return foldedExt;
14729
14730 // fold (sext (and/or/xor (load x), cst)) ->
14731 // (and/or/xor (sextload x), (sext cst))
14732 if (ISD::isBitwiseLogicOp(N0.getOpcode()) &&
14733 isa<LoadSDNode>(N0.getOperand(0)) &&
14734 N0.getOperand(1).getOpcode() == ISD::Constant &&
14735 (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
14736 LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
14737 EVT MemVT = LN00->getMemoryVT();
14738 if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) &&
14739 LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) {
14740 SmallVector<SDNode*, 4> SetCCs;
14741 bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
14742 ISD::SIGN_EXTEND, SetCCs, TLI);
14743 if (DoXform) {
14744 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT,
14745 LN00->getChain(), LN00->getBasePtr(),
14746 LN00->getMemoryVT(),
14747 LN00->getMemOperand());
14748 APInt Mask = N0.getConstantOperandAPInt(1).sext(VT.getSizeInBits());
14749 SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
14750 ExtLoad, DAG.getConstant(Mask, DL, VT));
14751 ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND);
14752 bool NoReplaceTruncAnd = !N0.hasOneUse();
14753 bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
14754 CombineTo(N, And);
14755 // If N0 has multiple uses, change other uses as well.
14756 if (NoReplaceTruncAnd) {
14757 SDValue TruncAnd =
14758 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), And);
14759 CombineTo(N0.getNode(), TruncAnd);
14760 }
14761 if (NoReplaceTrunc) {
14762 DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
14763 } else {
14764 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
14765 LN00->getValueType(0), ExtLoad);
14766 CombineTo(LN00, Trunc, ExtLoad.getValue(1));
14767 }
14768 return SDValue(N,0); // Return N so it doesn't get rechecked!
14769 }
14770 }
14771 }
14772
14773 if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
14774 return V;
14775
14776 if (SDValue V = foldSextSetcc(N))
14777 return V;
14778
14779 // fold (sext x) -> (zext x) if the sign bit is known zero.
14780 if (!TLI.isSExtCheaperThanZExt(N0.getValueType(), VT) &&
14781 (!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
14782 DAG.SignBitIsZero(N0))
14783 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0, SDNodeFlags::NonNeg);
14784
14785 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
14786 return NewVSel;
14787
14788 // Eliminate this sign extend by doing a negation in the destination type:
14789 // sext i32 (0 - (zext i8 X to i32)) to i64 --> 0 - (zext i8 X to i64)
14790 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
14791 isNullOrNullSplat(N0.getOperand(0)) &&
14792 N0.getOperand(1).getOpcode() == ISD::ZERO_EXTEND &&
14793 TLI.isOperationLegalOrCustom(ISD::SUB, VT)) {
14794 SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(1).getOperand(0), DL, VT);
14795 return DAG.getNegative(Zext, DL, VT);
14796 }
14797 // Eliminate this sign extend by doing a decrement in the destination type:
14798 // sext i32 ((zext i8 X to i32) + (-1)) to i64 --> (zext i8 X to i64) + (-1)
14799 if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
14800 N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
14801 isAllOnesOrAllOnesSplat(N0.getOperand(1)) &&
14802 TLI.isOperationLegalOrCustom(ISD::ADD, VT)) {
14803 SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
14804 return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
14805 }
14806
14807 // fold sext (not i1 X) -> add (zext i1 X), -1
14808 // TODO: This could be extended to handle bool vectors.
14809 if (N0.getValueType() == MVT::i1 && isBitwiseNot(N0) && N0.hasOneUse() &&
14810 (!LegalOperations || (TLI.isOperationLegal(ISD::ZERO_EXTEND, VT) &&
14811 TLI.isOperationLegal(ISD::ADD, VT)))) {
14812 // If we can eliminate the 'not', the sext form should be better
14813 if (SDValue NewXor = visitXOR(N0.getNode())) {
14814 // Returning N0 is a form of in-visit replacement that may have
14815 // invalidated N0.
14816 if (NewXor.getNode() == N0.getNode()) {
14817 // Return SDValue here as the xor should have already been replaced in
14818 // this sext.
14819 return SDValue();
14820 }
14821
14822 // Return a new sext with the new xor.
14823 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NewXor);
14824 }
14825
14826 SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
14827 return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
14828 }
14829
14830 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, DL, Level))
14831 return Res;
14832
14833 return SDValue();
14834}
14835
14836/// Given an extending node with a pop-count operand, if the target does not
14837/// support a pop-count in the narrow source type but does support it in the
14838/// destination type, widen the pop-count to the destination type.
14839static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG, const SDLoc &DL) {
14840 assert((Extend->getOpcode() == ISD::ZERO_EXTEND ||
14841 Extend->getOpcode() == ISD::ANY_EXTEND) &&
14842 "Expected extend op");
14843
14844 SDValue CtPop = Extend->getOperand(0);
14845 if (CtPop.getOpcode() != ISD::CTPOP || !CtPop.hasOneUse())
14846 return SDValue();
14847
14848 EVT VT = Extend->getValueType(0);
14849 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14850 if (TLI.isOperationLegalOrCustom(ISD::CTPOP, CtPop.getValueType()) ||
14851 !TLI.isOperationLegalOrCustom(ISD::CTPOP, VT))
14852 return SDValue();
14853
14854 // zext (ctpop X) --> ctpop (zext X)
14855 SDValue NewZext = DAG.getZExtOrTrunc(CtPop.getOperand(0), DL, VT);
14856 return DAG.getNode(ISD::CTPOP, DL, VT, NewZext);
14857}
14858
14859// If we have (zext (abs X)) where X is a type that will be promoted by type
14860// legalization, convert to (abs (sext X)). But don't extend past a legal type.
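// For example (illustrative, on a target that promotes i8 to i32):
//   (i64 (zext (i8 (abs X)))) -> (i64 (zext (i32 (abs (i32 (sext X))))))
// so the abs is already performed in the type the legalizer would promote to.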
14861static SDValue widenAbs(SDNode *Extend, SelectionDAG &DAG) {
14862 assert(Extend->getOpcode() == ISD::ZERO_EXTEND && "Expected zero extend.");
14863
14864 EVT VT = Extend->getValueType(0);
14865 if (VT.isVector())
14866 return SDValue();
14867
14868 SDValue Abs = Extend->getOperand(0);
14869 if (Abs.getOpcode() != ISD::ABS || !Abs.hasOneUse())
14870 return SDValue();
14871
14872 EVT AbsVT = Abs.getValueType();
14873 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14874 if (TLI.getTypeAction(*DAG.getContext(), AbsVT) !=
14875 TargetLowering::TypePromoteInteger)
14876 return SDValue();
14877
14878 EVT LegalVT = TLI.getTypeToTransformTo(*DAG.getContext(), AbsVT);
14879
14880 SDValue SExt =
14881 DAG.getNode(ISD::SIGN_EXTEND, SDLoc(Abs), LegalVT, Abs.getOperand(0));
14882 SDValue NewAbs = DAG.getNode(ISD::ABS, SDLoc(Abs), LegalVT, SExt);
14883 return DAG.getZExtOrTrunc(NewAbs, SDLoc(Extend), VT);
14884}
14885
14886SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
14887 SDValue N0 = N->getOperand(0);
14888 EVT VT = N->getValueType(0);
14889 SDLoc DL(N);
14890
14891 if (VT.isVector())
14892 if (SDValue FoldedVOp = SimplifyVCastOp(N, DL))
14893 return FoldedVOp;
14894
14895 // zext(undef) = 0
14896 if (N0.isUndef())
14897 return DAG.getConstant(0, DL, VT);
14898
14899 if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
14900 return Res;
14901
14902 // fold (zext (zext x)) -> (zext x)
14903 // fold (zext (aext x)) -> (zext x)
14904 if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
14905 SDNodeFlags Flags;
14906 if (N0.getOpcode() == ISD::ZERO_EXTEND)
14907 Flags.setNonNeg(N0->getFlags().hasNonNeg());
14908 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0), Flags);
14909 }
14910
14911 // fold (zext (aext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
14912 // fold (zext (zext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
14913 if (N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG ||
14914 N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG)
14915 return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, DL, VT, N0.getOperand(0));
14916
14917 // fold (zext (truncate x)) -> (zext x) or
14918 // (zext (truncate x)) -> (truncate x)
14919 // This is valid when the truncated bits of x are already zero.
14920 SDValue Op;
14921 KnownBits Known;
14922 if (isTruncateOf(DAG, N0, Op, Known)) {
14923 APInt TruncatedBits =
14924 (Op.getScalarValueSizeInBits() == N0.getScalarValueSizeInBits()) ?
14925 APInt(Op.getScalarValueSizeInBits(), 0) :
14926 APInt::getBitsSet(Op.getScalarValueSizeInBits(),
14927 N0.getScalarValueSizeInBits(),
14928 std::min(Op.getScalarValueSizeInBits(),
14929 VT.getScalarSizeInBits()));
14930 if (TruncatedBits.isSubsetOf(Known.Zero)) {
14931 SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, DL, VT);
14932 DAG.salvageDebugInfo(*N0.getNode());
14933
14934 return ZExtOrTrunc;
14935 }
14936 }
14937
14938 // fold (zext (truncate x)) -> (and x, mask)
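// For example, (zext (trunc i32 X to i8) to i32) becomes (and X, 255), which
// avoids the truncate/extend pair when an AND of X is no more expensive.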
14939 if (N0.getOpcode() == ISD::TRUNCATE) {
14940 // fold (zext (truncate (load x))) -> (zext (smaller load x))
14941 // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
14942 if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
14943 SDNode *oye = N0.getOperand(0).getNode();
14944 if (NarrowLoad.getNode() != N0.getNode()) {
14945 CombineTo(N0.getNode(), NarrowLoad);
14946 // CombineTo deleted the truncate, if needed, but not what's under it.
14947 AddToWorklist(oye);
14948 }
14949 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14950 }
14951
14952 EVT SrcVT = N0.getOperand(0).getValueType();
14953 EVT MinVT = N0.getValueType();
14954
14955 if (N->getFlags().hasNonNeg()) {
14956 SDValue Op = N0.getOperand(0);
14957 unsigned OpBits = SrcVT.getScalarSizeInBits();
14958 unsigned MidBits = MinVT.getScalarSizeInBits();
14959 unsigned DestBits = VT.getScalarSizeInBits();
14960
14961 if (N0->getFlags().hasNoSignedWrap() ||
14962 DAG.ComputeNumSignBits(Op) > OpBits - MidBits) {
14963 if (OpBits == DestBits) {
14964 // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
14965 // bits, it is already the value we need.
14966 return Op;
14967 }
14968
14969 if (OpBits < DestBits) {
14970 // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
14971 // bits, just sext from i32.
14972 // FIXME: This can probably be ZERO_EXTEND nneg?
14973 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
14974 }
14975
14976 // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
14977 // bits, just truncate to i32.
14978 SDNodeFlags Flags;
14979 Flags.setNoSignedWrap(true);
14980 Flags.setNoUnsignedWrap(true);
14981 return DAG.getNode(ISD::TRUNCATE, DL, VT, Op, Flags);
14982 }
14983 }
14984
14985 // Try to mask before the extension to avoid having to generate a larger mask,
14986 // possibly over several sub-vectors.
14987 if (SrcVT.bitsLT(VT) && VT.isVector()) {
14988 if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
14990 SDValue Op = N0.getOperand(0);
14991 Op = DAG.getZeroExtendInReg(Op, DL, MinVT);
14992 AddToWorklist(Op.getNode());
14993 SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, DL, VT);
14994 // Transfer the debug info; the new node is equivalent to N0.
14995 DAG.transferDbgValues(N0, ZExtOrTrunc);
14996 return ZExtOrTrunc;
14997 }
14998 }
14999
15000 if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
15001 SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), DL, VT);
15002 AddToWorklist(Op.getNode());
15003 SDValue And = DAG.getZeroExtendInReg(Op, DL, MinVT);
15004 // We may safely transfer the debug info describing the truncate node over
15005 // to the equivalent and operation.
15006 DAG.transferDbgValues(N0, And);
15007 return And;
15008 }
15009 }
15010
15011 // Fold (zext (and (trunc x), cst)) -> (and x, cst),
15012 // if either of the casts is not free.
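// For example, (zext (and (trunc i64 X to i32), 7) to i64) -> (and X, 7),
// since the mask already clears the bits that the truncate would have dropped.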
15013 if (N0.getOpcode() == ISD::AND &&
15014 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
15015 N0.getOperand(1).getOpcode() == ISD::Constant &&
15016 (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0), N0.getValueType()) ||
15017 !TLI.isZExtFree(N0.getValueType(), VT))) {
15018 SDValue X = N0.getOperand(0).getOperand(0);
15019 X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
15020 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
15021 return DAG.getNode(ISD::AND, DL, VT,
15022 X, DAG.getConstant(Mask, DL, VT));
15023 }
15024
15025 // Try to simplify (zext (load x)).
15026 if (SDValue foldedExt = tryToFoldExtOfLoad(
15027 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD,
15028 ISD::ZERO_EXTEND, N->getFlags().hasNonNeg()))
15029 return foldedExt;
15030
15031 if (SDValue foldedExt =
15032 tryToFoldExtOfMaskedLoad(DAG, TLI, VT, LegalOperations, N, N0,
15033 ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
15034 return foldedExt;
15035
15036 // fold (zext (load x)) to multiple smaller zextloads.
15037 // Only on illegal but splittable vectors.
15038 if (SDValue ExtLoad = CombineExtLoad(N))
15039 return ExtLoad;
15040
15041 // Try to simplify (zext (atomic_load x)).
15042 if (SDValue foldedExt =
15043 tryToFoldExtOfAtomicLoad(DAG, TLI, VT, N0, ISD::ZEXTLOAD))
15044 return foldedExt;
15045
15046 // fold (zext (and/or/xor (load x), cst)) ->
15047 // (and/or/xor (zextload x), (zext cst))
15048 // Unless (and (load x) cst) will match as a zextload already and has
15049 // additional users, or the zext is already free.
15050 if (ISD::isBitwiseLogicOp(N0.getOpcode()) && !TLI.isZExtFree(N0, VT) &&
15051 isa<LoadSDNode>(N0.getOperand(0)) &&
15052 N0.getOperand(1).getOpcode() == ISD::Constant &&
15053 (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
15054 LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
15055 EVT MemVT = LN00->getMemoryVT();
15056 if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) &&
15057 LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
15058 bool DoXform = true;
15059 SmallVector<SDNode*, 4> SetCCs;
15060 if (!N0.hasOneUse()) {
15061 if (N0.getOpcode() == ISD::AND) {
15062 auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
15063 EVT LoadResultTy = AndC->getValueType(0);
15064 EVT ExtVT;
15065 if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT))
15066 DoXform = false;
15067 }
15068 }
15069 if (DoXform)
15070 DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
15071 ISD::ZERO_EXTEND, SetCCs, TLI);
15072 if (DoXform) {
15073 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT,
15074 LN00->getChain(), LN00->getBasePtr(),
15075 LN00->getMemoryVT(),
15076 LN00->getMemOperand());
15077 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
15078 SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
15079 ExtLoad, DAG.getConstant(Mask, DL, VT));
15080 ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
15081 bool NoReplaceTruncAnd = !N0.hasOneUse();
15082 bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
15083 CombineTo(N, And);
15084 // If N0 has multiple uses, change other uses as well.
15085 if (NoReplaceTruncAnd) {
15086 SDValue TruncAnd =
15087 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), And);
15088 CombineTo(N0.getNode(), TruncAnd);
15089 }
15090 if (NoReplaceTrunc) {
15091 DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
15092 } else {
15093 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
15094 LN00->getValueType(0), ExtLoad);
15095 CombineTo(LN00, Trunc, ExtLoad.getValue(1));
15096 }
15097 return SDValue(N,0); // Return N so it doesn't get rechecked!
15098 }
15099 }
15100 }
15101
15102 // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
15103 // (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
15104 if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
15105 return ZExtLoad;
15106
15107 // Try to simplify (zext (zextload x)).
15108 if (SDValue foldedExt = tryToFoldExtOfExtload(
15109 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD))
15110 return foldedExt;
15111
15112 if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
15113 return V;
15114
15115 if (N0.getOpcode() == ISD::SETCC) {
15116 // Propagate fast-math-flags.
15117 SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
15118
15119 // Only do this before legalize for now.
15120 if (!LegalOperations && VT.isVector() &&
15121 N0.getValueType().getVectorElementType() == MVT::i1) {
15122 EVT N00VT = N0.getOperand(0).getValueType();
15123 if (getSetCCResultType(N00VT) == N0.getValueType())
15124 return SDValue();
15125
15126 // We know that the # elements of the results is the same as the #
15127 // elements of the compare (and the # elements of the compare result for
15128 // that matter). Check to see that they are the same size. If so, we know
15129 // that the element size of the sext'd result matches the element size of
15130 // the compare operands.
15131 if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
15132 // zext(setcc) -> zext_in_reg(vsetcc) for vectors.
15133 SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
15134 N0.getOperand(1), N0.getOperand(2));
15135 return DAG.getZeroExtendInReg(VSetCC, DL, N0.getValueType());
15136 }
15137
15138 // If the desired elements are smaller or larger than the source
15139 // elements we can use a matching integer vector type and then
15140 // truncate/any extend followed by zext_in_reg.
15141 EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
15142 SDValue VsetCC =
15143 DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
15144 N0.getOperand(1), N0.getOperand(2));
15145 return DAG.getZeroExtendInReg(DAG.getAnyExtOrTrunc(VsetCC, DL, VT), DL,
15146 N0.getValueType());
15147 }
15148
15149 // zext(setcc x,y,cc) -> zext(select x, y, true, false, cc)
15150 EVT N0VT = N0.getValueType();
15151 EVT N00VT = N0.getOperand(0).getValueType();
15152 if (SDValue SCC = SimplifySelectCC(
15153 DL, N0.getOperand(0), N0.getOperand(1),
15154 DAG.getBoolConstant(true, DL, N0VT, N00VT),
15155 DAG.getBoolConstant(false, DL, N0VT, N00VT),
15156 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
15157 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, SCC);
15158 }
15159
15160 // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
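// The same fold applies when the shift is an srl: because the value being
// shifted already has zero high bits, performing the srl in the wider type
// produces the same result as zero-extending the narrow srl.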
15161 if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
15162 !TLI.isZExtFree(N0, VT)) {
15163 SDValue ShVal = N0.getOperand(0);
15164 SDValue ShAmt = N0.getOperand(1);
15165 if (auto *ShAmtC = dyn_cast<ConstantSDNode>(ShAmt)) {
15166 if (ShVal.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse()) {
15167 if (N0.getOpcode() == ISD::SHL) {
15168 // If the original shl may be shifting out bits, do not perform this
15169 // transformation.
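// e.g. for (zext (shl (zext i8 x to i32), 28)) the inner zext only guarantees
// 24 known-zero high bits, so a shift by 28 could discard bits of x; it is
// only allowed if computeKnownBits proves the shifted-out bits are zero.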
15170 unsigned KnownZeroBits = ShVal.getValueSizeInBits() -
15171 ShVal.getOperand(0).getValueSizeInBits();
15172 if (ShAmtC->getAPIntValue().ugt(KnownZeroBits)) {
15173 // If the shift is too large, then see if we can deduce that the
15174 // shift is safe anyway.
15175
15176 // Check if the bits being shifted out are known to be zero.
15177 KnownBits KnownShVal = DAG.computeKnownBits(ShVal);
15178 if (ShAmtC->getAPIntValue().ugt(KnownShVal.countMinLeadingZeros()))
15179 return SDValue();
15180 }
15181 }
15182
15183 // Ensure that the shift amount is wide enough for the shifted value.
15184 if (Log2_32_Ceil(VT.getSizeInBits()) > ShAmt.getValueSizeInBits())
15185 ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
15186
15187 return DAG.getNode(N0.getOpcode(), DL, VT,
15188 DAG.getNode(ISD::ZERO_EXTEND, DL, VT, ShVal), ShAmt);
15189 }
15190 }
15191 }
15192
15193 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
15194 return NewVSel;
15195
15196 if (SDValue NewCtPop = widenCtPop(N, DAG, DL))
15197 return NewCtPop;
15198
15199 if (SDValue V = widenAbs(N, DAG))
15200 return V;
15201
15202 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, DL, Level))
15203 return Res;
15204
15205 // CSE zext nneg with sext if the zext is not free.
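// A zext carrying the nneg flag has a non-negative operand, so it computes
// the same value as sext; if an equivalent sign_extend node already exists,
// reuse it rather than keeping both forms alive.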
15206 if (N->getFlags().hasNonNeg() && !TLI.isZExtFree(N0.getValueType(), VT)) {
15207 SDNode *CSENode = DAG.getNodeIfExists(ISD::SIGN_EXTEND, N->getVTList(), N0);
15208 if (CSENode)
15209 return SDValue(CSENode, 0);
15210 }
15211
15212 return SDValue();
15213}
15214
15215SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
15216 SDValue N0 = N->getOperand(0);
15217 EVT VT = N->getValueType(0);
15218 SDLoc DL(N);
15219
15220 // aext(undef) = undef
15221 if (N0.isUndef())
15222 return DAG.getUNDEF(VT);
15223
15224 if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
15225 return Res;
15226
15227 // fold (aext (aext x)) -> (aext x)
15228 // fold (aext (zext x)) -> (zext x)
15229 // fold (aext (sext x)) -> (sext x)
15230 if (N0.getOpcode() == ISD::ANY_EXTEND || N0.getOpcode() == ISD::ZERO_EXTEND ||
15231 N0.getOpcode() == ISD::SIGN_EXTEND) {
15232 SDNodeFlags Flags;
15233 if (N0.getOpcode() == ISD::ZERO_EXTEND)
15234 Flags.setNonNeg(N0->getFlags().hasNonNeg());
15235 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0), Flags);
15236 }
15237
15238 // fold (aext (aext_extend_vector_inreg x)) -> (aext_extend_vector_inreg x)
15239 // fold (aext (zext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
15240 // fold (aext (sext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
15244 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0));
15245
15246 // fold (aext (truncate (load x))) -> (aext (smaller load x))
15247 // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
15248 if (N0.getOpcode() == ISD::TRUNCATE) {
15249 if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
15250 SDNode *oye = N0.getOperand(0).getNode();
15251 if (NarrowLoad.getNode() != N0.getNode()) {
15252 CombineTo(N0.getNode(), NarrowLoad);
15253 // CombineTo deleted the truncate, if needed, but not what's under it.
15254 AddToWorklist(oye);
15255 }
15256 return SDValue(N, 0); // Return N so it doesn't get rechecked!
15257 }
15258 }
15259
15260 // fold (aext (truncate x))
15261 if (N0.getOpcode() == ISD::TRUNCATE)
15262 return DAG.getAnyExtOrTrunc(N0.getOperand(0), DL, VT);
15263
15264 // Fold (aext (and (trunc x), cst)) -> (and x, cst)
15265 // if the trunc is not free.
15266 if (N0.getOpcode() == ISD::AND &&
15267 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
15268 N0.getOperand(1).getOpcode() == ISD::Constant &&
15269 !TLI.isTruncateFree(N0.getOperand(0).getOperand(0), N0.getValueType())) {
15270 SDValue X = DAG.getAnyExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
15271 SDValue Y = DAG.getNode(ISD::ANY_EXTEND, DL, VT, N0.getOperand(1));
15272 assert(isa<ConstantSDNode>(Y) && "Expected constant to be folded!");
15273 return DAG.getNode(ISD::AND, DL, VT, X, Y);
15274 }
15275
15276 // fold (aext (load x)) -> (aext (truncate (extload x)))
15277 // None of the supported targets knows how to perform load and any_ext
15278 // on vectors in one instruction, so attempt to fold to zext instead.
15279 if (VT.isVector()) {
15280 // Try to simplify (zext (load x)).
15281 if (SDValue foldedExt =
15282 tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
15284 return foldedExt;
15285 } else if (ISD::isNON_EXTLoad(N0.getNode()) &&
15286 ISD::isUNINDEXEDLoad(N0.getNode()) &&
15287 TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
15288 bool DoXform = true;
15289 SmallVector<SDNode *, 4> SetCCs;
15290 if (!N0.hasOneUse())
15291 DoXform =
15292 ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
15293 if (DoXform) {
15294 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
15295 SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, LN0->getChain(),
15296 LN0->getBasePtr(), N0.getValueType(),
15297 LN0->getMemOperand());
15298 ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);
15299 // If the load value is used only by N, replace it via CombineTo N.
15300 bool NoReplaceTrunc = N0.hasOneUse();
15301 CombineTo(N, ExtLoad);
15302 if (NoReplaceTrunc) {
15303 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
15304 recursivelyDeleteUnusedNodes(LN0);
15305 } else {
15306 SDValue Trunc =
15307 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
15308 CombineTo(LN0, Trunc, ExtLoad.getValue(1));
15309 }
15310 return SDValue(N, 0); // Return N so it doesn't get rechecked!
15311 }
15312 }
15313
15314 // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
15315 // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
15316 // fold (aext ( extload x)) -> (aext (truncate (extload x)))
15317 if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) &&
15318 ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
15319 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
15320 ISD::LoadExtType ExtType = LN0->getExtensionType();
15321 EVT MemVT = LN0->getMemoryVT();
15322 if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
15323 SDValue ExtLoad =
15324 DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), LN0->getBasePtr(),
15325 MemVT, LN0->getMemOperand());
15326 CombineTo(N, ExtLoad);
15327 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
15328 recursivelyDeleteUnusedNodes(LN0);
15329 return SDValue(N, 0); // Return N so it doesn't get rechecked!
15330 }
15331 }
15332
15333 if (N0.getOpcode() == ISD::SETCC) {
15334 // Propagate fast-math-flags.
15335 SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
15336
15337 // For vectors:
15338 // aext(setcc) -> vsetcc
15339 // aext(setcc) -> truncate(vsetcc)
15340 // aext(setcc) -> aext(vsetcc)
15341 // Only do this before legalize for now.
15342 if (VT.isVector() && !LegalOperations) {
15343 EVT N00VT = N0.getOperand(0).getValueType();
15344 if (getSetCCResultType(N00VT) == N0.getValueType())
15345 return SDValue();
15346
15347 // We know that the # elements of the result is the same as the
15348 // # elements of the compare (and the # elements of the compare result
15349 // for that matter). Check to see that they are the same size. If so,
15350 // we know that the element size of the extended result matches the
15351 // element size of the compare operands.
15352 if (VT.getSizeInBits() == N00VT.getSizeInBits())
15353 return DAG.getSetCC(DL, VT, N0.getOperand(0), N0.getOperand(1),
15354 cast<CondCodeSDNode>(N0.getOperand(2))->get());
15355
15356 // If the desired elements are smaller or larger than the source
15357 // elements we can use a matching integer vector type and then
15358 // truncate/any extend
15359 EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
15360 SDValue VsetCC = DAG.getSetCC(
15361 DL, MatchingVectorType, N0.getOperand(0), N0.getOperand(1),
15362 cast<CondCodeSDNode>(N0.getOperand(2))->get());
15363 return DAG.getAnyExtOrTrunc(VsetCC, DL, VT);
15364 }
15365
15366 // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
15367 if (SDValue SCC = SimplifySelectCC(
15368 DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
15369 DAG.getConstant(0, DL, VT),
15370 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
15371 return SCC;
15372 }
15373
15374 if (SDValue NewCtPop = widenCtPop(N, DAG, DL))
15375 return NewCtPop;
15376
15377 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, DL, Level))
15378 return Res;
15379
15380 return SDValue();
15381}
15382
15383SDValue DAGCombiner::visitAssertExt(SDNode *N) {
15384 unsigned Opcode = N->getOpcode();
15385 SDValue N0 = N->getOperand(0);
15386 SDValue N1 = N->getOperand(1);
15387 EVT AssertVT = cast<VTSDNode>(N1)->getVT();
15388
15389 // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
15390 if (N0.getOpcode() == Opcode &&
15391 AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
15392 return N0;
15393
15394 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
15395 N0.getOperand(0).getOpcode() == Opcode) {
15396 // We have an assert, truncate, assert sandwich. Make one stronger assert
15397 // by applying the smallest asserted type to the larger source value.
15398 // This eliminates the later assert:
15399 // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
15400 // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
15401 SDLoc DL(N);
15402 SDValue BigA = N0.getOperand(0);
15403 EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
15404 EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT;
15405 SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
15406 SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
15407 BigA.getOperand(0), MinAssertVTVal);
15408 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
15409 }
15410
15411 // If we have (AssertZext (truncate (AssertSext X, iX)), iY) and Y is smaller
15412 // than X, just move the AssertZext in front of the truncate and drop the
15413 // AssertSext.
15414 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
15416 Opcode == ISD::AssertZext) {
15417 SDValue BigA = N0.getOperand(0);
15418 EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
15419 if (AssertVT.bitsLT(BigA_AssertVT)) {
15420 SDLoc DL(N);
15421 SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
15422 BigA.getOperand(0), N1);
15423 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
15424 }
15425 }
15426
15427 if (Opcode == ISD::AssertZext && N0.getOpcode() == ISD::AND &&
15428 isa<ConstantSDNode>(N0.getOperand(1))) {
15429 const APInt &Mask = N0.getConstantOperandAPInt(1);
15430
15431 // If we have (AssertZext (and (AssertSext X, iX), M), iY) and Y is smaller
15432 // than X, and the And doesn't change the lower iX bits, we can move the
15433 // AssertZext in front of the And and drop the AssertSext.
15434 if (N0.getOperand(0).getOpcode() == ISD::AssertSext && N0.hasOneUse()) {
15435 SDValue BigA = N0.getOperand(0);
15436 EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
15437 if (AssertVT.bitsLT(BigA_AssertVT) &&
15438 Mask.countr_one() >= BigA_AssertVT.getScalarSizeInBits()) {
15439 SDLoc DL(N);
15440 SDValue NewAssert =
15441 DAG.getNode(Opcode, DL, N->getValueType(0), BigA.getOperand(0), N1);
15442 return DAG.getNode(ISD::AND, DL, N->getValueType(0), NewAssert,
15443 N0.getOperand(1));
15444 }
15445 }
15446
15447 // Remove AssertZext entirely if the mask guarantees the assertion cannot
15448 // fail.
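// e.g. (AssertZext (and X, 0xFF), i16): the mask already clears every bit
// above bit 7, so the i16 zero-extension assertion is trivially satisfied.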
15449 // TODO: Use KB countMinLeadingZeros to handle non-constant masks?
15450 if (Mask.isIntN(AssertVT.getScalarSizeInBits()))
15451 return N0;
15452 }
15453
15454 return SDValue();
15455}
15456
15457SDValue DAGCombiner::visitAssertAlign(SDNode *N) {
15458 SDLoc DL(N);
15459
15460 Align AL = cast<AssertAlignSDNode>(N)->getAlign();
15461 SDValue N0 = N->getOperand(0);
15462
15463 // Fold (assertalign (assertalign x, AL0), AL1) ->
15464 // (assertalign x, max(AL0, AL1))
15465 if (auto *AAN = dyn_cast<AssertAlignSDNode>(N0))
15466 return DAG.getAssertAlign(DL, N0.getOperand(0),
15467 std::max(AL, AAN->getAlign()));
15468
15469 // In rare cases, there are trivial arithmetic ops in source operands. Sink
15470 // this assert down to source operands so that those arithmetic ops could be
15471 // exposed to the DAG combining.
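// e.g. for (assertalign (add p, 16), align 8) the constant 16 already has
// enough trailing zero bits, so the alignment fact can be attached to p,
// giving (add (assertalign p, align 8), 16) and exposing the add.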
15472 switch (N0.getOpcode()) {
15473 default:
15474 break;
15475 case ISD::ADD:
15476 case ISD::PTRADD:
15477 case ISD::SUB: {
15478 unsigned AlignShift = Log2(AL);
15479 SDValue LHS = N0.getOperand(0);
15480 SDValue RHS = N0.getOperand(1);
15481 unsigned LHSAlignShift = DAG.computeKnownBits(LHS).countMinTrailingZeros();
15482 unsigned RHSAlignShift = DAG.computeKnownBits(RHS).countMinTrailingZeros();
15483 if (LHSAlignShift >= AlignShift || RHSAlignShift >= AlignShift) {
15484 if (LHSAlignShift < AlignShift)
15485 LHS = DAG.getAssertAlign(DL, LHS, AL);
15486 if (RHSAlignShift < AlignShift)
15487 RHS = DAG.getAssertAlign(DL, RHS, AL);
15488 return DAG.getNode(N0.getOpcode(), DL, N0.getValueType(), LHS, RHS);
15489 }
15490 break;
15491 }
15492 }
15493
15494 return SDValue();
15495}
15496
15497/// If the result of a load is shifted/masked/truncated to an effectively
15498/// narrower type, try to transform the load to a narrower type and/or
15499/// use an extending load.
15500SDValue DAGCombiner::reduceLoadWidth(SDNode *N) {
15501 unsigned Opc = N->getOpcode();
15502
15504 SDValue N0 = N->getOperand(0);
15505 EVT VT = N->getValueType(0);
15506 EVT ExtVT = VT;
15507
15508 // This transformation isn't valid for vector loads.
15509 if (VT.isVector())
15510 return SDValue();
15511
15512 // The ShAmt variable is used to indicate that we've consumed a right
15513 // shift, i.e. we want to narrow the width of the load by skipping the ShAmt
15514 // least significant bits.
15515 unsigned ShAmt = 0;
15516 // A special case is when the least significant bits from the load are masked
15517 // away, but using an AND rather than a right shift. ShiftedOffset is used to
15518 // indicate that the narrowed load should be left-shifted ShiftedOffset bits
15519 // to get the result.
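// e.g. (and (load i32 from x), 0xFF00) may become
// (shl (zext (load i8 from x+1) to i32), 8) on a little-endian target.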
15520 unsigned ShiftedOffset = 0;
15521 // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
15522 // extended to VT.
15523 if (Opc == ISD::SIGN_EXTEND_INREG) {
15524 ExtType = ISD::SEXTLOAD;
15525 ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
15526 } else if (Opc == ISD::SRL || Opc == ISD::SRA) {
15527 // Another special-case: SRL/SRA is basically zero/sign-extending a narrower
15528 // value, or it may be shifting a higher subword, half or byte into the
15529 // lowest bits.
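// e.g. (i32 (sra (load i32 from x), 16)) reads only the upper half, so it may
// become a 16-bit sign-extending load from x+2 on a little-endian target.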
15530
15531 // Only handle shift with constant shift amount, and the shiftee must be a
15532 // load.
15533 auto *LN = dyn_cast<LoadSDNode>(N0);
15534 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
15535 if (!N1C || !LN)
15536 return SDValue();
15537 // If the shift amount is larger than the memory type then we're not
15538 // accessing any of the loaded bytes.
15539 ShAmt = N1C->getZExtValue();
15540 uint64_t MemoryWidth = LN->getMemoryVT().getScalarSizeInBits();
15541 if (MemoryWidth <= ShAmt)
15542 return SDValue();
15543 // Attempt to fold away the SRL by using ZEXTLOAD and SRA by using SEXTLOAD.
15544 ExtType = Opc == ISD::SRL ? ISD::ZEXTLOAD : ISD::SEXTLOAD;
15545 ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt);
15546 // If original load is a SEXTLOAD then we can't simply replace it by a
15547 // ZEXTLOAD (we could potentially replace it by a more narrow SEXTLOAD
15548 // followed by a ZEXT, but that is not handled at the moment). Similarly if
15549 // the original load is a ZEXTLOAD and we want to use a SEXTLOAD.
15550 if ((LN->getExtensionType() == ISD::SEXTLOAD ||
15551 LN->getExtensionType() == ISD::ZEXTLOAD) &&
15552 LN->getExtensionType() != ExtType)
15553 return SDValue();
15554 } else if (Opc == ISD::AND) {
15555 // An AND with a constant mask is the same as a truncate + zero-extend.
15556 auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
15557 if (!AndC)
15558 return SDValue();
15559
15560 const APInt &Mask = AndC->getAPIntValue();
15561 unsigned ActiveBits = 0;
15562 if (Mask.isMask()) {
15563 ActiveBits = Mask.countr_one();
15564 } else if (Mask.isShiftedMask(ShAmt, ActiveBits)) {
15565 ShiftedOffset = ShAmt;
15566 } else {
15567 return SDValue();
15568 }
15569
15570 ExtType = ISD::ZEXTLOAD;
15571 ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
15572 }
15573
15574 // In case Opc==SRL we've already prepared ExtVT/ExtType/ShAmt based on doing
15575 // a right shift. Here we redo some of those checks, to possibly adjust the
15576 // ExtVT even further based on "a masking AND". We could also end up here for
15577 // other reasons (e.g. based on Opc==TRUNCATE) and that is why some checks
15578 // need to be done here as well.
15579 if (Opc == ISD::SRL || N0.getOpcode() == ISD::SRL) {
15580 SDValue SRL = Opc == ISD::SRL ? SDValue(N, 0) : N0;
15581 // Bail out when the SRL has more than one use. This is done for historical
15582 // (undocumented) reasons. Maybe the intent was to guard the AND-masking
15583 // check below? And maybe it could be unprofitable to do the transform when
15584 // the SRL has multiple uses and we get here with Opc!=ISD::SRL?
15585 // FIXME: Can't we just skip this check for the Opc==ISD::SRL case?
15586 if (!SRL.hasOneUse())
15587 return SDValue();
15588
15589 // Only handle shift with constant shift amount, and the shiftee must be a
15590 // load.
15591 auto *LN = dyn_cast<LoadSDNode>(SRL.getOperand(0));
15592 auto *SRL1C = dyn_cast<ConstantSDNode>(SRL.getOperand(1));
15593 if (!SRL1C || !LN)
15594 return SDValue();
15595
15596 // If the shift amount is larger than the input type then we're not
15597 // accessing any of the loaded bytes. If the load was a zextload/extload
15598 // then the result of the shift+trunc is zero/undef (handled elsewhere).
15599 ShAmt = SRL1C->getZExtValue();
15600 uint64_t MemoryWidth = LN->getMemoryVT().getSizeInBits();
15601 if (ShAmt >= MemoryWidth)
15602 return SDValue();
15603
15604 // Because a SRL must be assumed to *need* to zero-extend the high bits
15605 // (as opposed to anyext the high bits), we can't combine the zextload
15606 // lowering of SRL and an sextload.
15607 if (LN->getExtensionType() == ISD::SEXTLOAD)
15608 return SDValue();
15609
15610 // Avoid reading outside the memory accessed by the original load (this could
15611 // happen if we only adjusted the load base pointer by ShAmt). Instead we
15612 // try to narrow the load even further. The typical scenario here is:
15613 // (i64 (truncate (i96 (srl (load x), 64)))) ->
15614 // (i64 (truncate (i96 (zextload (load i32 + offset) from i32))))
15615 if (ExtVT.getScalarSizeInBits() > MemoryWidth - ShAmt) {
15616 // Don't replace sextload by zextload.
15617 if (ExtType == ISD::SEXTLOAD)
15618 return SDValue();
15619 // Narrow the load.
15620 ExtType = ISD::ZEXTLOAD;
15621 ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt);
15622 }
15623
15624 // If the SRL is only used by a masking AND, we may be able to adjust
15625 // the ExtVT to make the AND redundant.
15626 SDNode *Mask = *(SRL->user_begin());
15627 if (SRL.hasOneUse() && Mask->getOpcode() == ISD::AND &&
15628 isa<ConstantSDNode>(Mask->getOperand(1))) {
15629 unsigned Offset, ActiveBits;
15630 const APInt& ShiftMask = Mask->getConstantOperandAPInt(1);
15631 if (ShiftMask.isMask()) {
15632 EVT MaskedVT =
15633 EVT::getIntegerVT(*DAG.getContext(), ShiftMask.countr_one());
15634 // If the mask is smaller, recompute the type.
15635 if ((ExtVT.getScalarSizeInBits() > MaskedVT.getScalarSizeInBits()) &&
15636 TLI.isLoadExtLegal(ExtType, SRL.getValueType(), MaskedVT))
15637 ExtVT = MaskedVT;
15638 } else if (ExtType == ISD::ZEXTLOAD &&
15639 ShiftMask.isShiftedMask(Offset, ActiveBits) &&
15640 (Offset + ShAmt) < VT.getScalarSizeInBits()) {
15641 EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
15642 // If the mask is shifted we can use a narrower load and a shl to insert
15643 // the trailing zeros.
15644 if (((Offset + ActiveBits) <= ExtVT.getScalarSizeInBits()) &&
15645 TLI.isLoadExtLegal(ExtType, SRL.getValueType(), MaskedVT)) {
15646 ExtVT = MaskedVT;
15647 ShAmt = Offset + ShAmt;
15648 ShiftedOffset = Offset;
15649 }
15650 }
15651 }
15652
15653 N0 = SRL.getOperand(0);
15654 }
15655
15656 // If the load is shifted left (and the result isn't shifted back right), we
15657 // can fold a truncate through the shift. The typical scenario is that N
15658 // points at a TRUNCATE here so the attempted fold is:
15659 // (truncate (shl (load x), c))) -> (shl (narrow load x), c)
15660 // ShLeftAmt will indicate how much a narrowed load should be shifted left.
15661 unsigned ShLeftAmt = 0;
15662 if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
15663 ExtVT == VT && TLI.isNarrowingProfitable(N, N0.getValueType(), VT)) {
15664 if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
15665 ShLeftAmt = N01->getZExtValue();
15666 N0 = N0.getOperand(0);
15667 }
15668 }
15669
15670 // If we haven't found a load, we can't narrow it.
15671 if (!isa<LoadSDNode>(N0))
15672 return SDValue();
15673
15674 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
15675 // Reducing the width of a volatile load is illegal. For atomics, we may be
15676 // able to reduce the width provided we never widen again. (see D66309)
15677 if (!LN0->isSimple() ||
15678 !isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
15679 return SDValue();
15680
15681 auto AdjustBigEndianShift = [&](unsigned ShAmt) {
15682 unsigned LVTStoreBits =
15683 LN0->getMemoryVT().getStoreSizeInBits().getFixedValue();
15684 unsigned EVTStoreBits = ExtVT.getStoreSizeInBits().getFixedValue();
15685 return LVTStoreBits - EVTStoreBits - ShAmt;
15686 };
15687
15688 // We need to adjust the pointer to the load by ShAmt bits in order to load
15689 // the correct bytes.
15690 unsigned PtrAdjustmentInBits =
15691 DAG.getDataLayout().isBigEndian() ? AdjustBigEndianShift(ShAmt) : ShAmt;
15692
15693 uint64_t PtrOff = PtrAdjustmentInBits / 8;
15694 SDLoc DL(LN0);
15695 // The original load itself didn't wrap, so an offset within it doesn't.
15696 SDValue NewPtr =
15699 AddToWorklist(NewPtr.getNode());
15700
15701 SDValue Load;
15702 if (ExtType == ISD::NON_EXTLOAD) {
15703 const MDNode *OldRanges = LN0->getRanges();
15704 const MDNode *NewRanges = nullptr;
15705 // If LSBs are loaded and the truncated ConstantRange for the OldRanges
15706 // metadata is not the full set for the new width, then create NewRanges
15707 // metadata for the truncated load.
15708 if (ShAmt == 0 && OldRanges) {
15709 ConstantRange CR = getConstantRangeFromMetadata(*OldRanges);
15710 unsigned BitSize = VT.getScalarSizeInBits();
15711
15712 // It is possible for an 8-bit extending load with 8-bit range
15713 // metadata to be narrowed to an 8-bit load. This guard is necessary to
15714 // ensure that truncation is strictly smaller.
15715 if (CR.getBitWidth() > BitSize) {
15716 ConstantRange TruncatedCR = CR.truncate(BitSize);
15717 if (!TruncatedCR.isFullSet()) {
15718 Metadata *Bounds[2] = {
15720 ConstantInt::get(*DAG.getContext(), TruncatedCR.getLower())),
15722 ConstantInt::get(*DAG.getContext(), TruncatedCR.getUpper()))};
15723 NewRanges = MDNode::get(*DAG.getContext(), Bounds);
15724 }
15725 } else if (CR.getBitWidth() == BitSize)
15726 NewRanges = OldRanges;
15727 }
15728 Load = DAG.getLoad(VT, DL, LN0->getChain(), NewPtr,
15729 LN0->getPointerInfo().getWithOffset(PtrOff),
15730 LN0->getBaseAlign(), LN0->getMemOperand()->getFlags(),
15731 LN0->getAAInfo(), NewRanges);
15732 } else
15733 Load = DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), NewPtr,
15734 LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
15735 LN0->getBaseAlign(), LN0->getMemOperand()->getFlags(),
15736 LN0->getAAInfo());
15737
15738 // Replace the old load's chain with the new load's chain.
15739 WorklistRemover DeadNodes(*this);
15740 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
15741
15742 // Shift the result left, if we've swallowed a left shift.
15743 SDValue Result = Load;
15744 if (ShLeftAmt != 0) {
15745 // If the shift amount is as large as the result size (but, presumably,
15746 // no larger than the source) then the useful bits of the result are
15747 // zero; we can't simply return the shortened shift, because the result
15748 // of that operation is undefined.
15749 if (ShLeftAmt >= VT.getScalarSizeInBits())
15750 Result = DAG.getConstant(0, DL, VT);
15751 else
15752 Result = DAG.getNode(ISD::SHL, DL, VT, Result,
15753 DAG.getShiftAmountConstant(ShLeftAmt, VT, DL));
15754 }
15755
15756 if (ShiftedOffset != 0) {
15757 // We're using a shifted mask, so the load now has an offset. This means
15758 // that data has been loaded into lower bytes than it would otherwise have
15759 // been, so we need to shl the loaded data into the correct position in the
15760 // register.
15761 SDValue ShiftC = DAG.getConstant(ShiftedOffset, DL, VT);
15762 Result = DAG.getNode(ISD::SHL, DL, VT, Result, ShiftC);
15763 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
15764 }
15765
15766 // Return the new loaded value.
15767 return Result;
15768}
15769
15770SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
15771 SDValue N0 = N->getOperand(0);
15772 SDValue N1 = N->getOperand(1);
15773 EVT VT = N->getValueType(0);
15774 EVT ExtVT = cast<VTSDNode>(N1)->getVT();
15775 unsigned VTBits = VT.getScalarSizeInBits();
15776 unsigned ExtVTBits = ExtVT.getScalarSizeInBits();
15777 SDLoc DL(N);
15778
15779 // sext_in_reg(undef) = 0 because the top bits will all be the same.
15780 if (N0.isUndef())
15781 return DAG.getConstant(0, DL, VT);
15782
15783 // fold (sext_in_reg c1) -> c1
15784 if (SDValue C =
15785 DAG.FoldConstantArithmetic(ISD::SIGN_EXTEND_INREG, DL, VT, {N0, N1}))
15786 return C;
15787
15788 // If the input is already sign extended, just drop the extension.
15789 if (ExtVTBits >= DAG.ComputeMaxSignificantBits(N0))
15790 return N0;
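// e.g. (sext_in_reg (sra X, 24), i8) with i32 X: the sra leaves at most 8
// significant bits, so the sext_in_reg is a no-op.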
15791
15792 // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
15793 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
15794 ExtVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
15795 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, N0.getOperand(0), N1);
15796
15797 // fold (sext_in_reg (sext x)) -> (sext x)
15798 // fold (sext_in_reg (aext x)) -> (sext x)
15799 // if x is small enough or if we know that x has more than 1 sign bit and the
15800 // sign_extend_inreg is extending from one of them.
15801 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
15802 SDValue N00 = N0.getOperand(0);
15803 unsigned N00Bits = N00.getScalarValueSizeInBits();
15804 if ((N00Bits <= ExtVTBits ||
15805 DAG.ComputeMaxSignificantBits(N00) <= ExtVTBits) &&
15806 (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
15807 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N00);
15808 }
15809
15810 // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
15811 // if x is small enough or if we know that x has more than 1 sign bit and the
15812 // sign_extend_inreg is extending from one of them.
15814 SDValue N00 = N0.getOperand(0);
15815 unsigned N00Bits = N00.getScalarValueSizeInBits();
15816 bool IsZext = N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
15817 if ((N00Bits == ExtVTBits ||
15818 (!IsZext && (N00Bits < ExtVTBits ||
15819 DAG.ComputeMaxSignificantBits(N00) <= ExtVTBits))) &&
15820 (!LegalOperations ||
15822 return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, DL, VT, N00);
15823 }
15824
15825 // fold (sext_in_reg (zext x)) -> (sext x)
15826 // iff we are extending the source sign bit.
15827 if (N0.getOpcode() == ISD::ZERO_EXTEND) {
15828 SDValue N00 = N0.getOperand(0);
15829 if (N00.getScalarValueSizeInBits() == ExtVTBits &&
15830 (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
15831 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N00);
15832 }
15833
15834 // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
15835 if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, ExtVTBits - 1)))
15836 return DAG.getZeroExtendInReg(N0, DL, ExtVT);
15837
15838 // fold operands of sext_in_reg based on knowledge that the top bits are not
15839 // demanded.
15840 if (SimplifyDemandedBits(SDValue(N, 0)))
15841 return SDValue(N, 0);
15842
15843 // fold (sext_in_reg (load x)) -> (smaller sextload x)
15844 // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
15845 if (SDValue NarrowLoad = reduceLoadWidth(N))
15846 return NarrowLoad;
15847
15848 // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
15849 // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
15850 // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
15851 if (N0.getOpcode() == ISD::SRL) {
15852 if (auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
15853 if (ShAmt->getAPIntValue().ule(VTBits - ExtVTBits)) {
15854 // We can turn this into an SRA iff the input to the SRL is already sign
15855 // extended enough.
15856 unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
15857 if (((VTBits - ExtVTBits) - ShAmt->getZExtValue()) < InSignBits)
15858 return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0),
15859 N0.getOperand(1));
15860 }
15861 }
15862
15863 // fold (sext_inreg (extload x)) -> (sextload x)
15864 // If sextload is not supported by target, we can only do the combine when
15865 // load has one use. Doing otherwise can block folding the extload with other
15866 // extends that the target does support.
15867 if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
15868 ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
15869 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple() &&
15870 N0.hasOneUse()) ||
15871 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
15872 auto *LN0 = cast<LoadSDNode>(N0);
15873 SDValue ExtLoad =
15874 DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(),
15875 LN0->getBasePtr(), ExtVT, LN0->getMemOperand());
15876 CombineTo(N, ExtLoad);
15877 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
15878 AddToWorklist(ExtLoad.getNode());
15879 return SDValue(N, 0); // Return N so it doesn't get rechecked!
15880 }
15881
15882 // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
15883 if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
15884 N0.hasOneUse() && ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
15885 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) &&
15886 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
15887 auto *LN0 = cast<LoadSDNode>(N0);
15888 SDValue ExtLoad =
15889 DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(),
15890 LN0->getBasePtr(), ExtVT, LN0->getMemOperand());
15891 CombineTo(N, ExtLoad);
15892 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
15893 return SDValue(N, 0); // Return N so it doesn't get rechecked!
15894 }
15895
15896 // fold (sext_inreg (masked_load x)) -> (sext_masked_load x)
15897 // ignore it if the masked load is already sign extended
15898 if (MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0)) {
15899 if (ExtVT == Ld->getMemoryVT() && N0.hasOneUse() &&
15900 Ld->getExtensionType() != ISD::LoadExtType::NON_EXTLOAD &&
15901 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT)) {
15902 SDValue ExtMaskedLoad = DAG.getMaskedLoad(
15903 VT, DL, Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(),
15904 Ld->getMask(), Ld->getPassThru(), ExtVT, Ld->getMemOperand(),
15905 Ld->getAddressingMode(), ISD::SEXTLOAD, Ld->isExpandingLoad());
15906 CombineTo(N, ExtMaskedLoad);
15907 CombineTo(N0.getNode(), ExtMaskedLoad, ExtMaskedLoad.getValue(1));
15908 return SDValue(N, 0); // Return N so it doesn't get rechecked!
15909 }
15910 }
15911
15912 // fold (sext_inreg (masked_gather x)) -> (sext_masked_gather x)
15913 if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
15914 if (SDValue(GN0, 0).hasOneUse() && ExtVT == GN0->getMemoryVT() &&
15916 SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
15917 GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
15918
15919 SDValue ExtLoad = DAG.getMaskedGather(
15920 DAG.getVTList(VT, MVT::Other), ExtVT, DL, Ops, GN0->getMemOperand(),
15921 GN0->getIndexType(), ISD::SEXTLOAD);
15922
15923 CombineTo(N, ExtLoad);
15924 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
15925 AddToWorklist(ExtLoad.getNode());
15926 return SDValue(N, 0); // Return N so it doesn't get rechecked!
15927 }
15928 }
15929
15930 // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
15931 if (ExtVTBits <= 16 && N0.getOpcode() == ISD::OR) {
15932 if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
15933 N0.getOperand(1), false))
15934 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, BSwap, N1);
15935 }
15936
15937 // Fold (iM_signext_inreg
15938 // (extract_subvector (zext|anyext|sext iN_v to _) _)
15939 // from iN)
15940 // -> (extract_subvector (signext iN_v to iM))
15941 if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR && N0.hasOneUse() &&
15943 SDValue InnerExt = N0.getOperand(0);
15944 EVT InnerExtVT = InnerExt->getValueType(0);
15945 SDValue Extendee = InnerExt->getOperand(0);
15946
15947 if (ExtVTBits == Extendee.getValueType().getScalarSizeInBits() &&
15948 (!LegalOperations ||
15949 TLI.isOperationLegal(ISD::SIGN_EXTEND, InnerExtVT))) {
15950 SDValue SignExtExtendee =
15951 DAG.getNode(ISD::SIGN_EXTEND, DL, InnerExtVT, Extendee);
15952 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, SignExtExtendee,
15953 N0.getOperand(1));
15954 }
15955 }
15956
15957 return SDValue();
15958}
15959
15960 static SDValue foldExtendVectorInregToExtendOfSubvector(
15961 SDNode *N, const SDLoc &DL, const TargetLowering &TLI, SelectionDAG &DAG,
15962 bool LegalOperations) {
15963 unsigned InregOpcode = N->getOpcode();
15964 unsigned Opcode = DAG.getOpcode_EXTEND(InregOpcode);
15965
15966 SDValue Src = N->getOperand(0);
15967 EVT VT = N->getValueType(0);
15968 EVT SrcVT = EVT::getVectorVT(*DAG.getContext(),
15969 Src.getValueType().getVectorElementType(),
15971
15972 assert(ISD::isExtVecInRegOpcode(InregOpcode) &&
15973 "Expected EXTEND_VECTOR_INREG dag node in input!");
15974
15975 // Profitability check: our operand must be a one-use CONCAT_VECTORS.
15976 // FIXME: one-use check may be overly restrictive
15977 if (!Src.hasOneUse() || Src.getOpcode() != ISD::CONCAT_VECTORS)
15978 return SDValue();
15979
15980 // Profitability check: we must be extending exactly one of its operands.
15981 // FIXME: this is probably overly restrictive.
15982 Src = Src.getOperand(0);
15983 if (Src.getValueType() != SrcVT)
15984 return SDValue();
15985
15986 if (LegalOperations && !TLI.isOperationLegal(Opcode, VT))
15987 return SDValue();
15988
15989 return DAG.getNode(Opcode, DL, VT, Src);
15990}
15991
15992SDValue DAGCombiner::visitEXTEND_VECTOR_INREG(SDNode *N) {
15993 SDValue N0 = N->getOperand(0);
15994 EVT VT = N->getValueType(0);
15995 SDLoc DL(N);
15996
15997 if (N0.isUndef()) {
15998 // aext_vector_inreg(undef) = undef because the top bits are undefined.
15999 // {s/z}ext_vector_inreg(undef) = 0 because the top bits must be the same.
16000 return N->getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG
16001 ? DAG.getUNDEF(VT)
16002 : DAG.getConstant(0, DL, VT);
16003 }
16004
16005 if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
16006 return Res;
16007
16008 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
16009 return SDValue(N, 0);
16010
16011 if (SDValue R = foldExtendVectorInregToExtendOfSubvector(N, DL, TLI, DAG,
16012 LegalOperations))
16013 return R;
16014
16015 return SDValue();
16016}
16017
16018SDValue DAGCombiner::visitTRUNCATE_USAT_U(SDNode *N) {
16019 EVT VT = N->getValueType(0);
16020 SDValue N0 = N->getOperand(0);
16021
16022 SDValue FPVal;
16023 if (sd_match(N0, m_FPToUI(m_Value(FPVal))) &&
16025 ISD::FP_TO_UINT_SAT, FPVal.getValueType(), VT))
16026 return DAG.getNode(ISD::FP_TO_UINT_SAT, SDLoc(N0), VT, FPVal,
16027 DAG.getValueType(VT.getScalarType()));
16028
16029 return SDValue();
16030}
16031
16032/// Detect patterns of truncation with unsigned saturation:
16033///
16034/// (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type).
16035/// Return the source value x to be truncated or SDValue() if the pattern was
16036/// not matched.
16037///
16038 static SDValue detectUSatUPattern(SDValue In, EVT VT) {
16039 unsigned NumDstBits = VT.getScalarSizeInBits();
16040 unsigned NumSrcBits = In.getScalarValueSizeInBits();
16041 // Saturation with truncation. We truncate from InVT to VT.
16042 assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
16043
16044 SDValue Min;
16045 APInt UnsignedMax = APInt::getMaxValue(NumDstBits).zext(NumSrcBits);
16046 if (sd_match(In, m_UMin(m_Value(Min), m_SpecificInt(UnsignedMax))))
16047 return Min;
16048
16049 return SDValue();
16050}
16051
16052/// Detect patterns of truncation with signed saturation:
16053/// (truncate (smin (smax (x, signed_min_of_dest_type),
16054/// signed_max_of_dest_type)) to dest_type)
16055/// or:
16056/// (truncate (smax (smin (x, signed_max_of_dest_type),
16057/// signed_min_of_dest_type)) to dest_type).
16058///
16059/// Return the source value to be truncated or SDValue() if the pattern was not
16060/// matched.
16061 static SDValue detectSSatSPattern(SDValue In, EVT VT) {
16062 unsigned NumDstBits = VT.getScalarSizeInBits();
16063 unsigned NumSrcBits = In.getScalarValueSizeInBits();
16064 // Saturation with truncation. We truncate from InVT to VT.
16065 assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
16066
16067 SDValue Val;
16068 APInt SignedMax = APInt::getSignedMaxValue(NumDstBits).sext(NumSrcBits);
16069 APInt SignedMin = APInt::getSignedMinValue(NumDstBits).sext(NumSrcBits);
16070
16071 if (sd_match(In, m_SMin(m_SMax(m_Value(Val), m_SpecificInt(SignedMin)),
16072 m_SpecificInt(SignedMax))))
16073 return Val;
16074
16075 if (sd_match(In, m_SMax(m_SMin(m_Value(Val), m_SpecificInt(SignedMax)),
16076 m_SpecificInt(SignedMin))))
16077 return Val;
16078
16079 return SDValue();
16080}
16081
16082/// Detect patterns of truncation with unsigned saturation:
16083 static SDValue detectSSatUPattern(SDValue In, EVT VT, SelectionDAG &DAG,
16084 const SDLoc &DL) {
16085 unsigned NumDstBits = VT.getScalarSizeInBits();
16086 unsigned NumSrcBits = In.getScalarValueSizeInBits();
16087 // Saturation with truncation. We truncate from InVT to VT.
16088 assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
16089
16090 SDValue Val;
16091 APInt UnsignedMax = APInt::getMaxValue(NumDstBits).zext(NumSrcBits);
16092 // Min == 0, Max is unsigned max of destination type.
16093 if (sd_match(In, m_SMax(m_SMin(m_Value(Val), m_SpecificInt(UnsignedMax)),
16094 m_Zero())))
16095 return Val;
16096
16097 if (sd_match(In, m_SMin(m_SMax(m_Value(Val), m_Zero()),
16098 m_SpecificInt(UnsignedMax))))
16099 return Val;
16100
16101 if (sd_match(In, m_UMin(m_SMax(m_Value(Val), m_Zero()),
16102 m_SpecificInt(UnsignedMax))))
16103 return Val;
16104
16105 return SDValue();
16106}
16107
16108static SDValue foldToSaturated(SDNode *N, EVT &VT, SDValue &Src, EVT &SrcVT,
16109 SDLoc &DL, const TargetLowering &TLI,
16110 SelectionDAG &DAG) {
16111 auto AllowedTruncateSat = [&](unsigned Opc, EVT SrcVT, EVT VT) -> bool {
16112 return (TLI.isOperationLegalOrCustom(Opc, SrcVT) &&
16113 TLI.isTypeDesirableForOp(Opc, VT));
16114 };
16115
16116 if (Src.getOpcode() == ISD::SMIN || Src.getOpcode() == ISD::SMAX) {
16117 if (AllowedTruncateSat(ISD::TRUNCATE_SSAT_S, SrcVT, VT))
16118 if (SDValue SSatVal = detectSSatSPattern(Src, VT))
16119 return DAG.getNode(ISD::TRUNCATE_SSAT_S, DL, VT, SSatVal);
16120 if (AllowedTruncateSat(ISD::TRUNCATE_SSAT_U, SrcVT, VT))
16121 if (SDValue SSatVal = detectSSatUPattern(Src, VT, DAG, DL))
16122 return DAG.getNode(ISD::TRUNCATE_SSAT_U, DL, VT, SSatVal);
16123 } else if (Src.getOpcode() == ISD::UMIN) {
16124 if (AllowedTruncateSat(ISD::TRUNCATE_SSAT_U, SrcVT, VT))
16125 if (SDValue SSatVal = detectSSatUPattern(Src, VT, DAG, DL))
16126 return DAG.getNode(ISD::TRUNCATE_SSAT_U, DL, VT, SSatVal);
16127 if (AllowedTruncateSat(ISD::TRUNCATE_USAT_U, SrcVT, VT))
16128 if (SDValue USatVal = detectUSatUPattern(Src, VT))
16129 return DAG.getNode(ISD::TRUNCATE_USAT_U, DL, VT, USatVal);
16130 }
16131
16132 return SDValue();
16133}
16134
16135SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
16136 SDValue N0 = N->getOperand(0);
16137 EVT VT = N->getValueType(0);
16138 EVT SrcVT = N0.getValueType();
16139 bool isLE = DAG.getDataLayout().isLittleEndian();
16140 SDLoc DL(N);
16141
16142 // trunc(undef) = undef
16143 if (N0.isUndef())
16144 return DAG.getUNDEF(VT);
16145
16146 // fold (truncate (truncate x)) -> (truncate x)
16147 if (N0.getOpcode() == ISD::TRUNCATE)
16148 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
16149
16150 // fold saturated truncate
16151 if (SDValue SaturatedTR = foldToSaturated(N, VT, N0, SrcVT, DL, TLI, DAG))
16152 return SaturatedTR;
16153
16154 // fold (truncate c1) -> c1
16155 if (SDValue C = DAG.FoldConstantArithmetic(ISD::TRUNCATE, DL, VT, {N0}))
16156 return C;
16157
16158 // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
16159 if (N0.getOpcode() == ISD::ZERO_EXTEND ||
16160 N0.getOpcode() == ISD::SIGN_EXTEND ||
16161 N0.getOpcode() == ISD::ANY_EXTEND) {
16162 // if the source is smaller than the dest, we still need an extend.
16163 if (N0.getOperand(0).getValueType().bitsLT(VT)) {
16164 SDNodeFlags Flags;
16165 if (N0.getOpcode() == ISD::ZERO_EXTEND)
16166 Flags.setNonNeg(N0->getFlags().hasNonNeg());
16167 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0), Flags);
16168 }
16169 // if the source is larger than the dest, then we just need the truncate.
16170 if (N0.getOperand(0).getValueType().bitsGT(VT))
16171 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
16172 // if the source and dest are the same type, we can drop both the extend
16173 // and the truncate.
16174 return N0.getOperand(0);
16175 }
16176
16177 // Try to narrow a truncate-of-sext_in_reg to the destination type:
16178 // trunc (sign_ext_inreg X, iM) to iN --> sign_ext_inreg (trunc X to iN), iM
16179 if (!LegalTypes && N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
16180 N0.hasOneUse()) {
16181 SDValue X = N0.getOperand(0);
16182 SDValue ExtVal = N0.getOperand(1);
16183 EVT ExtVT = cast<VTSDNode>(ExtVal)->getVT();
16184 if (ExtVT.bitsLT(VT) && TLI.preferSextInRegOfTruncate(VT, SrcVT, ExtVT)) {
16185 SDValue TrX = DAG.getNode(ISD::TRUNCATE, DL, VT, X);
16186 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, TrX, ExtVal);
16187 }
16188 }
16189
16190 // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
16191 if (N->hasOneUse() && (N->user_begin()->getOpcode() == ISD::ANY_EXTEND))
16192 return SDValue();
16193
16194 // Fold extract-and-trunc into a narrow extract. For example:
16195 // i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
16196 // i32 y = TRUNCATE(i64 x)
16197 // -- becomes --
16198 // v16i8 b = BITCAST (v2i64 val)
16199 // i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
16200 //
16201 // Note: We only run this optimization after type legalization (which often
16202 // creates this pattern) and before operation legalization after which
16203 // we need to be more careful about the vector instructions that we generate.
16204 if (LegalTypes && !LegalOperations && VT.isScalarInteger() && VT != MVT::i1 &&
16205 N0->hasOneUse()) {
16206 EVT TrTy = N->getValueType(0);
16207 SDValue Src = N0;
16208
16209 // Check for cases where we shift down an upper element before truncation.
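// e.g. (i32 (trunc (srl (extract_vector_elt v2i64 v, 0), 32))) really reads
// lane 1 of the equivalent v4i32 on a little-endian target, so adjust the
// extract index instead of materialising the shift.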
16210 int EltOffset = 0;
16211 if (Src.getOpcode() == ISD::SRL && Src.getOperand(0)->hasOneUse()) {
16212 if (auto ShAmt = DAG.getValidShiftAmount(Src)) {
16213 if ((*ShAmt % TrTy.getSizeInBits()) == 0) {
16214 Src = Src.getOperand(0);
16215 EltOffset = *ShAmt / TrTy.getSizeInBits();
16216 }
16217 }
16218 }
16219
16220 if (Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
16221 EVT VecTy = Src.getOperand(0).getValueType();
16222 EVT ExTy = Src.getValueType();
16223
16224 auto EltCnt = VecTy.getVectorElementCount();
16225 unsigned SizeRatio = ExTy.getSizeInBits() / TrTy.getSizeInBits();
16226 auto NewEltCnt = EltCnt * SizeRatio;
16227
16228 EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, NewEltCnt);
16229 assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
16230
16231 SDValue EltNo = Src->getOperand(1);
16232 if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
16233 int Elt = EltNo->getAsZExtVal();
16234 int Index = isLE ? (Elt * SizeRatio + EltOffset)
16235 : (Elt * SizeRatio + (SizeRatio - 1) - EltOffset);
16236 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
16237 DAG.getBitcast(NVT, Src.getOperand(0)),
16238 DAG.getVectorIdxConstant(Index, DL));
16239 }
16240 }
16241 }
16242
16243 // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
16244 if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse() &&
16245 TLI.isTruncateFree(SrcVT, VT)) {
16246 if (!LegalOperations ||
16247 (TLI.isOperationLegal(ISD::SELECT, SrcVT) &&
16248 TLI.isNarrowingProfitable(N0.getNode(), SrcVT, VT))) {
16249 SDLoc SL(N0);
16250 SDValue Cond = N0.getOperand(0);
16251 SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
16252 SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
16253 return DAG.getNode(ISD::SELECT, DL, VT, Cond, TruncOp0, TruncOp1);
16254 }
16255 }
16256
16257 // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
16258 if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
16259 (!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
16260 TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
16261 SDValue Amt = N0.getOperand(1);
16262 KnownBits Known = DAG.computeKnownBits(Amt);
16263 unsigned Size = VT.getScalarSizeInBits();
16264 if (Known.countMaxActiveBits() <= Log2_32(Size)) {
16265 EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
16266 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
16267 if (AmtVT != Amt.getValueType()) {
16268 Amt = DAG.getZExtOrTrunc(Amt, DL, AmtVT);
16269 AddToWorklist(Amt.getNode());
16270 }
16271 return DAG.getNode(ISD::SHL, DL, VT, Trunc, Amt);
16272 }
16273 }
16274
16275 if (SDValue V = foldSubToUSubSat(VT, N0.getNode(), DL))
16276 return V;
16277
16278 if (SDValue ABD = foldABSToABD(N, DL))
16279 return ABD;
16280
16281 // Attempt to pre-truncate BUILD_VECTOR sources.
16282 if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
16283 N0.hasOneUse() &&
16284 TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType()) &&
16285 // Avoid creating illegal types if running after type legalizer.
16286 (!LegalTypes || TLI.isTypeLegal(VT.getScalarType()))) {
16287 EVT SVT = VT.getScalarType();
16288 SmallVector<SDValue, 8> TruncOps;
16289 for (const SDValue &Op : N0->op_values()) {
16290 SDValue TruncOp = DAG.getNode(ISD::TRUNCATE, DL, SVT, Op);
16291 TruncOps.push_back(TruncOp);
16292 }
16293 return DAG.getBuildVector(VT, DL, TruncOps);
16294 }
16295
16296 // trunc (splat_vector x) -> splat_vector (trunc x)
16297 if (N0.getOpcode() == ISD::SPLAT_VECTOR &&
16298 (!LegalTypes || TLI.isTypeLegal(VT.getScalarType())) &&
16299 (!LegalOperations || TLI.isOperationLegal(ISD::SPLAT_VECTOR, VT))) {
16300 EVT SVT = VT.getScalarType();
16301 return DAG.getSplatVector(
16302 VT, DL, DAG.getNode(ISD::TRUNCATE, DL, SVT, N0->getOperand(0)));
16303 }
16304
16305 // Fold a series of buildvector, bitcast, and truncate if possible.
16306 // For example fold
16307 // (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
16308 // (2xi32 (buildvector x, y)).
16309 if (Level == AfterLegalizeVectorOps && VT.isVector() &&
16310 N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
16312 N0.getOperand(0).hasOneUse()) {
16313 SDValue BuildVect = N0.getOperand(0);
16314 EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
16315 EVT TruncVecEltTy = VT.getVectorElementType();
16316
16317 // Check that the element types match.
16318 if (BuildVectEltTy == TruncVecEltTy) {
16319 // Now we only need to compute the offset of the truncated elements.
16320 unsigned BuildVecNumElts = BuildVect.getNumOperands();
16321 unsigned TruncVecNumElts = VT.getVectorNumElements();
16322 unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
16323 unsigned FirstElt = isLE ? 0 : (TruncEltOffset - 1);
16324
16325 assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
16326 "Invalid number of elements");
16327
16329 for (unsigned i = FirstElt, e = BuildVecNumElts; i < e;
16330 i += TruncEltOffset)
16331 Opnds.push_back(BuildVect.getOperand(i));
16332
16333 return DAG.getBuildVector(VT, DL, Opnds);
16334 }
16335 }
16336
16337 // fold (truncate (load x)) -> (smaller load x)
16338 // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
16339 if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
16340 if (SDValue Reduced = reduceLoadWidth(N))
16341 return Reduced;
16342
16343 // Handle the case where the truncated result is at least as wide as the
16344 // loaded type.
16345 if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
16346 auto *LN0 = cast<LoadSDNode>(N0);
16347 if (LN0->isSimple() && LN0->getMemoryVT().bitsLE(VT)) {
16348 SDValue NewLoad = DAG.getExtLoad(
16349 LN0->getExtensionType(), SDLoc(LN0), VT, LN0->getChain(),
16350 LN0->getBasePtr(), LN0->getMemoryVT(), LN0->getMemOperand());
16351 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
16352 return NewLoad;
16353 }
16354 }
16355 }
16356
16357 // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
16358 // where ... are all 'undef'.
16359 if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
16361 SDValue V;
16362 unsigned Idx = 0;
16363 unsigned NumDefs = 0;
16364
16365 for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
16366 SDValue X = N0.getOperand(i);
16367 if (!X.isUndef()) {
16368 V = X;
16369 Idx = i;
16370 NumDefs++;
16371 }
16372 // Stop if more than one member is non-undef.
16373 if (NumDefs > 1)
16374 break;
16375
16378 X.getValueType().getVectorElementCount()));
16379 }
16380
16381 if (NumDefs == 0)
16382 return DAG.getUNDEF(VT);
16383
16384 if (NumDefs == 1) {
16385 assert(V.getNode() && "The single defined operand is empty!");
16387 for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
16388 if (i != Idx) {
16389 Opnds.push_back(DAG.getUNDEF(VTs[i]));
16390 continue;
16391 }
16392 SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
16393 AddToWorklist(NV.getNode());
16394 Opnds.push_back(NV);
16395 }
16396 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Opnds);
16397 }
16398 }
16399
16400 // Fold truncate of a bitcast of a vector to an extract of the low vector
16401 // element.
16402 //
16403 // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
16404 if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
16405 SDValue VecSrc = N0.getOperand(0);
16406 EVT VecSrcVT = VecSrc.getValueType();
16407 if (VecSrcVT.isVector() && VecSrcVT.getScalarType() == VT &&
16408 (!LegalOperations ||
16409 TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecSrcVT))) {
16410 unsigned Idx = isLE ? 0 : VecSrcVT.getVectorNumElements() - 1;
16411 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, VecSrc,
16412 DAG.getVectorIdxConstant(Idx, DL));
16413 }
16414 }
16415
16416 // Simplify the operands using demanded-bits information.
16417 if (SimplifyDemandedBits(SDValue(N, 0)))
16418 return SDValue(N, 0);
16419
16420 // fold (truncate (extract_subvector(ext x))) ->
16421 // (extract_subvector x)
16422 // TODO: This can be generalized to cover cases where the truncate and extract
16423 // do not fully cancel each other out.
16424 if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
16425 SDValue N00 = N0.getOperand(0);
16426 if (N00.getOpcode() == ISD::SIGN_EXTEND ||
16427 N00.getOpcode() == ISD::ZERO_EXTEND ||
16428 N00.getOpcode() == ISD::ANY_EXTEND) {
16429 if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
16431 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
16432 N00.getOperand(0), N0.getOperand(1));
16433 }
16434 }
16435
16436 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
16437 return NewVSel;
16438
16439 // Narrow a suitable binary operation with a non-opaque constant operand by
16440 // moving it ahead of the truncate. This is limited to pre-legalization
16441 // because targets may prefer a wider type during later combines and invert
16442 // this transform.
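// e.g. (trunc (add X, 42)) may become (add (trunc X), 42) when the narrower
// add is supported; truncation distributes over these ops and the constant is
// truncated for free.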
16443 switch (N0.getOpcode()) {
16444 case ISD::ADD:
16445 case ISD::SUB:
16446 case ISD::MUL:
16447 case ISD::AND:
16448 case ISD::OR:
16449 case ISD::XOR:
16450 if (!LegalOperations && N0.hasOneUse() &&
16451 (N0.getOperand(0) == N0.getOperand(1) ||
16453 isConstantOrConstantVector(N0.getOperand(1), true))) {
16454 // TODO: We already restricted this to pre-legalization, but for vectors
16455 // we are extra cautious to not create an unsupported operation.
16456 // Target-specific changes are likely needed to avoid regressions here.
16457 if (VT.isScalarInteger() || TLI.isOperationLegal(N0.getOpcode(), VT)) {
16458 SDValue NarrowL = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
16459 SDValue NarrowR = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
16460 SDNodeFlags Flags;
16461 // Propagate nuw for sub.
16462 if (N0->getOpcode() == ISD::SUB && N0->getFlags().hasNoUnsignedWrap() &&
16464 N0->getOperand(0),
16466 VT.getScalarSizeInBits())))
16467 Flags.setNoUnsignedWrap(true);
16468 return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR, Flags);
16469 }
16470 }
16471 break;
16472 case ISD::ADDE:
16473 case ISD::UADDO_CARRY:
16474 // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
16475 // (trunc uaddo_carry(X, Y, Carry)) ->
16476 // (uaddo_carry trunc(X), trunc(Y), Carry)
16477 // When the adde's carry is not used.
16478 // We only do this for uaddo_carry before operation legalization.
16479 if (((!LegalOperations && N0.getOpcode() == ISD::UADDO_CARRY) ||
16480 TLI.isOperationLegal(N0.getOpcode(), VT)) &&
16481 N0.hasOneUse() && !N0->hasAnyUseOfValue(1)) {
16482 SDValue X = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
16483 SDValue Y = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
16484 SDVTList VTs = DAG.getVTList(VT, N0->getValueType(1));
16485 return DAG.getNode(N0.getOpcode(), DL, VTs, X, Y, N0.getOperand(2));
16486 }
16487 break;
16488 case ISD::USUBSAT:
16489 // Truncate the USUBSAT only if LHS is a known zero-extension; it's not
16490 // enough to know that the upper bits are zero, we must also ensure that we
16491 // don't introduce an extra truncate.
16492 if (!LegalOperations && N0.hasOneUse() &&
16495 VT.getScalarSizeInBits() &&
16496 hasOperation(N0.getOpcode(), VT)) {
16497 return getTruncatedUSUBSAT(VT, SrcVT, N0.getOperand(0), N0.getOperand(1),
16498 DAG, DL);
16499 }
16500 break;
16501 case ISD::AVGCEILS:
16502 case ISD::AVGCEILU:
16503 // trunc (avgceilu (sext (x), sext (y))) -> avgceils(x, y)
16504 // trunc (avgceils (zext (x), zext (y))) -> avgceilu(x, y)
16505 if (N0.hasOneUse()) {
16506 SDValue Op0 = N0.getOperand(0);
16507 SDValue Op1 = N0.getOperand(1);
16508 if (N0.getOpcode() == ISD::AVGCEILU) {
16510 Op0.getOpcode() == ISD::SIGN_EXTEND &&
16511 Op1.getOpcode() == ISD::SIGN_EXTEND &&
16512 Op0.getOperand(0).getValueType() == VT &&
16513 Op1.getOperand(0).getValueType() == VT)
16514 return DAG.getNode(ISD::AVGCEILS, DL, VT, Op0.getOperand(0),
16515 Op1.getOperand(0));
16516 } else {
16518 Op0.getOpcode() == ISD::ZERO_EXTEND &&
16519 Op1.getOpcode() == ISD::ZERO_EXTEND &&
16520 Op0.getOperand(0).getValueType() == VT &&
16521 Op1.getOperand(0).getValueType() == VT)
16522 return DAG.getNode(ISD::AVGCEILU, DL, VT, Op0.getOperand(0),
16523 Op1.getOperand(0));
16524 }
16525 }
16526 [[fallthrough]];
16527 case ISD::AVGFLOORS:
16528 case ISD::AVGFLOORU:
16529 case ISD::ABDS:
16530 case ISD::ABDU:
16531 // (trunc (avg a, b)) -> (avg (trunc a), (trunc b))
16532 // (trunc (abdu/abds a, b)) -> (abdu/abds (trunc a), (trunc b))
16533 if (!LegalOperations && N0.hasOneUse() &&
16534 TLI.isOperationLegal(N0.getOpcode(), VT)) {
16535 EVT TruncVT = VT;
16536 unsigned SrcBits = SrcVT.getScalarSizeInBits();
16537 unsigned TruncBits = TruncVT.getScalarSizeInBits();
16538
16539 SDValue A = N0.getOperand(0);
16540 SDValue B = N0.getOperand(1);
16541 bool CanFold = false;
16542
16543 if (N0.getOpcode() == ISD::AVGFLOORU || N0.getOpcode() == ISD::AVGCEILU ||
16544 N0.getOpcode() == ISD::ABDU) {
16545 APInt UpperBits = APInt::getBitsSetFrom(SrcBits, TruncBits);
16546 CanFold = DAG.MaskedValueIsZero(B, UpperBits) &&
16547 DAG.MaskedValueIsZero(A, UpperBits);
16548 } else {
16549 unsigned NeededBits = SrcBits - TruncBits;
16550 CanFold = DAG.ComputeNumSignBits(B) > NeededBits &&
16551 DAG.ComputeNumSignBits(A) > NeededBits;
16552 }
16553
16554 if (CanFold) {
16555 SDValue NewA = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, A);
16556 SDValue NewB = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, B);
16557 return DAG.getNode(N0.getOpcode(), DL, TruncVT, NewA, NewB);
16558 }
16559 }
16560 break;
16561 }
16562
16563 return SDValue();
16564}
16565
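/// Return the node that produces operand \p i of the BUILD_PAIR \p N,
/// looking through a MERGE_VALUES wrapper if present.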
16566static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
16567 SDValue Elt = N->getOperand(i);
16568 if (Elt.getOpcode() != ISD::MERGE_VALUES)
16569 return Elt.getNode();
16570 return Elt.getOperand(Elt.getResNo()).getNode();
16571}
16572
16573/// build_pair (load, load) -> load
16574/// if load locations are consecutive.
16575SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
16576 assert(N->getOpcode() == ISD::BUILD_PAIR);
16577
16578 auto *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
16579 auto *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
16580
16581 // A BUILD_PAIR always has the least significant part in elt 0 and the
16582 // most significant part in elt 1. So when combining into one large load, we
16583 // need to consider the endianness.
16584 if (DAG.getDataLayout().isBigEndian())
16585 std::swap(LD1, LD2);
16586
16587 if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !ISD::isNON_EXTLoad(LD2) ||
16588 !LD1->hasOneUse() || !LD2->hasOneUse() ||
16589 LD1->getAddressSpace() != LD2->getAddressSpace())
16590 return SDValue();
16591
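// Only merge if the wide load is legal (or we are before operation
// legalization), the two loads are consecutive and non-volatile, and the
// target reports the wider memory access as fast.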
16592 unsigned LD1Fast = 0;
16593 EVT LD1VT = LD1->getValueType(0);
16594 unsigned LD1Bytes = LD1VT.getStoreSize();
16595 if ((!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
16596 DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1) &&
16597 TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
16598 *LD1->getMemOperand(), &LD1Fast) && LD1Fast)
16599 return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
16600 LD1->getPointerInfo(), LD1->getAlign());
16601
16602 return SDValue();
16603}
16604
16605static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
16606 // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
16607 // and Lo parts; on big-endian machines it doesn't.
16608 return DAG.getDataLayout().isBigEndian() ? 1 : 0;
16609}
16610
16611SDValue DAGCombiner::foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
16612 const TargetLowering &TLI) {
16613 // If this is not a bitcast to an FP type or if the target doesn't have
16614 // IEEE754-compliant FP logic, we're done.
16615 EVT VT = N->getValueType(0);
16616 SDValue N0 = N->getOperand(0);
16617 EVT SourceVT = N0.getValueType();
16618
16619 if (!VT.isFloatingPoint())
16620 return SDValue();
16621
16622 // TODO: Handle cases where the integer constant is a different scalar
16623 // bitwidth to the FP.
16624 if (VT.getScalarSizeInBits() != SourceVT.getScalarSizeInBits())
16625 return SDValue();
16626
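// Map the integer logic op and the sign-bit constant it uses to the
// equivalent FP operation: AND with ~signmask is fabs, XOR with signmask is
// fneg, and OR with signmask is fneg(fabs).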
16627 unsigned FPOpcode;
16628 APInt SignMask;
16629 switch (N0.getOpcode()) {
16630 case ISD::AND:
16631 FPOpcode = ISD::FABS;
16632 SignMask = ~APInt::getSignMask(SourceVT.getScalarSizeInBits());
16633 break;
16634 case ISD::XOR:
16635 FPOpcode = ISD::FNEG;
16636 SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
16637 break;
16638 case ISD::OR:
16639 FPOpcode = ISD::FABS;
16640 SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
16641 break;
16642 default:
16643 return SDValue();
16644 }
16645
16646 if (LegalOperations && !TLI.isOperationLegal(FPOpcode, VT))
16647 return SDValue();
16648
16649 // This needs to be the inverse of logic in foldSignChangeInBitcast.
16650 // FIXME: I don't think looking for bitcast intrinsically makes sense, but
16651 // removing this would require more changes.
16652 auto IsBitCastOrFree = [&TLI, FPOpcode](SDValue Op, EVT VT) {
16653 if (sd_match(Op, m_BitCast(m_SpecificVT(VT))))
16654 return true;
16655
16656 return FPOpcode == ISD::FABS ? TLI.isFAbsFree(VT) : TLI.isFNegFree(VT);
16657 };
16658
16659 // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
16660 // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
16661 // Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) ->
16662 // fneg (fabs X)
16663 SDValue LogicOp0 = N0.getOperand(0);
16664 ConstantSDNode *LogicOp1 = isConstOrConstSplat(N0.getOperand(1), true);
16665 if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
16666 IsBitCastOrFree(LogicOp0, VT)) {
16667 SDValue CastOp0 = DAG.getNode(ISD::BITCAST, SDLoc(N), VT, LogicOp0);
16668 SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, CastOp0);
16669 NumFPLogicOpsConv++;
16670 if (N0.getOpcode() == ISD::OR)
16671 return DAG.getNode(ISD::FNEG, SDLoc(N), VT, FPOp);
16672 return FPOp;
16673 }
16674
16675 return SDValue();
16676}
16677
16678SDValue DAGCombiner::visitBITCAST(SDNode *N) {
16679 SDValue N0 = N->getOperand(0);
16680 EVT VT = N->getValueType(0);
16681
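// fold (bitcast undef) -> undef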
16682 if (N0.isUndef())
16683 return DAG.getUNDEF(VT);
16684
16685 // If the input is a BUILD_VECTOR with all constant elements, fold this now.
16686 // Only do this before legalize types, unless both types are integer and the
16687 // scalar type is legal. Only do this before legalize ops, since the target
16688 // may be depending on the bitcast.
16689 // First check to see if this is all constant.
16690 // TODO: Support FP bitcasts after legalize types.
16691 if (VT.isVector() &&
16692 (!LegalTypes ||
16693 (!LegalOperations && VT.isInteger() && N0.getValueType().isInteger() &&
16694 TLI.isTypeLegal(VT.getVectorElementType()))) &&
16695 N0.getOpcode() == ISD::BUILD_VECTOR && N0->hasOneUse() &&
16696 cast<BuildVectorSDNode>(N0)->isConstant())
16697 return DAG.FoldConstantBuildVector(cast<BuildVectorSDNode>(N0), SDLoc(N),
16698 VT.getVectorElementType());
16699
16700 // If the input is a constant, let getNode fold it.
16701 if (isIntOrFPConstant(N0)) {
16702 // If we can't allow illegal operations, we need to check that this is just
16703 // an fp -> int or int -> fp conversion and that the resulting operation
16704 // will be legal.
16705 if (!LegalOperations ||
16706 (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
16707 TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
16708 (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
16709 TLI.isOperationLegal(ISD::Constant, VT))) {
16710 SDValue C = DAG.getBitcast(VT, N0);
16711 if (C.getNode() != N)
16712 return C;
16713 }
16714 }
16715
16716 // (conv (conv x, t1), t2) -> (conv x, t2)
16717 if (N0.getOpcode() == ISD::BITCAST)
16718 return DAG.getBitcast(VT, N0.getOperand(0));
16719
16720 // fold (conv (logicop (conv x), (c))) -> (logicop x, (conv c))
16721 // iff the current bitwise logicop type isn't legal
16722 if (ISD::isBitwiseLogicOp(N0.getOpcode()) && VT.isInteger() &&
16723 !TLI.isTypeLegal(N0.getOperand(0).getValueType())) {
16724 auto IsFreeBitcast = [VT](SDValue V) {
16725 return (V.getOpcode() == ISD::BITCAST &&
16726 V.getOperand(0).getValueType() == VT) ||
16727 (ISD::isBuildVectorOfConstantSDNodes(V.getNode()) &&
16728 V->hasOneUse());
16729 };
16730 if (IsFreeBitcast(N0.getOperand(0)) && IsFreeBitcast(N0.getOperand(1)))
16731 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
16732 DAG.getBitcast(VT, N0.getOperand(0)),
16733 DAG.getBitcast(VT, N0.getOperand(1)));
16734 }
16735
16736 // fold (conv (load x)) -> (load (conv*)x)
16737 // If the resultant load doesn't need a higher alignment than the original!
16738 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
16739 // Do not remove the cast if the types differ in endian layout.
16740 TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
16741 TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
16742 // If the load is volatile, we only want to change the load type if the
16743 // resulting load is legal. Otherwise we might increase the number of
16744 // memory accesses. We don't care if the original type was legal or not
16745 // as we assume software couldn't rely on the number of accesses of an
16746 // illegal type.
16747 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) ||
16748 TLI.isOperationLegal(ISD::LOAD, VT))) {
16749 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
16750
16751 if (TLI.isLoadBitCastBeneficial(N0.getValueType(), VT, DAG,
16752 *LN0->getMemOperand())) {
16753 // If the range metadata type does not match the new memory
16754 // operation type, remove the range metadata.
16755 if (const MDNode *MD = LN0->getRanges()) {
16756 ConstantInt *Lower = mdconst::extract<ConstantInt>(MD->getOperand(0));
16757 if (Lower->getBitWidth() != VT.getScalarSizeInBits() ||
16758 !VT.isInteger()) {
16759 LN0->getMemOperand()->clearRanges();
16760 }
16761 }
16762 SDValue Load =
16763 DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
16764 LN0->getMemOperand());
16765 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
16766 return Load;
16767 }
16768 }
16769
16770 if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
16771 return V;
16772
16773 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
16774 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
16775 //
16776 // For ppc_fp128:
16777 // fold (bitcast (fneg x)) ->
16778 // flipbit = signbit
16779 // (xor (bitcast x) (build_pair flipbit, flipbit))
16780 //
16781 // fold (bitcast (fabs x)) ->
16782 // flipbit = (and (extract_element (bitcast x), 0), signbit)
16783 // (xor (bitcast x) (build_pair flipbit, flipbit))
16784 // This often reduces constant pool loads.
16785 if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
16786 (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
16787 N0->hasOneUse() && VT.isInteger() && !VT.isVector() &&
16788 !N0.getValueType().isVector()) {
16789 SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
16790 AddToWorklist(NewConv.getNode());
16791
16792 SDLoc DL(N);
16793 if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
16794 assert(VT.getSizeInBits() == 128);
16795 SDValue SignBit = DAG.getConstant(
16796 APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
16797 SDValue FlipBit;
16798 if (N0.getOpcode() == ISD::FNEG) {
16799 FlipBit = SignBit;
16800 AddToWorklist(FlipBit.getNode());
16801 } else {
16802 assert(N0.getOpcode() == ISD::FABS);
16803 SDValue Hi =
16804 DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
16805 DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
16806 SDLoc(NewConv)));
16807 AddToWorklist(Hi.getNode());
16808 FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
16809 AddToWorklist(FlipBit.getNode());
16810 }
16811 SDValue FlipBits =
16812 DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
16813 AddToWorklist(FlipBits.getNode());
16814 return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
16815 }
16816 APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
16817 if (N0.getOpcode() == ISD::FNEG)
16818 return DAG.getNode(ISD::XOR, DL, VT,
16819 NewConv, DAG.getConstant(SignBit, DL, VT));
16820 assert(N0.getOpcode() == ISD::FABS);
16821 return DAG.getNode(ISD::AND, DL, VT,
16822 NewConv, DAG.getConstant(~SignBit, DL, VT));
16823 }
16824
16825 // fold (bitconvert (fcopysign cst, x)) ->
16826 // (or (and (bitconvert x), sign), (and cst, (not sign)))
16827 // Note that we don't handle (copysign x, cst) because this can always be
16828 // folded to an fneg or fabs.
16829 //
16830 // For ppc_fp128:
16831 // fold (bitcast (fcopysign cst, x)) ->
16832 // flipbit = (and (extract_element
16833 // (xor (bitcast cst), (bitcast x)), 0),
16834 // signbit)
16835 // (xor (bitcast cst) (build_pair flipbit, flipbit))
16836 if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse() &&
16837 isa<ConstantFPSDNode>(N0.getOperand(0)) && VT.isInteger() &&
16838 !VT.isVector()) {
16839 unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
16840 EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
16841 if (isTypeLegal(IntXVT)) {
16842 SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
16843 AddToWorklist(X.getNode());
16844
16845 // If X has a different width than the result/lhs, sext it or truncate it.
16846 unsigned VTWidth = VT.getSizeInBits();
16847 if (OrigXWidth < VTWidth) {
16848 X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
16849 AddToWorklist(X.getNode());
16850 } else if (OrigXWidth > VTWidth) {
16851 // To get the sign bit in the right place, we have to shift it right
16852 // before truncating.
16853 SDLoc DL(X);
16854 X = DAG.getNode(ISD::SRL, DL,
16855 X.getValueType(), X,
16856 DAG.getConstant(OrigXWidth-VTWidth, DL,
16857 X.getValueType()));
16858 AddToWorklist(X.getNode());
16859 X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
16860 AddToWorklist(X.getNode());
16861 }
16862
16863 if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
16864 APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
16865 SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
16866 AddToWorklist(Cst.getNode());
16867 SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
16868 AddToWorklist(X.getNode());
16869 SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
16870 AddToWorklist(XorResult.getNode());
16871 SDValue XorResult64 = DAG.getNode(
16872 ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
16873 DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
16874 SDLoc(XorResult)));
16875 AddToWorklist(XorResult64.getNode());
16876 SDValue FlipBit =
16877 DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
16878 DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
16879 AddToWorklist(FlipBit.getNode());
16880 SDValue FlipBits =
16881 DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
16882 AddToWorklist(FlipBits.getNode());
16883 return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
16884 }
16885 APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
16886 X = DAG.getNode(ISD::AND, SDLoc(X), VT,
16887 X, DAG.getConstant(SignBit, SDLoc(X), VT));
16888 AddToWorklist(X.getNode());
16889
16890 SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
16891 Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
16892 Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
16893 AddToWorklist(Cst.getNode());
16894
16895 return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
16896 }
16897 }
16898
16899 // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
16900 if (N0.getOpcode() == ISD::BUILD_PAIR)
16901 if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
16902 return CombineLD;
16903
16904 // int_vt (bitcast (vec_vt (scalar_to_vector elt_vt:x)))
16905 // => int_vt (any_extend elt_vt:x)
16906 if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && VT.isScalarInteger()) {
16907 SDValue SrcScalar = N0.getOperand(0);
16908 if (SrcScalar.getValueType().isScalarInteger())
16909 return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, SrcScalar);
16910 }
16911
16912 // Remove double bitcasts from shuffles - this is often a legacy of
16913 // XformToShuffleWithZero being used to combine bitmaskings (of
16914 // float vectors bitcast to integer vectors) into shuffles.
16915 // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
16916 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
16917 N0->getOpcode() == ISD::VECTOR_SHUFFLE && N0.hasOneUse() &&
16918 VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
16919 !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
16920 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
16921
16922 // If operands are a bitcast, peek through if it casts the original VT.
16923 // If operands are a constant, just bitcast back to original VT.
16924 auto PeekThroughBitcast = [&](SDValue Op) {
16925 if (Op.getOpcode() == ISD::BITCAST &&
16926 Op.getOperand(0).getValueType() == VT)
16927 return SDValue(Op.getOperand(0));
16928 if (Op.isUndef() || isAnyConstantBuildVector(Op))
16929 return DAG.getBitcast(VT, Op);
16930 return SDValue();
16931 };
16932
16933 // FIXME: If either input vector is bitcast, try to convert the shuffle to
16934 // the result type of this bitcast. This would eliminate at least one
16935 // bitcast. See the transform in InstCombine.
16936 SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
16937 SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
16938 if (!(SV0 && SV1))
16939 return SDValue();
16940
16941 int MaskScale =
16942 VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
16943 SmallVector<int, 8> NewMask;
16944 for (int M : SVN->getMask())
16945 for (int i = 0; i != MaskScale; ++i)
16946 NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
16947
16948 SDValue LegalShuffle =
16949 TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask, DAG);
16950 if (LegalShuffle)
16951 return LegalShuffle;
16952 }
16953
16954 return SDValue();
16955}
16956
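/// BUILD_PAIR just concatenates its two operands, so the only combine we try
/// here is merging two consecutive loads into one wider load.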
16957SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
16958 EVT VT = N->getValueType(0);
16959 return CombineConsecutiveLoads(N, VT);
16960}
16961
16962SDValue DAGCombiner::visitFREEZE(SDNode *N) {
16963 SDValue N0 = N->getOperand(0);
16964
16965 if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, /*PoisonOnly*/ false))
16966 return N0;
16967
16968 // If we have frozen and unfrozen users of N0, update so everything uses N.
16969 if (!N0.isUndef() && !N0.hasOneUse()) {
16970 SDValue FrozenN0(N, 0);
16971 // Unfreeze all uses of N to avoid double deleting N from the CSE map.
16972 DAG.ReplaceAllUsesOfValueWith(FrozenN0, N0);
16973 DAG.ReplaceAllUsesOfValueWith(N0, FrozenN0);
16974 // ReplaceAllUsesOfValueWith will have also updated the use in N, thus
16975 // creating a cycle in a DAG. Let's undo that by mutating the freeze.
16976 assert(N->getOperand(0) == FrozenN0 && "Expected cycle in DAG");
16977 DAG.UpdateNodeOperands(N, N0);
16978 return FrozenN0;
16979 }
16980
16981 // We currently avoid folding freeze over SRA/SRL, due to the problems seen
16982 // with (freeze (assert ext)) blocking simplifications of SRA/SRL. See for
16983 // example https://reviews.llvm.org/D136529#4120959.
16984 if (N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::SRL)
16985 return SDValue();
16986
16987 // Fold freeze(op(x, ...)) -> op(freeze(x), ...).
16988 // Try to push freeze through instructions that propagate but don't produce
16989 // poison as far as possible. If the freeze's operand 1) has one use, 2) does
16990 // not produce poison, and 3) has all but one operand guaranteed non-poison
16991 // (or is a BUILD_VECTOR or similar), then push the freeze through to the
16992 // operands that are not guaranteed non-poison.
16993 // NOTE: we will strip poison-generating flags, so ignore them here.
16994 if (DAG.canCreateUndefOrPoison(N0, /*PoisonOnly*/ false,
16995 /*ConsiderFlags*/ false) ||
16996 N0->getNumValues() != 1 || !N0->hasOneUse())
16997 return SDValue();
16998
16999 // TODO: we should always allow multiple operands; however, this increases the
17000 // likelihood of infinite loops due to the ReplaceAllUsesOfValueWith call
17001 // below causing later nodes that share frozen operands to fold again and no
17002 // longer being able to confirm other operands are not poison due to recursion
17003 // depth limits on isGuaranteedNotToBeUndefOrPoison.
17004 bool AllowMultipleMaybePoisonOperands =
17005 N0.getOpcode() == ISD::SELECT_CC || N0.getOpcode() == ISD::SETCC ||
17006 N0.getOpcode() == ISD::BUILD_VECTOR ||
17007 N0.getOpcode() == ISD::SPLAT_VECTOR ||
17008 N0.getOpcode() == ISD::BUILD_PAIR ||
17009 N0.getOpcode() == ISD::VECTOR_SHUFFLE ||
17010 N0.getOpcode() == ISD::CONCAT_VECTORS || N0.getOpcode() == ISD::FMUL;
17011
17012 // Avoid turning a BUILD_VECTOR that can be recognized as "all zeros", "all
17013 // ones" or "constant" into something that depends on FrozenUndef. We can
17014 // instead pick undef values to keep those properties, while at the same time
17015 // folding away the freeze.
17016 // If we implement a more general solution for folding away freeze(undef) in
17017 // the future, then this special handling can be removed.
17018 if (N0.getOpcode() == ISD::BUILD_VECTOR) {
17019 SDLoc DL(N0);
17020 EVT VT = N0.getValueType();
17021 if (ISD::isBuildVectorAllOnes(N0.getNode()))
17022 return DAG.getAllOnesConstant(DL, VT);
17023 if (ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
17024 SmallVector<SDValue, 8> NewVecC;
17025 for (const SDValue &Op : N0->op_values())
17026 NewVecC.push_back(
17027 Op.isUndef() ? DAG.getConstant(0, DL, Op.getValueType()) : Op);
17028 return DAG.getBuildVector(VT, DL, NewVecC);
17029 }
17030 }
17031
17032 SmallSet<SDValue, 8> MaybePoisonOperands;
17033 SmallVector<unsigned, 8> MaybePoisonOperandNumbers;
17034 for (auto [OpNo, Op] : enumerate(N0->ops())) {
17035 if (DAG.isGuaranteedNotToBeUndefOrPoison(Op, /*PoisonOnly=*/false))
17036 continue;
17037 bool HadMaybePoisonOperands = !MaybePoisonOperands.empty();
17038 bool IsNewMaybePoisonOperand = MaybePoisonOperands.insert(Op).second;
17039 if (IsNewMaybePoisonOperand)
17040 MaybePoisonOperandNumbers.push_back(OpNo);
17041 if (!HadMaybePoisonOperands)
17042 continue;
17043 if (IsNewMaybePoisonOperand && !AllowMultipleMaybePoisonOperands) {
17044 // Multiple maybe-poison ops when not allowed - bail out.
17045 return SDValue();
17046 }
17047 }
17048 // NOTE: the whole op may not be guaranteed not to be undef or poison because
17049 // it could create undef or poison due to its poison-generating flags.
17050 // So not finding any maybe-poison operands is fine.
17051
17052 for (unsigned OpNo : MaybePoisonOperandNumbers) {
17053 // N0 can mutate during iteration, so make sure to refetch the maybe poison
17054 // operands via the operand numbers. The typical scenario is that we have
17055 // something like this
17056 // t262: i32 = freeze t181
17057 // t150: i32 = ctlz_zero_undef t262
17058 // t184: i32 = ctlz_zero_undef t181
17059 // t268: i32 = select_cc t181, Constant:i32<0>, t184, t186, setne:ch
17060 // When freezing the t181 operand we get t262 back, and then the
17061 // ReplaceAllUsesOfValueWith call will not only replace t181 by t262, but
17062 // also recursively replace t184 by t150.
17063 SDValue MaybePoisonOperand = N->getOperand(0).getOperand(OpNo);
17064 // Don't replace every single UNDEF everywhere with frozen UNDEF, though.
17065 if (MaybePoisonOperand.isUndef())
17066 continue;
17067 // First, freeze each offending operand.
17068 SDValue FrozenMaybePoisonOperand = DAG.getFreeze(MaybePoisonOperand);
17069 // Then, change all other uses of unfrozen operand to use frozen operand.
17070 DAG.ReplaceAllUsesOfValueWith(MaybePoisonOperand, FrozenMaybePoisonOperand);
17071 if (FrozenMaybePoisonOperand.getOpcode() == ISD::FREEZE &&
17072 FrozenMaybePoisonOperand.getOperand(0) == FrozenMaybePoisonOperand) {
17073 // But, that also updated the use in the freeze we just created, thus
17074 // creating a cycle in a DAG. Let's undo that by mutating the freeze.
17075 DAG.UpdateNodeOperands(FrozenMaybePoisonOperand.getNode(),
17076 MaybePoisonOperand);
17077 }
17078
17079 // This node has been merged with another.
17080 if (N->getOpcode() == ISD::DELETED_NODE)
17081 return SDValue(N, 0);
17082 }
17083
17084 assert(N->getOpcode() != ISD::DELETED_NODE && "Node was deleted!");
17085
17086 // The whole node may have been updated, so the value we were holding
17087 // may no longer be valid. Re-fetch the operand we're `freeze`ing.
17088 N0 = N->getOperand(0);
17089
17090 // Finally, recreate the node; its operands were updated to use
17091 // frozen operands, so we just need to use its "original" operands.
17092 SmallVector<SDValue> Ops(N0->ops());
17093 // TODO: ISD::UNDEF and ISD::POISON should get separate handling, but best
17094 // leave for a future patch.
17095 for (SDValue &Op : Ops) {
17096 if (Op.isUndef())
17097 Op = DAG.getFreeze(Op);
17098 }
17099
17100 SDLoc DL(N0);
17101
17102 // Special case handling for ShuffleVectorSDNode nodes.
17103 if (auto *SVN = dyn_cast<ShuffleVectorSDNode>(N0))
17104 return DAG.getVectorShuffle(N0.getValueType(), DL, Ops[0], Ops[1],
17105 SVN->getMask());
17106
17107 // NOTE: this strips poison generating flags.
17108 // Folding freeze(op(x, ...)) -> op(freeze(x), ...) does not require nnan,
17109 // ninf, nsz, or fast.
17110 // However, contract, reassoc, afn, and arcp should be preserved,
17111 // as these fast-math flags do not introduce poison values.
17112 SDNodeFlags SrcFlags = N0->getFlags();
17113 SDNodeFlags SafeFlags;
17114 SafeFlags.setAllowContract(SrcFlags.hasAllowContract());
17115 SafeFlags.setAllowReassociation(SrcFlags.hasAllowReassociation());
17116 SafeFlags.setApproximateFuncs(SrcFlags.hasApproximateFuncs());
17117 SafeFlags.setAllowReciprocal(SrcFlags.hasAllowReciprocal());
17118 return DAG.getNode(N0.getOpcode(), DL, N0->getVTList(), Ops, SafeFlags);
17119}
17120
17121// Returns true if floating point contraction is allowed on the FMUL-SDValue
17122// `N`
17123 static bool isContractableFMUL(const TargetOptions &Options, SDValue N) {
17124 assert(N.getOpcode() == ISD::FMUL);
17125
17126 return Options.AllowFPOpFusion == FPOpFusion::Fast ||
17127 N->getFlags().hasAllowContract();
17128}
17129
17130/// Try to perform FMA combining on a given FADD node.
17131template <class MatchContextClass>
17132SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
17133 SDValue N0 = N->getOperand(0);
17134 SDValue N1 = N->getOperand(1);
17135 EVT VT = N->getValueType(0);
17136 SDLoc SL(N);
17137 MatchContextClass matcher(DAG, TLI, N);
17138 const TargetOptions &Options = DAG.getTarget().Options;
17139
17140 bool UseVP = std::is_same_v<MatchContextClass, VPMatchContext>;
17141
17142 // Floating-point multiply-add with intermediate rounding.
17143 // FIXME: Make isFMADLegal have specific behavior when using VPMatchContext.
17144 // FIXME: Add VP_FMAD opcode.
17145 bool HasFMAD = !UseVP && (LegalOperations && TLI.isFMADLegal(DAG, N));
17146
17147 // Floating-point multiply-add without intermediate rounding.
17148 bool HasFMA =
17149 (!LegalOperations || matcher.isOperationLegalOrCustom(ISD::FMA, VT)) &&
17150 TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT);
17151
17152 // No valid opcode, do not combine.
17153 if (!HasFMAD && !HasFMA)
17154 return SDValue();
17155
17156 bool AllowFusionGlobally =
17157 Options.AllowFPOpFusion == FPOpFusion::Fast || HasFMAD;
17158 // If the addition is not contractable, do not combine.
17159 if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
17160 return SDValue();
17161
17162 // Folding fadd (fmul x, y), (fmul x, y) -> fma x, y, (fmul x, y) is never
17163 // beneficial. It does not reduce latency. It increases register pressure. It
17164 // replaces an fadd with an fma which is a more complex instruction, so is
17165 // likely to have a larger encoding, use more functional units, etc.
17166 if (N0 == N1)
17167 return SDValue();
17168
17169 if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
17170 return SDValue();
17171
17172 // Always prefer FMAD to FMA for precision.
17173 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
17174 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
17175
17176 auto isFusedOp = [&](SDValue N) {
17177 return matcher.match(N, ISD::FMA) || matcher.match(N, ISD::FMAD);
17178 };
17179
17180 // Is the node an FMUL and contractable either due to global flags or
17181 // SDNodeFlags.
17182 auto isContractableFMUL = [AllowFusionGlobally, &matcher](SDValue N) {
17183 if (!matcher.match(N, ISD::FMUL))
17184 return false;
17185 return AllowFusionGlobally || N->getFlags().hasAllowContract();
17186 };
17187 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
17188 // prefer to fold the multiply with fewer uses.
17189 if (isContractableFMUL(N0) && isContractableFMUL(N1)) {
17190 if (N0->use_size() > N1->use_size())
17191 std::swap(N0, N1);
17192 }
17193
17194 // fold (fadd (fmul x, y), z) -> (fma x, y, z)
17195 if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
17196 return matcher.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
17197 N0.getOperand(1), N1);
17198 }
17199
17200 // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
17201 // Note: Commutes FADD operands.
17202 if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
17203 return matcher.getNode(PreferredFusedOpcode, SL, VT, N1.getOperand(0),
17204 N1.getOperand(1), N0);
17205 }
17206
17207 // fadd (fma A, B, (fmul C, D)), E --> fma A, B, (fma C, D, E)
17208 // fadd E, (fma A, B, (fmul C, D)) --> fma A, B, (fma C, D, E)
17209 // This also works with nested fma instructions:
17210 // fadd (fma A, B, (fma (C, D, (fmul (E, F))))), G -->
17211 // fma A, B, (fma C, D, fma (E, F, G))
17212 // fadd (G, (fma A, B, (fma (C, D, (fmul (E, F)))))) -->
17213 // fma A, B, (fma C, D, fma (E, F, G)).
17214 // This requires reassociation because it changes the order of operations.
17215 bool CanReassociate = N->getFlags().hasAllowReassociation();
17216 if (CanReassociate) {
17217 SDValue FMA, E;
17218 if (isFusedOp(N0) && N0.hasOneUse()) {
17219 FMA = N0;
17220 E = N1;
17221 } else if (isFusedOp(N1) && N1.hasOneUse()) {
17222 FMA = N1;
17223 E = N0;
17224 }
17225
17226 SDValue TmpFMA = FMA;
17227 while (E && isFusedOp(TmpFMA) && TmpFMA.hasOneUse()) {
17228 SDValue FMul = TmpFMA->getOperand(2);
17229 if (matcher.match(FMul, ISD::FMUL) && FMul.hasOneUse()) {
17230 SDValue C = FMul.getOperand(0);
17231 SDValue D = FMul.getOperand(1);
17232 SDValue CDE = matcher.getNode(PreferredFusedOpcode, SL, VT, C, D, E);
17233 DAG.ReplaceAllUsesOfValueWith(FMul, CDE);
17234 // Replacing the inner FMul could cause the outer FMA to be simplified
17235 // away.
17236 return FMA.getOpcode() == ISD::DELETED_NODE ? SDValue(N, 0) : FMA;
17237 }
17238
17239 TmpFMA = TmpFMA->getOperand(2);
17240 }
17241 }
17242
17243 // Look through FP_EXTEND nodes to do more combining.
17244
17245 // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
17246 if (matcher.match(N0, ISD::FP_EXTEND)) {
17247 SDValue N00 = N0.getOperand(0);
17248 if (isContractableFMUL(N00) &&
17249 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17250 N00.getValueType())) {
17251 return matcher.getNode(
17252 PreferredFusedOpcode, SL, VT,
17253 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
17254 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)), N1);
17255 }
17256 }
17257
17258 // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
17259 // Note: Commutes FADD operands.
17260 if (matcher.match(N1, ISD::FP_EXTEND)) {
17261 SDValue N10 = N1.getOperand(0);
17262 if (isContractableFMUL(N10) &&
17263 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17264 N10.getValueType())) {
17265 return matcher.getNode(
17266 PreferredFusedOpcode, SL, VT,
17267 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0)),
17268 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0);
17269 }
17270 }
17271
17272 // More folding opportunities when target permits.
17273 if (Aggressive) {
17274 // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
17275 // -> (fma x, y, (fma (fpext u), (fpext v), z))
17276 auto FoldFAddFMAFPExtFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
17277 SDValue Z) {
17278 return matcher.getNode(
17279 PreferredFusedOpcode, SL, VT, X, Y,
17280 matcher.getNode(PreferredFusedOpcode, SL, VT,
17281 matcher.getNode(ISD::FP_EXTEND, SL, VT, U),
17282 matcher.getNode(ISD::FP_EXTEND, SL, VT, V), Z));
17283 };
17284 if (isFusedOp(N0)) {
17285 SDValue N02 = N0.getOperand(2);
17286 if (matcher.match(N02, ISD::FP_EXTEND)) {
17287 SDValue N020 = N02.getOperand(0);
17288 if (isContractableFMUL(N020) &&
17289 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17290 N020.getValueType())) {
17291 return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
17292 N020.getOperand(0), N020.getOperand(1),
17293 N1);
17294 }
17295 }
17296 }
17297
17298 // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
17299 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
17300 // FIXME: This turns two single-precision and one double-precision
17301 // operation into two double-precision operations, which might not be
17302 // interesting for all targets, especially GPUs.
17303 auto FoldFAddFPExtFMAFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
17304 SDValue Z) {
17305 return matcher.getNode(
17306 PreferredFusedOpcode, SL, VT,
17307 matcher.getNode(ISD::FP_EXTEND, SL, VT, X),
17308 matcher.getNode(ISD::FP_EXTEND, SL, VT, Y),
17309 matcher.getNode(PreferredFusedOpcode, SL, VT,
17310 matcher.getNode(ISD::FP_EXTEND, SL, VT, U),
17311 matcher.getNode(ISD::FP_EXTEND, SL, VT, V), Z));
17312 };
17313 if (N0.getOpcode() == ISD::FP_EXTEND) {
17314 SDValue N00 = N0.getOperand(0);
17315 if (isFusedOp(N00)) {
17316 SDValue N002 = N00.getOperand(2);
17317 if (isContractableFMUL(N002) &&
17318 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17319 N00.getValueType())) {
17320 return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
17321 N002.getOperand(0), N002.getOperand(1),
17322 N1);
17323 }
17324 }
17325 }
17326
17327 // fold (fadd x, (fma y, z, (fpext (fmul u, v)))
17328 // -> (fma y, z, (fma (fpext u), (fpext v), x))
17329 if (isFusedOp(N1)) {
17330 SDValue N12 = N1.getOperand(2);
17331 if (N12.getOpcode() == ISD::FP_EXTEND) {
17332 SDValue N120 = N12.getOperand(0);
17333 if (isContractableFMUL(N120) &&
17334 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17335 N120.getValueType())) {
17336 return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
17337 N120.getOperand(0), N120.getOperand(1),
17338 N0);
17339 }
17340 }
17341 }
17342
17343 // fold (fadd x, (fpext (fma y, z, (fmul u, v)))
17344 // -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
17345 // FIXME: This turns two single-precision and one double-precision
17346 // operation into two double-precision operations, which might not be
17347 // interesting for all targets, especially GPUs.
17348 if (N1.getOpcode() == ISD::FP_EXTEND) {
17349 SDValue N10 = N1.getOperand(0);
17350 if (isFusedOp(N10)) {
17351 SDValue N102 = N10.getOperand(2);
17352 if (isContractableFMUL(N102) &&
17353 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17354 N10.getValueType())) {
17355 return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
17356 N102.getOperand(0), N102.getOperand(1),
17357 N0);
17358 }
17359 }
17360 }
17361 }
17362
17363 return SDValue();
17364}
17365
17366/// Try to perform FMA combining on a given FSUB node.
17367template <class MatchContextClass>
17368SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
17369 SDValue N0 = N->getOperand(0);
17370 SDValue N1 = N->getOperand(1);
17371 EVT VT = N->getValueType(0);
17372 SDLoc SL(N);
17373 MatchContextClass matcher(DAG, TLI, N);
17374 const TargetOptions &Options = DAG.getTarget().Options;
17375
17376 bool UseVP = std::is_same_v<MatchContextClass, VPMatchContext>;
17377
17378 // Floating-point multiply-add with intermediate rounding.
17379 // FIXME: Make isFMADLegal have specific behavior when using VPMatchContext.
17380 // FIXME: Add VP_FMAD opcode.
17381 bool HasFMAD = !UseVP && (LegalOperations && TLI.isFMADLegal(DAG, N));
17382
17383 // Floating-point multiply-add without intermediate rounding.
17384 bool HasFMA =
17385 (!LegalOperations || matcher.isOperationLegalOrCustom(ISD::FMA, VT)) &&
17386 TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT);
17387
17388 // No valid opcode, do not combine.
17389 if (!HasFMAD && !HasFMA)
17390 return SDValue();
17391
17392 const SDNodeFlags Flags = N->getFlags();
17393 bool AllowFusionGlobally =
17394 (Options.AllowFPOpFusion == FPOpFusion::Fast || HasFMAD);
17395
17396 // If the subtraction is not contractable, do not combine.
17397 if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
17398 return SDValue();
17399
17400 if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
17401 return SDValue();
17402
17403 // Always prefer FMAD to FMA for precision.
17404 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
17405 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
17406 bool NoSignedZero = Flags.hasNoSignedZeros();
17407
17408 // Is the node an FMUL and contractable either due to global flags or
17409 // SDNodeFlags.
17410 auto isContractableFMUL = [AllowFusionGlobally, &matcher](SDValue N) {
17411 if (!matcher.match(N, ISD::FMUL))
17412 return false;
17413 return AllowFusionGlobally || N->getFlags().hasAllowContract();
17414 };
17415
17416 // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
17417 auto tryToFoldXYSubZ = [&](SDValue XY, SDValue Z) {
17418 if (isContractableFMUL(XY) && (Aggressive || XY->hasOneUse())) {
17419 return matcher.getNode(PreferredFusedOpcode, SL, VT, XY.getOperand(0),
17420 XY.getOperand(1),
17421 matcher.getNode(ISD::FNEG, SL, VT, Z));
17422 }
17423 return SDValue();
17424 };
17425
17426 // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
17427 // Note: Commutes FSUB operands.
17428 auto tryToFoldXSubYZ = [&](SDValue X, SDValue YZ) {
17429 if (isContractableFMUL(YZ) && (Aggressive || YZ->hasOneUse())) {
17430 return matcher.getNode(
17431 PreferredFusedOpcode, SL, VT,
17432 matcher.getNode(ISD::FNEG, SL, VT, YZ.getOperand(0)),
17433 YZ.getOperand(1), X);
17434 }
17435 return SDValue();
17436 };
17437
17438 // If we have two choices trying to fold (fsub (fmul u, v), (fmul x, y)),
17439 // prefer to fold the multiply with fewer uses.
17440 if (isContractableFMUL(N0) && isContractableFMUL(N1) &&
17441 (N0->use_size() > N1->use_size())) {
17442 // fold (fsub (fmul a, b), (fmul c, d)) -> (fma (fneg c), d, (fmul a, b))
17443 if (SDValue V = tryToFoldXSubYZ(N0, N1))
17444 return V;
17445 // fold (fsub (fmul a, b), (fmul c, d)) -> (fma a, b, (fneg (fmul c, d)))
17446 if (SDValue V = tryToFoldXYSubZ(N0, N1))
17447 return V;
17448 } else {
17449 // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
17450 if (SDValue V = tryToFoldXYSubZ(N0, N1))
17451 return V;
17452 // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
17453 if (SDValue V = tryToFoldXSubYZ(N0, N1))
17454 return V;
17455 }
17456
17457 // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
17458 if (matcher.match(N0, ISD::FNEG) && isContractableFMUL(N0.getOperand(0)) &&
17459 (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
17460 SDValue N00 = N0.getOperand(0).getOperand(0);
17461 SDValue N01 = N0.getOperand(0).getOperand(1);
17462 return matcher.getNode(PreferredFusedOpcode, SL, VT,
17463 matcher.getNode(ISD::FNEG, SL, VT, N00), N01,
17464 matcher.getNode(ISD::FNEG, SL, VT, N1));
17465 }
17466
17467 // Look through FP_EXTEND nodes to do more combining.
17468
17469 // fold (fsub (fpext (fmul x, y)), z)
17470 // -> (fma (fpext x), (fpext y), (fneg z))
17471 if (matcher.match(N0, ISD::FP_EXTEND)) {
17472 SDValue N00 = N0.getOperand(0);
17473 if (isContractableFMUL(N00) &&
17474 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17475 N00.getValueType())) {
17476 return matcher.getNode(
17477 PreferredFusedOpcode, SL, VT,
17478 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
17479 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
17480 matcher.getNode(ISD::FNEG, SL, VT, N1));
17481 }
17482 }
17483
17484 // fold (fsub x, (fpext (fmul y, z)))
17485 // -> (fma (fneg (fpext y)), (fpext z), x)
17486 // Note: Commutes FSUB operands.
17487 if (matcher.match(N1, ISD::FP_EXTEND)) {
17488 SDValue N10 = N1.getOperand(0);
17489 if (isContractableFMUL(N10) &&
17490 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17491 N10.getValueType())) {
17492 return matcher.getNode(
17493 PreferredFusedOpcode, SL, VT,
17494 matcher.getNode(
17495 ISD::FNEG, SL, VT,
17496 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0))),
17497 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0);
17498 }
17499 }
17500
17501 // fold (fsub (fpext (fneg (fmul, x, y))), z)
17502 // -> (fneg (fma (fpext x), (fpext y), z))
17503 // Note: This could be removed with appropriate canonicalization of the
17504 // input expression into (fneg (fadd (fpext (fmul, x, y)), z)). However, the
17505 // command line flag -fp-contract=fast and the fast-math flag 'contract' prevent
17506 // us from implementing the canonicalization in visitFSUB.
17507 if (matcher.match(N0, ISD::FP_EXTEND)) {
17508 SDValue N00 = N0.getOperand(0);
17509 if (matcher.match(N00, ISD::FNEG)) {
17510 SDValue N000 = N00.getOperand(0);
17511 if (isContractableFMUL(N000) &&
17512 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17513 N00.getValueType())) {
17514 return matcher.getNode(
17515 ISD::FNEG, SL, VT,
17516 matcher.getNode(
17517 PreferredFusedOpcode, SL, VT,
17518 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
17519 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
17520 N1));
17521 }
17522 }
17523 }
17524
17525 // fold (fsub (fneg (fpext (fmul, x, y))), z)
17526 // -> (fneg (fma (fpext x), (fpext y), z))
17527 // Note: This could be removed with appropriate canonicalization of the
17528 // input expression into (fneg (fadd (fpext (fmul, x, y)), z)). However, the
17529 // command line flag -fp-contract=fast and the fast-math flag 'contract' prevent
17530 // us from implementing the canonicalization in visitFSUB.
17531 if (matcher.match(N0, ISD::FNEG)) {
17532 SDValue N00 = N0.getOperand(0);
17533 if (matcher.match(N00, ISD::FP_EXTEND)) {
17534 SDValue N000 = N00.getOperand(0);
17535 if (isContractableFMUL(N000) &&
17536 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17537 N000.getValueType())) {
17538 return matcher.getNode(
17539 ISD::FNEG, SL, VT,
17540 matcher.getNode(
17541 PreferredFusedOpcode, SL, VT,
17542 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
17543 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
17544 N1));
17545 }
17546 }
17547 }
17548
17549 auto isContractableAndReassociableFMUL = [&isContractableFMUL](SDValue N) {
17550 return isContractableFMUL(N) && N->getFlags().hasAllowReassociation();
17551 };
17552
17553 auto isFusedOp = [&](SDValue N) {
17554 return matcher.match(N, ISD::FMA) || matcher.match(N, ISD::FMAD);
17555 };
17556
17557 // More folding opportunities when target permits.
17558 if (Aggressive && N->getFlags().hasAllowReassociation()) {
17559 bool CanFuse = N->getFlags().hasAllowContract();
17560 // fold (fsub (fma x, y, (fmul u, v)), z)
17561 // -> (fma x, y (fma u, v, (fneg z)))
17562 if (CanFuse && isFusedOp(N0) &&
17563 isContractableAndReassociableFMUL(N0.getOperand(2)) &&
17564 N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
17565 return matcher.getNode(
17566 PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1),
17567 matcher.getNode(PreferredFusedOpcode, SL, VT,
17568 N0.getOperand(2).getOperand(0),
17569 N0.getOperand(2).getOperand(1),
17570 matcher.getNode(ISD::FNEG, SL, VT, N1)));
17571 }
17572
17573 // fold (fsub x, (fma y, z, (fmul u, v)))
17574 // -> (fma (fneg y), z, (fma (fneg u), v, x))
17575 if (CanFuse && isFusedOp(N1) &&
17576 isContractableAndReassociableFMUL(N1.getOperand(2)) &&
17577 N1->hasOneUse() && NoSignedZero) {
17578 SDValue N20 = N1.getOperand(2).getOperand(0);
17579 SDValue N21 = N1.getOperand(2).getOperand(1);
17580 return matcher.getNode(
17581 PreferredFusedOpcode, SL, VT,
17582 matcher.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
17583 N1.getOperand(1),
17584 matcher.getNode(PreferredFusedOpcode, SL, VT,
17585 matcher.getNode(ISD::FNEG, SL, VT, N20), N21, N0));
17586 }
17587
17588 // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
17589 // -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
17590 if (isFusedOp(N0) && N0->hasOneUse()) {
17591 SDValue N02 = N0.getOperand(2);
17592 if (matcher.match(N02, ISD::FP_EXTEND)) {
17593 SDValue N020 = N02.getOperand(0);
17594 if (isContractableAndReassociableFMUL(N020) &&
17595 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17596 N020.getValueType())) {
17597 return matcher.getNode(
17598 PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1),
17599 matcher.getNode(
17600 PreferredFusedOpcode, SL, VT,
17601 matcher.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(0)),
17602 matcher.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(1)),
17603 matcher.getNode(ISD::FNEG, SL, VT, N1)));
17604 }
17605 }
17606 }
17607
17608 // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
17609 // -> (fma (fpext x), (fpext y),
17610 // (fma (fpext u), (fpext v), (fneg z)))
17611 // FIXME: This turns two single-precision and one double-precision
17612 // operation into two double-precision operations, which might not be
17613 // interesting for all targets, especially GPUs.
17614 if (matcher.match(N0, ISD::FP_EXTEND)) {
17615 SDValue N00 = N0.getOperand(0);
17616 if (isFusedOp(N00)) {
17617 SDValue N002 = N00.getOperand(2);
17618 if (isContractableAndReassociableFMUL(N002) &&
17619 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17620 N00.getValueType())) {
17621 return matcher.getNode(
17622 PreferredFusedOpcode, SL, VT,
17623 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
17624 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
17625 matcher.getNode(
17626 PreferredFusedOpcode, SL, VT,
17627 matcher.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(0)),
17628 matcher.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(1)),
17629 matcher.getNode(ISD::FNEG, SL, VT, N1)));
17630 }
17631 }
17632 }
17633
17634 // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
17635 // -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
17636 if (isFusedOp(N1) && matcher.match(N1.getOperand(2), ISD::FP_EXTEND) &&
17637 N1->hasOneUse()) {
17638 SDValue N120 = N1.getOperand(2).getOperand(0);
17639 if (isContractableAndReassociableFMUL(N120) &&
17640 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17641 N120.getValueType())) {
17642 SDValue N1200 = N120.getOperand(0);
17643 SDValue N1201 = N120.getOperand(1);
17644 return matcher.getNode(
17645 PreferredFusedOpcode, SL, VT,
17646 matcher.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
17647 N1.getOperand(1),
17648 matcher.getNode(
17649 PreferredFusedOpcode, SL, VT,
17650 matcher.getNode(ISD::FNEG, SL, VT,
17651 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1200)),
17652 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1201), N0));
17653 }
17654 }
17655
17656 // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
17657 // -> (fma (fneg (fpext y)), (fpext z),
17658 // (fma (fneg (fpext u)), (fpext v), x))
17659 // FIXME: This turns two single-precision and one double-precision
17660 // operation into two double-precision operations, which might not be
17661 // interesting for all targets, especially GPUs.
17662 if (matcher.match(N1, ISD::FP_EXTEND) && isFusedOp(N1.getOperand(0))) {
17663 SDValue CvtSrc = N1.getOperand(0);
17664 SDValue N100 = CvtSrc.getOperand(0);
17665 SDValue N101 = CvtSrc.getOperand(1);
17666 SDValue N102 = CvtSrc.getOperand(2);
17667 if (isContractableAndReassociableFMUL(N102) &&
17668 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17669 CvtSrc.getValueType())) {
17670 SDValue N1020 = N102.getOperand(0);
17671 SDValue N1021 = N102.getOperand(1);
17672 return matcher.getNode(
17673 PreferredFusedOpcode, SL, VT,
17674 matcher.getNode(ISD::FNEG, SL, VT,
17675 matcher.getNode(ISD::FP_EXTEND, SL, VT, N100)),
17676 matcher.getNode(ISD::FP_EXTEND, SL, VT, N101),
17677 matcher.getNode(
17678 PreferredFusedOpcode, SL, VT,
17679 matcher.getNode(ISD::FNEG, SL, VT,
17680 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1020)),
17681 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1021), N0));
17682 }
17683 }
17684 }
17685
17686 return SDValue();
17687}
17688
17689/// Try to perform FMA combining on a given FMUL node based on the distributive
17690/// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
17691/// subtraction instead of addition).
17692SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
17693 SDValue N0 = N->getOperand(0);
17694 SDValue N1 = N->getOperand(1);
17695 EVT VT = N->getValueType(0);
17696 SDLoc SL(N);
17697
17698 assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
17699
17700 const TargetOptions &Options = DAG.getTarget().Options;
17701
17702 // The transforms below are incorrect when x == 0 and y == inf, because the
17703 // intermediate multiplication produces a nan.
17704 SDValue FAdd = N0.getOpcode() == ISD::FADD ? N0 : N1;
17705 if (!FAdd->getFlags().hasNoInfs())
17706 return SDValue();
17707
17708 // Floating-point multiply-add without intermediate rounding.
17709 bool HasFMA =
17710 isContractableFMUL(Options, SDValue(N, 0)) &&
17711 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT)) &&
17712 TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT);
17713
17714 // Floating-point multiply-add with intermediate rounding. This can result
17715 // in a less precise result due to the changed rounding order.
17716 bool HasFMAD = LegalOperations && TLI.isFMADLegal(DAG, N);
17717
17718 // No valid opcode, do not combine.
17719 if (!HasFMAD && !HasFMA)
17720 return SDValue();
17721
17722 // Always prefer FMAD to FMA for precision.
17723 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
17724 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
17725
17726 // fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y)
17727 // fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y))
17728 auto FuseFADD = [&](SDValue X, SDValue Y) {
17729 if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
17730 if (auto *C = isConstOrConstSplatFP(X.getOperand(1), true)) {
17731 if (C->isExactlyValue(+1.0))
17732 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
17733 Y);
17734 if (C->isExactlyValue(-1.0))
17735 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
17736 DAG.getNode(ISD::FNEG, SL, VT, Y));
17737 }
17738 }
17739 return SDValue();
17740 };
17741
17742 if (SDValue FMA = FuseFADD(N0, N1))
17743 return FMA;
17744 if (SDValue FMA = FuseFADD(N1, N0))
17745 return FMA;
17746
17747 // fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y)
17748 // fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y))
17749 // fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y))
17750 // fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y)
17751 auto FuseFSUB = [&](SDValue X, SDValue Y) {
17752 if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
17753 if (auto *C0 = isConstOrConstSplatFP(X.getOperand(0), true)) {
17754 if (C0->isExactlyValue(+1.0))
17755 return DAG.getNode(PreferredFusedOpcode, SL, VT,
17756 DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
17757 Y);
17758 if (C0->isExactlyValue(-1.0))
17759 return DAG.getNode(PreferredFusedOpcode, SL, VT,
17760 DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
17761 DAG.getNode(ISD::FNEG, SL, VT, Y));
17762 }
17763 if (auto *C1 = isConstOrConstSplatFP(X.getOperand(1), true)) {
17764 if (C1->isExactlyValue(+1.0))
17765 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
17766 DAG.getNode(ISD::FNEG, SL, VT, Y));
17767 if (C1->isExactlyValue(-1.0))
17768 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
17769 Y);
17770 }
17771 }
17772 return SDValue();
17773 };
17774
17775 if (SDValue FMA = FuseFSUB(N0, N1))
17776 return FMA;
17777 if (SDValue FMA = FuseFSUB(N1, N0))
17778 return FMA;
17779
17780 return SDValue();
17781}
17782
17783SDValue DAGCombiner::visitVP_FADD(SDNode *N) {
17784 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17785
17786 // FADD -> FMA combines:
17787 if (SDValue Fused = visitFADDForFMACombine<VPMatchContext>(N)) {
17788 if (Fused.getOpcode() != ISD::DELETED_NODE)
17789 AddToWorklist(Fused.getNode());
17790 return Fused;
17791 }
17792 return SDValue();
17793}
17794
17795SDValue DAGCombiner::visitFADD(SDNode *N) {
17796 SDValue N0 = N->getOperand(0);
17797 SDValue N1 = N->getOperand(1);
17798 bool N0CFP = DAG.isConstantFPBuildVectorOrConstantFP(N0);
17799 bool N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N1);
17800 EVT VT = N->getValueType(0);
17801 SDLoc DL(N);
17802 SDNodeFlags Flags = N->getFlags();
17803 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17804
17805 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
17806 return R;
17807
17808 // fold (fadd c1, c2) -> c1 + c2
17809 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FADD, DL, VT, {N0, N1}))
17810 return C;
17811
17812 // canonicalize constant to RHS
17813 if (N0CFP && !N1CFP)
17814 return DAG.getNode(ISD::FADD, DL, VT, N1, N0);
17815
17816 // fold vector ops
17817 if (VT.isVector())
17818 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
17819 return FoldedVOp;
17820
17821 // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
17822 ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
17823 if (N1C && N1C->isZero())
17824 if (N1C->isNegative() || Flags.hasNoSignedZeros())
17825 return N0;
17826
17827 if (SDValue NewSel = foldBinOpIntoSelect(N))
17828 return NewSel;
17829
17830 // fold (fadd A, (fneg B)) -> (fsub A, B)
17831 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
17832 if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
17833 N1, DAG, LegalOperations, ForCodeSize))
17834 return DAG.getNode(ISD::FSUB, DL, VT, N0, NegN1);
17835
17836 // fold (fadd (fneg A), B) -> (fsub B, A)
17837 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
17838 if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
17839 N0, DAG, LegalOperations, ForCodeSize))
17840 return DAG.getNode(ISD::FSUB, DL, VT, N1, NegN0);
17841
17842 auto isFMulNegTwo = [](SDValue FMul) {
17843 if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
17844 return false;
17845 auto *C = isConstOrConstSplatFP(FMul.getOperand(1), true);
17846 return C && C->isExactlyValue(-2.0);
17847 };
17848
17849 // fadd (fmul B, -2.0), A --> fsub A, (fadd B, B)
17850 if (isFMulNegTwo(N0)) {
17851 SDValue B = N0.getOperand(0);
17852 SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
17853 return DAG.getNode(ISD::FSUB, DL, VT, N1, Add);
17854 }
17855 // fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B)
17856 if (isFMulNegTwo(N1)) {
17857 SDValue B = N1.getOperand(0);
17858 SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
17859 return DAG.getNode(ISD::FSUB, DL, VT, N0, Add);
17860 }
17861
17862 // No FP constant should be created after legalization as Instruction
17863 // Selection pass has a hard time dealing with FP constants.
17864 bool AllowNewConst = (Level < AfterLegalizeDAG);
17865
17866 // If nnan is enabled, fold lots of things.
17867 if (Flags.hasNoNaNs() && AllowNewConst) {
17868 // If allowed, fold (fadd (fneg x), x) -> 0.0
17869 if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
17870 return DAG.getConstantFP(0.0, DL, VT);
17871
17872 // If allowed, fold (fadd x, (fneg x)) -> 0.0
17873 if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
17874 return DAG.getConstantFP(0.0, DL, VT);
17875 }
17876
17877 // If reassoc and nsz, fold lots of things.
17878 // TODO: break out portions of the transformations below for which Unsafe is
17879 // considered and which do not require both nsz and reassoc
17880 if (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros() &&
17881 AllowNewConst) {
17882 // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
17883 if (N1CFP && N0.getOpcode() == ISD::FADD &&
17884 DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
17885 SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1);
17886 return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC);
17887 }
17888
17889 // We can fold chains of FADD's of the same value into multiplications.
17890 // This transform is not safe in general because we are reducing the number
17891 // of rounding steps.
17892 if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
17893 if (N0.getOpcode() == ISD::FMUL) {
17894 bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
17895 bool CFP01 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
17896
17897 // (fadd (fmul x, c), x) -> (fmul x, c+1)
17898 if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
17899 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
17900 DAG.getConstantFP(1.0, DL, VT));
17901 return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP);
17902 }
17903
17904 // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
17905 if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
17906 N1.getOperand(0) == N1.getOperand(1) &&
17907 N0.getOperand(0) == N1.getOperand(0)) {
17908 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
17909 DAG.getConstantFP(2.0, DL, VT));
17910 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP);
17911 }
17912 }
17913
17914 if (N1.getOpcode() == ISD::FMUL) {
17915 bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
17916 bool CFP11 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
17917
17918 // (fadd x, (fmul x, c)) -> (fmul x, c+1)
17919 if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
17920 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
17921 DAG.getConstantFP(1.0, DL, VT));
17922 return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP);
17923 }
17924
17925 // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
17926 if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
17927 N0.getOperand(0) == N0.getOperand(1) &&
17928 N1.getOperand(0) == N0.getOperand(0)) {
17929 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
17930 DAG.getConstantFP(2.0, DL, VT));
17931 return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP);
17932 }
17933 }
17934
17935 if (N0.getOpcode() == ISD::FADD) {
17936 bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
17937 // (fadd (fadd x, x), x) -> (fmul x, 3.0)
17938 if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
17939 (N0.getOperand(0) == N1)) {
17940 return DAG.getNode(ISD::FMUL, DL, VT, N1,
17941 DAG.getConstantFP(3.0, DL, VT));
17942 }
17943 }
17944
17945 if (N1.getOpcode() == ISD::FADD) {
17946 bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
17947 // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
17948 if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
17949 N1.getOperand(0) == N0) {
17950 return DAG.getNode(ISD::FMUL, DL, VT, N0,
17951 DAG.getConstantFP(3.0, DL, VT));
17952 }
17953 }
17954
17955 // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
17956 if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
17957 N0.getOperand(0) == N0.getOperand(1) &&
17958 N1.getOperand(0) == N1.getOperand(1) &&
17959 N0.getOperand(0) == N1.getOperand(0)) {
17960 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
17961 DAG.getConstantFP(4.0, DL, VT));
17962 }
17963 }
17964 } // reassoc && nsz && AllowNewConst
17965
17966 if (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros()) {
17967 // Fold fadd(vecreduce(x), vecreduce(y)) -> vecreduce(fadd(x, y))
17968 if (SDValue SD = reassociateReduction(ISD::VECREDUCE_FADD, ISD::FADD, DL,
17969 VT, N0, N1, Flags))
17970 return SD;
17971 }
17972
17973 // FADD -> FMA combines:
17974 if (SDValue Fused = visitFADDForFMACombine<EmptyMatchContext>(N)) {
17975 if (Fused.getOpcode() != ISD::DELETED_NODE)
17976 AddToWorklist(Fused.getNode());
17977 return Fused;
17978 }
17979 return SDValue();
17980}
17981
17982SDValue DAGCombiner::visitSTRICT_FADD(SDNode *N) {
17983 SDValue Chain = N->getOperand(0);
17984 SDValue N0 = N->getOperand(1);
17985 SDValue N1 = N->getOperand(2);
17986 EVT VT = N->getValueType(0);
17987 EVT ChainVT = N->getValueType(1);
17988 SDLoc DL(N);
17989 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17990
17991 // fold (strict_fadd A, (fneg B)) -> (strict_fsub A, B)
17992 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
17993 if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
17994 N1, DAG, LegalOperations, ForCodeSize)) {
17995 return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
17996 {Chain, N0, NegN1});
17997 }
17998
17999 // fold (strict_fadd (fneg A), B) -> (strict_fsub B, A)
18000 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
18001 if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
18002 N0, DAG, LegalOperations, ForCodeSize)) {
18003 return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
18004 {Chain, N1, NegN0});
18005 }
18006 return SDValue();
18007}
18008
18009SDValue DAGCombiner::visitFSUB(SDNode *N) {
18010 SDValue N0 = N->getOperand(0);
18011 SDValue N1 = N->getOperand(1);
18012 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
18013 ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
18014 EVT VT = N->getValueType(0);
18015 SDLoc DL(N);
18016 const SDNodeFlags Flags = N->getFlags();
18017 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
18018
18019 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
18020 return R;
18021
18022 // fold (fsub c1, c2) -> c1-c2
18023 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FSUB, DL, VT, {N0, N1}))
18024 return C;
18025
18026 // fold vector ops
18027 if (VT.isVector())
18028 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
18029 return FoldedVOp;
18030
18031 if (SDValue NewSel = foldBinOpIntoSelect(N))
18032 return NewSel;
18033
18034 // (fsub A, 0) -> A
18035 if (N1CFP && N1CFP->isZero()) {
18036 if (!N1CFP->isNegative() || Flags.hasNoSignedZeros()) {
18037 return N0;
18038 }
18039 }
18040
18041 if (N0 == N1) {
18042 // (fsub x, x) -> 0.0
18043 if (Flags.hasNoNaNs())
18044 return DAG.getConstantFP(0.0f, DL, VT);
18045 }
18046
18047 // (fsub -0.0, N1) -> -N1
18048 if (N0CFP && N0CFP->isZero()) {
18049 if (N0CFP->isNegative() || Flags.hasNoSignedZeros()) {
18050 // We cannot replace an FSUB(+-0.0,X) with FNEG(X) when denormals are
18051 // flushed to zero, unless all users treat denorms as zero (DAZ).
18052 // FIXME: This transform will change the sign of a NaN and the behavior
18053 // of a signaling NaN. It is only valid when a NoNaN flag is present.
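// E.g. with flush-to-zero, (fsub 0.0, d) for a denormal d is flushed to a
// zero result, while fneg(d) merely flips the sign bit and keeps the
// denormal value.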
18054 DenormalMode DenormMode = DAG.getDenormalMode(VT);
18055 if (DenormMode == DenormalMode::getIEEE()) {
18056 if (SDValue NegN1 =
18057 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
18058 return NegN1;
18059 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
18060 return DAG.getNode(ISD::FNEG, DL, VT, N1);
18061 }
18062 }
18063 }
18064
18065 if (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros() &&
18066 N1.getOpcode() == ISD::FADD) {
18067 // X - (X + Y) -> -Y
18068 if (N0 == N1->getOperand(0))
18069 return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1));
18070 // X - (Y + X) -> -Y
18071 if (N0 == N1->getOperand(1))
18072 return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0));
18073 }
18074
18075 // fold (fsub A, (fneg B)) -> (fadd A, B)
18076 if (SDValue NegN1 =
18077 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
18078 return DAG.getNode(ISD::FADD, DL, VT, N0, NegN1);
18079
18080 // FSUB -> FMA combines:
18081 if (SDValue Fused = visitFSUBForFMACombine<EmptyMatchContext>(N)) {
18082 AddToWorklist(Fused.getNode());
18083 return Fused;
18084 }
18085
18086 return SDValue();
18087}
18088
18089// Transform IEEE Floats:
18090// (fmul C, (uitofp Pow2))
18091// -> (bitcast_to_FP (add (bitcast_to_INT C), Log2(Pow2) << mantissa))
18092// (fdiv C, (uitofp Pow2))
18093// -> (bitcast_to_FP (sub (bitcast_to_INT C), Log2(Pow2) << mantissa))
18094//
18095 // The rationale is that fmul/fdiv by a power of 2 just changes the exponent,
18096 // so there is no need for more than an add/sub.
18097//
18098// This is valid under the following circumstances:
18099// 1) We are dealing with IEEE floats
18100// 2) C is normal
18101// 3) The fmul/fdiv add/sub will not go outside of min/max exponent bounds.
18102 // TODO: Much of this could also be used for generating `ldexp` on targets
18103 // that prefer it.
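// For example, for f32 (23-bit mantissa field), multiplying a normal
// constant C by 2^k is equivalent to adding (k << 23) to C's bit pattern,
// as long as the result stays within the normal exponent range.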
18104SDValue DAGCombiner::combineFMulOrFDivWithIntPow2(SDNode *N) {
18105 EVT VT = N->getValueType(0);
18107 return SDValue();
18108
18109 SDValue ConstOp, Pow2Op;
18110
18111 std::optional<int> Mantissa;
18112 auto GetConstAndPow2Ops = [&](unsigned ConstOpIdx) {
18113 if (ConstOpIdx == 1 && N->getOpcode() == ISD::FDIV)
18114 return false;
18115
18116 ConstOp = peekThroughBitcasts(N->getOperand(ConstOpIdx));
18117 Pow2Op = N->getOperand(1 - ConstOpIdx);
18118 if (Pow2Op.getOpcode() != ISD::UINT_TO_FP &&
18119 (Pow2Op.getOpcode() != ISD::SINT_TO_FP ||
18120 !DAG.computeKnownBits(Pow2Op).isNonNegative()))
18121 return false;
18122
18123 Pow2Op = Pow2Op.getOperand(0);
18124
18125 // `Log2(Pow2Op) < Pow2Op.getScalarSizeInBits()`.
18126 // TODO: We could use knownbits to make this bound more precise.
18127 int MaxExpChange = Pow2Op.getValueType().getScalarSizeInBits();
18128
18129 auto IsFPConstValid = [N, MaxExpChange, &Mantissa](ConstantFPSDNode *CFP) {
18130 if (CFP == nullptr)
18131 return false;
18132
18133 const APFloat &APF = CFP->getValueAPF();
18134
18135 // Make sure we have a normal constant.
18136 if (!APF.isNormal())
18137 return false;
18138
18139 // Make sure the float's exponent is within the bounds for which this
18140 // transform produces a bitwise-identical value.
18141 int CurExp = ilogb(APF);
18142 // FMul by pow2 will only increase exponent.
18143 int MinExp =
18144 N->getOpcode() == ISD::FMUL ? CurExp : (CurExp - MaxExpChange);
18145 // FDiv by pow2 will only decrease exponent.
18146 int MaxExp =
18147 N->getOpcode() == ISD::FDIV ? CurExp : (CurExp + MaxExpChange);
18148 if (MinExp <= APFloat::semanticsMinExponent(APF.getSemantics()) ||
18149 MaxExp >= APFloat::semanticsMaxExponent(APF.getSemantics()))
18150 return false;
18151
18152 // Finally make sure we actually know the mantissa for the float type.
18153 int ThisMantissa = APFloat::semanticsPrecision(APF.getSemantics()) - 1;
18154 if (!Mantissa)
18155 Mantissa = ThisMantissa;
18156
18157 return *Mantissa == ThisMantissa && ThisMantissa > 0;
18158 };
18159
18160 // TODO: We may be able to include undefs.
18161 return ISD::matchUnaryFpPredicate(ConstOp, IsFPConstValid);
18162 };
18163
18164 if (!GetConstAndPow2Ops(0) && !GetConstAndPow2Ops(1))
18165 return SDValue();
18166
18167 if (!TLI.optimizeFMulOrFDivAsShiftAddBitcast(N, ConstOp, Pow2Op))
18168 return SDValue();
18169
18170 // Get log2 after all other checks have taken place. This is because
18171 // BuildLogBase2 may create a new node.
18172 SDLoc DL(N);
18173 // Get Log2 type with same bitwidth as the float type (VT).
18174 EVT NewIntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits());
18175 if (VT.isVector())
18176 NewIntVT = EVT::getVectorVT(*DAG.getContext(), NewIntVT,
18177 VT.getVectorElementCount());
18178
18179 SDValue Log2 = BuildLogBase2(Pow2Op, DL, DAG.isKnownNeverZero(Pow2Op),
18180 /*InexpensiveOnly*/ true, NewIntVT);
18181 if (!Log2)
18182 return SDValue();
18183
18184 // Perform actual transform.
18185 SDValue MantissaShiftCnt =
18186 DAG.getShiftAmountConstant(*Mantissa, NewIntVT, DL);
18187 // TODO: Sometimes Log2 is of the form `(X + C)`. `(X + C) << C1` should fold
18188 // to `(X << C1) + (C << C1)`, but that isn't always the case because of the
18189 // cast. We could handle that here by accounting for the casts.
18190 SDValue Shift = DAG.getNode(ISD::SHL, DL, NewIntVT, Log2, MantissaShiftCnt);
18191 SDValue ResAsInt =
18192 DAG.getNode(N->getOpcode() == ISD::FMUL ? ISD::ADD : ISD::SUB, DL,
18193 NewIntVT, DAG.getBitcast(NewIntVT, ConstOp), Shift);
18194 SDValue ResAsFP = DAG.getBitcast(VT, ResAsInt);
18195 return ResAsFP;
18196}
18197
18198SDValue DAGCombiner::visitFMUL(SDNode *N) {
18199 SDValue N0 = N->getOperand(0);
18200 SDValue N1 = N->getOperand(1);
18201 ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
18202 EVT VT = N->getValueType(0);
18203 SDLoc DL(N);
18204 const SDNodeFlags Flags = N->getFlags();
18205 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
18206
18207 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
18208 return R;
18209
18210 // fold (fmul c1, c2) -> c1*c2
18211 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FMUL, DL, VT, {N0, N1}))
18212 return C;
18213
18214 // canonicalize constant to RHS
18215 if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
18216 !DAG.isConstantFPBuildVectorOrConstantFP(N1))
18217 return DAG.getNode(ISD::FMUL, DL, VT, N1, N0);
18218
18219 // fold vector ops
18220 if (VT.isVector())
18221 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
18222 return FoldedVOp;
18223
18224 if (SDValue NewSel = foldBinOpIntoSelect(N))
18225 return NewSel;
18226
18227 if (Flags.hasAllowReassociation()) {
18228 // fmul (fmul X, C1), C2 -> fmul X, C1 * C2
18229 if (DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
18230 N0.getOpcode() == ISD::FMUL) {
18231 SDValue N00 = N0.getOperand(0);
18232 SDValue N01 = N0.getOperand(1);
18233 // Avoid an infinite loop by making sure that N00 is not a constant
18234 // (the inner multiply has not been constant folded yet).
18235 if (DAG.isConstantFPBuildVectorOrConstantFP(N01) &&
18236 !DAG.isConstantFPBuildVectorOrConstantFP(N00)) {
18237 SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1);
18238 return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts);
18239 }
18240 }
18241
18242 // Match a special-case: we convert X * 2.0 into fadd.
18243 // fmul (fadd X, X), C -> fmul X, 2.0 * C
18244 if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() &&
18245 N0.getOperand(0) == N0.getOperand(1)) {
18246 const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
18247 SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1);
18248 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts);
18249 }
18250
18251 // Fold fmul(vecreduce(x), vecreduce(y)) -> vecreduce(fmul(x, y))
18252 if (SDValue SD = reassociateReduction(ISD::VECREDUCE_FMUL, ISD::FMUL, DL,
18253 VT, N0, N1, Flags))
18254 return SD;
18255 }
18256
18257 // fold (fmul X, 2.0) -> (fadd X, X)
18258 if (N1CFP && N1CFP->isExactlyValue(+2.0))
18259 return DAG.getNode(ISD::FADD, DL, VT, N0, N0);
18260
18261 // fold (fmul X, -1.0) -> (fsub -0.0, X)
18262 if (N1CFP && N1CFP->isExactlyValue(-1.0)) {
18263 if (!LegalOperations || TLI.isOperationLegal(ISD::FSUB, VT)) {
18264 return DAG.getNode(ISD::FSUB, DL, VT,
18265 DAG.getConstantFP(-0.0, DL, VT), N0, Flags);
18266 }
18267 }
18268
18269 // -N0 * -N1 --> N0 * N1
18270 TargetLowering::NegatibleCost CostN0 =
18271 TargetLowering::NegatibleCost::Expensive;
18272 TargetLowering::NegatibleCost CostN1 =
18273 TargetLowering::NegatibleCost::Expensive;
18274 SDValue NegN0 =
18275 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
18276 if (NegN0) {
18277 HandleSDNode NegN0Handle(NegN0);
18278 SDValue NegN1 =
18279 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
18280 if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
18281 CostN1 == TargetLowering::NegatibleCost::Cheaper))
18282 return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1);
18283 }
18284
18285 // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
18286 // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
18287 if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
18288 (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
18289 TLI.isOperationLegal(ISD::FABS, VT)) {
18290 SDValue Select = N0, X = N1;
18291 if (Select.getOpcode() != ISD::SELECT)
18292 std::swap(Select, X);
18293
18294 SDValue Cond = Select.getOperand(0);
18295 auto TrueOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
18296 auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));
18297
18298 if (TrueOpnd && FalseOpnd &&
18299 Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
18300 isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
18301 cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
18302 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
18303 switch (CC) {
18304 default: break;
18305 case ISD::SETOLT:
18306 case ISD::SETULT:
18307 case ISD::SETOLE:
18308 case ISD::SETULE:
18309 case ISD::SETLT:
18310 case ISD::SETLE:
18311 std::swap(TrueOpnd, FalseOpnd);
18312 [[fallthrough]];
18313 case ISD::SETOGT:
18314 case ISD::SETUGT:
18315 case ISD::SETOGE:
18316 case ISD::SETUGE:
18317 case ISD::SETGT:
18318 case ISD::SETGE:
18319 if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
18320 TLI.isOperationLegal(ISD::FNEG, VT))
18321 return DAG.getNode(ISD::FNEG, DL, VT,
18322 DAG.getNode(ISD::FABS, DL, VT, X));
18323 if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
18324 return DAG.getNode(ISD::FABS, DL, VT, X);
18325
18326 break;
18327 }
18328 }
18329 }
18330
18331 // FMUL -> FMA combines:
18332 if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
18333 AddToWorklist(Fused.getNode());
18334 return Fused;
18335 }
18336
18337 // Don't do `combineFMulOrFDivWithIntPow2` until after FMUL -> FMA has been
18338 // able to run.
18339 if (SDValue R = combineFMulOrFDivWithIntPow2(N))
18340 return R;
18341
18342 return SDValue();
18343}
18344
18345template <class MatchContextClass> SDValue DAGCombiner::visitFMA(SDNode *N) {
18346 SDValue N0 = N->getOperand(0);
18347 SDValue N1 = N->getOperand(1);
18348 SDValue N2 = N->getOperand(2);
18349 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
18350 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
18351 ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2);
18352 EVT VT = N->getValueType(0);
18353 SDLoc DL(N);
18354 // FMA nodes have flags that propagate to the created nodes.
18355 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
18356 MatchContextClass matcher(DAG, TLI, N);
18357
18358 // Constant fold FMA.
18359 if (SDValue C =
18360 DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1, N2}))
18361 return C;
18362
18363 // (-N0 * -N1) + N2 --> (N0 * N1) + N2
18364 TargetLowering::NegatibleCost CostN0 =
18365 TargetLowering::NegatibleCost::Expensive;
18366 TargetLowering::NegatibleCost CostN1 =
18367 TargetLowering::NegatibleCost::Expensive;
18368 SDValue NegN0 =
18369 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
18370 if (NegN0) {
18371 HandleSDNode NegN0Handle(NegN0);
18372 SDValue NegN1 =
18373 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
18374 if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
18375 CostN1 == TargetLowering::NegatibleCost::Cheaper))
18376 return matcher.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2);
18377 }
18378
18379 if (N->getFlags().hasNoNaNs() && N->getFlags().hasNoInfs()) {
18380 if (N->getFlags().hasNoSignedZeros() ||
18381 (N2CFP && !N2CFP->isExactlyValue(-0.0))) {
18382 if (N0CFP && N0CFP->isZero())
18383 return N2;
18384 if (N1CFP && N1CFP->isZero())
18385 return N2;
18386 }
18387 }
18388
18389 // FIXME: Support splat of constant.
18390 if (N0CFP && N0CFP->isExactlyValue(1.0))
18391 return matcher.getNode(ISD::FADD, DL, VT, N1, N2);
18392 if (N1CFP && N1CFP->isExactlyValue(1.0))
18393 return matcher.getNode(ISD::FADD, DL, VT, N0, N2);
18394
18395 // Canonicalize (fma c, x, y) -> (fma x, c, y)
18396 if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
18397 !DAG.isConstantFPBuildVectorOrConstantFP(N1))
18398 return matcher.getNode(ISD::FMA, DL, VT, N1, N0, N2);
18399
18400 bool CanReassociate = N->getFlags().hasAllowReassociation();
18401 if (CanReassociate) {
18402 // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
18403 if (matcher.match(N2, ISD::FMUL) && N0 == N2.getOperand(0) &&
18404 DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
18405 DAG.isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
18406 return matcher.getNode(
18407 ISD::FMUL, DL, VT, N0,
18408 matcher.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1)));
18409 }
18410
18411 // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
18412 if (matcher.match(N0, ISD::FMUL) &&
18413 DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
18414 DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
18415 return matcher.getNode(
18416 ISD::FMA, DL, VT, N0.getOperand(0),
18417 matcher.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1)), N2);
18418 }
18419 }
18420
18421 // (fma x, 1, y) -> (fadd x, y); (fma x, -1, y) -> (fadd (fneg x), y)
18422 // FIXME: Support splat of constant.
18423 if (N1CFP) {
18424 if (N1CFP->isExactlyValue(1.0))
18425 return matcher.getNode(ISD::FADD, DL, VT, N0, N2);
18426
18427 if (N1CFP->isExactlyValue(-1.0) &&
18428 (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
18429 SDValue RHSNeg = matcher.getNode(ISD::FNEG, DL, VT, N0);
18430 AddToWorklist(RHSNeg.getNode());
18431 return matcher.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
18432 }
18433
18434 // fma (fneg x), K, y -> fma x, -K, y
18435 if (matcher.match(N0, ISD::FNEG) &&
18436 (TLI.isOperationLegal(ISD::ConstantFP, VT) ||
18437 (N1.hasOneUse() &&
18438 !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT, ForCodeSize)))) {
18439 return matcher.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
18440 matcher.getNode(ISD::FNEG, DL, VT, N1), N2);
18441 }
18442 }
18443
18444 // FIXME: Support splat of constant.
18445 if (CanReassociate) {
18446 // (fma x, c, x) -> (fmul x, (c+1))
18447 if (N1CFP && N0 == N2) {
18448 return matcher.getNode(ISD::FMUL, DL, VT, N0,
18449 matcher.getNode(ISD::FADD, DL, VT, N1,
18450 DAG.getConstantFP(1.0, DL, VT)));
18451 }
18452
18453 // (fma x, c, (fneg x)) -> (fmul x, (c-1))
18454 if (N1CFP && matcher.match(N2, ISD::FNEG) && N2.getOperand(0) == N0) {
18455 return matcher.getNode(ISD::FMUL, DL, VT, N0,
18456 matcher.getNode(ISD::FADD, DL, VT, N1,
18457 DAG.getConstantFP(-1.0, DL, VT)));
18458 }
18459 }
18460
18461 // fold ((fma (fneg X), Y, (fneg Z)) -> fneg (fma X, Y, Z))
18462 // fold ((fma X, (fneg Y), (fneg Z)) -> fneg (fma X, Y, Z))
18463 if (!TLI.isFNegFree(VT))
18464 if (SDValue Neg = TLI.getCheaperNegatedExpression(
18465 SDValue(N, 0), DAG, LegalOperations, ForCodeSize))
18466 return matcher.getNode(ISD::FNEG, DL, VT, Neg);
18467 return SDValue();
18468}
18469
18470SDValue DAGCombiner::visitFMAD(SDNode *N) {
18471 SDValue N0 = N->getOperand(0);
18472 SDValue N1 = N->getOperand(1);
18473 SDValue N2 = N->getOperand(2);
18474 EVT VT = N->getValueType(0);
18475 SDLoc DL(N);
18476
18477 // Constant fold FMAD.
18478 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FMAD, DL, VT, {N0, N1, N2}))
18479 return C;
18480
18481 return SDValue();
18482}
18483
18484SDValue DAGCombiner::visitFMULADD(SDNode *N) {
18485 SDValue N0 = N->getOperand(0);
18486 SDValue N1 = N->getOperand(1);
18487 SDValue N2 = N->getOperand(2);
18488 EVT VT = N->getValueType(0);
18489 SDLoc DL(N);
18490
18491 // Constant fold FMULADD.
18492 if (SDValue C =
18493 DAG.FoldConstantArithmetic(ISD::FMULADD, DL, VT, {N0, N1, N2}))
18494 return C;
18495
18496 return SDValue();
18497}
18498
18499// Combine multiple FDIVs with the same divisor into multiple FMULs by the
18500// reciprocal.
18501// E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
18502// Notice that this is not always beneficial. One reason is different targets
18503// may have different costs for FDIV and FMUL, so sometimes the cost of two
18504// FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
18505 // is that the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
18506SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
18507 // TODO: Limit this transform based on optsize/minsize - it always creates at
18508 // least 1 extra instruction. But the perf win may be substantial enough
18509 // that only minsize should restrict this.
18510 const SDNodeFlags Flags = N->getFlags();
18511 if (LegalDAG || !Flags.hasAllowReciprocal())
18512 return SDValue();
18513
18514 // Skip if current node is a reciprocal/fneg-reciprocal.
18515 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
18516 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, /* AllowUndefs */ true);
18517 if (N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0)))
18518 return SDValue();
18519
18520 // Exit early if the target does not want this transform or if there can't
18521 // possibly be enough uses of the divisor to make the transform worthwhile.
18522 unsigned MinUses = TLI.combineRepeatedFPDivisors();
18523
18524 // For splat vectors, scale the number of uses by the splat factor. If we can
18525 // convert the division into a scalar op, that will likely be much faster.
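// E.g. a single FDIV whose divisor is a splatted <4 x float> counts as 4
// potential scalar divisions when compared against the target's minimum
// use threshold.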
18526 unsigned NumElts = 1;
18527 EVT VT = N->getValueType(0);
18528 if (VT.isVector() && DAG.isSplatValue(N1))
18529 NumElts = VT.getVectorMinNumElements();
18530
18531 if (!MinUses || (N1->use_size() * NumElts) < MinUses)
18532 return SDValue();
18533
18534 // Find all FDIV users of the same divisor.
18535 // Use a set because duplicates may be present in the user list.
18536 SetVector<SDNode *> Users;
18537 for (auto *U : N1->users()) {
18538 if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
18539 // Skip X/sqrt(X) that has not been simplified to sqrt(X) yet.
18540 if (U->getOperand(1).getOpcode() == ISD::FSQRT &&
18541 U->getOperand(0) == U->getOperand(1).getOperand(0) &&
18542 U->getFlags().hasAllowReassociation() &&
18543 U->getFlags().hasNoSignedZeros())
18544 continue;
18545
18546 // This division is eligible for optimization only if it allows
18547 // reciprocal formation (the arcp fast-math flag).
18548 if (U->getFlags().hasAllowReciprocal())
18549 Users.insert(U);
18550 }
18551 }
18552
18553 // Now that we have the actual number of divisor uses, make sure it meets
18554 // the minimum threshold specified by the target.
18555 if ((Users.size() * NumElts) < MinUses)
18556 return SDValue();
18557
18558 SDLoc DL(N);
18559 SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
18560 SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
18561
18562 // Dividend / Divisor -> Dividend * Reciprocal
18563 for (auto *U : Users) {
18564 SDValue Dividend = U->getOperand(0);
18565 if (Dividend != FPOne) {
18566 SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
18567 Reciprocal, Flags);
18568 CombineTo(U, NewNode);
18569 } else if (U != Reciprocal.getNode()) {
18570 // In the absence of fast-math-flags, this user node is always the
18571 // same node as Reciprocal, but with FMF they may be different nodes.
18572 CombineTo(U, Reciprocal);
18573 }
18574 }
18575 return SDValue(N, 0); // N was replaced.
18576}
18577
18578SDValue DAGCombiner::visitFDIV(SDNode *N) {
18579 SDValue N0 = N->getOperand(0);
18580 SDValue N1 = N->getOperand(1);
18581 EVT VT = N->getValueType(0);
18582 SDLoc DL(N);
18583 SDNodeFlags Flags = N->getFlags();
18584 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
18585
18586 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
18587 return R;
18588
18589 // fold (fdiv c1, c2) -> c1/c2
18590 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FDIV, DL, VT, {N0, N1}))
18591 return C;
18592
18593 // fold vector ops
18594 if (VT.isVector())
18595 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
18596 return FoldedVOp;
18597
18598 if (SDValue NewSel = foldBinOpIntoSelect(N))
18599 return NewSel;
18600
18601 if (SDValue V = combineRepeatedFPDivisors(N))
18602 return V;
18603
18604 // fold (fdiv X, c2) -> (fmul X, 1/c2) if there is no loss in precision, or
18605 // the loss is acceptable with AllowReciprocal.
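// E.g. X / 4.0 becomes X * 0.25 since the reciprocal 0.25 is exact, while
// X / 3.0 becomes X * (1.0/3.0) only under the reciprocal flag because
// 1.0/3.0 is inexact.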
18606 if (auto *N1CFP = isConstOrConstSplatFP(N1, true)) {
18607 // Compute the reciprocal 1.0 / c2.
18608 const APFloat &N1APF = N1CFP->getValueAPF();
18609 APFloat Recip = APFloat::getOne(N1APF.getSemantics());
18610 APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
18611 // Only do the transform if the reciprocal is a legal fp immediate that
18612 // isn't too nasty (eg NaN, denormal, ...).
18613 if (((st == APFloat::opOK && !Recip.isDenormal()) ||
18614 (st == APFloat::opInexact && Flags.hasAllowReciprocal())) &&
18615 (!LegalOperations ||
18616 // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
18617 // backend)... we should handle this gracefully after Legalize.
18618 // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
18619 TLI.isOperationLegal(ISD::ConstantFP, VT) ||
18620 TLI.isFPImmLegal(Recip, VT, ForCodeSize)))
18621 return DAG.getNode(ISD::FMUL, DL, VT, N0,
18622 DAG.getConstantFP(Recip, DL, VT));
18623 }
18624
18625 if (Flags.hasAllowReciprocal()) {
18626 // If this FDIV is part of a reciprocal square root, it may be folded
18627 // into a target-specific square root estimate instruction.
18628 if (N1.getOpcode() == ISD::FSQRT) {
18629 if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0)))
18630 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
18631 } else if (N1.getOpcode() == ISD::FP_EXTEND &&
18632 N1.getOperand(0).getOpcode() == ISD::FSQRT) {
18633 if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0))) {
18634 RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
18635 AddToWorklist(RV.getNode());
18636 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
18637 }
18638 } else if (N1.getOpcode() == ISD::FP_ROUND &&
18639 N1.getOperand(0).getOpcode() == ISD::FSQRT) {
18640 if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0))) {
18641 RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
18642 AddToWorklist(RV.getNode());
18643 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
18644 }
18645 } else if (N1.getOpcode() == ISD::FMUL) {
18646 // Look through an FMUL. Even though this won't remove the FDIV directly,
18647 // it's still worthwhile to get rid of the FSQRT if possible.
18648 SDValue Sqrt, Y;
18649 if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
18650 Sqrt = N1.getOperand(0);
18651 Y = N1.getOperand(1);
18652 } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
18653 Sqrt = N1.getOperand(1);
18654 Y = N1.getOperand(0);
18655 }
18656 if (Sqrt.getNode()) {
18657 // If the other multiply operand is known positive, pull it into the
18658 // sqrt. That will eliminate the division if we convert to an estimate.
18659 if (Flags.hasAllowReassociation() && N1.hasOneUse() &&
18660 N1->getFlags().hasAllowReassociation() && Sqrt.hasOneUse()) {
18661 SDValue A;
18662 if (Y.getOpcode() == ISD::FABS && Y.hasOneUse())
18663 A = Y.getOperand(0);
18664 else if (Y == Sqrt.getOperand(0))
18665 A = Y;
18666 if (A) {
18667 // X / (fabs(A) * sqrt(Z)) --> X / sqrt(A*A*Z) --> X * rsqrt(A*A*Z)
18668 // X / (A * sqrt(A)) --> X / sqrt(A*A*A) --> X * rsqrt(A*A*A)
18669 SDValue AA = DAG.getNode(ISD::FMUL, DL, VT, A, A);
18670 SDValue AAZ =
18671 DAG.getNode(ISD::FMUL, DL, VT, AA, Sqrt.getOperand(0));
18672 if (SDValue Rsqrt = buildRsqrtEstimate(AAZ))
18673 return DAG.getNode(ISD::FMUL, DL, VT, N0, Rsqrt);
18674
18675 // Estimate creation failed. Clean up speculatively created nodes.
18676 recursivelyDeleteUnusedNodes(AAZ.getNode());
18677 }
18678 }
18679
18680 // We found a FSQRT, so try to make this fold:
18681 // X / (Y * sqrt(Z)) -> X * (rsqrt(Z) / Y)
18682 if (SDValue Rsqrt = buildRsqrtEstimate(Sqrt.getOperand(0))) {
18683 SDValue Div = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, Rsqrt, Y);
18684 AddToWorklist(Div.getNode());
18685 return DAG.getNode(ISD::FMUL, DL, VT, N0, Div);
18686 }
18687 }
18688 }
18689
18690 // Fold into a reciprocal estimate and multiply instead of a real divide.
18691 if (Flags.hasNoInfs())
18692 if (SDValue RV = BuildDivEstimate(N0, N1, Flags))
18693 return RV;
18694 }
18695
18696 // Fold X/Sqrt(X) -> Sqrt(X)
18697 if (Flags.hasNoSignedZeros() && Flags.hasAllowReassociation())
18698 if (N1.getOpcode() == ISD::FSQRT && N0 == N1.getOperand(0))
18699 return N1;
18700
18701 // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
18702 TargetLowering::NegatibleCost CostN0 =
18703 TargetLowering::NegatibleCost::Expensive;
18704 TargetLowering::NegatibleCost CostN1 =
18705 TargetLowering::NegatibleCost::Expensive;
18706 SDValue NegN0 =
18707 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
18708 if (NegN0) {
18709 HandleSDNode NegN0Handle(NegN0);
18710 SDValue NegN1 =
18711 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
18712 if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
18713 CostN1 == TargetLowering::NegatibleCost::Cheaper))
18714 return DAG.getNode(ISD::FDIV, DL, VT, NegN0, NegN1);
18715 }
18716
18717 if (SDValue R = combineFMulOrFDivWithIntPow2(N))
18718 return R;
18719
18720 return SDValue();
18721}
18722
18723SDValue DAGCombiner::visitFREM(SDNode *N) {
18724 SDValue N0 = N->getOperand(0);
18725 SDValue N1 = N->getOperand(1);
18726 EVT VT = N->getValueType(0);
18727 SDNodeFlags Flags = N->getFlags();
18728 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
18729 SDLoc DL(N);
18730
18731 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
18732 return R;
18733
18734 // fold (frem c1, c2) -> fmod(c1,c2)
18735 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FREM, DL, VT, {N0, N1}))
18736 return C;
18737
18738 if (SDValue NewSel = foldBinOpIntoSelect(N))
18739 return NewSel;
18740
18741 // Lower frem N0, N1 => N0 - trunc(N0 / N1) * N1, provided N1 is an integer
18742 // power of 2.
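// E.g. frem(7.5, 2.0): trunc(7.5 / 2.0) = 3.0 and 7.5 - 3.0 * 2.0 = 1.5,
// which matches fmod(7.5, 2.0).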
18743 if (!TLI.isOperationLegal(ISD::FREM, VT) &&
18746 TLI.isOperationLegalOrCustom(ISD::FTRUNC, VT) &&
18747 DAG.isKnownToBeAPowerOfTwoFP(N1)) {
18748 bool NeedsCopySign =
18749 !Flags.hasNoSignedZeros() && !DAG.cannotBeOrderedNegativeFP(N0);
18750 SDValue Div = DAG.getNode(ISD::FDIV, DL, VT, N0, N1);
18751 SDValue Rnd = DAG.getNode(ISD::FTRUNC, DL, VT, Div);
18752 SDValue MLA;
18753 if (TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT)) {
18754 MLA = DAG.getNode(ISD::FMA, DL, VT, DAG.getNode(ISD::FNEG, DL, VT, Rnd),
18755 N1, N0);
18756 } else {
18757 SDValue Mul = DAG.getNode(ISD::FMUL, DL, VT, Rnd, N1);
18758 MLA = DAG.getNode(ISD::FSUB, DL, VT, N0, Mul);
18759 }
18760 return NeedsCopySign ? DAG.getNode(ISD::FCOPYSIGN, DL, VT, MLA, N0) : MLA;
18761 }
18762
18763 return SDValue();
18764}
18765
18766SDValue DAGCombiner::visitFSQRT(SDNode *N) {
18767 SDNodeFlags Flags = N->getFlags();
18768
18769 // Require 'ninf' flag since sqrt(+Inf) = +Inf, but the estimation goes as:
18770 // sqrt(+Inf) == rsqrt(+Inf) * +Inf = 0 * +Inf = NaN
18771 if (!Flags.hasApproximateFuncs() || !Flags.hasNoInfs())
18772 return SDValue();
18773
18774 SDValue N0 = N->getOperand(0);
18775 if (TLI.isFsqrtCheap(N0, DAG))
18776 return SDValue();
18777
18778 // FSQRT nodes have flags that propagate to the created nodes.
18779 SelectionDAG::FlagInserter FlagInserter(DAG, Flags);
18780 // TODO: If this is N0/sqrt(N0), and we reach this node before trying to
18781 // transform the fdiv, we may produce a sub-optimal estimate sequence
18782 // because the reciprocal calculation may not have to filter out a
18783 // 0.0 input.
18784 return buildSqrtEstimate(N0);
18785}
18786
18787/// copysign(x, fp_extend(y)) -> copysign(x, y)
18788/// copysign(x, fp_round(y)) -> copysign(x, y)
18789/// Operands to the functions are the type of X and Y respectively.
18790static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(EVT XTy, EVT YTy) {
18791 // Always fold no-op FP casts.
18792 if (XTy == YTy)
18793 return true;
18794
18795 // Do not optimize out type conversion of f128 type yet.
18796 // For some targets like x86_64, configuration is changed to keep one f128
18797 // value in one SSE register, but instruction selection cannot handle
18798 // FCOPYSIGN on SSE registers yet.
18799 if (YTy == MVT::f128)
18800 return false;
18801
18802 // Avoid mismatched vector operand types, for better instruction selection.
18803 return !YTy.isVector();
18804}
18805
18806 static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
18807 SDValue N1 = N->getOperand(1);
18808 if (N1.getOpcode() != ISD::FP_EXTEND &&
18809 N1.getOpcode() != ISD::FP_ROUND)
18810 return false;
18811 EVT N1VT = N1->getValueType(0);
18812 EVT N1Op0VT = N1->getOperand(0).getValueType();
18813 return CanCombineFCOPYSIGN_EXTEND_ROUND(N1VT, N1Op0VT);
18814}
18815
18816SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
18817 SDValue N0 = N->getOperand(0);
18818 SDValue N1 = N->getOperand(1);
18819 EVT VT = N->getValueType(0);
18820 SDLoc DL(N);
18821
18822 // fold (fcopysign c1, c2) -> fcopysign(c1,c2)
18823 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FCOPYSIGN, DL, VT, {N0, N1}))
18824 return C;
18825
18826 // copysign(x, fp_extend(y)) -> copysign(x, y)
18827 // copysign(x, fp_round(y)) -> copysign(x, y)
18828 if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
18829 return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N0, N1.getOperand(0));
18830
18832 return SDValue(N, 0);
18833
18834 return SDValue();
18835}
18836
18837SDValue DAGCombiner::visitFPOW(SDNode *N) {
18838 ConstantFPSDNode *ExponentC = isConstOrConstSplatFP(N->getOperand(1));
18839 if (!ExponentC)
18840 return SDValue();
18841 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
18842
18843 // Try to convert x ** (1/3) into cube root.
18844 // TODO: Handle the various flavors of long double.
18845 // TODO: Since we're approximating, we don't need an exact 1/3 exponent.
18846 // Some range near 1/3 should be fine.
18847 EVT VT = N->getValueType(0);
18848 if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(1.0f/3.0f)) ||
18849 (VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(1.0/3.0))) {
18850 // pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0.
18851 // pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf.
18852 // pow(-val, 1/3) = nan; cbrt(-val) = -cbrt(val).
18853 // For regular numbers, rounding may cause the results to differ.
18854 // Therefore, we require { nsz ninf nnan afn } for this transform.
18855 // TODO: We could select out the special cases if we don't have nsz/ninf.
18856 SDNodeFlags Flags = N->getFlags();
18857 if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || !Flags.hasNoNaNs() ||
18858 !Flags.hasApproximateFuncs())
18859 return SDValue();
18860
18861 // Do not create a cbrt() libcall if the target does not have it, and do not
18862 // turn a pow that has lowering support into a cbrt() libcall.
18863 if (!DAG.getLibInfo().has(LibFunc_cbrt) ||
18864 (!DAG.getTargetLoweringInfo().isOperationExpand(ISD::FPOW, VT) &&
18865 DAG.getTargetLoweringInfo().isOperationExpand(ISD::FCBRT, VT)))
18866 return SDValue();
18867
18868 return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0));
18869 }
18870
18871 // Try to convert x ** (1/4) and x ** (3/4) into square roots.
18872 // x ** (1/2) is canonicalized to sqrt, so we do not bother with that case.
18873 // TODO: This could be extended (using a target hook) to handle smaller
18874 // power-of-2 fractional exponents.
18875 bool ExponentIs025 = ExponentC->getValueAPF().isExactlyValue(0.25);
18876 bool ExponentIs075 = ExponentC->getValueAPF().isExactlyValue(0.75);
18877 if (ExponentIs025 || ExponentIs075) {
18878 // pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0.
18879 // pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) = NaN.
18880 // pow(-0.0, 0.75) = +0.0; sqrt(-0.0) * sqrt(sqrt(-0.0)) = +0.0.
18881 // pow(-inf, 0.75) = +inf; sqrt(-inf) * sqrt(sqrt(-inf)) = NaN.
18882 // For regular numbers, rounding may cause the results to differ.
18883 // Therefore, we require { nsz ninf afn } for this transform.
18884 // TODO: We could select out the special cases if we don't have nsz/ninf.
18885 SDNodeFlags Flags = N->getFlags();
18886
18887 // We only need no signed zeros for the 0.25 case.
18888 if ((!Flags.hasNoSignedZeros() && ExponentIs025) || !Flags.hasNoInfs() ||
18889 !Flags.hasApproximateFuncs())
18890 return SDValue();
18891
18892 // Don't double the number of libcalls. We are trying to inline fast code.
18893 if (!DAG.getTargetLoweringInfo().isOperationLegalOrCustom(ISD::FSQRT, VT))
18894 return SDValue();
18895
18896 // Assume that libcalls are the smallest code.
18897 // TODO: This restriction should probably be lifted for vectors.
18898 if (ForCodeSize)
18899 return SDValue();
18900
18901 // pow(X, 0.25) --> sqrt(sqrt(X))
18902 SDLoc DL(N);
18903 SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0));
18904 SDValue SqrtSqrt = DAG.getNode(ISD::FSQRT, DL, VT, Sqrt);
18905 if (ExponentIs025)
18906 return SqrtSqrt;
18907 // pow(X, 0.75) --> sqrt(X) * sqrt(sqrt(X))
18908 return DAG.getNode(ISD::FMUL, DL, VT, Sqrt, SqrtSqrt);
18909 }
18910
18911 return SDValue();
18912}
18913
18914 static SDValue foldFPToIntToFP(SDNode *N, const SDLoc &DL, SelectionDAG &DAG,
18915 const TargetLowering &TLI) {
18916 // We can fold the fpto[us]i -> [us]itofp pattern into a single ftrunc.
18917 // If NoSignedZerosFPMath is enabled, this is a direct replacement.
18918 // Otherwise, for strict math, we must handle edge cases:
18919 // 1. For unsigned conversions, use FABS to handle negative cases. Take -0.0
18920 // as an example: it first becomes integer 0, and is converted back to +0.0.
18921 // FTRUNC on its own could produce -0.0.
18922
18923 // FIXME: We should be able to use node-level FMF here.
18924 EVT VT = N->getValueType(0);
18925 if (!TLI.isOperationLegal(ISD::FTRUNC, VT))
18926 return SDValue();
18927
18928 // fptosi/fptoui round towards zero, so converting from FP to integer and
18929 // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X
18930 SDValue N0 = N->getOperand(0);
18931 if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT &&
18932 N0.getOperand(0).getValueType() == VT) {
18933 if (DAG.getTarget().Options.NoSignedZerosFPMath)
18934 return DAG.getNode(ISD::FTRUNC, DL, VT, N0.getOperand(0));
18935 }
18936
18937 if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT &&
18938 N0.getOperand(0).getValueType() == VT) {
18939 if (DAG.getTarget().Options.NoSignedZerosFPMath)
18940 return DAG.getNode(ISD::FTRUNC, DL, VT, N0.getOperand(0));
18941
18942 // Strict math: use FABS to handle negative inputs correctly.
18943 if (TLI.isFAbsFree(VT)) {
18944 SDValue Abs = DAG.getNode(ISD::FABS, DL, VT, N0.getOperand(0));
18945 return DAG.getNode(ISD::FTRUNC, DL, VT, Abs);
18946 }
18947 }
18948
18949 return SDValue();
18950}
18951
18952SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
18953 SDValue N0 = N->getOperand(0);
18954 EVT VT = N->getValueType(0);
18955 EVT OpVT = N0.getValueType();
18956 SDLoc DL(N);
18957
18958 // [us]itofp(undef) = 0, because the result value is bounded.
18959 if (N0.isUndef())
18960 return DAG.getConstantFP(0.0, DL, VT);
18961
18962 // fold (sint_to_fp c1) -> c1fp
18963 // ...but only if the target supports immediate floating-point values
18964 if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
18965 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SINT_TO_FP, DL, VT, {N0}))
18966 return C;
18967
18968 // If the input is a legal type, and SINT_TO_FP is not legal on this target,
18969 // but UINT_TO_FP is legal on this target, try to convert.
18970 if (!hasOperation(ISD::SINT_TO_FP, OpVT) &&
18971 hasOperation(ISD::UINT_TO_FP, OpVT)) {
18972 // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
18973 if (DAG.SignBitIsZero(N0))
18974 return DAG.getNode(ISD::UINT_TO_FP, DL, VT, N0);
18975 }
18976
18977 // The next optimizations are desirable only if SELECT_CC can be lowered.
18978 // fold (sint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), -1.0, 0.0)
18979 if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
18980 !VT.isVector() &&
18981 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
18982 return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(-1.0, DL, VT),
18983 DAG.getConstantFP(0.0, DL, VT));
18984
18985 // fold (sint_to_fp (zext (setcc x, y, cc))) ->
18986 // (select (setcc x, y, cc), 1.0, 0.0)
18987 if (N0.getOpcode() == ISD::ZERO_EXTEND &&
18988 N0.getOperand(0).getOpcode() == ISD::SETCC && !VT.isVector() &&
18989 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
18990 return DAG.getSelect(DL, VT, N0.getOperand(0),
18991 DAG.getConstantFP(1.0, DL, VT),
18992 DAG.getConstantFP(0.0, DL, VT));
18993
18994 if (SDValue FTrunc = foldFPToIntToFP(N, DL, DAG, TLI))
18995 return FTrunc;
18996
18997 // fold (sint_to_fp (trunc nsw x)) -> (sint_to_fp x)
18998 if (N0.getOpcode() == ISD::TRUNCATE && N0->getFlags().hasNoSignedWrap() &&
18999 TLI.isTypeDesirableForOp(ISD::SINT_TO_FP,
19000 N0.getOperand(0).getValueType()))
19001 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, N0.getOperand(0));
19002
19003 return SDValue();
19004}
19005
19006SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
19007 SDValue N0 = N->getOperand(0);
19008 EVT VT = N->getValueType(0);
19009 EVT OpVT = N0.getValueType();
19010 SDLoc DL(N);
19011
19012 // [us]itofp(undef) = 0, because the result value is bounded.
19013 if (N0.isUndef())
19014 return DAG.getConstantFP(0.0, DL, VT);
19015
19016 // fold (uint_to_fp c1) -> c1fp
19017 // ...but only if the target supports immediate floating-point values
19018 if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
19019 if (SDValue C = DAG.FoldConstantArithmetic(ISD::UINT_TO_FP, DL, VT, {N0}))
19020 return C;
19021
19022 // If the input is a legal type, and UINT_TO_FP is not legal on this target,
19023 // but SINT_TO_FP is legal on this target, try to convert.
19024 if (!hasOperation(ISD::UINT_TO_FP, OpVT) &&
19025 hasOperation(ISD::SINT_TO_FP, OpVT)) {
19026 // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
19027 if (DAG.SignBitIsZero(N0))
19028 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, N0);
19029 }
19030
19031 // fold (uint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), 1.0, 0.0)
19032 if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
19033 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
19034 return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(1.0, DL, VT),
19035 DAG.getConstantFP(0.0, DL, VT));
19036
19037 if (SDValue FTrunc = foldFPToIntToFP(N, DL, DAG, TLI))
19038 return FTrunc;
19039
19040 // fold (uint_to_fp (trunc nuw x)) -> (uint_to_fp x)
19041 if (N0.getOpcode() == ISD::TRUNCATE && N0->getFlags().hasNoUnsignedWrap() &&
19042 TLI.isTypeDesirableForOp(ISD::UINT_TO_FP,
19043 N0.getOperand(0).getValueType()))
19044 return DAG.getNode(ISD::UINT_TO_FP, DL, VT, N0.getOperand(0));
19045
19046 return SDValue();
19047}
19048
19049 // Fold (fp_to_{s/u}int ({s/u}int_to_fp x)) -> zext x, sext x, trunc x, or x
19050 static SDValue FoldIntToFPToInt(SDNode *N, const SDLoc &DL, SelectionDAG &DAG) {
19051 SDValue N0 = N->getOperand(0);
19052 EVT VT = N->getValueType(0);
19053
19054 if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
19055 return SDValue();
19056
19057 SDValue Src = N0.getOperand(0);
19058 EVT SrcVT = Src.getValueType();
19059 bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
19060 bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
19061
19062 // We can safely assume the conversion won't overflow the output range,
19063 // because (for example) (uint8_t)18293.f is undefined behavior.
19064
19065 // Since we can assume the conversion won't overflow, our decision as to
19066 // whether the input will fit in the float should depend on the minimum
19067 // of the input range and output range.
19068
19069 // This means this is also safe for a signed input and unsigned output, since
19070 // a negative input would lead to undefined behavior.
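// E.g. for i16 -> f32 -> i32, f32 has 24 bits of precision, so every i16
// value survives the round trip exactly and the pair can be folded to a
// sign or zero extend of the i16 value.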
19071 unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
19072 unsigned OutputSize = (int)VT.getScalarSizeInBits();
19073 unsigned ActualSize = std::min(InputSize, OutputSize);
19074 const fltSemantics &Sem = N0.getValueType().getFltSemantics();
19075
19076 // We can only fold away the float conversion if the input range can be
19077 // represented exactly in the float range.
19078 if (APFloat::semanticsPrecision(Sem) >= ActualSize) {
19079 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
19080 unsigned ExtOp =
19081 IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
19082 return DAG.getNode(ExtOp, DL, VT, Src);
19083 }
19084 if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
19085 return DAG.getNode(ISD::TRUNCATE, DL, VT, Src);
19086 return DAG.getBitcast(VT, Src);
19087 }
19088 return SDValue();
19089}
19090
19091SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
19092 SDValue N0 = N->getOperand(0);
19093 EVT VT = N->getValueType(0);
19094 SDLoc DL(N);
19095
19096 // fold (fp_to_sint undef) -> undef
19097 if (N0.isUndef())
19098 return DAG.getUNDEF(VT);
19099
19100 // fold (fp_to_sint c1fp) -> c1
19101 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FP_TO_SINT, DL, VT, {N0}))
19102 return C;
19103
19104 return FoldIntToFPToInt(N, DL, DAG);
19105}
19106
19107SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
19108 SDValue N0 = N->getOperand(0);
19109 EVT VT = N->getValueType(0);
19110 SDLoc DL(N);
19111
19112 // fold (fp_to_uint undef) -> undef
19113 if (N0.isUndef())
19114 return DAG.getUNDEF(VT);
19115
19116 // fold (fp_to_uint c1fp) -> c1
19117 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FP_TO_UINT, DL, VT, {N0}))
19118 return C;
19119
19120 return FoldIntToFPToInt(N, DL, DAG);
19121}
19122
19123SDValue DAGCombiner::visitXROUND(SDNode *N) {
19124 SDValue N0 = N->getOperand(0);
19125 EVT VT = N->getValueType(0);
19126
19127 // fold (lrint|llrint undef) -> undef
19128 // fold (lround|llround undef) -> undef
19129 if (N0.isUndef())
19130 return DAG.getUNDEF(VT);
19131
19132 // fold (lrint|llrint c1fp) -> c1
19133 // fold (lround|llround c1fp) -> c1
19134 if (SDValue C =
19135 DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, {N0}))
19136 return C;
19137
19138 return SDValue();
19139}
19140
19141SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
19142 SDValue N0 = N->getOperand(0);
19143 SDValue N1 = N->getOperand(1);
19144 EVT VT = N->getValueType(0);
19145 SDLoc DL(N);
19146
19147 // fold (fp_round c1fp) -> c1fp
19148 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FP_ROUND, DL, VT, {N0, N1}))
19149 return C;
19150
19151 // fold (fp_round (fp_extend x)) -> x
19152 if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
19153 return N0.getOperand(0);
19154
19155 // fold (fp_round (fp_round x)) -> (fp_round x)
19156 if (N0.getOpcode() == ISD::FP_ROUND) {
19157 const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
19158 const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;
19159
19160 // Avoid folding legal fp_rounds into non-legal ones.
19161 if (!hasOperation(ISD::FP_ROUND, VT))
19162 return SDValue();
19163
19164 // Skip this folding if it results in an fp_round from f80 to f16.
19165 //
19166 // f80 to f16 always generates an expensive (and as yet, unimplemented)
19167 // libcall to __truncxfhf2 instead of selecting native f16 conversion
19168 // instructions from f32 or f64. Moreover, the first (value-preserving)
19169 // fp_round from f80 to either f32 or f64 may become a NOP in platforms like
19170 // x86.
19171 if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
19172 return SDValue();
19173
19174 // If the first fp_round isn't a value preserving truncation, it might
19175 // introduce a tie in the second fp_round, that wouldn't occur in the
19176 // single-step fp_round we want to fold to.
19177 // In other words, double rounding isn't the same as rounding.
19178 // Also, this is a value preserving truncation iff both fp_round's are.
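// E.g. rounding f64 -> f32 -> f16 can produce an intermediate value that
// lies exactly halfway between two f16 values and then rounds differently
// than a direct f64 -> f16 rounding would.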
19179 if ((N->getFlags().hasAllowContract() &&
19180 N0->getFlags().hasAllowContract()) ||
19181 N0IsTrunc)
19182 return DAG.getNode(
19183 ISD::FP_ROUND, DL, VT, N0.getOperand(0),
19184 DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL, /*isTarget=*/true));
19185 }
19186
19187 // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
19188 // Note: From a legality perspective, this is a two step transform. First,
19189 // we duplicate the fp_round to the arguments of the copysign, then we
19190 // eliminate the fp_round on Y. The second step requires an additional
19191 // predicate to match the implementation above.
19192 if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse() &&
19193 CanCombineFCOPYSIGN_EXTEND_ROUND(VT,
19194 N0.getValueType())) {
19195 SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
19196 N0.getOperand(0), N1);
19197 AddToWorklist(Tmp.getNode());
19198 return DAG.getNode(ISD::FCOPYSIGN, DL, VT, Tmp, N0.getOperand(1));
19199 }
19200
19201 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
19202 return NewVSel;
19203
19204 return SDValue();
19205}
19206
19207// Eliminate a floating-point widening of a narrowed value if the fast math
19208 // flags allow it.
19209 SDValue DAGCombiner::eliminateFPCastPair(SDNode *N) {
19210 SDValue N0 = N->getOperand(0);
19211 EVT VT = N->getValueType(0);
19212
19213 unsigned NarrowingOp;
19214 switch (N->getOpcode()) {
19215 case ISD::FP16_TO_FP:
19216 NarrowingOp = ISD::FP_TO_FP16;
19217 break;
19218 case ISD::BF16_TO_FP:
19219 NarrowingOp = ISD::FP_TO_BF16;
19220 break;
19221 case ISD::FP_EXTEND:
19222 NarrowingOp = ISD::FP_ROUND;
19223 break;
19224 default:
19225 llvm_unreachable("Expected widening FP cast");
19226 }
19227
19228 if (N0.getOpcode() == NarrowingOp && N0.getOperand(0).getValueType() == VT) {
19229 const SDNodeFlags NarrowFlags = N0->getFlags();
19230 const SDNodeFlags WidenFlags = N->getFlags();
19231 // Narrowing can introduce inf and change the encoding of a nan, so the
19232 // widen must have the nnan and ninf flags to indicate that we don't need to
19233 // care about that. We are also removing a rounding step, and that requires
19234 // both the narrow and widen to allow contraction.
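// E.g. for an f32 value above the largest finite f16, fp_round to f16
// gives +inf and fp_extend keeps it, so folding the pair back to the
// original value turns an inf result into a finite one; ninf permits that.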
19235 if (WidenFlags.hasNoNaNs() && WidenFlags.hasNoInfs() &&
19236 NarrowFlags.hasAllowContract() && WidenFlags.hasAllowContract()) {
19237 return N0.getOperand(0);
19238 }
19239 }
19240
19241 return SDValue();
19242}
19243
19244SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
19245 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
19246 SDValue N0 = N->getOperand(0);
19247 EVT VT = N->getValueType(0);
19248 SDLoc DL(N);
19249
19250 if (VT.isVector())
19251 if (SDValue FoldedVOp = SimplifyVCastOp(N, DL))
19252 return FoldedVOp;
19253
19254 // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
19255 if (N->hasOneUse() && N->user_begin()->getOpcode() == ISD::FP_ROUND)
19256 return SDValue();
19257
19258 // fold (fp_extend c1fp) -> c1fp
19259 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FP_EXTEND, DL, VT, {N0}))
19260 return C;
19261
19262 // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
19263 if (N0.getOpcode() == ISD::FP16_TO_FP &&
19264 TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
19265 return DAG.getNode(ISD::FP16_TO_FP, DL, VT, N0.getOperand(0));
19266
19267 // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
19268 // value of X.
19269 if (N0.getOpcode() == ISD::FP_ROUND && N0.getConstantOperandVal(1) == 1) {
19270 SDValue In = N0.getOperand(0);
19271 if (In.getValueType() == VT) return In;
19272 if (VT.bitsLT(In.getValueType()))
19273 return DAG.getNode(ISD::FP_ROUND, DL, VT, In, N0.getOperand(1));
19274 return DAG.getNode(ISD::FP_EXTEND, DL, VT, In);
19275 }
19276
19277 // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
19278 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
19279 TLI.isLoadExtLegalOrCustom(ISD::EXTLOAD, VT, N0.getValueType())) {
19280 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
19281 SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT,
19282 LN0->getChain(),
19283 LN0->getBasePtr(), N0.getValueType(),
19284 LN0->getMemOperand());
19285 CombineTo(N, ExtLoad);
19286 CombineTo(
19287 N0.getNode(),
19288 DAG.getNode(ISD::FP_ROUND, SDLoc(N0), N0.getValueType(), ExtLoad,
19289 DAG.getIntPtrConstant(1, SDLoc(N0), /*isTarget=*/true)),
19290 ExtLoad.getValue(1));
19291 return SDValue(N, 0); // Return N so it doesn't get rechecked!
19292 }
19293
19294 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
19295 return NewVSel;
19296
19297 if (SDValue CastEliminated = eliminateFPCastPair(N))
19298 return CastEliminated;
19299
19300 return SDValue();
19301}
19302
19303SDValue DAGCombiner::visitFCEIL(SDNode *N) {
19304 SDValue N0 = N->getOperand(0);
19305 EVT VT = N->getValueType(0);
19306
19307 // fold (fceil c1) -> fceil(c1)
19308 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FCEIL, SDLoc(N), VT, {N0}))
19309 return C;
19310
19311 return SDValue();
19312}
19313
19314SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
19315 SDValue N0 = N->getOperand(0);
19316 EVT VT = N->getValueType(0);
19317
19318 // fold (ftrunc c1) -> ftrunc(c1)
19319 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FTRUNC, SDLoc(N), VT, {N0}))
19320 return C;
19321
19322 // fold ftrunc (known rounded int x) -> x
19323 // ftrunc is a part of fptosi/fptoui expansion on some targets, so this is
19324 // likely to be generated to extract integer from a rounded floating value.
19325 switch (N0.getOpcode()) {
19326 default: break;
19327 case ISD::FRINT:
19328 case ISD::FTRUNC:
19329 case ISD::FNEARBYINT:
19330 case ISD::FROUNDEVEN:
19331 case ISD::FFLOOR:
19332 case ISD::FCEIL:
19333 return N0;
19334 }
19335
19336 return SDValue();
19337}
19338
19339SDValue DAGCombiner::visitFFREXP(SDNode *N) {
19340 SDValue N0 = N->getOperand(0);
19341
19342 // fold (ffrexp c1) -> ffrexp(c1)
19343 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
19344 return DAG.getNode(ISD::FFREXP, SDLoc(N), N->getVTList(), N0);
19345 return SDValue();
19346}
19347
19348SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
19349 SDValue N0 = N->getOperand(0);
19350 EVT VT = N->getValueType(0);
19351
19352 // fold (ffloor c1) -> ffloor(c1)
19353 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FFLOOR, SDLoc(N), VT, {N0}))
19354 return C;
19355
19356 return SDValue();
19357}
19358
19359SDValue DAGCombiner::visitFNEG(SDNode *N) {
19360 SDValue N0 = N->getOperand(0);
19361 EVT VT = N->getValueType(0);
19362 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
19363
19364 // Constant fold FNEG.
19365 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FNEG, SDLoc(N), VT, {N0}))
19366 return C;
19367
19368 if (SDValue NegN0 =
19369 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize))
19370 return NegN0;
19371
19372 // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
19373 // FIXME: This is duplicated in getNegatibleCost, but getNegatibleCost doesn't
19374 // know it was called from a context with a nsz flag if the input fsub does
19375 // not.
19376 if (N0.getOpcode() == ISD::FSUB && N->getFlags().hasNoSignedZeros() &&
19377 N0.hasOneUse()) {
19378 return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0.getOperand(1),
19379 N0.getOperand(0));
19380 }
19381
19383 return SDValue(N, 0);
19384
19385 if (SDValue Cast = foldSignChangeInBitcast(N))
19386 return Cast;
19387
19388 return SDValue();
19389}
19390
19391SDValue DAGCombiner::visitFMinMax(SDNode *N) {
19392 SDValue N0 = N->getOperand(0);
19393 SDValue N1 = N->getOperand(1);
19394 EVT VT = N->getValueType(0);
19395 const SDNodeFlags Flags = N->getFlags();
19396 unsigned Opc = N->getOpcode();
19397 bool PropAllNaNsToQNaNs = Opc == ISD::FMINIMUM || Opc == ISD::FMAXIMUM;
19398 bool PropOnlySNaNsToQNaNs = Opc == ISD::FMINNUM || Opc == ISD::FMAXNUM;
19399 bool IsMin =
19400 Opc == ISD::FMINNUM || Opc == ISD::FMINIMUM || Opc == ISD::FMINIMUMNUM;
19401 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
19402
19403 // Constant fold.
19404 if (SDValue C = DAG.FoldConstantArithmetic(Opc, SDLoc(N), VT, {N0, N1}))
19405 return C;
19406
19407 // Canonicalize to constant on RHS.
19408 if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
19409 !DAG.isConstantFPBuildVectorOrConstantFP(N1))
19410 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
19411
19412 if (const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1)) {
19413 const APFloat &AF = N1CFP->getValueAPF();
19414
19415 // minnum(X, qnan) -> X
19416 // maxnum(X, qnan) -> X
19417 // minnum(X, snan) -> qnan
19418 // maxnum(X, snan) -> qnan
19419 // minimum(X, nan) -> qnan
19420 // maximum(X, nan) -> qnan
19421 // minimumnum(X, nan) -> X
19422 // maximumnum(X, nan) -> X
19423 if (AF.isNaN()) {
19424 if (PropAllNaNsToQNaNs || (AF.isSignaling() && PropOnlySNaNsToQNaNs)) {
19425 if (AF.isSignaling())
19426 return DAG.getConstantFP(AF.makeQuiet(), SDLoc(N), VT);
19427 return N->getOperand(1);
19428 }
19429 return N->getOperand(0);
19430 }
19431
19432 // In the following folds, inf can be replaced with the largest finite
19433 // float, if the ninf flag is set.
19434 if (AF.isInfinity() || (Flags.hasNoInfs() && AF.isLargest())) {
19435 // minnum(X, -inf) -> -inf (ignoring sNaN -> qNaN propagation)
19436 // maxnum(X, +inf) -> +inf (ignoring sNaN -> qNaN propagation)
19437 // minimum(X, -inf) -> -inf if nnan
19438 // maximum(X, +inf) -> +inf if nnan
19439 // minimumnum(X, -inf) -> -inf
19440 // maximumnum(X, +inf) -> +inf
19441 if (IsMin == AF.isNegative() &&
19442 (!PropAllNaNsToQNaNs || Flags.hasNoNaNs()))
19443 return N->getOperand(1);
19444
19445 // minnum(X, +inf) -> X if nnan
19446 // maxnum(X, -inf) -> X if nnan
19447 // minimum(X, +inf) -> X (ignoring quieting of sNaNs)
19448 // maximum(X, -inf) -> X (ignoring quieting of sNaNs)
19449 // minimumnum(X, +inf) -> X if nnan
19450 // maximumnum(X, -inf) -> X if nnan
19451 if (IsMin != AF.isNegative() && (PropAllNaNsToQNaNs || Flags.hasNoNaNs()))
19452 return N->getOperand(0);
19453 }
19454 }
19455
19456 // There are no VECREDUCE variants of FMINIMUMNUM or FMAXIMUMNUM
19457 if (Opc == ISD::FMINIMUMNUM || Opc == ISD::FMAXIMUMNUM)
19458 return SDValue();
19459
19460 if (SDValue SD = reassociateReduction(
19461 PropAllNaNsToQNaNs
19462 ? (IsMin ? ISD::VECREDUCE_FMINIMUM : ISD::VECREDUCE_FMAXIMUM)
19463 : (IsMin ? ISD::VECREDUCE_FMIN : ISD::VECREDUCE_FMAX),
19464 Opc, SDLoc(N), VT, N0, N1, Flags))
19465 return SD;
19466
19467 return SDValue();
19468}
19469
19470SDValue DAGCombiner::visitFABS(SDNode *N) {
19471 SDValue N0 = N->getOperand(0);
19472 EVT VT = N->getValueType(0);
19473 SDLoc DL(N);
19474
19475 // fold (fabs c1) -> fabs(c1)
19476 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FABS, DL, VT, {N0}))
19477 return C;
19478
19479  if (SimplifyDemandedBits(SDValue(N, 0)))
19480    return SDValue(N, 0);
19481
19482 if (SDValue Cast = foldSignChangeInBitcast(N))
19483 return Cast;
19484
19485 return SDValue();
19486}
19487
19488SDValue DAGCombiner::visitBRCOND(SDNode *N) {
19489 SDValue Chain = N->getOperand(0);
19490 SDValue N1 = N->getOperand(1);
19491 SDValue N2 = N->getOperand(2);
19492
19493 // BRCOND(FREEZE(cond)) is equivalent to BRCOND(cond) (both are
19494 // nondeterministic jumps).
19495 if (N1->getOpcode() == ISD::FREEZE && N1.hasOneUse()) {
19496 return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain,
19497 N1->getOperand(0), N2, N->getFlags());
19498 }
19499
19500 // Variant of the previous fold where there is a SETCC in between:
19501 // BRCOND(SETCC(FREEZE(X), CONST, Cond))
19502 // =>
19503 // BRCOND(FREEZE(SETCC(X, CONST, Cond)))
19504 // =>
19505 // BRCOND(SETCC(X, CONST, Cond))
19506 // This is correct if FREEZE(X) has one use and SETCC(FREEZE(X), CONST, Cond)
19507 // isn't equivalent to true or false.
19508 // For example, SETCC(FREEZE(X), -128, SETULT) cannot be folded to
19509 // FREEZE(SETCC(X, -128, SETULT)) because X can be poison.
19510 if (N1->getOpcode() == ISD::SETCC && N1.hasOneUse()) {
19511 SDValue S0 = N1->getOperand(0), S1 = N1->getOperand(1);
19512    ISD::CondCode Cond = cast<CondCodeSDNode>(N1->getOperand(2))->get();
19513    ConstantSDNode *S0C = dyn_cast<ConstantSDNode>(S0);
19514 ConstantSDNode *S1C = dyn_cast<ConstantSDNode>(S1);
19515 bool Updated = false;
19516
19517 // Is 'X Cond C' always true or false?
19518 auto IsAlwaysTrueOrFalse = [](ISD::CondCode Cond, ConstantSDNode *C) {
19519 bool False = (Cond == ISD::SETULT && C->isZero()) ||
19520 (Cond == ISD::SETLT && C->isMinSignedValue()) ||
19521 (Cond == ISD::SETUGT && C->isAllOnes()) ||
19522 (Cond == ISD::SETGT && C->isMaxSignedValue());
19523 bool True = (Cond == ISD::SETULE && C->isAllOnes()) ||
19524 (Cond == ISD::SETLE && C->isMaxSignedValue()) ||
19525 (Cond == ISD::SETUGE && C->isZero()) ||
19526 (Cond == ISD::SETGE && C->isMinSignedValue());
19527 return True || False;
19528 };
19529
19530 if (S0->getOpcode() == ISD::FREEZE && S0.hasOneUse() && S1C) {
19531 if (!IsAlwaysTrueOrFalse(Cond, S1C)) {
19532 S0 = S0->getOperand(0);
19533 Updated = true;
19534 }
19535 }
19536 if (S1->getOpcode() == ISD::FREEZE && S1.hasOneUse() && S0C) {
19537 if (!IsAlwaysTrueOrFalse(ISD::getSetCCSwappedOperands(Cond), S0C)) {
19538 S1 = S1->getOperand(0);
19539 Updated = true;
19540 }
19541 }
19542
19543 if (Updated)
19544 return DAG.getNode(
19545 ISD::BRCOND, SDLoc(N), MVT::Other, Chain,
19546 DAG.getSetCC(SDLoc(N1), N1->getValueType(0), S0, S1, Cond), N2,
19547 N->getFlags());
19548 }
19549
19550 // If N is a constant we could fold this into a fallthrough or unconditional
19551 // branch. However that doesn't happen very often in normal code, because
19552 // Instcombine/SimplifyCFG should have handled the available opportunities.
19553 // If we did this folding here, it would be necessary to update the
19554 // MachineBasicBlock CFG, which is awkward.
19555
19556 // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
19557 // on the target, also copy fast math flags.
19558 if (N1.getOpcode() == ISD::SETCC &&
19559 TLI.isOperationLegalOrCustom(ISD::BR_CC,
19560 N1.getOperand(0).getValueType())) {
19561 return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other, Chain,
19562 N1.getOperand(2), N1.getOperand(0), N1.getOperand(1), N2,
19563 N1->getFlags());
19564 }
19565
19566 if (N1.hasOneUse()) {
19567 // rebuildSetCC calls visitXor which may change the Chain when there is a
19568 // STRICT_FSETCC/STRICT_FSETCCS involved. Use a handle to track changes.
19569 HandleSDNode ChainHandle(Chain);
19570 if (SDValue NewN1 = rebuildSetCC(N1))
19571 return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other,
19572 ChainHandle.getValue(), NewN1, N2, N->getFlags());
19573 }
19574
19575 return SDValue();
19576}
19577
19578SDValue DAGCombiner::rebuildSetCC(SDValue N) {
19579 if (N.getOpcode() == ISD::SRL ||
19580 (N.getOpcode() == ISD::TRUNCATE &&
19581 (N.getOperand(0).hasOneUse() &&
19582 N.getOperand(0).getOpcode() == ISD::SRL))) {
19583    // Look past the truncate.
19584 if (N.getOpcode() == ISD::TRUNCATE)
19585 N = N.getOperand(0);
19586
19587 // Match this pattern so that we can generate simpler code:
19588 //
19589 // %a = ...
19590 // %b = and i32 %a, 2
19591 // %c = srl i32 %b, 1
19592 // brcond i32 %c ...
19593 //
19594 // into
19595 //
19596 // %a = ...
19597 // %b = and i32 %a, 2
19598 // %c = setcc eq %b, 0
19599 // brcond %c ...
19600 //
19601 // This applies only when the AND constant value has one bit set and the
19602 // SRL constant is equal to the log2 of the AND constant. The back-end is
19603 // smart enough to convert the result into a TEST/JMP sequence.
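    // In the IR above, the AND constant 2 has a single bit set and
    // log2(2) == 1 matches the SRL amount, so %c is nonzero exactly when
    // (%a & 2) != 0; testing %b against zero with setcc ne is therefore
    // equivalent and cheaper to lower.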
19604 SDValue Op0 = N.getOperand(0);
19605 SDValue Op1 = N.getOperand(1);
19606
19607 if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
19608 SDValue AndOp1 = Op0.getOperand(1);
19609
19610 if (AndOp1.getOpcode() == ISD::Constant) {
19611 const APInt &AndConst = AndOp1->getAsAPIntVal();
19612
19613 if (AndConst.isPowerOf2() &&
19614 Op1->getAsAPIntVal() == AndConst.logBase2()) {
19615 SDLoc DL(N);
19616 return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
19617 Op0, DAG.getConstant(0, DL, Op0.getValueType()),
19618 ISD::SETNE);
19619 }
19620 }
19621 }
19622 }
19623
19624 // Transform (brcond (xor x, y)) -> (brcond (setcc, x, y, ne))
19625 // Transform (brcond (xor (xor x, y), -1)) -> (brcond (setcc, x, y, eq))
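  // These hold because (xor x, y) is nonzero exactly when x != y, and, for an
  // i1 value, xor'ing with -1 (bitwise not) negates that, giving x == y.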
19626 if (N.getOpcode() == ISD::XOR) {
19627 // Because we may call this on a speculatively constructed
19628 // SimplifiedSetCC Node, we need to simplify this node first.
19629 // Ideally this should be folded into SimplifySetCC and not
19630 // here. For now, grab a handle to N so we don't lose it from
19631    // replacements internal to the visit.
19632 HandleSDNode XORHandle(N);
19633 while (N.getOpcode() == ISD::XOR) {
19634 SDValue Tmp = visitXOR(N.getNode());
19635 // No simplification done.
19636 if (!Tmp.getNode())
19637 break;
19638      // Returning N is a form of in-visit replacement that may invalidate
19639      // N, so grab the value from the handle.
19640 if (Tmp.getNode() == N.getNode())
19641 N = XORHandle.getValue();
19642 else // Node simplified. Try simplifying again.
19643 N = Tmp;
19644 }
19645
19646 if (N.getOpcode() != ISD::XOR)
19647 return N;
19648
19649 SDValue Op0 = N->getOperand(0);
19650 SDValue Op1 = N->getOperand(1);
19651
19652 if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
19653 bool Equal = false;
19654 // (brcond (xor (xor x, y), -1)) -> (brcond (setcc x, y, eq))
19655 if (isBitwiseNot(N) && Op0.hasOneUse() && Op0.getOpcode() == ISD::XOR &&
19656 Op0.getValueType() == MVT::i1) {
19657 N = Op0;
19658 Op0 = N->getOperand(0);
19659 Op1 = N->getOperand(1);
19660 Equal = true;
19661 }
19662
19663 EVT SetCCVT = N.getValueType();
19664 if (LegalTypes)
19665 SetCCVT = getSetCCResultType(SetCCVT);
19666 // Replace the uses of XOR with SETCC. Note, avoid this transformation if
19667 // it would introduce illegal operations post-legalization as this can
19668 // result in infinite looping between converting xor->setcc here, and
19669 // expanding setcc->xor in LegalizeSetCCCondCode if requested.
19670      ISD::CondCode CC = Equal ? ISD::SETEQ : ISD::SETNE;
19671      if (!LegalOperations || TLI.isCondCodeLegal(CC, Op0.getSimpleValueType()))
19672 return DAG.getSetCC(SDLoc(N), SetCCVT, Op0, Op1, CC);
19673 }
19674 }
19675
19676 return SDValue();
19677}
19678
19679// Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
19680//
19681SDValue DAGCombiner::visitBR_CC(SDNode *N) {
19682 CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
19683 SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
19684
19685 // If N is a constant we could fold this into a fallthrough or unconditional
19686 // branch. However that doesn't happen very often in normal code, because
19687 // Instcombine/SimplifyCFG should have handled the available opportunities.
19688 // If we did this folding here, it would be necessary to update the
19689 // MachineBasicBlock CFG, which is awkward.
19690
19691 // Use SimplifySetCC to simplify SETCC's.
19692  SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
19693                               CondLHS, CondRHS, CC->get(), SDLoc(N),
19694 false);
19695 if (Simp.getNode()) AddToWorklist(Simp.getNode());
19696
19697 // fold to a simpler setcc
19698 if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
19699 return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
19700 N->getOperand(0), Simp.getOperand(2),
19701 Simp.getOperand(0), Simp.getOperand(1),
19702 N->getOperand(4));
19703
19704 return SDValue();
19705}
19706
19707static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec,
19708 bool &IsLoad, bool &IsMasked, SDValue &Ptr,
19709 const TargetLowering &TLI) {
19710 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
19711 if (LD->isIndexed())
19712 return false;
19713 EVT VT = LD->getMemoryVT();
19714 if (!TLI.isIndexedLoadLegal(Inc, VT) && !TLI.isIndexedLoadLegal(Dec, VT))
19715 return false;
19716 Ptr = LD->getBasePtr();
19717 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
19718 if (ST->isIndexed())
19719 return false;
19720 EVT VT = ST->getMemoryVT();
19721 if (!TLI.isIndexedStoreLegal(Inc, VT) && !TLI.isIndexedStoreLegal(Dec, VT))
19722 return false;
19723 Ptr = ST->getBasePtr();
19724 IsLoad = false;
19725 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
19726 if (LD->isIndexed())
19727 return false;
19728 EVT VT = LD->getMemoryVT();
19729 if (!TLI.isIndexedMaskedLoadLegal(Inc, VT) &&
19730 !TLI.isIndexedMaskedLoadLegal(Dec, VT))
19731 return false;
19732 Ptr = LD->getBasePtr();
19733 IsMasked = true;
19734  } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
19735    if (ST->isIndexed())
19736 return false;
19737 EVT VT = ST->getMemoryVT();
19738 if (!TLI.isIndexedMaskedStoreLegal(Inc, VT) &&
19739 !TLI.isIndexedMaskedStoreLegal(Dec, VT))
19740 return false;
19741 Ptr = ST->getBasePtr();
19742 IsLoad = false;
19743 IsMasked = true;
19744 } else {
19745 return false;
19746 }
19747 return true;
19748}
19749
19750/// Try turning a load/store into a pre-indexed load/store when the base
19751/// pointer is an add or subtract and it has other uses besides the load/store.
19752/// After the transformation, the new indexed load/store has effectively folded
19753/// the add/subtract in and all of its other uses are redirected to the
19754/// new load/store.
19755bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
19756 if (Level < AfterLegalizeDAG)
19757 return false;
19758
19759 bool IsLoad = true;
19760 bool IsMasked = false;
19761 SDValue Ptr;
19762 if (!getCombineLoadStoreParts(N, ISD::PRE_INC, ISD::PRE_DEC, IsLoad, IsMasked,
19763 Ptr, TLI))
19764 return false;
19765
19766 // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
19767 // out. There is no reason to make this a preinc/predec.
19768 if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
19769 Ptr->hasOneUse())
19770 return false;
19771
19772 // Ask the target to do addressing mode selection.
19773  SDValue BasePtr;
19774  SDValue Offset;
19775  ISD::MemIndexedMode AM = ISD::UNINDEXED;
19776  if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
19777 return false;
19778
19779 // Backends without true r+i pre-indexed forms may need to pass a
19780 // constant base with a variable offset so that constant coercion
19781 // will work with the patterns in canonical form.
19782 bool Swapped = false;
19783 if (isa<ConstantSDNode>(BasePtr)) {
19784 std::swap(BasePtr, Offset);
19785 Swapped = true;
19786 }
19787
19788  // Don't create an indexed load / store with zero offset.
19789  if (isNullConstant(Offset))
19790    return false;
19791
19792 // Try turning it into a pre-indexed load / store except when:
19793 // 1) The new base ptr is a frame index.
19794 // 2) If N is a store and the new base ptr is either the same as or is a
19795 // predecessor of the value being stored.
19796 // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
19797 // that would create a cycle.
19798 // 4) All uses are load / store ops that use it as old base ptr.
19799
19800 // Check #1. Preinc'ing a frame index would require copying the stack pointer
19801 // (plus the implicit offset) to a register to preinc anyway.
19802 if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
19803 return false;
19804
19805 // Check #2.
19806 if (!IsLoad) {
19807 SDValue Val = IsMasked ? cast<MaskedStoreSDNode>(N)->getValue()
19808 : cast<StoreSDNode>(N)->getValue();
19809
19810 // Would require a copy.
19811 if (Val == BasePtr)
19812 return false;
19813
19814 // Would create a cycle.
19815 if (Val == Ptr || Ptr->isPredecessorOf(Val.getNode()))
19816 return false;
19817 }
19818
19819 // Caches for hasPredecessorHelper.
19820 SmallPtrSet<const SDNode *, 32> Visited;
19821  SmallVector<const SDNode *, 16> Worklist;
19822  Worklist.push_back(N);
19823
19824 // If the offset is a constant, there may be other adds of constants that
19825 // can be folded with this one. We should do this to avoid having to keep
19826 // a copy of the original base pointer.
19827 SmallVector<SDNode *, 16> OtherUses;
19830 for (SDUse &Use : BasePtr->uses()) {
19831 // Skip the use that is Ptr and uses of other results from BasePtr's
19832 // node (important for nodes that return multiple results).
19833 if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
19834 continue;
19835
19836 if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist,
19837 MaxSteps))
19838 continue;
19839
19840 if (Use.getUser()->getOpcode() != ISD::ADD &&
19841 Use.getUser()->getOpcode() != ISD::SUB) {
19842 OtherUses.clear();
19843 break;
19844 }
19845
19846 SDValue Op1 = Use.getUser()->getOperand((Use.getOperandNo() + 1) & 1);
19847 if (!isa<ConstantSDNode>(Op1)) {
19848 OtherUses.clear();
19849 break;
19850 }
19851
19852 // FIXME: In some cases, we can be smarter about this.
19853 if (Op1.getValueType() != Offset.getValueType()) {
19854 OtherUses.clear();
19855 break;
19856 }
19857
19858 OtherUses.push_back(Use.getUser());
19859 }
19860
19861 if (Swapped)
19862 std::swap(BasePtr, Offset);
19863
19864 // Now check for #3 and #4.
19865 bool RealUse = false;
19866
19867 for (SDNode *User : Ptr->users()) {
19868 if (User == N)
19869 continue;
19870 if (SDNode::hasPredecessorHelper(User, Visited, Worklist, MaxSteps))
19871 return false;
19872
19873 // If Ptr may be folded in addressing mode of other use, then it's
19874 // not profitable to do this transformation.
19875 if (!canFoldInAddressingMode(Ptr.getNode(), User, DAG, TLI))
19876 RealUse = true;
19877 }
19878
19879 if (!RealUse)
19880 return false;
19881
19882  SDValue Result;
19883  if (!IsMasked) {
19884 if (IsLoad)
19885 Result = DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
19886 else
19887 Result =
19888 DAG.getIndexedStore(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
19889 } else {
19890 if (IsLoad)
19891 Result = DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
19892 Offset, AM);
19893 else
19894 Result = DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N), BasePtr,
19895 Offset, AM);
19896 }
19897 ++PreIndexedNodes;
19898 ++NodesCombined;
19899 LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
19900 Result.dump(&DAG); dbgs() << '\n');
19901 WorklistRemover DeadNodes(*this);
19902 if (IsLoad) {
19903 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
19904 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
19905 } else {
19906 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
19907 }
19908
19909 // Finally, since the node is now dead, remove it from the graph.
19910 deleteAndRecombine(N);
19911
19912 if (Swapped)
19913 std::swap(BasePtr, Offset);
19914
19915 // Replace other uses of BasePtr that can be updated to use Ptr
19916 for (SDNode *OtherUse : OtherUses) {
19917 unsigned OffsetIdx = 1;
19918 if (OtherUse->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
19919 OffsetIdx = 0;
19920 assert(OtherUse->getOperand(!OffsetIdx).getNode() == BasePtr.getNode() &&
19921 "Expected BasePtr operand");
19922
19923 // We need to replace ptr0 in the following expression:
19924 // x0 * offset0 + y0 * ptr0 = t0
19925 // knowing that
19926 // x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
19927 //
19928 // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
19929 // indexed load/store and the expression that needs to be re-written.
19930 //
19931 // Therefore, we have:
19932 // t0 = (x0 * offset0 - x1 * y0 * y1 *offset1) + (y0 * y1) * t1
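    // Worked example (illustrative numbers): for a PRE_INC form where
    // t1 = ptr0 + offset1 with offset1 = 4 (x1 = y1 = 1) and another use
    // t0 = ptr0 + 12 (x0 = y0 = 1), the formula gives
    // t0 = (12 - 4) + t1 = t1 + 8, i.e. an ADD of constant 8 off the indexed
    // result instead of the original base pointer.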
19933
19934 auto *CN = cast<ConstantSDNode>(OtherUse->getOperand(OffsetIdx));
19935 const APInt &Offset0 = CN->getAPIntValue();
19936 const APInt &Offset1 = Offset->getAsAPIntVal();
19937 int X0 = (OtherUse->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
19938 int Y0 = (OtherUse->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
19939 int X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
19940 int Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
19941
19942 unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
19943
19944 APInt CNV = Offset0;
19945 if (X0 < 0) CNV = -CNV;
19946 if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
19947 else CNV = CNV - Offset1;
19948
19949 SDLoc DL(OtherUse);
19950
19951 // We can now generate the new expression.
19952 SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
19953 SDValue NewOp2 = Result.getValue(IsLoad ? 1 : 0);
19954
19955 SDValue NewUse =
19956 DAG.getNode(Opcode, DL, OtherUse->getValueType(0), NewOp1, NewOp2);
19957 DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUse, 0), NewUse);
19958 deleteAndRecombine(OtherUse);
19959 }
19960
19961 // Replace the uses of Ptr with uses of the updated base value.
19962 DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(IsLoad ? 1 : 0));
19963 deleteAndRecombine(Ptr.getNode());
19964 AddToWorklist(Result.getNode());
19965
19966 return true;
19967}
19968
19969static bool shouldCombineToPostInc(SDNode *N, SDValue Ptr, SDNode *PtrUse,
19970                                   SDValue &BasePtr, SDValue &Offset,
19971                                   ISD::MemIndexedMode &AM,
19972                                   SelectionDAG &DAG,
19973 const TargetLowering &TLI) {
19974 if (PtrUse == N ||
19975 (PtrUse->getOpcode() != ISD::ADD && PtrUse->getOpcode() != ISD::SUB))
19976 return false;
19977
19978 if (!TLI.getPostIndexedAddressParts(N, PtrUse, BasePtr, Offset, AM, DAG))
19979 return false;
19980
19981  // Don't create an indexed load / store with zero offset.
19982  if (isNullConstant(Offset))
19983    return false;
19984
19985 if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
19986 return false;
19987
19990 for (SDNode *User : BasePtr->users()) {
19991 if (User == Ptr.getNode())
19992 continue;
19993
19994    // Bail if there's a later user which could perform the indexing instead.
19995 if (isa<MemSDNode>(User)) {
19996 bool IsLoad = true;
19997 bool IsMasked = false;
19998 SDValue OtherPtr;
19999      if (getCombineLoadStoreParts(User, ISD::POST_INC, ISD::POST_DEC, IsLoad,
20000                                   IsMasked, OtherPtr, TLI)) {
20001        SmallVector<const SDNode *, 2> Worklist;
20002        Worklist.push_back(User);
20003 if (SDNode::hasPredecessorHelper(N, Visited, Worklist, MaxSteps))
20004 return false;
20005 }
20006 }
20007
20008 // If all the uses are load / store addresses, then don't do the
20009 // transformation.
20010 if (User->getOpcode() == ISD::ADD || User->getOpcode() == ISD::SUB) {
20011 for (SDNode *UserUser : User->users())
20012 if (canFoldInAddressingMode(User, UserUser, DAG, TLI))
20013 return false;
20014 }
20015 }
20016 return true;
20017}
20018
20019static SDNode *getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad,
20020                                         bool &IsMasked, SDValue &Ptr,
20021                                         SDValue &BasePtr, SDValue &Offset,
20022                                         ISD::MemIndexedMode &AM,
20023                                         SelectionDAG &DAG,
20024                                         const TargetLowering &TLI) {
20025  if (!getCombineLoadStoreParts(N, ISD::POST_INC, ISD::POST_DEC, IsLoad,
20026                                IsMasked, Ptr, TLI) ||
20027 Ptr->hasOneUse())
20028 return nullptr;
20029
20030 // Try turning it into a post-indexed load / store except when
20031 // 1) All uses are load / store ops that use it as base ptr (and
20032  //    it may be folded as addressing mode).
20033 // 2) Op must be independent of N, i.e. Op is neither a predecessor
20034 // nor a successor of N. Otherwise, if Op is folded that would
20035 // create a cycle.
20037 for (SDUse &U : Ptr->uses()) {
20038 if (U.getResNo() != Ptr.getResNo())
20039 continue;
20040
20041 // Check for #1.
20042 SDNode *Op = U.getUser();
20043 if (!shouldCombineToPostInc(N, Ptr, Op, BasePtr, Offset, AM, DAG, TLI))
20044 continue;
20045
20046 // Check for #2.
20047    SmallPtrSet<const SDNode *, 32> Visited;
20048    SmallVector<const SDNode *, 8> Worklist;
20049    // Ptr is predecessor to both N and Op.
20050 Visited.insert(Ptr.getNode());
20051 Worklist.push_back(N);
20052 Worklist.push_back(Op);
20053 if (!SDNode::hasPredecessorHelper(N, Visited, Worklist, MaxSteps) &&
20054 !SDNode::hasPredecessorHelper(Op, Visited, Worklist, MaxSteps))
20055 return Op;
20056 }
20057 return nullptr;
20058}
20059
20060/// Try to combine a load/store with an add/sub of the base pointer node into
20061/// a post-indexed load/store. The transformation effectively folds the
20062/// add/subtract into the new indexed load/store, and all other uses of the
20063/// add/subtract are redirected to the new load/store.
20064bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
20065 if (Level < AfterLegalizeDAG)
20066 return false;
20067
20068 bool IsLoad = true;
20069 bool IsMasked = false;
20070 SDValue Ptr;
20071  SDValue BasePtr;
20072  SDValue Offset;
20073  ISD::MemIndexedMode AM = ISD::UNINDEXED;
20074  SDNode *Op = getPostIndexedLoadStoreOp(N, IsLoad, IsMasked, Ptr, BasePtr,
20075 Offset, AM, DAG, TLI);
20076 if (!Op)
20077 return false;
20078
20079  SDValue Result;
20080  if (!IsMasked)
20081 Result = IsLoad ? DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
20082 Offset, AM)
20083 : DAG.getIndexedStore(SDValue(N, 0), SDLoc(N),
20084 BasePtr, Offset, AM);
20085 else
20086 Result = IsLoad ? DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N),
20087 BasePtr, Offset, AM)
20088 : DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N),
20089 BasePtr, Offset, AM);
20090 ++PostIndexedNodes;
20091 ++NodesCombined;
20092 LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG); dbgs() << "\nWith: ";
20093 Result.dump(&DAG); dbgs() << '\n');
20094 WorklistRemover DeadNodes(*this);
20095 if (IsLoad) {
20096 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
20097 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
20098 } else {
20099 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
20100 }
20101
20102 // Finally, since the node is now dead, remove it from the graph.
20103 deleteAndRecombine(N);
20104
20105  // Replace the uses of Op with uses of the updated base value.
20106  DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
20107                                Result.getValue(IsLoad ? 1 : 0));
20108 deleteAndRecombine(Op);
20109 return true;
20110}
20111
20112/// Return the base-pointer arithmetic from an indexed \p LD.
20113SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
20114 ISD::MemIndexedMode AM = LD->getAddressingMode();
20115 assert(AM != ISD::UNINDEXED);
20116 SDValue BP = LD->getOperand(1);
20117 SDValue Inc = LD->getOperand(2);
20118
20119 // Some backends use TargetConstants for load offsets, but don't expect
20120 // TargetConstants in general ADD nodes. We can convert these constants into
20121 // regular Constants (if the constant is not opaque).
20122  assert((Inc.getOpcode() != ISD::TargetConstant ||
20123          !cast<ConstantSDNode>(Inc)->isOpaque()) &&
20124 "Cannot split out indexing using opaque target constants");
20125 if (Inc.getOpcode() == ISD::TargetConstant) {
20126 ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
20127 Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
20128 ConstInc->getValueType(0));
20129 }
20130
20131 unsigned Opc =
20132 (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
20133 return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
20134}
20135
20136static inline ElementCount numVectorEltsOrZero(EVT T) {
20137  return T.isVector() ? T.getVectorElementCount() : ElementCount::getFixed(0);
20138}
20139
20140bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) {
20141 EVT STType = Val.getValueType();
20142 EVT STMemType = ST->getMemoryVT();
20143 if (STType == STMemType)
20144 return true;
20145 if (isTypeLegal(STMemType))
20146 return false; // fail.
20147 if (STType.isFloatingPoint() && STMemType.isFloatingPoint() &&
20148 TLI.isOperationLegal(ISD::FTRUNC, STMemType)) {
20149 Val = DAG.getNode(ISD::FTRUNC, SDLoc(ST), STMemType, Val);
20150 return true;
20151 }
20152 if (numVectorEltsOrZero(STType) == numVectorEltsOrZero(STMemType) &&
20153 STType.isInteger() && STMemType.isInteger()) {
20154 Val = DAG.getNode(ISD::TRUNCATE, SDLoc(ST), STMemType, Val);
20155 return true;
20156 }
20157 if (STType.getSizeInBits() == STMemType.getSizeInBits()) {
20158 Val = DAG.getBitcast(STMemType, Val);
20159 return true;
20160 }
20161 return false; // fail.
20162}
20163
20164bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) {
20165 EVT LDMemType = LD->getMemoryVT();
20166 EVT LDType = LD->getValueType(0);
20167 assert(Val.getValueType() == LDMemType &&
20168 "Attempting to extend value of non-matching type");
20169 if (LDType == LDMemType)
20170 return true;
20171 if (LDMemType.isInteger() && LDType.isInteger()) {
20172 switch (LD->getExtensionType()) {
20173 case ISD::NON_EXTLOAD:
20174 Val = DAG.getBitcast(LDType, Val);
20175 return true;
20176 case ISD::EXTLOAD:
20177 Val = DAG.getNode(ISD::ANY_EXTEND, SDLoc(LD), LDType, Val);
20178 return true;
20179 case ISD::SEXTLOAD:
20180 Val = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(LD), LDType, Val);
20181 return true;
20182 case ISD::ZEXTLOAD:
20183 Val = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(LD), LDType, Val);
20184 return true;
20185 }
20186 }
20187 return false;
20188}
20189
20190StoreSDNode *DAGCombiner::getUniqueStoreFeeding(LoadSDNode *LD,
20191 int64_t &Offset) {
20192 SDValue Chain = LD->getOperand(0);
20193
20194 // Look through CALLSEQ_START.
20195 if (Chain.getOpcode() == ISD::CALLSEQ_START)
20196 Chain = Chain->getOperand(0);
20197
20198 StoreSDNode *ST = nullptr;
20199  SmallVector<SDValue, 8> Aliases;
20200  if (Chain.getOpcode() == ISD::TokenFactor) {
20201 // Look for unique store within the TokenFactor.
20202 for (SDValue Op : Chain->ops()) {
20203 StoreSDNode *Store = dyn_cast<StoreSDNode>(Op.getNode());
20204 if (!Store)
20205 continue;
20206 BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
20207 BaseIndexOffset BasePtrST = BaseIndexOffset::match(Store, DAG);
20208 if (!BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
20209 continue;
20210 // Make sure the store is not aliased with any nodes in TokenFactor.
20211 GatherAllAliases(Store, Chain, Aliases);
20212 if (Aliases.empty() ||
20213 (Aliases.size() == 1 && Aliases.front().getNode() == Store))
20214 ST = Store;
20215 break;
20216 }
20217 } else {
20218 StoreSDNode *Store = dyn_cast<StoreSDNode>(Chain.getNode());
20219 if (Store) {
20220 BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
20221 BaseIndexOffset BasePtrST = BaseIndexOffset::match(Store, DAG);
20222 if (BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
20223 ST = Store;
20224 }
20225 }
20226
20227 return ST;
20228}
20229
20230SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
20231 if (OptLevel == CodeGenOptLevel::None || !LD->isSimple())
20232 return SDValue();
20233 SDValue Chain = LD->getOperand(0);
20234 int64_t Offset;
20235
20236 StoreSDNode *ST = getUniqueStoreFeeding(LD, Offset);
20237 // TODO: Relax this restriction for unordered atomics (see D66309)
20238 if (!ST || !ST->isSimple() || ST->getAddressSpace() != LD->getAddressSpace())
20239 return SDValue();
20240
20241 EVT LDType = LD->getValueType(0);
20242 EVT LDMemType = LD->getMemoryVT();
20243 EVT STMemType = ST->getMemoryVT();
20244 EVT STType = ST->getValue().getValueType();
20245
20246 // There are two cases to consider here:
20247 // 1. The store is fixed width and the load is scalable. In this case we
20248 // don't know at compile time if the store completely envelops the load
20249 // so we abandon the optimisation.
20250 // 2. The store is scalable and the load is fixed width. We could
20251 // potentially support a limited number of cases here, but there has been
20252 // no cost-benefit analysis to prove it's worth it.
20253 bool LdStScalable = LDMemType.isScalableVT();
20254 if (LdStScalable != STMemType.isScalableVT())
20255 return SDValue();
20256
20257 // If we are dealing with scalable vectors on a big endian platform the
20258 // calculation of offsets below becomes trickier, since we do not know at
20259 // compile time the absolute size of the vector. Until we've done more
20260 // analysis on big-endian platforms it seems better to bail out for now.
20261 if (LdStScalable && DAG.getDataLayout().isBigEndian())
20262 return SDValue();
20263
20264  // Normalize for Endianness. After this, Offset=0 will denote that the least
20265  // significant bit in the loaded value maps to the least significant bit in
20266  // the stored value. With Offset=n (for n > 0) the loaded value starts at the
20267  // n-th least significant byte of the stored value.
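  // Example (illustrative): a 4-byte store feeding a 2-byte load at the same
  // address has OrigOffset = 0; on a big-endian target the load reads the most
  // significant bytes, so Offset becomes (4 bytes - 2 bytes) - 0 = 2, i.e. the
  // load starts at the 2nd least significant byte of the stored value.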
20268 int64_t OrigOffset = Offset;
20269 if (DAG.getDataLayout().isBigEndian())
20270 Offset = ((int64_t)STMemType.getStoreSizeInBits().getFixedValue() -
20271 (int64_t)LDMemType.getStoreSizeInBits().getFixedValue()) /
20272 8 -
20273 Offset;
20274
20275  // Check that the stored value covers all bits that are loaded.
20276 bool STCoversLD;
20277
20278 TypeSize LdMemSize = LDMemType.getSizeInBits();
20279 TypeSize StMemSize = STMemType.getSizeInBits();
20280 if (LdStScalable)
20281 STCoversLD = (Offset == 0) && LdMemSize == StMemSize;
20282 else
20283 STCoversLD = (Offset >= 0) && (Offset * 8 + LdMemSize.getFixedValue() <=
20284 StMemSize.getFixedValue());
20285
20286 auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue {
20287 if (LD->isIndexed()) {
20288 // Cannot handle opaque target constants and we must respect the user's
20289 // request not to split indexes from loads.
20290 if (!canSplitIdx(LD))
20291 return SDValue();
20292 SDValue Idx = SplitIndexingFromLoad(LD);
20293 SDValue Ops[] = {Val, Idx, Chain};
20294 return CombineTo(LD, Ops, 3);
20295 }
20296 return CombineTo(LD, Val, Chain);
20297 };
20298
20299 if (!STCoversLD)
20300 return SDValue();
20301
20302 // Memory as copy space (potentially masked).
20303 if (Offset == 0 && LDType == STType && STMemType == LDMemType) {
20304 // Simple case: Direct non-truncating forwarding
20305 if (LDType.getSizeInBits() == LdMemSize)
20306 return ReplaceLd(LD, ST->getValue(), Chain);
20307 // Can we model the truncate and extension with an and mask?
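    // Example (illustrative): a truncating store of an i32 value to i16
    // followed by a zextload of that i16 back to i32 yields the stored i32
    // value ANDed with 0xffff, so no load is needed at all.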
20308 if (STType.isInteger() && LDMemType.isInteger() && !STType.isVector() &&
20309 !LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) {
20310 // Mask to size of LDMemType
20311 auto Mask =
20312          DAG.getConstant(APInt::getLowBitsSet(STType.getFixedSizeInBits(),
20313                                               StMemSize.getFixedValue()),
20314 SDLoc(ST), STType);
20315 auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask);
20316 return ReplaceLd(LD, Val, Chain);
20317 }
20318 }
20319
20320 // Handle some cases for big-endian that would be Offset 0 and handled for
20321 // little-endian.
20322 SDValue Val = ST->getValue();
20323 if (DAG.getDataLayout().isBigEndian() && Offset > 0 && OrigOffset == 0) {
20324 if (STType.isInteger() && !STType.isVector() && LDType.isInteger() &&
20325 !LDType.isVector() && isTypeLegal(STType) &&
20326 TLI.isOperationLegal(ISD::SRL, STType)) {
20327 Val = DAG.getNode(ISD::SRL, SDLoc(LD), STType, Val,
20328 DAG.getConstant(Offset * 8, SDLoc(LD), STType));
20329 Offset = 0;
20330 }
20331 }
20332
20333 // TODO: Deal with nonzero offset.
20334 if (LD->getBasePtr().isUndef() || Offset != 0)
20335 return SDValue();
20336  // Model necessary truncations / extensions.
20337 // Truncate Value To Stored Memory Size.
20338 do {
20339 if (!getTruncatedStoreValue(ST, Val))
20340 break;
20341 if (!isTypeLegal(LDMemType))
20342 break;
20343 if (STMemType != LDMemType) {
20344 // TODO: Support vectors? This requires extract_subvector/bitcast.
20345 if (!STMemType.isVector() && !LDMemType.isVector() &&
20346 STMemType.isInteger() && LDMemType.isInteger())
20347 Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val);
20348 else
20349 break;
20350 }
20351 if (!extendLoadedValueToExtension(LD, Val))
20352 break;
20353 return ReplaceLd(LD, Val, Chain);
20354 } while (false);
20355
20356 // On failure, cleanup dead nodes we may have created.
20357 if (Val->use_empty())
20358 deleteAndRecombine(Val.getNode());
20359 return SDValue();
20360}
20361
20362SDValue DAGCombiner::visitLOAD(SDNode *N) {
20363 LoadSDNode *LD = cast<LoadSDNode>(N);
20364 SDValue Chain = LD->getChain();
20365 SDValue Ptr = LD->getBasePtr();
20366
20367 // If load is not volatile and there are no uses of the loaded value (and
20368 // the updated indexed value in case of indexed loads), change uses of the
20369 // chain value into uses of the chain input (i.e. delete the dead load).
20370 // TODO: Allow this for unordered atomics (see D66309)
20371 if (LD->isSimple()) {
20372 if (N->getValueType(1) == MVT::Other) {
20373 // Unindexed loads.
20374 if (!N->hasAnyUseOfValue(0)) {
20375 // It's not safe to use the two value CombineTo variant here. e.g.
20376 // v1, chain2 = load chain1, loc
20377 // v2, chain3 = load chain2, loc
20378 // v3 = add v2, c
20379 // Now we replace use of chain2 with chain1. This makes the second load
20380 // isomorphic to the one we are deleting, and thus makes this load live.
20381 LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG);
20382 dbgs() << "\nWith chain: "; Chain.dump(&DAG);
20383 dbgs() << "\n");
20384 WorklistRemover DeadNodes(*this);
20385 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
20386 AddUsersToWorklist(Chain.getNode());
20387 if (N->use_empty())
20388 deleteAndRecombine(N);
20389
20390 return SDValue(N, 0); // Return N so it doesn't get rechecked!
20391 }
20392 } else {
20393 // Indexed loads.
20394 assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
20395
20396 // If this load has an opaque TargetConstant offset, then we cannot split
20397 // the indexing into an add/sub directly (that TargetConstant may not be
20398 // valid for a different type of node, and we cannot convert an opaque
20399 // target constant into a regular constant).
20400 bool CanSplitIdx = canSplitIdx(LD);
20401
20402 if (!N->hasAnyUseOfValue(0) && (CanSplitIdx || !N->hasAnyUseOfValue(1))) {
20403 SDValue Undef = DAG.getUNDEF(N->getValueType(0));
20404 SDValue Index;
20405 if (N->hasAnyUseOfValue(1) && CanSplitIdx) {
20406 Index = SplitIndexingFromLoad(LD);
20407 // Try to fold the base pointer arithmetic into subsequent loads and
20408 // stores.
20409 AddUsersToWorklist(N);
20410 } else
20411 Index = DAG.getUNDEF(N->getValueType(1));
20412 LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG);
20413 dbgs() << "\nWith: "; Undef.dump(&DAG);
20414 dbgs() << " and 2 other values\n");
20415 WorklistRemover DeadNodes(*this);
20416 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
20417 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
20418 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
20419 deleteAndRecombine(N);
20420 return SDValue(N, 0); // Return N so it doesn't get rechecked!
20421 }
20422 }
20423 }
20424
20425 // If this load is directly stored, replace the load value with the stored
20426 // value.
20427 if (auto V = ForwardStoreValueToDirectLoad(LD))
20428 return V;
20429
20430 // Try to infer better alignment information than the load already has.
20431 if (OptLevel != CodeGenOptLevel::None && LD->isUnindexed() &&
20432 !LD->isAtomic()) {
20433 if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
20434 if (*Alignment > LD->getAlign() &&
20435 isAligned(*Alignment, LD->getSrcValueOffset())) {
20436 SDValue NewLoad = DAG.getExtLoad(
20437 LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
20438 LD->getPointerInfo(), LD->getMemoryVT(), *Alignment,
20439 LD->getMemOperand()->getFlags(), LD->getAAInfo());
20440 // NewLoad will always be N as we are only refining the alignment
20441 assert(NewLoad.getNode() == N);
20442 (void)NewLoad;
20443 }
20444 }
20445 }
20446
20447 if (LD->isUnindexed()) {
20448 // Walk up chain skipping non-aliasing memory nodes.
20449 SDValue BetterChain = FindBetterChain(LD, Chain);
20450
20451 // If there is a better chain.
20452 if (Chain != BetterChain) {
20453 SDValue ReplLoad;
20454
20455      // Replace the chain to avoid the dependency on the old chain.
20456 if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
20457 ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
20458 BetterChain, Ptr, LD->getMemOperand());
20459 } else {
20460 ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
20461 LD->getValueType(0),
20462 BetterChain, Ptr, LD->getMemoryVT(),
20463 LD->getMemOperand());
20464 }
20465
20466 // Create token factor to keep old chain connected.
20467 SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
20468 MVT::Other, Chain, ReplLoad.getValue(1));
20469
20470 // Replace uses with load result and token factor
20471 return CombineTo(N, ReplLoad.getValue(0), Token);
20472 }
20473 }
20474
20475 // Try transforming N to an indexed load.
20476 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
20477 return SDValue(N, 0);
20478
20479 // Try to slice up N to more direct loads if the slices are mapped to
20480 // different register banks or pairing can take place.
20481 if (SliceUpLoad(N))
20482 return SDValue(N, 0);
20483
20484 return SDValue();
20485}
20486
20487namespace {
20488
20489/// Helper structure used to slice a load in smaller loads.
20490/// Basically a slice is obtained from the following sequence:
20491/// Origin = load Ty1, Base
20492/// Shift = srl Ty1 Origin, CstTy Amount
20493/// Inst = trunc Shift to Ty2
20494///
20495/// Then, it will be rewritten into:
20496/// Slice = load SliceTy, Base + SliceOffset
20497/// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
20498///
20499/// SliceTy is deduced from the number of bits that are actually used to
20500/// build Inst.
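/// For example (illustrative, little-endian): if Origin = load i32 and
/// Inst = trunc (srl Origin, 16) to i16, only the two high bytes are used,
/// so the slice becomes an i16 load at Base + 2 and no zext is needed.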
20501struct LoadedSlice {
20502 /// Helper structure used to compute the cost of a slice.
20503 struct Cost {
20504 /// Are we optimizing for code size.
20505 bool ForCodeSize = false;
20506
20507 /// Various cost.
20508 unsigned Loads = 0;
20509 unsigned Truncates = 0;
20510 unsigned CrossRegisterBanksCopies = 0;
20511 unsigned ZExts = 0;
20512 unsigned Shift = 0;
20513
20514 explicit Cost(bool ForCodeSize) : ForCodeSize(ForCodeSize) {}
20515
20516 /// Get the cost of one isolated slice.
20517 Cost(const LoadedSlice &LS, bool ForCodeSize)
20518 : ForCodeSize(ForCodeSize), Loads(1) {
20519 EVT TruncType = LS.Inst->getValueType(0);
20520 EVT LoadedType = LS.getLoadedType();
20521 if (TruncType != LoadedType &&
20522 !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
20523 ZExts = 1;
20524 }
20525
20526 /// Account for slicing gain in the current cost.
20527 /// Slicing provide a few gains like removing a shift or a
20528 /// truncate. This method allows to grow the cost of the original
20529 /// load with the gain from this slice.
20530 void addSliceGain(const LoadedSlice &LS) {
20531 // Each slice saves a truncate.
20532 const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
20533 if (!TLI.isTruncateFree(LS.Inst->getOperand(0), LS.Inst->getValueType(0)))
20534 ++Truncates;
20535 // If there is a shift amount, this slice gets rid of it.
20536 if (LS.Shift)
20537 ++Shift;
20538 // If this slice can merge a cross register bank copy, account for it.
20539 if (LS.canMergeExpensiveCrossRegisterBankCopy())
20540 ++CrossRegisterBanksCopies;
20541 }
20542
20543 Cost &operator+=(const Cost &RHS) {
20544 Loads += RHS.Loads;
20545 Truncates += RHS.Truncates;
20546 CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
20547 ZExts += RHS.ZExts;
20548 Shift += RHS.Shift;
20549 return *this;
20550 }
20551
20552 bool operator==(const Cost &RHS) const {
20553 return Loads == RHS.Loads && Truncates == RHS.Truncates &&
20554 CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
20555 ZExts == RHS.ZExts && Shift == RHS.Shift;
20556 }
20557
20558 bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
20559
20560 bool operator<(const Cost &RHS) const {
20561 // Assume cross register banks copies are as expensive as loads.
20562 // FIXME: Do we want some more target hooks?
20563 unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
20564 unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
20565 // Unless we are optimizing for code size, consider the
20566 // expensive operation first.
20567 if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
20568 return ExpensiveOpsLHS < ExpensiveOpsRHS;
20569 return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
20570 (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
20571 }
20572
20573 bool operator>(const Cost &RHS) const { return RHS < *this; }
20574
20575 bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
20576
20577 bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
20578 };
20579
20580  // The last instruction that represents the slice. This should be a
20581 // truncate instruction.
20582 SDNode *Inst;
20583
20584 // The original load instruction.
20585 LoadSDNode *Origin;
20586
20587 // The right shift amount in bits from the original load.
20588 unsigned Shift;
20589
20590  // The DAG from which Origin comes.
20591 // This is used to get some contextual information about legal types, etc.
20592 SelectionDAG *DAG;
20593
20594 LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
20595 unsigned Shift = 0, SelectionDAG *DAG = nullptr)
20596 : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
20597
20598 /// Get the bits used in a chunk of bits \p BitWidth large.
20599  /// \return Result is \p BitWidth bits wide and has used bits set to 1 and
20600 /// not used bits set to 0.
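  /// E.g. (illustrative) with a 32-bit Origin, Shift = 16 and an i8 truncate,
  /// the result is 0x00ff0000.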
20601 APInt getUsedBits() const {
20602 // Reproduce the trunc(lshr) sequence:
20603 // - Start from the truncated value.
20604 // - Zero extend to the desired bit width.
20605 // - Shift left.
20606 assert(Origin && "No original load to compare against.");
20607 unsigned BitWidth = Origin->getValueSizeInBits(0);
20608 assert(Inst && "This slice is not bound to an instruction");
20609 assert(Inst->getValueSizeInBits(0) <= BitWidth &&
20610 "Extracted slice is bigger than the whole type!");
20611 APInt UsedBits(Inst->getValueSizeInBits(0), 0);
20612 UsedBits.setAllBits();
20613 UsedBits = UsedBits.zext(BitWidth);
20614 UsedBits <<= Shift;
20615 return UsedBits;
20616 }
20617
20618 /// Get the size of the slice to be loaded in bytes.
20619 unsigned getLoadedSize() const {
20620 unsigned SliceSize = getUsedBits().popcount();
20621 assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
20622 return SliceSize / 8;
20623 }
20624
20625 /// Get the type that will be loaded for this slice.
20626 /// Note: This may not be the final type for the slice.
20627 EVT getLoadedType() const {
20628 assert(DAG && "Missing context");
20629 LLVMContext &Ctxt = *DAG->getContext();
20630 return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
20631 }
20632
20633 /// Get the alignment of the load used for this slice.
20634 Align getAlign() const {
20635 Align Alignment = Origin->getAlign();
20636 uint64_t Offset = getOffsetFromBase();
20637 if (Offset != 0)
20638 Alignment = commonAlignment(Alignment, Alignment.value() + Offset);
20639 return Alignment;
20640 }
20641
20642 /// Check if this slice can be rewritten with legal operations.
20643 bool isLegal() const {
20644 // An invalid slice is not legal.
20645 if (!Origin || !Inst || !DAG)
20646 return false;
20647
20648 // Offsets are for indexed load only, we do not handle that.
20649 if (!Origin->getOffset().isUndef())
20650 return false;
20651
20652 const TargetLowering &TLI = DAG->getTargetLoweringInfo();
20653
20654 // Check that the type is legal.
20655 EVT SliceType = getLoadedType();
20656 if (!TLI.isTypeLegal(SliceType))
20657 return false;
20658
20659 // Check that the load is legal for this type.
20660 if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
20661 return false;
20662
20663 // Check that the offset can be computed.
20664 // 1. Check its type.
20665 EVT PtrType = Origin->getBasePtr().getValueType();
20666 if (PtrType == MVT::Untyped || PtrType.isExtended())
20667 return false;
20668
20669 // 2. Check that it fits in the immediate.
20670 if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
20671 return false;
20672
20673 // 3. Check that the computation is legal.
20674 if (!TLI.isOperationLegal(ISD::ADD, PtrType))
20675 return false;
20676
20677 // Check that the zext is legal if it needs one.
20678 EVT TruncateType = Inst->getValueType(0);
20679 if (TruncateType != SliceType &&
20680 !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
20681 return false;
20682
20683 return true;
20684 }
20685
20686 /// Get the offset in bytes of this slice in the original chunk of
20687 /// bits.
20688 /// \pre DAG != nullptr.
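  /// E.g. (illustrative) for a 4-byte origin with Shift = 16 and a 1-byte
  /// slice, the offset is 2 on little-endian targets and 4 - 2 - 1 = 1 on
  /// big-endian targets.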
20689 uint64_t getOffsetFromBase() const {
20690 assert(DAG && "Missing context.");
20691 bool IsBigEndian = DAG->getDataLayout().isBigEndian();
20692 assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
20693 uint64_t Offset = Shift / 8;
20694 unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
20695 assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
20696 "The size of the original loaded type is not a multiple of a"
20697 " byte.");
20698 // If Offset is bigger than TySizeInBytes, it means we are loading all
20699 // zeros. This should have been optimized before in the process.
20700 assert(TySizeInBytes > Offset &&
20701 "Invalid shift amount for given loaded size");
20702 if (IsBigEndian)
20703 Offset = TySizeInBytes - Offset - getLoadedSize();
20704 return Offset;
20705 }
20706
20707 /// Generate the sequence of instructions to load the slice
20708 /// represented by this object and redirect the uses of this slice to
20709 /// this new sequence of instructions.
20710 /// \pre this->Inst && this->Origin are valid Instructions and this
20711 /// object passed the legal check: LoadedSlice::isLegal returned true.
20712 /// \return The last instruction of the sequence used to load the slice.
20713 SDValue loadSlice() const {
20714 assert(Inst && Origin && "Unable to replace a non-existing slice.");
20715 const SDValue &OldBaseAddr = Origin->getBasePtr();
20716 SDValue BaseAddr = OldBaseAddr;
20717 // Get the offset in that chunk of bytes w.r.t. the endianness.
20718 int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
20719 assert(Offset >= 0 && "Offset too big to fit in int64_t!");
20720 if (Offset) {
20721 // BaseAddr = BaseAddr + Offset.
20722 EVT ArithType = BaseAddr.getValueType();
20723 SDLoc DL(Origin);
20724 BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
20725 DAG->getConstant(Offset, DL, ArithType));
20726 }
20727
20728 // Create the type of the loaded slice according to its size.
20729 EVT SliceType = getLoadedType();
20730
20731 // Create the load for the slice.
20732 SDValue LastInst =
20733 DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
20734                     Origin->getPointerInfo().getWithOffset(Offset), getAlign(),
20735                     Origin->getMemOperand()->getFlags());
20736 // If the final type is not the same as the loaded type, this means that
20737 // we have to pad with zero. Create a zero extend for that.
20738 EVT FinalType = Inst->getValueType(0);
20739 if (SliceType != FinalType)
20740 LastInst =
20741 DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
20742 return LastInst;
20743 }
20744
20745 /// Check if this slice can be merged with an expensive cross register
20746 /// bank copy. E.g.,
20747 /// i = load i32
20748 /// f = bitcast i32 i to float
20749 bool canMergeExpensiveCrossRegisterBankCopy() const {
20750 if (!Inst || !Inst->hasOneUse())
20751 return false;
20752 SDNode *User = *Inst->user_begin();
20753 if (User->getOpcode() != ISD::BITCAST)
20754 return false;
20755 assert(DAG && "Missing context");
20756 const TargetLowering &TLI = DAG->getTargetLoweringInfo();
20757 EVT ResVT = User->getValueType(0);
20758 const TargetRegisterClass *ResRC =
20759 TLI.getRegClassFor(ResVT.getSimpleVT(), User->isDivergent());
20760 const TargetRegisterClass *ArgRC =
20761 TLI.getRegClassFor(User->getOperand(0).getValueType().getSimpleVT(),
20762 User->getOperand(0)->isDivergent());
20763 if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
20764 return false;
20765
20766 // At this point, we know that we perform a cross-register-bank copy.
20767 // Check if it is expensive.
20768 const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
20769 // Assume bitcasts are cheap, unless both register classes do not
20770 // explicitly share a common sub class.
20771 if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
20772 return false;
20773
20774 // Check if it will be merged with the load.
20775 // 1. Check the alignment / fast memory access constraint.
20776 unsigned IsFast = 0;
20777 if (!TLI.allowsMemoryAccess(*DAG->getContext(), DAG->getDataLayout(), ResVT,
20778 Origin->getAddressSpace(), getAlign(),
20779 Origin->getMemOperand()->getFlags(), &IsFast) ||
20780 !IsFast)
20781 return false;
20782
20783 // 2. Check that the load is a legal operation for that type.
20784 if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
20785 return false;
20786
20787 // 3. Check that we do not have a zext in the way.
20788 if (Inst->getValueType(0) != getLoadedType())
20789 return false;
20790
20791 return true;
20792 }
20793};
20794
20795} // end anonymous namespace
20796
20797/// Check that all bits set in \p UsedBits form a dense region, i.e.,
20798/// \p UsedBits looks like 0..0 1..1 0..0.
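/// E.g. (illustrative) 0x00ff00 is dense, while 0x0f0f00 is not.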
20799static bool areUsedBitsDense(const APInt &UsedBits) {
20800 // If all the bits are one, this is dense!
20801 if (UsedBits.isAllOnes())
20802 return true;
20803
20804 // Get rid of the unused bits on the right.
20805 APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countr_zero());
20806 // Get rid of the unused bits on the left.
20807 if (NarrowedUsedBits.countl_zero())
20808 NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
20809 // Check that the chunk of bits is completely used.
20810 return NarrowedUsedBits.isAllOnes();
20811}
20812
20813/// Check whether or not \p First and \p Second are next to each other
20814/// in memory. This means that there is no hole between the bits loaded
20815/// by \p First and the bits loaded by \p Second.
20816static bool areSlicesNextToEachOther(const LoadedSlice &First,
20817 const LoadedSlice &Second) {
20818 assert(First.Origin == Second.Origin && First.Origin &&
20819 "Unable to match different memory origins.");
20820 APInt UsedBits = First.getUsedBits();
20821 assert((UsedBits & Second.getUsedBits()) == 0 &&
20822 "Slices are not supposed to overlap.");
20823 UsedBits |= Second.getUsedBits();
20824 return areUsedBitsDense(UsedBits);
20825}
20826
20827/// Adjust the \p GlobalLSCost according to the target
20828/// pairing capabilities and the layout of the slices.
20829/// \pre \p GlobalLSCost should account for at least as many loads as
20830/// there are in the slices in \p LoadedSlices.
20831static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
20832                                 LoadedSlice::Cost &GlobalLSCost) {
20833 unsigned NumberOfSlices = LoadedSlices.size();
20834  // If there are fewer than 2 elements, no pairing is possible.
20835 if (NumberOfSlices < 2)
20836 return;
20837
20838 // Sort the slices so that elements that are likely to be next to each
20839 // other in memory are next to each other in the list.
20840 llvm::sort(LoadedSlices, [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
20841 assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
20842 return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
20843 });
20844 const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
20845  // First (resp. Second) is the first (resp. second) potential candidate
20846  // to be placed in a paired load.
20847 const LoadedSlice *First = nullptr;
20848 const LoadedSlice *Second = nullptr;
20849 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
20850 // Set the beginning of the pair.
20851 First = Second) {
20852 Second = &LoadedSlices[CurrSlice];
20853
20854 // If First is NULL, it means we start a new pair.
20855 // Get to the next slice.
20856 if (!First)
20857 continue;
20858
20859 EVT LoadedType = First->getLoadedType();
20860
20861 // If the types of the slices are different, we cannot pair them.
20862 if (LoadedType != Second->getLoadedType())
20863 continue;
20864
20865 // Check if the target supplies paired loads for this type.
20866 Align RequiredAlignment;
20867 if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
20868 // move to the next pair, this type is hopeless.
20869 Second = nullptr;
20870 continue;
20871 }
20872 // Check if we meet the alignment requirement.
20873 if (First->getAlign() < RequiredAlignment)
20874 continue;
20875
20876 // Check that both loads are next to each other in memory.
20877 if (!areSlicesNextToEachOther(*First, *Second))
20878 continue;
20879
20880 assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
20881 --GlobalLSCost.Loads;
20882 // Move to the next pair.
20883 Second = nullptr;
20884 }
20885}
20886
20887/// Check the profitability of all involved LoadedSlice.
20888/// Currently, it is considered profitable if there are exactly two
20889/// involved slices (1) which are (2) next to each other in memory, and
20890/// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
20891///
20892/// Note: The order of the elements in \p LoadedSlices may be modified, but not
20893/// the elements themselves.
20894///
20895/// FIXME: When the cost model is mature enough, we can relax
20896/// constraints (1) and (2).
20897static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
20898                                const APInt &UsedBits, bool ForCodeSize) {
20899  unsigned NumberOfSlices = LoadedSlices.size();
20900  if (StressLoadSlicing)
20901    return NumberOfSlices > 1;
20902
20903 // Check (1).
20904 if (NumberOfSlices != 2)
20905 return false;
20906
20907 // Check (2).
20908 if (!areUsedBitsDense(UsedBits))
20909 return false;
20910
20911 // Check (3).
20912 LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
20913 // The original code has one big load.
20914 OrigCost.Loads = 1;
20915 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
20916 const LoadedSlice &LS = LoadedSlices[CurrSlice];
20917 // Accumulate the cost of all the slices.
20918 LoadedSlice::Cost SliceCost(LS, ForCodeSize);
20919 GlobalSlicingCost += SliceCost;
20920
20921 // Account as cost in the original configuration the gain obtained
20922 // with the current slices.
20923 OrigCost.addSliceGain(LS);
20924 }
20925
20926 // If the target supports paired load, adjust the cost accordingly.
20927 adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
20928 return OrigCost > GlobalSlicingCost;
20929}
20930
20931/// If the given load, \p LI, is used only by trunc or trunc(lshr)
20932/// operations, split it in the various pieces being extracted.
20933///
20934/// This sort of thing is introduced by SROA.
20935/// This slicing takes care not to insert overlapping loads.
20936/// \pre LI is a simple load (i.e., not an atomic or volatile load).
20937bool DAGCombiner::SliceUpLoad(SDNode *N) {
20938 if (Level < AfterLegalizeDAG)
20939 return false;
20940
20941 LoadSDNode *LD = cast<LoadSDNode>(N);
20942 if (!LD->isSimple() || !ISD::isNormalLoad(LD) ||
20943 !LD->getValueType(0).isInteger())
20944 return false;
20945
20946 // The algorithm to split up a load of a scalable vector into individual
20947 // elements currently requires knowing the length of the loaded type,
20948 // so will need adjusting to work on scalable vectors.
20949 if (LD->getValueType(0).isScalableVector())
20950 return false;
20951
20952 // Keep track of already used bits to detect overlapping values.
20953 // In that case, we will just abort the transformation.
20954 APInt UsedBits(LD->getValueSizeInBits(0), 0);
20955
20956 SmallVector<LoadedSlice, 4> LoadedSlices;
20957
20958 // Check if this load is used as several smaller chunks of bits.
20959 // Basically, look for uses in trunc or trunc(lshr) and record a new chain
20960 // of computation for each trunc.
20961 for (SDUse &U : LD->uses()) {
20962 // Skip the uses of the chain.
20963 if (U.getResNo() != 0)
20964 continue;
20965
20966 SDNode *User = U.getUser();
20967 unsigned Shift = 0;
20968
20969 // Check if this is a trunc(lshr).
20970 if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
20971 isa<ConstantSDNode>(User->getOperand(1))) {
20972 Shift = User->getConstantOperandVal(1);
20973 User = *User->user_begin();
20974 }
20975
20976    // At this point, User is a TRUNCATE iff we encountered trunc or
20977    // trunc(lshr).
20978 if (User->getOpcode() != ISD::TRUNCATE)
20979 return false;
20980
20981    // The width of the type must be a power of 2 and at least 8 bits.
20982    // Otherwise the load cannot be represented in LLVM IR.
20983    // Moreover, if the shift amount is not a multiple of 8, the slice
20984    // would span several bytes. We do not support that.
20985 unsigned Width = User->getValueSizeInBits(0);
20986 if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
20987 return false;
20988
20989 // Build the slice for this chain of computations.
20990 LoadedSlice LS(User, LD, Shift, &DAG);
20991 APInt CurrentUsedBits = LS.getUsedBits();
20992
20993 // Check if this slice overlaps with another.
20994 if ((CurrentUsedBits & UsedBits) != 0)
20995 return false;
20996 // Update the bits used globally.
20997 UsedBits |= CurrentUsedBits;
20998
20999 // Check if the new slice would be legal.
21000 if (!LS.isLegal())
21001 return false;
21002
21003 // Record the slice.
21004 LoadedSlices.push_back(LS);
21005 }
21006
21007 // Abort slicing if it does not seem to be profitable.
21008 if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
21009 return false;
21010
21011 ++SlicedLoads;
21012
21013 // Rewrite each chain to use an independent load.
21014 // By construction, each chain can be represented by a unique load.
21015
21016 // Prepare the argument for the new token factor for all the slices.
21017 SmallVector<SDValue, 8> ArgChains;
21018 for (const LoadedSlice &LS : LoadedSlices) {
21019 SDValue SliceInst = LS.loadSlice();
21020 CombineTo(LS.Inst, SliceInst, true);
21021 if (SliceInst.getOpcode() != ISD::LOAD)
21022 SliceInst = SliceInst.getOperand(0);
21023 assert(SliceInst->getOpcode() == ISD::LOAD &&
21024 "It takes more than a zext to get to the loaded slice!!");
21025 ArgChains.push_back(SliceInst.getValue(1));
21026 }
21027
21028 SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
21029 ArgChains);
21030 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
21031 AddToWorklist(Chain.getNode());
21032 return true;
21033}
21034
21035/// Check to see if V is (and load (ptr), imm), where the load is having
21036/// specific bytes cleared out. If so, return the byte size being masked out
21037/// and the shift amount.
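/// For example, with V = (and (load i32 p), 0xffff00ff) byte 1 of the loaded
/// value is being cleared, so this would return {1, 1} (one masked byte at a
/// byte shift of one), provided the chain requirements below are met.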
21038 static std::pair<unsigned, unsigned>
21039 CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
21040   std::pair<unsigned, unsigned> Result(0, 0);
21041
21042 // Check for the structure we're looking for.
21043 if (V->getOpcode() != ISD::AND ||
21044 !isa<ConstantSDNode>(V->getOperand(1)) ||
21045 !ISD::isNormalLoad(V->getOperand(0).getNode()))
21046 return Result;
21047
21048 // Check the chain and pointer.
21049 LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
21050 if (LD->getBasePtr() != Ptr) return Result; // Not from same pointer.
21051
21052 // This only handles simple types.
21053 if (V.getValueType() != MVT::i16 &&
21054 V.getValueType() != MVT::i32 &&
21055 V.getValueType() != MVT::i64)
21056 return Result;
21057
21058   // Check the constant mask. Invert it so that the bits being masked out are
21059   // 1 and the bits being kept are 0. Use getSExtValue so that leading bits
21060   // follow the sign bit for uniformity.
21061 uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
21062 unsigned NotMaskLZ = llvm::countl_zero(NotMask);
21063 if (NotMaskLZ & 7) return Result; // Must be multiple of a byte.
21064 unsigned NotMaskTZ = llvm::countr_zero(NotMask);
21065 if (NotMaskTZ & 7) return Result; // Must be multiple of a byte.
21066 if (NotMaskLZ == 64) return Result; // All zero mask.
21067
21068 // See if we have a continuous run of bits. If so, we have 0*1+0*
21069 if (llvm::countr_one(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
21070 return Result;
21071
21072 // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
21073 if (V.getValueType() != MVT::i64 && NotMaskLZ)
21074 NotMaskLZ -= 64-V.getValueSizeInBits();
21075
21076 unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
21077 switch (MaskedBytes) {
21078 case 1:
21079 case 2:
21080 case 4: break;
21081 default: return Result; // All one mask, or 5-byte mask.
21082 }
21083
21084   // Verify that the run starts at a byte offset that is a multiple of the
21085   // masked width, so that the access is aligned the same as the access width.
21086 if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
21087
21088   // For narrowing to be valid, the load must be the memory operation
21089   // immediately preceding the store.
21090 if (LD == Chain.getNode())
21091 ; // ok.
21092 else if (Chain->getOpcode() == ISD::TokenFactor &&
21093 SDValue(LD, 1).hasOneUse()) {
21094     // LD has only 1 chain use, so there are no indirect dependencies.
21095 if (!LD->isOperandOf(Chain.getNode()))
21096 return Result;
21097 } else
21098 return Result; // Fail.
21099
21100 Result.first = MaskedBytes;
21101 Result.second = NotMaskTZ/8;
21102 return Result;
21103}
21104
21105/// Check to see if IVal is something that provides a value as specified by
21106/// MaskInfo. If so, replace the specified store with a narrower store of
21107/// truncated IVal.
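/// For example, given MaskInfo = {1, 1} and an IVal whose bits outside byte 1
/// are known to be zero, the wide store would be replaced (on a little-endian
/// target, and if the narrow access is allowed) by an i8 store of
/// (trunc (srl IVal, 8)) at offset 1 from the base pointer.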
21108static SDValue
21109ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
21110 SDValue IVal, StoreSDNode *St,
21111 DAGCombiner *DC) {
21112 unsigned NumBytes = MaskInfo.first;
21113 unsigned ByteShift = MaskInfo.second;
21114 SelectionDAG &DAG = DC->getDAG();
21115
21116 // Check to see if IVal is all zeros in the part being masked in by the 'or'
21117 // that uses this. If not, this is not a replacement.
21118 APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
21119 ByteShift*8, (ByteShift+NumBytes)*8);
21120 if (!DAG.MaskedValueIsZero(IVal, Mask)) return SDValue();
21121
21122 // Check that it is legal on the target to do this. It is legal if the new
21123 // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
21124 // legalization. If the source type is legal, but the store type isn't, see
21125 // if we can use a truncating store.
21126 MVT VT = MVT::getIntegerVT(NumBytes * 8);
21127 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
21128 bool UseTruncStore;
21129 if (DC->isTypeLegal(VT))
21130 UseTruncStore = false;
21131 else if (TLI.isTypeLegal(IVal.getValueType()) &&
21132 TLI.isTruncStoreLegal(IVal.getValueType(), VT))
21133 UseTruncStore = true;
21134 else
21135 return SDValue();
21136
21137 // Can't do this for indexed stores.
21138 if (St->isIndexed())
21139 return SDValue();
21140
21141 // Check that the target doesn't think this is a bad idea.
21142 if (St->getMemOperand() &&
21143 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
21144 *St->getMemOperand()))
21145 return SDValue();
21146
21147 // Okay, we can do this! Replace the 'St' store with a store of IVal that is
21148 // shifted by ByteShift and truncated down to NumBytes.
21149 if (ByteShift) {
21150 SDLoc DL(IVal);
21151 IVal = DAG.getNode(
21152 ISD::SRL, DL, IVal.getValueType(), IVal,
21153 DAG.getShiftAmountConstant(ByteShift * 8, IVal.getValueType(), DL));
21154 }
21155
21156 // Figure out the offset for the store and the alignment of the access.
21157 unsigned StOffset;
21158 if (DAG.getDataLayout().isLittleEndian())
21159 StOffset = ByteShift;
21160 else
21161 StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
21162
21163 SDValue Ptr = St->getBasePtr();
21164 if (StOffset) {
21165     SDLoc DL(IVal);
21166     Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(StOffset), DL);
21167   }
21168
21169 ++OpsNarrowed;
21170 if (UseTruncStore)
21171 return DAG.getTruncStore(St->getChain(), SDLoc(St), IVal, Ptr,
21172 St->getPointerInfo().getWithOffset(StOffset), VT,
21173 St->getBaseAlign());
21174
21175 // Truncate down to the new size.
21176 IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
21177
21178 return DAG.getStore(St->getChain(), SDLoc(St), IVal, Ptr,
21179 St->getPointerInfo().getWithOffset(StOffset),
21180 St->getBaseAlign());
21181}
21182
21183/// Look for sequence of load / op / store where op is one of 'or', 'xor', and
21184/// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
21185/// narrowing the load and store if it would end up being a win for performance
21186/// or code size.
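/// For example, "store (or (load i32 p), 0x00FF0000), p" only changes byte 2
/// of the value in memory, so on a little-endian target it would become an i8
/// load/or/store at p+2 with the constant 0xFF, assuming the target reports
/// the narrow access as legal and fast.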
21187SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
21188 StoreSDNode *ST = cast<StoreSDNode>(N);
21189 if (!ST->isSimple())
21190 return SDValue();
21191
21192 SDValue Chain = ST->getChain();
21193 SDValue Value = ST->getValue();
21194 SDValue Ptr = ST->getBasePtr();
21195 EVT VT = Value.getValueType();
21196
21197 if (ST->isTruncatingStore() || VT.isVector())
21198 return SDValue();
21199
21200 unsigned Opc = Value.getOpcode();
21201
21202 if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
21203 !Value.hasOneUse())
21204 return SDValue();
21205
21206 // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
21207 // is a byte mask indicating a consecutive number of bytes, check to see if
21208 // Y is known to provide just those bytes. If so, we try to replace the
21209 // load + replace + store sequence with a single (narrower) store, which makes
21210   // the load dead.
21211   if (Opc == ISD::OR && EnableShrinkLoadReplaceStoreWithStore) {
21212     std::pair<unsigned, unsigned> MaskedLoad;
21213 MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
21214 if (MaskedLoad.first)
21215 if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
21216 Value.getOperand(1), ST,this))
21217 return NewST;
21218
21219 // Or is commutative, so try swapping X and Y.
21220 MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
21221 if (MaskedLoad.first)
21222 if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
21223 Value.getOperand(0), ST,this))
21224 return NewST;
21225 }
21226
21227   if (!EnableReduceLoadOpStoreWidth)
21228     return SDValue();
21229
21230 if (Value.getOperand(1).getOpcode() != ISD::Constant)
21231 return SDValue();
21232
21233 SDValue N0 = Value.getOperand(0);
21234 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
21235 Chain == SDValue(N0.getNode(), 1)) {
21236 LoadSDNode *LD = cast<LoadSDNode>(N0);
21237 if (LD->getBasePtr() != Ptr ||
21238 LD->getPointerInfo().getAddrSpace() !=
21239 ST->getPointerInfo().getAddrSpace())
21240 return SDValue();
21241
21242 // Find the type NewVT to narrow the load / op / store to.
21243 SDValue N1 = Value.getOperand(1);
21244 unsigned BitWidth = N1.getValueSizeInBits();
21245 APInt Imm = N1->getAsAPIntVal();
21246 if (Opc == ISD::AND)
21247 Imm.flipAllBits();
21248 if (Imm == 0 || Imm.isAllOnes())
21249 return SDValue();
21250     // Find the least/most significant bits that need to be part of the
21251     // narrowed operation. We assume the target will need to address/access
21252     // full bytes, so we make sure to align LSB and MSB at byte boundaries.
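    // For example, an AND with Imm = 0xFFFF00FF on i32 is flipped to
    // 0x0000FF00, giving LSB = 8, MSB = 15 and NewBW = NextPowerOf2(7) = 8,
    // i.e. a candidate i8 access covering byte 1.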
21253 unsigned BitsPerByteMask = 7u;
21254 unsigned LSB = Imm.countr_zero() & ~BitsPerByteMask;
21255 unsigned MSB = (Imm.getActiveBits() - 1) | BitsPerByteMask;
21256 unsigned NewBW = NextPowerOf2(MSB - LSB);
21257 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
21258 // The narrowing should be profitable, the load/store operation should be
21259 // legal (or custom) and the store size should be equal to the NewVT width.
21260 while (NewBW < BitWidth &&
21261            (NewVT.getStoreSizeInBits() != NewBW ||
21262             !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
21263             (!ReduceLoadOpStoreWidthForceNarrowingProfitable &&
21264              !TLI.isNarrowingProfitable(N, VT, NewVT)))) {
21265 NewBW = NextPowerOf2(NewBW);
21266 NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
21267 }
21268 if (NewBW >= BitWidth)
21269 return SDValue();
21270
21271     // If we come this far, NewVT/NewBW reflect a power-of-2 sized type that is
21272     // large enough to cover all bits that should be modified. This type might
21273     // however be larger than really needed (such as i32 while we actually only
21274     // need to modify one byte). Now we need to find out how to align the memory
21275     // accesses to satisfy preferred alignments, while avoiding accesses to
21276     // memory outside the store size of the original access.
21277
21278 unsigned VTStoreSize = VT.getStoreSizeInBits().getFixedValue();
21279
21280     // Let ShAmt denote the number of bits to skip, counted from the least
21281     // significant bits of Imm, and let PtrOff denote how much the pointer needs
21282     // to be offset (in bytes) for the new access.
21283 unsigned ShAmt = 0;
21284 uint64_t PtrOff = 0;
21285 for (; ShAmt + NewBW <= VTStoreSize; ShAmt += 8) {
21286       // Make sure the range [ShAmt, ShAmt+NewBW) covers both LSB and MSB.
21287 if (ShAmt > LSB)
21288 return SDValue();
21289 if (ShAmt + NewBW < MSB)
21290 continue;
21291
21292 // Calculate PtrOff.
21293 unsigned PtrAdjustmentInBits = DAG.getDataLayout().isBigEndian()
21294 ? VTStoreSize - NewBW - ShAmt
21295 : ShAmt;
21296 PtrOff = PtrAdjustmentInBits / 8;
21297
21298 // Now check if narrow access is allowed and fast, considering alignments.
21299 unsigned IsFast = 0;
21300 Align NewAlign = commonAlignment(LD->getAlign(), PtrOff);
21301 if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), NewVT,
21302 LD->getAddressSpace(), NewAlign,
21303 LD->getMemOperand()->getFlags(), &IsFast) &&
21304 IsFast)
21305 break;
21306 }
21307 // If loop above did not find any accepted ShAmt we need to exit here.
21308 if (ShAmt + NewBW > VTStoreSize)
21309 return SDValue();
21310
21311 APInt NewImm = Imm.lshr(ShAmt).trunc(NewBW);
21312 if (Opc == ISD::AND)
21313 NewImm.flipAllBits();
21314 Align NewAlign = commonAlignment(LD->getAlign(), PtrOff);
21315 SDValue NewPtr =
21316 DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(PtrOff), SDLoc(LD));
21317 SDValue NewLD =
21318 DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
21319 LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
21320 LD->getMemOperand()->getFlags(), LD->getAAInfo());
21321 SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
21322 DAG.getConstant(NewImm, SDLoc(Value), NewVT));
21323 SDValue NewST =
21324 DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
21325 ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);
21326
21327 AddToWorklist(NewPtr.getNode());
21328 AddToWorklist(NewLD.getNode());
21329 AddToWorklist(NewVal.getNode());
21330 WorklistRemover DeadNodes(*this);
21331 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
21332 ++OpsNarrowed;
21333 return NewST;
21334 }
21335
21336 return SDValue();
21337}
21338
21339/// For a given floating point load / store pair, if the load value isn't used
21340/// by any other operations, then consider transforming the pair to integer
21341/// load / store operations if the target deems the transformation profitable.
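/// For example, "store (load f64 p), q" may become "store (load i64 p), q"
/// when the target reports that the integer load and store are legal and that
/// the transformation is desirable for f64.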
21342SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
21343 StoreSDNode *ST = cast<StoreSDNode>(N);
21344 SDValue Value = ST->getValue();
21345 if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
21346 Value.hasOneUse()) {
21347 LoadSDNode *LD = cast<LoadSDNode>(Value);
21348 EVT VT = LD->getMemoryVT();
21349 if (!VT.isSimple() || !VT.isFloatingPoint() || VT != ST->getMemoryVT() ||
21350 LD->isNonTemporal() || ST->isNonTemporal() ||
21351 LD->getPointerInfo().getAddrSpace() != 0 ||
21352 ST->getPointerInfo().getAddrSpace() != 0)
21353 return SDValue();
21354
21355 TypeSize VTSize = VT.getSizeInBits();
21356
21357 // We don't know the size of scalable types at compile time so we cannot
21358 // create an integer of the equivalent size.
21359 if (VTSize.isScalable())
21360 return SDValue();
21361
21362 unsigned FastLD = 0, FastST = 0;
21363 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VTSize.getFixedValue());
21364 if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
21365 !TLI.isOperationLegal(ISD::STORE, IntVT) ||
21366 !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
21367 !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT) ||
21368 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,
21369 *LD->getMemOperand(), &FastLD) ||
21370 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,
21371 *ST->getMemOperand(), &FastST) ||
21372 !FastLD || !FastST)
21373 return SDValue();
21374
21375 SDValue NewLD = DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(),
21376 LD->getBasePtr(), LD->getMemOperand());
21377
21378 SDValue NewST = DAG.getStore(ST->getChain(), SDLoc(N), NewLD,
21379 ST->getBasePtr(), ST->getMemOperand());
21380
21381 AddToWorklist(NewLD.getNode());
21382 AddToWorklist(NewST.getNode());
21383 WorklistRemover DeadNodes(*this);
21384 DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
21385 ++LdStFP2Int;
21386 return NewST;
21387 }
21388
21389 return SDValue();
21390}
21391
21392// This is a helper function for visitMUL to check the profitability
21393// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
21394// MulNode is the original multiply, AddNode is (add x, c1),
21395// and ConstNode is c2.
21396//
21397// If the (add x, c1) has multiple uses, we could increase
21398// the number of adds if we make this transformation.
21399// It would only be worth doing this if we can remove a
21400// multiply in the process. Check for that here.
21401// To illustrate:
21402// (A + c1) * c3
21403// (A + c2) * c3
21404// We're checking for cases where we have common "c3 * A" expressions.
21405bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode, SDValue AddNode,
21406 SDValue ConstNode) {
21407 // If the add only has one use, and the target thinks the folding is
21408 // profitable or does not lead to worse code, this would be OK to do.
21409 if (AddNode->hasOneUse() &&
21410 TLI.isMulAddWithConstProfitable(AddNode, ConstNode))
21411 return true;
21412
21413 // Walk all the users of the constant with which we're multiplying.
21414 for (SDNode *User : ConstNode->users()) {
21415 if (User == MulNode) // This use is the one we're on right now. Skip it.
21416 continue;
21417
21418 if (User->getOpcode() == ISD::MUL) { // We have another multiply use.
21419 SDNode *OtherOp;
21420 SDNode *MulVar = AddNode.getOperand(0).getNode();
21421
21422 // OtherOp is what we're multiplying against the constant.
21423 if (User->getOperand(0) == ConstNode)
21424 OtherOp = User->getOperand(1).getNode();
21425 else
21426 OtherOp = User->getOperand(0).getNode();
21427
21428 // Check to see if multiply is with the same operand of our "add".
21429 //
21430 // ConstNode = CONST
21431 // User = ConstNode * A <-- visiting User. OtherOp is A.
21432 // ...
21433 // AddNode = (A + c1) <-- MulVar is A.
21434 // = AddNode * ConstNode <-- current visiting instruction.
21435 //
21436 // If we make this transformation, we will have a common
21437 // multiply (ConstNode * A) that we can save.
21438 if (OtherOp == MulVar)
21439 return true;
21440
21441 // Now check to see if a future expansion will give us a common
21442 // multiply.
21443 //
21444 // ConstNode = CONST
21445 // AddNode = (A + c1)
21446 // ... = AddNode * ConstNode <-- current visiting instruction.
21447 // ...
21448 // OtherOp = (A + c2)
21449 // User = OtherOp * ConstNode <-- visiting User.
21450 //
21451 // If we make this transformation, we will have a common
21452 // multiply (CONST * A) after we also do the same transformation
21453 // to the "t2" instruction.
21454       if (OtherOp->getOpcode() == ISD::ADD &&
21455           DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
21456           OtherOp->getOperand(0).getNode() == MulVar)
21457 return true;
21458 }
21459 }
21460
21461 // Didn't find a case where this would be profitable.
21462 return false;
21463}
21464
21465SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
21466                                          unsigned NumStores) {
21467   SmallVector<SDValue, 8> Chains;
21468   SmallPtrSet<const SDNode *, 8> Visited;
21469 SDLoc StoreDL(StoreNodes[0].MemNode);
21470
21471 for (unsigned i = 0; i < NumStores; ++i) {
21472 Visited.insert(StoreNodes[i].MemNode);
21473 }
21474
21475   // Don't include chains that are children (other stores being merged) or repeated nodes.
21476 for (unsigned i = 0; i < NumStores; ++i) {
21477 if (Visited.insert(StoreNodes[i].MemNode->getChain().getNode()).second)
21478 Chains.push_back(StoreNodes[i].MemNode->getChain());
21479 }
21480
21481 assert(!Chains.empty() && "Chain should have generated a chain");
21482 return DAG.getTokenFactor(StoreDL, Chains);
21483}
21484
21485bool DAGCombiner::hasSameUnderlyingObj(ArrayRef<MemOpLink> StoreNodes) {
21486 const Value *UnderlyingObj = nullptr;
21487 for (const auto &MemOp : StoreNodes) {
21488 const MachineMemOperand *MMO = MemOp.MemNode->getMemOperand();
21489     // A pseudo value such as a stack frame has its own frame index and size;
21490     // we should not reuse the first store's frame index for other frames.
21491 if (MMO->getPseudoValue())
21492 return false;
21493
21494 if (!MMO->getValue())
21495 return false;
21496
21497 const Value *Obj = getUnderlyingObject(MMO->getValue());
21498
21499 if (UnderlyingObj && UnderlyingObj != Obj)
21500 return false;
21501
21502 if (!UnderlyingObj)
21503 UnderlyingObj = Obj;
21504 }
21505
21506 return true;
21507}
21508
21509bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
21510 SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
21511 bool IsConstantSrc, bool UseVector, bool UseTrunc) {
21512 // Make sure we have something to merge.
21513 if (NumStores < 2)
21514 return false;
21515
21516 assert((!UseTrunc || !UseVector) &&
21517 "This optimization cannot emit a vector truncating store");
21518
21519 // The latest Node in the DAG.
21520 SDLoc DL(StoreNodes[0].MemNode);
21521
21522 TypeSize ElementSizeBits = MemVT.getStoreSizeInBits();
21523 unsigned SizeInBits = NumStores * ElementSizeBits;
21524 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
21525
21526 std::optional<MachineMemOperand::Flags> Flags;
21527 AAMDNodes AAInfo;
21528 for (unsigned I = 0; I != NumStores; ++I) {
21529 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
21530 if (!Flags) {
21531 Flags = St->getMemOperand()->getFlags();
21532 AAInfo = St->getAAInfo();
21533 continue;
21534 }
21535 // Skip merging if there's an inconsistent flag.
21536 if (Flags != St->getMemOperand()->getFlags())
21537 return false;
21538 // Concatenate AA metadata.
21539 AAInfo = AAInfo.concat(St->getAAInfo());
21540 }
21541
21542 EVT StoreTy;
21543 if (UseVector) {
21544 unsigned Elts = NumStores * NumMemElts;
21545 // Get the type for the merged vector store.
21546 StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
21547 } else
21548 StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
21549
21550 SDValue StoredVal;
21551 if (UseVector) {
21552 if (IsConstantSrc) {
21553 SmallVector<SDValue, 8> BuildVector;
21554 for (unsigned I = 0; I != NumStores; ++I) {
21555 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
21556 SDValue Val = St->getValue();
21557 // If constant is of the wrong type, convert it now. This comes up
21558 // when one of our stores was truncating.
21559 if (MemVT != Val.getValueType()) {
21560 Val = peekThroughBitcasts(Val);
21561 // Deal with constants of wrong size.
21562 if (ElementSizeBits != Val.getValueSizeInBits()) {
21563 auto *C = dyn_cast<ConstantSDNode>(Val);
21564 if (!C)
21565 // Not clear how to truncate FP values.
21566 // TODO: Handle truncation of build_vector constants
21567 return false;
21568
21569             EVT IntMemVT =
21570                 EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
21571             Val = DAG.getConstant(C->getAPIntValue()
21572 .zextOrTrunc(Val.getValueSizeInBits())
21573 .zextOrTrunc(ElementSizeBits),
21574 SDLoc(C), IntMemVT);
21575 }
21576           // Make sure the correctly sized value has the correct type (bitcast to MemVT).
21577 Val = DAG.getBitcast(MemVT, Val);
21578 }
21579 BuildVector.push_back(Val);
21580 }
21581       StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
21582                                                : ISD::BUILD_VECTOR,
21583                               DL, StoreTy, BuildVector);
21584     } else {
21585       SmallVector<SDValue, 8> Ops;
21586       for (unsigned i = 0; i < NumStores; ++i) {
21587         StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
21588         SDValue Val = peekThroughBitcasts(St->getValue());
21589         // All operands of BUILD_VECTOR / CONCAT_VECTOR must be of
21590 // type MemVT. If the underlying value is not the correct
21591 // type, but it is an extraction of an appropriate vector we
21592 // can recast Val to be of the correct type. This may require
21593 // converting between EXTRACT_VECTOR_ELT and
21594 // EXTRACT_SUBVECTOR.
21595         if ((MemVT != Val.getValueType()) &&
21596             (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
21597              Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
21598           EVT MemVTScalarTy = MemVT.getScalarType();
21599 // We may need to add a bitcast here to get types to line up.
21600 if (MemVTScalarTy != Val.getValueType().getScalarType()) {
21601 Val = DAG.getBitcast(MemVT, Val);
21602           } else if (MemVT.isVector() &&
21603                      Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
21604             Val = DAG.getNode(ISD::BUILD_VECTOR, DL, MemVT, Val);
21605 } else {
21606             unsigned OpC = MemVT.isVector() ? ISD::EXTRACT_SUBVECTOR
21607                                             : ISD::EXTRACT_VECTOR_ELT;
21608             SDValue Vec = Val.getOperand(0);
21609 SDValue Idx = Val.getOperand(1);
21610 Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Idx);
21611 }
21612 }
21613 Ops.push_back(Val);
21614 }
21615
21616 // Build the extracted vector elements back into a vector.
21617       StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
21618                                                : ISD::BUILD_VECTOR,
21619                               DL, StoreTy, Ops);
21620 }
21621 } else {
21622 // We should always use a vector store when merging extracted vector
21623 // elements, so this path implies a store of constants.
21624 assert(IsConstantSrc && "Merged vector elements should use vector store");
21625
21626 APInt StoreInt(SizeInBits, 0);
21627
21628 // Construct a single integer constant which is made of the smaller
21629 // constant inputs.
21630 bool IsLE = DAG.getDataLayout().isLittleEndian();
21631 for (unsigned i = 0; i < NumStores; ++i) {
21632 unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
21633 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
21634
21635 SDValue Val = St->getValue();
21636 Val = peekThroughBitcasts(Val);
21637 StoreInt <<= ElementSizeBits;
21638 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
21639 StoreInt |= C->getAPIntValue()
21640 .zextOrTrunc(ElementSizeBits)
21641 .zextOrTrunc(SizeInBits);
21642 } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
21643 StoreInt |= C->getValueAPF()
21644 .bitcastToAPInt()
21645 .zextOrTrunc(ElementSizeBits)
21646 .zextOrTrunc(SizeInBits);
21647 // If fp truncation is necessary give up for now.
21648 if (MemVT.getSizeInBits() != ElementSizeBits)
21649 return false;
21650       } else if (ISD::isBuildVectorOfConstantSDNodes(Val.getNode()) ||
21651                  ISD::isBuildVectorOfConstantFPSDNodes(Val.getNode())) {
21652         // Not yet handled
21653 return false;
21654 } else {
21655 llvm_unreachable("Invalid constant element type");
21656 }
21657 }
21658
21659 // Create the new Load and Store operations.
21660 StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
21661 }
21662
21663 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
21664 SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);
21665 bool CanReusePtrInfo = hasSameUnderlyingObj(StoreNodes);
21666
21667   // Make sure we use a truncating store when that is necessary for legality.
21668   // When generating the new widened store, if the first store's pointer info
21669   // cannot be reused, discard the pointer info except for the address space,
21670   // because the widened store can no longer be represented by the original
21671   // pointer info, which describes only the narrow memory object.
21672 SDValue NewStore;
21673 if (!UseTrunc) {
21674 NewStore = DAG.getStore(
21675 NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
21676 CanReusePtrInfo
21677 ? FirstInChain->getPointerInfo()
21678 : MachinePointerInfo(FirstInChain->getPointerInfo().getAddrSpace()),
21679 FirstInChain->getAlign(), *Flags, AAInfo);
21680 } else { // Must be realized as a trunc store
21681 EVT LegalizedStoredValTy =
21682 TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
21683 unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits();
21684 ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
21685 SDValue ExtendedStoreVal =
21686 DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
21687 LegalizedStoredValTy);
21688 NewStore = DAG.getTruncStore(
21689 NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
21690 CanReusePtrInfo
21691 ? FirstInChain->getPointerInfo()
21692 : MachinePointerInfo(FirstInChain->getPointerInfo().getAddrSpace()),
21693 StoredVal.getValueType() /*TVT*/, FirstInChain->getAlign(), *Flags,
21694 AAInfo);
21695 }
21696
21697 // Replace all merged stores with the new store.
21698 for (unsigned i = 0; i < NumStores; ++i)
21699 CombineTo(StoreNodes[i].MemNode, NewStore);
21700
21701 AddToWorklist(NewChain.getNode());
21702 return true;
21703}
21704
21705SDNode *
21706DAGCombiner::getStoreMergeCandidates(StoreSDNode *St,
21707 SmallVectorImpl<MemOpLink> &StoreNodes) {
21708 // This holds the base pointer, index, and the offset in bytes from the base
21709 // pointer. We must have a base and an offset. Do not handle stores to undef
21710 // base pointers.
21711 BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
21712 if (!BasePtr.getBase().getNode() || BasePtr.getBase().isUndef())
21713 return nullptr;
21714
21716 StoreSource StoreSrc = getStoreSource(Val);
21717 assert(StoreSrc != StoreSource::Unknown && "Expected known source for store");
21718
21719 // Match on loadbaseptr if relevant.
21720 EVT MemVT = St->getMemoryVT();
21721 BaseIndexOffset LBasePtr;
21722 EVT LoadVT;
21723 if (StoreSrc == StoreSource::Load) {
21724 auto *Ld = cast<LoadSDNode>(Val);
21725 LBasePtr = BaseIndexOffset::match(Ld, DAG);
21726 LoadVT = Ld->getMemoryVT();
21727 // Load and store should be the same type.
21728 if (MemVT != LoadVT)
21729 return nullptr;
21730 // Loads must only have one use.
21731 if (!Ld->hasNUsesOfValue(1, 0))
21732 return nullptr;
21733 // The memory operands must not be volatile/indexed/atomic.
21734 // TODO: May be able to relax for unordered atomics (see D66309)
21735 if (!Ld->isSimple() || Ld->isIndexed())
21736 return nullptr;
21737 }
21738 auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
21739 int64_t &Offset) -> bool {
21740 // The memory operands must not be volatile/indexed/atomic.
21741 // TODO: May be able to relax for unordered atomics (see D66309)
21742 if (!Other->isSimple() || Other->isIndexed())
21743 return false;
21744 // Don't mix temporal stores with non-temporal stores.
21745 if (St->isNonTemporal() != Other->isNonTemporal())
21746       return false;
21747     if (!TLI.areTwoSDNodeTargetMMOFlagsMergeable(*St, *Other))
21748       return false;
21749 SDValue OtherBC = peekThroughBitcasts(Other->getValue());
21750 // Allow merging constants of different types as integers.
21751 bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
21752 : Other->getMemoryVT() != MemVT;
21753 switch (StoreSrc) {
21754 case StoreSource::Load: {
21755 if (NoTypeMatch)
21756 return false;
21757 // The Load's Base Ptr must also match.
21758 auto *OtherLd = dyn_cast<LoadSDNode>(OtherBC);
21759 if (!OtherLd)
21760 return false;
21761 BaseIndexOffset LPtr = BaseIndexOffset::match(OtherLd, DAG);
21762 if (LoadVT != OtherLd->getMemoryVT())
21763 return false;
21764 // Loads must only have one use.
21765 if (!OtherLd->hasNUsesOfValue(1, 0))
21766 return false;
21767 // The memory operands must not be volatile/indexed/atomic.
21768 // TODO: May be able to relax for unordered atomics (see D66309)
21769 if (!OtherLd->isSimple() || OtherLd->isIndexed())
21770 return false;
21771 // Don't mix temporal loads with non-temporal loads.
21772 if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal())
21773         return false;
21774       if (!TLI.areTwoSDNodeTargetMMOFlagsMergeable(*cast<LoadSDNode>(Val),
21775                                                    *OtherLd))
21776 return false;
21777 if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
21778 return false;
21779 break;
21780 }
21781 case StoreSource::Constant:
21782 if (NoTypeMatch)
21783 return false;
21784 if (getStoreSource(OtherBC) != StoreSource::Constant)
21785 return false;
21786 break;
21787 case StoreSource::Extract:
21788 // Do not merge truncated stores here.
21789 if (Other->isTruncatingStore())
21790 return false;
21791 if (!MemVT.bitsEq(OtherBC.getValueType()))
21792 return false;
21793 if (OtherBC.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
21794 OtherBC.getOpcode() != ISD::EXTRACT_SUBVECTOR)
21795 return false;
21796 break;
21797 default:
21798 llvm_unreachable("Unhandled store source for merging");
21799     }
21800     Ptr = BaseIndexOffset::match(Other, DAG);
21801     return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
21802 };
21803
21804   // We are looking for a root node which is an ancestor to all mergeable
21805   // stores. We search up through a load, to our root and then down
21806   // through all children. For instance we will find Store{1,2,3} if
21807   // St is Store1, Store2, or Store3 where the root is not a load,
21808   // which is always true for nonvolatile ops. TODO: Expand
21809 // the search to find all valid candidates through multiple layers of loads.
21810 //
21811 // Root
21812 // |-------|-------|
21813 // Load Load Store3
21814 // | |
21815 // Store1 Store2
21816 //
21817 // FIXME: We should be able to climb and
21818 // descend TokenFactors to find candidates as well.
21819
21820 SDNode *RootNode = St->getChain().getNode();
21821 // Bail out if we already analyzed this root node and found nothing.
21822 if (ChainsWithoutMergeableStores.contains(RootNode))
21823 return nullptr;
21824
21825   // Check if the pair of StoreNode and RootNode has already bailed out of the
21826   // dependence check more times than the limit allows.
21827 auto OverLimitInDependenceCheck = [&](SDNode *StoreNode,
21828 SDNode *RootNode) -> bool {
21829 auto RootCount = StoreRootCountMap.find(StoreNode);
21830 return RootCount != StoreRootCountMap.end() &&
21831 RootCount->second.first == RootNode &&
21832 RootCount->second.second > StoreMergeDependenceLimit;
21833 };
21834
21835 auto TryToAddCandidate = [&](SDUse &Use) {
21836 // This must be a chain use.
21837 if (Use.getOperandNo() != 0)
21838 return;
21839 if (auto *OtherStore = dyn_cast<StoreSDNode>(Use.getUser())) {
21840 BaseIndexOffset Ptr;
21841 int64_t PtrDiff;
21842 if (CandidateMatch(OtherStore, Ptr, PtrDiff) &&
21843 !OverLimitInDependenceCheck(OtherStore, RootNode))
21844 StoreNodes.push_back(MemOpLink(OtherStore, PtrDiff));
21845 }
21846 };
21847
21848 unsigned NumNodesExplored = 0;
21849 const unsigned MaxSearchNodes = 1024;
21850 if (auto *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
21851 RootNode = Ldn->getChain().getNode();
21852 // Bail out if we already analyzed this root node and found nothing.
21853 if (ChainsWithoutMergeableStores.contains(RootNode))
21854 return nullptr;
21855 for (auto I = RootNode->use_begin(), E = RootNode->use_end();
21856 I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored) {
21857 SDNode *User = I->getUser();
21858 if (I->getOperandNo() == 0 && isa<LoadSDNode>(User)) { // walk down chain
21859 for (SDUse &U2 : User->uses())
21860 TryToAddCandidate(U2);
21861 }
21862 // Check stores that depend on the root (e.g. Store 3 in the chart above).
21863 if (I->getOperandNo() == 0 && isa<StoreSDNode>(User)) {
21864 TryToAddCandidate(*I);
21865 }
21866 }
21867 } else {
21868 for (auto I = RootNode->use_begin(), E = RootNode->use_end();
21869 I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored)
21870 TryToAddCandidate(*I);
21871 }
21872
21873 return RootNode;
21874}
21875
21876// We need to check that merging these stores does not cause a loop in the
21877// DAG. Any store candidate may depend on another candidate indirectly through
21878// its operands. Check in parallel by searching up from operands of candidates.
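// For example, the value stored by one candidate may be computed from a load
// whose chain depends on another candidate store; merging the two stores would
// then make the merged node (indirectly) a predecessor of itself.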
21879bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
21880 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
21881 SDNode *RootNode) {
21882   // FIXME: We should be able to truncate a full search of
21883   // predecessors by doing a BFS and keeping tabs on the originating
21884   // stores from which worklist nodes come, in a similar way to
21885   // TokenFactor simplification.
21886
21887   SmallPtrSet<const SDNode *, 32> Visited;
21888   SmallVector<const SDNode *, 8> Worklist;
21889
21890 // RootNode is a predecessor to all candidates so we need not search
21891 // past it. Add RootNode (peeking through TokenFactors). Do not count
21892 // these towards size check.
21893
21894 Worklist.push_back(RootNode);
21895 while (!Worklist.empty()) {
21896 auto N = Worklist.pop_back_val();
21897 if (!Visited.insert(N).second)
21898 continue; // Already present in Visited.
21899 if (N->getOpcode() == ISD::TokenFactor) {
21900 for (SDValue Op : N->ops())
21901 Worklist.push_back(Op.getNode());
21902 }
21903 }
21904
21905 // Don't count pruning nodes towards max.
21906 unsigned int Max = 1024 + Visited.size();
21907 // Search Ops of store candidates.
21908 for (unsigned i = 0; i < NumStores; ++i) {
21909 SDNode *N = StoreNodes[i].MemNode;
21910 // Of the 4 Store Operands:
21911 // * Chain (Op 0) -> We have already considered these
21912 // in candidate selection, but only by following the
21913 // chain dependencies. We could still have a chain
21914 // dependency to a load, that has a non-chain dep to
21915 // another load, that depends on a store, etc. So it is
21916 // possible to have dependencies that consist of a mix
21917 // of chain and non-chain deps, and we need to include
21918     //              chain operands in the analysis here.
21919 // * Value (Op 1) -> Cycles may happen (e.g. through load chains)
21920 // * Address (Op 2) -> Merged addresses may only vary by a fixed constant,
21921     //                      but aren't necessarily from the same base node, so
21922 // cycles possible (e.g. via indexed store).
21923 // * (Op 3) -> Represents the pre or post-indexing offset (or undef for
21924 // non-indexed stores). Not constant on all targets (e.g. ARM)
21925 // and so can participate in a cycle.
21926 for (const SDValue &Op : N->op_values())
21927 Worklist.push_back(Op.getNode());
21928 }
21929 // Search through DAG. We can stop early if we find a store node.
21930 for (unsigned i = 0; i < NumStores; ++i)
21931 if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
21932 Max)) {
21933       // If the search bails out, record the StoreNode and RootNode in the
21934       // StoreRootCountMap. If we have seen the pair more times than the limit,
21935       // we won't add the StoreNode into the StoreNodes set again.
21936 if (Visited.size() >= Max) {
21937 auto &RootCount = StoreRootCountMap[StoreNodes[i].MemNode];
21938 if (RootCount.first == RootNode)
21939 RootCount.second++;
21940 else
21941 RootCount = {RootNode, 1};
21942 }
21943 return false;
21944 }
21945 return true;
21946}
21947
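// Check whether the chain from store St back to load Ld passes through a call
// sequence (CALLSEQ_END); used to avoid merging a load/store pair whose
// combined live range would then span a call.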
21948bool DAGCombiner::hasCallInLdStChain(StoreSDNode *St, LoadSDNode *Ld) {
21949   SmallPtrSet<const SDNode *, 32> Visited;
21950   SmallVector<std::pair<const SDNode *, bool>, 8> Worklist;
21951   Worklist.emplace_back(St->getChain().getNode(), false);
21952
21953 while (!Worklist.empty()) {
21954 auto [Node, FoundCall] = Worklist.pop_back_val();
21955 if (!Visited.insert(Node).second || Node->getNumOperands() == 0)
21956 continue;
21957
21958 switch (Node->getOpcode()) {
21959 case ISD::CALLSEQ_END:
21960 Worklist.emplace_back(Node->getOperand(0).getNode(), true);
21961 break;
21962 case ISD::TokenFactor:
21963 for (SDValue Op : Node->ops())
21964 Worklist.emplace_back(Op.getNode(), FoundCall);
21965 break;
21966 case ISD::LOAD:
21967 if (Node == Ld)
21968 return FoundCall;
21969 [[fallthrough]];
21970 default:
21971 assert(Node->getOperand(0).getValueType() == MVT::Other &&
21972 "Invalid chain type");
21973 Worklist.emplace_back(Node->getOperand(0).getNode(), FoundCall);
21974 break;
21975 }
21976 }
21977 return false;
21978}
21979
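// Trim StoreNodes so that a run of stores whose offsets are consecutive
// (ElementSizeBytes apart) sits at the front of the vector, and return the
// length of that run; returns 0 if no run of at least two stores exists.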
21980unsigned
21981DAGCombiner::getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
21982 int64_t ElementSizeBytes) const {
21983 while (true) {
21984 // Find a store past the width of the first store.
21985 size_t StartIdx = 0;
21986 while ((StartIdx + 1 < StoreNodes.size()) &&
21987 StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
21988 StoreNodes[StartIdx + 1].OffsetFromBase)
21989 ++StartIdx;
21990
21991 // Bail if we don't have enough candidates to merge.
21992 if (StartIdx + 1 >= StoreNodes.size())
21993 return 0;
21994
21995 // Trim stores that overlapped with the first store.
21996 if (StartIdx)
21997 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
21998
21999 // Scan the memory operations on the chain and find the first
22000 // non-consecutive store memory address.
22001 unsigned NumConsecutiveStores = 1;
22002 int64_t StartAddress = StoreNodes[0].OffsetFromBase;
22003 // Check that the addresses are consecutive starting from the second
22004 // element in the list of stores.
22005 for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
22006 int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
22007 if (CurrAddress - StartAddress != (ElementSizeBytes * i))
22008 break;
22009 NumConsecutiveStores = i + 1;
22010 }
22011 if (NumConsecutiveStores > 1)
22012 return NumConsecutiveStores;
22013
22014 // There are no consecutive stores at the start of the list.
22015 // Remove the first store and try again.
22016 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
22017 }
22018}
22019
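// Try to replace runs of consecutive constant stores with a single wide
// integer store, truncating store, or vector store, whichever the target
// accepts as legal and fast for the combined width.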
22020bool DAGCombiner::tryStoreMergeOfConstants(
22021 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
22022 EVT MemVT, SDNode *RootNode, bool AllowVectors) {
22023 LLVMContext &Context = *DAG.getContext();
22024 const DataLayout &DL = DAG.getDataLayout();
22025 int64_t ElementSizeBytes = MemVT.getStoreSize();
22026 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
22027 bool MadeChange = false;
22028
22029 // Store the constants into memory as one consecutive store.
22030 while (NumConsecutiveStores >= 2) {
22031 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
22032 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
22033 Align FirstStoreAlign = FirstInChain->getAlign();
22034 unsigned LastLegalType = 1;
22035 unsigned LastLegalVectorType = 1;
22036 bool LastIntegerTrunc = false;
22037 bool NonZero = false;
22038 unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
22039 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
22040 StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
22041 SDValue StoredVal = ST->getValue();
22042 bool IsElementZero = false;
22043 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
22044 IsElementZero = C->isZero();
22045 else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
22046 IsElementZero = C->getConstantFPValue()->isNullValue();
22047 else if (ISD::isBuildVectorAllZeros(StoredVal.getNode()))
22048 IsElementZero = true;
22049 if (IsElementZero) {
22050 if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
22051 FirstZeroAfterNonZero = i;
22052 }
22053 NonZero |= !IsElementZero;
22054
22055 // Find a legal type for the constant store.
22056 unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
22057 EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
22058 unsigned IsFast = 0;
22059
22060 // Break early when size is too large to be legal.
22061 if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
22062 break;
22063
22064 if (TLI.isTypeLegal(StoreTy) &&
22065 TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
22066 DAG.getMachineFunction()) &&
22067 TLI.allowsMemoryAccess(Context, DL, StoreTy,
22068 *FirstInChain->getMemOperand(), &IsFast) &&
22069 IsFast) {
22070 LastIntegerTrunc = false;
22071 LastLegalType = i + 1;
22072 // Or check whether a truncstore is legal.
22073       } else if (TLI.getTypeAction(Context, StoreTy) ==
22074                  TargetLowering::TypePromoteInteger) {
22075         EVT LegalizedStoredValTy =
22076 TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
22077 if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
22078 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy,
22079 DAG.getMachineFunction()) &&
22080 TLI.allowsMemoryAccess(Context, DL, StoreTy,
22081 *FirstInChain->getMemOperand(), &IsFast) &&
22082 IsFast) {
22083 LastIntegerTrunc = true;
22084 LastLegalType = i + 1;
22085 }
22086 }
22087
22088 // We only use vectors if the target allows it and the function is not
22089 // marked with the noimplicitfloat attribute.
22090 if (TLI.storeOfVectorConstantIsCheap(!NonZero, MemVT, i + 1, FirstStoreAS) &&
22091 AllowVectors) {
22092 // Find a legal type for the vector store.
22093 unsigned Elts = (i + 1) * NumMemElts;
22094 EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
22095 if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
22096 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&
22097 TLI.allowsMemoryAccess(Context, DL, Ty,
22098 *FirstInChain->getMemOperand(), &IsFast) &&
22099 IsFast)
22100 LastLegalVectorType = i + 1;
22101 }
22102 }
22103
22104 bool UseVector = (LastLegalVectorType > LastLegalType) && AllowVectors;
22105 unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
22106 bool UseTrunc = LastIntegerTrunc && !UseVector;
22107
22108 // Check if we found a legal integer type that creates a meaningful
22109 // merge.
22110 if (NumElem < 2) {
22111 // We know that candidate stores are in order and of correct
22112 // shape. While there is no mergeable sequence from the
22113 // beginning one may start later in the sequence. The only
22114 // reason a merge of size N could have failed where another of
22115 // the same size would not have, is if the alignment has
22116 // improved or we've dropped a non-zero value. Drop as many
22117 // candidates as we can here.
22118 unsigned NumSkip = 1;
22119 while ((NumSkip < NumConsecutiveStores) &&
22120 (NumSkip < FirstZeroAfterNonZero) &&
22121 (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
22122 NumSkip++;
22123
22124 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
22125 NumConsecutiveStores -= NumSkip;
22126 continue;
22127 }
22128
22129 // Check that we can merge these candidates without causing a cycle.
22130 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
22131 RootNode)) {
22132 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
22133 NumConsecutiveStores -= NumElem;
22134 continue;
22135 }
22136
22137 MadeChange |= mergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
22138 /*IsConstantSrc*/ true,
22139 UseVector, UseTrunc);
22140
22141 // Remove merged stores for next iteration.
22142 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
22143 NumConsecutiveStores -= NumElem;
22144 }
22145 return MadeChange;
22146}
22147
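// Try to replace runs of consecutive stores of extracted vector elements or
// subvectors with a single wide vector store of the reassembled vector.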
22148bool DAGCombiner::tryStoreMergeOfExtracts(
22149 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
22150 EVT MemVT, SDNode *RootNode) {
22151 LLVMContext &Context = *DAG.getContext();
22152 const DataLayout &DL = DAG.getDataLayout();
22153 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
22154 bool MadeChange = false;
22155
22156 // Loop on Consecutive Stores on success.
22157 while (NumConsecutiveStores >= 2) {
22158 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
22159 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
22160 Align FirstStoreAlign = FirstInChain->getAlign();
22161 unsigned NumStoresToMerge = 1;
22162 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
22163 // Find a legal type for the vector store.
22164 unsigned Elts = (i + 1) * NumMemElts;
22165 EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
22166 unsigned IsFast = 0;
22167
22168 // Break early when size is too large to be legal.
22169 if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
22170 break;
22171
22172 if (TLI.isTypeLegal(Ty) &&
22173 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&
22174 TLI.allowsMemoryAccess(Context, DL, Ty,
22175 *FirstInChain->getMemOperand(), &IsFast) &&
22176 IsFast)
22177 NumStoresToMerge = i + 1;
22178 }
22179
22180 // Check if we found a legal integer type creating a meaningful
22181 // merge.
22182 if (NumStoresToMerge < 2) {
22183 // We know that candidate stores are in order and of correct
22184 // shape. While there is no mergeable sequence from the
22185 // beginning one may start later in the sequence. The only
22186 // reason a merge of size N could have failed where another of
22187 // the same size would not have, is if the alignment has
22188 // improved. Drop as many candidates as we can here.
22189 unsigned NumSkip = 1;
22190 while ((NumSkip < NumConsecutiveStores) &&
22191 (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
22192 NumSkip++;
22193
22194 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
22195 NumConsecutiveStores -= NumSkip;
22196 continue;
22197 }
22198
22199 // Check that we can merge these candidates without causing a cycle.
22200 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumStoresToMerge,
22201 RootNode)) {
22202 StoreNodes.erase(StoreNodes.begin(),
22203 StoreNodes.begin() + NumStoresToMerge);
22204 NumConsecutiveStores -= NumStoresToMerge;
22205 continue;
22206 }
22207
22208 MadeChange |= mergeStoresOfConstantsOrVecElts(
22209 StoreNodes, MemVT, NumStoresToMerge, /*IsConstantSrc*/ false,
22210 /*UseVector*/ true, /*UseTrunc*/ false);
22211
22212 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumStoresToMerge);
22213 NumConsecutiveStores -= NumStoresToMerge;
22214 }
22215 return MadeChange;
22216}
22217
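// Try to replace consecutive stores whose values are loaded from consecutive
// addresses with one wide load feeding one wide store (or a rotate of the wide
// load when the two halves are reversed).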
22218bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
22219 unsigned NumConsecutiveStores, EVT MemVT,
22220 SDNode *RootNode, bool AllowVectors,
22221 bool IsNonTemporalStore,
22222 bool IsNonTemporalLoad) {
22223 LLVMContext &Context = *DAG.getContext();
22224 const DataLayout &DL = DAG.getDataLayout();
22225 int64_t ElementSizeBytes = MemVT.getStoreSize();
22226 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
22227 bool MadeChange = false;
22228
22229 // Look for load nodes which are used by the stored values.
22230 SmallVector<MemOpLink, 8> LoadNodes;
22231
22232 // Find acceptable loads. Loads need to have the same chain (token factor),
22233 // must not be zext, volatile, indexed, and they must be consecutive.
22234 BaseIndexOffset LdBasePtr;
22235
22236 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
22237     StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
22238     SDValue Val = peekThroughBitcasts(St->getValue());
22239     LoadSDNode *Ld = cast<LoadSDNode>(Val);
22240
22241 BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
22242 // If this is not the first ptr that we check.
22243 int64_t LdOffset = 0;
22244 if (LdBasePtr.getBase().getNode()) {
22245 // The base ptr must be the same.
22246 if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
22247 break;
22248 } else {
22249 // Check that all other base pointers are the same as this one.
22250 LdBasePtr = LdPtr;
22251 }
22252
22253 // We found a potential memory operand to merge.
22254 LoadNodes.push_back(MemOpLink(Ld, LdOffset));
22255 }
22256
22257 while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
22258 Align RequiredAlignment;
22259 bool NeedRotate = false;
22260 if (LoadNodes.size() == 2) {
22261 // If we have load/store pair instructions and we only have two values,
22262 // don't bother merging.
22263 if (TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
22264 StoreNodes[0].MemNode->getAlign() >= RequiredAlignment) {
22265 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
22266 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
22267 break;
22268 }
22269 // If the loads are reversed, see if we can rotate the halves into place.
22270 int64_t Offset0 = LoadNodes[0].OffsetFromBase;
22271 int64_t Offset1 = LoadNodes[1].OffsetFromBase;
22272 EVT PairVT = EVT::getIntegerVT(Context, ElementSizeBytes * 8 * 2);
22273 if (Offset0 - Offset1 == ElementSizeBytes &&
22274 (hasOperation(ISD::ROTL, PairVT) ||
22275 hasOperation(ISD::ROTR, PairVT))) {
22276 std::swap(LoadNodes[0], LoadNodes[1]);
22277 NeedRotate = true;
22278 }
22279 }
22280 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
22281 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
22282 Align FirstStoreAlign = FirstInChain->getAlign();
22283 LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
22284
22285 // Scan the memory operations on the chain and find the first
22286 // non-consecutive load memory address. These variables hold the index in
22287 // the store node array.
22288
22289 unsigned LastConsecutiveLoad = 1;
22290
22291     // These variables refer to sizes, not indices in the array.
22292 unsigned LastLegalVectorType = 1;
22293 unsigned LastLegalIntegerType = 1;
22294 bool isDereferenceable = true;
22295 bool DoIntegerTruncate = false;
22296 int64_t StartAddress = LoadNodes[0].OffsetFromBase;
22297 SDValue LoadChain = FirstLoad->getChain();
22298 for (unsigned i = 1; i < LoadNodes.size(); ++i) {
22299 // All loads must share the same chain.
22300 if (LoadNodes[i].MemNode->getChain() != LoadChain)
22301 break;
22302
22303 int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
22304 if (CurrAddress - StartAddress != (ElementSizeBytes * i))
22305 break;
22306 LastConsecutiveLoad = i;
22307
22308 if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
22309 isDereferenceable = false;
22310
22311 // Find a legal type for the vector store.
22312 unsigned Elts = (i + 1) * NumMemElts;
22313 EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
22314
22315 // Break early when size is too large to be legal.
22316 if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
22317 break;
22318
22319 unsigned IsFastSt = 0;
22320 unsigned IsFastLd = 0;
22321 // Don't try vector types if we need a rotate. We may still fail the
22322 // legality checks for the integer type, but we can't handle the rotate
22323 // case with vectors.
22324 // FIXME: We could use a shuffle in place of the rotate.
22325 if (!NeedRotate && TLI.isTypeLegal(StoreTy) &&
22326 TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
22327 DAG.getMachineFunction()) &&
22328 TLI.allowsMemoryAccess(Context, DL, StoreTy,
22329 *FirstInChain->getMemOperand(), &IsFastSt) &&
22330 IsFastSt &&
22331 TLI.allowsMemoryAccess(Context, DL, StoreTy,
22332 *FirstLoad->getMemOperand(), &IsFastLd) &&
22333 IsFastLd) {
22334 LastLegalVectorType = i + 1;
22335 }
22336
22337 // Find a legal type for the integer store.
22338 unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
22339 StoreTy = EVT::getIntegerVT(Context, SizeInBits);
22340 if (TLI.isTypeLegal(StoreTy) &&
22341 TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
22342 DAG.getMachineFunction()) &&
22343 TLI.allowsMemoryAccess(Context, DL, StoreTy,
22344 *FirstInChain->getMemOperand(), &IsFastSt) &&
22345 IsFastSt &&
22346 TLI.allowsMemoryAccess(Context, DL, StoreTy,
22347 *FirstLoad->getMemOperand(), &IsFastLd) &&
22348 IsFastLd) {
22349 LastLegalIntegerType = i + 1;
22350 DoIntegerTruncate = false;
22351 // Or check whether a truncstore and extload is legal.
22352       } else if (TLI.getTypeAction(Context, StoreTy) ==
22353                  TargetLowering::TypePromoteInteger) {
22354         EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
22355 if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
22356 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy,
22357 DAG.getMachineFunction()) &&
22358 TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy, StoreTy) &&
22359 TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy, StoreTy) &&
22360 TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
22361 TLI.allowsMemoryAccess(Context, DL, StoreTy,
22362 *FirstInChain->getMemOperand(), &IsFastSt) &&
22363 IsFastSt &&
22364 TLI.allowsMemoryAccess(Context, DL, StoreTy,
22365 *FirstLoad->getMemOperand(), &IsFastLd) &&
22366 IsFastLd) {
22367 LastLegalIntegerType = i + 1;
22368 DoIntegerTruncate = true;
22369 }
22370 }
22371 }
22372
22373 // Only use vector types if the vector type is larger than the integer
22374 // type. If they are the same, use integers.
22375 bool UseVectorTy =
22376 LastLegalVectorType > LastLegalIntegerType && AllowVectors;
22377 unsigned LastLegalType =
22378 std::max(LastLegalVectorType, LastLegalIntegerType);
22379
22380     // We add +1 here because the LastXXX variables refer to the last index
22381     // (a location) while NumElem refers to a count (array size).
22382 unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
22383 NumElem = std::min(LastLegalType, NumElem);
22384 Align FirstLoadAlign = FirstLoad->getAlign();
22385
22386 if (NumElem < 2) {
22387 // We know that candidate stores are in order and of correct
22388 // shape. While there is no mergeable sequence from the
22389 // beginning one may start later in the sequence. The only
22390 // reason a merge of size N could have failed where another of
22391 // the same size would not have is if the alignment or either
22392 // the load or store has improved. Drop as many candidates as we
22393 // can here.
22394 unsigned NumSkip = 1;
22395 while ((NumSkip < LoadNodes.size()) &&
22396 (LoadNodes[NumSkip].MemNode->getAlign() <= FirstLoadAlign) &&
22397 (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
22398 NumSkip++;
22399 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
22400 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
22401 NumConsecutiveStores -= NumSkip;
22402 continue;
22403 }
22404
22405 // Check that we can merge these candidates without causing a cycle.
22406 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
22407 RootNode)) {
22408 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
22409 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
22410 NumConsecutiveStores -= NumElem;
22411 continue;
22412 }
22413
22414 // Find if it is better to use vectors or integers to load and store
22415 // to memory.
22416 EVT JointMemOpVT;
22417 if (UseVectorTy) {
22418 // Find a legal type for the vector store.
22419 unsigned Elts = NumElem * NumMemElts;
22420 JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
22421 } else {
22422 unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
22423 JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
22424 }
22425
22426 // Check if there is a call in the load/store chain.
22427 if (!TLI.shouldMergeStoreOfLoadsOverCall(MemVT, JointMemOpVT) &&
22428 hasCallInLdStChain(cast<StoreSDNode>(StoreNodes[0].MemNode),
22429 cast<LoadSDNode>(LoadNodes[0].MemNode))) {
22430 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
22431 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
22432 NumConsecutiveStores -= NumElem;
22433 continue;
22434 }
22435
22436 SDLoc LoadDL(LoadNodes[0].MemNode);
22437 SDLoc StoreDL(StoreNodes[0].MemNode);
22438
22439 // The merged loads are required to have the same incoming chain, so
22440 // using the first's chain is acceptable.
22441
22442 SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
22443 bool CanReusePtrInfo = hasSameUnderlyingObj(StoreNodes);
22444 AddToWorklist(NewStoreChain.getNode());
22445
22446 MachineMemOperand::Flags LdMMOFlags =
22447 isDereferenceable ? MachineMemOperand::MODereferenceable
22448 : MachineMemOperand::MONone;
22449 if (IsNonTemporalLoad)
22450 LdMMOFlags |= MachineMemOperand::MONonTemporal;
22451
22452 LdMMOFlags |= TLI.getTargetMMOFlags(*FirstLoad);
22453
22454 MachineMemOperand::Flags StMMOFlags = IsNonTemporalStore
22455 ? MachineMemOperand::MONonTemporal
22456 : MachineMemOperand::MONone;
22457
22458 StMMOFlags |= TLI.getTargetMMOFlags(*StoreNodes[0].MemNode);
22459
22460 SDValue NewLoad, NewStore;
22461 if (UseVectorTy || !DoIntegerTruncate) {
22462 NewLoad = DAG.getLoad(
22463 JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(),
22464 FirstLoad->getPointerInfo(), FirstLoadAlign, LdMMOFlags);
22465 SDValue StoreOp = NewLoad;
22466 if (NeedRotate) {
22467 unsigned LoadWidth = ElementSizeBytes * 8 * 2;
22468 assert(JointMemOpVT == EVT::getIntegerVT(Context, LoadWidth) &&
22469 "Unexpected type for rotate-able load pair");
22470 SDValue RotAmt =
22471 DAG.getShiftAmountConstant(LoadWidth / 2, JointMemOpVT, LoadDL);
22472 // Target can convert to the identical ROTR if it does not have ROTL.
22473 StoreOp = DAG.getNode(ISD::ROTL, LoadDL, JointMemOpVT, NewLoad, RotAmt);
22474 }
22475 NewStore = DAG.getStore(
22476 NewStoreChain, StoreDL, StoreOp, FirstInChain->getBasePtr(),
22477 CanReusePtrInfo ? FirstInChain->getPointerInfo()
22478 : MachinePointerInfo(FirstStoreAS),
22479 FirstStoreAlign, StMMOFlags);
22480 } else { // This must be the truncstore/extload case
22481 EVT ExtendedTy =
22482 TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
22483 NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
22484 FirstLoad->getChain(), FirstLoad->getBasePtr(),
22485 FirstLoad->getPointerInfo(), JointMemOpVT,
22486 FirstLoadAlign, LdMMOFlags);
22487 NewStore = DAG.getTruncStore(
22488 NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
22489 CanReusePtrInfo ? FirstInChain->getPointerInfo()
22490 : MachinePointerInfo(FirstStoreAS),
22491 JointMemOpVT, FirstInChain->getAlign(),
22492 FirstInChain->getMemOperand()->getFlags());
22493 }
22494
22495 // Transfer chain users from old loads to the new load.
22496 for (unsigned i = 0; i < NumElem; ++i) {
22497 LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
22498 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
22499 SDValue(NewLoad.getNode(), 1));
22500 }
22501
22502 // Replace all stores with the new store. Recursively remove corresponding
22503 // values if they are no longer used.
22504 for (unsigned i = 0; i < NumElem; ++i) {
22505 SDValue Val = StoreNodes[i].MemNode->getOperand(1);
22506 CombineTo(StoreNodes[i].MemNode, NewStore);
22507 if (Val->use_empty())
22508 recursivelyDeleteUnusedNodes(Val.getNode());
22509 }
22510
22511 MadeChange = true;
22512 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
22513 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
22514 NumConsecutiveStores -= NumElem;
22515 }
22516 return MadeChange;
22517}
22518
22519bool DAGCombiner::mergeConsecutiveStores(StoreSDNode *St) {
22520 if (OptLevel == CodeGenOptLevel::None || !EnableStoreMerging)
22521 return false;
22522
22523 // TODO: Extend this function to merge stores of scalable vectors.
22524 // (i.e. two <vscale x 8 x i8> stores can be merged to one <vscale x 16 x i8>
22525 // store since we know <vscale x 16 x i8> is exactly twice as large as
22526 // <vscale x 8 x i8>). Until then, bail out for scalable vectors.
22527 EVT MemVT = St->getMemoryVT();
22528 if (MemVT.isScalableVT())
22529 return false;
22530 if (!MemVT.isSimple() || MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
22531 return false;
22532
22533 // This function cannot currently deal with non-byte-sized memory sizes.
22534 int64_t ElementSizeBytes = MemVT.getStoreSize();
22535 if (ElementSizeBytes * 8 != (int64_t)MemVT.getSizeInBits())
22536 return false;
22537
22538 // Do not bother looking at stored values that are not constants, loads, or
22539 // extracted vector elements.
22540 SDValue StoredVal = peekThroughBitcasts(St->getValue());
22541 const StoreSource StoreSrc = getStoreSource(StoredVal);
22542 if (StoreSrc == StoreSource::Unknown)
22543 return false;
22544
22545 SmallVector<MemOpLink, 8> StoreNodes;
22546 // Find potential store merge candidates by searching through chain sub-DAG
22547 SDNode *RootNode = getStoreMergeCandidates(St, StoreNodes);
22548
22549 // Check if there is anything to merge.
22550 if (StoreNodes.size() < 2)
22551 return false;
22552
22553 // Sort the memory operands according to their distance from the
22554 // base pointer.
22555 llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) {
22556 return LHS.OffsetFromBase < RHS.OffsetFromBase;
22557 });
22558
22559 bool AllowVectors = !DAG.getMachineFunction().getFunction().hasFnAttribute(
22560 Attribute::NoImplicitFloat);
22561 bool IsNonTemporalStore = St->isNonTemporal();
22562 bool IsNonTemporalLoad = StoreSrc == StoreSource::Load &&
22563 cast<LoadSDNode>(StoredVal)->isNonTemporal();
22564
22565 // Store Merge attempts to merge the lowest stores first. This generally
22566 // works out well: if the merge is successful, the remaining stores are
22567 // checked after the first collection of stores is merged. However, in the
22568 // case that a non-mergeable store is found first, e.g., {p[-2],
22569 // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent
22570 // mergeable cases. To prevent this, we prune such stores from the
22571 // front of StoreNodes here.
22572 bool MadeChange = false;
22573 while (StoreNodes.size() > 1) {
22574 unsigned NumConsecutiveStores =
22575 getConsecutiveStores(StoreNodes, ElementSizeBytes);
22576 // There are no more stores in the list to examine.
22577 if (NumConsecutiveStores == 0)
22578 return MadeChange;
22579
22580 // We have at least 2 consecutive stores. Try to merge them.
22581 assert(NumConsecutiveStores >= 2 && "Expected at least 2 stores");
22582 switch (StoreSrc) {
22583 case StoreSource::Constant:
22584 MadeChange |= tryStoreMergeOfConstants(StoreNodes, NumConsecutiveStores,
22585 MemVT, RootNode, AllowVectors);
22586 break;
22587
22588 case StoreSource::Extract:
22589 MadeChange |= tryStoreMergeOfExtracts(StoreNodes, NumConsecutiveStores,
22590 MemVT, RootNode);
22591 break;
22592
22593 case StoreSource::Load:
22594 MadeChange |= tryStoreMergeOfLoads(StoreNodes, NumConsecutiveStores,
22595 MemVT, RootNode, AllowVectors,
22596 IsNonTemporalStore, IsNonTemporalLoad);
22597 break;
22598
22599 default:
22600 llvm_unreachable("Unhandled store source type");
22601 }
22602 }
22603
22604 // Remember if we failed to optimize, to save compile time.
22605 if (!MadeChange)
22606 ChainsWithoutMergeableStores.insert(RootNode);
22607
22608 return MadeChange;
22609}
22610
22611SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
22612 SDLoc SL(ST);
22613 SDValue ReplStore;
22614
22615 // Replace the chain to avoid dependency.
22616 if (ST->isTruncatingStore()) {
22617 ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
22618 ST->getBasePtr(), ST->getMemoryVT(),
22619 ST->getMemOperand());
22620 } else {
22621 ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
22622 ST->getMemOperand());
22623 }
22624
22625 // Create token to keep both nodes around.
22626 SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
22627 MVT::Other, ST->getChain(), ReplStore);
22628
22629 // Make sure the new and old chains are cleaned up.
22630 AddToWorklist(Token.getNode());
22631
22632 // Don't add users to work list.
22633 return CombineTo(ST, Token, false);
22634}
22635
22636SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
22637 SDValue Value = ST->getValue();
22638 if (Value.getOpcode() == ISD::TargetConstantFP)
22639 return SDValue();
22640
22641 if (!ISD::isNormalStore(ST))
22642 return SDValue();
22643
22644 SDLoc DL(ST);
22645
22646 SDValue Chain = ST->getChain();
22647 SDValue Ptr = ST->getBasePtr();
22648
22649 const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
22650
22651 // NOTE: If the original store is volatile, this transform must not increase
22652 // the number of stores. For example, on x86-32 an f64 can be stored in one
22653 // processor operation but an i64 (which is not legal) requires two. So the
22654 // transform should not be done in this case.
22655
22656 SDValue Tmp;
22657 switch (CFP->getSimpleValueType(0).SimpleTy) {
22658 default:
22659 llvm_unreachable("Unknown FP type");
22660 case MVT::f16: // We don't do this for these yet.
22661 case MVT::bf16:
22662 case MVT::f80:
22663 case MVT::f128:
22664 case MVT::ppcf128:
22665 return SDValue();
22666 case MVT::f32:
22667 if ((isTypeLegal(MVT::i32) && !LegalOperations && ST->isSimple()) ||
22668 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
22669 Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
22670 bitcastToAPInt().getZExtValue(), SDLoc(CFP),
22671 MVT::i32);
22672 return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
22673 }
22674
22675 return SDValue();
22676 case MVT::f64:
22677 if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
22678 ST->isSimple()) ||
22679 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
22680 Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
22681 getZExtValue(), SDLoc(CFP), MVT::i64);
22682 return DAG.getStore(Chain, DL, Tmp,
22683 Ptr, ST->getMemOperand());
22684 }
22685
22686 if (ST->isSimple() && TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32) &&
22687 !TLI.isFPImmLegal(CFP->getValueAPF(), MVT::f64)) {
22688 // Many FP stores are not made apparent until after legalize, e.g. for
22689 // argument passing. Since this is so common, custom legalize the
22690 // 64-bit integer store into two 32-bit stores.
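// For example, storing f64 1.0 (bit pattern 0x3FF0000000000000) becomes a
// store of 0x00000000 at Ptr and a store of 0x3FF00000 at Ptr+4 on a
// little-endian target (the big-endian case swaps the two halves below).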
22691 uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
22692 SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
22693 SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
22694 if (DAG.getDataLayout().isBigEndian())
22695 std::swap(Lo, Hi);
22696
22697 MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
22698 AAMDNodes AAInfo = ST->getAAInfo();
22699
22700 SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
22701 ST->getBaseAlign(), MMOFlags, AAInfo);
22702 Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(4), DL);
22703 SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
22704 ST->getPointerInfo().getWithOffset(4),
22705 ST->getBaseAlign(), MMOFlags, AAInfo);
22706 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
22707 St0, St1);
22708 }
22709
22710 return SDValue();
22711 }
22712}
22713
22714// (store (insert_vector_elt (load p), x, i), p) -> (store x, p+offset)
22715//
22716 // If we store a load with one element replaced, and the load has no other
22717 // uses on the chain between it and the store, then the wide vector store is
22718 // redundant and we can store just the single scalar element.
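// For example, with a v4i32 store and a constant index of 2:
//   (store (insert_vector_elt (load p), x, 2), p) --> (store x, p+8)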
22719SDValue DAGCombiner::replaceStoreOfInsertLoad(StoreSDNode *ST) {
22720 SDLoc DL(ST);
22721 SDValue Value = ST->getValue();
22722 SDValue Ptr = ST->getBasePtr();
22723 SDValue Chain = ST->getChain();
22724 if (Value.getOpcode() != ISD::INSERT_VECTOR_ELT || !Value.hasOneUse())
22725 return SDValue();
22726
22727 SDValue Elt = Value.getOperand(1);
22728 SDValue Idx = Value.getOperand(2);
22729
22730 // If the element isn't byte sized or is implicitly truncated then we can't
22731 // compute an offset.
22732 EVT EltVT = Elt.getValueType();
22733 if (!EltVT.isByteSized() ||
22734 EltVT != Value.getOperand(0).getValueType().getVectorElementType())
22735 return SDValue();
22736
22737 auto *Ld = dyn_cast<LoadSDNode>(Value.getOperand(0));
22738 if (!Ld || Ld->getBasePtr() != Ptr ||
22739 ST->getMemoryVT() != Ld->getMemoryVT() || !ST->isSimple() ||
22740 !ISD::isNormalStore(ST) ||
22741 Ld->getAddressSpace() != ST->getAddressSpace() ||
22742 !Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1)))
22743 return SDValue();
22744
22745 unsigned IsFast;
22746 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
22747 Elt.getValueType(), ST->getAddressSpace(),
22748 ST->getAlign(), ST->getMemOperand()->getFlags(),
22749 &IsFast) ||
22750 !IsFast)
22751 return SDValue();
22752
22753 MachinePointerInfo PointerInfo(ST->getAddressSpace());
22754
22755 // If the offset is a known constant then try to recover the pointer
22756 // info
22757 SDValue NewPtr;
22758 if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx)) {
22759 unsigned COffset = CIdx->getSExtValue() * EltVT.getSizeInBits() / 8;
22760 NewPtr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(COffset), DL);
22761 PointerInfo = ST->getPointerInfo().getWithOffset(COffset);
22762 } else {
22763 // The original DAG loaded the entire vector from memory, so arithmetic
22764 // within it must be inbounds.
22765 NewPtr = TLI.getInboundsVectorElementPointer(DAG, Ptr, Value.getValueType(),
22766 Idx);
22767 }
22768
22769 return DAG.getStore(Chain, DL, Elt, NewPtr, PointerInfo, ST->getAlign(),
22770 ST->getMemOperand()->getFlags());
22771}
22772
22773SDValue DAGCombiner::visitATOMIC_STORE(SDNode *N) {
22774 AtomicSDNode *ST = cast<AtomicSDNode>(N);
22775 SDValue Val = ST->getVal();
22776 EVT VT = Val.getValueType();
22777 EVT MemVT = ST->getMemoryVT();
22778
22779 if (MemVT.bitsLT(VT)) { // Is truncating store
22780 APInt TruncDemandedBits = APInt::getLowBitsSet(VT.getScalarSizeInBits(),
22781 MemVT.getScalarSizeInBits());
22782 // See if we can simplify the operation with SimplifyDemandedBits, which
22783 // only works if the value has a single use.
22784 if (SimplifyDemandedBits(Val, TruncDemandedBits))
22785 return SDValue(N, 0);
22786 }
22787
22788 return SDValue();
22789}
22790
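// Fold a store of (vselect Mask, OtherVec, (load Ptr)) back to Ptr into a
// masked store of OtherVec: the lanes where the select picks the loaded value
// already hold the right memory contents, so only the other lanes need to be
// written. When the load is the true operand of the vselect, the mask is
// inverted below.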
22791 static SDValue foldToMaskedStore(StoreSDNode *Store, SelectionDAG &DAG,
22792 const SDLoc &Dl) {
22793 if (!Store->isSimple() || !ISD::isNormalStore(Store))
22794 return SDValue();
22795
22796 SDValue StoredVal = Store->getValue();
22797 SDValue StorePtr = Store->getBasePtr();
22798 SDValue StoreOffset = Store->getOffset();
22799 EVT VT = Store->getMemoryVT();
22800
22801 // Skip this combine for non-vector types and for <1 x ty> vectors, as they
22802 // will be scalarized later.
22803 if (!VT.isVector() || VT.isScalableVector() || VT.getVectorNumElements() == 1)
22804 return SDValue();
22805
22806 unsigned AddrSpace = Store->getAddressSpace();
22807 Align Alignment = Store->getAlign();
22808 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
22809
22810 if (!TLI.isOperationLegalOrCustom(ISD::MSTORE, VT) ||
22811 !TLI.allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment))
22812 return SDValue();
22813
22814 SDValue Mask, OtherVec, LoadCh;
22815 unsigned LoadPos;
22816 if (sd_match(StoredVal,
22817 m_VSelect(m_Value(Mask), m_Value(OtherVec),
22818 m_Load(m_Value(LoadCh), m_Specific(StorePtr),
22819 m_Specific(StoreOffset))))) {
22820 LoadPos = 2;
22821 } else if (sd_match(StoredVal,
22822 m_VSelect(m_Value(Mask),
22823 m_Load(m_Value(LoadCh), m_Specific(StorePtr),
22824 m_Specific(StoreOffset)),
22825 m_Value(OtherVec)))) {
22826 LoadPos = 1;
22827 } else {
22828 return SDValue();
22829 }
22830
22831 auto *Load = cast<LoadSDNode>(StoredVal.getOperand(LoadPos));
22832 if (!Load->isSimple() || !ISD::isNormalLoad(Load) ||
22833 Load->getAddressSpace() != AddrSpace)
22834 return SDValue();
22835
22836 if (!Store->getChain().reachesChainWithoutSideEffects(LoadCh))
22837 return SDValue();
22838
22839 if (LoadPos == 1)
22840 Mask = DAG.getNOT(Dl, Mask, Mask.getValueType());
22841
22842 return DAG.getMaskedStore(Store->getChain(), Dl, OtherVec, StorePtr,
22843 StoreOffset, Mask, VT, Store->getMemOperand(),
22844 Store->getAddressingMode());
22845}
22846
22847SDValue DAGCombiner::visitSTORE(SDNode *N) {
22848 StoreSDNode *ST = cast<StoreSDNode>(N);
22849 SDValue Chain = ST->getChain();
22850 SDValue Value = ST->getValue();
22851 SDValue Ptr = ST->getBasePtr();
22852
22853 // If this is a store of a bit convert, store the input value if the
22854 // resultant store does not need a higher alignment than the original.
22855 if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
22856 ST->isUnindexed()) {
22857 EVT SVT = Value.getOperand(0).getValueType();
22858 // If the store is volatile, we only want to change the store type if the
22859 // resulting store is legal. Otherwise we might increase the number of
22860 // memory accesses. We don't care if the original type was legal or not
22861 // as we assume software couldn't rely on the number of accesses of an
22862 // illegal type.
22863 // TODO: May be able to relax for unordered atomics (see D66309)
22864 if (((!LegalOperations && ST->isSimple()) ||
22865 TLI.isOperationLegal(ISD::STORE, SVT)) &&
22866 TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT,
22867 DAG, *ST->getMemOperand())) {
22868 return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
22869 ST->getMemOperand());
22870 }
22871 }
22872
22873 // Turn 'store undef, Ptr' -> nothing.
22874 if (Value.isUndef() && ST->isUnindexed() && !ST->isVolatile())
22875 return Chain;
22876
22877 // Try to infer better alignment information than the store already has.
22878 if (OptLevel != CodeGenOptLevel::None && ST->isUnindexed() &&
22879 !ST->isAtomic()) {
22880 if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
22881 if (*Alignment > ST->getAlign() &&
22882 isAligned(*Alignment, ST->getSrcValueOffset())) {
22883 SDValue NewStore =
22884 DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
22885 ST->getMemoryVT(), *Alignment,
22886 ST->getMemOperand()->getFlags(), ST->getAAInfo());
22887 // NewStore will always be N as we are only refining the alignment
22888 assert(NewStore.getNode() == N);
22889 (void)NewStore;
22890 }
22891 }
22892 }
22893
22894 // Try transforming a pair floating point load / store ops to integer
22895 // load / store ops.
22896 if (SDValue NewST = TransformFPLoadStorePair(N))
22897 return NewST;
22898
22899 // Try transforming several stores into STORE (BSWAP).
22900 if (SDValue Store = mergeTruncStores(ST))
22901 return Store;
22902
22903 if (ST->isUnindexed()) {
22904 // Walk up chain skipping non-aliasing memory nodes, on this store and any
22905 // adjacent stores.
22906 if (findBetterNeighborChains(ST)) {
22907 // replaceStoreChain uses CombineTo, which handled all of the worklist
22908 // manipulation. Return the original node to not do anything else.
22909 return SDValue(ST, 0);
22910 }
22911 Chain = ST->getChain();
22912 }
22913
22914 // FIXME: is there such a thing as a truncating indexed store?
22915 if (ST->isTruncatingStore() && ST->isUnindexed() &&
22916 Value.getValueType().isInteger() &&
22917 (!isa<ConstantSDNode>(Value) ||
22918 !cast<ConstantSDNode>(Value)->isOpaque())) {
22919 // Convert a truncating store of an extension into a standard store.
22920 if ((Value.getOpcode() == ISD::ZERO_EXTEND ||
22921 Value.getOpcode() == ISD::SIGN_EXTEND ||
22922 Value.getOpcode() == ISD::ANY_EXTEND) &&
22923 Value.getOperand(0).getValueType() == ST->getMemoryVT() &&
22924 TLI.isOperationLegalOrCustom(ISD::STORE, ST->getMemoryVT()))
22925 return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
22926 ST->getMemOperand());
22927
22928 APInt TruncDemandedBits =
22929 APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
22930 ST->getMemoryVT().getScalarSizeInBits());
22931
22932 // See if we can simplify the operation with SimplifyDemandedBits, which
22933 // only works if the value has a single use.
22934 AddToWorklist(Value.getNode());
22935 if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
22936 // Re-visit the store if anything changed and the store hasn't been merged
22937 // with another node (N is deleted). SimplifyDemandedBits will add Value's
22938 // node back to the worklist if necessary, but we also need to re-visit
22939 // the Store node itself.
22940 if (N->getOpcode() != ISD::DELETED_NODE)
22941 AddToWorklist(N);
22942 return SDValue(N, 0);
22943 }
22944
22945 // Otherwise, see if we can simplify the input to this truncstore with
22946 // knowledge that only the low bits are being used. For example:
22947 // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
22948 if (SDValue Shorter =
22949 TLI.SimplifyMultipleUseDemandedBits(Value, TruncDemandedBits, DAG))
22950 return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(),
22951 ST->getMemOperand());
22952
22953 // If we're storing a truncated constant, see if we can simplify it.
22954 // TODO: Move this to targetShrinkDemandedConstant?
22955 if (auto *Cst = dyn_cast<ConstantSDNode>(Value))
22956 if (!Cst->isOpaque()) {
22957 const APInt &CValue = Cst->getAPIntValue();
22958 APInt NewVal = CValue & TruncDemandedBits;
22959 if (NewVal != CValue) {
22960 SDValue Shorter =
22961 DAG.getConstant(NewVal, SDLoc(N), Value.getValueType());
22962 return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr,
22963 ST->getMemoryVT(), ST->getMemOperand());
22964 }
22965 }
22966 }
22967
22968 // If this is a load followed by a store to the same location, then the store
22969 // is dead/noop. Peek through any truncates if canCombineTruncStore failed.
22970 // TODO: Add big-endian truncate support with test coverage.
22971 // TODO: Can relax for unordered atomics (see D66309)
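// For example: "x = load i32, p; ...; store i32 x, p" with no intervening
// side effects on the chain leaves memory unchanged, so the store is removed
// and only its chain is kept.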
22972 SDValue TruncVal = DAG.getDataLayout().isLittleEndian()
22973 ? peekThroughTruncates(Value)
22974 : Value;
22975 if (auto *Ld = dyn_cast<LoadSDNode>(TruncVal)) {
22976 if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
22977 ST->isUnindexed() && ST->isSimple() &&
22978 Ld->getAddressSpace() == ST->getAddressSpace() &&
22979 // There can't be any side effects between the load and store, such as
22980 // a call or store.
22981 Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
22982 // The store is dead, remove it.
22983 return Chain;
22984 }
22985 }
22986
22987 // Try scalarizing vector stores of loads where we only change one element
22988 if (SDValue NewST = replaceStoreOfInsertLoad(ST))
22989 return NewST;
22990
22991 // TODO: Can relax for unordered atomics (see D66309)
22992 if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
22993 if (ST->isUnindexed() && ST->isSimple() &&
22994 ST1->isUnindexed() && ST1->isSimple()) {
22995 if (OptLevel != CodeGenOptLevel::None && ST1->getBasePtr() == Ptr &&
22996 ST1->getValue() == Value && ST->getMemoryVT() == ST1->getMemoryVT() &&
22997 ST->getAddressSpace() == ST1->getAddressSpace()) {
22998 // If this is a store followed by a store with the same value to the
22999 // same location, then the store is dead/noop.
23000 return Chain;
23001 }
23002
23003 if (OptLevel != CodeGenOptLevel::None && ST1->hasOneUse() &&
23004 !ST1->getBasePtr().isUndef() &&
23005 ST->getAddressSpace() == ST1->getAddressSpace()) {
23006 // If one of the two stores has a scalable vector type and the other a
23007 // larger fixed-size type, we cannot allow removal of the scalable store,
23008 // because its final size is not known until runtime.
23010 if (ST->getMemoryVT().isScalableVector() ||
23011 ST1->getMemoryVT().isScalableVector()) {
23012 if (ST1->getBasePtr() == Ptr &&
23013 TypeSize::isKnownLE(ST1->getMemoryVT().getStoreSize(),
23014 ST->getMemoryVT().getStoreSize())) {
23015 CombineTo(ST1, ST1->getChain());
23016 return SDValue(N, 0);
23017 }
23018 } else {
23019 const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG);
23020 const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG);
23021 // If the preceding store writes to a subset of the current store's
23022 // location and no other node is chained to that store, we can
23023 // effectively drop the preceding store. Do not remove stores to undef as
23024 // they may be used as data sinks.
23025 if (STBase.contains(DAG, ST->getMemoryVT().getFixedSizeInBits(),
23026 ChainBase,
23027 ST1->getMemoryVT().getFixedSizeInBits())) {
23028 CombineTo(ST1, ST1->getChain());
23029 return SDValue(N, 0);
23030 }
23031 }
23032 }
23033 }
23034 }
23035
23036 // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
23037 // truncating store. We can do this even if this is already a truncstore.
23038 if ((Value.getOpcode() == ISD::FP_ROUND ||
23039 Value.getOpcode() == ISD::TRUNCATE) &&
23040 Value->hasOneUse() && ST->isUnindexed() &&
23041 TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
23042 ST->getMemoryVT(), LegalOperations)) {
23043 return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
23044 Ptr, ST->getMemoryVT(), ST->getMemOperand());
23045 }
23046
23047 // Always perform this optimization before types are legal. If the target
23048 // prefers, also try this after legalization to catch stores that were created
23049 // by intrinsics or other nodes.
23050 if (!LegalTypes || (TLI.mergeStoresAfterLegalization(ST->getMemoryVT()))) {
23051 while (true) {
23052 // There can be multiple store sequences on the same chain.
23053 // Keep trying to merge store sequences until we are unable to do so
23054 // or until we merge the last store on the chain.
23055 bool Changed = mergeConsecutiveStores(ST);
23056 if (!Changed) break;
23057 // Return N as merge only uses CombineTo and no worklist clean
23058 // up is necessary.
23059 if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
23060 return SDValue(N, 0);
23061 }
23062 }
23063
23064 // Try transforming N to an indexed store.
23065 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
23066 return SDValue(N, 0);
23067
23068 // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
23069 //
23070 // Make sure to do this only after attempting to merge stores in order to
23071 // avoid changing the types of some subset of stores due to visit order,
23072 // preventing their merging.
23073 if (isa<ConstantFPSDNode>(ST->getValue())) {
23074 if (SDValue NewSt = replaceStoreOfFPConstant(ST))
23075 return NewSt;
23076 }
23077
23078 if (SDValue NewSt = splitMergedValStore(ST))
23079 return NewSt;
23080
23081 if (SDValue MaskedStore = foldToMaskedStore(ST, DAG, SDLoc(N)))
23082 return MaskedStore;
23083
23084 return ReduceLoadOpStoreWidth(N);
23085}
23086
23087SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
23088 const auto *LifetimeEnd = cast<LifetimeSDNode>(N);
23089 const BaseIndexOffset LifetimeEndBase(N->getOperand(1), SDValue(), 0, false);
23090
23091 // We walk up the chains to find stores.
23092 SmallVector<SDValue, 8> Chains = {N->getOperand(0)};
23093 while (!Chains.empty()) {
23094 SDValue Chain = Chains.pop_back_val();
23095 if (!Chain.hasOneUse())
23096 continue;
23097 switch (Chain.getOpcode()) {
23098 case ISD::TokenFactor:
23099 for (unsigned Nops = Chain.getNumOperands(); Nops;)
23100 Chains.push_back(Chain.getOperand(--Nops));
23101 break;
23102 case ISD::LIFETIME_START:
23103 case ISD::LIFETIME_END:
23104 // We can forward past any lifetime start/end that can be proven not to
23105 // alias the node.
23106 if (!mayAlias(Chain.getNode(), N))
23107 Chains.push_back(Chain.getOperand(0));
23108 break;
23109 case ISD::STORE: {
23110 StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain);
23111 // TODO: Can relax for unordered atomics (see D66309)
23112 if (!ST->isSimple() || ST->isIndexed())
23113 continue;
23114 const TypeSize StoreSize = ST->getMemoryVT().getStoreSize();
23115 // The bounds of a scalable store are not known until runtime, so this
23116 // store cannot be elided.
23117 if (StoreSize.isScalable())
23118 continue;
23119 const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG);
23120 // If we store purely within object bounds just before its lifetime ends,
23121 // we can remove the store.
23122 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
23123 if (LifetimeEndBase.contains(
23124 DAG, MFI.getObjectSize(LifetimeEnd->getFrameIndex()) * 8,
23125 StoreBase, StoreSize.getFixedValue() * 8)) {
23126 LLVM_DEBUG(dbgs() << "\nRemoving store:"; StoreBase.dump();
23127 dbgs() << "\nwithin LIFETIME_END of : ";
23128 LifetimeEndBase.dump(); dbgs() << "\n");
23129 CombineTo(ST, ST->getChain());
23130 return SDValue(N, 0);
23131 }
23132 }
23133 }
23134 }
23135 return SDValue();
23136}
23137
23138/// For the instruction sequence of store below, F and I values
23139/// are bundled together as an i64 value before being stored into memory.
23140 /// Sometimes it is more efficient to generate separate stores for F and I,
23141/// which can remove the bitwise instructions or sink them to colder places.
23142///
23143/// (store (or (zext (bitcast F to i32) to i64),
23144/// (shl (zext I to i64), 32)), addr) -->
23145/// (store F, addr) and (store I, addr+4)
23146///
23147/// Similarly, splitting for other merged store can also be beneficial, like:
23148/// For pair of {i32, i32}, i64 store --> two i32 stores.
23149/// For pair of {i32, i16}, i64 store --> two i32 stores.
23150/// For pair of {i16, i16}, i32 store --> two i16 stores.
23151/// For pair of {i16, i8}, i32 store --> two i16 stores.
23152/// For pair of {i8, i8}, i16 store --> two i8 stores.
23153///
23154/// We allow each target to determine specifically which kind of splitting is
23155/// supported.
23156///
23157 /// The store patterns are commonly seen in the simple code snippet below,
23158 /// if only std::make_pair(...) is SROA-transformed before being inlined into hoo.
23159/// void goo(const std::pair<int, float> &);
23160/// hoo() {
23161/// ...
23162/// goo(std::make_pair(tmp, ftmp));
23163/// ...
23164/// }
23165///
23166SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
23167 if (OptLevel == CodeGenOptLevel::None)
23168 return SDValue();
23169
23170 // Can't change the number of memory accesses for a volatile store or break
23171 // atomicity for an atomic one.
23172 if (!ST->isSimple())
23173 return SDValue();
23174
23175 SDValue Val = ST->getValue();
23176 SDLoc DL(ST);
23177
23178 // Match OR operand.
23179 if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
23180 return SDValue();
23181
23182 // Match SHL operand and get Lower and Higher parts of Val.
23183 SDValue Op1 = Val.getOperand(0);
23184 SDValue Op2 = Val.getOperand(1);
23185 SDValue Lo, Hi;
23186 if (Op1.getOpcode() != ISD::SHL) {
23187 std::swap(Op1, Op2);
23188 if (Op1.getOpcode() != ISD::SHL)
23189 return SDValue();
23190 }
23191 Lo = Op2;
23192 Hi = Op1.getOperand(0);
23193 if (!Op1.hasOneUse())
23194 return SDValue();
23195
23196 // Match shift amount to HalfValBitSize.
23197 unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
23198 ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
23199 if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
23200 return SDValue();
23201
23202 // Lo and Hi are zero-extended from int with size less equal than 32
23203 // to i64.
23204 if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
23205 !Lo.getOperand(0).getValueType().isScalarInteger() ||
23206 Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
23207 Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
23208 !Hi.getOperand(0).getValueType().isScalarInteger() ||
23209 Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
23210 return SDValue();
23211
23212 // Use the EVT of low and high parts before bitcast as the input
23213 // of target query.
23214 EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
23215 ? Lo.getOperand(0).getValueType()
23216 : Lo.getValueType();
23217 EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
23218 ? Hi.getOperand(0).getValueType()
23219 : Hi.getValueType();
23220 if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
23221 return SDValue();
23222
23223 // Start to split store.
23224 MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
23225 AAMDNodes AAInfo = ST->getAAInfo();
23226
23227 // Change the sizes of Lo and Hi's value types to HalfValBitSize.
23228 EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
23229 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
23230 Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
23231
23232 SDValue Chain = ST->getChain();
23233 SDValue Ptr = ST->getBasePtr();
23234 // Lower value store.
23235 SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
23236 ST->getBaseAlign(), MMOFlags, AAInfo);
23237 Ptr =
23238 DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(HalfValBitSize / 8), DL);
23239 // Higher value store.
23240 SDValue St1 = DAG.getStore(
23241 St0, DL, Hi, Ptr, ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
23242 ST->getBaseAlign(), MMOFlags, AAInfo);
23243 return St1;
23244}
23245
23246// Merge an insertion into an existing shuffle:
23247// (insert_vector_elt (vector_shuffle X, Y, Mask),
23248 // (extract_vector_elt X, N), InsIndex)
23249// --> (vector_shuffle X, Y, NewMask)
23250// and variations where shuffle operands may be CONCAT_VECTORS.
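// Illustrative example with 4-element vectors:
//   X = <x0,x1,x2,x3>, Y = <y0,y1,y2,y3>, Mask = <0,4,1,5>
//   inserting (extract_vector_elt X, 2) at InsIndex 3 yields NewMask = <0,4,1,2>.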
23251 static bool mergeEltWithShuffle(SDValue &X, SDValue &Y, ArrayRef<int> Mask,
23252 SmallVectorImpl<int> &NewMask, SDValue Elt,
23253 unsigned InsIndex) {
23254 if (Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
23255 !isa<ConstantSDNode>(Elt.getOperand(1)))
23256 return false;
23257
23258 // Vec's operand 0 is using indices from 0 to N-1 and
23259 // operand 1 from N to 2N - 1, where N is the number of
23260 // elements in the vectors.
23261 SDValue InsertVal0 = Elt.getOperand(0);
23262 int ElementOffset = -1;
23263
23264 // We explore the inputs of the shuffle in order to see if we find the
23265 // source of the extract_vector_elt. If so, we can use it to modify the
23266 // shuffle rather than perform an insert_vector_elt.
23267 SmallVector<std::pair<int, SDValue>, 8> ArgWorkList;
23268 ArgWorkList.emplace_back(Mask.size(), Y);
23269 ArgWorkList.emplace_back(0, X);
23270
23271 while (!ArgWorkList.empty()) {
23272 int ArgOffset;
23273 SDValue ArgVal;
23274 std::tie(ArgOffset, ArgVal) = ArgWorkList.pop_back_val();
23275
23276 if (ArgVal == InsertVal0) {
23277 ElementOffset = ArgOffset;
23278 break;
23279 }
23280
23281 // Peek through concat_vector.
23282 if (ArgVal.getOpcode() == ISD::CONCAT_VECTORS) {
23283 int CurrentArgOffset =
23284 ArgOffset + ArgVal.getValueType().getVectorNumElements();
23285 int Step = ArgVal.getOperand(0).getValueType().getVectorNumElements();
23286 for (SDValue Op : reverse(ArgVal->ops())) {
23287 CurrentArgOffset -= Step;
23288 ArgWorkList.emplace_back(CurrentArgOffset, Op);
23289 }
23290
23291 // Make sure we went through all the elements and did not screw up index
23292 // computation.
23293 assert(CurrentArgOffset == ArgOffset);
23294 }
23295 }
23296
23297 // If we failed to find a match, see if we can replace an UNDEF shuffle
23298 // operand.
23299 if (ElementOffset == -1) {
23300 if (!Y.isUndef() || InsertVal0.getValueType() != Y.getValueType())
23301 return false;
23302 ElementOffset = Mask.size();
23303 Y = InsertVal0;
23304 }
23305
23306 NewMask.assign(Mask.begin(), Mask.end());
23307 NewMask[InsIndex] = ElementOffset + Elt.getConstantOperandVal(1);
23308 assert(NewMask[InsIndex] < (int)(2 * Mask.size()) && NewMask[InsIndex] >= 0 &&
23309 "NewMask[InsIndex] is out of bound");
23310 return true;
23311}
23312
23313// Merge an insertion into an existing shuffle:
23314// (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N),
23315// InsIndex)
23316// --> (vector_shuffle X, Y) and variations where shuffle operands may be
23317// CONCAT_VECTORS.
23318SDValue DAGCombiner::mergeInsertEltWithShuffle(SDNode *N, unsigned InsIndex) {
23319 assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
23320 "Expected extract_vector_elt");
23321 SDValue InsertVal = N->getOperand(1);
23322 SDValue Vec = N->getOperand(0);
23323
23324 auto *SVN = dyn_cast<ShuffleVectorSDNode>(Vec);
23325 if (!SVN || !Vec.hasOneUse())
23326 return SDValue();
23327
23328 ArrayRef<int> Mask = SVN->getMask();
23329 SDValue X = Vec.getOperand(0);
23330 SDValue Y = Vec.getOperand(1);
23331
23332 SmallVector<int, 16> NewMask(Mask);
23333 if (mergeEltWithShuffle(X, Y, Mask, NewMask, InsertVal, InsIndex)) {
23334 SDValue LegalShuffle = TLI.buildLegalVectorShuffle(
23335 Vec.getValueType(), SDLoc(N), X, Y, NewMask, DAG);
23336 if (LegalShuffle)
23337 return LegalShuffle;
23338 }
23339
23340 return SDValue();
23341}
23342
23343// Convert a disguised subvector insertion into a shuffle:
23344// insert_vector_elt V, (bitcast X from vector type), IdxC -->
23345// bitcast(shuffle (bitcast V), (extended X), Mask)
23346// Note: We do not use an insert_subvector node because that requires a
23347// legal subvector type.
23348SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
23349 assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
23350 "Expected extract_vector_elt");
23351 SDValue InsertVal = N->getOperand(1);
23352
23353 if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
23354 !InsertVal.getOperand(0).getValueType().isVector())
23355 return SDValue();
23356
23357 SDValue SubVec = InsertVal.getOperand(0);
23358 SDValue DestVec = N->getOperand(0);
23359 EVT SubVecVT = SubVec.getValueType();
23360 EVT VT = DestVec.getValueType();
23361 unsigned NumSrcElts = SubVecVT.getVectorNumElements();
23362 // If the source only has a single vector element, the cost of creating and
23363 // adding it to a vector is likely to exceed the cost of an insert_vector_elt.
23364 if (NumSrcElts == 1)
23365 return SDValue();
23366 unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits();
23367 unsigned NumMaskVals = ExtendRatio * NumSrcElts;
23368
23369 // Step 1: Create a shuffle mask that implements this insert operation. The
23370 // vector that we are inserting into will be operand 0 of the shuffle, so
23371 // those elements are just 'i'. The inserted subvector is in the first
23372 // positions of operand 1 of the shuffle. Example:
23373 // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7}
23374 SmallVector<int, 16> Mask(NumMaskVals);
23375 for (unsigned i = 0; i != NumMaskVals; ++i) {
23376 if (i / NumSrcElts == InsIndex)
23377 Mask[i] = (i % NumSrcElts) + NumMaskVals;
23378 else
23379 Mask[i] = i;
23380 }
23381
23382 // Bail out if the target can not handle the shuffle we want to create.
23383 EVT SubVecEltVT = SubVecVT.getVectorElementType();
23384 EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals);
23385 if (!TLI.isShuffleMaskLegal(Mask, ShufVT))
23386 return SDValue();
23387
23388 // Step 2: Create a wide vector from the inserted source vector by appending
23389 // undefined elements. This is the same size as our destination vector.
23390 SDLoc DL(N);
23391 SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT));
23392 ConcatOps[0] = SubVec;
23393 SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps);
23394
23395 // Step 3: Shuffle in the padded subvector.
23396 SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec);
23397 SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask);
23398 AddToWorklist(PaddedSubV.getNode());
23399 AddToWorklist(DestVecBC.getNode());
23400 AddToWorklist(Shuf.getNode());
23401 return DAG.getBitcast(VT, Shuf);
23402}
23403
23404// Combine insert(shuffle(load, <u,0,1,2>), load, 0) into a single load if
23405 // possible and the new load will be fast. We use more loads but fewer shuffles
23406 // and inserts.
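// Illustrative example for a v4i32 with InsIndex == 0:
//   insert (shuffle (load p), <u,0,1,2>), (load p-4), 0
//   --> one contiguous v4i32 load from p-4.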
23407SDValue DAGCombiner::combineInsertEltToLoad(SDNode *N, unsigned InsIndex) {
23408 EVT VT = N->getValueType(0);
23409
23410 // InsIndex is expected to be the first or last lane.
23411 if (!VT.isFixedLengthVector() ||
23412 (InsIndex != 0 && InsIndex != VT.getVectorNumElements() - 1))
23413 return SDValue();
23414
23415 // Look for a shuffle with the mask u,0,1,2,3,4,5,6 or 1,2,3,4,5,6,7,u
23416 // depending on the InsIndex.
23417 auto *Shuffle = dyn_cast<ShuffleVectorSDNode>(N->getOperand(0));
23418 SDValue Scalar = N->getOperand(1);
23419 if (!Shuffle || !all_of(enumerate(Shuffle->getMask()), [&](auto P) {
23420 return InsIndex == P.index() || P.value() < 0 ||
23421 (InsIndex == 0 && P.value() == (int)P.index() - 1) ||
23422 (InsIndex == VT.getVectorNumElements() - 1 &&
23423 P.value() == (int)P.index() + 1);
23424 }))
23425 return SDValue();
23426
23427 // We optionally skip over an extend so long as both loads are extended in the
23428 // same way from the same type.
23429 unsigned Extend = 0;
23430 if (Scalar.getOpcode() == ISD::ZERO_EXTEND ||
23431 Scalar.getOpcode() == ISD::SIGN_EXTEND ||
23432 Scalar.getOpcode() == ISD::ANY_EXTEND) {
23433 Extend = Scalar.getOpcode();
23434 Scalar = Scalar.getOperand(0);
23435 }
23436
23437 auto *ScalarLoad = dyn_cast<LoadSDNode>(Scalar);
23438 if (!ScalarLoad)
23439 return SDValue();
23440
23441 SDValue Vec = Shuffle->getOperand(0);
23442 if (Extend) {
23443 if (Vec.getOpcode() != Extend)
23444 return SDValue();
23445 Vec = Vec.getOperand(0);
23446 }
23447 auto *VecLoad = dyn_cast<LoadSDNode>(Vec);
23448 if (!VecLoad || Vec.getValueType().getScalarType() != Scalar.getValueType())
23449 return SDValue();
23450
23451 int EltSize = ScalarLoad->getValueType(0).getScalarSizeInBits();
23452 if (EltSize == 0 || EltSize % 8 != 0 || !ScalarLoad->isSimple() ||
23453 !VecLoad->isSimple() || VecLoad->getExtensionType() != ISD::NON_EXTLOAD ||
23454 ScalarLoad->getExtensionType() != ISD::NON_EXTLOAD ||
23455 ScalarLoad->getAddressSpace() != VecLoad->getAddressSpace())
23456 return SDValue();
23457
23458 // Check the offset between the pointers, so that together they produce a
23459 // single contiguous load.
23460 if (InsIndex == 0) {
23461 if (!DAG.areNonVolatileConsecutiveLoads(ScalarLoad, VecLoad, EltSize / 8,
23462 -1))
23463 return SDValue();
23464 } else {
23465 if (!DAG.areNonVolatileConsecutiveLoads(
23466 VecLoad, ScalarLoad, VT.getVectorNumElements() * EltSize / 8, -1))
23467 return SDValue();
23468 }
23469
23470 // And that the new unaligned load will be fast.
23471 unsigned IsFast = 0;
23472 Align NewAlign = commonAlignment(VecLoad->getAlign(), EltSize / 8);
23473 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
23474 Vec.getValueType(), VecLoad->getAddressSpace(),
23475 NewAlign, VecLoad->getMemOperand()->getFlags(),
23476 &IsFast) ||
23477 !IsFast)
23478 return SDValue();
23479
23480 // Calculate the new Ptr and create the new load.
23481 SDLoc DL(N);
23482 SDValue Ptr = ScalarLoad->getBasePtr();
23483 if (InsIndex != 0)
23484 Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), VecLoad->getBasePtr(),
23485 DAG.getConstant(EltSize / 8, DL, Ptr.getValueType()));
23486 MachinePointerInfo PtrInfo =
23487 InsIndex == 0 ? ScalarLoad->getPointerInfo()
23488 : VecLoad->getPointerInfo().getWithOffset(EltSize / 8);
23489
23490 SDValue Load = DAG.getLoad(VecLoad->getValueType(0), DL,
23491 ScalarLoad->getChain(), Ptr, PtrInfo, NewAlign);
23492 DAG.makeEquivalentMemoryOrdering(ScalarLoad, Load.getValue(1));
23493 DAG.makeEquivalentMemoryOrdering(VecLoad, Load.getValue(1));
23494 return Extend ? DAG.getNode(Extend, DL, VT, Load) : Load;
23495}
23496
23497SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
23498 SDValue InVec = N->getOperand(0);
23499 SDValue InVal = N->getOperand(1);
23500 SDValue EltNo = N->getOperand(2);
23501 SDLoc DL(N);
23502
23503 EVT VT = InVec.getValueType();
23504 auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
23505
23506 // Insert into out-of-bounds element is undefined.
23507 if (IndexC && VT.isFixedLengthVector() &&
23508 IndexC->getZExtValue() >= VT.getVectorNumElements())
23509 return DAG.getUNDEF(VT);
23510
23511 // Remove redundant insertions:
23512 // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
23513 if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
23514 InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
23515 return InVec;
23516
23517 // Remove insert of UNDEF/POISON elements.
23518 if (InVal.isUndef()) {
23519 if (InVal.getOpcode() == ISD::POISON || InVec.getOpcode() == ISD::UNDEF)
23520 return InVec;
23521 return DAG.getFreeze(InVec);
23522 }
23523
23524 if (!IndexC) {
23525 // If this is variable insert to undef vector, it might be better to splat:
23526 // inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
23527 if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT))
23528 return DAG.getSplat(VT, DL, InVal);
23529
23530 // Extend this type to be byte-addressable
23531 EVT OldVT = VT;
23532 EVT EltVT = VT.getVectorElementType();
23533 bool IsByteSized = EltVT.isByteSized();
23534 if (!IsByteSized) {
23535 EltVT =
23536 EltVT.changeTypeToInteger().getRoundIntegerType(*DAG.getContext());
23537 VT = VT.changeElementType(EltVT);
23538 }
23539
23540 // Check if this operation will be handled the default way for its type.
23541 auto IsTypeDefaultHandled = [this](EVT VT) {
23542 return TLI.getTypeAction(*DAG.getContext(), VT) ==
23545 };
23546
23547 // Check if this operation is illegal and will be handled the default way,
23548 // even after extending the type to be byte-addressable.
23549 if (IsTypeDefaultHandled(OldVT) && IsTypeDefaultHandled(VT)) {
23550 // For each dynamic insertelt, the default way will save the vector to
23551 // the stack, store at an offset, and load the modified vector. This can
23552 // dramatically increase code size if we have a chain of insertelts on a
23553 // large vector: requiring O(V*C) stores/loads where V = length of
23554 // vector and C is length of chain. If each insertelt is only fed into the
23555 // next, the vector is write-only across this chain, and we can just
23556 // save once before the chain and load after in O(V + C) operations.
23557 SmallVector<SDNode *> Seq = {N};
23558 unsigned NumDynamic = 1;
23559 while (true) {
23560 SDValue InVec = Seq.back()->getOperand(0);
23561 if (InVec.getOpcode() != ISD::INSERT_VECTOR_ELT)
23562 break;
23563 Seq.push_back(InVec.getNode());
23564 NumDynamic += !isa<ConstantSDNode>(InVec.getOperand(2));
23565 }
23566
23567 // It makes sense to lower this sequence if and only if we have more than
23568 // one dynamic insertelt: since there will never be more than V constant
23569 // insertelts, this is guaranteed to reduce the total number of stores+loads.
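// E.g., a chain of C dynamic insertelts into a V-element vector becomes one
// vector store, C element stores and one vector load (O(V + C) memory
// operations) instead of a whole-vector store+load per insertelt (O(V * C)).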
23570 if (NumDynamic > 1) {
23571 // In cases where the vector is illegal it will be broken down into
23572 // parts and stored in parts - we should use the alignment for the
23573 // smallest part.
23574 Align SmallestAlign = DAG.getReducedAlign(VT, /*UseABI=*/false);
23575 SDValue StackPtr =
23576 DAG.CreateStackTemporary(VT.getStoreSize(), SmallestAlign);
23577 auto &MF = DAG.getMachineFunction();
23578 int FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
23579 auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
23580
23581 // Save the vector to the stack
23582 SDValue InVec = Seq.back()->getOperand(0);
23583 if (!IsByteSized)
23584 InVec = DAG.getNode(ISD::ANY_EXTEND, DL, VT, InVec);
23585 SDValue Store = DAG.getStore(DAG.getEntryNode(), DL, InVec, StackPtr,
23586 PtrInfo, SmallestAlign);
23587
23588 // Lower each dynamic insertelt to a store
23589 for (SDNode *N : reverse(Seq)) {
23590 SDValue Elmnt = N->getOperand(1);
23591 SDValue Index = N->getOperand(2);
23592
23593 // Check if we have to extend the element type
23594 if (!IsByteSized && Elmnt.getValueType().bitsLT(EltVT))
23595 Elmnt = DAG.getNode(ISD::ANY_EXTEND, DL, EltVT, Elmnt);
23596
23597 // Store the new element. This may be larger than the vector element
23598 // type, so use a truncating store.
23599 SDValue EltPtr =
23600 TLI.getVectorElementPointer(DAG, StackPtr, VT, Index);
23601 EVT EltVT = Elmnt.getValueType();
23602 Store = DAG.getTruncStore(
23603 Store, DL, Elmnt, EltPtr, MachinePointerInfo::getUnknownStack(MF),
23604 EltVT,
23605 commonAlignment(SmallestAlign, EltVT.getFixedSizeInBits() / 8));
23606 }
23607
23608 // Load the saved vector from the stack
23609 SDValue Load =
23610 DAG.getLoad(VT, DL, Store, StackPtr, PtrInfo, SmallestAlign);
23611 SDValue LoadV = Load.getValue(0);
23612 return IsByteSized ? LoadV : DAG.getAnyExtOrTrunc(LoadV, DL, OldVT);
23613 }
23614 }
23615
23616 return SDValue();
23617 }
23618
23619 if (VT.isScalableVector())
23620 return SDValue();
23621
23622 unsigned NumElts = VT.getVectorNumElements();
23623
23624 // We must know which element is being inserted for folds below here.
23625 unsigned Elt = IndexC->getZExtValue();
23626
23627 // Handle <1 x ???> vector insertion special cases.
23628 if (NumElts == 1) {
23629 // insert_vector_elt(x, extract_vector_elt(y, 0), 0) -> y
23630 if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
23631 InVal.getOperand(0).getValueType() == VT &&
23632 isNullConstant(InVal.getOperand(1)))
23633 return InVal.getOperand(0);
23634 }
23635
23636 // Canonicalize insert_vector_elt dag nodes.
23637 // Example:
23638 // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
23639 // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
23640 //
23641 // Do this only if the child insert_vector node has one use; also
23642 // do this only if indices are both constants and Idx1 < Idx0.
23643 if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
23644 && isa<ConstantSDNode>(InVec.getOperand(2))) {
23645 unsigned OtherElt = InVec.getConstantOperandVal(2);
23646 if (Elt < OtherElt) {
23647 // Swap nodes.
23648 SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
23649 InVec.getOperand(0), InVal, EltNo);
23650 AddToWorklist(NewOp.getNode());
23651 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
23652 VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
23653 }
23654 }
23655
23656 if (SDValue Shuf = mergeInsertEltWithShuffle(N, Elt))
23657 return Shuf;
23658
23659 if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
23660 return Shuf;
23661
23662 if (SDValue Shuf = combineInsertEltToLoad(N, Elt))
23663 return Shuf;
23664
23665 // Attempt to convert an insert_vector_elt chain into a legal build_vector.
23666 if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) {
23667 // vXi1 vector - we don't need to recurse.
23668 if (NumElts == 1)
23669 return DAG.getBuildVector(VT, DL, {InVal});
23670
23671 // If we haven't already collected the element, insert into the op list.
23672 EVT MaxEltVT = InVal.getValueType();
23673 auto AddBuildVectorOp = [&](SmallVectorImpl<SDValue> &Ops, SDValue Elt,
23674 unsigned Idx) {
23675 if (!Ops[Idx]) {
23676 Ops[Idx] = Elt;
23677 if (VT.isInteger()) {
23678 EVT EltVT = Elt.getValueType();
23679 MaxEltVT = MaxEltVT.bitsGE(EltVT) ? MaxEltVT : EltVT;
23680 }
23681 }
23682 };
23683
23684 // Ensure all the operands are the same value type, fill any missing
23685 // operands with UNDEF and create the BUILD_VECTOR.
23686 auto CanonicalizeBuildVector = [&](SmallVectorImpl<SDValue> &Ops,
23687 bool FreezeUndef = false) {
23688 assert(Ops.size() == NumElts && "Unexpected vector size");
23689 SDValue UndefOp = FreezeUndef ? DAG.getFreeze(DAG.getUNDEF(MaxEltVT))
23690 : DAG.getUNDEF(MaxEltVT);
23691 for (SDValue &Op : Ops) {
23692 if (Op)
23693 Op = VT.isInteger() ? DAG.getAnyExtOrTrunc(Op, DL, MaxEltVT) : Op;
23694 else
23695 Op = UndefOp;
23696 }
23697 return DAG.getBuildVector(VT, DL, Ops);
23698 };
23699
23700 SmallVector<SDValue, 8> Ops(NumElts);
23701 Ops[Elt] = InVal;
23702
23703 // Recurse up a INSERT_VECTOR_ELT chain to build a BUILD_VECTOR.
23704 for (SDValue CurVec = InVec; CurVec;) {
23705 // UNDEF - build new BUILD_VECTOR from already inserted operands.
23706 if (CurVec.isUndef())
23707 return CanonicalizeBuildVector(Ops);
23708
23709 // FREEZE(UNDEF) - build new BUILD_VECTOR from already inserted operands.
23710 if (ISD::isFreezeUndef(CurVec.getNode()) && CurVec.hasOneUse())
23711 return CanonicalizeBuildVector(Ops, /*FreezeUndef=*/true);
23712
23713 // BUILD_VECTOR - insert unused operands and build new BUILD_VECTOR.
23714 if (CurVec.getOpcode() == ISD::BUILD_VECTOR && CurVec.hasOneUse()) {
23715 for (unsigned I = 0; I != NumElts; ++I)
23716 AddBuildVectorOp(Ops, CurVec.getOperand(I), I);
23717 return CanonicalizeBuildVector(Ops);
23718 }
23719
23720 // SCALAR_TO_VECTOR - insert unused scalar and build new BUILD_VECTOR.
23721 if (CurVec.getOpcode() == ISD::SCALAR_TO_VECTOR && CurVec.hasOneUse()) {
23722 AddBuildVectorOp(Ops, CurVec.getOperand(0), 0);
23723 return CanonicalizeBuildVector(Ops);
23724 }
23725
23726 // INSERT_VECTOR_ELT - insert operand and continue up the chain.
23727 if (CurVec.getOpcode() == ISD::INSERT_VECTOR_ELT && CurVec.hasOneUse())
23728 if (auto *CurIdx = dyn_cast<ConstantSDNode>(CurVec.getOperand(2)))
23729 if (CurIdx->getAPIntValue().ult(NumElts)) {
23730 unsigned Idx = CurIdx->getZExtValue();
23731 AddBuildVectorOp(Ops, CurVec.getOperand(1), Idx);
23732
23733 // Found entire BUILD_VECTOR.
23734 if (all_of(Ops, [](SDValue Op) { return !!Op; }))
23735 return CanonicalizeBuildVector(Ops);
23736
23737 CurVec = CurVec->getOperand(0);
23738 continue;
23739 }
23740
23741 // VECTOR_SHUFFLE - if all the operands match the shuffle's sources,
23742 // update the shuffle mask (and second operand if we started with unary
23743 // shuffle) and create a new legal shuffle.
23744 if (CurVec.getOpcode() == ISD::VECTOR_SHUFFLE && CurVec.hasOneUse()) {
23745 auto *SVN = cast<ShuffleVectorSDNode>(CurVec);
23746 SDValue LHS = SVN->getOperand(0);
23747 SDValue RHS = SVN->getOperand(1);
23748 SmallVector<int, 16> Mask(SVN->getMask());
23749 bool Merged = true;
23750 for (auto I : enumerate(Ops)) {
23751 SDValue &Op = I.value();
23752 if (Op) {
23753 SmallVector<int, 16> NewMask;
23754 if (!mergeEltWithShuffle(LHS, RHS, Mask, NewMask, Op, I.index())) {
23755 Merged = false;
23756 break;
23757 }
23758 Mask = std::move(NewMask);
23759 }
23760 }
23761 if (Merged)
23762 if (SDValue NewShuffle =
23763 TLI.buildLegalVectorShuffle(VT, DL, LHS, RHS, Mask, DAG))
23764 return NewShuffle;
23765 }
23766
23767 if (!LegalOperations) {
23768 bool IsNull = llvm::isNullConstant(InVal);
23769 // We can convert to AND/OR mask if all insertions are zero or -1
23770 // respectively.
23771 if ((IsNull || llvm::isAllOnesConstant(InVal)) &&
23772 all_of(Ops, [InVal](SDValue Op) { return !Op || Op == InVal; }) &&
23773 count_if(Ops, [InVal](SDValue Op) { return Op == InVal; }) >= 2) {
23774 SDValue Zero = DAG.getConstant(0, DL, MaxEltVT);
23775 SDValue AllOnes = DAG.getAllOnesConstant(DL, MaxEltVT);
23776 SmallVector<SDValue, 8> Mask(NumElts);
23777
23778 // Build the mask and return the corresponding DAG node.
23779 auto BuildMaskAndNode = [&](SDValue TrueVal, SDValue FalseVal,
23780 unsigned MaskOpcode) {
23781 for (unsigned I = 0; I != NumElts; ++I)
23782 Mask[I] = Ops[I] ? TrueVal : FalseVal;
23783 return DAG.getNode(MaskOpcode, DL, VT, CurVec,
23784 DAG.getBuildVector(VT, DL, Mask));
23785 };
23786
23787 // If all elements are zero, we can use AND with all ones.
23788 if (IsNull)
23789 return BuildMaskAndNode(Zero, AllOnes, ISD::AND);
23790
23791 // If all elements are -1, we can use OR with zero.
23792 return BuildMaskAndNode(AllOnes, Zero, ISD::OR);
23793 }
23794 }
23795
23796 // Failed to find a match in the chain - bail.
23797 break;
23798 }
23799
23800 // See if we can fill in the missing constant elements as zeros.
23801 // TODO: Should we do this for any constant?
23802 APInt DemandedZeroElts = APInt::getZero(NumElts);
23803 for (unsigned I = 0; I != NumElts; ++I)
23804 if (!Ops[I])
23805 DemandedZeroElts.setBit(I);
23806
23807 if (DAG.MaskedVectorIsZero(InVec, DemandedZeroElts)) {
23808 SDValue Zero = VT.isInteger() ? DAG.getConstant(0, DL, MaxEltVT)
23809 : DAG.getConstantFP(0, DL, MaxEltVT);
23810 for (unsigned I = 0; I != NumElts; ++I)
23811 if (!Ops[I])
23812 Ops[I] = Zero;
23813
23814 return CanonicalizeBuildVector(Ops);
23815 }
23816 }
23817
23818 return SDValue();
23819}
23820
23821/// Transform a vector binary operation into a scalar binary operation by moving
23822/// the math/logic after an extract element of a vector.
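// For example (the extracted constant operand is then constant-folded):
//   (extract_vector_elt (add X, <1,2,3,4>), 2) --> (add (extract_vector_elt X, 2), 3)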
23823 static SDValue scalarizeExtractedBinOp(SDNode *ExtElt, SelectionDAG &DAG,
23824 const SDLoc &DL, bool LegalTypes) {
23825 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
23826 SDValue Vec = ExtElt->getOperand(0);
23827 SDValue Index = ExtElt->getOperand(1);
23828 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
23829 unsigned Opc = Vec.getOpcode();
23830 if (!IndexC || !Vec.hasOneUse() || (!TLI.isBinOp(Opc) && Opc != ISD::SETCC) ||
23831 Vec->getNumValues() != 1)
23832 return SDValue();
23833
23834 // Targets may want to avoid this to prevent an expensive register transfer.
23835 if (!TLI.shouldScalarizeBinop(Vec))
23836 return SDValue();
23837
23838 EVT ResVT = ExtElt->getValueType(0);
23839 if (Opc == ISD::SETCC &&
23840 (ResVT != Vec.getValueType().getVectorElementType() || LegalTypes))
23841 return SDValue();
23842
23843 // Extracting an element of a vector constant is constant-folded, so this
23844 // transform is just replacing a vector op with a scalar op while moving the
23845 // extract.
23846 SDValue Op0 = Vec.getOperand(0);
23847 SDValue Op1 = Vec.getOperand(1);
23848 APInt SplatVal;
23849 if (!isAnyConstantBuildVector(Op0, true) &&
23850 !ISD::isConstantSplatVector(Op0.getNode(), SplatVal) &&
23851 !isAnyConstantBuildVector(Op1, true) &&
23852 !ISD::isConstantSplatVector(Op1.getNode(), SplatVal))
23853 return SDValue();
23854
23855 // extractelt (op X, C), IndexC --> op (extractelt X, IndexC), C'
23856 // extractelt (op C, X), IndexC --> op C', (extractelt X, IndexC)
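// For example:
//   extractelt (add X, (build_vector 1, 2, 3, 4)), 2
//     --> add (extractelt X, 2), 3
// because extracting element 2 of the constant vector constant-folds to 3.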
23857 if (Opc == ISD::SETCC) {
23858 EVT OpVT = Op0.getValueType().getVectorElementType();
23859 Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Op0, Index);
23860 Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Op1, Index);
23861 SDValue NewVal = DAG.getSetCC(
23862 DL, ResVT, Op0, Op1, cast<CondCodeSDNode>(Vec->getOperand(2))->get());
23863 // We may need to sign- or zero-extend the result to match the same
23864 // behaviour as the vector version of SETCC.
23865 unsigned VecBoolContents = TLI.getBooleanContents(Vec.getValueType());
23866 if (ResVT != MVT::i1 &&
23867 VecBoolContents != TargetLowering::UndefinedBooleanContent &&
23868 VecBoolContents != TLI.getBooleanContents(ResVT)) {
23869 if (VecBoolContents == TargetLowering::ZeroOrNegativeOneBooleanContent)
23870 NewVal = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, ResVT, NewVal,
23871 DAG.getValueType(MVT::i1));
23872 else
23873 NewVal = DAG.getZeroExtendInReg(NewVal, DL, MVT::i1);
23874 }
23875 return NewVal;
23876 }
23877 Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op0, Index);
23878 Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op1, Index);
23879 return DAG.getNode(Opc, DL, ResVT, Op0, Op1);
23880}
23881
23882 // Given an ISD::EXTRACT_VECTOR_ELT, which is a glorified bit sequence extract,
23883 // recursively analyse all of its users and try to model them as
23884// bit sequence extractions. If all of them agree on the new, narrower element
23885// type, and all of them can be modelled as ISD::EXTRACT_VECTOR_ELT's of that
23886// new element type, do so now.
23887 // This is mainly useful to recover from legalization that scalarized
23888 // the vector as wide elements; here we try to rebuild it with narrower elements.
23889//
23890// Some more nodes could be modelled if that helps cover interesting patterns.
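// For instance, if an i64 element extracted from a v2i64 vector is only
// consumed as two i32 halves (one via truncate, one via srl-by-32 followed
// by truncate), both halves can instead be extracted directly from the same
// vector reinterpreted as v4i32.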
23891bool DAGCombiner::refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(
23892 SDNode *N) {
23893 // We perform this optimization post type-legalization because
23894 // the type-legalizer often scalarizes integer-promoted vectors.
23895 // Performing this optimization before may cause legalization cycles.
23896 if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
23897 return false;
23898
23899 // TODO: Add support for big-endian.
23900 if (DAG.getDataLayout().isBigEndian())
23901 return false;
23902
23903 SDValue VecOp = N->getOperand(0);
23904 EVT VecVT = VecOp.getValueType();
23905 assert(!VecVT.isScalableVector() && "Only for fixed vectors.");
23906
23907 // We must start with a constant extraction index.
23908 auto *IndexC = dyn_cast<ConstantSDNode>(N->getOperand(1));
23909 if (!IndexC)
23910 return false;
23911
23912 assert(IndexC->getZExtValue() < VecVT.getVectorNumElements() &&
23913 "Original ISD::EXTRACT_VECTOR_ELT is undefinend?");
23914
23915 // TODO: deal with the case of implicit anyext of the extraction.
23916 unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
23917 EVT ScalarVT = N->getValueType(0);
23918 if (VecVT.getScalarType() != ScalarVT)
23919 return false;
23920
23921 // TODO: deal with the cases other than everything being integer-typed.
23922 if (!ScalarVT.isScalarInteger())
23923 return false;
23924
23925 struct Entry {
23926 SDNode *Producer;
23927
23928 // Which bits of VecOp does it contain?
23929 unsigned BitPos;
23930 int NumBits;
23931 // NOTE: the actual width of \p Producer may be wider than NumBits!
23932
23933 Entry(Entry &&) = default;
23934 Entry(SDNode *Producer_, unsigned BitPos_, int NumBits_)
23935 : Producer(Producer_), BitPos(BitPos_), NumBits(NumBits_) {}
23936
23937 Entry() = delete;
23938 Entry(const Entry &) = delete;
23939 Entry &operator=(const Entry &) = delete;
23940 Entry &operator=(Entry &&) = delete;
23941 };
23942 SmallVector<Entry, 32> Worklist;
23943 SmallVector<Entry, 32> Leafs;
23944
23945 // We start at the "root" ISD::EXTRACT_VECTOR_ELT.
23946 Worklist.emplace_back(N, /*BitPos=*/VecEltBitWidth * IndexC->getZExtValue(),
23947 /*NumBits=*/VecEltBitWidth);
23948
23949 while (!Worklist.empty()) {
23950 Entry E = Worklist.pop_back_val();
23951 // Does the node not even use any of the VecOp bits?
23952 if (!(E.NumBits > 0 && E.BitPos < VecVT.getSizeInBits() &&
23953 E.BitPos + E.NumBits <= VecVT.getSizeInBits()))
23954 return false; // Let's allow the other combines to clean this up first.
23955 // Did we fail to model any of the users of the Producer?
23956 bool ProducerIsLeaf = false;
23957 // Look at each user of this Producer.
23958 for (SDNode *User : E.Producer->users()) {
23959 switch (User->getOpcode()) {
23960 // TODO: support ISD::BITCAST
23961 // TODO: support ISD::ANY_EXTEND
23962 // TODO: support ISD::ZERO_EXTEND
23963 // TODO: support ISD::SIGN_EXTEND
23964 case ISD::TRUNCATE:
23965 // Truncation simply means we keep position, but extract less bits.
23966 Worklist.emplace_back(User, E.BitPos,
23967 /*NumBits=*/User->getValueSizeInBits(0));
23968 break;
23969 // TODO: support ISD::SRA
23970 // TODO: support ISD::SHL
23971 case ISD::SRL:
23972 // We should be shifting the Producer by a constant amount.
23973 if (auto *ShAmtC = dyn_cast<ConstantSDNode>(User->getOperand(1));
23974 User->getOperand(0).getNode() == E.Producer && ShAmtC) {
23975 // Logical right-shift means that we start extraction later,
23976 // but stop it at the same position we did previously.
23977 unsigned ShAmt = ShAmtC->getZExtValue();
23978 Worklist.emplace_back(User, E.BitPos + ShAmt, E.NumBits - ShAmt);
23979 break;
23980 }
23981 [[fallthrough]];
23982 default:
23983 // We can not model this user of the Producer.
23984 // Which means the current Producer will be a ISD::EXTRACT_VECTOR_ELT.
23985 ProducerIsLeaf = true;
23986 // Profitability check: all users that we can not model
23987 // must be ISD::BUILD_VECTOR's.
23988 if (User->getOpcode() != ISD::BUILD_VECTOR)
23989 return false;
23990 break;
23991 }
23992 }
23993 if (ProducerIsLeaf)
23994 Leafs.emplace_back(std::move(E));
23995 }
23996
23997 unsigned NewVecEltBitWidth = Leafs.front().NumBits;
23998
23999 // If we are still at the same element granularity, give up.
24000 if (NewVecEltBitWidth == VecEltBitWidth)
24001 return false;
24002
24003 // The vector width must be a multiple of the new element width.
24004 if (VecVT.getSizeInBits() % NewVecEltBitWidth != 0)
24005 return false;
24006
24007 // All leafs must agree on the new element width.
24008 // All leafs must not expect any "padding" bits on top of that width.
24009 // All leafs must start extraction from multiple of that width.
24010 if (!all_of(Leafs, [NewVecEltBitWidth](const Entry &E) {
24011 return (unsigned)E.NumBits == NewVecEltBitWidth &&
24012 E.Producer->getValueSizeInBits(0) == NewVecEltBitWidth &&
24013 E.BitPos % NewVecEltBitWidth == 0;
24014 }))
24015 return false;
24016
24017 EVT NewScalarVT = EVT::getIntegerVT(*DAG.getContext(), NewVecEltBitWidth);
24018 EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewScalarVT,
24019 VecVT.getSizeInBits() / NewVecEltBitWidth);
24020
24021 if (LegalTypes &&
24022 !(TLI.isTypeLegal(NewScalarVT) && TLI.isTypeLegal(NewVecVT)))
24023 return false;
24024
24025 if (LegalOperations &&
24026 !(TLI.isOperationLegalOrCustom(ISD::BITCAST, NewVecVT) &&
24027 TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, NewVecVT)))
24028 return false;
24029
24030 SDValue NewVecOp = DAG.getBitcast(NewVecVT, VecOp);
24031 for (const Entry &E : Leafs) {
24032 SDLoc DL(E.Producer);
24033 unsigned NewIndex = E.BitPos / NewVecEltBitWidth;
24034 assert(NewIndex < NewVecVT.getVectorNumElements() &&
24035 "Creating out-of-bounds ISD::EXTRACT_VECTOR_ELT?");
24036 SDValue V = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, NewScalarVT, NewVecOp,
24037 DAG.getVectorIdxConstant(NewIndex, DL));
24038 CombineTo(E.Producer, V);
24039 }
24040
24041 return true;
24042}
24043
24044SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
24045 SDValue VecOp = N->getOperand(0);
24046 SDValue Index = N->getOperand(1);
24047 EVT ScalarVT = N->getValueType(0);
24048 EVT VecVT = VecOp.getValueType();
24049 if (VecOp.isUndef())
24050 return DAG.getUNDEF(ScalarVT);
24051
24052 // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
24053 //
24054 // This only really matters if the index is non-constant since other combines
24055 // on the constant elements already work.
24056 SDLoc DL(N);
24057 if (VecOp.getOpcode() == ISD::INSERT_VECTOR_ELT &&
24058 Index == VecOp.getOperand(2)) {
24059 SDValue Elt = VecOp.getOperand(1);
24060 AddUsersToWorklist(VecOp.getNode());
24061 return VecVT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, DL, ScalarVT) : Elt;
24062 }
24063
24064 // (vextract (scalar_to_vector val, 0) -> val
24065 if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR) {
24066 // Only 0'th element of SCALAR_TO_VECTOR is defined.
24067 if (DAG.isKnownNeverZero(Index))
24068 return DAG.getUNDEF(ScalarVT);
24069
24070 // Check if the result type doesn't match the inserted element type.
24071 // The inserted element and extracted element may have mismatched bitwidth.
24072 // As a result, EXTRACT_VECTOR_ELT may extend or truncate the extracted vector.
24073 SDValue InOp = VecOp.getOperand(0);
24074 if (InOp.getValueType() != ScalarVT) {
24075 assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
24076 if (InOp.getValueType().bitsGT(ScalarVT))
24077 return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, InOp);
24078 return DAG.getNode(ISD::ANY_EXTEND, DL, ScalarVT, InOp);
24079 }
24080 return InOp;
24081 }
24082
24083 // extract_vector_elt of out-of-bounds element -> UNDEF
24084 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
24085 if (IndexC && VecVT.isFixedLengthVector() &&
24086 IndexC->getAPIntValue().uge(VecVT.getVectorNumElements()))
24087 return DAG.getUNDEF(ScalarVT);
24088
24089 // extract_vector_elt (build_vector x, y), 1 -> y
24090 if (((IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR) ||
24091 VecOp.getOpcode() == ISD::SPLAT_VECTOR) &&
24092 TLI.isTypeLegal(VecVT)) {
24093 assert((VecOp.getOpcode() != ISD::BUILD_VECTOR ||
24094 VecVT.isFixedLengthVector()) &&
24095 "BUILD_VECTOR used for scalable vectors");
24096 unsigned IndexVal =
24097 VecOp.getOpcode() == ISD::BUILD_VECTOR ? IndexC->getZExtValue() : 0;
24098 SDValue Elt = VecOp.getOperand(IndexVal);
24099 EVT InEltVT = Elt.getValueType();
24100
24101 if (VecOp.hasOneUse() || TLI.aggressivelyPreferBuildVectorSources(VecVT) ||
24102 isNullConstant(Elt)) {
24103 // Sometimes build_vector's scalar input types do not match result type.
24104 if (ScalarVT == InEltVT)
24105 return Elt;
24106
24107 // TODO: It may be useful to truncate if free if the build_vector
24108 // implicitly converts.
24109 }
24110 }
24111
24112 if (SDValue BO = scalarizeExtractedBinOp(N, DAG, DL, LegalTypes))
24113 return BO;
24114
24115 if (VecVT.isScalableVector())
24116 return SDValue();
24117
24118 // All the code from this point onwards assumes fixed width vectors, but it's
24119 // possible that some of the combinations could be made to work for scalable
24120 // vectors too.
24121 unsigned NumElts = VecVT.getVectorNumElements();
24122 unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
24123
24124 // See if the extracted element is constant, in which case fold it if its
24125 // a legal fp immediate.
24126 if (IndexC && ScalarVT.isFloatingPoint()) {
24127 APInt EltMask = APInt::getOneBitSet(NumElts, IndexC->getZExtValue());
24128 KnownBits KnownElt = DAG.computeKnownBits(VecOp, EltMask);
24129 if (KnownElt.isConstant()) {
24130 APFloat CstFP =
24131 APFloat(ScalarVT.getFltSemantics(), KnownElt.getConstant());
24132 if (TLI.isFPImmLegal(CstFP, ScalarVT))
24133 return DAG.getConstantFP(CstFP, DL, ScalarVT);
24134 }
24135 }
24136
24137 // TODO: These transforms should not require the 'hasOneUse' restriction, but
24138 // there are regressions on multiple targets without it. We can end up with a
24139 // mess of scalar and vector code if we reduce only part of the DAG to scalar.
24140 if (IndexC && VecOp.getOpcode() == ISD::BITCAST && VecVT.isInteger() &&
24141 VecOp.hasOneUse()) {
24142 // The vector index of the LSBs of the source depend on the endian-ness.
24143 bool IsLE = DAG.getDataLayout().isLittleEndian();
24144 unsigned ExtractIndex = IndexC->getZExtValue();
24145 // extract_elt (v2i32 (bitcast i64:x)), BCTruncElt -> i32 (trunc i64:x)
24146 unsigned BCTruncElt = IsLE ? 0 : NumElts - 1;
24147 SDValue BCSrc = VecOp.getOperand(0);
24148 if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger())
24149 return DAG.getAnyExtOrTrunc(BCSrc, DL, ScalarVT);
24150
24151 // TODO: Add support for SCALAR_TO_VECTOR implicit truncation.
24152 if (LegalTypes && BCSrc.getValueType().isInteger() &&
24153 BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR &&
24154 BCSrc.getScalarValueSizeInBits() ==
24155 BCSrc.getOperand(0).getValueSizeInBits()) {
24156 // ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt -->
24157 // trunc i64 X to i32
24158 SDValue X = BCSrc.getOperand(0);
24159 EVT XVT = X.getValueType();
24160 assert(XVT.isScalarInteger() && ScalarVT.isScalarInteger() &&
24161 "Extract element and scalar to vector can't change element type "
24162 "from FP to integer.");
24163 unsigned XBitWidth = X.getValueSizeInBits();
24164 unsigned Scale = XBitWidth / VecEltBitWidth;
24165 BCTruncElt = IsLE ? 0 : Scale - 1;
24166
24167 // An extract element return value type can be wider than its vector
24168 // operand element type. In that case, the high bits are undefined, so
24169 // it's possible that we may need to extend rather than truncate.
24170 if (ExtractIndex < Scale && XBitWidth > VecEltBitWidth) {
24171 assert(XBitWidth % VecEltBitWidth == 0 &&
24172 "Scalar bitwidth must be a multiple of vector element bitwidth");
24173
24174 if (ExtractIndex != BCTruncElt) {
24175 unsigned ShiftIndex =
24176 IsLE ? ExtractIndex : (Scale - 1) - ExtractIndex;
24177 X = DAG.getNode(
24178 ISD::SRL, DL, XVT, X,
24179 DAG.getShiftAmountConstant(ShiftIndex * VecEltBitWidth, XVT, DL));
24180 }
24181
24182 return DAG.getAnyExtOrTrunc(X, DL, ScalarVT);
24183 }
24184 }
24185 }
24186
24187 // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
24188 // We only perform this optimization before the op legalization phase because
24189 // we may introduce new vector instructions which are not backed by TD
24190 // patterns. For example on AVX, extracting elements from a wide vector
24191 // without using extract_subvector. However, if we can find an underlying
24192 // scalar value, then we can always use that.
24193 if (IndexC && VecOp.getOpcode() == ISD::VECTOR_SHUFFLE) {
24194 auto *Shuf = cast<ShuffleVectorSDNode>(VecOp);
24195 // Find the new index to extract from.
24196 int OrigElt = Shuf->getMaskElt(IndexC->getZExtValue());
24197
24198 // Extracting an undef index is undef.
24199 if (OrigElt == -1)
24200 return DAG.getUNDEF(ScalarVT);
24201
24202 // Select the right vector half to extract from.
24203 SDValue SVInVec;
24204 if (OrigElt < (int)NumElts) {
24205 SVInVec = VecOp.getOperand(0);
24206 } else {
24207 SVInVec = VecOp.getOperand(1);
24208 OrigElt -= NumElts;
24209 }
24210
24211 if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
24212 // TODO: Check if shuffle mask is legal?
24213 if (LegalOperations && TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VecVT) &&
24214 !VecOp.hasOneUse())
24215 return SDValue();
24216
24217 SDValue InOp = SVInVec.getOperand(OrigElt);
24218 if (InOp.getValueType() != ScalarVT) {
24219 assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
24220 InOp = DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
24221 }
24222
24223 return InOp;
24224 }
24225
24226 // FIXME: We should handle recursing on other vector shuffles and
24227 // scalar_to_vector here as well.
24228
24229 if (!LegalOperations ||
24230 // FIXME: Should really be just isOperationLegalOrCustom.
24231 TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecVT) ||
24232 TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VecVT))
24233 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, SVInVec,
24234 DAG.getVectorIdxConstant(OrigElt, DL));
24235 }
24236 }
24237
24238 // If only EXTRACT_VECTOR_ELT nodes use the source vector we can
24239 // simplify it based on the (valid) extraction indices.
24240 if (llvm::all_of(VecOp->users(), [&](SDNode *Use) {
24241 return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
24242 Use->getOperand(0) == VecOp &&
24243 isa<ConstantSDNode>(Use->getOperand(1));
24244 })) {
24245 APInt DemandedElts = APInt::getZero(NumElts);
24246 for (SDNode *User : VecOp->users()) {
24247 auto *CstElt = cast<ConstantSDNode>(User->getOperand(1));
24248 if (CstElt->getAPIntValue().ult(NumElts))
24249 DemandedElts.setBit(CstElt->getZExtValue());
24250 }
24251 if (SimplifyDemandedVectorElts(VecOp, DemandedElts, true)) {
24252 // We simplified the vector operand of this extract element. If this
24253 // extract is not dead, visit it again so it is folded properly.
24254 if (N->getOpcode() != ISD::DELETED_NODE)
24255 AddToWorklist(N);
24256 return SDValue(N, 0);
24257 }
24258 APInt DemandedBits = APInt::getAllOnes(VecEltBitWidth);
24259 if (SimplifyDemandedBits(VecOp, DemandedBits, DemandedElts, true)) {
24260 // We simplified the vector operand of this extract element. If this
24261 // extract is not dead, visit it again so it is folded properly.
24262 if (N->getOpcode() != ISD::DELETED_NODE)
24263 AddToWorklist(N);
24264 return SDValue(N, 0);
24265 }
24266 }
24267
24268 if (refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(N))
24269 return SDValue(N, 0);
24270
24271 // Everything under here is trying to match an extract of a loaded value.
24272 // If the result of load has to be truncated, then it's not necessarily
24273 // profitable.
24274 bool BCNumEltsChanged = false;
24275 EVT ExtVT = VecVT.getVectorElementType();
24276 EVT LVT = ExtVT;
24277 if (ScalarVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, ScalarVT))
24278 return SDValue();
24279
24280 if (VecOp.getOpcode() == ISD::BITCAST) {
24281 // Don't duplicate a load with other uses.
24282 if (!VecOp.hasOneUse())
24283 return SDValue();
24284
24285 EVT BCVT = VecOp.getOperand(0).getValueType();
24286 if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
24287 return SDValue();
24288 if (NumElts != BCVT.getVectorNumElements())
24289 BCNumEltsChanged = true;
24290 VecOp = VecOp.getOperand(0);
24291 ExtVT = BCVT.getVectorElementType();
24292 }
24293
24294 // extract (vector load $addr), i --> load $addr + i * size
24295 if (!LegalOperations && !IndexC && VecOp.hasOneUse() &&
24296 ISD::isNormalLoad(VecOp.getNode()) &&
24297 !Index->hasPredecessor(VecOp.getNode())) {
24298 auto *VecLoad = dyn_cast<LoadSDNode>(VecOp);
24299 if (VecLoad && VecLoad->isSimple()) {
24300 if (SDValue Scalarized = TLI.scalarizeExtractedVectorLoad(
24301 ScalarVT, SDLoc(N), VecVT, Index, VecLoad, DAG)) {
24302 ++OpsNarrowed;
24303 return Scalarized;
24304 }
24305 }
24306 }
24307
24308 // Perform only after legalization to ensure build_vector / vector_shuffle
24309 // optimizations have already been done.
24310 if (!LegalOperations || !IndexC)
24311 return SDValue();
24312
24313 // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
24314 // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
24315 // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
24316 int Elt = IndexC->getZExtValue();
24317 LoadSDNode *LN0 = nullptr;
24318 if (ISD::isNormalLoad(VecOp.getNode())) {
24319 LN0 = cast<LoadSDNode>(VecOp);
24320 } else if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
24321 VecOp.getOperand(0).getValueType() == ExtVT &&
24322 ISD::isNormalLoad(VecOp.getOperand(0).getNode())) {
24323 // Don't duplicate a load with other uses.
24324 if (!VecOp.hasOneUse())
24325 return SDValue();
24326
24327 LN0 = cast<LoadSDNode>(VecOp.getOperand(0));
24328 }
24329 if (auto *Shuf = dyn_cast<ShuffleVectorSDNode>(VecOp)) {
24330 // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
24331 // =>
24332 // (load $addr+1*size)
24333
24334 // Don't duplicate a load with other uses.
24335 if (!VecOp.hasOneUse())
24336 return SDValue();
24337
24338 // If the bit convert changed the number of elements, it is unsafe
24339 // to examine the mask.
24340 if (BCNumEltsChanged)
24341 return SDValue();
24342
24343 // Select the input vector, guarding against out of range extract vector.
24344 int Idx = (Elt > (int)NumElts) ? -1 : Shuf->getMaskElt(Elt);
24345 VecOp = (Idx < (int)NumElts) ? VecOp.getOperand(0) : VecOp.getOperand(1);
24346
24347 if (VecOp.getOpcode() == ISD::BITCAST) {
24348 // Don't duplicate a load with other uses.
24349 if (!VecOp.hasOneUse())
24350 return SDValue();
24351
24352 VecOp = VecOp.getOperand(0);
24353 }
24354 if (ISD::isNormalLoad(VecOp.getNode())) {
24355 LN0 = cast<LoadSDNode>(VecOp);
24356 Elt = (Idx < (int)NumElts) ? Idx : Idx - (int)NumElts;
24357 Index = DAG.getConstant(Elt, DL, Index.getValueType());
24358 }
24359 } else if (VecOp.getOpcode() == ISD::CONCAT_VECTORS && !BCNumEltsChanged &&
24360 VecVT.getVectorElementType() == ScalarVT &&
24361 (!LegalTypes ||
24362 TLI.isTypeLegal(
24363 VecOp.getOperand(0).getValueType().getVectorElementType()))) {
24364 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 0
24365 // -> extract_vector_elt a, 0
24366 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 1
24367 // -> extract_vector_elt a, 1
24368 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 2
24369 // -> extract_vector_elt b, 0
24370 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 3
24371 // -> extract_vector_elt b, 1
24372 EVT ConcatVT = VecOp.getOperand(0).getValueType();
24373 unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
24374 SDValue NewIdx = DAG.getConstant(Elt % ConcatNumElts, DL,
24375 Index.getValueType());
24376
24377 SDValue ConcatOp = VecOp.getOperand(Elt / ConcatNumElts);
24378 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
24379 ConcatVT.getVectorElementType(),
24380 ConcatOp, NewIdx);
24381 return DAG.getNode(ISD::BITCAST, DL, ScalarVT, Elt);
24382 }
24383
24384 // Make sure we found a non-volatile load and the extractelement is
24385 // the only use.
24386 if (!LN0 || !LN0->hasNUsesOfValue(1,0) || !LN0->isSimple())
24387 return SDValue();
24388
24389 // If Idx was -1 above, Elt is going to be -1, so just return undef.
24390 if (Elt == -1)
24391 return DAG.getUNDEF(LVT);
24392
24393 if (SDValue Scalarized =
24394 TLI.scalarizeExtractedVectorLoad(LVT, DL, VecVT, Index, LN0, DAG)) {
24395 ++OpsNarrowed;
24396 return Scalarized;
24397 }
24398
24399 return SDValue();
24400}
24401
24402// Simplify (build_vec (ext )) to (bitcast (build_vec ))
24403SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
24404 // We perform this optimization post type-legalization because
24405 // the type-legalizer often scalarizes integer-promoted vectors.
24406 // Performing this optimization before may create bit-casts which
24407 // will be type-legalized to complex code sequences.
24408 // We perform this optimization only before the operation legalizer because we
24409 // may introduce illegal operations.
24410 if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
24411 return SDValue();
24412
24413 unsigned NumInScalars = N->getNumOperands();
24414 SDLoc DL(N);
24415 EVT VT = N->getValueType(0);
24416
24417 // Check to see if this is a BUILD_VECTOR of a bunch of values
24418 // which come from any_extend or zero_extend nodes. If so, we can create
24419 // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
24420 // optimizations. We do not handle sign-extend because we can't fill the sign
24421 // using shuffles.
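// For example (little endian):
//   (v2i32 build_vector (zext i16 %a to i32), (zext i16 %b to i32))
//     --> (v2i32 bitcast (v4i16 build_vector %a, 0, %b, 0))
// With any_extend inputs the filler elements are undef instead of zero.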
24422 EVT SourceType = MVT::Other;
24423 bool AllAnyExt = true;
24424
24425 for (unsigned i = 0; i != NumInScalars; ++i) {
24426 SDValue In = N->getOperand(i);
24427 // Ignore undef inputs.
24428 if (In.isUndef()) continue;
24429
24430 bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND;
24431 bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
24432
24433 // Abort if the element is not an extension.
24434 if (!ZeroExt && !AnyExt) {
24435 SourceType = MVT::Other;
24436 break;
24437 }
24438
24439 // The input is a ZeroExt or AnyExt. Check the original type.
24440 EVT InTy = In.getOperand(0).getValueType();
24441
24442 // Check that all of the widened source types are the same.
24443 if (SourceType == MVT::Other)
24444 // First time.
24445 SourceType = InTy;
24446 else if (InTy != SourceType) {
24447 // Multiple incoming types. Abort.
24448 SourceType = MVT::Other;
24449 break;
24450 }
24451
24452 // Check if all of the extends are ANY_EXTENDs.
24453 AllAnyExt &= AnyExt;
24454 }
24455
24456 // In order to have valid types, all of the inputs must be extended from the
24457 // same source type and all of the inputs must be any or zero extend.
24458 // Scalar sizes must be a power of two.
24459 EVT OutScalarTy = VT.getScalarType();
24460 bool ValidTypes =
24461 SourceType != MVT::Other &&
24462 llvm::has_single_bit<uint32_t>(OutScalarTy.getSizeInBits()) &&
24463 llvm::has_single_bit<uint32_t>(SourceType.getSizeInBits());
24464
24465 // Create a new simpler BUILD_VECTOR sequence which other optimizations can
24466 // turn into a single shuffle instruction.
24467 if (!ValidTypes)
24468 return SDValue();
24469
24470 // If we already have a splat buildvector, then don't fold it if it means
24471 // introducing zeros.
24472 if (!AllAnyExt && DAG.isSplatValue(SDValue(N, 0), /*AllowUndefs*/ true))
24473 return SDValue();
24474
24475 bool isLE = DAG.getDataLayout().isLittleEndian();
24476 unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
24477 assert(ElemRatio > 1 && "Invalid element size ratio");
24478 SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
24479 DAG.getConstant(0, DL, SourceType);
24480
24481 unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
24482 SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
24483
24484 // Populate the new build_vector
24485 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
24486 SDValue Cast = N->getOperand(i);
24487 assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
24488 Cast.getOpcode() == ISD::ZERO_EXTEND ||
24489 Cast.isUndef()) && "Invalid cast opcode");
24490 SDValue In;
24491 if (Cast.isUndef())
24492 In = DAG.getUNDEF(SourceType);
24493 else
24494 In = Cast->getOperand(0);
24495 unsigned Index = isLE ? (i * ElemRatio) :
24496 (i * ElemRatio + (ElemRatio - 1));
24497
24498 assert(Index < Ops.size() && "Invalid index");
24499 Ops[Index] = In;
24500 }
24501
24502 // The type of the new BUILD_VECTOR node.
24503 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
24504 assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
24505 "Invalid vector size");
24506 // Check if the new vector type is legal.
24507 if (!isTypeLegal(VecVT) ||
24508 (!TLI.isOperationLegal(ISD::BUILD_VECTOR, VecVT) &&
24509 TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)))
24510 return SDValue();
24511
24512 // Make the new BUILD_VECTOR.
24513 SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
24514
24515 // The new BUILD_VECTOR node has the potential to be further optimized.
24516 AddToWorklist(BV.getNode());
24517 // Bitcast to the desired type.
24518 return DAG.getBitcast(VT, BV);
24519}
24520
24521// Simplify (build_vec (trunc $1)
24522// (trunc (srl $1 half-width))
24523// (trunc (srl $1 (2 * half-width))))
24524// to (bitcast $1)
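// For example (little endian):
//   (v4i16 build_vector (trunc i64 %x), (trunc (srl %x, 16)),
//                       (trunc (srl %x, 32)), (trunc (srl %x, 48)))
//     --> (v4i16 bitcast i64 %x)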
24525SDValue DAGCombiner::reduceBuildVecTruncToBitCast(SDNode *N) {
24526 assert(N->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
24527
24528 EVT VT = N->getValueType(0);
24529
24530 // Don't run this before LegalizeTypes if VT is legal.
24531 // Targets may have other preferences.
24532 if (Level < AfterLegalizeTypes && TLI.isTypeLegal(VT))
24533 return SDValue();
24534
24535 // Only for little endian
24536 if (!DAG.getDataLayout().isLittleEndian())
24537 return SDValue();
24538
24539 EVT OutScalarTy = VT.getScalarType();
24540 uint64_t ScalarTypeBitsize = OutScalarTy.getSizeInBits();
24541
24542 // Only for power of two types to be sure that bitcast works well
24543 if (!isPowerOf2_64(ScalarTypeBitsize))
24544 return SDValue();
24545
24546 unsigned NumInScalars = N->getNumOperands();
24547
24548 // Look through bitcasts
24549 auto PeekThroughBitcast = [](SDValue Op) {
24550 if (Op.getOpcode() == ISD::BITCAST)
24551 return Op.getOperand(0);
24552 return Op;
24553 };
24554
24555 // The source value where all the parts are extracted.
24556 SDValue Src;
24557 for (unsigned i = 0; i != NumInScalars; ++i) {
24558 SDValue In = PeekThroughBitcast(N->getOperand(i));
24559 // Ignore undef inputs.
24560 if (In.isUndef()) continue;
24561
24562 if (In.getOpcode() != ISD::TRUNCATE)
24563 return SDValue();
24564
24565 In = PeekThroughBitcast(In.getOperand(0));
24566
24567 if (In.getOpcode() != ISD::SRL) {
24568 // For now only build_vec without shuffling, handle shifts here in the
24569 // future.
24570 if (i != 0)
24571 return SDValue();
24572
24573 Src = In;
24574 } else {
24575 // In is SRL
24576 SDValue part = PeekThroughBitcast(In.getOperand(0));
24577
24578 if (!Src) {
24579 Src = part;
24580 } else if (Src != part) {
24581 // Vector parts do not stem from the same variable
24582 return SDValue();
24583 }
24584
24585 SDValue ShiftAmtVal = In.getOperand(1);
24586 if (!isa<ConstantSDNode>(ShiftAmtVal))
24587 return SDValue();
24588
24589 uint64_t ShiftAmt = In.getConstantOperandVal(1);
24590
24591 // The extracted value is not extracted at the right position
24592 if (ShiftAmt != i * ScalarTypeBitsize)
24593 return SDValue();
24594 }
24595 }
24596
24597 // Only cast if the size is the same
24598 if (!Src || Src.getValueType().getSizeInBits() != VT.getSizeInBits())
24599 return SDValue();
24600
24601 return DAG.getBitcast(VT, Src);
24602}
24603
24604SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
24605 ArrayRef<int> VectorMask,
24606 SDValue VecIn1, SDValue VecIn2,
24607 unsigned LeftIdx, bool DidSplitVec) {
24608 EVT VT = N->getValueType(0);
24609 EVT InVT1 = VecIn1.getValueType();
24610 EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
24611
24612 unsigned NumElems = VT.getVectorNumElements();
24613 unsigned ShuffleNumElems = NumElems;
24614
24615 // If we artificially split a vector in two already, then the offsets in the
24616 // operands will all be based off of VecIn1, even those in VecIn2.
24617 unsigned Vec2Offset = DidSplitVec ? 0 : InVT1.getVectorNumElements();
24618
24619 uint64_t VTSize = VT.getFixedSizeInBits();
24620 uint64_t InVT1Size = InVT1.getFixedSizeInBits();
24621 uint64_t InVT2Size = InVT2.getFixedSizeInBits();
24622
24623 assert(InVT2Size <= InVT1Size &&
24624 "Inputs must be sorted to be in non-increasing vector size order.");
24625
24626 // We can't generate a shuffle node with mismatched input and output types.
24627 // Try to make the types match the type of the output.
24628 if (InVT1 != VT || InVT2 != VT) {
24629 if ((VTSize % InVT1Size == 0) && InVT1 == InVT2) {
24630 // If the output vector length is a multiple of both input lengths,
24631 // we can concatenate them and pad the rest with undefs.
24632 unsigned NumConcats = VTSize / InVT1Size;
24633 assert(NumConcats >= 2 && "Concat needs at least two inputs!");
24634 SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
24635 ConcatOps[0] = VecIn1;
24636 ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
24637 VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
24638 VecIn2 = SDValue();
24639 } else if (InVT1Size == VTSize * 2) {
24640 if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
24641 return SDValue();
24642
24643 if (!VecIn2.getNode()) {
24644 // If we only have one input vector, and it's twice the size of the
24645 // output, split it in two.
24646 VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
24647 DAG.getVectorIdxConstant(NumElems, DL));
24648 VecIn1 = DAG.getExtractSubvector(DL, VT, VecIn1, 0);
24649 // Since we now have shorter input vectors, adjust the offset of the
24650 // second vector's start.
24651 Vec2Offset = NumElems;
24652 } else {
24653 assert(InVT2Size <= InVT1Size &&
24654 "Second input is not going to be larger than the first one.");
24655
24656 // VecIn1 is wider than the output, and we have another, possibly
24657 // smaller input. Pad the smaller input with undefs, shuffle at the
24658 // input vector width, and extract the output.
24659 // The shuffle type is different than VT, so check legality again.
24660 if (LegalOperations &&
24661 !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
24662 return SDValue();
24663
24664 // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
24665 // lower it back into a BUILD_VECTOR. So if the inserted type is
24666 // illegal, don't even try.
24667 if (InVT1 != InVT2) {
24668 if (!TLI.isTypeLegal(InVT2))
24669 return SDValue();
24670 VecIn2 = DAG.getInsertSubvector(DL, DAG.getUNDEF(InVT1), VecIn2, 0);
24671 }
24672 ShuffleNumElems = NumElems * 2;
24673 }
24674 } else if (InVT2Size * 2 == VTSize && InVT1Size == VTSize) {
24675 SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
24676 ConcatOps[0] = VecIn2;
24677 VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
24678 } else if (InVT1Size / VTSize > 1 && InVT1Size % VTSize == 0) {
24679 if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems) ||
24680 !TLI.isTypeLegal(InVT1) || !TLI.isTypeLegal(InVT2))
24681 return SDValue();
24682 // If dest vector has less than two elements, then use shuffle and extract
24683 // from larger regs will cost even more.
24684 if (VT.getVectorNumElements() <= 2 || !VecIn2.getNode())
24685 return SDValue();
24686 assert(InVT2Size <= InVT1Size &&
24687 "Second input is not going to be larger than the first one.");
24688
24689 // VecIn1 is wider than the output, and we have another, possibly
24690 // smaller input. Pad the smaller input with undefs, shuffle at the
24691 // input vector width, and extract the output.
24692 // The shuffle type is different than VT, so check legality again.
24693 if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
24694 return SDValue();
24695
24696 if (InVT1 != InVT2) {
24697 VecIn2 = DAG.getInsertSubvector(DL, DAG.getUNDEF(InVT1), VecIn2, 0);
24698 }
24699 ShuffleNumElems = InVT1Size / VTSize * NumElems;
24700 } else {
24701 // TODO: Support cases where the length mismatch isn't exactly by a
24702 // factor of 2.
24703 // TODO: Move this check upwards, so that if we have bad type
24704 // mismatches, we don't create any DAG nodes.
24705 return SDValue();
24706 }
24707 }
24708
24709 // Initialize mask to undef.
24710 SmallVector<int, 8> Mask(ShuffleNumElems, -1);
24711
24712 // Only need to run up to the number of elements actually used, not the
24713 // total number of elements in the shuffle - if we are shuffling a wider
24714 // vector, the high lanes should be set to undef.
24715 for (unsigned i = 0; i != NumElems; ++i) {
24716 if (VectorMask[i] <= 0)
24717 continue;
24718
24719 unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
24720 if (VectorMask[i] == (int)LeftIdx) {
24721 Mask[i] = ExtIndex;
24722 } else if (VectorMask[i] == (int)LeftIdx + 1) {
24723 Mask[i] = Vec2Offset + ExtIndex;
24724 }
24725 }
24726
24727 // The type the input vectors may have changed above.
24728 InVT1 = VecIn1.getValueType();
24729
24730 // If we already have a VecIn2, it should have the same type as VecIn1.
24731 // If we don't, get an undef/zero vector of the appropriate type.
24732 VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
24733 assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");
24734
24735 SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
24736 if (ShuffleNumElems > NumElems)
24737 Shuffle = DAG.getExtractSubvector(DL, VT, Shuffle, 0);
24738
24739 return Shuffle;
24740}
24741
24742 static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
24743 assert(BV->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
24744
24745 // First, determine where the build vector is not undef.
24746 // TODO: We could extend this to handle zero elements as well as undefs.
24747 int NumBVOps = BV->getNumOperands();
24748 int ZextElt = -1;
24749 for (int i = 0; i != NumBVOps; ++i) {
24750 SDValue Op = BV->getOperand(i);
24751 if (Op.isUndef())
24752 continue;
24753 if (ZextElt == -1)
24754 ZextElt = i;
24755 else
24756 return SDValue();
24757 }
24758 // Bail out if there's no non-undef element.
24759 if (ZextElt == -1)
24760 return SDValue();
24761
24762 // The build vector contains some number of undef elements and exactly
24763 // one other element. That other element must be a zero-extended scalar
24764 // extracted from a vector at a constant index to turn this into a shuffle.
24765 // Also, require that the build vector does not implicitly truncate/extend
24766 // its elements.
24767 // TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND.
24768 EVT VT = BV->getValueType(0);
24769 SDValue Zext = BV->getOperand(ZextElt);
24770 if (Zext.getOpcode() != ISD::ZERO_EXTEND || !Zext.hasOneUse() ||
24771 Zext.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
24772 !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)) ||
24773 Zext.getValueSizeInBits() != VT.getScalarSizeInBits())
24774 return SDValue();
24775
24776 // The zero-extend must be a multiple of the source size, and we must be
24777 // building a vector of the same size as the source of the extract element.
24778 SDValue Extract = Zext.getOperand(0);
24779 unsigned DestSize = Zext.getValueSizeInBits();
24780 unsigned SrcSize = Extract.getValueSizeInBits();
24781 if (DestSize % SrcSize != 0 ||
24782 Extract.getOperand(0).getValueSizeInBits() != VT.getSizeInBits())
24783 return SDValue();
24784
24785 // Create a shuffle mask that will combine the extracted element with zeros
24786 // and undefs.
24787 int ZextRatio = DestSize / SrcSize;
24788 int NumMaskElts = NumBVOps * ZextRatio;
24789 SmallVector<int, 32> ShufMask(NumMaskElts, -1);
24790 for (int i = 0; i != NumMaskElts; ++i) {
24791 if (i / ZextRatio == ZextElt) {
24792 // The low bits of the (potentially translated) extracted element map to
24793 // the source vector. The high bits map to zero. We will use a zero vector
24794 // as the 2nd source operand of the shuffle, so use the 1st element of
24795 // that vector (mask value is number-of-elements) for the high bits.
24796 int Low = DAG.getDataLayout().isBigEndian() ? (ZextRatio - 1) : 0;
24797 ShufMask[i] = (i % ZextRatio == Low) ? Extract.getConstantOperandVal(1)
24798 : NumMaskElts;
24799 }
24800
24801 // Undef elements of the build vector remain undef because we initialize
24802 // the shuffle mask with -1.
24803 }
24804
24805 // buildvec undef, ..., (zext (extractelt V, IndexC)), undef... -->
24806 // bitcast (shuffle V, ZeroVec, VectorMask)
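// For example, for a v4i32 build_vector whose only defined element is
// (zext (extractelt v8i16 %v, 5) to i32) in position 2, the v8i16 shuffle
// mask is <u,u,u,u,5,8,u,u> (little endian): the low half of that i32 lane
// comes from %v and the high half comes from the zero vector.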
24807 SDLoc DL(BV);
24808 EVT VecVT = Extract.getOperand(0).getValueType();
24809 SDValue ZeroVec = DAG.getConstant(0, DL, VecVT);
24810 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
24811 SDValue Shuf = TLI.buildLegalVectorShuffle(VecVT, DL, Extract.getOperand(0),
24812 ZeroVec, ShufMask, DAG);
24813 if (!Shuf)
24814 return SDValue();
24815 return DAG.getBitcast(VT, Shuf);
24816}
24817
24818// FIXME: promote to STLExtras.
24819template <typename R, typename T>
24820static auto getFirstIndexOf(R &&Range, const T &Val) {
24821 auto I = find(Range, Val);
24822 if (I == Range.end())
24823 return static_cast<decltype(std::distance(Range.begin(), I))>(-1);
24824 return std::distance(Range.begin(), I);
24825}
24826
24827// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
24828// operations. If the types of the vectors we're extracting from allow it,
24829// turn this into a vector_shuffle node.
24830SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
24831 SDLoc DL(N);
24832 EVT VT = N->getValueType(0);
24833
24834 // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
24835 if (!isTypeLegal(VT))
24836 return SDValue();
24837
24838 if (SDValue V = reduceBuildVecToShuffleWithZero(N, DAG))
24839 return V;
24840
24841 // May only combine to shuffle after legalize if shuffle is legal.
24842 if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
24843 return SDValue();
24844
24845 bool UsesZeroVector = false;
24846 unsigned NumElems = N->getNumOperands();
24847
24848 // Record, for each element of the newly built vector, which input vector
24849 // that element comes from. -1 stands for undef, 0 for the zero vector,
24850 // and positive values for the input vectors.
24851 // VectorMask maps each element to its vector number, and VecIn maps vector
24852 // numbers to their initial SDValues.
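// For example, for (build_vector (extractelt %a, 0), (extractelt %b, 1),
// undef, (i32 0)), VectorMask ends up as [1, 2, -1, 0] with
// VecIn = [<zero placeholder>, %a, %b].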
24853
24854 SmallVector<int, 8> VectorMask(NumElems, -1);
24855 SmallVector<SDValue, 8> VecIn;
24856 VecIn.push_back(SDValue());
24857
24858 // If we have a single extract_element with a constant index, track the index
24859 // value.
24860 unsigned OneConstExtractIndex = ~0u;
24861
24862 // Count the number of extract_vector_elt sources (i.e. non-constant or undef)
24863 unsigned NumExtracts = 0;
24864
24865 for (unsigned i = 0; i != NumElems; ++i) {
24866 SDValue Op = N->getOperand(i);
24867
24868 if (Op.isUndef())
24869 continue;
24870
24871 // See if we can use a blend with a zero vector.
24872 // TODO: Should we generalize this to a blend with an arbitrary constant
24873 // vector?
24874 if (isNullConstant(Op) || isNullFPConstant(Op)) {
24875 UsesZeroVector = true;
24876 VectorMask[i] = 0;
24877 continue;
24878 }
24879
24880 // Not an undef or zero. If the input is something other than an
24881 // EXTRACT_VECTOR_ELT with an in-range constant index, bail out.
24882 if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
24883 return SDValue();
24884
24885 SDValue ExtractedFromVec = Op.getOperand(0);
24886 if (ExtractedFromVec.getValueType().isScalableVector())
24887 return SDValue();
24888 auto *ExtractIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1));
24889 if (!ExtractIdx)
24890 return SDValue();
24891
24892 if (ExtractIdx->getAsAPIntVal().uge(
24893 ExtractedFromVec.getValueType().getVectorNumElements()))
24894 return SDValue();
24895
24896 // All inputs must have the same element type as the output.
24897 if (VT.getVectorElementType() !=
24898 ExtractedFromVec.getValueType().getVectorElementType())
24899 return SDValue();
24900
24901 OneConstExtractIndex = ExtractIdx->getZExtValue();
24902 ++NumExtracts;
24903
24904 // Have we seen this input vector before?
24905 // The vectors are expected to be tiny (usually 1 or 2 elements), so using
24906 // a map back from SDValues to numbers isn't worth it.
24907 int Idx = getFirstIndexOf(VecIn, ExtractedFromVec);
24908 if (Idx == -1) { // A new source vector?
24909 Idx = VecIn.size();
24910 VecIn.push_back(ExtractedFromVec);
24911 }
24912
24913 VectorMask[i] = Idx;
24914 }
24915
24916 // If we didn't find at least one input vector, bail out.
24917 if (VecIn.size() < 2)
24918 return SDValue();
24919
24920 // If all the Operands of BUILD_VECTOR extract from same
24921 // vector, then split the vector efficiently based on the maximum
24922 // vector access index and adjust the VectorMask and
24923 // VecIn accordingly.
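// For example, if a v4i32 build_vector extracts elements 1, 9, 10 and 2 of a
// v16i32 source (and v8i32 is legal), the source is split into two v8i32
// halves (elements 0-7 and 8-15) before forming the shuffles below.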
24924 bool DidSplitVec = false;
24925 if (VecIn.size() == 2) {
24926 // If we only found a single constant indexed extract_vector_elt feeding the
24927 // build_vector, do not produce a more complicated shuffle if the extract is
24928 // cheap with other constant/undef elements. Skip broadcast patterns with
24929 // multiple uses in the build_vector.
24930
24931 // TODO: This should be more aggressive about skipping the shuffle
24932 // formation, particularly if VecIn[1].hasOneUse(), and regardless of the
24933 // index.
24934 if (NumExtracts == 1 &&
24935 TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, VT) &&
24936 TLI.isOperationLegalOrCustom(ISD::INSERT_VECTOR_ELT, VT) &&
24937 TLI.isExtractVecEltCheap(VT, OneConstExtractIndex))
24938 return SDValue();
24939
24940 unsigned MaxIndex = 0;
24941 unsigned NearestPow2 = 0;
24942 SDValue Vec = VecIn.back();
24943 EVT InVT = Vec.getValueType();
24944 SmallVector<unsigned, 8> IndexVec(NumElems, 0);
24945
24946 for (unsigned i = 0; i < NumElems; i++) {
24947 if (VectorMask[i] <= 0)
24948 continue;
24949 unsigned Index = N->getOperand(i).getConstantOperandVal(1);
24950 IndexVec[i] = Index;
24951 MaxIndex = std::max(MaxIndex, Index);
24952 }
24953
24954 NearestPow2 = PowerOf2Ceil(MaxIndex);
24955 if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 &&
24956 NumElems * 2 < NearestPow2) {
24957 unsigned SplitSize = NearestPow2 / 2;
24958 EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
24959 InVT.getVectorElementType(), SplitSize);
24960 if (TLI.isTypeLegal(SplitVT) &&
24961 SplitSize + SplitVT.getVectorNumElements() <=
24962 InVT.getVectorNumElements()) {
24963 SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
24964 DAG.getVectorIdxConstant(SplitSize, DL));
24965 SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
24966 DAG.getVectorIdxConstant(0, DL));
24967 VecIn.pop_back();
24968 VecIn.push_back(VecIn1);
24969 VecIn.push_back(VecIn2);
24970 DidSplitVec = true;
24971
24972 for (unsigned i = 0; i < NumElems; i++) {
24973 if (VectorMask[i] <= 0)
24974 continue;
24975 VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2;
24976 }
24977 }
24978 }
24979 }
24980
24981 // Sort input vectors by decreasing vector element count,
24982 // while preserving the relative order of equally-sized vectors.
24983 // Note that we keep the first "implicit" zero vector as-is.
24984 SmallVector<SDValue, 8> SortedVecIn(VecIn);
24985 llvm::stable_sort(MutableArrayRef<SDValue>(SortedVecIn).drop_front(),
24986 [](const SDValue &a, const SDValue &b) {
24987 return a.getValueType().getVectorNumElements() >
24988 b.getValueType().getVectorNumElements();
24989 });
24990
24991 // We now also need to rebuild the VectorMask, because it referenced element
24992 // order in VecIn, and we just sorted them.
24993 for (int &SourceVectorIndex : VectorMask) {
24994 if (SourceVectorIndex <= 0)
24995 continue;
24996 unsigned Idx = getFirstIndexOf(SortedVecIn, VecIn[SourceVectorIndex]);
24997 assert(Idx > 0 && Idx < SortedVecIn.size() &&
24998 VecIn[SourceVectorIndex] == SortedVecIn[Idx] && "Remapping failure");
24999 SourceVectorIndex = Idx;
25000 }
25001
25002 VecIn = std::move(SortedVecIn);
25003
25004 // TODO: Should this fire if some of the input vectors has illegal type (like
25005 // it does now), or should we let legalization run its course first?
25006
25007 // Shuffle phase:
25008 // Take pairs of vectors, and shuffle them so that the result has elements
25009 // from these vectors in the correct places.
25010 // For example, given:
25011 // t10: i32 = extract_vector_elt t1, Constant:i64<0>
25012 // t11: i32 = extract_vector_elt t2, Constant:i64<0>
25013 // t12: i32 = extract_vector_elt t3, Constant:i64<0>
25014 // t13: i32 = extract_vector_elt t1, Constant:i64<1>
25015 // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
25016 // We will generate:
25017 // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
25018 // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
25019 SmallVector<SDValue, 4> Shuffles;
25020 for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
25021 unsigned LeftIdx = 2 * In + 1;
25022 SDValue VecLeft = VecIn[LeftIdx];
25023 SDValue VecRight =
25024 (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
25025
25026 if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
25027 VecRight, LeftIdx, DidSplitVec))
25028 Shuffles.push_back(Shuffle);
25029 else
25030 return SDValue();
25031 }
25032
25033 // If we need the zero vector as an "ingredient" in the blend tree, add it
25034 // to the list of shuffles.
25035 if (UsesZeroVector)
25036 Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
25037 : DAG.getConstantFP(0.0, DL, VT));
25038
25039 // If we only have one shuffle, we're done.
25040 if (Shuffles.size() == 1)
25041 return Shuffles[0];
25042
25043 // Update the vector mask to point to the post-shuffle vectors.
25044 for (int &Vec : VectorMask)
25045 if (Vec == 0)
25046 Vec = Shuffles.size() - 1;
25047 else
25048 Vec = (Vec - 1) / 2;
25049
25050 // More than one shuffle. Generate a binary tree of blends, e.g. if from
25051 // the previous step we got the set of shuffles t10, t11, t12, t13, we will
25052 // generate:
25053 // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
25054 // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
25055 // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
25056 // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
25057 // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
25058 // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
25059 // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21
25060
25061 // Make sure the initial size of the shuffle list is even.
25062 if (Shuffles.size() % 2)
25063 Shuffles.push_back(DAG.getUNDEF(VT));
25064
25065 for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
25066 if (CurSize % 2) {
25067 Shuffles[CurSize] = DAG.getUNDEF(VT);
25068 CurSize++;
25069 }
25070 for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
25071 int Left = 2 * In;
25072 int Right = 2 * In + 1;
25073 SmallVector<int, 8> Mask(NumElems, -1);
25074 SDValue L = Shuffles[Left];
25075 ArrayRef<int> LMask;
25076 bool IsLeftShuffle = L.getOpcode() == ISD::VECTOR_SHUFFLE &&
25077 L.use_empty() && L.getOperand(1).isUndef() &&
25078 L.getOperand(0).getValueType() == L.getValueType();
25079 if (IsLeftShuffle) {
25080 LMask = cast<ShuffleVectorSDNode>(L.getNode())->getMask();
25081 L = L.getOperand(0);
25082 }
25083 SDValue R = Shuffles[Right];
25084 ArrayRef<int> RMask;
25085 bool IsRightShuffle = R.getOpcode() == ISD::VECTOR_SHUFFLE &&
25086 R.use_empty() && R.getOperand(1).isUndef() &&
25087 R.getOperand(0).getValueType() == R.getValueType();
25088 if (IsRightShuffle) {
25089 RMask = cast<ShuffleVectorSDNode>(R.getNode())->getMask();
25090 R = R.getOperand(0);
25091 }
25092 for (unsigned I = 0; I != NumElems; ++I) {
25093 if (VectorMask[I] == Left) {
25094 Mask[I] = I;
25095 if (IsLeftShuffle)
25096 Mask[I] = LMask[I];
25097 VectorMask[I] = In;
25098 } else if (VectorMask[I] == Right) {
25099 Mask[I] = I + NumElems;
25100 if (IsRightShuffle)
25101 Mask[I] = RMask[I] + NumElems;
25102 VectorMask[I] = In;
25103 }
25104 }
25105
25106 Shuffles[In] = DAG.getVectorShuffle(VT, DL, L, R, Mask);
25107 }
25108 }
25109 return Shuffles[0];
25110}
25111
25112 // Try to turn a build vector of zero extends of extract vector elts into
25113// a vector zero extend and possibly an extract subvector.
25114// TODO: Support sign extend?
25115// TODO: Allow undef elements?
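// For example:
//   (v4i32 build_vector (zext (extractelt v8i16 %x, 4)), ...,
//                       (zext (extractelt v8i16 %x, 7)))
//     --> (v4i32 zero_extend (v4i16 extract_subvector %x, 4))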
25116SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
25117 if (LegalOperations)
25118 return SDValue();
25119
25120 EVT VT = N->getValueType(0);
25121
25122 bool FoundZeroExtend = false;
25123 SDValue Op0 = N->getOperand(0);
25124 auto checkElem = [&](SDValue Op) -> int64_t {
25125 unsigned Opc = Op.getOpcode();
25126 FoundZeroExtend |= (Opc == ISD::ZERO_EXTEND);
25127 if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND) &&
25128 Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
25129 Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
25130 if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
25131 return C->getZExtValue();
25132 return -1;
25133 };
25134
25135 // Make sure the first element matches
25136 // (zext (extract_vector_elt X, C))
25137 // Offset must be a constant multiple of the
25138 // known-minimum vector length of the result type.
25139 int64_t Offset = checkElem(Op0);
25140 if (Offset < 0 || (Offset % VT.getVectorNumElements()) != 0)
25141 return SDValue();
25142
25143 unsigned NumElems = N->getNumOperands();
25144 SDValue In = Op0.getOperand(0).getOperand(0);
25145 EVT InSVT = In.getValueType().getScalarType();
25146 EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems);
25147
25148 // Don't create an illegal input type after type legalization.
25149 if (LegalTypes && !TLI.isTypeLegal(InVT))
25150 return SDValue();
25151
25152 // Ensure all the elements come from the same vector and are adjacent.
25153 for (unsigned i = 1; i != NumElems; ++i) {
25154 if ((Offset + i) != checkElem(N->getOperand(i)))
25155 return SDValue();
25156 }
25157
25158 SDLoc DL(N);
25159 In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
25160 Op0.getOperand(0).getOperand(1));
25161 return DAG.getNode(FoundZeroExtend ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND, DL,
25162 VT, In);
25163}
25164
25165 // If this is a very simple BUILD_VECTOR with the first element being a ZERO_EXTEND,
25166 // and all other elements being constant zeros, granularize the BUILD_VECTOR's
25167 // element width, absorbing the ZERO_EXTEND, turning it into a constant zero op.
25168 // This pattern can appear during legalization.
25169//
25170// NOTE: This can be generalized to allow more than a single
25171 // non-constant-zero op, UNDEF's, and to be KnownBits-based.
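// For example (little endian, assuming i32 and v4i32 are legal):
//   (v2i64 build_vector (zext i32 %x to i64), (i64 0))
//     --> (v2i64 bitcast (v4i32 build_vector %x, 0, 0, 0))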
25172SDValue DAGCombiner::convertBuildVecZextToBuildVecWithZeros(SDNode *N) {
25173 // Don't run this after legalization. Targets may have other preferences.
25174 if (Level >= AfterLegalizeDAG)
25175 return SDValue();
25176
25177 // FIXME: support big-endian.
25178 if (DAG.getDataLayout().isBigEndian())
25179 return SDValue();
25180
25181 EVT VT = N->getValueType(0);
25182 EVT OpVT = N->getOperand(0).getValueType();
25183 assert(!VT.isScalableVector() && "Encountered scalable BUILD_VECTOR?");
25184
25185 EVT OpIntVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
25186
25187 if (!TLI.isTypeLegal(OpIntVT) ||
25188 (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::BITCAST, OpIntVT)))
25189 return SDValue();
25190
25191 unsigned EltBitwidth = VT.getScalarSizeInBits();
25192 // NOTE: the actual width of operands may be wider than that!
25193
25194 // Analyze all operands of this BUILD_VECTOR. What is the largest number of
25195 // active bits they all have? We'll want to truncate them all to that width.
25196 unsigned ActiveBits = 0;
25197 APInt KnownZeroOps(VT.getVectorNumElements(), 0);
25198 for (auto I : enumerate(N->ops())) {
25199 SDValue Op = I.value();
25200 // FIXME: support UNDEF elements?
25201 if (auto *Cst = dyn_cast<ConstantSDNode>(Op)) {
25202 unsigned OpActiveBits =
25203 Cst->getAPIntValue().trunc(EltBitwidth).getActiveBits();
25204 if (OpActiveBits == 0) {
25205 KnownZeroOps.setBit(I.index());
25206 continue;
25207 }
25208 // Profitability check: don't allow non-zero constant operands.
25209 return SDValue();
25210 }
25211 // Profitability check: there must only be a single non-zero operand,
25212 // and it must be the first operand of the BUILD_VECTOR.
25213 if (I.index() != 0)
25214 return SDValue();
25215 // The operand must be a zero-extension itself.
25216 // FIXME: this could be generalized to known leading zeros check.
25217 if (Op.getOpcode() != ISD::ZERO_EXTEND)
25218 return SDValue();
25219 unsigned CurrActiveBits =
25220 Op.getOperand(0).getValueSizeInBits().getFixedValue();
25221 assert(!ActiveBits && "Already encountered non-constant-zero operand?");
25222 ActiveBits = CurrActiveBits;
25223 // We want to at least halve the element size.
25224 if (2 * ActiveBits > EltBitwidth)
25225 return SDValue();
25226 }
25227
25228 // This BUILD_VECTOR must have at least one non-constant-zero operand.
25229 if (ActiveBits == 0)
25230 return SDValue();
25231
25232 // We have EltBitwidth bits, the *minimal* chunk size is ActiveBits,
25233 // into how many chunks can we split our element width?
25234 EVT NewScalarIntVT, NewIntVT;
25235 std::optional<unsigned> Factor;
25236 // We can split the element into at least two chunks, but not into more
25237 // than |_ EltBitwidth / ActiveBits _| chunks. Find a largest split factor
25238 // for which the element width is a multiple of it,
25239 // and the resulting types/operations on that chunk width are legal.
25240 assert(2 * ActiveBits <= EltBitwidth &&
25241 "We know that half or less bits of the element are active.");
25242 for (unsigned Scale = EltBitwidth / ActiveBits; Scale >= 2; --Scale) {
25243 if (EltBitwidth % Scale != 0)
25244 continue;
25245 unsigned ChunkBitwidth = EltBitwidth / Scale;
25246 assert(ChunkBitwidth >= ActiveBits && "As per starting point.");
25247 NewScalarIntVT = EVT::getIntegerVT(*DAG.getContext(), ChunkBitwidth);
25248 NewIntVT = EVT::getVectorVT(*DAG.getContext(), NewScalarIntVT,
25249 Scale * N->getNumOperands());
25250 if (!TLI.isTypeLegal(NewScalarIntVT) || !TLI.isTypeLegal(NewIntVT) ||
25251 (LegalOperations &&
25252 !(TLI.isOperationLegalOrCustom(ISD::TRUNCATE, NewScalarIntVT) &&
25253 TLI.isOperationLegalOrCustom(ISD::BUILD_VECTOR, NewIntVT))))
25254 continue;
25255 Factor = Scale;
25256 break;
25257 }
25258 if (!Factor)
25259 return SDValue();
25260
25261 SDLoc DL(N);
25262 SDValue ZeroOp = DAG.getConstant(0, DL, NewScalarIntVT);
25263
25264 // Recreate the BUILD_VECTOR, with elements now being Factor times smaller.
25265 SmallVector<SDValue, 16> NewOps;
25266 NewOps.reserve(NewIntVT.getVectorNumElements());
25267 for (auto I : enumerate(N->ops())) {
25268 SDValue Op = I.value();
25269 assert(!Op.isUndef() && "FIXME: after allowing UNDEF's, handle them here.");
25270 unsigned SrcOpIdx = I.index();
25271 if (KnownZeroOps[SrcOpIdx]) {
25272 NewOps.append(*Factor, ZeroOp);
25273 continue;
25274 }
25275 Op = DAG.getBitcast(OpIntVT, Op);
25276 Op = DAG.getNode(ISD::TRUNCATE, DL, NewScalarIntVT, Op);
25277 NewOps.emplace_back(Op);
25278 NewOps.append(*Factor - 1, ZeroOp);
25279 }
25280 assert(NewOps.size() == NewIntVT.getVectorNumElements());
25281 SDValue NewBV = DAG.getBuildVector(NewIntVT, DL, NewOps);
25282 NewBV = DAG.getBitcast(VT, NewBV);
25283 return NewBV;
25284}
25285
25286SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
25287 EVT VT = N->getValueType(0);
25288
25289 // A vector built entirely of undefs is undef.
25290 if (ISD::allOperandsUndef(N))
25291 return DAG.getUNDEF(VT);
25292
25293 // If this is a splat of a bitcast from another vector, change to a
25294 // concat_vector.
25295 // For example:
25296 // (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) ->
25297 // (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X))))
25298 //
25299 // If X is a build_vector itself, the concat can become a larger build_vector.
25300 // TODO: Maybe this is useful for non-splat too?
25301 if (!LegalOperations) {
25302 SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue();
25303 // Only change build_vector to a concat_vector if the splat value type is
25304 // the same as the vector element type.
25305 if (Splat && Splat.getValueType() == VT.getVectorElementType()) {
25306 Splat = peekThroughBitcasts(Splat);
25307 EVT SrcVT = Splat.getValueType();
25308 if (SrcVT.isVector()) {
25309 unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
25310 EVT NewVT = EVT::getVectorVT(*DAG.getContext(),
25311 SrcVT.getVectorElementType(), NumElts);
25312 if (!LegalTypes || TLI.isTypeLegal(NewVT)) {
25313 SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat);
25314 SDValue Concat =
25315 DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), NewVT, Ops);
25316 return DAG.getBitcast(VT, Concat);
25317 }
25318 }
25319 }
25320 }
25321
25322 // Check if we can express the BUILD_VECTOR via a subvector extract.
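// For example, (v4i32 build_vector (extract_elt V, 4), (extract_elt V, 5),
// (extract_elt V, 6), (extract_elt V, 7)) with V of type v8i32 becomes
// (v4i32 extract_subvector V, 4).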
25323 if (!LegalTypes && (N->getNumOperands() > 1)) {
25324 SDValue Op0 = N->getOperand(0);
25325 auto checkElem = [&](SDValue Op) -> uint64_t {
25326 if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
25327 (Op0.getOperand(0) == Op.getOperand(0)))
25328 if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
25329 return CNode->getZExtValue();
25330 return -1;
25331 };
25332
25333 int Offset = checkElem(Op0);
25334 for (unsigned i = 0; i < N->getNumOperands(); ++i) {
25335 if (Offset + i != checkElem(N->getOperand(i))) {
25336 Offset = -1;
25337 break;
25338 }
25339 }
25340
25341 if ((Offset == 0) &&
25342 (Op0.getOperand(0).getValueType() == N->getValueType(0)))
25343 return Op0.getOperand(0);
25344 if ((Offset != -1) &&
25345 ((Offset % N->getValueType(0).getVectorNumElements()) ==
25346 0)) // IDX must be multiple of output size.
25347 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
25348 Op0.getOperand(0), Op0.getOperand(1));
25349 }
25350
25351 if (SDValue V = convertBuildVecZextToZext(N))
25352 return V;
25353
25354 if (SDValue V = convertBuildVecZextToBuildVecWithZeros(N))
25355 return V;
25356
25357 if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
25358 return V;
25359
25360 if (SDValue V = reduceBuildVecTruncToBitCast(N))
25361 return V;
25362
25363 if (SDValue V = reduceBuildVecToShuffle(N))
25364 return V;
25365
25366 // A splat of a single element is a SPLAT_VECTOR if supported on the target.
25367 // Do this late as some of the above may replace the splat.
25368 if (TLI.getOperationAction(ISD::SPLAT_VECTOR, VT) != TargetLowering::Expand)
25369 if (SDValue V = cast<BuildVectorSDNode>(N)->getSplatValue()) {
25370 assert(!V.isUndef() && "Splat of undef should have been handled earlier");
25371 return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, V);
25372 }
25373
25374 return SDValue();
25375}
25376
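// Fold a CONCAT_VECTORS whose operands are all bitcasts of scalars (or undef)
// into a bitcast of a single BUILD_VECTOR of those scalars, e.g.
// concat (v2i32 bitcast (i64 X)), (v2i32 bitcast (i64 Y))
//   -> v4i32 bitcast (v2i64 build_vector X, Y).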
25377 static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
25378 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
25379 EVT OpVT = N->getOperand(0).getValueType();
25380
25381 // If the operands are legal vectors, leave them alone.
25382 if (TLI.isTypeLegal(OpVT) || OpVT.isScalableVector())
25383 return SDValue();
25384
25385 SDLoc DL(N);
25386 EVT VT = N->getValueType(0);
25387 SmallVector<SDValue, 8> Ops;
25388 EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
25389
25390 // Keep track of what we encounter.
25391 EVT AnyFPVT;
25392
25393 for (const SDValue &Op : N->ops()) {
25394 if (ISD::BITCAST == Op.getOpcode() &&
25395 !Op.getOperand(0).getValueType().isVector())
25396 Ops.push_back(Op.getOperand(0));
25397 else if (Op.isUndef())
25398 Ops.push_back(DAG.getNode(ISD::UNDEF, DL, SVT));
25399 else
25400 return SDValue();
25401
25402 // Note whether we encounter an integer or floating point scalar.
25403 // If it's neither, bail out, it could be something weird like x86mmx.
25404 EVT LastOpVT = Ops.back().getValueType();
25405 if (LastOpVT.isFloatingPoint())
25406 AnyFPVT = LastOpVT;
25407 else if (!LastOpVT.isInteger())
25408 return SDValue();
25409 }
25410
25411 // If any of the operands is a floating point scalar bitcast to a vector,
25412 // use floating point types throughout, and bitcast everything.
25413 // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
25414 if (AnyFPVT != EVT()) {
25415 SVT = AnyFPVT;
25416 for (SDValue &Op : Ops) {
25417 if (Op.getValueType() == SVT)
25418 continue;
25419 if (Op.isUndef())
25420 Op = DAG.getNode(ISD::UNDEF, DL, SVT);
25421 else
25422 Op = DAG.getBitcast(SVT, Op);
25423 }
25424 }
25425
25426 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
25427 VT.getSizeInBits() / SVT.getSizeInBits());
25428 return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
25429}
25430
25431// Attempt to merge nested concat_vectors/undefs.
25432// Fold concat_vectors(concat_vectors(x,y,z,w),u,u,concat_vectors(a,b,c,d))
25433// --> concat_vectors(x,y,z,w,u,u,u,u,u,u,u,u,a,b,c,d)
25434 static SDValue combineConcatVectorOfConcatVectors(SDNode *N,
25435 SelectionDAG &DAG) {
25436 EVT VT = N->getValueType(0);
25437
25438 // Ensure we're concatenating UNDEF and CONCAT_VECTORS nodes of similar types.
25439 EVT SubVT;
25440 SDValue FirstConcat;
25441 for (const SDValue &Op : N->ops()) {
25442 if (Op.isUndef())
25443 continue;
25444 if (Op.getOpcode() != ISD::CONCAT_VECTORS)
25445 return SDValue();
25446 if (!FirstConcat) {
25447 SubVT = Op.getOperand(0).getValueType();
25448 if (!DAG.getTargetLoweringInfo().isTypeLegal(SubVT))
25449 return SDValue();
25450 FirstConcat = Op;
25451 continue;
25452 }
25453 if (SubVT != Op.getOperand(0).getValueType())
25454 return SDValue();
25455 }
25456 assert(FirstConcat && "Concat of all-undefs found");
25457
25458 SmallVector<SDValue> ConcatOps;
25459 for (const SDValue &Op : N->ops()) {
25460 if (Op.isUndef()) {
25461 ConcatOps.append(FirstConcat->getNumOperands(), DAG.getUNDEF(SubVT));
25462 continue;
25463 }
25464 ConcatOps.append(Op->op_begin(), Op->op_end());
25465 }
25466 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, ConcatOps);
25467}
25468
25469// Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
25470// operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
25471// most two distinct vectors the same size as the result, attempt to turn this
25472// into a legal shuffle.
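// For example, concat (v4i32 extract_subvector X, 4),
//                     (v4i32 extract_subvector Y, 0)
// with v8i32 X and Y becomes shuffle X, Y, <4,5,6,7,8,9,10,11>.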
25473 static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
25474 EVT VT = N->getValueType(0);
25475 EVT OpVT = N->getOperand(0).getValueType();
25476
25477 // We currently can't generate an appropriate shuffle for a scalable vector.
25478 if (VT.isScalableVector())
25479 return SDValue();
25480
25481 int NumElts = VT.getVectorNumElements();
25482 int NumOpElts = OpVT.getVectorNumElements();
25483
25484 SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
25485 SmallVector<int, 8> Mask;
25486
25487 for (SDValue Op : N->ops()) {
25488 Op = peekThroughBitcasts(Op);
25489
25490 // UNDEF nodes convert to UNDEF shuffle mask values.
25491 if (Op.isUndef()) {
25492 Mask.append((unsigned)NumOpElts, -1);
25493 continue;
25494 }
25495
25496 if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
25497 return SDValue();
25498
25499 // What vector are we extracting the subvector from and at what index?
25500 SDValue ExtVec = Op.getOperand(0);
25501 int ExtIdx = Op.getConstantOperandVal(1);
25502
25503 // We want the EVT of the original extraction to correctly scale the
25504 // extraction index.
25505 EVT ExtVT = ExtVec.getValueType();
25506 ExtVec = peekThroughBitcasts(ExtVec);
25507
25508 // UNDEF nodes convert to UNDEF shuffle mask values.
25509 if (ExtVec.isUndef()) {
25510 Mask.append((unsigned)NumOpElts, -1);
25511 continue;
25512 }
25513
25514 // Ensure that we are extracting a subvector from a vector the same
25515 // size as the result.
25516 if (ExtVT.getSizeInBits() != VT.getSizeInBits())
25517 return SDValue();
25518
25519 // Scale the subvector index to account for any bitcast.
25520 int NumExtElts = ExtVT.getVectorNumElements();
25521 if (0 == (NumExtElts % NumElts))
25522 ExtIdx /= (NumExtElts / NumElts);
25523 else if (0 == (NumElts % NumExtElts))
25524 ExtIdx *= (NumElts / NumExtElts);
25525 else
25526 return SDValue();
25527
25528 // At most we can reference 2 inputs in the final shuffle.
25529 if (SV0.isUndef() || SV0 == ExtVec) {
25530 SV0 = ExtVec;
25531 for (int i = 0; i != NumOpElts; ++i)
25532 Mask.push_back(i + ExtIdx);
25533 } else if (SV1.isUndef() || SV1 == ExtVec) {
25534 SV1 = ExtVec;
25535 for (int i = 0; i != NumOpElts; ++i)
25536 Mask.push_back(i + ExtIdx + NumElts);
25537 } else {
25538 return SDValue();
25539 }
25540 }
25541
25542 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
25543 return TLI.buildLegalVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
25544 DAG.getBitcast(VT, SV1), Mask, DAG);
25545}
25546
25547 static SDValue combineConcatVectorOfCasts(SDNode *N, SelectionDAG &DAG) {
25548 unsigned CastOpcode = N->getOperand(0).getOpcode();
25549 switch (CastOpcode) {
25550 case ISD::SINT_TO_FP:
25551 case ISD::UINT_TO_FP:
25552 case ISD::FP_TO_SINT:
25553 case ISD::FP_TO_UINT:
25554 // TODO: Allow more opcodes?
25555 // case ISD::BITCAST:
25556 // case ISD::TRUNCATE:
25557 // case ISD::ZERO_EXTEND:
25558 // case ISD::SIGN_EXTEND:
25559 // case ISD::FP_EXTEND:
25560 break;
25561 default:
25562 return SDValue();
25563 }
25564
25565 EVT SrcVT = N->getOperand(0).getOperand(0).getValueType();
25566 if (!SrcVT.isVector())
25567 return SDValue();
25568
25569 // All operands of the concat must be the same kind of cast from the same
25570 // source type.
25571 SmallVector<SDValue, 4> SrcOps;
25572 for (SDValue Op : N->ops()) {
25573 if (Op.getOpcode() != CastOpcode || !Op.hasOneUse() ||
25574 Op.getOperand(0).getValueType() != SrcVT)
25575 return SDValue();
25576 SrcOps.push_back(Op.getOperand(0));
25577 }
25578
25579 // The wider cast must be supported by the target. This is unusual because
25580 // the operation support type parameter depends on the opcode. In addition,
25581 // check the other type in the cast to make sure this is really legal.
25582 EVT VT = N->getValueType(0);
25583 EVT SrcEltVT = SrcVT.getVectorElementType();
25584 ElementCount NumElts = SrcVT.getVectorElementCount() * N->getNumOperands();
25585 EVT ConcatSrcVT = EVT::getVectorVT(*DAG.getContext(), SrcEltVT, NumElts);
25586 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
25587 switch (CastOpcode) {
25588 case ISD::SINT_TO_FP:
25589 case ISD::UINT_TO_FP:
25590 if (!TLI.isOperationLegalOrCustom(CastOpcode, ConcatSrcVT) ||
25591 !TLI.isTypeLegal(VT))
25592 return SDValue();
25593 break;
25594 case ISD::FP_TO_SINT:
25595 case ISD::FP_TO_UINT:
25596 if (!TLI.isOperationLegalOrCustom(CastOpcode, VT) ||
25597 !TLI.isTypeLegal(ConcatSrcVT))
25598 return SDValue();
25599 break;
25600 default:
25601 llvm_unreachable("Unexpected cast opcode");
25602 }
25603
25604 // concat (cast X), (cast Y)... -> cast (concat X, Y...)
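// e.g. concat (v2f32 sint_to_fp (v2i32 A)), (v2f32 sint_to_fp (v2i32 B))
//   -> v4f32 sint_to_fp (v4i32 concat A, B)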
25605 SDLoc DL(N);
25606 SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatSrcVT, SrcOps);
25607 return DAG.getNode(CastOpcode, DL, VT, NewConcat);
25608}
25609
25610 // See if this is a simple CONCAT_VECTORS with no UNDEF operands, where one of
25611 // the operands is a SHUFFLE_VECTOR and all other operands are also operands of
25612 // that SHUFFLE_VECTOR; if so, create a wider SHUFFLE_VECTOR.
25613 static SDValue combineConcatVectorOfShuffleAndItsOperands(
25614 SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes,
25615 bool LegalOperations) {
25616 EVT VT = N->getValueType(0);
25617 EVT OpVT = N->getOperand(0).getValueType();
25618 if (VT.isScalableVector())
25619 return SDValue();
25620
25621 // For now, only allow simple 2-operand concatenations.
25622 if (N->getNumOperands() != 2)
25623 return SDValue();
25624
25625 // Don't create illegal types/shuffles when not allowed to.
25626 if ((LegalTypes && !TLI.isTypeLegal(VT)) ||
25627 (LegalOperations &&
25628 !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, VT)))
25629 return SDValue();
25630
25631 // Analyze all of the operands of the CONCAT_VECTORS. Out of all of them,
25632 // we want to find one that is: (1) a SHUFFLE_VECTOR (2) only used by us,
25633 // and (3) all operands of CONCAT_VECTORS must be either that SHUFFLE_VECTOR,
25634 // or one of the operands of that SHUFFLE_VECTOR (but not UNDEF!).
25635 // (4) and for now, the SHUFFLE_VECTOR must be unary.
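// For example, concat (v4i32 shuffle X, undef, M), (v4i32 X) becomes
// v8i32 shuffle (concat X, undef), undef, <M[0..3], 0, 1, 2, 3>.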
25636 ShuffleVectorSDNode *SVN = nullptr;
25637 for (SDValue Op : N->ops()) {
25638 if (auto *CurSVN = dyn_cast<ShuffleVectorSDNode>(Op);
25639 CurSVN && CurSVN->getOperand(1).isUndef() && N->isOnlyUserOf(CurSVN) &&
25640 all_of(N->ops(), [CurSVN](SDValue Op) {
25641 // FIXME: can we allow UNDEF operands?
25642 return !Op.isUndef() &&
25643 (Op.getNode() == CurSVN || is_contained(CurSVN->ops(), Op));
25644 })) {
25645 SVN = CurSVN;
25646 break;
25647 }
25648 }
25649 if (!SVN)
25650 return SDValue();
25651
25652 // We are going to pad the shuffle operands, so any index that picks
25653 // from the second operand must be adjusted.
25654 SmallVector<int, 16> AdjustedMask(SVN->getMask());
25655 assert(SVN->getOperand(1).isUndef() && "Expected unary shuffle!");
25656
25657 // Identity masks for the operands of the (padded) shuffle.
25658 SmallVector<int, 32> IdentityMask(2 * OpVT.getVectorNumElements());
25659 MutableArrayRef<int> FirstShufOpIdentityMask =
25660 MutableArrayRef<int>(IdentityMask)
25662 MutableArrayRef<int> SecondShufOpIdentityMask =
25664 std::iota(FirstShufOpIdentityMask.begin(), FirstShufOpIdentityMask.end(), 0);
25665 std::iota(SecondShufOpIdentityMask.begin(), SecondShufOpIdentityMask.end(),
25667
25668 // New combined shuffle mask.
25669 SmallVector<int, 32> Mask;
25670 Mask.reserve(VT.getVectorNumElements());
25671 for (SDValue Op : N->ops()) {
25672 assert(!Op.isUndef() && "Not expecting to concatenate UNDEF.");
25673 if (Op.getNode() == SVN) {
25674 append_range(Mask, AdjustedMask);
25675 continue;
25676 }
25677 if (Op == SVN->getOperand(0)) {
25678 append_range(Mask, FirstShufOpIdentityMask);
25679 continue;
25680 }
25681 if (Op == SVN->getOperand(1)) {
25682 append_range(Mask, SecondShufOpIdentityMask);
25683 continue;
25684 }
25685 llvm_unreachable("Unexpected operand!");
25686 }
25687
25688 // Don't create illegal shuffle masks.
25689 if (!TLI.isShuffleMaskLegal(Mask, VT))
25690 return SDValue();
25691
25692 // Pad the shuffle operands with UNDEF.
25693 SDLoc dl(N);
25694 std::array<SDValue, 2> ShufOps;
25695 for (auto I : zip(SVN->ops(), ShufOps)) {
25696 SDValue ShufOp = std::get<0>(I);
25697 SDValue &NewShufOp = std::get<1>(I);
25698 if (ShufOp.isUndef())
25699 NewShufOp = DAG.getUNDEF(VT);
25700 else {
25701 SmallVector<SDValue, 2> ShufOpParts(N->getNumOperands(),
25702 DAG.getUNDEF(OpVT));
25703 ShufOpParts[0] = ShufOp;
25704 NewShufOp = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, ShufOpParts);
25705 }
25706 }
25707 // Finally, create the new wide shuffle.
25708 return DAG.getVectorShuffle(VT, dl, ShufOps[0], ShufOps[1], Mask);
25709}
25710
25711 static SDValue combineConcatVectorOfSplats(SDNode *N, SelectionDAG &DAG,
25712 const TargetLowering &TLI,
25713 bool LegalTypes,
25714 bool LegalOperations) {
25715 EVT VT = N->getValueType(0);
25716
25717 // Post-legalization we can only create wider SPLAT_VECTOR operations if both
25718 // the type and operation is legal. The Hexagon target has custom
25719 // legalization for SPLAT_VECTOR that splits the operation into two parts and
25720 // concatenates them. Therefore, custom lowering must also be rejected in
25721 // order to avoid an infinite loop.
25722 if ((LegalTypes && !TLI.isTypeLegal(VT)) ||
25723 (LegalOperations && !TLI.isOperationLegal(ISD::SPLAT_VECTOR, VT)))
25724 return SDValue();
25725
25726 SDValue Op0 = N->getOperand(0);
25727 if (!llvm::all_equal(N->op_values()) || Op0.getOpcode() != ISD::SPLAT_VECTOR)
25728 return SDValue();
25729
25730 return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, Op0.getOperand(0));
25731}
25732
25733SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
25734 // If we only have one input vector, we don't need to do any concatenation.
25735 if (N->getNumOperands() == 1)
25736 return N->getOperand(0);
25737
25738 // Check if all of the operands are undefs.
25739 EVT VT = N->getValueType(0);
25740 if (ISD::allOperandsUndef(N))
25741 return DAG.getUNDEF(VT);
25742
25743 // Optimize concat_vectors where all but the first of the vectors are undef.
25744 if (all_of(drop_begin(N->ops()),
25745 [](const SDValue &Op) { return Op.isUndef(); })) {
25746 SDValue In = N->getOperand(0);
25747 assert(In.getValueType().isVector() && "Must concat vectors");
25748
25749 // If the input is a concat_vectors, just make a larger concat by padding
25750 // with smaller undefs.
25751 //
25752 // Legalizing in AArch64TargetLowering::LowerCONCAT_VECTORS() and combining
25753 // here could cause an infinite loop. That legalization happens when LegalDAG
25754 // is true and the input of AArch64TargetLowering::LowerCONCAT_VECTORS() is
25755 // scalable.
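// e.g. concat (concat A, B), undef -> concat A, B, undef, undef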
25756 if (In.getOpcode() == ISD::CONCAT_VECTORS && In.hasOneUse() &&
25757 !(LegalDAG && In.getValueType().isScalableVector())) {
25758 unsigned NumOps = N->getNumOperands() * In.getNumOperands();
25759 SmallVector<SDValue, 4> Ops(In->op_begin(), In->op_end());
25760 Ops.resize(NumOps, DAG.getUNDEF(Ops[0].getValueType()));
25761 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
25762 }
25763
25764 SDValue Scalar = peekThroughOneUseBitcasts(In);
25765
25766 // concat_vectors(scalar_to_vector(scalar), undef) ->
25767 // scalar_to_vector(scalar)
25768 if (!LegalOperations && Scalar.getOpcode() == ISD::SCALAR_TO_VECTOR &&
25769 Scalar.hasOneUse()) {
25770 EVT SVT = Scalar.getValueType().getVectorElementType();
25771 if (SVT == Scalar.getOperand(0).getValueType())
25772 Scalar = Scalar.getOperand(0);
25773 }
25774
25775 // concat_vectors(scalar, undef) -> scalar_to_vector(scalar)
25776 if (!Scalar.getValueType().isVector() && In.hasOneUse()) {
25777 // If the bitcast type isn't legal, it might be a trunc of a legal type;
25778 // look through the trunc so we can still do the transform:
25779 // concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
25780 if (Scalar->getOpcode() == ISD::TRUNCATE &&
25781 !TLI.isTypeLegal(Scalar.getValueType()) &&
25782 TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
25783 Scalar = Scalar->getOperand(0);
25784
25785 EVT SclTy = Scalar.getValueType();
25786
25787 if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
25788 return SDValue();
25789
25790 // Bail out if the vector size is not a multiple of the scalar size.
25791 if (VT.getSizeInBits() % SclTy.getSizeInBits())
25792 return SDValue();
25793
25794 unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
25795 if (VNTNumElms < 2)
25796 return SDValue();
25797
25798 EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
25799 if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
25800 return SDValue();
25801
25802 SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
25803 return DAG.getBitcast(VT, Res);
25804 }
25805 }
25806
25807 // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
25808 // We have already tested above for an UNDEF only concatenation.
25809 // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
25810 // -> (BUILD_VECTOR A, B, ..., C, D, ...)
25811 auto IsBuildVectorOrUndef = [](const SDValue &Op) {
25812 return Op.isUndef() || ISD::BUILD_VECTOR == Op.getOpcode();
25813 };
25814 if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
25815 SmallVector<SDValue, 8> Opnds;
25816 EVT SVT = VT.getScalarType();
25817
25818 EVT MinVT = SVT;
25819 if (!SVT.isFloatingPoint()) {
25820 // If the BUILD_VECTORs are built from integers, they may have different
25821 // operand types. Get the smallest type and truncate all operands to it.
25822 bool FoundMinVT = false;
25823 for (const SDValue &Op : N->ops())
25824 if (ISD::BUILD_VECTOR == Op.getOpcode()) {
25825 EVT OpSVT = Op.getOperand(0).getValueType();
25826 MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
25827 FoundMinVT = true;
25828 }
25829 assert(FoundMinVT && "Concat vector type mismatch");
25830 }
25831
25832 for (const SDValue &Op : N->ops()) {
25833 EVT OpVT = Op.getValueType();
25834 unsigned NumElts = OpVT.getVectorNumElements();
25835
25836 if (Op.isUndef())
25837 Opnds.append(NumElts, DAG.getUNDEF(MinVT));
25838
25839 if (ISD::BUILD_VECTOR == Op.getOpcode()) {
25840 if (SVT.isFloatingPoint()) {
25841 assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
25842 Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
25843 } else {
25844 for (unsigned i = 0; i != NumElts; ++i)
25845 Opnds.push_back(
25846 DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
25847 }
25848 }
25849 }
25850
25851 assert(VT.getVectorNumElements() == Opnds.size() &&
25852 "Concat vector type mismatch");
25853 return DAG.getBuildVector(VT, SDLoc(N), Opnds);
25854 }
25855
25856 if (SDValue V =
25857 combineConcatVectorOfSplats(N, DAG, TLI, LegalTypes, LegalOperations))
25858 return V;
25859
25860 // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
25861 // FIXME: Add support for concat_vectors(bitcast(vec0),bitcast(vec1),...).
25862 if (SDValue V = combineConcatVectorOfScalars(N, DAG))
25863 return V;
25864
25865 if (Level <= AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) {
25866 // Fold CONCAT_VECTORS of CONCAT_VECTORS (or undef) to VECTOR_SHUFFLE.
25867 if (SDValue V = combineConcatVectorOfConcatVectors(N, DAG))
25868 return V;
25869
25870 // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
25871 if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
25872 return V;
25873 }
25874
25875 if (SDValue V = combineConcatVectorOfCasts(N, DAG))
25876 return V;
25877
25878 if (SDValue V = combineConcatVectorOfShuffleAndItsOperands(
25879 N, DAG, TLI, LegalTypes, LegalOperations))
25880 return V;
25881
25882 // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
25883 // nodes often generate nop CONCAT_VECTOR nodes. Scan the CONCAT_VECTOR
25884 // operands and look for CONCAT operations that place the incoming vectors
25885 // at the exact same location.
25886 //
25887 // For scalable vectors, EXTRACT_SUBVECTOR indexes are implicitly scaled.
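// For example, concat (v4i32 extract_subvector X, 0),
//                     (v4i32 extract_subvector X, 4)
// folds to X when X already has the v8i32 result type.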
25888 SDValue SingleSource = SDValue();
25889 unsigned PartNumElem =
25890 N->getOperand(0).getValueType().getVectorMinNumElements();
25891
25892 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
25893 SDValue Op = N->getOperand(i);
25894
25895 if (Op.isUndef())
25896 continue;
25897
25898 // Check if this is the identity extract:
25899 if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
25900 return SDValue();
25901
25902 // Find the single incoming vector for the extract_subvector.
25903 if (SingleSource.getNode()) {
25904 if (Op.getOperand(0) != SingleSource)
25905 return SDValue();
25906 } else {
25907 SingleSource = Op.getOperand(0);
25908
25909 // Check the source type is the same as the type of the result.
25910 // If not, this concat may extend the vector, so we can not
25911 // optimize it away.
25912 if (SingleSource.getValueType() != N->getValueType(0))
25913 return SDValue();
25914 }
25915
25916 // Check that we are reading from the identity index.
25917 unsigned IdentityIndex = i * PartNumElem;
25918 if (Op.getConstantOperandAPInt(1) != IdentityIndex)
25919 return SDValue();
25920 }
25921
25922 if (SingleSource.getNode())
25923 return SingleSource;
25924
25925 return SDValue();
25926}
25927
25928SDValue DAGCombiner::visitVECTOR_INTERLEAVE(SDNode *N) {
25929 // Check to see if all operands are identical.
25930 if (!llvm::all_equal(N->op_values()))
25931 return SDValue();
25932
25933 // Check to see if the identical operand is a splat.
25934 if (!DAG.isSplatValue(N->getOperand(0)))
25935 return SDValue();
25936
25937 // interleave splat(X), splat(X).... --> splat(X), splat(X)....
25939 Ops.append(N->op_values().begin(), N->op_values().end());
25940 return CombineTo(N, &Ops);
25941}
25942
25943// Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find
25944// if the subvector can be sourced for free.
25945static SDValue getSubVectorSrc(SDValue V, unsigned Index, EVT SubVT) {
25946 if (V.getOpcode() == ISD::INSERT_SUBVECTOR &&
25947 V.getOperand(1).getValueType() == SubVT &&
25948 V.getConstantOperandAPInt(2) == Index) {
25949 return V.getOperand(1);
25950 }
25951 if (V.getOpcode() == ISD::CONCAT_VECTORS &&
25952 V.getOperand(0).getValueType() == SubVT &&
25953 (Index % SubVT.getVectorMinNumElements()) == 0) {
25954 uint64_t SubIdx = Index / SubVT.getVectorMinNumElements();
25955 return V.getOperand(SubIdx);
25956 }
25957 return SDValue();
25958}
25959
25960 static SDValue narrowInsertExtractVectorBinOp(EVT SubVT, SDValue BinOp,
25961 unsigned Index, const SDLoc &DL,
25962 SelectionDAG &DAG,
25963 bool LegalOperations) {
25964 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
25965 unsigned BinOpcode = BinOp.getOpcode();
25966 if (!TLI.isBinOp(BinOpcode) || BinOp->getNumValues() != 1)
25967 return SDValue();
25968
25969 EVT VecVT = BinOp.getValueType();
25970 SDValue Bop0 = BinOp.getOperand(0), Bop1 = BinOp.getOperand(1);
25971 if (VecVT != Bop0.getValueType() || VecVT != Bop1.getValueType())
25972 return SDValue();
25973 if (!TLI.isOperationLegalOrCustom(BinOpcode, SubVT, LegalOperations))
25974 return SDValue();
25975
25976 SDValue Sub0 = getSubVectorSrc(Bop0, Index, SubVT);
25977 SDValue Sub1 = getSubVectorSrc(Bop1, Index, SubVT);
25978
25979 // TODO: We could handle the case where only 1 operand is being inserted by
25980 // creating an extract of the other operand, but that requires checking
25981 // number of uses and/or costs.
25982 if (!Sub0 || !Sub1)
25983 return SDValue();
25984
25985 // We are inserting both operands of the wide binop only to extract back
25986 // to the narrow vector size. Eliminate all of the insert/extract:
25987 // ext (binop (ins ?, X, Index), (ins ?, Y, Index)), Index --> binop X, Y
25988 return DAG.getNode(BinOpcode, DL, SubVT, Sub0, Sub1, BinOp->getFlags());
25989}
25990
25991/// If we are extracting a subvector produced by a wide binary operator try
25992/// to use a narrow binary operator and/or avoid concatenation and extraction.
25993static SDValue narrowExtractedVectorBinOp(EVT VT, SDValue Src, unsigned Index,
25994 const SDLoc &DL, SelectionDAG &DAG,
25995 bool LegalOperations) {
25996 // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
25997 // some of these bailouts with other transforms.
25998
25999 if (SDValue V = narrowInsertExtractVectorBinOp(VT, Src, Index, DL, DAG,
26000 LegalOperations))
26001 return V;
26002
26003 // We are looking for an optionally bitcasted wide vector binary operator
26004 // feeding an extract subvector.
26005 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
26006 SDValue BinOp = peekThroughBitcasts(Src);
26007 unsigned BOpcode = BinOp.getOpcode();
26008 if (!TLI.isBinOp(BOpcode) || BinOp->getNumValues() != 1)
26009 return SDValue();
26010
26011 // Exclude the fake form of fneg (fsub -0.0, x) because that is likely to be
26012 // reduced to the unary fneg when it is visited, and we probably want to deal
26013 // with fneg in a target-specific way.
26014 if (BOpcode == ISD::FSUB) {
26015 auto *C = isConstOrConstSplatFP(BinOp.getOperand(0), /*AllowUndefs*/ true);
26016 if (C && C->getValueAPF().isNegZero())
26017 return SDValue();
26018 }
26019
26020 // The binop must be a vector type, so we can extract some fraction of it.
26021 EVT WideBVT = BinOp.getValueType();
26022 // The optimisations below currently assume we are dealing with fixed length
26023 // vectors. It is possible to add support for scalable vectors, but at the
26024 // moment we've done no analysis to prove whether they are profitable or not.
26025 if (!WideBVT.isFixedLengthVector())
26026 return SDValue();
26027
26028 assert((Index % VT.getVectorNumElements()) == 0 &&
26029 "Extract index is not a multiple of the vector length.");
26030
26031 // Bail out if this is not a proper multiple width extraction.
26032 unsigned WideWidth = WideBVT.getSizeInBits();
26033 unsigned NarrowWidth = VT.getSizeInBits();
26034 if (WideWidth % NarrowWidth != 0)
26035 return SDValue();
26036
26037 // Bail out if we are extracting a fraction of a single operation. This can
26038 // occur because we potentially looked through a bitcast of the binop.
26039 unsigned NarrowingRatio = WideWidth / NarrowWidth;
26040 unsigned WideNumElts = WideBVT.getVectorNumElements();
26041 if (WideNumElts % NarrowingRatio != 0)
26042 return SDValue();
26043
26044 // Bail out if the target does not support a narrower version of the binop.
26045 EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
26046 WideNumElts / NarrowingRatio);
26047 if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT,
26048 LegalOperations))
26049 return SDValue();
26050
26051 // If extraction is cheap, we don't need to look at the binop operands
26052 // for concat ops. The narrow binop alone makes this transform profitable.
26053 // We can't just reuse the original extract index operand because we may have
26054 // bitcasted.
26055 unsigned ConcatOpNum = Index / VT.getVectorNumElements();
26056 unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
26057 if (TLI.isExtractSubvectorCheap(NarrowBVT, WideBVT, ExtBOIdx) &&
26058 BinOp.hasOneUse() && Src->hasOneUse()) {
26059 // extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N)
26060 SDValue NewExtIndex = DAG.getVectorIdxConstant(ExtBOIdx, DL);
26061 SDValue X = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
26062 BinOp.getOperand(0), NewExtIndex);
26063 SDValue Y = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
26064 BinOp.getOperand(1), NewExtIndex);
26065 SDValue NarrowBinOp =
26066 DAG.getNode(BOpcode, DL, NarrowBVT, X, Y, BinOp->getFlags());
26067 return DAG.getBitcast(VT, NarrowBinOp);
26068 }
26069
26070 // Only handle the case where we are doubling and then halving. A larger ratio
26071 // may require more than two narrow binops to replace the wide binop.
26072 if (NarrowingRatio != 2)
26073 return SDValue();
26074
26075 // TODO: The motivating case for this transform is an x86 AVX1 target. That
26076 // target has temptingly almost legal versions of bitwise logic ops in 256-bit
26077 // flavors, but no other 256-bit integer support. This could be extended to
26078 // handle any binop, but that may require fixing/adding other folds to avoid
26079 // codegen regressions.
26080 if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
26081 return SDValue();
26082
26083 // We need at least one concatenation operation of a binop operand to make
26084 // this transform worthwhile. The concat must double the input vector sizes.
26085 auto GetSubVector = [ConcatOpNum](SDValue V) -> SDValue {
26086 if (V.getOpcode() == ISD::CONCAT_VECTORS && V.getNumOperands() == 2)
26087 return V.getOperand(ConcatOpNum);
26088 return SDValue();
26089 };
26090 SDValue SubVecL = GetSubVector(peekThroughBitcasts(BinOp.getOperand(0)));
26091 SDValue SubVecR = GetSubVector(peekThroughBitcasts(BinOp.getOperand(1)));
26092
26093 if (SubVecL || SubVecR) {
26094 // If a binop operand was not the result of a concat, we must extract a
26095 // half-sized operand for our new narrow binop:
26096 // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
26097 // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, IndexC)
26098 // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, IndexC), YN
26099 SDValue IndexC = DAG.getVectorIdxConstant(ExtBOIdx, DL);
26100 SDValue X = SubVecL ? DAG.getBitcast(NarrowBVT, SubVecL)
26101 : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
26102 BinOp.getOperand(0), IndexC);
26103
26104 SDValue Y = SubVecR ? DAG.getBitcast(NarrowBVT, SubVecR)
26105 : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
26106 BinOp.getOperand(1), IndexC);
26107
26108 SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
26109 return DAG.getBitcast(VT, NarrowBinOp);
26110 }
26111
26112 return SDValue();
26113}
26114
26115/// If we are extracting a subvector from a wide vector load, convert to a
26116/// narrow load to eliminate the extraction:
26117/// (extract_subvector (load wide vector)) --> (load narrow vector)
26118static SDValue narrowExtractedVectorLoad(EVT VT, SDValue Src, unsigned Index,
26119 const SDLoc &DL, SelectionDAG &DAG) {
26120 // TODO: Add support for big-endian. The offset calculation must be adjusted.
26121 if (DAG.getDataLayout().isBigEndian())
26122 return SDValue();
26123
26124 auto *Ld = dyn_cast<LoadSDNode>(Src);
26125 if (!Ld || !ISD::isNormalLoad(Ld) || !Ld->isSimple())
26126 return SDValue();
26127
26128 // We can only create byte sized loads.
26129 if (!VT.isByteSized())
26130 return SDValue();
26131
26132 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
26133 if (!TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, VT))
26134 return SDValue();
26135
26136 unsigned NumElts = VT.getVectorMinNumElements();
26137 // A fixed length vector being extracted from a scalable vector
26138 // may not be any *smaller* than the scalable one.
26139 if (Index == 0 && NumElts >= Ld->getValueType(0).getVectorMinNumElements())
26140 return SDValue();
26141
26142 // The definition of EXTRACT_SUBVECTOR states that the index must be a
26143 // multiple of the minimum number of elements in the result type.
26144 assert(Index % NumElts == 0 && "The extract subvector index is not a "
26145 "multiple of the result's element count");
26146
26147 // It's fine to use TypeSize here as we know the offset will not be negative.
26148 TypeSize Offset = VT.getStoreSize() * (Index / NumElts);
26149 std::optional<unsigned> ByteOffset;
26150 if (Offset.isFixed())
26151 ByteOffset = Offset.getFixedValue();
26152
26153 if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT, ByteOffset))
26154 return SDValue();
26155
26156 // The narrow load will be offset from the base address of the old load if
26157 // we are extracting from something besides index 0 (little-endian).
26158 // TODO: Use "BaseIndexOffset" to make this more effective.
26159 SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(), Offset, DL);
26160
26161 MachineFunction &MF = DAG.getMachineFunction();
26162 MachineMemOperand *MMO;
26163 if (Offset.isScalable()) {
26164 MachinePointerInfo MPI =
26165 MachinePointerInfo(Ld->getPointerInfo().getAddrSpace());
26166 MMO = MF.getMachineMemOperand(Ld->getMemOperand(), MPI, VT.getStoreSize());
26167 } else
26168 MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset.getFixedValue(),
26169 VT.getStoreSize());
26170
26171 SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
26172 DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
26173 return NewLd;
26174}
26175
26176/// Given EXTRACT_SUBVECTOR(VECTOR_SHUFFLE(Op0, Op1, Mask)),
26177/// try to produce VECTOR_SHUFFLE(EXTRACT_SUBVECTOR(Op?, ?),
26178/// EXTRACT_SUBVECTOR(Op?, ?),
26179/// Mask'))
26180/// iff it is legal and profitable to do so. Notably, the trimmed mask
26181/// (containing only the elements that are extracted)
26182/// must reference at most two subvectors.
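/// For example, (v4i32 extract_subvector (v8i32 shuffle X, Y,
///                                               <0,8,1,9,u,u,u,u>), 0)
/// becomes (v4i32 shuffle (extract_subvector X, 0), (extract_subvector Y, 0),
///                        <0,4,1,5>).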
26183 static SDValue foldExtractSubvectorFromShuffleVector(EVT NarrowVT, SDValue Src,
26184 unsigned Index,
26185 const SDLoc &DL,
26186 SelectionDAG &DAG,
26187 bool LegalOperations) {
26188 // Only deal with non-scalable vectors.
26189 EVT WideVT = Src.getValueType();
26190 if (!NarrowVT.isFixedLengthVector() || !WideVT.isFixedLengthVector())
26191 return SDValue();
26192
26193 // The operand must be a shufflevector.
26194 auto *WideShuffleVector = dyn_cast<ShuffleVectorSDNode>(Src);
26195 if (!WideShuffleVector)
26196 return SDValue();
26197
26198 // The old shuffle needs to go away.
26199 if (!WideShuffleVector->hasOneUse())
26200 return SDValue();
26201
26202 // And the narrow shufflevector that we'll form must be legal.
26203 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
26204 if (LegalOperations &&
26205 !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, NarrowVT))
26206 return SDValue();
26207
26208 int NumEltsExtracted = NarrowVT.getVectorNumElements();
26209 assert((Index % NumEltsExtracted) == 0 &&
26210 "Extract index is not a multiple of the output vector length.");
26211
26212 int WideNumElts = WideVT.getVectorNumElements();
26213
26214 SmallVector<int, 16> NewMask;
26215 NewMask.reserve(NumEltsExtracted);
26216 SmallSetVector<std::pair<SDValue /*Op*/, int /*SubvectorIndex*/>, 2>
26217 DemandedSubvectors;
26218
26219 // Try to decode the wide mask into narrow mask from at most two subvectors.
26220 for (int M : WideShuffleVector->getMask().slice(Index, NumEltsExtracted)) {
26221 assert((M >= -1) && (M < (2 * WideNumElts)) &&
26222 "Out-of-bounds shuffle mask?");
26223
26224 if (M < 0) {
26225 // Does not depend on operands, does not require adjustment.
26226 NewMask.emplace_back(M);
26227 continue;
26228 }
26229
26230 // From which operand of the shuffle does this shuffle mask element pick?
26231 int WideShufOpIdx = M / WideNumElts;
26232 // Which element of that operand is picked?
26233 int OpEltIdx = M % WideNumElts;
26234
26235 assert((OpEltIdx + WideShufOpIdx * WideNumElts) == M &&
26236 "Shuffle mask vector decomposition failure.");
26237
26238 // And which NumEltsExtracted-sized subvector of that operand is that?
26239 int OpSubvecIdx = OpEltIdx / NumEltsExtracted;
26240 // And which element within that subvector of that operand is that?
26241 int OpEltIdxInSubvec = OpEltIdx % NumEltsExtracted;
26242
26243 assert((OpEltIdxInSubvec + OpSubvecIdx * NumEltsExtracted) == OpEltIdx &&
26244 "Shuffle mask subvector decomposition failure.");
26245
26246 assert((OpEltIdxInSubvec + OpSubvecIdx * NumEltsExtracted +
26247 WideShufOpIdx * WideNumElts) == M &&
26248 "Shuffle mask full decomposition failure.");
26249
26250 SDValue Op = WideShuffleVector->getOperand(WideShufOpIdx);
26251
26252 if (Op.isUndef()) {
26253 // Picking from an undef operand. Let's adjust mask instead.
26254 NewMask.emplace_back(-1);
26255 continue;
26256 }
26257
26258 const std::pair<SDValue, int> DemandedSubvector =
26259 std::make_pair(Op, OpSubvecIdx);
26260
26261 if (DemandedSubvectors.insert(DemandedSubvector)) {
26262 if (DemandedSubvectors.size() > 2)
26263 return SDValue(); // We can't handle more than two subvectors.
26264 // How many elements into the WideVT does this subvector start?
26265 int Index = NumEltsExtracted * OpSubvecIdx;
26266 // Bail out if the extraction isn't going to be cheap.
26267 if (!TLI.isExtractSubvectorCheap(NarrowVT, WideVT, Index))
26268 return SDValue();
26269 }
26270
26271 // Ok, but from which operand of the new shuffle will this element pick?
26272 int NewOpIdx =
26273 getFirstIndexOf(DemandedSubvectors.getArrayRef(), DemandedSubvector);
26274 assert((NewOpIdx == 0 || NewOpIdx == 1) && "Unexpected operand index.");
26275
26276 int AdjM = OpEltIdxInSubvec + NewOpIdx * NumEltsExtracted;
26277 NewMask.emplace_back(AdjM);
26278 }
26279 assert(NewMask.size() == (unsigned)NumEltsExtracted && "Produced bad mask.");
26280 assert(DemandedSubvectors.size() <= 2 &&
26281 "Should have ended up demanding at most two subvectors.");
26282
26283 // Did we discover that the shuffle does not actually depend on operands?
26284 if (DemandedSubvectors.empty())
26285 return DAG.getUNDEF(NarrowVT);
26286
26287 // Profitability check: only deal with extractions from the first subvector
26288 // unless the mask becomes an identity mask.
26289 if (!ShuffleVectorInst::isIdentityMask(NewMask, NewMask.size()) ||
26290 any_of(NewMask, [](int M) { return M < 0; }))
26291 for (auto &DemandedSubvector : DemandedSubvectors)
26292 if (DemandedSubvector.second != 0)
26293 return SDValue();
26294
26295 // We still perform the exact same EXTRACT_SUBVECTOR, just on different
26296 // operand[s]/index[es], so there is no point in checking its legality.
26297
26298 // Do not turn a legal shuffle into an illegal one.
26299 if (TLI.isShuffleMaskLegal(WideShuffleVector->getMask(), WideVT) &&
26300 !TLI.isShuffleMaskLegal(NewMask, NarrowVT))
26301 return SDValue();
26302
26303 SmallVector<SDValue, 2> NewOps;
26304 for (const std::pair<SDValue /*Op*/, int /*SubvectorIndex*/>
26305 &DemandedSubvector : DemandedSubvectors) {
26306 // How many elements into the WideVT does this subvector start?
26307 int Index = NumEltsExtracted * DemandedSubvector.second;
26308 SDValue IndexC = DAG.getVectorIdxConstant(Index, DL);
26309 NewOps.emplace_back(DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowVT,
26310 DemandedSubvector.first, IndexC));
26311 }
26312 assert((NewOps.size() == 1 || NewOps.size() == 2) &&
26313 "Should end up with either one or two ops");
26314
26315 // If we ended up with only one operand, pad with an undef.
26316 if (NewOps.size() == 1)
26317 NewOps.emplace_back(DAG.getUNDEF(NarrowVT));
26318
26319 return DAG.getVectorShuffle(NarrowVT, DL, NewOps[0], NewOps[1], NewMask);
26320}
26321
26322SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
26323 EVT NVT = N->getValueType(0);
26324 SDValue V = N->getOperand(0);
26325 uint64_t ExtIdx = N->getConstantOperandVal(1);
26326 SDLoc DL(N);
26327
26328 // Extract from UNDEF is UNDEF.
26329 if (V.isUndef())
26330 return DAG.getUNDEF(NVT);
26331
26332 if (SDValue NarrowLoad = narrowExtractedVectorLoad(NVT, V, ExtIdx, DL, DAG))
26333 return NarrowLoad;
26334
26335 // Combine an extract of an extract into a single extract_subvector.
26336 // ext (ext X, C), 0 --> ext X, C
26337 if (ExtIdx == 0 && V.getOpcode() == ISD::EXTRACT_SUBVECTOR && V.hasOneUse()) {
26338 // The index has to be a multiple of the new result type's known minimum
26339 // vector length.
26340 if (V.getConstantOperandVal(1) % NVT.getVectorMinNumElements() == 0 &&
26341 TLI.isExtractSubvectorCheap(NVT, V.getOperand(0).getValueType(),
26342 V.getConstantOperandVal(1)) &&
26343 (!LegalOperations || TLI.isOperationLegal(ISD::EXTRACT_SUBVECTOR, NVT))) {
26344 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT, V.getOperand(0),
26345 V.getOperand(1));
26346 }
26347 }
26348
26349 // ty1 extract_vector(ty2 splat(V))) -> ty1 splat(V)
26350 if (V.getOpcode() == ISD::SPLAT_VECTOR)
26351 if (DAG.isConstantValueOfAnyType(V.getOperand(0)) || V.hasOneUse())
26352 if (!LegalOperations || TLI.isOperationLegal(ISD::SPLAT_VECTOR, NVT))
26353 return DAG.getSplatVector(NVT, DL, V.getOperand(0));
26354
26355 // extract_subvector(insert_subvector(x,y,c1),c2)
26356 // --> extract_subvector(y,c2-c1)
26357 // iff we're just extracting from the inserted subvector.
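// e.g. (v2i32 extract_subvector (insert_subvector X, (v4i32 Y), 0), 2)
//   -> (v2i32 extract_subvector Y, 2)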
26358 if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
26359 SDValue InsSub = V.getOperand(1);
26360 EVT InsSubVT = InsSub.getValueType();
26361 unsigned NumInsElts = InsSubVT.getVectorMinNumElements();
26362 unsigned InsIdx = V.getConstantOperandVal(2);
26363 unsigned NumSubElts = NVT.getVectorMinNumElements();
26364 if (InsIdx <= ExtIdx && (ExtIdx + NumSubElts) <= (InsIdx + NumInsElts) &&
26365 TLI.isExtractSubvectorCheap(NVT, InsSubVT, ExtIdx - InsIdx) &&
26366 InsSubVT.isFixedLengthVector() && NVT.isFixedLengthVector() &&
26367 V.getValueType().isFixedLengthVector())
26368 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT, InsSub,
26369 DAG.getVectorIdxConstant(ExtIdx - InsIdx, DL));
26370 }
26371
26372 // Try to move vector bitcast after extract_subv by scaling extraction index:
26373 // extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
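// e.g. (v2i64 extract_subvector (v4i64 bitcast (v8i32 X)), 2)
//   -> (v2i64 bitcast (v4i32 extract_subvector X, 4))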
26374 if (V.getOpcode() == ISD::BITCAST &&
26375 V.getOperand(0).getValueType().isVector() &&
26376 (!LegalOperations || TLI.isOperationLegal(ISD::BITCAST, NVT))) {
26377 SDValue SrcOp = V.getOperand(0);
26378 EVT SrcVT = SrcOp.getValueType();
26379 unsigned SrcNumElts = SrcVT.getVectorMinNumElements();
26380 unsigned DestNumElts = V.getValueType().getVectorMinNumElements();
26381 if ((SrcNumElts % DestNumElts) == 0) {
26382 unsigned SrcDestRatio = SrcNumElts / DestNumElts;
26383 ElementCount NewExtEC = NVT.getVectorElementCount() * SrcDestRatio;
26384 EVT NewExtVT =
26385 EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(), NewExtEC);
26387 SDValue NewIndex = DAG.getVectorIdxConstant(ExtIdx * SrcDestRatio, DL);
26388 SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
26389 V.getOperand(0), NewIndex);
26390 return DAG.getBitcast(NVT, NewExtract);
26391 }
26392 }
26393 if ((DestNumElts % SrcNumElts) == 0) {
26394 unsigned DestSrcRatio = DestNumElts / SrcNumElts;
26395 if (NVT.getVectorElementCount().isKnownMultipleOf(DestSrcRatio)) {
26396 ElementCount NewExtEC =
26397 NVT.getVectorElementCount().divideCoefficientBy(DestSrcRatio);
26398 EVT ScalarVT = SrcVT.getScalarType();
26399 if ((ExtIdx % DestSrcRatio) == 0) {
26400 unsigned IndexValScaled = ExtIdx / DestSrcRatio;
26401 EVT NewExtVT =
26402 EVT::getVectorVT(*DAG.getContext(), ScalarVT, NewExtEC);
26404 SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
26405 SDValue NewExtract =
26406 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
26407 V.getOperand(0), NewIndex);
26408 return DAG.getBitcast(NVT, NewExtract);
26409 }
26410 if (NewExtEC.isScalar() &&
26412 SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
26413 SDValue NewExtract =
26414 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT,
26415 V.getOperand(0), NewIndex);
26416 return DAG.getBitcast(NVT, NewExtract);
26417 }
26418 }
26419 }
26420 }
26421 }
26422
26423 if (V.getOpcode() == ISD::CONCAT_VECTORS) {
26424 unsigned ExtNumElts = NVT.getVectorMinNumElements();
26425 EVT ConcatSrcVT = V.getOperand(0).getValueType();
26426 assert(ConcatSrcVT.getVectorElementType() == NVT.getVectorElementType() &&
26427 "Concat and extract subvector do not change element type");
26428
26429 unsigned ConcatSrcNumElts = ConcatSrcVT.getVectorMinNumElements();
26430 unsigned ConcatOpIdx = ExtIdx / ConcatSrcNumElts;
26431
26432 // If the concatenated source types match this extract, it's a direct
26433 // simplification:
26434 // extract_subvec (concat V1, V2, ...), i --> Vi
26435 if (NVT.getVectorElementCount() == ConcatSrcVT.getVectorElementCount())
26436 return V.getOperand(ConcatOpIdx);
26437
26438 // If the concatenated source vectors are a multiple length of this extract,
26439 // then extract a fraction of one of those source vectors directly from a
26440 // concat operand. Example:
26441 // v2i8 extract_subvec (v16i8 concat (v8i8 X), (v8i8 Y), 14 -->
26442 // v2i8 extract_subvec v8i8 Y, 6
26443 if (NVT.isFixedLengthVector() && ConcatSrcVT.isFixedLengthVector() &&
26444 ConcatSrcNumElts % ExtNumElts == 0) {
26445 unsigned NewExtIdx = ExtIdx - ConcatOpIdx * ConcatSrcNumElts;
26446 assert(NewExtIdx + ExtNumElts <= ConcatSrcNumElts &&
26447 "Trying to extract from >1 concat operand?");
26448 assert(NewExtIdx % ExtNumElts == 0 &&
26449 "Extract index is not a multiple of the input vector length.");
26450 SDValue NewIndexC = DAG.getVectorIdxConstant(NewExtIdx, DL);
26451 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT,
26452 V.getOperand(ConcatOpIdx), NewIndexC);
26453 }
26454 }
26455
26456 if (SDValue Shuffle = foldExtractSubvectorFromShuffleVector(
26457 NVT, V, ExtIdx, DL, DAG, LegalOperations))
26458 return Shuffle;
26459
26460 if (SDValue NarrowBOp =
26461 narrowExtractedVectorBinOp(NVT, V, ExtIdx, DL, DAG, LegalOperations))
26462 return NarrowBOp;
26463
26465
26466 // If the input is a build vector, try to make a smaller build vector.
26467 if (V.getOpcode() == ISD::BUILD_VECTOR) {
26468 EVT InVT = V.getValueType();
26469 unsigned ExtractSize = NVT.getSizeInBits();
26470 unsigned EltSize = InVT.getScalarSizeInBits();
26471 // Only do this if we won't split any elements.
26472 if (ExtractSize % EltSize == 0) {
26473 unsigned NumElems = ExtractSize / EltSize;
26474 EVT EltVT = InVT.getVectorElementType();
26475 EVT ExtractVT =
26476 NumElems == 1 ? EltVT
26477 : EVT::getVectorVT(*DAG.getContext(), EltVT, NumElems);
26478 if ((Level < AfterLegalizeDAG ||
26479 (NumElems == 1 ||
26480 TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&
26481 (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
26482 unsigned IdxVal = (ExtIdx * NVT.getScalarSizeInBits()) / EltSize;
26483
26484 if (NumElems == 1) {
26485 SDValue Src = V->getOperand(IdxVal);
26486 if (EltVT != Src.getValueType())
26487 Src = DAG.getNode(ISD::TRUNCATE, DL, EltVT, Src);
26488 return DAG.getBitcast(NVT, Src);
26489 }
26490
26491 // Extract the pieces from the original build_vector.
26492 SDValue BuildVec =
26493 DAG.getBuildVector(ExtractVT, DL, V->ops().slice(IdxVal, NumElems));
26494 return DAG.getBitcast(NVT, BuildVec);
26495 }
26496 }
26497 }
26498
26499 if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
26500 // Handle only the simple case where the vector being inserted and the
26501 // vector being extracted are of the same size.
26502 EVT SmallVT = V.getOperand(1).getValueType();
26503 if (NVT.bitsEq(SmallVT)) {
26504 // Combine:
26505 // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
26506 // Into:
26507 // indices are equal or bit offsets are equal => V1
26508 // otherwise => (extract_subvec V1, ExtIdx)
26509 uint64_t InsIdx = V.getConstantOperandVal(2);
26510 if (InsIdx * SmallVT.getScalarSizeInBits() ==
26511 ExtIdx * NVT.getScalarSizeInBits()) {
26512 if (!LegalOperations || TLI.isOperationLegal(ISD::BITCAST, NVT))
26513 return DAG.getBitcast(NVT, V.getOperand(1));
26514 } else {
26515 return DAG.getNode(
26517 DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
26518 N->getOperand(1));
26519 }
26520 }
26521 }
26522
26523 // If only EXTRACT_SUBVECTOR nodes use the source vector we can
26524 // simplify it based on the (valid) extractions.
26525 if (!V.getValueType().isScalableVector() &&
26526 llvm::all_of(V->users(), [&](SDNode *Use) {
26527 return Use->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
26528 Use->getOperand(0) == V;
26529 })) {
26530 unsigned NumElts = V.getValueType().getVectorNumElements();
26531 APInt DemandedElts = APInt::getZero(NumElts);
26532 for (SDNode *User : V->users()) {
26533 unsigned ExtIdx = User->getConstantOperandVal(1);
26534 unsigned NumSubElts = User->getValueType(0).getVectorNumElements();
26535 DemandedElts.setBits(ExtIdx, ExtIdx + NumSubElts);
26536 }
26537 if (SimplifyDemandedVectorElts(V, DemandedElts, /*AssumeSingleUse=*/true)) {
26538 // We simplified the vector operand of this extract subvector. If this
26539 // extract is not dead, visit it again so it is folded properly.
26540 if (N->getOpcode() != ISD::DELETED_NODE)
26541 AddToWorklist(N);
26542 return SDValue(N, 0);
26543 }
26544 } else {
26545 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
26546 return SDValue(N, 0);
26547 }
26548
26549 return SDValue();
26550}
26551
26552/// Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles
26553/// followed by concatenation. Narrow vector ops may have better performance
26554/// than wide ops, and this can unlock further narrowing of other vector ops.
26555/// Targets can invert this transform later if it is not profitable.
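/// For example, v8i32 shuffle (concat X, undef), (concat Y, undef),
///                            <0,1,8,9,2,3,10,11>
/// becomes concat (v4i32 shuffle X, Y, <0,1,4,5>),
///                (v4i32 shuffle X, Y, <2,3,6,7>).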
26556 static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf,
26557 SelectionDAG &DAG) {
26558 SDValue N0 = Shuf->getOperand(0), N1 = Shuf->getOperand(1);
26559 if (N0.getOpcode() != ISD::CONCAT_VECTORS || N0.getNumOperands() != 2 ||
26560 N1.getOpcode() != ISD::CONCAT_VECTORS || N1.getNumOperands() != 2 ||
26561 !N0.getOperand(1).isUndef() || !N1.getOperand(1).isUndef())
26562 return SDValue();
26563
26564 // Split the wide shuffle mask into halves. Any mask element that is accessing
26565 // operand 1 is offset down to account for narrowing of the vectors.
26566 ArrayRef<int> Mask = Shuf->getMask();
26567 EVT VT = Shuf->getValueType(0);
26568 unsigned NumElts = VT.getVectorNumElements();
26569 unsigned HalfNumElts = NumElts / 2;
26570 SmallVector<int, 16> Mask0(HalfNumElts, -1);
26571 SmallVector<int, 16> Mask1(HalfNumElts, -1);
26572 for (unsigned i = 0; i != NumElts; ++i) {
26573 if (Mask[i] == -1)
26574 continue;
26575 // If we reference the upper (undef) subvector then the element is undef.
26576 if ((Mask[i] % NumElts) >= HalfNumElts)
26577 continue;
26578 int M = Mask[i] < (int)NumElts ? Mask[i] : Mask[i] - (int)HalfNumElts;
26579 if (i < HalfNumElts)
26580 Mask0[i] = M;
26581 else
26582 Mask1[i - HalfNumElts] = M;
26583 }
26584
26585 // Ask the target if this is a valid transform.
26586 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
26587 EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
26588 HalfNumElts);
26589 if (!TLI.isShuffleMaskLegal(Mask0, HalfVT) ||
26590 !TLI.isShuffleMaskLegal(Mask1, HalfVT))
26591 return SDValue();
26592
26593 // shuffle (concat X, undef), (concat Y, undef), Mask -->
26594 // concat (shuffle X, Y, Mask0), (shuffle X, Y, Mask1)
26595 SDValue X = N0.getOperand(0), Y = N1.getOperand(0);
26596 SDLoc DL(Shuf);
26597 SDValue Shuf0 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask0);
26598 SDValue Shuf1 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask1);
26599 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Shuf0, Shuf1);
26600}
26601
26602// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
26603 // or turn a shuffle of a single concat into a simpler shuffle followed by a concat.
26604 static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
26605 EVT VT = N->getValueType(0);
26606 unsigned NumElts = VT.getVectorNumElements();
26607
26608 SDValue N0 = N->getOperand(0);
26609 SDValue N1 = N->getOperand(1);
26610 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
26611 ArrayRef<int> Mask = SVN->getMask();
26612
26613 SmallVector<SDValue, 4> Ops;
26614 EVT ConcatVT = N0.getOperand(0).getValueType();
26615 unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
26616 unsigned NumConcats = NumElts / NumElemsPerConcat;
26617
26618 auto IsUndefMaskElt = [](int i) { return i == -1; };
26619
26620 // Special case: shuffle(concat(A,B)) can be more efficiently represented
26621 // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
26622 // half vector elements.
26623 if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
26624 llvm::all_of(Mask.slice(NumElemsPerConcat, NumElemsPerConcat),
26625 IsUndefMaskElt)) {
26626 N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0),
26627 N0.getOperand(1),
26628 Mask.slice(0, NumElemsPerConcat));
26629 N1 = DAG.getUNDEF(ConcatVT);
26630 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
26631 }
26632
26633 // Look at every vector that's inserted. We're looking for exact
26634 // subvector-sized copies from a concatenated vector
26635 for (unsigned I = 0; I != NumConcats; ++I) {
26636 unsigned Begin = I * NumElemsPerConcat;
26637 ArrayRef<int> SubMask = Mask.slice(Begin, NumElemsPerConcat);
26638
26639 // Make sure we're dealing with a copy.
26640 if (llvm::all_of(SubMask, IsUndefMaskElt)) {
26641 Ops.push_back(DAG.getUNDEF(ConcatVT));
26642 continue;
26643 }
26644
26645 int OpIdx = -1;
26646 for (int i = 0; i != (int)NumElemsPerConcat; ++i) {
26647 if (IsUndefMaskElt(SubMask[i]))
26648 continue;
26649 if ((SubMask[i] % (int)NumElemsPerConcat) != i)
26650 return SDValue();
26651 int EltOpIdx = SubMask[i] / NumElemsPerConcat;
26652 if (0 <= OpIdx && EltOpIdx != OpIdx)
26653 return SDValue();
26654 OpIdx = EltOpIdx;
26655 }
26656 assert(0 <= OpIdx && "Unknown concat_vectors op");
26657
26658 if (OpIdx < (int)N0.getNumOperands())
26659 Ops.push_back(N0.getOperand(OpIdx));
26660 else
26661 Ops.push_back(N1.getOperand(OpIdx - N0.getNumOperands()));
26662 }
26663
26664 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
26665}
26666
26667// Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
26668// BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
26669//
26670// SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
26671// a simplification in some sense, but it isn't appropriate in general: some
26672// BUILD_VECTORs are substantially cheaper than others. The general case
26673// of a BUILD_VECTOR requires inserting each element individually (or
26674// performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
26675// all constants is a single constant pool load. A BUILD_VECTOR where each
26676// element is identical is a splat. A BUILD_VECTOR where most of the operands
26677// are undef lowers to a small number of element insertions.
26678//
26679// To deal with this, we currently use a bunch of mostly arbitrary heuristics.
26680// We don't fold shuffles where one side is a non-zero constant, and we don't
26681// fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
26682// non-constant operands. This seems to work out reasonably well in practice.
26683 static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
26684 SelectionDAG &DAG,
26685 const TargetLowering &TLI) {
26686 EVT VT = SVN->getValueType(0);
26687 unsigned NumElts = VT.getVectorNumElements();
26688 SDValue N0 = SVN->getOperand(0);
26689 SDValue N1 = SVN->getOperand(1);
26690
26691 if (!N0->hasOneUse())
26692 return SDValue();
26693
26694 // If only one of N0,N1 is constant, bail out if it is not ALL_ZEROS as
26695 // discussed above.
26696 if (!N1.isUndef()) {
26697 if (!N1->hasOneUse())
26698 return SDValue();
26699
26700 bool N0AnyConst = isAnyConstantBuildVector(N0);
26701 bool N1AnyConst = isAnyConstantBuildVector(N1);
26702 if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
26703 return SDValue();
26704 if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
26705 return SDValue();
26706 }
26707
26708 // If both inputs are splats of the same value then we can safely merge this
26709 // to a single BUILD_VECTOR with undef elements based on the shuffle mask.
26710 bool IsSplat = false;
26711 auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
26712 auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
26713 if (BV0 && BV1)
26714 if (SDValue Splat0 = BV0->getSplatValue())
26715 IsSplat = (Splat0 == BV1->getSplatValue());
26716
26717 SmallVector<SDValue, 8> Ops;
26718 SmallSet<SDValue, 16> DuplicateOps;
26719 for (int M : SVN->getMask()) {
26720 SDValue Op = DAG.getUNDEF(VT.getScalarType());
26721 if (M >= 0) {
26722 int Idx = M < (int)NumElts ? M : M - NumElts;
26723 SDValue &S = (M < (int)NumElts ? N0 : N1);
26724 if (S.getOpcode() == ISD::BUILD_VECTOR) {
26725 Op = S.getOperand(Idx);
26726 } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
26727 SDValue Op0 = S.getOperand(0);
26728 Op = Idx == 0 ? Op0 : DAG.getUNDEF(Op0.getValueType());
26729 } else {
26730 // Operand can't be combined - bail out.
26731 return SDValue();
26732 }
26733 }
26734
26735 // Don't duplicate a non-constant BUILD_VECTOR operand unless we're
26736 // generating a splat; semantically, this is fine, but it's likely to
26737 // generate low-quality code if the target can't reconstruct an appropriate
26738 // shuffle.
26739 if (!Op.isUndef() && !isIntOrFPConstant(Op))
26740 if (!IsSplat && !DuplicateOps.insert(Op).second)
26741 return SDValue();
26742
26743 Ops.push_back(Op);
26744 }
26745
26746  // BUILD_VECTOR requires all inputs to be of the same type; find the
26747  // maximum type and extend them all.
26748 EVT SVT = VT.getScalarType();
26749 if (SVT.isInteger())
26750 for (SDValue &Op : Ops)
26751 SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
26752 if (SVT != VT.getScalarType())
26753 for (SDValue &Op : Ops)
26754 Op = Op.isUndef() ? DAG.getUNDEF(SVT)
26755 : (TLI.isZExtFree(Op.getValueType(), SVT)
26756 ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
26757 : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT));
26758 return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
26759}
26760
26761// Match shuffles that can be converted to *_vector_extend_in_reg.
26762// This is often generated during legalization.
26763// e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src)),
26764// and returns the EVT to which the extension should be performed.
26765// NOTE: this assumes that the src is the first operand of the shuffle.
26766static std::optional<EVT> canCombineShuffleToExtendVectorInreg(
26767    unsigned Opcode, EVT VT, std::function<bool(unsigned)> Match,
26768 SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes,
26769 bool LegalOperations) {
26770 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
26771
26772 // TODO Add support for big-endian when we have a test case.
26773 if (!VT.isInteger() || IsBigEndian)
26774 return std::nullopt;
26775
26776 unsigned NumElts = VT.getVectorNumElements();
26777 unsigned EltSizeInBits = VT.getScalarSizeInBits();
26778
26779  // Attempt to match a '*_extend_vector_inreg' shuffle; we just search for
26780  // power-of-2 extensions as they are the most likely.
26781  // FIXME: should try the Scale == NumElts case too.
26782 for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
26783 // The vector width must be a multiple of Scale.
26784 if (NumElts % Scale != 0)
26785 continue;
26786
26787 EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
26788 EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
26789
26790 if ((LegalTypes && !TLI.isTypeLegal(OutVT)) ||
26791 (LegalOperations && !TLI.isOperationLegalOrCustom(Opcode, OutVT)))
26792 continue;
26793
26794 if (Match(Scale))
26795 return OutVT;
26796 }
26797
26798 return std::nullopt;
26799}
26800
26801// Match shuffles that can be converted to any_vector_extend_in_reg.
26802// This is often generated during legalization.
26803// e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
26804static SDValue combineShuffleToAnyExtendVectorInreg(ShuffleVectorSDNode *SVN,
26805                                                    SelectionDAG &DAG,
26806 const TargetLowering &TLI,
26807 bool LegalOperations) {
26808 EVT VT = SVN->getValueType(0);
26809 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
26810
26811 // TODO Add support for big-endian when we have a test case.
26812 if (!VT.isInteger() || IsBigEndian)
26813 return SDValue();
26814
26815 // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
26816 auto isAnyExtend = [NumElts = VT.getVectorNumElements(),
26817 Mask = SVN->getMask()](unsigned Scale) {
26818 for (unsigned i = 0; i != NumElts; ++i) {
26819 if (Mask[i] < 0)
26820 continue;
26821 if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
26822 continue;
26823 return false;
26824 }
26825 return true;
26826 };
26827
26828 unsigned Opcode = ISD::ANY_EXTEND_VECTOR_INREG;
26829 SDValue N0 = SVN->getOperand(0);
26830 // Never create an illegal type. Only create unsupported operations if we
26831 // are pre-legalization.
26832 std::optional<EVT> OutVT = canCombineShuffleToExtendVectorInreg(
26833 Opcode, VT, isAnyExtend, DAG, TLI, /*LegalTypes=*/true, LegalOperations);
26834 if (!OutVT)
26835 return SDValue();
26836 return DAG.getBitcast(VT, DAG.getNode(Opcode, SDLoc(SVN), *OutVT, N0));
26837}
26838
26839// Match shuffles that can be converted to zero_extend_vector_inreg.
26840// This is often generated during legalization.
26841// e.g. v4i32 <0,z,1,u> -> (v2i64 zero_extend_vector_inreg(v4i32 src))
26842static SDValue combineShuffleToZeroExtendVectorInReg(ShuffleVectorSDNode *SVN,
26843                                                     SelectionDAG &DAG,
26844 const TargetLowering &TLI,
26845 bool LegalOperations) {
26846 bool LegalTypes = true;
26847 EVT VT = SVN->getValueType(0);
26848 assert(!VT.isScalableVector() && "Encountered scalable shuffle?");
26849 unsigned NumElts = VT.getVectorNumElements();
26850 unsigned EltSizeInBits = VT.getScalarSizeInBits();
26851
26852 // TODO: add support for big-endian when we have a test case.
26853 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
26854 if (!VT.isInteger() || IsBigEndian)
26855 return SDValue();
26856
26857 SmallVector<int, 16> Mask(SVN->getMask());
26858 auto ForEachDecomposedIndice = [NumElts, &Mask](auto Fn) {
26859 for (int &Indice : Mask) {
26860 if (Indice < 0)
26861 continue;
26862 int OpIdx = (unsigned)Indice < NumElts ? 0 : 1;
26863 int OpEltIdx = (unsigned)Indice < NumElts ? Indice : Indice - NumElts;
26864 Fn(Indice, OpIdx, OpEltIdx);
26865 }
26866 };
26867
26868 // Which elements of which operand does this shuffle demand?
26869 std::array<APInt, 2> OpsDemandedElts;
26870 for (APInt &OpDemandedElts : OpsDemandedElts)
26871 OpDemandedElts = APInt::getZero(NumElts);
26872 ForEachDecomposedIndice(
26873 [&OpsDemandedElts](int &Indice, int OpIdx, int OpEltIdx) {
26874 OpsDemandedElts[OpIdx].setBit(OpEltIdx);
26875 });
26876
26877  // Element-wise(!), which of these demanded elements are known to be zero?
26878 std::array<APInt, 2> OpsKnownZeroElts;
26879 for (auto I : zip(SVN->ops(), OpsDemandedElts, OpsKnownZeroElts))
26880 std::get<2>(I) =
26881 DAG.computeVectorKnownZeroElements(std::get<0>(I), std::get<1>(I));
26882
26883  // Manifest zeroable element knowledge in the shuffle mask.
26884  // NOTE: we don't have a 'zeroable' sentinel value in the generic DAG;
26885  // this is a local invention, but it won't leak into the DAG.
26886  // FIXME: should we not manifest them, but just check when matching?
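  // For example (illustrative): if the demanded elements of operand 1 are
  // known zero, the v4i32 mask <0,4,1,5> becomes <0,-2,1,-2>, which can then
  // be matched below as (v2i64 zero_extend_vector_inreg(v4i32)).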
26887 bool HadZeroableElts = false;
26888 ForEachDecomposedIndice([&OpsKnownZeroElts, &HadZeroableElts](
26889 int &Indice, int OpIdx, int OpEltIdx) {
26890 if (OpsKnownZeroElts[OpIdx][OpEltIdx]) {
26891 Indice = -2; // Zeroable element.
26892 HadZeroableElts = true;
26893 }
26894 });
26895
26896  // Don't proceed unless we've refined at least one zeroable mask index.
26897 // If we didn't, then we are still trying to match the same shuffle mask
26898 // we previously tried to match as ISD::ANY_EXTEND_VECTOR_INREG,
26899 // and evidently failed. Proceeding will lead to endless combine loops.
26900 if (!HadZeroableElts)
26901 return SDValue();
26902
26903 // The shuffle may be more fine-grained than we want. Widen elements first.
26904 // FIXME: should we do this before manifesting zeroable shuffle mask indices?
26905 SmallVector<int, 16> ScaledMask;
26906 getShuffleMaskWithWidestElts(Mask, ScaledMask);
26907 assert(Mask.size() >= ScaledMask.size() &&
26908 Mask.size() % ScaledMask.size() == 0 && "Unexpected mask widening.");
26909 int Prescale = Mask.size() / ScaledMask.size();
26910
26911 NumElts = ScaledMask.size();
26912 EltSizeInBits *= Prescale;
26913
26914 EVT PrescaledVT = EVT::getVectorVT(
26915 *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits),
26916 NumElts);
26917
26918 if (LegalTypes && !TLI.isTypeLegal(PrescaledVT) && TLI.isTypeLegal(VT))
26919 return SDValue();
26920
26921 // For example,
26922 // shuffle<0,z,1,-1> == (v2i64 zero_extend_vector_inreg(v4i32))
26923 // But not shuffle<z,z,1,-1> and not shuffle<0,z,z,-1> ! (for same types)
26924 auto isZeroExtend = [NumElts, &ScaledMask](unsigned Scale) {
26925 assert(Scale >= 2 && Scale <= NumElts && NumElts % Scale == 0 &&
26926 "Unexpected mask scaling factor.");
26927 ArrayRef<int> Mask = ScaledMask;
26928 for (unsigned SrcElt = 0, NumSrcElts = NumElts / Scale;
26929 SrcElt != NumSrcElts; ++SrcElt) {
26930 // Analyze the shuffle mask in Scale-sized chunks.
26931 ArrayRef<int> MaskChunk = Mask.take_front(Scale);
26932 assert(MaskChunk.size() == Scale && "Unexpected mask size.");
26933 Mask = Mask.drop_front(MaskChunk.size());
26934      // The first index in this chunk must be SrcElt, but not zero!
26935      // FIXME: undef should be fine, but that results in a more-defined result.
26936 if (int FirstIndice = MaskChunk[0]; (unsigned)FirstIndice != SrcElt)
26937 return false;
26938 // The rest of the indices in this chunk must be zeros.
26939      // FIXME: undef should be fine, but that results in a more-defined result.
26940 if (!all_of(MaskChunk.drop_front(1),
26941 [](int Indice) { return Indice == -2; }))
26942 return false;
26943 }
26944 assert(Mask.empty() && "Did not process the whole mask?");
26945 return true;
26946 };
26947
26948 unsigned Opcode = ISD::ZERO_EXTEND_VECTOR_INREG;
26949 for (bool Commuted : {false, true}) {
26950 SDValue Op = SVN->getOperand(!Commuted ? 0 : 1);
26951    if (Commuted)
26952      ShuffleVectorSDNode::commuteMask(ScaledMask);
26953 std::optional<EVT> OutVT = canCombineShuffleToExtendVectorInreg(
26954 Opcode, PrescaledVT, isZeroExtend, DAG, TLI, LegalTypes,
26955 LegalOperations);
26956 if (OutVT)
26957 return DAG.getBitcast(VT, DAG.getNode(Opcode, SDLoc(SVN), *OutVT,
26958 DAG.getBitcast(PrescaledVT, Op)));
26959 }
26960 return SDValue();
26961}
26962
26963// Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
26964// each source element of a large type into the lowest elements of a smaller
26965// destination type. This is often generated during legalization.
26966// If the source node itself was a '*_extend_vector_inreg' node then we should
26967// be able to remove it.
26968static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
26969 SelectionDAG &DAG) {
26970 EVT VT = SVN->getValueType(0);
26971 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
26972
26973 // TODO Add support for big-endian when we have a test case.
26974 if (!VT.isInteger() || IsBigEndian)
26975 return SDValue();
26976
26977  SDValue N0 = peekThroughBitcasts(SVN->getOperand(0));
26978
26979 unsigned Opcode = N0.getOpcode();
26980 if (!ISD::isExtVecInRegOpcode(Opcode))
26981 return SDValue();
26982
26983 SDValue N00 = N0.getOperand(0);
26984 ArrayRef<int> Mask = SVN->getMask();
26985 unsigned NumElts = VT.getVectorNumElements();
26986 unsigned EltSizeInBits = VT.getScalarSizeInBits();
26987 unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
26988 unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();
26989
26990 if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
26991 return SDValue();
26992 unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;
26993
26994  // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2,-1,-1>
26995 // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
26996 // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
26997 auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
26998 for (unsigned i = 0; i != NumElts; ++i) {
26999 if (Mask[i] < 0)
27000 continue;
27001 if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
27002 continue;
27003 return false;
27004 }
27005 return true;
27006 };
27007
27008 // At the moment we just handle the case where we've truncated back to the
27009 // same size as before the extension.
27010 // TODO: handle more extension/truncation cases as cases arise.
27011 if (EltSizeInBits != ExtSrcSizeInBits)
27012 return SDValue();
27013 if (VT.getSizeInBits() != N00.getValueSizeInBits())
27014 return SDValue();
27015
27016 // We can remove *extend_vector_inreg only if the truncation happens at
27017 // the same scale as the extension.
27018 if (isTruncate(ExtScale))
27019 return DAG.getBitcast(VT, N00);
27020
27021 return SDValue();
27022}
27023
27024// Combine shuffles of splat-shuffles of the form:
27025// shuffle (shuffle V, undef, splat-mask), undef, M
27026// If splat-mask contains undef elements, we need to be careful about
27027// introducing undefs in the folded mask that are not the result of composing
27028// the masks of the shuffles.
27029static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf,
27030 SelectionDAG &DAG) {
27031 EVT VT = Shuf->getValueType(0);
27032 unsigned NumElts = VT.getVectorNumElements();
27033
27034 if (!Shuf->getOperand(1).isUndef())
27035 return SDValue();
27036
27037 // See if this unary non-splat shuffle actually *is* a splat shuffle,
27038 // in disguise, with all demanded elements being identical.
27039 // FIXME: this can be done per-operand.
27040 if (!Shuf->isSplat()) {
27041 APInt DemandedElts(NumElts, 0);
27042 for (int Idx : Shuf->getMask()) {
27043 if (Idx < 0)
27044 continue; // Ignore sentinel indices.
27045 assert((unsigned)Idx < NumElts && "Out-of-bounds shuffle indice?");
27046 DemandedElts.setBit(Idx);
27047 }
27048 assert(DemandedElts.popcount() > 1 && "Is a splat shuffle already?");
27049 APInt UndefElts;
27050 if (DAG.isSplatValue(Shuf->getOperand(0), DemandedElts, UndefElts)) {
27051 // Even if all demanded elements are splat, some of them could be undef.
27052 // Which lowest demanded element is *not* known-undef?
27053 std::optional<unsigned> MinNonUndefIdx;
27054 for (int Idx : Shuf->getMask()) {
27055 if (Idx < 0 || UndefElts[Idx])
27056 continue; // Ignore sentinel indices, and undef elements.
27057 MinNonUndefIdx = std::min<unsigned>(Idx, MinNonUndefIdx.value_or(~0U));
27058 }
27059 if (!MinNonUndefIdx)
27060 return DAG.getUNDEF(VT); // All undef - result is undef.
27061 assert(*MinNonUndefIdx < NumElts && "Expected valid element index.");
27062 SmallVector<int, 8> SplatMask(Shuf->getMask());
27063 for (int &Idx : SplatMask) {
27064 if (Idx < 0)
27065 continue; // Passthrough sentinel indices.
27066 // Otherwise, just pick the lowest demanded non-undef element.
27067 // Or sentinel undef, if we know we'd pick a known-undef element.
27068 Idx = UndefElts[Idx] ? -1 : *MinNonUndefIdx;
27069 }
27070 assert(SplatMask != Shuf->getMask() && "Expected mask to change!");
27071 return DAG.getVectorShuffle(VT, SDLoc(Shuf), Shuf->getOperand(0),
27072 Shuf->getOperand(1), SplatMask);
27073 }
27074 }
27075
27076 // If the inner operand is a known splat with no undefs, just return that directly.
27077 // TODO: Create DemandedElts mask from Shuf's mask.
27078 // TODO: Allow undef elements and merge with the shuffle code below.
27079 if (DAG.isSplatValue(Shuf->getOperand(0), /*AllowUndefs*/ false))
27080 return Shuf->getOperand(0);
27081
27082  auto *Splat = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
27083  if (!Splat || !Splat->isSplat())
27084 return SDValue();
27085
27086 ArrayRef<int> ShufMask = Shuf->getMask();
27087 ArrayRef<int> SplatMask = Splat->getMask();
27088 assert(ShufMask.size() == SplatMask.size() && "Mask length mismatch");
27089
27090 // Prefer simplifying to the splat-shuffle, if possible. This is legal if
27091 // every undef mask element in the splat-shuffle has a corresponding undef
27092 // element in the user-shuffle's mask or if the composition of mask elements
27093 // would result in undef.
27094 // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
27095 // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
27096  // In this case it is not legal to simplify to the splat-shuffle because we
27097  // may be exposing to the users of the shuffle an undef element at index 1
27098  // that was not there before the combine.
27099 // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
27100 // In this case the composition of masks yields SplatMask, so it's ok to
27101 // simplify to the splat-shuffle.
27102 // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
27103 // In this case the composed mask includes all undef elements of SplatMask
27104 // and in addition sets element zero to undef. It is safe to simplify to
27105 // the splat-shuffle.
27106 auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
27107 ArrayRef<int> SplatMask) {
27108 for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
27109 if (UserMask[i] != -1 && SplatMask[i] == -1 &&
27110 SplatMask[UserMask[i]] != -1)
27111 return false;
27112 return true;
27113 };
27114 if (CanSimplifyToExistingSplat(ShufMask, SplatMask))
27115 return Shuf->getOperand(0);
27116
27117 // Create a new shuffle with a mask that is composed of the two shuffles'
27118 // masks.
27119 SmallVector<int, 32> NewMask;
27120 for (int Idx : ShufMask)
27121 NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
27122
27123 return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
27124 Splat->getOperand(0), Splat->getOperand(1),
27125 NewMask);
27126}
27127
27128// Combine shuffles of bitcasts into a shuffle of the bitcast type, provided
27129// the mask can be treated as operating on the larger element type.
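// For example (illustrative):
//   shuffle (v4i32 bitcast (v2i64 X)), undef, <2,3,0,1>
//     --> bitcast (shuffle X, undef, <1,0>)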
27130static SDValue combineShuffleOfBitcast(ShuffleVectorSDNode *SVN,
27131                                       SelectionDAG &DAG,
27132 const TargetLowering &TLI,
27133 bool LegalOperations) {
27134 SDValue Op0 = SVN->getOperand(0);
27135 SDValue Op1 = SVN->getOperand(1);
27136 EVT VT = SVN->getValueType(0);
27137 if (Op0.getOpcode() != ISD::BITCAST)
27138 return SDValue();
27139 EVT InVT = Op0.getOperand(0).getValueType();
27140 if (!InVT.isVector() ||
27141 (!Op1.isUndef() && (Op1.getOpcode() != ISD::BITCAST ||
27142 Op1.getOperand(0).getValueType() != InVT)))
27143 return SDValue();
27144  if (isAnyConstantBuildVector(Op0.getOperand(0)) &&
27145      (Op1.isUndef() || isAnyConstantBuildVector(Op1.getOperand(0))))
27146 return SDValue();
27147
27148 int VTLanes = VT.getVectorNumElements();
27149 int InLanes = InVT.getVectorNumElements();
27150 if (VTLanes <= InLanes || VTLanes % InLanes != 0 ||
27151 (LegalOperations &&
27152       !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, InVT)))
27153    return SDValue();
27154 int Factor = VTLanes / InLanes;
27155
27156  // Check that each group of lanes in the mask is either all undef or makes a
27157  // valid mask for the wider lane type.
27158 ArrayRef<int> Mask = SVN->getMask();
27159 SmallVector<int> NewMask;
27160 if (!widenShuffleMaskElts(Factor, Mask, NewMask))
27161 return SDValue();
27162
27163 if (!TLI.isShuffleMaskLegal(NewMask, InVT))
27164 return SDValue();
27165
27166 // Create the new shuffle with the new mask and bitcast it back to the
27167 // original type.
27168 SDLoc DL(SVN);
27169 Op0 = Op0.getOperand(0);
27170 Op1 = Op1.isUndef() ? DAG.getUNDEF(InVT) : Op1.getOperand(0);
27171 SDValue NewShuf = DAG.getVectorShuffle(InVT, DL, Op0, Op1, NewMask);
27172 return DAG.getBitcast(VT, NewShuf);
27173}
27174
27175/// Combine shuffle of shuffle of the form:
27176/// shuf (shuf X, undef, InnerMask), undef, OuterMask --> splat X
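/// For example (illustrative), with a v4i32 X:
///   shuf (shuf X, undef, <0,3,1,2>), undef, <1,1,1,1>
///     --> shuf X, undef, <3,3,3,3>   (a splat of element 3 of X)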
27177static SDValue formSplatFromShuffles(ShuffleVectorSDNode *OuterShuf,
27178                                     SelectionDAG &DAG) {
27179 if (!OuterShuf->getOperand(1).isUndef())
27180 return SDValue();
27181 auto *InnerShuf = dyn_cast<ShuffleVectorSDNode>(OuterShuf->getOperand(0));
27182 if (!InnerShuf || !InnerShuf->getOperand(1).isUndef())
27183 return SDValue();
27184
27185 ArrayRef<int> OuterMask = OuterShuf->getMask();
27186 ArrayRef<int> InnerMask = InnerShuf->getMask();
27187 unsigned NumElts = OuterMask.size();
27188 assert(NumElts == InnerMask.size() && "Mask length mismatch");
27189 SmallVector<int, 32> CombinedMask(NumElts, -1);
27190 int SplatIndex = -1;
27191 for (unsigned i = 0; i != NumElts; ++i) {
27192 // Undef lanes remain undef.
27193 int OuterMaskElt = OuterMask[i];
27194 if (OuterMaskElt == -1)
27195 continue;
27196
27197 // Peek through the shuffle masks to get the underlying source element.
27198 int InnerMaskElt = InnerMask[OuterMaskElt];
27199 if (InnerMaskElt == -1)
27200 continue;
27201
27202 // Initialize the splatted element.
27203 if (SplatIndex == -1)
27204 SplatIndex = InnerMaskElt;
27205
27206 // Non-matching index - this is not a splat.
27207 if (SplatIndex != InnerMaskElt)
27208 return SDValue();
27209
27210 CombinedMask[i] = InnerMaskElt;
27211 }
27212 assert((all_of(CombinedMask, [](int M) { return M == -1; }) ||
27213 getSplatIndex(CombinedMask) != -1) &&
27214 "Expected a splat mask");
27215
27216 // TODO: The transform may be a win even if the mask is not legal.
27217 EVT VT = OuterShuf->getValueType(0);
27218 assert(VT == InnerShuf->getValueType(0) && "Expected matching shuffle types");
27219 if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(CombinedMask, VT))
27220 return SDValue();
27221
27222 return DAG.getVectorShuffle(VT, SDLoc(OuterShuf), InnerShuf->getOperand(0),
27223 InnerShuf->getOperand(1), CombinedMask);
27224}
27225
27226/// If the shuffle mask is taking exactly one element from the first vector
27227/// operand and passing through all other elements from the second vector
27228/// operand, return the index of the mask element that is choosing an element
27229/// from the first operand. Otherwise, return -1.
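/// For example (illustrative), with two v4 operands, mask <4,5,2,7> takes only
/// element 2 from operand 0 and keeps lanes 0, 1 and 3 of operand 1 in place,
/// so this returns 2; mask <4,5,2,6> returns -1 because lane 3 would change.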
27230static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) {
27231  int MaskSize = Mask.size();
27232 int EltFromOp0 = -1;
27233 // TODO: This does not match if there are undef elements in the shuffle mask.
27234 // Should we ignore undefs in the shuffle mask instead? The trade-off is
27235 // removing an instruction (a shuffle), but losing the knowledge that some
27236 // vector lanes are not needed.
27237 for (int i = 0; i != MaskSize; ++i) {
27238 if (Mask[i] >= 0 && Mask[i] < MaskSize) {
27239 // We're looking for a shuffle of exactly one element from operand 0.
27240 if (EltFromOp0 != -1)
27241 return -1;
27242 EltFromOp0 = i;
27243 } else if (Mask[i] != i + MaskSize) {
27244 // Nothing from operand 1 can change lanes.
27245 return -1;
27246 }
27247 }
27248 return EltFromOp0;
27249}
27250
27251/// If a shuffle inserts exactly one element from a source vector operand into
27252/// another vector operand and we can access the specified element as a scalar,
27253/// then we can eliminate the shuffle.
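/// For example (illustrative):
///   shuffle (insert_vector_elt v1, x, 0), v2, <4,0,6,7>
///     --> insert_vector_elt v2, x, 1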
27254SDValue DAGCombiner::replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf) {
27255 // First, check if we are taking one element of a vector and shuffling that
27256 // element into another vector.
27257 ArrayRef<int> Mask = Shuf->getMask();
27258 SmallVector<int, 16> CommutedMask(Mask);
27259 SDValue Op0 = Shuf->getOperand(0);
27260 SDValue Op1 = Shuf->getOperand(1);
27261 int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask);
27262 if (ShufOp0Index == -1) {
27263 // Commute mask and check again.
27264    ShuffleVectorSDNode::commuteMask(CommutedMask);
27265    ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask);
27266 if (ShufOp0Index == -1)
27267 return SDValue();
27268 // Commute operands to match the commuted shuffle mask.
27269 std::swap(Op0, Op1);
27270 Mask = CommutedMask;
27271 }
27272
27273 // The shuffle inserts exactly one element from operand 0 into operand 1.
27274 // Now see if we can access that element as a scalar via a real insert element
27275 // instruction.
27276 // TODO: We can try harder to locate the element as a scalar. Examples: it
27277 // could be an operand of BUILD_VECTOR, or a constant.
27278 assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&
27279 "Shuffle mask value must be from operand 0");
27280
27281 SDValue Elt;
27282 if (sd_match(Op0, m_InsertElt(m_Value(), m_Value(Elt),
27283 m_SpecificInt(Mask[ShufOp0Index])))) {
27284 // There's an existing insertelement with constant insertion index, so we
27285 // don't need to check the legality/profitability of a replacement operation
27286 // that differs at most in the constant value. The target should be able to
27287 // lower any of those in a similar way. If not, legalization will expand
27288 // this to a scalar-to-vector plus shuffle.
27289 //
27290 // Note that the shuffle may move the scalar from the position that the
27291 // insert element used. Therefore, our new insert element occurs at the
27292 // shuffle's mask index value, not the insert's index value.
27293 //
27294 // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
27295 SDValue NewInsIndex = DAG.getVectorIdxConstant(ShufOp0Index, SDLoc(Shuf));
27296 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
27297 Op1, Elt, NewInsIndex);
27298 }
27299
27300 if (!hasOperation(ISD::INSERT_VECTOR_ELT, Op0.getValueType()))
27301 return SDValue();
27302
27304 Mask[ShufOp0Index] == 0) {
27305 SDValue NewInsIndex = DAG.getVectorIdxConstant(ShufOp0Index, SDLoc(Shuf));
27306 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
27307 Op1, Elt, NewInsIndex);
27308 }
27309
27310 return SDValue();
27311}
27312
27313/// If we have a unary shuffle of a shuffle, see if it can be folded away
27314/// completely. This has the potential to lose undef knowledge because the first
27315/// shuffle may not have an undef mask element where the second one does. So
27316/// only call this after doing simplifications based on demanded elements.
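/// For example (illustrative), if the inner shuffle has mask <2,2,3,3>, an
/// outer unary shuffle with mask <1,0,3,2> selects exactly the same elements
/// (Mask0[Mask[i]] == Mask0[i] for every i), so it can be replaced by the
/// inner shuffle itself.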
27317static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf) {
27318  // shuf (shuf0 X, Y, Mask0), undef, Mask
27319 auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
27320 if (!Shuf0 || !Shuf->getOperand(1).isUndef())
27321 return SDValue();
27322
27323 ArrayRef<int> Mask = Shuf->getMask();
27324 ArrayRef<int> Mask0 = Shuf0->getMask();
27325 for (int i = 0, e = (int)Mask.size(); i != e; ++i) {
27326 // Ignore undef elements.
27327 if (Mask[i] == -1)
27328 continue;
27329 assert(Mask[i] >= 0 && Mask[i] < e && "Unexpected shuffle mask value");
27330
27331 // Is the element of the shuffle operand chosen by this shuffle the same as
27332 // the element chosen by the shuffle operand itself?
27333 if (Mask0[Mask[i]] != Mask0[i])
27334 return SDValue();
27335 }
27336 // Every element of this shuffle is identical to the result of the previous
27337 // shuffle, so we can replace this value.
27338 return Shuf->getOperand(0);
27339}
27340
27341SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
27342 EVT VT = N->getValueType(0);
27343 unsigned NumElts = VT.getVectorNumElements();
27344
27345 SDValue N0 = N->getOperand(0);
27346 SDValue N1 = N->getOperand(1);
27347
27348 assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
27349
27350 // Canonicalize shuffle undef, undef -> undef
27351 if (N0.isUndef() && N1.isUndef())
27352 return DAG.getUNDEF(VT);
27353
27354 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
27355
27356 // Canonicalize shuffle v, v -> v, undef
27357 if (N0 == N1)
27358 return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT),
27359 createUnaryMask(SVN->getMask(), NumElts));
27360
27361 // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
27362 if (N0.isUndef())
27363 return DAG.getCommutedVectorShuffle(*SVN);
27364
27365 // Remove references to rhs if it is undef
27366 if (N1.isUndef()) {
27367 bool Changed = false;
27368 SmallVector<int, 8> NewMask;
27369 for (unsigned i = 0; i != NumElts; ++i) {
27370 int Idx = SVN->getMaskElt(i);
27371 if (Idx >= (int)NumElts) {
27372 Idx = -1;
27373 Changed = true;
27374 }
27375 NewMask.push_back(Idx);
27376 }
27377 if (Changed)
27378 return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
27379 }
27380
27381 if (SDValue InsElt = replaceShuffleOfInsert(SVN))
27382 return InsElt;
27383
27384 // A shuffle of a single vector that is a splatted value can always be folded.
27385 if (SDValue V = combineShuffleOfSplatVal(SVN, DAG))
27386 return V;
27387
27388 if (SDValue V = formSplatFromShuffles(SVN, DAG))
27389 return V;
27390
27391 // If it is a splat, check if the argument vector is another splat or a
27392 // build_vector.
27393 if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
27394 int SplatIndex = SVN->getSplatIndex();
27395 if (N0.hasOneUse() && TLI.isExtractVecEltCheap(VT, SplatIndex) &&
27396 TLI.isBinOp(N0.getOpcode()) && N0->getNumValues() == 1) {
27397 // splat (vector_bo L, R), Index -->
27398 // splat (scalar_bo (extelt L, Index), (extelt R, Index))
27399 SDValue L = N0.getOperand(0), R = N0.getOperand(1);
27400 SDLoc DL(N);
27401 EVT EltVT = VT.getScalarType();
27402 SDValue Index = DAG.getVectorIdxConstant(SplatIndex, DL);
27403 SDValue ExtL = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, L, Index);
27404 SDValue ExtR = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, R, Index);
27405 SDValue NewBO =
27406 DAG.getNode(N0.getOpcode(), DL, EltVT, ExtL, ExtR, N0->getFlags());
27407 SDValue Insert = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, NewBO);
27408 SmallVector<int, 16> ZeroMask(VT.getVectorNumElements(), 0);
27409 return DAG.getVectorShuffle(VT, DL, Insert, DAG.getUNDEF(VT), ZeroMask);
27410 }
27411
27412 // splat(scalar_to_vector(x), 0) -> build_vector(x,...,x)
27413 // splat(insert_vector_elt(v, x, c), c) -> build_vector(x,...,x)
27414 if ((!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) &&
27415 N0.hasOneUse()) {
27416 if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && SplatIndex == 0)
27417 return DAG.getSplatBuildVector(VT, SDLoc(N), N0.getOperand(0));
27418
27419      if (N0.getOpcode() == ISD::INSERT_VECTOR_ELT)
27420        if (auto *Idx = dyn_cast<ConstantSDNode>(N0.getOperand(2)))
27421 if (Idx->getAPIntValue() == SplatIndex)
27422 return DAG.getSplatBuildVector(VT, SDLoc(N), N0.getOperand(1));
27423
27424 // Look through a bitcast if LE and splatting lane 0, through to a
27425 // scalar_to_vector or a build_vector.
27426 if (N0.getOpcode() == ISD::BITCAST && N0.getOperand(0).hasOneUse() &&
27427          SplatIndex == 0 && DAG.getDataLayout().isLittleEndian() &&
27428          (N0.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR ||
27429           N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR)) {
27430 EVT N00VT = N0.getOperand(0).getValueType();
27431 if (VT.getScalarSizeInBits() <= N00VT.getScalarSizeInBits() &&
27432 VT.isInteger() && N00VT.isInteger()) {
27433 EVT InVT =
27436 SDLoc(N), InVT);
27437 return DAG.getSplatBuildVector(VT, SDLoc(N), Op);
27438 }
27439 }
27440 }
27441
27442 // If this is a bit convert that changes the element type of the vector but
27443 // not the number of vector elements, look through it. Be careful not to
27444  // look through conversions that change things like v4f32 to v2f64.
27445 SDNode *V = N0.getNode();
27446 if (V->getOpcode() == ISD::BITCAST) {
27447 SDValue ConvInput = V->getOperand(0);
27448 if (ConvInput.getValueType().isVector() &&
27449 ConvInput.getValueType().getVectorNumElements() == NumElts)
27450 V = ConvInput.getNode();
27451 }
27452
27453 if (V->getOpcode() == ISD::BUILD_VECTOR) {
27454 assert(V->getNumOperands() == NumElts &&
27455 "BUILD_VECTOR has wrong number of operands");
27456 SDValue Base;
27457 bool AllSame = true;
27458 for (unsigned i = 0; i != NumElts; ++i) {
27459 if (!V->getOperand(i).isUndef()) {
27460 Base = V->getOperand(i);
27461 break;
27462 }
27463 }
27464 // Splat of <u, u, u, u>, return <u, u, u, u>
27465 if (!Base.getNode())
27466 return N0;
27467 for (unsigned i = 0; i != NumElts; ++i) {
27468 if (V->getOperand(i) != Base) {
27469 AllSame = false;
27470 break;
27471 }
27472 }
27473 // Splat of <x, x, x, x>, return <x, x, x, x>
27474 if (AllSame)
27475 return N0;
27476
27477 // Canonicalize any other splat as a build_vector, but avoid defining any
27478 // undefined elements in the mask.
27479 SDValue Splatted = V->getOperand(SplatIndex);
27480 SmallVector<SDValue, 8> Ops(NumElts, Splatted);
27481 EVT EltVT = Splatted.getValueType();
27482
27483 for (unsigned i = 0; i != NumElts; ++i) {
27484 if (SVN->getMaskElt(i) < 0)
27485 Ops[i] = DAG.getUNDEF(EltVT);
27486 }
27487
27488 SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
27489
27490 // We may have jumped through bitcasts, so the type of the
27491 // BUILD_VECTOR may not match the type of the shuffle.
27492 if (V->getValueType(0) != VT)
27493 NewBV = DAG.getBitcast(VT, NewBV);
27494 return NewBV;
27495 }
27496 }
27497
27498 // Simplify source operands based on shuffle mask.
27499  if (SimplifyDemandedVectorElts(SDValue(N, 0)))
27500    return SDValue(N, 0);
27501
27502 // This is intentionally placed after demanded elements simplification because
27503 // it could eliminate knowledge of undef elements created by this shuffle.
27504 if (SDValue ShufOp = simplifyShuffleOfShuffle(SVN))
27505 return ShufOp;
27506
27507 // Match shuffles that can be converted to any_vector_extend_in_reg.
27508 if (SDValue V =
27509 combineShuffleToAnyExtendVectorInreg(SVN, DAG, TLI, LegalOperations))
27510 return V;
27511
27512 // Combine "truncate_vector_in_reg" style shuffles.
27513 if (SDValue V = combineTruncationShuffle(SVN, DAG))
27514 return V;
27515
27516 if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
27517 Level < AfterLegalizeVectorOps &&
27518 (N1.isUndef() ||
27519 (N1.getOpcode() == ISD::CONCAT_VECTORS &&
27520 N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
27521 if (SDValue V = partitionShuffleOfConcats(N, DAG))
27522 return V;
27523 }
27524
27525 // A shuffle of a concat of the same narrow vector can be reduced to use
27526 // only low-half elements of a concat with undef:
27527 // shuf (concat X, X), undef, Mask --> shuf (concat X, undef), undef, Mask'
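  // For example (illustrative), with a v2f32 X concatenated into a v4f32:
  //   shuf (concat X, X), undef, <3,1,0,2>
  //     --> shuf (concat X, undef), undef, <1,1,0,0>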
27528 if (N0.getOpcode() == ISD::CONCAT_VECTORS && N1.isUndef() &&
27529 N0.getNumOperands() == 2 &&
27530 N0.getOperand(0) == N0.getOperand(1)) {
27531 int HalfNumElts = (int)NumElts / 2;
27532 SmallVector<int, 8> NewMask;
27533 for (unsigned i = 0; i != NumElts; ++i) {
27534 int Idx = SVN->getMaskElt(i);
27535 if (Idx >= HalfNumElts) {
27536 assert(Idx < (int)NumElts && "Shuffle mask chooses undef op");
27537 Idx -= HalfNumElts;
27538 }
27539 NewMask.push_back(Idx);
27540 }
27541 if (TLI.isShuffleMaskLegal(NewMask, VT)) {
27542 SDValue UndefVec = DAG.getUNDEF(N0.getOperand(0).getValueType());
27543 SDValue NewCat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
27544 N0.getOperand(0), UndefVec);
27545 return DAG.getVectorShuffle(VT, SDLoc(N), NewCat, N1, NewMask);
27546 }
27547 }
27548
27549 // See if we can replace a shuffle with an insert_subvector.
27550 // e.g. v2i32 into v8i32:
27551 // shuffle(lhs,concat(rhs0,rhs1,rhs2,rhs3),0,1,2,3,10,11,6,7).
27552 // --> insert_subvector(lhs,rhs1,4).
27553 if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT) &&
27555 auto ShuffleToInsert = [&](SDValue LHS, SDValue RHS, ArrayRef<int> Mask) {
27556 // Ensure RHS subvectors are legal.
27557 assert(RHS.getOpcode() == ISD::CONCAT_VECTORS && "Can't find subvectors");
27558 EVT SubVT = RHS.getOperand(0).getValueType();
27559 int NumSubVecs = RHS.getNumOperands();
27560 int NumSubElts = SubVT.getVectorNumElements();
27561 assert((NumElts % NumSubElts) == 0 && "Subvector mismatch");
27562 if (!TLI.isTypeLegal(SubVT))
27563 return SDValue();
27564
27565    // Don't bother if we have a unary shuffle (matches undef + LHS elts).
27566 if (all_of(Mask, [NumElts](int M) { return M < (int)NumElts; }))
27567 return SDValue();
27568
27569 // Search [NumSubElts] spans for RHS sequence.
27570 // TODO: Can we avoid nested loops to increase performance?
27571 SmallVector<int> InsertionMask(NumElts);
27572 for (int SubVec = 0; SubVec != NumSubVecs; ++SubVec) {
27573 for (int SubIdx = 0; SubIdx != (int)NumElts; SubIdx += NumSubElts) {
27574 // Reset mask to identity.
27575 std::iota(InsertionMask.begin(), InsertionMask.end(), 0);
27576
27577 // Add subvector insertion.
27578 std::iota(InsertionMask.begin() + SubIdx,
27579 InsertionMask.begin() + SubIdx + NumSubElts,
27580 NumElts + (SubVec * NumSubElts));
27581
27582 // See if the shuffle mask matches the reference insertion mask.
27583 bool MatchingShuffle = true;
27584 for (int i = 0; i != (int)NumElts; ++i) {
27585 int ExpectIdx = InsertionMask[i];
27586 int ActualIdx = Mask[i];
27587 if (0 <= ActualIdx && ExpectIdx != ActualIdx) {
27588 MatchingShuffle = false;
27589 break;
27590 }
27591 }
27592
27593 if (MatchingShuffle)
27594 return DAG.getInsertSubvector(SDLoc(N), LHS, RHS.getOperand(SubVec),
27595 SubIdx);
27596 }
27597 }
27598 return SDValue();
27599 };
27600 ArrayRef<int> Mask = SVN->getMask();
27601 if (N1.getOpcode() == ISD::CONCAT_VECTORS)
27602 if (SDValue InsertN1 = ShuffleToInsert(N0, N1, Mask))
27603 return InsertN1;
27604 if (N0.getOpcode() == ISD::CONCAT_VECTORS) {
27605 SmallVector<int> CommuteMask(Mask);
27606      ShuffleVectorSDNode::commuteMask(CommuteMask);
27607      if (SDValue InsertN0 = ShuffleToInsert(N1, N0, CommuteMask))
27608 return InsertN0;
27609 }
27610 }
27611
27612  // If we're not performing a select/blend shuffle, see if we can convert the
27613  // shuffle into an AND node, where all out-of-lane elements are known zero.
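  // For example (illustrative), if element 3 of N1 is known to be zero:
  //   v4i32 shuffle N0, N1, <0,7,2,7> --> and N0, <-1,0,-1,0>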
27614 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
27615 bool IsInLaneMask = true;
27616 ArrayRef<int> Mask = SVN->getMask();
27617 SmallVector<int, 16> ClearMask(NumElts, -1);
27618 APInt DemandedLHS = APInt::getZero(NumElts);
27619 APInt DemandedRHS = APInt::getZero(NumElts);
27620 for (int I = 0; I != (int)NumElts; ++I) {
27621 int M = Mask[I];
27622 if (M < 0)
27623 continue;
27624 ClearMask[I] = M == I ? I : (I + NumElts);
27625 IsInLaneMask &= (M == I) || (M == (int)(I + NumElts));
27626 if (M != I) {
27627 APInt &Demanded = M < (int)NumElts ? DemandedLHS : DemandedRHS;
27628 Demanded.setBit(M % NumElts);
27629 }
27630 }
27631 // TODO: Should we try to mask with N1 as well?
27632 if (!IsInLaneMask && (!DemandedLHS.isZero() || !DemandedRHS.isZero()) &&
27633 (DemandedLHS.isZero() || DAG.MaskedVectorIsZero(N0, DemandedLHS)) &&
27634 (DemandedRHS.isZero() || DAG.MaskedVectorIsZero(N1, DemandedRHS))) {
27635 SDLoc DL(N);
27636 EVT IntVT = VT.changeVectorElementTypeToInteger();
27637 EVT IntSVT = VT.getVectorElementType().changeTypeToInteger();
27638      // Transform the type to a legal type so that the buildvector constant
27639      // elements are not illegal. Make sure that the result is larger than the
27640      // original type, in case the value is split into two (e.g. i64->i32).
27641 if (!TLI.isTypeLegal(IntSVT) && LegalTypes)
27642 IntSVT = TLI.getTypeToTransformTo(*DAG.getContext(), IntSVT);
27643 if (IntSVT.getSizeInBits() >= IntVT.getScalarSizeInBits()) {
27644 SDValue ZeroElt = DAG.getConstant(0, DL, IntSVT);
27645 SDValue AllOnesElt = DAG.getAllOnesConstant(DL, IntSVT);
27646 SmallVector<SDValue, 16> AndMask(NumElts, DAG.getUNDEF(IntSVT));
27647 for (int I = 0; I != (int)NumElts; ++I)
27648 if (0 <= Mask[I])
27649 AndMask[I] = Mask[I] == I ? AllOnesElt : ZeroElt;
27650
27651 // See if a clear mask is legal instead of going via
27652 // XformToShuffleWithZero which loses UNDEF mask elements.
27653 if (TLI.isVectorClearMaskLegal(ClearMask, IntVT))
27654 return DAG.getBitcast(
27655 VT, DAG.getVectorShuffle(IntVT, DL, DAG.getBitcast(IntVT, N0),
27656 DAG.getConstant(0, DL, IntVT), ClearMask));
27657
27658 if (TLI.isOperationLegalOrCustom(ISD::AND, IntVT))
27659 return DAG.getBitcast(
27660 VT, DAG.getNode(ISD::AND, DL, IntVT, DAG.getBitcast(IntVT, N0),
27661 DAG.getBuildVector(IntVT, DL, AndMask)));
27662 }
27663 }
27664 }
27665
27666 // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
27667 // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
27668 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT))
27669 if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
27670 return Res;
27671
27672 // If this shuffle only has a single input that is a bitcasted shuffle,
27673 // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
27674 // back to their original types.
27675 if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
27676 N1.isUndef() && Level < AfterLegalizeVectorOps &&
27677 TLI.isTypeLegal(VT)) {
27678
27679    SDValue BC0 = peekThroughOneUseBitcasts(N0);
27680    if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
27681 EVT SVT = VT.getScalarType();
27682 EVT InnerVT = BC0->getValueType(0);
27683 EVT InnerSVT = InnerVT.getScalarType();
27684
27685 // Determine which shuffle works with the smaller scalar type.
27686 EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
27687 EVT ScaleSVT = ScaleVT.getScalarType();
27688
27689 if (TLI.isTypeLegal(ScaleVT) &&
27690 0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
27691 0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
27692 int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
27693 int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
27694
27695 // Scale the shuffle masks to the smaller scalar type.
27696 ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
27697 SmallVector<int, 8> InnerMask;
27698 SmallVector<int, 8> OuterMask;
27699 narrowShuffleMaskElts(InnerScale, InnerSVN->getMask(), InnerMask);
27700 narrowShuffleMaskElts(OuterScale, SVN->getMask(), OuterMask);
27701
27702 // Merge the shuffle masks.
27703 SmallVector<int, 8> NewMask;
27704 for (int M : OuterMask)
27705 NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
27706
27707 // Test for shuffle mask legality over both commutations.
27708 SDValue SV0 = BC0->getOperand(0);
27709 SDValue SV1 = BC0->getOperand(1);
27710 bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
27711 if (!LegalMask) {
27712 std::swap(SV0, SV1);
27713            ShuffleVectorSDNode::commuteMask(NewMask);
27714            LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
27715 }
27716
27717 if (LegalMask) {
27718 SV0 = DAG.getBitcast(ScaleVT, SV0);
27719 SV1 = DAG.getBitcast(ScaleVT, SV1);
27720 return DAG.getBitcast(
27721 VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
27722 }
27723 }
27724 }
27725 }
27726
27727 // Match shuffles of bitcasts, so long as the mask can be treated as the
27728 // larger type.
27729 if (SDValue V = combineShuffleOfBitcast(SVN, DAG, TLI, LegalOperations))
27730 return V;
27731
27732 // Compute the combined shuffle mask for a shuffle with SV0 as the first
27733 // operand, and SV1 as the second operand.
27734 // i.e. Merge SVN(OtherSVN, N1) -> shuffle(SV0, SV1, Mask) iff Commute = false
27735 // Merge SVN(N1, OtherSVN) -> shuffle(SV0, SV1, Mask') iff Commute = true
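  // As an illustration, merging
  //   shuffle (shuffle A, B, <4,1,6,3>), C, <0,2,5,7>
  // yields SV0 = B, SV1 = C and Mask = <0,2,5,7>, since only elements 0 and 2
  // of B and elements 1 and 3 of C are referenced.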
27736 auto MergeInnerShuffle =
27737 [NumElts, &VT](bool Commute, ShuffleVectorSDNode *SVN,
27738 ShuffleVectorSDNode *OtherSVN, SDValue N1,
27739 const TargetLowering &TLI, SDValue &SV0, SDValue &SV1,
27740 SmallVectorImpl<int> &Mask) -> bool {
27741 // Don't try to fold splats; they're likely to simplify somehow, or they
27742 // might be free.
27743 if (OtherSVN->isSplat())
27744 return false;
27745
27746 SV0 = SV1 = SDValue();
27747 Mask.clear();
27748
27749 for (unsigned i = 0; i != NumElts; ++i) {
27750 int Idx = SVN->getMaskElt(i);
27751 if (Idx < 0) {
27752 // Propagate Undef.
27753 Mask.push_back(Idx);
27754 continue;
27755 }
27756
27757 if (Commute)
27758 Idx = (Idx < (int)NumElts) ? (Idx + NumElts) : (Idx - NumElts);
27759
27760 SDValue CurrentVec;
27761 if (Idx < (int)NumElts) {
27762 // This shuffle index refers to the inner shuffle N0. Lookup the inner
27763 // shuffle mask to identify which vector is actually referenced.
27764 Idx = OtherSVN->getMaskElt(Idx);
27765 if (Idx < 0) {
27766 // Propagate Undef.
27767 Mask.push_back(Idx);
27768 continue;
27769 }
27770 CurrentVec = (Idx < (int)NumElts) ? OtherSVN->getOperand(0)
27771 : OtherSVN->getOperand(1);
27772 } else {
27773 // This shuffle index references an element within N1.
27774 CurrentVec = N1;
27775 }
27776
27777 // Simple case where 'CurrentVec' is UNDEF.
27778 if (CurrentVec.isUndef()) {
27779 Mask.push_back(-1);
27780 continue;
27781 }
27782
27783 // Canonicalize the shuffle index. We don't know yet if CurrentVec
27784 // will be the first or second operand of the combined shuffle.
27785 Idx = Idx % NumElts;
27786 if (!SV0.getNode() || SV0 == CurrentVec) {
27787 // Ok. CurrentVec is the left hand side.
27788 // Update the mask accordingly.
27789 SV0 = CurrentVec;
27790 Mask.push_back(Idx);
27791 continue;
27792 }
27793 if (!SV1.getNode() || SV1 == CurrentVec) {
27794 // Ok. CurrentVec is the right hand side.
27795 // Update the mask accordingly.
27796 SV1 = CurrentVec;
27797 Mask.push_back(Idx + NumElts);
27798 continue;
27799 }
27800
27801 // Last chance - see if the vector is another shuffle and if it
27802 // uses one of the existing candidate shuffle ops.
27803 if (auto *CurrentSVN = dyn_cast<ShuffleVectorSDNode>(CurrentVec)) {
27804 int InnerIdx = CurrentSVN->getMaskElt(Idx);
27805 if (InnerIdx < 0) {
27806 Mask.push_back(-1);
27807 continue;
27808 }
27809 SDValue InnerVec = (InnerIdx < (int)NumElts)
27810 ? CurrentSVN->getOperand(0)
27811 : CurrentSVN->getOperand(1);
27812 if (InnerVec.isUndef()) {
27813 Mask.push_back(-1);
27814 continue;
27815 }
27816 InnerIdx %= NumElts;
27817 if (InnerVec == SV0) {
27818 Mask.push_back(InnerIdx);
27819 continue;
27820 }
27821 if (InnerVec == SV1) {
27822 Mask.push_back(InnerIdx + NumElts);
27823 continue;
27824 }
27825 }
27826
27827 // Bail out if we cannot convert the shuffle pair into a single shuffle.
27828 return false;
27829 }
27830
27831 if (llvm::all_of(Mask, [](int M) { return M < 0; }))
27832 return true;
27833
27834 // Avoid introducing shuffles with illegal mask.
27835 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
27836 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
27837 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
27838 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
27839 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
27840 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
27841 if (TLI.isShuffleMaskLegal(Mask, VT))
27842 return true;
27843
27844 std::swap(SV0, SV1);
27845    ShuffleVectorSDNode::commuteMask(Mask);
27846    return TLI.isShuffleMaskLegal(Mask, VT);
27847 };
27848
27849 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
27850 // Canonicalize shuffles according to rules:
27851 // shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
27852 // shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
27853 // shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
27854 if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
27855        N0.getOpcode() != ISD::VECTOR_SHUFFLE) {
27856      // The incoming shuffle must be of the same type as the result of the
27857 // current shuffle.
27858 assert(N1->getOperand(0).getValueType() == VT &&
27859 "Shuffle types don't match");
27860
27861 SDValue SV0 = N1->getOperand(0);
27862 SDValue SV1 = N1->getOperand(1);
27863 bool HasSameOp0 = N0 == SV0;
27864 bool IsSV1Undef = SV1.isUndef();
27865 if (HasSameOp0 || IsSV1Undef || N0 == SV1)
27866 // Commute the operands of this shuffle so merging below will trigger.
27867 return DAG.getCommutedVectorShuffle(*SVN);
27868 }
27869
27870 // Canonicalize splat shuffles to the RHS to improve merging below.
27871 // shuffle(splat(A,u), shuffle(C,D)) -> shuffle'(shuffle(C,D), splat(A,u))
27872 if (N0.getOpcode() == ISD::VECTOR_SHUFFLE &&
27873 N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
27874 cast<ShuffleVectorSDNode>(N0)->isSplat() &&
27875 !cast<ShuffleVectorSDNode>(N1)->isSplat()) {
27876 return DAG.getCommutedVectorShuffle(*SVN);
27877 }
27878
27879 // Try to fold according to rules:
27880 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
27881 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
27882 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
27883 // Don't try to fold shuffles with illegal type.
27884 // Only fold if this shuffle is the only user of the other shuffle.
27885    // Try matching shuffle(C,shuffle(A,B)) commuted patterns as well.
27886 for (int i = 0; i != 2; ++i) {
27887 if (N->getOperand(i).getOpcode() == ISD::VECTOR_SHUFFLE &&
27888 N->isOnlyUserOf(N->getOperand(i).getNode())) {
27889 // The incoming shuffle must be of the same type as the result of the
27890 // current shuffle.
27891 auto *OtherSV = cast<ShuffleVectorSDNode>(N->getOperand(i));
27892 assert(OtherSV->getOperand(0).getValueType() == VT &&
27893 "Shuffle types don't match");
27894
27895 SDValue SV0, SV1;
27896 SmallVector<int, 4> Mask;
27897 if (MergeInnerShuffle(i != 0, SVN, OtherSV, N->getOperand(1 - i), TLI,
27898 SV0, SV1, Mask)) {
27899 // Check if all indices in Mask are Undef. In case, propagate Undef.
27900 if (llvm::all_of(Mask, [](int M) { return M < 0; }))
27901 return DAG.getUNDEF(VT);
27902
27903 return DAG.getVectorShuffle(VT, SDLoc(N),
27904 SV0 ? SV0 : DAG.getUNDEF(VT),
27905 SV1 ? SV1 : DAG.getUNDEF(VT), Mask);
27906 }
27907 }
27908 }
27909
27910    // Merge shuffles through binops if we are able to merge them with at least
27911    // one other shuffle.
27912 // shuffle(bop(shuffle(x,y),shuffle(z,w)),undef)
27913 // shuffle(bop(shuffle(x,y),shuffle(z,w)),bop(shuffle(a,b),shuffle(c,d)))
27914 unsigned SrcOpcode = N0.getOpcode();
27915 if (TLI.isBinOp(SrcOpcode) && N->isOnlyUserOf(N0.getNode()) &&
27916 (N1.isUndef() ||
27917 (SrcOpcode == N1.getOpcode() && N->isOnlyUserOf(N1.getNode())))) {
27918 // Get binop source ops, or just pass on the undef.
27919 SDValue Op00 = N0.getOperand(0);
27920 SDValue Op01 = N0.getOperand(1);
27921 SDValue Op10 = N1.isUndef() ? N1 : N1.getOperand(0);
27922 SDValue Op11 = N1.isUndef() ? N1 : N1.getOperand(1);
27923 // TODO: We might be able to relax the VT check but we don't currently
27924 // have any isBinOp() that has different result/ops VTs so play safe until
27925 // we have test coverage.
27926 if (Op00.getValueType() == VT && Op10.getValueType() == VT &&
27927 Op01.getValueType() == VT && Op11.getValueType() == VT &&
27928 (Op00.getOpcode() == ISD::VECTOR_SHUFFLE ||
27929 Op10.getOpcode() == ISD::VECTOR_SHUFFLE ||
27930 Op01.getOpcode() == ISD::VECTOR_SHUFFLE ||
27931 Op11.getOpcode() == ISD::VECTOR_SHUFFLE)) {
27932 auto CanMergeInnerShuffle = [&](SDValue &SV0, SDValue &SV1,
27933 SmallVectorImpl<int> &Mask, bool LeftOp,
27934 bool Commute) {
27935 SDValue InnerN = Commute ? N1 : N0;
27936 SDValue Op0 = LeftOp ? Op00 : Op01;
27937 SDValue Op1 = LeftOp ? Op10 : Op11;
27938 if (Commute)
27939 std::swap(Op0, Op1);
27940 // Only accept the merged shuffle if we don't introduce undef elements,
27941 // or the inner shuffle already contained undef elements.
27942 auto *SVN0 = dyn_cast<ShuffleVectorSDNode>(Op0);
27943 return SVN0 && InnerN->isOnlyUserOf(SVN0) &&
27944 MergeInnerShuffle(Commute, SVN, SVN0, Op1, TLI, SV0, SV1,
27945 Mask) &&
27946 (llvm::any_of(SVN0->getMask(), [](int M) { return M < 0; }) ||
27947 llvm::none_of(Mask, [](int M) { return M < 0; }));
27948 };
27949
27950 // Ensure we don't increase the number of shuffles - we must merge a
27951 // shuffle from at least one of the LHS and RHS ops.
27952 bool MergedLeft = false;
27953 SDValue LeftSV0, LeftSV1;
27954 SmallVector<int, 4> LeftMask;
27955 if (CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, false) ||
27956 CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, true)) {
27957 MergedLeft = true;
27958 } else {
27959 LeftMask.assign(SVN->getMask().begin(), SVN->getMask().end());
27960 LeftSV0 = Op00, LeftSV1 = Op10;
27961 }
27962
27963 bool MergedRight = false;
27964 SDValue RightSV0, RightSV1;
27965 SmallVector<int, 4> RightMask;
27966 if (CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, false) ||
27967 CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, true)) {
27968 MergedRight = true;
27969 } else {
27970 RightMask.assign(SVN->getMask().begin(), SVN->getMask().end());
27971 RightSV0 = Op01, RightSV1 = Op11;
27972 }
27973
27974 if (MergedLeft || MergedRight) {
27975 SDLoc DL(N);
27976          SDValue LHS = DAG.getVectorShuffle(
27977              VT, DL, LeftSV0 ? LeftSV0 : DAG.getUNDEF(VT),
27978              LeftSV1 ? LeftSV1 : DAG.getUNDEF(VT), LeftMask);
27979          SDValue RHS = DAG.getVectorShuffle(
27980              VT, DL, RightSV0 ? RightSV0 : DAG.getUNDEF(VT),
27981              RightSV1 ? RightSV1 : DAG.getUNDEF(VT), RightMask);
27982 return DAG.getNode(SrcOpcode, DL, VT, LHS, RHS);
27983 }
27984 }
27985 }
27986 }
27987
27988 if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG))
27989 return V;
27990
27991 // Match shuffles that can be converted to ISD::ZERO_EXTEND_VECTOR_INREG.
27992 // Perform this really late, because it could eliminate knowledge
27993 // of undef elements created by this shuffle.
27994 if (Level < AfterLegalizeTypes)
27995 if (SDValue V = combineShuffleToZeroExtendVectorInReg(SVN, DAG, TLI,
27996 LegalOperations))
27997 return V;
27998
27999 return SDValue();
28000}
28001
28002SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
28003 EVT VT = N->getValueType(0);
28004 if (!VT.isFixedLengthVector())
28005 return SDValue();
28006
28007 // Try to convert a scalar binop with an extracted vector element to a vector
28008 // binop. This is intended to reduce potentially expensive register moves.
28009 // TODO: Check if both operands are extracted.
28010  // TODO: How to prefer scalar/vector ops with multiple uses of the extract?
28011 // TODO: Generalize this, so it can be called from visitINSERT_VECTOR_ELT().
28012 SDValue Scalar = N->getOperand(0);
28013 unsigned Opcode = Scalar.getOpcode();
28014 EVT VecEltVT = VT.getScalarType();
28015 if (Scalar.hasOneUse() && Scalar->getNumValues() == 1 &&
28016 TLI.isBinOp(Opcode) && Scalar.getValueType() == VecEltVT &&
28017 Scalar.getOperand(0).getValueType() == VecEltVT &&
28018 Scalar.getOperand(1).getValueType() == VecEltVT &&
28019 Scalar->isOnlyUserOf(Scalar.getOperand(0).getNode()) &&
28020 Scalar->isOnlyUserOf(Scalar.getOperand(1).getNode()) &&
28021 DAG.isSafeToSpeculativelyExecute(Opcode) && hasOperation(Opcode, VT)) {
28022 // Match an extract element and get a shuffle mask equivalent.
28023 SmallVector<int, 8> ShufMask(VT.getVectorNumElements(), -1);
28024
28025 for (int i : {0, 1}) {
28026 // s2v (bo (extelt V, Idx), C) --> shuffle (bo V, C'), {Idx, -1, -1...}
28027 // s2v (bo C, (extelt V, Idx)) --> shuffle (bo C', V), {Idx, -1, -1...}
28028 SDValue EE = Scalar.getOperand(i);
28029 auto *C = dyn_cast<ConstantSDNode>(Scalar.getOperand(i ? 0 : 1));
28030 if (C && EE.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
28031 EE.getOperand(0).getValueType() == VT &&
28032          isa<ConstantSDNode>(EE.getOperand(1))) {
28033        // Mask = {ExtractIndex, undef, undef....}
28034 ShufMask[0] = EE.getConstantOperandVal(1);
28035 // Make sure the shuffle is legal if we are crossing lanes.
28036 if (TLI.isShuffleMaskLegal(ShufMask, VT)) {
28037 SDLoc DL(N);
28038 SDValue V[] = {EE.getOperand(0),
28039 DAG.getConstant(C->getAPIntValue(), DL, VT)};
28040 SDValue VecBO = DAG.getNode(Opcode, DL, VT, V[i], V[1 - i]);
28041 return DAG.getVectorShuffle(VT, DL, VecBO, DAG.getUNDEF(VT),
28042 ShufMask);
28043 }
28044 }
28045 }
28046 }
28047
28048 // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
28049 // with a VECTOR_SHUFFLE and possible truncate.
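  // For example (illustrative):
  //   (v4i32 scalar_to_vector (extract_vector_elt (v4i32 V), 2))
  //     --> vector_shuffle<2,-1,-1,-1> V, undef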
28050 if (Opcode != ISD::EXTRACT_VECTOR_ELT ||
28051 !Scalar.getOperand(0).getValueType().isFixedLengthVector())
28052 return SDValue();
28053
28054 // If we have an implicit truncate, truncate here if it is legal.
28055 if (VecEltVT != Scalar.getValueType() &&
28056 Scalar.getValueType().isScalarInteger() && isTypeLegal(VecEltVT)) {
28057 SDValue Val = DAG.getNode(ISD::TRUNCATE, SDLoc(Scalar), VecEltVT, Scalar);
28058 return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
28059 }
28060
28061 auto *ExtIndexC = dyn_cast<ConstantSDNode>(Scalar.getOperand(1));
28062 if (!ExtIndexC)
28063 return SDValue();
28064
28065 SDValue SrcVec = Scalar.getOperand(0);
28066 EVT SrcVT = SrcVec.getValueType();
28067 unsigned SrcNumElts = SrcVT.getVectorNumElements();
28068 unsigned VTNumElts = VT.getVectorNumElements();
28069 if (VecEltVT == SrcVT.getScalarType() && VTNumElts <= SrcNumElts) {
28070 // Create a shuffle equivalent for scalar-to-vector: {ExtIndex, -1, -1, ...}
28071 SmallVector<int, 8> Mask(SrcNumElts, -1);
28072 Mask[0] = ExtIndexC->getZExtValue();
28073 SDValue LegalShuffle = TLI.buildLegalVectorShuffle(
28074 SrcVT, SDLoc(N), SrcVec, DAG.getUNDEF(SrcVT), Mask, DAG);
28075 if (!LegalShuffle)
28076 return SDValue();
28077
28078 // If the initial vector is the same size, the shuffle is the result.
28079 if (VT == SrcVT)
28080 return LegalShuffle;
28081
28082 // If not, shorten the shuffled vector.
28083 if (VTNumElts != SrcNumElts) {
28084 SDValue ZeroIdx = DAG.getVectorIdxConstant(0, SDLoc(N));
28085 EVT SubVT = EVT::getVectorVT(*DAG.getContext(),
28086 SrcVT.getVectorElementType(), VTNumElts);
28087 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT, LegalShuffle,
28088 ZeroIdx);
28089 }
28090 }
28091
28092 return SDValue();
28093}
28094
28095SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
28096 EVT VT = N->getValueType(0);
28097 SDValue N0 = N->getOperand(0);
28098 SDValue N1 = N->getOperand(1);
28099 SDValue N2 = N->getOperand(2);
28100 uint64_t InsIdx = N->getConstantOperandVal(2);
28101
28102 // Remove insert of UNDEF/POISON.
28103 if (N1.isUndef()) {
28104 if (N1.getOpcode() == ISD::POISON || N0.getOpcode() == ISD::UNDEF)
28105 return N0;
28106 return DAG.getFreeze(N0);
28107 }
28108
28109 // If this is an insert of an extracted vector into an undef/poison vector, we
28110 // can just use the input to the extract if the types match, and can simplify
28111 // in some cases even if they don't.
28112 if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
28113 N1.getOperand(1) == N2) {
28114 EVT N1VT = N1.getValueType();
28115 EVT SrcVT = N1.getOperand(0).getValueType();
28116 if (SrcVT == VT) {
28117      // Need to ensure that the result isn't more poisonous if skipping both
28118      // the extract and the insert.
28119 if (N0.getOpcode() == ISD::POISON)
28120 return N1.getOperand(0);
28121 if (VT.isFixedLengthVector() && N1VT.isFixedLengthVector()) {
28122 unsigned SubVecNumElts = N1VT.getVectorNumElements();
28123 APInt EltMask = APInt::getBitsSet(VT.getVectorNumElements(), InsIdx,
28124 InsIdx + SubVecNumElts);
28125 if (DAG.isGuaranteedNotToBePoison(N1.getOperand(0), ~EltMask))
28126 return N1.getOperand(0);
28127 } else if (DAG.isGuaranteedNotToBePoison(N1.getOperand(0)))
28128 return N1.getOperand(0);
28129 }
28130 // TODO: To remove the zero check, need to adjust the offset to
28131 // a multiple of the new src type.
28132 if (isNullConstant(N2)) {
28133 if (VT.knownBitsGE(SrcVT) &&
28134 !(VT.isFixedLengthVector() && SrcVT.isScalableVector()))
28135 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
28136 VT, N0, N1.getOperand(0), N2);
28137 else if (VT.knownBitsLE(SrcVT) &&
28138 !(VT.isScalableVector() && SrcVT.isFixedLengthVector()))
28139 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N),
28140 VT, N1.getOperand(0), N2);
28141 }
28142 }
28143
28144 // Handle case where we've ended up inserting back into the source vector
28145 // we extracted the subvector from.
28146 // insert_subvector(N0, extract_subvector(N0, N2), N2) --> N0
28147 if (N1.getOpcode() == ISD::EXTRACT_SUBVECTOR && N1.getOperand(0) == N0 &&
28148 N1.getOperand(1) == N2)
28149 return N0;
28150
28151 // Simplify scalar inserts into an undef vector:
28152 // insert_subvector undef, (splat X), N2 -> splat X
28153 if (N0.isUndef() && N1.getOpcode() == ISD::SPLAT_VECTOR)
28154 if (DAG.isConstantValueOfAnyType(N1.getOperand(0)) || N1.hasOneUse())
28155 return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, N1.getOperand(0));
28156
28157 // insert_subvector (splat X), (splat X), N2 -> splat X
28158 if (N0.getOpcode() == ISD::SPLAT_VECTOR && N0.getOpcode() == N1.getOpcode() &&
28159 N0.getOperand(0) == N1.getOperand(0))
28160 return N0;
28161
28162 // If we are inserting a bitcast value into an undef, with the same
28163 // number of elements, just use the bitcast input of the extract.
28164 // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
28165 // BITCAST (INSERT_SUBVECTOR UNDEF N1 N2)
28166 if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
28167 N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
28168 N1.getOperand(0).getOperand(1) == N2 &&
28169 N1.getOperand(0).getOperand(0).getValueType().getVectorElementCount() ==
28170 VT.getVectorElementCount() &&
28171 N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() ==
28172 VT.getSizeInBits()) {
28173 return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
28174 }
28175
28176 // If both N0 and N1 are bitcast values on which insert_subvector
28177 // would make sense, pull the bitcast through.
28178 // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
28179 // BITCAST (INSERT_SUBVECTOR N0 N1 N2)
28180 if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
28181 SDValue CN0 = N0.getOperand(0);
28182 SDValue CN1 = N1.getOperand(0);
28183 EVT CN0VT = CN0.getValueType();
28184 EVT CN1VT = CN1.getValueType();
28185 if (CN0VT.isVector() && CN1VT.isVector() &&
28186 CN0VT.getVectorElementType() == CN1VT.getVectorElementType() &&
28187 CN0VT.getVectorElementCount() == VT.getVectorElementCount()) {
28188 SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
28189 CN0.getValueType(), CN0, CN1, N2);
28190 return DAG.getBitcast(VT, NewINSERT);
28191 }
28192 }
28193
28194 // Combine INSERT_SUBVECTORs where we are inserting to the same index.
28195 // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
28196 // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
28197 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
28198 N0.getOperand(1).getValueType() == N1.getValueType() &&
28199 N0.getOperand(2) == N2)
28200 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
28201 N1, N2);
28202
28203 // Eliminate an intermediate insert into an undef vector:
28204 // insert_subvector undef, (insert_subvector undef, X, 0), 0 -->
28205 // insert_subvector undef, X, 0
28206 if (N0.isUndef() && N1.getOpcode() == ISD::INSERT_SUBVECTOR &&
28207 N1.getOperand(0).isUndef() && isNullConstant(N1.getOperand(2)) &&
28208 isNullConstant(N2))
28209 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0,
28210 N1.getOperand(1), N2);
28211
28212 // Push subvector bitcasts to the output, adjusting the index as we go.
28213 // insert_subvector(bitcast(v), bitcast(s), c1)
28214 // -> bitcast(insert_subvector(v, s, c2))
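// For example (illustrative), inserting a v2i64 subvector that is a bitcast
// of v4i32 at i64 index 1 of a v4i64 destination can instead insert the
// v4i32 source at i32 index 2 of a v8i32-typed destination; the index is
// rescaled by the ratio of the element sizes.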
28215 if ((N0.isUndef() || N0.getOpcode() == ISD::BITCAST) &&
28216 N1.getOpcode() == ISD::BITCAST) {
28217 SDValue N0Src = peekThroughBitcasts(N0);
28218 SDValue N1Src = peekThroughBitcasts(N1);
28219 EVT N0SrcSVT = N0Src.getValueType().getScalarType();
28220 EVT N1SrcSVT = N1Src.getValueType().getScalarType();
28221 if ((N0.isUndef() || N0SrcSVT == N1SrcSVT) &&
28222 N0Src.getValueType().isVector() && N1Src.getValueType().isVector()) {
28223 EVT NewVT;
28224 SDLoc DL(N);
28225 SDValue NewIdx;
28226 LLVMContext &Ctx = *DAG.getContext();
28227 ElementCount NumElts = VT.getVectorElementCount();
28228 unsigned EltSizeInBits = VT.getScalarSizeInBits();
28229 if ((EltSizeInBits % N1SrcSVT.getSizeInBits()) == 0) {
28230 unsigned Scale = EltSizeInBits / N1SrcSVT.getSizeInBits();
28231 NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts * Scale);
28232 NewIdx = DAG.getVectorIdxConstant(InsIdx * Scale, DL);
28233 } else if ((N1SrcSVT.getSizeInBits() % EltSizeInBits) == 0) {
28234 unsigned Scale = N1SrcSVT.getSizeInBits() / EltSizeInBits;
28235 if (NumElts.isKnownMultipleOf(Scale) && (InsIdx % Scale) == 0) {
28236 NewVT = EVT::getVectorVT(Ctx, N1SrcSVT,
28237 NumElts.divideCoefficientBy(Scale));
28238 NewIdx = DAG.getVectorIdxConstant(InsIdx / Scale, DL);
28239 }
28240 }
28241 if (NewIdx && hasOperation(ISD::INSERT_SUBVECTOR, NewVT)) {
28242 SDValue Res = DAG.getBitcast(NewVT, N0Src);
28243 Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, NewVT, Res, N1Src, NewIdx);
28244 return DAG.getBitcast(VT, Res);
28245 }
28246 }
28247 }
28248
28249 // Canonicalize insert_subvector dag nodes.
28250 // Example:
28251 // (insert_subvector (insert_subvector A, Idx0), Idx1)
28252 // -> (insert_subvector (insert_subvector A, Idx1), Idx0)
28253 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
28254 N1.getValueType() == N0.getOperand(1).getValueType()) {
28255 unsigned OtherIdx = N0.getConstantOperandVal(2);
28256 if (InsIdx < OtherIdx) {
28257 // Swap nodes.
28258 SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
28259 N0.getOperand(0), N1, N2);
28260 AddToWorklist(NewOp.getNode());
28261 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
28262 VT, NewOp, N0.getOperand(1), N0.getOperand(2));
28263 }
28264 }
28265
28266 // If the input vector is a concatenation, and the insert replaces
28267 // one of the pieces, we can optimize into a single concat_vectors.
28268 if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
28269 N0.getOperand(0).getValueType() == N1.getValueType() &&
28272 unsigned Factor = N1.getValueType().getVectorMinNumElements();
28273 SmallVector<SDValue, 8> Ops(N0->ops());
28274 Ops[InsIdx / Factor] = N1;
28275 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
28276 }
28277
28278 // Simplify source operands based on insertion.
28279 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
28280 return SDValue(N, 0);
28281
28282 return SDValue();
28283}
28284
28285SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
28286 SDValue N0 = N->getOperand(0);
28287
28288 // fold (fp_to_fp16 (fp16_to_fp op)) -> op
28289 if (N0->getOpcode() == ISD::FP16_TO_FP)
28290 return N0->getOperand(0);
28291
28292 return SDValue();
28293}
28294
28295SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
28296 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
28297 auto Op = N->getOpcode();
28298 assert((Op == ISD::FP16_TO_FP || Op == ISD::BF16_TO_FP) &&
28299 "opcode should be FP16_TO_FP or BF16_TO_FP.");
28300 SDValue N0 = N->getOperand(0);
28301
28302 // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op) or
28303 // fold bf16_to_fp(op & 0xffff) -> bf16_to_fp(op)
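// The fold below is valid because the conversion reads only the low 16 bits
// of its operand, so masking the upper bits with AND is redundant unless the
// target asks to keep the zero-extension via shouldKeepZExtForFP16Conv().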
28304 if (!TLI.shouldKeepZExtForFP16Conv() && N0->getOpcode() == ISD::AND) {
28305 ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
28306 if (AndConst && AndConst->getAPIntValue() == 0xffff) {
28307 return DAG.getNode(Op, SDLoc(N), N->getValueType(0), N0.getOperand(0));
28308 }
28309 }
28310
28311 if (SDValue CastEliminated = eliminateFPCastPair(N))
28312 return CastEliminated;
28313
28314 // Sometimes constants manage to survive very late in the pipeline, e.g.,
28315 // because they are wrapped inside the <1 x f16> type. Try one last time to
28316 // get rid of them.
28317 SDValue Folded = DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N),
28318 N->getValueType(0), {N0});
28319 return Folded;
28320}
28321
28322SDValue DAGCombiner::visitFP_TO_BF16(SDNode *N) {
28323 SDValue N0 = N->getOperand(0);
28324
28325 // fold (fp_to_bf16 (bf16_to_fp op)) -> op
28326 if (N0->getOpcode() == ISD::BF16_TO_FP)
28327 return N0->getOperand(0);
28328
28329 return SDValue();
28330}
28331
28332SDValue DAGCombiner::visitBF16_TO_FP(SDNode *N) {
28333 // fold bf16_to_fp(op & 0xffff) -> bf16_to_fp(op)
28334 return visitFP16_TO_FP(N);
28335}
28336
28337SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
28338 SDValue N0 = N->getOperand(0);
28339 EVT VT = N0.getValueType();
28340 unsigned Opcode = N->getOpcode();
28341
28342 // VECREDUCE over 1-element vector is just an extract.
28343 if (VT.getVectorElementCount().isScalar()) {
28344 SDLoc dl(N);
28345 SDValue Res =
28346 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT.getVectorElementType(), N0,
28347 DAG.getVectorIdxConstant(0, dl));
28348 if (Res.getValueType() != N->getValueType(0))
28349 Res = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Res);
28350 return Res;
28351 }
28352
28353 // On a boolean vector, an and/or reduction is the same as a umin/umax
28354 // reduction. Convert them if the latter is legal while the former isn't.
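// Rationale (illustrative): when every element is known to be all-ones or
// all-zeros, the umin of the elements is all-ones only if every element is,
// which is exactly the and-reduction; likewise umax matches the or-reduction.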
28355 if (Opcode == ISD::VECREDUCE_AND || Opcode == ISD::VECREDUCE_OR) {
28356 unsigned NewOpcode = Opcode == ISD::VECREDUCE_AND
28357 ? ISD::VECREDUCE_UMIN : ISD::VECREDUCE_UMAX;
28358 if (!TLI.isOperationLegalOrCustom(Opcode, VT) &&
28359 TLI.isOperationLegalOrCustom(NewOpcode, VT) &&
28360 DAG.ComputeNumSignBits(N0) == VT.getScalarSizeInBits())
28361 return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), N0);
28362 }
28363
28364 // vecreduce_or(insert_subvector(zero or undef, val)) -> vecreduce_or(val)
28365 // vecreduce_and(insert_subvector(ones or undef, val)) -> vecreduce_and(val)
28366 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
28367 TLI.isTypeLegal(N0.getOperand(1).getValueType())) {
28368 SDValue Vec = N0.getOperand(0);
28369 SDValue Subvec = N0.getOperand(1);
28370 if ((Opcode == ISD::VECREDUCE_OR &&
28371 (N0.getOperand(0).isUndef() || isNullOrNullSplat(Vec))) ||
28372 (Opcode == ISD::VECREDUCE_AND &&
28373 (N0.getOperand(0).isUndef() || isAllOnesOrAllOnesSplat(Vec))))
28374 return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), Subvec);
28375 }
28376
28377 // vecreduce_or(sext(x)) -> sext(vecreduce_or(x))
28378 // Same for zext and anyext, and for and/or/xor reductions.
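// This is sound because a bitwise reduction of per-element extensions equals
// the extension of the narrow reduction, e.g. (illustrative):
//   vecreduce_or (zext v4i8 X to v4i32) == zext (vecreduce_or X) to i32.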
28379 if ((Opcode == ISD::VECREDUCE_OR || Opcode == ISD::VECREDUCE_AND ||
28380 Opcode == ISD::VECREDUCE_XOR) &&
28381 (N0.getOpcode() == ISD::SIGN_EXTEND ||
28382 N0.getOpcode() == ISD::ZERO_EXTEND ||
28383 N0.getOpcode() == ISD::ANY_EXTEND) &&
28384 TLI.isOperationLegalOrCustom(Opcode, N0.getOperand(0).getValueType())) {
28385 SDValue Red = DAG.getNode(Opcode, SDLoc(N),
28386 N0.getOperand(0).getValueType(),
28387 N0.getOperand(0));
28388 return DAG.getNode(N0.getOpcode(), SDLoc(N), N->getValueType(0), Red);
28389 }
28390 return SDValue();
28391}
28392
28393SDValue DAGCombiner::visitVP_FSUB(SDNode *N) {
28394 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
28395
28396 // FSUB -> FMA combines:
28397 if (SDValue Fused = visitFSUBForFMACombine<VPMatchContext>(N)) {
28398 AddToWorklist(Fused.getNode());
28399 return Fused;
28400 }
28401 return SDValue();
28402}
28403
28404SDValue DAGCombiner::visitVPOp(SDNode *N) {
28405
28406 if (N->getOpcode() == ISD::VP_GATHER)
28407 if (SDValue SD = visitVPGATHER(N))
28408 return SD;
28409
28410 if (N->getOpcode() == ISD::VP_SCATTER)
28411 if (SDValue SD = visitVPSCATTER(N))
28412 return SD;
28413
28414 if (N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_LOAD)
28415 if (SDValue SD = visitVP_STRIDED_LOAD(N))
28416 return SD;
28417
28418 if (N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_STORE)
28419 if (SDValue SD = visitVP_STRIDED_STORE(N))
28420 return SD;
28421
28422 // VP operations in which all vector elements are disabled - either by
28423 // determining that the mask is all false or that the EVL is 0 - can be
28424 // eliminated.
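// For example (illustrative), a vp.add whose mask is all-false, or whose EVL
// operand is 0, computes no lanes at all; its result can be replaced by undef,
// and VP memory operations collapse to their chain operand.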
28425 bool AreAllEltsDisabled = false;
28426 if (auto EVLIdx = ISD::getVPExplicitVectorLengthIdx(N->getOpcode()))
28427 AreAllEltsDisabled |= isNullConstant(N->getOperand(*EVLIdx));
28428 if (auto MaskIdx = ISD::getVPMaskIdx(N->getOpcode()))
28429 AreAllEltsDisabled |=
28430 ISD::isConstantSplatVectorAllZeros(N->getOperand(*MaskIdx).getNode());
28431
28432 // This is the only generic VP combine we support for now.
28433 if (!AreAllEltsDisabled) {
28434 switch (N->getOpcode()) {
28435 case ISD::VP_FADD:
28436 return visitVP_FADD(N);
28437 case ISD::VP_FSUB:
28438 return visitVP_FSUB(N);
28439 case ISD::VP_FMA:
28440 return visitFMA<VPMatchContext>(N);
28441 case ISD::VP_SELECT:
28442 return visitVP_SELECT(N);
28443 case ISD::VP_MUL:
28444 return visitMUL<VPMatchContext>(N);
28445 case ISD::VP_SUB:
28446 return foldSubCtlzNot<VPMatchContext>(N, DAG);
28447 default:
28448 break;
28449 }
28450 return SDValue();
28451 }
28452
28453 // Binary operations can be replaced by UNDEF.
28454 if (ISD::isVPBinaryOp(N->getOpcode()))
28455 return DAG.getUNDEF(N->getValueType(0));
28456
28457 // VP Memory operations can be replaced by either the chain (stores) or the
28458 // chain + undef (loads).
28459 if (const auto *MemSD = dyn_cast<MemSDNode>(N)) {
28460 if (MemSD->writeMem())
28461 return MemSD->getChain();
28462 return CombineTo(N, DAG.getUNDEF(N->getValueType(0)), MemSD->getChain());
28463 }
28464
28465 // Reduction operations return the start operand when no elements are active.
28466 if (ISD::isVPReduction(N->getOpcode()))
28467 return N->getOperand(0);
28468
28469 return SDValue();
28470}
28471
28472SDValue DAGCombiner::visitGET_FPENV_MEM(SDNode *N) {
28473 SDValue Chain = N->getOperand(0);
28474 SDValue Ptr = N->getOperand(1);
28475 EVT MemVT = cast<FPStateAccessSDNode>(N)->getMemoryVT();
28476
28477 // Check that the memory the FP state is written to is used only in a single
28478 // load operation.
28479 LoadSDNode *LdNode = nullptr;
28480 for (auto *U : Ptr->users()) {
28481 if (U == N)
28482 continue;
28483 if (auto *Ld = dyn_cast<LoadSDNode>(U)) {
28484 if (LdNode && LdNode != Ld)
28485 return SDValue();
28486 LdNode = Ld;
28487 continue;
28488 }
28489 return SDValue();
28490 }
28491 if (!LdNode || !LdNode->isSimple() || LdNode->isIndexed() ||
28492 !LdNode->getOffset().isUndef() || LdNode->getMemoryVT() != MemVT ||
28493 !LdNode->getChain().reachesChainWithoutSideEffects(SDValue(N, 0)))
28494 return SDValue();
28495
28496 // Check if the loaded value is used only in a store operation.
28497 StoreSDNode *StNode = nullptr;
28498 for (SDUse &U : LdNode->uses()) {
28499 if (U.getResNo() == 0) {
28500 if (auto *St = dyn_cast<StoreSDNode>(U.getUser())) {
28501 if (StNode)
28502 return SDValue();
28503 StNode = St;
28504 } else {
28505 return SDValue();
28506 }
28507 }
28508 }
28509 if (!StNode || !StNode->isSimple() || StNode->isIndexed() ||
28510 !StNode->getOffset().isUndef() || StNode->getMemoryVT() != MemVT ||
28511 !StNode->getChain().reachesChainWithoutSideEffects(SDValue(LdNode, 1)))
28512 return SDValue();
28513
28514 // Create new node GET_FPENV_MEM, which uses the store address to write FP
28515 // environment.
28516 SDValue Res = DAG.getGetFPEnv(Chain, SDLoc(N), StNode->getBasePtr(), MemVT,
28517 StNode->getMemOperand());
28518 CombineTo(StNode, Res, false);
28519 return Res;
28520}
28521
28522SDValue DAGCombiner::visitSET_FPENV_MEM(SDNode *N) {
28523 SDValue Chain = N->getOperand(0);
28524 SDValue Ptr = N->getOperand(1);
28525 EVT MemVT = cast<FPStateAccessSDNode>(N)->getMemoryVT();
28526
28527 // Check that the FP state address is otherwise used only in a single store operation.
28528 StoreSDNode *StNode = nullptr;
28529 for (auto *U : Ptr->users()) {
28530 if (U == N)
28531 continue;
28532 if (auto *St = dyn_cast<StoreSDNode>(U)) {
28533 if (StNode && StNode != St)
28534 return SDValue();
28535 StNode = St;
28536 continue;
28537 }
28538 return SDValue();
28539 }
28540 if (!StNode || !StNode->isSimple() || StNode->isIndexed() ||
28541 !StNode->getOffset().isUndef() || StNode->getMemoryVT() != MemVT ||
28542 !Chain.reachesChainWithoutSideEffects(SDValue(StNode, 0)))
28543 return SDValue();
28544
28545 // Check if the stored value is loaded from some location and the loaded
28546 // value is used only in the store operation.
28547 SDValue StValue = StNode->getValue();
28548 auto *LdNode = dyn_cast<LoadSDNode>(StValue);
28549 if (!LdNode || !LdNode->isSimple() || LdNode->isIndexed() ||
28550 !LdNode->getOffset().isUndef() || LdNode->getMemoryVT() != MemVT ||
28551 !StNode->getChain().reachesChainWithoutSideEffects(SDValue(LdNode, 1)))
28552 return SDValue();
28553
28554 // Create new node SET_FPENV_MEM, which uses the load address to read FP
28555 // environment.
28556 SDValue Res =
28557 DAG.getSetFPEnv(LdNode->getChain(), SDLoc(N), LdNode->getBasePtr(), MemVT,
28558 LdNode->getMemOperand());
28559 return Res;
28560}
28561
28562 /// Returns a vector_shuffle if it is able to transform an AND to a vector_shuffle
28563/// with the destination vector and a zero vector.
28564/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
28565/// vector_shuffle V, Zero, <0, 4, 2, 4>
28566SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
28567 assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");
28568
28569 EVT VT = N->getValueType(0);
28570 SDValue LHS = N->getOperand(0);
28571 SDValue RHS = peekThroughBitcasts(N->getOperand(1));
28572 SDLoc DL(N);
28573
28574 // Make sure we're not running after operation legalization where it
28575 // may have custom lowered the vector shuffles.
28576 if (LegalOperations)
28577 return SDValue();
28578
28579 if (RHS.getOpcode() != ISD::BUILD_VECTOR)
28580 return SDValue();
28581
28582 EVT RVT = RHS.getValueType();
28583 unsigned NumElts = RHS.getNumOperands();
28584
28585 // Attempt to create a valid clear mask, splitting the mask into
28586 // sub-elements and checking that each one is either all zeros or all
28587 // ones - suitable for shuffle masking.
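// For example (illustrative, little-endian): AND v2i64 X,
// <0x00000000FFFFFFFF, -1> has no per-i64 clear mask, but at Split = 2 the
// i32 sub-elements are <-1, 0, -1, -1>, giving shuffle indices <0, 5, 2, 3>
// against the zero vector.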
28588 auto BuildClearMask = [&](int Split) {
28589 int NumSubElts = NumElts * Split;
28590 int NumSubBits = RVT.getScalarSizeInBits() / Split;
28591
28592 SmallVector<int, 8> Indices;
28593 for (int i = 0; i != NumSubElts; ++i) {
28594 int EltIdx = i / Split;
28595 int SubIdx = i % Split;
28596 SDValue Elt = RHS.getOperand(EltIdx);
28597 // X & undef --> 0 (not undef). So this lane must be converted to choose
28598 // from the zero constant vector (same as if the element had all 0-bits).
28599 if (Elt.isUndef()) {
28600 Indices.push_back(i + NumSubElts);
28601 continue;
28602 }
28603
28604 std::optional<APInt> Bits = Elt->bitcastToAPInt();
28605 if (!Bits)
28606 return SDValue();
28607
28608 // Extract the sub element from the constant bit mask.
28609 if (DAG.getDataLayout().isBigEndian())
28610 *Bits =
28611 Bits->extractBits(NumSubBits, (Split - SubIdx - 1) * NumSubBits);
28612 else
28613 *Bits = Bits->extractBits(NumSubBits, SubIdx * NumSubBits);
28614
28615 if (Bits->isAllOnes())
28616 Indices.push_back(i);
28617 else if (*Bits == 0)
28618 Indices.push_back(i + NumSubElts);
28619 else
28620 return SDValue();
28621 }
28622
28623 // Let's see if the target supports this vector_shuffle.
28624 EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
28625 EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
28626 if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
28627 return SDValue();
28628
28629 SDValue Zero = DAG.getConstant(0, DL, ClearVT);
28630 return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
28631 DAG.getBitcast(ClearVT, LHS),
28632 Zero, Indices));
28633 };
28634
28635 // Determine maximum split level (byte level masking).
28636 int MaxSplit = 1;
28637 if (RVT.getScalarSizeInBits() % 8 == 0)
28638 MaxSplit = RVT.getScalarSizeInBits() / 8;
28639
28640 for (int Split = 1; Split <= MaxSplit; ++Split)
28641 if (RVT.getScalarSizeInBits() % Split == 0)
28642 if (SDValue S = BuildClearMask(Split))
28643 return S;
28644
28645 return SDValue();
28646}
28647
28648/// If a vector binop is performed on splat values, it may be profitable to
28649/// extract, scalarize, and insert/splat.
28650 static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG,
28651 const SDLoc &DL, bool LegalTypes) {
28652 SDValue N0 = N->getOperand(0);
28653 SDValue N1 = N->getOperand(1);
28654 unsigned Opcode = N->getOpcode();
28655 EVT VT = N->getValueType(0);
28656 EVT EltVT = VT.getVectorElementType();
28657 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
28658
28659 // TODO: Remove/replace the extract cost check? If the elements are available
28660 // as scalars, then there may be no extract cost. Should we ask if
28661 // inserting a scalar back into a vector is cheap instead?
28662 int Index0, Index1;
28663 SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
28664 SDValue Src1 = DAG.getSplatSourceVector(N1, Index1);
28665 // Extract element from splat_vector should be free.
28666 // TODO: use DAG.isSplatValue instead?
28667 bool IsBothSplatVector = N0.getOpcode() == ISD::SPLAT_VECTOR &&
28668 N1.getOpcode() == ISD::SPLAT_VECTOR;
28669 if (!Src0 || !Src1 || Index0 != Index1 ||
28670 Src0.getValueType().getVectorElementType() != EltVT ||
28671 Src1.getValueType().getVectorElementType() != EltVT ||
28672 !(IsBothSplatVector || TLI.isExtractVecEltCheap(VT, Index0)) ||
28673 // If before type legalization, allow scalar types that will eventually be
28674 // made legal.
28675 !TLI.isOperationLegalOrCustom(
28676 Opcode, LegalTypes
28677 ? EltVT
28678 : TLI.getTypeToTransformTo(*DAG.getContext(), EltVT)))
28679 return SDValue();
28680
28681 // FIXME: Type legalization can't handle illegal MULHS/MULHU.
28682 if ((Opcode == ISD::MULHS || Opcode == ISD::MULHU) && !TLI.isTypeLegal(EltVT))
28683 return SDValue();
28684
28685 if (N0.getOpcode() == ISD::BUILD_VECTOR && N0.getOpcode() == N1.getOpcode()) {
28686 // All but one element should have an undef input, which will fold to a
28687 // constant or undef. Avoid splatting which would over-define potentially
28688 // undefined elements.
28689
28690 // bo (build_vec ..undef, X, undef...), (build_vec ..undef, Y, undef...) -->
28691 // build_vec ..undef, (bo X, Y), undef...
28692 SmallVector<SDValue, 16> EltsX, EltsY, EltsResult;
28693 DAG.ExtractVectorElements(Src0, EltsX);
28694 DAG.ExtractVectorElements(Src1, EltsY);
28695
28696 for (auto [X, Y] : zip(EltsX, EltsY))
28697 EltsResult.push_back(DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags()));
28698 return DAG.getBuildVector(VT, DL, EltsResult);
28699 }
28700
28701 SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
28702 SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src0, IndexC);
28703 SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src1, IndexC);
28704 SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags());
28705
28706 // bo (splat X, Index), (splat Y, Index) --> splat (bo X, Y), Index
28707 return DAG.getSplat(VT, DL, ScalarBO);
28708}
28709
28710/// Visit a vector cast operation, like FP_EXTEND.
28711SDValue DAGCombiner::SimplifyVCastOp(SDNode *N, const SDLoc &DL) {
28712 EVT VT = N->getValueType(0);
28713 assert(VT.isVector() && "SimplifyVCastOp only works on vectors!");
28714 EVT EltVT = VT.getVectorElementType();
28715 unsigned Opcode = N->getOpcode();
28716
28717 SDValue N0 = N->getOperand(0);
28718 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
28719
28720 // TODO: promote operation might be also good here?
28721 int Index0;
28722 SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
28723 if (Src0 &&
28724 (N0.getOpcode() == ISD::SPLAT_VECTOR ||
28725 TLI.isExtractVecEltCheap(VT, Index0)) &&
28726 TLI.isOperationLegalOrCustom(Opcode, EltVT) &&
28727 TLI.preferScalarizeSplat(N)) {
28728 EVT SrcVT = N0.getValueType();
28729 EVT SrcEltVT = SrcVT.getVectorElementType();
28730 if (!LegalTypes || TLI.isTypeLegal(SrcEltVT)) {
28731 SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
28732 SDValue Elt =
28733 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, SrcEltVT, Src0, IndexC);
28734 SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, Elt, N->getFlags());
28735 if (VT.isScalableVector())
28736 return DAG.getSplatVector(VT, DL, ScalarBO);
28737 SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), ScalarBO);
28738 return DAG.getBuildVector(VT, DL, Ops);
28739 }
28740 }
28741
28742 return SDValue();
28743}
28744
28745/// Visit a binary vector operation, like ADD.
28746SDValue DAGCombiner::SimplifyVBinOp(SDNode *N, const SDLoc &DL) {
28747 EVT VT = N->getValueType(0);
28748 assert(VT.isVector() && "SimplifyVBinOp only works on vectors!");
28749
28750 SDValue LHS = N->getOperand(0);
28751 SDValue RHS = N->getOperand(1);
28752 unsigned Opcode = N->getOpcode();
28753 SDNodeFlags Flags = N->getFlags();
28754
28755 // Move unary shuffles with identical masks after a vector binop:
28756 // VBinOp (shuffle A, Undef, Mask), (shuffle B, Undef, Mask))
28757 // --> shuffle (VBinOp A, B), Undef, Mask
28758 // This does not require type legality checks because we are creating the
28759 // same types of operations that are in the original sequence. We do have to
28760 // restrict ops like integer div that have immediate UB (eg, div-by-zero)
28761 // though. This code is adapted from the identical transform in instcombine.
28762 if (DAG.isSafeToSpeculativelyExecute(Opcode)) {
28763 auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS);
28764 auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS);
28765 if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
28766 LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&
28767 (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
28768 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS.getOperand(0),
28769 RHS.getOperand(0), Flags);
28770 SDValue UndefV = LHS.getOperand(1);
28771 return DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
28772 }
28773
28774 // Try to sink a splat shuffle after a binop with a uniform constant.
28775 // This is limited to cases where neither the shuffle nor the constant have
28776 // undefined elements because that could be poison-unsafe or inhibit
28777 // demanded elements analysis. It is further limited to not change a splat
28778 // of an inserted scalar because that may be optimized better by
28779 // load-folding or other target-specific behaviors.
28780 if (isConstOrConstSplat(RHS) && Shuf0 && all_equal(Shuf0->getMask()) &&
28781 Shuf0->hasOneUse() && Shuf0->getOperand(1).isUndef() &&
28782 Shuf0->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
28783 // binop (splat X), (splat C) --> splat (binop X, C)
28784 SDValue X = Shuf0->getOperand(0);
28785 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, X, RHS, Flags);
28786 return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
28787 Shuf0->getMask());
28788 }
28789 if (isConstOrConstSplat(LHS) && Shuf1 && all_equal(Shuf1->getMask()) &&
28790 Shuf1->hasOneUse() && Shuf1->getOperand(1).isUndef() &&
28791 Shuf1->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
28792 // binop (splat C), (splat X) --> splat (binop C, X)
28793 SDValue X = Shuf1->getOperand(0);
28794 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS, X, Flags);
28795 return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
28796 Shuf1->getMask());
28797 }
28798 }
28799
28800 // The following pattern is likely to emerge with vector reduction ops. Moving
28801 // the binary operation ahead of insertion may allow using a narrower vector
28802 // instruction that has better performance than the wide version of the op:
28803 // VBinOp (ins undef, X, Z), (ins undef, Y, Z) --> ins VecC, (VBinOp X, Y), Z
28804 if (LHS.getOpcode() == ISD::INSERT_SUBVECTOR && LHS.getOperand(0).isUndef() &&
28805 RHS.getOpcode() == ISD::INSERT_SUBVECTOR && RHS.getOperand(0).isUndef() &&
28806 LHS.getOperand(2) == RHS.getOperand(2) &&
28807 (LHS.hasOneUse() || RHS.hasOneUse())) {
28808 SDValue X = LHS.getOperand(1);
28809 SDValue Y = RHS.getOperand(1);
28810 SDValue Z = LHS.getOperand(2);
28811 EVT NarrowVT = X.getValueType();
28812 if (NarrowVT == Y.getValueType() &&
28813 TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT,
28814 LegalOperations)) {
28815 // (binop undef, undef) may not return undef, so compute that result.
28816 SDValue VecC =
28817 DAG.getNode(Opcode, DL, VT, DAG.getUNDEF(VT), DAG.getUNDEF(VT));
28818 SDValue NarrowBO = DAG.getNode(Opcode, DL, NarrowVT, X, Y);
28819 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, VecC, NarrowBO, Z);
28820 }
28821 }
28822
28823 // Make sure all but the first op are undef or constant.
28824 auto ConcatWithConstantOrUndef = [](SDValue Concat) {
28825 return Concat.getOpcode() == ISD::CONCAT_VECTORS &&
28826 all_of(drop_begin(Concat->ops()), [](const SDValue &Op) {
28827 return Op.isUndef() ||
28828 ISD::isBuildVectorOfConstantSDNodes(Op.getNode());
28829 });
28830 };
28831
28832 // The following pattern is likely to emerge with vector reduction ops. Moving
28833 // the binary operation ahead of the concat may allow using a narrower vector
28834 // instruction that has better performance than the wide version of the op:
28835 // VBinOp (concat X, undef/constant), (concat Y, undef/constant) -->
28836 // concat (VBinOp X, Y), VecC
28837 if (ConcatWithConstantOrUndef(LHS) && ConcatWithConstantOrUndef(RHS) &&
28838 (LHS.hasOneUse() || RHS.hasOneUse())) {
28839 EVT NarrowVT = LHS.getOperand(0).getValueType();
28840 if (NarrowVT == RHS.getOperand(0).getValueType() &&
28841 TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
28842 unsigned NumOperands = LHS.getNumOperands();
28843 SmallVector<SDValue, 4> ConcatOps;
28844 for (unsigned i = 0; i != NumOperands; ++i) {
28845 // Operands 1 and up will constant fold.
28846 ConcatOps.push_back(DAG.getNode(Opcode, DL, NarrowVT, LHS.getOperand(i),
28847 RHS.getOperand(i)));
28848 }
28849
28850 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
28851 }
28852 }
28853
28854 if (SDValue V = scalarizeBinOpOfSplats(N, DAG, DL, LegalTypes))
28855 return V;
28856
28857 return SDValue();
28858}
28859
28860SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
28861 SDValue N2) {
28862 assert(N0.getOpcode() == ISD::SETCC &&
28863 "First argument must be a SetCC node!");
28864
28865 SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
28866 cast<CondCodeSDNode>(N0.getOperand(2))->get());
28867
28868 // If we got a simplified select_cc node back from SimplifySelectCC, then
28869 // break it down into a new SETCC node, and a new SELECT node, and then return
28870 // the SELECT node, since we were called with a SELECT node.
28871 if (SCC.getNode()) {
28872 // Check to see if we got a select_cc back (to turn into setcc/select).
28873 // Otherwise, just return whatever node we got back, like fabs.
28874 if (SCC.getOpcode() == ISD::SELECT_CC) {
28875 const SDNodeFlags Flags = N0->getFlags();
28876 SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
28877 N0.getValueType(),
28878 SCC.getOperand(0), SCC.getOperand(1),
28879 SCC.getOperand(4), Flags);
28880 AddToWorklist(SETCC.getNode());
28881 return DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
28882 SCC.getOperand(2), SCC.getOperand(3), Flags);
28883 }
28884
28885 return SCC;
28886 }
28887 return SDValue();
28888}
28889
28890/// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
28891/// being selected between, see if we can simplify the select. Callers of this
28892/// should assume that TheSelect is deleted if this returns true. As such, they
28893/// should return the appropriate thing (e.g. the node) back to the top-level of
28894/// the DAG combiner loop to avoid it being looked at.
28895bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
28896 SDValue RHS) {
28897 // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
28898 // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
28899 if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
28900 if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
28901 // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
28902 SDValue Sqrt = RHS;
28903 ISD::CondCode CC;
28904 SDValue CmpLHS;
28905 const ConstantFPSDNode *Zero = nullptr;
28906
28907 if (TheSelect->getOpcode() == ISD::SELECT_CC) {
28908 CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
28909 CmpLHS = TheSelect->getOperand(0);
28910 Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
28911 } else {
28912 // SELECT or VSELECT
28913 SDValue Cmp = TheSelect->getOperand(0);
28914 if (Cmp.getOpcode() == ISD::SETCC) {
28915 CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
28916 CmpLHS = Cmp.getOperand(0);
28917 Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
28918 }
28919 }
28920 if (Zero && Zero->isZero() &&
28921 Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
28922 CC == ISD::SETULT || CC == ISD::SETLT)) {
28923 // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
28924 CombineTo(TheSelect, Sqrt);
28925 return true;
28926 }
28927 }
28928 }
28929 // Cannot simplify select with vector condition
28930 if (TheSelect->getOperand(0).getValueType().isVector()) return false;
28931
28932 // If this is a select from two identical things, try to pull the operation
28933 // through the select.
28934 if (LHS.getOpcode() != RHS.getOpcode() ||
28935 !LHS.hasOneUse() || !RHS.hasOneUse())
28936 return false;
28937
28938 // If this is a load and the token chain is identical, replace the select
28939 // of two loads with a load through a select of the address to load from.
28940 // This triggers in things like "select bool X, 10.0, 123.0" after the FP
28941 // constants have been dropped into the constant pool.
28942 if (LHS.getOpcode() == ISD::LOAD) {
28943 LoadSDNode *LLD = cast<LoadSDNode>(LHS);
28944 LoadSDNode *RLD = cast<LoadSDNode>(RHS);
28945
28946 // Token chains must be identical.
28947 if (LHS.getOperand(0) != RHS.getOperand(0) ||
28948 // Do not let this transformation reduce the number of volatile loads.
28949 // Be conservative for atomics for the moment
28950 // TODO: This does appear to be legal for unordered atomics (see D66309)
28951 !LLD->isSimple() || !RLD->isSimple() ||
28952 // FIXME: If either is a pre/post inc/dec load,
28953 // we'd need to split out the address adjustment.
28954 LLD->isIndexed() || RLD->isIndexed() ||
28955 // If this is an EXTLOAD, the VT's must match.
28956 LLD->getMemoryVT() != RLD->getMemoryVT() ||
28957 // If this is an EXTLOAD, the kind of extension must match.
28958 (LLD->getExtensionType() != RLD->getExtensionType() &&
28959 // The only exception is if one of the extensions is anyext.
28960 LLD->getExtensionType() != ISD::EXTLOAD &&
28961 RLD->getExtensionType() != ISD::EXTLOAD) ||
28962 // FIXME: this discards src value information. This is
28963 // over-conservative. It would be beneficial to be able to remember
28964 // both potential memory locations. Since we are discarding
28965 // src value info, don't do the transformation if the memory
28966 // locations are not in the default address space.
28967 LLD->getPointerInfo().getAddrSpace() != 0 ||
28968 RLD->getPointerInfo().getAddrSpace() != 0 ||
28969 // We can't produce a CMOV of a TargetFrameIndex since we won't
28970 // generate the address generation required.
28971 LLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
28972 RLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
28973 !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
28974 LLD->getBasePtr().getValueType()))
28975 return false;
28976
28977 // The loads must not depend on one another.
28978 if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD))
28979 return false;
28980
28981 // Check that the select condition doesn't reach either load. If so,
28982 // folding this will induce a cycle into the DAG. If not, this is safe to
28983 // xform, so create a select of the addresses.
28984
28985 SmallPtrSet<const SDNode *, 32> Visited;
28986 SmallVector<const SDNode *, 16> Worklist;
28987
28988 // Always fail if LLD and RLD are not independent. TheSelect is a
28989 // predecessor to all Nodes in question so we need not search past it.
28990
28991 Visited.insert(TheSelect);
28992 Worklist.push_back(LLD);
28993 Worklist.push_back(RLD);
28994
28995 if (SDNode::hasPredecessorHelper(LLD, Visited, Worklist) ||
28996 SDNode::hasPredecessorHelper(RLD, Visited, Worklist))
28997 return false;
28998
28999 SDValue Addr;
29000 if (TheSelect->getOpcode() == ISD::SELECT) {
29001 // We cannot do this optimization if any pair of {RLD, LLD} is a
29002 // predecessor to {RLD, LLD, CondNode}. As we've already compared the
29003 // Loads, we only need to check if CondNode is a successor to one of the
29004 // loads. We can further avoid this if there's no use of their chain
29005 // value.
29006 SDNode *CondNode = TheSelect->getOperand(0).getNode();
29007 Worklist.push_back(CondNode);
29008
29009 if ((LLD->hasAnyUseOfValue(1) &&
29010 SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
29011 (RLD->hasAnyUseOfValue(1) &&
29012 SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
29013 return false;
29014
29015 Addr = DAG.getSelect(SDLoc(TheSelect),
29016 LLD->getBasePtr().getValueType(),
29017 TheSelect->getOperand(0), LLD->getBasePtr(),
29018 RLD->getBasePtr());
29019 } else { // Otherwise SELECT_CC
29020 // We cannot do this optimization if any pair of {RLD, LLD} is a
29021 // predecessor to {RLD, LLD, CondLHS, CondRHS}. As we've already compared
29022 // the Loads, we only need to check if CondLHS/CondRHS is a successor to
29023 // one of the loads. We can further avoid this if there's no use of their
29024 // chain value.
29025
29026 SDNode *CondLHS = TheSelect->getOperand(0).getNode();
29027 SDNode *CondRHS = TheSelect->getOperand(1).getNode();
29028 Worklist.push_back(CondLHS);
29029 Worklist.push_back(CondRHS);
29030
29031 if ((LLD->hasAnyUseOfValue(1) &&
29032 SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
29033 (RLD->hasAnyUseOfValue(1) &&
29034 SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
29035 return false;
29036
29037 Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
29038 LLD->getBasePtr().getValueType(),
29039 TheSelect->getOperand(0),
29040 TheSelect->getOperand(1),
29041 LLD->getBasePtr(), RLD->getBasePtr(),
29042 TheSelect->getOperand(4));
29043 }
29044
29045 SDValue Load;
29046 // It is safe to replace the two loads if they have different alignments,
29047 // but the new load must be the minimum (most restrictive) alignment of the
29048 // inputs.
29049 Align Alignment = std::min(LLD->getAlign(), RLD->getAlign());
29050 MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
29051 if (!RLD->isInvariant())
29052 MMOFlags &= ~MachineMemOperand::MOInvariant;
29053 if (!RLD->isDereferenceable())
29054 MMOFlags &= ~MachineMemOperand::MODereferenceable;
29055 if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
29056 // FIXME: Discards pointer and AA info.
29057 Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
29058 LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
29059 MMOFlags);
29060 } else {
29061 // FIXME: Discards pointer and AA info.
29062 Load = DAG.getExtLoad(
29063 LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
29064 : LLD->getExtensionType(),
29065 SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
29066 MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
29067 }
29068
29069 // Users of the select now use the result of the load.
29070 CombineTo(TheSelect, Load);
29071
29072 // Users of the old loads now use the new load's chain. We know the
29073 // old-load value is dead now.
29074 CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
29075 CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
29076 return true;
29077 }
29078
29079 return false;
29080}
29081
29082/// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
29083/// bitwise 'and'.
29084SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
29085 SDValue N1, SDValue N2, SDValue N3,
29086 ISD::CondCode CC) {
29087 // If this is a select where the false operand is zero and the compare is a
29088 // check of the sign bit, see if we can perform the "gzip trick":
29089 // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
29090 // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
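// For example (illustrative), with i32 values: (sra X, 31) is all-ones when X
// is negative and zero otherwise, so "select_cc setlt X, 0, A, 0" becomes a
// branchless AND of that mask with A.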
29091 EVT XType = N0.getValueType();
29092 EVT AType = N2.getValueType();
29093 if (!isNullConstant(N3) || !XType.bitsGE(AType))
29094 return SDValue();
29095
29096 // If the comparison is testing for a positive value, we have to invert
29097 // the sign bit mask, so only do that transform if the target has a bitwise
29098 // 'and not' instruction (the invert is free).
29099 if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
29100 // (X > -1) ? A : 0
29101 // (X > 0) ? X : 0 <-- This is canonical signed max.
29102 if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
29103 return SDValue();
29104 } else if (CC == ISD::SETLT) {
29105 // (X < 0) ? A : 0
29106 // (X < 1) ? X : 0 <-- This is un-canonicalized signed min.
29107 if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
29108 return SDValue();
29109 } else {
29110 return SDValue();
29111 }
29112
29113 // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
29114 // constant.
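// For example (illustrative), with i32 X and A == 16 (bit 4 set): rather than
// materializing the full sign-bit mask, (srl X, 27) moves the sign bit into
// bit 4, and the AND with A selects exactly that bit.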
29115 auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
29116 if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
29117 unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
29118 if (!TLI.shouldAvoidTransformToShift(XType, ShCt)) {
29119 SDValue ShiftAmt = DAG.getShiftAmountConstant(ShCt, XType, DL);
29120 SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
29121 AddToWorklist(Shift.getNode());
29122
29123 if (XType.bitsGT(AType)) {
29124 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
29125 AddToWorklist(Shift.getNode());
29126 }
29127
29128 if (CC == ISD::SETGT)
29129 Shift = DAG.getNOT(DL, Shift, AType);
29130
29131 return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
29132 }
29133 }
29134
29135 unsigned ShCt = XType.getSizeInBits() - 1;
29136 if (TLI.shouldAvoidTransformToShift(XType, ShCt))
29137 return SDValue();
29138
29139 SDValue ShiftAmt = DAG.getShiftAmountConstant(ShCt, XType, DL);
29140 SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
29141 AddToWorklist(Shift.getNode());
29142
29143 if (XType.bitsGT(AType)) {
29144 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
29145 AddToWorklist(Shift.getNode());
29146 }
29147
29148 if (CC == ISD::SETGT)
29149 Shift = DAG.getNOT(DL, Shift, AType);
29150
29151 return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
29152}
29153
29154// Fold select(cc, binop(), binop()) -> binop(select(), select()) etc.
29155SDValue DAGCombiner::foldSelectOfBinops(SDNode *N) {
29156 SDValue N0 = N->getOperand(0);
29157 SDValue N1 = N->getOperand(1);
29158 SDValue N2 = N->getOperand(2);
29159 SDLoc DL(N);
29160
29161 unsigned BinOpc = N1.getOpcode();
29162 if (!TLI.isBinOp(BinOpc) || (N2.getOpcode() != BinOpc) ||
29163 (N1.getResNo() != N2.getResNo()))
29164 return SDValue();
29165
29166 // The use checks are intentionally on SDNode because we may be dealing
29167 // with opcodes that produce more than one SDValue.
29168 // TODO: Do we really need to check N0 (the condition operand of the select)?
29169 // But removing that clause could cause an infinite loop...
29170 if (!N0->hasOneUse() || !N1->hasOneUse() || !N2->hasOneUse())
29171 return SDValue();
29172
29173 // Binops may include opcodes that return multiple values, so all values
29174 // must be created/propagated from the newly created binops below.
29175 SDVTList OpVTs = N1->getVTList();
29176
29177 // Fold select(cond, binop(x, y), binop(z, y))
29178 // --> binop(select(cond, x, z), y)
29179 if (N1.getOperand(1) == N2.getOperand(1)) {
29180 SDValue N10 = N1.getOperand(0);
29181 SDValue N20 = N2.getOperand(0);
29182 SDValue NewSel = DAG.getSelect(DL, N10.getValueType(), N0, N10, N20);
29183 SDNodeFlags Flags = N1->getFlags() & N2->getFlags();
29184 SDValue NewBinOp =
29185 DAG.getNode(BinOpc, DL, OpVTs, {NewSel, N1.getOperand(1)}, Flags);
29186 return SDValue(NewBinOp.getNode(), N1.getResNo());
29187 }
29188
29189 // Fold select(cond, binop(x, y), binop(x, z))
29190 // --> binop(x, select(cond, y, z))
29191 if (N1.getOperand(0) == N2.getOperand(0)) {
29192 SDValue N11 = N1.getOperand(1);
29193 SDValue N21 = N2.getOperand(1);
29194 // Second op VT might be different (e.g. shift amount type)
29195 if (N11.getValueType() == N21.getValueType()) {
29196 SDValue NewSel = DAG.getSelect(DL, N11.getValueType(), N0, N11, N21);
29197 SDNodeFlags Flags = N1->getFlags() & N2->getFlags();
29198 SDValue NewBinOp =
29199 DAG.getNode(BinOpc, DL, OpVTs, {N1.getOperand(0), NewSel}, Flags);
29200 return SDValue(NewBinOp.getNode(), N1.getResNo());
29201 }
29202 }
29203
29204 // TODO: Handle isCommutativeBinOp patterns as well?
29205 return SDValue();
29206}
29207
29208// Transform (fneg/fabs (bitconvert x)) to avoid loading constant pool values.
29209SDValue DAGCombiner::foldSignChangeInBitcast(SDNode *N) {
29210 SDValue N0 = N->getOperand(0);
29211 EVT VT = N->getValueType(0);
29212 bool IsFabs = N->getOpcode() == ISD::FABS;
29213 bool IsFree = IsFabs ? TLI.isFAbsFree(VT) : TLI.isFNegFree(VT);
29214
29215 if (IsFree || N0.getOpcode() != ISD::BITCAST || !N0.hasOneUse())
29216 return SDValue();
29217
29218 SDValue Int = N0.getOperand(0);
29219 EVT IntVT = Int.getValueType();
29220
29221 // The operand to cast should be integer.
29222 if (!IntVT.isInteger() || IntVT.isVector())
29223 return SDValue();
29224
29225 // (fneg (bitconvert x)) -> (bitconvert (xor x sign))
29226 // (fabs (bitconvert x)) -> (bitconvert (and x ~sign))
29227 APInt SignMask;
29228 if (N0.getValueType().isVector()) {
29229 // For vector, create a sign mask (0x80...) or its inverse (for fabs,
29230 // 0x7f...) per element and splat it.
29231 SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
29232 if (IsFabs)
29233 SignMask = ~SignMask;
29234 SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
29235 } else {
29236 // For scalar, just use the sign mask (0x80... or the inverse, 0x7f...)
29237 SignMask = APInt::getSignMask(IntVT.getSizeInBits());
29238 if (IsFabs)
29239 SignMask = ~SignMask;
29240 }
29241 SDLoc DL(N0);
29242 Int = DAG.getNode(IsFabs ? ISD::AND : ISD::XOR, DL, IntVT, Int,
29243 DAG.getConstant(SignMask, DL, IntVT));
29244 AddToWorklist(Int.getNode());
29245 return DAG.getBitcast(VT, Int);
29246}
29247
29248 /// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4))"
29249/// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
29250/// in it. This may be a win when the constant is not otherwise available
29251/// because it replaces two constant pool loads with one.
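/// For the example above (illustrative), the pool entry holds {2.0f, 1.0f};
/// the compare selects byte offset 4 or 0 into that array, so the two FP
/// constants are replaced by a single indexed load.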
29252SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset(
29253 const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
29254 ISD::CondCode CC) {
29255 if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType()))
29256 return SDValue();
29257
29258 // If we are before legalize types, we want the other legalization to happen
29259 // first (for example, to avoid messing with soft float).
29260 auto *TV = dyn_cast<ConstantFPSDNode>(N2);
29261 auto *FV = dyn_cast<ConstantFPSDNode>(N3);
29262 EVT VT = N2.getValueType();
29263 if (!TV || !FV || !TLI.isTypeLegal(VT))
29264 return SDValue();
29265
29266 // If a constant can be materialized without loads, this does not make sense.
29267 if (TLI.getOperationAction(ISD::ConstantFP, VT) == TargetLowering::Legal ||
29268 TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0), ForCodeSize) ||
29269 TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0), ForCodeSize))
29270 return SDValue();
29271
29272 // If both constants have multiple uses, then we won't need to do an extra
29273 // load. The values are likely around in registers for other users.
29274 if (!TV->hasOneUse() && !FV->hasOneUse())
29275 return SDValue();
29276
29277 Constant *Elts[] = { const_cast<ConstantFP*>(FV->getConstantFPValue()),
29278 const_cast<ConstantFP*>(TV->getConstantFPValue()) };
29279 Type *FPTy = Elts[0]->getType();
29280 const DataLayout &TD = DAG.getDataLayout();
29281
29282 // Create a ConstantArray of the two constants.
29283 Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
29284 SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
29285 TD.getPrefTypeAlign(FPTy));
29286 Align Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlign();
29287
29288 // Get offsets to the 0 and 1 elements of the array, so we can select between
29289 // them.
29290 SDValue Zero = DAG.getIntPtrConstant(0, DL);
29291 unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
29292 SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
29293 SDValue Cond =
29294 DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC);
29295 AddToWorklist(Cond.getNode());
29296 SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), Cond, One, Zero);
29297 AddToWorklist(CstOffset.getNode());
29298 CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset);
29299 AddToWorklist(CPIdx.getNode());
29300 return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
29301 MachinePointerInfo::getConstantPool(
29302 DAG.getMachineFunction()), Alignment);
29303}
29304
29305/// Simplify an expression of the form (N0 cond N1) ? N2 : N3
29306/// where 'cond' is the comparison specified by CC.
29307SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
29308 SDValue N2, SDValue N3, ISD::CondCode CC,
29309 bool NotExtCompare) {
29310 // (x ? y : y) -> y.
29311 if (N2 == N3) return N2;
29312
29313 EVT CmpOpVT = N0.getValueType();
29314 EVT CmpResVT = getSetCCResultType(CmpOpVT);
29315 EVT VT = N2.getValueType();
29316 auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
29317 auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
29318 auto *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
29319
29320 // Determine if the condition we're dealing with is constant.
29321 if (SDValue SCC = DAG.FoldSetCC(CmpResVT, N0, N1, CC, DL)) {
29322 AddToWorklist(SCC.getNode());
29323 if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC)) {
29324 // fold select_cc true, x, y -> x
29325 // fold select_cc false, x, y -> y
29326 return !(SCCC->isZero()) ? N2 : N3;
29327 }
29328 }
29329
29330 if (SDValue V =
29331 convertSelectOfFPConstantsToLoadOffset(DL, N0, N1, N2, N3, CC))
29332 return V;
29333
29334 if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
29335 return V;
29336
29337 // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (sra (shl x)) A)
29338 // where y has a single bit set.
29339 // In plain terms, we can turn the SELECT_CC into an AND
29340 // when the condition can be materialized as an all-ones register. Any
29341 // single bit-test can be materialized as an all-ones register with
29342 // shift-left and shift-right-arith.
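// For example (illustrative), "select_cc seteq (and X, 4), 0, 0, A" on i32:
// (shl X, 29) moves bit 2 into the sign position and (sra ..., 31) smears it,
// yielding all-ones exactly when the bit is set, which then masks A.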
29343 if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
29344 N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
29345 SDValue AndLHS = N0->getOperand(0);
29346 auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
29347 if (ConstAndRHS && ConstAndRHS->getAPIntValue().popcount() == 1) {
29348 // Shift the tested bit over the sign bit.
29349 const APInt &AndMask = ConstAndRHS->getAPIntValue();
29350 if (TLI.shouldFoldSelectWithSingleBitTest(VT, AndMask)) {
29351 unsigned ShCt = AndMask.getBitWidth() - 1;
29352 SDValue ShlAmt = DAG.getShiftAmountConstant(AndMask.countl_zero(), VT,
29353 SDLoc(AndLHS));
29354 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
29355
29356 // Now arithmetic right shift it all the way over, so the result is
29357 // either all-ones, or zero.
29358 SDValue ShrAmt = DAG.getShiftAmountConstant(ShCt, VT, SDLoc(Shl));
29359 SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
29360
29361 return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
29362 }
29363 }
29364 }
29365
29366 // fold select C, 16, 0 -> shl C, 4
29367 bool Fold = N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2();
29368 bool Swap = N3C && isNullConstant(N2) && N3C->getAPIntValue().isPowerOf2();
29369
29370 if ((Fold || Swap) &&
29371 TLI.getBooleanContents(CmpOpVT) ==
29372 TargetLowering::ZeroOrOneBooleanContent &&
29373 (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT)) &&
29375
29376 if (Swap) {
29377 CC = ISD::getSetCCInverse(CC, CmpOpVT);
29378 std::swap(N2C, N3C);
29379 }
29380
29381 // If the caller doesn't want us to simplify this into a zext of a compare,
29382 // don't do it.
29383 if (NotExtCompare && N2C->isOne())
29384 return SDValue();
29385
29386 SDValue Temp, SCC;
29387 // zext (setcc n0, n1)
29388 if (LegalTypes) {
29389 SCC = DAG.getSetCC(DL, CmpResVT, N0, N1, CC);
29390 Temp = DAG.getZExtOrTrunc(SCC, SDLoc(N2), VT);
29391 } else {
29392 SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
29393 Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
29394 }
29395
29396 AddToWorklist(SCC.getNode());
29397 AddToWorklist(Temp.getNode());
29398
29399 if (N2C->isOne())
29400 return Temp;
29401
29402 unsigned ShCt = N2C->getAPIntValue().logBase2();
29403 if (TLI.shouldAvoidTransformToShift(VT, ShCt))
29404 return SDValue();
29405
29406 // shl setcc result by log2 n2c
29407 return DAG.getNode(
29408 ISD::SHL, DL, N2.getValueType(), Temp,
29409 DAG.getShiftAmountConstant(ShCt, N2.getValueType(), SDLoc(Temp)));
29410 }
29411
29412 // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
29413 // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
29414 // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
29415 // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
29416 // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
29417 // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
29418 // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
29419 // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
29420 if (N1C && N1C->isZero() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
29421 SDValue ValueOnZero = N2;
29422 SDValue Count = N3;
29423 // If the condition is NE instead of EQ, swap the operands.
29424 if (CC == ISD::SETNE)
29425 std::swap(ValueOnZero, Count);
29426 // Check if the value on zero is a constant equal to the bit width of the type.
29427 if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
29428 if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
29429 // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
29430 // legal, combine to just cttz.
29431 if ((Count.getOpcode() == ISD::CTTZ ||
29432 Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
29433 N0 == Count.getOperand(0) &&
29434 (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
29435 return DAG.getNode(ISD::CTTZ, DL, VT, N0);
29436 // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
29437 // legal, combine to just ctlz.
29438 if ((Count.getOpcode() == ISD::CTLZ ||
29439 Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
29440 N0 == Count.getOperand(0) &&
29441 (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
29442 return DAG.getNode(ISD::CTLZ, DL, VT, N0);
29443 }
29444 }
29445 }
29446
29447 // Fold select_cc setgt X, -1, C, ~C -> xor (ashr X, BW-1), C
29448 // Fold select_cc setlt X, 0, C, ~C -> xor (ashr X, BW-1), ~C
29449 if (!NotExtCompare && N1C && N2C && N3C &&
29450 N2C->getAPIntValue() == ~N3C->getAPIntValue() &&
29451 ((N1C->isAllOnes() && CC == ISD::SETGT) ||
29452 (N1C->isZero() && CC == ISD::SETLT)) &&
29453 !TLI.shouldAvoidTransformToShift(VT, CmpOpVT.getScalarSizeInBits() - 1)) {
29454 SDValue ASHR =
29455 DAG.getNode(ISD::SRA, DL, CmpOpVT, N0,
29456 DAG.getShiftAmountConstant(
29457 CmpOpVT.getScalarSizeInBits() - 1, CmpOpVT, DL));
29458 return DAG.getNode(ISD::XOR, DL, VT, DAG.getSExtOrTrunc(ASHR, DL, VT),
29459 DAG.getSExtOrTrunc(CC == ISD::SETLT ? N3 : N2, DL, VT));
29460 }
29461
29462 // Fold sign pattern select_cc setgt X, -1, 1, -1 -> or (ashr X, BW-1), 1
29463 if (CC == ISD::SETGT && N1C && N2C && N3C && N1C->isAllOnes() &&
29464 N2C->isOne() && N3C->isAllOnes() &&
29465 !TLI.shouldAvoidTransformToShift(CmpOpVT,
29466 CmpOpVT.getScalarSizeInBits() - 1)) {
29467 SDValue ASHR =
29468 DAG.getNode(ISD::SRA, DL, CmpOpVT, N0,
29469 DAG.getShiftAmountConstant(
29470 CmpOpVT.getScalarSizeInBits() - 1, CmpOpVT, DL));
29471 return DAG.getNode(ISD::OR, DL, VT, DAG.getSExtOrTrunc(ASHR, DL, VT),
29472 DAG.getConstant(1, DL, VT));
29473 }
29474
29475 if (SDValue S = PerformMinMaxFpToSatCombine(N0, N1, N2, N3, CC, DAG))
29476 return S;
29477 if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N2, N3, CC, DAG))
29478 return S;
29479 if (SDValue ABD = foldSelectToABD(N0, N1, N2, N3, CC, DL))
29480 return ABD;
29481
29482 return SDValue();
29483}
29484
29485 static SDValue matchMergedBFX(SDValue Root, SelectionDAG &DAG,
29486 const TargetLowering &TLI) {
29487 // Match a pattern such as:
29488 // (X | (X >> C0) | (X >> C1) | ...) & Mask
29489 // This extracts contiguous parts of X and ORs them together before comparing.
29490 // We can optimize this so that we directly check (X & SomeMask) instead,
29491 // eliminating the shifts.
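// For example (illustrative):
//   ((X | (X >> 8)) & 0xFF) == 0  can be checked as  (X & 0xFFFF) == 0,
// since each shifted copy of X contributes RootMask << ShiftAmt to the
// accumulated PartsMask.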
29492
29493 EVT VT = Root.getValueType();
29494
29495 // TODO: Support vectors?
29496 if (!VT.isScalarInteger() || Root.getOpcode() != ISD::AND)
29497 return SDValue();
29498
29499 SDValue N0 = Root.getOperand(0);
29500 SDValue N1 = Root.getOperand(1);
29501
29502 if (N0.getOpcode() != ISD::OR || !isa<ConstantSDNode>(N1))
29503 return SDValue();
29504
29505 APInt RootMask = cast<ConstantSDNode>(N1)->getAsAPIntVal();
29506
29507 SDValue Src;
29508 const auto IsSrc = [&](SDValue V) {
29509 if (!Src) {
29510 Src = V;
29511 return true;
29512 }
29513
29514 return Src == V;
29515 };
29516
29517 SmallVector<SDValue> Worklist = {N0};
29518 APInt PartsMask(VT.getSizeInBits(), 0);
29519 while (!Worklist.empty()) {
29520 SDValue V = Worklist.pop_back_val();
29521 if (!V.hasOneUse() && (Src && Src != V))
29522 return SDValue();
29523
29524 if (V.getOpcode() == ISD::OR) {
29525 Worklist.push_back(V.getOperand(0));
29526 Worklist.push_back(V.getOperand(1));
29527 continue;
29528 }
29529
29530 if (V.getOpcode() == ISD::SRL) {
29531 SDValue ShiftSrc = V.getOperand(0);
29532 SDValue ShiftAmt = V.getOperand(1);
29533
29534 if (!IsSrc(ShiftSrc) || !isa<ConstantSDNode>(ShiftAmt))
29535 return SDValue();
29536
29537 auto ShiftAmtVal = cast<ConstantSDNode>(ShiftAmt)->getAsZExtVal();
29538 if (ShiftAmtVal > RootMask.getBitWidth())
29539 return SDValue();
29540
29541 PartsMask |= (RootMask << ShiftAmtVal);
29542 continue;
29543 }
29544
29545 if (IsSrc(V)) {
29546 PartsMask |= RootMask;
29547 continue;
29548 }
29549
29550 return SDValue();
29551 }
29552
29553 if (!Src)
29554 return SDValue();
29555
29556 SDLoc DL(Root);
29557 return DAG.getNode(ISD::AND, DL, VT,
29558 {Src, DAG.getConstant(PartsMask, DL, VT)});
29559}
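// Illustrative example (assuming an i32 source and the equality-against-zero
// context in which SimplifySetCC calls this helper):
//   ((X | (X >> 8) | (X >> 16)) & 0xFF) == 0
// accumulates PartsMask = 0xFF | (0xFF << 8) | (0xFF << 16) = 0x00FFFFFF and
// becomes (X & 0x00FFFFFF) == 0, eliminating both shifts.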
29560
29561/// This is a stub for TargetLowering::SimplifySetCC.
29562SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
29563 ISD::CondCode Cond, const SDLoc &DL,
29564 bool foldBooleans) {
29565 TargetLowering::DAGCombinerInfo
29566 DagCombineInfo(DAG, Level, false, this);
29567 if (SDValue C =
29568 TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL))
29569 return C;
29570
29571 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
29572 isNullConstant(N1)) {
29573
29574 if (SDValue Res = matchMergedBFX(N0, DAG, TLI))
29575 return DAG.getSetCC(DL, VT, Res, N1, Cond);
29576 }
29577
29578 return SDValue();
29579}
29580
29581/// Given an ISD::SDIV node expressing a divide by constant, return
29582/// a DAG expression to select that will generate the same value by multiplying
29583/// by a magic number.
29584/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
29585SDValue DAGCombiner::BuildSDIV(SDNode *N) {
29586 // when optimising for minimum size, we don't want to expand a div to a mul
29587 // and a shift.
29588 if (DAG.getMachineFunction().getFunction().hasMinSize())
29589 return SDValue();
29590
29591 SmallVector<SDNode *, 8> Built;
29592 if (SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, LegalTypes, Built)) {
29593 for (SDNode *N : Built)
29594 AddToWorklist(N);
29595 return S;
29596 }
29597
29598 return SDValue();
29599}
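// Worked example (illustrative; the actual constants and fix-ups are chosen by
// TargetLowering::BuildSDIV and are target-dependent): a signed i32 divide by 3
// can be expanded as
//   q = mulhs(X, 0x55555556); q = q + (X >>u 31);
// e.g. X = 7 gives 2 + 0 = 2 and X = -7 gives -3 + 1 = -2.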
29600
29601/// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
29602/// DAG expression that will generate the same value by right shifting.
29603SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
29604 ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
29605 if (!C)
29606 return SDValue();
29607
29608 // Avoid division by zero.
29609 if (C->isZero())
29610 return SDValue();
29611
29612 SmallVector<SDNode *, 8> Built;
29613 if (SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built)) {
29614 for (SDNode *N : Built)
29615 AddToWorklist(N);
29616 return S;
29617 }
29618
29619 return SDValue();
29620}
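// Worked example (illustrative): a signed i32 divide by 8 can be built as
//   q = (X + ((X >>s 31) >>u 29)) >>s 3
// i.e. add 7 to negative inputs so the shift rounds toward zero; X = -9 gives
// (-9 + 7) >>s 3 = -1, matching -9 / 8.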
29621
29622/// Given an ISD::UDIV node expressing a divide by constant, return a DAG
29623/// expression that will generate the same value by multiplying by a magic
29624/// number.
29625/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
29626SDValue DAGCombiner::BuildUDIV(SDNode *N) {
29627 // when optimising for minimum size, we don't want to expand a div to a mul
29628 // and a shift.
29629 if (DAG.getMachineFunction().getFunction().hasMinSize())
29630 return SDValue();
29631
29632 SmallVector<SDNode *, 8> Built;
29633 if (SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, LegalTypes, Built)) {
29634 for (SDNode *N : Built)
29635 AddToWorklist(N);
29636 return S;
29637 }
29638
29639 return SDValue();
29640}
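// Worked example (illustrative; constants come from TargetLowering::BuildUDIV):
// an unsigned i32 divide by 10 can be expanded as
//   q = mulhu(X, 0xCCCCCCCD) >> 3
// e.g. X = 99 gives mulhu = 79 and q = 79 >> 3 = 9.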
29641
29642/// Given an ISD::SREM node expressing a remainder by constant power of 2,
29643/// return a DAG expression that will generate the same value.
29644SDValue DAGCombiner::BuildSREMPow2(SDNode *N) {
29645 ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
29646 if (!C)
29647 return SDValue();
29648
29649 // Avoid division by zero.
29650 if (C->isZero())
29651 return SDValue();
29652
29653 SmallVector<SDNode *, 8> Built;
29654 if (SDValue S = TLI.BuildSREMPow2(N, C->getAPIntValue(), DAG, Built)) {
29655 for (SDNode *N : Built)
29656 AddToWorklist(N);
29657 return S;
29658 }
29659
29660 return SDValue();
29661}
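// Worked example (illustrative): a signed i32 remainder by 8 can be built as
//   t = (X >>s 31) >>u 29; r = ((X + t) & 7) - t
// e.g. X = -9 gives t = 7, (-9 + 7) & 7 = 6, and r = 6 - 7 = -1 = -9 srem 8.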
29662
29663// This is basically just a port of takeLog2 from InstCombineMulDivRem.cpp
29664//
29665 // Returns the node that represents `Log2(Op)`. This may create a new node. If
29666 // we are unable to compute `Log2(Op)`, this returns `SDValue()`.
29667//
29668// All nodes will be created at `DL` and the output will be of type `VT`.
29669//
29670 // This will only return `Log2(Op)` if we can prove `Op` is non-zero. Set
29671 // `AssumeNonZero` if this function should simply assume (rather than require
29672 // proof) that `Op` is non-zero.
29673 static SDValue takeInexpensiveLog2(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
29674 SDValue Op, unsigned Depth,
29675 bool AssumeNonZero) {
29676 assert(VT.isInteger() && "Only integer types are supported!");
29677
29678 auto PeekThroughCastsAndTrunc = [](SDValue V) {
29679 while (true) {
29680 switch (V.getOpcode()) {
29681 case ISD::TRUNCATE:
29682 case ISD::ZERO_EXTEND:
29683 V = V.getOperand(0);
29684 break;
29685 default:
29686 return V;
29687 }
29688 }
29689 };
29690
29691 if (VT.isScalableVector())
29692 return SDValue();
29693
29694 Op = PeekThroughCastsAndTrunc(Op);
29695
29696 // Helper for determining whether a value is a power-of-2 constant scalar or a
29697 // vector of such elements.
29698 SmallVector<APInt> Pow2Constants;
29699 auto IsPowerOfTwo = [&Pow2Constants](ConstantSDNode *C) {
29700 if (C->isZero() || C->isOpaque())
29701 return false;
29702 // TODO: We may also be able to support negative powers of 2 here.
29703 if (C->getAPIntValue().isPowerOf2()) {
29704 Pow2Constants.emplace_back(C->getAPIntValue());
29705 return true;
29706 }
29707 return false;
29708 };
29709
29710 if (ISD::matchUnaryPredicate(Op, IsPowerOfTwo)) {
29711 if (!VT.isVector())
29712 return DAG.getConstant(Pow2Constants.back().logBase2(), DL, VT);
29713 // We need to create a build vector
29714 if (Op.getOpcode() == ISD::SPLAT_VECTOR)
29715 return DAG.getSplat(VT, DL,
29716 DAG.getConstant(Pow2Constants.back().logBase2(), DL,
29717 VT.getScalarType()));
29718 SmallVector<SDValue> Log2Ops;
29719 for (const APInt &Pow2 : Pow2Constants)
29720 Log2Ops.emplace_back(
29721 DAG.getConstant(Pow2.logBase2(), DL, VT.getScalarType()));
29722 return DAG.getBuildVector(VT, DL, Log2Ops);
29723 }
29724
29725 if (Depth >= DAG.MaxRecursionDepth)
29726 return SDValue();
29727
29728 auto CastToVT = [&](EVT NewVT, SDValue ToCast) {
29729 // Peek through zero extend. We can't peek through truncates since this
29730 // function is called on a shift amount. We must ensure that all of the bits
29731 // above the original shift amount are zeroed by this function.
29732 while (ToCast.getOpcode() == ISD::ZERO_EXTEND)
29733 ToCast = ToCast.getOperand(0);
29734 EVT CurVT = ToCast.getValueType();
29735 if (NewVT == CurVT)
29736 return ToCast;
29737
29738 if (NewVT.getSizeInBits() == CurVT.getSizeInBits())
29739 return DAG.getBitcast(NewVT, ToCast);
29740
29741 return DAG.getZExtOrTrunc(ToCast, DL, NewVT);
29742 };
29743
29744 // log2(X << Y) -> log2(X) + Y
29745 if (Op.getOpcode() == ISD::SHL) {
29746 // 1 << Y and X nuw/nsw << Y are all non-zero.
29747 if (AssumeNonZero || Op->getFlags().hasNoUnsignedWrap() ||
29748 Op->getFlags().hasNoSignedWrap() || isOneConstant(Op.getOperand(0)))
29749 if (SDValue LogX = takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(0),
29750 Depth + 1, AssumeNonZero))
29751 return DAG.getNode(ISD::ADD, DL, VT, LogX,
29752 CastToVT(VT, Op.getOperand(1)));
29753 }
29754
29755 // c ? X : Y -> c ? Log2(X) : Log2(Y)
29756 SDValue Cond, TVal, FVal;
29757 if (sd_match(Op, m_OneUse(m_Select(m_Value(Cond), m_Value(TVal),
29758 m_Value(FVal))))) {
29759 if (SDValue LogX =
29760 takeInexpensiveLog2(DAG, DL, VT, TVal, Depth + 1, AssumeNonZero))
29761 if (SDValue LogY =
29762 takeInexpensiveLog2(DAG, DL, VT, FVal, Depth + 1, AssumeNonZero))
29763 return DAG.getSelect(DL, VT, Cond, LogX, LogY);
29764 }
29765
29766 // log2(umin(X, Y)) -> umin(log2(X), log2(Y))
29767 // log2(umax(X, Y)) -> umax(log2(X), log2(Y))
29768 if ((Op.getOpcode() == ISD::UMIN || Op.getOpcode() == ISD::UMAX) &&
29769 Op.hasOneUse()) {
29770 // Use AssumeNonZero as false here. Otherwise we can hit the case where
29771 // log2(umax(X, Y)) != umax(log2(X), log2(Y)) (because of overflow).
29772 if (SDValue LogX =
29773 takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(0), Depth + 1,
29774 /*AssumeNonZero*/ false))
29775 if (SDValue LogY =
29776 takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(1), Depth + 1,
29777 /*AssumeNonZero*/ false))
29778 return DAG.getNode(Op.getOpcode(), DL, VT, LogX, LogY);
29779 }
29780
29781 return SDValue();
29782}
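// For illustration, the folds above turn log2(8 << n) into 3 + n and
// log2(c ? 16 : 4) into c ? 4 : 2 without materializing a ctlz.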
29783
29784/// Determines the LogBase2 value for a non-null input value using the
29785/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
29786SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL,
29787 bool KnownNonZero, bool InexpensiveOnly,
29788 std::optional<EVT> OutVT) {
29789 EVT VT = OutVT ? *OutVT : V.getValueType();
29790 SDValue InexpensiveLogBase2 =
29791 takeInexpensiveLog2(DAG, DL, VT, V, /*Depth*/ 0, KnownNonZero);
29792 if (InexpensiveLogBase2 || InexpensiveOnly || !DAG.isKnownToBeAPowerOfTwo(V))
29793 return InexpensiveLogBase2;
29794
29795 SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
29796 SDValue Base = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
29797 SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
29798 return LogBase2;
29799}
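// For example, with i32 elements and V = 16: ctlz(16) = 27, and
// (32 - 1) - 27 = 4 = log2(16).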
29800
29801 /// Newton iteration for a function F(X): X_{i+1} = X_i - F(X_i)/F'(X_i)
29802/// For the reciprocal, we need to find the zero of the function:
29803/// F(X) = 1/X - A [which has a zero at X = 1/A]
29804/// =>
29805/// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
29806/// does not require additional intermediate precision]
29807/// For the last iteration, put numerator N into it to gain more precision:
29808/// Result = N X_i + X_i (N - N A X_i)
29809SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op,
29810 SDNodeFlags Flags) {
29811 if (LegalDAG)
29812 return SDValue();
29813
29814 // TODO: Handle extended types?
29815 EVT VT = Op.getValueType();
29816 if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 &&
29817 VT.getScalarType() != MVT::f64)
29818 return SDValue();
29819
29820 // If estimates are explicitly disabled for this function, we're done.
29821 MachineFunction &MF = DAG.getMachineFunction();
29822 int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
29823 if (Enabled == TLI.ReciprocalEstimate::Disabled)
29824 return SDValue();
29825
29826 // Estimates may be explicitly enabled for this type with a custom number of
29827 // refinement steps.
29828 int Iterations = TLI.getDivRefinementSteps(VT, MF);
29829 if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
29830 AddToWorklist(Est.getNode());
29831
29832 SDLoc DL(Op);
29833 if (Iterations) {
29834 SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
29835
29836 // Newton iterations: Est = Est + Est (N - Arg * Est)
29837 // If this is the last iteration, also multiply by the numerator.
29838 for (int i = 0; i < Iterations; ++i) {
29839 SDValue MulEst = Est;
29840
29841 if (i == Iterations - 1) {
29842 MulEst = DAG.getNode(ISD::FMUL, DL, VT, N, Est, Flags);
29843 AddToWorklist(MulEst.getNode());
29844 }
29845
29846 SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, MulEst, Flags);
29847 AddToWorklist(NewEst.getNode());
29848
29849 NewEst = DAG.getNode(ISD::FSUB, DL, VT,
29850 (i == Iterations - 1 ? N : FPOne), NewEst, Flags);
29851 AddToWorklist(NewEst.getNode());
29852
29853 NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
29854 AddToWorklist(NewEst.getNode());
29855
29856 Est = DAG.getNode(ISD::FADD, DL, VT, MulEst, NewEst, Flags);
29857 AddToWorklist(Est.getNode());
29858 }
29859 } else {
29860 // If no iterations are available, multiply with N.
29861 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, N, Flags);
29862 AddToWorklist(Est.getNode());
29863 }
29864
29865 return Est;
29866 }
29867
29868 return SDValue();
29869}
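// Numeric sketch of the recurrence above (illustrative): for A = 3 and an
// initial estimate X0 = 0.3, X1 = X0 * (2 - A * X0) = 0.3 * 1.1 = 0.33 and
// X2 = 0.33 * 1.01 = 0.3333, converging quadratically towards 1/3 before the
// numerator N is folded into the final step.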
29870
29871 /// Newton iteration for a function F(X): X_{i+1} = X_i - F(X_i)/F'(X_i)
29872/// For the reciprocal sqrt, we need to find the zero of the function:
29873/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
29874/// =>
29875/// X_{i+1} = X_i (1.5 - A X_i^2 / 2)
29876/// As a result, we precompute A/2 prior to the iteration loop.
29877SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
29878 unsigned Iterations, bool Reciprocal) {
29879 EVT VT = Arg.getValueType();
29880 SDLoc DL(Arg);
29881 SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
29882
29883 // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
29884 // this entire sequence requires only one FP constant.
29885 SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg);
29886 HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg);
29887
29888 // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
29889 for (unsigned i = 0; i < Iterations; ++i) {
29890 SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est);
29891 NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst);
29892 NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst);
29893 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst);
29894 }
29895
29896 // If non-reciprocal square root is requested, multiply the result by Arg.
29897 if (!Reciprocal)
29898 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg);
29899
29900 return Est;
29901}
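// Numeric sketch of one step (illustrative): for Arg = 2 and an estimate
// X0 = 0.7, HalfArg = 1.0, so X1 = 0.7 * (1.5 - 1.0 * 0.49) = 0.707, already
// close to 1/sqrt(2).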
29902
29903 /// Newton iteration for a function F(X): X_{i+1} = X_i - F(X_i)/F'(X_i)
29904/// For the reciprocal sqrt, we need to find the zero of the function:
29905/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
29906/// =>
29907/// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
29908SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
29909 unsigned Iterations, bool Reciprocal) {
29910 EVT VT = Arg.getValueType();
29911 SDLoc DL(Arg);
29912 SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
29913 SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
29914
29915 // This routine must enter the loop below to work correctly
29916 // when (Reciprocal == false).
29917 assert(Iterations > 0);
29918
29919 // Newton iterations for reciprocal square root:
29920 // E = (E * -0.5) * ((A * E) * E + -3.0)
29921 for (unsigned i = 0; i < Iterations; ++i) {
29922 SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est);
29923 SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est);
29924 SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree);
29925
29926 // When calculating a square root at the last iteration build:
29927 // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
29928 // (notice a common subexpression)
29929 SDValue LHS;
29930 if (Reciprocal || (i + 1) < Iterations) {
29931 // RSQRT: LHS = (E * -0.5)
29932 LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf);
29933 } else {
29934 // SQRT: LHS = (A * E) * -0.5
29935 LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf);
29936 }
29937
29938 Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS);
29939 }
29940
29941 return Est;
29942}
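// Numeric sketch (illustrative): for A = 2 and X0 = 0.7,
// X1 = (-0.5 * 0.7) * (2 * 0.7 * 0.7 - 3) = -0.35 * -2.02 = 0.707, the same
// fixed point as the one-constant form above.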
29943
29944/// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
29945/// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
29946/// Op can be zero.
29947SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, bool Reciprocal) {
29948 if (LegalDAG)
29949 return SDValue();
29950
29951 // TODO: Handle extended types?
29952 EVT VT = Op.getValueType();
29953 if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 &&
29954 VT.getScalarType() != MVT::f64)
29955 return SDValue();
29956
29957 // If estimates are explicitly disabled for this function, we're done.
29958 MachineFunction &MF = DAG.getMachineFunction();
29959 int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
29960 if (Enabled == TLI.ReciprocalEstimate::Disabled)
29961 return SDValue();
29962
29963 // Estimates may be explicitly enabled for this type with a custom number of
29964 // refinement steps.
29965 int Iterations = TLI.getSqrtRefinementSteps(VT, MF);
29966
29967 bool UseOneConstNR = false;
29968 if (SDValue Est =
29969 TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
29970 Reciprocal)) {
29971 AddToWorklist(Est.getNode());
29972
29973 if (Iterations > 0)
29974 Est = UseOneConstNR
29975 ? buildSqrtNROneConst(Op, Est, Iterations, Reciprocal)
29976 : buildSqrtNRTwoConst(Op, Est, Iterations, Reciprocal);
29977 if (!Reciprocal) {
29978 SDLoc DL(Op);
29979 // Try the target specific test first.
29980 SDValue Test = TLI.getSqrtInputTest(Op, DAG, DAG.getDenormalMode(VT));
29981
29982 // The estimate is now completely wrong if the input was exactly 0.0 or
29983 // possibly a denormal. Force the answer to 0.0 or the value provided by the
29984 // target for those cases.
29985 Est = DAG.getSelect(DL, VT, Test,
29986 TLI.getSqrtResultForDenormInput(Op, DAG), Est);
29987 }
29988 return Est;
29989 }
29990
29991 return SDValue();
29992}
29993
29994SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op) {
29995 return buildSqrtEstimateImpl(Op, true);
29996}
29997
29998SDValue DAGCombiner::buildSqrtEstimate(SDValue Op) {
29999 return buildSqrtEstimateImpl(Op, false);
30000}
30001
30002/// Return true if there is any possibility that the two addresses overlap.
30003bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const {
30004
30005 struct MemUseCharacteristics {
30006 bool IsVolatile;
30007 bool IsAtomic;
30008 SDValue BasePtr;
30009 int64_t Offset;
30010 LocationSize NumBytes;
30011 MachineMemOperand *MMO;
30012 };
30013
30014 auto getCharacteristics = [this](SDNode *N) -> MemUseCharacteristics {
30015 if (const auto *LSN = dyn_cast<LSBaseSDNode>(N)) {
30016 int64_t Offset = 0;
30017 if (auto *C = dyn_cast<ConstantSDNode>(LSN->getOffset()))
30018 Offset = (LSN->getAddressingMode() == ISD::PRE_INC) ? C->getSExtValue()
30019 : (LSN->getAddressingMode() == ISD::PRE_DEC)
30020 ? -1 * C->getSExtValue()
30021 : 0;
30022 TypeSize Size = LSN->getMemoryVT().getStoreSize();
30023 return {LSN->isVolatile(), LSN->isAtomic(),
30024 LSN->getBasePtr(), Offset /*base offset*/,
30025 LocationSize::precise(Size), LSN->getMemOperand()};
30026 }
30027 if (const auto *LN = cast<LifetimeSDNode>(N)) {
30028 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
30029 return {false /*isVolatile*/,
30030 /*isAtomic*/ false,
30031 LN->getOperand(1),
30032 0,
30033 LocationSize::precise(MFI.getObjectSize(LN->getFrameIndex())),
30034 (MachineMemOperand *)nullptr};
30035 }
30036 // Default.
30037 return {false /*isvolatile*/,
30038 /*isAtomic*/ false,
30039 SDValue(),
30040 (int64_t)0 /*offset*/,
30041 LocationSize::beforeOrAfterPointer() /*size*/,
30042 (MachineMemOperand *)nullptr};
30043 };
30044
30045 MemUseCharacteristics MUC0 = getCharacteristics(Op0),
30046 MUC1 = getCharacteristics(Op1);
30047
30048 // If they are to the same address, then they must be aliases.
30049 if (MUC0.BasePtr.getNode() && MUC0.BasePtr == MUC1.BasePtr &&
30050 MUC0.Offset == MUC1.Offset)
30051 return true;
30052
30053 // If they are both volatile then they cannot be reordered.
30054 if (MUC0.IsVolatile && MUC1.IsVolatile)
30055 return true;
30056
30057 // Be conservative about atomics for the moment
30058 // TODO: This is way overconservative for unordered atomics (see D66309)
30059 if (MUC0.IsAtomic && MUC1.IsAtomic)
30060 return true;
30061
30062 if (MUC0.MMO && MUC1.MMO) {
30063 if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
30064 (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
30065 return false;
30066 }
30067
30068 // If NumBytes is scalable and the offset is not 0, conservatively return
30069 // that they may alias.
30070 if ((MUC0.NumBytes.hasValue() && MUC0.NumBytes.isScalable() &&
30071 MUC0.Offset != 0) ||
30072 (MUC1.NumBytes.hasValue() && MUC1.NumBytes.isScalable() &&
30073 MUC1.Offset != 0))
30074 return true;
30075 // Try to prove that there is aliasing, or that there is no aliasing. Either
30076 // way, we can return now. If nothing can be proved, proceed with more tests.
30077 bool IsAlias;
30078 if (BaseIndexOffset::computeAliasing(Op0, MUC0.NumBytes, Op1, MUC1.NumBytes,
30079 DAG, IsAlias))
30080 return IsAlias;
30081
30082 // The following all rely on MMO0 and MMO1 being valid. Fail conservatively if
30083 // either are not known.
30084 if (!MUC0.MMO || !MUC1.MMO)
30085 return true;
30086
30087 // If one operation reads from invariant memory, and the other may store, they
30088 // cannot alias. These should really be checking the equivalent of mayWrite,
30089 // but it only matters for memory nodes other than load/store.
30090 if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
30091 (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
30092 return false;
30093
30094 // If we know the required SrcValue1 and SrcValue2 have relatively large
30095 // alignment compared to the size and offset of the access, we may be able
30096 // to prove they do not alias. This check is conservative for now to catch
30097 // cases created by splitting vector types; it only works when the offsets are
30098 // multiples of the size of the data.
30099 int64_t SrcValOffset0 = MUC0.MMO->getOffset();
30100 int64_t SrcValOffset1 = MUC1.MMO->getOffset();
30101 Align OrigAlignment0 = MUC0.MMO->getBaseAlign();
30102 Align OrigAlignment1 = MUC1.MMO->getBaseAlign();
30103 LocationSize Size0 = MUC0.NumBytes;
30104 LocationSize Size1 = MUC1.NumBytes;
30105
30106 if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
30107 Size0.hasValue() && Size1.hasValue() && !Size0.isScalable() &&
30108 !Size1.isScalable() && Size0 == Size1 &&
30109 OrigAlignment0 > Size0.getValue().getKnownMinValue() &&
30110 SrcValOffset0 % Size0.getValue().getKnownMinValue() == 0 &&
30111 SrcValOffset1 % Size1.getValue().getKnownMinValue() == 0) {
30112 int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0.value();
30113 int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1.value();
30114
30115 // There is no overlap between these relatively aligned accesses of
30116 // similar size. Return no alias.
30117 if ((OffAlign0 + static_cast<int64_t>(
30118 Size0.getValue().getKnownMinValue())) <= OffAlign1 ||
30119 (OffAlign1 + static_cast<int64_t>(
30120 Size1.getValue().getKnownMinValue())) <= OffAlign0)
30121 return false;
30122 }
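// For example (illustrative), two 4-byte accesses sharing a 16-byte base
// alignment at source offsets 0 and 4 give OffAlign0 = 0 and OffAlign1 = 4;
// since 0 + 4 <= 4, the accesses cannot overlap and no alias is reported.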
30123
30124 bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
30125 ? CombinerGlobalAA
30126 : DAG.getSubtarget().useAA();
30127#ifndef NDEBUG
30128 if (CombinerAAOnlyFunc.getNumOccurrences() &&
30129 CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
30130 UseAA = false;
30131#endif
30132
30133 if (UseAA && BatchAA && MUC0.MMO->getValue() && MUC1.MMO->getValue() &&
30134 Size0.hasValue() && Size1.hasValue() &&
30135 // Can't represent a scalable size + fixed offset in LocationSize
30136 (!Size0.isScalable() || SrcValOffset0 == 0) &&
30137 (!Size1.isScalable() || SrcValOffset1 == 0)) {
30138 // Use alias analysis information.
30139 int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
30140 int64_t Overlap0 =
30141 Size0.getValue().getKnownMinValue() + SrcValOffset0 - MinOffset;
30142 int64_t Overlap1 =
30143 Size1.getValue().getKnownMinValue() + SrcValOffset1 - MinOffset;
30144 LocationSize Loc0 =
30145 Size0.isScalable() ? Size0 : LocationSize::precise(Overlap0);
30146 LocationSize Loc1 =
30147 Size1.isScalable() ? Size1 : LocationSize::precise(Overlap1);
30148 if (BatchAA->isNoAlias(
30149 MemoryLocation(MUC0.MMO->getValue(), Loc0,
30150 UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()),
30151 MemoryLocation(MUC1.MMO->getValue(), Loc1,
30152 UseTBAA ? MUC1.MMO->getAAInfo() : AAMDNodes())))
30153 return false;
30154 }
30155
30156 // Otherwise we have to assume they alias.
30157 return true;
30158}
30159
30160/// Walk up chain skipping non-aliasing memory nodes,
30161/// looking for aliasing nodes and adding them to the Aliases vector.
30162void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
30163 SmallVectorImpl<SDValue> &Aliases) {
30164 SmallVector<SDValue, 8> Chains; // List of chains to visit.
30165 SmallPtrSet<SDNode *, 16> Visited; // Visited node set.
30166
30167 // Get alias information for node.
30168 // TODO: relax aliasing for unordered atomics (see D66309)
30169 const bool IsLoad = isa<LoadSDNode>(N) && cast<LoadSDNode>(N)->isSimple();
30170
30171 // Starting off.
30172 Chains.push_back(OriginalChain);
30173 unsigned Depth = 0;
30174
30175 // Attempt to improve chain by a single step
30176 auto ImproveChain = [&](SDValue &C) -> bool {
30177 switch (C.getOpcode()) {
30178 case ISD::EntryToken:
30179 // No need to mark EntryToken.
30180 C = SDValue();
30181 return true;
30182 case ISD::LOAD:
30183 case ISD::STORE: {
30184 // Get alias information for C.
30185 // TODO: Relax aliasing for unordered atomics (see D66309)
30186 bool IsOpLoad = isa<LoadSDNode>(C.getNode()) &&
30187 cast<LSBaseSDNode>(C.getNode())->isSimple();
30188 if ((IsLoad && IsOpLoad) || !mayAlias(N, C.getNode())) {
30189 // Look further up the chain.
30190 C = C.getOperand(0);
30191 return true;
30192 }
30193 // Alias, so stop here.
30194 return false;
30195 }
30196
30197 case ISD::CopyFromReg:
30198 // Always forward past CopyFromReg.
30199 C = C.getOperand(0);
30200 return true;
30201
30202 case ISD::LIFETIME_START:
30203 case ISD::LIFETIME_END: {
30204 // We can forward past any lifetime start/end that can be proven not to
30205 // alias the memory access.
30206 if (!mayAlias(N, C.getNode())) {
30207 // Look further up the chain.
30208 C = C.getOperand(0);
30209 return true;
30210 }
30211 return false;
30212 }
30213 default:
30214 return false;
30215 }
30216 };
30217
30218 // Look at each chain and determine if it is an alias. If so, add it to the
30219 // aliases list. If not, then continue up the chain looking for the next
30220 // candidate.
30221 while (!Chains.empty()) {
30222 SDValue Chain = Chains.pop_back_val();
30223
30224 // Don't bother if we've seen Chain before.
30225 if (!Visited.insert(Chain.getNode()).second)
30226 continue;
30227
30228 // For TokenFactor nodes, look at each operand and only continue up the
30229 // chain until we reach the depth limit.
30230 //
30231 // FIXME: The depth check could be made to return the last non-aliasing
30232 // chain we found before we hit a tokenfactor rather than the original
30233 // chain.
30234 if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
30235 Aliases.clear();
30236 Aliases.push_back(OriginalChain);
30237 return;
30238 }
30239
30240 if (Chain.getOpcode() == ISD::TokenFactor) {
30241 // We have to check each of the operands of the token factor for "small"
30242 // token factors, so we queue them up. Adding the operands to the queue
30243 // (stack) in reverse order maintains the original order and increases the
30244 // likelihood that getNode will find a matching token factor (CSE.)
30245 if (Chain.getNumOperands() > 16) {
30246 Aliases.push_back(Chain);
30247 continue;
30248 }
30249 for (unsigned n = Chain.getNumOperands(); n;)
30250 Chains.push_back(Chain.getOperand(--n));
30251 ++Depth;
30252 continue;
30253 }
30254 // Everything else
30255 if (ImproveChain(Chain)) {
30256 // Updated Chain Found, Consider new chain if one exists.
30257 if (Chain.getNode())
30258 Chains.push_back(Chain);
30259 ++Depth;
30260 continue;
30261 }
30262 // No Improved Chain Possible, treat as Alias.
30263 Aliases.push_back(Chain);
30264 }
30265}
30266
30267/// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
30268/// (aliasing node.)
30269SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
30270 if (OptLevel == CodeGenOptLevel::None)
30271 return OldChain;
30272
30273 // Ops for replacing token factor.
30274 SmallVector<SDValue, 8> Aliases;
30275
30276 // Accumulate all the aliases to this node.
30277 GatherAllAliases(N, OldChain, Aliases);
30278
30279 // If no operands then chain to entry token.
30280 if (Aliases.empty())
30281 return DAG.getEntryNode();
30282
30283 // If a single operand then chain to it. We don't need to revisit it.
30284 if (Aliases.size() == 1)
30285 return Aliases[0];
30286
30287 // Construct a custom tailored token factor.
30288 return DAG.getTokenFactor(SDLoc(N), Aliases);
30289}
30290
30291// This function tries to collect a bunch of potentially interesting
30292// nodes to improve the chains of, all at once. This might seem
30293// redundant, as this function gets called when visiting every store
30294// node, so why not let the work be done on each store as it's visited?
30295//
30296// I believe this is mainly important because mergeConsecutiveStores
30297// is unable to deal with merging stores of different sizes, so unless
30298// we improve the chains of all the potential candidates up-front
30299// before running mergeConsecutiveStores, it might only see some of
30300// the nodes that will eventually be candidates, and then not be able
30301// to go from a partially-merged state to the desired final
30302// fully-merged state.
30303
30304bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
30305 SmallVector<StoreSDNode *, 8> ChainedStores;
30306 StoreSDNode *STChain = St;
30307 // Intervals records which offsets from BaseIndex have been covered. In
30308 // the common case, every store writes to the immediately preceding address
30309 // range and is thus merged with the previous interval at insertion time.
30310
30311 using IMap = llvm::IntervalMap<int64_t, std::monostate, 8,
30312 IntervalMapHalfOpenInfo<int64_t>>;
30313 IMap::Allocator A;
30314 IMap Intervals(A);
30315
30316 // This holds the base pointer, index, and the offset in bytes from the base
30317 // pointer.
30318 const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
30319
30320 // We must have a base and an offset.
30321 if (!BasePtr.getBase().getNode())
30322 return false;
30323
30324 // Do not handle stores to undef base pointers.
30325 if (BasePtr.getBase().isUndef())
30326 return false;
30327
30328 // Do not handle stores to opaque types
30329 if (St->getMemoryVT().isZeroSized())
30330 return false;
30331
30332 // BaseIndexOffset assumes that offsets are fixed-size, which
30333 // is not valid for scalable vectors where the offsets are
30334 // scaled by `vscale`, so bail out early.
30335 if (St->getMemoryVT().isScalableVT())
30336 return false;
30337
30338 // Add ST's interval.
30339 Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8,
30340 std::monostate{});
30341
30342 while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {
30343 if (Chain->getMemoryVT().isScalableVector())
30344 return false;
30345
30346 // If the chain has more than one use, then we can't reorder the mem ops.
30347 if (!SDValue(Chain, 0)->hasOneUse())
30348 break;
30349 // TODO: Relax for unordered atomics (see D66309)
30350 if (!Chain->isSimple() || Chain->isIndexed())
30351 break;
30352
30353 // Find the base pointer and offset for this memory node.
30354 const BaseIndexOffset Ptr = BaseIndexOffset::match(Chain, DAG);
30355 // Check that the base pointer is the same as the original one.
30356 int64_t Offset;
30357 if (!BasePtr.equalBaseIndex(Ptr, DAG, Offset))
30358 break;
30359 int64_t Length = (Chain->getMemoryVT().getSizeInBits() + 7) / 8;
30360 // Make sure we don't overlap with other intervals by checking the ones to
30361 // the left or right before inserting.
30362 auto I = Intervals.find(Offset);
30363 // If there's a next interval, we should end before it.
30364 if (I != Intervals.end() && I.start() < (Offset + Length))
30365 break;
30366 // If there's a previous interval, we should start after it.
30367 if (I != Intervals.begin() && (--I).stop() <= Offset)
30368 break;
30369 Intervals.insert(Offset, Offset + Length, std::monostate{});
30370
30371 ChainedStores.push_back(Chain);
30372 STChain = Chain;
30373 }
30374
30375 // If we didn't find a chained store, exit.
30376 if (ChainedStores.empty())
30377 return false;
30378
30379 // Improve all chained stores (St and ChainedStores members) starting from
30380 // where the store chain ended and return single TokenFactor.
30381 SDValue NewChain = STChain->getChain();
30382 SmallVector<SDValue, 8> TFOps;
30383 for (unsigned I = ChainedStores.size(); I;) {
30384 StoreSDNode *S = ChainedStores[--I];
30385 SDValue BetterChain = FindBetterChain(S, NewChain);
30386 S = cast<StoreSDNode>(DAG.UpdateNodeOperands(
30387 S, BetterChain, S->getOperand(1), S->getOperand(2), S->getOperand(3)));
30388 TFOps.push_back(SDValue(S, 0));
30389 ChainedStores[I] = S;
30390 }
30391
30392 // Improve St's chain. Use a new node to avoid creating a loop from CombineTo.
30393 SDValue BetterChain = FindBetterChain(St, NewChain);
30394 SDValue NewST;
30395 if (St->isTruncatingStore())
30396 NewST = DAG.getTruncStore(BetterChain, SDLoc(St), St->getValue(),
30397 St->getBasePtr(), St->getMemoryVT(),
30398 St->getMemOperand());
30399 else
30400 NewST = DAG.getStore(BetterChain, SDLoc(St), St->getValue(),
30401 St->getBasePtr(), St->getMemOperand());
30402
30403 TFOps.push_back(NewST);
30404
30405 // If we improved every element of TFOps, then we've lost the dependence on
30406 // NewChain to successors of St and we need to add it back to TFOps. Do so at
30407 // the beginning to keep relative order consistent with FindBetterChains.
30408 auto hasImprovedChain = [&](SDValue ST) -> bool {
30409 return ST->getOperand(0) != NewChain;
30410 };
30411 bool AddNewChain = llvm::all_of(TFOps, hasImprovedChain);
30412 if (AddNewChain)
30413 TFOps.insert(TFOps.begin(), NewChain);
30414
30415 SDValue TF = DAG.getTokenFactor(SDLoc(STChain), TFOps);
30416 CombineTo(St, TF);
30417
30418 // Add TF and its operands to the worklist.
30419 AddToWorklist(TF.getNode());
30420 for (const SDValue &Op : TF->ops())
30421 AddToWorklist(Op.getNode());
30422 AddToWorklist(STChain);
30423 return true;
30424}
30425
30426bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
30427 if (OptLevel == CodeGenOptLevel::None)
30428 return false;
30429
30430 const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
30431
30432 // We must have a base and an offset.
30433 if (!BasePtr.getBase().getNode())
30434 return false;
30435
30436 // Do not handle stores to undef base pointers.
30437 if (BasePtr.getBase().isUndef())
30438 return false;
30439
30440 // Directly improve a chain of disjoint stores starting at St.
30441 if (parallelizeChainedStores(St))
30442 return true;
30443
30444 // Improve St's chain.
30445 SDValue BetterChain = FindBetterChain(St, St->getChain());
30446 if (St->getChain() != BetterChain) {
30447 replaceStoreChain(St, BetterChain);
30448 return true;
30449 }
30450 return false;
30451}
30452
30453/// This is the entry point for the file.
30454 void SelectionDAG::Combine(CombineLevel Level, BatchAAResults *BatchAA,
30455 CodeGenOptLevel OptLevel) {
30456 /// This is the main entry point to this class.
30457 DAGCombiner(*this, BatchAA, OptLevel).Run(Level);
30458}
return SDValue()
static bool mayAlias(MachineInstr &MIa, SmallVectorImpl< MachineInstr * > &MemInsns, AliasAnalysis *AA)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static cl::opt< bool > UseAA("aarch64-use-aa", cl::init(true), cl::desc("Enable the use of AA during codegen."))
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
constexpr LLT S1
AMDGPU Register Bank Select
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the simple types necessary to represent the attributes associated with functions a...
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL, const TargetLowering &TLI)
For the instruction sequence of store below, F and I values are bundled together as an i64 value befo...
static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I)
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
static bool canFoldInAddressingMode(GLoadStore *MI, const TargetLowering &TLI, MachineRegisterInfo &MRI)
Return true if 'MI' is a load or a store that may be fold it's address operand into the load / store ...
static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I)
static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques=false)
static cl::opt< bool > EnableShrinkLoadReplaceStoreWithStore("combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true), cl::desc("DAG combiner enable load/<replace bytes>/store with " "a narrower store"))
static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0, unsigned ExtOpc, SmallVectorImpl< SDNode * > &ExtendNodes, const TargetLowering &TLI)
static cl::opt< unsigned > TokenFactorInlineLimit("combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048), cl::desc("Limit the number of operands to inline for Token Factors"))
static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner, const TargetLowering &TLI, EVT VT, bool LegalOperations, SDNode *N, SDValue N0, ISD::LoadExtType ExtLoadType, ISD::NodeType ExtOpc, bool NonNegZExt=false)
static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG)
static SDNode * getBuildPairElt(SDNode *N, unsigned i)
static SDValue foldExtractSubvectorFromShuffleVector(EVT NarrowVT, SDValue Src, unsigned Index, const SDLoc &DL, SelectionDAG &DAG, bool LegalOperations)
Given EXTRACT_SUBVECTOR(VECTOR_SHUFFLE(Op0, Op1, Mask)), try to produce VECTOR_SHUFFLE(EXTRACT_SUBVEC...
static SDValue foldToMaskedStore(StoreSDNode *Store, SelectionDAG &DAG, const SDLoc &Dl)
static SDValue foldBitOrderCrossLogicOp(SDNode *N, SelectionDAG &DAG)
static SDValue tryToFoldExtendOfConstant(SDNode *N, const SDLoc &DL, const TargetLowering &TLI, SelectionDAG &DAG, bool LegalTypes)
Try to fold a sext/zext/aext dag node into a ConstantSDNode or a build_vector of constants.
static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift, SDValue ExtractFrom, SDValue &Mask, const SDLoc &DL)
Helper function for visitOR to extract the needed side of a rotate idiom from a shl/srl/mul/udiv.
static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec, bool &IsLoad, bool &IsMasked, SDValue &Ptr, const TargetLowering &TLI)
bool refineUniformBase(SDValue &BasePtr, SDValue &Index, bool IndexIsScaled, SelectionDAG &DAG, const SDLoc &DL)
static SDValue narrowExtractedVectorLoad(EVT VT, SDValue Src, unsigned Index, const SDLoc &DL, SelectionDAG &DAG)
If we are extracting a subvector from a wide vector load, convert to a narrow load to eliminate the e...
static SDValue scalarizeExtractedBinOp(SDNode *ExtElt, SelectionDAG &DAG, const SDLoc &DL, bool LegalTypes)
Transform a vector binary operation into a scalar binary operation by moving the math/logic after an ...
static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned, const TargetLowering &TLI)
Return true if divmod libcall is available.
static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG)
static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1, SelectionDAG &DAG, const SDLoc &DL)
Given the operands of an add/sub operation, see if the 2nd operand is a masked 0/1 whose source opera...
static bool mergeEltWithShuffle(SDValue &X, SDValue &Y, ArrayRef< int > Mask, SmallVectorImpl< int > &NewMask, SDValue Elt, unsigned InsIndex)
static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf)
If we have a unary shuffle of a shuffle, see if it can be folded away completely.
static bool canSplitIdx(LoadSDNode *LD)
static SDValue ShrinkLoadReplaceStoreWithStore(const std::pair< unsigned, unsigned > &MaskInfo, SDValue IVal, StoreSDNode *St, DAGCombiner *DC)
Check to see if IVal is something that provides a value as specified by MaskInfo.
static cl::opt< bool > StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden, cl::desc("Bypass the profitability model of load slicing"), cl::init(false))
Hidden option to stress test load slicing, i.e., when this option is enabled, load slicing bypasses m...
static cl::opt< bool > UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true), cl::desc("Enable DAG combiner's use of TBAA"))
static void adjustCostForPairing(SmallVectorImpl< LoadedSlice > &LoadedSlices, LoadedSlice::Cost &GlobalLSCost)
Adjust the GlobalLSCost according to the target paring capabilities and the layout of the slices.
static SDValue combineCarryDiamond(SelectionDAG &DAG, const TargetLowering &TLI, SDValue N0, SDValue N1, SDNode *N)
static cl::opt< bool > DisableCombines("combiner-disabled", cl::Hidden, cl::init(false), cl::desc("Disable the DAG combiner"))
static SDValue foldExtendVectorInregToExtendOfSubvector(SDNode *N, const SDLoc &DL, const TargetLowering &TLI, SelectionDAG &DAG, bool LegalOperations)
static SDValue narrowExtractedVectorBinOp(EVT VT, SDValue Src, unsigned Index, const SDLoc &DL, SelectionDAG &DAG, bool LegalOperations)
If we are extracting a subvector produced by a wide binary operator try to use a narrow binary operat...
static bool isCompatibleLoad(SDValue N, unsigned ExtOpcode)
Check if N satisfies: N is used once.
static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG, const SDLoc &DL)
Given an extending node with a pop-count operand, if the target does not support a pop-count in the n...
static SDValue foldLogicTreeOfShifts(SDNode *N, SDValue LeftHand, SDValue RightHand, SelectionDAG &DAG)
Given a tree of logic operations with shape like (LOGIC (LOGIC (X, Y), LOGIC (Z, Y))) try to match an...
static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG)
static SDValue takeInexpensiveLog2(SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Op, unsigned Depth, bool AssumeNonZero)
static SDValue combineSelectAsExtAnd(SDValue Cond, SDValue T, SDValue F, const SDLoc &DL, SelectionDAG &DAG)
static bool areUsedBitsDense(const APInt &UsedBits)
Check that all bits set in UsedBits form a dense region, i.e., UsedBits looks like 0....
static SDValue foldMaskedMerge(SDNode *Node, SelectionDAG &DAG, const TargetLowering &TLI, const SDLoc &DL)
Fold "masked merge" expressions like (m & x) | (~m & y) and its DeMorgan variant (~m | x) & (m | y) i...
static SDValue getInputChainForNode(SDNode *N)
Given a node, return its input chain if it has one, otherwise return a null sd operand.
static ElementCount numVectorEltsOrZero(EVT T)
static SDValue foldSelectWithIdentityConstant(SDNode *N, SelectionDAG &DAG, bool ShouldCommuteOperands)
This inverts a canonicalization in IR that replaces a variable select arm with an identity constant.
static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG)
static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner, const TargetLowering &TLI, EVT VT, bool LegalOperations, SDNode *N, SDValue N0, ISD::LoadExtType ExtLoadType)
static SDValue foldAndToUsubsat(SDNode *N, SelectionDAG &DAG, const SDLoc &DL)
For targets that support usubsat, match a bit-hack form of that operation that ends in 'and' and conv...
static cl::opt< bool > CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden, cl::desc("Enable DAG combiner's use of IR alias analysis"))
static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy)
static SDValue formSplatFromShuffles(ShuffleVectorSDNode *OuterShuf, SelectionDAG &DAG)
Combine shuffle of shuffle of the form: shuf (shuf X, undef, InnerMask), undef, OuterMask --> splat X...
static bool isDivisorPowerOfTwo(SDValue Divisor)
static bool matchRotateHalf(const SelectionDAG &DAG, SDValue Op, SDValue &Shift, SDValue &Mask)
Match "(X shl/srl V1) & V2" where V2 may not be present.
static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG)
static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS, SDValue RHS, const SDNodeFlags Flags, const TargetLowering &TLI)
static SDValue combineShuffleOfBitcast(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalOperations)
static std::optional< EVT > canCombineShuffleToExtendVectorInreg(unsigned Opcode, EVT VT, std::function< bool(unsigned)> Match, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes, bool LegalOperations)
static SDValue PerformUMinFpToSatCombine(SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, SelectionDAG &DAG)
static SDValue combineShuffleToAnyExtendVectorInreg(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalOperations)
static SDValue foldAddSubOfSignBit(SDNode *N, const SDLoc &DL, SelectionDAG &DAG)
Try to fold a 'not' shifted sign-bit with add/sub with constant operand into a shift and add with a d...
static SDValue stripTruncAndExt(SDValue Value)
static SDValue combineUADDO_CARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG, SDValue X, SDValue Carry0, SDValue Carry1, SDNode *N)
If we are facing some sort of diamond carry propagation pattern try to break it up to generate someth...
static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG)
Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles followed by concatenatio...
static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG)
static auto getFirstIndexOf(R &&Range, const T &Val)
static SDValue getSubVectorSrc(SDValue V, unsigned Index, EVT SubVT)
static std::pair< unsigned, unsigned > CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain)
Check to see if V is (and load (ptr), imm), where the load is having specific bytes cleared out.
static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef< int > Mask)
If the shuffle mask is taking exactly one element from the first vector operand and passing through a...
static bool shouldConvertSelectOfConstantsToMath(const SDValue &Cond, EVT VT, const TargetLowering &TLI)
static cl::opt< bool > EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true), cl::desc("DAG combiner enable merging multiple stores " "into a wider store"))
static bool isContractableFMUL(const TargetOptions &Options, SDValue N)
static cl::opt< bool > MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true), cl::desc("DAG combiner may split indexing from loads"))
static bool areSlicesNextToEachOther(const LoadedSlice &First, const LoadedSlice &Second)
Check whether or not First and Second are next to each other in memory.
static SDValue stripConstantMask(const SelectionDAG &DAG, SDValue Op, SDValue &Mask)
static bool arebothOperandsNotSNan(SDValue Operand1, SDValue Operand2, SelectionDAG &DAG)
static bool isBSwapHWordPair(SDValue N, MutableArrayRef< SDNode * > Parts)
static SDValue foldFPToIntToFP(SDNode *N, const SDLoc &DL, SelectionDAG &DAG, const TargetLowering &TLI)
static bool CanCombineFCOPYSIGN_EXTEND_ROUND(EVT XTy, EVT YTy)
copysign(x, fp_extend(y)) -> copysign(x, y) copysign(x, fp_round(y)) -> copysign(x,...
static cl::opt< bool > ReduceLoadOpStoreWidthForceNarrowingProfitable("combiner-reduce-load-op-store-width-force-narrowing-profitable", cl::Hidden, cl::init(false), cl::desc("DAG combiner force override the narrowing profitable check when " "reducing the width of load/op/store sequences"))
static unsigned getMinMaxOpcodeForFP(SDValue Operand1, SDValue Operand2, ISD::CondCode CC, unsigned OrAndOpcode, SelectionDAG &DAG, bool isFMAXNUMFMINNUM_IEEE, bool isFMAXNUMFMINNUM)
static SDValue getTruncatedUSUBSAT(EVT DstVT, EVT SrcVT, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &DL)
static SDValue foldToSaturated(SDNode *N, EVT &VT, SDValue &Src, EVT &SrcVT, SDLoc &DL, const TargetLowering &TLI, SelectionDAG &DAG)
static SDValue FoldIntToFPToInt(SDNode *N, const SDLoc &DL, SelectionDAG &DAG)
static SDValue foldSubCtlzNot(SDNode *N, SelectionDAG &DAG)
static SDNode * getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad, bool &IsMasked, SDValue &Ptr, SDValue &BasePtr, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG, const TargetLowering &TLI)
static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG, const TargetLowering &TLI, bool Force)
Flips a boolean if it is cheaper to compute.
static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op, KnownBits &Known)
static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG, const TargetLowering &TLI, EVT VT, bool LegalOperations, SDNode *N, SDValue N0, ISD::LoadExtType ExtLoadType, ISD::NodeType ExtOpc)
static SDValue combineConcatVectorOfShuffleAndItsOperands(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes, bool LegalOperations)
bool refineIndexType(SDValue &Index, ISD::MemIndexType &IndexType, EVT DataVT, SelectionDAG &DAG)
static SDValue foldRemainderIdiom(SDNode *N, SelectionDAG &DAG, const SDLoc &DL)
static SDValue combineMinNumMaxNumImpl(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode CC, const TargetLowering &TLI, SelectionDAG &DAG)
static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG)
If we have a shift-by-constant of a bitwise logic op that itself has a shift-by-constant operand with...
static SDValue widenAbs(SDNode *Extend, SelectionDAG &DAG)
static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset=0)
static SDValue combineShiftToMULH(SDNode *N, const SDLoc &DL, SelectionDAG &DAG, const TargetLowering &TLI)
static ConstantSDNode * getAsNonOpaqueConstant(SDValue N)
If N is a ConstantSDNode with isOpaque() == false return it casted to a ConstantSDNode pointer else n...
static bool arebothOperandsNotNan(SDValue Operand1, SDValue Operand2, SelectionDAG &DAG)
static SDValue detectUSatUPattern(SDValue In, EVT VT)
Detect patterns of truncation with unsigned saturation:
static SDValue PerformMinMaxFpToSatCombine(SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, SelectionDAG &DAG)
static SDValue combineConcatVectorOfSplats(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes, bool LegalOperations)
static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1, SDNode *N)
OR combines for which the commuted variant will be tried as well.
static SDValue detectSSatUPattern(SDValue In, EVT VT, SelectionDAG &DAG, const SDLoc &DL)
Detect patterns of truncation with unsigned saturation:
static SDValue combineShuffleToZeroExtendVectorInReg(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalOperations)
static cl::opt< bool > EnableReduceLoadOpStoreWidth("combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true), cl::desc("DAG combiner enable reducing the width of load/op/store " "sequence"))
static bool shouldCombineToPostInc(SDNode *N, SDValue Ptr, SDNode *PtrUse, SDValue &BasePtr, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG, const TargetLowering &TLI)
static SDValue combineVSelectWithAllOnesOrZeros(SDValue Cond, SDValue TVal, SDValue FVal, const TargetLowering &TLI, SelectionDAG &DAG, const SDLoc &DL)
static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize, SelectionDAG &DAG, bool IsRotate, bool FromAdd)
static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG, bool LegalOperations)
static SDValue combineConcatVectorOfCasts(SDNode *N, SelectionDAG &DAG)
static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG)
Try to replace shift/logic that tests if a bit is clear with mask + setcc.
static bool areBitwiseNotOfEachother(SDValue Op0, SDValue Op1)
static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI)
static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG)
static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG, const SDLoc &DL, bool LegalTypes)
If a vector binop is performed on splat values, it may be profitable to extract, scalarize,...
static SDValue foldVSelectToSignBitSplatMask(SDNode *N, SelectionDAG &DAG)
static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, const SDLoc &DL, SelectionDAG &DAG)
static SDValue combineConcatVectorOfConcatVectors(SDNode *N, SelectionDAG &DAG)
static SDValue tryToFoldExtOfAtomicLoad(SelectionDAG &DAG, const TargetLowering &TLI, EVT VT, SDValue N0, ISD::LoadExtType ExtLoadType)
static SDValue matchBSwapHWordOrAndAnd(const TargetLowering &TLI, SelectionDAG &DAG, SDNode *N, SDValue N0, SDValue N1, EVT VT)
static SDValue tryToFoldExtendSelectLoad(SDNode *N, const TargetLowering &TLI, SelectionDAG &DAG, const SDLoc &DL, CombineLevel Level)
Fold (sext (select c, load x, load y)) -> (select c, sextload x, sextload y) (zext (select c,...
static SDValue getAsCarry(const TargetLowering &TLI, SDValue V, bool ForceCarryReconstruction=false)
static SDValue matchMergedBFX(SDValue Root, SelectionDAG &DAG, const TargetLowering &TLI)
static SDValue foldSelectOfConstantsUsingSra(SDNode *N, const SDLoc &DL, SelectionDAG &DAG)
If a (v)select has a condition value that is a sign-bit test, try to smear the condition operand sign...
static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG)
static SDValue detectSSatSPattern(SDValue In, EVT VT)
Detect patterns of truncation with signed saturation: (truncate (smin (smax (x, signed_min_of_dest_ty...
static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG)
static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT, SelectionDAG &DAG, bool LegalOperations)
static cl::opt< unsigned > StoreMergeDependenceLimit("combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10), cl::desc("Limit the number of times for the same StoreNode and RootNode " "to bail out in store merging dependence check"))
static SDValue eliminateFPCastPair(SDNode *N)
static cl::opt< std::string > CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden, cl::desc("Only use DAG-combiner alias analysis in this" " function"))
static SDValue foldLogicOfShifts(SDNode *N, SDValue LogicOp, SDValue ShiftOp, SelectionDAG &DAG)
Given a bitwise logic operation N with a matching bitwise logic operand, fold a pattern where 2 of th...
ByteProvider< SDNode * > SDByteProvider
Recursively traverses the expression calculating the origin of the requested byte of the given value.
static bool isSlicingProfitable(SmallVectorImpl< LoadedSlice > &LoadedSlices, const APInt &UsedBits, bool ForCodeSize)
Check the profitability of all involved LoadedSlice.
static SDValue narrowInsertExtractVectorBinOp(EVT SubVT, SDValue BinOp, unsigned Index, const SDLoc &DL, SelectionDAG &DAG, bool LegalOperations)
static bool isBSwapHWordElement(SDValue N, MutableArrayRef< SDNode * > Parts)
Return true if the specified node is an element that makes up a 32-bit packed halfword byteswap.
static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, unsigned &BW, bool &Unsigned, SelectionDAG &DAG)
static SDValue foldBoolSelectToLogic(SDNode *N, const SDLoc &DL, SelectionDAG &DAG)
static std::optional< SDByteProvider > calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth, std::optional< uint64_t > VectorIndex, unsigned StartingIndex=0)
dxil translate DXIL Translate Metadata
This file provides an implementation of debug counters.
#define DEBUG_COUNTER(VARNAME, COUNTERNAME, DESC)
This file defines the DenseMap class.
static bool isSigned(unsigned int Opcode)
static MaybeAlign getAlign(Value *Ptr)
iv Induction Variable Users
Definition IVUsers.cpp:48
std::pair< Instruction::BinaryOps, Value * > OffsetOp
Find all possible pairs (BinOp, RHS) that BinOp V, RHS can be simplified.
static Value * simplifyDivRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, const SimplifyQuery &Q, unsigned MaxRecurse)
Check for common or similar folds of integer division or integer remainder.
This file implements a coalescing interval map for small objects.
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static LVOptions Options
Definition LVOptions.cpp:25
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
static bool isUndef(const MachineInstr &MI)
Register const TargetRegisterInfo * TRI
This file provides utility analysis objects describing memory locations.
This file contains the declarations for metadata subclasses.
#define T
#define T1
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
#define P(N)
if(PassOpts->AAPipeline)
const SmallVectorImpl< MachineOperand > & Cond
Contains matchers for matching SelectionDAG nodes and values.
static bool isSimple(Instruction *I)
void visit(MachineFunction &MF, MachineBasicBlock &Start, std::function< void(MachineBasicBlock *)> op)
This file contains some templates that are useful if you are working with the STL at all.
static cl::opt< bool > UseTBAA("use-tbaa-in-sched-mi", cl::Hidden, cl::init(true), cl::desc("Enable use of TBAA during MI DAG construction"))
static cl::opt< unsigned > MaxSteps("has-predecessor-max-steps", cl::Hidden, cl::init(8192), cl::desc("DAG combiner limit number of steps when searching DAG " "for predecessor nodes"))
This file implements a set that has insertion order iteration characteristics.
This file implements the SmallBitVector class.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
This file describes how to lower LLVM code to machine code.
static LLVM_ABI ExponentType semanticsMinExponent(const fltSemantics &)
Definition APFloat.cpp:298
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
static LLVM_ABI ExponentType semanticsMaxExponent(const fltSemantics &)
Definition APFloat.cpp:294
static LLVM_ABI unsigned int semanticsPrecision(const fltSemantics &)
Definition APFloat.cpp:290
static LLVM_ABI bool isIEEELikeFP(const fltSemantics &)
Definition APFloat.cpp:331
opStatus
IEEE-754R 7: Default exception handling.
Definition APFloat.h:360
static LLVM_ABI unsigned int semanticsIntSizeInBits(const fltSemantics &, bool)
Definition APFloat.cpp:304
static APFloat getQNaN(const fltSemantics &Sem, bool Negative=false, const APInt *payload=nullptr)
Factory for QNaN values.
Definition APFloat.h:1102
opStatus divide(const APFloat &RHS, roundingMode RM)
Definition APFloat.h:1190
bool isNegative() const
Definition APFloat.h:1431
bool isNormal() const
Definition APFloat.h:1435
bool isDenormal() const
Definition APFloat.h:1432
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
Definition APFloat.h:1414
const fltSemantics & getSemantics() const
Definition APFloat.h:1439
APFloat makeQuiet() const
Assuming this is an IEEE-754 NaN value, quiet its signaling bit.
Definition APFloat.h:1298
bool isNaN() const
Definition APFloat.h:1429
static APFloat getOne(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative One.
Definition APFloat.h:1070
bool isSignaling() const
Definition APFloat.h:1433
APInt bitcastToAPInt() const
Definition APFloat.h:1335
bool isLargest() const
Definition APFloat.h:1447
bool isInfinity() const
Definition APFloat.h:1428
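The APFloat entries above are the constant-folding surface this file leans on for floating-point nodes. A minimal standalone sketch of the listed factories and queries follows (assuming an installed LLVM to compile and link against; the program itself is illustrative and not part of DAGCombiner):

#include "llvm/ADT/APFloat.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  // Build 1.0 and a quiet NaN in IEEE single precision.
  APFloat One = APFloat::getOne(APFloat::IEEEsingle());
  APFloat QNaN = APFloat::getQNaN(APFloat::IEEEsingle());

  // divide() mutates in place and reports an opStatus.
  APFloat X = One;
  APFloat::opStatus St = X.divide(QNaN, APFloat::rmNearestTiesToEven);
  (void)St;

  // Queries of the kind consulted before an fdiv/fmul fold is attempted.
  outs() << "isNaN: " << X.isNaN() << "\n";
  outs() << "isNegative: " << One.isNegative() << "\n";
  outs() << "bit pattern: " << X.bitcastToAPInt() << "\n";
  return 0;
}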
Class for arbitrary precision integers.
Definition APInt.h:78
LLVM_ABI APInt umul_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1971
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:234
static LLVM_ABI void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
Dual division/remainder interface.
Definition APInt.cpp:1758
LLVM_ABI APInt getLoBits(unsigned numBits) const
Compute an APInt containing numBits lowbits from this APInt.
Definition APInt.cpp:644
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition APInt.h:449
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1012
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:229
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1540
unsigned popcount() const
Count the number of bits set.
Definition APInt.h:1670
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition APInt.h:1385
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition APInt.cpp:1033
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition APInt.h:1512
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition APInt.h:206
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1330
APInt abs() const
Get the absolute value.
Definition APInt.h:1795
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:371
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition APInt.h:1182
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition APInt.h:258
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:380
bool isSignMask() const
Check if the APInt's value is returned by getSignMask.
Definition APInt.h:466
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1666
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1488
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition APInt.h:1111
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition APInt.h:209
bool isNegative() const
Determine sign of this APInt.
Definition APInt.h:329
bool intersects(const APInt &RHS) const
This operation tests if there are any pairs of corresponding bits between this APInt and RHS that are...
Definition APInt.h:1249
int32_t exactLogBase2() const
Definition APInt.h:1783
LLVM_ABI APInt uadd_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1935
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1639
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition APInt.h:1598
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition APInt.cpp:651
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:219
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition APInt.h:1531
unsigned countLeadingZeros() const
Definition APInt.h:1606
void flipAllBits()
Toggle every bit to its opposite value.
Definition APInt.h:1452
unsigned logBase2() const
Definition APInt.h:1761
bool isShiftedMask() const
Return true if this APInt value contains a non-empty sequence of ones with the remainder zero.
Definition APInt.h:510
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition APInt.h:475
bool getBoolValue() const
Convert APInt to a boolean value.
Definition APInt.h:471
LLVM_ABI APInt srem(const APInt &RHS) const
Function for signed remainder operation.
Definition APInt.cpp:1736
LLVM_ABI APInt smul_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1960
bool isMask(unsigned numBits) const
Definition APInt.h:488
bool ule(const APInt &RHS) const
Unsigned less or equal comparison.
Definition APInt.h:1150
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition APInt.cpp:985
void setBits(unsigned loBit, unsigned hiBit)
Set the bits from loBit (inclusive) to hiBit (exclusive) to 1.
Definition APInt.h:1367
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1257
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:440
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:306
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition APInt.h:296
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:200
LLVM_ABI APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
Definition APInt.cpp:482
bool isOne() const
Determine if this is a value of 1.
Definition APInt.h:389
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition APInt.h:286
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition APInt.h:239
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1562
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition APInt.h:858
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:851
unsigned countr_one() const
Count the number of trailing one bits.
Definition APInt.h:1656
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition APInt.h:1221
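Most of the integer folds in this file reduce to APInt arithmetic on constant operands. A small self-contained sketch of the bit-level helpers listed above (again assuming an LLVM install; the values are illustrative only):

#include "llvm/ADT/APInt.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  // A 32-bit value with the low 8 bits set: 0x000000FF.
  APInt Mask = APInt::getLowBitsSet(32, 8);
  outs() << "isMask(8): " << Mask.isMask(8) << "\n";
  outs() << "popcount: " << Mask.popcount() << "\n";

  // Width and shift manipulations of the kind used when narrowing loads/stores.
  APInt Wide = APInt::getHighBitsSet(64, 16);
  outs() << "countr_zero: " << Wide.countr_zero() << "\n";          // 48
  outs() << "trunc(32) is zero: " << Wide.trunc(32).isZero() << "\n";

  // Overflow-checked multiply, as used by the mul/shl combines.
  bool Overflow = false;
  APInt Prod = APInt(8, 200).umul_ov(APInt(8, 2), Overflow);
  outs() << "overflow: " << Overflow << " value: " << Prod << "\n";
  return 0;
}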
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
ArrayRef< T > drop_front(size_t N=1) const
Drop the first N elements of the array.
Definition ArrayRef.h:196
size_t size() const
size - Get the array size.
Definition ArrayRef.h:143
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
static LLVM_ABI BaseIndexOffset match(const SDNode *N, const SelectionDAG &DAG)
Parses tree in N for base, index, offset addresses.
static LLVM_ABI bool computeAliasing(const SDNode *Op0, const LocationSize NumBytes0, const SDNode *Op1, const LocationSize NumBytes1, const SelectionDAG &DAG, bool &IsAlias)
This class is a wrapper over an AAResults, and it is intended to be used only when there are no IR ch...
bool isNoAlias(const MemoryLocation &LocA, const MemoryLocation &LocB)
LLVM_ABI bool isConstant() const
Represents known origin of an individual byte in combine pattern.
static ByteProvider getConstantZero()
static ByteProvider getSrc(std::optional< SDNode * > Val, int64_t ByteOffset, int64_t VectorOffset)
Combiner implementation.
Definition Combiner.h:34
ISD::CondCode get() const
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
static ConstantAsMetadata * get(Constant *C)
Definition Metadata.h:536
const APFloat & getValueAPF() const
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
bool isNegative() const
Return true if the value is negative.
bool isZero() const
Return true if the value is positive or negative zero.
const APInt & getLower() const
Return the lower value for this range.
LLVM_ABI bool isFullSet() const
Return true if this set contains all of the elements possible for this data-type.
LLVM_ABI ConstantRange truncate(uint32_t BitWidth, unsigned NoWrapKind=0) const
Return a new range in the specified integer type, which must be strictly smaller than the current typ...
const APInt & getUpper() const
Return the upper value for this range.
uint32_t getBitWidth() const
Get the bit width of this ConstantRange.
const ConstantInt * getConstantIntValue() const
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
This is an important base class in LLVM.
Definition Constant.h:43
bool isLittleEndian() const
Layout endianness...
Definition DataLayout.h:207
bool isBigEndian() const
Definition DataLayout.h:208
LLVM_ABI TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
LLVM_ABI Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
static bool shouldExecute(unsigned CounterName)
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
iterator end()
Definition DenseMap.h:81
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:310
constexpr bool isScalar() const
Exactly one element.
Definition TypeSize.h:321
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:703
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition Function.h:352
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:727
const_iterator find(KeyT x) const
find - Return an iterator pointing to the first interval ending at or after x, or end().
bool isUnindexed() const
Return true if this is NOT a pre/post inc/dec load/store.
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
const SDValue & getOffset() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
bool hasValue() const
static LocationSize precise(uint64_t Value)
static constexpr LocationSize beforeOrAfterPointer()
Any location before or after the base pointer (but still within the underlying object).
bool isScalable() const
TypeSize getValue() const
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1569
Machine Value Type.
SimpleValueType SimpleTy
static auto all_valuetypes()
SimpleValueType Iteration.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static MVT getIntegerVT(unsigned BitWidth)
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
Function & getFunction()
Return the LLVM function that this machine code represents.
A description of a memory reference used in the backend.
const PseudoSourceValue * getPseudoValue() const
void clearRanges()
Unset the tracked range metadata.
Flags
Flags values. These may be or'd together.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MONonTemporal
The memory access is non-temporal.
Flags getFlags() const
Return the raw flags of the source value,.
const Value * getValue() const
Return the base address of the memory access.
const SDValue & getPassThru() const
ISD::LoadExtType getExtensionType() const
const SDValue & getBasePtr() const
ISD::MemIndexType getIndexType() const
How is Index applied to BasePtr when computing addresses.
const SDValue & getInc() const
const SDValue & getScale() const
const SDValue & getMask() const
const SDValue & getIntID() const
const SDValue & getIndex() const
const SDValue & getBasePtr() const
ISD::MemIndexType getIndexType() const
This class is used to represent an MLOAD node.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
const SDValue & getMask() const
const SDValue & getPassThru() const
const SDValue & getOffset() const
bool isUnindexed() const
Return true if this is NOT a pre/post inc/dec load/store.
ISD::MemIndexedMode getAddressingMode() const
Return the addressing mode for this load or store: unindexed, pre-inc, pre-dec, post-inc,...
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
This class is used to represent an MSTORE node.
bool isCompressingStore() const
Returns true if the op does a compression to the vector before storing.
const SDValue & getOffset() const
const SDValue & getBasePtr() const
const SDValue & getMask() const
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
unsigned getAddressSpace() const
Return the address space for the associated pointer.
Align getBaseAlign() const
Returns alignment and volatility of the memory access.
const MDNode * getRanges() const
Returns the Ranges that describes the dereference.
Align getAlign() const
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
bool isSimple() const
Returns true if the memory operation is neither atomic nor volatile.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getBasePtr() const
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
bool isNonTemporal() const
bool isInvariant() const
bool isDereferenceable() const
EVT getMemoryVT() const
Return the type of the in-memory value.
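The MemSDNode/LoadSDNode/MaskedStoreSDNode accessors above are what the load-narrowing and store-merging combines consult before touching a memory operation. A hedged sketch of that kind of guard, written as a free function so the surrounding DAG context is assumed rather than shown; the helper name and the alignment policy are illustrative, not taken from DAGCombiner:

#include "llvm/CodeGen/SelectionDAGNodes.h"

using namespace llvm;

// Hypothetical guard: decide whether a load looks like a narrowing candidate.
static bool loadLooksNarrowable(LoadSDNode *LD) {
  // Only plain, non-volatile, non-atomic, unindexed loads are candidates.
  if (!LD->isSimple() || !LD->isUnindexed())
    return false;
  // Extending loads already use a memory type narrower than the result.
  if (LD->getExtensionType() != ISD::NON_EXTLOAD)
    return false;
  EVT MemVT = LD->getMemoryVT();
  if (!MemVT.isInteger() || MemVT.isVector())
    return false;
  // Require natural alignment for the in-memory type (illustrative policy).
  uint64_t ByteSize = MemVT.getStoreSize().getFixedValue();
  return LD->getAlign().value() >= ByteSize;
}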
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition ArrayRef.h:299
MutableArrayRef< T > take_back(size_t N=1) const
Return a copy of *this with only the last N elements.
Definition ArrayRef.h:420
iterator end() const
Definition ArrayRef.h:344
iterator begin() const
Definition ArrayRef.h:343
MutableArrayRef< T > take_front(size_t N=1) const
Return a copy of *this with only the first N elements.
Definition ArrayRef.h:413
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
LLVM_ABI void dump() const
Dump this node, for debugging.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
LLVM_ABI bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
iterator_range< value_op_iterator > op_values() const
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
size_t use_size() const
Return the number of uses of this node.
TypeSize getValueSizeInBits(unsigned ResNo) const
Returns MVT::getSizeInBits(getValueType(ResNo)).
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
bool use_empty() const
Return true if there are no uses of this node.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
unsigned getNumOperands() const
Return the number of values used by this operation.
SDVTList getVTList() const
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
LLVM_ABI bool isOperandOf(const SDNode *N) const
Return true if this node is an operand of N.
const APInt & getConstantOperandAPInt(unsigned Num) const
Helper method returns the APInt of a ConstantSDNode operand.
std::optional< APInt > bitcastToAPInt() const
bool isPredecessorOf(const SDNode *N) const
Return true if this node is a predecessor of N.
LLVM_ABI bool hasAnyUseOfValue(unsigned Value) const
Return true if there are any use of the indicated value.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
iterator_range< user_iterator > users()
user_iterator user_begin() const
Provide iteration support to walk over all users of an SDNode.
static use_iterator use_end()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
LLVM_ABI bool reachesChainWithoutSideEffects(SDValue Dest, unsigned Depth=2) const
Return true if this operand (which must be a chain) reaches the specified operand without crossing an...
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
bool isAnyAdd() const
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
bool use_empty() const
Return true if there are no nodes using value ResNo of Node.
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
unsigned getResNo() const
get the index which selects a specific result in the SDNode
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
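The SDNode and SDValue accessors above are the vocabulary every combine is written in: opcode checks, operand walks, use counts, and constant-operand helpers. A hedged sketch of a small matcher in that style; the pattern chosen is illustrative, not one of this file's folds:

#include "llvm/CodeGen/SelectionDAGNodes.h"

using namespace llvm;

// Hypothetical matcher: does N compute (add x, (shl y, constant)) with a
// shift that could be absorbed into a single instruction?
static bool isAddOfShiftByConstant(SDValue N) {
  if (N.getOpcode() != ISD::ADD)
    return false;
  SDValue Shl = N.getOperand(1);
  // Most combines insist on a single use so the inner node can be absorbed.
  if (Shl.getOpcode() != ISD::SHL || !Shl.hasOneUse())
    return false;
  // Constant shift amount, read through the ConstantSDNode helpers.
  if (!isa<ConstantSDNode>(Shl.getOperand(1)))
    return false;
  return Shl.getConstantOperandVal(1) < N.getScalarValueSizeInBits();
}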
Targets can subclass this to parameterize the SelectionDAG lowering and instruction selection process...
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
bool willNotOverflowAdd(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the addition of 2 nodes can never overflow.
LLVM_ABI Align getReducedAlign(EVT VT, bool UseABI)
In most cases this function returns the ABI alignment for a given type, except for illegal vector typ...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT, unsigned Opcode)
Convert Op, which must be of integer type, to the integer type VT, by either any/sign/zero-extending ...
LLVM_ABI SDValue getSplatSourceVector(SDValue V, int &SplatIndex)
If V is a splatted value, return the source vector and its splat index.
LLVM_ABI unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
const SDValue & getRoot() const
Return the root tag of the SelectionDAG.
LLVM_ABI SDValue getMaskedGather(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, ISD::LoadExtType ExtTy)
bool isKnownNeverSNaN(SDValue Op, const APInt &DemandedElts, unsigned Depth=0) const
LLVM_ABI std::optional< bool > isBoolConstant(SDValue N) const
Check if a value N is a constant using the target's BooleanContent for its type.
const TargetSubtargetInfo & getSubtarget() const
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
LLVM_ABI SDValue getSplatValue(SDValue V, bool LegalTypes=false)
If V is a splat vector, return its scalar source operand by extracting that element from the source v...
LLVM_ABI SDValue FoldSetCC(EVT VT, SDValue N1, SDValue N2, ISD::CondCode Cond, const SDLoc &dl)
Constant fold a setcc to true or false.
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI void ExtractVectorElements(SDValue Op, SmallVectorImpl< SDValue > &Args, unsigned Start=0, unsigned Count=0, EVT EltVT=EVT())
Append the extracted elements from Start to Count out of the vector Op in Args.
LLVM_ABI SDValue getAtomicLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT MemVT, EVT VT, SDValue Chain, SDValue Ptr, MachineMemOperand *MMO)
LLVM_ABI SDNode * getNodeIfExists(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops, const SDNodeFlags Flags, bool AllowCommute=false)
Get the specified node if it's already available, or else return NULL.
LLVM_ABI SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm, bool ConstantFold=true)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
LLVM_ABI SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
LLVM_ABI SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offs=0, bool isT=false, unsigned TargetFlags=0)
LLVM_ABI SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
LLVM_ABI bool isConstantIntBuildVectorOrConstantInt(SDValue N, bool AllowOpaques=true) const
Test whether the given value is a constant int or similar node.
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-...
LLVM_ABI void Combine(CombineLevel Level, BatchAAResults *BatchAA, CodeGenOptLevel OptLevel)
This iterates over the nodes in the SelectionDAG, folding certain types of nodes together,...
LLVM_ABI SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
static LLVM_ABI unsigned getHasPredecessorMaxSteps()
LLVM_ABI bool haveNoCommonBitsSet(SDValue A, SDValue B) const
Return true if A and B have no common bits set.
SDValue getExtractSubvector(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Return the VT typed sub-vector of Vec at Idx.
LLVM_ABI bool cannotBeOrderedNegativeFP(SDValue Op) const
Test whether the given float value is known to be positive.
LLVM_ABI SDValue getGetFPEnv(SDValue Chain, const SDLoc &dl, SDValue Ptr, EVT MemVT, MachineMemOperand *MMO)
LLVM_ABI SDValue getAssertAlign(const SDLoc &DL, SDValue V, Align A)
Return an AssertAlignSDNode.
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getInsertSubvector(const SDLoc &DL, SDValue Vec, SDValue SubVec, unsigned Idx)
Insert SubVec at the Idx element of Vec.
LLVM_ABI SDValue getStepVector(const SDLoc &DL, EVT ResVT, const APInt &StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
bool willNotOverflowSub(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the sub of 2 nodes can never overflow.
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
static constexpr unsigned MaxRecursionDepth
bool isGuaranteedNotToBePoison(SDValue Op, unsigned Depth=0) const
Return true if this function can prove that Op is never poison.
LLVM_ABI SDValue getIndexedMaskedLoad(SDValue OrigLoad, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
LLVM_ABI APInt computeVectorKnownZeroElements(SDValue Op, const APInt &DemandedElts, unsigned Depth=0) const
For each demanded element of a vector, see if it is known to be zero.
LLVM_ABI std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
LLVM_ABI SDValue getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
LLVM_ABI void DeleteNode(SDNode *N)
Remove the specified node from the system.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
LLVM_ABI SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
LLVM_ABI std::optional< unsigned > getValidShiftAmount(SDValue V, const APInt &DemandedElts, unsigned Depth=0) const
If a SHL/SRA/SRL node V has a uniform shift amount that is less than the element bit-width of the shi...
LLVM_ABI SDValue simplifySelect(SDValue Cond, SDValue TVal, SDValue FVal)
Try to simplify a select/vselect into 1 of its operands or a constant.
LLVM_ABI SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
LLVM_ABI bool isConstantFPBuildVectorOrConstantFP(SDValue N) const
Test whether the given value is a constant FP or similar node.
const DataLayout & getDataLayout() const
LLVM_ABI SDValue getTokenFactor(const SDLoc &DL, SmallVectorImpl< SDValue > &Vals)
Creates a new TokenFactor containing Vals.
LLVM_ABI bool LegalizeOp(SDNode *N, SmallSetVector< SDNode *, 16 > &UpdatedNodes)
Transforms a SelectionDAG node and any operands to it into a node that is compatible with the target ...
LLVM_ABI bool doesNodeExist(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops)
Check if a node exists without modifying its flags.
LLVM_ABI bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base, unsigned Bytes, int Dist) const
Return true if loads are next to each other and can be merged.
LLVM_ABI SDValue getMaskedHistogram(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
LLVM_ABI SDValue getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
LLVM_ABI SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
bool willNotOverflowMul(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the mul of 2 nodes can never overflow.
LLVM_ABI SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
LLVM_ABI void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
LLVM_ABI SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(SDValue Op, bool PoisonOnly=false, unsigned Depth=0) const
Return true if this function can prove that Op is never poison and, if PoisonOnly is false,...
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
LLVM_ABI MaybeAlign InferPtrAlign(SDValue Ptr) const
Infer alignment of a load / store address.
LLVM_ABI bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
LLVM_ABI void RemoveDeadNodes()
This method deletes all unreachable nodes in the SelectionDAG.
bool isConstantValueOfAnyType(SDValue N) const
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
LLVM_ABI SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
LLVM_ABI bool isKnownToBeAPowerOfTwo(SDValue Val, unsigned Depth=0) const
Test if the given value is known to have exactly one bit set.
LLVM_ABI bool isKnownNeverZero(SDValue Op, unsigned Depth=0) const
Test whether the given SDValue is known to contain non-zero value(s).
LLVM_ABI SDValue getIndexedStore(SDValue OrigStore, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
LLVM_ABI SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
LLVM_ABI SDValue getSetFPEnv(SDValue Chain, const SDLoc &dl, SDValue Ptr, EVT MemVT, MachineMemOperand *MMO)
LLVM_ABI SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
LLVM_ABI SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Base, SDValue Offset, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
const TargetMachine & getTarget() const
LLVM_ABI SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
iterator_range< allnodes_iterator > allnodes()
LLVM_ABI SDValue getLoadVP(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL, MachinePointerInfo PtrInfo, EVT MemVT, Align Alignment, MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo, const MDNode *Ranges=nullptr, bool IsExpanding=false)
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getScatterVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
LLVM_ABI bool isKnownNeverNaN(SDValue Op, const APInt &DemandedElts, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN in...
LLVM_ABI SDValue FoldConstantBuildVector(BuildVectorSDNode *BV, const SDLoc &DL, EVT DstEltVT)
Fold BUILD_VECTOR of constants/undefs to the destination type BUILD_VECTOR of constants/undefs elemen...
LLVM_ABI SDValue getIndexedMaskedStore(SDValue OrigStore, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
const TargetLibraryInfo & getLibInfo() const
LLVM_ABI unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
LLVM_ABI bool MaskedVectorIsZero(SDValue Op, const APInt &DemandedElts, unsigned Depth=0) const
Return true if 'Op' is known to be zero in DemandedElts.
LLVM_ABI SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
LLVM_ABI bool canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts, bool PoisonOnly=false, bool ConsiderFlags=true, unsigned Depth=0) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
LLVM_ABI OverflowKind computeOverflowForUnsignedAdd(SDValue N0, SDValue N1) const
Determine if the result of the unsigned addition of 2 nodes can overflow.
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
bool isSafeToSpeculativelyExecuteNode(const SDNode *N) const
Check if the provided node is safe to speculatively execute given its current arguments.
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
LLVM_ABI bool isKnownToBeAPowerOfTwoFP(SDValue Val, unsigned Depth=0) const
Test if the given fp value is known to be an integer power-of-2, either positive or negative.
LLVMContext * getContext() const
LLVM_ABI SDValue simplifyFPBinop(unsigned Opcode, SDValue X, SDValue Y, SDNodeFlags Flags)
Try to simplify a floating-point binary operation into 1 of its operands or a constant.
const SDValue & setRoot(SDValue N)
Set the current root tag of the SelectionDAG.
LLVM_ABI bool isUndef(unsigned Opcode, ArrayRef< SDValue > Ops)
Return true if the result of this operation is always undefined.
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
LLVM_ABI SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
LLVM_ABI SDValue getIndexedLoad(SDValue OrigLoad, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
LLVM_ABI SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
DenormalMode getDenormalMode(EVT VT) const
Return the current function's default denormal handling kind for the given floating point type.
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
static unsigned getOpcode_EXTEND(unsigned Opcode)
Convert *_EXTEND_VECTOR_INREG to *_EXTEND opcode.
LLVM_ABI bool isADDLike(SDValue Op, bool NoWrap=false) const
Return true if the specified operand is an ISD::OR or ISD::XOR node that can be treated as an ISD::AD...
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
LLVM_ABI SDValue simplifyShift(SDValue X, SDValue Y)
Try to simplify a shift into 1 of its operands or a constant.
LLVM_ABI void transferDbgValues(SDValue From, SDValue To, unsigned OffsetInBits=0, unsigned SizeInBits=0, bool InvalidateDbg=true)
Transfer debug values from one node to another, while optionally generating fragment expressions for ...
LLVM_ABI SDValue getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a logical NOT operation as (XOR Val, BooleanOne).
LLVM_ABI SDValue getMaskedScatter(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, bool IsTruncating=false)
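SelectionDAG supplies both the analysis entry points (computeKnownBits, MaskedValueIsZero, ComputeNumSignBits) and the node factories (getNode, getConstant, getSetCC) that a combine uses to justify and then build a replacement. A hedged sketch of that split follows; the exact fold and the helper name are illustrative, and DAGCombiner itself reaches the same results through SimplifyDemandedBits and its canonicalization helpers:

#include "llvm/ADT/APInt.h"
#include "llvm/CodeGen/SelectionDAG.h"

using namespace llvm;

// Hypothetical combine-style helper, illustrative only.
static SDValue canonicalizeAdd(SDValue N, const SDLoc &DL, SelectionDAG &DAG) {
  if (N.getOpcode() != ISD::ADD)
    return SDValue();
  SDValue LHS = N.getOperand(0), RHS = N.getOperand(1);
  EVT VT = N.getValueType();

  // Analysis: adding a value whose bits are all known zero is a no-op.
  if (DAG.MaskedValueIsZero(RHS, APInt::getAllOnes(VT.getScalarSizeInBits())))
    return LHS;

  // Construction: move a constant LHS to the RHS of the commutative op,
  // the canonical form the rest of the combines expect to see.
  if (isa<ConstantSDNode>(LHS) && !isa<ConstantSDNode>(RHS))
    return DAG.getNode(ISD::ADD, DL, VT, RHS, LHS);

  return SDValue();
}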
bool empty() const
Determine if the SetVector is empty or not.
Definition SetVector.h:99
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:150
value_type pop_back_val()
Definition SetVector.h:278
static LLVM_ABI bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
int getMaskElt(unsigned Idx) const
ArrayRef< int > getMask() const
static void commuteMask(MutableArrayRef< int > Mask)
Change values in a shuffle permute mask assuming the two vector operands have swapped position.
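ShuffleVectorSDNode::commuteMask is the helper used when a shuffle's operands are swapped and its mask must be rewritten to match. A minimal standalone sketch on a plain mask array (assuming an LLVM install; illustrative only):

#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  // A 4-element shuffle mask: indices 0..3 pick lanes from the first
  // operand, 4..7 from the second.
  SmallVector<int, 4> Mask = {0, 5, 2, 7};
  // Rewrite the mask as if the two source vectors were swapped.
  ShuffleVectorSDNode::commuteMask(Mask);
  for (int M : Mask)
    outs() << M << " "; // expected: 4 1 6 3
  outs() << "\n";
  return 0;
}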
void push_back(bool Val)
void reserve(unsigned N)
size_type size() const
Definition SmallPtrSet.h:99
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:338
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:133
bool empty() const
Definition SmallSet.h:168
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:183
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void assign(size_type NumElts, ValueParamT Elt)
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
iterator erase(const_iterator CI)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
iterator insert(iterator I, T &&Elt)
void resize(size_type N)
void push_back(const T &Elt)
pointer data()
Return a pointer to the vector's buffer, even if empty().
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
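The ADT containers above (SmallVector, SmallPtrSet, SmallSet, SetVector) back the combiner's worklists and visited sets, for example in hasPredecessorHelper-style DAG walks listed earlier. A minimal standalone sketch of that worklist pattern, with integers standing in for SDNode pointers (illustrative only):

#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  // Toy graph: Succs[n] lists the successors of node n.
  SmallVector<SmallVector<int, 2>, 4> Succs = {{1, 2}, {3}, {3}, {}};

  // Explicit worklist plus visited set, the usual shape of a DAG search.
  SmallVector<int, 8> Worklist;
  SmallSet<int, 8> Visited;
  Worklist.push_back(0);
  while (!Worklist.empty()) {
    int N = Worklist.pop_back_val();
    if (!Visited.insert(N).second)
      continue; // Already seen.
    outs() << "visiting " << N << "\n";
    for (int S : Succs[N])
      Worklist.push_back(S);
  }
  return 0;
}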
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
const SDValue & getOffset() const
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
bool has(LibFunc F) const
Tests whether a library function is available.
virtual bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT) const
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
bool isOperationExpand(unsigned Op, EVT VT) const
Return true if the specified operation is illegal on this target or unlikely to be made legal with cu...
virtual bool preferSextInRegOfTruncate(EVT TruncVT, EVT VT, EVT ExtVT) const
virtual bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
virtual bool hasAndNot(SDValue X) const
Return true if the target has a bitwise and-not operation: X = ~A & B This can be used to simplify se...
virtual bool isShuffleMaskLegal(ArrayRef< int >, EVT) const
Targets can use this to indicate that they only support some VECTOR_SHUFFLE operations,...
virtual bool enableAggressiveFMAFusion(EVT VT) const
Return true if target always benefits from combining into FMA for a given value type.
bool isIndexedStoreLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed store is legal on this target.
SDValue promoteTargetBoolean(SelectionDAG &DAG, SDValue Bool, EVT ValVT) const
Promote the given target boolean to a target boolean of the given type.
virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT, std::optional< unsigned > ByteOffset=std::nullopt) const
Return true if it is profitable to reduce a load to a smaller type.
virtual bool canCombineTruncStore(EVT ValVT, EVT MemVT, bool LegalOnly) const
virtual bool convertSetCCLogicToBitwiseLogic(EVT VT) const
Use bitwise logic to make pairs of compares more efficient.
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
virtual bool isVectorLoadExtDesirable(SDValue ExtVal) const
Return true if folding a vector load into ExtVal (a sign, zero, or any extend node) is profitable.
int getRecipEstimateSqrtEnabled(EVT VT, MachineFunction &MF) const
Return a ReciprocalEstimate enum value for a square root of the given type based on the function's at...
virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
virtual MachineMemOperand::Flags getTargetMMOFlags(const Instruction &I) const
This callback is used to inspect load/store instructions and add target-specific MachineMemOperand fl...
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual bool isFPExtFoldable(const MachineInstr &MI, unsigned Opcode, LLT DestTy, LLT SrcTy) const
Return true if an fpext operation input to an Opcode operation is free (for instance,...
virtual bool hasBitTest(SDValue X, SDValue Y) const
Return true if the target has a bit-test instruction: (X & (1 << Y)) ==/!= 0 This knowledge can be us...
bool isTruncStoreLegal(EVT ValVT, EVT MemVT) const
Return true if the specified store with truncation is legal on this target.
virtual bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT, const SelectionDAG &DAG, const MachineMemOperand &MMO) const
Return true if the following transform is beneficial: fold (conv (load x)) -> (load (conv*)x) On arch...
virtual bool areTwoSDNodeTargetMMOFlagsMergeable(const MemSDNode &NodeX, const MemSDNode &NodeY) const
Return true if it is valid to merge the TargetMMOFlags in two SDNodes.
virtual bool isCommutativeBinOp(unsigned Opcode) const
Returns true if the opcode is a commutative binary operation.
virtual bool isFPImmLegal(const APFloat &, EVT, bool ForCodeSize=false) const
Returns true if the target can instruction select the specified FP immediate natively.
virtual bool isExtractVecEltCheap(EVT VT, unsigned Index) const
Return true if extraction of a scalar element from the given vector type at the given index is cheap.
virtual bool optimizeFMulOrFDivAsShiftAddBitcast(SDNode *N, SDValue FPConst, SDValue IntPow2) const
virtual bool shouldNormalizeToSelectSequence(LLVMContext &Context, EVT VT) const
Returns true if we should normalize select(N0&N1, X, Y) => select(N0, select(N1, X,...
virtual bool preferScalarizeSplat(SDNode *N) const
bool isIndexedMaskedLoadLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed masked load is legal on this target.
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
virtual bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const
Return true if it is profitable to convert a select of FP constants into a constant pool load whose a...
bool hasBigEndianPartOrdering(EVT VT, const DataLayout &DL) const
When splitting a value of the specified type into parts, does the Lo or Hi part come first?
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL) const
Returns the type for the shift amount of a shift opcode.
virtual bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const
Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type from this source type with ...
virtual bool isMulAddWithConstProfitable(SDValue AddNode, SDValue ConstNode) const
Return true if it may be profitable to transform (mul (add x, c1), c2) -> (add (mul x,...
bool isPartialReduceMLALegalOrCustom(unsigned Opc, EVT AccVT, EVT InputVT) const
Return true if a PARTIAL_REDUCE_U/SMLA node with the specified types is legal or custom for this targ...
virtual bool isFsqrtCheap(SDValue X, SelectionDAG &DAG) const
Return true if SQRT(X) shouldn't be replaced with X*RSQRT(X).
int getDivRefinementSteps(EVT VT, MachineFunction &MF) const
Return the refinement step count for a division of the given type based on the function's attributes.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual bool shouldAvoidTransformToShift(EVT VT, unsigned Amount) const
Return true if creating a shift of the type by the given amount is not profitable.
virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const
Return the ValueType of the result of SETCC operations.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
virtual bool shouldFoldSelectWithSingleBitTest(EVT VT, const APInt &AndMask) const
BooleanContent getBooleanContents(bool isVec, bool isFloat) const
For targets without i1 registers, this gives the nature of the high-bits of boolean values held in ty...
virtual bool shouldReassociateReduction(unsigned RedOpc, EVT VT) const
bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal for a comparison of the specified types on this ...
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual bool shouldFoldConstantShiftPairToMask(const SDNode *N) const
Return true if it is profitable to fold a pair of shifts into a mask.
int getRecipEstimateDivEnabled(EVT VT, MachineFunction &MF) const
Return a ReciprocalEstimate enum value for a division of the given type based on the function's attri...
virtual bool preferIncOfAddToSubOfNot(EVT VT) const
These two forms are equivalent: sub y, (xor x, -1) and add (add x, 1), y. The variant with two add's is IR...
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
virtual bool isLegalAddImmediate(int64_t) const
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
virtual bool canTransformPtrArithOutOfBounds(const Function &F, EVT PtrVT) const
True if the target allows transformations of in-bounds pointer arithmetic that cause out-of-bounds in...
virtual bool isProfitableToCombineMinNumMaxNum(EVT VT) const
virtual bool isFNegFree(EVT VT) const
Return true if an fneg operation is free to the point where it is never worthwhile to replace it with...
virtual bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT, unsigned SelectOpcode, SDValue X, SDValue Y) const
Return true if pulling a binary operation into a select with an identity constant is profitable.
virtual bool isIntDivCheap(EVT VT, AttributeList Attr) const
Return true if integer divide is usually cheaper than a sequence of several shifts,...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
bool isPredictableSelectExpensive() const
Return true if selects are only cheaper than branches if the branch is unlikely to be predicted right...
virtual bool mergeStoresAfterLegalization(EVT MemVT) const
Allow store merging for the specified type after legalization in addition to before legalization.
virtual bool shouldMergeStoreOfLoadsOverCall(EVT, EVT) const
Returns true if it's profitable to allow merging store of loads when there are functions calls betwee...
virtual bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
Return true if the target supports a memory access of this type for the given address space and align...
unsigned getGatherAllAliasesMaxDepth() const
virtual bool storeOfVectorConstantIsCheap(bool IsZero, EVT MemVT, unsigned NumElem, unsigned AddrSpace) const
Return true if it is expected to be cheaper to do a store of vector constant with the given size and ...
virtual bool isNarrowingProfitable(SDNode *N, EVT SrcVT, EVT DestVT) const
Return true if it's profitable to narrow operations of type SrcVT to DestVT.
virtual bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const
Return true if it is cheaper to split the store of a merged int val from a pair of smaller values int...
bool isLoadExtLegalOrCustom(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal or custom on this target.
bool isAtomicLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified atomic load with extension is legal on this target.
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
virtual bool shouldFoldMaskToVariableShiftPair(SDValue X) const
There are two ways to clear extreme bits (either low or high): Mask: x & (-1 << y) (the instcombine c...
bool isIndexedLoadLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed load is legal on this target.
virtual bool canMergeStoresTo(unsigned AS, EVT MemVT, const MachineFunction &MF) const
Returns if it's reasonable to merge stores to MemVT size.
virtual bool preferABDSToABSWithNSW(EVT VT) const
bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal on this target.
AndOrSETCCFoldKind
Enum of different potentially desirable ways to fold (and/or (setcc ...), (setcc ....
virtual bool shouldScalarizeBinop(SDValue VecOp) const
Try to convert an extract element of a vector binary operation into an extract element followed by a ...
virtual bool isStoreBitCastBeneficial(EVT StoreVT, EVT BitcastVT, const SelectionDAG &DAG, const MachineMemOperand &MMO) const
Return true if the following transform is beneficial: (store (y (conv x)), y*)) -> (store x,...
bool isIndexedMaskedStoreLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed masked store is legal on this target.
virtual bool isVectorClearMaskLegal(ArrayRef< int >, EVT) const
Similar to isShuffleMaskLegal.
bool hasTargetDAGCombine(ISD::NodeType NT) const
If true, the target has custom DAG combine transformations that it can perform for the specified node...
virtual bool shouldSplatInsEltVarIndex(EVT) const
Return true if inserting a scalar into a variable element of an undef vector is more efficiently hand...
NegatibleCost
Enum that specifies when a float negation is beneficial.
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
int getSqrtRefinementSteps(EVT VT, MachineFunction &MF) const
Return the refinement step count for a square root of the given type based on the function's attribut...
virtual unsigned preferedOpcodeForCmpEqPiecesOfOperand(EVT VT, unsigned ShiftOpc, bool MayTransformRotate, const APInt &ShiftOrRotateAmt, const std::optional< APInt > &AndMask) const
virtual bool isFMADLegal(const MachineInstr &MI, LLT Ty) const
Returns true if MI can be combined with another instruction to form TargetOpcode::G_FMAD.
const char * getLibcallName(RTLIB::Libcall Call) const
Get the libcall routine name for the specified libcall.
virtual bool aggressivelyPreferBuildVectorSources(EVT VecVT) const
virtual bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const
virtual bool isFAbsFree(EVT VT) const
Return true if an fabs operation is free to the point where it is never worthwhile to replace it with...
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
virtual bool generateFMAsInMachineCombiner(EVT VT, CodeGenOptLevel OptLevel) const
virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AddrSpace, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
virtual bool hasPairedLoad(EVT, Align &) const
Return true if the target supplies and combines to a paired load two loaded values of type LoadedType...
virtual bool convertSelectOfConstantsToMath(EVT VT) const
Return true if a select of constants (select Cond, C1, C2) should be transformed into simple math ops...
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const
Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT from min(max(fptoi)) satur...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps, bool &UseOneConstNR, bool Reciprocal) const
Hooks for building estimates in place of slower divisions and square roots.
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
virtual bool isReassocProfitable(SelectionDAG &DAG, SDValue N0, SDValue N1) const
SDValue getCheaperOrNeutralNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, const NegatibleCost CostThreshold=NegatibleCost::Neutral, unsigned Depth=0) const
virtual bool isTargetCanonicalSelect(SDNode *N) const
Return true if the given select/vselect should be considered canonical and not be transformed.
SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, unsigned Depth=0) const
This is the helper function to return the newly negated expression only when the cost is cheaper.
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
More limited version of SimplifyDemandedBits that can be used to "lookthrough" ops that don't contrib...
SDValue expandABS(SDNode *N, SelectionDAG &DAG, bool IsNegative=false) const
Expand ABS nodes.
virtual bool IsDesirableToPromoteOp(SDValue, EVT &) const
This method queries the target whether it is beneficial for the dag combiner to promote the specified node.
SDValue BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, bool IsAfterLegalTypes, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::SDIV node expressing a divide by constant, return a DAG expression to select that will ...
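As a rough illustration of the kind of expansion BuildSDIV stands in for (a sketch only; the actual magic constant, shift amount, and fixup sequence are chosen per divisor and per target), a signed 32-bit divide by 3 can be rewritten without a division as a multiply-high plus a sign fixup:
#include <cstdint>
// Scalar equivalent of the multiply-high expansion for x / 3 (signed i32).
int32_t sdiv_by_3(int32_t x) {
  int32_t MulHi = (int32_t)(((int64_t)0x55555556 * x) >> 32); // mulhs(x, magic)
  return MulHi + ((uint32_t)x >> 31);                         // add 1 when x is negative
}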
virtual bool isTypeDesirableForOp(unsigned, EVT VT) const
Return true if the target has native support for the specified value type and it is 'desirable' to us...
SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, bool IsAfterLegalTypes, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::UDIV node expressing a divide by constant, return a DAG expression to select that will ...
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, const DenormalMode &Mode) const
Return a target-dependent comparison result if the input operand is suitable for use with a square ro...
SDValue buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0, SDValue N1, MutableArrayRef< int > Mask, SelectionDAG &DAG) const
Tries to build a legal vector shuffle using the provided parameters or equivalent variations.
virtual SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps) const
Return a reciprocal estimate value for the input operand.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
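A minimal sketch of how a combine might call SimplifyDemandedBits, assuming TLI, DAG, DCI and Op are the usual TargetLowering, SelectionDAG, DAGCombinerInfo and SDValue objects available in a DAG combine (the DAG combiner itself typically goes through a DAGCombinerInfo wrapper):
// Ask whether only the low 8 bits of Op matter; if a simplification is found,
// TLO records the replacement and CommitTargetLoweringOpt applies it.
TargetLowering::TargetLoweringOpt TLO(DAG, /*LegalTypes=*/true, /*LegalOps=*/true);
KnownBits Known;
APInt DemandedBits = APInt::getLowBitsSet(Op.getScalarValueSizeInBits(), 8);
if (TLI.SimplifyDemandedBits(Op, DemandedBits, Known, TLO))
  DCI.CommitTargetLoweringOpt(TLO);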
TargetLowering(const TargetLowering &)=delete
bool isConstFalseVal(SDValue N) const
Return if the N is a constant or constant vector equal to the false value from getBooleanContents().
virtual SDValue getSqrtResultForDenormInput(SDValue Operand, SelectionDAG &DAG) const
Return a target-dependent result if the input operand is not suitable for use with a square root esti...
virtual bool getPostIndexedAddressParts(SDNode *, SDNode *, SDValue &, SDValue &, ISD::MemIndexedMode &, SelectionDAG &) const
Returns true by value, base pointer and offset pointer and addressing mode by reference if this node ...
SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, bool foldBooleans, DAGCombinerInfo &DCI, const SDLoc &dl) const
Try to simplify a setcc built with the specified operands and cc.
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const
Return true if folding a constant offset with the given GlobalAddress is legal.
bool isConstTrueVal(SDValue N) const
Return if the N is a constant or constant vector equal to the true value from getBooleanContents().
virtual bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const
Return true if it is profitable to move this shift by a constant amount through its operand,...
virtual unsigned combineRepeatedFPDivisors() const
Indicate whether this target prefers to combine FDIVs with the same divisor.
virtual AndOrSETCCFoldKind isDesirableToCombineLogicOpOfSETCC(const SDNode *LogicOp, const SDNode *SETCC0, const SDNode *SETCC1) const
virtual bool getPreIndexedAddressParts(SDNode *, SDValue &, SDValue &, ISD::MemIndexedMode &, SelectionDAG &) const
Returns true by value, base pointer and offset pointer and addressing mode by reference if the node's...
SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, SDValue Index, const SDNodeFlags PtrArithFlags=SDNodeFlags()) const
Get a pointer to vector element Idx located in memory for a vector of type VecVT starting at a base a...
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue getInboundsVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, SDValue Index) const
Get a pointer to vector element Idx located in memory for a vector of type VecVT starting at a base a...
virtual SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
SDValue scalarizeExtractedVectorLoad(EVT ResultVT, const SDLoc &DL, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad, SelectionDAG &DAG) const
Replace an extraction of a load with a narrowed load.
virtual SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SREM lowering for power-of-2 denominators.
virtual bool isDesirableToTransformToIntegerOp(unsigned, EVT) const
Return true if it is profitable for dag combiner to transform a floating point op of specified opcode...
TargetOptions Options
unsigned NoSignedZerosFPMath
NoSignedZerosFPMath - This flag is enabled when the -enable-no-signed-zeros-fp-math is specified on t...
virtual bool useAA() const
Enable use of alias analysis during code generation (during MI scheduling, DAGCombine,...
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:344
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
LLVM_ABI const fltSemantics & getFltSemantics() const
Definition Type.cpp:107
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
User * getUser() const
Returns the User that contains this Use.
Definition Use.h:61
Value * getOperand(unsigned i) const
Definition User.h:232
const SDValue & getScale() const
ISD::MemIndexType getIndexType() const
How is Index applied to BasePtr when computing addresses.
const SDValue & getVectorLength() const
const SDValue & getIndex() const
const SDValue & getBasePtr() const
const SDValue & getMask() const
const SDValue & getValue() const
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
iterator_range< user_iterator > users()
Definition Value.h:426
int getNumOccurrences() const
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
Definition TypeSize.h:181
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:201
static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:231
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:169
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:166
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition TypeSize.h:253
Changed
#define INT64_MAX
Definition DataTypes.h:71
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char IsVolatile[]
Key for Kernel::Arg::Metadata::mIsVolatile.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
const APInt & smin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be signed.
Definition APInt.h:2248
const APInt & smax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be signed.
Definition APInt.h:2253
const APInt & umin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be unsigned.
Definition APInt.h:2258
const APInt & umax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be unsigned.
Definition APInt.h:2263
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ Entry
Definition COFF.h:862
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
LLVM_ABI CondCode getSetCCAndOperation(CondCode Op1, CondCode Op2, EVT Type)
Return the result of a logical AND between different comparisons of identical values: ((X op1 Y) & (X...
LLVM_ABI bool isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are ~0 ...
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:41
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:807
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition ISDOpcodes.h:256
@ CTLZ_ZERO_UNDEF
Definition ISDOpcodes.h:780
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition ISDOpcodes.h:504
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition ISDOpcodes.h:45
@ POISON
POISON - A poison node.
Definition ISDOpcodes.h:231
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:270
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:593
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:771
@ SMULFIX
RESULT = [US]MULFIX(LHS, RHS, SCALE) - Perform fixed point multiplication on 2 integers with the same...
Definition ISDOpcodes.h:387
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:289
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
Definition ISDOpcodes.h:515
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:259
@ SMULFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
Definition ISDOpcodes.h:393
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:841
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:511
@ GlobalAddress
Definition ISDOpcodes.h:88
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:868
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:577
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:410
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:744
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition ISDOpcodes.h:898
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition ISDOpcodes.h:275
@ FMULADD
FMULADD - Performs a * b + c, with, or without, intermediate rounding.
Definition ISDOpcodes.h:521
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:249
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:832
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition ISDOpcodes.h:712
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:662
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition ISDOpcodes.h:779
@ TRUNCATE_SSAT_U
Definition ISDOpcodes.h:861
@ SETCCCARRY
Like SetCC, ops #0 and #1 are the LHS and RHS operands to compare, but op #2 is a boolean indicating ...
Definition ISDOpcodes.h:815
@ SSUBO
Same for subtraction.
Definition ISDOpcodes.h:347
@ VECTOR_INTERLEAVE
VECTOR_INTERLEAVE(VEC1, VEC2, ...) - Returns N vectors from N input vectors, where N is the factor to...
Definition ISDOpcodes.h:628
@ STEP_VECTOR
STEP_VECTOR(IMM) - Returns a scalable vector whose lanes are comprised of a linear sequence of unsign...
Definition ISDOpcodes.h:688
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition ISDOpcodes.h:534
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:369
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:784
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:228
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition ISDOpcodes.h:242
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:669
@ AssertAlign
AssertAlign - These nodes record if a register contains a value that has a known alignment and the tr...
Definition ISDOpcodes.h:69
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition ISDOpcodes.h:225
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition ISDOpcodes.h:343
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:701
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:762
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:642
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:607
@ EntryToken
EntryToken - This is the marker used to indicate the start of a region.
Definition ISDOpcodes.h:48
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:569
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition ISDOpcodes.h:219
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:838
@ TargetConstantFP
Definition ISDOpcodes.h:175
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:799
@ SSHLSAT
RESULT = [US]SHLSAT(LHS, RHS) - Perform saturation left shift.
Definition ISDOpcodes.h:379
@ SMULO
Same for multiplication.
Definition ISDOpcodes.h:351
@ TargetFrameIndex
Definition ISDOpcodes.h:182
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition ISDOpcodes.h:887
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:876
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:724
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:793
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:323
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:914
@ TargetConstant
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification,...
Definition ISDOpcodes.h:174
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:736
@ CARRY_FALSE
CARRY_FALSE - This node is used when folding other nodes, like ADDC/SUBC, which indicate the carry re...
Definition ISDOpcodes.h:280
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition ISDOpcodes.h:707
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:299
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition ISDOpcodes.h:420
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition ISDOpcodes.h:236
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:558
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:947
@ VECTOR_COMPRESS
VECTOR_COMPRESS(Vec, Mask, Passthru) consecutively place vector elements based on mask e....
Definition ISDOpcodes.h:696
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition ISDOpcodes.h:909
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition ISDOpcodes.h:933
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:844
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:62
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition ISDOpcodes.h:527
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:360
@ TRUNCATE_SSAT_S
TRUNCATE_[SU]SAT_[SU] - Truncate for saturated operand [SU] located in middle, prefix for SAT means i...
Definition ISDOpcodes.h:859
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition ISDOpcodes.h:719
@ TRUNCATE_USAT_U
Definition ISDOpcodes.h:863
@ SADDO_CARRY
Carry-using overflow-aware nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:333
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:549
bool isIndexTypeSigned(MemIndexType IndexType)
bool isExtVecInRegOpcode(unsigned Opcode)
LLVM_ABI bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
bool matchUnaryFpPredicate(SDValue Op, std::function< bool(ConstantFPSDNode *)> Match, bool AllowUndefs=false)
Hook for matching ConstantFPSDNode predicate.
bool isFPEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with floati...
bool isExtOpcode(unsigned Opcode)
LLVM_ABI bool isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are 0 o...
LLVM_ABI bool isVPBinaryOp(unsigned Opcode)
Whether this is a vector-predicated binary operation opcode.
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
bool isBitwiseLogicOp(unsigned Opcode)
Whether this is bitwise logic opcode.
LLVM_ABI std::optional< unsigned > getVPMaskIdx(unsigned Opcode)
The operand position of the vector mask.
bool isUNINDEXEDLoad(const SDNode *N)
Returns true if the specified node is an unindexed load.
LLVM_ABI std::optional< unsigned > getVPExplicitVectorLengthIdx(unsigned Opcode)
The operand position of the explicit vector length parameter.
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is an EXTLOAD.
LLVM_ABI bool allOperandsUndef(const SDNode *N)
Return true if the node has at least one operand and all operands of the specified node are ISD::UNDE...
LLVM_ABI bool isFreezeUndef(const SDNode *N)
Return true if the specified node is FREEZE(UNDEF).
LLVM_ABI CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
MemIndexType
MemIndexType enum - This enum defines how to interpret MGATHER/SCATTER's index parameter when calcula...
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
LLVM_ABI bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
LLVM_ABI NodeType getInverseMinMaxOpcode(unsigned MinMaxOpc)
Given a MinMaxOpc of ISD::(U|S)MIN or ISD::(U|S)MAX, returns ISD::(U|S)MAX and ISD::(U|S)MIN,...
LLVM_ABI bool matchBinaryPredicate(SDValue LHS, SDValue RHS, std::function< bool(ConstantSDNode *, ConstantSDNode *)> Match, bool AllowUndefs=false, bool AllowTypeMismatch=false)
Attempt to match a binary predicate against a pair of scalar/splat constants or every element of a pa...
LLVM_ABI bool isVPReduction(unsigned Opcode)
Whether this is a vector-predicated reduction opcode.
bool matchUnaryPredicate(SDValue Op, std::function< bool(ConstantSDNode *)> Match, bool AllowUndefs=false, bool AllowTruncation=false)
Hook for matching ConstantSDNode predicate.
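For example (a sketch; Op is assumed to be some SDValue in scope), matchUnaryPredicate can test every constant element of a splat or BUILD_VECTOR with a lambda:
// True when Op is a constant (or splat/BUILD_VECTOR of constants) whose
// elements are all powers of two.
bool AllPow2 = ISD::matchUnaryPredicate(Op, [](ConstantSDNode *C) {
  return C->getAPIntValue().isPowerOf2();
});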
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
LLVM_ABI bool isBuildVectorOfConstantFPSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantFPSDNode or undef.
bool isSEXTLoad(const SDNode *N)
Returns true if the specified node is a SEXTLOAD.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LLVM_ABI bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
LLVM_ABI CondCode getSetCCOrOperation(CondCode Op1, CondCode Op2, EVT Type)
Return the result of a logical OR between different comparisons of identical values: ((X op1 Y) | (X ...
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
BinaryOp_match< SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB > m_Neg(const SrcTy &&Src)
Matches a register negated by a G_SUB.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
m_Intrinsic_Ty< Opnd0 >::Ty m_BitReverse(const Opnd0 &Op0)
BinaryOp_match< LHS, RHS, Instruction::URem > m_URem(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_VScale()
Matches a call to llvm.vscale().
MaxMin_match< ICmpInst, LHS, RHS, smin_pred_ty > m_SMin(const LHS &L, const RHS &R)
CastInst_match< OpTy, FPToUIInst > m_FPToUI(const OpTy &Op)
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
deferredval_ty< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
MaxMin_match< ICmpInst, LHS, RHS, umax_pred_ty > m_UMax(const LHS &L, const RHS &R)
CastOperator_match< OpTy, Instruction::BitCast > m_BitCast(const OpTy &Op)
Matches BitCast.
MaxMin_match< ICmpInst, LHS, RHS, smax_pred_ty > m_SMax(const LHS &L, const RHS &R)
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
AnyBinaryOp_match< LHS, RHS, true > m_c_BinOp(const LHS &L, const RHS &R)
Matches a BinaryOperator with LHS and RHS in either order.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::SRem > m_SRem(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
BinOpPred_match< LHS, RHS, is_bitwiselogic_op > m_BitwiseLogic(const LHS &L, const RHS &R)
Matches bitwise logic operations.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
MaxMin_match< ICmpInst, LHS, RHS, umin_pred_ty > m_UMin(const LHS &L, const RHS &R)
@ Undef
Value of the register doesn't matter.
Opcode_match m_Opc(unsigned Opcode)
auto m_SelectCCLike(const LTy &L, const RTy &R, const TTy &T, const FTy &F, const CCTy &CC)
BinaryOpc_match< LHS, RHS > m_Srl(const LHS &L, const RHS &R)
auto m_SpecificVT(EVT RefVT, const Pattern &P)
Match a specific ValueType.
BinaryOpc_match< LHS, RHS > m_Sra(const LHS &L, const RHS &R)
auto m_UMinLike(const LHS &L, const RHS &R)
auto m_SelectLike(const T0_P &Cond, const T1_P &T, const T2_P &F)
auto m_UMaxLike(const LHS &L, const RHS &R)
UnaryOpc_match< Opnd > m_Abs(const Opnd &Op)
Or< Preds... > m_AnyOf(const Preds &...preds)
And< Preds... > m_AllOf(const Preds &...preds)
TernaryOpc_match< T0_P, T1_P, T2_P > m_SetCC(const T0_P &LHS, const T1_P &RHS, const T2_P &CC)
UnaryOpc_match< Opnd > m_AnyExt(const Opnd &Op)
auto m_SMaxLike(const LHS &L, const RHS &R)
UnaryOpc_match< Opnd > m_Ctlz(const Opnd &Op)
TernaryOpc_match< T0_P, T1_P, T2_P > m_VSelect(const T0_P &Cond, const T1_P &T, const T2_P &F)
bool sd_match(SDNode *N, const SelectionDAG *DAG, Pattern &&P)
UnaryOpc_match< Opnd > m_UnaryOp(unsigned Opc, const Opnd &Op)
auto m_SMinLike(const LHS &L, const RHS &R)
CondCode_match m_SpecificCondCode(ISD::CondCode CC)
Match a conditional code SDNode with a specific ISD::CondCode.
NUses_match< 1, Value_match > m_OneUse()
CondCode_match m_CondCode()
Match any conditional code SDNode.
Not(const Pred &P) -> Not< Pred >
TernaryOpc_match< T0_P, T1_P, T2_P, true, false > m_c_SetCC(const T0_P &LHS, const T1_P &RHS, const T2_P &CC)
bool sd_context_match(SDValue N, const MatchContext &Ctx, Pattern &&P)
ConstantInt_match m_ConstInt()
Match any integer constants or splat of an integer constant.
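As a rough usage sketch of the SDPatternMatch helpers listed above (N is assumed to be the SDNode being combined; the exact set of matchers available depends on the LLVM version):
// Match N = (add X, 1) and bind X to the other operand.
using namespace llvm::SDPatternMatch;
SDValue X;
if (sd_match(N, m_Add(m_Value(X), m_One()))) {
  // N adds 1 to X; a combine could, e.g., cancel it against a later subtract of 1.
}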
initializer< Ty > init(const Ty &Val)
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
Definition Metadata.h:667
constexpr double e
@ User
could "use" a pointer
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< UseNode * > Use
Definition RDFGraph.h:385
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
Definition Threading.h:280
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
Definition MathExtras.h:344
@ Offset
Definition DWP.cpp:477
@ Length
Definition DWP.cpp:477
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iterable types.
Definition STLExtras.h:829
bool operator<(int64_t V1, const APSInt &V2)
Definition APSInt.h:362
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
void stable_sort(R &&Range)
Definition STLExtras.h:2058
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1751
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1725
InstructionCost Cost
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
LLVM_ABI bool isAllOnesOrAllOnesSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant -1 integer or a splatted vector of a constant -1 integer (with...
Definition Utils.cpp:1606
LLVM_ABI SDValue getBitwiseNotOperand(SDValue V, SDValue Mask, bool AllowUndefs)
If V is a bitwise not, returns the inverted operand.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
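A small sketch of the usual pattern (N is assumed to be the node being combined): peekThroughBitcasts strips any chain of bitcasts so the underlying producer can be inspected directly:
// Look through bitcasts on operand 0 before checking its opcode.
SDValue Src = peekThroughBitcasts(N->getOperand(0));
if (Src.getOpcode() == ISD::BUILD_VECTOR) {
  // Operand 0 is a BUILD_VECTOR, possibly hidden behind one or more bitcasts.
}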
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2472
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition bit.h:293
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition Alignment.h:134
LLVM_ABI llvm::SmallVector< int, 16 > createUnaryMask(ArrayRef< int > Mask, unsigned NumElts)
Given a shuffle mask for a binary shuffle, create the equivalent shuffle mask assuming both operands ...
bool isIntOrFPConstant(SDValue V)
Return true if V is either an integer or FP constant.
bool operator!=(uint64_t V1, const APInt &V2)
Definition APInt.h:2113
bool operator>=(int64_t V1, const APSInt &V2)
Definition APSInt.h:361
LLVM_ATTRIBUTE_ALWAYS_INLINE DynamicAPInt & operator+=(DynamicAPInt &A, int64_t B)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2136
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
LLVM_ABI bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
int ilogb(const APFloat &Arg)
Returns the exponent of the internal representation of the APFloat.
Definition APFloat.h:1516
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
LLVM_ABI bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition Utils.cpp:1588
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition bit.h:154
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:337
LLVM_ABI bool isMinSignedConstant(SDValue V)
Returns true if V is a constant min signed integer value.
LLVM_ABI ConstantFPSDNode * isConstOrConstSplatFP(SDValue N, bool AllowUndefs=false)
Returns the SDNode if it is a constant splat BuildVector or constant float.
LLVM_ABI ConstantRange getConstantRangeFromMetadata(const MDNode &RangeMD)
Parse out a conservative ConstantRange from !range metadata.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition MathExtras.h:385
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:202
unsigned M1(unsigned Val)
Definition VE.h:377
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:147
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1732
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
LLVM_ABI bool isConstantOrConstantVector(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowFP=true, bool AllowOpaqueConstants=true)
Return true if the specified instruction is known to be a constant, or a vector of constants.
Definition Utils.cpp:1544
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition bit.h:236
bool operator>(int64_t V1, const APSInt &V2)
Definition APSInt.h:363
LLVM_ABI bool isBitwiseNot(SDValue V, bool AllowUndefs=false)
Returns true if V is a bitwise not operation.
auto reverse(ContainerTy &&C)
Definition STLExtras.h:406
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1622
detail::ValueMatchesPoly< M > HasValue(M Matcher)
Definition Error.h:221
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI SDValue peekThroughTruncates(SDValue V)
Return the non-truncated source operand of V if it exists.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
LLVM_ABI SDValue peekThroughOneUseBitcasts(SDValue V)
Return the non-bitcasted and one-use source operand of V if it exists.
CodeGenOptLevel
Code generation optimization level.
Definition CodeGen.h:82
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
MutableArrayRef(T &OneElt) -> MutableArrayRef< T >
LLVM_ABI bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
@ Other
Any other memory.
Definition ModRef.h:68
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:71
CombineLevel
Definition DAGCombine.h:15
@ AfterLegalizeDAG
Definition DAGCombine.h:19
@ AfterLegalizeVectorOps
Definition DAGCombine.h:18
@ BeforeLegalizeTypes
Definition DAGCombine.h:16
@ AfterLegalizeTypes
Definition DAGCombine.h:17
LLVM_ABI void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed...
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ Or
Bitwise or logical OR of integers.
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ FMul
Product of floats.
@ And
Bitwise or logical AND of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
@ FAdd
Sum of floats.
DWARFExpression::Operation Op
unsigned M0(unsigned Val)
Definition VE.h:376
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
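A minimal sketch of how isConstOrConstSplat is typically used (N is assumed to be a shift node being combined); it handles scalar constants and splatted vector constants uniformly:
// Fold a shift by zero: (shl/srl/sra X, 0) -> X, for scalars and splats alike.
if (ConstantSDNode *Amt = isConstOrConstSplat(N->getOperand(1)))
  if (Amt->isZero())
    return N->getOperand(0);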
constexpr unsigned BitWidth
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:1961
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
LLVM_ABI void getShuffleMaskWithWidestElts(ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Repetitively apply widenShuffleMaskElts() for as long as it succeeds, to get the shuffle mask with wi...
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1897
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
LLVM_ABI bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list is empty.
Definition STLExtras.h:2108
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:197
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
LLVM_ABI bool isNeutralConstant(unsigned Opc, SDNodeFlags Flags, SDValue V, unsigned OperandNo)
Returns true if V is a neutral element of Opc with Flags.
bool operator<=(int64_t V1, const APSInt &V2)
Definition APSInt.h:360
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition MathExtras.h:373
LLVM_ABI int getSplatIndex(ArrayRef< int > Mask)
If all non-negative Mask elements are the same value, return that value.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:869
#define N
LLVM_ABI AAMDNodes concat(const AAMDNodes &Other) const
Determine the best AAMDNodes after concatenating two different locations together.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
static constexpr DenormalMode getIEEE()
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:395
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:137
bool knownBitsLE(EVT VT) const
Return true if we know at compile time this has fewer than or the same bits as VT.
Definition ValueTypes.h:279
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:74
EVT changeTypeToInteger() const
Return the type converted to an equivalently sized integer or vector with integer element type.
Definition ValueTypes.h:121
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition ValueTypes.h:284
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition ValueTypes.h:300
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:147
ElementCount getVectorElementCount() const
Definition ValueTypes.h:350
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:373
bool isByteSized() const
Return true if the bit size is a multiple of 8.
Definition ValueTypes.h:243
EVT changeElementType(EVT EltVT) const
Return a VT for a type whose attributes match ourselves with the exception of the element type that i...
Definition ValueTypes.h:113
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition ValueTypes.h:359
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:385
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
Definition ValueTypes.h:470
TypeSize getStoreSizeInBits() const
Return the number of bits overwritten by a store of the specified value type.
Definition ValueTypes.h:412
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:65
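A quick sketch of the EVT factory functions listed above (Ctx is assumed to be an LLVMContext in scope):
// Build an i48 integer type and a v4i48 fixed-width vector of it.
EVT I48 = EVT::getIntegerVT(Ctx, 48);
EVT V4I48 = EVT::getVectorVT(Ctx, I48, 4);
// Round the scalar up to the next power-of-two width: i48 -> i64.
EVT I64 = I48.getRoundIntegerType(Ctx);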
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:381
bool isScalableVT() const
Return true if the type is a scalable type.
Definition ValueTypes.h:187
bool isFixedLengthVector() const
Definition ValueTypes.h:181
EVT getRoundIntegerType(LLVMContext &Context) const
Rounds the bit-width of the given integer EVT up to the nearest power of two (and at least to eight),...
Definition ValueTypes.h:419
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:323
bool bitsGE(EVT VT) const
Return true if this has no less bits than VT.
Definition ValueTypes.h:292
bool bitsEq(EVT VT) const
Return true if this has the same number of bits as VT.
Definition ValueTypes.h:256
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
bool isRound() const
Return true if the size is a power-of-two number of bytes.
Definition ValueTypes.h:248
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition ValueTypes.h:174
bool knownBitsGE(EVT VT) const
Return true if we know at compile time this has more than or the same bits as VT.
Definition ValueTypes.h:268
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:328
bool isExtended() const
Test if the given EVT is extended (as opposed to being simple).
Definition ValueTypes.h:142
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:157
LLVM_ABI const fltSemantics & getFltSemantics() const
Returns an APFloat semantics tag appropriate for the value type.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:336
bool isZeroSized() const
Test if the given EVT has zero size; this will fail if called on a scalable type.
Definition ValueTypes.h:132
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition ValueTypes.h:308
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:152
bool isNonNegative() const
Returns true if this value is known to be non-negative.
Definition KnownBits.h:108
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition KnownBits.h:242
bool isConstant() const
Returns true if we know the value of all bits.
Definition KnownBits.h:54
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
Definition KnownBits.h:296
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition KnownBits.h:248
bool isAllOnes() const
Returns true if value is all one bits.
Definition KnownBits.h:83
const APInt & getConstant() const
Returns the value when all bits have a known value.
Definition KnownBits.h:60
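A short sketch of the KnownBits queries listed above, assuming DAG and Op are the SelectionDAG and value under consideration in a combine:
// computeKnownBits fills in which bits of Op are provably zero or one.
KnownBits Known = DAG.computeKnownBits(Op);
if (Known.isConstant()) {
  // Every bit is known; Known.getConstant() yields the exact APInt value.
}
unsigned LeadingZeros = Known.countMinLeadingZeros(); // bits guaranteed zero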
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands,...
LLVM_ABI unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getUnknownStack(MachineFunction &MF)
Stack memory without other information.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
These are IR-level optimization flags that may be propagated to SDNodes.
void setAllowContract(bool b)
bool hasNoUnsignedWrap() const
void setAllowReassociation(bool b)
void setAllowReciprocal(bool b)
bool hasAllowContract() const
bool hasApproximateFuncs() const
void setApproximateFuncs(bool b)
bool hasNoSignedWrap() const
bool hasAllowReciprocal() const
bool hasAllowReassociation() const
Clients of various APIs that cause global effects on the DAG can optionally implement this interface.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
LLVM_ABI void AddToWorklist(SDNode *N)
LLVM_ABI bool recursivelyDeleteUnusedNodes(SDNode *N)
LLVM_ABI SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
LLVM_ABI void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...