LLVM 23.0.0git
DAGCombiner.cpp
Go to the documentation of this file.
//===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
// both before and after the DAG is legalized.
//
// This pass is not a substitute for the LLVM IR instcombine pass. This pass is
// primarily intended to handle simplification opportunities that are implicit
// in the LLVM IR and exposed by the various codegen lowering phases.
//
//===----------------------------------------------------------------------===//
17
18#include "llvm/ADT/APFloat.h"
19#include "llvm/ADT/APInt.h"
20#include "llvm/ADT/APSInt.h"
21#include "llvm/ADT/ArrayRef.h"
22#include "llvm/ADT/DenseMap.h"
24#include "llvm/ADT/STLExtras.h"
25#include "llvm/ADT/SetVector.h"
28#include "llvm/ADT/SmallSet.h"
30#include "llvm/ADT/Statistic.h"
52#include "llvm/IR/Attributes.h"
53#include "llvm/IR/Constant.h"
54#include "llvm/IR/DataLayout.h"
57#include "llvm/IR/Function.h"
58#include "llvm/IR/Metadata.h"
63#include "llvm/Support/Debug.h"
71#include <algorithm>
72#include <cassert>
73#include <cstdint>
74#include <functional>
75#include <iterator>
76#include <optional>
77#include <string>
78#include <tuple>
79#include <utility>
80#include <variant>
81
82#include "MatchContext.h"
83#include "SDNodeDbgValue.h"
84
85using namespace llvm;
86using namespace llvm::SDPatternMatch;
87
88#define DEBUG_TYPE "dagcombine"
89
90STATISTIC(NodesCombined , "Number of dag nodes combined");
91STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
92STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
93STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
94STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
95STATISTIC(SlicedLoads, "Number of load sliced");
96STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");
97
98DEBUG_COUNTER(DAGCombineCounter, "dagcombine",
99 "Controls whether a DAG combine is performed for a node");
100
101static cl::opt<bool>
102CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
103 cl::desc("Enable DAG combiner's use of IR alias analysis"));
104
105static cl::opt<bool>
106UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
107 cl::desc("Enable DAG combiner's use of TBAA"));
108
109#ifndef NDEBUG
111CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
112 cl::desc("Only use DAG-combiner alias analysis in this"
113 " function"));
114#endif
115
116/// Hidden option to stress test load slicing, i.e., when this option
117/// is enabled, load slicing bypasses most of its profitability guards.
118static cl::opt<bool>
119StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
120 cl::desc("Bypass the profitability model of load slicing"),
121 cl::init(false));
122
123static cl::opt<bool>
124 MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
125 cl::desc("DAG combiner may split indexing from loads"));
126
127static cl::opt<bool>
128 EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true),
129 cl::desc("DAG combiner enable merging multiple stores "
130 "into a wider store"));
131
133 "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
134 cl::desc("Limit the number of operands to inline for Token Factors"));
135
137 "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10),
138 cl::desc("Limit the number of times for the same StoreNode and RootNode "
139 "to bail out in store merging dependence check"));
140
142 "combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true),
143 cl::desc("DAG combiner enable reducing the width of load/op/store "
144 "sequence"));
146 "combiner-reduce-load-op-store-width-force-narrowing-profitable",
147 cl::Hidden, cl::init(false),
148 cl::desc("DAG combiner force override the narrowing profitable check when "
149 "reducing the width of load/op/store sequences"));
150
152 "combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true),
153 cl::desc("DAG combiner enable load/<replace bytes>/store with "
154 "a narrower store"));
155
157 "combiner-topological-sorting", cl::Hidden, cl::init(false),
158 cl::desc("DAG combiner nodes consistently processed in topological order"));
159
160static cl::opt<bool> DisableCombines("combiner-disabled", cl::Hidden,
161 cl::init(false),
162 cl::desc("Disable the DAG combiner"));
163
164namespace {
165
166 class DAGCombiner {
167 SelectionDAG &DAG;
168 const TargetLowering &TLI;
169 const SelectionDAGTargetInfo *STI;
171 CodeGenOptLevel OptLevel;
172 bool LegalDAG = false;
173 bool LegalOperations = false;
174 bool LegalTypes = false;
175 bool ForCodeSize;
176 bool DisableGenericCombines;
177
178 /// Worklist of all of the nodes that need to be simplified.
179 ///
180 /// This must behave as a stack -- new nodes to process are pushed onto the
181 /// back and when processing we pop off of the back.
182 ///
183 /// The worklist will not contain duplicates but may contain null entries
184 /// due to nodes being deleted from the underlying DAG. For fast lookup and
185 /// deduplication, the index of the node in this vector is stored in the
186 /// node in SDNode::CombinerWorklistIndex.
188
189 /// This records all nodes attempted to be added to the worklist since we
190 /// considered a new worklist entry. As we keep do not add duplicate nodes
191 /// in the worklist, this is different from the tail of the worklist.
193
194 /// Map from candidate StoreNode to the pair of RootNode and count.
195 /// The count is used to track how many times we have seen the StoreNode
196 /// with the same RootNode bail out in dependence check. If we have seen
197 /// the bail out for the same pair many times over a limit, we won't
198 /// consider the StoreNode with the same RootNode as store merging
199 /// candidate again.
201
202 // BatchAA - Used for DAG load/store alias analysis.
203 BatchAAResults *BatchAA;
204
205 /// This caches all chains that have already been processed in
206 /// DAGCombiner::getStoreMergeCandidates() and found to have no mergeable
207 /// stores candidates.
208 SmallPtrSet<SDNode *, 4> ChainsWithoutMergeableStores;
209
210 /// When an instruction is simplified, add all users of the instruction to
211 /// the work lists because they might get more simplified now.
212 void AddUsersToWorklist(SDNode *N) {
213 for (SDNode *Node : N->users())
214 AddToWorklist(Node);
215 }
216
217 /// Convenient shorthand to add a node and all of its user to the worklist.
218 void AddToWorklistWithUsers(SDNode *N) {
219 AddUsersToWorklist(N);
220 AddToWorklist(N);
221 }
222
223 // Prune potentially dangling nodes. This is called after
224 // any visit to a node, but should also be called during a visit after any
225 // failed combine which may have created a DAG node.
226 void clearAddedDanglingWorklistEntries() {
227 // Check any nodes added to the worklist to see if they are prunable.
228 while (!PruningList.empty()) {
229 auto *N = PruningList.pop_back_val();
230 if (N->use_empty())
231 recursivelyDeleteUnusedNodes(N);
232 }
233 }
234
235 SDNode *getNextWorklistEntry() {
236 // Before we do any work, remove nodes that are not in use.
237 clearAddedDanglingWorklistEntries();
238 SDNode *N = nullptr;
239 // The Worklist holds the SDNodes in order, but it may contain null
240 // entries.
241 while (!N && !Worklist.empty()) {
242 N = Worklist.pop_back_val();
243 }
244
245 if (N) {
246 assert(N->getCombinerWorklistIndex() >= 0 &&
247 "Found a worklist entry without a corresponding map entry!");
248 // Set to -2 to indicate that we combined the node.
249 N->setCombinerWorklistIndex(-2);
250 }
251 return N;
252 }
253
254 /// Call the node-specific routine that folds each particular type of node.
255 SDValue visit(SDNode *N);
256
257 public:
258 DAGCombiner(SelectionDAG &D, BatchAAResults *BatchAA, CodeGenOptLevel OL)
259 : DAG(D), TLI(D.getTargetLoweringInfo()),
260 STI(D.getSubtarget().getSelectionDAGInfo()), OptLevel(OL),
261 BatchAA(BatchAA) {
262 ForCodeSize = DAG.shouldOptForSize();
263 DisableGenericCombines =
264 DisableCombines || (STI && STI->disableGenericCombines(OptLevel));
265 }
266
267 void ConsiderForPruning(SDNode *N) {
268 // Mark this for potential pruning.
269 PruningList.insert(N);
270 }
271
272 /// Add to the worklist making sure its instance is at the back (next to be
273 /// processed.)
274 void AddToWorklist(SDNode *N, bool IsCandidateForPruning = true,
275 bool SkipIfCombinedBefore = false) {
276 assert(N->getOpcode() != ISD::DELETED_NODE &&
277 "Deleted Node added to Worklist");
278
279 // Skip handle nodes as they can't usefully be combined and confuse the
280 // zero-use deletion strategy.
281 if (N->getOpcode() == ISD::HANDLENODE)
282 return;
283
284 if (SkipIfCombinedBefore && N->getCombinerWorklistIndex() == -2)
285 return;
286
287 if (IsCandidateForPruning)
288 ConsiderForPruning(N);
289
290 if (N->getCombinerWorklistIndex() < 0) {
291 N->setCombinerWorklistIndex(Worklist.size());
292 Worklist.push_back(N);
293 }
294 }
295
296 /// Remove all instances of N from the worklist.
297 void removeFromWorklist(SDNode *N) {
298 PruningList.remove(N);
299 StoreRootCountMap.erase(N);
300
301 int WorklistIndex = N->getCombinerWorklistIndex();
302 // If not in the worklist, the index might be -1 or -2 (was combined
303 // before). As the node gets deleted anyway, there's no need to update
304 // the index.
305 if (WorklistIndex < 0)
306 return; // Not in the worklist.
307
308 // Null out the entry rather than erasing it to avoid a linear operation.
309 Worklist[WorklistIndex] = nullptr;
310 N->setCombinerWorklistIndex(-1);
311 }
312
313 void deleteAndRecombine(SDNode *N);
314 bool recursivelyDeleteUnusedNodes(SDNode *N);
315
316 /// Replaces all uses of the results of one DAG node with new values.
317 SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
318 bool AddTo = true);
319
320 /// Replaces all uses of the results of one DAG node with new values.
321 SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
322 return CombineTo(N, &Res, 1, AddTo);
323 }
324
325 /// Replaces all uses of the results of one DAG node with new values.
326 SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
327 bool AddTo = true) {
328 SDValue To[] = { Res0, Res1 };
329 return CombineTo(N, To, 2, AddTo);
330 }
331
332 SDValue CombineTo(SDNode *N, SmallVectorImpl<SDValue> *To,
333 bool AddTo = true) {
334 return CombineTo(N, To->data(), To->size(), AddTo);
335 }
336
337 void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
338
339 private:
340 /// Check the specified integer node value to see if it can be simplified or
341 /// if things it uses can be simplified by bit propagation.
342 /// If so, return true.
343 bool SimplifyDemandedBits(SDValue Op) {
344 unsigned BitWidth = Op.getScalarValueSizeInBits();
345 APInt DemandedBits = APInt::getAllOnes(BitWidth);
346 return SimplifyDemandedBits(Op, DemandedBits);
347 }
348
349 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
350 EVT VT = Op.getValueType();
351 APInt DemandedElts = VT.isFixedLengthVector()
353 : APInt(1, 1);
354 return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, false);
355 }
356
357 /// Check the specified vector node value to see if it can be simplified or
358 /// if things it uses can be simplified as it only uses some of the
359 /// elements. If so, return true.
360 bool SimplifyDemandedVectorElts(SDValue Op) {
361 // TODO: For now just pretend it cannot be simplified.
362 if (Op.getValueType().isScalableVector())
363 return false;
364
365 unsigned NumElts = Op.getValueType().getVectorNumElements();
366 APInt DemandedElts = APInt::getAllOnes(NumElts);
367 return SimplifyDemandedVectorElts(Op, DemandedElts);
368 }
369
370 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
371 const APInt &DemandedElts,
372 bool AssumeSingleUse = false);
373 bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
374 bool AssumeSingleUse = false);
375
376 bool CombineToPreIndexedLoadStore(SDNode *N);
377 bool CombineToPostIndexedLoadStore(SDNode *N);
378 SDValue SplitIndexingFromLoad(LoadSDNode *LD);
379 bool SliceUpLoad(SDNode *N);
380
381 // Looks up the chain to find a unique (unaliased) store feeding the passed
382 // load. If no such store is found, returns a nullptr.
383 // Note: This will look past a CALLSEQ_START if the load is chained to it so
384 // so that it can find stack stores for byval params.
385 StoreSDNode *getUniqueStoreFeeding(LoadSDNode *LD, int64_t &Offset);
386 // Scalars have size 0 to distinguish from singleton vectors.
387 SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
388 bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
389 bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);
390
391 void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
392 SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
393 SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
394 SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
395 SDValue PromoteIntBinOp(SDValue Op);
396 SDValue PromoteIntShiftOp(SDValue Op);
397 SDValue PromoteExtend(SDValue Op);
398 bool PromoteLoad(SDValue Op);
399
400 SDValue foldShiftToAvg(SDNode *N, const SDLoc &DL);
401 // Fold `a bitwiseop (~b +/- c)` -> `a bitwiseop ~(b -/+ c)`
402 SDValue foldBitwiseOpWithNeg(SDNode *N, const SDLoc &DL, EVT VT);
403
404 SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
405 SDValue RHS, SDValue True, SDValue False,
406 ISD::CondCode CC);
407
408 /// Call the node-specific routine that knows how to fold each
409 /// particular type of node. If that doesn't do anything, try the
410 /// target-specific DAG combines.
411 SDValue combine(SDNode *N);
412
413 // Visitation implementation - Implement dag node combining for different
414 // node types. The semantics are as follows:
415 // Return Value:
416 // SDValue.getNode() == 0 - No change was made
417 // SDValue.getNode() == N - N was replaced, is dead and has been handled.
418 // otherwise - N should be replaced by the returned Operand.
419 //
420 SDValue visitTokenFactor(SDNode *N);
421 SDValue visitMERGE_VALUES(SDNode *N);
422 SDValue visitADD(SDNode *N);
423 SDValue visitADDLike(SDNode *N);
424 SDValue visitADDLikeCommutative(SDValue N0, SDValue N1,
425 SDNode *LocReference);
426 SDValue visitPTRADD(SDNode *N);
427 SDValue visitSUB(SDNode *N);
428 SDValue visitADDSAT(SDNode *N);
429 SDValue visitSUBSAT(SDNode *N);
430 SDValue visitADDC(SDNode *N);
431 SDValue visitADDO(SDNode *N);
432 SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
433 SDValue visitSUBC(SDNode *N);
434 SDValue visitSUBO(SDNode *N);
435 SDValue visitADDE(SDNode *N);
436 SDValue visitUADDO_CARRY(SDNode *N);
437 SDValue visitSADDO_CARRY(SDNode *N);
438 SDValue visitUADDO_CARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
439 SDNode *N);
440 SDValue visitSADDO_CARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
441 SDNode *N);
442 SDValue visitSUBE(SDNode *N);
443 SDValue visitUSUBO_CARRY(SDNode *N);
444 SDValue visitSSUBO_CARRY(SDNode *N);
445 template <class MatchContextClass> SDValue visitMUL(SDNode *N);
446 SDValue visitMULFIX(SDNode *N);
447 SDValue useDivRem(SDNode *N);
448 SDValue visitSDIV(SDNode *N);
449 SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
450 SDValue visitUDIV(SDNode *N);
451 SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
452 SDValue visitREM(SDNode *N);
453 SDValue visitMULHU(SDNode *N);
454 SDValue visitMULHS(SDNode *N);
455 SDValue visitAVG(SDNode *N);
456 SDValue visitABD(SDNode *N);
457 SDValue visitSMUL_LOHI(SDNode *N);
458 SDValue visitUMUL_LOHI(SDNode *N);
459 SDValue visitMULO(SDNode *N);
460 SDValue visitIMINMAX(SDNode *N);
461 SDValue visitAND(SDNode *N);
462 SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
463 SDValue visitOR(SDNode *N);
464 SDValue visitORLike(SDValue N0, SDValue N1, const SDLoc &DL);
465 SDValue visitXOR(SDNode *N);
466 SDValue SimplifyVCastOp(SDNode *N, const SDLoc &DL);
467 SDValue SimplifyVBinOp(SDNode *N, const SDLoc &DL);
468 SDValue visitSHL(SDNode *N);
469 SDValue visitSRA(SDNode *N);
470 SDValue visitSRL(SDNode *N);
471 SDValue visitFunnelShift(SDNode *N);
472 SDValue visitSHLSAT(SDNode *N);
473 SDValue visitRotate(SDNode *N);
474 SDValue visitABS(SDNode *N);
475 SDValue visitCLMUL(SDNode *N);
476 SDValue visitBSWAP(SDNode *N);
477 SDValue visitBITREVERSE(SDNode *N);
478 SDValue visitCTLZ(SDNode *N);
479 SDValue visitCTLZ_ZERO_POISON(SDNode *N);
480 SDValue visitCTTZ(SDNode *N);
481 SDValue visitCTTZ_ZERO_POISON(SDNode *N);
482 SDValue visitCTPOP(SDNode *N);
483 SDValue visitSELECT(SDNode *N);
484 SDValue visitVSELECT(SDNode *N);
485 SDValue visitVP_SELECT(SDNode *N);
486 SDValue visitSELECT_CC(SDNode *N);
487 SDValue visitSETCC(SDNode *N);
488 SDValue visitSETCCCARRY(SDNode *N);
489 SDValue visitSIGN_EXTEND(SDNode *N);
490 SDValue visitZERO_EXTEND(SDNode *N);
491 SDValue visitANY_EXTEND(SDNode *N);
492 SDValue visitAssertExt(SDNode *N);
493 SDValue visitAssertAlign(SDNode *N);
494 SDValue visitIS_FPCLASS(SDNode *N);
495 SDValue visitSIGN_EXTEND_INREG(SDNode *N);
496 SDValue visitEXTEND_VECTOR_INREG(SDNode *N);
497 SDValue visitTRUNCATE(SDNode *N);
498 SDValue visitTRUNCATE_USAT_U(SDNode *N);
499 SDValue visitBITCAST(SDNode *N);
500 SDValue visitFREEZE(SDNode *N);
501 SDValue visitBUILD_PAIR(SDNode *N);
502 SDValue visitFADD(SDNode *N);
503 SDValue visitVP_FADD(SDNode *N);
504 SDValue visitVP_FSUB(SDNode *N);
505 SDValue visitSTRICT_FADD(SDNode *N);
506 SDValue visitFSUB(SDNode *N);
507 SDValue visitFMUL(SDNode *N);
508 template <class MatchContextClass> SDValue visitFMA(SDNode *N);
509 SDValue visitFMAD(SDNode *N);
510 SDValue visitFMULADD(SDNode *N);
511 SDValue visitFDIV(SDNode *N);
512 SDValue visitFREM(SDNode *N);
513 SDValue visitFSQRT(SDNode *N);
514 SDValue visitFCOPYSIGN(SDNode *N);
515 SDValue visitFPOW(SDNode *N);
516 SDValue visitFCANONICALIZE(SDNode *N);
517 SDValue visitSINT_TO_FP(SDNode *N);
518 SDValue visitUINT_TO_FP(SDNode *N);
519 SDValue visitFP_TO_SINT(SDNode *N);
520 SDValue visitFP_TO_UINT(SDNode *N);
521 SDValue visitXROUND(SDNode *N);
522 SDValue visitFP_ROUND(SDNode *N);
523 SDValue visitFP_EXTEND(SDNode *N);
524 SDValue visitFNEG(SDNode *N);
525 SDValue visitFABS(SDNode *N);
526 SDValue visitFCEIL(SDNode *N);
527 SDValue visitFTRUNC(SDNode *N);
528 SDValue visitFFREXP(SDNode *N);
529 SDValue visitFFLOOR(SDNode *N);
530 SDValue visitFMinMax(SDNode *N);
531 SDValue visitBRCOND(SDNode *N);
532 SDValue visitBR_CC(SDNode *N);
533 SDValue visitLOAD(SDNode *N);
534
535 SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
536 SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
537 SDValue replaceStoreOfInsertLoad(StoreSDNode *ST);
538
539 bool refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(SDNode *N);
540
541 SDValue visitSTORE(SDNode *N);
542 SDValue visitATOMIC_STORE(SDNode *N);
543 SDValue visitLIFETIME_END(SDNode *N);
544 SDValue visitINSERT_VECTOR_ELT(SDNode *N);
545 SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
546 SDValue visitBUILD_VECTOR(SDNode *N);
547 SDValue visitCONCAT_VECTORS(SDNode *N);
548 SDValue visitVECTOR_INTERLEAVE(SDNode *N);
549 SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
550 SDValue visitVECTOR_SHUFFLE(SDNode *N);
551 SDValue visitSCALAR_TO_VECTOR(SDNode *N);
552 SDValue visitINSERT_SUBVECTOR(SDNode *N);
553 SDValue visitVECTOR_COMPRESS(SDNode *N);
554 SDValue visitMLOAD(SDNode *N);
555 SDValue visitMSTORE(SDNode *N);
556 SDValue visitMGATHER(SDNode *N);
557 SDValue visitMSCATTER(SDNode *N);
558 SDValue visitMHISTOGRAM(SDNode *N);
559 SDValue visitPARTIAL_REDUCE_MLA(SDNode *N);
560 SDValue visitVPGATHER(SDNode *N);
561 SDValue visitVPSCATTER(SDNode *N);
562 SDValue visitVP_STRIDED_LOAD(SDNode *N);
563 SDValue visitVP_STRIDED_STORE(SDNode *N);
564 SDValue visitFP_TO_FP16(SDNode *N);
565 SDValue visitFP16_TO_FP(SDNode *N);
566 SDValue visitFP_TO_BF16(SDNode *N);
567 SDValue visitBF16_TO_FP(SDNode *N);
568 SDValue visitVECREDUCE(SDNode *N);
569 SDValue visitVPOp(SDNode *N);
570 SDValue visitGET_FPENV_MEM(SDNode *N);
571 SDValue visitSET_FPENV_MEM(SDNode *N);
572
573 template <class MatchContextClass>
574 SDValue visitFADDForFMACombine(SDNode *N);
575 template <class MatchContextClass>
576 SDValue visitFSUBForFMACombine(SDNode *N);
577 SDValue visitFMULForFMADistributiveCombine(SDNode *N);
578
579 SDValue XformToShuffleWithZero(SDNode *N);
580 bool reassociationCanBreakAddressingModePattern(unsigned Opc,
581 const SDLoc &DL,
582 SDNode *N,
583 SDValue N0,
584 SDValue N1);
585 SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
586 SDValue N1, SDNodeFlags Flags);
587 SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
588 SDValue N1, SDNodeFlags Flags);
589 SDValue reassociateReduction(unsigned RedOpc, unsigned Opc, const SDLoc &DL,
590 EVT VT, SDValue N0, SDValue N1,
591 SDNodeFlags Flags = SDNodeFlags());
592
593 SDValue visitShiftByConstant(SDNode *N);
594
595 SDValue foldSelectOfConstants(SDNode *N);
596 SDValue foldVSelectOfConstants(SDNode *N);
597 SDValue foldBinOpIntoSelect(SDNode *BO);
598 bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
599 SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
600 SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
601 SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
602 SDValue N2, SDValue N3, ISD::CondCode CC,
603 bool NotExtCompare = false);
604 SDValue convertSelectOfFPConstantsToLoadOffset(
605 const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
606 ISD::CondCode CC);
607 SDValue foldSignChangeInBitcast(SDNode *N);
608 SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
609 SDValue N2, SDValue N3, ISD::CondCode CC);
610 SDValue foldSelectOfBinops(SDNode *N);
611 SDValue foldSextSetcc(SDNode *N);
612 SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
613 const SDLoc &DL);
614 SDValue foldSubToUSubSat(EVT DstVT, SDNode *N, const SDLoc &DL);
615 SDValue foldABSToABD(SDNode *N, const SDLoc &DL);
616 SDValue foldSelectToABD(SDValue LHS, SDValue RHS, SDValue True,
617 SDValue False, ISD::CondCode CC, const SDLoc &DL);
618 SDValue foldSelectToUMin(SDValue LHS, SDValue RHS, SDValue True,
619 SDValue False, ISD::CondCode CC, const SDLoc &DL);
620 SDValue unfoldMaskedMerge(SDNode *N);
621 SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
622 SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
623 const SDLoc &DL, bool foldBooleans);
624 SDValue rebuildSetCC(SDValue N);
625
626 bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
627 SDValue &CC, bool MatchStrict = false) const;
628 bool isOneUseSetCC(SDValue N) const;
629
630 SDValue foldAddToAvg(SDNode *N, const SDLoc &DL);
631 SDValue foldSubToAvg(SDNode *N, const SDLoc &DL);
632
633 SDValue foldCTLZToCTLS(SDValue Src, const SDLoc &DL);
634
635 SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
636 unsigned HiOp);
637 SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
638 SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
639 const TargetLowering &TLI);
640 SDValue foldPartialReduceMLAMulOp(SDNode *N);
641 SDValue foldPartialReduceAdd(SDNode *N);
642
643 SDValue CombineExtLoad(SDNode *N);
644 SDValue CombineZExtLogicopShiftLoad(SDNode *N);
645 SDValue combineRepeatedFPDivisors(SDNode *N);
646 SDValue combineFMulOrFDivWithIntPow2(SDNode *N);
647 SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf);
648 SDValue mergeInsertEltWithShuffle(SDNode *N, unsigned InsIndex);
649 SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
650 SDValue combineInsertEltToLoad(SDNode *N, unsigned InsIndex);
651 SDValue BuildSDIV(SDNode *N);
652 SDValue BuildSDIVPow2(SDNode *N);
653 SDValue BuildUDIV(SDNode *N);
654 SDValue BuildSREMPow2(SDNode *N);
655 SDValue buildOptimizedSREM(SDValue N0, SDValue N1, SDNode *N);
656 SDValue BuildLogBase2(SDValue V, const SDLoc &DL,
657 bool KnownNeverZero = false,
658 bool InexpensiveOnly = false,
659 std::optional<EVT> OutVT = std::nullopt);
660 SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags);
661 SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
662 SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
663 SDValue buildSqrtEstimateImpl(SDValue Op, bool Recip, SDNodeFlags Flags);
664 SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
665 bool Reciprocal);
666 SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
667 bool Reciprocal);
668 SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
669 bool DemandHighBits = true);
670 SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
671 SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
672 SDValue InnerPos, SDValue InnerNeg, bool FromAdd,
673 bool HasPos, unsigned PosOpcode,
674 unsigned NegOpcode, const SDLoc &DL);
675 SDValue MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, SDValue Neg,
676 SDValue InnerPos, SDValue InnerNeg, bool FromAdd,
677 bool HasPos, unsigned PosOpcode,
678 unsigned NegOpcode, const SDLoc &DL);
679 SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL,
680 bool FromAdd);
681 SDValue MatchLoadCombine(SDNode *N);
682 SDValue mergeTruncStores(StoreSDNode *N);
683 SDValue reduceLoadWidth(SDNode *N);
684 SDValue ReduceLoadOpStoreWidth(SDNode *N);
685 SDValue splitMergedValStore(StoreSDNode *ST);
686 SDValue TransformFPLoadStorePair(SDNode *N);
687 SDValue convertBuildVecExtToExt(SDNode *N);
688 SDValue convertBuildVecZextToBuildVecWithZeros(SDNode *N);
689 SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
690 SDValue reduceBuildVecTruncToBitCast(SDNode *N);
691 SDValue reduceBuildVecToShuffle(SDNode *N);
692 SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
693 ArrayRef<int> VectorMask, SDValue VecIn1,
694 SDValue VecIn2, unsigned LeftIdx,
695 bool DidSplitVec);
696 SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
697
698 /// Walk up chain skipping non-aliasing memory nodes,
699 /// looking for aliasing nodes and adding them to the Aliases vector.
700 void GatherAllAliases(SDNode *N, SDValue OriginalChain,
701 SmallVectorImpl<SDValue> &Aliases);
702
703 /// Return true if there is any possibility that the two addresses overlap.
704 bool mayAlias(SDNode *Op0, SDNode *Op1) const;
705
706 /// Walk up chain skipping non-aliasing memory nodes, looking for a better
707 /// chain (aliasing node.)
708 SDValue FindBetterChain(SDNode *N, SDValue Chain);
709
710 /// Try to replace a store and any possibly adjacent stores on
711 /// consecutive chains with better chains. Return true only if St is
712 /// replaced.
713 ///
714 /// Notice that other chains may still be replaced even if the function
715 /// returns false.
716 bool findBetterNeighborChains(StoreSDNode *St);
717
718 // Helper for findBetterNeighborChains. Walk up store chain add additional
719 // chained stores that do not overlap and can be parallelized.
720 bool parallelizeChainedStores(StoreSDNode *St);
721
722 /// Holds a pointer to an LSBaseSDNode as well as information on where it
723 /// is located in a sequence of memory operations connected by a chain.
724 struct MemOpLink {
725 // Ptr to the mem node.
726 LSBaseSDNode *MemNode;
727
728 // Offset from the base ptr.
729 int64_t OffsetFromBase;
730
731 MemOpLink(LSBaseSDNode *N, int64_t Offset)
732 : MemNode(N), OffsetFromBase(Offset) {}
733 };
734
735 // Classify the origin of a stored value.
736 enum class StoreSource { Unknown, Constant, Extract, Load };
737 StoreSource getStoreSource(SDValue StoreVal) {
738 switch (StoreVal.getOpcode()) {
739 case ISD::Constant:
740 case ISD::ConstantFP:
741 return StoreSource::Constant;
745 return StoreSource::Constant;
746 return StoreSource::Unknown;
749 return StoreSource::Extract;
750 case ISD::LOAD:
751 return StoreSource::Load;
752 default:
753 return StoreSource::Unknown;
754 }
755 }
756
757 /// This is a helper function for visitMUL to check the profitability
758 /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
759 /// MulNode is the original multiply, AddNode is (add x, c1),
760 /// and ConstNode is c2.
761 bool isMulAddWithConstProfitable(SDNode *MulNode, SDValue AddNode,
762 SDValue ConstNode);
763
764 /// This is a helper function for visitAND and visitZERO_EXTEND. Returns
765 /// true if the (and (load x) c) pattern matches an extload. ExtVT returns
766 /// the type of the loaded value to be extended.
767 bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
768 EVT LoadResultTy, EVT &ExtVT);
769
770 /// Helper function to calculate whether the given Load/Store can have its
771 /// width reduced to ExtVT.
772 bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
773 EVT &MemVT, unsigned ShAmt = 0);
774
775 /// Used by BackwardsPropagateMask to find suitable loads.
776 bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
777 SmallPtrSetImpl<SDNode*> &NodesWithConsts,
778 ConstantSDNode *Mask, SDNode *&NodeToMask);
779 /// Attempt to propagate a given AND node back to load leaves so that they
780 /// can be combined into narrow loads.
781 bool BackwardsPropagateMask(SDNode *N);
782
783 /// Helper function for mergeConsecutiveStores which merges the component
784 /// store chains.
785 SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
786 unsigned NumStores);
787
788 /// Helper function for mergeConsecutiveStores which checks if all the store
789 /// nodes have the same underlying object. We can still reuse the first
790 /// store's pointer info if all the stores are from the same object.
791 bool hasSameUnderlyingObj(ArrayRef<MemOpLink> StoreNodes);
792
793 /// This is a helper function for mergeConsecutiveStores. When the source
794 /// elements of the consecutive stores are all constants or all extracted
795 /// vector elements, try to merge them into one larger store introducing
796 /// bitcasts if necessary. \return True if a merged store was created.
797 bool mergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
798 EVT MemVT, unsigned NumStores,
799 bool IsConstantSrc, bool UseVector,
800 bool UseTrunc);
801
802 /// This is a helper function for mergeConsecutiveStores. Stores that
803 /// potentially may be merged with St are placed in StoreNodes. On success,
804 /// returns a chain predecessor to all store candidates.
805 SDNode *getStoreMergeCandidates(StoreSDNode *St,
806 SmallVectorImpl<MemOpLink> &StoreNodes);
807
808 /// Helper function for mergeConsecutiveStores. Checks if candidate stores
809 /// have indirect dependency through their operands. RootNode is the
810 /// predecessor to all stores calculated by getStoreMergeCandidates and is
811 /// used to prune the dependency check. \return True if safe to merge.
812 bool checkMergeStoreCandidatesForDependencies(
813 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
814 SDNode *RootNode);
815
816 /// Helper function for tryStoreMergeOfLoads. Checks if the load/store
817 /// chain has a call in it. \return True if a call is found.
818 bool hasCallInLdStChain(StoreSDNode *St, LoadSDNode *Ld);
819
820 /// This is a helper function for mergeConsecutiveStores. Given a list of
821 /// store candidates, find the first N that are consecutive in memory.
822 /// Returns 0 if there are not at least 2 consecutive stores to try merging.
823 unsigned getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
824 int64_t ElementSizeBytes) const;
825
826 /// This is a helper function for mergeConsecutiveStores. It is used for
827 /// store chains that are composed entirely of constant values.
828 bool tryStoreMergeOfConstants(SmallVectorImpl<MemOpLink> &StoreNodes,
829 unsigned NumConsecutiveStores,
830 EVT MemVT, SDNode *Root, bool AllowVectors);
831
832 /// This is a helper function for mergeConsecutiveStores. It is used for
833 /// store chains that are composed entirely of extracted vector elements.
834 /// When extracting multiple vector elements, try to store them in one
835 /// vector store rather than a sequence of scalar stores.
836 bool tryStoreMergeOfExtracts(SmallVectorImpl<MemOpLink> &StoreNodes,
837 unsigned NumConsecutiveStores, EVT MemVT,
838 SDNode *Root);
839
840 /// This is a helper function for mergeConsecutiveStores. It is used for
841 /// store chains that are composed entirely of loaded values.
842 bool tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
843 unsigned NumConsecutiveStores, EVT MemVT,
844 SDNode *Root, bool AllowVectors,
845 bool IsNonTemporalStore, bool IsNonTemporalLoad);
846
847 /// Merge consecutive store operations into a wide store.
848 /// This optimization uses wide integers or vectors when possible.
849 /// \return true if stores were merged.
850 bool mergeConsecutiveStores(StoreSDNode *St);
851
852 /// Try to transform a truncation where C is a constant:
853 /// (trunc (and X, C)) -> (and (trunc X), (trunc C))
854 ///
855 /// \p N needs to be a truncation and its first operand an AND. Other
856 /// requirements are checked by the function (e.g. that trunc is
857 /// single-use) and if missed an empty SDValue is returned.
858 SDValue distributeTruncateThroughAnd(SDNode *N);
859
  /// Helper function to determine whether the target supports operation
  /// given by \p Opcode for type \p VT, that is, whether the operation
  /// is legal or custom before legalizing operations, and whether it is
  /// legal (but not custom) after legalization.
  bool hasOperation(unsigned Opcode, EVT VT) {
    // Passing LegalOperations as the "LegalOnly" argument tightens the query
    // to strictly-legal operations once operation legalization has run.
    return TLI.isOperationLegalOrCustom(Opcode, VT, LegalOperations);
  }
867
868 bool hasUMin(EVT VT) const {
869 auto LK = TLI.getTypeConversion(*DAG.getContext(), VT);
870 return (LK.first == TargetLoweringBase::TypeLegal ||
872 TLI.isOperationLegalOrCustom(ISD::UMIN, LK.second);
873 }
874
875 public:
876 /// Runs the dag combiner on all nodes in the work list
877 void Run(CombineLevel AtLevel);
878
  /// Return the SelectionDAG this combiner operates on.
  SelectionDAG &getDAG() const { return DAG; }
880
  /// Convenience wrapper around TargetLowering::getShiftAmountTy.
  /// Returns the type the target wants shift amounts of \p LHSTy-typed
  /// shifts to have.
  EVT getShiftAmountTy(EVT LHSTy) {
    return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout());
  }
885
886 /// This method returns true if we are running before type legalization or
887 /// if the specified VT is legal.
888 bool isTypeLegal(const EVT &VT) {
889 if (!LegalTypes) return true;
890 return TLI.isTypeLegal(VT);
891 }
892
  /// Convenience wrapper around TargetLowering::getSetCCResultType.
  /// Returns the type the target uses for the boolean result of a setcc
  /// comparing values of type \p VT.
  EVT getSetCCResultType(EVT VT) const {
    return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  }
897
898 void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
899 SDValue OrigLoad, SDValue ExtLoad,
900 ISD::NodeType ExtType);
901 };
902
903/// This class is a DAGUpdateListener that removes any deleted
904/// nodes from the worklist.
905class WorklistRemover : public SelectionDAG::DAGUpdateListener {
906 DAGCombiner &DC;
907
908public:
909 explicit WorklistRemover(DAGCombiner &dc)
910 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
911
912 void NodeDeleted(SDNode *N, SDNode *E) override {
913 DC.removeFromWorklist(N);
914 }
915};
916
917class WorklistInserter : public SelectionDAG::DAGUpdateListener {
918 DAGCombiner &DC;
919
920public:
921 explicit WorklistInserter(DAGCombiner &dc)
922 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
923
924 // FIXME: Ideally we could add N to the worklist, but this causes exponential
925 // compile time costs in large DAGs, e.g. Halide.
926 void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
927};
928
929} // end anonymous namespace
930
931//===----------------------------------------------------------------------===//
932// TargetLowering::DAGCombinerInfo implementation
933//===----------------------------------------------------------------------===//
934
936 ((DAGCombiner*)DC)->AddToWorklist(N);
937}
938
940CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
941 return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
942}
943
945CombineTo(SDNode *N, SDValue Res, bool AddTo) {
946 return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
947}
948
950CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
951 return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
952}
953
956 return ((DAGCombiner*)DC)->recursivelyDeleteUnusedNodes(N);
957}
958
961 return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
962}
963
964//===----------------------------------------------------------------------===//
965// Helper Functions
966//===----------------------------------------------------------------------===//
967
968void DAGCombiner::deleteAndRecombine(SDNode *N) {
969 removeFromWorklist(N);
970
971 // If the operands of this node are only used by the node, they will now be
972 // dead. Make sure to re-visit them and recursively delete dead nodes.
973 for (const SDValue &Op : N->ops())
974 // For an operand generating multiple values, one of the values may
975 // become dead allowing further simplification (e.g. split index
976 // arithmetic from an indexed load).
977 if (Op->hasOneUse() || Op->getNumValues() > 1)
978 AddToWorklist(Op.getNode());
979
980 DAG.DeleteNode(N);
981}
982
983// APInts must be the same size for most operations, this helper
984// function zero extends the shorter of the pair so that they match.
985// We provide an Offset so that we can create bitwidths that won't overflow.
986static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
987 unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
988 LHS = LHS.zext(Bits);
989 RHS = RHS.zext(Bits);
990}
991
992// Return true if this node is a setcc, or is a select_cc
993// that selects between the target values used for true and false, making it
994// equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
995// the appropriate nodes based on the type of node we are checking. This
996// simplifies life a bit for the callers.
997bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
998 SDValue &CC, bool MatchStrict) const {
999 if (N.getOpcode() == ISD::SETCC) {
1000 LHS = N.getOperand(0);
1001 RHS = N.getOperand(1);
1002 CC = N.getOperand(2);
1003 return true;
1004 }
1005
1006 if (MatchStrict &&
1007 (N.getOpcode() == ISD::STRICT_FSETCC ||
1008 N.getOpcode() == ISD::STRICT_FSETCCS)) {
1009 LHS = N.getOperand(1);
1010 RHS = N.getOperand(2);
1011 CC = N.getOperand(3);
1012 return true;
1013 }
1014
1015 if (N.getOpcode() != ISD::SELECT_CC || !TLI.isConstTrueVal(N.getOperand(2)) ||
1016 !TLI.isConstFalseVal(N.getOperand(3)))
1017 return false;
1018
1019 if (TLI.getBooleanContents(N.getValueType()) ==
1021 return false;
1022
1023 LHS = N.getOperand(0);
1024 RHS = N.getOperand(1);
1025 CC = N.getOperand(4);
1026 return true;
1027}
1028
1029/// Return true if this is a SetCC-equivalent operation with only one use.
1030/// If this is true, it allows the users to invert the operation for free when
1031/// it is profitable to do so.
1032bool DAGCombiner::isOneUseSetCC(SDValue N) const {
1033 SDValue N0, N1, N2;
1034 if (isSetCCEquivalent(N, N0, N1, N2) && N->hasOneUse())
1035 return true;
1036 return false;
1037}
1038
1040 if (!ScalarTy.isSimple())
1041 return false;
1042
1043 uint64_t MaskForTy = 0ULL;
1044 switch (ScalarTy.getSimpleVT().SimpleTy) {
1045 case MVT::i8:
1046 MaskForTy = 0xFFULL;
1047 break;
1048 case MVT::i16:
1049 MaskForTy = 0xFFFFULL;
1050 break;
1051 case MVT::i32:
1052 MaskForTy = 0xFFFFFFFFULL;
1053 break;
1054 default:
1055 return false;
1056 break;
1057 }
1058
1059 APInt Val;
1060 if (ISD::isConstantSplatVector(N, Val))
1061 return Val.getLimitedValue() == MaskForTy;
1062
1063 return false;
1064}
1065
1066// Determines if it is a constant integer or a splat/build vector of constant
1067// integers (and undefs).
1068// Do not permit build vector implicit truncation unless AllowTruncation is set.
1069static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false,
1070 bool AllowTruncation = false) {
1072 return !(Const->isOpaque() && NoOpaques);
1073 if (N.getOpcode() != ISD::BUILD_VECTOR && N.getOpcode() != ISD::SPLAT_VECTOR)
1074 return false;
1075 unsigned BitWidth = N.getScalarValueSizeInBits();
1076 for (const SDValue &Op : N->op_values()) {
1077 if (Op.isUndef())
1078 continue;
1080 if (!Const || (Const->isOpaque() && NoOpaques))
1081 return false;
1082 // When AllowTruncation is true, allow constants that have been promoted
1083 // during type legalization as long as the value fits in the target type.
1084 if ((AllowTruncation &&
1085 Const->getAPIntValue().getActiveBits() > BitWidth) ||
1086 (!AllowTruncation && Const->getAPIntValue().getBitWidth() != BitWidth))
1087 return false;
1088 }
1089 return true;
1090}
1091
1092// Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
1093// undef's.
1094static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
1095 if (V.getOpcode() != ISD::BUILD_VECTOR)
1096 return false;
1097 return isConstantOrConstantVector(V, NoOpaques) ||
1099}
1100
1101// Determine if this an indexed load with an opaque target constant index.
1102static bool canSplitIdx(LoadSDNode *LD) {
1103 return MaySplitLoadIndex &&
1104 (LD->getOperand(2).getOpcode() != ISD::TargetConstant ||
1105 !cast<ConstantSDNode>(LD->getOperand(2))->isOpaque());
1106}
1107
1108bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
1109 const SDLoc &DL,
1110 SDNode *N,
1111 SDValue N0,
1112 SDValue N1) {
1113 // Currently this only tries to ensure we don't undo the GEP splits done by
1114 // CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this,
1115 // we check if the following transformation would be problematic:
1116 // (load/store (add, (add, x, offset1), offset2)) ->
1117 // (load/store (add, x, offset1+offset2)).
1118
1119 // (load/store (add, (add, x, y), offset2)) ->
1120 // (load/store (add, (add, x, offset2), y)).
1121
1122 if (!N0.isAnyAdd())
1123 return false;
1124
1125 // Check for vscale addressing modes.
1126 // (load/store (add/sub (add x, y), vscale))
1127 // (load/store (add/sub (add x, y), (lsl vscale, C)))
1128 // (load/store (add/sub (add x, y), (mul vscale, C)))
1129 if ((N1.getOpcode() == ISD::VSCALE ||
1130 ((N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::MUL) &&
1131 N1.getOperand(0).getOpcode() == ISD::VSCALE &&
1133 N1.getValueType().getFixedSizeInBits() <= 64) {
1134 int64_t ScalableOffset = N1.getOpcode() == ISD::VSCALE
1135 ? N1.getConstantOperandVal(0)
1136 : (N1.getOperand(0).getConstantOperandVal(0) *
1137 (N1.getOpcode() == ISD::SHL
1138 ? (1LL << N1.getConstantOperandVal(1))
1139 : N1.getConstantOperandVal(1)));
1140 if (Opc == ISD::SUB)
1141 ScalableOffset = -ScalableOffset;
1142 if (all_of(N->users(), [&](SDNode *Node) {
1143 if (auto *LoadStore = dyn_cast<MemSDNode>(Node);
1144 LoadStore && LoadStore->hasUniqueMemOperand() &&
1145 LoadStore->getBasePtr().getNode() == N) {
1146 TargetLoweringBase::AddrMode AM;
1147 AM.HasBaseReg = true;
1148 AM.ScalableOffset = ScalableOffset;
1149 EVT VT = LoadStore->getMemoryVT();
1150 unsigned AS = LoadStore->getAddressSpace();
1151 Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1152 return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy,
1153 AS);
1154 }
1155 return false;
1156 }))
1157 return true;
1158 }
1159
1160 if (Opc != ISD::ADD && Opc != ISD::PTRADD)
1161 return false;
1162
1163 auto *C2 = dyn_cast<ConstantSDNode>(N1);
1164 if (!C2)
1165 return false;
1166
1167 const APInt &C2APIntVal = C2->getAPIntValue();
1168 if (C2APIntVal.getSignificantBits() > 64)
1169 return false;
1170
1171 if (auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
1172 if (N0.hasOneUse())
1173 return false;
1174
1175 const APInt &C1APIntVal = C1->getAPIntValue();
1176 const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
1177 if (CombinedValueIntVal.getSignificantBits() > 64)
1178 return false;
1179 const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();
1180
1181 for (SDNode *Node : N->users()) {
1182 if (auto *LoadStore = dyn_cast<MemSDNode>(Node)) {
1183 if (!LoadStore->hasUniqueMemOperand())
1184 continue;
1185 // Is x[offset2] already not a legal addressing mode? If so then
1186 // reassociating the constants breaks nothing (we test offset2 because
1187 // that's the one we hope to fold into the load or store).
1188 TargetLoweringBase::AddrMode AM;
1189 AM.HasBaseReg = true;
1190 AM.BaseOffs = C2APIntVal.getSExtValue();
1191 EVT VT = LoadStore->getMemoryVT();
1192 unsigned AS = LoadStore->getAddressSpace();
1193 Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1194 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1195 continue;
1196
1197 // Would x[offset1+offset2] still be a legal addressing mode?
1198 AM.BaseOffs = CombinedValue;
1199 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1200 return true;
1201 }
1202 }
1203 } else {
1204 if (auto *GA = dyn_cast<GlobalAddressSDNode>(N0.getOperand(1)))
1205 if (GA->getOpcode() == ISD::GlobalAddress && TLI.isOffsetFoldingLegal(GA))
1206 return false;
1207
1208 for (SDNode *Node : N->users()) {
1209 auto *LoadStore = dyn_cast<MemSDNode>(Node);
1210 if (!LoadStore || !LoadStore->hasUniqueMemOperand())
1211 return false;
1212
1213 // Is x[offset2] a legal addressing mode? If so then
1214 // reassociating the constants breaks address pattern
1215 TargetLoweringBase::AddrMode AM;
1216 AM.HasBaseReg = true;
1217 AM.BaseOffs = C2APIntVal.getSExtValue();
1218 EVT VT = LoadStore->getMemoryVT();
1219 unsigned AS = LoadStore->getAddressSpace();
1220 Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1221 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1222 return false;
1223 }
1224 return true;
1225 }
1226
1227 return false;
1228}
1229
1230/// Helper for DAGCombiner::reassociateOps. Try to reassociate (Opc N0, N1) if
1231/// \p N0 is the same kind of operation as \p Opc.
1232SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
1233 SDValue N0, SDValue N1,
1234 SDNodeFlags Flags) {
1235 EVT VT = N0.getValueType();
1236
1237 if (N0.getOpcode() != Opc)
1238 return SDValue();
1239
1240 SDValue N00 = N0.getOperand(0);
1241 SDValue N01 = N0.getOperand(1);
1242
1244 SDNodeFlags NewFlags;
1245 if (N0.getOpcode() == ISD::ADD && N0->getFlags().hasNoUnsignedWrap() &&
1246 Flags.hasNoUnsignedWrap())
1247 NewFlags |= SDNodeFlags::NoUnsignedWrap;
1248
1250 // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
1251 if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, {N01, N1})) {
1252 NewFlags.setDisjoint(Flags.hasDisjoint() &&
1253 N0->getFlags().hasDisjoint());
1254 return DAG.getNode(Opc, DL, VT, N00, OpNode, NewFlags);
1255 }
1256 return SDValue();
1257 }
1258 if (TLI.isReassocProfitable(DAG, N0, N1)) {
1259 // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
1260 // iff (op x, c1) has one use
1261 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1, NewFlags);
1262 return DAG.getNode(Opc, DL, VT, OpNode, N01, NewFlags);
1263 }
1264 }
1265
1266 // Check for repeated operand logic simplifications.
1267 if (Opc == ISD::AND || Opc == ISD::OR) {
1268 // (N00 & N01) & N00 --> N00 & N01
1269 // (N00 & N01) & N01 --> N00 & N01
1270 // (N00 | N01) | N00 --> N00 | N01
1271 // (N00 | N01) | N01 --> N00 | N01
1272 if (N1 == N00 || N1 == N01)
1273 return N0;
1274 }
1275 if (Opc == ISD::XOR) {
1276 // (N00 ^ N01) ^ N00 --> N01
1277 if (N1 == N00)
1278 return N01;
1279 // (N00 ^ N01) ^ N01 --> N00
1280 if (N1 == N01)
1281 return N00;
1282 }
1283
1284 if (TLI.isReassocProfitable(DAG, N0, N1)) {
1285 if (N1 != N01) {
1286 // Reassociate if (op N00, N1) already exist
1287 if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N00, N1})) {
1288 // if Op (Op N00, N1), N01 already exist
1289 // we need to stop reassciate to avoid dead loop
1290 if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N01}))
1291 return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N01);
1292 }
1293 }
1294
1295 if (N1 != N00) {
1296 // Reassociate if (op N01, N1) already exist
1297 if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N01, N1})) {
1298 // if Op (Op N01, N1), N00 already exist
1299 // we need to stop reassciate to avoid dead loop
1300 if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N00}))
1301 return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N00);
1302 }
1303 }
1304
1305 // Reassociate the operands from (OR/AND (OR/AND(N00, N001)), N1) to (OR/AND
1306 // (OR/AND(N00, N1)), N01) when N00 and N1 are comparisons with the same
1307 // predicate or to (OR/AND (OR/AND(N1, N01)), N00) when N01 and N1 are
1308 // comparisons with the same predicate. This enables optimizations as the
1309 // following one:
1310 // CMP(A,C)||CMP(B,C) => CMP(MIN/MAX(A,B), C)
1311 // CMP(A,C)&&CMP(B,C) => CMP(MIN/MAX(A,B), C)
1312 if (Opc == ISD::AND || Opc == ISD::OR) {
1313 if (N1->getOpcode() == ISD::SETCC && N00->getOpcode() == ISD::SETCC &&
1314 N01->getOpcode() == ISD::SETCC) {
1315 ISD::CondCode CC1 = cast<CondCodeSDNode>(N1.getOperand(2))->get();
1316 ISD::CondCode CC00 = cast<CondCodeSDNode>(N00.getOperand(2))->get();
1317 ISD::CondCode CC01 = cast<CondCodeSDNode>(N01.getOperand(2))->get();
1318 if (CC1 == CC00 && CC1 != CC01) {
1319 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1, Flags);
1320 return DAG.getNode(Opc, DL, VT, OpNode, N01, Flags);
1321 }
1322 if (CC1 == CC01 && CC1 != CC00) {
1323 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N01, N1, Flags);
1324 return DAG.getNode(Opc, DL, VT, OpNode, N00, Flags);
1325 }
1326 }
1327 }
1328 }
1329
1330 return SDValue();
1331}
1332
1333/// Try to reassociate commutative (Opc N0, N1) if either \p N0 or \p N1 is the
1334/// same kind of operation as \p Opc.
1335SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
1336 SDValue N1, SDNodeFlags Flags) {
1337 assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");
1338
1339 // Floating-point reassociation is not allowed without loose FP math.
1340 if (N0.getValueType().isFloatingPoint() ||
1342 if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros())
1343 return SDValue();
1344
1345 if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1, Flags))
1346 return Combined;
1347 if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0, Flags))
1348 return Combined;
1349 return SDValue();
1350}
1351
1352// Try to fold Opc(vecreduce(x), vecreduce(y)) -> vecreduce(Opc(x, y))
1353// Note that we only expect Flags to be passed from FP operations. For integer
1354// operations they need to be dropped.
1355SDValue DAGCombiner::reassociateReduction(unsigned RedOpc, unsigned Opc,
1356 const SDLoc &DL, EVT VT, SDValue N0,
1357 SDValue N1, SDNodeFlags Flags) {
1358 if (N0.getOpcode() == RedOpc && N1.getOpcode() == RedOpc &&
1359 N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
1360 N0->hasOneUse() && N1->hasOneUse() &&
1362 TLI.shouldReassociateReduction(RedOpc, N0.getOperand(0).getValueType())) {
1363 SelectionDAG::FlagInserter FlagsInserter(DAG, Flags);
1364 return DAG.getNode(RedOpc, DL, VT,
1365 DAG.getNode(Opc, DL, N0.getOperand(0).getValueType(),
1366 N0.getOperand(0), N1.getOperand(0)));
1367 }
1368
1369 // Reassociate op(op(vecreduce(a), b), op(vecreduce(c), d)) into
1370 // op(vecreduce(op(a, c)), op(b, d)), to combine the reductions into a
1371 // single node.
1372 SDValue A, B, C, D, RedA, RedB;
1373 if (sd_match(N0, m_OneUse(m_c_BinOp(
1374 Opc,
1375 m_AllOf(m_OneUse(m_UnaryOp(RedOpc, m_Value(A))),
1376 m_Value(RedA)),
1377 m_Value(B)))) &&
1379 Opc,
1380 m_AllOf(m_OneUse(m_UnaryOp(RedOpc, m_Value(C))),
1381 m_Value(RedB)),
1382 m_Value(D)))) &&
1383 !sd_match(B, m_UnaryOp(RedOpc, m_Value())) &&
1384 !sd_match(D, m_UnaryOp(RedOpc, m_Value())) &&
1385 A.getValueType() == C.getValueType() &&
1386 hasOperation(Opc, A.getValueType()) &&
1387 TLI.shouldReassociateReduction(RedOpc, VT)) {
1388 if ((Opc == ISD::FADD || Opc == ISD::FMUL) &&
1389 (!N0->getFlags().hasAllowReassociation() ||
1391 !RedA->getFlags().hasAllowReassociation() ||
1392 !RedB->getFlags().hasAllowReassociation()))
1393 return SDValue();
1394 SelectionDAG::FlagInserter FlagsInserter(
1395 DAG, Flags & N0->getFlags() & N1->getFlags() & RedA->getFlags() &
1396 RedB->getFlags());
1397 SDValue Op = DAG.getNode(Opc, DL, A.getValueType(), A, C);
1398 SDValue Red = DAG.getNode(RedOpc, DL, VT, Op);
1399 SDValue Op2 = DAG.getNode(Opc, DL, VT, B, D);
1400 return DAG.getNode(Opc, DL, VT, Red, Op2);
1401 }
1402 return SDValue();
1403}
1404
/// Replace all \p NumTo values of \p N with the values in \p To, optionally
/// queueing the replacements and their users for further combining, and
/// delete \p N if it ends up dead.
SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
                               bool AddTo) {
  assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
  ++NodesCombined;
  LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
             To[0].dump(&DAG);
             dbgs() << " and " << NumTo - 1 << " other values\n");
  for (unsigned i = 0, e = NumTo; i != e; ++i)
    assert((!To[i].getNode() ||
            N->getValueType(i) == To[i].getValueType()) &&
           "Cannot combine value to value of different type!");

  // Any node deleted during the RAUW below must also leave the worklist.
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesWith(N, To);
  if (AddTo) {
    // Push the new nodes and any users onto the worklist
    for (unsigned i = 0, e = NumTo; i != e; ++i) {
      if (To[i].getNode())
        AddToWorklistWithUsers(To[i].getNode());
    }
  }

  // Finally, if the node is now dead, remove it from the graph. The node
  // may not be dead if the replacement process recursively simplified to
  // something else needing this node.
  if (N->use_empty())
    deleteAndRecombine(N);
  return SDValue(N, 0);
}
1434
/// Apply a simplification recorded by TargetLowering in \p TLO: replace the
/// old value with the new one, requeue affected nodes, and clean up anything
/// left dead.
void DAGCombiner::
CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
  // Replace the old value with the new one.
  ++NodesCombined;
  LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.dump(&DAG);
             dbgs() << "\nWith: "; TLO.New.dump(&DAG); dbgs() << '\n');

  // Replace all uses.
  DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);

  // Push the new node and any (possibly new) users onto the worklist.
  AddToWorklistWithUsers(TLO.New.getNode());

  // Finally, if the node is now dead, remove it from the graph.
  recursivelyDeleteUnusedNodes(TLO.Old.getNode());
}
1451
1452/// Check the specified integer node value to see if it can be simplified or if
1453/// things it uses can be simplified by bit propagation. If so, return true.
1454bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
1455 const APInt &DemandedElts,
1456 bool AssumeSingleUse) {
1457 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1458 KnownBits Known;
1459 if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, 0,
1460 AssumeSingleUse))
1461 return false;
1462
1463 // Revisit the node.
1464 AddToWorklist(Op.getNode());
1465
1466 CommitTargetLoweringOpt(TLO);
1467 return true;
1468}
1469
1470/// Check the specified vector node value to see if it can be simplified or
1471/// if things it uses can be simplified as it only uses some of the elements.
1472/// If so, return true.
1473bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
1474 const APInt &DemandedElts,
1475 bool AssumeSingleUse) {
1476 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1477 APInt KnownUndef, KnownZero;
1478 if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
1479 TLO, 0, AssumeSingleUse))
1480 return false;
1481
1482 // Revisit the node.
1483 AddToWorklist(Op.getNode());
1484
1485 CommitTargetLoweringOpt(TLO);
1486 return true;
1487}
1488
/// Replace all uses of \p Load with a truncation of the wider promoted load
/// \p ExtLoad, rewiring both the value result and the chain result.
void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
  SDLoc DL(Load);
  EVT VT = Load->getValueType(0);
  // Truncate the promoted value back to the original type for existing users.
  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));

  LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
             Trunc.dump(&DAG); dbgs() << '\n');

  // Rewire the value (result 0) and the chain (result 1) separately.
  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));

  AddToWorklist(Trunc.getNode());
  recursivelyDeleteUnusedNodes(Load);
}
1503
1504SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
1505 Replace = false;
1506 SDLoc DL(Op);
1507 if (ISD::isUNINDEXEDLoad(Op.getNode())) {
1508 LoadSDNode *LD = cast<LoadSDNode>(Op);
1509 EVT MemVT = LD->getMemoryVT();
1511 : LD->getExtensionType();
1512 Replace = true;
1513 return DAG.getExtLoad(ExtType, DL, PVT,
1514 LD->getChain(), LD->getBasePtr(),
1515 MemVT, LD->getMemOperand());
1516 }
1517
1518 unsigned Opc = Op.getOpcode();
1519 switch (Opc) {
1520 default: break;
1521 case ISD::AssertSext:
1522 if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
1523 return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
1524 break;
1525 case ISD::AssertZext:
1526 if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
1527 return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
1528 break;
1529 case ISD::Constant: {
1530 unsigned ExtOpc =
1531 Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1532 return DAG.getNode(ExtOpc, DL, PVT, Op);
1533 }
1534 }
1535
1536 if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
1537 return SDValue();
1538 return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
1539}
1540
1541SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1543 return SDValue();
1544 EVT OldVT = Op.getValueType();
1545 SDLoc DL(Op);
1546 bool Replace = false;
1547 SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1548 if (!NewOp.getNode())
1549 return SDValue();
1550 AddToWorklist(NewOp.getNode());
1551
1552 if (Replace)
1553 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1554 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1555 DAG.getValueType(OldVT));
1556}
1557
1558SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1559 EVT OldVT = Op.getValueType();
1560 SDLoc DL(Op);
1561 bool Replace = false;
1562 SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1563 if (!NewOp.getNode())
1564 return SDValue();
1565 AddToWorklist(NewOp.getNode());
1566
1567 if (Replace)
1568 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1569 return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1570}
1571
/// Promote the specified integer binary operation if the target indicates it is
/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
/// i32 since i16 instructions are longer.
SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
  // Promotion only makes sense once operations have been legalized.
  if (!LegalOperations)
    return SDValue();

  // Only scalar integer operations are candidates.
  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));

    // Promote both operands to the wider type, remembering whether either
    // was a load that must later be replaced wholesale.
    bool Replace0 = false;
    SDValue N0 = Op.getOperand(0);
    SDValue NN0 = PromoteOperand(N0, PVT, Replace0);

    bool Replace1 = false;
    SDValue N1 = Op.getOperand(1);
    SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
    SDLoc DL(Op);

    // Perform the operation at the promoted width, then truncate back.
    SDValue RV =
        DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));

    // We are always replacing N0/N1's use in N and only need additional
    // replacements if there are additional uses.
    // Note: We are checking uses of the *nodes* (SDNode) rather than values
    // (SDValue) here because the node may reference multiple values
    // (for example, the chain value of a load node).
    Replace0 &= !N0->hasOneUse();
    Replace1 &= (N0 != N1) && !N1->hasOneUse();

    // Combine Op here so it is preserved past replacements.
    CombineTo(Op.getNode(), RV);

    // If operands have a use ordering, make sure we deal with
    // predecessor first.
    if (Replace0 && Replace1 && N0->isPredecessorOf(N1.getNode())) {
      std::swap(N0, N1);
      std::swap(NN0, NN1);
    }

    if (Replace0) {
      AddToWorklist(NN0.getNode());
      ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
    }
    if (Replace1) {
      AddToWorklist(NN1.getNode());
      ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
    }
    // Returning Op signals the caller that a combine took place.
    return Op;
  }
  return SDValue();
}
1639
/// Promote the specified integer shift operation if the target indicates it is
/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
/// i32 since i16 instructions are longer.
SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
  // Promotion only makes sense once operations have been legalized.
  if (!LegalOperations)
    return SDValue();

  // Only scalar integer shifts are candidates.
  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));

    SDNodeFlags TruncFlags;
    bool Replace = false;
    SDValue N0 = Op.getOperand(0);
    if (Opc == ISD::SRA) {
      // Arithmetic right shift needs correct sign bits in the wide value.
      N0 = SExtPromoteOperand(N0, PVT);
    } else if (Opc == ISD::SRL) {
      // Logical right shift needs zeros in the wide high bits.
      N0 = ZExtPromoteOperand(N0, PVT);
    } else {
      // For SHL, pick the extension matching the wrap flag so the flag can be
      // preserved on the truncate; otherwise any-extend.
      if (Op->getFlags().hasNoUnsignedWrap()) {
        N0 = ZExtPromoteOperand(N0, PVT);
        TruncFlags = SDNodeFlags::NoUnsignedWrap;
      } else if (Op->getFlags().hasNoSignedWrap()) {
        N0 = SExtPromoteOperand(N0, PVT);
        TruncFlags = SDNodeFlags::NoSignedWrap;
      } else {
        N0 = PromoteOperand(N0, PVT, Replace);
      }
    }

    if (!N0.getNode())
      return SDValue();

    // Shift at the promoted width, then truncate back to the original type.
    SDLoc DL(Op);
    SDValue N1 = Op.getOperand(1);
    SDValue RV = DAG.getNode(ISD::TRUNCATE, DL, VT,
                             DAG.getNode(Opc, DL, PVT, N0, N1), TruncFlags);

    if (Replace)
      ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());

    // Deal with Op being deleted.
    if (Op && Op.getOpcode() != ISD::DELETED_NODE)
      return RV;
  }
  return SDValue();
}
1701
1702SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1703 if (!LegalOperations)
1704 return SDValue();
1705
1706 EVT VT = Op.getValueType();
1707 if (VT.isVector() || !VT.isInteger())
1708 return SDValue();
1709
1710 // If operation type is 'undesirable', e.g. i16 on x86, consider
1711 // promoting it.
1712 unsigned Opc = Op.getOpcode();
1713 if (TLI.isTypeDesirableForOp(Opc, VT))
1714 return SDValue();
1715
1716 EVT PVT = VT;
1717 // Consult target whether it is a good idea to promote this operation and
1718 // what's the right type to promote it to.
1719 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1720 assert(PVT != VT && "Don't know what type to promote to!");
1721 // fold (aext (aext x)) -> (aext x)
1722 // fold (aext (zext x)) -> (zext x)
1723 // fold (aext (sext x)) -> (sext x)
1724 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1725 return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1726 }
1727 return SDValue();
1728}
1729
1730bool DAGCombiner::PromoteLoad(SDValue Op) {
1731 if (!LegalOperations)
1732 return false;
1733
1734 if (!ISD::isUNINDEXEDLoad(Op.getNode()))
1735 return false;
1736
1737 EVT VT = Op.getValueType();
1738 if (VT.isVector() || !VT.isInteger())
1739 return false;
1740
1741 // If operation type is 'undesirable', e.g. i16 on x86, consider
1742 // promoting it.
1743 unsigned Opc = Op.getOpcode();
1744 if (TLI.isTypeDesirableForOp(Opc, VT))
1745 return false;
1746
1747 EVT PVT = VT;
1748 // Consult target whether it is a good idea to promote this operation and
1749 // what's the right type to promote it to.
1750 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1751 assert(PVT != VT && "Don't know what type to promote to!");
1752
1753 SDLoc DL(Op);
1754 SDNode *N = Op.getNode();
1755 LoadSDNode *LD = cast<LoadSDNode>(N);
1756 EVT MemVT = LD->getMemoryVT();
1758 : LD->getExtensionType();
1759 SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
1760 LD->getChain(), LD->getBasePtr(),
1761 MemVT, LD->getMemOperand());
1762 SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
1763
1764 LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
1765 Result.dump(&DAG); dbgs() << '\n');
1766
1767 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1768 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1769
1770 AddToWorklist(Result.getNode());
1771 recursivelyDeleteUnusedNodes(N);
1772 return true;
1773 }
1774
1775 return false;
1776}
1777
1778/// Recursively delete a node which has no uses and any operands for
1779/// which it is the only use.
1780///
1781/// Note that this both deletes the nodes and removes them from the worklist.
1782/// It also adds any nodes who have had a user deleted to the worklist as they
1783/// may now have only one use and subject to other combines.
1784bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1785 if (!N->use_empty())
1786 return false;
1787
1788 SmallSetVector<SDNode *, 16> Nodes;
1789 Nodes.insert(N);
1790 do {
1791 N = Nodes.pop_back_val();
1792 if (!N)
1793 continue;
1794
1795 if (N->use_empty()) {
1796 for (const SDValue &ChildN : N->op_values())
1797 Nodes.insert(ChildN.getNode());
1798
1799 removeFromWorklist(N);
1800 DAG.DeleteNode(N);
1801 } else {
1802 AddToWorklist(N);
1803 }
1804 } while (!Nodes.empty());
1805 return true;
1806}
1807
1808//===----------------------------------------------------------------------===//
1809// Main DAG Combiner implementation
1810//===----------------------------------------------------------------------===//
1811
/// Main driver loop: seeds the worklist with every node in the DAG, then
/// repeatedly pulls nodes off and tries to combine them until no work remains.
void DAGCombiner::Run(CombineLevel AtLevel) {
  // set the instance variables, so that the various visit routines may use it.
  Level = AtLevel;
  LegalDAG = Level >= AfterLegalizeDAG;
  LegalOperations = Level >= AfterLegalizeVectorOps;
  LegalTypes = Level >= AfterLegalizeTypes;

  // An explicit command-line occurrence overrides the target preference.
  // NOTE(review): the '?' arm of this conditional (presumably
  // '? EnableTopologicalSorting') appears to be missing from this view of the
  // source.
  bool UseTopologicalSorting = EnableTopologicalSorting.getNumOccurrences() > 0
                                   : TLI.useTopologicalSorting();

  WorklistInserter AddNodes(*this);

  // NOTE(review): the statement guarded by this 'if' appears to be missing
  // from this view of the source.
  if (UseTopologicalSorting)

  // Add all the dag nodes to the worklist.
  //
  // Note: All nodes are not added to PruningList here, this is because the only
  // nodes which can be deleted are those which have no uses and all other nodes
  // which would otherwise be added to the worklist by the first call to
  // getNextWorklistEntry are already present in it.
  if (UseTopologicalSorting) {
    for (SDNode &Node : reverse(DAG.allnodes()))
      AddToWorklist(&Node, /* IsCandidateForPruning */ Node.use_empty());
  } else {
    for (SDNode &Node : DAG.allnodes())
      AddToWorklist(&Node, /* IsCandidateForPruning */ Node.use_empty());
  }

  // Create a dummy node (which is not added to allnodes), that adds a reference
  // to the root node, preventing it from being deleted, and tracking any
  // changes of the root.
  HandleSDNode Dummy(DAG.getRoot());

  // While we have a valid worklist entry node, try to combine it.
  while (SDNode *N = getNextWorklistEntry()) {
    // If N has no uses, it is dead. Make sure to revisit all N's operands once
    // N is deleted from the DAG, since they too may now be dead or may have a
    // reduced number of uses, allowing other xforms.
    if (recursivelyDeleteUnusedNodes(N))
      continue;

    WorklistRemover DeadNodes(*this);

    // If this combine is running after legalizing the DAG, re-legalize any
    // nodes pulled off the worklist.
    if (LegalDAG) {
      SmallSetVector<SDNode *, 16> UpdatedNodes;
      bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);

      for (SDNode *LN : UpdatedNodes)
        AddToWorklistWithUsers(LN);

      // Legalization may have replaced N entirely; if so, move on.
      if (!NIsValid)
        continue;
    }

    LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));

    // Add any operands of the new node which have not yet been combined to the
    // worklist as well. getNextWorklistEntry flags nodes that have been
    // combined before. Because the worklist uniques things already, this won't
    // repeatedly process the same operand.
    for (const SDValue &ChildN : N->op_values())
      AddToWorklist(ChildN.getNode(), /*IsCandidateForPruning=*/true,
                    /*SkipIfCombinedBefore=*/true);

    SDValue RV = combine(N);

    if (!RV.getNode())
      continue;

    ++NodesCombined;

    // Invalidate cached info.
    ChainsWithoutMergeableStores.clear();

    // If we get back the same node we passed in, rather than a new node or
    // zero, we know that the node must have defined multiple values and
    // CombineTo was used. Since CombineTo takes care of the worklist
    // mechanics for us, we have no work to do in this case.
    if (RV.getNode() == N)
      continue;

    assert(N->getOpcode() != ISD::DELETED_NODE &&
           RV.getOpcode() != ISD::DELETED_NODE &&
           "Node was deleted but visit returned new node!");

    LLVM_DEBUG(dbgs() << " ... into: "; RV.dump(&DAG));

    if (N->getNumValues() == RV->getNumValues())
      DAG.ReplaceAllUsesWith(N, RV.getNode());
    else {
      assert(N->getValueType(0) == RV.getValueType() &&
             N->getNumValues() == 1 && "Type mismatch");
      DAG.ReplaceAllUsesWith(N, &RV);
    }

    // Push the new node and any users onto the worklist. Omit this if the
    // new node is the EntryToken (e.g. if a store managed to get optimized
    // out), because re-visiting the EntryToken and its users will not uncover
    // any additional opportunities, but there may be a large number of such
    // users, potentially causing compile time explosion.
    if (RV.getOpcode() != ISD::EntryToken)
      AddToWorklistWithUsers(RV.getNode());

    // Finally, if the node is now dead, remove it from the graph. The node
    // may not be dead if the replacement process recursively simplified to
    // something else needing this node. This will also take care of adding any
    // operands which have lost a user to the worklist.
    recursivelyDeleteUnusedNodes(N);
  }

  // If the root changed (e.g. it was a dead load, update the root).
  DAG.setRoot(Dummy.getValue());
  DAG.RemoveDeadNodes();
}
1930
/// Dispatch N to the opcode-specific combine routine. Returns the null
/// SDValue when no routine matched the opcode.
SDValue DAGCombiner::visit(SDNode *N) {
  // clang-format off
  switch (N->getOpcode()) {
  default: break;
  case ISD::TokenFactor: return visitTokenFactor(N);
  case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
  case ISD::ADD: return visitADD(N);
  case ISD::PTRADD: return visitPTRADD(N);
  case ISD::SUB: return visitSUB(N);
  case ISD::SADDSAT:
  case ISD::UADDSAT: return visitADDSAT(N);
  case ISD::SSUBSAT:
  case ISD::USUBSAT: return visitSUBSAT(N);
  case ISD::ADDC: return visitADDC(N);
  case ISD::SADDO:
  case ISD::UADDO: return visitADDO(N);
  case ISD::SUBC: return visitSUBC(N);
  case ISD::SSUBO:
  case ISD::USUBO: return visitSUBO(N);
  case ISD::ADDE: return visitADDE(N);
  case ISD::UADDO_CARRY: return visitUADDO_CARRY(N);
  case ISD::SADDO_CARRY: return visitSADDO_CARRY(N);
  case ISD::SUBE: return visitSUBE(N);
  case ISD::USUBO_CARRY: return visitUSUBO_CARRY(N);
  case ISD::SSUBO_CARRY: return visitSSUBO_CARRY(N);
  case ISD::SMULFIX:
  case ISD::SMULFIXSAT:
  case ISD::UMULFIX:
  case ISD::UMULFIXSAT: return visitMULFIX(N);
  case ISD::MUL: return visitMUL<EmptyMatchContext>(N);
  case ISD::SDIV: return visitSDIV(N);
  case ISD::UDIV: return visitUDIV(N);
  case ISD::SREM:
  case ISD::UREM: return visitREM(N);
  case ISD::MULHU: return visitMULHU(N);
  case ISD::MULHS: return visitMULHS(N);
  case ISD::AVGFLOORS:
  case ISD::AVGFLOORU:
  case ISD::AVGCEILS:
  case ISD::AVGCEILU: return visitAVG(N);
  case ISD::ABDS:
  case ISD::ABDU: return visitABD(N);
  case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
  case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
  case ISD::SMULO:
  case ISD::UMULO: return visitMULO(N);
  case ISD::SMIN:
  case ISD::SMAX:
  case ISD::UMIN:
  case ISD::UMAX: return visitIMINMAX(N);
  case ISD::AND: return visitAND(N);
  case ISD::OR: return visitOR(N);
  case ISD::XOR: return visitXOR(N);
  case ISD::SHL: return visitSHL(N);
  case ISD::SRA: return visitSRA(N);
  case ISD::SRL: return visitSRL(N);
  case ISD::ROTR:
  case ISD::ROTL: return visitRotate(N);
  case ISD::FSHL:
  case ISD::FSHR: return visitFunnelShift(N);
  case ISD::SSHLSAT:
  case ISD::USHLSAT: return visitSHLSAT(N);
  case ISD::ABS: return visitABS(N);
  case ISD::CLMUL:
  case ISD::CLMULR:
  case ISD::CLMULH: return visitCLMUL(N);
  case ISD::BSWAP: return visitBSWAP(N);
  case ISD::BITREVERSE: return visitBITREVERSE(N);
  case ISD::CTLZ: return visitCTLZ(N);
  case ISD::CTLZ_ZERO_POISON: return visitCTLZ_ZERO_POISON(N);
  case ISD::CTTZ: return visitCTTZ(N);
  case ISD::CTTZ_ZERO_POISON: return visitCTTZ_ZERO_POISON(N);
  case ISD::CTPOP: return visitCTPOP(N);
  case ISD::SELECT: return visitSELECT(N);
  case ISD::VSELECT: return visitVSELECT(N);
  case ISD::SELECT_CC: return visitSELECT_CC(N);
  case ISD::SETCC: return visitSETCC(N);
  case ISD::SETCCCARRY: return visitSETCCCARRY(N);
  case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
  case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
  case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
  case ISD::AssertSext:
  case ISD::AssertZext: return visitAssertExt(N);
  case ISD::AssertAlign: return visitAssertAlign(N);
  case ISD::IS_FPCLASS: return visitIS_FPCLASS(N);
  case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
  // NOTE(review): two case labels (likely the SIGN/ZERO _EXTEND_VECTOR_INREG
  // cases) appear to be missing from this view of the source.
  case ISD::ANY_EXTEND_VECTOR_INREG: return visitEXTEND_VECTOR_INREG(N);
  case ISD::TRUNCATE: return visitTRUNCATE(N);
  case ISD::TRUNCATE_USAT_U: return visitTRUNCATE_USAT_U(N);
  case ISD::BITCAST: return visitBITCAST(N);
  case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
  case ISD::FADD: return visitFADD(N);
  case ISD::STRICT_FADD: return visitSTRICT_FADD(N);
  case ISD::FSUB: return visitFSUB(N);
  case ISD::FMUL: return visitFMUL(N);
  case ISD::FMA: return visitFMA<EmptyMatchContext>(N);
  case ISD::FMAD: return visitFMAD(N);
  case ISD::FMULADD: return visitFMULADD(N);
  case ISD::FDIV: return visitFDIV(N);
  case ISD::FREM: return visitFREM(N);
  case ISD::FSQRT: return visitFSQRT(N);
  case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
  case ISD::FPOW: return visitFPOW(N);
  case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
  case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
  case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
  case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
  case ISD::LROUND:
  case ISD::LLROUND:
  case ISD::LRINT:
  case ISD::LLRINT: return visitXROUND(N);
  case ISD::FP_ROUND: return visitFP_ROUND(N);
  case ISD::FP_EXTEND: return visitFP_EXTEND(N);
  case ISD::FNEG: return visitFNEG(N);
  case ISD::FABS: return visitFABS(N);
  case ISD::FFLOOR: return visitFFLOOR(N);
  case ISD::FMINNUM:
  case ISD::FMAXNUM:
  case ISD::FMINIMUM:
  case ISD::FMAXIMUM:
  case ISD::FMINIMUMNUM:
  case ISD::FMAXIMUMNUM: return visitFMinMax(N);
  case ISD::FCEIL: return visitFCEIL(N);
  case ISD::FTRUNC: return visitFTRUNC(N);
  case ISD::FFREXP: return visitFFREXP(N);
  case ISD::BRCOND: return visitBRCOND(N);
  case ISD::BR_CC: return visitBR_CC(N);
  case ISD::LOAD: return visitLOAD(N);
  case ISD::STORE: return visitSTORE(N);
  case ISD::ATOMIC_STORE: return visitATOMIC_STORE(N);
  case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
  case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
  case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
  case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
  case ISD::VECTOR_INTERLEAVE: return visitVECTOR_INTERLEAVE(N);
  case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
  case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
  case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N);
  case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
  case ISD::MGATHER: return visitMGATHER(N);
  case ISD::MLOAD: return visitMLOAD(N);
  case ISD::MSCATTER: return visitMSCATTER(N);
  case ISD::MSTORE: return visitMSTORE(N);
  case ISD::EXPERIMENTAL_VECTOR_HISTOGRAM: return visitMHISTOGRAM(N);
  // NOTE(review): the PARTIAL_REDUCE_* case labels guarding this return
  // appear to be missing from this view of the source.
    return visitPARTIAL_REDUCE_MLA(N);
  case ISD::VECTOR_COMPRESS: return visitVECTOR_COMPRESS(N);
  case ISD::LIFETIME_END: return visitLIFETIME_END(N);
  case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
  case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
  case ISD::FP_TO_BF16: return visitFP_TO_BF16(N);
  case ISD::BF16_TO_FP: return visitBF16_TO_FP(N);
  case ISD::FREEZE: return visitFREEZE(N);
  case ISD::GET_FPENV_MEM: return visitGET_FPENV_MEM(N);
  case ISD::SET_FPENV_MEM: return visitSET_FPENV_MEM(N);
  case ISD::FCANONICALIZE: return visitFCANONICALIZE(N);
  // NOTE(review): two case labels appear to be missing here in this view of
  // the source.
  case ISD::VECREDUCE_ADD:
  case ISD::VECREDUCE_MUL:
  case ISD::VECREDUCE_AND:
  case ISD::VECREDUCE_OR:
  case ISD::VECREDUCE_XOR:
  // NOTE(review): additional VECREDUCE_* case labels appear to be missing
  // here in this view of the source.
  case ISD::VECREDUCE_FMINIMUM: return visitVECREDUCE(N);
#define BEGIN_REGISTER_VP_SDNODE(SDOPC, ...) case ISD::SDOPC:
#include "llvm/IR/VPIntrinsics.def"
    return visitVPOp(N);
  }
  // clang-format on
  return SDValue();
}
2114
2115SDValue DAGCombiner::combine(SDNode *N) {
2116 if (!DebugCounter::shouldExecute(DAGCombineCounter))
2117 return SDValue();
2118
2119 SDValue RV;
2120 if (!DisableGenericCombines)
2121 RV = visit(N);
2122
2123 // If nothing happened, try a target-specific DAG combine.
2124 if (!RV.getNode()) {
2125 assert(N->getOpcode() != ISD::DELETED_NODE &&
2126 "Node was deleted but visit returned NULL!");
2127
2128 if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
2129 TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
2130
2131 // Expose the DAG combiner to the target combiner impls.
2132 TargetLowering::DAGCombinerInfo
2133 DagCombineInfo(DAG, Level, false, this);
2134
2135 RV = TLI.PerformDAGCombine(N, DagCombineInfo);
2136 }
2137 }
2138
2139 // If nothing happened still, try promoting the operation.
2140 if (!RV.getNode()) {
2141 switch (N->getOpcode()) {
2142 default: break;
2143 case ISD::ADD:
2144 case ISD::SUB:
2145 case ISD::MUL:
2146 case ISD::AND:
2147 case ISD::OR:
2148 case ISD::XOR:
2149 RV = PromoteIntBinOp(SDValue(N, 0));
2150 break;
2151 case ISD::SHL:
2152 case ISD::SRA:
2153 case ISD::SRL:
2154 RV = PromoteIntShiftOp(SDValue(N, 0));
2155 break;
2156 case ISD::SIGN_EXTEND:
2157 case ISD::ZERO_EXTEND:
2158 case ISD::ANY_EXTEND:
2159 RV = PromoteExtend(SDValue(N, 0));
2160 break;
2161 case ISD::LOAD:
2162 if (PromoteLoad(SDValue(N, 0)))
2163 RV = SDValue(N, 0);
2164 break;
2165 }
2166 }
2167
2168 // If N is a commutative binary node, try to eliminate it if the commuted
2169 // version is already present in the DAG.
2170 if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode())) {
2171 SDValue N0 = N->getOperand(0);
2172 SDValue N1 = N->getOperand(1);
2173
2174 // Constant operands are canonicalized to RHS.
2175 if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
2176 SDValue Ops[] = {N1, N0};
2177 SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
2178 N->getFlags());
2179 if (CSENode)
2180 return SDValue(CSENode, 0);
2181 }
2182 }
2183
2184 return RV;
2185}
2186
/// Given a node, return its input chain if it has one, otherwise return a null
/// sd operand.
// NOTE(review): the function signature line (presumably
// 'static SDValue getInputChainForNode(SDNode *N) {') appears to be missing
// from this view of the source.
  if (unsigned NumOps = N->getNumOperands()) {
    // Chains are conventionally the first or last operand; check those before
    // scanning the interior operands.
    if (N->getOperand(0).getValueType() == MVT::Other)
      return N->getOperand(0);
    if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
      return N->getOperand(NumOps-1);
    for (unsigned i = 1; i < NumOps-1; ++i)
      if (N->getOperand(i).getValueType() == MVT::Other)
        return N->getOperand(i);
  }
  return SDValue();
}
2201
2202SDValue DAGCombiner::visitFCANONICALIZE(SDNode *N) {
2203 SDValue Operand = N->getOperand(0);
2204 EVT VT = Operand.getValueType();
2205 SDLoc dl(N);
2206
2207 // Canonicalize undef to quiet NaN.
2208 if (Operand.isUndef()) {
2209 APFloat CanonicalQNaN = APFloat::getQNaN(VT.getFltSemantics());
2210 return DAG.getConstantFP(CanonicalQNaN, dl, VT);
2211 }
2212 return SDValue();
2213}
2214
SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
  // If N has two operands, where one has an input chain equal to the other,
  // the 'other' chain is redundant.
  if (N->getNumOperands() == 2) {
    if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
      return N->getOperand(0);
    if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
      return N->getOperand(1);
  }

  // Don't simplify token factors if optnone.
  if (OptLevel == CodeGenOptLevel::None)
    return SDValue();

  // Don't simplify the token factor if the node itself has too many operands.
  if (N->getNumOperands() > TokenFactorInlineLimit)
    return SDValue();

  // If the sole user is a token factor, we should make sure we have a
  // chance to merge them together. This prevents TF chains from inhibiting
  // optimizations.
  if (N->hasOneUse() && N->user_begin()->getOpcode() == ISD::TokenFactor)
    AddToWorklist(*(N->user_begin()));

  SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
  SmallVector<SDValue, 8> Ops;  // Ops for replacing token factor.
  SmallPtrSet<SDNode*, 16> SeenOps;
  bool Changed = false;         // If we should replace this token factor.

  // Start out with this token factor.
  TFs.push_back(N);

  // Iterate through token factors. The TFs grows when new token factors are
  // encountered.
  for (unsigned i = 0; i < TFs.size(); ++i) {
    // Limit number of nodes to inline, to avoid quadratic compile times.
    // We have to add the outstanding Token Factors to Ops, otherwise we might
    // drop Ops from the resulting Token Factors.
    if (Ops.size() > TokenFactorInlineLimit) {
      for (unsigned j = i; j < TFs.size(); j++)
        Ops.emplace_back(TFs[j], 0);
      // Drop unprocessed Token Factors from TFs, so we do not add them to the
      // combiner worklist later.
      TFs.resize(i);
      break;
    }

    SDNode *TF = TFs[i];
    // Check each of the operands.
    for (const SDValue &Op : TF->op_values()) {
      switch (Op.getOpcode()) {
      case ISD::EntryToken:
        // Entry tokens don't need to be added to the list. They are
        // redundant.
        Changed = true;
        break;

      case ISD::TokenFactor:
        if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
          // Queue up for processing.
          TFs.push_back(Op.getNode());
          Changed = true;
          break;
        }
        [[fallthrough]];

      default:
        // Only add if it isn't already in the list.
        if (SeenOps.insert(Op.getNode()).second)
          Ops.push_back(Op);
        else
          Changed = true;
        break;
      }
    }
  }

  // Re-visit inlined Token Factors, to clean them up in case they have been
  // removed. Skip the first Token Factor, as this is the current node.
  for (unsigned i = 1, e = TFs.size(); i < e; i++)
    AddToWorklist(TFs[i]);

  // Remove Nodes that are chained to another node in the list. Do so
  // by walking up chains breath-first stopping when we've seen
  // another operand. In general we must climb to the EntryNode, but we can exit
  // early if we find all remaining work is associated with just one operand as
  // no further pruning is possible.

  // List of nodes to search through and original Ops from which they originate.
  // NOTE(review): the declaration of 'Worklist' (a vector of
  // (node, operand-index) pairs, judging by its uses below) appears to be
  // missing from this view of the source.
  SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
  SmallPtrSet<SDNode *, 16> SeenChains;
  bool DidPruneOps = false;

  unsigned NumLeftToConsider = 0;
  for (const SDValue &Op : Ops) {
    Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
    OpWorkCount.push_back(1);
  }

  // Local helper; deliberately shadows the member function of the same name.
  auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
    // If this is an Op, we can remove the op from the list. Remark any
    // search associated with it as from the current OpNumber.
    if (SeenOps.contains(Op)) {
      Changed = true;
      DidPruneOps = true;
      unsigned OrigOpNumber = 0;
      while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
        OrigOpNumber++;
      assert((OrigOpNumber != Ops.size()) &&
             "expected to find TokenFactor Operand");
      // Re-mark worklist from OrigOpNumber to OpNumber
      for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
        if (Worklist[i].second == OrigOpNumber) {
          Worklist[i].second = OpNumber;
        }
      }
      OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
      OpWorkCount[OrigOpNumber] = 0;
      NumLeftToConsider--;
    }
    // Add if it's a new chain
    if (SeenChains.insert(Op).second) {
      OpWorkCount[OpNumber]++;
      Worklist.push_back(std::make_pair(Op, OpNumber));
    }
  };

  // Hard cap of 1024 visited nodes keeps this walk from blowing up compile
  // time on pathological chains.
  for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
    // We need to consider at least 2 Ops to prune.
    if (NumLeftToConsider <= 1)
      break;
    auto CurNode = Worklist[i].first;
    auto CurOpNumber = Worklist[i].second;
    assert((OpWorkCount[CurOpNumber] > 0) &&
           "Node should not appear in worklist");
    switch (CurNode->getOpcode()) {
    case ISD::EntryToken:
      // Hitting EntryToken is the only way for the search to terminate without
      // hitting
      // another operand's search. Prevent us from marking this operand
      // considered.
      NumLeftToConsider++;
      break;
    case ISD::TokenFactor:
      for (const SDValue &Op : CurNode->op_values())
        AddToWorklist(i, Op.getNode(), CurOpNumber);
      break;
    // NOTE(review): a case label (likely ISD::LIFETIME_START) appears to be
    // missing from this view of the source.
    case ISD::LIFETIME_END:
    case ISD::CopyFromReg:
    case ISD::CopyToReg:
      AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
      break;
    default:
      if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
        AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
      break;
    }
    OpWorkCount[CurOpNumber]--;
    if (OpWorkCount[CurOpNumber] == 0)
      NumLeftToConsider--;
  }

  // If we've changed things around then replace token factor.
  if (Changed) {
    // NOTE(review): the declaration of 'Result' (an SDValue) appears to be
    // missing from this view of the source.
    if (Ops.empty()) {
      // The entry token is the only possible outcome.
      Result = DAG.getEntryNode();
    } else {
      if (DidPruneOps) {
        SmallVector<SDValue, 8> PrunedOps;
        // Keep only the ops that were not reached from another op's chain.
        for (const SDValue &Op : Ops) {
          if (SeenChains.count(Op.getNode()) == 0)
            PrunedOps.push_back(Op);
        }
        Result = DAG.getTokenFactor(SDLoc(N), PrunedOps);
      } else {
        Result = DAG.getTokenFactor(SDLoc(N), Ops);
      }
    }
    return Result;
  }
  return SDValue();
}
2402
/// MERGE_VALUES can always be eliminated.
SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
  WorklistRemover DeadNodes(*this);
  // Replacing results may cause a different MERGE_VALUES to suddenly
  // be CSE'd with N, and carry its uses with it. Iterate until no
  // uses remain, to ensure that the node can be safely deleted.
  // First add the users of this node to the work list so that they
  // can be tried again once they have new operands.
  AddUsersToWorklist(N);
  do {
    // Do as a single replacement to avoid rewalking use lists.
    // NOTE(review): the declaration of 'Ops' (presumably a SmallVector copy
    // of N's operands) appears to be missing from this view of the source.
    DAG.ReplaceAllUsesWith(N, Ops.data());
  } while (!N->use_empty());
  deleteAndRecombine(N);
  return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
2420
/// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a
/// ConstantSDNode pointer else nullptr.
// NOTE(review): the function signature and the dyn_cast initializing 'Const'
// appear to be missing from this view of the source.
  return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
}
2427
// isTruncateOf - If N is a truncate of some other value, return true, record
// the value being truncated in Op and which of Op's bits are zero/one in Known.
// This function computes KnownBits to avoid a duplicated call to
// computeKnownBits in the caller.
// NOTE(review): the first line of the signature (presumably
// 'static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,')
// appears to be missing from this view of the source.
                         KnownBits &Known) {
  if (N->getOpcode() == ISD::TRUNCATE) {
    Op = N->getOperand(0);
    Known = DAG.computeKnownBits(Op);
    // With 'nuw', the bits truncated away are known to be zero.
    if (N->getFlags().hasNoUnsignedWrap())
      Known.Zero.setBitsFrom(N.getScalarValueSizeInBits());
    return true;
  }

  // NOTE(review): part of the sd_match pattern below appears to be missing
  // from this view of the source.
  if (N.getValueType().getScalarType() != MVT::i1 ||
      !sd_match(
    return false;

  Known = DAG.computeKnownBits(Op);
  // Treat an i1 value as a truncate iff all bits above bit 0 are known zero.
  return (Known.Zero | 1).isAllOnes();
}
2450
/// Return true if 'Use' is a load or a store that uses N as its base pointer
/// and that N may be folded in the load / store addressing mode.
// NOTE(review): the first line of the signature (presumably
// 'static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, SelectionDAG &DAG,')
// appears to be missing from this view of the source.
                                    const TargetLowering &TLI) {
  EVT VT;
  unsigned AS;

  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
    if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
      return false;
    VT = LD->getMemoryVT();
    AS = LD->getAddressSpace();
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
    if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
      return false;
    VT = ST->getMemoryVT();
    AS = ST->getAddressSpace();
  // NOTE(review): an 'else if' header (apparently matching another load-like
  // node kind) appears to be missing from this view of the source.
    if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
      return false;
    VT = LD->getMemoryVT();
    AS = LD->getAddressSpace();
  // NOTE(review): an 'else if' header (apparently matching another store-like
  // node kind) appears to be missing from this view of the source.
    if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
      return false;
    VT = ST->getMemoryVT();
    AS = ST->getAddressSpace();
  } else {
    return false;
  }

  // NOTE(review): the declaration of 'AM' (presumably
  // TargetLowering::AddrMode) appears to be missing from this view of the
  // source.
  if (N->isAnyAdd()) {
    AM.HasBaseReg = true;
    // NOTE(review): the 'if' header initializing 'Offset' (a constant-operand
    // check) appears to be missing from this view of the source.
    if (Offset)
      // [reg +/- imm]
      AM.BaseOffs = Offset->getSExtValue();
    else
      // [reg +/- reg]
      AM.Scale = 1;
  } else if (N->getOpcode() == ISD::SUB) {
    AM.HasBaseReg = true;
    // NOTE(review): the 'if' header initializing 'Offset' appears to be
    // missing from this view of the source.
    if (Offset)
      // [reg +/- imm]
      AM.BaseOffs = -Offset->getSExtValue();
    else
      // [reg +/- reg]
      AM.Scale = 1;
  } else {
    return false;
  }

  return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
                                   VT.getTypeForEVT(*DAG.getContext()), AS);
}
2508
/// This inverts a canonicalization in IR that replaces a variable select arm
/// with an identity constant. Codegen improves if we re-use the variable
/// operand rather than load a constant. This can also be converted into a
/// masked vector operation if the target supports it.
// NOTE(review): the first line of the signature (presumably
// 'static SDValue foldSelectWithIdentityConstant(SDNode *N, SelectionDAG &DAG,')
// appears to be missing from this view of the source.
                                              bool ShouldCommuteOperands) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  // Match a select as operand 1. The identity constant that we are looking for
  // is only valid as operand 1 of a non-commutative binop.
  if (ShouldCommuteOperands)
    std::swap(N0, N1);

  SDValue Cond, TVal, FVal;
  // NOTE(review): the sd_match call that binds Cond/TVal/FVal from N1 appears
  // to be partially missing from this view of the source.
                   m_Value(FVal)))))
    return SDValue();

  // We can't hoist all instructions because of immediate UB (not speculatable).
  // For example div/rem by zero.
  // NOTE(review): the speculation-safety check guarding this early return
  // appears to be missing from this view of the source.
    return SDValue();

  unsigned SelOpcode = N1.getOpcode();
  unsigned Opcode = N->getOpcode();
  EVT VT = N->getValueType(0);
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  // This transform increases uses of N0, so freeze it to be safe.
  // binop N0, (vselect Cond, IDC, FVal) --> vselect Cond, N0, (binop N0, FVal)
  unsigned OpNo = ShouldCommuteOperands ? 0 : 1;
  if (DAG.isIdentityElement(Opcode, N->getFlags(), TVal, OpNo) &&
      TLI.shouldFoldSelectWithIdentityConstant(Opcode, VT, SelOpcode, N0,
                                               FVal)) {
    SDValue F0 = DAG.getFreeze(N0);
    SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, FVal, N->getFlags());
    return DAG.getSelect(SDLoc(N), VT, Cond, F0, NewBO);
  }
  // binop N0, (vselect Cond, TVal, IDC) --> vselect Cond, (binop N0, TVal), N0
  if (DAG.isIdentityElement(Opcode, N->getFlags(), FVal, OpNo) &&
      TLI.shouldFoldSelectWithIdentityConstant(Opcode, VT, SelOpcode, N0,
                                               TVal)) {
    SDValue F0 = DAG.getFreeze(N0);
    SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, TVal, N->getFlags());
    return DAG.getSelect(SDLoc(N), VT, Cond, NewBO, F0);
  }

  return SDValue();
}
2559
/// Try to fold a binary operator into a select operand: either via the
/// identity-constant transform above, or by constant-folding the binop into
/// both arms of a one-use select-of-constants.
// NOTE(review): several continuation lines of multi-line conditions in this
// function appear to be missing from this view of the source; the inline
// NOTE comments below mark each spot.
SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
         "Unexpected binary operator");

  if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, false))
    return Sel;

  if (TLI.isCommutativeBinOp(BO->getOpcode()))
    if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, true))
      return Sel;

  // Don't do this unless the old select is going away. We want to eliminate the
  // binary operator, not replace a binop with a select.
  // TODO: Handle ISD::SELECT_CC.
  unsigned SelOpNo = 0;
  SDValue Sel = BO->getOperand(0);
  auto BinOpcode = BO->getOpcode();
  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
    SelOpNo = 1;
    Sel = BO->getOperand(1);

    // Peek through trunc to shift amount type.
    if ((BinOpcode == ISD::SHL || BinOpcode == ISD::SRA ||
         BinOpcode == ISD::SRL) && Sel.hasOneUse()) {
      // This is valid when the truncated bits of x are already zero.
      SDValue Op;
      KnownBits Known;
      if (isTruncateOf(DAG, Sel, Op, Known) &&
      // NOTE(review): the second clause of this condition appears to be
      // missing from this view of the source.
        Sel = Op;
    }
  }

  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
    return SDValue();

  SDValue CT = Sel.getOperand(1);
  if (!isConstantOrConstantVector(CT, true) &&
  // NOTE(review): the second clause of this condition appears to be missing
  // from this view of the source.
    return SDValue();

  SDValue CF = Sel.getOperand(2);
  if (!isConstantOrConstantVector(CF, true) &&
  // NOTE(review): the second clause of this condition appears to be missing
  // from this view of the source.
    return SDValue();

  // Bail out if any constants are opaque because we can't constant fold those.
  // The exception is "and" and "or" with either 0 or -1 in which case we can
  // propagate non constant operands into select. I.e.:
  // and (select Cond, 0, -1), X --> select Cond, 0, X
  // or X, (select Cond, -1, 0) --> select Cond, -1, X
  bool CanFoldNonConst =
      (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
  // NOTE(review): the remainder of this initializer appears to be missing
  // from this view of the source.

  SDValue CBO = BO->getOperand(SelOpNo ^ 1);
  if (!CanFoldNonConst &&
      !isConstantOrConstantVector(CBO, true) &&
  // NOTE(review): the final clause of this condition appears to be missing
  // from this view of the source.
    return SDValue();

  SDLoc DL(Sel);
  SDValue NewCT, NewCF;
  EVT VT = BO->getValueType(0);

  if (CanFoldNonConst) {
    // If CBO is an opaque constant, we can't rely on getNode to constant fold.
    if ((BinOpcode == ISD::AND && isNullOrNullSplat(CT)) ||
        (BinOpcode == ISD::OR && isAllOnesOrAllOnesSplat(CT)))
      NewCT = CT;
    else
      NewCT = CBO;

    if ((BinOpcode == ISD::AND && isNullOrNullSplat(CF)) ||
        (BinOpcode == ISD::OR && isAllOnesOrAllOnesSplat(CF)))
      NewCF = CF;
    else
      NewCF = CBO;
  } else {
    // We have a select-of-constants followed by a binary operator with a
    // constant. Eliminate the binop by pulling the constant math into the
    // select. Example: add (select Cond, CT, CF), CBO --> select Cond, CT +
    // CBO, CF + CBO
    NewCT = SelOpNo ? DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CBO, CT})
                    : DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CT, CBO});
    if (!NewCT)
      return SDValue();

    NewCF = SelOpNo ? DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CBO, CF})
                    : DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CF, CBO});
    if (!NewCF)
      return SDValue();
  }

  return DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF, BO->getFlags());
}
2658
// Fold an ADD/SUB of a constant with a zero-extended "(X & 1) == 0" boolean
// into a SUB/ADD of the raw low bit against an adjusted constant (see the
// transform comments below). Returns an empty SDValue if the pattern does not
// match.
// NOTE(review): the first line of this function's signature (original line
// 2659, presumably "static SDValue foldAddSubBoolOfMaskedVal(SDNode *N,
// const SDLoc &DL,") is missing from this extraction; the caller in visitADD
// invokes it as foldAddSubBoolOfMaskedVal(N, DL, DAG).
2660                                    SelectionDAG &DAG) {
2661  assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2662         "Expecting add or sub");
2663
2664  // Match a constant operand and a zext operand for the math instruction:
2665  // add Z, C
2666  // sub C, Z
2667  bool IsAdd = N->getOpcode() == ISD::ADD;
2668  SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
2669  SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
2670  auto *CN = dyn_cast<ConstantSDNode>(C);
2671  if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
2672    return SDValue();
2673
2674  // Match the zext operand as a setcc of a boolean.
2675  if (Z.getOperand(0).getValueType() != MVT::i1)
2676    return SDValue();
2677
2678  // Match the compare as: setcc (X & 1), 0, eq.
// NOTE(review): a continuation line of this sd_match (original line 2680,
// presumably the m_SpecificCondCode(ISD::SETEQ) matcher argument) is missing
// from this extraction.
2679  if (!sd_match(Z.getOperand(0), m_SetCC(m_And(m_Value(), m_One()), m_Zero(),
2681    return SDValue();
2682
2683  // We are adding/subtracting a constant and an inverted low bit. Turn that
2684  // into a subtract/add of the low bit with incremented/decremented constant:
2685  // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
2686  // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
2687  EVT VT = C.getValueType();
// getZExtOrTrunc bridges the i1-compared value back to the add/sub type VT.
2688  SDValue LowBit = DAG.getZExtOrTrunc(Z.getOperand(0).getOperand(0), DL, VT);
2689  SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT)
2690                     : DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
2691  return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
2692}
2693
2694// Attempt to form avgceil(A, B) from (A | B) - ((A ^ B) >> 1)
// Tries the unsigned form (AVGCEILU, logical shift) first, then the signed
// form (AVGCEILS, arithmetic shift); each is gated on the target supporting
// the operation once we must emit legal operations.
// NOTE(review): the sd_match calls below are missing one continuation line
// each (original lines 2701 and 2706, presumably the
// "sd_match(N, m_Sub(m_Or(m_Value(A), m_Value(B)), ..." halves of the
// pattern) in this extraction.
2695SDValue DAGCombiner::foldSubToAvg(SDNode *N, const SDLoc &DL) {
2696  SDValue N0 = N->getOperand(0);
2697  EVT VT = N0.getValueType();
2698  SDValue A, B;
2699
2700  if ((!LegalOperations || hasOperation(ISD::AVGCEILU, VT)) &&
2702                       m_Srl(m_Xor(m_Deferred(A), m_Deferred(B)), m_One())))) {
2703    return DAG.getNode(ISD::AVGCEILU, DL, VT, A, B);
2704  }
2705  if ((!LegalOperations || hasOperation(ISD::AVGCEILS, VT)) &&
2707                       m_Sra(m_Xor(m_Deferred(A), m_Deferred(B)), m_One())))) {
2708    return DAG.getNode(ISD::AVGCEILS, DL, VT, A, B);
2709  }
2710  return SDValue();
2711}
2712
2713/// Try to fold a pointer arithmetic node.
2714/// This needs to be done separately from normal addition, because pointer
2715/// addition is not commutative.
// NOTE(review): two lines are missing from this extraction (original lines
// 2768 and 2778); the affected conditions are flagged inline below.
2716SDValue DAGCombiner::visitPTRADD(SDNode *N) {
2717  SDValue N0 = N->getOperand(0);
2718  SDValue N1 = N->getOperand(1);
2719  EVT PtrVT = N0.getValueType();
2720  EVT IntVT = N1.getValueType();
2721  SDLoc DL(N);
2722
2723  // This is already ensured by an assert in SelectionDAG::getNode(). Several
2724  // combines here depend on this assumption.
2725  assert(PtrVT == IntVT &&
2726         "PTRADD with different operand types is not supported");
2727
2728  // fold (ptradd x, 0) -> x
2729  if (isNullConstant(N1))
2730    return N0;
2731
2732  // fold (ptradd 0, x) -> x
// The PtrVT == IntVT guard is redundant with the assert above but kept as a
// belt-and-braces check for builds without assertions.
2733  if (PtrVT == IntVT && isNullConstant(N0))
2734    return N1;
2735
2736  if (N0.getOpcode() == ISD::PTRADD &&
2737      !reassociationCanBreakAddressingModePattern(ISD::PTRADD, DL, N, N0, N1)) {
2738    SDValue X = N0.getOperand(0);
2739    SDValue Y = N0.getOperand(1);
2740    SDValue Z = N1;
2741    bool N0OneUse = N0.hasOneUse();
2742    bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Y);
2743    bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z);
2744
2745    // (ptradd (ptradd x, y), z) -> (ptradd x, (add y, z)) if:
2746    //   * y is a constant and (ptradd x, y) has one use; or
2747    //   * y and z are both constants.
2748    if ((YIsConstant && N0OneUse) || (YIsConstant && ZIsConstant)) {
2749      // If both additions in the original were NUW, the new ones are as well.
2750      SDNodeFlags Flags =
2751          (N->getFlags() & N0->getFlags()) & SDNodeFlags::NoUnsignedWrap;
2752      SDValue Add = DAG.getNode(ISD::ADD, DL, IntVT, {Y, Z}, Flags);
2753      AddToWorklist(Add.getNode());
2754      // We can't set InBounds even if both original ptradds were InBounds and
2755      // NUW: SDAG usually represents pointers as integers, therefore, the
2756      // matched pattern behaves as if it had implicit casts:
2757      //   (ptradd inbounds (inttoptr (ptrtoint (ptradd inbounds x, y))), z)
2758      // The outer inbounds ptradd might therefore rely on a provenance that x
2759      // does not have.
2760      return DAG.getMemBasePlusOffset(X, Add, DL, Flags);
2761    }
2762  }
2763
2764  // The following combines can turn in-bounds pointer arithmetic out of bounds.
2765  // That is problematic for settings like AArch64's CPA, which checks that
2766  // intermediate results of pointer arithmetic remain in bounds. The target
2767  // therefore needs to opt-in to enable them.
// NOTE(review): the head of this condition (original line 2768, presumably
// the target opt-in query, e.g. "if (!TLI.shouldPreservePtrArith(...)") is
// missing from this extraction — confirm against the upstream source.
2769          DAG.getMachineFunction().getFunction(), PtrVT))
2770    return SDValue();
2771
2772  if (N0.getOpcode() == ISD::PTRADD && isa<ConstantSDNode>(N1)) {
2773    // Fold (ptradd (ptradd GA, v), c) -> (ptradd (ptradd GA, c) v) with
2774    // global address GA and constant c, such that c can be folded into GA.
2775    // TODO: Support constant vector splats.
2776    SDValue GAValue = N0.getOperand(0);
// NOTE(review): the right-hand side of this initialization (original line
// 2778, presumably "dyn_cast<GlobalAddressSDNode>(GAValue)) {") is missing
// from this extraction.
2777    if (const GlobalAddressSDNode *GA =
2779      const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2780      if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
2781        // If both additions in the original were NUW, reassociation preserves
2782        // that.
2783        SDNodeFlags Flags =
2784            (N->getFlags() & N0->getFlags()) & SDNodeFlags::NoUnsignedWrap;
2785        // We can't set InBounds even if both original ptradds were InBounds and
2786        // NUW: SDAG usually represents pointers as integers, therefore, the
2787        // matched pattern behaves as if it had implicit casts:
2788        //   (ptradd inbounds (inttoptr (ptrtoint (ptradd inbounds GA, v))), c)
2789        // The outer inbounds ptradd might therefore rely on a provenance that
2790        // GA does not have.
2791        SDValue Inner = DAG.getMemBasePlusOffset(GAValue, N1, DL, Flags);
2792        AddToWorklist(Inner.getNode());
2793        return DAG.getMemBasePlusOffset(Inner, N0.getOperand(1), DL, Flags);
2794      }
2795    }
2796  }
2797
2798  if (N1.getOpcode() == ISD::ADD && N1.hasOneUse()) {
2799    // (ptradd x, (add y, z)) -> (ptradd (ptradd x, y), z) if z is a constant,
2800    //    y is not, and (add y, z) is used only once.
2801    // (ptradd x, (add y, z)) -> (ptradd (ptradd x, z), y) if y is a constant,
2802    //    z is not, and (add y, z) is used only once.
2803    // The goal is to move constant offsets to the outermost ptradd, to create
2804    // more opportunities to fold offsets into memory instructions.
2805    // Together with another combine above, this also implements
2806    // (ptradd (ptradd x, y), z) -> (ptradd (ptradd x, z), y)).
2807    SDValue X = N0;
2808    SDValue Y = N1.getOperand(0);
2809    SDValue Z = N1.getOperand(1);
2810    bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Y);
2811    bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z);
2812
2813    // If both additions in the original were NUW, reassociation preserves that.
2814    SDNodeFlags CommonFlags = N->getFlags() & N1->getFlags();
2815    SDNodeFlags ReassocFlags = CommonFlags & SDNodeFlags::NoUnsignedWrap;
2816    if (CommonFlags.hasNoUnsignedWrap()) {
2817      // If both operations are NUW and the PTRADD is inbounds, the offsets are
2818      // both non-negative, so the reassociated PTRADDs are also inbounds.
2819      ReassocFlags |= N->getFlags() & SDNodeFlags::InBounds;
2820    }
2821
// Only fire when exactly one of y/z is constant; the constant is moved to the
// outer ptradd via the swap below.
2822    if (ZIsConstant != YIsConstant) {
2823      if (YIsConstant)
2824        std::swap(Y, Z);
2825      SDValue Inner = DAG.getMemBasePlusOffset(X, Y, DL, ReassocFlags);
2826      AddToWorklist(Inner.getNode());
2827      return DAG.getMemBasePlusOffset(Inner, Z, DL, ReassocFlags);
2828    }
2829  }
2830
2831  // Transform (ptradd a, b) -> (or disjoint a, b) if it is equivalent and if
2832  // that transformation can't block an offset folding at any use of the ptradd.
2833  // This should be done late, after legalization, so that it doesn't block
2834  // other ptradd combines that could enable more offset folding.
2835  if (LegalOperations && DAG.haveNoCommonBitsSet(N0, N1)) {
2836    bool TransformCannotBreakAddrMode = none_of(N->users(), [&](SDNode *User) {
2837      return canFoldInAddressingMode(N, User, DAG, TLI);
2838    });
2839
2840    if (TransformCannotBreakAddrMode)
2841      return DAG.getNode(ISD::OR, DL, PtrVT, N0, N1, SDNodeFlags::Disjoint);
2842  }
2843
2844  return SDValue();
2845}
2846
2847/// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
2848/// a shift and add with a different constant.
// NOTE(review): the first line of the signature (original line 2849,
// presumably "static SDValue foldAddSubOfSignBit(SDNode *N, const SDLoc &DL,")
// is missing from this extraction; visitADD calls it as
// foldAddSubOfSignBit(N, DL, DAG).
2850                                     SelectionDAG &DAG) {
2851  assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2852         "Expecting add or sub");
2853
2854  // We need a constant operand for the add/sub, and the other operand is a
2855  // logical shift right: add (srl), C or sub C, (srl).
2856  bool IsAdd = N->getOpcode() == ISD::ADD;
2857  SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
2858  SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
2859  if (!DAG.isConstantIntBuildVectorOrConstantInt(ConstantOp) ||
2860      ShiftOp.getOpcode() != ISD::SRL)
2861    return SDValue();
2862
2863  // The shift must be of a 'not' value.
2864  SDValue Not = ShiftOp.getOperand(0);
2865  if (!Not.hasOneUse() || !isBitwiseNot(Not))
2866    return SDValue();
2867
2868  // The shift must be moving the sign bit to the least-significant-bit.
2869  EVT VT = ShiftOp.getValueType();
2870  SDValue ShAmt = ShiftOp.getOperand(1);
2871  ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2872  if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1))
2873    return SDValue();
2874
2875  // Eliminate the 'not' by adjusting the shift and add/sub constant:
2876  // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2877  // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
// For the add case this computes C + 1; for the sub case, C - 1 (SUB with a
// constant 1 RHS). Folding may fail for opaque constants, hence the guard.
2878  if (SDValue NewC = DAG.FoldConstantArithmetic(
2879          IsAdd ? ISD::ADD : ISD::SUB, DL, VT,
2880          {ConstantOp, DAG.getConstant(1, DL, VT)})) {
2881    SDValue NewShift = DAG.getNode(IsAdd ? ISD::SRA : ISD::SRL, DL, VT,
2882                                   Not.getOperand(0), ShAmt);
2883    return DAG.getNode(ISD::ADD, DL, VT, NewShift, NewC);
2884  }
2885
2886  return SDValue();
2887}
2888
// Returns true if one operand is the bitwise 'not' (xor with -1) of the other.
// NOTE(review): the line carrying the function name and parameters (original
// line 2890, presumably "areBitwiseNotOfEachother(SDValue Op0, SDValue Op1) {")
// is missing from this extraction; visitADDLike calls it as
// areBitwiseNotOfEachother(N0, N1).
2889static bool
2891  return (isBitwiseNot(Op0) && Op0.getOperand(0) == Op1) ||
2892         (isBitwiseNot(Op1) && Op1.getOperand(0) == Op0);
2893}
2894
2895/// Try to fold a node that behaves like an ADD (note that N isn't necessarily
2896/// an ISD::ADD here, it could for example be an ISD::OR if we know that there
2897/// are no common bits set in the operands).
// NOTE(review): several continuation lines are missing from this extraction
// (gaps in the embedded original numbering, e.g. 2915-16, 2928, 2958, 3011-12,
// 3052, 3057, 3067, 3074, 3091, 3142, 3144, 3151-52, 3155-56, 3166, 3169,
// 3180, 3185); conditions spanning those gaps are incomplete here and are
// best confirmed against the upstream source.
2898SDValue DAGCombiner::visitADDLike(SDNode *N) {
2899  SDValue N0 = N->getOperand(0);
2900  SDValue N1 = N->getOperand(1);
2901  EVT VT = N0.getValueType();
2902  SDLoc DL(N);
2903
2904  // fold (add x, undef) -> undef
2905  if (N0.isUndef())
2906    return N0;
2907  if (N1.isUndef())
2908    return N1;
2909
2910  // fold (add c1, c2) -> c1+c2
2911  if (SDValue C = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0, N1}))
2912    return C;
2913
2914  // canonicalize constant to RHS
2917    return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
2918
// Adding a value to its own bitwise-not always yields all-ones.
2919  if (areBitwiseNotOfEachother(N0, N1))
2920    return DAG.getConstant(APInt::getAllOnes(VT.getScalarSizeInBits()), DL, VT);
2921
2922  // fold vector ops
2923  if (VT.isVector()) {
2924    if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
2925      return FoldedVOp;
2926
2927    // fold (add x, 0) -> x, vector edition
2929      return N0;
2930  }
2931
2932  // fold (add x, 0) -> x
2933  if (isNullConstant(N1))
2934    return N0;
2935
2936  if (N0.getOpcode() == ISD::SUB) {
2937    SDValue N00 = N0.getOperand(0);
2938    SDValue N01 = N0.getOperand(1);
2939
2940    // fold ((A-c1)+c2) -> (A+(c2-c1))
2941    if (SDValue Sub = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N1, N01}))
2942      return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub);
2943
2944    // fold ((c1-A)+c2) -> (c1+c2)-A
2945    if (SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N00}))
2946      return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2947  }
2948
2949  // add (sext i1 X), 1 -> zext (not i1 X)
2950  // We don't transform this pattern:
2951  //   add (zext i1 X), -1 -> sext (not i1 X)
2952  // because most (?) targets generate better code for the zext form.
2953  if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
2954      isOneOrOneSplat(N1)) {
2955    SDValue X = N0.getOperand(0);
2956    if ((!LegalOperations ||
2957         (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
2959        X.getScalarValueSizeInBits() == 1) {
2960      SDValue Not = DAG.getNOT(DL, X, X.getValueType());
2961      return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
2962    }
2963  }
2964
2965  // Fold (add (or x, c0), c1) -> (add x, (c0 + c1))
2966  // iff (or x, c0) is equivalent to (add x, c0).
2967  // Fold (add (xor x, c0), c1) -> (add x, (c0 + c1))
2968  // iff (xor x, c0) is equivalent to (add x, c0).
2969  if (DAG.isADDLike(N0)) {
2970    SDValue N01 = N0.getOperand(1);
2971    if (SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N01}))
2972      return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add);
2973  }
2974
2975  if (SDValue NewSel = foldBinOpIntoSelect(N))
2976    return NewSel;
2977
2978  // reassociate add
2979  if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N, N0, N1)) {
2980    if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
2981      return RADD;
2982
2983    // (X + Y) + X --> Y + (X + X)
2984    SDValue X, Y, InnerAdd;
2985    if (sd_match(
2986            N, m_Add(m_OneUse(m_Value(InnerAdd, m_Add(m_Value(X), m_Value(Y)))),
2987                     m_Deferred(X)))) {
2988      if (X != Y) {
2989        // Redistribute shared NUW flag.
2990        // TODO: If NSW+NUW occurs on both adds, that can be redistributed too.
2991        SDNodeFlags NewFlags =
2992            N->getFlags() & InnerAdd->getFlags() & SDNodeFlags::NoUnsignedWrap;
2993        SDValue X2 = DAG.getNode(ISD::ADD, DL, VT, X, X, NewFlags);
2994        return DAG.getNode(ISD::ADD, DL, VT, Y, X2, NewFlags);
2995      }
2996    }
2997
2998    // Reassociate (add (or x, c), y) -> (add add(x, y), c)) if (or x, c) is
2999    // equivalent to (add x, c).
3000    // Reassociate (add (xor x, c), y) -> (add add(x, y), c)) if (xor x, c) is
3001    // equivalent to (add x, c).
3002    // Do this optimization only when adding c does not introduce instructions
3003    // for adding carries.
3004    auto ReassociateAddOr = [&](SDValue N0, SDValue N1) {
3005      if (DAG.isADDLike(N0) && N0.hasOneUse() &&
3006          isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
3007        // If N0's type does not split or is a sign mask, it does not introduce
3008        // add carry.
3009        auto TyActn = TLI.getTypeAction(*DAG.getContext(), N0.getValueType());
3010        bool NoAddCarry = TyActn == TargetLoweringBase::TypeLegal ||
3013        if (NoAddCarry)
3014          return DAG.getNode(
3015              ISD::ADD, DL, VT,
3016              DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
3017              N0.getOperand(1));
3018      }
3019      return SDValue();
3020    };
// Try both operand orders since the addlike operand may be on either side.
3021    if (SDValue Add = ReassociateAddOr(N0, N1))
3022      return Add;
3023    if (SDValue Add = ReassociateAddOr(N1, N0))
3024      return Add;
3025
3026    // Fold add(vecreduce(x), vecreduce(y)) -> vecreduce(add(x, y))
3027    if (SDValue SD =
3028            reassociateReduction(ISD::VECREDUCE_ADD, ISD::ADD, DL, VT, N0, N1))
3029      return SD;
3030  }
3031
3032  SDValue A, B, C, D;
3033
3034  // fold ((0-A) + B) -> B-A
3035  if (sd_match(N0, m_Neg(m_Value(A))))
3036    return DAG.getNode(ISD::SUB, DL, VT, N1, A);
3037
3038  // fold (A + (0-B)) -> A-B
3039  if (sd_match(N1, m_Neg(m_Value(B))))
3040    return DAG.getNode(ISD::SUB, DL, VT, N0, B);
3041
3042  // fold (A+(B-A)) -> B
3043  if (sd_match(N1, m_Sub(m_Value(B), m_Specific(N0))))
3044    return B;
3045
3046  // fold ((B-A)+A) -> B
3047  if (sd_match(N0, m_Sub(m_Value(B), m_Specific(N1))))
3048    return B;
3049
3050  // fold ((A-B)+(C-A)) -> (C-B)
3051  if (sd_match(N0, m_Sub(m_Value(A), m_Value(B))) &&
3053    return DAG.getNode(ISD::SUB, DL, VT, C, B);
3054
3055  // fold ((A-B)+(B-C)) -> (A-C)
3056  if (sd_match(N0, m_Sub(m_Value(A), m_Value(B))) &&
3058    return DAG.getNode(ISD::SUB, DL, VT, A, C);
3059
3060  // fold (A+(B-(A+C))) to (B-C)
3061  // fold (A+(B-(C+A))) to (B-C)
3062  if (sd_match(N1, m_Sub(m_Value(B), m_Add(m_Specific(N0), m_Value(C)))))
3063    return DAG.getNode(ISD::SUB, DL, VT, B, C);
3064
3065  // fold (A+((B-A)+or-C)) to (B+or-C)
3066  if (sd_match(N1,
3068                       m_Sub(m_Sub(m_Value(B), m_Specific(N0)), m_Value(C)))))
3069    return DAG.getNode(N1.getOpcode(), DL, VT, B, C);
3070
3071  // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
3072  if (sd_match(N0, m_OneUse(m_Sub(m_Value(A), m_Value(B)))) &&
3073      sd_match(N1, m_OneUse(m_Sub(m_Value(C), m_Value(D)))) &&
3075    return DAG.getNode(ISD::SUB, DL, VT,
3076                       DAG.getNode(ISD::ADD, SDLoc(N0), VT, A, C),
3077                       DAG.getNode(ISD::ADD, SDLoc(N1), VT, B, D));
3078
3079  // fold (add (umax X, C), -C) --> (usubsat X, C)
3080  if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
// Element-wise predicate: both elements undef, or Max == -Op.
3081    auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
3082      return (!Max && !Op) ||
3083             (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
3084    };
3085    if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
3086                                  /*AllowUndefs*/ true))
3087      return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
3088                         N0.getOperand(1));
3089  }
3090
3092    return SDValue(N, 0);
3093
3094  if (isOneOrOneSplat(N1)) {
3095    // fold (add (xor a, -1), 1) -> (sub 0, a)
3096    if (isBitwiseNot(N0))
3097      return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
3098                         N0.getOperand(0));
3099
3100    // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
3101    if (N0.getOpcode() == ISD::ADD) {
3102      SDValue A, Xor;
3103
3104      if (isBitwiseNot(N0.getOperand(0))) {
3105        A = N0.getOperand(1);
3106        Xor = N0.getOperand(0);
3107      } else if (isBitwiseNot(N0.getOperand(1))) {
3108        A = N0.getOperand(0);
3109        Xor = N0.getOperand(1);
3110      }
3111
3112      if (Xor)
3113        return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
3114    }
3115
3116    // Look for:
3117    //   add (add x, y), 1
3118    // And if the target does not like this form then turn into:
3119    //   sub y, (xor x, -1)
3120    if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
3121        N0.hasOneUse() &&
3122        // Limit this to after legalization if the add has wrap flags
3123        (Level >= AfterLegalizeDAG || (!N->getFlags().hasNoUnsignedWrap() &&
3124                                       !N->getFlags().hasNoSignedWrap()))) {
3125      SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
3126      return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
3127    }
3128  }
3129
3130  // (x - y) + -1 ->  add (xor y, -1), x
3131  if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
3132      isAllOnesOrAllOnesSplat(N1, /*AllowUndefs=*/true)) {
3133    SDValue Not = DAG.getNOT(DL, N0.getOperand(1), VT);
3134    return DAG.getNode(ISD::ADD, DL, VT, Not, N0.getOperand(0));
3135  }
3136
3137  // Fold add(mul(add(A, CA), CM), CB) -> add(mul(A, CM), CM*CA+CB).
3138  // This can help if the inner add has multiple uses.
3139  APInt CM, CA;
3140  if (ConstantSDNode *CB = dyn_cast<ConstantSDNode>(N1)) {
// The <= 64-bit guard keeps the getSExtValue() calls below valid.
3141    if (VT.getScalarSizeInBits() <= 64) {
3143                                m_ConstInt(CM)))) &&
3145              (CA * CM + CB->getAPIntValue()).getSExtValue())) {
3146        SDNodeFlags Flags;
3147        // If all the inputs are nuw, the outputs can be nuw. If all the inputs
3148        // are _also_ nsw the outputs can be too.
3149        if (N->getFlags().hasNoUnsignedWrap() &&
3150            N0->getFlags().hasNoUnsignedWrap() &&
3153          if (N->getFlags().hasNoSignedWrap() &&
3154              N0->getFlags().hasNoSignedWrap() &&
3157        }
3158        SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N1), VT, A,
3159                                  DAG.getConstant(CM, DL, VT), Flags);
3160        return DAG.getNode(
3161            ISD::ADD, DL, VT, Mul,
3162            DAG.getConstant(CA * CM + CB->getAPIntValue(), DL, VT), Flags);
3163      }
3164      // Also look in case there is an intermediate add.
3165      if (sd_match(N0, m_OneUse(m_Add(
3167                                    m_ConstInt(CM))),
3168                            m_Value(B)))) &&
3170              (CA * CM + CB->getAPIntValue()).getSExtValue())) {
3171        SDNodeFlags Flags;
3172        // If all the inputs are nuw, the outputs can be nuw. If all the inputs
3173        // are _also_ nsw the outputs can be too.
// OMul is whichever operand of the intermediate add is the mul (the other
// matched B).
3174        SDValue OMul =
3175            N0.getOperand(0) == B ? N0.getOperand(1) : N0.getOperand(0);
3176        if (N->getFlags().hasNoUnsignedWrap() &&
3177            N0->getFlags().hasNoUnsignedWrap() &&
3178            OMul->getFlags().hasNoUnsignedWrap() &&
3179            OMul.getOperand(0)->getFlags().hasNoUnsignedWrap()) {
3181          if (N->getFlags().hasNoSignedWrap() &&
3182              N0->getFlags().hasNoSignedWrap() &&
3183              OMul->getFlags().hasNoSignedWrap() &&
3184              OMul.getOperand(0)->getFlags().hasNoSignedWrap())
3186        }
3187        SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N1), VT, A,
3188                                  DAG.getConstant(CM, DL, VT), Flags);
3189        SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N1), VT, Mul, B, Flags);
3190        return DAG.getNode(
3191            ISD::ADD, DL, VT, Add,
3192            DAG.getConstant(CA * CM + CB->getAPIntValue(), DL, VT), Flags);
3193      }
3194    }
3195  }
3196
// Run the commutative helper with both operand orders.
3197  if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
3198    return Combined;
3199
3200  if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
3201    return Combined;
3202
3203  return SDValue();
3204}
3205
3206// Attempt to form avgfloor(A, B) from (A & B) + ((A ^ B) >> 1)
3207// Attempt to form avgfloor(A, B) from ((A >> 1) + (B >> 1)) + (A & B & 1)
3208// Attempt to form avgceil(A, B) from ((A >> 1) + (B >> 1)) + ((A | B) & 1)
// Unsigned variants use logical shifts (srl); signed variants use arithmetic
// shifts (sra). Each fold is gated on target support once operations must be
// legal.
// NOTE(review): several sd_match continuation lines (original lines 3216,
// 3218-19, 3226, 3228-29, 3237, 3244) are missing from this extraction, so
// the patterns below are incomplete here.
3209SDValue DAGCombiner::foldAddToAvg(SDNode *N, const SDLoc &DL) {
3210  SDValue N0 = N->getOperand(0);
3211  EVT VT = N0.getValueType();
3212  SDValue A, B;
3213
3214  if ((!LegalOperations || hasOperation(ISD::AVGFLOORU, VT)) &&
3215      (sd_match(N,
3217                       m_Srl(m_Xor(m_Deferred(A), m_Deferred(B)), m_One()))) ||
3220                       m_Srl(m_Deferred(A), m_One()),
3221                       m_Srl(m_Deferred(B), m_One()))))) {
3222    return DAG.getNode(ISD::AVGFLOORU, DL, VT, A, B);
3223  }
3224  if ((!LegalOperations || hasOperation(ISD::AVGFLOORS, VT)) &&
3225      (sd_match(N,
3227                       m_Sra(m_Xor(m_Deferred(A), m_Deferred(B)), m_One()))) ||
3230                       m_Sra(m_Deferred(A), m_One()),
3231                       m_Sra(m_Deferred(B), m_One()))))) {
3232    return DAG.getNode(ISD::AVGFLOORS, DL, VT, A, B);
3233  }
3234
3235  if ((!LegalOperations || hasOperation(ISD::AVGCEILU, VT)) &&
3236      sd_match(N,
3238                   m_Srl(m_Deferred(A), m_One()),
3239                   m_Srl(m_Deferred(B), m_One())))) {
3240    return DAG.getNode(ISD::AVGCEILU, DL, VT, A, B);
3241  }
3242  if ((!LegalOperations || hasOperation(ISD::AVGCEILS, VT)) &&
3243      sd_match(N,
3245                   m_Sra(m_Deferred(A), m_One()),
3246                   m_Sra(m_Deferred(B), m_One())))) {
3247    return DAG.getNode(ISD::AVGCEILS, DL, VT, A, B);
3248  }
3249
3250  return SDValue();
3251}
3252
// Main combine entry point for ISD::ADD: delegates the generic add-like folds
// to visitADDLike, then applies ADD-specific folds (rotate/avg matching,
// disjoint-or, and VSCALE/STEP_VECTOR constant merging).
3253SDValue DAGCombiner::visitADD(SDNode *N) {
3254  SDValue N0 = N->getOperand(0);
3255  SDValue N1 = N->getOperand(1);
3256  EVT VT = N0.getValueType();
3257  SDLoc DL(N);
3258
3259  if (SDValue Combined = visitADDLike(N))
3260    return Combined;
3261
3262  if (SDValue V = foldAddSubBoolOfMaskedVal(N, DL, DAG))
3263    return V;
3264
3265  if (SDValue V = foldAddSubOfSignBit(N, DL, DAG))
3266    return V;
3267
3268  if (SDValue V = MatchRotate(N0, N1, SDLoc(N), /*FromAdd=*/true))
3269    return V;
3270
3271  // Try to match AVGFLOOR fixedwidth pattern
3272  if (SDValue V = foldAddToAvg(N, DL))
3273    return V;
3274
3275  // fold (a+b) -> (a|b) iff a and b share no bits.
3276  if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
3277      DAG.haveNoCommonBitsSet(N0, N1))
3278    return DAG.getNode(ISD::OR, DL, VT, N0, N1, SDNodeFlags::Disjoint);
3279
3280  // Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)).
3281  if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
3282    const APInt &C0 = N0->getConstantOperandAPInt(0);
3283    const APInt &C1 = N1->getConstantOperandAPInt(0);
3284    return DAG.getVScale(DL, VT, C0 + C1);
3285  }
3286
3287  // fold a+vscale(c1)+vscale(c2) -> a+vscale(c1+c2)
3288  if (N0.getOpcode() == ISD::ADD &&
3289      N0.getOperand(1).getOpcode() == ISD::VSCALE &&
3290      N1.getOpcode() == ISD::VSCALE) {
3291    const APInt &VS0 = N0.getOperand(1)->getConstantOperandAPInt(0);
3292    const APInt &VS1 = N1->getConstantOperandAPInt(0);
3293    SDValue VS = DAG.getVScale(DL, VT, VS0 + VS1);
3294    return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), VS);
3295  }
3296
3297  // Fold (add step_vector(c1), step_vector(c2)  to step_vector(c1+c2))
3298  if (N0.getOpcode() == ISD::STEP_VECTOR &&
3299      N1.getOpcode() == ISD::STEP_VECTOR) {
3300    const APInt &C0 = N0->getConstantOperandAPInt(0);
3301    const APInt &C1 = N1->getConstantOperandAPInt(0);
3302    APInt NewStep = C0 + C1;
3303    return DAG.getStepVector(DL, VT, NewStep);
3304  }
3305
3306  // Fold a + step_vector(c1) + step_vector(c2)  to a + step_vector(c1+c2)
// NOTE(review): a condition line is missing here (original line 3308,
// presumably "N0.getOperand(1).getOpcode() == ISD::STEP_VECTOR &&",
// mirroring the VSCALE case above) — confirm against the upstream source.
3307  if (N0.getOpcode() == ISD::ADD &&
3309      N1.getOpcode() == ISD::STEP_VECTOR) {
3310    const APInt &SV0 = N0.getOperand(1)->getConstantOperandAPInt(0);
3311    const APInt &SV1 = N1->getConstantOperandAPInt(0);
3312    APInt NewStep = SV0 + SV1;
3313    SDValue SV = DAG.getStepVector(DL, VT, NewStep);
3314    return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), SV);
3315  }
3316
3317  return SDValue();
3318}
3319
// Combine entry point for saturating adds (ISD::SADDSAT / ISD::UADDSAT):
// constant folding, identity folds, and demotion to a plain ADD when overflow
// is provably impossible.
// NOTE(review): lines 3337-38 (the canonicalize-constant-to-RHS condition)
// and 3347 (the vector zero-splat check) are missing from this extraction.
3320SDValue DAGCombiner::visitADDSAT(SDNode *N) {
3321  unsigned Opcode = N->getOpcode();
3322  SDValue N0 = N->getOperand(0);
3323  SDValue N1 = N->getOperand(1);
3324  EVT VT = N0.getValueType();
3325  bool IsSigned = Opcode == ISD::SADDSAT;
3326  SDLoc DL(N);
3327
3328  // fold (add_sat x, undef) -> -1
3329  if (N0.isUndef() || N1.isUndef())
3330    return DAG.getAllOnesConstant(DL, VT);
3331
3332  // fold (add_sat c1, c2) -> c3
3333  if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
3334    return C;
3335
3336  // canonicalize constant to RHS
3339    return DAG.getNode(Opcode, DL, VT, N1, N0);
3340
3341  // fold vector ops
3342  if (VT.isVector()) {
3343    if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
3344      return FoldedVOp;
3345
3346    // fold (add_sat x, 0) -> x, vector edition
3348      return N0;
3349  }
3350
3351  // fold (add_sat x, 0) -> x
3352  if (isNullConstant(N1))
3353    return N0;
3354
3355  // If it cannot overflow, transform into an add.
3356  if (DAG.willNotOverflowAdd(IsSigned, N0, N1))
3357    return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
3358
3359  return SDValue();
3360}
3361
// Look through TRUNCATE/ZERO_EXTEND/AND-with-1 wrappers to find an underlying
// carry value (result 1 of a UADDO/USUBO/UADDO_CARRY/USUBO_CARRY node) that
// is legal for the target and known to be 0/1. Returns an empty SDValue if V
// is not usable as a carry.
// NOTE(review): the first line of the signature (original line 3362,
// presumably "static SDValue getAsCarry(const TargetLowering &TLI, SDValue V,")
// is missing from this extraction; visitADDLikeCommutative calls it as
// getAsCarry(TLI, N1).
3363                          bool ForceCarryReconstruction = false) {
3364  bool Masked = false;
3365
3366  // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
3367  while (true) {
// When forcing carry reconstruction, stop as soon as the value is already i1.
3368    if (ForceCarryReconstruction && V.getValueType() == MVT::i1)
3369      return V;
3370
3371    if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
3372      V = V.getOperand(0);
3373      continue;
3374    }
3375
3376    if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
3377      if (ForceCarryReconstruction)
3378        return V;
3379
// Remember that the value was explicitly masked to one bit.
3380      Masked = true;
3381      V = V.getOperand(0);
3382      continue;
3383    }
3384
3385    break;
3386  }
3387
3388  // If this is not a carry, return.
3389  if (V.getResNo() != 1)
3390    return SDValue();
3391
3392  if (V.getOpcode() != ISD::UADDO_CARRY && V.getOpcode() != ISD::USUBO_CARRY &&
3393      V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
3394    return SDValue();
3395
3396  EVT VT = V->getValueType(0);
3397  if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
3398    return SDValue();
3399
3400  // If the result is masked, then no matter what kind of bool it is we can
3401  // return. If it isn't, then we need to make sure the bool type is either 0 or
3402  // 1 and not other values.
// NOTE(review): the right-hand side of this comparison (original line 3405,
// presumably "TargetLowering::ZeroOrOneBooleanContent)") is missing from this
// extraction.
3403  if (Masked ||
3404      TLI.getBooleanContents(V.getValueType()) ==
3406    return V;
3407
3408  return SDValue();
3409}
3410
3411/// Given the operands of an add/sub operation, see if the 2nd operand is a
3412/// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
3413/// the opcode and bypass the mask operation.
3414static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
3415 SelectionDAG &DAG, const SDLoc &DL) {
3416 if (N1.getOpcode() == ISD::ZERO_EXTEND)
3417 N1 = N1.getOperand(0);
3418
3419 if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
3420 return SDValue();
3421
3422 EVT VT = N0.getValueType();
3423 SDValue N10 = N1.getOperand(0);
3424 if (N10.getValueType() != VT && N10.getOpcode() == ISD::TRUNCATE)
3425 N10 = N10.getOperand(0);
3426
3427 if (N10.getValueType() != VT)
3428 return SDValue();
3429
3430 if (DAG.ComputeNumSignBits(N10) != VT.getScalarSizeInBits())
3431 return SDValue();
3432
3433 // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
3434 // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
3435 return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N10);
3436}
3437
3438/// Helper for doing combines based on N0 and N1 being added to each other.
// Called by visitADDLike with both operand orders, so each fold here only
// needs to consider one orientation of the pattern.
// NOTE(review): two lines are missing from this extraction (original lines
// 3496 and 3518); the affected conditions are flagged inline below.
3439SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
3440                                             SDNode *LocReference) {
3441  EVT VT = N0.getValueType();
3442  SDLoc DL(LocReference);
3443
3444  // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
3445  SDValue Y, N;
3446  if (sd_match(N1, m_Shl(m_Neg(m_Value(Y)), m_Value(N))))
3447    return DAG.getNode(ISD::SUB, DL, VT, N0,
3448                       DAG.getNode(ISD::SHL, DL, VT, Y, N));
3449
3450  if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
3451    return V;
3452
3453  // Look for:
3454  //   add (add x, 1), y
3455  // And if the target does not like this form then turn into:
3456  //   sub y, (xor x, -1)
3457  if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
3458      N0.hasOneUse() && isOneOrOneSplat(N0.getOperand(1)) &&
3459      // Limit this to after legalization if the add has wrap flags
3460      (Level >= AfterLegalizeDAG || (!N0->getFlags().hasNoUnsignedWrap() &&
3461                                     !N0->getFlags().hasNoSignedWrap()))) {
3462    SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
3463    return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
3464  }
3465
3466  if (N0.getOpcode() == ISD::SUB && N0.hasOneUse()) {
3467    // Hoist one-use subtraction by non-opaque constant:
3468    //   (x - C) + y  ->  (x + y) - C
3469    // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
3470    if (isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3471      SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
3472      return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
3473    }
3474    // Hoist one-use subtraction from non-opaque constant:
3475    //   (C - x) + y  ->  (y - x) + C
3476    if (isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
3477      SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
3478      return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0));
3479    }
3480  }
3481
3482  // add (mul x, C), x -> mul x, C+1
3483  if (N0.getOpcode() == ISD::MUL && N0.getOperand(0) == N1 &&
3484      isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true) &&
3485      N0.hasOneUse()) {
3486    SDValue NewC = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
3487                               DAG.getConstant(1, DL, VT));
3488    return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), NewC);
3489  }
3490
3491  // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
3492  // rather than 'add 0/-1' (the zext should get folded).
3493  // add (sext i1 Y), X --> sub X, (zext i1 Y)
// NOTE(review): the last condition line (original line 3496, presumably the
// TLI.getBooleanContents(...) == ZeroOrOneBooleanContent check described in
// the comment above) is missing from this extraction.
3494  if (N0.getOpcode() == ISD::SIGN_EXTEND &&
3495      N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
3497    SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
3498    return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
3499  }
3500
3501  // add X, (sextinreg Y i1) -> sub X, (and Y 1)
3502  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
3503    VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
3504    if (TN->getVT() == MVT::i1) {
3505      SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
3506                                 DAG.getConstant(1, DL, VT));
3507      return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
3508    }
3509  }
3510
3511  // (add X, (uaddo_carry Y, 0, Carry)) -> (uaddo_carry X, Y, Carry)
3512  if (N1.getOpcode() == ISD::UADDO_CARRY && isNullConstant(N1.getOperand(1)) &&
3513      N1.getResNo() == 0)
3514    return DAG.getNode(ISD::UADDO_CARRY, DL, N1->getVTList(),
3515                       N0, N1.getOperand(0), N1.getOperand(2));
3516
3517  // (add X, Carry) -> (uaddo_carry X, 0, Carry)
// NOTE(review): a guard line is missing here (original line 3518, presumably
// a TLI.isOperationLegalOrCustom(ISD::UADDO_CARRY, VT) check).
3519    if (SDValue Carry = getAsCarry(TLI, N1))
3520      return DAG.getNode(ISD::UADDO_CARRY, DL,
3521                         DAG.getVTList(VT, Carry.getValueType()), N0,
3522                         DAG.getConstant(0, DL, VT), Carry);
3523
3524  return SDValue();
3525}
3526
3527SDValue DAGCombiner::visitADDC(SDNode *N) {
3528 SDValue N0 = N->getOperand(0);
3529 SDValue N1 = N->getOperand(1);
3530 EVT VT = N0.getValueType();
3531 SDLoc DL(N);
3532
3533 // If the flag result is dead, turn this into an ADD.
3534 if (!N->hasAnyUseOfValue(1))
3535 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3536 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3537
3538 // canonicalize constant to RHS.
3539 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3540 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3541 if (N0C && !N1C)
3542 return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
3543
3544 // fold (addc x, 0) -> x + no carry out
3545 if (isNullConstant(N1))
3546 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
3547 DL, MVT::Glue));
3548
3549 // If it cannot overflow, transform into an add.
3551 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3552 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3553
3554 return SDValue();
3555}
3556
3557/**
3558 * Flips a boolean if it is cheaper to compute. If the Force parameters is set,
3559 * then the flip also occurs if computing the inverse is the same cost.
3560 * This function returns an empty SDValue in case it cannot flip the boolean
3561 * without increasing the cost of the computation. If you want to flip a boolean
3562 * no matter what, use DAG.getLogicalNOT.
3563 */
3565 const TargetLowering &TLI,
3566 bool Force) {
3567 if (Force && isa<ConstantSDNode>(V))
3568 return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
3569
3570 if (V.getOpcode() != ISD::XOR)
3571 return SDValue();
3572
3573 if (DAG.isBoolConstant(V.getOperand(1)) == true)
3574 return V.getOperand(0);
3575 if (Force && isConstOrConstSplat(V.getOperand(1), false))
3576 return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
3577 return SDValue();
3578}
3579
3580SDValue DAGCombiner::visitADDO(SDNode *N) {
3581 SDValue N0 = N->getOperand(0);
3582 SDValue N1 = N->getOperand(1);
3583 EVT VT = N0.getValueType();
3584 bool IsSigned = (ISD::SADDO == N->getOpcode());
3585
3586 EVT CarryVT = N->getValueType(1);
3587 SDLoc DL(N);
3588
3589 // If the flag result is dead, turn this into an ADD.
3590 if (!N->hasAnyUseOfValue(1))
3591 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3592 DAG.getUNDEF(CarryVT));
3593
3594 // canonicalize constant to RHS.
3597 return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
3598
3599 // fold (addo x, 0) -> x + no carry out
3600 if (isNullOrNullSplat(N1))
3601 return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
3602
3603 // If it cannot overflow, transform into an add.
3604 if (DAG.willNotOverflowAdd(IsSigned, N0, N1))
3605 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3606 DAG.getConstant(0, DL, CarryVT));
3607
3608 if (IsSigned) {
3609 // fold (saddo (xor a, -1), 1) -> (ssub 0, a).
3610 if (isBitwiseNot(N0) && isOneOrOneSplat(N1))
3611 return DAG.getNode(ISD::SSUBO, DL, N->getVTList(),
3612 DAG.getConstant(0, DL, VT), N0.getOperand(0));
3613 } else {
3614 // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
3615 if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
3616 SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
3617 DAG.getConstant(0, DL, VT), N0.getOperand(0));
3618 return CombineTo(
3619 N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
3620 }
3621
3622 if (SDValue Combined = visitUADDOLike(N0, N1, N))
3623 return Combined;
3624
3625 if (SDValue Combined = visitUADDOLike(N1, N0, N))
3626 return Combined;
3627 }
3628
3629 return SDValue();
3630}
3631
3632SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
3633 EVT VT = N0.getValueType();
3634 if (VT.isVector())
3635 return SDValue();
3636
3637 // (uaddo X, (uaddo_carry Y, 0, Carry)) -> (uaddo_carry X, Y, Carry)
3638 // If Y + 1 cannot overflow.
3639 if (N1.getOpcode() == ISD::UADDO_CARRY && isNullConstant(N1.getOperand(1))) {
3640 SDValue Y = N1.getOperand(0);
3641 SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
3643 return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(), N0, Y,
3644 N1.getOperand(2));
3645 }
3646
3647 // (uaddo X, Carry) -> (uaddo_carry X, 0, Carry)
3649 if (SDValue Carry = getAsCarry(TLI, N1))
3650 return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(), N0,
3651 DAG.getConstant(0, SDLoc(N), VT), Carry);
3652
3653 return SDValue();
3654}
3655
3656SDValue DAGCombiner::visitADDE(SDNode *N) {
3657 SDValue N0 = N->getOperand(0);
3658 SDValue N1 = N->getOperand(1);
3659 SDValue CarryIn = N->getOperand(2);
3660
3661 // canonicalize constant to RHS
3662 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3663 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3664 if (N0C && !N1C)
3665 return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
3666 N1, N0, CarryIn);
3667
3668 // fold (adde x, y, false) -> (addc x, y)
3669 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
3670 return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
3671
3672 return SDValue();
3673}
3674
3675SDValue DAGCombiner::visitUADDO_CARRY(SDNode *N) {
3676 SDValue N0 = N->getOperand(0);
3677 SDValue N1 = N->getOperand(1);
3678 SDValue CarryIn = N->getOperand(2);
3679 SDLoc DL(N);
3680
3681 // canonicalize constant to RHS
3682 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3683 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3684 if (N0C && !N1C)
3685 return DAG.getNode(ISD::UADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
3686
3687 // fold (uaddo_carry x, y, false) -> (uaddo x, y)
3688 if (isNullConstant(CarryIn)) {
3689 if (!LegalOperations ||
3690 TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
3691 return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
3692 }
3693
3694 // fold (uaddo_carry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
3695 if (isNullConstant(N0) && isNullConstant(N1)) {
3696 EVT VT = N0.getValueType();
3697 EVT CarryVT = CarryIn.getValueType();
3698 SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
3699 AddToWorklist(CarryExt.getNode());
3700 return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
3701 DAG.getConstant(1, DL, VT)),
3702 DAG.getConstant(0, DL, CarryVT));
3703 }
3704
3705 if (SDValue Combined = visitUADDO_CARRYLike(N0, N1, CarryIn, N))
3706 return Combined;
3707
3708 if (SDValue Combined = visitUADDO_CARRYLike(N1, N0, CarryIn, N))
3709 return Combined;
3710
3711 // We want to avoid useless duplication.
3712 // TODO: This is done automatically for binary operations. As UADDO_CARRY is
3713 // not a binary operation, this is not really possible to leverage this
3714 // existing mechanism for it. However, if more operations require the same
3715 // deduplication logic, then it may be worth generalize.
3716 SDValue Ops[] = {N1, N0, CarryIn};
3717 SDNode *CSENode =
3718 DAG.getNodeIfExists(ISD::UADDO_CARRY, N->getVTList(), Ops, N->getFlags());
3719 if (CSENode)
3720 return SDValue(CSENode, 0);
3721
3722 return SDValue();
3723}
3724
3725/**
3726 * If we are facing some sort of diamond carry propagation pattern try to
3727 * break it up to generate something like:
3728 * (uaddo_carry X, 0, (uaddo_carry A, B, Z):Carry)
3729 *
3730 * The end result is usually an increase in operation required, but because the
3731 * carry is now linearized, other transforms can kick in and optimize the DAG.
3732 *
3733 * Patterns typically look something like
3734 * (uaddo A, B)
3735 * / \
3736 * Carry Sum
3737 * | \
3738 * | (uaddo_carry *, 0, Z)
3739 * | /
3740 * \ Carry
3741 * | /
3742 * (uaddo_carry X, *, *)
3743 *
3744 * But numerous variation exist. Our goal is to identify A, B, X and Z and
3745 * produce a combine with a single path for carry propagation.
3746 */
3748 SelectionDAG &DAG, SDValue X,
3749 SDValue Carry0, SDValue Carry1,
3750 SDNode *N) {
3751 if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1)
3752 return SDValue();
3753 if (Carry1.getOpcode() != ISD::UADDO)
3754 return SDValue();
3755
3756 SDValue Z;
3757
3758 /**
3759 * First look for a suitable Z. It will present itself in the form of
3760 * (uaddo_carry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true
3761 */
3762 if (Carry0.getOpcode() == ISD::UADDO_CARRY &&
3763 isNullConstant(Carry0.getOperand(1))) {
3764 Z = Carry0.getOperand(2);
3765 } else if (Carry0.getOpcode() == ISD::UADDO &&
3766 isOneConstant(Carry0.getOperand(1))) {
3767 EVT VT = Carry0->getValueType(1);
3768 Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT);
3769 } else {
3770 // We couldn't find a suitable Z.
3771 return SDValue();
3772 }
3773
3774
3775 auto cancelDiamond = [&](SDValue A,SDValue B) {
3776 SDLoc DL(N);
3777 SDValue NewY =
3778 DAG.getNode(ISD::UADDO_CARRY, DL, Carry0->getVTList(), A, B, Z);
3779 Combiner.AddToWorklist(NewY.getNode());
3780 return DAG.getNode(ISD::UADDO_CARRY, DL, N->getVTList(), X,
3781 DAG.getConstant(0, DL, X.getValueType()),
3782 NewY.getValue(1));
3783 };
3784
3785 /**
3786 * (uaddo A, B)
3787 * |
3788 * Sum
3789 * |
3790 * (uaddo_carry *, 0, Z)
3791 */
3792 if (Carry0.getOperand(0) == Carry1.getValue(0)) {
3793 return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1));
3794 }
3795
3796 /**
3797 * (uaddo_carry A, 0, Z)
3798 * |
3799 * Sum
3800 * |
3801 * (uaddo *, B)
3802 */
3803 if (Carry1.getOperand(0) == Carry0.getValue(0)) {
3804 return cancelDiamond(Carry0.getOperand(0), Carry1.getOperand(1));
3805 }
3806
3807 if (Carry1.getOperand(1) == Carry0.getValue(0)) {
3808 return cancelDiamond(Carry1.getOperand(0), Carry0.getOperand(0));
3809 }
3810
3811 return SDValue();
3812}
3813
3814// If we are facing some sort of diamond carry/borrow in/out pattern try to
3815// match patterns like:
3816//
3817// (uaddo A, B) CarryIn
3818// | \ |
3819// | \ |
3820// PartialSum PartialCarryOutX /
3821// | | /
3822// | ____|____________/
3823// | / |
3824// (uaddo *, *) \________
3825// | \ \
3826// | \ |
3827// | PartialCarryOutY |
3828// | \ |
3829// | \ /
3830// AddCarrySum | ______/
3831// | /
3832// CarryOut = (or *, *)
3833//
3834// And generate UADDO_CARRY (or USUBO_CARRY) with two result values:
3835//
3836// {AddCarrySum, CarryOut} = (uaddo_carry A, B, CarryIn)
3837//
3838// Our goal is to identify A, B, and CarryIn and produce UADDO_CARRY/USUBO_CARRY
3839// with a single path for carry/borrow out propagation.
3841 SDValue N0, SDValue N1, SDNode *N) {
3842 SDValue Carry0 = getAsCarry(TLI, N0);
3843 if (!Carry0)
3844 return SDValue();
3845 SDValue Carry1 = getAsCarry(TLI, N1);
3846 if (!Carry1)
3847 return SDValue();
3848
3849 unsigned Opcode = Carry0.getOpcode();
3850 if (Opcode != Carry1.getOpcode())
3851 return SDValue();
3852 if (Opcode != ISD::UADDO && Opcode != ISD::USUBO)
3853 return SDValue();
3854 // Guarantee identical type of CarryOut
3855 EVT CarryOutType = N->getValueType(0);
3856 if (CarryOutType != Carry0.getValue(1).getValueType() ||
3857 CarryOutType != Carry1.getValue(1).getValueType())
3858 return SDValue();
3859
3860 // Canonicalize the add/sub of A and B (the top node in the above ASCII art)
3861 // as Carry0 and the add/sub of the carry in as Carry1 (the middle node).
3862 if (Carry1.getNode()->isOperandOf(Carry0.getNode()))
3863 std::swap(Carry0, Carry1);
3864
3865 // Check if nodes are connected in expected way.
3866 if (Carry1.getOperand(0) != Carry0.getValue(0) &&
3867 Carry1.getOperand(1) != Carry0.getValue(0))
3868 return SDValue();
3869
3870 // The carry in value must be on the righthand side for subtraction.
3871 unsigned CarryInOperandNum =
3872 Carry1.getOperand(0) == Carry0.getValue(0) ? 1 : 0;
3873 if (Opcode == ISD::USUBO && CarryInOperandNum != 1)
3874 return SDValue();
3875 SDValue CarryIn = Carry1.getOperand(CarryInOperandNum);
3876
3877 unsigned NewOp = Opcode == ISD::UADDO ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
3878 if (!TLI.isOperationLegalOrCustom(NewOp, Carry0.getValue(0).getValueType()))
3879 return SDValue();
3880
3881 // Verify that the carry/borrow in is plausibly a carry/borrow bit.
3882 CarryIn = getAsCarry(TLI, CarryIn, true);
3883 if (!CarryIn)
3884 return SDValue();
3885
3886 SDLoc DL(N);
3887 CarryIn = DAG.getBoolExtOrTrunc(CarryIn, DL, Carry1->getValueType(1),
3888 Carry1->getValueType(0));
3889 SDValue Merged =
3890 DAG.getNode(NewOp, DL, Carry1->getVTList(), Carry0.getOperand(0),
3891 Carry0.getOperand(1), CarryIn);
3892
3893 // Please note that because we have proven that the result of the UADDO/USUBO
3894 // of A and B feeds into the UADDO/USUBO that does the carry/borrow in, we can
3895 // therefore prove that if the first UADDO/USUBO overflows, the second
3896 // UADDO/USUBO cannot. For example consider 8-bit numbers where 0xFF is the
3897 // maximum value.
3898 //
3899 // 0xFF + 0xFF == 0xFE with carry but 0xFE + 1 does not carry
3900 // 0x00 - 0xFF == 1 with a carry/borrow but 1 - 1 == 0 (no carry/borrow)
3901 //
3902 // This is important because it means that OR and XOR can be used to merge
3903 // carry flags; and that AND can return a constant zero.
3904 //
3905 // TODO: match other operations that can merge flags (ADD, etc)
3906 DAG.ReplaceAllUsesOfValueWith(Carry1.getValue(0), Merged.getValue(0));
3907 if (N->getOpcode() == ISD::AND)
3908 return DAG.getConstant(0, DL, CarryOutType);
3909 return Merged.getValue(1);
3910}
3911
// Reconstruct a subtract-with-borrow chain from its canonicalized icmp form:
// carry_out = or(icmp ult A, B, and(icmp eq A, B, carry_in))
// InstCombine folds usub.with.overflow chains into this, losing the
// USUBO_CARRY that lowers to sbb/sbcs.
// NOTE(review): the function's name/signature line and the head of the
// sd_match pattern below were lost in extraction. The surviving tail is
// consistent with matching the or(setult, and(seteq, carry)) shape described
// above — restore the missing lines from the upstream source before building.
                                     const TargetLowering &TLI) {
  SDValue A, B, CarryIn;
                    m_Value(CarryIn)))))
    return SDValue();

  EVT IntVT = A.getValueType();
  // Skip vectors: USUBO_CARRY on a vector type has no legalization path and
  // would crash.
  if (IntVT.isVector() || !TLI.isOperationLegalOrCustom(
                              *DAG.getContext(), IntVT)))
    return SDValue();

  SDLoc DL(N);
  // Result 0 of USUBO_CARRY is the (unused) difference; result 1 is the
  // borrow-out, which replaces the original OR-of-icmps carry_out.
  SDVTList VTs = DAG.getVTList(IntVT, N->getValueType(0));
  return DAG.getNode(ISD::USUBO_CARRY, DL, VTs, A, B, CarryIn).getValue(1);
}
3938
3939SDValue DAGCombiner::visitUADDO_CARRYLike(SDValue N0, SDValue N1,
3940 SDValue CarryIn, SDNode *N) {
3941 // fold (uaddo_carry (xor a, -1), b, c) -> (usubo_carry b, a, !c) and flip
3942 // carry.
3943 if (isBitwiseNot(N0))
3944 if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) {
3945 SDLoc DL(N);
3946 SDValue Sub = DAG.getNode(ISD::USUBO_CARRY, DL, N->getVTList(), N1,
3947 N0.getOperand(0), NotC);
3948 return CombineTo(
3949 N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
3950 }
3951
3952 // Iff the flag result is dead:
3953 // (uaddo_carry (add|uaddo X, Y), 0, Carry) -> (uaddo_carry X, Y, Carry)
3954 // Don't do this if the Carry comes from the uaddo. It won't remove the uaddo
3955 // or the dependency between the instructions.
3956 if ((N0.getOpcode() == ISD::ADD ||
3957 (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0 &&
3958 N0.getValue(1) != CarryIn)) &&
3959 isNullConstant(N1) && !N->hasAnyUseOfValue(1))
3960 return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(),
3961 N0.getOperand(0), N0.getOperand(1), CarryIn);
3962
3963 /**
3964 * When one of the uaddo_carry argument is itself a carry, we may be facing
3965 * a diamond carry propagation. In which case we try to transform the DAG
3966 * to ensure linear carry propagation if that is possible.
3967 */
3968 if (auto Y = getAsCarry(TLI, N1)) {
3969 // Because both are carries, Y and Z can be swapped.
3970 if (auto R = combineUADDO_CARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
3971 return R;
3972 if (auto R = combineUADDO_CARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
3973 return R;
3974 }
3975
3976 return SDValue();
3977}
3978
3979SDValue DAGCombiner::visitSADDO_CARRYLike(SDValue N0, SDValue N1,
3980 SDValue CarryIn, SDNode *N) {
3981 // fold (saddo_carry (xor a, -1), b, c) -> (ssubo_carry b, a, !c)
3982 if (isBitwiseNot(N0)) {
3983 if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true))
3984 return DAG.getNode(ISD::SSUBO_CARRY, SDLoc(N), N->getVTList(), N1,
3985 N0.getOperand(0), NotC);
3986 }
3987
3988 return SDValue();
3989}
3990
3991SDValue DAGCombiner::visitSADDO_CARRY(SDNode *N) {
3992 SDValue N0 = N->getOperand(0);
3993 SDValue N1 = N->getOperand(1);
3994 SDValue CarryIn = N->getOperand(2);
3995 SDLoc DL(N);
3996
3997 // canonicalize constant to RHS
3998 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3999 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
4000 if (N0C && !N1C)
4001 return DAG.getNode(ISD::SADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
4002
4003 // fold (saddo_carry x, y, false) -> (saddo x, y)
4004 if (isNullConstant(CarryIn)) {
4005 if (!LegalOperations ||
4006 TLI.isOperationLegalOrCustom(ISD::SADDO, N->getValueType(0)))
4007 return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0, N1);
4008 }
4009
4010 if (SDValue Combined = visitSADDO_CARRYLike(N0, N1, CarryIn, N))
4011 return Combined;
4012
4013 if (SDValue Combined = visitSADDO_CARRYLike(N1, N0, CarryIn, N))
4014 return Combined;
4015
4016 return SDValue();
4017}
4018
4019// Attempt to create a USUBSAT(LHS, RHS) node with DstVT, performing a
4020// clamp/truncation if necessary.
4022 SDValue RHS, SelectionDAG &DAG,
4023 const SDLoc &DL) {
4024 assert(DstVT.getScalarSizeInBits() <= SrcVT.getScalarSizeInBits() &&
4025 "Illegal truncation");
4026
4027 if (DstVT == SrcVT)
4028 return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
4029
4030 // If the LHS is zero-extended then we can perform the USUBSAT as DstVT by
4031 // clamping RHS.
4033 DstVT.getScalarSizeInBits());
4034 if (!DAG.MaskedValueIsZero(LHS, UpperBits))
4035 return SDValue();
4036
4037 SDValue SatLimit =
4039 DstVT.getScalarSizeInBits()),
4040 DL, SrcVT);
4041 RHS = DAG.getNode(ISD::UMIN, DL, SrcVT, RHS, SatLimit);
4042 RHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, RHS);
4043 LHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, LHS);
4044 return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
4045}
4046
4047// Try to find umax(a,b) - b or a - umin(a,b) patterns that may be converted to
4048// usubsat(a,b), optionally as a truncated type.
4049SDValue DAGCombiner::foldSubToUSubSat(EVT DstVT, SDNode *N, const SDLoc &DL) {
4050 if (N->getOpcode() != ISD::SUB ||
4051 !(!LegalOperations || hasOperation(ISD::USUBSAT, DstVT)))
4052 return SDValue();
4053
4054 EVT SubVT = N->getValueType(0);
4055 SDValue Op0 = N->getOperand(0);
4056 SDValue Op1 = N->getOperand(1);
4057
4058 // Try to find umax(a,b) - b or a - umin(a,b) patterns
4059 // they may be converted to usubsat(a,b).
4060 if (Op0.getOpcode() == ISD::UMAX && Op0.hasOneUse()) {
4061 SDValue MaxLHS = Op0.getOperand(0);
4062 SDValue MaxRHS = Op0.getOperand(1);
4063 if (MaxLHS == Op1)
4064 return getTruncatedUSUBSAT(DstVT, SubVT, MaxRHS, Op1, DAG, DL);
4065 if (MaxRHS == Op1)
4066 return getTruncatedUSUBSAT(DstVT, SubVT, MaxLHS, Op1, DAG, DL);
4067 }
4068
4069 if (Op1.getOpcode() == ISD::UMIN && Op1.hasOneUse()) {
4070 SDValue MinLHS = Op1.getOperand(0);
4071 SDValue MinRHS = Op1.getOperand(1);
4072 if (MinLHS == Op0)
4073 return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinRHS, DAG, DL);
4074 if (MinRHS == Op0)
4075 return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinLHS, DAG, DL);
4076 }
4077
4078 // sub(a,trunc(umin(zext(a),b))) -> usubsat(a,trunc(umin(b,SatLimit)))
4079 if (Op1.getOpcode() == ISD::TRUNCATE &&
4080 Op1.getOperand(0).getOpcode() == ISD::UMIN &&
4081 Op1.getOperand(0).hasOneUse()) {
4082 SDValue MinLHS = Op1.getOperand(0).getOperand(0);
4083 SDValue MinRHS = Op1.getOperand(0).getOperand(1);
4084 if (MinLHS.getOpcode() == ISD::ZERO_EXTEND && MinLHS.getOperand(0) == Op0)
4085 return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinLHS, MinRHS,
4086 DAG, DL);
4087 if (MinRHS.getOpcode() == ISD::ZERO_EXTEND && MinRHS.getOperand(0) == Op0)
4088 return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinRHS, MinLHS,
4089 DAG, DL);
4090 }
4091
4092 return SDValue();
4093}
4094
4095// Refinement of DAG/Type Legalisation (promotion) when CTLZ is used for
4096// counting leading ones. Broadly, it replaces the substraction with a left
4097// shift.
4098//
4099// * DAG Legalisation Pattern:
4100//
4101// (sub (ctlz (zeroextend (not Src)))
4102// BitWidthDiff)
4103//
4104// if BitWidthDiff == BitWidth(Node) - BitWidth(Src)
4105// -->
4106//
4107// (ctlz_zero_poison (not (shl (anyextend Src)
4108// BitWidthDiff)))
4109//
4110// * Type Legalisation Pattern:
4111//
4112// (sub (ctlz (and (xor Src XorMask)
4113// AndMask))
4114// BitWidthDiff)
4115//
4116// if AndMask has only trailing ones
4117// and MaskBitWidth(AndMask) == BitWidth(Node) - BitWidthDiff
4118// and XorMask has more trailing ones than AndMask
4119// -->
4120//
4121// (ctlz_zero_poison (not (shl Src BitWidthDiff)))
4122template <class MatchContextClass>
4124 const SDLoc DL(N);
4125 SDValue N0 = N->getOperand(0);
4126 EVT VT = N0.getValueType();
4127 unsigned BitWidth = VT.getScalarSizeInBits();
4128
4129 MatchContextClass Matcher(DAG, DAG.getTargetLoweringInfo(), N);
4130
4131 APInt AndMask;
4132 APInt XorMask;
4133 uint64_t BitWidthDiff;
4134
4135 SDValue CtlzOp;
4136 SDValue Src;
4137
4138 if (!sd_context_match(
4139 N, Matcher, m_Sub(m_Ctlz(m_Value(CtlzOp)), m_ConstInt(BitWidthDiff))))
4140 return SDValue();
4141
4142 if (sd_context_match(CtlzOp, Matcher, m_ZExt(m_Not(m_Value(Src))))) {
4143 // DAG Legalisation Pattern:
4144 // (sub (ctlz (zero_extend (not Op)) BitWidthDiff))
4145 if ((BitWidth - Src.getValueType().getScalarSizeInBits()) != BitWidthDiff)
4146 return SDValue();
4147
4148 Src = DAG.getNode(ISD::ANY_EXTEND, DL, VT, Src);
4149 } else if (sd_context_match(CtlzOp, Matcher,
4150 m_And(m_Xor(m_Value(Src), m_ConstInt(XorMask)),
4151 m_ConstInt(AndMask)))) {
4152 // Type Legalisation Pattern:
4153 // (sub (ctlz (and (xor Op XorMask) AndMask)) BitWidthDiff)
4154 if (BitWidthDiff >= BitWidth)
4155 return SDValue();
4156 unsigned AndMaskWidth = BitWidth - BitWidthDiff;
4157 if (!(AndMask.isMask(AndMaskWidth) && XorMask.countr_one() >= AndMaskWidth))
4158 return SDValue();
4159 } else
4160 return SDValue();
4161
4162 SDValue ShiftConst = DAG.getShiftAmountConstant(BitWidthDiff, VT, DL);
4163 SDValue LShift = Matcher.getNode(ISD::SHL, DL, VT, Src, ShiftConst);
4164 SDValue Not =
4165 Matcher.getNode(ISD::XOR, DL, VT, LShift, DAG.getAllOnesConstant(DL, VT));
4166
4167 return Matcher.getNode(ISD::CTLZ_ZERO_POISON, DL, VT, Not);
4168}
4169
4170// Fold sub(x, mul(divrem(x,y)[0], y)) to divrem(x, y)[1]
4172 const SDLoc &DL) {
4173 assert(N->getOpcode() == ISD::SUB && "Node must be a SUB");
4174 SDValue Sub0 = N->getOperand(0);
4175 SDValue Sub1 = N->getOperand(1);
4176
4177 auto CheckAndFoldMulCase = [&](SDValue DivRem, SDValue MaybeY) -> SDValue {
4178 if ((DivRem.getOpcode() == ISD::SDIVREM ||
4179 DivRem.getOpcode() == ISD::UDIVREM) &&
4180 DivRem.getResNo() == 0 && DivRem.getOperand(0) == Sub0 &&
4181 DivRem.getOperand(1) == MaybeY) {
4182 return SDValue(DivRem.getNode(), 1);
4183 }
4184 return SDValue();
4185 };
4186
4187 if (Sub1.getOpcode() == ISD::MUL) {
4188 // (sub x, (mul divrem(x,y)[0], y))
4189 SDValue Mul0 = Sub1.getOperand(0);
4190 SDValue Mul1 = Sub1.getOperand(1);
4191
4192 if (SDValue Res = CheckAndFoldMulCase(Mul0, Mul1))
4193 return Res;
4194
4195 if (SDValue Res = CheckAndFoldMulCase(Mul1, Mul0))
4196 return Res;
4197
4198 } else if (Sub1.getOpcode() == ISD::SHL) {
4199 // Handle (sub x, (shl divrem(x,y)[0], C)) where y = 1 << C
4200 SDValue Shl0 = Sub1.getOperand(0);
4201 SDValue Shl1 = Sub1.getOperand(1);
4202 // Check if Shl0 is divrem(x, Y)[0]
4203 if ((Shl0.getOpcode() == ISD::SDIVREM ||
4204 Shl0.getOpcode() == ISD::UDIVREM) &&
4205 Shl0.getResNo() == 0 && Shl0.getOperand(0) == Sub0) {
4206
4207 SDValue Divisor = Shl0.getOperand(1);
4208
4209 ConstantSDNode *DivC = isConstOrConstSplat(Divisor);
4211 if (!DivC || !ShC)
4212 return SDValue();
4213
4214 if (DivC->getAPIntValue().isPowerOf2() &&
4215 DivC->getAPIntValue().logBase2() == ShC->getAPIntValue())
4216 return SDValue(Shl0.getNode(), 1);
4217 }
4218 }
4219 return SDValue();
4220}
4221
4222// Since it may not be valid to emit a fold to zero for vector initializers
4223// check if we can before folding.
4224static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
4225 SelectionDAG &DAG, bool LegalOperations) {
4226 if (!VT.isVector())
4227 return DAG.getConstant(0, DL, VT);
4228 if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
4229 return DAG.getConstant(0, DL, VT);
4230 return SDValue();
4231}
4232
4233SDValue DAGCombiner::visitSUB(SDNode *N) {
4234 SDValue N0 = N->getOperand(0);
4235 SDValue N1 = N->getOperand(1);
4236 EVT VT = N0.getValueType();
4237 unsigned BitWidth = VT.getScalarSizeInBits();
4238 SDLoc DL(N);
4239
4241 return V;
4242
4243 // fold (sub x, x) -> 0
4244 if (N0 == N1)
4245 return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
4246
4247 // fold (sub c1, c2) -> c3
4248 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N1}))
4249 return C;
4250
4251 // fold vector ops
4252 if (VT.isVector()) {
4253 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4254 return FoldedVOp;
4255
4256 // fold (sub x, 0) -> x, vector edition
4258 return N0;
4259 }
4260
4261 // (sub x, ([v]select (ult x, y), 0, y)) -> (umin x, (sub x, y))
4262 // (sub x, ([v]select (uge x, y), y, 0)) -> (umin x, (sub x, y))
4263 if (N1.hasOneUse() && hasUMin(VT)) {
4264 SDValue Y;
4265 auto MS0 = m_Specific(N0);
4266 auto MVY = m_Value(Y);
4267 auto MZ = m_Zero();
4268 auto MCC1 = m_SpecificCondCode(ISD::SETULT);
4269 auto MCC2 = m_SpecificCondCode(ISD::SETUGE);
4270
4271 if (sd_match(N1, m_SelectCCLike(MS0, MVY, MZ, m_Deferred(Y), MCC1)) ||
4272 sd_match(N1, m_SelectCCLike(MS0, MVY, m_Deferred(Y), MZ, MCC2)) ||
4273 sd_match(N1, m_VSelect(m_SetCC(MS0, MVY, MCC1), MZ, m_Deferred(Y))) ||
4274 sd_match(N1, m_VSelect(m_SetCC(MS0, MVY, MCC2), m_Deferred(Y), MZ)))
4275
4276 return DAG.getNode(ISD::UMIN, DL, VT, N0,
4277 DAG.getNode(ISD::SUB, DL, VT, N0, Y));
4278 }
4279
4280 if (SDValue NewSel = foldBinOpIntoSelect(N))
4281 return NewSel;
4282
4283 // fold (sub x, c) -> (add x, -c)
4284 if (ConstantSDNode *N1C = getAsNonOpaqueConstant(N1))
4285 return DAG.getNode(ISD::ADD, DL, VT, N0,
4286 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
4287
4288 if (isNullOrNullSplat(N0)) {
4289 // Right-shifting everything out but the sign bit followed by negation is
4290 // the same as flipping arithmetic/logical shift type without the negation:
4291 // -(X >>u 31) -> (X >>s 31)
4292 // -(X >>s 31) -> (X >>u 31)
4293 if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
4294 ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
4295 if (ShiftAmt && ShiftAmt->getAPIntValue() == (BitWidth - 1)) {
4296 auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
4297 if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
4298 return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
4299 }
4300 }
4301
4302 // 0 - X --> 0 if the sub is NUW.
4303 if (N->getFlags().hasNoUnsignedWrap())
4304 return N0;
4305
4307 // N1 is either 0 or the minimum signed value. If the sub is NSW, then
4308 // N1 must be 0 because negating the minimum signed value is undefined.
4309 if (N->getFlags().hasNoSignedWrap())
4310 return N0;
4311
4312 // 0 - X --> X if X is 0 or the minimum signed value.
4313 return N1;
4314 }
4315
4316 // Convert 0 - abs(x).
4317 if (N1.getOpcode() == ISD::ABS && N1.hasOneUse() &&
4319 if (SDValue Result = TLI.expandABS(N1.getNode(), DAG, true))
4320 return Result;
4321
4322 // Similar to the previous rule, but this time targeting an expanded abs.
4323 // (sub 0, (max X, (sub 0, X))) --> (min X, (sub 0, X))
4324 // as well as
4325 // (sub 0, (min X, (sub 0, X))) --> (max X, (sub 0, X))
4326 // Note that these two are applicable to both signed and unsigned min/max.
4327 SDValue X;
4328 SDValue S0;
4329 auto NegPat = m_AllOf(m_Neg(m_Deferred(X)), m_Value(S0));
4330 if (sd_match(N1, m_OneUse(m_AnyOf(m_SMax(m_Value(X), NegPat),
4331 m_UMax(m_Value(X), NegPat),
4332 m_SMin(m_Value(X), NegPat),
4333 m_UMin(m_Value(X), NegPat))))) {
4334 unsigned NewOpc = ISD::getInverseMinMaxOpcode(N1->getOpcode());
4335 if (hasOperation(NewOpc, VT))
4336 return DAG.getNode(NewOpc, DL, VT, X, S0);
4337 }
4338
4339 // Fold neg(splat(neg(x)) -> splat(x)
4340 if (VT.isVector()) {
4341 SDValue N1S = DAG.getSplatValue(N1, true);
4342 if (N1S && N1S.getOpcode() == ISD::SUB &&
4343 isNullConstant(N1S.getOperand(0)))
4344 return DAG.getSplat(VT, DL, N1S.getOperand(1));
4345 }
4346
4347 // sub 0, (and x, 1) --> SIGN_EXTEND_INREG x, i1
4348 if (N1.getOpcode() == ISD::AND && N1.hasOneUse() &&
4349 isOneOrOneSplat(N1->getOperand(1))) {
4350 EVT ExtVT = VT.changeElementType(*DAG.getContext(), MVT::i1);
4353 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, N1->getOperand(0),
4354 DAG.getValueType(ExtVT));
4355 }
4356 }
4357 }
4358
4359 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
4361 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
4362
4363 // fold (A - (0-B)) -> A+B
4364 if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
4365 return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
4366
4367 // fold A-(A-B) -> B
4368 if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
4369 return N1.getOperand(1);
4370
4371 // fold (A+B)-A -> B
4372 if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
4373 return N0.getOperand(1);
4374
4375 // fold (A+B)-B -> A
4376 if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
4377 return N0.getOperand(0);
4378
4379 // fold (A+C1)-C2 -> A+(C1-C2)
4380 if (N0.getOpcode() == ISD::ADD) {
4381 SDValue N01 = N0.getOperand(1);
4382 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N01, N1}))
4383 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
4384 }
4385
4386 // fold C2-(A+C1) -> (C2-C1)-A
4387 if (N1.getOpcode() == ISD::ADD) {
4388 SDValue N11 = N1.getOperand(1);
4389 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N11}))
4390 return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
4391 }
4392
4393 // fold (A-C1)-C2 -> A-(C1+C2)
4394 if (N0.getOpcode() == ISD::SUB) {
4395 SDValue N01 = N0.getOperand(1);
4396 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N01, N1}))
4397 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
4398 }
4399
4400 // fold (c1-A)-c2 -> (c1-c2)-A
4401 if (N0.getOpcode() == ISD::SUB) {
4402 SDValue N00 = N0.getOperand(0);
4403 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N00, N1}))
4404 return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
4405 }
4406
4407 SDValue A, B, C;
4408
4409 // fold ((A+(B+C))-B) -> A+C
4410 if (sd_match(N0, m_Add(m_Value(A), m_Add(m_Specific(N1), m_Value(C)))))
4411 return DAG.getNode(ISD::ADD, DL, VT, A, C);
4412
4413 // fold ((A+(B-C))-B) -> A-C
4414 if (sd_match(N0, m_Add(m_Value(A), m_Sub(m_Specific(N1), m_Value(C)))))
4415 return DAG.getNode(ISD::SUB, DL, VT, A, C);
4416
4417 // fold ((A-(B-C))-C) -> A-B
4418 if (sd_match(N0, m_Sub(m_Value(A), m_Sub(m_Value(B), m_Specific(N1)))))
4419 return DAG.getNode(ISD::SUB, DL, VT, A, B);
4420
4421 // fold (A-(B-C)) -> A+(C-B)
4422 if (sd_match(N1, m_OneUse(m_Sub(m_Value(B), m_Value(C)))))
4423 return DAG.getNode(ISD::ADD, DL, VT, N0,
4424 DAG.getNode(ISD::SUB, DL, VT, C, B));
4425
4426 // A - (A & B) -> A & (~B)
4427 if (sd_match(N1, m_And(m_Specific(N0), m_Value(B))) &&
4428 (N1.hasOneUse() || isConstantOrConstantVector(B, /*NoOpaques=*/true)))
4429 return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getNOT(DL, B, VT));
4430
4431 // fold (A - (-B * C)) -> (A + (B * C))
4432 if (sd_match(N1, m_OneUse(m_Mul(m_Neg(m_Value(B)), m_Value(C)))))
4433 return DAG.getNode(ISD::ADD, DL, VT, N0,
4434 DAG.getNode(ISD::MUL, DL, VT, B, C));
4435
4436 // If either operand of a sub is undef, the result is undef
4437 if (N0.isUndef())
4438 return N0;
4439 if (N1.isUndef())
4440 return N1;
4441
4442 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DL, DAG))
4443 return V;
4444
4445 if (SDValue V = foldAddSubOfSignBit(N, DL, DAG))
4446 return V;
4447
4448 // Try to match AVGCEIL fixedwidth pattern
4449 if (SDValue V = foldSubToAvg(N, DL))
4450 return V;
4451
4452 if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, DL))
4453 return V;
4454
4455 if (SDValue V = foldSubToUSubSat(VT, N, DL))
4456 return V;
4457
4458 if (SDValue V = foldRemainderIdiom(N, DAG, DL))
4459 return V;
4460
4461 // (A - B) - 1 -> add (xor B, -1), A
4463 m_One(/*AllowUndefs=*/true))))
4464 return DAG.getNode(ISD::ADD, DL, VT, A, DAG.getNOT(DL, B, VT));
4465
4466 // Look for:
4467 // sub y, (xor x, -1)
4468 // And if the target does not like this form then turn into:
4469 // add (add x, y), 1
4470 if (TLI.preferIncOfAddToSubOfNot(VT) && N1.hasOneUse() && isBitwiseNot(N1)) {
4471 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(0));
4472 return DAG.getNode(ISD::ADD, DL, VT, Add, DAG.getConstant(1, DL, VT));
4473 }
4474
4475 // Hoist one-use addition by non-opaque constant:
4476 // (x + C) - y -> (x - y) + C
4477 if (!reassociationCanBreakAddressingModePattern(ISD::SUB, DL, N, N0, N1) &&
4478 N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
4479 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
4480 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
4481 return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
4482 }
4483 // y - (x + C) -> (y - x) - C
4484 if (N1.getOpcode() == ISD::ADD && N1.hasOneUse() &&
4485 isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) {
4486 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
4487 return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
4488 }
4489 // (x - C) - y -> (x - y) - C
4490 // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
4491 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
4492 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
4493 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
4494 return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
4495 }
4496 // (C - x) - y -> C - (x + y)
4497 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
4498 isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
4499 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1);
4500 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add);
4501 }
4502
4503 // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
4504 // rather than 'sub 0/1' (the sext should get folded).
4505 // sub X, (zext i1 Y) --> add X, (sext i1 Y)
4506 if (N1.getOpcode() == ISD::ZERO_EXTEND &&
4507 N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
4508 TLI.getBooleanContents(VT) ==
4510 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
4511 return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
4512 }
4513
4514 // fold B = sra (A, size(A)-1); sub (xor (A, B), B) -> (abs A)
4515 if ((!LegalOperations || hasOperation(ISD::ABS, VT)) &&
4517 sd_match(N0, m_Xor(m_Specific(A), m_Specific(N1))))
4518 return DAG.getNode(ISD::ABS, DL, VT, A);
4519
4520 // If the relocation model supports it, consider symbol offsets.
4521 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
4522 if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
4523 // fold (sub Sym+c1, Sym+c2) -> c1-c2
4524 if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
4525 if (GA->getGlobal() == GB->getGlobal())
4526 return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
4527 DL, VT);
4528 }
4529
4530 // sub X, (sextinreg Y i1) -> add X, (and Y 1)
4531 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
4532 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
4533 if (TN->getVT() == MVT::i1) {
4534 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
4535 DAG.getConstant(1, DL, VT));
4536 return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
4537 }
4538 }
4539
4540 // canonicalize (sub X, (vscale * C)) to (add X, (vscale * -C))
4541 // avoid if ISD::MUL handling is poor and ISD::SHL isn't an option.
4542 if (N1.getOpcode() == ISD::VSCALE && N1.hasOneUse()) {
4543 const APInt &IntVal = N1.getConstantOperandAPInt(0);
4544 if (!IntVal.isPowerOf2() ||
4545 hasOperation(ISD::MUL, N1.getOperand(0).getValueType()))
4546 return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getVScale(DL, VT, -IntVal));
4547 }
4548
4549 // canonicalize (sub X, step_vector(C)) to (add X, step_vector(-C))
4550 if (N1.getOpcode() == ISD::STEP_VECTOR && N1.hasOneUse()) {
4551 APInt NewStep = -N1.getConstantOperandAPInt(0);
4552 return DAG.getNode(ISD::ADD, DL, VT, N0,
4553 DAG.getStepVector(DL, VT, NewStep));
4554 }
4555
4556 // Prefer an add for more folding potential and possibly better codegen:
4557 // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
4558 if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
4559 SDValue ShAmt = N1.getOperand(1);
4560 ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
4561 if (ShAmtC && ShAmtC->getAPIntValue() == (BitWidth - 1)) {
4562 SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
4563 return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
4564 }
4565 }
4566
4567 // As with the previous fold, prefer add for more folding potential.
4568 // Subtracting SMIN/0 is the same as adding SMIN/0:
4569 // N0 - (X << BW-1) --> N0 + (X << BW-1)
4570 if (N1.getOpcode() == ISD::SHL) {
4571 ConstantSDNode *ShlC = isConstOrConstSplat(N1.getOperand(1));
4572 if (ShlC && ShlC->getAPIntValue() == (BitWidth - 1))
4573 return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
4574 }
4575
4576 // (sub (usubo_carry X, 0, Carry), Y) -> (usubo_carry X, Y, Carry)
4577 if (N0.getOpcode() == ISD::USUBO_CARRY && isNullConstant(N0.getOperand(1)) &&
4578 N0.getResNo() == 0 && N0.hasOneUse())
4579 return DAG.getNode(ISD::USUBO_CARRY, DL, N0->getVTList(),
4580 N0.getOperand(0), N1, N0.getOperand(2));
4581
4583 // (sub Carry, X) -> (uaddo_carry (sub 0, X), 0, Carry)
4584 if (SDValue Carry = getAsCarry(TLI, N0)) {
4585 SDValue X = N1;
4586 SDValue Zero = DAG.getConstant(0, DL, VT);
4587 SDValue NegX = DAG.getNode(ISD::SUB, DL, VT, Zero, X);
4588 return DAG.getNode(ISD::UADDO_CARRY, DL,
4589 DAG.getVTList(VT, Carry.getValueType()), NegX, Zero,
4590 Carry);
4591 }
4592 }
4593
4594 if (ConstantSDNode *C0 = isConstOrConstSplat(N0)) {
4595 const APInt &C0Val = C0->getAPIntValue();
4596
4597 // sub nuw C, x --> xor x, C when C is a mask (2^k - 1)
4598 if (N->getFlags().hasNoUnsignedWrap() && C0Val.isMask())
4599 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
4600
4601 // If there's no chance of borrowing from adjacent bits, then sub is xor:
4602 // sub C0, X --> xor X, C0
4603 if (!C0->isOpaque()) {
4604 const APInt &MaybeOnes = ~DAG.computeKnownBits(N1).Zero;
4605 if ((C0Val - MaybeOnes) == (C0Val ^ MaybeOnes))
4606 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
4607 }
4608 }
4609
4610 // smax(a,b) - smin(a,b) --> abds(a,b)
4611 if ((!LegalOperations || hasOperation(ISD::ABDS, VT)) &&
4612 sd_match(N0, &DAG, m_SMaxLike(m_Value(A), m_Value(B))) &&
4613 sd_match(N1, &DAG, m_SMinLike(m_Specific(A), m_Specific(B))))
4614 return DAG.getNode(ISD::ABDS, DL, VT, A, B);
4615
4616 // smin(a,b) - smax(a,b) --> neg(abds(a,b))
4617 if (hasOperation(ISD::ABDS, VT) &&
4618 sd_match(N0, &DAG, m_SMinLike(m_Value(A), m_Value(B))) &&
4619 sd_match(N1, &DAG, m_SMaxLike(m_Specific(A), m_Specific(B))))
4620 return DAG.getNegative(DAG.getNode(ISD::ABDS, DL, VT, A, B), DL, VT);
4621
4622 // umax(a,b) - umin(a,b) --> abdu(a,b)
4623 if ((!LegalOperations || hasOperation(ISD::ABDU, VT)) &&
4624 sd_match(N0, &DAG, m_UMaxLike(m_Value(A), m_Value(B))) &&
4625 sd_match(N1, &DAG, m_UMinLike(m_Specific(A), m_Specific(B))))
4626 return DAG.getNode(ISD::ABDU, DL, VT, A, B);
4627
4628 // umin(a,b) - umax(a,b) --> neg(abdu(a,b))
4629 if (hasOperation(ISD::ABDU, VT) &&
4630 sd_match(N0, &DAG, m_UMinLike(m_Value(A), m_Value(B))) &&
4631 sd_match(N1, &DAG, m_UMaxLike(m_Specific(A), m_Specific(B))))
4632 return DAG.getNegative(DAG.getNode(ISD::ABDU, DL, VT, A, B), DL, VT);
4633
4634 return SDValue();
4635}
4636
4637SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
4638 unsigned Opcode = N->getOpcode();
4639 SDValue N0 = N->getOperand(0);
4640 SDValue N1 = N->getOperand(1);
4641 EVT VT = N0.getValueType();
4642 bool IsSigned = Opcode == ISD::SSUBSAT;
4643 SDLoc DL(N);
4644
4645 // fold (sub_sat x, undef) -> 0
4646 if (N0.isUndef() || N1.isUndef())
4647 return DAG.getConstant(0, DL, VT);
4648
4649 // fold (sub_sat x, x) -> 0
4650 if (N0 == N1)
4651 return DAG.getConstant(0, DL, VT);
4652
4653 // fold (sub_sat c1, c2) -> c3
4654 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
4655 return C;
4656
4657 // fold vector ops
4658 if (VT.isVector()) {
4659 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4660 return FoldedVOp;
4661
4662 // fold (sub_sat x, 0) -> x, vector edition
4664 return N0;
4665 }
4666
4667 // fold (sub_sat x, 0) -> x
4668 if (isNullConstant(N1))
4669 return N0;
4670
4671 // If it cannot overflow, transform into an sub.
4672 if (DAG.willNotOverflowSub(IsSigned, N0, N1))
4673 return DAG.getNode(ISD::SUB, DL, VT, N0, N1);
4674
4675 return SDValue();
4676}
4677
4678SDValue DAGCombiner::visitSUBC(SDNode *N) {
4679 SDValue N0 = N->getOperand(0);
4680 SDValue N1 = N->getOperand(1);
4681 EVT VT = N0.getValueType();
4682 SDLoc DL(N);
4683
4684 // If the flag result is dead, turn this into an SUB.
4685 if (!N->hasAnyUseOfValue(1))
4686 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
4687 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4688
4689 // fold (subc x, x) -> 0 + no borrow
4690 if (N0 == N1)
4691 return CombineTo(N, DAG.getConstant(0, DL, VT),
4692 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4693
4694 // fold (subc x, 0) -> x + no borrow
4695 if (isNullConstant(N1))
4696 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4697
4698 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
4699 if (isAllOnesConstant(N0))
4700 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
4701 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4702
4703 return SDValue();
4704}
4705
4706SDValue DAGCombiner::visitSUBO(SDNode *N) {
4707 SDValue N0 = N->getOperand(0);
4708 SDValue N1 = N->getOperand(1);
4709 EVT VT = N0.getValueType();
4710 bool IsSigned = (ISD::SSUBO == N->getOpcode());
4711
4712 EVT CarryVT = N->getValueType(1);
4713 SDLoc DL(N);
4714
4715 // If the flag result is dead, turn this into an SUB.
4716 if (!N->hasAnyUseOfValue(1))
4717 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
4718 DAG.getUNDEF(CarryVT));
4719
4720 // fold (subo x, x) -> 0 + no borrow
4721 if (N0 == N1)
4722 return CombineTo(N, DAG.getConstant(0, DL, VT),
4723 DAG.getConstant(0, DL, CarryVT));
4724
4725 // fold (subox, c) -> (addo x, -c)
4726 if (ConstantSDNode *N1C = getAsNonOpaqueConstant(N1))
4727 if (IsSigned && !N1C->isMinSignedValue())
4728 return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
4729 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
4730
4731 // fold (subo x, 0) -> x + no borrow
4732 if (isNullOrNullSplat(N1))
4733 return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
4734
4735 // If it cannot overflow, transform into an sub.
4736 if (DAG.willNotOverflowSub(IsSigned, N0, N1))
4737 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
4738 DAG.getConstant(0, DL, CarryVT));
4739
4740 // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
4741 if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
4742 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
4743 DAG.getConstant(0, DL, CarryVT));
4744
4745 return SDValue();
4746}
4747
4748SDValue DAGCombiner::visitSUBE(SDNode *N) {
4749 SDValue N0 = N->getOperand(0);
4750 SDValue N1 = N->getOperand(1);
4751 SDValue CarryIn = N->getOperand(2);
4752
4753 // fold (sube x, y, false) -> (subc x, y)
4754 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
4755 return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
4756
4757 return SDValue();
4758}
4759
4760SDValue DAGCombiner::visitUSUBO_CARRY(SDNode *N) {
4761 SDValue N0 = N->getOperand(0);
4762 SDValue N1 = N->getOperand(1);
4763 SDValue CarryIn = N->getOperand(2);
4764
4765 // fold (usubo_carry x, y, false) -> (usubo x, y)
4766 if (isNullConstant(CarryIn)) {
4767 if (!LegalOperations ||
4768 TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
4769 return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
4770 }
4771
4772 return SDValue();
4773}
4774
4775SDValue DAGCombiner::visitSSUBO_CARRY(SDNode *N) {
4776 SDValue N0 = N->getOperand(0);
4777 SDValue N1 = N->getOperand(1);
4778 SDValue CarryIn = N->getOperand(2);
4779
4780 // fold (ssubo_carry x, y, false) -> (ssubo x, y)
4781 if (isNullConstant(CarryIn)) {
4782 if (!LegalOperations ||
4783 TLI.isOperationLegalOrCustom(ISD::SSUBO, N->getValueType(0)))
4784 return DAG.getNode(ISD::SSUBO, SDLoc(N), N->getVTList(), N0, N1);
4785 }
4786
4787 return SDValue();
4788}
4789
4790// Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX and
4791// UMULFIXSAT here.
4792SDValue DAGCombiner::visitMULFIX(SDNode *N) {
4793 SDValue N0 = N->getOperand(0);
4794 SDValue N1 = N->getOperand(1);
4795 SDValue Scale = N->getOperand(2);
4796 EVT VT = N0.getValueType();
4797
4798 // fold (mulfix x, undef, scale) -> 0
4799 if (N0.isUndef() || N1.isUndef())
4800 return DAG.getConstant(0, SDLoc(N), VT);
4801
4802 // Canonicalize constant to RHS (vector doesn't have to splat)
4805 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0, Scale);
4806
4807 // fold (mulfix x, 0, scale) -> 0
4808 if (isNullConstant(N1))
4809 return DAG.getConstant(0, SDLoc(N), VT);
4810
4811 return SDValue();
4812}
4813
/// Combine a multiply node. Instantiated for both plain ISD::MUL and the
/// VP (vector-predicated) form; MatchContextClass selects which. Several
/// folds are skipped when UseVP because their helpers do not yet understand
/// VP nodes (see the TODO comments below).
/// NOTE(review): several condition/pattern lines in this copy appear
/// truncated by the rendering that produced it — each spot is flagged
/// inline; verify against upstream before editing.
template <class MatchContextClass> SDValue DAGCombiner::visitMUL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  unsigned BitWidth = VT.getScalarSizeInBits();
  SDLoc DL(N);
  // True when this instantiation handles VP nodes rather than plain MUL.
  bool UseVP = std::is_same_v<MatchContextClass, VPMatchContext>;
  MatchContextClass Matcher(DAG, TLI, N);

  // fold (mul x, undef) -> 0
  if (N0.isUndef() || N1.isUndef())
    return DAG.getConstant(0, DL, VT);

  // fold (mul c1, c2) -> c1*c2
  if (SDValue C = DAG.FoldConstantArithmetic(ISD::MUL, DL, VT, {N0, N1}))
    return C;

  // canonicalize constant to RHS (vector doesn't have to splat)
  // NOTE(review): the guarding condition for this swap appears truncated in
  // this copy (embedded lines 4832-4833 missing) — verify against upstream.
    return Matcher.getNode(ISD::MUL, DL, VT, N1, N0);

  bool N1IsConst = false;
  bool N1IsOpaqueConst = false;
  APInt ConstValue1;

  // fold vector ops
  if (VT.isVector()) {
    // TODO: Change this to use SimplifyVBinOp when it supports VP op.
    if (!UseVP)
      if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
        return FoldedVOp;

    // For vectors, only a uniform splat constant counts as "N1 is constant".
    N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
    assert((!N1IsConst || ConstValue1.getBitWidth() == BitWidth) &&
           "Splat APInt should be element width");
  } else {
    N1IsConst = isa<ConstantSDNode>(N1);
    if (N1IsConst) {
      ConstValue1 = N1->getAsAPIntVal();
      N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
    }
  }

  // fold (mul x, 0) -> 0
  if (N1IsConst && ConstValue1.isZero())
    return N1;

  // fold (mul x, 1) -> x
  if (N1IsConst && ConstValue1.isOne())
    return N0;

  if (!UseVP)
    if (SDValue NewSel = foldBinOpIntoSelect(N))
      return NewSel;

  // fold (mul x, -1) -> 0-x
  if (N1IsConst && ConstValue1.isAllOnes())
    return Matcher.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);

  // fold (mul x, (1 << c)) -> x << c
  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
      (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
    if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
      EVT ShiftVT = getShiftAmountTy(N0.getValueType());
      SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
      SDNodeFlags Flags;
      Flags.setNoUnsignedWrap(N->getFlags().hasNoUnsignedWrap());
      // Preserve nsw when the shift amount is strictly less than BitWidth - 1,
      // i.e. the multiplier is not the signed minimum value.
      if (N->getFlags().hasNoSignedWrap() && N1IsConst &&
          ConstValue1.logBase2() < BitWidth - 1)
        Flags.setNoSignedWrap(true);
      return Matcher.getNode(ISD::SHL, DL, VT, N0, Trunc, Flags);
    }
  }

  // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
  if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isNegatedPowerOf2()) {
    unsigned Log2Val = (-ConstValue1).logBase2();

    // FIXME: If the input is something that is easily negated (e.g. a
    // single-use add), we should put the negate there.
    return Matcher.getNode(
        ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
        Matcher.getNode(ISD::SHL, DL, VT, N0,
                        DAG.getShiftAmountConstant(Log2Val, VT, DL)));
  }

  // Attempt to reuse an existing umul_lohi/smul_lohi node, but only if the
  // hi result is in use in case we hit this mid-legalization.
  if (!UseVP) {
    for (unsigned LoHiOpc : {ISD::UMUL_LOHI, ISD::SMUL_LOHI}) {
      if (!LegalOperations || TLI.isOperationLegalOrCustom(LoHiOpc, VT)) {
        SDVTList LoHiVT = DAG.getVTList(VT, VT);
        // TODO: Can we match commutable operands with getNodeIfExists?
        if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N0, N1}))
          if (LoHi->hasAnyUseOfValue(1))
            return SDValue(LoHi, 0);
        if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N1, N0}))
          if (LoHi->hasAnyUseOfValue(1))
            return SDValue(LoHi, 0);
      }
    }
  }

  // Try to transform:
  // (1) multiply-by-(power-of-2 +/- 1) into shift and add/sub.
  // mul x, (2^N + 1) --> add (shl x, N), x
  // mul x, (2^N - 1) --> sub (shl x, N), x
  // Examples: x * 33 --> (x << 5) + x
  //           x * 15 --> (x << 4) - x
  //           x * -33 --> -((x << 5) + x)
  //           x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
  // (2) multiply-by-(power-of-2 +/- power-of-2) into shifts and add/sub.
  // mul x, (2^N + 2^M) --> (add (shl x, N), (shl x, M))
  // mul x, (2^N - 2^M) --> (sub (shl x, N), (shl x, M))
  // Examples: x * 0x8800 --> (x << 15) + (x << 11)
  //           x * 0xf800 --> (x << 16) - (x << 11)
  //           x * -0x8800 --> -((x << 15) + (x << 11))
  //           x * -0xf800 --> -((x << 16) - (x << 11)) ; (x << 11) - (x << 16)
  if (!UseVP && N1IsConst &&
      TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
    // TODO: We could handle more general decomposition of any constant by
    //       having the target set a limit on number of ops and making a
    //       callback to determine that sequence (similar to sqrt expansion).
    unsigned MathOp = ISD::DELETED_NODE;
    // Work on |C|; the negation (if any) is re-applied at the end.
    APInt MulC = ConstValue1.abs();
    // The constant `2` should be treated as (2^0 + 1).
    unsigned TZeros = MulC == 2 ? 0 : MulC.countr_zero();
    MulC.lshrInPlace(TZeros);
    if ((MulC - 1).isPowerOf2())
      MathOp = ISD::ADD;
    else if ((MulC + 1).isPowerOf2())
      MathOp = ISD::SUB;

    if (MathOp != ISD::DELETED_NODE) {
      unsigned ShAmt =
          MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2();
      ShAmt += TZeros;
      assert(ShAmt < BitWidth &&
             "multiply-by-constant generated out of bounds shift");
      SDValue Shl =
          DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT));
      // With trailing zeros the second term is (x << TZeros) instead of x.
      SDValue R =
          TZeros ? DAG.getNode(MathOp, DL, VT, Shl,
                               DAG.getNode(ISD::SHL, DL, VT, N0,
                                           DAG.getConstant(TZeros, DL, VT)))
                 : DAG.getNode(MathOp, DL, VT, Shl, N0);
      if (ConstValue1.isNegative())
        R = DAG.getNegative(R, DL, VT);
      return R;
    }
  }

  // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
  {
    SDValue X, C1;
    if (sd_context_match(N0, Matcher, m_Shl(m_Value(X), m_Value(C1))))
      if (SDValue C3 = DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N1, C1}))
        return DAG.getNode(ISD::MUL, DL, VT, X, C3);
  }

  // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
  // use.
  {
    SDValue X, C, Y;
    if (sd_context_match(
            N, Matcher,
      // NOTE(review): the match pattern lines appear truncated in this copy
      // (embedded lines 4983-4984 missing) — verify against upstream.
      SDValue Mul = Matcher.getNode(ISD::MUL, DL, VT, X, Y);
      return Matcher.getNode(ISD::SHL, DL, VT, Mul, C);
    }
  }

  // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
  if (sd_context_match(N0, Matcher, m_SpecificOpc(ISD::ADD)) &&
    // NOTE(review): the remaining conjuncts of this condition appear
    // truncated in this copy (embedded lines 4992-4994 missing) — verify
    // against upstream.
    return Matcher.getNode(
        ISD::ADD, DL, VT,
        Matcher.getNode(ISD::MUL, SDLoc(N0), VT, N0.getOperand(0), N1),
        Matcher.getNode(ISD::MUL, SDLoc(N1), VT, N0.getOperand(1), N1));

  // Fold (mul (vscale * C0), C1) to (vscale * (C0 * C1)).
  // avoid if ISD::MUL handling is poor and ISD::SHL isn't an option.
  ConstantSDNode *NC1 = isConstOrConstSplat(N1);
  if (!UseVP && N0.getOpcode() == ISD::VSCALE && NC1) {
    const APInt &C0 = N0.getConstantOperandAPInt(0);
    const APInt &C1 = NC1->getAPIntValue();
    if (!C0.isPowerOf2() || C1.isPowerOf2() ||
        hasOperation(ISD::MUL, NC1->getValueType(0)))
      return DAG.getVScale(DL, VT, C0 * C1);
  }

  // Fold (mul step_vector(C0), C1) to (step_vector(C0 * C1)).
  APInt MulVal;
  if (!UseVP && N0.getOpcode() == ISD::STEP_VECTOR &&
      ISD::isConstantSplatVector(N1.getNode(), MulVal)) {
    const APInt &C0 = N0.getConstantOperandAPInt(0);
    APInt NewStep = C0 * MulVal;
    return DAG.getStepVector(DL, VT, NewStep);
  }

  // Fold Y = sra (X, size(X)-1); mul (or (Y, 1), X) -> (abs X)
  SDValue X;
  if (!UseVP && (!LegalOperations || hasOperation(ISD::ABS, VT)) &&
      // NOTE(review): the sd_context_match call and its pattern appear
      // truncated in this copy (embedded lines 5023 and 5025 missing) —
      // verify against upstream.
          N, Matcher,
          m_Deferred(X)))) {
    return Matcher.getNode(ISD::ABS, DL, VT, X);
  }

  // Fold ((mul x, 0/undef) -> 0,
  //       (mul x, 1) -> x) -> x)
  // -> and(x, mask)
  // We can replace vectors with '0' and '1' factors with a clearing mask.
  if (VT.isFixedLengthVector()) {
    unsigned NumElts = VT.getVectorNumElements();
    SmallBitVector ClearMask;
    ClearMask.reserve(NumElts);
    // Records, per element, whether the factor is 0/undef (clear) and
    // returns true only for factors of 0, 1, or undef.
    auto IsClearMask = [&ClearMask](ConstantSDNode *V) {
      if (!V || V->isZero()) {
        ClearMask.push_back(true);
        return true;
      }
      ClearMask.push_back(false);
      return V->isOne();
    };
    if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::AND, VT)) &&
        ISD::matchUnaryPredicate(N1, IsClearMask, /*AllowUndefs*/ true)) {
      assert(N1.getOpcode() == ISD::BUILD_VECTOR && "Unknown constant vector");
      EVT LegalSVT = N1.getOperand(0).getValueType();
      SDValue Zero = DAG.getConstant(0, DL, LegalSVT);
      SDValue AllOnes = DAG.getAllOnesConstant(DL, LegalSVT);
      // NOTE(review): the declaration of `Mask` appears truncated in this
      // copy (embedded line 5052 missing) — verify against upstream.
      for (unsigned I = 0; I != NumElts; ++I)
        if (ClearMask[I])
          Mask[I] = Zero;
      return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getBuildVector(VT, DL, Mask));
    }
  }

  // reassociate mul
  // TODO: Change reassociateOps to support vp ops.
  if (!UseVP)
    if (SDValue RMUL = reassociateOps(ISD::MUL, DL, N0, N1, N->getFlags()))
      return RMUL;

  // Fold mul(vecreduce(x), vecreduce(y)) -> vecreduce(mul(x, y))
  // TODO: Change reassociateReduction to support vp ops.
  if (!UseVP)
    if (SDValue SD =
            reassociateReduction(ISD::VECREDUCE_MUL, ISD::MUL, DL, VT, N0, N1))
      return SD;

  // Simplify the operands using demanded-bits information.
  // NOTE(review): the condition guarding this return appears truncated in
  // this copy (embedded line 5074 missing) — verify against upstream.
    return SDValue(N, 0);

  return SDValue();
}
5079
5080/// Return true if divmod libcall is available.
5082 const SelectionDAG &DAG) {
5083 RTLIB::Libcall LC;
5084 EVT NodeType = Node->getValueType(0);
5085 if (!NodeType.isSimple())
5086 return false;
5087 switch (NodeType.getSimpleVT().SimpleTy) {
5088 default: return false; // No libcall for vector types.
5089 case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
5090 case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
5091 case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
5092 case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
5093 case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
5094 }
5095
5096 return DAG.getLibcalls().getLibcallImpl(LC) != RTLIB::Unsupported;
5097}
5098
5099/// Issue divrem if both quotient and remainder are needed.
5100SDValue DAGCombiner::useDivRem(SDNode *Node) {
5101 if (Node->use_empty())
5102 return SDValue(); // This is a dead node, leave it alone.
5103
5104 unsigned Opcode = Node->getOpcode();
5105 bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
5106 unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
5107
5108 // DivMod lib calls can still work on non-legal types if using lib-calls.
5109 EVT VT = Node->getValueType(0);
5110 if (VT.isVector() || !VT.isInteger())
5111 return SDValue();
5112
5113 if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
5114 return SDValue();
5115
5116 // If DIVREM is going to get expanded into a libcall,
5117 // but there is no libcall available, then don't combine.
5118 if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
5120 return SDValue();
5121
5122 // If div is legal, it's better to do the normal expansion
5123 unsigned OtherOpcode = 0;
5124 if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
5125 OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
5126 if (TLI.isOperationLegalOrCustom(Opcode, VT))
5127 return SDValue();
5128 } else {
5129 OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
5130 if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
5131 return SDValue();
5132 }
5133
5134 SDValue Op0 = Node->getOperand(0);
5135 SDValue Op1 = Node->getOperand(1);
5136 SDValue combined;
5137 for (SDNode *User : Op0->users()) {
5138 if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
5139 User->use_empty())
5140 continue;
5141 // Convert the other matching node(s), too;
5142 // otherwise, the DIVREM may get target-legalized into something
5143 // target-specific that we won't be able to recognize.
5144 unsigned UserOpc = User->getOpcode();
5145 if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
5146 User->getOperand(0) == Op0 &&
5147 User->getOperand(1) == Op1) {
5148 if (!combined) {
5149 if (UserOpc == OtherOpcode) {
5150 SDVTList VTs = DAG.getVTList(VT, VT);
5151 combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
5152 } else if (UserOpc == DivRemOpc) {
5153 combined = SDValue(User, 0);
5154 } else {
5155 assert(UserOpc == Opcode);
5156 continue;
5157 }
5158 }
5159 if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
5160 CombineTo(User, combined);
5161 else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
5162 CombineTo(User, combined.getValue(1));
5163 }
5164 }
5165 return combined;
5166}
5167
5169 SDValue N0 = N->getOperand(0);
5170 SDValue N1 = N->getOperand(1);
5171 EVT VT = N->getValueType(0);
5172 SDLoc DL(N);
5173
5174 unsigned Opc = N->getOpcode();
5175 bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
5176
5177 // X / undef -> undef
5178 // X % undef -> undef
5179 // X / 0 -> undef
5180 // X % 0 -> undef
5181 // NOTE: This includes vectors where any divisor element is zero/undef.
5182 if (DAG.isUndef(Opc, {N0, N1}))
5183 return DAG.getUNDEF(VT);
5184
5185 // undef / X -> 0
5186 // undef % X -> 0
5187 if (N0.isUndef())
5188 return DAG.getConstant(0, DL, VT);
5189
5190 // 0 / X -> 0
5191 // 0 % X -> 0
5193 if (N0C && N0C->isZero())
5194 return N0;
5195
5196 // X / X -> 1
5197 // X % X -> 0
5198 if (N0 == N1)
5199 return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
5200
5201 // X / 1 -> X
5202 // X % 1 -> 0
5203 // If this is a boolean op (single-bit element type), we can't have
5204 // division-by-zero or remainder-by-zero, so assume the divisor is 1.
5205 // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
5206 // it's a 1.
5207 if (isOneOrOneSplat(N1) || (VT.getScalarType() == MVT::i1))
5208 return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
5209
5210 return SDValue();
5211}
5212
/// Combine an ISD::SDIV node: constant folding, strength reduction to
/// select/negate/udiv, and fusion with a matching SREM into SDIVREM.
SDValue DAGCombiner::visitSDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  // Result type a setcc over VT produces on this target.
  EVT CCVT = getSetCCResultType(VT);
  SDLoc DL(N);

  // fold (sdiv c1, c2) -> c1/c2
  if (SDValue C = DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, {N0, N1}))
    return C;

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
      return FoldedVOp;

  // fold (sdiv X, -1) -> 0-X
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  if (N1C && N1C->isAllOnes())
    return DAG.getNegative(N0, DL, VT);

  // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
  // Only X == MIN_SIGNED divides to 1; every other dividend yields 0.
  if (N1C && N1C->isMinSignedValue())
    return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
                         DAG.getConstant(1, DL, VT),
                         DAG.getConstant(0, DL, VT));

  if (SDValue V = simplifyDivRem(N, DAG))
    return V;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // If we know the sign bits of both operands are zero, strength reduce to a
  // udiv instead.  Handles (X&15) /s 4 -> X&15 >> 2
  if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
    return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);

  if (SDValue V = visitSDIVLike(N0, N1, N)) {
    // If the corresponding remainder node exists, update its users with
    // (Dividend - (Quotient * Divisor).
    if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
                                              { N0, N1 })) {
      // If the sdiv has the exact flag we shouldn't propagate it to the
      // remainder node.
      if (!N->getFlags().hasExact()) {
        SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
        SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
        AddToWorklist(Mul.getNode());
        AddToWorklist(Sub.getNode());
        // Side effect: rewrites the sibling SREM's users in place.
        CombineTo(RemNode, Sub);
      }
    }
    return V;
  }

  // sdiv, srem -> sdivrem
  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
  // true.  Otherwise, we break the simplification logic in visitREM().
  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue DivRem = useDivRem(N))
      return DivRem;

  return SDValue();
}
5279
5280static bool isDivisorPowerOfTwo(SDValue Divisor) {
5281 // Helper for determining whether a value is a power-2 constant scalar or a
5282 // vector of such elements.
5283 auto IsPowerOfTwo = [](ConstantSDNode *C) {
5284 if (C->isZero() || C->isOpaque())
5285 return false;
5286 if (C->getAPIntValue().isPowerOf2())
5287 return true;
5288 if (C->getAPIntValue().isNegatedPowerOf2())
5289 return true;
5290 return false;
5291 };
5292
5293 return ISD::matchUnaryPredicate(Divisor, IsPowerOfTwo, /*AllowUndefs=*/false,
5294 /*AllowTruncation=*/true);
5295}
5296
5297SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
5298 SDLoc DL(N);
5299 EVT VT = N->getValueType(0);
5300 EVT CCVT = getSetCCResultType(VT);
5301 unsigned BitWidth = VT.getScalarSizeInBits();
5302 unsigned MaxLegalDivRemBitWidth = TLI.getMaxDivRemBitWidthSupported();
5303
5304 // fold (sdiv X, pow2) -> simple ops after legalize
5305 // FIXME: We check for the exact bit here because the generic lowering gives
5306 // better results in that case. The target-specific lowering should learn how
5307 // to handle exact sdivs efficiently. An exception is made for large bitwidths
5308 // exceeding what the target can natively support, as division expansion was
5309 // skipped in favor of this optimization.
5310 if ((!N->getFlags().hasExact() || BitWidth > MaxLegalDivRemBitWidth) &&
5311 isDivisorPowerOfTwo(N1)) {
5312 // Target-specific implementation of sdiv x, pow2.
5313 if (SDValue Res = BuildSDIVPow2(N))
5314 return Res;
5315
5316 // Create constants that are functions of the shift amount value.
5317 EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
5318 SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
5319 SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
5320 C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
5321 SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
5322 if (!isConstantOrConstantVector(Inexact))
5323 return SDValue();
5324
5325 // Splat the sign bit into the register
5326 SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
5327 DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
5328 AddToWorklist(Sign.getNode());
5329
5330 // Add (N0 < 0) ? abs2 - 1 : 0;
5331 SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
5332 AddToWorklist(Srl.getNode());
5333 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
5334 AddToWorklist(Add.getNode());
5335 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
5336 AddToWorklist(Sra.getNode());
5337
5338 // Special case: (sdiv X, 1) -> X
5339 // Special Case: (sdiv X, -1) -> 0-X
5340 SDValue One = DAG.getConstant(1, DL, VT);
5342 SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
5343 SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
5344 SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
5345 Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);
5346
5347 // If dividing by a positive value, we're done. Otherwise, the result must
5348 // be negated.
5349 SDValue Zero = DAG.getConstant(0, DL, VT);
5350 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);
5351
5352 // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
5353 SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
5354 SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
5355 return Res;
5356 }
5357
5358 // If integer divide is expensive and we satisfy the requirements, emit an
5359 // alternate sequence. Targets may check function attributes for size/speed
5360 // trade-offs.
5361 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
5362 if (isConstantOrConstantVector(N1, /*NoOpaques=*/false,
5363 /*AllowTruncation=*/true) &&
5364 !TLI.isIntDivCheap(N->getValueType(0), Attr))
5365 if (SDValue Op = BuildSDIV(N))
5366 return Op;
5367
5368 return SDValue();
5369}
5370
5371SDValue DAGCombiner::visitUDIV(SDNode *N) {
5372 SDValue N0 = N->getOperand(0);
5373 SDValue N1 = N->getOperand(1);
5374 EVT VT = N->getValueType(0);
5375 EVT CCVT = getSetCCResultType(VT);
5376 SDLoc DL(N);
5377
5378 // fold (udiv c1, c2) -> c1/c2
5379 if (SDValue C = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, {N0, N1}))
5380 return C;
5381
5382 // fold vector ops
5383 if (VT.isVector())
5384 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5385 return FoldedVOp;
5386
5387 // fold (udiv X, -1) -> select(X == -1, 1, 0)
5388 ConstantSDNode *N1C = isConstOrConstSplat(N1);
5389 if (N1C && N1C->isAllOnes() && CCVT.isVector() == VT.isVector()) {
5390 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
5391 DAG.getConstant(1, DL, VT),
5392 DAG.getConstant(0, DL, VT));
5393 }
5394
5395 if (SDValue V = simplifyDivRem(N, DAG))
5396 return V;
5397
5398 if (SDValue NewSel = foldBinOpIntoSelect(N))
5399 return NewSel;
5400
5401 if (SDValue V = visitUDIVLike(N0, N1, N)) {
5402 // If the corresponding remainder node exists, update its users with
5403 // (Dividend - (Quotient * Divisor).
5404 if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
5405 { N0, N1 })) {
5406 // If the udiv has the exact flag we shouldn't propagate it to the
5407 // remainder node.
5408 if (!N->getFlags().hasExact()) {
5409 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
5410 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
5411 AddToWorklist(Mul.getNode());
5412 AddToWorklist(Sub.getNode());
5413 CombineTo(RemNode, Sub);
5414 }
5415 }
5416 return V;
5417 }
5418
5419 // sdiv, srem -> sdivrem
5420 // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
5421 // true. Otherwise, we break the simplification logic in visitREM().
5422 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
5423 if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
5424 if (SDValue DivRem = useDivRem(N))
5425 return DivRem;
5426
5427 // Simplify the operands using demanded-bits information.
5428 // We don't have demanded bits support for UDIV so this just enables constant
5429 // folding based on known bits.
5431 return SDValue(N, 0);
5432
5433 return SDValue();
5434}
5435
5436SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
5437 SDLoc DL(N);
5438 EVT VT = N->getValueType(0);
5439
5440 // fold (udiv x, (1 << c)) -> x >>u c
5441 if (isConstantOrConstantVector(N1, /*NoOpaques=*/true,
5442 /*AllowTruncation=*/true)) {
5443 if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
5444 AddToWorklist(LogBase2.getNode());
5445
5446 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
5447 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
5448 AddToWorklist(Trunc.getNode());
5449 return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
5450 }
5451 }
5452
5453 // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
5454 if (N1.getOpcode() == ISD::SHL) {
5455 SDValue N10 = N1.getOperand(0);
5456 if (isConstantOrConstantVector(N10, /*NoOpaques=*/true,
5457 /*AllowTruncation=*/true)) {
5458 if (SDValue LogBase2 = BuildLogBase2(N10, DL)) {
5459 AddToWorklist(LogBase2.getNode());
5460
5461 EVT ADDVT = N1.getOperand(1).getValueType();
5462 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
5463 AddToWorklist(Trunc.getNode());
5464 SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
5465 AddToWorklist(Add.getNode());
5466 return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
5467 }
5468 }
5469 }
5470
5471 // fold (udiv x, c) -> alternate
5472 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
5473 if (isConstantOrConstantVector(N1, /*NoOpaques=*/false,
5474 /*AllowTruncation=*/true) &&
5475 !TLI.isIntDivCheap(N->getValueType(0), Attr))
5476 if (SDValue Op = BuildUDIV(N))
5477 return Op;
5478
5479 return SDValue();
5480}
5481
5482SDValue DAGCombiner::buildOptimizedSREM(SDValue N0, SDValue N1, SDNode *N) {
5483 if (!N->getFlags().hasExact() && isDivisorPowerOfTwo(N1) &&
5484 !DAG.doesNodeExist(ISD::SDIV, N->getVTList(), {N0, N1})) {
5485 // Target-specific implementation of srem x, pow2.
5486 if (SDValue Res = BuildSREMPow2(N))
5487 return Res;
5488 }
5489 return SDValue();
5490}
5491
5492 // handles ISD::SREM and ISD::UREM
// Folds constants, strength-reduces srem -> urem (and urem-by-pow2 to a
// mask), and otherwise re-expresses X % C as X - (X / C) * C when the
// division-by-constant logic can lower the divide profitably.
5493 SDValue DAGCombiner::visitREM(SDNode *N) {
5494 unsigned Opcode = N->getOpcode();
5495 SDValue N0 = N->getOperand(0);
5496 SDValue N1 = N->getOperand(1);
5497 EVT VT = N->getValueType(0);
5498 EVT CCVT = getSetCCResultType(VT);
5499
5500 bool isSigned = (Opcode == ISD::SREM);
5501 SDLoc DL(N);
5502
5503 // fold (rem c1, c2) -> c1%c2
5504 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5505 return C;
5506
5507 // fold (urem X, -1) -> select(FX == -1, 0, FX)
5508 // Freeze the numerator to avoid a miscompile with an undefined value.
5509 if (!isSigned && llvm::isAllOnesOrAllOnesSplat(N1, /*AllowUndefs*/ false) &&
5510 CCVT.isVector() == VT.isVector()) {
5511 SDValue F0 = DAG.getFreeze(N0);
5512 SDValue EqualsNeg1 = DAG.getSetCC(DL, CCVT, F0, N1, ISD::SETEQ);
5513 return DAG.getSelect(DL, VT, EqualsNeg1, DAG.getConstant(0, DL, VT), F0);
5514 }
5515
5516 if (SDValue V = simplifyDivRem(N, DAG))
5517 return V;
5518
5519 if (SDValue NewSel = foldBinOpIntoSelect(N))
5520 return NewSel;
5521
5522 if (isSigned) {
5523 // If we know the sign bits of both operands are zero, strength reduce to a
5524 // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
5525 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
5526 return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
5527 } else {
5528 if (DAG.isKnownToBeAPowerOfTwo(N1, /*OrZero=*/true)) {
5529 // fold (urem x, pow2) -> (and x, pow2-1)
5530 SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
5531 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
5532 AddToWorklist(Add.getNode());
5533 return DAG.getNode(ISD::AND, DL, VT, N0, Add);
5534 }
5535 }
5536
5537 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
5538
5539 // If X/C can be simplified by the division-by-constant logic, lower
5540 // X%C to the equivalent of X-X/C*C.
5541 // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
5542 // speculative DIV must not cause a DIVREM conversion. We guard against this
5543 // by skipping the simplification if isIntDivCheap(). When div is not cheap,
5544 // combine will not return a DIVREM. Regardless, checking cheapness here
5545 // makes sense since the simplification results in fatter code.
5546 if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
5547 if (isSigned) {
5548 // check if we can build faster implementation for srem
5549 if (SDValue OptimizedRem = buildOptimizedSREM(N0, N1, N))
5550 return OptimizedRem;
5551 }
5552
5553 SDValue OptimizedDiv =
5554 isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
5555 if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != N) {
5556 // If the equivalent Div node also exists, update its users.
5557 unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
5558 if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
5559 { N0, N1 }))
5560 CombineTo(DivNode, OptimizedDiv);
5561 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
5562 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
5563 AddToWorklist(OptimizedDiv.getNode());
5564 AddToWorklist(Mul.getNode());
5565 return Sub;
5566 }
5567 }
5568
5569 // sdiv, srem -> sdivrem
// NOTE(review): this path runs for both opcodes, so presumably it also covers
// udiv, urem -> udivrem; the comment mentions only the signed form — confirm.
5570 if (SDValue DivRem = useDivRem(N))
5571 return DivRem.getValue(1);
5572
5573 // fold urem(urem(A, BCst), Op1Cst) -> urem(A, Op1Cst)
5574 // iff urem(BCst, Op1Cst) == 0
5575 SDValue A;
5576 APInt Op1Cst, BCst;
5577 if (sd_match(N, m_URem(m_URem(m_Value(A), m_ConstInt(BCst)),
5578 m_ConstInt(Op1Cst))) &&
5579 BCst.urem(Op1Cst).isZero()) {
5580 return DAG.getNode(ISD::UREM, DL, VT, A, DAG.getConstant(Op1Cst, DL, VT));
5581 }
5582
5583 // fold srem(srem(A, BCst), Op1Cst) -> srem(A, Op1Cst)
5584 // iff srem(BCst, Op1Cst) == 0 && Op1Cst != 1
5585 if (sd_match(N, m_SRem(m_SRem(m_Value(A), m_ConstInt(BCst)),
5586 m_ConstInt(Op1Cst))) &&
5587 BCst.srem(Op1Cst).isZero() && !Op1Cst.isAllOnes()) {
5588 return DAG.getNode(ISD::SREM, DL, VT, A, DAG.getConstant(Op1Cst, DL, VT));
5589 }
5590
5591 return SDValue();
5592 }
5593
5594SDValue DAGCombiner::visitMULHS(SDNode *N) {
5595 SDValue N0 = N->getOperand(0);
5596 SDValue N1 = N->getOperand(1);
5597 EVT VT = N->getValueType(0);
5598 SDLoc DL(N);
5599
5600 // fold (mulhs c1, c2)
5601 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHS, DL, VT, {N0, N1}))
5602 return C;
5603
5604 // canonicalize constant to RHS.
5607 return DAG.getNode(ISD::MULHS, DL, N->getVTList(), N1, N0);
5608
5609 if (VT.isVector()) {
5610 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5611 return FoldedVOp;
5612
5613 // fold (mulhs x, 0) -> 0
5614 // do not return N1, because undef node may exist.
5616 return DAG.getConstant(0, DL, VT);
5617 }
5618
5619 // fold (mulhs x, 0) -> 0
5620 if (isNullConstant(N1))
5621 return N1;
5622
5623 // fold (mulhs x, 1) -> (sra x, size(x)-1)
5624 if (isOneConstant(N1))
5625 return DAG.getNode(
5626 ISD::SRA, DL, VT, N0,
5628
5629 // fold (mulhs x, undef) -> 0
5630 if (N0.isUndef() || N1.isUndef())
5631 return DAG.getConstant(0, DL, VT);
5632
5633 // If the type twice as wide is legal, transform the mulhs to a wider multiply
5634 // plus a shift.
5635 if (!TLI.isOperationLegalOrCustom(ISD::MULHS, VT) && VT.isSimple() &&
5636 !VT.isVector()) {
5637 MVT Simple = VT.getSimpleVT();
5638 unsigned SimpleSize = Simple.getSizeInBits();
5639 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5640 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5641 N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
5642 N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
5643 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
5644 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
5645 DAG.getShiftAmountConstant(SimpleSize, NewVT, DL));
5646 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
5647 }
5648 }
5649
5650 return SDValue();
5651}
5652
5653SDValue DAGCombiner::visitMULHU(SDNode *N) {
5654 SDValue N0 = N->getOperand(0);
5655 SDValue N1 = N->getOperand(1);
5656 EVT VT = N->getValueType(0);
5657 SDLoc DL(N);
5658
5659 // fold (mulhu c1, c2)
5660 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHU, DL, VT, {N0, N1}))
5661 return C;
5662
5663 // canonicalize constant to RHS.
5666 return DAG.getNode(ISD::MULHU, DL, N->getVTList(), N1, N0);
5667
5668 if (VT.isVector()) {
5669 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5670 return FoldedVOp;
5671
5672 // fold (mulhu x, 0) -> 0
5673 // do not return N1, because undef node may exist.
5675 return DAG.getConstant(0, DL, VT);
5676 }
5677
5678 // fold (mulhu x, 0) -> 0
5679 if (isNullConstant(N1))
5680 return N1;
5681
5682 // fold (mulhu x, 1) -> 0
5683 if (isOneConstant(N1))
5684 return DAG.getConstant(0, DL, VT);
5685
5686 // fold (mulhu x, undef) -> 0
5687 if (N0.isUndef() || N1.isUndef())
5688 return DAG.getConstant(0, DL, VT);
5689
5690 // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
5691 if (isConstantOrConstantVector(N1, /*NoOpaques=*/true,
5692 /*AllowTruncation=*/true) &&
5693 (!LegalOperations || hasOperation(ISD::SRL, VT))) {
5694 if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
5695 unsigned NumEltBits = VT.getScalarSizeInBits();
5696 SDValue SRLAmt = DAG.getNode(
5697 ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
5698 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
5699 SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
5700 return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
5701 }
5702 }
5703
5704 // If the type twice as wide is legal, transform the mulhu to a wider multiply
5705 // plus a shift.
5706 if (!TLI.isOperationLegalOrCustom(ISD::MULHU, VT) && VT.isSimple() &&
5707 !VT.isVector()) {
5708 MVT Simple = VT.getSimpleVT();
5709 unsigned SimpleSize = Simple.getSizeInBits();
5710 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5711 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5712 N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
5713 N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
5714 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
5715 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
5716 DAG.getShiftAmountConstant(SimpleSize, NewVT, DL));
5717 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
5718 }
5719 }
5720
5721 // Simplify the operands using demanded-bits information.
5722 // We don't have demanded bits support for MULHU so this just enables constant
5723 // folding based on known bits.
5725 return SDValue(N, 0);
5726
5727 return SDValue();
5728}
5729
// Combine the four averaging opcodes (AVGCEILS/AVGCEILU/AVGFLOORS/AVGFLOORU):
// constant-fold, canonicalize, fold trivial operands, narrow through
// extensions, and convert between floor/ceil forms when profitable.
// NOTE(review): several sd_match matcher expressions in this view appear
// truncated by extraction (embedded line numbers jump); flagged inline.
5730 SDValue DAGCombiner::visitAVG(SDNode *N) {
5731 unsigned Opcode = N->getOpcode();
5732 SDValue N0 = N->getOperand(0);
5733 SDValue N1 = N->getOperand(1);
5734 EVT VT = N->getValueType(0);
5735 SDLoc DL(N);
5736 bool IsSigned = Opcode == ISD::AVGCEILS || Opcode == ISD::AVGFLOORS;
5737
5738 // fold (avg c1, c2)
5739 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5740 return C;
5741
5742 // canonicalize constant to RHS.
// NOTE(review): the guarding condition (lines 5743-5744) is missing here.
5745 return DAG.getNode(Opcode, DL, N->getVTList(), N1, N0);
5746
5747 if (VT.isVector())
5748 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5749 return FoldedVOp;
5750
5751 // fold (avg x, undef) -> x
5752 if (N0.isUndef())
5753 return N1;
5754 if (N1.isUndef())
5755 return N0;
5756
5757 // fold (avg x, x) --> x
5758 if (N0 == N1 && Level >= AfterLegalizeTypes)
5759 return N0;
5760
5761 // fold (avgfloor x, 0) -> x >> 1
5762 SDValue X, Y;
// NOTE(review): the matcher for the signed case (line 5763) is missing here.
5764 return DAG.getNode(ISD::SRA, DL, VT, X,
5765 DAG.getShiftAmountConstant(1, VT, DL));
// NOTE(review): the matcher for the unsigned case (line 5766) is missing here.
5767 return DAG.getNode(ISD::SRL, DL, VT, X,
5768 DAG.getShiftAmountConstant(1, VT, DL));
5769
5770 // fold avgu(zext(x), zext(y)) -> zext(avgu(x, y))
5771 // fold avgs(sext(x), sext(y)) -> sext(avgs(x, y))
5772 if (!IsSigned &&
5773 sd_match(N, m_BinOp(Opcode, m_ZExt(m_Value(X)), m_ZExt(m_Value(Y)))) &&
5774 X.getValueType() == Y.getValueType() &&
5775 hasOperation(Opcode, X.getValueType())) {
5776 SDValue AvgU = DAG.getNode(Opcode, DL, X.getValueType(), X, Y);
5777 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, AvgU);
5778 }
5779 if (IsSigned &&
5780 sd_match(N, m_BinOp(Opcode, m_SExt(m_Value(X)), m_SExt(m_Value(Y)))) &&
5781 X.getValueType() == Y.getValueType() &&
5782 hasOperation(Opcode, X.getValueType())) {
5783 SDValue AvgS = DAG.getNode(Opcode, DL, X.getValueType(), X, Y);
5784 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, AvgS);
5785 }
5786
5787 // Fold avgflooru(x,y) -> avgceilu(x,y-1) iff y != 0
5788 // Fold avgflooru(x,y) -> avgceilu(x-1,y) iff x != 0
5789 // Check if avgflooru isn't legal/custom but avgceilu is.
5790 if (Opcode == ISD::AVGFLOORU && !hasOperation(ISD::AVGFLOORU, VT) &&
5791 (!LegalOperations || hasOperation(ISD::AVGCEILU, VT))) {
5792 if (DAG.isKnownNeverZero(N1))
5793 return DAG.getNode(
5794 ISD::AVGCEILU, DL, VT, N0,
5795 DAG.getNode(ISD::ADD, DL, VT, N1, DAG.getAllOnesConstant(DL, VT)));
5796 if (DAG.isKnownNeverZero(N0))
5797 return DAG.getNode(
5798 ISD::AVGCEILU, DL, VT, N1,
5799 DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getAllOnesConstant(DL, VT)));
5800 }
5801
5802 // Fold avgfloor((add nw x,y), 1) -> avgceil(x,y)
5803 // Fold avgfloor((add nw x,1), y) -> avgceil(x,y)
5804 if ((Opcode == ISD::AVGFLOORU && hasOperation(ISD::AVGCEILU, VT)) ||
5805 (Opcode == ISD::AVGFLOORS && hasOperation(ISD::AVGCEILS, VT))) {
5806 SDValue Add;
5807 if (sd_match(N,
5808 m_c_BinOp(Opcode,
// NOTE(review): the inner m_Add matcher (line 5809) is missing here.
5810 m_One())) ||
5811 sd_match(N, m_c_BinOp(Opcode,
// NOTE(review): the inner m_Add matcher (line 5812) is missing here.
5813 m_Value(Y)))) {
5814
5815 if (IsSigned && Add->getFlags().hasNoSignedWrap())
5816 return DAG.getNode(ISD::AVGCEILS, DL, VT, X, Y);
5817
5818 if (!IsSigned && Add->getFlags().hasNoUnsignedWrap())
5819 return DAG.getNode(ISD::AVGCEILU, DL, VT, X, Y);
5820 }
5821 }
5822
5823 // Fold avgfloors(x,y) -> avgflooru(x,y) if both x and y are non-negative
5824 if (Opcode == ISD::AVGFLOORS && hasOperation(ISD::AVGFLOORU, VT)) {
5825 if (DAG.SignBitIsZero(N0) && DAG.SignBitIsZero(N1))
5826 return DAG.getNode(ISD::AVGFLOORU, DL, VT, N0, N1);
5827 }
5828
5829 return SDValue();
5830 }
5831
// Combine ISD::ABDS/ABDU (absolute-difference) nodes: constant-fold,
// canonicalize, fold trivial operands, convert signed->unsigned when both
// sign bits are known zero, and narrow through matching extensions.
// NOTE(review): several guarding conditions/matchers in this view appear
// truncated by extraction (embedded line numbers jump); flagged inline.
5832 SDValue DAGCombiner::visitABD(SDNode *N) {
5833 unsigned Opcode = N->getOpcode();
5834 SDValue N0 = N->getOperand(0);
5835 SDValue N1 = N->getOperand(1);
5836 EVT VT = N->getValueType(0);
5837 SDLoc DL(N);
5838
5839 // fold (abd c1, c2)
5840 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5841 return C;
5842
5843 // canonicalize constant to RHS.
// NOTE(review): the guarding condition (lines 5844-5845) is missing here.
5846 return DAG.getNode(Opcode, DL, N->getVTList(), N1, N0);
5847
5848 if (VT.isVector())
5849 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5850 return FoldedVOp;
5851
5852 // fold (abd x, undef) -> 0
5853 if (N0.isUndef() || N1.isUndef())
5854 return DAG.getConstant(0, DL, VT);
5855
5856 // fold (abd x, x) -> 0
5857 if (N0 == N1)
5858 return DAG.getConstant(0, DL, VT);
5859
5860 SDValue X, Y;
5861
5862 // fold (abds x, 0) -> abs x
// NOTE(review): the matcher line (5863) is missing here.
5864 (!LegalOperations || hasOperation(ISD::ABS, VT)))
5865 return DAG.getNode(ISD::ABS, DL, VT, X);
5866
5867 // fold (abdu x, 0) -> x
// NOTE(review): the matcher line (5868) is missing here.
5869 return X;
5870
5871 // fold (abds x, y) -> (abdu x, y) iff both args are known positive
5872 if (Opcode == ISD::ABDS && hasOperation(ISD::ABDU, VT) &&
5873 DAG.SignBitIsZero(N0) && DAG.SignBitIsZero(N1))
5874 return DAG.getNode(ISD::ABDU, DL, VT, N1, N0);
5875
5876 // fold (abd? (?ext x), (?ext y)) -> (zext (abd? x, y))
// NOTE(review): the guarding match (lines 5877-5878) is missing here.
5879 EVT SmallVT = X.getScalarValueSizeInBits() > Y.getScalarValueSizeInBits()
5880 ? X.getValueType()
5881 : Y.getValueType();
5882 if (!LegalOperations || hasOperation(Opcode, SmallVT)) {
5883 SDValue ExtedX = DAG.getExtOrTrunc(X, SDLoc(X), SmallVT, N0->getOpcode());
5884 SDValue ExtedY = DAG.getExtOrTrunc(Y, SDLoc(Y), SmallVT, N0->getOpcode());
5885 SDValue SmallABD = DAG.getNode(Opcode, DL, SmallVT, {ExtedX, ExtedY});
5886 SDValue ZExted = DAG.getZExtOrTrunc(SmallABD, DL, VT);
5887 return ZExted;
5888 }
5889 }
5890
5891 // fold (abd? (?ext ty:x), small_const:c) -> (zext (abd? x, c))
// NOTE(review): the guarding match (lines 5892-5893) is missing here.
5894 EVT SmallVT = X.getValueType();
5895 if (!LegalOperations || hasOperation(Opcode, SmallVT)) {
5896 uint64_t Bits = SmallVT.getScalarSizeInBits();
5897 unsigned RelevantBits =
5898 (Opcode == ISD::ABDS) ? DAG.ComputeMaxSignificantBits(Y)
// NOTE(review): the unsigned alternative (line 5899) is missing here.
5900 bool TruncatingYIsCheap = TLI.isTruncateFree(Y, SmallVT) ||
// NOTE(review): the predicate-match call head (line 5901) is missing here.
5902 Y,
5903 [&](auto *C) {
5904 const APInt &YConst = C->getAsAPIntVal();
5905 return (Opcode == ISD::ABDS)
5906 ? YConst.isSignedIntN(Bits)
5907 : YConst.isIntN(Bits);
5908 },
5909 /*AllowUndefs=*/true);
5910
5911 if (RelevantBits <= Bits && TruncatingYIsCheap) {
5912 SDValue NewY = DAG.getNode(ISD::TRUNCATE, SDLoc(Y), SmallVT, Y);
5913 SDValue SmallABD = DAG.getNode(Opcode, DL, SmallVT, {X, NewY});
5914 return DAG.getZExtOrTrunc(SmallABD, DL, VT);
5915 }
5916 }
5917 }
5918
5919 return SDValue();
5920 }
5921
5922/// Perform optimizations common to nodes that compute two values. LoOp and HiOp
5923/// give the opcodes for the two computations that are being performed. Return
5924/// true if a simplification was made.
5925SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
5926 unsigned HiOp) {
5927 // If the high half is not needed, just compute the low half.
5928 bool HiExists = N->hasAnyUseOfValue(1);
5929 if (!HiExists && (!LegalOperations ||
5930 TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
5931 SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
5932 return CombineTo(N, Res, Res);
5933 }
5934
5935 // If the low half is not needed, just compute the high half.
5936 bool LoExists = N->hasAnyUseOfValue(0);
5937 if (!LoExists && (!LegalOperations ||
5938 TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) {
5939 SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
5940 return CombineTo(N, Res, Res);
5941 }
5942
5943 // If both halves are used, return as it is.
5944 if (LoExists && HiExists)
5945 return SDValue();
5946
5947 // If the two computed results can be simplified separately, separate them.
5948 if (LoExists) {
5949 SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
5950 AddToWorklist(Lo.getNode());
5951 SDValue LoOpt = combine(Lo.getNode());
5952 if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
5953 (!LegalOperations ||
5954 TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType())))
5955 return CombineTo(N, LoOpt, LoOpt);
5956 }
5957
5958 if (HiExists) {
5959 SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
5960 AddToWorklist(Hi.getNode());
5961 SDValue HiOpt = combine(Hi.getNode());
5962 if (HiOpt.getNode() && HiOpt != Hi &&
5963 (!LegalOperations ||
5964 TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType())))
5965 return CombineTo(N, HiOpt, HiOpt);
5966 }
5967
5968 return SDValue();
5969}
5970
// Combine an ISD::SMUL_LOHI node (signed multiply producing low and high
// halves): drop the unused half, constant-fold, canonicalize, or widen to a
// single double-width multiply plus shift.
// NOTE(review): two guarding conditions in this view appear truncated by
// extraction (embedded line numbers jump); flagged inline.
5971 SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
5972 if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
5973 return Res;
5974
5975 SDValue N0 = N->getOperand(0);
5976 SDValue N1 = N->getOperand(1);
5977 EVT VT = N->getValueType(0);
5978 SDLoc DL(N);
5979
5980 // Constant fold.
// NOTE(review): the both-operands-constant condition (line 5981) is missing.
5982 return DAG.getNode(ISD::SMUL_LOHI, DL, N->getVTList(), N0, N1);
5983
5984 // canonicalize constant to RHS (vector doesn't have to splat)
// NOTE(review): the guarding condition (lines 5985-5986) is missing here.
5987 return DAG.getNode(ISD::SMUL_LOHI, DL, N->getVTList(), N1, N0);
5988
5989 // If the type is twice as wide is legal, transform the mulhu to a wider
5990 // multiply plus a shift.
5991 if (VT.isSimple() && !VT.isVector()) {
5992 MVT Simple = VT.getSimpleVT();
5993 unsigned SimpleSize = Simple.getSizeInBits();
5994 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5995 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5996 SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
5997 SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
5998 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
5999 // Compute the high part as N1.
6000 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
6001 DAG.getShiftAmountConstant(SimpleSize, NewVT, DL));
6002 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
6003 // Compute the low part as N0.
6004 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
6005 return CombineTo(N, Lo, Hi);
6006 }
6007 }
6008
6009 return SDValue();
6010 }
6011
// Combine an ISD::UMUL_LOHI node (unsigned multiply producing low and high
// halves): drop the unused half, constant-fold, canonicalize, fold trivial
// multiplicands, or widen to a double-width multiply plus shift.
// NOTE(review): two guarding conditions in this view appear truncated by
// extraction (embedded line numbers jump); flagged inline.
6012 SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
6013 if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
6014 return Res;
6015
6016 SDValue N0 = N->getOperand(0);
6017 SDValue N1 = N->getOperand(1);
6018 EVT VT = N->getValueType(0);
6019 SDLoc DL(N);
6020
6021 // Constant fold.
// NOTE(review): the both-operands-constant condition (line 6022) is missing.
6023 return DAG.getNode(ISD::UMUL_LOHI, DL, N->getVTList(), N0, N1);
6024
6025 // canonicalize constant to RHS (vector doesn't have to splat)
// NOTE(review): the guarding condition (lines 6026-6027) is missing here.
6028 return DAG.getNode(ISD::UMUL_LOHI, DL, N->getVTList(), N1, N0);
6029
6030 // (umul_lohi N0, 0) -> (0, 0)
6031 if (isNullConstant(N1)) {
6032 SDValue Zero = DAG.getConstant(0, DL, VT);
6033 return CombineTo(N, Zero, Zero);
6034 }
6035
6036 // (umul_lohi N0, 1) -> (N0, 0)
6037 if (isOneConstant(N1)) {
6038 SDValue Zero = DAG.getConstant(0, DL, VT);
6039 return CombineTo(N, N0, Zero);
6040 }
6041
6042 // If the type is twice as wide is legal, transform the mulhu to a wider
6043 // multiply plus a shift.
6044 if (VT.isSimple() && !VT.isVector()) {
6045 MVT Simple = VT.getSimpleVT();
6046 unsigned SimpleSize = Simple.getSizeInBits();
6047 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
6048 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
6049 SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
6050 SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
6051 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
6052 // Compute the high part as N1.
6053 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
6054 DAG.getShiftAmountConstant(SimpleSize, NewVT, DL));
6055 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
6056 // Compute the low part as N0.
6057 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
6058 return CombineTo(N, Lo, Hi);
6059 }
6060 }
6061
6062 return SDValue();
6063 }
6064
// Combine ISD::SMULO/UMULO (multiply-with-overflow): constant-fold both
// results, canonicalize, fold trivial multiplicands, and drop the overflow
// check entirely when the DAG can prove no overflow occurs.
6065 SDValue DAGCombiner::visitMULO(SDNode *N) {
6066 SDValue N0 = N->getOperand(0);
6067 SDValue N1 = N->getOperand(1);
6068 EVT VT = N0.getValueType();
6069 bool IsSigned = (ISD::SMULO == N->getOpcode());
6070
6071 EVT CarryVT = N->getValueType(1);
6072 SDLoc DL(N);
6073
6074 ConstantSDNode *N0C = isConstOrConstSplat(N0);
6075 ConstantSDNode *N1C = isConstOrConstSplat(N1);
6076
6077 // fold operation with constant operands.
6078 // TODO: Move this to FoldConstantArithmetic when it supports nodes with
6079 // multiple results.
6080 if (N0C && N1C) {
6081 bool Overflow;
6082 APInt Result =
6083 IsSigned ? N0C->getAPIntValue().smul_ov(N1C->getAPIntValue(), Overflow)
6084 : N0C->getAPIntValue().umul_ov(N1C->getAPIntValue(), Overflow);
6085 return CombineTo(N, DAG.getConstant(Result, DL, VT),
6086 DAG.getBoolConstant(Overflow, DL, CarryVT, CarryVT));
6087 }
6088
6089 // canonicalize constant to RHS.
// NOTE(review): the guarding condition (lines 6090-6091) is missing in this
// view of the file (truncated by extraction).
6092 return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
6093
6094 // fold (mulo x, 0) -> 0 + no carry out
6095 if (isNullOrNullSplat(N1))
6096 return CombineTo(N, DAG.getConstant(0, DL, VT),
6097 DAG.getConstant(0, DL, CarryVT));
6098
6099 // (mulo x, 2) -> (addo x, x)
6100 // FIXME: This needs a freeze.
6101 if (N1C && N1C->getAPIntValue() == 2 &&
6102 (!IsSigned || VT.getScalarSizeInBits() > 2))
6103 return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, DL,
6104 N->getVTList(), N0, N0);
6105
6106 // A 1 bit SMULO overflows if both inputs are 1.
6107 if (IsSigned && VT.getScalarSizeInBits() == 1) {
6108 SDValue And = DAG.getNode(ISD::AND, DL, VT, N0, N1);
6109 SDValue Cmp = DAG.getSetCC(DL, CarryVT, And,
6110 DAG.getConstant(0, DL, VT), ISD::SETNE);
6111 return CombineTo(N, And, Cmp);
6112 }
6113
6114 // If it cannot overflow, transform into a mul.
6115 if (DAG.willNotOverflowMul(IsSigned, N0, N1))
6116 return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1),
6117 DAG.getConstant(0, DL, CarryVT));
6118 return SDValue();
6119 }
6120
6121 // Function to calculate whether the Min/Max pair of SDNodes (potentially
6122 // swapped around) make a signed saturate pattern, clamping to between a signed
6123 // saturate of -2^(BW-1) and 2^(BW-1)-1, or an unsigned saturate of 0 and 2^BW.
6124 // Returns the node being clamped and the bitwidth of the clamp in BW. Should
6125 // work with both SMIN/SMAX nodes and setcc/select combo. The operands are the
6126 // same as SimplifySelectCC. N0<N1 ? N2 : N3.
// NOTE(review): the first line of this function's signature (line 6127,
// containing the function name and first parameters) is missing from this
// view of the file (truncated by extraction); the call at line 6235 shows it
// takes (N0, N1, N2, N3, CC, BW, Unsigned, DAG).
6128 SDValue N3, ISD::CondCode CC, unsigned &BW,
6129 bool &Unsigned, SelectionDAG &DAG) {
// Lambda: classify an (N0 cmp N1 ? N2 : N3) quadruple as an SMIN/SMAX
// equivalent, returning the min/max opcode or 0 if it is neither.
6130 auto isSignedMinMax = [&](SDValue N0, SDValue N1, SDValue N2, SDValue N3,
6131 ISD::CondCode CC) {
6132 // The compare and select operand should be the same or the select operands
6133 // should be truncated versions of the comparison.
6134 if (N0 != N2 && (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0)))
6135 return 0;
6136 // The constants need to be the same or a truncated version of each other.
// NOTE(review): the definitions of N1C and N3C (lines 6137-6138) are missing
// here (truncated by extraction).
6139 if (!N1C || !N3C)
6140 return 0;
6141 const APInt &C1 = N1C->getAPIntValue().trunc(N1.getScalarValueSizeInBits());
6142 const APInt &C2 = N3C->getAPIntValue().trunc(N3.getScalarValueSizeInBits());
6143 if (C1.getBitWidth() < C2.getBitWidth() || C1 != C2.sext(C1.getBitWidth()))
6144 return 0;
6145 return CC == ISD::SETLT ? ISD::SMIN : (CC == ISD::SETGT ? ISD::SMAX : 0);
6146 };
6147
6148 // Check the initial value is a SMIN/SMAX equivalent.
6149 unsigned Opcode0 = isSignedMinMax(N0, N1, N2, N3, CC);
6150 if (!Opcode0)
6151 return SDValue();
6152
6153 // We could only need one range check, if the fptosi could never produce
6154 // the upper value.
6155 if (N0.getOpcode() == ISD::FP_TO_SINT && Opcode0 == ISD::SMAX) {
6156 if (isNullOrNullSplat(N3)) {
6157 EVT IntVT = N0.getValueType().getScalarType();
6158 EVT FPVT = N0.getOperand(0).getValueType().getScalarType();
6159 if (FPVT.isSimple()) {
6160 Type *InputTy = FPVT.getTypeForEVT(*DAG.getContext());
6161 const fltSemantics &Semantics = InputTy->getFltSemantics();
6162 uint32_t MinBitWidth =
6163 APFloatBase::semanticsIntSizeInBits(Semantics, /*isSigned*/ true);
6164 if (IntVT.getSizeInBits() >= MinBitWidth) {
6165 Unsigned = true;
6166 BW = PowerOf2Ceil(MinBitWidth);
6167 return N0;
6168 }
6169 }
6170 }
6171 }
6172
// Decompose the inner node (the second half of the clamp) into its
// compare-and-select quadruple, whatever form it takes.
6173 SDValue N00, N01, N02, N03;
6174 ISD::CondCode N0CC;
6175 switch (N0.getOpcode()) {
6176 case ISD::SMIN:
6177 case ISD::SMAX:
6178 N00 = N02 = N0.getOperand(0);
6179 N01 = N03 = N0.getOperand(1);
6180 N0CC = N0.getOpcode() == ISD::SMIN ? ISD::SETLT : ISD::SETGT;
6181 break;
6182 case ISD::SELECT_CC:
6183 N00 = N0.getOperand(0);
6184 N01 = N0.getOperand(1);
6185 N02 = N0.getOperand(2);
6186 N03 = N0.getOperand(3);
6187 N0CC = cast<CondCodeSDNode>(N0.getOperand(4))->get();
6188 break;
6189 case ISD::SELECT:
6190 case ISD::VSELECT:
6191 if (N0.getOperand(0).getOpcode() != ISD::SETCC)
6192 return SDValue();
6193 N00 = N0.getOperand(0).getOperand(0);
6194 N01 = N0.getOperand(0).getOperand(1);
6195 N02 = N0.getOperand(1);
6196 N03 = N0.getOperand(2);
6197 N0CC = cast<CondCodeSDNode>(N0.getOperand(0).getOperand(2))->get();
6198 break;
6199 default:
6200 return SDValue();
6201 }
6202
// The two halves must be one SMIN and one SMAX (in either order).
6203 unsigned Opcode1 = isSignedMinMax(N00, N01, N02, N03, N0CC);
6204 if (!Opcode1 || Opcode0 == Opcode1)
6205 return SDValue();
6206
6207 ConstantSDNode *MinCOp = isConstOrConstSplat(Opcode0 == ISD::SMIN ? N1 : N01);
6208 ConstantSDNode *MaxCOp = isConstOrConstSplat(Opcode0 == ISD::SMIN ? N01 : N1);
6209 if (!MinCOp || !MaxCOp || MinCOp->getValueType(0) != MaxCOp->getValueType(0))
6210 return SDValue();
6211
// Signed saturate: bounds are -2^(BW-1) and 2^(BW-1)-1.
6212 const APInt &MinC = MinCOp->getAPIntValue();
6213 const APInt &MaxC = MaxCOp->getAPIntValue();
6214 APInt MinCPlus1 = MinC + 1;
6215 if (-MaxC == MinCPlus1 && MinCPlus1.isPowerOf2()) {
6216 BW = MinCPlus1.exactLogBase2() + 1;
6217 Unsigned = false;
6218 return N02;
6219 }
6220
// Unsigned saturate: bounds are 0 and 2^BW - 1.
6221 if (MaxC == 0 && MinC != 0 && MinCPlus1.isPowerOf2()) {
6222 BW = MinCPlus1.exactLogBase2();
6223 Unsigned = true;
6224 return N02;
6225 }
6226
6227 return SDValue();
6228 }
6229
6231 SDValue N3, ISD::CondCode CC,
6232 SelectionDAG &DAG) {
6233 unsigned BW;
6234 bool Unsigned;
6235 SDValue Fp = isSaturatingMinMax(N0, N1, N2, N3, CC, BW, Unsigned, DAG);
6236 if (!Fp || Fp.getOpcode() != ISD::FP_TO_SINT)
6237 return SDValue();
6238 EVT FPVT = Fp.getOperand(0).getValueType();
6239 EVT NewVT = FPVT.changeElementType(*DAG.getContext(),
6240 EVT::getIntegerVT(*DAG.getContext(), BW));
6241 unsigned NewOpc = Unsigned ? ISD::FP_TO_UINT_SAT : ISD::FP_TO_SINT_SAT;
6242 if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(NewOpc, FPVT, NewVT))
6243 return SDValue();
6244 SDLoc DL(Fp);
6245 SDValue Sat = DAG.getNode(NewOpc, DL, NewVT, Fp.getOperand(0),
6246 DAG.getValueType(NewVT.getScalarType()));
6247 return DAG.getExtOrTrunc(!Unsigned, Sat, DL, N2->getValueType(0));
6248}
6249
6251 SDValue N3, ISD::CondCode CC,
6252 SelectionDAG &DAG) {
6253 // We are looking for UMIN(FPTOUI(X), (2^n)-1), which may have come via a
6254 // select/vselect/select_cc. The two operands pairs for the select (N2/N3) may
6255 // be truncated versions of the setcc (N0/N1).
6256 if ((N0 != N2 &&
6257 (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0))) ||
6258 N0.getOpcode() != ISD::FP_TO_UINT || CC != ISD::SETULT)
6259 return SDValue();
6262 if (!N1C || !N3C)
6263 return SDValue();
6264 const APInt &C1 = N1C->getAPIntValue();
6265 const APInt &C3 = N3C->getAPIntValue();
6266 if (!(C1 + 1).isPowerOf2() || C1.getBitWidth() < C3.getBitWidth() ||
6267 C1 != C3.zext(C1.getBitWidth()))
6268 return SDValue();
6269
6270 unsigned BW = (C1 + 1).exactLogBase2();
6271 EVT FPVT = N0.getOperand(0).getValueType();
6272 EVT NewVT = FPVT.changeElementType(*DAG.getContext(),
6273 EVT::getIntegerVT(*DAG.getContext(), BW));
6275 FPVT, NewVT))
6276 return SDValue();
6277
6278 SDValue Sat =
6279 DAG.getNode(ISD::FP_TO_UINT_SAT, SDLoc(N0), NewVT, N0.getOperand(0),
6280 DAG.getValueType(NewVT.getScalarType()));
6281 return DAG.getZExtOrTrunc(Sat, SDLoc(N0), N3.getValueType());
6282}
6283
6284SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
6285 SDValue N0 = N->getOperand(0);
6286 SDValue N1 = N->getOperand(1);
6287 EVT VT = N0.getValueType();
6288 unsigned Opcode = N->getOpcode();
6289 SDLoc DL(N);
6290
6291 // fold operation with constant operands.
6292 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
6293 return C;
6294
6295 // If the operands are the same, this is a no-op.
6296 if (N0 == N1)
6297 return N0;
6298
6299 // canonicalize constant to RHS
6302 return DAG.getNode(Opcode, DL, VT, N1, N0);
6303
6304 // fold vector ops
6305 if (VT.isVector())
6306 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
6307 return FoldedVOp;
6308
6309 // reassociate minmax
6310 if (SDValue RMINMAX = reassociateOps(Opcode, DL, N0, N1, N->getFlags()))
6311 return RMINMAX;
6312
6313 // If both operands are known to have the same sign (both non-negative or both
6314 // negative), flip between UMIN/UMAX and SMIN/SMAX.
6315 // Only do this if:
6316 // 1. The current op isn't legal and the flipped is.
6317 // 2. The saturation pattern is broken by canonicalization in InstCombine.
6318 bool IsOpIllegal = !TLI.isOperationLegal(Opcode, VT);
6319 bool IsSatBroken = Opcode == ISD::UMIN && N0.getOpcode() == ISD::SMAX;
6320
6321 if (IsSatBroken || IsOpIllegal) {
6322 auto HasKnownSameSign = [&](SDValue A, SDValue B) {
6323 if (A.isUndef() || B.isUndef())
6324 return true;
6325
6326 KnownBits KA = DAG.computeKnownBits(A);
6327 if (!KA.isNonNegative() && !KA.isNegative())
6328 return false;
6329
6330 KnownBits KB = DAG.computeKnownBits(B);
6331 if (KA.isNonNegative())
6332 return KB.isNonNegative();
6333 return KB.isNegative();
6334 };
6335
6336 if (HasKnownSameSign(N0, N1)) {
6337 unsigned AltOpcode = ISD::getOppositeSignednessMinMaxOpcode(Opcode);
6338 if ((IsSatBroken && IsOpIllegal) || TLI.isOperationLegal(AltOpcode, VT))
6339 return DAG.getNode(AltOpcode, DL, VT, N0, N1);
6340 }
6341 }
6342
6343 if (Opcode == ISD::SMIN || Opcode == ISD::SMAX)
6345 N0, N1, N0, N1, Opcode == ISD::SMIN ? ISD::SETLT : ISD::SETGT, DAG))
6346 return S;
6347 if (Opcode == ISD::UMIN)
6348 if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N0, N1, ISD::SETULT, DAG))
6349 return S;
6350
6351 // Fold min/max(vecreduce(x), vecreduce(y)) -> vecreduce(min/max(x, y))
6352 auto ReductionOpcode = [](unsigned Opcode) {
6353 switch (Opcode) {
6354 case ISD::SMIN:
6355 return ISD::VECREDUCE_SMIN;
6356 case ISD::SMAX:
6357 return ISD::VECREDUCE_SMAX;
6358 case ISD::UMIN:
6359 return ISD::VECREDUCE_UMIN;
6360 case ISD::UMAX:
6361 return ISD::VECREDUCE_UMAX;
6362 default:
6363 llvm_unreachable("Unexpected opcode");
6364 }
6365 };
6366 if (SDValue SD = reassociateReduction(ReductionOpcode(Opcode), Opcode,
6367 SDLoc(N), VT, N0, N1))
6368 return SD;
6369
6370 // Fold operation with vscale operands.
6371 if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
6372 uint64_t C0 = N0->getConstantOperandVal(0);
6373 uint64_t C1 = N1->getConstantOperandVal(0);
6374 if (Opcode == ISD::UMAX)
6375 return C0 > C1 ? N0 : N1;
6376 else if (Opcode == ISD::UMIN)
6377 return C0 > C1 ? N1 : N0;
6378 }
6379
6380 // If we know the range of vscale, see if we can fold it given a constant.
6381 if (N0.getOpcode() == ISD::VSCALE) {
6382 if (auto *C1 = dyn_cast<ConstantSDNode>(N1)) {
6383 bool ForSigned = (Opcode == ISD::SMAX || Opcode == ISD::SMIN);
6384 ConstantRange Range = DAG.computeConstantRange(N0, ForSigned);
6385
6386 const APInt &C1V = C1->getAPIntValue();
6387 if ((Opcode == ISD::UMAX && Range.getUnsignedMax().ule(C1V)) ||
6388 (Opcode == ISD::UMIN && Range.getUnsignedMin().uge(C1V)) ||
6389 (Opcode == ISD::SMAX && Range.getSignedMax().sle(C1V)) ||
6390 (Opcode == ISD::SMIN && Range.getSignedMin().sge(C1V))) {
6391 return N1;
6392 }
6393 }
6394 }
6395
6396 // Simplify the operands using demanded-bits information.
6398 return SDValue(N, 0);
6399
6400 return SDValue();
6401}
6402
/// If this is a bitwise logic instruction and both operands have the same
/// opcode, try to sink the other opcode after the logic instruction.
/// That is, rewrite logic_op(hand_op(X), hand_op(Y)) as
/// hand_op(logic_op(X, Y)), reducing two hand_op nodes to one when legality
/// and use-count constraints allow it.
SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  unsigned LogicOpcode = N->getOpcode();
  unsigned HandOpcode = N0.getOpcode();
  assert(ISD::isBitwiseLogicOp(LogicOpcode) && "Expected logic opcode");
  assert(HandOpcode == N1.getOpcode() && "Bad input!");

  // Bail early if none of these transforms apply.
  if (N0.getNumOperands() == 0)
    return SDValue();

  // FIXME: We should check number of uses of the operands to not increase
  // the instruction count for all transforms.

  // Handle size-changing casts (or sign_extend_inreg).
  SDValue X = N0.getOperand(0);
  SDValue Y = N1.getOperand(0);
  EVT XVT = X.getValueType();
  SDLoc DL(N);
  if (ISD::isExtOpcode(HandOpcode) || ISD::isExtVecInRegOpcode(HandOpcode) ||
      (HandOpcode == ISD::SIGN_EXTEND_INREG &&
       N0.getOperand(1) == N1.getOperand(1))) {
    // If both operands have other uses, this transform would create extra
    // instructions without eliminating anything.
    if (!N0.hasOneUse() && !N1.hasOneUse())
      return SDValue();
    // We need matching integer source types.
    if (XVT != Y.getValueType())
      return SDValue();
    // Don't create an illegal op during or after legalization. Don't ever
    // create an unsupported vector op.
    if ((VT.isVector() || LegalOperations) &&
        !TLI.isOperationLegalOrCustom(LogicOpcode, XVT))
      return SDValue();
    // Avoid infinite looping with PromoteIntBinOp.
    // TODO: Should we apply desirable/legal constraints to all opcodes?
    if ((HandOpcode == ISD::ANY_EXTEND ||
         HandOpcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
        LegalTypes && !TLI.isTypeDesirableForOp(LogicOpcode, XVT))
      return SDValue();
    // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
    // The narrow logic op is only known disjoint if the wide one was and the
    // hand op is a plain extension (so no bits are repositioned).
    SDNodeFlags LogicFlags;
    LogicFlags.setDisjoint(N->getFlags().hasDisjoint() &&
                           ISD::isExtOpcode(HandOpcode));
    SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y, LogicFlags);
    // sign_extend_inreg keeps its VT operand; plain casts are unary.
    if (HandOpcode == ISD::SIGN_EXTEND_INREG)
      return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
    return DAG.getNode(HandOpcode, DL, VT, Logic);
  }

  // logic_op (truncate x), (truncate y) --> truncate (logic_op x, y)
  if (HandOpcode == ISD::TRUNCATE) {
    // If both operands have other uses, this transform would create extra
    // instructions without eliminating anything.
    if (!N0.hasOneUse() && !N1.hasOneUse())
      return SDValue();
    // We need matching source types.
    if (XVT != Y.getValueType())
      return SDValue();
    // Don't create an illegal op during or after legalization.
    if (LegalOperations && !TLI.isOperationLegal(LogicOpcode, XVT))
      return SDValue();
    // Be extra careful sinking truncate. If it's free, there's no benefit in
    // widening a binop. Also, don't create a logic op on an illegal type.
    if (TLI.isZExtFree(VT, XVT) && TLI.isTruncateFree(XVT, VT))
      return SDValue();
    if (!TLI.isTypeLegal(XVT))
      return SDValue();
    SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
    return DAG.getNode(HandOpcode, DL, VT, Logic);
  }

  // For binops SHL/SRL/SRA/AND:
  // logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
  // Valid because these ops distribute over bitwise logic when the second
  // operand (shift amount / mask) is identical on both sides.
  if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL ||
       HandOpcode == ISD::SRA || HandOpcode == ISD::AND) &&
      N0.getOperand(1) == N1.getOperand(1)) {
    // If either operand has other uses, this transform is not an improvement.
    if (!N0.hasOneUse() || !N1.hasOneUse())
      return SDValue();
    SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
    return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
  }

  // Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
  if (HandOpcode == ISD::BSWAP) {
    // If either operand has other uses, this transform is not an improvement.
    if (!N0.hasOneUse() || !N1.hasOneUse())
      return SDValue();
    SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
    return DAG.getNode(HandOpcode, DL, VT, Logic);
  }

  // For funnel shifts FSHL/FSHR:
  // logic_op (OP x, x1, s), (OP y, y1, s) -->
  // --> OP (logic_op x, y), (logic_op, x1, y1), s
  if ((HandOpcode == ISD::FSHL || HandOpcode == ISD::FSHR) &&
      N0.getOperand(2) == N1.getOperand(2)) {
    if (!N0.hasOneUse() || !N1.hasOneUse())
      return SDValue();
    SDValue X1 = N0.getOperand(1);
    SDValue Y1 = N1.getOperand(1);
    SDValue S = N0.getOperand(2);
    SDValue Logic0 = DAG.getNode(LogicOpcode, DL, VT, X, Y);
    SDValue Logic1 = DAG.getNode(LogicOpcode, DL, VT, X1, Y1);
    return DAG.getNode(HandOpcode, DL, VT, Logic0, Logic1, S);
  }

  // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
  // Only perform this optimization up until type legalization, before
  // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by
  // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
  // we don't want to undo this promotion.
  // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
  // on scalars.
  if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) &&
      Level <= AfterLegalizeTypes) {
    // Input types must be integer and the same.
    // Also reject moving a legal-vector-typed op onto an illegal scalar type.
    if (XVT.isInteger() && XVT == Y.getValueType() &&
        !(VT.isVector() && TLI.isTypeLegal(VT) &&
          !XVT.isVector() && !TLI.isTypeLegal(XVT))) {
      SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
      return DAG.getNode(HandOpcode, DL, VT, Logic);
    }
  }

  // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
  // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
  // If both shuffles use the same mask, and both shuffle within a single
  // vector, then it is worthwhile to move the swizzle after the operation.
  // The type-legalizer generates this pattern when loading illegal
  // vector types from memory. In many cases this allows additional shuffle
  // optimizations.
  // There are other cases where moving the shuffle after the xor/and/or
  // is profitable even if shuffles don't perform a swizzle.
  // If both shuffles use the same mask, and both shuffles have the same first
  // or second operand, then it might still be profitable to move the shuffle
  // after the xor/and/or operation.
  if (HandOpcode == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
    auto *SVN0 = cast<ShuffleVectorSDNode>(N0);
    auto *SVN1 = cast<ShuffleVectorSDNode>(N1);
    assert(X.getValueType() == Y.getValueType() &&
           "Inputs to shuffles are not the same type");

    // Check that both shuffles use the same mask. The masks are known to be of
    // the same length because the result vector type is the same.
    // Check also that shuffles have only one use to avoid introducing extra
    // instructions.
    if (!SVN0->hasOneUse() || !SVN1->hasOneUse() ||
        !SVN0->getMask().equals(SVN1->getMask()))
      return SDValue();

    // Don't try to fold this node if it requires introducing a
    // build vector of all zeros that might be illegal at this stage.
    // (For XOR the shared operand must be replaced by zero, since
    //  A ^ A == 0 for the lanes taken from the common operand.)
    SDValue ShOp = N0.getOperand(1);
    if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
      ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);

    // (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C)
    if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
      SDValue Logic = DAG.getNode(LogicOpcode, DL, VT,
                                  N0.getOperand(0), N1.getOperand(0));
      return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask());
    }

    // Don't try to fold this node if it requires introducing a
    // build vector of all zeros that might be illegal at this stage.
    ShOp = N0.getOperand(0);
    if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
      ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);

    // (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B))
    if (N0.getOperand(0) == N1.getOperand(0) && ShOp.getNode()) {
      SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, N0.getOperand(1),
                                  N1.getOperand(1));
      return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask());
    }
  }

  return SDValue();
}
6587
6588/// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
6589SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
6590 const SDLoc &DL) {
6591 SDValue LL, LR, RL, RR, N0CC, N1CC;
6592 if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
6593 !isSetCCEquivalent(N1, RL, RR, N1CC))
6594 return SDValue();
6595
6596 assert(N0.getValueType() == N1.getValueType() &&
6597 "Unexpected operand types for bitwise logic op");
6598 assert(LL.getValueType() == LR.getValueType() &&
6599 RL.getValueType() == RR.getValueType() &&
6600 "Unexpected operand types for setcc");
6601
6602 // If we're here post-legalization or the logic op type is not i1, the logic
6603 // op type must match a setcc result type. Also, all folds require new
6604 // operations on the left and right operands, so those types must match.
6605 EVT VT = N0.getValueType();
6606 EVT OpVT = LL.getValueType();
6607 if (LegalOperations || VT.getScalarType() != MVT::i1)
6608 if (VT != getSetCCResultType(OpVT))
6609 return SDValue();
6610 if (OpVT != RL.getValueType())
6611 return SDValue();
6612
6613 ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
6614 ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
6615 bool IsInteger = OpVT.isInteger();
6616 if (LR == RR && CC0 == CC1 && IsInteger) {
6617 bool IsZero = isNullOrNullSplat(LR);
6618 bool IsNeg1 = isAllOnesOrAllOnesSplat(LR);
6619
6620 // All bits clear?
6621 bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
6622 // All sign bits clear?
6623 bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
6624 // Any bits set?
6625 bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
6626 // Any sign bits set?
6627 bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
6628
6629 // (and (seteq X, 0), (seteq Y, 0)) --> (seteq (or X, Y), 0)
6630 // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
6631 // (or (setne X, 0), (setne Y, 0)) --> (setne (or X, Y), 0)
6632 // (or (setlt X, 0), (setlt Y, 0)) --> (setlt (or X, Y), 0)
6633 if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
6634 SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
6635 AddToWorklist(Or.getNode());
6636 return DAG.getSetCC(DL, VT, Or, LR, CC1);
6637 }
6638
6639 // All bits set?
6640 bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
6641 // All sign bits set?
6642 bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
6643 // Any bits clear?
6644 bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
6645 // Any sign bits clear?
6646 bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
6647
6648 // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
6649 // (and (setlt X, 0), (setlt Y, 0)) --> (setlt (and X, Y), 0)
6650 // (or (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
6651 // (or (setgt X, -1), (setgt Y -1)) --> (setgt (and X, Y), -1)
6652 if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
6653 SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
6654 AddToWorklist(And.getNode());
6655 return DAG.getSetCC(DL, VT, And, LR, CC1);
6656 }
6657 }
6658
6659 // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
6660 // (or (seteq X, 0), (seteq X, -1)) --> (setult (add X, 1), 2)
6661 if (LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 && IsInteger &&
6662 ((IsAnd && CC0 == ISD::SETNE) || (!IsAnd && CC0 == ISD::SETEQ)) &&
6663 ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
6664 (isAllOnesConstant(LR) && isNullConstant(RR)))) {
6665 SDValue One = DAG.getConstant(1, DL, OpVT);
6666 SDValue Two = DAG.getConstant(2, DL, OpVT);
6667 SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
6668 AddToWorklist(Add.getNode());
6669 return DAG.getSetCC(DL, VT, Add, Two, IsAnd ? ISD::SETUGE : ISD::SETULT);
6670 }
6671
6672 // Try more general transforms if the predicates match and the only user of
6673 // the compares is the 'and' or 'or'.
6674 if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
6675 N0.hasOneUse() && N1.hasOneUse()) {
6676 // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
6677 // or (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
6678 if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
6679 SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
6680 SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
6681 SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
6682 SDValue Zero = DAG.getConstant(0, DL, OpVT);
6683 return DAG.getSetCC(DL, VT, Or, Zero, CC1);
6684 }
6685
6686 // Turn compare of constants whose difference is 1 bit into add+and+setcc.
6687 if ((IsAnd && CC1 == ISD::SETNE) || (!IsAnd && CC1 == ISD::SETEQ)) {
6688 // Match a shared variable operand and 2 non-opaque constant operands.
6689 auto MatchDiffPow2 = [&](ConstantSDNode *C0, ConstantSDNode *C1) {
6690 // The difference of the constants must be a single bit.
6691 const APInt &CMax =
6692 APIntOps::umax(C0->getAPIntValue(), C1->getAPIntValue());
6693 const APInt &CMin =
6694 APIntOps::umin(C0->getAPIntValue(), C1->getAPIntValue());
6695 return !C0->isOpaque() && !C1->isOpaque() && (CMax - CMin).isPowerOf2();
6696 };
6697 if (LL == RL && ISD::matchBinaryPredicate(LR, RR, MatchDiffPow2)) {
6698 // and/or (setcc X, CMax, ne), (setcc X, CMin, ne/eq) -->
6699 // setcc ((sub X, CMin), ~(CMax - CMin)), 0, ne/eq
6700 SDValue Max = DAG.getNode(ISD::UMAX, DL, OpVT, LR, RR);
6701 SDValue Min = DAG.getNode(ISD::UMIN, DL, OpVT, LR, RR);
6702 SDValue Offset = DAG.getNode(ISD::SUB, DL, OpVT, LL, Min);
6703 SDValue Diff = DAG.getNode(ISD::SUB, DL, OpVT, Max, Min);
6704 SDValue Mask = DAG.getNOT(DL, Diff, OpVT);
6705 SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Offset, Mask);
6706 SDValue Zero = DAG.getConstant(0, DL, OpVT);
6707 return DAG.getSetCC(DL, VT, And, Zero, CC0);
6708 }
6709 }
6710 }
6711
6712 // Canonicalize equivalent operands to LL == RL.
6713 if (LL == RR && LR == RL) {
6715 std::swap(RL, RR);
6716 }
6717
6718 // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
6719 // (or (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
6720 if (LL == RL && LR == RR) {
6721 ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, OpVT)
6722 : ISD::getSetCCOrOperation(CC0, CC1, OpVT);
6723 if (NewCC != ISD::SETCC_INVALID &&
6724 (!LegalOperations ||
6725 (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
6726 TLI.isOperationLegal(ISD::SETCC, OpVT))))
6727 return DAG.getSetCC(DL, VT, LL, LR, NewCC);
6728 }
6729
6730 return SDValue();
6731}
6732
6733static bool arebothOperandsNotSNan(SDValue Operand1, SDValue Operand2,
6734 SelectionDAG &DAG) {
6735 return DAG.isKnownNeverSNaN(Operand2) && DAG.isKnownNeverSNaN(Operand1);
6736}
6737
6738static bool arebothOperandsNotNan(SDValue Operand1, SDValue Operand2,
6739 SelectionDAG &DAG) {
6740 return DAG.isKnownNeverNaN(Operand2) && DAG.isKnownNeverNaN(Operand1);
6741}
6742
6743/// Returns an appropriate FP min/max opcode for clamping operations.
6744static unsigned getMinMaxOpcodeForClamp(bool IsMin, SDValue Operand1,
6745 SDValue Operand2, SelectionDAG &DAG,
6746 const TargetLowering &TLI) {
6747 EVT VT = Operand1.getValueType();
6748 unsigned IEEEOp = IsMin ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
6749 if (TLI.isOperationLegalOrCustom(IEEEOp, VT) &&
6750 arebothOperandsNotNan(Operand1, Operand2, DAG))
6751 return IEEEOp;
6752 unsigned PreferredOp = IsMin ? ISD::FMINNUM : ISD::FMAXNUM;
6753 if (TLI.isOperationLegalOrCustom(PreferredOp, VT))
6754 return PreferredOp;
6755 return ISD::DELETED_NODE;
6756}
6757
6758// FIXME: use FMINIMUMNUM if possible, such as for RISC-V.
6760 SDValue Operand1, SDValue Operand2, bool SetCCNoNaNs, ISD::CondCode CC,
6761 unsigned OrAndOpcode, SelectionDAG &DAG, bool isFMAXNUMFMINNUM_IEEE,
6762 bool isFMAXNUMFMINNUM) {
6763 // The optimization cannot be applied for all the predicates because
6764 // of the way FMINNUM/FMAXNUM and FMINNUM_IEEE/FMAXNUM_IEEE handle
6765 // NaNs. For FMINNUM_IEEE/FMAXNUM_IEEE, the optimization cannot be
6766 // applied at all if one of the operands is a signaling NaN.
6767
6768 // It is safe to use FMINNUM_IEEE/FMAXNUM_IEEE if all the operands
6769 // are non NaN values.
6770 if (((CC == ISD::SETLT || CC == ISD::SETLE) && (OrAndOpcode == ISD::OR)) ||
6771 ((CC == ISD::SETGT || CC == ISD::SETGE) && (OrAndOpcode == ISD::AND))) {
6772 return (SetCCNoNaNs || arebothOperandsNotNan(Operand1, Operand2, DAG)) &&
6773 isFMAXNUMFMINNUM_IEEE
6776 }
6777
6778 if (((CC == ISD::SETGT || CC == ISD::SETGE) && (OrAndOpcode == ISD::OR)) ||
6779 ((CC == ISD::SETLT || CC == ISD::SETLE) && (OrAndOpcode == ISD::AND))) {
6780 return (SetCCNoNaNs || arebothOperandsNotNan(Operand1, Operand2, DAG)) &&
6781 isFMAXNUMFMINNUM_IEEE
6784 }
6785
6786 // Both FMINNUM/FMAXNUM and FMINNUM_IEEE/FMAXNUM_IEEE handle quiet
6787 // NaNs in the same way. But, FMINNUM/FMAXNUM and FMINNUM_IEEE/
6788 // FMAXNUM_IEEE handle signaling NaNs differently. If we cannot prove
6789 // that there are not any sNaNs, then the optimization is not valid
6790 // for FMINNUM_IEEE/FMAXNUM_IEEE. In the presence of sNaNs, we apply
6791 // the optimization using FMINNUM/FMAXNUM for the following cases. If
6792 // we can prove that we do not have any sNaNs, then we can do the
6793 // optimization using FMINNUM_IEEE/FMAXNUM_IEEE for the following
6794 // cases.
6795 if (((CC == ISD::SETOLT || CC == ISD::SETOLE) && (OrAndOpcode == ISD::OR)) ||
6796 ((CC == ISD::SETUGT || CC == ISD::SETUGE) && (OrAndOpcode == ISD::AND))) {
6797 return isFMAXNUMFMINNUM ? ISD::FMINNUM
6798 : arebothOperandsNotSNan(Operand1, Operand2, DAG) &&
6799 isFMAXNUMFMINNUM_IEEE
6802 }
6803
6804 if (((CC == ISD::SETOGT || CC == ISD::SETOGE) && (OrAndOpcode == ISD::OR)) ||
6805 ((CC == ISD::SETULT || CC == ISD::SETULE) && (OrAndOpcode == ISD::AND))) {
6806 return isFMAXNUMFMINNUM ? ISD::FMAXNUM
6807 : arebothOperandsNotSNan(Operand1, Operand2, DAG) &&
6808 isFMAXNUMFMINNUM_IEEE
6811 }
6812
6813 return ISD::DELETED_NODE;
6814}
6815
6818 assert(
6819 (LogicOp->getOpcode() == ISD::AND || LogicOp->getOpcode() == ISD::OR) &&
6820 "Invalid Op to combine SETCC with");
6821
6822 // TODO: Search past casts/truncates.
6823 SDValue LHS = LogicOp->getOperand(0);
6824 SDValue RHS = LogicOp->getOperand(1);
6825 if (LHS->getOpcode() != ISD::SETCC || RHS->getOpcode() != ISD::SETCC ||
6826 !LHS->hasOneUse() || !RHS->hasOneUse())
6827 return SDValue();
6828
6829 SDNodeFlags LHSSetCCFlags = LHS->getFlags();
6830 SDNodeFlags RHSSetCCFlags = RHS->getFlags();
6831 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6833 LogicOp, LHS.getNode(), RHS.getNode());
6834
6835 SDValue LHS0 = LHS->getOperand(0);
6836 SDValue RHS0 = RHS->getOperand(0);
6837 SDValue LHS1 = LHS->getOperand(1);
6838 SDValue RHS1 = RHS->getOperand(1);
6839 // TODO: We don't actually need a splat here, for vectors we just need the
6840 // invariants to hold for each element.
6841 auto *LHS1C = isConstOrConstSplat(LHS1);
6842 auto *RHS1C = isConstOrConstSplat(RHS1);
6843 ISD::CondCode CCL = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
6844 ISD::CondCode CCR = cast<CondCodeSDNode>(RHS.getOperand(2))->get();
6845 EVT VT = LogicOp->getValueType(0);
6846 EVT OpVT = LHS0.getValueType();
6847 SDLoc DL(LogicOp);
6848
6849 // Check if the operands of an and/or operation are comparisons and if they
6850 // compare against the same value. Replace the and/or-cmp-cmp sequence with
6851 // min/max cmp sequence. If LHS1 is equal to RHS1, then the or-cmp-cmp
6852 // sequence will be replaced with min-cmp sequence:
6853 // (LHS0 < LHS1) | (RHS0 < RHS1) -> min(LHS0, RHS0) < LHS1
6854 // and and-cmp-cmp will be replaced with max-cmp sequence:
6855 // (LHS0 < LHS1) & (RHS0 < RHS1) -> max(LHS0, RHS0) < LHS1
6856 // The optimization does not work for `==` or `!=` .
6857 // The two comparisons should have either the same predicate or the
6858 // predicate of one of the comparisons is the opposite of the other one.
6859 bool isFMAXNUMFMINNUM_IEEE = TLI.isOperationLegal(ISD::FMAXNUM_IEEE, OpVT) &&
6861 bool isFMAXNUMFMINNUM = TLI.isOperationLegalOrCustom(ISD::FMAXNUM, OpVT) &&
6863 if (((OpVT.isInteger() && TLI.isOperationLegal(ISD::UMAX, OpVT) &&
6864 TLI.isOperationLegal(ISD::SMAX, OpVT) &&
6865 TLI.isOperationLegal(ISD::UMIN, OpVT) &&
6866 TLI.isOperationLegal(ISD::SMIN, OpVT)) ||
6867 (OpVT.isFloatingPoint() &&
6868 (isFMAXNUMFMINNUM_IEEE || isFMAXNUMFMINNUM))) &&
6870 CCL != ISD::SETFALSE && CCL != ISD::SETO && CCL != ISD::SETUO &&
6871 CCL != ISD::SETTRUE &&
6872 (CCL == CCR || CCL == ISD::getSetCCSwappedOperands(CCR))) {
6873
6874 SDValue CommonValue, Operand1, Operand2;
6876 if (CCL == CCR) {
6877 if (LHS0 == RHS0) {
6878 CommonValue = LHS0;
6879 Operand1 = LHS1;
6880 Operand2 = RHS1;
6882 } else if (LHS1 == RHS1) {
6883 CommonValue = LHS1;
6884 Operand1 = LHS0;
6885 Operand2 = RHS0;
6886 CC = CCL;
6887 }
6888 } else {
6889 assert(CCL == ISD::getSetCCSwappedOperands(CCR) && "Unexpected CC");
6890 if (LHS0 == RHS1) {
6891 CommonValue = LHS0;
6892 Operand1 = LHS1;
6893 Operand2 = RHS0;
6894 CC = CCR;
6895 } else if (RHS0 == LHS1) {
6896 CommonValue = LHS1;
6897 Operand1 = LHS0;
6898 Operand2 = RHS1;
6899 CC = CCL;
6900 }
6901 }
6902
6903 // Don't do this transform for sign bit tests. Let foldLogicOfSetCCs
6904 // handle it using OR/AND.
6905 if (CC == ISD::SETLT && isNullOrNullSplat(CommonValue))
6906 CC = ISD::SETCC_INVALID;
6907 else if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CommonValue))
6908 CC = ISD::SETCC_INVALID;
6909
6910 if (CC != ISD::SETCC_INVALID) {
6911 unsigned NewOpcode = ISD::DELETED_NODE;
6912 bool IsSigned = isSignedIntSetCC(CC);
6913 if (OpVT.isInteger()) {
6914 bool IsLess = (CC == ISD::SETLE || CC == ISD::SETULE ||
6915 CC == ISD::SETLT || CC == ISD::SETULT);
6916 bool IsOr = (LogicOp->getOpcode() == ISD::OR);
6917 if (IsLess == IsOr)
6918 NewOpcode = IsSigned ? ISD::SMIN : ISD::UMIN;
6919 else
6920 NewOpcode = IsSigned ? ISD::SMAX : ISD::UMAX;
6921 } else if (OpVT.isFloatingPoint())
6923 Operand1, Operand2,
6924 LHSSetCCFlags.hasNoNaNs() && RHSSetCCFlags.hasNoNaNs(), CC,
6925 LogicOp->getOpcode(), DAG, isFMAXNUMFMINNUM_IEEE, isFMAXNUMFMINNUM);
6926
6927 if (NewOpcode != ISD::DELETED_NODE) {
6928 // Propagate fast-math flags from setcc.
6929 SDNodeFlags Flags = LHS->getFlags() & RHS->getFlags();
6930 SDValue MinMaxValue =
6931 DAG.getNode(NewOpcode, DL, OpVT, Operand1, Operand2, Flags);
6932 return DAG.getSetCC(DL, VT, MinMaxValue, CommonValue, CC, /*Chain=*/{},
6933 /*IsSignaling=*/false, Flags);
6934 }
6935 }
6936 }
6937
6938 if (LHS0 == LHS1 && RHS0 == RHS1 && CCL == CCR &&
6939 LHS0.getValueType() == RHS0.getValueType() &&
6940 ((LogicOp->getOpcode() == ISD::AND && CCL == ISD::SETO) ||
6941 (LogicOp->getOpcode() == ISD::OR && CCL == ISD::SETUO)))
6942 return DAG.getSetCC(DL, VT, LHS0, RHS0, CCL);
6943
6944 if (TargetPreference == AndOrSETCCFoldKind::None)
6945 return SDValue();
6946
6947 if (CCL == CCR &&
6948 CCL == (LogicOp->getOpcode() == ISD::AND ? ISD::SETNE : ISD::SETEQ) &&
6949 LHS0 == RHS0 && LHS1C && RHS1C && OpVT.isInteger()) {
6950 const APInt &APLhs = LHS1C->getAPIntValue();
6951 const APInt &APRhs = RHS1C->getAPIntValue();
6952
6953 // Preference is to use ISD::ABS or we already have an ISD::ABS (in which
6954 // case this is just a compare).
6955 if (APLhs == (-APRhs) &&
6956 ((TargetPreference & AndOrSETCCFoldKind::ABS) ||
6957 DAG.doesNodeExist(ISD::ABS, DAG.getVTList(OpVT), {LHS0}))) {
6958 const APInt &C = APLhs.isNegative() ? APRhs : APLhs;
6959 // (icmp eq A, C) | (icmp eq A, -C)
6960 // -> (icmp eq Abs(A), C)
6961 // (icmp ne A, C) & (icmp ne A, -C)
6962 // -> (icmp ne Abs(A), C)
6963 SDValue AbsOp = DAG.getNode(ISD::ABS, DL, OpVT, LHS0);
6964 return DAG.getNode(ISD::SETCC, DL, VT, AbsOp,
6965 DAG.getConstant(C, DL, OpVT), LHS.getOperand(2));
6966 } else if (TargetPreference &
6968
6969 // AndOrSETCCFoldKind::AddAnd:
6970 // A == C0 | A == C1
6971 // IF IsPow2(smax(C0, C1)-smin(C0, C1))
6972 // -> ((A - smin(C0, C1)) & ~(smax(C0, C1)-smin(C0, C1))) == 0
6973 // A != C0 & A != C1
6974 // IF IsPow2(smax(C0, C1)-smin(C0, C1))
6975 // -> ((A - smin(C0, C1)) & ~(smax(C0, C1)-smin(C0, C1))) != 0
6976
6977 // AndOrSETCCFoldKind::NotAnd:
6978 // A == C0 | A == C1
6979 // IF smax(C0, C1) == -1 AND IsPow2(smax(C0, C1) - smin(C0, C1))
6980 // -> ~A & smin(C0, C1) == 0
6981 // A != C0 & A != C1
6982 // IF smax(C0, C1) == -1 AND IsPow2(smax(C0, C1) - smin(C0, C1))
6983 // -> ~A & smin(C0, C1) != 0
6984
6985 const APInt &MaxC = APIntOps::smax(APRhs, APLhs);
6986 const APInt &MinC = APIntOps::smin(APRhs, APLhs);
6987 APInt Dif = MaxC - MinC;
6988 if (!Dif.isZero() && Dif.isPowerOf2()) {
6989 if (MaxC.isAllOnes() &&
6990 (TargetPreference & AndOrSETCCFoldKind::NotAnd)) {
6991 SDValue NotOp = DAG.getNOT(DL, LHS0, OpVT);
6992 SDValue AndOp = DAG.getNode(ISD::AND, DL, OpVT, NotOp,
6993 DAG.getConstant(MinC, DL, OpVT));
6994 return DAG.getNode(ISD::SETCC, DL, VT, AndOp,
6995 DAG.getConstant(0, DL, OpVT), LHS.getOperand(2));
6996 } else if (TargetPreference & AndOrSETCCFoldKind::AddAnd) {
6997
6998 SDValue AddOp = DAG.getNode(ISD::ADD, DL, OpVT, LHS0,
6999 DAG.getConstant(-MinC, DL, OpVT));
7000 SDValue AndOp = DAG.getNode(ISD::AND, DL, OpVT, AddOp,
7001 DAG.getConstant(~Dif, DL, OpVT));
7002 return DAG.getNode(ISD::SETCC, DL, VT, AndOp,
7003 DAG.getConstant(0, DL, OpVT), LHS.getOperand(2));
7004 }
7005 }
7006 }
7007 }
7008
7009 return SDValue();
7010}
7011
7012 // Combine `(select c, (X & 1), 0)` -> `(and (zext c), X)`.
7013 // We canonicalize to the `select` form in the middle end, but the `and` form
7014 // gets better codegen and all tested targets (arm, x86, riscv)
// NOTE(review): the opening signature line (doc line 7015) was lost in
// extraction; presumably something like
//   `static SDValue combineSelectAsExtAnd(SDValue Cond, SDValue T, SDValue F,`
// -- confirm against upstream DAGCombiner.cpp.
7016 const SDLoc &DL, SelectionDAG &DAG) {
7017 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// The false arm must be the constant 0, otherwise this is not an AND mask.
7018 if (!isNullConstant(F))
7019 return SDValue();
7020
// The boolean must be known to be exactly 0 or 1 so that extending it
// produces a usable single-bit mask.
7021 EVT CondVT = Cond.getValueType();
7022 if (TLI.getBooleanContents(CondVT) !=
// NOTE(review): the right-hand side of this comparison (doc line 7023) was
// lost in extraction; presumably `TargetLowering::ZeroOrOneBooleanContent)`
// -- confirm against upstream.
7024 return SDValue();
7025
// The true arm must be (X & 1): an AND whose second operand is constant 1.
7026 if (T.getOpcode() != ISD::AND)
7027 return SDValue();
7028
7029 if (!isOneConstant(T.getOperand(1)))
7030 return SDValue();
7031
7032 EVT OpVT = T.getValueType();
7033
// Extend/truncate the boolean to the operand type if needed, then AND it
// with X, replacing the select entirely.
7034 SDValue CondMask =
7035 OpVT == CondVT ? Cond : DAG.getBoolExtOrTrunc(Cond, DL, OpVT, CondVT);
7036 return DAG.getNode(ISD::AND, DL, OpVT, CondMask, T.getOperand(0));
7037 }
7038
7039/// This contains all DAGCombine rules which reduce two values combined by
7040/// an And operation to a single value. This makes them reusable in the context
7041/// of visitSELECT(). Rules involving constants are not included as
7042/// visitSELECT() already handles those cases.
7043SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
7044 EVT VT = N1.getValueType();
7045 SDLoc DL(N);
7046
7047 // fold (and x, undef) -> 0
7048 if (N0.isUndef() || N1.isUndef())
7049 return DAG.getConstant(0, DL, VT);
7050
7051 if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
7052 return V;
7053
7054 // Canonicalize:
7055 // and(x, add) -> and(add, x)
7056 if (N1.getOpcode() == ISD::ADD)
7057 std::swap(N0, N1);
7058
7059 // TODO: Rewrite this to return a new 'AND' instead of using CombineTo.
7060 if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
7061 VT.isScalarInteger() && VT.getSizeInBits() <= 64 && N0->hasOneUse()) {
7062 if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
7063 if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
7064 // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
7065 // immediate for an add, but it is legal if its top c2 bits are set,
7066 // transform the ADD so the immediate doesn't need to be materialized
7067 // in a register.
7068 APInt ADDC = ADDI->getAPIntValue();
7069 APInt SRLC = SRLI->getAPIntValue();
7070 if (ADDC.getSignificantBits() <= 64 && SRLC.ult(VT.getSizeInBits()) &&
7071 !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
7073 SRLC.getZExtValue());
7074 if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
7075 ADDC |= Mask;
7076 if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
7077 SDLoc DL0(N0);
7078 SDValue NewAdd =
7079 DAG.getNode(ISD::ADD, DL0, VT,
7080 N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
7081 CombineTo(N0.getNode(), NewAdd);
7082 // Return N so it doesn't get rechecked!
7083 return SDValue(N, 0);
7084 }
7085 }
7086 }
7087 }
7088 }
7089 }
7090
7091 return SDValue();
7092}
7093
7094bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
7095 EVT LoadResultTy, EVT &ExtVT) {
7096 if (!AndC->getAPIntValue().isMask())
7097 return false;
7098
7099 unsigned ActiveBits = AndC->getAPIntValue().countr_one();
7100
7101 ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
7102 EVT LoadedVT = LoadN->getMemoryVT();
7103
7104 if (ExtVT == LoadedVT &&
7105 (!LegalOperations ||
7106 TLI.isLoadLegal(LoadResultTy, ExtVT, LoadN->getAlign(),
7107 LoadN->getAddressSpace(), ISD::ZEXTLOAD, false))) {
7108 // ZEXTLOAD will match without needing to change the size of the value being
7109 // loaded.
7110 return true;
7111 }
7112
7113 // Do not change the width of a volatile or atomic loads.
7114 if (!LoadN->isSimple())
7115 return false;
7116
7117 // Do not generate loads of non-round integer types since these can
7118 // be expensive (and would be wrong if the type is not byte sized).
7119 if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
7120 return false;
7121
7122 if (LegalOperations &&
7123 !TLI.isLoadLegal(LoadResultTy, ExtVT, LoadN->getAlign(),
7124 LoadN->getAddressSpace(), ISD::ZEXTLOAD, false))
7125 return false;
7126
7127 if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT, /*ByteOffset=*/0))
7128 return false;
7129
7130 return true;
7131}
7132
7133bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
7134 ISD::LoadExtType ExtType, EVT &MemVT,
7135 unsigned ShAmt) {
7136 if (!LDST)
7137 return false;
7138
7139 // Only allow byte offsets.
7140 if (ShAmt % 8)
7141 return false;
7142 const unsigned ByteShAmt = ShAmt / 8;
7143
7144 // Do not generate loads of non-round integer types since these can
7145 // be expensive (and would be wrong if the type is not byte sized).
7146 if (!MemVT.isRound())
7147 return false;
7148
7149 // Don't change the width of a volatile or atomic loads.
7150 if (!LDST->isSimple())
7151 return false;
7152
7153 EVT LdStMemVT = LDST->getMemoryVT();
7154
7155 // Bail out when changing the scalable property, since we can't be sure that
7156 // we're actually narrowing here.
7157 if (LdStMemVT.isScalableVector() != MemVT.isScalableVector())
7158 return false;
7159
7160 // Verify that we are actually reducing a load width here.
7161 if (LdStMemVT.bitsLT(MemVT))
7162 return false;
7163
7164 // Ensure that this isn't going to produce an unsupported memory access.
7165 if (ShAmt) {
7166 const Align LDSTAlign = LDST->getAlign();
7167 const Align NarrowAlign = commonAlignment(LDSTAlign, ByteShAmt);
7168 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
7169 LDST->getAddressSpace(), NarrowAlign,
7170 LDST->getMemOperand()->getFlags()))
7171 return false;
7172 }
7173
7174 // It's not possible to generate a constant of extended or untyped type.
7175 EVT PtrType = LDST->getBasePtr().getValueType();
7176 if (PtrType == MVT::Untyped || PtrType.isExtended())
7177 return false;
7178
7179 if (isa<LoadSDNode>(LDST)) {
7180 LoadSDNode *Load = cast<LoadSDNode>(LDST);
7181 // Don't transform one with multiple uses, this would require adding a new
7182 // load.
7183 if (!SDValue(Load, 0).hasOneUse())
7184 return false;
7185
7186 if (LegalOperations &&
7187 !TLI.isLoadLegal(Load->getValueType(0), MemVT, Load->getAlign(),
7188 Load->getAddressSpace(), ExtType, false))
7189 return false;
7190
7191 // For the transform to be legal, the load must produce only two values
7192 // (the value loaded and the chain). Don't transform a pre-increment
7193 // load, for example, which produces an extra value. Otherwise the
7194 // transformation is not equivalent, and the downstream logic to replace
7195 // uses gets things wrong.
7196 if (Load->getNumValues() > 2)
7197 return false;
7198
7199 // If the load that we're shrinking is an extload and we're not just
7200 // discarding the extension we can't simply shrink the load. Bail.
7201 // TODO: It would be possible to merge the extensions in some cases.
7202 if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
7203 Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
7204 return false;
7205
7206 if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT, ByteShAmt))
7207 return false;
7208 } else {
7209 assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
7210 StoreSDNode *Store = cast<StoreSDNode>(LDST);
7211 // Can't write outside the original store
7212 if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
7213 return false;
7214
7215 if (LegalOperations &&
7216 !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT,
7217 Store->getAlign(), Store->getAddressSpace()))
7218 return false;
7219 }
7220 return true;
7221}
7222
// Walk the operand tree of the AND rooted at N, collecting loads that can be
// narrowed (into Loads), logic nodes whose constants may need re-masking
// (into NodesWithConsts), and at most one other node that must be explicitly
// masked (NodeToMask). Returns false if any operand blocks the transform.
7223 bool DAGCombiner::SearchForAndLoads(SDNode *N,
7224 SmallVectorImpl<LoadSDNode*> &Loads,
7225 SmallPtrSetImpl<SDNode*> &NodesWithConsts,
7226 ConstantSDNode *Mask,
7227 SDNode *&NodeToMask) {
7228 // Recursively search for the operands, looking for loads which can be
7229 // narrowed.
7230 for (SDValue Op : N->op_values()) {
// Vector operands are not handled by this narrowing transform.
7231 if (Op.getValueType().isVector())
7232 return false;
7233
7234 // Some constants may need fixing up later if they are too large.
7235 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
7236 assert(ISD::isBitwiseLogicOp(N->getOpcode()) &&
7237 "Expected bitwise logic operation");
7238 if (!C->getAPIntValue().isSubsetOf(Mask->getAPIntValue()))
7239 NodesWithConsts.insert(N);
7240 continue;
7241 }
7242
// Any multi-use operand would need duplication; give up.
7243 if (!Op.hasOneUse())
7244 return false;
7245
7246 switch(Op.getOpcode()) {
7247 case ISD::LOAD: {
7248 auto *Load = cast<LoadSDNode>(Op);
7249 EVT ExtVT;
7250 if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
7251 isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {
7252
7253 // ZEXTLOAD is already small enough.
7254 if (Load->getExtensionType() == ISD::ZEXTLOAD &&
7255 ExtVT.bitsGE(Load->getMemoryVT()))
7256 continue;
7257
7258 // Use LE to convert equal sized loads to zext.
7259 if (ExtVT.bitsLE(Load->getMemoryVT()))
7260 Loads.push_back(Load);
7261
7262 continue;
7263 }
7264 return false;
7265 }
7266 case ISD::ZERO_EXTEND:
7267 case ISD::AssertZext: {
7268 unsigned ActiveBits = Mask->getAPIntValue().countr_one();
7269 EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
7270 EVT VT = Op.getOpcode() == ISD::AssertZext ?
7271 cast<VTSDNode>(Op.getOperand(1))->getVT() :
7272 Op.getOperand(0).getValueType();
7273
7274 // We can accept extending nodes if the mask is wider or an equal
7275 // width to the original type.
7276 if (ExtVT.bitsGE(VT))
7277 continue;
// Too-narrow mask: fall through to the "node to mask" handling below.
7278 break;
7279 }
7280 case ISD::OR:
7281 case ISD::XOR:
7282 case ISD::AND:
// Recurse through further bitwise logic; failure anywhere aborts all.
7283 if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
7284 NodeToMask))
7285 return false;
7286 continue;
7287 }
7288
7289 // Allow one node which will be masked along with any loads found.
7290 if (NodeToMask)
7291 return false;
7292
7293 // Also ensure that the node to be masked only produces one data result.
7294 NodeToMask = Op.getNode();
7295 if (NodeToMask->getNumValues() > 1) {
7296 bool HasValue = false;
7297 for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
7298 MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
// Glue/chain results don't count as data results.
7299 if (VT != MVT::Glue && VT != MVT::Other) {
7300 if (HasValue) {
7301 NodeToMask = nullptr;
7302 return false;
7303 }
7304 HasValue = true;
7305 }
7306 }
7307 assert(HasValue && "Node to be masked has no data result?");
7308 }
7309 }
7310 return true;
7311 }
7312
// Given an AND node N with a low-bits mask constant, try to push the mask
// backwards through the operand tree so the loads feeding it can be narrowed
// to zero-extending loads. Returns true if the DAG was rewritten.
7313 bool DAGCombiner::BackwardsPropagateMask(SDNode *N) {
7314 auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
7315 if (!Mask)
7316 return false;
7317
// Only a contiguous low-bits mask corresponds to a zero-extension.
7318 if (!Mask->getAPIntValue().isMask())
7319 return false;
7320
7321 // No need to do anything if the and directly uses a load.
7322 if (isa<LoadSDNode>(N->getOperand(0)))
7323 return false;
7324
// NOTE(review): a declaration line (doc line 7325) was lost in extraction --
// `Loads` is used below; presumably `SmallVector<LoadSDNode*, 8> Loads;`.
// Confirm against upstream DAGCombiner.cpp.
7326 SmallPtrSet<SDNode*, 2> NodesWithConsts;
7327 SDNode *FixupNode = nullptr;
7328 if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
// Without any load to narrow the transform has no benefit.
7329 if (Loads.empty())
7330 return false;
7331
7332 LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
7333 SDValue MaskOp = N->getOperand(1);
7334
7335 // If it exists, fixup the single node we allow in the tree that needs
7336 // masking.
7337 if (FixupNode) {
7338 LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
7339 SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
7340 FixupNode->getValueType(0),
7341 SDValue(FixupNode, 0), MaskOp);
7342 DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
// The RAUW above also rewrote the new AND's own operand; point it back at
// the fixup node so the mask applies to the original value.
7343 if (And.getOpcode() == ISD ::AND)
7344 DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
7345 }
7346
7347 // Narrow any constants that need it.
7348 for (auto *LogicN : NodesWithConsts) {
7349 SDValue Op0 = LogicN->getOperand(0);
7350 SDValue Op1 = LogicN->getOperand(1);
7351
7352 // We only need to fix AND if both inputs are constants. And we only need
7353 // to fix one of the constants.
7354 if (LogicN->getOpcode() == ISD::AND &&
// NOTE(review): the condition's continuation (doc line 7355) was lost in
// extraction; per the comment above it presumably checks that not both
// operands are ConstantSDNodes. Confirm against upstream.
7356 continue;
7357
7358 if (isa<ConstantSDNode>(Op0) && LogicN->getOpcode() != ISD::AND)
7359 Op0 =
7360 DAG.getNode(ISD::AND, SDLoc(Op0), Op0.getValueType(), Op0, MaskOp);
7361
7362 if (isa<ConstantSDNode>(Op1))
7363 Op1 =
7364 DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(), Op1, MaskOp);
7365
// Canonicalize: keep any constant operand on the right-hand side.
7366 if (isa<ConstantSDNode>(Op0) && !isa<ConstantSDNode>(Op1))
7367 std::swap(Op0, Op1);
7368
7369 DAG.UpdateNodeOperands(LogicN, Op0, Op1);
7370 }
7371
7372 // Create narrow loads.
7373 for (auto *Load : Loads) {
7374 LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
7375 SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
7376 SDValue(Load, 0), MaskOp);
7377 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
// Re-point the new AND at the load (the RAUW rewrote its operand too).
7378 if (And.getOpcode() == ISD ::AND)
7379 And = SDValue(
7380 DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
7381 SDValue NewLoad = reduceLoadWidth(And.getNode());
7382 assert(NewLoad &&
7383 "Shouldn't be masking the load if it can't be narrowed");
7384 CombineTo(Load, NewLoad, NewLoad.getValue(1));
7385 }
// The mask has been pushed into the tree, so the root AND is now redundant.
7386 DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
7387 return true;
7388 }
7389 return false;
7390 }
7391
7392 // Unfold
7393 // x & (-1 'logical shift' y)
7394 // To
7395 // (x 'opposite logical shift' y) 'logical shift' y
7396 // if it is better for performance.
7397 SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
7398 assert(N->getOpcode() == ISD::AND);
7399
7400 SDValue N0 = N->getOperand(0);
7401 SDValue N1 = N->getOperand(1);
7402
7403 // Do we actually prefer shifts over mask?
// NOTE(review): the condition line (doc line 7404) was lost in extraction --
// presumably a TLI target-preference query (e.g.
// `if (!TLI.shouldFoldMaskToVariableShiftPair(N0))`). Confirm upstream.
7405 return SDValue();
7406
7407 // Try to match (-1 '[outer] logical shift' y)
7408 unsigned OuterShift;
7409 unsigned InnerShift; // The opposite direction to the OuterShift.
7410 SDValue Y; // Shift amount.
// Returns true and fills OuterShift/InnerShift/Y if M is a one-use
// (shl/srl -1, y) mask.
7411 auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
7412 if (!M.hasOneUse())
7413 return false;
7414 OuterShift = M->getOpcode();
7415 if (OuterShift == ISD::SHL)
7416 InnerShift = ISD::SRL;
7417 else if (OuterShift == ISD::SRL)
7418 InnerShift = ISD::SHL;
7419 else
7420 return false;
7421 if (!isAllOnesConstant(M->getOperand(0)))
7422 return false;
7423 Y = M->getOperand(1);
7424 return true;
7425 };
7426
// The mask may be on either side of the AND.
7427 SDValue X;
7428 if (matchMask(N1))
7429 X = N0;
7430 else if (matchMask(N0))
7431 X = N1;
7432 else
7433 return SDValue();
7434
7435 SDLoc DL(N);
7436 EVT VT = N->getValueType(0);
7437
7438 // tmp = x 'opposite logical shift' y
7439 SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
7440 // ret = tmp 'logical shift' y
7441 SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
7442
7443 return T1;
7444 }
7445
7446 /// Try to replace shift/logic that tests if a bit is clear with mask + setcc.
7447 /// For a target with a bit test, this is expected to become test + set and save
7448 /// at least 1 instruction.
// NOTE(review): the function's signature line (doc line 7449) was lost in
// extraction; presumably
//   `static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) {`
// -- confirm against upstream DAGCombiner.cpp.
7450 assert(And->getOpcode() == ISD::AND && "Expected an 'and' op");
7451
7452 // Look through an optional extension.
7453 SDValue And0 = And->getOperand(0), And1 = And->getOperand(1);
7454 if (And0.getOpcode() == ISD::ANY_EXTEND && And0.hasOneUse())
7455 And0 = And0.getOperand(0);
// The pattern must be (and x, 1): a single-bit test.
7456 if (!isOneConstant(And1) || !And0.hasOneUse())
7457 return SDValue();
7458
7459 SDValue Src = And0;
7460
7461 // Attempt to find a 'not' op.
7462 // TODO: Should we favor test+set even without the 'not' op?
7463 bool FoundNot = false;
7464 if (isBitwiseNot(Src)) {
7465 FoundNot = true;
7466 Src = Src.getOperand(0);
7467
7468 // Look though an optional truncation. The source operand may not be the
7469 // same type as the original 'and', but that is ok because we are masking
7470 // off everything but the low bit.
7471 if (Src.getOpcode() == ISD::TRUNCATE && Src.hasOneUse())
7472 Src = Src.getOperand(0);
7473 }
7474
7475 // Match a shift-right by constant.
7476 if (Src.getOpcode() != ISD::SRL || !Src.hasOneUse())
7477 return SDValue();
7478
7479 // This is probably not worthwhile without a supported type.
7480 EVT SrcVT = Src.getValueType();
7481 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
7482 if (!TLI.isTypeLegal(SrcVT))
7483 return SDValue();
7484
7485 // We might have looked through casts that make this transform invalid.
7486 unsigned BitWidth = SrcVT.getScalarSizeInBits();
7487 SDValue ShiftAmt = Src.getOperand(1);
7488 auto *ShiftAmtC = dyn_cast<ConstantSDNode>(ShiftAmt);
7489 if (!ShiftAmtC || !ShiftAmtC->getAPIntValue().ult(BitWidth))
7490 return SDValue();
7491
7492 // Set source to shift source.
7493 Src = Src.getOperand(0);
7494
7495 // Try again to find a 'not' op.
7496 // TODO: Should we favor test+set even with two 'not' ops?
7497 if (!FoundNot) {
7498 if (!isBitwiseNot(Src))
7499 return SDValue();
7500 Src = Src.getOperand(0);
7501 }
7502
// Only worthwhile when the target has a native bit-test instruction.
7503 if (!TLI.hasBitTest(Src, ShiftAmt))
7504 return SDValue();
7505
7506 // Turn this into a bit-test pattern using mask op + setcc:
7507 // and (not (srl X, C)), 1 --> (and X, 1<<C) == 0
7508 // and (srl (not X), C)), 1 --> (and X, 1<<C) == 0
7509 SDLoc DL(And);
7510 SDValue X = DAG.getZExtOrTrunc(Src, DL, SrcVT);
7511 EVT CCVT =
7512 TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
7513 SDValue Mask = DAG.getConstant(
7514 APInt::getOneBitSet(BitWidth, ShiftAmtC->getZExtValue()), DL, SrcVT);
7515 SDValue NewAnd = DAG.getNode(ISD::AND, DL, SrcVT, X, Mask);
7516 SDValue Zero = DAG.getConstant(0, DL, SrcVT);
7517 SDValue Setcc = DAG.getSetCC(DL, CCVT, NewAnd, Zero, ISD::SETEQ);
7518 return DAG.getZExtOrTrunc(Setcc, DL, And->getValueType(0));
7519 }
7520
7521 /// For targets that support usubsat, match a bit-hack form of that operation
7522 /// that ends in 'and' and convert it.
// NOTE(review): the function's signature line (doc line 7523) was lost in
// extraction; presumably
//   `static SDValue foldAndToUsubsat(SDNode *N, SelectionDAG &DAG, const SDLoc &DL) {`
// -- confirm against upstream DAGCombiner.cpp.
7524 EVT VT = N->getValueType(0);
7525 unsigned BitWidth = VT.getScalarSizeInBits();
7526 APInt SignMask = APInt::getSignMask(BitWidth);
7527
7528 // (i8 X ^ 128) & (i8 X s>> 7) --> usubsat X, 128
7529 // (i8 X + 128) & (i8 X s>> 7) --> usubsat X, 128
7530 // xor/add with SMIN (signmask) are logically equivalent.
7531 SDValue X;
7532 if (!sd_match(N, m_And(m_OneUse(m_Xor(m_Value(X), m_SpecificInt(SignMask))),
// NOTE(review): parts of the two sd_match patterns (doc lines 7533 and
// 7535-7536) were lost in extraction; per the comments above they
// presumably match a one-use arithmetic-shift-right of X by BitWidth-1
// paired with the xor/add-of-signmask forms. Confirm against upstream.
7534 m_SpecificInt(BitWidth - 1))))) &&
7537 m_SpecificInt(BitWidth - 1))))))
7538 return SDValue();
7539
// Both forms are equivalent to a saturating unsigned subtract of SignMask.
7540 return DAG.getNode(ISD::USUBSAT, DL, VT, X,
7541 DAG.getConstant(SignMask, DL, VT));
7542 }
7543
7544 /// Given a bitwise logic operation N with a matching bitwise logic operand,
7545 /// fold a pattern where 2 of the source operands are identically shifted
7546 /// values. For example:
7547 /// ((X0 << Y) | Z) | (X1 << Y) --> ((X0 | X1) << Y) | Z
// NOTE(review): the signature's opening line (doc line 7548) was lost in
// extraction; presumably
//   `static SDValue foldLogicOfShifts(SDNode *N, SDValue LogicOp, SDValue ShiftOp,`
// -- confirm against upstream DAGCombiner.cpp.
7549 SelectionDAG &DAG) {
7550 unsigned LogicOpcode = N->getOpcode();
7551 assert(ISD::isBitwiseLogicOp(LogicOpcode) &&
7552 "Expected bitwise logic operation");
7553
// Both intermediate nodes disappear, so they must have no other users.
7554 if (!LogicOp.hasOneUse() || !ShiftOp.hasOneUse())
7555 return SDValue();
7556
7557 // Match another bitwise logic op and a shift.
7558 unsigned ShiftOpcode = ShiftOp.getOpcode();
7559 if (LogicOp.getOpcode() != LogicOpcode ||
7560 !(ShiftOpcode == ISD::SHL || ShiftOpcode == ISD::SRL ||
7561 ShiftOpcode == ISD::SRA))
7562 return SDValue();
7563
7564 // Match another shift op inside the first logic operand. Handle both commuted
7565 // possibilities.
7566 // LOGIC (LOGIC (SH X0, Y), Z), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z
7567 // LOGIC (LOGIC Z, (SH X0, Y)), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z
7568 SDValue X1 = ShiftOp.getOperand(0);
7569 SDValue Y = ShiftOp.getOperand(1);
7570 SDValue X0, Z;
7571 if (LogicOp.getOperand(0).getOpcode() == ShiftOpcode &&
7572 LogicOp.getOperand(0).getOperand(1) == Y) {
7573 X0 = LogicOp.getOperand(0).getOperand(0);
7574 Z = LogicOp.getOperand(1);
7575 } else if (LogicOp.getOperand(1).getOpcode() == ShiftOpcode &&
7576 LogicOp.getOperand(1).getOperand(1) == Y) {
7577 X0 = LogicOp.getOperand(1).getOperand(0);
7578 Z = LogicOp.getOperand(0);
7579 } else {
7580 return SDValue();
7581 }
7582
// Combine the two shifted values first, shift once, then fold Z back in.
7583 EVT VT = N->getValueType(0);
7584 SDLoc DL(N);
7585 SDValue LogicX = DAG.getNode(LogicOpcode, DL, VT, X0, X1);
7586 SDValue NewShift = DAG.getNode(ShiftOpcode, DL, VT, LogicX, Y);
7587 return DAG.getNode(LogicOpcode, DL, VT, NewShift, Z);
7588 }
7589
7590 /// Given a tree of logic operations with shape like
7591 /// (LOGIC (LOGIC (X, Y), LOGIC (Z, Y)))
7592 /// try to match and fold shift operations with the same shift amount.
7593 /// For example:
7594 /// LOGIC (LOGIC (SH X0, Y), Z), (LOGIC (SH X1, Y), W) -->
7595 /// --> LOGIC (SH (LOGIC X0, X1), Y), (LOGIC Z, W)
// NOTE(review): the signature's opening line (doc line 7596) was lost in
// extraction; presumably
//   `static SDValue foldLogicTreeOfShifts(SDNode *N, SDValue LeftHand,`
// -- confirm against upstream DAGCombiner.cpp.
7597 SDValue RightHand, SelectionDAG &DAG) {
7598 unsigned LogicOpcode = N->getOpcode();
7599 assert(ISD::isBitwiseLogicOp(LogicOpcode) &&
7600 "Expected bitwise logic operation");
// Both sub-trees must use the same logic opcode as the root.
7601 if (LeftHand.getOpcode() != LogicOpcode ||
7602 RightHand.getOpcode() != LogicOpcode)
7603 return SDValue();
// Both sub-trees are replaced, so they must have no other users.
7604 if (!LeftHand.hasOneUse() || !RightHand.hasOneUse())
7605 return SDValue();
7606
7607 // Try to match one of following patterns:
7608 // LOGIC (LOGIC (SH X0, Y), Z), (LOGIC (SH X1, Y), W)
7609 // LOGIC (LOGIC (SH X0, Y), Z), (LOGIC W, (SH X1, Y))
7610 // Note that foldLogicOfShifts will handle commuted versions of the left hand
7611 // itself.
7612 SDValue CombinedShifts, W;
7613 SDValue R0 = RightHand.getOperand(0);
7614 SDValue R1 = RightHand.getOperand(1);
7615 if ((CombinedShifts = foldLogicOfShifts(N, LeftHand, R0, DAG)))
7616 W = R1;
7617 else if ((CombinedShifts = foldLogicOfShifts(N, LeftHand, R1, DAG)))
7618 W = R0;
7619 else
7620 return SDValue();
7621
// Rejoin the leftover operand W with the combined-shift result.
7622 EVT VT = N->getValueType(0);
7623 SDLoc DL(N);
7624 return DAG.getNode(LogicOpcode, DL, VT, CombinedShifts, W);
7625 }
7626
7627 /// Fold "masked merge" expressions like `(m & x) | (~m & y)` and its DeMorgan
7628 /// variant `(~m | x) & (m | y)` into the equivalent `((x ^ y) & m) ^ y)`
7629 /// pattern. This is typically a better representation for targets without a
7630 /// fused "and-not" operation.
// NOTE(review): the signature's opening line (doc line 7631) was lost in
// extraction; presumably
//   `static SDValue foldMaskedMerge(SDNode *Node, SelectionDAG &DAG,`
// -- confirm against upstream DAGCombiner.cpp.
7632 const TargetLowering &TLI, const SDLoc &DL) {
7633 // Note that masked-merge variants using XOR or ADD expressions are
7634 // normalized to OR by InstCombine so we only check for OR or AND.
7635 assert((Node->getOpcode() == ISD::OR || Node->getOpcode() == ISD::AND) &&
7636 "Must be called with ISD::OR or ISD::AND node");
7637
7638 // If the target supports and-not, don't fold this.
7639 if (TLI.hasAndNot(SDValue(Node, 0)))
7640 return SDValue();
7641
7642 SDValue M, X, Y;
7643
7644 if (sd_match(Node,
// NOTE(review): two pattern lines (doc lines 7645 and 7648) were lost in
// extraction; per the header comment they presumably match the one-use
// `(~m & y)` / `(~m | x)` halves via m_Not. Confirm against upstream.
7646 m_OneUse(m_And(m_Deferred(M), m_Value(X))))) ||
7647 sd_match(Node,
7649 m_OneUse(m_Or(m_Deferred(M), m_Value(Y)))))) {
// Build ((x ^ y) & m) ^ y, which selects x where m is set and y elsewhere.
7650 EVT VT = M.getValueType();
7651 SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, X, Y);
7652 SDValue And = DAG.getNode(ISD::AND, DL, VT, Xor, M);
7653 return DAG.getNode(ISD::XOR, DL, VT, And, Y);
7654 }
7655 return SDValue();
7656 }
7657
7658SDValue DAGCombiner::visitAND(SDNode *N) {
7659 SDValue N0 = N->getOperand(0);
7660 SDValue N1 = N->getOperand(1);
7661 EVT VT = N1.getValueType();
7662 SDLoc DL(N);
7663
7664 // x & x --> x
7665 if (N0 == N1)
7666 return N0;
7667
7668 // fold (and c1, c2) -> c1&c2
7669 if (SDValue C = DAG.FoldConstantArithmetic(ISD::AND, DL, VT, {N0, N1}))
7670 return C;
7671
7672 // canonicalize constant to RHS
7675 return DAG.getNode(ISD::AND, DL, VT, N1, N0);
7676
7677 if (areBitwiseNotOfEachother(N0, N1))
7678 return DAG.getConstant(APInt::getZero(VT.getScalarSizeInBits()), DL, VT);
7679
7680 // fold vector ops
7681 if (VT.isVector()) {
7682 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
7683 return FoldedVOp;
7684
7685 // fold (and x, 0) -> 0, vector edition
7687 // do not return N1, because undef node may exist in N1
7689 N1.getValueType());
7690
7691 // fold (and x, -1) -> x, vector edition
7693 return N0;
7694
7695 // fold (and buildvector(x,0,-1,w), buildvector(0,y,z,w))
7696 // --> buildvector(0,0,z,w)
7697 auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
7698 auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
7699 if (BV0 && BV1 && !BV0->getSplatValue() && !BV1->getSplatValue() &&
7700 N0.hasOneUse() && N1.hasOneUse() &&
7701 BV0->getOperand(0).getValueType() ==
7702 BV1->getOperand(0).getValueType()) {
7703 SmallVector<SDValue> MergedOps;
7704 unsigned NumElts = VT.getVectorNumElements();
7705 EVT EltVT = BV0->getOperand(0).getValueType();
7706 for (unsigned I = 0; I != NumElts; ++I) {
7707 auto *C0 = dyn_cast<ConstantSDNode>(BV0->getOperand(I));
7708 auto *C1 = dyn_cast<ConstantSDNode>(BV1->getOperand(I));
7709 if (C0 && C1)
7710 MergedOps.push_back(DAG.getConstant(
7711 C0->getAPIntValue() & C1->getAPIntValue(), DL, EltVT));
7712 else if (C0 && C0->isZero())
7713 MergedOps.push_back(BV0->getOperand(I));
7714 else if (C1 && C1->isZero())
7715 MergedOps.push_back(BV1->getOperand(I));
7716 else if (C0 && C0->isAllOnes())
7717 MergedOps.push_back(BV1->getOperand(I));
7718 else if (C1 && C1->isAllOnes())
7719 MergedOps.push_back(BV0->getOperand(I));
7720 else if (BV0->getOperand(I) == BV1->getOperand(I))
7721 MergedOps.push_back(BV0->getOperand(I));
7722 else
7723 break;
7724 }
7725 if (MergedOps.size() == NumElts)
7726 return DAG.getBuildVector(VT, DL, MergedOps);
7727 }
7728
7729 // fold (and (masked_load) (splat_vec (x, ...))) to zext_masked_load
7730 bool Frozen = N0.getOpcode() == ISD::FREEZE;
7731 auto *MLoad = dyn_cast<MaskedLoadSDNode>(Frozen ? N0.getOperand(0) : N0);
7732 ConstantSDNode *Splat = isConstOrConstSplat(N1, true, true);
7733 if (MLoad && MLoad->getExtensionType() == ISD::EXTLOAD && Splat) {
7734 EVT MemVT = MLoad->getMemoryVT();
7735 if (TLI.isLoadLegal(VT, MemVT, MLoad->getAlign(),
7736 MLoad->getAddressSpace(), ISD::ZEXTLOAD, false)) {
7737 // For this AND to be a zero extension of the masked load the elements
7738 // of the BuildVec must mask the bottom bits of the extended element
7739 // type
7740 if (Splat->getAPIntValue().isMask(MemVT.getScalarSizeInBits())) {
7741 SDValue NewLoad = DAG.getMaskedLoad(
7742 VT, DL, MLoad->getChain(), MLoad->getBasePtr(),
7743 MLoad->getOffset(), MLoad->getMask(), MLoad->getPassThru(), MemVT,
7744 MLoad->getMemOperand(), MLoad->getAddressingMode(), ISD::ZEXTLOAD,
7745 MLoad->isExpandingLoad());
7746 CombineTo(N, Frozen ? N0 : NewLoad);
7747 CombineTo(MLoad, NewLoad, NewLoad.getValue(1));
7748 return SDValue(N, 0);
7749 }
7750 }
7751 }
7752 }
7753
7754 // fold (and x, -1) -> x
7755 if (isAllOnesConstant(N1))
7756 return N0;
7757
7758 // if (and x, c) is known to be zero, return 0
7759 unsigned BitWidth = VT.getScalarSizeInBits();
7760 ConstantSDNode *N1C = isConstOrConstSplat(N1);
7762 return DAG.getConstant(0, DL, VT);
7763
7764 if (SDValue R = foldAndOrOfSETCC(N, DAG))
7765 return R;
7766
7767 if (SDValue NewSel = foldBinOpIntoSelect(N))
7768 return NewSel;
7769
7770 // reassociate and
7771 if (SDValue RAND = reassociateOps(ISD::AND, DL, N0, N1, N->getFlags()))
7772 return RAND;
7773
7774 // Fold and(vecreduce(x), vecreduce(y)) -> vecreduce(and(x, y))
7775 if (SDValue SD =
7776 reassociateReduction(ISD::VECREDUCE_AND, ISD::AND, DL, VT, N0, N1))
7777 return SD;
7778
7779 // fold (and (or x, C), D) -> D if (C & D) == D
7780 auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
7781 return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
7782 };
7783 if (N0.getOpcode() == ISD::OR &&
7784 ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
7785 return N1;
7786
7787 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
7788 SDValue N0Op0 = N0.getOperand(0);
7789 EVT SrcVT = N0Op0.getValueType();
7790 unsigned SrcBitWidth = SrcVT.getScalarSizeInBits();
7791 APInt Mask = ~N1C->getAPIntValue();
7792 Mask = Mask.trunc(SrcBitWidth);
7793
7794 // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
7795 if (DAG.MaskedValueIsZero(N0Op0, Mask))
7796 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0Op0);
7797
7798 // fold (and (any_ext V), c) -> (zero_ext (and (trunc V), c)) if profitable.
7799 if (N1C->getAPIntValue().countLeadingZeros() >= (BitWidth - SrcBitWidth) &&
7800 TLI.isTruncateFree(VT, SrcVT) && TLI.isZExtFree(SrcVT, VT) &&
7801 TLI.isTypeDesirableForOp(ISD::AND, SrcVT) &&
7802 TLI.isNarrowingProfitable(N, VT, SrcVT))
7803 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT,
7804 DAG.getNode(ISD::AND, DL, SrcVT, N0Op0,
7805 DAG.getZExtOrTrunc(N1, DL, SrcVT)));
7806 }
7807
7808 // fold (and (ext (and V, c1)), c2) -> (and (ext V), (and c1, (ext c2)))
7809 if (ISD::isExtOpcode(N0.getOpcode())) {
7810 unsigned ExtOpc = N0.getOpcode();
7811 SDValue N0Op0 = N0.getOperand(0);
7812 if (N0Op0.getOpcode() == ISD::AND &&
7813 (ExtOpc != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0Op0, VT)) &&
7814 N0->hasOneUse() && N0Op0->hasOneUse()) {
7815 if (SDValue NewExt = DAG.FoldConstantArithmetic(ExtOpc, DL, VT,
7816 {N0Op0.getOperand(1)})) {
7817 if (SDValue NewMask =
7818 DAG.FoldConstantArithmetic(ISD::AND, DL, VT, {N1, NewExt})) {
7819 return DAG.getNode(ISD::AND, DL, VT,
7820 DAG.getNode(ExtOpc, DL, VT, N0Op0.getOperand(0)),
7821 NewMask);
7822 }
7823 }
7824 }
7825 }
7826
7827 // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
7828 // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
7829 // already be zero by virtue of the width of the base type of the load.
7830 //
7831 // the 'X' node here can either be nothing or an extract_vector_elt to catch
7832 // more cases.
7833 if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7835 N0.getOperand(0).getOpcode() == ISD::LOAD &&
7836 N0.getOperand(0).getResNo() == 0) ||
7837 (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
7838 auto *Load =
7839 cast<LoadSDNode>((N0.getOpcode() == ISD::LOAD) ? N0 : N0.getOperand(0));
7840
7841 // Get the constant (if applicable) the zero'th operand is being ANDed with.
7842 // This can be a pure constant or a vector splat, in which case we treat the
7843 // vector as a scalar and use the splat value.
7844 APInt Constant = APInt::getZero(1);
7845 if (const ConstantSDNode *C = isConstOrConstSplat(
7846 N1, /*AllowUndefs=*/false, /*AllowTruncation=*/true)) {
7847 Constant = C->getAPIntValue();
7848 } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
7849 unsigned EltBitWidth = Vector->getValueType(0).getScalarSizeInBits();
7850 APInt SplatValue, SplatUndef;
7851 unsigned SplatBitSize;
7852 bool HasAnyUndefs;
7853 // Endianness should not matter here. Code below makes sure that we only
7854 // use the result if the SplatBitSize is a multiple of the vector element
7855 // size. And after that we AND all element sized parts of the splat
7856 // together. So the end result should be the same regardless of in which
7857 // order we do those operations.
7858 const bool IsBigEndian = false;
7859 bool IsSplat =
7860 Vector->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
7861 HasAnyUndefs, EltBitWidth, IsBigEndian);
7862
7863 // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
7864 // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
7865 if (IsSplat && (SplatBitSize % EltBitWidth) == 0) {
7866 // Undef bits can contribute to a possible optimisation if set, so
7867 // set them.
7868 SplatValue |= SplatUndef;
7869
7870 // The splat value may be something like "0x00FFFFFF", which means 0 for
7871 // the first vector value and FF for the rest, repeating. We need a mask
7872 // that will apply equally to all members of the vector, so AND all the
7873 // lanes of the constant together.
7874 Constant = APInt::getAllOnes(EltBitWidth);
7875 for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i)
7876 Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth);
7877 }
7878 }
7879
7880 // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
7881 // actually legal and isn't going to get expanded, else this is a false
7882 // optimisation.
7883 bool CanZextLoadProfitably = TLI.isLoadLegal(
7884 Load->getValueType(0), Load->getMemoryVT(), Load->getAlign(),
7885 Load->getAddressSpace(), ISD::ZEXTLOAD, false);
7886
7887 // Resize the constant to the same size as the original memory access before
7888 // extension. If it is still the AllOnesValue then this AND is completely
7889 // unneeded.
7890 Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
7891
7892 bool B;
7893 switch (Load->getExtensionType()) {
7894 default: B = false; break;
7895 case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
7896 case ISD::ZEXTLOAD:
7897 case ISD::NON_EXTLOAD: B = true; break;
7898 }
7899
7900 if (B && Constant.isAllOnes()) {
7901 // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
7902 // preserve semantics once we get rid of the AND.
7903 SDValue NewLoad(Load, 0);
7904
7905 // Fold the AND away. NewLoad may get replaced immediately.
7906 CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
7907
7908 if (Load->getExtensionType() == ISD::EXTLOAD) {
7909 NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
7910 Load->getValueType(0), SDLoc(Load),
7911 Load->getChain(), Load->getBasePtr(),
7912 Load->getOffset(), Load->getMemoryVT(),
7913 Load->getMemOperand());
7914 // Replace uses of the EXTLOAD with the new ZEXTLOAD.
7915 if (Load->getNumValues() == 3) {
7916 // PRE/POST_INC loads have 3 values.
7917 SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
7918 NewLoad.getValue(2) };
7919 CombineTo(Load, To, 3, true);
7920 } else {
7921 CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
7922 }
7923 }
7924
7925 return SDValue(N, 0); // Return N so it doesn't get rechecked!
7926 }
7927 }
7928
7929 // Try to convert a constant mask AND into a shuffle clear mask.
7930 if (VT.isVector())
7931 if (SDValue Shuffle = XformToShuffleWithZero(N))
7932 return Shuffle;
7933
7934 if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
7935 return Combined;
7936
7937 if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR && N0.hasOneUse() && N1C &&
7939 SDValue Ext = N0.getOperand(0);
7940 EVT ExtVT = Ext->getValueType(0);
7941 SDValue Extendee = Ext->getOperand(0);
7942
7943 unsigned ScalarWidth = Extendee.getValueType().getScalarSizeInBits();
7944 if (N1C->getAPIntValue().isMask(ScalarWidth) &&
7945 (!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, ExtVT))) {
7946 // (and (extract_subvector (zext|anyext|sext v) _) iN_mask)
7947 // => (extract_subvector (iN_zeroext v))
7948 SDValue ZeroExtExtendee =
7949 DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVT, Extendee);
7950
7951 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ZeroExtExtendee,
7952 N0.getOperand(1));
7953 }
7954 }
7955
7956 // fold (and (masked_gather x)) -> (zext_masked_gather x)
7957 if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
7958 EVT MemVT = GN0->getMemoryVT();
7959 EVT ScalarVT = MemVT.getScalarType();
7960
7961 if (SDValue(GN0, 0).hasOneUse() &&
7962 isConstantSplatVectorMaskForType(N1.getNode(), ScalarVT) &&
7964 SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
7965 GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
7966
7967 SDValue ZExtLoad = DAG.getMaskedGather(
7968 DAG.getVTList(VT, MVT::Other), MemVT, DL, Ops, GN0->getMemOperand(),
7969 GN0->getIndexType(), ISD::ZEXTLOAD);
7970
7971 CombineTo(N, ZExtLoad);
7972 AddToWorklist(ZExtLoad.getNode());
7973 // Avoid recheck of N.
7974 return SDValue(N, 0);
7975 }
7976 }
7977
7978 // fold (and (load x), 255) -> (zextload x, i8)
7979 // fold (and (extload x, i16), 255) -> (zextload x, i8)
7980 // fold (and (freeze (load x)), 255) -> (freeze (zextload x, i8))
7981 // fold (and (freeze (extload x, i16)), 255) -> (freeze (zextload x, i8))
7982 if (N1C && !VT.isVector()) {
7983 SDValue Inner = peekThroughFreeze(N0);
7984 if (Inner.getOpcode() == ISD::LOAD)
7985 if (SDValue Res = reduceLoadWidth(N))
7986 return Res;
7987 }
7988
7989 if (LegalTypes) {
7990 // Attempt to propagate the AND back up to the leaves which, if they're
7991 // loads, can be combined to narrow loads and the AND node can be removed.
7992 // Perform after legalization so that extend nodes will already be
7993 // combined into the loads.
7994 if (BackwardsPropagateMask(N))
7995 return SDValue(N, 0);
7996 }
7997
7998 if (SDValue Combined = visitANDLike(N0, N1, N))
7999 return Combined;
8000
8001 // Simplify: (and (op x...), (op y...)) -> (op (and x, y))
8002 if (N0.getOpcode() == N1.getOpcode())
8003 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
8004 return V;
8005
8006 if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
8007 return R;
8008 if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG))
8009 return R;
8010
8011 // Fold (and X, (bswap (not Y))) -> (and X, (not (bswap Y)))
8012 // Fold (and X, (bitreverse (not Y))) -> (and X, (not (bitreverse Y)))
8013 SDValue X, Y, Z, NotY;
8014 for (unsigned Opc : {ISD::BSWAP, ISD::BITREVERSE})
8015 if (sd_match(N,
8016 m_And(m_Value(X), m_OneUse(m_UnaryOp(Opc, m_Value(NotY))))) &&
8017 sd_match(NotY, m_Not(m_Value(Y))) &&
8018 (TLI.hasAndNot(SDValue(N, 0)) || NotY->hasOneUse()))
8019 return DAG.getNode(ISD::AND, DL, VT, X,
8020 DAG.getNOT(DL, DAG.getNode(Opc, DL, VT, Y), VT));
8021
8022 // Fold (and X, (rot (not Y), Z)) -> (and X, (not (rot Y, Z)))
8023 for (unsigned Opc : {ISD::ROTL, ISD::ROTR})
8024 if (sd_match(N, m_And(m_Value(X),
8025 m_OneUse(m_BinOp(Opc, m_Value(NotY), m_Value(Z))))) &&
8026 sd_match(NotY, m_Not(m_Value(Y))) &&
8027 (TLI.hasAndNot(SDValue(N, 0)) || NotY->hasOneUse()))
8028 return DAG.getNode(ISD::AND, DL, VT, X,
8029 DAG.getNOT(DL, DAG.getNode(Opc, DL, VT, Y, Z), VT));
8030
8031 // Fold (and X, (add (not Y), Z)) -> (and X, (not (sub Y, Z)))
8032 // Fold (and X, (sub (not Y), Z)) -> (and X, (not (add Y, Z)))
8033 if (TLI.hasAndNot(SDValue(N, 0)))
8034 if (SDValue Folded = foldBitwiseOpWithNeg(N, DL, VT))
8035 return Folded;
8036
8037 // Fold (and (srl X, C), 1) -> (srl X, BW-1) for signbit extraction
8038 // If we are shifting down an extended sign bit, see if we can simplify
8039 // this to shifting the MSB directly to expose further simplifications.
8040 // This pattern often appears after sext_inreg legalization.
8041 APInt Amt;
8042 if (sd_match(N, m_And(m_Srl(m_Value(X), m_ConstInt(Amt)), m_One())) &&
8043 Amt.ult(BitWidth - 1) && Amt.uge(BitWidth - DAG.ComputeNumSignBits(X)))
8044 return DAG.getNode(ISD::SRL, DL, VT, X,
8045 DAG.getShiftAmountConstant(BitWidth - 1, VT, DL));
8046
8047 // Masking the negated extension of a boolean is just the zero-extended
8048 // boolean:
8049 // and (sub 0, zext(bool X)), 1 --> zext(bool X)
8050 // and (sub 0, sext(bool X)), 1 --> zext(bool X)
8051 //
8052 // Note: the SimplifyDemandedBits fold below can make an information-losing
8053 // transform, and then we have no way to find this better fold.
8054 if (sd_match(N, m_And(m_Sub(m_Zero(), m_Value(X)), m_One()))) {
8055 if (X.getOpcode() == ISD::ZERO_EXTEND &&
8056 X.getOperand(0).getScalarValueSizeInBits() == 1)
8057 return X;
8058 if (X.getOpcode() == ISD::SIGN_EXTEND &&
8059 X.getOperand(0).getScalarValueSizeInBits() == 1)
8060 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, X.getOperand(0));
8061 }
8062
8063 // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
8064 // fold (and (sra)) -> (and (srl)) when possible.
8066 return SDValue(N, 0);
8067
8068 // fold (zext_inreg (extload x)) -> (zextload x)
8069 // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
8070 if (ISD::isUNINDEXEDLoad(N0.getNode()) &&
8071 (ISD::isEXTLoad(N0.getNode()) ||
8072 (ISD::isSEXTLoad(N0.getNode()) && N0.hasOneUse()))) {
8073 auto *LN0 = cast<LoadSDNode>(N0);
8074 EVT MemVT = LN0->getMemoryVT();
8075 // If we zero all the possible extended bits, then we can turn this into
8076 // a zextload if we are running before legalize or the operation is legal.
8077 unsigned ExtBitSize = N1.getScalarValueSizeInBits();
8078 unsigned MemBitSize = MemVT.getScalarSizeInBits();
8079 APInt ExtBits = APInt::getHighBitsSet(ExtBitSize, ExtBitSize - MemBitSize);
8080 if (DAG.MaskedValueIsZero(N1, ExtBits) &&
8081 ((!LegalOperations && LN0->isSimple()) ||
8082 TLI.isLoadLegal(VT, MemVT, LN0->getAlign(), LN0->getAddressSpace(),
8083 ISD::ZEXTLOAD, false))) {
8084 SDValue ExtLoad =
8085 DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(),
8086 LN0->getBasePtr(), MemVT, LN0->getMemOperand());
8087 AddToWorklist(N);
8088 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
8089 return SDValue(N, 0); // Return N so it doesn't get rechecked!
8090 }
8091 }
8092
8093 // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
8094 if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
8095 if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
8096 N0.getOperand(1), false))
8097 return BSwap;
8098 }
8099
8100 if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
8101 return Shifts;
8102
8103 if (SDValue V = combineShiftAnd1ToBitTest(N, DAG))
8104 return V;
8105
8106 // Recognize the following pattern:
8107 //
8108 // AndVT = (and (sign_extend NarrowVT to AndVT) #bitmask)
8109 //
8110 // where bitmask is a mask that clears the upper bits of AndVT. The
8111 // number of bits in bitmask must be a power of two.
8112 auto IsAndZeroExtMask = [](SDValue LHS, SDValue RHS) {
8113 if (LHS->getOpcode() != ISD::SIGN_EXTEND)
8114 return false;
8115
8116 auto *C = isConstOrConstSplat(RHS, false, true);
8117 if (!C)
8118 return false;
8119
8120 if (!C->getAPIntValue().isMask(
8121 LHS.getOperand(0).getValueType().getScalarSizeInBits()))
8122 return false;
8123
8124 return true;
8125 };
8126
8127 // Replace (and (sign_extend ...) #bitmask) with (zero_extend ...).
8128 if (IsAndZeroExtMask(N0, N1) &&
8129 (!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)))
8130 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
8131
8132 if (hasOperation(ISD::USUBSAT, VT))
8133 if (SDValue V = foldAndToUsubsat(N, DAG, DL))
8134 return V;
8135
8136 // Postpone until legalization completed to avoid interference with bswap
8137 // folding
8138 if (LegalOperations || VT.isVector())
8139 if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
8140 return R;
8141
8142 if (VT.isScalarInteger() && VT != MVT::i1)
8143 if (SDValue R = foldMaskedMerge(N, DAG, TLI, DL))
8144 return R;
8145
8146 return SDValue();
8147}
8148
8149/// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
8150SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
8151                                       bool DemandHighBits) {
  // Run only after legalization: the transform relies on extend/shift combines
  // having already been folded into the operands, and on final legality info.
8152  if (!LegalOperations)
8153    return SDValue();
8154
  // The fold is only defined for these scalar integer widths.
8155  EVT VT = N->getValueType(0);
8156  if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
8157    return SDValue();
  // NOTE(review): source line 8158 is missing from this rendering; it is the
  // condition (presumably a BSWAP legality check -- TODO confirm against the
  // upstream file) that guards the early return below.
8159    return SDValue();
8160
8161  // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
  // Canonicalize so the AND-of-SHL side lands in N0 and the AND-of-SRL side in
  // N1; LookPassAnd* record that we peeked through a masking AND on that side.
8162  bool LookPassAnd0 = false;
8163  bool LookPassAnd1 = false;
8164  if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
8165    std::swap(N0, N1);
8166  if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
8167    std::swap(N0, N1);
8168  if (N0.getOpcode() == ISD::AND) {
8169    if (!N0->hasOneUse())
8170      return SDValue();
8171    ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
8172    // Also handle 0xffff since the LHS is guaranteed to have zeros there.
8173    // This is needed for X86.
8174    if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
8175                  N01C->getZExtValue() != 0xFFFF))
8176      return SDValue();
8177    N0 = N0.getOperand(0);
8178    LookPassAnd0 = true;
8179  }
8180
8181  if (N1.getOpcode() == ISD::AND) {
8182    if (!N1->hasOneUse())
8183      return SDValue();
8184    ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
8185    if (!N11C || N11C->getZExtValue() != 0xFF)
8186      return SDValue();
8187    N1 = N1.getOperand(0);
8188    LookPassAnd1 = true;
8189  }
8190
  // After peeling the masks, we must be left with (shl a, 8) and (srl a, 8),
  // each single-use with a constant shift amount of exactly 8.
8191  if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
8192    std::swap(N0, N1);
8193  if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
8194    return SDValue();
8195  if (!N0->hasOneUse() || !N1->hasOneUse())
8196    return SDValue();
8197
8198  ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
8199  ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
8200  if (!N01C || !N11C)
8201    return SDValue();
8202  if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
8203    return SDValue();
8204
8205  // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
8206  SDValue N00 = N0->getOperand(0);
8207  if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
8208    if (!N00->hasOneUse())
8209      return SDValue();
8210    ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
8211    if (!N001C || N001C->getZExtValue() != 0xFF)
8212      return SDValue();
8213    N00 = N00.getOperand(0);
8214    LookPassAnd0 = true;
8215  }
8216
8217  SDValue N10 = N1->getOperand(0);
8218  if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
8219    if (!N10->hasOneUse())
8220      return SDValue();
8221    ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
8222    // Also allow 0xFFFF since the bits will be shifted out. This is needed
8223    // for X86.
8224    if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
8225                   N101C->getZExtValue() != 0xFFFF))
8226      return SDValue();
8227    N10 = N10.getOperand(0);
8228    LookPassAnd1 = true;
8229  }
8230
  // Both halves must be byteswapping the same source value 'a'.
8231  if (N00 != N10)
8232    return SDValue();
8233
8234  // Make sure everything beyond the low halfword gets set to zero since the SRL
8235  // 16 will clear the top bits.
8236  unsigned OpSizeInBits = VT.getSizeInBits();
8237  if (OpSizeInBits > 16) {
8238    // If the left-shift isn't masked out then the only way this is a bswap is
8239    // if all bits beyond the low 8 are 0. In that case the entire pattern
8240    // reduces to a left shift anyway: leave it for other parts of the combiner.
8241    if (DemandHighBits && !LookPassAnd0)
8242      return SDValue();
8243
8244    // However, if the right shift isn't masked out then it might be because
8245    // it's not needed. See if we can spot that too. If the high bits aren't
8246    // demanded, we only need bits 23:16 to be zero. Otherwise, we need all
8247    // upper bits to be zero.
8248    if (!LookPassAnd1) {
8249      unsigned HighBit = DemandHighBits ? OpSizeInBits : 24;
8250      if (!DAG.MaskedValueIsZero(N10,
8251                                 APInt::getBitsSet(OpSizeInBits, 16, HighBit)))
8252        return SDValue();
8253    }
8254  }
8255
  // Emit (bswap a); for types wider than 16 bits the swapped halfword lands in
  // the high bits, so shift it back down into position.
8256  SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
8257  if (OpSizeInBits > 16) {
8258    SDLoc DL(N);
8259    Res = DAG.getNode(ISD::SRL, DL, VT, Res,
8260                      DAG.getShiftAmountConstant(OpSizeInBits - 16, VT, DL));
8261  }
8262  return Res;
8263}
8264
8265/// Return true if the specified node is an element that makes up a 32-bit
8266/// packed halfword byteswap.
8267/// ((x & 0x000000ff) << 8) |
8268/// ((x & 0x0000ff00) >> 8) |
8269/// ((x & 0x00ff0000) << 8) |
8270/// ((x & 0xff000000) >> 8)
// NOTE(review): source line 8271 (the function signature; in upstream LLVM it
// is "static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *>
// Parts) {" -- TODO confirm) is missing from this rendering.
  // Each element must be single-use: it is consumed only by the OR tree.
8272  if (!N->hasOneUse())
8273    return false;
8274
  // An element is a masking AND or a byte shift (SHL/SRL), whose operand is in
  // turn one of those same three opcodes.
8275  unsigned Opc = N.getOpcode();
8276  if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
8277    return false;
8278
8279  SDValue N0 = N.getOperand(0);
8280  unsigned Opc0 = N0.getOpcode();
8281  if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
8282    return false;
8283
8284  ConstantSDNode *N1C = nullptr;
8285  // SHL or SRL: look upstream for AND mask operand
8286  if (Opc == ISD::AND)
8287    N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
8288  else if (Opc0 == ISD::AND)
  // NOTE(review): source line 8289 (the assignment taking the mask constant
  // from the inner AND, presumably
  // "N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));" -- TODO confirm) is
  // missing from this rendering.
8290  if (!N1C)
8291    return false;
8292
  // Map the mask constant to the byte slot it selects (0 = LSB ... 3 = MSB).
8293  unsigned MaskByteOffset;
8294  switch (N1C->getZExtValue()) {
8295  default:
8296    return false;
8297  case 0xFF: MaskByteOffset = 0; break;
8298  case 0xFF00: MaskByteOffset = 1; break;
8299  case 0xFFFF:
8300    // In case demanded bits didn't clear the bits that will be shifted out.
8301    // This is needed for X86.
8302    if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) {
8303      MaskByteOffset = 1;
8304      break;
8305    }
8306    return false;
8307  case 0xFF0000: MaskByteOffset = 2; break;
8308  case 0xFF000000: MaskByteOffset = 3; break;
8309  }
8310
8311  // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
8312  if (Opc == ISD::AND) {
8313    if (MaskByteOffset == 0 || MaskByteOffset == 2) {
8314      // (x >> 8) & 0xff
8315      // (x >> 8) & 0xff0000
8316      if (Opc0 != ISD::SRL)
8317        return false;
      // NOTE(review): source line 8318 (fetching the shift amount, presumably
      // "ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));") is
      // missing from this rendering.
8319      if (!C || C->getZExtValue() != 8)
8320        return false;
8321    } else {
8322      // (x << 8) & 0xff00
8323      // (x << 8) & 0xff000000
8324      if (Opc0 != ISD::SHL)
8325        return false;
      // NOTE(review): source line 8326 (the same shift-amount fetch as above)
      // is missing from this rendering.
8327      if (!C || C->getZExtValue() != 8)
8328        return false;
8329    }
8330  } else if (Opc == ISD::SHL) {
8331    // (x & 0xff) << 8
8332    // (x & 0xff0000) << 8
8333    if (MaskByteOffset != 0 && MaskByteOffset != 2)
8334      return false;
8335    ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
8336    if (!C || C->getZExtValue() != 8)
8337      return false;
8338  } else { // Opc == ISD::SRL
8339    // (x & 0xff00) >> 8
8340    // (x & 0xff000000) >> 8
8341    if (MaskByteOffset != 1 && MaskByteOffset != 3)
8342      return false;
8343    ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
8344    if (!C || C->getZExtValue() != 8)
8345      return false;
8346  }
8347
  // Reject if this byte slot was already claimed by another element.
8348  if (Parts[MaskByteOffset])
8349    return false;
8350
  // Record the node that supplies this byte; callers check all slots agree.
8351  Parts[MaskByteOffset] = N0.getOperand(0).getNode();
8352  return true;
8353}
8354
8355// Match 2 elements of a packed halfword bswap.
// NOTE(review): source line 8356 (the function signature; in upstream LLVM it
// is "static bool isBSwapHWordPair(SDValue N, MutableArrayRef<SDNode *>
// Parts) {" -- TODO confirm) is missing from this rendering.
  // An OR of two single-byte elements forms one halfword of the pattern.
8357  if (N.getOpcode() == ISD::OR)
8358    return isBSwapHWordElement(N.getOperand(0), Parts) &&
8359           isBSwapHWordElement(N.getOperand(1), Parts);
8360
  // (srl (bswap x), 16) supplies the two low bytes at once.
8361  if (N.getOpcode() == ISD::SRL && N.getOperand(0).getOpcode() == ISD::BSWAP) {
8362    ConstantSDNode *C = isConstOrConstSplat(N.getOperand(1));
8363    if (!C || C->getAPIntValue() != 16)
8364      return false;
8365    Parts[0] = Parts[1] = N.getOperand(0).getOperand(0).getNode();
8366    return true;
8367  }
8368
8369  return false;
8370}
8371
8372// Match this pattern:
8373// (or (and (shl (A, 8)), 0xff00ff00), (and (srl (A, 8)), 0x00ff00ff))
8374// And rewrite this to:
8375// (rotr (bswap A), 16)
// NOTE(review): source line 8376 (the start of the signature; in upstream LLVM
// it is "static SDValue matchBSwapHWordOrAndAnd(const TargetLowering &TLI," --
// TODO confirm) is missing from this rendering.
8377                                        SelectionDAG &DAG, SDNode *N, SDValue N0,
8378                                        SDValue N1, EVT VT) {
8379  assert(N->getOpcode() == ISD::OR && VT == MVT::i32 &&
8380         "MatchBSwapHWordOrAndAnd: expecting i32");
  // Bail out unless the target can do ROTR and both operands are single-use
  // masking ANDs.
8381  if (!TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
8382    return SDValue();
8383  if (N0.getOpcode() != ISD::AND || N1.getOpcode() != ISD::AND)
8384    return SDValue();
8385  // TODO: this is too restrictive; lifting this restriction requires more tests
8386  if (!N0->hasOneUse() || !N1->hasOneUse())
8387    return SDValue();
  // NOTE(review): source lines 8388-8389 (extracting Mask0/Mask1 from the two
  // AND mask operands, presumably via isConstOrConstSplat on
  // N0.getOperand(1)/N1.getOperand(1) -- TODO confirm) are missing from this
  // rendering.
8390  if (!Mask0 || !Mask1)
8391    return SDValue();
  // The two masks must be the exact alternating-byte pair.
8392  if (Mask0->getAPIntValue() != 0xff00ff00 ||
8393      Mask1->getAPIntValue() != 0x00ff00ff)
8394    return SDValue();
  // Under the masks: a left shift by 8 and a right shift by 8 of the same A.
8395  SDValue Shift0 = N0.getOperand(0);
8396  SDValue Shift1 = N1.getOperand(0);
8397  if (Shift0.getOpcode() != ISD::SHL || Shift1.getOpcode() != ISD::SRL)
8398    return SDValue();
8399  ConstantSDNode *ShiftAmt0 = isConstOrConstSplat(Shift0.getOperand(1));
8400  ConstantSDNode *ShiftAmt1 = isConstOrConstSplat(Shift1.getOperand(1));
8401  if (!ShiftAmt0 || !ShiftAmt1)
8402    return SDValue();
8403  if (ShiftAmt0->getAPIntValue() != 8 || ShiftAmt1->getAPIntValue() != 8)
8404    return SDValue();
8405  if (Shift0.getOperand(0) != Shift1.getOperand(0))
8406    return SDValue();
8407
  // Matched: emit (rotr (bswap A), 16).
8408  SDLoc DL(N);
8409  SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Shift0.getOperand(0));
8410  SDValue ShAmt = DAG.getShiftAmountConstant(16, VT, DL);
8411  return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
8412}
8413
8414/// Match a 32-bit packed halfword bswap. That is
8415/// ((x & 0x000000ff) << 8) |
8416/// ((x & 0x0000ff00) >> 8) |
8417/// ((x & 0x00ff0000) << 8) |
8418/// ((x & 0xff000000) >> 8)
8419/// => (rotl (bswap x), 16)
8420SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
  // Only run after legalization so the legality queries below are final.
8421  if (!LegalOperations)
8422    return SDValue();
8423
  // This pattern is i32-only.
8424  EVT VT = N->getValueType(0);
8425  if (VT != MVT::i32)
8426    return SDValue();
  // NOTE(review): source line 8427 is missing from this rendering; it is the
  // condition (presumably a BSWAP legality check -- TODO confirm) guarding the
  // early return below.
8428    return SDValue();
8429
  // First try the compact and/shift/and form, in both operand orders.
8430  if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N0, N1, VT))
8431    return BSwap;
8432
8433  // Try again with commuted operands.
8434  if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N1, N0, VT))
8435    return BSwap;
8436
8437
8438  // Look for either
8439  // (or (bswaphpair), (bswaphpair))
8440  // (or (or (bswaphpair), (and)), (and))
8441  // (or (or (and), (bswaphpair)), (and))
  // Parts[i] records the node supplying byte i; all four slots must agree.
8442  SDNode *Parts[4] = {};
8443
8444  if (isBSwapHWordPair(N0, Parts)) {
8445    // (or (or (and), (and)), (or (and), (and)))
8446    if (!isBSwapHWordPair(N1, Parts))
8447      return SDValue();
8448  } else if (N0.getOpcode() == ISD::OR) {
8449    // (or (or (or (and), (and)), (and)), (and))
8450    if (!isBSwapHWordElement(N1, Parts))
8451      return SDValue();
8452    SDValue N00 = N0.getOperand(0);
8453    SDValue N01 = N0.getOperand(1);
8454    if (!(isBSwapHWordElement(N01, Parts) && isBSwapHWordPair(N00, Parts)) &&
8455        !(isBSwapHWordElement(N00, Parts) && isBSwapHWordPair(N01, Parts)))
8456      return SDValue();
8457  } else {
8458    return SDValue();
8459  }
8460
8461  // Make sure the parts are all coming from the same node.
8462  if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
8463    return SDValue();
8464
8465  SDLoc DL(N);
8466  SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
8467                              SDValue(Parts[0], 0));
8468
8469  // Result of the bswap should be rotated by 16. If it's not legal, then
8470  // do (x << 16) | (x >> 16).
8471  SDValue ShAmt = DAG.getShiftAmountConstant(16, VT, DL);
  // NOTE(review): source line 8472 (presumably an ISD::ROTL legality check --
  // TODO confirm) is missing from this rendering; it guards the return below.
8473    return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
  // NOTE(review): source line 8474 (presumably the matching ISD::ROTR legality
  // check) is missing from this rendering; it guards the return below.
8475    return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
8476  return DAG.getNode(ISD::OR, DL, VT,
8477                     DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
8478                     DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
8479}
8480
8481/// This contains all DAGCombine rules which reduce two values combined by
8482/// an Or operation to a single value \see visitANDLike().
8483SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, const SDLoc &DL) {
8484  EVT VT = N1.getValueType();
8485
8486  // fold (or x, undef) -> -1
8487  if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
8488    return DAG.getAllOnesConstant(DL, VT);
8489
  // Try the setcc-merging folds (OR of two compares -> one compare).
8490  if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
8491    return V;
8492
8493  // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
8494  if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
8495      // Don't increase # computations.
8496      (N0->hasOneUse() || N1->hasOneUse())) {
8497    // We can only do this xform if we know that bits from X that are set in C2
8498    // but not in C1 are already zero. Likewise for Y.
8499    if (const ConstantSDNode *N0O1C =
    // NOTE(review): source line 8500 (the initializer fetching constant C1
    // from N0.getOperand(1)) is missing from this rendering.
8501      if (const ConstantSDNode *N1O1C =
      // NOTE(review): source line 8502 (the initializer fetching constant C2
      // from N1.getOperand(1)) is missing from this rendering.
8503        // We can only do this xform if we know that bits from X that are set in
8504        // C2 but not in C1 are already zero. Likewise for Y.
8505        const APInt &LHSMask = N0O1C->getAPIntValue();
8506        const APInt &RHSMask = N1O1C->getAPIntValue();
8507
8508        if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
8509            DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
8510          SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
8511                                  N0.getOperand(0), N1.getOperand(0));
8512          return DAG.getNode(ISD::AND, DL, VT, X,
8513                             DAG.getConstant(LHSMask | RHSMask, DL, VT));
8514        }
8515      }
8516    }
8517  }
8518
8519  // (or (and X, M), (and X, N)) -> (and X, (or M, N))
8520  if (N0.getOpcode() == ISD::AND &&
8521      N1.getOpcode() == ISD::AND &&
8522      N0.getOperand(0) == N1.getOperand(0) &&
8523      // Don't increase # computations.
8524      (N0->hasOneUse() || N1->hasOneUse())) {
8525    SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
8526                            N0.getOperand(1), N1.getOperand(1));
8527    return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
8528  }
8529
8530  return SDValue();
8531}
8532
8533/// OR combines for which the commuted variant will be tried as well.
// NOTE(review): source line 8534 (the start of the signature; in upstream LLVM
// it is "static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0,
// SDValue N1," -- TODO confirm) is missing from this rendering.
8535                                  SDNode *N) {
8536  EVT VT = N0.getValueType();
8537  unsigned BW = VT.getScalarSizeInBits();
8538  SDLoc DL(N);
8539
  // Helper: look through a zext/trunc wrapper so the AND patterns below match
  // across a resize of the value.
8540  auto peekThroughResize = [](SDValue V) {
8541    if (V->getOpcode() == ISD::ZERO_EXTEND || V->getOpcode() == ISD::TRUNCATE)
8542      return V->getOperand(0);
8543    return V;
8544  };
8545
8546  SDValue N0Resized = peekThroughResize(N0);
8547  if (N0Resized.getOpcode() == ISD::AND) {
8548    SDValue N1Resized = peekThroughResize(N1);
8549    SDValue N00 = N0Resized.getOperand(0);
8550    SDValue N01 = N0Resized.getOperand(1);
8551
8552    // fold or (and x, y), x --> x
8553    if (N00 == N1Resized || N01 == N1Resized)
8554      return N1;
8555
8556    // fold (or (and X, (xor Y, -1)), Y) -> (or X, Y)
8557    // TODO: Set AllowUndefs = true.
8558    if (SDValue NotOperand = getBitwiseNotOperand(N01, N00,
8559                                                  /* AllowUndefs */ false)) {
8560      if (peekThroughResize(NotOperand) == N1Resized)
8561        return DAG.getNode(ISD::OR, DL, VT, DAG.getZExtOrTrunc(N00, DL, VT),
8562                           N1);
8563    }
8564
8565    // fold (or (and (xor Y, -1), X), Y) -> (or X, Y)
8566    if (SDValue NotOperand = getBitwiseNotOperand(N00, N01,
8567                                                  /* AllowUndefs */ false)) {
8568      if (peekThroughResize(NotOperand) == N1Resized)
8569        return DAG.getNode(ISD::OR, DL, VT, DAG.getZExtOrTrunc(N01, DL, VT),
8570                           N1);
8571    }
8572  }
8573
8574  SDValue X, Y;
8575
8576  // fold or (xor X, N1), N1 --> or X, N1
8577  if (sd_match(N0, m_Xor(m_Value(X), m_Specific(N1))))
8578    return DAG.getNode(ISD::OR, DL, VT, X, N1);
8579
8580  // fold or (xor x, y), (x and/or y) --> or x, y
8581  if (sd_match(N0, m_Xor(m_Value(X), m_Value(Y))) &&
8582      (sd_match(N1, m_And(m_Specific(X), m_Specific(Y))) ||
  // NOTE(review): source line 8583 (the second alternative of this match,
  // presumably matching (or X, Y) -- TODO confirm) is missing from this
  // rendering.
8584    return DAG.getNode(ISD::OR, DL, VT, X, Y);
8585
8586  if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
8587    return R;
8588
  // Helper: look through a zero-extension of a funnel-shift amount so it can
  // be compared against a plain shift amount.
8589  auto peekThroughZext = [](SDValue V) {
8590    if (V->getOpcode() == ISD::ZERO_EXTEND)
8591      return V->getOperand(0);
8592    return V;
8593  };
8594
8595  if (N0.getOpcode() == ISD::FSHL && N1.getOpcode() == ISD::SHL &&
8596      peekThroughZext(N0.getOperand(2)) == peekThroughZext(N1.getOperand(1))) {
8597    // (fshl X, ?, Y) | (shl X, Y) --> fshl X, ?, Y
8598    if (N0.getOperand(0) == N1.getOperand(0))
8599      return N0;
8600    // (fshl A, X, Y) | (shl X, Y) --> fshl (A|X), X, Y
8601    if (N0.getOperand(1) == N1.getOperand(0) && N0.hasOneUse() &&
8602        N1.hasOneUse()) {
8603      SDValue A = N0.getOperand(0);
8604      SDValue X = N1.getOperand(0);
8605      SDValue NewLHS = DAG.getNode(ISD::OR, DL, VT, A, X);
8606      return DAG.getNode(ISD::FSHL, DL, VT, NewLHS, X, N0.getOperand(2));
8607    }
8608  }
8609
8610  if (N0.getOpcode() == ISD::FSHR && N1.getOpcode() == ISD::SRL &&
8611      peekThroughZext(N0.getOperand(2)) == peekThroughZext(N1.getOperand(1))) {
8612    // (fshr ?, X, Y) | (srl X, Y) --> fshr ?, X, Y
8613    if (N0.getOperand(1) == N1.getOperand(0))
8614      return N0;
8615    // (fshr X, B, Y) | (srl X, Y) --> fshr X, (X|B), Y
8616    if (N0.getOperand(0) == N1.getOperand(0) && N0.hasOneUse() &&
8617        N1.hasOneUse()) {
8618      SDValue X = N1.getOperand(0);
8619      SDValue B = N0.getOperand(1);
8620      SDValue NewRHS = DAG.getNode(ISD::OR, DL, VT, X, B);
8621      return DAG.getNode(ISD::FSHR, DL, VT, X, NewRHS, N0.getOperand(2));
8622    }
8623  }
8624
8625  // (fshl A, B, S0) | (fshr C, D, S1) --> fshl (A|C), (B|D), S0
8626  // iff S0 + S1 == bitwidth(S1)
8627  if (N0.getOpcode() == ISD::FSHL && N1.getOpcode() == ISD::FSHR &&
8628      N0.hasOneUse() && N1.hasOneUse()) {
8629    auto *S0 = dyn_cast<ConstantSDNode>(N0.getOperand(2));
8630    auto *S1 = dyn_cast<ConstantSDNode>(N1.getOperand(2));
8631    if (S0 && S1 && S0->getZExtValue() < BW && S1->getZExtValue() < BW &&
8632        S0->getZExtValue() == (BW - S1->getZExtValue())) {
8633      SDValue A = N0.getOperand(0);
8634      SDValue B = N0.getOperand(1);
8635      SDValue C = N1.getOperand(0);
8636      SDValue D = N1.getOperand(1);
8637      SDValue NewLHS = DAG.getNode(ISD::OR, DL, VT, A, C);
8638      SDValue NewRHS = DAG.getNode(ISD::OR, DL, VT, B, D);
8639      return DAG.getNode(ISD::FSHL, DL, VT, NewLHS, NewRHS, N0.getOperand(2));
8640    }
8641  }
8642
8643  // Attempt to match a legalized build_pair-esque pattern:
8644  // or(shl(aext(Hi),BW/2),zext(Lo))
8645  SDValue Lo, Hi;
8646  if (sd_match(N0,
  // NOTE(review): source line 8647 (the rest of this match expression,
  // presumably matching shl(anyext(Hi), BW/2) -- TODO confirm) is missing from
  // this rendering.
8648      sd_match(N1, m_ZExt(m_Value(Lo))) &&
8649      Lo.getScalarValueSizeInBits() == (BW / 2) &&
8650      Lo.getValueType() == Hi.getValueType()) {
8651    // Fold build_pair(not(Lo),not(Hi)) -> not(build_pair(Lo,Hi)).
8652    SDValue NotLo, NotHi;
8653    if (sd_match(Lo, m_OneUse(m_Not(m_Value(NotLo)))) &&
8654        sd_match(Hi, m_OneUse(m_Not(m_Value(NotHi))))) {
8655      Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotLo);
8656      Hi = DAG.getNode(ISD::ANY_EXTEND, DL, VT, NotHi);
8657      Hi = DAG.getNode(ISD::SHL, DL, VT, Hi,
8658                       DAG.getShiftAmountConstant(BW / 2, VT, DL));
8659      return DAG.getNOT(DL, DAG.getNode(ISD::OR, DL, VT, Lo, Hi), VT);
8660    }
8661  }
8662
8663  return SDValue();
8664}
8665
8666SDValue DAGCombiner::visitOR(SDNode *N) {
8667 SDValue N0 = N->getOperand(0);
8668 SDValue N1 = N->getOperand(1);
8669 EVT VT = N1.getValueType();
8670 SDLoc DL(N);
8671
8672 // x | x --> x
8673 if (N0 == N1)
8674 return N0;
8675
8676 // fold (or c1, c2) -> c1|c2
8677 if (SDValue C = DAG.FoldConstantArithmetic(ISD::OR, DL, VT, {N0, N1}))
8678 return C;
8679
8680 // canonicalize constant to RHS
8683 return DAG.getNode(ISD::OR, DL, VT, N1, N0);
8684
8685 // fold vector ops
8686 if (VT.isVector()) {
8687 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
8688 return FoldedVOp;
8689
8690 // fold (or x, 0) -> x, vector edition
8692 return N0;
8693
8694 // fold (or x, -1) -> -1, vector edition
8696 // do not return N1, because undef node may exist in N1
8697 return DAG.getAllOnesConstant(DL, N1.getValueType());
8698
8699 // fold (or buildvector(x,0,-1,w), buildvector(0,y,z,w))
8700 // --> buildvector(x,y,-1,w)
8701 auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
8702 auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
8703 if (BV0 && BV1 && !BV0->getSplatValue() && !BV1->getSplatValue() &&
8704 N0.hasOneUse() && N1.hasOneUse() &&
8705 BV0->getOperand(0).getValueType() ==
8706 BV1->getOperand(0).getValueType()) {
8707 SmallVector<SDValue> MergedOps;
8708 unsigned NumElts = VT.getVectorNumElements();
8709 EVT EltVT = BV0->getOperand(0).getValueType();
8710 for (unsigned I = 0; I != NumElts; ++I) {
8711 auto *C0 = dyn_cast<ConstantSDNode>(BV0->getOperand(I));
8712 auto *C1 = dyn_cast<ConstantSDNode>(BV1->getOperand(I));
8713 if (C0 && C1)
8714 MergedOps.push_back(DAG.getConstant(
8715 C0->getAPIntValue() | C1->getAPIntValue(), DL, EltVT));
8716 else if (C0 && C0->isZero())
8717 MergedOps.push_back(BV1->getOperand(I));
8718 else if (C1 && C1->isZero())
8719 MergedOps.push_back(BV0->getOperand(I));
8720 else if (C0 && C0->isAllOnes())
8721 MergedOps.push_back(BV0->getOperand(I));
8722 else if (C1 && C1->isAllOnes())
8723 MergedOps.push_back(BV1->getOperand(I));
8724 else if (BV0->getOperand(I) == BV1->getOperand(I))
8725 MergedOps.push_back(BV0->getOperand(I));
8726 else
8727 break;
8728 }
8729 if (MergedOps.size() == NumElts)
8730 return DAG.getBuildVector(VT, DL, MergedOps);
8731 }
8732
8733 // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
8734 // Do this only if the resulting type / shuffle is legal.
8735 auto *SV0 = dyn_cast<ShuffleVectorSDNode>(N0);
8736 auto *SV1 = dyn_cast<ShuffleVectorSDNode>(N1);
8737 if (SV0 && SV1 && TLI.isTypeLegal(VT)) {
8738 bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
8739 bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
8740 bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
8741 bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
8742 // Ensure both shuffles have a zero input.
8743 if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
8744 assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
8745 assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
8746 bool CanFold = true;
8747 int NumElts = VT.getVectorNumElements();
8748 SmallVector<int, 4> Mask(NumElts, -1);
8749
8750 for (int i = 0; i != NumElts; ++i) {
8751 int M0 = SV0->getMaskElt(i);
8752 int M1 = SV1->getMaskElt(i);
8753
8754 // Determine if either index is pointing to a zero vector.
8755 bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
8756 bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));
8757
          // If one element is zero and the other side is undef, keep undef.
8759 // This also handles the case that both are undef.
8760 if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0))
8761 continue;
8762
8763 // Make sure only one of the elements is zero.
8764 if (M0Zero == M1Zero) {
8765 CanFold = false;
8766 break;
8767 }
8768
8769 assert((M0 >= 0 || M1 >= 0) && "Undef index!");
8770
8771 // We have a zero and non-zero element. If the non-zero came from
8772 // SV0 make the index a LHS index. If it came from SV1, make it
8773 // a RHS index. We need to mod by NumElts because we don't care
8774 // which operand it came from in the original shuffles.
8775 Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
8776 }
8777
8778 if (CanFold) {
8779 SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
8780 SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
8781 SDValue LegalShuffle =
8782 TLI.buildLegalVectorShuffle(VT, DL, NewLHS, NewRHS, Mask, DAG);
8783 if (LegalShuffle)
8784 return LegalShuffle;
8785 }
8786 }
8787 }
8788 }
8789
8790 // fold (or x, 0) -> x
8791 if (isNullConstant(N1))
8792 return N0;
8793
8794 // fold (or x, -1) -> -1
8795 if (isAllOnesConstant(N1))
8796 return N1;
8797
8798 if (SDValue NewSel = foldBinOpIntoSelect(N))
8799 return NewSel;
8800
8801 // fold (or x, c) -> c iff (x & ~c) == 0
8802 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
8803 if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
8804 return N1;
8805
8806 if (SDValue R = foldAndOrOfSETCC(N, DAG))
8807 return R;
8808
8809 if (SDValue Combined = visitORLike(N0, N1, DL))
8810 return Combined;
8811
8812 if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
8813 return Combined;
8814
8815 if (SDValue Combined = combineOrOfSetCCToUSUBOCarry(N, DAG, TLI))
8816 return Combined;
8817
8818 // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
8819 if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
8820 return BSwap;
8821 if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
8822 return BSwap;
8823
8824 // reassociate or
8825 if (SDValue ROR = reassociateOps(ISD::OR, DL, N0, N1, N->getFlags()))
8826 return ROR;
8827
8828 // Fold or(vecreduce(x), vecreduce(y)) -> vecreduce(or(x, y))
8829 if (SDValue SD =
8830 reassociateReduction(ISD::VECREDUCE_OR, ISD::OR, DL, VT, N0, N1))
8831 return SD;
8832
8833 // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
8834 // iff (c1 & c2) != 0 or c1/c2 are undef.
8835 auto MatchIntersect = [](ConstantSDNode *C1, ConstantSDNode *C2) {
8836 return !C1 || !C2 || C1->getAPIntValue().intersects(C2->getAPIntValue());
8837 };
8838 if (N0.getOpcode() == ISD::AND && N0->hasOneUse() &&
8839 ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect, true)) {
8840 if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT,
8841 {N1, N0.getOperand(1)})) {
8842 SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
8843 AddToWorklist(IOR.getNode());
8844 return DAG.getNode(ISD::AND, DL, VT, COR, IOR);
8845 }
8846 }
8847
8848 if (SDValue Combined = visitORCommutative(DAG, N0, N1, N))
8849 return Combined;
8850 if (SDValue Combined = visitORCommutative(DAG, N1, N0, N))
8851 return Combined;
8852
8853 // Simplify: (or (op x...), (op y...)) -> (op (or x, y))
8854 if (N0.getOpcode() == N1.getOpcode())
8855 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
8856 return V;
8857
8858 // See if this is some rotate idiom.
8859 if (SDValue Rot = MatchRotate(N0, N1, DL, /*FromAdd=*/false))
8860 return Rot;
8861
8862 if (SDValue Load = MatchLoadCombine(N))
8863 return Load;
8864
8865 // Simplify the operands using demanded-bits information.
8867 return SDValue(N, 0);
8868
8869 // If OR can be rewritten into ADD, try combines based on ADD.
8870 if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
8871 DAG.isADDLike(SDValue(N, 0)))
8872 if (SDValue Combined = visitADDLike(N))
8873 return Combined;
8874
8875 // Postpone until legalization completed to avoid interference with bswap
8876 // folding
8877 if (LegalOperations || VT.isVector())
8878 if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
8879 return R;
8880
8881 if (VT.isScalarInteger() && VT != MVT::i1)
8882 if (SDValue R = foldMaskedMerge(N, DAG, TLI, DL))
8883 return R;
8884
8885 return SDValue();
8886}
8887
                                 SDValue &Mask) {
  // If Op is an AND whose RHS is a constant (or a constant build-vector),
  // report that constant through the Mask out-parameter and look through to
  // the AND's other operand.
  if (Op.getOpcode() == ISD::AND &&
      DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
    Mask = Op.getOperand(1);
    return Op.getOperand(0);
  }
  // No constant mask present: Mask is left untouched and Op is returned
  // unchanged.
  return Op;
}
8897
8898/// Match "(X shl/srl V1) & V2" where V2 may not be present.
8899static bool matchRotateHalf(const SelectionDAG &DAG, SDValue Op, SDValue &Shift,
8900 SDValue &Mask) {
8901 Op = stripConstantMask(DAG, Op, Mask);
8902 if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
8903 Shift = Op;
8904 return true;
8905 }
8906 return false;
8907}
8908
/// Helper function for visitOR to extract the needed side of a rotate idiom
/// from a shl/srl/mul/udiv. This is meant to handle cases where
/// InstCombine merged some outside op with one of the shifts from
/// the rotate pattern.
/// \returns An empty \c SDValue if the needed shift couldn't be extracted.
/// Otherwise, returns an expansion of \p ExtractFrom based on the following
/// patterns:
///
/// (or (add v v) (shrl v bitwidth-1)):
///   expands (add v v) -> (shl v 1)
///
/// (or (mul v c0) (shrl (mul v c1) c2)):
///   expands (mul v c0) -> (shl (mul v c1) c3)
///
/// (or (udiv v c0) (shl (udiv v c1) c2)):
///   expands (udiv v c0) -> (shrl (udiv v c1) c3)
///
/// (or (shl v c0) (shrl (shl v c1) c2)):
///   expands (shl v c0) -> (shl (shl v c1) c3)
///
/// (or (shrl v c0) (shl (shrl v c1) c2)):
///   expands (shrl v c0) -> (shrl (shrl v c1) c3)
///
/// Such that in all cases, c3+c2==bitwidth(op v c1).
                                     SDValue ExtractFrom, SDValue &Mask,
                                     const SDLoc &DL) {
  assert(OppShift && ExtractFrom && "Empty SDValue");
  // Only a plain shl/srl can be the opposing half of a rotate.
  if (OppShift.getOpcode() != ISD::SHL && OppShift.getOpcode() != ISD::SRL)
    return SDValue();

  // Peel off an optional constant mask; the caller is responsible for
  // re-applying it to the rotate result.
  ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask);

  // Value and Type of the shift.
  SDValue OppShiftLHS = OppShift.getOperand(0);
  EVT ShiftedVT = OppShiftLHS.getValueType();

  // Amount of the existing shift.
  ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));

  // (add v v) -> (shl v 1)
  // TODO: Should this be a general DAG canonicalization?
  if (OppShift.getOpcode() == ISD::SRL && OppShiftCst &&
      ExtractFrom.getOpcode() == ISD::ADD &&
      ExtractFrom.getOperand(0) == ExtractFrom.getOperand(1) &&
      ExtractFrom.getOperand(0) == OppShiftLHS &&
      OppShiftCst->getAPIntValue() == ShiftedVT.getScalarSizeInBits() - 1)
    return DAG.getNode(ISD::SHL, DL, ShiftedVT, OppShiftLHS,
                       DAG.getShiftAmountConstant(1, ShiftedVT, DL));

  // Preconditions:
  //    (or (op0 v c0) (shiftl/r (op0 v c1) c2))
  //
  // Find opcode of the needed shift to be extracted from (op0 v c0).
  unsigned Opcode = ISD::DELETED_NODE;
  bool IsMulOrDiv = false;
  // Set Opcode and IsMulOrDiv if the extract opcode matches the needed shift
  // opcode or its arithmetic (mul or udiv) variant.
  auto SelectOpcode = [&](unsigned NeededShift, unsigned MulOrDivVariant) {
    IsMulOrDiv = ExtractFrom.getOpcode() == MulOrDivVariant;
    if (!IsMulOrDiv && ExtractFrom.getOpcode() != NeededShift)
      return false;
    Opcode = NeededShift;
    return true;
  };
  // op0 must be either the needed shift opcode or the mul/udiv equivalent
  // that the needed shift can be extracted from.
  if ((OppShift.getOpcode() != ISD::SRL || !SelectOpcode(ISD::SHL, ISD::MUL)) &&
      (OppShift.getOpcode() != ISD::SHL || !SelectOpcode(ISD::SRL, ISD::UDIV)))
    return SDValue();

  // op0 must be the same opcode on both sides, have the same LHS argument,
  // and produce the same value type.
  if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() ||
      OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) ||
      ShiftedVT != ExtractFrom.getValueType())
    return SDValue();

  // Constant mul/udiv/shift amount from the RHS of the shift's LHS op.
  ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1));
  // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op.
  ConstantSDNode *ExtractFromCst =
      isConstOrConstSplat(ExtractFrom.getOperand(1));
  // TODO: We should be able to handle non-uniform constant vectors for these
  // values.
  // Check that we have constant values.
  if (!OppShiftCst || !OppShiftCst->getAPIntValue() ||
      !OppLHSCst || !OppLHSCst->getAPIntValue() ||
      !ExtractFromCst || !ExtractFromCst->getAPIntValue())
    return SDValue();

  // Compute the shift amount we need to extract to complete the rotate.
  const unsigned VTWidth = ShiftedVT.getScalarSizeInBits();
  if (OppShiftCst->getAPIntValue().ugt(VTWidth))
    return SDValue();
  APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
  // Normalize the bitwidth of the two mul/udiv/shift constant operands.
  APInt ExtractFromAmt = ExtractFromCst->getAPIntValue();
  APInt OppLHSAmt = OppLHSCst->getAPIntValue();
  zeroExtendToMatch(ExtractFromAmt, OppLHSAmt);

  // Now try extract the needed shift from the ExtractFrom op and see if the
  // result matches up with the existing shift's LHS op.
  if (IsMulOrDiv) {
    // Op to extract from is a mul or udiv by a constant.
    // Check:
    //     c2 / (1 << (bitwidth(op0 v c0) - c1)) == c0
    //     c2 % (1 << (bitwidth(op0 v c0) - c1)) == 0
    const APInt ExtractDiv = APInt::getOneBitSet(ExtractFromAmt.getBitWidth(),
                                                 NeededShiftAmt.getZExtValue());
    APInt ResultAmt;
    APInt Rem;
    APInt::udivrem(ExtractFromAmt, ExtractDiv, ResultAmt, Rem);
    if (Rem != 0 || ResultAmt != OppLHSAmt)
      return SDValue();
  } else {
    // Op to extract from is a shift by a constant.
    // Check:
    //      c2 - (bitwidth(op0 v c0) - c1) == c0
    if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc(
                                          ExtractFromAmt.getBitWidth()))
      return SDValue();
  }

  // Return the expanded shift op that should allow a rotate to be formed.
  EVT ShiftVT = OppShift.getOperand(1).getValueType();
  EVT ResVT = ExtractFrom.getValueType();
  SDValue NewShiftNode = DAG.getConstant(NeededShiftAmt, DL, ShiftVT);
  return DAG.getNode(Opcode, DL, ResVT, OppShiftLHS, NewShiftNode);
}
9038
// Return true if we can prove that, whenever Neg and Pos are both in the
// range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos). This means that
// for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
//
// (or (shift1 X, Neg), (shift2 X, Pos))
//
// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
// in direction shift1 by Neg. The range [0, EltSize) means that we only need
// to consider shift amounts with defined behavior.
//
// The IsRotate flag should be set when the LHS of both shifts is the same.
// Otherwise if matching a general funnel shift, it should be clear.
static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
                           SelectionDAG &DAG, bool IsRotate, bool FromAdd) {
  const auto &TLI = DAG.getTargetLoweringInfo();
  // If EltSize is a power of 2 then:
  //
  //  (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
  //  (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
  //
  // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
  // for the stronger condition:
  //
  //     Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1)    [A]
  //
  // for all Neg and Pos.  Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
  // we can just replace Neg with Neg' for the rest of the function.
  //
  // In other cases we check for the even stronger condition:
  //
  //     Neg == EltSize - Pos                                      [B]
  //
  // for all Neg and Pos.  Note that the (or ...) then invokes undefined
  // behavior if Pos == 0 (and consequently Neg == EltSize).
  //
  // We could actually use [A] whenever EltSize is a power of 2, but the
  // only extra cases that it would match are those uninteresting ones
  // where Neg and Pos are never in range at the same time.  E.g. for
  // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
  // as well as (sub 32, Pos), but:
  //
  //     (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
  //
  // always invokes undefined behavior for 32-bit X.
  //
  // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
  // This allows us to peek through any operations that only affect Mask's
  // un-demanded bits.
  //
  // NOTE: We can only do this when matching operations which won't modify the
  // least Log2(EltSize) significant bits and not a general funnel shift.
  // MaskLoBits is nonzero iff we are using condition [A] above, in which case
  // it holds Log2(EltSize).
  unsigned MaskLoBits = 0;
  if (IsRotate && !FromAdd && isPowerOf2_64(EltSize)) {
    unsigned Bits = Log2_64(EltSize);
    unsigned NegBits = Neg.getScalarValueSizeInBits();
    if (NegBits >= Bits) {
      APInt DemandedBits = APInt::getLowBitsSet(NegBits, Bits);
      if (SDValue Inner =
        Neg = Inner;
        MaskLoBits = Bits;
      }
    }
  }

  // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
  if (Neg.getOpcode() != ISD::SUB)
    return false;
  if (!NegC)
    return false;
  SDValue NegOp1 = Neg.getOperand(1);

  // On the RHS of [A], if Pos is the result of operation on Pos' that won't
  // affect Mask's demanded bits, just replace Pos with Pos'. These operations
  // are redundant for the purpose of the equality.
  if (MaskLoBits) {
    unsigned PosBits = Pos.getScalarValueSizeInBits();
    if (PosBits >= MaskLoBits) {
      APInt DemandedBits = APInt::getLowBitsSet(PosBits, MaskLoBits);
      if (SDValue Inner =
        Pos = Inner;
      }
    }
  }

  // The condition we need is now:
  //
  //     (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
  //
  // If NegOp1 == Pos then we need:
  //
  //     EltSize & Mask == NegC & Mask
  //
  // (because "x & Mask" is a truncation and distributes through subtraction).
  //
  // We also need to account for a potential truncation of NegOp1 if the amount
  // has already been legalized to a shift amount type.
  APInt Width;
  if ((Pos == NegOp1) ||
      (NegOp1.getOpcode() == ISD::TRUNCATE && Pos == NegOp1.getOperand(0)))
    Width = NegC->getAPIntValue();

  // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
  // Then the condition we want to prove becomes:
  //
  //     (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
  //
  // which, again because "x & Mask" is a truncation, becomes:
  //
  //     NegC & Mask == (EltSize - PosC) & Mask
  //     EltSize & Mask == (NegC + PosC) & Mask
  else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
    if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
      Width = PosC->getAPIntValue() + NegC->getAPIntValue();
    else
      return false;
  } else
    return false;

  // Now we just need to check that EltSize & Mask == Width & Mask.
  if (MaskLoBits)
    // EltSize & Mask is 0 since Mask is EltSize - 1.
    return Width.getLoBits(MaskLoBits) == 0;
  return Width == EltSize;
}
9166
9167// A subroutine of MatchRotate used once we have found an OR of two opposite
9168// shifts of Shifted. If Neg == <operand size> - Pos then the OR reduces
9169// to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
9170// former being preferred if supported. InnerPos and InnerNeg are Pos and
9171// Neg with outer conversions stripped away.
9172SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
9173 SDValue Neg, SDValue InnerPos,
9174 SDValue InnerNeg, bool FromAdd,
9175 bool HasPos, unsigned PosOpcode,
9176 unsigned NegOpcode, const SDLoc &DL) {
9177 // fold (or/add (shl x, (*ext y)),
9178 // (srl x, (*ext (sub 32, y)))) ->
9179 // (rotl x, y) or (rotr x, (sub 32, y))
9180 //
9181 // fold (or/add (shl x, (*ext (sub 32, y))),
9182 // (srl x, (*ext y))) ->
9183 // (rotr x, y) or (rotl x, (sub 32, y))
9184 EVT VT = Shifted.getValueType();
9185 if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG,
9186 /*IsRotate*/ true, FromAdd))
9187 return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
9188 HasPos ? Pos : Neg);
9189
9190 return SDValue();
9191}
9192
// A subroutine of MatchRotate used once we have found an OR of two opposite
// shifts of N0 + N1. If Neg == <operand size> - Pos then the OR reduces
// to both (PosOpcode N0, N1, Pos) and (NegOpcode N0, N1, Neg), with the
// former being preferred if supported. InnerPos and InnerNeg are Pos and
// Neg with outer conversions stripped away.
// TODO: Merge with MatchRotatePosNeg.
SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos,
                                       SDValue Neg, SDValue InnerPos,
                                       SDValue InnerNeg, bool FromAdd,
                                       bool HasPos, unsigned PosOpcode,
                                       unsigned NegOpcode, const SDLoc &DL) {
  EVT VT = N0.getValueType();
  unsigned EltBits = VT.getScalarSizeInBits();

  // fold (or/add (shl x0, (*ext y)),
  //              (srl x1, (*ext (sub 32, y)))) ->
  //   (fshl x0, x1, y) or (fshr x0, x1, (sub 32, y))
  //
  // fold (or/add (shl x0, (*ext (sub 32, y))),
  //              (srl x1, (*ext y))) ->
  //   (fshr x0, x1, y) or (fshl x0, x1, (sub 32, y))
  // When N0 == N1 this is a plain rotate, which lets matchRotateSub use the
  // weaker masked condition.
  if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG, /*IsRotate*/ N0 == N1,
                     FromAdd))
    return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, N0, N1,
                       HasPos ? Pos : Neg);

  // Matching the shift+xor cases, we can't easily use the xor'd shift amount
  // so for now just use the PosOpcode case if it's legal.
  // TODO: When can we use the NegOpcode case?
  if (PosOpcode == ISD::FSHL && isPowerOf2_32(EltBits)) {
    SDValue X;
    // fold (or/add (shl x0, y), (srl (srl x1, 1), (xor y, 31)))
    //   -> (fshl x0, x1, y)
    if (sd_match(N1, m_Srl(m_Value(X), m_One())) &&
        sd_match(InnerNeg,
                 m_Xor(m_Specific(InnerPos), m_SpecificInt(EltBits - 1))) &&
      return DAG.getNode(ISD::FSHL, DL, VT, N0, X, Pos);
    }

    // fold (or/add (shl (shl x0, 1), (xor y, 31)), (srl x1, y))
    //   -> (fshr x0, x1, y)
    if (sd_match(N0, m_Shl(m_Value(X), m_One())) &&
        sd_match(InnerPos,
                 m_Xor(m_Specific(InnerNeg), m_SpecificInt(EltBits - 1))) &&
      return DAG.getNode(ISD::FSHR, DL, VT, X, N1, Neg);
    }

    // fold (or/add (shl (add x0, x0), (xor y, 31)), (srl x1, y))
    //   -> (fshr x0, x1, y)
    // TODO: Should add(x,x) -> shl(x,1) be a general DAG canonicalization?
    if (sd_match(N0, m_Add(m_Value(X), m_Deferred(X))) &&
        sd_match(InnerPos,
                 m_Xor(m_Specific(InnerNeg), m_SpecificInt(EltBits - 1))) &&
      return DAG.getNode(ISD::FSHR, DL, VT, X, N1, Neg);
    }
  }

  return SDValue();
}
9255
// MatchRotate - Handle an 'or' or 'add' of two operands. If this is one of the
// many idioms for rotate, and if the target supports rotation instructions,
// generate a rot[lr]. This also matches funnel shift patterns, similar to
// rotation but with different shifted sources.
SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL,
                                 bool FromAdd) {
  EVT VT = LHS.getValueType();

  // The target must have at least one rotate/funnel flavor.
  // We still try to match rotate by constant pre-legalization.
  // TODO: Support pre-legalization funnel-shift by constant.
  bool HasROTL = hasOperation(ISD::ROTL, VT);
  bool HasROTR = hasOperation(ISD::ROTR, VT);
  bool HasFSHL = hasOperation(ISD::FSHL, VT);
  bool HasFSHR = hasOperation(ISD::FSHR, VT);

  // If the type is going to be promoted and the target has enabled custom
  // lowering for rotate, allow matching rotate by non-constants. Only allow
  // this for scalar types.
  if (VT.isScalarInteger() && TLI.getTypeAction(*DAG.getContext(), VT) ==
  }

  if (LegalOperations && !HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
    return SDValue();

  // Check for truncated rotate: recurse on the wider type and truncate after.
  if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
      LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
    assert(LHS.getValueType() == RHS.getValueType());
    if (SDValue Rot =
            MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL, FromAdd))
      return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(), Rot);
  }

  // Match "(X shl/srl V1) & V2" where V2 may not be present.
  SDValue LHSShift;   // The shift.
  SDValue LHSMask;    // AND value if any.
  matchRotateHalf(DAG, LHS, LHSShift, LHSMask);

  SDValue RHSShift;   // The shift.
  SDValue RHSMask;    // AND value if any.
  matchRotateHalf(DAG, RHS, RHSShift, RHSMask);

  // If neither side matched a rotate half, bail.
  if (!LHSShift && !RHSShift)
    return SDValue();

  // InstCombine may have combined a constant shl, srl, mul, or udiv with one
  // side of the rotate, so try to handle that here. In all cases we need to
  // pass the matched shift from the opposite side to compute the opcode and
  // needed shift amount to extract. We still want to do this if both sides
  // matched a rotate half because one half may be a potential overshift that
  // can be broken down (i.e. if InstCombine merged two shl or srl ops into a
  // single one).

  // Have LHS side of the rotate, try to extract the needed shift from the RHS.
  if (LHSShift)
    if (SDValue NewRHSShift =
            extractShiftForRotate(DAG, LHSShift, RHS, RHSMask, DL))
      RHSShift = NewRHSShift;
  // Have RHS side of the rotate, try to extract the needed shift from the LHS.
  if (RHSShift)
    if (SDValue NewLHSShift =
            extractShiftForRotate(DAG, RHSShift, LHS, LHSMask, DL))
      LHSShift = NewLHSShift;

  // If a side is still missing, nothing else we can do.
  if (!RHSShift || !LHSShift)
    return SDValue();

  // At this point we've matched or extracted a shift op on each side.

  if (LHSShift.getOpcode() == RHSShift.getOpcode())
    return SDValue(); // Shifts must disagree.

  // Canonicalize shl to left side in a shl/srl pair.
  if (RHSShift.getOpcode() == ISD::SHL) {
    std::swap(LHS, RHS);
    std::swap(LHSShift, RHSShift);
    std::swap(LHSMask, RHSMask);
  }

  // Something has gone wrong - we've lost the shl/srl pair - bail.
  if (LHSShift.getOpcode() != ISD::SHL || RHSShift.getOpcode() != ISD::SRL)
    return SDValue();

  // Cache the per-element width and the operands/amounts of both shifts.
  unsigned EltSizeInBits = VT.getScalarSizeInBits();
  SDValue LHSShiftArg = LHSShift.getOperand(0);
  SDValue LHSShiftAmt = LHSShift.getOperand(1);
  SDValue RHSShiftArg = RHSShift.getOperand(0);
  SDValue RHSShiftAmt = RHSShift.getOperand(1);

  // Predicate: the two constant shift amounts sum to the element width.
  auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
                                        ConstantSDNode *RHS) {
    return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
  };

  auto ApplyMasks = [&](SDValue Res) {
    // If there is an AND of either shifted operand, apply it to the result.
    if (LHSMask.getNode() || RHSMask.getNode()) {

      if (LHSMask.getNode()) {
        SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
        Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
                           DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
      }
      if (RHSMask.getNode()) {
        SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
        Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
                           DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
      }

      Res = DAG.getNode(ISD::AND, DL, VT, Res, Mask);
    }

    return Res;
  };

  // TODO: Support pre-legalization funnel-shift by constant.
  // A rotate requires the same value feeding both shifts; otherwise this can
  // only become a funnel shift.
  bool IsRotate = LHSShiftArg == RHSShiftArg;
  if (!IsRotate && !(HasFSHL || HasFSHR)) {
    if (TLI.isTypeLegal(VT) && LHS.hasOneUse() && RHS.hasOneUse() &&
        ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
      // Look for a disguised rotate by constant.
      // The common shifted operand X may be hidden inside another 'or'.
      SDValue X, Y;
      auto matchOr = [&X, &Y](SDValue Or, SDValue CommonOp) {
        if (!Or.hasOneUse() || Or.getOpcode() != ISD::OR)
          return false;
        if (CommonOp == Or.getOperand(0)) {
          X = CommonOp;
          Y = Or.getOperand(1);
          return true;
        }
        if (CommonOp == Or.getOperand(1)) {
          X = CommonOp;
          Y = Or.getOperand(0);
          return true;
        }
        return false;
      };

      SDValue Res;
      if (matchOr(LHSShiftArg, RHSShiftArg)) {
        // (shl (X | Y), C1) | (srl X, C2) --> (rotl X, C1) | (shl Y, C1)
        SDValue RotX = DAG.getNode(ISD::ROTL, DL, VT, X, LHSShiftAmt);
        SDValue ShlY = DAG.getNode(ISD::SHL, DL, VT, Y, LHSShiftAmt);
        Res = DAG.getNode(ISD::OR, DL, VT, RotX, ShlY);
      } else if (matchOr(RHSShiftArg, LHSShiftArg)) {
        // (shl X, C1) | (srl (X | Y), C2) --> (rotl X, C1) | (srl Y, C2)
        SDValue RotX = DAG.getNode(ISD::ROTL, DL, VT, X, LHSShiftAmt);
        SDValue SrlY = DAG.getNode(ISD::SRL, DL, VT, Y, RHSShiftAmt);
        Res = DAG.getNode(ISD::OR, DL, VT, RotX, SrlY);
      } else {
        return SDValue();
      }

      return ApplyMasks(Res);
    }

    return SDValue(); // Requires funnel shift support.
  }

  // fold (or/add (shl x, C1), (srl x, C2)) -> (rotl x, C1)
  // fold (or/add (shl x, C1), (srl x, C2)) -> (rotr x, C2)
  // fold (or/add (shl x, C1), (srl y, C2)) -> (fshl x, y, C1)
  // fold (or/add (shl x, C1), (srl y, C2)) -> (fshr x, y, C2)
  // iff C1+C2 == EltSizeInBits
  if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
    SDValue Res;
    if (IsRotate && (HasROTL || HasROTR || !(HasFSHL || HasFSHR))) {
      bool UseROTL = !LegalOperations || HasROTL;
      Res = DAG.getNode(UseROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
                        UseROTL ? LHSShiftAmt : RHSShiftAmt);
    } else {
      bool UseFSHL = !LegalOperations || HasFSHL;
      Res = DAG.getNode(UseFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg,
                        RHSShiftArg, UseFSHL ? LHSShiftAmt : RHSShiftAmt);
    }

    return ApplyMasks(Res);
  }

  // Even pre-legalization, we can't easily rotate/funnel-shift by a variable
  // shift.
  if (!HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
    return SDValue();

  // If there is a mask here, and we have a variable shift, we can't be sure
  // that we're masking out the right stuff.
  if (LHSMask.getNode() || RHSMask.getNode())
    return SDValue();

  // If the shift amount is sign/zext/any-extended just peel it off.
  SDValue LExtOp0 = LHSShiftAmt;
  SDValue RExtOp0 = RHSShiftAmt;
  if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
      (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
    LExtOp0 = LHSShiftAmt.getOperand(0);
    RExtOp0 = RHSShiftAmt.getOperand(0);
  }

  // Try the variable-amount rotate forms in both orientations.
  if (IsRotate && (HasROTL || HasROTR)) {
    if (SDValue TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt,
                                         LExtOp0, RExtOp0, FromAdd, HasROTL,
      return TryL;

    if (SDValue TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
                                         RExtOp0, LExtOp0, FromAdd, HasROTR,
      return TryR;
  }

  // Then the variable-amount funnel-shift forms in both orientations.
  if (SDValue TryL = MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, LHSShiftAmt,
                                       RHSShiftAmt, LExtOp0, RExtOp0, FromAdd,
                                       HasFSHL, ISD::FSHL, ISD::FSHR, DL))
    return TryL;

  if (SDValue TryR = MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, RHSShiftAmt,
                                       LHSShiftAmt, RExtOp0, LExtOp0, FromAdd,
                                       HasFSHR, ISD::FSHR, ISD::FSHL, DL))
    return TryR;

  return SDValue();
}
9493
9494/// Recursively traverses the expression calculating the origin of the requested
9495/// byte of the given value. Returns std::nullopt if the provider can't be
9496/// calculated.
9497///
9498/// For all the values except the root of the expression, we verify that the
9499/// value has exactly one use and if not then return std::nullopt. This way if
9500/// the origin of the byte is returned it's guaranteed that the values which
9501/// contribute to the byte are not used outside of this expression.
9502
9503/// However, there is a special case when dealing with vector loads -- we allow
9504/// more than one use if the load is a vector type. Since the values that
9505/// contribute to the byte ultimately come from the ExtractVectorElements of the
9506/// Load, we don't care if the Load has uses other than ExtractVectorElements,
9507/// because those operations are independent from the pattern to be combined.
9508/// For vector loads, we simply care that the ByteProviders are adjacent
9509/// positions of the same vector, and their index matches the byte that is being
9510/// provided. This is captured by the \p VectorIndex algorithm. \p VectorIndex
9511/// is the index used in an ExtractVectorElement, and \p StartingIndex is the
9512/// byte position we are trying to provide for the LoadCombine. If these do
9513/// not match, then we can not combine the vector loads. \p Index uses the
9514/// byte position we are trying to provide for and is matched against the
9515/// shl and load size. The \p Index algorithm ensures the requested byte is
9516/// provided for by the pattern, and the pattern does not over provide bytes.
9517///
9518///
9519/// The supported LoadCombine pattern for vector loads is as follows
9520/// or
9521/// / \
9522/// or shl
9523/// / \ |
9524/// or shl zext
9525/// / \ | |
9526/// shl zext zext EVE*
9527/// | | | |
9528/// zext EVE* EVE* LOAD
9529/// | | |
9530/// EVE* LOAD LOAD
9531/// |
9532/// LOAD
9533///
9534/// *ExtractVectorElement
9536
9537static std::optional<SDByteProvider>
9538calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
9539 std::optional<uint64_t> VectorIndex,
9540 unsigned StartingIndex = 0) {
9541
9542 // Typical i64 by i8 pattern requires recursion up to 8 calls depth
9543 if (Depth == 10)
9544 return std::nullopt;
9545
9546 // Only allow multiple uses if the instruction is a vector load (in which
9547 // case we will use the load for every ExtractVectorElement)
9548 if (Depth && !Op.hasOneUse() &&
9549 (Op.getOpcode() != ISD::LOAD || !Op.getValueType().isVector()))
9550 return std::nullopt;
9551
9552 // Fail to combine if we have encountered anything but a LOAD after handling
9553 // an ExtractVectorElement.
9554 if (Op.getOpcode() != ISD::LOAD && VectorIndex.has_value())
9555 return std::nullopt;
9556
9557 unsigned BitWidth = Op.getScalarValueSizeInBits();
9558 if (BitWidth % 8 != 0)
9559 return std::nullopt;
9560 unsigned ByteWidth = BitWidth / 8;
9561 assert(Index < ByteWidth && "invalid index requested");
9562 (void) ByteWidth;
9563
9564 switch (Op.getOpcode()) {
9565 case ISD::OR: {
9566 auto LHS =
9567 calculateByteProvider(Op->getOperand(0), Index, Depth + 1, VectorIndex);
9568 if (!LHS)
9569 return std::nullopt;
9570 auto RHS =
9571 calculateByteProvider(Op->getOperand(1), Index, Depth + 1, VectorIndex);
9572 if (!RHS)
9573 return std::nullopt;
9574
9575 if (LHS->isConstantZero())
9576 return RHS;
9577 if (RHS->isConstantZero())
9578 return LHS;
9579 return std::nullopt;
9580 }
9581 case ISD::SHL: {
9582 auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
9583 if (!ShiftOp)
9584 return std::nullopt;
9585
9586 uint64_t BitShift = ShiftOp->getZExtValue();
9587
9588 if (BitShift % 8 != 0)
9589 return std::nullopt;
9590 uint64_t ByteShift = BitShift / 8;
9591
9592 // If we are shifting by an amount greater than the index we are trying to
9593 // provide, then do not provide anything. Otherwise, subtract the index by
9594 // the amount we shifted by.
9595 return Index < ByteShift
9597 : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
9598 Depth + 1, VectorIndex, Index);
9599 }
9600 case ISD::ANY_EXTEND:
9601 case ISD::SIGN_EXTEND:
9602 case ISD::ZERO_EXTEND: {
9603 SDValue NarrowOp = Op->getOperand(0);
9604 unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
9605 if (NarrowBitWidth % 8 != 0)
9606 return std::nullopt;
9607 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
9608
9609 if (Index >= NarrowByteWidth)
9610 return Op.getOpcode() == ISD::ZERO_EXTEND
9611 ? std::optional<SDByteProvider>(
9613 : std::nullopt;
9614 return calculateByteProvider(NarrowOp, Index, Depth + 1, VectorIndex,
9615 StartingIndex);
9616 }
9617 case ISD::BSWAP:
9618 return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
9619 Depth + 1, VectorIndex, StartingIndex);
9621 auto OffsetOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
9622 if (!OffsetOp)
9623 return std::nullopt;
9624
9625 VectorIndex = OffsetOp->getZExtValue();
9626
9627 SDValue NarrowOp = Op->getOperand(0);
9628 unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
9629 if (NarrowBitWidth % 8 != 0)
9630 return std::nullopt;
9631 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
9632 // EXTRACT_VECTOR_ELT can extend the element type to the width of the return
9633 // type, leaving the high bits undefined.
9634 if (Index >= NarrowByteWidth)
9635 return std::nullopt;
9636
9637 // Check to see if the position of the element in the vector corresponds
9638 // with the byte we are trying to provide for. In the case of a vector of
9639 // i8, this simply means the VectorIndex == StartingIndex. For non i8 cases,
9640 // the element will provide a range of bytes. For example, if we have a
9641 // vector of i16s, each element provides two bytes (V[1] provides byte 2 and
9642 // 3).
9643 if (*VectorIndex * NarrowByteWidth > StartingIndex)
9644 return std::nullopt;
9645 if ((*VectorIndex + 1) * NarrowByteWidth <= StartingIndex)
9646 return std::nullopt;
9647
9648 return calculateByteProvider(Op->getOperand(0), Index, Depth + 1,
9649 VectorIndex, StartingIndex);
9650 }
9651 case ISD::LOAD: {
9652 auto L = cast<LoadSDNode>(Op.getNode());
9653 if (!L->isSimple() || L->isIndexed())
9654 return std::nullopt;
9655
9656 unsigned NarrowBitWidth = L->getMemoryVT().getScalarSizeInBits();
9657 if (NarrowBitWidth % 8 != 0)
9658 return std::nullopt;
9659 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
9660
9661 // If the width of the load does not reach byte we are trying to provide for
9662 // and it is not a ZEXTLOAD, then the load does not provide for the byte in
9663 // question
9664 if (Index >= NarrowByteWidth)
9665 return L->getExtensionType() == ISD::ZEXTLOAD
9666 ? std::optional<SDByteProvider>(
9668 : std::nullopt;
9669
9670 unsigned BPVectorIndex = VectorIndex.value_or(0U);
9671 return SDByteProvider::getSrc(L, Index, BPVectorIndex);
9672 }
9673 }
9674
9675 return std::nullopt;
9676}
9677
// Map byte index i of a BW-byte value to its memory offset assuming a
// little-endian layout: byte i lives at offset i, independent of the width.
static unsigned littleEndianByteAt(unsigned BW, unsigned i) {
  (void)BW; // Unused; kept for signature symmetry with bigEndianByteAt.
  return i;
}
9681
// Map byte index i of a BW-byte value to its memory offset assuming a
// big-endian layout: byte i lives mirrored from the end of the value.
static unsigned bigEndianByteAt(unsigned BW, unsigned i) {
  return (BW - 1) - i;
}
9685
9686// Check if the bytes offsets we are looking at match with either big or
9687// little endian value loaded. Return true for big endian, false for little
9688// endian, and std::nullopt if match failed.
9689static std::optional<bool> isBigEndian(ArrayRef<int64_t> ByteOffsets,
9690 int64_t FirstOffset) {
9691 // The endian can be decided only when it is 2 bytes at least.
9692 unsigned Width = ByteOffsets.size();
9693 if (Width < 2)
9694 return std::nullopt;
9695
9696 bool BigEndian = true, LittleEndian = true;
9697 for (unsigned i = 0; i < Width; i++) {
9698 int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
9699 LittleEndian &= CurrentByteOffset == littleEndianByteAt(Width, i);
9700 BigEndian &= CurrentByteOffset == bigEndianByteAt(Width, i);
9701 if (!BigEndian && !LittleEndian)
9702 return std::nullopt;
9703 }
9704
9705 assert((BigEndian != LittleEndian) && "It should be either big endian or"
9706 "little endian");
9707 return BigEndian;
9708}
9709
9710// Look through one layer of truncate or extend.
9712 switch (Value.getOpcode()) {
9713 case ISD::TRUNCATE:
9714 case ISD::ZERO_EXTEND:
9715 case ISD::SIGN_EXTEND:
9716 case ISD::ANY_EXTEND:
9717 return Value.getOperand(0);
9718 }
9719 return SDValue();
9720}
9721
9722/// Match a pattern where a wide type scalar value is stored by several narrow
/// stores. Fold it into a single store or a BSWAP and a store if the target
/// supports it.
9725///
9726/// Assuming little endian target:
9727/// i8 *p = ...
9728/// i32 val = ...
9729/// p[0] = (val >> 0) & 0xFF;
9730/// p[1] = (val >> 8) & 0xFF;
9731/// p[2] = (val >> 16) & 0xFF;
9732/// p[3] = (val >> 24) & 0xFF;
9733/// =>
9734/// *((i32)p) = val;
9735///
9736/// i8 *p = ...
9737/// i32 val = ...
9738/// p[0] = (val >> 24) & 0xFF;
9739/// p[1] = (val >> 16) & 0xFF;
9740/// p[2] = (val >> 8) & 0xFF;
9741/// p[3] = (val >> 0) & 0xFF;
9742/// =>
9743/// *((i32)p) = BSWAP(val);
9744SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
9745 // The matching looks for "store (trunc x)" patterns that appear early but are
9746 // likely to be replaced by truncating store nodes during combining.
9747 // TODO: If there is evidence that running this later would help, this
9748 // limitation could be removed. Legality checks may need to be added
9749 // for the created store and optional bswap/rotate.
9750 if (LegalOperations || OptLevel == CodeGenOptLevel::None)
9751 return SDValue();
9752
9753 // We only handle merging simple stores of 1-4 bytes.
9754 // TODO: Allow unordered atomics when wider type is legal (see D66309)
9755 EVT MemVT = N->getMemoryVT();
9756 if (!(MemVT == MVT::i8 || MemVT == MVT::i16 || MemVT == MVT::i32) ||
9757 !N->isSimple() || N->isIndexed())
9758 return SDValue();
9759
9760 // Collect all of the stores in the chain, upto the maximum store width (i64).
9761 SDValue Chain = N->getChain();
9763 unsigned NarrowNumBits = MemVT.getScalarSizeInBits();
9764 unsigned MaxWideNumBits = 64;
9765 unsigned MaxStores = MaxWideNumBits / NarrowNumBits;
9766 while (auto *Store = dyn_cast<StoreSDNode>(Chain)) {
9767 // All stores must be the same size to ensure that we are writing all of the
9768 // bytes in the wide value.
9769 // This store should have exactly one use as a chain operand for another
9770 // store in the merging set. If there are other chain uses, then the
9771 // transform may not be safe because order of loads/stores outside of this
9772 // set may not be preserved.
9773 // TODO: We could allow multiple sizes by tracking each stored byte.
9774 if (Store->getMemoryVT() != MemVT || !Store->isSimple() ||
9775 Store->isIndexed() || !Store->hasOneUse())
9776 return SDValue();
9777 Stores.push_back(Store);
9778 Chain = Store->getChain();
9779 if (MaxStores < Stores.size())
9780 return SDValue();
9781 }
9782 // There is no reason to continue if we do not have at least a pair of stores.
9783 if (Stores.size() < 2)
9784 return SDValue();
9785
9786 // Handle simple types only.
9787 LLVMContext &Context = *DAG.getContext();
9788 unsigned NumStores = Stores.size();
9789 unsigned WideNumBits = NumStores * NarrowNumBits;
9790 if (WideNumBits != 16 && WideNumBits != 32 && WideNumBits != 64)
9791 return SDValue();
9792
9793 // Check if all bytes of the source value that we are looking at are stored
9794 // to the same base address. Collect offsets from Base address into OffsetMap.
9795 SDValue SourceValue;
9796 SmallVector<int64_t, 8> OffsetMap(NumStores, INT64_MAX);
9797 int64_t FirstOffset = INT64_MAX;
9798 StoreSDNode *FirstStore = nullptr;
9799 std::optional<BaseIndexOffset> Base;
9800 for (auto *Store : Stores) {
9801 // All the stores store different parts of the CombinedValue. A truncate is
9802 // required to get the partial value.
9803 SDValue Trunc = Store->getValue();
9804 if (Trunc.getOpcode() != ISD::TRUNCATE)
9805 return SDValue();
9806 // Other than the first/last part, a shift operation is required to get the
9807 // offset.
9808 int64_t Offset = 0;
9809 SDValue WideVal = Trunc.getOperand(0);
9810 if ((WideVal.getOpcode() == ISD::SRL || WideVal.getOpcode() == ISD::SRA) &&
9811 isa<ConstantSDNode>(WideVal.getOperand(1))) {
9812 // The shift amount must be a constant multiple of the narrow type.
9813 // It is translated to the offset address in the wide source value "y".
9814 //
9815 // x = srl y, ShiftAmtC
9816 // i8 z = trunc x
9817 // store z, ...
9818 uint64_t ShiftAmtC = WideVal.getConstantOperandVal(1);
9819 if (ShiftAmtC % NarrowNumBits != 0)
9820 return SDValue();
9821
9822 // Make sure we aren't reading bits that are shifted in.
9823 if (ShiftAmtC > WideVal.getScalarValueSizeInBits() - NarrowNumBits)
9824 return SDValue();
9825
9826 Offset = ShiftAmtC / NarrowNumBits;
9827 WideVal = WideVal.getOperand(0);
9828 }
9829
9830 // Stores must share the same source value with different offsets.
9831 if (!SourceValue)
9832 SourceValue = WideVal;
9833 else if (SourceValue != WideVal) {
9834 // Truncate and extends can be stripped to see if the values are related.
9835 if (stripTruncAndExt(SourceValue) != WideVal &&
9836 stripTruncAndExt(WideVal) != SourceValue)
9837 return SDValue();
9838
9839 if (WideVal.getScalarValueSizeInBits() >
9840 SourceValue.getScalarValueSizeInBits())
9841 SourceValue = WideVal;
9842
9843 // Give up if the source value type is smaller than the store size.
9844 if (SourceValue.getScalarValueSizeInBits() < WideNumBits)
9845 return SDValue();
9846 }
9847
9848 // Stores must share the same base address.
9849 BaseIndexOffset Ptr = BaseIndexOffset::match(Store, DAG);
9850 int64_t ByteOffsetFromBase = 0;
9851 if (!Base)
9852 Base = Ptr;
9853 else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
9854 return SDValue();
9855
9856 // Remember the first store.
9857 if (ByteOffsetFromBase < FirstOffset) {
9858 FirstStore = Store;
9859 FirstOffset = ByteOffsetFromBase;
9860 }
9861 // Map the offset in the store and the offset in the combined value, and
9862 // early return if it has been set before.
9863 if (Offset < 0 || Offset >= NumStores || OffsetMap[Offset] != INT64_MAX)
9864 return SDValue();
9865 OffsetMap[Offset] = ByteOffsetFromBase;
9866 }
9867
9868 EVT WideVT = EVT::getIntegerVT(Context, WideNumBits);
9869
9870 assert(FirstOffset != INT64_MAX && "First byte offset must be set");
9871 assert(FirstStore && "First store must be set");
9872
9873 // Check that a store of the wide type is both allowed and fast on the target
9874 const DataLayout &Layout = DAG.getDataLayout();
9875 unsigned Fast = 0;
9876 bool Allowed = TLI.allowsMemoryAccess(Context, Layout, WideVT,
9877 *FirstStore->getMemOperand(), &Fast);
9878 if (!Allowed || !Fast)
9879 return SDValue();
9880
9881 // Check if the pieces of the value are going to the expected places in memory
9882 // to merge the stores.
9883 auto checkOffsets = [&](bool MatchLittleEndian) {
9884 if (MatchLittleEndian) {
9885 for (unsigned i = 0; i != NumStores; ++i)
9886 if (OffsetMap[i] != i * (NarrowNumBits / 8) + FirstOffset)
9887 return false;
9888 } else { // MatchBigEndian by reversing loop counter.
9889 for (unsigned i = 0, j = NumStores - 1; i != NumStores; ++i, --j)
9890 if (OffsetMap[j] != i * (NarrowNumBits / 8) + FirstOffset)
9891 return false;
9892 }
9893 return true;
9894 };
9895
9896 // Check if the offsets line up for the native data layout of this target.
9897 bool NeedBswap = false;
9898 bool NeedRotate = false;
9899 if (!checkOffsets(Layout.isLittleEndian())) {
9900 // Special-case: check if byte offsets line up for the opposite endian.
9901 if (NarrowNumBits == 8 && checkOffsets(Layout.isBigEndian()))
9902 NeedBswap = true;
9903 else if (NumStores == 2 && checkOffsets(Layout.isBigEndian()))
9904 NeedRotate = true;
9905 else
9906 return SDValue();
9907 }
9908
9909 SDLoc DL(N);
9910 if (WideVT != SourceValue.getValueType()) {
9911 assert(SourceValue.getValueType().getScalarSizeInBits() > WideNumBits &&
9912 "Unexpected store value to merge");
9913 SourceValue = DAG.getNode(ISD::TRUNCATE, DL, WideVT, SourceValue);
9914 }
9915
9916 // Before legalize we can introduce illegal bswaps/rotates which will be later
9917 // converted to an explicit bswap sequence. This way we end up with a single
9918 // store and byte shuffling instead of several stores and byte shuffling.
9919 if (NeedBswap) {
9920 SourceValue = DAG.getNode(ISD::BSWAP, DL, WideVT, SourceValue);
9921 } else if (NeedRotate) {
9922 assert(WideNumBits % 2 == 0 && "Unexpected type for rotate");
9923 SDValue RotAmt = DAG.getConstant(WideNumBits / 2, DL, WideVT);
9924 SourceValue = DAG.getNode(ISD::ROTR, DL, WideVT, SourceValue, RotAmt);
9925 }
9926
9927 SDValue NewStore =
9928 DAG.getStore(Chain, DL, SourceValue, FirstStore->getBasePtr(),
9929 FirstStore->getPointerInfo(), FirstStore->getAlign());
9930
9931 // Rely on other DAG combine rules to remove the other individual stores.
9932 DAG.ReplaceAllUsesWith(N, NewStore.getNode());
9933 return NewStore;
9934}
9935
9936/// Match a pattern where a wide type scalar value is loaded by several narrow
9937/// loads and combined by shifts and ors. Fold it into a single load or a load
/// and a BSWAP if the target supports it.
9939///
9940/// Assuming little endian target:
9941/// i8 *a = ...
9942/// i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
9943/// =>
9944/// i32 val = *((i32)a)
9945///
9946/// i8 *a = ...
9947/// i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
9948/// =>
9949/// i32 val = BSWAP(*((i32)a))
9950///
9951/// TODO: This rule matches complex patterns with OR node roots and doesn't
9952/// interact well with the worklist mechanism. When a part of the pattern is
9953/// updated (e.g. one of the loads) its direct users are put into the worklist,
9954/// but the root node of the pattern which triggers the load combine is not
9955/// necessarily a direct user of the changed node. For example, once the address
9956/// of t28 load is reassociated load combine won't be triggered:
9957/// t25: i32 = add t4, Constant:i32<2>
9958/// t26: i64 = sign_extend t25
9959/// t27: i64 = add t2, t26
9960/// t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
9961/// t29: i32 = zero_extend t28
9962/// t32: i32 = shl t29, Constant:i8<8>
9963/// t33: i32 = or t23, t32
9964/// As a possible fix visitLoad can check if the load can be a part of a load
9965/// combine pattern and add corresponding OR roots to the worklist.
SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
  assert(N->getOpcode() == ISD::OR &&
         "Can only match load combining against OR nodes");

  // Handles simple types only
  EVT VT = N->getValueType(0);
  if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
    return SDValue();
  unsigned ByteWidth = VT.getSizeInBits() / 8;

  bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
  // Translate a provider's destination byte index into the in-memory byte
  // offset within its source load, honoring the target's endianness.
  auto MemoryByteOffset = [&](SDByteProvider P) {
    assert(P.hasSrc() && "Must be a memory byte provider");
    auto *Load = cast<LoadSDNode>(P.Src.value());

    unsigned LoadBitWidth = Load->getMemoryVT().getScalarSizeInBits();

    assert(LoadBitWidth % 8 == 0 &&
           "can only analyze providers for individual bytes not bit");
    unsigned LoadByteWidth = LoadBitWidth / 8;
    return IsBigEndianTarget ? bigEndianByteAt(LoadByteWidth, P.DestOffset)
                             : littleEndianByteAt(LoadByteWidth, P.DestOffset);
  };

  std::optional<BaseIndexOffset> Base;
  SDValue Chain;

  SmallPtrSet<LoadSDNode *, 8> Loads;
  std::optional<SDByteProvider> FirstByteProvider;
  int64_t FirstOffset = INT64_MAX;

  // Check if all the bytes of the OR we are looking at are loaded from the same
  // base address. Collect bytes offsets from Base address in ByteOffsets.
  // Iterate from the most significant byte down so that constant-zero
  // providers (a zero-extended value) are only accepted as a contiguous run
  // at the top.
  SmallVector<int64_t, 8> ByteOffsets(ByteWidth);
  unsigned ZeroExtendedBytes = 0;
  for (int i = ByteWidth - 1; i >= 0; --i) {
    auto P =
        calculateByteProvider(SDValue(N, 0), i, 0, /*VectorIndex*/ std::nullopt,
                              /*StartingIndex*/ i);
    if (!P)
      return SDValue();

    if (P->isConstantZero()) {
      // It's OK for the N most significant bytes to be 0, we can just
      // zero-extend the load.
      if (++ZeroExtendedBytes != (ByteWidth - static_cast<unsigned>(i)))
        return SDValue();
      continue;
    }
    assert(P->hasSrc() && "provenance should either be memory or zero");
    auto *L = cast<LoadSDNode>(P->Src.value());

    // All loads must share the same chain
    SDValue LChain = L->getChain();
    if (!Chain)
      Chain = LChain;
    else if (Chain != LChain)
      return SDValue();

    // Loads must share the same base address
    BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG);
    int64_t ByteOffsetFromBase = 0;

    // For vector loads, the expected load combine pattern will have an
    // ExtractElement for each index in the vector. While each of these
    // ExtractElements will be accessing the same base address as determined
    // by the load instruction, the actual bytes they interact with will differ
    // due to different ExtractElement indices. To accurately determine the
    // byte position of an ExtractElement, we offset the base load ptr with
    // the index multiplied by the byte size of each element in the vector.
    if (L->getMemoryVT().isVector()) {
      unsigned LoadWidthInBit = L->getMemoryVT().getScalarSizeInBits();
      if (LoadWidthInBit % 8 != 0)
        return SDValue();
      unsigned ByteOffsetFromVector = P->SrcOffset * LoadWidthInBit / 8;
      Ptr.addToOffset(ByteOffsetFromVector);
    }

    if (!Base)
      Base = Ptr;

    else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
      return SDValue();

    // Calculate the offset of the current byte from the base address
    ByteOffsetFromBase += MemoryByteOffset(*P);
    ByteOffsets[i] = ByteOffsetFromBase;

    // Remember the first byte load
    if (ByteOffsetFromBase < FirstOffset) {
      FirstByteProvider = P;
      FirstOffset = ByteOffsetFromBase;
    }

    Loads.insert(L);
  }

  assert(!Loads.empty() && "All the bytes of the value must be loaded from "
         "memory, so there must be at least one load which produces the value");
  assert(Base && "Base address of the accessed memory location must be set");
  assert(FirstOffset != INT64_MAX && "First byte offset must be set");

  bool NeedsZext = ZeroExtendedBytes > 0;

  // The memory type only covers the non-zero bytes; the rest comes from a
  // zero-extending load when NeedsZext is set.
  EVT MemVT =
      EVT::getIntegerVT(*DAG.getContext(), (ByteWidth - ZeroExtendedBytes) * 8);

  if (!MemVT.isSimple())
    return SDValue();

  // Check if the bytes of the OR we are looking at match with either big or
  // little endian value load
  std::optional<bool> IsBigEndian = isBigEndian(
      ArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), FirstOffset);
  if (!IsBigEndian)
    return SDValue();

  assert(FirstByteProvider && "must be set");

  // Ensure that the first byte is loaded from zero offset of the first load.
  // So the combined value can be loaded from the first load address.
  if (MemoryByteOffset(*FirstByteProvider) != 0)
    return SDValue();
  auto *FirstLoad = cast<LoadSDNode>(FirstByteProvider->Src.value());

  // Before legalization we allow introducing loads that are wider than legal,
  // which will later be split into legally sized loads. This enables us to
  // combine, for example, i8 loads forming an i64 into an i64 load, which
  // then gets split up into a couple of i32 loads on 32-bit targets.
  if (LegalOperations &&
      !TLI.isLoadLegal(VT, MemVT, FirstLoad->getAlign(),
                       FirstLoad->getAddressSpace(),
                       NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, false))
    return SDValue();

  // The node we are looking at matches with the pattern, check if we can
  // replace it with a single (possibly zero-extended) load and bswap + shift if
  // needed.

  // If the load needs byte swap check if the target supports it
  bool NeedsBswap = IsBigEndianTarget != *IsBigEndian;

  // Before legalize we can introduce illegal bswaps which will be later
  // converted to an explicit bswap sequence. This way we end up with a single
  // load and byte shuffling instead of several loads and byte shuffling.
  // We do not introduce illegal bswaps when zero-extending as this tends to
  // introduce too many arithmetic instructions.
  if (NeedsBswap && (LegalOperations || NeedsZext) &&
      !TLI.isOperationLegal(ISD::BSWAP, VT))
    return SDValue();

  // If we need to bswap and zero extend, we have to insert a shift. Check that
  // it is legal.
  if (NeedsBswap && NeedsZext && LegalOperations &&
      !TLI.isOperationLegal(ISD::SHL, VT))
    return SDValue();

  // Check that a load of the wide type is both allowed and fast on the target
  unsigned Fast = 0;
  bool Allowed =
      TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
                             *FirstLoad->getMemOperand(), &Fast);
  if (!Allowed || !Fast)
    return SDValue();

  SDValue NewLoad =
      DAG.getExtLoad(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, SDLoc(N), VT,
                     Chain, FirstLoad->getBasePtr(),
                     FirstLoad->getPointerInfo(), MemVT, FirstLoad->getAlign());

  // Transfer chain users from old loads to the new load.
  for (LoadSDNode *L : Loads)
    DAG.makeEquivalentMemoryOrdering(L, NewLoad);

  if (!NeedsBswap)
    return NewLoad;

  // When byte-swapping a zero-extended value, shift the loaded bytes into the
  // high part first so the bswap places the zeros in the low bytes.
  SDValue ShiftedLoad =
      NeedsZext ? DAG.getNode(ISD::SHL, SDLoc(N), VT, NewLoad,
                              DAG.getShiftAmountConstant(ZeroExtendedBytes * 8,
                                                         VT, SDLoc(N)))
                : NewLoad;
  return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, ShiftedLoad);
}
10150
10151// If the target has andn, bsl, or a similar bit-select instruction,
10152// we want to unfold masked merge, with canonical pattern of:
10153// | A | |B|
10154// ((x ^ y) & m) ^ y
10155// | D |
10156// Into:
10157// (x & m) | (y & ~m)
10158// If y is a constant, m is not a 'not', and the 'andn' does not work with
10159// immediates, we unfold into a different pattern:
10160// ~(~x & m) & (m | y)
10161// If x is a constant, m is a 'not', and the 'andn' does not work with
10162// immediates, we unfold into a different pattern:
10163// (x | ~m) & ~(~m & ~y)
10164// NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at
10165// the very least that breaks andnpd / andnps patterns, and because those
10166// patterns are simplified in IR and shouldn't be created in the DAG
SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
  assert(N->getOpcode() == ISD::XOR);

  // Don't touch 'not' (i.e. where y = -1).
  if (isAllOnesOrAllOnesSplat(N->getOperand(1)))
    return SDValue();

  EVT VT = N->getValueType(0);

  // There are 3 commutable operators in the pattern,
  // so we have to deal with 8 possible variants of the basic pattern.
  SDValue X, Y, M;
  // Try to match ((x ^ y) & m) ^ y where \p And is the candidate AND node,
  // \p XorIdx selects which AND operand should be the inner XOR, and \p Other
  // is the outer XOR's other operand (the candidate 'y'). On success, binds
  // X, Y and M.
  auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) {
    if (And.getOpcode() != ISD::AND || !And.hasOneUse())
      return false;
    SDValue Xor = And.getOperand(XorIdx);
    if (Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
      return false;
    SDValue Xor0 = Xor.getOperand(0);
    SDValue Xor1 = Xor.getOperand(1);
    // Don't touch 'not' (i.e. where y = -1).
    if (isAllOnesOrAllOnesSplat(Xor1))
      return false;
    // Canonicalize so that Xor1 is the operand equal to Other ('y').
    if (Other == Xor0)
      std::swap(Xor0, Xor1);
    if (Other != Xor1)
      return false;
    X = Xor0;
    Y = Xor1;
    M = And.getOperand(XorIdx ? 0 : 1);
    return true;
  };

  // Try all four placements of the AND/XOR pair across the outer XOR's
  // operands (the lambda covers the remaining commutations internally).
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  if (!matchAndXor(N0, 0, N1) && !matchAndXor(N0, 1, N1) &&
      !matchAndXor(N1, 0, N0) && !matchAndXor(N1, 1, N0))
    return SDValue();

  // Don't do anything if the mask is constant. This should not be reachable.
  // InstCombine should have already unfolded this pattern, and DAGCombiner
  // probably shouldn't produce it, too.
  if (isa<ConstantSDNode>(M.getNode()))
    return SDValue();

  // We can transform if the target has AndNot
  if (!TLI.hasAndNot(M))
    return SDValue();

  SDLoc DL(N);

  // If Y is a constant, check that 'andn' works with immediates. Unless M is
  // a bitwise not that would already allow ANDN to be used.
  if (!TLI.hasAndNot(Y) && !isBitwiseNot(M)) {
    assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");
    // If not, we need to do a bit more work to make sure andn is still used.
    // Emit ~(~x & m) & (m | y) so the ANDN consumes the variable ~x.
    SDValue NotX = DAG.getNOT(DL, X, VT);
    SDValue LHS = DAG.getNode(ISD::AND, DL, VT, NotX, M);
    SDValue NotLHS = DAG.getNOT(DL, LHS, VT);
    SDValue RHS = DAG.getNode(ISD::OR, DL, VT, M, Y);
    return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS);
  }

  // If X is a constant and M is a bitwise not, check that 'andn' works with
  // immediates.
  if (!TLI.hasAndNot(X) && isBitwiseNot(M)) {
    assert(TLI.hasAndNot(Y) && "Only mask is a variable? Unreachable.");
    // If not, we need to do a bit more work to make sure andn is still used.
    // Emit (x | ~m) & ~(~m & ~y); NotM reuses the operand under M's 'not'.
    SDValue NotM = M.getOperand(0);
    SDValue LHS = DAG.getNode(ISD::OR, DL, VT, X, NotM);
    SDValue NotY = DAG.getNOT(DL, Y, VT);
    SDValue RHS = DAG.getNode(ISD::AND, DL, VT, NotM, NotY);
    SDValue NotRHS = DAG.getNOT(DL, RHS, VT);
    return DAG.getNode(ISD::AND, DL, VT, LHS, NotRHS);
  }

  // Default unfold: (x & m) | (y & ~m).
  SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);
  SDValue NotM = DAG.getNOT(DL, M, VT);
  SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);

  return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
}
10249
10250SDValue DAGCombiner::visitXOR(SDNode *N) {
10251 SDValue N0 = N->getOperand(0);
10252 SDValue N1 = N->getOperand(1);
10253 EVT VT = N0.getValueType();
10254 SDLoc DL(N);
10255
10256 // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
10257 if (N0.isUndef() && N1.isUndef())
10258 return DAG.getConstant(0, DL, VT);
10259
10260 // fold (xor x, undef) -> undef
10261 if (N0.isUndef())
10262 return N0;
10263 if (N1.isUndef())
10264 return N1;
10265
10266 // fold (xor c1, c2) -> c1^c2
10267 if (SDValue C = DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, {N0, N1}))
10268 return C;
10269
10270 // canonicalize constant to RHS
10273 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
10274
10275 // fold vector ops
10276 if (VT.isVector()) {
10277 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
10278 return FoldedVOp;
10279
10280 // fold (xor x, 0) -> x, vector edition
10282 return N0;
10283 }
10284
10285 // fold (xor x, 0) -> x
10286 if (isNullConstant(N1))
10287 return N0;
10288
10289 if (SDValue NewSel = foldBinOpIntoSelect(N))
10290 return NewSel;
10291
10292 // reassociate xor
10293 if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
10294 return RXOR;
10295
10296 // Fold xor(vecreduce(x), vecreduce(y)) -> vecreduce(xor(x, y))
10297 if (SDValue SD =
10298 reassociateReduction(ISD::VECREDUCE_XOR, ISD::XOR, DL, VT, N0, N1))
10299 return SD;
10300
10301 // fold (a^b) -> (a|b) iff a and b share no bits.
10302 if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
10303 DAG.haveNoCommonBitsSet(N0, N1))
10304 return DAG.getNode(ISD::OR, DL, VT, N0, N1, SDNodeFlags::Disjoint);
10305
10306 // look for 'add-like' folds:
10307 // XOR(N0,MIN_SIGNED_VALUE) == ADD(N0,MIN_SIGNED_VALUE)
10308 if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
10310 if (SDValue Combined = visitADDLike(N))
10311 return Combined;
10312
10313 // fold not (setcc x, y, cc) -> setcc x y !cc
10314 // Avoid breaking: and (not(setcc x, y, cc), z) -> andn for vec
10315 unsigned N0Opcode = N0.getOpcode();
10316 SDValue LHS, RHS, CC;
10317 if (TLI.isConstTrueVal(N1) &&
10318 isSetCCEquivalent(N0, LHS, RHS, CC, /*MatchStrict*/ true) &&
10319 !(VT.isVector() && TLI.hasAndNot(SDValue(N, 0)) && N->hasOneUse() &&
10320 N->use_begin()->getUser()->getOpcode() == ISD::AND)) {
10322 LHS.getValueType());
10323 if (!LegalOperations ||
10324 TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
10325 switch (N0Opcode) {
10326 default:
10327 llvm_unreachable("Unhandled SetCC Equivalent!");
10328 case ISD::SETCC:
10329 return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
10330 case ISD::SELECT_CC:
10331 return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
10332 N0.getOperand(3), NotCC);
10333 case ISD::STRICT_FSETCC:
10334 case ISD::STRICT_FSETCCS: {
10335 if (N0.hasOneUse()) {
10336 // FIXME Can we handle multiple uses? Could we token factor the chain
10337 // results from the new/old setcc?
10338 SDValue SetCC =
10339 DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC,
10340 N0.getOperand(0), N0Opcode == ISD::STRICT_FSETCCS);
10341 CombineTo(N, SetCC);
10342 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), SetCC.getValue(1));
10343 recursivelyDeleteUnusedNodes(N0.getNode());
10344 return SDValue(N, 0); // Return N so it doesn't get rechecked!
10345 }
10346 break;
10347 }
10348 }
10349 }
10350 }
10351
10352 // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
10353 if (isOneConstant(N1) && N0Opcode == ISD::ZERO_EXTEND && N0.hasOneUse() &&
10354 isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
10355 SDValue V = N0.getOperand(0);
10356 SDLoc DL0(N0);
10357 V = DAG.getNode(ISD::XOR, DL0, V.getValueType(), V,
10358 DAG.getConstant(1, DL0, V.getValueType()));
10359 AddToWorklist(V.getNode());
10360 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, V);
10361 }
10362
10363 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
10364 // fold (not (and x, y)) -> (or (not x), (not y)) iff x or y are setcc
10365 if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
10366 (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
10367 SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
10368 if (isOneUseSetCC(N01) || isOneUseSetCC(N00)) {
10369 unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
10370 N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
10371 N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
10372 AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
10373 return DAG.getNode(NewOpcode, DL, VT, N00, N01);
10374 }
10375 }
10376 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
10377 // fold (not (and x, y)) -> (or (not x), (not y)) iff x or y are constants
10378 if (isAllOnesConstant(N1) && N0.hasOneUse() &&
10379 (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
10380 SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
10381 if (isa<ConstantSDNode>(N01) || isa<ConstantSDNode>(N00)) {
10382 unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
10383 N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
10384 N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
10385 AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
10386 return DAG.getNode(NewOpcode, DL, VT, N00, N01);
10387 }
10388 }
10389
10390 // fold (not (sub Y, X)) -> (add X, ~Y) if Y is a constant
10391 if (N0.getOpcode() == ISD::SUB && isAllOnesConstant(N1)) {
10392 SDValue Y = N0.getOperand(0);
10393 SDValue X = N0.getOperand(1);
10394
10395 if (auto *YConst = dyn_cast<ConstantSDNode>(Y)) {
10396 APInt NotYValue = ~YConst->getAPIntValue();
10397 SDValue NotY = DAG.getConstant(NotYValue, DL, VT);
10398 return DAG.getNode(ISD::ADD, DL, VT, X, NotY, N->getFlags());
10399 }
10400 }
10401
10402 // fold (not (add X, -1)) -> (neg X)
10403 if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() && isAllOnesConstant(N1) &&
10405 return DAG.getNegative(N0.getOperand(0), DL, VT);
10406 }
10407
10408 // fold (xor (and x, y), y) -> (and (not x), y)
10409 if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) {
10410 SDValue X = N0.getOperand(0);
10411 SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
10412 AddToWorklist(NotX.getNode());
10413 return DAG.getNode(ISD::AND, DL, VT, NotX, N1);
10414 }
10415
10416 // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
10417 if (!LegalOperations || hasOperation(ISD::ABS, VT)) {
10418 SDValue A = N0Opcode == ISD::ADD ? N0 : N1;
10419 SDValue S = N0Opcode == ISD::SRA ? N0 : N1;
10420 if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
10421 SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
10422 SDValue S0 = S.getOperand(0);
10423 if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0))
10424 if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1)))
10425 if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
10426 return DAG.getNode(ISD::ABS, DL, VT, S0);
10427 }
10428 }
10429
10430 // fold (xor x, x) -> 0
10431 if (N0 == N1)
10432 return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
10433
10434 // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
10435 // Here is a concrete example of this equivalence:
10436 // i16 x == 14
10437 // i16 shl == 1 << 14 == 16384 == 0b0100000000000000
10438 // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
10439 //
10440 // =>
10441 //
10442 // i16 ~1 == 0b1111111111111110
10443 // i16 rol(~1, 14) == 0b1011111111111111
10444 //
10445 // Some additional tips to help conceptualize this transform:
10446 // - Try to see the operation as placing a single zero in a value of all ones.
10447 // - There exists no value for x which would allow the result to contain zero.
10448 // - Values of x larger than the bitwidth are undefined and do not require a
10449 // consistent result.
10450 // - Pushing the zero left requires shifting one bits in from the right.
10451 // A rotate left of ~1 is a nice way of achieving the desired result.
10452 if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0Opcode == ISD::SHL &&
10454 return DAG.getNode(ISD::ROTL, DL, VT, DAG.getSignedConstant(~1, DL, VT),
10455 N0.getOperand(1));
10456 }
10457
10458 // Simplify: xor (op x...), (op y...) -> (op (xor x, y))
10459 if (N0Opcode == N1.getOpcode())
10460 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
10461 return V;
10462
10463 if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
10464 return R;
10465 if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG))
10466 return R;
10467 if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
10468 return R;
10469
10470 // Unfold ((x ^ y) & m) ^ y into (x & m) | (y & ~m) if profitable
10471 if (SDValue MM = unfoldMaskedMerge(N))
10472 return MM;
10473
10474 // Simplify the expression using non-local knowledge.
10476 return SDValue(N, 0);
10477
10478 if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
10479 return Combined;
10480
10481 // fold (xor (smin(x, C), C)) -> select (x < C), xor(x, C), 0
10482 // fold (xor (smax(x, C), C)) -> select (x > C), xor(x, C), 0
10483 // fold (xor (umin(x, C), C)) -> select (x < C), xor(x, C), 0
10484 // fold (xor (umax(x, C), C)) -> select (x > C), xor(x, C), 0
10485 SDValue Op0;
10486 if (sd_match(N0, m_OneUse(m_AnyOf(m_SMin(m_Value(Op0), m_Specific(N1)),
10487 m_SMax(m_Value(Op0), m_Specific(N1)),
10488 m_UMin(m_Value(Op0), m_Specific(N1)),
10489 m_UMax(m_Value(Op0), m_Specific(N1)))))) {
10490
10491 if (isa<ConstantSDNode>(N1) ||
10493 // For vectors, only optimize when the constant is zero or all-ones to
10494 // avoid generating more instructions
10495 if (VT.isVector()) {
10496 ConstantSDNode *N1C = isConstOrConstSplat(N1);
10497 if (!N1C || (!N1C->isZero() && !N1C->isAllOnes()))
10498 return SDValue();
10499 }
10500
10501 // Avoid the fold if the minmax operation is legal and select is expensive
10502 if (TLI.isOperationLegal(N0.getOpcode(), VT) &&
10504 return SDValue();
10505
10506 EVT CCVT = getSetCCResultType(VT);
10507 ISD::CondCode CC;
10508 switch (N0.getOpcode()) {
10509 case ISD::SMIN:
10510 CC = ISD::SETLT;
10511 break;
10512 case ISD::SMAX:
10513 CC = ISD::SETGT;
10514 break;
10515 case ISD::UMIN:
10516 CC = ISD::SETULT;
10517 break;
10518 case ISD::UMAX:
10519 CC = ISD::SETUGT;
10520 break;
10521 }
10522 SDValue FN1 = DAG.getFreeze(N1);
10523 SDValue Cmp = DAG.getSetCC(DL, CCVT, Op0, FN1, CC);
10524 SDValue XorXC = DAG.getNode(ISD::XOR, DL, VT, Op0, FN1);
10525 SDValue Zero = DAG.getConstant(0, DL, VT);
10526 return DAG.getSelect(DL, VT, Cmp, XorXC, Zero);
10527 }
10528 }
10529
10530 return SDValue();
10531}
10532
10533/// If we have a shift-by-constant of a bitwise logic op that itself has a
10534/// shift-by-constant operand with identical opcode, we may be able to convert
10535/// that into 2 independent shifts followed by the logic op. This is a
10536/// throughput improvement.
10538 // Match a one-use bitwise logic op.
10539 SDValue LogicOp = Shift->getOperand(0);
10540 if (!LogicOp.hasOneUse())
10541 return SDValue();
10542
10543 unsigned LogicOpcode = LogicOp.getOpcode();
10544 if (LogicOpcode != ISD::AND && LogicOpcode != ISD::OR &&
10545 LogicOpcode != ISD::XOR)
10546 return SDValue();
10547
10548 // Find a matching one-use shift by constant.
10549 unsigned ShiftOpcode = Shift->getOpcode();
10550 SDValue C1 = Shift->getOperand(1);
10551 ConstantSDNode *C1Node = isConstOrConstSplat(C1);
10552 assert(C1Node && "Expected a shift with constant operand");
10553 const APInt &C1Val = C1Node->getAPIntValue();
10554 auto matchFirstShift = [&](SDValue V, SDValue &ShiftOp,
10555 const APInt *&ShiftAmtVal) {
10556 if (V.getOpcode() != ShiftOpcode || !V.hasOneUse())
10557 return false;
10558
10559 ConstantSDNode *ShiftCNode = isConstOrConstSplat(V.getOperand(1));
10560 if (!ShiftCNode)
10561 return false;
10562
10563 // Capture the shifted operand and shift amount value.
10564 ShiftOp = V.getOperand(0);
10565 ShiftAmtVal = &ShiftCNode->getAPIntValue();
10566
10567 // Shift amount types do not have to match their operand type, so check that
10568 // the constants are the same width.
10569 if (ShiftAmtVal->getBitWidth() != C1Val.getBitWidth())
10570 return false;
10571
10572 // The fold is not valid if the sum of the shift values doesn't fit in the
10573 // given shift amount type.
10574 bool Overflow = false;
10575 APInt NewShiftAmt = C1Val.uadd_ov(*ShiftAmtVal, Overflow);
10576 if (Overflow)
10577 return false;
10578
10579 // The fold is not valid if the sum of the shift values exceeds bitwidth.
10580 if (NewShiftAmt.uge(V.getScalarValueSizeInBits()))
10581 return false;
10582
10583 return true;
10584 };
10585
10586 // Logic ops are commutative, so check each operand for a match.
10587 SDValue X, Y;
10588 const APInt *C0Val;
10589 if (matchFirstShift(LogicOp.getOperand(0), X, C0Val))
10590 Y = LogicOp.getOperand(1);
10591 else if (matchFirstShift(LogicOp.getOperand(1), X, C0Val))
10592 Y = LogicOp.getOperand(0);
10593 else
10594 return SDValue();
10595
10596 // shift (logic (shift X, C0), Y), C1 -> logic (shift X, C0+C1), (shift Y, C1)
10597 SDLoc DL(Shift);
10598 EVT VT = Shift->getValueType(0);
10599 EVT ShiftAmtVT = Shift->getOperand(1).getValueType();
10600 SDValue ShiftSumC = DAG.getConstant(*C0Val + C1Val, DL, ShiftAmtVT);
10601 SDValue NewShift1 = DAG.getNode(ShiftOpcode, DL, VT, X, ShiftSumC);
10602 SDValue NewShift2 = DAG.getNode(ShiftOpcode, DL, VT, Y, C1);
10603 return DAG.getNode(LogicOpcode, DL, VT, NewShift1, NewShift2,
10604 LogicOp->getFlags());
10605}
10606
10607/// Handle transforms common to the three shifts, when the shift amount is a
10608/// constant.
10609/// We are looking for: (shift being one of shl/sra/srl)
10610/// shift (binop X, C0), C1
10611/// And want to transform into:
10612/// binop (shift X, C1), (shift C0, C1)
10613SDValue DAGCombiner::visitShiftByConstant(SDNode *N) {
10614 assert(isConstOrConstSplat(N->getOperand(1)) && "Expected constant operand");
10615
10616 // Do not turn a 'not' into a regular xor.
10617 if (isBitwiseNot(N->getOperand(0)))
10618 return SDValue();
10619
10620 // The inner binop must be one-use, since we want to replace it.
10621 SDValue LHS = N->getOperand(0);
10622 if (!LHS.hasOneUse() || !TLI.isDesirableToCommuteWithShift(N, Level))
10623 return SDValue();
10624
10625 // Fold shift(bitop(shift(x,c1),y), c2) -> bitop(shift(x,c1+c2),shift(y,c2)).
10626 if (SDValue R = combineShiftOfShiftedLogic(N, DAG))
10627 return R;
10628
10629 // We want to pull some binops through shifts, so that we have (and (shift))
10630 // instead of (shift (and)), likewise for add, or, xor, etc. This sort of
10631 // thing happens with address calculations, so it's important to canonicalize
10632 // it.
10633 switch (LHS.getOpcode()) {
10634 default:
10635 return SDValue();
10636 case ISD::OR:
10637 case ISD::XOR:
10638 case ISD::AND:
10639 break;
10640 case ISD::ADD:
10641 if (N->getOpcode() != ISD::SHL)
10642 return SDValue(); // only shl(add) not sr[al](add).
10643 break;
10644 }
10645
10646 // FIXME: disable this unless the input to the binop is a shift by a constant
10647 // or is copy/select. Enable this in other cases when figure out it's exactly
10648 // profitable.
10649 SDValue BinOpLHSVal = LHS.getOperand(0);
10650 bool IsShiftByConstant = (BinOpLHSVal.getOpcode() == ISD::SHL ||
10651 BinOpLHSVal.getOpcode() == ISD::SRA ||
10652 BinOpLHSVal.getOpcode() == ISD::SRL) &&
10653 isa<ConstantSDNode>(BinOpLHSVal.getOperand(1));
10654 bool IsCopyOrSelect = BinOpLHSVal.getOpcode() == ISD::CopyFromReg ||
10655 BinOpLHSVal.getOpcode() == ISD::SELECT;
10656
10657 if (!IsShiftByConstant && !IsCopyOrSelect)
10658 return SDValue();
10659
10660 if (IsCopyOrSelect && N->hasOneUse())
10661 return SDValue();
10662
10663 // Attempt to fold the constants, shifting the binop RHS by the shift amount.
10664 SDLoc DL(N);
10665 EVT VT = N->getValueType(0);
10666 if (SDValue NewRHS = DAG.FoldConstantArithmetic(
10667 N->getOpcode(), DL, VT, {LHS.getOperand(1), N->getOperand(1)})) {
10668 SDValue NewShift = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(0),
10669 N->getOperand(1));
10670 return DAG.getNode(LHS.getOpcode(), DL, VT, NewShift, NewRHS);
10671 }
10672
10673 return SDValue();
10674}
10675
10676SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
10677 assert(N->getOpcode() == ISD::TRUNCATE);
10678 assert(N->getOperand(0).getOpcode() == ISD::AND);
10679
10680 // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
10681 EVT TruncVT = N->getValueType(0);
10682 if (N->hasOneUse() && N->getOperand(0).hasOneUse() &&
10683 TLI.isTypeDesirableForOp(ISD::AND, TruncVT)) {
10684 SDValue N01 = N->getOperand(0).getOperand(1);
10685 if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
10686 SDLoc DL(N);
10687 SDValue N00 = N->getOperand(0).getOperand(0);
10688 SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
10689 SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
10690 AddToWorklist(Trunc00.getNode());
10691 AddToWorklist(Trunc01.getNode());
10692 return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
10693 }
10694 }
10695
10696 return SDValue();
10697}
10698
10699SDValue DAGCombiner::visitRotate(SDNode *N) {
10700 SDLoc dl(N);
10701 SDValue N0 = N->getOperand(0);
10702 SDValue N1 = N->getOperand(1);
10703 EVT VT = N->getValueType(0);
10704 unsigned Bitsize = VT.getScalarSizeInBits();
10705
10706 // fold (rot x, 0) -> x
10707 if (isNullOrNullSplat(N1))
10708 return N0;
10709
10710 // fold (rot x, c) -> x iff (c % BitSize) == 0
10711 if (isPowerOf2_32(Bitsize) && Bitsize > 1) {
10712 APInt ModuloMask(N1.getScalarValueSizeInBits(), Bitsize - 1);
10713 if (DAG.MaskedValueIsZero(N1, ModuloMask))
10714 return N0;
10715 }
10716
10717 // fold (rot x, c) -> (rot x, c % BitSize)
10718 bool OutOfRange = false;
10719 auto MatchOutOfRange = [Bitsize, &OutOfRange](ConstantSDNode *C) {
10720 OutOfRange |= C->getAPIntValue().uge(Bitsize);
10721 return true;
10722 };
10723 if (ISD::matchUnaryPredicate(N1, MatchOutOfRange) && OutOfRange) {
10724 EVT AmtVT = N1.getValueType();
10725 SDValue Bits = DAG.getConstant(Bitsize, dl, AmtVT);
10726 if (SDValue Amt =
10727 DAG.FoldConstantArithmetic(ISD::UREM, dl, AmtVT, {N1, Bits}))
10728 return DAG.getNode(N->getOpcode(), dl, VT, N0, Amt);
10729 }
10730
10731 // rot i16 X, 8 --> bswap X
10732 auto *RotAmtC = isConstOrConstSplat(N1);
10733 if (RotAmtC && RotAmtC->getAPIntValue() == 8 &&
10734 VT.getScalarSizeInBits() == 16 && hasOperation(ISD::BSWAP, VT))
10735 return DAG.getNode(ISD::BSWAP, dl, VT, N0);
10736
10737 // Simplify the operands using demanded-bits information.
10739 return SDValue(N, 0);
10740
10741 // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
10742 if (N1.getOpcode() == ISD::TRUNCATE &&
10743 N1.getOperand(0).getOpcode() == ISD::AND) {
10744 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
10745 return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
10746 }
10747
10748 unsigned NextOp = N0.getOpcode();
10749
10750 // fold (rot* (rot* x, c2), c1)
10751 // -> (rot* x, ((c1 % bitsize) +- (c2 % bitsize) + bitsize) % bitsize)
10752 if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
10753 bool C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
10755 if (C1 && C2 && N1.getValueType() == N0.getOperand(1).getValueType()) {
10756 EVT ShiftVT = N1.getValueType();
10757 bool SameSide = (N->getOpcode() == NextOp);
10758 unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
10759 SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
10760 SDValue Norm1 = DAG.FoldConstantArithmetic(ISD::UREM, dl, ShiftVT,
10761 {N1, BitsizeC});
10762 SDValue Norm2 = DAG.FoldConstantArithmetic(ISD::UREM, dl, ShiftVT,
10763 {N0.getOperand(1), BitsizeC});
10764 if (Norm1 && Norm2)
10765 if (SDValue CombinedShift = DAG.FoldConstantArithmetic(
10766 CombineOp, dl, ShiftVT, {Norm1, Norm2})) {
10767 CombinedShift = DAG.FoldConstantArithmetic(ISD::ADD, dl, ShiftVT,
10768 {CombinedShift, BitsizeC});
10769 SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
10770 ISD::UREM, dl, ShiftVT, {CombinedShift, BitsizeC});
10771 return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
10772 CombinedShiftNorm);
10773 }
10774 }
10775 }
10776 return SDValue();
10777}
10778
10779SDValue DAGCombiner::visitSHL(SDNode *N) {
10780 SDValue N0 = N->getOperand(0);
10781 SDValue N1 = N->getOperand(1);
10782 if (SDValue V = DAG.simplifyShift(N0, N1))
10783 return V;
10784
10785 SDLoc DL(N);
10786 EVT VT = N0.getValueType();
10787 EVT ShiftVT = N1.getValueType();
10788 unsigned OpSizeInBits = VT.getScalarSizeInBits();
10789
10790 // fold (shl c1, c2) -> c1<<c2
10791 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N0, N1}))
10792 return C;
10793
10794 // fold vector ops
10795 if (VT.isVector()) {
10796 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
10797 return FoldedVOp;
10798
10799 BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
10800 // If setcc produces all-one true value then:
10801 // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
10802 if (N1CV && N1CV->isConstant()) {
10803 if (N0.getOpcode() == ISD::AND) {
10804 SDValue N00 = N0->getOperand(0);
10805 SDValue N01 = N0->getOperand(1);
10806 BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
10807
10808 if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
10811 if (SDValue C =
10812 DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N01, N1}))
10813 return DAG.getNode(ISD::AND, DL, VT, N00, C);
10814 }
10815 }
10816 }
10817 }
10818
10819 if (SDValue NewSel = foldBinOpIntoSelect(N))
10820 return NewSel;
10821
10822 // if (shl x, c) is known to be zero, return 0
10823 if (DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))
10824 return DAG.getConstant(0, DL, VT);
10825
10826 // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
10827 if (N1.getOpcode() == ISD::TRUNCATE &&
10828 N1.getOperand(0).getOpcode() == ISD::AND) {
10829 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
10830 return DAG.getNode(ISD::SHL, DL, VT, N0, NewOp1);
10831 }
10832
10833 // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
10834 if (N0.getOpcode() == ISD::SHL) {
10835 auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
10836 ConstantSDNode *RHS) {
10837 APInt c1 = LHS->getAPIntValue();
10838 APInt c2 = RHS->getAPIntValue();
10839 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10840 return (c1 + c2).uge(OpSizeInBits);
10841 };
10842 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
10843 return DAG.getConstant(0, DL, VT);
10844
10845 auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
10846 ConstantSDNode *RHS) {
10847 APInt c1 = LHS->getAPIntValue();
10848 APInt c2 = RHS->getAPIntValue();
10849 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10850 return (c1 + c2).ult(OpSizeInBits);
10851 };
10852 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
10853 SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
10854 return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
10855 }
10856 }
10857
10858 // fold (shl (ext (shl x, c1)), c2) -> (shl (ext x), (add c1, c2))
10859 // For this to be valid, the second form must not preserve any of the bits
10860 // that are shifted out by the inner shift in the first form. This means
10861 // the outer shift size must be >= the number of bits added by the ext.
10862 // As a corollary, we don't care what kind of ext it is.
10863 if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
10864 N0.getOpcode() == ISD::ANY_EXTEND ||
10865 N0.getOpcode() == ISD::SIGN_EXTEND) &&
10866 N0.getOperand(0).getOpcode() == ISD::SHL) {
10867 SDValue N0Op0 = N0.getOperand(0);
10868 SDValue InnerShiftAmt = N0Op0.getOperand(1);
10869 EVT InnerVT = N0Op0.getValueType();
10870 uint64_t InnerBitwidth = InnerVT.getScalarSizeInBits();
10871
10872 auto MatchOutOfRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
10873 ConstantSDNode *RHS) {
10874 APInt c1 = LHS->getAPIntValue();
10875 APInt c2 = RHS->getAPIntValue();
10876 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10877 return c2.uge(OpSizeInBits - InnerBitwidth) &&
10878 (c1 + c2).uge(OpSizeInBits);
10879 };
10880 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchOutOfRange,
10881 /*AllowUndefs*/ false,
10882 /*AllowTypeMismatch*/ true))
10883 return DAG.getConstant(0, DL, VT);
10884
10885 auto MatchInRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
10886 ConstantSDNode *RHS) {
10887 APInt c1 = LHS->getAPIntValue();
10888 APInt c2 = RHS->getAPIntValue();
10889 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10890 return c2.uge(OpSizeInBits - InnerBitwidth) &&
10891 (c1 + c2).ult(OpSizeInBits);
10892 };
10893 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchInRange,
10894 /*AllowUndefs*/ false,
10895 /*AllowTypeMismatch*/ true)) {
10896 SDValue Ext = DAG.getNode(N0.getOpcode(), DL, VT, N0Op0.getOperand(0));
10897 SDValue Sum = DAG.getZExtOrTrunc(InnerShiftAmt, DL, ShiftVT);
10898 Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, Sum, N1);
10899 return DAG.getNode(ISD::SHL, DL, VT, Ext, Sum);
10900 }
10901 }
10902
10903 // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
10904 // Only fold this if the inner zext has no other uses to avoid increasing
10905 // the total number of instructions.
10906 if (N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
10907 N0.getOperand(0).getOpcode() == ISD::SRL) {
10908 SDValue N0Op0 = N0.getOperand(0);
10909 SDValue InnerShiftAmt = N0Op0.getOperand(1);
10910
10911 auto MatchEqual = [VT](ConstantSDNode *LHS, ConstantSDNode *RHS) {
10912 APInt c1 = LHS->getAPIntValue();
10913 APInt c2 = RHS->getAPIntValue();
10914 zeroExtendToMatch(c1, c2);
10915 return c1.ult(VT.getScalarSizeInBits()) && (c1 == c2);
10916 };
10917 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchEqual,
10918 /*AllowUndefs*/ false,
10919 /*AllowTypeMismatch*/ true)) {
10920 EVT InnerShiftAmtVT = N0Op0.getOperand(1).getValueType();
10921 SDValue NewSHL = DAG.getZExtOrTrunc(N1, DL, InnerShiftAmtVT);
10922 NewSHL = DAG.getNode(ISD::SHL, DL, N0Op0.getValueType(), N0Op0, NewSHL);
10923 AddToWorklist(NewSHL.getNode());
10924 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
10925 }
10926 }
10927
10928 if (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) {
10929 auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS,
10930 ConstantSDNode *RHS) {
10931 const APInt &LHSC = LHS->getAPIntValue();
10932 const APInt &RHSC = RHS->getAPIntValue();
10933 return LHSC.ult(OpSizeInBits) && RHSC.ult(OpSizeInBits) &&
10934 LHSC.getZExtValue() <= RHSC.getZExtValue();
10935 };
10936
10937 // fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2
10938 // fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C2-C1)) if C1 >= C2
10939 if (N0->getFlags().hasExact()) {
10940 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
10941 /*AllowUndefs*/ false,
10942 /*AllowTypeMismatch*/ true)) {
10943 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10944 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
10945 return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
10946 }
10947 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
10948 /*AllowUndefs*/ false,
10949 /*AllowTypeMismatch*/ true)) {
10950 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10951 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
10952 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0), Diff);
10953 }
10954 }
10955
10956 // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or
10957 // (and (srl x, (sub c1, c2), MASK)
10958 // Only fold this if the inner shift has no other uses -- if it does,
10959 // folding this will increase the total number of instructions.
10960 if (N0.getOpcode() == ISD::SRL &&
10961 (N0.getOperand(1) == N1 || N0.hasOneUse()) &&
10963 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
10964 /*AllowUndefs*/ false,
10965 /*AllowTypeMismatch*/ true)) {
10966 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10967 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
10968 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
10969 Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, N01);
10970 Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, Diff);
10971 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Diff);
10972 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
10973 }
10974 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
10975 /*AllowUndefs*/ false,
10976 /*AllowTypeMismatch*/ true)) {
10977 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10978 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
10979 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
10980 Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, N1);
10981 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
10982 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
10983 }
10984 }
10985 }
10986
10987 // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
10988 if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
10989 isConstantOrConstantVector(N1, /* No Opaques */ true)) {
10990 SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
10991 SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
10992 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
10993 }
10994
10995 // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
10996 // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
10997 // Variant of version done on multiply, except mul by a power of 2 is turned
10998 // into a shift.
10999 if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
11000 TLI.isDesirableToCommuteWithShift(N, Level)) {
11001 SDValue N01 = N0.getOperand(1);
11002 if (SDValue Shl1 =
11003 DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, {N01, N1})) {
11004 SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
11005 AddToWorklist(Shl0.getNode());
11006 SDNodeFlags Flags;
11007 // Preserve the disjoint flag for Or.
11008 if (N0.getOpcode() == ISD::OR && N0->getFlags().hasDisjoint())
11010 return DAG.getNode(N0.getOpcode(), DL, VT, Shl0, Shl1, Flags);
11011 }
11012 }
11013
11014 // fold (shl (sext (add_nsw x, c1)), c2) -> (add (shl (sext x), c2), c1 << c2)
11015 // TODO: Add zext/add_nuw variant with suitable test coverage
11016 // TODO: Should we limit this with isLegalAddImmediate?
11017 if (N0.getOpcode() == ISD::SIGN_EXTEND &&
11018 N0.getOperand(0).getOpcode() == ISD::ADD &&
11019 N0.getOperand(0)->getFlags().hasNoSignedWrap() &&
11020 TLI.isDesirableToCommuteWithShift(N, Level)) {
11021 SDValue Add = N0.getOperand(0);
11022 SDLoc DL(N0);
11023 if (SDValue ExtC = DAG.FoldConstantArithmetic(N0.getOpcode(), DL, VT,
11024 {Add.getOperand(1)})) {
11025 if (SDValue ShlC =
11026 DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {ExtC, N1})) {
11027 SDValue ExtX = DAG.getNode(N0.getOpcode(), DL, VT, Add.getOperand(0));
11028 SDValue ShlX = DAG.getNode(ISD::SHL, DL, VT, ExtX, N1);
11029 return DAG.getNode(ISD::ADD, DL, VT, ShlX, ShlC);
11030 }
11031 }
11032 }
11033
11034 // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
11035 if (N0.getOpcode() == ISD::MUL && N0->hasOneUse()) {
11036 SDValue N01 = N0.getOperand(1);
11037 if (SDValue Shl =
11038 DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, {N01, N1}))
11039 return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), Shl);
11040 }
11041
11042 ConstantSDNode *N1C = isConstOrConstSplat(N1);
11043 if (N1C && !N1C->isOpaque())
11044 if (SDValue NewSHL = visitShiftByConstant(N))
11045 return NewSHL;
11046
11047 // fold (shl X, cttz(Y)) -> (mul (Y & -Y), X) if cttz is unsupported on the
11048 // target.
11049 if (((N1.getOpcode() == ISD::CTTZ &&
11050 VT.getScalarSizeInBits() <= ShiftVT.getScalarSizeInBits()) ||
11052 N1.hasOneUse() && !TLI.isOperationLegalOrCustom(ISD::CTTZ, ShiftVT) &&
11054 SDValue Y = N1.getOperand(0);
11055 SDLoc DL(N);
11056 SDValue NegY = DAG.getNegative(Y, DL, ShiftVT);
11057 SDValue And =
11058 DAG.getZExtOrTrunc(DAG.getNode(ISD::AND, DL, ShiftVT, Y, NegY), DL, VT);
11059 return DAG.getNode(ISD::MUL, DL, VT, And, N0);
11060 }
11061
11063 return SDValue(N, 0);
11064
11065 // Fold (shl (vscale * C0), C1) to (vscale * (C0 << C1)).
11066 if (N0.getOpcode() == ISD::VSCALE && N1C) {
11067 const APInt &C0 = N0.getConstantOperandAPInt(0);
11068 const APInt &C1 = N1C->getAPIntValue();
11069 return DAG.getVScale(DL, VT, C0 << C1);
11070 }
11071
11072 SDValue X;
11073 APInt VS0;
11074
11075 // fold (shl (X * vscale(VS0)), C1) -> (X * vscale(VS0 << C1))
11076 if (N1C && sd_match(N0, m_Mul(m_Value(X), m_VScale(m_ConstInt(VS0))))) {
11077 SDNodeFlags Flags;
11078 Flags.setNoUnsignedWrap(N->getFlags().hasNoUnsignedWrap() &&
11079 N0->getFlags().hasNoUnsignedWrap());
11080
11081 SDValue VScale = DAG.getVScale(DL, VT, VS0 << N1C->getAPIntValue());
11082 return DAG.getNode(ISD::MUL, DL, VT, X, VScale, Flags);
11083 }
11084
11085 // Fold (shl step_vector(C0), C1) to (step_vector(C0 << C1)).
11086 APInt ShlVal;
11087 if (N0.getOpcode() == ISD::STEP_VECTOR &&
11088 ISD::isConstantSplatVector(N1.getNode(), ShlVal)) {
11089 const APInt &C0 = N0.getConstantOperandAPInt(0);
11090 if (ShlVal.ult(C0.getBitWidth())) {
11091 APInt NewStep = C0 << ShlVal;
11092 return DAG.getStepVector(DL, VT, NewStep);
11093 }
11094 }
11095
11096 return SDValue();
11097}
11098
11099// Transform a right shift of a multiply into a multiply-high.
11100// Examples:
11101// (srl (mul (zext i32:$a to i64), (zext i32:$a to i64)), 32) -> (mulhu $a, $b)
11102// (sra (mul (sext i32:$a to i64), (sext i32:$a to i64)), 32) -> (mulhs $a, $b)
11104 const TargetLowering &TLI) {
11105 assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
11106 "SRL or SRA node is required here!");
11107
11108 // Check the shift amount. Proceed with the transformation if the shift
11109 // amount is constant.
11110 ConstantSDNode *ShiftAmtSrc = isConstOrConstSplat(N->getOperand(1));
11111 if (!ShiftAmtSrc)
11112 return SDValue();
11113
11114 // The operation feeding into the shift must be a multiply.
11115 SDValue ShiftOperand = N->getOperand(0);
11116 if (ShiftOperand.getOpcode() != ISD::MUL)
11117 return SDValue();
11118
11119 // Both operands must be equivalent extend nodes.
11120 SDValue LeftOp = ShiftOperand.getOperand(0);
11121 SDValue RightOp = ShiftOperand.getOperand(1);
11122
11123 if (LeftOp.getOpcode() != ISD::SIGN_EXTEND &&
11124 LeftOp.getOpcode() != ISD::ZERO_EXTEND)
11125 std::swap(LeftOp, RightOp);
11126
11127 bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND;
11128 bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND;
11129
11130 if (!IsSignExt && !IsZeroExt)
11131 return SDValue();
11132
11133 EVT NarrowVT = LeftOp.getOperand(0).getValueType();
11134 unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits();
11135
11136 // return true if U may use the lower bits of its operands
11137 auto UserOfLowerBits = [NarrowVTSize](SDNode *U) {
11138 if (U->getOpcode() != ISD::SRL && U->getOpcode() != ISD::SRA) {
11139 return true;
11140 }
11141 ConstantSDNode *UShiftAmtSrc = isConstOrConstSplat(U->getOperand(1));
11142 if (!UShiftAmtSrc) {
11143 return true;
11144 }
11145 unsigned UShiftAmt = UShiftAmtSrc->getZExtValue();
11146 return UShiftAmt < NarrowVTSize;
11147 };
11148
11149 // If the lower part of the MUL is also used and MUL_LOHI is supported
11150 // do not introduce the MULH in favor of MUL_LOHI
11151 unsigned MulLoHiOp = IsSignExt ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
11152 if (!ShiftOperand.hasOneUse() &&
11153 TLI.isOperationLegalOrCustom(MulLoHiOp, NarrowVT) &&
11154 llvm::any_of(ShiftOperand->users(), UserOfLowerBits)) {
11155 return SDValue();
11156 }
11157
11158 SDValue MulhRightOp;
11159 if (LeftOp.getOpcode() != RightOp.getOpcode()) {
11160 if (IsZeroExt && ShiftOperand.hasOneUse() &&
11161 DAG.computeKnownBits(RightOp).countMaxActiveBits() <= NarrowVTSize) {
11162 MulhRightOp = DAG.getNode(ISD::TRUNCATE, DL, NarrowVT, RightOp);
11163 } else if (IsSignExt && ShiftOperand.hasOneUse() &&
11164 DAG.ComputeMaxSignificantBits(RightOp) <= NarrowVTSize) {
11165 MulhRightOp = DAG.getNode(ISD::TRUNCATE, DL, NarrowVT, RightOp);
11166 } else {
11167 return SDValue();
11168 }
11169 } else {
11170 // Check that the two extend nodes are the same type.
11171 if (NarrowVT != RightOp.getOperand(0).getValueType())
11172 return SDValue();
11173 MulhRightOp = RightOp.getOperand(0);
11174 }
11175
11176 EVT WideVT = LeftOp.getValueType();
11177 // Proceed with the transformation if the wide types match.
11178 assert((WideVT == RightOp.getValueType()) &&
11179 "Cannot have a multiply node with two different operand types.");
11180
11181 // Proceed with the transformation if the wide type is twice as large
11182 // as the narrow type.
11183 if (WideVT.getScalarSizeInBits() != 2 * NarrowVTSize)
11184 return SDValue();
11185
11186 // Check the shift amount with the narrow type size.
11187 // Proceed with the transformation if the shift amount is the width
11188 // of the narrow type.
11189 unsigned ShiftAmt = ShiftAmtSrc->getZExtValue();
11190 if (ShiftAmt != NarrowVTSize)
11191 return SDValue();
11192
11193 // If the operation feeding into the MUL is a sign extend (sext),
11194 // we use mulhs. Otherwise, zero extends (zext) use mulhu.
11195 unsigned MulhOpcode = IsSignExt ? ISD::MULHS : ISD::MULHU;
11196
11197 // Combine to mulh if mulh is legal/custom for the narrow type on the target
11198 // or if it is a vector type then we could transform to an acceptable type and
11199 // rely on legalization to split/combine the result.
11200 EVT TransformVT = NarrowVT;
11201 if (NarrowVT.isVector()) {
11202 TransformVT = TLI.getLegalTypeToTransformTo(*DAG.getContext(), NarrowVT);
11203 if (TransformVT.getScalarType() != NarrowVT.getScalarType())
11204 return SDValue();
11205 }
11206 if (!TLI.isOperationLegalOrCustom(MulhOpcode, TransformVT))
11207 return SDValue();
11208
11209 SDValue Result =
11210 DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0), MulhRightOp);
11211 bool IsSigned = N->getOpcode() == ISD::SRA;
11212 return DAG.getExtOrTrunc(IsSigned, Result, DL, WideVT);
11213}
11214
11215 // fold (bswap (logic_op(bswap(x),y))) -> logic_op(x,bswap(y))
11216 // This helper function accepts an SDNode whose opcode is ISD::BSWAP or
// ISD::BITREVERSE and hoists the bit-reordering operation through a binary
// logic op. Returns the rewritten value, or an empty SDValue if the input
// does not match either pattern.
11218 unsigned Opcode = N->getOpcode();
11219 if (Opcode != ISD::BSWAP && Opcode != ISD::BITREVERSE)
11220 return SDValue();
11221
11222 SDValue N0 = N->getOperand(0);
11223 EVT VT = N->getValueType(0);
11224 SDLoc DL(N);
11225 SDValue X, Y;
11226
11227 // If both operands are bswap/bitreverse, ignore the multiuse
// restriction: the two inner reorders cancel with the outer one, so the
// fold is profitable regardless of extra uses.
11229 m_UnaryOp(Opcode, m_Value(Y))))))
11230 return DAG.getNode(N0.getOpcode(), DL, VT, X, Y);
11231
11232 // Otherwise need to ensure logic_op and bswap/bitreverse(x) have one use.
11234 m_OneUse(m_UnaryOp(Opcode, m_Value(X))), m_Value(Y))))) {
11235 SDValue NewBitReorder = DAG.getNode(Opcode, DL, VT, Y);
11236 return DAG.getNode(N0.getOpcode(), DL, VT, X, NewBitReorder);
11237 }
11238
11239 return SDValue();
11240 }
11241
// Visit an ISD::SRA (arithmetic shift right) node and apply the standard
// DAG-combine folds: constant folding, shift merging, narrowing through
// truncates, conversion to SRL when the sign bit is known zero, and
// multiply-high / averaging pattern recognition. Returns the replacement
// value, SDValue(N, 0) when N was simplified in place, or an empty
// SDValue when no fold applied.
11242 SDValue DAGCombiner::visitSRA(SDNode *N) {
11243 SDValue N0 = N->getOperand(0);
11244 SDValue N1 = N->getOperand(1);
11245 if (SDValue V = DAG.simplifyShift(N0, N1))
11246 return V;
11247
11248 SDLoc DL(N);
11249 EVT VT = N0.getValueType();
11250 unsigned OpSizeInBits = VT.getScalarSizeInBits();
11251
11252 // fold (sra c1, c2) -> c1 >>s c2 (constant fold)
11253 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRA, DL, VT, {N0, N1}))
11254 return C;
11255
11256 // Arithmetic shifting an all-sign-bit value is a no-op.
11257 // fold (sra 0, x) -> 0
11258 // fold (sra -1, x) -> -1
11259 if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
11260 return N0;
11261
11262 // fold vector ops
11263 if (VT.isVector())
11264 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
11265 return FoldedVOp;
11266
11267 if (SDValue NewSel = foldBinOpIntoSelect(N))
11268 return NewSel;
11269
11270 ConstantSDNode *N1C = isConstOrConstSplat(N1);
11271
11272 // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
11273 // clamp (add c1, c2) to max shift.
11274 if (N0.getOpcode() == ISD::SRA) {
11275 EVT ShiftVT = N1.getValueType();
11276 EVT ShiftSVT = ShiftVT.getScalarType();
11277 SmallVector<SDValue, 16> ShiftValues;
11278
// Per-element predicate: compute c1 + c2 (widened by one bit so the
// addition cannot overflow) and clamp to OpSizeInBits - 1, the maximum
// meaningful arithmetic shift.
11279 auto SumOfShifts = [&](ConstantSDNode *LHS, ConstantSDNode *RHS) {
11280 APInt c1 = LHS->getAPIntValue();
11281 APInt c2 = RHS->getAPIntValue();
11282 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
11283 APInt Sum = c1 + c2;
11284 unsigned ShiftSum =
11285 Sum.uge(OpSizeInBits) ? (OpSizeInBits - 1) : Sum.getZExtValue();
11286 ShiftValues.push_back(DAG.getConstant(ShiftSum, DL, ShiftSVT));
11287 return true;
11288 };
11289 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), SumOfShifts)) {
11290 SDValue ShiftValue;
11291 if (N1.getOpcode() == ISD::BUILD_VECTOR)
11292 ShiftValue = DAG.getBuildVector(ShiftVT, DL, ShiftValues);
11293 else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
11294 assert(ShiftValues.size() == 1 &&
11295 "Expected matchBinaryPredicate to return one element for "
11296 "SPLAT_VECTORs");
11297 ShiftValue = DAG.getSplatVector(ShiftVT, DL, ShiftValues[0]);
11298 } else
11299 ShiftValue = ShiftValues[0];
11300 return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), ShiftValue);
11301 }
11302 }
11303
11304 // fold (sra (xor (sra x, c1), -1), c2) -> (xor (sra x, c3), -1)
11305 // This allows merging two arithmetic shifts even when there's a NOT in
11306 // between.
11307 SDValue X;
11308 APInt C1;
11309 if (N1C && sd_match(N0, m_OneUse(m_Not(
11310 m_OneUse(m_Sra(m_Value(X), m_ConstInt(C1))))))) {
11311 APInt C2 = N1C->getAPIntValue();
11312 zeroExtendToMatch(C1, C2, 1 /* Overflow Bit */);
11313 APInt Sum = C1 + C2;
11314 unsigned ShiftSum = Sum.getLimitedValue(OpSizeInBits - 1);
11315 SDValue NewShift = DAG.getNode(
11316 ISD::SRA, DL, VT, X, DAG.getShiftAmountConstant(ShiftSum, VT, DL));
11317 return DAG.getNOT(DL, NewShift, VT);
11318 }
11319
11320 // fold (sra (shl X, m), (sub result_size, n))
11321 // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
11322 // result_size - n != m.
11323 // If truncate is free for the target sext(shl) is likely to result in better
11324 // code.
11325 if (N0.getOpcode() == ISD::SHL && N1C) {
11326 // Get the two constants of the shifts, CN0 = m, CN = n.
11327 const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
11328 if (N01C) {
11329 LLVMContext &Ctx = *DAG.getContext();
11330 // Determine what the truncate's result bitsize and type would be.
11331 EVT TruncVT = VT.changeElementType(
11332 Ctx, EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue()));
11333
11334 // Determine the residual right-shift amount.
11335 int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
11336
11337 // If the shift is not a no-op (in which case this should be just a sign
11338 // extend already), the truncated to type is legal, sign_extend is legal
11339 // on that type, and the truncate to that type is both legal and free,
11340 // perform the transform.
11341 if ((ShiftAmt > 0) &&
11344 TLI.isTruncateFree(VT, TruncVT)) {
11345 SDValue Amt = DAG.getShiftAmountConstant(ShiftAmt, VT, DL);
11346 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
11347 N0.getOperand(0), Amt);
11348 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
11349 Shift);
11350 return DAG.getNode(ISD::SIGN_EXTEND, DL,
11351 N->getValueType(0), Trunc);
11352 }
11353 }
11354 }
11355
11356 // We convert trunc/ext to opposing shifts in IR, but casts may be cheaper.
11357 // sra (add (shl X, N1C), AddC), N1C -->
11358 // sext (add (trunc X to (width - N1C)), AddC')
11359 // sra (sub AddC, (shl X, N1C)), N1C -->
11360 // sext (sub AddC1',(trunc X to (width - N1C)))
11361 if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB) && N1C &&
11362 N0.hasOneUse()) {
11363 bool IsAdd = N0.getOpcode() == ISD::ADD;
11364 SDValue Shl = N0.getOperand(IsAdd ? 0 : 1);
11365 if (Shl.getOpcode() == ISD::SHL && Shl.getOperand(1) == N1 &&
11366 Shl.hasOneUse()) {
11367 // TODO: AddC does not need to be a splat.
11368 if (ConstantSDNode *AddC =
11369 isConstOrConstSplat(N0.getOperand(IsAdd ? 1 : 0))) {
11370 // Determine what the truncate's type would be and ask the target if
11371 // that is a free operation.
11372 LLVMContext &Ctx = *DAG.getContext();
11373 unsigned ShiftAmt = N1C->getZExtValue();
11374 EVT TruncVT = VT.changeElementType(
11375 Ctx, EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt));
11376
11377 // TODO: The simple type check probably belongs in the default hook
11378 // implementation and/or target-specific overrides (because
11379 // non-simple types likely require masking when legalized), but
11380 // that restriction may conflict with other transforms.
11381 if (TruncVT.isSimple() && isTypeLegal(TruncVT) &&
11382 TLI.isTruncateFree(VT, TruncVT)) {
11383 SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT);
11384 SDValue ShiftC =
11385 DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).trunc(
11386 TruncVT.getScalarSizeInBits()),
11387 DL, TruncVT);
11388 SDValue Add;
11389 if (IsAdd)
11390 Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC);
11391 else
11392 Add = DAG.getNode(ISD::SUB, DL, TruncVT, ShiftC, Trunc);
11393 return DAG.getSExtOrTrunc(Add, DL, VT);
11394 }
11395 }
11396 }
11397 }
11398
11399 // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
11400 if (N1.getOpcode() == ISD::TRUNCATE &&
11401 N1.getOperand(0).getOpcode() == ISD::AND) {
11402 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
11403 return DAG.getNode(ISD::SRA, DL, VT, N0, NewOp1);
11404 }
11405
11406 // fold (sra (trunc (sra x, c1)), c2) -> (trunc (sra x, c1 + c2))
11407 // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
11408 // if c1 is equal to the number of bits the trunc removes
11409 // TODO - support non-uniform vector shift amounts.
11410 if (N0.getOpcode() == ISD::TRUNCATE &&
11411 (N0.getOperand(0).getOpcode() == ISD::SRL ||
11412 N0.getOperand(0).getOpcode() == ISD::SRA) &&
11413 N0.getOperand(0).hasOneUse() &&
11414 N0.getOperand(0).getOperand(1).hasOneUse() && N1C) {
11415 SDValue N0Op0 = N0.getOperand(0);
11416 if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
11417 EVT LargeVT = N0Op0.getValueType();
11418 unsigned TruncBits = LargeVT.getScalarSizeInBits() - OpSizeInBits;
11419 if (LargeShift->getAPIntValue() == TruncBits) {
11420 EVT LargeShiftVT = getShiftAmountTy(LargeVT);
11421 SDValue Amt = DAG.getZExtOrTrunc(N1, DL, LargeShiftVT);
11422 Amt = DAG.getNode(ISD::ADD, DL, LargeShiftVT, Amt,
11423 DAG.getConstant(TruncBits, DL, LargeShiftVT));
11424 SDValue SRA =
11425 DAG.getNode(ISD::SRA, DL, LargeVT, N0Op0.getOperand(0), Amt);
11426 return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
11427 }
11428 }
11429 }
11430
11431 // Simplify, based on bits shifted out of the LHS.
11433 return SDValue(N, 0);
11434
11435 // If the sign bit is known to be zero, switch this to a SRL.
11436 if (DAG.SignBitIsZero(N0))
11437 return DAG.getNode(ISD::SRL, DL, VT, N0, N1);
11438
11439 if (N1C && !N1C->isOpaque())
11440 if (SDValue NewSRA = visitShiftByConstant(N))
11441 return NewSRA;
11442
11443 // Try to transform this shift into a multiply-high if
11444 // it matches the appropriate pattern detected in combineShiftToMULH.
11445 if (SDValue MULH = combineShiftToMULH(N, DL, DAG, TLI))
11446 return MULH;
11447
11448 // Attempt to convert a sra of a load into a narrower sign-extending load.
11449 if (SDValue NarrowLoad = reduceLoadWidth(N))
11450 return NarrowLoad;
11451
11452 if (SDValue AVG = foldShiftToAvg(N, DL))
11453 return AVG;
11454
11455 return SDValue();
11456 }
11457
// Visit an ISD::SRL (logical shift right) node and apply the standard
// DAG-combine folds: constant folding, merging with inner shifts (possibly
// through truncates), shl/srl mask folds, ctlz tricks, and multiply-high /
// carry-less-multiply pattern recognition. Returns the replacement value,
// SDValue(N, 0) when N was simplified in place, or an empty SDValue when
// no fold applied.
11458 SDValue DAGCombiner::visitSRL(SDNode *N) {
11459 SDValue N0 = N->getOperand(0);
11460 SDValue N1 = N->getOperand(1);
11461 if (SDValue V = DAG.simplifyShift(N0, N1))
11462 return V;
11463
11464 SDLoc DL(N);
11465 EVT VT = N0.getValueType();
11466 EVT ShiftVT = N1.getValueType();
11467 unsigned OpSizeInBits = VT.getScalarSizeInBits();
11468
11469 // fold (srl c1, c2) -> c1 >>u c2
11470 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRL, DL, VT, {N0, N1}))
11471 return C;
11472
11473 // fold vector ops
11474 if (VT.isVector())
11475 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
11476 return FoldedVOp;
11477
11478 if (SDValue NewSel = foldBinOpIntoSelect(N))
11479 return NewSel;
11480
11481 // if (srl x, c) is known to be zero, return 0
11482 ConstantSDNode *N1C = isConstOrConstSplat(N1);
11483 if (N1C &&
11484 DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))
11485 return DAG.getConstant(0, DL, VT);
11486
11487 // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
11488 if (N0.getOpcode() == ISD::SRL) {
// The sum c1 + c2 is computed with one extra bit so it cannot overflow.
11489 auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
11490 ConstantSDNode *RHS) {
11491 APInt c1 = LHS->getAPIntValue();
11492 APInt c2 = RHS->getAPIntValue();
11493 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
11494 return (c1 + c2).uge(OpSizeInBits);
11495 };
11496 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
11497 return DAG.getConstant(0, DL, VT);
11498
11499 auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
11500 ConstantSDNode *RHS) {
11501 APInt c1 = LHS->getAPIntValue();
11502 APInt c2 = RHS->getAPIntValue();
11503 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
11504 return (c1 + c2).ult(OpSizeInBits);
11505 };
11506 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
11507 SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
11508 return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
11509 }
11510 }
11511
11512 if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
11513 N0.getOperand(0).getOpcode() == ISD::SRL) {
11514 SDValue InnerShift = N0.getOperand(0);
11515 // TODO - support non-uniform vector shift amounts.
11516 if (auto *N001C = isConstOrConstSplat(InnerShift.getOperand(1))) {
11517 uint64_t c1 = N001C->getZExtValue();
11518 uint64_t c2 = N1C->getZExtValue();
11519 EVT InnerShiftVT = InnerShift.getValueType();
11520 EVT ShiftAmtVT = InnerShift.getOperand(1).getValueType();
11521 uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
11522 // srl (trunc (srl x, c1)), c2 --> 0 or (trunc (srl x, (add c1, c2)))
11523 // This is only valid if the OpSizeInBits + c1 = size of inner shift.
11524 if (c1 + OpSizeInBits == InnerShiftSize) {
11525 if (c1 + c2 >= InnerShiftSize)
11526 return DAG.getConstant(0, DL, VT);
11527 SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
11528 SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
11529 InnerShift.getOperand(0), NewShiftAmt);
11530 return DAG.getNode(ISD::TRUNCATE, DL, VT, NewShift);
11531 }
11532 // In the more general case, we can clear the high bits after the shift:
11533 // srl (trunc (srl x, c1)), c2 --> trunc (and (srl x, (c1+c2)), Mask)
11534 if (N0.hasOneUse() && InnerShift.hasOneUse() &&
11535 c1 + c2 < InnerShiftSize) {
11536 SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
11537 SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
11538 InnerShift.getOperand(0), NewShiftAmt);
11539 SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(InnerShiftSize,
11540 OpSizeInBits - c2),
11541 DL, InnerShiftVT);
11542 SDValue And = DAG.getNode(ISD::AND, DL, InnerShiftVT, NewShift, Mask);
11543 return DAG.getNode(ISD::TRUNCATE, DL, VT, And);
11544 }
11545 }
11546 }
11547
11548 if (N0.getOpcode() == ISD::SHL) {
11549 // fold (srl (shl nuw x, c), c) -> x
11550 if (N0.getOperand(1) == N1 && N0->getFlags().hasNoUnsignedWrap())
11551 return N0.getOperand(0);
11552
11553 // fold (srl (shl x, c1), c2) -> (and (shl x, (sub c1, c2), MASK) or
11554 // (and (srl x, (sub c2, c1), MASK)
11555 if ((N0.getOperand(1) == N1 || N0->hasOneUse()) &&
11557 auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS,
11558 ConstantSDNode *RHS) {
11559 const APInt &LHSC = LHS->getAPIntValue();
11560 const APInt &RHSC = RHS->getAPIntValue();
11561 return LHSC.ult(OpSizeInBits) && RHSC.ult(OpSizeInBits) &&
11562 LHSC.getZExtValue() <= RHSC.getZExtValue();
11563 };
11564 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
11565 /*AllowUndefs*/ false,
11566 /*AllowTypeMismatch*/ true)) {
11567 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
11568 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
11569 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
11570 Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N01);
11571 Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, Diff);
11572 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
11573 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
11574 }
11575 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
11576 /*AllowUndefs*/ false,
11577 /*AllowTypeMismatch*/ true)) {
11578 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
11579 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
11580 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
11581 Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N1);
11582 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Diff);
11583 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
11584 }
11585 }
11586 }
11587
11588 // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
11589 // TODO - support non-uniform vector shift amounts.
11590 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
11591 // Shifting in all undef bits?
11592 EVT SmallVT = N0.getOperand(0).getValueType();
11593 unsigned BitSize = SmallVT.getScalarSizeInBits();
11594 if (N1C->getAPIntValue().uge(BitSize))
11595 return DAG.getUNDEF(VT);
11596
11597 if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
11598 uint64_t ShiftAmt = N1C->getZExtValue();
11599 SDLoc DL0(N0);
11600 SDValue SmallShift =
11601 DAG.getNode(ISD::SRL, DL0, SmallVT, N0.getOperand(0),
11602 DAG.getShiftAmountConstant(ShiftAmt, SmallVT, DL0));
11603 AddToWorklist(SmallShift.getNode());
11604 APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
11605 return DAG.getNode(ISD::AND, DL, VT,
11606 DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
11607 DAG.getConstant(Mask, DL, VT));
11608 }
11609 }
11610
11611 // fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign
11612 // bit, which is unmodified by sra.
11613 if (N1C && N1C->getAPIntValue() == (OpSizeInBits - 1)) {
11614 if (N0.getOpcode() == ISD::SRA)
11615 return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), N1);
11616 }
11617
11618 // fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit), and x has a power
11619 // of two bitwidth. The "5" represents (log2 (bitwidth x)).
11620 if (N1C && N0.getOpcode() == ISD::CTLZ &&
11621 isPowerOf2_32(OpSizeInBits) &&
11622 N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
11623 KnownBits Known = DAG.computeKnownBits(N0.getOperand(0));
11624
11625 // If any of the input bits are KnownOne, then the input couldn't be all
11626 // zeros, thus the result of the srl will always be zero.
11627 if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
11628
11629 // If all of the bits input to the ctlz node are known to be zero, then
11630 // the result of the ctlz is "32" and the result of the shift is one.
11631 APInt UnknownBits = ~Known.Zero;
11632 if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
11633
11634 // Otherwise, check to see if there is exactly one bit input to the ctlz.
11635 if (UnknownBits.isPowerOf2()) {
11636 // Okay, we know that only that the single bit specified by UnknownBits
11637 // could be set on input to the CTLZ node. If this bit is set, the SRL
11638 // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
11639 // to an SRL/XOR pair, which is likely to simplify more.
11640 unsigned ShAmt = UnknownBits.countr_zero();
11641 SDValue Op = N0.getOperand(0);
11642
11643 if (ShAmt) {
11644 SDLoc DL(N0);
11645 Op = DAG.getNode(ISD::SRL, DL, VT, Op,
11646 DAG.getShiftAmountConstant(ShAmt, VT, DL));
11647 AddToWorklist(Op.getNode());
11648 }
11649 return DAG.getNode(ISD::XOR, DL, VT, Op, DAG.getConstant(1, DL, VT));
11650 }
11651 }
11652
11653 // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
11654 if (N1.getOpcode() == ISD::TRUNCATE &&
11655 N1.getOperand(0).getOpcode() == ISD::AND) {
11656 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
11657 return DAG.getNode(ISD::SRL, DL, VT, N0, NewOp1);
11658 }
11659
11660 // fold (srl (logic_op x, (shl (zext y), c1)), c1)
11661 // -> (logic_op (srl x, c1), (zext y))
11662 // c1 <= leadingzeros(zext(y))
11663 // TODO: Replace c1 with valuetracking?
11664 SDValue X, ZExtY;
11665 if (sd_match(
11666 N0,
11668 m_Value(X),
11670 m_Specific(N1))))))) {
11671 unsigned NumLeadingZeros = ZExtY.getScalarValueSizeInBits() -
11673 if (N1C && N1C->getZExtValue() <= NumLeadingZeros)
11674 return DAG.getNode(N0.getOpcode(), SDLoc(N0), VT,
11675 DAG.getNode(ISD::SRL, SDLoc(N0), VT, X, N1), ZExtY);
11676 }
11677
11678 // fold (srl (bitcast (build_vector e1, ..., eN)), (N-1) * eltsize)
11679 // -> (zext eN)
11680 if (N1C && VT.isScalarInteger() && DAG.getDataLayout().isLittleEndian()) {
11682 if (BV.getOpcode() == ISD::BUILD_VECTOR) {
11683 EVT BVVT = BV.getValueType();
11684 unsigned EltSizeInBits = BVVT.getScalarSizeInBits();
11685 unsigned NumElts = BVVT.getVectorNumElements();
11686 if (N1C->getZExtValue() == (NumElts - 1) * EltSizeInBits) {
11687 SDValue LastElt = BV.getOperand(NumElts - 1);
11688 assert(LastElt.getScalarValueSizeInBits() >= EltSizeInBits &&
11689 "Expected BUILD_VECTOR operand as wide as element type");
11690 EVT IntEltVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits);
11691 LastElt = DAG.getBitcast(LastElt.getValueType().changeTypeToInteger(),
11692 LastElt);
11693 return DAG.getZExtOrTrunc(DAG.getZExtOrTrunc(LastElt, DL, IntEltVT), DL,
11694 VT);
11695 }
11696 }
11697 }
11698
11699 // fold operands of srl based on knowledge that the low bits are not
11700 // demanded.
11702 return SDValue(N, 0);
11703
11704 if (N1C && !N1C->isOpaque())
11705 if (SDValue NewSRL = visitShiftByConstant(N))
11706 return NewSRL;
11707
11708 // Attempt to convert a srl of a load into a narrower zero-extending load.
11709 if (SDValue NarrowLoad = reduceLoadWidth(N))
11710 return NarrowLoad;
11711
11712 // Here is a common situation. We want to optimize:
11713 //
11714 // %a = ...
11715 // %b = and i32 %a, 2
11716 // %c = srl i32 %b, 1
11717 // brcond i32 %c ...
11718 //
11719 // into
11720 //
11721 // %a = ...
11722 // %b = and %a, 2
11723 // %c = setcc eq %b, 0
11724 // brcond %c ...
11725 //
11726 // However when after the source operand of SRL is optimized into AND, the SRL
11727 // itself may not be optimized further. Look for it and add the BRCOND into
11728 // the worklist.
11729 //
11730 // This also tends to happen for binary operations when SimplifyDemandedBits
11731 // is involved.
11732 //
11733 // FIXME: This is unnecessary if we process the DAG in topological order,
11734 // which we plan to do. This workaround can be removed once the DAG is
11735 // processed in topological order.
11736 if (N->hasOneUse()) {
11737 SDNode *User = *N->user_begin();
11738
11739 // Look past the truncate.
11740 if (User->getOpcode() == ISD::TRUNCATE && User->hasOneUse())
11741 User = *User->user_begin();
11742
11743 if (User->getOpcode() == ISD::BRCOND || User->getOpcode() == ISD::AND ||
11744 User->getOpcode() == ISD::OR || User->getOpcode() == ISD::XOR)
11745 AddToWorklist(User);
11746 }
11747
11748 // Try to transform this shift into a multiply-high if
11749 // it matches the appropriate pattern detected in combineShiftToMULH.
11750 if (SDValue MULH = combineShiftToMULH(N, DL, DAG, TLI))
11751 return MULH;
11752
11753 if (SDValue AVG = foldShiftToAvg(N, DL))
11754 return AVG;
11755
11756 SDValue Y;
11757 if (VT.getScalarSizeInBits() % 2 == 0 && N1C) {
11758 // Fold clmul(zext(x), zext(y)) >> (BW - 1 | BW) -> clmul(r|h)(x, y).
11759 unsigned HalfBW = VT.getScalarSizeInBits() / 2;
11760 if (sd_match(N0, m_Clmul(m_ZExt(m_Value(X)), m_ZExt(m_Value(Y)))) &&
11761 X.getScalarValueSizeInBits() == HalfBW &&
11762 Y.getScalarValueSizeInBits() == HalfBW) {
11763 if (N1C->getZExtValue() == HalfBW - 1 &&
11764 (!LegalOperations ||
11765 TLI.isOperationLegalOrCustom(ISD::CLMULR, X.getValueType())))
11766 return DAG.getNode(
11767 ISD::ZERO_EXTEND, DL, VT,
11768 DAG.getNode(ISD::CLMULR, DL, X.getValueType(), X, Y));
11769 if (N1C->getZExtValue() == HalfBW &&
11770 (!LegalOperations ||
11771 TLI.isOperationLegalOrCustom(ISD::CLMULH, X.getValueType())))
11772 return DAG.getNode(
11773 ISD::ZERO_EXTEND, DL, VT,
11774 DAG.getNode(ISD::CLMULH, DL, X.getValueType(), X, Y));
11775 }
11776 }
11777
11778 // Fold bitreverse(clmul(bitreverse(x), bitreverse(y))) >> 1 ->
11779 // clmulh(x, y).
11780 if (N1C && N1C->getZExtValue() == 1 &&
11782 m_BitReverse(m_Value(Y))))))
11783 return DAG.getNode(ISD::CLMULH, DL, VT, X, Y);
11784
11785 return SDValue();
11786 }
11787
// Visit an ISD::FSHL or ISD::FSHR node: constant-fold, reduce the shift
// amount modulo the bit width, degenerate to plain SHL/SRL when one input
// is undef/zero, recognize rotate and shift-identity patterns, and merge
// consecutive little-endian loads. Returns the replacement value,
// SDValue(N, 0) when N was simplified in place, or an empty SDValue when
// no fold applied.
11788 SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
11789 EVT VT = N->getValueType(0);
11790 SDValue N0 = N->getOperand(0);
11791 SDValue N1 = N->getOperand(1);
11792 SDValue N2 = N->getOperand(2);
11793 bool IsFSHL = N->getOpcode() == ISD::FSHL;
11794 unsigned BitWidth = VT.getScalarSizeInBits();
11795 SDLoc DL(N);
11796
11797 // fold (fshl/fshr C0, C1, C2) -> C3
11798 if (SDValue C =
11799 DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1, N2}))
11800 return C;
11801
11802 // fold (fshl N0, N1, 0) -> N0
11803 // fold (fshr N0, N1, 0) -> N1
11805 if (DAG.MaskedValueIsZero(
11806 N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
11807 return IsFSHL ? N0 : N1;
11808
11809 auto IsUndefOrZero = [](SDValue V) {
11810 return V.isUndef() || isNullOrNullSplat(V, /*AllowUndefs*/ true);
11811 };
11812
11813 // TODO - support non-uniform vector shift amounts.
11814 if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) {
11815 EVT ShAmtTy = N2.getValueType();
11816
11817 // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
11818 if (Cst->getAPIntValue().uge(BitWidth)) {
11819 uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth);
11820 return DAG.getNode(N->getOpcode(), DL, VT, N0, N1,
11821 DAG.getConstant(RotAmt, DL, ShAmtTy));
11822 }
11823
11824 unsigned ShAmt = Cst->getZExtValue();
11825 if (ShAmt == 0)
11826 return IsFSHL ? N0 : N1;
11827
11828 // fold fshl(undef_or_zero, N1, C) -> lshr(N1, BW-C)
11829 // fold fshr(undef_or_zero, N1, C) -> lshr(N1, C)
11830 // fold fshl(N0, undef_or_zero, C) -> shl(N0, C)
11831 // fold fshr(N0, undef_or_zero, C) -> shl(N0, BW-C)
11832 if (IsUndefOrZero(N0))
11833 return DAG.getNode(
11834 ISD::SRL, DL, VT, N1,
11835 DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt, DL, ShAmtTy));
11836 if (IsUndefOrZero(N1))
11837 return DAG.getNode(
11838 ISD::SHL, DL, VT, N0,
11839 DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt, DL, ShAmtTy));
11840
11841 // fold fshl(N0, N1, c) -> x and fshr(N0, N1, c) -> x
11842 // where N0 is any node that contributes "x >> C0" to the result:
11843 // lshr(x, C0) | fshr(_, x, C0) | fshl(_, x, C1)
11844 // and N1 is any node that contributes "x << C1" to the result:
11845 // shl(x, C1) | fshl(x, _, C1) | fshr(x, _, C0)
11846 // with C0 = IsFSHL ? amnt : BW-amnt, C1 = BW - C0
11847
11848 // ShAmt == 0 was handled above; uge(BitWidth) was reduced via modulo above.
11849 assert(ShAmt >= 1 && ShAmt < BitWidth &&
11850 "ShAmt must be in [1, BW-1] for the identity fold to be valid");
11851 SDValue Val;
11852 unsigned C0Expected = IsFSHL ? ShAmt : BitWidth - ShAmt;
11853 unsigned C1Expected = IsFSHL ? BitWidth - ShAmt : ShAmt;
11854
11855 if ((sd_match(N0, m_Srl(m_Value(Val), m_SpecificInt(C0Expected))) ||
11857 m_SpecificInt(C0Expected))) ||
11859 m_SpecificInt(C1Expected)))) &&
11860 (sd_match(N1, m_Shl(m_Specific(Val), m_SpecificInt(C1Expected))) ||
11862 m_SpecificInt(C1Expected))) ||
11864 m_SpecificInt(C0Expected)))))
11865 return Val;
11866
11867 // fold (fshl ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
11868 // fold (fshr ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
11869 // TODO - bigendian support once we have test coverage.
11870 // TODO - can we merge this with CombineConsecutiveLoads/MatchLoadCombine?
11871 // TODO - permit LHS EXTLOAD if extensions are shifted out.
11872 if ((BitWidth % 8) == 0 && (ShAmt % 8) == 0 && !VT.isVector() &&
11873 !DAG.getDataLayout().isBigEndian()) {
11874 auto *LHS = dyn_cast<LoadSDNode>(N0);
11875 auto *RHS = dyn_cast<LoadSDNode>(N1);
11876 if (LHS && RHS && LHS->isSimple() && RHS->isSimple() &&
11877 LHS->getAddressSpace() == RHS->getAddressSpace() &&
11878 (LHS->hasNUsesOfValue(1, 0) || RHS->hasNUsesOfValue(1, 0)) &&
11880 if (DAG.areNonVolatileConsecutiveLoads(LHS, RHS, BitWidth / 8, 1)) {
11881 SDLoc DL(RHS);
11882 uint64_t PtrOff =
11883 IsFSHL ? (((BitWidth - ShAmt) % BitWidth) / 8) : (ShAmt / 8);
11884 Align NewAlign = commonAlignment(RHS->getAlign(), PtrOff);
11885 unsigned Fast = 0;
11886 if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
11887 RHS->getAddressSpace(), NewAlign,
11888 RHS->getMemOperand()->getFlags(), &Fast) &&
11889 Fast) {
11890 SDValue NewPtr = DAG.getMemBasePlusOffset(
11891 RHS->getBasePtr(), TypeSize::getFixed(PtrOff), DL);
11892 AddToWorklist(NewPtr.getNode());
11893 SDValue Load = DAG.getLoad(
11894 VT, DL, RHS->getChain(), NewPtr,
11895 RHS->getPointerInfo().getWithOffset(PtrOff), NewAlign,
11896 RHS->getMemOperand()->getFlags(), RHS->getAAInfo());
// Keep the original memory ordering constraints of both loads on the
// new, merged load.
11897 DAG.makeEquivalentMemoryOrdering(LHS, Load.getValue(1));
11898 DAG.makeEquivalentMemoryOrdering(RHS, Load.getValue(1));
11899 return Load;
11900 }
11901 }
11902 }
11903 }
11904
11905 // fold fshr(undef_or_zero, N1, N2) -> lshr(N1, N2)
11906 // fold fshl(N0, undef_or_zero, N2) -> shl(N0, N2)
11907 // iff We know the shift amount is in range.
11908 // TODO: when is it worth doing SUB(BW, N2) as well?
11909 if (isPowerOf2_32(BitWidth)) {
11910 APInt ModuloBits(N2.getScalarValueSizeInBits(), BitWidth - 1);
11911 if (IsUndefOrZero(N0) && !IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
11912 return DAG.getNode(ISD::SRL, DL, VT, N1, N2);
11913 if (IsUndefOrZero(N1) && IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
11914 return DAG.getNode(ISD::SHL, DL, VT, N0, N2);
11915 }
11916
11917 // fold (fshl N0, N0, N2) -> (rotl N0, N2)
11918 // fold (fshr N0, N0, N2) -> (rotr N0, N2)
11919 // TODO: Investigate flipping this rotate if only one is legal.
11920 // If funnel shift is legal as well we might be better off avoiding
11921 // non-constant (BW - N2).
11922 unsigned RotOpc = IsFSHL ? ISD::ROTL : ISD::ROTR;
11923 if (N0 == N1 && hasOperation(RotOpc, VT))
11924 return DAG.getNode(RotOpc, DL, VT, N0, N2);
11925
11926 // Simplify, based on bits shifted out of N0/N1.
11929 return SDValue(N, 0);
11930
11931 return SDValue();
11932 }
11933
// Visit an ISD::SSHLSAT or ISD::USHLSAT (saturating shift left) node.
// Constant-folds the shift, and lowers it to a plain ISD::SHL when value
// tracking proves the shift can never saturate. Returns the replacement
// value or an empty SDValue when no fold applied.
11934 SDValue DAGCombiner::visitSHLSAT(SDNode *N) {
11935 SDValue N0 = N->getOperand(0);
11936 SDValue N1 = N->getOperand(1);
11937 if (SDValue V = DAG.simplifyShift(N0, N1))
11938 return V;
11939
11940 SDLoc DL(N);
11941 EVT VT = N0.getValueType();
11942
11943 // fold (*shlsat c1, c2) -> c1<<c2
11944 if (SDValue C = DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1}))
11945 return C;
11946
11947 ConstantSDNode *N1C = isConstOrConstSplat(N1);
11948
11949 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) {
11950 // fold (sshlsat x, c) -> (shl x, c)
// Safe when the shift amount is less than the number of sign bits, so no
// signed saturation can occur.
11951 if (N->getOpcode() == ISD::SSHLSAT && N1C &&
11952 N1C->getAPIntValue().ult(DAG.ComputeNumSignBits(N0)))
11953 return DAG.getNode(ISD::SHL, DL, VT, N0, N1);
11954
11955 // fold (ushlsat x, c) -> (shl x, c)
// Safe when the shifted-out high bits are known zero, so no unsigned
// saturation can occur.
11956 if (N->getOpcode() == ISD::USHLSAT && N1C &&
11957 N1C->getAPIntValue().ule(
11959 return DAG.getNode(ISD::SHL, DL, VT, N0, N1);
11960 }
11961
11962 return SDValue();
11963 }
11964
// Given a ABS node, detect the following patterns:
// (ABS (SUB (EXTEND a), (EXTEND b))).
// (TRUNC (ABS (SUB (EXTEND a), (EXTEND b)))).
// Generates UABD/SABD instruction.
SDValue DAGCombiner::foldABSToABD(SDNode *N, const SDLoc &DL) {
  // SrcVT is the type of the node we were asked to combine (which may be a
  // TRUNCATE of the abs); the final result must be produced in SrcVT.
  EVT SrcVT = N->getValueType(0);

  // Look through a truncate so the wider abs pattern can be matched.
  if (N->getOpcode() == ISD::TRUNCATE)
    N = N->getOperand(0).getNode();

  // VT is the type the abs itself operates on (pre-truncate).
  EVT VT = N->getValueType(0);
  SDValue Op0, Op1;

  // Match abs(sub(a,b)) or abs(add(a,b)); the add form is handled as
  // abs(a - (-b)) below.
  if (!sd_match(N, m_Abs(m_AnyOf(m_Sub(m_Value(Op0), m_Value(Op1)),
                                 m_Add(m_Value(Op0), m_Value(Op1))))))
    return SDValue();

  SDValue AbsOp0 = N->getOperand(0);
  bool IsAdd = AbsOp0.getOpcode() == ISD::ADD;
  // Make sure (abs B) is positive.
  if (IsAdd) {
    // Elements of Op1 must be constant and != VT.minSignedValue() (or undef)
    // because negating INT_MIN overflows and would break the abd rewrite.
    auto IsNotMinSignedInt = [VT](ConstantSDNode *C) {
      if (C == nullptr)
        return true;
      return !C->getAPIntValue()
                  .trunc(VT.getScalarSizeInBits())
                  .isMinSignedValue();
    };

    if (!ISD::matchUnaryPredicate(Op1, IsNotMinSignedInt, /*AllowUndefs=*/true,
                                  /*AllowTruncation=*/true))
      return SDValue();
  }

  unsigned Opc0 = Op0.getOpcode();

  // Check if the operands of the sub are (zero|sign)-extended, otherwise
  // fallback to ValueTracking.
  if (Opc0 != Op1.getOpcode() ||
      (Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND &&
       Opc0 != ISD::SIGN_EXTEND_INREG)) {

    // Build abd(Op0, Op1) in VT (negating Op1 first for the add form) and
    // zero-extend/truncate the result back to the requested type.
    auto CreateZextedAbd = [&](unsigned AbdOpc) {
      if (IsAdd)
        Op1 = DAG.getNegative(Op1, SDLoc(Op1), VT);
      SDValue ABD = DAG.getNode(AbdOpc, DL, VT, Op0, Op1);
      return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
    };

    // fold (abs (sub nsw x, y)) -> abds(x, y)
    // fold (abs (add nsw x, -y)) -> abds(x, y)
    bool AbsOpWillNSW =
        AbsOp0->getFlags().hasNoSignedWrap() ||
        (IsAdd ? DAG.willNotOverflowAdd(/*IsSigned=*/true, Op0, Op1)
               : DAG.willNotOverflowSub(/*IsSigned=*/true, Op0, Op1));

    // Don't fold this for unsupported types as we lose the NSW handling.
    if (hasOperation(ISD::ABDS, VT) && TLI.preferABDSToABSWithNSW(VT) &&
        AbsOpWillNSW)
      return CreateZextedAbd(ISD::ABDS);

    // fold (abs (sub x, y)) -> abdu(x, y)
    bool Op1SignBitIsOne = DAG.computeKnownBits(Op1).isNegative();
    bool AbsOpWillNUW = !IsAdd && DAG.SignBitIsZero(Op0) && Op1SignBitIsOne;

    if (hasOperation(ISD::ABDU, VT) && AbsOpWillNUW)
      return CreateZextedAbd(ISD::ABDU);

    return SDValue();
  }

  // The IsAdd case explicitly checks for const/bv-of-const. This implies either
  // (Opc0 != Op1.getOpcode() || Opc0 is not in {zext/sext/sign_ext_inreg}. This
  // implies it was already handled by the above if statement.
  assert(!IsAdd && "Unexpected abs(add(x,y)) pattern");

  // Determine the pre-extension types of both operands.
  EVT VT0, VT1;
  if (Opc0 == ISD::SIGN_EXTEND_INREG) {
    VT0 = cast<VTSDNode>(Op0.getOperand(1))->getVT();
    VT1 = cast<VTSDNode>(Op1.getOperand(1))->getVT();
  } else {
    VT0 = Op0.getOperand(0).getValueType();
    VT1 = Op1.getOperand(0).getValueType();
  }
  unsigned ABDOpcode = (Opc0 == ISD::ZERO_EXTEND) ? ISD::ABDU : ISD::ABDS;

  // fold abs(sext(x) - sext(y)) -> zext(abds(x, y))
  // fold abs(zext(x) - zext(y)) -> zext(abdu(x, y))
  // Only truncate to the wider of the two inner types if each extend has a
  // single use (or is already that width), so we don't duplicate work.
  EVT MaxVT = VT0.bitsGT(VT1) ? VT0 : VT1;
  if ((VT0 == MaxVT || Op0->hasOneUse()) &&
      (VT1 == MaxVT || Op1->hasOneUse()) &&
      (!LegalTypes || hasOperation(ABDOpcode, MaxVT))) {
    SDValue ABD = DAG.getNode(ABDOpcode, DL, MaxVT,
                              DAG.getNode(ISD::TRUNCATE, DL, MaxVT, Op0),
                              DAG.getNode(ISD::TRUNCATE, DL, MaxVT, Op1));
    ABD = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, ABD);
    return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
  }

  // fold abs(sext(x) - sext(y)) -> abds(sext(x), sext(y))
  // fold abs(zext(x) - zext(y)) -> abdu(zext(x), zext(y))
  if (!LegalOperations || hasOperation(ABDOpcode, VT)) {
    SDValue ABD = DAG.getNode(ABDOpcode, DL, VT, Op0, Op1);
    return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
  }

  return SDValue();
}
12074
12075SDValue DAGCombiner::visitABS(SDNode *N) {
12076 SDValue N0 = N->getOperand(0);
12077 EVT VT = N->getValueType(0);
12078 SDLoc DL(N);
12079
12080 // fold (abs c1) -> c2
12081 if (SDValue C = DAG.FoldConstantArithmetic(ISD::ABS, DL, VT, {N0}))
12082 return C;
12083 // fold (abs (abs x)) -> (abs x)
12084 if (N0.getOpcode() == ISD::ABS)
12085 return N0;
12086 // fold (abs x) -> x iff not-negative
12087 if (DAG.SignBitIsZero(N0))
12088 return N0;
12089
12090 if (SDValue ABD = foldABSToABD(N, DL))
12091 return ABD;
12092
12093 // fold (abs (sign_extend_inreg x)) -> (zero_extend (abs (truncate x)))
12094 // iff zero_extend/truncate are free.
12095 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
12096 EVT ExtVT = cast<VTSDNode>(N0.getOperand(1))->getVT();
12097 if (TLI.isTruncateFree(VT, ExtVT) && TLI.isZExtFree(ExtVT, VT) &&
12098 TLI.isTypeDesirableForOp(ISD::ABS, ExtVT) &&
12099 hasOperation(ISD::ABS, ExtVT)) {
12100 return DAG.getNode(
12101 ISD::ZERO_EXTEND, DL, VT,
12102 DAG.getNode(ISD::ABS, DL, ExtVT,
12103 DAG.getNode(ISD::TRUNCATE, DL, ExtVT, N0.getOperand(0))));
12104 }
12105 }
12106
12107 return SDValue();
12108}
12109
12110SDValue DAGCombiner::visitCLMUL(SDNode *N) {
12111 unsigned Opcode = N->getOpcode();
12112 SDValue N0 = N->getOperand(0);
12113 SDValue N1 = N->getOperand(1);
12114 EVT VT = N->getValueType(0);
12115 SDLoc DL(N);
12116
12117 // fold (clmul c1, c2)
12118 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
12119 return C;
12120
12121 // canonicalize constant to RHS
12124 return DAG.getNode(Opcode, DL, VT, N1, N0);
12125
12126 // fold (clmul x, 0) -> 0
12128 return DAG.getConstant(0, DL, VT);
12129
12130 // fold (clmul x, c_pow2) -> (shl x, log2(c_pow2))
12131 // This also handles (clmul x, 1) -> x since (shl x, 0) simplifies to x.
12132 if (Opcode == ISD::CLMUL) {
12133 if (ConstantSDNode *C = isConstOrConstSplat(N1)) {
12134 APInt CV = C->getAPIntValue().trunc(VT.getScalarSizeInBits());
12135 if (CV.isPowerOf2() &&
12136 (!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)))
12137 return DAG.getNode(ISD::SHL, DL, VT, N0,
12138 DAG.getShiftAmountConstant(CV.logBase2(), VT, DL));
12139 }
12140 }
12141
12142 return SDValue();
12143}
12144
// Combine an ISD::BSWAP node.
SDValue DAGCombiner::visitBSWAP(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  // fold (bswap c1) -> c2
  if (SDValue C = DAG.FoldConstantArithmetic(ISD::BSWAP, DL, VT, {N0}))
    return C;
  // fold (bswap (bswap x)) -> x
  if (N0.getOpcode() == ISD::BSWAP)
    return N0.getOperand(0);

  // Canonicalize bswap(bitreverse(x)) -> bitreverse(bswap(x)). If bitreverse
  // isn't supported, it will be expanded to bswap followed by a manual reversal
  // of bits in each byte. By placing bswaps before bitreverse, we can remove
  // the two bswaps if the bitreverse gets expanded.
  if (N0.getOpcode() == ISD::BITREVERSE && N0.hasOneUse()) {
    SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, N0.getOperand(0));
    return DAG.getNode(ISD::BITREVERSE, DL, VT, BSwap);
  }

  unsigned BW = VT.getScalarSizeInBits();
  // fold (bswap shl(x,c)) -> (zext(bswap(trunc(shl(x,sub(c,bw/2))))))
  // iff x >= bw/2 (i.e. lower half is known zero)
  if (BW >= 32 && N0.getOpcode() == ISD::SHL && N0.hasOneUse()) {
    auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), BW / 2);
    // Require a constant, byte-aligned shift that leaves the whole lower half
    // zero, plus a legal half-width bswap and free truncation.
    if (ShAmt && ShAmt->getAPIntValue().ult(BW) &&
        ShAmt->getZExtValue() >= (BW / 2) && (ShAmt->getZExtValue() % 8) == 0 &&
        TLI.isTypeLegal(HalfVT) && TLI.isTruncateFree(VT, HalfVT) &&
        (!LegalOperations || hasOperation(ISD::BSWAP, HalfVT))) {
      SDValue Res = N0.getOperand(0);
      // Keep any shift amount beyond the discarded lower half.
      if (uint64_t NewShAmt = (ShAmt->getZExtValue() - (BW / 2)))
        Res = DAG.getNode(ISD::SHL, DL, VT, Res,
                          DAG.getShiftAmountConstant(NewShAmt, VT, DL));
      Res = DAG.getZExtOrTrunc(Res, DL, HalfVT);
      Res = DAG.getNode(ISD::BSWAP, DL, HalfVT, Res);
      return DAG.getZExtOrTrunc(Res, DL, VT);
    }
  }

  // Try to canonicalize bswap-of-logical-shift-by-8-bit-multiple as
  // inverse-shift-of-bswap:
  // bswap (X u<< C) --> (bswap X) u>> C
  // bswap (X u>> C) --> (bswap X) u<< C
  if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
      N0.hasOneUse()) {
    auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    if (ShAmt && ShAmt->getAPIntValue().ult(BW) &&
        ShAmt->getZExtValue() % 8 == 0) {
      SDValue NewSwap = DAG.getNode(ISD::BSWAP, DL, VT, N0.getOperand(0));
      unsigned InverseShift = N0.getOpcode() == ISD::SHL ? ISD::SRL : ISD::SHL;
      return DAG.getNode(InverseShift, DL, VT, NewSwap, N0.getOperand(1));
    }
  }

  if (SDValue V = foldBitOrderCrossLogicOp(N, DAG))
    return V;

  // Folds that depend on computeKnownBits of the operand.
  KnownBits Known = DAG.computeKnownBits(N0);
  // bswap(0) = 0. Catch cases that computeKnownBits can prove are zero but
  // that structural combines haven't simplified to a constant yet
  // (e.g. and of disjoint byte masks).
  if (Known.isZero())
    return DAG.getConstant(0, DL, VT);
  // If only one byte of the operand may be nonzero, bswap becomes a shift
  // to the mirror byte.
  unsigned TZ = alignDown(Known.countMinTrailingZeros(), 8);
  unsigned LZ = alignDown(Known.countMinLeadingZeros(), 8);
  if (BW - (LZ + TZ) == 8) {
    unsigned Opc = LZ > TZ ? ISD::SHL : ISD::SRL;
    // Skip if the target would re-expand the produced shift post-legalize.
    // Targets that custom-lower byte-multiple shifts via bswap (e.g. MSP430
    // for shl i16) would loop with this combine.
    if (!LegalOperations || hasOperation(Opc, VT)) {
      unsigned Amt = AbsoluteDifference(LZ, TZ);
      SDNodeFlags Flags =
      // NOTE(review): the initializer expression of Flags was lost in
      // extraction (original line 12223); recover it from upstream before
      // relying on this snippet.
      return DAG.getNode(Opc, DL, VT, N0,
                         DAG.getShiftAmountConstant(Amt, VT, DL), Flags);
    }
  }

  return SDValue();
}
12231
// Combine an ISD::BITREVERSE node.
SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  // fold (bitreverse c1) -> c2
  if (SDValue C = DAG.FoldConstantArithmetic(ISD::BITREVERSE, DL, VT, {N0}))
    return C;

  // fold (bitreverse (bitreverse x)) -> x
  if (N0.getOpcode() == ISD::BITREVERSE)
    return N0.getOperand(0);

  SDValue X, Y;

  // fold (bitreverse (lshr (bitreverse x), y)) -> (shl x, y)
  // NOTE(review): the sd_match() line (original 12249) was lost in extraction;
  // it presumably matches srl(bitreverse(X), Y) per the comment — confirm
  // against upstream.
  if ((!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
    return DAG.getNode(ISD::SHL, DL, VT, X, Y);

  // fold (bitreverse (shl (bitreverse x), y)) -> (lshr x, y)
  // NOTE(review): the sd_match() line (original 12254) was lost in extraction;
  // it presumably matches shl(bitreverse(X), Y) — confirm against upstream.
  if ((!LegalOperations || TLI.isOperationLegal(ISD::SRL, VT)) &&
    return DAG.getNode(ISD::SRL, DL, VT, X, Y);

  // fold bitreverse(clmul(bitreverse(x), bitreverse(y))) -> clmulr(x, y)
  // NOTE(review): the sd_match() line (original 12259) was lost in extraction
  // — confirm against upstream.
  if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::CLMULR, VT)) &&
    return DAG.getNode(ISD::CLMULR, DL, VT, X, Y);

  return SDValue();
}
12264
// Fold (ctlz (xor x, (sra x, bitwidth-1))) -> (add (ctls x), 1).
// Fold (ctlz (or (shl (xor x, (sra x, bitwidth-1)), 1), 1) -> (ctls x)
SDValue DAGCombiner::foldCTLZToCTLS(SDValue Src, const SDLoc &DL) {
  EVT VT = Src.getValueType();

  // Only form CTLS when it is legal/custom for the (possibly converted) type.
  auto LK = TLI.getTypeConversion(*DAG.getContext(), VT);
  // NOTE(review): the second half of this legality condition (original line
  // 12272) was lost in extraction — recover it from upstream; as shown the
  // parenthesization is incomplete.
  if ((LK.first != TargetLoweringBase::TypeLegal &&
      !TLI.isOperationLegalOrCustom(ISD::CTLS, LK.second))
    return SDValue();

  unsigned BitWidth = VT.getScalarSizeInBits();

  // The plain xor form needs a +1 at the end; the or/shl form does not.
  bool NeedAdd = true;

  SDValue X;
  // NOTE(review): the opening of this sd_match (original lines 12281) was lost
  // in extraction; per the header comment it matches
  // (or (shl Src', 1), 1) — confirm against upstream.
      m_SpecificInt(1))))) {
    NeedAdd = false;
    Src = X;
  }

  // NOTE(review): the middle of this sd_match (original lines 12288-12289) was
  // lost in extraction; per the header comment it matches
  // (xor X, (sra X, BitWidth-1)) — confirm against upstream.
  if (!sd_match(Src,
      m_SpecificInt(BitWidth - 1)))))))
    return SDValue();

  SDValue Res = DAG.getNode(ISD::CTLS, DL, VT, X);
  if (!NeedAdd)
    return Res;

  return DAG.getNode(ISD::ADD, DL, VT, Res, DAG.getConstant(1, DL, VT));
}
12299
12300SDValue DAGCombiner::visitCTLZ(SDNode *N) {
12301 SDValue N0 = N->getOperand(0);
12302 EVT VT = N->getValueType(0);
12303 SDLoc DL(N);
12304
12305 // fold (ctlz c1) -> c2
12306 if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTLZ, DL, VT, {N0}))
12307 return C;
12308
12309 // If the value is known never to be zero, switch to the poison version.
12310 if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_POISON, VT))
12311 if (DAG.isKnownNeverZero(N0))
12312 return DAG.getNode(ISD::CTLZ_ZERO_POISON, DL, VT, N0);
12313
12314 if (SDValue V = foldCTLZToCTLS(N0, DL))
12315 return V;
12316
12317 return SDValue();
12318}
12319
12320SDValue DAGCombiner::visitCTLZ_ZERO_POISON(SDNode *N) {
12321 SDValue N0 = N->getOperand(0);
12322 EVT VT = N->getValueType(0);
12323 SDLoc DL(N);
12324
12325 // fold (ctlz_zero_poison c1) -> c2
12326 if (SDValue C =
12328 return C;
12329
12330 if (SDValue V = foldCTLZToCTLS(N0, DL))
12331 return V;
12332
12333 return SDValue();
12334}
12335
12336SDValue DAGCombiner::visitCTTZ(SDNode *N) {
12337 SDValue N0 = N->getOperand(0);
12338 EVT VT = N->getValueType(0);
12339 SDLoc DL(N);
12340
12341 // fold (cttz c1) -> c2
12342 if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTTZ, DL, VT, {N0}))
12343 return C;
12344
12345 // If the value is known never to be zero, switch to the poison version.
12346 if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_POISON, VT))
12347 if (DAG.isKnownNeverZero(N0))
12348 return DAG.getNode(ISD::CTTZ_ZERO_POISON, DL, VT, N0);
12349
12350 return SDValue();
12351}
12352
12353SDValue DAGCombiner::visitCTTZ_ZERO_POISON(SDNode *N) {
12354 SDValue N0 = N->getOperand(0);
12355 EVT VT = N->getValueType(0);
12356 SDLoc DL(N);
12357
12358 // fold (cttz_zero_poison c1) -> c2
12359 if (SDValue C =
12361 return C;
12362 return SDValue();
12363}
12364
12365SDValue DAGCombiner::visitCTPOP(SDNode *N) {
12366 SDValue N0 = N->getOperand(0);
12367 EVT VT = N->getValueType(0);
12368 unsigned NumBits = VT.getScalarSizeInBits();
12369 SDLoc DL(N);
12370
12371 // fold (ctpop c1) -> c2
12372 if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTPOP, DL, VT, {N0}))
12373 return C;
12374
12375 // If the source is being shifted, but doesn't affect any active bits,
12376 // then we can call CTPOP on the shift source directly.
12377 if (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SHL) {
12378 if (ConstantSDNode *AmtC = isConstOrConstSplat(N0.getOperand(1))) {
12379 const APInt &Amt = AmtC->getAPIntValue();
12380 if (Amt.ult(NumBits)) {
12381 KnownBits KnownSrc = DAG.computeKnownBits(N0.getOperand(0));
12382 if ((N0.getOpcode() == ISD::SRL &&
12383 Amt.ule(KnownSrc.countMinTrailingZeros())) ||
12384 (N0.getOpcode() == ISD::SHL &&
12385 Amt.ule(KnownSrc.countMinLeadingZeros()))) {
12386 return DAG.getNode(ISD::CTPOP, DL, VT, N0.getOperand(0));
12387 }
12388 }
12389 }
12390 }
12391
12392 // If the upper bits are known to be zero, then see if its profitable to
12393 // only count the lower bits.
12394 if (VT.isScalarInteger() && NumBits > 8 && (NumBits & 1) == 0) {
12395 EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), NumBits / 2);
12396 if (hasOperation(ISD::CTPOP, HalfVT) &&
12397 TLI.isTypeDesirableForOp(ISD::CTPOP, HalfVT) &&
12398 TLI.isTruncateFree(N0, HalfVT) && TLI.isZExtFree(HalfVT, VT)) {
12399 APInt UpperBits = APInt::getHighBitsSet(NumBits, NumBits / 2);
12400 if (DAG.MaskedValueIsZero(N0, UpperBits)) {
12401 SDValue PopCnt = DAG.getNode(ISD::CTPOP, DL, HalfVT,
12402 DAG.getZExtOrTrunc(N0, DL, HalfVT));
12403 return DAG.getZExtOrTrunc(PopCnt, DL, VT);
12404 }
12405 }
12406 }
12407
12408 return SDValue();
12409}
12410
12412 SDValue RHS, const SDNodeFlags Flags,
12413 const TargetLowering &TLI) {
12414 EVT VT = LHS.getValueType();
12415 if (!VT.isFloatingPoint())
12416 return false;
12417
12418 return Flags.hasNoSignedZeros() &&
12420 (Flags.hasNoNaNs() ||
12421 (DAG.isKnownNeverNaN(RHS) && DAG.isKnownNeverNaN(LHS)));
12422}
12423
12425 SDValue RHS, SDValue True, SDValue False,
12426 ISD::CondCode CC,
12427 const TargetLowering &TLI,
12428 SelectionDAG &DAG) {
12429 EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
12430 switch (CC) {
12431 case ISD::SETOLT:
12432 case ISD::SETOLE:
12433 case ISD::SETLT:
12434 case ISD::SETLE:
12435 case ISD::SETULT:
12436 case ISD::SETULE: {
12437 // Since it's known never nan to get here already, either fminnum or
12438 // fminnum_ieee are OK. Try the ieee version first, since it's fminnum is
12439 // expanded in terms of it.
12440 unsigned IEEEOpcode = (LHS == True) ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
12441 if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
12442 return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
12443
12444 unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
12445 if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
12446 return DAG.getNode(Opcode, DL, VT, LHS, RHS);
12447 return SDValue();
12448 }
12449 case ISD::SETOGT:
12450 case ISD::SETOGE:
12451 case ISD::SETGT:
12452 case ISD::SETGE:
12453 case ISD::SETUGT:
12454 case ISD::SETUGE: {
12455 unsigned IEEEOpcode = (LHS == True) ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
12456 if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
12457 return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
12458
12459 unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
12460 if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
12461 return DAG.getNode(Opcode, DL, VT, LHS, RHS);
12462 return SDValue();
12463 }
12464 default:
12465 return SDValue();
12466 }
12467}
12468
12469// Convert (sr[al] (add n[su]w x, y)) -> (avgfloor[su] x, y)
12470SDValue DAGCombiner::foldShiftToAvg(SDNode *N, const SDLoc &DL) {
12471 const unsigned Opcode = N->getOpcode();
12472 if (Opcode != ISD::SRA && Opcode != ISD::SRL)
12473 return SDValue();
12474
12475 EVT VT = N->getValueType(0);
12476 bool IsUnsigned = Opcode == ISD::SRL;
12477
12478 // Captured values.
12479 SDValue A, B;
12480
12481 // Match floor average as it is common to both floor/ceil avgs, ensure the add
12482 // doesn't wrap.
12483 SDNodeFlags Flags =
12485 if (sd_match(N, m_BinOp(Opcode,
12486 m_c_BinOp(ISD::ADD, m_Value(A), m_Value(B), Flags),
12487 m_One()))) {
12488 // Decide whether signed or unsigned.
12489 unsigned FloorISD = IsUnsigned ? ISD::AVGFLOORU : ISD::AVGFLOORS;
12490 if (hasOperation(FloorISD, VT))
12491 return DAG.getNode(FloorISD, DL, VT, {A, B});
12492 }
12493
12494 return SDValue();
12495}
12496
// Rewrite a bitwise op whose operand hides a negation so the NOT form is
// exposed (not(a) == -a - 1), producing op(X, not(sub/add Y, Z)).
SDValue DAGCombiner::foldBitwiseOpWithNeg(SDNode *N, const SDLoc &DL, EVT VT) {
  unsigned Opc = N->getOpcode();
  SDValue X, Y, Z;
  // NOTE(review): the body of this sd_match pattern (original line 12501) was
  // lost in extraction; it captures X, Y, Z from a commuted binop whose RHS is
  // a negated expression — recover from upstream before using this snippet.
  if (sd_match(
    return DAG.getNode(Opc, DL, VT, X,
                       DAG.getNOT(DL, DAG.getNode(ISD::SUB, DL, VT, Y, Z), VT));

  // NOTE(review): the opening of this second sd_match (original line 12505)
  // was likewise lost in extraction — recover from upstream.
      m_Value(Z)))))
    return DAG.getNode(Opc, DL, VT, X,
                       DAG.getNOT(DL, DAG.getNode(ISD::ADD, DL, VT, Y, Z), VT));

  return SDValue();
}
12512
12513/// Generate Min/Max node
12514SDValue DAGCombiner::combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
12515 SDValue RHS, SDValue True,
12516 SDValue False, ISD::CondCode CC) {
12517 if ((LHS == True && RHS == False) || (LHS == False && RHS == True))
12518 return combineMinNumMaxNumImpl(DL, VT, LHS, RHS, True, False, CC, TLI, DAG);
12519
12520 // If we can't directly match this, try to see if we can pull an fneg out of
12521 // the select.
12523 True, DAG, LegalOperations, ForCodeSize);
12524 if (!NegTrue)
12525 return SDValue();
12526
12527 HandleSDNode NegTrueHandle(NegTrue);
12528
12529 // Try to unfold an fneg from the select if we are comparing the negated
12530 // constant.
12531 //
12532 // select (setcc x, K) (fneg x), -K -> fneg(minnum(x, K))
12533 //
12534 // TODO: Handle fabs
12535 if (LHS == NegTrue) {
12536 // If we can't directly match this, try to see if we can pull an fneg out of
12537 // the select.
12539 RHS, DAG, LegalOperations, ForCodeSize);
12540 if (NegRHS) {
12541 HandleSDNode NegRHSHandle(NegRHS);
12542 if (NegRHS == False) {
12543 SDValue Combined = combineMinNumMaxNumImpl(DL, VT, LHS, RHS, NegTrue,
12544 False, CC, TLI, DAG);
12545 if (Combined)
12546 return DAG.getNode(ISD::FNEG, DL, VT, Combined);
12547 }
12548 }
12549 }
12550
12551 return SDValue();
12552}
12553
12554/// If a (v)select has a condition value that is a sign-bit test, try to smear
12555/// the condition operand sign-bit across the value width and use it as a mask.
12557 SelectionDAG &DAG) {
12558 SDValue Cond = N->getOperand(0);
12559 SDValue C1 = N->getOperand(1);
12560 SDValue C2 = N->getOperand(2);
12562 return SDValue();
12563
12564 EVT VT = N->getValueType(0);
12565 if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse() ||
12566 VT != Cond.getOperand(0).getValueType())
12567 return SDValue();
12568
12569 // The inverted-condition + commuted-select variants of these patterns are
12570 // canonicalized to these forms in IR.
12571 SDValue X = Cond.getOperand(0);
12572 SDValue CondC = Cond.getOperand(1);
12573 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
12574 if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CondC) &&
12576 // i32 X > -1 ? C1 : -1 --> (X >>s 31) | C1
12577 SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
12578 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
12579 return DAG.getNode(ISD::OR, DL, VT, Sra, C1);
12580 }
12581 if (CC == ISD::SETLT && isNullOrNullSplat(CondC) && isNullOrNullSplat(C2)) {
12582 // i8 X < 0 ? C1 : 0 --> (X >>s 7) & C1
12583 SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
12584 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
12585 return DAG.getNode(ISD::AND, DL, VT, Sra, C1);
12586 }
12587 return SDValue();
12588}
12589
12591 const TargetLowering &TLI) {
12592 if (!TLI.convertSelectOfConstantsToMath(VT))
12593 return false;
12594
12595 if (Cond.getOpcode() != ISD::SETCC || !Cond->hasOneUse())
12596 return true;
12598 return true;
12599
12600 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
12601 if (CC == ISD::SETLT && isNullOrNullSplat(Cond.getOperand(1)))
12602 return true;
12603 if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(Cond.getOperand(1)))
12604 return true;
12605
12606 return false;
12607}
12608
12609SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
12610 SDValue Cond = N->getOperand(0);
12611 SDValue N1 = N->getOperand(1);
12612 SDValue N2 = N->getOperand(2);
12613 EVT VT = N->getValueType(0);
12614 EVT CondVT = Cond.getValueType();
12615 SDLoc DL(N);
12616
12617 if (!VT.isInteger())
12618 return SDValue();
12619
12620 auto *C1 = dyn_cast<ConstantSDNode>(N1);
12621 auto *C2 = dyn_cast<ConstantSDNode>(N2);
12622 if (!C1 || !C2)
12623 return SDValue();
12624
12625 if (CondVT != MVT::i1 || LegalOperations) {
12626 // We can't do this reliably if integer based booleans have different contents
12627 // to floating point based booleans. This is because we can't tell whether we
12628 // have an integer-based boolean or a floating-point-based boolean unless we
12629 // can find the SETCC that produced it and inspect its operands. This is
12630 // fairly easy if C is the SETCC node, but it can potentially be
12631 // undiscoverable (or not reasonably discoverable). For example, it could be
12632 // in another basic block or it could require searching a complicated
12633 // expression.
12634 if (CondVT.isInteger() &&
12635 TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==
12637 TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
12639 // fold (select Cond, 0, 1) -> (xor Cond, 1)
12640 if (C1->isZero() && C2->isOne()) {
12641 SDValue NotCond = DAG.getNode(ISD::XOR, DL, CondVT, Cond,
12642 DAG.getConstant(1, DL, CondVT));
12643 if (VT.bitsEq(CondVT))
12644 return NotCond;
12645 return DAG.getZExtOrTrunc(NotCond, DL, VT);
12646 }
12647
12648 // fold (select Cond, 1, 0) -> Cond
12649 if (C1->isOne() && C2->isZero() && CondVT == VT)
12650 return Cond;
12651 }
12652
12653 return SDValue();
12654 }
12655
12656 // Only do this before legalization to avoid conflicting with target-specific
12657 // transforms in the other direction (create a select from a zext/sext). There
12658 // is also a target-independent combine here in DAGCombiner in the other
12659 // direction for (select Cond, -1, 0) when the condition is not i1.
12660 assert(CondVT == MVT::i1 && !LegalOperations);
12661
12662 // select Cond, 1, 0 --> zext (Cond)
12663 if (C1->isOne() && C2->isZero())
12664 return DAG.getZExtOrTrunc(Cond, DL, VT);
12665
12666 // select Cond, -1, 0 --> sext (Cond)
12667 if (C1->isAllOnes() && C2->isZero())
12668 return DAG.getSExtOrTrunc(Cond, DL, VT);
12669
12670 // select Cond, 0, 1 --> zext (!Cond)
12671 if (C1->isZero() && C2->isOne()) {
12672 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
12673 NotCond = DAG.getZExtOrTrunc(NotCond, DL, VT);
12674 return NotCond;
12675 }
12676
12677 // select Cond, 0, -1 --> sext (!Cond)
12678 if (C1->isZero() && C2->isAllOnes()) {
12679 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
12680 NotCond = DAG.getSExtOrTrunc(NotCond, DL, VT);
12681 return NotCond;
12682 }
12683
12684 // Use a target hook because some targets may prefer to transform in the
12685 // other direction.
12687 return SDValue();
12688
12689 // For any constants that differ by 1, we can transform the select into
12690 // an extend and add.
12691 const APInt &C1Val = C1->getAPIntValue();
12692 const APInt &C2Val = C2->getAPIntValue();
12693
12694 // select Cond, C1, C1-1 --> add (zext Cond), C1-1
12695 if (C1Val - 1 == C2Val) {
12696 Cond = DAG.getZExtOrTrunc(Cond, DL, VT);
12697 return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
12698 }
12699
12700 // select Cond, C1, C1+1 --> add (sext Cond), C1+1
12701 if (C1Val + 1 == C2Val) {
12702 Cond = DAG.getSExtOrTrunc(Cond, DL, VT);
12703 return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
12704 }
12705
12706 // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
12707 if (C1Val.isPowerOf2() && C2Val.isZero()) {
12708 Cond = DAG.getZExtOrTrunc(Cond, DL, VT);
12709 SDValue ShAmtC =
12710 DAG.getShiftAmountConstant(C1Val.exactLogBase2(), VT, DL);
12711 return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC);
12712 }
12713
12714 // select Cond, -1, C --> or (sext Cond), C
12715 if (C1->isAllOnes()) {
12716 Cond = DAG.getSExtOrTrunc(Cond, DL, VT);
12717 return DAG.getNode(ISD::OR, DL, VT, Cond, N2);
12718 }
12719
12720 // select Cond, C, -1 --> or (sext (not Cond)), C
12721 if (C2->isAllOnes()) {
12722 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
12723 NotCond = DAG.getSExtOrTrunc(NotCond, DL, VT);
12724 return DAG.getNode(ISD::OR, DL, VT, NotCond, N1);
12725 }
12726
12728 return V;
12729
12730 return SDValue();
12731}
12732
12733template <class MatchContextClass>
12735 SelectionDAG &DAG) {
12736 assert((N->getOpcode() == ISD::SELECT || N->getOpcode() == ISD::VSELECT ||
12737 N->getOpcode() == ISD::VP_SELECT) &&
12738 "Expected a (v)(vp.)select");
12739 SDValue Cond = N->getOperand(0);
12740 SDValue T = N->getOperand(1), F = N->getOperand(2);
12741 EVT VT = N->getValueType(0);
12742 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12743 MatchContextClass matcher(DAG, TLI, N);
12744
12745 if (VT != Cond.getValueType() || VT.getScalarSizeInBits() != 1)
12746 return SDValue();
12747
12748 // select Cond, Cond, F --> or Cond, freeze(F)
12749 // select Cond, 1, F --> or Cond, freeze(F)
12750 if (Cond == T || isOneOrOneSplat(T, /* AllowUndefs */ true))
12751 return matcher.getNode(ISD::OR, DL, VT, Cond, DAG.getFreeze(F));
12752
12753 // select Cond, T, Cond --> and Cond, freeze(T)
12754 // select Cond, T, 0 --> and Cond, freeze(T)
12755 if (Cond == F || isNullOrNullSplat(F, /* AllowUndefs */ true))
12756 return matcher.getNode(ISD::AND, DL, VT, Cond, DAG.getFreeze(T));
12757
12758 // select Cond, T, 1 --> or (not Cond), freeze(T)
12759 if (isOneOrOneSplat(F, /* AllowUndefs */ true)) {
12760 SDValue NotCond =
12761 matcher.getNode(ISD::XOR, DL, VT, Cond, DAG.getAllOnesConstant(DL, VT));
12762 return matcher.getNode(ISD::OR, DL, VT, NotCond, DAG.getFreeze(T));
12763 }
12764
12765 // select Cond, 0, F --> and (not Cond), freeze(F)
12766 if (isNullOrNullSplat(T, /* AllowUndefs */ true)) {
12767 SDValue NotCond =
12768 matcher.getNode(ISD::XOR, DL, VT, Cond, DAG.getAllOnesConstant(DL, VT));
12769 return matcher.getNode(ISD::AND, DL, VT, NotCond, DAG.getFreeze(F));
12770 }
12771
12772 return SDValue();
12773}
12774
12776 SDValue N0 = N->getOperand(0);
12777 SDValue N1 = N->getOperand(1);
12778 SDValue N2 = N->getOperand(2);
12779 EVT VT = N->getValueType(0);
12780 unsigned EltSizeInBits = VT.getScalarSizeInBits();
12781
12782 SDValue Cond0, Cond1;
12783 ISD::CondCode CC;
12784 if (!sd_match(N0, m_OneUse(m_SetCC(m_Value(Cond0), m_Value(Cond1),
12785 m_CondCode(CC)))) ||
12786 VT != Cond0.getValueType())
12787 return SDValue();
12788
12789 // Match a signbit check of Cond0 as "Cond0 s<0". Swap select operands if the
12790 // compare is inverted from that pattern ("Cond0 s> -1").
12791 if (CC == ISD::SETLT && isNullOrNullSplat(Cond1))
12792 ; // This is the pattern we are looking for.
12793 else if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(Cond1))
12794 std::swap(N1, N2);
12795 else
12796 return SDValue();
12797
12798 // (Cond0 s< 0) ? N1 : 0 --> (Cond0 s>> BW-1) & freeze(N1)
12799 if (isNullOrNullSplat(N2)) {
12800 SDLoc DL(N);
12801 SDValue ShiftAmt = DAG.getShiftAmountConstant(EltSizeInBits - 1, VT, DL);
12802 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
12803 return DAG.getNode(ISD::AND, DL, VT, Sra, DAG.getFreeze(N1));
12804 }
12805
12806 // (Cond0 s< 0) ? -1 : N2 --> (Cond0 s>> BW-1) | freeze(N2)
12807 if (isAllOnesOrAllOnesSplat(N1)) {
12808 SDLoc DL(N);
12809 SDValue ShiftAmt = DAG.getShiftAmountConstant(EltSizeInBits - 1, VT, DL);
12810 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
12811 return DAG.getNode(ISD::OR, DL, VT, Sra, DAG.getFreeze(N2));
12812 }
12813
12814 // If we have to invert the sign bit mask, only do that transform if the
12815 // target has a bitwise 'and not' instruction (the invert is free).
12816 // (Cond0 s< -0) ? 0 : N2 --> ~(Cond0 s>> BW-1) & freeze(N2)
12817 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12818 if (isNullOrNullSplat(N1) && TLI.hasAndNot(N1)) {
12819 SDLoc DL(N);
12820 SDValue ShiftAmt = DAG.getShiftAmountConstant(EltSizeInBits - 1, VT, DL);
12821 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
12822 SDValue Not = DAG.getNOT(DL, Sra, VT);
12823 return DAG.getNode(ISD::AND, DL, VT, Not, DAG.getFreeze(N2));
12824 }
12825
12826 // TODO: There's another pattern in this family, but it may require
12827 // implementing hasOrNot() to check for profitability:
12828 // (Cond0 s> -1) ? -1 : N2 --> ~(Cond0 s>> BW-1) | freeze(N2)
12829
12830 return SDValue();
12831}
12832
// Match SELECTs with absolute difference patterns.
// (select (setcc a, b, set?gt), (sub a, b), (sub b, a)) --> (abd? a, b)
// (select (setcc a, b, set?ge), (sub a, b), (sub b, a)) --> (abd? a, b)
// (select (setcc a, b, set?lt), (sub b, a), (sub a, b)) --> (abd? a, b)
// (select (setcc a, b, set?le), (sub b, a), (sub a, b)) --> (abd? a, b)
SDValue DAGCombiner::foldSelectToABD(SDValue LHS, SDValue RHS, SDValue True,
                                    SDValue False, ISD::CondCode CC,
                                    const SDLoc &DL) {
  // Signed compares form ABDS; unsigned ones form ABDU.
  bool IsSigned = isSignedIntSetCC(CC);
  unsigned ABDOpc = IsSigned ? ISD::ABDS : ISD::ABDU;
  EVT VT = LHS.getValueType();

  if (LegalOperations && !hasOperation(ABDOpc, VT))
    return SDValue();

  // (setcc 0, b set???) --> (setcc b, 0, set???)
  // NOTE(review): a line inside this block (original 12851, presumably the
  // condition-code swap accompanying the operand swap) was lost in extraction
  // — recover from upstream.
  if (isZeroOrZeroSplat(LHS)) {
    std::swap(LHS, RHS);
  }

  // (setcc (add nsw A, Const), 0, sets??) --> (setcc A, -Const, sets??)
  // NOTE(review): the match lines (original 12857-12858) were lost in
  // extraction; they presumably capture A and constant B from the add and
  // require a zero RHS — recover from upstream.
  SDValue A, B;
  if (ISD::isSignedIntSetCC(CC) && LHS->getFlags().hasNoSignedWrap() &&
    RHS = DAG.getNegative(B, LHS, B.getValueType());
    LHS = A;
  }

  bool IsTypeLegalOrPromote =
      TLI.isTypeLegal(VT) || TLI.getTypeAction(*DAG.getContext(), VT) ==

  // NOTE(review): the per-case guard conditions inside this switch (original
  // lines 12872-12875, 12877-12880, 12888-12891, 12893-12896) were lost in
  // extraction; they match True/False against (sub LHS, RHS) / (sub RHS, LHS)
  // per the header comment — recover from upstream.
  switch (CC) {
  case ISD::SETGT:
  case ISD::SETGE:
  case ISD::SETUGT:
  case ISD::SETUGE:
      return DAG.getNode(ABDOpc, DL, VT, LHS, RHS);
        IsTypeLegalOrPromote)
      return DAG.getNegative(DAG.getNode(ABDOpc, DL, VT, LHS, RHS), DL, VT);
    break;
  case ISD::SETLT:
  case ISD::SETLE:
  case ISD::SETULT:
  case ISD::SETULE:
      return DAG.getNode(ABDOpc, DL, VT, LHS, RHS);
        IsTypeLegalOrPromote)
      return DAG.getNegative(DAG.getNode(ABDOpc, DL, VT, LHS, RHS), DL, VT);
    break;
  default:
    break;
  }

  return SDValue();
}
12906
12907// ([v]select (ugt x, C), (add x, ~C), x) -> (umin (add x, ~C), x)
12908// ([v]select (ult x, C), x, (add x, -C)) -> (umin x, (add x, -C))
12909SDValue DAGCombiner::foldSelectToUMin(SDValue LHS, SDValue RHS, SDValue True,
12910 SDValue False, ISD::CondCode CC,
12911 const SDLoc &DL) {
12912 APInt C;
12913 EVT VT = True.getValueType();
12914 if (sd_match(RHS, m_ConstInt(C)) && hasUMin(VT)) {
12915 if (CC == ISD::SETUGT && LHS == False &&
12916 sd_match(True, m_Add(m_Specific(False), m_SpecificInt(~C)))) {
12917 SDValue AddC = DAG.getConstant(~C, DL, VT);
12918 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, False, AddC);
12919 return DAG.getNode(ISD::UMIN, DL, VT, Add, False);
12920 }
12921 if (CC == ISD::SETULT && LHS == True &&
12922 sd_match(False, m_Add(m_Specific(True), m_SpecificInt(-C)))) {
12923 SDValue AddC = DAG.getConstant(-C, DL, VT);
12924 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, True, AddC);
12925 return DAG.getNode(ISD::UMIN, DL, VT, True, Add);
12926 }
12927 }
12928 return SDValue();
12929}
12930
// Main combine entry point for ISD::SELECT nodes. Tries, in order: generic
// simplification, boolean-flip canonicalization, constant-arm folds,
// i1-condition and/or normalization, ABDU/overflow patterns, setcc-based
// folds (min/max, uaddo-saturation, SELECT_CC, ABD, UMIN), and binop folds.
// NOTE(review): the extraction dropped a few source lines in this function
// (embedded numbers 12943, 12967, 13056, 13098) — some conditions below are
// visibly truncated; consult the upstream file for the full predicates.
12931SDValue DAGCombiner::visitSELECT(SDNode *N) {
  // N0 = condition, N1 = true arm, N2 = false arm.
12932 SDValue N0 = N->getOperand(0);
12933 SDValue N1 = N->getOperand(1);
12934 SDValue N2 = N->getOperand(2);
12935 EVT VT = N->getValueType(0);
12936 EVT VT0 = N0.getValueType();
12937 SDLoc DL(N);
12938 SDNodeFlags Flags = N->getFlags();
12939
12940 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
12941 return V;
12942
  // (elided guard at 12943) — another early-return fold.
12944 return V;
12945
12946 // select (not Cond), N1, N2 -> select Cond, N2, N1
12947 if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false))
12948 return DAG.getSelect(DL, VT, F, N2, N1, Flags);
12949
12950 if (SDValue V = foldSelectOfConstants(N))
12951 return V;
12952
12953 // If we can fold this based on the true/false value, do so.
12954 if (SimplifySelectOps(N, N1, N2))
12955 return SDValue(N, 0); // Don't revisit N.
12956
12957 if (VT0 == MVT::i1) {
12958 // The code in this block deals with the following 2 equivalences:
12959 // select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
12960 // select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
12961 // The target can specify its preferred form with the
12962 // shouldNormalizeToSelectSequence() callback. However we always transform
12963 // to the right anyway if we find the inner select exists in the DAG anyway
12964 // and we always transform to the left side if we know that we can further
12965 // optimize the combination of the conditions.
12966 bool normalizeToSequence =
  // (initializer elided at 12967 — presumably the target callback; confirm
  // against upstream.)
12968 // select (and Cond0, Cond1), X, Y
12969 // -> select Cond0, (select Cond1, X, Y), Y
12970 if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
12971 SDValue Cond0 = N0->getOperand(0);
12972 SDValue Cond1 = N0->getOperand(1);
12973 SDValue InnerSelect =
12974 DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2, Flags);
12975 if (normalizeToSequence || !InnerSelect.use_empty())
12976 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
12977 InnerSelect, N2, Flags);
12978 // Cleanup on failure.
12979 if (InnerSelect.use_empty())
12980 recursivelyDeleteUnusedNodes(InnerSelect.getNode());
12981 }
12982 // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
12983 if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
12984 SDValue Cond0 = N0->getOperand(0);
12985 SDValue Cond1 = N0->getOperand(1);
12986 SDValue InnerSelect = DAG.getNode(ISD::SELECT, DL, N1.getValueType(),
12987 Cond1, N1, N2, Flags);
12988 if (normalizeToSequence || !InnerSelect.use_empty())
12989 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
12990 InnerSelect, Flags);
12991 // Cleanup on failure.
12992 if (InnerSelect.use_empty())
12993 recursivelyDeleteUnusedNodes(InnerSelect.getNode())
12994 }
12995
12996 // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
12997 if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
12998 SDValue N1_0 = N1->getOperand(0);
12999 SDValue N1_1 = N1->getOperand(1);
13000 SDValue N1_2 = N1->getOperand(2);
13001 if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
13002 // Create the actual and node if we can generate good code for it.
13003 if (!normalizeToSequence) {
13004 SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
13005 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1,
13006 N2, Flags);
13007 }
13008 // Otherwise see if we can optimize the "and" to a better pattern.
13009 if (SDValue Combined = visitANDLike(N0, N1_0, N)) {
13010 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
13011 N2, Flags);
13012 }
13013 }
13014 }
13015 // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
13016 if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
13017 SDValue N2_0 = N2->getOperand(0);
13018 SDValue N2_1 = N2->getOperand(1);
13019 SDValue N2_2 = N2->getOperand(2);
13020 if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
13021 // Create the actual or node if we can generate good code for it.
13022 if (!normalizeToSequence) {
13023 SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
13024 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1,
13025 N2_2, Flags);
13026 }
13027 // Otherwise see if we can optimize to a better pattern.
13028 if (SDValue Combined = visitORLike(N0, N2_0, DL))
13029 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
13030 N2_2, Flags);
13031 }
13032 }
13033
13034 // select usubo(x, y).overflow, (sub y, x), (usubo x, y) -> abdu(x, y)
13035 if (N0.getOpcode() == ISD::USUBO && N0.getResNo() == 1 &&
13036 N2.getNode() == N0.getNode() && N2.getResNo() == 0 &&
13037 N1.getOpcode() == ISD::SUB && N2.getOperand(0) == N1.getOperand(1) &&
13038 N2.getOperand(1) == N1.getOperand(0) &&
13039 (!LegalOperations || TLI.isOperationLegal(ISD::ABDU, VT)))
13040 return DAG.getNode(ISD::ABDU, DL, VT, N0.getOperand(0), N0.getOperand(1));
13041
13042 // select usubo(x, y).overflow, (usubo x, y), (sub y, x) -> neg (abdu x, y)
13043 if (N0.getOpcode() == ISD::USUBO && N0.getResNo() == 1 &&
13044 N1.getNode() == N0.getNode() && N1.getResNo() == 0 &&
13045 N2.getOpcode() == ISD::SUB && N2.getOperand(0) == N1.getOperand(1) &&
13046 N2.getOperand(1) == N1.getOperand(0) &&
13047 (!LegalOperations || TLI.isOperationLegal(ISD::ABDU, VT)))
13048 return DAG.getNegative(
13049 DAG.getNode(ISD::ABDU, DL, VT, N0.getOperand(0), N0.getOperand(1)),
13050 DL, VT);
13051 }
13052
13053 // Fold selects based on a setcc into other things, such as min/max/abs.
13054 if (N0.getOpcode() == ISD::SETCC) {
13055 SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1);
  // (line 13056 elided — presumably extracts CC from the setcc; confirm.)
13057
13058 // select (fcmp lt x, y), x, y -> fminnum x, y
13059 // select (fcmp gt x, y), x, y -> fmaxnum x, y
13060 //
13061 // This is OK if we don't care what happens if either operand is a NaN.
13062 if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, Flags, TLI))
13063 if (SDValue FMinMax =
13064 combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2, CC))
13065 return FMinMax;
13066
13067 // Use 'unsigned add with overflow' to optimize an unsigned saturating add.
13068 // This is conservatively limited to pre-legal-operations to give targets
13069 // a chance to reverse the transform if they want to do that. Also, it is
13070 // unlikely that the pattern would be formed late, so it's probably not
13071 // worth going through the other checks.
13072 if (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::UADDO, VT) &&
13073 CC == ISD::SETUGT && N0.hasOneUse() && isAllOnesConstant(N1) &&
13074 N2.getOpcode() == ISD::ADD && Cond0 == N2.getOperand(0)) {
13075 auto *C = dyn_cast<ConstantSDNode>(N2.getOperand(1));
13076 auto *NotC = dyn_cast<ConstantSDNode>(Cond1);
13077 if (C && NotC && C->getAPIntValue() == ~NotC->getAPIntValue()) {
13078 // select (setcc Cond0, ~C, ugt), -1, (add Cond0, C) -->
13079 // uaddo Cond0, C; select uaddo.1, -1, uaddo.0
13080 //
13081 // The IR equivalent of this transform would have this form:
13082 // %a = add %x, C
13083 // %c = icmp ugt %x, ~C
13084 // %r = select %c, -1, %a
13085 // =>
13086 // %u = call {iN,i1} llvm.uadd.with.overflow(%x, C)
13087 // %u0 = extractvalue %u, 0
13088 // %u1 = extractvalue %u, 1
13089 // %r = select %u1, -1, %u0
13090 SDVTList VTs = DAG.getVTList(VT, VT0);
13091 SDValue UAO = DAG.getNode(ISD::UADDO, DL, VTs, Cond0, N2.getOperand(1));
13092 return DAG.getSelect(DL, VT, UAO.getValue(1), N1, UAO.getValue(0));
13093 }
13094 }
13095
13096 if (TLI.isOperationLegal(ISD::SELECT_CC, VT) ||
13097 (!LegalOperations &&
  // (condition continues on elided line 13098.)
13099 // Any flags available in a select/setcc fold will be on the setcc as they
13100 // migrated from fcmp
13101 return DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1, N2,
13102 N0.getOperand(2), N0->getFlags());
13103 }
13104
13105 if (SDValue ABD = foldSelectToABD(Cond0, Cond1, N1, N2, CC, DL))
13106 return ABD;
13107
13108 if (SDValue NewSel = SimplifySelect(DL, N0, N1, N2))
13109 return NewSel;
13110
13111 // (select (ugt x, C), (add x, ~C), x) -> (umin (add x, ~C), x)
13112 // (select (ult x, C), x, (add x, -C)) -> (umin x, (add x, -C))
13113 if (SDValue UMin = foldSelectToUMin(Cond0, Cond1, N1, N2, CC, DL))
13114 return UMin;
13115 }
13116
  // Scalar-only fold of a select whose arms are matching binops.
13117 if (!VT.isVector())
13118 if (SDValue BinOp = foldSelectOfBinops(N))
13119 return BinOp;
13120
13121 if (SDValue R = combineSelectAsExtAnd(N0, N1, N2, DL, DAG))
13122 return R;
13123
13124 return SDValue();
13125}
13126
13127// This function assumes all the vselect's arguments are CONCAT_VECTOR
13128// nodes and that the condition is a BV of ConstantSDNodes (or undefs).
//
// If each half of the condition build_vector is uniform (ignoring undefs),
// the vselect can be rebuilt as a concat of the per-half selected inputs.
// NOTE(review): the function signature line (original line 13129) was
// dropped by the extraction; the body below starts at its first statement.
13130 SDLoc DL(N);
13131 SDValue Cond = N->getOperand(0);
13132 SDValue LHS = N->getOperand(1);
13133 SDValue RHS = N->getOperand(2);
13134 EVT VT = N->getValueType(0);
13135 int NumElems = VT.getVectorNumElements();
13136 assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
13137 RHS.getOpcode() == ISD::CONCAT_VECTORS &&
13138 Cond.getOpcode() == ISD::BUILD_VECTOR);
13139
13140 // CONCAT_VECTOR can take an arbitrary number of arguments. We only care about
13141 // binary ones here.
13142 if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
13143 return SDValue();
13144
13145 // We're sure we have an even number of elements due to the
13146 // concat_vectors we have as arguments to vselect.
13147 // Skip BV elements until we find one that's not an UNDEF
13148 // After we find an UNDEF element, keep looping until we get to half the
13149 // length of the BV and see if all the non-undef nodes are the same.
13150 ConstantSDNode *BottomHalf = nullptr;
13151 for (int i = 0; i < NumElems / 2; ++i) {
13152 if (Cond->getOperand(i)->isUndef())
13153 continue;
13154
13155 if (BottomHalf == nullptr)
13156 BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
13157 else if (Cond->getOperand(i).getNode() != BottomHalf)
  // A second, different constant in the bottom half: not uniform, give up.
13158 return SDValue();
13159 }
13160
13161 // Do the same for the second half of the BuildVector
13162 ConstantSDNode *TopHalf = nullptr;
13163 for (int i = NumElems / 2; i < NumElems; ++i) {
13164 if (Cond->getOperand(i)->isUndef())
13165 continue;
13166
13167 if (TopHalf == nullptr)
13168 TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
13169 else if (Cond->getOperand(i).getNode() != TopHalf)
13170 return SDValue();
13171 }
13172
13173 assert(TopHalf && BottomHalf &&
13174 "One half of the selector was all UNDEFs and the other was all the "
13175 "same value. This should have been addressed before this function.");
  // Per-half select: zero condition picks the RHS concat input, otherwise LHS.
13176 return DAG.getNode(
13178 BottomHalf->isZero() ? RHS->getOperand(0) : LHS->getOperand(0),
13179 TopHalf->isZero() ? RHS->getOperand(1) : LHS->getOperand(1));
13180}
13181
13182bool refineUniformBase(SDValue &BasePtr, SDValue &Index, bool IndexIsScaled,
13183 SelectionDAG &DAG, const SDLoc &DL) {
13184
13185 // Only perform the transformation when existing operands can be reused.
13186 if (IndexIsScaled)
13187 return false;
13188
13189 if (!isNullConstant(BasePtr) && !Index.hasOneUse())
13190 return false;
13191
13192 EVT VT = BasePtr.getValueType();
13193
13194 if (SDValue SplatVal = DAG.getSplatValue(Index);
13195 SplatVal && !isNullConstant(SplatVal) &&
13196 SplatVal.getValueType() == VT) {
13197 BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
13198 Index = DAG.getSplat(Index.getValueType(), DL, DAG.getConstant(0, DL, VT));
13199 return true;
13200 }
13201
13202 if (Index.getOpcode() != ISD::ADD)
13203 return false;
13204
13205 if (SDValue SplatVal = DAG.getSplatValue(Index.getOperand(0));
13206 SplatVal && SplatVal.getValueType() == VT) {
13207 BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
13208 Index = Index.getOperand(1);
13209 return true;
13210 }
13211 if (SDValue SplatVal = DAG.getSplatValue(Index.getOperand(1));
13212 SplatVal && SplatVal.getValueType() == VT) {
13213 BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
13214 Index = Index.getOperand(0);
13215 return true;
13216 }
13217 return false;
13218}
13219
13220// Fold sext/zext of index into index type.
13221bool refineIndexType(SDValue &Index, ISD::MemIndexType &IndexType, EVT DataVT,
13222 SelectionDAG &DAG) {
13223 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13224
13225 // It's always safe to look through zero extends.
13226 if (Index.getOpcode() == ISD::ZERO_EXTEND) {
13227 if (TLI.shouldRemoveExtendFromGSIndex(Index, DataVT)) {
13228 IndexType = ISD::UNSIGNED_SCALED;
13229 Index = Index.getOperand(0);
13230 return true;
13231 }
13232 if (ISD::isIndexTypeSigned(IndexType)) {
13233 IndexType = ISD::UNSIGNED_SCALED;
13234 return true;
13235 }
13236 }
13237
13238 // It's only safe to look through sign extends when Index is signed.
13239 if (Index.getOpcode() == ISD::SIGN_EXTEND &&
13240 ISD::isIndexTypeSigned(IndexType) &&
13241 TLI.shouldRemoveExtendFromGSIndex(Index, DataVT)) {
13242 Index = Index.getOperand(0);
13243 return true;
13244 }
13245
13246 return false;
13247}
13248
// Combine a VP scatter: drop it entirely for an all-false mask, otherwise
// try to refine the uniform base and the index type, rebuilding the node.
// NOTE(review): the extraction dropped the zero-mask guard condition
// (original line 13262) before the bare `return Chain;` below.
13249SDValue DAGCombiner::visitVPSCATTER(SDNode *N) {
13250 VPScatterSDNode *MSC = cast<VPScatterSDNode>(N);
13251 SDValue Mask = MSC->getMask();
13252 SDValue Chain = MSC->getChain();
13253 SDValue Index = MSC->getIndex();
13254 SDValue Scale = MSC->getScale();
13255 SDValue StoreVal = MSC->getValue();
13256 SDValue BasePtr = MSC->getBasePtr();
13257 SDValue VL = MSC->getVectorLength();
13258 ISD::MemIndexType IndexType = MSC->getIndexType();
13259 SDLoc DL(N);
13260
13261 // Zap scatters with a zero mask.
13263 return Chain;
13264
  // Fold a splat component of the index into the scalar base pointer.
13265 if (refineUniformBase(BasePtr, Index, MSC->isIndexScaled(), DAG, DL)) {
13266 SDValue Ops[] = {Chain, StoreVal, BasePtr, Index, Scale, Mask, VL};
13267 return DAG.getScatterVP(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
13268 DL, Ops, MSC->getMemOperand(), IndexType);
13269 }
13270
  // Strip a redundant extend from the index / relax the index type.
13271 if (refineIndexType(Index, IndexType, StoreVal.getValueType(), DAG)) {
13272 SDValue Ops[] = {Chain, StoreVal, BasePtr, Index, Scale, Mask, VL};
13273 return DAG.getScatterVP(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
13274 DL, Ops, MSC->getMemOperand(), IndexType);
13275 }
13276
13277 return SDValue();
13278}
13279
// Combine a masked scatter: drop it for an all-false mask, otherwise try to
// refine the uniform base and the index type, rebuilding the node.
// NOTE(review): the extraction dropped the zero-mask guard condition
// (original line 13292) before the bare `return Chain;` below.
13280SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
13281 MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
13282 SDValue Mask = MSC->getMask();
13283 SDValue Chain = MSC->getChain();
13284 SDValue Index = MSC->getIndex();
13285 SDValue Scale = MSC->getScale();
13286 SDValue StoreVal = MSC->getValue();
13287 SDValue BasePtr = MSC->getBasePtr();
13288 ISD::MemIndexType IndexType = MSC->getIndexType();
13289 SDLoc DL(N);
13290
13291 // Zap scatters with a zero mask.
13293 return Chain;
13294
  // Fold a splat component of the index into the scalar base pointer.
13295 if (refineUniformBase(BasePtr, Index, MSC->isIndexScaled(), DAG, DL)) {
13296 SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
13297 return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
13298 DL, Ops, MSC->getMemOperand(), IndexType,
13299 MSC->isTruncatingStore());
13300 }
13301
  // Strip a redundant extend from the index / relax the index type.
13302 if (refineIndexType(Index, IndexType, StoreVal.getValueType(), DAG)) {
13303 SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
13304 return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
13305 DL, Ops, MSC->getMemOperand(), IndexType,
13306 MSC->isTruncatingStore());
13307 }
13308
13309 return SDValue();
13310}
13311
// Combine a masked store: zap it for a zero mask, remove a dominated masked
// store to the same address, lower an all-ones-mask store to a plain store,
// try indexed forms, narrow a truncating store via demanded bits, and fold
// a TRUNCATE feeding the store into a masked truncating store.
// NOTE(review): the extraction dropped several guard/condition lines here
// (original lines 13320, 13330, 13355, 13359); the truncated conditions
// must be read against the upstream file.
13312SDValue DAGCombiner::visitMSTORE(SDNode *N) {
13313 MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
13314 SDValue Mask = MST->getMask();
13315 SDValue Chain = MST->getChain();
13316 SDValue Value = MST->getValue();
13317 SDValue Ptr = MST->getBasePtr();
13318
13319 // Zap masked stores with a zero mask.
13321 return Chain;
13322
13323 // Remove a masked store if base pointers and masks are equal.
13324 if (MaskedStoreSDNode *MST1 = dyn_cast<MaskedStoreSDNode>(Chain)) {
13325 if (MST->isUnindexed() && MST->isSimple() && MST1->isUnindexed() &&
13326 MST1->isSimple() && MST1->getBasePtr() == Ptr &&
13327 !MST->getBasePtr().isUndef() &&
13328 ((Mask == MST1->getMask() && MST->getMemoryVT().getStoreSize() ==
13329 MST1->getMemoryVT().getStoreSize()) ||
13331 TypeSize::isKnownLE(MST1->getMemoryVT().getStoreSize(),
13332 MST->getMemoryVT().getStoreSize())) {
  // The earlier store is fully overwritten by this one: bypass it.
13333 CombineTo(MST1, MST1->getChain());
13334 if (N->getOpcode() != ISD::DELETED_NODE)
13335 AddToWorklist(N);
13336 return SDValue(N, 0);
13337 }
13338 }
13339
13340 // If this is a masked load with an all ones mask, we can use a unmasked load.
13341 // FIXME: Can we do this for indexed, compressing, or truncating stores?
13342 if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MST->isUnindexed() &&
13343 !MST->isCompressingStore() && !MST->isTruncatingStore())
13344 return DAG.getStore(MST->getChain(), SDLoc(N), MST->getValue(),
13345 MST->getBasePtr(), MST->getPointerInfo(),
13346 MST->getBaseAlign(), MST->getMemOperand()->getFlags(),
13347 MST->getAAInfo());
13348
13349 // Try transforming N to an indexed store.
13350 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
13351 return SDValue(N, 0);
13352
13353 if (MST->isTruncatingStore() && MST->isUnindexed() &&
13354 Value.getValueType().isInteger() &&
13356 !cast<ConstantSDNode>(Value)->isOpaque())) {
  // Only the low bits kept by the truncating store are demanded of Value.
13357 APInt TruncDemandedBits =
13358 APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
13360
13361 // See if we can simplify the operation with
13362 // SimplifyDemandedBits, which only works if the value has a single use.
13363 if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
13364 // Re-visit the store if anything changed and the store hasn't been merged
13365 // with another node (N is deleted) SimplifyDemandedBits will add Value's
13366 // node back to the worklist if necessary, but we also need to re-visit
13367 // the Store node itself.
13368 if (N->getOpcode() != ISD::DELETED_NODE)
13369 AddToWorklist(N)
13370 return SDValue(N, 0);
13371 }
13372 }
13373
13374 // If this is a TRUNC followed by a masked store, fold this into a masked
13375 // truncating store. We can do this even if this is already a masked
13376 // truncstore.
13377 // TODO: Try combine to masked compress store if possiable.
13378 if ((Value.getOpcode() == ISD::TRUNCATE) && Value->hasOneUse() &&
13379 MST->isUnindexed() && !MST->isCompressingStore() &&
13380 TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
13381 MST->getMemoryVT(), MST->getAlign(),
13382 MST->getAddressSpace(), LegalOperations)) {
  // The mask may need widening to match the pre-truncate value type.
13383 auto Mask = TLI.promoteTargetBoolean(DAG, MST->getMask(),
13384 Value.getOperand(0).getValueType());
13385 return DAG.getMaskedStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
13386 MST->getOffset(), Mask, MST->getMemoryVT(),
13387 MST->getMemOperand(), MST->getAddressingMode(),
13388 /*IsTruncating=*/true);
13389 }
13390
13391 return SDValue();
13392}
13393
13394SDValue DAGCombiner::visitVP_STRIDED_STORE(SDNode *N) {
13395 auto *SST = cast<VPStridedStoreSDNode>(N);
13396 EVT EltVT = SST->getValue().getValueType().getVectorElementType();
13397 // Combine strided stores with unit-stride to a regular VP store.
13398 if (auto *CStride = dyn_cast<ConstantSDNode>(SST->getStride());
13399 CStride && CStride->getZExtValue() == EltVT.getStoreSize()) {
13400 return DAG.getStoreVP(SST->getChain(), SDLoc(N), SST->getValue(),
13401 SST->getBasePtr(), SST->getOffset(), SST->getMask(),
13402 SST->getVectorLength(), SST->getMemoryVT(),
13403 SST->getMemOperand(), SST->getAddressingMode(),
13404 SST->isTruncatingStore(), SST->isCompressingStore());
13405 }
13406 return SDValue();
13407}
13408
// Combine VECTOR_COMPRESS: fold splat/undef masks directly, and expand a
// fully-constant mask into a BUILD_VECTOR of the selected elements followed
// by passthru (or undef) elements.
// NOTE(review): the extraction dropped the constant-mask guard and the Ops
// vector declaration (original lines 13426-13427); the loop below pushes
// into that elided `Ops` container.
13409SDValue DAGCombiner::visitVECTOR_COMPRESS(SDNode *N) {
13410 SDLoc DL(N);
13411 SDValue Vec = N->getOperand(0);
13412 SDValue Mask = N->getOperand(1);
13413 SDValue Passthru = N->getOperand(2);
13414 EVT VecVT = Vec.getValueType();
13415
13416 bool HasPassthru = !Passthru.isUndef();
13417
  // Splat mask: all-true keeps the whole vector, all-false yields passthru.
13418 APInt SplatVal;
13419 if (ISD::isConstantSplatVector(Mask.getNode(), SplatVal))
13420 return TLI.isConstTrueVal(Mask) ? Vec : Passthru;
13421
13422 if (Vec.isUndef() || Mask.isUndef())
13423 return Passthru;
13424
13425 // No need for potentially expensive compress if the mask is constant.
13428 EVT ScalarVT = VecVT.getVectorElementType();
13429 unsigned NumSelected = 0;
13430 unsigned NumElmts = VecVT.getVectorNumElements();
13431 for (unsigned I = 0; I < NumElmts; ++I) {
13432 SDValue MaskI = Mask.getOperand(I);
13433 // We treat undef mask entries as "false".
13434 if (MaskI.isUndef())
13435 continue;
13436
13437 if (TLI.isConstTrueVal(MaskI)) {
13438 SDValue VecI = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, Vec,
13439 DAG.getVectorIdxConstant(I, DL));
13440 Ops.push_back(VecI);
13441 NumSelected++;
13442 }
13443 }
  // Tail positions past the selected elements come from passthru, or are
  // undef when no passthru was provided.
13444 for (unsigned Rest = NumSelected; Rest < NumElmts; ++Rest) {
13445 SDValue Val =
13446 HasPassthru
13447 ? DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, Passthru,
13448 DAG.getVectorIdxConstant(Rest, DL))
13449 : DAG.getUNDEF(ScalarVT);
13450 Ops.push_back(Val);
13451 }
13452 return DAG.getBuildVector(VecVT, DL, Ops);
13453 }
13454
13455 return SDValue();
13456}
13457
13458SDValue DAGCombiner::visitVPGATHER(SDNode *N) {
13459 VPGatherSDNode *MGT = cast<VPGatherSDNode>(N);
13460 SDValue Mask = MGT->getMask();
13461 SDValue Chain = MGT->getChain();
13462 SDValue Index = MGT->getIndex();
13463 SDValue Scale = MGT->getScale();
13464 SDValue BasePtr = MGT->getBasePtr();
13465 SDValue VL = MGT->getVectorLength();
13466 ISD::MemIndexType IndexType = MGT->getIndexType();
13467 SDLoc DL(N);
13468
13469 if (refineUniformBase(BasePtr, Index, MGT->isIndexScaled(), DAG, DL)) {
13470 SDValue Ops[] = {Chain, BasePtr, Index, Scale, Mask, VL};
13471 return DAG.getGatherVP(
13472 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
13473 Ops, MGT->getMemOperand(), IndexType);
13474 }
13475
13476 if (refineIndexType(Index, IndexType, N->getValueType(0), DAG)) {
13477 SDValue Ops[] = {Chain, BasePtr, Index, Scale, Mask, VL};
13478 return DAG.getGatherVP(
13479 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
13480 Ops, MGT->getMemOperand(), IndexType);
13481 }
13482
13483 return SDValue();
13484}
13485
// Combine a masked gather: replace it with the passthru for an all-false
// mask, otherwise refine the uniform base or the index type and rebuild.
// NOTE(review): the extraction dropped the zero-mask guard condition
// (original line 13498) before the `return CombineTo(...)` below.
13486SDValue DAGCombiner::visitMGATHER(SDNode *N) {
13487 MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
13488 SDValue Mask = MGT->getMask();
13489 SDValue Chain = MGT->getChain();
13490 SDValue Index = MGT->getIndex();
13491 SDValue Scale = MGT->getScale();
13492 SDValue PassThru = MGT->getPassThru();
13493 SDValue BasePtr = MGT->getBasePtr();
13494 ISD::MemIndexType IndexType = MGT->getIndexType();
13495 SDLoc DL(N);
13496
13497 // Zap gathers with a zero mask.
13499 return CombineTo(N, PassThru, MGT->getChain());
13500
  // Fold a splat component of the index into the scalar base pointer.
13501 if (refineUniformBase(BasePtr, Index, MGT->isIndexScaled(), DAG, DL)) {
13502 SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
13503 return DAG.getMaskedGather(
13504 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
13505 Ops, MGT->getMemOperand(), IndexType, MGT->getExtensionType());
13506 }
13507
  // Strip a redundant extend from the index / relax the index type.
13508 if (refineIndexType(Index, IndexType, N->getValueType(0), DAG)) {
13509 SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
13510 return DAG.getMaskedGather(
13511 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
13512 Ops, MGT->getMemOperand(), IndexType, MGT->getExtensionType());
13513 }
13514
13515 return SDValue();
13516}
13517
// Combine a masked load: replace it with the passthru for an all-false
// mask, lower an all-ones-mask load to a plain load, and try indexed forms.
// NOTE(review): the extraction dropped the zero-mask guard condition
// (original line 13523) before the `return CombineTo(...)` below.
13518SDValue DAGCombiner::visitMLOAD(SDNode *N) {
13519 MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
13520 SDValue Mask = MLD->getMask();
13521
13522 // Zap masked loads with a zero mask.
13524 return CombineTo(N, MLD->getPassThru(), MLD->getChain());
13525
13526 // If this is a masked load with an all ones mask, we can use a unmasked load.
13527 // FIXME: Can we do this for indexed, expanding, or extending loads?
13528 if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MLD->isUnindexed() &&
13529 !MLD->isExpandingLoad() && MLD->getExtensionType() == ISD::NON_EXTLOAD) {
13530 SDValue NewLd = DAG.getLoad(
13531 N->getValueType(0), SDLoc(N), MLD->getChain(), MLD->getBasePtr(),
13532 MLD->getPointerInfo(), MLD->getBaseAlign(),
13533 MLD->getMemOperand()->getFlags(), MLD->getAAInfo(), MLD->getRanges());
  // Replace both the value result and the chain result of the masked load.
13534 return CombineTo(N, NewLd, NewLd.getValue(1));
13535 }
13536
13537 // Try transforming N to an indexed load.
13538 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
13539 return SDValue(N, 0);
13540
13541 return SDValue();
13542}
13543
// Combine a masked histogram: drop it for an all-false mask, otherwise
// refine the uniform base and/or the index type and rebuild the node.
// NOTE(review): the extraction dropped the zero-mask guard condition
// (original line 13558) before the bare `return Chain;` below.
13544SDValue DAGCombiner::visitMHISTOGRAM(SDNode *N) {
13545 MaskedHistogramSDNode *HG = cast<MaskedHistogramSDNode>(N);
13546 SDValue Chain = HG->getChain();
13547 SDValue Inc = HG->getInc();
13548 SDValue Mask = HG->getMask();
13549 SDValue BasePtr = HG->getBasePtr();
13550 SDValue Index = HG->getIndex();
13551 SDLoc DL(HG);
13552
13553 EVT MemVT = HG->getMemoryVT();
13554 EVT DataVT = Index.getValueType();
13555 MachineMemOperand *MMO = HG->getMemOperand();
13556 ISD::MemIndexType IndexType = HG->getIndexType();
13557
13559 return Chain;
13560
  // Either refinement rebuilds the histogram with updated base/index.
13561 if (refineUniformBase(BasePtr, Index, HG->isIndexScaled(), DAG, DL) ||
13562 refineIndexType(Index, IndexType, DataVT, DAG)) {
13563 SDValue Ops[] = {Chain, Inc, Mask, BasePtr, Index,
13564 HG->getScale(), HG->getIntID()};
13565 return DAG.getMaskedHistogram(DAG.getVTList(MVT::Other), MemVT, DL, Ops,
13566 MMO, IndexType);
13567 }
13568
13569 return SDValue();
13570}
13571
13572SDValue DAGCombiner::visitPARTIAL_REDUCE_MLA(SDNode *N) {
13573 if (SDValue Res = foldPartialReduceMLAMulOp(N))
13574 return Res;
13575 if (SDValue Res = foldPartialReduceAdd(N))
13576 return Res;
13577 return SDValue();
13578}
13579
13580// partial_reduce_*mla(acc, mul(*ext(a), *ext(b)), splat(1))
13581// -> partial_reduce_*mla(acc, a, b)
13582//
13583// partial_reduce_*mla(acc, mul(*ext(x), splat(C)), splat(1))
13584// -> partial_reduce_*mla(acc, x, splat(C))
13585//
13586// partial_reduce_*mla(acc, sel(p, mul(*ext(a), *ext(b)), splat(0)), splat(1))
13587// -> partial_reduce_*mla(acc, sel(p, a, splat(0)), b)
13588//
13589// partial_reduce_*mla(acc, sel(p, mul(*ext(a), splat(C)), splat(0)), splat(1))
13590// -> partial_reduce_*mla(acc, sel(p, a, splat(0)), splat(C))
//
// NOTE(review): the extraction dropped several condition lines in this
// function (embedded numbers 13645, 13692-93, 13697, 13721, 13723, 13740);
// truncated predicates below must be read against the upstream file.
13591SDValue DAGCombiner::foldPartialReduceMLAMulOp(SDNode *N) {
13592 SDLoc DL(N);
13593 auto *Context = DAG.getContext();
13594 SDValue Tmp;
13595 SDValue Acc = N->getOperand(0);
13596 SDValue Op1 = N->getOperand(1);
13597 SDValue Op2 = N->getOperand(2);
13598 unsigned Opc = Op1->getOpcode();
13599
13600 // Handle predication by moving the SELECT into the operand of the MUL.
13601 SDValue Pred;
13602 if (Opc == ISD::VSELECT && (isZeroOrZeroSplat(Op1->getOperand(2)) ||
13603 isZeroOrZeroSplatFP(Op1->getOperand(2)))) {
13604 Pred = Op1->getOperand(0);
13605 Op1 = Op1->getOperand(1);
13606 Opc = Op1->getOpcode();
13607 }
13608
13609 // Handle negation (sub-reduction).
13610 bool IsMLS = false;
13611 if (sd_match(Op1, m_Neg(m_Value(Tmp)))) {
13612 Op1 = Tmp;
13613 Opc = Op1->getOpcode();
13614 IsMLS = true;
13615 }
13616
13617 if (Opc != ISD::MUL && Opc != ISD::FMUL && Opc != ISD::SHL)
13618 return SDValue();
13619
13620 SDValue LHS = Op1->getOperand(0);
13621 SDValue RHS = Op1->getOperand(1);
13622
13623 // After instcombine, negation for FP operations is on the RHS, so implement:
13624 // fmul(fpext(a), fneg(fpext(b)))
13625 //-> fmul(fpext(a), fpext(fneg(b)))
13626 if (sd_match(RHS, m_FNeg(m_Value(Tmp)))) {
13627 RHS = Tmp;
13628 IsMLS = true;
13629 }
13630
13631 // Try to treat (shl %a, %c) as (mul %a, (1 << %c)) for constant %c.
13632 if (Opc == ISD::SHL) {
13633 APInt C;
13634 if (!ISD::isConstantSplatVector(RHS.getNode(), C))
13635 return SDValue();
13636
13637 RHS =
13638 DAG.getSplatVector(RHS.getValueType(), DL,
13639 DAG.getConstant(APInt(C.getBitWidth(), 1).shl(C), DL,
13640 RHS.getValueType().getScalarType()));
13641 Opc = ISD::MUL;
13642 }
13643
  // The third MLA operand must be a splat of one (condition truncated here).
13644 if (!(Opc == ISD::MUL && llvm::isOneOrOneSplat(Op2)) &&
13646 return SDValue();
13647
13648 auto IsIntOrFPExtOpcode = [](unsigned int Opcode) {
13649 return (ISD::isExtOpcode(Opcode) || Opcode == ISD::FP_EXTEND);
13650 };
13651
13652 unsigned LHSOpcode = LHS->getOpcode();
13653 if (!IsIntOrFPExtOpcode(LHSOpcode))
13654 return SDValue();
13655
13656 SDValue LHSExtOp = LHS->getOperand(0);
13657 EVT LHSExtOpVT = LHSExtOp.getValueType();
13658
13659 // When Pred is non-zero, set Op = select(Pred, Op, splat(0)) and freeze
13660 // OtherOp to keep the same semantics when moving the selects into the MUL
13661 // operands.
13662 auto ApplyPredicate = [&](SDValue &Op, SDValue &OtherOp) {
13663 if (Pred) {
13664 EVT OpVT = Op.getValueType();
13665 SDValue Zero = OpVT.isFloatingPoint() ? DAG.getConstantFP(0.0, DL, OpVT)
13666 : DAG.getConstant(0, DL, OpVT);
13667 Op = DAG.getSelect(DL, OpVT, Pred, Op, Zero);
13668 OtherOp = DAG.getFreeze(OtherOp);
13669 }
13670 };
13671
13672 // Generate an MLA or MLS.
13673 auto GetMLA = [&](unsigned Opc, SDValue Acc, SDValue LHS,
13674 SDValue RHS) -> SDValue {
13675 EVT AccVT = Acc.getValueType();
13676 return IsMLS ? DAG.getPartialReduceMLS(Opc, DL, Acc, LHS, RHS)
13677 : DAG.getNode(Opc, DL, AccVT, Acc, LHS, RHS);
13678 };
13679
13680 // partial_reduce_*mla(acc, mul(ext(x), splat(C)), splat(1))
13681 // -> partial_reduce_*mla(acc, x, C)
13682 APInt C;
13683 if (ISD::isConstantSplatVector(RHS.getNode(), C)) {
13684 // TODO: Make use of partial_reduce_sumla here
  // The constant must survive a round-trip through the narrow type with
  // the same extension kind as the LHS operand.
13685 APInt CTrunc = C.trunc(LHSExtOpVT.getScalarSizeInBits());
13686 unsigned LHSBits = LHS.getValueType().getScalarSizeInBits();
13687 if ((LHSOpcode != ISD::ZERO_EXTEND || CTrunc.zext(LHSBits) != C) &&
13688 (LHSOpcode != ISD::SIGN_EXTEND || CTrunc.sext(LHSBits) != C))
13689 return SDValue();
13690
13691 unsigned NewOpcode = LHSOpcode == ISD::SIGN_EXTEND
  // (opcode selection continues on elided lines 13692-93.)
13694
13695 // Only perform these combines if the target supports folding
13696 // the extends into the operation.
13698 NewOpcode, TLI.getTypeToTransformTo(*Context, N->getValueType(0)),
13699 TLI.getTypeToTransformTo(*Context, LHSExtOpVT)))
13700 return SDValue();
13701
13702 SDValue C = DAG.getConstant(CTrunc, DL, LHSExtOpVT);
13703 ApplyPredicate(C, LHSExtOp);
13704 return GetMLA(NewOpcode, Acc, LHSExtOp, C);
13705 }
13706
13707 unsigned RHSOpcode = RHS->getOpcode();
13708 if (!IsIntOrFPExtOpcode(RHSOpcode))
13709 return SDValue();
13710
13711 SDValue RHSExtOp = RHS->getOperand(0);
13712 if (LHSExtOpVT != RHSExtOp.getValueType())
13713 return SDValue();
13714
  // Pick the partial-reduce opcode from the pair of extend kinds; mixed
  // zext/sext maps to the SUMLA form (with operands swapped as needed).
13715 unsigned NewOpc;
13716 if (LHSOpcode == ISD::SIGN_EXTEND && RHSOpcode == ISD::SIGN_EXTEND)
13717 NewOpc = ISD::PARTIAL_REDUCE_SMLA;
13718 else if (LHSOpcode == ISD::ZERO_EXTEND && RHSOpcode == ISD::ZERO_EXTEND)
13719 NewOpc = ISD::PARTIAL_REDUCE_UMLA;
13720 else if (LHSOpcode == ISD::SIGN_EXTEND && RHSOpcode == ISD::ZERO_EXTEND)
13722 else if (LHSOpcode == ISD::ZERO_EXTEND && RHSOpcode == ISD::SIGN_EXTEND) {
13724 std::swap(LHSExtOp, RHSExtOp);
13725 } else if (LHSOpcode == ISD::FP_EXTEND && RHSOpcode == ISD::FP_EXTEND) {
13726 NewOpc = ISD::PARTIAL_REDUCE_FMLA;
13727 } else
13728 return SDValue();
13729 // For a 2-stage extend the signedness of both of the extends must match
13730 // If the mul has the same type, there is no outer extend, and thus we
13731 // can simply use the inner extends to pick the result node.
13732 // TODO: extend to handle nonneg zext as sext
13733 EVT AccElemVT = Acc.getValueType().getVectorElementType();
13734 if (Op1.getValueType().getVectorElementType() != AccElemVT &&
13735 NewOpc != N->getOpcode())
13736 return SDValue();
13737
13738 // Only perform these combines if the target supports folding
13739 // the extends into the operation.
13741 NewOpc, TLI.getTypeToTransformTo(*Context, N->getValueType(0)),
13742 TLI.getTypeToTransformTo(*Context, LHSExtOpVT)))
13743 return SDValue();
13744
13745 ApplyPredicate(RHSExtOp, LHSExtOp);
13746 return GetMLA(NewOpc, Acc, LHSExtOp, RHSExtOp);
13747}
13748
13749// partial.reduce.*mla(acc, *ext(op), splat(1))
13750// -> partial.reduce.*mla(acc, op, splat(trunc(1)))
13751// partial.reduce.sumla(acc, sext(op), splat(1))
13752// -> partial.reduce.smla(acc, op, splat(trunc(1)))
13753//
13754// partial.reduce.*mla(acc, sel(p, *ext(op), splat(0)), splat(1))
13755// -> partial.reduce.*mla(acc, sel(p, op, splat(0)), splat(trunc(1)))
// Fold a PARTIAL_REDUCE_*MLA whose multiplicand operand (Op2) is splat(1):
// strip the extend from Op1 (optionally looking through a zeroing VSELECT
// predicate and through a negation, which switches to the MLS form).
13756 SDValue DAGCombiner::foldPartialReduceAdd(SDNode *N) {
13757 SDLoc DL(N);
13758 SDValue Tmp;
13759 SDValue Acc = N->getOperand(0);
13760 SDValue Op1 = N->getOperand(1);
13761 SDValue Op2 = N->getOperand(2);
13762
// NOTE(review): the guard condition preceding this early return (source line
// 13763) was lost in extraction -- presumably it bails out unless Op2 is a
// splat of one; confirm against upstream DAGCombiner.cpp.
13764 return SDValue();
13765
// Look through a select that zeroes inactive lanes; remember the predicate
// so it can be re-applied to the replacement splat(1) operand below.
13766 SDValue Pred;
13767 unsigned Op1Opcode = Op1.getOpcode();
13768 if (Op1Opcode == ISD::VSELECT && (isZeroOrZeroSplat(Op1->getOperand(2)) ||
13769 isZeroOrZeroSplatFP(Op1->getOperand(2)))) {
13770 Pred = Op1->getOperand(0);
13771 Op1 = Op1->getOperand(1);
13772 Op1Opcode = Op1->getOpcode();
13773 }
13774
13775 // Handle negation (sub-reduction).
13776 bool IsMLS = false;
13777 if (sd_match(Op1, m_AnyOf(m_Neg(m_Value(Tmp)), m_FNeg(m_Value(Tmp))))) {
13778 Op1 = Tmp;
13779 Op1Opcode = Op1.getOpcode();
13780 IsMLS = true;
13781 }
13782
// Only extends (sext/zext/aext) or fp_extend can be folded away here.
13783 if (!ISD::isExtOpcode(Op1Opcode) && Op1Opcode != ISD::FP_EXTEND)
13784 return SDValue();
13785
13786 bool Op1IsSigned =
13787 Op1Opcode == ISD::SIGN_EXTEND || Op1Opcode == ISD::FP_EXTEND;
13788 bool NodeIsSigned = N->getOpcode() != ISD::PARTIAL_REDUCE_UMLA;
13789 EVT AccElemVT = Acc.getValueType().getVectorElementType();
// Mismatched signedness is only acceptable when the extend reaches all the
// way to the accumulator element type (the node opcode then carries it).
13790 if (Op1IsSigned != NodeIsSigned &&
13791 Op1.getValueType().getVectorElementType() != AccElemVT)
13792 return SDValue();
13793
// NOTE(review): the middle/else operands of this ternary (source lines
// 13795 and 13797) were lost in extraction -- presumably FMLA / UMLA arms.
13794 unsigned NewOpcode = N->getOpcode() == ISD::PARTIAL_REDUCE_FMLA
13796 : Op1IsSigned ? ISD::PARTIAL_REDUCE_SMLA
13798
13799 SDValue UnextOp1 = Op1.getOperand(0);
13800 EVT UnextOp1VT = UnextOp1.getValueType();
13801 auto *Context = DAG.getContext();
// NOTE(review): the start of this legality query (source line 13802, likely
// "if (!TLI.isPartialReduceMLALegalOrCustom(") was lost in extraction.
13803 NewOpcode, TLI.getTypeToTransformTo(*Context, N->getValueType(0)),
13804 TLI.getTypeToTransformTo(*Context, UnextOp1VT)))
13805 return SDValue();
13806
// Rebuild the splat(1) multiplicand at the narrower, un-extended type.
13807 SDValue Constant = N->getOpcode() == ISD::PARTIAL_REDUCE_FMLA
13808 ? DAG.getConstantFP(1, DL, UnextOp1VT)
13809 : DAG.getConstant(1, DL, UnextOp1VT);
13810
// Re-apply the predicate peeled off above by selecting 1 or 0 per lane.
13811 if (Pred) {
13812 SDValue Zero = N->getOpcode() == ISD::PARTIAL_REDUCE_FMLA
13813 ? DAG.getConstantFP(0, DL, UnextOp1VT)
13814 : DAG.getConstant(0, DL, UnextOp1VT)
13815 Constant = DAG.getSelect(DL, UnextOp1VT, Pred, Constant, Zero);
13816 }
13817 EVT AccVT = Acc.getValueType();
13818 return IsMLS ? DAG.getPartialReduceMLS(NewOpcode, DL, Acc, UnextOp1, Constant)
13819 : DAG.getNode(NewOpcode, DL, AccVT, Acc, UnextOp1, Constant);
13820}
13821
13822SDValue DAGCombiner::visitVP_STRIDED_LOAD(SDNode *N) {
13823 auto *SLD = cast<VPStridedLoadSDNode>(N);
13824 EVT EltVT = SLD->getValueType(0).getVectorElementType();
13825 // Combine strided loads with unit-stride to a regular VP load.
13826 if (auto *CStride = dyn_cast<ConstantSDNode>(SLD->getStride());
13827 CStride && CStride->getZExtValue() == EltVT.getStoreSize()) {
13828 SDValue NewLd = DAG.getLoadVP(
13829 SLD->getAddressingMode(), SLD->getExtensionType(), SLD->getValueType(0),
13830 SDLoc(N), SLD->getChain(), SLD->getBasePtr(), SLD->getOffset(),
13831 SLD->getMask(), SLD->getVectorLength(), SLD->getMemoryVT(),
13832 SLD->getMemOperand(), SLD->isExpandingLoad());
13833 return CombineTo(N, NewLd, NewLd.getValue(1));
13834 }
13835 return SDValue();
13836}
13837
13838/// A vector select of 2 constant vectors can be simplified to math/logic to
13839/// avoid a variable select instruction and possibly avoid constant loads.
13840 SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
13841 SDValue Cond = N->getOperand(0);
13842 SDValue N1 = N->getOperand(1);
13843 SDValue N2 = N->getOperand(2);
13844 EVT VT = N->getValueType(0);
// NOTE(review): the remainder of this guard (source lines 13846-13848) was
// lost in extraction -- presumably it also requires N1/N2 to be constant
// build_vectors of the right type; confirm against upstream DAGCombiner.cpp.
13845 if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
13849 return SDValue();
13850
13851 // Check if we can use the condition value to increment/decrement a single
13852 // constant value. This simplifies a select to an add and removes a constant
13853 // load/materialization from the general case.
13854 bool AllAddOne = true;
13855 bool AllSubOne = true;
13856 unsigned Elts = VT.getVectorNumElements();
13857 for (unsigned i = 0; i != Elts; ++i) {
13858 SDValue N1Elt = N1.getOperand(i);
13859 SDValue N2Elt = N2.getOperand(i);
// Undef lanes of N1 are permissive: they can match either C+1 or C-1.
13860 if (N1Elt.isUndef())
13861 continue;
13862 // N2 should not contain undef values since it will be reused in the fold.
13863 if (N2Elt.isUndef() || N1Elt.getValueType() != N2Elt.getValueType()) {
13864 AllAddOne = false;
13865 AllSubOne = false;
13866 break;
13867 }
13868
13869 const APInt &C1 = N1Elt->getAsAPIntVal();
13870 const APInt &C2 = N2Elt->getAsAPIntVal();
13871 if (C1 != C2 + 1)
13872 AllAddOne = false;
13873 if (C1 != C2 - 1)
13874 AllSubOne = false;
13875 }
13876
13877 // Further simplifications for the extra-special cases where the constants are
13878 // all 0 or all -1 should be implemented as folds of these patterns.
13879 SDLoc DL(N);
13880 if (AllAddOne || AllSubOne) {
13881 // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
13882 // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
13883 auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
13884 SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
13885 return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
13886 }
13887
13888 // select Cond, Pow2C, 0 --> (zext Cond) << log2(Pow2C)
13889 APInt Pow2C;
13890 if (ISD::isConstantSplatVector(N1.getNode(), Pow2C) && Pow2C.isPowerOf2() &&
13891 isNullOrNullSplat(N2)) {
13892 SDValue ZextCond = DAG.getZExtOrTrunc(Cond, DL, VT);
13893 SDValue ShAmtC = DAG.getConstant(Pow2C.exactLogBase2(), DL, VT);
13894 return DAG.getNode(ISD::SHL, DL, VT, ZextCond, ShAmtC);
13895 }
13896
// NOTE(review): the "if (SDValue V = ...)" head of this return (source line
// 13897) was lost in extraction; the bare "return V;" below depends on it.
13898 return V;
13899
13900 // The general case for select-of-constants:
13901 // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
13902 // ...but that only makes sense if a vselect is slower than 2 logic ops, so
13903 // leave that to a machine-specific pass.
13904 return SDValue();
13905}
13906
// Combine a VP_SELECT: first try generic select simplification, then a
// second fold whose guard was elided in this extraction (see note below).
13907 SDValue DAGCombiner::visitVP_SELECT(SDNode *N) {
13908 SDValue N0 = N->getOperand(0);
13909 SDValue N1 = N->getOperand(1);
13910 SDValue N2 = N->getOperand(2);
13911 SDLoc DL(N);
13912
13913 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
13914 return V;
13915
// NOTE(review): the "if (SDValue V = ...)" head of this return (source line
// 13916) was lost in extraction; the bare "return V;" below depends on it.
13917 return V;
13918
13919 return SDValue();
13920}
13921
// NOTE(review): the first line of this signature (source line 13922,
// presumably "static SDValue combineVSelectWithAllOnesOrZeros(SDValue Cond,
// SDValue TVal,") was lost in extraction; the parameter list below is its
// continuation. The function folds vselect where one arm is all-zeros or
// all-ones into bitwise ops on the (sign-splat) condition.
13923 SDValue FVal,
13924 const TargetLowering &TLI,
13925 SelectionDAG &DAG,
13926 const SDLoc &DL) {
13927 EVT VT = TVal.getValueType();
13928 if (!TLI.isTypeLegal(VT))
13929 return SDValue();
13930
13931 EVT CondVT = Cond.getValueType();
13932 assert(CondVT.isVector() && "Vector select expects a vector selector!");
13933
13934 bool IsTAllZero = ISD::isConstantSplatVectorAllZeros(TVal.getNode());
13935 bool IsTAllOne = ISD::isConstantSplatVectorAllOnes(TVal.getNode());
13936 bool IsFAllZero = ISD::isConstantSplatVectorAllZeros(FVal.getNode());
13937 bool IsFAllOne = ISD::isConstantSplatVectorAllOnes(FVal.getNode());
13938
13939 // no vselect(cond, 0/-1, X) or vselect(cond, X, 0/-1), return
13940 if (!IsTAllZero && !IsTAllOne && !IsFAllZero && !IsFAllOne)
13941 return SDValue();
13942
13943 // select Cond, 0, 0 → 0
13944 if (IsTAllZero && IsFAllZero) {
13945 return VT.isFloatingPoint() ? DAG.getConstantFP(0.0, DL, VT)
13946 : DAG.getConstant(0, DL, VT);
13947 }
13948
13949 // check select(setgt lhs, -1), 1, -1 --> or (sra lhs, bitwidth - 1), 1
13950 APInt TValAPInt;
13951 if (Cond.getOpcode() == ISD::SETCC &&
13952 Cond.getOperand(2) == DAG.getCondCode(ISD::SETGT) &&
13953 Cond.getOperand(0).getValueType() == VT && VT.isSimple() &&
13954 ISD::isConstantSplatVector(TVal.getNode(), TValAPInt) &&
13955 TValAPInt.isOne() &&
// NOTE(review): source lines 13957-13958 (the tail of this condition,
// presumably an all-ones check on FVal plus the opening brace) were lost
// in extraction.
13956 ISD::isConstantSplatVectorAllOnes(Cond.getOperand(1).getNode()) &&
13959 SDValue LHS = Cond.getOperand(0);
// NOTE(review): source line 13961 (the initializer of ShiftC, presumably a
// bitwidth-1 shift-amount constant) was lost in extraction.
13960 SDValue ShiftC =
13962 SDValue Shift = DAG.getNode(ISD::SRA, DL, VT, LHS, ShiftC);
13963 return DAG.getNode(ISD::OR, DL, VT, Shift, TVal);
13964 }
13965
13966 // To use the condition operand as a bitwise mask, it must have elements that
13967 // are the same size as the select elements. i.e, the condition operand must
13968 // have already been promoted from the IR select condition type <N x i1>.
13969 // Don't check if the types themselves are equal because that excludes
13970 // vector floating-point selects.
13971 if (CondVT.getScalarSizeInBits() != VT.getScalarSizeInBits())
13972 return SDValue();
13973
13974 // Cond value must be 'sign splat' to be converted to a logical op.
13975 if (DAG.ComputeNumSignBits(Cond) != CondVT.getScalarSizeInBits())
13976 return SDValue();
13977
13978 // Try inverting Cond and swapping T/F if it gives all-ones/all-zeros form
13979 if (!IsTAllOne && !IsFAllZero && Cond.hasOneUse() &&
13980 Cond.getOpcode() == ISD::SETCC &&
13981 TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT) ==
13982 CondVT) {
13983 if (IsTAllZero || IsFAllOne) {
13984 SDValue CC = Cond.getOperand(2);
// NOTE(review): source line 13985 (the head of the InverseCC declaration,
// presumably "ISD::CondCode InverseCC = ISD::getSetCCInverse(") was lost in
// extraction.
13986 cast<CondCodeSDNode>(CC)->get(), Cond.getOperand(0).getValueType());
13987 Cond = DAG.getSetCC(DL, CondVT, Cond.getOperand(0), Cond.getOperand(1),
13988 InverseCC);
13989 std::swap(TVal, FVal);
13990 std::swap(IsTAllOne, IsFAllOne);
13991 std::swap(IsTAllZero, IsFAllZero);
13992 }
13993 }
13994
// NOTE(review): source line 13995 (the head of this assert, re-checking the
// sign-splat property after rebuilding Cond) was lost in extraction.
13996 "Select condition no longer all-sign bits");
13997
13998 // select Cond, -1, 0 → bitcast Cond
13999 if (IsTAllOne && IsFAllZero)
14000 return DAG.getBitcast(VT, Cond);
14001
14002 // select Cond, -1, x → or Cond, x
14003 if (IsTAllOne) {
14004 SDValue X = DAG.getBitcast(CondVT, DAG.getFreeze(FVal));
14005 SDValue Or = DAG.getNode(ISD::OR, DL, CondVT, Cond, X);
14006 return DAG.getBitcast(VT, Or);
14007 }
14008
14009 // select Cond, x, 0 → and Cond, x
14010 if (IsFAllZero) {
14011 SDValue X = DAG.getBitcast(CondVT, DAG.getFreeze(TVal));
14012 SDValue And = DAG.getNode(ISD::AND, DL, CondVT, Cond, X);
14013 return DAG.getBitcast(VT, And);
14014 }
14015
14016 // select Cond, 0, x -> and not(Cond), x
// NOTE(review): source line 14018 (the tail of this condition, presumably a
// legality/profitability check for the NOT, plus the opening brace) was lost
// in extraction.
14017 if (IsTAllZero &&
14019 SDValue X = DAG.getBitcast(CondVT, DAG.getFreeze(FVal));
14020 SDValue And =
14021 DAG.getNode(ISD::AND, DL, CondVT, DAG.getNOT(DL, Cond, CondVT), X);
14022 return DAG.getBitcast(VT, And);
14023 }
14024
14025 return SDValue();
14026}
14027
// Main VSELECT combiner: canonicalizations (boolean flip, integer abs),
// min/max and saturating add/sub pattern matching, compare widening, and
// all-ones/all-zeros arm folds. Several guard lines were elided by the
// extraction; each elision is flagged inline below.
14028 SDValue DAGCombiner::visitVSELECT(SDNode *N) {
14029 SDValue N0 = N->getOperand(0);
14030 SDValue N1 = N->getOperand(1);
14031 SDValue N2 = N->getOperand(2);
14032 EVT VT = N->getValueType(0);
14033 SDLoc DL(N);
14034
14035 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
14036 return V;
14037
// NOTE(review): source line 14038 (the "if (SDValue V = ...)" head for this
// return, likely a foldBoolSelectToLogic-style call) was lost in extraction.
14039 return V;
14040
14041 // vselect (not Cond), N1, N2 -> vselect Cond, N2, N1
14042 if (!TLI.isTargetCanonicalSelect(N))
14043 if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false))
14044 return DAG.getSelect(DL, VT, F, N2, N1, N->getFlags());
14045
14046 // select (sext m), (add X, C), X --> (add X, (and C, (sext m))))
// NOTE(review): source lines 14048-14049 and 14051 (middle of this
// condition, including the N0 opcode check and the boolean-contents
// comparison target) were lost in extraction.
14047 if (N1.getOpcode() == ISD::ADD && N1.getOperand(0) == N2 && N1->hasOneUse() &&
14050 TLI.getBooleanContents(N0.getValueType()) ==
14052 return DAG.getNode(
14053 ISD::ADD, DL, N1.getValueType(), N2,
14054 DAG.getNode(ISD::AND, DL, N0.getValueType(), N1.getOperand(1), N0));
14055 }
14056
14057 // Canonicalize integer abs.
14058 // vselect (setg[te] X, 0), X, -X ->
14059 // vselect (setgt X, -1), X, -X ->
14060 // vselect (setl[te] X, 0), -X, X ->
14061 // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
14062 if (N0.getOpcode() == ISD::SETCC) {
14063 SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
// NOTE(review): source line 14064 (extraction of the condition code CC from
// the setcc's third operand) was lost in extraction.
14065 bool isAbs = false;
14066 bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
14067
// NOTE(review): source lines 14071 and 14074 (the "isAbs = ..." statements
// for the two branches below) were lost in extraction.
14068 if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
14069 (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
14070 N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
14072 else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
14073 N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
14075
// NOTE(review): source line 14077 (the "if (hasOperation(ISD::ABS, VT))"
// style guard for the direct ABS emission) was lost in extraction.
14076 if (isAbs) {
14078 return DAG.getNode(ISD::ABS, DL, VT, LHS);
14079
14080 SDValue Shift = DAG.getNode(
14081 ISD::SRA, DL, VT, LHS,
14082 DAG.getShiftAmountConstant(VT.getScalarSizeInBits() - 1, VT, DL));
14083 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
14084 AddToWorklist(Shift.getNode());
14085 AddToWorklist(Add.getNode());
14086 return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
14087 }
14088
14089 // vselect x, y (fcmp lt x, y) -> fminnum x, y
14090 // vselect x, y (fcmp gt x, y) -> fmaxnum x, y
14091 //
14092 // This is OK if we don't care about what happens if either operand is a
14093 // NaN.
14094 //
14095 if (N0.hasOneUse() &&
14096 isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, N->getFlags(), TLI)) {
14097 if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC))
14098 return FMinMax;
14099 }
14100
14101 if (SDValue S = PerformMinMaxFpToSatCombine(LHS, RHS, N1, N2, CC, DAG))
14102 return S;
14103 if (SDValue S = PerformUMinFpToSatCombine(LHS, RHS, N1, N2, CC, DAG))
14104 return S;
14105
14106 // If this select has a condition (setcc) with narrower operands than the
14107 // select, try to widen the compare to match the select width.
14108 // TODO: This should be extended to handle any constant.
14109 // TODO: This could be extended to handle non-loading patterns, but that
14110 // requires thorough testing to avoid regressions.
14111 if (isNullOrNullSplat(RHS)) {
14112 EVT NarrowVT = LHS.getValueType();
// NOTE(review): source line 14113 (the definition of WideVT, presumably
// derived from the select's value type) was lost in extraction.
14114 EVT SetCCVT = getSetCCResultType(LHS.getValueType());
14115 unsigned SetCCWidth = SetCCVT.getScalarSizeInBits();
14116 unsigned WideWidth = WideVT.getScalarSizeInBits();
14117 bool IsSigned = isSignedIntSetCC(CC);
14118 auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
14119 if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() && SetCCWidth != 1 &&
14120 SetCCWidth < WideWidth &&
14121 TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)) {
14122 LoadSDNode *Ld = cast<LoadSDNode>(LHS);
14123
14124 if (TLI.isLoadLegalOrCustom(WideVT, NarrowVT, Ld->getAlign(),
14125 Ld->getAddressSpace(), LoadExtOpcode,
14126 false)) {
14127 // Both compare operands can be widened for free. The LHS can use an
14128 // extended load, and the RHS is a constant:
14129 // vselect (ext (setcc load(X), C)), N1, N2 -->
14130 // vselect (setcc extload(X), C'), N1, N2
14131 auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
14132 SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS);
14133 SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS);
14134 EVT WideSetCCVT = getSetCCResultType(WideVT);
14135 SDValue WideSetCC =
14136 DAG.getSetCC(DL, WideSetCCVT, WideLHS, WideRHS, CC);
14137 return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2);
14138 }
14139 }
14140 }
14141
14142 if (SDValue ABD = foldSelectToABD(LHS, RHS, N1, N2, CC, DL))
14143 return ABD;
14144
14145 // Match VSELECTs into add with unsigned saturation.
14146 if (hasOperation(ISD::UADDSAT, VT)) {
14147 // Check if one of the arms of the VSELECT is vector with all bits set.
14148 // If it's on the left side invert the predicate to simplify logic below.
14149 SDValue Other;
14150 ISD::CondCode SatCC = CC;
// NOTE(review): source line 14151 (the "if (ISD::isConstantSplatVector-
// AllOnes(N1...))" guard for this branch) was lost in extraction.
14152 Other = N2;
14153 SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
14154 } else if (ISD::isConstantSplatVectorAllOnes(N2.getNode())) {
14155 Other = N1;
14156 }
14157
14158 if (Other && Other.getOpcode() == ISD::ADD) {
14159 SDValue CondLHS = LHS, CondRHS = RHS;
14160 SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
14161
14162 // Canonicalize condition operands.
14163 if (SatCC == ISD::SETUGE) {
14164 std::swap(CondLHS, CondRHS);
14165 SatCC = ISD::SETULE;
14166 }
14167
14168 // We can test against either of the addition operands.
14169 // x <= x+y ? x+y : ~0 --> uaddsat x, y
14170 // x+y >= x ? x+y : ~0 --> uaddsat x, y
14171 if (SatCC == ISD::SETULE && Other == CondRHS &&
14172 (OpLHS == CondLHS || OpRHS == CondLHS))
14173 return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
14174
14175 if (OpRHS.getOpcode() == CondRHS.getOpcode() &&
14176 (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
14177 OpRHS.getOpcode() == ISD::SPLAT_VECTOR) &&
14178 CondLHS == OpLHS) {
14179 // If the RHS is a constant we have to reverse the const
14180 // canonicalization.
14181 // x >= ~C ? x+C : ~0 --> uaddsat x, C
14182 auto MatchUADDSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
14183 return Cond->getAPIntValue() == ~Op->getAPIntValue();
14184 };
14185 if (SatCC == ISD::SETULE &&
14186 ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUADDSAT))
14187 return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
14188 }
14189 }
14190 }
14191
14192 // Match VSELECTs into sub with unsigned saturation.
14193 if (hasOperation(ISD::USUBSAT, VT)) {
14194 // Check if one of the arms of the VSELECT is a zero vector. If it's on
14195 // the left side invert the predicate to simplify logic below.
14196 SDValue Other;
14197 ISD::CondCode SatCC = CC;
// NOTE(review): source lines 14198 and 14201 (the zero-splat guards for the
// two branches below) were lost in extraction.
14199 Other = N2;
14200 SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
14202 Other = N1;
14203 }
14204
14205 // zext(x) >= y ? trunc(zext(x) - y) : 0
14206 // --> usubsat(trunc(zext(x)),trunc(umin(y,SatLimit)))
14207 // zext(x) > y ? trunc(zext(x) - y) : 0
14208 // --> usubsat(trunc(zext(x)),trunc(umin(y,SatLimit)))
14209 if (Other && Other.getOpcode() == ISD::TRUNCATE &&
14210 Other.getOperand(0).getOpcode() == ISD::SUB &&
14211 (SatCC == ISD::SETUGE || SatCC == ISD::SETUGT)) {
14212 SDValue OpLHS = Other.getOperand(0).getOperand(0);
14213 SDValue OpRHS = Other.getOperand(0).getOperand(1);
14214 if (LHS == OpLHS && RHS == OpRHS && LHS.getOpcode() == ISD::ZERO_EXTEND)
14215 if (SDValue R = getTruncatedUSUBSAT(VT, LHS.getValueType(), LHS, RHS,
14216 DAG, DL))
14217 return R;
14218 }
14219
14220 if (Other && Other.getNumOperands() == 2) {
14221 SDValue CondRHS = RHS;
14222 SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
14223
14224 if (OpLHS == LHS) {
14225 // Look for a general sub with unsigned saturation first.
14226 // x >= y ? x-y : 0 --> usubsat x, y
14227 // x > y ? x-y : 0 --> usubsat x, y
14228 if ((SatCC == ISD::SETUGE || SatCC == ISD::SETUGT) &&
14229 Other.getOpcode() == ISD::SUB && OpRHS == CondRHS)
14230 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
14231
14232 if (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
14233 OpRHS.getOpcode() == ISD::SPLAT_VECTOR) {
14234 if (CondRHS.getOpcode() == ISD::BUILD_VECTOR ||
14235 CondRHS.getOpcode() == ISD::SPLAT_VECTOR) {
14236 // If the RHS is a constant we have to reverse the const
14237 // canonicalization.
14238 // x > C-1 ? x+-C : 0 --> usubsat x, C
14239 auto MatchUSUBSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
14240 return (!Op && !Cond) ||
14241 (Op && Cond &&
14242 Cond->getAPIntValue() == (-Op->getAPIntValue() - 1));
14243 };
14244 if (SatCC == ISD::SETUGT && Other.getOpcode() == ISD::ADD &&
14245 ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUSUBSAT,
14246 /*AllowUndefs*/ true)) {
14247 OpRHS = DAG.getNegative(OpRHS, DL, VT);
14248 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
14249 }
14250
14251 // Another special case: If C was a sign bit, the sub has been
14252 // canonicalized into a xor.
14253 // FIXME: Would it be better to use computeKnownBits to
14254 // determine whether it's safe to decanonicalize the xor?
14255 // x s< 0 ? x^C : 0 --> usubsat x, C
14256 APInt SplatValue;
// NOTE(review): source line 14259 (part of this condition, likely a check
// on the setcc RHS being zero) was lost in extraction.
14257 if (SatCC == ISD::SETLT && Other.getOpcode() == ISD::XOR &&
14258 ISD::isConstantSplatVector(OpRHS.getNode(), SplatValue) &&
14260 SplatValue.isSignMask()) {
14261 // Note that we have to rebuild the RHS constant here to
14262 // ensure we don't rely on particular values of undef lanes.
14263 OpRHS = DAG.getConstant(SplatValue, DL, VT);
14264 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
14265 }
14266 }
14267 }
14268 }
14269 }
14270 }
14271
14272 // (vselect (ugt x, C), (add x, ~C), x) -> (umin (add x, ~C), x)
14273 // (vselect (ult x, C), x, (add x, -C)) -> (umin x, (add x, -C))
14274 if (SDValue UMin = foldSelectToUMin(LHS, RHS, N1, N2, CC, DL))
14275 return UMin;
14276 }
14277
14278 if (SimplifySelectOps(N, N1, N2))
14279 return SDValue(N, 0); // Don't revisit N.
14280
14281 // Fold (vselect all_ones, N1, N2) -> N1
// NOTE(review): source lines 14282 and 14285 (the all-ones / all-zeros
// guards for the two returns below) were lost in extraction.
14283 return N1;
14284 // Fold (vselect all_zeros, N1, N2) -> N2
14286 return N2;
14287
14288 // The ConvertSelectToConcatVector function is assuming both the above
14289 // checks for (vselect (build_vector all{ones,zeros) ...) have been made
14290 // and addressed.
// NOTE(review): source lines 14292-14293 (the rest of this condition,
// presumably requiring N2 to also be a CONCAT_VECTORS) were lost in
// extraction.
14291 if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
14294 if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
14295 return CV;
14296 }
14297
14298 if (SDValue V = foldVSelectOfConstants(N))
14299 return V;
14300
// NOTE(review): source lines 14302 and 14305 (the bodies/guards surrounding
// the two statements below) were lost in extraction; as rendered, the
// unconditional "return SDValue(N, 0);" would make the following fold
// unreachable -- this is an artifact of the elision.
14301 if (hasOperation(ISD::SRA, VT))
14303 return V;
14304
14306 return SDValue(N, 0);
14307
14308 if (SDValue V = combineVSelectWithAllOnesOrZeros(N0, N1, N2, TLI, DAG, DL))
14309 return V;
14310
14311 return SDValue();
14312}
14313
// Combine SELECT_CC(lhs, rhs, tval, fval, cc): fold identical arms, reduce
// i1 equality-with-zero to a plain select, constant-fold the embedded
// compare via SimplifySetCC, and finally defer to SimplifySelectCC for
// min/max/abs style patterns.
14314 SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
14315 SDValue N0 = N->getOperand(0);
14316 SDValue N1 = N->getOperand(1);
14317 SDValue N2 = N->getOperand(2);
14318 SDValue N3 = N->getOperand(3);
14319 SDValue N4 = N->getOperand(4);
14320 ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
14321 SDLoc DL(N);
14322
14323 // fold select_cc lhs, rhs, x, x, cc -> x
14324 if (N2 == N3)
14325 return N2;
14326
14327 // select_cc bool, 0, x, y, seteq -> select bool, y, x
14328 if (CC == ISD::SETEQ && !LegalTypes && N0.getValueType() == MVT::i1 &&
14329 isNullConstant(N1))
14330 return DAG.getSelect(DL, N2.getValueType(), N0, N3, N2);
14331
14332 // Determine if the condition we're dealing with is constant
14333 if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
14334 CC, DL, false)) {
14335 AddToWorklist(SCC.getNode());
14336
14337 // cond always true -> true val
14338 // cond always false -> false val
14339 if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode()))
14340 return SCCC->isZero() ? N3 : N2;
14341
14342 // When the condition is UNDEF, just return the first operand. This is
14343 // coherent the DAG creation, no setcc node is created in this case
14344 if (SCC->isUndef())
14345 return N2;
14346
14347 // Fold to a simpler select_cc
// The simplified compare keeps its own operands and cond code; reuse them
// while preserving the original true/false values and SETCC flags.
14348 if (SCC.getOpcode() == ISD::SETCC) {
14349 return DAG.getNode(ISD::SELECT_CC, DL, N2.getValueType(),
14350 SCC.getOperand(0), SCC.getOperand(1), N2, N3,
14351 SCC.getOperand(2), SCC->getFlags());
14352 }
14353 }
14354
14355 // If we can fold this based on the true/false value, do so.
14356 if (SimplifySelectOps(N, N2, N3))
14357 return SDValue(N, 0); // Don't revisit N.
14358
14359 // fold select_cc into other things, such as min/max/abs
14360 return SimplifySelectCC(DL, N0, N1, N2, N3, CC);
14361}
14362
// Combine SETCC: run the generic SimplifySetCC machinery (preferring to keep
// a setcc when the only user is a branch), then try to interchange
// shift+and vs. rotate forms of "compare a value against a permutation of
// its own bits" according to target preference.
14363 SDValue DAGCombiner::visitSETCC(SDNode *N) {
14364 // setcc is very commonly used as an argument to brcond or cond_loop. This
14365 // pattern also lend itself to numerous combines and, as a result, it is
14366 // desired we keep the argument to a brcond as a setcc as much as possible.
14367 bool PreferSetCC =
14368 N->hasOneUse() && (N->user_begin()->getOpcode() == ISD::BRCOND ||
14369 N->user_begin()->getOpcode() == ISD::COND_LOOP);
14370
14371 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
14372 EVT VT = N->getValueType(0);
14373 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
14374 SDLoc DL(N);
14375
14376 if (SDValue Combined = SimplifySetCC(VT, N0, N1, Cond, DL, !PreferSetCC)) {
14377 // If we prefer to have a setcc, and we don't, we'll try our best to
14378 // recreate one using rebuildSetCC.
14379 if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
14380 SDValue NewSetCC = rebuildSetCC(Combined);
14381
14382 // We don't have anything interesting to combine to.
14383 if (NewSetCC.getNode() == N)
14384 return SDValue();
14385
14386 if (NewSetCC)
14387 return NewSetCC;
14388 }
14389 return Combined;
14390 }
14391
14392 // Optimize
14393 // 1) (icmp eq/ne (and X, C0), (shift X, C1))
14394 // or
14395 // 2) (icmp eq/ne X, (rotate X, C1))
14396 // If C0 is a mask or shifted mask and the shift amt (C1) isolates the
14397 // remaining bits (i.e something like `(x64 & UINT32_MAX) == (x64 >> 32)`)
14398 // Then:
14399 // If C1 is a power of 2, then the rotate and shift+and versions are
14400 // equivilent, so we can interchange them depending on target preference.
14401 // Otherwise, if we have the shift+and version we can interchange srl/shl
14402 // which inturn affects the constant C0. We can use this to get better
14403 // constants again determined by target preference.
14404 if (Cond == ISD::SETNE || Cond == ISD::SETEQ) {
// Matchers for the two shapes: (and X, C0) vs (shift X, C1), and X vs
// (rotate X, C1). Both require the shifted/rotated value to be X itself.
14405 auto IsAndWithShift = [](SDValue A, SDValue B) {
14406 return A.getOpcode() == ISD::AND &&
14407 (B.getOpcode() == ISD::SRL || B.getOpcode() == ISD::SHL) &&
14408 A.getOperand(0) == B.getOperand(0);
14409 };
14410 auto IsRotateWithOp = [](SDValue A, SDValue B) {
14411 return (B.getOpcode() == ISD::ROTL || B.getOpcode() == ISD::ROTR) &&
14412 B.getOperand(0) == A;
14413 };
14414 SDValue AndOrOp = SDValue(), ShiftOrRotate = SDValue();
14415 bool IsRotate = false;
14416
14417 // Find either shift+and or rotate pattern.
14418 if (IsAndWithShift(N0, N1)) {
14419 AndOrOp = N0;
14420 ShiftOrRotate = N1;
14421 } else if (IsAndWithShift(N1, N0)) {
14422 AndOrOp = N1;
14423 ShiftOrRotate = N0;
14424 } else if (IsRotateWithOp(N0, N1)) {
14425 IsRotate = true;
14426 AndOrOp = N0;
14427 ShiftOrRotate = N1;
14428 } else if (IsRotateWithOp(N1, N0)) {
14429 IsRotate = true;
14430 AndOrOp = N1;
14431 ShiftOrRotate = N0;
14432 }
14433
14434 if (AndOrOp && ShiftOrRotate && ShiftOrRotate.hasOneUse() &&
14435 (IsRotate || AndOrOp.hasOneUse())) {
14436 EVT OpVT = N0.getValueType();
14437 // Get constant shift/rotate amount and possibly mask (if its shift+and
14438 // variant).
14439 auto GetAPIntValue = [](SDValue Op) -> std::optional<APInt> {
14440 ConstantSDNode *CNode = isConstOrConstSplat(Op, /*AllowUndefs*/ false,
14441 /*AllowTrunc*/ false);
14442 if (CNode == nullptr)
14443 return std::nullopt;
14444 return CNode->getAPIntValue();
14445 };
14446 std::optional<APInt> AndCMask =
14447 IsRotate ? std::nullopt : GetAPIntValue(AndOrOp.getOperand(1));
14448 std::optional<APInt> ShiftCAmt =
14449 GetAPIntValue(ShiftOrRotate.getOperand(1));
14450 unsigned NumBits = OpVT.getScalarSizeInBits();
14451
14452 // We found constants.
14453 if (ShiftCAmt && (IsRotate || AndCMask) && ShiftCAmt->ult(NumBits)) {
14454 unsigned ShiftOpc = ShiftOrRotate.getOpcode();
14455 // Check that the constants meet the constraints.
14456 bool CanTransform = IsRotate;
14457 if (!CanTransform) {
14458 // Check that mask and shift compliment eachother
14459 CanTransform = *ShiftCAmt == (~*AndCMask).popcount();
14460 // Check that we are comparing all bits
14461 CanTransform &= (*ShiftCAmt + AndCMask->popcount()) == NumBits;
14462 // Check that the and mask is correct for the shift
14463 CanTransform &=
14464 ShiftOpc == ISD::SHL ? (~*AndCMask).isMask() : AndCMask->isMask();
14465 }
14466
14467 // See if target prefers another shift/rotate opcode.
14468 unsigned NewShiftOpc = TLI.preferedOpcodeForCmpEqPiecesOfOperand(
14469 OpVT, ShiftOpc, ShiftCAmt->isPowerOf2(), *ShiftCAmt, AndCMask);
14470 // Transform is valid and we have a new preference.
14471 if (CanTransform && NewShiftOpc != ShiftOpc) {
14472 SDValue NewShiftOrRotate =
14473 DAG.getNode(NewShiftOpc, DL, OpVT, ShiftOrRotate.getOperand(0),
14474 ShiftOrRotate.getOperand(1));
14475 SDValue NewAndOrOp = SDValue();
14476
// When switching to a shift form, recompute the complementary mask so the
// and/shift pair still partitions all NumBits bits of the operand.
14477 if (NewShiftOpc == ISD::SHL || NewShiftOpc == ISD::SRL) {
14478 APInt NewMask =
14479 NewShiftOpc == ISD::SHL
14480 ? APInt::getHighBitsSet(NumBits,
14481 NumBits - ShiftCAmt->getZExtValue())
14482 : APInt::getLowBitsSet(NumBits,
14483 NumBits - ShiftCAmt->getZExtValue());
14484 NewAndOrOp =
14485 DAG.getNode(ISD::AND, DL, OpVT, ShiftOrRotate.getOperand(0),
14486 DAG.getConstant(NewMask, DL, OpVT));
14487 } else {
14488 NewAndOrOp = ShiftOrRotate.getOperand(0);
14489 }
14490
14491 return DAG.getSetCC(DL, VT, NewAndOrOp, NewShiftOrRotate, Cond);
14492 }
14493 }
14494 }
14495 }
14496 return SDValue();
14497}
14498
14499SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
14500 SDValue LHS = N->getOperand(0);
14501 SDValue RHS = N->getOperand(1);
14502 SDValue Carry = N->getOperand(2);
14503 SDValue Cond = N->getOperand(3);
14504
14505 // If Carry is false, fold to a regular SETCC.
14506 if (isNullConstant(Carry))
14507 return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
14508
14509 return SDValue();
14510}
14511
14512/// Check if N satisfies:
14513/// N is used once.
14514/// N is a Load.
14515/// The load is compatible with ExtOpcode. It means
14516/// If load has explicit zero/sign extension, ExpOpcode must have the same
14517/// extension.
14518/// Otherwise returns true.
14519static bool isCompatibleLoad(SDValue N, unsigned ExtOpcode) {
14520 if (!N.hasOneUse())
14521 return false;
14522
14523 if (!isa<LoadSDNode>(N))
14524 return false;
14525
14526 LoadSDNode *Load = cast<LoadSDNode>(N);
14527 ISD::LoadExtType LoadExt = Load->getExtensionType();
14528 if (LoadExt == ISD::NON_EXTLOAD || LoadExt == ISD::EXTLOAD)
14529 return true;
14530
14531 // Now LoadExt is either SEXTLOAD or ZEXTLOAD, ExtOpcode must have the same
14532 // extension.
14533 if ((LoadExt == ISD::SEXTLOAD && ExtOpcode != ISD::SIGN_EXTEND) ||
14534 (LoadExt == ISD::ZEXTLOAD && ExtOpcode != ISD::ZERO_EXTEND))
14535 return false;
14536
14537 return true;
14538}
14539
14540/// Fold
14541/// (sext (select c, load x, load y)) -> (select c, sextload x, sextload y)
14542/// (zext (select c, load x, load y)) -> (select c, zextload x, zextload y)
14543/// (aext (select c, load x, load y)) -> (select c, extload x, extload y)
14544/// This function is called by the DAGCombiner when visiting sext/zext/aext
14545/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
// NOTE(review): the first line of this signature (source line 14546,
// presumably "static SDValue tryToFoldExtendSelectLoad(SDNode *N, const
// TargetLowering &TLI,") was lost in extraction; the parameters below are
// its continuation.
14547 SelectionDAG &DAG, const SDLoc &DL,
14548 CombineLevel Level) {
14549 unsigned Opcode = N->getOpcode();
14550 SDValue N0 = N->getOperand(0);
14551 EVT VT = N->getValueType(0);
14552 assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
14553 Opcode == ISD::ANY_EXTEND) &&
14554 "Expected EXTEND dag node in input!");
14555
// NOTE(review): source line 14557 (the head of this sd_match call matching
// a select of Op1/Op2 under Cond) was lost in extraction.
14556 SDValue Cond, Op1, Op2;
14558 m_Value(Op2)))))
14559 return SDValue();
14560
// Both select arms must be single-use loads whose extension kind (if any)
// agrees with the extend being folded; see isCompatibleLoad above.
14561 if (!isCompatibleLoad(Op1, Opcode) || !isCompatibleLoad(Op2, Opcode))
14562 return SDValue();
14563
14564 auto ExtLoadOpcode = ISD::EXTLOAD;
14565 if (Opcode == ISD::SIGN_EXTEND)
14566 ExtLoadOpcode = ISD::SEXTLOAD;
14567 else if (Opcode == ISD::ZERO_EXTEND)
14568 ExtLoadOpcode = ISD::ZEXTLOAD;
14569
14570 // Illegal VSELECT may ISel fail if happen after legalization (DAG
14571 // Combine2), so we should conservatively check the OperationAction.
// NOTE(review): source line 14579 (the tail of this condition, presumably a
// VSELECT legality query on the wide type) was lost in extraction.
14572 LoadSDNode *Load1 = cast<LoadSDNode>(Op1);
14573 LoadSDNode *Load2 = cast<LoadSDNode>(Op2);
14574 if (!TLI.isLoadLegal(VT, Load1->getMemoryVT(), Load1->getAlign(),
14575 Load1->getAddressSpace(), ExtLoadOpcode, false) ||
14576 !TLI.isLoadLegal(VT, Load2->getMemoryVT(), Load2->getAlign(),
14577 Load2->getAddressSpace(), ExtLoadOpcode, false) ||
14578 (N0->getOpcode() == ISD::VSELECT && Level >= AfterLegalizeTypes &&
14580 return SDValue();
14581
// Hoist the extend above the select; later combines can then turn the
// extended loads into ext-loads.
14582 SDValue Ext1 = DAG.getNode(Opcode, DL, VT, Op1);
14583 SDValue Ext2 = DAG.getNode(Opcode, DL, VT, Op2);
14584 return DAG.getSelect(DL, VT, Cond, Ext1, Ext2);
14585}
14586
14587/// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
14588/// a build_vector of constants.
14589/// This function is called by the DAGCombiner when visiting sext/zext/aext
14590/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
14591/// Vector extends are not folded if operations are legal; this is to
14592/// avoid introducing illegal build_vector dag nodes.
// NOTE(review): the declaration line (upstream tryToFoldExtendOfConstant(
// SDNode *N, const SDLoc &DL, ...)) is missing from this excerpt.
14594 const TargetLowering &TLI,
14595 SelectionDAG &DAG, bool LegalTypes) {
14596 unsigned Opcode = N->getOpcode();
14597 SDValue N0 = N->getOperand(0);
14598 EVT VT = N->getValueType(0);
14599
14600 assert((ISD::isExtOpcode(Opcode) || ISD::isExtVecInRegOpcode(Opcode)) &&
14601 "Expected EXTEND dag node in input!");
14602
14603 // fold (sext c1) -> c1
14604 // fold (zext c1) -> c1
14605 // fold (aext c1) -> c1
14606 if (isa<ConstantSDNode>(N0))
14607 return DAG.getNode(Opcode, DL, VT, N0);
14608
14609 // fold (sext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
14610 // fold (zext (select cond, c1, c2)) -> (select cond, zext c1, zext c2)
14611 // fold (aext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
14612 if (N0->getOpcode() == ISD::SELECT) {
14613 SDValue Op1 = N0->getOperand(1);
14614 SDValue Op2 = N0->getOperand(2);
14615 if (isa<ConstantSDNode>(Op1) && isa<ConstantSDNode>(Op2) &&
14616 (Opcode != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0.getValueType(), VT))) {
14617 // For any_extend, choose sign extension of the constants to allow a
14618 // possible further transform to sign_extend_inreg.i.e.
14619 //
14620 // t1: i8 = select t0, Constant:i8<-1>, Constant:i8<0>
14621 // t2: i64 = any_extend t1
14622 // -->
14623 // t3: i64 = select t0, Constant:i64<-1>, Constant:i64<0>
14624 // -->
14625 // t4: i64 = sign_extend_inreg t3
14626 unsigned FoldOpc = Opcode;
14627 if (FoldOpc == ISD::ANY_EXTEND)
14628 FoldOpc = ISD::SIGN_EXTEND;
14629 return DAG.getSelect(DL, VT, N0->getOperand(0),
14630 DAG.getNode(FoldOpc, DL, VT, Op1),
14631 DAG.getNode(FoldOpc, DL, VT, Op2));
14632 }
14633 }
14634
14635 // fold (sext (build_vector AllConstants) -> (build_vector AllConstants)
14636 // fold (zext (build_vector AllConstants) -> (build_vector AllConstants)
14637 // fold (aext (build_vector AllConstants) -> (build_vector AllConstants)
14638 EVT SVT = VT.getScalarType();
14639 if (!(VT.isVector() && (!LegalTypes || TLI.isTypeLegal(SVT)) &&
// NOTE(review): the tail of this condition (upstream line 14640, an
// ISD::isBuildVectorOfConstantSDNodes(N0.getNode()) test) is missing.
14641 return SDValue();
14642
14643 // We can fold this node into a build_vector.
14644 unsigned VTBits = SVT.getSizeInBits();
14645 unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
// NOTE(review): the declaration of the result-element vector (upstream line
// 14646, SmallVector<SDValue, 8> Elts;) is missing from this excerpt.
14647 unsigned NumElts = VT.getVectorNumElements();
14648
14649 for (unsigned i = 0; i != NumElts; ++i) {
14650 SDValue Op = N0.getOperand(i);
14651 if (Op.isUndef()) {
// Undef lanes: any_extend may stay undef; sext/zext must pin them to zero.
14652 if (Opcode == ISD::ANY_EXTEND || Opcode == ISD::ANY_EXTEND_VECTOR_INREG)
14653 Elts.push_back(DAG.getUNDEF(SVT));
14654 else
14655 Elts.push_back(DAG.getConstant(0, DL, SVT));
14656 continue;
14657 }
14658
14659 SDLoc DL(Op);
14660 // Get the constant value and if needed trunc it to the size of the type.
14661 // Nodes like build_vector might have constants wider than the scalar type.
14662 APInt C = Op->getAsAPIntVal().zextOrTrunc(EVTBits);
14663 if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
14664 Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
14665 else
14666 Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
14667 }
14668
14669 return DAG.getBuildVector(VT, DL, Elts);
14670}
14671
14672// ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
14673// "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
14674// transformation. Returns true if extension are possible and the above
14675// mentioned transformation is profitable.
// NOTE(review): the declaration line naming this function (upstream
// ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0, ...)) is missing
// from this excerpt.
14677 unsigned ExtOpc,
14678 SmallVectorImpl<SDNode *> &ExtendNodes,
14679 const TargetLowering &TLI) {
14680 bool HasCopyToRegUses = false;
14681 bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType());
// Walk every user of the unextended load value and decide whether each one
// can tolerate (or benefit from) the value being widened.
14682 for (SDUse &Use : N0->uses()) {
14683 SDNode *User = Use.getUser();
14684 if (User == N)
14685 continue;
14686 if (Use.getResNo() != N0.getResNo())
14687 continue;
14688 // FIXME: Only extend SETCC N, N and SETCC N, c for now.
14689 if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
// NOTE(review): the line extracting the condition code (upstream line 14690,
// ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();) is
// missing from this excerpt.
14691 if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
14692 // Sign bits will be lost after a zext.
14693 return false;
14694 bool Add = false;
14695 for (unsigned i = 0; i != 2; ++i) {
14696 SDValue UseOp = User->getOperand(i);
14697 if (UseOp == N0)
14698 continue;
14699 if (!isa<ConstantSDNode>(UseOp))
14700 return false;
14701 Add = true;
14702 }
14703 if (Add)
14704 ExtendNodes.push_back(User);
14705 continue;
14706 }
14707 // If truncates aren't free and there are users we can't
14708 // extend, it isn't worthwhile.
14709 if (!isTruncFree)
14710 return false;
14711 // Remember if this value is live-out.
14712 if (User->getOpcode() == ISD::CopyToReg)
14713 HasCopyToRegUses = true;
14714 }
14715
14716 if (HasCopyToRegUses) {
14717 bool BothLiveOut = false;
14718 for (SDUse &Use : N->uses()) {
14719 if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
14720 BothLiveOut = true;
14721 break;
14722 }
14723 }
14724 if (BothLiveOut)
14725 // Both unextended and extended values are live out. There had better be
14726 // a good reason for the transformation.
14727 return !ExtendNodes.empty();
14728 }
14729 return true;
14730}
14731
// Rewrite the SETCC users collected by ExtendUsesToFormExtLoad so they
// operate on the widened value: the operand that was the original load is
// replaced by the new ext-load, and any constant sibling operand is extended
// with ExtType to match.
14732void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
14733 SDValue OrigLoad, SDValue ExtLoad,
14734 ISD::NodeType ExtType) {
14735 // Extend SetCC uses if necessary.
14736 SDLoc DL(ExtLoad);
14737 for (SDNode *SetCC : SetCCs) {
// NOTE(review): the declaration of the operand list (upstream line 14738,
// SmallVector<SDValue, 4> Ops;) is missing from this excerpt.
14740 for (unsigned j = 0; j != 2; ++j) {
14741 SDValue SOp = SetCC->getOperand(j);
14742 if (SOp == OrigLoad)
14743 Ops.push_back(ExtLoad);
14744 else
14745 Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
14746 }
14747
// Keep the original condition code operand unchanged.
14748 Ops.push_back(SetCC->getOperand(2));
14749 CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
14750 }
14751}
14752
14753// FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
// Split an illegal-but-splittable vector (sext/zext (load x)) into a
// concatenation of legal narrower ext-loads. See the in-body comment for the
// exact shape of the transformation.
14754SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
14755 SDValue N0 = N->getOperand(0);
14756 EVT DstVT = N->getValueType(0);
14757 EVT SrcVT = N0.getValueType();
14758
14759 assert((N->getOpcode() == ISD::SIGN_EXTEND ||
14760 N->getOpcode() == ISD::ZERO_EXTEND) &&
14761 "Unexpected node type (not an extend)!");
14762
14763 // fold (sext (load x)) to multiple smaller sextloads; same for zext.
14764 // For example, on a target with legal v4i32, but illegal v8i32, turn:
14765 // (v8i32 (sext (v8i16 (load x))))
14766 // into:
14767 // (v8i32 (concat_vectors (v4i32 (sextload x)),
14768 // (v4i32 (sextload (x + 16)))))
14769 // Where uses of the original load, i.e.:
14770 // (v8i16 (load x))
14771 // are replaced with:
14772 // (v8i16 (truncate
14773 // (v8i32 (concat_vectors (v4i32 (sextload x)),
14774 // (v4i32 (sextload (x + 16)))))))
14775 //
14776 // This combine is only applicable to illegal, but splittable, vectors.
14777 // All legal types, and illegal non-vector types, are handled elsewhere.
14778 // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
14779 //
14780 if (N0->getOpcode() != ISD::LOAD)
14781 return SDValue();
14782
14783 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14784
14785 if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
14786 !N0.hasOneUse() || !LN0->isSimple() ||
14787 !DstVT.isVector() || !DstVT.isPow2VectorType() ||
// NOTE(review): the final condition (upstream line 14788, a
// !TLI.isVectorLoadExtDesirable(SDValue(N, 0)) test) is missing here.
14789 return SDValue();
14790
// NOTE(review): the declaration of SetCCs (upstream line 14791,
// SmallVector<SDNode *, 4> SetCCs;) is missing from this excerpt.
14792 if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI))
14793 return SDValue();
14794
14795 ISD::LoadExtType ExtType =
14796 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
14797
14798 // Try to split the vector types to get down to legal types.
14799 EVT SplitSrcVT = SrcVT;
14800 EVT SplitDstVT = DstVT;
14801 while (!TLI.isLoadLegalOrCustom(SplitDstVT, SplitSrcVT, LN0->getAlign(),
14802 LN0->getAddressSpace(), ExtType, false) &&
14803 SplitSrcVT.getVectorNumElements() > 1) {
14804 SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
14805 SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
14806 }
14807
14808 if (!TLI.isLoadLegalOrCustom(SplitDstVT, SplitSrcVT, LN0->getAlign(),
14809 LN0->getAddressSpace(), ExtType, false))
14810 return SDValue();
14811
14812 assert(!DstVT.isScalableVector() && "Unexpected scalable vector type");
14813
14814 SDLoc DL(N);
14815 const unsigned NumSplits =
14816 DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
14817 const unsigned Stride = SplitSrcVT.getStoreSize();
// NOTE(review): the declarations of the per-split result/chain vectors
// (upstream lines 14818-14819, SmallVector<SDValue, 4> Loads; Chains;) are
// missing from this excerpt.
14820
14821 SDValue BasePtr = LN0->getBasePtr();
// Emit one narrow ext-load per split, bumping the pointer by Stride bytes.
14822 for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
14823 const unsigned Offset = Idx * Stride;
14824
// NOTE(review): the assignment head (upstream line 14825,
// SDValue SplitLoad =) is missing from this excerpt.
14826 DAG.getExtLoad(ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(),
14827 BasePtr, LN0->getPointerInfo().getWithOffset(Offset),
14828 SplitSrcVT, LN0->getBaseAlign(),
14829 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
14830
14831 BasePtr = DAG.getMemBasePlusOffset(BasePtr, TypeSize::getFixed(Stride), DL);
14832
14833 Loads.push_back(SplitLoad.getValue(0));
14834 Chains.push_back(SplitLoad.getValue(1));
14835 }
14836
14837 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
14838 SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
14839
14840 // Simplify TF.
14841 AddToWorklist(NewChain.getNode());
14842
14843 CombineTo(N, NewValue);
14844
14845 // Replace uses of the original load (before extension)
14846 // with a truncate of the concatenated sextloaded vectors.
14847 SDValue Trunc =
14848 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
14849 ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode());
14850 CombineTo(N0.getNode(), Trunc, NewChain);
14851 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14852}
14853
14854// fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
14855// (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
14856SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
14857 assert(N->getOpcode() == ISD::ZERO_EXTEND);
14858 EVT VT = N->getValueType(0);
14859 EVT OrigVT = N->getOperand(0).getValueType();
// If the zext itself is free there is no profit in restructuring.
14860 if (TLI.isZExtFree(OrigVT, VT))
14861 return SDValue();
14862
14863 // and/or/xor
14864 SDValue N0 = N->getOperand(0);
14865 if (!ISD::isBitwiseLogicOp(N0.getOpcode()) ||
14866 N0.getOperand(1).getOpcode() != ISD::Constant ||
14867 (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT)))
14868 return SDValue();
14869
14870 // shl/shr
14871 SDValue N1 = N0->getOperand(0);
14872 if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) ||
14873 N1.getOperand(1).getOpcode() != ISD::Constant ||
14874 (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT)))
14875 return SDValue();
14876
14877 // load
14878 if (!isa<LoadSDNode>(N1.getOperand(0)))
14879 return SDValue();
14880 LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0));
14881 EVT MemVT = Load->getMemoryVT();
14882 if (!TLI.isLoadLegal(VT, MemVT, Load->getAlign(), Load->getAddressSpace(),
14883 ISD::ZEXTLOAD, false) ||
14884 Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
14885 return SDValue();
14886
14887
14888 // If the shift op is SHL, the logic op must be AND, otherwise the result
14889 // will be wrong.
14890 if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND)
14891 return SDValue();
14892
14893 if (!N0.hasOneUse() || !N1.hasOneUse())
14894 return SDValue();
14895
// NOTE(review): the declaration of SetCCs (upstream, a
// SmallVector<SDNode *, 4> SetCCs;) is missing from this excerpt.
14896
14897 if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0),
14898 ISD::ZERO_EXTEND, SetCCs, TLI))
14899 return SDValue();
14900
14901 // Actually do the transformation.
14902 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT,
14903 Load->getChain(), Load->getBasePtr(),
14904 Load->getMemoryVT(), Load->getMemOperand());
14905
14906 SDLoc DL1(N1);
14907 SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad,
14908 N1.getOperand(1));
14909
// Widen the logic-op constant to the destination width by zero extension.
14910 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
14911 SDLoc DL0(N0);
14912 SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
14913 DAG.getConstant(Mask, DL0, VT));
14914
14915 ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
14916 CombineTo(N, And);
// If the old load has other users, keep them alive via a truncate of the
// widened load; otherwise just thread the new chain through.
14917 if (SDValue(Load, 0).hasOneUse()) {
14918 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
14919 } else {
14920 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load),
14921 Load->getValueType(0), ExtLoad);
14922 CombineTo(Load, Trunc, ExtLoad.getValue(1));
14923 }
14924
14925 // N0 is dead at this point.
14926 recursivelyDeleteUnusedNodes(N0.getNode());
14927
14928 return SDValue(N,0); // Return N so it doesn't get rechecked!
14929}
14930
14931/// If we're narrowing or widening the result of a vector select and the final
14932/// size is the same size as a setcc (compare) feeding the select, then try to
14933/// apply the cast operation to the select's operands because matching vector
14934/// sizes for a select condition and other operands should be more efficient.
14935SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
14936 unsigned CastOpcode = Cast->getOpcode();
14937 assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
14938 CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
14939 CastOpcode == ISD::FP_ROUND) &&
14940 "Unexpected opcode for vector select narrowing/widening");
14941
14942 // We only do this transform before legal ops because the pattern may be
14943 // obfuscated by target-specific operations after legalization. Do not create
14944 // an illegal select op, however, because that may be difficult to lower.
14945 EVT VT = Cast->getValueType(0);
14946 if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
14947 return SDValue();
14948
14949 SDValue VSel = Cast->getOperand(0);
14950 if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() ||
14951 VSel.getOperand(0).getOpcode() != ISD::SETCC)
14952 return SDValue();
14953
14954 // Does the setcc have the same vector size as the casted select?
14955 SDValue SetCC = VSel.getOperand(0);
14956 EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
14957 if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
14958 return SDValue();
14959
14960 // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
14961 SDValue A = VSel.getOperand(1);
14962 SDValue B = VSel.getOperand(2);
14963 SDValue CastA, CastB;
14964 SDLoc DL(Cast);
14965 if (CastOpcode == ISD::FP_ROUND) {
14966 // FP_ROUND (fptrunc) has an extra flag operand to pass along.
14967 CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
14968 CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
14969 } else {
14970 CastA = DAG.getNode(CastOpcode, DL, VT, A);
14971 CastB = DAG.getNode(CastOpcode, DL, VT, B);
14972 }
14973 return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
14974}
14975
14976// fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
14977// fold ([s|z]ext ( extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
// NOTE(review): the declaration line naming this function (upstream
// tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner, ...)) is
// missing from this excerpt.
14979 const TargetLowering &TLI, EVT VT,
14980 bool LegalOperations, SDNode *N,
14981 SDValue N0, ISD::LoadExtType ExtLoadType) {
// A freeze wrapped around the ext-load is looked through and re-applied on
// the widened value below.
14982 bool Frozen = N0.getOpcode() == ISD::FREEZE;
14983 auto *OldExtLoad = dyn_cast<LoadSDNode>(Frozen ? N0.getOperand(0) : N0);
14984 if (!OldExtLoad)
14985 return SDValue();
14986
// The existing load must already extend the same way we want to (or be an
// any-extending EXTLOAD, which is compatible with either).
14987 bool isAExtLoad = (ExtLoadType == ISD::SEXTLOAD)
14988 ? ISD::isSEXTLoad(OldExtLoad)
14989 : ISD::isZEXTLoad(OldExtLoad);
14990 if ((!isAExtLoad && !ISD::isEXTLoad(OldExtLoad)) ||
14991 !ISD::isUNINDEXEDLoad(OldExtLoad) || !OldExtLoad->hasNUsesOfValue(1, 0))
14992 return SDValue();
14993
14994 EVT MemVT = OldExtLoad->getMemoryVT();
14995 if ((LegalOperations || !OldExtLoad->isSimple() || VT.isVector()) &&
14996 !TLI.isLoadLegal(VT, MemVT, OldExtLoad->getAlign(),
14997 OldExtLoad->getAddressSpace(), ExtLoadType, false))
14998 return SDValue();
14999
15000 SDLoc DL(OldExtLoad);
15001 SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, DL, VT, OldExtLoad->getChain(),
15002 OldExtLoad->getBasePtr(), MemVT,
15003 OldExtLoad->getMemOperand());
15004 SDValue Res = ExtLoad;
15005 if (Frozen) {
// Re-freeze and assert the known extension of the original narrow value so
// later combines keep the information the freeze would otherwise hide.
15006 Res = DAG.getFreeze(ExtLoad);
15007 Res = DAG.getNode(
15008 ExtLoadType == ISD::SEXTLOAD ? ISD::AssertSext : ISD::AssertZext, DL,
15009 Res.getValueType(), Res,
15010 DAG.getValueType(OldExtLoad->getValueType(0).getScalarType()));
15011 }
15012 Combiner.CombineTo(N, Res);
15013 DAG.ReplaceAllUsesOfValueWith(SDValue(OldExtLoad, 1), ExtLoad.getValue(1));
15014 if (N0->use_empty())
15015 Combiner.recursivelyDeleteUnusedNodes(N0.getNode());
15016 return SDValue(N, 0); // Return N so it doesn't get rechecked!
15017}
15018
15019// fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x)))
15020// Only generate vector extloads when 1) they're legal, and 2) they are
15021// deemed desirable by the target. NonNegZExt can be set to true if a zero
15022// extend has the nonneg flag to allow use of sextload if profitable.
// NOTE(review): the declaration line naming this function (upstream
// tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner, ...)) is
// missing from this excerpt.
15024 const TargetLowering &TLI, EVT VT,
15025 bool LegalOperations, SDNode *N, SDValue N0,
15026 ISD::LoadExtType ExtLoadType,
15027 ISD::NodeType ExtOpc,
15028 bool NonNegZExt = false) {
15029
// Look through a freeze wrapper; it is re-applied on the result below.
15030 bool Frozen = N0.getOpcode() == ISD::FREEZE;
15031 SDValue Freeze = Frozen ? N0 : SDValue();
15032 auto *Load = dyn_cast<LoadSDNode>(Frozen ? N0.getOperand(0) : N0);
15033 // TODO: Support multiple uses of the load when frozen.
15034 if (!Load || !ISD::isNON_EXTLoad(Load) || !ISD::isUNINDEXEDLoad(Load) ||
15035 (Frozen && !Load->hasNUsesOfValue(1, 0)))
15036 return {};
15037
15038 // If this is zext nneg, see if it would make sense to treat it as a sext.
15039 if (NonNegZExt) {
15040 assert(ExtLoadType == ISD::ZEXTLOAD && ExtOpc == ISD::ZERO_EXTEND &&
15041 "Unexpected load type or opcode");
// Prefer a sextload when a signed compare consumes the load, so the compare
// can later fold into the extension.
15042 for (SDNode *User : Load->users()) {
15043 if (User->getOpcode() == ISD::SETCC) {
// NOTE(review): the line extracting the condition code (upstream line 15044,
// ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();) is
// missing from this excerpt.
15045 if (ISD::isSignedIntSetCC(CC)) {
15046 ExtLoadType = ISD::SEXTLOAD;
15047 ExtOpc = ISD::SIGN_EXTEND;
15048 break;
15049 }
15050 }
15051 }
15052 }
15053
15054 // TODO: isFixedLengthVector() should be removed and any negative effects on
15055 // code generation being the result of that target's implementation of
15056 // isVectorLoadExtDesirable().
15057 if ((LegalOperations || VT.isFixedLengthVector() || !Load->isSimple()) &&
15058 !TLI.isLoadLegal(VT, Load->getValueType(0), Load->getAlign(),
15059 Load->getAddressSpace(), ExtLoadType, false))
15060 return {};
15061
15062 bool DoXform = true;
// NOTE(review): the declaration of SetCCs (upstream line 15063,
// SmallVector<SDNode *, 4> SetCCs;) is missing from this excerpt.
15064 if (!N0->hasOneUse())
15065 DoXform = ExtendUsesToFormExtLoad(VT, N, Frozen ? Freeze : SDValue(Load, 0),
15066 ExtOpc, SetCCs, TLI);
15067 if (VT.isVector())
15068 DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
15069 if (!DoXform)
15070 return {};
15071
15072 SDLoc DL(Load);
15073
// Retarget a dbg_value that referred to Old so it refers to New, appending a
// DWARF convert/ext so the variable keeps its original (narrow) width.
15074 auto SalvageDbgValue = [&](SDDbgValue *Dbg, SDValue Old, SDValue New,
15075 unsigned OldBits, unsigned NewBits,
15076 bool IsSigned) {
15077 SmallVector<SDDbgOperand> Locs = Dbg->copyLocationOps();
15078 bool Changed = false;
15079
15080 bool IsVariadic = Dbg->isVariadic();
15081 SmallVector<unsigned, 2> AffectedArgs;
15082
15083 for (unsigned I = 0, E = Locs.size(); I != E; ++I) {
15084 SDDbgOperand &Op = Locs[I];
15085 if (Op.getKind() != SDDbgOperand::SDNODE)
15086 continue;
15087
15088 if (Op.getSDNode() == Old.getNode() && Op.getResNo() == Old.getResNo()) {
15089 Op = SDDbgOperand::fromNode(New.getNode(), New.getResNo());
15090 Changed = true;
15091
15092 if (IsVariadic)
15093 AffectedArgs.push_back(I);
15094 }
15095 }
15096
15097 if (!Changed)
15098 return;
15099
15100 const DIExpression *OldExpr = Dbg->getExpression();
15101 const DIExpression *NewExpr = nullptr;
15102
15103 if (!IsVariadic) {
15104 // Do not introduce DW_OP_LLVM_arg into ordinary single-location
15105 // DBG_VALUEs.
15106 NewExpr = DIExpression::appendExt(OldExpr, NewBits, OldBits, IsSigned);
15107 } else {
15108 auto ExtOps = DIExpression::getExtOps(NewBits, OldBits, IsSigned);
15109
// NOTE(review): one or two lines are missing here (upstream: initializing
// NewExpr from OldExpr, and the DIExpression::appendOpsToArg(NewExpr, ExtOps,
// ArgNo, ...) call that the loop body's trailing argument belongs to).
15111
15112 for (unsigned ArgNo : AffectedArgs)
15114 /*StackValue=*/false);
15115 }
15116
15117 SDDbgValue *NewDV = DAG.getDbgValueList(
15118 Dbg->getVariable(), const_cast<DIExpression *>(NewExpr), Locs,
15119 Dbg->getAdditionalDependencies(), Dbg->isIndirect(), Dbg->getDebugLoc(),
15120 Dbg->getOrder(), Dbg->isVariadic());
15121
15122 Dbg->setIsInvalidated();
15123 Dbg->setIsEmitted();
15124 DAG.AddDbgValue(NewDV, /*isParameter=*/false);
15125 };
15126
15127 // Because we are replacing a load and a s|z ext with a load-s|z ext
15128 // instruction, the dbg_value attached to the load will be of a smaller bit
15129 // width, and we have to add a DW_OP_LLVM_convert expression to get the
15130 // correct size.
15131 auto SalvageToOldLoadSize = [&](SDValue Old, SDValue New, bool IsSigned) {
// NOTE(review): the declaration opening this copy of the dbg-value list
// (upstream line 15132, SmallVector<SDDbgValue *, 2> DbgVals() is missing.
15133 DAG.GetDbgValues(Old.getNode()).begin(),
15134 DAG.GetDbgValues(Old.getNode()).end());
15135
15136 unsigned VarBitsOld = Old.getValueSizeInBits();
15137 unsigned VarBitsNew = New.getValueSizeInBits();
15138
15139 for (SDDbgValue *Dbg : DbgVals) {
15140 if (Dbg->isInvalidated())
15141 continue;
15142
15143 SalvageDbgValue(Dbg, Old, New, VarBitsOld, VarBitsNew, IsSigned);
15144 }
15145 };
15146
15147 SDValue ExtLoad =
15148 DAG.getExtLoad(ExtLoadType, DL, VT, Load->getChain(), Load->getBasePtr(),
15149 Load->getValueType(0), Load->getMemOperand());
15150 SDValue Res = ExtLoad;
15151 if (Frozen) {
// Re-freeze and assert the extension kind so the freeze doesn't discard it.
15152 Res = DAG.getFreeze(ExtLoad);
15153 Res = DAG.getNode(ExtLoadType == ISD::SEXTLOAD ? ISD::AssertSext
// NOTE(review): the alternate-operand line (upstream ": ISD::AssertZext,")
// of this conditional expression is missing from this excerpt.
15155 DL, Res.getValueType(), Res,
15156 DAG.getValueType(Load->getValueType(0).getScalarType()));
15157 }
15158 Combiner.ExtendSetCCUses(SetCCs, N0, Res, ExtOpc);
15159 // If the load value is used only by N, replace it via CombineTo N.
15160 bool NoReplaceTrunc = N0.hasOneUse();
15161 if (N->getHasDebugValue()) {
15162 SDValue OldExtValue(N, 0);
15163 DAG.transferDbgValues(OldExtValue, ExtLoad);
15164 }
15165 if (NoReplaceTrunc) {
15166 bool IsSigned = N->getOpcode() == ISD::SIGN_EXTEND;
15167 if (Load->getHasDebugValue()) {
15168 SDValue OldLoadVal(Load, 0);
15169 SalvageToOldLoadSize(OldLoadVal, ExtLoad, IsSigned);
15170 }
15171 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
15172 Combiner.CombineTo(N, Res);
15173 Combiner.recursivelyDeleteUnusedNodes(N0.getNode());
15174 } else {
// Other users of the narrow load remain; give them a truncate of the wide
// value instead.
15175 Combiner.CombineTo(N, Res);
15176 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, Load->getValueType(0), Res);
15177 if (Frozen) {
15178 Combiner.CombineTo(Freeze.getNode(), Trunc);
15179 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
15180 } else {
15181 Combiner.CombineTo(Load, Trunc, ExtLoad.getValue(1));
15182 }
15183 }
15184 return SDValue(N, 0); // Return N so it doesn't get rechecked!
15185}
15186
// fold ([s|z]ext (masked_load x)) -> ([s|z]ext-masked_load x), extending the
// pass-through value to match the widened result type.
15187static SDValue
// NOTE(review): the line naming this function (upstream
// tryToFoldExtOfMaskedLoad(SelectionDAG &DAG, const TargetLowering &TLI,
// EVT VT, ...)) is missing from this excerpt.
15189 bool LegalOperations, SDNode *N, SDValue N0,
15190 ISD::LoadExtType ExtLoadType, ISD::NodeType ExtOpc) {
15191 if (!N0.hasOneUse())
15192 return SDValue();
15193
// NOTE(review): the line initializing Ld (upstream, a
// dyn_cast<MaskedLoadSDNode>(N0)) is missing from this excerpt.
15195 if (!Ld || Ld->getExtensionType() != ISD::NON_EXTLOAD)
15196 return SDValue();
15197
15198 if ((LegalOperations || !cast<MaskedLoadSDNode>(N0)->isSimple()) &&
15199 !TLI.isLoadLegalOrCustom(VT, Ld->getValueType(0), Ld->getAlign(),
15200 Ld->getAddressSpace(), ExtLoadType, false))
15201 return SDValue();
15202
15203 if (!TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
15204 return SDValue();
15205
15206 SDLoc dl(Ld);
// Pass-through lanes must be widened the same way as the loaded lanes.
15207 SDValue PassThru = DAG.getNode(ExtOpc, dl, VT, Ld->getPassThru());
15208 SDValue NewLoad = DAG.getMaskedLoad(
15209 VT, dl, Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(), Ld->getMask(),
15210 PassThru, Ld->getMemoryVT(), Ld->getMemOperand(), Ld->getAddressingMode(),
15211 ExtLoadType, Ld->isExpandingLoad());
15212 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), SDValue(NewLoad.getNode(), 1));
15213 return NewLoad;
15214}
15215
15216// fold ([s|z]ext (atomic_load)) -> ([s|z]ext (truncate ([s|z]ext atomic_load)))
// NOTE(review): the declaration line naming this function (upstream
// tryToFoldExtOfAtomicLoad(SelectionDAG &DAG, ...)) is missing from this
// excerpt.
15218 const TargetLowering &TLI, EVT VT,
15219 SDValue N0,
15220 ISD::LoadExtType ExtLoadType) {
15221 auto *ALoad = dyn_cast<AtomicSDNode>(N0);
15222 if (!ALoad || ALoad->getOpcode() != ISD::ATOMIC_LOAD)
15223 return {};
15224 EVT MemoryVT = ALoad->getMemoryVT();
15225 if (!TLI.isLoadLegal(VT, MemoryVT, ALoad->getAlign(),
15226 ALoad->getAddressSpace(), ExtLoadType, true))
15227 return {};
15228 // Can't fold into ALoad if it is already extending differently.
15229 ISD::LoadExtType ALoadExtTy = ALoad->getExtensionType();
15230 if ((ALoadExtTy == ISD::ZEXTLOAD && ExtLoadType == ISD::SEXTLOAD) ||
15231 (ALoadExtTy == ISD::SEXTLOAD && ExtLoadType == ISD::ZEXTLOAD))
15232 return {};
15233
15234 EVT OrigVT = ALoad->getValueType(0);
15235 assert(OrigVT.getSizeInBits() < VT.getSizeInBits() && "VT should be wider.");
15236 auto *NewALoad = cast<AtomicSDNode>(DAG.getAtomicLoad(
15237 ExtLoadType, SDLoc(ALoad), MemoryVT, VT, ALoad->getChain(),
15238 ALoad->getBasePtr(), ALoad->getMemOperand()));
// Give remaining users of the narrow atomic load a truncate of the new wide
// load.
// NOTE(review): the call head here (upstream line 15239,
// DAG.ReplaceAllUsesOfValueWith() is missing from this excerpt.
15240 SDValue(ALoad, 0),
15241 DAG.getNode(ISD::TRUNCATE, SDLoc(ALoad), OrigVT, SDValue(NewALoad, 0)));
15242 // Update the chain uses.
15243 DAG.ReplaceAllUsesOfValueWith(SDValue(ALoad, 1), SDValue(NewALoad, 1));
15244 return SDValue(NewALoad, 0);
15245}
15246
// Fold the extension of an i1 "is non-negative" compare (setgt X, -1) into a
// not+shift of the sign bit; see the in-body comments for the exact patterns.
// NOTE(review): the declaration line naming this function (upstream
// foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG, ...)) is missing
// from this excerpt.
15248 bool LegalOperations) {
15249 assert((N->getOpcode() == ISD::SIGN_EXTEND ||
15250 N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext");
15251
15252 SDValue SetCC = N->getOperand(0);
15253 if (LegalOperations || SetCC.getOpcode() != ISD::SETCC ||
15254 !SetCC.hasOneUse() || SetCC.getValueType() != MVT::i1)
15255 return SDValue();
15256
15257 SDValue X = SetCC.getOperand(0);
15258 SDValue Ones = SetCC.getOperand(1);
15259 ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
15260 EVT VT = N->getValueType(0);
15261 EVT XVT = X.getValueType();
15262 // setge X, C is canonicalized to setgt, so we do not need to match that
15263 // pattern. The setlt sibling is folded in SimplifySelectCC() because it does
15264 // not require the 'not' op.
15265 if (CC == ISD::SETGT && isAllOnesConstant(Ones) && VT == XVT) {
15266 // Invert and smear/shift the sign bit:
15267 // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
15268 // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
15269 SDLoc DL(N);
15270 unsigned ShCt = VT.getSizeInBits() - 1;
15271 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
15272 if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
15273 SDValue NotX = DAG.getNOT(DL, X, VT);
15274 SDValue ShiftAmount = DAG.getConstant(ShCt, DL, VT);
// sext smears the sign bit (arithmetic shift); zext isolates it (logical).
15275 auto ShiftOpcode =
15276 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL;
15277 return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount);
15278 }
15279 }
15280 return SDValue();
15281}
15282
// Combine (sext (setcc ...)) in several ways: widen the setcc itself when the
// target's setcc result type already matches, zext the compare operands when
// that is free, or turn the whole thing into select(setcc, T, 0).
15283SDValue DAGCombiner::foldSextSetcc(SDNode *N) {
15284 SDValue N0 = N->getOperand(0);
15285 if (N0.getOpcode() != ISD::SETCC)
15286 return SDValue();
15287
15288 SDValue N00 = N0.getOperand(0);
15289 SDValue N01 = N0.getOperand(1);
// NOTE(review): the line extracting the condition code (upstream line 15290,
// ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();) is
// missing from this excerpt.
15291 EVT VT = N->getValueType(0);
15292 EVT N00VT = N00.getValueType();
15293 SDLoc DL(N);
15294
15295 // Propagate fast-math-flags.
15296 SDNodeFlags Flags = N0->getFlags();
15297
15298 // On some architectures (such as SSE/NEON/etc) the SETCC result type is
15299 // the same size as the compared operands. Try to optimize sext(setcc())
15300 // if this is the case.
15301 if (VT.isVector() && !LegalOperations &&
15302 TLI.getBooleanContents(N00VT) ==
// NOTE(review): the right-hand side of this comparison (upstream line 15303,
// TargetLowering::ZeroOrNegativeOneBooleanContent) { ) is missing.
15304 EVT SVT = getSetCCResultType(N00VT);
15305
15306 // If we already have the desired type, don't change it.
15307 if (SVT != N0.getValueType()) {
15308 // We know that the # elements of the results is the same as the
15309 // # elements of the compare (and the # elements of the compare result
15310 // for that matter). Check to see that they are the same size. If so,
15311 // we know that the element size of the sext'd result matches the
15312 // element size of the compare operands.
15313 if (VT.getSizeInBits() == SVT.getSizeInBits())
15314 return DAG.getSetCC(DL, VT, N00, N01, CC, /*Chain=*/{},
15315 /*Signaling=*/false, Flags);
15316
15317 // If the desired elements are smaller or larger than the source
15318 // elements, we can use a matching integer vector type and then
15319 // truncate/sign extend.
15320 EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
15321 if (SVT == MatchingVecType) {
15322 SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC,
15323 /*Chain=*/{}, /*Signaling=*/false, Flags);
15324 return DAG.getSExtOrTrunc(VsetCC, DL, VT);
15325 }
15326 }
15327
15328 // Try to eliminate the sext of a setcc by zexting the compare operands.
15329 if (N0.hasOneUse() && TLI.isOperationLegalOrCustom(ISD::SETCC, VT) &&
// NOTE(review): the tail of this condition (upstream line 15330, the negated
// legality check of ISD::SETCC at the narrow type) is missing here.
15331 bool IsSignedCmp = ISD::isSignedIntSetCC(CC);
15332 unsigned LoadOpcode = IsSignedCmp ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
15333 unsigned ExtOpcode = IsSignedCmp ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
15334
15335 // We have an unsupported narrow vector compare op that would be legal
15336 // if extended to the destination type. See if the compare operands
15337 // can be freely extended to the destination type.
15338 auto IsFreeToExtend = [&](SDValue V) {
15339 if (isConstantOrConstantVector(V, /*NoOpaques*/ true))
15340 return true;
15341 // Match a simple, non-extended load that can be converted to a
15342 // legal {z/s}ext-load.
15343 // TODO: Allow widening of an existing {z/s}ext-load?
15344 if (!(ISD::isNON_EXTLoad(V.getNode()) &&
15345 ISD::isUNINDEXEDLoad(V.getNode())))
15346 return false;
15347
15348 LoadSDNode *Ld = cast<LoadSDNode>(V.getNode());
15349
15350 if (!Ld->isSimple() ||
15351 !TLI.isLoadLegal(VT, V.getValueType(), Ld->getAlign(),
15352 Ld->getAddressSpace(), LoadOpcode, false))
15353 return false;
15354
15355 // Non-chain users of this value must either be the setcc in this
15356 // sequence or extends that can be folded into the new {z/s}ext-load.
15357 for (SDUse &Use : V->uses()) {
15358 // Skip uses of the chain and the setcc.
15359 SDNode *User = Use.getUser();
15360 if (Use.getResNo() != 0 || User == N0.getNode())
15361 continue;
15362 // Extra users must have exactly the same cast we are about to create.
15363 // TODO: This restriction could be eased if ExtendUsesToFormExtLoad()
15364 // is enhanced similarly.
15365 if (User->getOpcode() != ExtOpcode || User->getValueType(0) != VT)
15366 return false;
15367 }
15368 return true;
15369 };
15370
15371 if (IsFreeToExtend(N00) && IsFreeToExtend(N01)) {
15372 SDValue Ext0 = DAG.getNode(ExtOpcode, DL, VT, N00);
15373 SDValue Ext1 = DAG.getNode(ExtOpcode, DL, VT, N01);
15374 return DAG.getSetCC(DL, VT, Ext0, Ext1, CC, /*Chain=*/{},
15375 /*Signaling=*/false, Flags);
15376 }
15377 }
15378 }
15379
15380 // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
15381 // Here, T can be 1 or -1, depending on the type of the setcc and
15382 // getBooleanContents().
15383 unsigned SetCCWidth = N0.getScalarValueSizeInBits();
15384
15385 // To determine the "true" side of the select, we need to know the high bit
15386 // of the value returned by the setcc if it evaluates to true.
15387 // If the type of the setcc is i1, then the true case of the select is just
15388 // sext(i1 1), that is, -1.
15389 // If the type of the setcc is larger (say, i8) then the value of the high
15390 // bit depends on getBooleanContents(), so ask TLI for a real "true" value
15391 // of the appropriate width.
15392 SDValue ExtTrueVal = (SetCCWidth == 1)
15393 ? DAG.getAllOnesConstant(DL, VT)
15394 : DAG.getBoolConstant(true, DL, VT, N00VT);
15395 SDValue Zero = DAG.getConstant(0, DL, VT);
15396 if (SDValue SCC = SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
15397 return SCC;
15398
15399 if (!VT.isVector() && !shouldConvertSelectOfConstantsToMath(N0, VT, TLI)) {
15400 EVT SetCCVT = getSetCCResultType(N00VT);
15401 // Don't do this transform for i1 because there's a select transform
15402 // that would reverse it.
15403 // TODO: We should not do this transform at all without a target hook
15404 // because a sext is likely cheaper than a select?
15405 if (SetCCVT.getScalarSizeInBits() != 1 &&
15406 (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
15407 SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC, /*Chain=*/{},
15408 /*Signaling=*/false, Flags);
15409 return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero, Flags);
15410 }
15411 }
15412
15413 return SDValue();
15414}
15415
/// Combine an ISD::SIGN_EXTEND node: constant folding, collapsing nested
/// extends, eliminating redundant trunc/sext pairs, forming sextloads, and
/// lowering sext-of-setcc.  Returns the replacement value, or the empty
/// SDValue() if no combine applies.
15416SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
15417  SDValue N0 = N->getOperand(0);
15418  EVT VT = N->getValueType(0);
15419  SDLoc DL(N);
15420
  // Generic vector-cast simplification first.
15421  if (VT.isVector())
15422    if (SDValue FoldedVOp = SimplifyVCastOp(N, DL))
15423      return FoldedVOp;
15424
15425  // sext(undef) = 0 because the top bit will all be the same.
15426  if (N0.isUndef())
15427    return DAG.getConstant(0, DL, VT);
15428
15429  if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
15430    return Res;
15431
15432  // fold (sext (sext x)) -> (sext x)
15433  // fold (sext (aext x)) -> (sext x)
15434  if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
15435    return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));
15436
15437  // fold (sext (aext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
15438  // fold (sext (sext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
  // NOTE(review): the opcode guard for this fold is not visible in this
  // excerpt (source lines 15439-15440 elided); verify against upstream
  // DAGCombiner.cpp before relying on this rendering.
15441    return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT,
15442                       N0.getOperand(0));
15443
15444  if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
15445    SDValue N00 = N0.getOperand(0);
15446    EVT ExtVT = cast<VTSDNode>(N0->getOperand(1))->getVT();
    // Only profitable when the truncate to ExtVT is free (or already there).
15447    if (N00.getOpcode() == ISD::TRUNCATE || TLI.isTruncateFree(N00, ExtVT)) {
15448      // fold (sext (sext_inreg x)) -> (sext (trunc x))
15449      if ((!LegalTypes || TLI.isTypeLegal(ExtVT))) {
15450        SDValue T = DAG.getNode(ISD::TRUNCATE, DL, ExtVT, N00);
15451        return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, T);
15452      }
15453
15454      // If the trunc wasn't legal, try to fold to (sext_inreg (anyext x))
15455      if (!LegalTypes || TLI.isTypeLegal(VT)) {
15456        SDValue ExtSrc = DAG.getAnyExtOrTrunc(N00, DL, VT);
15457        return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, ExtSrc,
15458                           N0->getOperand(1));
15459      }
15460    }
15461  }
15462
15463  if (N0.getOpcode() == ISD::TRUNCATE) {
15464    // fold (sext (truncate (load x))) -> (sext (smaller load x))
15465    // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
15466    if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
15467      SDNode *oye = N0.getOperand(0).getNode();
15468      if (NarrowLoad.getNode() != N0.getNode()) {
15469        CombineTo(N0.getNode(), NarrowLoad);
15470        // CombineTo deleted the truncate, if needed, but not what's under it.
15471        AddToWorklist(oye);
15472      }
15473      return SDValue(N, 0); // Return N so it doesn't get rechecked!
15474    }
15475
15476    // See if the value being truncated is already sign extended. If so, just
15477    // eliminate the trunc/sext pair.
15478    SDValue Op = N0.getOperand(0);
15479    unsigned OpBits = Op.getScalarValueSizeInBits();
15480    unsigned MidBits = N0.getScalarValueSizeInBits();
15481    unsigned DestBits = VT.getScalarSizeInBits();
15482
    // The pair is redundant if the truncate carries NSW or Op already has
    // more sign bits than the truncate discards.
15483    if (N0->getFlags().hasNoSignedWrap() ||
15484        DAG.ComputeNumSignBits(Op) > OpBits - MidBits) {
15485      if (OpBits == DestBits) {
15486        // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
15487        // bits, it is already ready.
15488        return Op;
15489      }
15490
15491      if (OpBits < DestBits) {
15492        // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
15493        // bits, just sext from i32.
15494        return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
15495      }
15496
15497      // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
15498      // bits, just truncate to i32.
15499      SDNodeFlags Flags;
15500      Flags.setNoSignedWrap(true);
15501      Flags.setNoUnsignedWrap(N0->getFlags().hasNoUnsignedWrap());
15502      return DAG.getNode(ISD::TRUNCATE, DL, VT, Op, Flags);
15503    }
15504
15505    // fold (sext (truncate x)) -> (sextinreg x).
15506    if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
15507                                                 N0.getValueType())) {
15508      if (OpBits < DestBits)
15509        Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
15510      else if (OpBits > DestBits)
15511        Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
15512      return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
15513                         DAG.getValueType(N0.getValueType()));
15514    }
15515  }
15516
15517  // Try to simplify (sext (load x)).
  // NOTE(review): trailing argument lines of the next two calls are elided
  // in this excerpt (source lines 15520, 15525).
15518  if (SDValue foldedExt =
15519          tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
15521    return foldedExt;
15522
15523  if (SDValue foldedExt =
15524          tryToFoldExtOfMaskedLoad(DAG, TLI, VT, LegalOperations, N, N0,
15526    return foldedExt;
15527
15528  // fold (sext (load x)) to multiple smaller sextloads.
15529  // Only on illegal but splittable vectors.
15530  if (SDValue ExtLoad = CombineExtLoad(N))
15531    return ExtLoad;
15532
15533  // Try to simplify (sext (sextload x)).
15534  if (SDValue foldedExt = tryToFoldExtOfExtload(
15535          DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD))
15536    return foldedExt;
15537
15538  // Try to simplify (sext (atomic_load x)).
15539  if (SDValue foldedExt =
15540          tryToFoldExtOfAtomicLoad(DAG, TLI, VT, N0, ISD::SEXTLOAD))
15541    return foldedExt;
15542
15543  // fold (sext (and/or/xor (load x), cst)) ->
15544  //      (and/or/xor (sextload x), (sext cst))
15545  if (ISD::isBitwiseLogicOp(N0.getOpcode()) &&
15546      isa<LoadSDNode>(N0.getOperand(0)) &&
15547      N0.getOperand(1).getOpcode() == ISD::Constant &&
15548      (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
15549    LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
15550    EVT MemVT = LN00->getMemoryVT();
    // Only when a sextload of MemVT is legal and the existing load is not
    // already a zero-extending or indexed load.
15551    if (TLI.isLoadLegal(VT, MemVT, LN00->getAlign(), LN00->getAddressSpace(),
15552                        ISD::SEXTLOAD, false) &&
15553        LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) {
15555      bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
15556                                             ISD::SIGN_EXTEND, SetCCs, TLI);
15557      if (DoXform) {
15558        SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT,
15559                                         LN00->getChain(), LN00->getBasePtr(),
15560                                         LN00->getMemoryVT(),
15561                                         LN00->getMemOperand());
15562        APInt Mask = N0.getConstantOperandAPInt(1).sext(VT.getSizeInBits());
15563        SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
15564                                  ExtLoad, DAG.getConstant(Mask, DL, VT));
15565        ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND);
15566        bool NoReplaceTruncAnd = !N0.hasOneUse();
15567        bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
15568        CombineTo(N, And);
15569        // If N0 has multiple uses, change other uses as well.
15570        if (NoReplaceTruncAnd) {
15571          SDValue TruncAnd =
15573          CombineTo(N0.getNode(), TruncAnd);
15574        }
        // Rewire the load's chain result, either directly or through a
        // truncate of the new extending load's value result.
15575        if (NoReplaceTrunc) {
15576          DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
15577        } else {
15578          SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
15579                                      LN00->getValueType(0), ExtLoad);
15580          CombineTo(LN00, Trunc, ExtLoad.getValue(1));
15581        }
15582        return SDValue(N,0); // Return N so it doesn't get rechecked!
15583      }
15584    }
15585  }
15586
15587  if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
15588    return V;
15589
15590  if (SDValue V = foldSextSetcc(N))
15591    return V;
15592
15593  // fold (sext x) -> (zext x) if the sign bit is known zero.
15594  if (!TLI.isSExtCheaperThanZExt(N0.getValueType(), VT) &&
15595      (!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
15596      DAG.SignBitIsZero(N0))
15597    return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0, SDNodeFlags::NonNeg);
15598
15599  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
15600    return NewVSel;
15601
15602  // Eliminate this sign extend by doing a negation in the destination type:
15603  // sext i32 (0 - (zext i8 X to i32)) to i64 --> 0 - (zext i8 X to i64)
  // NOTE(review): the remaining guard conditions of the next two folds are
  // elided in this excerpt (source lines 15605-15607 and 15614-15616).
15604  if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
15608    SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(1).getOperand(0), DL, VT);
15609    return DAG.getNegative(Zext, DL, VT);
15610  }
15611  // Eliminate this sign extend by doing a decrement in the destination type:
15612  // sext i32 ((zext i8 X to i32) + (-1)) to i64 --> (zext i8 X to i64) + (-1)
15613  if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
15617    SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
15618    return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
15619  }
15620
15621  // fold sext (not i1 X) -> add (zext i1 X), -1
15622  // TODO: This could be extended to handle bool vectors.
15623  if (N0.getValueType() == MVT::i1 && isBitwiseNot(N0) && N0.hasOneUse() &&
15624      (!LegalOperations || (TLI.isOperationLegal(ISD::ZERO_EXTEND, VT) &&
15625                            TLI.isOperationLegal(ISD::ADD, VT)))) {
15626    // If we can eliminate the 'not', the sext form should be better
15627    if (SDValue NewXor = visitXOR(N0.getNode())) {
15628      // Returning N0 is a form of in-visit replacement that may have
15629      // invalidated N0.
15630      if (NewXor.getNode() == N0.getNode()) {
15631        // Return SDValue here as the xor should have already been replaced in
15632        // this sext.
15633        return SDValue();
15634      }
15635
15636      // Return a new sext with the new xor.
15637      return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NewXor);
15638    }
15639
15640    SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
15641    return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
15642  }
15643
15644  if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, DL, Level))
15645    return Res;
15646
15647  return SDValue();
15648}
15649
15650/// Given an extending node with a pop-count operand, if the target does not
15651/// support a pop-count in the narrow source type but does support it in the
15652/// destination type, widen the pop-count to the destination type.
15653static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG, const SDLoc &DL) {
15654  assert((Extend->getOpcode() == ISD::ZERO_EXTEND ||
15655          Extend->getOpcode() == ISD::ANY_EXTEND) &&
15656         "Expected extend op");
15657
  // Only fold a single-use ctpop; otherwise the narrow ctpop must stay anyway.
15658  SDValue CtPop = Extend->getOperand(0);
15659  if (CtPop.getOpcode() != ISD::CTPOP || !CtPop.hasOneUse())
15660    return SDValue();
15661
15662  EVT VT = Extend->getValueType(0);
15663  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  // NOTE(review): the legality condition guarding this fold is elided in
  // this excerpt (source lines 15664-15665); verify against upstream
  // DAGCombiner.cpp.
15666    return SDValue();
15667
15668  // zext (ctpop X) --> ctpop (zext X)
  // Zero-extending the input cannot change its population count, so the
  // wide ctpop computes the same value as the narrow one.
15669  SDValue NewZext = DAG.getZExtOrTrunc(CtPop.getOperand(0), DL, VT);
15670  return DAG.getNode(ISD::CTPOP, DL, VT, NewZext);
15671}
15672
15673// If we have (zext (abs X)) where X is a type that will be promoted by type
15674// legalization, convert to (abs (sext X)). But don't extend past a legal type.
15675static SDValue widenAbs(SDNode *Extend, SelectionDAG &DAG) {
15676  assert(Extend->getOpcode() == ISD::ZERO_EXTEND && "Expected zero extend.");
15677
  // Scalar-only transform.
15678  EVT VT = Extend->getValueType(0);
15679  if (VT.isVector())
15680    return SDValue();
15681
  // Only fold a single-use abs; otherwise the narrow abs must stay anyway.
15682  SDValue Abs = Extend->getOperand(0);
15683  if (Abs.getOpcode() != ISD::ABS || !Abs.hasOneUse())
15684    return SDValue();
15685
15686  EVT AbsVT = Abs.getValueType();
15687  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  // Only act when type legalization would widen AbsVT anyway.
  // NOTE(review): the right-hand side of this comparison is elided in this
  // excerpt (source line 15689) -- likely TargetLowering::TypePromoteInteger;
  // verify against upstream DAGCombiner.cpp.
15688  if (TLI.getTypeAction(*DAG.getContext(), AbsVT) !=
15690    return SDValue();
15691
15692  EVT LegalVT = TLI.getTypeToTransformTo(*DAG.getContext(), AbsVT);
15693
  // abs is sign-symmetric, so sign-extending the input first preserves the
  // result; the final zext/trunc narrows back to the extend's result type.
15694  SDValue SExt =
15695      DAG.getNode(ISD::SIGN_EXTEND, SDLoc(Abs), LegalVT, Abs.getOperand(0));
15696  SDValue NewAbs = DAG.getNode(ISD::ABS, SDLoc(Abs), LegalVT, SExt);
15697  return DAG.getZExtOrTrunc(NewAbs, SDLoc(Extend), VT);
15698}
15699
/// Combine an ISD::ZERO_EXTEND node: constant folding, collapsing nested
/// extends, zext-of-truncate masking, zextload formation, setcc lowering,
/// shift reassociation, and ctpop/abs widening.  Returns the replacement
/// value, or the empty SDValue() if no combine applies.
15700SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
15701  SDValue N0 = N->getOperand(0);
15702  EVT VT = N->getValueType(0);
15703  SDLoc DL(N);
15704
15705  if (VT.isVector())
15706    if (SDValue FoldedVOp = SimplifyVCastOp(N, DL))
15707      return FoldedVOp;
15708
15709  // zext(undef) = 0
15710  if (N0.isUndef())
15711    return DAG.getConstant(0, DL, VT);
15712
15713  if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
15714    return Res;
15715
15716  // fold (zext (zext x)) -> (zext x)
15717  // fold (zext (aext x)) -> (zext x)
15718  if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
    // Preserve the nneg flag only when the inner node was itself a zext.
15719    SDNodeFlags Flags;
15720    if (N0.getOpcode() == ISD::ZERO_EXTEND)
15721      Flags.setNonNeg(N0->getFlags().hasNonNeg());
15722    return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0), Flags);
15723  }
15724
15725  // fold (zext (aext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
15726  // fold (zext (zext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
  // NOTE(review): the opcode guard for this fold is not visible in this
  // excerpt (source lines 15727-15728 elided); verify against upstream
  // DAGCombiner.cpp.
15729    return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, DL, VT, N0.getOperand(0));
15730
15731  // fold (zext (truncate x)) -> (zext x) or
15732  //      (zext (truncate x)) -> (truncate x)
15733  // This is valid when the truncated bits of x are already zero.
15734  SDValue Op;
15735  KnownBits Known;
15736  if (isTruncateOf(DAG, N0, Op, Known)) {
    // TruncatedBits are the bits the truncate discards (between the mid
    // width and the narrower of Op's and VT's widths); if they are all
    // known zero the zext(trunc) collapses to a plain zext/trunc of Op.
15737    APInt TruncatedBits =
15738      (Op.getScalarValueSizeInBits() == N0.getScalarValueSizeInBits()) ?
15739      APInt(Op.getScalarValueSizeInBits(), 0) :
15740      APInt::getBitsSet(Op.getScalarValueSizeInBits(),
15741                        N0.getScalarValueSizeInBits(),
15742                        std::min(Op.getScalarValueSizeInBits(),
15743                                 VT.getScalarSizeInBits()));
15744    if (TruncatedBits.isSubsetOf(Known.Zero)) {
15745      SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, DL, VT);
15746      DAG.salvageDebugInfo(*N0.getNode());
15747
15748      return ZExtOrTrunc;
15749    }
15750  }
15751
15752  // fold (zext (truncate x)) -> (and x, mask)
15753  if (N0.getOpcode() == ISD::TRUNCATE) {
15754    // fold (zext (truncate (load x))) -> (zext (smaller load x))
15755    // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
15756    if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
15757      SDNode *oye = N0.getOperand(0).getNode();
15758      if (NarrowLoad.getNode() != N0.getNode()) {
15759        CombineTo(N0.getNode(), NarrowLoad);
15760        // CombineTo deleted the truncate, if needed, but not what's under it.
15761        AddToWorklist(oye);
15762      }
15763      return SDValue(N, 0); // Return N so it doesn't get rechecked!
15764    }
15765
15766    EVT SrcVT = N0.getOperand(0).getValueType();
15767    EVT MinVT = N0.getValueType();
15768
    // With the nneg flag the zext behaves like a sext, so reuse the
    // "already sign extended" reasoning from visitSIGN_EXTEND.
15769    if (N->getFlags().hasNonNeg()) {
15770      SDValue Op = N0.getOperand(0);
15771      unsigned OpBits = SrcVT.getScalarSizeInBits();
15772      unsigned MidBits = MinVT.getScalarSizeInBits();
15773      unsigned DestBits = VT.getScalarSizeInBits();
15774
15775      if (N0->getFlags().hasNoSignedWrap() ||
15776          DAG.ComputeNumSignBits(Op) > OpBits - MidBits) {
15777        if (OpBits == DestBits) {
15778          // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
15779          // bits, it is already ready.
15780          return Op;
15781        }
15782
15783        if (OpBits < DestBits) {
15784          // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
15785          // bits, just sext from i32.
15786          // FIXME: This can probably be ZERO_EXTEND nneg?
15787          return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
15788        }
15789
15790        // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
15791        // bits, just truncate to i32.
15792        SDNodeFlags Flags;
15793        Flags.setNoSignedWrap(true);
15794        Flags.setNoUnsignedWrap(true);
15795        return DAG.getNode(ISD::TRUNCATE, DL, VT, Op, Flags);
15796      }
15797    }
15798
15799    // Try to mask before the extension to avoid having to generate a larger mask,
15800    // possibly over several sub-vectors.
15801    if (SrcVT.bitsLT(VT) && VT.isVector()) {
      // NOTE(review): part of this legality condition is elided in this
      // excerpt (source line 15803); verify against upstream DAGCombiner.cpp.
15802      if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
15804        SDValue Op = N0.getOperand(0);
15805        Op = DAG.getZeroExtendInReg(Op, DL, MinVT);
15806        AddToWorklist(Op.getNode());
15807        SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, DL, VT);
15808        // Transfer the debug info; the new node is equivalent to N0.
15809        DAG.transferDbgValues(N0, ZExtOrTrunc);
15810        return ZExtOrTrunc;
15811      }
15812    }
15813
15814    if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
15815      SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), DL, VT);
15816      AddToWorklist(Op.getNode());
15817      SDValue And = DAG.getZeroExtendInReg(Op, DL, MinVT);
15818      // We may safely transfer the debug info describing the truncate node over
15819      // to the equivalent and operation.
15820      DAG.transferDbgValues(N0, And);
15821      return And;
15822    }
15823  }
15824
15825  // Fold (zext (and (trunc x), cst)) -> (and x, cst),
15826  // if either of the casts is not free.
15827  if (N0.getOpcode() == ISD::AND &&
15828      N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
15829      N0.getOperand(1).getOpcode() == ISD::Constant &&
15830      (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0), N0.getValueType()) ||
15831       !TLI.isZExtFree(N0.getValueType(), VT))) {
15832    SDValue X = N0.getOperand(0).getOperand(0);
15833    X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
    // Widen the mask constant so the and now clears the same (and the newly
    // exposed high) bits in the destination type.
15834    APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
15835    return DAG.getNode(ISD::AND, DL, VT,
15836                       X, DAG.getConstant(Mask, DL, VT));
15837  }
15838
15839  // Try to simplify (zext (load x)).
15840  if (SDValue foldedExt = tryToFoldExtOfLoad(
15841          DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD,
15842          ISD::ZERO_EXTEND, N->getFlags().hasNonNeg()))
15843    return foldedExt;
15844
  // NOTE(review): the trailing argument line of this call is elided in this
  // excerpt (source line 15847).
15845  if (SDValue foldedExt =
15846          tryToFoldExtOfMaskedLoad(DAG, TLI, VT, LegalOperations, N, N0,
15848    return foldedExt;
15849
15850  // fold (zext (load x)) to multiple smaller zextloads.
15851  // Only on illegal but splittable vectors.
15852  if (SDValue ExtLoad = CombineExtLoad(N))
15853    return ExtLoad;
15854
15855  // Try to simplify (zext (atomic_load x)).
15856  if (SDValue foldedExt =
15857          tryToFoldExtOfAtomicLoad(DAG, TLI, VT, N0, ISD::ZEXTLOAD))
15858    return foldedExt;
15859
15860  // fold (zext (and/or/xor (load x), cst)) ->
15861  //      (and/or/xor (zextload x), (zext cst))
15862  // Unless (and (load x) cst) will match as a zextload already and has
15863  // additional users, or the zext is already free.
15864  if (ISD::isBitwiseLogicOp(N0.getOpcode()) && !TLI.isZExtFree(N0, VT) &&
15865      isa<LoadSDNode>(N0.getOperand(0)) &&
15866      N0.getOperand(1).getOpcode() == ISD::Constant &&
15867      (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
15868    LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
15869    EVT MemVT = LN00->getMemoryVT();
15870    if (TLI.isLoadLegal(VT, MemVT, LN00->getAlign(), LN00->getAddressSpace(),
15871                        ISD::ZEXTLOAD, false) &&
15872        LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
15873      bool DoXform = true;
15875      if (!N0.hasOneUse()) {
        // If the and could already fold into a narrower zextload on its own,
        // keep it as-is rather than widening.
15876        if (N0.getOpcode() == ISD::AND) {
15877          auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
15878          EVT LoadResultTy = AndC->getValueType(0);
15879          EVT ExtVT;
15880          if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT))
15881            DoXform = false;
15882        }
15883      }
15884      if (DoXform)
15885        DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
15886                                          ISD::ZERO_EXTEND, SetCCs, TLI);
15887      if (DoXform) {
15888        SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT,
15889                                         LN00->getChain(), LN00->getBasePtr(),
15890                                         LN00->getMemoryVT(),
15891                                         LN00->getMemOperand());
15892        APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
15893        SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
15894                                  ExtLoad, DAG.getConstant(Mask, DL, VT));
15895        ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
15896        bool NoReplaceTruncAnd = !N0.hasOneUse();
15897        bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
15898        CombineTo(N, And);
15899        // If N0 has multiple uses, change other uses as well.
15900        if (NoReplaceTruncAnd) {
15901          SDValue TruncAnd =
15903          CombineTo(N0.getNode(), TruncAnd);
15904        }
        // Rewire the load's chain result, either directly or through a
        // truncate of the new extending load's value result.
15905        if (NoReplaceTrunc) {
15906          DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
15907        } else {
15908          SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
15909                                      LN00->getValueType(0), ExtLoad);
15910          CombineTo(LN00, Trunc, ExtLoad.getValue(1));
15911        }
15912        return SDValue(N,0); // Return N so it doesn't get rechecked!
15913      }
15914    }
15915  }
15916
15917  // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
15918  //      (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
15919  if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
15920    return ZExtLoad;
15921
15922  // Try to simplify (zext (zextload x)).
15923  if (SDValue foldedExt = tryToFoldExtOfExtload(
15924          DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD))
15925    return foldedExt;
15926
15927  if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
15928    return V;
15929
15930  if (N0.getOpcode() == ISD::SETCC) {
15931    // Propagate fast-math-flags.
15932    SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
15933
15934    // Only do this before legalize for now.
15935    if (!LegalOperations && VT.isVector() &&
15936        N0.getValueType().getVectorElementType() == MVT::i1) {
15937      EVT N00VT = N0.getOperand(0).getValueType();
15938      if (getSetCCResultType(N00VT) == N0.getValueType())
15939        return SDValue();
15940
15941      // We know that the # elements of the results is the same as the #
15942      // elements of the compare (and the # elements of the compare result for
15943      // that matter). Check to see that they are the same size. If so, we know
15944      // that the element size of the sext'd result matches the element size of
15945      // the compare operands.
15946      if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
15947        // zext(setcc) -> zext_in_reg(vsetcc) for vectors.
15948        SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
15949                                     N0.getOperand(1), N0.getOperand(2));
15950        return DAG.getZeroExtendInReg(VSetCC, DL, N0.getValueType());
15951      }
15952
15953      // If the desired elements are smaller or larger than the source
15954      // elements we can use a matching integer vector type and then
15955      // truncate/any extend followed by zext_in_reg.
15956      EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
15957      SDValue VsetCC =
15958          DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
15959                      N0.getOperand(1), N0.getOperand(2));
15960      return DAG.getZeroExtendInReg(DAG.getAnyExtOrTrunc(VsetCC, DL, VT), DL,
15961                                    N0.getValueType());
15962    }
15963
15964    // zext(setcc x,y,cc) -> zext(select x, y, true, false, cc)
15965    EVT N0VT = N0.getValueType();
15966    EVT N00VT = N0.getOperand(0).getValueType();
15967    if (SDValue SCC = SimplifySelectCC(
15968            DL, N0.getOperand(0), N0.getOperand(1),
15969            DAG.getBoolConstant(true, DL, N0VT, N00VT),
15970            DAG.getBoolConstant(false, DL, N0VT, N00VT),
15971            cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
15972      return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, SCC);
15973  }
15974
15975  // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
15976  if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
15977      !TLI.isZExtFree(N0, VT)) {
15978    SDValue ShVal = N0.getOperand(0);
15979    SDValue ShAmt = N0.getOperand(1);
15980    if (auto *ShAmtC = dyn_cast<ConstantSDNode>(ShAmt)) {
15981      if (ShVal.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse()) {
15982        if (N0.getOpcode() == ISD::SHL) {
15983          // If the original shl may be shifting out bits, do not perform this
15984          // transformation.
          // The inner zext guarantees this many leading zeros...
15985          unsigned KnownZeroBits = ShVal.getValueSizeInBits() -
15986                                   ShVal.getOperand(0).getValueSizeInBits();
15987          if (ShAmtC->getAPIntValue().ugt(KnownZeroBits)) {
15988            // If the shift is too large, then see if we can deduce that the
15989            // shift is safe anyway.
15990
15991            // Check if the bits being shifted out are known to be zero.
15992            KnownBits KnownShVal = DAG.computeKnownBits(ShVal);
15993            if (ShAmtC->getAPIntValue().ugt(KnownShVal.countMinLeadingZeros()))
15994              return SDValue();
15995          }
15996        }
15997
15998        // Ensure that the shift amount is wide enough for the shifted value.
15999        if (Log2_32_Ceil(VT.getSizeInBits()) > ShAmt.getValueSizeInBits())
16000          ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
16001
16002        return DAG.getNode(N0.getOpcode(), DL, VT,
16003                           DAG.getNode(ISD::ZERO_EXTEND, DL, VT, ShVal), ShAmt);
16004      }
16005    }
16006  }
16007
16008  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
16009    return NewVSel;
16010
16011  if (SDValue NewCtPop = widenCtPop(N, DAG, DL))
16012    return NewCtPop;
16013
16014  if (SDValue V = widenAbs(N, DAG))
16015    return V;
16016
16017  if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, DL, Level))
16018    return Res;
16019
16020  // CSE zext nneg with sext if the zext is not free.
16021  if (N->getFlags().hasNonNeg() && !TLI.isZExtFree(N0.getValueType(), VT)) {
16022    SDNode *CSENode = DAG.getNodeIfExists(ISD::SIGN_EXTEND, N->getVTList(), N0);
16023    if (CSENode)
16024      return SDValue(CSENode, 0);
16025  }
16026
16027  return SDValue();
16028}
16029
/// Combine an ISD::ANY_EXTEND node: constant folding, collapsing nested
/// extends, trunc elimination, extload formation and setcc lowering.
/// Returns the replacement value, or the empty SDValue() if no combine
/// applies.
16030SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
16031  SDValue N0 = N->getOperand(0);
16032  EVT VT = N->getValueType(0);
16033  SDLoc DL(N);
16034
16035  // aext(undef) = undef
16036  if (N0.isUndef())
16037    return DAG.getUNDEF(VT);
16038
16039  if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
16040    return Res;
16041
16042  // fold (aext (aext x)) -> (aext x)
16043  // fold (aext (zext x)) -> (zext x)
16044  // fold (aext (sext x)) -> (sext x)
16045  if (N0.getOpcode() == ISD::ANY_EXTEND || N0.getOpcode() == ISD::ZERO_EXTEND ||
16046      N0.getOpcode() == ISD::SIGN_EXTEND) {
    // Keep the inner extend's opcode (and its nneg flag for zext) on the
    // wider node.
16047    SDNodeFlags Flags;
16048    if (N0.getOpcode() == ISD::ZERO_EXTEND)
16049      Flags.setNonNeg(N0->getFlags().hasNonNeg());
16050    return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0), Flags);
16051  }
16052
16053  // fold (aext (aext_extend_vector_inreg x)) -> (aext_extend_vector_inreg x)
16054  // fold (aext (zext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
16055  // fold (aext (sext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
  // NOTE(review): the opcode guard for this fold is not visible in this
  // excerpt (source lines 16056-16058 elided); verify against upstream
  // DAGCombiner.cpp.
16059    return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0));
16060
16061  // fold (aext (truncate (load x))) -> (aext (smaller load x))
16062  // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
16063  if (N0.getOpcode() == ISD::TRUNCATE) {
16064    if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
16065      SDNode *oye = N0.getOperand(0).getNode();
16066      if (NarrowLoad.getNode() != N0.getNode()) {
16067        CombineTo(N0.getNode(), NarrowLoad);
16068        // CombineTo deleted the truncate, if needed, but not what's under it.
16069        AddToWorklist(oye);
16070      }
16071      return SDValue(N, 0); // Return N so it doesn't get rechecked!
16072    }
16073  }
16074
16075  // fold (aext (truncate x))
16076  if (N0.getOpcode() == ISD::TRUNCATE)
16077    return DAG.getAnyExtOrTrunc(N0.getOperand(0), DL, VT);
16078
16079  // Fold (aext (and (trunc x), cst)) -> (and x, cst)
16080  // if the trunc is not free.
16081  if (N0.getOpcode() == ISD::AND &&
16082      N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
16083      N0.getOperand(1).getOpcode() == ISD::Constant &&
16084      !TLI.isTruncateFree(N0.getOperand(0).getOperand(0), N0.getValueType())) {
16085    SDValue X = DAG.getAnyExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
16086    SDValue Y = DAG.getNode(ISD::ANY_EXTEND, DL, VT, N0.getOperand(1));
16087    assert(isa<ConstantSDNode>(Y) && "Expected constant to be folded!");
16088    return DAG.getNode(ISD::AND, DL, VT, X, Y);
16089  }
16090
16091  // fold (aext (load x)) -> (aext (truncate (extload x)))
16092  // None of the supported targets knows how to perform load and any_ext
16093  // on vectors in one instruction, so attempt to fold to zext instead.
16094  if (VT.isVector()) {
16095    // Try to simplify (zext (load x)).
    // NOTE(review): trailing argument lines of this call and parts of the
    // following else-if condition are elided in this excerpt (source lines
    // 16098, 16101, 16106).
16096    if (SDValue foldedExt =
16097            tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
16099      return foldedExt;
16100  } else if (ISD::isNON_EXTLoad(N0.getNode()) &&
16102    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
16103    if (TLI.isLoadLegalOrCustom(VT, N0.getValueType(), LN0->getAlign(),
16104                                LN0->getAddressSpace(), ISD::EXTLOAD, false)) {
16105      bool DoXform = true;
16107      if (!N0.hasOneUse())
16108        DoXform =
16109            ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
16110      if (DoXform) {
16111        SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, LN0->getChain(),
16112                                         LN0->getBasePtr(), N0.getValueType(),
16113                                         LN0->getMemOperand());
16114        ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);
16115        // If the load value is used only by N, replace it via CombineTo N.
16116        bool NoReplaceTrunc = N0.hasOneUse();
16117        CombineTo(N, ExtLoad);
16118        if (NoReplaceTrunc) {
16119          DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
16120          recursivelyDeleteUnusedNodes(LN0);
16121        } else {
          // Other users still want the narrow value: feed them a truncate of
          // the new extending load.
16122          SDValue Trunc =
16123              DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
16124          CombineTo(LN0, Trunc, ExtLoad.getValue(1));
16125        }
16126        return SDValue(N, 0); // Return N so it doesn't get rechecked!
16127      }
16128    }
16129  }
16130
16131  // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
16132  // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
16133  // fold (aext ( extload x)) -> (aext (truncate (extload x)))
16134  if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) &&
16135      ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
16136    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
16137    ISD::LoadExtType ExtType = LN0->getExtensionType();
16138    EVT MemVT = LN0->getMemoryVT();
16139    if (!LegalOperations ||
16140        TLI.isLoadLegal(VT, MemVT, LN0->getAlign(), LN0->getAddressSpace(),
16141                        ExtType, false)) {
      // Re-issue the same kind of extending load at the wider result type;
      // the chain result is rewired before the dead load is deleted.
16142      SDValue ExtLoad =
16143          DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), LN0->getBasePtr(),
16144                         MemVT, LN0->getMemOperand());
16145      CombineTo(N, ExtLoad);
16146      DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
16147      recursivelyDeleteUnusedNodes(LN0);
16148      return SDValue(N, 0); // Return N so it doesn't get rechecked!
16149    }
16150  }
16151
16152  if (N0.getOpcode() == ISD::SETCC) {
16153    // Propagate fast-math-flags.
16154    SDNodeFlags Flags = N0->getFlags();
16155    SelectionDAG::FlagInserter FlagsInserter(DAG, Flags);
16156
16157    // For vectors:
16158    // aext(setcc) -> vsetcc
16159    // aext(setcc) -> truncate(vsetcc)
16160    // aext(setcc) -> aext(vsetcc)
16161    // Only do this before legalize for now.
16162    if (VT.isVector() && !LegalOperations) {
16163      EVT N00VT = N0.getOperand(0).getValueType();
16164      if (getSetCCResultType(N00VT) == N0.getValueType())
16165        return SDValue();
16166
16167      // We know that the # elements of the results is the same as the
16168      // # elements of the compare (and the # elements of the compare result
16169      // for that matter). Check to see that they are the same size. If so,
16170      // we know that the element size of the sext'd result matches the
16171      // element size of the compare operands.
16172      if (VT.getSizeInBits() == N00VT.getSizeInBits())
16173        return DAG.getSetCC(DL, VT, N0.getOperand(0), N0.getOperand(1),
16174                            cast<CondCodeSDNode>(N0.getOperand(2))->get(),
16175                            /*Chain=*/{}, /*Signaling=*/false, Flags);
16176
16177      // If the desired elements are smaller or larger than the source
16178      // elements we can use a matching integer vector type and then
16179      // truncate/any extend
16180      EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
16181      SDValue VsetCC = DAG.getSetCC(
16182          DL, MatchingVectorType, N0.getOperand(0), N0.getOperand(1),
16183          cast<CondCodeSDNode>(N0.getOperand(2))->get(), /*Chain=*/{},
16184          /*Signaling=*/false, Flags);
16185      return DAG.getAnyExtOrTrunc(VsetCC, DL, VT);
16186    }
16187
16188    // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
16189    if (SDValue SCC = SimplifySelectCC(
16190            DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
16191            DAG.getConstant(0, DL, VT),
16192            cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
16193      return SCC;
16194  }
16195
16196  if (SDValue NewCtPop = widenCtPop(N, DAG, DL))
16197    return NewCtPop;
16198
16199  if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, DL, Level))
16200    return Res;
16201
16202  return SDValue();
16203}
16204
// Combine AssertSext/AssertZext nodes: merge nested asserts, drop asserts on
// constants, and hoist asserts through truncates/ANDs when the asserted width
// makes the inner node redundant.
// NOTE(review): this copy comes from a doxygen extraction; a few condition
// lines are elided below (flagged inline) — confirm against upstream before
// relying on the exact predicates.
SDValue DAGCombiner::visitAssertExt(SDNode *N) {
  unsigned Opcode = N->getOpcode();
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  // The VT operand encodes the width being asserted (e.g. i8 for AssertZext
  // claiming the upper bits beyond bit 7 are zero).
  EVT AssertVT = cast<VTSDNode>(N1)->getVT();

  // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
  if (N0.getOpcode() == Opcode &&
      AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
    return N0;

  // fold (assert?ext c, vt) -> c
  if (isa<ConstantSDNode>(N0))
    return N0;

  if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
      N0.getOperand(0).getOpcode() == Opcode) {
    // We have an assert, truncate, assert sandwich. Make one stronger assert
    // by asserting on the smallest asserted type to the larger source type.
    // This eliminates the later assert:
    // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
    // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
    SDLoc DL(N);
    SDValue BigA = N0.getOperand(0);
    EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
    EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT;
    SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
    SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
                                    BigA.getOperand(0), MinAssertVTVal);
    return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
  }

  // If we have (AssertZext (truncate (AssertSext X, iX)), iY) and Y is smaller
  // than X. Just move the AssertZext in front of the truncate and drop the
  // AssertSExt.
  if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
      // NOTE(review): a condition line is elided here in this extraction
      // (presumably checking that the truncate's operand is an AssertSext).
      Opcode == ISD::AssertZext) {
    SDValue BigA = N0.getOperand(0);
    EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
    if (AssertVT.bitsLT(BigA_AssertVT)) {
      SDLoc DL(N);
      SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
                                      BigA.getOperand(0), N1);
      return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
    }
  }

  if (Opcode == ISD::AssertZext && N0.getOpcode() == ISD::AND &&
    // NOTE(review): a condition line and the opening brace are elided here in
    // this extraction (presumably an isa<ConstantSDNode> check on the AND's
    // RHS) — this text is not compilable as-is; confirm against upstream.
    const APInt &Mask = N0.getConstantOperandAPInt(1);

    // If we have (AssertZext (and (AssertSext X, iX), M), iY) and Y is smaller
    // than X, and the And doesn't change the lower iX bits, we can move the
    // AssertZext in front of the And and drop the AssertSext.
    if (N0.getOperand(0).getOpcode() == ISD::AssertSext && N0.hasOneUse()) {
      SDValue BigA = N0.getOperand(0);
      EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
      if (AssertVT.bitsLT(BigA_AssertVT) &&
          Mask.countr_one() >= BigA_AssertVT.getScalarSizeInBits()) {
        SDLoc DL(N);
        SDValue NewAssert =
            DAG.getNode(Opcode, DL, N->getValueType(0), BigA.getOperand(0), N1);
        return DAG.getNode(ISD::AND, DL, N->getValueType(0), NewAssert,
                           N0.getOperand(1));
      }
    }

    // Remove AssertZext entirely if the mask guarantees the assertion cannot
    // fail.
    // TODO: Use KB countMinLeadingZeros to handle non-constant masks?
    if (Mask.isIntN(AssertVT.getScalarSizeInBits()))
      return N0;
  }

  return SDValue();
}
16282
16283SDValue DAGCombiner::visitAssertAlign(SDNode *N) {
16284 SDLoc DL(N);
16285
16286 Align AL = cast<AssertAlignSDNode>(N)->getAlign();
16287 SDValue N0 = N->getOperand(0);
16288
16289 // Fold (assertalign (assertalign x, AL0), AL1) ->
16290 // (assertalign x, max(AL0, AL1))
16291 if (auto *AAN = dyn_cast<AssertAlignSDNode>(N0))
16292 return DAG.getAssertAlign(DL, N0.getOperand(0),
16293 std::max(AL, AAN->getAlign()));
16294
16295 // In rare cases, there are trivial arithmetic ops in source operands. Sink
16296 // this assert down to source operands so that those arithmetic ops could be
16297 // exposed to the DAG combining.
16298 switch (N0.getOpcode()) {
16299 default:
16300 break;
16301 case ISD::ADD:
16302 case ISD::PTRADD:
16303 case ISD::SUB: {
16304 unsigned AlignShift = Log2(AL);
16305 SDValue LHS = N0.getOperand(0);
16306 SDValue RHS = N0.getOperand(1);
16307 unsigned LHSAlignShift = DAG.computeKnownBits(LHS).countMinTrailingZeros();
16308 unsigned RHSAlignShift = DAG.computeKnownBits(RHS).countMinTrailingZeros();
16309 if (LHSAlignShift >= AlignShift || RHSAlignShift >= AlignShift) {
16310 if (LHSAlignShift < AlignShift)
16311 LHS = DAG.getAssertAlign(DL, LHS, AL);
16312 if (RHSAlignShift < AlignShift)
16313 RHS = DAG.getAssertAlign(DL, RHS, AL);
16314 return DAG.getNode(N0.getOpcode(), DL, N0.getValueType(), LHS, RHS);
16315 }
16316 break;
16317 }
16318 }
16319
16320 return SDValue();
16321}
16322
16323SDValue DAGCombiner::visitIS_FPCLASS(SDNode *N) {
16324 SDValue Src = N->getOperand(0);
16325 FPClassTest Mask = static_cast<FPClassTest>(N->getConstantOperandVal(1));
16326 EVT VT = N->getValueType(0);
16327 SDLoc DL(N);
16328
16329 // is.fpclass(poison, mask) -> poison
16330 if (Src.getOpcode() == ISD::POISON)
16331 return DAG.getPOISON(VT);
16332
16333 KnownFPClass Known = DAG.computeKnownFPClass(Src, Mask);
16334
16335 // All possible classes are within the mask: result is always true.
16336 if ((~Mask & Known.KnownFPClasses) == fcNone)
16337 return DAG.getBoolConstant(true, DL, VT, Src.getValueType());
16338
16339 // Clear test bits we know must be false from the source value.
16340 // fp_class (nnan x), qnan|snan|other -> fp_class (nnan x), other
16341 // fp_class (ninf x), ninf|pinf|other -> fp_class (ninf x), other
16342 if ((Mask & Known.KnownFPClasses) != Mask) {
16343 return DAG.getNode(
16344 ISD::IS_FPCLASS, DL, VT, Src,
16345 DAG.getTargetConstant(Mask & Known.KnownFPClasses, DL, MVT::i32),
16346 N->getFlags());
16347 }
16348
16349 return SDValue();
16350}
16351
/// If the result of a load is shifted/masked/truncated to an effectively
/// narrower type, try to transform the load to a narrower type and/or
/// use an extending load.
///
/// NOTE(review): this copy comes from a doxygen extraction; several lines are
/// elided (flagged inline with NOTE(review)) — this text is not compilable
/// as-is; confirm the flagged spots against upstream.
SDValue DAGCombiner::reduceLoadWidth(SDNode *N) {
  unsigned Opc = N->getOpcode();

  // NOTE(review): the declaration of ExtType (used throughout below,
  // presumably ISD::LoadExtType initialized to NON_EXTLOAD) is elided here.
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  EVT ExtVT = VT;

  // This transformation isn't valid for vector loads.
  if (VT.isVector())
    return SDValue();

  // The ShAmt variable is used to indicate that we've consumed a right
  // shift. I.e. we want to narrow the width of the load by skipping to load the
  // ShAmt least significant bits.
  unsigned ShAmt = 0;
  // A special case is when the least significant bits from the load are masked
  // away, but using an AND rather than a right shift. ShiftedOffset is used
  // to indicate that the narrowed load should be left-shifted ShiftedOffset
  // bits to get the result.
  unsigned ShiftedOffset = 0;
  // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
  // extended to VT.
  if (Opc == ISD::SIGN_EXTEND_INREG) {
    ExtType = ISD::SEXTLOAD;
    ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
  } else if (Opc == ISD::SRL || Opc == ISD::SRA) {
    // Another special-case: SRL/SRA is basically zero/sign-extending a narrower
    // value, or it may be shifting a higher subword, half or byte into the
    // lowest bits.

    // Only handle shift with constant shift amount, and the shiftee must be a
    // load.
    auto *LN = dyn_cast<LoadSDNode>(N0);
    auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (!N1C || !LN)
      return SDValue();
    // If the shift amount is larger than the memory type then we're not
    // accessing any of the loaded bytes.
    ShAmt = N1C->getZExtValue();
    uint64_t MemoryWidth = LN->getMemoryVT().getScalarSizeInBits();
    if (MemoryWidth <= ShAmt)
      return SDValue();
    // Attempt to fold away the SRL by using ZEXTLOAD and SRA by using SEXTLOAD.
    ExtType = Opc == ISD::SRL ? ISD::ZEXTLOAD : ISD::SEXTLOAD;
    ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt);
    // If original load is a SEXTLOAD then we can't simply replace it by a
    // ZEXTLOAD (we could potentially replace it by a more narrow SEXTLOAD
    // followed by a ZEXT, but that is not handled at the moment). Similarly if
    // the original load is a ZEXTLOAD and we want to use a SEXTLOAD.
    if ((LN->getExtensionType() == ISD::SEXTLOAD ||
         LN->getExtensionType() == ISD::ZEXTLOAD) &&
        LN->getExtensionType() != ExtType)
      return SDValue();
  } else if (Opc == ISD::AND) {
    // An AND with a constant mask is the same as a truncate + zero-extend.
    auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (!AndC)
      return SDValue();

    const APInt &Mask = AndC->getAPIntValue();
    unsigned ActiveBits = 0;
    if (Mask.isMask()) {
      ActiveBits = Mask.countr_one();
    } else if (Mask.isShiftedMask(ShAmt, ActiveBits)) {
      // Shifted mask: the narrowed load must be re-shifted left by ShAmt.
      ShiftedOffset = ShAmt;
    } else {
      return SDValue();
    }

    ExtType = ISD::ZEXTLOAD;
    ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
  }

  // In case Opc==SRL we've already prepared ExtVT/ExtType/ShAmt based on doing
  // a right shift. Here we redo some of those checks, to possibly adjust the
  // ExtVT even further based on "a masking AND". We could also end up here for
  // other reasons (e.g. based on Opc==TRUNCATE) and that is why some checks
  // need to be done here as well.
  if (Opc == ISD::SRL || N0.getOpcode() == ISD::SRL) {
    SDValue SRL = Opc == ISD::SRL ? SDValue(N, 0) : N0;
    // Bail out when the SRL has more than one use. This is done for historical
    // (undocumented) reasons. Maybe the intent was to guard the AND-masking
    // check below? And maybe it could be non-profitable to do the transform in
    // case the SRL has multiple uses and we get here with Opc!=ISD::SRL?
    // FIXME: Can't we just skip this check for the Opc==ISD::SRL case.
    if (!SRL.hasOneUse())
      return SDValue();

    // Only handle shift with constant shift amount, and the shiftee must be a
    // load.
    auto *LN = dyn_cast<LoadSDNode>(SRL.getOperand(0));
    auto *SRL1C = dyn_cast<ConstantSDNode>(SRL.getOperand(1));
    if (!SRL1C || !LN)
      return SDValue();

    // If the shift amount is larger than the input type then we're not
    // accessing any of the loaded bytes.  If the load was a zextload/extload
    // then the result of the shift+trunc is zero/undef (handled elsewhere).
    ShAmt = SRL1C->getZExtValue();
    uint64_t MemoryWidth = LN->getMemoryVT().getSizeInBits();
    if (ShAmt >= MemoryWidth)
      return SDValue();

    // Because a SRL must be assumed to *need* to zero-extend the high bits
    // (as opposed to anyext the high bits), we can't combine the zextload
    // lowering of SRL and an sextload.
    if (LN->getExtensionType() == ISD::SEXTLOAD)
      return SDValue();

    // Avoid reading outside the memory accessed by the original load (could
    // happened if we only adjust the load base pointer by ShAmt). Instead we
    // try to narrow the load even further. The typical scenario here is:
    //   (i64 (truncate (i96 (srl (load x), 64)))) ->
    //     (i64 (truncate (i96 (zextload (load i32 + offset) from i32))))
    if (ExtVT.getScalarSizeInBits() > MemoryWidth - ShAmt) {
      // Don't replace sextload by zextload.
      if (ExtType == ISD::SEXTLOAD)
        return SDValue();
      // Narrow the load.
      ExtType = ISD::ZEXTLOAD;
      ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt);
    }

    // If the SRL is only used by a masking AND, we may be able to adjust
    // the ExtVT to make the AND redundant.
    SDNode *Mask = *(SRL->user_begin());
    if (SRL.hasOneUse() && Mask->getOpcode() == ISD::AND &&
        isa<ConstantSDNode>(Mask->getOperand(1))) {
      unsigned Offset, ActiveBits;
      const APInt& ShiftMask = Mask->getConstantOperandAPInt(1);
      if (ShiftMask.isMask()) {
        EVT MaskedVT =
            EVT::getIntegerVT(*DAG.getContext(), ShiftMask.countr_one());
        // If the mask is smaller, recompute the type.
        if ((ExtVT.getScalarSizeInBits() > MaskedVT.getScalarSizeInBits()) &&
            TLI.isLoadLegal(SRL.getValueType(), MaskedVT, LN->getAlign(),
                            LN->getAddressSpace(), ExtType, false))
          ExtVT = MaskedVT;
      } else if (ExtType == ISD::ZEXTLOAD &&
                 ShiftMask.isShiftedMask(Offset, ActiveBits) &&
                 (Offset + ShAmt) < VT.getScalarSizeInBits()) {
        EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
        // If the mask is shifted we can use a narrower load and a shl to insert
        // the trailing zeros.
        if (((Offset + ActiveBits) <= ExtVT.getScalarSizeInBits()) &&
            TLI.isLoadLegal(SRL.getValueType(), MaskedVT, LN->getAlign(),
                            LN->getAddressSpace(), ExtType, false)) {
          ExtVT = MaskedVT;
          ShAmt = Offset + ShAmt;
          ShiftedOffset = Offset;
        }
      }
    }

    N0 = SRL.getOperand(0);
  }

  // If the load is shifted left (and the result isn't shifted back right), we
  // can fold a truncate through the shift. The typical scenario is that N
  // points at a TRUNCATE here so the attempted fold is:
  //   (truncate (shl (load x), c))) -> (shl (narrow load x), c)
  // ShLeftAmt will indicate how much a narrowed load should be shifted left.
  unsigned ShLeftAmt = 0;
  if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
      ExtVT == VT && TLI.isNarrowingProfitable(N, N0.getValueType(), VT)) {
    if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
      ShLeftAmt = N01->getZExtValue();
      N0 = N0.getOperand(0);
    }
  }

  // Look through a freeze if present between the operation and the load.
  // The freeze will be preserved on the narrowed result.
  SDValue FreezeNode;
  if (N0.getOpcode() == ISD::FREEZE) {
    FreezeNode = N0;
    N0 = N0.getOperand(0);
  }

  // If we haven't found a load, we can't narrow it.
  if (!isa<LoadSDNode>(N0))
    return SDValue();

  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
  // Reducing the width of a volatile load is illegal.  For atomics, we may be
  // able to reduce the width provided we never widen again. (see D66309)
  if (!LN0->isSimple() ||
      !isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
    return SDValue();

  // Bail early when looking through a multi-use freeze, since other users of
  // the freeze can depend on the full load value. But its still safe to change
  // the extension type from anyext to zext.
  if (FreezeNode && !FreezeNode.hasOneUse() &&
      (LN0->getMemoryVT().bitsGT(ExtVT) || ExtType != ISD::ZEXTLOAD ||
       (LN0->getExtensionType() != ISD::EXTLOAD &&
        LN0->getExtensionType() != ISD::ZEXTLOAD)))
    return SDValue();

  // On big-endian targets the least significant bits live at the high end of
  // the in-memory representation, so the byte offset must be computed from
  // the other end.
  auto AdjustBigEndianShift = [&](unsigned ShAmt) {
    unsigned LVTStoreBits =
    // NOTE(review): the initializer of LVTStoreBits is elided here
    // (presumably the load's memory-VT store size in bits).
    unsigned EVTStoreBits = ExtVT.getStoreSizeInBits().getFixedValue();
    return LVTStoreBits - EVTStoreBits - ShAmt;
  };

  // We need to adjust the pointer to the load by ShAmt bits in order to load
  // the correct bytes.
  unsigned PtrAdjustmentInBits =
      DAG.getDataLayout().isBigEndian() ? AdjustBigEndianShift(ShAmt) : ShAmt;

  uint64_t PtrOff = PtrAdjustmentInBits / 8;
  SDLoc DL(LN0);
  // The original load itself didn't wrap, so an offset within it doesn't.
  SDValue NewPtr =
  // NOTE(review): the initializer of NewPtr is elided here (presumably a
  // DAG.getMemBasePlusOffset of LN0's base pointer by PtrOff with nuw).
  AddToWorklist(NewPtr.getNode());

  SDValue Load;
  if (ExtType == ISD::NON_EXTLOAD) {
    const MDNode *OldRanges = LN0->getRanges();
    const MDNode *NewRanges = nullptr;
    // If LSBs are loaded and the truncated ConstantRange for the OldRanges
    // metadata is not the full-set for the new width then create a NewRanges
    // metadata for the truncated load
    if (ShAmt == 0 && OldRanges) {
      ConstantRange CR = getConstantRangeFromMetadata(*OldRanges);
      unsigned BitSize = VT.getScalarSizeInBits();

      // It is possible for an 8-bit extending load with 8-bit range
      // metadata to be narrowed to an 8-bit load.  This guard is necessary to
      // ensure that truncation is strictly smaller.
      if (CR.getBitWidth() > BitSize) {
        ConstantRange TruncatedCR = CR.truncate(BitSize);
        if (!TruncatedCR.isFullSet()) {
          Metadata *Bounds[2] = {
          // NOTE(review): the metadata wrappers around the two bounds are
          // elided here (presumably ConstantAsMetadata::get(...)).
              ConstantInt::get(*DAG.getContext(), TruncatedCR.getLower())),
              ConstantInt::get(*DAG.getContext(), TruncatedCR.getUpper()))};
          NewRanges = MDNode::get(*DAG.getContext(), Bounds);
        }
      } else if (CR.getBitWidth() == BitSize)
        NewRanges = OldRanges;
    }
    Load = DAG.getLoad(VT, DL, LN0->getChain(), NewPtr,
                       LN0->getPointerInfo().getWithOffset(PtrOff),
                       LN0->getBaseAlign(), LN0->getMemOperand()->getFlags(),
                       LN0->getAAInfo(), NewRanges);
  } else
    Load = DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), NewPtr,
                          LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
                          LN0->getBaseAlign(), LN0->getMemOperand()->getFlags(),
                          LN0->getAAInfo());

  // Replace the old load's chain with the new load's chain.
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));

  // Replace old load value for multi-use freeze so all users benefit.
  if (FreezeNode && !FreezeNode.hasOneUse())
    DAG.ReplaceAllUsesOfValueWith(N0.getValue(0), Load.getValue(0));

  // If we looked through a freeze, rewrap the narrowed result and add an
  // Assert node so downstream analyses can see the range.
  // NOTE(review): the declaration of Result (presumably `SDValue Result =
  // Load;`) is elided here in this extraction.
  if (FreezeNode) {
    Result = DAG.getNode(ISD::FREEZE, DL, VT, Result);
    if (ExtType == ISD::ZEXTLOAD)
      Result =
          DAG.getNode(ISD::AssertZext, DL, VT, Result, DAG.getValueType(ExtVT));
    else if (ExtType == ISD::SEXTLOAD)
      Result =
          DAG.getNode(ISD::AssertSext, DL, VT, Result, DAG.getValueType(ExtVT));
  }

  // Shift the result left, if we've swallowed a left shift.
  if (ShLeftAmt != 0) {
    // If the shift amount is as large as the result size (but, presumably,
    // no larger than the source) then the useful bits of the result are
    // zero; we can't simply return the shortened shift, because the result
    // of that operation is undefined.
    if (ShLeftAmt >= VT.getScalarSizeInBits())
      Result = DAG.getConstant(0, DL, VT);
    else
      Result = DAG.getNode(ISD::SHL, DL, VT, Result,
                           DAG.getShiftAmountConstant(ShLeftAmt, VT, DL));
  }

  if (ShiftedOffset != 0) {
    // We're using a shifted mask, so the load now has an offset. This means
    // that data has been loaded into the lower bytes than it would have been
    // before, so we need to shl the loaded data into the correct position in the
    // register.
    SDValue ShiftC = DAG.getConstant(ShiftedOffset, DL, VT);
    Result = DAG.getNode(ISD::SHL, DL, VT, Result, ShiftC);
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
  }

  // Return the new loaded value.
  return Result;
}
16659
// Combine SIGN_EXTEND_INREG: drop redundant in-register extensions, convert
// them to plain sext/zext where the operand permits, narrow loads, and fold
// through various load/gather forms.
// NOTE(review): this copy comes from a doxygen extraction; several condition
// lines are elided below (flagged inline) — this text is not compilable
// as-is; confirm the flagged spots against upstream.
SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  EVT ExtVT = cast<VTSDNode>(N1)->getVT();
  unsigned VTBits = VT.getScalarSizeInBits();
  unsigned ExtVTBits = ExtVT.getScalarSizeInBits();
  SDLoc DL(N);

  // sext_vector_inreg(undef) = 0 because the top bit will all be the same.
  if (N0.isUndef())
    return DAG.getConstant(0, DL, VT);

  // fold (sext_in_reg c1) -> c1
  if (SDValue C =
      // NOTE(review): the constant-folding call (presumably
      // DAG.FoldConstantArithmetic(...)) is elided here.
    return C;

  // If the input is already sign extended, just drop the extension.
  if (ExtVTBits >= DAG.ComputeMaxSignificantBits(N0))
    return N0;

  // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
  if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
      ExtVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
    return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, N0.getOperand(0), N1);

  // fold (sext_in_reg (sext x)) -> (sext x)
  // fold (sext_in_reg (aext x)) -> (sext x)
  // if x is small enough or if we know that x has more than 1 sign bit and the
  // sign_extend_inreg is extending from one of them.
  if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
    SDValue N00 = N0.getOperand(0);
    unsigned N00Bits = N00.getScalarValueSizeInBits();
    if ((N00Bits <= ExtVTBits ||
         DAG.ComputeMaxSignificantBits(N00) <= ExtVTBits) &&
        (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
      return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N00);
  }

  // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
  // if x is small enough or if we know that x has more than 1 sign bit and the
  // sign_extend_inreg is extending from one of them.
  // NOTE(review): the guarding `if` line (presumably testing that N0 is an
  // *_EXTEND_VECTOR_INREG opcode) is elided here.
    SDValue N00 = N0.getOperand(0);
    unsigned N00Bits = N00.getScalarValueSizeInBits();
    bool IsZext = N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
    if ((N00Bits == ExtVTBits ||
         (!IsZext && (N00Bits < ExtVTBits ||
                      DAG.ComputeMaxSignificantBits(N00) <= ExtVTBits))) &&
        (!LegalOperations ||
         // NOTE(review): the legality check (presumably on
         // ISD::SIGN_EXTEND_VECTOR_INREG) is elided here.
      return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, DL, VT, N00);
  }

  // fold (sext_in_reg (zext x)) -> (sext x)
  // iff we are extending the source sign bit.
  if (N0.getOpcode() == ISD::ZERO_EXTEND) {
    SDValue N00 = N0.getOperand(0);
    if (N00.getScalarValueSizeInBits() == ExtVTBits &&
        (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
      return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N00);
  }

  // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
  if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, ExtVTBits - 1)))
    return DAG.getZeroExtendInReg(N0, DL, ExtVT);

  // fold operands of sext_in_reg based on knowledge that the top bits are not
  // demanded.
  // NOTE(review): the guarding `if` line (presumably a SimplifyDemandedBits
  // call) is elided here.
    return SDValue(N, 0);

  // fold (sext_in_reg (load x)) -> (smaller sextload x)
  // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
  if (SDValue NarrowLoad = reduceLoadWidth(N))
    return NarrowLoad;

  // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
  // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
  // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
  if (N0.getOpcode() == ISD::SRL) {
    if (auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
      if (ShAmt->getAPIntValue().ule(VTBits - ExtVTBits)) {
        // We can turn this into an SRA iff the input to the SRL is already sign
        // extended enough.
        unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
        if (((VTBits - ExtVTBits) - ShAmt->getZExtValue()) < InSignBits)
          return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0),
                             N0.getOperand(1));
      }
  }

  // fold (sext_inreg (extload x)) -> (sextload x)
  // If sextload is not supported by target, we can only do the combine when
  // load has one use. Doing otherwise can block folding the extload with other
  // extends that the target does support.
  // NOTE(review): the guarding `if` line (presumably testing for an unindexed
  // extending load) is elided here.
    auto *LN0 = cast<LoadSDNode>(N0);
    if (ExtVT == LN0->getMemoryVT() &&
        ((!LegalOperations && LN0->isSimple() && N0.hasOneUse()) ||
         TLI.isLoadLegal(VT, ExtVT, LN0->getAlign(), LN0->getAddressSpace(),
                         ISD::SEXTLOAD, false))) {
      SDValue ExtLoad =
          DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(),
                         LN0->getBasePtr(), ExtVT, LN0->getMemOperand());
      CombineTo(N, ExtLoad);
      CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
      AddToWorklist(ExtLoad.getNode());
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }

  // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
  // NOTE(review): the guarding `if` line (presumably testing for an unindexed
  // zextload) is elided here.
    auto *LN0 = cast<LoadSDNode>(N0);

    if (N0.hasOneUse() && ExtVT == LN0->getMemoryVT() &&
        ((!LegalOperations && LN0->isSimple()) &&
         TLI.isLoadLegal(VT, ExtVT, LN0->getAlign(), LN0->getAddressSpace(),
                         ISD::SEXTLOAD, false))) {
      SDValue ExtLoad =
          DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(),
                         LN0->getBasePtr(), ExtVT, LN0->getMemOperand());
      CombineTo(N, ExtLoad);
      CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }

  // fold (sext_inreg (masked_load x)) -> (sext_masked_load x)
  // ignore it if the masked load is already sign extended
  bool Frozen = N0.getOpcode() == ISD::FREEZE && N0.hasOneUse();
  if (auto *Ld = dyn_cast<MaskedLoadSDNode>(Frozen ? N0.getOperand(0) : N0)) {
    if (ExtVT == Ld->getMemoryVT() && Ld->hasNUsesOfValue(1, 0) &&
        // NOTE(review): a condition line (presumably checking the masked
        // load's current extension type) is elided here.
        TLI.isLoadLegal(VT, ExtVT, Ld->getAlign(), Ld->getAddressSpace(),
                        ISD::SEXTLOAD, false)) {
      SDValue ExtMaskedLoad = DAG.getMaskedLoad(
          VT, DL, Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(),
          Ld->getMask(), Ld->getPassThru(), ExtVT, Ld->getMemOperand(),
          Ld->getAddressingMode(), ISD::SEXTLOAD, Ld->isExpandingLoad());
      CombineTo(N, Frozen ? N0 : ExtMaskedLoad);
      CombineTo(Ld, ExtMaskedLoad, ExtMaskedLoad.getValue(1));
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }

  // fold (sext_inreg (masked_gather x)) -> (sext_masked_gather x)
  if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
    if (SDValue(GN0, 0).hasOneUse() && ExtVT == GN0->getMemoryVT() &&
        // NOTE(review): a condition line (presumably a target legality /
        // desirability check for the extended gather) is elided here.
      SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
                       GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};

      SDValue ExtLoad = DAG.getMaskedGather(
          DAG.getVTList(VT, MVT::Other), ExtVT, DL, Ops, GN0->getMemOperand(),
          GN0->getIndexType(), ISD::SEXTLOAD);

      CombineTo(N, ExtLoad);
      CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
      AddToWorklist(ExtLoad.getNode());
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }

  // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
  if (ExtVTBits <= 16 && N0.getOpcode() == ISD::OR) {
    if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
                                           N0.getOperand(1), false))
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, BSwap, N1);
  }

  // Fold (iM_signext_inreg
  //        (extract_subvector (zext|anyext|sext iN_v to _) _)
  //        from iN)
  //      -> (extract_subvector (signext iN_v to iM))
  if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR && N0.hasOneUse() &&
      // NOTE(review): a condition line (presumably checking that the
      // subvector source is an extension opcode) is elided here.
    SDValue InnerExt = N0.getOperand(0);
    EVT InnerExtVT = InnerExt->getValueType(0);
    SDValue Extendee = InnerExt->getOperand(0);

    if (ExtVTBits == Extendee.getValueType().getScalarSizeInBits() &&
        (!LegalOperations ||
         TLI.isOperationLegal(ISD::SIGN_EXTEND, InnerExtVT))) {
      SDValue SignExtExtendee =
          DAG.getNode(ISD::SIGN_EXTEND, DL, InnerExtVT, Extendee);
      return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, SignExtExtendee,
                         N0.getOperand(1));
    }
  }

  return SDValue();
}
16855
    SDNode *N, const SDLoc &DL, const TargetLowering &TLI, SelectionDAG &DAG,
    bool LegalOperations) {
  // Fold a *_EXTEND_VECTOR_INREG whose source is a one-use CONCAT_VECTORS
  // into a full *_EXTEND of the concat's first operand.
  // NOTE(review): the line carrying this function's name and return type is
  // elided in this extraction — confirm the signature against upstream.
  unsigned InregOpcode = N->getOpcode();
  // Map the VECTOR_INREG opcode to the corresponding full-width extend.
  unsigned Opcode = DAG.getOpcode_EXTEND(InregOpcode);

  SDValue Src = N->getOperand(0);
  EVT VT = N->getValueType(0);
  EVT SrcVT = VT.changeVectorElementType(
      *DAG.getContext(), Src.getValueType().getVectorElementType());

  assert(ISD::isExtVecInRegOpcode(InregOpcode) &&
         "Expected EXTEND_VECTOR_INREG dag node in input!");

  // Profitability check: our operand must be an one-use CONCAT_VECTORS.
  // FIXME: one-use check may be overly restrictive
  if (!Src.hasOneUse() || Src.getOpcode() != ISD::CONCAT_VECTORS)
    return SDValue();

  // Profitability check: we must be extending exactly one of its operands.
  // FIXME: this is probably overly restrictive.
  Src = Src.getOperand(0);
  if (Src.getValueType() != SrcVT)
    return SDValue();

  if (LegalOperations && !TLI.isOperationLegal(Opcode, VT))
    return SDValue();

  return DAG.getNode(Opcode, DL, VT, Src);
}
16886
// Combine any/zero/sign EXTEND_VECTOR_INREG nodes: fold undef and constant
// sources, and try the concat-of-vectors fold above.
// NOTE(review): this copy comes from a doxygen extraction; two lines are
// elided below (flagged inline) — confirm against upstream.
SDValue DAGCombiner::visitEXTEND_VECTOR_INREG(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  if (N0.isUndef()) {
    // aext_vector_inreg(undef) = undef because the top bits are undefined.
    // {s/z}ext_vector_inreg(undef) = 0 because the top bits must be the same.
    return N->getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG
               ? DAG.getUNDEF(VT)
               : DAG.getConstant(0, DL, VT);
  }

  if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
    return Res;

  // NOTE(review): the guarding `if` line (presumably a
  // SimplifyDemandedVectorElts call) is elided here.
    return SDValue(N, 0);

  // NOTE(review): the `if (SDValue R = foldExtendVectorInregToExtendOf-
  // Subvector(N, DL, TLI, DAG,` line is elided here; only its trailing
  // argument survives below.
                                               LegalOperations))
    return R;

  return SDValue();
}
16912
// Combine TRUNCATE_USAT_U: fold a saturating-truncate of fp_to_uint into a
// single FP_TO_UINT_SAT node when the target supports it.
SDValue DAGCombiner::visitTRUNCATE_USAT_U(SDNode *N) {
  EVT VT = N->getValueType(0);
  SDValue N0 = N->getOperand(0);

  SDValue FPVal;
  if (sd_match(N0, m_FPToUI(m_Value(FPVal))) &&
      // NOTE(review): a line is elided here in this extraction (presumably a
      // TLI query such as shouldConvertFpToSat( taking the arguments below).
          ISD::FP_TO_UINT_SAT, FPVal.getValueType(), VT))
    return DAG.getNode(ISD::FP_TO_UINT_SAT, SDLoc(N0), VT, FPVal,
                       DAG.getValueType(VT.getScalarType()));

  return SDValue();
}
16926
/// Detect patterns of truncation with unsigned saturation:
///
/// (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type).
/// Return the source value x to be truncated or SDValue() if the pattern was
/// not matched.
///
// NOTE(review): the function signature line (presumably
// `static SDValue detectUSatUPattern(SDValue In, EVT VT) {`) is elided in
// this extraction — confirm against upstream.
  unsigned NumDstBits = VT.getScalarSizeInBits();
  unsigned NumSrcBits = In.getScalarValueSizeInBits();
  // Saturation with truncation. We truncate from InVT to VT.
  assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");

  SDValue Min;
  // The clamp constant is the destination type's unsigned max, widened to the
  // source width.
  APInt UnsignedMax = APInt::getMaxValue(NumDstBits).zext(NumSrcBits);
  if (sd_match(In, m_UMin(m_Value(Min), m_SpecificInt(UnsignedMax))))
    return Min;

  return SDValue();
}
16946
/// Detect patterns of truncation with signed saturation:
/// (truncate (smin (smax (x, signed_min_of_dest_type),
///                  signed_max_of_dest_type)) to dest_type)
/// or:
/// (truncate (smax (smin (x, signed_max_of_dest_type),
///                  signed_min_of_dest_type)) to dest_type).
///
/// Return the source value to be truncated or SDValue() if the pattern was not
/// matched.
// NOTE(review): the function signature line (presumably
// `static SDValue detectSSatSPattern(SDValue In, EVT VT) {`) is elided in
// this extraction — confirm against upstream.
  unsigned NumDstBits = VT.getScalarSizeInBits();
  unsigned NumSrcBits = In.getScalarValueSizeInBits();
  // Saturation with truncation. We truncate from InVT to VT.
  assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");

  SDValue Val;
  // Clamp bounds are the destination type's signed max/min, sign-extended to
  // the source width; both min-of-max and max-of-min orderings are accepted.
  APInt SignedMax = APInt::getSignedMaxValue(NumDstBits).sext(NumSrcBits);
  APInt SignedMin = APInt::getSignedMinValue(NumDstBits).sext(NumSrcBits);

  if (sd_match(In, m_SMin(m_SMax(m_Value(Val), m_SpecificInt(SignedMin)),
                          m_SpecificInt(SignedMax))))
    return Val;

  if (sd_match(In, m_SMax(m_SMin(m_Value(Val), m_SpecificInt(SignedMax)),
                          m_SpecificInt(SignedMin))))
    return Val;

  return SDValue();
}
16976
/// Detect patterns of truncation with unsigned saturation:
// NOTE(review): the first line of this function's signature (presumably
// `static SDValue detectSSatUPattern(SDValue In, EVT VT, SelectionDAG &DAG,`)
// is elided in this extraction — confirm against upstream.
                            const SDLoc &DL) {
  unsigned NumDstBits = VT.getScalarSizeInBits();
  unsigned NumSrcBits = In.getScalarValueSizeInBits();
  // Saturation with truncation. We truncate from InVT to VT.
  assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");

  SDValue Val;
  // The upper clamp is the destination type's unsigned max; the lower clamp
  // is zero. Three equivalent min/max orderings are accepted.
  APInt UnsignedMax = APInt::getMaxValue(NumDstBits).zext(NumSrcBits);
  // Min == 0, Max is unsigned max of destination type.
  if (sd_match(In, m_SMax(m_SMin(m_Value(Val), m_SpecificInt(UnsignedMax)),
                          m_Zero())))
    return Val;

  if (sd_match(In, m_SMin(m_SMax(m_Value(Val), m_Zero()),
                          m_SpecificInt(UnsignedMax))))
    return Val;

  if (sd_match(In, m_UMin(m_SMax(m_Value(Val), m_Zero()),
                          m_SpecificInt(UnsignedMax))))
    return Val;

  return SDValue();
}
17002
17003static SDValue foldToSaturated(SDNode *N, EVT &VT, SDValue &Src, EVT &SrcVT,
17004 SDLoc &DL, const TargetLowering &TLI,
17005 SelectionDAG &DAG) {
17006 auto AllowedTruncateSat = [&](unsigned Opc, EVT SrcVT, EVT VT) -> bool {
17007 return (TLI.isOperationLegalOrCustom(Opc, SrcVT) &&
17008 TLI.isTypeDesirableForOp(Opc, VT));
17009 };
17010
17011 if (Src.getOpcode() == ISD::SMIN || Src.getOpcode() == ISD::SMAX) {
17012 if (AllowedTruncateSat(ISD::TRUNCATE_SSAT_S, SrcVT, VT))
17013 if (SDValue SSatVal = detectSSatSPattern(Src, VT))
17014 return DAG.getNode(ISD::TRUNCATE_SSAT_S, DL, VT, SSatVal);
17015 if (AllowedTruncateSat(ISD::TRUNCATE_SSAT_U, SrcVT, VT))
17016 if (SDValue SSatVal = detectSSatUPattern(Src, VT, DAG, DL))
17017 return DAG.getNode(ISD::TRUNCATE_SSAT_U, DL, VT, SSatVal);
17018 } else if (Src.getOpcode() == ISD::UMIN) {
17019 if (AllowedTruncateSat(ISD::TRUNCATE_SSAT_U, SrcVT, VT))
17020 if (SDValue SSatVal = detectSSatUPattern(Src, VT, DAG, DL))
17021 return DAG.getNode(ISD::TRUNCATE_SSAT_U, DL, VT, SSatVal);
17022 if (AllowedTruncateSat(ISD::TRUNCATE_USAT_U, SrcVT, VT))
17023 if (SDValue USatVal = detectUSatUPattern(Src, VT))
17024 return DAG.getNode(ISD::TRUNCATE_USAT_U, DL, VT, USatVal);
17025 }
17026
17027 return SDValue();
17028}
17029
/// Combine step for ISD::TRUNCATE: tries a long list of peephole folds that
/// eliminate the truncate, narrow its operand, or push the truncate below
/// another operation. Returns the replacement value or SDValue() on no change.
// NOTE(review): several interior lines of this function were dropped when this
// excerpt was extracted (marked inline below) — reconcile with upstream before
// building.
SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  EVT SrcVT = N0.getValueType();
  bool isLE = DAG.getDataLayout().isLittleEndian();
  SDLoc DL(N);

  // trunc(undef) = undef
  if (N0.isUndef())
    return DAG.getUNDEF(VT);

  // fold (truncate (truncate x)) -> (truncate x)
  if (N0.getOpcode() == ISD::TRUNCATE)
    return DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));

  // fold saturated truncate
  if (SDValue SaturatedTR = foldToSaturated(N, VT, N0, SrcVT, DL, TLI, DAG))
    return SaturatedTR;

  // fold (truncate c1) -> c1
  if (SDValue C = DAG.FoldConstantArithmetic(ISD::TRUNCATE, DL, VT, {N0}))
    return C;

  // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
  if (N0.getOpcode() == ISD::ZERO_EXTEND ||
      N0.getOpcode() == ISD::SIGN_EXTEND ||
      N0.getOpcode() == ISD::ANY_EXTEND) {
    // if the source is smaller than the dest, we still need an extend.
    if (N0.getOperand(0).getValueType().bitsLT(VT)) {
      SDNodeFlags Flags;
      // Preserve the nneg flag when re-emitting a zext at the new width.
      if (N0.getOpcode() == ISD::ZERO_EXTEND)
        Flags.setNonNeg(N0->getFlags().hasNonNeg());
      return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0), Flags);
    }
    // if the source is larger than the dest, than we just need the truncate.
    if (N0.getOperand(0).getValueType().bitsGT(VT))
      return DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
    // if the source and dest are the same type, we can drop both the extend
    // and the truncate.
    return N0.getOperand(0);
  }

  // Try to narrow a truncate-of-sext_in_reg to the destination type:
  // trunc (sign_ext_inreg X, iM) to iN --> sign_ext_inreg (trunc X to iN), iM
  if (!LegalTypes && N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
      N0.hasOneUse()) {
    SDValue X = N0.getOperand(0);
    SDValue ExtVal = N0.getOperand(1);
    EVT ExtVT = cast<VTSDNode>(ExtVal)->getVT();
    if (ExtVT.bitsLT(VT) && TLI.preferSextInRegOfTruncate(VT, SrcVT, ExtVT)) {
      SDValue TrX = DAG.getNode(ISD::TRUNCATE, DL, VT, X);
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, TrX, ExtVal);
    }
  }

  // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
  if (N->hasOneUse() && (N->user_begin()->getOpcode() == ISD::ANY_EXTEND))
    return SDValue();

  // Fold extract-and-trunc into a narrow extract. For example:
  //   i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
  //   i32 y = TRUNCATE(i64 x)
  //        -- becomes --
  //   v16i8 b = BITCAST (v2i64 val)
  //   i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
  //
  // Note: We only run this optimization after type legalization (which often
  // creates this pattern) and before operation legalization after which
  // we need to be more careful about the vector instructions that we generate.
  if (LegalTypes && !LegalOperations && VT.isScalarInteger() && VT != MVT::i1 &&
      N0->hasOneUse()) {
    EVT TrTy = N->getValueType(0);
    SDValue Src = N0;

    // Check for cases where we shift down an upper element before truncation.
    int EltOffset = 0;
    if (Src.getOpcode() == ISD::SRL && Src.getOperand(0)->hasOneUse()) {
      if (auto ShAmt = DAG.getValidShiftAmount(Src)) {
        // Only look through the shift when it moves whole truncated elements.
        if ((*ShAmt % TrTy.getSizeInBits()) == 0) {
          Src = Src.getOperand(0);
          EltOffset = *ShAmt / TrTy.getSizeInBits();
        }
      }
    }

    if (Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
      EVT VecTy = Src.getOperand(0).getValueType();
      EVT ExTy = Src.getValueType();

      auto EltCnt = VecTy.getVectorElementCount();
      unsigned SizeRatio = ExTy.getSizeInBits() / TrTy.getSizeInBits();
      auto NewEltCnt = EltCnt * SizeRatio;

      EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, NewEltCnt);
      assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");

      SDValue EltNo = Src->getOperand(1);
      if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
        int Elt = EltNo->getAsZExtVal();
        // On big-endian targets sub-elements within a wide element are
        // numbered from the high end, hence the (SizeRatio - 1) adjustment.
        int Index = isLE ? (Elt * SizeRatio + EltOffset)
                         : (Elt * SizeRatio + (SizeRatio - 1) - EltOffset);
        return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
                           DAG.getBitcast(NVT, Src.getOperand(0)),
                           DAG.getVectorIdxConstant(Index, DL));
      }
    }
  }

  // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
  if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse() &&
      TLI.isTruncateFree(SrcVT, VT)) {
    if (!LegalOperations ||
        (TLI.isOperationLegal(ISD::SELECT, SrcVT) &&
         TLI.isNarrowingProfitable(N0.getNode(), SrcVT, VT))) {
      SDLoc SL(N0);
      SDValue Cond = N0.getOperand(0);
      SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
      SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
      return DAG.getNode(ISD::SELECT, DL, VT, Cond, TruncOp0, TruncOp1);
    }
  }

  // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
  if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
      (!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
      TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
    SDValue Amt = N0.getOperand(1);
    KnownBits Known = DAG.computeKnownBits(Amt);
    unsigned Size = VT.getScalarSizeInBits();
    // The shift amount must provably fit in the narrow type's range.
    if (Known.countMaxActiveBits() <= Log2_32(Size)) {
      EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
      if (AmtVT != Amt.getValueType()) {
        Amt = DAG.getZExtOrTrunc(Amt, DL, AmtVT);
        AddToWorklist(Amt.getNode());
      }
      return DAG.getNode(ISD::SHL, DL, VT, Trunc, Amt);
    }
  }

  if (SDValue V = foldSubToUSubSat(VT, N0.getNode(), DL))
    return V;

  if (SDValue ABD = foldABSToABD(N, DL))
    return ABD;

  // Attempt to pre-truncate BUILD_VECTOR sources.
  if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
      N0.hasOneUse() &&
      // Avoid creating illegal types if running after type legalizer.
      (!LegalTypes || TLI.isTypeLegal(VT.getScalarType()))) {
    if (TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType()))
      return DAG.UnrollVectorOp(N);

    // trunc(build_vector(ext(x), ext(x)) -> build_vector(x,x)
    if (SDValue SplatVal = DAG.getSplatValue(N0)) {
      if (ISD::isExtOpcode(SplatVal.getOpcode()) &&
          SrcVT.getScalarType() == SplatVal.getValueType())
        return DAG.UnrollVectorOp(N);
    }
  }

  // trunc (splat_vector x) -> splat_vector (trunc x)
  if (N0.getOpcode() == ISD::SPLAT_VECTOR &&
      (!LegalTypes || TLI.isTypeLegal(VT.getScalarType())) &&
      (!LegalOperations || TLI.isOperationLegal(ISD::SPLAT_VECTOR, VT))) {
    EVT SVT = VT.getScalarType();
    return DAG.getSplatVector(
        VT, DL, DAG.getNode(ISD::TRUNCATE, DL, SVT, N0->getOperand(0)));
  }

  // Fold a series of buildvector, bitcast, and truncate if possible.
  // For example fold
  //   (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
  //   (2xi32 (buildvector x, y)).
  // NOTE(review): a condition line (presumably the BUILD_VECTOR opcode check
  // on N0.getOperand(0)) appears dropped from this excerpt.
  if (Level == AfterLegalizeVectorOps && VT.isVector() &&
      N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
      N0.getOperand(0).hasOneUse()) {
    SDValue BuildVect = N0.getOperand(0);
    EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
    EVT TruncVecEltTy = VT.getVectorElementType();

    // Check that the element types match.
    if (BuildVectEltTy == TruncVecEltTy) {
      // Now we only need to compute the offset of the truncated elements.
      unsigned BuildVecNumElts = BuildVect.getNumOperands();
      unsigned TruncVecNumElts = VT.getVectorNumElements();
      unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
      unsigned FirstElt = isLE ? 0 : (TruncEltOffset - 1);

      assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
             "Invalid number of elements");

      // NOTE(review): the declaration of Opnds (a SmallVector<SDValue, 8> in
      // upstream) appears dropped from this excerpt.
      for (unsigned i = FirstElt, e = BuildVecNumElts; i < e;
           i += TruncEltOffset)
        Opnds.push_back(BuildVect.getOperand(i));

      return DAG.getBuildVector(VT, DL, Opnds);
    }
  }

  // fold (truncate (load x)) -> (smaller load x)
  // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
  if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
    if (SDValue Reduced = reduceLoadWidth(N))
      return Reduced;

    // Handle the case where the truncated result is at least as wide as the
    // loaded type.
    if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
      auto *LN0 = cast<LoadSDNode>(N0);
      if (LN0->isSimple() && LN0->getMemoryVT().bitsLE(VT)) {
        SDValue NewLoad = DAG.getExtLoad(
            LN0->getExtensionType(), SDLoc(LN0), VT, LN0->getChain(),
            LN0->getBasePtr(), LN0->getMemoryVT(), LN0->getMemOperand());
        // Keep chain users of the old load pointing at the new load's chain.
        DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
        return NewLoad;
      }
    }
  }

  // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
  //           where ... are all 'undef'.
  if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
    // NOTE(review): the declarations of VTs/Opnds and the VTs.push_back call
    // appear dropped from this excerpt — reconcile with upstream.
    SDValue V;
    unsigned Idx = 0;
    unsigned NumDefs = 0;

    for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
      SDValue X = N0.getOperand(i);
      if (!X.isUndef()) {
        V = X;
        Idx = i;
        NumDefs++;
      }
      // Stop if more than one members are non-undef.
      if (NumDefs > 1)
        break;

                       X.getValueType().getVectorElementCount()));
    }

    if (NumDefs == 0)
      return DAG.getUNDEF(VT);

    if (NumDefs == 1) {
      assert(V.getNode() && "The single defined operand is empty!");
      for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
        if (i != Idx) {
          Opnds.push_back(DAG.getUNDEF(VTs[i]));
          continue;
        }
        SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
        AddToWorklist(NV.getNode());
        Opnds.push_back(NV);
      }
      return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Opnds);
    }
  }

  // Fold truncate of a bitcast of a vector to an extract of the low vector
  // element.
  //
  // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
  if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
    SDValue VecSrc = N0.getOperand(0);
    EVT VecSrcVT = VecSrc.getValueType();
    if (VecSrcVT.isVector() && VecSrcVT.getScalarType() == VT &&
        (!LegalOperations ||
         TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecSrcVT))) {
      // The low-order element sits at index 0 on LE, at the top index on BE.
      unsigned Idx = isLE ? 0 : VecSrcVT.getVectorNumElements() - 1;
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, VecSrc,
                         DAG.getVectorIdxConstant(Idx, DL));
    }
  }

  // Simplify the operands using demanded-bits information.
  // NOTE(review): the guarding `if (SimplifyDemandedBits(...))` line appears
  // dropped from this excerpt.
    return SDValue(N, 0);

  // fold (truncate (extract_subvector(ext x))) ->
  //      (extract_subvector x)
  // TODO: This can be generalized to cover cases where the truncate and extract
  // do not fully cancel each other out.
  if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
    SDValue N00 = N0.getOperand(0);
    if (N00.getOpcode() == ISD::SIGN_EXTEND ||
        N00.getOpcode() == ISD::ZERO_EXTEND ||
        N00.getOpcode() == ISD::ANY_EXTEND) {
      // NOTE(review): the right-hand side of this comparison (the destination
      // vector element type) appears dropped from this excerpt.
      if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
        return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
                           N00.getOperand(0), N0.getOperand(1));
    }
  }

  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
    return NewVSel;

  // Narrow a suitable binary operation with a non-opaque constant operand by
  // moving it ahead of the truncate. This is limited to pre-legalization
  // because targets may prefer a wider type during later combines and invert
  // this transform.
  switch (N0.getOpcode()) {
  case ISD::ADD:
  case ISD::SUB:
  case ISD::MUL:
  case ISD::AND:
  case ISD::OR:
  case ISD::XOR:
    // NOTE(review): one alternative of this condition (a constant check on
    // operand 0) appears dropped from this excerpt.
    if (!LegalOperations && N0.hasOneUse() &&
        (N0.getOperand(0) == N0.getOperand(1) ||
         isConstantOrConstantVector(N0.getOperand(1), true))) {
      // TODO: We already restricted this to pre-legalization, but for vectors
      // we are extra cautious to not create an unsupported operation.
      // Target-specific changes are likely needed to avoid regressions here.
      if (VT.isScalarInteger() || TLI.isOperationLegal(N0.getOpcode(), VT)) {
        SDValue NarrowL = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
        SDValue NarrowR = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
        SDNodeFlags Flags;
        // Propagate nuw for sub.
        // NOTE(review): part of this condition (presumably a
        // MaskedValueIsZero check over the truncated-away bits) appears
        // dropped from this excerpt.
        if (N0->getOpcode() == ISD::SUB && N0->getFlags().hasNoUnsignedWrap() &&
                N0->getOperand(0),
                                        VT.getScalarSizeInBits())))
          Flags.setNoUnsignedWrap(true);
        return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR, Flags);
      }
    }
    break;
  case ISD::ADDE:
  case ISD::UADDO_CARRY:
    // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
    // (trunc uaddo_carry(X, Y, Carry)) ->
    //     (uaddo_carry trunc(X), trunc(Y), Carry)
    // When the adde's carry is not used.
    // We only do for uaddo_carry before legalize operation
    if (((!LegalOperations && N0.getOpcode() == ISD::UADDO_CARRY) ||
         TLI.isOperationLegal(N0.getOpcode(), VT)) &&
        N0.hasOneUse() && !N0->hasAnyUseOfValue(1)) {
      SDValue X = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
      SDValue Y = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
      SDVTList VTs = DAG.getVTList(VT, N0->getValueType(1));
      return DAG.getNode(N0.getOpcode(), DL, VTs, X, Y, N0.getOperand(2));
    }
    break;
  case ISD::USUBSAT:
    // Truncate the USUBSAT only if LHS is a known zero-extension, its not
    // enough to know that the upper bits are zero we must ensure that we don't
    // introduce an extra truncate.
    // NOTE(review): part of this condition (the known zero-extension check)
    // appears dropped from this excerpt.
    if (!LegalOperations && N0.hasOneUse() &&
            VT.getScalarSizeInBits() &&
        hasOperation(N0.getOpcode(), VT)) {
      return getTruncatedUSUBSAT(VT, SrcVT, N0.getOperand(0), N0.getOperand(1),
                                 DAG, DL);
    }
    break;
  case ISD::AVGCEILS:
  case ISD::AVGCEILU:
    // trunc (avgceilu (sext (x), sext (y))) -> avgceils(x, y)
    // trunc (avgceils (zext (x), zext (y))) -> avgceilu(x, y)
    if (N0.hasOneUse()) {
      SDValue Op0 = N0.getOperand(0);
      SDValue Op1 = N0.getOperand(1);
      if (N0.getOpcode() == ISD::AVGCEILU) {
        // NOTE(review): the opening `if (` of this condition appears dropped
        // from this excerpt.
            Op0.getOpcode() == ISD::SIGN_EXTEND &&
            Op1.getOpcode() == ISD::SIGN_EXTEND &&
            Op0.getOperand(0).getValueType() == VT &&
            Op1.getOperand(0).getValueType() == VT)
          return DAG.getNode(ISD::AVGCEILS, DL, VT, Op0.getOperand(0),
                             Op1.getOperand(0));
      } else {
        // NOTE(review): the opening `if (` of this condition appears dropped
        // from this excerpt.
            Op0.getOpcode() == ISD::ZERO_EXTEND &&
            Op1.getOpcode() == ISD::ZERO_EXTEND &&
            Op0.getOperand(0).getValueType() == VT &&
            Op1.getOperand(0).getValueType() == VT)
          return DAG.getNode(ISD::AVGCEILU, DL, VT, Op0.getOperand(0),
                             Op1.getOperand(0));
      }
    }
    [[fallthrough]];
  case ISD::AVGFLOORS:
  case ISD::AVGFLOORU:
  case ISD::ABDS:
  case ISD::ABDU:
    // (trunc (avg a, b)) -> (avg (trunc a), (trunc b))
    // (trunc (abdu/abds a, b)) -> (abdu/abds (trunc a), (trunc b))
    if (!LegalOperations && N0.hasOneUse() &&
        TLI.isOperationLegal(N0.getOpcode(), VT)) {
      EVT TruncVT = VT;
      unsigned SrcBits = SrcVT.getScalarSizeInBits();
      unsigned TruncBits = TruncVT.getScalarSizeInBits();

      SDValue A = N0.getOperand(0);
      SDValue B = N0.getOperand(1);
      bool CanFold = false;

      if (N0.getOpcode() == ISD::AVGFLOORU || N0.getOpcode() == ISD::AVGCEILU ||
          N0.getOpcode() == ISD::ABDU) {
        // Unsigned forms: safe when the truncated-away bits are known zero.
        APInt UpperBits = APInt::getBitsSetFrom(SrcBits, TruncBits);
        CanFold = DAG.MaskedValueIsZero(B, UpperBits) &&
                  DAG.MaskedValueIsZero(A, UpperBits);
      } else {
        // Signed forms: safe when enough sign bits survive the narrowing.
        unsigned NeededBits = SrcBits - TruncBits;
        CanFold = DAG.ComputeNumSignBits(B) > NeededBits &&
                  DAG.ComputeNumSignBits(A) > NeededBits;
      }

      if (CanFold) {
        SDValue NewA = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, A);
        SDValue NewB = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, B);
        return DAG.getNode(N0.getOpcode(), DL, TruncVT, NewA, NewB);
      }
    }
    break;
  }

  return SDValue();
}
17461
17462static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
17463 SDValue Elt = N->getOperand(i);
17464 if (Elt.getOpcode() != ISD::MERGE_VALUES)
17465 return Elt.getNode();
17466 return Elt.getOperand(Elt.getResNo()).getNode();
17467}
17468
17469/// build_pair (load, load) -> load
17470/// if load locations are consecutive.
17471SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
17472 assert(N->getOpcode() == ISD::BUILD_PAIR);
17473
17474 auto *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
17475 auto *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
17476
17477 // A BUILD_PAIR is always having the least significant part in elt 0 and the
17478 // most significant part in elt 1. So when combining into one large load, we
17479 // need to consider the endianness.
17480 if (DAG.getDataLayout().isBigEndian())
17481 std::swap(LD1, LD2);
17482
17483 if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !ISD::isNON_EXTLoad(LD2) ||
17484 !LD1->hasOneUse() || !LD2->hasOneUse() ||
17485 LD1->getAddressSpace() != LD2->getAddressSpace())
17486 return SDValue();
17487
17488 unsigned LD1Fast = 0;
17489 EVT LD1VT = LD1->getValueType(0);
17490 unsigned LD1Bytes = LD1VT.getStoreSize();
17491 if ((!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
17492 DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1) &&
17493 TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
17494 *LD1->getMemOperand(), &LD1Fast) && LD1Fast)
17495 return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
17496 LD1->getPointerInfo(), LD1->getAlign());
17497
17498 return SDValue();
17499}
17500
17501static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
17502 // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
17503 // and Lo parts; on big-endian machines it doesn't.
17504 return DAG.getDataLayout().isBigEndian() ? 1 : 0;
17505}
17506
17507SDValue DAGCombiner::foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
17508 const TargetLowering &TLI) {
17509 // If this is not a bitcast to an FP type or if the target doesn't have
17510 // IEEE754-compliant FP logic, we're done.
17511 EVT VT = N->getValueType(0);
17512 SDValue N0 = N->getOperand(0);
17513 EVT SourceVT = N0.getValueType();
17514
17515 if (!VT.isFloatingPoint())
17516 return SDValue();
17517
17518 // TODO: Handle cases where the integer constant is a different scalar
17519 // bitwidth to the FP.
17520 if (VT.getScalarSizeInBits() != SourceVT.getScalarSizeInBits())
17521 return SDValue();
17522
17523 unsigned FPOpcode;
17524 APInt SignMask;
17525 switch (N0.getOpcode()) {
17526 case ISD::AND:
17527 FPOpcode = ISD::FABS;
17528 SignMask = ~APInt::getSignMask(SourceVT.getScalarSizeInBits());
17529 break;
17530 case ISD::XOR:
17531 FPOpcode = ISD::FNEG;
17532 SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
17533 break;
17534 case ISD::OR:
17535 FPOpcode = ISD::FABS;
17536 SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
17537 break;
17538 default:
17539 return SDValue();
17540 }
17541
17542 if (LegalOperations && !TLI.isOperationLegal(FPOpcode, VT))
17543 return SDValue();
17544
17545 // This needs to be the inverse of logic in foldSignChangeInBitcast.
17546 // FIXME: I don't think looking for bitcast intrinsically makes sense, but
17547 // removing this would require more changes.
17548 auto IsBitCastOrFree = [&TLI, FPOpcode](SDValue Op, EVT VT) {
17549 if (sd_match(Op, m_BitCast(m_SpecificVT(VT))))
17550 return true;
17551
17552 return FPOpcode == ISD::FABS ? TLI.isFAbsFree(VT) : TLI.isFNegFree(VT);
17553 };
17554
17555 // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
17556 // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
17557 // Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) ->
17558 // fneg (fabs X)
17559 SDValue LogicOp0 = N0.getOperand(0);
17560 ConstantSDNode *LogicOp1 = isConstOrConstSplat(N0.getOperand(1), true);
17561 if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
17562 IsBitCastOrFree(LogicOp0, VT)) {
17563 SDValue CastOp0 = DAG.getNode(ISD::BITCAST, SDLoc(N), VT, LogicOp0);
17564 SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, CastOp0);
17565 NumFPLogicOpsConv++;
17566 if (N0.getOpcode() == ISD::OR)
17567 return DAG.getNode(ISD::FNEG, SDLoc(N), VT, FPOp);
17568 return FPOp;
17569 }
17570
17571 return SDValue();
17572}
17573
17574SDValue DAGCombiner::visitBITCAST(SDNode *N) {
17575 SDValue N0 = N->getOperand(0);
17576 EVT VT = N->getValueType(0);
17577
17578 if (N0.isUndef())
17579 return DAG.getUNDEF(VT);
17580
17581 // If the input is a BUILD_VECTOR with all constant elements, fold this now.
17582 // Only do this before legalize types, unless both types are integer and the
17583 // scalar type is legal. Only do this before legalize ops, since the target
17584 // maybe depending on the bitcast.
17585 // First check to see if this is all constant.
17586 // TODO: Support FP bitcasts after legalize types.
17587 if (VT.isVector() &&
17588 (!LegalTypes ||
17589 (!LegalOperations && VT.isInteger() && N0.getValueType().isInteger() &&
17590 TLI.isTypeLegal(VT.getVectorElementType()))) &&
17591 N0.getOpcode() == ISD::BUILD_VECTOR && N0->hasOneUse() &&
17592 cast<BuildVectorSDNode>(N0)->isConstant())
17593 return DAG.FoldConstantBuildVector(cast<BuildVectorSDNode>(N0), SDLoc(N),
17595
17596 // If the input is a constant, let getNode fold it.
17597 if (isIntOrFPConstant(N0)) {
17598 // If we can't allow illegal operations, we need to check that this is just
17599 // a fp -> int or int -> conversion and that the resulting operation will
17600 // be legal.
17601 if (!LegalOperations ||
17602 (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
17604 (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
17605 TLI.isOperationLegal(ISD::Constant, VT))) {
17606 SDValue C = DAG.getBitcast(VT, N0);
17607 if (C.getNode() != N)
17608 return C;
17609 }
17610 }
17611
17612 // (conv (conv x, t1), t2) -> (conv x, t2)
17613 if (N0.getOpcode() == ISD::BITCAST)
17614 return DAG.getBitcast(VT, N0.getOperand(0));
17615
17616 // fold (conv (logicop (conv x), (c))) -> (logicop x, (conv c))
17617 // iff the current bitwise logicop type isn't legal
17618 if (ISD::isBitwiseLogicOp(N0.getOpcode()) && VT.isInteger() &&
17619 !TLI.isTypeLegal(N0.getOperand(0).getValueType())) {
17620 auto IsFreeBitcast = [VT](SDValue V) {
17621 return (V.getOpcode() == ISD::BITCAST &&
17622 V.getOperand(0).getValueType() == VT) ||
17624 V->hasOneUse());
17625 };
17626 if (IsFreeBitcast(N0.getOperand(0)) && IsFreeBitcast(N0.getOperand(1)))
17627 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
17628 DAG.getBitcast(VT, N0.getOperand(0)),
17629 DAG.getBitcast(VT, N0.getOperand(1)));
17630 }
17631
17632 // fold (conv (load x)) -> (load (conv*)x)
17633 // fold (conv (freeze (load x))) -> (freeze (load (conv*)x))
17634 // If the resultant load doesn't need a higher alignment than the original!
17635 auto CastLoad = [this, &VT](SDValue N0, const SDLoc &DL) {
17636 if (N0.getOpcode() == ISD::AssertNoFPClass)
17637 N0 = N0.getOperand(0);
17638 if (!ISD::isNormalLoad(N0.getNode()) || !N0.hasOneUse())
17639 return SDValue();
17640
17641 // Do not remove the cast if the types differ in endian layout.
17644 return SDValue();
17645
17646 // If the load is volatile, we only want to change the load type if the
17647 // resulting load is legal. Otherwise we might increase the number of
17648 // memory accesses. We don't care if the original type was legal or not
17649 // as we assume software couldn't rely on the number of accesses of an
17650 // illegal type.
17651 auto *LN0 = cast<LoadSDNode>(N0);
17652 if ((LegalOperations || !LN0->isSimple()) &&
17653 !TLI.isOperationLegal(ISD::LOAD, VT))
17654 return SDValue();
17655
17656 if (!TLI.isLoadBitCastBeneficial(N0.getValueType(), VT, DAG,
17657 *LN0->getMemOperand()))
17658 return SDValue();
17659
17660 // If the range metadata type does not match the new memory
17661 // operation type, remove the range metadata.
17662 if (const MDNode *MD = LN0->getRanges()) {
17663 ConstantInt *Lower = mdconst::extract<ConstantInt>(MD->getOperand(0));
17664 if (Lower->getBitWidth() != VT.getScalarSizeInBits() || !VT.isInteger()) {
17665 LN0->getMemOperand()->clearRanges();
17666 }
17667 }
17668 SDValue Load = DAG.getLoad(VT, DL, LN0->getChain(), LN0->getBasePtr(),
17669 LN0->getMemOperand());
17670 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
17671 return Load;
17672 };
17673
17674 if (SDValue NewLd = CastLoad(N0, SDLoc(N)))
17675 return NewLd;
17676
17677 if (N0.getOpcode() == ISD::FREEZE && N0.hasOneUse())
17678 if (SDValue NewLd = CastLoad(N0.getOperand(0), SDLoc(N)))
17679 return DAG.getFreeze(NewLd);
17680
17681 if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
17682 return V;
17683
17684 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
17685 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
17686 //
17687 // For ppc_fp128:
17688 // fold (bitcast (fneg x)) ->
17689 // flipbit = signbit
17690 // (xor (bitcast x) (build_pair flipbit, flipbit))
17691 //
17692 // fold (bitcast (fabs x)) ->
17693 // flipbit = (and (extract_element (bitcast x), 0), signbit)
17694 // (xor (bitcast x) (build_pair flipbit, flipbit))
17695 // This often reduces constant pool loads.
17696 if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
17697 (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
17698 N0->hasOneUse() && VT.isInteger() && !VT.isVector() &&
17699 !N0.getValueType().isVector()) {
17700 SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
17701 AddToWorklist(NewConv.getNode());
17702
17703 SDLoc DL(N);
17704 if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
17705 assert(VT.getSizeInBits() == 128);
17706 SDValue SignBit = DAG.getConstant(
17707 APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
17708 SDValue FlipBit;
17709 if (N0.getOpcode() == ISD::FNEG) {
17710 FlipBit = SignBit;
17711 AddToWorklist(FlipBit.getNode());
17712 } else {
17713 assert(N0.getOpcode() == ISD::FABS);
17714 SDValue Hi =
17715 DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
17717 SDLoc(NewConv)));
17718 AddToWorklist(Hi.getNode());
17719 FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
17720 AddToWorklist(FlipBit.getNode());
17721 }
17722 SDValue FlipBits =
17723 DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
17724 AddToWorklist(FlipBits.getNode());
17725 return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
17726 }
17727 APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
17728 if (N0.getOpcode() == ISD::FNEG)
17729 return DAG.getNode(ISD::XOR, DL, VT,
17730 NewConv, DAG.getConstant(SignBit, DL, VT));
17731 assert(N0.getOpcode() == ISD::FABS);
17732 return DAG.getNode(ISD::AND, DL, VT,
17733 NewConv, DAG.getConstant(~SignBit, DL, VT));
17734 }
17735
17736 // fold (bitconvert (fcopysign cst, x)) ->
17737 // (or (and (bitconvert x), sign), (and cst, (not sign)))
17738 // Note that we don't handle (copysign x, cst) because this can always be
17739 // folded to an fneg or fabs.
17740 //
17741 // For ppc_fp128:
17742 // fold (bitcast (fcopysign cst, x)) ->
17743 // flipbit = (and (extract_element
17744 // (xor (bitcast cst), (bitcast x)), 0),
17745 // signbit)
17746 // (xor (bitcast cst) (build_pair flipbit, flipbit))
17747 if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse() &&
17749 !VT.isVector()) {
17750 unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
17751 EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
17752 if (isTypeLegal(IntXVT)) {
17753 SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
17754 AddToWorklist(X.getNode());
17755
17756 // If X has a different width than the result/lhs, sext it or truncate it.
17757 unsigned VTWidth = VT.getSizeInBits();
17758 if (OrigXWidth < VTWidth) {
17759 X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
17760 AddToWorklist(X.getNode());
17761 } else if (OrigXWidth > VTWidth) {
17762 // To get the sign bit in the right place, we have to shift it right
17763 // before truncating.
17764 SDLoc DL(X);
17765 X = DAG.getNode(ISD::SRL, DL,
17766 X.getValueType(), X,
17767 DAG.getConstant(OrigXWidth-VTWidth, DL,
17768 X.getValueType()));
17769 AddToWorklist(X.getNode());
17770 X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
17771 AddToWorklist(X.getNode());
17772 }
17773
17774 if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
17775 APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
17776 SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
17777 AddToWorklist(Cst.getNode());
17778 SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
17779 AddToWorklist(X.getNode());
17780 SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
17781 AddToWorklist(XorResult.getNode());
17782 SDValue XorResult64 = DAG.getNode(
17783 ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
17785 SDLoc(XorResult)));
17786 AddToWorklist(XorResult64.getNode());
17787 SDValue FlipBit =
17788 DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
17789 DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
17790 AddToWorklist(FlipBit.getNode());
17791 SDValue FlipBits =
17792 DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
17793 AddToWorklist(FlipBits.getNode());
17794 return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
17795 }
17796 APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
17797 X = DAG.getNode(ISD::AND, SDLoc(X), VT,
17798 X, DAG.getConstant(SignBit, SDLoc(X), VT));
17799 AddToWorklist(X.getNode());
17800
17801 SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
17802 Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
17803 Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
17804 AddToWorklist(Cst.getNode());
17805
17806 return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
17807 }
17808 }
17809
17810 // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
17811 if (N0.getOpcode() == ISD::BUILD_PAIR)
17812 if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
17813 return CombineLD;
17814
17815 // int_vt (bitcast (vec_vt (scalar_to_vector elt_vt:x)))
17816 // => int_vt (any_extend elt_vt:x)
17817 if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && VT.isScalarInteger()) {
17818 SDValue SrcScalar = N0.getOperand(0);
17819 if (SrcScalar.getValueType().isScalarInteger())
17820 return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, SrcScalar);
17821 }
17822
17823 // Remove double bitcasts from shuffles - this is often a legacy of
17824 // XformToShuffleWithZero being used to combine bitmaskings (of
17825 // float vectors bitcast to integer vectors) into shuffles.
17826 // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
17827 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
17828 N0->getOpcode() == ISD::VECTOR_SHUFFLE && N0.hasOneUse() &&
17831 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
17832
17833 // If operands are a bitcast, peek through if it casts the original VT.
17834 // If operands are a constant, just bitcast back to original VT.
17835 auto PeekThroughBitcast = [&](SDValue Op) {
17836 if (Op.getOpcode() == ISD::BITCAST &&
17837 Op.getOperand(0).getValueType() == VT)
17838 return SDValue(Op.getOperand(0));
17839 if (Op.isUndef() || isAnyConstantBuildVector(Op))
17840 return DAG.getBitcast(VT, Op);
17841 return SDValue();
17842 };
17843
17844 // FIXME: If either input vector is bitcast, try to convert the shuffle to
17845 // the result type of this bitcast. This would eliminate at least one
17846 // bitcast. See the transform in InstCombine.
17847 SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
17848 SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
17849 if (!(SV0 && SV1))
17850 return SDValue();
17851
17852 int MaskScale =
17854 SmallVector<int, 8> NewMask;
17855 for (int M : SVN->getMask())
17856 for (int i = 0; i != MaskScale; ++i)
17857 NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
17858
17859 SDValue LegalShuffle =
17860 TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask, DAG);
17861 if (LegalShuffle)
17862 return LegalShuffle;
17863 }
17864
17865 return SDValue();
17866}
17867
17868SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
17869 EVT VT = N->getValueType(0);
17870 return CombineConsecutiveLoads(N, VT);
17871}
17872
// Combine an ISD::FREEZE node:
//  * drop the freeze when the operand is provably not undef/poison,
//  * canonicalize mixed frozen/unfrozen users of the operand onto the frozen
//    value, and
//  * otherwise push the freeze through a one-use, non-poison-producing
//    operand so it lands directly on the maybe-poison operand(s).
// Returns the replacement value, or SDValue() when nothing was changed.
17873 SDValue DAGCombiner::visitFREEZE(SDNode *N) {
17874 SDValue N0 = N->getOperand(0);
17875
// NOTE(review): the guard for this early return (original line 17876) is
// elided in this listing; presumably it tests
// DAG.isGuaranteedNotToBeUndefOrPoison(N0) -- confirm against the full source.
17877 return N0;
17878
17879 // If we have frozen and unfrozen users of N0, update so everything uses N.
17880 if (!N0.isUndef() && !N0.hasOneUse()) {
17881 SDValue FrozenN0(N, 0);
17882 // Unfreeze all uses of N to avoid double deleting N from the CSE map.
17883 DAG.ReplaceAllUsesOfValueWith(FrozenN0, N0);
17884 DAG.ReplaceAllUsesOfValueWith(N0, FrozenN0);
17885 // ReplaceAllUsesOfValueWith will have also updated the use in N, thus
17886 // creating a cycle in a DAG. Let's undo that by mutating the freeze.
17887 assert(N->getOperand(0) == FrozenN0 && "Expected cycle in DAG");
17888 DAG.UpdateNodeOperands(N, N0);
17889 // Revisit the node.
17890 AddToWorklist(N);
17891 return FrozenN0;
17892 }
17893
17894 // We currently avoid folding freeze over SRA/SRL, due to the problems seen
17895 // with (freeze (assert ext)) blocking simplifications of SRA/SRL. See for
17896 // example https://reviews.llvm.org/D136529#4120959.
17897 if (N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::SRL)
17898 return SDValue();
17899
17900 // Fold freeze(op(x, ...)) -> op(freeze(x), ...).
17901 // Try to push freeze through instructions that propagate but don't produce
17902 // poison as far as possible. If an operand of freeze follows three
17903 // conditions 1) one-use, 2) does not produce poison, and 3) has all but one
17904 // guaranteed-non-poison operands (or is a BUILD_VECTOR or similar) then push
17905 // the freeze through to the operands that are not guaranteed non-poison.
17906 // NOTE: we will strip poison-generating flags, so ignore them here.
// NOTE(review): the start of this bail-out condition (original line 17907) is
// elided in this listing; presumably it calls DAG.canCreateUndefOrPoison(N0,
// /*PoisonOnly*/ false, ...) -- confirm against the full source.
17908 /*ConsiderFlags*/ false) ||
17909 N0->getNumValues() != 1 || !N0->hasOneUse())
17910 return SDValue();
17911
17912 // TODO: we should always allow multiple operands, however this increases the
17913 // likelihood of infinite loops due to the ReplaceAllUsesOfValueWith call
17914 // below causing later nodes that share frozen operands to fold again and no
17915 // longer being able to confirm other operands are not poison due to recursion
17916 // depth limits on isGuaranteedNotToBeUndefOrPoison.
17917 bool AllowMultipleMaybePoisonOperands =
17918 N0.getOpcode() == ISD::SELECT_CC || N0.getOpcode() == ISD::SETCC ||
17919 N0.getOpcode() == ISD::BUILD_VECTOR ||
// NOTE(review): original line 17920 (another opcode in this disjunction) is
// elided in this listing -- confirm the full opcode list upstream.
17921 N0.getOpcode() == ISD::BUILD_PAIR ||
// NOTE(review): original lines 17922-17923 (the tail of this disjunction) are
// elided in this listing -- confirm the full opcode list upstream.
17924
17925 // Avoid turning a BUILD_VECTOR that can be recognized as "all zeros", "all
17926 // ones" or "constant" into something that depends on FrozenUndef. We can
17927 // instead pick undef values to keep those properties, while at the same time
17928 // folding away the freeze.
17929 // If we implement a more general solution for folding away freeze(undef) in
17930 // the future, then this special handling can be removed.
17931 if (N0.getOpcode() == ISD::BUILD_VECTOR) {
17932 SDLoc DL(N0);
17933 EVT VT = N0.getValueType();
// NOTE(review): the guard for this return (original line 17934) is elided;
// presumably it checks for an all-ones build vector -- confirm upstream.
17935 return DAG.getAllOnesConstant(DL, VT);
// NOTE(review): original lines 17936-17937 are elided; they presumably guard
// the constant-vector case and declare the NewVecC vector used below --
// confirm upstream.
17938 for (const SDValue &Op : N0->op_values())
17939 NewVecC.push_back(
17940 Op.isUndef() ? DAG.getConstant(0, DL, Op.getValueType()) : Op);
17941 return DAG.getBuildVector(VT, DL, NewVecC);
17942 }
17943 }
17944
17945 SmallSet<SDValue, 8> MaybePoisonOperands;
17946 SmallVector<unsigned, 8> MaybePoisonOperandNumbers;
17947 for (auto [OpNo, Op] : enumerate(N0->ops())) {
// NOTE(review): original lines 17948-17949 are elided; presumably they skip
// operands that are guaranteed not to be undef/poison -- confirm upstream.
17950 continue;
17951 bool HadMaybePoisonOperands = !MaybePoisonOperands.empty();
17952 bool IsNewMaybePoisonOperand = MaybePoisonOperands.insert(Op).second;
17953 if (IsNewMaybePoisonOperand)
17954 MaybePoisonOperandNumbers.push_back(OpNo);
17955 if (!HadMaybePoisonOperands)
17956 continue;
17957 if (IsNewMaybePoisonOperand && !AllowMultipleMaybePoisonOperands) {
17958 // Multiple maybe-poison ops when not allowed - bail out.
17959 return SDValue();
17960 }
17961 }
17962 // NOTE: the whole op may be not guaranteed to not be undef or poison because
17963 // it could create undef or poison due to its poison-generating flags.
17964 // So not finding any maybe-poison operands is fine.
17965
17966 for (unsigned OpNo : MaybePoisonOperandNumbers) {
17967 // N0 can mutate during iteration, so make sure to refetch the maybe poison
17968 // operands via the operand numbers. The typical scenario is that we have
17969 // something like this
17970 // t262: i32 = freeze t181
17971 // t150: i32 = ctlz_zero_poison t262
17972 // t184: i32 = ctlz_zero_poison t181
17973 // t268: i32 = select_cc t181, Constant:i32<0>, t184, t186, setne:ch
17974 // When freezing the t181 operand we get t262 back, and then the
17975 // ReplaceAllUsesOfValueWith call will not only replace t181 by t262, but
17976 // also recursively replace t184 by t150.
17977 SDValue MaybePoisonOperand = N->getOperand(0).getOperand(OpNo);
17978 // Don't replace every single UNDEF everywhere with frozen UNDEF, though.
17979 if (MaybePoisonOperand.isUndef())
17980 continue;
17981 // First, freeze each offending operand.
17982 SDValue FrozenMaybePoisonOperand = DAG.getFreeze(MaybePoisonOperand);
17983 // Then, change all other uses of unfrozen operand to use frozen operand.
17984 DAG.ReplaceAllUsesOfValueWith(MaybePoisonOperand, FrozenMaybePoisonOperand);
17985 if (FrozenMaybePoisonOperand.getOpcode() == ISD::FREEZE &&
17986 FrozenMaybePoisonOperand.getOperand(0) == FrozenMaybePoisonOperand) {
17987 // But, that also updated the use in the freeze we just created, thus
17988 // creating a cycle in a DAG. Let's undo that by mutating the freeze.
17989 DAG.UpdateNodeOperands(FrozenMaybePoisonOperand.getNode(),
17990 MaybePoisonOperand);
17991 }
17992
17993 // This node has been merged with another.
17994 if (N->getOpcode() == ISD::DELETED_NODE)
17995 return SDValue(N, 0);
17996 }
17997
17998 assert(N->getOpcode() != ISD::DELETED_NODE && "Node was deleted!");
17999
18000 // The whole node may have been updated, so the value we were holding
18001 // may no longer be valid. Re-fetch the operand we're `freeze`ing.
18002 N0 = N->getOperand(0);
18003
18004 // Finally, recreate the node, its operands were updated to use
18005 // frozen operands, so we just need to use its "original" operands.
// NOTE(review): original line 18006 is elided; presumably it declares the
// `Ops` vector iterated below (a copy of N0's operands) -- confirm upstream.
18007 // TODO: ISD::UNDEF and ISD::POISON should get separate handling, but best
18008 // leave for a future patch.
18009 for (SDValue &Op : Ops) {
18010 if (Op.isUndef())
18011 Op = DAG.getFreeze(Op);
18012 }
18013
18014 SDLoc DL(N0);
18015
18016 // Special case handling for ShuffleVectorSDNode nodes.
18017 if (auto *SVN = dyn_cast<ShuffleVectorSDNode>(N0))
18018 return DAG.getVectorShuffle(N0.getValueType(), DL, Ops[0], Ops[1],
18019 SVN->getMask());
18020
18021 // NOTE: this strips poison generating flags.
18022 // Folding freeze(op(x, ...)) -> op(freeze(x), ...) does not require nnan,
18023 // ninf, nsz, or fast.
18024 // However, contract, reassoc, afn, and arcp should be preserved,
18025 // as these fast-math flags do not introduce poison values.
18026 SDNodeFlags SrcFlags = N0->getFlags();
18027 SDNodeFlags SafeFlags;
18028 SafeFlags.setAllowContract(SrcFlags.hasAllowContract());
18029 SafeFlags.setAllowReassociation(SrcFlags.hasAllowReassociation());
18030 SafeFlags.setApproximateFuncs(SrcFlags.hasApproximateFuncs());
18031 SafeFlags.setAllowReciprocal(SrcFlags.hasAllowReciprocal());
18032 return DAG.getNode(N0.getOpcode(), DL, N0->getVTList(), Ops, SafeFlags);
18033 }
18034
18035 // Returns true if floating point contraction is allowed on the FMUL-SDValue
18036 // `N`
// NOTE(review): the function signature (original line 18037) is elided in this
// listing; upstream this is a file-local helper, presumably
//   static bool isContractableFMUL(const TargetOptions &Options, SDValue N) {
// -- confirm against the full source.
18038 assert(N.getOpcode() == ISD::FMUL);
18039
// Contraction is permitted either globally (-ffp-contract=fast) or per-node
// via the 'contract' fast-math flag.
18040 return Options.AllowFPOpFusion == FPOpFusion::Fast ||
18041 N->getFlags().hasAllowContract();
18042 }
18043
18044 /// Try to perform FMA combining on a given FADD node.
/// Folds patterns such as (fadd (fmul x, y), z) -> (fma x, y, z), including
/// variants that look through FP_EXTEND and nested FMA/FMAD chains.
/// Returns the fused node, or SDValue() when no combine applies.
18045 template <class MatchContextClass>
18046 SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
18047 SDValue N0 = N->getOperand(0);
18048 SDValue N1 = N->getOperand(1);
18049 EVT VT = N->getValueType(0);
18050 SDLoc SL(N);
18051 MatchContextClass matcher(DAG, TLI, N);
18052 const TargetOptions &Options = DAG.getTarget().Options;
18053
18054 bool UseVP = std::is_same_v<MatchContextClass, VPMatchContext>;
18055
18056 // Floating-point multiply-add with intermediate rounding.
18057 // FIXME: Make isFMADLegal have specific behavior when using VPMatchContext.
18058 // FIXME: Add VP_FMAD opcode.
18059 bool HasFMAD = !UseVP && (LegalOperations && TLI.isFMADLegal(DAG, N));
18060
18061 // Floating-point multiply-add without intermediate rounding.
18062 bool HasFMA =
18063 (!LegalOperations || matcher.isOperationLegalOrCustom(ISD::FMA, VT)) &&
// NOTE(review): original line 18064 (the second operand of this &&) is elided
// in this listing; presumably it is
// TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT); -- confirm.
18065
18066 // No valid opcode, do not combine.
18067 if (!HasFMAD && !HasFMA)
18068 return SDValue();
18069
18070 bool AllowFusionGlobally =
18071 Options.AllowFPOpFusion == FPOpFusion::Fast || HasFMAD;
18072 // If the addition is not contractable, do not combine.
18073 if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
18074 return SDValue();
18075
18076 // Folding fadd (fmul x, y), (fmul x, y) -> fma x, y, (fmul x, y) is never
18077 // beneficial. It does not reduce latency. It increases register pressure. It
18078 // replaces an fadd with an fma which is a more complex instruction, so is
18079 // likely to have a larger encoding, use more functional units, etc.
18080 if (N0 == N1)
18081 return SDValue();
18082
18083 if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
18084 return SDValue();
18085
18086 // Always prefer FMAD to FMA for precision.
18087 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
// NOTE(review): original line 18088 is elided in this listing; presumably it
// defines the `Aggressive` flag used below, i.e.
// bool Aggressive = TLI.enableAggressiveFMAFusion(VT); -- confirm upstream.
18089
18090 auto isFusedOp = [&](SDValue N) {
18091 return matcher.match(N, ISD::FMA) || matcher.match(N, ISD::FMAD);
18092 };
18093
18094 // Is the node an FMUL and contractable either due to global flags or
18095 // SDNodeFlags.
18096 auto isContractableFMUL = [AllowFusionGlobally, &matcher](SDValue N) {
18097 if (!matcher.match(N, ISD::FMUL))
18098 return false;
18099 return AllowFusionGlobally || N->getFlags().hasAllowContract();
18100 };
18101 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
18102 // prefer to fold the multiply with fewer uses.
// NOTE(review): the enclosing condition (original line 18103) is elided;
// presumably: if (isContractableFMUL(N0) && isContractableFMUL(N1)) {
// -- confirm upstream.
18104 if (N0->use_size() > N1->use_size())
18105 std::swap(N0, N1);
18106 }
18107
18108 // fold (fadd (fmul x, y), z) -> (fma x, y, z)
18109 if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
18110 return matcher.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
18111 N0.getOperand(1), N1);
18112 }
18113
18114 // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
18115 // Note: Commutes FADD operands.
18116 if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
18117 return matcher.getNode(PreferredFusedOpcode, SL, VT, N1.getOperand(0),
18118 N1.getOperand(1), N0);
18119 }
18120
18121 // fadd (fma A, B, (fmul C, D)), E --> fma A, B, (fma C, D, E)
18122 // fadd E, (fma A, B, (fmul C, D)) --> fma A, B, (fma C, D, E)
18123 // This also works with nested fma instructions:
18124 // fadd (fma A, B, (fma (C, D, (fmul (E, F))))), G -->
18125 // fma A, B, (fma C, D, fma (E, F, G))
18126 // fadd (G, (fma A, B, (fma (C, D, (fmul (E, F)))))) -->
18127 // fma A, B, (fma C, D, fma (E, F, G)).
18128 // This requires reassociation because it changes the order of operations.
18129 bool CanReassociate = N->getFlags().hasAllowReassociation();
18130 if (CanReassociate) {
18131 SDValue FMA, E;
18132 if (isFusedOp(N0) && N0.hasOneUse()) {
18133 FMA = N0;
18134 E = N1;
18135 } else if (isFusedOp(N1) && N1.hasOneUse()) {
18136 FMA = N1;
18137 E = N0;
18138 }
18139
18140 SDValue TmpFMA = FMA;
18141 while (E && isFusedOp(TmpFMA) && TmpFMA.hasOneUse()) {
18142 SDValue FMul = TmpFMA->getOperand(2);
18143 if (matcher.match(FMul, ISD::FMUL) && FMul.hasOneUse()) {
18144 SDValue C = FMul.getOperand(0);
18145 SDValue D = FMul.getOperand(1);
18146 SDValue CDE = matcher.getNode(PreferredFusedOpcode, SL, VT, C, D, E);
// NOTE(review): original line 18147 is elided; presumably it performs
// DAG.ReplaceAllUsesOfValueWith(FMul, CDE); which is what the comment below
// refers to -- confirm upstream.
18148 // Replacing the inner FMul could cause the outer FMA to be simplified
18149 // away.
18150 return FMA.getOpcode() == ISD::DELETED_NODE ? SDValue(N, 0) : FMA;
18151 }
18152
18153 TmpFMA = TmpFMA->getOperand(2);
18154 }
18155 }
18156
18157 // Look through FP_EXTEND nodes to do more combining.
18158
18159 // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
18160 if (matcher.match(N0, ISD::FP_EXTEND)) {
18161 SDValue N00 = N0.getOperand(0);
18162 if (isContractableFMUL(N00) &&
18163 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
18164 N00.getValueType())) {
18165 return matcher.getNode(
18166 PreferredFusedOpcode, SL, VT,
18167 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
18168 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)), N1);
18169 }
18170 }
18171
18172 // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
18173 // Note: Commutes FADD operands.
18174 if (matcher.match(N1, ISD::FP_EXTEND)) {
18175 SDValue N10 = N1.getOperand(0);
18176 if (isContractableFMUL(N10) &&
18177 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
18178 N10.getValueType())) {
18179 return matcher.getNode(
18180 PreferredFusedOpcode, SL, VT,
18181 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0)),
18182 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0);
18183 }
18184 }
18185
18186 // More folding opportunities when target permits.
18187 if (Aggressive) {
18188 // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
18189 // -> (fma x, y, (fma (fpext u), (fpext v), z))
18190 auto FoldFAddFMAFPExtFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
18191 SDValue Z) {
18192 return matcher.getNode(
18193 PreferredFusedOpcode, SL, VT, X, Y,
18194 matcher.getNode(PreferredFusedOpcode, SL, VT,
18195 matcher.getNode(ISD::FP_EXTEND, SL, VT, U),
18196 matcher.getNode(ISD::FP_EXTEND, SL, VT, V), Z));
18197 };
18198 if (isFusedOp(N0)) {
18199 SDValue N02 = N0.getOperand(2);
18200 if (matcher.match(N02, ISD::FP_EXTEND)) {
18201 SDValue N020 = N02.getOperand(0);
18202 if (isContractableFMUL(N020) &&
18203 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
18204 N020.getValueType())) {
18205 return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
18206 N020.getOperand(0), N020.getOperand(1),
18207 N1);
18208 }
18209 }
18210 }
18211
18212 // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
18213 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
18214 // FIXME: This turns two single-precision and one double-precision
18215 // operation into two double-precision operations, which might not be
18216 // interesting for all targets, especially GPUs.
18217 auto FoldFAddFPExtFMAFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
18218 SDValue Z) {
18219 return matcher.getNode(
18220 PreferredFusedOpcode, SL, VT,
18221 matcher.getNode(ISD::FP_EXTEND, SL, VT, X),
18222 matcher.getNode(ISD::FP_EXTEND, SL, VT, Y),
18223 matcher.getNode(PreferredFusedOpcode, SL, VT,
18224 matcher.getNode(ISD::FP_EXTEND, SL, VT, U),
18225 matcher.getNode(ISD::FP_EXTEND, SL, VT, V), Z));
18226 };
18227 if (N0.getOpcode() == ISD::FP_EXTEND) {
18228 SDValue N00 = N0.getOperand(0);
18229 if (isFusedOp(N00)) {
18230 SDValue N002 = N00.getOperand(2);
18231 if (isContractableFMUL(N002) &&
18232 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
18233 N00.getValueType())) {
18234 return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
18235 N002.getOperand(0), N002.getOperand(1),
18236 N1);
18237 }
18238 }
18239 }
18240
18241 // fold (fadd x, (fma y, z, (fpext (fmul u, v)))
18242 // -> (fma y, z, (fma (fpext u), (fpext v), x))
18243 if (isFusedOp(N1)) {
18244 SDValue N12 = N1.getOperand(2);
18245 if (N12.getOpcode() == ISD::FP_EXTEND) {
18246 SDValue N120 = N12.getOperand(0);
18247 if (isContractableFMUL(N120) &&
18248 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
18249 N120.getValueType())) {
18250 return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
18251 N120.getOperand(0), N120.getOperand(1),
18252 N0);
18253 }
18254 }
18255 }
18256
18257 // fold (fadd x, (fpext (fma y, z, (fmul u, v)))
18258 // -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
18259 // FIXME: This turns two single-precision and one double-precision
18260 // operation into two double-precision operations, which might not be
18261 // interesting for all targets, especially GPUs.
18262 if (N1.getOpcode() == ISD::FP_EXTEND) {
18263 SDValue N10 = N1.getOperand(0);
18264 if (isFusedOp(N10)) {
18265 SDValue N102 = N10.getOperand(2);
18266 if (isContractableFMUL(N102) &&
18267 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
18268 N10.getValueType())) {
18269 return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
18270 N102.getOperand(0), N102.getOperand(1),
18271 N0);
18272 }
18273 }
18274 }
18275 }
18276
18277 return SDValue();
18278 }
18279
18280 /// Try to perform FMA combining on a given FSUB node.
/// Folds patterns such as (fsub (fmul x, y), z) -> (fma x, y, (fneg z)) and
/// (fsub x, (fmul y, z)) -> (fma (fneg y), z, x), including variants that
/// look through FNEG and FP_EXTEND.
/// Returns the fused node, or SDValue() when no combine applies.
18281 template <class MatchContextClass>
18282 SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
18283 SDValue N0 = N->getOperand(0);
18284 SDValue N1 = N->getOperand(1);
18285 EVT VT = N->getValueType(0);
18286 SDLoc SL(N);
18287 MatchContextClass matcher(DAG, TLI, N);
18288 const TargetOptions &Options = DAG.getTarget().Options;
18289
18290 bool UseVP = std::is_same_v<MatchContextClass, VPMatchContext>;
18291
18292 // Floating-point multiply-add with intermediate rounding.
18293 // FIXME: Make isFMADLegal have specific behavior when using VPMatchContext.
18294 // FIXME: Add VP_FMAD opcode.
18295 bool HasFMAD = !UseVP && (LegalOperations && TLI.isFMADLegal(DAG, N));
18296
18297 // Floating-point multiply-add without intermediate rounding.
18298 bool HasFMA =
18299 (!LegalOperations || matcher.isOperationLegalOrCustom(ISD::FMA, VT)) &&
// NOTE(review): original line 18300 (the second operand of this &&) is elided
// in this listing; presumably it is
// TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT); -- confirm.
18301
18302 // No valid opcode, do not combine.
18303 if (!HasFMAD && !HasFMA)
18304 return SDValue();
18305
18306 const SDNodeFlags Flags = N->getFlags();
18307 bool AllowFusionGlobally =
18308 (Options.AllowFPOpFusion == FPOpFusion::Fast || HasFMAD);
18309
18310 // If the subtraction is not contractable, do not combine.
18311 if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
18312 return SDValue();
18313
18314 if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
18315 return SDValue();
18316
18317 // Always prefer FMAD to FMA for precision.
18318 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
// NOTE(review): original line 18319 is elided in this listing; presumably it
// defines the `Aggressive` flag used below, i.e.
// bool Aggressive = TLI.enableAggressiveFMAFusion(VT); -- confirm upstream.
18320 bool NoSignedZero = Flags.hasNoSignedZeros();
18321
18322 // Is the node an FMUL and contractable either due to global flags or
18323 // SDNodeFlags.
18324 auto isContractableFMUL = [AllowFusionGlobally, &matcher](SDValue N) {
18325 if (!matcher.match(N, ISD::FMUL))
18326 return false;
18327 return AllowFusionGlobally || N->getFlags().hasAllowContract();
18328 };
18329
18330 // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
18331 auto tryToFoldXYSubZ = [&](SDValue XY, SDValue Z) {
18332 if (isContractableFMUL(XY) && (Aggressive || XY->hasOneUse())) {
18333 return matcher.getNode(PreferredFusedOpcode, SL, VT, XY.getOperand(0),
18334 XY.getOperand(1),
18335 matcher.getNode(ISD::FNEG, SL, VT, Z));
18336 }
18337 return SDValue();
18338 };
18339
18340 // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
18341 // Note: Commutes FSUB operands.
18342 auto tryToFoldXSubYZ = [&](SDValue X, SDValue YZ) {
18343 if (isContractableFMUL(YZ) && (Aggressive || YZ->hasOneUse())) {
18344 return matcher.getNode(
18345 PreferredFusedOpcode, SL, VT,
18346 matcher.getNode(ISD::FNEG, SL, VT, YZ.getOperand(0)),
18347 YZ.getOperand(1), X);
18348 }
18349 return SDValue();
18350 };
18351
18352 // If we have two choices trying to fold (fsub (fmul u, v), (fmul x, y)),
18353 // prefer to fold the multiply with fewer uses.
18354 if (isContractableFMUL(N0) && isContractableFMUL(N1) &&
18355 (N0->use_size() > N1->use_size())) {
18356 // fold (fsub (fmul a, b), (fmul c, d)) -> (fma (fneg c), d, (fmul a, b))
18357 if (SDValue V = tryToFoldXSubYZ(N0, N1))
18358 return V;
18359 // fold (fsub (fmul a, b), (fmul c, d)) -> (fma a, b, (fneg (fmul c, d)))
18360 if (SDValue V = tryToFoldXYSubZ(N0, N1))
18361 return V;
18362 } else {
18363 // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
18364 if (SDValue V = tryToFoldXYSubZ(N0, N1))
18365 return V;
18366 // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
18367 if (SDValue V = tryToFoldXSubYZ(N0, N1))
18368 return V;
18369 }
18370
18371 // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
18372 if (matcher.match(N0, ISD::FNEG) && isContractableFMUL(N0.getOperand(0)) &&
18373 (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
18374 SDValue N00 = N0.getOperand(0).getOperand(0);
18375 SDValue N01 = N0.getOperand(0).getOperand(1);
18376 return matcher.getNode(PreferredFusedOpcode, SL, VT,
18377 matcher.getNode(ISD::FNEG, SL, VT, N00), N01,
18378 matcher.getNode(ISD::FNEG, SL, VT, N1));
18379 }
18380
18381 // Look through FP_EXTEND nodes to do more combining.
18382
18383 // fold (fsub (fpext (fmul x, y)), z)
18384 // -> (fma (fpext x), (fpext y), (fneg z))
18385 if (matcher.match(N0, ISD::FP_EXTEND)) {
18386 SDValue N00 = N0.getOperand(0);
18387 if (isContractableFMUL(N00) &&
18388 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
18389 N00.getValueType())) {
18390 return matcher.getNode(
18391 PreferredFusedOpcode, SL, VT,
18392 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
18393 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
18394 matcher.getNode(ISD::FNEG, SL, VT, N1));
18395 }
18396 }
18397
18398 // fold (fsub x, (fpext (fmul y, z)))
18399 // -> (fma (fneg (fpext y)), (fpext z), x)
18400 // Note: Commutes FSUB operands.
18401 if (matcher.match(N1, ISD::FP_EXTEND)) {
18402 SDValue N10 = N1.getOperand(0);
18403 if (isContractableFMUL(N10) &&
18404 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
18405 N10.getValueType())) {
18406 return matcher.getNode(
18407 PreferredFusedOpcode, SL, VT,
18408 matcher.getNode(
18409 ISD::FNEG, SL, VT,
18410 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0))),
18411 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0);
18412 }
18413 }
18414
18415 // fold (fsub (fpext (fneg (fmul, x, y))), z)
18416 // -> (fneg (fma (fpext x), (fpext y), z))
18417 // Note: This could be removed with appropriate canonicalization of the
18418 // input expression into (fneg (fadd (fpext (fmul, x, y)), z)). However, the
18419 // command line flag -fp-contract=fast and fast-math flag contract prevent
18420 // from implementing the canonicalization in visitFSUB.
18421 if (matcher.match(N0, ISD::FP_EXTEND)) {
18422 SDValue N00 = N0.getOperand(0);
18423 if (matcher.match(N00, ISD::FNEG)) {
18424 SDValue N000 = N00.getOperand(0);
18425 if (isContractableFMUL(N000) &&
18426 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
18427 N00.getValueType())) {
18428 return matcher.getNode(
18429 ISD::FNEG, SL, VT,
18430 matcher.getNode(
18431 PreferredFusedOpcode, SL, VT,
18432 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
18433 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
18434 N1));
18435 }
18436 }
18437 }
18438
18439 // fold (fsub (fneg (fpext (fmul, x, y))), z)
18440 // -> (fneg (fma (fpext x)), (fpext y), z)
18441 // Note: This could be removed with appropriate canonicalization of the
18442 // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
18443 // command line flag -fp-contract=fast and fast-math flag contract prevent
18444 // from implementing the canonicalization in visitFSUB.
18445 if (matcher.match(N0, ISD::FNEG)) {
18446 SDValue N00 = N0.getOperand(0);
18447 if (matcher.match(N00, ISD::FP_EXTEND)) {
18448 SDValue N000 = N00.getOperand(0);
18449 if (isContractableFMUL(N000) &&
18450 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
18451 N000.getValueType())) {
18452 return matcher.getNode(
18453 ISD::FNEG, SL, VT,
18454 matcher.getNode(
18455 PreferredFusedOpcode, SL, VT,
18456 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
18457 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
18458 N1));
18459 }
18460 }
18461 }
18462
18463 auto isContractableAndReassociableFMUL = [&isContractableFMUL](SDValue N) {
18464 return isContractableFMUL(N) && N->getFlags().hasAllowReassociation();
18465 };
18466
18467 auto isFusedOp = [&](SDValue N) {
18468 return matcher.match(N, ISD::FMA) || matcher.match(N, ISD::FMAD);
18469 };
18470
18471 // More folding opportunities when target permits.
18472 if (Aggressive && N->getFlags().hasAllowReassociation()) {
18473 bool CanFuse = N->getFlags().hasAllowContract();
18474 // fold (fsub (fma x, y, (fmul u, v)), z)
18475 // -> (fma x, y (fma u, v, (fneg z)))
18476 if (CanFuse && isFusedOp(N0) &&
18477 isContractableAndReassociableFMUL(N0.getOperand(2)) &&
18478 N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
18479 return matcher.getNode(
18480 PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1),
18481 matcher.getNode(PreferredFusedOpcode, SL, VT,
18482 N0.getOperand(2).getOperand(0),
18483 N0.getOperand(2).getOperand(1),
18484 matcher.getNode(ISD::FNEG, SL, VT, N1)));
18485 }
18486
18487 // fold (fsub x, (fma y, z, (fmul u, v)))
18488 // -> (fma (fneg y), z, (fma (fneg u), v, x))
18489 if (CanFuse && isFusedOp(N1) &&
18490 isContractableAndReassociableFMUL(N1.getOperand(2)) &&
18491 N1->hasOneUse() && NoSignedZero) {
18492 SDValue N20 = N1.getOperand(2).getOperand(0);
18493 SDValue N21 = N1.getOperand(2).getOperand(1);
18494 return matcher.getNode(
18495 PreferredFusedOpcode, SL, VT,
18496 matcher.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
18497 N1.getOperand(1),
18498 matcher.getNode(PreferredFusedOpcode, SL, VT,
18499 matcher.getNode(ISD::FNEG, SL, VT, N20), N21, N0));
18500 }
18501
18502 // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
18503 // -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
18504 if (isFusedOp(N0) && N0->hasOneUse()) {
18505 SDValue N02 = N0.getOperand(2);
18506 if (matcher.match(N02, ISD::FP_EXTEND)) {
18507 SDValue N020 = N02.getOperand(0);
18508 if (isContractableAndReassociableFMUL(N020) &&
18509 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
18510 N020.getValueType())) {
18511 return matcher.getNode(
18512 PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1),
18513 matcher.getNode(
18514 PreferredFusedOpcode, SL, VT,
18515 matcher.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(0)),
18516 matcher.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(1)),
18517 matcher.getNode(ISD::FNEG, SL, VT, N1)));
18518 }
18519 }
18520 }
18521
18522 // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
18523 // -> (fma (fpext x), (fpext y),
18524 // (fma (fpext u), (fpext v), (fneg z)))
18525 // FIXME: This turns two single-precision and one double-precision
18526 // operation into two double-precision operations, which might not be
18527 // interesting for all targets, especially GPUs.
18528 if (matcher.match(N0, ISD::FP_EXTEND)) {
18529 SDValue N00 = N0.getOperand(0);
18530 if (isFusedOp(N00)) {
18531 SDValue N002 = N00.getOperand(2);
18532 if (isContractableAndReassociableFMUL(N002) &&
18533 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
18534 N00.getValueType())) {
18535 return matcher.getNode(
18536 PreferredFusedOpcode, SL, VT,
18537 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
18538 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
18539 matcher.getNode(
18540 PreferredFusedOpcode, SL, VT,
18541 matcher.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(0)),
18542 matcher.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(1)),
18543 matcher.getNode(ISD::FNEG, SL, VT, N1)));
18544 }
18545 }
18546 }
18547
18548 // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
18549 // -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
18550 if (isFusedOp(N1) && matcher.match(N1.getOperand(2), ISD::FP_EXTEND) &&
18551 N1->hasOneUse()) {
18552 SDValue N120 = N1.getOperand(2).getOperand(0);
18553 if (isContractableAndReassociableFMUL(N120) &&
18554 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
18555 N120.getValueType())) {
18556 SDValue N1200 = N120.getOperand(0);
18557 SDValue N1201 = N120.getOperand(1);
18558 return matcher.getNode(
18559 PreferredFusedOpcode, SL, VT,
18560 matcher.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
18561 N1.getOperand(1),
18562 matcher.getNode(
18563 PreferredFusedOpcode, SL, VT,
18564 matcher.getNode(ISD::FNEG, SL, VT,
18565 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1200)),
18566 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1201), N0));
18567 }
18568 }
18569
18570 // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
18571 // -> (fma (fneg (fpext y)), (fpext z),
18572 // (fma (fneg (fpext u)), (fpext v), x))
18573 // FIXME: This turns two single-precision and one double-precision
18574 // operation into two double-precision operations, which might not be
18575 // interesting for all targets, especially GPUs.
18576 if (matcher.match(N1, ISD::FP_EXTEND) && isFusedOp(N1.getOperand(0))) {
18577 SDValue CvtSrc = N1.getOperand(0);
18578 SDValue N100 = CvtSrc.getOperand(0);
18579 SDValue N101 = CvtSrc.getOperand(1);
18580 SDValue N102 = CvtSrc.getOperand(2);
18581 if (isContractableAndReassociableFMUL(N102) &&
18582 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
18583 CvtSrc.getValueType())) {
18584 SDValue N1020 = N102.getOperand(0);
18585 SDValue N1021 = N102.getOperand(1);
18586 return matcher.getNode(
18587 PreferredFusedOpcode, SL, VT,
18588 matcher.getNode(ISD::FNEG, SL, VT,
18589 matcher.getNode(ISD::FP_EXTEND, SL, VT, N100)),
18590 matcher.getNode(ISD::FP_EXTEND, SL, VT, N101),
18591 matcher.getNode(
18592 PreferredFusedOpcode, SL, VT,
18593 matcher.getNode(ISD::FNEG, SL, VT,
18594 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1020)),
18595 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1021), N0));
18596 }
18597 }
18598 }
18599
18600 return SDValue();
18601 }
18602
18603/// Try to perform FMA combining on a given FMUL node based on the distributive
18604/// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
18605/// subtraction instead of addition).
/// \param N the FMUL node to combine.
/// \returns the fused node, or an empty SDValue if no fusion applies.
18606SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
18607  SDValue N0 = N->getOperand(0);
18608  SDValue N1 = N->getOperand(1);
18609  EVT VT = N->getValueType(0);
18610  SDLoc SL(N);
18611
18612  assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
18613
18614  const TargetOptions &Options = DAG.getTarget().Options;
18615
18616  // The transforms below are incorrect when x == 0 and y == inf, because the
18617  // intermediate multiplication produces a nan.
  // If neither operand is an FADD, N1's flags are the ones consulted here;
  // the FADD/FSUB pattern matches below would fail in that case anyway.
18618  SDValue FAdd = N0.getOpcode() == ISD::FADD ? N0 : N1;
18619  if (!FAdd->getFlags().hasNoInfs())
18620    return SDValue();
18621
18622  // Floating-point multiply-add without intermediate rounding.
  // NOTE(review): part of this condition (the target FMA profitability
  // check) was elided in extraction — confirm against upstream.
18623  bool HasFMA =
18625      (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT)) &&
18627
18628  // Floating-point multiply-add with intermediate rounding. This can result
18629  // in a less precise result due to the changed rounding order.
18630  bool HasFMAD = LegalOperations && TLI.isFMADLegal(DAG, N);
18631
18632  // No valid opcode, do not combine.
18633  if (!HasFMAD && !HasFMA)
18634    return SDValue();
18635
18636  // Always prefer FMAD to FMA for precision.
18637  unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
18639
18640  // fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y)
18641  // fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y))
  // 'Aggressive' (defined on an elided line above — presumably the target's
  // aggressive-FMA-fusion hook; TODO confirm) permits fusing even when the
  // add/sub has multiple uses.
18642  auto FuseFADD = [&](SDValue X, SDValue Y) {
18643    if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
18644      if (auto *C = isConstOrConstSplatFP(X.getOperand(1), true)) {
18645        if (C->isExactlyValue(+1.0))
18646          return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
18647                             Y);
18648        if (C->isExactlyValue(-1.0))
18649          return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
18650                             DAG.getNode(ISD::FNEG, SL, VT, Y));
18651      }
18652    }
18653    return SDValue();
18654  };
18655
  // Try both operand orders; FMUL is commutative.
18656  if (SDValue FMA = FuseFADD(N0, N1))
18657    return FMA;
18658  if (SDValue FMA = FuseFADD(N1, N0))
18659    return FMA;
18660
18661  // fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y)
18662  // fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y))
18663  // fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y))
18664  // fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y)
18665  auto FuseFSUB = [&](SDValue X, SDValue Y) {
18666    if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
18667      if (auto *C0 = isConstOrConstSplatFP(X.getOperand(0), true)) {
18668        if (C0->isExactlyValue(+1.0))
18669          return DAG.getNode(PreferredFusedOpcode, SL, VT,
18670                             DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
18671                             Y);
18672        if (C0->isExactlyValue(-1.0))
18673          return DAG.getNode(PreferredFusedOpcode, SL, VT,
18674                             DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
18675                             DAG.getNode(ISD::FNEG, SL, VT, Y));
18676      }
18677      if (auto *C1 = isConstOrConstSplatFP(X.getOperand(1), true)) {
18678        if (C1->isExactlyValue(+1.0))
18679          return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
18680                             DAG.getNode(ISD::FNEG, SL, VT, Y));
18681        if (C1->isExactlyValue(-1.0))
18682          return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
18683                             Y);
18684      }
18685    }
18686    return SDValue();
18687  };
18688
18689  if (SDValue FMA = FuseFSUB(N0, N1))
18690    return FMA;
18691  if (SDValue FMA = FuseFSUB(N1, N0))
18692    return FMA;
18693
18694  return SDValue();
18695}
18696
18697SDValue DAGCombiner::visitVP_FADD(SDNode *N) {
18698 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
18699
18700 // FADD -> FMA combines:
18701 if (SDValue Fused = visitFADDForFMACombine<VPMatchContext>(N)) {
18702 if (Fused.getOpcode() != ISD::DELETED_NODE)
18703 AddToWorklist(Fused.getNode());
18704 return Fused;
18705 }
18706 return SDValue();
18707}
18708
// Combine an ISD::FADD node: constant folding, canonicalization of constants
// to the RHS, sign-of-zero and negation folds, fast-math-guarded
// reassociation folds, and finally FADD -> FMA fusion.
18709SDValue DAGCombiner::visitFADD(SDNode *N) {
18710  SDValue N0 = N->getOperand(0);
18711  SDValue N1 = N->getOperand(1);
18712  bool N0CFP = DAG.isConstantFPBuildVectorOrConstantFP(N0);
18713  bool N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N1);
18714  EVT VT = N->getValueType(0);
18715  SDLoc DL(N);
18716  SDNodeFlags Flags = N->getFlags();
18717  SelectionDAG::FlagInserter FlagsInserter(DAG, N);
18718
18719  if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
18720    return R;
18721
18722  // fold (fadd c1, c2) -> c1 + c2
18723  if (SDValue C = DAG.FoldConstantArithmetic(ISD::FADD, DL, VT, {N0, N1}))
18724    return C;
18725
18726  // canonicalize constant to RHS
18727  if (N0CFP && !N1CFP)
18728    return DAG.getNode(ISD::FADD, DL, VT, N1, N0);
18729
18730  // fold vector ops
18731  if (VT.isVector())
18732    if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
18733      return FoldedVOp;
18734
18735  // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
18736  ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
18737  if (N1C && N1C->isZero())
18738    if (N1C->isNegative() || DAG.canIgnoreSignBitOfZero(SDValue(N, 0)))
18739      return N0;
18740
18741  if (SDValue NewSel = foldBinOpIntoSelect(N))
18742    return NewSel;
18743
18744  // fold (fadd A, (fneg B)) -> (fsub A, B)
18745  if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
18746    if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
18747            N1, DAG, LegalOperations, ForCodeSize))
18748      return DAG.getNode(ISD::FSUB, DL, VT, N0, NegN1);
18749
18750  // fold (fadd (fneg A), B) -> (fsub B, A)
18751  if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
18752    if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
18753            N0, DAG, LegalOperations, ForCodeSize))
18754      return DAG.getNode(ISD::FSUB, DL, VT, N1, NegN0);
18755
  // Matches a single-use (fmul B, -2.0); used below to rewrite
  // A + (B * -2.0) as A - (B + B).
18756  auto isFMulNegTwo = [](SDValue FMul) {
18757    if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
18758      return false;
18759    auto *C = isConstOrConstSplatFP(FMul.getOperand(1), true);
18760    return C && C->isExactlyValue(-2.0);
18761  };
18762
18763  // fadd (fmul B, -2.0), A --> fsub A, (fadd B, B)
18764  if (isFMulNegTwo(N0)) {
18765    SDValue B = N0.getOperand(0);
18766    SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
18767    return DAG.getNode(ISD::FSUB, DL, VT, N1, Add);
18768  }
18769  // fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B)
18770  if (isFMulNegTwo(N1)) {
18771    SDValue B = N1.getOperand(0);
18772    SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
18773    return DAG.getNode(ISD::FSUB, DL, VT, N0, Add);
18774  }
18775
18776  // No FP constant should be created after legalization as Instruction
18777  // Selection pass has a hard time dealing with FP constants.
18778  bool AllowNewConst = (Level < AfterLegalizeDAG);
18779
18780  // If nnan is enabled, fold lots of things.
18781  if (Flags.hasNoNaNs() && AllowNewConst) {
18782    // If allowed, fold (fadd (fneg x), x) -> 0.0
18783    if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
18784      return DAG.getConstantFP(0.0, DL, VT);
18785
18786    // If allowed, fold (fadd x, (fneg x)) -> 0.0
18787    if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
18788      return DAG.getConstantFP(0.0, DL, VT);
18789  }
18790
18791  // If reassoc and nsz, fold lots of things.
18792  // TODO: break out portions of the transformations below for which Unsafe is
18793  //       considered and which do not require both nsz and reassoc
18794  if (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros() &&
18795      AllowNewConst) {
18796    // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
    // NOTE(review): the second half of this condition (presumably a
    // constant-FP check on N0.getOperand(1)) was elided in extraction —
    // confirm against upstream.
18797    if (N1CFP && N0.getOpcode() == ISD::FADD &&
18799      SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1);
18800      return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC);
18801    }
18802
18803    // We can fold chains of FADD's of the same value into multiplications.
18804    // This transform is not safe in general because we are reducing the number
18805    // of rounding steps.
18806    if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
18807      if (N0.getOpcode() == ISD::FMUL) {
18808        bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
18809        bool CFP01 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
18810
18811        // (fadd (fmul x, c), x) -> (fmul x, c+1)
18812        if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
18813          SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
18814                                       DAG.getConstantFP(1.0, DL, VT));
18815          return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP);
18816        }
18817
18818        // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
18819        if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
18820            N1.getOperand(0) == N1.getOperand(1) &&
18821            N0.getOperand(0) == N1.getOperand(0)) {
18822          SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
18823                                       DAG.getConstantFP(2.0, DL, VT));
18824          return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP);
18825        }
18826      }
18827
18828      if (N1.getOpcode() == ISD::FMUL) {
18829        bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
18830        bool CFP11 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
18831
18832        // (fadd x, (fmul x, c)) -> (fmul x, c+1)
18833        if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
18834          SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
18835                                       DAG.getConstantFP(1.0, DL, VT));
18836          return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP);
18837        }
18838
18839        // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
18840        if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
18841            N0.getOperand(0) == N0.getOperand(1) &&
18842            N1.getOperand(0) == N0.getOperand(0)) {
18843          SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
18844                                       DAG.getConstantFP(2.0, DL, VT));
18845          return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP);
18846        }
18847      }
18848
18849      if (N0.getOpcode() == ISD::FADD) {
18850        bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
18851        // (fadd (fadd x, x), x) -> (fmul x, 3.0)
18852        if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
18853            (N0.getOperand(0) == N1)) {
18854          return DAG.getNode(ISD::FMUL, DL, VT, N1,
18855                             DAG.getConstantFP(3.0, DL, VT));
18856        }
18857      }
18858
18859      if (N1.getOpcode() == ISD::FADD) {
18860        bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
18861        // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
18862        if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
18863            N1.getOperand(0) == N0) {
18864          return DAG.getNode(ISD::FMUL, DL, VT, N0,
18865                             DAG.getConstantFP(3.0, DL, VT));
18866        }
18867      }
18868
18869      // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
18870      if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
18871          N0.getOperand(0) == N0.getOperand(1) &&
18872          N1.getOperand(0) == N1.getOperand(1) &&
18873          N0.getOperand(0) == N1.getOperand(0)) {
18874        return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
18875                           DAG.getConstantFP(4.0, DL, VT));
18876      }
18877    }
18878  } // reassoc && nsz && AllowNewConst
18879
18880  if (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros()) {
18881    // Fold fadd(vecreduce(x), vecreduce(y)) -> vecreduce(fadd(x, y))
18882    if (SDValue SD = reassociateReduction(ISD::VECREDUCE_FADD, ISD::FADD, DL,
18883                                          VT, N0, N1, Flags))
18884      return SD;
18885  }
18886
18887  // FADD -> FMA combines:
18888  if (SDValue Fused = visitFADDForFMACombine<EmptyMatchContext>(N)) {
18889    if (Fused.getOpcode() != ISD::DELETED_NODE)
18890      AddToWorklist(Fused.getNode());
18891    return Fused;
18892  }
18893  return SDValue();
18894}
18895
18896SDValue DAGCombiner::visitSTRICT_FADD(SDNode *N) {
18897 SDValue Chain = N->getOperand(0);
18898 SDValue N0 = N->getOperand(1);
18899 SDValue N1 = N->getOperand(2);
18900 EVT VT = N->getValueType(0);
18901 EVT ChainVT = N->getValueType(1);
18902 SDLoc DL(N);
18903 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
18904
18905 // fold (strict_fadd A, (fneg B)) -> (strict_fsub A, B)
18906 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
18907 if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
18908 N1, DAG, LegalOperations, ForCodeSize)) {
18909 return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
18910 {Chain, N0, NegN1});
18911 }
18912
18913 // fold (strict_fadd (fneg A), B) -> (strict_fsub B, A)
18914 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
18915 if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
18916 N0, DAG, LegalOperations, ForCodeSize)) {
18917 return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
18918 {Chain, N1, NegN0});
18919 }
18920 return SDValue();
18921}
18922
18923SDValue DAGCombiner::visitFSUB(SDNode *N) {
18924 SDValue N0 = N->getOperand(0);
18925 SDValue N1 = N->getOperand(1);
18926 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
18927 ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
18928 EVT VT = N->getValueType(0);
18929 SDLoc DL(N);
18930 const SDNodeFlags Flags = N->getFlags();
18931 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
18932
18933 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
18934 return R;
18935
18936 // fold (fsub c1, c2) -> c1-c2
18937 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FSUB, DL, VT, {N0, N1}))
18938 return C;
18939
18940 // fold vector ops
18941 if (VT.isVector())
18942 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
18943 return FoldedVOp;
18944
18945 if (SDValue NewSel = foldBinOpIntoSelect(N))
18946 return NewSel;
18947
18948 // (fsub A, 0) -> A
18949 if (N1CFP && N1CFP->isZero()) {
18950 if (!N1CFP->isNegative() || DAG.canIgnoreSignBitOfZero(SDValue(N, 0))) {
18951 return N0;
18952 }
18953 }
18954
18955 if (N0 == N1) {
18956 // (fsub x, x) -> 0.0
18957 if (Flags.hasNoNaNs())
18958 return DAG.getConstantFP(0.0f, DL, VT);
18959 }
18960
18961 // (fsub -0.0, N1) -> -N1
18962 if (N0CFP && N0CFP->isZero()) {
18963 if (N0CFP->isNegative() || DAG.canIgnoreSignBitOfZero(SDValue(N, 0))) {
18964 // We cannot replace an FSUB(+-0.0,X) with FNEG(X) when denormals are
18965 // flushed to zero, unless all users treat denorms as zero (DAZ).
18966 // FIXME: This transform will change the sign of a NaN and the behavior
18967 // of a signaling NaN. It is only valid when a NoNaN flag is present.
18968 DenormalMode DenormMode = DAG.getDenormalMode(VT);
18969 if (DenormMode == DenormalMode::getIEEE()) {
18970 if (SDValue NegN1 =
18971 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
18972 return NegN1;
18973 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
18974 return DAG.getNode(ISD::FNEG, DL, VT, N1);
18975 }
18976 }
18977 }
18978
18979 if (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros() &&
18980 N1.getOpcode() == ISD::FADD) {
18981 // X - (X + Y) -> -Y
18982 if (N0 == N1->getOperand(0))
18983 return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1));
18984 // X - (Y + X) -> -Y
18985 if (N0 == N1->getOperand(1))
18986 return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0));
18987 }
18988
18989 // fold (fsub A, (fneg B)) -> (fadd A, B)
18990 if (SDValue NegN1 =
18991 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
18992 return DAG.getNode(ISD::FADD, DL, VT, N0, NegN1);
18993
18994 // FSUB -> FMA combines:
18995 if (SDValue Fused = visitFSUBForFMACombine<EmptyMatchContext>(N)) {
18996 AddToWorklist(Fused.getNode());
18997 return Fused;
18998 }
18999
19000 return SDValue();
19001}
19002
19003// Transform IEEE Floats:
19004// (fmul C, (uitofp Pow2))
19005// -> (bitcast_to_FP (add (bitcast_to_INT C), Log2(Pow2) << mantissa))
19006// (fdiv C, (uitofp Pow2))
19007// -> (bitcast_to_FP (sub (bitcast_to_INT C), Log2(Pow2) << mantissa))
19008//
19009// The rationale is fmul/fdiv by a power of 2 is just change the exponent, so
19010// there is no need for more than an add/sub.
19011//
19012// This is valid under the following circumstances:
19013// 1) We are dealing with IEEE floats
19014// 2) C is normal
19015// 3) The fmul/fdiv add/sub will not go outside of min/max exponent bounds.
19016// TODO: Much of this could also be used for generating `ldexp` on targets the
19017// prefer it.
19018SDValue DAGCombiner::combineFMulOrFDivWithIntPow2(SDNode *N) {
19019  EVT VT = N->getValueType(0);
  // NOTE(review): the guard condition preceding this early return was elided
  // in extraction — confirm against upstream.
19021    return SDValue();
19022
19023  SDValue ConstOp, Pow2Op;
19024
  // Shared across both GetConstAndPow2Ops attempts so all constant elements
  // are required to have the same mantissa width.
19025  std::optional<int> Mantissa;
  // Tries to view operand ConstOpIdx as the FP constant and the other
  // operand as (u|s)int_to_fp(pow2); returns true and sets
  // ConstOp/Pow2Op/Mantissa if that arrangement admits the transform.
19026  auto GetConstAndPow2Ops = [&](unsigned ConstOpIdx) {
    // FDIV is not commutative, so the constant must be the dividend.
19027    if (ConstOpIdx == 1 && N->getOpcode() == ISD::FDIV)
19028      return false;
19029
19030    ConstOp = peekThroughBitcasts(N->getOperand(ConstOpIdx));
19031    Pow2Op = N->getOperand(1 - ConstOpIdx);
19032    unsigned Pow2Opc = Pow2Op.getOpcode();
19033    if (Pow2Opc != ISD::UINT_TO_FP && Pow2Opc != ISD::SINT_TO_FP)
19034      return false;
19035
19036    Pow2Op = Pow2Op.getOperand(0);
19037
    // A signed convert of a possibly-negative value is not a pure pow2.
19038    KnownBits Pow2OpKnownBits = DAG.computeKnownBits(Pow2Op);
19039    if (Pow2Opc == ISD::SINT_TO_FP && !Pow2OpKnownBits.isNonNegative())
19040      return false;
19041
19042    int MaxExpChange = Pow2OpKnownBits.countMaxActiveBits();
19043
19044    auto IsFPConstValid = [N, MaxExpChange, &Mantissa](ConstantFPSDNode *CFP) {
19045      if (CFP == nullptr)
19046        return false;
19047
19048      const APFloat &APF = CFP->getValueAPF();
19049
19050      // Make sure we have normal constant.
19051      if (!APF.isNormal())
19052        return false;
19053
19054      // Make sure the floats exponent is within the bounds that this transform
19055      // produces bitwise equals value.
19056      int CurExp = ilogb(APF);
19057      // FMul by pow2 will only increase exponent.
19058      int MinExp =
19059          N->getOpcode() == ISD::FMUL ? CurExp : (CurExp - MaxExpChange);
19060      // FDiv by pow2 will only decrease exponent.
19061      int MaxExp =
19062          N->getOpcode() == ISD::FDIV ? CurExp : (CurExp + MaxExpChange);
      // NOTE(review): the second half of this bound check (presumably
      // comparing MaxExp against the maximum exponent) was elided in
      // extraction — confirm against upstream.
19063      if (MinExp <= APFloat::semanticsMinExponent(APF.getSemantics()) ||
19065        return false;
19066
19067      // Finally make sure we actually know the mantissa for the float type.
19068      int ThisMantissa = APFloat::semanticsPrecision(APF.getSemantics()) - 1;
19069      if (!Mantissa)
19070        Mantissa = ThisMantissa;
19071
19072      return *Mantissa == ThisMantissa && ThisMantissa > 0;
19073    };
19074
19075    // TODO: We may be able to include undefs.
19076    return ISD::matchUnaryFpPredicate(ConstOp, IsFPConstValid);
19077  };
19078
19079  if (!GetConstAndPow2Ops(0) && !GetConstAndPow2Ops(1))
19080    return SDValue();
19081
  // Let the target veto the integer add/sub + bitcast form.
19082  if (!TLI.optimizeFMulOrFDivAsShiftAddBitcast(N, ConstOp, Pow2Op))
19083    return SDValue();
19084
19085  // Get log2 after all other checks have taken place. This is because
19086  // BuildLogBase2 may create a new node.
19087  SDLoc DL(N);
19088  // Get Log2 type with same bitwidth as the float type (VT).
  // NOTE(review): the element-type argument on the following line was elided
  // in extraction — confirm against upstream.
19089  EVT NewIntVT = VT.changeElementType(
19090      *DAG.getContext(),
19092
19093  SDValue Log2 = BuildLogBase2(Pow2Op, DL, DAG.isKnownNeverZero(Pow2Op),
19094                               /*InexpensiveOnly*/ true, NewIntVT);
19095  if (!Log2)
19096    return SDValue();
19097
19098  // Perform actual transform.
19099  SDValue MantissaShiftCnt =
19100      DAG.getShiftAmountConstant(*Mantissa, NewIntVT, DL);
19101  // TODO: Sometimes Log2 is of form `(X + C)`. `(X + C) << C1` should fold to
19102  // `(X << C1) + (C << C1)`, but that isn't always the case because of the
19103  // cast. We could implement that by handle here to handle the casts.
19104  SDValue Shift = DAG.getNode(ISD::SHL, DL, NewIntVT, Log2, MantissaShiftCnt);
  // fmul adjusts the exponent upward (ADD), fdiv downward (SUB).
19105  SDValue ResAsInt =
19106      DAG.getNode(N->getOpcode() == ISD::FMUL ? ISD::ADD : ISD::SUB, DL,
19107                  NewIntVT, DAG.getBitcast(NewIntVT, ConstOp), Shift);
19108  SDValue ResAsFP = DAG.getBitcast(VT, ResAsInt);
19109  return ResAsFP;
19110}
19111
// Combine an ISD::FMUL node: constant folding, canonicalization,
// reassociation folds (guarded by fast-math flags), negation folds,
// select-based sign folds, and FMUL -> FMA distribution.
19112SDValue DAGCombiner::visitFMUL(SDNode *N) {
19113  SDValue N0 = N->getOperand(0);
19114  SDValue N1 = N->getOperand(1);
19115  ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
19116  EVT VT = N->getValueType(0);
19117  SDLoc DL(N);
19118  const SDNodeFlags Flags = N->getFlags();
19119  SelectionDAG::FlagInserter FlagsInserter(DAG, N);
19120
19121  if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
19122    return R;
19123
19124  // fold (fmul c1, c2) -> c1*c2
19125  if (SDValue C = DAG.FoldConstantArithmetic(ISD::FMUL, DL, VT, {N0, N1}))
19126    return C;
19127
19128  // canonicalize constant to RHS
  // NOTE(review): the condition guarding this swap (constant-on-LHS check)
  // was elided in extraction — confirm against upstream.
19131    return DAG.getNode(ISD::FMUL, DL, VT, N1, N0);
19132
19133  // fold vector ops
19134  if (VT.isVector())
19135    if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
19136      return FoldedVOp;
19137
19138  if (SDValue NewSel = foldBinOpIntoSelect(N))
19139    return NewSel;
19140
19141  if (Flags.hasAllowReassociation()) {
19142    // fmul (fmul X, C1), C2 -> fmul X, C1 * C2
    // NOTE(review): the first half of this condition (a constant-FP check
    // on N1) was elided in extraction — confirm against upstream.
19144        N0.getOpcode() == ISD::FMUL) {
19145      SDValue N00 = N0.getOperand(0);
19146      SDValue N01 = N0.getOperand(1);
19147      // Avoid an infinite loop by making sure that N00 is not a constant
19148      // (the inner multiply has not been constant folded yet).
      // NOTE(review): the constant checks on N00/N01 were elided in
      // extraction — confirm against upstream.
19151        SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1);
19152        return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts);
19153      }
19154    }
19155
19156    // Match a special-case: we convert X * 2.0 into fadd.
19157    // fmul (fadd X, X), C -> fmul X, 2.0 * C
19158    if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() &&
19159        N0.getOperand(0) == N0.getOperand(1)) {
19160      const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
19161      SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1);
19162      return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts);
19163    }
19164
19165    // Fold fmul(vecreduce(x), vecreduce(y)) -> vecreduce(fmul(x, y))
19166    if (SDValue SD = reassociateReduction(ISD::VECREDUCE_FMUL, ISD::FMUL, DL,
19167                                          VT, N0, N1, Flags))
19168      return SD;
19169  }
19170
19171  // fold (fmul X, 2.0) -> (fadd X, X)
19172  if (N1CFP && N1CFP->isExactlyValue(+2.0))
19173    return DAG.getNode(ISD::FADD, DL, VT, N0, N0);
19174
19175  // fold (fmul X, -1.0) -> (fsub -0.0, X)
19176  if (N1CFP && N1CFP->isExactlyValue(-1.0)) {
19177    if (!LegalOperations || TLI.isOperationLegal(ISD::FSUB, VT)) {
19178      return DAG.getNode(ISD::FSUB, DL, VT,
19179                         DAG.getConstantFP(-0.0, DL, VT), N0, Flags);
19180    }
19181  }
19182
19183  // -N0 * -N1 --> N0 * N1
  // NOTE(review): the declarations of CostN0/CostN1 (negation-cost
  // accumulators) were elided in extraction — confirm against upstream.
19188  SDValue NegN0 =
19189      TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
19190  if (NegN0) {
    // Keep NegN0 alive while computing NegN1; getNegatedExpression may CSE.
19191    HandleSDNode NegN0Handle(NegN0);
19192    SDValue NegN1 =
19193        TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
    // NOTE(review): the second half of this cost comparison was elided in
    // extraction — confirm against upstream.
19194    if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
19196      return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1);
19197  }
19198
19199  // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
19200  // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
19201  if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
19202      (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
19203      TLI.isOperationLegal(ISD::FABS, VT)) {
19204    SDValue Select = N0, X = N1;
19205    if (Select.getOpcode() != ISD::SELECT)
19206      std::swap(Select, X);
19207
19208    SDValue Cond = Select.getOperand(0);
19209    auto TrueOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
19210    auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));
19211
19212    if (TrueOpnd && FalseOpnd &&
19213        Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
19214        isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
19215        cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
19216      ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
19217      switch (CC) {
19218      default: break;
      // For the "less than" family, swap the arms so the logic below can
      // treat every case as "greater than".
19219      case ISD::SETOLT:
19220      case ISD::SETULT:
19221      case ISD::SETOLE:
19222      case ISD::SETULE:
19223      case ISD::SETLT:
19224      case ISD::SETLE:
19225        std::swap(TrueOpnd, FalseOpnd);
19226        [[fallthrough]];
19227      case ISD::SETOGT:
19228      case ISD::SETUGT:
19229      case ISD::SETOGE:
19230      case ISD::SETUGE:
19231      case ISD::SETGT:
19232      case ISD::SETGE:
19233        if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
19234            TLI.isOperationLegal(ISD::FNEG, VT))
19235          return DAG.getNode(ISD::FNEG, DL, VT,
19236                             DAG.getNode(ISD::FABS, DL, VT, X));
19237        if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
19238          return DAG.getNode(ISD::FABS, DL, VT, X);
19239
19240        break;
19241      }
19242    }
19243  }
19244
19245  // FMUL -> FMA combines:
19246  if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
19247    AddToWorklist(Fused.getNode());
19248    return Fused;
19249  }
19250
19251  // Don't do `combineFMulOrFDivWithIntPow2` until after FMUL -> FMA has been
19252  // able to run.
19253  if (SDValue R = combineFMulOrFDivWithIntPow2(N))
19254    return R;
19255
19256  return SDValue();
19257}
19258
// Combine an FMA-like node. Parameterized over a match context so the same
// folds can serve both plain and (presumably) VP/predicated FMA nodes —
// TODO confirm the instantiations against the rest of the file.
19259template <class MatchContextClass> SDValue DAGCombiner::visitFMA(SDNode *N) {
19260  SDValue N0 = N->getOperand(0);
19261  SDValue N1 = N->getOperand(1);
19262  SDValue N2 = N->getOperand(2);
19263  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
19264  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
19265  ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2);
19266  EVT VT = N->getValueType(0);
19267  SDLoc DL(N);
19268  // FMA nodes have flags that propagate to the created nodes.
19269  SelectionDAG::FlagInserter FlagsInserter(DAG, N);
19270  MatchContextClass matcher(DAG, TLI, N);
19271
19272  // Constant fold FMA.
19273  if (SDValue C =
19274          DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1, N2}))
19275    return C;
19276
19277  // (-N0 * -N1) + N2 --> (N0 * N1) + N2
  // NOTE(review): the declarations of CostN0/CostN1 were elided in
  // extraction — confirm against upstream.
19282  SDValue NegN0 =
19283      TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
19284  if (NegN0) {
    // Keep NegN0 alive while computing NegN1.
19285    HandleSDNode NegN0Handle(NegN0);
19286    SDValue NegN1 =
19287        TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
    // NOTE(review): the second half of this cost comparison was elided in
    // extraction — confirm against upstream.
19288    if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
19290      return matcher.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2);
19291  }
19292
  // With nnan+ninf, 0 * y + z cannot create a NaN, so it folds to z
  // (nsz or a known-non--0.0 addend is needed to preserve signed zeros).
19293  if (N->getFlags().hasNoNaNs() && N->getFlags().hasNoInfs()) {
19294    if (N->getFlags().hasNoSignedZeros() ||
19295        (N2CFP && !N2CFP->isExactlyValue(-0.0))) {
19296      if (N0CFP && N0CFP->isZero())
19297        return N2;
19298      if (N1CFP && N1CFP->isZero())
19299        return N2;
19300    }
19301  }
19302
19303  // FIXME: Support splat of constant.
19304  if (N0CFP && N0CFP->isExactlyValue(1.0))
19305    return matcher.getNode(ISD::FADD, DL, VT, N1, N2);
19306  if (N1CFP && N1CFP->isExactlyValue(1.0))
19307    return matcher.getNode(ISD::FADD, DL, VT, N0, N2);
19308
19309  // Canonicalize (fma c, x, y) -> (fma x, c, y)
  // NOTE(review): the guard condition for this canonicalization was elided
  // in extraction — confirm against upstream.
19312    return matcher.getNode(ISD::FMA, DL, VT, N1, N0, N2);
19313
19314  bool CanReassociate = N->getFlags().hasAllowReassociation();
19315  if (CanReassociate) {
19316    // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
    // NOTE(review): the remaining constant/one-use checks of this condition
    // were elided in extraction — confirm against upstream.
19317    if (matcher.match(N2, ISD::FMUL) && N0 == N2.getOperand(0) &&
19320      return matcher.getNode(
19321          ISD::FMUL, DL, VT, N0,
19322          matcher.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1)));
19323    }
19324
19325    // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
    // NOTE(review): the remaining constant checks of this condition were
    // elided in extraction — confirm against upstream.
19326    if (matcher.match(N0, ISD::FMUL) &&
19329      return matcher.getNode(
19330          ISD::FMA, DL, VT, N0.getOperand(0),
19331          matcher.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1)), N2);
19332    }
19333  }
19334
19335  // (fma x, -1, y) -> (fadd (fneg x), y)
19336  // FIXME: Support splat of constant.
19337  if (N1CFP) {
19338    if (N1CFP->isExactlyValue(1.0))
19339      return matcher.getNode(ISD::FADD, DL, VT, N0, N2);
19340
19341    if (N1CFP->isExactlyValue(-1.0) &&
19342        (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
19343      SDValue RHSNeg = matcher.getNode(ISD::FNEG, DL, VT, N0);
19344      AddToWorklist(RHSNeg.getNode());
19345      return matcher.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
19346    }
19347
19348    // fma (fneg x), K, y -> fma x -K, y
    // NOTE(review): the first alternative of this disjunction was elided in
    // extraction — confirm against upstream.
19349    if (matcher.match(N0, ISD::FNEG) &&
19351         (N1.hasOneUse() &&
19352          !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT, ForCodeSize)))) {
19353      return matcher.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
19354                             matcher.getNode(ISD::FNEG, DL, VT, N1), N2);
19355    }
19356  }
19357
19358  // FIXME: Support splat of constant.
19359  if (CanReassociate) {
19360    // (fma x, c, x) -> (fmul x, (c+1))
19361    if (N1CFP && N0 == N2) {
19362      return matcher.getNode(ISD::FMUL, DL, VT, N0,
19363                             matcher.getNode(ISD::FADD, DL, VT, N1,
19364                                             DAG.getConstantFP(1.0, DL, VT)));
19365    }
19366
19367    // (fma x, c, (fneg x)) -> (fmul x, (c-1))
19368    if (N1CFP && matcher.match(N2, ISD::FNEG) && N2.getOperand(0) == N0) {
19369      return matcher.getNode(ISD::FMUL, DL, VT, N0,
19370                             matcher.getNode(ISD::FADD, DL, VT, N1,
19371                                             DAG.getConstantFP(-1.0, DL, VT)));
19372    }
19373  }
19374
19375  // fold ((fma (fneg X), Y, (fneg Z)) -> fneg (fma X, Y, Z))
19376  // fold ((fma X, (fneg Y), (fneg Z)) -> fneg (fma X, Y, Z))
  // NOTE(review): the `if (SDValue Neg = ...` line opening this fold was
  // elided in extraction — confirm against upstream.
19377  if (!TLI.isFNegFree(VT))
19379            SDValue(N, 0), DAG, LegalOperations, ForCodeSize))
19380      return matcher.getNode(ISD::FNEG, DL, VT, Neg);
19381  return SDValue();
19382}
19383
19384SDValue DAGCombiner::visitFMAD(SDNode *N) {
19385 SDValue N0 = N->getOperand(0);
19386 SDValue N1 = N->getOperand(1);
19387 SDValue N2 = N->getOperand(2);
19388 EVT VT = N->getValueType(0);
19389 SDLoc DL(N);
19390
19391 // Constant fold FMAD.
19392 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FMAD, DL, VT, {N0, N1, N2}))
19393 return C;
19394
19395 return SDValue();
19396}
19397
19398SDValue DAGCombiner::visitFMULADD(SDNode *N) {
19399 SDValue N0 = N->getOperand(0);
19400 SDValue N1 = N->getOperand(1);
19401 SDValue N2 = N->getOperand(2);
19402 EVT VT = N->getValueType(0);
19403 SDLoc DL(N);
19404
19405 // Constant fold FMULADD.
19406 if (SDValue C =
19407 DAG.FoldConstantArithmetic(ISD::FMULADD, DL, VT, {N0, N1, N2}))
19408 return C;
19409
19410 return SDValue();
19411}
19412
19413// Combine multiple FDIVs with the same divisor into multiple FMULs by the
19414// reciprocal.
19415// E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
19416// Notice that this is not always beneficial. One reason is different targets
19417// may have different costs for FDIV and FMUL, so sometimes the cost of two
19418// FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
19419// is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
  // TODO: Limit this transform based on optsize/minsize - it always creates at
  // least 1 extra instruction. But the perf win may be substantial enough
  // that only minsize should restrict this.
  const SDNodeFlags Flags = N->getFlags();
  // The transform creates a reciprocal, so it needs the 'arcp' flag; it is
  // also skipped once the whole DAG has been legalized.
  if (LegalDAG || !Flags.hasAllowReciprocal())
    return SDValue();

  // Skip if current node is a reciprocal/fneg-reciprocal.
  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, /* AllowUndefs */ true);
  if (N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0)))
    return SDValue();

  // Exit early if the target does not want this transform or if there can't
  // possibly be enough uses of the divisor to make the transform worthwhile.
  // MinUses == 0 means the target opted out entirely.
  unsigned MinUses = TLI.combineRepeatedFPDivisors();

  // For splat vectors, scale the number of uses by the splat factor. If we can
  // convert the division into a scalar op, that will likely be much faster.
  unsigned NumElts = 1;
  EVT VT = N->getValueType(0);
  if (VT.isVector() && DAG.isSplatValue(N1))
    NumElts = VT.getVectorMinNumElements();

  if (!MinUses || (N1->use_size() * NumElts) < MinUses)
    return SDValue();

  // Find all FDIV users of the same divisor.
  // Use a set because duplicates may be present in the user list.
  SetVector<SDNode *> Users;
  for (auto *U : N1->users()) {
    if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
      // Skip X/sqrt(X) that has not been simplified to sqrt(X) yet.
      if (U->getOperand(1).getOpcode() == ISD::FSQRT &&
          U->getOperand(0) == U->getOperand(1).getOperand(0) &&
          U->getFlags().hasAllowReassociation() &&
          U->getFlags().hasNoSignedZeros())
        continue;

      // This division is eligible for optimization only if global unsafe math
      // is enabled or if this division allows reciprocal formation.
      if (U->getFlags().hasAllowReciprocal())
        Users.insert(U);
    }
  }

  // Now that we have the actual number of divisor uses, make sure it meets
  // the minimum threshold specified by the target.
  if ((Users.size() * NumElts) < MinUses)
    return SDValue();

  SDLoc DL(N);
  SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
  // The single shared reciprocal: 1.0 / Divisor.
  SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);

  // Dividend / Divisor -> Dividend * Reciprocal
  for (auto *U : Users) {
    SDValue Dividend = U->getOperand(0);
    if (Dividend != FPOne) {
      SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
                                    Reciprocal, Flags);
      CombineTo(U, NewNode);
    } else if (U != Reciprocal.getNode()) {
      // In the absence of fast-math-flags, this user node is always the
      // same node as Reciprocal, but with FMF they may be different nodes.
      CombineTo(U, Reciprocal);
    }
  }
  return SDValue(N, 0); // N was replaced.
}
19491
SDValue DAGCombiner::visitFDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  SDNodeFlags Flags = N->getFlags();
  // Nodes created while visiting this FDIV inherit its fast-math flags.
  SelectionDAG::FlagInserter FlagsInserter(DAG, N);

  if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
    return R;

  // fold (fdiv c1, c2) -> c1/c2
  if (SDValue C = DAG.FoldConstantArithmetic(ISD::FDIV, DL, VT, {N0, N1}))
    return C;

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
      return FoldedVOp;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // NOTE(review): the guarding condition for this return is missing from this
  // extraction (presumably "if (SDValue V = combineRepeatedFPDivisors(N))")
  // -- verify against upstream.
    return V;

  // fold (fdiv X, c2) -> (fmul X, 1/c2) if there is no loss in precision, or
  // the loss is acceptable with AllowReciprocal.
  if (auto *N1CFP = isConstOrConstSplatFP(N1, true)) {
    // Compute the reciprocal 1.0 / c2.
    const APFloat &N1APF = N1CFP->getValueAPF();
    APFloat Recip = APFloat::getOne(N1APF.getSemantics());
    // NOTE(review): the line defining 'st' (presumably the APFloat::opStatus
    // result of Recip.divide(N1APF, ...)) is missing from this extraction.
    // Only do the transform if the reciprocal is a legal fp immediate that
    // isn't too nasty (eg NaN, denormal, ...).
    if (((st == APFloat::opOK && !Recip.isDenormal()) ||
         (st == APFloat::opInexact && Flags.hasAllowReciprocal())) &&
        (!LegalOperations ||
         // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
         // backend)... we should handle this gracefully after Legalize.
         // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
         // NOTE(review): one condition line is missing from this extraction
         // here (likely a TLI.isOperationLegal(ISD::ConstantFP, VT) check).
         TLI.isFPImmLegal(Recip, VT, ForCodeSize)))
      return DAG.getNode(ISD::FMUL, DL, VT, N0,
                         DAG.getConstantFP(Recip, DL, VT));
  }

  if (Flags.hasAllowReciprocal()) {
    // If this FDIV is part of a reciprocal square root, it may be folded
    // into a target-specific square root estimate instruction.
    bool N1AllowReciprocal = N1->getFlags().hasAllowReciprocal();
    if (N1.getOpcode() == ISD::FSQRT) {
      // X / sqrt(Y) --> X * rsqrt(Y)
      if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), N1->getFlags()))
        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
    } else if (N1.getOpcode() == ISD::FP_EXTEND &&
               N1.getOperand(0).getOpcode() == ISD::FSQRT &&
               N1AllowReciprocal) {
      // X / fpext(sqrt(Y)) --> X * fpext(rsqrt(Y))
      if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
                                          N1.getOperand(0)->getFlags())) {
        RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
        AddToWorklist(RV.getNode());
        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
      }
    } else if (N1.getOpcode() == ISD::FP_ROUND &&
               N1.getOperand(0).getOpcode() == ISD::FSQRT) {
      // X / fpround(sqrt(Y)) --> X * fpround(rsqrt(Y))
      if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
                                          N1.getOperand(0)->getFlags())) {
        RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
        AddToWorklist(RV.getNode());
        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
      }
    } else if (N1.getOpcode() == ISD::FMUL) {
      // Look through an FMUL. Even though this won't remove the FDIV directly,
      // it's still worthwhile to get rid of the FSQRT if possible.
      SDValue Sqrt, Y;
      if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
        Sqrt = N1.getOperand(0);
        Y = N1.getOperand(1);
      } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
        Sqrt = N1.getOperand(1);
        Y = N1.getOperand(0);
      }
      if (Sqrt.getNode()) {
        // If the other multiply operand is known positive, pull it into the
        // sqrt. That will eliminate the division if we convert to an estimate.
        if (Flags.hasAllowReassociation() && N1.hasOneUse() &&
            N1->getFlags().hasAllowReassociation() && Sqrt.hasOneUse()) {
          SDValue A;
          if (Y.getOpcode() == ISD::FABS && Y.hasOneUse())
            A = Y.getOperand(0);
          else if (Y == Sqrt.getOperand(0))
            A = Y;
          if (A) {
            // X / (fabs(A) * sqrt(Z)) --> X / sqrt(A*A*Z) --> X * rsqrt(A*A*Z)
            // X / (A * sqrt(A)) --> X / sqrt(A*A*A) --> X * rsqrt(A*A*A)
            SDValue AA = DAG.getNode(ISD::FMUL, DL, VT, A, A);
            SDValue AAZ =
                DAG.getNode(ISD::FMUL, DL, VT, AA, Sqrt.getOperand(0));
            if (SDValue Rsqrt = buildRsqrtEstimate(AAZ, Sqrt->getFlags()))
              return DAG.getNode(ISD::FMUL, DL, VT, N0, Rsqrt);

            // Estimate creation failed. Clean up speculatively created nodes.
            recursivelyDeleteUnusedNodes(AAZ.getNode());
          }
        }

        // We found a FSQRT, so try to make this fold:
        // X / (Y * sqrt(Z)) -> X * (rsqrt(Z) / Y)
        if (SDValue Rsqrt =
                buildRsqrtEstimate(Sqrt.getOperand(0), Sqrt->getFlags())) {
          SDValue Div = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, Rsqrt, Y);
          AddToWorklist(Div.getNode());
          return DAG.getNode(ISD::FMUL, DL, VT, N0, Div);
        }
      }
    }

    // Fold into a reciprocal estimate and multiply instead of a real divide.
    if (Flags.hasNoInfs())
      if (SDValue RV = BuildDivEstimate(N0, N1, Flags))
        return RV;
  }

  // Fold X/Sqrt(X) -> Sqrt(X)
  if (DAG.canIgnoreSignBitOfZero(SDValue(N, 0)) &&
      Flags.hasAllowReassociation())
    if (N1.getOpcode() == ISD::FSQRT && N0 == N1.getOperand(0))
      return N1;

  // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
  // NOTE(review): the lines declaring CostN0/CostN1 (presumably
  // TargetLowering::NegatibleCost locals) are missing from this extraction
  // -- verify against upstream.
  SDValue NegN0 =
      TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
  if (NegN0) {
    // Keep NegN0 alive while computing NegN1 so it is not deleted as unused.
    HandleSDNode NegN0Handle(NegN0);
    SDValue NegN1 =
        TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
    if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
    // NOTE(review): the second half of this condition is missing from this
    // extraction (likely a CostN1 == NegatibleCost::Cheaper comparison).
      return DAG.getNode(ISD::FDIV, DL, VT, NegN0, NegN1);
  }

  if (SDValue R = combineFMulOrFDivWithIntPow2(N))
    return R;

  return SDValue();
}
19642
SDValue DAGCombiner::visitFREM(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDNodeFlags Flags = N->getFlags();
  // Nodes created while visiting this FREM inherit its fast-math flags.
  SelectionDAG::FlagInserter FlagsInserter(DAG, N);
  SDLoc DL(N);

  if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
    return R;

  // fold (frem c1, c2) -> fmod(c1,c2)
  if (SDValue C = DAG.FoldConstantArithmetic(ISD::FREM, DL, VT, {N0, N1}))
    return C;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // Lower frem N0, N1 => x - trunc(N0 / N1) * N1, providing N1 is an integer
  // power of 2.
  if (!TLI.isOperationLegal(ISD::FREM, VT) &&
      // NOTE(review): several condition lines are missing from this
      // extraction here -- verify against upstream.
      DAG.isKnownToBeAPowerOfTwoFP(N1)) {
    bool NeedsCopySign = !DAG.canIgnoreSignBitOfZero(SDValue(N, 0)) &&
    // NOTE(review): the second half of this condition is missing from this
    // extraction -- verify against upstream.
    SDValue Div = DAG.getNode(ISD::FDIV, DL, VT, N0, N1);
    SDValue Rnd = DAG.getNode(ISD::FTRUNC, DL, VT, Div);
    SDValue MLA;
    // NOTE(review): the condition choosing the FMA form (likely a
    // TLI.isFMAFasterThanFMulAndFAdd check) is missing from this extraction.
      // N0 - trunc(N0/N1) * N1 computed as a single fused multiply-add.
      MLA = DAG.getNode(ISD::FMA, DL, VT, DAG.getNode(ISD::FNEG, DL, VT, Rnd),
                        N1, N0);
    } else {
      SDValue Mul = DAG.getNode(ISD::FMUL, DL, VT, Rnd, N1);
      MLA = DAG.getNode(ISD::FSUB, DL, VT, N0, Mul);
    }
    // Restore the sign of N0 when the expansion could produce a wrongly
    // signed zero and the sign bit of zero matters.
    return NeedsCopySign ? DAG.getNode(ISD::FCOPYSIGN, DL, VT, MLA, N0) : MLA;
  }

  return SDValue();
}
19685
19686SDValue DAGCombiner::visitFSQRT(SDNode *N) {
19687 SDNodeFlags Flags = N->getFlags();
19688
19689 // Require 'ninf' flag since sqrt(+Inf) = +Inf, but the estimation goes as:
19690 // sqrt(+Inf) == rsqrt(+Inf) * +Inf = 0 * +Inf = NaN
19691 if (!Flags.hasApproximateFuncs() || !Flags.hasNoInfs())
19692 return SDValue();
19693
19694 SDValue N0 = N->getOperand(0);
19695 if (TLI.isFsqrtCheap(N0, DAG))
19696 return SDValue();
19697
19698 // FSQRT nodes have flags that propagate to the created nodes.
19699 SelectionDAG::FlagInserter FlagInserter(DAG, Flags);
19700 // TODO: If this is N0/sqrt(N0), and we reach this node before trying to
19701 // transform the fdiv, we may produce a sub-optimal estimate sequence
19702 // because the reciprocal calculation may not have to filter out a
19703 // 0.0 input.
19704 return buildSqrtEstimate(N0, Flags);
19705}
19706
19707/// copysign(x, fp_extend(y)) -> copysign(x, y)
19708/// copysign(x, fp_round(y)) -> copysign(x, y)
19709/// Operands to the functions are the type of X and Y respectively.
19710static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(EVT XTy, EVT YTy) {
19711 // Always fold no-op FP casts.
19712 if (XTy == YTy)
19713 return true;
19714
19715 // Do not optimize out type conversion of f128 type yet.
19716 // For some targets like x86_64, configuration is changed to keep one f128
19717 // value in one SSE register, but instruction selection cannot handle
19718 // FCOPYSIGN on SSE registers yet.
19719 if (YTy == MVT::f128)
19720 return false;
19721
19722 // Avoid mismatched vector operand types, for better instruction selection.
19723 return !YTy.isVector();
19724}
19725
// NOTE(review): the definition line of this overload (presumably
// "static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {") is
// missing from this extraction -- verify against upstream.
  SDValue N1 = N->getOperand(1);
  // Only an FP_EXTEND or FP_ROUND sign operand is a candidate for folding.
  if (N1.getOpcode() != ISD::FP_EXTEND &&
      N1.getOpcode() != ISD::FP_ROUND)
    return false;
  // Defer to the type-based overload with the cast's result and source types.
  EVT N1VT = N1->getValueType(0);
  EVT N1Op0VT = N1->getOperand(0).getValueType();
  return CanCombineFCOPYSIGN_EXTEND_ROUND(N1VT, N1Op0VT);
}
19735
SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  // fold (fcopysign c1, c2) -> fcopysign(c1,c2)
  if (SDValue C = DAG.FoldConstantArithmetic(ISD::FCOPYSIGN, DL, VT, {N0, N1}))
    return C;

  // copysign(x, fp_extend(y)) -> copysign(x, y)
  // copysign(x, fp_round(y)) -> copysign(x, y)
  // NOTE(review): the guard for this return is missing from this extraction
  // (presumably a CanCombineFCOPYSIGN_EXTEND_ROUND(N) check).
    return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N0, N1.getOperand(0));

  // NOTE(review): the guard for this return is missing from this extraction
  // (presumably a SimplifyDemandedBits-style call that rewrote N in place).
    return SDValue(N, 0);

  if (VT != N1.getValueType())
    return SDValue();

  // If this is equivalent to a disjoint or, replace it with one. This can
  // happen if the sign operand is a sign mask (i.e., x << sign_bit_position).
  if (DAG.SignBitIsZeroFP(N0) &&
  // NOTE(review): the second half of this condition is missing from this
  // extraction -- verify against upstream.
    // TODO: Just directly match the shift pattern. computeKnownBits is heavy
    // for a such a narrowly targeted case.
    EVT IntVT = VT.changeTypeToInteger();
    // TODO: It appears to be profitable in some situations to unconditionally
    // emit a fabs(n0) to perform this combine.
    SDValue CastSrc0 = DAG.getNode(ISD::BITCAST, DL, IntVT, N0);
    SDValue CastSrc1 = DAG.getNode(ISD::BITCAST, DL, IntVT, N1);

    // NOTE(review): the final argument line of this getNode call (likely the
    // disjoint flag) is missing from this extraction.
    SDValue SignOr = DAG.getNode(ISD::OR, DL, IntVT, CastSrc0, CastSrc1,
    return DAG.getNode(ISD::BITCAST, DL, VT, SignOr);
  }

  return SDValue();
}
19776
SDValue DAGCombiner::visitFPOW(SDNode *N) {
  // All folds below require a constant (or splat-constant) exponent.
  ConstantFPSDNode *ExponentC = isConstOrConstSplatFP(N->getOperand(1));
  if (!ExponentC)
    return SDValue();
  SelectionDAG::FlagInserter FlagsInserter(DAG, N);

  // Try to convert x ** (1/3) into cube root.
  // TODO: Handle the various flavors of long double.
  // TODO: Since we're approximating, we don't need an exact 1/3 exponent.
  //       Some range near 1/3 should be fine.
  EVT VT = N->getValueType(0);
  EVT ScalarVT = VT.getScalarType();
  if ((ScalarVT == MVT::f32 &&
       ExponentC->getValueAPF().isExactlyValue(1.0f / 3.0f)) ||
      (ScalarVT == MVT::f64 &&
       ExponentC->getValueAPF().isExactlyValue(1.0 / 3.0))) {
    // pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0.
    // pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf.
    // pow(-val, 1/3) = nan; cbrt(-val) = -num.
    // For regular numbers, rounding may cause the results to differ.
    // Therefore, we require { nsz ninf nnan afn } for this transform.
    // TODO: We could select out the special cases if we don't have nsz/ninf.
    SDNodeFlags Flags = N->getFlags();
    if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || !Flags.hasNoNaNs() ||
        !Flags.hasApproximateFuncs())
      return SDValue();

    // Do not create a cbrt() libcall if the target does not have it, and do not
    // turn a pow that has lowering support into a cbrt() libcall.
    RTLIB::Libcall LC = RTLIB::getCBRT(VT);
    bool HasLibCall =
        DAG.getLibcalls().getLibcallImpl(LC) != RTLIB::Unsupported;
    if (!HasLibCall ||
    // NOTE(review): the remaining condition lines are missing from this
    // extraction -- verify against upstream.
      return SDValue();

    return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0));
  }

  // Try to convert x ** (1/4) and x ** (3/4) into square roots.
  // x ** (1/2) is canonicalized to sqrt, so we do not bother with that case.
  // TODO: This could be extended (using a target hook) to handle smaller
  // power-of-2 fractional exponents.
  bool ExponentIs025 = ExponentC->getValueAPF().isExactlyValue(0.25);
  bool ExponentIs075 = ExponentC->getValueAPF().isExactlyValue(0.75);
  if (ExponentIs025 || ExponentIs075) {
    // pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0.
    // pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) = NaN.
    // pow(-0.0, 0.75) = +0.0; sqrt(-0.0) * sqrt(sqrt(-0.0)) = +0.0.
    // pow(-inf, 0.75) = +inf; sqrt(-inf) * sqrt(sqrt(-inf)) = NaN.
    // For regular numbers, rounding may cause the results to differ.
    // Therefore, we require { nsz ninf afn } for this transform.
    // TODO: We could select out the special cases if we don't have nsz/ninf.
    SDNodeFlags Flags = N->getFlags();

    // We only need no signed zeros for the 0.25 case.
    if ((!Flags.hasNoSignedZeros() && ExponentIs025) || !Flags.hasNoInfs() ||
        !Flags.hasApproximateFuncs())
      return SDValue();

    // Don't double the number of libcalls. We are trying to inline fast code.
    // NOTE(review): the guard for this return is missing from this extraction
    // (presumably a check that FSQRT does not itself become a libcall).
      return SDValue();

    // Assume that libcalls are the smallest code.
    // TODO: This restriction should probably be lifted for vectors.
    if (ForCodeSize)
      return SDValue();

    // pow(X, 0.25) --> sqrt(sqrt(X))
    SDLoc DL(N);
    SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0));
    SDValue SqrtSqrt = DAG.getNode(ISD::FSQRT, DL, VT, Sqrt);
    if (ExponentIs025)
      return SqrtSqrt;
    // pow(X, 0.75) --> sqrt(X) * sqrt(sqrt(X))
    return DAG.getNode(ISD::FMUL, DL, VT, Sqrt, SqrtSqrt);
  }

  return SDValue();
}
19859
// NOTE(review): the first line of this definition is missing from this
// extraction (presumably "static SDValue foldFPToIntToFP(SDNode *N, const
// SDLoc &DL, SelectionDAG &DAG,") -- verify against upstream.
                               const TargetLowering &TLI) {
  // We can fold the fpto[us]i -> [us]itofp pattern into a single ftrunc.
  // Additionally, if there are clamps ([us]min or [us]max) around
  // the fpto[us]i, we can fold those into fminnum/fmaxnum around the ftrunc.
  // If NoSignedZerosFPMath is enabled, this is a direct replacement.
  // Otherwise, for strict math, we must handle edge cases:
  // 1. For unsigned conversions, use FABS to handle negative cases. Take -0.0
  // as example, it first becomes integer 0, and is converted back to +0.0.
  // FTRUNC on its own could produce -0.0.

  // FIXME: We should be able to use node-level FMF here.
  EVT VT = N->getValueType(0);
  if (!TLI.isOperationLegal(ISD::FTRUNC, VT))
    return SDValue();

  bool IsUnsigned = N->getOpcode() == ISD::UINT_TO_FP;
  bool IsSigned = N->getOpcode() == ISD::SINT_TO_FP;
  assert(IsSigned || IsUnsigned);

  bool IsSignedZeroSafe = DAG.getTarget().Options.NoSignedZerosFPMath ||
  // NOTE(review): the second half of this initializer is missing from this
  // extraction -- verify against upstream.
  // For signed conversions: The optimization changes signed zero behavior.
  if (IsSigned && !IsSignedZeroSafe)
    return SDValue();
  // For unsigned conversions, we need FABS to canonicalize -0.0 to +0.0
  // (unless outputting a signed zero is OK).
  if (IsUnsigned && !IsSignedZeroSafe && !TLI.isFAbsFree(VT))
    return SDValue();

  // Collect potential clamp operations (outermost to innermost) and peel.
  struct ClampInfo {
    bool IsMin;
    // NOTE(review): a member line is missing from this extraction here
    // (the code below reads Clamp.Constant, an SDValue FP constant).
  };
  constexpr unsigned MaxClamps = 2;
  // NOTE(review): the declaration of 'Clamps' (a small vector of ClampInfo)
  // is missing from this extraction.
  unsigned MinOp = IsUnsigned ? ISD::UMIN : ISD::SMIN;
  unsigned MaxOp = IsUnsigned ? ISD::UMAX : ISD::SMAX;
  SDValue IntVal = N->getOperand(0);
  for (unsigned Level = 0; Level < MaxClamps; ++Level) {
    if (!IntVal.hasOneUse() ||
        (IntVal.getOpcode() != MinOp && IntVal.getOpcode() != MaxOp))
      break;
    SDValue RHS = IntVal.getOperand(1);
    APInt IntConst;
    if (auto *IntConstNode = dyn_cast<ConstantSDNode>(RHS))
      IntConst = IntConstNode->getAPIntValue();
    else if (!ISD::isConstantSplatVector(RHS.getNode(), IntConst))
      return SDValue();
    // Convert the integer clamp bound to FP so it can clamp the ftrunc result.
    APFloat FPConst(VT.getFltSemantics());
    FPConst.convertFromAPInt(IntConst, IsSigned, APFloat::rmNearestTiesToEven);
    // Verify roundtrip exactness.
    APSInt RoundTrip(IntConst.getBitWidth(), IsUnsigned);
    bool IsExact;
    if (FPConst.convertToInteger(RoundTrip, APFloat::rmTowardZero, &IsExact) !=
            APFloat::opOK ||
        !IsExact || static_cast<const APInt &>(RoundTrip) != IntConst)
      return SDValue();
    bool IsMin = IntVal.getOpcode() == MinOp;
    Clamps.push_back({IsMin, DAG.getConstantFP(FPConst, DL, VT)});
    IntVal = IntVal.getOperand(0);
  }

  // Check that the sequence ends with the correct kind of fpto[us]i.
  unsigned FPToIntOp = IsUnsigned ? ISD::FP_TO_UINT : ISD::FP_TO_SINT;
  if (IntVal.getOpcode() != FPToIntOp ||
      IntVal.getOperand(0).getValueType() != VT)
    return SDValue();

  SDValue Result = IntVal.getOperand(0);
  if (IsUnsigned && !IsSignedZeroSafe && TLI.isFAbsFree(VT))
    Result = DAG.getNode(ISD::FABS, DL, VT, Result);
  Result = DAG.getNode(ISD::FTRUNC, DL, VT, Result);
  // Apply clamps, if any, in reverse order (innermost first).
  for (const ClampInfo &Clamp : reverse(Clamps)) {
    unsigned FPClampOp =
        getMinMaxOpcodeForClamp(Clamp.IsMin, Result, Clamp.Constant, DAG, TLI);
    if (FPClampOp == ISD::DELETED_NODE)
      return SDValue();
    Result = DAG.getNode(FPClampOp, DL, VT, Result, Clamp.Constant);
  }
  return Result;
}
19944
SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  EVT OpVT = N0.getValueType();
  SDLoc DL(N);

  // [us]itofp(undef) = 0, because the result value is bounded.
  if (N0.isUndef())
    return DAG.getConstantFP(0.0, DL, VT);

  // fold (sint_to_fp c1) -> c1fp
  // ...but only if the target supports immediate floating-point values
  if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
    if (SDValue C = DAG.FoldConstantArithmetic(ISD::SINT_TO_FP, DL, VT, {N0}))
      return C;

  // If the input is a legal type, and SINT_TO_FP is not legal on this target,
  // but UINT_TO_FP is legal on this target, try to convert.
  if (!hasOperation(ISD::SINT_TO_FP, OpVT) &&
      hasOperation(ISD::UINT_TO_FP, OpVT)) {
    // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
    if (DAG.SignBitIsZero(N0))
      return DAG.getNode(ISD::UINT_TO_FP, DL, VT, N0);
  }

  // The next optimizations are desirable only if SELECT_CC can be lowered.
  // fold (sint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), -1.0, 0.0)
  if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
      !VT.isVector() &&
      (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
    return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(-1.0, DL, VT),
                         DAG.getConstantFP(0.0, DL, VT));

  // fold (sint_to_fp (zext (setcc x, y, cc))) ->
  //      (select (setcc x, y, cc), 1.0, 0.0)
  if (N0.getOpcode() == ISD::ZERO_EXTEND &&
      N0.getOperand(0).getOpcode() == ISD::SETCC && !VT.isVector() &&
      (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
    return DAG.getSelect(DL, VT, N0.getOperand(0),
                         DAG.getConstantFP(1.0, DL, VT),
                         DAG.getConstantFP(0.0, DL, VT));

  // fpto[us]i round-tripped back to FP can become a single ftrunc.
  if (SDValue FTrunc = foldFPToIntToFP(N, DL, DAG, TLI))
    return FTrunc;

  // fold (sint_to_fp (trunc nsw x)) -> (sint_to_fp x)
  if (N0.getOpcode() == ISD::TRUNCATE && N0->getFlags().hasNoSignedWrap() &&
      // NOTE(review): one condition line is missing from this extraction here
      // (a legality/desirability check on the pre-truncate type).
          N0.getOperand(0).getValueType()))
    return DAG.getNode(ISD::SINT_TO_FP, DL, VT, N0.getOperand(0));

  return SDValue();
}
19998
SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  EVT OpVT = N0.getValueType();
  SDLoc DL(N);

  // [us]itofp(undef) = 0, because the result value is bounded.
  if (N0.isUndef())
    return DAG.getConstantFP(0.0, DL, VT);

  // fold (uint_to_fp c1) -> c1fp
  // ...but only if the target supports immediate floating-point values
  if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
    if (SDValue C = DAG.FoldConstantArithmetic(ISD::UINT_TO_FP, DL, VT, {N0}))
      return C;

  // If the input is a legal type, and UINT_TO_FP is not legal on this target,
  // but SINT_TO_FP is legal on this target, try to convert.
  if (!hasOperation(ISD::UINT_TO_FP, OpVT) &&
      hasOperation(ISD::SINT_TO_FP, OpVT)) {
    // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
    if (DAG.SignBitIsZero(N0))
      return DAG.getNode(ISD::SINT_TO_FP, DL, VT, N0);
  }

  // fold (uint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), 1.0, 0.0)
  if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
      (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
    return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(1.0, DL, VT),
                         DAG.getConstantFP(0.0, DL, VT));

  // fpto[us]i round-tripped back to FP can become a single ftrunc.
  if (SDValue FTrunc = foldFPToIntToFP(N, DL, DAG, TLI))
    return FTrunc;

  // fold (uint_to_fp (trunc nuw x)) -> (uint_to_fp x)
  if (N0.getOpcode() == ISD::TRUNCATE && N0->getFlags().hasNoUnsignedWrap() &&
      // NOTE(review): one condition line is missing from this extraction here
      // (a legality/desirability check on the pre-truncate type).
          N0.getOperand(0).getValueType()))
    return DAG.getNode(ISD::UINT_TO_FP, DL, VT, N0.getOperand(0));

  return SDValue();
}
20041
// Fold (fp_to_{s/u}int ({s/u}int_to_fpx)) -> zext x, sext x, trunc x, or x
// NOTE(review): the definition line (presumably "static SDValue
// FoldIntToFPToInt(SDNode *N, const SDLoc &DL, SelectionDAG &DAG) {") is
// missing from this extraction -- verify against upstream.
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
    return SDValue();

  SDValue Src = N0.getOperand(0);
  EVT SrcVT = Src.getValueType();
  bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
  bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;

  // We can safely assume the conversion won't overflow the output range,
  // because (for example) (uint8_t)18293.f is undefined behavior.

  // Since we can assume the conversion won't overflow, our decision as to
  // whether the input will fit in the float should depend on the minimum
  // of the input range and output range.

  // This means this is also safe for a signed input and unsigned output, since
  // a negative input would lead to undefined behavior.
  // A signed input spends one bit on the sign, hence the subtraction.
  unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
  unsigned OutputSize = (int)VT.getScalarSizeInBits();
  unsigned ActualSize = std::min(InputSize, OutputSize);
  const fltSemantics &Sem = N0.getValueType().getFltSemantics();

  // We can only fold away the float conversion if the input range can be
  // represented exactly in the float range.
  if (APFloat::semanticsPrecision(Sem) >= ActualSize) {
    if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
      // Widening: sign-extend only when both sides are signed.
      unsigned ExtOp =
          IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
      return DAG.getNode(ExtOp, DL, VT, Src);
    }
    if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
      return DAG.getNode(ISD::TRUNCATE, DL, VT, Src);
    // Same width: the round trip is an identity (modulo type).
    return DAG.getBitcast(VT, Src);
  }
  return SDValue();
}
20083
20084SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
20085 SDValue N0 = N->getOperand(0);
20086 EVT VT = N->getValueType(0);
20087 SDLoc DL(N);
20088
20089 // fold (fp_to_sint undef) -> undef
20090 if (N0.isUndef())
20091 return DAG.getUNDEF(VT);
20092
20093 // fold (fp_to_sint c1fp) -> c1
20094 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FP_TO_SINT, DL, VT, {N0}))
20095 return C;
20096
20097 return FoldIntToFPToInt(N, DL, DAG);
20098}
20099
20100SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
20101 SDValue N0 = N->getOperand(0);
20102 EVT VT = N->getValueType(0);
20103 SDLoc DL(N);
20104
20105 // fold (fp_to_uint undef) -> undef
20106 if (N0.isUndef())
20107 return DAG.getUNDEF(VT);
20108
20109 // fold (fp_to_uint c1fp) -> c1
20110 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FP_TO_UINT, DL, VT, {N0}))
20111 return C;
20112
20113 return FoldIntToFPToInt(N, DL, DAG);
20114}
20115
20116SDValue DAGCombiner::visitXROUND(SDNode *N) {
20117 SDValue N0 = N->getOperand(0);
20118 EVT VT = N->getValueType(0);
20119
20120 // fold (lrint|llrint undef) -> undef
20121 // fold (lround|llround undef) -> undef
20122 if (N0.isUndef())
20123 return DAG.getUNDEF(VT);
20124
20125 // fold (lrint|llrint c1fp) -> c1
20126 // fold (lround|llround c1fp) -> c1
20127 if (SDValue C =
20128 DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, {N0}))
20129 return C;
20130
20131 return SDValue();
20132}
20133
SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  // fold (fp_round c1fp) -> c1fp
  if (SDValue C = DAG.FoldConstantArithmetic(ISD::FP_ROUND, DL, VT, {N0, N1}))
    return C;

  // fold (fp_round (fp_extend x)) -> x
  if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
    return N0.getOperand(0);

  // fold (fp_round (fp_round x)) -> (fp_round x)
  if (N0.getOpcode() == ISD::FP_ROUND) {
    // Operand 1 == 1 marks a value-preserving ("trunc") fp_round.
    const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
    const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;

    // Avoid folding legal fp_rounds into non-legal ones.
    if (!hasOperation(ISD::FP_ROUND, VT))
      return SDValue();

    // Skip this folding if it results in an fp_round from f80 to f16.
    //
    // f80 to f16 always generates an expensive (and as yet, unimplemented)
    // libcall to __truncxfhf2 instead of selecting native f16 conversion
    // instructions from f32 or f64. Moreover, the first (value-preserving)
    // fp_round from f80 to either f32 or f64 may become a NOP in platforms like
    // x86.
    if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
      return SDValue();

    // If the first fp_round isn't a value preserving truncation, it might
    // introduce a tie in the second fp_round, that wouldn't occur in the
    // single-step fp_round we want to fold to.
    // In other words, double rounding isn't the same as rounding.
    // Also, this is a value preserving truncation iff both fp_round's are.
    if ((N->getFlags().hasAllowContract() &&
         N0->getFlags().hasAllowContract()) ||
        N0IsTrunc)
      return DAG.getNode(
          ISD::FP_ROUND, DL, VT, N0.getOperand(0),
          DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL, /*isTarget=*/true));
  }

  // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
  // Note: From a legality perspective, this is a two step transform. First,
  // we duplicate the fp_round to the arguments of the copysign, then we
  // eliminate the fp_round on Y. The second step requires an additional
  // predicate to match the implementation above.
  if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse() &&
      // NOTE(review): one condition line is missing from this extraction here
      // (likely a CanCombineFCOPYSIGN_EXTEND_ROUND check).
                                     N0.getValueType())) {
    SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
                              N0.getOperand(0), N1);
    AddToWorklist(Tmp.getNode());
    return DAG.getNode(ISD::FCOPYSIGN, DL, VT, Tmp, N0.getOperand(1));
  }

  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
    return NewVSel;

  return SDValue();
}
20199
// Eliminate a floating-point widening of a narrowed value if the fast math
// flags allow it.
// NOTE(review): the definition line (presumably
// "SDValue DAGCombiner::eliminateFPCastPair(SDNode *N) {") is missing from
// this extraction -- verify against upstream.
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // Map each widening opcode to the narrowing opcode it can cancel with.
  unsigned NarrowingOp;
  switch (N->getOpcode()) {
  case ISD::FP16_TO_FP:
    NarrowingOp = ISD::FP_TO_FP16;
    break;
  case ISD::BF16_TO_FP:
    NarrowingOp = ISD::FP_TO_BF16;
    break;
  case ISD::FP_EXTEND:
    NarrowingOp = ISD::FP_ROUND;
    break;
  default:
    llvm_unreachable("Expected widening FP cast");
  }

  if (N0.getOpcode() == NarrowingOp && N0.getOperand(0).getValueType() == VT) {
    const SDNodeFlags NarrowFlags = N0->getFlags();
    const SDNodeFlags WidenFlags = N->getFlags();
    // Narrowing can introduce inf and change the encoding of a nan, so the
    // widen must have the nnan and ninf flags to indicate that we don't need to
    // care about that. We are also removing a rounding step, and that requires
    // both the narrow and widen to allow contraction.
    if (WidenFlags.hasNoNaNs() && WidenFlags.hasNoInfs() &&
        NarrowFlags.hasAllowContract() && WidenFlags.hasAllowContract()) {
      return N0.getOperand(0);
    }
  }

  return SDValue();
}
20236
SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
  // Nodes created while visiting inherit this node's fast-math flags.
  SelectionDAG::FlagInserter FlagsInserter(DAG, N);
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVCastOp(N, DL))
      return FoldedVOp;

  // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
  if (N->hasOneUse() && N->user_begin()->getOpcode() == ISD::FP_ROUND)
    return SDValue();

  // fold (fp_extend c1fp) -> c1fp
  if (SDValue C = DAG.FoldConstantArithmetic(ISD::FP_EXTEND, DL, VT, {N0}))
    return C;

  // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
  if (N0.getOpcode() == ISD::FP16_TO_FP &&
  // NOTE(review): one condition line is missing from this extraction here --
  // verify against upstream.
    return DAG.getNode(ISD::FP16_TO_FP, DL, VT, N0.getOperand(0));

  // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
  // value of X.
  if (N0.getOpcode() == ISD::FP_ROUND && N0.getConstantOperandVal(1) == 1) {
    SDValue In = N0.getOperand(0);
    if (In.getValueType() == VT) return In;
    if (VT.bitsLT(In.getValueType()))
      return DAG.getNode(ISD::FP_ROUND, DL, VT, In, N0.getOperand(1));
    return DAG.getNode(ISD::FP_EXTEND, DL, VT, In);
  }

  // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse()) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    if (TLI.isLoadLegalOrCustom(VT, N0.getValueType(), LN0->getAlign(),
                                LN0->getAddressSpace(), ISD::EXTLOAD, false)) {
      SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, LN0->getChain(),
                                       LN0->getBasePtr(), N0.getValueType(),
                                       LN0->getMemOperand());
      CombineTo(N, ExtLoad);
      // Replace the original (narrow) load's value with a round of the
      // extending load so other users of the load still see the narrow value.
      CombineTo(
          N0.getNode(),
          DAG.getNode(ISD::FP_ROUND, SDLoc(N0), N0.getValueType(), ExtLoad,
                      DAG.getIntPtrConstant(1, SDLoc(N0), /*isTarget=*/true)),
          ExtLoad.getValue(1));
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }

  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
    return NewVSel;

  if (SDValue CastEliminated = eliminateFPCastPair(N))
    return CastEliminated;

  return SDValue();
}
20296
20297SDValue DAGCombiner::visitFCEIL(SDNode *N) {
20298 SDValue N0 = N->getOperand(0);
20299 EVT VT = N->getValueType(0);
20300
20301 // fold (fceil c1) -> fceil(c1)
20302 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FCEIL, SDLoc(N), VT, {N0}))
20303 return C;
20304
20305 return SDValue();
20306}
20307
20308SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
20309 SDValue N0 = N->getOperand(0);
20310 EVT VT = N->getValueType(0);
20311
20312 // fold (ftrunc c1) -> ftrunc(c1)
20313 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FTRUNC, SDLoc(N), VT, {N0}))
20314 return C;
20315
20316 // fold ftrunc (known rounded int x) -> x
20317 // ftrunc is a part of fptosi/fptoui expansion on some targets, so this is
20318 // likely to be generated to extract integer from a rounded floating value.
20319 switch (N0.getOpcode()) {
20320 default: break;
20321 case ISD::FRINT:
20322 case ISD::FTRUNC:
20323 case ISD::FNEARBYINT:
20324 case ISD::FROUND:
20325 case ISD::FROUNDEVEN:
20326 case ISD::FFLOOR:
20327 case ISD::FCEIL:
20328 return N0;
20329 }
20330
20331 return SDValue();
20332}
20333
// Visit FFREXP, a node producing two results (see N->getVTList()).
SDValue DAGCombiner::visitFFREXP(SDNode *N) {
  SDValue N0 = N->getOperand(0);

  // fold (ffrexp c1) -> ffrexp(c1)
    return DAG.getNode(ISD::FFREXP, SDLoc(N), N->getVTList(), N0);
  return SDValue();
}
20342
20343SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
20344 SDValue N0 = N->getOperand(0);
20345 EVT VT = N->getValueType(0);
20346
20347 // fold (ffloor c1) -> ffloor(c1)
20348 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FFLOOR, SDLoc(N), VT, {N0}))
20349 return C;
20350
20351 return SDValue();
20352}
20353
SDValue DAGCombiner::visitFNEG(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  SelectionDAG::FlagInserter FlagsInserter(DAG, N);

  // Constant fold FNEG.
  if (SDValue C = DAG.FoldConstantArithmetic(ISD::FNEG, SDLoc(N), VT, {N0}))
    return C;

  // Ask TLI for a negated form of N0 that avoids the explicit FNEG.
  if (SDValue NegN0 =
          TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize))
    return NegN0;

  // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
  // FIXME: This is duplicated in getNegatibleCost, but getNegatibleCost doesn't
  // know it was called from a context with a nsz flag if the input fsub does
  // not.
  if (N0.getOpcode() == ISD::FSUB && N->getFlags().hasNoSignedZeros() &&
      N0.hasOneUse()) {
    return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0.getOperand(1),
                       N0.getOperand(0));
  }

    return SDValue(N, 0);

  if (SDValue Cast = foldSignChangeInBitcast(N))
    return Cast;

  return SDValue();
}
20385
// Shared visitor for the FMINNUM/FMAXNUM/FMINIMUM/FMAXIMUM/FMINIMUMNUM/
// FMAXIMUMNUM family; Opc distinguishes the variants.
SDValue DAGCombiner::visitFMinMax(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  const SDNodeFlags Flags = N->getFlags();
  unsigned Opc = N->getOpcode();
  // FMINIMUM/FMAXIMUM propagate any NaN input as a quiet NaN;
  // FMINNUM/FMAXNUM only quiet and propagate signaling NaNs.
  bool PropAllNaNsToQNaNs = Opc == ISD::FMINIMUM || Opc == ISD::FMAXIMUM;
  bool PropOnlySNaNsToQNaNs = Opc == ISD::FMINNUM || Opc == ISD::FMAXNUM;
  bool IsMin =
  SelectionDAG::FlagInserter FlagsInserter(DAG, N);

  // Constant fold.
  if (SDValue C = DAG.FoldConstantArithmetic(Opc, SDLoc(N), VT, {N0, N1}))
    return C;

  // Canonicalize to constant on RHS.
    return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);

  if (const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1)) {
    const APFloat &AF = N1CFP->getValueAPF();

    // minnum(X, qnan) -> X
    // maxnum(X, qnan) -> X
    // minnum(X, snan) -> qnan
    // maxnum(X, snan) -> qnan
    // minimum(X, nan) -> qnan
    // maximum(X, nan) -> qnan
    // minimumnum(X, nan) -> X
    // maximumnum(X, nan) -> X
    if (AF.isNaN()) {
      if (PropAllNaNsToQNaNs || (AF.isSignaling() && PropOnlySNaNsToQNaNs)) {
        if (AF.isSignaling())
          return DAG.getConstantFP(AF.makeQuiet(), SDLoc(N), VT);
        return N->getOperand(1);
      }
      return N->getOperand(0);
    }

    // In the following folds, inf can be replaced with the largest finite
    // float, if the ninf flag is set.
    if (AF.isInfinity() || (Flags.hasNoInfs() && AF.isLargest())) {
      // minnum(X, -inf) -> -inf (ignoring sNaN -> qNaN propagation)
      // maxnum(X, +inf) -> +inf (ignoring sNaN -> qNaN propagation)
      // minimum(X, -inf) -> -inf if nnan
      // maximum(X, +inf) -> +inf if nnan
      // minimumnum(X, -inf) -> -inf
      // maximumnum(X, +inf) -> +inf
      if (IsMin == AF.isNegative() &&
          (!PropAllNaNsToQNaNs || Flags.hasNoNaNs()))
        return N->getOperand(1);

      // minnum(X, +inf) -> X if nnan
      // maxnum(X, -inf) -> X if nnan
      // minimum(X, +inf) -> X (ignoring quieting of sNaNs)
      // maximum(X, -inf) -> X (ignoring quieting of sNaNs)
      // minimumnum(X, +inf) -> X if nnan
      // maximumnum(X, -inf) -> X if nnan
      if (IsMin != AF.isNegative() && (PropAllNaNsToQNaNs || Flags.hasNoNaNs()))
        return N->getOperand(0);
    }
  }

  // There are no VECREDUCE variants of FMINIMUMNUM or FMAXIMUMNUM
    return SDValue();

  if (SDValue SD = reassociateReduction(
          PropAllNaNsToQNaNs
          Opc, SDLoc(N), VT, N0, N1, Flags))
    return SD;

  return SDValue();
}
20464
SDValue DAGCombiner::visitFABS(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  // fold (fabs c1) -> fabs(c1)
  if (SDValue C = DAG.FoldConstantArithmetic(ISD::FABS, DL, VT, {N0}))
    return C;

    return SDValue(N, 0);

  if (SDValue Cast = foldSignChangeInBitcast(N))
    return Cast;

  return SDValue();
}
20482
// Combine a conditional branch: operands are (Chain, Cond, DestBB).
SDValue DAGCombiner::visitBRCOND(SDNode *N) {
  SDValue Chain = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);

  // BRCOND(FREEZE(cond)) is equivalent to BRCOND(cond) (both are
  // nondeterministic jumps).
  if (N1->getOpcode() == ISD::FREEZE && N1.hasOneUse()) {
    return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain,
                       N1->getOperand(0), N2, N->getFlags());
  }

  // Variant of the previous fold where there is a SETCC in between:
  //   BRCOND(SETCC(FREEZE(X), CONST, Cond))
  // =>
  //   BRCOND(FREEZE(SETCC(X, CONST, Cond)))
  // =>
  //   BRCOND(SETCC(X, CONST, Cond))
  // This is correct if FREEZE(X) has one use and SETCC(FREEZE(X), CONST, Cond)
  // isn't equivalent to true or false.
  // For example, SETCC(FREEZE(X), -128, SETULT) cannot be folded to
  // FREEZE(SETCC(X, -128, SETULT)) because X can be poison.
  if (N1->getOpcode() == ISD::SETCC && N1.hasOneUse()) {
    SDValue S0 = N1->getOperand(0), S1 = N1->getOperand(1);
    ConstantSDNode *S0C = dyn_cast<ConstantSDNode>(S0);
    ConstantSDNode *S1C = dyn_cast<ConstantSDNode>(S1);
    bool Updated = false;

    // Is 'X Cond C' always true or false?
    auto IsAlwaysTrueOrFalse = [](ISD::CondCode Cond, ConstantSDNode *C) {
      bool False = (Cond == ISD::SETULT && C->isZero()) ||
                   (Cond == ISD::SETLT && C->isMinSignedValue()) ||
                   (Cond == ISD::SETUGT && C->isAllOnes()) ||
                   (Cond == ISD::SETGT && C->isMaxSignedValue());
      bool True = (Cond == ISD::SETULE && C->isAllOnes()) ||
                  (Cond == ISD::SETLE && C->isMaxSignedValue()) ||
                  (Cond == ISD::SETUGE && C->isZero()) ||
                  (Cond == ISD::SETGE && C->isMinSignedValue());
      return True || False;
    };

    // Strip a one-use FREEZE from either setcc operand when the comparison
    // against the constant is not degenerate (always true/false).
    if (S0->getOpcode() == ISD::FREEZE && S0.hasOneUse() && S1C) {
      if (!IsAlwaysTrueOrFalse(Cond, S1C)) {
        S0 = S0->getOperand(0);
        Updated = true;
      }
    }
    if (S1->getOpcode() == ISD::FREEZE && S1.hasOneUse() && S0C) {
      if (!IsAlwaysTrueOrFalse(ISD::getSetCCSwappedOperands(Cond), S0C)) {
        S1 = S1->getOperand(0);
        Updated = true;
      }
    }

    if (Updated)
      return DAG.getNode(
          ISD::BRCOND, SDLoc(N), MVT::Other, Chain,
          DAG.getSetCC(SDLoc(N1), N1->getValueType(0), S0, S1, Cond), N2,
          N->getFlags());
  }

  // If N is a constant we could fold this into a fallthrough or unconditional
  // branch. However that doesn't happen very often in normal code, because
  // Instcombine/SimplifyCFG should have handled the available opportunities.
  // If we did this folding here, it would be necessary to update the
  // MachineBasicBlock CFG, which is awkward.

  // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
  // on the target, also copy fast math flags.
  if (N1.getOpcode() == ISD::SETCC &&
      N1.getOperand(0).getValueType())) {
    return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other, Chain,
                       N1.getOperand(2), N1.getOperand(0), N1.getOperand(1), N2,
                       N1->getFlags());
  }

  if (N1.hasOneUse()) {
    // rebuildSetCC calls visitXor which may change the Chain when there is a
    // STRICT_FSETCC/STRICT_FSETCCS involved. Use a handle to track changes.
    HandleSDNode ChainHandle(Chain);
    if (SDValue NewN1 = rebuildSetCC(N1))
      return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other,
                         ChainHandle.getValue(), NewN1, N2, N->getFlags());
  }

  return SDValue();
}
20572
// Rebuild a brcond condition N into an explicit SETCC where profitable.
// Returns the replacement value, or an empty SDValue if no rewrite applies.
SDValue DAGCombiner::rebuildSetCC(SDValue N) {
  if (N.getOpcode() == ISD::SRL ||
      (N.getOpcode() == ISD::TRUNCATE &&
       (N.getOperand(0).hasOneUse() &&
        N.getOperand(0).getOpcode() == ISD::SRL))) {
    // Look pass the truncate.
    if (N.getOpcode() == ISD::TRUNCATE)
      N = N.getOperand(0);

    // Match this pattern so that we can generate simpler code:
    //
    //   %a = ...
    //   %b = and i32 %a, 2
    //   %c = srl i32 %b, 1
    //   brcond i32 %c ...
    //
    // into
    //
    //   %a = ...
    //   %b = and i32 %a, 2
    //   %c = setcc eq %b, 0
    //   brcond %c ...
    //
    // This applies only when the AND constant value has one bit set and the
    // SRL constant is equal to the log2 of the AND constant. The back-end is
    // smart enough to convert the result into a TEST/JMP sequence.
    SDValue Op0 = N.getOperand(0);
    SDValue Op1 = N.getOperand(1);

    if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
      SDValue AndOp1 = Op0.getOperand(1);

      if (AndOp1.getOpcode() == ISD::Constant) {
        const APInt &AndConst = AndOp1->getAsAPIntVal();

        if (AndConst.isPowerOf2() &&
            Op1->getAsAPIntVal() == AndConst.logBase2()) {
          SDLoc DL(N);
          return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
                              Op0, DAG.getConstant(0, DL, Op0.getValueType()),
                              ISD::SETNE);
        }
      }
    }
  }

  // Transform (brcond (xor x, y)) -> (brcond (setcc, x, y, ne))
  // Transform (brcond (xor (xor x, y), -1)) -> (brcond (setcc, x, y, eq))
  if (N.getOpcode() == ISD::XOR) {
    // Because we may call this on a speculatively constructed
    // SimplifiedSetCC Node, we need to simplify this node first.
    // Ideally this should be folded into SimplifySetCC and not
    // here. For now, grab a handle to N so we don't lose it from
    // replacements interal to the visit.
    while (N.getOpcode() == ISD::XOR) {
      HandleSDNode XORHandle(N);
      SDValue Tmp = visitXOR(N.getNode());
      // No simplification done.
      if (!Tmp.getNode())
        break;
      // Returning N is form in-visit replacement that may invalidated
      // N. Grab value from Handle.
      if (Tmp.getNode() == N.getNode())
        N = XORHandle.getValue();
      else // Node simplified. Try simplifying again.
        N = Tmp;
    }

    // The loop above may have simplified the XOR away entirely.
    if (N.getOpcode() != ISD::XOR)
      return N;

    SDValue Op0 = N->getOperand(0);
    SDValue Op1 = N->getOperand(1);

    if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
      bool Equal = false;
      // (brcond (xor (xor x, y), -1)) -> (brcond (setcc x, y, eq))
      if (isBitwiseNot(N) && Op0.hasOneUse() && Op0.getOpcode() == ISD::XOR &&
          Op0.getValueType() == MVT::i1) {
        N = Op0;
        Op0 = N->getOperand(0);
        Op1 = N->getOperand(1);
        Equal = true;
      }

      EVT SetCCVT = N.getValueType();
      if (LegalTypes)
        SetCCVT = getSetCCResultType(SetCCVT);
      // Replace the uses of XOR with SETCC. Note, avoid this transformation if
      // it would introduce illegal operations post-legalization as this can
      // result in infinite looping between converting xor->setcc here, and
      // expanding setcc->xor in LegalizeSetCCCondCode if requested.
      if (!LegalOperations || TLI.isCondCodeLegal(CC, Op0.getSimpleValueType()))
        return DAG.getSetCC(SDLoc(N), SetCCVT, Op0, Op1, CC);
    }
  }

  return SDValue();
}
20673
// Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
//
SDValue DAGCombiner::visitBR_CC(SDNode *N) {
  CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
  SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);

  // If N is a constant we could fold this into a fallthrough or unconditional
  // branch. However that doesn't happen very often in normal code, because
  // Instcombine/SimplifyCFG should have handled the available opportunities.
  // If we did this folding here, it would be necessary to update the
  // MachineBasicBlock CFG, which is awkward.

  // Use SimplifySetCC to simplify SETCC's.
                               CondLHS, CondRHS, CC->get(), SDLoc(N),
                               false);
  if (Simp.getNode()) AddToWorklist(Simp.getNode());

  // fold to a simpler setcc
  if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
    return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
                       N->getOperand(0), Simp.getOperand(2),
                       Simp.getOperand(0), Simp.getOperand(1),
                       N->getOperand(4));

  return SDValue();
}
20701
// Classify N as a plain or masked load/store eligible for indexed combining
// and extract its base pointer into Ptr. Inc/Dec are the candidate indexed
// addressing modes; IsLoad/IsMasked are updated to describe N.
static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec,
                                     bool &IsLoad, bool &IsMasked, SDValue &Ptr,
                                     const TargetLowering &TLI) {
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    if (LD->isIndexed())
      return false;
    EVT VT = LD->getMemoryVT();
    if (!TLI.isIndexedLoadLegal(Inc, VT) && !TLI.isIndexedLoadLegal(Dec, VT))
      return false;
    Ptr = LD->getBasePtr();
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    if (ST->isIndexed())
      return false;
    EVT VT = ST->getMemoryVT();
    if (!TLI.isIndexedStoreLegal(Inc, VT) && !TLI.isIndexedStoreLegal(Dec, VT))
      return false;
    Ptr = ST->getBasePtr();
    IsLoad = false;
  } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
    if (LD->isIndexed())
      return false;
    EVT VT = LD->getMemoryVT();
    if (!TLI.isIndexedMaskedLoadLegal(Inc, VT) &&
        !TLI.isIndexedMaskedLoadLegal(Dec, VT))
      return false;
    Ptr = LD->getBasePtr();
    IsMasked = true;
    if (ST->isIndexed())
      return false;
    EVT VT = ST->getMemoryVT();
    if (!TLI.isIndexedMaskedStoreLegal(Inc, VT) &&
        !TLI.isIndexedMaskedStoreLegal(Dec, VT))
      return false;
    Ptr = ST->getBasePtr();
    IsLoad = false;
    IsMasked = true;
  } else {
    return false;
  }
  return true;
}
20744
/// Try turning a load/store into a pre-indexed load/store when the base
/// pointer is an add or subtract and it has other uses besides the load/store.
/// After the transformation, the new indexed load/store has effectively folded
/// the add/subtract in and all of its other uses are redirected to the
/// new load/store.
bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
  if (Level < AfterLegalizeDAG)
    return false;

  bool IsLoad = true;
  bool IsMasked = false;
  SDValue Ptr;
  if (!getCombineLoadStoreParts(N, ISD::PRE_INC, ISD::PRE_DEC, IsLoad, IsMasked,
                                Ptr, TLI))
    return false;

  // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
  // out.  There is no reason to make this a preinc/predec.
  if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
      Ptr->hasOneUse())
    return false;

  // Ask the target to do addressing mode selection.
  if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
    return false;

  // Backends without true r+i pre-indexed forms may need to pass a
  // constant base with a variable offset so that constant coercion
  // will work with the patterns in canonical form.
  bool Swapped = false;
  if (isa<ConstantSDNode>(BasePtr)) {
    std::swap(BasePtr, Offset);
    Swapped = true;
  }

  // Don't create a indexed load / store with zero offset.
    return false;

  // Try turning it into a pre-indexed load / store except when:
  // 1) The new base ptr is a frame index.
  // 2) If N is a store and the new base ptr is either the same as or is a
  //    predecessor of the value being stored.
  // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
  //    that would create a cycle.
  // 4) All uses are load / store ops that use it as old base ptr.

  // Check #1.  Preinc'ing a frame index would require copying the stack pointer
  // (plus the implicit offset) to a register to preinc anyway.
  if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
    return false;

  // Check #2.
  if (!IsLoad) {
    SDValue Val = IsMasked ? cast<MaskedStoreSDNode>(N)->getValue()
                           : cast<StoreSDNode>(N)->getValue();

    // Would require a copy.
    if (Val == BasePtr)
      return false;

    // Would create a cycle.
    if (Val == Ptr || Ptr->isPredecessorOf(Val.getNode()))
      return false;
  }

  // Caches for hasPredecessorHelper.
  SmallPtrSet<const SDNode *, 32> Visited;
  Worklist.push_back(N);

  // If the offset is a constant, there may be other adds of constants that
  // can be folded with this one. We should do this to avoid having to keep
  // a copy of the original base pointer.
  SmallVector<SDNode *, 16> OtherUses;
  for (SDUse &Use : BasePtr->uses()) {
    // Skip the use that is Ptr and uses of other results from BasePtr's
    // node (important for nodes that return multiple results).
    if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
      continue;

    if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist,
                                     MaxSteps))
      continue;

    if (Use.getUser()->getOpcode() != ISD::ADD &&
        Use.getUser()->getOpcode() != ISD::SUB) {
      OtherUses.clear();
      break;
    }

    SDValue Op1 = Use.getUser()->getOperand((Use.getOperandNo() + 1) & 1);
    if (!isa<ConstantSDNode>(Op1)) {
      OtherUses.clear();
      break;
    }

    // FIXME: In some cases, we can be smarter about this.
    if (Op1.getValueType() != Offset.getValueType()) {
      OtherUses.clear();
      break;
    }

    OtherUses.push_back(Use.getUser());
  }

  if (Swapped)
    std::swap(BasePtr, Offset);

  // Now check for #3 and #4.
  bool RealUse = false;

  for (SDNode *User : Ptr->users()) {
    if (User == N)
      continue;
    if (SDNode::hasPredecessorHelper(User, Visited, Worklist, MaxSteps))
      return false;

    // If Ptr may be folded in addressing mode of other use, then it's
    // not profitable to do this transformation.
    if (!canFoldInAddressingMode(Ptr.getNode(), User, DAG, TLI))
      RealUse = true;
  }

  if (!RealUse)
    return false;

  // Build the replacement pre-indexed memory operation.
  if (!IsMasked) {
    if (IsLoad)
      Result = DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
    else
      Result =
          DAG.getIndexedStore(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
  } else {
    if (IsLoad)
      Result = DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
                                        Offset, AM);
    else
      Result = DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N), BasePtr,
                                         Offset, AM);
  }
  ++PreIndexedNodes;
  ++NodesCombined;
  LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
             Result.dump(&DAG); dbgs() << '\n');
  WorklistRemover DeadNodes(*this);
  if (IsLoad) {
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
  } else {
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
  }

  // Finally, since the node is now dead, remove it from the graph.
  deleteAndRecombine(N);

  if (Swapped)
    std::swap(BasePtr, Offset);

  // Replace other uses of BasePtr that can be updated to use Ptr
  for (SDNode *OtherUse : OtherUses) {
    unsigned OffsetIdx = 1;
    if (OtherUse->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
      OffsetIdx = 0;
    assert(OtherUse->getOperand(!OffsetIdx).getNode() == BasePtr.getNode() &&
           "Expected BasePtr operand");

    // We need to replace ptr0 in the following expression:
    //   x0 * offset0 + y0 * ptr0 = t0
    // knowing that
    //   x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
    //
    // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
    // indexed load/store and the expression that needs to be re-written.
    //
    // Therefore, we have:
    //   t0 = (x0 * offset0 - x1 * y0 * y1 *offset1) + (y0 * y1) * t1

    auto *CN = cast<ConstantSDNode>(OtherUse->getOperand(OffsetIdx));
    const APInt &Offset0 = CN->getAPIntValue();
    const APInt &Offset1 = Offset->getAsAPIntVal();
    int X0 = (OtherUse->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
    int Y0 = (OtherUse->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
    int X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
    int Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;

    unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;

    APInt CNV = Offset0;
    if (X0 < 0) CNV = -CNV;
    if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
    else CNV = CNV - Offset1;

    SDLoc DL(OtherUse);

    // We can now generate the new expression.
    SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
    SDValue NewOp2 = Result.getValue(IsLoad ? 1 : 0);

    SDValue NewUse =
        DAG.getNode(Opcode, DL, OtherUse->getValueType(0), NewOp1, NewOp2);
    DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUse, 0), NewUse);
    deleteAndRecombine(OtherUse);
  }

  // Replace the uses of Ptr with uses of the updated base value.
  DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(IsLoad ? 1 : 0));
  deleteAndRecombine(Ptr.getNode());
  AddToWorklist(Result.getNode());

  return true;
}
20963
// Decide whether PtrUse (an ADD/SUB user of Ptr) can be folded into the
// memory node N as a post-indexed access; fills BasePtr/Offset on success.
static bool shouldCombineToPostInc(SDNode *N, SDValue Ptr, SDNode *PtrUse,
                                   SDValue &BasePtr, SDValue &Offset,
                                   SelectionDAG &DAG,
                                   const TargetLowering &TLI) {
  if (PtrUse == N ||
      (PtrUse->getOpcode() != ISD::ADD && PtrUse->getOpcode() != ISD::SUB))
    return false;

  if (!TLI.getPostIndexedAddressParts(N, PtrUse, BasePtr, Offset, AM, DAG))
    return false;

  // Don't create a indexed load / store with zero offset.
    return false;

  if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
    return false;

  for (SDNode *User : BasePtr->users()) {
    if (User == Ptr.getNode())
      continue;

    // No if there's a later user which could perform the index instead.
    if (isa<MemSDNode>(User)) {
      bool IsLoad = true;
      bool IsMasked = false;
      SDValue OtherPtr;
                                   IsMasked, OtherPtr, TLI)) {
        Worklist.push_back(User);
        if (SDNode::hasPredecessorHelper(N, Visited, Worklist, MaxSteps))
          return false;
      }
    }

    // If all the uses are load / store addresses, then don't do the
    // transformation.
    if (User->getOpcode() == ISD::ADD || User->getOpcode() == ISD::SUB) {
      for (SDNode *UserUser : User->users())
        if (canFoldInAddressingMode(User, UserUser, DAG, TLI))
          return false;
    }
  }
  return true;
}
21013
                                          bool &IsMasked, SDValue &Ptr,
                                          SDValue &BasePtr, SDValue &Offset,
                                          SelectionDAG &DAG,
                                          const TargetLowering &TLI) {
                                IsMasked, Ptr, TLI) ||
      Ptr->hasOneUse())
    return nullptr;

  // Try turning it into a post-indexed load / store except when
  // 1) All uses are load / store ops that use it as base ptr (and
  //    it may be folded as addressing mmode).
  // 2) Op must be independent of N, i.e. Op is neither a predecessor
  //    nor a successor of N. Otherwise, if Op is folded that would
  //    create a cycle.
  // Scan the users of Ptr's result for a foldable ADD/SUB.
  for (SDUse &U : Ptr->uses()) {
    if (U.getResNo() != Ptr.getResNo())
      continue;

    // Check for #1.
    SDNode *Op = U.getUser();
    if (!shouldCombineToPostInc(N, Ptr, Op, BasePtr, Offset, AM, DAG, TLI))
      continue;

    // Check for #2.
    // Ptr is predecessor to both N and Op.
    Visited.insert(Ptr.getNode());
    Worklist.push_back(N);
    Worklist.push_back(Op);
    if (!SDNode::hasPredecessorHelper(N, Visited, Worklist, MaxSteps) &&
        !SDNode::hasPredecessorHelper(Op, Visited, Worklist, MaxSteps))
      return Op;
  }
  return nullptr;
}
21054
/// Try to combine a load/store with a add/sub of the base pointer node into a
/// post-indexed load/store. The transformation folded the add/subtract into the
/// new indexed load/store effectively and all of its uses are redirected to the
/// new load/store.
bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
  if (Level < AfterLegalizeDAG)
    return false;

  bool IsLoad = true;
  bool IsMasked = false;
  SDValue Ptr;
  SDNode *Op = getPostIndexedLoadStoreOp(N, IsLoad, IsMasked, Ptr, BasePtr,
                                         Offset, AM, DAG, TLI);
  if (!Op)
    return false;

  // Build the replacement post-indexed memory operation.
  if (!IsMasked)
    Result = IsLoad ? DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
                                         Offset, AM)
                    : DAG.getIndexedStore(SDValue(N, 0), SDLoc(N),
                                          BasePtr, Offset, AM);
  else
    Result = IsLoad ? DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N),
                                               BasePtr, Offset, AM)
                    : DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N),
                                                BasePtr, Offset, AM);
  ++PostIndexedNodes;
  ++NodesCombined;
  LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG); dbgs() << "\nWith: ";
             Result.dump(&DAG); dbgs() << '\n');
  WorklistRemover DeadNodes(*this);
  if (IsLoad) {
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
  } else {
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
  }

  // Finally, since the node is now dead, remove it from the graph.
  deleteAndRecombine(N);

  // Replace the uses of Use with uses of the updated base value.
                                Result.getValue(IsLoad ? 1 : 0));
  deleteAndRecombine(Op);
  return true;
}
21106
/// Return the base-pointer arithmetic from an indexed \p LD.
SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  assert(AM != ISD::UNINDEXED);
  SDValue BP = LD->getOperand(1);
  SDValue Inc = LD->getOperand(2);

  // Some backends use TargetConstants for load offsets, but don't expect
  // TargetConstants in general ADD nodes. We can convert these constants into
  // regular Constants (if the constant is not opaque).
         !cast<ConstantSDNode>(Inc)->isOpaque()) &&
         "Cannot split out indexing using opaque target constants");
  if (Inc.getOpcode() == ISD::TargetConstant) {
    ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
    Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
                          ConstInc->getValueType(0));
  }

  // *_INC modes add the offset to the base pointer; the others subtract it.
  unsigned Opc =
      (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
  return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
}
21130
  // Element count of a vector type; a fixed count of zero for scalars.
  return T.isVector() ? T.getVectorElementCount() : ElementCount::getFixed(0);
}
21134
21135bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) {
21136 EVT STType = Val.getValueType();
21137 EVT STMemType = ST->getMemoryVT();
21138 if (STType == STMemType)
21139 return true;
21140 if (isTypeLegal(STMemType))
21141 return false; // fail.
21142 if (STType.isFloatingPoint() && STMemType.isFloatingPoint() &&
21143 TLI.isOperationLegal(ISD::FTRUNC, STMemType)) {
21144 Val = DAG.getNode(ISD::FTRUNC, SDLoc(ST), STMemType, Val);
21145 return true;
21146 }
21147 if (numVectorEltsOrZero(STType) == numVectorEltsOrZero(STMemType) &&
21148 STType.isInteger() && STMemType.isInteger()) {
21149 Val = DAG.getNode(ISD::TRUNCATE, SDLoc(ST), STMemType, Val);
21150 return true;
21151 }
21152 if (STType.getSizeInBits() == STMemType.getSizeInBits()) {
21153 Val = DAG.getBitcast(STMemType, Val);
21154 return true;
21155 }
21156 return false; // fail.
21157}
21158
21159bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) {
21160 EVT LDMemType = LD->getMemoryVT();
21161 EVT LDType = LD->getValueType(0);
21162 assert(Val.getValueType() == LDMemType &&
21163 "Attempting to extend value of non-matching type");
21164 if (LDType == LDMemType)
21165 return true;
21166 if (LDMemType.isInteger() && LDType.isInteger()) {
21167 switch (LD->getExtensionType()) {
21168 case ISD::NON_EXTLOAD:
21169 Val = DAG.getBitcast(LDType, Val);
21170 return true;
21171 case ISD::EXTLOAD:
21172 Val = DAG.getNode(ISD::ANY_EXTEND, SDLoc(LD), LDType, Val);
21173 return true;
21174 case ISD::SEXTLOAD:
21175 Val = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(LD), LDType, Val);
21176 return true;
21177 case ISD::ZEXTLOAD:
21178 Val = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(LD), LDType, Val);
21179 return true;
21180 }
21181 }
21182 return false;
21183}
21184
21185StoreSDNode *DAGCombiner::getUniqueStoreFeeding(LoadSDNode *LD,
21186 int64_t &Offset) {
21187 SDValue Chain = LD->getOperand(0);
21188
21189 // Look through CALLSEQ_START.
21190 if (Chain.getOpcode() == ISD::CALLSEQ_START)
21191 Chain = Chain->getOperand(0);
21192
21193 StoreSDNode *ST = nullptr;
21195 if (Chain.getOpcode() == ISD::TokenFactor) {
21196 // Look for unique store within the TokenFactor.
21197 for (SDValue Op : Chain->ops()) {
21198 StoreSDNode *Store = dyn_cast<StoreSDNode>(Op.getNode());
21199 if (!Store)
21200 continue;
21201 BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
21202 BaseIndexOffset BasePtrST = BaseIndexOffset::match(Store, DAG);
21203 if (!BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
21204 continue;
21205 // Make sure the store is not aliased with any nodes in TokenFactor.
21206 GatherAllAliases(Store, Chain, Aliases);
21207 if (Aliases.empty() ||
21208 (Aliases.size() == 1 && Aliases.front().getNode() == Store))
21209 ST = Store;
21210 break;
21211 }
21212 } else {
21213 StoreSDNode *Store = dyn_cast<StoreSDNode>(Chain.getNode());
21214 if (Store) {
21215 BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
21216 BaseIndexOffset BasePtrST = BaseIndexOffset::match(Store, DAG);
21217 if (BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
21218 ST = Store;
21219 }
21220 }
21221
21222 return ST;
21223}
21224
/// Try to replace the value produced by load \p LD with the value most
/// recently stored to the same address, when that store fully covers the
/// bits the load reads. Returns the replacement value (via CombineTo) on
/// success, or an empty SDValue if forwarding is not possible.
SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
  // Forwarding is an optimization only; skip at -O0 and for
  // volatile/atomic loads.
  if (OptLevel == CodeGenOptLevel::None || !LD->isSimple())
    return SDValue();
  SDValue Chain = LD->getOperand(0);
  int64_t Offset;

  StoreSDNode *ST = getUniqueStoreFeeding(LD, Offset);
  // TODO: Relax this restriction for unordered atomics (see D66309)
  if (!ST || !ST->isSimple() || ST->getAddressSpace() != LD->getAddressSpace())
    return SDValue();

  EVT LDType = LD->getValueType(0);
  EVT LDMemType = LD->getMemoryVT();
  EVT STMemType = ST->getMemoryVT();
  EVT STType = ST->getValue().getValueType();

  // There are two cases to consider here:
  //  1. The store is fixed width and the load is scalable. In this case we
  //     don't know at compile time if the store completely envelops the load
  //     so we abandon the optimisation.
  //  2. The store is scalable and the load is fixed width. We could
  //     potentially support a limited number of cases here, but there has been
  //     no cost-benefit analysis to prove it's worth it.
  bool LdStScalable = LDMemType.isScalableVT();
  if (LdStScalable != STMemType.isScalableVT())
    return SDValue();

  // If we are dealing with scalable vectors on a big endian platform the
  // calculation of offsets below becomes trickier, since we do not know at
  // compile time the absolute size of the vector. Until we've done more
  // analysis on big-endian platforms it seems better to bail out for now.
  if (LdStScalable && DAG.getDataLayout().isBigEndian())
    return SDValue();

  // Normalize for Endianness. After this Offset=0 will denote that the least
  // significant bit in the loaded value maps to the least significant bit in
  // the stored value). With Offset=n (for n > 0) the loaded value starts at the
  // n:th least significant byte of the stored value.
  int64_t OrigOffset = Offset;
  if (DAG.getDataLayout().isBigEndian())
    Offset = ((int64_t)STMemType.getStoreSizeInBits().getFixedValue() -
              (int64_t)LDMemType.getStoreSizeInBits().getFixedValue()) /
                 8 -
             Offset;

  // Check that the stored value cover all bits that are loaded.
  bool STCoversLD;

  TypeSize LdMemSize = LDMemType.getSizeInBits();
  TypeSize StMemSize = STMemType.getSizeInBits();
  if (LdStScalable)
    // Scalable sizes only compare reliably when identical and unshifted.
    STCoversLD = (Offset == 0) && LdMemSize == StMemSize;
  else
    STCoversLD = (Offset >= 0) && (Offset * 8 + LdMemSize.getFixedValue() <=
                                   StMemSize.getFixedValue());

  // Replace LD with Val/Chain; for indexed loads the indexing arithmetic is
  // first split off into a standalone add/sub.
  auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue {
    if (LD->isIndexed()) {
      // Cannot handle opaque target constants and we must respect the user's
      // request not to split indexes from loads.
      if (!canSplitIdx(LD))
        return SDValue();
      SDValue Idx = SplitIndexingFromLoad(LD);
      SDValue Ops[] = {Val, Idx, Chain};
      return CombineTo(LD, Ops, 3);
    }
    return CombineTo(LD, Val, Chain);
  };

  if (!STCoversLD)
    return SDValue();

  // Memory as copy space (potentially masked).
  if (Offset == 0 && LDType == STType && STMemType == LDMemType) {
    // Simple case: Direct non-truncating forwarding
    if (LDType.getSizeInBits() == LdMemSize)
      return ReplaceLd(LD, ST->getValue(), Chain);
    // Can we model the truncate and extension with an and mask?
    if (STType.isInteger() && LDMemType.isInteger() && !STType.isVector() &&
        !LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) {
      // Mask to size of LDMemType
      auto Mask =
          DAG.getConstant(APInt::getLowBitsSet(STType.getFixedSizeInBits(),
                                               StMemSize.getFixedValue()),
                          SDLoc(ST), STType);
      auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask);
      return ReplaceLd(LD, Val, Chain);
    }
  }

  // Handle some cases for big-endian that would be Offset 0 and handled for
  // little-endian.
  SDValue Val = ST->getValue();
  if (DAG.getDataLayout().isBigEndian() && Offset > 0 && OrigOffset == 0) {
    if (STType.isInteger() && !STType.isVector() && LDType.isInteger() &&
        !LDType.isVector() && isTypeLegal(STType) &&
        TLI.isOperationLegal(ISD::SRL, STType)) {
      // Shift the loaded bits down so the remaining logic can treat this as
      // the Offset==0 case.
      Val = DAG.getNode(
          ISD::SRL, SDLoc(LD), STType, Val,
          DAG.getShiftAmountConstant(Offset * 8, STType, SDLoc(LD)));
      Offset = 0;
    }
  }

  // TODO: Deal with nonzero offset.
  if (LD->getBasePtr().isUndef() || Offset != 0)
    return SDValue();
  // Model necessary truncations / extensions.
  // Truncate Value To Stored Memory Size.
  do {
    if (!getTruncatedStoreValue(ST, Val))
      break;
    if (!isTypeLegal(LDMemType))
      break;
    if (STMemType != LDMemType) {
      if (LdMemSize == StMemSize) {
        // Same width, different type: forward through a bitcast when legal
        // and judged beneficial by the target.
        if (TLI.isOperationLegal(ISD::BITCAST, LDMemType) &&
            isTypeLegal(LDMemType) &&
            TLI.isOperationLegal(ISD::BITCAST, STMemType) &&
            isTypeLegal(STMemType) &&
            TLI.isLoadBitCastBeneficial(LDMemType, STMemType, DAG,
                                        *LD->getMemOperand()))
          Val = DAG.getBitcast(LDMemType, Val);
        else
          break;
      } else if (LDMemType.isVector() && isTypeLegal(STMemType)) {
        // The load reads a prefix (or suffix, on big-endian) of the stored
        // vector: bitcast to a wider vector of the load's element type and
        // extract the relevant subvector.
        EVT EltVT = LDMemType.getVectorElementType();
        TypeSize EltSize = EltVT.getSizeInBits();

        if (!StMemSize.isKnownMultipleOf(EltSize))
          break;

        EVT InterVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
                                       StMemSize.divideCoefficientBy(EltSize));
        if (!TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, LDMemType) ||
            !TLI.isTypeLegal(InterVT))
          break;

        // In case of big-endian the offset is normalized to zero, denoting
        // the last bit. For big-endian we need to transform the extraction
        // to the last sub-vector.
        unsigned ExtIdx = 0;
        if (DAG.getDataLayout().isBigEndian()) {
          ExtIdx =
              InterVT.getVectorNumElements() - LDMemType.getVectorNumElements();
        }

        if (!TLI.isExtractSubvectorCheap(LDMemType, InterVT, ExtIdx))
          break;
        Val = DAG.getExtractSubvector(SDLoc(LD), LDMemType,
                                      DAG.getBitcast(InterVT, Val), ExtIdx);
      } else if (!STMemType.isVector() && !LDMemType.isVector() &&
                 STMemType.isInteger() && LDMemType.isInteger())
        Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val);
      else
        break;
    }
    if (!extendLoadedValueToExtension(LD, Val))
      break;
    return ReplaceLd(LD, Val, Chain);
  } while (false);

  // On failure, cleanup dead nodes we may have created.
  if (Val->use_empty())
    deleteAndRecombine(Val.getNode());
  return SDValue();
}
21392
/// Combine a LOAD node: delete dead loads, forward a directly-stored value,
/// infer better alignment, move to a less-constrained chain, form
/// pre/post-indexed loads, and finally try to slice the load into narrower
/// loads when profitable.
SDValue DAGCombiner::visitLOAD(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  SDValue Chain = LD->getChain();
  SDValue Ptr = LD->getBasePtr();

  // If load is not volatile and there are no uses of the loaded value (and
  // the updated indexed value in case of indexed loads), change uses of the
  // chain value into uses of the chain input (i.e. delete the dead load).
  // TODO: Allow this for unordered atomics (see D66309)
  if (LD->isSimple()) {
    if (N->getValueType(1) == MVT::Other) {
      // Unindexed loads.
      if (!N->hasAnyUseOfValue(0)) {
        // It's not safe to use the two value CombineTo variant here. e.g.
        // v1, chain2 = load chain1, loc
        // v2, chain3 = load chain2, loc
        // v3 = add v2, c
        // Now we replace use of chain2 with chain1. This makes the second load
        // isomorphic to the one we are deleting, and thus makes this load live.
        LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG);
                   dbgs() << "\nWith chain: "; Chain.dump(&DAG);
                   dbgs() << "\n");
        WorklistRemover DeadNodes(*this);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
        AddUsersToWorklist(Chain.getNode());
        if (N->use_empty())
          deleteAndRecombine(N);

        return SDValue(N, 0); // Return N so it doesn't get rechecked!
      }
    } else {
      // Indexed loads.
      assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");

      // If this load has an opaque TargetConstant offset, then we cannot split
      // the indexing into an add/sub directly (that TargetConstant may not be
      // valid for a different type of node, and we cannot convert an opaque
      // target constant into a regular constant).
      bool CanSplitIdx = canSplitIdx(LD);

      if (!N->hasAnyUseOfValue(0) && (CanSplitIdx || !N->hasAnyUseOfValue(1))) {
        SDValue Undef = DAG.getUNDEF(N->getValueType(0));
        SDValue Index;
        if (N->hasAnyUseOfValue(1) && CanSplitIdx) {
          Index = SplitIndexingFromLoad(LD);
          // Try to fold the base pointer arithmetic into subsequent loads and
          // stores.
          AddUsersToWorklist(N);
        } else
          Index = DAG.getUNDEF(N->getValueType(1));
        LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG);
                   dbgs() << "\nWith: "; Undef.dump(&DAG);
                   dbgs() << " and 2 other values\n");
        WorklistRemover DeadNodes(*this);
        // Replace all three results: value, updated base pointer, and chain.
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
        deleteAndRecombine(N);
        return SDValue(N, 0); // Return N so it doesn't get rechecked!
      }
    }
  }

  // If this load is directly stored, replace the load value with the stored
  // value.
  if (auto V = ForwardStoreValueToDirectLoad(LD))
    return V;

  // Try to infer better alignment information than the load already has.
  if (OptLevel != CodeGenOptLevel::None && LD->isUnindexed() &&
      !LD->isAtomic()) {
    if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
      if (*Alignment > LD->getAlign() &&
          isAligned(*Alignment, LD->getSrcValueOffset())) {
        SDValue NewLoad = DAG.getExtLoad(
            LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
            LD->getPointerInfo(), LD->getMemoryVT(), *Alignment,
            LD->getMemOperand()->getFlags(), LD->getAAInfo());
        // NewLoad will always be N as we are only refining the alignment
        assert(NewLoad.getNode() == N);
        (void)NewLoad;
      }
    }
  }

  if (LD->isUnindexed()) {
    // Walk up chain skipping non-aliasing memory nodes.
    SDValue BetterChain = FindBetterChain(LD, Chain);

    // If there is a better chain.
    if (Chain != BetterChain) {
      SDValue ReplLoad;

      // Replace the chain to void dependency.
      if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
        ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
                               BetterChain, Ptr, LD->getMemOperand());
      } else {
        ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
                                  LD->getValueType(0),
                                  BetterChain, Ptr, LD->getMemoryVT(),
                                  LD->getMemOperand());
      }

      // Create token factor to keep old chain connected.
      SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
                                  MVT::Other, Chain, ReplLoad.getValue(1));

      // Replace uses with load result and token factor
      return CombineTo(N, ReplLoad.getValue(0), Token);
    }
  }

  // Try transforming N to an indexed load.
  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
    return SDValue(N, 0);

  // Try to slice up N to more direct loads if the slices are mapped to
  // different register banks or pairing can take place.
  if (SliceUpLoad(N))
    return SDValue(N, 0);

  return SDValue();
}
21517
21518namespace {
21519
/// Helper structure used to slice a load in smaller loads.
/// Basically a slice is obtained from the following sequence:
/// Origin = load Ty1, Base
/// Shift = srl Ty1 Origin, CstTy Amount
/// Inst = trunc Shift to Ty2
///
/// Then, it will be rewritten into:
/// Slice = load SliceTy, Base + SliceOffset
/// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
///
/// SliceTy is deduced from the number of bits that are actually used to
/// build Inst.
struct LoadedSlice {
  /// Helper structure used to compute the cost of a slice.
  struct Cost {
    /// Are we optimizing for code size.
    bool ForCodeSize = false;

    /// Various cost.
    unsigned Loads = 0;
    unsigned Truncates = 0;
    unsigned CrossRegisterBanksCopies = 0;
    unsigned ZExts = 0;
    unsigned Shift = 0;

    explicit Cost(bool ForCodeSize) : ForCodeSize(ForCodeSize) {}

    /// Get the cost of one isolated slice.
    Cost(const LoadedSlice &LS, bool ForCodeSize)
        : ForCodeSize(ForCodeSize), Loads(1) {
      EVT TruncType = LS.Inst->getValueType(0);
      EVT LoadedType = LS.getLoadedType();
      // A zext is only a cost when the slice's final type differs from the
      // loaded type and the target cannot extend for free.
      if (TruncType != LoadedType &&
          !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
        ZExts = 1;
    }

    /// Account for slicing gain in the current cost.
    /// Slicing provide a few gains like removing a shift or a
    /// truncate. This method allows to grow the cost of the original
    /// load with the gain from this slice.
    void addSliceGain(const LoadedSlice &LS) {
      // Each slice saves a truncate.
      const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
      if (!TLI.isTruncateFree(LS.Inst->getOperand(0), LS.Inst->getValueType(0)))
        ++Truncates;
      // If there is a shift amount, this slice gets rid of it.
      if (LS.Shift)
        ++Shift;
      // If this slice can merge a cross register bank copy, account for it.
      if (LS.canMergeExpensiveCrossRegisterBankCopy())
        ++CrossRegisterBanksCopies;
    }

    Cost &operator+=(const Cost &RHS) {
      Loads += RHS.Loads;
      Truncates += RHS.Truncates;
      CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
      ZExts += RHS.ZExts;
      Shift += RHS.Shift;
      return *this;
    }

    bool operator==(const Cost &RHS) const {
      return Loads == RHS.Loads && Truncates == RHS.Truncates &&
             CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
             ZExts == RHS.ZExts && Shift == RHS.Shift;
    }

    bool operator!=(const Cost &RHS) const { return !(*this == RHS); }

    bool operator<(const Cost &RHS) const {
      // Assume cross register banks copies are as expensive as loads.
      // FIXME: Do we want some more target hooks?
      unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
      unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
      // Unless we are optimizing for code size, consider the
      // expensive operation first.
      if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
        return ExpensiveOpsLHS < ExpensiveOpsRHS;
      return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
             (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
    }

    bool operator>(const Cost &RHS) const { return RHS < *this; }

    bool operator<=(const Cost &RHS) const { return !(RHS < *this); }

    bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
  };

  // The last instruction that represent the slice. This should be a
  // truncate instruction.
  SDNode *Inst;

  // The original load instruction.
  LoadSDNode *Origin;

  // The right shift amount in bits from the original load.
  unsigned Shift;

  // The DAG from which Origin came from.
  // This is used to get some contextual information about legal types, etc.
  SelectionDAG *DAG;

  LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
              unsigned Shift = 0, SelectionDAG *DAG = nullptr)
      : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}

  /// Get the bits used in a chunk of bits \p BitWidth large.
  /// \return Result is \p BitWidth and has used bits set to 1 and
  /// not used bits set to 0.
  APInt getUsedBits() const {
    // Reproduce the trunc(lshr) sequence:
    // - Start from the truncated value.
    // - Zero extend to the desired bit width.
    // - Shift left.
    assert(Origin && "No original load to compare against.");
    unsigned BitWidth = Origin->getValueSizeInBits(0);
    assert(Inst && "This slice is not bound to an instruction");
    assert(Inst->getValueSizeInBits(0) <= BitWidth &&
           "Extracted slice is bigger than the whole type!");
    APInt UsedBits(Inst->getValueSizeInBits(0), 0);
    UsedBits.setAllBits();
    UsedBits = UsedBits.zext(BitWidth);
    UsedBits <<= Shift;
    return UsedBits;
  }

  /// Get the size of the slice to be loaded in bytes.
  unsigned getLoadedSize() const {
    unsigned SliceSize = getUsedBits().popcount();
    assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
    return SliceSize / 8;
  }

  /// Get the type that will be loaded for this slice.
  /// Note: This may not be the final type for the slice.
  EVT getLoadedType() const {
    assert(DAG && "Missing context");
    LLVMContext &Ctxt = *DAG->getContext();
    return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
  }

  /// Get the alignment of the load used for this slice.
  Align getAlign() const {
    Align Alignment = Origin->getAlign();
    uint64_t Offset = getOffsetFromBase();
    // Alignment of the slice's address given the original load's alignment
    // and the slice's byte offset from it.
    if (Offset != 0)
      Alignment = commonAlignment(Alignment, Alignment.value() + Offset);
    return Alignment;
  }

  /// Check if this slice can be rewritten with legal operations.
  bool isLegal() const {
    // An invalid slice is not legal.
    if (!Origin || !Inst || !DAG)
      return false;

    // Offsets are for indexed load only, we do not handle that.
    if (!Origin->getOffset().isUndef())
      return false;

    const TargetLowering &TLI = DAG->getTargetLoweringInfo();

    // Check that the type is legal.
    EVT SliceType = getLoadedType();
    if (!TLI.isTypeLegal(SliceType))
      return false;

    // Check that the load is legal for this type.
    if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
      return false;

    // Check that the offset can be computed.
    // 1. Check its type.
    EVT PtrType = Origin->getBasePtr().getValueType();
    if (PtrType == MVT::Untyped || PtrType.isExtended())
      return false;

    // 2. Check that it fits in the immediate.
    if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
      return false;

    // 3. Check that the computation is legal.
    if (!TLI.isOperationLegal(ISD::ADD, PtrType))
      return false;

    // Check that the zext is legal if it needs one.
    EVT TruncateType = Inst->getValueType(0);
    if (TruncateType != SliceType &&
        !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
      return false;

    return true;
  }

  /// Get the offset in bytes of this slice in the original chunk of
  /// bits.
  /// \pre DAG != nullptr.
  uint64_t getOffsetFromBase() const {
    assert(DAG && "Missing context.");
    bool IsBigEndian = DAG->getDataLayout().isBigEndian();
    assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
    uint64_t Offset = Shift / 8;
    unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
    assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
           "The size of the original loaded type is not a multiple of a"
           " byte.");
    // If Offset is bigger than TySizeInBytes, it means we are loading all
    // zeros. This should have been optimized before in the process.
    assert(TySizeInBytes > Offset &&
           "Invalid shift amount for given loaded size");
    if (IsBigEndian)
      Offset = TySizeInBytes - Offset - getLoadedSize();
    return Offset;
  }

  /// Generate the sequence of instructions to load the slice
  /// represented by this object and redirect the uses of this slice to
  /// this new sequence of instructions.
  /// \pre this->Inst && this->Origin are valid Instructions and this
  /// object passed the legal check: LoadedSlice::isLegal returned true.
  /// \return The last instruction of the sequence used to load the slice.
  SDValue loadSlice() const {
    assert(Inst && Origin && "Unable to replace a non-existing slice.");
    const SDValue &OldBaseAddr = Origin->getBasePtr();
    SDValue BaseAddr = OldBaseAddr;
    // Get the offset in that chunk of bytes w.r.t. the endianness.
    int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
    assert(Offset >= 0 && "Offset too big to fit in int64_t!");
    if (Offset) {
      // BaseAddr = BaseAddr + Offset.
      EVT ArithType = BaseAddr.getValueType();
      SDLoc DL(Origin);
      BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
                              DAG->getConstant(Offset, DL, ArithType));
    }

    // Create the type of the loaded slice according to its size.
    EVT SliceType = getLoadedType();

    // Create the load for the slice.
    SDValue LastInst =
        DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
                     Origin->getPointerInfo().getWithOffset(Offset), getAlign(),
                     Origin->getMemOperand()->getFlags());
    // If the final type is not the same as the loaded type, this means that
    // we have to pad with zero. Create a zero extend for that.
    EVT FinalType = Inst->getValueType(0);
    if (SliceType != FinalType)
      LastInst =
          DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
    return LastInst;
  }

  /// Check if this slice can be merged with an expensive cross register
  /// bank copy. E.g.,
  /// i = load i32
  /// f = bitcast i32 i to float
  bool canMergeExpensiveCrossRegisterBankCopy() const {
    if (!Inst || !Inst->hasOneUse())
      return false;
    SDNode *User = *Inst->user_begin();
    if (User->getOpcode() != ISD::BITCAST)
      return false;
    assert(DAG && "Missing context");
    const TargetLowering &TLI = DAG->getTargetLoweringInfo();
    EVT ResVT = User->getValueType(0);
    const TargetRegisterClass *ResRC =
        TLI.getRegClassFor(ResVT.getSimpleVT(), User->isDivergent());
    const TargetRegisterClass *ArgRC =
        TLI.getRegClassFor(User->getOperand(0).getValueType().getSimpleVT(),
                           User->getOperand(0)->isDivergent());
    if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
      return false;

    // At this point, we know that we perform a cross-register-bank copy.
    // Check if it is expensive.
    const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
    // Assume bitcasts are cheap, unless both register classes do not
    // explicitly share a common sub class.
    if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
      return false;

    // Check if it will be merged with the load.
    // 1. Check the alignment / fast memory access constraint.
    unsigned IsFast = 0;
    if (!TLI.allowsMemoryAccess(*DAG->getContext(), DAG->getDataLayout(), ResVT,
                                Origin->getAddressSpace(), getAlign(),
                                Origin->getMemOperand()->getFlags(), &IsFast) ||
        !IsFast)
      return false;

    // 2. Check that the load is a legal operation for that type.
    if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
      return false;

    // 3. Check that we do not have a zext in the way.
    if (Inst->getValueType(0) != getLoadedType())
      return false;

    return true;
  }
};
21825
21826} // end anonymous namespace
21827
21828/// Check that all bits set in \p UsedBits form a dense region, i.e.,
21829/// \p UsedBits looks like 0..0 1..1 0..0.
21830static bool areUsedBitsDense(const APInt &UsedBits) {
21831 // If all the bits are one, this is dense!
21832 if (UsedBits.isAllOnes())
21833 return true;
21834
21835 // Get rid of the unused bits on the right.
21836 APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countr_zero());
21837 // Get rid of the unused bits on the left.
21838 if (NarrowedUsedBits.countl_zero())
21839 NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
21840 // Check that the chunk of bits is completely used.
21841 return NarrowedUsedBits.isAllOnes();
21842}
21843
21844/// Check whether or not \p First and \p Second are next to each other
21845/// in memory. This means that there is no hole between the bits loaded
21846/// by \p First and the bits loaded by \p Second.
21847static bool areSlicesNextToEachOther(const LoadedSlice &First,
21848 const LoadedSlice &Second) {
21849 assert(First.Origin == Second.Origin && First.Origin &&
21850 "Unable to match different memory origins.");
21851 APInt UsedBits = First.getUsedBits();
21852 assert((UsedBits & Second.getUsedBits()) == 0 &&
21853 "Slices are not supposed to overlap.");
21854 UsedBits |= Second.getUsedBits();
21855 return areUsedBitsDense(UsedBits);
21856}
21857
21858/// Adjust the \p GlobalLSCost according to the target
21859/// paring capabilities and the layout of the slices.
21860/// \pre \p GlobalLSCost should account for at least as many loads as
21861/// there is in the slices in \p LoadedSlices.
21863 LoadedSlice::Cost &GlobalLSCost) {
21864 unsigned NumberOfSlices = LoadedSlices.size();
21865 // If there is less than 2 elements, no pairing is possible.
21866 if (NumberOfSlices < 2)
21867 return;
21868
21869 // Sort the slices so that elements that are likely to be next to each
21870 // other in memory are next to each other in the list.
21871 llvm::sort(LoadedSlices, [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
21872 assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
21873 return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
21874 });
21875 const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
21876 // First (resp. Second) is the first (resp. Second) potentially candidate
21877 // to be placed in a paired load.
21878 const LoadedSlice *First = nullptr;
21879 const LoadedSlice *Second = nullptr;
21880 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
21881 // Set the beginning of the pair.
21882 First = Second) {
21883 Second = &LoadedSlices[CurrSlice];
21884
21885 // If First is NULL, it means we start a new pair.
21886 // Get to the next slice.
21887 if (!First)
21888 continue;
21889
21890 EVT LoadedType = First->getLoadedType();
21891
21892 // If the types of the slices are different, we cannot pair them.
21893 if (LoadedType != Second->getLoadedType())
21894 continue;
21895
21896 // Check if the target supplies paired loads for this type.
21897 Align RequiredAlignment;
21898 if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
21899 // move to the next pair, this type is hopeless.
21900 Second = nullptr;
21901 continue;
21902 }
21903 // Check if we meet the alignment requirement.
21904 if (First->getAlign() < RequiredAlignment)
21905 continue;
21906
21907 // Check that both loads are next to each other in memory.
21908 if (!areSlicesNextToEachOther(*First, *Second))
21909 continue;
21910
21911 assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
21912 --GlobalLSCost.Loads;
21913 // Move to the next pair.
21914 Second = nullptr;
21915 }
21916}
21917
21918/// Check the profitability of all involved LoadedSlice.
21919/// Currently, it is considered profitable if there is exactly two
21920/// involved slices (1) which are (2) next to each other in memory, and
21921/// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
21922///
21923/// Note: The order of the elements in \p LoadedSlices may be modified, but not
21924/// the elements themselves.
21925///
21926/// FIXME: When the cost model will be mature enough, we can relax
21927/// constraints (1) and (2).
21929 const APInt &UsedBits, bool ForCodeSize) {
21930 unsigned NumberOfSlices = LoadedSlices.size();
21932 return NumberOfSlices > 1;
21933
21934 // Check (1).
21935 if (NumberOfSlices != 2)
21936 return false;
21937
21938 // Check (2).
21939 if (!areUsedBitsDense(UsedBits))
21940 return false;
21941
21942 // Check (3).
21943 LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
21944 // The original code has one big load.
21945 OrigCost.Loads = 1;
21946 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
21947 const LoadedSlice &LS = LoadedSlices[CurrSlice];
21948 // Accumulate the cost of all the slices.
21949 LoadedSlice::Cost SliceCost(LS, ForCodeSize);
21950 GlobalSlicingCost += SliceCost;
21951
21952 // Account as cost in the original configuration the gain obtained
21953 // with the current slices.
21954 OrigCost.addSliceGain(LS);
21955 }
21956
21957 // If the target supports paired load, adjust the cost accordingly.
21958 adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
21959 return OrigCost > GlobalSlicingCost;
21960}
21961
21962/// If the given load, \p LI, is used only by trunc or trunc(lshr)
21963/// operations, split it in the various pieces being extracted.
21964///
21965/// This sort of thing is introduced by SROA.
21966/// This slicing takes care not to insert overlapping loads.
21967/// \pre LI is a simple load (i.e., not an atomic or volatile load).
/// \returns true if the load was split up and \p N replaced.
21968bool DAGCombiner::SliceUpLoad(SDNode *N) {
// Slicing is only attempted once the whole DAG has been legalized.
21969 if (Level < AfterLegalizeDAG)
21970 return false;
21971
21972 LoadSDNode *LD = cast<LoadSDNode>(N);
21973 if (!LD->isSimple() || !ISD::isNormalLoad(LD) ||
21974 !LD->getValueType(0).isInteger())
21975 return false;
21976
21977 // The algorithm to split up a load of a scalable vector into individual
21978 // elements currently requires knowing the length of the loaded type,
21979 // so will need adjusting to work on scalable vectors.
21980 if (LD->getValueType(0).isScalableVector())
21981 return false;
21982
21983 // Keep track of already used bits to detect overlapping values.
21984 // In that case, we will just abort the transformation.
21985 APInt UsedBits(LD->getValueSizeInBits(0), 0);
21986
21987 SmallVector<LoadedSlice, 4> LoadedSlices;
21988
21989 // Check if this load is used as several smaller chunks of bits.
21990 // Basically, look for uses in trunc or trunc(lshr) and record a new chain
21991 // of computation for each trunc.
21992 for (SDUse &U : LD->uses()) {
21993 // Skip the uses of the chain.
21994 if (U.getResNo() != 0)
21995 continue;
21996
21997 SDNode *User = U.getUser();
21998 unsigned Shift = 0;
21999
22000 // Check if this is a trunc(lshr).
22001 if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
22002 isa<ConstantSDNode>(User->getOperand(1))) {
22003 Shift = User->getConstantOperandVal(1);
22004 User = *User->user_begin();
22005 }
22006
22007 // At this point, User is a Truncate, iff we encountered, trunc or
22008 // trunc(lshr).
22009 if (User->getOpcode() != ISD::TRUNCATE)
22010 return false;
22011
22012 // The width of the type must be a power of 2 and greater than 8-bits.
22013 // Otherwise the load cannot be represented in LLVM IR.
22014 // Moreover, if we shifted with a non-8-bits multiple, the slice
22015 // will be across several bytes. We do not support that.
22016 unsigned Width = User->getValueSizeInBits(0);
22017 if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
22018 return false;
22019
22020 // Build the slice for this chain of computations.
22021 LoadedSlice LS(User, LD, Shift, &DAG);
22022 APInt CurrentUsedBits = LS.getUsedBits();
22023
22024 // Check if this slice overlaps with another.
22025 if ((CurrentUsedBits & UsedBits) != 0)
22026 return false;
22027 // Update the bits used globally.
22028 UsedBits |= CurrentUsedBits;
22029
22030 // Check if the new slice would be legal.
22031 if (!LS.isLegal())
22032 return false;
22033
22034 // Record the slice.
22035 LoadedSlices.push_back(LS);
22036 }
22037
22038 // Abort slicing if it does not seem to be profitable.
22039 if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
22040 return false;
22041
22042 ++SlicedLoads;
22043
22044 // Rewrite each chain to use an independent load.
22045 // By construction, each chain can be represented by a unique load.
22046
22047 // Prepare the argument for the new token factor for all the slices.
22048 SmallVector<SDValue, 8> ArgChains;
22049 for (const LoadedSlice &LS : LoadedSlices) {
22050 SDValue SliceInst = LS.loadSlice();
22051 CombineTo(LS.Inst, SliceInst, true);
// loadSlice() may return the load wrapped in an extending node; step down
// one operand to reach the load itself (checked by the assert below).
22052 if (SliceInst.getOpcode() != ISD::LOAD)
22053 SliceInst = SliceInst.getOperand(0);
22054 assert(SliceInst->getOpcode() == ISD::LOAD &&
22055 "It takes more than a zext to get to the loaded slice!!");
22056 ArgChains.push_back(SliceInst.getValue(1));
22057 }
22058
22059 SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
22060 ArgChains);
// Rewire chain users of the original load to the token factor over the
// chains of the new slice loads.
22061 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
22062 AddToWorklist(Chain.getNode());
22063 return true;
22064}
22065
22066/// Check to see if V is (and load (ptr), imm), where the load is having
22067/// specific bytes cleared out. If so, return the byte size being masked out
22068/// and the shift amount.
/// Returns {0, 0} when the pattern does not match.
22069static std::pair<unsigned, unsigned>
22071 std::pair<unsigned, unsigned> Result(0, 0);
22072
22073 // Check for the structure we're looking for.
22074 if (V->getOpcode() != ISD::AND ||
22075 !isa<ConstantSDNode>(V->getOperand(1)) ||
22076 !ISD::isNormalLoad(V->getOperand(0).getNode()))
22077 return Result;
22078
22079 // Check the chain and pointer.
22080 LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
22081 if (LD->getBasePtr() != Ptr) return Result; // Not from same pointer.
22082
22083 // This only handles simple types.
22084 if (V.getValueType() != MVT::i16 &&
22085 V.getValueType() != MVT::i32 &&
22086 V.getValueType() != MVT::i64)
22087 return Result;
22088
22089 // Check the constant mask. Invert it so that the bits being masked out are
22090 // 0 and the bits being kept are 1. Use getSExtValue so that leading bits
22091 // follow the sign bit for uniformity.
22092 uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
22093 unsigned NotMaskLZ = llvm::countl_zero(NotMask);
22094 if (NotMaskLZ & 7) return Result; // Must be multiple of a byte.
22095 unsigned NotMaskTZ = llvm::countr_zero(NotMask);
22096 if (NotMaskTZ & 7) return Result; // Must be multiple of a byte.
22097 if (NotMaskLZ == 64) return Result; // All zero mask.
22098
22099 // See if we have a continuous run of bits. If so, we have 0*1+0*
22100 if (llvm::countr_one(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
22101 return Result;
22102
22103 // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
22104 if (V.getValueType() != MVT::i64 && NotMaskLZ)
22105 NotMaskLZ -= 64-V.getValueSizeInBits();
22106
22107 unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
22108 switch (MaskedBytes) {
22109 case 1:
22110 case 2:
22111 case 4: break;
22112 default: return Result; // All one mask, or 5-byte mask.
22113 }
22114
22115 // Verify that the first bit starts at a multiple of mask so that the access
22116 // is aligned the same as the access width.
22117 if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
22118
22119 // For narrowing to be valid, it must be the case that the load the
22120 // immediately preceding memory operation before the store.
22121 if (LD == Chain.getNode())
22122 ; // ok.
22123 else if (Chain->getOpcode() == ISD::TokenFactor &&
22124 SDValue(LD, 1).hasOneUse()) {
22125 // LD has only 1 chain use so they are no indirect dependencies.
22126 if (!LD->isOperandOf(Chain.getNode()))
22127 return Result;
22128 } else
22129 return Result; // Fail.
22130
// Success: {number of masked-out bytes, byte offset of the masked run}.
22131 Result.first = MaskedBytes;
22132 Result.second = NotMaskTZ/8;
22133 return Result;
22134}
22135
22136/// Check to see if IVal is something that provides a value as specified by
22137/// MaskInfo. If so, replace the specified store with a narrower store of
22138/// truncated IVal.
/// \p MaskInfo is {NumBytes, ByteShift} as produced by CheckForMaskedLoad.
/// \returns the narrow store, or SDValue() if the transform does not apply.
22139static SDValue
22140ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
22141 SDValue IVal, StoreSDNode *St,
22142 DAGCombiner *DC) {
22143 unsigned NumBytes = MaskInfo.first;
22144 unsigned ByteShift = MaskInfo.second;
22145 SelectionDAG &DAG = DC->getDAG();
22146
22147 // Check to see if IVal is all zeros in the part being masked in by the 'or'
22148 // that uses this. If not, this is not a replacement.
22149 APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
22150 ByteShift*8, (ByteShift+NumBytes)*8);
22151 if (!DAG.MaskedValueIsZero(IVal, Mask)) return SDValue();
22152
22153 // Check that it is legal on the target to do this. It is legal if the new
22154 // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
22155 // legalization. If the source type is legal, but the store type isn't, see
22156 // if we can use a truncating store.
22157 MVT VT = MVT::getIntegerVT(NumBytes * 8);
22158 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
22159 bool UseTruncStore;
22160 if (DC->isTypeLegal(VT))
22161 UseTruncStore = false;
22162 else if (TLI.isTypeLegal(IVal.getValueType()) &&
22163 TLI.isTruncStoreLegal(IVal.getValueType(), VT, St->getAlign(),
22164 St->getAddressSpace()))
22165 UseTruncStore = true;
22166 else
22167 return SDValue();
22168
22169 // Can't do this for indexed stores.
22170 if (St->isIndexed())
22171 return SDValue();
22172
22173 // Check that the target doesn't think this is a bad idea.
22174 if (St->getMemOperand() &&
22175 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
22176 *St->getMemOperand()))
22177 return SDValue();
22178
22179 // Okay, we can do this! Replace the 'St' store with a store of IVal that is
22180 // shifted by ByteShift and truncated down to NumBytes.
22181 if (ByteShift) {
22182 SDLoc DL(IVal);
22183 IVal = DAG.getNode(
22184 ISD::SRL, DL, IVal.getValueType(), IVal,
22185 DAG.getShiftAmountConstant(ByteShift * 8, IVal.getValueType(), DL));
22186 }
22187
22188 // Figure out the offset for the store and the alignment of the access.
// Little-endian: the replaced bytes sit ByteShift bytes above the base.
// Big-endian: the byte offset is counted from the most-significant end.
22189 unsigned StOffset;
22190 if (DAG.getDataLayout().isLittleEndian())
22191 StOffset = ByteShift;
22192 else
22193 StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
22194
22195 SDValue Ptr = St->getBasePtr();
22196 if (StOffset) {
22197 SDLoc DL(IVal);
22198 Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(StOffset), DL);
22199 }
22200
22201 ++OpsNarrowed;
22202 if (UseTruncStore)
22203 return DAG.getTruncStore(St->getChain(), SDLoc(St), IVal, Ptr,
22204 St->getPointerInfo().getWithOffset(StOffset), VT,
22205 St->getBaseAlign());
22206
22207 // Truncate down to the new size.
22208 IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
22209
22210 return DAG.getStore(St->getChain(), SDLoc(St), IVal, Ptr,
22211 St->getPointerInfo().getWithOffset(StOffset),
22212 St->getBaseAlign());
22213}
22214
22215/// Look for sequence of load / op / store where op is one of 'or', 'xor', and
22216/// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
22217/// narrowing the load and store if it would end up being a win for performance
22218/// or code size.
/// \returns the new narrowed store, or SDValue() if no change was made.
22219SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
22220 StoreSDNode *ST = cast<StoreSDNode>(N);
22221 if (!ST->isSimple())
22222 return SDValue();
22223
22224 SDValue Chain = ST->getChain();
22225 SDValue Value = ST->getValue();
22226 SDValue Ptr = ST->getBasePtr();
22227 EVT VT = Value.getValueType();
22228
22229 if (ST->isTruncatingStore() || VT.isVector())
22230 return SDValue();
22231
22232 unsigned Opc = Value.getOpcode();
22233
22234 if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
22235 !Value.hasOneUse())
22236 return SDValue();
22237
22238 // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
22239 // is a byte mask indicating a consecutive number of bytes, check to see if
22240 // Y is known to provide just those bytes. If so, we try to replace the
22241 // load + replace + store sequence with a single (narrower) store, which makes
22242 // the load dead.
22244 std::pair<unsigned, unsigned> MaskedLoad;
22245 MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
22246 if (MaskedLoad.first)
22247 if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
22248 Value.getOperand(1), ST,this))
22249 return NewST;
22250
22251 // Or is commutative, so try swapping X and Y.
22252 MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
22253 if (MaskedLoad.first)
22254 if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
22255 Value.getOperand(0), ST,this))
22256 return NewST;
22257 }
22258
22260 return SDValue();
22261
22262 if (Value.getOperand(1).getOpcode() != ISD::Constant)
22263 return SDValue();
22264
22265 SDValue N0 = Value.getOperand(0);
22266 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
22267 Chain == SDValue(N0.getNode(), 1)) {
22268 LoadSDNode *LD = cast<LoadSDNode>(N0);
22269 if (LD->getBasePtr() != Ptr ||
22270 LD->getPointerInfo().getAddrSpace() !=
22271 ST->getPointerInfo().getAddrSpace())
22272 return SDValue();
22273
22274 // Find the type NewVT to narrow the load / op / store to.
22275 SDValue N1 = Value.getOperand(1);
22276 unsigned BitWidth = N1.getValueSizeInBits();
22277 APInt Imm = N1->getAsAPIntVal();
// For AND, the bits being modified are the zeros of the mask; flip so that
// for all three opcodes the set bits of Imm mark the modified bits.
22278 if (Opc == ISD::AND)
22279 Imm.flipAllBits();
22280 if (Imm == 0 || Imm.isAllOnes())
22281 return SDValue();
22282 // Find least/most significant bit that need to be part of the narrowed
22283 // operation. We assume target will need to address/access full bytes, so
22284 // we make sure to align LSB and MSB at byte boundaries.
22285 unsigned BitsPerByteMask = 7u;
22286 unsigned LSB = Imm.countr_zero() & ~BitsPerByteMask;
22287 unsigned MSB = (Imm.getActiveBits() - 1) | BitsPerByteMask;
// NextPowerOf2 returns the next power of 2 strictly greater than its
// argument, so NewBW is wide enough for the inclusive range [LSB, MSB].
22288 unsigned NewBW = NextPowerOf2(MSB - LSB);
22289 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
22290 // The narrowing should be profitable, the load/store operation should be
22291 // legal (or custom) and the store size should be equal to the NewVT width.
22292 while (NewBW < BitWidth &&
22293 (NewVT.getStoreSizeInBits() != NewBW ||
22294 !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
22296 !TLI.isNarrowingProfitable(N, VT, NewVT)))) {
22297 NewBW = NextPowerOf2(NewBW);
22298 NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
22299 }
22300 if (NewBW >= BitWidth)
22301 return SDValue();
22302
22303 // If we come this far NewVT/NewBW reflect a power-of-2 sized type that is
22304 // large enough to cover all bits that should be modified. This type might
22305 // however be larger than really needed (such as i32 while we actually only
22306 // need to modify one byte). Now we need to find our how to align the memory
22307 // accesses to satisfy preferred alignments as well as avoiding to access
22308 // memory outside the store size of the orignal access.
22309
22310 unsigned VTStoreSize = VT.getStoreSizeInBits().getFixedValue();
22311
22312 // Let ShAmt denote amount of bits to skip, counted from the least
22313 // significant bits of Imm. And let PtrOff how much the pointer needs to be
22314 // offsetted (in bytes) for the new access.
22315 unsigned ShAmt = 0;
22316 uint64_t PtrOff = 0;
22317 for (; ShAmt + NewBW <= VTStoreSize; ShAmt += 8) {
22318 // Make sure the range [ShAmt, ShAmt+NewBW) cover both LSB and MSB.
22319 if (ShAmt > LSB)
22320 return SDValue();
22321 if (ShAmt + NewBW < MSB)
22322 continue;
22323
22324 // Calculate PtrOff.
22325 unsigned PtrAdjustmentInBits = DAG.getDataLayout().isBigEndian()
22326 ? VTStoreSize - NewBW - ShAmt
22327 : ShAmt;
22328 PtrOff = PtrAdjustmentInBits / 8;
22329
22330 // Now check if narrow access is allowed and fast, considering alignments.
22331 unsigned IsFast = 0;
22332 Align NewAlign = commonAlignment(LD->getAlign(), PtrOff);
22333 if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), NewVT,
22334 LD->getAddressSpace(), NewAlign,
22335 LD->getMemOperand()->getFlags(), &IsFast) &&
22336 IsFast)
22337 break;
22338 }
22339 // If loop above did not find any accepted ShAmt we need to exit here.
22340 if (ShAmt + NewBW > VTStoreSize)
22341 return SDValue();
22342
22343 APInt NewImm = Imm.lshr(ShAmt).trunc(NewBW);
22344 if (Opc == ISD::AND)
22345 NewImm.flipAllBits();
22346 Align NewAlign = commonAlignment(LD->getAlign(), PtrOff);
22347 SDValue NewPtr =
22348 DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(PtrOff), SDLoc(LD));
22349 SDValue NewLD =
22350 DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
22351 LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
22352 LD->getMemOperand()->getFlags(), LD->getAAInfo());
22353 SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
22354 DAG.getConstant(NewImm, SDLoc(Value), NewVT));
22355 SDValue NewST =
22356 DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
22357 ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);
22358
22359 AddToWorklist(NewPtr.getNode());
22360 AddToWorklist(NewLD.getNode());
22361 AddToWorklist(NewVal.getNode());
22362 WorklistRemover DeadNodes(*this);
// Redirect chain users of the old (now dead) wide load to the narrow load.
22363 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
22364 ++OpsNarrowed;
22365 return NewST;
22366 }
22367
22368 return SDValue();
22369}
22370
22371/// For a given floating point load / store pair, if the load value isn't used
22372/// by any other operations, then consider transforming the pair to integer
22373/// load / store operations if the target deems the transformation profitable.
/// \returns the new integer store, or SDValue() if not transformed.
22374SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
22375 StoreSDNode *ST = cast<StoreSDNode>(N);
22376 SDValue Value = ST->getValue();
22377 if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
22378 Value.hasOneUse()) {
22379 LoadSDNode *LD = cast<LoadSDNode>(Value);
22380 EVT VT = LD->getMemoryVT();
// Restrict to simple FP types with matching load/store memory VT,
// address space 0, and non-nontemporal accesses.
22381 if (!VT.isSimple() || !VT.isFloatingPoint() || VT != ST->getMemoryVT() ||
22382 LD->isNonTemporal() || ST->isNonTemporal() ||
22383 LD->getPointerInfo().getAddrSpace() != 0 ||
22384 ST->getPointerInfo().getAddrSpace() != 0)
22385 return SDValue();
22386
22387 TypeSize VTSize = VT.getSizeInBits();
22388
22389 // We don't know the size of scalable types at compile time so we cannot
22390 // create an integer of the equivalent size.
22391 if (VTSize.isScalable())
22392 return SDValue();
22393
22394 unsigned FastLD = 0, FastST = 0;
22395 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VTSize.getFixedValue());
22396 if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
22397 !TLI.isOperationLegal(ISD::STORE, IntVT) ||
22400 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,
22401 *LD->getMemOperand(), &FastLD) ||
22402 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,
22403 *ST->getMemOperand(), &FastST) ||
22404 !FastLD || !FastST)
22405 return SDValue();
22406
22407 SDValue NewLD = DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(),
22408 LD->getBasePtr(), LD->getMemOperand());
22409
22410 SDValue NewST = DAG.getStore(ST->getChain(), SDLoc(N), NewLD,
22411 ST->getBasePtr(), ST->getMemOperand());
22412
22413 AddToWorklist(NewLD.getNode());
22414 AddToWorklist(NewST.getNode());
22415 WorklistRemover DeadNodes(*this);
// Forward the FP load's chain users to the new integer load's chain.
22416 DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
22417 ++LdStFP2Int;
22418 return NewST;
22419 }
22420
22421 return SDValue();
22422}
22423
22424// This is a helper function for visitMUL to check the profitability
22425// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
22426// MulNode is the original multiply, AddNode is (add x, c1),
22427// and ConstNode is c2.
22428//
22429// If the (add x, c1) has multiple uses, we could increase
22430// the number of adds if we make this transformation.
22431// It would only be worth doing this if we can remove a
22432// multiply in the process. Check for that here.
22433// To illustrate:
22434// (A + c1) * c3
22435// (A + c2) * c3
22436// We're checking for cases where we have common "c3 * A" expressions.
22437bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode, SDValue AddNode,
22438 SDValue ConstNode) {
22439 // If the add only has one use, and the target thinks the folding is
22440 // profitable or does not lead to worse code, this would be OK to do.
22441 if (AddNode->hasOneUse() &&
22442 TLI.isMulAddWithConstProfitable(AddNode, ConstNode))
22443 return true;
22444
22445 // Walk all the users of the constant with which we're multiplying.
22446 for (SDNode *User : ConstNode->users()) {
22447 if (User == MulNode) // This use is the one we're on right now. Skip it.
22448 continue;
22449
22450 if (User->getOpcode() == ISD::MUL) { // We have another multiply use.
22451 SDNode *OtherOp;
// MulVar is operand 0 of the add, i.e. the A in the examples above.
22452 SDNode *MulVar = AddNode.getOperand(0).getNode();
22453
22454 // OtherOp is what we're multiplying against the constant.
22455 if (User->getOperand(0) == ConstNode)
22456 OtherOp = User->getOperand(1).getNode();
22457 else
22458 OtherOp = User->getOperand(0).getNode();
22459
22460 // Check to see if multiply is with the same operand of our "add".
22461 //
22462 // ConstNode = CONST
22463 // User = ConstNode * A <-- visiting User. OtherOp is A.
22464 // ...
22465 // AddNode = (A + c1) <-- MulVar is A.
22466 // = AddNode * ConstNode <-- current visiting instruction.
22467 //
22468 // If we make this transformation, we will have a common
22469 // multiply (ConstNode * A) that we can save.
22470 if (OtherOp == MulVar)
22471 return true;
22472
22473 // Now check to see if a future expansion will give us a common
22474 // multiply.
22475 //
22476 // ConstNode = CONST
22477 // AddNode = (A + c1)
22478 // ... = AddNode * ConstNode <-- current visiting instruction.
22479 // ...
22480 // OtherOp = (A + c2)
22481 // User = OtherOp * ConstNode <-- visiting User.
22482 //
22483 // If we make this transformation, we will have a common
22484 // multiply (CONST * A) after we also do the same transformation
22485 // to the "t2" instruction.
22486 if (OtherOp->getOpcode() == ISD::ADD &&
22488 OtherOp->getOperand(0).getNode() == MulVar)
22489 return true;
22490 }
22491 }
22492
22493 // Didn't find a case where this would be profitable.
22494 return false;
22495}
22496
// Build a TokenFactor over the distinct incoming chains of the first
// NumStores entries of StoreNodes.
22497SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
22498 unsigned NumStores) {
22500 SmallPtrSet<const SDNode *, 8> Visited;
22501 SDLoc StoreDL(StoreNodes[0].MemNode);
22502
// Seed Visited with the stores themselves so a chain operand that is itself
// one of the merged stores is not added as an input chain below.
22503 for (unsigned i = 0; i < NumStores; ++i) {
22504 Visited.insert(StoreNodes[i].MemNode);
22505 }
22506
22507 // don't include nodes that are children or repeated nodes.
22508 for (unsigned i = 0; i < NumStores; ++i) {
22509 if (Visited.insert(StoreNodes[i].MemNode->getChain().getNode()).second)
22510 Chains.push_back(StoreNodes[i].MemNode->getChain());
22511 }
22512
22513 assert(!Chains.empty() && "Chain should have generated a chain");
22514 return DAG.getTokenFactor(StoreDL, Chains);
22515}
22516
22517bool DAGCombiner::hasSameUnderlyingObj(ArrayRef<MemOpLink> StoreNodes) {
22518 const Value *UnderlyingObj = nullptr;
22519 for (const auto &MemOp : StoreNodes) {
22520 const MachineMemOperand *MMO = MemOp.MemNode->getMemOperand();
22521 // Pseudo value like stack frame has its own frame index and size, should
22522 // not use the first store's frame index for other frames.
22523 if (MMO->getPseudoValue())
22524 return false;
22525
22526 if (!MMO->getValue())
22527 return false;
22528
22529 const Value *Obj = getUnderlyingObject(MMO->getValue());
22530
22531 if (UnderlyingObj && UnderlyingObj != Obj)
22532 return false;
22533
22534 if (!UnderlyingObj)
22535 UnderlyingObj = Obj;
22536 }
22537
22538 return true;
22539}
22540
// Merge the first NumStores stores in StoreNodes into one wide store (vector
// or integer, per UseVector). Returns true on success, after replacing the
// original stores via CombineTo.
22541bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
22542 SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
22543 bool IsConstantSrc, bool UseVector, bool UseTrunc) {
22544 // Make sure we have something to merge.
22545 if (NumStores < 2)
22546 return false;
22547
22548 assert((!UseTrunc || !UseVector) &&
22549 "This optimization cannot emit a vector truncating store");
22550
22551 // The latest Node in the DAG.
22552 SDLoc DL(StoreNodes[0].MemNode);
22553
22554 TypeSize ElementSizeBits = MemVT.getStoreSizeInBits();
22555 unsigned SizeInBits = NumStores * ElementSizeBits;
22556 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
22557
// All merged stores must share identical memory-operand flags; their AA
// metadata is merged by concatenation.
22558 std::optional<MachineMemOperand::Flags> Flags;
22559 AAMDNodes AAInfo;
22560 for (unsigned I = 0; I != NumStores; ++I) {
22561 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
22562 if (!Flags) {
22563 Flags = St->getMemOperand()->getFlags();
22564 AAInfo = St->getAAInfo();
22565 continue;
22566 }
22567 // Skip merging if there's an inconsistent flag.
22568 if (Flags != St->getMemOperand()->getFlags())
22569 return false;
22570 // Concatenate AA metadata.
22571 AAInfo = AAInfo.concat(St->getAAInfo());
22572 }
22573
22574 EVT StoreTy;
22575 if (UseVector) {
22576 unsigned Elts = NumStores * NumMemElts;
22577 // Get the type for the merged vector store.
22578 StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
22579 } else
22580 StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
22581
22582 SDValue StoredVal;
22583 if (UseVector) {
22584 if (IsConstantSrc) {
22585 SmallVector<SDValue, 8> BuildVector;
22586 for (unsigned I = 0; I != NumStores; ++I) {
22587 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
22588 SDValue Val = St->getValue();
22589 // If constant is of the wrong type, convert it now. This comes up
22590 // when one of our stores was truncating.
22591 if (MemVT != Val.getValueType()) {
22592 Val = peekThroughBitcasts(Val);
22593 // Deal with constants of wrong size.
22594 if (ElementSizeBits != Val.getValueSizeInBits()) {
22595 auto *C = dyn_cast<ConstantSDNode>(Val);
22596 if (!C)
22597 // Not clear how to truncate FP values.
22598 // TODO: Handle truncation of build_vector constants
22599 return false;
22600
22601 EVT IntMemVT =
22603 Val = DAG.getConstant(C->getAPIntValue()
22604 .zextOrTrunc(Val.getValueSizeInBits())
22605 .zextOrTrunc(ElementSizeBits),
22606 SDLoc(C), IntMemVT);
22607 }
22608 // Make sure correctly size type is the correct type.
22609 Val = DAG.getBitcast(MemVT, Val);
22610 }
22611 BuildVector.push_back(Val);
22612 }
22613 StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
22615 DL, StoreTy, BuildVector);
22616 } else {
22618 for (unsigned i = 0; i < NumStores; ++i) {
22619 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
22621 // All operands of BUILD_VECTOR / CONCAT_VECTOR must be of
22622 // type MemVT. If the underlying value is not the correct
22623 // type, but it is an extraction of an appropriate vector we
22624 // can recast Val to be of the correct type. This may require
22625 // converting between EXTRACT_VECTOR_ELT and
22626 // EXTRACT_SUBVECTOR.
22627 if ((MemVT != Val.getValueType()) &&
22630 EVT MemVTScalarTy = MemVT.getScalarType();
22631 // We may need to add a bitcast here to get types to line up.
22632 if (MemVTScalarTy != Val.getValueType().getScalarType()) {
22633 Val = DAG.getBitcast(MemVT, Val);
22634 } else if (MemVT.isVector() &&
22636 Val = DAG.getNode(ISD::BUILD_VECTOR, DL, MemVT, Val);
22637 } else {
22638 unsigned OpC = MemVT.isVector() ? ISD::EXTRACT_SUBVECTOR
22640 SDValue Vec = Val.getOperand(0);
22641 SDValue Idx = Val.getOperand(1);
22642 Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Idx);
22643 }
22644 }
22645 Ops.push_back(Val);
22646 }
22647
22648 // Build the extracted vector elements back into a vector.
22649 StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
22651 DL, StoreTy, Ops);
22652 }
22653 } else {
22654 // We should always use a vector store when merging extracted vector
22655 // elements, so this path implies a store of constants.
22656 assert(IsConstantSrc && "Merged vector elements should use vector store");
22657
22658 APInt StoreInt(SizeInBits, 0);
22659
22660 // Construct a single integer constant which is made of the smaller
22661 // constant inputs.
22662 bool IsLE = DAG.getDataLayout().isLittleEndian();
22663 for (unsigned i = 0; i < NumStores; ++i) {
// Each iteration shifts the accumulated value up by one element, so the
// first-processed store supplies the most significant bits; on
// little-endian that is the highest-index store.
22664 unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
22665 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
22666
22667 SDValue Val = St->getValue();
22668 Val = peekThroughBitcasts(Val);
22669 StoreInt <<= ElementSizeBits;
22670 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
22671 StoreInt |= C->getAPIntValue()
22672 .zextOrTrunc(ElementSizeBits)
22673 .zextOrTrunc(SizeInBits);
22674 } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
22675 StoreInt |= C->getValueAPF()
22676 .bitcastToAPInt()
22677 .zextOrTrunc(ElementSizeBits)
22678 .zextOrTrunc(SizeInBits);
22679 // If fp truncation is necessary give up for now.
22680 if (MemVT.getSizeInBits() != ElementSizeBits)
22681 return false;
22682 } else if (ISD::isBuildVectorOfConstantSDNodes(Val.getNode()) ||
22684 // Not yet handled
22685 return false;
22686 } else {
22687 llvm_unreachable("Invalid constant element type");
22688 }
22689 }
22690
22691 // Create the new Load and Store operations.
22692 StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
22693 }
22694
22695 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
22696 SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);
22697 bool CanReusePtrInfo = hasSameUnderlyingObj(StoreNodes);
22698
22699 // make sure we use trunc store if it's necessary to be legal.
22700 // When generate the new widen store, if the first store's pointer info can
22701 // not be reused, discard the pointer info except the address space because
22702 // now the widen store can not be represented by the original pointer info
22703 // which is for the narrow memory object.
22704 SDValue NewStore;
22705 if (!UseTrunc) {
22706 NewStore = DAG.getStore(
22707 NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
22708 CanReusePtrInfo
22709 ? FirstInChain->getPointerInfo()
22710 : MachinePointerInfo(FirstInChain->getPointerInfo().getAddrSpace()),
22711 FirstInChain->getAlign(), *Flags, AAInfo);
22712 } else { // Must be realized as a trunc store
22713 EVT LegalizedStoredValTy =
22714 TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
22715 unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits();
22716 ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
22717 SDValue ExtendedStoreVal =
22718 DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
22719 LegalizedStoredValTy);
22720 NewStore = DAG.getTruncStore(
22721 NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
22722 CanReusePtrInfo
22723 ? FirstInChain->getPointerInfo()
22724 : MachinePointerInfo(FirstInChain->getPointerInfo().getAddrSpace()),
22725 StoredVal.getValueType() /*TVT*/, FirstInChain->getAlign(), *Flags,
22726 AAInfo);
22727 }
22728
22729 // Replace all merged stores with the new store.
22730 for (unsigned i = 0; i < NumStores; ++i)
22731 CombineTo(StoreNodes[i].MemNode, NewStore);
22732
22733 AddToWorklist(NewChain.getNode());
22734 return true;
22735}
22736
22737SDNode *
22738DAGCombiner::getStoreMergeCandidates(StoreSDNode *St,
22739 SmallVectorImpl<MemOpLink> &StoreNodes) {
22740 // This holds the base pointer, index, and the offset in bytes from the base
22741 // pointer. We must have a base and an offset. Do not handle stores to undef
22742 // base pointers.
22743 BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
22744 if (!BasePtr.getBase().getNode() || BasePtr.getBase().isUndef())
22745 return nullptr;
22746
22748 StoreSource StoreSrc = getStoreSource(Val);
22749 assert(StoreSrc != StoreSource::Unknown && "Expected known source for store");
22750
22751 // Match on loadbaseptr if relevant.
22752 EVT MemVT = St->getMemoryVT();
22753 BaseIndexOffset LBasePtr;
22754 EVT LoadVT;
22755 if (StoreSrc == StoreSource::Load) {
22756 auto *Ld = cast<LoadSDNode>(Val);
22757 LBasePtr = BaseIndexOffset::match(Ld, DAG);
22758 LoadVT = Ld->getMemoryVT();
22759 // Load and store should be the same type.
22760 if (MemVT != LoadVT)
22761 return nullptr;
22762 // Loads must only have one use.
22763 if (!Ld->hasNUsesOfValue(1, 0))
22764 return nullptr;
22765 // The memory operands must not be volatile/indexed/atomic.
22766 // TODO: May be able to relax for unordered atomics (see D66309)
22767 if (!Ld->isSimple() || Ld->isIndexed())
22768 return nullptr;
22769 }
22770 auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
22771 int64_t &Offset) -> bool {
22772 // The memory operands must not be volatile/indexed/atomic.
22773 // TODO: May be able to relax for unordered atomics (see D66309)
22774 if (!Other->isSimple() || Other->isIndexed())
22775 return false;
22776 // Don't mix temporal stores with non-temporal stores.
22777 if (St->isNonTemporal() != Other->isNonTemporal())
22778 return false;
22780 return false;
22781 SDValue OtherBC = peekThroughBitcasts(Other->getValue());
22782 // Allow merging constants of different types as integers.
22783 bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
22784 : Other->getMemoryVT() != MemVT;
22785 switch (StoreSrc) {
22786 case StoreSource::Load: {
22787 if (NoTypeMatch)
22788 return false;
22789 // The Load's Base Ptr must also match.
22790 auto *OtherLd = dyn_cast<LoadSDNode>(OtherBC);
22791 if (!OtherLd)
22792 return false;
22793 BaseIndexOffset LPtr = BaseIndexOffset::match(OtherLd, DAG);
22794 if (LoadVT != OtherLd->getMemoryVT())
22795 return false;
22796 // Loads must only have one use.
22797 if (!OtherLd->hasNUsesOfValue(1, 0))
22798 return false;
22799 // The memory operands must not be volatile/indexed/atomic.
22800 // TODO: May be able to relax for unordered atomics (see D66309)
22801 if (!OtherLd->isSimple() || OtherLd->isIndexed())
22802 return false;
22803 // Don't mix temporal loads with non-temporal loads.
22804 if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal())
22805 return false;
22807 *OtherLd))
22808 return false;
22809 if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
22810 return false;
22811 break;
22812 }
22813 case StoreSource::Constant:
22814 if (NoTypeMatch)
22815 return false;
22816 if (getStoreSource(OtherBC) != StoreSource::Constant)
22817 return false;
22818 break;
22819 case StoreSource::Extract:
22820 // Do not merge truncated stores here.
22821 if (Other->isTruncatingStore())
22822 return false;
22823 if (!MemVT.bitsEq(OtherBC.getValueType()))
22824 return false;
22825 if (OtherBC.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
22826 OtherBC.getOpcode() != ISD::EXTRACT_SUBVECTOR)
22827 return false;
22828 break;
22829 default:
22830 llvm_unreachable("Unhandled store source for merging");
22831 }
22832 Ptr = BaseIndexOffset::match(Other, DAG);
22833 return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
22834 };
22835
22836 // We are looking for a root node which is an ancestor to all mergable
22837 // stores. We search up through a load, to our root and then down
22838 // through all children. For instance we will find Store{1,2,3} if
// St is Store1, Store2, or Store3 where the root is not a load
// which is always true for nonvolatile ops. TODO: Expand
22841 // the search to find all valid candidates through multiple layers of loads.
22842 //
22843 // Root
22844 // |-------|-------|
22845 // Load Load Store3
22846 // | |
22847 // Store1 Store2
22848 //
22849 // FIXME: We should be able to climb and
22850 // descend TokenFactors to find candidates as well.
22851
22852 SDNode *RootNode = St->getChain().getNode();
22853 // Bail out if we already analyzed this root node and found nothing.
22854 if (ChainsWithoutMergeableStores.contains(RootNode))
22855 return nullptr;
22856
22857 // Check if the pair of StoreNode and the RootNode already bail out many
22858 // times which is over the limit in dependence check.
22859 auto OverLimitInDependenceCheck = [&](SDNode *StoreNode,
22860 SDNode *RootNode) -> bool {
22861 auto RootCount = StoreRootCountMap.find(StoreNode);
22862 return RootCount != StoreRootCountMap.end() &&
22863 RootCount->second.first == RootNode &&
22864 RootCount->second.second > StoreMergeDependenceLimit;
22865 };
22866
22867 auto TryToAddCandidate = [&](SDUse &Use) {
22868 // This must be a chain use.
22869 if (Use.getOperandNo() != 0)
22870 return;
22871 if (auto *OtherStore = dyn_cast<StoreSDNode>(Use.getUser())) {
22872 BaseIndexOffset Ptr;
22873 int64_t PtrDiff;
22874 if (CandidateMatch(OtherStore, Ptr, PtrDiff) &&
22875 !OverLimitInDependenceCheck(OtherStore, RootNode))
22876 StoreNodes.push_back(MemOpLink(OtherStore, PtrDiff));
22877 }
22878 };
22879
22880 unsigned NumNodesExplored = 0;
22881 const unsigned MaxSearchNodes = 1024;
22882 if (auto *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
22883 RootNode = Ldn->getChain().getNode();
22884 // Bail out if we already analyzed this root node and found nothing.
22885 if (ChainsWithoutMergeableStores.contains(RootNode))
22886 return nullptr;
22887 for (auto I = RootNode->use_begin(), E = RootNode->use_end();
22888 I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored) {
22889 SDNode *User = I->getUser();
22890 if (I->getOperandNo() == 0 && isa<LoadSDNode>(User)) { // walk down chain
22891 for (SDUse &U2 : User->uses())
22892 TryToAddCandidate(U2);
22893 }
22894 // Check stores that depend on the root (e.g. Store 3 in the chart above).
22895 if (I->getOperandNo() == 0 && isa<StoreSDNode>(User)) {
22896 TryToAddCandidate(*I);
22897 }
22898 }
22899 } else {
22900 for (auto I = RootNode->use_begin(), E = RootNode->use_end();
22901 I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored)
22902 TryToAddCandidate(*I);
22903 }
22904
22905 return RootNode;
22906}
22907
22908// We need to check that merging these stores does not cause a loop in the
22909// DAG. Any store candidate may depend on another candidate indirectly through
22910// its operands. Check in parallel by searching up from operands of candidates.
bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
    SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
    SDNode *RootNode) {
  // FIXME: We should be able to truncate a full search of
  // predecessors by doing a BFS and keeping tabs on the originating
  // stores from which worklist nodes come, in a similar way to
  // TokenFactor simplification.

  SmallPtrSet<const SDNode *, 32> Visited;
  // NOTE(review): the worklist container declaration appears to have been
  // elided in this rendering of the source — confirm against upstream.

  // RootNode is a predecessor to all candidates so we need not search
  // past it. Add RootNode (peeking through TokenFactors). Do not count
  // these towards size check.

  Worklist.push_back(RootNode);
  while (!Worklist.empty()) {
    auto N = Worklist.pop_back_val();
    if (!Visited.insert(N).second)
      continue; // Already present in Visited.
    // Climb through chained TokenFactors so the entire pruning frontier
    // above the candidates is pre-seeded into Visited.
    if (N->getOpcode() == ISD::TokenFactor) {
      for (SDValue Op : N->ops())
        Worklist.push_back(Op.getNode());
    }
  }

  // Don't count pruning nodes towards max.
  unsigned int Max = 1024 + Visited.size();
  // Search Ops of store candidates.
  for (unsigned i = 0; i < NumStores; ++i) {
    SDNode *N = StoreNodes[i].MemNode;
    // Of the 4 Store Operands:
    // * Chain (Op 0) -> We have already considered these
    //                   in candidate selection, but only by following the
    //                   chain dependencies. We could still have a chain
    //                   dependency to a load, that has a non-chain dep to
    //                   another load, that depends on a store, etc. So it is
    //                   possible to have dependencies that consist of a mix
    //                   of chain and non-chain deps, and we need to include
    //                   chain operands in the analysis here..
    // * Value (Op 1) -> Cycles may happen (e.g. through load chains)
    // * Address (Op 2) -> Merged addresses may only vary by a fixed constant,
    //                     but aren't necessarily from the same base node, so
    //                     cycles possible (e.g. via indexed store).
    // * (Op 3) -> Represents the pre or post-indexing offset (or undef for
    //             non-indexed stores). Not constant on all targets (e.g. ARM)
    //             and so can participate in a cycle.
    for (const SDValue &Op : N->op_values())
      Worklist.push_back(Op.getNode());
  }
  // Search through DAG. We can stop early if we find a store node.
  for (unsigned i = 0; i < NumStores; ++i)
    if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
                                     Max)) {
      // If the search bailed out, record the StoreNode and RootNode in the
      // StoreRootCountMap. If we have seen the pair many times over a limit,
      // we won't add the StoreNode into StoreNodes set again.
      if (Visited.size() >= Max) {
        auto &RootCount = StoreRootCountMap[StoreNodes[i].MemNode];
        if (RootCount.first == RootNode)
          RootCount.second++;
        else
          RootCount = {RootNode, 1};
      }
      return false;
    }
  return true;
}
22979
/// Walk the chain upward from \p St toward its chain predecessors, tracking
/// whether a CALLSEQ_END (the end of a call sequence) is crossed, and return
/// that flag for the path on which the load \p Ld is first reached. Returns
/// false if \p Ld is never reached from the store's chain.
bool DAGCombiner::hasCallInLdStChain(StoreSDNode *St, LoadSDNode *Ld) {
  SmallPtrSet<const SDNode *, 32> Visited;
  // NOTE(review): the worklist declaration (a vector of {node, found-call}
  // pairs, judging by the structured binding below) appears to have been
  // elided in this rendering of the source — confirm against upstream.
  Worklist.emplace_back(St->getChain().getNode(), false);

  while (!Worklist.empty()) {
    auto [Node, FoundCall] = Worklist.pop_back_val();
    // Skip nodes seen before and stop at chain roots (no operands).
    if (!Visited.insert(Node).second || Node->getNumOperands() == 0)
      continue;

    switch (Node->getOpcode()) {
    case ISD::CALLSEQ_END:
      // Crossed the end of a call sequence: everything further up the chain
      // is separated from the store by a call.
      Worklist.emplace_back(Node->getOperand(0).getNode(), true);
      break;
    case ISD::TokenFactor:
      // Fan out into every incoming chain, propagating the flag unchanged.
      for (SDValue Op : Node->ops())
        Worklist.emplace_back(Op.getNode(), FoundCall);
      break;
    case ISD::LOAD:
      // Reached the target load: report whether a call was crossed on the
      // way here. Other loads fall through and are walked like any chained
      // node.
      if (Node == Ld)
        return FoundCall;
      [[fallthrough]];
    default:
      // Generic chained node: continue up its chain operand (operand 0).
      assert(Node->getOperand(0).getValueType() == MVT::Other &&
             "Invalid chain type");
      Worklist.emplace_back(Node->getOperand(0).getNode(), FoundCall);
      break;
    }
  }
  return false;
}
23011
23012unsigned
23013DAGCombiner::getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
23014 int64_t ElementSizeBytes) const {
23015 while (true) {
23016 // Find a store past the width of the first store.
23017 size_t StartIdx = 0;
23018 while ((StartIdx + 1 < StoreNodes.size()) &&
23019 StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
23020 StoreNodes[StartIdx + 1].OffsetFromBase)
23021 ++StartIdx;
23022
23023 // Bail if we don't have enough candidates to merge.
23024 if (StartIdx + 1 >= StoreNodes.size())
23025 return 0;
23026
23027 // Trim stores that overlapped with the first store.
23028 if (StartIdx)
23029 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
23030
23031 // Scan the memory operations on the chain and find the first
23032 // non-consecutive store memory address.
23033 unsigned NumConsecutiveStores = 1;
23034 int64_t StartAddress = StoreNodes[0].OffsetFromBase;
23035 // Check that the addresses are consecutive starting from the second
23036 // element in the list of stores.
23037 for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
23038 int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
23039 if (CurrAddress - StartAddress != (ElementSizeBytes * i))
23040 break;
23041 NumConsecutiveStores = i + 1;
23042 }
23043 if (NumConsecutiveStores > 1)
23044 return NumConsecutiveStores;
23045
23046 // There are no consecutive stores at the start of the list.
23047 // Remove the first store and try again.
23048 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
23049 }
23050}
23051
/// Try to replace runs of consecutive constant stores (the leading
/// \p NumConsecutiveStores entries of \p StoreNodes) with a single wider
/// integer, truncating-integer, or vector store. \p StoreNodes is consumed
/// from the front as runs are merged or rejected. Returns true if any merge
/// was performed.
bool DAGCombiner::tryStoreMergeOfConstants(
    SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
    EVT MemVT, SDNode *RootNode, bool AllowVectors) {
  LLVMContext &Context = *DAG.getContext();
  const DataLayout &DL = DAG.getDataLayout();
  int64_t ElementSizeBytes = MemVT.getStoreSize();
  unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
  bool MadeChange = false;

  // Store the constants into memory as one consecutive store.
  while (NumConsecutiveStores >= 2) {
    LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
    unsigned FirstStoreAS = FirstInChain->getAddressSpace();
    Align FirstStoreAlign = FirstInChain->getAlign();
    unsigned LastLegalType = 1;
    unsigned LastLegalVectorType = 1;
    bool LastIntegerTrunc = false;
    bool NonZero = false;
    unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
    for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
      StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
      SDValue StoredVal = ST->getValue();
      bool IsElementZero = false;
      if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
        IsElementZero = C->isZero();
      else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
        IsElementZero = C->getConstantFPValue()->isNullValue();
      else if (ISD::isBuildVectorAllZeros(StoredVal.getNode()))
        IsElementZero = true;
      if (IsElementZero) {
        // Remember where the first zero follows a non-zero value; the skip
        // logic below avoids stepping past it when a merge attempt fails.
        if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
          FirstZeroAfterNonZero = i;
      }
      NonZero |= !IsElementZero;

      // Find a legal type for the constant store.
      unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
      EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
      unsigned IsFast = 0;

      // Break early when size is too large to be legal.
      if (StoreTy.getSizeInBits() > TLI.getMaximumLegalStoreInBits())
        break;

      if (TLI.isTypeLegal(StoreTy) &&
          TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
                               DAG.getMachineFunction()) &&
          TLI.allowsMemoryAccess(Context, DL, StoreTy,
                                 *FirstInChain->getMemOperand(), &IsFast) &&
          IsFast) {
        LastIntegerTrunc = false;
        LastLegalType = i + 1;
        // Or check whether a truncstore is legal.
        // NOTE(review): the right-hand side of the comparison below
        // (presumably TargetLowering::TypePromoteInteger) appears to have
        // been elided in this rendering of the source — confirm upstream.
      } else if (TLI.getTypeAction(Context, StoreTy) ==
        EVT LegalizedStoredValTy =
            TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
        if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy,
                                  FirstStoreAlign, FirstStoreAS) &&
            TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy,
                                 DAG.getMachineFunction()) &&
            TLI.allowsMemoryAccess(Context, DL, StoreTy,
                                   *FirstInChain->getMemOperand(), &IsFast) &&
            IsFast) {
          LastIntegerTrunc = true;
          LastLegalType = i + 1;
        }
      }

      // We only use vectors if the target allows it and the function is not
      // marked with the noimplicitfloat attribute.
      if (TLI.storeOfVectorConstantIsCheap(!NonZero, MemVT, i + 1, FirstStoreAS) &&
          AllowVectors) {
        // Find a legal type for the vector store.
        unsigned Elts = (i + 1) * NumMemElts;
        EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
        if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
            TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&
            TLI.allowsMemoryAccess(Context, DL, Ty,
                                   *FirstInChain->getMemOperand(), &IsFast) &&
            IsFast)
          LastLegalVectorType = i + 1;
      }
    }

    bool UseVector = (LastLegalVectorType > LastLegalType) && AllowVectors;
    unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
    bool UseTrunc = LastIntegerTrunc && !UseVector;

    // Check if we found a legal integer type that creates a meaningful
    // merge.
    if (NumElem < 2) {
      // We know that candidate stores are in order and of correct
      // shape. While there is no mergeable sequence from the
      // beginning one may start later in the sequence. The only
      // reason a merge of size N could have failed where another of
      // the same size would not have, is if the alignment has
      // improved or we've dropped a non-zero value. Drop as many
      // candidates as we can here.
      unsigned NumSkip = 1;
      while ((NumSkip < NumConsecutiveStores) &&
             (NumSkip < FirstZeroAfterNonZero) &&
             (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
        NumSkip++;

      StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
      NumConsecutiveStores -= NumSkip;
      continue;
    }

    // Check that we can merge these candidates without causing a cycle.
    if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
                                                  RootNode)) {
      StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
      NumConsecutiveStores -= NumElem;
      continue;
    }

    MadeChange |= mergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
                                                  /*IsConstantSrc*/ true,
                                                  UseVector, UseTrunc);

    // Remove merged stores for next iteration.
    StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
    NumConsecutiveStores -= NumElem;
  }
  return MadeChange;
}
23180
/// Try to merge runs of consecutive stores of extracted vector
/// elements/subvectors (the leading \p NumConsecutiveStores entries of
/// \p StoreNodes) into a single wide vector store. \p StoreNodes is consumed
/// from the front as runs are merged or rejected. Returns true on any change.
bool DAGCombiner::tryStoreMergeOfExtracts(
    SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
    EVT MemVT, SDNode *RootNode) {
  LLVMContext &Context = *DAG.getContext();
  const DataLayout &DL = DAG.getDataLayout();
  unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
  bool MadeChange = false;

  // Loop on Consecutive Stores on success.
  while (NumConsecutiveStores >= 2) {
    LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
    unsigned FirstStoreAS = FirstInChain->getAddressSpace();
    Align FirstStoreAlign = FirstInChain->getAlign();
    unsigned NumStoresToMerge = 1;
    for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
      // Find a legal type for the vector store.
      unsigned Elts = (i + 1) * NumMemElts;
      EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
      unsigned IsFast = 0;

      // Break early when size is too large to be legal.
      // NOTE(review): the guarding size-limit condition for this break
      // appears to have been elided in this rendering of the source —
      // confirm against upstream.
        break;

      if (TLI.isTypeLegal(Ty) &&
          TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&
          TLI.allowsMemoryAccess(Context, DL, Ty,
                                 *FirstInChain->getMemOperand(), &IsFast) &&
          IsFast)
        NumStoresToMerge = i + 1;
    }

    // Check if we found a legal integer type creating a meaningful
    // merge.
    if (NumStoresToMerge < 2) {
      // We know that candidate stores are in order and of correct
      // shape. While there is no mergeable sequence from the
      // beginning one may start later in the sequence. The only
      // reason a merge of size N could have failed where another of
      // the same size would not have, is if the alignment has
      // improved. Drop as many candidates as we can here.
      unsigned NumSkip = 1;
      while ((NumSkip < NumConsecutiveStores) &&
             (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
        NumSkip++;

      StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
      NumConsecutiveStores -= NumSkip;
      continue;
    }

    // Check that we can merge these candidates without causing a cycle.
    if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumStoresToMerge,
                                                  RootNode)) {
      StoreNodes.erase(StoreNodes.begin(),
                       StoreNodes.begin() + NumStoresToMerge);
      NumConsecutiveStores -= NumStoresToMerge;
      continue;
    }

    MadeChange |= mergeStoresOfConstantsOrVecElts(
        StoreNodes, MemVT, NumStoresToMerge, /*IsConstantSrc*/ false,
        /*UseVector*/ true, /*UseTrunc*/ false);

    StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumStoresToMerge);
    NumConsecutiveStores -= NumStoresToMerge;
  }
  return MadeChange;
}
23250
/// Try to merge runs of consecutive stores whose stored values are loads from
/// consecutive addresses into a single wide load + wide store (vector,
/// integer, rotated integer pair, or truncstore/extload form). \p StoreNodes
/// is consumed from the front as runs are merged or rejected. Returns true on
/// any change.
bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
                                       unsigned NumConsecutiveStores, EVT MemVT,
                                       SDNode *RootNode, bool AllowVectors,
                                       bool IsNonTemporalStore,
                                       bool IsNonTemporalLoad) {
  LLVMContext &Context = *DAG.getContext();
  const DataLayout &DL = DAG.getDataLayout();
  int64_t ElementSizeBytes = MemVT.getStoreSize();
  unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
  bool MadeChange = false;

  // Look for load nodes which are used by the stored values.
  SmallVector<MemOpLink, 8> LoadNodes;

  // Find acceptable loads. Loads need to have the same chain (token factor),
  // must not be zext, volatile, indexed, and they must be consecutive.
  BaseIndexOffset LdBasePtr;

  for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
    StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
    // NOTE(review): a line extracting the stored value (the Val cast to a
    // LoadSDNode below) appears to have been elided in this rendering of
    // the source — confirm against upstream.
    LoadSDNode *Ld = cast<LoadSDNode>(Val);

    BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
    // If this is not the first ptr that we check.
    int64_t LdOffset = 0;
    if (LdBasePtr.getBase().getNode()) {
      // The base ptr must be the same.
      if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
        break;
    } else {
      // Check that all other base pointers are the same as this one.
      LdBasePtr = LdPtr;
    }

    // We found a potential memory operand to merge.
    LoadNodes.push_back(MemOpLink(Ld, LdOffset));
  }

  while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
    Align RequiredAlignment;
    bool NeedRotate = false;
    if (LoadNodes.size() == 2) {
      // If we have load/store pair instructions and we only have two values,
      // don't bother merging.
      if (TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
          StoreNodes[0].MemNode->getAlign() >= RequiredAlignment) {
        StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
        LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
        break;
      }
      // If the loads are reversed, see if we can rotate the halves into place.
      int64_t Offset0 = LoadNodes[0].OffsetFromBase;
      int64_t Offset1 = LoadNodes[1].OffsetFromBase;
      EVT PairVT = EVT::getIntegerVT(Context, ElementSizeBytes * 8 * 2);
      if (Offset0 - Offset1 == ElementSizeBytes &&
          (hasOperation(ISD::ROTL, PairVT) ||
           hasOperation(ISD::ROTR, PairVT))) {
        std::swap(LoadNodes[0], LoadNodes[1]);
        NeedRotate = true;
      }
    }
    LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
    unsigned FirstStoreAS = FirstInChain->getAddressSpace();
    Align FirstStoreAlign = FirstInChain->getAlign();
    LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);

    // Scan the memory operations on the chain and find the first
    // non-consecutive load memory address. These variables hold the index in
    // the store node array.

    unsigned LastConsecutiveLoad = 1;

    // This variable refers to the size and not index in the array.
    unsigned LastLegalVectorType = 1;
    unsigned LastLegalIntegerType = 1;
    bool isDereferenceable = true;
    bool DoIntegerTruncate = false;
    int64_t StartAddress = LoadNodes[0].OffsetFromBase;
    SDValue LoadChain = FirstLoad->getChain();
    for (unsigned i = 1; i < LoadNodes.size(); ++i) {
      // All loads must share the same chain.
      if (LoadNodes[i].MemNode->getChain() != LoadChain)
        break;

      int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
      if (CurrAddress - StartAddress != (ElementSizeBytes * i))
        break;
      LastConsecutiveLoad = i;

      if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
        isDereferenceable = false;

      // Find a legal type for the vector store.
      unsigned Elts = (i + 1) * NumMemElts;
      EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);

      // Break early when size is too large to be legal.
      if (StoreTy.getSizeInBits() > TLI.getMaximumLegalStoreInBits())
        break;

      unsigned IsFastSt = 0;
      unsigned IsFastLd = 0;
      // Don't try vector types if we need a rotate. We may still fail the
      // legality checks for the integer type, but we can't handle the rotate
      // case with vectors.
      // FIXME: We could use a shuffle in place of the rotate.
      if (!NeedRotate && TLI.isTypeLegal(StoreTy) &&
          TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
                               DAG.getMachineFunction()) &&
          TLI.allowsMemoryAccess(Context, DL, StoreTy,
                                 *FirstInChain->getMemOperand(), &IsFastSt) &&
          IsFastSt &&
          TLI.allowsMemoryAccess(Context, DL, StoreTy,
                                 *FirstLoad->getMemOperand(), &IsFastLd) &&
          IsFastLd) {
        LastLegalVectorType = i + 1;
      }

      // Find a legal type for the integer store.
      unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
      StoreTy = EVT::getIntegerVT(Context, SizeInBits);
      if (TLI.isTypeLegal(StoreTy) &&
          TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
                               DAG.getMachineFunction()) &&
          TLI.allowsMemoryAccess(Context, DL, StoreTy,
                                 *FirstInChain->getMemOperand(), &IsFastSt) &&
          IsFastSt &&
          TLI.allowsMemoryAccess(Context, DL, StoreTy,
                                 *FirstLoad->getMemOperand(), &IsFastLd) &&
          IsFastLd) {
        LastLegalIntegerType = i + 1;
        DoIntegerTruncate = false;
        // Or check whether a truncstore and extload is legal.
        // NOTE(review): the right-hand side of the comparison below
        // (presumably TargetLowering::TypePromoteInteger) appears to have
        // been elided in this rendering of the source — confirm upstream.
      } else if (TLI.getTypeAction(Context, StoreTy) ==
        EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
        if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy,
                                  FirstStoreAlign, FirstStoreAS) &&
            TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy,
                                 DAG.getMachineFunction()) &&
            TLI.isLoadLegal(LegalizedStoredValTy, StoreTy,
                            FirstLoad->getAlign(), FirstLoad->getAddressSpace(),
                            ISD::ZEXTLOAD, false) &&
            TLI.isLoadLegal(LegalizedStoredValTy, StoreTy,
                            FirstLoad->getAlign(), FirstLoad->getAddressSpace(),
                            ISD::SEXTLOAD, false) &&
            TLI.isLoadLegal(LegalizedStoredValTy, StoreTy,
                            FirstLoad->getAlign(), FirstLoad->getAddressSpace(),
                            ISD::EXTLOAD, false) &&
            TLI.allowsMemoryAccess(Context, DL, StoreTy,
                                   *FirstInChain->getMemOperand(), &IsFastSt) &&
            IsFastSt &&
            TLI.allowsMemoryAccess(Context, DL, StoreTy,
                                   *FirstLoad->getMemOperand(), &IsFastLd) &&
            IsFastLd) {
          LastLegalIntegerType = i + 1;
          DoIntegerTruncate = true;
        }
      }
    }

    // Only use vector types if the vector type is larger than the integer
    // type. If they are the same, use integers.
    bool UseVectorTy =
        LastLegalVectorType > LastLegalIntegerType && AllowVectors;
    unsigned LastLegalType =
        std::max(LastLegalVectorType, LastLegalIntegerType);

    // We add +1 here because the LastXXX variables refer to location while
    // the NumElem refers to array/index size.
    unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
    NumElem = std::min(LastLegalType, NumElem);
    Align FirstLoadAlign = FirstLoad->getAlign();

    if (NumElem < 2) {
      // We know that candidate stores are in order and of correct
      // shape. While there is no mergeable sequence from the
      // beginning one may start later in the sequence. The only
      // reason a merge of size N could have failed where another of
      // the same size would not have is if the alignment or either
      // the load or store has improved. Drop as many candidates as we
      // can here.
      unsigned NumSkip = 1;
      while ((NumSkip < LoadNodes.size()) &&
             (LoadNodes[NumSkip].MemNode->getAlign() <= FirstLoadAlign) &&
             (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
        NumSkip++;
      StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
      LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
      NumConsecutiveStores -= NumSkip;
      continue;
    }

    // Check that we can merge these candidates without causing a cycle.
    if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
                                                  RootNode)) {
      StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
      LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
      NumConsecutiveStores -= NumElem;
      continue;
    }

    // Find if it is better to use vectors or integers to load and store
    // to memory.
    EVT JointMemOpVT;
    if (UseVectorTy) {
      // Find a legal type for the vector store.
      unsigned Elts = NumElem * NumMemElts;
      JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
    } else {
      unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
      JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
    }

    // Check if there is a call in the load/store chain.
    if (!TLI.shouldMergeStoreOfLoadsOverCall(MemVT, JointMemOpVT) &&
        hasCallInLdStChain(cast<StoreSDNode>(StoreNodes[0].MemNode),
                           cast<LoadSDNode>(LoadNodes[0].MemNode))) {
      StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
      LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
      NumConsecutiveStores -= NumElem;
      continue;
    }

    SDLoc LoadDL(LoadNodes[0].MemNode);
    SDLoc StoreDL(StoreNodes[0].MemNode);

    // The merged loads are required to have the same incoming chain, so
    // using the first's chain is acceptable.

    SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
    bool CanReusePtrInfo = hasSameUnderlyingObj(StoreNodes);
    AddToWorklist(NewStoreChain.getNode());

    // NOTE(review): the ':' arm of this conditional, and the statement
    // applying the non-temporal flag under the 'if' below, appear to have
    // been elided in this rendering of the source — confirm upstream.
    MachineMemOperand::Flags LdMMOFlags =
        isDereferenceable ? MachineMemOperand::MODereferenceable
    if (IsNonTemporalLoad)

    LdMMOFlags |= TLI.getTargetMMOFlags(*FirstLoad);

    // NOTE(review): both arms of this conditional appear to have been elided
    // in this rendering of the source — confirm upstream.
    MachineMemOperand::Flags StMMOFlags = IsNonTemporalStore

    StMMOFlags |= TLI.getTargetMMOFlags(*StoreNodes[0].MemNode);

    SDValue NewLoad, NewStore;
    if (UseVectorTy || !DoIntegerTruncate) {
      NewLoad = DAG.getLoad(
          JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(),
          FirstLoad->getPointerInfo(), FirstLoadAlign, LdMMOFlags);
      SDValue StoreOp = NewLoad;
      if (NeedRotate) {
        unsigned LoadWidth = ElementSizeBytes * 8 * 2;
        assert(JointMemOpVT == EVT::getIntegerVT(Context, LoadWidth) &&
               "Unexpected type for rotate-able load pair");
        SDValue RotAmt =
            DAG.getShiftAmountConstant(LoadWidth / 2, JointMemOpVT, LoadDL);
        // Target can convert to the identical ROTR if it does not have ROTL.
        StoreOp = DAG.getNode(ISD::ROTL, LoadDL, JointMemOpVT, NewLoad, RotAmt);
      }
      NewStore = DAG.getStore(
          NewStoreChain, StoreDL, StoreOp, FirstInChain->getBasePtr(),
          CanReusePtrInfo ? FirstInChain->getPointerInfo()
                          : MachinePointerInfo(FirstStoreAS),
          FirstStoreAlign, StMMOFlags);
    } else { // This must be the truncstore/extload case
      EVT ExtendedTy =
          TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
      NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
                               FirstLoad->getChain(), FirstLoad->getBasePtr(),
                               FirstLoad->getPointerInfo(), JointMemOpVT,
                               FirstLoadAlign, LdMMOFlags);
      NewStore = DAG.getTruncStore(
          NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
          CanReusePtrInfo ? FirstInChain->getPointerInfo()
                          : MachinePointerInfo(FirstStoreAS),
          JointMemOpVT, FirstInChain->getAlign(),
          FirstInChain->getMemOperand()->getFlags());
    }

    // Transfer chain users from old loads to the new load.
    for (unsigned i = 0; i < NumElem; ++i) {
      LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
      // NOTE(review): the call that redirects Ld's chain result (first
      // argument of this continued call) appears to have been elided in
      // this rendering of the source — confirm upstream.
                                    SDValue(NewLoad.getNode(), 1));
    }

    // Replace all stores with the new store. Recursively remove corresponding
    // values if they are no longer used.
    for (unsigned i = 0; i < NumElem; ++i) {
      SDValue Val = StoreNodes[i].MemNode->getOperand(1);
      CombineTo(StoreNodes[i].MemNode, NewStore);
      if (Val->use_empty())
        recursivelyDeleteUnusedNodes(Val.getNode());
    }

    MadeChange = true;
    StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
    LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
    NumConsecutiveStores -= NumElem;
  }
  return MadeChange;
}
23558
bool DAGCombiner::mergeConsecutiveStores(StoreSDNode *St) {
  // Store merging is purely an optimization; skip it entirely at -O0 or when
  // it has been disabled on the command line.
  if (OptLevel == CodeGenOptLevel::None || !EnableStoreMerging)
    return false;

  // TODO: Extend this function to merge stores of scalable vectors.
  // (i.e. two <vscale x 8 x i8> stores can be merged to one <vscale x 16 x i8>
  // store since we know <vscale x 16 x i8> is exactly twice as large as
  // <vscale x 8 x i8>). Until then, bail out for scalable vectors.
  EVT MemVT = St->getMemoryVT();
  if (MemVT.isScalableVT())
    return false;
  // A merged store is at least twice as wide as MemVT; give up if even that
  // minimal result would exceed the widest store the target can perform.
  if (!MemVT.isSimple() ||
      MemVT.getSizeInBits() * 2 > TLI.getMaximumLegalStoreInBits())
    return false;

  // This function cannot currently deal with non-byte-sized memory sizes.
  int64_t ElementSizeBytes = MemVT.getStoreSize();
  if (ElementSizeBytes * 8 != (int64_t)MemVT.getSizeInBits())
    return false;

  // Do not bother looking at stored values that are not constants, loads, or
  // extracted vector elements.
  SDValue StoredVal = peekThroughBitcasts(St->getValue());
  const StoreSource StoreSrc = getStoreSource(StoredVal);
  if (StoreSrc == StoreSource::Unknown)
    return false;

  SmallVector<MemOpLink, 8> StoreNodes;
  // Find potential store merge candidates by searching through chain sub-DAG
  SDNode *RootNode = getStoreMergeCandidates(St, StoreNodes);

  // Check if there is anything to merge.
  if (StoreNodes.size() < 2)
    return false;

  // Sort the memory operands according to their distance from the
  // base pointer.
  llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) {
    return LHS.OffsetFromBase < RHS.OffsetFromBase;
  });

  // Merging into a vector store may implicitly use FP/vector registers, which
  // the noimplicitfloat attribute forbids.
  bool AllowVectors = !DAG.getMachineFunction().getFunction().hasFnAttribute(
      Attribute::NoImplicitFloat);
  bool IsNonTemporalStore = St->isNonTemporal();
  bool IsNonTemporalLoad = StoreSrc == StoreSource::Load &&
                           cast<LoadSDNode>(StoredVal)->isNonTemporal();

  // Store Merge attempts to merge the lowest stores. This generally
  // works out as if successful, as the remaining stores are checked
  // after the first collection of stores is merged. However, in the
  // case that a non-mergeable store is found first, e.g., {p[-2],
  // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent
  // mergeable cases. To prevent this, we prune such stores from the
  // front of StoreNodes here.
  bool MadeChange = false;
  while (StoreNodes.size() > 1) {
    unsigned NumConsecutiveStores =
        getConsecutiveStores(StoreNodes, ElementSizeBytes);
    // There are no more stores in the list to examine.
    if (NumConsecutiveStores == 0)
      return MadeChange;

    // We have at least 2 consecutive stores. Try to merge them.
    assert(NumConsecutiveStores >= 2 && "Expected at least 2 stores");
    // Dispatch on the kind of stored value; each helper consumes the matched
    // run from the front of StoreNodes.
    switch (StoreSrc) {
    case StoreSource::Constant:
      MadeChange |= tryStoreMergeOfConstants(StoreNodes, NumConsecutiveStores,
                                             MemVT, RootNode, AllowVectors);
      break;

    case StoreSource::Extract:
      MadeChange |= tryStoreMergeOfExtracts(StoreNodes, NumConsecutiveStores,
                                            MemVT, RootNode);
      break;

    case StoreSource::Load:
      MadeChange |= tryStoreMergeOfLoads(StoreNodes, NumConsecutiveStores,
                                         MemVT, RootNode, AllowVectors,
                                         IsNonTemporalStore, IsNonTemporalLoad);
      break;

    default:
      llvm_unreachable("Unhandled store source type");
    }
  }

  // Remember if we failed to optimize, to save compile time.
  if (!MadeChange)
    ChainsWithoutMergeableStores.insert(RootNode);

  return MadeChange;
}
23651
23652SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
23653 SDLoc SL(ST);
23654 SDValue ReplStore;
23655
23656 // Replace the chain to avoid dependency.
23657 if (ST->isTruncatingStore()) {
23658 ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
23659 ST->getBasePtr(), ST->getMemoryVT(),
23660 ST->getMemOperand());
23661 } else {
23662 ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
23663 ST->getMemOperand());
23664 }
23665
23666 // Create token to keep both nodes around.
23667 SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
23668 MVT::Other, ST->getChain(), ReplStore);
23669
23670 // Make sure the new and old chains are cleaned up.
23671 AddToWorklist(Token.getNode());
23672
23673 // Don't add users to work list.
23674 return CombineTo(ST, Token, false);
23675}
23676
// Replace a store of an FP immediate with an equivalent integer store (or,
// for f64 when i64 is not available, a pair of i32 stores) when that is
// legal, since integer immediates are usually cheaper to materialize.
SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
  SDValue Value = ST->getValue();
  // Target constants are already in their final, target-specific form.
  if (Value.getOpcode() == ISD::TargetConstantFP)
    return SDValue();

  // Only plain (non-truncating, non-indexed) stores are handled here.
  if (!ISD::isNormalStore(ST))
    return SDValue();

  SDLoc DL(ST);

  SDValue Chain = ST->getChain();
  SDValue Ptr = ST->getBasePtr();

  const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);

  // NOTE: If the original store is volatile, this transform must not increase
  // the number of stores. For example, on x86-32 an f64 can be stored in one
  // processor operation but an i64 (which is not legal) requires two. So the
  // transform should not be done in this case.

  SDValue Tmp;
  switch (CFP->getSimpleValueType(0).SimpleTy) {
  default:
    llvm_unreachable("Unknown FP type");
  case MVT::f16: // We don't do this for these yet.
  case MVT::bf16:
  case MVT::f80:
  case MVT::f128:
  case MVT::ppcf128:
    return SDValue();
  case MVT::f32:
    // An f32 bitcasts losslessly to i32; store the raw bits when an i32
    // store is (or will be) legal.
    if ((isTypeLegal(MVT::i32) && !LegalOperations && ST->isSimple()) ||
        TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
      Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
                            bitcastToAPInt().getZExtValue(), SDLoc(CFP),
                            MVT::i32);
      return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
    }

    return SDValue();
  case MVT::f64:
    // Preferred form: a single i64 store of the raw bits.
    if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
         ST->isSimple()) ||
        TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
      Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
                            getZExtValue(), SDLoc(CFP), MVT::i64);
      return DAG.getStore(Chain, DL, Tmp,
                          Ptr, ST->getMemOperand());
    }

    // Fallback: split into two i32 stores, but only for a simple store (the
    // split doubles the access count) and only when the FP immediate isn't
    // natively legal anyway.
    if (ST->isSimple() && TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32) &&
        !TLI.isFPImmLegal(CFP->getValueAPF(), MVT::f64)) {
      // Many FP stores are not made apparent until after legalize, e.g. for
      // argument passing. Since this is so common, custom legalize the
      // 64-bit integer store into two 32-bit stores.
      uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
      SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
      SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
      // Lo/Hi above are in little-endian order; swap for big-endian targets.
      if (DAG.getDataLayout().isBigEndian())
        std::swap(Lo, Hi);

      MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
      AAMDNodes AAInfo = ST->getAAInfo();

      SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
                                 ST->getBaseAlign(), MMOFlags, AAInfo);
      Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(4), DL);
      SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
                                 ST->getPointerInfo().getWithOffset(4),
                                 ST->getBaseAlign(), MMOFlags, AAInfo);
      // Both halves chain off the original chain; join them with a token.
      return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
                         St0, St1);
    }

    return SDValue();
  }
}
23754
23755// (store (insert_vector_elt (load p), x, i), p) -> (store x, p+offset)
23756//
23757// If a store of a load with an element inserted into it has no other
23758// uses in between the chain, then we can consider the vector store
23759// dead and replace it with just the single scalar element store.
23760SDValue DAGCombiner::replaceStoreOfInsertLoad(StoreSDNode *ST) {
23761 SDLoc DL(ST);
23762 SDValue Value = ST->getValue();
23763 SDValue Ptr = ST->getBasePtr();
23764 SDValue Chain = ST->getChain();
23765 if (Value.getOpcode() != ISD::INSERT_VECTOR_ELT || !Value.hasOneUse())
23766 return SDValue();
23767
23768 SDValue Elt = Value.getOperand(1);
23769 SDValue Idx = Value.getOperand(2);
23770
23771 // If the element isn't byte sized or is implicitly truncated then we can't
23772 // compute an offset.
23773 EVT EltVT = Elt.getValueType();
23774 if (!EltVT.isByteSized() ||
23775 EltVT != Value.getOperand(0).getValueType().getVectorElementType())
23776 return SDValue();
23777
23778 auto *Ld = dyn_cast<LoadSDNode>(Value.getOperand(0));
23779 if (!Ld || Ld->getBasePtr() != Ptr ||
23780 ST->getMemoryVT() != Ld->getMemoryVT() || !ST->isSimple() ||
23781 !ISD::isNormalStore(ST) ||
23782 Ld->getAddressSpace() != ST->getAddressSpace() ||
23784 return SDValue();
23785
23786 unsigned IsFast;
23787 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
23788 Elt.getValueType(), ST->getAddressSpace(),
23789 ST->getAlign(), ST->getMemOperand()->getFlags(),
23790 &IsFast) ||
23791 !IsFast)
23792 return SDValue();
23793
23794 MachinePointerInfo PointerInfo(ST->getAddressSpace());
23795
23796 // If the offset is a known constant then try to recover the pointer
23797 // info
23798 SDValue NewPtr;
23799 if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx)) {
23800 unsigned COffset = CIdx->getSExtValue() * EltVT.getSizeInBits() / 8;
23801 NewPtr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(COffset), DL);
23802 PointerInfo = ST->getPointerInfo().getWithOffset(COffset);
23803 } else {
23804 // The original DAG loaded the entire vector from memory, so arithmetic
23805 // within it must be inbounds.
23806 NewPtr = TLI.getInboundsVectorElementPointer(DAG, Ptr, Value.getValueType(),
23807 Idx);
23808 }
23809
23810 return DAG.getStore(Chain, DL, Elt, NewPtr, PointerInfo, ST->getAlign(),
23811 ST->getMemOperand()->getFlags());
23812}
23813
23814SDValue DAGCombiner::visitATOMIC_STORE(SDNode *N) {
23815 AtomicSDNode *ST = cast<AtomicSDNode>(N);
23816 SDValue Val = ST->getVal();
23817 EVT VT = Val.getValueType();
23818 EVT MemVT = ST->getMemoryVT();
23819
23820 if (MemVT.bitsLT(VT)) { // Is truncating store
23821 APInt TruncDemandedBits = APInt::getLowBitsSet(VT.getScalarSizeInBits(),
23822 MemVT.getScalarSizeInBits());
23823 // See if we can simplify the operation with SimplifyDemandedBits, which
23824 // only works if the value has a single use.
23825 if (SimplifyDemandedBits(Val, TruncDemandedBits))
23826 return SDValue(N, 0);
23827 }
23828
23829 return SDValue();
23830}
23831
23833 const SDLoc &Dl) {
23834 if (!Store->isSimple() || !ISD::isNormalStore(Store))
23835 return SDValue();
23836
23837 SDValue StoredVal = Store->getValue();
23838 SDValue StorePtr = Store->getBasePtr();
23839 SDValue StoreOffset = Store->getOffset();
23840 EVT VT = Store->getMemoryVT();
23841
23842 // Skip this combine for non-vector types and for <1 x ty> vectors, as they
23843 // will be scalarized later.
23844 if (!VT.isVector() || VT.isScalableVector() || VT.getVectorNumElements() == 1)
23845 return SDValue();
23846
23847 unsigned AddrSpace = Store->getAddressSpace();
23848 Align Alignment = Store->getAlign();
23849 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
23850
23851 if (!TLI.isOperationLegalOrCustom(ISD::MSTORE, VT) ||
23852 !TLI.allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment))
23853 return SDValue();
23854
23855 SDValue Mask, OtherVec, LoadCh;
23856 unsigned LoadPos;
23857 if (sd_match(StoredVal,
23858 m_VSelect(m_Value(Mask), m_Value(OtherVec),
23859 m_Load(m_Value(LoadCh), m_Specific(StorePtr),
23860 m_Specific(StoreOffset))))) {
23861 LoadPos = 2;
23862 } else if (sd_match(StoredVal,
23863 m_VSelect(m_Value(Mask),
23864 m_Load(m_Value(LoadCh), m_Specific(StorePtr),
23865 m_Specific(StoreOffset)),
23866 m_Value(OtherVec)))) {
23867 LoadPos = 1;
23868 } else {
23869 return SDValue();
23870 }
23871
23872 auto *Load = cast<LoadSDNode>(StoredVal.getOperand(LoadPos));
23873 if (!Load->isSimple() || !ISD::isNormalLoad(Load) ||
23874 Load->getAddressSpace() != AddrSpace)
23875 return SDValue();
23876
23877 if (!Store->getChain().reachesChainWithoutSideEffects(LoadCh))
23878 return SDValue();
23879
23880 if (LoadPos == 1)
23881 Mask = DAG.getNOT(Dl, Mask, Mask.getValueType());
23882
23883 return DAG.getMaskedStore(Store->getChain(), Dl, OtherVec, StorePtr,
23884 StoreOffset, Mask, VT, Store->getMemOperand(),
23885 Store->getAddressingMode());
23886}
23887
23888SDValue DAGCombiner::visitSTORE(SDNode *N) {
23889 StoreSDNode *ST = cast<StoreSDNode>(N);
23890 SDValue Chain = ST->getChain();
23891 SDValue Value = ST->getValue();
23892 SDValue Ptr = ST->getBasePtr();
23893
23894 // If this is a store of a bit convert, store the input value if the
23895 // resultant store does not need a higher alignment than the original.
23896 if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
23897 ST->isUnindexed()) {
23898 EVT SVT = Value.getOperand(0).getValueType();
23899 // If the store is volatile, we only want to change the store type if the
23900 // resulting store is legal. Otherwise we might increase the number of
23901 // memory accesses. We don't care if the original type was legal or not
23902 // as we assume software couldn't rely on the number of accesses of an
23903 // illegal type.
23904 // TODO: May be able to relax for unordered atomics (see D66309)
23905 if (((!LegalOperations && ST->isSimple()) ||
23906 TLI.isOperationLegal(ISD::STORE, SVT)) &&
23907 TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT,
23908 DAG, *ST->getMemOperand())) {
23909 return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
23910 ST->getMemOperand());
23911 }
23912 }
23913
23914 // Turn 'store undef, Ptr' -> nothing.
23915 if (Value.isUndef() && ST->isUnindexed() && !ST->isVolatile())
23916 return Chain;
23917
23918 // Try to infer better alignment information than the store already has.
23919 if (OptLevel != CodeGenOptLevel::None && ST->isUnindexed() &&
23920 !ST->isAtomic()) {
23921 if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
23922 if (*Alignment > ST->getAlign() &&
23923 isAligned(*Alignment, ST->getSrcValueOffset())) {
23924 SDValue NewStore =
23925 DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
23926 ST->getMemoryVT(), *Alignment,
23927 ST->getMemOperand()->getFlags(), ST->getAAInfo());
23928 // NewStore will always be N as we are only refining the alignment
23929 assert(NewStore.getNode() == N);
23930 (void)NewStore;
23931 }
23932 }
23933 }
23934
23935 // Try transforming a pair floating point load / store ops to integer
23936 // load / store ops.
23937 if (SDValue NewST = TransformFPLoadStorePair(N))
23938 return NewST;
23939
23940 // Try transforming several stores into STORE (BSWAP).
23941 if (SDValue Store = mergeTruncStores(ST))
23942 return Store;
23943
23944 if (ST->isUnindexed()) {
23945 // Walk up chain skipping non-aliasing memory nodes, on this store and any
23946 // adjacent stores.
23947 if (findBetterNeighborChains(ST)) {
23948 // replaceStoreChain uses CombineTo, which handled all of the worklist
23949 // manipulation. Return the original node to not do anything else.
23950 return SDValue(ST, 0);
23951 }
23952 Chain = ST->getChain();
23953 }
23954
23955 // FIXME: is there such a thing as a truncating indexed store?
23956 if (ST->isTruncatingStore() && ST->isUnindexed() &&
23957 Value.getValueType().isInteger() &&
23959 !cast<ConstantSDNode>(Value)->isOpaque())) {
23960 // Convert a truncating store of a extension into a standard store.
23961 if ((Value.getOpcode() == ISD::ZERO_EXTEND ||
23962 Value.getOpcode() == ISD::SIGN_EXTEND ||
23963 Value.getOpcode() == ISD::ANY_EXTEND) &&
23964 Value.getOperand(0).getValueType() == ST->getMemoryVT() &&
23965 TLI.isOperationLegalOrCustom(ISD::STORE, ST->getMemoryVT()))
23966 return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
23967 ST->getMemOperand());
23968
23969 APInt TruncDemandedBits =
23970 APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
23971 ST->getMemoryVT().getScalarSizeInBits());
23972
23973 // See if we can simplify the operation with SimplifyDemandedBits, which
23974 // only works if the value has a single use.
23975 AddToWorklist(Value.getNode());
23976 if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
23977 // Re-visit the store if anything changed and the store hasn't been merged
23978 // with another node (N is deleted) SimplifyDemandedBits will add Value's
23979 // node back to the worklist if necessary, but we also need to re-visit
23980 // the Store node itself.
23981 if (N->getOpcode() != ISD::DELETED_NODE)
23982 AddToWorklist(N);
23983 return SDValue(N, 0);
23984 }
23985
23986 // Otherwise, see if we can simplify the input to this truncstore with
23987 // knowledge that only the low bits are being used. For example:
23988 // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
23989 if (SDValue Shorter =
23990 TLI.SimplifyMultipleUseDemandedBits(Value, TruncDemandedBits, DAG))
23991 return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(),
23992 ST->getMemOperand());
23993
23994 // If we're storing a truncated constant, see if we can simplify it.
23995 // TODO: Move this to targetShrinkDemandedConstant?
23996 if (auto *Cst = dyn_cast<ConstantSDNode>(Value))
23997 if (!Cst->isOpaque()) {
23998 const APInt &CValue = Cst->getAPIntValue();
23999 APInt NewVal = CValue & TruncDemandedBits;
24000 if (NewVal != CValue) {
24001 SDValue Shorter =
24002 DAG.getConstant(NewVal, SDLoc(N), Value.getValueType());
24003 return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr,
24004 ST->getMemoryVT(), ST->getMemOperand());
24005 }
24006 }
24007 }
24008
24009 // If this is a load followed by a store to the same location, then the store
24010 // is dead/noop. Peek through any truncates if canCombineTruncStore failed.
24011 // TODO: Add big-endian truncate support with test coverage.
24012 // TODO: Can relax for unordered atomics (see D66309)
24013 SDValue TruncVal = DAG.getDataLayout().isLittleEndian()
24015 : Value;
24016 if (auto *Ld = dyn_cast<LoadSDNode>(TruncVal)) {
24017 if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
24018 ST->isUnindexed() && ST->isSimple() &&
24019 Ld->getAddressSpace() == ST->getAddressSpace() &&
24020 // There can't be any side effects between the load and store, such as
24021 // a call or store.
24023 // The store is dead, remove it.
24024 return Chain;
24025 }
24026 }
24027
24028 // Try scalarizing vector stores of loads where we only change one element
24029 if (SDValue NewST = replaceStoreOfInsertLoad(ST))
24030 return NewST;
24031
24032 // TODO: Can relax for unordered atomics (see D66309)
24033 if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
24034 if (ST->isUnindexed() && ST->isSimple() &&
24035 ST1->isUnindexed() && ST1->isSimple()) {
24036 if (OptLevel != CodeGenOptLevel::None && ST1->getBasePtr() == Ptr &&
24037 ST1->getValue() == Value && ST->getMemoryVT() == ST1->getMemoryVT() &&
24038 ST->getAddressSpace() == ST1->getAddressSpace()) {
24039 // If this is a store followed by a store with the same value to the
24040 // same location, then the store is dead/noop.
24041 return Chain;
24042 }
24043
24044 if (OptLevel != CodeGenOptLevel::None && ST1->hasOneUse() &&
24045 !ST1->getBasePtr().isUndef() &&
24046 ST->getAddressSpace() == ST1->getAddressSpace()) {
24047 // If we consider two stores and one smaller in size is a scalable
24048 // vector type and another one a bigger size store with a fixed type,
24049 // then we could not allow the scalable store removal because we don't
24050 // know its final size in the end.
24051 if (ST->getMemoryVT().isScalableVector() ||
24052 ST1->getMemoryVT().isScalableVector()) {
24053 if (ST1->getBasePtr() == Ptr &&
24054 TypeSize::isKnownLE(ST1->getMemoryVT().getStoreSize(),
24055 ST->getMemoryVT().getStoreSize())) {
24056 CombineTo(ST1, ST1->getChain());
24057 return SDValue(N, 0);
24058 }
24059 } else {
24060 const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG);
24061 const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG);
24062 // If this is a store who's preceding store to a subset of the current
24063 // location and no one other node is chained to that store we can
24064 // effectively drop the store. Do not remove stores to undef as they
24065 // may be used as data sinks.
24066 if (STBase.contains(DAG, ST->getMemoryVT().getFixedSizeInBits(),
24067 ChainBase,
24068 ST1->getMemoryVT().getFixedSizeInBits())) {
24069 CombineTo(ST1, ST1->getChain());
24070 return SDValue(N, 0);
24071 }
24072 }
24073 }
24074 }
24075 }
24076
24077 // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
24078 // truncating store. We can do this even if this is already a truncstore.
24079 if ((Value.getOpcode() == ISD::FP_ROUND ||
24080 Value.getOpcode() == ISD::TRUNCATE) &&
24081 Value->hasOneUse() && ST->isUnindexed() &&
24082 TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
24083 ST->getMemoryVT(), ST->getAlign(),
24084 ST->getAddressSpace(), LegalOperations)) {
24085 return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
24086 ST->getMemoryVT(), ST->getMemOperand());
24087 }
24088
24089 // Always perform this optimization before types are legal. If the target
24090 // prefers, also try this after legalization to catch stores that were created
24091 // by intrinsics or other nodes.
24092 if (!LegalTypes || (TLI.mergeStoresAfterLegalization(ST->getMemoryVT()))) {
24093 while (true) {
24094 // There can be multiple store sequences on the same chain.
24095 // Keep trying to merge store sequences until we are unable to do so
24096 // or until we merge the last store on the chain.
24097 bool Changed = mergeConsecutiveStores(ST);
24098 if (!Changed) break;
24099 // Return N as merge only uses CombineTo and no worklist clean
24100 // up is necessary.
24101 if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
24102 return SDValue(N, 0);
24103 }
24104 }
24105
24106 // Try transforming N to an indexed store.
24107 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
24108 return SDValue(N, 0);
24109
24110 // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
24111 //
24112 // Make sure to do this only after attempting to merge stores in order to
24113 // avoid changing the types of some subset of stores due to visit order,
24114 // preventing their merging.
24115 if (isa<ConstantFPSDNode>(ST->getValue())) {
24116 if (SDValue NewSt = replaceStoreOfFPConstant(ST))
24117 return NewSt;
24118 }
24119
24120 if (SDValue NewSt = splitMergedValStore(ST))
24121 return NewSt;
24122
24123 if (SDValue MaskedStore = foldToMaskedStore(ST, DAG, SDLoc(N)))
24124 return MaskedStore;
24125
24126 return ReduceLoadOpStoreWidth(N);
24127}
24128
24129SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
24130 const auto *LifetimeEnd = cast<LifetimeSDNode>(N);
24131 const BaseIndexOffset LifetimeEndBase(N->getOperand(1), SDValue(), 0, false);
24132
24133 // We walk up the chains to find stores.
24134 SmallVector<SDValue, 8> Chains = {N->getOperand(0)};
24135 while (!Chains.empty()) {
24136 SDValue Chain = Chains.pop_back_val();
24137 if (!Chain.hasOneUse())
24138 continue;
24139 switch (Chain.getOpcode()) {
24140 case ISD::TokenFactor:
24141 for (unsigned Nops = Chain.getNumOperands(); Nops;)
24142 Chains.push_back(Chain.getOperand(--Nops));
24143 break;
24145 case ISD::LIFETIME_END:
24146 // We can forward past any lifetime start/end that can be proven not to
24147 // alias the node.
24148 if (!mayAlias(Chain.getNode(), N))
24149 Chains.push_back(Chain.getOperand(0));
24150 break;
24151 case ISD::STORE: {
24152 StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain);
24153 // TODO: Can relax for unordered atomics (see D66309)
24154 if (!ST->isSimple() || ST->isIndexed())
24155 continue;
24156 const TypeSize StoreSize = ST->getMemoryVT().getStoreSize();
24157 // The bounds of a scalable store are not known until runtime, so this
24158 // store cannot be elided.
24159 if (StoreSize.isScalable())
24160 continue;
24161 const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG);
24162 // If we store purely within object bounds just before its lifetime ends,
24163 // we can remove the store.
24164 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
24165 if (LifetimeEndBase.contains(
24166 DAG, MFI.getObjectSize(LifetimeEnd->getFrameIndex()) * 8,
24167 StoreBase, StoreSize.getFixedValue() * 8)) {
24168 LLVM_DEBUG(dbgs() << "\nRemoving store:"; StoreBase.dump();
24169 dbgs() << "\nwithin LIFETIME_END of : ";
24170 LifetimeEndBase.dump(); dbgs() << "\n");
24171 CombineTo(ST, ST->getChain());
24172 return SDValue(N, 0);
24173 }
24174 }
24175 }
24176 }
24177 return SDValue();
24178}
24179
/// For the instruction sequence of store below, F and I values
/// are bundled together as an i64 value before being stored into memory.
/// Sometimes it is more efficient to generate separate stores for F and I,
/// which can remove the bitwise instructions or sink them to colder places.
///
/// (store (or (zext (bitcast F to i32) to i64),
///            (shl (zext I to i64), 32)), addr)  -->
/// (store F, addr) and (store I, addr+4)
///
/// Similarly, splitting for other merged store can also be beneficial, like:
/// For pair of {i32, i32}, i64 store --> two i32 stores.
/// For pair of {i32, i16}, i64 store --> two i32 stores.
/// For pair of {i16, i16}, i32 store --> two i16 stores.
/// For pair of {i16, i8},  i32 store --> two i16 stores.
/// For pair of {i8, i8},   i16 store --> two i8 stores.
///
/// We allow each target to determine specifically which kind of splitting is
/// supported.
///
/// The store patterns are commonly seen from the simple code snippet below
/// if only std::make_pair(...) is sroa transformed before inlined into hoo.
/// void goo(const std::pair<int, float> &);
/// hoo() {
///   ...
///   goo(std::make_pair(tmp, ftmp));
///   ...
/// }
///
SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
  // Splitting is purely an optimization; don't spend time on it at -O0.
  if (OptLevel == CodeGenOptLevel::None)
    return SDValue();

  // Can't change the number of memory accesses for a volatile store or break
  // atomicity for an atomic one.
  if (!ST->isSimple())
    return SDValue();

  SDValue Val = ST->getValue();
  SDLoc DL(ST);

  // Match OR operand.
  if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
    return SDValue();

  // Match SHL operand and get Lower and Higher parts of Val.
  SDValue Op1 = Val.getOperand(0);
  SDValue Op2 = Val.getOperand(1);
  SDValue Lo, Hi;
  // The SHL may be on either side of the OR; canonicalize it into Op1.
  if (Op1.getOpcode() != ISD::SHL) {
    std::swap(Op1, Op2);
    if (Op1.getOpcode() != ISD::SHL)
      return SDValue();
  }
  Lo = Op2;
  Hi = Op1.getOperand(0);
  if (!Op1.hasOneUse())
    return SDValue();

  // Match shift amount to HalfValBitSize.
  unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
  ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
  if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
    return SDValue();

  // Lo and Hi are zero-extended from int with size less equal than 32
  // to i64.
  if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
      !Lo.getOperand(0).getValueType().isScalarInteger() ||
      Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
      Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
      !Hi.getOperand(0).getValueType().isScalarInteger() ||
      Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
    return SDValue();

  // Use the EVT of low and high parts before bitcast as the input
  // of target query.
  EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
                  ? Lo.getOperand(0).getValueType()
                  : Lo.getValueType();
  EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
                   ? Hi.getOperand(0).getValueType()
                   : Hi.getValueType();
  // Let the target decide whether two narrow stores beat the bit-merge.
  if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
    return SDValue();

  // Start to split store.
  MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
  AAMDNodes AAInfo = ST->getAAInfo();

  // Change the sizes of Lo and Hi's value types to HalfValBitSize.
  EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
  Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
  Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));

  SDValue Chain = ST->getChain();
  SDValue Ptr = ST->getBasePtr();
  // Lower value store.
  SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
                             ST->getBaseAlign(), MMOFlags, AAInfo);
  Ptr =
      DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(HalfValBitSize / 8), DL);
  // Higher value store.
  SDValue St1 = DAG.getStore(
      St0, DL, Hi, Ptr, ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
      ST->getBaseAlign(), MMOFlags, AAInfo);
  return St1;
}
24287
24288// Merge an insertion into an existing shuffle:
24289// (insert_vector_elt (vector_shuffle X, Y, Mask),
24290// .(extract_vector_elt X, N), InsIndex)
24291// --> (vector_shuffle X, Y, NewMask)
24292// and variations where shuffle operands may be CONCAT_VECTORS.
24294 SmallVectorImpl<int> &NewMask, SDValue Elt,
24295 unsigned InsIndex) {
24296 if (Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
24298 return false;
24299
24300 // Vec's operand 0 is using indices from 0 to N-1 and
24301 // operand 1 from N to 2N - 1, where N is the number of
24302 // elements in the vectors.
24303 SDValue InsertVal0 = Elt.getOperand(0);
24304 int ElementOffset = -1;
24305
24306 // We explore the inputs of the shuffle in order to see if we find the
24307 // source of the extract_vector_elt. If so, we can use it to modify the
24308 // shuffle rather than perform an insert_vector_elt.
24310 ArgWorkList.emplace_back(Mask.size(), Y);
24311 ArgWorkList.emplace_back(0, X);
24312
24313 while (!ArgWorkList.empty()) {
24314 int ArgOffset;
24315 SDValue ArgVal;
24316 std::tie(ArgOffset, ArgVal) = ArgWorkList.pop_back_val();
24317
24318 if (ArgVal == InsertVal0) {
24319 ElementOffset = ArgOffset;
24320 break;
24321 }
24322
24323 // Peek through concat_vector.
24324 if (ArgVal.getOpcode() == ISD::CONCAT_VECTORS) {
24325 int CurrentArgOffset =
24326 ArgOffset + ArgVal.getValueType().getVectorNumElements();
24327 int Step = ArgVal.getOperand(0).getValueType().getVectorNumElements();
24328 for (SDValue Op : reverse(ArgVal->ops())) {
24329 CurrentArgOffset -= Step;
24330 ArgWorkList.emplace_back(CurrentArgOffset, Op);
24331 }
24332
24333 // Make sure we went through all the elements and did not screw up index
24334 // computation.
24335 assert(CurrentArgOffset == ArgOffset);
24336 }
24337 }
24338
24339 // If we failed to find a match, see if we can replace an UNDEF shuffle
24340 // operand.
24341 if (ElementOffset == -1) {
24342 if (!Y.isUndef() || InsertVal0.getValueType() != Y.getValueType())
24343 return false;
24344 ElementOffset = Mask.size();
24345 Y = InsertVal0;
24346 }
24347
24348 NewMask.assign(Mask.begin(), Mask.end());
24349 NewMask[InsIndex] = ElementOffset + Elt.getConstantOperandVal(1);
24350 assert(NewMask[InsIndex] < (int)(2 * Mask.size()) && NewMask[InsIndex] >= 0 &&
24351 "NewMask[InsIndex] is out of bound");
24352 return true;
24353}
24354
24355// Merge an insertion into an existing shuffle:
24356// (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N),
24357// InsIndex)
24358// --> (vector_shuffle X, Y) and variations where shuffle operands may be
24359// CONCAT_VECTORS.
24360SDValue DAGCombiner::mergeInsertEltWithShuffle(SDNode *N, unsigned InsIndex) {
24361 assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
24362 "Expected extract_vector_elt");
24363 SDValue InsertVal = N->getOperand(1);
24364 SDValue Vec = N->getOperand(0);
24365
24366 auto *SVN = dyn_cast<ShuffleVectorSDNode>(Vec);
24367 if (!SVN || !Vec.hasOneUse())
24368 return SDValue();
24369
24370 ArrayRef<int> Mask = SVN->getMask();
24371 SDValue X = Vec.getOperand(0);
24372 SDValue Y = Vec.getOperand(1);
24373
24374 SmallVector<int, 16> NewMask(Mask);
24375 if (mergeEltWithShuffle(X, Y, Mask, NewMask, InsertVal, InsIndex)) {
24376 SDValue LegalShuffle = TLI.buildLegalVectorShuffle(
24377 Vec.getValueType(), SDLoc(N), X, Y, NewMask, DAG);
24378 if (LegalShuffle)
24379 return LegalShuffle;
24380 }
24381
24382 return SDValue();
24383}
24384
24385// Convert a disguised subvector insertion into a shuffle:
24386// insert_vector_elt V, (bitcast X from vector type), IdxC -->
24387// bitcast(shuffle (bitcast V), (extended X), Mask)
24388// Note: We do not use an insert_subvector node because that requires a
24389// legal subvector type.
24390SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
24391 assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
24392 "Expected extract_vector_elt");
24393 SDValue InsertVal = N->getOperand(1);
24394
24395 if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
24396 !InsertVal.getOperand(0).getValueType().isVector())
24397 return SDValue();
24398
24399 SDValue SubVec = InsertVal.getOperand(0);
24400 SDValue DestVec = N->getOperand(0);
24401 EVT SubVecVT = SubVec.getValueType();
24402 EVT VT = DestVec.getValueType();
24403 unsigned NumSrcElts = SubVecVT.getVectorNumElements();
24404 // Bail out if the inserted value is larger than the vector element, as
24405 // insert_vector_elt performs an implicit truncation in this case.
24406 if (InsertVal.getValueType() != VT.getVectorElementType())
24407 return SDValue();
24408 // If the source only has a single vector element, the cost of creating adding
24409 // it to a vector is likely to exceed the cost of a insert_vector_elt.
24410 if (NumSrcElts == 1)
24411 return SDValue();
24412 unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits();
24413 unsigned NumMaskVals = ExtendRatio * NumSrcElts;
24414
24415 // Step 1: Create a shuffle mask that implements this insert operation. The
24416 // vector that we are inserting into will be operand 0 of the shuffle, so
24417 // those elements are just 'i'. The inserted subvector is in the first
24418 // positions of operand 1 of the shuffle. Example:
24419 // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7}
24420 SmallVector<int, 16> Mask(NumMaskVals);
24421 for (unsigned i = 0; i != NumMaskVals; ++i) {
24422 if (i / NumSrcElts == InsIndex)
24423 Mask[i] = (i % NumSrcElts) + NumMaskVals;
24424 else
24425 Mask[i] = i;
24426 }
24427
24428 // Bail out if the target can not handle the shuffle we want to create.
24429 EVT SubVecEltVT = SubVecVT.getVectorElementType();
24430 EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals);
24431 if (!TLI.isShuffleMaskLegal(Mask, ShufVT))
24432 return SDValue();
24433
24434 // Step 2: Create a wide vector from the inserted source vector by appending
24435 // poison elements. This is the same size as our destination vector.
24436 SDLoc DL(N);
24437 SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getPOISON(SubVecVT));
24438 ConcatOps[0] = SubVec;
24439 SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps);
24440
24441 // Step 3: Shuffle in the padded subvector.
24442 SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec);
24443 SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask);
24444 AddToWorklist(PaddedSubV.getNode());
24445 AddToWorklist(DestVecBC.getNode());
24446 AddToWorklist(Shuf.getNode());
24447 return DAG.getBitcast(VT, Shuf);
24448}
24449
24450// Combine insert(shuffle(load, <u,0,1,2>), load, 0) into a single load if
24451// possible and the new load will be quick. We use more loads but less shuffles
24452// and inserts.
24453SDValue DAGCombiner::combineInsertEltToLoad(SDNode *N, unsigned InsIndex) {
24454 EVT VT = N->getValueType(0);
24455
24456 // InsIndex is expected to be the first of last lane.
24457 if (!VT.isFixedLengthVector() ||
24458 (InsIndex != 0 && InsIndex != VT.getVectorNumElements() - 1))
24459 return SDValue();
24460
24461 // Look for a shuffle with the mask u,0,1,2,3,4,5,6 or 1,2,3,4,5,6,7,u
24462 // depending on the InsIndex.
24463 auto *Shuffle = dyn_cast<ShuffleVectorSDNode>(N->getOperand(0));
24464 SDValue Scalar = N->getOperand(1);
24465 if (!Shuffle || !all_of(enumerate(Shuffle->getMask()), [&](auto P) {
24466 return InsIndex == P.index() || P.value() < 0 ||
24467 (InsIndex == 0 && P.value() == (int)P.index() - 1) ||
24468 (InsIndex == VT.getVectorNumElements() - 1 &&
24469 P.value() == (int)P.index() + 1);
24470 }))
24471 return SDValue();
24472
24473 // We optionally skip over an extend so long as both loads are extended in the
24474 // same way from the same type.
24475 unsigned Extend = 0;
24476 if (Scalar.getOpcode() == ISD::ZERO_EXTEND ||
24477 Scalar.getOpcode() == ISD::SIGN_EXTEND ||
24478 Scalar.getOpcode() == ISD::ANY_EXTEND) {
24479 Extend = Scalar.getOpcode();
24480 Scalar = Scalar.getOperand(0);
24481 }
24482
24483 auto *ScalarLoad = dyn_cast<LoadSDNode>(Scalar);
24484 if (!ScalarLoad)
24485 return SDValue();
24486
24487 SDValue Vec = Shuffle->getOperand(0);
24488 if (Extend) {
24489 if (Vec.getOpcode() != Extend)
24490 return SDValue();
24491 Vec = Vec.getOperand(0);
24492 }
24493 auto *VecLoad = dyn_cast<LoadSDNode>(Vec);
24494 if (!VecLoad || Vec.getValueType().getScalarType() != Scalar.getValueType())
24495 return SDValue();
24496
24497 int EltSize = ScalarLoad->getValueType(0).getScalarSizeInBits();
24498 if (EltSize == 0 || EltSize % 8 != 0 || !ScalarLoad->isSimple() ||
24499 !VecLoad->isSimple() || VecLoad->getExtensionType() != ISD::NON_EXTLOAD ||
24500 ScalarLoad->getExtensionType() != ISD::NON_EXTLOAD ||
24501 ScalarLoad->getAddressSpace() != VecLoad->getAddressSpace())
24502 return SDValue();
24503
24504 // Check that the offset between the pointers to produce a single continuous
24505 // load.
24506 if (InsIndex == 0) {
24507 if (!DAG.areNonVolatileConsecutiveLoads(ScalarLoad, VecLoad, EltSize / 8,
24508 -1))
24509 return SDValue();
24510 } else {
24512 VecLoad, ScalarLoad, VT.getVectorNumElements() * EltSize / 8, -1))
24513 return SDValue();
24514 }
24515
24516 // And that the new unaligned load will be fast.
24517 unsigned IsFast = 0;
24518 Align NewAlign = commonAlignment(VecLoad->getAlign(), EltSize / 8);
24519 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
24520 Vec.getValueType(), VecLoad->getAddressSpace(),
24521 NewAlign, VecLoad->getMemOperand()->getFlags(),
24522 &IsFast) ||
24523 !IsFast)
24524 return SDValue();
24525
24526 // Calculate the new Ptr and create the new load.
24527 SDLoc DL(N);
24528 SDValue Ptr = ScalarLoad->getBasePtr();
24529 if (InsIndex != 0)
24530 Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), VecLoad->getBasePtr(),
24531 DAG.getConstant(EltSize / 8, DL, Ptr.getValueType()));
24532 MachinePointerInfo PtrInfo =
24533 InsIndex == 0 ? ScalarLoad->getPointerInfo()
24534 : VecLoad->getPointerInfo().getWithOffset(EltSize / 8);
24535
24536 SDValue Load = DAG.getLoad(VecLoad->getValueType(0), DL,
24537 ScalarLoad->getChain(), Ptr, PtrInfo, NewAlign);
24538 DAG.makeEquivalentMemoryOrdering(ScalarLoad, Load.getValue(1));
24539 DAG.makeEquivalentMemoryOrdering(VecLoad, Load.getValue(1));
24540 return Extend ? DAG.getNode(Extend, DL, VT, Load) : Load;
24541}
24542
// Combine an INSERT_VECTOR_ELT node. Folds redundant and undef/poison
// inserts, splats or stack-lowers variable-index insert chains, merges the
// insert into an existing shuffle or adjacent load, and attempts to rebuild
// a whole chain of constant-index inserts as a BUILD_VECTOR (or as an AND/OR
// mask when every inserted value is 0/-1).
// NOTE(review): several lines in this copy of the function appear truncated
// (missing declarations/expressions); flagged inline below - verify against
// the upstream sources before relying on this text.
SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
  SDValue InVec = N->getOperand(0);
  SDValue InVal = N->getOperand(1);
  SDValue EltNo = N->getOperand(2);
  SDLoc DL(N);

  EVT VT = InVec.getValueType();
  auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);

  // Insert into out-of-bounds element is poison.
  if (IndexC && VT.isFixedLengthVector() &&
      IndexC->getZExtValue() >= VT.getVectorNumElements())
    return DAG.getPOISON(VT);

  // Remove redundant insertions:
  // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
  if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
      InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
    return InVec;

  // Remove insert of UNDEF/POISON elements.
  if (InVal.isUndef()) {
    if (InVal.getOpcode() == ISD::POISON || InVec.getOpcode() == ISD::UNDEF)
      return InVec;
    // Inserting UNDEF into a possibly-poison vector: freeze the source so the
    // replaced lane is undef rather than poison.
    return DAG.getFreeze(InVec);
  }

  if (!IndexC) {
    // If this is variable insert to undef vector, it might be better to splat:
    // inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
    if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT))
      return DAG.getSplat(VT, DL, InVal);

    // Extend this type to be byte-addressable
    EVT OldVT = VT;
    EVT EltVT = VT.getVectorElementType();
    bool IsByteSized = EltVT.isByteSized();
    if (!IsByteSized) {
      // NOTE(review): the initializer expression for EltVT appears truncated
      // in this copy.
      EltVT =
      VT = VT.changeElementType(*DAG.getContext(), EltVT);
    }

    // Check if this operation will be handled the default way for its type.
    auto IsTypeDefaultHandled = [this](EVT VT) {
      // NOTE(review): the right-hand side of this comparison appears
      // truncated in this copy.
      return TLI.getTypeAction(*DAG.getContext(), VT) ==
    };

    // Check if this operation is illegal and will be handled the default way,
    // even after extending the type to be byte-addressable.
    if (IsTypeDefaultHandled(OldVT) && IsTypeDefaultHandled(VT)) {
      // For each dynamic insertelt, the default way will save the vector to
      // the stack, store at an offset, and load the modified vector. This can
      // dramatically increase code size if we have a chain of insertelts on a
      // large vector: requiring O(V*C) stores/loads where V = length of
      // vector and C is length of chain. If each insertelt is only fed into the
      // next, the vector is write-only across this chain, and we can just
      // save once before the chain and load after in O(V + C) operations.
      // NOTE(review): the declaration of the insert-chain worklist "Seq"
      // (used below) appears to be missing in this copy.
      unsigned NumDynamic = 1;
      // Walk up the chain of insert_vector_elt nodes, counting how many use a
      // non-constant (dynamic) index.
      while (true) {
        SDValue InVec = Seq.back()->getOperand(0);
        if (InVec.getOpcode() != ISD::INSERT_VECTOR_ELT)
          break;
        Seq.push_back(InVec.getNode());
        NumDynamic += !isa<ConstantSDNode>(InVec.getOperand(2));
      }

      // It always and only makes sense to lower this sequence when we have more
      // than one dynamic insertelt, since we will not have more than V constant
      // insertelts, so we will be reducing the total number of stores+loads.
      if (NumDynamic > 1) {
        // In cases where the vector is illegal it will be broken down into
        // parts and stored in parts - we should use the alignment for the
        // smallest part.
        Align SmallestAlign = DAG.getReducedAlign(VT, /*UseABI=*/false);
        // NOTE(review): the "SDValue StackPtr =" declaration line appears to
        // be missing in this copy.
            DAG.CreateStackTemporary(VT.getStoreSize(), SmallestAlign);
        auto &MF = DAG.getMachineFunction();
        int FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
        auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);

        // Save the vector to the stack
        SDValue InVec = Seq.back()->getOperand(0);
        if (!IsByteSized)
          InVec = DAG.getNode(ISD::ANY_EXTEND, DL, VT, InVec);
        SDValue Store = DAG.getStore(DAG.getEntryNode(), DL, InVec, StackPtr,
                                     PtrInfo, SmallestAlign);

        // Lower each dynamic insertelt to a store
        for (SDNode *N : reverse(Seq)) {
          SDValue Elmnt = N->getOperand(1);
          SDValue Index = N->getOperand(2);

          // Check if we have to extend the element type
          if (!IsByteSized && Elmnt.getValueType().bitsLT(EltVT))
            Elmnt = DAG.getNode(ISD::ANY_EXTEND, DL, EltVT, Elmnt);

          // Store the new element. This may be larger than the vector element
          // type, so use a truncating store.
          SDValue EltPtr =
              TLI.getVectorElementPointer(DAG, StackPtr, VT, Index);
          EVT EltVT = Elmnt.getValueType();
          Store = DAG.getTruncStore(
              Store, DL, Elmnt, EltPtr, MachinePointerInfo::getUnknownStack(MF),
              EltVT,
              commonAlignment(SmallestAlign, EltVT.getFixedSizeInBits() / 8));
        }

        // Load the saved vector from the stack
        SDValue Load =
            DAG.getLoad(VT, DL, Store, StackPtr, PtrInfo, SmallestAlign);
        SDValue LoadV = Load.getValue(0);
        return IsByteSized ? LoadV : DAG.getAnyExtOrTrunc(LoadV, DL, OldVT);
      }
    }

    return SDValue();
  }

  if (VT.isScalableVector())
    return SDValue();

  unsigned NumElts = VT.getVectorNumElements();

  // We must know which element is being inserted for folds below here.
  unsigned Elt = IndexC->getZExtValue();

  // Handle <1 x ???> vector insertion special cases.
  if (NumElts == 1) {
    // insert_vector_elt(x, extract_vector_elt(y, 0), 0) -> y
    if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
        InVal.getOperand(0).getValueType() == VT &&
        isNullConstant(InVal.getOperand(1)))
      return InVal.getOperand(0);
  }

  // Canonicalize insert_vector_elt dag nodes.
  // Example:
  // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
  // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
  //
  // Do this only if the child insert_vector node has one use; also
  // do this only if indices are both constants and Idx1 < Idx0.
  if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
      && isa<ConstantSDNode>(InVec.getOperand(2))) {
    unsigned OtherElt = InVec.getConstantOperandVal(2);
    if (Elt < OtherElt) {
      // Swap nodes.
      SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
                                  InVec.getOperand(0), InVal, EltNo);
      AddToWorklist(NewOp.getNode());
      return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
                         VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
    }
  }

  if (SDValue Shuf = mergeInsertEltWithShuffle(N, Elt))
    return Shuf;

  if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
    return Shuf;

  if (SDValue Shuf = combineInsertEltToLoad(N, Elt))
    return Shuf;

  // Attempt to convert an insert_vector_elt chain into a legal build_vector.
  if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) {
    // vXi1 vector - we don't need to recurse.
    if (NumElts == 1)
      return DAG.getBuildVector(VT, DL, {InVal});

    // If we haven't already collected the element, insert into the op list.
    EVT MaxEltVT = InVal.getValueType();
    auto AddBuildVectorOp = [&](SmallVectorImpl<SDValue> &Ops, SDValue Elt,
                                unsigned Idx) {
      if (!Ops[Idx]) {
        Ops[Idx] = Elt;
        if (VT.isInteger()) {
          // Track the widest collected integer element so all operands can be
          // extended to one common type when the build_vector is created.
          EVT EltVT = Elt.getValueType();
          MaxEltVT = MaxEltVT.bitsGE(EltVT) ? MaxEltVT : EltVT;
        }
      }
    };

    // Ensure all the operands are the same value type, fill any missing
    // operands with UNDEF and create the BUILD_VECTOR.
    auto CanonicalizeBuildVector = [&](SmallVectorImpl<SDValue> &Ops,
                                       bool FreezeUndef = false) {
      assert(Ops.size() == NumElts && "Unexpected vector size");
      SDValue UndefOp = FreezeUndef ? DAG.getFreeze(DAG.getUNDEF(MaxEltVT))
                                    : DAG.getUNDEF(MaxEltVT);
      for (SDValue &Op : Ops) {
        if (Op)
          Op = VT.isInteger() ? DAG.getAnyExtOrTrunc(Op, DL, MaxEltVT) : Op;
        else
          Op = UndefOp;
      }
      return DAG.getBuildVector(VT, DL, Ops);
    };

    // NOTE(review): the declaration of "Ops" (the per-lane operand slots)
    // appears to be missing in this copy.
    Ops[Elt] = InVal;

    // Recurse up a INSERT_VECTOR_ELT chain to build a BUILD_VECTOR.
    for (SDValue CurVec = InVec; CurVec;) {
      // UNDEF - build new BUILD_VECTOR from already inserted operands.
      if (CurVec.isUndef())
        return CanonicalizeBuildVector(Ops);

      // FREEZE(UNDEF) - build new BUILD_VECTOR from already inserted operands.
      if (ISD::isFreezeUndef(CurVec.getNode()) && CurVec.hasOneUse())
        return CanonicalizeBuildVector(Ops, /*FreezeUndef=*/true);

      // BUILD_VECTOR - insert unused operands and build new BUILD_VECTOR.
      if (CurVec.getOpcode() == ISD::BUILD_VECTOR && CurVec.hasOneUse()) {
        for (unsigned I = 0; I != NumElts; ++I)
          AddBuildVectorOp(Ops, CurVec.getOperand(I), I);
        return CanonicalizeBuildVector(Ops);
      }

      // SCALAR_TO_VECTOR - insert unused scalar and build new BUILD_VECTOR.
      if (CurVec.getOpcode() == ISD::SCALAR_TO_VECTOR && CurVec.hasOneUse()) {
        AddBuildVectorOp(Ops, CurVec.getOperand(0), 0);
        return CanonicalizeBuildVector(Ops);
      }

      // INSERT_VECTOR_ELT - insert operand and continue up the chain.
      if (CurVec.getOpcode() == ISD::INSERT_VECTOR_ELT && CurVec.hasOneUse())
        if (auto *CurIdx = dyn_cast<ConstantSDNode>(CurVec.getOperand(2)))
          if (CurIdx->getAPIntValue().ult(NumElts)) {
            unsigned Idx = CurIdx->getZExtValue();
            AddBuildVectorOp(Ops, CurVec.getOperand(1), Idx);

            // Found entire BUILD_VECTOR.
            if (all_of(Ops, [](SDValue Op) { return !!Op; }))
              return CanonicalizeBuildVector(Ops);

            CurVec = CurVec->getOperand(0);
            continue;
          }

      // VECTOR_SHUFFLE - if all the operands match the shuffle's sources,
      // update the shuffle mask (and second operand if we started with unary
      // shuffle) and create a new legal shuffle.
      if (CurVec.getOpcode() == ISD::VECTOR_SHUFFLE && CurVec.hasOneUse()) {
        auto *SVN = cast<ShuffleVectorSDNode>(CurVec);
        SDValue LHS = SVN->getOperand(0);
        SDValue RHS = SVN->getOperand(1);
        SmallVector<int, 16> Mask(SVN->getMask());
        bool Merged = true;
        for (auto I : enumerate(Ops)) {
          SDValue &Op = I.value();
          if (Op) {
            SmallVector<int, 16> NewMask;
            if (!mergeEltWithShuffle(LHS, RHS, Mask, NewMask, Op, I.index())) {
              Merged = false;
              break;
            }
            Mask = std::move(NewMask);
          }
        }
        if (Merged)
          if (SDValue NewShuffle =
                  TLI.buildLegalVectorShuffle(VT, DL, LHS, RHS, Mask, DAG))
            return NewShuffle;
      }

      if (!LegalOperations) {
        bool IsNull = llvm::isNullConstant(InVal);
        // We can convert to AND/OR mask if all insertions are zero or -1
        // respectively.
        if ((IsNull || llvm::isAllOnesConstant(InVal)) &&
            all_of(Ops, [InVal](SDValue Op) { return !Op || Op == InVal; }) &&
            count_if(Ops, [InVal](SDValue Op) { return Op == InVal; }) >= 2) {
          SDValue Zero = DAG.getConstant(0, DL, MaxEltVT);
          SDValue AllOnes = DAG.getAllOnesConstant(DL, MaxEltVT);
          // NOTE(review): the declaration of "Mask" (the build_vector mask
          // operands used below) appears to be missing in this copy.

          // Build the mask and return the corresponding DAG node.
          auto BuildMaskAndNode = [&](SDValue TrueVal, SDValue FalseVal,
                                      unsigned MaskOpcode) {
            APInt InsertedEltMask = APInt::getZero(NumElts);
            for (unsigned I = 0; I != NumElts; ++I) {
              Mask[I] = Ops[I] ? TrueVal : FalseVal;
              if (Ops[I])
                InsertedEltMask.setBit(I);
            }
            // Make sure to freeze the source vector in case any of the elements
            // overwritten by the insert may be poison. Otherwise those elements
            // could end up being poison instead of 0/-1 after the AND/OR.
            // NOTE(review): the trailing argument(s) of this getFreeze call
            // appear to be missing in this copy.
            CurVec = DAG.getFreeze(CurVec, InsertedEltMask,
            return DAG.getNode(MaskOpcode, DL, VT, CurVec,
                               DAG.getBuildVector(VT, DL, Mask));
          };

          // If all elements are zero, we can use AND with all ones.
          if (IsNull)
            return BuildMaskAndNode(Zero, AllOnes, ISD::AND);

          // If all elements are -1, we can use OR with zero.
          return BuildMaskAndNode(AllOnes, Zero, ISD::OR);
        }
      }

      // Failed to find a match in the chain - bail.
      break;
    }

    // See if we can fill in the missing constant elements as zeros.
    // TODO: Should we do this for any constant?
    APInt DemandedZeroElts = APInt::getZero(NumElts);
    for (unsigned I = 0; I != NumElts; ++I)
      if (!Ops[I])
        DemandedZeroElts.setBit(I);

    if (DAG.MaskedVectorIsZero(InVec, DemandedZeroElts)) {
      SDValue Zero = VT.isInteger() ? DAG.getConstant(0, DL, MaxEltVT)
                                    : DAG.getConstantFP(0, DL, MaxEltVT);
      for (unsigned I = 0; I != NumElts; ++I)
        if (!Ops[I])
          Ops[I] = Zero;

      return CanonicalizeBuildVector(Ops);
    }
  }

  return SDValue();
}
24875
24876/// Transform a vector binary operation into a scalar binary operation by moving
24877/// the math/logic after an extract element of a vector.
24879 const SDLoc &DL, bool LegalTypes) {
24880 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
24881 SDValue Vec = ExtElt->getOperand(0);
24882 SDValue Index = ExtElt->getOperand(1);
24883 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
24884 unsigned Opc = Vec.getOpcode();
24885 if (!IndexC || !Vec.hasOneUse() || (!TLI.isBinOp(Opc) && Opc != ISD::SETCC) ||
24886 Vec->getNumValues() != 1)
24887 return SDValue();
24888
24889 // Targets may want to avoid this to prevent an expensive register transfer.
24890 if (!TLI.shouldScalarizeBinop(Vec))
24891 return SDValue();
24892
24893 EVT ResVT = ExtElt->getValueType(0);
24894 if (Opc == ISD::SETCC &&
24895 (ResVT != Vec.getValueType().getVectorElementType() || LegalTypes))
24896 return SDValue();
24897
24898 // Extracting an element of a vector constant is constant-folded, so this
24899 // transform is just replacing a vector op with a scalar op while moving the
24900 // extract.
24901 auto IsExtractFree = [](SDValue Op) {
24902 APInt SplatVal;
24903 return isAnyConstantBuildVector(Op, true) ||
24904 ISD::isConstantSplatVector(Op.getNode(), SplatVal);
24905 };
24906 SDValue Op0 = Vec.getOperand(0);
24907 SDValue Op1 = Vec.getOperand(1);
24908 if (!IsExtractFree(Op0) && !IsExtractFree(Op1))
24909 return SDValue();
24910
24911 // extractelt (op X, C), IndexC --> op (extractelt X, IndexC), C'
24912 // extractelt (op C, X), IndexC --> op C', (extractelt X, IndexC)
24913 if (Opc == ISD::SETCC) {
24914 EVT OpVT = Op0.getValueType().getVectorElementType();
24915 Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Op0, Index);
24916 Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Op1, Index);
24917 SDValue NewVal = DAG.getSetCC(
24918 DL, ResVT, Op0, Op1, cast<CondCodeSDNode>(Vec->getOperand(2))->get());
24919 // We may need to sign- or zero-extend the result to match the same
24920 // behaviour as the vector version of SETCC.
24921 unsigned VecBoolContents = TLI.getBooleanContents(Vec.getValueType());
24922 if (ResVT != MVT::i1 &&
24923 VecBoolContents != TargetLowering::UndefinedBooleanContent &&
24924 VecBoolContents != TLI.getBooleanContents(ResVT)) {
24926 NewVal = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, ResVT, NewVal,
24927 DAG.getValueType(MVT::i1));
24928 else
24929 NewVal = DAG.getZeroExtendInReg(NewVal, DL, MVT::i1);
24930 }
24931 return NewVal;
24932 }
24933 Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op0, Index);
24934 Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op1, Index);
24935 return DAG.getNode(Opc, DL, ResVT, Op0, Op1);
24936}
24937
24938// Given a ISD::EXTRACT_VECTOR_ELT, which is a glorified bit sequence extract,
24939// recursively analyse all of it's users. and try to model themselves as
24940// bit sequence extractions. If all of them agree on the new, narrower element
24941// type, and all of them can be modelled as ISD::EXTRACT_VECTOR_ELT's of that
24942// new element type, do so now.
24943// This is mainly useful to recover from legalization that scalarized
24944// the vector as wide elements, but tries to rebuild it with narrower elements.
24945//
24946// Some more nodes could be modelled if that helps cover interesting patterns.
24947bool DAGCombiner::refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(
24948 SDNode *N) {
24949 // We perform this optimization post type-legalization because
24950 // the type-legalizer often scalarizes integer-promoted vectors.
24951 // Performing this optimization before may cause legalizaton cycles.
24952 if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
24953 return false;
24954
24955 // TODO: Add support for big-endian.
24956 if (DAG.getDataLayout().isBigEndian())
24957 return false;
24958
24959 SDValue VecOp = N->getOperand(0);
24960 EVT VecVT = VecOp.getValueType();
24961 assert(!VecVT.isScalableVector() && "Only for fixed vectors.");
24962
24963 // We must start with a constant extraction index.
24964 auto *IndexC = dyn_cast<ConstantSDNode>(N->getOperand(1));
24965 if (!IndexC)
24966 return false;
24967
24968 assert(IndexC->getZExtValue() < VecVT.getVectorNumElements() &&
24969 "Original ISD::EXTRACT_VECTOR_ELT is undefinend?");
24970
24971 // TODO: deal with the case of implicit anyext of the extraction.
24972 unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
24973 EVT ScalarVT = N->getValueType(0);
24974 if (VecVT.getScalarType() != ScalarVT)
24975 return false;
24976
24977 // TODO: deal with the cases other than everything being integer-typed.
24978 if (!ScalarVT.isScalarInteger())
24979 return false;
24980
24981 struct Entry {
24982 SDNode *Producer;
24983
24984 // Which bits of VecOp does it contain?
24985 unsigned BitPos;
24986 int NumBits;
24987 // NOTE: the actual width of \p Producer may be wider than NumBits!
24988
24989 Entry(Entry &&) = default;
24990 Entry(SDNode *Producer_, unsigned BitPos_, int NumBits_)
24991 : Producer(Producer_), BitPos(BitPos_), NumBits(NumBits_) {}
24992
24993 Entry() = delete;
24994 Entry(const Entry &) = delete;
24995 Entry &operator=(const Entry &) = delete;
24996 Entry &operator=(Entry &&) = delete;
24997 };
24998 SmallVector<Entry, 32> Worklist;
25000
25001 // We start at the "root" ISD::EXTRACT_VECTOR_ELT.
25002 Worklist.emplace_back(N, /*BitPos=*/VecEltBitWidth * IndexC->getZExtValue(),
25003 /*NumBits=*/VecEltBitWidth);
25004
25005 while (!Worklist.empty()) {
25006 Entry E = Worklist.pop_back_val();
25007 // Does the node not even use any of the VecOp bits?
25008 if (!(E.NumBits > 0 && E.BitPos < VecVT.getSizeInBits() &&
25009 E.BitPos + E.NumBits <= VecVT.getSizeInBits()))
25010 return false; // Let's allow the other combines clean this up first.
25011 // Did we fail to model any of the users of the Producer?
25012 bool ProducerIsLeaf = false;
25013 // Look at each user of this Producer.
25014 for (SDNode *User : E.Producer->users()) {
25015 switch (User->getOpcode()) {
25016 // TODO: support ISD::BITCAST
25017 // TODO: support ISD::ANY_EXTEND
25018 // TODO: support ISD::ZERO_EXTEND
25019 // TODO: support ISD::SIGN_EXTEND
25020 case ISD::TRUNCATE:
25021 // Truncation simply means we keep position, but extract less bits.
25022 Worklist.emplace_back(User, E.BitPos,
25023 /*NumBits=*/User->getValueSizeInBits(0));
25024 break;
25025 // TODO: support ISD::SRA
25026 // TODO: support ISD::SHL
25027 case ISD::SRL:
25028 // We should be shifting the Producer by a constant amount.
25029 if (auto *ShAmtC = dyn_cast<ConstantSDNode>(User->getOperand(1));
25030 User->getOperand(0).getNode() == E.Producer && ShAmtC) {
25031 // Logical right-shift means that we start extraction later,
25032 // but stop it at the same position we did previously.
25033 unsigned ShAmt = ShAmtC->getZExtValue();
25034 Worklist.emplace_back(User, E.BitPos + ShAmt, E.NumBits - ShAmt);
25035 break;
25036 }
25037 [[fallthrough]];
25038 default:
25039 // We can not model this user of the Producer.
25040 // Which means the current Producer will be a ISD::EXTRACT_VECTOR_ELT.
25041 ProducerIsLeaf = true;
25042 // Profitability check: all users that we can not model
25043 // must be ISD::BUILD_VECTOR's.
25044 if (User->getOpcode() != ISD::BUILD_VECTOR)
25045 return false;
25046 break;
25047 }
25048 }
25049 if (ProducerIsLeaf)
25050 Leafs.emplace_back(std::move(E));
25051 }
25052
25053 unsigned NewVecEltBitWidth = Leafs.front().NumBits;
25054
25055 // If we are still at the same element granularity, give up,
25056 if (NewVecEltBitWidth == VecEltBitWidth)
25057 return false;
25058
25059 // The vector width must be a multiple of the new element width.
25060 if (VecVT.getSizeInBits() % NewVecEltBitWidth != 0)
25061 return false;
25062
25063 // All leafs must agree on the new element width.
25064 // All leafs must not expect any "padding" bits ontop of that width.
25065 // All leafs must start extraction from multiple of that width.
25066 if (!all_of(Leafs, [NewVecEltBitWidth](const Entry &E) {
25067 return (unsigned)E.NumBits == NewVecEltBitWidth &&
25068 E.Producer->getValueSizeInBits(0) == NewVecEltBitWidth &&
25069 E.BitPos % NewVecEltBitWidth == 0;
25070 }))
25071 return false;
25072
25073 EVT NewScalarVT = EVT::getIntegerVT(*DAG.getContext(), NewVecEltBitWidth);
25074 EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewScalarVT,
25075 VecVT.getSizeInBits() / NewVecEltBitWidth);
25076
25077 if (LegalTypes &&
25078 !(TLI.isTypeLegal(NewScalarVT) && TLI.isTypeLegal(NewVecVT)))
25079 return false;
25080
25081 if (LegalOperations &&
25082 !(TLI.isOperationLegalOrCustom(ISD::BITCAST, NewVecVT) &&
25084 return false;
25085
25086 SDValue NewVecOp = DAG.getBitcast(NewVecVT, VecOp);
25087 for (const Entry &E : Leafs) {
25088 SDLoc DL(E.Producer);
25089 unsigned NewIndex = E.BitPos / NewVecEltBitWidth;
25090 assert(NewIndex < NewVecVT.getVectorNumElements() &&
25091 "Creating out-of-bounds ISD::EXTRACT_VECTOR_ELT?");
25092 SDValue V = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, NewScalarVT, NewVecOp,
25093 DAG.getVectorIdxConstant(NewIndex, DL));
25094 CombineTo(E.Producer, V);
25095 }
25096
25097 return true;
25098}
25099
25100SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
25101 SDValue VecOp = N->getOperand(0);
25102 SDValue Index = N->getOperand(1);
25103 EVT ScalarVT = N->getValueType(0);
25104 EVT VecVT = VecOp.getValueType();
25105 if (VecOp.isUndef())
25106 return DAG.getUNDEF(ScalarVT);
25107
25108 // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
25109 //
25110 // This only really matters if the index is non-constant since other combines
25111 // on the constant elements already work.
25112 SDLoc DL(N);
25113 if (VecOp.getOpcode() == ISD::INSERT_VECTOR_ELT &&
25114 Index == VecOp.getOperand(2)) {
25115 SDValue Elt = VecOp.getOperand(1);
25116 AddUsersToWorklist(VecOp.getNode());
25117 return VecVT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, DL, ScalarVT) : Elt;
25118 }
25119
25120 // (vextract (scalar_to_vector val, 0) -> val
25121 if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR) {
25122 // Only 0'th element of SCALAR_TO_VECTOR is defined.
25123 if (DAG.isKnownNeverZero(Index))
25124 return DAG.getPOISON(ScalarVT);
25125
25126 // Check if the result type doesn't match the inserted element type.
25127 // The inserted element and extracted element may have mismatched bitwidth.
25128 // As a result, EXTRACT_VECTOR_ELT may extend or truncate the extracted vector.
25129 SDValue InOp = VecOp.getOperand(0);
25130 if (InOp.getValueType() != ScalarVT) {
25131 assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
25132 if (InOp.getValueType().bitsGT(ScalarVT))
25133 return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, InOp);
25134 return DAG.getNode(ISD::ANY_EXTEND, DL, ScalarVT, InOp);
25135 }
25136 return InOp;
25137 }
25138
25139 // extract_vector_elt of out-of-bounds element -> POISON
25140 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
25141 if (IndexC && VecVT.isFixedLengthVector() &&
25142 IndexC->getAPIntValue().uge(VecVT.getVectorNumElements()))
25143 return DAG.getPOISON(ScalarVT);
25144
25145 // extract_vector_elt (build_vector x, y), 1 -> y
25146 if (((IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR) ||
25147 VecOp.getOpcode() == ISD::SPLAT_VECTOR) &&
25148 TLI.isTypeLegal(VecVT)) {
25149 assert((VecOp.getOpcode() != ISD::BUILD_VECTOR ||
25150 VecVT.isFixedLengthVector()) &&
25151 "BUILD_VECTOR used for scalable vectors");
25152 unsigned IndexVal =
25153 VecOp.getOpcode() == ISD::BUILD_VECTOR ? IndexC->getZExtValue() : 0;
25154 SDValue Elt = VecOp.getOperand(IndexVal);
25155 EVT InEltVT = Elt.getValueType();
25156
25157 if (VecOp.hasOneUse() || TLI.aggressivelyPreferBuildVectorSources(VecVT) ||
25158 isNullConstant(Elt)) {
25159 // Sometimes build_vector's scalar input types do not match result type.
25160 if (ScalarVT == InEltVT)
25161 return Elt;
25162
25163 // TODO: It may be useful to truncate if free if the build_vector
25164 // implicitly converts.
25165 }
25166 }
25167
25168 if (SDValue BO = scalarizeExtractedBinOp(N, DAG, DL, LegalTypes))
25169 return BO;
25170
25171 if (VecVT.isScalableVector())
25172 return SDValue();
25173
25174 // All the code from this point onwards assumes fixed width vectors, but it's
25175 // possible that some of the combinations could be made to work for scalable
25176 // vectors too.
25177 unsigned NumElts = VecVT.getVectorNumElements();
25178 unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
25179
25180 // See if the extracted element is constant, in which case fold it if its
25181 // a legal fp immediate.
25182 if (IndexC && ScalarVT.isFloatingPoint()) {
25183 APInt EltMask = APInt::getOneBitSet(NumElts, IndexC->getZExtValue());
25184 KnownBits KnownElt = DAG.computeKnownBits(VecOp, EltMask);
25185 if (KnownElt.isConstant()) {
25186 APFloat CstFP =
25187 APFloat(ScalarVT.getFltSemantics(), KnownElt.getConstant());
25188 if (TLI.isFPImmLegal(CstFP, ScalarVT))
25189 return DAG.getConstantFP(CstFP, DL, ScalarVT);
25190 }
25191 }
25192
25193 // TODO: These transforms should not require the 'hasOneUse' restriction, but
25194 // there are regressions on multiple targets without it. We can end up with a
25195 // mess of scalar and vector code if we reduce only part of the DAG to scalar.
25196 if (IndexC && VecOp.getOpcode() == ISD::BITCAST && VecVT.isInteger() &&
25197 VecOp.hasOneUse()) {
25198 // The vector index of the LSBs of the source depend on the endian-ness.
25199 bool IsLE = DAG.getDataLayout().isLittleEndian();
25200 unsigned ExtractIndex = IndexC->getZExtValue();
25201 // extract_elt (v2i32 (bitcast i64:x)), BCTruncElt -> i32 (trunc i64:x)
25202 unsigned BCTruncElt = IsLE ? 0 : NumElts - 1;
25203 SDValue BCSrc = VecOp.getOperand(0);
25204 if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger())
25205 return DAG.getAnyExtOrTrunc(BCSrc, DL, ScalarVT);
25206
25207 // TODO: Add support for SCALAR_TO_VECTOR implicit truncation.
25208 if (LegalTypes && BCSrc.getValueType().isInteger() &&
25209 BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR &&
25210 BCSrc.getScalarValueSizeInBits() ==
25212 // ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt -->
25213 // trunc i64 X to i32
25214 SDValue X = BCSrc.getOperand(0);
25215 EVT XVT = X.getValueType();
25216 assert(XVT.isScalarInteger() && ScalarVT.isScalarInteger() &&
25217 "Extract element and scalar to vector can't change element type "
25218 "from FP to integer.");
25219 unsigned XBitWidth = X.getValueSizeInBits();
25220 unsigned Scale = XBitWidth / VecEltBitWidth;
25221 BCTruncElt = IsLE ? 0 : Scale - 1;
25222
25223 // An extract element return value type can be wider than its vector
25224 // operand element type. In that case, the high bits are undefined, so
25225 // it's possible that we may need to extend rather than truncate.
25226 if (ExtractIndex < Scale && XBitWidth > VecEltBitWidth) {
25227 assert(XBitWidth % VecEltBitWidth == 0 &&
25228 "Scalar bitwidth must be a multiple of vector element bitwidth");
25229
25230 if (ExtractIndex != BCTruncElt) {
25231 unsigned ShiftIndex =
25232 IsLE ? ExtractIndex : (Scale - 1) - ExtractIndex;
25233 X = DAG.getNode(
25234 ISD::SRL, DL, XVT, X,
25235 DAG.getShiftAmountConstant(ShiftIndex * VecEltBitWidth, XVT, DL));
25236 }
25237
25238 return DAG.getAnyExtOrTrunc(X, DL, ScalarVT);
25239 }
25240 }
25241 }
25242
25243 // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
25244 // We only perform this optimization before the op legalization phase because
25245 // we may introduce new vector instructions which are not backed by TD
25246 // patterns. For example on AVX, extracting elements from a wide vector
25247 // without using extract_subvector. However, if we can find an underlying
25248 // scalar value, then we can always use that.
25249 if (IndexC && VecOp.getOpcode() == ISD::VECTOR_SHUFFLE) {
25250 auto *Shuf = cast<ShuffleVectorSDNode>(VecOp);
25251 // Find the new index to extract from.
25252 int OrigElt = Shuf->getMaskElt(IndexC->getZExtValue());
25253
25254 // Extracting an undef index is undef.
25255 if (OrigElt == -1)
25256 return DAG.getUNDEF(ScalarVT);
25257
25258 // Select the right vector half to extract from.
25259 SDValue SVInVec;
25260 if (OrigElt < (int)NumElts) {
25261 SVInVec = VecOp.getOperand(0);
25262 } else {
25263 SVInVec = VecOp.getOperand(1);
25264 OrigElt -= NumElts;
25265 }
25266
25267 if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
25268 // TODO: Check if shuffle mask is legal?
25269 if (LegalOperations && TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VecVT) &&
25270 !VecOp.hasOneUse())
25271 return SDValue();
25272
25273 SDValue InOp = SVInVec.getOperand(OrigElt);
25274 if (InOp.getValueType() != ScalarVT) {
25275 assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
25276 InOp = DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
25277 }
25278
25279 return InOp;
25280 }
25281
25282 // FIXME: We should handle recursing on other vector shuffles and
25283 // scalar_to_vector here as well.
25284
25285 if (!LegalOperations ||
25286 // FIXME: Should really be just isOperationLegalOrCustom.
25289 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, SVInVec,
25290 DAG.getVectorIdxConstant(OrigElt, DL));
25291 }
25292 }
25293
25294 // If only EXTRACT_VECTOR_ELT nodes use the source vector we can
25295 // simplify it based on the (valid) extraction indices.
25296 if (llvm::all_of(VecOp->users(), [&](SDNode *Use) {
25297 return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
25298 Use->getOperand(0) == VecOp &&
25299 isa<ConstantSDNode>(Use->getOperand(1));
25300 })) {
25301 APInt DemandedElts = APInt::getZero(NumElts);
25302 for (SDNode *User : VecOp->users()) {
25303 auto *CstElt = cast<ConstantSDNode>(User->getOperand(1));
25304 if (CstElt->getAPIntValue().ult(NumElts))
25305 DemandedElts.setBit(CstElt->getZExtValue());
25306 }
25307 if (SimplifyDemandedVectorElts(VecOp, DemandedElts, true)) {
25308 // We simplified the vector operand of this extract element. If this
25309 // extract is not dead, visit it again so it is folded properly.
25310 if (N->getOpcode() != ISD::DELETED_NODE)
25311 AddToWorklist(N);
25312 return SDValue(N, 0);
25313 }
25314 APInt DemandedBits = APInt::getAllOnes(VecEltBitWidth);
25315 if (SimplifyDemandedBits(VecOp, DemandedBits, DemandedElts, true)) {
25316 // We simplified the vector operand of this extract element. If this
25317 // extract is not dead, visit it again so it is folded properly.
25318 if (N->getOpcode() != ISD::DELETED_NODE)
25319 AddToWorklist(N);
25320 return SDValue(N, 0);
25321 }
25322 }
25323
25324 if (refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(N))
25325 return SDValue(N, 0);
25326
25327 // Everything under here is trying to match an extract of a loaded value.
25328 // If the result of load has to be truncated, then it's not necessarily
25329 // profitable.
25330 bool BCNumEltsChanged = false;
25331 EVT ExtVT = VecVT.getVectorElementType();
25332 EVT LVT = ExtVT;
25333 if (ScalarVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, ScalarVT))
25334 return SDValue();
25335
25336 if (VecOp.getOpcode() == ISD::BITCAST) {
25337 // Don't duplicate a load with other uses.
25338 if (!VecOp.hasOneUse())
25339 return SDValue();
25340
25341 EVT BCVT = VecOp.getOperand(0).getValueType();
25342 if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
25343 return SDValue();
25344 if (NumElts != BCVT.getVectorNumElements())
25345 BCNumEltsChanged = true;
25346 VecOp = VecOp.getOperand(0);
25347 ExtVT = BCVT.getVectorElementType();
25348 }
25349
25350 // extract (vector load $addr), i --> load $addr + i * size
25351 if (!LegalOperations && !IndexC && VecOp.hasOneUse() &&
25352 ISD::isNormalLoad(VecOp.getNode()) &&
25353 !Index->hasPredecessor(VecOp.getNode())) {
25354 auto *VecLoad = dyn_cast<LoadSDNode>(VecOp);
25355 if (VecLoad && VecLoad->isSimple()) {
25356 if (SDValue Scalarized = TLI.scalarizeExtractedVectorLoad(
25357 ScalarVT, SDLoc(N), VecVT, Index, VecLoad, DAG)) {
25358 ++OpsNarrowed;
25359 return Scalarized;
25360 }
25361 }
25362 }
25363
25364 // Perform only after legalization to ensure build_vector / vector_shuffle
25365 // optimizations have already been done.
25366 if (!LegalOperations || !IndexC)
25367 return SDValue();
25368
25369 // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
25370 // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
25371 // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
25372 int Elt = IndexC->getZExtValue();
25373 LoadSDNode *LN0 = nullptr;
25374 if (ISD::isNormalLoad(VecOp.getNode())) {
25375 LN0 = cast<LoadSDNode>(VecOp);
25376 } else if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
25377 VecOp.getOperand(0).getValueType() == ExtVT &&
25378 ISD::isNormalLoad(VecOp.getOperand(0).getNode())) {
25379 // Don't duplicate a load with other uses.
25380 if (!VecOp.hasOneUse())
25381 return SDValue();
25382
25383 LN0 = cast<LoadSDNode>(VecOp.getOperand(0));
25384 }
25385 if (auto *Shuf = dyn_cast<ShuffleVectorSDNode>(VecOp)) {
25386 // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
25387 // =>
25388 // (load $addr+1*size)
25389
25390 // Don't duplicate a load with other uses.
25391 if (!VecOp.hasOneUse())
25392 return SDValue();
25393
25394 // If the bit convert changed the number of elements, it is unsafe
25395 // to examine the mask.
25396 if (BCNumEltsChanged)
25397 return SDValue();
25398
25399 // Select the input vector, guarding against out of range extract vector.
25400 int Idx = (Elt > (int)NumElts) ? -1 : Shuf->getMaskElt(Elt);
25401 VecOp = (Idx < (int)NumElts) ? VecOp.getOperand(0) : VecOp.getOperand(1);
25402
25403 if (VecOp.getOpcode() == ISD::BITCAST) {
25404 // Don't duplicate a load with other uses.
25405 if (!VecOp.hasOneUse())
25406 return SDValue();
25407
25408 VecOp = VecOp.getOperand(0);
25409 }
25410 if (ISD::isNormalLoad(VecOp.getNode())) {
25411 LN0 = cast<LoadSDNode>(VecOp);
25412 Elt = (Idx < (int)NumElts) ? Idx : Idx - (int)NumElts;
25413 Index = DAG.getConstant(Elt, DL, Index.getValueType());
25414 }
25415 } else if (VecOp.getOpcode() == ISD::CONCAT_VECTORS && !BCNumEltsChanged &&
25416 VecVT.getVectorElementType() == ScalarVT &&
25417 (!LegalTypes ||
25418 TLI.isTypeLegal(
25420 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 0
25421 // -> extract_vector_elt a, 0
25422 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 1
25423 // -> extract_vector_elt a, 1
25424 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 2
25425 // -> extract_vector_elt b, 0
25426 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 3
25427 // -> extract_vector_elt b, 1
25428 EVT ConcatVT = VecOp.getOperand(0).getValueType();
25429 unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
25430 SDValue NewIdx = DAG.getConstant(Elt % ConcatNumElts, DL,
25431 Index.getValueType());
25432
25433 SDValue ConcatOp = VecOp.getOperand(Elt / ConcatNumElts);
25435 ConcatVT.getVectorElementType(),
25436 ConcatOp, NewIdx);
25437 return DAG.getNode(ISD::BITCAST, DL, ScalarVT, Elt);
25438 }
25439
25440 // Make sure we found a non-volatile load and the extractelement is
25441 // the only use.
25442 if (!LN0 || !LN0->hasNUsesOfValue(1,0) || !LN0->isSimple())
25443 return SDValue();
25444
25445 // If Idx was -1 above, Elt is going to be -1, so just return undef.
25446 if (Elt == -1)
25447 return DAG.getUNDEF(LVT);
25448
25449 if (SDValue Scalarized =
25450 TLI.scalarizeExtractedVectorLoad(LVT, DL, VecVT, Index, LN0, DAG)) {
25451 ++OpsNarrowed;
25452 return Scalarized;
25453 }
25454
25455 return SDValue();
25456}
25457
25458// Simplify (build_vec (ext )) to (bitcast (build_vec ))
// Recognizes a BUILD_VECTOR whose defined operands are all ANY_EXTEND or
// ZERO_EXTEND from one common scalar type, and rebuilds it as a wider
// BUILD_VECTOR of the unextended scalars (padded with poison or zero filler)
// followed by a bitcast back to the original vector type. Returns the
// bitcast, or an empty SDValue() when the fold does not apply.
25459SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
25460 // We perform this optimization post type-legalization because
25461 // the type-legalizer often scalarizes integer-promoted vectors.
25462 // Performing this optimization before may create bit-casts which
25463 // will be type-legalized to complex code sequences.
25464 // We perform this optimization only before the operation legalizer because we
25465 // may introduce illegal operations.
25466 if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
25467 return SDValue();
25468
25469 unsigned NumInScalars = N->getNumOperands();
25470 SDLoc DL(N);
25471 EVT VT = N->getValueType(0);
25472
25473 // Check to see if this is a BUILD_VECTOR of a bunch of values
25474 // which come from any_extend or zero_extend nodes. If so, we can create
25475 // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
25476 // optimizations. We do not handle sign-extend because we can't fill the sign
25477 // using shuffles.
25478 EVT SourceType = MVT::Other;
25479 bool AllAnyExt = true;
25480
 // Scan all operands: every defined lane must be a zext/aext from one shared
 // source type. SourceType == MVT::Other doubles as the "invalid" marker.
25481 for (unsigned i = 0; i != NumInScalars; ++i) {
25482 SDValue In = N->getOperand(i);
25483 // Ignore undef inputs.
25484 if (In.isUndef()) continue;
25485
25486 bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND;
25487 bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
25488
25489 // Abort if the element is not an extension.
25490 if (!ZeroExt && !AnyExt) {
25491 SourceType = MVT::Other;
25492 break;
25493 }
25494
25495 // The input is a ZeroExt or AnyExt. Check the original type.
25496 EVT InTy = In.getOperand(0).getValueType();
25497
25498 // Check that all of the widened source types are the same.
25499 if (SourceType == MVT::Other)
25500 // First time.
25501 SourceType = InTy;
25502 else if (InTy != SourceType) {
25503 // Multiple income types. Abort.
25504 SourceType = MVT::Other;
25505 break;
25506 }
25507
25508 // Check if all of the extends are ANY_EXTENDs.
25509 AllAnyExt &= AnyExt;
25510 }
25511
25512 // In order to have valid types, all of the inputs must be extended from the
25513 // same source type and all of the inputs must be any or zero extend.
25514 // Scalar sizes must be a power of two.
25515 EVT OutScalarTy = VT.getScalarType();
25516 bool ValidTypes =
25517 SourceType != MVT::Other &&
 // NOTE(review): the remaining power-of-two scalar-size conditions of
 // ValidTypes appear to be elided in this copy of the file -- confirm
 // against upstream.
25520
25521 // Create a new simpler BUILD_VECTOR sequence which other optimizations can
25522 // turn into a single shuffle instruction.
25523 if (!ValidTypes)
25524 return SDValue();
25525
25526 // If we already have a splat buildvector, then don't fold it if it means
25527 // introducing zeros.
25528 if (!AllAnyExt && DAG.isSplatValue(SDValue(N, 0), /*AllowUndefs*/ true))
25529 return SDValue();
25530
25531 bool isLE = DAG.getDataLayout().isLittleEndian();
25532 unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
25533 assert(ElemRatio > 1 && "Invalid element size ratio");
 // Any-extended lanes may carry arbitrary high bits (poison filler suffices);
 // zero-extended lanes need their high sub-elements filled with real zeros.
25534 SDValue Filler = AllAnyExt ? DAG.getPOISON(SourceType)
25535 : DAG.getConstant(0, DL, SourceType);
25536
25537 unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
25538 SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
25539
25540 // Populate the new build_vector
25541 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
25542 SDValue Cast = N->getOperand(i);
25543 assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
25544 Cast.getOpcode() == ISD::ZERO_EXTEND ||
25545 Cast.isUndef()) && "Invalid cast opcode");
25546 SDValue In;
25547 if (Cast.isUndef())
25548 In = DAG.getUNDEF(SourceType);
25549 else
25550 In = Cast->getOperand(0);
 // Each original lane occupies ElemRatio consecutive narrow sub-elements;
 // the value-carrying sub-element is the first on little-endian and the
 // last on big-endian.
25551 unsigned Index = isLE ? (i * ElemRatio) :
25552 (i * ElemRatio + (ElemRatio - 1));
25553
25554 assert(Index < Ops.size() && "Invalid index");
25555 Ops[Index] = In;
25556 }
25557
25558 // The type of the new BUILD_VECTOR node.
25559 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
25560 assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
25561 "Invalid vector size");
25562 // Check if the new vector type is legal.
25563 if (!isTypeLegal(VecVT) ||
25564 (!TLI.isOperationLegal(ISD::BUILD_VECTOR, VecVT) &&
 // NOTE(review): the second operand of this && appears to be elided in
 // this copy of the file -- confirm against upstream.
25566 return SDValue();
25567
25568 // Make the new BUILD_VECTOR.
25569 SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
25570
25571 // The new BUILD_VECTOR node has the potential to be further optimized.
25572 AddToWorklist(BV.getNode());
25573 // Bitcast to the desired type.
25574 return DAG.getBitcast(VT, BV);
25575}
25576
25577// Simplify (build_vec (trunc $1)
25578// (trunc (srl $1 half-width))
25579// (trunc (srl $1 (2 * half-width))))
25580// to (bitcast $1)
25581SDValue DAGCombiner::reduceBuildVecTruncToBitCast(SDNode *N) {
25582 assert(N->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
25583
25584 EVT VT = N->getValueType(0);
25585
25586 // Don't run this before LegalizeTypes if VT is legal.
25587 // Targets may have other preferences.
25588 if (Level < AfterLegalizeTypes && TLI.isTypeLegal(VT))
25589 return SDValue();
25590
25591 // Only for little endian
25592 if (!DAG.getDataLayout().isLittleEndian())
25593 return SDValue();
25594
25595 EVT OutScalarTy = VT.getScalarType();
25596 uint64_t ScalarTypeBitsize = OutScalarTy.getSizeInBits();
25597
25598 // Only for power of two types to be sure that bitcast works well
25599 if (!isPowerOf2_64(ScalarTypeBitsize))
25600 return SDValue();
25601
25602 unsigned NumInScalars = N->getNumOperands();
25603
25604 // Look through bitcasts
25605 auto PeekThroughBitcast = [](SDValue Op) {
25606 if (Op.getOpcode() == ISD::BITCAST)
25607 return Op.getOperand(0);
25608 return Op;
25609 };
25610
25611 // The source value where all the parts are extracted.
25612 SDValue Src;
25613 for (unsigned i = 0; i != NumInScalars; ++i) {
25614 SDValue In = PeekThroughBitcast(N->getOperand(i));
25615 // Ignore undef inputs.
25616 if (In.isUndef()) continue;
25617
25618 if (In.getOpcode() != ISD::TRUNCATE)
25619 return SDValue();
25620
25621 In = PeekThroughBitcast(In.getOperand(0));
25622
25623 if (In.getOpcode() != ISD::SRL) {
25624 // For now only build_vec without shuffling, handle shifts here in the
25625 // future.
25626 if (i != 0)
25627 return SDValue();
25628
25629 Src = In;
25630 } else {
25631 // In is SRL
25632 SDValue part = PeekThroughBitcast(In.getOperand(0));
25633
25634 if (!Src) {
25635 Src = part;
25636 } else if (Src != part) {
25637 // Vector parts do not stem from the same variable
25638 return SDValue();
25639 }
25640
25641 SDValue ShiftAmtVal = In.getOperand(1);
25642 if (!isa<ConstantSDNode>(ShiftAmtVal))
25643 return SDValue();
25644
25645 uint64_t ShiftAmt = In.getConstantOperandVal(1);
25646
25647 // The extracted value is not extracted at the right position
25648 if (ShiftAmt != i * ScalarTypeBitsize)
25649 return SDValue();
25650 }
25651 }
25652
25653 // Only cast if the size is the same
25654 if (!Src || Src.getValueType().getSizeInBits() != VT.getSizeInBits())
25655 return SDValue();
25656
25657 return DAG.getBitcast(VT, Src);
25658}
25659
// Build a VECTOR_SHUFFLE that realizes BUILD_VECTOR node N from (at most) two
// source vectors VecIn1/VecIn2. VectorMask maps each output lane to a source
// vector number (see reduceBuildVecToShuffle); only lanes whose source is
// LeftIdx or LeftIdx+1 are populated here. The inputs may differ in type from
// the result, in which case they are concatenated, split, or widened first.
// Returns the shuffle (extracted back down to VT if it was performed at a
// wider width), or an empty SDValue() if no legal/profitable form exists.
25660SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
25661 ArrayRef<int> VectorMask,
25662 SDValue VecIn1, SDValue VecIn2,
25663 unsigned LeftIdx, bool DidSplitVec) {
25664 EVT VT = N->getValueType(0);
25665 EVT InVT1 = VecIn1.getValueType();
25666 EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
25667
25668 unsigned NumElems = VT.getVectorNumElements();
25669 unsigned ShuffleNumElems = NumElems;
25670
25671 // If we artificially split a vector in two already, then the offsets in the
25672 // operands will all be based off of VecIn1, even those in VecIn2.
25673 unsigned Vec2Offset = DidSplitVec ? 0 : InVT1.getVectorNumElements();
25674
25675 uint64_t VTSize = VT.getFixedSizeInBits();
25676 uint64_t InVT1Size = InVT1.getFixedSizeInBits();
25677 uint64_t InVT2Size = InVT2.getFixedSizeInBits();
25678
25679 assert(InVT2Size <= InVT1Size &&
25680 "Inputs must be sorted to be in non-increasing vector size order.");
25681
25682 // We can't generate a shuffle node with mismatched input and output types.
25683 // Try to make the types match the type of the output.
25684 if (InVT1 != VT || InVT2 != VT) {
25685 if ((VTSize % InVT1Size == 0) && InVT1 == InVT2) {
25686 // If the output vector length is a multiple of both input lengths,
25687 // we can concatenate them and pad the rest with poison.
25688 unsigned NumConcats = VTSize / InVT1Size;
25689 assert(NumConcats >= 2 && "Concat needs at least two inputs!");
25690 SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getPOISON(InVT1));
25691 ConcatOps[0] = VecIn1;
25692 ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getPOISON(InVT1);
25693 VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
25694 VecIn2 = SDValue();
25695 } else if (InVT1Size == VTSize * 2) {
25696 if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
25697 return SDValue();
25698
25699 if (!VecIn2.getNode()) {
25700 // If we only have one input vector, and it's twice the size of the
25701 // output, split it in two.
25702 VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
25703 DAG.getVectorIdxConstant(NumElems, DL));
25704 VecIn1 = DAG.getExtractSubvector(DL, VT, VecIn1, 0);
25705 // Since we now have shorter input vectors, adjust the offset of the
25706 // second vector's start.
25707 Vec2Offset = NumElems;
25708 } else {
25709 assert(InVT2Size <= InVT1Size &&
25710 "Second input is not going to be larger than the first one.");
25711
25712 // VecIn1 is wider than the output, and we have another, possibly
25713 // smaller input. Pad the smaller input with undefs, shuffle at the
25714 // input vector width, and extract the output.
25715 // The shuffle type is different than VT, so check legality again.
25716 if (LegalOperations &&
 // NOTE(review): the shuffle-legality condition (the second operand of
 // this &&) appears to be elided in this copy of the file -- confirm
 // against upstream.
25718 return SDValue();
25719
25720 // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
25721 // lower it back into a BUILD_VECTOR. So if the inserted type is
25722 // illegal, don't even try.
25723 if (InVT1 != InVT2) {
25724 if (!TLI.isTypeLegal(InVT2))
25725 return SDValue();
25726 VecIn2 = DAG.getInsertSubvector(DL, DAG.getPOISON(InVT1), VecIn2, 0);
25727 }
25728 ShuffleNumElems = NumElems * 2;
25729 }
25730 } else if (InVT2Size * 2 == VTSize && InVT1Size == VTSize) {
 // Second input is half the result width: widen it with poison so both
 // shuffle operands have the result type.
25731 SmallVector<SDValue, 2> ConcatOps(2, DAG.getPOISON(InVT2));
25732 ConcatOps[0] = VecIn2;
25733 VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
25734 } else if (InVT1Size / VTSize > 1 && InVT1Size % VTSize == 0) {
25735 if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems) ||
25736 !TLI.isTypeLegal(InVT1) || !TLI.isTypeLegal(InVT2))
25737 return SDValue();
25738 // If dest vector has less than two elements, then use shuffle and extract
25739 // from larger regs will cost even more.
25740 if (VT.getVectorNumElements() <= 2 || !VecIn2.getNode())
25741 return SDValue();
25742 assert(InVT2Size <= InVT1Size &&
25743 "Second input is not going to be larger than the first one.");
25744
25745 // VecIn1 is wider than the output, and we have another, possibly
25746 // smaller input. Pad the smaller input with undefs, shuffle at the
25747 // input vector width, and extract the output.
25748 // The shuffle type is different than VT, so check legality again.
25749 if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
25750 return SDValue();
25751
25752 if (InVT1 != InVT2) {
25753 VecIn2 = DAG.getInsertSubvector(DL, DAG.getPOISON(InVT1), VecIn2, 0);
25754 }
25755 ShuffleNumElems = InVT1Size / VTSize * NumElems;
25756 } else {
25757 // TODO: Support cases where the length mismatch isn't exactly by a
25758 // factor of 2.
25759 // TODO: Move this check upwards, so that if we have bad type
25760 // mismatches, we don't create any DAG nodes.
25761 return SDValue();
25762 }
25763 }
25764
25765 // Initialize mask to undef.
25766 SmallVector<int, 8> Mask(ShuffleNumElems, -1);
25767
25768 // Only need to run up to the number of elements actually used, not the
25769 // total number of elements in the shuffle - if we are shuffling a wider
25770 // vector, the high lanes should be set to undef.
25771 for (unsigned i = 0; i != NumElems; ++i) {
25772 if (VectorMask[i] <= 0)
25773 continue;
25774
 // Operand i is extract_vector_elt; operand 1 is its constant lane index.
25775 unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
25776 if (VectorMask[i] == (int)LeftIdx) {
25777 Mask[i] = ExtIndex;
25778 } else if (VectorMask[i] == (int)LeftIdx + 1) {
25779 Mask[i] = Vec2Offset + ExtIndex;
25780 }
25781 }
25782
25783 // The type the input vectors may have changed above.
25784 InVT1 = VecIn1.getValueType();
25785
25786 // If we already have a VecIn2, it should have the same type as VecIn1.
25787 // If we don't, get an poison/zero vector of the appropriate type.
25788 VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getPOISON(InVT1);
25789 assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");
25790
25791 SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
25792 if (ShuffleNumElems > NumElems)
25793 Shuffle = DAG.getExtractSubvector(DL, VT, Shuffle, 0);
25794
25795 return Shuffle;
25796}
25797
// Body of the helper that turns (build_vec undef, ..., (zext (extractelt V,
// IndexC)), undef, ...) -- a build vector with exactly one defined,
// zero-extended extracted element -- into a bitcast of a shuffle of V with a
// zero vector.
// NOTE(review): the function signature line (presumably
// reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG)) appears to
// be elided in this copy of the file -- confirm against upstream.
25799 assert(BV->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
25800
25801 // First, determine where the build vector is not undef.
25802 // TODO: We could extend this to handle zero elements as well as undefs.
25803 int NumBVOps = BV->getNumOperands();
25804 int ZextElt = -1;
25805 for (int i = 0; i != NumBVOps; ++i) {
25806 SDValue Op = BV->getOperand(i);
25807 if (Op.isUndef())
25808 continue;
25809 if (ZextElt == -1)
25810 ZextElt = i;
25811 else
 // More than one defined element -- this pattern does not apply.
25812 return SDValue();
25813 }
25814 // Bail out if there's no non-undef element.
25815 if (ZextElt == -1)
25816 return SDValue();
25817
25818 // The build vector contains some number of undef elements and exactly
25819 // one other element. That other element must be a zero-extended scalar
25820 // extracted from a vector at a constant index to turn this into a shuffle.
25821 // Also, require that the build vector does not implicitly truncate/extend
25822 // its elements.
25823 // TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND.
25824 EVT VT = BV->getValueType(0);
25825 SDValue Zext = BV->getOperand(ZextElt);
25826 if (Zext.getOpcode() != ISD::ZERO_EXTEND || !Zext.hasOneUse() ||
 // NOTE(review): the remaining operands of this || (checking for an
 // extract_vector_elt with a constant index and matching types) appear to
 // be elided in this copy of the file -- confirm against upstream.
25830 return SDValue();
25831
25832 // The zero-extend must be a multiple of the source size, and we must be
25833 // building a vector of the same size as the source of the extract element.
25834 SDValue Extract = Zext.getOperand(0);
25835 unsigned DestSize = Zext.getValueSizeInBits();
25836 unsigned SrcSize = Extract.getValueSizeInBits();
25837 if (DestSize % SrcSize != 0 ||
25838 Extract.getOperand(0).getValueSizeInBits() != VT.getSizeInBits())
25839 return SDValue();
25840
25841 // Create a shuffle mask that will combine the extracted element with zeros
25842 // and undefs.
25843 int ZextRatio = DestSize / SrcSize;
25844 int NumMaskElts = NumBVOps * ZextRatio;
25845 SmallVector<int, 32> ShufMask(NumMaskElts, -1);
25846 for (int i = 0; i != NumMaskElts; ++i) {
25847 if (i / ZextRatio == ZextElt) {
25848 // The low bits of the (potentially translated) extracted element map to
25849 // the source vector. The high bits map to zero. We will use a zero vector
25850 // as the 2nd source operand of the shuffle, so use the 1st element of
25851 // that vector (mask value is number-of-elements) for the high bits.
25852 int Low = DAG.getDataLayout().isBigEndian() ? (ZextRatio - 1) : 0;
25853 ShufMask[i] = (i % ZextRatio == Low) ? Extract.getConstantOperandVal(1)
25854 : NumMaskElts;
25855 }
25856
25857 // Undef elements of the build vector remain undef because we initialize
25858 // the shuffle mask with -1.
25859 }
25860
25861 // buildvec undef, ..., (zext (extractelt V, IndexC)), undef... -->
25862 // bitcast (shuffle V, ZeroVec, VectorMask)
25863 SDLoc DL(BV);
25864 EVT VecVT = Extract.getOperand(0).getValueType();
25865 SDValue ZeroVec = DAG.getConstant(0, DL, VecVT);
25866 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
25867 SDValue Shuf = TLI.buildLegalVectorShuffle(VecVT, DL, Extract.getOperand(0),
25868 ZeroVec, ShufMask, DAG);
 // buildLegalVectorShuffle may fail to produce a legal shuffle; propagate.
25869 if (!Shuf)
25870 return SDValue();
25871 return DAG.getBitcast(VT, Shuf);
25872}
25873
// FIXME: promote to STLExtras.
// Return the zero-based position of Val within Range, or -1 (expressed in the
// range's iterator difference type) when Val is not present.
template <typename R, typename T>
static auto getFirstIndexOf(R &&Range, const T &Val) {
  auto It = find(Range, Val);
  using DistT = decltype(std::distance(Range.begin(), It));
  if (It != Range.end())
    return std::distance(Range.begin(), It);
  return static_cast<DistT>(-1);
}
25882
25883// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
25884// operations. If the types of the vectors we're extracting from allow it,
25885// turn this into a vector_shuffle node.
25886SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
25887 SDLoc DL(N);
25888 EVT VT = N->getValueType(0);
25889
25890 // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
25891 if (!isTypeLegal(VT))
25892 return SDValue();
25893
25895 return V;
25896
25897 // May only combine to shuffle after legalize if shuffle is legal.
25898 if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
25899 return SDValue();
25900
25901 bool UsesZeroVector = false;
25902 unsigned NumElems = N->getNumOperands();
25903
25904 // Record, for each element of the newly built vector, which input vector
25905 // that element comes from. -1 stands for undef, 0 for the zero vector,
25906 // and positive values for the input vectors.
25907 // VectorMask maps each element to its vector number, and VecIn maps vector
25908 // numbers to their initial SDValues.
25909
25910 SmallVector<int, 8> VectorMask(NumElems, -1);
25912 VecIn.push_back(SDValue());
25913
25914 // If we have a single extract_element with a constant index, track the index
25915 // value.
25916 unsigned OneConstExtractIndex = ~0u;
25917
25918 // Count the number of extract_vector_elt sources (i.e. non-constant or undef)
25919 unsigned NumExtracts = 0;
25920
25921 for (unsigned i = 0; i != NumElems; ++i) {
25922 SDValue Op = N->getOperand(i);
25923
25924 if (Op.isUndef())
25925 continue;
25926
25927 // See if we can use a blend with a zero vector.
25928 // TODO: Should we generalize this to a blend with an arbitrary constant
25929 // vector?
25931 UsesZeroVector = true;
25932 VectorMask[i] = 0;
25933 continue;
25934 }
25935
25936 // Not an undef or zero. If the input is something other than an
25937 // EXTRACT_VECTOR_ELT with an in-range constant index, bail out.
25938 if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
25939 return SDValue();
25940
25941 SDValue ExtractedFromVec = Op.getOperand(0);
25942 if (ExtractedFromVec.getValueType().isScalableVector())
25943 return SDValue();
25944 auto *ExtractIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1));
25945 if (!ExtractIdx)
25946 return SDValue();
25947
25948 if (ExtractIdx->getAsAPIntVal().uge(
25949 ExtractedFromVec.getValueType().getVectorNumElements()))
25950 return SDValue();
25951
25952 // All inputs must have the same element type as the output.
25953 if (VT.getVectorElementType() !=
25954 ExtractedFromVec.getValueType().getVectorElementType())
25955 return SDValue();
25956
25957 OneConstExtractIndex = ExtractIdx->getZExtValue();
25958 ++NumExtracts;
25959
25960 // Have we seen this input vector before?
25961 // The vectors are expected to be tiny (usually 1 or 2 elements), so using
25962 // a map back from SDValues to numbers isn't worth it.
25963 int Idx = getFirstIndexOf(VecIn, ExtractedFromVec);
25964 if (Idx == -1) { // A new source vector?
25965 Idx = VecIn.size();
25966 VecIn.push_back(ExtractedFromVec);
25967 }
25968
25969 VectorMask[i] = Idx;
25970 }
25971
25972 // If we didn't find at least one input vector, bail out.
25973 if (VecIn.size() < 2)
25974 return SDValue();
25975
25976 // If all the Operands of BUILD_VECTOR extract from same
25977 // vector, then split the vector efficiently based on the maximum
25978 // vector access index and adjust the VectorMask and
25979 // VecIn accordingly.
25980 bool DidSplitVec = false;
25981 if (VecIn.size() == 2) {
25982 // If we only found a single constant indexed extract_vector_elt feeding the
25983 // build_vector, do not produce a more complicated shuffle if the extract is
25984 // cheap with other constant/undef elements. Skip broadcast patterns with
25985 // multiple uses in the build_vector.
25986
25987 // TODO: This should be more aggressive about skipping the shuffle
25988 // formation, particularly if VecIn[1].hasOneUse(), and regardless of the
25989 // index.
25990 if (NumExtracts == 1 &&
25993 TLI.isExtractVecEltCheap(VT, OneConstExtractIndex))
25994 return SDValue();
25995
25996 unsigned MaxIndex = 0;
25997 unsigned NearestPow2 = 0;
25998 SDValue Vec = VecIn.back();
25999 EVT InVT = Vec.getValueType();
26000 SmallVector<unsigned, 8> IndexVec(NumElems, 0);
26001
26002 for (unsigned i = 0; i < NumElems; i++) {
26003 if (VectorMask[i] <= 0)
26004 continue;
26005 unsigned Index = N->getOperand(i).getConstantOperandVal(1);
26006 IndexVec[i] = Index;
26007 MaxIndex = std::max(MaxIndex, Index);
26008 }
26009
26010 NearestPow2 = PowerOf2Ceil(MaxIndex);
26011 if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 &&
26012 NumElems * 2 < NearestPow2) {
26013 unsigned SplitSize = NearestPow2 / 2;
26014 EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
26015 InVT.getVectorElementType(), SplitSize);
26016 if (TLI.isTypeLegal(SplitVT) &&
26017 SplitSize + SplitVT.getVectorNumElements() <=
26018 InVT.getVectorNumElements()) {
26019 SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
26020 DAG.getVectorIdxConstant(SplitSize, DL));
26021 SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
26022 DAG.getVectorIdxConstant(0, DL));
26023 VecIn.pop_back();
26024 VecIn.push_back(VecIn1);
26025 VecIn.push_back(VecIn2);
26026 DidSplitVec = true;
26027
26028 for (unsigned i = 0; i < NumElems; i++) {
26029 if (VectorMask[i] <= 0)
26030 continue;
26031 VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2;
26032 }
26033 }
26034 }
26035 }
26036
26037 // Sort input vectors by decreasing vector element count,
26038 // while preserving the relative order of equally-sized vectors.
26039 // Note that we keep the first "implicit zero vector as-is.
26040 SmallVector<SDValue, 8> SortedVecIn(VecIn);
26041 llvm::stable_sort(MutableArrayRef<SDValue>(SortedVecIn).drop_front(),
26042 [](const SDValue &a, const SDValue &b) {
26043 return a.getValueType().getVectorNumElements() >
26044 b.getValueType().getVectorNumElements();
26045 });
26046
26047 // We now also need to rebuild the VectorMask, because it referenced element
26048 // order in VecIn, and we just sorted them.
26049 for (int &SourceVectorIndex : VectorMask) {
26050 if (SourceVectorIndex <= 0)
26051 continue;
26052 unsigned Idx = getFirstIndexOf(SortedVecIn, VecIn[SourceVectorIndex]);
26053 assert(Idx > 0 && Idx < SortedVecIn.size() &&
26054 VecIn[SourceVectorIndex] == SortedVecIn[Idx] && "Remapping failure");
26055 SourceVectorIndex = Idx;
26056 }
26057
26058 VecIn = std::move(SortedVecIn);
26059
26060 // TODO: Should this fire if some of the input vectors has illegal type (like
26061 // it does now), or should we let legalization run its course first?
26062
26063 // Shuffle phase:
26064 // Take pairs of vectors, and shuffle them so that the result has elements
26065 // from these vectors in the correct places.
26066 // For example, given:
26067 // t10: i32 = extract_vector_elt t1, Constant:i64<0>
26068 // t11: i32 = extract_vector_elt t2, Constant:i64<0>
26069 // t12: i32 = extract_vector_elt t3, Constant:i64<0>
26070 // t13: i32 = extract_vector_elt t1, Constant:i64<1>
26071 // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
26072 // We will generate:
26073 // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
26074 // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
26075 SmallVector<SDValue, 4> Shuffles;
26076 for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
26077 unsigned LeftIdx = 2 * In + 1;
26078 SDValue VecLeft = VecIn[LeftIdx];
26079 SDValue VecRight =
26080 (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
26081
26082 if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
26083 VecRight, LeftIdx, DidSplitVec))
26084 Shuffles.push_back(Shuffle);
26085 else
26086 return SDValue();
26087 }
26088
26089 // If we need the zero vector as an "ingredient" in the blend tree, add it
26090 // to the list of shuffles.
26091 if (UsesZeroVector)
26092 Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
26093 : DAG.getConstantFP(0.0, DL, VT));
26094
26095 // If we only have one shuffle, we're done.
26096 if (Shuffles.size() == 1)
26097 return Shuffles[0];
26098
26099 // Update the vector mask to point to the post-shuffle vectors.
26100 for (int &Vec : VectorMask)
26101 if (Vec == 0)
26102 Vec = Shuffles.size() - 1;
26103 else
26104 Vec = (Vec - 1) / 2;
26105
26106 // More than one shuffle. Generate a binary tree of blends, e.g. if from
26107 // the previous step we got the set of shuffles t10, t11, t12, t13, we will
26108 // generate:
26109 // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
26110 // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
26111 // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
26112 // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
26113 // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
26114 // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
26115 // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21
26116
26117 // Make sure the initial size of the shuffle list is even.
26118 if (Shuffles.size() % 2)
26119 Shuffles.push_back(DAG.getPOISON(VT));
26120
26121 for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
26122 if (CurSize % 2) {
26123 Shuffles[CurSize] = DAG.getPOISON(VT);
26124 CurSize++;
26125 }
26126 for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
26127 int Left = 2 * In;
26128 int Right = 2 * In + 1;
26129 SmallVector<int, 8> Mask(NumElems, -1);
26130 SDValue L = Shuffles[Left];
26131 ArrayRef<int> LMask;
26132 bool IsLeftShuffle = L.getOpcode() == ISD::VECTOR_SHUFFLE &&
26133 L.use_empty() && L.getOperand(1).isUndef() &&
26134 L.getOperand(0).getValueType() == L.getValueType();
26135 if (IsLeftShuffle) {
26136 LMask = cast<ShuffleVectorSDNode>(L.getNode())->getMask();
26137 L = L.getOperand(0);
26138 }
26139 SDValue R = Shuffles[Right];
26140 ArrayRef<int> RMask;
26141 bool IsRightShuffle = R.getOpcode() == ISD::VECTOR_SHUFFLE &&
26142 R.use_empty() && R.getOperand(1).isUndef() &&
26143 R.getOperand(0).getValueType() == R.getValueType();
26144 if (IsRightShuffle) {
26145 RMask = cast<ShuffleVectorSDNode>(R.getNode())->getMask();
26146 R = R.getOperand(0);
26147 }
26148 for (unsigned I = 0; I != NumElems; ++I) {
26149 if (VectorMask[I] == Left) {
26150 Mask[I] = I;
26151 if (IsLeftShuffle)
26152 Mask[I] = LMask[I];
26153 VectorMask[I] = In;
26154 } else if (VectorMask[I] == Right) {
26155 Mask[I] = I + NumElems;
26156 if (IsRightShuffle)
26157 Mask[I] = RMask[I] + NumElems;
26158 VectorMask[I] = In;
26159 }
26160 }
26161
26162 Shuffles[In] = DAG.getVectorShuffle(VT, DL, L, R, Mask);
26163 }
26164 }
26165 return Shuffles[0];
26166}
26167
26168// Try to turn a build vector of zero/sign extends of extract vector elts into
26169// a vector zero/sign extend and possibly an extract subvector.
26170// TODO: Allow undef elements?
26171SDValue DAGCombiner::convertBuildVecExtToExt(SDNode *N) {
26172 if (LegalOperations)
26173 return SDValue();
26174
26175 EVT VT = N->getValueType(0);
26176
26177 bool FoundZeroExtend = false;
26178 bool FoundSignExtend = false;
26179 SDValue Op0 = N->getOperand(0);
26180 auto checkElem = [&](SDValue Op) -> int64_t {
26181 unsigned Opc = Op.getOpcode();
26182 FoundZeroExtend |= (Opc == ISD::ZERO_EXTEND);
26183 FoundSignExtend |= (Opc == ISD::SIGN_EXTEND);
26184 if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::SIGN_EXTEND ||
26185 Opc == ISD::ANY_EXTEND) &&
26186 Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
26187 Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
26188 if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
26189 return C->getZExtValue();
26190 return -1;
26191 };
26192
26193 // Make sure the first element matches
26194 // (zext (extract_vector_elt X, C))
26195 // Offset must be a constant multiple of the
26196 // known-minimum vector length of the result type.
26197 int64_t Offset = checkElem(Op0);
26198 if (Offset < 0 || (Offset % VT.getVectorNumElements()) != 0)
26199 return SDValue();
26200
26201 unsigned NumElems = N->getNumOperands();
26202 SDValue In = Op0.getOperand(0).getOperand(0);
26203 EVT InSVT = In.getValueType().getScalarType();
26204 EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems);
26205
26206 // Don't create an illegal input type after type legalization.
26207 if (LegalTypes && !TLI.isTypeLegal(InVT))
26208 return SDValue();
26209
26210 // Ensure all the elements come from the same vector and are adjacent.
26211 for (unsigned i = 1; i != NumElems; ++i) {
26212 if ((Offset + i) != checkElem(N->getOperand(i)))
26213 return SDValue();
26214 }
26215
26216 // Can't mix zero and sign extends in the same build_vector.
26217 if (FoundZeroExtend && FoundSignExtend)
26218 return SDValue();
26219
26220 unsigned ExtOpc = ISD::ANY_EXTEND;
26221 if (FoundSignExtend)
26222 ExtOpc = ISD::SIGN_EXTEND;
26223 else if (FoundZeroExtend)
26224 ExtOpc = ISD::ZERO_EXTEND;
26225
26226 SDLoc DL(N);
26227 In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
26228 Op0.getOperand(0).getOperand(1));
26229 return DAG.getNode(ExtOpc, DL, VT, In);
26230}
26231
26232// If this is a very simple BUILD_VECTOR with first element being a ZERO_EXTEND,
26233// and all other elements being constant zero's, granularize the BUILD_VECTOR's
26234// element width, absorbing the ZERO_EXTEND, turning it into a constant zero op.
26235// This patten can appear during legalization.
26236//
26237// NOTE: This can be generalized to allow more than a single
26238// non-constant-zero op, UNDEF's, and to be KnownBits-based,
SDValue DAGCombiner::convertBuildVecZextToBuildVecWithZeros(SDNode *N) {
  // Don't run this after legalization. Targets may have other preferences.
  if (Level >= AfterLegalizeDAG)
    return SDValue();

  // FIXME: support big-endian.
  if (DAG.getDataLayout().isBigEndian())
    return SDValue();

  EVT VT = N->getValueType(0);
  EVT OpVT = N->getOperand(0).getValueType();
  assert(!VT.isScalableVector() && "Encountered scalable BUILD_VECTOR?");

  // Integer type with the same total width as one BUILD_VECTOR operand; used
  // below to bitcast the (possibly wider-than-element) operand so it can be
  // truncated as an integer.
  EVT OpIntVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());

  if (!TLI.isTypeLegal(OpIntVT) ||
      (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::BITCAST, OpIntVT)))
    return SDValue();

  unsigned EltBitwidth = VT.getScalarSizeInBits();
  // NOTE: the actual width of operands may be wider than that!

  // Analyze all operands of this BUILD_VECTOR. What is the largest number of
  // active bits they all have? We'll want to truncate them all to that width.
  unsigned ActiveBits = 0;
  APInt KnownZeroOps(VT.getVectorNumElements(), 0);
  for (auto I : enumerate(N->ops())) {
    SDValue Op = I.value();
    // FIXME: support UNDEF elements?
    if (auto *Cst = dyn_cast<ConstantSDNode>(Op)) {
      unsigned OpActiveBits =
          Cst->getAPIntValue().trunc(EltBitwidth).getActiveBits();
      if (OpActiveBits == 0) {
        // Remember which operands are known-zero constants.
        KnownZeroOps.setBit(I.index());
        continue;
      }
      // Profitability check: don't allow non-zero constant operands.
      return SDValue();
    }
    // Profitability check: there must only be a single non-zero operand,
    // and it must be the first operand of the BUILD_VECTOR.
    if (I.index() != 0)
      return SDValue();
    // The operand must be a zero-extension itself.
    // FIXME: this could be generalized to known leading zeros check.
    if (Op.getOpcode() != ISD::ZERO_EXTEND)
      return SDValue();
    unsigned CurrActiveBits =
        Op.getOperand(0).getValueSizeInBits().getFixedValue();
    assert(!ActiveBits && "Already encountered non-constant-zero operand?");
    ActiveBits = CurrActiveBits;
    // We want to at least halve the element size.
    if (2 * ActiveBits > EltBitwidth)
      return SDValue();
  }

  // This BUILD_VECTOR must have at least one non-constant-zero operand.
  if (ActiveBits == 0)
    return SDValue();

  // We have EltBitwidth bits, the *minimal* chunk size is ActiveBits,
  // into how many chunks can we split our element width?
  EVT NewScalarIntVT, NewIntVT;
  std::optional<unsigned> Factor;
  // We can split the element into at least two chunks, but not into more
  // than |_ EltBitwidth / ActiveBits _| chunks. Find a largest split factor
  // for which the element width is a multiple of it,
  // and the resulting types/operations on that chunk width are legal.
  assert(2 * ActiveBits <= EltBitwidth &&
         "We know that half or less bits of the element are active.");
  for (unsigned Scale = EltBitwidth / ActiveBits; Scale >= 2; --Scale) {
    if (EltBitwidth % Scale != 0)
      continue;
    unsigned ChunkBitwidth = EltBitwidth / Scale;
    assert(ChunkBitwidth >= ActiveBits && "As per starting point.");
    NewScalarIntVT = EVT::getIntegerVT(*DAG.getContext(), ChunkBitwidth);
    NewIntVT = EVT::getVectorVT(*DAG.getContext(), NewScalarIntVT,
                                Scale * N->getNumOperands());
    if (!TLI.isTypeLegal(NewScalarIntVT) || !TLI.isTypeLegal(NewIntVT) ||
        (LegalOperations &&
         !(TLI.isOperationLegalOrCustom(ISD::TRUNCATE, NewScalarIntVT) &&
      continue;
    Factor = Scale;
    break;
  }
  if (!Factor)
    return SDValue();

  SDLoc DL(N);
  SDValue ZeroOp = DAG.getConstant(0, DL, NewScalarIntVT);

  // Recreate the BUILD_VECTOR, with elements now being Factor times smaller.
  NewOps.reserve(NewIntVT.getVectorNumElements());
  for (auto I : enumerate(N->ops())) {
    SDValue Op = I.value();
    assert(!Op.isUndef() && "FIXME: after allowing UNDEF's, handle them here.");
    unsigned SrcOpIdx = I.index();
    if (KnownZeroOps[SrcOpIdx]) {
      // A known-zero element expands into Factor zero chunks.
      NewOps.append(*Factor, ZeroOp);
      continue;
    }
    // The single non-zero operand: bitcast to integer, truncate to the chunk
    // width (keeping only the active low bits), then pad with zero chunks.
    // Little-endian only (checked above), so the low chunk comes first.
    Op = DAG.getBitcast(OpIntVT, Op);
    Op = DAG.getNode(ISD::TRUNCATE, DL, NewScalarIntVT, Op);
    NewOps.emplace_back(Op);
    NewOps.append(*Factor - 1, ZeroOp);
  }
  assert(NewOps.size() == NewIntVT.getVectorNumElements());
  SDValue NewBV = DAG.getBuildVector(NewIntVT, DL, NewOps);
  NewBV = DAG.getBitcast(VT, NewBV);
  return NewBV;
}
26352
SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
  EVT VT = N->getValueType(0);

  // A vector built entirely of undefs is undef.
    return DAG.getUNDEF(VT);

  // If this is a splat of a bitcast from another vector, change to a
  // concat_vector.
  // For example:
  // (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) ->
  // (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X))))
  //
  // If X is a build_vector itself, the concat can become a larger build_vector.
  // TODO: Maybe this is useful for non-splat too?
  if (!LegalOperations) {
    SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue();
    // Only change build_vector to a concat_vector if the splat value type is
    // same as the vector element type.
    if (Splat && Splat.getValueType() == VT.getVectorElementType()) {
      // NOTE(review): SrcVT.isVector() below suggests Splat is re-derived
      // (e.g. peeking through a bitcast) between these lines — confirm
      // against upstream before relying on this path.
      EVT SrcVT = Splat.getValueType();
      if (SrcVT.isVector()) {
        unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
        EVT NewVT = EVT::getVectorVT(*DAG.getContext(),
                                     SrcVT.getVectorElementType(), NumElts);
        // Only build the wider concat if its type is (or can become) legal.
        if (!LegalTypes || TLI.isTypeLegal(NewVT)) {
          SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat);
          SDValue Concat =
              DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), NewVT, Ops);
          return DAG.getBitcast(VT, Concat);
        }
      }
    }
  }

  // Check if we can express BUILD VECTOR via subvector extract.
  if (!LegalTypes && (N->getNumOperands() > 1)) {
    SDValue Op0 = N->getOperand(0);
    // Returns the constant extraction index if Op is an extract_vector_elt
    // from the same source vector as operand 0; otherwise (uint64_t)-1.
    auto checkElem = [&](SDValue Op) -> uint64_t {
      if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
          (Op0.getOperand(0) == Op.getOperand(0)))
        if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
          return CNode->getZExtValue();
      return -1;
    };

    // All operands must extract consecutive elements starting at Offset;
    // Offset becomes -1 on any mismatch.
    int Offset = checkElem(Op0);
    for (unsigned i = 0; i < N->getNumOperands(); ++i) {
      if (Offset + i != checkElem(N->getOperand(i))) {
        Offset = -1;
        break;
      }
    }

    // Whole-vector re-extraction: the build_vector is the source vector.
    if ((Offset == 0) &&
        (Op0.getOperand(0).getValueType() == N->getValueType(0)))
      return Op0.getOperand(0);
    if ((Offset != -1) &&
        ((Offset % N->getValueType(0).getVectorNumElements()) ==
         0)) // IDX must be multiple of output size.
      return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
                         Op0.getOperand(0), Op0.getOperand(1));
  }

  if (SDValue V = convertBuildVecExtToExt(N))
    return V;

  if (SDValue V = convertBuildVecZextToBuildVecWithZeros(N))
    return V;

  if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
    return V;

  if (SDValue V = reduceBuildVecTruncToBitCast(N))
    return V;

  if (SDValue V = reduceBuildVecToShuffle(N))
    return V;

  // A splat of a single element is a SPLAT_VECTOR if supported on the target.
  // Do this late as some of the above may replace the splat.
    assert(!V.isUndef() && "Splat of undef should have been handled earlier");
    return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, V);
  }

  return SDValue();
}
26443
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  EVT OpVT = N->getOperand(0).getValueType();

  // If the operands are legal vectors, leave them alone.
  if (TLI.isTypeLegal(OpVT) || OpVT.isScalableVector())
    return SDValue();

  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  // Integer scalar type with the same total width as one concat operand.
  EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());

  // Keep track of what we encounter.
  EVT AnyFPVT;

  for (const SDValue &Op : N->ops()) {
    // Peel a scalar-to-vector bitcast back to its scalar; re-emit undef
    // operands as scalar undefs of the integer type. Anything else bails.
    if (ISD::BITCAST == Op.getOpcode() &&
        !Op.getOperand(0).getValueType().isVector())
      Ops.push_back(Op.getOperand(0));
    else if (Op.isUndef())
      Ops.push_back(DAG.getNode(Op.getOpcode(), DL, SVT));
    else
      return SDValue();

    // Note whether we encounter an integer or floating point scalar.
    // If it's neither, bail out, it could be something weird like x86mmx.
    EVT LastOpVT = Ops.back().getValueType();
    if (LastOpVT.isFloatingPoint())
      AnyFPVT = LastOpVT;
    else if (!LastOpVT.isInteger())
      return SDValue();
  }

  // If any of the operands is a floating point scalar bitcast to a vector,
  // use floating point types throughout, and bitcast everything.
  // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
  if (AnyFPVT != EVT()) {
    SVT = AnyFPVT;
    for (SDValue &Op : Ops) {
      if (Op.getValueType() == SVT)
        continue;
      if (Op.isUndef())
        Op = DAG.getNode(Op.getOpcode(), DL, SVT);
      else
        Op = DAG.getBitcast(SVT, Op);
    }
  }

  // Build one wide vector of the collected scalars, then bitcast it back to
  // the original concat result type.
  EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
                               VT.getSizeInBits() / SVT.getSizeInBits());
  return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
}
26497
26498// Attempt to merge nested concat_vectors/undefs.
26499// Fold concat_vectors(concat_vectors(x,y,z,w),u,u,concat_vectors(a,b,c,d))
26500// --> concat_vectors(x,y,z,w,u,u,u,u,u,u,u,u,a,b,c,d)
                                                  SelectionDAG &DAG) {
  EVT VT = N->getValueType(0);

  // Ensure we're concatenating UNDEF and CONCAT_VECTORS nodes of similar types.
  EVT SubVT;
  SDValue FirstConcat;
  for (const SDValue &Op : N->ops()) {
    if (Op.isUndef())
      continue;
    if (Op.getOpcode() != ISD::CONCAT_VECTORS)
      return SDValue();
    if (!FirstConcat) {
      // The first non-undef concat fixes the common subvector type.
      SubVT = Op.getOperand(0).getValueType();
      if (!DAG.getTargetLoweringInfo().isTypeLegal(SubVT))
        return SDValue();
      FirstConcat = Op;
      continue;
    }
    // Every other concat must use the same subvector type.
    if (SubVT != Op.getOperand(0).getValueType())
      return SDValue();
  }
  assert(FirstConcat && "Concat of all-undefs found");

  // Flatten: splice each inner concat's operands in directly; expand each
  // undef operand into the matching number of poison subvectors.
  SmallVector<SDValue> ConcatOps;
  for (const SDValue &Op : N->ops()) {
    if (Op.isUndef()) {
      ConcatOps.append(FirstConcat->getNumOperands(), DAG.getPOISON(SubVT));
      continue;
    }
    ConcatOps.append(Op->op_begin(), Op->op_end());
  }
  return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, ConcatOps);
}
26535
26536// Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
26537// operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
26538// most two distinct vectors the same size as the result, attempt to turn this
26539// into a legal shuffle.
  EVT VT = N->getValueType(0);
  EVT OpVT = N->getOperand(0).getValueType();

  // We currently can't generate an appropriate shuffle for a scalable vector.
  if (VT.isScalableVector())
    return SDValue();

  int NumElts = VT.getVectorNumElements();
  int NumOpElts = OpVT.getVectorNumElements();

  // SV0/SV1 hold the (at most two) distinct source vectors; the shuffle mask
  // is built NumOpElts entries at a time, one run per concat operand.
  SDValue SV0 = DAG.getPOISON(VT), SV1 = DAG.getPOISON(VT);

  for (SDValue Op : N->ops()) {

    // UNDEF nodes convert to UNDEF shuffle mask values.
    if (Op.isUndef()) {
      Mask.append((unsigned)NumOpElts, -1);
      continue;
    }

    if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
      return SDValue();

    // What vector are we extracting the subvector from and at what index?
    SDValue ExtVec = Op.getOperand(0);
    int ExtIdx = Op.getConstantOperandVal(1);

    // We want the EVT of the original extraction to correctly scale the
    // extraction index.
    EVT ExtVT = ExtVec.getValueType();
    ExtVec = peekThroughBitcasts(ExtVec);

    // UNDEF nodes convert to UNDEF shuffle mask values.
    if (ExtVec.isUndef()) {
      Mask.append((unsigned)NumOpElts, -1);
      continue;
    }

    // Ensure that we are extracting a subvector from a vector the same
    // size as the result.
    if (ExtVT.getSizeInBits() != VT.getSizeInBits())
      return SDValue();

    // Scale the subvector index to account for any bitcast.
    int NumExtElts = ExtVT.getVectorNumElements();
    if (0 == (NumExtElts % NumElts))
      ExtIdx /= (NumExtElts / NumElts);
    else if (0 == (NumElts % NumExtElts))
      ExtIdx *= (NumElts / NumExtElts);
    else
      return SDValue();

    // At most we can reference 2 inputs in the final shuffle.
    if (SV0.isUndef() || SV0 == ExtVec) {
      SV0 = ExtVec;
      for (int i = 0; i != NumOpElts; ++i)
        Mask.push_back(i + ExtIdx);
    } else if (SV1.isUndef() || SV1 == ExtVec) {
      SV1 = ExtVec;
      for (int i = 0; i != NumOpElts; ++i)
        // Indices >= NumElts select from the second shuffle input.
        Mask.push_back(i + ExtIdx + NumElts);
    } else {
      return SDValue();
    }
  }

  // Let the target decide whether this mask yields a legal shuffle.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  return TLI.buildLegalVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
                                     DAG.getBitcast(VT, SV1), Mask, DAG);
}
26613
  unsigned CastOpcode = N->getOperand(0).getOpcode();
  switch (CastOpcode) {
  case ISD::SINT_TO_FP:
  case ISD::UINT_TO_FP:
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT:
    // TODO: Allow more opcodes?
    // case ISD::BITCAST:
    // case ISD::TRUNCATE:
    // case ISD::ZERO_EXTEND:
    // case ISD::SIGN_EXTEND:
    // case ISD::FP_EXTEND:
    break;
  default:
    return SDValue();
  }

  EVT SrcVT = N->getOperand(0).getOperand(0).getValueType();
  if (!SrcVT.isVector())
    return SDValue();

  // All operands of the concat must be the same kind of cast from the same
  // source type.
  for (SDValue Op : N->ops()) {
    if (Op.getOpcode() != CastOpcode || !Op.hasOneUse() ||
        Op.getOperand(0).getValueType() != SrcVT)
      return SDValue();
    SrcOps.push_back(Op.getOperand(0));
  }

  // The wider cast must be supported by the target. This is unusual because
  // the operation support type parameter depends on the opcode. In addition,
  // check the other type in the cast to make sure this is really legal.
  EVT VT = N->getValueType(0);
  ElementCount NumElts = SrcVT.getVectorElementCount() * N->getNumOperands();
  EVT ConcatSrcVT = SrcVT.changeVectorElementCount(*DAG.getContext(), NumElts);
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  switch (CastOpcode) {
  case ISD::SINT_TO_FP:
  case ISD::UINT_TO_FP:
    // int-to-fp legality is keyed on the (wide) integer source type.
    if (!TLI.isOperationLegalOrCustom(CastOpcode, ConcatSrcVT) ||
        !TLI.isTypeLegal(VT))
      return SDValue();
    break;
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT:
    // fp-to-int legality is keyed on the (wide) integer result type.
    if (!TLI.isOperationLegalOrCustom(CastOpcode, VT) ||
        !TLI.isTypeLegal(ConcatSrcVT))
      return SDValue();
    break;
  default:
    llvm_unreachable("Unexpected cast opcode");
  }

  // concat (cast X), (cast Y)... -> cast (concat X, Y...)
  SDLoc DL(N);
  SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatSrcVT, SrcOps);
  return DAG.getNode(CastOpcode, DL, VT, NewConcat);
}
26675
26676// See if this is a simple CONCAT_VECTORS with no UNDEF operands, and if one of
26677// the operands is a SHUFFLE_VECTOR, and all other operands are also operands
26678// to that SHUFFLE_VECTOR, create wider SHUFFLE_VECTOR.
    SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes,
    bool LegalOperations) {
  EVT VT = N->getValueType(0);
  EVT OpVT = N->getOperand(0).getValueType();
  if (VT.isScalableVector())
    return SDValue();

  // For now, only allow simple 2-operand concatenations.
  if (N->getNumOperands() != 2)
    return SDValue();

  // Don't create illegal types/shuffles when not allowed to.
  if ((LegalTypes && !TLI.isTypeLegal(VT)) ||
      (LegalOperations &&
    return SDValue();

  // Analyze all of the operands of the CONCAT_VECTORS. Out of all of them,
  // we want to find one that is: (1) a SHUFFLE_VECTOR (2) only used by us,
  // and (3) all operands of CONCAT_VECTORS must be either that SHUFFLE_VECTOR,
  // or one of the operands of that SHUFFLE_VECTOR (but not UNDEF!).
  // (4) and for now, the SHUFFLE_VECTOR must be unary.
  ShuffleVectorSDNode *SVN = nullptr;
  for (SDValue Op : N->ops()) {
    if (auto *CurSVN = dyn_cast<ShuffleVectorSDNode>(Op);
        CurSVN && CurSVN->getOperand(1).isUndef() && N->isOnlyUserOf(CurSVN) &&
        all_of(N->ops(), [CurSVN](SDValue Op) {
          // FIXME: can we allow UNDEF operands?
          return !Op.isUndef() &&
                 (Op.getNode() == CurSVN || is_contained(CurSVN->ops(), Op));
        })) {
      SVN = CurSVN;
      break;
    }
  }
  if (!SVN)
    return SDValue();

  // We are going to pad the shuffle operands, so any indice, that was picking
  // from the second operand, must be adjusted.
  SmallVector<int, 16> AdjustedMask(SVN->getMask());
  assert(SVN->getOperand(1).isUndef() && "Expected unary shuffle!");

  // Identity masks for the operands of the (padded) shuffle.
  SmallVector<int, 32> IdentityMask(2 * OpVT.getVectorNumElements());
  MutableArrayRef<int> FirstShufOpIdentityMask =
      MutableArrayRef<int>(IdentityMask)
  MutableArrayRef<int> SecondShufOpIdentityMask =
  std::iota(FirstShufOpIdentityMask.begin(), FirstShufOpIdentityMask.end(), 0);
  std::iota(SecondShufOpIdentityMask.begin(), SecondShufOpIdentityMask.end(),

  // New combined shuffle mask: one run per concat operand, depending on
  // whether that operand is the shuffle itself or one of its inputs.
  Mask.reserve(VT.getVectorNumElements());
  for (SDValue Op : N->ops()) {
    assert(!Op.isUndef() && "Not expecting to concatenate UNDEF.");
    if (Op.getNode() == SVN) {
      append_range(Mask, AdjustedMask);
      continue;
    }
    if (Op == SVN->getOperand(0)) {
      append_range(Mask, FirstShufOpIdentityMask);
      continue;
    }
    if (Op == SVN->getOperand(1)) {
      append_range(Mask, SecondShufOpIdentityMask);
      continue;
    }
    llvm_unreachable("Unexpected operand!");
  }

  // Don't create illegal shuffle masks.
  if (!TLI.isShuffleMaskLegal(Mask, VT))
    return SDValue();

  // Pad the shuffle operands with poison.
  SDLoc dl(N);
  std::array<SDValue, 2> ShufOps;
  for (auto I : zip(SVN->ops(), ShufOps)) {
    SDValue ShufOp = std::get<0>(I);
    SDValue &NewShufOp = std::get<1>(I);
    if (ShufOp.isUndef())
      NewShufOp = DAG.getPOISON(VT);
    else {
      // Widen the operand: the original subvector first, poison for the rest.
      SmallVector<SDValue, 2> ShufOpParts(N->getNumOperands(),
                                          DAG.getPOISON(OpVT));
      ShufOpParts[0] = ShufOp;
      NewShufOp = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, ShufOpParts);
    }
  }
  // Finally, create the new wide shuffle.
  return DAG.getVectorShuffle(VT, dl, ShufOps[0], ShufOps[1], Mask);
}
26776
                                           const TargetLowering &TLI,
                                           bool LegalTypes,
                                           bool LegalOperations) {
  EVT VT = N->getValueType(0);

  // Post-legalization we can only create wider SPLAT_VECTOR operations if both
  // the type and operation is legal. The Hexagon target has custom
  // legalization for SPLAT_VECTOR that splits the operation into two parts and
  // concatenates them. Therefore, custom lowering must also be rejected in
  // order to avoid an infinite loop.
  if ((LegalTypes && !TLI.isTypeLegal(VT)) ||
      (LegalOperations && !TLI.isOperationLegal(ISD::SPLAT_VECTOR, VT)))
    return SDValue();

  SDValue Op0 = N->getOperand(0);
  // Every concat operand must be the very same SPLAT_VECTOR node.
  if (!llvm::all_equal(N->op_values()) || Op0.getOpcode() != ISD::SPLAT_VECTOR)
    return SDValue();

  // concat (splat X), (splat X), ... -> wider (splat X).
  return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, Op0.getOperand(0));
}
26798
26799SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
26800 // If we only have one input vector, we don't need to do any concatenation.
26801 if (N->getNumOperands() == 1)
26802 return N->getOperand(0);
26803
26804 // Check if all of the operands are undefs.
26805 EVT VT = N->getValueType(0);
26807 return DAG.getUNDEF(VT);
26808
26809 // Optimize concat_vectors where all but the first of the vectors are undef.
26810 if (all_of(drop_begin(N->ops()),
26811 [](const SDValue &Op) { return Op.isUndef(); })) {
26812 SDValue In = N->getOperand(0);
26813 assert(In.getValueType().isVector() && "Must concat vectors");
26814
26815 // If the input is a concat_vectors, just make a larger concat by padding
26816 // with smaller undefs.
26817 //
26818 // Legalizing in AArch64TargetLowering::LowerCONCAT_VECTORS() and combining
26819 // here could cause an infinite loop. That legalizing happens when LegalDAG
26820 // is true and input of AArch64TargetLowering::LowerCONCAT_VECTORS() is
26821 // scalable.
26822 if (In.getOpcode() == ISD::CONCAT_VECTORS && In.hasOneUse() &&
26823 !(LegalDAG && In.getValueType().isScalableVector())) {
26824 unsigned NumOps = N->getNumOperands() * In.getNumOperands();
26826 Ops.resize(NumOps, DAG.getPOISON(Ops[0].getValueType()));
26827 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
26828 }
26829
26831
26832 // concat_vectors(scalar_to_vector(scalar), undef) ->
26833 // scalar_to_vector(scalar)
26834 if (!LegalOperations && Scalar.getOpcode() == ISD::SCALAR_TO_VECTOR &&
26835 Scalar.hasOneUse()) {
26836 EVT SVT = Scalar.getValueType().getVectorElementType();
26837 if (SVT == Scalar.getOperand(0).getValueType())
26838 Scalar = Scalar.getOperand(0);
26839 }
26840
26841 // concat_vectors(scalar, undef) -> scalar_to_vector(scalar)
26842 if (!Scalar.getValueType().isVector() && In.hasOneUse()) {
26843 // If the bitcast type isn't legal, it might be a trunc of a legal type;
26844 // look through the trunc so we can still do the transform:
26845 // concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
26846 // However, this is only equivalent on little-endian targets.
26847 if (Scalar->getOpcode() == ISD::TRUNCATE &&
26848 !TLI.isTypeLegal(Scalar.getValueType()) &&
26849 TLI.isTypeLegal(Scalar->getOperand(0).getValueType()) &&
26851 Scalar = Scalar->getOperand(0);
26852
26853 EVT SclTy = Scalar.getValueType();
26854
26855 if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
26856 return SDValue();
26857
26858 // Bail out if the vector size is not a multiple of the scalar size.
26859 if (VT.getSizeInBits() % SclTy.getSizeInBits())
26860 return SDValue();
26861
26862 unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
26863 if (VNTNumElms < 2)
26864 return SDValue();
26865
26866 EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
26867 if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
26868 return SDValue();
26869
26870 SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
26871 return DAG.getBitcast(VT, Res);
26872 }
26873 }
26874
26875 // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
26876 // We have already tested above for an UNDEF only concatenation.
26877 // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
26878 // -> (BUILD_VECTOR A, B, ..., C, D, ...)
26879 auto IsBuildVectorOrUndef = [](const SDValue &Op) {
26880 return Op.isUndef() || ISD::BUILD_VECTOR == Op.getOpcode();
26881 };
26882 if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
26884 EVT SVT = VT.getScalarType();
26885
26886 EVT MinVT = SVT;
26887 if (!SVT.isFloatingPoint()) {
26888 // If BUILD_VECTOR are from built from integer, they may have different
26889 // operand types. Get the smallest type and truncate all operands to it.
26890 bool FoundMinVT = false;
26891 for (const SDValue &Op : N->ops())
26892 if (ISD::BUILD_VECTOR == Op.getOpcode()) {
26893 EVT OpSVT = Op.getOperand(0).getValueType();
26894 MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
26895 FoundMinVT = true;
26896 }
26897 assert(FoundMinVT && "Concat vector type mismatch");
26898 }
26899
26900 for (const SDValue &Op : N->ops()) {
26901 EVT OpVT = Op.getValueType();
26902 unsigned NumElts = OpVT.getVectorNumElements();
26903
26904 if (Op.isUndef())
26905 Opnds.append(NumElts, DAG.getPOISON(MinVT));
26906
26907 if (ISD::BUILD_VECTOR == Op.getOpcode()) {
26908 if (SVT.isFloatingPoint()) {
26909 assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
26910 Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
26911 } else {
26912 for (unsigned i = 0; i != NumElts; ++i)
26913 Opnds.push_back(
26914 DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
26915 }
26916 }
26917 }
26918
26919 assert(VT.getVectorNumElements() == Opnds.size() &&
26920 "Concat vector type mismatch");
26921 return DAG.getBuildVector(VT, SDLoc(N), Opnds);
26922 }
26923
26924 if (SDValue V =
26925 combineConcatVectorOfSplats(N, DAG, TLI, LegalTypes, LegalOperations))
26926 return V;
26927
26928 // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
26929 // FIXME: Add support for concat_vectors(bitcast(vec0),bitcast(vec1),...).
26931 return V;
26932
26933 if (Level <= AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) {
26934 // Fold CONCAT_VECTORS of CONCAT_VECTORS (or undef) to VECTOR_SHUFFLE.
26936 return V;
26937
26938 // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
26940 return V;
26941 }
26942
26943 if (SDValue V = combineConcatVectorOfCasts(N, DAG))
26944 return V;
26945
26947 N, DAG, TLI, LegalTypes, LegalOperations))
26948 return V;
26949
26950 // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
26951 // nodes often generate nop CONCAT_VECTOR nodes. Scan the CONCAT_VECTOR
26952 // operands and look for a CONCAT operations that place the incoming vectors
26953 // at the exact same location.
26954 //
26955 // For scalable vectors, EXTRACT_SUBVECTOR indexes are implicitly scaled.
26956 SDValue SingleSource = SDValue();
26957 unsigned PartNumElem =
26958 N->getOperand(0).getValueType().getVectorMinNumElements();
26959
26960 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
26961 SDValue Op = N->getOperand(i);
26962
26963 if (Op.isUndef())
26964 continue;
26965
26966 // Check if this is the identity extract:
26967 if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
26968 return SDValue();
26969
26970 // Find the single incoming vector for the extract_subvector.
26971 if (SingleSource.getNode()) {
26972 if (Op.getOperand(0) != SingleSource)
26973 return SDValue();
26974 } else {
26975 SingleSource = Op.getOperand(0);
26976
26977 // Check the source type is the same as the type of the result.
26978 // If not, this concat may extend the vector, so we can not
26979 // optimize it away.
26980 if (SingleSource.getValueType() != N->getValueType(0))
26981 return SDValue();
26982 }
26983
26984 // Check that we are reading from the identity index.
26985 unsigned IdentityIndex = i * PartNumElem;
26986 if (Op.getConstantOperandAPInt(1) != IdentityIndex)
26987 return SDValue();
26988 }
26989
26990 if (SingleSource.getNode())
26991 return SingleSource;
26992
26993 return SDValue();
26994}
26995
26996SDValue DAGCombiner::visitVECTOR_INTERLEAVE(SDNode *N) {
26997 // Check to see if all operands are identical.
26998 if (!llvm::all_equal(N->op_values()))
26999 return SDValue();
27000
27001 // Check to see if the identical operand is a splat.
27002 if (!DAG.isSplatValue(N->getOperand(0)))
27003 return SDValue();
27004
27005 // interleave splat(X), splat(X).... --> splat(X), splat(X)....
27007 Ops.append(N->op_values().begin(), N->op_values().end());
27008 return CombineTo(N, &Ops);
27009}
27010
27011// Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find
27012// if the subvector can be sourced for free.
27013static SDValue getSubVectorSrc(SDValue V, unsigned Index, EVT SubVT) {
27014 if (V.getOpcode() == ISD::INSERT_SUBVECTOR &&
27015 V.getOperand(1).getValueType() == SubVT &&
27016 V.getConstantOperandAPInt(2) == Index) {
27017 return V.getOperand(1);
27018 }
27019 if (V.getOpcode() == ISD::CONCAT_VECTORS &&
27020 V.getOperand(0).getValueType() == SubVT &&
27021 (Index % SubVT.getVectorMinNumElements()) == 0) {
27022 uint64_t SubIdx = Index / SubVT.getVectorMinNumElements();
27023 return V.getOperand(SubIdx);
27024 }
27025 return SDValue();
27026}
27027
27029 unsigned Index, const SDLoc &DL,
27030 SelectionDAG &DAG,
27031 bool LegalOperations) {
27032 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
27033 unsigned BinOpcode = BinOp.getOpcode();
27034 if (!TLI.isBinOp(BinOpcode) || BinOp->getNumValues() != 1)
27035 return SDValue();
27036
27037 EVT VecVT = BinOp.getValueType();
27038 SDValue Bop0 = BinOp.getOperand(0), Bop1 = BinOp.getOperand(1);
27039 if (VecVT != Bop0.getValueType() || VecVT != Bop1.getValueType())
27040 return SDValue();
27041 if (!TLI.isOperationLegalOrCustom(BinOpcode, SubVT, LegalOperations))
27042 return SDValue();
27043
27044 SDValue Sub0 = getSubVectorSrc(Bop0, Index, SubVT);
27045 SDValue Sub1 = getSubVectorSrc(Bop1, Index, SubVT);
27046
27047 // TODO: We could handle the case where only 1 operand is being inserted by
27048 // creating an extract of the other operand, but that requires checking
27049 // number of uses and/or costs.
27050 if (!Sub0 || !Sub1)
27051 return SDValue();
27052
27053 // We are inserting both operands of the wide binop only to extract back
27054 // to the narrow vector size. Eliminate all of the insert/extract:
27055 // ext (binop (ins ?, X, Index), (ins ?, Y, Index)), Index --> binop X, Y
27056 return DAG.getNode(BinOpcode, DL, SubVT, Sub0, Sub1, BinOp->getFlags());
27057}
27058
/// If we are extracting a subvector produced by a wide binary operator try
/// to use a narrow binary operator and/or avoid concatenation and extraction.
static SDValue narrowExtractedVectorBinOp(EVT VT, SDValue Src, unsigned Index,
                                          const SDLoc &DL, SelectionDAG &DAG,
                                          bool LegalOperations) {
  // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
  // some of these bailouts with other transforms.

  // First try the fold that needs no new extract nodes: both binop operands
  // already provide the demanded subvector via insert/concat.
  if (SDValue V = narrowInsertExtractVectorBinOp(VT, Src, Index, DL, DAG,
                                                 LegalOperations))
    return V;

  // We are looking for an optionally bitcasted wide vector binary operator
  // feeding an extract subvector.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  SDValue BinOp = peekThroughBitcasts(Src);
  unsigned BOpcode = BinOp.getOpcode();
  if (!TLI.isBinOp(BOpcode) || BinOp->getNumValues() != 1)
    return SDValue();

  // Exclude the fake form of fneg (fsub -0.0, x) because that is likely to be
  // reduced to the unary fneg when it is visited, and we probably want to deal
  // with fneg in a target-specific way.
  if (BOpcode == ISD::FSUB) {
    auto *C = isConstOrConstSplatFP(BinOp.getOperand(0), /*AllowUndefs*/ true);
    if (C && C->getValueAPF().isNegZero())
      return SDValue();
  }

  // The binop must be a vector type, so we can extract some fraction of it.
  EVT WideBVT = BinOp.getValueType();
  // The optimisations below currently assume we are dealing with fixed length
  // vectors. It is possible to add support for scalable vectors, but at the
  // moment we've done no analysis to prove whether they are profitable or not.
  if (!WideBVT.isFixedLengthVector())
    return SDValue();

  assert((Index % VT.getVectorNumElements()) == 0 &&
         "Extract index is not a multiple of the vector length.");

  // Bail out if this is not a proper multiple width extraction.
  unsigned WideWidth = WideBVT.getSizeInBits();
  unsigned NarrowWidth = VT.getSizeInBits();
  if (WideWidth % NarrowWidth != 0)
    return SDValue();

  // Bail out if we are extracting a fraction of a single operation. This can
  // occur because we potentially looked through a bitcast of the binop.
  unsigned NarrowingRatio = WideWidth / NarrowWidth;
  unsigned WideNumElts = WideBVT.getVectorNumElements();
  if (WideNumElts % NarrowingRatio != 0)
    return SDValue();

  // Bail out if the target does not support a narrower version of the binop.
  EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
                                   WideNumElts / NarrowingRatio);
  if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT,
                                             LegalOperations))
    return SDValue();

  // If extraction is cheap, we don't need to look at the binop operands
  // for concat ops. The narrow binop alone makes this transform profitable.
  // We can't just reuse the original extract index operand because we may have
  // bitcasted.
  unsigned ConcatOpNum = Index / VT.getVectorNumElements();
  unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
  if (TLI.isExtractSubvectorCheap(NarrowBVT, WideBVT, ExtBOIdx) &&
      BinOp.hasOneUse() && Src->hasOneUse()) {
    // extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N)
    SDValue NewExtIndex = DAG.getVectorIdxConstant(ExtBOIdx, DL);
    SDValue X = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
                            BinOp.getOperand(0), NewExtIndex);
    SDValue Y = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
                            BinOp.getOperand(1), NewExtIndex);
    SDValue NarrowBinOp =
        DAG.getNode(BOpcode, DL, NarrowBVT, X, Y, BinOp->getFlags());
    return DAG.getBitcast(VT, NarrowBinOp);
  }

  // Only handle the case where we are doubling and then halving. A larger ratio
  // may require more than two narrow binops to replace the wide binop.
  if (NarrowingRatio != 2)
    return SDValue();

  // TODO: The motivating case for this transform is an x86 AVX1 target. That
  // target has temptingly almost legal versions of bitwise logic ops in 256-bit
  // flavors, but no other 256-bit integer support. This could be extended to
  // handle any binop, but that may require fixing/adding other folds to avoid
  // codegen regressions.
  if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
    return SDValue();

  // We need at least one concatenation operation of a binop operand to make
  // this transform worthwhile. The concat must double the input vector sizes.
  auto GetSubVector = [ConcatOpNum](SDValue V) -> SDValue {
    if (V.getOpcode() == ISD::CONCAT_VECTORS && V.getNumOperands() == 2)
      return V.getOperand(ConcatOpNum);
    return SDValue();
  };
  SDValue SubVecL = GetSubVector(peekThroughBitcasts(BinOp.getOperand(0)));
  SDValue SubVecR = GetSubVector(peekThroughBitcasts(BinOp.getOperand(1)));

  if (SubVecL || SubVecR) {
    // If a binop operand was not the result of a concat, we must extract a
    // half-sized operand for our new narrow binop:
    // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
    // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, IndexC)
    // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, IndexC), YN
    SDValue IndexC = DAG.getVectorIdxConstant(ExtBOIdx, DL);
    SDValue X = SubVecL ? DAG.getBitcast(NarrowBVT, SubVecL)
                        : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
                                      BinOp.getOperand(0), IndexC);

    SDValue Y = SubVecR ? DAG.getBitcast(NarrowBVT, SubVecR)
                        : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
                                      BinOp.getOperand(1), IndexC);

    SDValue NarrowBinOp =
        DAG.getNode(BOpcode, DL, NarrowBVT, X, Y, BinOp->getFlags());
    return DAG.getBitcast(VT, NarrowBinOp);
  }

  return SDValue();
}
27183
/// If we are extracting a subvector from a wide vector load, convert to a
/// narrow load to eliminate the extraction:
/// (extract_subvector (load wide vector)) --> (load narrow vector)
static SDValue narrowExtractedVectorLoad(EVT VT, SDValue Src, unsigned Index,
                                         const SDLoc &DL, SelectionDAG &DAG) {
  // TODO: Add support for big-endian. The offset calculation must be adjusted.
  if (DAG.getDataLayout().isBigEndian())
    return SDValue();

  // Only unindexed, non-extending (isNormalLoad), non-volatile/non-atomic
  // (isSimple) loads are candidates.
  auto *Ld = dyn_cast<LoadSDNode>(Src);
  if (!Ld || !ISD::isNormalLoad(Ld) || !Ld->isSimple())
    return SDValue();

  // We can only create byte sized loads.
  if (!VT.isByteSized())
    return SDValue();

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  // NOTE(review): the condition guarding this early-out (orig. line 27202)
  // was lost during source extraction -- restore from upstream before use.
    return SDValue();

  unsigned NumElts = VT.getVectorMinNumElements();
  // A fixed length vector being extracted from a scalable vector
  // may not be any *smaller* than the scalable one.
  if (Index == 0 && NumElts >= Ld->getValueType(0).getVectorMinNumElements())
    return SDValue();

  // The definition of EXTRACT_SUBVECTOR states that the index must be a
  // multiple of the minimum number of elements in the result type.
  assert(Index % NumElts == 0 && "The extract subvector index is not a "
                                 "multiple of the result's element count");

  // It's fine to use TypeSize here as we know the offset will not be negative.
  TypeSize Offset = VT.getStoreSize() * (Index / NumElts);
  std::optional<unsigned> ByteOffset;
  if (Offset.isFixed())
    ByteOffset = Offset.getFixedValue();

  // Ask the target whether shrinking this load's width is worthwhile.
  if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT, ByteOffset))
    return SDValue();

  // The narrow load will be offset from the base address of the old load if
  // we are extracting from something besides index 0 (little-endian).
  // TODO: Use "BaseIndexOffset" to make this more effective.
  SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(), Offset, DL);

  // NOTE(review): a declaration (orig. line 27230, presumably
  // "MachineFunction &MF = DAG.getMachineFunction();" given the MF uses
  // below) was lost during source extraction -- restore from upstream.
  MachineMemOperand *MMO;
  if (Offset.isScalable()) {
    MachinePointerInfo MPI =
    // NOTE(review): the MPI initializer expression (orig. line 27234) was
    // lost during source extraction -- restore from upstream.
    MMO = MF.getMachineMemOperand(Ld->getMemOperand(), MPI, VT.getStoreSize());
  } else
    MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset.getFixedValue(),
                                  VT.getStoreSize());

  // Build the narrow load and keep the memory ordering of the original load.
  SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
  DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
  return NewLd;
}
27244
27245/// Given EXTRACT_SUBVECTOR(VECTOR_SHUFFLE(Op0, Op1, Mask)),
27246/// try to produce VECTOR_SHUFFLE(EXTRACT_SUBVECTOR(Op?, ?),
27247/// EXTRACT_SUBVECTOR(Op?, ?),
27248/// Mask'))
27249/// iff it is legal and profitable to do so. Notably, the trimmed mask
27250/// (containing only the elements that are extracted)
27251/// must reference at most two subvectors.
27253 unsigned Index,
27254 const SDLoc &DL,
27255 SelectionDAG &DAG,
27256 bool LegalOperations) {
27257 // Only deal with non-scalable vectors.
27258 EVT WideVT = Src.getValueType();
27259 if (!NarrowVT.isFixedLengthVector() || !WideVT.isFixedLengthVector())
27260 return SDValue();
27261
27262 // The operand must be a shufflevector.
27263 auto *WideShuffleVector = dyn_cast<ShuffleVectorSDNode>(Src);
27264 if (!WideShuffleVector)
27265 return SDValue();
27266
27267 // The old shuffleneeds to go away.
27268 if (!WideShuffleVector->hasOneUse())
27269 return SDValue();
27270
27271 // And the narrow shufflevector that we'll form must be legal.
27272 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
27273 if (LegalOperations &&
27275 return SDValue();
27276
27277 int NumEltsExtracted = NarrowVT.getVectorNumElements();
27278 assert((Index % NumEltsExtracted) == 0 &&
27279 "Extract index is not a multiple of the output vector length.");
27280
27281 int WideNumElts = WideVT.getVectorNumElements();
27282
27283 SmallVector<int, 16> NewMask;
27284 NewMask.reserve(NumEltsExtracted);
27285 SmallSetVector<std::pair<SDValue /*Op*/, int /*SubvectorIndex*/>, 2>
27286 DemandedSubvectors;
27287
27288 // Try to decode the wide mask into narrow mask from at most two subvectors.
27289 for (int M : WideShuffleVector->getMask().slice(Index, NumEltsExtracted)) {
27290 assert((M >= -1) && (M < (2 * WideNumElts)) &&
27291 "Out-of-bounds shuffle mask?");
27292
27293 if (M < 0) {
27294 // Does not depend on operands, does not require adjustment.
27295 NewMask.emplace_back(M);
27296 continue;
27297 }
27298
27299 // From which operand of the shuffle does this shuffle mask element pick?
27300 int WideShufOpIdx = M / WideNumElts;
27301 // Which element of that operand is picked?
27302 int OpEltIdx = M % WideNumElts;
27303
27304 assert((OpEltIdx + WideShufOpIdx * WideNumElts) == M &&
27305 "Shuffle mask vector decomposition failure.");
27306
27307 // And which NumEltsExtracted-sized subvector of that operand is that?
27308 int OpSubvecIdx = OpEltIdx / NumEltsExtracted;
27309 // And which element within that subvector of that operand is that?
27310 int OpEltIdxInSubvec = OpEltIdx % NumEltsExtracted;
27311
27312 assert((OpEltIdxInSubvec + OpSubvecIdx * NumEltsExtracted) == OpEltIdx &&
27313 "Shuffle mask subvector decomposition failure.");
27314
27315 assert((OpEltIdxInSubvec + OpSubvecIdx * NumEltsExtracted +
27316 WideShufOpIdx * WideNumElts) == M &&
27317 "Shuffle mask full decomposition failure.");
27318
27319 SDValue Op = WideShuffleVector->getOperand(WideShufOpIdx);
27320
27321 if (Op.isUndef()) {
27322 // Picking from an undef operand. Let's adjust mask instead.
27323 NewMask.emplace_back(-1);
27324 continue;
27325 }
27326
27327 const std::pair<SDValue, int> DemandedSubvector =
27328 std::make_pair(Op, OpSubvecIdx);
27329
27330 if (DemandedSubvectors.insert(DemandedSubvector)) {
27331 if (DemandedSubvectors.size() > 2)
27332 return SDValue(); // We can't handle more than two subvectors.
27333 // How many elements into the WideVT does this subvector start?
27334 int Index = NumEltsExtracted * OpSubvecIdx;
27335 // Bail out if the extraction isn't going to be cheap.
27336 if (!TLI.isExtractSubvectorCheap(NarrowVT, WideVT, Index))
27337 return SDValue();
27338 }
27339
27340 // Ok, but from which operand of the new shuffle will this element pick?
27341 int NewOpIdx =
27342 getFirstIndexOf(DemandedSubvectors.getArrayRef(), DemandedSubvector);
27343 assert((NewOpIdx == 0 || NewOpIdx == 1) && "Unexpected operand index.");
27344
27345 int AdjM = OpEltIdxInSubvec + NewOpIdx * NumEltsExtracted;
27346 NewMask.emplace_back(AdjM);
27347 }
27348 assert(NewMask.size() == (unsigned)NumEltsExtracted && "Produced bad mask.");
27349 assert(DemandedSubvectors.size() <= 2 &&
27350 "Should have ended up demanding at most two subvectors.");
27351
27352 // Did we discover that the shuffle does not actually depend on operands?
27353 if (DemandedSubvectors.empty())
27354 return DAG.getPOISON(NarrowVT);
27355
27356 // Profitability check: only deal with extractions from the first subvector
27357 // unless the mask becomes an identity mask.
27358 if (!ShuffleVectorInst::isIdentityMask(NewMask, NewMask.size()) ||
27359 any_of(NewMask, [](int M) { return M < 0; }))
27360 for (auto &DemandedSubvector : DemandedSubvectors)
27361 if (DemandedSubvector.second != 0)
27362 return SDValue();
27363
27364 // We still perform the exact same EXTRACT_SUBVECTOR, just on different
27365 // operand[s]/index[es], so there is no point in checking for it's legality.
27366
27367 // Do not turn a legal shuffle into an illegal one.
27368 if (TLI.isShuffleMaskLegal(WideShuffleVector->getMask(), WideVT) &&
27369 !TLI.isShuffleMaskLegal(NewMask, NarrowVT))
27370 return SDValue();
27371
27373 for (const std::pair<SDValue /*Op*/, int /*SubvectorIndex*/>
27374 &DemandedSubvector : DemandedSubvectors) {
27375 // How many elements into the WideVT does this subvector start?
27376 int Index = NumEltsExtracted * DemandedSubvector.second;
27377 SDValue IndexC = DAG.getVectorIdxConstant(Index, DL);
27378 NewOps.emplace_back(DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowVT,
27379 DemandedSubvector.first, IndexC));
27380 }
27381 assert((NewOps.size() == 1 || NewOps.size() == 2) &&
27382 "Should end up with either one or two ops");
27383
27384 // If we ended up with only one operand, pad with poison.
27385 if (NewOps.size() == 1)
27386 NewOps.emplace_back(DAG.getPOISON(NarrowVT));
27387
27388 return DAG.getVectorShuffle(NarrowVT, DL, NewOps[0], NewOps[1], NewMask);
27389}
27390
SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
  EVT NVT = N->getValueType(0);
  SDValue V = N->getOperand(0);
  uint64_t ExtIdx = N->getConstantOperandVal(1);
  SDLoc DL(N);

  // Extract from UNDEF is UNDEF.
  if (V.isUndef())
    return DAG.getUNDEF(NVT);

  if (SDValue NarrowLoad = narrowExtractedVectorLoad(NVT, V, ExtIdx, DL, DAG))
    return NarrowLoad;

  // Combine an extract of an extract into a single extract_subvector.
  // ext (ext X, C), 0 --> ext X, C
  if (ExtIdx == 0 && V.getOpcode() == ISD::EXTRACT_SUBVECTOR && V.hasOneUse()) {
    // Both indices must have the same scaling factor and C has to be a
    // multiple of the new result type's known minimum vector length.
    if (V.getValueType().isScalableVector() == NVT.isScalableVector() &&
        V.getConstantOperandVal(1) % NVT.getVectorMinNumElements() == 0 &&
        TLI.isExtractSubvectorCheap(NVT, V.getOperand(0).getValueType(),
                                    V.getConstantOperandVal(1)) &&
        // NOTE(review): the final conjunct and opening brace of this
        // condition (orig. line 27413) were lost during source extraction --
        // restore from upstream before use.
      return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT, V.getOperand(0),
                         V.getOperand(1));
    }
  }

  // ty1 extract_vector(ty2 splat(V))) -> ty1 splat(V)
  if (V.getOpcode() == ISD::SPLAT_VECTOR)
    if (DAG.isConstantValueOfAnyType(V.getOperand(0)) || V.hasOneUse())
      if (!LegalOperations || TLI.isOperationLegal(ISD::SPLAT_VECTOR, NVT))
        return DAG.getSplatVector(NVT, DL, V.getOperand(0));

  // extract_subvector(insert_subvector(x,y,c1),c2)
  // --> extract_subvector(y,c2-c1)
  // iff we're just extracting from the inserted subvector.
  if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
    SDValue InsSub = V.getOperand(1);
    EVT InsSubVT = InsSub.getValueType();
    unsigned NumInsElts = InsSubVT.getVectorMinNumElements();
    unsigned InsIdx = V.getConstantOperandVal(2);
    unsigned NumSubElts = NVT.getVectorMinNumElements();
    // The extracted range must lie wholly inside the inserted subvector.
    if (InsIdx <= ExtIdx && (ExtIdx + NumSubElts) <= (InsIdx + NumInsElts) &&
        TLI.isExtractSubvectorCheap(NVT, InsSubVT, ExtIdx - InsIdx) &&
        InsSubVT.isFixedLengthVector() && NVT.isFixedLengthVector() &&
        V.getValueType().isFixedLengthVector())
      return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT, InsSub,
                         DAG.getVectorIdxConstant(ExtIdx - InsIdx, DL));
  }

  // Try to move vector bitcast after extract_subv by scaling extraction index:
  // extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
  if (V.getOpcode() == ISD::BITCAST &&
      V.getOperand(0).getValueType().isVector() &&
      (!LegalOperations || TLI.isOperationLegal(ISD::BITCAST, NVT))) {
    SDValue SrcOp = V.getOperand(0);
    EVT SrcVT = SrcOp.getValueType();
    unsigned SrcNumElts = SrcVT.getVectorMinNumElements();
    unsigned DestNumElts = V.getValueType().getVectorMinNumElements();
    if ((SrcNumElts % DestNumElts) == 0) {
      // Source has more (smaller) elements: scale the index up.
      unsigned SrcDestRatio = SrcNumElts / DestNumElts;
      ElementCount NewExtEC = NVT.getVectorElementCount() * SrcDestRatio;
      EVT NewExtVT =
          EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(), NewExtEC);
      // NOTE(review): a guard line (orig. 27456, presumably a legality check
      // opening this scope) was lost during source extraction -- restore
      // from upstream.
        SDValue NewIndex = DAG.getVectorIdxConstant(ExtIdx * SrcDestRatio, DL);
        SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
                                         V.getOperand(0), NewIndex);
        return DAG.getBitcast(NVT, NewExtract);
      }
    }
    if ((DestNumElts % SrcNumElts) == 0) {
      // Source has fewer (larger) elements: scale the index down.
      unsigned DestSrcRatio = DestNumElts / SrcNumElts;
      if (NVT.getVectorElementCount().isKnownMultipleOf(DestSrcRatio)) {
        ElementCount NewExtEC =
            NVT.getVectorElementCount().divideCoefficientBy(DestSrcRatio);
        EVT ScalarVT = SrcVT.getScalarType();
        if ((ExtIdx % DestSrcRatio) == 0) {
          unsigned IndexValScaled = ExtIdx / DestSrcRatio;
          EVT NewExtVT =
              EVT::getVectorVT(*DAG.getContext(), ScalarVT, NewExtEC);
          // NOTE(review): a guard line (orig. 27473) was lost during source
          // extraction -- restore from upstream.
            SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
            SDValue NewExtract =
                DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
                            V.getOperand(0), NewIndex);
            return DAG.getBitcast(NVT, NewExtract);
          }
          if (NewExtEC.isScalar() &&
            // NOTE(review): the final conjunct of this condition (orig. line
            // 27481) was lost during source extraction -- restore from
            // upstream.
            SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
            SDValue NewExtract =
                DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT,
                            V.getOperand(0), NewIndex);
            return DAG.getBitcast(NVT, NewExtract);
          }
        }
      }
    }
  }

  if (V.getOpcode() == ISD::CONCAT_VECTORS) {
    unsigned ExtNumElts = NVT.getVectorMinNumElements();
    EVT ConcatSrcVT = V.getOperand(0).getValueType();
    assert(ConcatSrcVT.getVectorElementType() == NVT.getVectorElementType() &&
           "Concat and extract subvector do not change element type");

    unsigned ConcatSrcNumElts = ConcatSrcVT.getVectorMinNumElements();
    unsigned ConcatOpIdx = ExtIdx / ConcatSrcNumElts;

    // If the concatenated source types match this extract, it's a direct
    // simplification:
    // extract_subvec (concat V1, V2, ...), i --> Vi
    if (NVT.getVectorElementCount() == ConcatSrcVT.getVectorElementCount())
      return V.getOperand(ConcatOpIdx);

    // If the concatenated source vectors are a multiple length of this extract,
    // then extract a fraction of one of those source vectors directly from a
    // concat operand. Example:
    // v2i8 extract_subvec (v16i8 concat (v8i8 X), (v8i8 Y), 14 -->
    // v2i8 extract_subvec v8i8 Y, 6
    if (NVT.isFixedLengthVector() && ConcatSrcVT.isFixedLengthVector() &&
        ConcatSrcNumElts % ExtNumElts == 0) {
      unsigned NewExtIdx = ExtIdx - ConcatOpIdx * ConcatSrcNumElts;
      assert(NewExtIdx + ExtNumElts <= ConcatSrcNumElts &&
             "Trying to extract from >1 concat operand?");
      assert(NewExtIdx % ExtNumElts == 0 &&
             "Extract index is not a multiple of the input vector length.");
      SDValue NewIndexC = DAG.getVectorIdxConstant(NewExtIdx, DL);
      return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT,
                         V.getOperand(ConcatOpIdx), NewIndexC);
    }
  }

  // NOTE(review): the opening of this if (orig. line 27526, presumably
  // "if (SDValue Shuffle = foldExtractSubvectorFromShuffleVector(") was lost
  // during source extraction -- restore from upstream.
      NVT, V, ExtIdx, DL, DAG, LegalOperations))
    return Shuffle;

  if (SDValue NarrowBOp =
          narrowExtractedVectorBinOp(NVT, V, ExtIdx, DL, DAG, LegalOperations))
    return NarrowBOp;

  // NOTE(review): a line (orig. 27534) was lost here during source
  // extraction -- restore from upstream.

  // If the input is a build vector. Try to make a smaller build vector.
  if (V.getOpcode() == ISD::BUILD_VECTOR) {
    EVT InVT = V.getValueType();
    unsigned ExtractSize = NVT.getSizeInBits();
    unsigned EltSize = InVT.getScalarSizeInBits();
    // Only do this if we won't split any elements.
    if (ExtractSize % EltSize == 0) {
      unsigned NumElems = ExtractSize / EltSize;
      EVT EltVT = InVT.getVectorElementType();
      EVT ExtractVT =
          NumElems == 1 ? EltVT
                        : EVT::getVectorVT(*DAG.getContext(), EltVT, NumElems);
      if ((Level < AfterLegalizeDAG ||
           (NumElems == 1 ||
            TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&
          (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
        unsigned IdxVal = (ExtIdx * NVT.getScalarSizeInBits()) / EltSize;

        if (NumElems == 1) {
          // Single element: forward the build_vector operand (truncating if
          // the operand is wider than the element type).
          SDValue Src = V->getOperand(IdxVal);
          if (EltVT != Src.getValueType())
            Src = DAG.getNode(ISD::TRUNCATE, DL, EltVT, Src);
          return DAG.getBitcast(NVT, Src);
        }

        // Extract the pieces from the original build_vector.
        SDValue BuildVec =
            DAG.getBuildVector(ExtractVT, DL, V->ops().slice(IdxVal, NumElems));
        return DAG.getBitcast(NVT, BuildVec);
      }
    }
  }

  if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
    // Handle only simple case where vector being inserted and vector
    // being extracted are of same size.
    EVT SmallVT = V.getOperand(1).getValueType();
    if (NVT.bitsEq(SmallVT)) {
      // Combine:
      // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
      // Into:
      // indices are equal or bit offsets are equal => V1
      // otherwise => (extract_subvec V1, ExtIdx)
      uint64_t InsIdx = V.getConstantOperandVal(2);
      if (InsIdx * SmallVT.getScalarSizeInBits() ==
          ExtIdx * NVT.getScalarSizeInBits()) {
        if (!LegalOperations || TLI.isOperationLegal(ISD::BITCAST, NVT))
          return DAG.getBitcast(NVT, V.getOperand(1));
      } else {
        return DAG.getNode(
            // NOTE(review): the first argument line of this getNode call
            // (orig. 27586, the opcode/DL/type arguments) was lost during
            // source extraction -- restore from upstream.
            DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
            N->getOperand(1));
      }
    }
  }

  // If only EXTRACT_SUBVECTOR nodes use the source vector we can
  // simplify it based on the (valid) extractions.
  if (!V.getValueType().isScalableVector() &&
      llvm::all_of(V->users(), [&](SDNode *Use) {
        return Use->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
               Use->getOperand(0) == V;
      })) {
    // Union the element ranges demanded by all sibling extracts.
    unsigned NumElts = V.getValueType().getVectorNumElements();
    APInt DemandedElts = APInt::getZero(NumElts);
    for (SDNode *User : V->users()) {
      unsigned ExtIdx = User->getConstantOperandVal(1);
      unsigned NumSubElts = User->getValueType(0).getVectorNumElements();
      DemandedElts.setBits(ExtIdx, ExtIdx + NumSubElts);
    }
    if (SimplifyDemandedVectorElts(V, DemandedElts, /*AssumeSingleUse=*/true)) {
      // We simplified the vector operand of this extract subvector. If this
      // extract is not dead, visit it again so it is folded properly.
      if (N->getOpcode() != ISD::DELETED_NODE)
        AddToWorklist(N);
      return SDValue(N, 0);
    }
  } else {
    // NOTE(review): the condition line (orig. 27615, presumably a
    // SimplifyDemandedVectorElts call on this node) was lost during source
    // extraction -- restore from upstream.
      return SDValue(N, 0);
  }

  return SDValue();
}
27621
27622/// Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles
27623/// followed by concatenation. Narrow vector ops may have better performance
27624/// than wide ops, and this can unlock further narrowing of other vector ops.
27625/// Targets can invert this transform later if it is not profitable.
// NOTE(review): original line 27626 (the function header, presumably
// "static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf,") was
// dropped by the page extraction; only the trailing parameter line remains.
27627 SelectionDAG &DAG) {
// Only handle the exact shape (concat X, undef) shuffled with (concat Y, undef)
// where each concat has exactly two operands.
27628 SDValue N0 = Shuf->getOperand(0), N1 = Shuf->getOperand(1);
27629 if (N0.getOpcode() != ISD::CONCAT_VECTORS || N0.getNumOperands() != 2 ||
27630 N1.getOpcode() != ISD::CONCAT_VECTORS || N1.getNumOperands() != 2 ||
27631 !N0.getOperand(1).isUndef() || !N1.getOperand(1).isUndef())
27632 return SDValue();
27633
27634 // Split the wide shuffle mask into halves. Any mask element that is accessing
27635 // operand 1 is offset down to account for narrowing of the vectors.
27636 ArrayRef<int> Mask = Shuf->getMask();
27637 EVT VT = Shuf->getValueType(0);
27638 unsigned NumElts = VT.getVectorNumElements();
27639 unsigned HalfNumElts = NumElts / 2;
27640 SmallVector<int, 16> Mask0(HalfNumElts, -1);
27641 SmallVector<int, 16> Mask1(HalfNumElts, -1);
27642 for (unsigned i = 0; i != NumElts; ++i) {
27643 if (Mask[i] == -1)
27644 continue;
27645 // If we reference the upper (undef) subvector then the element is undef.
27646 if ((Mask[i] % NumElts) >= HalfNumElts)
27647 continue;
// Elements taken from operand 1 are shifted down by HalfNumElts so they index
// into the narrowed second source (Y) instead of the wide concat.
27648 int M = Mask[i] < (int)NumElts ? Mask[i] : Mask[i] - (int)HalfNumElts;
27649 if (i < HalfNumElts)
27650 Mask0[i] = M;
27651 else
27652 Mask1[i - HalfNumElts] = M;
27653 }
27654
27655 // Ask the target if this is a valid transform.
27656 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
27657 EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
27658 HalfNumElts);
27659 if (!TLI.isShuffleMaskLegal(Mask0, HalfVT) ||
27660 !TLI.isShuffleMaskLegal(Mask1, HalfVT))
27661 return SDValue();
27662
27663 // shuffle (concat X, undef), (concat Y, undef), Mask -->
27664 // concat (shuffle X, Y, Mask0), (shuffle X, Y, Mask1)
27665 SDValue X = N0.getOperand(0), Y = N1.getOperand(0);
27666 SDLoc DL(Shuf);
27667 SDValue Shuf0 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask0);
27668 SDValue Shuf1 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask1);
27669 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Shuf0, Shuf1);
27670}
27671
27672// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
27673// or turn a shuffle of a single concat into simpler shuffle then concat.
// NOTE(review): original line 27674 (the function header, presumably
// "static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {")
// was dropped by the page extraction.
27675 EVT VT = N->getValueType(0);
27676 unsigned NumElts = VT.getVectorNumElements();
27677
27678 SDValue N0 = N->getOperand(0);
27679 SDValue N1 = N->getOperand(1);
// NOTE(review): line 27680 is missing here — presumably the cast of N to
// ShuffleVectorSDNode (SVN), which is referenced just below.
27681 ArrayRef<int> Mask = SVN->getMask();
27682
// NOTE(review): line 27683 is missing here — presumably the declaration of
// the 'Ops' SmallVector that is populated in the loop below.
27684 EVT ConcatVT = N0.getOperand(0).getValueType();
27685 unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
27686 unsigned NumConcats = NumElts / NumElemsPerConcat;
27687
27688 auto IsUndefMaskElt = [](int i) { return i == -1; };
27689
27690 // Special case: shuffle(concat(A,B)) can be more efficiently represented
27691 // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
27692 // half vector elements.
27693 if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
27694 llvm::all_of(Mask.slice(NumElemsPerConcat, NumElemsPerConcat),
27695 IsUndefMaskElt)) {
27696 N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0),
27697 N0.getOperand(1),
27698 Mask.slice(0, NumElemsPerConcat));
27699 N1 = DAG.getPOISON(ConcatVT);
27700 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
27701 }
27702
27703 // Look at every vector that's inserted. We're looking for exact
27704 // subvector-sized copies from a concatenated vector
27705 for (unsigned I = 0; I != NumConcats; ++I) {
27706 unsigned Begin = I * NumElemsPerConcat;
27707 ArrayRef<int> SubMask = Mask.slice(Begin, NumElemsPerConcat);
27708
27709 // Make sure we're dealing with a copy.
27710 if (llvm::all_of(SubMask, IsUndefMaskElt)) {
27711 Ops.push_back(DAG.getUNDEF(ConcatVT));
27712 continue;
27713 }
27714
// Determine which single concat operand this sub-mask copies from; every
// defined index must map to the same source operand at the same lane offset.
27715 int OpIdx = -1;
27716 for (int i = 0; i != (int)NumElemsPerConcat; ++i) {
27717 if (IsUndefMaskElt(SubMask[i]))
27718 continue;
27719 if ((SubMask[i] % (int)NumElemsPerConcat) != i)
27720 return SDValue();
27721 int EltOpIdx = SubMask[i] / NumElemsPerConcat;
27722 if (0 <= OpIdx && EltOpIdx != OpIdx)
27723 return SDValue();
27724 OpIdx = EltOpIdx;
27725 }
27726 assert(0 <= OpIdx && "Unknown concat_vectors op");
27727
// OpIdx counts across both shuffle operands' concat inputs: first N0's
// operands, then N1's.
27728 if (OpIdx < (int)N0.getNumOperands())
27729 Ops.push_back(N0.getOperand(OpIdx));
27730 else
27731 Ops.push_back(N1.getOperand(OpIdx - N0.getNumOperands()));
27732 }
27733
27734 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
27735}
27736
27737// Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
27738// BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
27739//
27740// SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
27741// a simplification in some sense, but it isn't appropriate in general: some
27742// BUILD_VECTORs are substantially cheaper than others. The general case
27743// of a BUILD_VECTOR requires inserting each element individually (or
27744// performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
27745// all constants is a single constant pool load. A BUILD_VECTOR where each
27746// element is identical is a splat. A BUILD_VECTOR where most of the operands
27747// are undef lowers to a small number of element insertions.
27748//
27749// To deal with this, we currently use a bunch of mostly arbitrary heuristics.
27750// We don't fold shuffles where one side is a non-zero constant, and we don't
27751// fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
27752// non-constant operands. This seems to work out reasonably well in practice.
// NOTE(review): original line 27753 (the function header, presumably
// "static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,") was
// dropped by the page extraction.
27754 SelectionDAG &DAG,
27755 const TargetLowering &TLI) {
27756 EVT VT = SVN->getValueType(0);
27757 unsigned NumElts = VT.getVectorNumElements();
27758 SDValue N0 = SVN->getOperand(0);
27759 SDValue N1 = SVN->getOperand(1);
27760
// Only fold if the source build_vector dies, otherwise we duplicate work.
27761 if (!N0->hasOneUse())
27762 return SDValue();
27763
27764 // If only one of N1,N2 is constant, bail out if it is not ALL_ZEROS as
27765 // discussed above.
27766 if (!N1.isUndef()) {
27767 if (!N1->hasOneUse())
27768 return SDValue();
27769
27770 bool N0AnyConst = isAnyConstantBuildVector(N0);
27771 bool N1AnyConst = isAnyConstantBuildVector(N1);
27772 if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
27773 return SDValue();
27774 if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
27775 return SDValue();
27776 }
27777
27778 // If both inputs are splats of the same value then we can safely merge this
27779 // to a single BUILD_VECTOR with undef elements based on the shuffle mask.
27780 bool IsSplat = false;
27781 auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
27782 auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
27783 if (BV0 && BV1)
27784 if (SDValue Splat0 = BV0->getSplatValue())
27785 IsSplat = (Splat0 == BV1->getSplatValue());
27786
// NOTE(review): line 27787 is missing here — presumably the declaration of
// the 'Ops' SmallVector of scalar operands built up in the loop below.
27788 SmallSet<SDValue, 16> DuplicateOps;
27789 for (int M : SVN->getMask()) {
27790 SDValue Op = DAG.getPOISON(VT.getScalarType());
27791 if (M >= 0) {
// Mask values >= NumElts select from the second operand; rebase the index.
27792 int Idx = M < (int)NumElts ? M : M - NumElts;
27793 SDValue &S = (M < (int)NumElts ? N0 : N1);
27794 if (S.getOpcode() == ISD::BUILD_VECTOR) {
27795 Op = S.getOperand(Idx);
27796 } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
// SCALAR_TO_VECTOR only defines element 0; all other lanes are poison.
27797 SDValue Op0 = S.getOperand(0);
27798 Op = Idx == 0 ? Op0 : DAG.getPOISON(Op0.getValueType());
27799 } else {
27800 // Operand can't be combined - bail out.
27801 return SDValue();
27802 }
27803 }
27804
27805 // Don't duplicate a non-constant BUILD_VECTOR operand unless we're
27806 // generating a splat; semantically, this is fine, but it's likely to
27807 // generate low-quality code if the target can't reconstruct an appropriate
27808 // shuffle.
27809 if (!Op.isUndef() && !isIntOrFPConstant(Op))
27810 if (!IsSplat && !DuplicateOps.insert(Op).second)
27811 return SDValue();
27812
27813 Ops.push_back(Op);
27814 }
27815
27816 // BUILD_VECTOR requires all inputs to be of the same type, find the
27817 // maximum type and extend them all.
27818 EVT SVT = VT.getScalarType();
27819 if (SVT.isInteger())
27820 for (SDValue &Op : Ops)
27821 SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
27822 if (SVT != VT.getScalarType())
27823 for (SDValue &Op : Ops)
// Prefer zext when the target says it is free, otherwise sext.
27824 Op = Op.isUndef() ? DAG.getUNDEF(SVT)
27825 : (TLI.isZExtFree(Op.getValueType(), SVT)
27826 ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
27827 : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT));
27828 return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
27829}
27830
27831// Match shuffles that can be converted to *_vector_extend_in_reg.
27832// This is often generated during legalization.
27833// e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src)),
27834// and returns the EVT to which the extension should be performed.
27835// NOTE: this assumes that the src is the first operand of the shuffle.
// NOTE(review): original line 27836 (the function header, presumably
// "static std::optional<EVT> canCombineShuffleToExtendVectorInreg(") was
// dropped by the page extraction.
27837 unsigned Opcode, EVT VT, std::function<bool(unsigned)> Match,
27838 SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes,
27839 bool LegalOperations) {
27840 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
27841
27842 // TODO Add support for big-endian when we have a test case.
27843 if (!VT.isInteger() || IsBigEndian)
27844 return std::nullopt;
27845
27846 unsigned NumElts = VT.getVectorNumElements();
27847 unsigned EltSizeInBits = VT.getScalarSizeInBits();
27848
27849 // Attempt to match a '*_extend_vector_inreg' shuffle, we just search for
27850 // power-of-2 extensions as they are the most likely.
27851 // FIXME: should try Scale == NumElts case too,
27852 for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
27853 // The vector width must be a multiple of Scale.
27854 if (NumElts % Scale != 0)
27855 continue;
27856
// Candidate result type: same total width, Scale-times-wider elements.
27857 EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
27858 EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
27859
// Respect the caller's legality constraints before asking the predicate.
27860 if ((LegalTypes && !TLI.isTypeLegal(OutVT)) ||
27861 (LegalOperations && !TLI.isOperationLegalOrCustom(Opcode, OutVT)))
27862 continue;
27863
27864 if (Match(Scale))
27865 return OutVT;
27866 }
27867
27868 return std::nullopt;
27869}
27870
27871// Match shuffles that can be converted to any_vector_extend_in_reg.
27872// This is often generated during legalization.
27873// e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
// NOTE(review): original line 27874 (the function header, presumably
// "static SDValue combineShuffleToAnyExtendVectorInreg(ShuffleVectorSDNode
// *SVN,") was dropped by the page extraction.
27875 SelectionDAG &DAG,
27876 const TargetLowering &TLI,
27877 bool LegalOperations) {
27878 EVT VT = SVN->getValueType(0);
27879 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
27880
27881 // TODO Add support for big-endian when we have a test case.
27882 if (!VT.isInteger() || IsBigEndian)
27883 return SDValue();
27884
27885 // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
// For scale factor 'Scale', every defined mask element must place source
// element i/Scale at position i where i is a multiple of Scale; all other
// lanes must be undef (their value doesn't matter for an any-extend).
27886 auto isAnyExtend = [NumElts = VT.getVectorNumElements(),
27887 Mask = SVN->getMask()](unsigned Scale) {
27888 for (unsigned i = 0; i != NumElts; ++i) {
27889 if (Mask[i] < 0)
27890 continue;
27891 if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
27892 continue;
27893 return false;
27894 }
27895 return true;
27896 };
27897
27898 unsigned Opcode = ISD::ANY_EXTEND_VECTOR_INREG;
27899 SDValue N0 = SVN->getOperand(0);
27900 // Never create an illegal type. Only create unsupported operations if we
27901 // are pre-legalization.
27902 std::optional<EVT> OutVT = canCombineShuffleToExtendVectorInreg(
27903 Opcode, VT, isAnyExtend, DAG, TLI, /*LegalTypes=*/true, LegalOperations);
27904 if (!OutVT)
27905 return SDValue();
27906 return DAG.getBitcast(VT, DAG.getNode(Opcode, SDLoc(SVN), *OutVT, N0));
27907}
27908
27909// Match shuffles that can be converted to zero_extend_vector_inreg.
27910// This is often generated during legalization.
27911// e.g. v4i32 <0,z,1,u> -> (v2i64 zero_extend_vector_inreg(v4i32 src))
// NOTE(review): original line 27912 (the function header, presumably
// "static SDValue combineShuffleToZeroExtendVectorInReg(ShuffleVectorSDNode
// *SVN,") was dropped by the page extraction.
27913 SelectionDAG &DAG,
27914 const TargetLowering &TLI,
27915 bool LegalOperations) {
27916 bool LegalTypes = true;
27917 EVT VT = SVN->getValueType(0);
27918 assert(!VT.isScalableVector() && "Encountered scalable shuffle?");
27919 unsigned NumElts = VT.getVectorNumElements();
27920 unsigned EltSizeInBits = VT.getScalarSizeInBits();
27921
27922 // TODO: add support for big-endian when we have a test case.
27923 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
27924 if (!VT.isInteger() || IsBigEndian)
27925 return SDValue();
27926
// Helper: invoke Fn on every defined mask element, decomposed into which
// operand it reads (OpIdx 0/1) and the element index within that operand.
// Indice is passed by reference so callbacks may rewrite the mask in place.
27927 SmallVector<int, 16> Mask(SVN->getMask());
27928 auto ForEachDecomposedIndice = [NumElts, &Mask](auto Fn) {
27929 for (int &Indice : Mask) {
27930 if (Indice < 0)
27931 continue;
27932 int OpIdx = (unsigned)Indice < NumElts ? 0 : 1;
27933 int OpEltIdx = (unsigned)Indice < NumElts ? Indice : Indice - NumElts;
27934 Fn(Indice, OpIdx, OpEltIdx);
27935 }
27936 };
27937
27938 // Which elements of which operand does this shuffle demand?
27939 std::array<APInt, 2> OpsDemandedElts;
27940 for (APInt &OpDemandedElts : OpsDemandedElts)
27941 OpDemandedElts = APInt::getZero(NumElts);
27942 ForEachDecomposedIndice(
27943 [&OpsDemandedElts](int &Indice, int OpIdx, int OpEltIdx) {
27944 OpsDemandedElts[OpIdx].setBit(OpEltIdx);
27945 });
27946
27947 // Element-wise(!), which of these demanded elements are know to be zero?
27948 std::array<APInt, 2> OpsKnownZeroElts;
27949 for (auto I : zip(SVN->ops(), OpsDemandedElts, OpsKnownZeroElts))
27950 std::get<2>(I) =
27951 DAG.computeVectorKnownZeroElements(std::get<0>(I), std::get<1>(I));
27952
27953 // Manifest zeroable element knowledge in the shuffle mask.
27954 // NOTE: we don't have 'zeroable' sentinel value in generic DAG,
27955 // this is a local invention, but it won't leak into DAG.
27956 // FIXME: should we not manifest them, but just check when matching?
27957 bool HadZeroableElts = false;
27958 ForEachDecomposedIndice([&OpsKnownZeroElts, &HadZeroableElts](
27959 int &Indice, int OpIdx, int OpEltIdx) {
27960 if (OpsKnownZeroElts[OpIdx][OpEltIdx]) {
27961 Indice = -2; // Zeroable element.
27962 HadZeroableElts = true;
27963 }
27964 });
27965
27966 // Don't proceed unless we've refined at least one zeroable mask indice.
27967 // If we didn't, then we are still trying to match the same shuffle mask
27968 // we previously tried to match as ISD::ANY_EXTEND_VECTOR_INREG,
27969 // and evidently failed. Proceeding will lead to endless combine loops.
27970 if (!HadZeroableElts)
27971 return SDValue();
27972
27973 // The shuffle may be more fine-grained than we want. Widen elements first.
27974 // FIXME: should we do this before manifesting zeroable shuffle mask indices?
27975 SmallVector<int, 16> ScaledMask;
27976 getShuffleMaskWithWidestElts(Mask, ScaledMask);
27977 assert(Mask.size() >= ScaledMask.size() &&
27978 Mask.size() % ScaledMask.size() == 0 && "Unexpected mask widening.");
27979 int Prescale = Mask.size() / ScaledMask.size();
27980
27981 NumElts = ScaledMask.size();
27982 EltSizeInBits *= Prescale;
27983
27984 EVT PrescaledVT = EVT::getVectorVT(
27985 *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits),
27986 NumElts);
27987
// If the original type was legal, don't trade it for an illegal widened type.
27988 if (LegalTypes && !TLI.isTypeLegal(PrescaledVT) && TLI.isTypeLegal(VT))
27989 return SDValue();
27990
27991 // For example,
27992 // shuffle<0,z,1,-1> == (v2i64 zero_extend_vector_inreg(v4i32))
27993 // But not shuffle<z,z,1,-1> and not shuffle<0,z,z,-1> ! (for same types)
27994 auto isZeroExtend = [NumElts, &ScaledMask](unsigned Scale) {
27995 assert(Scale >= 2 && Scale <= NumElts && NumElts % Scale == 0 &&
27996 "Unexpected mask scaling factor.");
27997 ArrayRef<int> Mask = ScaledMask;
27998 for (unsigned SrcElt = 0, NumSrcElts = NumElts / Scale;
27999 SrcElt != NumSrcElts; ++SrcElt) {
28000 // Analyze the shuffle mask in Scale-sized chunks.
28001 ArrayRef<int> MaskChunk = Mask.take_front(Scale);
28002 assert(MaskChunk.size() == Scale && "Unexpected mask size.");
28003 Mask = Mask.drop_front(MaskChunk.size());
28004 // The first indice in this chunk must be SrcElt, but not zero!
28005 // FIXME: undef should be fine, but that results in more-defined result.
28006 if (int FirstIndice = MaskChunk[0]; (unsigned)FirstIndice != SrcElt)
28007 return false;
28008 // The rest of the indices in this chunk must be zeros.
28009 // FIXME: undef should be fine, but that results in more-defined result.
28010 if (!all_of(MaskChunk.drop_front(1),
28011 [](int Indice) { return Indice == -2; }))
28012 return false;
28013 }
28014 assert(Mask.empty() && "Did not process the whole mask?");
28015 return true;
28016 };
28017
// Try matching against operand 0, then with operands/mask commuted.
28018 unsigned Opcode = ISD::ZERO_EXTEND_VECTOR_INREG;
28019 for (bool Commuted : {false, true}) {
28020 SDValue Op = SVN->getOperand(!Commuted ? 0 : 1);
28021 if (Commuted)
// NOTE(review): line 28022 is missing here — presumably the call that
// commutes the (scaled) shuffle mask before re-matching. Verify against the
// upstream source.
28023 std::optional<EVT> OutVT = canCombineShuffleToExtendVectorInreg(
28024 Opcode, PrescaledVT, isZeroExtend, DAG, TLI, LegalTypes,
28025 LegalOperations);
28026 if (OutVT)
28027 return DAG.getBitcast(VT, DAG.getNode(Opcode, SDLoc(SVN), *OutVT,
28028 DAG.getBitcast(PrescaledVT, Op)));
28029 }
28030 return SDValue();
28031}
28032
28033// Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
28034// each source element of a large type into the lowest elements of a smaller
28035// destination type. This is often generated during legalization.
28036// If the source node itself was a '*_extend_vector_inreg' node then we should
28037// then be able to remove it.
// NOTE(review): original line 28038 (the function header, presumably
// "static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,") was
// dropped by the page extraction.
28039 SelectionDAG &DAG) {
28040 EVT VT = SVN->getValueType(0);
28041 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
28042
28043 // TODO Add support for big-endian when we have a test case.
28044 if (!VT.isInteger() || IsBigEndian)
28045 return SDValue();
28046
// NOTE(review): line 28047 is missing here — presumably the initialization
// of N0 from the shuffle's first operand (peeking through bitcasts), since
// N0 is used below without any visible definition.
28048
28049 unsigned Opcode = N0.getOpcode();
28050 if (!ISD::isExtVecInRegOpcode(Opcode))
28051 return SDValue();
28052
28053 SDValue N00 = N0.getOperand(0);
28054 ArrayRef<int> Mask = SVN->getMask();
28055 unsigned NumElts = VT.getVectorNumElements();
28056 unsigned EltSizeInBits = VT.getScalarSizeInBits();
28057 unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
28058 unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();
28059
28060 if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
28061 return SDValue();
28062 unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;
28063
28064 // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2-1,-1>
28065 // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
28066 // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
// A truncating shuffle at scale 'Scale' selects every Scale'th element into
// the low lanes; any lane that can't do so must be undef.
28067 auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
28068 for (unsigned i = 0; i != NumElts; ++i) {
28069 if (Mask[i] < 0)
28070 continue;
28071 if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
28072 continue;
28073 return false;
28074 }
28075 return true;
28076 };
28077
28078 // At the moment we just handle the case where we've truncated back to the
28079 // same size as before the extension.
28080 // TODO: handle more extension/truncation cases as cases arise.
28081 if (EltSizeInBits != ExtSrcSizeInBits)
28082 return SDValue();
28083 if (VT.getSizeInBits() != N00.getValueSizeInBits())
28084 return SDValue();
28085
28086 // We can remove *extend_vector_inreg only if the truncation happens at
28087 // the same scale as the extension.
28088 if (isTruncate(ExtScale))
28089 return DAG.getBitcast(VT, N00);
28090
28091 return SDValue();
28092}
28093
28094// Combine shuffles of splat-shuffles of the form:
28095// shuffle (shuffle V, undef, splat-mask), undef, M
28096// If splat-mask contains undef elements, we need to be careful about
28097// introducing undef's in the folded mask which are not the result of composing
28098// the masks of the shuffles.
// NOTE(review): original line 28099 (the function header, presumably
// "static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf,") was
// dropped by the page extraction.
28100 SelectionDAG &DAG) {
28101 EVT VT = Shuf->getValueType(0);
28102 unsigned NumElts = VT.getVectorNumElements();
28103
// Only unary shuffles are handled here.
28104 if (!Shuf->getOperand(1).isUndef())
28105 return SDValue();
28106
28107 // See if this unary non-splat shuffle actually *is* a splat shuffle,
28108 // in disguise, with all demanded elements being identical.
28109 // FIXME: this can be done per-operand.
28110 if (!Shuf->isSplat()) {
28111 APInt DemandedElts(NumElts, 0);
28112 for (int Idx : Shuf->getMask()) {
28113 if (Idx < 0)
28114 continue; // Ignore sentinel indices.
28115 assert((unsigned)Idx < NumElts && "Out-of-bounds shuffle indice?");
28116 DemandedElts.setBit(Idx);
28117 }
28118 assert(DemandedElts.popcount() > 1 && "Is a splat shuffle already?");
28119 APInt UndefElts;
28120 if (DAG.isSplatValue(Shuf->getOperand(0), DemandedElts, UndefElts)) {
28121 // Even if all demanded elements are splat, some of them could be undef.
28122 // Which lowest demanded element is *not* known-undef?
28123 std::optional<unsigned> MinNonUndefIdx;
28124 for (int Idx : Shuf->getMask()) {
28125 if (Idx < 0 || UndefElts[Idx])
28126 continue; // Ignore sentinel indices, and undef elements.
28127 MinNonUndefIdx = std::min<unsigned>(Idx, MinNonUndefIdx.value_or(~0U));
28128 }
28129 if (!MinNonUndefIdx)
28130 return DAG.getUNDEF(VT); // All undef - result is undef.
28131 assert(*MinNonUndefIdx < NumElts && "Expected valid element index.");
// Rewrite every defined index to the single non-undef splat source lane.
28132 SmallVector<int, 8> SplatMask(Shuf->getMask());
28133 for (int &Idx : SplatMask) {
28134 if (Idx < 0)
28135 continue; // Passthrough sentinel indices.
28136 // Otherwise, just pick the lowest demanded non-undef element.
28137 // Or sentinel undef, if we know we'd pick a known-undef element.
28138 Idx = UndefElts[Idx] ? -1 : *MinNonUndefIdx;
28139 }
28140 assert(SplatMask != Shuf->getMask() && "Expected mask to change!");
28141 return DAG.getVectorShuffle(VT, SDLoc(Shuf), Shuf->getOperand(0),
28142 Shuf->getOperand(1), SplatMask);
28143 }
28144 }
28145
28146 // If the inner operand is a known splat with no undefs, just return that directly.
28147 // TODO: Create DemandedElts mask from Shuf's mask.
28148 // TODO: Allow undef elements and merge with the shuffle code below.
28149 if (DAG.isSplatValue(Shuf->getOperand(0), /*AllowUndefs*/ false))
28150 return Shuf->getOperand(0);
// NOTE(review): line(s) around 28152 are missing here — presumably the
// declaration of 'Splat' as a dyn_cast of Shuf's first operand to
// ShuffleVectorSDNode, which is tested just below.
28152
28153 if (!Splat || !Splat->isSplat())
28154 return SDValue();
28155
28156 ArrayRef<int> ShufMask = Shuf->getMask();
28157 ArrayRef<int> SplatMask = Splat->getMask();
28158 assert(ShufMask.size() == SplatMask.size() && "Mask length mismatch");
28159
28160 // Prefer simplifying to the splat-shuffle, if possible. This is legal if
28161 // every undef mask element in the splat-shuffle has a corresponding undef
28162 // element in the user-shuffle's mask or if the composition of mask elements
28163 // would result in undef.
28164 // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
28165 // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
28166 // In this case it is not legal to simplify to the splat-shuffle because we
28167 // may be exposing the users of the shuffle an undef element at index 1
28168 // which was not there before the combine.
28169 // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
28170 // In this case the composition of masks yields SplatMask, so it's ok to
28171 // simplify to the splat-shuffle.
28172 // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
28173 // In this case the composed mask includes all undef elements of SplatMask
28174 // and in addition sets element zero to undef. It is safe to simplify to
28175 // the splat-shuffle.
28176 auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
28177 ArrayRef<int> SplatMask) {
28178 for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
28179 if (UserMask[i] != -1 && SplatMask[i] == -1 &&
28180 SplatMask[UserMask[i]] != -1)
28181 return false;
28182 return true;
28183 };
28184 if (CanSimplifyToExistingSplat(ShufMask, SplatMask))
28185 return Shuf->getOperand(0);
28186
28187 // Create a new shuffle with a mask that is composed of the two shuffles'
28188 // masks.
28189 SmallVector<int, 32> NewMask;
28190 for (int Idx : ShufMask)
28191 NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
28192
28193 return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
28194 Splat->getOperand(0), Splat->getOperand(1),
28195 NewMask);
28196}
28197
28198// Combine shuffles of bitcasts into a shuffle of the bitcast type, providing
28199// the mask can be treated as a larger type.
// NOTE(review): original line 28200 (the function header, presumably
// "static SDValue combineShuffleOfBitcast(ShuffleVectorSDNode *SVN,") was
// dropped by the page extraction.
28201 SelectionDAG &DAG,
28202 const TargetLowering &TLI,
28203 bool LegalOperations) {
28204 SDValue Op0 = SVN->getOperand(0);
28205 SDValue Op1 = SVN->getOperand(1);
28206 EVT VT = SVN->getValueType(0);
28207 if (Op0.getOpcode() != ISD::BITCAST)
28208 return SDValue();
28209 EVT InVT = Op0.getOperand(0).getValueType();
// Both operands must be bitcasts from the same vector type (or Op1 undef).
28210 if (!InVT.isVector() ||
28211 (!Op1.isUndef() && (Op1.getOpcode() != ISD::BITCAST ||
28212 Op1.getOperand(0).getValueType() != InVT)))
28213 return SDValue();
// NOTE(review): line 28214 is missing here — presumably the first half of a
// condition checking whether Op0's source is a constant build vector (with
// the Op1 half of the check on the next line). Verify against upstream.
28215 (Op1.isUndef() || isAnyConstantBuildVector(Op1.getOperand(0))))
28216 return SDValue();
28217
// The pre-bitcast type must have wider elements (fewer lanes) that evenly
// group the narrow lanes.
28218 int VTLanes = VT.getVectorNumElements();
28219 int InLanes = InVT.getVectorNumElements();
28220 if (VTLanes <= InLanes || VTLanes % InLanes != 0 ||
28221 (LegalOperations &&
// NOTE(review): line 28222 is missing here — presumably the legality check
// for a VECTOR_SHUFFLE of InVT that closes this condition.
28223 return SDValue();
28224 int Factor = VTLanes / InLanes;
28225
28226 // Check that each group of lanes in the mask are either undef or make a valid
28227 // mask for the wider lane type.
28228 ArrayRef<int> Mask = SVN->getMask();
28229 SmallVector<int> NewMask;
28230 if (!widenShuffleMaskElts(Factor, Mask, NewMask))
28231 return SDValue();
28232
28233 if (!TLI.isShuffleMaskLegal(NewMask, InVT))
28234 return SDValue();
28235
28236 // Create the new shuffle with the new mask and bitcast it back to the
28237 // original type.
28238 SDLoc DL(SVN);
28239 Op0 = Op0.getOperand(0);
28240 Op1 = Op1.isUndef() ? DAG.getUNDEF(InVT) : Op1.getOperand(0);
28241 SDValue NewShuf = DAG.getVectorShuffle(InVT, DL, Op0, Op1, NewMask);
28242 return DAG.getBitcast(VT, NewShuf);
28243}
28244
28245/// Combine shuffle of shuffle of the form:
28246/// shuf (shuf X, undef, InnerMask), undef, OuterMask --> splat X
// NOTE(review): original line 28247 (the function header, presumably
// "static SDValue formSplatFromShuffles(ShuffleVectorSDNode *OuterShuf,") was
// dropped by the page extraction.
28248 SelectionDAG &DAG) {
// Both shuffles must be unary (second operand undef).
28249 if (!OuterShuf->getOperand(1).isUndef())
28250 return SDValue();
28251 auto *InnerShuf = dyn_cast<ShuffleVectorSDNode>(OuterShuf->getOperand(0));
28252 if (!InnerShuf || !InnerShuf->getOperand(1).isUndef())
28253 return SDValue();
28254
28255 ArrayRef<int> OuterMask = OuterShuf->getMask();
28256 ArrayRef<int> InnerMask = InnerShuf->getMask();
28257 unsigned NumElts = OuterMask.size();
28258 assert(NumElts == InnerMask.size() && "Mask length mismatch");
28259 SmallVector<int, 32> CombinedMask(NumElts, -1);
28260 int SplatIndex = -1;
28261 for (unsigned i = 0; i != NumElts; ++i) {
28262 // Undef lanes remain undef.
28263 int OuterMaskElt = OuterMask[i];
28264 if (OuterMaskElt == -1)
28265 continue;
28266
28267 // Peek through the shuffle masks to get the underlying source element.
28268 int InnerMaskElt = InnerMask[OuterMaskElt];
28269 if (InnerMaskElt == -1)
28270 continue;
28271
28272 // Initialize the splatted element.
28273 if (SplatIndex == -1)
28274 SplatIndex = InnerMaskElt;
28275
28276 // Non-matching index - this is not a splat.
28277 if (SplatIndex != InnerMaskElt)
28278 return SDValue();
28279
28280 CombinedMask[i] = InnerMaskElt;
28281 }
28282 assert((all_of(CombinedMask, equal_to(-1)) ||
28283 getSplatIndex(CombinedMask) != -1) &&
28284 "Expected a splat mask");
28285
28286 // TODO: The transform may be a win even if the mask is not legal.
28287 EVT VT = OuterShuf->getValueType(0);
28288 assert(VT == InnerShuf->getValueType(0) && "Expected matching shuffle types");
28289 if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(CombinedMask, VT))
28290 return SDValue();
28291
// Shuffle the inner shuffle's sources directly with the composed splat mask.
28292 return DAG.getVectorShuffle(VT, SDLoc(OuterShuf), InnerShuf->getOperand(0),
28293 InnerShuf->getOperand(1), CombinedMask);
28294}
28295
28296/// If the shuffle mask is taking exactly one element from the first vector
28297/// operand and passing through all other elements from the second vector
28298/// operand, return the index of the mask element that is choosing an element
28299/// from the first operand. Otherwise, return -1.
// NOTE(review): original line 28300 (the function header, presumably
// "static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int>
// Mask) {") was dropped by the page extraction.
28301 int MaskSize = Mask.size();
28302 int EltFromOp0 = -1;
28303 // TODO: This does not match if there are undef elements in the shuffle mask.
28304 // Should we ignore undefs in the shuffle mask instead? The trade-off is
28305 // removing an instruction (a shuffle), but losing the knowledge that some
28306 // vector lanes are not needed.
28307 for (int i = 0; i != MaskSize; ++i) {
// Mask values in [0, MaskSize) select from operand 0; values in
// [MaskSize, 2*MaskSize) select from operand 1.
28308 if (Mask[i] >= 0 && Mask[i] < MaskSize) {
28309 // We're looking for a shuffle of exactly one element from operand 0.
28310 if (EltFromOp0 != -1)
28311 return -1;
28312 EltFromOp0 = i;
28313 } else if (Mask[i] != i + MaskSize) {
28314 // Nothing from operand 1 can change lanes.
28315 return -1;
28316 }
28317 }
28318 return EltFromOp0;
28319}
28320
28321/// If a shuffle inserts exactly one element from a source vector operand into
28322/// another vector operand and we can access the specified element as a scalar,
28323/// then we can eliminate the shuffle.
28324SDValue DAGCombiner::replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf) {
28325 // First, check if we are taking one element of a vector and shuffling that
28326 // element into another vector.
28327 ArrayRef<int> Mask = Shuf->getMask();
28328 SmallVector<int, 16> CommutedMask(Mask);
28329 SDValue Op0 = Shuf->getOperand(0);
28330 SDValue Op1 = Shuf->getOperand(1);
28331 int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask);
28332 if (ShufOp0Index == -1) {
28333 // Commute mask and check again.
// NOTE(review): line 28334 is missing here — presumably the call that
// commutes CommutedMask (swapping operand-0 and operand-1 index ranges)
// before the re-check below. Verify against upstream.
28335 ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask);
28336 if (ShufOp0Index == -1)
28337 return SDValue();
28338 // Commute operands to match the commuted shuffle mask.
28339 std::swap(Op0, Op1);
28340 Mask = CommutedMask;
28341 }
28342
28343 // The shuffle inserts exactly one element from operand 0 into operand 1.
28344 // Now see if we can access that element as a scalar via a real insert element
28345 // instruction.
28346 // TODO: We can try harder to locate the element as a scalar. Examples: it
28347 // could be an operand of BUILD_VECTOR, or a constant.
28348 assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&
28349 "Shuffle mask value must be from operand 0");
28350
28351 SDValue Elt;
28352 if (sd_match(Op0, m_InsertElt(m_Value(), m_Value(Elt),
28353 m_SpecificInt(Mask[ShufOp0Index])))) {
28354 // There's an existing insertelement with constant insertion index, so we
28355 // don't need to check the legality/profitability of a replacement operation
28356 // that differs at most in the constant value. The target should be able to
28357 // lower any of those in a similar way. If not, legalization will expand
28358 // this to a scalar-to-vector plus shuffle.
28359 //
28360 // Note that the shuffle may move the scalar from the position that the
28361 // insert element used. Therefore, our new insert element occurs at the
28362 // shuffle's mask index value, not the insert's index value.
28363 //
28364 // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
28365 SDValue NewInsIndex = DAG.getVectorIdxConstant(ShufOp0Index, SDLoc(Shuf));
28366 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
28367 Op1, Elt, NewInsIndex);
28368 }
28369
// No pre-existing insertelement; only proceed if the target supports the
// INSERT_VECTOR_ELT operation we'd create.
28370 if (!hasOperation(ISD::INSERT_VECTOR_ELT, Op0.getValueType()))
28371 return SDValue();
28372
// NOTE(review): line 28373 is missing here — presumably the start of an 'if'
// matching Op0 as a pattern that binds 'Elt' (Elt is used below), combined
// with the Mask[ShufOp0Index] == 0 condition on the next line. Verify
// against upstream.
28374 Mask[ShufOp0Index] == 0) {
28375 SDValue NewInsIndex = DAG.getVectorIdxConstant(ShufOp0Index, SDLoc(Shuf));
28376 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
28377 Op1, Elt, NewInsIndex);
28378 }
28379
28380 return SDValue();
28381}
28382
28383/// If we have a unary shuffle of a shuffle, see if it can be folded away
28384/// completely. This has the potential to lose undef knowledge because the first
28385/// shuffle may not have an undef mask element where the second one does. So
28386/// only call this after doing simplifications based on demanded elements.
// NOTE(review): original line 28387 (the function header, presumably
// "static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf) {") was
// dropped by the page extraction.
28388 // shuf (shuf0 X, Y, Mask0), undef, Mask
28389 auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
28390 if (!Shuf0 || !Shuf->getOperand(1).isUndef())
28391 return SDValue();
28392
28393 ArrayRef<int> Mask = Shuf->getMask();
28394 ArrayRef<int> Mask0 = Shuf0->getMask();
28395 for (int i = 0, e = (int)Mask.size(); i != e; ++i) {
28396 // Ignore undef elements.
28397 if (Mask[i] == -1)
28398 continue;
28399 assert(Mask[i] >= 0 && Mask[i] < e && "Unexpected shuffle mask value");
28400
28401 // Is the element of the shuffle operand chosen by this shuffle the same as
28402 // the element chosen by the shuffle operand itself?
28403 if (Mask0[Mask[i]] != Mask0[i])
28404 return SDValue();
28405 }
28406 // Every element of this shuffle is identical to the result of the previous
28407 // shuffle, so we can replace this value.
28408 return Shuf->getOperand(0);
28409}
28410
/// Combine an ISD::VECTOR_SHUFFLE node: canonicalizations (undef/poison
/// operands, identical operands, splats), folds into simpler nodes
/// (build_vector, insert_subvector, AND-with-zero-mask, extend-in-reg), and
/// merging of shuffle-of-shuffle chains. Returns SDValue() if no fold applies.
///
/// NOTE(review): this copy of the file appears to have lost several interior
/// lines (extraction artifact); each suspected gap is flagged below with a
/// NOTE(review) comment — verify against upstream LLVM before building.
SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
  EVT VT = N->getValueType(0);
  unsigned NumElts = VT.getVectorNumElements();

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");

  // Canonicalize shuffle undef, undef -> undef
  if (N0.isUndef() && N1.isUndef())
    return DAG.getUNDEF(VT);

  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);

  // Canonicalize shuffle v, v -> v, poison
  if (N0 == N1)
    return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getPOISON(VT),
                                createUnaryMask(SVN->getMask(), NumElts));

  // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
  if (N0.isUndef())
    return DAG.getCommutedVectorShuffle(*SVN);

  // Remove references to rhs if it is undef
  if (N1.isUndef()) {
    bool Changed = false;
    SmallVector<int, 8> NewMask;
    for (unsigned i = 0; i != NumElts; ++i) {
      int Idx = SVN->getMaskElt(i);
      if (Idx >= (int)NumElts) {
        // Mask element referenced the undef RHS; demote it to undef (-1).
        Idx = -1;
        Changed = true;
      }
      NewMask.push_back(Idx);
    }
    if (Changed)
      return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
  }

  if (SDValue InsElt = replaceShuffleOfInsert(SVN))
    return InsElt;

  // A shuffle of a single vector that is a splatted value can always be folded.
  if (SDValue V = combineShuffleOfSplatVal(SVN, DAG))
    return V;

  if (SDValue V = formSplatFromShuffles(SVN, DAG))
    return V;

  // If it is a splat, check if the argument vector is another splat or a
  // build_vector.
  if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
    int SplatIndex = SVN->getSplatIndex();
    if (N0.hasOneUse() && TLI.isExtractVecEltCheap(VT, SplatIndex) &&
        TLI.isBinOp(N0.getOpcode()) && N0->getNumValues() == 1) {
      // splat (vector_bo L, R), Index -->
      // splat (scalar_bo (extelt L, Index), (extelt R, Index))
      SDValue L = N0.getOperand(0), R = N0.getOperand(1);
      SDLoc DL(N);
      EVT EltVT = VT.getScalarType();
      SDValue Index = DAG.getVectorIdxConstant(SplatIndex, DL);
      SDValue ExtL = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, L, Index);
      SDValue ExtR = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, R, Index);
      SDValue NewBO =
          DAG.getNode(N0.getOpcode(), DL, EltVT, ExtL, ExtR, N0->getFlags());
      SDValue Insert = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, NewBO);
      SmallVector<int, 16> ZeroMask(VT.getVectorNumElements(), 0);
      return DAG.getVectorShuffle(VT, DL, Insert, DAG.getPOISON(VT), ZeroMask);
    }

    // splat(scalar_to_vector(x), 0) -> build_vector(x,...,x)
    // splat(insert_vector_elt(v, x, c), c) -> build_vector(x,...,x)
    if ((!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) &&
        N0.hasOneUse()) {
      if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && SplatIndex == 0)
        return DAG.getSplatBuildVector(VT, SDLoc(N), N0.getOperand(0));

      // NOTE(review): a guard line appears to be missing here in this copy
      // (likely "if (N0.getOpcode() == ISD::INSERT_VECTOR_ELT)") — verify
      // against upstream before compiling.
        if (auto *Idx = dyn_cast<ConstantSDNode>(N0.getOperand(2)))
          if (Idx->getAPIntValue() == SplatIndex)
            return DAG.getSplatBuildVector(VT, SDLoc(N), N0.getOperand(1));

      // Look through a bitcast if LE and splatting lane 0, through to a
      // scalar_to_vector or a build_vector.
      if (N0.getOpcode() == ISD::BITCAST && N0.getOperand(0).hasOneUse() &&
          SplatIndex == 0 && DAG.getDataLayout().isLittleEndian() &&
          // NOTE(review): the remainder of this condition is missing from this
          // copy (likely opcode checks for SCALAR_TO_VECTOR/BUILD_VECTOR on
          // N0.getOperand(0)) — verify against upstream.
        EVT N00VT = N0.getOperand(0).getValueType();
        if (VT.getScalarSizeInBits() <= N00VT.getScalarSizeInBits() &&
            VT.isInteger() && N00VT.isInteger()) {
          EVT InVT =
          // NOTE(review): the initializer of InVT and the start of the call
          // producing Op are missing from this copy — only a trailing call
          // continuation remains below. Verify against upstream.
                                          SDLoc(N), InVT);
          return DAG.getSplatBuildVector(VT, SDLoc(N), Op);
        }
      }
    }

    // If this is a bit convert that changes the element type of the vector but
    // not the number of vector elements, look through it. Be careful not to
    // look though conversions that change things like v4f32 to v2f64.
    SDNode *V = N0.getNode();
    if (V->getOpcode() == ISD::BITCAST) {
      SDValue ConvInput = V->getOperand(0);
      if (ConvInput.getValueType().isVector() &&
          ConvInput.getValueType().getVectorNumElements() == NumElts)
        V = ConvInput.getNode();
    }

    if (V->getOpcode() == ISD::BUILD_VECTOR) {
      assert(V->getNumOperands() == NumElts &&
             "BUILD_VECTOR has wrong number of operands");
      SDValue Base;
      bool AllSame = true;
      // Find the first non-undef element to use as the splat candidate.
      for (unsigned i = 0; i != NumElts; ++i) {
        if (!V->getOperand(i).isUndef()) {
          Base = V->getOperand(i);
          break;
        }
      }
      // Splat of <u, u, u, u>, return <u, u, u, u>
      if (!Base.getNode())
        return N0;
      for (unsigned i = 0; i != NumElts; ++i) {
        if (V->getOperand(i) != Base) {
          AllSame = false;
          break;
        }
      }
      // Splat of <x, x, x, x>, return <x, x, x, x>
      if (AllSame)
        return N0;

      // Canonicalize any other splat as a build_vector, but avoid defining any
      // undefined elements in the mask.
      SDValue Splatted = V->getOperand(SplatIndex);
      SmallVector<SDValue, 8> Ops(NumElts, Splatted);
      EVT EltVT = Splatted.getValueType();

      for (unsigned i = 0; i != NumElts; ++i) {
        if (SVN->getMaskElt(i) < 0)
          Ops[i] = DAG.getPOISON(EltVT);
      }

      SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);

      // We may have jumped through bitcasts, so the type of the
      // BUILD_VECTOR may not match the type of the shuffle.
      if (V->getValueType(0) != VT)
        NewBV = DAG.getBitcast(VT, NewBV);
      return NewBV;
    }
  }

  // Simplify source operands based on shuffle mask.
  // NOTE(review): the guard line is missing from this copy (likely
  // "if (SimplifyDemandedVectorElts(SDValue(N, 0)))") — verify upstream.
    return SDValue(N, 0);

  // This is intentionally placed after demanded elements simplification because
  // it could eliminate knowledge of undef elements created by this shuffle.
  if (SDValue ShufOp = simplifyShuffleOfShuffle(SVN))
    return ShufOp;

  // Match shuffles that can be converted to any_vector_extend_in_reg.
  if (SDValue V =
          combineShuffleToAnyExtendVectorInreg(SVN, DAG, TLI, LegalOperations))
    return V;

  // Combine "truncate_vector_in_reg" style shuffles.
  if (SDValue V = combineTruncationShuffle(SVN, DAG))
    return V;

  if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
      Level < AfterLegalizeVectorOps &&
      (N1.isUndef() ||
      (N1.getOpcode() == ISD::CONCAT_VECTORS &&
       N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
    if (SDValue V = partitionShuffleOfConcats(N, DAG))
      return V;
  }

  // A shuffle of a concat of the same narrow vector can be reduced to use
  // only low-half elements of a concat with undef:
  // shuf (concat X, X), undef, Mask --> shuf (concat X, undef), undef, Mask'
  if (N0.getOpcode() == ISD::CONCAT_VECTORS && N1.isUndef() &&
      N0.getNumOperands() == 2 &&
      N0.getOperand(0) == N0.getOperand(1)) {
    int HalfNumElts = (int)NumElts / 2;
    SmallVector<int, 8> NewMask;
    for (unsigned i = 0; i != NumElts; ++i) {
      int Idx = SVN->getMaskElt(i);
      if (Idx >= HalfNumElts) {
        assert(Idx < (int)NumElts && "Shuffle mask chooses undef op");
        // High-half elements are duplicates of the low half; remap them.
        Idx -= HalfNumElts;
      }
      NewMask.push_back(Idx);
    }
    if (TLI.isShuffleMaskLegal(NewMask, VT)) {
      SDValue UndefVec = DAG.getPOISON(N0.getOperand(0).getValueType());
      SDValue NewCat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
                                   N0.getOperand(0), UndefVec);
      return DAG.getVectorShuffle(VT, SDLoc(N), NewCat, N1, NewMask);
    }
  }

  // See if we can replace a shuffle with an insert_subvector.
  // e.g. v2i32 into v8i32:
  // shuffle(lhs,concat(rhs0,rhs1,rhs2,rhs3),0,1,2,3,10,11,6,7).
  // --> insert_subvector(lhs,rhs1,4).
  if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT) &&
      // NOTE(review): the rest of this condition is missing from this copy
      // (likely a check that an operand is a CONCAT_VECTORS) — verify upstream.
    auto ShuffleToInsert = [&](SDValue LHS, SDValue RHS, ArrayRef<int> Mask) {
      // Ensure RHS subvectors are legal.
      assert(RHS.getOpcode() == ISD::CONCAT_VECTORS && "Can't find subvectors");
      EVT SubVT = RHS.getOperand(0).getValueType();
      int NumSubVecs = RHS.getNumOperands();
      int NumSubElts = SubVT.getVectorNumElements();
      assert((NumElts % NumSubElts) == 0 && "Subvector mismatch");
      if (!TLI.isTypeLegal(SubVT))
        return SDValue();

      // Don't bother if we have an unary shuffle (matches undef + LHS elts).
      if (all_of(Mask, [NumElts](int M) { return M < (int)NumElts; }))
        return SDValue();

      // Search [NumSubElts] spans for RHS sequence.
      // TODO: Can we avoid nested loops to increase performance?
      SmallVector<int> InsertionMask(NumElts);
      for (int SubVec = 0; SubVec != NumSubVecs; ++SubVec) {
        for (int SubIdx = 0; SubIdx != (int)NumElts; SubIdx += NumSubElts) {
          // Reset mask to identity.
          std::iota(InsertionMask.begin(), InsertionMask.end(), 0);

          // Add subvector insertion.
          std::iota(InsertionMask.begin() + SubIdx,
                    InsertionMask.begin() + SubIdx + NumSubElts,
                    NumElts + (SubVec * NumSubElts));

          // See if the shuffle mask matches the reference insertion mask.
          bool MatchingShuffle = true;
          for (int i = 0; i != (int)NumElts; ++i) {
            int ExpectIdx = InsertionMask[i];
            int ActualIdx = Mask[i];
            if (0 <= ActualIdx && ExpectIdx != ActualIdx) {
              MatchingShuffle = false;
              break;
            }
          }

          if (MatchingShuffle)
            return DAG.getInsertSubvector(SDLoc(N), LHS, RHS.getOperand(SubVec),
                                          SubIdx);
        }
      }
      return SDValue();
    };
    ArrayRef<int> Mask = SVN->getMask();
    if (N1.getOpcode() == ISD::CONCAT_VECTORS)
      if (SDValue InsertN1 = ShuffleToInsert(N0, N1, Mask))
        return InsertN1;
    if (N0.getOpcode() == ISD::CONCAT_VECTORS) {
      SmallVector<int> CommuteMask(Mask);
      // NOTE(review): a line appears to be missing here in this copy (likely
      // "ShuffleVectorSDNode::commuteMask(CommuteMask);") — verify upstream.
      if (SDValue InsertN0 = ShuffleToInsert(N1, N0, CommuteMask))
        return InsertN0;
    }
  }

  // If we're not performing a select/blend shuffle, see if we can convert the
  // shuffle into a AND node, with all the out-of-lane elements are known zero.
  if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
    bool IsInLaneMask = true;
    ArrayRef<int> Mask = SVN->getMask();
    SmallVector<int, 16> ClearMask(NumElts, -1);
    APInt DemandedLHS = APInt::getZero(NumElts);
    APInt DemandedRHS = APInt::getZero(NumElts);
    for (int I = 0; I != (int)NumElts; ++I) {
      int M = Mask[I];
      if (M < 0)
        continue;
      ClearMask[I] = M == I ? I : (I + NumElts);
      IsInLaneMask &= (M == I) || (M == (int)(I + NumElts));
      if (M != I) {
        APInt &Demanded = M < (int)NumElts ? DemandedLHS : DemandedRHS;
        Demanded.setBit(M % NumElts);
      }
    }
    // TODO: Should we try to mask with N1 as well?
    if (!IsInLaneMask && (!DemandedLHS.isZero() || !DemandedRHS.isZero()) &&
        (DemandedLHS.isZero() || DAG.MaskedVectorIsZero(N0, DemandedLHS)) &&
        (DemandedRHS.isZero() || DAG.MaskedVectorIsZero(N1, DemandedRHS))) {
      SDLoc DL(N);
      EVT IntVT = VT.changeVectorElementTypeToInteger();
      EVT IntSVT = VT.getVectorElementType().changeTypeToInteger();
      // Transform the type to a legal type so that the buildvector constant
      // elements are not illegal. Make sure that the result is larger than the
      // original type, incase the value is split into two (eg i64->i32).
      if (!TLI.isTypeLegal(IntSVT) && LegalTypes)
        IntSVT = TLI.getTypeToTransformTo(*DAG.getContext(), IntSVT);
      if (IntSVT.getSizeInBits() >= IntVT.getScalarSizeInBits()) {
        SDValue ZeroElt = DAG.getConstant(0, DL, IntSVT);
        SDValue AllOnesElt = DAG.getAllOnesConstant(DL, IntSVT);
        SmallVector<SDValue, 16> AndMask(NumElts, DAG.getPOISON(IntSVT));
        for (int I = 0; I != (int)NumElts; ++I)
          if (0 <= Mask[I])
            AndMask[I] = Mask[I] == I ? AllOnesElt : ZeroElt;

        // See if a clear mask is legal instead of going via
        // XformToShuffleWithZero which loses UNDEF mask elements.
        if (TLI.isVectorClearMaskLegal(ClearMask, IntVT))
          return DAG.getBitcast(
              VT, DAG.getVectorShuffle(IntVT, DL, DAG.getBitcast(IntVT, N0),
                                       DAG.getConstant(0, DL, IntVT), ClearMask));

        if (TLI.isOperationLegalOrCustom(ISD::AND, IntVT))
          return DAG.getBitcast(
              VT, DAG.getNode(ISD::AND, DL, IntVT, DAG.getBitcast(IntVT, N0),
                              DAG.getBuildVector(IntVT, DL, AndMask)));
      }
    }
  }

  // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
  // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
  if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT))
    if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
      return Res;

  // If this shuffle only has a single input that is a bitcasted shuffle,
  // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
  // back to their original types.
  if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
      N1.isUndef() && Level < AfterLegalizeVectorOps &&
      TLI.isTypeLegal(VT)) {

    // NOTE(review): the definition of BC0 is missing from this copy (likely
    // "SDValue BC0 = peekThroughOneUseBitcasts(N0);") — verify upstream.
    if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
      EVT SVT = VT.getScalarType();
      EVT InnerVT = BC0->getValueType(0);
      EVT InnerSVT = InnerVT.getScalarType();

      // Determine which shuffle works with the smaller scalar type.
      EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
      EVT ScaleSVT = ScaleVT.getScalarType();

      if (TLI.isTypeLegal(ScaleVT) &&
          0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
          0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
        int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
        int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();

        // Scale the shuffle masks to the smaller scalar type.
        ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
        SmallVector<int, 8> InnerMask;
        SmallVector<int, 8> OuterMask;
        narrowShuffleMaskElts(InnerScale, InnerSVN->getMask(), InnerMask);
        narrowShuffleMaskElts(OuterScale, SVN->getMask(), OuterMask);

        // Merge the shuffle masks.
        SmallVector<int, 8> NewMask;
        for (int M : OuterMask)
          NewMask.push_back(M < 0 ? -1 : InnerMask[M]);

        // Test for shuffle mask legality over both commutations.
        SDValue SV0 = BC0->getOperand(0);
        SDValue SV1 = BC0->getOperand(1);
        bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
        if (!LegalMask) {
          std::swap(SV0, SV1);
          // NOTE(review): a line appears to be missing here in this copy
          // (likely "ShuffleVectorSDNode::commuteMask(NewMask);") — verify
          // upstream.
          LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
        }

        if (LegalMask) {
          SV0 = DAG.getBitcast(ScaleVT, SV0);
          SV1 = DAG.getBitcast(ScaleVT, SV1);
          return DAG.getBitcast(
              VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
        }
      }
    }
  }

  // Match shuffles of bitcasts, so long as the mask can be treated as the
  // larger type.
  if (SDValue V = combineShuffleOfBitcast(SVN, DAG, TLI, LegalOperations))
    return V;

  // Compute the combined shuffle mask for a shuffle with SV0 as the first
  // operand, and SV1 as the second operand.
  // i.e. Merge SVN(OtherSVN, N1) -> shuffle(SV0, SV1, Mask) iff Commute = false
  //      Merge SVN(N1, OtherSVN) -> shuffle(SV0, SV1, Mask') iff Commute = true
  auto MergeInnerShuffle =
      [NumElts, &VT](bool Commute, ShuffleVectorSDNode *SVN,
                     ShuffleVectorSDNode *OtherSVN, SDValue N1,
                     const TargetLowering &TLI, SDValue &SV0, SDValue &SV1,
                     SmallVectorImpl<int> &Mask) -> bool {
    // Don't try to fold splats; they're likely to simplify somehow, or they
    // might be free.
    if (OtherSVN->isSplat())
      return false;

    SV0 = SV1 = SDValue();
    Mask.clear();

    for (unsigned i = 0; i != NumElts; ++i) {
      int Idx = SVN->getMaskElt(i);
      if (Idx < 0) {
        // Propagate Undef.
        Mask.push_back(Idx);
        continue;
      }

      if (Commute)
        Idx = (Idx < (int)NumElts) ? (Idx + NumElts) : (Idx - NumElts);

      SDValue CurrentVec;
      if (Idx < (int)NumElts) {
        // This shuffle index refers to the inner shuffle N0. Lookup the inner
        // shuffle mask to identify which vector is actually referenced.
        Idx = OtherSVN->getMaskElt(Idx);
        if (Idx < 0) {
          // Propagate Undef.
          Mask.push_back(Idx);
          continue;
        }
        CurrentVec = (Idx < (int)NumElts) ? OtherSVN->getOperand(0)
                                          : OtherSVN->getOperand(1);
      } else {
        // This shuffle index references an element within N1.
        CurrentVec = N1;
      }

      // Simple case where 'CurrentVec' is UNDEF.
      if (CurrentVec.isUndef()) {
        Mask.push_back(-1);
        continue;
      }

      // Canonicalize the shuffle index. We don't know yet if CurrentVec
      // will be the first or second operand of the combined shuffle.
      Idx = Idx % NumElts;
      if (!SV0.getNode() || SV0 == CurrentVec) {
        // Ok. CurrentVec is the left hand side.
        // Update the mask accordingly.
        SV0 = CurrentVec;
        Mask.push_back(Idx);
        continue;
      }
      if (!SV1.getNode() || SV1 == CurrentVec) {
        // Ok. CurrentVec is the right hand side.
        // Update the mask accordingly.
        SV1 = CurrentVec;
        Mask.push_back(Idx + NumElts);
        continue;
      }

      // Last chance - see if the vector is another shuffle and if it
      // uses one of the existing candidate shuffle ops.
      if (auto *CurrentSVN = dyn_cast<ShuffleVectorSDNode>(CurrentVec)) {
        int InnerIdx = CurrentSVN->getMaskElt(Idx);
        if (InnerIdx < 0) {
          Mask.push_back(-1);
          continue;
        }
        SDValue InnerVec = (InnerIdx < (int)NumElts)
                               ? CurrentSVN->getOperand(0)
                               : CurrentSVN->getOperand(1);
        if (InnerVec.isUndef()) {
          Mask.push_back(-1);
          continue;
        }
        InnerIdx %= NumElts;
        if (InnerVec == SV0) {
          Mask.push_back(InnerIdx);
          continue;
        }
        if (InnerVec == SV1) {
          Mask.push_back(InnerIdx + NumElts);
          continue;
        }
      }

      // Bail out if we cannot convert the shuffle pair into a single shuffle.
      return false;
    }

    if (llvm::all_of(Mask, [](int M) { return M < 0; }))
      return true;

    // Avoid introducing shuffles with illegal mask.
    // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
    // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
    // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
    // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
    // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
    // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
    if (TLI.isShuffleMaskLegal(Mask, VT))
      return true;

    std::swap(SV0, SV1);
    // NOTE(review): a line appears to be missing here in this copy (likely
    // "ShuffleVectorSDNode::commuteMask(Mask);") — verify upstream.
    return TLI.isShuffleMaskLegal(Mask, VT);
  };

  if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
    // Canonicalize shuffles according to rules:
    //  shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
    //  shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
    //  shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
    if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
        // NOTE(review): the rest of this condition is missing from this copy
        // (likely "N->isOnlyUserOf(N1.getNode())) {") — verify upstream.
      // The incoming shuffle must be of the same type as the result of the
      // current shuffle.
      assert(N1->getOperand(0).getValueType() == VT &&
             "Shuffle types don't match");

      SDValue SV0 = N1->getOperand(0);
      SDValue SV1 = N1->getOperand(1);
      bool HasSameOp0 = N0 == SV0;
      bool IsSV1Undef = SV1.isUndef();
      if (HasSameOp0 || IsSV1Undef || N0 == SV1)
        // Commute the operands of this shuffle so merging below will trigger.
        return DAG.getCommutedVectorShuffle(*SVN);
    }

    // Canonicalize splat shuffles to the RHS to improve merging below.
    //  shuffle(splat(A,u), shuffle(C,D)) -> shuffle'(shuffle(C,D), splat(A,u))
    if (N0.getOpcode() == ISD::VECTOR_SHUFFLE &&
        N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
        cast<ShuffleVectorSDNode>(N0)->isSplat() &&
        !cast<ShuffleVectorSDNode>(N1)->isSplat()) {
      return DAG.getCommutedVectorShuffle(*SVN);
    }

    // Try to fold according to rules:
    //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
    //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
    //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
    // Don't try to fold shuffles with illegal type.
    // Only fold if this shuffle is the only user of the other shuffle.
    // Try matching shuffle(C,shuffle(A,B)) commutted patterns as well.
    for (int i = 0; i != 2; ++i) {
      if (N->getOperand(i).getOpcode() == ISD::VECTOR_SHUFFLE &&
          N->isOnlyUserOf(N->getOperand(i).getNode())) {
        // The incoming shuffle must be of the same type as the result of the
        // current shuffle.
        auto *OtherSV = cast<ShuffleVectorSDNode>(N->getOperand(i));
        assert(OtherSV->getOperand(0).getValueType() == VT &&
               "Shuffle types don't match");

        SDValue SV0, SV1;
        SmallVector<int, 4> Mask;
        if (MergeInnerShuffle(i != 0, SVN, OtherSV, N->getOperand(1 - i), TLI,
                              SV0, SV1, Mask)) {
          // Check if all indices in Mask are poison. In case, propagate poison.
          if (llvm::all_of(Mask, [](int M) { return M < 0; }))
            return DAG.getPOISON(VT);

          return DAG.getVectorShuffle(VT, SDLoc(N),
                                      SV0 ? SV0 : DAG.getPOISON(VT),
                                      SV1 ? SV1 : DAG.getPOISON(VT), Mask);
        }
      }
    }

    // Merge shuffles through binops if we are able to merge it with at least
    // one other shuffles.
    // shuffle(bop(shuffle(x,y),shuffle(z,w)),undef)
    // shuffle(bop(shuffle(x,y),shuffle(z,w)),bop(shuffle(a,b),shuffle(c,d)))
    unsigned SrcOpcode = N0.getOpcode();
    if (TLI.isBinOp(SrcOpcode) && N->isOnlyUserOf(N0.getNode()) &&
        (N1.isUndef() ||
         (SrcOpcode == N1.getOpcode() && N->isOnlyUserOf(N1.getNode()) &&
          N0.getResNo() == N1.getResNo()))) {
      // Get binop source ops, or just pass on the undef.
      SDValue Op00 = N0.getOperand(0);
      SDValue Op01 = N0.getOperand(1);
      SDValue Op10 = N1.isUndef() ? N1 : N1.getOperand(0);
      SDValue Op11 = N1.isUndef() ? N1 : N1.getOperand(1);
      // TODO: We might be able to relax the VT check but we don't currently
      // have any isBinOp() that has different result/ops VTs so play safe until
      // we have test coverage.
      if (Op00.getValueType() == VT && Op10.getValueType() == VT &&
          Op01.getValueType() == VT && Op11.getValueType() == VT &&
          (Op00.getOpcode() == ISD::VECTOR_SHUFFLE ||
           Op10.getOpcode() == ISD::VECTOR_SHUFFLE ||
           Op01.getOpcode() == ISD::VECTOR_SHUFFLE ||
           Op11.getOpcode() == ISD::VECTOR_SHUFFLE)) {
        auto CanMergeInnerShuffle = [&](SDValue &SV0, SDValue &SV1,
                                        SmallVectorImpl<int> &Mask, bool LeftOp,
                                        bool Commute) {
          SDValue InnerN = Commute ? N1 : N0;
          SDValue Op0 = LeftOp ? Op00 : Op01;
          SDValue Op1 = LeftOp ? Op10 : Op11;
          if (Commute)
            std::swap(Op0, Op1);
          // Only accept the merged shuffle if we don't introduce undef elements,
          // or the inner shuffle already contained undef elements.
          auto *SVN0 = dyn_cast<ShuffleVectorSDNode>(Op0);
          return SVN0 && InnerN->isOnlyUserOf(SVN0) &&
                 MergeInnerShuffle(Commute, SVN, SVN0, Op1, TLI, SV0, SV1,
                                   Mask) &&
                 (llvm::any_of(SVN0->getMask(), [](int M) { return M < 0; }) ||
                  llvm::none_of(Mask, [](int M) { return M < 0; }));
        };

        // Ensure we don't increase the number of shuffles - we must merge a
        // shuffle from at least one of the LHS and RHS ops.
        bool MergedLeft = false;
        SDValue LeftSV0, LeftSV1;
        SmallVector<int, 4> LeftMask;
        if (CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, false) ||
            CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, true)) {
          MergedLeft = true;
        } else {
          LeftMask.assign(SVN->getMask().begin(), SVN->getMask().end());
          LeftSV0 = Op00, LeftSV1 = Op10;
        }

        bool MergedRight = false;
        SDValue RightSV0, RightSV1;
        SmallVector<int, 4> RightMask;
        if (CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, false) ||
            CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, true)) {
          MergedRight = true;
        } else {
          RightMask.assign(SVN->getMask().begin(), SVN->getMask().end());
          RightSV0 = Op01, RightSV1 = Op11;
        }

        if (MergedLeft || MergedRight) {
          SDLoc DL(N);
          // NOTE(review): the "SDValue LHS = DAG.getVectorShuffle(" and
          // "SDValue RHS = DAG.getVectorShuffle(" opening lines are missing
          // from this copy; only their continuations remain — verify upstream.
              VT, DL, LeftSV0 ? LeftSV0 : DAG.getPOISON(VT),
              LeftSV1 ? LeftSV1 : DAG.getPOISON(VT), LeftMask);
              VT, DL, RightSV0 ? RightSV0 : DAG.getPOISON(VT),
              RightSV1 ? RightSV1 : DAG.getPOISON(VT), RightMask);
          return DAG.getNode(SrcOpcode, DL, N0->getVTList(), LHS, RHS)
              .getValue(N0.getResNo());
        }
      }
    }
  }

  if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG))
    return V;

  // Match shuffles that can be converted to ISD::ZERO_EXTEND_VECTOR_INREG.
  // Perform this really late, because it could eliminate knowledge
  // of undef elements created by this shuffle.
  if (Level < AfterLegalizeTypes)
    if (SDValue V = combineShuffleToZeroExtendVectorInReg(SVN, DAG, TLI,
                                                          LegalOperations))
      return V;

  return SDValue();
}
29073
29074SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
29075 EVT VT = N->getValueType(0);
29076 if (!VT.isFixedLengthVector())
29077 return SDValue();
29078
29079 // Try to convert a scalar binop with an extracted vector element to a vector
29080 // binop. This is intended to reduce potentially expensive register moves.
29081 // TODO: Check if both operands are extracted.
29082 // TODO: How to prefer scalar/vector ops with multiple uses of the extact?
29083 // TODO: Generalize this, so it can be called from visitINSERT_VECTOR_ELT().
29084 SDValue Scalar = N->getOperand(0);
29085 unsigned Opcode = Scalar.getOpcode();
29086 EVT VecEltVT = VT.getScalarType();
29087 if (Scalar.hasOneUse() && Scalar->getNumValues() == 1 &&
29088 TLI.isBinOp(Opcode) && Scalar.getValueType() == VecEltVT &&
29089 Scalar.getOperand(0).getValueType() == VecEltVT &&
29090 Scalar.getOperand(1).getValueType() == VecEltVT &&
29091 Scalar->isOnlyUserOf(Scalar.getOperand(0).getNode()) &&
29092 Scalar->isOnlyUserOf(Scalar.getOperand(1).getNode()) &&
29093 DAG.isSafeToSpeculativelyExecute(Opcode) && hasOperation(Opcode, VT)) {
29094 // Match an extract element and get a shuffle mask equivalent.
29095 SmallVector<int, 8> ShufMask(VT.getVectorNumElements(), -1);
29096
29097 for (int i : {0, 1}) {
29098 // s2v (bo (extelt V, Idx), C) --> shuffle (bo V, C'), {Idx, -1, -1...}
29099 // s2v (bo C, (extelt V, Idx)) --> shuffle (bo C', V), {Idx, -1, -1...}
29100 SDValue EE = Scalar.getOperand(i);
29101 auto *C = dyn_cast<ConstantSDNode>(Scalar.getOperand(i ? 0 : 1));
29102 if (C && EE.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
29103 EE.getOperand(0).getValueType() == VT &&
29105 // Mask = {ExtractIndex, undef, undef....}
29106 ShufMask[0] = EE.getConstantOperandVal(1);
29107 // Make sure the shuffle is legal if we are crossing lanes.
29108 if (TLI.isShuffleMaskLegal(ShufMask, VT)) {
29109 SDLoc DL(N);
29110 SDValue V[] = {EE.getOperand(0),
29111 DAG.getConstant(C->getAPIntValue(), DL, VT)};
29112 SDValue VecBO = DAG.getNode(Opcode, DL, VT, V[i], V[1 - i]);
29113 return DAG.getVectorShuffle(VT, DL, VecBO, DAG.getPOISON(VT),
29114 ShufMask);
29115 }
29116 }
29117 }
29118 }
29119
29120 // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
29121 // with a VECTOR_SHUFFLE and possible truncate.
29122 if (Opcode != ISD::EXTRACT_VECTOR_ELT ||
29123 !Scalar.getOperand(0).getValueType().isFixedLengthVector())
29124 return SDValue();
29125
29126 // If we have an implicit truncate, truncate here if it is legal.
29127 if (VecEltVT != Scalar.getValueType() &&
29128 Scalar.getValueType().isScalarInteger() && isTypeLegal(VecEltVT)) {
29129 SDValue Val = DAG.getNode(ISD::TRUNCATE, SDLoc(Scalar), VecEltVT, Scalar);
29130 return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
29131 }
29132
29133 auto *ExtIndexC = dyn_cast<ConstantSDNode>(Scalar.getOperand(1));
29134 if (!ExtIndexC)
29135 return SDValue();
29136
29137 SDValue SrcVec = Scalar.getOperand(0);
29138 EVT SrcVT = SrcVec.getValueType();
29139 unsigned SrcNumElts = SrcVT.getVectorNumElements();
29140 unsigned VTNumElts = VT.getVectorNumElements();
29141 if (VecEltVT == SrcVT.getScalarType() && VTNumElts <= SrcNumElts) {
29142 // Create a shuffle equivalent for scalar-to-vector: {ExtIndex, -1, -1, ...}
29143 SmallVector<int, 8> Mask(SrcNumElts, -1);
29144 Mask[0] = ExtIndexC->getZExtValue();
29145 SDValue LegalShuffle = TLI.buildLegalVectorShuffle(
29146 SrcVT, SDLoc(N), SrcVec, DAG.getPOISON(SrcVT), Mask, DAG);
29147 if (!LegalShuffle)
29148 return SDValue();
29149
29150 // If the initial vector is the same size, the shuffle is the result.
29151 if (VT == SrcVT)
29152 return LegalShuffle;
29153
29154 // If not, shorten the shuffled vector.
29155 if (VTNumElts != SrcNumElts) {
29156 SDValue ZeroIdx = DAG.getVectorIdxConstant(0, SDLoc(N));
29157 EVT SubVT = EVT::getVectorVT(*DAG.getContext(),
29158 SrcVT.getVectorElementType(), VTNumElts);
29159 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT, LegalShuffle,
29160 ZeroIdx);
29161 }
29162 }
29163
29164 return SDValue();
29165}
29166
29167SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
29168 EVT VT = N->getValueType(0);
29169 SDValue N0 = N->getOperand(0);
29170 SDValue N1 = N->getOperand(1);
29171 SDValue N2 = N->getOperand(2);
29172 uint64_t InsIdx = N->getConstantOperandVal(2);
29173
29174 // Remove insert of UNDEF/POISON.
29175 if (N1.isUndef()) {
29176 if (N1.getOpcode() == ISD::POISON || N0.getOpcode() == ISD::UNDEF)
29177 return N0;
29178 return DAG.getFreeze(N0);
29179 }
29180
29181 // If this is an insert of an extracted vector into an undef/poison vector, we
29182 // can just use the input to the extract if the types match, and can simplify
29183 // in some cases even if they don't.
29184 if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
29185 N1.getOperand(1) == N2) {
29186 EVT N1VT = N1.getValueType();
29187 EVT SrcVT = N1.getOperand(0).getValueType();
29188 if (SrcVT == VT) {
29189 // Need to ensure that result isn't more poisonous if skipping both the
29190 // extract+insert.
29191 if (N0.getOpcode() == ISD::POISON)
29192 return N1.getOperand(0);
29193 if (VT.isFixedLengthVector() && N1VT.isFixedLengthVector()) {
29194 unsigned SubVecNumElts = N1VT.getVectorNumElements();
29195 APInt EltMask = APInt::getBitsSet(VT.getVectorNumElements(), InsIdx,
29196 InsIdx + SubVecNumElts);
29197 if (DAG.isGuaranteedNotToBePoison(N1.getOperand(0), ~EltMask))
29198 return N1.getOperand(0);
29199 } else if (DAG.isGuaranteedNotToBePoison(N1.getOperand(0)))
29200 return N1.getOperand(0);
29201 }
29202 // TODO: To remove the zero check, need to adjust the offset to
29203 // a multiple of the new src type.
29204 if (isNullConstant(N2)) {
29205 if (VT.knownBitsGE(SrcVT) &&
29206 !(VT.isFixedLengthVector() && SrcVT.isScalableVector()))
29207 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
29208 VT, N0, N1.getOperand(0), N2);
29209 else if (VT.knownBitsLE(SrcVT) &&
29210 !(VT.isScalableVector() && SrcVT.isFixedLengthVector()))
29211 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N),
29212 VT, N1.getOperand(0), N2);
29213 }
29214 }
29215
29216 // Handle case where we've ended up inserting back into the source vector
29217 // we extracted the subvector from.
29218 // insert_subvector(N0, extract_subvector(N0, N2), N2) --> N0
29219 if (N1.getOpcode() == ISD::EXTRACT_SUBVECTOR && N1.getOperand(0) == N0 &&
29220 N1.getOperand(1) == N2)
29221 return N0;
29222
29223 // Simplify scalar inserts into an undef vector:
29224 // insert_subvector undef, (splat X), N2 -> splat X
29225 if (N0.isUndef() && N1.getOpcode() == ISD::SPLAT_VECTOR)
29226 if (DAG.isConstantValueOfAnyType(N1.getOperand(0)) || N1.hasOneUse())
29227 return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, N1.getOperand(0));
29228
29229 // insert_subvector (splat X), (splat X), N2 -> splat X
29230 if (N0.getOpcode() == ISD::SPLAT_VECTOR && N0.getOpcode() == N1.getOpcode() &&
29231 N0.getOperand(0) == N1.getOperand(0))
29232 return N0;
29233
29234 // If we are inserting a bitcast value into an undef, with the same
29235 // number of elements, just use the bitcast input of the extract.
29236 // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
29237 // BITCAST (INSERT_SUBVECTOR UNDEF N1 N2)
29238 if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
29240 N1.getOperand(0).getOperand(1) == N2 &&
29242 VT.getVectorElementCount() &&
29244 VT.getSizeInBits()) {
29245 return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
29246 }
29247
29248 // If both N1 and N2 are bitcast values on which insert_subvector
29249 // would makes sense, pull the bitcast through.
29250 // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
29251 // BITCAST (INSERT_SUBVECTOR N0 N1 N2)
29252 if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
29253 SDValue CN0 = N0.getOperand(0);
29254 SDValue CN1 = N1.getOperand(0);
29255 EVT CN0VT = CN0.getValueType();
29256 EVT CN1VT = CN1.getValueType();
29257 if (CN0VT.isVector() && CN1VT.isVector() &&
29258 CN0VT.getVectorElementType() == CN1VT.getVectorElementType() &&
29260 SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
29261 CN0.getValueType(), CN0, CN1, N2);
29262 return DAG.getBitcast(VT, NewINSERT);
29263 }
29264 }
29265
29266 // Combine INSERT_SUBVECTORs where we are inserting to the same index.
29267 // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
29268 // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
29269 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
29270 N0.getOperand(1).getValueType() == N1.getValueType() &&
29271 N0.getOperand(2) == N2)
29272 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
29273 N1, N2);
29274
29275 // Eliminate an intermediate insert into an undef vector:
29276 // insert_subvector undef, (insert_subvector undef, X, 0), 0 -->
29277 // insert_subvector undef, X, 0
29278 if (N0.isUndef() && N1.getOpcode() == ISD::INSERT_SUBVECTOR &&
29279 N1.getOperand(0).isUndef() && isNullConstant(N1.getOperand(2)) &&
29280 isNullConstant(N2))
29281 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0,
29282 N1.getOperand(1), N2);
29283
29284 // Push subvector bitcasts to the output, adjusting the index as we go.
29285 // insert_subvector(bitcast(v), bitcast(s), c1)
29286 // -> bitcast(insert_subvector(v, s, c2))
29287 if ((N0.isUndef() || N0.getOpcode() == ISD::BITCAST) &&
29288 N1.getOpcode() == ISD::BITCAST) {
29289 SDValue N0Src = peekThroughBitcasts(N0);
29290 SDValue N1Src = peekThroughBitcasts(N1);
29291 EVT N0SrcSVT = N0Src.getValueType().getScalarType();
29292 EVT N1SrcSVT = N1Src.getValueType().getScalarType();
29293 if ((N0.isUndef() || N0SrcSVT == N1SrcSVT) &&
29294 N0Src.getValueType().isVector() && N1Src.getValueType().isVector()) {
29295 EVT NewVT;
29296 SDLoc DL(N);
29297 SDValue NewIdx;
29298 LLVMContext &Ctx = *DAG.getContext();
29299 ElementCount NumElts = VT.getVectorElementCount();
29300 unsigned EltSizeInBits = VT.getScalarSizeInBits();
29301 if ((EltSizeInBits % N1SrcSVT.getSizeInBits()) == 0) {
29302 unsigned Scale = EltSizeInBits / N1SrcSVT.getSizeInBits();
29303 NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts * Scale);
29304 NewIdx = DAG.getVectorIdxConstant(InsIdx * Scale, DL);
29305 } else if ((N1SrcSVT.getSizeInBits() % EltSizeInBits) == 0) {
29306 unsigned Scale = N1SrcSVT.getSizeInBits() / EltSizeInBits;
29307 if (NumElts.isKnownMultipleOf(Scale) && (InsIdx % Scale) == 0) {
29308 NewVT = EVT::getVectorVT(Ctx, N1SrcSVT,
29309 NumElts.divideCoefficientBy(Scale));
29310 NewIdx = DAG.getVectorIdxConstant(InsIdx / Scale, DL);
29311 }
29312 }
29313 if (NewIdx && hasOperation(ISD::INSERT_SUBVECTOR, NewVT)) {
29314 SDValue Res = DAG.getBitcast(NewVT, N0Src);
29315 Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, NewVT, Res, N1Src, NewIdx);
29316 return DAG.getBitcast(VT, Res);
29317 }
29318 }
29319 }
29320
29321 // Canonicalize insert_subvector dag nodes.
29322 // Example:
29323 // (insert_subvector (insert_subvector A, Idx0), Idx1)
29324 // -> (insert_subvector (insert_subvector A, Idx1), Idx0)
29325 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
29326 N1.getValueType() == N0.getOperand(1).getValueType()) {
29327 unsigned OtherIdx = N0.getConstantOperandVal(2);
29328 if (InsIdx < OtherIdx) {
29329 // Swap nodes.
29330 SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
29331 N0.getOperand(0), N1, N2);
29332 AddToWorklist(NewOp.getNode());
29333 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
29334 VT, NewOp, N0.getOperand(1), N0.getOperand(2));
29335 }
29336 }
29337
29338 // If the input vector is a concatenation, and the insert replaces
29339 // one of the pieces, we can optimize into a single concat_vectors.
29340 if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
29341 N0.getOperand(0).getValueType() == N1.getValueType() &&
29344 unsigned Factor = N1.getValueType().getVectorMinNumElements();
29346 Ops[InsIdx / Factor] = N1;
29347 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
29348 }
29349
29350 // Simplify source operands based on insertion.
29352 return SDValue(N, 0);
29353
29354 return SDValue();
29355}
29356
29357SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
29358 SDValue N0 = N->getOperand(0);
29359
29360 // fold (fp_to_fp16 (fp16_to_fp op)) -> op
29361 if (N0->getOpcode() == ISD::FP16_TO_FP)
29362 return N0->getOperand(0);
29363
29364 return SDValue();
29365}
29366
/// Combine FP16_TO_FP conversions. visitBF16_TO_FP forwards here as well, so
/// Op may be either FP16_TO_FP or BF16_TO_FP.
SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
  // Reuse N's flags on any node created below.
  SelectionDAG::FlagInserter FlagsInserter(DAG, N);
  auto Op = N->getOpcode();
  // NOTE(review): the first line of this assert (the condition on Op) is
  // missing from this view of the file -- confirm against upstream.
         "opcode should be FP16_TO_FP or BF16_TO_FP.");
  SDValue N0 = N->getOperand(0);

  // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op) or
  // fold bf16_to_fp(op & 0xffff) -> bf16_to_fp(op)
  // The mask with exactly 0xffff is dropped unless the target explicitly asks
  // to keep the zero-extension (shouldKeepZExtForFP16Conv).
  if (!TLI.shouldKeepZExtForFP16Conv() && N0->getOpcode() == ISD::AND) {
    ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
    if (AndConst && AndConst->getAPIntValue() == 0xffff) {
      return DAG.getNode(Op, SDLoc(N), N->getValueType(0), N0.getOperand(0));
    }
  }

  // Try to remove a redundant extend/round pair around this conversion.
  if (SDValue CastEliminated = eliminateFPCastPair(N))
    return CastEliminated;

  // Sometimes constants manage to survive very late in the pipeline, e.g.,
  // because they are wrapped inside the <1 x f16> type. Try one last time to
  // get rid of them.
  SDValue Folded = DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N),
                                              N->getValueType(0), {N0});
  return Folded;
}
29393
29394SDValue DAGCombiner::visitFP_TO_BF16(SDNode *N) {
29395 SDValue N0 = N->getOperand(0);
29396
29397 // fold (fp_to_bf16 (bf16_to_fp op)) -> op
29398 if (N0->getOpcode() == ISD::BF16_TO_FP)
29399 return N0->getOperand(0);
29400
29401 return SDValue();
29402}
29403
29404SDValue DAGCombiner::visitBF16_TO_FP(SDNode *N) {
29405 // fold bf16_to_fp(op & 0xffff) -> bf16_to_fp(op)
29406 return visitFP16_TO_FP(N);
29407}
29408
/// Generic combines shared by all VECREDUCE_* opcodes.
SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N0.getValueType();
  unsigned Opcode = N->getOpcode();

  // VECREDUCE over 1-element vector is just an extract.
  if (VT.getVectorElementCount().isScalar()) {
    SDLoc dl(N);
    // NOTE(review): the line building the extract (between `Res =` and the
    // index constant below) is missing from this view -- confirm upstream.
    SDValue Res =
                      DAG.getVectorIdxConstant(0, dl));
    // The extracted element may be narrower than the reduction result type;
    // any-extend to match in that case.
    if (Res.getValueType() != N->getValueType(0))
      Res = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Res);
    return Res;
  }

  // On an boolean vector an and/or reduction is the same as a umin/umax
  // reduction. Convert them if the latter is legal while the former isn't.
  if (Opcode == ISD::VECREDUCE_AND || Opcode == ISD::VECREDUCE_OR) {
    // NOTE(review): the second half of the NewOpcode selection and one
    // condition of the `if` below are missing from this view -- confirm
    // against upstream.
    unsigned NewOpcode = Opcode == ISD::VECREDUCE_AND
    if (!TLI.isOperationLegalOrCustom(Opcode, VT) &&
        TLI.isOperationLegalOrCustom(NewOpcode, VT) &&
      return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), N0);
  }

  // vecreduce_or(insert_subvector(zero or undef, val)) -> vecreduce_or(val)
  // vecreduce_and(insert_subvector(ones or undef, val)) -> vecreduce_and(val)
  // The outer vector contributes only the identity element of the reduction
  // (zero for OR, all-ones for AND), so it can be dropped.
  if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
      TLI.isTypeLegal(N0.getOperand(1).getValueType())) {
    SDValue Vec = N0.getOperand(0);
    SDValue Subvec = N0.getOperand(1);
    if ((Opcode == ISD::VECREDUCE_OR &&
         (N0.getOperand(0).isUndef() || isNullOrNullSplat(Vec))) ||
        (Opcode == ISD::VECREDUCE_AND &&
         (N0.getOperand(0).isUndef() || isAllOnesOrAllOnesSplat(Vec))))
      return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), Subvec);
  }

  // vecreduce_or(sext(x)) -> sext(vecreduce_or(x))
  // Same for zext and anyext, and for and/or/xor reductions.
  if ((Opcode == ISD::VECREDUCE_OR || Opcode == ISD::VECREDUCE_AND ||
       Opcode == ISD::VECREDUCE_XOR) &&
      (N0.getOpcode() == ISD::SIGN_EXTEND ||
       N0.getOpcode() == ISD::ZERO_EXTEND ||
       N0.getOpcode() == ISD::ANY_EXTEND) &&
      TLI.isOperationLegalOrCustom(Opcode, N0.getOperand(0).getValueType())) {
    // NOTE(review): the result-type argument of this getNode call is missing
    // from this view -- confirm against upstream.
    SDValue Red = DAG.getNode(Opcode, SDLoc(N),
                              N0.getOperand(0));
    return DAG.getNode(N0.getOpcode(), SDLoc(N), N->getValueType(0), Red);
  }
  return SDValue();
}
29464
29465SDValue DAGCombiner::visitVP_FSUB(SDNode *N) {
29466 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
29467
29468 // FSUB -> FMA combines:
29469 if (SDValue Fused = visitFSUBForFMACombine<VPMatchContext>(N)) {
29470 AddToWorklist(Fused.getNode());
29471 return Fused;
29472 }
29473 return SDValue();
29474}
29475
29476SDValue DAGCombiner::visitVPOp(SDNode *N) {
29477
29478 if (N->getOpcode() == ISD::VP_GATHER)
29479 if (SDValue SD = visitVPGATHER(N))
29480 return SD;
29481
29482 if (N->getOpcode() == ISD::VP_SCATTER)
29483 if (SDValue SD = visitVPSCATTER(N))
29484 return SD;
29485
29486 if (N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_LOAD)
29487 if (SDValue SD = visitVP_STRIDED_LOAD(N))
29488 return SD;
29489
29490 if (N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_STORE)
29491 if (SDValue SD = visitVP_STRIDED_STORE(N))
29492 return SD;
29493
29494 // VP operations in which all vector elements are disabled - either by
29495 // determining that the mask is all false or that the EVL is 0 - can be
29496 // eliminated.
29497 bool AreAllEltsDisabled = false;
29498 if (auto EVLIdx = ISD::getVPExplicitVectorLengthIdx(N->getOpcode()))
29499 AreAllEltsDisabled |= isNullConstant(N->getOperand(*EVLIdx));
29500 if (auto MaskIdx = ISD::getVPMaskIdx(N->getOpcode()))
29501 AreAllEltsDisabled |=
29502 ISD::isConstantSplatVectorAllZeros(N->getOperand(*MaskIdx).getNode());
29503
29504 // This is the only generic VP combine we support for now.
29505 if (!AreAllEltsDisabled) {
29506 switch (N->getOpcode()) {
29507 case ISD::VP_FADD:
29508 return visitVP_FADD(N);
29509 case ISD::VP_FSUB:
29510 return visitVP_FSUB(N);
29511 case ISD::VP_FMA:
29512 return visitFMA<VPMatchContext>(N);
29513 case ISD::VP_SELECT:
29514 return visitVP_SELECT(N);
29515 case ISD::VP_MUL:
29516 return visitMUL<VPMatchContext>(N);
29517 case ISD::VP_SUB:
29518 return foldSubCtlzNot<VPMatchContext>(N, DAG);
29519 default:
29520 break;
29521 }
29522 return SDValue();
29523 }
29524
29525 // Binary operations can be replaced by UNDEF.
29526 if (ISD::isVPBinaryOp(N->getOpcode()))
29527 return DAG.getUNDEF(N->getValueType(0));
29528
29529 // VP Memory operations can be replaced by either the chain (stores) or the
29530 // chain + undef (loads).
29531 if (const auto *MemSD = dyn_cast<MemSDNode>(N)) {
29532 if (MemSD->writeMem())
29533 return MemSD->getChain();
29534 return CombineTo(N, DAG.getUNDEF(N->getValueType(0)), MemSD->getChain());
29535 }
29536
29537 // Reduction operations return the start operand when no elements are active.
29538 if (ISD::isVPReduction(N->getOpcode()))
29539 return N->getOperand(0);
29540
29541 return SDValue();
29542}
29543
/// If the memory written by this GET_FPENV_MEM is only copied elsewhere via a
/// single load+store pair, write the FP environment directly to the store's
/// destination instead.
SDValue DAGCombiner::visitGET_FPENV_MEM(SDNode *N) {
  SDValue Chain = N->getOperand(0);
  SDValue Ptr = N->getOperand(1);
  EVT MemVT = cast<FPStateAccessSDNode>(N)->getMemoryVT();

  // Check if the memory, where FP state is written to, is used only in a single
  // load operation.
  LoadSDNode *LdNode = nullptr;
  for (auto *U : Ptr->users()) {
    if (U == N)
      continue;
    if (auto *Ld = dyn_cast<LoadSDNode>(U)) {
      // More than one distinct load of the buffer defeats the transform.
      if (LdNode && LdNode != Ld)
        return SDValue();
      LdNode = Ld;
      continue;
    }
    // Any non-load user of the pointer makes the rewrite unsafe.
    return SDValue();
  }
  // The load must be a simple (non-volatile, non-atomic), non-indexed load of
  // the full FP-state type.
  // NOTE(review): one additional condition line (after the MemVT check) is
  // missing from this view of the file -- confirm against upstream.
  if (!LdNode || !LdNode->isSimple() || LdNode->isIndexed() ||
      !LdNode->getOffset().isUndef() || LdNode->getMemoryVT() != MemVT ||
    return SDValue();

  // Check if the loaded value is used only in a store operation.
  StoreSDNode *StNode = nullptr;
  for (SDUse &U : LdNode->uses()) {
    if (U.getResNo() == 0) {
      if (auto *St = dyn_cast<StoreSDNode>(U.getUser())) {
        if (StNode)
          return SDValue();
        StNode = St;
      } else {
        return SDValue();
      }
    }
  }
  // The store must also be simple, non-indexed, of the same type, and its
  // chain must reach the load without intervening side effects.
  if (!StNode || !StNode->isSimple() || StNode->isIndexed() ||
      !StNode->getOffset().isUndef() || StNode->getMemoryVT() != MemVT ||
      !StNode->getChain().reachesChainWithoutSideEffects(SDValue(LdNode, 1)))
    return SDValue();

  // Create new node GET_FPENV_MEM, which uses the store address to write FP
  // environment.
  SDValue Res = DAG.getGetFPEnv(Chain, SDLoc(N), StNode->getBasePtr(), MemVT,
                                StNode->getMemOperand());
  CombineTo(StNode, Res, false);
  return Res;
}
29593
29594SDValue DAGCombiner::visitSET_FPENV_MEM(SDNode *N) {
29595 SDValue Chain = N->getOperand(0);
29596 SDValue Ptr = N->getOperand(1);
29597 EVT MemVT = cast<FPStateAccessSDNode>(N)->getMemoryVT();
29598
29599 // Check if the address of FP state is used also in a store operation only.
29600 StoreSDNode *StNode = nullptr;
29601 for (auto *U : Ptr->users()) {
29602 if (U == N)
29603 continue;
29604 if (auto *St = dyn_cast<StoreSDNode>(U)) {
29605 if (StNode && StNode != St)
29606 return SDValue();
29607 StNode = St;
29608 continue;
29609 }
29610 return SDValue();
29611 }
29612 if (!StNode || !StNode->isSimple() || StNode->isIndexed() ||
29613 !StNode->getOffset().isUndef() || StNode->getMemoryVT() != MemVT ||
29614 !Chain.reachesChainWithoutSideEffects(SDValue(StNode, 0)))
29615 return SDValue();
29616
29617 // Check if the stored value is loaded from some location and the loaded
29618 // value is used only in the store operation.
29619 SDValue StValue = StNode->getValue();
29620 auto *LdNode = dyn_cast<LoadSDNode>(StValue);
29621 if (!LdNode || !LdNode->isSimple() || LdNode->isIndexed() ||
29622 !LdNode->getOffset().isUndef() || LdNode->getMemoryVT() != MemVT ||
29623 !StNode->getChain().reachesChainWithoutSideEffects(SDValue(LdNode, 1)))
29624 return SDValue();
29625
29626 // Create new node SET_FPENV_MEM, which uses the load address to read FP
29627 // environment.
29628 SDValue Res =
29629 DAG.getSetFPEnv(LdNode->getChain(), SDLoc(N), LdNode->getBasePtr(), MemVT,
29630 LdNode->getMemOperand());
29631 return Res;
29632}
29633
29634/// Returns a vector_shuffle if it able to transform an AND to a vector_shuffle
29635/// with the destination vector and a zero vector.
29636/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
29637/// vector_shuffle V, Zero, <0, 4, 2, 4>
29638SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
29639 assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");
29640
29641 EVT VT = N->getValueType(0);
29642 SDValue LHS = N->getOperand(0);
29643 SDValue RHS = peekThroughBitcasts(N->getOperand(1));
29644 SDLoc DL(N);
29645
29646 // Make sure we're not running after operation legalization where it
29647 // may have custom lowered the vector shuffles.
29648 if (LegalOperations)
29649 return SDValue();
29650
29651 if (RHS.getOpcode() != ISD::BUILD_VECTOR)
29652 return SDValue();
29653
29654 EVT RVT = RHS.getValueType();
29655 unsigned NumElts = RHS.getNumOperands();
29656
29657 // Attempt to create a valid clear mask, splitting the mask into
29658 // sub elements and checking to see if each is
29659 // all zeros or all ones - suitable for shuffle masking.
29660 auto BuildClearMask = [&](int Split) {
29661 int NumSubElts = NumElts * Split;
29662 int NumSubBits = RVT.getScalarSizeInBits() / Split;
29663
29664 SmallVector<int, 8> Indices;
29665 for (int i = 0; i != NumSubElts; ++i) {
29666 int EltIdx = i / Split;
29667 int SubIdx = i % Split;
29668 SDValue Elt = RHS.getOperand(EltIdx);
29669 // X & undef --> 0 (not undef). So this lane must be converted to choose
29670 // from the zero constant vector (same as if the element had all 0-bits).
29671 if (Elt.isUndef()) {
29672 Indices.push_back(i + NumSubElts);
29673 continue;
29674 }
29675
29676 std::optional<APInt> Bits = Elt->bitcastToAPInt();
29677 if (!Bits)
29678 return SDValue();
29679
29680 // Extract the sub element from the constant bit mask.
29681 if (DAG.getDataLayout().isBigEndian())
29682 *Bits =
29683 Bits->extractBits(NumSubBits, (Split - SubIdx - 1) * NumSubBits);
29684 else
29685 *Bits = Bits->extractBits(NumSubBits, SubIdx * NumSubBits);
29686
29687 if (Bits->isAllOnes())
29688 Indices.push_back(i);
29689 else if (*Bits == 0)
29690 Indices.push_back(i + NumSubElts);
29691 else
29692 return SDValue();
29693 }
29694
29695 // Let's see if the target supports this vector_shuffle.
29696 EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
29697 EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
29698 if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
29699 return SDValue();
29700
29701 SDValue Zero = DAG.getConstant(0, DL, ClearVT);
29702 return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
29703 DAG.getBitcast(ClearVT, LHS),
29704 Zero, Indices));
29705 };
29706
29707 // Determine maximum split level (byte level masking).
29708 int MaxSplit = 1;
29709 if (RVT.getScalarSizeInBits() % 8 == 0)
29710 MaxSplit = RVT.getScalarSizeInBits() / 8;
29711
29712 for (int Split = 1; Split <= MaxSplit; ++Split)
29713 if (RVT.getScalarSizeInBits() % Split == 0)
29714 if (SDValue S = BuildClearMask(Split))
29715 return S;
29716
29717 return SDValue();
29718}
29719
/// If a vector binop is performed on splat values, it may be profitable to
/// extract, scalarize, and insert/splat.
/// NOTE(review): the first line of this function's signature (its name and
/// leading parameters) is missing from this view -- confirm against upstream.
                                       const SDLoc &DL, bool LegalTypes) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  unsigned Opcode = N->getOpcode();
  EVT VT = N->getValueType(0);
  EVT EltVT = VT.getVectorElementType();
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  // TODO: Remove/replace the extract cost check? If the elements are available
  // as scalars, then there may be no extract cost. Should we ask if
  // inserting a scalar back into a vector is cheap instead?
  int Index0, Index1;
  SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
  SDValue Src1 = DAG.getSplatSourceVector(N1, Index1);
  // Extract element from splat_vector should be free.
  // TODO: use DAG.isSplatValue instead?
  // NOTE(review): the second half of this condition (presumably testing N1's
  // opcode) is missing from this view -- confirm against upstream.
  bool IsBothSplatVector = N0.getOpcode() == ISD::SPLAT_VECTOR &&
  // Bail out unless both operands splat the same lane of compatible element
  // types and the extract is cheap (or both are SPLAT_VECTOR nodes).
  // NOTE(review): the legality query that wraps the Opcode/EltVT arguments
  // below is missing from this view -- confirm against upstream.
  if (!Src0 || !Src1 || Index0 != Index1 ||
      Src0.getValueType().getVectorElementType() != EltVT ||
      Src1.getValueType().getVectorElementType() != EltVT ||
      !(IsBothSplatVector || TLI.isExtractVecEltCheap(VT, Index0)) ||
      // If before type legalization, allow scalar types that will eventually be
      // made legal.
          Opcode, LegalTypes
                      ? EltVT
                      : TLI.getTypeToTransformTo(*DAG.getContext(), EltVT)))
    return SDValue();

  // FIXME: Type legalization can't handle illegal MULHS/MULHU.
  if ((Opcode == ISD::MULHS || Opcode == ISD::MULHU) && !TLI.isTypeLegal(EltVT))
    return SDValue();

  if (N0.getOpcode() == ISD::BUILD_VECTOR && N0.getOpcode() == N1.getOpcode()) {
    // All but one element should have an undef input, which will fold to a
    // constant or undef. Avoid splatting which would over-define potentially
    // undefined elements.

    // bo (build_vec ..undef, X, undef...), (build_vec ..undef, Y, undef...) -->
    // build_vec ..undef, (bo X, Y), undef...
    SmallVector<SDValue, 16> EltsX, EltsY, EltsResult;
    DAG.ExtractVectorElements(Src0, EltsX);
    DAG.ExtractVectorElements(Src1, EltsY);

    // Combine matching lanes pairwise, preserving N's flags.
    for (auto [X, Y] : zip(EltsX, EltsY))
      EltsResult.push_back(DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags()));
    return DAG.getBuildVector(VT, DL, EltsResult);
  }

  // Scalarize: extract the splatted lane from each source, apply the op once,
  // then re-splat the scalar result.
  SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
  SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src0, IndexC);
  SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src1, IndexC);
  SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags());

  // bo (splat X, Index), (splat Y, Index) --> splat (bo X, Y), Index
  return DAG.getSplat(VT, DL, ScalarBO);
}
29781
/// Visit a vector cast operation, like FP_EXTEND.
/// If the operand is a cheap-to-extract splat, performs the cast on the
/// scalar and re-splats the result.
SDValue DAGCombiner::SimplifyVCastOp(SDNode *N, const SDLoc &DL) {
  EVT VT = N->getValueType(0);
  assert(VT.isVector() && "SimplifyVCastOp only works on vectors!");
  EVT EltVT = VT.getVectorElementType();
  unsigned Opcode = N->getOpcode();

  SDValue N0 = N->getOperand(0);
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  // TODO: promote operation might be also good here?
  int Index0;
  SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
  // Only scalarize when the extract is cheap (or the source is a
  // SPLAT_VECTOR), the scalar op is legal/custom, and the target prefers it.
  if (Src0 &&
      (N0.getOpcode() == ISD::SPLAT_VECTOR ||
       TLI.isExtractVecEltCheap(VT, Index0)) &&
      TLI.isOperationLegalOrCustom(Opcode, EltVT) &&
      TLI.preferScalarizeSplat(N)) {
    EVT SrcVT = N0.getValueType();
    EVT SrcEltVT = SrcVT.getVectorElementType();
    if (!LegalTypes || TLI.isTypeLegal(SrcEltVT)) {
      SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
      SDValue Elt =
          DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, SrcEltVT, Src0, IndexC);
      SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, Elt, N->getFlags());
      if (VT.isScalableVector())
        return DAG.getSplatVector(VT, DL, ScalarBO);
      // NOTE(review): the line that constructs `Ops` (presumably a vector of
      // ScalarBO copies for the fixed-length splat) is missing from this view
      // -- confirm against upstream.
      return DAG.getBuildVector(VT, DL, Ops);
    }
  }

  return SDValue();
}
29816
/// Visit a binary vector operation, like ADD.
/// Tries a fixed sequence of target-independent folds in order; returns the
/// replacement value, or a null SDValue if no fold applied.
SDValue DAGCombiner::SimplifyVBinOp(SDNode *N, const SDLoc &DL) {
  EVT VT = N->getValueType(0);
  assert(VT.isVector() && "SimplifyVBinOp only works on vectors!");

  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  unsigned Opcode = N->getOpcode();
  SDNodeFlags Flags = N->getFlags();

  // Move unary shuffles with identical masks after a vector binop:
  // VBinOp (shuffle A, Undef, Mask), (shuffle B, Undef, Mask))
  //   --> shuffle (VBinOp A, B), Undef, Mask
  // This does not require type legality checks because we are creating the
  // same types of operations that are in the original sequence. We do have to
  // restrict ops like integer div that have immediate UB (eg, div-by-zero)
  // though. This code is adapted from the identical transform in instcombine.
  if (DAG.isSafeToSpeculativelyExecute(Opcode)) {
    auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS);
    auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS);
    if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
        LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&
        (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
      SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS.getOperand(0),
                                     RHS.getOperand(0), Flags);
      SDValue UndefV = LHS.getOperand(1);
      return DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
    }

    // Try to sink a splat shuffle after a binop with a uniform constant.
    // This is limited to cases where neither the shuffle nor the constant have
    // undefined elements because that could be poison-unsafe or inhibit
    // demanded elements analysis. It is further limited to not change a splat
    // of an inserted scalar because that may be optimized better by
    // load-folding or other target-specific behaviors.
    if (isConstOrConstSplat(RHS) && Shuf0 && all_equal(Shuf0->getMask()) &&
        Shuf0->hasOneUse() && Shuf0->getOperand(1).isUndef() &&
        Shuf0->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
      // binop (splat X), (splat C) --> splat (binop X, C)
      SDValue X = Shuf0->getOperand(0);
      SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, X, RHS, Flags);
      return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getPOISON(VT),
                                  Shuf0->getMask());
    }
    if (isConstOrConstSplat(LHS) && Shuf1 && all_equal(Shuf1->getMask()) &&
        Shuf1->hasOneUse() && Shuf1->getOperand(1).isUndef() &&
        Shuf1->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
      // binop (splat C), (splat X) --> splat (binop C, X)
      SDValue X = Shuf1->getOperand(0);
      SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS, X, Flags);
      return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getPOISON(VT),
                                  Shuf1->getMask());
    }
  }

  // The following pattern is likely to emerge with vector reduction ops. Moving
  // the binary operation ahead of insertion may allow using a narrower vector
  // instruction that has better performance than the wide version of the op:
  // VBinOp (ins undef, X, Z), (ins undef, Y, Z) --> ins VecC, (VBinOp X, Y), Z
  if (LHS.getOpcode() == ISD::INSERT_SUBVECTOR && LHS.getOperand(0).isUndef() &&
      RHS.getOpcode() == ISD::INSERT_SUBVECTOR && RHS.getOperand(0).isUndef() &&
      LHS.getOperand(2) == RHS.getOperand(2) &&
      (LHS.hasOneUse() || RHS.hasOneUse())) {
    SDValue X = LHS.getOperand(1);
    SDValue Y = RHS.getOperand(1);
    SDValue Z = LHS.getOperand(2);
    EVT NarrowVT = X.getValueType();
    if (NarrowVT == Y.getValueType() &&
        TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT,
                                              LegalOperations)) {
      // (binop undef, undef) may not return undef, so compute that result.
      SDValue VecC =
          DAG.getNode(Opcode, DL, VT, DAG.getUNDEF(VT), DAG.getUNDEF(VT));
      SDValue NarrowBO = DAG.getNode(Opcode, DL, NarrowVT, X, Y);
      return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, VecC, NarrowBO, Z);
    }
  }

  // Make sure all but the first op are undef or constant.
  auto ConcatWithConstantOrUndef = [](SDValue Concat) {
    return Concat.getOpcode() == ISD::CONCAT_VECTORS &&
           all_of(drop_begin(Concat->ops()), [](const SDValue &Op) {
             return Op.isUndef() ||
                    ISD::isBuildVectorOfConstantSDNodes(Op.getNode());
           });
  };

  // The following pattern is likely to emerge with vector reduction ops. Moving
  // the binary operation ahead of the concat may allow using a narrower vector
  // instruction that has better performance than the wide version of the op:
  // VBinOp (concat X, undef/constant), (concat Y, undef/constant) -->
  //   concat (VBinOp X, Y), VecC
  if (ConcatWithConstantOrUndef(LHS) && ConcatWithConstantOrUndef(RHS) &&
      (LHS.hasOneUse() || RHS.hasOneUse())) {
    EVT NarrowVT = LHS.getOperand(0).getValueType();
    if (NarrowVT == RHS.getOperand(0).getValueType() &&
        TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
      unsigned NumOperands = LHS.getNumOperands();
      SmallVector<SDValue, 4> ConcatOps;
      for (unsigned i = 0; i != NumOperands; ++i) {
        // This constant fold for operands 1 and up.
        ConcatOps.push_back(DAG.getNode(Opcode, DL, NarrowVT, LHS.getOperand(i),
                                        RHS.getOperand(i)));
      }

      return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
    }
  }

  // Last resort: scalarize a binop of two splats.
  if (SDValue V = scalarizeBinOpOfSplats(N, DAG, DL, LegalTypes))
    return V;

  return SDValue();
}
29931
29932SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
29933 SDValue N2) {
29934 assert(N0.getOpcode() == ISD::SETCC &&
29935 "First argument must be a SetCC node!");
29936
29937 SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
29938 cast<CondCodeSDNode>(N0.getOperand(2))->get());
29939
29940 // If we got a simplified select_cc node back from SimplifySelectCC, then
29941 // break it down into a new SETCC node, and a new SELECT node, and then return
29942 // the SELECT node, since we were called with a SELECT node.
29943 if (SCC.getNode()) {
29944 // Check to see if we got a select_cc back (to turn into setcc/select).
29945 // Otherwise, just return whatever node we got back, like fabs.
29946 if (SCC.getOpcode() == ISD::SELECT_CC) {
29947 const SDNodeFlags Flags = N0->getFlags();
29948 SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
29949 N0.getValueType(),
29950 SCC.getOperand(0), SCC.getOperand(1),
29951 SCC.getOperand(4), Flags);
29952 AddToWorklist(SETCC.getNode());
29953 return DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
29954 SCC.getOperand(2), SCC.getOperand(3), Flags);
29955 }
29956
29957 return SCC;
29958 }
29959 return SDValue();
29960}
29961
29962/// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
29963/// being selected between, see if we can simplify the select. Callers of this
29964/// should assume that TheSelect is deleted if this returns true. As such, they
29965/// should return the appropriate thing (e.g. the node) back to the top-level of
29966/// the DAG combiner loop to avoid it being looked at.
29967bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
29968 SDValue RHS) {
29969 // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
29970 // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
29971 if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
29972 if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
29973 // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
29974 SDValue Sqrt = RHS;
29975 ISD::CondCode CC;
29976 SDValue CmpLHS;
29977 const ConstantFPSDNode *Zero = nullptr;
29978
29979 if (TheSelect->getOpcode() == ISD::SELECT_CC) {
29980 CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
29981 CmpLHS = TheSelect->getOperand(0);
29982 Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
29983 } else {
29984 // SELECT or VSELECT
29985 SDValue Cmp = TheSelect->getOperand(0);
29986 if (Cmp.getOpcode() == ISD::SETCC) {
29987 CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
29988 CmpLHS = Cmp.getOperand(0);
29989 Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
29990 }
29991 }
29992 if (Zero && Zero->isZero() &&
29993 Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
29994 CC == ISD::SETULT || CC == ISD::SETLT)) {
29995 // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
29996 CombineTo(TheSelect, Sqrt);
29997 return true;
29998 }
29999 }
30000 }
30001 // Cannot simplify select with vector condition
30002 if (TheSelect->getOperand(0).getValueType().isVector()) return false;
30003
30004 // If this is a select from two identical things, try to pull the operation
30005 // through the select.
30006 if (LHS.getOpcode() != RHS.getOpcode() ||
30007 !LHS.hasOneUse() || !RHS.hasOneUse())
30008 return false;
30009
30010 // If this is a load and the token chain is identical, replace the select
30011 // of two loads with a load through a select of the address to load from.
30012 // This triggers in things like "select bool X, 10.0, 123.0" after the FP
30013 // constants have been dropped into the constant pool.
30014 if (LHS.getOpcode() == ISD::LOAD) {
30015 LoadSDNode *LLD = cast<LoadSDNode>(LHS);
30016 LoadSDNode *RLD = cast<LoadSDNode>(RHS);
30017
30018 // Token chains must be identical.
30019 if (LHS.getOperand(0) != RHS.getOperand(0) ||
30020 // Do not let this transformation reduce the number of volatile loads.
30021 // Be conservative for atomics for the moment
30022 // TODO: This does appear to be legal for unordered atomics (see D66309)
30023 !LLD->isSimple() || !RLD->isSimple() ||
30024 // FIXME: If either is a pre/post inc/dec load,
30025 // we'd need to split out the address adjustment.
30026 LLD->isIndexed() || RLD->isIndexed() ||
30027 // If this is an EXTLOAD, the VT's must match.
30028 LLD->getMemoryVT() != RLD->getMemoryVT() ||
30029 // If this is an EXTLOAD, the kind of extension must match.
30030 (LLD->getExtensionType() != RLD->getExtensionType() &&
30031 // The only exception is if one of the extensions is anyext.
30032 LLD->getExtensionType() != ISD::EXTLOAD &&
30033 RLD->getExtensionType() != ISD::EXTLOAD) ||
30034 // FIXME: this discards src value information. This is
30035 // over-conservative. It would be beneficial to be able to remember
30036 // both potential memory locations. Since we are discarding
30037 // src value info, don't do the transformation if the memory
30038 // locations are not in the same address space.
30039 LLD->getPointerInfo().getAddrSpace() !=
30040 RLD->getPointerInfo().getAddrSpace() ||
30041 // We can't produce a CMOV of a TargetFrameIndex since we won't
30042 // generate the address generation required.
30045 !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
30046 LLD->getBasePtr().getValueType()))
30047 return false;
30048
30049 // The loads must not depend on one another.
30050 if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD))
30051 return false;
30052
30053 // Check that the select condition doesn't reach either load. If so,
30054 // folding this will induce a cycle into the DAG. If not, this is safe to
30055 // xform, so create a select of the addresses.
30056
30057 SmallPtrSet<const SDNode *, 32> Visited;
30059
30060 // Always fail if LLD and RLD are not independent. TheSelect is a
30061 // predecessor to all Nodes in question so we need not search past it.
30062
30063 Visited.insert(TheSelect);
30064 Worklist.push_back(LLD);
30065 Worklist.push_back(RLD);
30066
30067 if (SDNode::hasPredecessorHelper(LLD, Visited, Worklist) ||
30068 SDNode::hasPredecessorHelper(RLD, Visited, Worklist))
30069 return false;
30070
30071 SDValue Addr;
30072 if (TheSelect->getOpcode() == ISD::SELECT) {
30073 // We cannot do this optimization if any pair of {RLD, LLD} is a
30074 // predecessor to {RLD, LLD, CondNode}. As we've already compared the
30075 // Loads, we only need to check if CondNode is a successor to one of the
30076 // loads. We can further avoid this if there's no use of their chain
30077 // value.
30078 SDNode *CondNode = TheSelect->getOperand(0).getNode();
30079 Worklist.push_back(CondNode);
30080
30081 if ((LLD->hasAnyUseOfValue(1) &&
30082 SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
30083 (RLD->hasAnyUseOfValue(1) &&
30084 SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
30085 return false;
30086
30087 Addr = DAG.getSelect(SDLoc(TheSelect),
30088 LLD->getBasePtr().getValueType(),
30089 TheSelect->getOperand(0), LLD->getBasePtr(),
30090 RLD->getBasePtr());
30091 } else { // Otherwise SELECT_CC
30092 // We cannot do this optimization if any pair of {RLD, LLD} is a
30093 // predecessor to {RLD, LLD, CondLHS, CondRHS}. As we've already compared
30094 // the Loads, we only need to check if CondLHS/CondRHS is a successor to
30095 // one of the loads. We can further avoid this if there's no use of their
30096 // chain value.
30097
30098 SDNode *CondLHS = TheSelect->getOperand(0).getNode();
30099 SDNode *CondRHS = TheSelect->getOperand(1).getNode();
30100 Worklist.push_back(CondLHS);
30101 Worklist.push_back(CondRHS);
30102
30103 if ((LLD->hasAnyUseOfValue(1) &&
30104 SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
30105 (RLD->hasAnyUseOfValue(1) &&
30106 SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
30107 return false;
30108
30109 Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
30110 LLD->getBasePtr().getValueType(),
30111 TheSelect->getOperand(0),
30112 TheSelect->getOperand(1),
30113 LLD->getBasePtr(), RLD->getBasePtr(),
30114 TheSelect->getOperand(4));
30115 }
30116
30117 SDValue Load;
30118 // It is safe to replace the two loads if they have different alignments,
30119 // but the new load must be the minimum (most restrictive) alignment of the
30120 // inputs.
30121 Align Alignment = std::min(LLD->getAlign(), RLD->getAlign());
30122 unsigned AddrSpace = LLD->getAddressSpace();
30123 assert(AddrSpace == RLD->getAddressSpace());
30124
30125 MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
30126 if (!RLD->isInvariant())
30127 MMOFlags &= ~MachineMemOperand::MOInvariant;
30128 if (!RLD->isDereferenceable())
30129 MMOFlags &= ~MachineMemOperand::MODereferenceable;
30130 if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
30131 // FIXME: Discards pointer and AA info.
30132 Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
30133 LLD->getChain(), Addr, MachinePointerInfo(AddrSpace),
30134 Alignment, MMOFlags);
30135 } else {
30136 // FIXME: Discards pointer and AA info.
30137 Load = DAG.getExtLoad(
30139 : LLD->getExtensionType(),
30140 SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
30141 MachinePointerInfo(AddrSpace), LLD->getMemoryVT(), Alignment,
30142 MMOFlags);
30143 }
30144
30145 // Users of the select now use the result of the load.
30146 CombineTo(TheSelect, Load);
30147
30148 // Users of the old loads now use the new load's chain. We know the
30149 // old-load value is dead now.
30150 CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
30151 CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
30152 return true;
30153 }
30154
30155 return false;
30156}
30157
30158/// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
30159/// bitwise 'and'.
30160SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
30161 SDValue N1, SDValue N2, SDValue N3,
30162 ISD::CondCode CC) {
30163 // If this is a select where the false operand is zero and the compare is a
30164 // check of the sign bit, see if we can perform the "gzip trick":
30165 // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
30166 // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
30167 EVT XType = N0.getValueType();
30168 EVT AType = N2.getValueType();
30169 if (!isNullConstant(N3) || !XType.bitsGE(AType))
30170 return SDValue();
30171
30172 // If the comparison is testing for a positive value, we have to invert
30173 // the sign bit mask, so only do that transform if the target has a bitwise
30174 // 'and not' instruction (the invert is free).
30175 if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
30176 // (X > -1) ? A : 0
30177 // (X > 0) ? X : 0 <-- This is canonical signed max.
30178 if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
30179 return SDValue();
30180 } else if (CC == ISD::SETLT) {
30181 // (X < 0) ? A : 0
30182 // (X < 1) ? X : 0 <-- This is un-canonicalized signed min.
30183 if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
30184 return SDValue();
30185 } else {
30186 return SDValue();
30187 }
30188
30189 // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
30190 // constant.
30191 auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
30192 if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
30193 unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
30194 if (!TLI.shouldAvoidTransformToShift(XType, ShCt)) {
30195 SDValue ShiftAmt = DAG.getShiftAmountConstant(ShCt, XType, DL);
30196 SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
30197 AddToWorklist(Shift.getNode());
30198
30199 if (XType.bitsGT(AType)) {
30200 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
30201 AddToWorklist(Shift.getNode());
30202 }
30203
30204 if (CC == ISD::SETGT)
30205 Shift = DAG.getNOT(DL, Shift, AType);
30206
30207 return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
30208 }
30209 }
30210
30211 unsigned ShCt = XType.getSizeInBits() - 1;
30212 if (TLI.shouldAvoidTransformToShift(XType, ShCt))
30213 return SDValue();
30214
30215 SDValue ShiftAmt = DAG.getShiftAmountConstant(ShCt, XType, DL);
30216 SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
30217 AddToWorklist(Shift.getNode());
30218
30219 if (XType.bitsGT(AType)) {
30220 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
30221 AddToWorklist(Shift.getNode());
30222 }
30223
30224 if (CC == ISD::SETGT)
30225 Shift = DAG.getNOT(DL, Shift, AType);
30226
30227 return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
30228}
30229
30230// Fold select(cc, binop(), binop()) -> binop(select(), select()) etc.
30231SDValue DAGCombiner::foldSelectOfBinops(SDNode *N) {
30232 SDValue N0 = N->getOperand(0);
30233 SDValue N1 = N->getOperand(1);
30234 SDValue N2 = N->getOperand(2);
30235 SDLoc DL(N);
30236
30237 unsigned BinOpc = N1.getOpcode();
30238 if (!TLI.isBinOp(BinOpc) || (N2.getOpcode() != BinOpc) ||
30239 (N1.getResNo() != N2.getResNo()))
30240 return SDValue();
30241
30242 // The use checks are intentionally on SDNode because we may be dealing
30243 // with opcodes that produce more than one SDValue.
30244 // TODO: Do we really need to check N0 (the condition operand of the select)?
30245 // But removing that clause could cause an infinite loop...
30246 if (!N0->hasOneUse() || !N1->hasOneUse() || !N2->hasOneUse())
30247 return SDValue();
30248
30249 // Binops may include opcodes that return multiple values, so all values
30250 // must be created/propagated from the newly created binops below.
30251 SDVTList OpVTs = N1->getVTList();
30252
30253 // Fold select(cond, binop(x, y), binop(z, y))
30254 // --> binop(select(cond, x, z), y)
30255 if (N1.getOperand(1) == N2.getOperand(1)) {
30256 SDValue N10 = N1.getOperand(0);
30257 SDValue N20 = N2.getOperand(0);
30258 SDValue NewSel = DAG.getSelect(DL, N10.getValueType(), N0, N10, N20);
30259 SDNodeFlags Flags = N1->getFlags() & N2->getFlags();
30260 SDValue NewBinOp =
30261 DAG.getNode(BinOpc, DL, OpVTs, {NewSel, N1.getOperand(1)}, Flags);
30262 return SDValue(NewBinOp.getNode(), N1.getResNo());
30263 }
30264
30265 // Fold select(cond, binop(x, y), binop(x, z))
30266 // --> binop(x, select(cond, y, z))
30267 if (N1.getOperand(0) == N2.getOperand(0)) {
30268 SDValue N11 = N1.getOperand(1);
30269 SDValue N21 = N2.getOperand(1);
30270 // Second op VT might be different (e.g. shift amount type)
30271 if (N11.getValueType() == N21.getValueType()) {
30272 SDValue NewSel = DAG.getSelect(DL, N11.getValueType(), N0, N11, N21);
30273 SDNodeFlags Flags = N1->getFlags() & N2->getFlags();
30274 SDValue NewBinOp =
30275 DAG.getNode(BinOpc, DL, OpVTs, {N1.getOperand(0), NewSel}, Flags);
30276 return SDValue(NewBinOp.getNode(), N1.getResNo());
30277 }
30278 }
30279
30280 // TODO: Handle isCommutativeBinOp patterns as well?
30281 return SDValue();
30282}
30283
30284// Transform (fneg/fabs (bitconvert x)) to avoid loading constant pool values.
30285SDValue DAGCombiner::foldSignChangeInBitcast(SDNode *N) {
30286 SDValue N0 = N->getOperand(0);
30287 EVT VT = N->getValueType(0);
30288 bool IsFabs = N->getOpcode() == ISD::FABS;
30289 bool IsFree = IsFabs ? TLI.isFAbsFree(VT) : TLI.isFNegFree(VT);
30290
30291 if (IsFree || N0.getOpcode() != ISD::BITCAST || !N0.hasOneUse())
30292 return SDValue();
30293
30294 SDValue Int = N0.getOperand(0);
30295 EVT IntVT = Int.getValueType();
30296
30297 // The operand to cast should be integer.
30298 if (!IntVT.isInteger() || IntVT.isVector())
30299 return SDValue();
30300
30301 // (fneg (bitconvert x)) -> (bitconvert (xor x sign))
30302 // (fabs (bitconvert x)) -> (bitconvert (and x ~sign))
30303 APInt SignMask;
30304 if (N0.getValueType().isVector()) {
30305 // For vector, create a sign mask (0x80...) or its inverse (for fabs,
30306 // 0x7f...) per element and splat it.
30308 if (IsFabs)
30309 SignMask = ~SignMask;
30310 SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
30311 } else {
30312 // For scalar, just use the sign mask (0x80... or the inverse, 0x7f...)
30313 SignMask = APInt::getSignMask(IntVT.getSizeInBits());
30314 if (IsFabs)
30315 SignMask = ~SignMask;
30316 }
30317 SDLoc DL(N0);
30318 Int = DAG.getNode(IsFabs ? ISD::AND : ISD::XOR, DL, IntVT, Int,
30319 DAG.getConstant(SignMask, DL, IntVT));
30320 AddToWorklist(Int.getNode());
30321 return DAG.getBitcast(VT, Int);
30322}
30323
30324/// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)"
30325/// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
30326/// in it. This may be a win when the constant is not otherwise available
30327/// because it replaces two constant pool loads with one.
30328SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset(
30329 const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
30330 ISD::CondCode CC) {
30332 return SDValue();
30333
30334 // If we are before legalize types, we want the other legalization to happen
30335 // first (for example, to avoid messing with soft float).
30336 auto *TV = dyn_cast<ConstantFPSDNode>(N2);
30337 auto *FV = dyn_cast<ConstantFPSDNode>(N3);
30338 EVT VT = N2.getValueType();
30339 if (!TV || !FV || !TLI.isTypeLegal(VT))
30340 return SDValue();
30341
30342 // If a constant can be materialized without loads, this does not make sense.
30344 TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0), ForCodeSize) ||
30345 TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0), ForCodeSize))
30346 return SDValue();
30347
30348 // If both constants have multiple uses, then we won't need to do an extra
30349 // load. The values are likely around in registers for other users.
30350 if (!TV->hasOneUse() && !FV->hasOneUse())
30351 return SDValue();
30352
30353 Constant *Elts[] = { const_cast<ConstantFP*>(FV->getConstantFPValue()),
30354 const_cast<ConstantFP*>(TV->getConstantFPValue()) };
30355 Type *FPTy = Elts[0]->getType();
30356 const DataLayout &TD = DAG.getDataLayout();
30357
30358 // Create a ConstantArray of the two constants.
30359 Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
30360 SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
30361 TD.getPrefTypeAlign(FPTy));
30362 Align Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlign();
30363
30364 // Get offsets to the 0 and 1 elements of the array, so we can select between
30365 // them.
30366 SDValue Zero = DAG.getIntPtrConstant(0, DL);
30367 unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
30368 SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
30369 SDValue Cond =
30370 DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC);
30371 AddToWorklist(Cond.getNode());
30372 SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), Cond, One, Zero);
30373 AddToWorklist(CstOffset.getNode());
30374 CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset);
30375 AddToWorklist(CPIdx.getNode());
30376 return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
30378 DAG.getMachineFunction()), Alignment);
30379}
30380
/// Simplify an expression of the form (N0 cond N1) ? N2 : N3
/// where 'cond' is the comparison specified by CC.
SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
                                      SDValue N2, SDValue N3, ISD::CondCode CC,
                                      bool NotExtCompare) {
  // (x ? y : y) -> y.
  if (N2 == N3) return N2;

  EVT CmpOpVT = N0.getValueType();
  EVT CmpResVT = getSetCCResultType(CmpOpVT);
  EVT VT = N2.getValueType();
  auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
  auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
  auto *N3C = dyn_cast<ConstantSDNode>(N3.getNode());

  // Determine if the condition we're dealing with is constant.
  if (SDValue SCC = DAG.FoldSetCC(CmpResVT, N0, N1, CC, DL)) {
    AddToWorklist(SCC.getNode());
    if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC)) {
      // fold select_cc true, x, y -> x
      // fold select_cc false, x, y -> y
      return !(SCCC->isZero()) ? N2 : N3;
    }
  }

  // Try replacing the two FP-constant arms with one constant-pool load.
  if (SDValue V =
          convertSelectOfFPConstantsToLoadOffset(DL, N0, N1, N2, N3, CC))
    return V;

  // Try the sign-bit "gzip trick" (shift + and).
  if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
    return V;

  // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (sra (shl x)) A)
  // where y is has a single bit set.
  // A plaintext description would be, we can turn the SELECT_CC into an AND
  // when the condition can be materialized as an all-ones register. Any
  // single bit-test can be materialized as an all-ones register with
  // shift-left and shift-right-arith.
  if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
      N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
    SDValue AndLHS = N0->getOperand(0);
    auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
    if (ConstAndRHS && ConstAndRHS->getAPIntValue().isPowerOf2()) {
      // Shift the tested bit over the sign bit.
      const APInt &AndMask = ConstAndRHS->getAPIntValue();
      if (TLI.shouldFoldSelectWithSingleBitTest(VT, AndMask)) {
        unsigned ShCt = AndMask.getBitWidth() - 1;
        SDValue ShlAmt = DAG.getShiftAmountConstant(AndMask.countl_zero(), VT,
                                                    SDLoc(AndLHS));
        SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);

        // Now arithmetic right shift it all the way over, so the result is
        // either all-ones, or zero.
        SDValue ShrAmt = DAG.getShiftAmountConstant(ShCt, VT, SDLoc(Shl));
        SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);

        return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
      }
    }
  }

  // fold select C, 16, 0 -> shl C, 4
  bool Fold = N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2();
  bool Swap = N3C && isNullConstant(N2) && N3C->getAPIntValue().isPowerOf2();

  // NOTE(review): this condition appears truncated in this copy — the
  // right-hand side of the getBooleanContents comparison and the closing of
  // the `if` are missing. Compare against upstream LLVM before editing.
  if ((Fold || Swap) &&
      TLI.getBooleanContents(CmpOpVT) ==
      (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT)) &&

    if (Swap) {
      // Selecting the zero arm: invert the condition and swap the constants.
      CC = ISD::getSetCCInverse(CC, CmpOpVT);
      std::swap(N2C, N3C);
    }

    // If the caller doesn't want us to simplify this into a zext of a compare,
    // don't do it.
    if (NotExtCompare && N2C->isOne())
      return SDValue();

    SDValue Temp, SCC;
    // zext (setcc n0, n1)
    if (LegalTypes) {
      SCC = DAG.getSetCC(DL, CmpResVT, N0, N1, CC);
      Temp = DAG.getZExtOrTrunc(SCC, SDLoc(N2), VT);
    } else {
      SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
      Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
    }

    AddToWorklist(SCC.getNode());
    AddToWorklist(Temp.getNode());

    // Power of two == 1: the zext itself is already the result.
    if (N2C->isOne())
      return Temp;

    unsigned ShCt = N2C->getAPIntValue().logBase2();
    if (TLI.shouldAvoidTransformToShift(VT, ShCt))
      return SDValue();

    // shl setcc result by log2 n2c
    return DAG.getNode(
        ISD::SHL, DL, N2.getValueType(), Temp,
        DAG.getShiftAmountConstant(ShCt, N2.getValueType(), SDLoc(Temp)));
  }

  // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
  // select_cc seteq X, 0, sizeof(X), ctlz_zero_poison(X) -> ctlz(X)
  // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
  // select_cc seteq X, 0, sizeof(X), cttz_zero_poison(X) -> cttz(X)
  // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
  // select_cc setne X, 0, ctlz_zero_poison(X), sizeof(X) -> ctlz(X)
  // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
  // select_cc setne X, 0, cttz_zero_poison(X), sizeof(X) -> cttz(X)
  if (N1C && N1C->isZero() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
    SDValue ValueOnZero = N2;
    SDValue Count = N3;
    // If the condition is NE instead of E, swap the operands.
    if (CC == ISD::SETNE)
      std::swap(ValueOnZero, Count);
    // Check if the value on zero is a constant equal to the bits in the type.
    if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
      if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
        // If the other operand is cttz/cttz_zero_poison of N0, and cttz is
        // legal, combine to just cttz.
        if ((Count.getOpcode() == ISD::CTTZ ||
             Count.getOpcode() == ISD::CTTZ_ZERO_POISON) &&
            N0 == Count.getOperand(0) &&
            (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
          return DAG.getNode(ISD::CTTZ, DL, VT, N0);
        // If the other operand is ctlz/ctlz_zero_poison of N0, and ctlz is
        // legal, combine to just ctlz.
        if ((Count.getOpcode() == ISD::CTLZ ||
             Count.getOpcode() == ISD::CTLZ_ZERO_POISON) &&
            N0 == Count.getOperand(0) &&
            (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
          return DAG.getNode(ISD::CTLZ, DL, VT, N0);
      }
    }
  }

  // Fold select_cc setgt X, -1, C, ~C -> xor (ashr X, BW-1), C
  // Fold select_cc setlt X, 0, C, ~C -> xor (ashr X, BW-1), ~C
  if (!NotExtCompare && N1C && N2C && N3C &&
      N2C->getAPIntValue() == ~N3C->getAPIntValue() &&
      ((N1C->isAllOnes() && CC == ISD::SETGT) ||
       (N1C->isZero() && CC == ISD::SETLT)) &&
      !TLI.shouldAvoidTransformToShift(VT, CmpOpVT.getScalarSizeInBits() - 1)) {
    // NOTE(review): the call producing the shift-amount operand appears
    // truncated in this copy (one line missing). Compare with upstream.
    SDValue ASHR =
        DAG.getNode(ISD::SRA, DL, CmpOpVT, N0,
                        CmpOpVT.getScalarSizeInBits() - 1, CmpOpVT, DL));
    return DAG.getNode(ISD::XOR, DL, VT, DAG.getSExtOrTrunc(ASHR, DL, VT),
                       DAG.getSExtOrTrunc(CC == ISD::SETLT ? N3 : N2, DL, VT));
  }

  // Fold sign pattern select_cc setgt X, -1, 1, -1 -> or (ashr X, BW-1), 1
  if (CC == ISD::SETGT && N1C && N2C && N3C && N1C->isAllOnes() &&
      N2C->isOne() && N3C->isAllOnes() &&
      !TLI.shouldAvoidTransformToShift(CmpOpVT,
                                       CmpOpVT.getScalarSizeInBits() - 1)) {
    // NOTE(review): same apparent truncation as above (shift-amount builder
    // line missing). Compare with upstream.
    SDValue ASHR =
        DAG.getNode(ISD::SRA, DL, CmpOpVT, N0,
                        CmpOpVT.getScalarSizeInBits() - 1, CmpOpVT, DL));
    return DAG.getNode(ISD::OR, DL, VT, DAG.getSExtOrTrunc(ASHR, DL, VT),
                       DAG.getConstant(1, DL, VT));
  }

  if (SDValue S = PerformMinMaxFpToSatCombine(N0, N1, N2, N3, CC, DAG))
    return S;
  if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N2, N3, CC, DAG))
    return S;
  if (SDValue ABD = foldSelectToABD(N0, N1, N2, N3, CC, DL))
    return ABD;

  return SDValue();
}
30560
30562 const TargetLowering &TLI) {
30563 // Match a pattern such as:
30564 // (X | (X >> C0) | (X >> C1) | ...) & Mask
30565 // This extracts contiguous parts of X and ORs them together before comparing.
30566 // We can optimize this so that we directly check (X & SomeMask) instead,
30567 // eliminating the shifts.
30568
30569 EVT VT = Root.getValueType();
30570
30571 // TODO: Support vectors?
30572 if (!VT.isScalarInteger() || Root.getOpcode() != ISD::AND)
30573 return SDValue();
30574
30575 SDValue N0 = Root.getOperand(0);
30576 SDValue N1 = Root.getOperand(1);
30577
30578 if (N0.getOpcode() != ISD::OR || !isa<ConstantSDNode>(N1))
30579 return SDValue();
30580
30581 APInt RootMask = cast<ConstantSDNode>(N1)->getAsAPIntVal();
30582
30583 SDValue Src;
30584 const auto IsSrc = [&](SDValue V) {
30585 if (!Src) {
30586 Src = V;
30587 return true;
30588 }
30589
30590 return Src == V;
30591 };
30592
30593 SmallVector<SDValue> Worklist = {N0};
30594 APInt PartsMask(VT.getSizeInBits(), 0);
30595 while (!Worklist.empty()) {
30596 SDValue V = Worklist.pop_back_val();
30597 if (!V.hasOneUse() && (Src && Src != V))
30598 return SDValue();
30599
30600 if (V.getOpcode() == ISD::OR) {
30601 Worklist.push_back(V.getOperand(0));
30602 Worklist.push_back(V.getOperand(1));
30603 continue;
30604 }
30605
30606 if (V.getOpcode() == ISD::SRL) {
30607 SDValue ShiftSrc = V.getOperand(0);
30608 SDValue ShiftAmt = V.getOperand(1);
30609
30610 if (!IsSrc(ShiftSrc) || !isa<ConstantSDNode>(ShiftAmt))
30611 return SDValue();
30612
30613 auto ShiftAmtVal = cast<ConstantSDNode>(ShiftAmt)->getAsZExtVal();
30614 if (ShiftAmtVal > RootMask.getBitWidth())
30615 return SDValue();
30616
30617 PartsMask |= (RootMask << ShiftAmtVal);
30618 continue;
30619 }
30620
30621 if (IsSrc(V)) {
30622 PartsMask |= RootMask;
30623 continue;
30624 }
30625
30626 return SDValue();
30627 }
30628
30629 if (!Src)
30630 return SDValue();
30631
30632 SDLoc DL(Root);
30633 return DAG.getNode(ISD::AND, DL, VT,
30634 {Src, DAG.getConstant(PartsMask, DL, VT)});
30635}
30636
/// This is a stub for TargetLowering::SimplifySetCC.
SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
                                   ISD::CondCode Cond, const SDLoc &DL,
                                   bool foldBooleans) {
  // Delegate to the target-independent simplification first.
  TargetLowering::DAGCombinerInfo
    DagCombineInfo(DAG, Level, false, this);
  if (SDValue C =
      TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL))
    return C;

  // Try merging a tree of shifted extracts of one value into a single mask
  // test (see matchMergedBFX) when comparing against zero.
  // NOTE(review): the opening `if (...` line of this guard appears to have
  // been dropped in this copy; compare with upstream before editing.
      isNullConstant(N1)) {

    if (SDValue Res = matchMergedBFX(N0, DAG, TLI))
      return DAG.getSetCC(DL, VT, Res, N1, Cond);
  }

  return SDValue();
}
30656
30657/// Given an ISD::SDIV node expressing a divide by constant, return
30658/// a DAG expression to select that will generate the same value by multiplying
30659/// by a magic number.
30660/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
30661SDValue DAGCombiner::BuildSDIV(SDNode *N) {
30662 // when optimising for minimum size, we don't want to expand a div to a mul
30663 // and a shift.
30665 return SDValue();
30666
30668 if (SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, LegalTypes, Built)) {
30669 for (SDNode *N : Built)
30670 AddToWorklist(N);
30671 return S;
30672 }
30673
30674 return SDValue();
30675}
30676
30677/// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
30678/// DAG expression that will generate the same value by right shifting.
30679SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
30680 ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
30681 if (!C)
30682 return SDValue();
30683
30684 // Avoid division by zero.
30685 if (C->isZero())
30686 return SDValue();
30687
30689 if (SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built)) {
30690 for (SDNode *N : Built)
30691 AddToWorklist(N);
30692 return S;
30693 }
30694
30695 return SDValue();
30696}
30697
30698/// Given an ISD::UDIV node expressing a divide by constant, return a DAG
30699/// expression that will generate the same value by multiplying by a magic
30700/// number.
30701/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
30702SDValue DAGCombiner::BuildUDIV(SDNode *N) {
30703 // when optimising for minimum size, we don't want to expand a div to a mul
30704 // and a shift.
30706 return SDValue();
30707
30709 if (SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, LegalTypes, Built)) {
30710 for (SDNode *N : Built)
30711 AddToWorklist(N);
30712 return S;
30713 }
30714
30715 return SDValue();
30716}
30717
30718/// Given an ISD::SREM node expressing a remainder by constant power of 2,
30719/// return a DAG expression that will generate the same value.
30720SDValue DAGCombiner::BuildSREMPow2(SDNode *N) {
30721 ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
30722 if (!C)
30723 return SDValue();
30724
30725 // Avoid division by zero.
30726 if (C->isZero())
30727 return SDValue();
30728
30730 if (SDValue S = TLI.BuildSREMPow2(N, C->getAPIntValue(), DAG, Built)) {
30731 for (SDNode *N : Built)
30732 AddToWorklist(N);
30733 return S;
30734 }
30735
30736 return SDValue();
30737}
30738
30739// This is basically just a port of takeLog2 from InstCombineMulDivRem.cpp
30740//
30741// Returns the node that represents `Log2(Op)`. This may create a new node. If
30742// we are unable to compute `Log2(Op)` its return `SDValue()`.
30743//
30744// All nodes will be created at `DL` and the output will be of type `VT`.
30745//
30746// This will only return `Log2(Op)` if we can prove `Op` is non-zero. Set
30747// `AssumeNonZero` if this function should simply assume (not require proving
30748// `Op` is non-zero).
30750 SDValue Op, unsigned Depth,
30751 bool AssumeNonZero) {
30752 assert(VT.isInteger() && "Only integer types are supported!");
30753
30754 auto PeekThroughCastsAndTrunc = [](SDValue V) {
30755 while (true) {
30756 switch (V.getOpcode()) {
30757 case ISD::TRUNCATE:
30758 case ISD::ZERO_EXTEND:
30759 V = V.getOperand(0);
30760 break;
30761 default:
30762 return V;
30763 }
30764 }
30765 };
30766
30767 if (VT.isScalableVector())
30768 return SDValue();
30769
30770 Op = PeekThroughCastsAndTrunc(Op);
30771
30772 // Helper for determining whether a value is a power-2 constant scalar or a
30773 // vector of such elements.
30774 SmallVector<APInt> Pow2Constants;
30775 auto IsPowerOfTwo = [&Pow2Constants](ConstantSDNode *C) {
30776 if (C->isZero() || C->isOpaque())
30777 return false;
30778 // TODO: We may also be able to support negative powers of 2 here.
30779 if (C->getAPIntValue().isPowerOf2()) {
30780 Pow2Constants.emplace_back(C->getAPIntValue());
30781 return true;
30782 }
30783 return false;
30784 };
30785
30786 if (ISD::matchUnaryPredicate(Op, IsPowerOfTwo, /*AllowUndefs=*/false,
30787 /*AllowTruncation=*/true)) {
30788 if (!VT.isVector())
30789 return DAG.getConstant(Pow2Constants.back().logBase2(), DL, VT);
30790 // We need to create a build vector
30791 if (Op.getOpcode() == ISD::SPLAT_VECTOR)
30792 return DAG.getSplat(VT, DL,
30793 DAG.getConstant(Pow2Constants.back().logBase2(), DL,
30794 VT.getScalarType()));
30795 SmallVector<SDValue> Log2Ops;
30796 for (const APInt &Pow2 : Pow2Constants)
30797 Log2Ops.emplace_back(
30798 DAG.getConstant(Pow2.logBase2(), DL, VT.getScalarType()));
30799 return DAG.getBuildVector(VT, DL, Log2Ops);
30800 }
30801
30802 if (Depth >= DAG.MaxRecursionDepth)
30803 return SDValue();
30804
30805 auto CastToVT = [&](EVT NewVT, SDValue ToCast) {
30806 // Peek through zero extend. We can't peek through truncates since this
30807 // function is called on a shift amount. We must ensure that all of the bits
30808 // above the original shift amount are zeroed by this function.
30809 while (ToCast.getOpcode() == ISD::ZERO_EXTEND)
30810 ToCast = ToCast.getOperand(0);
30811 EVT CurVT = ToCast.getValueType();
30812 if (NewVT == CurVT)
30813 return ToCast;
30814
30815 if (NewVT.getSizeInBits() == CurVT.getSizeInBits())
30816 return DAG.getBitcast(NewVT, ToCast);
30817
30818 return DAG.getZExtOrTrunc(ToCast, DL, NewVT);
30819 };
30820
30821 // log2(X << Y) -> log2(X) + Y
30822 if (Op.getOpcode() == ISD::SHL) {
30823 // 1 << Y and X nuw/nsw << Y are all non-zero.
30824 if (AssumeNonZero || Op->getFlags().hasNoUnsignedWrap() ||
30825 Op->getFlags().hasNoSignedWrap() || isOneConstant(Op.getOperand(0)))
30826 if (SDValue LogX = takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(0),
30827 Depth + 1, AssumeNonZero))
30828 return DAG.getNode(ISD::ADD, DL, VT, LogX,
30829 CastToVT(VT, Op.getOperand(1)));
30830 }
30831
30832 // c ? X : Y -> c ? Log2(X) : Log2(Y)
30833 SDValue Cond, TVal, FVal;
30835 m_Value(FVal))))) {
30836 if (SDValue LogX =
30837 takeInexpensiveLog2(DAG, DL, VT, TVal, Depth + 1, AssumeNonZero))
30838 if (SDValue LogY =
30839 takeInexpensiveLog2(DAG, DL, VT, FVal, Depth + 1, AssumeNonZero))
30840 return DAG.getSelect(DL, VT, Cond, LogX, LogY);
30841 }
30842
30843 // log2(umin(X, Y)) -> umin(log2(X), log2(Y))
30844 // log2(umax(X, Y)) -> umax(log2(X), log2(Y))
30845 if ((Op.getOpcode() == ISD::UMIN || Op.getOpcode() == ISD::UMAX) &&
30846 Op.hasOneUse()) {
30847 // Use AssumeNonZero as false here. Otherwise we can hit case where
30848 // log2(umax(X, Y)) != umax(log2(X), log2(Y)) (because overflow).
30849 if (SDValue LogX =
30850 takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(0), Depth + 1,
30851 /*AssumeNonZero*/ false))
30852 if (SDValue LogY =
30853 takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(1), Depth + 1,
30854 /*AssumeNonZero*/ false))
30855 return DAG.getNode(Op.getOpcode(), DL, VT, LogX, LogY);
30856 }
30857
30858 return SDValue();
30859}
30860
30861/// Determines the LogBase2 value for a non-null input value using the
30862/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
30863SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL,
30864 bool KnownNonZero, bool InexpensiveOnly,
30865 std::optional<EVT> OutVT) {
30866 EVT VT = OutVT ? *OutVT : V.getValueType();
30867 SDValue InexpensiveLogBase2 =
30868 takeInexpensiveLog2(DAG, DL, VT, V, /*Depth*/ 0, KnownNonZero);
30869 if (InexpensiveLogBase2 || InexpensiveOnly || !DAG.isKnownToBeAPowerOfTwo(V))
30870 return InexpensiveLogBase2;
30871
30872 SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
30873 SDValue Base = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
30874 SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
30875 return LogBase2;
30876}
30877
30878/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
30879/// For the reciprocal, we need to find the zero of the function:
30880/// F(X) = 1/X - A [which has a zero at X = 1/A]
30881/// =>
30882/// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
30883/// does not require additional intermediate precision]
30884/// For the last iteration, put numerator N into it to gain more precision:
30885/// Result = N X_i + X_i (N - N A X_i)
30886SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op,
30887 SDNodeFlags Flags) {
30888 if (LegalDAG)
30889 return SDValue();
30890
30891 // TODO: Handle extended types?
30892 EVT VT = Op.getValueType();
30893 if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 &&
30894 VT.getScalarType() != MVT::f64)
30895 return SDValue();
30896
30897 // If estimates are explicitly disabled for this function, we're done.
30898 MachineFunction &MF = DAG.getMachineFunction();
30899 int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
30900 if (Enabled == TLI.ReciprocalEstimate::Disabled)
30901 return SDValue();
30902
30903 // Estimates may be explicitly enabled for this type with a custom number of
30904 // refinement steps.
30905 int Iterations = TLI.getDivRefinementSteps(VT, MF);
30906 if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
30907 AddToWorklist(Est.getNode());
30908
30909 SDLoc DL(Op);
30910 if (Iterations) {
30911 SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
30912
30913 // Newton iterations: Est = Est + Est (N - Arg * Est)
30914 // If this is the last iteration, also multiply by the numerator.
30915 for (int i = 0; i < Iterations; ++i) {
30916 SDValue MulEst = Est;
30917
30918 if (i == Iterations - 1) {
30919 MulEst = DAG.getNode(ISD::FMUL, DL, VT, N, Est, Flags);
30920 AddToWorklist(MulEst.getNode());
30921 }
30922
30923 SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, MulEst, Flags);
30924 AddToWorklist(NewEst.getNode());
30925
30926 NewEst = DAG.getNode(ISD::FSUB, DL, VT,
30927 (i == Iterations - 1 ? N : FPOne), NewEst, Flags);
30928 AddToWorklist(NewEst.getNode());
30929
30930 NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
30931 AddToWorklist(NewEst.getNode());
30932
30933 Est = DAG.getNode(ISD::FADD, DL, VT, MulEst, NewEst, Flags);
30934 AddToWorklist(Est.getNode());
30935 }
30936 } else {
30937 // If no iterations are available, multiply with N.
30938 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, N, Flags);
30939 AddToWorklist(Est.getNode());
30940 }
30941
30942 return Est;
30943 }
30944
30945 return SDValue();
30946}
30947
30948/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
30949/// For the reciprocal sqrt, we need to find the zero of the function:
30950/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
30951/// =>
30952/// X_{i+1} = X_i (1.5 - A X_i^2 / 2)
30953/// As a result, we precompute A/2 prior to the iteration loop.
30954SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
30955 unsigned Iterations, bool Reciprocal) {
30956 EVT VT = Arg.getValueType();
30957 SDLoc DL(Arg);
30958 SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
30959
30960 // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
30961 // this entire sequence requires only one FP constant.
30962 SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg);
30963 HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg);
30964
30965 // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
30966 for (unsigned i = 0; i < Iterations; ++i) {
30967 SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est);
30968 NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst);
30969 NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst);
30970 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst);
30971 }
30972
30973 // If non-reciprocal square root is requested, multiply the result by Arg.
30974 if (!Reciprocal)
30975 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg);
30976
30977 return Est;
30978}
30979
30980/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
30981/// For the reciprocal sqrt, we need to find the zero of the function:
30982/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
30983/// =>
30984/// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
30985SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
30986 unsigned Iterations, bool Reciprocal) {
30987 EVT VT = Arg.getValueType();
30988 SDLoc DL(Arg);
30989 SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
30990 SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
30991
30992 // This routine must enter the loop below to work correctly
30993 // when (Reciprocal == false).
30994 assert(Iterations > 0);
30995
30996 // Newton iterations for reciprocal square root:
30997 // E = (E * -0.5) * ((A * E) * E + -3.0)
30998 for (unsigned i = 0; i < Iterations; ++i) {
30999 SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est);
31000 SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est);
31001 SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree);
31002
31003 // When calculating a square root at the last iteration build:
31004 // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
31005 // (notice a common subexpression)
31006 SDValue LHS;
31007 if (Reciprocal || (i + 1) < Iterations) {
31008 // RSQRT: LHS = (E * -0.5)
31009 LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf);
31010 } else {
31011 // SQRT: LHS = (A * E) * -0.5
31012 LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf);
31013 }
31014
31015 Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS);
31016 }
31017
31018 return Est;
31019}
31020
31021/// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
31022/// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
31023/// Op can be zero.
31024SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, bool Reciprocal,
31025 SDNodeFlags Flags) {
31026 if (LegalDAG)
31027 return SDValue();
31028
31029 // TODO: Handle extended types?
31030 EVT VT = Op.getValueType();
31031 if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 &&
31032 VT.getScalarType() != MVT::f64)
31033 return SDValue();
31034
31035 // If estimates are explicitly disabled for this function, we're done.
31036 MachineFunction &MF = DAG.getMachineFunction();
31037 int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
31038 if (Enabled == TLI.ReciprocalEstimate::Disabled)
31039 return SDValue();
31040
31041 // Estimates may be explicitly enabled for this type with a custom number of
31042 // refinement steps.
31043 int Iterations = TLI.getSqrtRefinementSteps(VT, MF);
31044
31045 bool UseOneConstNR = false;
31046 if (SDValue Est =
31047 TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
31048 Reciprocal)) {
31049 AddToWorklist(Est.getNode());
31050
31051 if (Iterations > 0)
31052 Est = UseOneConstNR
31053 ? buildSqrtNROneConst(Op, Est, Iterations, Reciprocal)
31054 : buildSqrtNRTwoConst(Op, Est, Iterations, Reciprocal);
31055 if (!Reciprocal) {
31056 SDLoc DL(Op);
31057 // Try the target specific test first.
31058 SDValue Test =
31059 TLI.getSqrtInputTest(Op, DAG, DAG.getDenormalMode(VT), Flags);
31060
31061 // The estimate is now completely wrong if the input was exactly 0.0 or
31062 // possibly a denormal. Force the answer to 0.0 or value provided by
31063 // target for those cases.
31064 Est = DAG.getSelect(DL, VT, Test,
31065 TLI.getSqrtResultForDenormInput(Op, DAG), Est);
31066 }
31067 return Est;
31068 }
31069
31070 return SDValue();
31071}
31072
/// Build an estimate of 1/sqrt(Op); thin wrapper that requests the reciprocal
/// form from buildSqrtEstimateImpl.
SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
  return buildSqrtEstimateImpl(Op, true, Flags);
}
31076
/// Build an estimate of sqrt(Op); thin wrapper that requests the
/// non-reciprocal form from buildSqrtEstimateImpl.
SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
  return buildSqrtEstimateImpl(Op, false, Flags);
}
31080
31081/// Return true if there is any possibility that the two addresses overlap.
31082bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const {
31083
31084 struct MemUseCharacteristics {
31085 bool IsVolatile;
31086 bool IsAtomic;
31088 int64_t Offset;
31089 LocationSize NumBytes;
31090 MachineMemOperand *MMO;
31091 };
31092
31093 auto getCharacteristics = [this](SDNode *N) -> MemUseCharacteristics {
31094 if (const auto *LSN = dyn_cast<LSBaseSDNode>(N)) {
31095 int64_t Offset = 0;
31096 if (auto *C = dyn_cast<ConstantSDNode>(LSN->getOffset()))
31097 Offset = (LSN->getAddressingMode() == ISD::PRE_INC) ? C->getSExtValue()
31098 : (LSN->getAddressingMode() == ISD::PRE_DEC)
31099 ? -1 * C->getSExtValue()
31100 : 0;
31101 TypeSize Size = LSN->getMemoryVT().getStoreSize();
31102 return {LSN->isVolatile(), LSN->isAtomic(),
31103 LSN->getBasePtr(), Offset /*base offset*/,
31104 LocationSize::precise(Size), LSN->getMemOperand()};
31105 }
31106 if (const auto *LN = cast<LifetimeSDNode>(N)) {
31107 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
31108 return {false /*isVolatile*/,
31109 /*isAtomic*/ false,
31110 LN->getOperand(1),
31111 0,
31112 LocationSize::precise(MFI.getObjectSize(LN->getFrameIndex())),
31113 (MachineMemOperand *)nullptr};
31114 }
31115 // Default.
31116 return {false /*isvolatile*/,
31117 /*isAtomic*/ false,
31118 SDValue(),
31119 (int64_t)0 /*offset*/,
31121 (MachineMemOperand *)nullptr};
31122 };
31123
31124 MemUseCharacteristics MUC0 = getCharacteristics(Op0),
31125 MUC1 = getCharacteristics(Op1);
31126
31127 // If they are to the same address, then they must be aliases.
31128 if (MUC0.BasePtr.getNode() && MUC0.BasePtr == MUC1.BasePtr &&
31129 MUC0.Offset == MUC1.Offset)
31130 return true;
31131
31132 // If they are both volatile then they cannot be reordered.
31133 if (MUC0.IsVolatile && MUC1.IsVolatile)
31134 return true;
31135
31136 // Be conservative about atomics for the moment
31137 // TODO: This is way overconservative for unordered atomics (see D66309)
31138 if (MUC0.IsAtomic && MUC1.IsAtomic)
31139 return true;
31140
31141 if (MUC0.MMO && MUC1.MMO) {
31142 if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
31143 (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
31144 return false;
31145 }
31146
31147 // If NumBytes is scalable and offset is not 0, conservatively return may
31148 // alias
31149 if ((MUC0.NumBytes.hasValue() && MUC0.NumBytes.isScalable() &&
31150 MUC0.Offset != 0) ||
31151 (MUC1.NumBytes.hasValue() && MUC1.NumBytes.isScalable() &&
31152 MUC1.Offset != 0))
31153 return true;
31154 // Try to prove that there is aliasing, or that there is no aliasing. Either
31155 // way, we can return now. If nothing can be proved, proceed with more tests.
31156 bool IsAlias;
31157 if (BaseIndexOffset::computeAliasing(Op0, MUC0.NumBytes, Op1, MUC1.NumBytes,
31158 DAG, IsAlias))
31159 return IsAlias;
31160
31161 // The following all rely on MMO0 and MMO1 being valid. Fail conservatively if
31162 // either are not known.
31163 if (!MUC0.MMO || !MUC1.MMO)
31164 return true;
31165
31166 // If one operation reads from invariant memory, and the other may store, they
31167 // cannot alias. These should really be checking the equivalent of mayWrite,
31168 // but it only matters for memory nodes other than load /store.
31169 if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
31170 (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
31171 return false;
31172
31173 // If we know required SrcValue1 and SrcValue2 have relatively large
31174 // alignment compared to the size and offset of the access, we may be able
31175 // to prove they do not alias. This check is conservative for now to catch
31176 // cases created by splitting vector types, it only works when the offsets are
31177 // multiples of the size of the data.
31178 int64_t SrcValOffset0 = MUC0.MMO->getOffset();
31179 int64_t SrcValOffset1 = MUC1.MMO->getOffset();
31180 Align OrigAlignment0 = MUC0.MMO->getBaseAlign();
31181 Align OrigAlignment1 = MUC1.MMO->getBaseAlign();
31182 LocationSize Size0 = MUC0.NumBytes;
31183 LocationSize Size1 = MUC1.NumBytes;
31184
31185 if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
31186 Size0.hasValue() && Size1.hasValue() && !Size0.isScalable() &&
31187 !Size1.isScalable() && Size0 == Size1 &&
31188 OrigAlignment0 > Size0.getValue().getKnownMinValue() &&
31189 SrcValOffset0 % Size0.getValue().getKnownMinValue() == 0 &&
31190 SrcValOffset1 % Size1.getValue().getKnownMinValue() == 0) {
31191 int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0.value();
31192 int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1.value();
31193
31194 // There is no overlap between these relatively aligned accesses of
31195 // similar size. Return no alias.
31196 if ((OffAlign0 + static_cast<int64_t>(
31197 Size0.getValue().getKnownMinValue())) <= OffAlign1 ||
31198 (OffAlign1 + static_cast<int64_t>(
31199 Size1.getValue().getKnownMinValue())) <= OffAlign0)
31200 return false;
31201 }
31202
31205 : DAG.getSubtarget().useAA();
31206#ifndef NDEBUG
31207 if (CombinerAAOnlyFunc.getNumOccurrences() &&
31209 UseAA = false;
31210#endif
31211
31212 if (UseAA && BatchAA && MUC0.MMO->getValue() && MUC1.MMO->getValue() &&
31213 Size0.hasValue() && Size1.hasValue() &&
31214 // Can't represent a scalable size + fixed offset in LocationSize
31215 (!Size0.isScalable() || SrcValOffset0 == 0) &&
31216 (!Size1.isScalable() || SrcValOffset1 == 0)) {
31217 // Use alias analysis information.
31218 int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
31219 int64_t Overlap0 =
31220 Size0.getValue().getKnownMinValue() + SrcValOffset0 - MinOffset;
31221 int64_t Overlap1 =
31222 Size1.getValue().getKnownMinValue() + SrcValOffset1 - MinOffset;
31223 LocationSize Loc0 =
31224 Size0.isScalable() ? Size0 : LocationSize::precise(Overlap0);
31225 LocationSize Loc1 =
31226 Size1.isScalable() ? Size1 : LocationSize::precise(Overlap1);
31227 if (BatchAA->isNoAlias(
31228 MemoryLocation(MUC0.MMO->getValue(), Loc0,
31229 UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()),
31230 MemoryLocation(MUC1.MMO->getValue(), Loc1,
31231 UseTBAA ? MUC1.MMO->getAAInfo() : AAMDNodes())))
31232 return false;
31233 }
31234
31235 // Otherwise we have to assume they alias.
31236 return true;
31237}
31238
31239/// Walk up chain skipping non-aliasing memory nodes,
31240/// looking for aliasing nodes and adding them to the Aliases vector.
31241void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
31242 SmallVectorImpl<SDValue> &Aliases) {
31243 SmallVector<SDValue, 8> Chains; // List of chains to visit.
31244 SmallPtrSet<SDNode *, 16> Visited; // Visited node set.
31245
31246 // Get alias information for node.
31247 // TODO: relax aliasing for unordered atomics (see D66309)
31248 const bool IsLoad = isa<LoadSDNode>(N) && cast<LoadSDNode>(N)->isSimple();
31249
31250 // Starting off.
31251 Chains.push_back(OriginalChain);
31252 unsigned Depth = 0;
31253
31254 // Attempt to improve chain by a single step
31255 auto ImproveChain = [&](SDValue &C) -> bool {
31256 switch (C.getOpcode()) {
31257 case ISD::EntryToken:
31258 // No need to mark EntryToken.
31259 C = SDValue();
31260 return true;
31261 case ISD::LOAD:
31262 case ISD::STORE: {
31263 // Get alias information for C.
31264 // TODO: Relax aliasing for unordered atomics (see D66309)
31265 bool IsOpLoad = isa<LoadSDNode>(C.getNode()) &&
31266 cast<LSBaseSDNode>(C.getNode())->isSimple();
31267 if ((IsLoad && IsOpLoad) || !mayAlias(N, C.getNode())) {
31268 // Look further up the chain.
31269 C = C.getOperand(0);
31270 return true;
31271 }
31272 // Alias, so stop here.
31273 return false;
31274 }
31275
31276 case ISD::CopyFromReg:
31277 // Always forward past CopyFromReg.
31278 C = C.getOperand(0);
31279 return true;
31280
31282 case ISD::LIFETIME_END: {
31283 // We can forward past any lifetime start/end that can be proven not to
31284 // alias the memory access.
31285 if (!mayAlias(N, C.getNode())) {
31286 // Look further up the chain.
31287 C = C.getOperand(0);
31288 return true;
31289 }
31290 return false;
31291 }
31292 default:
31293 return false;
31294 }
31295 };
31296
31297 // Look at each chain and determine if it is an alias. If so, add it to the
31298 // aliases list. If not, then continue up the chain looking for the next
31299 // candidate.
31300 while (!Chains.empty()) {
31301 SDValue Chain = Chains.pop_back_val();
31302
31303 // Don't bother if we've seen Chain before.
31304 if (!Visited.insert(Chain.getNode()).second)
31305 continue;
31306
31307 // For TokenFactor nodes, look at each operand and only continue up the
31308 // chain until we reach the depth limit.
31309 //
31310 // FIXME: The depth check could be made to return the last non-aliasing
31311 // chain we found before we hit a tokenfactor rather than the original
31312 // chain.
31313 if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
31314 Aliases.clear();
31315 Aliases.push_back(OriginalChain);
31316 return;
31317 }
31318
31319 if (Chain.getOpcode() == ISD::TokenFactor) {
31320 // We have to check each of the operands of the token factor for "small"
31321 // token factors, so we queue them up. Adding the operands to the queue
31322 // (stack) in reverse order maintains the original order and increases the
31323 // likelihood that getNode will find a matching token factor (CSE.)
31324 if (Chain.getNumOperands() > 16) {
31325 Aliases.push_back(Chain);
31326 continue;
31327 }
31328 for (unsigned n = Chain.getNumOperands(); n;)
31329 Chains.push_back(Chain.getOperand(--n));
31330 ++Depth;
31331 continue;
31332 }
31333 // Everything else
31334 if (ImproveChain(Chain)) {
31335 // Updated Chain Found, Consider new chain if one exists.
31336 if (Chain.getNode())
31337 Chains.push_back(Chain);
31338 ++Depth;
31339 continue;
31340 }
31341 // No Improved Chain Possible, treat as Alias.
31342 Aliases.push_back(Chain);
31343 }
31344}
31345
31346/// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
31347/// (aliasing node.)
31348SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
31349 if (OptLevel == CodeGenOptLevel::None)
31350 return OldChain;
31351
31352 // Ops for replacing token factor.
31354
31355 // Accumulate all the aliases to this node.
31356 GatherAllAliases(N, OldChain, Aliases);
31357
31358 // If no operands then chain to entry token.
31359 if (Aliases.empty())
31360 return DAG.getEntryNode();
31361
31362 // If a single operand then chain to it. We don't need to revisit it.
31363 if (Aliases.size() == 1)
31364 return Aliases[0];
31365
31366 // Construct a custom tailored token factor.
31367 return DAG.getTokenFactor(SDLoc(N), Aliases);
31368}
31369
31370// This function tries to collect a bunch of potentially interesting
31371// nodes to improve the chains of, all at once. This might seem
31372// redundant, as this function gets called when visiting every store
31373// node, so why not let the work be done on each store as it's visited?
31374//
31375// I believe this is mainly important because mergeConsecutiveStores
31376// is unable to deal with merging stores of different sizes, so unless
31377// we improve the chains of all the potential candidates up-front
31378// before running mergeConsecutiveStores, it might only see some of
31379// the nodes that will eventually be candidates, and then not be able
31380// to go from a partially-merged state to the desired final
31381// fully-merged state.
31382
31383bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
31384 SmallVector<StoreSDNode *, 8> ChainedStores;
31385 StoreSDNode *STChain = St;
31386 // Intervals records which offsets from BaseIndex have been covered. In
31387 // the common case, every store writes to the immediately previous address
31388 // space and thus merged with the previous interval at insertion time.
31389
31390 using IMap = llvm::IntervalMap<int64_t, std::monostate, 8,
31391 IntervalMapHalfOpenInfo<int64_t>>;
31392 IMap::Allocator A;
31393 IMap Intervals(A);
31394
31395 // This holds the base pointer, index, and the offset in bytes from the base
31396 // pointer.
31397 const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
31398
31399 // We must have a base and an offset.
31400 if (!BasePtr.getBase().getNode())
31401 return false;
31402
31403 // Do not handle stores to undef base pointers.
31404 if (BasePtr.getBase().isUndef())
31405 return false;
31406
31407 // Do not handle stores to opaque types
31408 if (St->getMemoryVT().isZeroSized())
31409 return false;
31410
31411 // BaseIndexOffset assumes that offsets are fixed-size, which
31412 // is not valid for scalable vectors where the offsets are
31413 // scaled by `vscale`, so bail out early.
31414 if (St->getMemoryVT().isScalableVT())
31415 return false;
31416
31417 // Add ST's interval.
31418 Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8,
31419 std::monostate{});
31420
31421 while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {
31422 if (Chain->getMemoryVT().isScalableVector())
31423 return false;
31424
31425 // If the chain has more than one use, then we can't reorder the mem ops.
31426 if (!SDValue(Chain, 0)->hasOneUse())
31427 break;
31428 // TODO: Relax for unordered atomics (see D66309)
31429 if (!Chain->isSimple() || Chain->isIndexed())
31430 break;
31431
31432 // Find the base pointer and offset for this memory node.
31433 const BaseIndexOffset Ptr = BaseIndexOffset::match(Chain, DAG);
31434 // Check that the base pointer is the same as the original one.
31435 int64_t Offset;
31436 if (!BasePtr.equalBaseIndex(Ptr, DAG, Offset))
31437 break;
31438 int64_t Length = (Chain->getMemoryVT().getSizeInBits() + 7) / 8;
31439 // Make sure we don't overlap with other intervals by checking the ones to
31440 // the left or right before inserting.
31441 auto I = Intervals.find(Offset);
31442 // If there's a next interval, we should end before it.
31443 if (I != Intervals.end() && I.start() < (Offset + Length))
31444 break;
31445 // If there's a previous interval, we should start after it.
31446 if (I != Intervals.begin() && (--I).stop() <= Offset)
31447 break;
31448 Intervals.insert(Offset, Offset + Length, std::monostate{});
31449
31450 ChainedStores.push_back(Chain);
31451 STChain = Chain;
31452 }
31453
31454 // If we didn't find a chained store, exit.
31455 if (ChainedStores.empty())
31456 return false;
31457
31458 // Improve all chained stores (St and ChainedStores members) starting from
31459 // where the store chain ended and return single TokenFactor.
31460 SDValue NewChain = STChain->getChain();
31462 for (unsigned I = ChainedStores.size(); I;) {
31463 StoreSDNode *S = ChainedStores[--I];
31464 SDValue BetterChain = FindBetterChain(S, NewChain);
31466 S, BetterChain, S->getOperand(1), S->getOperand(2), S->getOperand(3)));
31467 TFOps.push_back(SDValue(S, 0));
31468 ChainedStores[I] = S;
31469 }
31470
31471 // Improve St's chain. Use a new node to avoid creating a loop from CombineTo.
31472 SDValue BetterChain = FindBetterChain(St, NewChain);
31473 SDValue NewST;
31474 if (St->isTruncatingStore())
31475 NewST = DAG.getTruncStore(BetterChain, SDLoc(St), St->getValue(),
31476 St->getBasePtr(), St->getMemoryVT(),
31477 St->getMemOperand());
31478 else
31479 NewST = DAG.getStore(BetterChain, SDLoc(St), St->getValue(),
31480 St->getBasePtr(), St->getMemOperand());
31481
31482 TFOps.push_back(NewST);
31483
31484 // If we improved every element of TFOps, then we've lost the dependence on
31485 // NewChain to successors of St and we need to add it back to TFOps. Do so at
31486 // the beginning to keep relative order consistent with FindBetterChains.
31487 auto hasImprovedChain = [&](SDValue ST) -> bool {
31488 return ST->getOperand(0) != NewChain;
31489 };
31490 bool AddNewChain = llvm::all_of(TFOps, hasImprovedChain);
31491 if (AddNewChain)
31492 TFOps.insert(TFOps.begin(), NewChain);
31493
31494 SDValue TF = DAG.getTokenFactor(SDLoc(STChain), TFOps);
31495 CombineTo(St, TF);
31496
31497 // Add TF and its operands to the worklist.
31498 AddToWorklist(TF.getNode());
31499 for (const SDValue &Op : TF->ops())
31500 AddToWorklist(Op.getNode());
31501 AddToWorklist(STChain);
31502 return true;
31503}
31504
31505bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
31506 if (OptLevel == CodeGenOptLevel::None)
31507 return false;
31508
31509 const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
31510
31511 // We must have a base and an offset.
31512 if (!BasePtr.getBase().getNode())
31513 return false;
31514
31515 // Do not handle stores to undef base pointers.
31516 if (BasePtr.getBase().isUndef())
31517 return false;
31518
31519 // Directly improve a chain of disjoint stores starting at St.
31520 if (parallelizeChainedStores(St))
31521 return true;
31522
31523 // Improve St's Chain..
31524 SDValue BetterChain = FindBetterChain(St, St->getChain());
31525 if (St->getChain() != BetterChain) {
31526 replaceStoreChain(St, BetterChain);
31527 return true;
31528 }
31529 return false;
31530}
31531
31532/// This is the entry point for the file.
31534 CodeGenOptLevel OptLevel) {
31535 /// This is the main entry point to this class.
31536 DAGCombiner(*this, BatchAA, OptLevel).Run(Level);
31537}
return SDValue()
static bool mayAlias(MachineInstr &MIa, SmallVectorImpl< MachineInstr * > &MemInsns, AliasAnalysis *AA)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static cl::opt< bool > UseAA("aarch64-use-aa", cl::init(true), cl::desc("Enable the use of AA during codegen."))
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
constexpr LLT S1
AMDGPU Register Bank Select
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the simple types necessary to represent the attributes associated with functions a...
#define X(NUM, ENUM, NAME)
Definition ELF.h:851
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL, const TargetLowering &TLI)
For the instruction sequence of store below, F and I values are bundled together as an i64 value befo...
static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I)
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
static bool canFoldInAddressingMode(GLoadStore *MI, const TargetLowering &TLI, MachineRegisterInfo &MRI)
Return true if 'MI' is a load or a store that may be fold it's address operand into the load / store ...
static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I)
static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques=false)
static cl::opt< bool > EnableShrinkLoadReplaceStoreWithStore("combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true), cl::desc("DAG combiner enable load/<replace bytes>/store with " "a narrower store"))
static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0, unsigned ExtOpc, SmallVectorImpl< SDNode * > &ExtendNodes, const TargetLowering &TLI)
static cl::opt< unsigned > TokenFactorInlineLimit("combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048), cl::desc("Limit the number of operands to inline for Token Factors"))
static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner, const TargetLowering &TLI, EVT VT, bool LegalOperations, SDNode *N, SDValue N0, ISD::LoadExtType ExtLoadType, ISD::NodeType ExtOpc, bool NonNegZExt=false)
static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG)
static SDNode * getBuildPairElt(SDNode *N, unsigned i)
static SDValue foldExtractSubvectorFromShuffleVector(EVT NarrowVT, SDValue Src, unsigned Index, const SDLoc &DL, SelectionDAG &DAG, bool LegalOperations)
Given EXTRACT_SUBVECTOR(VECTOR_SHUFFLE(Op0, Op1, Mask)), try to produce VECTOR_SHUFFLE(EXTRACT_SUBVEC...
static SDValue foldToMaskedStore(StoreSDNode *Store, SelectionDAG &DAG, const SDLoc &Dl)
static SDValue foldBitOrderCrossLogicOp(SDNode *N, SelectionDAG &DAG)
static SDValue tryToFoldExtendOfConstant(SDNode *N, const SDLoc &DL, const TargetLowering &TLI, SelectionDAG &DAG, bool LegalTypes)
Try to fold a sext/zext/aext dag node into a ConstantSDNode or a build_vector of constants.
static unsigned getMinMaxOpcodeForClamp(bool IsMin, SDValue Operand1, SDValue Operand2, SelectionDAG &DAG, const TargetLowering &TLI)
Returns an appropriate FP min/max opcode for clamping operations.
static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift, SDValue ExtractFrom, SDValue &Mask, const SDLoc &DL)
Helper function for visitOR to extract the needed side of a rotate idiom from a shl/srl/mul/udiv.
static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec, bool &IsLoad, bool &IsMasked, SDValue &Ptr, const TargetLowering &TLI)
bool refineUniformBase(SDValue &BasePtr, SDValue &Index, bool IndexIsScaled, SelectionDAG &DAG, const SDLoc &DL)
static SDValue narrowExtractedVectorLoad(EVT VT, SDValue Src, unsigned Index, const SDLoc &DL, SelectionDAG &DAG)
If we are extracting a subvector from a wide vector load, convert to a narrow load to eliminate the e...
static SDValue scalarizeExtractedBinOp(SDNode *ExtElt, SelectionDAG &DAG, const SDLoc &DL, bool LegalTypes)
Transform a vector binary operation into a scalar binary operation by moving the math/logic after an ...
static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG)
static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1, SelectionDAG &DAG, const SDLoc &DL)
Given the operands of an add/sub operation, see if the 2nd operand is a masked 0/1 whose source opera...
static bool mergeEltWithShuffle(SDValue &X, SDValue &Y, ArrayRef< int > Mask, SmallVectorImpl< int > &NewMask, SDValue Elt, unsigned InsIndex)
static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf)
If we have a unary shuffle of a shuffle, see if it can be folded away completely.
static unsigned getMinMaxOpcodeForCompareFold(SDValue Operand1, SDValue Operand2, bool SetCCNoNaNs, ISD::CondCode CC, unsigned OrAndOpcode, SelectionDAG &DAG, bool isFMAXNUMFMINNUM_IEEE, bool isFMAXNUMFMINNUM)
static bool canSplitIdx(LoadSDNode *LD)
static SDValue ShrinkLoadReplaceStoreWithStore(const std::pair< unsigned, unsigned > &MaskInfo, SDValue IVal, StoreSDNode *St, DAGCombiner *DC)
Check to see if IVal is something that provides a value as specified by MaskInfo.
static cl::opt< bool > StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden, cl::desc("Bypass the profitability model of load slicing"), cl::init(false))
Hidden option to stress test load slicing, i.e., when this option is enabled, load slicing bypasses m...
static cl::opt< bool > UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true), cl::desc("Enable DAG combiner's use of TBAA"))
static void adjustCostForPairing(SmallVectorImpl< LoadedSlice > &LoadedSlices, LoadedSlice::Cost &GlobalLSCost)
Adjust the GlobalLSCost according to the target pairing capabilities and the layout of the slices.
static SDValue combineCarryDiamond(SelectionDAG &DAG, const TargetLowering &TLI, SDValue N0, SDValue N1, SDNode *N)
static cl::opt< bool > DisableCombines("combiner-disabled", cl::Hidden, cl::init(false), cl::desc("Disable the DAG combiner"))
static SDValue foldExtendVectorInregToExtendOfSubvector(SDNode *N, const SDLoc &DL, const TargetLowering &TLI, SelectionDAG &DAG, bool LegalOperations)
static SDValue narrowExtractedVectorBinOp(EVT VT, SDValue Src, unsigned Index, const SDLoc &DL, SelectionDAG &DAG, bool LegalOperations)
If we are extracting a subvector produced by a wide binary operator try to use a narrow binary operat...
static bool isCompatibleLoad(SDValue N, unsigned ExtOpcode)
Check if N satisfies: N is used once.
static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned, const SelectionDAG &DAG)
Return true if divmod libcall is available.
static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG, const SDLoc &DL)
Given an extending node with a pop-count operand, if the target does not support a pop-count in the n...
static SDValue foldLogicTreeOfShifts(SDNode *N, SDValue LeftHand, SDValue RightHand, SelectionDAG &DAG)
Given a tree of logic operations with shape like (LOGIC (LOGIC (X, Y), LOGIC (Z, Y))) try to match an...
static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG)
static SDValue takeInexpensiveLog2(SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Op, unsigned Depth, bool AssumeNonZero)
static SDValue combineSelectAsExtAnd(SDValue Cond, SDValue T, SDValue F, const SDLoc &DL, SelectionDAG &DAG)
static bool areUsedBitsDense(const APInt &UsedBits)
Check that all bits set in UsedBits form a dense region, i.e., UsedBits looks like 0....
static SDValue foldMaskedMerge(SDNode *Node, SelectionDAG &DAG, const TargetLowering &TLI, const SDLoc &DL)
Fold "masked merge" expressions like (m & x) | (~m & y) and its DeMorgan variant (~m | x) & (m | y) i...
static SDValue getInputChainForNode(SDNode *N)
Given a node, return its input chain if it has one, otherwise return a null sd operand.
static ElementCount numVectorEltsOrZero(EVT T)
static SDValue foldSelectWithIdentityConstant(SDNode *N, SelectionDAG &DAG, bool ShouldCommuteOperands)
This inverts a canonicalization in IR that replaces a variable select arm with an identity constant.
static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG)
static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner, const TargetLowering &TLI, EVT VT, bool LegalOperations, SDNode *N, SDValue N0, ISD::LoadExtType ExtLoadType)
static SDValue foldAndToUsubsat(SDNode *N, SelectionDAG &DAG, const SDLoc &DL)
For targets that support usubsat, match a bit-hack form of that operation that ends in 'and' and conv...
static cl::opt< bool > CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden, cl::desc("Enable DAG combiner's use of IR alias analysis"))
static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy)
static SDValue formSplatFromShuffles(ShuffleVectorSDNode *OuterShuf, SelectionDAG &DAG)
Combine shuffle of shuffle of the form: shuf (shuf X, undef, InnerMask), undef, OuterMask --> splat X...
static bool isDivisorPowerOfTwo(SDValue Divisor)
static bool matchRotateHalf(const SelectionDAG &DAG, SDValue Op, SDValue &Shift, SDValue &Mask)
Match "(X shl/srl V1) & V2" where V2 may not be present.
static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG)
static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS, SDValue RHS, const SDNodeFlags Flags, const TargetLowering &TLI)
static SDValue combineShuffleOfBitcast(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalOperations)
static std::optional< EVT > canCombineShuffleToExtendVectorInreg(unsigned Opcode, EVT VT, std::function< bool(unsigned)> Match, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes, bool LegalOperations)
static SDValue PerformUMinFpToSatCombine(SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, SelectionDAG &DAG)
static SDValue combineShuffleToAnyExtendVectorInreg(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalOperations)
static SDValue foldAddSubOfSignBit(SDNode *N, const SDLoc &DL, SelectionDAG &DAG)
Try to fold a 'not' shifted sign-bit with add/sub with constant operand into a shift and add with a d...
static SDValue stripTruncAndExt(SDValue Value)
static SDValue combineUADDO_CARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG, SDValue X, SDValue Carry0, SDValue Carry1, SDNode *N)
If we are facing some sort of diamond carry propagation pattern try to break it up to generate someth...
static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG)
Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles followed by concatenatio...
static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG)
static auto getFirstIndexOf(R &&Range, const T &Val)
static SDValue getSubVectorSrc(SDValue V, unsigned Index, EVT SubVT)
static std::pair< unsigned, unsigned > CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain)
Check to see if V is (and load (ptr), imm), where the load is having specific bytes cleared out.
static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef< int > Mask)
If the shuffle mask is taking exactly one element from the first vector operand and passing through a...
static bool shouldConvertSelectOfConstantsToMath(const SDValue &Cond, EVT VT, const TargetLowering &TLI)
static cl::opt< bool > EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true), cl::desc("DAG combiner enable merging multiple stores " "into a wider store"))
static bool isContractableFMUL(const TargetOptions &Options, SDValue N)
static cl::opt< bool > MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true), cl::desc("DAG combiner may split indexing from loads"))
static bool areSlicesNextToEachOther(const LoadedSlice &First, const LoadedSlice &Second)
Check whether or not First and Second are next to each other in memory.
static SDValue stripConstantMask(const SelectionDAG &DAG, SDValue Op, SDValue &Mask)
static bool arebothOperandsNotSNan(SDValue Operand1, SDValue Operand2, SelectionDAG &DAG)
static bool isBSwapHWordPair(SDValue N, MutableArrayRef< SDNode * > Parts)
static SDValue foldFPToIntToFP(SDNode *N, const SDLoc &DL, SelectionDAG &DAG, const TargetLowering &TLI)
static bool CanCombineFCOPYSIGN_EXTEND_ROUND(EVT XTy, EVT YTy)
copysign(x, fp_extend(y)) -> copysign(x, y) copysign(x, fp_round(y)) -> copysign(x,...
static cl::opt< bool > ReduceLoadOpStoreWidthForceNarrowingProfitable("combiner-reduce-load-op-store-width-force-narrowing-profitable", cl::Hidden, cl::init(false), cl::desc("DAG combiner force override the narrowing profitable check when " "reducing the width of load/op/store sequences"))
static SDValue getTruncatedUSUBSAT(EVT DstVT, EVT SrcVT, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &DL)
static SDValue foldToSaturated(SDNode *N, EVT &VT, SDValue &Src, EVT &SrcVT, SDLoc &DL, const TargetLowering &TLI, SelectionDAG &DAG)
static SDValue FoldIntToFPToInt(SDNode *N, const SDLoc &DL, SelectionDAG &DAG)
static SDValue foldSubCtlzNot(SDNode *N, SelectionDAG &DAG)
static SDNode * getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad, bool &IsMasked, SDValue &Ptr, SDValue &BasePtr, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG, const TargetLowering &TLI)
static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG, const TargetLowering &TLI, bool Force)
Flips a boolean if it is cheaper to compute.
static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op, KnownBits &Known)
static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG, const TargetLowering &TLI, EVT VT, bool LegalOperations, SDNode *N, SDValue N0, ISD::LoadExtType ExtLoadType, ISD::NodeType ExtOpc)
static SDValue combineOrOfSetCCToUSUBOCarry(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI)
static SDValue combineConcatVectorOfShuffleAndItsOperands(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes, bool LegalOperations)
bool refineIndexType(SDValue &Index, ISD::MemIndexType &IndexType, EVT DataVT, SelectionDAG &DAG)
static SDValue foldRemainderIdiom(SDNode *N, SelectionDAG &DAG, const SDLoc &DL)
static SDValue combineMinNumMaxNumImpl(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode CC, const TargetLowering &TLI, SelectionDAG &DAG)
static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG)
If we have a shift-by-constant of a bitwise logic op that itself has a shift-by-constant operand with...
static SDValue widenAbs(SDNode *Extend, SelectionDAG &DAG)
static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset=0)
static SDValue combineShiftToMULH(SDNode *N, const SDLoc &DL, SelectionDAG &DAG, const TargetLowering &TLI)
static ConstantSDNode * getAsNonOpaqueConstant(SDValue N)
If N is a ConstantSDNode with isOpaque() == false return it casted to a ConstantSDNode pointer else n...
static bool arebothOperandsNotNan(SDValue Operand1, SDValue Operand2, SelectionDAG &DAG)
static SDValue detectUSatUPattern(SDValue In, EVT VT)
Detect patterns of truncation with unsigned saturation:
static SDValue PerformMinMaxFpToSatCombine(SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, SelectionDAG &DAG)
static SDValue combineConcatVectorOfSplats(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes, bool LegalOperations)
static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1, SDNode *N)
OR combines for which the commuted variant will be tried as well.
static SDValue detectSSatUPattern(SDValue In, EVT VT, SelectionDAG &DAG, const SDLoc &DL)
Detect patterns of truncation with unsigned saturation:
static SDValue combineShuffleToZeroExtendVectorInReg(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalOperations)
static cl::opt< bool > EnableReduceLoadOpStoreWidth("combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true), cl::desc("DAG combiner enable reducing the width of load/op/store " "sequence"))
static bool shouldCombineToPostInc(SDNode *N, SDValue Ptr, SDNode *PtrUse, SDValue &BasePtr, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG, const TargetLowering &TLI)
static SDValue combineVSelectWithAllOnesOrZeros(SDValue Cond, SDValue TVal, SDValue FVal, const TargetLowering &TLI, SelectionDAG &DAG, const SDLoc &DL)
static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize, SelectionDAG &DAG, bool IsRotate, bool FromAdd)
static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG, bool LegalOperations)
static SDValue combineConcatVectorOfCasts(SDNode *N, SelectionDAG &DAG)
static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG)
Try to replace shift/logic that tests if a bit is clear with mask + setcc.
static bool areBitwiseNotOfEachother(SDValue Op0, SDValue Op1)
static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI)
static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG)
static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG, const SDLoc &DL, bool LegalTypes)
If a vector binop is performed on splat values, it may be profitable to extract, scalarize,...
static SDValue foldVSelectToSignBitSplatMask(SDNode *N, SelectionDAG &DAG)
static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, const SDLoc &DL, SelectionDAG &DAG)
static SDValue combineConcatVectorOfConcatVectors(SDNode *N, SelectionDAG &DAG)
static SDValue tryToFoldExtOfAtomicLoad(SelectionDAG &DAG, const TargetLowering &TLI, EVT VT, SDValue N0, ISD::LoadExtType ExtLoadType)
static SDValue matchBSwapHWordOrAndAnd(const TargetLowering &TLI, SelectionDAG &DAG, SDNode *N, SDValue N0, SDValue N1, EVT VT)
static SDValue tryToFoldExtendSelectLoad(SDNode *N, const TargetLowering &TLI, SelectionDAG &DAG, const SDLoc &DL, CombineLevel Level)
Fold (sext (select c, load x, load y)) -> (select c, sextload x, sextload y) (zext (select c,...
static SDValue getAsCarry(const TargetLowering &TLI, SDValue V, bool ForceCarryReconstruction=false)
static SDValue matchMergedBFX(SDValue Root, SelectionDAG &DAG, const TargetLowering &TLI)
static SDValue foldSelectOfConstantsUsingSra(SDNode *N, const SDLoc &DL, SelectionDAG &DAG)
If a (v)select has a condition value that is a sign-bit test, try to smear the condition operand sign...
static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG)
static SDValue detectSSatSPattern(SDValue In, EVT VT)
Detect patterns of truncation with signed saturation: (truncate (smin (smax (x, signed_min_of_dest_ty...
static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG)
static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT, SelectionDAG &DAG, bool LegalOperations)
static cl::opt< unsigned > StoreMergeDependenceLimit("combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10), cl::desc("Limit the number of times for the same StoreNode and RootNode " "to bail out in store merging dependence check"))
static SDValue eliminateFPCastPair(SDNode *N)
static cl::opt< bool > EnableTopologicalSorting("combiner-topological-sorting", cl::Hidden, cl::init(false), cl::desc("DAG combiner nodes consistently processed in topological order"))
static cl::opt< std::string > CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden, cl::desc("Only use DAG-combiner alias analysis in this" " function"))
static SDValue foldLogicOfShifts(SDNode *N, SDValue LogicOp, SDValue ShiftOp, SelectionDAG &DAG)
Given a bitwise logic operation N with a matching bitwise logic operand, fold a pattern where 2 of th...
ByteProvider< SDNode * > SDByteProvider
Recursively traverses the expression calculating the origin of the requested byte of the given value.
static bool isSlicingProfitable(SmallVectorImpl< LoadedSlice > &LoadedSlices, const APInt &UsedBits, bool ForCodeSize)
Check the profitability of all involved LoadedSlice.
static SDValue narrowInsertExtractVectorBinOp(EVT SubVT, SDValue BinOp, unsigned Index, const SDLoc &DL, SelectionDAG &DAG, bool LegalOperations)
static bool isBSwapHWordElement(SDValue N, MutableArrayRef< SDNode * > Parts)
Return true if the specified node is an element that makes up a 32-bit packed halfword byteswap.
static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, unsigned &BW, bool &Unsigned, SelectionDAG &DAG)
static SDValue foldBoolSelectToLogic(SDNode *N, const SDLoc &DL, SelectionDAG &DAG)
static std::optional< SDByteProvider > calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth, std::optional< uint64_t > VectorIndex, unsigned StartingIndex=0)
dxil translate DXIL Translate Metadata
This file provides an implementation of debug counters.
#define DEBUG_COUNTER(VARNAME, COUNTERNAME, DESC)
This file defines the DenseMap class.
static bool isSigned(unsigned Opcode)
static MaybeAlign getAlign(Value *Ptr)
iv Induction Variable Users
Definition IVUsers.cpp:48
std::pair< Instruction::BinaryOps, Value * > OffsetOp
Find all possible pairs (BinOp, RHS) that BinOp V, RHS can be simplified.
static Value * simplifyDivRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, const SimplifyQuery &Q, unsigned MaxRecurse)
Check for common or similar folds of integer division or integer remainder.
This file implements a coalescing interval map for small objects.
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static LVOptions Options
Definition LVOptions.cpp:25
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
static bool isUndef(const MachineInstr &MI)
Register const TargetRegisterInfo * TRI
This file provides utility analysis objects describing memory locations.
This file contains the declarations for metadata subclasses.
#define T
#define T1
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
#define P(N)
if(PassOpts->AAPipeline)
const SmallVectorImpl< MachineOperand > & Cond
Contains matchers for matching SelectionDAG nodes and values.
static bool isSimple(Instruction *I)
static void visit(BasicBlock &Start, std::function< bool(BasicBlock *)> op)
This file contains some templates that are useful if you are working with the STL at all.
static cl::opt< bool > UseTBAA("use-tbaa-in-sched-mi", cl::Hidden, cl::init(true), cl::desc("Enable use of TBAA during MI DAG construction"))
static cl::opt< unsigned > MaxSteps("has-predecessor-max-steps", cl::Hidden, cl::init(8192), cl::desc("DAG combiner limit number of steps when searching DAG " "for predecessor nodes"))
This file implements a set that has insertion order iteration characteristics.
This file implements the SmallBitVector class.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
static unsigned getScalarSizeInBits(Type *Ty)
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
This file describes how to lower LLVM code to machine code.
static constexpr int Concat[]
Value * RHS
Value * LHS
static constexpr roundingMode rmTowardZero
Definition APFloat.h:348
static LLVM_ABI ExponentType semanticsMinExponent(const fltSemantics &)
Definition APFloat.cpp:222
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
static LLVM_ABI ExponentType semanticsMaxExponent(const fltSemantics &)
Definition APFloat.cpp:218
static LLVM_ABI unsigned int semanticsPrecision(const fltSemantics &)
Definition APFloat.cpp:214
static LLVM_ABI bool isIEEELikeFP(const fltSemantics &)
Definition APFloat.cpp:255
opStatus
IEEE-754R 7: Default exception handling.
Definition APFloat.h:360
static LLVM_ABI unsigned int semanticsIntSizeInBits(const fltSemantics &, bool)
Definition APFloat.cpp:228
static APFloat getQNaN(const fltSemantics &Sem, bool Negative=false, const APInt *payload=nullptr)
Factory for QNaN values.
Definition APFloat.h:1179
opStatus divide(const APFloat &RHS, roundingMode RM)
Definition APFloat.h:1267
bool isNegative() const
Definition APFloat.h:1538
bool isNormal() const
Definition APFloat.h:1542
bool isDenormal() const
Definition APFloat.h:1539
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
Definition APFloat.h:1521
const fltSemantics & getSemantics() const
Definition APFloat.h:1546
APFloat makeQuiet() const
Assuming this is an IEEE-754 NaN value, quiet its signaling bit.
Definition APFloat.h:1375
bool isNaN() const
Definition APFloat.h:1536
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition APFloat.h:1406
static APFloat getOne(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative One.
Definition APFloat.h:1147
bool isSignaling() const
Definition APFloat.h:1540
APInt bitcastToAPInt() const
Definition APFloat.h:1430
bool isLargest() const
Definition APFloat.h:1554
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition APFloat.h:1391
bool isInfinity() const
Definition APFloat.h:1535
Class for arbitrary precision integers.
Definition APInt.h:78
LLVM_ABI APInt umul_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:2023
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
static LLVM_ABI void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
Dual division/remainder interface.
Definition APInt.cpp:1810
LLVM_ABI APInt getLoBits(unsigned numBits) const
Compute an APInt containing numBits lowbits from this APInt.
Definition APInt.cpp:645
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition APInt.h:450
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1055
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:230
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1563
unsigned popcount() const
Count the number of bits set.
Definition APInt.h:1693
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition APInt.h:1408
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition APInt.cpp:1076
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition APInt.h:1535
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:968
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition APInt.h:207
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1353
APInt abs() const
Get the absolute value.
Definition APInt.h:1818
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:372
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition APInt.h:1189
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition APInt.h:259
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
bool isSignMask() const
Check if the APInt's value is returned by getSignMask.
Definition APInt.h:467
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1709
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1511
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition APInt.h:1118
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition APInt.h:210
bool isNegative() const
Determine sign of this APInt.
Definition APInt.h:330
bool intersects(const APInt &RHS) const
This operation tests if there are any pairs of corresponding bits between this APInt and RHS that are...
Definition APInt.h:1256
int32_t exactLogBase2() const
Definition APInt.h:1806
bool sle(const APInt &RHS) const
Signed less or equal comparison.
Definition APInt.h:1173
LLVM_ABI APInt uadd_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1987
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1662
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition APInt.h:436
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition APInt.h:1621
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition APInt.cpp:652
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:220
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition APInt.h:1554
unsigned countLeadingZeros() const
Definition APInt.h:1629
void flipAllBits()
Toggle every bit to its opposite value.
Definition APInt.h:1475
unsigned logBase2() const
Definition APInt.h:1784
bool isShiftedMask() const
Return true if this APInt value contains a non-empty sequence of ones with the remainder zero.
Definition APInt.h:511
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition APInt.h:476
bool getBoolValue() const
Convert APInt to a boolean value.
Definition APInt.h:472
LLVM_ABI APInt srem(const APInt &RHS) const
Function for signed remainder operation.
Definition APInt.cpp:1788
LLVM_ABI APInt smul_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:2012
bool isMask(unsigned numBits) const
Definition APInt.h:489
bool isMaxSignedValue() const
Determine if this is the largest signed value.
Definition APInt.h:406
bool ule(const APInt &RHS) const
Unsigned less or equal comparison.
Definition APInt.h:1157
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition APInt.cpp:1028
void setBits(unsigned loBit, unsigned hiBit)
Set the bits from loBit (inclusive) to hiBit (exclusive) to 1.
Definition APInt.h:1390
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1264
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:441
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition APInt.h:297
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
LLVM_ABI APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
Definition APInt.cpp:483
bool isIntN(unsigned N) const
Check if this APInt has an N-bits unsigned integer value.
Definition APInt.h:433
bool sge(const APInt &RHS) const
Signed greater or equal comparison.
Definition APInt.h:1244
bool isOne() const
Determine if this is a value of 1.
Definition APInt.h:390
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition APInt.h:287
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition APInt.h:240
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1585
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition APInt.h:865
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:858
unsigned countr_one() const
Count the number of trailing one bits.
Definition APInt.h:1679
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition APInt.h:1228
An arbitrary precision integer that knows its signedness.
Definition APSInt.h:24
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
ArrayRef< T > drop_front(size_t N=1) const
Drop the first N elements of the array.
Definition ArrayRef.h:194
size_t size() const
Get the array size.
Definition ArrayRef.h:141
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
static LLVM_ABI BaseIndexOffset match(const SDNode *N, const SelectionDAG &DAG)
Parses tree in N for base, index, offset addresses.
static LLVM_ABI bool computeAliasing(const SDNode *Op0, const LocationSize NumBytes0, const SDNode *Op1, const LocationSize NumBytes1, const SelectionDAG &DAG, bool &IsAlias)
This class is a wrapper over an AAResults, and it is intended to be used only when there are no IR ch...
bool isNoAlias(const MemoryLocation &LocA, const MemoryLocation &LocB)
LLVM_ABI bool isConstant() const
Represents known origin of an individual byte in combine pattern.
static ByteProvider getConstantZero()
static ByteProvider getSrc(std::optional< SDNode * > Val, int64_t ByteOffset, int64_t VectorOffset)
Combiner implementation.
Definition Combiner.h:33
ISD::CondCode get() const
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
static ConstantAsMetadata * get(Constant *C)
Definition Metadata.h:537
const APFloat & getValueAPF() const
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
bool isNegative() const
Return true if the value is negative.
bool isZero() const
Return true if the value is positive or negative zero.
const APInt & getLower() const
Return the lower value for this range.
LLVM_ABI APInt getUnsignedMin() const
Return the smallest unsigned value contained in the ConstantRange.
LLVM_ABI bool isFullSet() const
Return true if this set contains all of the elements possible for this data-type.
LLVM_ABI APInt getSignedMin() const
Return the smallest signed value contained in the ConstantRange.
LLVM_ABI ConstantRange truncate(uint32_t BitWidth, unsigned NoWrapKind=0) const
Return a new range in the specified integer type, which must be strictly smaller than the current typ...
const APInt & getUpper() const
Return the upper value for this range.
LLVM_ABI APInt getUnsignedMax() const
Return the largest unsigned value contained in the ConstantRange.
LLVM_ABI APInt getSignedMax() const
Return the largest signed value contained in the ConstantRange.
uint32_t getBitWidth() const
Get the bit width of this ConstantRange.
const ConstantInt * getConstantIntValue() const
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
This is an important base class in LLVM.
Definition Constant.h:43
DWARF expression.
static LLVM_ABI ExtOps getExtOps(unsigned FromSize, unsigned ToSize, bool Signed)
Returns the ops for a zero- or sign-extension in a DIExpression.
static LLVM_ABI DIExpression * appendOpsToArg(const DIExpression *Expr, ArrayRef< uint64_t > Ops, unsigned ArgNo, bool StackValue=false)
Create a copy of Expr by appending the given list of Ops to each instance of the operand DW_OP_LLVM_a...
static LLVM_ABI const DIExpression * convertToVariadicExpression(const DIExpression *Expr)
If Expr is a non-variadic expression (i.e.
static LLVM_ABI DIExpression * appendExt(const DIExpression *Expr, unsigned FromSize, unsigned ToSize, bool Signed)
Append a zero- or sign-extension to Expr.
bool isLittleEndian() const
Layout endianness...
Definition DataLayout.h:217
bool isBigEndian() const
Definition DataLayout.h:218
LLVM_ABI TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
LLVM_ABI Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
static bool shouldExecute(CounterInfo &Counter)
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
iterator end()
Definition DenseMap.h:81
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:309
constexpr bool isScalar() const
Exactly one element.
Definition TypeSize.h:320
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:711
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition Function.h:354
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:728
const_iterator find(KeyT x) const
find - Return an iterator pointing to the first interval ending at or after x, or end().
ISD::MemIndexedMode getAddressingMode() const
Return the addressing mode for this load or store: unindexed, pre-inc, pre-dec, post-inc,...
bool isUnindexed() const
Return true if this is NOT a pre/post inc/dec load/store.
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
LLVM_ABI RTLIB::LibcallImpl getLibcallImpl(RTLIB::Libcall Call) const
Return the lowering's selection of implementation call for Call.
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
const SDValue & getOffset() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
bool hasValue() const
static LocationSize precise(uint64_t Value)
static constexpr LocationSize beforeOrAfterPointer()
Any location before or after the base pointer (but still within the underlying object).
bool isScalable() const
TypeSize getValue() const
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1572
Machine Value Type.
SimpleValueType SimpleTy
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static MVT getIntegerVT(unsigned BitWidth)
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
Function & getFunction()
Return the LLVM function that this machine code represents.
A description of a memory reference used in the backend.
const PseudoSourceValue * getPseudoValue() const
void clearRanges()
Unset the tracked range metadata.
Flags
Flags values. These may be or'd together.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MONonTemporal
The memory access is non-temporal.
Flags getFlags() const
Return the raw flags of the source value,.
const Value * getValue() const
Return the base address of the memory access.
const SDValue & getPassThru() const
ISD::LoadExtType getExtensionType() const
const SDValue & getBasePtr() const
ISD::MemIndexType getIndexType() const
How is Index applied to BasePtr when computing addresses.
const SDValue & getInc() const
const SDValue & getScale() const
const SDValue & getMask() const
const SDValue & getIntID() const
const SDValue & getIndex() const
const SDValue & getBasePtr() const
ISD::MemIndexType getIndexType() const
This class is used to represent an MLOAD node.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
const SDValue & getMask() const
const SDValue & getPassThru() const
const SDValue & getOffset() const
bool isUnindexed() const
Return true if this is NOT a pre/post inc/dec load/store.
ISD::MemIndexedMode getAddressingMode() const
Return the addressing mode for this load or store: unindexed, pre-inc, pre-dec, post-inc,...
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
This class is used to represent an MSTORE node.
bool isCompressingStore() const
Returns true if the op does a compression to the vector before storing.
const SDValue & getOffset() const
const SDValue & getBasePtr() const
const SDValue & getMask() const
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
unsigned getAddressSpace() const
Return the address space for the associated pointer.
Align getBaseAlign() const
Returns alignment and volatility of the memory access.
const MDNode * getRanges() const
Returns the Ranges that describes the dereference.
Align getAlign() const
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
bool isSimple() const
Returns true if the memory operation is neither atomic or volatile.
MachineMemOperand * getMemOperand() const
Return the unique MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getBasePtr() const
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
bool isNonTemporal() const
bool isInvariant() const
bool isDereferenceable() const
EVT getMemoryVT() const
Return the type of the in-memory value.
Represent a mutable reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:294
MutableArrayRef< T > take_back(size_t N=1) const
Return a copy of *this with only the last N elements.
Definition ArrayRef.h:415
iterator end() const
Definition ArrayRef.h:339
iterator begin() const
Definition ArrayRef.h:338
MutableArrayRef< T > take_front(size_t N=1) const
Return a copy of *this with only the first N elements.
Definition ArrayRef.h:408
Holds the information for a single machine location through SDISel; either an SDNode,...
static SDDbgOperand fromNode(SDNode *Node, unsigned ResNo)
@ SDNODE
Value is the result of an expression.
Holds the information from a dbg_value node through SDISel.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation functions.
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
LLVM_ABI void dump() const
Dump this node, for debugging.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
LLVM_ABI bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
iterator_range< value_op_iterator > op_values() const
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
size_t use_size() const
Return the number of uses of this node.
TypeSize getValueSizeInBits(unsigned ResNo) const
Returns MVT::getSizeInBits(getValueType(ResNo)).
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
bool use_empty() const
Return true if there are no uses of this node.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
unsigned getNumOperands() const
Return the number of values used by this operation.
SDVTList getVTList() const
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
LLVM_ABI bool isOperandOf(const SDNode *N) const
Return true if this node is an operand of N.
const APInt & getConstantOperandAPInt(unsigned Num) const
Helper method returns the APInt of a ConstantSDNode operand.
std::optional< APInt > bitcastToAPInt() const
bool isPredecessorOf(const SDNode *N) const
Return true if this node is a predecessor of N.
LLVM_ABI bool hasAnyUseOfValue(unsigned Value) const
Return true if there are any use of the indicated value.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
iterator_range< user_iterator > users()
user_iterator user_begin() const
Provide iteration support to walk over all users of an SDNode.
static use_iterator use_end()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
LLVM_ABI bool reachesChainWithoutSideEffects(SDValue Dest, unsigned Depth=2) const
Return true if this operand (which must be a chain) reaches the specified operand without crossing any side-effecting instructions.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
bool isAnyAdd() const
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
bool use_empty() const
Return true if there are no nodes using value ResNo of Node.
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
unsigned getResNo() const
get the index which selects a specific result in the SDNode
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
Targets can subclass this to parameterize the SelectionDAG lowering and instruction selection process...
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
bool willNotOverflowAdd(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the addition of 2 nodes can never overflow.
LLVM_ABI Align getReducedAlign(EVT VT, bool UseABI)
In most cases this function returns the ABI alignment for a given type, except for illegal vector typ...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT, unsigned Opcode)
Convert Op, which must be of integer type, to the integer type VT, by either any/sign/zero-extending ...
LLVM_ABI SDValue getSplatSourceVector(SDValue V, int &SplatIndex)
If V is a splatted value, return the source vector and its splat index.
LLVM_ABI unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
const SDValue & getRoot() const
Return the root tag of the SelectionDAG.
LLVM_ABI SDValue getMaskedGather(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, ISD::LoadExtType ExtTy)
LLVM_ABI SDValue FoldSetCC(EVT VT, SDValue N1, SDValue N2, ISD::CondCode Cond, const SDLoc &dl, SDNodeFlags Flags={})
Constant fold a setcc to true or false.
bool isKnownNeverSNaN(SDValue Op, const APInt &DemandedElts, unsigned Depth=0) const
LLVM_ABI std::optional< bool > isBoolConstant(SDValue N) const
Check if a value \op N is a constant using the target's BooleanContent for its type.
const TargetSubtargetInfo & getSubtarget() const
LLVM_ABI ConstantRange computeConstantRange(SDValue Op, bool ForSigned, unsigned Depth=0) const
Determine the possible constant range of an integer or vector of integers.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
LLVM_ABI SDValue getSplatValue(SDValue V, bool LegalTypes=false)
If V is a splat vector, return its scalar source operand by extracting that element from the source v...
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI void ExtractVectorElements(SDValue Op, SmallVectorImpl< SDValue > &Args, unsigned Start=0, unsigned Count=0, EVT EltVT=EVT())
Append the extracted elements from Start to Count out of the vector Op in Args.
LLVM_ABI SDValue getAtomicLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT MemVT, EVT VT, SDValue Chain, SDValue Ptr, MachineMemOperand *MMO)
LLVM_ABI SDNode * getNodeIfExists(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops, const SDNodeFlags Flags, bool AllowCommute=false)
Get the specified node if it's already available, or else return NULL.
LLVM_ABI SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
LLVM_ABI SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offs=0, bool isT=false, unsigned TargetFlags=0)
LLVM_ABI SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
LLVM_ABI bool isConstantIntBuildVectorOrConstantInt(SDValue N, bool AllowOpaques=true) const
Test whether the given value is a constant int or similar node.
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-...
LLVM_ABI SDValue UnrollVectorOp(SDNode *N, unsigned ResNE=0)
Utility function used by legalize and lowering to "unroll" a vector operation by splitting out the sc...
LLVM_ABI SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
LLVM_ABI void Combine(CombineLevel Level, BatchAAResults *BatchAA, CodeGenOptLevel OptLevel)
This iterates over the nodes in the SelectionDAG, folding certain types of nodes together,...
LLVM_ABI SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
static LLVM_ABI unsigned getHasPredecessorMaxSteps()
LLVM_ABI bool haveNoCommonBitsSet(SDValue A, SDValue B) const
Return true if A and B have no common bits set.
SDValue getExtractSubvector(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Return the VT typed sub-vector of Vec at Idx.
LLVM_ABI bool cannotBeOrderedNegativeFP(SDValue Op) const
Test whether the given float value is known to be positive.
LLVM_ABI SDValue getGetFPEnv(SDValue Chain, const SDLoc &dl, SDValue Ptr, EVT MemVT, MachineMemOperand *MMO)
LLVM_ABI SDValue getAssertAlign(const SDLoc &DL, SDValue V, Align A)
Return an AssertAlignSDNode.
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
LLVM_ABI bool canIgnoreSignBitOfZero(const SDUse &Use) const
Check if a use of a float value is insensitive to signed zeros.
LLVM_ABI bool SignBitIsZeroFP(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero, for a floating-point value.
SDValue getInsertSubvector(const SDLoc &DL, SDValue Vec, SDValue SubVec, unsigned Idx)
Insert SubVec at the Idx element of Vec.
LLVM_ABI SDValue getStepVector(const SDLoc &DL, EVT ResVT, const APInt &StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false, SDNodeFlags Flags={})
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
bool willNotOverflowSub(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the sub of 2 nodes can never overflow.
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
static constexpr unsigned MaxRecursionDepth
bool isGuaranteedNotToBePoison(SDValue Op, unsigned Depth=0) const
Return true if this function can prove that Op is never poison.
LLVM_ABI SDValue getIndexedMaskedLoad(SDValue OrigLoad, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
LLVM_ABI APInt computeVectorKnownZeroElements(SDValue Op, const APInt &DemandedElts, unsigned Depth=0) const
For each demanded element of a vector, see if it is known to be zero.
LLVM_ABI void AddDbgValue(SDDbgValue *DB, bool isParameter)
Add a dbg_value SDNode.
LLVM_ABI std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
LLVM_ABI SDValue getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
LLVM_ABI void DeleteNode(SDNode *N)
Remove the specified node from the system.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
LLVM_ABI SDDbgValue * getDbgValueList(DIVariable *Var, DIExpression *Expr, ArrayRef< SDDbgOperand > Locs, ArrayRef< SDNode * > Dependencies, bool IsIndirect, const DebugLoc &DL, unsigned O, bool IsVariadic)
Creates a SDDbgValue node from a list of locations.
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
LLVM_ABI SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
LLVM_ABI std::optional< unsigned > getValidShiftAmount(SDValue V, const APInt &DemandedElts, unsigned Depth=0) const
If a SHL/SRA/SRL node V has a uniform shift amount that is less than the element bit-width of the shi...
LLVM_ABI SDValue simplifySelect(SDValue Cond, SDValue TVal, SDValue FVal)
Try to simplify a select/vselect into 1 of its operands or a constant.
LLVM_ABI SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
LLVM_ABI bool isConstantFPBuildVectorOrConstantFP(SDValue N) const
Test whether the given value is a constant FP or similar node.
const DataLayout & getDataLayout() const
SDValue getPartialReduceMLS(unsigned Opc, const SDLoc &DL, SDValue Acc, SDValue LHS, SDValue RHS)
Get an expression that implements a partial multiply-subtract reduction.
LLVM_ABI SDValue getTokenFactor(const SDLoc &DL, SmallVectorImpl< SDValue > &Vals)
Creates a new TokenFactor containing Vals.
LLVM_ABI bool LegalizeOp(SDNode *N, SmallSetVector< SDNode *, 16 > &UpdatedNodes)
Transforms a SelectionDAG node and any operands to it into a node that is compatible with the target ...
LLVM_ABI bool doesNodeExist(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops)
Check if a node exists without modifying its flags.
LLVM_ABI bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base, unsigned Bytes, int Dist) const
Return true if loads are next to each other and can be merged.
LLVM_ABI SDValue getMaskedHistogram(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
LLVM_ABI SDValue getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
LLVM_ABI SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
bool willNotOverflowMul(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the mul of 2 nodes can never overflow.
LLVM_ABI SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
LLVM_ABI void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
LLVM_ABI SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
LLVM_ABI MaybeAlign InferPtrAlign(SDValue Ptr) const
Infer alignment of a load / store address.
LLVM_ABI bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
LLVM_ABI void RemoveDeadNodes()
This method deletes all unreachable nodes in the SelectionDAG.
bool isConstantValueOfAnyType(SDValue N) const
LLVM_ABI bool canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts, UndefPoisonKind Kind=UndefPoisonKind::UndefOrPoison, bool ConsiderFlags=true, unsigned Depth=0) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
LLVM_ABI SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
LLVM_ABI KnownFPClass computeKnownFPClass(SDValue Op, FPClassTest InterestedClasses, unsigned Depth=0) const
Determine floating-point class information about Op.
LLVM_ABI bool isIdentityElement(unsigned Opc, SDNodeFlags Flags, SDValue V, unsigned OperandNo, unsigned Depth=0) const
Returns true if V is an identity element of Opc with Flags.
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(SDValue Op, UndefPoisonKind Kind=UndefPoisonKind::UndefOrPoison, unsigned Depth=0) const
Return true if this function can prove that Op is never poison and, Kind can be used to track poison ...
LLVM_ABI bool isKnownNeverZero(SDValue Op, unsigned Depth=0) const
Test whether the given SDValue is known to contain non-zero value(s).
LLVM_ABI SDValue getIndexedStore(SDValue OrigStore, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
LLVM_ABI SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
LLVM_ABI SDValue getSetFPEnv(SDValue Chain, const SDLoc &dl, SDValue Ptr, EVT MemVT, MachineMemOperand *MMO)
LLVM_ABI SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
LLVM_ABI SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Base, SDValue Offset, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
const TargetMachine & getTarget() const
LLVM_ABI SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
iterator_range< allnodes_iterator > allnodes()
const LibcallLoweringInfo & getLibcalls() const
LLVM_ABI SDValue getLoadVP(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL, MachinePointerInfo PtrInfo, EVT MemVT, Align Alignment, MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo, const MDNode *Ranges=nullptr, bool IsExpanding=false)
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getScatterVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
LLVM_ABI SDValue getValueType(EVT)
ArrayRef< SDDbgValue * > GetDbgValues(const SDNode *SD) const
Get the debug values which reference the given SDNode.
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
LLVM_ABI unsigned AssignTopologicalOrder()
Topological-sort the AllNodes list and assign a unique node id for each node in the DAG based on their topological order.
LLVM_ABI bool isKnownNeverNaN(SDValue Op, const APInt &DemandedElts, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN in...
LLVM_ABI SDValue FoldConstantBuildVector(BuildVectorSDNode *BV, const SDLoc &DL, EVT DstEltVT)
Fold BUILD_VECTOR of constants/undefs to the destination type BUILD_VECTOR of constants/undefs elemen...
LLVM_ABI SDValue getIndexedMaskedStore(SDValue OrigStore, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
LLVM_ABI bool MaskedVectorIsZero(SDValue Op, const APInt &DemandedElts, unsigned Depth=0) const
Return true if 'Op' is known to be zero in DemandedElts.
LLVM_ABI SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
LLVM_ABI OverflowKind computeOverflowForUnsignedAdd(SDValue N0, SDValue N1) const
Determine if the result of the unsigned addition of 2 nodes can overflow.
SDValue getPOISON(EVT VT)
Return a POISON node. POISON does not have a useful SDLoc.
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
bool isSafeToSpeculativelyExecuteNode(const SDNode *N) const
Check if the provided node is safe to speculatively execute given its current arguments.
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
LLVM_ABI bool isKnownToBeAPowerOfTwoFP(SDValue Val, unsigned Depth=0) const
Test if the given fp value is known to be an integer power-of-2, either positive or negative.
LLVMContext * getContext() const
LLVM_ABI SDValue simplifyFPBinop(unsigned Opcode, SDValue X, SDValue Y, SDNodeFlags Flags)
Try to simplify a floating-point binary operation into 1 of its operands or a constant.
const SDValue & setRoot(SDValue N)
Set the current root tag of the SelectionDAG.
LLVM_ABI bool isKnownToBeAPowerOfTwo(SDValue Val, bool OrZero=false, unsigned Depth=0) const
Test if the given value is known to have exactly one bit set.
LLVM_ABI bool isUndef(unsigned Opcode, ArrayRef< SDValue > Ops)
Return true if the result of this operation is always undefined.
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
LLVM_ABI SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
LLVM_ABI SDValue getIndexedLoad(SDValue OrigLoad, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
LLVM_ABI SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
DenormalMode getDenormalMode(EVT VT) const
Return the current function's default denormal handling kind for the given floating point type.
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
static unsigned getOpcode_EXTEND(unsigned Opcode)
Convert *_EXTEND_VECTOR_INREG to *_EXTEND opcode.
LLVM_ABI bool isADDLike(SDValue Op, bool NoWrap=false) const
Return true if the specified operand is an ISD::OR or ISD::XOR node that can be treated as an ISD::ADD node.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
LLVM_ABI SDValue simplifyShift(SDValue X, SDValue Y)
Try to simplify a shift into 1 of its operands or a constant.
LLVM_ABI void transferDbgValues(SDValue From, SDValue To, unsigned OffsetInBits=0, unsigned SizeInBits=0, bool InvalidateDbg=true)
Transfer debug values from one node to another, while optionally generating fragment expressions for ...
LLVM_ABI SDValue getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a logical NOT operation as (XOR Val, BooleanOne).
LLVM_ABI SDValue getMaskedScatter(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, bool IsTruncating=false)
bool empty() const
Determine if the SetVector is empty or not.
Definition SetVector.h:100
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
value_type pop_back_val()
Definition SetVector.h:279
static LLVM_ABI bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
int getMaskElt(unsigned Idx) const
ArrayRef< int > getMask() const
static void commuteMask(MutableArrayRef< int > Mask)
Change values in a shuffle permute mask assuming the two vector operands have swapped position.
void push_back(bool Val)
void reserve(unsigned N)
size_type size() const
Definition SmallPtrSet.h:99
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:339
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:134
bool empty() const
Definition SmallSet.h:169
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:184
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void assign(size_type NumElts, ValueParamT Elt)
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
iterator erase(const_iterator CI)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
iterator insert(iterator I, T &&Elt)
void resize(size_type N)
void push_back(const T &Elt)
pointer data()
Return a pointer to the vector's buffer, even if empty().
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
const SDValue & getOffset() const
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
virtual bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT) const
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
bool isOperationExpand(unsigned Op, EVT VT) const
Return true if the specified operation is illegal on this target or unlikely to be made legal with cu...
virtual bool preferSextInRegOfTruncate(EVT TruncVT, EVT VT, EVT ExtVT) const
virtual bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
virtual bool hasAndNot(SDValue X) const
Return true if the target has a bitwise and-not operation: X = ~A & B This can be used to simplify se...
virtual bool isShuffleMaskLegal(ArrayRef< int >, EVT) const
Targets can use this to indicate that they only support some VECTOR_SHUFFLE operations,...
virtual bool enableAggressiveFMAFusion(EVT VT) const
Return true if target always benefits from combining into FMA for a given value type.
bool isIndexedStoreLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed store is legal on this target.
SDValue promoteTargetBoolean(SelectionDAG &DAG, SDValue Bool, EVT ValVT) const
Promote the given target boolean to a target boolean of the given type.
virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT, std::optional< unsigned > ByteOffset=std::nullopt) const
Return true if it is profitable to reduce a load to a smaller type.
virtual bool convertSetCCLogicToBitwiseLogic(EVT VT) const
Use bitwise logic to make pairs of compares more efficient.
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
virtual bool isVectorLoadExtDesirable(SDValue ExtVal) const
Return true if folding a vector load into ExtVal (a sign, zero, or any extend node) is profitable.
unsigned getMaximumLegalStoreInBits() const
Return maximum known-legal store size, which can be guaranteed for scalable vectors.
int getRecipEstimateSqrtEnabled(EVT VT, MachineFunction &MF) const
Return a ReciprocalEstimate enum value for a square root of the given type based on the function's at...
virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
virtual MachineMemOperand::Flags getTargetMMOFlags(const Instruction &I) const
This callback is used to inspect load/store instructions and add target-specific MachineMemOperand fl...
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual bool isFPExtFoldable(const MachineInstr &MI, unsigned Opcode, LLT DestTy, LLT SrcTy) const
Return true if an fpext operation input to an Opcode operation is free (for instance,...
virtual bool hasBitTest(SDValue X, SDValue Y) const
Return true if the target has a bit-test instruction: (X & (1 << Y)) ==/!= 0 This knowledge can be us...
EVT getLegalTypeToTransformTo(LLVMContext &Context, EVT VT) const
Perform getTypeToTransformTo repeatedly until a legal type is obtained.
virtual bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT, const SelectionDAG &DAG, const MachineMemOperand &MMO) const
Return true if the following transform is beneficial: fold (conv (load x)) -> (load (conv*)x) On arch...
virtual bool areTwoSDNodeTargetMMOFlagsMergeable(const MemSDNode &NodeX, const MemSDNode &NodeY) const
Return true if it is valid to merge the TargetMMOFlags in two SDNodes.
virtual bool isCommutativeBinOp(unsigned Opcode) const
Returns true if the opcode is a commutative binary operation.
virtual bool isFPImmLegal(const APFloat &, EVT, bool ForCodeSize=false) const
Returns true if the target can instruction select the specified FP immediate natively.
virtual bool isExtractVecEltCheap(EVT VT, unsigned Index) const
Return true if extraction of a scalar element from the given vector type at the given index is cheap.
virtual bool optimizeFMulOrFDivAsShiftAddBitcast(SDNode *N, SDValue FPConst, SDValue IntPow2) const
virtual bool shouldNormalizeToSelectSequence(LLVMContext &Context, EVT VT) const
Returns true if we should normalize select(N0&N1, X, Y) => select(N0, select(N1, X,...
virtual bool preferScalarizeSplat(SDNode *N) const
bool isIndexedMaskedLoadLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed load is legal on this target.
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
virtual bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const
Return true if it is profitable to convert a select of FP constants into a constant pool load whose a...
bool hasBigEndianPartOrdering(EVT VT, const DataLayout &DL) const
When splitting a value of the specified type into parts, does the Lo or Hi part come first?
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL) const
Returns the type for the shift amount of a shift opcode.
virtual bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const
Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type from this source type with ...
virtual bool isMulAddWithConstProfitable(SDValue AddNode, SDValue ConstNode) const
Return true if it may be profitable to transform (mul (add x, c1), c2) -> (add (mul x,...
bool isPartialReduceMLALegalOrCustom(unsigned Opc, EVT AccVT, EVT InputVT) const
Return true if a PARTIAL_REDUCE_U/SMLA node with the specified types is legal or custom for this targ...
virtual bool isFsqrtCheap(SDValue X, SelectionDAG &DAG) const
Return true if SQRT(X) shouldn't be replaced with X*RSQRT(X).
int getDivRefinementSteps(EVT VT, MachineFunction &MF) const
Return the refinement step count for a division of the given type based on the function's attributes.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual bool shouldAvoidTransformToShift(EVT VT, unsigned Amount) const
Return true if creating a shift of the type by the given amount is not profitable.
virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const
Return the ValueType of the result of SETCC operations.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
virtual bool shouldFoldSelectWithSingleBitTest(EVT VT, const APInt &AndMask) const
BooleanContent getBooleanContents(bool isVec, bool isFloat) const
For targets without i1 registers, this gives the nature of the high-bits of boolean values held in ty...
virtual bool shouldReassociateReduction(unsigned RedOpc, EVT VT) const
bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal for a comparison of the specified types on this ...
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual bool shouldFoldConstantShiftPairToMask(const SDNode *N) const
Return true if it is profitable to fold a pair of shifts into a mask.
int getRecipEstimateDivEnabled(EVT VT, MachineFunction &MF) const
Return a ReciprocalEstimate enum value for a division of the given type based on the function's attri...
virtual bool preferIncOfAddToSubOfNot(EVT VT) const
These two forms are equivalent: sub y, (xor x, -1) add (add x, 1), y The variant with two add's is IR...
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
unsigned getMaxDivRemBitWidthSupported() const
Returns the size in bits of the maximum div/rem the backend supports.
virtual bool isLegalAddImmediate(int64_t) const
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
virtual bool canTransformPtrArithOutOfBounds(const Function &F, EVT PtrVT) const
True if the target allows transformations of in-bounds pointer arithmetic that cause out-of-bounds in...
virtual bool canCombineTruncStore(EVT ValVT, EVT MemVT, Align Alignment, unsigned AddrSpace, bool LegalOnly) const
virtual bool isProfitableToCombineMinNumMaxNum(EVT VT) const
virtual bool isFNegFree(EVT VT) const
Return true if an fneg operation is free to the point where it is never worthwhile to replace it with...
virtual bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT, unsigned SelectOpcode, SDValue X, SDValue Y) const
Return true if pulling a binary operation into a select with an identity constant is profitable.
virtual bool isIntDivCheap(EVT VT, AttributeList Attr) const
Return true if integer divide is usually cheaper than a sequence of several shifts,...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
bool isPredictableSelectExpensive() const
Return true if selects are only cheaper than branches if the branch is unlikely to be predicted right...
virtual bool mergeStoresAfterLegalization(EVT MemVT) const
Allow store merging for the specified type after legalization in addition to before legalization.
virtual bool shouldMergeStoreOfLoadsOverCall(EVT, EVT) const
Returns true if it's profitable to allow merging store of loads when there are functions calls betwee...
virtual bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
Return true if the target supports a memory access of this type for the given address space and align...
unsigned getGatherAllAliasesMaxDepth() const
virtual bool storeOfVectorConstantIsCheap(bool IsZero, EVT MemVT, unsigned NumElem, unsigned AddrSpace) const
Return true if it is expected to be cheaper to do a store of vector constant with the given size and ...
virtual bool isNarrowingProfitable(SDNode *N, EVT SrcVT, EVT DestVT) const
Return true if it's profitable to narrow operations of type SrcVT to DestVT.
virtual bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const
Return true if it is cheaper to split the store of a merged int val from a pair of smaller values int...
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
virtual bool shouldFoldMaskToVariableShiftPair(SDValue X) const
There are two ways to clear extreme bits (either low or high): Mask: x & (-1 << y) (the instcombine c...
bool isIndexedLoadLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed load is legal on this target.
virtual bool canMergeStoresTo(unsigned AS, EVT MemVT, const MachineFunction &MF) const
Returns if it's reasonable to merge stores to MemVT size.
virtual bool preferABDSToABSWithNSW(EVT VT) const
bool isTruncStoreLegal(EVT ValVT, EVT MemVT, Align Alignment, unsigned AddrSpace) const
Return true if the specified store with truncation is legal on this target.
AndOrSETCCFoldKind
Enum of different potentially desirable ways to fold (and/or (setcc ...), (setcc ....
virtual bool shouldScalarizeBinop(SDValue VecOp) const
Try to convert an extract element of a vector binary operation into an extract element followed by a ...
virtual bool isStoreBitCastBeneficial(EVT StoreVT, EVT BitcastVT, const SelectionDAG &DAG, const MachineMemOperand &MMO) const
Return true if the following transform is beneficial: (store (y (conv x)), y*)) -> (store x,...
bool isIndexedMaskedStoreLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed masked store is legal on this target.
virtual bool isVectorClearMaskLegal(ArrayRef< int >, EVT) const
Similar to isShuffleMaskLegal.
LegalizeKind getTypeConversion(LLVMContext &Context, EVT VT) const
Return pair that represents the legalization kind (first) that needs to happen to EVT (second) in ord...
bool hasTargetDAGCombine(ISD::NodeType NT) const
If true, the target has custom DAG combine transformations that it can perform for the specified node...
virtual bool shouldSplatInsEltVarIndex(EVT) const
Return true if inserting a scalar into a variable element of an undef vector is more efficiently hand...
NegatibleCost
Enum that specifies when a float negation is beneficial.
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
int getSqrtRefinementSteps(EVT VT, MachineFunction &MF) const
Return the refinement step count for a square root of the given type based on the function's attribut...
virtual unsigned preferedOpcodeForCmpEqPiecesOfOperand(EVT VT, unsigned ShiftOpc, bool MayTransformRotate, const APInt &ShiftOrRotateAmt, const std::optional< APInt > &AndMask) const
virtual bool isFMADLegal(const MachineInstr &MI, LLT Ty) const
Returns true if MI can be combined with another instruction to form TargetOpcode::G_FMAD.
virtual bool aggressivelyPreferBuildVectorSources(EVT VecVT) const
virtual bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const
bool isLoadLegal(EVT ValVT, EVT MemVT, Align Alignment, unsigned AddrSpace, unsigned ExtType, bool Atomic) const
Return true if the specified load with extension is legal on this target.
bool isLoadLegalOrCustom(EVT ValVT, EVT MemVT, Align Alignment, unsigned AddrSpace, unsigned ExtType, bool Atomic) const
Return true if the specified load with extension is legal or custom on this target.
virtual bool isFAbsFree(EVT VT) const
Return true if an fabs operation is free to the point where it is never worthwhile to replace it with...
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
virtual bool generateFMAsInMachineCombiner(EVT VT, CodeGenOptLevel OptLevel) const
virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AddrSpace, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
virtual bool hasPairedLoad(EVT, Align &) const
Return true if the target supplies and combines to a paired load two loaded values of type LoadedType...
virtual bool convertSelectOfConstantsToMath(EVT VT) const
Return true if a select of constants (select Cond, C1, C2) should be transformed into simple math ops...
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const
Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT from min(max(fptoi)) satur...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps, bool &UseOneConstNR, bool Reciprocal) const
Hooks for building estimates in place of slower divisions and square roots.
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
virtual bool isReassocProfitable(SelectionDAG &DAG, SDValue N0, SDValue N1) const
SDValue getCheaperOrNeutralNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, const NegatibleCost CostThreshold=NegatibleCost::Neutral, unsigned Depth=0) const
virtual bool isTargetCanonicalSelect(SDNode *N) const
Return true if the given select/vselect should be considered canonical and not be transformed.
SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, unsigned Depth=0) const
This is the helper function to return the newly negated expression only when the cost is cheaper.
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
More limited version of SimplifyDemandedBits that can be used to "lookthrough" ops that don't contrib...
SDValue expandABS(SDNode *N, SelectionDAG &DAG, bool IsNegative=false) const
Expand ABS nodes.
virtual bool IsDesirableToPromoteOp(SDValue, EVT &) const
This method query the target whether it is beneficial for dag combiner to promote the specified node.
SDValue BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, bool IsAfterLegalTypes, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::SDIV node expressing a divide by constant, return a DAG expression to select that will ...
virtual bool isTypeDesirableForOp(unsigned, EVT VT) const
Return true if the target has native support for the specified value type and it is 'desirable' to us...
SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, bool IsAfterLegalTypes, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::UDIV node expressing a divide by constant, return a DAG expression to select that will ...
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
SDValue buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0, SDValue N1, MutableArrayRef< int > Mask, SelectionDAG &DAG) const
Tries to build a legal vector shuffle using the provided parameters or equivalent variations.
virtual SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps) const
Return a reciprocal estimate value for the input operand.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
TargetLowering(const TargetLowering &)=delete
bool isConstFalseVal(SDValue N) const
Return if the N is a constant or constant vector equal to the false value from getBooleanContents().
virtual SDValue getSqrtResultForDenormInput(SDValue Operand, SelectionDAG &DAG) const
Return a target-dependent result if the input operand is not suitable for use with a square root esti...
virtual bool getPostIndexedAddressParts(SDNode *, SDNode *, SDValue &, SDValue &, ISD::MemIndexedMode &, SelectionDAG &) const
Returns true by value, base pointer and offset pointer and addressing mode by reference if this node ...
SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, bool foldBooleans, DAGCombinerInfo &DCI, const SDLoc &dl) const
Try to simplify a setcc built with the specified operands and cc.
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const
Return true if folding a constant offset with the given GlobalAddress is legal.
bool isConstTrueVal(SDValue N) const
Return if the N is a constant or constant vector equal to the true value from getBooleanContents().
virtual bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const
Return true if it is profitable to move this shift by a constant amount through its operand,...
virtual bool useTopologicalSorting() const
virtual unsigned combineRepeatedFPDivisors() const
Indicate whether this target prefers to combine FDIVs with the same divisor.
virtual AndOrSETCCFoldKind isDesirableToCombineLogicOpOfSETCC(const SDNode *LogicOp, const SDNode *SETCC0, const SDNode *SETCC1) const
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, const DenormalMode &Mode, SDNodeFlags Flags={}) const
Return a target-dependent comparison result if the input operand is suitable for use with a square ro...
virtual bool getPreIndexedAddressParts(SDNode *, SDValue &, SDValue &, ISD::MemIndexedMode &, SelectionDAG &) const
Returns true by value, base pointer and offset pointer and addressing mode by reference if the node's...
SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, SDValue Index, const SDNodeFlags PtrArithFlags=SDNodeFlags()) const
Get a pointer to vector element Idx located in memory for a vector of type VecVT starting at a base a...
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue getInboundsVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, SDValue Index) const
Get a pointer to vector element Idx located in memory for a vector of type VecVT starting at a base a...
virtual SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
SDValue scalarizeExtractedVectorLoad(EVT ResultVT, const SDLoc &DL, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad, SelectionDAG &DAG) const
Replace an extraction of a load with a narrowed load.
virtual SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SREM lowering for power-of-2 denominators.
virtual bool isDesirableToTransformToIntegerOp(unsigned, EVT) const
Return true if it is profitable for dag combiner to transform a floating point op of specified opcode...
TargetOptions Options
unsigned NoSignedZerosFPMath
NoSignedZerosFPMath - This flag is enabled when the -enable-no-signed-zeros-fp-math is specified on t...
virtual bool useAA() const
Enable use of alias analysis during code generation (during MI scheduling, DAGCombine,...
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
LLVM_ABI const fltSemantics & getFltSemantics() const
Definition Type.cpp:110
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
User * getUser() const
Returns the User that contains this Use.
Definition Use.h:61
Value * getOperand(unsigned i) const
Definition User.h:207
const SDValue & getScale() const
ISD::MemIndexType getIndexType() const
How is Index applied to BasePtr when computing addresses.
const SDValue & getVectorLength() const
const SDValue & getIndex() const
const SDValue & getBasePtr() const
const SDValue & getMask() const
const SDValue & getValue() const
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
iterator_range< user_iterator > users()
Definition Value.h:426
int getNumOccurrences() const
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
Definition TypeSize.h:180
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:230
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:168
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition TypeSize.h:252
Changed
#define INT64_MAX
Definition DataTypes.h:71
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char IsVolatile[]
Key for Kernel::Arg::Metadata::mIsVolatile.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
const APInt & smin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be signed.
Definition APInt.h:2277
const APInt & smax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be signed.
Definition APInt.h:2282
const APInt & umin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be unsigned.
Definition APInt.h:2287
const APInt & umax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be unsigned.
Definition APInt.h:2292
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ Entry
Definition COFF.h:862
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
LLVM_ABI CondCode getSetCCAndOperation(CondCode Op1, CondCode Op2, EVT Type)
Return the result of a logical AND between different comparisons of identical values: ((X op1 Y) & (X...
LLVM_ABI bool isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are ~0 ...
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:41
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:819
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition ISDOpcodes.h:261
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition ISDOpcodes.h:511
@ PTRADD
PTRADD represents pointer arithmetic semantics, for targets that opt in using shouldPreservePtrArith(...
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition ISDOpcodes.h:45
@ POISON
POISON - A poison node.
Definition ISDOpcodes.h:236
@ PARTIAL_REDUCE_SMLA
PARTIAL_REDUCE_[U|S]MLA(Accumulator, Input1, Input2) The partial reduction nodes sign or zero extend ...
@ COND_LOOP
COND_LOOP is a conditional branch to self, used for implementing efficient conditional traps.
@ MLOAD
Masked load and store - consecutive vector load and store operations with additional mask operand tha...
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:275
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:600
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:779
@ SMULFIX
RESULT = [US]MULFIX(LHS, RHS, SCALE) - Perform fixed point multiplication on 2 integers with the same...
Definition ISDOpcodes.h:394
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, val, ptr) This corresponds to "store atomic" instruction.
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:294
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
Definition ISDOpcodes.h:522
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:264
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ SMULFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
Definition ISDOpcodes.h:400
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:853
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:518
@ GlobalAddress
Definition ISDOpcodes.h:88
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:880
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:584
@ VECREDUCE_FMAX
FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:417
@ VECREDUCE_FMAXIMUM
FMINIMUM/FMAXIMUM nodes propagate NaNs and signed zeroes using the llvm.minimum and llvm....
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:747
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition ISDOpcodes.h:910
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition ISDOpcodes.h:280
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
@ FMULADD
FMULADD - Performs a * b + c, with, or without, intermediate rounding.
Definition ISDOpcodes.h:528
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition ISDOpcodes.h:993
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:254
@ CLMUL
Carry-less multiplication operations.
Definition ISDOpcodes.h:774
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ CTLZ_ZERO_POISON
Definition ISDOpcodes.h:788
@ PARTIAL_REDUCE_UMLA
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:844
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition ISDOpcodes.h:715
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:665
@ VECREDUCE_FADD
These reductions have relaxed evaluation order semantics, and have a single vector operand.
@ PARTIAL_REDUCE_FMLA
@ TRUNCATE_SSAT_U
Definition ISDOpcodes.h:873
@ SETCCCARRY
Like SetCC, ops #0 and #1 are the LHS and RHS operands to compare, but op #2 is a boolean indicating ...
Definition ISDOpcodes.h:827
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ BR_CC
BR_CC - Conditional branch.
@ SSUBO
Same for subtraction.
Definition ISDOpcodes.h:352
@ VECTOR_INTERLEAVE
VECTOR_INTERLEAVE(VEC1, VEC2, ...) - Returns N vectors from N input vectors, where N is the factor to...
Definition ISDOpcodes.h:635
@ STEP_VECTOR
STEP_VECTOR(IMM) - Returns a scalable vector whose lanes are comprised of a linear sequence of unsign...
Definition ISDOpcodes.h:691
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition ISDOpcodes.h:541
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition ISDOpcodes.h:548
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:374
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:796
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:233
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition ISDOpcodes.h:247
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:672
@ AssertAlign
AssertAlign - These nodes record if a register contains a value that has a known alignment and the tr...
Definition ISDOpcodes.h:69
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition ISDOpcodes.h:230
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition ISDOpcodes.h:348
@ CTLS
Count leading redundant sign bits.
Definition ISDOpcodes.h:792
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:704
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:765
@ AssertNoFPClass
AssertNoFPClass - These nodes record if a register contains a float value that is known to be not som...
Definition ISDOpcodes.h:78
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:649
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:614
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
@ EntryToken
EntryToken - This is the marker used to indicate the start of a region.
Definition ISDOpcodes.h:48
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:576
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition ISDOpcodes.h:224
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:850
@ TargetConstantFP
Definition ISDOpcodes.h:180
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:811
@ VSCALE
VSCALE(IMM) - Returns the runtime scaling factor used to calculate the number of elements within a sc...
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum maximum on two values, following IEEE-754 definition...
@ SSHLSAT
RESULT = [US]SHLSAT(LHS, RHS) - Perform saturation left shift.
Definition ISDOpcodes.h:386
@ SMULO
Same for multiplication.
Definition ISDOpcodes.h:356
@ TargetFrameIndex
Definition ISDOpcodes.h:187
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition ISDOpcodes.h:899
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:888
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:727
@ LIFETIME_START
This corresponds to the llvm.lifetime.
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:978
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:805
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:328
@ MGATHER
Masked gather and scatter - load and store operations for a vector of random addresses with additiona...
@ HANDLENODE
HANDLENODE node - Used as a handle for various purposes.
@ BF16_TO_FP
BF16_TO_FP, FP_TO_BF16 - These operators are used to perform promotions and truncation for bfloat16.
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:926
@ TargetConstant
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification,...
Definition ISDOpcodes.h:179
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:739
@ GET_FPENV_MEM
Gets the current floating-point environment.
@ CARRY_FALSE
CARRY_FALSE - This node is used when folding other nodes, like ADDC/SUBC, which indicate the carry re...
Definition ISDOpcodes.h:285
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition ISDOpcodes.h:710
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:304
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition ISDOpcodes.h:427
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition ISDOpcodes.h:241
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:565
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ CTTZ_ZERO_POISON
Bit counting operators with a poisoned result for zero inputs.
Definition ISDOpcodes.h:787
@ FFREXP
FFREXP - frexp, extract fractional and exponent component of a floating-point value.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:959
@ VECTOR_COMPRESS
VECTOR_COMPRESS(Vec, Mask, Passthru) consecutively place vector elements based on mask e....
Definition ISDOpcodes.h:699
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition ISDOpcodes.h:921
@ EXPERIMENTAL_VECTOR_HISTOGRAM
Experimental vector histogram intrinsic Operands: Input Chain, Inc, Mask, Base, Index,...
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition ISDOpcodes.h:945
@ VECREDUCE_FMINIMUM
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:856
@ BRCOND
BRCOND - Conditional branch.
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:62
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition ISDOpcodes.h:534
@ PARTIAL_REDUCE_SUMLA
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:365
@ CALLSEQ_START
CALLSEQ_START/CALLSEQ_END - These operators mark the beginning and end of a call sequence,...
@ SET_FPENV_MEM
Sets the current floating point environment.
@ FMINIMUMNUM
FMINIMUMNUM/FMAXIMUMNUM - minimumnum/maximumnum that is same with FMINNUM_IEEE and FMAXNUM_IEEE besid...
@ TRUNCATE_SSAT_S
TRUNCATE_[SU]SAT_[SU] - Truncate for saturated operand [SU] located in middle, prefix for SAT means i...
Definition ISDOpcodes.h:871
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition ISDOpcodes.h:722
@ TRUNCATE_USAT_U
Definition ISDOpcodes.h:875
@ SADDO_CARRY
Carry-using overflow-aware nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:338
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:556
bool isIndexTypeSigned(MemIndexType IndexType)
bool isExtVecInRegOpcode(unsigned Opcode)
LLVM_ABI NodeType getOppositeSignednessMinMaxOpcode(unsigned MinMaxOpc)
Given a MinMaxOpc of ISD::(U|S)MIN or ISD::(U|S)MAX, returns the corresponding opcode with the opposi...
LLVM_ABI bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
bool matchUnaryFpPredicate(SDValue Op, std::function< bool(ConstantFPSDNode *)> Match, bool AllowUndefs=false)
Hook for matching ConstantFPSDNode predicate.
bool isFPEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with floati...
bool isExtOpcode(unsigned Opcode)
LLVM_ABI bool isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are 0 o...
LLVM_ABI bool isVPBinaryOp(unsigned Opcode)
Whether this is a vector-predicated binary operation opcode.
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
bool isBitwiseLogicOp(unsigned Opcode)
Whether this is bitwise logic opcode.
LLVM_ABI std::optional< unsigned > getVPMaskIdx(unsigned Opcode)
The operand position of the vector mask.
bool isUNINDEXEDLoad(const SDNode *N)
Returns true if the specified node is an unindexed load.
LLVM_ABI std::optional< unsigned > getVPExplicitVectorLengthIdx(unsigned Opcode)
The operand position of the explicit vector length parameter.
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is a EXTLOAD.
LLVM_ABI bool allOperandsUndef(const SDNode *N)
Return true if the node has at least one operand and all operands of the specified node are ISD::UNDE...
LLVM_ABI bool isFreezeUndef(const SDNode *N)
Return true if the specified node is FREEZE(UNDEF).
LLVM_ABI CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
MemIndexType
MemIndexType enum - This enum defines how to interpret MGATHER/SCATTER's index parameter when calcula...
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
LLVM_ABI bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
LLVM_ABI NodeType getInverseMinMaxOpcode(unsigned MinMaxOpc)
Given a MinMaxOpc of ISD::(U|S)MIN or ISD::(U|S)MAX, returns ISD::(U|S)MAX and ISD::(U|S)MIN,...
LLVM_ABI bool matchBinaryPredicate(SDValue LHS, SDValue RHS, std::function< bool(ConstantSDNode *, ConstantSDNode *)> Match, bool AllowUndefs=false, bool AllowTypeMismatch=false)
Attempt to match a binary predicate against a pair of scalar/splat constants or every element of a pa...
LLVM_ABI bool isVPReduction(unsigned Opcode)
Whether this is a vector-predicated reduction opcode.
bool matchUnaryPredicate(SDValue Op, std::function< bool(ConstantSDNode *)> Match, bool AllowUndefs=false, bool AllowTruncation=false)
Hook for matching ConstantSDNode predicate.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
LLVM_ABI bool isBuildVectorOfConstantFPSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantFPSDNode or undef.
bool isSEXTLoad(const SDNode *N)
Returns true if the specified node is a SEXTLOAD.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LLVM_ABI bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
LLVM_ABI CondCode getSetCCOrOperation(CondCode Op1, CondCode Op2, EVT Type)
Return the result of a logical OR between different comparisons of identical values: ((X op1 Y) | (X ...
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
BinaryOp_match< SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB > m_Neg(const SrcTy &&Src)
Matches a register negated by a G_SUB.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
m_Intrinsic_Ty< Opnd0 >::Ty m_BitReverse(const Opnd0 &Op0)
BinaryOp_match< LHS, RHS, Instruction::URem > m_URem(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
match_deferred< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_VScale()
Matches a call to llvm.vscale().
auto m_BinOp()
Match an arbitrary binary operation and ignore it.
MaxMin_match< ICmpInst, LHS, RHS, smin_pred_ty > m_SMin(const LHS &L, const RHS &R)
CastInst_match< OpTy, FPToUIInst > m_FPToUI(const OpTy &Op)
auto m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
MaxMin_match< ICmpInst, LHS, RHS, umax_pred_ty > m_UMax(const LHS &L, const RHS &R)
SelectLike_match< CondTy, LTy, RTy > m_SelectLike(const CondTy &C, const LTy &TrueC, const RTy &FalseC)
Matches a value that behaves like a boolean-controlled select, i.e.
CastOperator_match< OpTy, Instruction::BitCast > m_BitCast(const OpTy &Op)
Matches BitCast.
MaxMin_match< ICmpInst, LHS, RHS, smax_pred_ty > m_SMax(const LHS &L, const RHS &R)
AnyBinaryOp_match< LHS, RHS, true > m_c_BinOp(const LHS &L, const RHS &R)
Matches a BinaryOperator with LHS and RHS in either order.
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
m_Intrinsic_Ty< Opnd0, Opnd1 >::Ty m_Ctlz(const Opnd0 &Op0, const Opnd1 &Op1)
BinaryOp_match< LHS, RHS, Instruction::SRem > m_SRem(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
BinOpPred_match< LHS, RHS, is_bitwiselogic_op > m_BitwiseLogic(const LHS &L, const RHS &R)
Matches bitwise logic operations.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
MaxMin_match< ICmpInst, LHS, RHS, umin_pred_ty > m_UMin(const LHS &L, const RHS &R)
LLVM_ABI Libcall getCBRT(EVT RetVT)
getCBRT - Return the CBRT_* value for the given types, or UNKNOWN_LIBCALL if there is none.
auto m_SelectCCLike(const LTy &L, const RTy &R, const TTy &T, const FTy &F, const CCTy &CC)
BinaryOpc_match< LHS, RHS > m_Srl(const LHS &L, const RHS &R)
auto m_SpecificVT(EVT RefVT, const Pattern &P)
Match a specific ValueType.
Opcode_match m_SpecificOpc(unsigned Opcode)
BinaryOpc_match< LHS, RHS > m_Sra(const LHS &L, const RHS &R)
BinaryOpc_match< LHS, RHS, true > m_Clmul(const LHS &L, const RHS &R)
auto m_UMinLike(const LHS &L, const RHS &R)
auto m_UMaxLike(const LHS &L, const RHS &R)
UnaryOpc_match< Opnd > m_Abs(const Opnd &Op)
Or< Preds... > m_AnyOf(const Preds &...preds)
And< Preds... > m_AllOf(const Preds &...preds)
TernaryOpc_match< T0_P, T1_P, T2_P > m_SetCC(const T0_P &LHS, const T1_P &RHS, const T2_P &CC)
ReassociatableOpc_match< PatternTs... > m_ReassociatableAdd(const PatternTs &...Patterns)
UnaryOpc_match< Opnd > m_AnyExt(const Opnd &Op)
auto m_Node(unsigned Opcode, const OpndPreds &...preds)
auto m_SMaxLike(const LHS &L, const RHS &R)
SpecificNeg_match m_SpecificNeg(SDValue V)
Match a negation of a specific value V, either as sub(0, V) or as constant(s) that are the negation o...
TernaryOpc_match< T0_P, T1_P, T2_P > m_VSelect(const T0_P &Cond, const T1_P &T, const T2_P &F)
bool sd_match(SDNode *N, const SelectionDAG *DAG, Pattern &&P)
UnaryOpc_match< Opnd > m_UnaryOp(unsigned Opc, const Opnd &Op)
auto m_SMinLike(const LHS &L, const RHS &R)
CondCode_match m_SpecificCondCode(ISD::CondCode CC)
Match a conditional code SDNode with a specific ISD::CondCode.
ReassociatableOpc_match< PatternTs... > m_ReassociatableAnd(const PatternTs &...Patterns)
NUses_match< 1, Value_match > m_OneUse()
CondCode_match m_CondCode()
Match any conditional code SDNode.
Not(const Pred &P) -> Not< Pred >
TernaryOpc_match< T0_P, T1_P, T2_P, true, false > m_c_SetCC(const T0_P &LHS, const T1_P &RHS, const T2_P &CC)
bool sd_context_match(SDValue N, const MatchContext &Ctx, Pattern &&P)
ConstantInt_match m_ConstInt()
Match any integer constants or splat of an integer constant.
initializer< Ty > init(const Ty &Val)
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
Definition Metadata.h:668
constexpr double e
@ User
could "use" a pointer
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< UseNode * > Use
Definition RDFGraph.h:385
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:315
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
Definition Threading.h:280
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
Definition MathExtras.h:344
@ Offset
Definition DWP.cpp:557
@ Length
Definition DWP.cpp:557
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
Definition STLExtras.h:830
bool operator<(int64_t V1, const APSInt &V2)
Definition APSInt.h:360
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
void stable_sort(R &&Range)
Definition STLExtras.h:2115
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1764
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
SDValue peekThroughFreeze(SDValue V)
Return the non-frozen source operand of V if it exists.
InstructionCost Cost
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
LLVM_ABI bool isAllOnesOrAllOnesSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant -1 integer or a splatted vector of a constant -1 integer (with...
Definition Utils.cpp:1565
LLVM_ABI SDValue getBitwiseNotOperand(SDValue V, SDValue Mask, bool AllowUndefs)
If V is a bitwise not, returns the inverted operand.
@ Undef
Value of the register doesn't matter.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2553
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition bit.h:315
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition Alignment.h:134
LLVM_ABI llvm::SmallVector< int, 16 > createUnaryMask(ArrayRef< int > Mask, unsigned NumElts)
Given a shuffle mask for a binary shuffle, create the equivalent shuffle mask assuming both operands ...
bool isIntOrFPConstant(SDValue V)
Return true if V is either a integer or FP constant.
bool operator!=(uint64_t V1, const APInt &V2)
Definition APInt.h:2142
constexpr NextUseDistance min(NextUseDistance A, NextUseDistance B)
bool operator>=(int64_t V1, const APSInt &V2)
Definition APSInt.h:359
LLVM_ATTRIBUTE_ALWAYS_INLINE DynamicAPInt & operator+=(DynamicAPInt &A, int64_t B)
LLVM_ABI bool isOneOrOneSplatFP(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant floating-point value, or a splatted vector of a constant float...
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2207
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value and is Skew mod Align.
Definition MathExtras.h:546
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
LLVM_ABI bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
int ilogb(const APFloat &Arg)
Returns the exponent of the internal representation of the APFloat.
Definition APFloat.h:1631
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
LLVM_ABI bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition Utils.cpp:1547
constexpr auto equal_to(T &&Arg)
Functor variant of std::equal_to that can be used as a UnaryPredicate in functional algorithms like a...
Definition STLExtras.h:2172
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition bit.h:156
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:337
LLVM_ABI bool isMinSignedConstant(SDValue V)
Returns true if V is a constant min signed integer value.
LLVM_ABI ConstantFPSDNode * isConstOrConstSplatFP(SDValue N, bool AllowUndefs=false)
Returns the SDNode if it is a constant splat BuildVector or constant float.
LLVM_ABI ConstantRange getConstantRangeFromMetadata(const MDNode &RangeMD)
Parse out a conservative ConstantRange from !range metadata.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition MathExtras.h:385
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:204
unsigned M1(unsigned Val)
Definition VE.h:377
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:149
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
LLVM_ABI bool isConstantOrConstantVector(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowFP=true, bool AllowOpaqueConstants=true)
Return true if the specified instruction is known to be a constant, or a vector of constants.
Definition Utils.cpp:1503
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition bit.h:263
bool operator>(int64_t V1, const APSInt &V2)
Definition APSInt.h:361
LLVM_ABI bool isBitwiseNot(SDValue V, bool AllowUndefs=false)
Returns true if V is a bitwise not operation.
auto reverse(ContainerTy &&C)
Definition STLExtras.h:407
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1635
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
detail::ValueMatchesPoly< M > HasValue(M Matcher)
Definition Error.h:221
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI SDValue peekThroughTruncates(SDValue V)
Return the non-truncated source operand of V if it exists.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1752
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
LLVM_ABI SDValue peekThroughOneUseBitcasts(SDValue V)
Return the non-bitcasted and one-use source operand of V if it exists.
CodeGenOptLevel
Code generation optimization level.
Definition CodeGen.h:82
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
MutableArrayRef(T &OneElt) -> MutableArrayRef< T >
LLVM_ABI bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
@ Other
Any other memory.
Definition ModRef.h:68
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:74
CombineLevel
Definition DAGCombine.h:15
@ AfterLegalizeDAG
Definition DAGCombine.h:19
@ AfterLegalizeVectorOps
Definition DAGCombine.h:18
@ BeforeLegalizeTypes
Definition DAGCombine.h:16
@ AfterLegalizeTypes
Definition DAGCombine.h:17
LLVM_ABI void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed...
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ Or
Bitwise or logical OR of integers.
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ FMul
Product of floats.
@ And
Bitwise or logical AND of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
@ FAdd
Sum of floats.
constexpr T AbsoluteDifference(U X, V Y)
Subtract two unsigned integers, X and Y, of type T and return the absolute value of the result.
Definition MathExtras.h:600
DWARFExpression::Operation Op
unsigned M0(unsigned Val)
Definition VE.h:376
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
constexpr unsigned BitWidth
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:2018
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI bool isZeroOrZeroSplat(SDValue N, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
LLVM_ABI void getShuffleMaskWithWidestElts(ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Repetitively apply widenShuffleMaskElts() for as long as it succeeds, to get the shuffle mask with wi...
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1946
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
LLVM_ABI bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list.
Definition STLExtras.h:2165
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:197
LLVM_ABI bool isZeroOrZeroSplatFP(SDValue N, bool AllowUndefs=false)
Return true if the value is a constant (+/-)0.0 floating-point value or a splatted vector thereof (wi...
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
bool operator<=(int64_t V1, const APSInt &V2)
Definition APSInt.h:358
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition MathExtras.h:373
LLVM_ABI int getSplatIndex(ArrayRef< int > Mask)
If all non-negative Mask elements are the same value, return that value.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:876
#define N
LLVM_ABI AAMDNodes concat(const AAMDNodes &Other) const
Determine the best AAMDNodes after concatenating two different locations together.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
static constexpr DenormalMode getIEEE()
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:90
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:403
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:145
bool knownBitsLE(EVT VT) const
Return true if we know at compile time this has fewer than or the same bits as VT.
Definition ValueTypes.h:287
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:70
EVT changeTypeToInteger() const
Return the type converted to an equivalently sized integer or vector with integer element type.
Definition ValueTypes.h:129
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition ValueTypes.h:292
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition ValueTypes.h:308
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:155
ElementCount getVectorElementCount() const
Definition ValueTypes.h:358
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:381
bool isByteSized() const
Return true if the bit size is a multiple of 8.
Definition ValueTypes.h:251
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition ValueTypes.h:367
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:393
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
Definition ValueTypes.h:486
TypeSize getStoreSizeInBits() const
Return the number of bits overwritten by a store of the specified value type.
Definition ValueTypes.h:420
EVT changeVectorElementType(LLVMContext &Context, EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition ValueTypes.h:98
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:324
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:61
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:389
EVT changeVectorElementCount(LLVMContext &Context, ElementCount EC) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element coun...
Definition ValueTypes.h:109
bool isScalableVT() const
Return true if the type is a scalable type.
Definition ValueTypes.h:195
bool isFixedLengthVector() const
Definition ValueTypes.h:189
EVT getRoundIntegerType(LLVMContext &Context) const
Rounds the bit-width of the given integer EVT up to the nearest power of two (and at least to eight),...
Definition ValueTypes.h:427
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:176
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:331
bool bitsGE(EVT VT) const
Return true if this has no less bits than VT.
Definition ValueTypes.h:300
bool bitsEq(EVT VT) const
Return true if this has the same number of bits as VT.
Definition ValueTypes.h:264
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
bool isRound() const
Return true if the size is a power-of-two number of bytes.
Definition ValueTypes.h:256
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition ValueTypes.h:182
bool knownBitsGE(EVT VT) const
Return true if we know at compile time this has more than or the same bits as VT.
Definition ValueTypes.h:276
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:336
bool isExtended() const
Test if the given EVT is extended (as opposed to being simple).
Definition ValueTypes.h:150
EVT changeElementType(LLVMContext &Context, EVT EltVT) const
Return a VT for a type whose attributes match ourselves with the exception of the element type that i...
Definition ValueTypes.h:121
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:165
LLVM_ABI const fltSemantics & getFltSemantics() const
Returns an APFloat semantics tag appropriate for the value type.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:344
bool isZeroSized() const
Test if the given EVT has zero size, this will fail if called on a scalable type.
Definition ValueTypes.h:140
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition ValueTypes.h:316
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:160
bool isNonNegative() const
Returns true if this value is known to be non-negative.
Definition KnownBits.h:106
bool isZero() const
Returns true if value is all zero.
Definition KnownBits.h:78
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition KnownBits.h:256
bool isConstant() const
Returns true if we know the value of all bits.
Definition KnownBits.h:54
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known bits.
Definition KnownBits.h:310
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition KnownBits.h:262
bool isNegative() const
Returns true if this value is known to be negative.
Definition KnownBits.h:103
bool isAllOnes() const
Returns true if value is all one bits.
Definition KnownBits.h:81
const APInt & getConstant() const
Returns the value when all bits have a known value.
Definition KnownBits.h:58
FPClassTest KnownFPClasses
Floating-point classes the value could be one of.
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands,...
LLVM_ABI unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getUnknownStack(MachineFunction &MF)
Stack memory without other information.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
These are IR-level optimization flags that may be propagated to SDNodes.
void setAllowContract(bool b)
bool hasNoUnsignedWrap() const
void setAllowReassociation(bool b)
void setAllowReciprocal(bool b)
bool hasAllowContract() const
bool hasApproximateFuncs() const
void setApproximateFuncs(bool b)
bool hasNoSignedWrap() const
bool hasAllowReciprocal() const
bool hasAllowReassociation() const
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
Clients of various APIs that cause global effects on the DAG can optionally implement this interface.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*vscale.
LLVM_ABI void AddToWorklist(SDNode *N)
LLVM_ABI bool recursivelyDeleteUnusedNodes(SDNode *N)
LLVM_ABI SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
LLVM_ABI void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetLowering to its clients that want to combine.