1//===-- lib/CodeGen/GlobalISel/GICombinerHelper.cpp -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
9#include "llvm/ADT/APFloat.h"
10#include "llvm/ADT/STLExtras.h"
11#include "llvm/ADT/SetVector.h"
33#include "llvm/IR/DataLayout.h"
34#include "llvm/IR/InstrTypes.h"
40#include <cmath>
41#include <optional>
42#include <tuple>
43
44#define DEBUG_TYPE "gi-combiner"
45
46using namespace llvm;
47using namespace MIPatternMatch;
48
49// Option to allow testing of the combiner while no targets know about indexed
50// addressing.
51static cl::opt<bool>
52 ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false),
53 cl::desc("Force all indexed operations to be "
54 "legal for the GlobalISel combiner"));
55
56CombinerHelper::CombinerHelper(GISelChangeObserver &Observer,
57 MachineIRBuilder &B, bool IsPreLegalize,
58 GISelKnownBits *KB, MachineDominatorTree *MDT,
59 const LegalizerInfo *LI)
60 : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer), KB(KB),
61 MDT(MDT), IsPreLegalize(IsPreLegalize), LI(LI),
62 RBI(Builder.getMF().getSubtarget().getRegBankInfo()),
63 TRI(Builder.getMF().getSubtarget().getRegisterInfo()) {
64 (void)this->KB;
65}
66
67const TargetLowering &CombinerHelper::getTargetLowering() const {
68 return *Builder.getMF().getSubtarget().getTargetLowering();
69}
70
71/// \returns The little endian in-memory byte position of byte \p I in a
72/// \p ByteWidth bytes wide type.
73///
74/// E.g. Given a 4-byte type x, x[0] -> byte 0
75static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I) {
76 assert(I < ByteWidth && "I must be in [0, ByteWidth)");
77 return I;
78}
79
80/// Determines the LogBase2 value for a non-null input value using the
81/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
82static Register buildLogBase2(Register V, MachineIRBuilder &MIB) {
83 auto &MRI = *MIB.getMRI();
84 LLT Ty = MRI.getType(V);
85 auto Ctlz = MIB.buildCTLZ(Ty, V);
86 auto Base = MIB.buildConstant(Ty, Ty.getScalarSizeInBits() - 1);
87 return MIB.buildSub(Ty, Base, Ctlz).getReg(0);
88}
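// For instance (worked example, independent of any target): with a 32-bit
// scalar and V = 16, ctlz(16) = 27, so the emitted sequence computes
// (32 - 1) - 27 = 4 == log2(16).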
89
90/// \returns The big endian in-memory byte position of byte \p I in a
91/// \p ByteWidth bytes wide type.
92///
93/// E.g. Given a 4-byte type x, x[0] -> byte 3
94static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I) {
95 assert(I < ByteWidth && "I must be in [0, ByteWidth)");
96 return ByteWidth - I - 1;
97}
98
99/// Given a map from byte offsets in memory to indices in a load/store,
100/// determine if that map corresponds to a little or big endian byte pattern.
101///
102/// \param MemOffset2Idx maps memory offsets to address offsets.
103/// \param LowestIdx is the lowest index in \p MemOffset2Idx.
104///
105/// \returns true if the map corresponds to a big endian byte pattern, false if
106/// it corresponds to a little endian byte pattern, and std::nullopt otherwise.
107///
108/// E.g. given a 32-bit type x, and x[AddrOffset], the in-memory byte patterns
109/// are as follows:
110///
111/// AddrOffset Little endian Big endian
112/// 0 0 3
113/// 1 1 2
114/// 2 2 1
115/// 3 3 0
116static std::optional<bool>
117isBigEndian(const SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx,
118 int64_t LowestIdx) {
119 // Need at least two byte positions to decide on endianness.
120 unsigned Width = MemOffset2Idx.size();
121 if (Width < 2)
122 return std::nullopt;
123 bool BigEndian = true, LittleEndian = true;
124 for (unsigned MemOffset = 0; MemOffset < Width; ++ MemOffset) {
125 auto MemOffsetAndIdx = MemOffset2Idx.find(MemOffset);
126 if (MemOffsetAndIdx == MemOffset2Idx.end())
127 return std::nullopt;
128 const int64_t Idx = MemOffsetAndIdx->second - LowestIdx;
129 assert(Idx >= 0 && "Expected non-negative byte offset?");
130 LittleEndian &= Idx == littleEndianByteAt(Width, MemOffset);
131 BigEndian &= Idx == bigEndianByteAt(Width, MemOffset);
132 if (!BigEndian && !LittleEndian)
133 return std::nullopt;
134 }
135
136 assert((BigEndian != LittleEndian) &&
137 "Pattern cannot be both big and little endian!");
138 return BigEndian;
139}
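// For example, MemOffset2Idx = {0: 3, 1: 2, 2: 1, 3: 0} with LowestIdx = 0
// matches the big endian column above and yields true, while
// {0: 0, 1: 1, 2: 2, 3: 3} yields false (little endian).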
140
141bool CombinerHelper::isPreLegalize() const { return IsPreLegalize; }
142
143bool CombinerHelper::isLegal(const LegalityQuery &Query) const {
144 assert(LI && "Must have LegalizerInfo to query isLegal!");
145 return LI->getAction(Query).Action == LegalizeActions::Legal;
146}
147
148bool CombinerHelper::isLegalOrBeforeLegalizer(
149 const LegalityQuery &Query) const {
150 return isPreLegalize() || isLegal(Query);
151}
152
153bool CombinerHelper::isConstantLegalOrBeforeLegalizer(const LLT Ty) const {
154 if (!Ty.isVector())
155 return isLegalOrBeforeLegalizer({TargetOpcode::G_CONSTANT, {Ty}});
156 // Vector constants are represented as a G_BUILD_VECTOR of scalar G_CONSTANTs.
157 if (isPreLegalize())
158 return true;
159 LLT EltTy = Ty.getElementType();
160 return isLegal({TargetOpcode::G_BUILD_VECTOR, {Ty, EltTy}}) &&
161 isLegal({TargetOpcode::G_CONSTANT, {EltTy}});
162}
163
163
164void CombinerHelper::replaceRegWith(MachineRegisterInfo &MRI, Register FromReg,
165 Register ToReg) const {
166 Observer.changingAllUsesOfReg(MRI, FromReg);
167
168 if (MRI.constrainRegAttrs(ToReg, FromReg))
169 MRI.replaceRegWith(FromReg, ToReg);
170 else
171 Builder.buildCopy(ToReg, FromReg);
172
173 Observer.finishedChangingAllUsesOfReg();
174}
175
176void CombinerHelper::replaceRegOpWith(MachineRegisterInfo &MRI,
177 MachineOperand &FromRegOp,
178 Register ToReg) const {
179 assert(FromRegOp.getParent() && "Expected an operand in an MI");
180 Observer.changingInstr(*FromRegOp.getParent());
181
182 FromRegOp.setReg(ToReg);
183
184 Observer.changedInstr(*FromRegOp.getParent());
185}
186
187void CombinerHelper::replaceOpcodeWith(MachineInstr &FromMI,
188 unsigned ToOpcode) const {
189 Observer.changingInstr(FromMI);
190
191 FromMI.setDesc(Builder.getTII().get(ToOpcode));
192
193 Observer.changedInstr(FromMI);
194}
195
196const RegisterBank *CombinerHelper::getRegBank(Register Reg) const {
197 return RBI->getRegBank(Reg, MRI, *TRI);
198}
199
200void CombinerHelper::setRegBank(Register Reg, const RegisterBank *RegBank) {
201 if (RegBank)
202 MRI.setRegBank(Reg, *RegBank);
203}
204
205bool CombinerHelper::tryCombineCopy(MachineInstr &MI) {
206 if (matchCombineCopy(MI)) {
207 applyCombineCopy(MI);
208 return true;
209 }
210 return false;
211}
212bool CombinerHelper::matchCombineCopy(MachineInstr &MI) {
213 if (MI.getOpcode() != TargetOpcode::COPY)
214 return false;
215 Register DstReg = MI.getOperand(0).getReg();
216 Register SrcReg = MI.getOperand(1).getReg();
217 return canReplaceReg(DstReg, SrcReg, MRI);
218}
219void CombinerHelper::applyCombineCopy(MachineInstr &MI) {
220 Register DstReg = MI.getOperand(0).getReg();
221 Register SrcReg = MI.getOperand(1).getReg();
222 MI.eraseFromParent();
223 replaceRegWith(MRI, DstReg, SrcReg);
224}
225
226bool CombinerHelper::matchCombineConcatVectors(MachineInstr &MI,
227 SmallVector<Register> &Ops) {
228 assert(MI.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
229 "Invalid instruction");
230 bool IsUndef = true;
231 MachineInstr *Undef = nullptr;
232
233 // Walk over all the operands of concat vectors and check if they are
234 // build_vector themselves or undef.
235 // Then collect their operands in Ops.
236 for (const MachineOperand &MO : MI.uses()) {
237 Register Reg = MO.getReg();
238 MachineInstr *Def = MRI.getVRegDef(Reg);
239 assert(Def && "Operand not defined");
240 if (!MRI.hasOneNonDBGUse(Reg))
241 return false;
242 switch (Def->getOpcode()) {
243 case TargetOpcode::G_BUILD_VECTOR:
244 IsUndef = false;
245 // Remember the operands of the build_vector to fold
246 // them into the yet-to-build flattened concat vectors.
247 for (const MachineOperand &BuildVecMO : Def->uses())
248 Ops.push_back(BuildVecMO.getReg());
249 break;
250 case TargetOpcode::G_IMPLICIT_DEF: {
251 LLT OpType = MRI.getType(Reg);
252 // Keep one undef value for all the undef operands.
253 if (!Undef) {
254 Builder.setInsertPt(*MI.getParent(), MI);
255 Undef = Builder.buildUndef(OpType.getScalarType());
256 }
257 assert(MRI.getType(Undef->getOperand(0).getReg()) ==
258 OpType.getScalarType() &&
259 "All undefs should have the same type");
260 // Break the undef vector in as many scalar elements as needed
261 // for the flattening.
262 for (unsigned EltIdx = 0, EltEnd = OpType.getNumElements();
263 EltIdx != EltEnd; ++EltIdx)
264 Ops.push_back(Undef->getOperand(0).getReg());
265 break;
266 }
267 default:
268 return false;
269 }
270 }
271
272 // Check if the combine is illegal
273 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
274 if (!isLegalOrBeforeLegalizer(
275 {TargetOpcode::G_BUILD_VECTOR, {DstTy, MRI.getType(Ops[0])}})) {
276 return false;
277 }
278
279 if (IsUndef)
280 Ops.clear();
281
282 return true;
283}
284void CombinerHelper::applyCombineConcatVectors(MachineInstr &MI,
285 SmallVector<Register> &Ops) {
286 // We determined that the concat_vectors can be flattened.
287 // Generate the flattened build_vector.
288 Register DstReg = MI.getOperand(0).getReg();
289 Builder.setInsertPt(*MI.getParent(), MI);
290 Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
291
292 // Note: IsUndef is sort of redundant. We could have determined it by
293 // checking that all Ops are undef. Alternatively, we could have
294 // generated a build_vector of undefs and relied on another combine to
295 // clean that up. For now, given we already gather this information
296 // in matchCombineConcatVectors, just save compile time and issue the
297 // right thing.
298 if (Ops.empty())
299 Builder.buildUndef(NewDstReg);
300 else
301 Builder.buildBuildVector(NewDstReg, Ops);
302 MI.eraseFromParent();
303 replaceRegWith(MRI, DstReg, NewDstReg);
304}
305
306bool CombinerHelper::matchCombineShuffleConcat(MachineInstr &MI,
307 SmallVector<Register> &Ops) {
308 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
309 auto ConcatMI1 =
310 dyn_cast<GConcatVectors>(MRI.getVRegDef(MI.getOperand(1).getReg()));
311 auto ConcatMI2 =
312 dyn_cast<GConcatVectors>(MRI.getVRegDef(MI.getOperand(2).getReg()));
313 if (!ConcatMI1 || !ConcatMI2)
314 return false;
315
316 // Check that the sources of the Concat instructions have the same type
317 if (MRI.getType(ConcatMI1->getSourceReg(0)) !=
318 MRI.getType(ConcatMI2->getSourceReg(0)))
319 return false;
320
321 LLT ConcatSrcTy = MRI.getType(ConcatMI1->getReg(1));
322 LLT ShuffleSrcTy1 = MRI.getType(MI.getOperand(1).getReg());
323 unsigned ConcatSrcNumElt = ConcatSrcTy.getNumElements();
324 for (unsigned i = 0; i < Mask.size(); i += ConcatSrcNumElt) {
325 // Check if the index takes a whole source register from G_CONCAT_VECTORS
326 // Assumes that all Sources of G_CONCAT_VECTORS are the same type
327 if (Mask[i] == -1) {
328 for (unsigned j = 1; j < ConcatSrcNumElt; j++) {
329 if (i + j >= Mask.size())
330 return false;
331 if (Mask[i + j] != -1)
332 return false;
333 }
334 if (!isLegalOrBeforeLegalizer(
335 {TargetOpcode::G_IMPLICIT_DEF, {ConcatSrcTy}}))
336 return false;
337 Ops.push_back(0);
338 } else if (Mask[i] % ConcatSrcNumElt == 0) {
339 for (unsigned j = 1; j < ConcatSrcNumElt; j++) {
340 if (i + j >= Mask.size())
341 return false;
342 if (Mask[i + j] != Mask[i] + static_cast<int>(j))
343 return false;
344 }
345 // Retrieve the source register from its respective G_CONCAT_VECTORS
346 // instruction
347 if (Mask[i] < ShuffleSrcTy1.getNumElements()) {
348 Ops.push_back(ConcatMI1->getSourceReg(Mask[i] / ConcatSrcNumElt));
349 } else {
350 Ops.push_back(ConcatMI2->getSourceReg(Mask[i] / ConcatSrcNumElt -
351 ConcatMI1->getNumSources()));
352 }
353 } else {
354 return false;
355 }
356 }
357
358 if (!isLegalOrBeforeLegalizer(
359 {TargetOpcode::G_CONCAT_VECTORS,
360 {MRI.getType(MI.getOperand(0).getReg()), ConcatSrcTy}}))
361 return false;
362
363 return !Ops.empty();
364}
365
366void CombinerHelper::applyCombineShuffleConcat(MachineInstr &MI,
367 SmallVector<Register> &Ops) {
368 LLT SrcTy = MRI.getType(Ops[0]);
369 Register UndefReg = 0;
370
371 for (unsigned i = 0; i < Ops.size(); i++) {
372 if (Ops[i] == 0) {
373 if (UndefReg == 0)
374 UndefReg = Builder.buildUndef(SrcTy).getReg(0);
375 Ops[i] = UndefReg;
376 }
377 }
378
379 Builder.buildConcatVectors(MI.getOperand(0).getReg(), Ops);
380 MI.eraseFromParent();
381}
382
383bool CombinerHelper::tryCombineShuffleVector(MachineInstr &MI) {
384 SmallVector<Register, 4> Ops;
385 if (matchCombineShuffleVector(MI, Ops)) {
386 applyCombineShuffleVector(MI, Ops);
387 return true;
388 }
389 return false;
390}
391
392bool CombinerHelper::matchCombineShuffleVector(MachineInstr &MI,
393 SmallVectorImpl<Register> &Ops) {
394 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
395 "Invalid instruction kind");
396 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
397 Register Src1 = MI.getOperand(1).getReg();
398 LLT SrcType = MRI.getType(Src1);
399 // As bizarre as it may look, shuffle vector can actually produce
400 // scalar! This is because at the IR level a <1 x ty> shuffle
401 // vector is perfectly valid.
402 unsigned DstNumElts = DstType.isVector() ? DstType.getNumElements() : 1;
403 unsigned SrcNumElts = SrcType.isVector() ? SrcType.getNumElements() : 1;
404
405 // If the resulting vector is smaller than the size of the source
406 // vectors being concatenated, we won't be able to replace the
407 // shuffle vector into a concat_vectors.
408 //
409 // Note: We may still be able to produce a concat_vectors fed by
410 // extract_vector_elt and so on. It is less clear that would
411 // be better though, so don't bother for now.
412 //
413 // If the destination is a scalar, the size of the sources doesn't
414 // matter. We will lower the shuffle to a plain copy. This will
415 // work only if the source and destination have the same size. But
416 // that's covered by the next condition.
417 //
418 // TODO: If the sizes of the source and destination don't match,
419 // we could still emit an extract vector element in that case.
420 if (DstNumElts < 2 * SrcNumElts && DstNumElts != 1)
421 return false;
422
423 // Check that the shuffle mask can be broken evenly between the
424 // different sources.
425 if (DstNumElts % SrcNumElts != 0)
426 return false;
427
428 // Mask length is a multiple of the source vector length.
429 // Check if the shuffle is some kind of concatenation of the input
430 // vectors.
431 unsigned NumConcat = DstNumElts / SrcNumElts;
432 SmallVector<int, 8> ConcatSrcs(NumConcat, -1);
433 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
434 for (unsigned i = 0; i != DstNumElts; ++i) {
435 int Idx = Mask[i];
436 // Undef value.
437 if (Idx < 0)
438 continue;
439 // Ensure the indices in each SrcType sized piece are sequential and that
440 // the same source is used for the whole piece.
441 if ((Idx % SrcNumElts != (i % SrcNumElts)) ||
442 (ConcatSrcs[i / SrcNumElts] >= 0 &&
443 ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts)))
444 return false;
445 // Remember which source this index came from.
446 ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts;
447 }
448
449 // The shuffle is concatenating multiple vectors together.
450 // Collect the different operands for that.
451 Register UndefReg;
452 Register Src2 = MI.getOperand(2).getReg();
453 for (auto Src : ConcatSrcs) {
454 if (Src < 0) {
455 if (!UndefReg) {
456 Builder.setInsertPt(*MI.getParent(), MI);
457 UndefReg = Builder.buildUndef(SrcType).getReg(0);
458 }
459 Ops.push_back(UndefReg);
460 } else if (Src == 0)
461 Ops.push_back(Src1);
462 else
463 Ops.push_back(Src2);
464 }
465 return true;
466}
467
468void CombinerHelper::applyCombineShuffleVector(MachineInstr &MI,
469 const ArrayRef<Register> Ops) {
470 Register DstReg = MI.getOperand(0).getReg();
471 Builder.setInsertPt(*MI.getParent(), MI);
472 Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
473
474 if (Ops.size() == 1)
475 Builder.buildCopy(NewDstReg, Ops[0]);
476 else
477 Builder.buildMergeLikeInstr(NewDstReg, Ops);
478
479 MI.eraseFromParent();
480 replaceRegWith(MRI, DstReg, NewDstReg);
481}
482
483bool CombinerHelper::matchShuffleToExtract(MachineInstr &MI) {
484 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
485 "Invalid instruction kind");
486
487 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
488 return Mask.size() == 1;
489}
490
491void CombinerHelper::applyShuffleToExtract(MachineInstr &MI) {
492 Register DstReg = MI.getOperand(0).getReg();
493 Builder.setInsertPt(*MI.getParent(), MI);
494
495 int I = MI.getOperand(3).getShuffleMask()[0];
496 Register Src1 = MI.getOperand(1).getReg();
497 LLT Src1Ty = MRI.getType(Src1);
498 int Src1NumElts = Src1Ty.isVector() ? Src1Ty.getNumElements() : 1;
499 Register SrcReg;
500 if (I >= Src1NumElts) {
501 SrcReg = MI.getOperand(2).getReg();
502 I -= Src1NumElts;
503 } else if (I >= 0)
504 SrcReg = Src1;
505
506 if (I < 0)
507 Builder.buildUndef(DstReg);
508 else if (!MRI.getType(SrcReg).isVector())
509 Builder.buildCopy(DstReg, SrcReg);
510 else
511 Builder.buildExtractVectorElementConstant(DstReg, SrcReg, I);
512
513 MI.eraseFromParent();
514}
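// Illustrative MIR for the combine above (virtual register names are made up):
//   %d:_(s32) = G_SHUFFLE_VECTOR %a(<4 x s32>), %b(<4 x s32>), shufflemask(5)
// becomes a G_EXTRACT_VECTOR_ELT of element 1 from %b, since the single mask
// index 5 selects past the four elements of the first source.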
515
516namespace {
517
518/// Select a preference between two uses. CurrentUse is the current preference
519/// while the *ForCandidate values are the attributes of the candidate under
520/// consideration.
520PreferredTuple ChoosePreferredUse(MachineInstr &LoadMI,
521 PreferredTuple &CurrentUse,
522 const LLT TyForCandidate,
523 unsigned OpcodeForCandidate,
524 MachineInstr *MIForCandidate) {
525 if (!CurrentUse.Ty.isValid()) {
526 if (CurrentUse.ExtendOpcode == OpcodeForCandidate ||
527 CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT)
528 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
529 return CurrentUse;
530 }
531
532 // We permit the extend to hoist through basic blocks but this is only
533 // sensible if the target has extending loads. If you end up lowering back
534 // into a load and extend during the legalizer then the end result is
535 // hoisting the extend up to the load.
536
537 // Prefer defined extensions to undefined extensions as these are more
538 // likely to reduce the number of instructions.
539 if (OpcodeForCandidate == TargetOpcode::G_ANYEXT &&
540 CurrentUse.ExtendOpcode != TargetOpcode::G_ANYEXT)
541 return CurrentUse;
542 else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT &&
543 OpcodeForCandidate != TargetOpcode::G_ANYEXT)
544 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
545
546 // Prefer sign extensions to zero extensions as sign-extensions tend to be
547 // more expensive. Don't do this if the load is already a zero-extend load
548 // though, otherwise we'll rewrite a zero-extend load into a sign-extend
549 // later.
550 if (!isa<GZExtLoad>(LoadMI) && CurrentUse.Ty == TyForCandidate) {
551 if (CurrentUse.ExtendOpcode == TargetOpcode::G_SEXT &&
552 OpcodeForCandidate == TargetOpcode::G_ZEXT)
553 return CurrentUse;
554 else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ZEXT &&
555 OpcodeForCandidate == TargetOpcode::G_SEXT)
556 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
557 }
558
559 // This is potentially target specific. We've chosen the largest type
560 // because G_TRUNC is usually free. One potential catch with this is that
561 // some targets have a reduced number of larger registers than smaller
562 // registers and this choice potentially increases the live-range for the
563 // larger value.
564 if (TyForCandidate.getSizeInBits() > CurrentUse.Ty.getSizeInBits()) {
565 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
566 }
567 return CurrentUse;
568}
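// For example, if an s8 load feeds both a G_SEXT to s32 and a G_ZEXT to s32,
// the s32/G_SEXT pair wins: defined extends beat G_ANYEXT, sign extends beat
// zero extends at equal size (unless the load is already zero-extending), and
// otherwise the larger result type is preferred.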
569
570/// Find a suitable place to insert some instructions and insert them. This
571/// function accounts for special cases like inserting before a PHI node.
572/// The current strategy for inserting before PHI's is to duplicate the
573/// instructions for each predecessor. However, while that's ok for G_TRUNC
574/// on most targets since it generally requires no code, other targets/cases may
575/// want to try harder to find a dominating block.
576static void InsertInsnsWithoutSideEffectsBeforeUse(
577 MachineIRBuilder &Builder, MachineInstr &DefMI, MachineOperand &UseMO,
578 std::function<void(MachineBasicBlock *, MachineBasicBlock::iterator,
579 MachineOperand &UseMO)>
580 Inserter) {
581 MachineInstr &UseMI = *UseMO.getParent();
582
583 MachineBasicBlock *InsertBB = UseMI.getParent();
584
585 // If the use is a PHI then we want the predecessor block instead.
586 if (UseMI.isPHI()) {
587 MachineOperand *PredBB = std::next(&UseMO);
588 InsertBB = PredBB->getMBB();
589 }
590
591 // If the block is the same block as the def then we want to insert just after
592 // the def instead of at the start of the block.
593 if (InsertBB == DefMI.getParent()) {
594 MachineBasicBlock::iterator InsertPt(DefMI);
595 Inserter(InsertBB, std::next(InsertPt), UseMO);
596 return;
597 }
598
599 // Otherwise we want the start of the BB
600 Inserter(InsertBB, InsertBB->getFirstNonPHI(), UseMO);
601}
602} // end anonymous namespace
603
604bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) {
605 PreferredTuple Preferred;
606 if (matchCombineExtendingLoads(MI, Preferred)) {
607 applyCombineExtendingLoads(MI, Preferred);
608 return true;
609 }
610 return false;
611}
612
613static unsigned getExtLoadOpcForExtend(unsigned ExtOpc) {
614 unsigned CandidateLoadOpc;
615 switch (ExtOpc) {
616 case TargetOpcode::G_ANYEXT:
617 CandidateLoadOpc = TargetOpcode::G_LOAD;
618 break;
619 case TargetOpcode::G_SEXT:
620 CandidateLoadOpc = TargetOpcode::G_SEXTLOAD;
621 break;
622 case TargetOpcode::G_ZEXT:
623 CandidateLoadOpc = TargetOpcode::G_ZEXTLOAD;
624 break;
625 default:
626 llvm_unreachable("Unexpected extend opc");
627 }
628 return CandidateLoadOpc;
629}
630
631bool CombinerHelper::matchCombineExtendingLoads(MachineInstr &MI,
632 PreferredTuple &Preferred) {
633 // We match the loads and follow the uses to the extend instead of matching
634 // the extends and following the def to the load. This is because the load
635 // must remain in the same position for correctness (unless we also add code
636 // to find a safe place to sink it) whereas the extend is freely movable.
637 // It also prevents us from duplicating the load for the volatile case or just
638 // for performance.
639 GAnyLoad *LoadMI = dyn_cast<GAnyLoad>(&MI);
640 if (!LoadMI)
641 return false;
642
643 Register LoadReg = LoadMI->getDstReg();
644
645 LLT LoadValueTy = MRI.getType(LoadReg);
646 if (!LoadValueTy.isScalar())
647 return false;
648
649 // Most architectures are going to legalize <s8 loads into at least a 1 byte
650 // load, and the MMOs can only describe memory accesses in multiples of bytes.
651 // If we try to perform extload combining on those, we can end up with
652 // %a(s8) = extload %ptr (load 1 byte from %ptr)
653 // ... which is an illegal extload instruction.
654 if (LoadValueTy.getSizeInBits() < 8)
655 return false;
656
657 // For non power-of-2 types, they will very likely be legalized into multiple
658 // loads. Don't bother trying to match them into extending loads.
659 if (!llvm::has_single_bit<uint32_t>(LoadValueTy.getSizeInBits()))
660 return false;
661
662 // Find the preferred type aside from the any-extends (unless it's the only
663 // one) and non-extending ops. We'll emit an extending load to that type
664 // and emit a variant of (extend (trunc X)) for the others according to the
665 // relative type sizes. At the same time, pick an extend to use based on the
666 // extend involved in the chosen type.
667 unsigned PreferredOpcode =
668 isa<GLoad>(&MI)
669 ? TargetOpcode::G_ANYEXT
670 : isa<GSExtLoad>(&MI) ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
671 Preferred = {LLT(), PreferredOpcode, nullptr};
672 for (auto &UseMI : MRI.use_nodbg_instructions(LoadReg)) {
673 if (UseMI.getOpcode() == TargetOpcode::G_SEXT ||
674 UseMI.getOpcode() == TargetOpcode::G_ZEXT ||
675 (UseMI.getOpcode() == TargetOpcode::G_ANYEXT)) {
676 const auto &MMO = LoadMI->getMMO();
677 // Don't do anything for atomics.
678 if (MMO.isAtomic())
679 continue;
680 // Check for legality.
681 if (!isPreLegalize()) {
682 LegalityQuery::MemDesc MMDesc(MMO);
683 unsigned CandidateLoadOpc = getExtLoadOpcForExtend(UseMI.getOpcode());
684 LLT UseTy = MRI.getType(UseMI.getOperand(0).getReg());
685 LLT SrcTy = MRI.getType(LoadMI->getPointerReg());
686 if (LI->getAction({CandidateLoadOpc, {UseTy, SrcTy}, {MMDesc}})
687 .Action != LegalizeActions::Legal)
688 continue;
689 }
690 Preferred = ChoosePreferredUse(MI, Preferred,
691 MRI.getType(UseMI.getOperand(0).getReg()),
692 UseMI.getOpcode(), &UseMI);
693 }
694 }
695
696 // There were no extends
697 if (!Preferred.MI)
698 return false;
699 // It should be impossible to choose an extend without selecting a different
700 // type since by definition the result of an extend is larger.
701 assert(Preferred.Ty != LoadValueTy && "Extending to same type?");
702
703 LLVM_DEBUG(dbgs() << "Preferred use is: " << *Preferred.MI);
704 return true;
705}
706
707void CombinerHelper::applyCombineExtendingLoads(MachineInstr &MI,
708 PreferredTuple &Preferred) {
709 // Rewrite the load to the chosen extending load.
710 Register ChosenDstReg = Preferred.MI->getOperand(0).getReg();
711
712 // Inserter to insert a truncate back to the original type at a given point
713 // with some basic CSE to limit truncate duplication to one per BB.
714 DenseMap<MachineBasicBlock *, MachineInstr *> EmittedInsns;
715 auto InsertTruncAt = [&](MachineBasicBlock *InsertIntoBB,
716 MachineBasicBlock::iterator InsertBefore,
717 MachineOperand &UseMO) {
718 MachineInstr *PreviouslyEmitted = EmittedInsns.lookup(InsertIntoBB);
719 if (PreviouslyEmitted) {
720 Observer.changingInstr(*UseMO.getParent());
721 UseMO.setReg(PreviouslyEmitted->getOperand(0).getReg());
722 Observer.changedInstr(*UseMO.getParent());
723 return;
724 }
725
726 Builder.setInsertPt(*InsertIntoBB, InsertBefore);
727 Register NewDstReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg());
728 MachineInstr *NewMI = Builder.buildTrunc(NewDstReg, ChosenDstReg);
729 EmittedInsns[InsertIntoBB] = NewMI;
730 replaceRegOpWith(MRI, UseMO, NewDstReg);
731 };
732
733 Observer.changingInstr(MI);
734 unsigned LoadOpc = getExtLoadOpcForExtend(Preferred.ExtendOpcode);
735 MI.setDesc(Builder.getTII().get(LoadOpc));
736
737 // Rewrite all the uses to fix up the types.
738 auto &LoadValue = MI.getOperand(0);
739 SmallVector<MachineOperand *, 4> Uses;
740 for (auto &UseMO : MRI.use_operands(LoadValue.getReg()))
741 Uses.push_back(&UseMO);
742
743 for (auto *UseMO : Uses) {
744 MachineInstr *UseMI = UseMO->getParent();
745
746 // If the extend is compatible with the preferred extend then we should fix
747 // up the type and extend so that it uses the preferred use.
748 if (UseMI->getOpcode() == Preferred.ExtendOpcode ||
749 UseMI->getOpcode() == TargetOpcode::G_ANYEXT) {
750 Register UseDstReg = UseMI->getOperand(0).getReg();
751 MachineOperand &UseSrcMO = UseMI->getOperand(1);
752 const LLT UseDstTy = MRI.getType(UseDstReg);
753 if (UseDstReg != ChosenDstReg) {
754 if (Preferred.Ty == UseDstTy) {
755 // If the use has the same type as the preferred use, then merge
756 // the vregs and erase the extend. For example:
757 // %1:_(s8) = G_LOAD ...
758 // %2:_(s32) = G_SEXT %1(s8)
759 // %3:_(s32) = G_ANYEXT %1(s8)
760 // ... = ... %3(s32)
761 // rewrites to:
762 // %2:_(s32) = G_SEXTLOAD ...
763 // ... = ... %2(s32)
764 replaceRegWith(MRI, UseDstReg, ChosenDstReg);
765 Observer.erasingInstr(*UseMO->getParent());
766 UseMO->getParent()->eraseFromParent();
767 } else if (Preferred.Ty.getSizeInBits() < UseDstTy.getSizeInBits()) {
768 // If the preferred size is smaller, then keep the extend but extend
769 // from the result of the extending load. For example:
770 // %1:_(s8) = G_LOAD ...
771 // %2:_(s32) = G_SEXT %1(s8)
772 // %3:_(s64) = G_ANYEXT %1(s8)
773 // ... = ... %3(s64)
774 /// rewrites to:
775 // %2:_(s32) = G_SEXTLOAD ...
776 // %3:_(s64) = G_ANYEXT %2:_(s32)
777 // ... = ... %3(s64)
778 replaceRegOpWith(MRI, UseSrcMO, ChosenDstReg);
779 } else {
780 // If the preferred size is large, then insert a truncate. For
781 // example:
782 // %1:_(s8) = G_LOAD ...
783 // %2:_(s64) = G_SEXT %1(s8)
784 // %3:_(s32) = G_ZEXT %1(s8)
785 // ... = ... %3(s32)
786 /// rewrites to:
787 // %2:_(s64) = G_SEXTLOAD ...
788 // %4:_(s8) = G_TRUNC %2:_(s32)
789 // %3:_(s64) = G_ZEXT %2:_(s8)
790 // ... = ... %3(s64)
791 InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO,
792 InsertTruncAt);
793 }
794 continue;
795 }
796 // The use is (one of) the uses of the preferred use we chose earlier.
797 // We're going to update the load to def this value later so just erase
798 // the old extend.
799 Observer.erasingInstr(*UseMO->getParent());
800 UseMO->getParent()->eraseFromParent();
801 continue;
802 }
803
804 // The use isn't an extend. Truncate back to the type we originally loaded.
805 // This is free on many targets.
806 InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO, InsertTruncAt);
807 }
808
809 MI.getOperand(0).setReg(ChosenDstReg);
810 Observer.changedInstr(MI);
811}
812
813bool CombinerHelper::matchCombineLoadWithAndMask(MachineInstr &MI,
814 BuildFnTy &MatchInfo) {
815 assert(MI.getOpcode() == TargetOpcode::G_AND);
816
817 // If we have the following code:
818 // %mask = G_CONSTANT 255
819 // %ld = G_LOAD %ptr, (load s16)
820 // %and = G_AND %ld, %mask
821 //
822 // Try to fold it into
823 // %ld = G_ZEXTLOAD %ptr, (load s8)
824
825 Register Dst = MI.getOperand(0).getReg();
826 if (MRI.getType(Dst).isVector())
827 return false;
828
829 auto MaybeMask =
830 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
831 if (!MaybeMask)
832 return false;
833
834 APInt MaskVal = MaybeMask->Value;
835
836 if (!MaskVal.isMask())
837 return false;
838
839 Register SrcReg = MI.getOperand(1).getReg();
840 // Don't use getOpcodeDef() here since intermediate instructions may have
841 // multiple users.
842 GAnyLoad *LoadMI = dyn_cast<GAnyLoad>(MRI.getVRegDef(SrcReg));
843 if (!LoadMI || !MRI.hasOneNonDBGUse(LoadMI->getDstReg()))
844 return false;
845
846 Register LoadReg = LoadMI->getDstReg();
847 LLT RegTy = MRI.getType(LoadReg);
848 Register PtrReg = LoadMI->getPointerReg();
849 unsigned RegSize = RegTy.getSizeInBits();
850 LocationSize LoadSizeBits = LoadMI->getMemSizeInBits();
851 unsigned MaskSizeBits = MaskVal.countr_one();
852
853 // The mask may not be larger than the in-memory type, as it might cover sign
854 // extended bits
855 if (MaskSizeBits > LoadSizeBits.getValue())
856 return false;
857
858 // If the mask covers the whole destination register, there's nothing to
859 // extend
860 if (MaskSizeBits >= RegSize)
861 return false;
862
863 // Most targets cannot deal with loads of size < 8 and need to re-legalize to
864 // at least byte loads. Avoid creating such loads here
865 if (MaskSizeBits < 8 || !isPowerOf2_32(MaskSizeBits))
866 return false;
867
868 const MachineMemOperand &MMO = LoadMI->getMMO();
869 LegalityQuery::MemDesc MemDesc(MMO);
870
871 // Don't modify the memory access size if this is atomic/volatile, but we can
872 // still adjust the opcode to indicate the high bit behavior.
873 if (LoadMI->isSimple())
874 MemDesc.MemoryTy = LLT::scalar(MaskSizeBits);
875 else if (LoadSizeBits.getValue() > MaskSizeBits ||
876 LoadSizeBits.getValue() == RegSize)
877 return false;
878
879 // TODO: Could check if it's legal with the reduced or original memory size.
880 if (!isLegalOrBeforeLegalizer(
881 {TargetOpcode::G_ZEXTLOAD, {RegTy, MRI.getType(PtrReg)}, {MemDesc}}))
882 return false;
883
884 MatchInfo = [=](MachineIRBuilder &B) {
885 B.setInstrAndDebugLoc(*LoadMI);
886 auto &MF = B.getMF();
887 auto PtrInfo = MMO.getPointerInfo();
888 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, MemDesc.MemoryTy);
889 B.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, Dst, PtrReg, *NewMMO);
890 LoadMI->eraseFromParent();
891 };
892 return true;
893}
894
895bool CombinerHelper::isPredecessor(const MachineInstr &DefMI,
896 const MachineInstr &UseMI) {
897 assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
898 "shouldn't consider debug uses");
899 assert(DefMI.getParent() == UseMI.getParent());
900 if (&DefMI == &UseMI)
901 return true;
902 const MachineBasicBlock &MBB = *DefMI.getParent();
903 auto DefOrUse = find_if(MBB, [&DefMI, &UseMI](const MachineInstr &MI) {
904 return &MI == &DefMI || &MI == &UseMI;
905 });
906 if (DefOrUse == MBB.end())
907 llvm_unreachable("Block must contain both DefMI and UseMI!");
908 return &*DefOrUse == &DefMI;
909}
910
911bool CombinerHelper::dominates(const MachineInstr &DefMI,
912 const MachineInstr &UseMI) {
913 assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
914 "shouldn't consider debug uses");
915 if (MDT)
916 return MDT->dominates(&DefMI, &UseMI);
917 else if (DefMI.getParent() != UseMI.getParent())
918 return false;
919
920 return isPredecessor(DefMI, UseMI);
921}
922
923bool CombinerHelper::matchSextTruncSextLoad(MachineInstr &MI) {
924 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
925 Register SrcReg = MI.getOperand(1).getReg();
926 Register LoadUser = SrcReg;
927
928 if (MRI.getType(SrcReg).isVector())
929 return false;
930
931 Register TruncSrc;
932 if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc))))
933 LoadUser = TruncSrc;
934
935 uint64_t SizeInBits = MI.getOperand(2).getImm();
936 // If the source is a G_SEXTLOAD from the same bit width, then we don't
937 // need any extend at all, just a truncate.
938 if (auto *LoadMI = getOpcodeDef<GSExtLoad>(LoadUser, MRI)) {
939 // If truncating more than the original extended value, abort.
940 auto LoadSizeBits = LoadMI->getMemSizeInBits();
941 if (TruncSrc &&
942 MRI.getType(TruncSrc).getSizeInBits() < LoadSizeBits.getValue())
943 return false;
944 if (LoadSizeBits == SizeInBits)
945 return true;
946 }
947 return false;
948}
949
950void CombinerHelper::applySextTruncSextLoad(MachineInstr &MI) {
951 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
952 Builder.buildCopy(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
953 MI.eraseFromParent();
954}
955
956bool CombinerHelper::matchSextInRegOfLoad(
957 MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
958 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
959
960 Register DstReg = MI.getOperand(0).getReg();
961 LLT RegTy = MRI.getType(DstReg);
962
963 // Only supports scalars for now.
964 if (RegTy.isVector())
965 return false;
966
967 Register SrcReg = MI.getOperand(1).getReg();
968 auto *LoadDef = getOpcodeDef<GLoad>(SrcReg, MRI);
969 if (!LoadDef || !MRI.hasOneNonDBGUse(DstReg))
970 return false;
971
972 uint64_t MemBits = LoadDef->getMemSizeInBits().getValue();
973
974 // If the sign extend extends from a narrower width than the load's width,
975 // then we can narrow the load width when we combine to a G_SEXTLOAD.
976 // Avoid widening the load at all.
977 unsigned NewSizeBits = std::min((uint64_t)MI.getOperand(2).getImm(), MemBits);
978
979 // Don't generate G_SEXTLOADs with a < 1 byte width.
980 if (NewSizeBits < 8)
981 return false;
982 // Don't bother creating a non-power-2 sextload, it will likely be broken up
983 // anyway for most targets.
984 if (!isPowerOf2_32(NewSizeBits))
985 return false;
986
987 const MachineMemOperand &MMO = LoadDef->getMMO();
988 LegalityQuery::MemDesc MMDesc(MMO);
989
990 // Don't modify the memory access size if this is atomic/volatile, but we can
991 // still adjust the opcode to indicate the high bit behavior.
992 if (LoadDef->isSimple())
993 MMDesc.MemoryTy = LLT::scalar(NewSizeBits);
994 else if (MemBits > NewSizeBits || MemBits == RegTy.getSizeInBits())
995 return false;
996
997 // TODO: Could check if it's legal with the reduced or original memory size.
998 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SEXTLOAD,
999 {MRI.getType(LoadDef->getDstReg()),
1000 MRI.getType(LoadDef->getPointerReg())},
1001 {MMDesc}}))
1002 return false;
1003
1004 MatchInfo = std::make_tuple(LoadDef->getDstReg(), NewSizeBits);
1005 return true;
1006}
1007
1008void CombinerHelper::applySextInRegOfLoad(
1009 MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
1010 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1011 Register LoadReg;
1012 unsigned ScalarSizeBits;
1013 std::tie(LoadReg, ScalarSizeBits) = MatchInfo;
1014 GLoad *LoadDef = cast<GLoad>(MRI.getVRegDef(LoadReg));
1015
1016 // If we have the following:
1017 // %ld = G_LOAD %ptr, (load 2)
1018 // %ext = G_SEXT_INREG %ld, 8
1019 // ==>
1020 // %ld = G_SEXTLOAD %ptr (load 1)
1021
1022 auto &MMO = LoadDef->getMMO();
1023 Builder.setInstrAndDebugLoc(*LoadDef);
1024 auto &MF = Builder.getMF();
1025 auto PtrInfo = MMO.getPointerInfo();
1026 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, ScalarSizeBits / 8);
1027 Builder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, MI.getOperand(0).getReg(),
1028 LoadDef->getPointerReg(), *NewMMO);
1029 MI.eraseFromParent();
1030}
1031
1032static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {
1033 if (Ty.isVector())
1034 return FixedVectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()),
1035 Ty.getNumElements());
1036 return IntegerType::get(C, Ty.getSizeInBits());
1037}
1038
1039/// Return true if 'MI' is a load or a store that may fold its address
1040/// operand into the load / store addressing mode.
1041static bool canFoldInAddressingMode(GLoadStore *MI, const TargetLowering &TLI,
1042 MachineRegisterInfo &MRI) {
1043 TargetLowering::AddrMode AM;
1044 auto *MF = MI->getMF();
1045 auto *Addr = getOpcodeDef<GPtrAdd>(MI->getPointerReg(), MRI);
1046 if (!Addr)
1047 return false;
1048
1049 AM.HasBaseReg = true;
1050 if (auto CstOff = getIConstantVRegVal(Addr->getOffsetReg(), MRI))
1051 AM.BaseOffs = CstOff->getSExtValue(); // [reg +/- imm]
1052 else
1053 AM.Scale = 1; // [reg +/- reg]
1054
1055 return TLI.isLegalAddressingMode(
1056 MF->getDataLayout(), AM,
1057 getTypeForLLT(MI->getMMO().getMemoryType(),
1058 MF->getFunction().getContext()),
1059 MI->getMMO().getAddrSpace());
1060}
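// For instance, an address defined as
//   %c:_(s64) = G_CONSTANT i64 16
//   %p:_(p0) = G_PTR_ADD %base, %c(s64)
// yields AM = {HasBaseReg = true, BaseOffs = 16}, i.e. a [reg + imm] query,
// while a non-constant offset is queried as [reg + reg] via AM.Scale = 1.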
1061
1062static unsigned getIndexedOpc(unsigned LdStOpc) {
1063 switch (LdStOpc) {
1064 case TargetOpcode::G_LOAD:
1065 return TargetOpcode::G_INDEXED_LOAD;
1066 case TargetOpcode::G_STORE:
1067 return TargetOpcode::G_INDEXED_STORE;
1068 case TargetOpcode::G_ZEXTLOAD:
1069 return TargetOpcode::G_INDEXED_ZEXTLOAD;
1070 case TargetOpcode::G_SEXTLOAD:
1071 return TargetOpcode::G_INDEXED_SEXTLOAD;
1072 default:
1073 llvm_unreachable("Unexpected opcode");
1074 }
1075}
1076
1077bool CombinerHelper::isIndexedLoadStoreLegal(GLoadStore &LdSt) const {
1078 // Check for legality.
1079 LLT PtrTy = MRI.getType(LdSt.getPointerReg());
1080 LLT Ty = MRI.getType(LdSt.getReg(0));
1081 LLT MemTy = LdSt.getMMO().getMemoryType();
1082 SmallVector<LegalityQuery::MemDesc, 2> MemDescrs(
1083 {{MemTy, MemTy.getSizeInBits(), AtomicOrdering::NotAtomic}});
1084 unsigned IndexedOpc = getIndexedOpc(LdSt.getOpcode());
1085 SmallVector<LLT> OpTys;
1086 if (IndexedOpc == TargetOpcode::G_INDEXED_STORE)
1087 OpTys = {PtrTy, Ty, Ty};
1088 else
1089 OpTys = {Ty, PtrTy}; // For G_INDEXED_LOAD, G_INDEXED_[SZ]EXTLOAD
1090
1091 LegalityQuery Q(IndexedOpc, OpTys, MemDescrs);
1092 return isLegal(Q);
1093}
1094
1096 "post-index-use-threshold", cl::Hidden, cl::init(32),
1097 cl::desc("Number of uses of a base pointer to check before it is no longer "
1098 "considered for post-indexing."));
1099
1100bool CombinerHelper::findPostIndexCandidate(GLoadStore &LdSt, Register &Addr,
1101 Register &Base, Register &Offset,
1102 bool &RematOffset) {
1103 // We're looking for the following pattern, for either load or store:
1104 // %baseptr:_(p0) = ...
1105 // G_STORE %val(s64), %baseptr(p0)
1106 // %offset:_(s64) = G_CONSTANT i64 -256
1107 // %new_addr:_(p0) = G_PTR_ADD %baseptr, %offset(s64)
1108 const auto &TLI = getTargetLowering();
1109
1110 Register Ptr = LdSt.getPointerReg();
1111 // If the store is the only use, don't bother.
1112 if (MRI.hasOneNonDBGUse(Ptr))
1113 return false;
1114
1115 if (!isIndexedLoadStoreLegal(LdSt))
1116 return false;
1117
1118 if (getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Ptr, MRI))
1119 return false;
1120
1121 MachineInstr *StoredValDef = getDefIgnoringCopies(LdSt.getReg(0), MRI);
1122 auto *PtrDef = MRI.getVRegDef(Ptr);
1123
1124 unsigned NumUsesChecked = 0;
1125 for (auto &Use : MRI.use_nodbg_instructions(Ptr)) {
1126 if (++NumUsesChecked > PostIndexUseThreshold)
1127 return false; // Try to avoid exploding compile time.
1128
1129 auto *PtrAdd = dyn_cast<GPtrAdd>(&Use);
1130 // The use itself might be dead. This can happen during combines if DCE
1131 // hasn't had a chance to run yet. Don't allow it to form an indexed op.
1132 if (!PtrAdd || MRI.use_nodbg_empty(PtrAdd->getReg(0)))
1133 continue;
1134
1135 // Check the user of this isn't the store, otherwise we'd be generating an
1136 // indexed store defining its own use.
1137 if (StoredValDef == &Use)
1138 continue;
1139
1140 Offset = PtrAdd->getOffsetReg();
1141 if (!ForceLegalIndexing &&
1142 !TLI.isIndexingLegal(LdSt, PtrAdd->getBaseReg(), Offset,
1143 /*IsPre*/ false, MRI))
1144 continue;
1145
1146 // Make sure the offset calculation is before the potentially indexed op.
1147 MachineInstr *OffsetDef = MRI.getVRegDef(Offset);
1148 RematOffset = false;
1149 if (!dominates(*OffsetDef, LdSt)) {
1150 // If the offset however is just a G_CONSTANT, we can always just
1151 // rematerialize it where we need it.
1152 if (OffsetDef->getOpcode() != TargetOpcode::G_CONSTANT)
1153 continue;
1154 RematOffset = true;
1155 }
1156
1157 for (auto &BasePtrUse : MRI.use_nodbg_instructions(PtrAdd->getBaseReg())) {
1158 if (&BasePtrUse == PtrDef)
1159 continue;
1160
1161 // If the user is a later load/store that can be post-indexed, then don't
1162 // combine this one.
1163 auto *BasePtrLdSt = dyn_cast<GLoadStore>(&BasePtrUse);
1164 if (BasePtrLdSt && BasePtrLdSt != &LdSt &&
1165 dominates(LdSt, *BasePtrLdSt) &&
1166 isIndexedLoadStoreLegal(*BasePtrLdSt))
1167 return false;
1168
1169 // Now we're looking for the key G_PTR_ADD instruction, which contains
1170 // the offset add that we want to fold.
1171 if (auto *BasePtrUseDef = dyn_cast<GPtrAdd>(&BasePtrUse)) {
1172 Register PtrAddDefReg = BasePtrUseDef->getReg(0);
1173 for (auto &BaseUseUse : MRI.use_nodbg_instructions(PtrAddDefReg)) {
1174 // If the use is in a different block, then we may produce worse code
1175 // due to the extra register pressure.
1176 if (BaseUseUse.getParent() != LdSt.getParent())
1177 return false;
1178
1179 if (auto *UseUseLdSt = dyn_cast<GLoadStore>(&BaseUseUse))
1180 if (canFoldInAddressingMode(UseUseLdSt, TLI, MRI))
1181 return false;
1182 }
1183 if (!dominates(LdSt, BasePtrUse))
1184 return false; // All uses must be dominated by the load/store.
1185 }
1186 }
1187
1188 Addr = PtrAdd->getReg(0);
1189 Base = PtrAdd->getBaseReg();
1190 return true;
1191 }
1192
1193 return false;
1194}
1195
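// The pre-indexed shape looked for below is, roughly (illustrative MIR):
//   %offset:_(s64) = G_CONSTANT i64 16
//   %writeback:_(p0) = G_PTR_ADD %base, %offset(s64)
//   G_STORE %val(s64), %writeback(p0)
// where the G_PTR_ADD result can instead be produced as a side effect of the
// store itself.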
1196bool CombinerHelper::findPreIndexCandidate(GLoadStore &LdSt, Register &Addr,
1197 Register &Base, Register &Offset) {
1198 auto &MF = *LdSt.getParent()->getParent();
1199 const auto &TLI = *MF.getSubtarget().getTargetLowering();
1200
1201 Addr = LdSt.getPointerReg();
1202 if (!mi_match(Addr, MRI, m_GPtrAdd(m_Reg(Base), m_Reg(Offset))) ||
1203 MRI.hasOneNonDBGUse(Addr))
1204 return false;
1205
1206 if (!ForceLegalIndexing &&
1207 !TLI.isIndexingLegal(LdSt, Base, Offset, /*IsPre*/ true, MRI))
1208 return false;
1209
1210 if (!isIndexedLoadStoreLegal(LdSt))
1211 return false;
1212
1213 MachineInstr *BaseDef = getDefIgnoringCopies(Base, MRI);
1214 if (BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
1215 return false;
1216
1217 if (auto *St = dyn_cast<GStore>(&LdSt)) {
1218 // Would require a copy.
1219 if (Base == St->getValueReg())
1220 return false;
1221
1222 // We're expecting one use of Addr in MI, but it could also be the
1223 // value stored, which isn't actually dominated by the instruction.
1224 if (St->getValueReg() == Addr)
1225 return false;
1226 }
1227
1228 // Avoid increasing cross-block register pressure.
1229 for (auto &AddrUse : MRI.use_nodbg_instructions(Addr))
1230 if (AddrUse.getParent() != LdSt.getParent())
1231 return false;
1232
1233 // FIXME: check whether all uses of the base pointer are constant PtrAdds.
1234 // That might allow us to end base's liveness here by adjusting the constant.
1235 bool RealUse = false;
1236 for (auto &AddrUse : MRI.use_nodbg_instructions(Addr)) {
1237 if (!dominates(LdSt, AddrUse))
1238 return false; // All uses must be dominated by the load/store.
1239
1240 // If Ptr may be folded in addressing mode of other use, then it's
1241 // not profitable to do this transformation.
1242 if (auto *UseLdSt = dyn_cast<GLoadStore>(&AddrUse)) {
1243 if (!canFoldInAddressingMode(UseLdSt, TLI, MRI))
1244 RealUse = true;
1245 } else {
1246 RealUse = true;
1247 }
1248 }
1249 return RealUse;
1250}
1251
1252bool CombinerHelper::matchCombineExtractedVectorLoad(MachineInstr &MI,
1253 BuildFnTy &MatchInfo) {
1254 assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT);
1255
1256 // Check if there is a load that defines the vector being extracted from.
1257 auto *LoadMI = getOpcodeDef<GLoad>(MI.getOperand(1).getReg(), MRI);
1258 if (!LoadMI)
1259 return false;
1260
1261 Register Vector = MI.getOperand(1).getReg();
1262 LLT VecEltTy = MRI.getType(Vector).getElementType();
1263
1264 assert(MRI.getType(MI.getOperand(0).getReg()) == VecEltTy);
1265
1266 // Checking whether we should reduce the load width.
1267 if (!MRI.hasOneNonDBGUse(Vector))
1268 return false;
1269
1270 // Check if the defining load is simple.
1271 if (!LoadMI->isSimple())
1272 return false;
1273
1274 // If the vector element type is not a multiple of a byte then we are unable
1275 // to correctly compute an address to load only the extracted element as a
1276 // scalar.
1277 if (!VecEltTy.isByteSized())
1278 return false;
1279
1280 // Check for load fold barriers between the extraction and the load.
1281 if (MI.getParent() != LoadMI->getParent())
1282 return false;
1283 const unsigned MaxIter = 20;
1284 unsigned Iter = 0;
1285 for (auto II = LoadMI->getIterator(), IE = MI.getIterator(); II != IE; ++II) {
1286 if (II->isLoadFoldBarrier())
1287 return false;
1288 if (Iter++ == MaxIter)
1289 return false;
1290 }
1291
1292 // Check if the new load that we are going to create is legal
1293 // if we are in the post-legalization phase.
1294 MachineMemOperand MMO = LoadMI->getMMO();
1295 Align Alignment = MMO.getAlign();
1296 MachinePointerInfo PtrInfo;
1297 uint64_t Offset;
1298
1299 // Finding the appropriate PtrInfo if offset is a known constant.
1300 // This is required to create the memory operand for the narrowed load.
1301 // This machine memory operand object helps us infer about legality
1302 // before we proceed to combine the instruction.
1303 if (auto CVal = getIConstantVRegVal(Vector, MRI)) {
1304 int Elt = CVal->getZExtValue();
1305 // FIXME: should be (ABI size)*Elt.
1306 Offset = VecEltTy.getSizeInBits() * Elt / 8;
1307 PtrInfo = MMO.getPointerInfo().getWithOffset(Offset);
1308 } else {
1309 // Discard the pointer info except the address space because the memory
1310 // operand can't represent this new access since the offset is variable.
1311 Offset = VecEltTy.getSizeInBits() / 8;
1312 PtrInfo = MachinePointerInfo(MMO.getPointerInfo().getAddrSpace());
1313 }
1314
1315 Alignment = commonAlignment(Alignment, Offset);
1316
1317 Register VecPtr = LoadMI->getPointerReg();
1318 LLT PtrTy = MRI.getType(VecPtr);
1319
1320 MachineFunction &MF = *MI.getMF();
1321 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, VecEltTy);
1322
1323 LegalityQuery::MemDesc MMDesc(*NewMMO);
1324
1325 LegalityQuery Q = {TargetOpcode::G_LOAD, {VecEltTy, PtrTy}, {MMDesc}};
1326
1327 if (!isLegalOrBeforeLegalizer(Q))
1328 return false;
1329
1330 // Load must be allowed and fast on the target.
1331 LLVMContext &C = MF.getFunction().getContext();
1332 auto &DL = MF.getDataLayout();
1333 unsigned Fast = 0;
1334 if (!getTargetLowering().allowsMemoryAccess(C, DL, VecEltTy, *NewMMO,
1335 &Fast) ||
1336 !Fast)
1337 return false;
1338
1339 Register Result = MI.getOperand(0).getReg();
1340 Register Index = MI.getOperand(2).getReg();
1341
1342 MatchInfo = [=](MachineIRBuilder &B) {
1343 GISelObserverWrapper DummyObserver;
1344 LegalizerHelper Helper(B.getMF(), DummyObserver, B);
1345 // Get a pointer to the vector element.
1346 Register finalPtr = Helper.getVectorElementPointer(
1347 LoadMI->getPointerReg(), MRI.getType(LoadMI->getOperand(0).getReg()),
1348 Index);
1349 // New G_LOAD instruction.
1350 B.buildLoad(Result, finalPtr, PtrInfo, Alignment);
1351 // Remove the original G_LOAD instruction.
1352 LoadMI->eraseFromParent();
1353 };
1354
1355 return true;
1356}
1357
1358bool CombinerHelper::matchCombineIndexedLoadStore(
1359 MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
1360 auto &LdSt = cast<GLoadStore>(MI);
1361
1362 if (LdSt.isAtomic())
1363 return false;
1364
1365 MatchInfo.IsPre = findPreIndexCandidate(LdSt, MatchInfo.Addr, MatchInfo.Base,
1366 MatchInfo.Offset);
1367 if (!MatchInfo.IsPre &&
1368 !findPostIndexCandidate(LdSt, MatchInfo.Addr, MatchInfo.Base,
1369 MatchInfo.Offset, MatchInfo.RematOffset))
1370 return false;
1371
1372 return true;
1373}
1374
1375void CombinerHelper::applyCombineIndexedLoadStore(
1376 MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
1377 MachineInstr &AddrDef = *MRI.getUniqueVRegDef(MatchInfo.Addr);
1378 unsigned Opcode = MI.getOpcode();
1379 bool IsStore = Opcode == TargetOpcode::G_STORE;
1380 unsigned NewOpcode = getIndexedOpc(Opcode);
1381
1382 // If the offset constant didn't happen to dominate the load/store, we can
1383 // just clone it as needed.
1384 if (MatchInfo.RematOffset) {
1385 auto *OldCst = MRI.getVRegDef(MatchInfo.Offset);
1386 auto NewCst = Builder.buildConstant(MRI.getType(MatchInfo.Offset),
1387 *OldCst->getOperand(1).getCImm());
1388 MatchInfo.Offset = NewCst.getReg(0);
1389 }
1390
1391 auto MIB = Builder.buildInstr(NewOpcode);
1392 if (IsStore) {
1393 MIB.addDef(MatchInfo.Addr);
1394 MIB.addUse(MI.getOperand(0).getReg());
1395 } else {
1396 MIB.addDef(MI.getOperand(0).getReg());
1397 MIB.addDef(MatchInfo.Addr);
1398 }
1399
1400 MIB.addUse(MatchInfo.Base);
1401 MIB.addUse(MatchInfo.Offset);
1402 MIB.addImm(MatchInfo.IsPre);
1403 MIB->cloneMemRefs(*MI.getMF(), MI);
1404 MI.eraseFromParent();
1405 AddrDef.eraseFromParent();
1406
1407 LLVM_DEBUG(dbgs() << " Combinined to indexed operation");
1408}
1409
1410bool CombinerHelper::matchCombineDivRem(MachineInstr &MI,
1411 MachineInstr *&OtherMI) {
1412 unsigned Opcode = MI.getOpcode();
1413 bool IsDiv, IsSigned;
1414
1415 switch (Opcode) {
1416 default:
1417 llvm_unreachable("Unexpected opcode!");
1418 case TargetOpcode::G_SDIV:
1419 case TargetOpcode::G_UDIV: {
1420 IsDiv = true;
1421 IsSigned = Opcode == TargetOpcode::G_SDIV;
1422 break;
1423 }
1424 case TargetOpcode::G_SREM:
1425 case TargetOpcode::G_UREM: {
1426 IsDiv = false;
1427 IsSigned = Opcode == TargetOpcode::G_SREM;
1428 break;
1429 }
1430 }
1431
1432 Register Src1 = MI.getOperand(1).getReg();
1433 unsigned DivOpcode, RemOpcode, DivremOpcode;
1434 if (IsSigned) {
1435 DivOpcode = TargetOpcode::G_SDIV;
1436 RemOpcode = TargetOpcode::G_SREM;
1437 DivremOpcode = TargetOpcode::G_SDIVREM;
1438 } else {
1439 DivOpcode = TargetOpcode::G_UDIV;
1440 RemOpcode = TargetOpcode::G_UREM;
1441 DivremOpcode = TargetOpcode::G_UDIVREM;
1442 }
1443
1444 if (!isLegalOrBeforeLegalizer({DivremOpcode, {MRI.getType(Src1)}}))
1445 return false;
1446
1447 // Combine:
1448 // %div:_ = G_[SU]DIV %src1:_, %src2:_
1449 // %rem:_ = G_[SU]REM %src1:_, %src2:_
1450 // into:
1451 // %div:_, %rem:_ = G_[SU]DIVREM %src1:_, %src2:_
1452
1453 // Combine:
1454 // %rem:_ = G_[SU]REM %src1:_, %src2:_
1455 // %div:_ = G_[SU]DIV %src1:_, %src2:_
1456 // into:
1457 // %div:_, %rem:_ = G_[SU]DIVREM %src1:_, %src2:_
1458
1459 for (auto &UseMI : MRI.use_nodbg_instructions(Src1)) {
1460 if (MI.getParent() == UseMI.getParent() &&
1461 ((IsDiv && UseMI.getOpcode() == RemOpcode) ||
1462 (!IsDiv && UseMI.getOpcode() == DivOpcode)) &&
1463 matchEqualDefs(MI.getOperand(2), UseMI.getOperand(2)) &&
1464 matchEqualDefs(MI.getOperand(1), UseMI.getOperand(1))) {
1465 OtherMI = &UseMI;
1466 return true;
1467 }
1468 }
1469
1470 return false;
1471}
1472
1473void CombinerHelper::applyCombineDivRem(MachineInstr &MI,
1474 MachineInstr *&OtherMI) {
1475 unsigned Opcode = MI.getOpcode();
1476 assert(OtherMI && "OtherMI shouldn't be empty.");
1477
1478 Register DestDivReg, DestRemReg;
1479 if (Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_UDIV) {
1480 DestDivReg = MI.getOperand(0).getReg();
1481 DestRemReg = OtherMI->getOperand(0).getReg();
1482 } else {
1483 DestDivReg = OtherMI->getOperand(0).getReg();
1484 DestRemReg = MI.getOperand(0).getReg();
1485 }
1486
1487 bool IsSigned =
1488 Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_SREM;
1489
1490 // Check which instruction is first in the block so we don't break def-use
1491 // deps by "moving" the instruction incorrectly. Also keep track of which
1492 // instruction is first so we pick its operands, avoiding use-before-def
1493 // bugs.
1494 MachineInstr *FirstInst = dominates(MI, *OtherMI) ? &MI : OtherMI;
1495 Builder.setInstrAndDebugLoc(*FirstInst);
1496
1497 Builder.buildInstr(IsSigned ? TargetOpcode::G_SDIVREM
1498 : TargetOpcode::G_UDIVREM,
1499 {DestDivReg, DestRemReg},
1500 { FirstInst->getOperand(1), FirstInst->getOperand(2) });
1501 MI.eraseFromParent();
1502 OtherMI->eraseFromParent();
1503}
1504
1505bool CombinerHelper::matchOptBrCondByInvertingCond(MachineInstr &MI,
1506 MachineInstr *&BrCond) {
1507 assert(MI.getOpcode() == TargetOpcode::G_BR);
1508
1509 // Try to match the following:
1510 // bb1:
1511 // G_BRCOND %c1, %bb2
1512 // G_BR %bb3
1513 // bb2:
1514 // ...
1515 // bb3:
1516
1517 // The above pattern does not have a fall through to the successor bb2, always
1518 // resulting in a branch no matter which path is taken. Here we try to find
1519 // and replace that pattern with a conditional branch to bb3 and otherwise
1520 // fallthrough to bb2. This is generally better for branch predictors.
1521
1522 MachineBasicBlock *MBB = MI.getParent();
1523 MachineBasicBlock::iterator BrIt(MI);
1524 if (BrIt == MBB->begin())
1525 return false;
1526 assert(std::next(BrIt) == MBB->end() && "expected G_BR to be a terminator");
1527
1528 BrCond = &*std::prev(BrIt);
1529 if (BrCond->getOpcode() != TargetOpcode::G_BRCOND)
1530 return false;
1531
1532 // Check that the next block is the conditional branch target. Also make sure
1533 // that it isn't the same as the G_BR's target (otherwise, this will loop.)
1534 MachineBasicBlock *BrCondTarget = BrCond->getOperand(1).getMBB();
1535 return BrCondTarget != MI.getOperand(0).getMBB() &&
1536 MBB->isLayoutSuccessor(BrCondTarget);
1537}
1538
1539void CombinerHelper::applyOptBrCondByInvertingCond(MachineInstr &MI,
1540 MachineInstr *&BrCond) {
1541 MachineBasicBlock *BrTarget = MI.getOperand(0).getMBB();
1542 Builder.setInstrAndDebugLoc(*BrCond);
1543 LLT Ty = MRI.getType(BrCond->getOperand(0).getReg());
1544 // FIXME: Does int/fp matter for this? If so, we might need to restrict
1545 // this to i1 only since we might not know for sure what kind of
1546 // compare generated the condition value.
1547 auto True = Builder.buildConstant(
1548 Ty, getICmpTrueVal(getTargetLowering(), false, false));
1549 auto Xor = Builder.buildXor(Ty, BrCond->getOperand(0), True);
1550
1551 auto *FallthroughBB = BrCond->getOperand(1).getMBB();
1552 Observer.changingInstr(MI);
1553 MI.getOperand(0).setMBB(FallthroughBB);
1554 Observer.changedInstr(MI);
1555
1556 // Change the conditional branch to use the inverted condition and
1557 // new target block.
1558 Observer.changingInstr(*BrCond);
1559 BrCond->getOperand(0).setReg(Xor.getReg(0));
1560 BrCond->getOperand(1).setMBB(BrTarget);
1561 Observer.changedInstr(*BrCond);
1562}
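// Sketch of the rewritten control flow (block and register names are made up,
// and assume the target's "true" value is 1):
//   bb1:
//     %inv:_(s1) = G_XOR %c1, 1
//     G_BRCOND %inv(s1), %bb3
//   bb2:  ; now reached by falling through from bb1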
1563
1564
1565bool CombinerHelper::tryEmitMemcpyInline(MachineInstr &MI) {
1566 MachineIRBuilder HelperBuilder(MI);
1567 GISelObserverWrapper DummyObserver;
1568 LegalizerHelper Helper(HelperBuilder.getMF(), DummyObserver, HelperBuilder);
1569 return Helper.lowerMemcpyInline(MI) ==
1570 LegalizerHelper::LegalizeResult::Legalized;
1571}
1572
1573bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
1574 MachineIRBuilder HelperBuilder(MI);
1575 GISelObserverWrapper DummyObserver;
1576 LegalizerHelper Helper(HelperBuilder.getMF(), DummyObserver, HelperBuilder);
1577 return Helper.lowerMemCpyFamily(MI, MaxLen) ==
1578 LegalizerHelper::LegalizeResult::Legalized;
1579}
1580
1581static APFloat constantFoldFpUnary(const MachineInstr &MI,
1582 const MachineRegisterInfo &MRI,
1583 const APFloat &Val) {
1584 APFloat Result(Val);
1585 switch (MI.getOpcode()) {
1586 default:
1587 llvm_unreachable("Unexpected opcode!");
1588 case TargetOpcode::G_FNEG: {
1589 Result.changeSign();
1590 return Result;
1591 }
1592 case TargetOpcode::G_FABS: {
1593 Result.clearSign();
1594 return Result;
1595 }
1596 case TargetOpcode::G_FPTRUNC: {
1597 bool Unused;
1598 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
1599 Result.convert(getFltSemanticForLLT(DstTy), APFloat::rmNearestTiesToEven,
1600 &Unused);
1601 return Result;
1602 }
1603 case TargetOpcode::G_FSQRT: {
1604 bool Unused;
1605 Result.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
1606 &Unused);
1607 Result = APFloat(sqrt(Result.convertToDouble()));
1608 break;
1609 }
1610 case TargetOpcode::G_FLOG2: {
1611 bool Unused;
1612 Result.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
1613 &Unused);
1614 Result = APFloat(log2(Result.convertToDouble()));
1615 break;
1616 }
1617 }
1618 // Convert `APFloat` to appropriate IEEE type depending on `DstTy`. Otherwise,
1619 // `buildFConstant` will assert on size mismatch. Only `G_FSQRT` and
1620 // `G_FLOG2` reach here.
1621 bool Unused;
1622 Result.convert(Val.getSemantics(), APFloat::rmNearestTiesToEven, &Unused);
1623 return Result;
1624}
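// For example, G_FNEG over a ConstantFP of +2.0 just flips the sign to -2.0,
// while G_FSQRT of 2.0 is evaluated in double precision via sqrt() and then
// converted back to the semantics of the original constant.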
1625
1626void CombinerHelper::applyCombineConstantFoldFpUnary(MachineInstr &MI,
1627 const ConstantFP *Cst) {
1628 APFloat Folded = constantFoldFpUnary(MI, MRI, Cst->getValue());
1629 const ConstantFP *NewCst = ConstantFP::get(Builder.getContext(), Folded);
1630 Builder.buildFConstant(MI.getOperand(0), *NewCst);
1631 MI.eraseFromParent();
1632}
1633
1634bool CombinerHelper::matchPtrAddImmedChain(MachineInstr &MI,
1635 PtrAddChain &MatchInfo) {
1636 // We're trying to match the following pattern:
1637 // %t1 = G_PTR_ADD %base, G_CONSTANT imm1
1638 // %root = G_PTR_ADD %t1, G_CONSTANT imm2
1639 // -->
1640 // %root = G_PTR_ADD %base, G_CONSTANT (imm1 + imm2)
1641
1642 if (MI.getOpcode() != TargetOpcode::G_PTR_ADD)
1643 return false;
1644
1645 Register Add2 = MI.getOperand(1).getReg();
1646 Register Imm1 = MI.getOperand(2).getReg();
1647 auto MaybeImmVal = getIConstantVRegValWithLookThrough(Imm1, MRI);
1648 if (!MaybeImmVal)
1649 return false;
1650
1651 MachineInstr *Add2Def = MRI.getVRegDef(Add2);
1652 if (!Add2Def || Add2Def->getOpcode() != TargetOpcode::G_PTR_ADD)
1653 return false;
1654
1655 Register Base = Add2Def->getOperand(1).getReg();
1656 Register Imm2 = Add2Def->getOperand(2).getReg();
1657 auto MaybeImm2Val = getIConstantVRegValWithLookThrough(Imm2, MRI);
1658 if (!MaybeImm2Val)
1659 return false;
1660
1661 // Check if the new combined immediate forms an illegal addressing mode.
1662 // Do not combine if it was legal before but would get illegal.
1663 // To do so, we need to find a load/store user of the pointer to get
1664 // the access type.
1665 Type *AccessTy = nullptr;
1666 auto &MF = *MI.getMF();
1667 for (auto &UseMI : MRI.use_nodbg_instructions(MI.getOperand(0).getReg())) {
1668 if (auto *LdSt = dyn_cast<GLoadStore>(&UseMI)) {
1669 AccessTy = getTypeForLLT(MRI.getType(LdSt->getReg(0)),
1670 MF.getFunction().getContext());
1671 break;
1672 }
1673 }
1674 TargetLoweringBase::AddrMode AMNew;
1675 APInt CombinedImm = MaybeImmVal->Value + MaybeImm2Val->Value;
1676 AMNew.BaseOffs = CombinedImm.getSExtValue();
1677 if (AccessTy) {
1678 AMNew.HasBaseReg = true;
1679 TargetLoweringBase::AddrMode AMOld;
1680 AMOld.BaseOffs = MaybeImmVal->Value.getSExtValue();
1681 AMOld.HasBaseReg = true;
1682 unsigned AS = MRI.getType(Add2).getAddressSpace();
1683 const auto &TLI = *MF.getSubtarget().getTargetLowering();
1684 if (TLI.isLegalAddressingMode(MF.getDataLayout(), AMOld, AccessTy, AS) &&
1685 !TLI.isLegalAddressingMode(MF.getDataLayout(), AMNew, AccessTy, AS))
1686 return false;
1687 }
1688
1689 // Pass the combined immediate to the apply function.
1690 MatchInfo.Imm = AMNew.BaseOffs;
1691 MatchInfo.Base = Base;
1692 MatchInfo.Bank = getRegBank(Imm2);
1693 return true;
1694}
1695
1696void CombinerHelper::applyPtrAddImmedChain(MachineInstr &MI,
1697 PtrAddChain &MatchInfo) {
1698 assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
1699 MachineIRBuilder MIB(MI);
1700 LLT OffsetTy = MRI.getType(MI.getOperand(2).getReg());
1701 auto NewOffset = MIB.buildConstant(OffsetTy, MatchInfo.Imm);
1702 setRegBank(NewOffset.getReg(0), MatchInfo.Bank);
1703 Observer.changingInstr(MI);
1704 MI.getOperand(1).setReg(MatchInfo.Base);
1705 MI.getOperand(2).setReg(NewOffset.getReg(0));
1706 Observer.changedInstr(MI);
1707}
1708
1709bool CombinerHelper::matchShiftImmedChain(MachineInstr &MI,
1710 RegisterImmPair &MatchInfo) {
1711 // We're trying to match the following pattern with any of
1712 // G_SHL/G_ASHR/G_LSHR/G_SSHLSAT/G_USHLSAT shift instructions:
1713 // %t1 = SHIFT %base, G_CONSTANT imm1
1714 // %root = SHIFT %t1, G_CONSTANT imm2
1715 // -->
1716 // %root = SHIFT %base, G_CONSTANT (imm1 + imm2)
1717
1718 unsigned Opcode = MI.getOpcode();
1719 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
1720 Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||
1721 Opcode == TargetOpcode::G_USHLSAT) &&
1722 "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");
1723
1724 Register Shl2 = MI.getOperand(1).getReg();
1725 Register Imm1 = MI.getOperand(2).getReg();
1726 auto MaybeImmVal = getIConstantVRegValWithLookThrough(Imm1, MRI);
1727 if (!MaybeImmVal)
1728 return false;
1729
1730 MachineInstr *Shl2Def = MRI.getUniqueVRegDef(Shl2);
1731 if (Shl2Def->getOpcode() != Opcode)
1732 return false;
1733
1734 Register Base = Shl2Def->getOperand(1).getReg();
1735 Register Imm2 = Shl2Def->getOperand(2).getReg();
1736 auto MaybeImm2Val = getIConstantVRegValWithLookThrough(Imm2, MRI);
1737 if (!MaybeImm2Val)
1738 return false;
1739
1740 // Pass the combined immediate to the apply function.
1741 MatchInfo.Imm =
1742 (MaybeImmVal->Value.getZExtValue() + MaybeImm2Val->Value).getZExtValue();
1743 MatchInfo.Reg = Base;
1744
1745 // There is no simple replacement for a saturating unsigned left shift that
1746 // exceeds the scalar size.
1747 if (Opcode == TargetOpcode::G_USHLSAT &&
1748 MatchInfo.Imm >= MRI.getType(Shl2).getScalarSizeInBits())
1749 return false;
1750
1751 return true;
1752}
1753
1755 RegisterImmPair &MatchInfo) {
1756 unsigned Opcode = MI.getOpcode();
1757 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
1758 Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||
1759 Opcode == TargetOpcode::G_USHLSAT) &&
1760 "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");
1761
1762 LLT Ty = MRI.getType(MI.getOperand(1).getReg());
1763 unsigned const ScalarSizeInBits = Ty.getScalarSizeInBits();
1764 auto Imm = MatchInfo.Imm;
1765
1766 if (Imm >= ScalarSizeInBits) {
1767 // Any logical shift that exceeds scalar size will produce zero.
1768 if (Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_LSHR) {
1769 Builder.buildConstant(MI.getOperand(0), 0);
1770 MI.eraseFromParent();
1771 return;
1772 }
1773 // Arithmetic shift and saturating signed left shift have no effect beyond
1774 // scalar size.
1775 Imm = ScalarSizeInBits - 1;
1776 }
1777
1778 LLT ImmTy = MRI.getType(MI.getOperand(2).getReg());
1779 Register NewImm = Builder.buildConstant(ImmTy, Imm).getReg(0);
1781 MI.getOperand(1).setReg(MatchInfo.Reg);
1782 MI.getOperand(2).setReg(NewImm);
1784}
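// [Editorial illustration, not part of the LLVM source.] For an s32 value the
// pair above folds
//   %t:_(s32) = G_LSHR %x, 10
//   %r:_(s32) = G_LSHR %t, 6
// into %r:_(s32) = G_LSHR %x, 16. If the summed amount reaches the scalar
// width (say 20 + 15), the logical-shift result is instead replaced by the
// constant 0, while G_ASHR/G_SSHLSAT amounts are clamped to width - 1.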
1785
1787 ShiftOfShiftedLogic &MatchInfo) {
1788 // We're trying to match the following pattern with any of
1789 // G_SHL/G_ASHR/G_LSHR/G_USHLSAT/G_SSHLSAT shift instructions in combination
1790 // with any of G_AND/G_OR/G_XOR logic instructions.
1791 // %t1 = SHIFT %X, G_CONSTANT C0
1792 // %t2 = LOGIC %t1, %Y
1793 // %root = SHIFT %t2, G_CONSTANT C1
1794 // -->
1795 // %t3 = SHIFT %X, G_CONSTANT (C0+C1)
1796 // %t4 = SHIFT %Y, G_CONSTANT C1
1797 // %root = LOGIC %t3, %t4
1798 unsigned ShiftOpcode = MI.getOpcode();
1799 assert((ShiftOpcode == TargetOpcode::G_SHL ||
1800 ShiftOpcode == TargetOpcode::G_ASHR ||
1801 ShiftOpcode == TargetOpcode::G_LSHR ||
1802 ShiftOpcode == TargetOpcode::G_USHLSAT ||
1803 ShiftOpcode == TargetOpcode::G_SSHLSAT) &&
1804 "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT");
1805
1806 // Match a one-use bitwise logic op.
1807 Register LogicDest = MI.getOperand(1).getReg();
1808 if (!MRI.hasOneNonDBGUse(LogicDest))
1809 return false;
1810
1811 MachineInstr *LogicMI = MRI.getUniqueVRegDef(LogicDest);
1812 unsigned LogicOpcode = LogicMI->getOpcode();
1813 if (LogicOpcode != TargetOpcode::G_AND && LogicOpcode != TargetOpcode::G_OR &&
1814 LogicOpcode != TargetOpcode::G_XOR)
1815 return false;
1816
1817 // Find a matching one-use shift by constant.
1818 const Register C1 = MI.getOperand(2).getReg();
1819 auto MaybeImmVal = getIConstantVRegValWithLookThrough(C1, MRI);
1820 if (!MaybeImmVal || MaybeImmVal->Value == 0)
1821 return false;
1822
1823 const uint64_t C1Val = MaybeImmVal->Value.getZExtValue();
1824
1825 auto matchFirstShift = [&](const MachineInstr *MI, uint64_t &ShiftVal) {
1826 // The shift must use the same opcode as the previous one and have one use.
1827 if (MI->getOpcode() != ShiftOpcode ||
1828 !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
1829 return false;
1830
1831 // Must be a constant.
1832 auto MaybeImmVal =
1833 getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
1834 if (!MaybeImmVal)
1835 return false;
1836
1837 ShiftVal = MaybeImmVal->Value.getSExtValue();
1838 return true;
1839 };
1840
1841 // Logic ops are commutative, so check each operand for a match.
1842 Register LogicMIReg1 = LogicMI->getOperand(1).getReg();
1843 MachineInstr *LogicMIOp1 = MRI.getUniqueVRegDef(LogicMIReg1);
1844 Register LogicMIReg2 = LogicMI->getOperand(2).getReg();
1845 MachineInstr *LogicMIOp2 = MRI.getUniqueVRegDef(LogicMIReg2);
1846 uint64_t C0Val;
1847
1848 if (matchFirstShift(LogicMIOp1, C0Val)) {
1849 MatchInfo.LogicNonShiftReg = LogicMIReg2;
1850 MatchInfo.Shift2 = LogicMIOp1;
1851 } else if (matchFirstShift(LogicMIOp2, C0Val)) {
1852 MatchInfo.LogicNonShiftReg = LogicMIReg1;
1853 MatchInfo.Shift2 = LogicMIOp2;
1854 } else
1855 return false;
1856
1857 MatchInfo.ValSum = C0Val + C1Val;
1858
1859 // The fold is not valid if the sum of the shift values exceeds bitwidth.
1860 if (MatchInfo.ValSum >= MRI.getType(LogicDest).getScalarSizeInBits())
1861 return false;
1862
1863 MatchInfo.Logic = LogicMI;
1864 return true;
1865}
1866
1868 ShiftOfShiftedLogic &MatchInfo) {
1869 unsigned Opcode = MI.getOpcode();
1870 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
1871 Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_USHLSAT ||
1872 Opcode == TargetOpcode::G_SSHLSAT) &&
1873 "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT");
1874
1875 LLT ShlType = MRI.getType(MI.getOperand(2).getReg());
1876 LLT DestType = MRI.getType(MI.getOperand(0).getReg());
1877
1878 Register Const = Builder.buildConstant(ShlType, MatchInfo.ValSum).getReg(0);
1879
1880 Register Shift1Base = MatchInfo.Shift2->getOperand(1).getReg();
1881 Register Shift1 =
1882 Builder.buildInstr(Opcode, {DestType}, {Shift1Base, Const}).getReg(0);
1883
1884 // If LogicNonShiftReg is the same as Shift1Base and the shift1 constant is
1885 // the same as the MatchInfo.Shift2 constant, CSEMIRBuilder will reuse the old
1886 // shift1 when building shift2. In that case, erasing MatchInfo.Shift2 at the
1887 // end would actually remove the old shift1 and crash later, so erase it
1888 // earlier to avoid the crash.
1889 MatchInfo.Shift2->eraseFromParent();
1890
1891 Register Shift2Const = MI.getOperand(2).getReg();
1892 Register Shift2 = Builder
1893 .buildInstr(Opcode, {DestType},
1894 {MatchInfo.LogicNonShiftReg, Shift2Const})
1895 .getReg(0);
1896
1897 Register Dest = MI.getOperand(0).getReg();
1898 Builder.buildInstr(MatchInfo.Logic->getOpcode(), {Dest}, {Shift1, Shift2});
1899
1900 // This was one use so it's safe to remove it.
1901 MatchInfo.Logic->eraseFromParent();
1902
1903 MI.eraseFromParent();
1904}
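// [Editorial illustration, not part of the LLVM source.] With C0 = 2 and
// C1 = 3 on an s32 value, the pair above rewrites
//   %t1:_(s32) = G_SHL %x, 2
//   %t2:_(s32) = G_AND %t1, %y
//   %r:_(s32)  = G_SHL %t2, 3
// into
//   %t3:_(s32) = G_SHL %x, 5
//   %t4:_(s32) = G_SHL %y, 3
//   %r:_(s32)  = G_AND %t3, %t4
// and only matches while C0 + C1 stays below the scalar bit width.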
1905
1907 assert(MI.getOpcode() == TargetOpcode::G_SHL && "Expected G_SHL");
1908 // Combine (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
1909 // Combine (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
1910 auto &Shl = cast<GenericMachineInstr>(MI);
1911 Register DstReg = Shl.getReg(0);
1912 Register SrcReg = Shl.getReg(1);
1913 Register ShiftReg = Shl.getReg(2);
1914 Register X, C1;
1915
1916 if (!getTargetLowering().isDesirableToCommuteWithShift(MI, !isPreLegalize()))
1917 return false;
1918
1919 if (!mi_match(SrcReg, MRI,
1920 m_OneNonDBGUse(m_any_of(m_GAdd(m_Reg(X), m_Reg(C1)),
1921 m_GOr(m_Reg(X), m_Reg(C1))))))
1922 return false;
1923
1924 APInt C1Val, C2Val;
1925 if (!mi_match(C1, MRI, m_ICstOrSplat(C1Val)) ||
1926 !mi_match(ShiftReg, MRI, m_ICstOrSplat(C2Val)))
1927 return false;
1928
1929 auto *SrcDef = MRI.getVRegDef(SrcReg);
1930 assert((SrcDef->getOpcode() == TargetOpcode::G_ADD ||
1931 SrcDef->getOpcode() == TargetOpcode::G_OR) && "Unexpected op");
1932 LLT SrcTy = MRI.getType(SrcReg);
1933 MatchInfo = [=](MachineIRBuilder &B) {
1934 auto S1 = B.buildShl(SrcTy, X, ShiftReg);
1935 auto S2 = B.buildShl(SrcTy, C1, ShiftReg);
1936 B.buildInstr(SrcDef->getOpcode(), {DstReg}, {S1, S2});
1937 };
1938 return true;
1939}
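// [Editorial illustration, not part of the LLVM source.] The MatchInfo lambda
// built above commutes the shift past the add/or, e.g.
//   %r = G_SHL (G_ADD %x, 16), 2
// becomes
//   %r = G_ADD (G_SHL %x, 2), (G_SHL 16, 2)
// where the shift of the constant is expected to fold to 64 afterwards.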
1940
1942 unsigned &ShiftVal) {
1943 assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
1944 auto MaybeImmVal =
1945 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
1946 if (!MaybeImmVal)
1947 return false;
1948
1949 ShiftVal = MaybeImmVal->Value.exactLogBase2();
1950 return (static_cast<int32_t>(ShiftVal) != -1);
1951}
1952
1954 unsigned &ShiftVal) {
1955 assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
1956 MachineIRBuilder MIB(MI);
1957 LLT ShiftTy = MRI.getType(MI.getOperand(0).getReg());
1958 auto ShiftCst = MIB.buildConstant(ShiftTy, ShiftVal);
1960 MI.setDesc(MIB.getTII().get(TargetOpcode::G_SHL));
1961 MI.getOperand(2).setReg(ShiftCst.getReg(0));
1963}
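// [Editorial illustration, not part of the LLVM source.] A multiply by an
// exact power of two such as
//   %r:_(s32) = G_MUL %x, 8
// is re-described in place as
//   %r:_(s32) = G_SHL %x, 3
// since exactLogBase2() only succeeds for exact powers of two.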
1964
1965// shl ([sza]ext x), y => zext (shl x, y), if shift does not overflow source
1967 RegisterImmPair &MatchData) {
1968 assert(MI.getOpcode() == TargetOpcode::G_SHL && KB);
1969 if (!getTargetLowering().isDesirableToPullExtFromShl(MI))
1970 return false;
1971
1972 Register LHS = MI.getOperand(1).getReg();
1973
1974 Register ExtSrc;
1975 if (!mi_match(LHS, MRI, m_GAnyExt(m_Reg(ExtSrc))) &&
1976 !mi_match(LHS, MRI, m_GZExt(m_Reg(ExtSrc))) &&
1977 !mi_match(LHS, MRI, m_GSExt(m_Reg(ExtSrc))))
1978 return false;
1979
1980 Register RHS = MI.getOperand(2).getReg();
1981 MachineInstr *MIShiftAmt = MRI.getVRegDef(RHS);
1982 auto MaybeShiftAmtVal = isConstantOrConstantSplatVector(*MIShiftAmt, MRI);
1983 if (!MaybeShiftAmtVal)
1984 return false;
1985
1986 if (LI) {
1987 LLT SrcTy = MRI.getType(ExtSrc);
1988
1989 // We only really care about the legality of the shifted value. We can
1990 // pick any type for the constant shift amount, so ask the target what to
1991 // use. Otherwise we would have to guess and hope it is reported as legal.
1992 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(SrcTy);
1993 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SHL, {SrcTy, ShiftAmtTy}}))
1994 return false;
1995 }
1996
1997 int64_t ShiftAmt = MaybeShiftAmtVal->getSExtValue();
1998 MatchData.Reg = ExtSrc;
1999 MatchData.Imm = ShiftAmt;
2000
2001 unsigned MinLeadingZeros = KB->getKnownZeroes(ExtSrc).countl_one();
2002 unsigned SrcTySize = MRI.getType(ExtSrc).getScalarSizeInBits();
2003 return MinLeadingZeros >= ShiftAmt && ShiftAmt < SrcTySize;
2004}
2005
2007 const RegisterImmPair &MatchData) {
2008 Register ExtSrcReg = MatchData.Reg;
2009 int64_t ShiftAmtVal = MatchData.Imm;
2010
2011 LLT ExtSrcTy = MRI.getType(ExtSrcReg);
2012 auto ShiftAmt = Builder.buildConstant(ExtSrcTy, ShiftAmtVal);
2013 auto NarrowShift =
2014 Builder.buildShl(ExtSrcTy, ExtSrcReg, ShiftAmt, MI.getFlags());
2015 Builder.buildZExt(MI.getOperand(0), NarrowShift);
2016 MI.eraseFromParent();
2017}
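// [Editorial illustration, not part of the LLVM source.] When known-bits
// shows the narrow source has at least as many leading zeros as the shift
// amount, the pair above turns
//   %e:_(s64) = G_ZEXT %x:_(s32)
//   %r:_(s64) = G_SHL %e, 4
// into a narrow shift followed by a zero-extension:
//   %s:_(s32) = G_SHL %x, 4
//   %r:_(s64) = G_ZEXT %s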
2018
2020 Register &MatchInfo) {
2021 GMerge &Merge = cast<GMerge>(MI);
2022 SmallVector<Register, 16> MergedValues;
2023 for (unsigned I = 0; I < Merge.getNumSources(); ++I)
2024 MergedValues.emplace_back(Merge.getSourceReg(I));
2025
2026 auto *Unmerge = getOpcodeDef<GUnmerge>(MergedValues[0], MRI);
2027 if (!Unmerge || Unmerge->getNumDefs() != Merge.getNumSources())
2028 return false;
2029
2030 for (unsigned I = 0; I < MergedValues.size(); ++I)
2031 if (MergedValues[I] != Unmerge->getReg(I))
2032 return false;
2033
2034 MatchInfo = Unmerge->getSourceReg();
2035 return true;
2036}
2037
2039 const MachineRegisterInfo &MRI) {
2040 while (mi_match(Reg, MRI, m_GBitcast(m_Reg(Reg))))
2041 ;
2042
2043 return Reg;
2044}
2045
2048 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2049 "Expected an unmerge");
2050 auto &Unmerge = cast<GUnmerge>(MI);
2051 Register SrcReg = peekThroughBitcast(Unmerge.getSourceReg(), MRI);
2052
2053 auto *SrcInstr = getOpcodeDef<GMergeLikeInstr>(SrcReg, MRI);
2054 if (!SrcInstr)
2055 return false;
2056
2057 // Check the source type of the merge.
2058 LLT SrcMergeTy = MRI.getType(SrcInstr->getSourceReg(0));
2059 LLT Dst0Ty = MRI.getType(Unmerge.getReg(0));
2060 bool SameSize = Dst0Ty.getSizeInBits() == SrcMergeTy.getSizeInBits();
2061 if (SrcMergeTy != Dst0Ty && !SameSize)
2062 return false;
2063 // They are the same now (modulo a bitcast).
2064 // We can collect all the src registers.
2065 for (unsigned Idx = 0; Idx < SrcInstr->getNumSources(); ++Idx)
2066 Operands.push_back(SrcInstr->getSourceReg(Idx));
2067 return true;
2068}
2069
2072 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2073 "Expected an unmerge");
2074 assert((MI.getNumOperands() - 1 == Operands.size()) &&
2075 "Not enough operands to replace all defs");
2076 unsigned NumElems = MI.getNumOperands() - 1;
2077
2078 LLT SrcTy = MRI.getType(Operands[0]);
2079 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
2080 bool CanReuseInputDirectly = DstTy == SrcTy;
2081 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
2082 Register DstReg = MI.getOperand(Idx).getReg();
2083 Register SrcReg = Operands[Idx];
2084
2085 // This combine may run after RegBankSelect, so we need to be aware of
2086 // register banks.
2087 const auto &DstCB = MRI.getRegClassOrRegBank(DstReg);
2088 if (!DstCB.isNull() && DstCB != MRI.getRegClassOrRegBank(SrcReg)) {
2089 SrcReg = Builder.buildCopy(MRI.getType(SrcReg), SrcReg).getReg(0);
2090 MRI.setRegClassOrRegBank(SrcReg, DstCB);
2091 }
2092
2093 if (CanReuseInputDirectly)
2094 replaceRegWith(MRI, DstReg, SrcReg);
2095 else
2096 Builder.buildCast(DstReg, SrcReg);
2097 }
2098 MI.eraseFromParent();
2099}
2100
2102 SmallVectorImpl<APInt> &Csts) {
2103 unsigned SrcIdx = MI.getNumOperands() - 1;
2104 Register SrcReg = MI.getOperand(SrcIdx).getReg();
2105 MachineInstr *SrcInstr = MRI.getVRegDef(SrcReg);
2106 if (SrcInstr->getOpcode() != TargetOpcode::G_CONSTANT &&
2107 SrcInstr->getOpcode() != TargetOpcode::G_FCONSTANT)
2108 return false;
2109 // Break the big constant down into smaller ones.
2110 const MachineOperand &CstVal = SrcInstr->getOperand(1);
2111 APInt Val = SrcInstr->getOpcode() == TargetOpcode::G_CONSTANT
2112 ? CstVal.getCImm()->getValue()
2113 : CstVal.getFPImm()->getValueAPF().bitcastToAPInt();
2114
2115 LLT Dst0Ty = MRI.getType(MI.getOperand(0).getReg());
2116 unsigned ShiftAmt = Dst0Ty.getSizeInBits();
2117 // Unmerge a constant.
2118 for (unsigned Idx = 0; Idx != SrcIdx; ++Idx) {
2119 Csts.emplace_back(Val.trunc(ShiftAmt));
2120 Val = Val.lshr(ShiftAmt);
2121 }
2122
2123 return true;
2124}
2125
2127 SmallVectorImpl<APInt> &Csts) {
2128 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2129 "Expected an unmerge");
2130 assert((MI.getNumOperands() - 1 == Csts.size()) &&
2131 "Not enough operands to replace all defs");
2132 unsigned NumElems = MI.getNumOperands() - 1;
2133 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
2134 Register DstReg = MI.getOperand(Idx).getReg();
2135 Builder.buildConstant(DstReg, Csts[Idx]);
2136 }
2137
2138 MI.eraseFromParent();
2139}
2140
2142 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
2143 unsigned SrcIdx = MI.getNumOperands() - 1;
2144 Register SrcReg = MI.getOperand(SrcIdx).getReg();
2145 MatchInfo = [&MI](MachineIRBuilder &B) {
2146 unsigned NumElems = MI.getNumOperands() - 1;
2147 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
2148 Register DstReg = MI.getOperand(Idx).getReg();
2149 B.buildUndef(DstReg);
2150 }
2151 };
2152 return isa<GImplicitDef>(MRI.getVRegDef(SrcReg));
2153}
2154
2156 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2157 "Expected an unmerge");
2158 if (MRI.getType(MI.getOperand(0).getReg()).isVector() ||
2159 MRI.getType(MI.getOperand(MI.getNumDefs()).getReg()).isVector())
2160 return false;
2161 // Check that all the lanes are dead except the first one.
2162 for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
2163 if (!MRI.use_nodbg_empty(MI.getOperand(Idx).getReg()))
2164 return false;
2165 }
2166 return true;
2167}
2168
2170 Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
2171 Register Dst0Reg = MI.getOperand(0).getReg();
2172 Builder.buildTrunc(Dst0Reg, SrcReg);
2173 MI.eraseFromParent();
2174}
2175
2177 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2178 "Expected an unmerge");
2179 Register Dst0Reg = MI.getOperand(0).getReg();
2180 LLT Dst0Ty = MRI.getType(Dst0Reg);
2181 // G_ZEXT on vector applies to each lane, so it will
2182 // affect all destinations. Therefore we won't be able
2183 // to simplify the unmerge to just the first definition.
2184 if (Dst0Ty.isVector())
2185 return false;
2186 Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
2187 LLT SrcTy = MRI.getType(SrcReg);
2188 if (SrcTy.isVector())
2189 return false;
2190
2191 Register ZExtSrcReg;
2192 if (!mi_match(SrcReg, MRI, m_GZExt(m_Reg(ZExtSrcReg))))
2193 return false;
2194
2195 // Finally we can replace the first definition with
2196 // a zext of the source if the definition is big enough to hold
2197 // all of the ZExtSrc bits.
2198 LLT ZExtSrcTy = MRI.getType(ZExtSrcReg);
2199 return ZExtSrcTy.getSizeInBits() <= Dst0Ty.getSizeInBits();
2200}
2201
2203 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2204 "Expected an unmerge");
2205
2206 Register Dst0Reg = MI.getOperand(0).getReg();
2207
2208 MachineInstr *ZExtInstr =
2209 MRI.getVRegDef(MI.getOperand(MI.getNumDefs()).getReg());
2210 assert(ZExtInstr && ZExtInstr->getOpcode() == TargetOpcode::G_ZEXT &&
2211 "Expecting a G_ZEXT");
2212
2213 Register ZExtSrcReg = ZExtInstr->getOperand(1).getReg();
2214 LLT Dst0Ty = MRI.getType(Dst0Reg);
2215 LLT ZExtSrcTy = MRI.getType(ZExtSrcReg);
2216
2217 if (Dst0Ty.getSizeInBits() > ZExtSrcTy.getSizeInBits()) {
2218 Builder.buildZExt(Dst0Reg, ZExtSrcReg);
2219 } else {
2220 assert(Dst0Ty.getSizeInBits() == ZExtSrcTy.getSizeInBits() &&
2221 "ZExt src doesn't fit in destination");
2222 replaceRegWith(MRI, Dst0Reg, ZExtSrcReg);
2223 }
2224
2225 Register ZeroReg;
2226 for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
2227 if (!ZeroReg)
2228 ZeroReg = Builder.buildConstant(Dst0Ty, 0).getReg(0);
2229 replaceRegWith(MRI, MI.getOperand(Idx).getReg(), ZeroReg);
2230 }
2231 MI.eraseFromParent();
2232}
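// [Editorial illustration, not part of the LLVM source.]
//   %w:_(s64) = G_ZEXT %x:_(s32)
//   %lo:_(s32), %hi:_(s32) = G_UNMERGE_VALUES %w
// is simplified so that %lo is replaced by %x directly (the sizes match here;
// a wider %lo would get a fresh G_ZEXT) and %hi becomes G_CONSTANT i32 0.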
2233
2235 unsigned TargetShiftSize,
2236 unsigned &ShiftVal) {
2237 assert((MI.getOpcode() == TargetOpcode::G_SHL ||
2238 MI.getOpcode() == TargetOpcode::G_LSHR ||
2239 MI.getOpcode() == TargetOpcode::G_ASHR) && "Expected a shift");
2240
2241 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
2242 if (Ty.isVector()) // TODO:
2243 return false;
2244
2245 // Don't narrow further than the requested size.
2246 unsigned Size = Ty.getSizeInBits();
2247 if (Size <= TargetShiftSize)
2248 return false;
2249
2250 auto MaybeImmVal =
2251 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
2252 if (!MaybeImmVal)
2253 return false;
2254
2255 ShiftVal = MaybeImmVal->Value.getSExtValue();
2256 return ShiftVal >= Size / 2 && ShiftVal < Size;
2257}
2258
2260 const unsigned &ShiftVal) {
2261 Register DstReg = MI.getOperand(0).getReg();
2262 Register SrcReg = MI.getOperand(1).getReg();
2263 LLT Ty = MRI.getType(SrcReg);
2264 unsigned Size = Ty.getSizeInBits();
2265 unsigned HalfSize = Size / 2;
2266 assert(ShiftVal >= HalfSize);
2267
2268 LLT HalfTy = LLT::scalar(HalfSize);
2269
2270 auto Unmerge = Builder.buildUnmerge(HalfTy, SrcReg);
2271 unsigned NarrowShiftAmt = ShiftVal - HalfSize;
2272
2273 if (MI.getOpcode() == TargetOpcode::G_LSHR) {
2274 Register Narrowed = Unmerge.getReg(1);
2275
2276 // dst = G_LSHR s64:x, C for C >= 32
2277 // =>
2278 // lo, hi = G_UNMERGE_VALUES x
2279 // dst = G_MERGE_VALUES (G_LSHR hi, C - 32), 0
2280
2281 if (NarrowShiftAmt != 0) {
2282 Narrowed = Builder.buildLShr(HalfTy, Narrowed,
2283 Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
2284 }
2285
2286 auto Zero = Builder.buildConstant(HalfTy, 0);
2287 Builder.buildMergeLikeInstr(DstReg, {Narrowed, Zero});
2288 } else if (MI.getOpcode() == TargetOpcode::G_SHL) {
2289 Register Narrowed = Unmerge.getReg(0);
2290 // dst = G_SHL s64:x, C for C >= 32
2291 // =>
2292 // lo, hi = G_UNMERGE_VALUES x
2293 // dst = G_MERGE_VALUES 0, (G_SHL hi, C - 32)
2294 if (NarrowShiftAmt != 0) {
2295 Narrowed = Builder.buildShl(HalfTy, Narrowed,
2296 Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
2297 }
2298
2299 auto Zero = Builder.buildConstant(HalfTy, 0);
2300 Builder.buildMergeLikeInstr(DstReg, {Zero, Narrowed});
2301 } else {
2302 assert(MI.getOpcode() == TargetOpcode::G_ASHR);
2303 auto Hi = Builder.buildAShr(
2304 HalfTy, Unmerge.getReg(1),
2305 Builder.buildConstant(HalfTy, HalfSize - 1));
2306
2307 if (ShiftVal == HalfSize) {
2308 // (G_ASHR i64:x, 32) ->
2309 // G_MERGE_VALUES hi_32(x), (G_ASHR hi_32(x), 31)
2310 Builder.buildMergeLikeInstr(DstReg, {Unmerge.getReg(1), Hi});
2311 } else if (ShiftVal == Size - 1) {
2312 // Don't need a second shift.
2313 // (G_ASHR i64:x, 63) ->
2314 // %narrowed = (G_ASHR hi_32(x), 31)
2315 // G_MERGE_VALUES %narrowed, %narrowed
2316 Builder.buildMergeLikeInstr(DstReg, {Hi, Hi});
2317 } else {
2318 auto Lo = Builder.buildAShr(
2319 HalfTy, Unmerge.getReg(1),
2320 Builder.buildConstant(HalfTy, ShiftVal - HalfSize));
2321
2322 // (G_ASHR i64:x, C) -> for C >= 32
2323 // G_MERGE_VALUES (G_ASHR hi_32(x), C - 32), (G_ASHR hi_32(x), 31)
2324 Builder.buildMergeLikeInstr(DstReg, {Lo, Hi});
2325 }
2326 }
2327
2328 MI.eraseFromParent();
2329}
2330
2332 unsigned TargetShiftAmount) {
2333 unsigned ShiftAmt;
2334 if (matchCombineShiftToUnmerge(MI, TargetShiftAmount, ShiftAmt)) {
2335 applyCombineShiftToUnmerge(MI, ShiftAmt);
2336 return true;
2337 }
2338
2339 return false;
2340}
2341
2343 assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
2344 Register DstReg = MI.getOperand(0).getReg();
2345 LLT DstTy = MRI.getType(DstReg);
2346 Register SrcReg = MI.getOperand(1).getReg();
2347 return mi_match(SrcReg, MRI,
2348 m_GPtrToInt(m_all_of(m_SpecificType(DstTy), m_Reg(Reg))));
2349}
2350
2352 assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
2353 Register DstReg = MI.getOperand(0).getReg();
2354 Builder.buildCopy(DstReg, Reg);
2355 MI.eraseFromParent();
2356}
2357
2359 assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT");
2360 Register DstReg = MI.getOperand(0).getReg();
2361 Builder.buildZExtOrTrunc(DstReg, Reg);
2362 MI.eraseFromParent();
2363}
2364
2366 MachineInstr &MI, std::pair<Register, bool> &PtrReg) {
2367 assert(MI.getOpcode() == TargetOpcode::G_ADD);
2368 Register LHS = MI.getOperand(1).getReg();
2369 Register RHS = MI.getOperand(2).getReg();
2370 LLT IntTy = MRI.getType(LHS);
2371
2372 // G_PTR_ADD always has the pointer in the LHS, so we may need to commute the
2373 // instruction.
2374 PtrReg.second = false;
2375 for (Register SrcReg : {LHS, RHS}) {
2376 if (mi_match(SrcReg, MRI, m_GPtrToInt(m_Reg(PtrReg.first)))) {
2377 // Don't handle cases where the integer is implicitly converted to the
2378 // pointer width.
2379 LLT PtrTy = MRI.getType(PtrReg.first);
2380 if (PtrTy.getScalarSizeInBits() == IntTy.getScalarSizeInBits())
2381 return true;
2382 }
2383
2384 PtrReg.second = true;
2385 }
2386
2387 return false;
2388}
2389
2391 MachineInstr &MI, std::pair<Register, bool> &PtrReg) {
2392 Register Dst = MI.getOperand(0).getReg();
2393 Register LHS = MI.getOperand(1).getReg();
2394 Register RHS = MI.getOperand(2).getReg();
2395
2396 const bool DoCommute = PtrReg.second;
2397 if (DoCommute)
2398 std::swap(LHS, RHS);
2399 LHS = PtrReg.first;
2400
2401 LLT PtrTy = MRI.getType(LHS);
2402
2403 auto PtrAdd = Builder.buildPtrAdd(PtrTy, LHS, RHS);
2404 Builder.buildPtrToInt(Dst, PtrAdd);
2405 MI.eraseFromParent();
2406}
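// [Editorial illustration, not part of the LLVM source.]
//   %i:_(s64) = G_PTRTOINT %p:_(p0)
//   %r:_(s64) = G_ADD %i, %off
// is rebuilt as
//   %q:_(p0)  = G_PTR_ADD %p, %off
//   %r:_(s64) = G_PTRTOINT %q
// with the G_ADD operands swapped first when the pointer sat on the RHS.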
2407
2409 APInt &NewCst) {
2410 auto &PtrAdd = cast<GPtrAdd>(MI);
2411 Register LHS = PtrAdd.getBaseReg();
2412 Register RHS = PtrAdd.getOffsetReg();
2414
2415 if (auto RHSCst = getIConstantVRegVal(RHS, MRI)) {
2416 APInt Cst;
2417 if (mi_match(LHS, MRI, m_GIntToPtr(m_ICst(Cst)))) {
2418 auto DstTy = MRI.getType(PtrAdd.getReg(0));
2419 // G_INTTOPTR uses zero-extension
2420 NewCst = Cst.zextOrTrunc(DstTy.getSizeInBits());
2421 NewCst += RHSCst->sextOrTrunc(DstTy.getSizeInBits());
2422 return true;
2423 }
2424 }
2425
2426 return false;
2427}
2428
2430 APInt &NewCst) {
2431 auto &PtrAdd = cast<GPtrAdd>(MI);
2432 Register Dst = PtrAdd.getReg(0);
2433
2434 Builder.buildConstant(Dst, NewCst);
2435 PtrAdd.eraseFromParent();
2436}
2437
2439 assert(MI.getOpcode() == TargetOpcode::G_ANYEXT && "Expected a G_ANYEXT");
2440 Register DstReg = MI.getOperand(0).getReg();
2441 Register SrcReg = MI.getOperand(1).getReg();
2442 Register OriginalSrcReg = getSrcRegIgnoringCopies(SrcReg, MRI);
2443 if (OriginalSrcReg.isValid())
2444 SrcReg = OriginalSrcReg;
2445 LLT DstTy = MRI.getType(DstReg);
2446 return mi_match(SrcReg, MRI,
2447 m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy))));
2448}
2449
2451 assert(MI.getOpcode() == TargetOpcode::G_ZEXT && "Expected a G_ZEXT");
2452 Register DstReg = MI.getOperand(0).getReg();
2453 Register SrcReg = MI.getOperand(1).getReg();
2454 LLT DstTy = MRI.getType(DstReg);
2455 if (mi_match(SrcReg, MRI,
2456 m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy))))) {
2457 unsigned DstSize = DstTy.getScalarSizeInBits();
2458 unsigned SrcSize = MRI.getType(SrcReg).getScalarSizeInBits();
2459 return KB->getKnownBits(Reg).countMinLeadingZeros() >= DstSize - SrcSize;
2460 }
2461 return false;
2462}
2463
2465 MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
2466 assert((MI.getOpcode() == TargetOpcode::G_ANYEXT ||
2467 MI.getOpcode() == TargetOpcode::G_SEXT ||
2468 MI.getOpcode() == TargetOpcode::G_ZEXT) &&
2469 "Expected a G_[ASZ]EXT");
2470 Register SrcReg = MI.getOperand(1).getReg();
2471 Register OriginalSrcReg = getSrcRegIgnoringCopies(SrcReg, MRI);
2472 if (OriginalSrcReg.isValid())
2473 SrcReg = OriginalSrcReg;
2474 MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
2475 // Match exts with the same opcode, anyext([sz]ext) and sext(zext).
2476 unsigned Opc = MI.getOpcode();
2477 unsigned SrcOpc = SrcMI->getOpcode();
2478 if (Opc == SrcOpc ||
2479 (Opc == TargetOpcode::G_ANYEXT &&
2480 (SrcOpc == TargetOpcode::G_SEXT || SrcOpc == TargetOpcode::G_ZEXT)) ||
2481 (Opc == TargetOpcode::G_SEXT && SrcOpc == TargetOpcode::G_ZEXT)) {
2482 MatchInfo = std::make_tuple(SrcMI->getOperand(1).getReg(), SrcOpc);
2483 return true;
2484 }
2485 return false;
2486}
2487
2489 MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
2490 assert((MI.getOpcode() == TargetOpcode::G_ANYEXT ||
2491 MI.getOpcode() == TargetOpcode::G_SEXT ||
2492 MI.getOpcode() == TargetOpcode::G_ZEXT) &&
2493 "Expected a G_[ASZ]EXT");
2494
2495 Register Reg = std::get<0>(MatchInfo);
2496 unsigned SrcExtOp = std::get<1>(MatchInfo);
2497
2498 // Combine exts with the same opcode.
2499 if (MI.getOpcode() == SrcExtOp) {
2501 MI.getOperand(1).setReg(Reg);
2503 return;
2504 }
2505
2506 // Combine:
2507 // - anyext([sz]ext x) to [sz]ext x
2508 // - sext(zext x) to zext x
2509 if (MI.getOpcode() == TargetOpcode::G_ANYEXT ||
2510 (MI.getOpcode() == TargetOpcode::G_SEXT &&
2511 SrcExtOp == TargetOpcode::G_ZEXT)) {
2512 Register DstReg = MI.getOperand(0).getReg();
2513 Builder.buildInstr(SrcExtOp, {DstReg}, {Reg});
2514 MI.eraseFromParent();
2515 }
2516}
2517
2519 MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo) {
2520 assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
2521 Register SrcReg = MI.getOperand(1).getReg();
2522 MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
2523 unsigned SrcOpc = SrcMI->getOpcode();
2524 if (SrcOpc == TargetOpcode::G_ANYEXT || SrcOpc == TargetOpcode::G_SEXT ||
2525 SrcOpc == TargetOpcode::G_ZEXT) {
2526 MatchInfo = std::make_pair(SrcMI->getOperand(1).getReg(), SrcOpc);
2527 return true;
2528 }
2529 return false;
2530}
2531
2533 MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo) {
2534 assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
2535 Register SrcReg = MatchInfo.first;
2536 unsigned SrcExtOp = MatchInfo.second;
2537 Register DstReg = MI.getOperand(0).getReg();
2538 LLT SrcTy = MRI.getType(SrcReg);
2539 LLT DstTy = MRI.getType(DstReg);
2540 if (SrcTy == DstTy) {
2541 MI.eraseFromParent();
2542 replaceRegWith(MRI, DstReg, SrcReg);
2543 return;
2544 }
2545 if (SrcTy.getSizeInBits() < DstTy.getSizeInBits())
2546 Builder.buildInstr(SrcExtOp, {DstReg}, {SrcReg});
2547 else
2548 Builder.buildTrunc(DstReg, SrcReg);
2549 MI.eraseFromParent();
2550}
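// [Editorial illustration, not part of the LLVM source.]
//   %e:_(s64) = G_SEXT %x:_(s16)
//   %r:_(s32) = G_TRUNC %e
// collapses to %r:_(s32) = G_SEXT %x: the original extension is re-emitted
// when the destination is wider than %x, the register is reused when the
// sizes match, and a G_TRUNC is emitted when it is narrower.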
2551
2553 const unsigned ShiftSize = ShiftTy.getScalarSizeInBits();
2554 const unsigned TruncSize = TruncTy.getScalarSizeInBits();
2555
2556 // ShiftTy > 32 > TruncTy -> 32
2557 if (ShiftSize > 32 && TruncSize < 32)
2558 return ShiftTy.changeElementSize(32);
2559
2560 // TODO: We could also reduce to 16 bits, but that's more target-dependent.
2561 // Some targets like it, some don't, some only like it under certain
2562 // conditions/processor versions, etc.
2563 // A TL hook might be needed for this.
2564
2565 // Don't combine
2566 return ShiftTy;
2567}
2568
2570 MachineInstr &MI, std::pair<MachineInstr *, LLT> &MatchInfo) {
2571 assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
2572 Register DstReg = MI.getOperand(0).getReg();
2573 Register SrcReg = MI.getOperand(1).getReg();
2574
2575 if (!MRI.hasOneNonDBGUse(SrcReg))
2576 return false;
2577
2578 LLT SrcTy = MRI.getType(SrcReg);
2579 LLT DstTy = MRI.getType(DstReg);
2580
2581 MachineInstr *SrcMI = getDefIgnoringCopies(SrcReg, MRI);
2582 const auto &TL = getTargetLowering();
2583
2584 LLT NewShiftTy;
2585 switch (SrcMI->getOpcode()) {
2586 default:
2587 return false;
2588 case TargetOpcode::G_SHL: {
2589 NewShiftTy = DstTy;
2590
2591 // Make sure new shift amount is legal.
2592 KnownBits Known = KB->getKnownBits(SrcMI->getOperand(2).getReg());
2593 if (Known.getMaxValue().uge(NewShiftTy.getScalarSizeInBits()))
2594 return false;
2595 break;
2596 }
2597 case TargetOpcode::G_LSHR:
2598 case TargetOpcode::G_ASHR: {
2599 // For right shifts, we conservatively do not do the transform if the TRUNC
2600 // has any STORE users. The reason is that if we change the type of the
2601 // shift, we may break the truncstore combine.
2602 //
2603 // TODO: Fix truncstore combine to handle (trunc(lshr (trunc x), k)).
2604 for (auto &User : MRI.use_instructions(DstReg))
2605 if (User.getOpcode() == TargetOpcode::G_STORE)
2606 return false;
2607
2608 NewShiftTy = getMidVTForTruncRightShiftCombine(SrcTy, DstTy);
2609 if (NewShiftTy == SrcTy)
2610 return false;
2611
2612 // Make sure we won't lose information by truncating the high bits.
2613 KnownBits Known = KB->getKnownBits(SrcMI->getOperand(2).getReg());
2614 if (Known.getMaxValue().ugt(NewShiftTy.getScalarSizeInBits() -
2615 DstTy.getScalarSizeInBits()))
2616 return false;
2617 break;
2618 }
2619 }
2620
2621 if (!isLegalOrBeforeLegalizer(
2622 {SrcMI->getOpcode(),
2623 {NewShiftTy, TL.getPreferredShiftAmountTy(NewShiftTy)}}))
2624 return false;
2625
2626 MatchInfo = std::make_pair(SrcMI, NewShiftTy);
2627 return true;
2628}
2629
2631 MachineInstr &MI, std::pair<MachineInstr *, LLT> &MatchInfo) {
2632 MachineInstr *ShiftMI = MatchInfo.first;
2633 LLT NewShiftTy = MatchInfo.second;
2634
2635 Register Dst = MI.getOperand(0).getReg();
2636 LLT DstTy = MRI.getType(Dst);
2637
2638 Register ShiftAmt = ShiftMI->getOperand(2).getReg();
2639 Register ShiftSrc = ShiftMI->getOperand(1).getReg();
2640 ShiftSrc = Builder.buildTrunc(NewShiftTy, ShiftSrc).getReg(0);
2641
2642 Register NewShift =
2643 Builder
2644 .buildInstr(ShiftMI->getOpcode(), {NewShiftTy}, {ShiftSrc, ShiftAmt})
2645 .getReg(0);
2646
2647 if (NewShiftTy == DstTy)
2648 replaceRegWith(MRI, Dst, NewShift);
2649 else
2650 Builder.buildTrunc(Dst, NewShift);
2651
2652 eraseInst(MI);
2653}
2654
2656 return any_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
2657 return MO.isReg() &&
2658 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
2659 });
2660}
2661
2663 return all_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
2664 return !MO.isReg() ||
2665 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
2666 });
2667}
2668
2670 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
2671 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
2672 return all_of(Mask, [](int Elt) { return Elt < 0; });
2673}
2674
2676 assert(MI.getOpcode() == TargetOpcode::G_STORE);
2677 return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(0).getReg(),
2678 MRI);
2679}
2680
2682 assert(MI.getOpcode() == TargetOpcode::G_SELECT);
2683 return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(1).getReg(),
2684 MRI);
2685}
2686
2688 assert((MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT ||
2689 MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT) &&
2690 "Expected an insert/extract element op");
2691 LLT VecTy = MRI.getType(MI.getOperand(1).getReg());
2692 unsigned IdxIdx =
2693 MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
2694 auto Idx = getIConstantVRegVal(MI.getOperand(IdxIdx).getReg(), MRI);
2695 if (!Idx)
2696 return false;
2697 return Idx->getZExtValue() >= VecTy.getNumElements();
2698}
2699
2701 GSelect &SelMI = cast<GSelect>(MI);
2702 auto Cst =
2703 isConstantOrConstantSplatVector(*MRI.getVRegDef(SelMI.getCondReg()), MRI);
2704 if (!Cst)
2705 return false;
2706 OpIdx = Cst->isZero() ? 3 : 2;
2707 return true;
2708}
2709
2710void CombinerHelper::eraseInst(MachineInstr &MI) { MI.eraseFromParent(); }
2711
2713 const MachineOperand &MOP2) {
2714 if (!MOP1.isReg() || !MOP2.isReg())
2715 return false;
2716 auto InstAndDef1 = getDefSrcRegIgnoringCopies(MOP1.getReg(), MRI);
2717 if (!InstAndDef1)
2718 return false;
2719 auto InstAndDef2 = getDefSrcRegIgnoringCopies(MOP2.getReg(), MRI);
2720 if (!InstAndDef2)
2721 return false;
2722 MachineInstr *I1 = InstAndDef1->MI;
2723 MachineInstr *I2 = InstAndDef2->MI;
2724
2725 // Handle a case like this:
2726 //
2727 // %0:_(s64), %1:_(s64) = G_UNMERGE_VALUES %2:_(<2 x s64>)
2728 //
2729 // Even though %0 and %1 are produced by the same instruction they are not
2730 // the same values.
2731 if (I1 == I2)
2732 return MOP1.getReg() == MOP2.getReg();
2733
2734 // If we have an instruction which loads or stores, we can't guarantee that
2735 // it is identical.
2736 //
2737 // For example, we may have
2738 //
2739 // %x1 = G_LOAD %addr (load N from @somewhere)
2740 // ...
2741 // call @foo
2742 // ...
2743 // %x2 = G_LOAD %addr (load N from @somewhere)
2744 // ...
2745 // %or = G_OR %x1, %x2
2746 //
2747 // It's possible that @foo will modify whatever lives at the address we're
2748 // loading from. To be safe, let's just assume that all loads and stores
2749 // are different (unless we have something which is guaranteed to not
2750 // change.)
2751 if (I1->mayLoadOrStore() && !I1->isDereferenceableInvariantLoad())
2752 return false;
2753
2754 // If both instructions are loads or stores, they are equal only if both
2755 // are dereferenceable invariant loads with the same number of bits.
2756 if (I1->mayLoadOrStore() && I2->mayLoadOrStore()) {
2757 GLoadStore *LS1 = dyn_cast<GLoadStore>(I1);
2758 GLoadStore *LS2 = dyn_cast<GLoadStore>(I2);
2759 if (!LS1 || !LS2)
2760 return false;
2761
2762 if (!I2->isDereferenceableInvariantLoad() ||
2763 (LS1->getMemSizeInBits() != LS2->getMemSizeInBits()))
2764 return false;
2765 }
2766
2767 // Check for physical registers on the instructions first to avoid cases
2768 // like this:
2769 //
2770 // %a = COPY $physreg
2771 // ...
2772 // SOMETHING implicit-def $physreg
2773 // ...
2774 // %b = COPY $physreg
2775 //
2776 // These copies are not equivalent.
2777 if (any_of(I1->uses(), [](const MachineOperand &MO) {
2778 return MO.isReg() && MO.getReg().isPhysical();
2779 })) {
2780 // Check if we have a case like this:
2781 //
2782 // %a = COPY $physreg
2783 // %b = COPY %a
2784 //
2785 // In this case, I1 and I2 will both be equal to %a = COPY $physreg.
2786 // From that, we know that they must have the same value, since they must
2787 // have come from the same COPY.
2788 return I1->isIdenticalTo(*I2);
2789 }
2790
2791 // We don't have any physical registers, so we don't necessarily need the
2792 // same vreg defs.
2793 //
2794 // On the off-chance that there's some target instruction feeding into the
2795 // instruction, let's use produceSameValue instead of isIdenticalTo.
2796 if (Builder.getTII().produceSameValue(*I1, *I2, &MRI)) {
2797 // Handle instructions with multiple defs that produce the same values. The
2798 // values are the same for operands with the same index.
2799 // %0:_(s8), %1:_(s8), %2:_(s8), %3:_(s8) = G_UNMERGE_VALUES %4:_(<4 x s8>)
2800 // %5:_(s8), %6:_(s8), %7:_(s8), %8:_(s8) = G_UNMERGE_VALUES %4:_(<4 x s8>)
2801 // I1 and I2 are different instructions but produce the same values:
2802 // %1 and %6 are the same, while %1 and %7 are not.
2803 return I1->findRegisterDefOperandIdx(InstAndDef1->Reg, /*TRI=*/nullptr) ==
2804 I2->findRegisterDefOperandIdx(InstAndDef2->Reg, /*TRI=*/nullptr);
2805 }
2806 return false;
2807}
2808
2810 if (!MOP.isReg())
2811 return false;
2812 auto *MI = MRI.getVRegDef(MOP.getReg());
2813 auto MaybeCst = isConstantOrConstantSplatVector(*MI, MRI);
2814 return MaybeCst && MaybeCst->getBitWidth() <= 64 &&
2815 MaybeCst->getSExtValue() == C;
2816}
2817
2819 if (!MOP.isReg())
2820 return false;
2821 std::optional<FPValueAndVReg> MaybeCst;
2822 if (!mi_match(MOP.getReg(), MRI, m_GFCstOrSplat(MaybeCst)))
2823 return false;
2824
2825 return MaybeCst->Value.isExactlyValue(C);
2826}
2827
2829 unsigned OpIdx) {
2830 assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
2831 Register OldReg = MI.getOperand(0).getReg();
2832 Register Replacement = MI.getOperand(OpIdx).getReg();
2833 assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
2834 MI.eraseFromParent();
2835 replaceRegWith(MRI, OldReg, Replacement);
2836}
2837
2839 Register Replacement) {
2840 assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
2841 Register OldReg = MI.getOperand(0).getReg();
2842 assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
2843 MI.eraseFromParent();
2844 replaceRegWith(MRI, OldReg, Replacement);
2845}
2846
2848 unsigned ConstIdx) {
2849 Register ConstReg = MI.getOperand(ConstIdx).getReg();
2850 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
2851
2852 // Get the shift amount
2853 auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstReg, MRI);
2854 if (!VRegAndVal)
2855 return false;
2856
2857 // Return true if the shift amount is >= the bitwidth.
2858 return (VRegAndVal->Value.uge(DstTy.getSizeInBits()));
2859}
2860
2862 assert((MI.getOpcode() == TargetOpcode::G_FSHL ||
2863 MI.getOpcode() == TargetOpcode::G_FSHR) &&
2864 "This is not a funnel shift operation");
2865
2866 Register ConstReg = MI.getOperand(3).getReg();
2867 LLT ConstTy = MRI.getType(ConstReg);
2868 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
2869
2870 auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstReg, MRI);
2871 assert((VRegAndVal) && "Value is not a constant");
2872
2873 // Calculate the new Shift Amount = Old Shift Amount % BitWidth
2874 APInt NewConst = VRegAndVal->Value.urem(
2875 APInt(ConstTy.getSizeInBits(), DstTy.getScalarSizeInBits()));
2876
2877 auto NewConstInstr = Builder.buildConstant(ConstTy, NewConst.getZExtValue());
2878 Builder.buildInstr(
2879 MI.getOpcode(), {MI.getOperand(0)},
2880 {MI.getOperand(1), MI.getOperand(2), NewConstInstr.getReg(0)});
2881
2882 MI.eraseFromParent();
2883}
2884
2886 assert(MI.getOpcode() == TargetOpcode::G_SELECT);
2887 // Match (cond ? x : x)
2888 return matchEqualDefs(MI.getOperand(2), MI.getOperand(3)) &&
2889 canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(2).getReg(),
2890 MRI);
2891}
2892
2894 return matchEqualDefs(MI.getOperand(1), MI.getOperand(2)) &&
2895 canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(),
2896 MRI);
2897}
2898
2900 return matchConstantOp(MI.getOperand(OpIdx), 0) &&
2901 canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(OpIdx).getReg(),
2902 MRI);
2903}
2904
2906 MachineOperand &MO = MI.getOperand(OpIdx);
2907 return MO.isReg() &&
2908 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
2909}
2910
2912 unsigned OpIdx) {
2913 MachineOperand &MO = MI.getOperand(OpIdx);
2914 return isKnownToBeAPowerOfTwo(MO.getReg(), MRI, KB);
2915}
2916
2918 assert(MI.getNumDefs() == 1 && "Expected only one def?");
2919 Builder.buildFConstant(MI.getOperand(0), C);
2920 MI.eraseFromParent();
2921}
2922
2924 assert(MI.getNumDefs() == 1 && "Expected only one def?");
2925 Builder.buildConstant(MI.getOperand(0), C);
2926 MI.eraseFromParent();
2927}
2928
2930 assert(MI.getNumDefs() == 1 && "Expected only one def?");
2931 Builder.buildConstant(MI.getOperand(0), C);
2932 MI.eraseFromParent();
2933}
2934
2936 ConstantFP *CFP) {
2937 assert(MI.getNumDefs() == 1 && "Expected only one def?");
2938 Builder.buildFConstant(MI.getOperand(0), CFP->getValueAPF());
2939 MI.eraseFromParent();
2940}
2941
2943 assert(MI.getNumDefs() == 1 && "Expected only one def?");
2944 Builder.buildUndef(MI.getOperand(0));
2945 MI.eraseFromParent();
2946}
2947
2949 MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) {
2950 Register LHS = MI.getOperand(1).getReg();
2951 Register RHS = MI.getOperand(2).getReg();
2952 Register &NewLHS = std::get<0>(MatchInfo);
2953 Register &NewRHS = std::get<1>(MatchInfo);
2954
2955 // Helper lambda to check for opportunities for
2956 // ((0-A) + B) -> B - A
2957 // (A + (0-B)) -> A - B
2958 auto CheckFold = [&](Register &MaybeSub, Register &MaybeNewLHS) {
2959 if (!mi_match(MaybeSub, MRI, m_Neg(m_Reg(NewRHS))))
2960 return false;
2961 NewLHS = MaybeNewLHS;
2962 return true;
2963 };
2964
2965 return CheckFold(LHS, RHS) || CheckFold(RHS, LHS);
2966}
2967
2970 assert(MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT &&
2971 "Invalid opcode");
2972 Register DstReg = MI.getOperand(0).getReg();
2973 LLT DstTy = MRI.getType(DstReg);
2974 assert(DstTy.isVector() && "Invalid G_INSERT_VECTOR_ELT?");
2975 unsigned NumElts = DstTy.getNumElements();
2976 // If this MI is part of a sequence of insert_vec_elts, then
2977 // don't do the combine in the middle of the sequence.
2978 if (MRI.hasOneUse(DstReg) && MRI.use_instr_begin(DstReg)->getOpcode() ==
2979 TargetOpcode::G_INSERT_VECTOR_ELT)
2980 return false;
2981 MachineInstr *CurrInst = &MI;
2982 MachineInstr *TmpInst;
2983 int64_t IntImm;
2984 Register TmpReg;
2985 MatchInfo.resize(NumElts);
2986 while (mi_match(
2987 CurrInst->getOperand(0).getReg(), MRI,
2988 m_GInsertVecElt(m_MInstr(TmpInst), m_Reg(TmpReg), m_ICst(IntImm)))) {
2989 if (IntImm >= NumElts || IntImm < 0)
2990 return false;
2991 if (!MatchInfo[IntImm])
2992 MatchInfo[IntImm] = TmpReg;
2993 CurrInst = TmpInst;
2994 }
2995 // Variable index.
2996 if (CurrInst->getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
2997 return false;
2998 if (TmpInst->getOpcode() == TargetOpcode::G_BUILD_VECTOR) {
2999 for (unsigned I = 1; I < TmpInst->getNumOperands(); ++I) {
3000 if (!MatchInfo[I - 1].isValid())
3001 MatchInfo[I - 1] = TmpInst->getOperand(I).getReg();
3002 }
3003 return true;
3004 }
3005 // If we didn't end in a G_IMPLICIT_DEF and the source is not fully
3006 // overwritten, bail out.
3007 return TmpInst->getOpcode() == TargetOpcode::G_IMPLICIT_DEF ||
3008 all_of(MatchInfo, [](Register Reg) { return !!Reg; });
3009}
3010
3013 Register UndefReg;
3014 auto GetUndef = [&]() {
3015 if (UndefReg)
3016 return UndefReg;
3017 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
3018 UndefReg = Builder.buildUndef(DstTy.getScalarType()).getReg(0);
3019 return UndefReg;
3020 };
3021 for (unsigned I = 0; I < MatchInfo.size(); ++I) {
3022 if (!MatchInfo[I])
3023 MatchInfo[I] = GetUndef();
3024 }
3025 Builder.buildBuildVector(MI.getOperand(0).getReg(), MatchInfo);
3026 MI.eraseFromParent();
3027}
3028
3030 MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) {
3031 Register SubLHS, SubRHS;
3032 std::tie(SubLHS, SubRHS) = MatchInfo;
3033 Builder.buildSub(MI.getOperand(0).getReg(), SubLHS, SubRHS);
3034 MI.eraseFromParent();
3035}
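// [Editorial illustration, not part of the LLVM source.]
//   %n:_(s32) = G_SUB %zero, %a
//   %r:_(s32) = G_ADD %n, %b
// becomes %r:_(s32) = G_SUB %b, %a, and symmetrically when the negation
// appears on the right-hand side of the add.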
3036
3039 // Matches: logic (hand x, ...), (hand y, ...) -> hand (logic x, y), ...
3040 //
3041 // Creates the new hand + logic instruction (but does not insert them.)
3042 //
3043 // On success, MatchInfo is populated with the new instructions. These are
3044 // inserted in applyHoistLogicOpWithSameOpcodeHands.
3045 unsigned LogicOpcode = MI.getOpcode();
3046 assert(LogicOpcode == TargetOpcode::G_AND ||
3047 LogicOpcode == TargetOpcode::G_OR ||
3048 LogicOpcode == TargetOpcode::G_XOR);
3049 MachineIRBuilder MIB(MI);
3050 Register Dst = MI.getOperand(0).getReg();
3051 Register LHSReg = MI.getOperand(1).getReg();
3052 Register RHSReg = MI.getOperand(2).getReg();
3053
3054 // Don't recompute anything.
3055 if (!MRI.hasOneNonDBGUse(LHSReg) || !MRI.hasOneNonDBGUse(RHSReg))
3056 return false;
3057
3058 // Make sure we have (hand x, ...), (hand y, ...)
3059 MachineInstr *LeftHandInst = getDefIgnoringCopies(LHSReg, MRI);
3060 MachineInstr *RightHandInst = getDefIgnoringCopies(RHSReg, MRI);
3061 if (!LeftHandInst || !RightHandInst)
3062 return false;
3063 unsigned HandOpcode = LeftHandInst->getOpcode();
3064 if (HandOpcode != RightHandInst->getOpcode())
3065 return false;
3066 if (!LeftHandInst->getOperand(1).isReg() ||
3067 !RightHandInst->getOperand(1).isReg())
3068 return false;
3069
3070 // Make sure the types match up, and if we're doing this post-legalization,
3071 // we end up with legal types.
3072 Register X = LeftHandInst->getOperand(1).getReg();
3073 Register Y = RightHandInst->getOperand(1).getReg();
3074 LLT XTy = MRI.getType(X);
3075 LLT YTy = MRI.getType(Y);
3076 if (!XTy.isValid() || XTy != YTy)
3077 return false;
3078
3079 // Optional extra source register.
3080 Register ExtraHandOpSrcReg;
3081 switch (HandOpcode) {
3082 default:
3083 return false;
3084 case TargetOpcode::G_ANYEXT:
3085 case TargetOpcode::G_SEXT:
3086 case TargetOpcode::G_ZEXT: {
3087 // Match: logic (ext X), (ext Y) --> ext (logic X, Y)
3088 break;
3089 }
3090 case TargetOpcode::G_AND:
3091 case TargetOpcode::G_ASHR:
3092 case TargetOpcode::G_LSHR:
3093 case TargetOpcode::G_SHL: {
3094 // Match: logic (binop x, z), (binop y, z) -> binop (logic x, y), z
3095 MachineOperand &ZOp = LeftHandInst->getOperand(2);
3096 if (!matchEqualDefs(ZOp, RightHandInst->getOperand(2)))
3097 return false;
3098 ExtraHandOpSrcReg = ZOp.getReg();
3099 break;
3100 }
3101 }
3102
3103 if (!isLegalOrBeforeLegalizer({LogicOpcode, {XTy, YTy}}))
3104 return false;
3105
3106 // Record the steps to build the new instructions.
3107 //
3108 // Steps to build (logic x, y)
3109 auto NewLogicDst = MRI.createGenericVirtualRegister(XTy);
3110 OperandBuildSteps LogicBuildSteps = {
3111 [=](MachineInstrBuilder &MIB) { MIB.addDef(NewLogicDst); },
3112 [=](MachineInstrBuilder &MIB) { MIB.addReg(X); },
3113 [=](MachineInstrBuilder &MIB) { MIB.addReg(Y); }};
3114 InstructionBuildSteps LogicSteps(LogicOpcode, LogicBuildSteps);
3115
3116 // Steps to build hand (logic x, y), ...z
3117 OperandBuildSteps HandBuildSteps = {
3118 [=](MachineInstrBuilder &MIB) { MIB.addDef(Dst); },
3119 [=](MachineInstrBuilder &MIB) { MIB.addReg(NewLogicDst); }};
3120 if (ExtraHandOpSrcReg.isValid())
3121 HandBuildSteps.push_back(
3122 [=](MachineInstrBuilder &MIB) { MIB.addReg(ExtraHandOpSrcReg); });
3123 InstructionBuildSteps HandSteps(HandOpcode, HandBuildSteps);
3124
3125 MatchInfo = InstructionStepsMatchInfo({LogicSteps, HandSteps});
3126 return true;
3127}
3128
3131 assert(MatchInfo.InstrsToBuild.size() &&
3132 "Expected at least one instr to build?");
3133 for (auto &InstrToBuild : MatchInfo.InstrsToBuild) {
3134 assert(InstrToBuild.Opcode && "Expected a valid opcode?");
3135 assert(InstrToBuild.OperandFns.size() && "Expected at least one operand?");
3136 MachineInstrBuilder Instr = Builder.buildInstr(InstrToBuild.Opcode);
3137 for (auto &OperandFn : InstrToBuild.OperandFns)
3138 OperandFn(Instr);
3139 }
3140 MI.eraseFromParent();
3141}
3142
3144 MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) {
3145 assert(MI.getOpcode() == TargetOpcode::G_ASHR);
3146 int64_t ShlCst, AshrCst;
3147 Register Src;
3148 if (!mi_match(MI.getOperand(0).getReg(), MRI,
3149 m_GAShr(m_GShl(m_Reg(Src), m_ICstOrSplat(ShlCst)),
3150 m_ICstOrSplat(AshrCst))))
3151 return false;
3152 if (ShlCst != AshrCst)
3153 return false;
3154 if (!isLegalOrBeforeLegalizer(
3155 {TargetOpcode::G_SEXT_INREG, {MRI.getType(Src)}}))
3156 return false;
3157 MatchInfo = std::make_tuple(Src, ShlCst);
3158 return true;
3159}
3160
3162 MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) {
3163 assert(MI.getOpcode() == TargetOpcode::G_ASHR);
3164 Register Src;
3165 int64_t ShiftAmt;
3166 std::tie(Src, ShiftAmt) = MatchInfo;
3167 unsigned Size = MRI.getType(Src).getScalarSizeInBits();
3168 Builder.buildSExtInReg(MI.getOperand(0).getReg(), Src, Size - ShiftAmt);
3169 MI.eraseFromParent();
3170}
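// [Editorial illustration, not part of the LLVM source.] For an s32 value,
//   %t:_(s32) = G_SHL %x, 24
//   %r:_(s32) = G_ASHR %t, 24
// becomes %r:_(s32) = G_SEXT_INREG %x, 8, i.e. a sign extension from the low
// 8 bits, provided G_SEXT_INREG is legal (or we are before the legalizer).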
3171
3172/// and(and(x, C1), C2) -> C1&C2 ? and(x, C1&C2) : 0
3174 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
3175 assert(MI.getOpcode() == TargetOpcode::G_AND);
3176
3177 Register Dst = MI.getOperand(0).getReg();
3178 LLT Ty = MRI.getType(Dst);
3179
3180 Register R;
3181 int64_t C1;
3182 int64_t C2;
3183 if (!mi_match(
3184 Dst, MRI,
3185 m_GAnd(m_GAnd(m_Reg(R), m_ICst(C1)), m_ICst(C2))))
3186 return false;
3187
3188 MatchInfo = [=](MachineIRBuilder &B) {
3189 if (C1 & C2) {
3190 B.buildAnd(Dst, R, B.buildConstant(Ty, C1 & C2));
3191 return;
3192 }
3193 auto Zero = B.buildConstant(Ty, 0);
3194 replaceRegWith(MRI, Dst, Zero->getOperand(0).getReg());
3195 };
3196 return true;
3197}
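// [Editorial illustration, not part of the LLVM source.]
//   %t:_(s32) = G_AND %x, 0x0ff0
//   %r:_(s32) = G_AND %t, 0x00ff
// becomes %r:_(s32) = G_AND %x, 0x00f0; if the two masks shared no bits the
// result would instead be replaced by the constant 0.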
3198
3200 Register &Replacement) {
3201 // Given
3202 //
3203 // %y:_(sN) = G_SOMETHING
3204 // %x:_(sN) = G_SOMETHING
3205 // %res:_(sN) = G_AND %x, %y
3206 //
3207 // Eliminate the G_AND when it is known that x & y == x or x & y == y.
3208 //
3209 // Patterns like this can appear as a result of legalization. E.g.
3210 //
3211 // %cmp:_(s32) = G_ICMP intpred(pred), %x(s32), %y
3212 // %one:_(s32) = G_CONSTANT i32 1
3213 // %and:_(s32) = G_AND %cmp, %one
3214 //
3215 // In this case, G_ICMP only produces a single bit, so x & 1 == x.
3216 assert(MI.getOpcode() == TargetOpcode::G_AND);
3217 if (!KB)
3218 return false;
3219
3220 Register AndDst = MI.getOperand(0).getReg();
3221 Register LHS = MI.getOperand(1).getReg();
3222 Register RHS = MI.getOperand(2).getReg();
3223
3224 // Check the RHS (maybe a constant) first, and if we have no KnownBits there,
3225 // we can't do anything. If we do, then it depends on whether we have
3226 // KnownBits on the LHS.
3227 KnownBits RHSBits = KB->getKnownBits(RHS);
3228 if (RHSBits.isUnknown())
3229 return false;
3230
3231 KnownBits LHSBits = KB->getKnownBits(LHS);
3232
3233 // Check that x & Mask == x.
3234 // x & 1 == x, always
3235 // x & 0 == x, only if x is also 0
3236 // Meaning Mask has no effect if every bit is either one in Mask or zero in x.
3237 //
3238 // Check if we can replace AndDst with the LHS of the G_AND
3239 if (canReplaceReg(AndDst, LHS, MRI) &&
3240 (LHSBits.Zero | RHSBits.One).isAllOnes()) {
3241 Replacement = LHS;
3242 return true;
3243 }
3244
3245 // Check if we can replace AndDst with the RHS of the G_AND
3246 if (canReplaceReg(AndDst, RHS, MRI) &&
3247 (LHSBits.One | RHSBits.Zero).isAllOnes()) {
3248 Replacement = RHS;
3249 return true;
3250 }
3251
3252 return false;
3253}
3254
3256 // Given
3257 //
3258 // %y:_(sN) = G_SOMETHING
3259 // %x:_(sN) = G_SOMETHING
3260 // %res:_(sN) = G_OR %x, %y
3261 //
3262 // Eliminate the G_OR when it is known that x | y == x or x | y == y.
3263 assert(MI.getOpcode() == TargetOpcode::G_OR);
3264 if (!KB)
3265 return false;
3266
3267 Register OrDst = MI.getOperand(0).getReg();
3268 Register LHS = MI.getOperand(1).getReg();
3269 Register RHS = MI.getOperand(2).getReg();
3270
3271 KnownBits LHSBits = KB->getKnownBits(LHS);
3272 KnownBits RHSBits = KB->getKnownBits(RHS);
3273
3274 // Check that x | Mask == x.
3275 // x | 0 == x, always
3276 // x | 1 == x, only if x is also 1
3277 // Meaning Mask has no effect if every bit is either zero in Mask or one in x.
3278 //
3279 // Check if we can replace OrDst with the LHS of the G_OR
3280 if (canReplaceReg(OrDst, LHS, MRI) &&
3281 (LHSBits.One | RHSBits.Zero).isAllOnes()) {
3282 Replacement = LHS;
3283 return true;
3284 }
3285
3286 // Check if we can replace OrDst with the RHS of the G_OR
3287 if (canReplaceReg(OrDst, RHS, MRI) &&
3288 (LHSBits.Zero | RHSBits.One).isAllOnes()) {
3289 Replacement = RHS;
3290 return true;
3291 }
3292
3293 return false;
3294}
3295
3297 // If the input is already sign extended, just drop the extension.
3298 Register Src = MI.getOperand(1).getReg();
3299 unsigned ExtBits = MI.getOperand(2).getImm();
3300 unsigned TypeSize = MRI.getType(Src).getScalarSizeInBits();
3301 return KB->computeNumSignBits(Src) >= (TypeSize - ExtBits + 1);
3302}
3303
3304static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits,
3305 int64_t Cst, bool IsVector, bool IsFP) {
3306 // For i1, Cst will always be -1 regardless of boolean contents.
3307 return (ScalarSizeBits == 1 && Cst == -1) ||
3308 isConstTrueVal(TLI, Cst, IsVector, IsFP);
3309}
3310
3312 SmallVectorImpl<Register> &RegsToNegate) {
3313 assert(MI.getOpcode() == TargetOpcode::G_XOR);
3314 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3315 const auto &TLI = *Builder.getMF().getSubtarget().getTargetLowering();
3316 Register XorSrc;
3317 Register CstReg;
3318 // We match xor(src, true) here.
3319 if (!mi_match(MI.getOperand(0).getReg(), MRI,
3320 m_GXor(m_Reg(XorSrc), m_Reg(CstReg))))
3321 return false;
3322
3323 if (!MRI.hasOneNonDBGUse(XorSrc))
3324 return false;
3325
3326 // Check that XorSrc is the root of a tree of comparisons combined with ANDs
3327 // and ORs. The suffix of RegsToNegate starting from index I is used as a
3328 // work list of tree nodes to visit.
3329 RegsToNegate.push_back(XorSrc);
3330 // Remember whether the comparisons are all integer or all floating point.
3331 bool IsInt = false;
3332 bool IsFP = false;
3333 for (unsigned I = 0; I < RegsToNegate.size(); ++I) {
3334 Register Reg = RegsToNegate[I];
3335 if (!MRI.hasOneNonDBGUse(Reg))
3336 return false;
3337 MachineInstr *Def = MRI.getVRegDef(Reg);
3338 switch (Def->getOpcode()) {
3339 default:
3340 // Don't match if the tree contains anything other than ANDs, ORs and
3341 // comparisons.
3342 return false;
3343 case TargetOpcode::G_ICMP:
3344 if (IsFP)
3345 return false;
3346 IsInt = true;
3347 // When we apply the combine we will invert the predicate.
3348 break;
3349 case TargetOpcode::G_FCMP:
3350 if (IsInt)
3351 return false;
3352 IsFP = true;
3353 // When we apply the combine we will invert the predicate.
3354 break;
3355 case TargetOpcode::G_AND:
3356 case TargetOpcode::G_OR:
3357 // Implement De Morgan's laws:
3358 // ~(x & y) -> ~x | ~y
3359 // ~(x | y) -> ~x & ~y
3360 // When we apply the combine we will change the opcode and recursively
3361 // negate the operands.
3362 RegsToNegate.push_back(Def->getOperand(1).getReg());
3363 RegsToNegate.push_back(Def->getOperand(2).getReg());
3364 break;
3365 }
3366 }
3367
3368 // Now we know whether the comparisons are integer or floating point, check
3369 // the constant in the xor.
3370 int64_t Cst;
3371 if (Ty.isVector()) {
3372 MachineInstr *CstDef = MRI.getVRegDef(CstReg);
3373 auto MaybeCst = getIConstantSplatSExtVal(*CstDef, MRI);
3374 if (!MaybeCst)
3375 return false;
3376 if (!isConstValidTrue(TLI, Ty.getScalarSizeInBits(), *MaybeCst, true, IsFP))
3377 return false;
3378 } else {
3379 if (!mi_match(CstReg, MRI, m_ICst(Cst)))
3380 return false;
3381 if (!isConstValidTrue(TLI, Ty.getSizeInBits(), Cst, false, IsFP))
3382 return false;
3383 }
3384
3385 return true;
3386}
3387
3389 SmallVectorImpl<Register> &RegsToNegate) {
3390 for (Register Reg : RegsToNegate) {
3391 MachineInstr *Def = MRI.getVRegDef(Reg);
3392 Observer.changingInstr(*Def);
3393 // For each comparison, invert the opcode. For each AND and OR, change the
3394 // opcode.
3395 switch (Def->getOpcode()) {
3396 default:
3397 llvm_unreachable("Unexpected opcode");
3398 case TargetOpcode::G_ICMP:
3399 case TargetOpcode::G_FCMP: {
3400 MachineOperand &PredOp = Def->getOperand(1);
3401 CmpInst::Predicate NewP = CmpInst::getInversePredicate(
3402 (CmpInst::Predicate)PredOp.getPredicate());
3403 PredOp.setPredicate(NewP);
3404 break;
3405 }
3406 case TargetOpcode::G_AND:
3407 Def->setDesc(Builder.getTII().get(TargetOpcode::G_OR));
3408 break;
3409 case TargetOpcode::G_OR:
3410 Def->setDesc(Builder.getTII().get(TargetOpcode::G_AND));
3411 break;
3412 }
3413 Observer.changedInstr(*Def);
3414 }
3415
3416 replaceRegWith(MRI, MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
3417 MI.eraseFromParent();
3418}
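// [Editorial illustration, not part of the LLVM source.] Negating a tree of
// comparisons with De Morgan's laws:
//   %c1 = G_ICMP intpred(eq), %a, %b
//   %c2 = G_ICMP intpred(slt), %a, %b
//   %o  = G_OR %c1, %c2
//   %r  = G_XOR %o, -1
// is applied by inverting each predicate (eq -> ne, slt -> sge), turning the
// G_OR into a G_AND, and replacing %r with the rewritten %o.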
3419
3421 MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
3422 // Match (xor (and x, y), y) (or any of its commuted cases)
3423 assert(MI.getOpcode() == TargetOpcode::G_XOR);
3424 Register &X = MatchInfo.first;
3425 Register &Y = MatchInfo.second;
3426 Register AndReg = MI.getOperand(1).getReg();
3427 Register SharedReg = MI.getOperand(2).getReg();
3428
3429 // Find a G_AND on either side of the G_XOR.
3430 // Look for one of
3431 //
3432 // (xor (and x, y), SharedReg)
3433 // (xor SharedReg, (and x, y))
3434 if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y)))) {
3435 std::swap(AndReg, SharedReg);
3436 if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y))))
3437 return false;
3438 }
3439
3440 // Only do this if we'll eliminate the G_AND.
3441 if (!MRI.hasOneNonDBGUse(AndReg))
3442 return false;
3443
3444 // We can combine if SharedReg is the same as either the LHS or RHS of the
3445 // G_AND.
3446 if (Y != SharedReg)
3447 std::swap(X, Y);
3448 return Y == SharedReg;
3449}
3450
3452 MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
3453 // Fold (xor (and x, y), y) -> (and (not x), y)
3454 Register X, Y;
3455 std::tie(X, Y) = MatchInfo;
3456 auto Not = Builder.buildNot(MRI.getType(X), X);
3458 MI.setDesc(Builder.getTII().get(TargetOpcode::G_AND));
3459 MI.getOperand(1).setReg(Not->getOperand(0).getReg());
3460 MI.getOperand(2).setReg(Y);
3462}
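
// Standalone sketch of the bitwise identity behind the rewrite above,
// assuming ordinary unsigned semantics (hypothetical helper, not part of
// the LLVM source): (x & y) ^ y == ~x & y for any x and y.
#include <cstdint>
static bool xorOfAndIdentityHolds(uint32_t X, uint32_t Y) {
  return ((X & Y) ^ Y) == (~X & Y);
}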
3463
3465 auto &PtrAdd = cast<GPtrAdd>(MI);
3466 Register DstReg = PtrAdd.getReg(0);
3467 LLT Ty = MRI.getType(DstReg);
3469
3470 if (DL.isNonIntegralAddressSpace(Ty.getScalarType().getAddressSpace()))
3471 return false;
3472
3473 if (Ty.isPointer()) {
3474 auto ConstVal = getIConstantVRegVal(PtrAdd.getBaseReg(), MRI);
3475 return ConstVal && *ConstVal == 0;
3476 }
3477
3478 assert(Ty.isVector() && "Expecting a vector type");
3479 const MachineInstr *VecMI = MRI.getVRegDef(PtrAdd.getBaseReg());
3480 return isBuildVectorAllZeros(*VecMI, MRI);
3481}
3482
3484 auto &PtrAdd = cast<GPtrAdd>(MI);
3485 Builder.buildIntToPtr(PtrAdd.getReg(0), PtrAdd.getOffsetReg());
3486 PtrAdd.eraseFromParent();
3487}
3488
3489/// The second source operand is known to be a power of 2.
3491 Register DstReg = MI.getOperand(0).getReg();
3492 Register Src0 = MI.getOperand(1).getReg();
3493 Register Pow2Src1 = MI.getOperand(2).getReg();
3494 LLT Ty = MRI.getType(DstReg);
3495
3496 // Fold (urem x, pow2) -> (and x, pow2-1)
3497 auto NegOne = Builder.buildConstant(Ty, -1);
3498 auto Add = Builder.buildAdd(Ty, Pow2Src1, NegOne);
3499 Builder.buildAnd(DstReg, Src0, Add);
3500 MI.eraseFromParent();
3501}
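
// Standalone sketch of the arithmetic fact used above, assuming unsigned
// semantics and a nonzero power-of-two divisor (hypothetical helper, not
// part of the LLVM source): x % P == x & (P - 1). The combine materializes
// P - 1 as P + (-1) so the same sequence works elementwise for vectors.
#include <cstdint>
static bool uremPow2IdentityHolds(uint64_t X, uint64_t P) {
  return (X % P) == (X & (P - 1)); // P must be a power of two
}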
3502
3504 unsigned &SelectOpNo) {
3505 Register LHS = MI.getOperand(1).getReg();
3506 Register RHS = MI.getOperand(2).getReg();
3507
3508 Register OtherOperandReg = RHS;
3509 SelectOpNo = 1;
3511
3512 // Don't do this unless the old select is going away. We want to eliminate the
3513 // binary operator, not replace a binop with a select.
3514 if (Select->getOpcode() != TargetOpcode::G_SELECT ||
3516 OtherOperandReg = LHS;
3517 SelectOpNo = 2;
3519 if (Select->getOpcode() != TargetOpcode::G_SELECT ||
3521 return false;
3522 }
3523
3524 MachineInstr *SelectLHS = MRI.getVRegDef(Select->getOperand(2).getReg());
3525 MachineInstr *SelectRHS = MRI.getVRegDef(Select->getOperand(3).getReg());
3526
3527 if (!isConstantOrConstantVector(*SelectLHS, MRI,
3528 /*AllowFP*/ true,
3529 /*AllowOpaqueConstants*/ false))
3530 return false;
3531 if (!isConstantOrConstantVector(*SelectRHS, MRI,
3532 /*AllowFP*/ true,
3533 /*AllowOpaqueConstants*/ false))
3534 return false;
3535
3536 unsigned BinOpcode = MI.getOpcode();
3537
3538 // We know that one of the operands is a select of constants. Now verify that
3539 // the other binary operator operand is either a constant, or we can handle a
3540 // variable.
3541 bool CanFoldNonConst =
3542 (BinOpcode == TargetOpcode::G_AND || BinOpcode == TargetOpcode::G_OR) &&
3543 (isNullOrNullSplat(*SelectLHS, MRI) ||
3544 isAllOnesOrAllOnesSplat(*SelectLHS, MRI)) &&
3545 (isNullOrNullSplat(*SelectRHS, MRI) ||
3546 isAllOnesOrAllOnesSplat(*SelectRHS, MRI));
3547 if (CanFoldNonConst)
3548 return true;
3549
3550 return isConstantOrConstantVector(*MRI.getVRegDef(OtherOperandReg), MRI,
3551 /*AllowFP*/ true,
3552 /*AllowOpaqueConstants*/ false);
3553}
3554
3555/// \p SelectOperand is the operand in binary operator \p MI that is the select
3556/// to fold.
3558 const unsigned &SelectOperand) {
3559 Register Dst = MI.getOperand(0).getReg();
3560 Register LHS = MI.getOperand(1).getReg();
3561 Register RHS = MI.getOperand(2).getReg();
3562 MachineInstr *Select = MRI.getVRegDef(MI.getOperand(SelectOperand).getReg());
3563
3564 Register SelectCond = Select->getOperand(1).getReg();
3565 Register SelectTrue = Select->getOperand(2).getReg();
3566 Register SelectFalse = Select->getOperand(3).getReg();
3567
3568 LLT Ty = MRI.getType(Dst);
3569 unsigned BinOpcode = MI.getOpcode();
3570
3571 Register FoldTrue, FoldFalse;
3572
3573 // We have a select-of-constants followed by a binary operator with a
3574 // constant. Eliminate the binop by pulling the constant math into the select.
3575 // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
3576 if (SelectOperand == 1) {
3577 // TODO: SelectionDAG verifies this actually constant folds before
3578 // committing to the combine.
3579
3580 FoldTrue = Builder.buildInstr(BinOpcode, {Ty}, {SelectTrue, RHS}).getReg(0);
3581 FoldFalse =
3582 Builder.buildInstr(BinOpcode, {Ty}, {SelectFalse, RHS}).getReg(0);
3583 } else {
3584 FoldTrue = Builder.buildInstr(BinOpcode, {Ty}, {LHS, SelectTrue}).getReg(0);
3585 FoldFalse =
3586 Builder.buildInstr(BinOpcode, {Ty}, {LHS, SelectFalse}).getReg(0);
3587 }
3588
3589 Builder.buildSelect(Dst, SelectCond, FoldTrue, FoldFalse, MI.getFlags());
3590 MI.eraseFromParent();
3591}
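
// Standalone sketch of the fold performed above for G_ADD, assuming scalar
// integers (hypothetical helper, not part of the LLVM source): applying the
// binop to both select arms up front lets the select pick between two
// pre-folded constants instead of feeding a separate add.
static long long foldedSelectAdd(bool Cond, long long CT, long long CF,
                                 long long CBO) {
  // add (select Cond, CT, CF), CBO == select Cond, CT + CBO, CF + CBO
  return Cond ? (CT + CBO) : (CF + CBO);
}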
3592
3593std::optional<SmallVector<Register, 8>>
3594CombinerHelper::findCandidatesForLoadOrCombine(const MachineInstr *Root) const {
3595 assert(Root->getOpcode() == TargetOpcode::G_OR && "Expected G_OR only!");
3596 // We want to detect if Root is part of a tree which represents a bunch
3597 // of loads being merged into a larger load. We'll try to recognize patterns
3598 // like, for example:
3599 //
3600 // Reg Reg
3601 // \ /
3602 // OR_1 Reg
3603 // \ /
3604 // OR_2
3605 // \ Reg
3606 // .. /
3607 // Root
3608 //
3609 // Reg Reg Reg Reg
3610 // \ / \ /
3611 // OR_1 OR_2
3612 // \ /
3613 // \ /
3614 // ...
3615 // Root
3616 //
3617 // Each "Reg" may have been produced by a load + some arithmetic. This
3618 // function will save each of them.
3619 SmallVector<Register, 8> RegsToVisit;
3621
3622 // In the "worst" case, we're dealing with a load for each byte. So, there
3623 // are at most #bytes - 1 ORs.
3624 const unsigned MaxIter =
3625 MRI.getType(Root->getOperand(0).getReg()).getSizeInBytes() - 1;
3626 for (unsigned Iter = 0; Iter < MaxIter; ++Iter) {
3627 if (Ors.empty())
3628 break;
3629 const MachineInstr *Curr = Ors.pop_back_val();
3630 Register OrLHS = Curr->getOperand(1).getReg();
3631 Register OrRHS = Curr->getOperand(2).getReg();
3632
3633 // In the combine, we want to eliminate the entire tree.
3634 if (!MRI.hasOneNonDBGUse(OrLHS) || !MRI.hasOneNonDBGUse(OrRHS))
3635 return std::nullopt;
3636
3637 // If it's a G_OR, save it and continue to walk. If it's not, then it's
3638 // something that may be a load + arithmetic.
3639 if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrLHS, MRI))
3640 Ors.push_back(Or);
3641 else
3642 RegsToVisit.push_back(OrLHS);
3643 if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrRHS, MRI))
3644 Ors.push_back(Or);
3645 else
3646 RegsToVisit.push_back(OrRHS);
3647 }
3648
3649 // We're going to try to merge each register into a wider power-of-2 type,
3650 // so we ought to have an even number of registers.
3651 if (RegsToVisit.empty() || RegsToVisit.size() % 2 != 0)
3652 return std::nullopt;
3653 return RegsToVisit;
3654}
3655
3656/// Helper function for findLoadOffsetsForLoadOrCombine.
3657///
3658/// Check if \p Reg is the result of loading a \p MemSizeInBits wide value,
3659/// and then moving that value into a specific byte offset.
3660///
3661/// e.g. x[i] << 24
3662///
3663/// \returns The load instruction and the byte offset it is moved into.
3664static std::optional<std::pair<GZExtLoad *, int64_t>>
3665matchLoadAndBytePosition(Register Reg, unsigned MemSizeInBits,
3666 const MachineRegisterInfo &MRI) {
3667 assert(MRI.hasOneNonDBGUse(Reg) &&
3668 "Expected Reg to only have one non-debug use?");
3669 Register MaybeLoad;
3670 int64_t Shift;
3671 if (!mi_match(Reg, MRI,
3672 m_OneNonDBGUse(m_GShl(m_Reg(MaybeLoad), m_ICst(Shift))))) {
3673 Shift = 0;
3674 MaybeLoad = Reg;
3675 }
3676
3677 if (Shift % MemSizeInBits != 0)
3678 return std::nullopt;
3679
3680 // TODO: Handle other types of loads.
3681 auto *Load = getOpcodeDef<GZExtLoad>(MaybeLoad, MRI);
3682 if (!Load)
3683 return std::nullopt;
3684
3685 if (!Load->isUnordered() || Load->getMemSizeInBits() != MemSizeInBits)
3686 return std::nullopt;
3687
3688 return std::make_pair(Load, Shift / MemSizeInBits);
3689}
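
// Standalone sketch of the byte-position computation above, assuming 8-bit
// element loads (hypothetical helper, not part of the LLVM source): a value
// matched as (load << Shift) lands at byte offset Shift / 8 of the wide
// result, e.g. x[i] << 24 contributes byte 3; a shift that is not a whole
// number of elements is rejected.
static long long bytePositionForShift(long long ShiftInBits,
                                      unsigned MemSizeInBits = 8) {
  return (ShiftInBits % MemSizeInBits == 0) ? ShiftInBits / MemSizeInBits : -1;
}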
3690
3691std::optional<std::tuple<GZExtLoad *, int64_t, GZExtLoad *>>
3692CombinerHelper::findLoadOffsetsForLoadOrCombine(
3694 const SmallVector<Register, 8> &RegsToVisit, const unsigned MemSizeInBits) {
3695
3696 // Each load found for the pattern. There should be one for each RegsToVisit.
3698
3699 // The lowest index used in any load. (The lowest "i" for each x[i].)
3700 int64_t LowestIdx = INT64_MAX;
3701
3702 // The load which uses the lowest index.
3703 GZExtLoad *LowestIdxLoad = nullptr;
3704
3705 // Keeps track of the load indices we see. We shouldn't see any indices twice.
3706 SmallSet<int64_t, 8> SeenIdx;
3707
3708 // Ensure each load is in the same MBB.
3709 // TODO: Support multiple MachineBasicBlocks.
3710 MachineBasicBlock *MBB = nullptr;
3711 const MachineMemOperand *MMO = nullptr;
3712
3713 // Earliest instruction-order load in the pattern.
3714 GZExtLoad *EarliestLoad = nullptr;
3715
3716 // Latest instruction-order load in the pattern.
3717 GZExtLoad *LatestLoad = nullptr;
3718
3719 // Base pointer which every load should share.
3721
3722 // We want to find a load for each register. Each load should have some
3723 // appropriate bit twiddling arithmetic. During this loop, we will also keep
3724 // track of the load which uses the lowest index. Later, we will check if we
3725 // can use its pointer in the final, combined load.
3726 for (auto Reg : RegsToVisit) {
3727 // Find the load, and find the byte position it will occupy in the
3728 // (possibly shifted) value.
3729 auto LoadAndPos = matchLoadAndBytePosition(Reg, MemSizeInBits, MRI);
3730 if (!LoadAndPos)
3731 return std::nullopt;
3732 GZExtLoad *Load;
3733 int64_t DstPos;
3734 std::tie(Load, DstPos) = *LoadAndPos;
3735
3736 // TODO: Handle multiple MachineBasicBlocks. Currently not handled because
3737 // it is difficult to check for stores/calls/etc between loads.
3738 MachineBasicBlock *LoadMBB = Load->getParent();
3739 if (!MBB)
3740 MBB = LoadMBB;
3741 if (LoadMBB != MBB)
3742 return std::nullopt;
3743
3744 // Make sure that the MachineMemOperands of every seen load are compatible.
3745 auto &LoadMMO = Load->getMMO();
3746 if (!MMO)
3747 MMO = &LoadMMO;
3748 if (MMO->getAddrSpace() != LoadMMO.getAddrSpace())
3749 return std::nullopt;
3750
3751 // Find out what the base pointer and index for the load is.
3752 Register LoadPtr;
3753 int64_t Idx;
3754 if (!mi_match(Load->getOperand(1).getReg(), MRI,
3755 m_GPtrAdd(m_Reg(LoadPtr), m_ICst(Idx)))) {
3756 LoadPtr = Load->getOperand(1).getReg();
3757 Idx = 0;
3758 }
3759
3760 // Don't combine things like a[i], a[i] -> a bigger load.
3761 if (!SeenIdx.insert(Idx).second)
3762 return std::nullopt;
3763
3764 // Every load must share the same base pointer; don't combine things like:
3765 //
3766 // a[i], b[i + 1] -> a bigger load.
3767 if (!BasePtr.isValid())
3768 BasePtr = LoadPtr;
3769 if (BasePtr != LoadPtr)
3770 return std::nullopt;
3771
3772 if (Idx < LowestIdx) {
3773 LowestIdx = Idx;
3774 LowestIdxLoad = Load;
3775 }
3776
3777 // Keep track of the byte offset that this load ends up at. If we have seen
3778 // the byte offset, then stop here. We do not want to combine:
3779 //
3780 // a[i] << 16, a[i + k] << 16 -> a bigger load.
3781 if (!MemOffset2Idx.try_emplace(DstPos, Idx).second)
3782 return std::nullopt;
3783 Loads.insert(Load);
3784
3785 // Keep track of the position of the earliest/latest loads in the pattern.
3786 // We will check that there are no load fold barriers between them later
3787 // on.
3788 //
3789 // FIXME: Is there a better way to check for load fold barriers?
3790 if (!EarliestLoad || dominates(*Load, *EarliestLoad))
3791 EarliestLoad = Load;
3792 if (!LatestLoad || dominates(*LatestLoad, *Load))
3793 LatestLoad = Load;
3794 }
3795
3796 // We found a load for each register. Let's check if each load satisfies the
3797 // pattern.
3798 assert(Loads.size() == RegsToVisit.size() &&
3799 "Expected to find a load for each register?");
3800 assert(EarliestLoad != LatestLoad && EarliestLoad &&
3801 LatestLoad && "Expected at least two loads?");
3802
3803 // Check if there are any stores, calls, etc. between any of the loads. If
3804 // there are, then we can't safely perform the combine.
3805 //
3806 // MaxIter is chosen based on the (worst case) number of iterations it
3807 // typically takes to succeed in the LLVM test suite, plus some padding.
3808 //
3809 // FIXME: Is there a better way to check for load fold barriers?
3810 const unsigned MaxIter = 20;
3811 unsigned Iter = 0;
3812 for (const auto &MI : instructionsWithoutDebug(EarliestLoad->getIterator(),
3813 LatestLoad->getIterator())) {
3814 if (Loads.count(&MI))
3815 continue;
3816 if (MI.isLoadFoldBarrier())
3817 return std::nullopt;
3818 if (Iter++ == MaxIter)
3819 return std::nullopt;
3820 }
3821
3822 return std::make_tuple(LowestIdxLoad, LowestIdx, LatestLoad);
3823}
3824
3826 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
3827 assert(MI.getOpcode() == TargetOpcode::G_OR);
3828 MachineFunction &MF = *MI.getMF();
3829 // Assuming a little-endian target, transform:
3830 // s8 *a = ...
3831 // s32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
3832 // =>
3833 // s32 val = *((i32)a)
3834 //
3835 // s8 *a = ...
3836 // s32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
3837 // =>
3838 // s32 val = BSWAP(*((s32)a))
3839 Register Dst = MI.getOperand(0).getReg();
3840 LLT Ty = MRI.getType(Dst);
3841 if (Ty.isVector())
3842 return false;
3843
3844 // We need to combine at least two loads into this type. Since the smallest
3845 // possible load is into a byte, we need at least a 16-bit wide type.
3846 const unsigned WideMemSizeInBits = Ty.getSizeInBits();
3847 if (WideMemSizeInBits < 16 || WideMemSizeInBits % 8 != 0)
3848 return false;
3849
3850 // Match a collection of non-OR instructions in the pattern.
3851 auto RegsToVisit = findCandidatesForLoadOrCombine(&MI);
3852 if (!RegsToVisit)
3853 return false;
3854
3855 // We have a collection of non-OR instructions. Figure out how wide each of
3856 // the small loads should be based on the number of potential loads we
3857 // found.
3858 const unsigned NarrowMemSizeInBits = WideMemSizeInBits / RegsToVisit->size();
3859 if (NarrowMemSizeInBits % 8 != 0)
3860 return false;
3861
3862 // Check if each register feeding into each OR is a load from the same
3863 // base pointer + some arithmetic.
3864 //
3865 // e.g. a[0], a[1] << 8, a[2] << 16, etc.
3866 //
3867 // Also verify that each of these ends up putting a[i] into the same memory
3868 // offset as a load into a wide type would.
3870 GZExtLoad *LowestIdxLoad, *LatestLoad;
3871 int64_t LowestIdx;
3872 auto MaybeLoadInfo = findLoadOffsetsForLoadOrCombine(
3873 MemOffset2Idx, *RegsToVisit, NarrowMemSizeInBits);
3874 if (!MaybeLoadInfo)
3875 return false;
3876 std::tie(LowestIdxLoad, LowestIdx, LatestLoad) = *MaybeLoadInfo;
3877
3878 // We have a bunch of loads being OR'd together. Using the addresses + offsets
3879 // we found before, check if this corresponds to a big or little endian byte
3880 // pattern. If it does, then we can represent it using a load + possibly a
3881 // BSWAP.
3882 bool IsBigEndianTarget = MF.getDataLayout().isBigEndian();
3883 std::optional<bool> IsBigEndian = isBigEndian(MemOffset2Idx, LowestIdx);
3884 if (!IsBigEndian)
3885 return false;
3886 bool NeedsBSwap = IsBigEndianTarget != *IsBigEndian;
3887 if (NeedsBSwap && !isLegalOrBeforeLegalizer({TargetOpcode::G_BSWAP, {Ty}}))
3888 return false;
3889
3890 // Make sure that the load from the lowest index produces offset 0 in the
3891 // final value.
3892 //
3893 // This ensures that we won't combine something like this:
3894 //
3895 // load x[i] -> byte 2
3896 // load x[i+1] -> byte 0 ---> wide_load x[i]
3897 // load x[i+2] -> byte 1
3898 const unsigned NumLoadsInTy = WideMemSizeInBits / NarrowMemSizeInBits;
3899 const unsigned ZeroByteOffset =
3900 *IsBigEndian
3901 ? bigEndianByteAt(NumLoadsInTy, 0)
3902 : littleEndianByteAt(NumLoadsInTy, 0);
3903 auto ZeroOffsetIdx = MemOffset2Idx.find(ZeroByteOffset);
3904 if (ZeroOffsetIdx == MemOffset2Idx.end() ||
3905 ZeroOffsetIdx->second != LowestIdx)
3906 return false;
3907
3908 // We will reuse the pointer from the load which ends up at byte offset 0. It
3909 // may not use index 0.
3910 Register Ptr = LowestIdxLoad->getPointerReg();
3911 const MachineMemOperand &MMO = LowestIdxLoad->getMMO();
3912 LegalityQuery::MemDesc MMDesc(MMO);
3913 MMDesc.MemoryTy = Ty;
3915 {TargetOpcode::G_LOAD, {Ty, MRI.getType(Ptr)}, {MMDesc}}))
3916 return false;
3917 auto PtrInfo = MMO.getPointerInfo();
3918 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, WideMemSizeInBits / 8);
3919
3920 // Load must be allowed and fast on the target.
3922 auto &DL = MF.getDataLayout();
3923 unsigned Fast = 0;
3924 if (!getTargetLowering().allowsMemoryAccess(C, DL, Ty, *NewMMO, &Fast) ||
3925 !Fast)
3926 return false;
3927
3928 MatchInfo = [=](MachineIRBuilder &MIB) {
3929 MIB.setInstrAndDebugLoc(*LatestLoad);
3930 Register LoadDst = NeedsBSwap ? MRI.cloneVirtualRegister(Dst) : Dst;
3931 MIB.buildLoad(LoadDst, Ptr, *NewMMO);
3932 if (NeedsBSwap)
3933 MIB.buildBSwap(Dst, LoadDst);
3934 };
3935 return true;
3936}
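
// Standalone sketch of the equivalence this combine exploits, assuming a
// little-endian host and 8-bit element loads (hypothetical helpers, not
// part of the LLVM source): OR-ing zero-extended byte loads shifted into
// position reproduces a single wide load of the same memory; a big-endian
// byte pattern additionally needs a byte swap of the wide load.
#include <cstdint>
#include <cstring>
static uint32_t orOfByteLoads(const uint8_t *A) {
  return (uint32_t)A[0] | ((uint32_t)A[1] << 8) | ((uint32_t)A[2] << 16) |
         ((uint32_t)A[3] << 24);
}
static uint32_t wideLoad(const uint8_t *A) {
  uint32_t V;
  std::memcpy(&V, A, sizeof(V)); // equals orOfByteLoads(A) on little-endian
  return V;
}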
3937
3939 MachineInstr *&ExtMI) {
3940 auto &PHI = cast<GPhi>(MI);
3941 Register DstReg = PHI.getReg(0);
3942
3943 // TODO: Extending a vector may be expensive; don't do this until heuristics
3944 // are better.
3945 if (MRI.getType(DstReg).isVector())
3946 return false;
3947
3948 // Try to match a phi, whose only use is an extend.
3949 if (!MRI.hasOneNonDBGUse(DstReg))
3950 return false;
3951 ExtMI = &*MRI.use_instr_nodbg_begin(DstReg);
3952 switch (ExtMI->getOpcode()) {
3953 case TargetOpcode::G_ANYEXT:
3954 return true; // G_ANYEXT is usually free.
3955 case TargetOpcode::G_ZEXT:
3956 case TargetOpcode::G_SEXT:
3957 break;
3958 default:
3959 return false;
3960 }
3961
3962 // If the target is likely to fold this extend away, don't propagate.
3964 return false;
3965
3966 // We don't want to propagate the extends unless there's a good chance that
3967 // they'll be optimized in some way.
3968 // Collect the unique incoming values.
3970 for (unsigned I = 0; I < PHI.getNumIncomingValues(); ++I) {
3971 auto *DefMI = getDefIgnoringCopies(PHI.getIncomingValue(I), MRI);
3972 switch (DefMI->getOpcode()) {
3973 case TargetOpcode::G_LOAD:
3974 case TargetOpcode::G_TRUNC:
3975 case TargetOpcode::G_SEXT:
3976 case TargetOpcode::G_ZEXT:
3977 case TargetOpcode::G_ANYEXT:
3978 case TargetOpcode::G_CONSTANT:
3979 InSrcs.insert(DefMI);
3980 // Don't try to propagate if there are too many places to create new
3981 // extends; chances are it'll increase code size.
3982 if (InSrcs.size() > 2)
3983 return false;
3984 break;
3985 default:
3986 return false;
3987 }
3988 }
3989 return true;
3990}
3991
3993 MachineInstr *&ExtMI) {
3994 auto &PHI = cast<GPhi>(MI);
3995 Register DstReg = ExtMI->getOperand(0).getReg();
3996 LLT ExtTy = MRI.getType(DstReg);
3997
3998 // Propagate the extension into each incoming register's block.
3999 // Use a SetVector here because PHIs can have duplicate edges, and we want
4000 // deterministic iteration order.
4003 for (unsigned I = 0; I < PHI.getNumIncomingValues(); ++I) {
4004 auto SrcReg = PHI.getIncomingValue(I);
4005 auto *SrcMI = MRI.getVRegDef(SrcReg);
4006 if (!SrcMIs.insert(SrcMI))
4007 continue;
4008
4009 // Build an extend after each src inst.
4010 auto *MBB = SrcMI->getParent();
4011 MachineBasicBlock::iterator InsertPt = ++SrcMI->getIterator();
4012 if (InsertPt != MBB->end() && InsertPt->isPHI())
4013 InsertPt = MBB->getFirstNonPHI();
4014
4015 Builder.setInsertPt(*SrcMI->getParent(), InsertPt);
4016 Builder.setDebugLoc(MI.getDebugLoc());
4017 auto NewExt = Builder.buildExtOrTrunc(ExtMI->getOpcode(), ExtTy, SrcReg);
4018 OldToNewSrcMap[SrcMI] = NewExt;
4019 }
4020
4021 // Create a new phi with the extended inputs.
4023 auto NewPhi = Builder.buildInstrNoInsert(TargetOpcode::G_PHI);
4024 NewPhi.addDef(DstReg);
4025 for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) {
4026 if (!MO.isReg()) {
4027 NewPhi.addMBB(MO.getMBB());
4028 continue;
4029 }
4030 auto *NewSrc = OldToNewSrcMap[MRI.getVRegDef(MO.getReg())];
4031 NewPhi.addUse(NewSrc->getOperand(0).getReg());
4032 }
4033 Builder.insertInstr(NewPhi);
4034 ExtMI->eraseFromParent();
4035}
4036
4038 Register &Reg) {
4039 assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT);
4040 // If we have a constant index, look for a G_BUILD_VECTOR source
4041 // and find the source register that the index maps to.
4042 Register SrcVec = MI.getOperand(1).getReg();
4043 LLT SrcTy = MRI.getType(SrcVec);
4044
4045 auto Cst = getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
4046 if (!Cst || Cst->Value.getZExtValue() >= SrcTy.getNumElements())
4047 return false;
4048
4049 unsigned VecIdx = Cst->Value.getZExtValue();
4050
4051 // Check if we have a build_vector or build_vector_trunc with an optional
4052 // trunc in front.
4053 MachineInstr *SrcVecMI = MRI.getVRegDef(SrcVec);
4054 if (SrcVecMI->getOpcode() == TargetOpcode::G_TRUNC) {
4055 SrcVecMI = MRI.getVRegDef(SrcVecMI->getOperand(1).getReg());
4056 }
4057
4058 if (SrcVecMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR &&
4059 SrcVecMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR_TRUNC)
4060 return false;
4061
4062 EVT Ty(getMVTForLLT(SrcTy));
4063 if (!MRI.hasOneNonDBGUse(SrcVec) &&
4064 !getTargetLowering().aggressivelyPreferBuildVectorSources(Ty))
4065 return false;
4066
4067 Reg = SrcVecMI->getOperand(VecIdx + 1).getReg();
4068 return true;
4069}
4070
4072 Register &Reg) {
4073 // Check the type of the register, since it may have come from a
4074 // G_BUILD_VECTOR_TRUNC.
4075 LLT ScalarTy = MRI.getType(Reg);
4076 Register DstReg = MI.getOperand(0).getReg();
4077 LLT DstTy = MRI.getType(DstReg);
4078
4079 if (ScalarTy != DstTy) {
4080 assert(ScalarTy.getSizeInBits() > DstTy.getSizeInBits());
4081 Builder.buildTrunc(DstReg, Reg);
4082 MI.eraseFromParent();
4083 return;
4084 }
4086}
4087
4090 SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) {
4091 assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
4092 // This combine tries to find build_vector's which have every source element
4093 // extracted using G_EXTRACT_VECTOR_ELT. This can happen when transforms like
4094 // the masked load scalarization is run late in the pipeline. There's already
4095 // a combine for a similar pattern starting from the extract, but that
4096 // doesn't attempt to do it if there are multiple uses of the build_vector,
4097 // which in this case is true. Starting the combine from the build_vector
4098 // feels more natural than trying to find sibling nodes of extracts.
4099 // E.g.
4100 // %vec(<4 x s32>) = G_BUILD_VECTOR %s1(s32), %s2, %s3, %s4
4101 // %ext1 = G_EXTRACT_VECTOR_ELT %vec, 0
4102 // %ext2 = G_EXTRACT_VECTOR_ELT %vec, 1
4103 // %ext3 = G_EXTRACT_VECTOR_ELT %vec, 2
4104 // %ext4 = G_EXTRACT_VECTOR_ELT %vec, 3
4105 // ==>
4106 // replace ext{1,2,3,4} with %s{1,2,3,4}
4107
4108 Register DstReg = MI.getOperand(0).getReg();
4109 LLT DstTy = MRI.getType(DstReg);
4110 unsigned NumElts = DstTy.getNumElements();
4111
4112 SmallBitVector ExtractedElts(NumElts);
4113 for (MachineInstr &II : MRI.use_nodbg_instructions(DstReg)) {
4114 if (II.getOpcode() != TargetOpcode::G_EXTRACT_VECTOR_ELT)
4115 return false;
4116 auto Cst = getIConstantVRegVal(II.getOperand(2).getReg(), MRI);
4117 if (!Cst)
4118 return false;
4119 unsigned Idx = Cst->getZExtValue();
4120 if (Idx >= NumElts)
4121 return false; // Out of range.
4122 ExtractedElts.set(Idx);
4123 SrcDstPairs.emplace_back(
4124 std::make_pair(MI.getOperand(Idx + 1).getReg(), &II));
4125 }
4126 // Match if every element was extracted.
4127 return ExtractedElts.all();
4128}
4129
4132 SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) {
4133 assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
4134 for (auto &Pair : SrcDstPairs) {
4135 auto *ExtMI = Pair.second;
4136 replaceRegWith(MRI, ExtMI->getOperand(0).getReg(), Pair.first);
4137 ExtMI->eraseFromParent();
4138 }
4139 MI.eraseFromParent();
4140}
4141
4143 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4144 applyBuildFnNoErase(MI, MatchInfo);
4145 MI.eraseFromParent();
4146}
4147
4149 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4150 MatchInfo(Builder);
4151}
4152
4154 BuildFnTy &MatchInfo) {
4155 assert(MI.getOpcode() == TargetOpcode::G_OR);
4156
4157 Register Dst = MI.getOperand(0).getReg();
4158 LLT Ty = MRI.getType(Dst);
4159 unsigned BitWidth = Ty.getScalarSizeInBits();
4160
4161 Register ShlSrc, ShlAmt, LShrSrc, LShrAmt, Amt;
4162 unsigned FshOpc = 0;
4163
4164 // Match (or (shl ...), (lshr ...)).
4165 if (!mi_match(Dst, MRI,
4166 // m_GOr() handles the commuted version as well.
4167 m_GOr(m_GShl(m_Reg(ShlSrc), m_Reg(ShlAmt)),
4168 m_GLShr(m_Reg(LShrSrc), m_Reg(LShrAmt)))))
4169 return false;
4170
4171 // Given constants C0 and C1 such that C0 + C1 is bit-width:
4172 // (or (shl x, C0), (lshr y, C1)) -> (fshl x, y, C0) or (fshr x, y, C1)
4173 int64_t CstShlAmt, CstLShrAmt;
4174 if (mi_match(ShlAmt, MRI, m_ICstOrSplat(CstShlAmt)) &&
4175 mi_match(LShrAmt, MRI, m_ICstOrSplat(CstLShrAmt)) &&
4176 CstShlAmt + CstLShrAmt == BitWidth) {
4177 FshOpc = TargetOpcode::G_FSHR;
4178 Amt = LShrAmt;
4179
4180 } else if (mi_match(LShrAmt, MRI,
4182 ShlAmt == Amt) {
4183 // (or (shl x, amt), (lshr y, (sub bw, amt))) -> (fshl x, y, amt)
4184 FshOpc = TargetOpcode::G_FSHL;
4185
4186 } else if (mi_match(ShlAmt, MRI,
4188 LShrAmt == Amt) {
4189 // (or (shl x, (sub bw, amt)), (lshr y, amt)) -> (fshr x, y, amt)
4190 FshOpc = TargetOpcode::G_FSHR;
4191
4192 } else {
4193 return false;
4194 }
4195
4196 LLT AmtTy = MRI.getType(Amt);
4197 if (!isLegalOrBeforeLegalizer({FshOpc, {Ty, AmtTy}}))
4198 return false;
4199
4200 MatchInfo = [=](MachineIRBuilder &B) {
4201 B.buildInstr(FshOpc, {Dst}, {ShlSrc, LShrSrc, Amt});
4202 };
4203 return true;
4204}
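
// Standalone sketch of the funnel-shift identity matched above, assuming
// 32-bit operands and 0 < Amt < 32 (hypothetical helper, not part of the
// LLVM source): (x << C0) | (y >> C1) with C0 + C1 == bit-width is
// fshl(x, y, C0), which produces the same value as fshr(x, y, C1).
#include <cstdint>
static uint32_t fshlSketch(uint32_t X, uint32_t Y, unsigned Amt) {
  return (X << Amt) | (Y >> (32 - Amt));
}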
4205
4206/// Match an FSHL or FSHR that can be combined to a ROTR or ROTL rotate.
4208 unsigned Opc = MI.getOpcode();
4209 assert(Opc == TargetOpcode::G_FSHL || Opc == TargetOpcode::G_FSHR);
4210 Register X = MI.getOperand(1).getReg();
4211 Register Y = MI.getOperand(2).getReg();
4212 if (X != Y)
4213 return false;
4214 unsigned RotateOpc =
4215 Opc == TargetOpcode::G_FSHL ? TargetOpcode::G_ROTL : TargetOpcode::G_ROTR;
4216 return isLegalOrBeforeLegalizer({RotateOpc, {MRI.getType(X), MRI.getType(Y)}});
4217}
4218
4220 unsigned Opc = MI.getOpcode();
4221 assert(Opc == TargetOpcode::G_FSHL || Opc == TargetOpcode::G_FSHR);
4222 bool IsFSHL = Opc == TargetOpcode::G_FSHL;
4224 MI.setDesc(Builder.getTII().get(IsFSHL ? TargetOpcode::G_ROTL
4225 : TargetOpcode::G_ROTR));
4226 MI.removeOperand(2);
4228}
4229
4230// Fold (rot x, c) -> (rot x, c % BitSize)
4232 assert(MI.getOpcode() == TargetOpcode::G_ROTL ||
4233 MI.getOpcode() == TargetOpcode::G_ROTR);
4234 unsigned Bitsize =
4235 MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits();
4236 Register AmtReg = MI.getOperand(2).getReg();
4237 bool OutOfRange = false;
4238 auto MatchOutOfRange = [Bitsize, &OutOfRange](const Constant *C) {
4239 if (auto *CI = dyn_cast<ConstantInt>(C))
4240 OutOfRange |= CI->getValue().uge(Bitsize);
4241 return true;
4242 };
4243 return matchUnaryPredicate(MRI, AmtReg, MatchOutOfRange) && OutOfRange;
4244}
4245
4247 assert(MI.getOpcode() == TargetOpcode::G_ROTL ||
4248 MI.getOpcode() == TargetOpcode::G_ROTR);
4249 unsigned Bitsize =
4250 MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits();
4251 Register Amt = MI.getOperand(2).getReg();
4252 LLT AmtTy = MRI.getType(Amt);
4253 auto Bits = Builder.buildConstant(AmtTy, Bitsize);
4254 Amt = Builder.buildURem(AmtTy, MI.getOperand(2).getReg(), Bits).getReg(0);
4256 MI.getOperand(2).setReg(Amt);
4258}
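
// Standalone sketch of the modular-amount fact used above, assuming 32-bit
// values (hypothetical helper, not part of the LLVM source): rotating by C
// gives the same result as rotating by C % BitSize, so an out-of-range
// constant amount can be reduced with a G_UREM by the bit width.
#include <cstdint>
static uint32_t rotlSketch(uint32_t X, unsigned C) {
  unsigned Amt = C % 32; // rot x, c == rot x, c % 32
  return Amt ? (X << Amt) | (X >> (32 - Amt)) : X;
}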
4259
4261 int64_t &MatchInfo) {
4262 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
4263 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
4264
4265 // We want to avoid calling KnownBits on the LHS if possible, as this combine
4266 // has no filter and runs on every G_ICMP instruction. We can avoid calling
4267 // KnownBits on the LHS in two cases:
4268 //
4269 // - The RHS is unknown: Constants are always on RHS. If the RHS is unknown
4270 // we cannot do any transforms so we can safely bail out early.
4271 // - The RHS is zero: we don't need to know the LHS to do unsigned <0 and
4272 // >=0.
4273 auto KnownRHS = KB->getKnownBits(MI.getOperand(3).getReg());
4274 if (KnownRHS.isUnknown())
4275 return false;
4276
4277 std::optional<bool> KnownVal;
4278 if (KnownRHS.isZero()) {
4279 // ? uge 0 -> always true
4280 // ? ult 0 -> always false
4281 if (Pred == CmpInst::ICMP_UGE)
4282 KnownVal = true;
4283 else if (Pred == CmpInst::ICMP_ULT)
4284 KnownVal = false;
4285 }
4286
4287 if (!KnownVal) {
4288 auto KnownLHS = KB->getKnownBits(MI.getOperand(2).getReg());
4289 switch (Pred) {
4290 default:
4291 llvm_unreachable("Unexpected G_ICMP predicate?");
4292 case CmpInst::ICMP_EQ:
4293 KnownVal = KnownBits::eq(KnownLHS, KnownRHS);
4294 break;
4295 case CmpInst::ICMP_NE:
4296 KnownVal = KnownBits::ne(KnownLHS, KnownRHS);
4297 break;
4298 case CmpInst::ICMP_SGE:
4299 KnownVal = KnownBits::sge(KnownLHS, KnownRHS);
4300 break;
4301 case CmpInst::ICMP_SGT:
4302 KnownVal = KnownBits::sgt(KnownLHS, KnownRHS);
4303 break;
4304 case CmpInst::ICMP_SLE:
4305 KnownVal = KnownBits::sle(KnownLHS, KnownRHS);
4306 break;
4307 case CmpInst::ICMP_SLT:
4308 KnownVal = KnownBits::slt(KnownLHS, KnownRHS);
4309 break;
4310 case CmpInst::ICMP_UGE:
4311 KnownVal = KnownBits::uge(KnownLHS, KnownRHS);
4312 break;
4313 case CmpInst::ICMP_UGT:
4314 KnownVal = KnownBits::ugt(KnownLHS, KnownRHS);
4315 break;
4316 case CmpInst::ICMP_ULE:
4317 KnownVal = KnownBits::ule(KnownLHS, KnownRHS);
4318 break;
4319 case CmpInst::ICMP_ULT:
4320 KnownVal = KnownBits::ult(KnownLHS, KnownRHS);
4321 break;
4322 }
4323 }
4324
4325 if (!KnownVal)
4326 return false;
4327 MatchInfo =
4328 *KnownVal
4330 /*IsVector = */
4331 MRI.getType(MI.getOperand(0).getReg()).isVector(),
4332 /* IsFP = */ false)
4333 : 0;
4334 return true;
4335}
4336
4338 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4339 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
4340 // Given:
4341 //
4342 // %x = G_WHATEVER (... x is known to be 0 or 1 ...)
4343 // %cmp = G_ICMP ne %x, 0
4344 //
4345 // Or:
4346 //
4347 // %x = G_WHATEVER (... x is known to be 0 or 1 ...)
4348 // %cmp = G_ICMP eq %x, 1
4349 //
4350 // We can replace %cmp with %x assuming true is 1 on the target.
4351 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
4352 if (!CmpInst::isEquality(Pred))
4353 return false;
4354 Register Dst = MI.getOperand(0).getReg();
4355 LLT DstTy = MRI.getType(Dst);
4357 /* IsFP = */ false) != 1)
4358 return false;
4359 int64_t OneOrZero = Pred == CmpInst::ICMP_EQ;
4360 if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICst(OneOrZero)))
4361 return false;
4362 Register LHS = MI.getOperand(2).getReg();
4363 auto KnownLHS = KB->getKnownBits(LHS);
4364 if (KnownLHS.getMinValue() != 0 || KnownLHS.getMaxValue() != 1)
4365 return false;
4366 // Make sure replacing Dst with the LHS is a legal operation.
4367 LLT LHSTy = MRI.getType(LHS);
4368 unsigned LHSSize = LHSTy.getSizeInBits();
4369 unsigned DstSize = DstTy.getSizeInBits();
4370 unsigned Op = TargetOpcode::COPY;
4371 if (DstSize != LHSSize)
4372 Op = DstSize < LHSSize ? TargetOpcode::G_TRUNC : TargetOpcode::G_ZEXT;
4373 if (!isLegalOrBeforeLegalizer({Op, {DstTy, LHSTy}}))
4374 return false;
4375 MatchInfo = [=](MachineIRBuilder &B) { B.buildInstr(Op, {Dst}, {LHS}); };
4376 return true;
4377}
4378
4379// Replace (and (or x, c1), c2) with (and x, c2) iff c1 & c2 == 0
4381 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4382 assert(MI.getOpcode() == TargetOpcode::G_AND);
4383
4384 // Ignore vector types to simplify matching the two constants.
4385 // TODO: do this for vectors and scalars via a demanded bits analysis.
4386 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4387 if (Ty.isVector())
4388 return false;
4389
4390 Register Src;
4391 Register AndMaskReg;
4392 int64_t AndMaskBits;
4393 int64_t OrMaskBits;
4394 if (!mi_match(MI, MRI,
4395 m_GAnd(m_GOr(m_Reg(Src), m_ICst(OrMaskBits)),
4396 m_all_of(m_ICst(AndMaskBits), m_Reg(AndMaskReg)))))
4397 return false;
4398
4399 // Check if OrMask could turn on any bits in Src.
4400 if (AndMaskBits & OrMaskBits)
4401 return false;
4402
4403 MatchInfo = [=, &MI](MachineIRBuilder &B) {
4405 // Canonicalize the result to have the constant on the RHS.
4406 if (MI.getOperand(1).getReg() == AndMaskReg)
4407 MI.getOperand(2).setReg(AndMaskReg);
4408 MI.getOperand(1).setReg(Src);
4410 };
4411 return true;
4412}
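
// Standalone sketch of the masking identity behind the rewrite above,
// assuming unsigned semantics (hypothetical helper, not part of the LLVM
// source): when C1 & C2 == 0, the OR can never set a bit that survives the
// AND, so (x | C1) & C2 == x & C2 and the OR can be dropped.
#include <cstdint>
static bool orAndMaskIdentityHolds(uint64_t X, uint64_t C1, uint64_t C2) {
  return (C1 & C2) != 0 || ((X | C1) & C2) == (X & C2);
}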
4413
4414/// Form a G_SBFX from a G_SEXT_INREG fed by a right shift.
4416 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4417 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
4418 Register Dst = MI.getOperand(0).getReg();
4419 Register Src = MI.getOperand(1).getReg();
4420 LLT Ty = MRI.getType(Src);
4422 if (!LI || !LI->isLegalOrCustom({TargetOpcode::G_SBFX, {Ty, ExtractTy}}))
4423 return false;
4424 int64_t Width = MI.getOperand(2).getImm();
4425 Register ShiftSrc;
4426 int64_t ShiftImm;
4427 if (!mi_match(
4428 Src, MRI,
4429 m_OneNonDBGUse(m_any_of(m_GAShr(m_Reg(ShiftSrc), m_ICst(ShiftImm)),
4430 m_GLShr(m_Reg(ShiftSrc), m_ICst(ShiftImm))))))
4431 return false;
4432 if (ShiftImm < 0 || ShiftImm + Width > Ty.getScalarSizeInBits())
4433 return false;
4434
4435 MatchInfo = [=](MachineIRBuilder &B) {
4436 auto Cst1 = B.buildConstant(ExtractTy, ShiftImm);
4437 auto Cst2 = B.buildConstant(ExtractTy, Width);
4438 B.buildSbfx(Dst, ShiftSrc, Cst1, Cst2);
4439 };
4440 return true;
4441}
4442
4443/// Form a G_UBFX from "(a srl b) & mask", where b and mask are constants.
4445 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4446 assert(MI.getOpcode() == TargetOpcode::G_AND);
4447 Register Dst = MI.getOperand(0).getReg();
4448 LLT Ty = MRI.getType(Dst);
4450 if (LI && !LI->isLegalOrCustom({TargetOpcode::G_UBFX, {Ty, ExtractTy}}))
4451 return false;
4452
4453 int64_t AndImm, LSBImm;
4454 Register ShiftSrc;
4455 const unsigned Size = Ty.getScalarSizeInBits();
4456 if (!mi_match(MI.getOperand(0).getReg(), MRI,
4457 m_GAnd(m_OneNonDBGUse(m_GLShr(m_Reg(ShiftSrc), m_ICst(LSBImm))),
4458 m_ICst(AndImm))))
4459 return false;
4460
4461 // The mask is a mask of the low bits iff imm & (imm+1) == 0.
4462 auto MaybeMask = static_cast<uint64_t>(AndImm);
4463 if (MaybeMask & (MaybeMask + 1))
4464 return false;
4465
4466 // LSB must fit within the register.
4467 if (static_cast<uint64_t>(LSBImm) >= Size)
4468 return false;
4469
4470 uint64_t Width = APInt(Size, AndImm).countr_one();
4471 MatchInfo = [=](MachineIRBuilder &B) {
4472 auto WidthCst = B.buildConstant(ExtractTy, Width);
4473 auto LSBCst = B.buildConstant(ExtractTy, LSBImm);
4474 B.buildInstr(TargetOpcode::G_UBFX, {Dst}, {ShiftSrc, LSBCst, WidthCst});
4475 };
4476 return true;
4477}
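
// Standalone sketch of the bitfield extract formed above, assuming 64-bit
// values (hypothetical helper, not part of the LLVM source): when AndImm is
// a mask of low bits, (x >> LSB) & AndImm extracts Width = countr_one(AndImm)
// bits starting at bit LSB, which is what G_UBFX computes.
#include <cstdint>
static uint64_t ubfxSketch(uint64_t X, unsigned LSB, unsigned Width) {
  uint64_t Mask = Width >= 64 ? ~0ULL : (1ULL << Width) - 1;
  return (X >> LSB) & Mask;
}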
4478
4480 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4481 const unsigned Opcode = MI.getOpcode();
4482 assert(Opcode == TargetOpcode::G_ASHR || Opcode == TargetOpcode::G_LSHR);
4483
4484 const Register Dst = MI.getOperand(0).getReg();
4485
4486 const unsigned ExtrOpcode = Opcode == TargetOpcode::G_ASHR
4487 ? TargetOpcode::G_SBFX
4488 : TargetOpcode::G_UBFX;
4489
4490 // Check if the type we would use for the extract is legal
4491 LLT Ty = MRI.getType(Dst);
4493 if (!LI || !LI->isLegalOrCustom({ExtrOpcode, {Ty, ExtractTy}}))
4494 return false;
4495
4496 Register ShlSrc;
4497 int64_t ShrAmt;
4498 int64_t ShlAmt;
4499 const unsigned Size = Ty.getScalarSizeInBits();
4500
4501 // Try to match shr (shl x, c1), c2
4502 if (!mi_match(Dst, MRI,
4503 m_BinOp(Opcode,
4504 m_OneNonDBGUse(m_GShl(m_Reg(ShlSrc), m_ICst(ShlAmt))),
4505 m_ICst(ShrAmt))))
4506 return false;
4507
4508 // Make sure that the shift sizes can fit a bitfield extract
4509 if (ShlAmt < 0 || ShlAmt > ShrAmt || ShrAmt >= Size)
4510 return false;
4511
4512 // Skip this combine if the G_SEXT_INREG combine could handle it
4513 if (Opcode == TargetOpcode::G_ASHR && ShlAmt == ShrAmt)
4514 return false;
4515
4516 // Calculate start position and width of the extract
4517 const int64_t Pos = ShrAmt - ShlAmt;
4518 const int64_t Width = Size - ShrAmt;
4519
4520 MatchInfo = [=](MachineIRBuilder &B) {
4521 auto WidthCst = B.buildConstant(ExtractTy, Width);
4522 auto PosCst = B.buildConstant(ExtractTy, Pos);
4523 B.buildInstr(ExtrOpcode, {Dst}, {ShlSrc, PosCst, WidthCst});
4524 };
4525 return true;
4526}
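
// Standalone sketch of the position/width arithmetic above, assuming 64-bit
// unsigned values and ShlAmt <= ShrAmt < 64 (hypothetical helper, not part
// of the LLVM source): (x << ShlAmt) >> ShrAmt equals an unsigned bitfield
// extract at Pos = ShrAmt - ShlAmt of Width = 64 - ShrAmt bits.
#include <cstdint>
static uint64_t shlShrAsUbfx(uint64_t X, unsigned ShlAmt, unsigned ShrAmt) {
  unsigned Pos = ShrAmt - ShlAmt, Width = 64 - ShrAmt;
  uint64_t Mask = Width >= 64 ? ~0ULL : (1ULL << Width) - 1;
  return (X >> Pos) & Mask; // same value as (X << ShlAmt) >> ShrAmt
}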
4527
4529 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4530 const unsigned Opcode = MI.getOpcode();
4531 assert(Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_ASHR);
4532
4533 const Register Dst = MI.getOperand(0).getReg();
4534 LLT Ty = MRI.getType(Dst);
4536 if (LI && !LI->isLegalOrCustom({TargetOpcode::G_UBFX, {Ty, ExtractTy}}))
4537 return false;
4538
4539 // Try to match shr (and x, c1), c2
4540 Register AndSrc;
4541 int64_t ShrAmt;
4542 int64_t SMask;
4543 if (!mi_match(Dst, MRI,
4544 m_BinOp(Opcode,
4545 m_OneNonDBGUse(m_GAnd(m_Reg(AndSrc), m_ICst(SMask))),
4546 m_ICst(ShrAmt))))
4547 return false;
4548
4549 const unsigned Size = Ty.getScalarSizeInBits();
4550 if (ShrAmt < 0 || ShrAmt >= Size)
4551 return false;
4552
4553 // If the shift subsumes the mask, emit the 0 directly.
4554 if (0 == (SMask >> ShrAmt)) {
4555 MatchInfo = [=](MachineIRBuilder &B) {
4556 B.buildConstant(Dst, 0);
4557 };
4558 return true;
4559 }
4560
4561 // Check that ubfx can do the extraction, with no holes in the mask.
4562 uint64_t UMask = SMask;
4563 UMask |= maskTrailingOnes<uint64_t>(ShrAmt);
4564 UMask &= maskTrailingOnes<uint64_t>(Size);
4565 if (!isMask_64(UMask))
4566 return false;
4567
4568 // Calculate start position and width of the extract.
4569 const int64_t Pos = ShrAmt;
4570 const int64_t Width = llvm::countr_one(UMask) - ShrAmt;
4571
4572 // It's preferable to keep the shift, rather than form G_SBFX.
4573 // TODO: remove the G_AND via demanded bits analysis.
4574 if (Opcode == TargetOpcode::G_ASHR && Width + ShrAmt == Size)
4575 return false;
4576
4577 MatchInfo = [=](MachineIRBuilder &B) {
4578 auto WidthCst = B.buildConstant(ExtractTy, Width);
4579 auto PosCst = B.buildConstant(ExtractTy, Pos);
4580 B.buildInstr(TargetOpcode::G_UBFX, {Dst}, {AndSrc, PosCst, WidthCst});
4581 };
4582 return true;
4583}
4584
4585bool CombinerHelper::reassociationCanBreakAddressingModePattern(
4586 MachineInstr &MI) {
4587 auto &PtrAdd = cast<GPtrAdd>(MI);
4588
4589 Register Src1Reg = PtrAdd.getBaseReg();
4590 auto *Src1Def = getOpcodeDef<GPtrAdd>(Src1Reg, MRI);
4591 if (!Src1Def)
4592 return false;
4593
4594 Register Src2Reg = PtrAdd.getOffsetReg();
4595
4596 if (MRI.hasOneNonDBGUse(Src1Reg))
4597 return false;
4598
4599 auto C1 = getIConstantVRegVal(Src1Def->getOffsetReg(), MRI);
4600 if (!C1)
4601 return false;
4602 auto C2 = getIConstantVRegVal(Src2Reg, MRI);
4603 if (!C2)
4604 return false;
4605
4606 const APInt &C1APIntVal = *C1;
4607 const APInt &C2APIntVal = *C2;
4608 const int64_t CombinedValue = (C1APIntVal + C2APIntVal).getSExtValue();
4609
4610 for (auto &UseMI : MRI.use_nodbg_instructions(PtrAdd.getReg(0))) {
4611 // This combine may end up running before ptrtoint/inttoptr combines
4612 // manage to eliminate redundant conversions, so try to look through them.
4613 MachineInstr *ConvUseMI = &UseMI;
4614 unsigned ConvUseOpc = ConvUseMI->getOpcode();
4615 while (ConvUseOpc == TargetOpcode::G_INTTOPTR ||
4616 ConvUseOpc == TargetOpcode::G_PTRTOINT) {
4617 Register DefReg = ConvUseMI->getOperand(0).getReg();
4618 if (!MRI.hasOneNonDBGUse(DefReg))
4619 break;
4620 ConvUseMI = &*MRI.use_instr_nodbg_begin(DefReg);
4621 ConvUseOpc = ConvUseMI->getOpcode();
4622 }
4623 auto *LdStMI = dyn_cast<GLoadStore>(ConvUseMI);
4624 if (!LdStMI)
4625 continue;
4626 // Is x[offset2] already not a legal addressing mode? If so then
4627 // reassociating the constants breaks nothing (we test offset2 because
4628 // that's the one we hope to fold into the load or store).
4630 AM.HasBaseReg = true;
4631 AM.BaseOffs = C2APIntVal.getSExtValue();
4632 unsigned AS = MRI.getType(LdStMI->getPointerReg()).getAddressSpace();
4633 Type *AccessTy = getTypeForLLT(LdStMI->getMMO().getMemoryType(),
4634 PtrAdd.getMF()->getFunction().getContext());
4635 const auto &TLI = *PtrAdd.getMF()->getSubtarget().getTargetLowering();
4636 if (!TLI.isLegalAddressingMode(PtrAdd.getMF()->getDataLayout(), AM,
4637 AccessTy, AS))
4638 continue;
4639
4640 // Would x[offset1+offset2] still be a legal addressing mode?
4641 AM.BaseOffs = CombinedValue;
4642 if (!TLI.isLegalAddressingMode(PtrAdd.getMF()->getDataLayout(), AM,
4643 AccessTy, AS))
4644 return true;
4645 }
4646
4647 return false;
4648}
4649
4651 MachineInstr *RHS,
4652 BuildFnTy &MatchInfo) {
4653 // G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C)
4654 Register Src1Reg = MI.getOperand(1).getReg();
4655 if (RHS->getOpcode() != TargetOpcode::G_ADD)
4656 return false;
4657 auto C2 = getIConstantVRegVal(RHS->getOperand(2).getReg(), MRI);
4658 if (!C2)
4659 return false;
4660
4661 MatchInfo = [=, &MI](MachineIRBuilder &B) {
4662 LLT PtrTy = MRI.getType(MI.getOperand(0).getReg());
4663
4664 auto NewBase =
4665 Builder.buildPtrAdd(PtrTy, Src1Reg, RHS->getOperand(1).getReg());
4667 MI.getOperand(1).setReg(NewBase.getReg(0));
4668 MI.getOperand(2).setReg(RHS->getOperand(2).getReg());
4670 };
4671 return !reassociationCanBreakAddressingModePattern(MI);
4672}
4673
4675 MachineInstr *LHS,
4676 MachineInstr *RHS,
4677 BuildFnTy &MatchInfo) {
4678 // G_PTR_ADD(G_PTR_ADD(X, C), Y) -> G_PTR_ADD(G_PTR_ADD(X, Y), C)
4679 // if and only if (G_PTR_ADD X, C) has one use.
4680 Register LHSBase;
4681 std::optional<ValueAndVReg> LHSCstOff;
4682 if (!mi_match(MI.getBaseReg(), MRI,
4683 m_OneNonDBGUse(m_GPtrAdd(m_Reg(LHSBase), m_GCst(LHSCstOff)))))
4684 return false;
4685
4686 auto *LHSPtrAdd = cast<GPtrAdd>(LHS);
4687 MatchInfo = [=, &MI](MachineIRBuilder &B) {
4688 // When we change LHSPtrAdd's offset register we might cause it to use a reg
4689 // before its def. Sink the instruction so it sits just before the outer
4690 // PTR_ADD, to ensure this doesn't happen.
4691 LHSPtrAdd->moveBefore(&MI);
4692 Register RHSReg = MI.getOffsetReg();
4693 // Setting the vreg directly could cause a type mismatch if it comes from an extend/trunc, so build a fresh constant.
4694 auto NewCst = B.buildConstant(MRI.getType(RHSReg), LHSCstOff->Value);
4696 MI.getOperand(2).setReg(NewCst.getReg(0));
4698 Observer.changingInstr(*LHSPtrAdd);
4699 LHSPtrAdd->getOperand(2).setReg(RHSReg);
4700 Observer.changedInstr(*LHSPtrAdd);
4701 };
4702 return !reassociationCanBreakAddressingModePattern(MI);
4703}
4704
4706 MachineInstr *LHS,
4707 MachineInstr *RHS,
4708 BuildFnTy &MatchInfo) {
4709 // G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)
4710 auto *LHSPtrAdd = dyn_cast<GPtrAdd>(LHS);
4711 if (!LHSPtrAdd)
4712 return false;
4713
4714 Register Src2Reg = MI.getOperand(2).getReg();
4715 Register LHSSrc1 = LHSPtrAdd->getBaseReg();
4716 Register LHSSrc2 = LHSPtrAdd->getOffsetReg();
4717 auto C1 = getIConstantVRegVal(LHSSrc2, MRI);
4718 if (!C1)
4719 return false;
4720 auto C2 = getIConstantVRegVal(Src2Reg, MRI);
4721 if (!C2)
4722 return false;
4723
4724 MatchInfo = [=, &MI](MachineIRBuilder &B) {
4725 auto NewCst = B.buildConstant(MRI.getType(Src2Reg), *C1 + *C2);
4727 MI.getOperand(1).setReg(LHSSrc1);
4728 MI.getOperand(2).setReg(NewCst.getReg(0));
4730 };
4731 return !reassociationCanBreakAddressingModePattern(MI);
4732}
4733
4735 BuildFnTy &MatchInfo) {
4736 auto &PtrAdd = cast<GPtrAdd>(MI);
4737 // We're trying to match a few pointer computation patterns here for
4738 // re-association opportunities.
4739 // 1) Isolating a constant operand to be on the RHS, e.g.:
4740 // G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C)
4741 //
4742 // 2) Folding two constants in each sub-tree as long as such folding
4743 // doesn't break a legal addressing mode.
4744 // G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)
4745 //
4746 // 3) Move a constant from the LHS of an inner op to the RHS of the outer.
4747 // G_PTR_ADD(G_PTR_ADD(X, C), Y) -> G_PTR_ADD(G_PTR_ADD(X, Y), C)
4748 // iff (G_PTR_ADD X, C) has one use.
4749 MachineInstr *LHS = MRI.getVRegDef(PtrAdd.getBaseReg());
4750 MachineInstr *RHS = MRI.getVRegDef(PtrAdd.getOffsetReg());
4751
4752 // Try to match example 2.
4753 if (matchReassocFoldConstantsInSubTree(PtrAdd, LHS, RHS, MatchInfo))
4754 return true;
4755
4756 // Try to match example 3.
4757 if (matchReassocConstantInnerLHS(PtrAdd, LHS, RHS, MatchInfo))
4758 return true;
4759
4760 // Try to match example 1.
4761 if (matchReassocConstantInnerRHS(PtrAdd, RHS, MatchInfo))
4762 return true;
4763
4764 return false;
4765}
4767 Register OpLHS, Register OpRHS,
4768 BuildFnTy &MatchInfo) {
4769 LLT OpRHSTy = MRI.getType(OpRHS);
4770 MachineInstr *OpLHSDef = MRI.getVRegDef(OpLHS);
4771
4772 if (OpLHSDef->getOpcode() != Opc)
4773 return false;
4774
4775 MachineInstr *OpRHSDef = MRI.getVRegDef(OpRHS);
4776 Register OpLHSLHS = OpLHSDef->getOperand(1).getReg();
4777 Register OpLHSRHS = OpLHSDef->getOperand(2).getReg();
4778
4779 // If the inner op is (X op C), pull the constant out so it can be folded with
4780 // other constants in the expression tree. Folding is not guaranteed so we
4781 // might have (C1 op C2). In that case do not pull a constant out because it
4782 // won't help and can lead to infinite loops.
4785 if (isConstantOrConstantSplatVector(*OpRHSDef, MRI)) {
4786 // (Opc (Opc X, C1), C2) -> (Opc X, (Opc C1, C2))
4787 MatchInfo = [=](MachineIRBuilder &B) {
4788 auto NewCst = B.buildInstr(Opc, {OpRHSTy}, {OpLHSRHS, OpRHS});
4789 B.buildInstr(Opc, {DstReg}, {OpLHSLHS, NewCst});
4790 };
4791 return true;
4792 }
4793 if (getTargetLowering().isReassocProfitable(MRI, OpLHS, OpRHS)) {
4794 // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
4795 // iff (op x, c1) has one use
4796 MatchInfo = [=](MachineIRBuilder &B) {
4797 auto NewLHSLHS = B.buildInstr(Opc, {OpRHSTy}, {OpLHSLHS, OpRHS});
4798 B.buildInstr(Opc, {DstReg}, {NewLHSLHS, OpLHSRHS});
4799 };
4800 return true;
4801 }
4802 }
4803
4804 return false;
4805}
4806
4808 BuildFnTy &MatchInfo) {
4809 // We don't check if the reassociation will break a legal addressing mode
4810 // here since pointer arithmetic is handled by G_PTR_ADD.
4811 unsigned Opc = MI.getOpcode();
4812 Register DstReg = MI.getOperand(0).getReg();
4813 Register LHSReg = MI.getOperand(1).getReg();
4814 Register RHSReg = MI.getOperand(2).getReg();
4815
4816 if (tryReassocBinOp(Opc, DstReg, LHSReg, RHSReg, MatchInfo))
4817 return true;
4818 if (tryReassocBinOp(Opc, DstReg, RHSReg, LHSReg, MatchInfo))
4819 return true;
4820 return false;
4821}
4822
4824 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
4825 Register SrcOp = MI.getOperand(1).getReg();
4826
4827 if (auto MaybeCst = ConstantFoldCastOp(MI.getOpcode(), DstTy, SrcOp, MRI)) {
4828 MatchInfo = *MaybeCst;
4829 return true;
4830 }
4831
4832 return false;
4833}
4834
4836 Register Op1 = MI.getOperand(1).getReg();
4837 Register Op2 = MI.getOperand(2).getReg();
4838 auto MaybeCst = ConstantFoldBinOp(MI.getOpcode(), Op1, Op2, MRI);
4839 if (!MaybeCst)
4840 return false;
4841 MatchInfo = *MaybeCst;
4842 return true;
4843}
4844
4846 Register Op1 = MI.getOperand(1).getReg();
4847 Register Op2 = MI.getOperand(2).getReg();
4848 auto MaybeCst = ConstantFoldFPBinOp(MI.getOpcode(), Op1, Op2, MRI);
4849 if (!MaybeCst)
4850 return false;
4851 MatchInfo =
4852 ConstantFP::get(MI.getMF()->getFunction().getContext(), *MaybeCst);
4853 return true;
4854}
4855
4857 ConstantFP *&MatchInfo) {
4858 assert(MI.getOpcode() == TargetOpcode::G_FMA ||
4859 MI.getOpcode() == TargetOpcode::G_FMAD);
4860 auto [_, Op1, Op2, Op3] = MI.getFirst4Regs();
4861
4862 const ConstantFP *Op3Cst = getConstantFPVRegVal(Op3, MRI);
4863 if (!Op3Cst)
4864 return false;
4865
4866 const ConstantFP *Op2Cst = getConstantFPVRegVal(Op2, MRI);
4867 if (!Op2Cst)
4868 return false;
4869
4870 const ConstantFP *Op1Cst = getConstantFPVRegVal(Op1, MRI);
4871 if (!Op1Cst)
4872 return false;
4873
4874 APFloat Op1F = Op1Cst->getValueAPF();
4875 Op1F.fusedMultiplyAdd(Op2Cst->getValueAPF(), Op3Cst->getValueAPF(),
4877 MatchInfo = ConstantFP::get(MI.getMF()->getFunction().getContext(), Op1F);
4878 return true;
4879}
4880
4882 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4883 // Look for a binop feeding into an AND with a mask:
4884 //
4885 // %add = G_ADD %lhs, %rhs
4886 // %and = G_AND %add, 000...11111111
4887 //
4888 // Check if it's possible to perform the binop at a narrower width and zext
4889 // back to the original width like so:
4890 //
4891 // %narrow_lhs = G_TRUNC %lhs
4892 // %narrow_rhs = G_TRUNC %rhs
4893 // %narrow_add = G_ADD %narrow_lhs, %narrow_rhs
4894 // %new_add = G_ZEXT %narrow_add
4895 // %and = G_AND %new_add, 000...11111111
4896 //
4897 // This can allow later combines to eliminate the G_AND if it turns out
4898 // that the mask is irrelevant.
4899 assert(MI.getOpcode() == TargetOpcode::G_AND);
4900 Register Dst = MI.getOperand(0).getReg();
4901 Register AndLHS = MI.getOperand(1).getReg();
4902 Register AndRHS = MI.getOperand(2).getReg();
4903 LLT WideTy = MRI.getType(Dst);
4904
4905 // If the potential binop has more than one use, then it's possible that one
4906 // of those uses will need its full width.
4907 if (!WideTy.isScalar() || !MRI.hasOneNonDBGUse(AndLHS))
4908 return false;
4909
4910 // Check if the LHS feeding the AND is impacted by the high bits that we're
4911 // masking out.
4912 //
4913 // e.g. for 64-bit x, y:
4914 //
4915 // add_64(x, y) & 65535 == zext(add_16(trunc(x), trunc(y))) & 65535
4916 MachineInstr *LHSInst = getDefIgnoringCopies(AndLHS, MRI);
4917 if (!LHSInst)
4918 return false;
4919 unsigned LHSOpc = LHSInst->getOpcode();
4920 switch (LHSOpc) {
4921 default:
4922 return false;
4923 case TargetOpcode::G_ADD:
4924 case TargetOpcode::G_SUB:
4925 case TargetOpcode::G_MUL:
4926 case TargetOpcode::G_AND:
4927 case TargetOpcode::G_OR:
4928 case TargetOpcode::G_XOR:
4929 break;
4930 }
4931
4932 // Find the mask on the RHS.
4933 auto Cst = getIConstantVRegValWithLookThrough(AndRHS, MRI);
4934 if (!Cst)
4935 return false;
4936 auto Mask = Cst->Value;
4937 if (!Mask.isMask())
4938 return false;
4939
4940 // No point in combining if there's nothing to truncate.
4941 unsigned NarrowWidth = Mask.countr_one();
4942 if (NarrowWidth == WideTy.getSizeInBits())
4943 return false;
4944 LLT NarrowTy = LLT::scalar(NarrowWidth);
4945
4946 // Check if adding the zext + truncates could be harmful.
4947 auto &MF = *MI.getMF();
4948 const auto &TLI = getTargetLowering();
4949 LLVMContext &Ctx = MF.getFunction().getContext();
4950 auto &DL = MF.getDataLayout();
4951 if (!TLI.isTruncateFree(WideTy, NarrowTy, DL, Ctx) ||
4952 !TLI.isZExtFree(NarrowTy, WideTy, DL, Ctx))
4953 return false;
4954 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_TRUNC, {NarrowTy, WideTy}}) ||
4955 !isLegalOrBeforeLegalizer({TargetOpcode::G_ZEXT, {WideTy, NarrowTy}}))
4956 return false;
4957 Register BinOpLHS = LHSInst->getOperand(1).getReg();
4958 Register BinOpRHS = LHSInst->getOperand(2).getReg();
4959 MatchInfo = [=, &MI](MachineIRBuilder &B) {
4960 auto NarrowLHS = Builder.buildTrunc(NarrowTy, BinOpLHS);
4961 auto NarrowRHS = Builder.buildTrunc(NarrowTy, BinOpRHS);
4962 auto NarrowBinOp =
4963 Builder.buildInstr(LHSOpc, {NarrowTy}, {NarrowLHS, NarrowRHS});
4964 auto Ext = Builder.buildZExt(WideTy, NarrowBinOp);
4966 MI.getOperand(1).setReg(Ext.getReg(0));
4968 };
4969 return true;
4970}
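
// Standalone sketch of the narrowing fact used above, assuming a 16-bit
// mask on 64-bit inputs (hypothetical helpers, not part of the LLVM
// source): the low bits of a sum depend only on the low bits of its
// operands, so the masked wide add equals a zero-extended narrow add.
#include <cstdint>
static uint64_t maskedWideAdd(uint64_t X, uint64_t Y) {
  return (X + Y) & 0xFFFF;
}
static uint64_t maskedNarrowAdd(uint64_t X, uint64_t Y) {
  return (uint64_t)(uint16_t)((uint16_t)X + (uint16_t)Y);
}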
4971
4973 unsigned Opc = MI.getOpcode();
4974 assert(Opc == TargetOpcode::G_UMULO || Opc == TargetOpcode::G_SMULO);
4975
4976 if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICstOrSplat(2)))
4977 return false;
4978
4979 MatchInfo = [=, &MI](MachineIRBuilder &B) {
4981 unsigned NewOpc = Opc == TargetOpcode::G_UMULO ? TargetOpcode::G_UADDO
4982 : TargetOpcode::G_SADDO;
4983 MI.setDesc(Builder.getTII().get(NewOpc));
4984 MI.getOperand(3).setReg(MI.getOperand(2).getReg());
4986 };
4987 return true;
4988}
4989
4991 // (G_*MULO x, 0) -> 0 + no carry out
4992 assert(MI.getOpcode() == TargetOpcode::G_UMULO ||
4993 MI.getOpcode() == TargetOpcode::G_SMULO);
4994 if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICstOrSplat(0)))
4995 return false;
4996 Register Dst = MI.getOperand(0).getReg();
4997 Register Carry = MI.getOperand(1).getReg();
5000 return false;
5001 MatchInfo = [=](MachineIRBuilder &B) {
5002 B.buildConstant(Dst, 0);
5003 B.buildConstant(Carry, 0);
5004 };
5005 return true;
5006}
5007
5009 // (G_*ADDE x, y, 0) -> (G_*ADDO x, y)
5010 // (G_*SUBE x, y, 0) -> (G_*SUBO x, y)
5011 assert(MI.getOpcode() == TargetOpcode::G_UADDE ||
5012 MI.getOpcode() == TargetOpcode::G_SADDE ||
5013 MI.getOpcode() == TargetOpcode::G_USUBE ||
5014 MI.getOpcode() == TargetOpcode::G_SSUBE);
5015 if (!mi_match(MI.getOperand(4).getReg(), MRI, m_SpecificICstOrSplat(0)))
5016 return false;
5017 MatchInfo = [&](MachineIRBuilder &B) {
5018 unsigned NewOpcode;
5019 switch (MI.getOpcode()) {
5020 case TargetOpcode::G_UADDE:
5021 NewOpcode = TargetOpcode::G_UADDO;
5022 break;
5023 case TargetOpcode::G_SADDE:
5024 NewOpcode = TargetOpcode::G_SADDO;
5025 break;
5026 case TargetOpcode::G_USUBE:
5027 NewOpcode = TargetOpcode::G_USUBO;
5028 break;
5029 case TargetOpcode::G_SSUBE:
5030 NewOpcode = TargetOpcode::G_SSUBO;
5031 break;
5032 }
5034 MI.setDesc(B.getTII().get(NewOpcode));
5035 MI.removeOperand(4);
5037 };
5038 return true;
5039}
5040
5042 BuildFnTy &MatchInfo) {
5043 assert(MI.getOpcode() == TargetOpcode::G_SUB);
5044 Register Dst = MI.getOperand(0).getReg();
5045 // (x + y) - z -> x (if y == z)
5046 // (x + y) - z -> y (if x == z)
5047 Register X, Y, Z;
5048 if (mi_match(Dst, MRI, m_GSub(m_GAdd(m_Reg(X), m_Reg(Y)), m_Reg(Z)))) {
5049 Register ReplaceReg;
5050 int64_t CstX, CstY;
5051 if (Y == Z || (mi_match(Y, MRI, m_ICstOrSplat(CstY)) &&
5053 ReplaceReg = X;
5054 else if (X == Z || (mi_match(X, MRI, m_ICstOrSplat(CstX)) &&
5056 ReplaceReg = Y;
5057 if (ReplaceReg) {
5058 MatchInfo = [=](MachineIRBuilder &B) { B.buildCopy(Dst, ReplaceReg); };
5059 return true;
5060 }
5061 }
5062
5063 // x - (y + z) -> 0 - y (if x == z)
5064 // x - (y + z) -> 0 - z (if x == y)
5065 if (mi_match(Dst, MRI, m_GSub(m_Reg(X), m_GAdd(m_Reg(Y), m_Reg(Z))))) {
5066 Register ReplaceReg;
5067 int64_t CstX;
5068 if (X == Z || (mi_match(X, MRI, m_ICstOrSplat(CstX)) &&
5070 ReplaceReg = Y;
5071 else if (X == Y || (mi_match(X, MRI, m_ICstOrSplat(CstX)) &&
5073 ReplaceReg = Z;
5074 if (ReplaceReg) {
5075 MatchInfo = [=](MachineIRBuilder &B) {
5076 auto Zero = B.buildConstant(MRI.getType(Dst), 0);
5077 B.buildSub(Dst, Zero, ReplaceReg);
5078 };
5079 return true;
5080 }
5081 }
5082 return false;
5083}
5084
5086 assert(MI.getOpcode() == TargetOpcode::G_UDIV);
5087 auto &UDiv = cast<GenericMachineInstr>(MI);
5088 Register Dst = UDiv.getReg(0);
5089 Register LHS = UDiv.getReg(1);
5090 Register RHS = UDiv.getReg(2);
5091 LLT Ty = MRI.getType(Dst);
5092 LLT ScalarTy = Ty.getScalarType();
5093 const unsigned EltBits = ScalarTy.getScalarSizeInBits();
5095 LLT ScalarShiftAmtTy = ShiftAmtTy.getScalarType();
5096
5097 unsigned KnownLeadingZeros =
5099 auto &MIB = Builder;
5100
5101 bool UseNPQ = false;
5102 SmallVector<Register, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
5103
5104 auto BuildUDIVPattern = [&](const Constant *C) {
5105 auto *CI = cast<ConstantInt>(C);
5106 const APInt &Divisor = CI->getValue();
5107
5108 bool SelNPQ = false;
5109 APInt Magic(Divisor.getBitWidth(), 0);
5110 unsigned PreShift = 0, PostShift = 0;
5111
5112 // The magic algorithm doesn't work for division by 1, so we need to emit a
5113 // select at the end.
5114 // TODO: Use undef values for divisor of 1.
5115 if (!Divisor.isOne()) {
5116
5117 // UnsignedDivisionByConstantInfo doesn't work correctly if leading zeros
5118 // in the dividend exceed the leading zeros for the divisor.
5121 Divisor, std::min(KnownLeadingZeros, Divisor.countl_zero()));
5122
5123 Magic = std::move(magics.Magic);
5124
5125 assert(magics.PreShift < Divisor.getBitWidth() &&
5126 "We shouldn't generate an undefined shift!");
5127 assert(magics.PostShift < Divisor.getBitWidth() &&
5128 "We shouldn't generate an undefined shift!");
5129 assert((!magics.IsAdd || magics.PreShift == 0) && "Unexpected pre-shift");
5130 PreShift = magics.PreShift;
5131 PostShift = magics.PostShift;
5132 SelNPQ = magics.IsAdd;
5133 }
5134
5135 PreShifts.push_back(
5136 MIB.buildConstant(ScalarShiftAmtTy, PreShift).getReg(0));
5137 MagicFactors.push_back(MIB.buildConstant(ScalarTy, Magic).getReg(0));
5138 NPQFactors.push_back(
5139 MIB.buildConstant(ScalarTy,
5140 SelNPQ ? APInt::getOneBitSet(EltBits, EltBits - 1)
5141 : APInt::getZero(EltBits))
5142 .getReg(0));
5143 PostShifts.push_back(
5144 MIB.buildConstant(ScalarShiftAmtTy, PostShift).getReg(0));
5145 UseNPQ |= SelNPQ;
5146 return true;
5147 };
5148
5149 // Collect the shifts/magic values from each element.
5150 bool Matched = matchUnaryPredicate(MRI, RHS, BuildUDIVPattern);
5151 (void)Matched;
5152 assert(Matched && "Expected unary predicate match to succeed");
5153
5154 Register PreShift, PostShift, MagicFactor, NPQFactor;
5155 auto *RHSDef = getOpcodeDef<GBuildVector>(RHS, MRI);
5156 if (RHSDef) {
5157 PreShift = MIB.buildBuildVector(ShiftAmtTy, PreShifts).getReg(0);
5158 MagicFactor = MIB.buildBuildVector(Ty, MagicFactors).getReg(0);
5159 NPQFactor = MIB.buildBuildVector(Ty, NPQFactors).getReg(0);
5160 PostShift = MIB.buildBuildVector(ShiftAmtTy, PostShifts).getReg(0);
5161 } else {
5163 "Non-build_vector operation should have been a scalar");
5164 PreShift = PreShifts[0];
5165 MagicFactor = MagicFactors[0];
5166 PostShift = PostShifts[0];
5167 }
5168
5169 Register Q = LHS;
5170 Q = MIB.buildLShr(Ty, Q, PreShift).getReg(0);
5171
 5172 // Multiply the (possibly pre-shifted) numerator by the magic value.
5173 Q = MIB.buildUMulH(Ty, Q, MagicFactor).getReg(0);
5174
5175 if (UseNPQ) {
5176 Register NPQ = MIB.buildSub(Ty, LHS, Q).getReg(0);
5177
5178 // For vectors we might have a mix of non-NPQ/NPQ paths, so use
5179 // G_UMULH to act as a SRL-by-1 for NPQ, else multiply by zero.
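      // For example, a 32-bit udiv by 7 (with nothing known about the
      // dividend's high bits) takes this path: Q = G_UMULH(x, 0x24924925),
      // then ((x - Q) >> 1) + Q, shifted right by 2, is the quotient.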
5180 if (Ty.isVector())
5181 NPQ = MIB.buildUMulH(Ty, NPQ, NPQFactor).getReg(0);
5182 else
5183 NPQ = MIB.buildLShr(Ty, NPQ, MIB.buildConstant(ShiftAmtTy, 1)).getReg(0);
5184
5185 Q = MIB.buildAdd(Ty, NPQ, Q).getReg(0);
5186 }
5187
5188 Q = MIB.buildLShr(Ty, Q, PostShift).getReg(0);
5189 auto One = MIB.buildConstant(Ty, 1);
5190 auto IsOne = MIB.buildICmp(
5192 Ty.isScalar() ? LLT::scalar(1) : Ty.changeElementSize(1), RHS, One);
5193 return MIB.buildSelect(Ty, IsOne, LHS, Q);
5194}
5195
5197 assert(MI.getOpcode() == TargetOpcode::G_UDIV);
5198 Register Dst = MI.getOperand(0).getReg();
5199 Register RHS = MI.getOperand(2).getReg();
5200 LLT DstTy = MRI.getType(Dst);
5201 auto *RHSDef = MRI.getVRegDef(RHS);
5202 if (!isConstantOrConstantVector(*RHSDef, MRI))
5203 return false;
5204
5205 auto &MF = *MI.getMF();
5206 AttributeList Attr = MF.getFunction().getAttributes();
5207 const auto &TLI = getTargetLowering();
5208 LLVMContext &Ctx = MF.getFunction().getContext();
5209 auto &DL = MF.getDataLayout();
5210 if (TLI.isIntDivCheap(getApproximateEVTForLLT(DstTy, DL, Ctx), Attr))
5211 return false;
5212
5213 // Don't do this for minsize because the instruction sequence is usually
5214 // larger.
5215 if (MF.getFunction().hasMinSize())
5216 return false;
5217
5218 // Don't do this if the types are not going to be legal.
5219 if (LI) {
5220 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_MUL, {DstTy, DstTy}}))
5221 return false;
5222 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMULH, {DstTy}}))
5223 return false;
5225 {TargetOpcode::G_ICMP,
5226 {DstTy.isVector() ? DstTy.changeElementSize(1) : LLT::scalar(1),
5227 DstTy}}))
5228 return false;
5229 }
5230
5231 return matchUnaryPredicate(
5232 MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); });
5233}
5234
5236 auto *NewMI = buildUDivUsingMul(MI);
5237 replaceSingleDefInstWithReg(MI, NewMI->getOperand(0).getReg());
5238}
5239
5241 assert(MI.getOpcode() == TargetOpcode::G_SDIV && "Expected SDIV");
5242 Register Dst = MI.getOperand(0).getReg();
5243 Register RHS = MI.getOperand(2).getReg();
5244 LLT DstTy = MRI.getType(Dst);
5245
5246 auto &MF = *MI.getMF();
5247 AttributeList Attr = MF.getFunction().getAttributes();
5248 const auto &TLI = getTargetLowering();
5249 LLVMContext &Ctx = MF.getFunction().getContext();
5250 auto &DL = MF.getDataLayout();
5251 if (TLI.isIntDivCheap(getApproximateEVTForLLT(DstTy, DL, Ctx), Attr))
5252 return false;
5253
5254 // Don't do this for minsize because the instruction sequence is usually
5255 // larger.
5256 if (MF.getFunction().hasMinSize())
5257 return false;
5258
5259 // If the sdiv has an 'exact' flag we can use a simpler lowering.
5260 if (MI.getFlag(MachineInstr::MIFlag::IsExact)) {
5261 return matchUnaryPredicate(
5262 MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); });
5263 }
5264
5265 // Don't support the general case for now.
5266 return false;
5267}
5268
5270 auto *NewMI = buildSDivUsingMul(MI);
5271 replaceSingleDefInstWithReg(MI, NewMI->getOperand(0).getReg());
5272}
5273
5275 assert(MI.getOpcode() == TargetOpcode::G_SDIV && "Expected SDIV");
5276 auto &SDiv = cast<GenericMachineInstr>(MI);
5277 Register Dst = SDiv.getReg(0);
5278 Register LHS = SDiv.getReg(1);
5279 Register RHS = SDiv.getReg(2);
5280 LLT Ty = MRI.getType(Dst);
5281 LLT ScalarTy = Ty.getScalarType();
5283 LLT ScalarShiftAmtTy = ShiftAmtTy.getScalarType();
5284 auto &MIB = Builder;
5285
5286 bool UseSRA = false;
5287 SmallVector<Register, 16> Shifts, Factors;
5288
5289 auto *RHSDef = cast<GenericMachineInstr>(getDefIgnoringCopies(RHS, MRI));
5290 bool IsSplat = getIConstantSplatVal(*RHSDef, MRI).has_value();
5291
5292 auto BuildSDIVPattern = [&](const Constant *C) {
5293 // Don't recompute inverses for each splat element.
5294 if (IsSplat && !Factors.empty()) {
5295 Shifts.push_back(Shifts[0]);
5296 Factors.push_back(Factors[0]);
5297 return true;
5298 }
5299
5300 auto *CI = cast<ConstantInt>(C);
5301 APInt Divisor = CI->getValue();
5302 unsigned Shift = Divisor.countr_zero();
5303 if (Shift) {
5304 Divisor.ashrInPlace(Shift);
5305 UseSRA = true;
5306 }
5307
 5308 // Calculate the multiplicative inverse modulo 2^BW; after shifting out the
 5309 // trailing zeros above, the divisor is odd, so the inverse exists.
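    // For illustration, an exact 32-bit sdiv by 6 becomes an exact G_ASHR by 1
    // followed by a G_MUL by 0xAAAAAAAB, the multiplicative inverse of 3
    // modulo 2^32.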
5310 APInt Factor = Divisor.multiplicativeInverse();
5311 Shifts.push_back(MIB.buildConstant(ScalarShiftAmtTy, Shift).getReg(0));
5312 Factors.push_back(MIB.buildConstant(ScalarTy, Factor).getReg(0));
5313 return true;
5314 };
5315
 5316 // Collect the shift amounts and multiplicative factors for each divisor element.
5317 bool Matched = matchUnaryPredicate(MRI, RHS, BuildSDIVPattern);
5318 (void)Matched;
5319 assert(Matched && "Expected unary predicate match to succeed");
5320
5321 Register Shift, Factor;
5322 if (Ty.isVector()) {
5323 Shift = MIB.buildBuildVector(ShiftAmtTy, Shifts).getReg(0);
5324 Factor = MIB.buildBuildVector(Ty, Factors).getReg(0);
5325 } else {
5326 Shift = Shifts[0];
5327 Factor = Factors[0];
5328 }
5329
5330 Register Res = LHS;
5331
5332 if (UseSRA)
5333 Res = MIB.buildAShr(Ty, Res, Shift, MachineInstr::IsExact).getReg(0);
5334
5335 return MIB.buildMul(Ty, Res, Factor);
5336}
5337
5339 assert((MI.getOpcode() == TargetOpcode::G_SDIV ||
5340 MI.getOpcode() == TargetOpcode::G_UDIV) &&
5341 "Expected SDIV or UDIV");
5342 auto &Div = cast<GenericMachineInstr>(MI);
5343 Register RHS = Div.getReg(2);
5344 auto MatchPow2 = [&](const Constant *C) {
5345 auto *CI = dyn_cast<ConstantInt>(C);
5346 return CI && (CI->getValue().isPowerOf2() ||
5347 (IsSigned && CI->getValue().isNegatedPowerOf2()));
5348 };
5349 return matchUnaryPredicate(MRI, RHS, MatchPow2, /*AllowUndefs=*/false);
5350}
5351
5353 assert(MI.getOpcode() == TargetOpcode::G_SDIV && "Expected SDIV");
5354 auto &SDiv = cast<GenericMachineInstr>(MI);
5355 Register Dst = SDiv.getReg(0);
5356 Register LHS = SDiv.getReg(1);
5357 Register RHS = SDiv.getReg(2);
5358 LLT Ty = MRI.getType(Dst);
5360 LLT CCVT =
5361 Ty.isVector() ? LLT::vector(Ty.getElementCount(), 1) : LLT::scalar(1);
5362
5363 // Effectively we want to lower G_SDIV %lhs, %rhs, where %rhs is a power of 2,
5364 // to the following version:
5365 //
5366 // %c1 = G_CTTZ %rhs
5367 // %inexact = G_SUB $bitwidth, %c1
 5368 // %sign = G_ASHR %lhs, $(bitwidth - 1)
5369 // %lshr = G_LSHR %sign, %inexact
5370 // %add = G_ADD %lhs, %lshr
5371 // %ashr = G_ASHR %add, %c1
 5372 // %ashr = G_SELECT %isoneorallones, %lhs, %ashr
5373 // %zero = G_CONSTANT $0
5374 // %neg = G_NEG %ashr
5375 // %isneg = G_ICMP SLT %rhs, %zero
5376 // %res = G_SELECT %isneg, %neg, %ashr
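  // For example, with 32-bit %lhs = -7 and %rhs = 4: %c1 = 2, %sign = -1,
  // %lshr = 3, %add = -4 and %ashr = -1, i.e. -7 sdiv 4 rounded toward zero;
  // %rhs is positive, so the final select keeps the un-negated value.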
5377
5378 unsigned BitWidth = Ty.getScalarSizeInBits();
5379 auto Zero = Builder.buildConstant(Ty, 0);
5380
5381 auto Bits = Builder.buildConstant(ShiftAmtTy, BitWidth);
5382 auto C1 = Builder.buildCTTZ(ShiftAmtTy, RHS);
5383 auto Inexact = Builder.buildSub(ShiftAmtTy, Bits, C1);
 5384 // Splat LHS's sign bit across the whole width.
5385 auto Sign = Builder.buildAShr(
5386 Ty, LHS, Builder.buildConstant(ShiftAmtTy, BitWidth - 1));
5387
 5388 // Add (LHS < 0) ? abs(RHS) - 1 : 0 so the division rounds toward zero.
5389 auto LSrl = Builder.buildLShr(Ty, Sign, Inexact);
5390 auto Add = Builder.buildAdd(Ty, LHS, LSrl);
5391 auto AShr = Builder.buildAShr(Ty, Add, C1);
5392
 5393 // Special case: (sdiv X, 1) -> X
 5394 // Special case: (sdiv X, -1) -> 0-X
5395 auto One = Builder.buildConstant(Ty, 1);
5396 auto MinusOne = Builder.buildConstant(Ty, -1);
5397 auto IsOne = Builder.buildICmp(CmpInst::Predicate::ICMP_EQ, CCVT, RHS, One);
5398 auto IsMinusOne =
5400 auto IsOneOrMinusOne = Builder.buildOr(CCVT, IsOne, IsMinusOne);
5401 AShr = Builder.buildSelect(Ty, IsOneOrMinusOne, LHS, AShr);
5402
5403 // If divided by a positive value, we're done. Otherwise, the result must be
5404 // negated.
5405 auto Neg = Builder.buildNeg(Ty, AShr);
5406 auto IsNeg = Builder.buildICmp(CmpInst::Predicate::ICMP_SLT, CCVT, RHS, Zero);
5407 Builder.buildSelect(MI.getOperand(0).getReg(), IsNeg, Neg, AShr);
5408 MI.eraseFromParent();
5409}
5410
5412 assert(MI.getOpcode() == TargetOpcode::G_UDIV && "Expected UDIV");
5413 auto &UDiv = cast<GenericMachineInstr>(MI);
5414 Register Dst = UDiv.getReg(0);
5415 Register LHS = UDiv.getReg(1);
5416 Register RHS = UDiv.getReg(2);
5417 LLT Ty = MRI.getType(Dst);
5419
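  // An unsigned divide by a power of two is just a logical shift right by the
  // divisor's trailing-zero count, e.g. udiv x, 8 --> lshr x, 3.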
5420 auto C1 = Builder.buildCTTZ(ShiftAmtTy, RHS);
5421 Builder.buildLShr(MI.getOperand(0).getReg(), LHS, C1);
5422 MI.eraseFromParent();
5423}
5424
5426 assert(MI.getOpcode() == TargetOpcode::G_UMULH);
5427 Register RHS = MI.getOperand(2).getReg();
5428 Register Dst = MI.getOperand(0).getReg();
5429 LLT Ty = MRI.getType(Dst);
5431 auto MatchPow2ExceptOne = [&](const Constant *C) {
5432 if (auto *CI = dyn_cast<ConstantInt>(C))
5433 return CI->getValue().isPowerOf2() && !CI->getValue().isOne();
5434 return false;
5435 };
5436 if (!matchUnaryPredicate(MRI, RHS, MatchPow2ExceptOne, false))
5437 return false;
5438 return isLegalOrBeforeLegalizer({TargetOpcode::G_LSHR, {Ty, ShiftAmtTy}});
5439}
5440
5442 Register LHS = MI.getOperand(1).getReg();
5443 Register RHS = MI.getOperand(2).getReg();
5444 Register Dst = MI.getOperand(0).getReg();
5445 LLT Ty = MRI.getType(Dst);
5447 unsigned NumEltBits = Ty.getScalarSizeInBits();
5448
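  // G_UMULH by 2^k returns the high half of x * 2^k, which is x logically
  // shifted right by (bitwidth - k); e.g. umulh x:s32, 8 --> lshr x, 29.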
5449 auto LogBase2 = buildLogBase2(RHS, Builder);
5450 auto ShiftAmt =
5451 Builder.buildSub(Ty, Builder.buildConstant(Ty, NumEltBits), LogBase2);
5452 auto Trunc = Builder.buildZExtOrTrunc(ShiftAmtTy, ShiftAmt);
5453 Builder.buildLShr(Dst, LHS, Trunc);
5454 MI.eraseFromParent();
5455}
5456
5458 BuildFnTy &MatchInfo) {
5459 unsigned Opc = MI.getOpcode();
5460 assert(Opc == TargetOpcode::G_FADD || Opc == TargetOpcode::G_FSUB ||
5461 Opc == TargetOpcode::G_FMUL || Opc == TargetOpcode::G_FDIV ||
5462 Opc == TargetOpcode::G_FMAD || Opc == TargetOpcode::G_FMA);
5463
5464 Register Dst = MI.getOperand(0).getReg();
5465 Register X = MI.getOperand(1).getReg();
5466 Register Y = MI.getOperand(2).getReg();
5467 LLT Type = MRI.getType(Dst);
5468
5469 // fold (fadd x, fneg(y)) -> (fsub x, y)
5470 // fold (fadd fneg(y), x) -> (fsub x, y)
 5471 // G_FADD is commutative, so both cases are checked by m_GFAdd
5472 if (mi_match(Dst, MRI, m_GFAdd(m_Reg(X), m_GFNeg(m_Reg(Y)))) &&
5473 isLegalOrBeforeLegalizer({TargetOpcode::G_FSUB, {Type}})) {
5474 Opc = TargetOpcode::G_FSUB;
5475 }
 5476 // fold (fsub x, fneg(y)) -> (fadd x, y)
5477 else if (mi_match(Dst, MRI, m_GFSub(m_Reg(X), m_GFNeg(m_Reg(Y)))) &&
5478 isLegalOrBeforeLegalizer({TargetOpcode::G_FADD, {Type}})) {
5479 Opc = TargetOpcode::G_FADD;
5480 }
5481 // fold (fmul fneg(x), fneg(y)) -> (fmul x, y)
5482 // fold (fdiv fneg(x), fneg(y)) -> (fdiv x, y)
5483 // fold (fmad fneg(x), fneg(y), z) -> (fmad x, y, z)
5484 // fold (fma fneg(x), fneg(y), z) -> (fma x, y, z)
5485 else if ((Opc == TargetOpcode::G_FMUL || Opc == TargetOpcode::G_FDIV ||
5486 Opc == TargetOpcode::G_FMAD || Opc == TargetOpcode::G_FMA) &&
5487 mi_match(X, MRI, m_GFNeg(m_Reg(X))) &&
5488 mi_match(Y, MRI, m_GFNeg(m_Reg(Y)))) {
5489 // no opcode change
5490 } else
5491 return false;
5492
5493 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5495 MI.setDesc(B.getTII().get(Opc));
5496 MI.getOperand(1).setReg(X);
5497 MI.getOperand(2).setReg(Y);
5499 };
5500 return true;
5501}
5502
5504 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
5505
5506 Register LHS = MI.getOperand(1).getReg();
5507 MatchInfo = MI.getOperand(2).getReg();
5508 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
5509
5510 const auto LHSCst = Ty.isVector()
5511 ? getFConstantSplat(LHS, MRI, /* allowUndef */ true)
5513 if (!LHSCst)
5514 return false;
5515
5516 // -0.0 is always allowed
5517 if (LHSCst->Value.isNegZero())
5518 return true;
5519
5520 // +0.0 is only allowed if nsz is set.
5521 if (LHSCst->Value.isPosZero())
5522 return MI.getFlag(MachineInstr::FmNsz);
5523
5524 return false;
5525}
5526
5528 Register Dst = MI.getOperand(0).getReg();
5530 Dst, Builder.buildFCanonicalize(MRI.getType(Dst), MatchInfo).getReg(0));
5531 eraseInst(MI);
5532}
5533
5534/// Checks if \p MI is TargetOpcode::G_FMUL and contractable either
5535/// due to global flags or MachineInstr flags.
5536static bool isContractableFMul(MachineInstr &MI, bool AllowFusionGlobally) {
5537 if (MI.getOpcode() != TargetOpcode::G_FMUL)
5538 return false;
5539 return AllowFusionGlobally || MI.getFlag(MachineInstr::MIFlag::FmContract);
5540}
5541
5542static bool hasMoreUses(const MachineInstr &MI0, const MachineInstr &MI1,
5543 const MachineRegisterInfo &MRI) {
5544 return std::distance(MRI.use_instr_nodbg_begin(MI0.getOperand(0).getReg()),
5545 MRI.use_instr_nodbg_end()) >
5546 std::distance(MRI.use_instr_nodbg_begin(MI1.getOperand(0).getReg()),
5547 MRI.use_instr_nodbg_end());
5548}
5549
5551 bool &AllowFusionGlobally,
5552 bool &HasFMAD, bool &Aggressive,
5553 bool CanReassociate) {
5554
5555 auto *MF = MI.getMF();
5556 const auto &TLI = *MF->getSubtarget().getTargetLowering();
5557 const TargetOptions &Options = MF->getTarget().Options;
5558 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
5559
5560 if (CanReassociate &&
5561 !(Options.UnsafeFPMath || MI.getFlag(MachineInstr::MIFlag::FmReassoc)))
5562 return false;
5563
5564 // Floating-point multiply-add with intermediate rounding.
5565 HasFMAD = (!isPreLegalize() && TLI.isFMADLegal(MI, DstType));
5566 // Floating-point multiply-add without intermediate rounding.
5567 bool HasFMA = TLI.isFMAFasterThanFMulAndFAdd(*MF, DstType) &&
5568 isLegalOrBeforeLegalizer({TargetOpcode::G_FMA, {DstType}});
5569 // No valid opcode, do not combine.
5570 if (!HasFMAD && !HasFMA)
5571 return false;
5572
5573 AllowFusionGlobally = Options.AllowFPOpFusion == FPOpFusion::Fast ||
5574 Options.UnsafeFPMath || HasFMAD;
5575 // If the addition is not contractable, do not combine.
5576 if (!AllowFusionGlobally && !MI.getFlag(MachineInstr::MIFlag::FmContract))
5577 return false;
5578
5579 Aggressive = TLI.enableAggressiveFMAFusion(DstType);
5580 return true;
5581}
5582
5584 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
5585 assert(MI.getOpcode() == TargetOpcode::G_FADD);
5586
5587 bool AllowFusionGlobally, HasFMAD, Aggressive;
5588 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
5589 return false;
5590
5591 Register Op1 = MI.getOperand(1).getReg();
5592 Register Op2 = MI.getOperand(2).getReg();
5595 unsigned PreferredFusedOpcode =
5596 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
5597
5598 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
5599 // prefer to fold the multiply with fewer uses.
5600 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
5601 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
5602 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
5603 std::swap(LHS, RHS);
5604 }
5605
5606 // fold (fadd (fmul x, y), z) -> (fma x, y, z)
5607 if (isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
5608 (Aggressive || MRI.hasOneNonDBGUse(LHS.Reg))) {
5609 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5610 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5611 {LHS.MI->getOperand(1).getReg(),
5612 LHS.MI->getOperand(2).getReg(), RHS.Reg});
5613 };
5614 return true;
5615 }
5616
5617 // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
5618 if (isContractableFMul(*RHS.MI, AllowFusionGlobally) &&
5619 (Aggressive || MRI.hasOneNonDBGUse(RHS.Reg))) {
5620 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5621 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5622 {RHS.MI->getOperand(1).getReg(),
5623 RHS.MI->getOperand(2).getReg(), LHS.Reg});
5624 };
5625 return true;
5626 }
5627
5628 return false;
5629}
5630
5632 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
5633 assert(MI.getOpcode() == TargetOpcode::G_FADD);
5634
5635 bool AllowFusionGlobally, HasFMAD, Aggressive;
5636 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
5637 return false;
5638
5639 const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
5640 Register Op1 = MI.getOperand(1).getReg();
5641 Register Op2 = MI.getOperand(2).getReg();
5644 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
5645
5646 unsigned PreferredFusedOpcode =
5647 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
5648
5649 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
5650 // prefer to fold the multiply with fewer uses.
5651 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
5652 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
5653 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
5654 std::swap(LHS, RHS);
5655 }
5656
5657 // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
5658 MachineInstr *FpExtSrc;
5659 if (mi_match(LHS.Reg, MRI, m_GFPExt(m_MInstr(FpExtSrc))) &&
5660 isContractableFMul(*FpExtSrc, AllowFusionGlobally) &&
5661 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
5662 MRI.getType(FpExtSrc->getOperand(1).getReg()))) {
5663 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5664 auto FpExtX = B.buildFPExt(DstType, FpExtSrc->getOperand(1).getReg());
5665 auto FpExtY = B.buildFPExt(DstType, FpExtSrc->getOperand(2).getReg());
5666 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5667 {FpExtX.getReg(0), FpExtY.getReg(0), RHS.Reg});
5668 };
5669 return true;
5670 }
5671
5672 // fold (fadd z, (fpext (fmul x, y))) -> (fma (fpext x), (fpext y), z)
5673 // Note: Commutes FADD operands.
5674 if (mi_match(RHS.Reg, MRI, m_GFPExt(m_MInstr(FpExtSrc))) &&
5675 isContractableFMul(*FpExtSrc, AllowFusionGlobally) &&
5676 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
5677 MRI.getType(FpExtSrc->getOperand(1).getReg()))) {
5678 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5679 auto FpExtX = B.buildFPExt(DstType, FpExtSrc->getOperand(1).getReg());
5680 auto FpExtY = B.buildFPExt(DstType, FpExtSrc->getOperand(2).getReg());
5681 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5682 {FpExtX.getReg(0), FpExtY.getReg(0), LHS.Reg});
5683 };
5684 return true;
5685 }
5686
5687 return false;
5688}
5689
5691 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
5692 assert(MI.getOpcode() == TargetOpcode::G_FADD);
5693
5694 bool AllowFusionGlobally, HasFMAD, Aggressive;
5695 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive, true))
5696 return false;
5697
5698 Register Op1 = MI.getOperand(1).getReg();
5699 Register Op2 = MI.getOperand(2).getReg();
5702 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
5703
5704 unsigned PreferredFusedOpcode =
5705 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
5706
5707 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
5708 // prefer to fold the multiply with fewer uses.
5709 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
5710 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
5711 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
5712 std::swap(LHS, RHS);
5713 }
5714
5715 MachineInstr *FMA = nullptr;
5716 Register Z;
5717 // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z))
5718 if (LHS.MI->getOpcode() == PreferredFusedOpcode &&
5719 (MRI.getVRegDef(LHS.MI->getOperand(3).getReg())->getOpcode() ==
5720 TargetOpcode::G_FMUL) &&
5721 MRI.hasOneNonDBGUse(LHS.MI->getOperand(0).getReg()) &&
5722 MRI.hasOneNonDBGUse(LHS.MI->getOperand(3).getReg())) {
5723 FMA = LHS.MI;
5724 Z = RHS.Reg;
5725 }
5726 // fold (fadd z, (fma x, y, (fmul u, v))) -> (fma x, y, (fma u, v, z))
5727 else if (RHS.MI->getOpcode() == PreferredFusedOpcode &&
5728 (MRI.getVRegDef(RHS.MI->getOperand(3).getReg())->getOpcode() ==
5729 TargetOpcode::G_FMUL) &&
5730 MRI.hasOneNonDBGUse(RHS.MI->getOperand(0).getReg()) &&
5731 MRI.hasOneNonDBGUse(RHS.MI->getOperand(3).getReg())) {
5732 Z = LHS.Reg;
5733 FMA = RHS.MI;
5734 }
5735
5736 if (FMA) {
5737 MachineInstr *FMulMI = MRI.getVRegDef(FMA->getOperand(3).getReg());
5738 Register X = FMA->getOperand(1).getReg();
5739 Register Y = FMA->getOperand(2).getReg();
5740 Register U = FMulMI->getOperand(1).getReg();
5741 Register V = FMulMI->getOperand(2).getReg();
5742
5743 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5744 Register InnerFMA = MRI.createGenericVirtualRegister(DstTy);
5745 B.buildInstr(PreferredFusedOpcode, {InnerFMA}, {U, V, Z});
5746 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5747 {X, Y, InnerFMA});
5748 };
5749 return true;
5750 }
5751
5752 return false;
5753}
5754
5756 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
5757 assert(MI.getOpcode() == TargetOpcode::G_FADD);
5758
5759 bool AllowFusionGlobally, HasFMAD, Aggressive;
5760 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
5761 return false;
5762
5763 if (!Aggressive)
5764 return false;
5765
5766 const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
5767 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
5768 Register Op1 = MI.getOperand(1).getReg();
5769 Register Op2 = MI.getOperand(2).getReg();
5772
5773 unsigned PreferredFusedOpcode =
5774 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
5775
5776 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
5777 // prefer to fold the multiply with fewer uses.
5778 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
5779 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
5780 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
5781 std::swap(LHS, RHS);
5782 }
5783
5784 // Builds: (fma x, y, (fma (fpext u), (fpext v), z))
5785 auto buildMatchInfo = [=, &MI](Register U, Register V, Register Z, Register X,
5787 Register FpExtU = B.buildFPExt(DstType, U).getReg(0);
5788 Register FpExtV = B.buildFPExt(DstType, V).getReg(0);
5789 Register InnerFMA =
5790 B.buildInstr(PreferredFusedOpcode, {DstType}, {FpExtU, FpExtV, Z})
5791 .getReg(0);
5792 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5793 {X, Y, InnerFMA});
5794 };
5795
5796 MachineInstr *FMulMI, *FMAMI;
5797 // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
5798 // -> (fma x, y, (fma (fpext u), (fpext v), z))
5799 if (LHS.MI->getOpcode() == PreferredFusedOpcode &&
5800 mi_match(LHS.MI->getOperand(3).getReg(), MRI,
5801 m_GFPExt(m_MInstr(FMulMI))) &&
5802 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
5803 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
5804 MRI.getType(FMulMI->getOperand(0).getReg()))) {
5805 MatchInfo = [=](MachineIRBuilder &B) {
5806 buildMatchInfo(FMulMI->getOperand(1).getReg(),
5807 FMulMI->getOperand(2).getReg(), RHS.Reg,
5808 LHS.MI->getOperand(1).getReg(),
5809 LHS.MI->getOperand(2).getReg(), B);
5810 };
5811 return true;
5812 }
5813
5814 // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
5815 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
5816 // FIXME: This turns two single-precision and one double-precision
5817 // operation into two double-precision operations, which might not be
5818 // interesting for all targets, especially GPUs.
5819 if (mi_match(LHS.Reg, MRI, m_GFPExt(m_MInstr(FMAMI))) &&
5820 FMAMI->getOpcode() == PreferredFusedOpcode) {
5821 MachineInstr *FMulMI = MRI.getVRegDef(FMAMI->getOperand(3).getReg());
5822 if (isContractableFMul(*FMulMI, AllowFusionGlobally) &&
5823 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
5824 MRI.getType(FMAMI->getOperand(0).getReg()))) {
5825 MatchInfo = [=](MachineIRBuilder &B) {
5826 Register X = FMAMI->getOperand(1).getReg();
5827 Register Y = FMAMI->getOperand(2).getReg();
5828 X = B.buildFPExt(DstType, X).getReg(0);
5829 Y = B.buildFPExt(DstType, Y).getReg(0);
5830 buildMatchInfo(FMulMI->getOperand(1).getReg(),
5831 FMulMI->getOperand(2).getReg(), RHS.Reg, X, Y, B);
5832 };
5833
5834 return true;
5835 }
5836 }
5837
 5838 // fold (fadd z, (fma x, y, (fpext (fmul u, v))))
5839 // -> (fma x, y, (fma (fpext u), (fpext v), z))
5840 if (RHS.MI->getOpcode() == PreferredFusedOpcode &&
5841 mi_match(RHS.MI->getOperand(3).getReg(), MRI,
5842 m_GFPExt(m_MInstr(FMulMI))) &&
5843 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
5844 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
5845 MRI.getType(FMulMI->getOperand(0).getReg()))) {
5846 MatchInfo = [=](MachineIRBuilder &B) {
5847 buildMatchInfo(FMulMI->getOperand(1).getReg(),
5848 FMulMI->getOperand(2).getReg(), LHS.Reg,
5849 RHS.MI->getOperand(1).getReg(),
5850 RHS.MI->getOperand(2).getReg(), B);
5851 };
5852 return true;
5853 }
5854
 5855 // fold (fadd z, (fpext (fma x, y, (fmul u, v))))
5856 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
5857 // FIXME: This turns two single-precision and one double-precision
5858 // operation into two double-precision operations, which might not be
5859 // interesting for all targets, especially GPUs.
5860 if (mi_match(RHS.Reg, MRI, m_GFPExt(m_MInstr(FMAMI))) &&
5861 FMAMI->getOpcode() == PreferredFusedOpcode) {
5862 MachineInstr *FMulMI = MRI.getVRegDef(FMAMI->getOperand(3).getReg());
5863 if (isContractableFMul(*FMulMI, AllowFusionGlobally) &&
5864 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
5865 MRI.getType(FMAMI->getOperand(0).getReg()))) {
5866 MatchInfo = [=](MachineIRBuilder &B) {
5867 Register X = FMAMI->getOperand(1).getReg();
5868 Register Y = FMAMI->getOperand(2).getReg();
5869 X = B.buildFPExt(DstType, X).getReg(0);
5870 Y = B.buildFPExt(DstType, Y).getReg(0);
5871 buildMatchInfo(FMulMI->getOperand(1).getReg(),
5872 FMulMI->getOperand(2).getReg(), LHS.Reg, X, Y, B);
5873 };
5874 return true;
5875 }
5876 }
5877
5878 return false;
5879}
5880
5882 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
5883 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
5884
5885 bool AllowFusionGlobally, HasFMAD, Aggressive;
5886 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
5887 return false;
5888
5889 Register Op1 = MI.getOperand(1).getReg();
5890 Register Op2 = MI.getOperand(2).getReg();
5893 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
5894
 5895 // If we have two choices trying to fold (fsub (fmul u, v), (fmul x, y)),
5896 // prefer to fold the multiply with fewer uses.
 5897 bool FirstMulHasFewerUses = true;
5898 if (isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
5899 isContractableFMul(*RHS.MI, AllowFusionGlobally) &&
5900 hasMoreUses(*LHS.MI, *RHS.MI, MRI))
5901 FirstMulHasFewerUses = false;
5902
5903 unsigned PreferredFusedOpcode =
5904 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
5905
5906 // fold (fsub (fmul x, y), z) -> (fma x, y, -z)
5907 if (FirstMulHasFewerUses &&
5908 (isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
5909 (Aggressive || MRI.hasOneNonDBGUse(LHS.Reg)))) {
5910 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5911 Register NegZ = B.buildFNeg(DstTy, RHS.Reg).getReg(0);
5912 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5913 {LHS.MI->getOperand(1).getReg(),
5914 LHS.MI->getOperand(2).getReg(), NegZ});
5915 };
5916 return true;
5917 }
5918 // fold (fsub x, (fmul y, z)) -> (fma -y, z, x)
5919 else if ((isContractableFMul(*RHS.MI, AllowFusionGlobally) &&
5920 (Aggressive || MRI.hasOneNonDBGUse(RHS.Reg)))) {
5921 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5922 Register NegY =
5923 B.buildFNeg(DstTy, RHS.MI->getOperand(1).getReg()).getReg(0);
5924 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5925 {NegY, RHS.MI->getOperand(2).getReg(), LHS.Reg});
5926 };
5927 return true;
5928 }
5929
5930 return false;
5931}
5932
5934 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
5935 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
5936
5937 bool AllowFusionGlobally, HasFMAD, Aggressive;
5938 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
5939 return false;
5940
5941 Register LHSReg = MI.getOperand(1).getReg();
5942 Register RHSReg = MI.getOperand(2).getReg();
5943 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
5944
5945 unsigned PreferredFusedOpcode =
5946 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
5947
5948 MachineInstr *FMulMI;
5949 // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
5950 if (mi_match(LHSReg, MRI, m_GFNeg(m_MInstr(FMulMI))) &&
5951 (Aggressive || (MRI.hasOneNonDBGUse(LHSReg) &&
5952 MRI.hasOneNonDBGUse(FMulMI->getOperand(0).getReg()))) &&
5953 isContractableFMul(*FMulMI, AllowFusionGlobally)) {
5954 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5955 Register NegX =
5956 B.buildFNeg(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
5957 Register NegZ = B.buildFNeg(DstTy, RHSReg).getReg(0);
5958 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5959 {NegX, FMulMI->getOperand(2).getReg(), NegZ});
5960 };
5961 return true;
5962 }
5963
 5964 // fold (fsub x, (fneg (fmul y, z))) -> (fma y, z, x)
5965 if (mi_match(RHSReg, MRI, m_GFNeg(m_MInstr(FMulMI))) &&
5966 (Aggressive || (MRI.hasOneNonDBGUse(RHSReg) &&
5967 MRI.hasOneNonDBGUse(FMulMI->getOperand(0).getReg()))) &&
5968 isContractableFMul(*FMulMI, AllowFusionGlobally)) {
5969 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5970 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5971 {FMulMI->getOperand(1).getReg(),
5972 FMulMI->getOperand(2).getReg(), LHSReg});
5973 };
5974 return true;
5975 }
5976
5977 return false;
5978}
5979
5981 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
5982 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
5983
5984 bool AllowFusionGlobally, HasFMAD, Aggressive;
5985 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
5986 return false;
5987
5988 Register LHSReg = MI.getOperand(1).getReg();
5989 Register RHSReg = MI.getOperand(2).getReg();
5990 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
5991
5992 unsigned PreferredFusedOpcode =
5993 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
5994
5995 MachineInstr *FMulMI;
5996 // fold (fsub (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), (fneg z))
5997 if (mi_match(LHSReg, MRI, m_GFPExt(m_MInstr(FMulMI))) &&
5998 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
5999 (Aggressive || MRI.hasOneNonDBGUse(LHSReg))) {
6000 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6001 Register FpExtX =
6002 B.buildFPExt(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
6003 Register FpExtY =
6004 B.buildFPExt(DstTy, FMulMI->getOperand(2).getReg()).getReg(0);
6005 Register NegZ = B.buildFNeg(DstTy, RHSReg).getReg(0);
6006 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6007 {FpExtX, FpExtY, NegZ});
6008 };
6009 return true;
6010 }
6011
6012 // fold (fsub x, (fpext (fmul y, z))) -> (fma (fneg (fpext y)), (fpext z), x)
6013 if (mi_match(RHSReg, MRI, m_GFPExt(m_MInstr(FMulMI))) &&
6014 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6015 (Aggressive || MRI.hasOneNonDBGUse(RHSReg))) {
6016 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6017 Register FpExtY =
6018 B.buildFPExt(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
6019 Register NegY = B.buildFNeg(DstTy, FpExtY).getReg(0);
6020 Register FpExtZ =
6021 B.buildFPExt(DstTy, FMulMI->getOperand(2).getReg()).getReg(0);
6022 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6023 {NegY, FpExtZ, LHSReg});
6024 };
6025 return true;
6026 }
6027
6028 return false;
6029}
6030
6032 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
6033 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
6034
6035 bool AllowFusionGlobally, HasFMAD, Aggressive;
6036 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6037 return false;
6038
6039 const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
6040 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6041 Register LHSReg = MI.getOperand(1).getReg();
6042 Register RHSReg = MI.getOperand(2).getReg();
6043
6044 unsigned PreferredFusedOpcode =
6045 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6046
6047 auto buildMatchInfo = [=](Register Dst, Register X, Register Y, Register Z,
6049 Register FpExtX = B.buildFPExt(DstTy, X).getReg(0);
6050 Register FpExtY = B.buildFPExt(DstTy, Y).getReg(0);
6051 B.buildInstr(PreferredFusedOpcode, {Dst}, {FpExtX, FpExtY, Z});
6052 };
6053
6054 MachineInstr *FMulMI;
6055 // fold (fsub (fpext (fneg (fmul x, y))), z) ->
6056 // (fneg (fma (fpext x), (fpext y), z))
6057 // fold (fsub (fneg (fpext (fmul x, y))), z) ->
6058 // (fneg (fma (fpext x), (fpext y), z))
6059 if ((mi_match(LHSReg, MRI, m_GFPExt(m_GFNeg(m_MInstr(FMulMI)))) ||
6060 mi_match(LHSReg, MRI, m_GFNeg(m_GFPExt(m_MInstr(FMulMI))))) &&
6061 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6062 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstTy,
6063 MRI.getType(FMulMI->getOperand(0).getReg()))) {
6064 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6066 buildMatchInfo(FMAReg, FMulMI->getOperand(1).getReg(),
6067 FMulMI->getOperand(2).getReg(), RHSReg, B);
6068 B.buildFNeg(MI.getOperand(0).getReg(), FMAReg);
6069 };
6070 return true;
6071 }
6072
6073 // fold (fsub x, (fpext (fneg (fmul y, z)))) -> (fma (fpext y), (fpext z), x)
6074 // fold (fsub x, (fneg (fpext (fmul y, z)))) -> (fma (fpext y), (fpext z), x)
6075 if ((mi_match(RHSReg, MRI, m_GFPExt(m_GFNeg(m_MInstr(FMulMI)))) ||
6076 mi_match(RHSReg, MRI, m_GFNeg(m_GFPExt(m_MInstr(FMulMI))))) &&
6077 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6078 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstTy,
6079 MRI.getType(FMulMI->getOperand(0).getReg()))) {
6080 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6081 buildMatchInfo(MI.getOperand(0).getReg(), FMulMI->getOperand(1).getReg(),
6082 FMulMI->getOperand(2).getReg(), LHSReg, B);
6083 };
6084 return true;
6085 }
6086
6087 return false;
6088}
6089
6091 unsigned &IdxToPropagate) {
6092 bool PropagateNaN;
6093 switch (MI.getOpcode()) {
6094 default:
6095 return false;
6096 case TargetOpcode::G_FMINNUM:
6097 case TargetOpcode::G_FMAXNUM:
6098 PropagateNaN = false;
6099 break;
6100 case TargetOpcode::G_FMINIMUM:
6101 case TargetOpcode::G_FMAXIMUM:
6102 PropagateNaN = true;
6103 break;
6104 }
6105
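  // With one operand a known NaN, G_FMINNUM/G_FMAXNUM fold to the other
  // operand while G_FMINIMUM/G_FMAXIMUM fold to the NaN itself, so
  // IdxToPropagate names the operand that should replace the result.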
6106 auto MatchNaN = [&](unsigned Idx) {
6107 Register MaybeNaNReg = MI.getOperand(Idx).getReg();
6108 const ConstantFP *MaybeCst = getConstantFPVRegVal(MaybeNaNReg, MRI);
6109 if (!MaybeCst || !MaybeCst->getValueAPF().isNaN())
6110 return false;
6111 IdxToPropagate = PropagateNaN ? Idx : (Idx == 1 ? 2 : 1);
6112 return true;
6113 };
6114
6115 return MatchNaN(1) || MatchNaN(2);
6116}
6117
6119 assert(MI.getOpcode() == TargetOpcode::G_ADD && "Expected a G_ADD");
6120 Register LHS = MI.getOperand(1).getReg();
6121 Register RHS = MI.getOperand(2).getReg();
6122
6123 // Helper lambda to check for opportunities for
6124 // A + (B - A) -> B
6125 // (B - A) + A -> B
6126 auto CheckFold = [&](Register MaybeSub, Register MaybeSameReg) {
6127 Register Reg;
6128 return mi_match(MaybeSub, MRI, m_GSub(m_Reg(Src), m_Reg(Reg))) &&
6129 Reg == MaybeSameReg;
6130 };
6131 return CheckFold(LHS, RHS) || CheckFold(RHS, LHS);
6132}
6133
6135 Register &MatchInfo) {
6136 // This combine folds the following patterns:
6137 //
6138 // G_BUILD_VECTOR_TRUNC (G_BITCAST(x), G_LSHR(G_BITCAST(x), k))
6139 // G_BUILD_VECTOR(G_TRUNC(G_BITCAST(x)), G_TRUNC(G_LSHR(G_BITCAST(x), k)))
6140 // into
6141 // x
6142 // if
 6143 // k == size in bits of the destination vector's element type
6144 // type(x) == type(dst)
6145 //
6146 // G_BUILD_VECTOR(G_TRUNC(G_BITCAST(x)), undef)
6147 // into
6148 // x
6149 // if
6150 // type(x) == type(dst)
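  //  e.g. with x and dst both <2 x s32>, G_BITCAST(x) is an s64; truncating it
  //  and truncating it shifted right by k == 32 yields the two lanes, so the
  //  build_vector just reproduces x.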
6151
6152 LLT DstVecTy = MRI.getType(MI.getOperand(0).getReg());
6153 LLT DstEltTy = DstVecTy.getElementType();
6154
6155 Register Lo, Hi;
6156
6157 if (mi_match(
6158 MI, MRI,
6160 MatchInfo = Lo;
6161 return MRI.getType(MatchInfo) == DstVecTy;
6162 }
6163
6164 std::optional<ValueAndVReg> ShiftAmount;
6165 const auto LoPattern = m_GBitcast(m_Reg(Lo));
6166 const auto HiPattern = m_GLShr(m_GBitcast(m_Reg(Hi)), m_GCst(ShiftAmount));
6167 if (mi_match(
6168 MI, MRI,
6169 m_any_of(m_GBuildVectorTrunc(LoPattern, HiPattern),
6170 m_GBuildVector(m_GTrunc(LoPattern), m_GTrunc(HiPattern))))) {
6171 if (Lo == Hi && ShiftAmount->Value == DstEltTy.getSizeInBits()) {
6172 MatchInfo = Lo;
6173 return MRI.getType(MatchInfo) == DstVecTy;
6174 }
6175 }
6176
6177 return false;
6178}
6179
6181 Register &MatchInfo) {
6182 // Replace (G_TRUNC (G_BITCAST (G_BUILD_VECTOR x, y)) with just x
6183 // if type(x) == type(G_TRUNC)
6184 if (!mi_match(MI.getOperand(1).getReg(), MRI,
6185 m_GBitcast(m_GBuildVector(m_Reg(MatchInfo), m_Reg()))))
6186 return false;
6187
6188 return MRI.getType(MatchInfo) == MRI.getType(MI.getOperand(0).getReg());
6189}
6190
6192 Register &MatchInfo) {
6193 // Replace (G_TRUNC (G_LSHR (G_BITCAST (G_BUILD_VECTOR x, y)), K)) with
6194 // y if K == size of vector element type
6195 std::optional<ValueAndVReg> ShiftAmt;
6196 if (!mi_match(MI.getOperand(1).getReg(), MRI,
6198 m_GCst(ShiftAmt))))
6199 return false;
6200
6201 LLT MatchTy = MRI.getType(MatchInfo);
6202 return ShiftAmt->Value.getZExtValue() == MatchTy.getSizeInBits() &&
6203 MatchTy == MRI.getType(MI.getOperand(0).getReg());
6204}
6205
6206unsigned CombinerHelper::getFPMinMaxOpcForSelect(
6207 CmpInst::Predicate Pred, LLT DstTy,
6208 SelectPatternNaNBehaviour VsNaNRetVal) const {
6209 assert(VsNaNRetVal != SelectPatternNaNBehaviour::NOT_APPLICABLE &&
6210 "Expected a NaN behaviour?");
6211 // Choose an opcode based off of legality or the behaviour when one of the
6212 // LHS/RHS may be NaN.
6213 switch (Pred) {
6214 default:
6215 return 0;
6216 case CmpInst::FCMP_UGT:
6217 case CmpInst::FCMP_UGE:
6218 case CmpInst::FCMP_OGT:
6219 case CmpInst::FCMP_OGE:
6220 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_OTHER)
6221 return TargetOpcode::G_FMAXNUM;
6222 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_NAN)
6223 return TargetOpcode::G_FMAXIMUM;
6224 if (isLegal({TargetOpcode::G_FMAXNUM, {DstTy}}))
6225 return TargetOpcode::G_FMAXNUM;
6226 if (isLegal({TargetOpcode::G_FMAXIMUM, {DstTy}}))
6227 return TargetOpcode::G_FMAXIMUM;
6228 return 0;
6229 case CmpInst::FCMP_ULT:
6230 case CmpInst::FCMP_ULE:
6231 case CmpInst::FCMP_OLT:
6232 case CmpInst::FCMP_OLE:
6233 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_OTHER)
6234 return TargetOpcode::G_FMINNUM;
6235 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_NAN)
6236 return TargetOpcode::G_FMINIMUM;
6237 if (isLegal({TargetOpcode::G_FMINNUM, {DstTy}}))
6238 return TargetOpcode::G_FMINNUM;
6239 if (!isLegal({TargetOpcode::G_FMINIMUM, {DstTy}}))
6240 return 0;
6241 return TargetOpcode::G_FMINIMUM;
6242 }
6243}
6244
6245CombinerHelper::SelectPatternNaNBehaviour
6246CombinerHelper::computeRetValAgainstNaN(Register LHS, Register RHS,
6247 bool IsOrderedComparison) const {
6248 bool LHSSafe = isKnownNeverNaN(LHS, MRI);
6249 bool RHSSafe = isKnownNeverNaN(RHS, MRI);
6250 // Completely unsafe.
6251 if (!LHSSafe && !RHSSafe)
6252 return SelectPatternNaNBehaviour::NOT_APPLICABLE;
6253 if (LHSSafe && RHSSafe)
6254 return SelectPatternNaNBehaviour::RETURNS_ANY;
6255 // An ordered comparison will return false when given a NaN, so it
6256 // returns the RHS.
6257 if (IsOrderedComparison)
6258 return LHSSafe ? SelectPatternNaNBehaviour::RETURNS_NAN
6259 : SelectPatternNaNBehaviour::RETURNS_OTHER;
6260 // An unordered comparison will return true when given a NaN, so it
6261 // returns the LHS.
6262 return LHSSafe ? SelectPatternNaNBehaviour::RETURNS_OTHER
6263 : SelectPatternNaNBehaviour::RETURNS_NAN;
6264}
6265
6266bool CombinerHelper::matchFPSelectToMinMax(Register Dst, Register Cond,
6267 Register TrueVal, Register FalseVal,
6268 BuildFnTy &MatchInfo) {
6269 // Match: select (fcmp cond x, y) x, y
6270 // select (fcmp cond x, y) y, x
6271 // And turn it into fminnum/fmaxnum or fmin/fmax based off of the condition.
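  // e.g. select (fcmp ogt x, y), x, y can become G_FMAXNUM x, y when the
  // known NaN behaviour of the operands allows it.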
6272 LLT DstTy = MRI.getType(Dst);
6273 // Bail out early on pointers, since we'll never want to fold to a min/max.
6274 if (DstTy.isPointer())
6275 return false;
6276 // Match a floating point compare with a less-than/greater-than predicate.
6277 // TODO: Allow multiple users of the compare if they are all selects.
6278 CmpInst::Predicate Pred;
6279 Register CmpLHS, CmpRHS;
6280 if (!mi_match(Cond, MRI,
6282 m_GFCmp(m_Pred(Pred), m_Reg(CmpLHS), m_Reg(CmpRHS)))) ||
6283 CmpInst::isEquality(Pred))
6284 return false;
6285 SelectPatternNaNBehaviour ResWithKnownNaNInfo =
6286 computeRetValAgainstNaN(CmpLHS, CmpRHS, CmpInst::isOrdered(Pred));
6287 if (ResWithKnownNaNInfo == SelectPatternNaNBehaviour::NOT_APPLICABLE)
6288 return false;
6289 if (TrueVal == CmpRHS && FalseVal == CmpLHS) {
6290 std::swap(CmpLHS, CmpRHS);
6291 Pred = CmpInst::getSwappedPredicate(Pred);
6292 if (ResWithKnownNaNInfo == SelectPatternNaNBehaviour::RETURNS_NAN)
6293 ResWithKnownNaNInfo = SelectPatternNaNBehaviour::RETURNS_OTHER;
6294 else if (ResWithKnownNaNInfo == SelectPatternNaNBehaviour::RETURNS_OTHER)
6295 ResWithKnownNaNInfo = SelectPatternNaNBehaviour::RETURNS_NAN;
6296 }
6297 if (TrueVal != CmpLHS || FalseVal != CmpRHS)
6298 return false;
6299 // Decide what type of max/min this should be based off of the predicate.
6300 unsigned Opc = getFPMinMaxOpcForSelect(Pred, DstTy, ResWithKnownNaNInfo);
6301 if (!Opc || !isLegal({Opc, {DstTy}}))
6302 return false;
 6303 // Comparisons between -0.0 and +0.0 may give different results...
 6304 // unless we have fmaximum/fminimum, which define -0.0 < +0.0.
6305 if (Opc != TargetOpcode::G_FMAXIMUM && Opc != TargetOpcode::G_FMINIMUM) {
6306 // We don't know if a comparison between two 0s will give us a consistent
6307 // result. Be conservative and only proceed if at least one side is
6308 // non-zero.
6309 auto KnownNonZeroSide = getFConstantVRegValWithLookThrough(CmpLHS, MRI);
6310 if (!KnownNonZeroSide || !KnownNonZeroSide->Value.isNonZero()) {
6311 KnownNonZeroSide = getFConstantVRegValWithLookThrough(CmpRHS, MRI);
6312 if (!KnownNonZeroSide || !KnownNonZeroSide->Value.isNonZero())
6313 return false;
6314 }
6315 }
6316 MatchInfo = [=](MachineIRBuilder &B) {
6317 B.buildInstr(Opc, {Dst}, {CmpLHS, CmpRHS});
6318 };
6319 return true;
6320}
6321
6323 BuildFnTy &MatchInfo) {
6324 // TODO: Handle integer cases.
6325 assert(MI.getOpcode() == TargetOpcode::G_SELECT);
6326 // Condition may be fed by a truncated compare.
6327 Register Cond = MI.getOperand(1).getReg();
6328 Register MaybeTrunc;
6329 if (mi_match(Cond, MRI, m_OneNonDBGUse(m_GTrunc(m_Reg(MaybeTrunc)))))
6330 Cond = MaybeTrunc;
6331 Register Dst = MI.getOperand(0).getReg();
6332 Register TrueVal = MI.getOperand(2).getReg();
6333 Register FalseVal = MI.getOperand(3).getReg();
6334 return matchFPSelectToMinMax(Dst, Cond, TrueVal, FalseVal, MatchInfo);
6335}
6336
6338 BuildFnTy &MatchInfo) {
6339 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
6340 // (X + Y) == X --> Y == 0
6341 // (X + Y) != X --> Y != 0
6342 // (X - Y) == X --> Y == 0
6343 // (X - Y) != X --> Y != 0
6344 // (X ^ Y) == X --> Y == 0
6345 // (X ^ Y) != X --> Y != 0
6346 Register Dst = MI.getOperand(0).getReg();
6347 CmpInst::Predicate Pred;
6348 Register X, Y, OpLHS, OpRHS;
6349 bool MatchedSub = mi_match(
6350 Dst, MRI,
6351 m_c_GICmp(m_Pred(Pred), m_Reg(X), m_GSub(m_Reg(OpLHS), m_Reg(Y))));
6352 if (MatchedSub && X != OpLHS)
6353 return false;
6354 if (!MatchedSub) {
6355 if (!mi_match(Dst, MRI,
6356 m_c_GICmp(m_Pred(Pred), m_Reg(X),
6357 m_any_of(m_GAdd(m_Reg(OpLHS), m_Reg(OpRHS)),
6358 m_GXor(m_Reg(OpLHS), m_Reg(OpRHS))))))
6359 return false;
6360 Y = X == OpLHS ? OpRHS : X == OpRHS ? OpLHS : Register();
6361 }
6362 MatchInfo = [=](MachineIRBuilder &B) {
6363 auto Zero = B.buildConstant(MRI.getType(Y), 0);
6364 B.buildICmp(Pred, Dst, Y, Zero);
6365 };
6366 return CmpInst::isEquality(Pred) && Y.isValid();
6367}
6368
6370 Register ShiftReg = MI.getOperand(2).getReg();
6371 LLT ResTy = MRI.getType(MI.getOperand(0).getReg());
6372 auto IsShiftTooBig = [&](const Constant *C) {
6373 auto *CI = dyn_cast<ConstantInt>(C);
6374 return CI && CI->uge(ResTy.getScalarSizeInBits());
6375 };
6376 return matchUnaryPredicate(MRI, ShiftReg, IsShiftTooBig);
6377}
6378
6380 unsigned LHSOpndIdx = 1;
6381 unsigned RHSOpndIdx = 2;
6382 switch (MI.getOpcode()) {
6383 case TargetOpcode::G_UADDO:
6384 case TargetOpcode::G_SADDO:
6385 case TargetOpcode::G_UMULO:
6386 case TargetOpcode::G_SMULO:
6387 LHSOpndIdx = 2;
6388 RHSOpndIdx = 3;
6389 break;
6390 default:
6391 break;
6392 }
6393 Register LHS = MI.getOperand(LHSOpndIdx).getReg();
6394 Register RHS = MI.getOperand(RHSOpndIdx).getReg();
6395 if (!getIConstantVRegVal(LHS, MRI)) {
 6396 // Skip commuting if LHS is not a constant. But LHS may be a
 6397 // G_CONSTANT_FOLD_BARRIER. If so, we commute as long as we don't already
 6398 // have a constant on the RHS.
6399 if (MRI.getVRegDef(LHS)->getOpcode() !=
6400 TargetOpcode::G_CONSTANT_FOLD_BARRIER)
6401 return false;
6402 }
6403 // Commute as long as RHS is not a constant or G_CONSTANT_FOLD_BARRIER.
6404 return MRI.getVRegDef(RHS)->getOpcode() !=
6405 TargetOpcode::G_CONSTANT_FOLD_BARRIER &&
6407}
6408
6410 Register LHS = MI.getOperand(1).getReg();
6411 Register RHS = MI.getOperand(2).getReg();
6412 std::optional<FPValueAndVReg> ValAndVReg;
6413 if (!mi_match(LHS, MRI, m_GFCstOrSplat(ValAndVReg)))
6414 return false;
6415 return !mi_match(RHS, MRI, m_GFCstOrSplat(ValAndVReg));
6416}
6417
6420 unsigned LHSOpndIdx = 1;
6421 unsigned RHSOpndIdx = 2;
6422 switch (MI.getOpcode()) {
6423 case TargetOpcode::G_UADDO:
6424 case TargetOpcode::G_SADDO:
6425 case TargetOpcode::G_UMULO:
6426 case TargetOpcode::G_SMULO:
6427 LHSOpndIdx = 2;
6428 RHSOpndIdx = 3;
6429 break;
6430 default:
6431 break;
6432 }
6433 Register LHSReg = MI.getOperand(LHSOpndIdx).getReg();
6434 Register RHSReg = MI.getOperand(RHSOpndIdx).getReg();
6435 MI.getOperand(LHSOpndIdx).setReg(RHSReg);
6436 MI.getOperand(RHSOpndIdx).setReg(LHSReg);
6438}
6439
6440bool CombinerHelper::isOneOrOneSplat(Register Src, bool AllowUndefs) {
6441 LLT SrcTy = MRI.getType(Src);
6442 if (SrcTy.isFixedVector())
6443 return isConstantSplatVector(Src, 1, AllowUndefs);
6444 if (SrcTy.isScalar()) {
6445 if (AllowUndefs && getOpcodeDef<GImplicitDef>(Src, MRI) != nullptr)
6446 return true;
6447 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
6448 return IConstant && IConstant->Value == 1;
6449 }
6450 return false; // scalable vector
6451}
6452
6453bool CombinerHelper::isZeroOrZeroSplat(Register Src, bool AllowUndefs) {
6454 LLT SrcTy = MRI.getType(Src);
6455 if (SrcTy.isFixedVector())
6456 return isConstantSplatVector(Src, 0, AllowUndefs);
6457 if (SrcTy.isScalar()) {
6458 if (AllowUndefs && getOpcodeDef<GImplicitDef>(Src, MRI) != nullptr)
6459 return true;
6460 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
6461 return IConstant && IConstant->Value == 0;
6462 }
6463 return false; // scalable vector
6464}
6465
6466// Ignores COPYs during conformance checks.
6467// FIXME scalable vectors.
6468bool CombinerHelper::isConstantSplatVector(Register Src, int64_t SplatValue,
6469 bool AllowUndefs) {
6470 GBuildVector *BuildVector = getOpcodeDef<GBuildVector>(Src, MRI);
6471 if (!BuildVector)
6472 return false;
6473 unsigned NumSources = BuildVector->getNumSources();
6474
6475 for (unsigned I = 0; I < NumSources; ++I) {
6476 GImplicitDef *ImplicitDef =
6477 getOpcodeDef<GImplicitDef>(BuildVector->getSourceReg(I), MRI);
6478 if (ImplicitDef && AllowUndefs)
6479 continue;
6480 if (ImplicitDef && !AllowUndefs)
6481 return false;
6482 std::optional<ValueAndVReg> IConstant =
6484 if (IConstant && IConstant->Value == SplatValue)
6485 continue;
6486 return false;
6487 }
6488 return true;
6489}
6490
6491// Ignores COPYs during lookups.
6492// FIXME scalable vectors
6493std::optional<APInt>
6494CombinerHelper::getConstantOrConstantSplatVector(Register Src) {
6495 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
6496 if (IConstant)
6497 return IConstant->Value;
6498
6499 GBuildVector *BuildVector = getOpcodeDef<GBuildVector>(Src, MRI);
6500 if (!BuildVector)
6501 return std::nullopt;
6502 unsigned NumSources = BuildVector->getNumSources();
6503
6504 std::optional<APInt> Value = std::nullopt;
6505 for (unsigned I = 0; I < NumSources; ++I) {
6506 std::optional<ValueAndVReg> IConstant =
6508 if (!IConstant)
6509 return std::nullopt;
6510 if (!Value)
6511 Value = IConstant->Value;
6512 else if (*Value != IConstant->Value)
6513 return std::nullopt;
6514 }
6515 return Value;
6516}
6517
6518// FIXME G_SPLAT_VECTOR
6519bool CombinerHelper::isConstantOrConstantVectorI(Register Src) const {
6520 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
6521 if (IConstant)
6522 return true;
6523
6524 GBuildVector *BuildVector = getOpcodeDef<GBuildVector>(Src, MRI);
6525 if (!BuildVector)
6526 return false;
6527
6528 unsigned NumSources = BuildVector->getNumSources();
6529 for (unsigned I = 0; I < NumSources; ++I) {
6530 std::optional<ValueAndVReg> IConstant =
6532 if (!IConstant)
6533 return false;
6534 }
6535 return true;
6536}
6537
6538// TODO: use knownbits to determine zeros
6539bool CombinerHelper::tryFoldSelectOfConstants(GSelect *Select,
6540 BuildFnTy &MatchInfo) {
6541 uint32_t Flags = Select->getFlags();
6542 Register Dest = Select->getReg(0);
6543 Register Cond = Select->getCondReg();
6544 Register True = Select->getTrueReg();
6545 Register False = Select->getFalseReg();
6546 LLT CondTy = MRI.getType(Select->getCondReg());
6547 LLT TrueTy = MRI.getType(Select->getTrueReg());
6548
6549 // We only do this combine for scalar boolean conditions.
6550 if (CondTy != LLT::scalar(1))
6551 return false;
6552
6553 if (TrueTy.isPointer())
6554 return false;
6555
6556 // Both are scalars.
6557 std::optional<ValueAndVReg> TrueOpt =
6559 std::optional<ValueAndVReg> FalseOpt =
6561
6562 if (!TrueOpt || !FalseOpt)
6563 return false;
6564
6565 APInt TrueValue = TrueOpt->Value;
6566 APInt FalseValue = FalseOpt->Value;
6567
6568 // select Cond, 1, 0 --> zext (Cond)
6569 if (TrueValue.isOne() && FalseValue.isZero()) {
6570 MatchInfo = [=](MachineIRBuilder &B) {
6571 B.setInstrAndDebugLoc(*Select);
6572 B.buildZExtOrTrunc(Dest, Cond);
6573 };
6574 return true;
6575 }
6576
6577 // select Cond, -1, 0 --> sext (Cond)
6578 if (TrueValue.isAllOnes() && FalseValue.isZero()) {
6579 MatchInfo = [=](MachineIRBuilder &B) {
6580 B.setInstrAndDebugLoc(*Select);
6581 B.buildSExtOrTrunc(Dest, Cond);
6582 };
6583 return true;
6584 }
6585
6586 // select Cond, 0, 1 --> zext (!Cond)
6587 if (TrueValue.isZero() && FalseValue.isOne()) {
6588 MatchInfo = [=](MachineIRBuilder &B) {
6589 B.setInstrAndDebugLoc(*Select);
6591 B.buildNot(Inner, Cond);
6592 B.buildZExtOrTrunc(Dest, Inner);
6593 };
6594 return true;
6595 }
6596
6597 // select Cond, 0, -1 --> sext (!Cond)
6598 if (TrueValue.isZero() && FalseValue.isAllOnes()) {
6599 MatchInfo = [=](MachineIRBuilder &B) {
6600 B.setInstrAndDebugLoc(*Select);
6602 B.buildNot(Inner, Cond);
6603 B.buildSExtOrTrunc(Dest, Inner);
6604 };
6605 return true;
6606 }
6607
6608 // select Cond, C1, C1-1 --> add (zext Cond), C1-1
6609 if (TrueValue - 1 == FalseValue) {
6610 MatchInfo = [=](MachineIRBuilder &B) {
6611 B.setInstrAndDebugLoc(*Select);
6613 B.buildZExtOrTrunc(Inner, Cond);
6614 B.buildAdd(Dest, Inner, False);
6615 };
6616 return true;
6617 }
6618
6619 // select Cond, C1, C1+1 --> add (sext Cond), C1+1
6620 if (TrueValue + 1 == FalseValue) {
6621 MatchInfo = [=](MachineIRBuilder &B) {
6622 B.setInstrAndDebugLoc(*Select);
6624 B.buildSExtOrTrunc(Inner, Cond);
6625 B.buildAdd(Dest, Inner, False);
6626 };
6627 return true;
6628 }
6629
6630 // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
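  // e.g. select Cond, 8, 0 --> shl (zext Cond), 3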
6631 if (TrueValue.isPowerOf2() && FalseValue.isZero()) {
6632 MatchInfo = [=](MachineIRBuilder &B) {
6633 B.setInstrAndDebugLoc(*Select);
6635 B.buildZExtOrTrunc(Inner, Cond);
6636 // The shift amount must be scalar.
6637 LLT ShiftTy = TrueTy.isVector() ? TrueTy.getElementType() : TrueTy;
6638 auto ShAmtC = B.buildConstant(ShiftTy, TrueValue.exactLogBase2());
6639 B.buildShl(Dest, Inner, ShAmtC, Flags);
6640 };
6641 return true;
6642 }
6643 // select Cond, -1, C --> or (sext Cond), C
6644 if (TrueValue.isAllOnes()) {
6645 MatchInfo = [=](MachineIRBuilder &B) {
6646 B.setInstrAndDebugLoc(*Select);
6648 B.buildSExtOrTrunc(Inner, Cond);
6649 B.buildOr(Dest, Inner, False, Flags);
6650 };
6651 return true;
6652 }
6653
6654 // select Cond, C, -1 --> or (sext (not Cond)), C
6655 if (FalseValue.isAllOnes()) {
6656 MatchInfo = [=](MachineIRBuilder &B) {
6657 B.setInstrAndDebugLoc(*Select);
6659 B.buildNot(Not, Cond);
6661 B.buildSExtOrTrunc(Inner, Not);
6662 B.buildOr(Dest, Inner, True, Flags);
6663 };
6664 return true;
6665 }
6666
6667 return false;
6668}
6669
6670// TODO: use knownbits to determine zeros
6671bool CombinerHelper::tryFoldBoolSelectToLogic(GSelect *Select,
6672 BuildFnTy &MatchInfo) {
6673 uint32_t Flags = Select->getFlags();
6674 Register DstReg = Select->getReg(0);
6675 Register Cond = Select->getCondReg();
6676 Register True = Select->getTrueReg();
6677 Register False = Select->getFalseReg();
6678 LLT CondTy = MRI.getType(Select->getCondReg());
6679 LLT TrueTy = MRI.getType(Select->getTrueReg());
6680
6681 // Boolean or fixed vector of booleans.
6682 if (CondTy.isScalableVector() ||
6683 (CondTy.isFixedVector() &&
6684 CondTy.getElementType().getScalarSizeInBits() != 1) ||
6685 CondTy.getScalarSizeInBits() != 1)
6686 return false;
6687
6688 if (CondTy != TrueTy)
6689 return false;
6690
6691 // select Cond, Cond, F --> or Cond, F
6692 // select Cond, 1, F --> or Cond, F
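  // The operand the select would not have chosen is frozen below so that the
  // or/and form cannot propagate undef or poison that the original select
  // would have blocked.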
6693 if ((Cond == True) || isOneOrOneSplat(True, /* AllowUndefs */ true)) {
6694 MatchInfo = [=](MachineIRBuilder &B) {
6695 B.setInstrAndDebugLoc(*Select);
6697 B.buildZExtOrTrunc(Ext, Cond);
6698 auto FreezeFalse = B.buildFreeze(TrueTy, False);
6699 B.buildOr(DstReg, Ext, FreezeFalse, Flags);
6700 };
6701 return true;
6702 }
6703
6704 // select Cond, T, Cond --> and Cond, T
6705 // select Cond, T, 0 --> and Cond, T
6706 if ((Cond == False) || isZeroOrZeroSplat(False, /* AllowUndefs */ true)) {
6707 MatchInfo = [=](MachineIRBuilder &B) {
6708 B.setInstrAndDebugLoc(*Select);
6710 B.buildZExtOrTrunc(Ext, Cond);
6711 auto FreezeTrue = B.buildFreeze(TrueTy, True);
6712 B.buildAnd(DstReg, Ext, FreezeTrue);
6713 };
6714 return true;
6715 }
6716
6717 // select Cond, T, 1 --> or (not Cond), T
6718 if (isOneOrOneSplat(False, /* AllowUndefs */ true)) {
6719 MatchInfo = [=](MachineIRBuilder &B) {
6720 B.setInstrAndDebugLoc(*Select);
6721 // First the not.
6723 B.buildNot(Inner, Cond);
6724 // Then an ext to match the destination register.
6726 B.buildZExtOrTrunc(Ext, Inner);
6727 auto FreezeTrue = B.buildFreeze(TrueTy, True);
6728 B.buildOr(DstReg, Ext, FreezeTrue, Flags);
6729 };
6730 return true;
6731 }
6732
6733 // select Cond, 0, F --> and (not Cond), F
6734 if (isZeroOrZeroSplat(True, /* AllowUndefs */ true)) {
6735 MatchInfo = [=](MachineIRBuilder &B) {
6736 B.setInstrAndDebugLoc(*Select);
6737 // First the not.
6739 B.buildNot(Inner, Cond);
6740 // Then an ext to match the destination register.
6742 B.buildZExtOrTrunc(Ext, Inner);
6743 auto FreezeFalse = B.buildFreeze(TrueTy, False);
6744 B.buildAnd(DstReg, Ext, FreezeFalse);
6745 };
6746 return true;
6747 }
6748
6749 return false;
6750}
6751
6753 BuildFnTy &MatchInfo) {
6754 GSelect *Select = cast<GSelect>(MRI.getVRegDef(MO.getReg()));
6755 GICmp *Cmp = cast<GICmp>(MRI.getVRegDef(Select->getCondReg()));
6756
6757 Register DstReg = Select->getReg(0);
6758 Register True = Select->getTrueReg();
6759 Register False = Select->getFalseReg();
6760 LLT DstTy = MRI.getType(DstReg);
6761
6762 if (DstTy.isPointer())
6763 return false;
6764
6765 // We want to fold the icmp and replace the select.
6766 if (!MRI.hasOneNonDBGUse(Cmp->getReg(0)))
6767 return false;
6768
6769 CmpInst::Predicate Pred = Cmp->getCond();
6770 // We need a greater-than or less-than predicate for the min/max
6771 // canonicalization; equality predicates are rejected.
6772 if (CmpInst::isEquality(Pred))
6773 return false;
6774
6775 [[maybe_unused]] Register CmpLHS = Cmp->getLHSReg();
6776 [[maybe_unused]] Register CmpRHS = Cmp->getRHSReg();
6777
6778 // (icmp X, Y) ? X : Y -> integer minmax.
6779 // see matchSelectPattern in ValueTracking.
6780 // Legality between G_SELECT and integer minmax can differ.
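 // E.g. (icmp ugt X, Y) ? X : Y --> umax X, Y and
 // (icmp slt X, Y) ? X : Y --> smin X, Y.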
6781 assert(True == CmpLHS && False == CmpRHS && "unexpected MIR pattern");
6782
6783 switch (Pred) {
6784 case ICmpInst::ICMP_UGT:
6785 case ICmpInst::ICMP_UGE: {
6786 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMAX, DstTy}))
6787 return false;
6788 MatchInfo = [=](MachineIRBuilder &B) { B.buildUMax(DstReg, True, False); };
6789 return true;
6790 }
6791 case ICmpInst::ICMP_SGT:
6792 case ICmpInst::ICMP_SGE: {
6793 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SMAX, DstTy}))
6794 return false;
6795 MatchInfo = [=](MachineIRBuilder &B) { B.buildSMax(DstReg, True, False); };
6796 return true;
6797 }
6798 case ICmpInst::ICMP_ULT:
6799 case ICmpInst::ICMP_ULE: {
6800 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMIN, DstTy}))
6801 return false;
6802 MatchInfo = [=](MachineIRBuilder &B) { B.buildUMin(DstReg, True, False); };
6803 return true;
6804 }
6805 case ICmpInst::ICMP_SLT:
6806 case ICmpInst::ICMP_SLE: {
6807 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SMIN, DstTy}))
6808 return false;
6809 MatchInfo = [=](MachineIRBuilder &B) { B.buildSMin(DstReg, True, False); };
6810 return true;
6811 }
6812 default:
6813 return false;
6814 }
6815}
6816
6817bool CombinerHelper::matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo) {
6818 GSelect *Select = cast<GSelect>(&MI);
6819
6820 if (tryFoldSelectOfConstants(Select, MatchInfo))
6821 return true;
6822
6823 if (tryFoldBoolSelectToLogic(Select, MatchInfo))
6824 return true;
6825
6826 return false;
6827}
6828
6829/// Fold (icmp Pred1 V1, C1) && (icmp Pred2 V2, C2)
6830/// or (icmp Pred1 V1, C1) || (icmp Pred2 V2, C2)
6831/// into a single comparison using range-based reasoning.
6832/// see InstCombinerImpl::foldAndOrOfICmpsUsingRanges.
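/// E.g. (icmp sge X, 0) && (icmp slt X, 10) collapses to a single
/// icmp ult X, 10 once the two value ranges are combined.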
6833bool CombinerHelper::tryFoldAndOrOrICmpsUsingRanges(GLogicalBinOp *Logic,
6834 BuildFnTy &MatchInfo) {
6835 assert(Logic->getOpcode() != TargetOpcode::G_XOR && "unexpected xor");
6836 bool IsAnd = Logic->getOpcode() == TargetOpcode::G_AND;
6837 Register DstReg = Logic->getReg(0);
6838 Register LHS = Logic->getLHSReg();
6839 Register RHS = Logic->getRHSReg();
6840 unsigned Flags = Logic->getFlags();
6841
6842 // We need a G_ICMP on the LHS register.
6843 GICmp *Cmp1 = getOpcodeDef<GICmp>(LHS, MRI);
6844 if (!Cmp1)
6845 return false;
6846
6847 // We need a G_ICMP on the RHS register.
6848 GICmp *Cmp2 = getOpcodeDef<GICmp>(RHS, MRI);
6849 if (!Cmp2)
6850 return false;
6851
6852 // We want to fold the icmps.
6853 if (!MRI.hasOneNonDBGUse(Cmp1->getReg(0)) ||
6854 !MRI.hasOneNonDBGUse(Cmp2->getReg(0)))
6855 return false;
6856
6857 APInt C1;
6858 APInt C2;
6859 std::optional<ValueAndVReg> MaybeC1 =
6860 getIConstantVRegValWithLookThrough(Cmp1->getRHSReg(), MRI);
6861 if (!MaybeC1)
6862 return false;
6863 C1 = MaybeC1->Value;
6864
6865 std::optional<ValueAndVReg> MaybeC2 =
6866 getIConstantVRegValWithLookThrough(Cmp2->getRHSReg(), MRI);
6867 if (!MaybeC2)
6868 return false;
6869 C2 = MaybeC2->Value;
6870
6871 Register R1 = Cmp1->getLHSReg();
6872 Register R2 = Cmp2->getLHSReg();
6873 CmpInst::Predicate Pred1 = Cmp1->getCond();
6874 CmpInst::Predicate Pred2 = Cmp2->getCond();
6875 LLT CmpTy = MRI.getType(Cmp1->getReg(0));
6876 LLT CmpOperandTy = MRI.getType(R1);
6877
6878 if (CmpOperandTy.isPointer())
6879 return false;
6880
6881 // We build ands, adds, and constants of type CmpOperandTy.
6882 // They must be legal to build.
6883 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_AND, CmpOperandTy}) ||
6884 !isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, CmpOperandTy}) ||
6885 !isConstantLegalOrBeforeLegalizer(CmpOperandTy))
6886 return false;
6887
6888 // Look through an add of a constant offset on R1, R2, or both operands.
6889 // This lets us turn the R + C' < C'' range idiom into a proper range.
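 // E.g. icmp ult (add X, -5), 10 really tests whether X is in [5, 15);
 // subtracting the offset from the computed range recovers that interval.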
6890 std::optional<APInt> Offset1;
6891 std::optional<APInt> Offset2;
6892 if (R1 != R2) {
6893 if (GAdd *Add = getOpcodeDef<GAdd>(R1, MRI)) {
6894 std::optional<ValueAndVReg> MaybeOffset1 =
6895 getIConstantVRegValWithLookThrough(Add->getRHSReg(), MRI);
6896 if (MaybeOffset1) {
6897 R1 = Add->getLHSReg();
6898 Offset1 = MaybeOffset1->Value;
6899 }
6900 }
6901 if (GAdd *Add = getOpcodeDef<GAdd>(R2, MRI)) {
6902 std::optional<ValueAndVReg> MaybeOffset2 =
6903 getIConstantVRegValWithLookThrough(Add->getRHSReg(), MRI);
6904 if (MaybeOffset2) {
6905 R2 = Add->getLHSReg();
6906 Offset2 = MaybeOffset2->Value;
6907 }
6908 }
6909 }
6910
6911 if (R1 != R2)
6912 return false;
6913
6914 // Calculate the icmp ranges, taking the optional offsets into account.
6915 ConstantRange CR1 = ConstantRange::makeExactICmpRegion(
6916 IsAnd ? ICmpInst::getInversePredicate(Pred1) : Pred1, C1);
6917 if (Offset1)
6918 CR1 = CR1.subtract(*Offset1);
6919
6920 ConstantRange CR2 = ConstantRange::makeExactICmpRegion(
6921 IsAnd ? ICmpInst::getInversePredicate(Pred2) : Pred2, C2);
6922 if (Offset2)
6923 CR2 = CR2.subtract(*Offset2);
6924
6925 bool CreateMask = false;
6926 APInt LowerDiff;
6927 std::optional<ConstantRange> CR = CR1.exactUnionWith(CR2);
6928 if (!CR) {
6929 // We need non-wrapping ranges.
6930 if (CR1.isWrappedSet() || CR2.isWrappedSet())
6931 return false;
6932
6933 // Check whether we have equal-size ranges that only differ by one bit.
6934 // In that case we can apply a mask to map one range onto the other.
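 // E.g. [8, 12) and [24, 28) have equal size and differ only in bit 4;
 // masking that bit off (and X, ~16) maps both ranges onto [8, 12).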
6935 LowerDiff = CR1.getLower() ^ CR2.getLower();
6936 APInt UpperDiff = (CR1.getUpper() - 1) ^ (CR2.getUpper() - 1);
6937 APInt CR1Size = CR1.getUpper() - CR1.getLower();
6938 if (!LowerDiff.isPowerOf2() || LowerDiff != UpperDiff ||
6939 CR1Size != CR2.getUpper() - CR2.getLower())
6940 return false;
6941
6942 CR = CR1.getLower().ult(CR2.getLower()) ? CR1 : CR2;
6943 CreateMask = true;
6944 }
6945
6946 if (IsAnd)
6947 CR = CR->inverse();
6948
6949 CmpInst::Predicate NewPred;
6950 APInt NewC, Offset;
6951 CR->getEquivalentICmp(NewPred, NewC, Offset);
6952
6953 // We take the result type of one of the original icmps, CmpTy, for
6954 // the icmp to be built. The operand type, CmpOperandTy, is used for
6955 // the other instructions and constants to be built. The parameter
6956 // and result types are the same for G_ADD and G_AND. CmpTy and the
6957 // type of DstReg might differ, which is why we zext or trunc the
6958 // icmp into the destination register.
6959
6960 MatchInfo = [=](MachineIRBuilder &B) {
6961 if (CreateMask && Offset != 0) {
6962 auto TildeLowerDiff = B.buildConstant(CmpOperandTy, ~LowerDiff);
6963 auto And = B.buildAnd(CmpOperandTy, R1, TildeLowerDiff); // the mask.
6964 auto OffsetC = B.buildConstant(CmpOperandTy, Offset);
6965 auto Add = B.buildAdd(CmpOperandTy, And, OffsetC, Flags);
6966 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
6967 auto ICmp = B.buildICmp(NewPred, CmpTy, Add, NewCon);
6968 B.buildZExtOrTrunc(DstReg, ICmp);
6969 } else if (CreateMask && Offset == 0) {
6970 auto TildeLowerDiff = B.buildConstant(CmpOperandTy, ~LowerDiff);
6971 auto And = B.buildAnd(CmpOperandTy, R1, TildeLowerDiff); // the mask.
6972 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
6973 auto ICmp = B.buildICmp(NewPred, CmpTy, And, NewCon);
6974 B.buildZExtOrTrunc(DstReg, ICmp);
6975 } else if (!CreateMask && Offset != 0) {
6976 auto OffsetC = B.buildConstant(CmpOperandTy, Offset);
6977 auto Add = B.buildAdd(CmpOperandTy, R1, OffsetC, Flags);
6978 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
6979 auto ICmp = B.buildICmp(NewPred, CmpTy, Add, NewCon);
6980 B.buildZExtOrTrunc(DstReg, ICmp);
6981 } else if (!CreateMask && Offset == 0) {
6982 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
6983 auto ICmp = B.buildICmp(NewPred, CmpTy, R1, NewCon);
6984 B.buildZExtOrTrunc(DstReg, ICmp);
6985 } else {
6986 llvm_unreachable("unexpected configuration of CreateMask and Offset");
6987 }
6988 };
6989 return true;
6990}
6991
6992bool CombinerHelper::tryFoldLogicOfFCmps(GLogicalBinOp *Logic,
6993 BuildFnTy &MatchInfo) {
6994 assert(Logic->getOpcode() != TargetOpcode::G_XOR && "unexpected xor");
6995 Register DestReg = Logic->getReg(0);
6996 Register LHS = Logic->getLHSReg();
6997 Register RHS = Logic->getRHSReg();
6998 bool IsAnd = Logic->getOpcode() == TargetOpcode::G_AND;
6999
7000 // We need a compare on the LHS register.
7001 GFCmp *Cmp1 = getOpcodeDef<GFCmp>(LHS, MRI);
7002 if (!Cmp1)
7003 return false;
7004
7005 // We need a compare on the RHS register.
7006 GFCmp *Cmp2 = getOpcodeDef<GFCmp>(RHS, MRI);
7007 if (!Cmp2)
7008 return false;
7009
7010 LLT CmpTy = MRI.getType(Cmp1->getReg(0));
7011 LLT CmpOperandTy = MRI.getType(Cmp1->getLHSReg());
7012
7013 // We build one fcmp: it must be legal to build, the logic op and both
7014 // fcmps must have a single use, and the two fcmps must have the same shape.
7015 if (!isLegalOrBeforeLegalizer(
7016 {TargetOpcode::G_FCMP, {CmpTy, CmpOperandTy}}) ||
7017 !MRI.hasOneNonDBGUse(Logic->getReg(0)) ||
7018 !MRI.hasOneNonDBGUse(Cmp1->getReg(0)) ||
7019 !MRI.hasOneNonDBGUse(Cmp2->getReg(0)) ||
7020 MRI.getType(Cmp1->getLHSReg()) != MRI.getType(Cmp2->getLHSReg()))
7021 return false;
7022
7023 CmpInst::Predicate PredL = Cmp1->getCond();
7024 CmpInst::Predicate PredR = Cmp2->getCond();
7025 Register LHS0 = Cmp1->getLHSReg();
7026 Register LHS1 = Cmp1->getRHSReg();
7027 Register RHS0 = Cmp2->getLHSReg();
7028 Register RHS1 = Cmp2->getRHSReg();
7029
7030 if (LHS0 == RHS1 && LHS1 == RHS0) {
7031 // Swap RHS operands to match LHS.
7032 PredR = CmpInst::getSwappedPredicate(PredR);
7033 std::swap(RHS0, RHS1);
7034 }
7035
7036 if (LHS0 == RHS0 && LHS1 == RHS1) {
7037 // We determine the new predicate.
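 // E.g. for ordered predicates OLT (0b0100) and OGT (0b0010), and-ing the
 // codes gives 0b0000 (FCMP_FALSE) and or-ing them gives 0b0110 (FCMP_ONE).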
7038 unsigned CmpCodeL = getFCmpCode(PredL);
7039 unsigned CmpCodeR = getFCmpCode(PredR);
7040 unsigned NewPred = IsAnd ? CmpCodeL & CmpCodeR : CmpCodeL | CmpCodeR;
7041 unsigned Flags = Cmp1->getFlags() | Cmp2->getFlags();
7042 MatchInfo = [=](MachineIRBuilder &B) {
7043 // The fcmp predicates fill the lower part of the enum.
7044 FCmpInst::Predicate Pred = static_cast<FCmpInst::Predicate>(NewPred);
7045 if (Pred == FCmpInst::FCMP_FALSE &&
7046 isConstantLegalOrBeforeLegalizer(CmpTy)) {
7047 auto False = B.buildConstant(CmpTy, 0);
7048 B.buildZExtOrTrunc(DestReg, False);
7049 } else if (Pred == FCmpInst::FCMP_TRUE &&
7050 isConstantLegalOrBeforeLegalizer(CmpTy)) {
7051 auto True =
7052 B.buildConstant(CmpTy, getICmpTrueVal(getTargetLowering(),
7053 CmpTy.isVector() /*isVector*/,
7054 true /*isFP*/));
7055 B.buildZExtOrTrunc(DestReg, True);
7056 } else { // We take the predicate without predicate optimizations.
7057 auto Cmp = B.buildFCmp(Pred, CmpTy, LHS0, LHS1, Flags);
7058 B.buildZExtOrTrunc(DestReg, Cmp);
7059 }
7060 };
7061 return true;
7062 }
7063
7064 return false;
7065}
7066
7067bool CombinerHelper::matchAnd(MachineInstr &MI, BuildFnTy &MatchInfo) {
7068 GAnd *And = cast<GAnd>(&MI);
7069
7070 if (tryFoldAndOrOrICmpsUsingRanges(And, MatchInfo))
7071 return true;
7072
7073 if (tryFoldLogicOfFCmps(And, MatchInfo))
7074 return true;
7075
7076 return false;
7077}
7078
7079bool CombinerHelper::matchOr(MachineInstr &MI, BuildFnTy &MatchInfo) {
7080 GOr *Or = cast<GOr>(&MI);
7081
7082 if (tryFoldAndOrOrICmpsUsingRanges(Or, MatchInfo))
7083 return true;
7084
7085 if (tryFoldLogicOfFCmps(Or, MatchInfo))
7086 return true;
7087
7088 return false;
7089}
7090
7091bool CombinerHelper::matchAddOverflow(MachineInstr &MI, BuildFnTy &MatchInfo) {
7092 GAddCarryOut *Add = cast<GAddCarryOut>(&MI);
7093
7094 // Addo has no flags
7095 Register Dst = Add->getReg(0);
7096 Register Carry = Add->getReg(1);
7097 Register LHS = Add->getLHSReg();
7098 Register RHS = Add->getRHSReg();
7099 bool IsSigned = Add->isSigned();
7100 LLT DstTy = MRI.getType(Dst);
7101 LLT CarryTy = MRI.getType(Carry);
7102
7103 // Fold addo, if the carry is dead -> add, undef.
7104 if (MRI.use_nodbg_empty(Carry) &&
7105 isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, {DstTy}})) {
7106 MatchInfo = [=](MachineIRBuilder &B) {
7107 B.buildAdd(Dst, LHS, RHS);
7108 B.buildUndef(Carry);
7109 };
7110 return true;
7111 }
7112
7113 // Canonicalize constant to RHS.
7114 if (isConstantOrConstantVectorI(LHS) && !isConstantOrConstantVectorI(RHS)) {
7115 if (IsSigned) {
7116 MatchInfo = [=](MachineIRBuilder &B) {
7117 B.buildSAddo(Dst, Carry, RHS, LHS);
7118 };
7119 return true;
7120 }
7121 // !IsSigned
7122 MatchInfo = [=](MachineIRBuilder &B) {
7123 B.buildUAddo(Dst, Carry, RHS, LHS);
7124 };
7125 return true;
7126 }
7127
7128 std::optional<APInt> MaybeLHS = getConstantOrConstantSplatVector(LHS);
7129 std::optional<APInt> MaybeRHS = getConstantOrConstantSplatVector(RHS);
7130
7131 // Fold addo(c1, c2) -> c3, carry.
7132 if (MaybeLHS && MaybeRHS && isConstantLegalOrBeforeLegalizer(DstTy) &&
7133 isConstantLegalOrBeforeLegalizer(CarryTy)) {
7134 bool Overflow;
7135 APInt Result = IsSigned ? MaybeLHS->sadd_ov(*MaybeRHS, Overflow)
7136 : MaybeLHS->uadd_ov(*MaybeRHS, Overflow);
7137 MatchInfo = [=](MachineIRBuilder &B) {
7138 B.buildConstant(Dst, Result);
7139 B.buildConstant(Carry, Overflow);
7140 };
7141 return true;
7142 }
7143
7144 // Fold (addo x, 0) -> x, no carry
7145 if (MaybeRHS && *MaybeRHS == 0 && isConstantLegalOrBeforeLegalizer(CarryTy)) {
7146 MatchInfo = [=](MachineIRBuilder &B) {
7147 B.buildCopy(Dst, LHS);
7148 B.buildConstant(Carry, 0);
7149 };
7150 return true;
7151 }
7152
7153 // Given 2 constant operands whose sum does not overflow:
7154 // uaddo (X +nuw C0), C1 -> uaddo X, C0 + C1
7155 // saddo (X +nsw C0), C1 -> saddo X, C0 + C1
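 // E.g. uaddo (add nuw X, 1), 2 --> uaddo X, 3, because 1 + 2 itself
 // cannot overflow.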
7156 GAdd *AddLHS = getOpcodeDef<GAdd>(LHS, MRI);
7157 if (MaybeRHS && AddLHS && MRI.hasOneNonDBGUse(Add->getReg(0)) &&
7158 ((IsSigned && AddLHS->getFlag(MachineInstr::MIFlag::NoSWrap)) ||
7159 (!IsSigned && AddLHS->getFlag(MachineInstr::MIFlag::NoUWrap)))) {
7160 std::optional<APInt> MaybeAddRHS =
7161 getConstantOrConstantSplatVector(AddLHS->getRHSReg());
7162 if (MaybeAddRHS) {
7163 bool Overflow;
7164 APInt NewC = IsSigned ? MaybeAddRHS->sadd_ov(*MaybeRHS, Overflow)
7165 : MaybeAddRHS->uadd_ov(*MaybeRHS, Overflow);
7166 if (!Overflow && isConstantLegalOrBeforeLegalizer(DstTy)) {
7167 if (IsSigned) {
7168 MatchInfo = [=](MachineIRBuilder &B) {
7169 auto ConstRHS = B.buildConstant(DstTy, NewC);
7170 B.buildSAddo(Dst, Carry, AddLHS->getLHSReg(), ConstRHS);
7171 };
7172 return true;
7173 }
7174 // !IsSigned
7175 MatchInfo = [=](MachineIRBuilder &B) {
7176 auto ConstRHS = B.buildConstant(DstTy, NewC);
7177 B.buildUAddo(Dst, Carry, AddLHS->getLHSReg(), ConstRHS);
7178 };
7179 return true;
7180 }
7181 }
7182 };
7183
7184 // We try to combine addo to non-overflowing add.
7185 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, {DstTy}}) ||
7186 !isConstantLegalOrBeforeLegalizer(CarryTy))
7187 return false;
7188
7189 // We try to combine uaddo to non-overflowing add.
7190 if (!IsSigned) {
7191 ConstantRange CRLHS =
7192 ConstantRange::fromKnownBits(KB->getKnownBits(LHS), /*IsSigned=*/false);
7193 ConstantRange CRRHS =
7194 ConstantRange::fromKnownBits(KB->getKnownBits(RHS), /*IsSigned=*/false);
7195
7196 switch (CRLHS.unsignedAddMayOverflow(CRRHS)) {
7197 case ConstantRange::OverflowResult::MayOverflow:
7198 return false;
7199 case ConstantRange::OverflowResult::NeverOverflows: {
7200 MatchInfo = [=](MachineIRBuilder &B) {
7201 B.buildAdd(Dst, LHS, RHS, MachineInstr::MIFlag::NoUWrap);
7202 B.buildConstant(Carry, 0);
7203 };
7204 return true;
7205 }
7206 case ConstantRange::OverflowResult::AlwaysOverflowsLow:
7207 case ConstantRange::OverflowResult::AlwaysOverflowsHigh: {
7208 MatchInfo = [=](MachineIRBuilder &B) {
7209 B.buildAdd(Dst, LHS, RHS);
7210 B.buildConstant(Carry, 1);
7211 };
7212 return true;
7213 }
7214 }
7215 return false;
7216 }
7217
7218 // We try to combine saddo to non-overflowing add.
7219
7220 // If LHS and RHS each have at least two sign bits, then there is no signed
7221 // overflow.
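 // Two sign bits mean each operand lies in [INT_MIN/2, INT_MAX/2], so the
 // sum cannot leave the signed range.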
7222 if (KB->computeNumSignBits(RHS) > 1 && KB->computeNumSignBits(LHS) > 1) {
7223 MatchInfo = [=](MachineIRBuilder &B) {
7224 B.buildAdd(Dst, LHS, RHS, MachineInstr::MIFlag::NoSWrap);
7225 B.buildConstant(Carry, 0);
7226 };
7227 return true;
7228 }
7229
7230 ConstantRange CRLHS =
7231 ConstantRange::fromKnownBits(KB->getKnownBits(LHS), /*IsSigned=*/true);
7232 ConstantRange CRRHS =
7233 ConstantRange::fromKnownBits(KB->getKnownBits(RHS), /*IsSigned=*/true);
7234
7235 switch (CRLHS.signedAddMayOverflow(CRRHS)) {
7236 case ConstantRange::OverflowResult::MayOverflow:
7237 return false;
7238 case ConstantRange::OverflowResult::NeverOverflows: {
7239 MatchInfo = [=](MachineIRBuilder &B) {
7240 B.buildAdd(Dst, LHS, RHS, MachineInstr::MIFlag::NoSWrap);
7241 B.buildConstant(Carry, 0);
7242 };
7243 return true;
7244 }
7245 case ConstantRange::OverflowResult::AlwaysOverflowsLow:
7246 case ConstantRange::OverflowResult::AlwaysOverflowsHigh: {
7247 MatchInfo = [=](MachineIRBuilder &B) {
7248 B.buildAdd(Dst, LHS, RHS);
7249 B.buildConstant(Carry, 1);
7250 };
7251 return true;
7252 }
7253 }
7254
7255 return false;
7256}
7257
7258void CombinerHelper::applyBuildFnMO(const MachineOperand &MO,
7259 BuildFnTy &MatchInfo) {
7260 MachineInstr *Root = getDefIgnoringCopies(MO.getReg(), MRI);
7261 MatchInfo(Builder);
7262 Root->eraseFromParent();
7263}
7264
7265bool CombinerHelper::matchSextOfTrunc(const MachineOperand &MO,
7266 BuildFnTy &MatchInfo) {
7267 GSext *Sext = cast<GSext>(getDefIgnoringCopies(MO.getReg(), MRI));
7268 GTrunc *Trunc = cast<GTrunc>(getDefIgnoringCopies(Sext->getSrcReg(), MRI));
7269
7270 Register Dst = Sext->getReg(0);
7271 Register Src = Trunc->getSrcReg();
7272
7273 LLT DstTy = MRI.getType(Dst);
7274 LLT SrcTy = MRI.getType(Src);
7275
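 // Depending on the relative sizes, sext (trunc X) collapses to a copy,
 // a single trunc, or a single sext of the original source.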
7276 if (DstTy == SrcTy) {
7277 MatchInfo = [=](MachineIRBuilder &B) { B.buildCopy(Dst, Src); };
7278 return true;
7279 }
7280
7281 if (DstTy.getScalarSizeInBits() < SrcTy.getScalarSizeInBits() &&
7282 isLegalOrBeforeLegalizer({TargetOpcode::G_TRUNC, {DstTy, SrcTy}})) {
7283 MatchInfo = [=](MachineIRBuilder &B) {
7284 B.buildTrunc(Dst, Src, MachineInstr::MIFlag::NoSWrap);
7285 };
7286 return true;
7287 }
7288
7289 if (DstTy.getScalarSizeInBits() > SrcTy.getScalarSizeInBits() &&
7290 isLegalOrBeforeLegalizer({TargetOpcode::G_SEXT, {DstTy, SrcTy}})) {
7291 MatchInfo = [=](MachineIRBuilder &B) { B.buildSExt(Dst, Src); };
7292 return true;
7293 }
7294
7295 return false;
7296}
7297
7298bool CombinerHelper::matchZextOfTrunc(const MachineOperand &MO,
7299 BuildFnTy &MatchInfo) {
7300 GZext *Zext = cast<GZext>(getDefIgnoringCopies(MO.getReg(), MRI));
7301 GTrunc *Trunc = cast<GTrunc>(getDefIgnoringCopies(Zext->getSrcReg(), MRI));
7302
7303 Register Dst = Zext->getReg(0);
7304 Register Src = Trunc->getSrcReg();
7305
7306 LLT DstTy = MRI.getType(Dst);
7307 LLT SrcTy = MRI.getType(Src);
7308
7309 if (DstTy == SrcTy) {
7310 MatchInfo = [=](MachineIRBuilder &B) { B.buildCopy(Dst, Src); };
7311 return true;
7312 }
7313
7314 if (DstTy.getScalarSizeInBits() < SrcTy.getScalarSizeInBits() &&
7315 isLegalOrBeforeLegalizer({TargetOpcode::G_TRUNC, {DstTy, SrcTy}})) {
7316 MatchInfo = [=](MachineIRBuilder &B) {
7317 B.buildTrunc(Dst, Src, MachineInstr::MIFlag::NoUWrap);
7318 };
7319 return true;
7320 }
7321
7322 if (DstTy.getScalarSizeInBits() > SrcTy.getScalarSizeInBits() &&
7323 isLegalOrBeforeLegalizer({TargetOpcode::G_ZEXT, {DstTy, SrcTy}})) {
7324 MatchInfo = [=](MachineIRBuilder &B) {
7325 B.buildZExt(Dst, Src, MachineInstr::MIFlag::NonNeg);
7326 };
7327 return true;
7328 }
7329
7330 return false;
7331}
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
unsigned RegSize
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static const LLT S1
amdgpu AMDGPU Register Bank Select
Rewrite undef for PHI
This file declares a class to represent arbitrary precision floating point values and provide a varie...
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static bool hasMoreUses(const MachineInstr &MI0, const MachineInstr &MI1, const MachineRegisterInfo &MRI)
static bool isContractableFMul(MachineInstr &MI, bool AllowFusionGlobally)
Checks if MI is TargetOpcode::G_FMUL and contractable either due to global flags or MachineInstr flag...
static unsigned getIndexedOpc(unsigned LdStOpc)
static APFloat constantFoldFpUnary(const MachineInstr &MI, const MachineRegisterInfo &MRI, const APFloat &Val)
static std::optional< std::pair< GZExtLoad *, int64_t > > matchLoadAndBytePosition(Register Reg, unsigned MemSizeInBits, const MachineRegisterInfo &MRI)
Helper function for findLoadOffsetsForLoadOrCombine.
static Register peekThroughBitcast(Register Reg, const MachineRegisterInfo &MRI)
static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I)
static cl::opt< bool > ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false), cl::desc("Force all indexed operations to be " "legal for the GlobalISel combiner"))
static cl::opt< unsigned > PostIndexUseThreshold("post-index-use-threshold", cl::Hidden, cl::init(32), cl::desc("Number of uses of a base pointer to check before it is no longer " "considered for post-indexing."))
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
static unsigned getExtLoadOpcForExtend(unsigned ExtOpc)
static Type * getTypeForLLT(LLT Ty, LLVMContext &C)
static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits, int64_t Cst, bool IsVector, bool IsFP)
static LLT getMidVTForTruncRightShiftCombine(LLT ShiftTy, LLT TruncTy)
static bool canFoldInAddressingMode(GLoadStore *MI, const TargetLowering &TLI, MachineRegisterInfo &MRI)
Return true if 'MI' is a load or a store that may be fold it's address operand into the load / store ...
static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I)
static Register buildLogBase2(Register V, MachineIRBuilder &MIB)
Determines the LogBase2 value for a non-null input value using the transform: LogBase2(V) = (EltBits ...
This contains common combine transformations that may be used in a combine pass,or by the target else...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
uint64_t Addr
uint64_t Size
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
Rewrite Partial Register Uses
This contains common code to allow clients to notify changes to machine instr.
Provides analysis for querying information about KnownBits during GISel passes.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
#define _
IRTranslator LLVM IR MI
static LVOptions Options
Definition: LVOptions.cpp:25
Interface for Targets to specify which operations they can successfully select and how the others sho...
Implement a low-level type suitable for MachineInstr level instruction selection.
#define I(x, y, z)
Definition: MD5.cpp:58
Contains matchers for matching SSA Machine Instructions.
mir Rename Register Operands
This file declares the MachineIRBuilder class.
unsigned const TargetRegisterInfo * TRI
#define R2(n)
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
R600 Clause Merge
const SmallVectorImpl< MachineOperand > & Cond
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file implements a set that has insertion order iteration characteristics.
This file implements the SmallBitVector class.
This file describes how to lower LLVM code to machine code.
Value * RHS
Value * LHS
const fltSemantics & getSemantics() const
Definition: APFloat.h:1303
bool isNaN() const
Definition: APFloat.h:1293
opStatus fusedMultiplyAdd(const APFloat &Multiplicand, const APFloat &Addend, roundingMode RM)
Definition: APFloat.h:1096
APInt bitcastToAPInt() const
Definition: APFloat.h:1210
Class for arbitrary precision integers.
Definition: APInt.h:76
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1498
APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition: APInt.cpp:1002
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:906
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition: APInt.h:349
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1160
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:358
APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition: APInt.cpp:1636
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1446
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition: APInt.h:1089
int32_t exactLogBase2() const
Definition: APInt.h:1732
void ashrInPlace(unsigned ShiftAmt)
Arithmetic right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:812
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition: APInt.h:1596
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition: APInt.h:1555
APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
Definition: APInt.cpp:1010
unsigned countl_one() const
Count the number of leading one bits.
Definition: APInt.h:1572
APInt multiplicativeInverse() const
Definition: APInt.cpp:1244
bool isMask(unsigned numBits) const
Definition: APInt.h:466
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:418
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:178
bool isOne() const
Determine if this is a value of 1.
Definition: APInt.h:367
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:217
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1520
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:829
unsigned countr_one() const
Count the number of trailing one bits.
Definition: APInt.h:1613
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition: APInt.h:1199
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
AttributeSet getAttributes(unsigned Index) const
The attributes for the specified index are returned.
bool isEquality() const
Determine if this is an equals/not equals predicate.
Definition: InstrTypes.h:1255
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:993
@ FCMP_TRUE
1 1 1 1 Always true (always folded)
Definition: InstrTypes.h:1010
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:1022
@ ICMP_SLE
signed less or equal
Definition: InstrTypes.h:1023
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition: InstrTypes.h:999
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
Definition: InstrTypes.h:1008
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition: InstrTypes.h:997
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition: InstrTypes.h:998
@ ICMP_UGE
unsigned greater or equal
Definition: InstrTypes.h:1017
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:1016
@ ICMP_SGT
signed greater than
Definition: InstrTypes.h:1020
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition: InstrTypes.h:1007
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:1018
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition: InstrTypes.h:1005
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition: InstrTypes.h:1000
@ ICMP_EQ
equal
Definition: InstrTypes.h:1014
@ ICMP_NE
not equal
Definition: InstrTypes.h:1015
@ ICMP_SGE
signed greater or equal
Definition: InstrTypes.h:1021
@ ICMP_ULE
unsigned less or equal
Definition: InstrTypes.h:1019
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition: InstrTypes.h:1006
@ FCMP_FALSE
0 0 0 0 Always false (always folded)
Definition: InstrTypes.h:995
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition: InstrTypes.h:1167
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition: InstrTypes.h:1129
static bool isOrdered(Predicate predicate)
Determine if the predicate is an ordered operation.
void applyUDivByConst(MachineInstr &MI)
void applyCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal)
bool matchCombineShuffleVector(MachineInstr &MI, SmallVectorImpl< Register > &Ops)
Check if the G_SHUFFLE_VECTOR MI can be replaced by a concat_vectors.
bool matchPtrAddZero(MachineInstr &MI)
}
bool matchAllExplicitUsesAreUndef(MachineInstr &MI)
Return true if all register explicit use operands on MI are defined by a G_IMPLICIT_DEF.
void replaceSingleDefInstWithOperand(MachineInstr &MI, unsigned OpIdx)
Delete MI and replace all of its uses with its OpIdx-th operand.
const RegisterBank * getRegBank(Register Reg) const
Get the register bank of Reg.
bool matchReassocPtrAdd(MachineInstr &MI, BuildFnTy &MatchInfo)
Reassociate pointer calculations with G_ADD involved, to allow better addressing mode usage.
bool matchUDivByConst(MachineInstr &MI)
Combine G_UDIV by constant into a multiply by magic constant.
void applyExtractVecEltBuildVec(MachineInstr &MI, Register &Reg)
bool matchInsertExtractVecEltOutOfBounds(MachineInstr &MI)
Return true if a G_{EXTRACT,INSERT}_VECTOR_ELT has an out of range index.
bool matchShiftsTooBig(MachineInstr &MI)
Match shifts greater or equal to the bitwidth of the operation.
bool tryCombineCopy(MachineInstr &MI)
If MI is COPY, try to combine it.
bool matchTruncLshrBuildVectorFold(MachineInstr &MI, Register &MatchInfo)
bool matchUndefStore(MachineInstr &MI)
Return true if a G_STORE instruction MI is storing an undef value.
bool matchRedundantBinOpInEquality(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform: (X + Y) == X -> Y == 0 (X - Y) == X -> Y == 0 (X ^ Y) == X -> Y == 0 (X + Y) !...
bool matchRedundantSExtInReg(MachineInstr &MI)
bool matchSextOfTrunc(const MachineOperand &MO, BuildFnTy &MatchInfo)
Combine sext of trunc.
bool matchCombineFAddFpExtFMulToFMadOrFMAAggressive(MachineInstr &MI, BuildFnTy &MatchInfo)
bool matchReassocConstantInnerRHS(GPtrAdd &MI, MachineInstr *RHS, BuildFnTy &MatchInfo)
bool matchSubAddSameReg(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform: (x + y) - y -> x (x + y) - x -> y x - (y + x) -> 0 - y x - (x + z) -> 0 - z.
bool matchConstantFoldFPBinOp(MachineInstr &MI, ConstantFP *&MatchInfo)
Do constant FP folding when opportunities are exposed after MIR building.
void applyCombineShiftToUnmerge(MachineInstr &MI, const unsigned &ShiftVal)
void applyCombineUnmergeZExtToZExt(MachineInstr &MI)
void applyCommuteBinOpOperands(MachineInstr &MI)
bool matchBinOpSameVal(MachineInstr &MI)
Optimize (x op x) -> x.
void applyCombineUnmergeConstant(MachineInstr &MI, SmallVectorImpl< APInt > &Csts)
bool matchCombineFSubFNegFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z)) (fsub (fneg (fmul,...
bool matchCombineCopy(MachineInstr &MI)
bool matchConstantSelectCmp(MachineInstr &MI, unsigned &OpIdx)
Return true if a G_SELECT instruction MI has a constant comparison.
void eraseInst(MachineInstr &MI)
Erase MI.
void replaceRegWith(MachineRegisterInfo &MRI, Register FromReg, Register ToReg) const
MachineRegisterInfo::replaceRegWith() and inform the observer of the changes.
void replaceRegOpWith(MachineRegisterInfo &MRI, MachineOperand &FromRegOp, Register ToReg) const
Replace a single register operand with a new register and inform the observer of the changes.
bool matchCombineFAddFMAFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z)) (fadd (fmad x,...
void applySimplifyAddToSub(MachineInstr &MI, std::tuple< Register, Register > &MatchInfo)
bool matchSimplifySelectToMinMax(MachineInstr &MI, BuildFnTy &MatchInfo)
bool matchCombineConcatVectors(MachineInstr &MI, SmallVector< Register > &Ops)
If MI is G_CONCAT_VECTORS, try to combine it.
bool matchAddSubSameReg(MachineInstr &MI, Register &Src)
Transform G_ADD(x, G_SUB(y, x)) to y.
void applyRotateOutOfRange(MachineInstr &MI)
bool matchMulOBy2(MachineInstr &MI, BuildFnTy &MatchInfo)
Match: (G_UMULO x, 2) -> (G_UADDO x, x) (G_SMULO x, 2) -> (G_SADDO x, x)
bool matchRotateOutOfRange(MachineInstr &MI)
void applyCombineConstPtrAddToI2P(MachineInstr &MI, APInt &NewCst)
void applyCombineTruncOfShift(MachineInstr &MI, std::pair< MachineInstr *, LLT > &MatchInfo)
void applyCombineShuffleVector(MachineInstr &MI, const ArrayRef< Register > Ops)
Replace MI with a concat_vectors with Ops.
const TargetLowering & getTargetLowering() const
void applyBuildFnNoErase(MachineInstr &MI, BuildFnTy &MatchInfo)
Use a function which takes in a MachineIRBuilder to perform a combine.
void applyPtrAddZero(MachineInstr &MI)
bool matchTruncBuildVectorFold(MachineInstr &MI, Register &MatchInfo)
void setRegBank(Register Reg, const RegisterBank *RegBank)
Set the register bank of Reg.
bool matchRedundantAnd(MachineInstr &MI, Register &Replacement)
void replaceInstWithConstant(MachineInstr &MI, int64_t C)
Replace an instruction with a G_CONSTANT with value C.
bool matchAshrShlToSextInreg(MachineInstr &MI, std::tuple< Register, int64_t > &MatchInfo)
Match ashr (shl x, C), C -> sext_inreg (C)
bool tryCombineExtendingLoads(MachineInstr &MI)
If MI is extend that consumes the result of a load, try to combine it.
bool tryCombineShiftToUnmerge(MachineInstr &MI, unsigned TargetShiftAmount)
bool matchCombineUnmergeUndef(MachineInstr &MI, std::function< void(MachineIRBuilder &)> &MatchInfo)
Transform G_UNMERGE G_IMPLICIT_DEF -> G_IMPLICIT_DEF, G_IMPLICIT_DEF, ...
void applySDivByConst(MachineInstr &MI)
bool matchUndefSelectCmp(MachineInstr &MI)
Return true if a G_SELECT instruction MI has an undef comparison.
void replaceInstWithUndef(MachineInstr &MI)
Replace an instruction with a G_IMPLICIT_DEF.
bool matchRedundantOr(MachineInstr &MI, Register &Replacement)
bool matchOperandIsUndef(MachineInstr &MI, unsigned OpIdx)
Check if operand OpIdx is undef.
void applyBuildFn(MachineInstr &MI, BuildFnTy &MatchInfo)
Use a function which takes in a MachineIRBuilder to perform a combine.
bool matchCombineConstPtrAddToI2P(MachineInstr &MI, APInt &NewCst)
void replaceInstWithFConstant(MachineInstr &MI, double C)
Replace an instruction with a G_FCONSTANT with value C.
bool matchBitfieldExtractFromSExtInReg(MachineInstr &MI, BuildFnTy &MatchInfo)
Form a G_SBFX from a G_SEXT_INREG fed by a right shift.
bool matchEqualDefs(const MachineOperand &MOP1, const MachineOperand &MOP2)
Return true if MOP1 and MOP2 are register operands are defined by equivalent instructions.
bool matchShiftImmedChain(MachineInstr &MI, RegisterImmPair &MatchInfo)
Fold (shift (shift base, x), y) -> (shift base (x+y))
bool matchPtrAddImmedChain(MachineInstr &MI, PtrAddChain &MatchInfo)
void applyShiftImmedChain(MachineInstr &MI, RegisterImmPair &MatchInfo)
void applyOptBrCondByInvertingCond(MachineInstr &MI, MachineInstr *&BrCond)
bool matchMulOBy0(MachineInstr &MI, BuildFnTy &MatchInfo)
Match: (G_*MULO x, 0) -> 0 + no carry out.
void replaceSingleDefInstWithReg(MachineInstr &MI, Register Replacement)
Delete MI and replace all of its uses with Replacement.
bool matchFunnelShiftToRotate(MachineInstr &MI)
Match an FSHL or FSHR that can be combined to a ROTR or ROTL rotate.
bool matchNotCmp(MachineInstr &MI, SmallVectorImpl< Register > &RegsToNegate)
Combine inverting a result of a compare into the opposite cond code.
void applyCombineExtOfExt(MachineInstr &MI, std::tuple< Register, unsigned > &MatchInfo)
void replaceOpcodeWith(MachineInstr &FromMI, unsigned ToOpcode) const
Replace the opcode in instruction with a new opcode and inform the observer of the changes.
bool matchOperandIsKnownToBeAPowerOfTwo(MachineInstr &MI, unsigned OpIdx)
Check if operand OpIdx is known to be a power of 2.
void applyCombineCopy(MachineInstr &MI)
void applyCombineTruncOfExt(MachineInstr &MI, std::pair< Register, unsigned > &MatchInfo)
bool matchAnyExplicitUseIsUndef(MachineInstr &MI)
Return true if any explicit use operand on MI is defined by a G_IMPLICIT_DEF.
bool matchFsubToFneg(MachineInstr &MI, Register &MatchInfo)
void applyCombineAddP2IToPtrAdd(MachineInstr &MI, std::pair< Register, bool > &PtrRegAndCommute)
bool matchNarrowBinopFeedingAnd(MachineInstr &MI, BuildFnTy &MatchInfo)
void applyCombineConcatVectors(MachineInstr &MI, SmallVector< Register > &Ops)
Replace MI with a flattened build_vector with Ops or an implicit_def if Ops is empty.
bool matchSextTruncSextLoad(MachineInstr &MI)
bool matchShiftOfShiftedLogic(MachineInstr &MI, ShiftOfShiftedLogic &MatchInfo)
If we have a shift-by-constant of a bitwise logic op that itself has a shift-by-constant operand with...
GISelKnownBits * KB
bool matchExtractAllEltsFromBuildVector(MachineInstr &MI, SmallVectorImpl< std::pair< Register, MachineInstr * > > &MatchInfo)
void applyCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo)
MachineInstr * buildSDivUsingMul(MachineInstr &MI)
Given an G_SDIV MI expressing a signed divide by constant, return an expression that implements it by...
void applySDivByPow2(MachineInstr &MI)
void applyFunnelShiftConstantModulo(MachineInstr &MI)
Replaces the shift amount in MI with ShiftAmt % BW.
bool matchConstantFoldBinOp(MachineInstr &MI, APInt &MatchInfo)
Do constant folding when opportunities are exposed after MIR building.
bool isPreLegalize() const
bool matchCombineLoadWithAndMask(MachineInstr &MI, BuildFnTy &MatchInfo)
Match (and (load x), mask) -> zextload x.
bool matchConstantOp(const MachineOperand &MOP, int64_t C)
Return true if MOP is defined by a G_CONSTANT or splat with a value equal to C.
bool matchCombineFSubFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform (fsub (fmul x, y), z) -> (fma x, y, -z) (fsub (fmul x, y), z) -> (fmad x,...
bool matchAnd(MachineInstr &MI, BuildFnTy &MatchInfo)
Combine ands.
void applyCombineI2PToP2I(MachineInstr &MI, Register &Reg)
void applyNotCmp(MachineInstr &MI, SmallVectorImpl< Register > &RegsToNegate)
void applyCombineExtendingLoads(MachineInstr &MI, PreferredTuple &MatchInfo)
bool matchConstantFPOp(const MachineOperand &MOP, double C)
Return true if MOP is defined by a G_FCONSTANT or splat with a value exactly equal to C.
bool matchSimplifyAddToSub(MachineInstr &MI, std::tuple< Register, Register > &MatchInfo)
Return true if MI is a G_ADD which can be simplified to a G_SUB.
bool tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0)
Optimize memcpy intrinsics et al, e.g.
bool matchSelectSameVal(MachineInstr &MI)
Optimize (cond ? x : x) -> x.
void applyCombineConstantFoldFpUnary(MachineInstr &MI, const ConstantFP *Cst)
Transform fp_instr(cst) to constant result of the fp operation.
bool matchCombineExtendingLoads(MachineInstr &MI, PreferredTuple &MatchInfo)
bool tryReassocBinOp(unsigned Opc, Register DstReg, Register Op0, Register Op1, BuildFnTy &MatchInfo)
Try to reassociate to reassociate operands of a commutative binop.
bool isConstantLegalOrBeforeLegalizer(const LLT Ty) const
bool tryEmitMemcpyInline(MachineInstr &MI)
Emit loads and stores that perform the given memcpy.
void applyXorOfAndWithSameReg(MachineInstr &MI, std::pair< Register, Register > &MatchInfo)
bool matchXorOfAndWithSameReg(MachineInstr &MI, std::pair< Register, Register > &MatchInfo)
Fold (xor (and x, y), y) -> (and (not x), y) {.
bool matchCombineFSubFpExtFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform (fsub (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), (fneg z)) (fsub (fpext (fmul x,...
bool matchCombineFMinMaxNaN(MachineInstr &MI, unsigned &Info)
bool matchCombineShlOfExtend(MachineInstr &MI, RegisterImmPair &MatchData)
bool matchConstantFoldFMA(MachineInstr &MI, ConstantFP *&MatchInfo)
Constant fold G_FMA/G_FMAD.
bool matchBitfieldExtractFromAnd(MachineInstr &MI, BuildFnTy &MatchInfo)
Match: and (lshr x, cst), mask -> ubfx x, cst, width.
void applyShiftOfShiftedLogic(MachineInstr &MI, ShiftOfShiftedLogic &MatchInfo)
bool isLegal(const LegalityQuery &Query) const
bool matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo)
Combine selects.
bool matchCombineUnmergeConstant(MachineInstr &MI, SmallVectorImpl< APInt > &Csts)
Transform G_UNMERGE Constant -> Constant1, Constant2, ...
bool matchICmpToTrueFalseKnownBits(MachineInstr &MI, int64_t &MatchInfo)
bool matchCombineAnyExtTrunc(MachineInstr &MI, Register &Reg)
Transform anyext(trunc(x)) to x.
void applySimplifyURemByPow2(MachineInstr &MI)
Combine G_UREM x, (known power of 2) to an add and bitmasking.
bool matchReassocFoldConstantsInSubTree(GPtrAdd &MI, MachineInstr *LHS, MachineInstr *RHS, BuildFnTy &MatchInfo)
void applyCombineShuffleConcat(MachineInstr &MI, SmallVector< Register > &Ops)
Replace MI with a flattened build_vector with Ops or an implicit_def if Ops is empty.
MachineRegisterInfo & MRI
void applyUMulHToLShr(MachineInstr &MI)
bool matchLoadOrCombine(MachineInstr &MI, BuildFnTy &MatchInfo)
Match expression trees of the form.
bool matchShuffleToExtract(MachineInstr &MI)
bool matchUndefShuffleVectorMask(MachineInstr &MI)
Return true if a G_SHUFFLE_VECTOR instruction MI has an undef mask.
bool isLegalOrBeforeLegalizer(const LegalityQuery &Query) const
bool matchExtendThroughPhis(MachineInstr &MI, MachineInstr *&ExtMI)
bool matchAndOrDisjointMask(MachineInstr &MI, BuildFnTy &MatchInfo)
bool matchCombineExtractedVectorLoad(MachineInstr &MI, BuildFnTy &MatchInfo)
Combine a G_EXTRACT_VECTOR_ELT of a load into a narrowed load.
bool matchCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal)
Transform a multiply by a power-of-2 value to a left shift.
bool matchBitfieldExtractFromShr(MachineInstr &MI, BuildFnTy &MatchInfo)
Match: shr (shl x, n), k -> sbfx/ubfx x, pos, width.
void applyFoldBinOpIntoSelect(MachineInstr &MI, const unsigned &SelectOpNo)
SelectOperand is the operand in binary operator MI that is the select to fold.
bool matchBuildVectorIdentityFold(MachineInstr &MI, Register &MatchInfo)
bool matchCombineFAddFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform (fadd (fmul x, y), z) -> (fma x, y, z) (fadd (fmul x, y), z) -> (fmad x,...
bool matchRedundantNegOperands(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform (fadd x, fneg(y)) -> (fsub x, y) (fadd fneg(x), y) -> (fsub y, x) (fsub x,...
bool matchCombineMergeUnmerge(MachineInstr &MI, Register &MatchInfo)
Fold away a merge of an unmerge of the corresponding values.
void applyCombineInsertVecElts(MachineInstr &MI, SmallVectorImpl< Register > &MatchInfo)
bool matchCombineUnmergeZExtToZExt(MachineInstr &MI)
Transform X, Y = G_UNMERGE(G_ZEXT(Z)) -> X = G_ZEXT(Z); Y = G_CONSTANT 0.
bool matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI)
Transform X, Y<dead> = G_UNMERGE Z -> X = G_TRUNC Z.
bool matchConstantLargerBitWidth(MachineInstr &MI, unsigned ConstIdx)
Checks if constant at ConstIdx is larger than MI 's bitwidth.
CombinerHelper(GISelChangeObserver &Observer, MachineIRBuilder &B, bool IsPreLegalize, GISelKnownBits *KB=nullptr, MachineDominatorTree *MDT=nullptr, const LegalizerInfo *LI=nullptr)
bool matchCombineDivRem(MachineInstr &MI, MachineInstr *&OtherMI)
Try to combine G_[SU]DIV and G_[SU]REM into a single G_[SU]DIVREM when their source operands are iden...
bool matchCombineTruncOfExt(MachineInstr &MI, std::pair< Register, unsigned > &MatchInfo)
Transform trunc ([asz]ext x) to x or ([asz]ext x) or (trunc x).
bool isPredecessor(const MachineInstr &DefMI, const MachineInstr &UseMI)
Returns true if DefMI precedes UseMI or they are the same instruction.
bool matchDivByPow2(MachineInstr &MI, bool IsSigned)
Given an G_SDIV MI expressing a signed divided by a pow2 constant, return expressions that implements...
bool matchExtractVecEltBuildVec(MachineInstr &MI, Register &Reg)
bool matchUMulHToLShr(MachineInstr &MI)
bool dominates(const MachineInstr &DefMI, const MachineInstr &UseMI)
Returns true if DefMI dominates UseMI.
MachineInstr * buildUDivUsingMul(MachineInstr &MI)
Given an G_UDIV MI expressing a divide by constant, return an expression that implements it by multip...
bool matchCombineZextTrunc(MachineInstr &MI, Register &Reg)
Transform zext(trunc(x)) to x.
void applyCombineShlOfExtend(MachineInstr &MI, const RegisterImmPair &MatchData)
bool canCombineFMadOrFMA(MachineInstr &MI, bool &AllowFusionGlobally, bool &HasFMAD, bool &Aggressive, bool CanReassociate=false)
const LegalizerInfo * LI
bool matchZextOfTrunc(const MachineOperand &MO, BuildFnTy &MatchInfo)
Combine zext of trunc.
void applyCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI)
void applyShuffleToExtract(MachineInstr &MI)
MachineDominatorTree * MDT
bool matchSDivByConst(MachineInstr &MI)
void applySextInRegOfLoad(MachineInstr &MI, std::tuple< Register, unsigned > &MatchInfo)
bool matchCombineUnmergeMergeToPlainValues(MachineInstr &MI, SmallVectorImpl< Register > &Operands)
Transform <ty,...> G_UNMERGE(G_MERGE ty X, Y, Z) -> ty X, Y, Z.
void applyExtractAllEltsFromBuildVector(MachineInstr &MI, SmallVectorImpl< std::pair< Register, MachineInstr * > > &MatchInfo)
void applyBuildFnMO(const MachineOperand &MO, BuildFnTy &MatchInfo)
Use a function which takes in a MachineIRBuilder to perform a combine.
bool matchCombineTruncOfShift(MachineInstr &MI, std::pair< MachineInstr *, LLT > &MatchInfo)
Transform trunc (shl x, K) to shl (trunc x), K if K < VT.getScalarSizeInBits().
const RegisterBankInfo * RBI
bool matchCommuteShift(MachineInstr &MI, BuildFnTy &MatchInfo)
bool matchCombineFAddFpExtFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z) (fadd (fpext (fmul x,...
bool matchReassocConstantInnerLHS(GPtrAdd &MI, MachineInstr *LHS, MachineInstr *RHS, BuildFnTy &MatchInfo)
void applyExtendThroughPhis(MachineInstr &MI, MachineInstr *&ExtMI)
const TargetRegisterInfo * TRI
bool tryCombineShuffleVector(MachineInstr &MI)
Try to combine G_SHUFFLE_VECTOR into G_CONCAT_VECTORS.
bool matchCombineI2PToP2I(MachineInstr &MI, Register &Reg)
Transform IntToPtr(PtrToInt(x)) to x if cast is in the same address space.
bool matchICmpToLHSKnownBits(MachineInstr &MI, BuildFnTy &MatchInfo)
GISelChangeObserver & Observer
bool matchCombineExtOfExt(MachineInstr &MI, std::tuple< Register, unsigned > &MatchInfo)
Transform [asz]ext([asz]ext(x)) to [asz]ext x.
bool matchOverlappingAnd(MachineInstr &MI, BuildFnTy &MatchInfo)
Fold and(and(x, C1), C2) -> C1&C2 ? and(x, C1&C2) : 0.
bool matchSextInRegOfLoad(MachineInstr &MI, std::tuple< Register, unsigned > &MatchInfo)
Match sext_inreg(load p), imm -> sextload p.
bool matchCombineInsertVecElts(MachineInstr &MI, SmallVectorImpl< Register > &MatchInfo)
bool matchCombineAddP2IToPtrAdd(MachineInstr &MI, std::pair< Register, bool > &PtrRegAndCommute)
Transform G_ADD (G_PTRTOINT x), y -> G_PTRTOINT (G_PTR_ADD x, y) Transform G_ADD y,...
bool matchOr(MachineInstr &MI, BuildFnTy &MatchInfo)
Combine ors.
void applyFunnelShiftToRotate(MachineInstr &MI)
void applyCombineUnmergeMergeToPlainValues(MachineInstr &MI, SmallVectorImpl< Register > &Operands)
bool matchOptBrCondByInvertingCond(MachineInstr &MI, MachineInstr *&BrCond)
If a brcond's true block is not the fallthrough, make it so by inverting the condition and swapping o...
bool matchAddEToAddO(MachineInstr &MI, BuildFnTy &MatchInfo)
Match: (G_*ADDE x, y, 0) -> (G_*ADDO x, y) (G_*SUBE x, y, 0) -> (G_*SUBO x, y)
bool matchAddOverflow(MachineInstr &MI, BuildFnTy &MatchInfo)
Combine addos.
void applyCombineP2IToI2P(MachineInstr &MI, Register &Reg)
Transform PtrToInt(IntToPtr(x)) to x.
bool matchCombineShiftToUnmerge(MachineInstr &MI, unsigned TargetShiftSize, unsigned &ShiftVal)
Reduce a shift by a constant to an unmerge and a shift on a half sized type.
bool matchCommuteConstantToRHS(MachineInstr &MI)
Match constant LHS ops that should be commuted.
void applyPtrAddImmedChain(MachineInstr &MI, PtrAddChain &MatchInfo)
void applyCombineDivRem(MachineInstr &MI, MachineInstr *&OtherMI)
void applyFsubToFneg(MachineInstr &MI, Register &MatchInfo)
void applyBuildInstructionSteps(MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo)
Replace MI with a series of instructions described in MatchInfo.
bool matchCombineFSubFpExtFNegFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform (fsub (fpext (fneg (fmul x, y))), z) -> (fneg (fma (fpext x), (fpext y),...
MachineIRBuilder & Builder
bool matchSelectIMinMax(const MachineOperand &MO, BuildFnTy &MatchInfo)
Combine select to integer min/max.
bool matchBitfieldExtractFromShrAnd(MachineInstr &MI, BuildFnTy &MatchInfo)
Match: shr (and x, n), k -> ubfx x, pos, width.
bool matchCombineShuffleConcat(MachineInstr &MI, SmallVector< Register > &Ops)
bool matchReassocCommBinOp(MachineInstr &MI, BuildFnTy &MatchInfo)
Reassociate commutative binary operations like G_ADD.
bool matchFoldBinOpIntoSelect(MachineInstr &MI, unsigned &SelectOpNo)
Push a binary operator through a select on constants.
bool matchConstantFoldCastOp(MachineInstr &MI, APInt &MatchInfo)
Do constant folding when opportunities are exposed after MIR building.
bool matchOperandIsZero(MachineInstr &MI, unsigned OpIdx)
Check if operand OpIdx is zero.
bool matchOrShiftToFunnelShift(MachineInstr &MI, BuildFnTy &MatchInfo)
void applyUDivByPow2(MachineInstr &MI)
Given an G_UDIV MI expressing an unsigned divided by a pow2 constant, return expressions that impleme...
bool matchHoistLogicOpWithSameOpcodeHands(MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo)
Match (logic_op (op x...), (op y...)) -> (op (logic_op x, y))
void applyAshShlToSextInreg(MachineInstr &MI, std::tuple< Register, int64_t > &MatchInfo)
void applySextTruncSextLoad(MachineInstr &MI)
bool matchCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo)
bool matchCommuteFPConstantToRHS(MachineInstr &MI)
Match constant LHS FP ops that should be commuted.
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:268
const APFloat & getValue() const
Definition: Constants.h:312
const APFloat & getValueAPF() const
Definition: Constants.h:311
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:145
This class represents a range of values.
Definition: ConstantRange.h:47
std::optional< ConstantRange > exactUnionWith(const ConstantRange &CR) const
Union the two ranges and return the result if it can be represented exactly, otherwise return std::nu...
ConstantRange subtract(const APInt &CI) const
Subtract the specified constant from the endpoints of this constant range.
static ConstantRange fromKnownBits(const KnownBits &Known, bool IsSigned)
Initialize a range based on a known bits constraint.
const APInt & getLower() const
Return the lower value for this range.
OverflowResult unsignedAddMayOverflow(const ConstantRange &Other) const
Return whether unsigned add of the two ranges always/never overflows.
bool isWrappedSet() const
Return true if this set wraps around the unsigned domain.
const APInt & getUpper() const
Return the upper value for this range.
static ConstantRange makeExactICmpRegion(CmpInst::Predicate Pred, const APInt &Other)
Produce the exact range such that all values in the returned range satisfy the given predicate with a...
OverflowResult signedAddMayOverflow(const ConstantRange &Other) const
Return whether signed add of the two ranges always/never overflows.
@ AlwaysOverflowsHigh
Always overflows in the direction of signed/unsigned max value.
@ AlwaysOverflowsLow
Always overflows in the direction of signed/unsigned min value.
@ MayOverflow
May or may not overflow.
This is an important base class in LLVM.
Definition: Constant.h:41
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
bool isBigEndian() const
Definition: DataLayout.h:239
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:202
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&... Args)
Definition: DenseMap.h:235
unsigned size() const
Definition: DenseMap.h:99
iterator end()
Definition: DenseMap.h:84
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:692
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:356
Represents overflowing add operations.
Represents an integer addition.
Represents a logical and.
CmpInst::Predicate getCond() const
Register getLHSReg() const
Register getRHSReg() const
Represents any generic load, including sign/zero extending variants.
Register getDstReg() const
Get the definition register of the loaded value.
Register getLHSReg() const
Register getRHSReg() const
Represents a G_BUILD_VECTOR.
Register getSrcReg() const
Represent a G_FCMP.
Represent a G_ICMP.
Abstract class that contains various methods for clients to notify about changes.
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
void finishedChangingAllUsesOfReg()
All instructions reported as changing by changingAllUsesOfReg() have finished being changed.
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
virtual void erasingInstr(MachineInstr &MI)=0
An instruction is about to be erased.
void changingAllUsesOfReg(const MachineRegisterInfo &MRI, Register Reg)
All the instructions using the given register are being changed.
unsigned computeNumSignBits(Register R, const APInt &DemandedElts, unsigned Depth=0)
KnownBits getKnownBits(Register R)
APInt getKnownZeroes(Register R)
Simple wrapper observer that takes several observers, and calls each one for each event.
Represents a G_IMPLICIT_DEF.
Represents any type of generic load or store.
Register getPointerReg() const
Get the source register of the pointer value.
Represents a G_LOAD.
Represents a logical binary operation.
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
bool isAtomic() const
Returns true if the attached MachineMemOperand has the atomic flag set.
LocationSize getMemSizeInBits() const
Returns the size in bits of the memory access.
bool isSimple() const
Returns true if the memory operation is neither atomic or volatile.
Register getSourceReg(unsigned I) const
Returns the I'th source register.
unsigned getNumSources() const
Returns the number of source registers.
Represents a G_MERGE_VALUES.
Represents a logical or.
Represents a G_PTR_ADD.
Represents a G_SELECT.
Register getCondReg() const
Represents a sext.
Represents a trunc.
Represents a G_ZEXTLOAD.
Represents a zext.
Register getReg(unsigned Idx) const
Access the Idx'th operand as a register and return it.
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:278
constexpr bool isScalableVector() const
Returns true if the LLT is a scalable vector.
Definition: LowLevelType.h:182
constexpr unsigned getScalarSizeInBits() const
Definition: LowLevelType.h:267
constexpr bool isScalar() const
Definition: LowLevelType.h:146
static constexpr LLT vector(ElementCount EC, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
Definition: LowLevelType.h:64
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
Definition: LowLevelType.h:42
constexpr bool isValid() const
Definition: LowLevelType.h:145
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
Definition: LowLevelType.h:159
constexpr bool isVector() const
Definition: LowLevelType.h:148
constexpr bool isByteSized() const
Definition: LowLevelType.h:263
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
Definition: LowLevelType.h:193
constexpr bool isPointer() const
Definition: LowLevelType.h:149
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
Definition: LowLevelType.h:290
constexpr ElementCount getElementCount() const
Definition: LowLevelType.h:184
constexpr LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
Definition: LowLevelType.h:221
constexpr unsigned getAddressSpace() const
Definition: LowLevelType.h:280
constexpr bool isFixedVector() const
Returns true if the LLT is a fixed vector.
Definition: LowLevelType.h:178
constexpr LLT getScalarType() const
Definition: LowLevelType.h:208
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
Definition: LowLevelType.h:203
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
@ Legalized
Instruction has been legalized and the MachineFunction changed.
LegalizeResult lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0)
Register getVectorElementPointer(Register VecPtr, LLT VecTy, Register Index)
Get a pointer to vector element Index located in memory for a vector of type VecTy starting at a base...
bool isLegalOrCustom(const LegalityQuery &Query) const
LegalizeActionStep getAction(const LegalityQuery &Query) const
Determine what action should be taken to legalize the described instruction.
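isLegalOrCustom and getAction are the legality guards consulted before new instructions are emitted. A hedged sketch of a typical query, assuming LI is a valid LegalizerInfo reference and Ty is the single type index of G_AND; canBuildAnd is a hypothetical helper name:

#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
using namespace llvm;

// Only emit a G_AND of type Ty if the target will be able to select it
// directly or handle it with custom legalization.
static bool canBuildAnd(const LegalizerInfo &LI, LLT Ty) {
  return LI.isLegalOrCustom({TargetOpcode::G_AND, {Ty}});
}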
TypeSize getValue() const
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition: MCInstrInfo.h:63
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
bool isLayoutSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB will be emitted immediately after this block, such that if this bloc...
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
bool dominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Helper class to build MachineInstr.
MachineInstrBuilder insertInstr(MachineInstrBuilder MIB)
Insert an existing instruction at the insertion point.
void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II)
Set the insertion point before the specified position.
LLVMContext & getContext() const
MachineInstrBuilder buildAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ADD Op0, Op1.
MachineInstrBuilder buildUndef(const DstOp &Res)
Build and insert Res = IMPLICIT_DEF.
MachineInstrBuilder buildNot(const DstOp &Dst, const SrcOp &Src0)
Build and insert a bitwise not: NegOne = G_CONSTANT -1, Res = G_XOR Op0, NegOne.
MachineInstrBuilder buildCTTZ(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTTZ Src0.
MachineInstrBuilder buildAShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildUnmerge(ArrayRef< LLT > Res, const SrcOp &Op)
Build and insert Res0, ... = G_UNMERGE_VALUES Op.
MachineInstrBuilder buildSelect(const DstOp &Res, const SrcOp &Tst, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert a Res = G_SELECT Tst, Op0, Op1.
MachineInstrBuilder buildAnd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_AND Op0, Op1.
MachineInstrBuilder buildICmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1)
Build and insert a Res = G_ICMP Pred, Op0, Op1.
MachineInstrBuilder buildCast(const DstOp &Dst, const SrcOp &Src)
Build and insert an appropriate cast between two registers of equal size.
const TargetInstrInfo & getTII()
MachineInstrBuilder buildURem(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_UREM Op0, Op1.
MachineInstrBuilder buildLShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildZExt(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ZEXT Op.
MachineInstrBuilder buildConcatVectors(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_CONCAT_VECTORS Op0, ...
MachineInstrBuilder buildSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_SUB Op0, Op1.
MachineInstrBuilder buildIntToPtr(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_INTTOPTR instruction.
MachineInstrBuilder buildBuildVector(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_BUILD_VECTOR Op0, ...
MachineInstrBuilder buildNeg(const DstOp &Dst, const SrcOp &Src0)
Build and insert integer negation: Zero = G_CONSTANT 0, Res = G_SUB Zero, Op0.
MachineInstrBuilder buildCTLZ(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTLZ Src0.
MachineInstrBuilder buildMergeLikeInstr(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_MERGE_VALUES Op0, ... or Res = G_BUILD_VECTOR Op0, ... or Res = G_CONCAT_VEC...
MachineInstrBuilder buildPtrAdd(const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_PTR_ADD Op0, Op1.
MachineInstrBuilder buildZExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ZEXT Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes...
MachineInstrBuilder buildExtractVectorElementConstant(const DstOp &Res, const SrcOp &Val, const int Idx)
Build and insert Res = G_EXTRACT_VECTOR_ELT Val, Idx.
virtual MachineInstrBuilder buildFConstant(const DstOp &Res, const ConstantFP &Val)
Build and insert Res = G_FCONSTANT Val.
MachineInstrBuilder buildShl(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineFunction & getMF()
Getter for the function we currently build.
void setInstrAndDebugLoc(MachineInstr &MI)
Set the insertion point to before MI, and set the debug loc to MI's loc.
MachineInstrBuilder buildExtOrTrunc(unsigned ExtOpc, const DstOp &Res, const SrcOp &Op)
Build and insert Res = ExtOpc, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes of...
MachineInstrBuilder buildTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_TRUNC Op.
void setDebugLoc(const DebugLoc &DL)
Set the debug location to DL for all the next build instructions.
MachineInstrBuilder buildFNeg(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FNEG Op0.
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineInstrBuilder buildOr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_OR Op0, Op1.
MachineInstrBuilder buildInstrNoInsert(unsigned Opcode)
Build but don't insert <empty> = Opcode <empty>.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
MachineInstrBuilder buildLoadInstr(unsigned Opcode, const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = <opcode> Addr, MMO.
MachineInstrBuilder buildXor(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_XOR Op0, Op1.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
MachineInstrBuilder buildPtrToInt(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_PTRTOINT instruction.
MachineInstrBuilder buildFCanonicalize(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Dst = G_FCANONICALIZE Src0.
MachineInstrBuilder buildSExtInReg(const DstOp &Res, const SrcOp &Op, int64_t ImmOp)
Build and insert Res = G_SEXT_INREG Op, ImmOp.
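The build* entry points above are how the combiner materializes replacement code. A minimal sketch that assumes a MachineIRBuilder B already positioned at the insertion point and two s32 registers X and Y; emitMaskedSelect and the particular expression are illustrative only:

#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/IR/InstrTypes.h"
using namespace llvm;

// Emit (X & 1) == 0 ? Y : ~Y. All values are s32 except the s1 compare result.
static Register emitMaskedSelect(MachineIRBuilder &B, Register X, Register Y) {
  LLT S32 = LLT::scalar(32);
  LLT S1 = LLT::scalar(1);
  auto One = B.buildConstant(S32, 1);
  auto Zero = B.buildConstant(S32, 0);
  auto Masked = B.buildAnd(S32, X, One);
  auto Cond = B.buildICmp(CmpInst::ICMP_EQ, S1, Masked, Zero);
  auto NotY = B.buildNot(S32, Y);
  return B.buildSelect(S32, Cond, Y, NotY).getReg(0);
}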
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
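When no typed build* helper fits, operands are attached one at a time through the addDef/addUse/addImm interface above. A hedged sketch, assuming B is a valid MachineIRBuilder and Src an s32 register; emitSextInReg8 is a hypothetical helper name:

#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/TargetOpcodes.h"
using namespace llvm;

// Build a G_SEXT_INREG by adding its def, use, and immediate explicitly.
static Register emitSextInReg8(MachineIRBuilder &B, Register Src) {
  Register Dst = B.getMRI()->createGenericVirtualRegister(LLT::scalar(32));
  B.buildInstr(TargetOpcode::G_SEXT_INREG)
      .addDef(Dst)
      .addUse(Src)
      .addImm(8); // sign-extend from bit 8 of Src
  return Dst;
}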
Representation of each machine instruction.
Definition: MachineInstr.h:69
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:558
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:341
bool isDereferenceableInvariantLoad() const
Return true if this load instruction never traps and points to a memory location whose value doesn't ...
bool getFlag(MIFlag Flag) const
Return whether an MI flag is set.
Definition: MachineInstr.h:391
void cloneMemRefs(MachineFunction &MF, const MachineInstr &MI)
Clone another MachineInstr's memory reference descriptor list and replace ours with it.
unsigned getNumOperands() const
Returns the total number of operands.
Definition: MachineInstr.h:561
void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:568
uint32_t getFlags() const
Return the MI flags bitvector.
Definition: MachineInstr.h:386
int findRegisterDefOperandIdx(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
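The MachineInstr accessors above are what the match* routines use to inspect a candidate before committing to a rewrite. A minimal sketch; isNoSignedWrapAdd is a hypothetical helper name:

#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/TargetOpcodes.h"
using namespace llvm;

// Check the opcode and an MI flag, the shape of many match* entry points.
static bool isNoSignedWrapAdd(const MachineInstr &MI) {
  return MI.getOpcode() == TargetOpcode::G_ADD &&
         MI.getFlag(MachineInstr::NoSWrap);
}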
A description of a memory reference used in the backend.
LLT getMemoryType() const
Return the memory type of the memory reference.
unsigned getAddrSpace() const
const MachinePointerInfo & getPointerInfo() const
Align getAlign() const
Return the minimum known alignment in bytes of the actual memory reference.
MachineOperand class - Representation of each machine instruction operand.
const ConstantInt * getCImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
void setReg(Register Reg)
Change the register this operand corresponds to.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void setMBB(MachineBasicBlock *MBB)
void setPredicate(unsigned Predicate)
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
unsigned getPredicate() const
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool hasOneNonDBGUse(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register.
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
use_instr_iterator use_instr_begin(Register RegNo) const
bool use_nodbg_empty(Register RegNo) const
use_nodbg_empty - Return true if there are no non-Debug instructions using the specified register.
const RegClassOrRegBank & getRegClassOrRegBank(Register Reg) const
Return the register bank or register class of Reg.
void setRegClassOrRegBank(Register Reg, const RegClassOrRegBank &RCOrRB)
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
bool hasOneUse(Register RegNo) const
hasOneUse - Return true if there is exactly one instruction using the specified register.
use_instr_nodbg_iterator use_instr_nodbg_begin(Register RegNo) const
void setRegBank(Register Reg, const RegisterBank &RegBank)
Set the register bank to RegBank for Reg.
iterator_range< use_instr_nodbg_iterator > use_nodbg_instructions(Register Reg) const
Register createGenericVirtualRegister(LLT Ty, StringRef Name="")
Create and return a new generic virtual register with low-level type Ty.
iterator_range< use_instr_iterator > use_instructions(Register Reg) const
Register cloneVirtualRegister(Register VReg, StringRef Name="")
Create and return a new virtual register in the function with the same attributes as the given regist...
bool constrainRegAttrs(Register Reg, Register ConstrainingReg, unsigned MinNumRegs=0)
Constrain the register class or the register bank of the virtual register Reg (and low-level type) to...
iterator_range< use_iterator > use_operands(Register Reg) const
void replaceRegWith(Register FromReg, Register ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
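The MachineRegisterInfo queries above back the one-use and def-lookup guards used throughout the combines. A hedged sketch; getSoleUseDef is a hypothetical helper name:

#include "llvm/CodeGen/MachineRegisterInfo.h"
using namespace llvm;

// Walk from a value back to its defining instruction, but only when this is
// the sole non-debug use of that value.
static MachineInstr *getSoleUseDef(const MachineRegisterInfo &MRI,
                                   Register Reg) {
  if (!MRI.hasOneNonDBGUse(Reg))
    return nullptr;
  return MRI.getVRegDef(Reg);
}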
This class implements the register bank concept.
Definition: RegisterBank.h:28
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isValid() const
Definition: Register.h:116
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:98
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:264
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
SmallBitVector & set()
bool all() const
Returns true if all bits are set.
size_type size() const
Definition: SmallPtrSet.h:94
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:342
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:427
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void resize(size_type N)
Definition: SmallVector.h:651
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
virtual bool isExtendLikelyToBeFolded(MachineInstr &ExtMI, MachineRegisterInfo &MRI) const
Given the generic extension instruction ExtMI, returns true if this extension is a likely candidate f...
virtual bool produceSameValue(const MachineInstr &MI0, const MachineInstr &MI1, const MachineRegisterInfo *MRI=nullptr) const
Return true if two machine instructions would produce identical values.
virtual LLVM_READONLY LLT getPreferredShiftAmountTy(LLT ShiftValueTy) const
Return the preferred type to use for a shift opcode, given the shifted amount type is ShiftValueTy.
virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AddrSpace, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual bool isReassocProfitable(SelectionDAG &DAG, SDValue N0, SDValue N1) const
virtual const TargetLowering * getTargetLowering() const
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
LLVM Value Representation.
Definition: Value.h:74
Value(Type *Ty, unsigned scid)
Definition: Value.cpp:53
self_iterator getIterator()
Definition: ilist_node.h:109
#define INT64_MAX
Definition: DataTypes.h:71
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Fast
Attempts to make calls as fast as possible (e.g. by passing things in registers).
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ Legal
The operation is expected to be selectable directly by the target, and no transformation is necessary...
Definition: LegalizerInfo.h:47
operand_type_match m_Reg()
SpecificConstantOrSplatMatch m_SpecificICstOrSplat(int64_t RequestedValue)
Matches a RequestedValue constant or a constant splat of RequestedValue.
BinaryOp_match< LHS, RHS, TargetOpcode::G_BUILD_VECTOR, false > m_GBuildVector(const LHS &L, const RHS &R)
GCstAndRegMatch m_GCst(std::optional< ValueAndVReg > &ValReg)
SpecificConstantMatch m_SpecificICst(int64_t RequestedValue)
Matches a constant equal to RequestedValue.
operand_type_match m_Pred()
UnaryOp_match< SrcTy, TargetOpcode::G_ZEXT > m_GZExt(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_XOR, true > m_GXor(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_SEXT > m_GSExt(const SrcTy &Src)
UnaryOp_match< SrcTy, TargetOpcode::G_FPEXT > m_GFPExt(const SrcTy &Src)
ConstantMatch< APInt > m_ICst(APInt &Cst)
UnaryOp_match< SrcTy, TargetOpcode::G_INTTOPTR > m_GIntToPtr(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_ADD, true > m_GAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_OR, true > m_GOr(const LHS &L, const RHS &R)
ICstOrSplatMatch< APInt > m_ICstOrSplat(APInt &Cst)
ImplicitDefMatch m_GImplicitDef()
OneNonDBGUse_match< SubPat > m_OneNonDBGUse(const SubPat &SP)
CheckType m_SpecificType(LLT Ty)
BinaryOp_match< LHS, RHS, TargetOpcode::G_FADD, true > m_GFAdd(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_PTRTOINT > m_GPtrToInt(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_FSUB, false > m_GFSub(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SUB > m_GSub(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_ASHR, false > m_GAShr(const LHS &L, const RHS &R)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
BinaryOp_match< LHS, RHS, TargetOpcode::G_PTR_ADD, false > m_GPtrAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SHL, false > m_GShl(const LHS &L, const RHS &R)
Or< Preds... > m_any_of(Preds &&... preds)
BinaryOp_match< LHS, RHS, TargetOpcode::G_AND, true > m_GAnd(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_BITCAST > m_GBitcast(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_BUILD_VECTOR_TRUNC, false > m_GBuildVectorTrunc(const LHS &L, const RHS &R)
bind_ty< MachineInstr * > m_MInstr(MachineInstr *&MI)
UnaryOp_match< SrcTy, TargetOpcode::G_FNEG > m_GFNeg(const SrcTy &Src)
CompareOp_match< Pred, LHS, RHS, TargetOpcode::G_ICMP, true > m_c_GICmp(const Pred &P, const LHS &L, const RHS &R)
G_ICMP matcher that also matches commuted compares.
TernaryOp_match< Src0Ty, Src1Ty, Src2Ty, TargetOpcode::G_INSERT_VECTOR_ELT > m_GInsertVecElt(const Src0Ty &Src0, const Src1Ty &Src1, const Src2Ty &Src2)
GFCstOrSplatGFCstMatch m_GFCstOrSplat(std::optional< FPValueAndVReg > &FPValReg)
And< Preds... > m_all_of(Preds &&... preds)
BinaryOp_match< LHS, RHS, TargetOpcode::G_LSHR, false > m_GLShr(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_ANYEXT > m_GAnyExt(const SrcTy &Src)
UnaryOp_match< SrcTy, TargetOpcode::G_TRUNC > m_GTrunc(const SrcTy &Src)
CompareOp_match< Pred, LHS, RHS, TargetOpcode::G_FCMP > m_GFCmp(const Pred &P, const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Definition: PatternMatch.h:100
BinaryOp_match< cst_pred_ty< is_zero_int >, ValTy, Instruction::Sub > m_Neg(const ValTy &V)
Matches a 'Neg' as 'sub 0, V'.
Not(const Pred &P) -> Not< Pred >
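The MIPatternMatch combinators above are composed through mi_match to recognize instruction shapes. A hedged sketch that matches (x << C) | y where the shift has a single non-debug use, assuming MRI is in scope and Root is the destination of a candidate G_OR; matchShlOr is a hypothetical helper name and the shift constant is assumed to fit in 64 bits:

#include "llvm/ADT/APInt.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
using namespace llvm;
using namespace MIPatternMatch;

// Bind X, Y, and the shift amount if Root is (x << C) | y (commuted or not)
// and the shift result is only used by the or.
static bool matchShlOr(Register Root, const MachineRegisterInfo &MRI,
                       Register &X, Register &Y, int64_t &ShAmt) {
  APInt Cst;
  if (!mi_match(Root, MRI,
                m_GOr(m_OneNonDBGUse(m_GShl(m_Reg(X), m_ICst(Cst))),
                      m_Reg(Y))))
    return false;
  ShAmt = Cst.getSExtValue();
  return true;
}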
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
@ Offset
Definition: DWP.cpp:456
bool isBuildVectorAllZeros(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndef=false)
Return true if the specified instruction is a G_BUILD_VECTOR or G_BUILD_VECTOR_TRUNC where all of the...
Definition: Utils.cpp:1426
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by a single def instruction that is Opcode.
Definition: Utils.cpp:639
static double log2(double V)
const ConstantFP * getConstantFPVRegVal(Register VReg, const MachineRegisterInfo &MRI)
Definition: Utils.cpp:452
EVT getApproximateEVTForLLT(LLT Ty, const DataLayout &DL, LLVMContext &Ctx)
std::optional< APInt > getIConstantVRegVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT, return the corresponding value.
Definition: Utils.cpp:295
std::optional< APInt > getIConstantSplatVal(const Register Reg, const MachineRegisterInfo &MRI)
Definition: Utils.cpp:1386
bool isAllOnesOrAllOnesSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant -1 integer or a splatted vector of a constant -1 integer (with...
Definition: Utils.cpp:1539
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition: bit.h:307
const llvm::fltSemantics & getFltSemanticForLLT(LLT Ty)
Get the appropriate floating point arithmetic semantic based on the bit size of the given scalar LLT.
std::optional< APFloat > ConstantFoldFPBinOp(unsigned Opcode, const Register Op1, const Register Op2, const MachineRegisterInfo &MRI)
Definition: Utils.cpp:727
MVT getMVTForLLT(LLT Ty)
Get a rough equivalent of an MVT for a given LLT.
bool isKnownToBeAPowerOfTwo(const Value *V, const DataLayout &DL, bool OrZero=false, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Return true if the given value is known to have exactly one bit set when defined.
std::optional< APInt > isConstantOrConstantSplatVector(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a constant integer or a splat vector of constant integers.
Definition: Utils.cpp:1509
bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition: Utils.cpp:1521
MachineInstr * getDefIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, folding away any trivial copies.
Definition: Utils.cpp:479
bool matchUnaryPredicate(const MachineRegisterInfo &MRI, Register Reg, std::function< bool(const Constant *ConstVal)> Match, bool AllowUndefs=false)
Attempt to match a unary predicate against a scalar/splat constant or every element of a constant G_B...
Definition: Utils.cpp:1554
bool isConstTrueVal(const TargetLowering &TLI, int64_t Val, bool IsVector, bool IsFP)
Returns true if given the TargetLowering's boolean contents information, the value Val contains a tru...
Definition: Utils.cpp:1586
std::function< void(MachineIRBuilder &)> BuildFnTy
std::optional< APInt > ConstantFoldBinOp(unsigned Opcode, const Register Op1, const Register Op2, const MachineRegisterInfo &MRI)
Definition: Utils.cpp:658
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
bool isConstantOrConstantVector(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowFP=true, bool AllowOpaqueConstants=true)
Return true if the specified instruction is known to be a constant, or a vector of constants.
Definition: Utils.cpp:1489
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:275
bool canReplaceReg(Register DstReg, Register SrcReg, MachineRegisterInfo &MRI)
Check if DstReg can be replaced with SrcReg depending on the register constraints.
Definition: Utils.cpp:201
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition: MathExtras.h:257
auto instructionsWithoutDebug(IterT It, IterT End, bool SkipPseudoOp=true)
Construct a range iterator which begins at It and moves forwards until End is reached,...
std::optional< FPValueAndVReg > getFConstantSplat(Register VReg, const MachineRegisterInfo &MRI, bool AllowUndef=true)
Returns a floating point scalar constant of a build vector splat if it exists.
Definition: Utils.cpp:1419
std::optional< APInt > ConstantFoldCastOp(unsigned Opcode, LLT DstTy, const Register Op0, const MachineRegisterInfo &MRI)
Definition: Utils.cpp:946
@ Xor
Bitwise or logical XOR of integers.
@ Add
Sum of integers.
DWARFExpression::Operation Op
std::optional< FPValueAndVReg > getFConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_FCONSTANT returns it...
Definition: Utils.cpp:440
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
int64_t getICmpTrueVal(const TargetLowering &TLI, bool IsVector, bool IsFP)
Returns an integer representing true, as defined by the TargetBooleanContents.
Definition: Utils.cpp:1611
std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Definition: Utils.cpp:426
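The utility helpers in this block look through trivial copies and constant-producing chains. A minimal sketch of the usual constant-operand test, assuming MI is a G_ADD; isAddOfZero is a hypothetical helper name:

#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
using namespace llvm;

// Look through copies for a constant right-hand operand and test it for zero.
static bool isAddOfZero(const MachineInstr &MI,
                        const MachineRegisterInfo &MRI) {
  auto RHS = getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
  return RHS && RHS->Value.isZero();
}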
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1749
std::optional< DefinitionAndSourceRegister > getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, and underlying value Register folding away any copies.
Definition: Utils.cpp:460
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
bool isKnownNeverNaN(const Value *V, unsigned Depth, const SimplifyQuery &SQ)
Return true if the floating-point scalar value is not a NaN or if the floating-point vector value has...
Register getSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the source register for Reg, folding away any trivial copies.
Definition: Utils.cpp:486
unsigned getFCmpCode(CmpInst::Predicate CC)
Similar to getICmpCode but for FCmpInst.
std::optional< int64_t > getIConstantSplatSExtVal(const Register Reg, const MachineRegisterInfo &MRI)
Definition: Utils.cpp:1404
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:230
static const fltSemantics & IEEEdouble() LLVM_READNONE
Definition: APFloat.cpp:250
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Simple struct used to hold a Register value and the instruction which defines it.
Definition: Utils.h:224
Extended Value Type.
Definition: ValueTypes.h:34
SmallVector< InstructionBuildSteps, 2 > InstrsToBuild
Describes instructions to be built during a combine.
static std::optional< bool > eq(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_EQ result.
Definition: KnownBits.cpp:494
bool isUnknown() const
Returns true if we don't know any bits.
Definition: KnownBits.h:63
static std::optional< bool > ne(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_NE result.
Definition: KnownBits.cpp:502
static std::optional< bool > sge(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SGE result.
Definition: KnownBits.cpp:542
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition: KnownBits.h:244
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition: KnownBits.h:141
static std::optional< bool > ugt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_UGT result.
Definition: KnownBits.cpp:508
static std::optional< bool > slt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SLT result.
Definition: KnownBits.cpp:548
static std::optional< bool > ult(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_ULT result.
Definition: KnownBits.cpp:524
static std::optional< bool > ule(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_ULE result.
Definition: KnownBits.cpp:528
static std::optional< bool > sle(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SLE result.
Definition: KnownBits.cpp:552
static std::optional< bool > sgt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SGT result.
Definition: KnownBits.cpp:532
static std::optional< bool > uge(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_UGE result.
Definition: KnownBits.cpp:518
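The KnownBits comparison helpers above return std::nullopt when the known bits do not decide the predicate, and a definite bool when they do. A minimal sketch; foldUltFromKnownBits is a hypothetical helper name:

#include "llvm/Support/KnownBits.h"
#include <optional>
using namespace llvm;

// Fold an ICMP_ULT to a constant if the operands' known bits already decide
// it; std::nullopt means the compare must stay.
static std::optional<bool> foldUltFromKnownBits(const KnownBits &LHS,
                                                const KnownBits &RHS) {
  return KnownBits::ult(LHS, RHS);
}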
The LegalityQuery object bundles together all the information that's needed to decide whether a given...
LegalizeAction Action
The action to take or the final answer.
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands,...
unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
MachinePointerInfo getWithOffset(int64_t O) const
MachineInstr * MI
const RegisterBank * Bank
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
Magic data for optimising unsigned division by a constant.
static UnsignedDivisionByConstantInfo get(const APInt &D, unsigned LeadingZeros=0, bool AllowEvenDivisorOptimization=true)
Calculate the magic numbers required to implement an unsigned integer division by a constant as a seq...
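UnsignedDivisionByConstantInfo::get produces the magic multiplier and shift amounts that let an unsigned division by a constant be rewritten as a multiply-high plus shifts. A minimal sketch for a 32-bit divide by 10; magicForDivideBy10 is a hypothetical helper name:

#include "llvm/ADT/APInt.h"
#include "llvm/Support/DivisionByConstantInfo.h"
using namespace llvm;

// Compute the magic data for dividing an unsigned 32-bit value by 10; the
// caller turns this into a multiply-high followed by shifts (and a fix-up
// step when the returned info requires one).
static UnsignedDivisionByConstantInfo magicForDivideBy10() {
  APInt Divisor(32, 10);
  return UnsignedDivisionByConstantInfo::get(Divisor);
}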