LLVM 19.0.0git
CombinerHelper.cpp
1//===-- lib/CodeGen/GlobalISel/CombinerHelper.cpp -------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
9#include "llvm/ADT/APFloat.h"
10#include "llvm/ADT/STLExtras.h"
11#include "llvm/ADT/SetVector.h"
33#include "llvm/IR/DataLayout.h"
34#include "llvm/IR/InstrTypes.h"
40#include <cmath>
41#include <optional>
42#include <tuple>
43
44#define DEBUG_TYPE "gi-combiner"
45
46using namespace llvm;
47using namespace MIPatternMatch;
48
49// Option to allow testing of the combiner while no targets know about indexed
50// addressing.
51static cl::opt<bool>
52 ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false),
53 cl::desc("Force all indexed operations to be "
54 "legal for the GlobalISel combiner"));
55
56CombinerHelper::CombinerHelper(GISelChangeObserver &Observer,
57 MachineIRBuilder &B, bool IsPreLegalize,
58 GISelKnownBits *KB, MachineDominatorTree *MDT,
59 const LegalizerInfo *LI)
60 : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer), KB(KB),
61 MDT(MDT), IsPreLegalize(IsPreLegalize), LI(LI),
62 RBI(Builder.getMF().getSubtarget().getRegBankInfo()),
63 TRI(Builder.getMF().getSubtarget().getRegisterInfo()) {
64 (void)this->KB;
65}
66
67const TargetLowering &CombinerHelper::getTargetLowering() const {
68 return *Builder.getMF().getSubtarget().getTargetLowering();
69}
70
71/// \returns The little endian in-memory byte position of byte \p I in a
72/// \p ByteWidth bytes wide type.
73///
74/// E.g. Given a 4-byte type x, x[0] -> byte 0
75static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I) {
76 assert(I < ByteWidth && "I must be in [0, ByteWidth)");
77 return I;
78}
79
80/// Determines the LogBase2 value for a non-null input value using the
81/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
82static Register buildLogBase2(Register V, MachineIRBuilder &MIB) {
83 auto &MRI = *MIB.getMRI();
84 LLT Ty = MRI.getType(V);
85 auto Ctlz = MIB.buildCTLZ(Ty, V);
86 auto Base = MIB.buildConstant(Ty, Ty.getScalarSizeInBits() - 1);
87 return MIB.buildSub(Ty, Base, Ctlz).getReg(0);
88}
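// Illustrative sketch (not part of the original source): for a 32-bit value
// that is a known power of two, the helper above emits roughly
//   %ctlz:_(s32) = G_CTLZ %v
//   %c31:_(s32) = G_CONSTANT i32 31
//   %log2:_(s32) = G_SUB %c31, %ctlz
// so an input of 8 gives ctlz = 28 and a LogBase2 of 3.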
89
90/// \returns The big endian in-memory byte position of byte \p I in a
91/// \p ByteWidth bytes wide type.
92///
93/// E.g. Given a 4-byte type x, x[0] -> byte 3
94static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I) {
95 assert(I < ByteWidth && "I must be in [0, ByteWidth)");
96 return ByteWidth - I - 1;
97}
98
99/// Given a map from byte offsets in memory to indices in a load/store,
100/// determine if that map corresponds to a little or big endian byte pattern.
101///
102/// \param MemOffset2Idx maps memory offsets to address offsets.
103/// \param LowestIdx is the lowest index in \p MemOffset2Idx.
104///
105/// \returns true if the map corresponds to a big endian byte pattern, false if
106/// it corresponds to a little endian byte pattern, and std::nullopt otherwise.
107///
108/// E.g. given a 32-bit type x, and x[AddrOffset], the in-memory byte patterns
109/// are as follows:
110///
111/// AddrOffset Little endian Big endian
112/// 0 0 3
113/// 1 1 2
114/// 2 2 1
115/// 3 3 0
116static std::optional<bool>
117isBigEndian(const SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx,
118 int64_t LowestIdx) {
119 // Need at least two byte positions to decide on endianness.
120 unsigned Width = MemOffset2Idx.size();
121 if (Width < 2)
122 return std::nullopt;
123 bool BigEndian = true, LittleEndian = true;
124 for (unsigned MemOffset = 0; MemOffset < Width; ++MemOffset) {
125 auto MemOffsetAndIdx = MemOffset2Idx.find(MemOffset);
126 if (MemOffsetAndIdx == MemOffset2Idx.end())
127 return std::nullopt;
128 const int64_t Idx = MemOffsetAndIdx->second - LowestIdx;
129 assert(Idx >= 0 && "Expected non-negative byte offset?");
130 LittleEndian &= Idx == littleEndianByteAt(Width, MemOffset);
131 BigEndian &= Idx == bigEndianByteAt(Width, MemOffset);
132 if (!BigEndian && !LittleEndian)
133 return std::nullopt;
134 }
135
136 assert((BigEndian != LittleEndian) &&
137 "Pattern cannot be both big and little endian!");
138 return BigEndian;
139}
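// Illustrative example (assumed values, not from the original source): for a
// 4-byte access with MemOffset2Idx = {0->3, 1->2, 2->1, 3->0} and LowestIdx = 0,
// every entry matches bigEndianByteAt(4, MemOffset), so the function returns
// true; the identity map {0->0, 1->1, 2->2, 3->3} would instead return false
// (little endian).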
140
141bool CombinerHelper::isPreLegalize() const { return IsPreLegalize; }
142
143bool CombinerHelper::isLegal(const LegalityQuery &Query) const {
144 assert(LI && "Must have LegalizerInfo to query isLegal!");
145 return LI->getAction(Query).Action == LegalizeActions::Legal;
146}
147
148bool CombinerHelper::isLegalOrBeforeLegalizer(
149 const LegalityQuery &Query) const {
150 return isPreLegalize() || isLegal(Query);
151}
152
153bool CombinerHelper::isConstantLegalOrBeforeLegalizer(const LLT Ty) const {
154 if (!Ty.isVector())
155 return isLegalOrBeforeLegalizer({TargetOpcode::G_CONSTANT, {Ty}});
156 // Vector constants are represented as a G_BUILD_VECTOR of scalar G_CONSTANTs.
157 if (isPreLegalize())
158 return true;
159 LLT EltTy = Ty.getElementType();
160 return isLegal({TargetOpcode::G_BUILD_VECTOR, {Ty, EltTy}}) &&
161 isLegal({TargetOpcode::G_CONSTANT, {EltTy}});
162}
163
164void CombinerHelper::replaceRegWith(MachineRegisterInfo &MRI, Register FromReg,
165 Register ToReg) const {
166 Observer.changingAllUsesOfReg(MRI, FromReg);
167
168 if (MRI.constrainRegAttrs(ToReg, FromReg))
169 MRI.replaceRegWith(FromReg, ToReg);
170 else
171 Builder.buildCopy(ToReg, FromReg);
172
173 Observer.finishedChangingAllUsesOfReg();
174}
175
176void CombinerHelper::replaceRegOpWith(MachineRegisterInfo &MRI,
177 MachineOperand &FromRegOp,
178 Register ToReg) const {
179 assert(FromRegOp.getParent() && "Expected an operand in an MI");
180 Observer.changingInstr(*FromRegOp.getParent());
181
182 FromRegOp.setReg(ToReg);
183
184 Observer.changedInstr(*FromRegOp.getParent());
185}
186
187void CombinerHelper::replaceOpcodeWith(MachineInstr &FromMI,
188 unsigned ToOpcode) const {
189 Observer.changingInstr(FromMI);
190
191 FromMI.setDesc(Builder.getTII().get(ToOpcode));
192
193 Observer.changedInstr(FromMI);
194}
195
196const RegisterBank *CombinerHelper::getRegBank(Register Reg) const {
197 return RBI->getRegBank(Reg, MRI, *TRI);
198}
199
200void CombinerHelper::setRegBank(Register Reg, const RegisterBank *RegBank) {
201 if (RegBank)
202 MRI.setRegBank(Reg, *RegBank);
203}
204
205bool CombinerHelper::tryCombineCopy(MachineInstr &MI) {
206 if (matchCombineCopy(MI)) {
207 applyCombineCopy(MI);
208 return true;
209 }
210 return false;
211}
212bool CombinerHelper::matchCombineCopy(MachineInstr &MI) {
213 if (MI.getOpcode() != TargetOpcode::COPY)
214 return false;
215 Register DstReg = MI.getOperand(0).getReg();
216 Register SrcReg = MI.getOperand(1).getReg();
217 return canReplaceReg(DstReg, SrcReg, MRI);
218}
219void CombinerHelper::applyCombineCopy(MachineInstr &MI) {
220 Register DstReg = MI.getOperand(0).getReg();
221 Register SrcReg = MI.getOperand(1).getReg();
222 MI.eraseFromParent();
223 replaceRegWith(MRI, DstReg, SrcReg);
224}
225
228 assert(MI.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
229 "Invalid instruction");
230 bool IsUndef = true;
231 MachineInstr *Undef = nullptr;
232
233 // Walk over all the operands of concat vectors and check if they are
234 // build_vector themselves or undef.
235 // Then collect their operands in Ops.
236 for (const MachineOperand &MO : MI.uses()) {
237 Register Reg = MO.getReg();
238 MachineInstr *Def = MRI.getVRegDef(Reg);
239 assert(Def && "Operand not defined");
240 if (!MRI.hasOneNonDBGUse(Reg))
241 return false;
242 switch (Def->getOpcode()) {
243 case TargetOpcode::G_BUILD_VECTOR:
244 IsUndef = false;
245 // Remember the operands of the build_vector to fold
246 // them into the yet-to-build flattened concat vectors.
247 for (const MachineOperand &BuildVecMO : Def->uses())
248 Ops.push_back(BuildVecMO.getReg());
249 break;
250 case TargetOpcode::G_IMPLICIT_DEF: {
251 LLT OpType = MRI.getType(Reg);
252 // Keep one undef value for all the undef operands.
253 if (!Undef) {
254 Builder.setInsertPt(*MI.getParent(), MI);
255 Undef = Builder.buildUndef(OpType.getScalarType());
256 }
257 assert(MRI.getType(Undef->getOperand(0).getReg()) ==
258 OpType.getScalarType() &&
259 "All undefs should have the same type");
260 // Break the undef vector in as many scalar elements as needed
261 // for the flattening.
262 for (unsigned EltIdx = 0, EltEnd = OpType.getNumElements();
263 EltIdx != EltEnd; ++EltIdx)
264 Ops.push_back(Undef->getOperand(0).getReg());
265 break;
266 }
267 default:
268 return false;
269 }
270 }
271
272 // Check if the combine is illegal
273 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
275 {TargetOpcode::G_BUILD_VECTOR, {DstTy, MRI.getType(Ops[0])}})) {
276 return false;
277 }
278
279 if (IsUndef)
280 Ops.clear();
281
282 return true;
283}
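// Illustrative MIR (assumed example, not from the original source) of the
// flattening performed by the apply below:
//   %a:_(<2 x s32>) = G_BUILD_VECTOR %x, %y
//   %b:_(<2 x s32>) = G_BUILD_VECTOR %z, %w
//   %v:_(<4 x s32>) = G_CONCAT_VECTORS %a, %b
// becomes
//   %v:_(<4 x s32>) = G_BUILD_VECTOR %x, %y, %z, %w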
286 // We determined that the concat_vectors can be flattened.
287 // Generate the flattened build_vector.
288 Register DstReg = MI.getOperand(0).getReg();
289 Builder.setInsertPt(*MI.getParent(), MI);
290 Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
291
292 // Note: IsUndef is sort of redundant. We could have determined it by
293 // checking that all Ops are undef. Alternatively, we could have
294 // generated a build_vector of undefs and relied on another combine to
295 // clean that up. For now, given we already gather this information
296 // in matchCombineConcatVectors, just save compile time and issue the
297 // right thing.
298 if (Ops.empty())
299 Builder.buildUndef(NewDstReg);
300 else
301 Builder.buildBuildVector(NewDstReg, Ops);
302 MI.eraseFromParent();
303 replaceRegWith(MRI, DstReg, NewDstReg);
304}
305
308 if (matchCombineShuffleVector(MI, Ops)) {
310 return true;
311 }
312 return false;
313}
314
317 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
318 "Invalid instruction kind");
319 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
320 Register Src1 = MI.getOperand(1).getReg();
321 LLT SrcType = MRI.getType(Src1);
322 // As bizarre as it may look, shuffle vector can actually produce
323 // scalar! This is because at the IR level a <1 x ty> shuffle
324 // vector is perfectly valid.
325 unsigned DstNumElts = DstType.isVector() ? DstType.getNumElements() : 1;
326 unsigned SrcNumElts = SrcType.isVector() ? SrcType.getNumElements() : 1;
327
328 // If the resulting vector is smaller than the size of the source
329 // vectors being concatenated, we won't be able to replace the
330 // shuffle vector with a concat_vectors.
331 //
332 // Note: We may still be able to produce a concat_vectors fed by
333 // extract_vector_elt and so on. It is less clear that would
334 // be better though, so don't bother for now.
335 //
336 // If the destination is a scalar, the size of the sources doesn't
337 // matter. We will lower the shuffle to a plain copy. This will
338 // work only if the source and destination have the same size. But
339 // that's covered by the next condition.
340 //
341 // TODO: If the sizes of the source and destination don't match
342 // we could still emit an extract vector element in that case.
343 if (DstNumElts < 2 * SrcNumElts && DstNumElts != 1)
344 return false;
345
346 // Check that the shuffle mask can be broken evenly between the
347 // different sources.
348 if (DstNumElts % SrcNumElts != 0)
349 return false;
350
351 // Mask length is a multiple of the source vector length.
352 // Check if the shuffle is some kind of concatenation of the input
353 // vectors.
354 unsigned NumConcat = DstNumElts / SrcNumElts;
355 SmallVector<int, 8> ConcatSrcs(NumConcat, -1);
356 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
357 for (unsigned i = 0; i != DstNumElts; ++i) {
358 int Idx = Mask[i];
359 // Undef value.
360 if (Idx < 0)
361 continue;
362 // Ensure the indices in each SrcType sized piece are sequential and that
363 // the same source is used for the whole piece.
364 if ((Idx % SrcNumElts != (i % SrcNumElts)) ||
365 (ConcatSrcs[i / SrcNumElts] >= 0 &&
366 ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts)))
367 return false;
368 // Remember which source this index came from.
369 ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts;
370 }
371
372 // The shuffle is concatenating multiple vectors together.
373 // Collect the different operands for that.
374 Register UndefReg;
375 Register Src2 = MI.getOperand(2).getReg();
376 for (auto Src : ConcatSrcs) {
377 if (Src < 0) {
378 if (!UndefReg) {
379 Builder.setInsertPt(*MI.getParent(), MI);
380 UndefReg = Builder.buildUndef(SrcType).getReg(0);
381 }
382 Ops.push_back(UndefReg);
383 } else if (Src == 0)
384 Ops.push_back(Src1);
385 else
386 Ops.push_back(Src2);
387 }
388 return true;
389}
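// Illustrative MIR (assumed example, not from the original source): a shuffle
// whose mask simply appends its two sources,
//   %r:_(<4 x s32>) = G_SHUFFLE_VECTOR %a(<2 x s32>), %b(<2 x s32>),
//                                      shufflemask(0, 1, 2, 3)
// is matched here and rebuilt by the apply below as a merge-like instruction,
// i.e. effectively %r:_(<4 x s32>) = G_CONCAT_VECTORS %a, %b.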
390
392 const ArrayRef<Register> Ops) {
393 Register DstReg = MI.getOperand(0).getReg();
394 Builder.setInsertPt(*MI.getParent(), MI);
395 Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
396
397 if (Ops.size() == 1)
398 Builder.buildCopy(NewDstReg, Ops[0]);
399 else
400 Builder.buildMergeLikeInstr(NewDstReg, Ops);
401
402 MI.eraseFromParent();
403 replaceRegWith(MRI, DstReg, NewDstReg);
404}
405
407 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
408 "Invalid instruction kind");
409
410 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
411 return Mask.size() == 1;
412}
413
415 Register DstReg = MI.getOperand(0).getReg();
416 Builder.setInsertPt(*MI.getParent(), MI);
417
418 int I = MI.getOperand(3).getShuffleMask()[0];
419 Register Src1 = MI.getOperand(1).getReg();
420 LLT Src1Ty = MRI.getType(Src1);
421 int Src1NumElts = Src1Ty.isVector() ? Src1Ty.getNumElements() : 1;
422 Register SrcReg;
423 if (I >= Src1NumElts) {
424 SrcReg = MI.getOperand(2).getReg();
425 I -= Src1NumElts;
426 } else if (I >= 0)
427 SrcReg = Src1;
428
429 if (I < 0)
430 Builder.buildUndef(DstReg);
431 else if (!MRI.getType(SrcReg).isVector())
432 Builder.buildCopy(DstReg, SrcReg);
433 else
435
436 MI.eraseFromParent();
437}
438
439namespace {
440
441/// Select a preference between two uses. CurrentUse is the current preference
442/// while *ForCandidate holds the attributes of the candidate under consideration.
443PreferredTuple ChoosePreferredUse(MachineInstr &LoadMI,
444 PreferredTuple &CurrentUse,
445 const LLT TyForCandidate,
446 unsigned OpcodeForCandidate,
447 MachineInstr *MIForCandidate) {
448 if (!CurrentUse.Ty.isValid()) {
449 if (CurrentUse.ExtendOpcode == OpcodeForCandidate ||
450 CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT)
451 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
452 return CurrentUse;
453 }
454
455 // We permit the extend to hoist through basic blocks but this is only
456 // sensible if the target has extending loads. If you end up lowering back
457 // into a load and extend during the legalizer then the end result is
458 // hoisting the extend up to the load.
459
460 // Prefer defined extensions to undefined extensions as these are more
461 // likely to reduce the number of instructions.
462 if (OpcodeForCandidate == TargetOpcode::G_ANYEXT &&
463 CurrentUse.ExtendOpcode != TargetOpcode::G_ANYEXT)
464 return CurrentUse;
465 else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT &&
466 OpcodeForCandidate != TargetOpcode::G_ANYEXT)
467 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
468
469 // Prefer sign extensions to zero extensions as sign-extensions tend to be
470 // more expensive. Don't do this if the load is already a zero-extend load
471 // though, otherwise we'll rewrite a zero-extend load into a sign-extend
472 // later.
473 if (!isa<GZExtLoad>(LoadMI) && CurrentUse.Ty == TyForCandidate) {
474 if (CurrentUse.ExtendOpcode == TargetOpcode::G_SEXT &&
475 OpcodeForCandidate == TargetOpcode::G_ZEXT)
476 return CurrentUse;
477 else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ZEXT &&
478 OpcodeForCandidate == TargetOpcode::G_SEXT)
479 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
480 }
481
482 // This is potentially target specific. We've chosen the largest type
483 // because G_TRUNC is usually free. One potential catch with this is that
484 // some targets have a reduced number of larger registers than smaller
485 // registers and this choice potentially increases the live-range for the
486 // larger value.
487 if (TyForCandidate.getSizeInBits() > CurrentUse.Ty.getSizeInBits()) {
488 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
489 }
490 return CurrentUse;
491}
492
493/// Find a suitable place to insert some instructions and insert them. This
494/// function accounts for special cases like inserting before a PHI node.
495/// The current strategy for inserting before PHI's is to duplicate the
496/// instructions for each predecessor. However, while that's ok for G_TRUNC
497/// on most targets since it generally requires no code, other targets/cases may
498/// want to try harder to find a dominating block.
499static void InsertInsnsWithoutSideEffectsBeforeUse(
502 MachineOperand &UseMO)>
503 Inserter) {
504 MachineInstr &UseMI = *UseMO.getParent();
505
506 MachineBasicBlock *InsertBB = UseMI.getParent();
507
508 // If the use is a PHI then we want the predecessor block instead.
509 if (UseMI.isPHI()) {
510 MachineOperand *PredBB = std::next(&UseMO);
511 InsertBB = PredBB->getMBB();
512 }
513
514 // If the block is the same block as the def then we want to insert just after
515 // the def instead of at the start of the block.
516 if (InsertBB == DefMI.getParent()) {
518 Inserter(InsertBB, std::next(InsertPt), UseMO);
519 return;
520 }
521
522 // Otherwise we want the start of the BB
523 Inserter(InsertBB, InsertBB->getFirstNonPHI(), UseMO);
524}
525} // end anonymous namespace
526
528 PreferredTuple Preferred;
529 if (matchCombineExtendingLoads(MI, Preferred)) {
530 applyCombineExtendingLoads(MI, Preferred);
531 return true;
532 }
533 return false;
534}
535
536static unsigned getExtLoadOpcForExtend(unsigned ExtOpc) {
537 unsigned CandidateLoadOpc;
538 switch (ExtOpc) {
539 case TargetOpcode::G_ANYEXT:
540 CandidateLoadOpc = TargetOpcode::G_LOAD;
541 break;
542 case TargetOpcode::G_SEXT:
543 CandidateLoadOpc = TargetOpcode::G_SEXTLOAD;
544 break;
545 case TargetOpcode::G_ZEXT:
546 CandidateLoadOpc = TargetOpcode::G_ZEXTLOAD;
547 break;
548 default:
549 llvm_unreachable("Unexpected extend opc");
550 }
551 return CandidateLoadOpc;
552}
553
555 PreferredTuple &Preferred) {
556 // We match the loads and follow the uses to the extend instead of matching
557 // the extends and following the def to the load. This is because the load
558 // must remain in the same position for correctness (unless we also add code
559 // to find a safe place to sink it) whereas the extend is freely movable.
560 // It also prevents us from duplicating the load for the volatile case or just
561 // for performance.
562 GAnyLoad *LoadMI = dyn_cast<GAnyLoad>(&MI);
563 if (!LoadMI)
564 return false;
565
566 Register LoadReg = LoadMI->getDstReg();
567
568 LLT LoadValueTy = MRI.getType(LoadReg);
569 if (!LoadValueTy.isScalar())
570 return false;
571
572 // Most architectures are going to legalize loads narrower than s8 into at least
573 // a 1-byte load, and the MMOs can only describe memory accesses in multiples of bytes.
574 // If we try to perform extload combining on those, we can end up with
575 // %a(s8) = extload %ptr (load 1 byte from %ptr)
576 // ... which is an illegal extload instruction.
577 if (LoadValueTy.getSizeInBits() < 8)
578 return false;
579
580 // For non power-of-2 types, they will very likely be legalized into multiple
581 // loads. Don't bother trying to match them into extending loads.
582 if (!llvm::has_single_bit<uint32_t>(LoadValueTy.getSizeInBits()))
583 return false;
584
585 // Find the preferred type aside from the any-extends (unless it's the only
586 // one) and non-extending ops. We'll emit an extending load to that type and
587 // emit a variant of (extend (trunc X)) for the others according to the
588 // relative type sizes. At the same time, pick an extend to use based on the
589 // extend involved in the chosen type.
590 unsigned PreferredOpcode =
591 isa<GLoad>(&MI)
592 ? TargetOpcode::G_ANYEXT
593 : isa<GSExtLoad>(&MI) ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
594 Preferred = {LLT(), PreferredOpcode, nullptr};
595 for (auto &UseMI : MRI.use_nodbg_instructions(LoadReg)) {
596 if (UseMI.getOpcode() == TargetOpcode::G_SEXT ||
597 UseMI.getOpcode() == TargetOpcode::G_ZEXT ||
598 (UseMI.getOpcode() == TargetOpcode::G_ANYEXT)) {
599 const auto &MMO = LoadMI->getMMO();
600 // Don't do anything for atomics.
601 if (MMO.isAtomic())
602 continue;
603 // Check for legality.
604 if (!isPreLegalize()) {
605 LegalityQuery::MemDesc MMDesc(MMO);
606 unsigned CandidateLoadOpc = getExtLoadOpcForExtend(UseMI.getOpcode());
607 LLT UseTy = MRI.getType(UseMI.getOperand(0).getReg());
608 LLT SrcTy = MRI.getType(LoadMI->getPointerReg());
609 if (LI->getAction({CandidateLoadOpc, {UseTy, SrcTy}, {MMDesc}})
610 .Action != LegalizeActions::Legal)
611 continue;
612 }
613 Preferred = ChoosePreferredUse(MI, Preferred,
614 MRI.getType(UseMI.getOperand(0).getReg()),
615 UseMI.getOpcode(), &UseMI);
616 }
617 }
618
619 // There were no extends
620 if (!Preferred.MI)
621 return false;
622 // It should be impossible to choose an extend without selecting a different
623 // type since by definition the result of an extend is larger.
624 assert(Preferred.Ty != LoadValueTy && "Extending to same type?");
625
626 LLVM_DEBUG(dbgs() << "Preferred use is: " << *Preferred.MI);
627 return true;
628}
629
631 PreferredTuple &Preferred) {
632 // Rewrite the load to the chosen extending load.
633 Register ChosenDstReg = Preferred.MI->getOperand(0).getReg();
634
635 // Inserter to insert a truncate back to the original type at a given point
636 // with some basic CSE to limit truncate duplication to one per BB.
638 auto InsertTruncAt = [&](MachineBasicBlock *InsertIntoBB,
639 MachineBasicBlock::iterator InsertBefore,
640 MachineOperand &UseMO) {
641 MachineInstr *PreviouslyEmitted = EmittedInsns.lookup(InsertIntoBB);
642 if (PreviouslyEmitted) {
644 UseMO.setReg(PreviouslyEmitted->getOperand(0).getReg());
646 return;
647 }
648
649 Builder.setInsertPt(*InsertIntoBB, InsertBefore);
650 Register NewDstReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg());
651 MachineInstr *NewMI = Builder.buildTrunc(NewDstReg, ChosenDstReg);
652 EmittedInsns[InsertIntoBB] = NewMI;
653 replaceRegOpWith(MRI, UseMO, NewDstReg);
654 };
655
657 unsigned LoadOpc = getExtLoadOpcForExtend(Preferred.ExtendOpcode);
658 MI.setDesc(Builder.getTII().get(LoadOpc));
659
660 // Rewrite all the uses to fix up the types.
661 auto &LoadValue = MI.getOperand(0);
663 for (auto &UseMO : MRI.use_operands(LoadValue.getReg()))
664 Uses.push_back(&UseMO);
665
666 for (auto *UseMO : Uses) {
667 MachineInstr *UseMI = UseMO->getParent();
668
669 // If the extend is compatible with the preferred extend then we should fix
670 // up the type and extend so that it uses the preferred use.
671 if (UseMI->getOpcode() == Preferred.ExtendOpcode ||
672 UseMI->getOpcode() == TargetOpcode::G_ANYEXT) {
673 Register UseDstReg = UseMI->getOperand(0).getReg();
674 MachineOperand &UseSrcMO = UseMI->getOperand(1);
675 const LLT UseDstTy = MRI.getType(UseDstReg);
676 if (UseDstReg != ChosenDstReg) {
677 if (Preferred.Ty == UseDstTy) {
678 // If the use has the same type as the preferred use, then merge
679 // the vregs and erase the extend. For example:
680 // %1:_(s8) = G_LOAD ...
681 // %2:_(s32) = G_SEXT %1(s8)
682 // %3:_(s32) = G_ANYEXT %1(s8)
683 // ... = ... %3(s32)
684 // rewrites to:
685 // %2:_(s32) = G_SEXTLOAD ...
686 // ... = ... %2(s32)
687 replaceRegWith(MRI, UseDstReg, ChosenDstReg);
689 UseMO->getParent()->eraseFromParent();
690 } else if (Preferred.Ty.getSizeInBits() < UseDstTy.getSizeInBits()) {
691 // If the preferred size is smaller, then keep the extend but extend
692 // from the result of the extending load. For example:
693 // %1:_(s8) = G_LOAD ...
694 // %2:_(s32) = G_SEXT %1(s8)
695 // %3:_(s64) = G_ANYEXT %1(s8)
696 // ... = ... %3(s64)
697 // rewrites to:
698 // %2:_(s32) = G_SEXTLOAD ...
699 // %3:_(s64) = G_ANYEXT %2:_(s32)
700 // ... = ... %3(s64)
701 replaceRegOpWith(MRI, UseSrcMO, ChosenDstReg);
702 } else {
703 // If the preferred size is larger, then insert a truncate. For
704 // example:
705 // %1:_(s8) = G_LOAD ...
706 // %2:_(s64) = G_SEXT %1(s8)
707 // %3:_(s32) = G_ZEXT %1(s8)
708 // ... = ... %3(s32)
709 // rewrites to:
710 // %2:_(s64) = G_SEXTLOAD ...
711 // %4:_(s8) = G_TRUNC %2:_(s64)
712 // %3:_(s32) = G_ZEXT %4:_(s8)
713 // ... = ... %3(s32)
714 InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO,
715 InsertTruncAt);
716 }
717 continue;
718 }
719 // The use is (one of) the uses of the preferred use we chose earlier.
720 // We're going to update the load to def this value later so just erase
721 // the old extend.
723 UseMO->getParent()->eraseFromParent();
724 continue;
725 }
726
727 // The use isn't an extend. Truncate back to the type we originally loaded.
728 // This is free on many targets.
729 InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO, InsertTruncAt);
730 }
731
732 MI.getOperand(0).setReg(ChosenDstReg);
734}
735
737 BuildFnTy &MatchInfo) {
738 assert(MI.getOpcode() == TargetOpcode::G_AND);
739
740 // If we have the following code:
741 // %mask = G_CONSTANT 255
742 // %ld = G_LOAD %ptr, (load s16)
743 // %and = G_AND %ld, %mask
744 //
745 // Try to fold it into
746 // %ld = G_ZEXTLOAD %ptr, (load s8)
747
748 Register Dst = MI.getOperand(0).getReg();
749 if (MRI.getType(Dst).isVector())
750 return false;
751
752 auto MaybeMask =
753 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
754 if (!MaybeMask)
755 return false;
756
757 APInt MaskVal = MaybeMask->Value;
758
759 if (!MaskVal.isMask())
760 return false;
761
762 Register SrcReg = MI.getOperand(1).getReg();
763 // Don't use getOpcodeDef() here since intermediate instructions may have
764 // multiple users.
765 GAnyLoad *LoadMI = dyn_cast<GAnyLoad>(MRI.getVRegDef(SrcReg));
766 if (!LoadMI || !MRI.hasOneNonDBGUse(LoadMI->getDstReg()))
767 return false;
768
769 Register LoadReg = LoadMI->getDstReg();
770 LLT RegTy = MRI.getType(LoadReg);
771 Register PtrReg = LoadMI->getPointerReg();
772 unsigned RegSize = RegTy.getSizeInBits();
773 LocationSize LoadSizeBits = LoadMI->getMemSizeInBits();
774 unsigned MaskSizeBits = MaskVal.countr_one();
775
776 // The mask may not be larger than the in-memory type, as it might cover sign
777 // extended bits
778 if (MaskSizeBits > LoadSizeBits.getValue())
779 return false;
780
781 // If the mask covers the whole destination register, there's nothing to
782 // extend
783 if (MaskSizeBits >= RegSize)
784 return false;
785
786 // Most targets cannot deal with loads of size < 8 and need to re-legalize to
787 // at least byte loads. Avoid creating such loads here
788 if (MaskSizeBits < 8 || !isPowerOf2_32(MaskSizeBits))
789 return false;
790
791 const MachineMemOperand &MMO = LoadMI->getMMO();
792 LegalityQuery::MemDesc MemDesc(MMO);
793
794 // Don't modify the memory access size if this is atomic/volatile, but we can
795 // still adjust the opcode to indicate the high bit behavior.
796 if (LoadMI->isSimple())
797 MemDesc.MemoryTy = LLT::scalar(MaskSizeBits);
798 else if (LoadSizeBits.getValue() > MaskSizeBits ||
799 LoadSizeBits.getValue() == RegSize)
800 return false;
801
802 // TODO: Could check if it's legal with the reduced or original memory size.
804 {TargetOpcode::G_ZEXTLOAD, {RegTy, MRI.getType(PtrReg)}, {MemDesc}}))
805 return false;
806
807 MatchInfo = [=](MachineIRBuilder &B) {
808 B.setInstrAndDebugLoc(*LoadMI);
809 auto &MF = B.getMF();
810 auto PtrInfo = MMO.getPointerInfo();
811 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, MemDesc.MemoryTy);
812 B.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, Dst, PtrReg, *NewMMO);
813 LoadMI->eraseFromParent();
814 };
815 return true;
816}
817
819 const MachineInstr &UseMI) {
820 assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
821 "shouldn't consider debug uses");
822 assert(DefMI.getParent() == UseMI.getParent());
823 if (&DefMI == &UseMI)
824 return true;
825 const MachineBasicBlock &MBB = *DefMI.getParent();
826 auto DefOrUse = find_if(MBB, [&DefMI, &UseMI](const MachineInstr &MI) {
827 return &MI == &DefMI || &MI == &UseMI;
828 });
829 if (DefOrUse == MBB.end())
830 llvm_unreachable("Block must contain both DefMI and UseMI!");
831 return &*DefOrUse == &DefMI;
832}
833
835 const MachineInstr &UseMI) {
836 assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
837 "shouldn't consider debug uses");
838 if (MDT)
839 return MDT->dominates(&DefMI, &UseMI);
840 else if (DefMI.getParent() != UseMI.getParent())
841 return false;
842
843 return isPredecessor(DefMI, UseMI);
844}
845
847 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
848 Register SrcReg = MI.getOperand(1).getReg();
849 Register LoadUser = SrcReg;
850
851 if (MRI.getType(SrcReg).isVector())
852 return false;
853
854 Register TruncSrc;
855 if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc))))
856 LoadUser = TruncSrc;
857
858 uint64_t SizeInBits = MI.getOperand(2).getImm();
859 // If the source is a G_SEXTLOAD from the same bit width, then we don't
860 // need any extend at all, just a truncate.
861 if (auto *LoadMI = getOpcodeDef<GSExtLoad>(LoadUser, MRI)) {
862 // If truncating more than the original extended value, abort.
863 auto LoadSizeBits = LoadMI->getMemSizeInBits();
864 if (TruncSrc &&
865 MRI.getType(TruncSrc).getSizeInBits() < LoadSizeBits.getValue())
866 return false;
867 if (LoadSizeBits == SizeInBits)
868 return true;
869 }
870 return false;
871}
872
874 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
875 Builder.buildCopy(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
876 MI.eraseFromParent();
877}
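// Illustrative MIR (assumed example, not from the original source) for the
// match/apply pair above:
//   %w:_(s32) = G_SEXTLOAD %p(p0) :: (load (s8))
//   %x:_(s32) = G_SEXT_INREG %w, 8
// The in-register extend is redundant because the load already sign-extended
// from 8 bits, so %x is simply replaced by a copy of %w.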
878
880 MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
881 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
882
883 Register DstReg = MI.getOperand(0).getReg();
884 LLT RegTy = MRI.getType(DstReg);
885
886 // Only supports scalars for now.
887 if (RegTy.isVector())
888 return false;
889
890 Register SrcReg = MI.getOperand(1).getReg();
891 auto *LoadDef = getOpcodeDef<GLoad>(SrcReg, MRI);
892 if (!LoadDef || !MRI.hasOneNonDBGUse(DstReg))
893 return false;
894
895 uint64_t MemBits = LoadDef->getMemSizeInBits().getValue();
896
897 // If the sign extend extends from a narrower width than the load's width,
898 // then we can narrow the load width when we combine to a G_SEXTLOAD.
899 // Avoid widening the load at all.
900 unsigned NewSizeBits = std::min((uint64_t)MI.getOperand(2).getImm(), MemBits);
901
902 // Don't generate G_SEXTLOADs with a < 1 byte width.
903 if (NewSizeBits < 8)
904 return false;
905 // Don't bother creating a non-power-2 sextload, it will likely be broken up
906 // anyway for most targets.
907 if (!isPowerOf2_32(NewSizeBits))
908 return false;
909
910 const MachineMemOperand &MMO = LoadDef->getMMO();
911 LegalityQuery::MemDesc MMDesc(MMO);
912
913 // Don't modify the memory access size if this is atomic/volatile, but we can
914 // still adjust the opcode to indicate the high bit behavior.
915 if (LoadDef->isSimple())
916 MMDesc.MemoryTy = LLT::scalar(NewSizeBits);
917 else if (MemBits > NewSizeBits || MemBits == RegTy.getSizeInBits())
918 return false;
919
920 // TODO: Could check if it's legal with the reduced or original memory size.
921 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SEXTLOAD,
922 {MRI.getType(LoadDef->getDstReg()),
923 MRI.getType(LoadDef->getPointerReg())},
924 {MMDesc}}))
925 return false;
926
927 MatchInfo = std::make_tuple(LoadDef->getDstReg(), NewSizeBits);
928 return true;
929}
930
932 MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
933 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
934 Register LoadReg;
935 unsigned ScalarSizeBits;
936 std::tie(LoadReg, ScalarSizeBits) = MatchInfo;
937 GLoad *LoadDef = cast<GLoad>(MRI.getVRegDef(LoadReg));
938
939 // If we have the following:
940 // %ld = G_LOAD %ptr, (load 2)
941 // %ext = G_SEXT_INREG %ld, 8
942 // ==>
943 // %ld = G_SEXTLOAD %ptr (load 1)
944
945 auto &MMO = LoadDef->getMMO();
947 auto &MF = Builder.getMF();
948 auto PtrInfo = MMO.getPointerInfo();
949 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, ScalarSizeBits / 8);
950 Builder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, MI.getOperand(0).getReg(),
951 LoadDef->getPointerReg(), *NewMMO);
952 MI.eraseFromParent();
953}
954
956 if (Ty.isVector())
958 Ty.getNumElements());
959 return IntegerType::get(C, Ty.getSizeInBits());
960}
961
962/// Return true if 'MI' is a load or a store that may fold its address
963/// operand into the load / store addressing mode.
964static bool canFoldInAddressingMode(GLoadStore *MI, const TargetLowering &TLI,
965 MachineRegisterInfo &MRI) {
966 TargetLoweringBase::AddrMode AM;
967 auto *MF = MI->getMF();
968 auto *Addr = getOpcodeDef<GPtrAdd>(MI->getPointerReg(), MRI);
969 if (!Addr)
970 return false;
971
972 AM.HasBaseReg = true;
973 if (auto CstOff = getIConstantVRegVal(Addr->getOffsetReg(), MRI))
974 AM.BaseOffs = CstOff->getSExtValue(); // [reg +/- imm]
975 else
976 AM.Scale = 1; // [reg +/- reg]
977
978 return TLI.isLegalAddressingMode(
979 MF->getDataLayout(), AM,
980 getTypeForLLT(MI->getMMO().getMemoryType(),
981 MF->getFunction().getContext()),
982 MI->getMMO().getAddrSpace());
983}
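// Illustrative example (assumed target behaviour, not from the original
// source): on a target where [reg + imm] addressing is legal, a load such as
//   %addr:_(p0) = G_PTR_ADD %base, %c16
//   %v:_(s32) = G_LOAD %addr(p0)
// can keep the offset in its addressing mode, so forming an indexed operation
// from that pointer would not be profitable and the helpers below back off.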
984
985static unsigned getIndexedOpc(unsigned LdStOpc) {
986 switch (LdStOpc) {
987 case TargetOpcode::G_LOAD:
988 return TargetOpcode::G_INDEXED_LOAD;
989 case TargetOpcode::G_STORE:
990 return TargetOpcode::G_INDEXED_STORE;
991 case TargetOpcode::G_ZEXTLOAD:
992 return TargetOpcode::G_INDEXED_ZEXTLOAD;
993 case TargetOpcode::G_SEXTLOAD:
994 return TargetOpcode::G_INDEXED_SEXTLOAD;
995 default:
996 llvm_unreachable("Unexpected opcode");
997 }
998}
999
1000bool CombinerHelper::isIndexedLoadStoreLegal(GLoadStore &LdSt) const {
1001 // Check for legality.
1002 LLT PtrTy = MRI.getType(LdSt.getPointerReg());
1003 LLT Ty = MRI.getType(LdSt.getReg(0));
1004 LLT MemTy = LdSt.getMMO().getMemoryType();
1006 {{MemTy, MemTy.getSizeInBits(), AtomicOrdering::NotAtomic}});
1007 unsigned IndexedOpc = getIndexedOpc(LdSt.getOpcode());
1008 SmallVector<LLT> OpTys;
1009 if (IndexedOpc == TargetOpcode::G_INDEXED_STORE)
1010 OpTys = {PtrTy, Ty, Ty};
1011 else
1012 OpTys = {Ty, PtrTy}; // For G_INDEXED_LOAD, G_INDEXED_[SZ]EXTLOAD
1013
1014 LegalityQuery Q(IndexedOpc, OpTys, MemDescrs);
1015 return isLegal(Q);
1016}
1017
1019 "post-index-use-threshold", cl::Hidden, cl::init(32),
1020 cl::desc("Number of uses of a base pointer to check before it is no longer "
1021 "considered for post-indexing."));
1022
1023bool CombinerHelper::findPostIndexCandidate(GLoadStore &LdSt, Register &Addr,
1025 bool &RematOffset) {
1026 // We're looking for the following pattern, for either load or store:
1027 // %baseptr:_(p0) = ...
1028 // G_STORE %val(s64), %baseptr(p0)
1029 // %offset:_(s64) = G_CONSTANT i64 -256
1030 // %new_addr:_(p0) = G_PTR_ADD %baseptr, %offset(s64)
1031 const auto &TLI = getTargetLowering();
1032
1033 Register Ptr = LdSt.getPointerReg();
1034 // If the store is the only use, don't bother.
1035 if (MRI.hasOneNonDBGUse(Ptr))
1036 return false;
1037
1038 if (!isIndexedLoadStoreLegal(LdSt))
1039 return false;
1040
1041 if (getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Ptr, MRI))
1042 return false;
1043
1044 MachineInstr *StoredValDef = getDefIgnoringCopies(LdSt.getReg(0), MRI);
1045 auto *PtrDef = MRI.getVRegDef(Ptr);
1046
1047 unsigned NumUsesChecked = 0;
1048 for (auto &Use : MRI.use_nodbg_instructions(Ptr)) {
1049 if (++NumUsesChecked > PostIndexUseThreshold)
1050 return false; // Try to avoid exploding compile time.
1051
1052 auto *PtrAdd = dyn_cast<GPtrAdd>(&Use);
1053 // The use itself might be dead. This can happen during combines if DCE
1054 // hasn't had a chance to run yet. Don't allow it to form an indexed op.
1055 if (!PtrAdd || MRI.use_nodbg_empty(PtrAdd->getReg(0)))
1056 continue;
1057
1058 // Check the user of this isn't the store, otherwise we'd be generating an
1059 // indexed store defining its own use.
1060 if (StoredValDef == &Use)
1061 continue;
1062
1063 Offset = PtrAdd->getOffsetReg();
1064 if (!ForceLegalIndexing &&
1065 !TLI.isIndexingLegal(LdSt, PtrAdd->getBaseReg(), Offset,
1066 /*IsPre*/ false, MRI))
1067 continue;
1068
1069 // Make sure the offset calculation is before the potentially indexed op.
1070 MachineInstr *OffsetDef = MRI.getVRegDef(Offset);
1071 RematOffset = false;
1072 if (!dominates(*OffsetDef, LdSt)) {
1073 // If the offset however is just a G_CONSTANT, we can always just
1074 // rematerialize it where we need it.
1075 if (OffsetDef->getOpcode() != TargetOpcode::G_CONSTANT)
1076 continue;
1077 RematOffset = true;
1078 }
1079
1080 for (auto &BasePtrUse : MRI.use_nodbg_instructions(PtrAdd->getBaseReg())) {
1081 if (&BasePtrUse == PtrDef)
1082 continue;
1083
1084 // If the user is a later load/store that can be post-indexed, then don't
1085 // combine this one.
1086 auto *BasePtrLdSt = dyn_cast<GLoadStore>(&BasePtrUse);
1087 if (BasePtrLdSt && BasePtrLdSt != &LdSt &&
1088 dominates(LdSt, *BasePtrLdSt) &&
1089 isIndexedLoadStoreLegal(*BasePtrLdSt))
1090 return false;
1091
1092 // Now we're looking for the key G_PTR_ADD instruction, which contains
1093 // the offset add that we want to fold.
1094 if (auto *BasePtrUseDef = dyn_cast<GPtrAdd>(&BasePtrUse)) {
1095 Register PtrAddDefReg = BasePtrUseDef->getReg(0);
1096 for (auto &BaseUseUse : MRI.use_nodbg_instructions(PtrAddDefReg)) {
1097 // If the use is in a different block, then we may produce worse code
1098 // due to the extra register pressure.
1099 if (BaseUseUse.getParent() != LdSt.getParent())
1100 return false;
1101
1102 if (auto *UseUseLdSt = dyn_cast<GLoadStore>(&BaseUseUse))
1103 if (canFoldInAddressingMode(UseUseLdSt, TLI, MRI))
1104 return false;
1105 }
1106 if (!dominates(LdSt, BasePtrUse))
1107 return false; // All uses must be dominated by the load/store.
1108 }
1109 }
1110
1111 Addr = PtrAdd->getReg(0);
1112 Base = PtrAdd->getBaseReg();
1113 return true;
1114 }
1115
1116 return false;
1117}
1118
1119bool CombinerHelper::findPreIndexCandidate(GLoadStore &LdSt, Register &Addr,
1121 auto &MF = *LdSt.getParent()->getParent();
1122 const auto &TLI = *MF.getSubtarget().getTargetLowering();
1123
1124 Addr = LdSt.getPointerReg();
1127 return false;
1128
1129 if (!ForceLegalIndexing &&
1130 !TLI.isIndexingLegal(LdSt, Base, Offset, /*IsPre*/ true, MRI))
1131 return false;
1132
1133 if (!isIndexedLoadStoreLegal(LdSt))
1134 return false;
1135
1137 if (BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
1138 return false;
1139
1140 if (auto *St = dyn_cast<GStore>(&LdSt)) {
1141 // Would require a copy.
1142 if (Base == St->getValueReg())
1143 return false;
1144
1145 // We're expecting one use of Addr in MI, but it could also be the
1146 // value stored, which isn't actually dominated by the instruction.
1147 if (St->getValueReg() == Addr)
1148 return false;
1149 }
1150
1151 // Avoid increasing cross-block register pressure.
1152 for (auto &AddrUse : MRI.use_nodbg_instructions(Addr))
1153 if (AddrUse.getParent() != LdSt.getParent())
1154 return false;
1155
1156 // FIXME: check whether all uses of the base pointer are constant PtrAdds.
1157 // That might allow us to end base's liveness here by adjusting the constant.
1158 bool RealUse = false;
1159 for (auto &AddrUse : MRI.use_nodbg_instructions(Addr)) {
1160 if (!dominates(LdSt, AddrUse))
1161 return false; // All uses must be dominated by the load/store.
1162
1163 // If Ptr may be folded in addressing mode of other use, then it's
1164 // not profitable to do this transformation.
1165 if (auto *UseLdSt = dyn_cast<GLoadStore>(&AddrUse)) {
1166 if (!canFoldInAddressingMode(UseLdSt, TLI, MRI))
1167 RealUse = true;
1168 } else {
1169 RealUse = true;
1170 }
1171 }
1172 return RealUse;
1173}
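// Illustrative pattern (assumed MIR, not from the original source) for the
// pre-index case accepted above:
//   %new:_(p0) = G_PTR_ADD %base, %off
//   G_STORE %val(s32), %new(p0)
// Both instructions can be folded into a single pre-indexed store that writes
// through %base + %off and also produces the updated pointer.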
1174
1176 BuildFnTy &MatchInfo) {
1177 assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT);
1178
1179 // Check if there is a load that defines the vector being extracted from.
1180 auto *LoadMI = getOpcodeDef<GLoad>(MI.getOperand(1).getReg(), MRI);
1181 if (!LoadMI)
1182 return false;
1183
1184 Register Vector = MI.getOperand(1).getReg();
1185 LLT VecEltTy = MRI.getType(Vector).getElementType();
1186
1187 assert(MRI.getType(MI.getOperand(0).getReg()) == VecEltTy);
1188
1189 // Checking whether we should reduce the load width.
1191 return false;
1192
1193 // Check if the defining load is simple.
1194 if (!LoadMI->isSimple())
1195 return false;
1196
1197 // If the vector element type is not a multiple of a byte then we are unable
1198 // to correctly compute an address to load only the extracted element as a
1199 // scalar.
1200 if (!VecEltTy.isByteSized())
1201 return false;
1202
1203 // Check for load fold barriers between the extraction and the load.
1204 if (MI.getParent() != LoadMI->getParent())
1205 return false;
1206 const unsigned MaxIter = 20;
1207 unsigned Iter = 0;
1208 for (auto II = LoadMI->getIterator(), IE = MI.getIterator(); II != IE; ++II) {
1209 if (II->isLoadFoldBarrier())
1210 return false;
1211 if (Iter++ == MaxIter)
1212 return false;
1213 }
1214
1215 // Check if the new load that we are going to create is legal
1216 // if we are in the post-legalization phase.
1217 MachineMemOperand MMO = LoadMI->getMMO();
1218 Align Alignment = MMO.getAlign();
1219 MachinePointerInfo PtrInfo;
1221
1222 // Finding the appropriate PtrInfo if offset is a known constant.
1223 // This is required to create the memory operand for the narrowed load.
1224 // This machine memory operand object helps us infer about legality
1225 // before we proceed to combine the instruction.
1226 if (auto CVal = getIConstantVRegVal(Vector, MRI)) {
1227 int Elt = CVal->getZExtValue();
1228 // FIXME: should be (ABI size)*Elt.
1229 Offset = VecEltTy.getSizeInBits() * Elt / 8;
1230 PtrInfo = MMO.getPointerInfo().getWithOffset(Offset);
1231 } else {
1232 // Discard the pointer info except the address space because the memory
1233 // operand can't represent this new access since the offset is variable.
1234 Offset = VecEltTy.getSizeInBits() / 8;
1236 }
1237
1238 Alignment = commonAlignment(Alignment, Offset);
1239
1240 Register VecPtr = LoadMI->getPointerReg();
1241 LLT PtrTy = MRI.getType(VecPtr);
1242
1243 MachineFunction &MF = *MI.getMF();
1244 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, VecEltTy);
1245
1246 LegalityQuery::MemDesc MMDesc(*NewMMO);
1247
1248 LegalityQuery Q = {TargetOpcode::G_LOAD, {VecEltTy, PtrTy}, {MMDesc}};
1249
1251 return false;
1252
1253 // Load must be allowed and fast on the target.
1255 auto &DL = MF.getDataLayout();
1256 unsigned Fast = 0;
1257 if (!getTargetLowering().allowsMemoryAccess(C, DL, VecEltTy, *NewMMO,
1258 &Fast) ||
1259 !Fast)
1260 return false;
1261
1262 Register Result = MI.getOperand(0).getReg();
1263 Register Index = MI.getOperand(2).getReg();
1264
1265 MatchInfo = [=](MachineIRBuilder &B) {
1266 GISelObserverWrapper DummyObserver;
1267 LegalizerHelper Helper(B.getMF(), DummyObserver, B);
1268 // Get pointer to the vector element.
1269 Register finalPtr = Helper.getVectorElementPointer(
1270 LoadMI->getPointerReg(), MRI.getType(LoadMI->getOperand(0).getReg()),
1271 Index);
1272 // New G_LOAD instruction.
1273 B.buildLoad(Result, finalPtr, PtrInfo, Alignment);
1274 // Remove original G_LOAD instruction.
1275 LoadMI->eraseFromParent();
1276 };
1277
1278 return true;
1279}
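// Illustrative MIR (assumed example, not from the original source):
//   %v:_(<4 x s32>) = G_LOAD %p(p0)
//   %e:_(s32) = G_EXTRACT_VECTOR_ELT %v, %idx
// is rewritten to a scalar G_LOAD of just the addressed element through a
// pointer computed by getVectorElementPointer, provided no load-fold barrier
// sits between the two instructions.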
1280
1283 auto &LdSt = cast<GLoadStore>(MI);
1284
1285 if (LdSt.isAtomic())
1286 return false;
1287
1288 MatchInfo.IsPre = findPreIndexCandidate(LdSt, MatchInfo.Addr, MatchInfo.Base,
1289 MatchInfo.Offset);
1290 if (!MatchInfo.IsPre &&
1291 !findPostIndexCandidate(LdSt, MatchInfo.Addr, MatchInfo.Base,
1292 MatchInfo.Offset, MatchInfo.RematOffset))
1293 return false;
1294
1295 return true;
1296}
1297
1300 MachineInstr &AddrDef = *MRI.getUniqueVRegDef(MatchInfo.Addr);
1301 unsigned Opcode = MI.getOpcode();
1302 bool IsStore = Opcode == TargetOpcode::G_STORE;
1303 unsigned NewOpcode = getIndexedOpc(Opcode);
1304
1305 // If the offset constant didn't happen to dominate the load/store, we can
1306 // just clone it as needed.
1307 if (MatchInfo.RematOffset) {
1308 auto *OldCst = MRI.getVRegDef(MatchInfo.Offset);
1309 auto NewCst = Builder.buildConstant(MRI.getType(MatchInfo.Offset),
1310 *OldCst->getOperand(1).getCImm());
1311 MatchInfo.Offset = NewCst.getReg(0);
1312 }
1313
1314 auto MIB = Builder.buildInstr(NewOpcode);
1315 if (IsStore) {
1316 MIB.addDef(MatchInfo.Addr);
1317 MIB.addUse(MI.getOperand(0).getReg());
1318 } else {
1319 MIB.addDef(MI.getOperand(0).getReg());
1320 MIB.addDef(MatchInfo.Addr);
1321 }
1322
1323 MIB.addUse(MatchInfo.Base);
1324 MIB.addUse(MatchInfo.Offset);
1325 MIB.addImm(MatchInfo.IsPre);
1326 MIB->cloneMemRefs(*MI.getMF(), MI);
1327 MI.eraseFromParent();
1328 AddrDef.eraseFromParent();
1329
1330 LLVM_DEBUG(dbgs() << " Combinined to indexed operation");
1331}
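// Illustrative result (assumed MIR, not from the original source): a matched
// pre-index load
//   %addr:_(p0) = G_PTR_ADD %base, %off
//   %val:_(s32) = G_LOAD %addr(p0)
// is replaced by
//   %val:_(s32), %addr:_(p0) = G_INDEXED_LOAD %base, %off, 1
// where the trailing immediate distinguishes pre-indexing (1) from
// post-indexing (0).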
1332
1334 MachineInstr *&OtherMI) {
1335 unsigned Opcode = MI.getOpcode();
1336 bool IsDiv, IsSigned;
1337
1338 switch (Opcode) {
1339 default:
1340 llvm_unreachable("Unexpected opcode!");
1341 case TargetOpcode::G_SDIV:
1342 case TargetOpcode::G_UDIV: {
1343 IsDiv = true;
1344 IsSigned = Opcode == TargetOpcode::G_SDIV;
1345 break;
1346 }
1347 case TargetOpcode::G_SREM:
1348 case TargetOpcode::G_UREM: {
1349 IsDiv = false;
1350 IsSigned = Opcode == TargetOpcode::G_SREM;
1351 break;
1352 }
1353 }
1354
1355 Register Src1 = MI.getOperand(1).getReg();
1356 unsigned DivOpcode, RemOpcode, DivremOpcode;
1357 if (IsSigned) {
1358 DivOpcode = TargetOpcode::G_SDIV;
1359 RemOpcode = TargetOpcode::G_SREM;
1360 DivremOpcode = TargetOpcode::G_SDIVREM;
1361 } else {
1362 DivOpcode = TargetOpcode::G_UDIV;
1363 RemOpcode = TargetOpcode::G_UREM;
1364 DivremOpcode = TargetOpcode::G_UDIVREM;
1365 }
1366
1367 if (!isLegalOrBeforeLegalizer({DivremOpcode, {MRI.getType(Src1)}}))
1368 return false;
1369
1370 // Combine:
1371 // %div:_ = G_[SU]DIV %src1:_, %src2:_
1372 // %rem:_ = G_[SU]REM %src1:_, %src2:_
1373 // into:
1374 // %div:_, %rem:_ = G_[SU]DIVREM %src1:_, %src2:_
1375
1376 // Combine:
1377 // %rem:_ = G_[SU]REM %src1:_, %src2:_
1378 // %div:_ = G_[SU]DIV %src1:_, %src2:_
1379 // into:
1380 // %div:_, %rem:_ = G_[SU]DIVREM %src1:_, %src2:_
1381
1382 for (auto &UseMI : MRI.use_nodbg_instructions(Src1)) {
1383 if (MI.getParent() == UseMI.getParent() &&
1384 ((IsDiv && UseMI.getOpcode() == RemOpcode) ||
1385 (!IsDiv && UseMI.getOpcode() == DivOpcode)) &&
1386 matchEqualDefs(MI.getOperand(2), UseMI.getOperand(2)) &&
1387 matchEqualDefs(MI.getOperand(1), UseMI.getOperand(1))) {
1388 OtherMI = &UseMI;
1389 return true;
1390 }
1391 }
1392
1393 return false;
1394}
1395
1397 MachineInstr *&OtherMI) {
1398 unsigned Opcode = MI.getOpcode();
1399 assert(OtherMI && "OtherMI shouldn't be empty.");
1400
1401 Register DestDivReg, DestRemReg;
1402 if (Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_UDIV) {
1403 DestDivReg = MI.getOperand(0).getReg();
1404 DestRemReg = OtherMI->getOperand(0).getReg();
1405 } else {
1406 DestDivReg = OtherMI->getOperand(0).getReg();
1407 DestRemReg = MI.getOperand(0).getReg();
1408 }
1409
1410 bool IsSigned =
1411 Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_SREM;
1412
1413 // Check which instruction is first in the block so we don't break def-use
1414 // deps by "moving" the instruction incorrectly. Also keep track of which
1415 // instruction is first so we pick its operands, avoiding use-before-def
1416 // bugs.
1417 MachineInstr *FirstInst = dominates(MI, *OtherMI) ? &MI : OtherMI;
1418 Builder.setInstrAndDebugLoc(*FirstInst);
1419
1420 Builder.buildInstr(IsSigned ? TargetOpcode::G_SDIVREM
1421 : TargetOpcode::G_UDIVREM,
1422 {DestDivReg, DestRemReg},
1423 { FirstInst->getOperand(1), FirstInst->getOperand(2) });
1424 MI.eraseFromParent();
1425 OtherMI->eraseFromParent();
1426}
1427
1429 MachineInstr *&BrCond) {
1430 assert(MI.getOpcode() == TargetOpcode::G_BR);
1431
1432 // Try to match the following:
1433 // bb1:
1434 // G_BRCOND %c1, %bb2
1435 // G_BR %bb3
1436 // bb2:
1437 // ...
1438 // bb3:
1439
1440 // The above pattern does not have a fall through to the successor bb2, always
1441 // resulting in a branch no matter which path is taken. Here we try to find
1442 // and replace that pattern with a conditional branch to bb3 and otherwise
1443 // fallthrough to bb2. This is generally better for branch predictors.
1444
1445 MachineBasicBlock *MBB = MI.getParent();
1447 if (BrIt == MBB->begin())
1448 return false;
1449 assert(std::next(BrIt) == MBB->end() && "expected G_BR to be a terminator");
1450
1451 BrCond = &*std::prev(BrIt);
1452 if (BrCond->getOpcode() != TargetOpcode::G_BRCOND)
1453 return false;
1454
1455 // Check that the next block is the conditional branch target. Also make sure
1456 // that it isn't the same as the G_BR's target (otherwise, this will loop.)
1457 MachineBasicBlock *BrCondTarget = BrCond->getOperand(1).getMBB();
1458 return BrCondTarget != MI.getOperand(0).getMBB() &&
1459 MBB->isLayoutSuccessor(BrCondTarget);
1460}
1461
1463 MachineInstr *&BrCond) {
1464 MachineBasicBlock *BrTarget = MI.getOperand(0).getMBB();
1466 LLT Ty = MRI.getType(BrCond->getOperand(0).getReg());
1467 // FIXME: Does int/fp matter for this? If so, we might need to restrict
1468 // this to i1 only since we might not know for sure what kind of
1469 // compare generated the condition value.
1470 auto True = Builder.buildConstant(
1471 Ty, getICmpTrueVal(getTargetLowering(), false, false));
1472 auto Xor = Builder.buildXor(Ty, BrCond->getOperand(0), True);
1473
1474 auto *FallthroughBB = BrCond->getOperand(1).getMBB();
1476 MI.getOperand(0).setMBB(FallthroughBB);
1478
1479 // Change the conditional branch to use the inverted condition and
1480 // new target block.
1481 Observer.changingInstr(*BrCond);
1482 BrCond->getOperand(0).setReg(Xor.getReg(0));
1483 BrCond->getOperand(1).setMBB(BrTarget);
1484 Observer.changedInstr(*BrCond);
1485}
1486
1487
1489 MachineIRBuilder HelperBuilder(MI);
1490 GISelObserverWrapper DummyObserver;
1491 LegalizerHelper Helper(HelperBuilder.getMF(), DummyObserver, HelperBuilder);
1492 return Helper.lowerMemcpyInline(MI) ==
1494}
1495
1497 MachineIRBuilder HelperBuilder(MI);
1498 GISelObserverWrapper DummyObserver;
1499 LegalizerHelper Helper(HelperBuilder.getMF(), DummyObserver, HelperBuilder);
1500 return Helper.lowerMemCpyFamily(MI, MaxLen) ==
1502}
1503
1505 const MachineRegisterInfo &MRI,
1506 const APFloat &Val) {
1507 APFloat Result(Val);
1508 switch (MI.getOpcode()) {
1509 default:
1510 llvm_unreachable("Unexpected opcode!");
1511 case TargetOpcode::G_FNEG: {
1512 Result.changeSign();
1513 return Result;
1514 }
1515 case TargetOpcode::G_FABS: {
1516 Result.clearSign();
1517 return Result;
1518 }
1519 case TargetOpcode::G_FPTRUNC: {
1520 bool Unused;
1521 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
1523 &Unused);
1524 return Result;
1525 }
1526 case TargetOpcode::G_FSQRT: {
1527 bool Unused;
1529 &Unused);
1530 Result = APFloat(sqrt(Result.convertToDouble()));
1531 break;
1532 }
1533 case TargetOpcode::G_FLOG2: {
1534 bool Unused;
1536 &Unused);
1537 Result = APFloat(log2(Result.convertToDouble()));
1538 break;
1539 }
1540 }
1541 // Convert `APFloat` to appropriate IEEE type depending on `DstTy`. Otherwise,
1542 // `buildFConstant` will assert on size mismatch. Only `G_FSQRT`, and
1543 // `G_FLOG2` reach here.
1544 bool Unused;
1545 Result.convert(Val.getSemantics(), APFloat::rmNearestTiesToEven, &Unused);
1546 return Result;
1547}
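// Illustrative example (assumed values, not from the original source): folding
// G_FNEG of a G_FCONSTANT 2.0 yields -2.0 directly, while G_FLOG2 of 8.0 is
// evaluated in double precision as 3.0 and converted back to the original
// float semantics before the caller emits the new G_FCONSTANT.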
1548
1550 const ConstantFP *Cst) {
1551 APFloat Folded = constantFoldFpUnary(MI, MRI, Cst->getValue());
1552 const ConstantFP *NewCst = ConstantFP::get(Builder.getContext(), Folded);
1553 Builder.buildFConstant(MI.getOperand(0), *NewCst);
1554 MI.eraseFromParent();
1555}
1556
1558 PtrAddChain &MatchInfo) {
1559 // We're trying to match the following pattern:
1560 // %t1 = G_PTR_ADD %base, G_CONSTANT imm1
1561 // %root = G_PTR_ADD %t1, G_CONSTANT imm2
1562 // -->
1563 // %root = G_PTR_ADD %base, G_CONSTANT (imm1 + imm2)
1564
1565 if (MI.getOpcode() != TargetOpcode::G_PTR_ADD)
1566 return false;
1567
1568 Register Add2 = MI.getOperand(1).getReg();
1569 Register Imm1 = MI.getOperand(2).getReg();
1570 auto MaybeImmVal = getIConstantVRegValWithLookThrough(Imm1, MRI);
1571 if (!MaybeImmVal)
1572 return false;
1573
1574 MachineInstr *Add2Def = MRI.getVRegDef(Add2);
1575 if (!Add2Def || Add2Def->getOpcode() != TargetOpcode::G_PTR_ADD)
1576 return false;
1577
1578 Register Base = Add2Def->getOperand(1).getReg();
1579 Register Imm2 = Add2Def->getOperand(2).getReg();
1580 auto MaybeImm2Val = getIConstantVRegValWithLookThrough(Imm2, MRI);
1581 if (!MaybeImm2Val)
1582 return false;
1583
1584 // Check if the new combined immediate forms an illegal addressing mode.
1585 // Do not combine if it was legal before but would get illegal.
1586 // To do so, we need to find a load/store user of the pointer to get
1587 // the access type.
1588 Type *AccessTy = nullptr;
1589 auto &MF = *MI.getMF();
1590 for (auto &UseMI : MRI.use_nodbg_instructions(MI.getOperand(0).getReg())) {
1591 if (auto *LdSt = dyn_cast<GLoadStore>(&UseMI)) {
1592 AccessTy = getTypeForLLT(MRI.getType(LdSt->getReg(0)),
1593 MF.getFunction().getContext());
1594 break;
1595 }
1596 }
1598 APInt CombinedImm = MaybeImmVal->Value + MaybeImm2Val->Value;
1599 AMNew.BaseOffs = CombinedImm.getSExtValue();
1600 if (AccessTy) {
1601 AMNew.HasBaseReg = true;
1603 AMOld.BaseOffs = MaybeImmVal->Value.getSExtValue();
1604 AMOld.HasBaseReg = true;
1605 unsigned AS = MRI.getType(Add2).getAddressSpace();
1606 const auto &TLI = *MF.getSubtarget().getTargetLowering();
1607 if (TLI.isLegalAddressingMode(MF.getDataLayout(), AMOld, AccessTy, AS) &&
1608 !TLI.isLegalAddressingMode(MF.getDataLayout(), AMNew, AccessTy, AS))
1609 return false;
1610 }
1611
1612 // Pass the combined immediate to the apply function.
1613 MatchInfo.Imm = AMNew.BaseOffs;
1614 MatchInfo.Base = Base;
1615 MatchInfo.Bank = getRegBank(Imm2);
1616 return true;
1617}
1618
1620 PtrAddChain &MatchInfo) {
1621 assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
1622 MachineIRBuilder MIB(MI);
1623 LLT OffsetTy = MRI.getType(MI.getOperand(2).getReg());
1624 auto NewOffset = MIB.buildConstant(OffsetTy, MatchInfo.Imm);
1625 setRegBank(NewOffset.getReg(0), MatchInfo.Bank);
1627 MI.getOperand(1).setReg(MatchInfo.Base);
1628 MI.getOperand(2).setReg(NewOffset.getReg(0));
1630}
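// Illustrative example (assumed MIR, not from the original source):
//   %t1:_(p0) = G_PTR_ADD %base, 16
//   %r:_(p0) = G_PTR_ADD %t1, 8
// becomes
//   %r:_(p0) = G_PTR_ADD %base, 24
// provided the combined offset still forms a legal addressing mode for any
// load/store user found during the match.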
1631
1633 RegisterImmPair &MatchInfo) {
1634 // We're trying to match the following pattern with any of
1635 // G_SHL/G_ASHR/G_LSHR/G_SSHLSAT/G_USHLSAT shift instructions:
1636 // %t1 = SHIFT %base, G_CONSTANT imm1
1637 // %root = SHIFT %t1, G_CONSTANT imm2
1638 // -->
1639 // %root = SHIFT %base, G_CONSTANT (imm1 + imm2)
1640
1641 unsigned Opcode = MI.getOpcode();
1642 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
1643 Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||
1644 Opcode == TargetOpcode::G_USHLSAT) &&
1645 "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");
1646
1647 Register Shl2 = MI.getOperand(1).getReg();
1648 Register Imm1 = MI.getOperand(2).getReg();
1649 auto MaybeImmVal = getIConstantVRegValWithLookThrough(Imm1, MRI);
1650 if (!MaybeImmVal)
1651 return false;
1652
1653 MachineInstr *Shl2Def = MRI.getUniqueVRegDef(Shl2);
1654 if (Shl2Def->getOpcode() != Opcode)
1655 return false;
1656
1657 Register Base = Shl2Def->getOperand(1).getReg();
1658 Register Imm2 = Shl2Def->getOperand(2).getReg();
1659 auto MaybeImm2Val = getIConstantVRegValWithLookThrough(Imm2, MRI);
1660 if (!MaybeImm2Val)
1661 return false;
1662
1663 // Pass the combined immediate to the apply function.
1664 MatchInfo.Imm =
1665 (MaybeImmVal->Value.getZExtValue() + MaybeImm2Val->Value).getZExtValue();
1666 MatchInfo.Reg = Base;
1667
1668 // There is no simple replacement for a saturating unsigned left shift that
1669 // exceeds the scalar size.
1670 if (Opcode == TargetOpcode::G_USHLSAT &&
1671 MatchInfo.Imm >= MRI.getType(Shl2).getScalarSizeInBits())
1672 return false;
1673
1674 return true;
1675}
1676
1678 RegisterImmPair &MatchInfo) {
1679 unsigned Opcode = MI.getOpcode();
1680 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
1681 Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||
1682 Opcode == TargetOpcode::G_USHLSAT) &&
1683 "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");
1684
1685 LLT Ty = MRI.getType(MI.getOperand(1).getReg());
1686 unsigned const ScalarSizeInBits = Ty.getScalarSizeInBits();
1687 auto Imm = MatchInfo.Imm;
1688
1689 if (Imm >= ScalarSizeInBits) {
1690 // Any logical shift that exceeds scalar size will produce zero.
1691 if (Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_LSHR) {
1692 Builder.buildConstant(MI.getOperand(0), 0);
1693 MI.eraseFromParent();
1694 return;
1695 }
1696 // Arithmetic shift and saturating signed left shift have no effect beyond
1697 // scalar size.
1698 Imm = ScalarSizeInBits - 1;
1699 }
1700
1701 LLT ImmTy = MRI.getType(MI.getOperand(2).getReg());
1702 Register NewImm = Builder.buildConstant(ImmTy, Imm).getReg(0);
1704 MI.getOperand(1).setReg(MatchInfo.Reg);
1705 MI.getOperand(2).setReg(NewImm);
1707}
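// Illustrative example (assumed MIR, not from the original source): two G_SHL
// of an s32 value by 3 and then 4 combine into a single G_SHL by 7; had the
// summed amount reached 32, the logical-shift result would instead have been
// folded to the constant 0 as handled above.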
1708
1710 ShiftOfShiftedLogic &MatchInfo) {
1711 // We're trying to match the following pattern with any of
1712 // G_SHL/G_ASHR/G_LSHR/G_USHLSAT/G_SSHLSAT shift instructions in combination
1713 // with any of G_AND/G_OR/G_XOR logic instructions.
1714 // %t1 = SHIFT %X, G_CONSTANT C0
1715 // %t2 = LOGIC %t1, %Y
1716 // %root = SHIFT %t2, G_CONSTANT C1
1717 // -->
1718 // %t3 = SHIFT %X, G_CONSTANT (C0+C1)
1719 // %t4 = SHIFT %Y, G_CONSTANT C1
1720 // %root = LOGIC %t3, %t4
1721 unsigned ShiftOpcode = MI.getOpcode();
1722 assert((ShiftOpcode == TargetOpcode::G_SHL ||
1723 ShiftOpcode == TargetOpcode::G_ASHR ||
1724 ShiftOpcode == TargetOpcode::G_LSHR ||
1725 ShiftOpcode == TargetOpcode::G_USHLSAT ||
1726 ShiftOpcode == TargetOpcode::G_SSHLSAT) &&
1727 "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT");
1728
1729 // Match a one-use bitwise logic op.
1730 Register LogicDest = MI.getOperand(1).getReg();
1731 if (!MRI.hasOneNonDBGUse(LogicDest))
1732 return false;
1733
1734 MachineInstr *LogicMI = MRI.getUniqueVRegDef(LogicDest);
1735 unsigned LogicOpcode = LogicMI->getOpcode();
1736 if (LogicOpcode != TargetOpcode::G_AND && LogicOpcode != TargetOpcode::G_OR &&
1737 LogicOpcode != TargetOpcode::G_XOR)
1738 return false;
1739
1740 // Find a matching one-use shift by constant.
1741 const Register C1 = MI.getOperand(2).getReg();
1742 auto MaybeImmVal = getIConstantVRegValWithLookThrough(C1, MRI);
1743 if (!MaybeImmVal || MaybeImmVal->Value == 0)
1744 return false;
1745
1746 const uint64_t C1Val = MaybeImmVal->Value.getZExtValue();
1747
1748 auto matchFirstShift = [&](const MachineInstr *MI, uint64_t &ShiftVal) {
1750 // The shift must match the previous one and have a single non-debug use.
1750 if (MI->getOpcode() != ShiftOpcode ||
1751 !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
1752 return false;
1753
1754 // Must be a constant.
1755 auto MaybeImmVal =
1756 getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
1757 if (!MaybeImmVal)
1758 return false;
1759
1760 ShiftVal = MaybeImmVal->Value.getSExtValue();
1761 return true;
1762 };
1763
1764 // Logic ops are commutative, so check each operand for a match.
1765 Register LogicMIReg1 = LogicMI->getOperand(1).getReg();
1766 MachineInstr *LogicMIOp1 = MRI.getUniqueVRegDef(LogicMIReg1);
1767 Register LogicMIReg2 = LogicMI->getOperand(2).getReg();
1768 MachineInstr *LogicMIOp2 = MRI.getUniqueVRegDef(LogicMIReg2);
1769 uint64_t C0Val;
1770
1771 if (matchFirstShift(LogicMIOp1, C0Val)) {
1772 MatchInfo.LogicNonShiftReg = LogicMIReg2;
1773 MatchInfo.Shift2 = LogicMIOp1;
1774 } else if (matchFirstShift(LogicMIOp2, C0Val)) {
1775 MatchInfo.LogicNonShiftReg = LogicMIReg1;
1776 MatchInfo.Shift2 = LogicMIOp2;
1777 } else
1778 return false;
1779
1780 MatchInfo.ValSum = C0Val + C1Val;
1781
1782 // The fold is not valid if the sum of the shift values exceeds bitwidth.
1783 if (MatchInfo.ValSum >= MRI.getType(LogicDest).getScalarSizeInBits())
1784 return false;
1785
1786 MatchInfo.Logic = LogicMI;
1787 return true;
1788}
1789
1790void CombinerHelper::applyShiftOfShiftedLogic(MachineInstr &MI,
1791 ShiftOfShiftedLogic &MatchInfo) {
1792 unsigned Opcode = MI.getOpcode();
1793 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
1794 Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_USHLSAT ||
1795 Opcode == TargetOpcode::G_SSHLSAT) &&
1796 "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT");
1797
1798 LLT ShlType = MRI.getType(MI.getOperand(2).getReg());
1799 LLT DestType = MRI.getType(MI.getOperand(0).getReg());
1800
1801 Register Const = Builder.buildConstant(ShlType, MatchInfo.ValSum).getReg(0);
1802
1803 Register Shift1Base = MatchInfo.Shift2->getOperand(1).getReg();
1804 Register Shift1 =
1805 Builder.buildInstr(Opcode, {DestType}, {Shift1Base, Const}).getReg(0);
1806
1807 // If LogicNonShiftReg is the same as Shift1Base and the shift1 constant is
1808 // the same as the MatchInfo.Shift2 constant, CSEMIRBuilder will reuse the old
1809 // shift1 when building shift2. In that case, erasing MatchInfo.Shift2 at the
1810 // end would actually remove the old shift1 and cause a crash later. Erase it
1811 // earlier to avoid the crash.
1812 MatchInfo.Shift2->eraseFromParent();
1813
1814 Register Shift2Const = MI.getOperand(2).getReg();
1815 Register Shift2 = Builder
1816 .buildInstr(Opcode, {DestType},
1817 {MatchInfo.LogicNonShiftReg, Shift2Const})
1818 .getReg(0);
1819
1820 Register Dest = MI.getOperand(0).getReg();
1821 Builder.buildInstr(MatchInfo.Logic->getOpcode(), {Dest}, {Shift1, Shift2});
1822
1823 // The logic op had a single use, so it is safe to remove it.
1824 MatchInfo.Logic->eraseFromParent();
1825
1826 MI.eraseFromParent();
1827}
1828
1829bool CombinerHelper::matchCommuteShift(MachineInstr &MI, BuildFnTy &MatchInfo) {
1830 assert(MI.getOpcode() == TargetOpcode::G_SHL && "Expected G_SHL");
1831 // Combine (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
1832 // Combine (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
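 // For example (illustrative constants): (shl (add x, 12), 2) becomes
 // (add (shl x, 2), 48), since 12 << 2 == 48.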
1833 auto &Shl = cast<GenericMachineInstr>(MI);
1834 Register DstReg = Shl.getReg(0);
1835 Register SrcReg = Shl.getReg(1);
1836 Register ShiftReg = Shl.getReg(2);
1837 Register X, C1;
1838
1839 if (!getTargetLowering().isDesirableToCommuteWithShift(MI, !isPreLegalize()))
1840 return false;
1841
1842 if (!mi_match(SrcReg, MRI,
1843 m_OneNonDBGUse(m_any_of(m_GAdd(m_Reg(X), m_Reg(C1)),
1844 m_GOr(m_Reg(X), m_Reg(C1))))))
1845 return false;
1846
1847 APInt C1Val, C2Val;
1848 if (!mi_match(C1, MRI, m_ICstOrSplat(C1Val)) ||
1849 !mi_match(ShiftReg, MRI, m_ICstOrSplat(C2Val)))
1850 return false;
1851
1852 auto *SrcDef = MRI.getVRegDef(SrcReg);
1853 assert((SrcDef->getOpcode() == TargetOpcode::G_ADD ||
1854 SrcDef->getOpcode() == TargetOpcode::G_OR) && "Unexpected op");
1855 LLT SrcTy = MRI.getType(SrcReg);
1856 MatchInfo = [=](MachineIRBuilder &B) {
1857 auto S1 = B.buildShl(SrcTy, X, ShiftReg);
1858 auto S2 = B.buildShl(SrcTy, C1, ShiftReg);
1859 B.buildInstr(SrcDef->getOpcode(), {DstReg}, {S1, S2});
1860 };
1861 return true;
1862}
1863
1864bool CombinerHelper::matchCombineMulToShl(MachineInstr &MI,
1865 unsigned &ShiftVal) {
1866 assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
1867 auto MaybeImmVal =
1868 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
1869 if (!MaybeImmVal)
1870 return false;
1871
1872 ShiftVal = MaybeImmVal->Value.exactLogBase2();
1873 return (static_cast<int32_t>(ShiftVal) != -1);
1874}
1875
1876void CombinerHelper::applyCombineMulToShl(MachineInstr &MI,
1877 unsigned &ShiftVal) {
1878 assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
1879 MachineIRBuilder MIB(MI);
1880 LLT ShiftTy = MRI.getType(MI.getOperand(0).getReg());
1881 auto ShiftCst = MIB.buildConstant(ShiftTy, ShiftVal);
1882 Observer.changingInstr(MI);
1883 MI.setDesc(MIB.getTII().get(TargetOpcode::G_SHL));
1884 MI.getOperand(2).setReg(ShiftCst.getReg(0));
1885 Observer.changedInstr(MI);
1886}
1887
1888// shl ([sza]ext x), y => zext (shl x, y), if shift does not overflow source
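 // For example (illustrative): if x:s8 is known to have at least 2 leading
 // zeros, (shl (zext x):s32, 2) becomes (zext (shl x, 2)):s32, because the
 // shifted value still fits in the narrow source type.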
1889bool CombinerHelper::matchCombineShlOfExtend(MachineInstr &MI,
1890 RegisterImmPair &MatchData) {
1891 assert(MI.getOpcode() == TargetOpcode::G_SHL && KB);
1892 if (!getTargetLowering().isDesirableToPullExtFromShl(MI))
1893 return false;
1894
1895 Register LHS = MI.getOperand(1).getReg();
1896
1897 Register ExtSrc;
1898 if (!mi_match(LHS, MRI, m_GAnyExt(m_Reg(ExtSrc))) &&
1899 !mi_match(LHS, MRI, m_GZExt(m_Reg(ExtSrc))) &&
1900 !mi_match(LHS, MRI, m_GSExt(m_Reg(ExtSrc))))
1901 return false;
1902
1903 Register RHS = MI.getOperand(2).getReg();
1904 MachineInstr *MIShiftAmt = MRI.getVRegDef(RHS);
1905 auto MaybeShiftAmtVal = isConstantOrConstantSplatVector(*MIShiftAmt, MRI);
1906 if (!MaybeShiftAmtVal)
1907 return false;
1908
1909 if (LI) {
1910 LLT SrcTy = MRI.getType(ExtSrc);
1911
1912 // We only really care about the legality of the shifted value. We can
1913 // pick any type for the constant shift amount, so ask the target what to
1914 // use. Otherwise we would have to guess and hope it is reported as legal.
1915 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(SrcTy);
1916 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SHL, {SrcTy, ShiftAmtTy}}))
1917 return false;
1918 }
1919
1920 int64_t ShiftAmt = MaybeShiftAmtVal->getSExtValue();
1921 MatchData.Reg = ExtSrc;
1922 MatchData.Imm = ShiftAmt;
1923
1924 unsigned MinLeadingZeros = KB->getKnownZeroes(ExtSrc).countl_one();
1925 unsigned SrcTySize = MRI.getType(ExtSrc).getScalarSizeInBits();
1926 return MinLeadingZeros >= ShiftAmt && ShiftAmt < SrcTySize;
1927}
1928
1929void CombinerHelper::applyCombineShlOfExtend(MachineInstr &MI,
1930 const RegisterImmPair &MatchData) {
1931 Register ExtSrcReg = MatchData.Reg;
1932 int64_t ShiftAmtVal = MatchData.Imm;
1933
1934 LLT ExtSrcTy = MRI.getType(ExtSrcReg);
1935 auto ShiftAmt = Builder.buildConstant(ExtSrcTy, ShiftAmtVal);
1936 auto NarrowShift =
1937 Builder.buildShl(ExtSrcTy, ExtSrcReg, ShiftAmt, MI.getFlags());
1938 Builder.buildZExt(MI.getOperand(0), NarrowShift);
1939 MI.eraseFromParent();
1940}
1941
1942bool CombinerHelper::matchCombineMergeUnmerge(MachineInstr &MI,
1943 Register &MatchInfo) {
1944 GMerge &Merge = cast<GMerge>(MI);
1945 SmallVector<Register, 16> MergedValues;
1946 for (unsigned I = 0; I < Merge.getNumSources(); ++I)
1947 MergedValues.emplace_back(Merge.getSourceReg(I));
1948
1949 auto *Unmerge = getOpcodeDef<GUnmerge>(MergedValues[0], MRI);
1950 if (!Unmerge || Unmerge->getNumDefs() != Merge.getNumSources())
1951 return false;
1952
1953 for (unsigned I = 0; I < MergedValues.size(); ++I)
1954 if (MergedValues[I] != Unmerge->getReg(I))
1955 return false;
1956
1957 MatchInfo = Unmerge->getSourceReg();
1958 return true;
1959}
1960
1961static Register peekThroughBitcast(Register Reg,
1962 const MachineRegisterInfo &MRI) {
1963 while (mi_match(Reg, MRI, m_GBitcast(m_Reg(Reg))))
1964 ;
1965
1966 return Reg;
1967}
1968
1969bool CombinerHelper::matchCombineUnmergeMergeToPlainValues(
1970 MachineInstr &MI, SmallVectorImpl<Register> &Operands) {
1971 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
1972 "Expected an unmerge");
1973 auto &Unmerge = cast<GUnmerge>(MI);
1974 Register SrcReg = peekThroughBitcast(Unmerge.getSourceReg(), MRI);
1975
1976 auto *SrcInstr = getOpcodeDef<GMergeLikeInstr>(SrcReg, MRI);
1977 if (!SrcInstr)
1978 return false;
1979
1980 // Check the source type of the merge.
1981 LLT SrcMergeTy = MRI.getType(SrcInstr->getSourceReg(0));
1982 LLT Dst0Ty = MRI.getType(Unmerge.getReg(0));
1983 bool SameSize = Dst0Ty.getSizeInBits() == SrcMergeTy.getSizeInBits();
1984 if (SrcMergeTy != Dst0Ty && !SameSize)
1985 return false;
1986 // They are the same now (modulo a bitcast).
1987 // We can collect all the src registers.
1988 for (unsigned Idx = 0; Idx < SrcInstr->getNumSources(); ++Idx)
1989 Operands.push_back(SrcInstr->getSourceReg(Idx));
1990 return true;
1991}
1992
1993void CombinerHelper::applyCombineUnmergeMergeToPlainValues(
1994 MachineInstr &MI, SmallVectorImpl<Register> &Operands) {
1995 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
1996 "Expected an unmerge");
1997 assert((MI.getNumOperands() - 1 == Operands.size()) &&
1998 "Not enough operands to replace all defs");
1999 unsigned NumElems = MI.getNumOperands() - 1;
2000
2001 LLT SrcTy = MRI.getType(Operands[0]);
2002 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
2003 bool CanReuseInputDirectly = DstTy == SrcTy;
2004 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
2005 Register DstReg = MI.getOperand(Idx).getReg();
2006 Register SrcReg = Operands[Idx];
2007
2008 // This combine may run after RegBankSelect, so we need to be aware of
2009 // register banks.
2010 const auto &DstCB = MRI.getRegClassOrRegBank(DstReg);
2011 if (!DstCB.isNull() && DstCB != MRI.getRegClassOrRegBank(SrcReg)) {
2012 SrcReg = Builder.buildCopy(MRI.getType(SrcReg), SrcReg).getReg(0);
2013 MRI.setRegClassOrRegBank(SrcReg, DstCB);
2014 }
2015
2016 if (CanReuseInputDirectly)
2017 replaceRegWith(MRI, DstReg, SrcReg);
2018 else
2019 Builder.buildCast(DstReg, SrcReg);
2020 }
2021 MI.eraseFromParent();
2022}
2023
2024bool CombinerHelper::matchCombineUnmergeConstant(MachineInstr &MI,
2025 SmallVectorImpl<APInt> &Csts) {
2026 unsigned SrcIdx = MI.getNumOperands() - 1;
2027 Register SrcReg = MI.getOperand(SrcIdx).getReg();
2028 MachineInstr *SrcInstr = MRI.getVRegDef(SrcReg);
2029 if (SrcInstr->getOpcode() != TargetOpcode::G_CONSTANT &&
2030 SrcInstr->getOpcode() != TargetOpcode::G_FCONSTANT)
2031 return false;
2032 // Break down the big constant in smaller ones.
2033 const MachineOperand &CstVal = SrcInstr->getOperand(1);
2034 APInt Val = SrcInstr->getOpcode() == TargetOpcode::G_CONSTANT
2035 ? CstVal.getCImm()->getValue()
2036 : CstVal.getFPImm()->getValueAPF().bitcastToAPInt();
2037
2038 LLT Dst0Ty = MRI.getType(MI.getOperand(0).getReg());
2039 unsigned ShiftAmt = Dst0Ty.getSizeInBits();
2040 // Unmerge a constant.
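 // For example (illustrative): unmerging the s64 constant 0x1122334455667788
 // into two s32 pieces yields 0x55667788 for the first (lowest) destination
 // and 0x11223344 for the second.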
2041 for (unsigned Idx = 0; Idx != SrcIdx; ++Idx) {
2042 Csts.emplace_back(Val.trunc(ShiftAmt));
2043 Val = Val.lshr(ShiftAmt);
2044 }
2045
2046 return true;
2047}
2048
2049void CombinerHelper::applyCombineUnmergeConstant(MachineInstr &MI,
2050 SmallVectorImpl<APInt> &Csts) {
2051 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2052 "Expected an unmerge");
2053 assert((MI.getNumOperands() - 1 == Csts.size()) &&
2054 "Not enough operands to replace all defs");
2055 unsigned NumElems = MI.getNumOperands() - 1;
2056 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
2057 Register DstReg = MI.getOperand(Idx).getReg();
2058 Builder.buildConstant(DstReg, Csts[Idx]);
2059 }
2060
2061 MI.eraseFromParent();
2062}
2063
2064bool CombinerHelper::matchCombineUnmergeUndef(
2065 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
2066 unsigned SrcIdx = MI.getNumOperands() - 1;
2067 Register SrcReg = MI.getOperand(SrcIdx).getReg();
2068 MatchInfo = [&MI](MachineIRBuilder &B) {
2069 unsigned NumElems = MI.getNumOperands() - 1;
2070 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
2071 Register DstReg = MI.getOperand(Idx).getReg();
2072 B.buildUndef(DstReg);
2073 }
2074 };
2075 return isa<GImplicitDef>(MRI.getVRegDef(SrcReg));
2076}
2077
2078bool CombinerHelper::matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) {
2079 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2080 "Expected an unmerge");
2081 if (MRI.getType(MI.getOperand(0).getReg()).isVector() ||
2082 MRI.getType(MI.getOperand(MI.getNumDefs()).getReg()).isVector())
2083 return false;
2084 // Check that all the lanes are dead except the first one.
2085 for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
2086 if (!MRI.use_nodbg_empty(MI.getOperand(Idx).getReg()))
2087 return false;
2088 }
2089 return true;
2090}
2091
2092void CombinerHelper::applyCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) {
2093 Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
2094 Register Dst0Reg = MI.getOperand(0).getReg();
2095 Builder.buildTrunc(Dst0Reg, SrcReg);
2096 MI.eraseFromParent();
2097}
2098
2099bool CombinerHelper::matchCombineUnmergeZExtToZExt(MachineInstr &MI) {
2100 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2101 "Expected an unmerge");
2102 Register Dst0Reg = MI.getOperand(0).getReg();
2103 LLT Dst0Ty = MRI.getType(Dst0Reg);
2104 // G_ZEXT on vector applies to each lane, so it will
2105 // affect all destinations. Therefore we won't be able
2106 // to simplify the unmerge to just the first definition.
2107 if (Dst0Ty.isVector())
2108 return false;
2109 Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
2110 LLT SrcTy = MRI.getType(SrcReg);
2111 if (SrcTy.isVector())
2112 return false;
2113
2114 Register ZExtSrcReg;
2115 if (!mi_match(SrcReg, MRI, m_GZExt(m_Reg(ZExtSrcReg))))
2116 return false;
2117
2118 // Finally we can replace the first definition with
2119 // a zext of the source if the definition is big enough to hold
2120 // all of ZExtSrc bits.
2121 LLT ZExtSrcTy = MRI.getType(ZExtSrcReg);
2122 return ZExtSrcTy.getSizeInBits() <= Dst0Ty.getSizeInBits();
2123}
2124
2125void CombinerHelper::applyCombineUnmergeZExtToZExt(MachineInstr &MI) {
2126 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2127 "Expected an unmerge");
2128
2129 Register Dst0Reg = MI.getOperand(0).getReg();
2130
2131 MachineInstr *ZExtInstr =
2132 MRI.getVRegDef(MI.getOperand(MI.getNumDefs()).getReg());
2133 assert(ZExtInstr && ZExtInstr->getOpcode() == TargetOpcode::G_ZEXT &&
2134 "Expecting a G_ZEXT");
2135
2136 Register ZExtSrcReg = ZExtInstr->getOperand(1).getReg();
2137 LLT Dst0Ty = MRI.getType(Dst0Reg);
2138 LLT ZExtSrcTy = MRI.getType(ZExtSrcReg);
2139
2140 if (Dst0Ty.getSizeInBits() > ZExtSrcTy.getSizeInBits()) {
2141 Builder.buildZExt(Dst0Reg, ZExtSrcReg);
2142 } else {
2143 assert(Dst0Ty.getSizeInBits() == ZExtSrcTy.getSizeInBits() &&
2144 "ZExt src doesn't fit in destination");
2145 replaceRegWith(MRI, Dst0Reg, ZExtSrcReg);
2146 }
2147
2148 Register ZeroReg;
2149 for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
2150 if (!ZeroReg)
2151 ZeroReg = Builder.buildConstant(Dst0Ty, 0).getReg(0);
2152 replaceRegWith(MRI, MI.getOperand(Idx).getReg(), ZeroReg);
2153 }
2154 MI.eraseFromParent();
2155}
2156
2157bool CombinerHelper::matchCombineShiftToUnmerge(MachineInstr &MI,
2158 unsigned TargetShiftSize,
2159 unsigned &ShiftVal) {
2160 assert((MI.getOpcode() == TargetOpcode::G_SHL ||
2161 MI.getOpcode() == TargetOpcode::G_LSHR ||
2162 MI.getOpcode() == TargetOpcode::G_ASHR) && "Expected a shift");
2163
2164 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
2165 if (Ty.isVector()) // TODO:
2166 return false;
2167
2168 // Don't narrow further than the requested size.
2169 unsigned Size = Ty.getSizeInBits();
2170 if (Size <= TargetShiftSize)
2171 return false;
2172
2173 auto MaybeImmVal =
2174 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
2175 if (!MaybeImmVal)
2176 return false;
2177
2178 ShiftVal = MaybeImmVal->Value.getSExtValue();
2179 return ShiftVal >= Size / 2 && ShiftVal < Size;
2180}
2181
2182void CombinerHelper::applyCombineShiftToUnmerge(MachineInstr &MI,
2183 const unsigned &ShiftVal) {
2184 Register DstReg = MI.getOperand(0).getReg();
2185 Register SrcReg = MI.getOperand(1).getReg();
2186 LLT Ty = MRI.getType(SrcReg);
2187 unsigned Size = Ty.getSizeInBits();
2188 unsigned HalfSize = Size / 2;
2189 assert(ShiftVal >= HalfSize);
2190
2191 LLT HalfTy = LLT::scalar(HalfSize);
2192
2193 auto Unmerge = Builder.buildUnmerge(HalfTy, SrcReg);
2194 unsigned NarrowShiftAmt = ShiftVal - HalfSize;
2195
2196 if (MI.getOpcode() == TargetOpcode::G_LSHR) {
2197 Register Narrowed = Unmerge.getReg(1);
2198
2199 // dst = G_LSHR s64:x, C for C >= 32
2200 // =>
2201 // lo, hi = G_UNMERGE_VALUES x
2202 // dst = G_MERGE_VALUES (G_LSHR hi, C - 32), 0
2203
2204 if (NarrowShiftAmt != 0) {
2205 Narrowed = Builder.buildLShr(HalfTy, Narrowed,
2206 Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
2207 }
2208
2209 auto Zero = Builder.buildConstant(HalfTy, 0);
2210 Builder.buildMergeLikeInstr(DstReg, {Narrowed, Zero});
2211 } else if (MI.getOpcode() == TargetOpcode::G_SHL) {
2212 Register Narrowed = Unmerge.getReg(0);
2213 // dst = G_SHL s64:x, C for C >= 32
2214 // =>
2215 // lo, hi = G_UNMERGE_VALUES x
2216 // dst = G_MERGE_VALUES 0, (G_SHL hi, C - 32)
2217 if (NarrowShiftAmt != 0) {
2218 Narrowed = Builder.buildShl(HalfTy, Narrowed,
2219 Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
2220 }
2221
2222 auto Zero = Builder.buildConstant(HalfTy, 0);
2223 Builder.buildMergeLikeInstr(DstReg, {Zero, Narrowed});
2224 } else {
2225 assert(MI.getOpcode() == TargetOpcode::G_ASHR);
2226 auto Hi = Builder.buildAShr(
2227 HalfTy, Unmerge.getReg(1),
2228 Builder.buildConstant(HalfTy, HalfSize - 1));
2229
2230 if (ShiftVal == HalfSize) {
2231 // (G_ASHR i64:x, 32) ->
2232 // G_MERGE_VALUES hi_32(x), (G_ASHR hi_32(x), 31)
2233 Builder.buildMergeLikeInstr(DstReg, {Unmerge.getReg(1), Hi});
2234 } else if (ShiftVal == Size - 1) {
2235 // Don't need a second shift.
2236 // (G_ASHR i64:x, 63) ->
2237 // %narrowed = (G_ASHR hi_32(x), 31)
2238 // G_MERGE_VALUES %narrowed, %narrowed
2239 Builder.buildMergeLikeInstr(DstReg, {Hi, Hi});
2240 } else {
2241 auto Lo = Builder.buildAShr(
2242 HalfTy, Unmerge.getReg(1),
2243 Builder.buildConstant(HalfTy, ShiftVal - HalfSize));
2244
2245 // (G_ASHR i64:x, C) ->, for C >= 32
2246 // G_MERGE_VALUES (G_ASHR hi_32(x), C - 32), (G_ASHR hi_32(x), 31)
2247 Builder.buildMergeLikeInstr(DstReg, {Lo, Hi});
2248 }
2249 }
2250
2251 MI.eraseFromParent();
2252}
2253
2254bool CombinerHelper::tryCombineShiftToUnmerge(MachineInstr &MI,
2255 unsigned TargetShiftAmount) {
2256 unsigned ShiftAmt;
2257 if (matchCombineShiftToUnmerge(MI, TargetShiftAmount, ShiftAmt)) {
2258 applyCombineShiftToUnmerge(MI, ShiftAmt);
2259 return true;
2260 }
2261
2262 return false;
2263}
2264
2265bool CombinerHelper::matchCombineI2PToP2I(MachineInstr &MI, Register &Reg) {
2266 assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
2267 Register DstReg = MI.getOperand(0).getReg();
2268 LLT DstTy = MRI.getType(DstReg);
2269 Register SrcReg = MI.getOperand(1).getReg();
2270 return mi_match(SrcReg, MRI,
2271 m_GPtrToInt(m_all_of(m_SpecificType(DstTy), m_Reg(Reg))));
2272}
2273
2274void CombinerHelper::applyCombineI2PToP2I(MachineInstr &MI, Register &Reg) {
2275 assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
2276 Register DstReg = MI.getOperand(0).getReg();
2277 Builder.buildCopy(DstReg, Reg);
2278 MI.eraseFromParent();
2279}
2280
2281void CombinerHelper::applyCombineP2IToI2P(MachineInstr &MI, Register &Reg) {
2282 assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT");
2283 Register DstReg = MI.getOperand(0).getReg();
2284 Builder.buildZExtOrTrunc(DstReg, Reg);
2285 MI.eraseFromParent();
2286}
2287
2288bool CombinerHelper::matchCombineAddP2IToPtrAdd(
2289 MachineInstr &MI, std::pair<Register, bool> &PtrReg) {
2290 assert(MI.getOpcode() == TargetOpcode::G_ADD);
2291 Register LHS = MI.getOperand(1).getReg();
2292 Register RHS = MI.getOperand(2).getReg();
2293 LLT IntTy = MRI.getType(LHS);
2294
2295 // G_PTR_ADD always has the pointer in the LHS, so we may need to commute the
2296 // instruction.
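 // For example (p0/s64 shown for illustration):
 //   %int:_(s64) = G_PTRTOINT %ptr:_(p0)
 //   %add:_(s64) = G_ADD %int, %offset
 // is rebuilt as
 //   %ptradd:_(p0) = G_PTR_ADD %ptr, %offset
 //   %add:_(s64) = G_PTRTOINT %ptradd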
2297 PtrReg.second = false;
2298 for (Register SrcReg : {LHS, RHS}) {
2299 if (mi_match(SrcReg, MRI, m_GPtrToInt(m_Reg(PtrReg.first)))) {
2300 // Don't handle cases where the integer is implicitly converted to the
2301 // pointer width.
2302 LLT PtrTy = MRI.getType(PtrReg.first);
2303 if (PtrTy.getScalarSizeInBits() == IntTy.getScalarSizeInBits())
2304 return true;
2305 }
2306
2307 PtrReg.second = true;
2308 }
2309
2310 return false;
2311}
2312
2313void CombinerHelper::applyCombineAddP2IToPtrAdd(
2314 MachineInstr &MI, std::pair<Register, bool> &PtrReg) {
2315 Register Dst = MI.getOperand(0).getReg();
2316 Register LHS = MI.getOperand(1).getReg();
2317 Register RHS = MI.getOperand(2).getReg();
2318
2319 const bool DoCommute = PtrReg.second;
2320 if (DoCommute)
2321 std::swap(LHS, RHS);
2322 LHS = PtrReg.first;
2323
2324 LLT PtrTy = MRI.getType(LHS);
2325
2326 auto PtrAdd = Builder.buildPtrAdd(PtrTy, LHS, RHS);
2327 Builder.buildPtrToInt(Dst, PtrAdd);
2328 MI.eraseFromParent();
2329}
2330
2331bool CombinerHelper::matchCombineConstPtrAddToI2P(MachineInstr &MI,
2332 APInt &NewCst) {
2333 auto &PtrAdd = cast<GPtrAdd>(MI);
2334 Register LHS = PtrAdd.getBaseReg();
2335 Register RHS = PtrAdd.getOffsetReg();
2337
2338 if (auto RHSCst = getIConstantVRegVal(RHS, MRI)) {
2339 APInt Cst;
2340 if (mi_match(LHS, MRI, m_GIntToPtr(m_ICst(Cst)))) {
2341 auto DstTy = MRI.getType(PtrAdd.getReg(0));
2342 // G_INTTOPTR uses zero-extension
2343 NewCst = Cst.zextOrTrunc(DstTy.getSizeInBits());
2344 NewCst += RHSCst->sextOrTrunc(DstTy.getSizeInBits());
2345 return true;
2346 }
2347 }
2348
2349 return false;
2350}
2351
2352void CombinerHelper::applyCombineConstPtrAddToI2P(MachineInstr &MI,
2353 APInt &NewCst) {
2354 auto &PtrAdd = cast<GPtrAdd>(MI);
2355 Register Dst = PtrAdd.getReg(0);
2356
2357 Builder.buildConstant(Dst, NewCst);
2358 PtrAdd.eraseFromParent();
2359}
2360
2361bool CombinerHelper::matchCombineAnyExtTrunc(MachineInstr &MI, Register &Reg) {
2362 assert(MI.getOpcode() == TargetOpcode::G_ANYEXT && "Expected a G_ANYEXT");
2363 Register DstReg = MI.getOperand(0).getReg();
2364 Register SrcReg = MI.getOperand(1).getReg();
2365 Register OriginalSrcReg = getSrcRegIgnoringCopies(SrcReg, MRI);
2366 if (OriginalSrcReg.isValid())
2367 SrcReg = OriginalSrcReg;
2368 LLT DstTy = MRI.getType(DstReg);
2369 return mi_match(SrcReg, MRI,
2370 m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy))));
2371}
2372
2373bool CombinerHelper::matchCombineZextTrunc(MachineInstr &MI, Register &Reg) {
2374 assert(MI.getOpcode() == TargetOpcode::G_ZEXT && "Expected a G_ZEXT");
2375 Register DstReg = MI.getOperand(0).getReg();
2376 Register SrcReg = MI.getOperand(1).getReg();
2377 LLT DstTy = MRI.getType(DstReg);
2378 if (mi_match(SrcReg, MRI,
2379 m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy))))) {
2380 unsigned DstSize = DstTy.getScalarSizeInBits();
2381 unsigned SrcSize = MRI.getType(SrcReg).getScalarSizeInBits();
2382 return KB->getKnownBits(Reg).countMinLeadingZeros() >= DstSize - SrcSize;
2383 }
2384 return false;
2385}
2386
2387bool CombinerHelper::matchCombineExtOfExt(
2388 MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
2389 assert((MI.getOpcode() == TargetOpcode::G_ANYEXT ||
2390 MI.getOpcode() == TargetOpcode::G_SEXT ||
2391 MI.getOpcode() == TargetOpcode::G_ZEXT) &&
2392 "Expected a G_[ASZ]EXT");
2393 Register SrcReg = MI.getOperand(1).getReg();
2394 Register OriginalSrcReg = getSrcRegIgnoringCopies(SrcReg, MRI);
2395 if (OriginalSrcReg.isValid())
2396 SrcReg = OriginalSrcReg;
2397 MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
2398 // Match exts with the same opcode, anyext([sz]ext) and sext(zext).
2399 unsigned Opc = MI.getOpcode();
2400 unsigned SrcOpc = SrcMI->getOpcode();
2401 if (Opc == SrcOpc ||
2402 (Opc == TargetOpcode::G_ANYEXT &&
2403 (SrcOpc == TargetOpcode::G_SEXT || SrcOpc == TargetOpcode::G_ZEXT)) ||
2404 (Opc == TargetOpcode::G_SEXT && SrcOpc == TargetOpcode::G_ZEXT)) {
2405 MatchInfo = std::make_tuple(SrcMI->getOperand(1).getReg(), SrcOpc);
2406 return true;
2407 }
2408 return false;
2409}
2410
2411void CombinerHelper::applyCombineExtOfExt(
2412 MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
2413 assert((MI.getOpcode() == TargetOpcode::G_ANYEXT ||
2414 MI.getOpcode() == TargetOpcode::G_SEXT ||
2415 MI.getOpcode() == TargetOpcode::G_ZEXT) &&
2416 "Expected a G_[ASZ]EXT");
2417
2418 Register Reg = std::get<0>(MatchInfo);
2419 unsigned SrcExtOp = std::get<1>(MatchInfo);
2420
2421 // Combine exts with the same opcode.
2422 if (MI.getOpcode() == SrcExtOp) {
2423 Observer.changingInstr(MI);
2424 MI.getOperand(1).setReg(Reg);
2425 Observer.changedInstr(MI);
2426 return;
2427 }
2428
2429 // Combine:
2430 // - anyext([sz]ext x) to [sz]ext x
2431 // - sext(zext x) to zext x
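 // E.g. (sext (zext x:s8):s16):s32 is combined to (zext x:s8):s32.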
2432 if (MI.getOpcode() == TargetOpcode::G_ANYEXT ||
2433 (MI.getOpcode() == TargetOpcode::G_SEXT &&
2434 SrcExtOp == TargetOpcode::G_ZEXT)) {
2435 Register DstReg = MI.getOperand(0).getReg();
2436 Builder.buildInstr(SrcExtOp, {DstReg}, {Reg});
2437 MI.eraseFromParent();
2438 }
2439}
2440
2441bool CombinerHelper::matchCombineTruncOfExt(
2442 MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo) {
2443 assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
2444 Register SrcReg = MI.getOperand(1).getReg();
2445 MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
2446 unsigned SrcOpc = SrcMI->getOpcode();
2447 if (SrcOpc == TargetOpcode::G_ANYEXT || SrcOpc == TargetOpcode::G_SEXT ||
2448 SrcOpc == TargetOpcode::G_ZEXT) {
2449 MatchInfo = std::make_pair(SrcMI->getOperand(1).getReg(), SrcOpc);
2450 return true;
2451 }
2452 return false;
2453}
2454
2455void CombinerHelper::applyCombineTruncOfExt(
2456 MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo) {
2457 assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
2458 Register SrcReg = MatchInfo.first;
2459 unsigned SrcExtOp = MatchInfo.second;
2460 Register DstReg = MI.getOperand(0).getReg();
2461 LLT SrcTy = MRI.getType(SrcReg);
2462 LLT DstTy = MRI.getType(DstReg);
2463 if (SrcTy == DstTy) {
2464 MI.eraseFromParent();
2465 replaceRegWith(MRI, DstReg, SrcReg);
2466 return;
2467 }
2468 if (SrcTy.getSizeInBits() < DstTy.getSizeInBits())
2469 Builder.buildInstr(SrcExtOp, {DstReg}, {SrcReg});
2470 else
2471 Builder.buildTrunc(DstReg, SrcReg);
2472 MI.eraseFromParent();
2473}
2474
2475static LLT getMidVTForTruncRightShiftCombine(LLT ShiftTy, LLT TruncTy) {
2476 const unsigned ShiftSize = ShiftTy.getScalarSizeInBits();
2477 const unsigned TruncSize = TruncTy.getScalarSizeInBits();
2478
2479 // ShiftTy > 32 > TruncTy -> 32
2480 if (ShiftSize > 32 && TruncSize < 32)
2481 return ShiftTy.changeElementSize(32);
2482
2483 // TODO: We could also reduce to 16 bits, but that's more target-dependent.
2484 // Some targets like it, some don't, some only like it under certain
2485 // conditions/processor versions, etc.
2486 // A TL hook might be needed for this.
2487
2488 // Don't combine
2489 return ShiftTy;
2490}
2491
2492bool CombinerHelper::matchCombineTruncOfShift(
2493 MachineInstr &MI, std::pair<MachineInstr *, LLT> &MatchInfo) {
2494 assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
2495 Register DstReg = MI.getOperand(0).getReg();
2496 Register SrcReg = MI.getOperand(1).getReg();
2497
2498 if (!MRI.hasOneNonDBGUse(SrcReg))
2499 return false;
2500
2501 LLT SrcTy = MRI.getType(SrcReg);
2502 LLT DstTy = MRI.getType(DstReg);
2503
2504 MachineInstr *SrcMI = getDefIgnoringCopies(SrcReg, MRI);
2505 const auto &TL = getTargetLowering();
2506
2507 LLT NewShiftTy;
2508 switch (SrcMI->getOpcode()) {
2509 default:
2510 return false;
2511 case TargetOpcode::G_SHL: {
2512 NewShiftTy = DstTy;
2513
2514 // Make sure new shift amount is legal.
2515 KnownBits Known = KB->getKnownBits(SrcMI->getOperand(2).getReg());
2516 if (Known.getMaxValue().uge(NewShiftTy.getScalarSizeInBits()))
2517 return false;
2518 break;
2519 }
2520 case TargetOpcode::G_LSHR:
2521 case TargetOpcode::G_ASHR: {
2522 // For right shifts, we conservatively do not do the transform if the TRUNC
2523 // has any STORE users. The reason is that if we change the type of the
2524 // shift, we may break the truncstore combine.
2525 //
2526 // TODO: Fix truncstore combine to handle (trunc(lshr (trunc x), k)).
2527 for (auto &User : MRI.use_instructions(DstReg))
2528 if (User.getOpcode() == TargetOpcode::G_STORE)
2529 return false;
2530
2531 NewShiftTy = getMidVTForTruncRightShiftCombine(SrcTy, DstTy);
2532 if (NewShiftTy == SrcTy)
2533 return false;
2534
2535 // Make sure we won't lose information by truncating the high bits.
2536 KnownBits Known = KB->getKnownBits(SrcMI->getOperand(2).getReg());
2537 if (Known.getMaxValue().ugt(NewShiftTy.getScalarSizeInBits() -
2538 DstTy.getScalarSizeInBits()))
2539 return false;
2540 break;
2541 }
2542 }
2543
2544 if (!isLegalOrBeforeLegalizer(
2545 {SrcMI->getOpcode(),
2546 {NewShiftTy, TL.getPreferredShiftAmountTy(NewShiftTy)}}))
2547 return false;
2548
2549 MatchInfo = std::make_pair(SrcMI, NewShiftTy);
2550 return true;
2551}
2552
2553void CombinerHelper::applyCombineTruncOfShift(
2554 MachineInstr &MI, std::pair<MachineInstr *, LLT> &MatchInfo) {
2555 MachineInstr *ShiftMI = MatchInfo.first;
2556 LLT NewShiftTy = MatchInfo.second;
2557
2558 Register Dst = MI.getOperand(0).getReg();
2559 LLT DstTy = MRI.getType(Dst);
2560
2561 Register ShiftAmt = ShiftMI->getOperand(2).getReg();
2562 Register ShiftSrc = ShiftMI->getOperand(1).getReg();
2563 ShiftSrc = Builder.buildTrunc(NewShiftTy, ShiftSrc).getReg(0);
2564
2565 Register NewShift =
2566 Builder
2567 .buildInstr(ShiftMI->getOpcode(), {NewShiftTy}, {ShiftSrc, ShiftAmt})
2568 .getReg(0);
2569
2570 if (NewShiftTy == DstTy)
2571 replaceRegWith(MRI, Dst, NewShift);
2572 else
2573 Builder.buildTrunc(Dst, NewShift);
2574
2575 eraseInst(MI);
2576}
2577
2578bool CombinerHelper::matchAnyExplicitUseIsUndef(MachineInstr &MI) {
2579 return any_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
2580 return MO.isReg() &&
2581 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
2582 });
2583}
2584
2585bool CombinerHelper::matchAllExplicitUsesAreUndef(MachineInstr &MI) {
2586 return all_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
2587 return !MO.isReg() ||
2588 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
2589 });
2590}
2591
2592bool CombinerHelper::matchUndefShuffleVectorMask(MachineInstr &MI) {
2593 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
2594 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
2595 return all_of(Mask, [](int Elt) { return Elt < 0; });
2596}
2597
2598bool CombinerHelper::matchUndefStore(MachineInstr &MI) {
2599 assert(MI.getOpcode() == TargetOpcode::G_STORE);
2600 return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(0).getReg(),
2601 MRI);
2602}
2603
2604bool CombinerHelper::matchUndefSelectCmp(MachineInstr &MI) {
2605 assert(MI.getOpcode() == TargetOpcode::G_SELECT);
2606 return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(1).getReg(),
2607 MRI);
2608}
2609
2610bool CombinerHelper::matchInsertExtractVecEltOutOfBounds(MachineInstr &MI) {
2611 assert((MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT ||
2612 MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT) &&
2613 "Expected an insert/extract element op");
2614 LLT VecTy = MRI.getType(MI.getOperand(1).getReg());
2615 unsigned IdxIdx =
2616 MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
2617 auto Idx = getIConstantVRegVal(MI.getOperand(IdxIdx).getReg(), MRI);
2618 if (!Idx)
2619 return false;
2620 return Idx->getZExtValue() >= VecTy.getNumElements();
2621}
2622
2623bool CombinerHelper::matchConstantSelectCmp(MachineInstr &MI, unsigned &OpIdx) {
2624 GSelect &SelMI = cast<GSelect>(MI);
2625 auto Cst =
2626 isConstantOrConstantSplatVector(*MRI.getVRegDef(SelMI.getCondReg()), MRI);
2627 if (!Cst)
2628 return false;
2629 OpIdx = Cst->isZero() ? 3 : 2;
2630 return true;
2631}
2632
2633void CombinerHelper::eraseInst(MachineInstr &MI) { MI.eraseFromParent(); }
2634
2635bool CombinerHelper::matchEqualDefs(const MachineOperand &MOP1,
2636 const MachineOperand &MOP2) {
2637 if (!MOP1.isReg() || !MOP2.isReg())
2638 return false;
2639 auto InstAndDef1 = getDefSrcRegIgnoringCopies(MOP1.getReg(), MRI);
2640 if (!InstAndDef1)
2641 return false;
2642 auto InstAndDef2 = getDefSrcRegIgnoringCopies(MOP2.getReg(), MRI);
2643 if (!InstAndDef2)
2644 return false;
2645 MachineInstr *I1 = InstAndDef1->MI;
2646 MachineInstr *I2 = InstAndDef2->MI;
2647
2648 // Handle a case like this:
2649 //
2650 // %0:_(s64), %1:_(s64) = G_UNMERGE_VALUES %2:_(<2 x s64>)
2651 //
2652 // Even though %0 and %1 are produced by the same instruction they are not
2653 // the same values.
2654 if (I1 == I2)
2655 return MOP1.getReg() == MOP2.getReg();
2656
2657 // If we have an instruction which loads or stores, we can't guarantee that
2658 // it is identical.
2659 //
2660 // For example, we may have
2661 //
2662 // %x1 = G_LOAD %addr (load N from @somewhere)
2663 // ...
2664 // call @foo
2665 // ...
2666 // %x2 = G_LOAD %addr (load N from @somewhere)
2667 // ...
2668 // %or = G_OR %x1, %x2
2669 //
2670 // It's possible that @foo will modify whatever lives at the address we're
2671 // loading from. To be safe, let's just assume that all loads and stores
2672 // are different (unless we have something which is guaranteed not to
2673 // change).
2674 if (I1->mayLoadOrStore() && !I1->isDereferenceableInvariantLoad())
2675 return false;
2676
2677 // If both instructions are loads or stores, they are equal only if both
2678 // are dereferenceable invariant loads with the same number of bits.
2679 if (I1->mayLoadOrStore() && I2->mayLoadOrStore()) {
2680 GLoadStore *LS1 = dyn_cast<GLoadStore>(I1);
2681 GLoadStore *LS2 = dyn_cast<GLoadStore>(I2);
2682 if (!LS1 || !LS2)
2683 return false;
2684
2685 if (!I2->isDereferenceableInvariantLoad() ||
2686 (LS1->getMemSizeInBits() != LS2->getMemSizeInBits()))
2687 return false;
2688 }
2689
2690 // Check for physical registers on the instructions first to avoid cases
2691 // like this:
2692 //
2693 // %a = COPY $physreg
2694 // ...
2695 // SOMETHING implicit-def $physreg
2696 // ...
2697 // %b = COPY $physreg
2698 //
2699 // These copies are not equivalent.
2700 if (any_of(I1->uses(), [](const MachineOperand &MO) {
2701 return MO.isReg() && MO.getReg().isPhysical();
2702 })) {
2703 // Check if we have a case like this:
2704 //
2705 // %a = COPY $physreg
2706 // %b = COPY %a
2707 //
2708 // In this case, I1 and I2 will both be equal to %a = COPY $physreg.
2709 // From that, we know that they must have the same value, since they must
2710 // have come from the same COPY.
2711 return I1->isIdenticalTo(*I2);
2712 }
2713
2714 // We don't have any physical registers, so we don't necessarily need the
2715 // same vreg defs.
2716 //
2717 // On the off-chance that there's some target instruction feeding into the
2718 // instruction, let's use produceSameValue instead of isIdenticalTo.
2719 if (Builder.getTII().produceSameValue(*I1, *I2, &MRI)) {
2720 // Handle instructions with multiple defs that produce the same values; the
2721 // values are the same for operands with the same index.
2722 // %0:_(s8), %1:_(s8), %2:_(s8), %3:_(s8) = G_UNMERGE_VALUES %4:_(<4 x s8>)
2723 // %5:_(s8), %6:_(s8), %7:_(s8), %8:_(s8) = G_UNMERGE_VALUES %4:_(<4 x s8>)
2724 // I1 and I2 are different instructions that produce the same values:
2725 // %1 and %6 are the same value, but %1 and %7 are not.
2726 return I1->findRegisterDefOperandIdx(InstAndDef1->Reg) ==
2727 I2->findRegisterDefOperandIdx(InstAndDef2->Reg);
2728 }
2729 return false;
2730}
2731
2732bool CombinerHelper::matchConstantOp(const MachineOperand &MOP, int64_t C) {
2733 if (!MOP.isReg())
2734 return false;
2735 auto *MI = MRI.getVRegDef(MOP.getReg());
2736 auto MaybeCst = isConstantOrConstantSplatVector(*MI, MRI);
2737 return MaybeCst && MaybeCst->getBitWidth() <= 64 &&
2738 MaybeCst->getSExtValue() == C;
2739}
2740
2741bool CombinerHelper::matchConstantFPOp(const MachineOperand &MOP, double C) {
2742 if (!MOP.isReg())
2743 return false;
2744 std::optional<FPValueAndVReg> MaybeCst;
2745 if (!mi_match(MOP.getReg(), MRI, m_GFCstOrSplat(MaybeCst)))
2746 return false;
2747
2748 return MaybeCst->Value.isExactlyValue(C);
2749}
2750
2751void CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI,
2752 unsigned OpIdx) {
2753 assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
2754 Register OldReg = MI.getOperand(0).getReg();
2755 Register Replacement = MI.getOperand(OpIdx).getReg();
2756 assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
2757 MI.eraseFromParent();
2758 replaceRegWith(MRI, OldReg, Replacement);
2759}
2760
2761void CombinerHelper::replaceSingleDefInstWithReg(MachineInstr &MI,
2762 Register Replacement) {
2763 assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
2764 Register OldReg = MI.getOperand(0).getReg();
2765 assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
2766 MI.eraseFromParent();
2767 replaceRegWith(MRI, OldReg, Replacement);
2768}
2769
2770bool CombinerHelper::matchConstantLargerBitWidth(MachineInstr &MI,
2771 unsigned ConstIdx) {
2772 Register ConstReg = MI.getOperand(ConstIdx).getReg();
2773 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
2774
2775 // Get the shift amount
2776 auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstReg, MRI);
2777 if (!VRegAndVal)
2778 return false;
2779
2780 // Return true if the shift amount is >= the bitwidth.
2781 return (VRegAndVal->Value.uge(DstTy.getSizeInBits()));
2782}
2783
2784void CombinerHelper::applyFunnelShiftConstantModulo(MachineInstr &MI) {
2785 assert((MI.getOpcode() == TargetOpcode::G_FSHL ||
2786 MI.getOpcode() == TargetOpcode::G_FSHR) &&
2787 "This is not a funnel shift operation");
2788
2789 Register ConstReg = MI.getOperand(3).getReg();
2790 LLT ConstTy = MRI.getType(ConstReg);
2791 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
2792
2793 auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstReg, MRI);
2794 assert((VRegAndVal) && "Value is not a constant");
2795
2796 // Calculate the new Shift Amount = Old Shift Amount % BitWidth
2797 APInt NewConst = VRegAndVal->Value.urem(
2798 APInt(ConstTy.getSizeInBits(), DstTy.getScalarSizeInBits()));
2799
2800 auto NewConstInstr = Builder.buildConstant(ConstTy, NewConst.getZExtValue());
2801 Builder.buildInstr(
2802 MI.getOpcode(), {MI.getOperand(0)},
2803 {MI.getOperand(1), MI.getOperand(2), NewConstInstr.getReg(0)});
2804
2805 MI.eraseFromParent();
2806}
2807
2808bool CombinerHelper::matchSelectSameVal(MachineInstr &MI) {
2809 assert(MI.getOpcode() == TargetOpcode::G_SELECT);
2810 // Match (cond ? x : x)
2811 return matchEqualDefs(MI.getOperand(2), MI.getOperand(3)) &&
2812 canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(2).getReg(),
2813 MRI);
2814}
2815
2816bool CombinerHelper::matchBinOpSameVal(MachineInstr &MI) {
2817 return matchEqualDefs(MI.getOperand(1), MI.getOperand(2)) &&
2818 canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(),
2819 MRI);
2820}
2821
2822bool CombinerHelper::matchOperandIsZero(MachineInstr &MI, unsigned OpIdx) {
2823 return matchConstantOp(MI.getOperand(OpIdx), 0) &&
2824 canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(OpIdx).getReg(),
2825 MRI);
2826}
2827
2828bool CombinerHelper::matchOperandIsUndef(MachineInstr &MI, unsigned OpIdx) {
2829 MachineOperand &MO = MI.getOperand(OpIdx);
2830 return MO.isReg() &&
2831 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
2832}
2833
2834bool CombinerHelper::matchOperandIsKnownToBeAPowerOfTwo(MachineInstr &MI,
2835 unsigned OpIdx) {
2836 MachineOperand &MO = MI.getOperand(OpIdx);
2837 return isKnownToBeAPowerOfTwo(MO.getReg(), MRI, KB);
2838}
2839
2840void CombinerHelper::replaceInstWithFConstant(MachineInstr &MI, double C) {
2841 assert(MI.getNumDefs() == 1 && "Expected only one def?");
2842 Builder.buildFConstant(MI.getOperand(0), C);
2843 MI.eraseFromParent();
2844}
2845
2846void CombinerHelper::replaceInstWithConstant(MachineInstr &MI, int64_t C) {
2847 assert(MI.getNumDefs() == 1 && "Expected only one def?");
2848 Builder.buildConstant(MI.getOperand(0), C);
2849 MI.eraseFromParent();
2850}
2851
2852void CombinerHelper::replaceInstWithConstant(MachineInstr &MI, APInt C) {
2853 assert(MI.getNumDefs() == 1 && "Expected only one def?");
2854 Builder.buildConstant(MI.getOperand(0), C);
2855 MI.eraseFromParent();
2856}
2857
2858void CombinerHelper::replaceInstWithFConstant(MachineInstr &MI,
2859 ConstantFP *CFP) {
2860 assert(MI.getNumDefs() == 1 && "Expected only one def?");
2861 Builder.buildFConstant(MI.getOperand(0), CFP->getValueAPF());
2862 MI.eraseFromParent();
2863}
2864
2865void CombinerHelper::replaceInstWithUndef(MachineInstr &MI) {
2866 assert(MI.getNumDefs() == 1 && "Expected only one def?");
2867 Builder.buildUndef(MI.getOperand(0));
2868 MI.eraseFromParent();
2869}
2870
2871bool CombinerHelper::matchSimplifyAddToSub(
2872 MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) {
2873 Register LHS = MI.getOperand(1).getReg();
2874 Register RHS = MI.getOperand(2).getReg();
2875 Register &NewLHS = std::get<0>(MatchInfo);
2876 Register &NewRHS = std::get<1>(MatchInfo);
2877
2878 // Helper lambda to check for opportunities for
2879 // ((0-A) + B) -> B - A
2880 // (A + (0-B)) -> A - B
2881 auto CheckFold = [&](Register &MaybeSub, Register &MaybeNewLHS) {
2882 if (!mi_match(MaybeSub, MRI, m_Neg(m_Reg(NewRHS))))
2883 return false;
2884 NewLHS = MaybeNewLHS;
2885 return true;
2886 };
2887
2888 return CheckFold(LHS, RHS) || CheckFold(RHS, LHS);
2889}
2890
2891bool CombinerHelper::matchCombineInsertVecElts(
2892 MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) {
2893 assert(MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT &&
2894 "Invalid opcode");
2895 Register DstReg = MI.getOperand(0).getReg();
2896 LLT DstTy = MRI.getType(DstReg);
2897 assert(DstTy.isVector() && "Invalid G_INSERT_VECTOR_ELT?");
2898 unsigned NumElts = DstTy.getNumElements();
2899 // If this MI is part of a sequence of insert_vec_elts, then
2900 // don't do the combine in the middle of the sequence.
2901 if (MRI.hasOneUse(DstReg) && MRI.use_instr_begin(DstReg)->getOpcode() ==
2902 TargetOpcode::G_INSERT_VECTOR_ELT)
2903 return false;
2904 MachineInstr *CurrInst = &MI;
2905 MachineInstr *TmpInst;
2906 int64_t IntImm;
2907 Register TmpReg;
2908 MatchInfo.resize(NumElts);
2909 while (mi_match(
2910 CurrInst->getOperand(0).getReg(), MRI,
2911 m_GInsertVecElt(m_MInstr(TmpInst), m_Reg(TmpReg), m_ICst(IntImm)))) {
2912 if (IntImm >= NumElts || IntImm < 0)
2913 return false;
2914 if (!MatchInfo[IntImm])
2915 MatchInfo[IntImm] = TmpReg;
2916 CurrInst = TmpInst;
2917 }
2918 // Variable index.
2919 if (CurrInst->getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
2920 return false;
2921 if (TmpInst->getOpcode() == TargetOpcode::G_BUILD_VECTOR) {
2922 for (unsigned I = 1; I < TmpInst->getNumOperands(); ++I) {
2923 if (!MatchInfo[I - 1].isValid())
2924 MatchInfo[I - 1] = TmpInst->getOperand(I).getReg();
2925 }
2926 return true;
2927 }
2928 // If we didn't end in a G_IMPLICIT_DEF and the source is not fully
2929 // overwritten, bail out.
2930 return TmpInst->getOpcode() == TargetOpcode::G_IMPLICIT_DEF ||
2931 all_of(MatchInfo, [](Register Reg) { return !!Reg; });
2932}
2933
2934void CombinerHelper::applyCombineInsertVecElts(
2935 MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) {
2936 Register UndefReg;
2937 auto GetUndef = [&]() {
2938 if (UndefReg)
2939 return UndefReg;
2940 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
2941 UndefReg = Builder.buildUndef(DstTy.getScalarType()).getReg(0);
2942 return UndefReg;
2943 };
2944 for (unsigned I = 0; I < MatchInfo.size(); ++I) {
2945 if (!MatchInfo[I])
2946 MatchInfo[I] = GetUndef();
2947 }
2948 Builder.buildBuildVector(MI.getOperand(0).getReg(), MatchInfo);
2949 MI.eraseFromParent();
2950}
2951
2952void CombinerHelper::applySimplifyAddToSub(
2953 MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) {
2954 Register SubLHS, SubRHS;
2955 std::tie(SubLHS, SubRHS) = MatchInfo;
2956 Builder.buildSub(MI.getOperand(0).getReg(), SubLHS, SubRHS);
2957 MI.eraseFromParent();
2958}
2959
2960bool CombinerHelper::matchHoistLogicOpWithSameOpcodeHands(
2961 MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) {
2962 // Matches: logic (hand x, ...), (hand y, ...) -> hand (logic x, y), ...
2963 //
2964 // Creates the new hand + logic instructions (but does not insert them).
2965 //
2966 // On success, MatchInfo is populated with the new instructions. These are
2967 // inserted in applyHoistLogicOpWithSameOpcodeHands.
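 // For example (illustrative):
 //   %x32 = G_ZEXT %x:s8
 //   %y32 = G_ZEXT %y:s8
 //   %dst = G_AND %x32, %y32
 // becomes
 //   %and = G_AND %x:s8, %y:s8
 //   %dst = G_ZEXT %and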
2968 unsigned LogicOpcode = MI.getOpcode();
2969 assert(LogicOpcode == TargetOpcode::G_AND ||
2970 LogicOpcode == TargetOpcode::G_OR ||
2971 LogicOpcode == TargetOpcode::G_XOR);
2972 MachineIRBuilder MIB(MI);
2973 Register Dst = MI.getOperand(0).getReg();
2974 Register LHSReg = MI.getOperand(1).getReg();
2975 Register RHSReg = MI.getOperand(2).getReg();
2976
2977 // Don't recompute anything.
2978 if (!MRI.hasOneNonDBGUse(LHSReg) || !MRI.hasOneNonDBGUse(RHSReg))
2979 return false;
2980
2981 // Make sure we have (hand x, ...), (hand y, ...)
2982 MachineInstr *LeftHandInst = getDefIgnoringCopies(LHSReg, MRI);
2983 MachineInstr *RightHandInst = getDefIgnoringCopies(RHSReg, MRI);
2984 if (!LeftHandInst || !RightHandInst)
2985 return false;
2986 unsigned HandOpcode = LeftHandInst->getOpcode();
2987 if (HandOpcode != RightHandInst->getOpcode())
2988 return false;
2989 if (!LeftHandInst->getOperand(1).isReg() ||
2990 !RightHandInst->getOperand(1).isReg())
2991 return false;
2992
2993 // Make sure the types match up, and if we're doing this post-legalization,
2994 // we end up with legal types.
2995 Register X = LeftHandInst->getOperand(1).getReg();
2996 Register Y = RightHandInst->getOperand(1).getReg();
2997 LLT XTy = MRI.getType(X);
2998 LLT YTy = MRI.getType(Y);
2999 if (!XTy.isValid() || XTy != YTy)
3000 return false;
3001
3002 // Optional extra source register.
3003 Register ExtraHandOpSrcReg;
3004 switch (HandOpcode) {
3005 default:
3006 return false;
3007 case TargetOpcode::G_ANYEXT:
3008 case TargetOpcode::G_SEXT:
3009 case TargetOpcode::G_ZEXT: {
3010 // Match: logic (ext X), (ext Y) --> ext (logic X, Y)
3011 break;
3012 }
3013 case TargetOpcode::G_AND:
3014 case TargetOpcode::G_ASHR:
3015 case TargetOpcode::G_LSHR:
3016 case TargetOpcode::G_SHL: {
3017 // Match: logic (binop x, z), (binop y, z) -> binop (logic x, y), z
3018 MachineOperand &ZOp = LeftHandInst->getOperand(2);
3019 if (!matchEqualDefs(ZOp, RightHandInst->getOperand(2)))
3020 return false;
3021 ExtraHandOpSrcReg = ZOp.getReg();
3022 break;
3023 }
3024 }
3025
3026 if (!isLegalOrBeforeLegalizer({LogicOpcode, {XTy, YTy}}))
3027 return false;
3028
3029 // Record the steps to build the new instructions.
3030 //
3031 // Steps to build (logic x, y)
3032 auto NewLogicDst = MRI.createGenericVirtualRegister(XTy);
3033 OperandBuildSteps LogicBuildSteps = {
3034 [=](MachineInstrBuilder &MIB) { MIB.addDef(NewLogicDst); },
3035 [=](MachineInstrBuilder &MIB) { MIB.addReg(X); },
3036 [=](MachineInstrBuilder &MIB) { MIB.addReg(Y); }};
3037 InstructionBuildSteps LogicSteps(LogicOpcode, LogicBuildSteps);
3038
3039 // Steps to build hand (logic x, y), ...z
3040 OperandBuildSteps HandBuildSteps = {
3041 [=](MachineInstrBuilder &MIB) { MIB.addDef(Dst); },
3042 [=](MachineInstrBuilder &MIB) { MIB.addReg(NewLogicDst); }};
3043 if (ExtraHandOpSrcReg.isValid())
3044 HandBuildSteps.push_back(
3045 [=](MachineInstrBuilder &MIB) { MIB.addReg(ExtraHandOpSrcReg); });
3046 InstructionBuildSteps HandSteps(HandOpcode, HandBuildSteps);
3047
3048 MatchInfo = InstructionStepsMatchInfo({LogicSteps, HandSteps});
3049 return true;
3050}
3051
3052void CombinerHelper::applyBuildInstructionSteps(
3053 MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) {
3054 assert(MatchInfo.InstrsToBuild.size() &&
3055 "Expected at least one instr to build?");
3056 for (auto &InstrToBuild : MatchInfo.InstrsToBuild) {
3057 assert(InstrToBuild.Opcode && "Expected a valid opcode?");
3058 assert(InstrToBuild.OperandFns.size() && "Expected at least one operand?");
3059 MachineInstrBuilder Instr = Builder.buildInstr(InstrToBuild.Opcode);
3060 for (auto &OperandFn : InstrToBuild.OperandFns)
3061 OperandFn(Instr);
3062 }
3063 MI.eraseFromParent();
3064}
3065
3066bool CombinerHelper::matchAshrShlToSextInreg(
3067 MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) {
3068 assert(MI.getOpcode() == TargetOpcode::G_ASHR);
3069 int64_t ShlCst, AshrCst;
3070 Register Src;
3071 if (!mi_match(MI.getOperand(0).getReg(), MRI,
3072 m_GAShr(m_GShl(m_Reg(Src), m_ICstOrSplat(ShlCst)),
3073 m_ICstOrSplat(AshrCst))))
3074 return false;
3075 if (ShlCst != AshrCst)
3076 return false;
3077 if (!isLegalOrBeforeLegalizer(
3078 {TargetOpcode::G_SEXT_INREG, {MRI.getType(Src)}}))
3079 return false;
3080 MatchInfo = std::make_tuple(Src, ShlCst);
3081 return true;
3082}
3083
3084void CombinerHelper::applyAshShlToSextInreg(
3085 MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) {
3086 assert(MI.getOpcode() == TargetOpcode::G_ASHR);
3087 Register Src;
3088 int64_t ShiftAmt;
3089 std::tie(Src, ShiftAmt) = MatchInfo;
3090 unsigned Size = MRI.getType(Src).getScalarSizeInBits();
3091 Builder.buildSExtInReg(MI.getOperand(0).getReg(), Src, Size - ShiftAmt);
3092 MI.eraseFromParent();
3093}
3094
3095/// and(and(x, C1), C2) -> C1&C2 ? and(x, C1&C2) : 0
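/// E.g. and(and(x, 0xF0), 0x18) -> and(x, 0x10), while
/// and(and(x, 0xF0), 0x0F) -> 0 because the masks do not overlap.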
3096bool CombinerHelper::matchOverlappingAnd(
3097 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
3098 assert(MI.getOpcode() == TargetOpcode::G_AND);
3099
3100 Register Dst = MI.getOperand(0).getReg();
3101 LLT Ty = MRI.getType(Dst);
3102
3103 Register R;
3104 int64_t C1;
3105 int64_t C2;
3106 if (!mi_match(
3107 Dst, MRI,
3108 m_GAnd(m_GAnd(m_Reg(R), m_ICst(C1)), m_ICst(C2))))
3109 return false;
3110
3111 MatchInfo = [=](MachineIRBuilder &B) {
3112 if (C1 & C2) {
3113 B.buildAnd(Dst, R, B.buildConstant(Ty, C1 & C2));
3114 return;
3115 }
3116 auto Zero = B.buildConstant(Ty, 0);
3117 replaceRegWith(MRI, Dst, Zero->getOperand(0).getReg());
3118 };
3119 return true;
3120}
3121
3122bool CombinerHelper::matchRedundantAnd(MachineInstr &MI,
3123 Register &Replacement) {
3124 // Given
3125 //
3126 // %y:_(sN) = G_SOMETHING
3127 // %x:_(sN) = G_SOMETHING
3128 // %res:_(sN) = G_AND %x, %y
3129 //
3130 // Eliminate the G_AND when it is known that x & y == x or x & y == y.
3131 //
3132 // Patterns like this can appear as a result of legalization. E.g.
3133 //
3134 // %cmp:_(s32) = G_ICMP intpred(pred), %x(s32), %y
3135 // %one:_(s32) = G_CONSTANT i32 1
3136 // %and:_(s32) = G_AND %cmp, %one
3137 //
3138 // In this case, G_ICMP only produces a single bit, so x & 1 == x.
3139 assert(MI.getOpcode() == TargetOpcode::G_AND);
3140 if (!KB)
3141 return false;
3142
3143 Register AndDst = MI.getOperand(0).getReg();
3144 Register LHS = MI.getOperand(1).getReg();
3145 Register RHS = MI.getOperand(2).getReg();
3146 KnownBits LHSBits = KB->getKnownBits(LHS);
3147 KnownBits RHSBits = KB->getKnownBits(RHS);
3148
3149 // Check that x & Mask == x.
3150 // x & 1 == x, always
3151 // x & 0 == x, only if x is also 0
3152 // Meaning Mask has no effect if every bit is either one in Mask or zero in x.
3153 //
3154 // Check if we can replace AndDst with the LHS of the G_AND
3155 if (canReplaceReg(AndDst, LHS, MRI) &&
3156 (LHSBits.Zero | RHSBits.One).isAllOnes()) {
3157 Replacement = LHS;
3158 return true;
3159 }
3160
3161 // Check if we can replace AndDst with the RHS of the G_AND
3162 if (canReplaceReg(AndDst, RHS, MRI) &&
3163 (LHSBits.One | RHSBits.Zero).isAllOnes()) {
3164 Replacement = RHS;
3165 return true;
3166 }
3167
3168 return false;
3169}
3170
3172 // Given
3173 //
3174 // %y:_(sN) = G_SOMETHING
3175 // %x:_(sN) = G_SOMETHING
3176 // %res:_(sN) = G_OR %x, %y
3177 //
3178 // Eliminate the G_OR when it is known that x | y == x or x | y == y.
3179 assert(MI.getOpcode() == TargetOpcode::G_OR);
3180 if (!KB)
3181 return false;
3182
3183 Register OrDst = MI.getOperand(0).getReg();
3184 Register LHS = MI.getOperand(1).getReg();
3185 Register RHS = MI.getOperand(2).getReg();
3186 KnownBits LHSBits = KB->getKnownBits(LHS);
3187 KnownBits RHSBits = KB->getKnownBits(RHS);
3188
3189 // Check that x | Mask == x.
3190 // x | 0 == x, always
3191 // x | 1 == x, only if x is also 1
3192 // Meaning Mask has no effect if every bit is either zero in Mask or one in x.
3193 //
3194 // Check if we can replace OrDst with the LHS of the G_OR
3195 if (canReplaceReg(OrDst, LHS, MRI) &&
3196 (LHSBits.One | RHSBits.Zero).isAllOnes()) {
3197 Replacement = LHS;
3198 return true;
3199 }
3200
3201 // Check if we can replace OrDst with the RHS of the G_OR
3202 if (canReplaceReg(OrDst, RHS, MRI) &&
3203 (LHSBits.Zero | RHSBits.One).isAllOnes()) {
3204 Replacement = RHS;
3205 return true;
3206 }
3207
3208 return false;
3209}
3210
3211bool CombinerHelper::matchRedundantSExtInReg(MachineInstr &MI) {
3212 // If the input is already sign extended, just drop the extension.
3213 Register Src = MI.getOperand(1).getReg();
3214 unsigned ExtBits = MI.getOperand(2).getImm();
3215 unsigned TypeSize = MRI.getType(Src).getScalarSizeInBits();
3216 return KB->computeNumSignBits(Src) >= (TypeSize - ExtBits + 1);
3217}
3218
3219static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits,
3220 int64_t Cst, bool IsVector, bool IsFP) {
3221 // For i1, Cst will always be -1 regardless of boolean contents.
3222 return (ScalarSizeBits == 1 && Cst == -1) ||
3223 isConstTrueVal(TLI, Cst, IsVector, IsFP);
3224}
3225
3226bool CombinerHelper::matchNotCmp(MachineInstr &MI,
3227 SmallVectorImpl<Register> &RegsToNegate) {
3228 assert(MI.getOpcode() == TargetOpcode::G_XOR);
3229 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3230 const auto &TLI = *Builder.getMF().getSubtarget().getTargetLowering();
3231 Register XorSrc;
3232 Register CstReg;
3233 // We match xor(src, true) here.
3234 if (!mi_match(MI.getOperand(0).getReg(), MRI,
3235 m_GXor(m_Reg(XorSrc), m_Reg(CstReg))))
3236 return false;
3237
3238 if (!MRI.hasOneNonDBGUse(XorSrc))
3239 return false;
3240
3241 // Check that XorSrc is the root of a tree of comparisons combined with ANDs
3242 // and ORs. The suffix of RegsToNegate starting from index I is used as a
3243 // worklist of tree nodes to visit.
3244 RegsToNegate.push_back(XorSrc);
3245 // Remember whether the comparisons are all integer or all floating point.
3246 bool IsInt = false;
3247 bool IsFP = false;
3248 for (unsigned I = 0; I < RegsToNegate.size(); ++I) {
3249 Register Reg = RegsToNegate[I];
3250 if (!MRI.hasOneNonDBGUse(Reg))
3251 return false;
3252 MachineInstr *Def = MRI.getVRegDef(Reg);
3253 switch (Def->getOpcode()) {
3254 default:
3255 // Don't match if the tree contains anything other than ANDs, ORs and
3256 // comparisons.
3257 return false;
3258 case TargetOpcode::G_ICMP:
3259 if (IsFP)
3260 return false;
3261 IsInt = true;
3262 // When we apply the combine we will invert the predicate.
3263 break;
3264 case TargetOpcode::G_FCMP:
3265 if (IsInt)
3266 return false;
3267 IsFP = true;
3268 // When we apply the combine we will invert the predicate.
3269 break;
3270 case TargetOpcode::G_AND:
3271 case TargetOpcode::G_OR:
3272 // Implement De Morgan's laws:
3273 // ~(x & y) -> ~x | ~y
3274 // ~(x | y) -> ~x & ~y
3275 // When we apply the combine we will change the opcode and recursively
3276 // negate the operands.
3277 RegsToNegate.push_back(Def->getOperand(1).getReg());
3278 RegsToNegate.push_back(Def->getOperand(2).getReg());
3279 break;
3280 }
3281 }
3282
3283 // Now we know whether the comparisons are integer or floating point, check
3284 // the constant in the xor.
3285 int64_t Cst;
3286 if (Ty.isVector()) {
3287 MachineInstr *CstDef = MRI.getVRegDef(CstReg);
3288 auto MaybeCst = getIConstantSplatSExtVal(*CstDef, MRI);
3289 if (!MaybeCst)
3290 return false;
3291 if (!isConstValidTrue(TLI, Ty.getScalarSizeInBits(), *MaybeCst, true, IsFP))
3292 return false;
3293 } else {
3294 if (!mi_match(CstReg, MRI, m_ICst(Cst)))
3295 return false;
3296 if (!isConstValidTrue(TLI, Ty.getSizeInBits(), Cst, false, IsFP))
3297 return false;
3298 }
3299
3300 return true;
3301}
3302
3303void CombinerHelper::applyNotCmp(MachineInstr &MI,
3304 SmallVectorImpl<Register> &RegsToNegate) {
3305 for (Register Reg : RegsToNegate) {
3306 MachineInstr *Def = MRI.getVRegDef(Reg);
3307 Observer.changingInstr(*Def);
3308 // For each comparison, invert the predicate. For each AND and OR, change
3309 // the opcode.
3310 switch (Def->getOpcode()) {
3311 default:
3312 llvm_unreachable("Unexpected opcode");
3313 case TargetOpcode::G_ICMP:
3314 case TargetOpcode::G_FCMP: {
3315 MachineOperand &PredOp = Def->getOperand(1);
3316 CmpInst::Predicate NewP = CmpInst::getInversePredicate(
3317 (CmpInst::Predicate)PredOp.getPredicate());
3318 PredOp.setPredicate(NewP);
3319 break;
3320 }
3321 case TargetOpcode::G_AND:
3322 Def->setDesc(Builder.getTII().get(TargetOpcode::G_OR));
3323 break;
3324 case TargetOpcode::G_OR:
3325 Def->setDesc(Builder.getTII().get(TargetOpcode::G_AND));
3326 break;
3327 }
3328 Observer.changedInstr(*Def);
3329 }
3330
3331 replaceRegWith(MRI, MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
3332 MI.eraseFromParent();
3333}
3334
3335bool CombinerHelper::matchXorOfAndWithSameReg(
3336 MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
3337 // Match (xor (and x, y), y) (or any of its commuted cases)
3338 assert(MI.getOpcode() == TargetOpcode::G_XOR);
3339 Register &X = MatchInfo.first;
3340 Register &Y = MatchInfo.second;
3341 Register AndReg = MI.getOperand(1).getReg();
3342 Register SharedReg = MI.getOperand(2).getReg();
3343
3344 // Find a G_AND on either side of the G_XOR.
3345 // Look for one of
3346 //
3347 // (xor (and x, y), SharedReg)
3348 // (xor SharedReg, (and x, y))
3349 if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y)))) {
3350 std::swap(AndReg, SharedReg);
3351 if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y))))
3352 return false;
3353 }
3354
3355 // Only do this if we'll eliminate the G_AND.
3356 if (!MRI.hasOneNonDBGUse(AndReg))
3357 return false;
3358
3359 // We can combine if SharedReg is the same as either the LHS or RHS of the
3360 // G_AND.
3361 if (Y != SharedReg)
3362 std::swap(X, Y);
3363 return Y == SharedReg;
3364}
3365
3366void CombinerHelper::applyXorOfAndWithSameReg(
3367 MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
3368 // Fold (xor (and x, y), y) -> (and (not x), y)
3369 Register X, Y;
3370 std::tie(X, Y) = MatchInfo;
3371 auto Not = Builder.buildNot(MRI.getType(X), X);
3372 Observer.changingInstr(MI);
3373 MI.setDesc(Builder.getTII().get(TargetOpcode::G_AND));
3374 MI.getOperand(1).setReg(Not->getOperand(0).getReg());
3375 MI.getOperand(2).setReg(Y);
3376 Observer.changedInstr(MI);
3377}
3378
3379bool CombinerHelper::matchPtrAddZero(MachineInstr &MI) {
3380 auto &PtrAdd = cast<GPtrAdd>(MI);
3381 Register DstReg = PtrAdd.getReg(0);
3382 LLT Ty = MRI.getType(DstReg);
3383 const DataLayout &DL = Builder.getMF().getDataLayout();
3384
3385 if (DL.isNonIntegralAddressSpace(Ty.getScalarType().getAddressSpace()))
3386 return false;
3387
3388 if (Ty.isPointer()) {
3389 auto ConstVal = getIConstantVRegVal(PtrAdd.getBaseReg(), MRI);
3390 return ConstVal && *ConstVal == 0;
3391 }
3392
3393 assert(Ty.isVector() && "Expecting a vector type");
3394 const MachineInstr *VecMI = MRI.getVRegDef(PtrAdd.getBaseReg());
3395 return isBuildVectorAllZeros(*VecMI, MRI);
3396}
3397
3399 auto &PtrAdd = cast<GPtrAdd>(MI);
3400 Builder.buildIntToPtr(PtrAdd.getReg(0), PtrAdd.getOffsetReg());
3401 PtrAdd.eraseFromParent();
3402}
3403
3404/// The second source operand is known to be a power of 2.
3405void CombinerHelper::applySimplifyURemByPow2(MachineInstr &MI) {
3406 Register DstReg = MI.getOperand(0).getReg();
3407 Register Src0 = MI.getOperand(1).getReg();
3408 Register Pow2Src1 = MI.getOperand(2).getReg();
3409 LLT Ty = MRI.getType(DstReg);
3410
3411 // Fold (urem x, pow2) -> (and x, pow2-1)
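 // E.g. (G_UREM x, 8) --> (G_AND x, 7).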
3412 auto NegOne = Builder.buildConstant(Ty, -1);
3413 auto Add = Builder.buildAdd(Ty, Pow2Src1, NegOne);
3414 Builder.buildAnd(DstReg, Src0, Add);
3415 MI.eraseFromParent();
3416}
3417
3418bool CombinerHelper::matchFoldBinOpIntoSelect(MachineInstr &MI,
3419 unsigned &SelectOpNo) {
3420 Register LHS = MI.getOperand(1).getReg();
3421 Register RHS = MI.getOperand(2).getReg();
3422
3423 Register OtherOperandReg = RHS;
3424 SelectOpNo = 1;
3425 MachineInstr *Select = MRI.getVRegDef(LHS);
3426
3427 // Don't do this unless the old select is going away. We want to eliminate the
3428 // binary operator, not replace a binop with a select.
3429 if (Select->getOpcode() != TargetOpcode::G_SELECT ||
3430 !MRI.hasOneNonDBGUse(LHS)) {
3431 OtherOperandReg = LHS;
3432 SelectOpNo = 2;
3433 Select = MRI.getVRegDef(RHS);
3434 if (Select->getOpcode() != TargetOpcode::G_SELECT ||
3435 !MRI.hasOneNonDBGUse(RHS))
3436 return false;
3437 }
3438
3439 MachineInstr *SelectLHS = MRI.getVRegDef(Select->getOperand(2).getReg());
3440 MachineInstr *SelectRHS = MRI.getVRegDef(Select->getOperand(3).getReg());
3441
3442 if (!isConstantOrConstantVector(*SelectLHS, MRI,
3443 /*AllowFP*/ true,
3444 /*AllowOpaqueConstants*/ false))
3445 return false;
3446 if (!isConstantOrConstantVector(*SelectRHS, MRI,
3447 /*AllowFP*/ true,
3448 /*AllowOpaqueConstants*/ false))
3449 return false;
3450
3451 unsigned BinOpcode = MI.getOpcode();
3452
3453 // We know that one of the operands is a select of constants. Now verify that
3454 // the other binary operator operand is either a constant or a variable that
3455 // we can handle.
3456 bool CanFoldNonConst =
3457 (BinOpcode == TargetOpcode::G_AND || BinOpcode == TargetOpcode::G_OR) &&
3458 (isNullOrNullSplat(*SelectLHS, MRI) ||
3459 isAllOnesOrAllOnesSplat(*SelectLHS, MRI)) &&
3460 (isNullOrNullSplat(*SelectRHS, MRI) ||
3461 isAllOnesOrAllOnesSplat(*SelectRHS, MRI));
3462 if (CanFoldNonConst)
3463 return true;
3464
3465 return isConstantOrConstantVector(*MRI.getVRegDef(OtherOperandReg), MRI,
3466 /*AllowFP*/ true,
3467 /*AllowOpaqueConstants*/ false);
3468}
3469
3470/// \p SelectOperand is the operand in binary operator \p MI that is the select
3471/// to fold.
3473 const unsigned &SelectOperand) {
3474 Register Dst = MI.getOperand(0).getReg();
3475 Register LHS = MI.getOperand(1).getReg();
3476 Register RHS = MI.getOperand(2).getReg();
3477 MachineInstr *Select = MRI.getVRegDef(MI.getOperand(SelectOperand).getReg());
3478
3479 Register SelectCond = Select->getOperand(1).getReg();
3480 Register SelectTrue = Select->getOperand(2).getReg();
3481 Register SelectFalse = Select->getOperand(3).getReg();
3482
3483 LLT Ty = MRI.getType(Dst);
3484 unsigned BinOpcode = MI.getOpcode();
3485
3486 Register FoldTrue, FoldFalse;
3487
3488 // We have a select-of-constants followed by a binary operator with a
3489 // constant. Eliminate the binop by pulling the constant math into the select.
3490 // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
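  // Concrete case (illustrative only):
  //   add (select %cond, 1, 2), 10 --> select %cond, (add 1, 10), (add 2, 10)
  // where the two new adds are expected to constant fold to 11 and 12.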
3491 if (SelectOperand == 1) {
3492 // TODO: SelectionDAG verifies this actually constant folds before
3493 // committing to the combine.
3494
3495 FoldTrue = Builder.buildInstr(BinOpcode, {Ty}, {SelectTrue, RHS}).getReg(0);
3496 FoldFalse =
3497 Builder.buildInstr(BinOpcode, {Ty}, {SelectFalse, RHS}).getReg(0);
3498 } else {
3499 FoldTrue = Builder.buildInstr(BinOpcode, {Ty}, {LHS, SelectTrue}).getReg(0);
3500 FoldFalse =
3501 Builder.buildInstr(BinOpcode, {Ty}, {LHS, SelectFalse}).getReg(0);
3502 }
3503
3504 Builder.buildSelect(Dst, SelectCond, FoldTrue, FoldFalse, MI.getFlags());
3505 MI.eraseFromParent();
3506}
3507
3508std::optional<SmallVector<Register, 8>>
3509CombinerHelper::findCandidatesForLoadOrCombine(const MachineInstr *Root) const {
3510 assert(Root->getOpcode() == TargetOpcode::G_OR && "Expected G_OR only!");
3511 // We want to detect if Root is part of a tree which represents a bunch
3512 // of loads being merged into a larger load. We'll try to recognize patterns
3513 // like, for example:
3514 //
3515 // Reg Reg
3516 // \ /
3517 // OR_1 Reg
3518 // \ /
3519 // OR_2
3520 // \ Reg
3521 // .. /
3522 // Root
3523 //
3524 // Reg Reg Reg Reg
3525 // \ / \ /
3526 // OR_1 OR_2
3527 // \ /
3528 // \ /
3529 // ...
3530 // Root
3531 //
3532 // Each "Reg" may have been produced by a load + some arithmetic. This
3533 // function will save each of them.
3534 SmallVector<Register, 8> RegsToVisit;
3536
3537 // In the "worst" case, we're dealing with a load for each byte. So, there
3538 // are at most #bytes - 1 ORs.
3539 const unsigned MaxIter =
3540 MRI.getType(Root->getOperand(0).getReg()).getSizeInBytes() - 1;
3541 for (unsigned Iter = 0; Iter < MaxIter; ++Iter) {
3542 if (Ors.empty())
3543 break;
3544 const MachineInstr *Curr = Ors.pop_back_val();
3545 Register OrLHS = Curr->getOperand(1).getReg();
3546 Register OrRHS = Curr->getOperand(2).getReg();
3547
3548 // In the combine, we want to eliminate the entire tree.
3549 if (!MRI.hasOneNonDBGUse(OrLHS) || !MRI.hasOneNonDBGUse(OrRHS))
3550 return std::nullopt;
3551
3552 // If it's a G_OR, save it and continue to walk. If it's not, then it's
3553 // something that may be a load + arithmetic.
3554 if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrLHS, MRI))
3555 Ors.push_back(Or);
3556 else
3557 RegsToVisit.push_back(OrLHS);
3558 if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrRHS, MRI))
3559 Ors.push_back(Or);
3560 else
3561 RegsToVisit.push_back(OrRHS);
3562 }
3563
3564 // We're going to try to merge each register into a wider power-of-2 type,
3565 // so we ought to have an even number of registers.
3566 if (RegsToVisit.empty() || RegsToVisit.size() % 2 != 0)
3567 return std::nullopt;
3568 return RegsToVisit;
3569}
3570
3571/// Helper function for findLoadOffsetsForLoadOrCombine.
3572///
3573/// Check if \p Reg is the result of loading a \p MemSizeInBits wide value,
3574/// and then moving that value into a specific byte offset.
3575///
3576/// e.g. x[i] << 24
3577///
3578/// \returns The load instruction and the byte offset it is moved into.
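///
/// Illustrative MIR (hypothetical vregs), with \p MemSizeInBits == 8:
///   %ld:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8))
///   %sh:_(s32) = G_CONSTANT i32 24
///   %val:_(s32) = G_SHL %ld, %sh
/// would return the pair {the G_ZEXTLOAD, 3}, i.e. byte position 24 / 8.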
3579static std::optional<std::pair<GZExtLoad *, int64_t>>
3580matchLoadAndBytePosition(Register Reg, unsigned MemSizeInBits,
3581 const MachineRegisterInfo &MRI) {
3582 assert(MRI.hasOneNonDBGUse(Reg) &&
3583 "Expected Reg to only have one non-debug use?");
3584 Register MaybeLoad;
3585 int64_t Shift;
3586 if (!mi_match(Reg, MRI,
3587 m_OneNonDBGUse(m_GShl(m_Reg(MaybeLoad), m_ICst(Shift))))) {
3588 Shift = 0;
3589 MaybeLoad = Reg;
3590 }
3591
3592 if (Shift % MemSizeInBits != 0)
3593 return std::nullopt;
3594
3595 // TODO: Handle other types of loads.
3596 auto *Load = getOpcodeDef<GZExtLoad>(MaybeLoad, MRI);
3597 if (!Load)
3598 return std::nullopt;
3599
3600 if (!Load->isUnordered() || Load->getMemSizeInBits() != MemSizeInBits)
3601 return std::nullopt;
3602
3603 return std::make_pair(Load, Shift / MemSizeInBits);
3604}
3605
3606std::optional<std::tuple<GZExtLoad *, int64_t, GZExtLoad *>>
3607CombinerHelper::findLoadOffsetsForLoadOrCombine(
3609 const SmallVector<Register, 8> &RegsToVisit, const unsigned MemSizeInBits) {
3610
3611 // Each load found for the pattern. There should be one for each RegsToVisit.
3613
3614 // The lowest index used in any load. (The lowest "i" for each x[i].)
3615 int64_t LowestIdx = INT64_MAX;
3616
3617 // The load which uses the lowest index.
3618 GZExtLoad *LowestIdxLoad = nullptr;
3619
3620 // Keeps track of the load indices we see. We shouldn't see any indices twice.
3621 SmallSet<int64_t, 8> SeenIdx;
3622
3623 // Ensure each load is in the same MBB.
3624 // TODO: Support multiple MachineBasicBlocks.
3625 MachineBasicBlock *MBB = nullptr;
3626 const MachineMemOperand *MMO = nullptr;
3627
3628 // Earliest instruction-order load in the pattern.
3629 GZExtLoad *EarliestLoad = nullptr;
3630
3631 // Latest instruction-order load in the pattern.
3632 GZExtLoad *LatestLoad = nullptr;
3633
3634 // Base pointer which every load should share.
3636
3637 // We want to find a load for each register. Each load should have some
3638 // appropriate bit twiddling arithmetic. During this loop, we will also keep
3639 // track of the load which uses the lowest index. Later, we will check if we
3640 // can use its pointer in the final, combined load.
3641 for (auto Reg : RegsToVisit) {
3642 // Find the load, and the position that its value will occupy in the final
3643 // (possibly shifted) value.
3644 auto LoadAndPos = matchLoadAndBytePosition(Reg, MemSizeInBits, MRI);
3645 if (!LoadAndPos)
3646 return std::nullopt;
3647 GZExtLoad *Load;
3648 int64_t DstPos;
3649 std::tie(Load, DstPos) = *LoadAndPos;
3650
3651 // TODO: Handle multiple MachineBasicBlocks. Currently not handled because
3652 // it is difficult to check for stores/calls/etc between loads.
3653 MachineBasicBlock *LoadMBB = Load->getParent();
3654 if (!MBB)
3655 MBB = LoadMBB;
3656 if (LoadMBB != MBB)
3657 return std::nullopt;
3658
3659 // Make sure that the MachineMemOperands of every seen load are compatible.
3660 auto &LoadMMO = Load->getMMO();
3661 if (!MMO)
3662 MMO = &LoadMMO;
3663 if (MMO->getAddrSpace() != LoadMMO.getAddrSpace())
3664 return std::nullopt;
3665
3666 // Find out what the base pointer and index for the load is.
3667 Register LoadPtr;
3668 int64_t Idx;
3669 if (!mi_match(Load->getOperand(1).getReg(), MRI,
3670 m_GPtrAdd(m_Reg(LoadPtr), m_ICst(Idx)))) {
3671 LoadPtr = Load->getOperand(1).getReg();
3672 Idx = 0;
3673 }
3674
3675 // Don't combine things like a[i], a[i] -> a bigger load.
3676 if (!SeenIdx.insert(Idx).second)
3677 return std::nullopt;
3678
3679 // Every load must share the same base pointer; don't combine things like:
3680 //
3681 // a[i], b[i + 1] -> a bigger load.
3682 if (!BasePtr.isValid())
3683 BasePtr = LoadPtr;
3684 if (BasePtr != LoadPtr)
3685 return std::nullopt;
3686
3687 if (Idx < LowestIdx) {
3688 LowestIdx = Idx;
3689 LowestIdxLoad = Load;
3690 }
3691
3692 // Keep track of the byte offset that this load ends up at. If we have
3693 // already seen this byte offset, then stop here. We do not want to combine:
3694 //
3695 // a[i] << 16, a[i + k] << 16 -> a bigger load.
3696 if (!MemOffset2Idx.try_emplace(DstPos, Idx).second)
3697 return std::nullopt;
3698 Loads.insert(Load);
3699
3700 // Keep track of the position of the earliest/latest loads in the pattern.
3701 // We will check that there are no load fold barriers between them later
3702 // on.
3703 //
3704 // FIXME: Is there a better way to check for load fold barriers?
3705 if (!EarliestLoad || dominates(*Load, *EarliestLoad))
3706 EarliestLoad = Load;
3707 if (!LatestLoad || dominates(*LatestLoad, *Load))
3708 LatestLoad = Load;
3709 }
3710
3711 // We found a load for each register. Let's check if each load satisfies the
3712 // pattern.
3713 assert(Loads.size() == RegsToVisit.size() &&
3714 "Expected to find a load for each register?");
3715 assert(EarliestLoad != LatestLoad && EarliestLoad &&
3716 LatestLoad && "Expected at least two loads?");
3717
3718 // Check if there are any stores, calls, etc. between any of the loads. If
3719 // there are, then we can't safely perform the combine.
3720 //
3721 // MaxIter is chosen based on the (worst case) number of iterations it
3722 // typically takes to succeed in the LLVM test suite, plus some padding.
3723 //
3724 // FIXME: Is there a better way to check for load fold barriers?
3725 const unsigned MaxIter = 20;
3726 unsigned Iter = 0;
3727 for (const auto &MI : instructionsWithoutDebug(EarliestLoad->getIterator(),
3728 LatestLoad->getIterator())) {
3729 if (Loads.count(&MI))
3730 continue;
3731 if (MI.isLoadFoldBarrier())
3732 return std::nullopt;
3733 if (Iter++ == MaxIter)
3734 return std::nullopt;
3735 }
3736
3737 return std::make_tuple(LowestIdxLoad, LowestIdx, LatestLoad);
3738}
3739
3741 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
3742 assert(MI.getOpcode() == TargetOpcode::G_OR);
3743 MachineFunction &MF = *MI.getMF();
3744 // Assuming a little-endian target, transform:
3745 // s8 *a = ...
3746 // s32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
3747 // =>
3748 // s32 val = *((i32)a)
3749 //
3750 // s8 *a = ...
3751 // s32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
3752 // =>
3753 // s32 val = BSWAP(*((s32)a))
3754 Register Dst = MI.getOperand(0).getReg();
3755 LLT Ty = MRI.getType(Dst);
3756 if (Ty.isVector())
3757 return false;
3758
3759 // We need to combine at least two loads into this type. Since the smallest
3760 // possible load is into a byte, we need at least a 16-bit wide type.
3761 const unsigned WideMemSizeInBits = Ty.getSizeInBits();
3762 if (WideMemSizeInBits < 16 || WideMemSizeInBits % 8 != 0)
3763 return false;
3764
3765 // Match a collection of non-OR instructions in the pattern.
3766 auto RegsToVisit = findCandidatesForLoadOrCombine(&MI);
3767 if (!RegsToVisit)
3768 return false;
3769
3770 // We have a collection of non-OR instructions. Figure out how wide each of
3771 // the small loads should be based on the number of potential loads we
3772 // found.
3773 const unsigned NarrowMemSizeInBits = WideMemSizeInBits / RegsToVisit->size();
3774 if (NarrowMemSizeInBits % 8 != 0)
3775 return false;
3776
3777 // Check if each register feeding into each OR is a load from the same
3778 // base pointer + some arithmetic.
3779 //
3780 // e.g. a[0], a[1] << 8, a[2] << 16, etc.
3781 //
3782 // Also verify that each of these ends up putting a[i] into the same memory
3783 // offset as a load into a wide type would.
3785 GZExtLoad *LowestIdxLoad, *LatestLoad;
3786 int64_t LowestIdx;
3787 auto MaybeLoadInfo = findLoadOffsetsForLoadOrCombine(
3788 MemOffset2Idx, *RegsToVisit, NarrowMemSizeInBits);
3789 if (!MaybeLoadInfo)
3790 return false;
3791 std::tie(LowestIdxLoad, LowestIdx, LatestLoad) = *MaybeLoadInfo;
3792
3793 // We have a bunch of loads being OR'd together. Using the addresses + offsets
3794 // we found before, check if this corresponds to a big or little endian byte
3795 // pattern. If it does, then we can represent it using a load + possibly a
3796 // BSWAP.
3797 bool IsBigEndianTarget = MF.getDataLayout().isBigEndian();
3798 std::optional<bool> IsBigEndian = isBigEndian(MemOffset2Idx, LowestIdx);
3799 if (!IsBigEndian)
3800 return false;
3801 bool NeedsBSwap = IsBigEndianTarget != *IsBigEndian;
3802 if (NeedsBSwap && !isLegalOrBeforeLegalizer({TargetOpcode::G_BSWAP, {Ty}}))
3803 return false;
3804
3805 // Make sure that the load from the lowest index produces offset 0 in the
3806 // final value.
3807 //
3808 // This ensures that we won't combine something like this:
3809 //
3810 // load x[i] -> byte 2
3811 // load x[i+1] -> byte 0 ---> wide_load x[i]
3812 // load x[i+2] -> byte 1
3813 const unsigned NumLoadsInTy = WideMemSizeInBits / NarrowMemSizeInBits;
3814 const unsigned ZeroByteOffset =
3815 *IsBigEndian
3816 ? bigEndianByteAt(NumLoadsInTy, 0)
3817 : littleEndianByteAt(NumLoadsInTy, 0);
3818 auto ZeroOffsetIdx = MemOffset2Idx.find(ZeroByteOffset);
3819 if (ZeroOffsetIdx == MemOffset2Idx.end() ||
3820 ZeroOffsetIdx->second != LowestIdx)
3821 return false;
3822
3823 // We will reuse the pointer from the load which ends up at byte offset 0. It
3824 // may not use index 0.
3825 Register Ptr = LowestIdxLoad->getPointerReg();
3826 const MachineMemOperand &MMO = LowestIdxLoad->getMMO();
3827 LegalityQuery::MemDesc MMDesc(MMO);
3828 MMDesc.MemoryTy = Ty;
3830 {TargetOpcode::G_LOAD, {Ty, MRI.getType(Ptr)}, {MMDesc}}))
3831 return false;
3832 auto PtrInfo = MMO.getPointerInfo();
3833 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, WideMemSizeInBits / 8);
3834
3835 // Load must be allowed and fast on the target.
3837 auto &DL = MF.getDataLayout();
3838 unsigned Fast = 0;
3839 if (!getTargetLowering().allowsMemoryAccess(C, DL, Ty, *NewMMO, &Fast) ||
3840 !Fast)
3841 return false;
3842
3843 MatchInfo = [=](MachineIRBuilder &MIB) {
3844 MIB.setInstrAndDebugLoc(*LatestLoad);
3845 Register LoadDst = NeedsBSwap ? MRI.cloneVirtualRegister(Dst) : Dst;
3846 MIB.buildLoad(LoadDst, Ptr, *NewMMO);
3847 if (NeedsBSwap)
3848 MIB.buildBSwap(Dst, LoadDst);
3849 };
3850 return true;
3851}
3852
3854 MachineInstr *&ExtMI) {
3855 auto &PHI = cast<GPhi>(MI);
3856 Register DstReg = PHI.getReg(0);
3857
3858 // TODO: Extending a vector may be expensive; don't do this until the
3859 // heuristics are better.
3860 if (MRI.getType(DstReg).isVector())
3861 return false;
3862
3863 // Try to match a phi whose only use is an extend.
3864 if (!MRI.hasOneNonDBGUse(DstReg))
3865 return false;
3866 ExtMI = &*MRI.use_instr_nodbg_begin(DstReg);
3867 switch (ExtMI->getOpcode()) {
3868 case TargetOpcode::G_ANYEXT:
3869 return true; // G_ANYEXT is usually free.
3870 case TargetOpcode::G_ZEXT:
3871 case TargetOpcode::G_SEXT:
3872 break;
3873 default:
3874 return false;
3875 }
3876
3877 // If the target is likely to fold this extend away, don't propagate.
3879 return false;
3880
3881 // We don't want to propagate the extends unless there's a good chance that
3882 // they'll be optimized in some way.
3883 // Collect the unique incoming values.
3885 for (unsigned I = 0; I < PHI.getNumIncomingValues(); ++I) {
3886 auto *DefMI = getDefIgnoringCopies(PHI.getIncomingValue(I), MRI);
3887 switch (DefMI->getOpcode()) {
3888 case TargetOpcode::G_LOAD:
3889 case TargetOpcode::G_TRUNC:
3890 case TargetOpcode::G_SEXT:
3891 case TargetOpcode::G_ZEXT:
3892 case TargetOpcode::G_ANYEXT:
3893 case TargetOpcode::G_CONSTANT:
3894 InSrcs.insert(DefMI);
3895 // Don't try to propagate if there are too many places to create new
3896 // extends; chances are it'll increase code size.
3897 if (InSrcs.size() > 2)
3898 return false;
3899 break;
3900 default:
3901 return false;
3902 }
3903 }
3904 return true;
3905}
3906
3908 MachineInstr *&ExtMI) {
3909 auto &PHI = cast<GPhi>(MI);
3910 Register DstReg = ExtMI->getOperand(0).getReg();
3911 LLT ExtTy = MRI.getType(DstReg);
3912
3913 // Propagate the extension into each incoming register's defining block.
3914 // Use a SetVector here because PHIs can have duplicate edges, and we want
3915 // deterministic iteration order.
3918 for (unsigned I = 0; I < PHI.getNumIncomingValues(); ++I) {
3919 auto SrcReg = PHI.getIncomingValue(I);
3920 auto *SrcMI = MRI.getVRegDef(SrcReg);
3921 if (!SrcMIs.insert(SrcMI))
3922 continue;
3923
3924 // Build an extend after each src inst.
3925 auto *MBB = SrcMI->getParent();
3926 MachineBasicBlock::iterator InsertPt = ++SrcMI->getIterator();
3927 if (InsertPt != MBB->end() && InsertPt->isPHI())
3928 InsertPt = MBB->getFirstNonPHI();
3929
3930 Builder.setInsertPt(*SrcMI->getParent(), InsertPt);
3931 Builder.setDebugLoc(MI.getDebugLoc());
3932 auto NewExt = Builder.buildExtOrTrunc(ExtMI->getOpcode(), ExtTy, SrcReg);
3933 OldToNewSrcMap[SrcMI] = NewExt;
3934 }
3935
3936 // Create a new phi with the extended inputs.
3938 auto NewPhi = Builder.buildInstrNoInsert(TargetOpcode::G_PHI);
3939 NewPhi.addDef(DstReg);
3940 for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) {
3941 if (!MO.isReg()) {
3942 NewPhi.addMBB(MO.getMBB());
3943 continue;
3944 }
3945 auto *NewSrc = OldToNewSrcMap[MRI.getVRegDef(MO.getReg())];
3946 NewPhi.addUse(NewSrc->getOperand(0).getReg());
3947 }
3948 Builder.insertInstr(NewPhi);
3949 ExtMI->eraseFromParent();
3950}
3951
3953 Register &Reg) {
3954 assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT);
3955 // If we have a constant index, look for a G_BUILD_VECTOR source
3956 // and find the source register that the index maps to.
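  // E.g. (illustrative):
  //   %vec:_(<4 x s32>) = G_BUILD_VECTOR %a, %b, %c, %d
  //   %elt:_(s32) = G_EXTRACT_VECTOR_ELT %vec, 2
  // can be folded to %c (via a trunc in the build_vector_trunc case).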
3957 Register SrcVec = MI.getOperand(1).getReg();
3958 LLT SrcTy = MRI.getType(SrcVec);
3959
3960 auto Cst = getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
3961 if (!Cst || Cst->Value.getZExtValue() >= SrcTy.getNumElements())
3962 return false;
3963
3964 unsigned VecIdx = Cst->Value.getZExtValue();
3965
3966 // Check if we have a build_vector or build_vector_trunc with an optional
3967 // trunc in front.
3968 MachineInstr *SrcVecMI = MRI.getVRegDef(SrcVec);
3969 if (SrcVecMI->getOpcode() == TargetOpcode::G_TRUNC) {
3970 SrcVecMI = MRI.getVRegDef(SrcVecMI->getOperand(1).getReg());
3971 }
3972
3973 if (SrcVecMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR &&
3974 SrcVecMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR_TRUNC)
3975 return false;
3976
3977 EVT Ty(getMVTForLLT(SrcTy));
3978 if (!MRI.hasOneNonDBGUse(SrcVec) &&
3979 !getTargetLowering().aggressivelyPreferBuildVectorSources(Ty))
3980 return false;
3981
3982 Reg = SrcVecMI->getOperand(VecIdx + 1).getReg();
3983 return true;
3984}
3985
3987 Register &Reg) {
3988 // Check the type of the register, since it may have come from a
3989 // G_BUILD_VECTOR_TRUNC.
3990 LLT ScalarTy = MRI.getType(Reg);
3991 Register DstReg = MI.getOperand(0).getReg();
3992 LLT DstTy = MRI.getType(DstReg);
3993
3994 if (ScalarTy != DstTy) {
3995 assert(ScalarTy.getSizeInBits() > DstTy.getSizeInBits());
3996 Builder.buildTrunc(DstReg, Reg);
3997 MI.eraseFromParent();
3998 return;
3999 }
4001}
4002
4005 SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) {
4006 assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
4007 // This combine tries to find build_vectors whose every source element is
4008 // extracted using G_EXTRACT_VECTOR_ELT. This can happen when transforms like
4009 // masked-load scalarization are run late in the pipeline. There's already
4010 // a combine for a similar pattern starting from the extract, but that one
4011 // doesn't fire if the build_vector has multiple uses, which is the case
4012 // here. Starting the combine from the build_vector feels more natural than
4013 // trying to find sibling nodes of extracts.
4014 // E.g.
4015 // %vec(<4 x s32>) = G_BUILD_VECTOR %s1(s32), %s2, %s3, %s4
4016 // %ext1 = G_EXTRACT_VECTOR_ELT %vec, 0
4017 // %ext2 = G_EXTRACT_VECTOR_ELT %vec, 1
4018 // %ext3 = G_EXTRACT_VECTOR_ELT %vec, 2
4019 // %ext4 = G_EXTRACT_VECTOR_ELT %vec, 3
4020 // ==>
4021 // replace ext{1,2,3,4} with %s{1,2,3,4}
4022
4023 Register DstReg = MI.getOperand(0).getReg();
4024 LLT DstTy = MRI.getType(DstReg);
4025 unsigned NumElts = DstTy.getNumElements();
4026
4027 SmallBitVector ExtractedElts(NumElts);
4028 for (MachineInstr &II : MRI.use_nodbg_instructions(DstReg)) {
4029 if (II.getOpcode() != TargetOpcode::G_EXTRACT_VECTOR_ELT)
4030 return false;
4031 auto Cst = getIConstantVRegVal(II.getOperand(2).getReg(), MRI);
4032 if (!Cst)
4033 return false;
4034 unsigned Idx = Cst->getZExtValue();
4035 if (Idx >= NumElts)
4036 return false; // Out of range.
4037 ExtractedElts.set(Idx);
4038 SrcDstPairs.emplace_back(
4039 std::make_pair(MI.getOperand(Idx + 1).getReg(), &II));
4040 }
4041 // Match if every element was extracted.
4042 return ExtractedElts.all();
4043}
4044
4047 SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) {
4048 assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
4049 for (auto &Pair : SrcDstPairs) {
4050 auto *ExtMI = Pair.second;
4051 replaceRegWith(MRI, ExtMI->getOperand(0).getReg(), Pair.first);
4052 ExtMI->eraseFromParent();
4053 }
4054 MI.eraseFromParent();
4055}
4056
4058 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4059 applyBuildFnNoErase(MI, MatchInfo);
4060 MI.eraseFromParent();
4061}
4062
4064 BuildFnTy &MatchInfo) {
4067 MatchInfo(Builder);
4068 Root->eraseFromParent();
4069}
4070
4072 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4073 MatchInfo(Builder);
4074}
4075
4077 BuildFnTy &MatchInfo) {
4078 assert(MI.getOpcode() == TargetOpcode::G_OR);
4079
4080 Register Dst = MI.getOperand(0).getReg();
4081 LLT Ty = MRI.getType(Dst);
4082 unsigned BitWidth = Ty.getScalarSizeInBits();
4083
4084 Register ShlSrc, ShlAmt, LShrSrc, LShrAmt, Amt;
4085 unsigned FshOpc = 0;
4086
4087 // Match (or (shl ...), (lshr ...)).
4088 if (!mi_match(Dst, MRI,
4089 // m_GOr() handles the commuted version as well.
4090 m_GOr(m_GShl(m_Reg(ShlSrc), m_Reg(ShlAmt)),
4091 m_GLShr(m_Reg(LShrSrc), m_Reg(LShrAmt)))))
4092 return false;
4093
4094 // Given constants C0 and C1 such that C0 + C1 is bit-width:
4095 // (or (shl x, C0), (lshr y, C1)) -> (fshl x, y, C0) or (fshr x, y, C1)
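  // E.g. for s32 (illustrative): (or (shl x, 8), (lshr y, 24)) -> (fshr x, y, 24),
  // which is the same funnel shift as (fshl x, y, 8).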
4096 int64_t CstShlAmt, CstLShrAmt;
4097 if (mi_match(ShlAmt, MRI, m_ICstOrSplat(CstShlAmt)) &&
4098 mi_match(LShrAmt, MRI, m_ICstOrSplat(CstLShrAmt)) &&
4099 CstShlAmt + CstLShrAmt == BitWidth) {
4100 FshOpc = TargetOpcode::G_FSHR;
4101 Amt = LShrAmt;
4102
4103 } else if (mi_match(LShrAmt, MRI,
4105 ShlAmt == Amt) {
4106 // (or (shl x, amt), (lshr y, (sub bw, amt))) -> (fshl x, y, amt)
4107 FshOpc = TargetOpcode::G_FSHL;
4108
4109 } else if (mi_match(ShlAmt, MRI,
4111 LShrAmt == Amt) {
4112 // (or (shl x, (sub bw, amt)), (lshr y, amt)) -> (fshr x, y, amt)
4113 FshOpc = TargetOpcode::G_FSHR;
4114
4115 } else {
4116 return false;
4117 }
4118
4119 LLT AmtTy = MRI.getType(Amt);
4120 if (!isLegalOrBeforeLegalizer({FshOpc, {Ty, AmtTy}}))
4121 return false;
4122
4123 MatchInfo = [=](MachineIRBuilder &B) {
4124 B.buildInstr(FshOpc, {Dst}, {ShlSrc, LShrSrc, Amt});
4125 };
4126 return true;
4127}
4128
4129/// Match an FSHL or FSHR that can be combined to a ROTL or ROTR rotate.
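/// E.g. (illustrative): G_FSHL %x, %x, %amt --> G_ROTL %x, %amt, and
/// G_FSHR %x, %x, %amt --> G_ROTR %x, %amt.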
4131 unsigned Opc = MI.getOpcode();
4132 assert(Opc == TargetOpcode::G_FSHL || Opc == TargetOpcode::G_FSHR);
4133 Register X = MI.getOperand(1).getReg();
4134 Register Y = MI.getOperand(2).getReg();
4135 if (X != Y)
4136 return false;
4137 unsigned RotateOpc =
4138 Opc == TargetOpcode::G_FSHL ? TargetOpcode::G_ROTL : TargetOpcode::G_ROTR;
4139 return isLegalOrBeforeLegalizer({RotateOpc, {MRI.getType(X), MRI.getType(Y)}});
4140}
4141
4143 unsigned Opc = MI.getOpcode();
4144 assert(Opc == TargetOpcode::G_FSHL || Opc == TargetOpcode::G_FSHR);
4145 bool IsFSHL = Opc == TargetOpcode::G_FSHL;
4147 MI.setDesc(Builder.getTII().get(IsFSHL ? TargetOpcode::G_ROTL
4148 : TargetOpcode::G_ROTR));
4149 MI.removeOperand(2);
4151}
4152
4153// Fold (rot x, c) -> (rot x, c % BitSize)
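// E.g. for a 32-bit rotate (illustrative): (rotl x, 40) -> (rotl x, 8), since
// 40 % 32 == 8; the urem on the amount is emitted by the apply below.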
4155 assert(MI.getOpcode() == TargetOpcode::G_ROTL ||
4156 MI.getOpcode() == TargetOpcode::G_ROTR);
4157 unsigned Bitsize =
4158 MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits();
4159 Register AmtReg = MI.getOperand(2).getReg();
4160 bool OutOfRange = false;
4161 auto MatchOutOfRange = [Bitsize, &OutOfRange](const Constant *C) {
4162 if (auto *CI = dyn_cast<ConstantInt>(C))
4163 OutOfRange |= CI->getValue().uge(Bitsize);
4164 return true;
4165 };
4166 return matchUnaryPredicate(MRI, AmtReg, MatchOutOfRange) && OutOfRange;
4167}
4168
4170 assert(MI.getOpcode() == TargetOpcode::G_ROTL ||
4171 MI.getOpcode() == TargetOpcode::G_ROTR);
4172 unsigned Bitsize =
4173 MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits();
4174 Register Amt = MI.getOperand(2).getReg();
4175 LLT AmtTy = MRI.getType(Amt);
4176 auto Bits = Builder.buildConstant(AmtTy, Bitsize);
4177 Amt = Builder.buildURem(AmtTy, MI.getOperand(2).getReg(), Bits).getReg(0);
4179 MI.getOperand(2).setReg(Amt);
4181}
4182
4184 int64_t &MatchInfo) {
4185 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
4186 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
4187 auto KnownLHS = KB->getKnownBits(MI.getOperand(2).getReg());
4188 auto KnownRHS = KB->getKnownBits(MI.getOperand(3).getReg());
4189 std::optional<bool> KnownVal;
4190 switch (Pred) {
4191 default:
4192 llvm_unreachable("Unexpected G_ICMP predicate?");
4193 case CmpInst::ICMP_EQ:
4194 KnownVal = KnownBits::eq(KnownLHS, KnownRHS);
4195 break;
4196 case CmpInst::ICMP_NE:
4197 KnownVal = KnownBits::ne(KnownLHS, KnownRHS);
4198 break;
4199 case CmpInst::ICMP_SGE:
4200 KnownVal = KnownBits::sge(KnownLHS, KnownRHS);
4201 break;
4202 case CmpInst::ICMP_SGT:
4203 KnownVal = KnownBits::sgt(KnownLHS, KnownRHS);
4204 break;
4205 case CmpInst::ICMP_SLE:
4206 KnownVal = KnownBits::sle(KnownLHS, KnownRHS);
4207 break;
4208 case CmpInst::ICMP_SLT:
4209 KnownVal = KnownBits::slt(KnownLHS, KnownRHS);
4210 break;
4211 case CmpInst::ICMP_UGE:
4212 KnownVal = KnownBits::uge(KnownLHS, KnownRHS);
4213 break;
4214 case CmpInst::ICMP_UGT:
4215 KnownVal = KnownBits::ugt(KnownLHS, KnownRHS);
4216 break;
4217 case CmpInst::ICMP_ULE:
4218 KnownVal = KnownBits::ule(KnownLHS, KnownRHS);
4219 break;
4220 case CmpInst::ICMP_ULT:
4221 KnownVal = KnownBits::ult(KnownLHS, KnownRHS);
4222 break;
4223 }
4224 if (!KnownVal)
4225 return false;
4226 MatchInfo =
4227 *KnownVal
4229 /*IsVector = */
4230 MRI.getType(MI.getOperand(0).getReg()).isVector(),
4231 /* IsFP = */ false)
4232 : 0;
4233 return true;
4234}
4235
4237 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4238 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
4239 // Given:
4240 //
4241 // %x = G_WHATEVER (... x is known to be 0 or 1 ...)
4242 // %cmp = G_ICMP ne %x, 0
4243 //
4244 // Or:
4245 //
4246 // %x = G_WHATEVER (... x is known to be 0 or 1 ...)
4247 // %cmp = G_ICMP eq %x, 1
4248 //
4249 // We can replace %cmp with %x assuming true is 1 on the target.
4250 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
4251 if (!CmpInst::isEquality(Pred))
4252 return false;
4253 Register Dst = MI.getOperand(0).getReg();
4254 LLT DstTy = MRI.getType(Dst);
4256 /* IsFP = */ false) != 1)
4257 return false;
4258 int64_t OneOrZero = Pred == CmpInst::ICMP_EQ;
4259 if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICst(OneOrZero)))
4260 return false;
4261 Register LHS = MI.getOperand(2).getReg();
4262 auto KnownLHS = KB->getKnownBits(LHS);
4263 if (KnownLHS.getMinValue() != 0 || KnownLHS.getMaxValue() != 1)
4264 return false;
4265 // Make sure replacing Dst with the LHS is a legal operation.
4266 LLT LHSTy = MRI.getType(LHS);
4267 unsigned LHSSize = LHSTy.getSizeInBits();
4268 unsigned DstSize = DstTy.getSizeInBits();
4269 unsigned Op = TargetOpcode::COPY;
4270 if (DstSize != LHSSize)
4271 Op = DstSize < LHSSize ? TargetOpcode::G_TRUNC : TargetOpcode::G_ZEXT;
4272 if (!isLegalOrBeforeLegalizer({Op, {DstTy, LHSTy}}))
4273 return false;
4274 MatchInfo = [=](MachineIRBuilder &B) { B.buildInstr(Op, {Dst}, {LHS}); };
4275 return true;
4276}
4277
4278// Replace (and (or x, c1), c2) with (and x, c2) iff c1 & c2 == 0
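// E.g. (illustrative): (and (or x, 0xF0), 0x0F) -> (and x, 0x0F), because the
// or cannot set any bit that survives the mask (0xF0 & 0x0F == 0).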
4280 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4281 assert(MI.getOpcode() == TargetOpcode::G_AND);
4282
4283 // Ignore vector types to simplify matching the two constants.
4284 // TODO: do this for vectors and scalars via a demanded bits analysis.
4285 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4286 if (Ty.isVector())
4287 return false;
4288
4289 Register Src;
4290 Register AndMaskReg;
4291 int64_t AndMaskBits;
4292 int64_t OrMaskBits;
4293 if (!mi_match(MI, MRI,
4294 m_GAnd(m_GOr(m_Reg(Src), m_ICst(OrMaskBits)),
4295 m_all_of(m_ICst(AndMaskBits), m_Reg(AndMaskReg)))))
4296 return false;
4297
4298 // Check if OrMask could turn on any bits in Src.
4299 if (AndMaskBits & OrMaskBits)
4300 return false;
4301
4302 MatchInfo = [=, &MI](MachineIRBuilder &B) {
4304 // Canonicalize the result to have the constant on the RHS.
4305 if (MI.getOperand(1).getReg() == AndMaskReg)
4306 MI.getOperand(2).setReg(AndMaskReg);
4307 MI.getOperand(1).setReg(Src);
4309 };
4310 return true;
4311}
4312
4313/// Form a G_SBFX from a G_SEXT_INREG fed by a right shift.
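/// E.g. (illustrative, s32): G_SEXT_INREG (G_LSHR %x, 8), 8 --> G_SBFX %x, 8, 8,
/// i.e. sign-extract the 8 bits starting at bit 8.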
4315 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4316 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
4317 Register Dst = MI.getOperand(0).getReg();
4318 Register Src = MI.getOperand(1).getReg();
4319 LLT Ty = MRI.getType(Src);
4321 if (!LI || !LI->isLegalOrCustom({TargetOpcode::G_SBFX, {Ty, ExtractTy}}))
4322 return false;
4323 int64_t Width = MI.getOperand(2).getImm();
4324 Register ShiftSrc;
4325 int64_t ShiftImm;
4326 if (!mi_match(
4327 Src, MRI,
4328 m_OneNonDBGUse(m_any_of(m_GAShr(m_Reg(ShiftSrc), m_ICst(ShiftImm)),
4329 m_GLShr(m_Reg(ShiftSrc), m_ICst(ShiftImm))))))
4330 return false;
4331 if (ShiftImm < 0 || ShiftImm + Width > Ty.getScalarSizeInBits())
4332 return false;
4333
4334 MatchInfo = [=](MachineIRBuilder &B) {
4335 auto Cst1 = B.buildConstant(ExtractTy, ShiftImm);
4336 auto Cst2 = B.buildConstant(ExtractTy, Width);
4337 B.buildSbfx(Dst, ShiftSrc, Cst1, Cst2);
4338 };
4339 return true;
4340}
4341
4342/// Form a G_UBFX from "(a srl b) & mask", where b and mask are constants.
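/// E.g. (illustrative, s32): (and (lshr %x, 4), 0xFF) --> G_UBFX %x, 4, 8,
/// i.e. zero-extract the 8 bits starting at bit 4.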
4344 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4345 assert(MI.getOpcode() == TargetOpcode::G_AND);
4346 Register Dst = MI.getOperand(0).getReg();
4347 LLT Ty = MRI.getType(Dst);
4349 if (LI && !LI->isLegalOrCustom({TargetOpcode::G_UBFX, {Ty, ExtractTy}}))
4350 return false;
4351
4352 int64_t AndImm, LSBImm;
4353 Register ShiftSrc;
4354 const unsigned Size = Ty.getScalarSizeInBits();
4355 if (!mi_match(MI.getOperand(0).getReg(), MRI,
4356 m_GAnd(m_OneNonDBGUse(m_GLShr(m_Reg(ShiftSrc), m_ICst(LSBImm))),
4357 m_ICst(AndImm))))
4358 return false;
4359
4360 // The mask is a mask of the low bits iff imm & (imm+1) == 0.
4361 auto MaybeMask = static_cast<uint64_t>(AndImm);
4362 if (MaybeMask & (MaybeMask + 1))
4363 return false;
4364
4365 // LSB must fit within the register.
4366 if (static_cast<uint64_t>(LSBImm) >= Size)
4367 return false;
4368
4369 uint64_t Width = APInt(Size, AndImm).countr_one();
4370 MatchInfo = [=](MachineIRBuilder &B) {
4371 auto WidthCst = B.buildConstant(ExtractTy, Width);
4372 auto LSBCst = B.buildConstant(ExtractTy, LSBImm);
4373 B.buildInstr(TargetOpcode::G_UBFX, {Dst}, {ShiftSrc, LSBCst, WidthCst});
4374 };
4375 return true;
4376}
4377
4379 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4380 const unsigned Opcode = MI.getOpcode();
4381 assert(Opcode == TargetOpcode::G_ASHR || Opcode == TargetOpcode::G_LSHR);
4382
4383 const Register Dst = MI.getOperand(0).getReg();
4384
4385 const unsigned ExtrOpcode = Opcode == TargetOpcode::G_ASHR
4386 ? TargetOpcode::G_SBFX
4387 : TargetOpcode::G_UBFX;
4388
4389 // Check if the type we would use for the extract is legal
4390 LLT Ty = MRI.getType(Dst);
4392 if (!LI || !LI->isLegalOrCustom({ExtrOpcode, {Ty, ExtractTy}}))
4393 return false;
4394
4395 Register ShlSrc;
4396 int64_t ShrAmt;
4397 int64_t ShlAmt;
4398 const unsigned Size = Ty.getScalarSizeInBits();
4399
4400 // Try to match shr (shl x, c1), c2
4401 if (!mi_match(Dst, MRI,
4402 m_BinOp(Opcode,
4403 m_OneNonDBGUse(m_GShl(m_Reg(ShlSrc), m_ICst(ShlAmt))),
4404 m_ICst(ShrAmt))))
4405 return false;
4406
4407 // Make sure that the shift sizes can fit a bitfield extract
4408 if (ShlAmt < 0 || ShlAmt > ShrAmt || ShrAmt >= Size)
44