CombinerHelper.cpp
1//===-- lib/CodeGen/GlobalISel/CombinerHelper.cpp -------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
9#include "llvm/ADT/APFloat.h"
10#include "llvm/ADT/STLExtras.h"
11#include "llvm/ADT/SetVector.h"
33#include "llvm/IR/DataLayout.h"
34#include "llvm/IR/InstrTypes.h"
40#include <cmath>
41#include <optional>
42#include <tuple>
43
44#define DEBUG_TYPE "gi-combiner"
45
46using namespace llvm;
47using namespace MIPatternMatch;
48
49// Option to allow testing of the combiner while no targets know about indexed
50// addressing.
51static cl::opt<bool>
52 ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false),
53 cl::desc("Force all indexed operations to be "
54 "legal for the GlobalISel combiner"));
55
56CombinerHelper::CombinerHelper(GISelChangeObserver &Observer,
57 MachineIRBuilder &B, bool IsPreLegalize,
58 GISelKnownBits *KB, MachineDominatorTree *MDT,
59 const LegalizerInfo *LI)
60 : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer), KB(KB),
61 MDT(MDT), IsPreLegalize(IsPreLegalize), LI(LI),
62 RBI(Builder.getMF().getSubtarget().getRegBankInfo()),
63 TRI(Builder.getMF().getSubtarget().getRegisterInfo()) {
64 (void)this->KB;
65}
66
69}
70
72 return Builder.getMF();
73}
74
77}
78
80
81/// \returns The little endian in-memory byte position of byte \p I in a
82/// \p ByteWidth bytes wide type.
83///
84/// E.g. Given a 4-byte type x, x[0] -> byte 0
85static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I) {
86 assert(I < ByteWidth && "I must be in [0, ByteWidth)");
87 return I;
88}
89
90/// Determines the LogBase2 value for a non-null input value using the
91/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
92static Register buildLogBase2(Register V, MachineIRBuilder &MIB) {
93 auto &MRI = *MIB.getMRI();
94 LLT Ty = MRI.getType(V);
95 auto Ctlz = MIB.buildCTLZ(Ty, V);
96 auto Base = MIB.buildConstant(Ty, Ty.getScalarSizeInBits() - 1);
97 return MIB.buildSub(Ty, Base, Ctlz).getReg(0);
98}
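// Illustrative sketch (not from the original source): for a 32-bit scalar V
// the sequence built above is
//   %ctlz:_(s32) = G_CTLZ %v
//   %c31:_(s32)  = G_CONSTANT i32 31
//   %log2:_(s32) = G_SUB %c31, %ctlz
// e.g. V = 8 gives ctlz(8) = 28 and 31 - 28 = 3 = log2(8). Register names are
// hypothetical.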
99
100/// \returns The big endian in-memory byte position of byte \p I in a
101/// \p ByteWidth bytes wide type.
102///
103/// E.g. Given a 4-byte type x, x[0] -> byte 3
104static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I) {
105 assert(I < ByteWidth && "I must be in [0, ByteWidth)");
106 return ByteWidth - I - 1;
107}
108
109/// Given a map from byte offsets in memory to indices in a load/store,
110/// determine if that map corresponds to a little or big endian byte pattern.
111///
112/// \param MemOffset2Idx maps memory offsets to address offsets.
113/// \param LowestIdx is the lowest index in \p MemOffset2Idx.
114///
115/// \returns true if the map corresponds to a big endian byte pattern, false if
116/// it corresponds to a little endian byte pattern, and std::nullopt otherwise.
117///
118/// E.g. given a 32-bit type x, and x[AddrOffset], the in-memory byte patterns
119/// are as follows:
120///
121/// AddrOffset Little endian Big endian
122/// 0 0 3
123/// 1 1 2
124/// 2 2 1
125/// 3 3 0
126static std::optional<bool>
128 int64_t LowestIdx) {
129 // Need at least two byte positions to decide on endianness.
130 unsigned Width = MemOffset2Idx.size();
131 if (Width < 2)
132 return std::nullopt;
133 bool BigEndian = true, LittleEndian = true;
134 for (unsigned MemOffset = 0; MemOffset < Width; ++MemOffset) {
135 auto MemOffsetAndIdx = MemOffset2Idx.find(MemOffset);
136 if (MemOffsetAndIdx == MemOffset2Idx.end())
137 return std::nullopt;
138 const int64_t Idx = MemOffsetAndIdx->second - LowestIdx;
139 assert(Idx >= 0 && "Expected non-negative byte offset?");
140 LittleEndian &= Idx == littleEndianByteAt(Width, MemOffset);
141 BigEndian &= Idx == bigEndianByteAt(Width, MemOffset);
142 if (!BigEndian && !LittleEndian)
143 return std::nullopt;
144 }
145
146 assert((BigEndian != LittleEndian) &&
147 "Pattern cannot be both big and little endian!");
148 return BigEndian;
149}
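// Worked example for isBigEndian (illustrative): with LowestIdx = 0 and a
// 4-byte pattern,
//   {0 -> 3, 1 -> 2, 2 -> 1, 3 -> 0} returns true  (big endian),
//   {0 -> 0, 1 -> 1, 2 -> 2, 3 -> 3} returns false (little endian),
//   {0 -> 2, 1 -> 0, 2 -> 1, 3 -> 3} returns std::nullopt (neither).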
150
152
153bool CombinerHelper::isLegal(const LegalityQuery &Query) const {
154 assert(LI && "Must have LegalizerInfo to query isLegal!");
155 return LI->getAction(Query).Action == LegalizeActions::Legal;
156}
157
159 const LegalityQuery &Query) const {
160 return isPreLegalize() || isLegal(Query);
161}
162
164 if (!Ty.isVector())
165 return isLegalOrBeforeLegalizer({TargetOpcode::G_CONSTANT, {Ty}});
166 // Vector constants are represented as a G_BUILD_VECTOR of scalar G_CONSTANTs.
167 if (isPreLegalize())
168 return true;
169 LLT EltTy = Ty.getElementType();
170 return isLegal({TargetOpcode::G_BUILD_VECTOR, {Ty, EltTy}}) &&
171 isLegal({TargetOpcode::G_CONSTANT, {EltTy}});
172}
173
175 Register ToReg) const {
177
178 if (MRI.constrainRegAttrs(ToReg, FromReg))
179 MRI.replaceRegWith(FromReg, ToReg);
180 else
181 Builder.buildCopy(ToReg, FromReg);
182
184}
185
187 MachineOperand &FromRegOp,
188 Register ToReg) const {
189 assert(FromRegOp.getParent() && "Expected an operand in an MI");
190 Observer.changingInstr(*FromRegOp.getParent());
191
192 FromRegOp.setReg(ToReg);
193
194 Observer.changedInstr(*FromRegOp.getParent());
195}
196
198 unsigned ToOpcode) const {
199 Observer.changingInstr(FromMI);
200
201 FromMI.setDesc(Builder.getTII().get(ToOpcode));
202
203 Observer.changedInstr(FromMI);
204}
205
207 return RBI->getRegBank(Reg, MRI, *TRI);
208}
209
211 if (RegBank)
212 MRI.setRegBank(Reg, *RegBank);
213}
214
216 if (matchCombineCopy(MI)) {
218 return true;
219 }
220 return false;
221}
223 if (MI.getOpcode() != TargetOpcode::COPY)
224 return false;
225 Register DstReg = MI.getOperand(0).getReg();
226 Register SrcReg = MI.getOperand(1).getReg();
227 return canReplaceReg(DstReg, SrcReg, MRI);
228}
230 Register DstReg = MI.getOperand(0).getReg();
231 Register SrcReg = MI.getOperand(1).getReg();
232 MI.eraseFromParent();
233 replaceRegWith(MRI, DstReg, SrcReg);
234}
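// Example (illustrative): %b:_(s32) = COPY %a:_(s32), where the register
// attributes of %a and %b can be constrained to match, is erased and all
// uses of %b are rewritten to use %a.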
235
237 MachineInstr &MI, BuildFnTy &MatchInfo) {
238 // Ported from InstCombinerImpl::pushFreezeToPreventPoisonFromPropagating.
239 Register DstOp = MI.getOperand(0).getReg();
240 Register OrigOp = MI.getOperand(1).getReg();
241
242 if (!MRI.hasOneNonDBGUse(OrigOp))
243 return false;
244
245 MachineInstr *OrigDef = MRI.getUniqueVRegDef(OrigOp);
246 // Even if only a single operand of the PHI is not guaranteed non-poison,
247 // moving freeze() backwards across a PHI can cause optimization issues for
248 // other users of that operand.
249 //
250 // Moving freeze() from one of the output registers of a G_UNMERGE_VALUES to
251 // the source register is unprofitable because it makes the freeze() more
252 // strict than is necessary (it would affect the whole register instead of
253 // just the subreg being frozen).
254 if (OrigDef->isPHI() || isa<GUnmerge>(OrigDef))
255 return false;
256
257 if (canCreateUndefOrPoison(OrigOp, MRI,
258 /*ConsiderFlagsAndMetadata=*/false))
259 return false;
260
261 std::optional<MachineOperand> MaybePoisonOperand;
262 for (MachineOperand &Operand : OrigDef->uses()) {
263 if (!Operand.isReg())
264 return false;
265
266 if (isGuaranteedNotToBeUndefOrPoison(Operand.getReg(), MRI))
267 continue;
268
269 if (!MaybePoisonOperand)
270 MaybePoisonOperand = Operand;
271 else {
272 // We have more than one maybe-poison operand. Moving the freeze is
273 // unsafe.
274 return false;
275 }
276 }
277
278 // Eliminate freeze if all operands are guaranteed non-poison.
279 if (!MaybePoisonOperand) {
280 MatchInfo = [=](MachineIRBuilder &B) {
281 Observer.changingInstr(*OrigDef);
282 cast<GenericMachineInstr>(OrigDef)->dropPoisonGeneratingFlags();
283 Observer.changedInstr(*OrigDef);
284 B.buildCopy(DstOp, OrigOp);
285 };
286 return true;
287 }
288
289 Register MaybePoisonOperandReg = MaybePoisonOperand->getReg();
290 LLT MaybePoisonOperandRegTy = MRI.getType(MaybePoisonOperandReg);
291
292 MatchInfo = [=](MachineIRBuilder &B) mutable {
293 Observer.changingInstr(*OrigDef);
294 cast<GenericMachineInstr>(OrigDef)->dropPoisonGeneratingFlags();
295 Observer.changedInstr(*OrigDef);
296 B.setInsertPt(*OrigDef->getParent(), OrigDef->getIterator());
297 auto Freeze = B.buildFreeze(MaybePoisonOperandRegTy, MaybePoisonOperandReg);
299 MRI, *OrigDef->findRegisterUseOperand(MaybePoisonOperandReg, TRI),
300 Freeze.getReg(0));
301 replaceRegWith(MRI, DstOp, OrigOp);
302 };
303 return true;
304}
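// Illustrative sketch of the transform (hypothetical MIR): with %a
// maybe-poison and %b known non-poison,
//   %x:_(s32) = G_ADD %a, %b
//   %f:_(s32) = G_FREEZE %x
// becomes
//   %fa:_(s32) = G_FREEZE %a
//   %x:_(s32)  = G_ADD %fa, %b   ; poison-generating flags dropped
// and the uses of %f are rewritten to use %x.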
305
308 assert(MI.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
309 "Invalid instruction");
310 bool IsUndef = true;
311 MachineInstr *Undef = nullptr;
312
313 // Walk over all the operands of concat vectors and check if they are
314 // build_vector themselves or undef.
315 // Then collect their operands in Ops.
316 for (const MachineOperand &MO : MI.uses()) {
317 Register Reg = MO.getReg();
318 MachineInstr *Def = MRI.getVRegDef(Reg);
319 assert(Def && "Operand not defined");
320 if (!MRI.hasOneNonDBGUse(Reg))
321 return false;
322 switch (Def->getOpcode()) {
323 case TargetOpcode::G_BUILD_VECTOR:
324 IsUndef = false;
325 // Remember the operands of the build_vector to fold
326 // them into the yet-to-build flattened concat vectors.
327 for (const MachineOperand &BuildVecMO : Def->uses())
328 Ops.push_back(BuildVecMO.getReg());
329 break;
330 case TargetOpcode::G_IMPLICIT_DEF: {
331 LLT OpType = MRI.getType(Reg);
332 // Keep one undef value for all the undef operands.
333 if (!Undef) {
334 Builder.setInsertPt(*MI.getParent(), MI);
335 Undef = Builder.buildUndef(OpType.getScalarType());
336 }
337 assert(MRI.getType(Undef->getOperand(0).getReg()) ==
338 OpType.getScalarType() &&
339 "All undefs should have the same type");
340 // Break the undef vector in as many scalar elements as needed
341 // for the flattening.
342 for (unsigned EltIdx = 0, EltEnd = OpType.getNumElements();
343 EltIdx != EltEnd; ++EltIdx)
344 Ops.push_back(Undef->getOperand(0).getReg());
345 break;
346 }
347 default:
348 return false;
349 }
350 }
351
352 // Check if the combine is illegal
353 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
355 {TargetOpcode::G_BUILD_VECTOR, {DstTy, MRI.getType(Ops[0])}})) {
356 return false;
357 }
358
359 if (IsUndef)
360 Ops.clear();
361
362 return true;
363}
366 // We determined that the concat_vectors can be flattened.
367 // Generate the flattened build_vector.
368 Register DstReg = MI.getOperand(0).getReg();
369 Builder.setInsertPt(*MI.getParent(), MI);
370 Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
371
372 // Note: IsUndef is sort of redundant. We could have determined it by
373 // checking that all Ops are undef. Alternatively, we could have
374 // generated a build_vector of undefs and relied on another combine to
375 // clean that up. For now, given we already gather this information
376 // in matchCombineConcatVectors, just save compile time and issue the
377 // right thing.
378 if (Ops.empty())
379 Builder.buildUndef(NewDstReg);
380 else
381 Builder.buildBuildVector(NewDstReg, Ops);
382 MI.eraseFromParent();
383 replaceRegWith(MRI, DstReg, NewDstReg);
384}
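// Illustrative example (hypothetical MIR) of the flattening performed above:
//   %v0:_(<2 x s32>) = G_BUILD_VECTOR %a, %b
//   %v1:_(<2 x s32>) = G_BUILD_VECTOR %c, %d
//   %r:_(<4 x s32>)  = G_CONCAT_VECTORS %v0, %v1
// becomes
//   %r:_(<4 x s32>)  = G_BUILD_VECTOR %a, %b, %c, %d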
385
388 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
389 auto ConcatMI1 =
390 dyn_cast<GConcatVectors>(MRI.getVRegDef(MI.getOperand(1).getReg()));
391 auto ConcatMI2 =
392 dyn_cast<GConcatVectors>(MRI.getVRegDef(MI.getOperand(2).getReg()));
393 if (!ConcatMI1 || !ConcatMI2)
394 return false;
395
396 // Check that the sources of the Concat instructions have the same type
397 if (MRI.getType(ConcatMI1->getSourceReg(0)) !=
398 MRI.getType(ConcatMI2->getSourceReg(0)))
399 return false;
400
401 LLT ConcatSrcTy = MRI.getType(ConcatMI1->getReg(1));
402 LLT ShuffleSrcTy1 = MRI.getType(MI.getOperand(1).getReg());
403 unsigned ConcatSrcNumElt = ConcatSrcTy.getNumElements();
404 for (unsigned i = 0; i < Mask.size(); i += ConcatSrcNumElt) {
405 // Check if the index takes a whole source register from G_CONCAT_VECTORS
406 // Assumes that all sources of G_CONCAT_VECTORS have the same type.
407 if (Mask[i] == -1) {
408 for (unsigned j = 1; j < ConcatSrcNumElt; j++) {
409 if (i + j >= Mask.size())
410 return false;
411 if (Mask[i + j] != -1)
412 return false;
413 }
415 {TargetOpcode::G_IMPLICIT_DEF, {ConcatSrcTy}}))
416 return false;
417 Ops.push_back(0);
418 } else if (Mask[i] % ConcatSrcNumElt == 0) {
419 for (unsigned j = 1; j < ConcatSrcNumElt; j++) {
420 if (i + j >= Mask.size())
421 return false;
422 if (Mask[i + j] != Mask[i] + static_cast<int>(j))
423 return false;
424 }
425 // Retrieve the source register from its respective G_CONCAT_VECTORS
426 // instruction
427 if (Mask[i] < ShuffleSrcTy1.getNumElements()) {
428 Ops.push_back(ConcatMI1->getSourceReg(Mask[i] / ConcatSrcNumElt));
429 } else {
430 Ops.push_back(ConcatMI2->getSourceReg(Mask[i] / ConcatSrcNumElt -
431 ConcatMI1->getNumSources()));
432 }
433 } else {
434 return false;
435 }
436 }
437
439 {TargetOpcode::G_CONCAT_VECTORS,
440 {MRI.getType(MI.getOperand(0).getReg()), ConcatSrcTy}}))
441 return false;
442
443 return !Ops.empty();
444}
445
448 LLT SrcTy = MRI.getType(Ops[0]);
449 Register UndefReg = 0;
450
451 for (Register &Reg : Ops) {
452 if (Reg == 0) {
453 if (UndefReg == 0)
454 UndefReg = Builder.buildUndef(SrcTy).getReg(0);
455 Reg = UndefReg;
456 }
457 }
458
459 if (Ops.size() > 1)
460 Builder.buildConcatVectors(MI.getOperand(0).getReg(), Ops);
461 else
462 Builder.buildCopy(MI.getOperand(0).getReg(), Ops[0]);
463 MI.eraseFromParent();
464}
465
468 if (matchCombineShuffleVector(MI, Ops)) {
470 return true;
471 }
472 return false;
473}
474
477 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
478 "Invalid instruction kind");
479 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
480 Register Src1 = MI.getOperand(1).getReg();
481 LLT SrcType = MRI.getType(Src1);
482 // As bizarre as it may look, a shuffle vector can actually produce a
483 // scalar! This is because at the IR level a <1 x ty> shuffle
484 // vector is perfectly valid.
485 unsigned DstNumElts = DstType.isVector() ? DstType.getNumElements() : 1;
486 unsigned SrcNumElts = SrcType.isVector() ? SrcType.getNumElements() : 1;
487
488 // If the resulting vector is smaller than the size of the source
489 // vectors being concatenated, we won't be able to replace the
490 // shuffle vector into a concat_vectors.
491 //
492 // Note: We may still be able to produce a concat_vectors fed by
493 // extract_vector_elt and so on. It is less clear that would
494 // be better though, so don't bother for now.
495 //
496 // If the destination is a scalar, the size of the sources doesn't
497 // matter: we will lower the shuffle to a plain copy. This will
498 // work only if the source and destination have the same size. But
499 // that's covered by the next condition.
500 //
501 // TODO: If the sizes of the source and destination don't match,
502 // we could still emit an extract vector element in that case.
503 if (DstNumElts < 2 * SrcNumElts && DstNumElts != 1)
504 return false;
505
506 // Check that the shuffle mask can be broken evenly between the
507 // different sources.
508 if (DstNumElts % SrcNumElts != 0)
509 return false;
510
511 // Mask length is a multiple of the source vector length.
512 // Check if the shuffle is some kind of concatenation of the input
513 // vectors.
514 unsigned NumConcat = DstNumElts / SrcNumElts;
515 SmallVector<int, 8> ConcatSrcs(NumConcat, -1);
516 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
517 for (unsigned i = 0; i != DstNumElts; ++i) {
518 int Idx = Mask[i];
519 // Undef value.
520 if (Idx < 0)
521 continue;
522 // Ensure the indices in each SrcType sized piece are sequential and that
523 // the same source is used for the whole piece.
524 if ((Idx % SrcNumElts != (i % SrcNumElts)) ||
525 (ConcatSrcs[i / SrcNumElts] >= 0 &&
526 ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts)))
527 return false;
528 // Remember which source this index came from.
529 ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts;
530 }
531
532 // The shuffle is concatenating multiple vectors together.
533 // Collect the different operands for that.
534 Register UndefReg;
535 Register Src2 = MI.getOperand(2).getReg();
536 for (auto Src : ConcatSrcs) {
537 if (Src < 0) {
538 if (!UndefReg) {
539 Builder.setInsertPt(*MI.getParent(), MI);
540 UndefReg = Builder.buildUndef(SrcType).getReg(0);
541 }
542 Ops.push_back(UndefReg);
543 } else if (Src == 0)
544 Ops.push_back(Src1);
545 else
546 Ops.push_back(Src2);
547 }
548 return true;
549}
550
552 const ArrayRef<Register> Ops) {
553 Register DstReg = MI.getOperand(0).getReg();
554 Builder.setInsertPt(*MI.getParent(), MI);
555 Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
556
557 if (Ops.size() == 1)
558 Builder.buildCopy(NewDstReg, Ops[0]);
559 else
560 Builder.buildMergeLikeInstr(NewDstReg, Ops);
561
562 MI.eraseFromParent();
563 replaceRegWith(MRI, DstReg, NewDstReg);
564}
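// Illustrative example (hypothetical MIR): a shuffle whose mask simply
// concatenates its inputs,
//   %r:_(<4 x s32>) = G_SHUFFLE_VECTOR %x(<2 x s32>), %y(<2 x s32>),
//                                      shufflemask(0, 1, 2, 3)
// is rewritten into a merge-like instruction, here
//   %r:_(<4 x s32>) = G_CONCAT_VECTORS %x(<2 x s32>), %y(<2 x s32>)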
565
567 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
568 "Invalid instruction kind");
569
570 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
571 return Mask.size() == 1;
572}
573
575 Register DstReg = MI.getOperand(0).getReg();
576 Builder.setInsertPt(*MI.getParent(), MI);
577
578 int I = MI.getOperand(3).getShuffleMask()[0];
579 Register Src1 = MI.getOperand(1).getReg();
580 LLT Src1Ty = MRI.getType(Src1);
581 int Src1NumElts = Src1Ty.isVector() ? Src1Ty.getNumElements() : 1;
582 Register SrcReg;
583 if (I >= Src1NumElts) {
584 SrcReg = MI.getOperand(2).getReg();
585 I -= Src1NumElts;
586 } else if (I >= 0)
587 SrcReg = Src1;
588
589 if (I < 0)
590 Builder.buildUndef(DstReg);
591 else if (!MRI.getType(SrcReg).isVector())
592 Builder.buildCopy(DstReg, SrcReg);
593 else
595
596 MI.eraseFromParent();
597}
598
599namespace {
600
601/// Select a preference between two uses. CurrentUse is the current preference
602/// while the *ForCandidate values describe the candidate under consideration.
603PreferredTuple ChoosePreferredUse(MachineInstr &LoadMI,
604 PreferredTuple &CurrentUse,
605 const LLT TyForCandidate,
606 unsigned OpcodeForCandidate,
607 MachineInstr *MIForCandidate) {
608 if (!CurrentUse.Ty.isValid()) {
609 if (CurrentUse.ExtendOpcode == OpcodeForCandidate ||
610 CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT)
611 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
612 return CurrentUse;
613 }
614
615 // We permit the extend to hoist through basic blocks but this is only
616 // sensible if the target has extending loads. If you end up lowering back
617 // into a load and extend during the legalizer then the end result is
618 // hoisting the extend up to the load.
619
620 // Prefer defined extensions to undefined extensions as these are more
621 // likely to reduce the number of instructions.
622 if (OpcodeForCandidate == TargetOpcode::G_ANYEXT &&
623 CurrentUse.ExtendOpcode != TargetOpcode::G_ANYEXT)
624 return CurrentUse;
625 else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT &&
626 OpcodeForCandidate != TargetOpcode::G_ANYEXT)
627 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
628
629 // Prefer sign extensions to zero extensions as sign-extensions tend to be
630 // more expensive. Don't do this if the load is already a zero-extend load
631 // though, otherwise we'll rewrite a zero-extend load into a sign-extend
632 // later.
633 if (!isa<GZExtLoad>(LoadMI) && CurrentUse.Ty == TyForCandidate) {
634 if (CurrentUse.ExtendOpcode == TargetOpcode::G_SEXT &&
635 OpcodeForCandidate == TargetOpcode::G_ZEXT)
636 return CurrentUse;
637 else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ZEXT &&
638 OpcodeForCandidate == TargetOpcode::G_SEXT)
639 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
640 }
641
642 // This is potentially target specific. We've chosen the largest type
643 // because G_TRUNC is usually free. One potential catch with this is that
644 // some targets have fewer large registers than small ones, and this
645 // choice potentially increases the live-range for the
646 // larger value.
647 if (TyForCandidate.getSizeInBits() > CurrentUse.Ty.getSizeInBits()) {
648 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
649 }
650 return CurrentUse;
651}
652
653/// Find a suitable place to insert some instructions and insert them. This
654/// function accounts for special cases like inserting before a PHI node.
655/// The current strategy for inserting before PHI's is to duplicate the
656/// instructions for each predecessor. However, while that's ok for G_TRUNC
657/// on most targets since it generally requires no code, other targets/cases may
658/// want to try harder to find a dominating block.
659static void InsertInsnsWithoutSideEffectsBeforeUse(
662 MachineOperand &UseMO)>
663 Inserter) {
664 MachineInstr &UseMI = *UseMO.getParent();
665
666 MachineBasicBlock *InsertBB = UseMI.getParent();
667
668 // If the use is a PHI then we want the predecessor block instead.
669 if (UseMI.isPHI()) {
670 MachineOperand *PredBB = std::next(&UseMO);
671 InsertBB = PredBB->getMBB();
672 }
673
674 // If the block is the same block as the def then we want to insert just after
675 // the def instead of at the start of the block.
676 if (InsertBB == DefMI.getParent()) {
678 Inserter(InsertBB, std::next(InsertPt), UseMO);
679 return;
680 }
681
682 // Otherwise we want the start of the BB
683 Inserter(InsertBB, InsertBB->getFirstNonPHI(), UseMO);
684}
685} // end anonymous namespace
686
688 PreferredTuple Preferred;
689 if (matchCombineExtendingLoads(MI, Preferred)) {
690 applyCombineExtendingLoads(MI, Preferred);
691 return true;
692 }
693 return false;
694}
695
696static unsigned getExtLoadOpcForExtend(unsigned ExtOpc) {
697 unsigned CandidateLoadOpc;
698 switch (ExtOpc) {
699 case TargetOpcode::G_ANYEXT:
700 CandidateLoadOpc = TargetOpcode::G_LOAD;
701 break;
702 case TargetOpcode::G_SEXT:
703 CandidateLoadOpc = TargetOpcode::G_SEXTLOAD;
704 break;
705 case TargetOpcode::G_ZEXT:
706 CandidateLoadOpc = TargetOpcode::G_ZEXTLOAD;
707 break;
708 default:
709 llvm_unreachable("Unexpected extend opc");
710 }
711 return CandidateLoadOpc;
712}
713
715 PreferredTuple &Preferred) {
716 // We match the loads and follow the uses to the extend instead of matching
717 // the extends and following the def to the load. This is because the load
718 // must remain in the same position for correctness (unless we also add code
719 // to find a safe place to sink it) whereas the extend is freely movable.
720 // It also prevents us from duplicating the load for the volatile case or just
721 // for performance.
722 GAnyLoad *LoadMI = dyn_cast<GAnyLoad>(&MI);
723 if (!LoadMI)
724 return false;
725
726 Register LoadReg = LoadMI->getDstReg();
727
728 LLT LoadValueTy = MRI.getType(LoadReg);
729 if (!LoadValueTy.isScalar())
730 return false;
731
732 // Most architectures are going to legalize <s8 loads into at least a 1 byte
733 // load, and the MMOs can only describe memory accesses in multiples of bytes.
734 // If we try to perform extload combining on those, we can end up with
735 // %a(s8) = extload %ptr (load 1 byte from %ptr)
736 // ... which is an illegal extload instruction.
737 if (LoadValueTy.getSizeInBits() < 8)
738 return false;
739
740 // For non power-of-2 types, they will very likely be legalized into multiple
741 // loads. Don't bother trying to match them into extending loads.
742 if (!llvm::has_single_bit<uint32_t>(LoadValueTy.getSizeInBits()))
743 return false;
744
745 // Find the preferred type aside from the any-extends (unless it's the only
746 // one) and non-extending ops. We'll emit an extending load to that type
747 // and emit a variant of (extend (trunc X)) for the others according to the
748 // relative type sizes. At the same time, pick an extend to use based on the
749 // extend involved in the chosen type.
750 unsigned PreferredOpcode =
751 isa<GLoad>(&MI)
752 ? TargetOpcode::G_ANYEXT
753 : isa<GSExtLoad>(&MI) ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
754 Preferred = {LLT(), PreferredOpcode, nullptr};
755 for (auto &UseMI : MRI.use_nodbg_instructions(LoadReg)) {
756 if (UseMI.getOpcode() == TargetOpcode::G_SEXT ||
757 UseMI.getOpcode() == TargetOpcode::G_ZEXT ||
758 (UseMI.getOpcode() == TargetOpcode::G_ANYEXT)) {
759 const auto &MMO = LoadMI->getMMO();
760 // Don't do anything for atomics.
761 if (MMO.isAtomic())
762 continue;
763 // Check for legality.
764 if (!isPreLegalize()) {
765 LegalityQuery::MemDesc MMDesc(MMO);
766 unsigned CandidateLoadOpc = getExtLoadOpcForExtend(UseMI.getOpcode());
767 LLT UseTy = MRI.getType(UseMI.getOperand(0).getReg());
768 LLT SrcTy = MRI.getType(LoadMI->getPointerReg());
769 if (LI->getAction({CandidateLoadOpc, {UseTy, SrcTy}, {MMDesc}})
770 .Action != LegalizeActions::Legal)
771 continue;
772 }
773 Preferred = ChoosePreferredUse(MI, Preferred,
774 MRI.getType(UseMI.getOperand(0).getReg()),
775 UseMI.getOpcode(), &UseMI);
776 }
777 }
778
779 // There were no extends
780 if (!Preferred.MI)
781 return false;
782 // It should be impossible to choose an extend without selecting a different
783 // type since by definition the result of an extend is larger.
784 assert(Preferred.Ty != LoadValueTy && "Extending to same type?");
785
786 LLVM_DEBUG(dbgs() << "Preferred use is: " << *Preferred.MI);
787 return true;
788}
789
791 PreferredTuple &Preferred) {
792 // Rewrite the load to the chosen extending load.
793 Register ChosenDstReg = Preferred.MI->getOperand(0).getReg();
794
795 // Inserter to insert a truncate back to the original type at a given point
796 // with some basic CSE to limit truncate duplication to one per BB.
798 auto InsertTruncAt = [&](MachineBasicBlock *InsertIntoBB,
799 MachineBasicBlock::iterator InsertBefore,
800 MachineOperand &UseMO) {
801 MachineInstr *PreviouslyEmitted = EmittedInsns.lookup(InsertIntoBB);
802 if (PreviouslyEmitted) {
804 UseMO.setReg(PreviouslyEmitted->getOperand(0).getReg());
806 return;
807 }
808
809 Builder.setInsertPt(*InsertIntoBB, InsertBefore);
810 Register NewDstReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg());
811 MachineInstr *NewMI = Builder.buildTrunc(NewDstReg, ChosenDstReg);
812 EmittedInsns[InsertIntoBB] = NewMI;
813 replaceRegOpWith(MRI, UseMO, NewDstReg);
814 };
815
817 unsigned LoadOpc = getExtLoadOpcForExtend(Preferred.ExtendOpcode);
818 MI.setDesc(Builder.getTII().get(LoadOpc));
819
820 // Rewrite all the uses to fix up the types.
821 auto &LoadValue = MI.getOperand(0);
823 for (auto &UseMO : MRI.use_operands(LoadValue.getReg()))
824 Uses.push_back(&UseMO);
825
826 for (auto *UseMO : Uses) {
827 MachineInstr *UseMI = UseMO->getParent();
828
829 // If the extend is compatible with the preferred extend then we should fix
830 // up the type and extend so that it uses the preferred use.
831 if (UseMI->getOpcode() == Preferred.ExtendOpcode ||
832 UseMI->getOpcode() == TargetOpcode::G_ANYEXT) {
833 Register UseDstReg = UseMI->getOperand(0).getReg();
834 MachineOperand &UseSrcMO = UseMI->getOperand(1);
835 const LLT UseDstTy = MRI.getType(UseDstReg);
836 if (UseDstReg != ChosenDstReg) {
837 if (Preferred.Ty == UseDstTy) {
838 // If the use has the same type as the preferred use, then merge
839 // the vregs and erase the extend. For example:
840 // %1:_(s8) = G_LOAD ...
841 // %2:_(s32) = G_SEXT %1(s8)
842 // %3:_(s32) = G_ANYEXT %1(s8)
843 // ... = ... %3(s32)
844 // rewrites to:
845 // %2:_(s32) = G_SEXTLOAD ...
846 // ... = ... %2(s32)
847 replaceRegWith(MRI, UseDstReg, ChosenDstReg);
849 UseMO->getParent()->eraseFromParent();
850 } else if (Preferred.Ty.getSizeInBits() < UseDstTy.getSizeInBits()) {
851 // If the preferred size is smaller, then keep the extend but extend
852 // from the result of the extending load. For example:
853 // %1:_(s8) = G_LOAD ...
854 // %2:_(s32) = G_SEXT %1(s8)
855 // %3:_(s64) = G_ANYEXT %1(s8)
856 // ... = ... %3(s64)
857 // rewrites to:
858 // %2:_(s32) = G_SEXTLOAD ...
859 // %3:_(s64) = G_ANYEXT %2:_(s32)
860 // ... = ... %3(s64)
861 replaceRegOpWith(MRI, UseSrcMO, ChosenDstReg);
862 } else {
863 // If the preferred size is larger, then insert a truncate. For
864 // example:
865 // %1:_(s8) = G_LOAD ...
866 // %2:_(s64) = G_SEXT %1(s8)
867 // %3:_(s32) = G_ZEXT %1(s8)
868 // ... = ... %3(s32)
869 // rewrites to:
870 // %2:_(s64) = G_SEXTLOAD ...
871 // %4:_(s8) = G_TRUNC %2:_(s64)
872 // %3:_(s32) = G_ZEXT %4:_(s8)
873 // ... = ... %3(s32)
874 InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO,
875 InsertTruncAt);
876 }
877 continue;
878 }
879 // The use is (one of) the uses of the preferred use we chose earlier.
880 // We're going to update the load to def this value later so just erase
881 // the old extend.
883 UseMO->getParent()->eraseFromParent();
884 continue;
885 }
886
887 // The use isn't an extend. Truncate back to the type we originally loaded.
888 // This is free on many targets.
889 InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO, InsertTruncAt);
890 }
891
892 MI.getOperand(0).setReg(ChosenDstReg);
894}
895
897 BuildFnTy &MatchInfo) {
898 assert(MI.getOpcode() == TargetOpcode::G_AND);
899
900 // If we have the following code:
901 // %mask = G_CONSTANT 255
902 // %ld = G_LOAD %ptr, (load s16)
903 // %and = G_AND %ld, %mask
904 //
905 // Try to fold it into
906 // %ld = G_ZEXTLOAD %ptr, (load s8)
907
908 Register Dst = MI.getOperand(0).getReg();
909 if (MRI.getType(Dst).isVector())
910 return false;
911
912 auto MaybeMask =
913 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
914 if (!MaybeMask)
915 return false;
916
917 APInt MaskVal = MaybeMask->Value;
918
919 if (!MaskVal.isMask())
920 return false;
921
922 Register SrcReg = MI.getOperand(1).getReg();
923 // Don't use getOpcodeDef() here since intermediate instructions may have
924 // multiple users.
925 GAnyLoad *LoadMI = dyn_cast<GAnyLoad>(MRI.getVRegDef(SrcReg));
926 if (!LoadMI || !MRI.hasOneNonDBGUse(LoadMI->getDstReg()))
927 return false;
928
929 Register LoadReg = LoadMI->getDstReg();
930 LLT RegTy = MRI.getType(LoadReg);
931 Register PtrReg = LoadMI->getPointerReg();
932 unsigned RegSize = RegTy.getSizeInBits();
933 LocationSize LoadSizeBits = LoadMI->getMemSizeInBits();
934 unsigned MaskSizeBits = MaskVal.countr_one();
935
936 // The mask may not be larger than the in-memory type, as it might cover sign
937 // extended bits
938 if (MaskSizeBits > LoadSizeBits.getValue())
939 return false;
940
941 // If the mask covers the whole destination register, there's nothing to
942 // extend
943 if (MaskSizeBits >= RegSize)
944 return false;
945
946 // Most targets cannot deal with loads of size < 8 and need to re-legalize to
947 // at least byte loads. Avoid creating such loads here
948 if (MaskSizeBits < 8 || !isPowerOf2_32(MaskSizeBits))
949 return false;
950
951 const MachineMemOperand &MMO = LoadMI->getMMO();
952 LegalityQuery::MemDesc MemDesc(MMO);
953
954 // Don't modify the memory access size if this is atomic/volatile, but we can
955 // still adjust the opcode to indicate the high bit behavior.
956 if (LoadMI->isSimple())
957 MemDesc.MemoryTy = LLT::scalar(MaskSizeBits);
958 else if (LoadSizeBits.getValue() > MaskSizeBits ||
959 LoadSizeBits.getValue() == RegSize)
960 return false;
961
962 // TODO: Could check if it's legal with the reduced or original memory size.
964 {TargetOpcode::G_ZEXTLOAD, {RegTy, MRI.getType(PtrReg)}, {MemDesc}}))
965 return false;
966
967 MatchInfo = [=](MachineIRBuilder &B) {
968 B.setInstrAndDebugLoc(*LoadMI);
969 auto &MF = B.getMF();
970 auto PtrInfo = MMO.getPointerInfo();
971 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, MemDesc.MemoryTy);
972 B.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, Dst, PtrReg, *NewMMO);
973 LoadMI->eraseFromParent();
974 };
975 return true;
976}
977
979 const MachineInstr &UseMI) {
980 assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
981 "shouldn't consider debug uses");
982 assert(DefMI.getParent() == UseMI.getParent());
983 if (&DefMI == &UseMI)
984 return true;
985 const MachineBasicBlock &MBB = *DefMI.getParent();
986 auto DefOrUse = find_if(MBB, [&DefMI, &UseMI](const MachineInstr &MI) {
987 return &MI == &DefMI || &MI == &UseMI;
988 });
989 if (DefOrUse == MBB.end())
990 llvm_unreachable("Block must contain both DefMI and UseMI!");
991 return &*DefOrUse == &DefMI;
992}
993
995 const MachineInstr &UseMI) {
996 assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
997 "shouldn't consider debug uses");
998 if (MDT)
999 return MDT->dominates(&DefMI, &UseMI);
1000 else if (DefMI.getParent() != UseMI.getParent())
1001 return false;
1002
1003 return isPredecessor(DefMI, UseMI);
1004}
1005
1007 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1008 Register SrcReg = MI.getOperand(1).getReg();
1009 Register LoadUser = SrcReg;
1010
1011 if (MRI.getType(SrcReg).isVector())
1012 return false;
1013
1014 Register TruncSrc;
1015 if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc))))
1016 LoadUser = TruncSrc;
1017
1018 uint64_t SizeInBits = MI.getOperand(2).getImm();
1019 // If the source is a G_SEXTLOAD from the same bit width, then we don't
1020 // need any extend at all, just a truncate.
1021 if (auto *LoadMI = getOpcodeDef<GSExtLoad>(LoadUser, MRI)) {
1022 // If truncating more than the original extended value, abort.
1023 auto LoadSizeBits = LoadMI->getMemSizeInBits();
1024 if (TruncSrc &&
1025 MRI.getType(TruncSrc).getSizeInBits() < LoadSizeBits.getValue())
1026 return false;
1027 if (LoadSizeBits == SizeInBits)
1028 return true;
1029 }
1030 return false;
1031}
1032
1034 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1035 Builder.buildCopy(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
1036 MI.eraseFromParent();
1037}
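// Illustrative example (hypothetical MIR): the G_SEXT_INREG is redundant when
// the value was already sign-extended from the same in-memory width:
//   %ld:_(s32) = G_SEXTLOAD %ptr :: (load (s8))
//   %r:_(s32)  = G_SEXT_INREG %ld, 8
// becomes
//   %r:_(s32)  = COPY %ld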
1038
1040 MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
1041 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1042
1043 Register DstReg = MI.getOperand(0).getReg();
1044 LLT RegTy = MRI.getType(DstReg);
1045
1046 // Only supports scalars for now.
1047 if (RegTy.isVector())
1048 return false;
1049
1050 Register SrcReg = MI.getOperand(1).getReg();
1051 auto *LoadDef = getOpcodeDef<GLoad>(SrcReg, MRI);
1052 if (!LoadDef || !MRI.hasOneNonDBGUse(DstReg))
1053 return false;
1054
1055 uint64_t MemBits = LoadDef->getMemSizeInBits().getValue();
1056
1057 // If the sign extend extends from a narrower width than the load's width,
1058 // then we can narrow the load width when we combine to a G_SEXTLOAD.
1059 // Avoid widening the load at all.
1060 unsigned NewSizeBits = std::min((uint64_t)MI.getOperand(2).getImm(), MemBits);
1061
1062 // Don't generate G_SEXTLOADs with a < 1 byte width.
1063 if (NewSizeBits < 8)
1064 return false;
1065 // Don't bother creating a non-power-2 sextload, it will likely be broken up
1066 // anyway for most targets.
1067 if (!isPowerOf2_32(NewSizeBits))
1068 return false;
1069
1070 const MachineMemOperand &MMO = LoadDef->getMMO();
1071 LegalityQuery::MemDesc MMDesc(MMO);
1072
1073 // Don't modify the memory access size if this is atomic/volatile, but we can
1074 // still adjust the opcode to indicate the high bit behavior.
1075 if (LoadDef->isSimple())
1076 MMDesc.MemoryTy = LLT::scalar(NewSizeBits);
1077 else if (MemBits > NewSizeBits || MemBits == RegTy.getSizeInBits())
1078 return false;
1079
1080 // TODO: Could check if it's legal with the reduced or original memory size.
1081 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SEXTLOAD,
1082 {MRI.getType(LoadDef->getDstReg()),
1083 MRI.getType(LoadDef->getPointerReg())},
1084 {MMDesc}}))
1085 return false;
1086
1087 MatchInfo = std::make_tuple(LoadDef->getDstReg(), NewSizeBits);
1088 return true;
1089}
1090
1092 MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
1093 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1094 Register LoadReg;
1095 unsigned ScalarSizeBits;
1096 std::tie(LoadReg, ScalarSizeBits) = MatchInfo;
1097 GLoad *LoadDef = cast<GLoad>(MRI.getVRegDef(LoadReg));
1098
1099 // If we have the following:
1100 // %ld = G_LOAD %ptr, (load 2)
1101 // %ext = G_SEXT_INREG %ld, 8
1102 // ==>
1103 // %ld = G_SEXTLOAD %ptr (load 1)
1104
1105 auto &MMO = LoadDef->getMMO();
1106 Builder.setInstrAndDebugLoc(*LoadDef);
1107 auto &MF = Builder.getMF();
1108 auto PtrInfo = MMO.getPointerInfo();
1109 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, ScalarSizeBits / 8);
1110 Builder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, MI.getOperand(0).getReg(),
1111 LoadDef->getPointerReg(), *NewMMO);
1112 MI.eraseFromParent();
1113}
1114
1115/// Return true if 'MI' is a load or a store that may fold its address
1116/// operand into the load / store addressing mode.
1120 auto *MF = MI->getMF();
1121 auto *Addr = getOpcodeDef<GPtrAdd>(MI->getPointerReg(), MRI);
1122 if (!Addr)
1123 return false;
1124
1125 AM.HasBaseReg = true;
1126 if (auto CstOff = getIConstantVRegVal(Addr->getOffsetReg(), MRI))
1127 AM.BaseOffs = CstOff->getSExtValue(); // [reg +/- imm]
1128 else
1129 AM.Scale = 1; // [reg +/- reg]
1130
1131 return TLI.isLegalAddressingMode(
1132 MF->getDataLayout(), AM,
1133 getTypeForLLT(MI->getMMO().getMemoryType(),
1134 MF->getFunction().getContext()),
1135 MI->getMMO().getAddrSpace());
1136}
1137
1138static unsigned getIndexedOpc(unsigned LdStOpc) {
1139 switch (LdStOpc) {
1140 case TargetOpcode::G_LOAD:
1141 return TargetOpcode::G_INDEXED_LOAD;
1142 case TargetOpcode::G_STORE:
1143 return TargetOpcode::G_INDEXED_STORE;
1144 case TargetOpcode::G_ZEXTLOAD:
1145 return TargetOpcode::G_INDEXED_ZEXTLOAD;
1146 case TargetOpcode::G_SEXTLOAD:
1147 return TargetOpcode::G_INDEXED_SEXTLOAD;
1148 default:
1149 llvm_unreachable("Unexpected opcode");
1150 }
1151}
1152
1153bool CombinerHelper::isIndexedLoadStoreLegal(GLoadStore &LdSt) const {
1154 // Check for legality.
1155 LLT PtrTy = MRI.getType(LdSt.getPointerReg());
1156 LLT Ty = MRI.getType(LdSt.getReg(0));
1157 LLT MemTy = LdSt.getMMO().getMemoryType();
1159 {{MemTy, MemTy.getSizeInBits().getKnownMinValue(),
1161 unsigned IndexedOpc = getIndexedOpc(LdSt.getOpcode());
1162 SmallVector<LLT> OpTys;
1163 if (IndexedOpc == TargetOpcode::G_INDEXED_STORE)
1164 OpTys = {PtrTy, Ty, Ty};
1165 else
1166 OpTys = {Ty, PtrTy}; // For G_INDEXED_LOAD, G_INDEXED_[SZ]EXTLOAD
1167
1168 LegalityQuery Q(IndexedOpc, OpTys, MemDescrs);
1169 return isLegal(Q);
1170}
1171
1173 "post-index-use-threshold", cl::Hidden, cl::init(32),
1174 cl::desc("Number of uses of a base pointer to check before it is no longer "
1175 "considered for post-indexing."));
1176
1177bool CombinerHelper::findPostIndexCandidate(GLoadStore &LdSt, Register &Addr,
1179 bool &RematOffset) {
1180 // We're looking for the following pattern, for either load or store:
1181 // %baseptr:_(p0) = ...
1182 // G_STORE %val(s64), %baseptr(p0)
1183 // %offset:_(s64) = G_CONSTANT i64 -256
1184 // %new_addr:_(p0) = G_PTR_ADD %baseptr, %offset(s64)
1185 const auto &TLI = getTargetLowering();
1186
1187 Register Ptr = LdSt.getPointerReg();
1188 // If the store is the only use, don't bother.
1189 if (MRI.hasOneNonDBGUse(Ptr))
1190 return false;
1191
1192 if (!isIndexedLoadStoreLegal(LdSt))
1193 return false;
1194
1195 if (getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Ptr, MRI))
1196 return false;
1197
1198 MachineInstr *StoredValDef = getDefIgnoringCopies(LdSt.getReg(0), MRI);
1199 auto *PtrDef = MRI.getVRegDef(Ptr);
1200
1201 unsigned NumUsesChecked = 0;
1202 for (auto &Use : MRI.use_nodbg_instructions(Ptr)) {
1203 if (++NumUsesChecked > PostIndexUseThreshold)
1204 return false; // Try to avoid exploding compile time.
1205
1206 auto *PtrAdd = dyn_cast<GPtrAdd>(&Use);
1207 // The use itself might be dead. This can happen during combines if DCE
1208 // hasn't had a chance to run yet. Don't allow it to form an indexed op.
1209 if (!PtrAdd || MRI.use_nodbg_empty(PtrAdd->getReg(0)))
1210 continue;
1211
1212 // Check the user of this isn't the store, otherwise we'd generate an
1213 // indexed store defining its own use.
1214 if (StoredValDef == &Use)
1215 continue;
1216
1217 Offset = PtrAdd->getOffsetReg();
1218 if (!ForceLegalIndexing &&
1219 !TLI.isIndexingLegal(LdSt, PtrAdd->getBaseReg(), Offset,
1220 /*IsPre*/ false, MRI))
1221 continue;
1222
1223 // Make sure the offset calculation is before the potentially indexed op.
1224 MachineInstr *OffsetDef = MRI.getVRegDef(Offset);
1225 RematOffset = false;
1226 if (!dominates(*OffsetDef, LdSt)) {
1227 // If the offset however is just a G_CONSTANT, we can always just
1228 // rematerialize it where we need it.
1229 if (OffsetDef->getOpcode() != TargetOpcode::G_CONSTANT)
1230 continue;
1231 RematOffset = true;
1232 }
1233
1234 for (auto &BasePtrUse : MRI.use_nodbg_instructions(PtrAdd->getBaseReg())) {
1235 if (&BasePtrUse == PtrDef)
1236 continue;
1237
1238 // If the user is a later load/store that can be post-indexed, then don't
1239 // combine this one.
1240 auto *BasePtrLdSt = dyn_cast<GLoadStore>(&BasePtrUse);
1241 if (BasePtrLdSt && BasePtrLdSt != &LdSt &&
1242 dominates(LdSt, *BasePtrLdSt) &&
1243 isIndexedLoadStoreLegal(*BasePtrLdSt))
1244 return false;
1245
1246 // Now we're looking for the key G_PTR_ADD instruction, which contains
1247 // the offset add that we want to fold.
1248 if (auto *BasePtrUseDef = dyn_cast<GPtrAdd>(&BasePtrUse)) {
1249 Register PtrAddDefReg = BasePtrUseDef->getReg(0);
1250 for (auto &BaseUseUse : MRI.use_nodbg_instructions(PtrAddDefReg)) {
1251 // If the use is in a different block, then we may produce worse code
1252 // due to the extra register pressure.
1253 if (BaseUseUse.getParent() != LdSt.getParent())
1254 return false;
1255
1256 if (auto *UseUseLdSt = dyn_cast<GLoadStore>(&BaseUseUse))
1257 if (canFoldInAddressingMode(UseUseLdSt, TLI, MRI))
1258 return false;
1259 }
1260 if (!dominates(LdSt, BasePtrUse))
1261 return false; // All uses must be dominated by the load/store.
1262 }
1263 }
1264
1265 Addr = PtrAdd->getReg(0);
1266 Base = PtrAdd->getBaseReg();
1267 return true;
1268 }
1269
1270 return false;
1271}
1272
1273bool CombinerHelper::findPreIndexCandidate(GLoadStore &LdSt, Register &Addr,
1275 auto &MF = *LdSt.getParent()->getParent();
1276 const auto &TLI = *MF.getSubtarget().getTargetLowering();
1277
1278 Addr = LdSt.getPointerReg();
1281 return false;
1282
1283 if (!ForceLegalIndexing &&
1284 !TLI.isIndexingLegal(LdSt, Base, Offset, /*IsPre*/ true, MRI))
1285 return false;
1286
1287 if (!isIndexedLoadStoreLegal(LdSt))
1288 return false;
1289
1291 if (BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
1292 return false;
1293
1294 if (auto *St = dyn_cast<GStore>(&LdSt)) {
1295 // Would require a copy.
1296 if (Base == St->getValueReg())
1297 return false;
1298
1299 // We're expecting one use of Addr in MI, but it could also be the
1300 // value stored, which isn't actually dominated by the instruction.
1301 if (St->getValueReg() == Addr)
1302 return false;
1303 }
1304
1305 // Avoid increasing cross-block register pressure.
1306 for (auto &AddrUse : MRI.use_nodbg_instructions(Addr))
1307 if (AddrUse.getParent() != LdSt.getParent())
1308 return false;
1309
1310 // FIXME: check whether all uses of the base pointer are constant PtrAdds.
1311 // That might allow us to end base's liveness here by adjusting the constant.
1312 bool RealUse = false;
1313 for (auto &AddrUse : MRI.use_nodbg_instructions(Addr)) {
1314 if (!dominates(LdSt, AddrUse))
1315 return false; // All uses must be dominated by the load/store.
1316
1317 // If Ptr may be folded into the addressing mode of another use, then
1318 // it's not profitable to do this transformation.
1319 if (auto *UseLdSt = dyn_cast<GLoadStore>(&AddrUse)) {
1320 if (!canFoldInAddressingMode(UseLdSt, TLI, MRI))
1321 RealUse = true;
1322 } else {
1323 RealUse = true;
1324 }
1325 }
1326 return RealUse;
1327}
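// Illustrative shape of a pre-indexing candidate (hypothetical MIR):
//   %addr:_(p0) = G_PTR_ADD %base, %offset
//   G_STORE %val(s32), %addr(p0)
// If the legality and dominance checks above succeed, the G_PTR_ADD can be
// folded into a pre-indexed store that also produces the updated address.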
1328
1330 BuildFnTy &MatchInfo) {
1331 assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT);
1332
1333 // Check if there is a load that defines the vector being extracted from.
1334 auto *LoadMI = getOpcodeDef<GLoad>(MI.getOperand(1).getReg(), MRI);
1335 if (!LoadMI)
1336 return false;
1337
1338 Register Vector = MI.getOperand(1).getReg();
1339 LLT VecEltTy = MRI.getType(Vector).getElementType();
1340
1341 assert(MRI.getType(MI.getOperand(0).getReg()) == VecEltTy);
1342
1343 // Checking whether we should reduce the load width.
1345 return false;
1346
1347 // Check if the defining load is simple.
1348 if (!LoadMI->isSimple())
1349 return false;
1350
1351 // If the vector element type is not a multiple of a byte then we are unable
1352 // to correctly compute an address to load only the extracted element as a
1353 // scalar.
1354 if (!VecEltTy.isByteSized())
1355 return false;
1356
1357 // Check for load fold barriers between the extraction and the load.
1358 if (MI.getParent() != LoadMI->getParent())
1359 return false;
1360 const unsigned MaxIter = 20;
1361 unsigned Iter = 0;
1362 for (auto II = LoadMI->getIterator(), IE = MI.getIterator(); II != IE; ++II) {
1363 if (II->isLoadFoldBarrier())
1364 return false;
1365 if (Iter++ == MaxIter)
1366 return false;
1367 }
1368
1369 // Check if the new load that we are going to create is legal
1370 // if we are in the post-legalization phase.
1371 MachineMemOperand MMO = LoadMI->getMMO();
1372 Align Alignment = MMO.getAlign();
1373 MachinePointerInfo PtrInfo;
1375
1376 // Find the appropriate PtrInfo if the offset is a known constant.
1377 // This is required to create the memory operand for the narrowed load.
1378 // This machine memory operand object helps us reason about legality
1379 // before we proceed to combine the instruction.
1380 if (auto CVal = getIConstantVRegVal(Vector, MRI)) {
1381 int Elt = CVal->getZExtValue();
1382 // FIXME: should be (ABI size)*Elt.
1383 Offset = VecEltTy.getSizeInBits() * Elt / 8;
1384 PtrInfo = MMO.getPointerInfo().getWithOffset(Offset);
1385 } else {
1386 // Discard the pointer info except the address space because the memory
1387 // operand can't represent this new access since the offset is variable.
1388 Offset = VecEltTy.getSizeInBits() / 8;
1390 }
1391
1392 Alignment = commonAlignment(Alignment, Offset);
1393
1394 Register VecPtr = LoadMI->getPointerReg();
1395 LLT PtrTy = MRI.getType(VecPtr);
1396
1397 MachineFunction &MF = *MI.getMF();
1398 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, VecEltTy);
1399
1400 LegalityQuery::MemDesc MMDesc(*NewMMO);
1401
1402 LegalityQuery Q = {TargetOpcode::G_LOAD, {VecEltTy, PtrTy}, {MMDesc}};
1403
1405 return false;
1406
1407 // Load must be allowed and fast on the target.
1409 auto &DL = MF.getDataLayout();
1410 unsigned Fast = 0;
1411 if (!getTargetLowering().allowsMemoryAccess(C, DL, VecEltTy, *NewMMO,
1412 &Fast) ||
1413 !Fast)
1414 return false;
1415
1416 Register Result = MI.getOperand(0).getReg();
1417 Register Index = MI.getOperand(2).getReg();
1418
1419 MatchInfo = [=](MachineIRBuilder &B) {
1420 GISelObserverWrapper DummyObserver;
1421 LegalizerHelper Helper(B.getMF(), DummyObserver, B);
1422 // Get a pointer to the vector element.
1423 Register finalPtr = Helper.getVectorElementPointer(
1424 LoadMI->getPointerReg(), MRI.getType(LoadMI->getOperand(0).getReg()),
1425 Index);
1426 // New G_LOAD instruction.
1427 B.buildLoad(Result, finalPtr, PtrInfo, Alignment);
1429 // Remove the original G_LOAD instruction.
1429 LoadMI->eraseFromParent();
1430 };
1431
1432 return true;
1433}
1434
1437 auto &LdSt = cast<GLoadStore>(MI);
1438
1439 if (LdSt.isAtomic())
1440 return false;
1441
1442 MatchInfo.IsPre = findPreIndexCandidate(LdSt, MatchInfo.Addr, MatchInfo.Base,
1443 MatchInfo.Offset);
1444 if (!MatchInfo.IsPre &&
1445 !findPostIndexCandidate(LdSt, MatchInfo.Addr, MatchInfo.Base,
1446 MatchInfo.Offset, MatchInfo.RematOffset))
1447 return false;
1448
1449 return true;
1450}
1451
1454 MachineInstr &AddrDef = *MRI.getUniqueVRegDef(MatchInfo.Addr);
1455 unsigned Opcode = MI.getOpcode();
1456 bool IsStore = Opcode == TargetOpcode::G_STORE;
1457 unsigned NewOpcode = getIndexedOpc(Opcode);
1458
1459 // If the offset constant didn't happen to dominate the load/store, we can
1460 // just clone it as needed.
1461 if (MatchInfo.RematOffset) {
1462 auto *OldCst = MRI.getVRegDef(MatchInfo.Offset);
1463 auto NewCst = Builder.buildConstant(MRI.getType(MatchInfo.Offset),
1464 *OldCst->getOperand(1).getCImm());
1465 MatchInfo.Offset = NewCst.getReg(0);
1466 }
1467
1468 auto MIB = Builder.buildInstr(NewOpcode);
1469 if (IsStore) {
1470 MIB.addDef(MatchInfo.Addr);
1471 MIB.addUse(MI.getOperand(0).getReg());
1472 } else {
1473 MIB.addDef(MI.getOperand(0).getReg());
1474 MIB.addDef(MatchInfo.Addr);
1475 }
1476
1477 MIB.addUse(MatchInfo.Base);
1478 MIB.addUse(MatchInfo.Offset);
1479 MIB.addImm(MatchInfo.IsPre);
1480 MIB->cloneMemRefs(*MI.getMF(), MI);
1481 MI.eraseFromParent();
1482 AddrDef.eraseFromParent();
1483
1484 LLVM_DEBUG(dbgs() << " Combined to indexed operation");
1485}
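// Illustrative example (hypothetical MIR) of the post-indexed store case:
//   G_STORE %val(s64), %base(p0)
//   %off:_(s64)  = G_CONSTANT i64 16
//   %next:_(p0)  = G_PTR_ADD %base, %off
// becomes
//   %next:_(p0)  = G_INDEXED_STORE %val(s64), %base(p0), %off(s64), 0
// where the trailing immediate is IsPre (0 here, i.e. post-indexed).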
1486
1488 MachineInstr *&OtherMI) {
1489 unsigned Opcode = MI.getOpcode();
1490 bool IsDiv, IsSigned;
1491
1492 switch (Opcode) {
1493 default:
1494 llvm_unreachable("Unexpected opcode!");
1495 case TargetOpcode::G_SDIV:
1496 case TargetOpcode::G_UDIV: {
1497 IsDiv = true;
1498 IsSigned = Opcode == TargetOpcode::G_SDIV;
1499 break;
1500 }
1501 case TargetOpcode::G_SREM:
1502 case TargetOpcode::G_UREM: {
1503 IsDiv = false;
1504 IsSigned = Opcode == TargetOpcode::G_SREM;
1505 break;
1506 }
1507 }
1508
1509 Register Src1 = MI.getOperand(1).getReg();
1510 unsigned DivOpcode, RemOpcode, DivremOpcode;
1511 if (IsSigned) {
1512 DivOpcode = TargetOpcode::G_SDIV;
1513 RemOpcode = TargetOpcode::G_SREM;
1514 DivremOpcode = TargetOpcode::G_SDIVREM;
1515 } else {
1516 DivOpcode = TargetOpcode::G_UDIV;
1517 RemOpcode = TargetOpcode::G_UREM;
1518 DivremOpcode = TargetOpcode::G_UDIVREM;
1519 }
1520
1521 if (!isLegalOrBeforeLegalizer({DivremOpcode, {MRI.getType(Src1)}}))
1522 return false;
1523
1524 // Combine:
1525 // %div:_ = G_[SU]DIV %src1:_, %src2:_
1526 // %rem:_ = G_[SU]REM %src1:_, %src2:_
1527 // into:
1528 // %div:_, %rem:_ = G_[SU]DIVREM %src1:_, %src2:_
1529
1530 // Combine:
1531 // %rem:_ = G_[SU]REM %src1:_, %src2:_
1532 // %div:_ = G_[SU]DIV %src1:_, %src2:_
1533 // into:
1534 // %div:_, %rem:_ = G_[SU]DIVREM %src1:_, %src2:_
1535
1536 for (auto &UseMI : MRI.use_nodbg_instructions(Src1)) {
1537 if (MI.getParent() == UseMI.getParent() &&
1538 ((IsDiv && UseMI.getOpcode() == RemOpcode) ||
1539 (!IsDiv && UseMI.getOpcode() == DivOpcode)) &&
1540 matchEqualDefs(MI.getOperand(2), UseMI.getOperand(2)) &&
1541 matchEqualDefs(MI.getOperand(1), UseMI.getOperand(1))) {
1542 OtherMI = &UseMI;
1543 return true;
1544 }
1545 }
1546
1547 return false;
1548}
1549
1551 MachineInstr *&OtherMI) {
1552 unsigned Opcode = MI.getOpcode();
1553 assert(OtherMI && "OtherMI shouldn't be empty.");
1554
1555 Register DestDivReg, DestRemReg;
1556 if (Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_UDIV) {
1557 DestDivReg = MI.getOperand(0).getReg();
1558 DestRemReg = OtherMI->getOperand(0).getReg();
1559 } else {
1560 DestDivReg = OtherMI->getOperand(0).getReg();
1561 DestRemReg = MI.getOperand(0).getReg();
1562 }
1563
1564 bool IsSigned =
1565 Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_SREM;
1566
1567 // Check which instruction is first in the block so we don't break def-use
1568 // deps by "moving" the instruction incorrectly. Also keep track of which
1569 // instruction is first so we pick its operands, avoiding use-before-def
1570 // bugs.
1571 MachineInstr *FirstInst = dominates(MI, *OtherMI) ? &MI : OtherMI;
1572 Builder.setInstrAndDebugLoc(*FirstInst);
1573
1574 Builder.buildInstr(IsSigned ? TargetOpcode::G_SDIVREM
1575 : TargetOpcode::G_UDIVREM,
1576 {DestDivReg, DestRemReg},
1577 { FirstInst->getOperand(1), FirstInst->getOperand(2) });
1578 MI.eraseFromParent();
1579 OtherMI->eraseFromParent();
1580}
1581
1583 MachineInstr *&BrCond) {
1584 assert(MI.getOpcode() == TargetOpcode::G_BR);
1585
1586 // Try to match the following:
1587 // bb1:
1588 // G_BRCOND %c1, %bb2
1589 // G_BR %bb3
1590 // bb2:
1591 // ...
1592 // bb3:
1593
1594 // The above pattern does not have a fall through to the successor bb2, always
1595 // resulting in a branch no matter which path is taken. Here we try to find
1596 // and replace that pattern with a conditional branch to bb3, otherwise
1597 // falling through to bb2. This is generally better for branch predictors.
1598
1599 MachineBasicBlock *MBB = MI.getParent();
1601 if (BrIt == MBB->begin())
1602 return false;
1603 assert(std::next(BrIt) == MBB->end() && "expected G_BR to be a terminator");
1604
1605 BrCond = &*std::prev(BrIt);
1606 if (BrCond->getOpcode() != TargetOpcode::G_BRCOND)
1607 return false;
1608
1609 // Check that the next block is the conditional branch target. Also make sure
1610 // that it isn't the same as the G_BR's target (otherwise, this will loop.)
1611 MachineBasicBlock *BrCondTarget = BrCond->getOperand(1).getMBB();
1612 return BrCondTarget != MI.getOperand(0).getMBB() &&
1613 MBB->isLayoutSuccessor(BrCondTarget);
1614}
1615
1617 MachineInstr *&BrCond) {
1618 MachineBasicBlock *BrTarget = MI.getOperand(0).getMBB();
1620 LLT Ty = MRI.getType(BrCond->getOperand(0).getReg());
1621 // FIXME: Does int/fp matter for this? If so, we might need to restrict
1622 // this to i1 only since we might not know for sure what kind of
1623 // compare generated the condition value.
1624 auto True = Builder.buildConstant(
1625 Ty, getICmpTrueVal(getTargetLowering(), false, false));
1626 auto Xor = Builder.buildXor(Ty, BrCond->getOperand(0), True);
1627
1628 auto *FallthroughBB = BrCond->getOperand(1).getMBB();
1630 MI.getOperand(0).setMBB(FallthroughBB);
1632
1633 // Change the conditional branch to use the inverted condition and
1634 // new target block.
1635 Observer.changingInstr(*BrCond);
1636 BrCond->getOperand(0).setReg(Xor.getReg(0));
1637 BrCond->getOperand(1).setMBB(BrTarget);
1638 Observer.changedInstr(*BrCond);
1639}
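// Illustrative before/after (hypothetical MIR), with bb.2 the layout
// successor of bb.1:
//   bb.1: G_BRCOND %c(s1), %bb.2
//         G_BR %bb.3
// becomes
//   bb.1: %true:_(s1) = G_CONSTANT i1 true
//         %inv:_(s1)  = G_XOR %c, %true
//         G_BRCOND %inv(s1), %bb.3   ; falls through to bb.2
// (the exact "true" constant comes from getICmpTrueVal for the target).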
1640
1641
1643 MachineIRBuilder HelperBuilder(MI);
1644 GISelObserverWrapper DummyObserver;
1645 LegalizerHelper Helper(HelperBuilder.getMF(), DummyObserver, HelperBuilder);
1646 return Helper.lowerMemcpyInline(MI) ==
1648}
1649
1651 MachineIRBuilder HelperBuilder(MI);
1652 GISelObserverWrapper DummyObserver;
1653 LegalizerHelper Helper(HelperBuilder.getMF(), DummyObserver, HelperBuilder);
1654 return Helper.lowerMemCpyFamily(MI, MaxLen) ==
1656}
1657
1659 const MachineRegisterInfo &MRI,
1660 const APFloat &Val) {
1661 APFloat Result(Val);
1662 switch (MI.getOpcode()) {
1663 default:
1664 llvm_unreachable("Unexpected opcode!");
1665 case TargetOpcode::G_FNEG: {
1666 Result.changeSign();
1667 return Result;
1668 }
1669 case TargetOpcode::G_FABS: {
1670 Result.clearSign();
1671 return Result;
1672 }
1673 case TargetOpcode::G_FPTRUNC: {
1674 bool Unused;
1675 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
1677 &Unused);
1678 return Result;
1679 }
1680 case TargetOpcode::G_FSQRT: {
1681 bool Unused;
1683 &Unused);
1684 Result = APFloat(sqrt(Result.convertToDouble()));
1685 break;
1686 }
1687 case TargetOpcode::G_FLOG2: {
1688 bool Unused;
1690 &Unused);
1691 Result = APFloat(log2(Result.convertToDouble()));
1692 break;
1693 }
1694 }
1695 // Convert `APFloat` to the appropriate IEEE type depending on `DstTy`.
1696 // Otherwise, `buildFConstant` will assert on size mismatch. Only `G_FSQRT`
1697 // and `G_FLOG2` reach here.
1698 bool Unused;
1699 Result.convert(Val.getSemantics(), APFloat::rmNearestTiesToEven, &Unused);
1700 return Result;
1701}
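// Worked examples (illustrative): G_FNEG of 2.0 folds to -2.0 and G_FABS of
// -1.5 folds to 1.5 directly on the APFloat, while G_FLOG2 of 8.0 is
// evaluated as log2(8.0) = 3.0 in double precision and then converted back
// to the semantics of the original constant before the caller builds the
// G_FCONSTANT.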
1702
1704 const ConstantFP *Cst) {
1705 APFloat Folded = constantFoldFpUnary(MI, MRI, Cst->getValue());
1706 const ConstantFP *NewCst = ConstantFP::get(Builder.getContext(), Folded);
1707 Builder.buildFConstant(MI.getOperand(0), *NewCst);
1708 MI.eraseFromParent();
1709}
1710
1712 PtrAddChain &MatchInfo) {
1713 // We're trying to match the following pattern:
1714 // %t1 = G_PTR_ADD %base, G_CONSTANT imm1
1715 // %root = G_PTR_ADD %t1, G_CONSTANT imm2
1716 // -->
1717 // %root = G_PTR_ADD %base, G_CONSTANT (imm1 + imm2)
1718
1719 if (MI.getOpcode() != TargetOpcode::G_PTR_ADD)
1720 return false;
1721
1722 Register Add2 = MI.getOperand(1).getReg();
1723 Register Imm1 = MI.getOperand(2).getReg();
1724 auto MaybeImmVal = getIConstantVRegValWithLookThrough(Imm1, MRI);
1725 if (!MaybeImmVal)
1726 return false;
1727
1728 MachineInstr *Add2Def = MRI.getVRegDef(Add2);
1729 if (!Add2Def || Add2Def->getOpcode() != TargetOpcode::G_PTR_ADD)
1730 return false;
1731
1732 Register Base = Add2Def->getOperand(1).getReg();
1733 Register Imm2 = Add2Def->getOperand(2).getReg();
1734 auto MaybeImm2Val = getIConstantVRegValWithLookThrough(Imm2, MRI);
1735 if (!MaybeImm2Val)
1736 return false;
1737
1738 // Check if the new combined immediate forms an illegal addressing mode.
1739 // Do not combine if it was legal before but would get illegal.
1740 // To do so, we need to find a load/store user of the pointer to get
1741 // the access type.
1742 Type *AccessTy = nullptr;
1743 auto &MF = *MI.getMF();
1744 for (auto &UseMI : MRI.use_nodbg_instructions(MI.getOperand(0).getReg())) {
1745 if (auto *LdSt = dyn_cast<GLoadStore>(&UseMI)) {
1746 AccessTy = getTypeForLLT(MRI.getType(LdSt->getReg(0)),
1747 MF.getFunction().getContext());
1748 break;
1749 }
1750 }
1751 TargetLoweringBase::AddrMode AMNew;
1752 APInt CombinedImm = MaybeImmVal->Value + MaybeImm2Val->Value;
1753 AMNew.BaseOffs = CombinedImm.getSExtValue();
1754 if (AccessTy) {
1755 AMNew.HasBaseReg = true;
1756 TargetLoweringBase::AddrMode AMOld;
1757 AMOld.BaseOffs = MaybeImmVal->Value.getSExtValue();
1758 AMOld.HasBaseReg = true;
1759 unsigned AS = MRI.getType(Add2).getAddressSpace();
1760 const auto &TLI = *MF.getSubtarget().getTargetLowering();
1761 if (TLI.isLegalAddressingMode(MF.getDataLayout(), AMOld, AccessTy, AS) &&
1762 !TLI.isLegalAddressingMode(MF.getDataLayout(), AMNew, AccessTy, AS))
1763 return false;
1764 }
1765
1766 // Pass the combined immediate to the apply function.
1767 MatchInfo.Imm = AMNew.BaseOffs;
1768 MatchInfo.Base = Base;
1769 MatchInfo.Bank = getRegBank(Imm2);
1770 return true;
1771}
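// Illustrative example (not from the original source; offsets are made up):
//   %c4:_(s64)  = G_CONSTANT i64 4
//   %c8:_(s64)  = G_CONSTANT i64 8
//   %t1:_(p0)   = G_PTR_ADD %base, %c4(s64)
//   %root:_(p0) = G_PTR_ADD %t1, %c8(s64)
// matches with MatchInfo.Imm == 12, provided a base+12 addressing mode is
// still legal for the access type found above, and the apply function below
// rewrites %root to:
//   %c12:_(s64) = G_CONSTANT i64 12
//   %root:_(p0) = G_PTR_ADD %base, %c12(s64)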
1772
1773void CombinerHelper::applyPtrAddImmedChain(MachineInstr &MI,
1774 PtrAddChain &MatchInfo) {
1775 assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
1776 MachineIRBuilder MIB(MI);
1777 LLT OffsetTy = MRI.getType(MI.getOperand(2).getReg());
1778 auto NewOffset = MIB.buildConstant(OffsetTy, MatchInfo.Imm);
1779 setRegBank(NewOffset.getReg(0), MatchInfo.Bank);
1780 Observer.changingInstr(MI);
1781 MI.getOperand(1).setReg(MatchInfo.Base);
1782 MI.getOperand(2).setReg(NewOffset.getReg(0));
1783 Observer.changedInstr(MI);
1784}
1785
1786bool CombinerHelper::matchShiftImmedChain(MachineInstr &MI,
1787 RegisterImmPair &MatchInfo) {
1788 // We're trying to match the following pattern with any of
1789 // G_SHL/G_ASHR/G_LSHR/G_SSHLSAT/G_USHLSAT shift instructions:
1790 // %t1 = SHIFT %base, G_CONSTANT imm1
1791 // %root = SHIFT %t1, G_CONSTANT imm2
1792 // -->
1793 // %root = SHIFT %base, G_CONSTANT (imm1 + imm2)
1794
1795 unsigned Opcode = MI.getOpcode();
1796 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
1797 Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||
1798 Opcode == TargetOpcode::G_USHLSAT) &&
1799 "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");
1800
1801 Register Shl2 = MI.getOperand(1).getReg();
1802 Register Imm1 = MI.getOperand(2).getReg();
1803 auto MaybeImmVal = getIConstantVRegValWithLookThrough(Imm1, MRI);
1804 if (!MaybeImmVal)
1805 return false;
1806
1807 MachineInstr *Shl2Def = MRI.getUniqueVRegDef(Shl2);
1808 if (Shl2Def->getOpcode() != Opcode)
1809 return false;
1810
1811 Register Base = Shl2Def->getOperand(1).getReg();
1812 Register Imm2 = Shl2Def->getOperand(2).getReg();
1813 auto MaybeImm2Val = getIConstantVRegValWithLookThrough(Imm2, MRI);
1814 if (!MaybeImm2Val)
1815 return false;
1816
1817 // Pass the combined immediate to the apply function.
1818 MatchInfo.Imm =
1819 (MaybeImmVal->Value.getZExtValue() + MaybeImm2Val->Value).getZExtValue();
1820 MatchInfo.Reg = Base;
1821
1822 // There is no simple replacement for a saturating unsigned left shift that
1823 // exceeds the scalar size.
1824 if (Opcode == TargetOpcode::G_USHLSAT &&
1825 MatchInfo.Imm >= MRI.getType(Shl2).getScalarSizeInBits())
1826 return false;
1827
1828 return true;
1829}
1830
1831void CombinerHelper::applyShiftImmedChain(MachineInstr &MI,
1832 RegisterImmPair &MatchInfo) {
1833 unsigned Opcode = MI.getOpcode();
1834 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
1835 Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||
1836 Opcode == TargetOpcode::G_USHLSAT) &&
1837 "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");
1838
1839 LLT Ty = MRI.getType(MI.getOperand(1).getReg());
1840 unsigned const ScalarSizeInBits = Ty.getScalarSizeInBits();
1841 auto Imm = MatchInfo.Imm;
1842
1843 if (Imm >= ScalarSizeInBits) {
1844 // Any logical shift that exceeds scalar size will produce zero.
1845 if (Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_LSHR) {
1846 Builder.buildConstant(MI.getOperand(0), 0);
1847 MI.eraseFromParent();
1848 return;
1849 }
1850 // Arithmetic shift and saturating signed left shift have no effect beyond
1851 // scalar size.
1852 Imm = ScalarSizeInBits - 1;
1853 }
1854
1855 LLT ImmTy = MRI.getType(MI.getOperand(2).getReg());
1856 Register NewImm = Builder.buildConstant(ImmTy, Imm).getReg(0);
1857 Observer.changingInstr(MI);
1858 MI.getOperand(1).setReg(MatchInfo.Reg);
1859 MI.getOperand(2).setReg(NewImm);
1860 Observer.changedInstr(MI);
1861}
1862
1863bool CombinerHelper::matchShiftOfShiftedLogic(MachineInstr &MI,
1864 ShiftOfShiftedLogic &MatchInfo) {
1865 // We're trying to match the following pattern with any of
1866 // G_SHL/G_ASHR/G_LSHR/G_USHLSAT/G_SSHLSAT shift instructions in combination
1867 // with any of G_AND/G_OR/G_XOR logic instructions.
1868 // %t1 = SHIFT %X, G_CONSTANT C0
1869 // %t2 = LOGIC %t1, %Y
1870 // %root = SHIFT %t2, G_CONSTANT C1
1871 // -->
1872 // %t3 = SHIFT %X, G_CONSTANT (C0+C1)
1873 // %t4 = SHIFT %Y, G_CONSTANT C1
1874 // %root = LOGIC %t3, %t4
1875 unsigned ShiftOpcode = MI.getOpcode();
1876 assert((ShiftOpcode == TargetOpcode::G_SHL ||
1877 ShiftOpcode == TargetOpcode::G_ASHR ||
1878 ShiftOpcode == TargetOpcode::G_LSHR ||
1879 ShiftOpcode == TargetOpcode::G_USHLSAT ||
1880 ShiftOpcode == TargetOpcode::G_SSHLSAT) &&
1881 "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT");
1882
1883 // Match a one-use bitwise logic op.
1884 Register LogicDest = MI.getOperand(1).getReg();
1885 if (!MRI.hasOneNonDBGUse(LogicDest))
1886 return false;
1887
1888 MachineInstr *LogicMI = MRI.getUniqueVRegDef(LogicDest);
1889 unsigned LogicOpcode = LogicMI->getOpcode();
1890 if (LogicOpcode != TargetOpcode::G_AND && LogicOpcode != TargetOpcode::G_OR &&
1891 LogicOpcode != TargetOpcode::G_XOR)
1892 return false;
1893
1894 // Find a matching one-use shift by constant.
1895 const Register C1 = MI.getOperand(2).getReg();
1896 auto MaybeImmVal = getIConstantVRegValWithLookThrough(C1, MRI);
1897 if (!MaybeImmVal || MaybeImmVal->Value == 0)
1898 return false;
1899
1900 const uint64_t C1Val = MaybeImmVal->Value.getZExtValue();
1901
1902 auto matchFirstShift = [&](const MachineInstr *MI, uint64_t &ShiftVal) {
1903 // Shift should match previous one and should be a one-use.
1904 if (MI->getOpcode() != ShiftOpcode ||
1905 !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
1906 return false;
1907
1908 // Must be a constant.
1909 auto MaybeImmVal =
1910 getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
1911 if (!MaybeImmVal)
1912 return false;
1913
1914 ShiftVal = MaybeImmVal->Value.getSExtValue();
1915 return true;
1916 };
1917
1918 // Logic ops are commutative, so check each operand for a match.
1919 Register LogicMIReg1 = LogicMI->getOperand(1).getReg();
1920 MachineInstr *LogicMIOp1 = MRI.getUniqueVRegDef(LogicMIReg1);
1921 Register LogicMIReg2 = LogicMI->getOperand(2).getReg();
1922 MachineInstr *LogicMIOp2 = MRI.getUniqueVRegDef(LogicMIReg2);
1923 uint64_t C0Val;
1924
1925 if (matchFirstShift(LogicMIOp1, C0Val)) {
1926 MatchInfo.LogicNonShiftReg = LogicMIReg2;
1927 MatchInfo.Shift2 = LogicMIOp1;
1928 } else if (matchFirstShift(LogicMIOp2, C0Val)) {
1929 MatchInfo.LogicNonShiftReg = LogicMIReg1;
1930 MatchInfo.Shift2 = LogicMIOp2;
1931 } else
1932 return false;
1933
1934 MatchInfo.ValSum = C0Val + C1Val;
1935
1936 // The fold is not valid if the sum of the shift values exceeds bitwidth.
1937 if (MatchInfo.ValSum >= MRI.getType(LogicDest).getScalarSizeInBits())
1938 return false;
1939
1940 MatchInfo.Logic = LogicMI;
1941 return true;
1942}
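// Illustrative instance of the pattern documented above (constants made up),
// for G_SHL with C0 = 2 and C1 = 3 on s32 values:
//   %t1:_(s32)   = G_SHL %X, 2
//   %t2:_(s32)   = G_AND %t1, %Y
//   %root:_(s32) = G_SHL %t2, 3
// MatchInfo.ValSum == 5 < 32, so the apply function below produces:
//   %t3:_(s32)   = G_SHL %X, 5
//   %t4:_(s32)   = G_SHL %Y, 3
//   %root:_(s32) = G_AND %t3, %t4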
1943
1944void CombinerHelper::applyShiftOfShiftedLogic(MachineInstr &MI,
1945 ShiftOfShiftedLogic &MatchInfo) {
1946 unsigned Opcode = MI.getOpcode();
1947 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
1948 Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_USHLSAT ||
1949 Opcode == TargetOpcode::G_SSHLSAT) &&
1950 "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT");
1951
1952 LLT ShlType = MRI.getType(MI.getOperand(2).getReg());
1953 LLT DestType = MRI.getType(MI.getOperand(0).getReg());
1954
1955 Register Const = Builder.buildConstant(ShlType, MatchInfo.ValSum).getReg(0);
1956
1957 Register Shift1Base = MatchInfo.Shift2->getOperand(1).getReg();
1958 Register Shift1 =
1959 Builder.buildInstr(Opcode, {DestType}, {Shift1Base, Const}).getReg(0);
1960
1961 // If LogicNonShiftReg is the same as Shift1Base and the shift1 constant is
1962 // the same as the MatchInfo.Shift2 constant, CSEMIRBuilder will reuse the
1963 // old shift1 when building shift2. Erasing MatchInfo.Shift2 at the end would
1964 // then actually delete the reused shift1 and crash later, so erase it
1965 // earlier to avoid the crash.
1966 MatchInfo.Shift2->eraseFromParent();
1967
1968 Register Shift2Const = MI.getOperand(2).getReg();
1969 Register Shift2 = Builder
1970 .buildInstr(Opcode, {DestType},
1971 {MatchInfo.LogicNonShiftReg, Shift2Const})
1972 .getReg(0);
1973
1974 Register Dest = MI.getOperand(0).getReg();
1975 Builder.buildInstr(MatchInfo.Logic->getOpcode(), {Dest}, {Shift1, Shift2});
1976
1977 // This was one use so it's safe to remove it.
1978 MatchInfo.Logic->eraseFromParent();
1979
1980 MI.eraseFromParent();
1981}
1982
1983bool CombinerHelper::matchCommuteShift(MachineInstr &MI, BuildFnTy &MatchInfo) {
1984 assert(MI.getOpcode() == TargetOpcode::G_SHL && "Expected G_SHL");
1985 // Combine (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
1986 // Combine (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
1987 auto &Shl = cast<GenericMachineInstr>(MI);
1988 Register DstReg = Shl.getReg(0);
1989 Register SrcReg = Shl.getReg(1);
1990 Register ShiftReg = Shl.getReg(2);
1991 Register X, C1;
1992
1993 if (!getTargetLowering().isDesirableToCommuteWithShift(MI, !isPreLegalize()))
1994 return false;
1995
1996 if (!mi_match(SrcReg, MRI,
1997 m_OneNonDBGUse(m_any_of(m_GAdd(m_Reg(X), m_Reg(C1)),
1998 m_GOr(m_Reg(X), m_Reg(C1))))))
1999 return false;
2000
2001 APInt C1Val, C2Val;
2002 if (!mi_match(C1, MRI, m_ICstOrSplat(C1Val)) ||
2003 !mi_match(ShiftReg, MRI, m_ICstOrSplat(C2Val)))
2004 return false;
2005
2006 auto *SrcDef = MRI.getVRegDef(SrcReg);
2007 assert((SrcDef->getOpcode() == TargetOpcode::G_ADD ||
2008 SrcDef->getOpcode() == TargetOpcode::G_OR) && "Unexpected op");
2009 LLT SrcTy = MRI.getType(SrcReg);
2010 MatchInfo = [=](MachineIRBuilder &B) {
2011 auto S1 = B.buildShl(SrcTy, X, ShiftReg);
2012 auto S2 = B.buildShl(SrcTy, C1, ShiftReg);
2013 B.buildInstr(SrcDef->getOpcode(), {DstReg}, {S1, S2});
2014 };
2015 return true;
2016}
2017
2018bool CombinerHelper::matchCombineMulToShl(MachineInstr &MI,
2019 unsigned &ShiftVal) {
2020 assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
2021 auto MaybeImmVal =
2022 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
2023 if (!MaybeImmVal)
2024 return false;
2025
2026 ShiftVal = MaybeImmVal->Value.exactLogBase2();
2027 return (static_cast<int32_t>(ShiftVal) != -1);
2028}
2029
2030void CombinerHelper::applyCombineMulToShl(MachineInstr &MI,
2031 unsigned &ShiftVal) {
2032 assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
2033 MachineIRBuilder MIB(MI);
2034 LLT ShiftTy = MRI.getType(MI.getOperand(0).getReg());
2035 auto ShiftCst = MIB.buildConstant(ShiftTy, ShiftVal);
2036 Observer.changingInstr(MI);
2037 MI.setDesc(MIB.getTII().get(TargetOpcode::G_SHL));
2038 MI.getOperand(2).setReg(ShiftCst.getReg(0));
2039 Observer.changedInstr(MI);
2040}
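// Illustrative example (constant made up): exactLogBase2(8) == 3, so
//   %c:_(s32) = G_CONSTANT i32 8
//   %d:_(s32) = G_MUL %x, %c
// is rewritten in place by the code above into
//   %s:_(s32) = G_CONSTANT i32 3
//   %d:_(s32) = G_SHL %x, %s(s32)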
2041
2042// shl ([sza]ext x), y => zext (shl x, y), if shift does not overflow source
2043bool CombinerHelper::matchCombineShlOfExtend(MachineInstr &MI,
2044 RegisterImmPair &MatchData) {
2045 assert(MI.getOpcode() == TargetOpcode::G_SHL && KB);
2046 if (!getTargetLowering().isDesirableToPullExtFromShl(MI))
2047 return false;
2048
2049 Register LHS = MI.getOperand(1).getReg();
2050
2051 Register ExtSrc;
2052 if (!mi_match(LHS, MRI, m_GAnyExt(m_Reg(ExtSrc))) &&
2053 !mi_match(LHS, MRI, m_GZExt(m_Reg(ExtSrc))) &&
2054 !mi_match(LHS, MRI, m_GSExt(m_Reg(ExtSrc))))
2055 return false;
2056
2057 Register RHS = MI.getOperand(2).getReg();
2058 MachineInstr *MIShiftAmt = MRI.getVRegDef(RHS);
2059 auto MaybeShiftAmtVal = isConstantOrConstantSplatVector(*MIShiftAmt, MRI);
2060 if (!MaybeShiftAmtVal)
2061 return false;
2062
2063 if (LI) {
2064 LLT SrcTy = MRI.getType(ExtSrc);
2065
2066 // We only really care about the legality with the shifted value. We can
2067 // pick any type for the constant shift amount, so ask the target what to
2068 // use. Otherwise we would have to guess and hope it is reported as legal.
2069 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(SrcTy);
2070 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SHL, {SrcTy, ShiftAmtTy}}))
2071 return false;
2072 }
2073
2074 int64_t ShiftAmt = MaybeShiftAmtVal->getSExtValue();
2075 MatchData.Reg = ExtSrc;
2076 MatchData.Imm = ShiftAmt;
2077
2078 unsigned MinLeadingZeros = KB->getKnownZeroes(ExtSrc).countl_one();
2079 unsigned SrcTySize = MRI.getType(ExtSrc).getScalarSizeInBits();
2080 return MinLeadingZeros >= ShiftAmt && ShiftAmt < SrcTySize;
2081}
2082
2083void CombinerHelper::applyCombineShlOfExtend(MachineInstr &MI,
2084 const RegisterImmPair &MatchData) {
2085 Register ExtSrcReg = MatchData.Reg;
2086 int64_t ShiftAmtVal = MatchData.Imm;
2087
2088 LLT ExtSrcTy = MRI.getType(ExtSrcReg);
2089 auto ShiftAmt = Builder.buildConstant(ExtSrcTy, ShiftAmtVal);
2090 auto NarrowShift =
2091 Builder.buildShl(ExtSrcTy, ExtSrcReg, ShiftAmt, MI.getFlags());
2092 Builder.buildZExt(MI.getOperand(0), NarrowShift);
2093 MI.eraseFromParent();
2094}
2095
2096bool CombinerHelper::matchCombineMergeUnmerge(MachineInstr &MI,
2097 Register &MatchInfo) {
2098 GMerge &Merge = cast<GMerge>(MI);
2099 SmallVector<Register, 16> MergedValues;
2100 for (unsigned I = 0; I < Merge.getNumSources(); ++I)
2101 MergedValues.emplace_back(Merge.getSourceReg(I));
2102
2103 auto *Unmerge = getOpcodeDef<GUnmerge>(MergedValues[0], MRI);
2104 if (!Unmerge || Unmerge->getNumDefs() != Merge.getNumSources())
2105 return false;
2106
2107 for (unsigned I = 0; I < MergedValues.size(); ++I)
2108 if (MergedValues[I] != Unmerge->getReg(I))
2109 return false;
2110
2111 MatchInfo = Unmerge->getSourceReg();
2112 return true;
2113}
2114
2115static Register peekThroughBitcast(Register Reg,
2116 const MachineRegisterInfo &MRI) {
2117 while (mi_match(Reg, MRI, m_GBitcast(m_Reg(Reg))))
2118 ;
2119
2120 return Reg;
2121}
2122
2123bool CombinerHelper::matchCombineUnmergeMergeToPlainValues(
2124 MachineInstr &MI, SmallVectorImpl<Register> &Operands) {
2125 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2126 "Expected an unmerge");
2127 auto &Unmerge = cast<GUnmerge>(MI);
2128 Register SrcReg = peekThroughBitcast(Unmerge.getSourceReg(), MRI);
2129
2130 auto *SrcInstr = getOpcodeDef<GMergeLikeInstr>(SrcReg, MRI);
2131 if (!SrcInstr)
2132 return false;
2133
2134 // Check the source type of the merge.
2135 LLT SrcMergeTy = MRI.getType(SrcInstr->getSourceReg(0));
2136 LLT Dst0Ty = MRI.getType(Unmerge.getReg(0));
2137 bool SameSize = Dst0Ty.getSizeInBits() == SrcMergeTy.getSizeInBits();
2138 if (SrcMergeTy != Dst0Ty && !SameSize)
2139 return false;
2140 // They are the same now (modulo a bitcast).
2141 // We can collect all the src registers.
2142 for (unsigned Idx = 0; Idx < SrcInstr->getNumSources(); ++Idx)
2143 Operands.push_back(SrcInstr->getSourceReg(Idx));
2144 return true;
2145}
2146
2147void CombinerHelper::applyCombineUnmergeMergeToPlainValues(
2148 MachineInstr &MI, SmallVectorImpl<Register> &Operands) {
2149 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2150 "Expected an unmerge");
2151 assert((MI.getNumOperands() - 1 == Operands.size()) &&
2152 "Not enough operands to replace all defs");
2153 unsigned NumElems = MI.getNumOperands() - 1;
2154
2155 LLT SrcTy = MRI.getType(Operands[0]);
2156 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
2157 bool CanReuseInputDirectly = DstTy == SrcTy;
2158 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
2159 Register DstReg = MI.getOperand(Idx).getReg();
2160 Register SrcReg = Operands[Idx];
2161
2162 // This combine may run after RegBankSelect, so we need to be aware of
2163 // register banks.
2164 const auto &DstCB = MRI.getRegClassOrRegBank(DstReg);
2165 if (!DstCB.isNull() && DstCB != MRI.getRegClassOrRegBank(SrcReg)) {
2166 SrcReg = Builder.buildCopy(MRI.getType(SrcReg), SrcReg).getReg(0);
2167 MRI.setRegClassOrRegBank(SrcReg, DstCB);
2168 }
2169
2170 if (CanReuseInputDirectly)
2171 replaceRegWith(MRI, DstReg, SrcReg);
2172 else
2173 Builder.buildCast(DstReg, SrcReg);
2174 }
2175 MI.eraseFromParent();
2176}
2177
2178bool CombinerHelper::matchCombineUnmergeConstant(MachineInstr &MI,
2179 SmallVectorImpl<APInt> &Csts) {
2180 unsigned SrcIdx = MI.getNumOperands() - 1;
2181 Register SrcReg = MI.getOperand(SrcIdx).getReg();
2182 MachineInstr *SrcInstr = MRI.getVRegDef(SrcReg);
2183 if (SrcInstr->getOpcode() != TargetOpcode::G_CONSTANT &&
2184 SrcInstr->getOpcode() != TargetOpcode::G_FCONSTANT)
2185 return false;
2186 // Break down the big constant into smaller ones.
2187 const MachineOperand &CstVal = SrcInstr->getOperand(1);
2188 APInt Val = SrcInstr->getOpcode() == TargetOpcode::G_CONSTANT
2189 ? CstVal.getCImm()->getValue()
2190 : CstVal.getFPImm()->getValueAPF().bitcastToAPInt();
2191
2192 LLT Dst0Ty = MRI.getType(MI.getOperand(0).getReg());
2193 unsigned ShiftAmt = Dst0Ty.getSizeInBits();
2194 // Unmerge a constant.
2195 for (unsigned Idx = 0; Idx != SrcIdx; ++Idx) {
2196 Csts.emplace_back(Val.trunc(ShiftAmt));
2197 Val = Val.lshr(ShiftAmt);
2198 }
2199
2200 return true;
2201}
2202
2203void CombinerHelper::applyCombineUnmergeConstant(MachineInstr &MI,
2204 SmallVectorImpl<APInt> &Csts) {
2205 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2206 "Expected an unmerge");
2207 assert((MI.getNumOperands() - 1 == Csts.size()) &&
2208 "Not enough operands to replace all defs");
2209 unsigned NumElems = MI.getNumOperands() - 1;
2210 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
2211 Register DstReg = MI.getOperand(Idx).getReg();
2212 Builder.buildConstant(DstReg, Csts[Idx]);
2213 }
2214
2215 MI.eraseFromParent();
2216}
2217
2218bool CombinerHelper::matchCombineUnmergeUndef(
2219 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
2220 unsigned SrcIdx = MI.getNumOperands() - 1;
2221 Register SrcReg = MI.getOperand(SrcIdx).getReg();
2222 MatchInfo = [&MI](MachineIRBuilder &B) {
2223 unsigned NumElems = MI.getNumOperands() - 1;
2224 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
2225 Register DstReg = MI.getOperand(Idx).getReg();
2226 B.buildUndef(DstReg);
2227 }
2228 };
2229 return isa<GImplicitDef>(MRI.getVRegDef(SrcReg));
2230}
2231
2232bool CombinerHelper::matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) {
2233 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2234 "Expected an unmerge");
2235 if (MRI.getType(MI.getOperand(0).getReg()).isVector() ||
2236 MRI.getType(MI.getOperand(MI.getNumDefs()).getReg()).isVector())
2237 return false;
2238 // Check that all the lanes are dead except the first one.
2239 for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
2240 if (!MRI.use_nodbg_empty(MI.getOperand(Idx).getReg()))
2241 return false;
2242 }
2243 return true;
2244}
2245
2246void CombinerHelper::applyCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) {
2247 Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
2248 Register Dst0Reg = MI.getOperand(0).getReg();
2249 Builder.buildTrunc(Dst0Reg, SrcReg);
2250 MI.eraseFromParent();
2251}
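// Illustrative example (types made up): when every lane except the first is
// dead,
//   %a:_(s32), %b:_(s32) = G_UNMERGE_VALUES %x:_(s64)   ; %b has no uses
// is replaced by the code above with
//   %a:_(s32) = G_TRUNC %x:_(s64)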
2252
2253bool CombinerHelper::matchCombineUnmergeZExtToZExt(MachineInstr &MI) {
2254 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2255 "Expected an unmerge");
2256 Register Dst0Reg = MI.getOperand(0).getReg();
2257 LLT Dst0Ty = MRI.getType(Dst0Reg);
2258 // G_ZEXT on vector applies to each lane, so it will
2259 // affect all destinations. Therefore we won't be able
2260 // to simplify the unmerge to just the first definition.
2261 if (Dst0Ty.isVector())
2262 return false;
2263 Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
2264 LLT SrcTy = MRI.getType(SrcReg);
2265 if (SrcTy.isVector())
2266 return false;
2267
2268 Register ZExtSrcReg;
2269 if (!mi_match(SrcReg, MRI, m_GZExt(m_Reg(ZExtSrcReg))))
2270 return false;
2271
2272 // Finally we can replace the first definition with
2273 // a zext of the source if the definition is big enough to hold
2274 // all of ZExtSrc bits.
2275 LLT ZExtSrcTy = MRI.getType(ZExtSrcReg);
2276 return ZExtSrcTy.getSizeInBits() <= Dst0Ty.getSizeInBits();
2277}
2278
2279void CombinerHelper::applyCombineUnmergeZExtToZExt(MachineInstr &MI) {
2280 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2281 "Expected an unmerge");
2282
2283 Register Dst0Reg = MI.getOperand(0).getReg();
2284
2285 MachineInstr *ZExtInstr =
2286 MRI.getVRegDef(MI.getOperand(MI.getNumDefs()).getReg());
2287 assert(ZExtInstr && ZExtInstr->getOpcode() == TargetOpcode::G_ZEXT &&
2288 "Expecting a G_ZEXT");
2289
2290 Register ZExtSrcReg = ZExtInstr->getOperand(1).getReg();
2291 LLT Dst0Ty = MRI.getType(Dst0Reg);
2292 LLT ZExtSrcTy = MRI.getType(ZExtSrcReg);
2293
2294 if (Dst0Ty.getSizeInBits() > ZExtSrcTy.getSizeInBits()) {
2295 Builder.buildZExt(Dst0Reg, ZExtSrcReg);
2296 } else {
2297 assert(Dst0Ty.getSizeInBits() == ZExtSrcTy.getSizeInBits() &&
2298 "ZExt src doesn't fit in destination");
2299 replaceRegWith(MRI, Dst0Reg, ZExtSrcReg);
2300 }
2301
2302 Register ZeroReg;
2303 for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
2304 if (!ZeroReg)
2305 ZeroReg = Builder.buildConstant(Dst0Ty, 0).getReg(0);
2306 replaceRegWith(MRI, MI.getOperand(Idx).getReg(), ZeroReg);
2307 }
2308 MI.eraseFromParent();
2309}
2310
2311bool CombinerHelper::matchCombineShiftToUnmerge(MachineInstr &MI,
2312 unsigned TargetShiftSize,
2313 unsigned &ShiftVal) {
2314 assert((MI.getOpcode() == TargetOpcode::G_SHL ||
2315 MI.getOpcode() == TargetOpcode::G_LSHR ||
2316 MI.getOpcode() == TargetOpcode::G_ASHR) && "Expected a shift");
2317
2318 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
2319 if (Ty.isVector()) // TODO:
2320 return false;
2321
2322 // Don't narrow further than the requested size.
2323 unsigned Size = Ty.getSizeInBits();
2324 if (Size <= TargetShiftSize)
2325 return false;
2326
2327 auto MaybeImmVal =
2328 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
2329 if (!MaybeImmVal)
2330 return false;
2331
2332 ShiftVal = MaybeImmVal->Value.getSExtValue();
2333 return ShiftVal >= Size / 2 && ShiftVal < Size;
2334}
2335
2336void CombinerHelper::applyCombineShiftToUnmerge(MachineInstr &MI,
2337 const unsigned &ShiftVal) {
2338 Register DstReg = MI.getOperand(0).getReg();
2339 Register SrcReg = MI.getOperand(1).getReg();
2340 LLT Ty = MRI.getType(SrcReg);
2341 unsigned Size = Ty.getSizeInBits();
2342 unsigned HalfSize = Size / 2;
2343 assert(ShiftVal >= HalfSize);
2344
2345 LLT HalfTy = LLT::scalar(HalfSize);
2346
2347 auto Unmerge = Builder.buildUnmerge(HalfTy, SrcReg);
2348 unsigned NarrowShiftAmt = ShiftVal - HalfSize;
2349
2350 if (MI.getOpcode() == TargetOpcode::G_LSHR) {
2351 Register Narrowed = Unmerge.getReg(1);
2352
2353 // dst = G_LSHR s64:x, C for C >= 32
2354 // =>
2355 // lo, hi = G_UNMERGE_VALUES x
2356 // dst = G_MERGE_VALUES (G_LSHR hi, C - 32), 0
2357
2358 if (NarrowShiftAmt != 0) {
2359 Narrowed = Builder.buildLShr(HalfTy, Narrowed,
2360 Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
2361 }
2362
2363 auto Zero = Builder.buildConstant(HalfTy, 0);
2364 Builder.buildMergeLikeInstr(DstReg, {Narrowed, Zero});
2365 } else if (MI.getOpcode() == TargetOpcode::G_SHL) {
2366 Register Narrowed = Unmerge.getReg(0);
2367 // dst = G_SHL s64:x, C for C >= 32
2368 // =>
2369 // lo, hi = G_UNMERGE_VALUES x
2370 // dst = G_MERGE_VALUES 0, (G_SHL hi, C - 32)
2371 if (NarrowShiftAmt != 0) {
2372 Narrowed = Builder.buildShl(HalfTy, Narrowed,
2373 Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
2374 }
2375
2376 auto Zero = Builder.buildConstant(HalfTy, 0);
2377 Builder.buildMergeLikeInstr(DstReg, {Zero, Narrowed});
2378 } else {
2379 assert(MI.getOpcode() == TargetOpcode::G_ASHR);
2380 auto Hi = Builder.buildAShr(
2381 HalfTy, Unmerge.getReg(1),
2382 Builder.buildConstant(HalfTy, HalfSize - 1));
2383
2384 if (ShiftVal == HalfSize) {
2385 // (G_ASHR i64:x, 32) ->
2386 // G_MERGE_VALUES hi_32(x), (G_ASHR hi_32(x), 31)
2387 Builder.buildMergeLikeInstr(DstReg, {Unmerge.getReg(1), Hi});
2388 } else if (ShiftVal == Size - 1) {
2389 // Don't need a second shift.
2390 // (G_ASHR i64:x, 63) ->
2391 // %narrowed = (G_ASHR hi_32(x), 31)
2392 // G_MERGE_VALUES %narrowed, %narrowed
2393 Builder.buildMergeLikeInstr(DstReg, {Hi, Hi});
2394 } else {
2395 auto Lo = Builder.buildAShr(
2396 HalfTy, Unmerge.getReg(1),
2397 Builder.buildConstant(HalfTy, ShiftVal - HalfSize));
2398
2399 // (G_ASHR i64:x, C) ->, for C >= 32
2400 // G_MERGE_VALUES (G_ASHR hi_32(x), C - 32), (G_ASHR hi_32(x), 31)
2401 Builder.buildMergeLikeInstr(DstReg, {Lo, Hi});
2402 }
2403 }
2404
2405 MI.eraseFromParent();
2406}
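// Illustrative s64 -> 2 x s32 example for the rewrites above (shift amount
// made up): with ShiftVal = 40,
//   %d:_(s64) = G_LSHR %x:_(s64), 40
// becomes
//   %lo:_(s32), %hi:_(s32) = G_UNMERGE_VALUES %x
//   %n:_(s32) = G_LSHR %hi, 8          ; 40 - 32
//   %z:_(s32) = G_CONSTANT i32 0
//   %d:_(s64) = G_MERGE_VALUES %n(s32), %z(s32)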
2407
2408bool CombinerHelper::tryCombineShiftToUnmerge(MachineInstr &MI,
2409 unsigned TargetShiftAmount) {
2410 unsigned ShiftAmt;
2411 if (matchCombineShiftToUnmerge(MI, TargetShiftAmount, ShiftAmt)) {
2412 applyCombineShiftToUnmerge(MI, ShiftAmt);
2413 return true;
2414 }
2415
2416 return false;
2417}
2418
2419bool CombinerHelper::matchCombineI2PToP2I(MachineInstr &MI, Register &Reg) {
2420 assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
2421 Register DstReg = MI.getOperand(0).getReg();
2422 LLT DstTy = MRI.getType(DstReg);
2423 Register SrcReg = MI.getOperand(1).getReg();
2424 return mi_match(SrcReg, MRI,
2425 m_GPtrToInt(m_all_of(m_SpecificType(DstTy), m_Reg(Reg))));
2426}
2427
2428void CombinerHelper::applyCombineI2PToP2I(MachineInstr &MI, Register &Reg) {
2429 assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
2430 Register DstReg = MI.getOperand(0).getReg();
2431 Builder.buildCopy(DstReg, Reg);
2432 MI.eraseFromParent();
2433}
2434
2435void CombinerHelper::applyCombineP2IToI2P(MachineInstr &MI, Register &Reg) {
2436 assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT");
2437 Register DstReg = MI.getOperand(0).getReg();
2438 Builder.buildZExtOrTrunc(DstReg, Reg);
2439 MI.eraseFromParent();
2440}
2441
2442bool CombinerHelper::matchCombineAddP2IToPtrAdd(
2443 MachineInstr &MI, std::pair<Register, bool> &PtrReg) {
2444 assert(MI.getOpcode() == TargetOpcode::G_ADD);
2445 Register LHS = MI.getOperand(1).getReg();
2446 Register RHS = MI.getOperand(2).getReg();
2447 LLT IntTy = MRI.getType(LHS);
2448
2449 // G_PTR_ADD always has the pointer in the LHS, so we may need to commute the
2450 // instruction.
2451 PtrReg.second = false;
2452 for (Register SrcReg : {LHS, RHS}) {
2453 if (mi_match(SrcReg, MRI, m_GPtrToInt(m_Reg(PtrReg.first)))) {
2454 // Don't handle cases where the integer is implicitly converted to the
2455 // pointer width.
2456 LLT PtrTy = MRI.getType(PtrReg.first);
2457 if (PtrTy.getScalarSizeInBits() == IntTy.getScalarSizeInBits())
2458 return true;
2459 }
2460
2461 PtrReg.second = true;
2462 }
2463
2464 return false;
2465}
2466
2467void CombinerHelper::applyCombineAddP2IToPtrAdd(
2468 MachineInstr &MI, std::pair<Register, bool> &PtrReg) {
2469 Register Dst = MI.getOperand(0).getReg();
2470 Register LHS = MI.getOperand(1).getReg();
2471 Register RHS = MI.getOperand(2).getReg();
2472
2473 const bool DoCommute = PtrReg.second;
2474 if (DoCommute)
2475 std::swap(LHS, RHS);
2476 LHS = PtrReg.first;
2477
2478 LLT PtrTy = MRI.getType(LHS);
2479
2480 auto PtrAdd = Builder.buildPtrAdd(PtrTy, LHS, RHS);
2481 Builder.buildPtrToInt(Dst, PtrAdd);
2482 MI.eraseFromParent();
2483}
2484
2485bool CombinerHelper::matchCombineConstPtrAddToI2P(MachineInstr &MI,
2486 APInt &NewCst) {
2487 auto &PtrAdd = cast<GPtrAdd>(MI);
2488 Register LHS = PtrAdd.getBaseReg();
2489 Register RHS = PtrAdd.getOffsetReg();
2491
2492 if (auto RHSCst = getIConstantVRegVal(RHS, MRI)) {
2493 APInt Cst;
2494 if (mi_match(LHS, MRI, m_GIntToPtr(m_ICst(Cst)))) {
2495 auto DstTy = MRI.getType(PtrAdd.getReg(0));
2496 // G_INTTOPTR uses zero-extension
2497 NewCst = Cst.zextOrTrunc(DstTy.getSizeInBits());
2498 NewCst += RHSCst->sextOrTrunc(DstTy.getSizeInBits());
2499 return true;
2500 }
2501 }
2502
2503 return false;
2504}
2505
2506void CombinerHelper::applyCombineConstPtrAddToI2P(MachineInstr &MI,
2507 APInt &NewCst) {
2508 auto &PtrAdd = cast<GPtrAdd>(MI);
2509 Register Dst = PtrAdd.getReg(0);
2510
2511 Builder.buildConstant(Dst, NewCst);
2512 PtrAdd.eraseFromParent();
2513}
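// Illustrative fold (values made up): with 64-bit pointers,
//   %c:_(s64)   = G_CONSTANT i64 4096
//   %p:_(p0)    = G_INTTOPTR %c(s64)
//   %off:_(s64) = G_CONSTANT i64 -16
//   %q:_(p0)    = G_PTR_ADD %p, %off(s64)
// matches with NewCst = zext(4096) + sext(-16) = 4080, and the apply function
// above rebuilds %q directly from that constant.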
2514
2515bool CombinerHelper::matchCombineAnyExtTrunc(MachineInstr &MI, Register &Reg) {
2516 assert(MI.getOpcode() == TargetOpcode::G_ANYEXT && "Expected a G_ANYEXT");
2517 Register DstReg = MI.getOperand(0).getReg();
2518 Register SrcReg = MI.getOperand(1).getReg();
2519 Register OriginalSrcReg = getSrcRegIgnoringCopies(SrcReg, MRI);
2520 if (OriginalSrcReg.isValid())
2521 SrcReg = OriginalSrcReg;
2522 LLT DstTy = MRI.getType(DstReg);
2523 return mi_match(SrcReg, MRI,
2524 m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy))));
2525}
2526
2527bool CombinerHelper::matchCombineZextTrunc(MachineInstr &MI, Register &Reg) {
2528 assert(MI.getOpcode() == TargetOpcode::G_ZEXT && "Expected a G_ZEXT");
2529 Register DstReg = MI.getOperand(0).getReg();
2530 Register SrcReg = MI.getOperand(1).getReg();
2531 LLT DstTy = MRI.getType(DstReg);
2532 if (mi_match(SrcReg, MRI,
2533 m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy))))) {
2534 unsigned DstSize = DstTy.getScalarSizeInBits();
2535 unsigned SrcSize = MRI.getType(SrcReg).getScalarSizeInBits();
2536 return KB->getKnownBits(Reg).countMinLeadingZeros() >= DstSize - SrcSize;
2537 }
2538 return false;
2539}
2540
2541static LLT getMidVTForTruncRightShiftCombine(LLT ShiftTy, LLT TruncTy) {
2542 const unsigned ShiftSize = ShiftTy.getScalarSizeInBits();
2543 const unsigned TruncSize = TruncTy.getScalarSizeInBits();
2544
2545 // ShiftTy > 32 > TruncTy -> 32
2546 if (ShiftSize > 32 && TruncSize < 32)
2547 return ShiftTy.changeElementSize(32);
2548
2549 // TODO: We could also reduce to 16 bits, but that's more target-dependent.
2550 // Some targets like it, some don't, some only like it under certain
2551 // conditions/processor versions, etc.
2552 // A TL hook might be needed for this.
2553
2554 // Don't combine
2555 return ShiftTy;
2556}
2557
2558bool CombinerHelper::matchCombineTruncOfShift(
2559 MachineInstr &MI, std::pair<MachineInstr *, LLT> &MatchInfo) {
2560 assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
2561 Register DstReg = MI.getOperand(0).getReg();
2562 Register SrcReg = MI.getOperand(1).getReg();
2563
2564 if (!MRI.hasOneNonDBGUse(SrcReg))
2565 return false;
2566
2567 LLT SrcTy = MRI.getType(SrcReg);
2568 LLT DstTy = MRI.getType(DstReg);
2569
2570 MachineInstr *SrcMI = getDefIgnoringCopies(SrcReg, MRI);
2571 const auto &TL = getTargetLowering();
2572
2573 LLT NewShiftTy;
2574 switch (SrcMI->getOpcode()) {
2575 default:
2576 return false;
2577 case TargetOpcode::G_SHL: {
2578 NewShiftTy = DstTy;
2579
2580 // Make sure new shift amount is legal.
2581 KnownBits Known = KB->getKnownBits(SrcMI->getOperand(2).getReg());
2582 if (Known.getMaxValue().uge(NewShiftTy.getScalarSizeInBits()))
2583 return false;
2584 break;
2585 }
2586 case TargetOpcode::G_LSHR:
2587 case TargetOpcode::G_ASHR: {
2588 // For right shifts, we conservatively do not do the transform if the TRUNC
2589 // has any STORE users. The reason is that if we change the type of the
2590 // shift, we may break the truncstore combine.
2591 //
2592 // TODO: Fix truncstore combine to handle (trunc(lshr (trunc x), k)).
2593 for (auto &User : MRI.use_instructions(DstReg))
2594 if (User.getOpcode() == TargetOpcode::G_STORE)
2595 return false;
2596
2597 NewShiftTy = getMidVTForTruncRightShiftCombine(SrcTy, DstTy);
2598 if (NewShiftTy == SrcTy)
2599 return false;
2600
2601 // Make sure we won't lose information by truncating the high bits.
2602 KnownBits Known = KB->getKnownBits(SrcMI->getOperand(2).getReg());
2603 if (Known.getMaxValue().ugt(NewShiftTy.getScalarSizeInBits() -
2604 DstTy.getScalarSizeInBits()))
2605 return false;
2606 break;
2607 }
2608 }
2609
2610 if (!isLegalOrBeforeLegalizer(
2611 {SrcMI->getOpcode(),
2612 {NewShiftTy, TL.getPreferredShiftAmountTy(NewShiftTy)}}))
2613 return false;
2614
2615 MatchInfo = std::make_pair(SrcMI, NewShiftTy);
2616 return true;
2617}
2618
2619void CombinerHelper::applyCombineTruncOfShift(
2620 MachineInstr &MI, std::pair<MachineInstr *, LLT> &MatchInfo) {
2621 MachineInstr *ShiftMI = MatchInfo.first;
2622 LLT NewShiftTy = MatchInfo.second;
2623
2624 Register Dst = MI.getOperand(0).getReg();
2625 LLT DstTy = MRI.getType(Dst);
2626
2627 Register ShiftAmt = ShiftMI->getOperand(2).getReg();
2628 Register ShiftSrc = ShiftMI->getOperand(1).getReg();
2629 ShiftSrc = Builder.buildTrunc(NewShiftTy, ShiftSrc).getReg(0);
2630
2631 Register NewShift =
2632 Builder
2633 .buildInstr(ShiftMI->getOpcode(), {NewShiftTy}, {ShiftSrc, ShiftAmt})
2634 .getReg(0);
2635
2636 if (NewShiftTy == DstTy)
2637 replaceRegWith(MRI, Dst, NewShift);
2638 else
2639 Builder.buildTrunc(Dst, NewShift);
2640
2641 eraseInst(MI);
2642}
2643
2644bool CombinerHelper::matchAnyExplicitUseIsUndef(MachineInstr &MI) {
2645 return any_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
2646 return MO.isReg() &&
2647 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
2648 });
2649}
2650
2651bool CombinerHelper::matchAllExplicitUsesAreUndef(MachineInstr &MI) {
2652 return all_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
2653 return !MO.isReg() ||
2654 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
2655 });
2656}
2657
2658bool CombinerHelper::matchUndefShuffleVectorMask(MachineInstr &MI) {
2659 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
2660 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
2661 return all_of(Mask, [](int Elt) { return Elt < 0; });
2662}
2663
2664bool CombinerHelper::matchUndefStore(MachineInstr &MI) {
2665 assert(MI.getOpcode() == TargetOpcode::G_STORE);
2666 return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(0).getReg(),
2667 MRI);
2668}
2669
2670bool CombinerHelper::matchUndefSelectCmp(MachineInstr &MI) {
2671 assert(MI.getOpcode() == TargetOpcode::G_SELECT);
2672 return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(1).getReg(),
2673 MRI);
2674}
2675
2676bool CombinerHelper::matchInsertExtractVecEltOutOfBounds(MachineInstr &MI) {
2677 assert((MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT ||
2678 MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT) &&
2679 "Expected an insert/extract element op");
2680 LLT VecTy = MRI.getType(MI.getOperand(1).getReg());
2681 unsigned IdxIdx =
2682 MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
2683 auto Idx = getIConstantVRegVal(MI.getOperand(IdxIdx).getReg(), MRI);
2684 if (!Idx)
2685 return false;
2686 return Idx->getZExtValue() >= VecTy.getNumElements();
2687}
2688
2689bool CombinerHelper::matchConstantSelectCmp(MachineInstr &MI, unsigned &OpIdx) {
2690 GSelect &SelMI = cast<GSelect>(MI);
2691 auto Cst =
2692 isConstantOrConstantSplatVector(*MRI.getVRegDef(SelMI.getCondReg()), MRI);
2693 if (!Cst)
2694 return false;
2695 OpIdx = Cst->isZero() ? 3 : 2;
2696 return true;
2697}
2698
2699void CombinerHelper::eraseInst(MachineInstr &MI) { MI.eraseFromParent(); }
2700
2701bool CombinerHelper::matchEqualDefs(const MachineOperand &MOP1,
2702 const MachineOperand &MOP2) {
2703 if (!MOP1.isReg() || !MOP2.isReg())
2704 return false;
2705 auto InstAndDef1 = getDefSrcRegIgnoringCopies(MOP1.getReg(), MRI);
2706 if (!InstAndDef1)
2707 return false;
2708 auto InstAndDef2 = getDefSrcRegIgnoringCopies(MOP2.getReg(), MRI);
2709 if (!InstAndDef2)
2710 return false;
2711 MachineInstr *I1 = InstAndDef1->MI;
2712 MachineInstr *I2 = InstAndDef2->MI;
2713
2714 // Handle a case like this:
2715 //
2716 // %0:_(s64), %1:_(s64) = G_UNMERGE_VALUES %2:_(<2 x s64>)
2717 //
2718 // Even though %0 and %1 are produced by the same instruction they are not
2719 // the same values.
2720 if (I1 == I2)
2721 return MOP1.getReg() == MOP2.getReg();
2722
2723 // If we have an instruction which loads or stores, we can't guarantee that
2724 // it is identical.
2725 //
2726 // For example, we may have
2727 //
2728 // %x1 = G_LOAD %addr (load N from @somewhere)
2729 // ...
2730 // call @foo
2731 // ...
2732 // %x2 = G_LOAD %addr (load N from @somewhere)
2733 // ...
2734 // %or = G_OR %x1, %x2
2735 //
2736 // It's possible that @foo will modify whatever lives at the address we're
2737 // loading from. To be safe, let's just assume that all loads and stores
2738 // are different (unless we have something which is guaranteed to not
2739 // change.)
2740 if (I1->mayLoadOrStore() && !I1->isDereferenceableInvariantLoad())
2741 return false;
2742
2743 // If both instructions are loads or stores, they are equal only if both
2744 // are dereferenceable invariant loads with the same number of bits.
2745 if (I1->mayLoadOrStore() && I2->mayLoadOrStore()) {
2746 GLoadStore *LS1 = dyn_cast<GLoadStore>(I1);
2747 GLoadStore *LS2 = dyn_cast<GLoadStore>(I2);
2748 if (!LS1 || !LS2)
2749 return false;
2750
2751 if (!I2->isDereferenceableInvariantLoad() ||
2752 (LS1->getMemSizeInBits() != LS2->getMemSizeInBits()))
2753 return false;
2754 }
2755
2756 // Check for physical registers on the instructions first to avoid cases
2757 // like this:
2758 //
2759 // %a = COPY $physreg
2760 // ...
2761 // SOMETHING implicit-def $physreg
2762 // ...
2763 // %b = COPY $physreg
2764 //
2765 // These copies are not equivalent.
2766 if (any_of(I1->uses(), [](const MachineOperand &MO) {
2767 return MO.isReg() && MO.getReg().isPhysical();
2768 })) {
2769 // Check if we have a case like this:
2770 //
2771 // %a = COPY $physreg
2772 // %b = COPY %a
2773 //
2774 // In this case, I1 and I2 will both be equal to %a = COPY $physreg.
2775 // From that, we know that they must have the same value, since they must
2776 // have come from the same COPY.
2777 return I1->isIdenticalTo(*I2);
2778 }
2779
2780 // We don't have any physical registers, so we don't necessarily need the
2781 // same vreg defs.
2782 //
2783 // On the off-chance that there's some target instruction feeding into the
2784 // instruction, let's use produceSameValue instead of isIdenticalTo.
2785 if (Builder.getTII().produceSameValue(*I1, *I2, &MRI)) {
2786 // Handle instructions with multiple defs that produce same values. Values
2787 // are same for operands with same index.
2788 // %0:_(s8), %1:_(s8), %2:_(s8), %3:_(s8) = G_UNMERGE_VALUES %4:_(<4 x s8>)
2789 // %5:_(s8), %6:_(s8), %7:_(s8), %8:_(s8) = G_UNMERGE_VALUES %4:_(<4 x s8>)
2790 // I1 and I2 are different instructions but produce same values,
2791 // %1 and %6 are same, %1 and %7 are not the same value.
2792 return I1->findRegisterDefOperandIdx(InstAndDef1->Reg, /*TRI=*/nullptr) ==
2793 I2->findRegisterDefOperandIdx(InstAndDef2->Reg, /*TRI=*/nullptr);
2794 }
2795 return false;
2796}
2797
2798bool CombinerHelper::matchConstantOp(const MachineOperand &MOP, int64_t C) {
2799 if (!MOP.isReg())
2800 return false;
2801 auto *MI = MRI.getVRegDef(MOP.getReg());
2802 auto MaybeCst = isConstantOrConstantSplatVector(*MI, MRI);
2803 return MaybeCst && MaybeCst->getBitWidth() <= 64 &&
2804 MaybeCst->getSExtValue() == C;
2805}
2806
2807bool CombinerHelper::matchConstantFPOp(const MachineOperand &MOP, double C) {
2808 if (!MOP.isReg())
2809 return false;
2810 std::optional<FPValueAndVReg> MaybeCst;
2811 if (!mi_match(MOP.getReg(), MRI, m_GFCstOrSplat(MaybeCst)))
2812 return false;
2813
2814 return MaybeCst->Value.isExactlyValue(C);
2815}
2816
2817void CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI,
2818 unsigned OpIdx) {
2819 assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
2820 Register OldReg = MI.getOperand(0).getReg();
2821 Register Replacement = MI.getOperand(OpIdx).getReg();
2822 assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
2823 MI.eraseFromParent();
2824 replaceRegWith(MRI, OldReg, Replacement);
2825}
2826
2827void CombinerHelper::replaceSingleDefInstWithReg(MachineInstr &MI,
2828 Register Replacement) {
2829 assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
2830 Register OldReg = MI.getOperand(0).getReg();
2831 assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
2832 MI.eraseFromParent();
2833 replaceRegWith(MRI, OldReg, Replacement);
2834}
2835
2836bool CombinerHelper::matchConstantLargerBitWidth(MachineInstr &MI,
2837 unsigned ConstIdx) {
2838 Register ConstReg = MI.getOperand(ConstIdx).getReg();
2839 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
2840
2841 // Get the shift amount
2842 auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstReg, MRI);
2843 if (!VRegAndVal)
2844 return false;
2845
2846 // Return true if the shift amount >= the bitwidth.
2847 return (VRegAndVal->Value.uge(DstTy.getSizeInBits()));
2848}
2849
2850void CombinerHelper::applyFunnelShiftConstantModulo(MachineInstr &MI) {
2851 assert((MI.getOpcode() == TargetOpcode::G_FSHL ||
2852 MI.getOpcode() == TargetOpcode::G_FSHR) &&
2853 "This is not a funnel shift operation");
2854
2855 Register ConstReg = MI.getOperand(3).getReg();
2856 LLT ConstTy = MRI.getType(ConstReg);
2857 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
2858
2859 auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstReg, MRI);
2860 assert((VRegAndVal) && "Value is not a constant");
2861
2862 // Calculate the new Shift Amount = Old Shift Amount % BitWidth
2863 APInt NewConst = VRegAndVal->Value.urem(
2864 APInt(ConstTy.getSizeInBits(), DstTy.getScalarSizeInBits()));
2865
2866 auto NewConstInstr = Builder.buildConstant(ConstTy, NewConst.getZExtValue());
2867 Builder.buildInstr(
2868 MI.getOpcode(), {MI.getOperand(0)},
2869 {MI.getOperand(1), MI.getOperand(2), NewConstInstr.getReg(0)});
2870
2871 MI.eraseFromParent();
2872}
2873
2874bool CombinerHelper::matchSelectSameVal(MachineInstr &MI) {
2875 assert(MI.getOpcode() == TargetOpcode::G_SELECT);
2876 // Match (cond ? x : x)
2877 return matchEqualDefs(MI.getOperand(2), MI.getOperand(3)) &&
2878 canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(2).getReg(),
2879 MRI);
2880}
2881
2882bool CombinerHelper::matchBinOpSameVal(MachineInstr &MI) {
2883 return matchEqualDefs(MI.getOperand(1), MI.getOperand(2)) &&
2884 canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(),
2885 MRI);
2886}
2887
2888bool CombinerHelper::matchOperandIsZero(MachineInstr &MI, unsigned OpIdx) {
2889 return matchConstantOp(MI.getOperand(OpIdx), 0) &&
2890 canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(OpIdx).getReg(),
2891 MRI);
2892}
2893
2894bool CombinerHelper::matchOperandIsUndef(MachineInstr &MI, unsigned OpIdx) {
2895 MachineOperand &MO = MI.getOperand(OpIdx);
2896 return MO.isReg() &&
2897 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
2898}
2899
2900bool CombinerHelper::matchOperandIsKnownToBeAPowerOfTwo(MachineInstr &MI,
2901 unsigned OpIdx) {
2902 MachineOperand &MO = MI.getOperand(OpIdx);
2903 return isKnownToBeAPowerOfTwo(MO.getReg(), MRI, KB);
2904}
2905
2906void CombinerHelper::replaceInstWithFConstant(MachineInstr &MI, double C) {
2907 assert(MI.getNumDefs() == 1 && "Expected only one def?");
2908 Builder.buildFConstant(MI.getOperand(0), C);
2909 MI.eraseFromParent();
2910}
2911
2912void CombinerHelper::replaceInstWithConstant(MachineInstr &MI, int64_t C) {
2913 assert(MI.getNumDefs() == 1 && "Expected only one def?");
2914 Builder.buildConstant(MI.getOperand(0), C);
2915 MI.eraseFromParent();
2916}
2917
2918void CombinerHelper::replaceInstWithConstant(MachineInstr &MI, APInt C) {
2919 assert(MI.getNumDefs() == 1 && "Expected only one def?");
2920 Builder.buildConstant(MI.getOperand(0), C);
2921 MI.eraseFromParent();
2922}
2923
2924void CombinerHelper::replaceInstWithFConstant(MachineInstr &MI,
2925 ConstantFP *CFP) {
2926 assert(MI.getNumDefs() == 1 && "Expected only one def?");
2927 Builder.buildFConstant(MI.getOperand(0), CFP->getValueAPF());
2928 MI.eraseFromParent();
2929}
2930
2931void CombinerHelper::replaceInstWithUndef(MachineInstr &MI) {
2932 assert(MI.getNumDefs() == 1 && "Expected only one def?");
2933 Builder.buildUndef(MI.getOperand(0));
2934 MI.eraseFromParent();
2935}
2936bool CombinerHelper::matchSimplifyAddToSub(
2937 MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) {
2938 Register LHS = MI.getOperand(1).getReg();
2939 Register LHS = MI.getOperand(1).getReg();
2940 Register RHS = MI.getOperand(2).getReg();
2941 Register &NewLHS = std::get<0>(MatchInfo);
2942 Register &NewRHS = std::get<1>(MatchInfo);
2943
2944 // Helper lambda to check for opportunities for
2945 // ((0-A) + B) -> B - A
2946 // (A + (0-B)) -> A - B
2947 auto CheckFold = [&](Register &MaybeSub, Register &MaybeNewLHS) {
2948 if (!mi_match(MaybeSub, MRI, m_Neg(m_Reg(NewRHS))))
2949 return false;
2950 NewLHS = MaybeNewLHS;
2951 return true;
2952 };
2953
2954 return CheckFold(LHS, RHS) || CheckFold(RHS, LHS);
2955}
2956
2957bool CombinerHelper::matchCombineInsertVecElts(
2958 MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) {
2959 assert(MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT &&
2960 "Invalid opcode");
2961 Register DstReg = MI.getOperand(0).getReg();
2962 LLT DstTy = MRI.getType(DstReg);
2963 assert(DstTy.isVector() && "Invalid G_INSERT_VECTOR_ELT?");
2964 unsigned NumElts = DstTy.getNumElements();
2965 // If this MI is part of a sequence of insert_vec_elts, then
2966 // don't do the combine in the middle of the sequence.
2967 if (MRI.hasOneUse(DstReg) && MRI.use_instr_begin(DstReg)->getOpcode() ==
2968 TargetOpcode::G_INSERT_VECTOR_ELT)
2969 return false;
2970 MachineInstr *CurrInst = &MI;
2971 MachineInstr *TmpInst;
2972 int64_t IntImm;
2973 Register TmpReg;
2974 MatchInfo.resize(NumElts);
2975 while (mi_match(
2976 CurrInst->getOperand(0).getReg(), MRI,
2977 m_GInsertVecElt(m_MInstr(TmpInst), m_Reg(TmpReg), m_ICst(IntImm)))) {
2978 if (IntImm >= NumElts || IntImm < 0)
2979 return false;
2980 if (!MatchInfo[IntImm])
2981 MatchInfo[IntImm] = TmpReg;
2982 CurrInst = TmpInst;
2983 }
2984 // Variable index.
2985 if (CurrInst->getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
2986 return false;
2987 if (TmpInst->getOpcode() == TargetOpcode::G_BUILD_VECTOR) {
2988 for (unsigned I = 1; I < TmpInst->getNumOperands(); ++I) {
2989 if (!MatchInfo[I - 1].isValid())
2990 MatchInfo[I - 1] = TmpInst->getOperand(I).getReg();
2991 }
2992 return true;
2993 }
2994 // If we didn't end in a G_IMPLICIT_DEF and the source is not fully
2995 // overwritten, bail out.
2996 return TmpInst->getOpcode() == TargetOpcode::G_IMPLICIT_DEF ||
2997 all_of(MatchInfo, [](Register Reg) { return !!Reg; });
2998}
2999
3000void CombinerHelper::applyCombineInsertVecElts(
3001 MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) {
3002 Register UndefReg;
3003 auto GetUndef = [&]() {
3004 if (UndefReg)
3005 return UndefReg;
3006 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
3007 UndefReg = Builder.buildUndef(DstTy.getScalarType()).getReg(0);
3008 return UndefReg;
3009 };
3010 for (Register &Reg : MatchInfo) {
3011 if (!Reg)
3012 Reg = GetUndef();
3013 }
3014 Builder.buildBuildVector(MI.getOperand(0).getReg(), MatchInfo);
3015 MI.eraseFromParent();
3016}
3017void CombinerHelper::applySimplifyAddToSub(
3018 MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) {
3019 Register SubLHS, SubRHS;
3020 Register SubLHS, SubRHS;
3021 std::tie(SubLHS, SubRHS) = MatchInfo;
3022 Builder.buildSub(MI.getOperand(0).getReg(), SubLHS, SubRHS);
3023 MI.eraseFromParent();
3024}
3025
3026bool CombinerHelper::matchHoistLogicOpWithSameOpcodeHands(
3027 MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) {
3028 // Matches: logic (hand x, ...), (hand y, ...) -> hand (logic x, y), ...
3029 //
3030 // Creates the new hand + logic instruction (but does not insert them.)
3031 //
3032 // On success, MatchInfo is populated with the new instructions. These are
3033 // inserted in applyHoistLogicOpWithSameOpcodeHands.
3034 unsigned LogicOpcode = MI.getOpcode();
3035 assert(LogicOpcode == TargetOpcode::G_AND ||
3036 LogicOpcode == TargetOpcode::G_OR ||
3037 LogicOpcode == TargetOpcode::G_XOR);
3038 MachineIRBuilder MIB(MI);
3039 Register Dst = MI.getOperand(0).getReg();
3040 Register LHSReg = MI.getOperand(1).getReg();
3041 Register RHSReg = MI.getOperand(2).getReg();
3042
3043 // Don't recompute anything.
3044 if (!MRI.hasOneNonDBGUse(LHSReg) || !MRI.hasOneNonDBGUse(RHSReg))
3045 return false;
3046
3047 // Make sure we have (hand x, ...), (hand y, ...)
3048 MachineInstr *LeftHandInst = getDefIgnoringCopies(LHSReg, MRI);
3049 MachineInstr *RightHandInst = getDefIgnoringCopies(RHSReg, MRI);
3050 if (!LeftHandInst || !RightHandInst)
3051 return false;
3052 unsigned HandOpcode = LeftHandInst->getOpcode();
3053 if (HandOpcode != RightHandInst->getOpcode())
3054 return false;
3055 if (!LeftHandInst->getOperand(1).isReg() ||
3056 !RightHandInst->getOperand(1).isReg())
3057 return false;
3058
3059 // Make sure the types match up, and if we're doing this post-legalization,
3060 // we end up with legal types.
3061 Register X = LeftHandInst->getOperand(1).getReg();
3062 Register Y = RightHandInst->getOperand(1).getReg();
3063 LLT XTy = MRI.getType(X);
3064 LLT YTy = MRI.getType(Y);
3065 if (!XTy.isValid() || XTy != YTy)
3066 return false;
3067
3068 // Optional extra source register.
3069 Register ExtraHandOpSrcReg;
3070 switch (HandOpcode) {
3071 default:
3072 return false;
3073 case TargetOpcode::G_ANYEXT:
3074 case TargetOpcode::G_SEXT:
3075 case TargetOpcode::G_ZEXT: {
3076 // Match: logic (ext X), (ext Y) --> ext (logic X, Y)
3077 break;
3078 }
3079 case TargetOpcode::G_TRUNC: {
3080 // Match: logic (trunc X), (trunc Y) -> trunc (logic X, Y)
3081 const MachineFunction *MF = MI.getMF();
3082 const DataLayout &DL = MF->getDataLayout();
3083 LLVMContext &Ctx = MF->getFunction().getContext();
3084
3085 LLT DstTy = MRI.getType(Dst);
3086 const TargetLowering &TLI = getTargetLowering();
3087
3088 // Be extra careful sinking truncate. If it's free, there's no benefit in
3089 // widening a binop.
3090 if (TLI.isZExtFree(DstTy, XTy, DL, Ctx) &&
3091 TLI.isTruncateFree(XTy, DstTy, DL, Ctx))
3092 return false;
3093 break;
3094 }
3095 case TargetOpcode::G_AND:
3096 case TargetOpcode::G_ASHR:
3097 case TargetOpcode::G_LSHR:
3098 case TargetOpcode::G_SHL: {
3099 // Match: logic (binop x, z), (binop y, z) -> binop (logic x, y), z
3100 MachineOperand &ZOp = LeftHandInst->getOperand(2);
3101 if (!matchEqualDefs(ZOp, RightHandInst->getOperand(2)))
3102 return false;
3103 ExtraHandOpSrcReg = ZOp.getReg();
3104 break;
3105 }
3106 }
3107
3108 if (!isLegalOrBeforeLegalizer({LogicOpcode, {XTy, YTy}}))
3109 return false;
3110
3111 // Record the steps to build the new instructions.
3112 //
3113 // Steps to build (logic x, y)
3114 auto NewLogicDst = MRI.createGenericVirtualRegister(XTy);
3115 OperandBuildSteps LogicBuildSteps = {
3116 [=](MachineInstrBuilder &MIB) { MIB.addDef(NewLogicDst); },
3117 [=](MachineInstrBuilder &MIB) { MIB.addReg(X); },
3118 [=](MachineInstrBuilder &MIB) { MIB.addReg(Y); }};
3119 InstructionBuildSteps LogicSteps(LogicOpcode, LogicBuildSteps);
3120
3121 // Steps to build hand (logic x, y), ...z
3122 OperandBuildSteps HandBuildSteps = {
3123 [=](MachineInstrBuilder &MIB) { MIB.addDef(Dst); },
3124 [=](MachineInstrBuilder &MIB) { MIB.addReg(NewLogicDst); }};
3125 if (ExtraHandOpSrcReg.isValid())
3126 HandBuildSteps.push_back(
3127 [=](MachineInstrBuilder &MIB) { MIB.addReg(ExtraHandOpSrcReg); });
3128 InstructionBuildSteps HandSteps(HandOpcode, HandBuildSteps);
3129
3130 MatchInfo = InstructionStepsMatchInfo({LogicSteps, HandSteps});
3131 return true;
3132}
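// Illustrative instance of the hoist above (types made up): with s16 inputs
// widened to s32,
//   %xe:_(s32) = G_ZEXT %x:_(s16)
//   %ye:_(s32) = G_ZEXT %y:_(s16)
//   %d:_(s32)  = G_AND %xe, %ye
// is recorded as build steps that the apply function below emits as:
//   %l:_(s16) = G_AND %x, %y
//   %d:_(s32) = G_ZEXT %l(s16)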
3133
3134void CombinerHelper::applyBuildInstructionSteps(
3135 MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) {
3136 assert(MatchInfo.InstrsToBuild.size() &&
3137 "Expected at least one instr to build?");
3138 for (auto &InstrToBuild : MatchInfo.InstrsToBuild) {
3139 assert(InstrToBuild.Opcode && "Expected a valid opcode?");
3140 assert(InstrToBuild.OperandFns.size() && "Expected at least one operand?");
3141 MachineInstrBuilder Instr = Builder.buildInstr(InstrToBuild.Opcode);
3142 for (auto &OperandFn : InstrToBuild.OperandFns)
3143 OperandFn(Instr);
3144 }
3145 MI.eraseFromParent();
3146}
3147
3148bool CombinerHelper::matchAshrShlToSextInreg(
3149 MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) {
3150 assert(MI.getOpcode() == TargetOpcode::G_ASHR);
3151 int64_t ShlCst, AshrCst;
3152 Register Src;
3153 if (!mi_match(MI.getOperand(0).getReg(), MRI,
3154 m_GAShr(m_GShl(m_Reg(Src), m_ICstOrSplat(ShlCst)),
3155 m_ICstOrSplat(AshrCst))))
3156 return false;
3157 if (ShlCst != AshrCst)
3158 return false;
3159 if (!isLegalOrBeforeLegalizer(
3160 {TargetOpcode::G_SEXT_INREG, {MRI.getType(Src)}}))
3161 return false;
3162 MatchInfo = std::make_tuple(Src, ShlCst);
3163 return true;
3164}
3165
3166void CombinerHelper::applyAshShlToSextInreg(
3167 MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) {
3168 assert(MI.getOpcode() == TargetOpcode::G_ASHR);
3169 Register Src;
3170 int64_t ShiftAmt;
3171 std::tie(Src, ShiftAmt) = MatchInfo;
3172 unsigned Size = MRI.getType(Src).getScalarSizeInBits();
3173 Builder.buildSExtInReg(MI.getOperand(0).getReg(), Src, Size - ShiftAmt);
3174 MI.eraseFromParent();
3175}
3176
3177/// and(and(x, C1), C2) -> C1&C2 ? and(x, C1&C2) : 0
3178bool CombinerHelper::matchOverlappingAnd(
3179 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
3180 assert(MI.getOpcode() == TargetOpcode::G_AND);
3181
3182 Register Dst = MI.getOperand(0).getReg();
3183 LLT Ty = MRI.getType(Dst);
3184
3185 Register R;
3186 int64_t C1;
3187 int64_t C2;
3188 if (!mi_match(
3189 Dst, MRI,
3190 m_GAnd(m_GAnd(m_Reg(R), m_ICst(C1)), m_ICst(C2))))
3191 return false;
3192
3193 MatchInfo = [=](MachineIRBuilder &B) {
3194 if (C1 & C2) {
3195 B.buildAnd(Dst, R, B.buildConstant(Ty, C1 & C2));
3196 return;
3197 }
3198 auto Zero = B.buildConstant(Ty, 0);
3199 replaceRegWith(MRI, Dst, Zero->getOperand(0).getReg());
3200 };
3201 return true;
3202}
3203
3204bool CombinerHelper::matchRedundantAnd(MachineInstr &MI,
3205 Register &Replacement) {
3206 // Given
3207 //
3208 // %y:_(sN) = G_SOMETHING
3209 // %x:_(sN) = G_SOMETHING
3210 // %res:_(sN) = G_AND %x, %y
3211 //
3212 // Eliminate the G_AND when it is known that x & y == x or x & y == y.
3213 //
3214 // Patterns like this can appear as a result of legalization. E.g.
3215 //
3216 // %cmp:_(s32) = G_ICMP intpred(pred), %x(s32), %y
3217 // %one:_(s32) = G_CONSTANT i32 1
3218 // %and:_(s32) = G_AND %cmp, %one
3219 //
3220 // In this case, G_ICMP only produces a single bit, so x & 1 == x.
3221 assert(MI.getOpcode() == TargetOpcode::G_AND);
3222 if (!KB)
3223 return false;
3224
3225 Register AndDst = MI.getOperand(0).getReg();
3226 Register LHS = MI.getOperand(1).getReg();
3227 Register RHS = MI.getOperand(2).getReg();
3228
3229 // Check the RHS (maybe a constant) first, and if we have no KnownBits there,
3230 // we can't do anything. If we do, then it depends on whether we have
3231 // KnownBits on the LHS.
3232 KnownBits RHSBits = KB->getKnownBits(RHS);
3233 if (RHSBits.isUnknown())
3234 return false;
3235
3236 KnownBits LHSBits = KB->getKnownBits(LHS);
3237
3238 // Check that x & Mask == x.
3239 // x & 1 == x, always
3240 // x & 0 == x, only if x is also 0
3241 // Meaning Mask has no effect if every bit is either one in Mask or zero in x.
3242 //
3243 // Check if we can replace AndDst with the LHS of the G_AND
3244 if (canReplaceReg(AndDst, LHS, MRI) &&
3245 (LHSBits.Zero | RHSBits.One).isAllOnes()) {
3246 Replacement = LHS;
3247 return true;
3248 }
3249
3250 // Check if we can replace AndDst with the RHS of the G_AND
3251 if (canReplaceReg(AndDst, RHS, MRI) &&
3252 (LHSBits.One | RHSBits.Zero).isAllOnes()) {
3253 Replacement = RHS;
3254 return true;
3255 }
3256
3257 return false;
3258}
3259
3260bool CombinerHelper::matchRedundantOr(MachineInstr &MI, Register &Replacement) {
3261 // Given
3262 //
3263 // %y:_(sN) = G_SOMETHING
3264 // %x:_(sN) = G_SOMETHING
3265 // %res:_(sN) = G_OR %x, %y
3266 //
3267 // Eliminate the G_OR when it is known that x | y == x or x | y == y.
3268 assert(MI.getOpcode() == TargetOpcode::G_OR);
3269 if (!KB)
3270 return false;
3271
3272 Register OrDst = MI.getOperand(0).getReg();
3273 Register LHS = MI.getOperand(1).getReg();
3274 Register RHS = MI.getOperand(2).getReg();
3275
3276 KnownBits LHSBits = KB->getKnownBits(LHS);
3277 KnownBits RHSBits = KB->getKnownBits(RHS);
3278
3279 // Check that x | Mask == x.
3280 // x | 0 == x, always
3281 // x | 1 == x, only if x is also 1
3282 // Meaning Mask has no effect if every bit is either zero in Mask or one in x.
3283 //
3284 // Check if we can replace OrDst with the LHS of the G_OR
3285 if (canReplaceReg(OrDst, LHS, MRI) &&
3286 (LHSBits.One | RHSBits.Zero).isAllOnes()) {
3287 Replacement = LHS;
3288 return true;
3289 }
3290
3291 // Check if we can replace OrDst with the RHS of the G_OR
3292 if (canReplaceReg(OrDst, RHS, MRI) &&
3293 (LHSBits.Zero | RHSBits.One).isAllOnes()) {
3294 Replacement = RHS;
3295 return true;
3296 }
3297
3298 return false;
3299}
3300
3301bool CombinerHelper::matchRedundantSExtInReg(MachineInstr &MI) {
3302 // If the input is already sign extended, just drop the extension.
3303 Register Src = MI.getOperand(1).getReg();
3304 unsigned ExtBits = MI.getOperand(2).getImm();
3305 unsigned TypeSize = MRI.getType(Src).getScalarSizeInBits();
3306 return KB->computeNumSignBits(Src) >= (TypeSize - ExtBits + 1);
3307}
3308
3309static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits,
3310 int64_t Cst, bool IsVector, bool IsFP) {
3311 // For i1, Cst will always be -1 regardless of boolean contents.
3312 return (ScalarSizeBits == 1 && Cst == -1) ||
3313 isConstTrueVal(TLI, Cst, IsVector, IsFP);
3314}
3315
3316bool CombinerHelper::matchNotCmp(MachineInstr &MI,
3317 SmallVectorImpl<Register> &RegsToNegate) {
3318 assert(MI.getOpcode() == TargetOpcode::G_XOR);
3319 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3320 const auto &TLI = *Builder.getMF().getSubtarget().getTargetLowering();
3321 Register XorSrc;
3322 Register CstReg;
3323 // We match xor(src, true) here.
3324 if (!mi_match(MI.getOperand(0).getReg(), MRI,
3325 m_GXor(m_Reg(XorSrc), m_Reg(CstReg))))
3326 return false;
3327
3328 if (!MRI.hasOneNonDBGUse(XorSrc))
3329 return false;
3330
3331 // Check that XorSrc is the root of a tree of comparisons combined with ANDs
3332 // and ORs. The suffix of RegsToNegate starting from index I is used as a work
3333 // list of tree nodes to visit.
3334 RegsToNegate.push_back(XorSrc);
3335 // Remember whether the comparisons are all integer or all floating point.
3336 bool IsInt = false;
3337 bool IsFP = false;
3338 for (unsigned I = 0; I < RegsToNegate.size(); ++I) {
3339 Register Reg = RegsToNegate[I];
3340 if (!MRI.hasOneNonDBGUse(Reg))
3341 return false;
3342 MachineInstr *Def = MRI.getVRegDef(Reg);
3343 switch (Def->getOpcode()) {
3344 default:
3345 // Don't match if the tree contains anything other than ANDs, ORs and
3346 // comparisons.
3347 return false;
3348 case TargetOpcode::G_ICMP:
3349 if (IsFP)
3350 return false;
3351 IsInt = true;
3352 // When we apply the combine we will invert the predicate.
3353 break;
3354 case TargetOpcode::G_FCMP:
3355 if (IsInt)
3356 return false;
3357 IsFP = true;
3358 // When we apply the combine we will invert the predicate.
3359 break;
3360 case TargetOpcode::G_AND:
3361 case TargetOpcode::G_OR:
3362 // Implement De Morgan's laws:
3363 // ~(x & y) -> ~x | ~y
3364 // ~(x | y) -> ~x & ~y
3365 // When we apply the combine we will change the opcode and recursively
3366 // negate the operands.
3367 RegsToNegate.push_back(Def->getOperand(1).getReg());
3368 RegsToNegate.push_back(Def->getOperand(2).getReg());
3369 break;
3370 }
3371 }
3372
3373 // Now we know whether the comparisons are integer or floating point, check
3374 // the constant in the xor.
3375 int64_t Cst;
3376 if (Ty.isVector()) {
3377 MachineInstr *CstDef = MRI.getVRegDef(CstReg);
3378 auto MaybeCst = getIConstantSplatSExtVal(*CstDef, MRI);
3379 if (!MaybeCst)
3380 return false;
3381 if (!isConstValidTrue(TLI, Ty.getScalarSizeInBits(), *MaybeCst, true, IsFP))
3382 return false;
3383 } else {
3384 if (!mi_match(CstReg, MRI, m_ICst(Cst)))
3385 return false;
3386 if (!isConstValidTrue(TLI, Ty.getSizeInBits(), Cst, false, IsFP))
3387 return false;
3388 }
3389
3390 return true;
3391}
3392
3393void CombinerHelper::applyNotCmp(MachineInstr &MI,
3394 SmallVectorImpl<Register> &RegsToNegate) {
3395 for (Register Reg : RegsToNegate) {
3396 MachineInstr *Def = MRI.getVRegDef(Reg);
3397 Observer.changingInstr(*Def);
3398 // For each comparison, invert the opcode. For each AND and OR, change the
3399 // opcode.
3400 switch (Def->getOpcode()) {
3401 default:
3402 llvm_unreachable("Unexpected opcode");
3403 case TargetOpcode::G_ICMP:
3404 case TargetOpcode::G_FCMP: {
3405 MachineOperand &PredOp = Def->getOperand(1);
3406 CmpInst::Predicate NewP = CmpInst::getInversePredicate(
3407 (CmpInst::Predicate)PredOp.getPredicate());
3408 PredOp.setPredicate(NewP);
3409 break;
3410 }
3411 case TargetOpcode::G_AND:
3412 Def->setDesc(Builder.getTII().get(TargetOpcode::G_OR));
3413 break;
3414 case TargetOpcode::G_OR:
3415 Def->setDesc(Builder.getTII().get(TargetOpcode::G_AND));
3416 break;
3417 }
3418 Observer.changedInstr(*Def);
3419 }
3420
3421 replaceRegWith(MRI, MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
3422 MI.eraseFromParent();
3423}
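// Illustrative De Morgan rewrite performed above (predicates made up):
//   %a:_(s1) = G_ICMP intpred(eq), %x, %y
//   %b:_(s1) = G_ICMP intpred(slt), %x, %z
//   %c:_(s1) = G_AND %a, %b
//   %n:_(s1) = G_XOR %c, %t           ; %t:_(s1) = G_CONSTANT i1 true
// becomes, after inverting each predicate and swapping AND/OR:
//   %a:_(s1) = G_ICMP intpred(ne), %x, %y
//   %b:_(s1) = G_ICMP intpred(sge), %x, %z
//   %c:_(s1) = G_OR %a, %b
// and all uses of %n are rewritten to use %c.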
3424
3425bool CombinerHelper::matchXorOfAndWithSameReg(
3426 MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
3427 // Match (xor (and x, y), y) (or any of its commuted cases)
3428 assert(MI.getOpcode() == TargetOpcode::G_XOR);
3429 Register &X = MatchInfo.first;
3430 Register &Y = MatchInfo.second;
3431 Register AndReg = MI.getOperand(1).getReg();
3432 Register SharedReg = MI.getOperand(2).getReg();
3433
3434 // Find a G_AND on either side of the G_XOR.
3435 // Look for one of
3436 //
3437 // (xor (and x, y), SharedReg)
3438 // (xor SharedReg, (and x, y))
3439 if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y)))) {
3440 std::swap(AndReg, SharedReg);
3441 if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y))))
3442 return false;
3443 }
3444
3445 // Only do this if we'll eliminate the G_AND.
3446 if (!MRI.hasOneNonDBGUse(AndReg))
3447 return false;
3448
3449 // We can combine if SharedReg is the same as either the LHS or RHS of the
3450 // G_AND.
3451 if (Y != SharedReg)
3452 std::swap(X, Y);
3453 return Y == SharedReg;
3454}
3455
3456void CombinerHelper::applyXorOfAndWithSameReg(
3457 MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
3458 // Fold (xor (and x, y), y) -> (and (not x), y)
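 // For example (illustrative, register names arbitrary):
 //   %a = G_AND %x, %y
 //   %d = G_XOR %a, %y
 // becomes
 //   %not_x = G_XOR %x, -1
 //   %d = G_AND %not_x, %y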
3459 Register X, Y;
3460 std::tie(X, Y) = MatchInfo;
3461 auto Not = Builder.buildNot(MRI.getType(X), X);
3462 Observer.changingInstr(MI);
3463 MI.setDesc(Builder.getTII().get(TargetOpcode::G_AND));
3464 MI.getOperand(1).setReg(Not->getOperand(0).getReg());
3465 MI.getOperand(2).setReg(Y);
3466 Observer.changedInstr(MI);
3467}
3468
3469 bool CombinerHelper::matchPtrAddZero(MachineInstr &MI) {
3470 auto &PtrAdd = cast<GPtrAdd>(MI);
3471 Register DstReg = PtrAdd.getReg(0);
3472 LLT Ty = MRI.getType(DstReg);
3473 const DataLayout &DL = Builder.getMF().getDataLayout();
3474
3475 if (DL.isNonIntegralAddressSpace(Ty.getScalarType().getAddressSpace()))
3476 return false;
3477
3478 if (Ty.isPointer()) {
3479 auto ConstVal = getIConstantVRegVal(PtrAdd.getBaseReg(), MRI);
3480 return ConstVal && *ConstVal == 0;
3481 }
3482
3483 assert(Ty.isVector() && "Expecting a vector type");
3484 const MachineInstr *VecMI = MRI.getVRegDef(PtrAdd.getBaseReg());
3485 return isBuildVectorAllZeros(*VecMI, MRI);
3486}
3487
3488 void CombinerHelper::applyPtrAddZero(MachineInstr &MI) {
3489 auto &PtrAdd = cast<GPtrAdd>(MI);
3490 Builder.buildIntToPtr(PtrAdd.getReg(0), PtrAdd.getOffsetReg());
3491 PtrAdd.eraseFromParent();
3492}
3493
3494/// The second source operand is known to be a power of 2.
3495 void CombinerHelper::applySimplifyURemByPow2(MachineInstr &MI) {
3496 Register DstReg = MI.getOperand(0).getReg();
3497 Register Src0 = MI.getOperand(1).getReg();
3498 Register Pow2Src1 = MI.getOperand(2).getReg();
3499 LLT Ty = MRI.getType(DstReg);
3500
3501 // Fold (urem x, pow2) -> (and x, pow2-1)
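 // For example (illustrative): if the divisor is known to be 8, then
 //   %r = G_UREM %x, %eight
 // becomes G_AND %x, 7, built below as (and x, (add pow2, -1)).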
3502 auto NegOne = Builder.buildConstant(Ty, -1);
3503 auto Add = Builder.buildAdd(Ty, Pow2Src1, NegOne);
3504 Builder.buildAnd(DstReg, Src0, Add);
3505 MI.eraseFromParent();
3506}
3507
3508 bool CombinerHelper::matchFoldBinOpIntoSelect(MachineInstr &MI,
3509 unsigned &SelectOpNo) {
3510 Register LHS = MI.getOperand(1).getReg();
3511 Register RHS = MI.getOperand(2).getReg();
3512
3513 Register OtherOperandReg = RHS;
3514 SelectOpNo = 1;
3515 MachineInstr *Select = MRI.getVRegDef(LHS);
3516
3517 // Don't do this unless the old select is going away. We want to eliminate the
3518 // binary operator, not replace a binop with a select.
3519 if (Select->getOpcode() != TargetOpcode::G_SELECT ||
3520 !MRI.hasOneNonDBGUse(LHS)) {
3521 OtherOperandReg = LHS;
3522 SelectOpNo = 2;
3523 Select = MRI.getVRegDef(RHS);
3524 if (Select->getOpcode() != TargetOpcode::G_SELECT ||
3525 !MRI.hasOneNonDBGUse(RHS))
3526 return false;
3527 }
3528
3529 MachineInstr *SelectLHS = MRI.getVRegDef(Select->getOperand(2).getReg());
3530 MachineInstr *SelectRHS = MRI.getVRegDef(Select->getOperand(3).getReg());
3531
3532 if (!isConstantOrConstantVector(*SelectLHS, MRI,
3533 /*AllowFP*/ true,
3534 /*AllowOpaqueConstants*/ false))
3535 return false;
3536 if (!isConstantOrConstantVector(*SelectRHS, MRI,
3537 /*AllowFP*/ true,
3538 /*AllowOpaqueConstants*/ false))
3539 return false;
3540
3541 unsigned BinOpcode = MI.getOpcode();
3542
3543 // We know that one of the operands is a select of constants. Now verify that
3544 // the other binary operator operand is either a constant, or we can handle a
3545 // variable.
3546 bool CanFoldNonConst =
3547 (BinOpcode == TargetOpcode::G_AND || BinOpcode == TargetOpcode::G_OR) &&
3548 (isNullOrNullSplat(*SelectLHS, MRI) ||
3549 isAllOnesOrAllOnesSplat(*SelectLHS, MRI)) &&
3550 (isNullOrNullSplat(*SelectRHS, MRI) ||
3551 isAllOnesOrAllOnesSplat(*SelectRHS, MRI));
3552 if (CanFoldNonConst)
3553 return true;
3554
3555 return isConstantOrConstantVector(*MRI.getVRegDef(OtherOperandReg), MRI,
3556 /*AllowFP*/ true,
3557 /*AllowOpaqueConstants*/ false);
3558}
3559
3560/// \p SelectOperand is the operand in binary operator \p MI that is the select
3561/// to fold.
3562 void CombinerHelper::applyFoldBinOpIntoSelect(MachineInstr &MI,
3563 const unsigned &SelectOperand) {
3564 Register Dst = MI.getOperand(0).getReg();
3565 Register LHS = MI.getOperand(1).getReg();
3566 Register RHS = MI.getOperand(2).getReg();
3567 MachineInstr *Select = MRI.getVRegDef(MI.getOperand(SelectOperand).getReg());
3568
3569 Register SelectCond = Select->getOperand(1).getReg();
3570 Register SelectTrue = Select->getOperand(2).getReg();
3571 Register SelectFalse = Select->getOperand(3).getReg();
3572
3573 LLT Ty = MRI.getType(Dst);
3574 unsigned BinOpcode = MI.getOpcode();
3575
3576 Register FoldTrue, FoldFalse;
3577
3578 // We have a select-of-constants followed by a binary operator with a
3579 // constant. Eliminate the binop by pulling the constant math into the select.
3580 // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
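 // Concretely (illustrative): add (select %c, 1, 2), 10 becomes
 // select %c, (add 1, 10), (add 2, 10), which is expected to constant-fold
 // to select %c, 11, 12 (see the TODO below).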
3581 if (SelectOperand == 1) {
3582 // TODO: SelectionDAG verifies this actually constant folds before
3583 // committing to the combine.
3584
3585 FoldTrue = Builder.buildInstr(BinOpcode, {Ty}, {SelectTrue, RHS}).getReg(0);
3586 FoldFalse =
3587 Builder.buildInstr(BinOpcode, {Ty}, {SelectFalse, RHS}).getReg(0);
3588 } else {
3589 FoldTrue = Builder.buildInstr(BinOpcode, {Ty}, {LHS, SelectTrue}).getReg(0);
3590 FoldFalse =
3591 Builder.buildInstr(BinOpcode, {Ty}, {LHS, SelectFalse}).getReg(0);
3592 }
3593
3594 Builder.buildSelect(Dst, SelectCond, FoldTrue, FoldFalse, MI.getFlags());
3595 MI.eraseFromParent();
3596}
3597
3598std::optional<SmallVector<Register, 8>>
3599CombinerHelper::findCandidatesForLoadOrCombine(const MachineInstr *Root) const {
3600 assert(Root->getOpcode() == TargetOpcode::G_OR && "Expected G_OR only!");
3601 // We want to detect if Root is part of a tree which represents a bunch
3602 // of loads being merged into a larger load. We'll try to recognize patterns
3603 // like, for example:
3604 //
3605 // Reg Reg
3606 // \ /
3607 // OR_1 Reg
3608 // \ /
3609 // OR_2
3610 // \ Reg
3611 // .. /
3612 // Root
3613 //
3614 // Reg Reg Reg Reg
3615 // \ / \ /
3616 // OR_1 OR_2
3617 // \ /
3618 // \ /
3619 // ...
3620 // Root
3621 //
3622 // Each "Reg" may have been produced by a load + some arithmetic. This
3623 // function will save each of them.
3624 SmallVector<Register, 8> RegsToVisit;
3625 SmallVector<const MachineInstr *, 8> Ors = {Root};
3626
3627 // In the "worst" case, we're dealing with a load for each byte. So, there
3628 // are at most #bytes - 1 ORs.
3629 const unsigned MaxIter =
3630 MRI.getType(Root->getOperand(0).getReg()).getSizeInBytes() - 1;
3631 for (unsigned Iter = 0; Iter < MaxIter; ++Iter) {
3632 if (Ors.empty())
3633 break;
3634 const MachineInstr *Curr = Ors.pop_back_val();
3635 Register OrLHS = Curr->getOperand(1).getReg();
3636 Register OrRHS = Curr->getOperand(2).getReg();
3637
3638 // In the combine, we want to eliminate the entire tree.
3639 if (!MRI.hasOneNonDBGUse(OrLHS) || !MRI.hasOneNonDBGUse(OrRHS))
3640 return std::nullopt;
3641
3642 // If it's a G_OR, save it and continue to walk. If it's not, then it's
3643 // something that may be a load + arithmetic.
3644 if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrLHS, MRI))
3645 Ors.push_back(Or);
3646 else
3647 RegsToVisit.push_back(OrLHS);
3648 if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrRHS, MRI))
3649 Ors.push_back(Or);
3650 else
3651 RegsToVisit.push_back(OrRHS);
3652 }
3653
3654 // We're going to try and merge each register into a wider power-of-2 type,
3655 // so we ought to have an even number of registers.
3656 if (RegsToVisit.empty() || RegsToVisit.size() % 2 != 0)
3657 return std::nullopt;
3658 return RegsToVisit;
3659}
3660
3661/// Helper function for findLoadOffsetsForLoadOrCombine.
3662///
3663/// Check if \p Reg is the result of loading a \p MemSizeInBits wide value,
3664/// and then moving that value into a specific byte offset.
3665///
3666/// e.g. x[i] << 24
3667///
3668/// \returns The load instruction and the byte offset it is moved into.
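///
/// For example (illustrative): with \p MemSizeInBits == 8, a value formed as
/// (shl (zextload x[i]), 16) yields byte offset 2, while an unshifted
/// zero-extending load yields byte offset 0.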
3669static std::optional<std::pair<GZExtLoad *, int64_t>>
3670matchLoadAndBytePosition(Register Reg, unsigned MemSizeInBits,
3671 const MachineRegisterInfo &MRI) {
3672 assert(MRI.hasOneNonDBGUse(Reg) &&
3673 "Expected Reg to only have one non-debug use?");
3674 Register MaybeLoad;
3675 int64_t Shift;
3676 if (!mi_match(Reg, MRI,
3677 m_OneNonDBGUse(m_GShl(m_Reg(MaybeLoad), m_ICst(Shift))))) {
3678 Shift = 0;
3679 MaybeLoad = Reg;
3680 }
3681
3682 if (Shift % MemSizeInBits != 0)
3683 return std::nullopt;
3684
3685 // TODO: Handle other types of loads.
3686 auto *Load = getOpcodeDef<GZExtLoad>(MaybeLoad, MRI);
3687 if (!Load)
3688 return std::nullopt;
3689
3690 if (!Load->isUnordered() || Load->getMemSizeInBits() != MemSizeInBits)
3691 return std::nullopt;
3692
3693 return std::make_pair(Load, Shift / MemSizeInBits);
3694}
3695
3696std::optional<std::tuple<GZExtLoad *, int64_t, GZExtLoad *>>
3697CombinerHelper::findLoadOffsetsForLoadOrCombine(
3698 SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx,
3699 const SmallVector<Register, 8> &RegsToVisit, const unsigned MemSizeInBits) {
3700
3701 // Each load found for the pattern. There should be one for each RegsToVisit.
3702 SmallSetVector<const MachineInstr *, 8> Loads;
3703
3704 // The lowest index used in any load. (The lowest "i" for each x[i].)
3705 int64_t LowestIdx = INT64_MAX;
3706
3707 // The load which uses the lowest index.
3708 GZExtLoad *LowestIdxLoad = nullptr;
3709
3710 // Keeps track of the load indices we see. We shouldn't see any indices twice.
3711 SmallSet<int64_t, 8> SeenIdx;
3712
3713 // Ensure each load is in the same MBB.
3714 // TODO: Support multiple MachineBasicBlocks.
3715 MachineBasicBlock *MBB = nullptr;
3716 const MachineMemOperand *MMO = nullptr;
3717
3718 // Earliest instruction-order load in the pattern.
3719 GZExtLoad *EarliestLoad = nullptr;
3720
3721 // Latest instruction-order load in the pattern.
3722 GZExtLoad *LatestLoad = nullptr;
3723
3724 // Base pointer which every load should share.
3725 Register BasePtr;
3726
3727 // We want to find a load for each register. Each load should have some
3728 // appropriate bit twiddling arithmetic. During this loop, we will also keep
3729 // track of the load which uses the lowest index. Later, we will check if we
3730 // can use its pointer in the final, combined load.
3731 for (auto Reg : RegsToVisit) {
3732 // Find the load, and find the byte position that it will end up at in the
3733 // (possibly shifted) value.
3734 auto LoadAndPos = matchLoadAndBytePosition(Reg, MemSizeInBits, MRI);
3735 if (!LoadAndPos)
3736 return std::nullopt;
3737 GZExtLoad *Load;
3738 int64_t DstPos;
3739 std::tie(Load, DstPos) = *LoadAndPos;
3740
3741 // TODO: Handle multiple MachineBasicBlocks. Currently not handled because
3742 // it is difficult to check for stores/calls/etc between loads.
3743 MachineBasicBlock *LoadMBB = Load->getParent();
3744 if (!MBB)
3745 MBB = LoadMBB;
3746 if (LoadMBB != MBB)
3747 return std::nullopt;
3748
3749 // Make sure that the MachineMemOperands of every seen load are compatible.
3750 auto &LoadMMO = Load->getMMO();
3751 if (!MMO)
3752 MMO = &LoadMMO;
3753 if (MMO->getAddrSpace() != LoadMMO.getAddrSpace())
3754 return std::nullopt;
3755
3756 // Find out what the base pointer and index for the load is.
3757 Register LoadPtr;
3758 int64_t Idx;
3759 if (!mi_match(Load->getOperand(1).getReg(), MRI,
3760 m_GPtrAdd(m_Reg(LoadPtr), m_ICst(Idx)))) {
3761 LoadPtr = Load->getOperand(1).getReg();
3762 Idx = 0;
3763 }
3764
3765 // Don't combine things like a[i], a[i] -> a bigger load.
3766 if (!SeenIdx.insert(Idx).second)
3767 return std::nullopt;
3768
3769 // Every load must share the same base pointer; don't combine things like:
3770 //
3771 // a[i], b[i + 1] -> a bigger load.
3772 if (!BasePtr.isValid())
3773 BasePtr = LoadPtr;
3774 if (BasePtr != LoadPtr)
3775 return std::nullopt;
3776
3777 if (Idx < LowestIdx) {
3778 LowestIdx = Idx;
3779 LowestIdxLoad = Load;
3780 }
3781
3782 // Keep track of the byte offset that this load ends up at. If we have seen
3783 // the byte offset, then stop here. We do not want to combine:
3784 //
3785 // a[i] << 16, a[i + k] << 16 -> a bigger load.
3786 if (!MemOffset2Idx.try_emplace(DstPos, Idx).second)
3787 return std::nullopt;
3788 Loads.insert(Load);
3789
3790 // Keep track of the position of the earliest/latest loads in the pattern.
3791 // We will check that there are no load fold barriers between them later
3792 // on.
3793 //
3794 // FIXME: Is there a better way to check for load fold barriers?
3795 if (!EarliestLoad || dominates(*Load, *EarliestLoad))
3796 EarliestLoad = Load;
3797 if (!LatestLoad || dominates(*LatestLoad, *Load))
3798 LatestLoad = Load;
3799 }
3800
3801 // We found a load for each register. Let's check if each load satisfies the
3802 // pattern.
3803 assert(Loads.size() == RegsToVisit.size() &&
3804 "Expected to find a load for each register?");
3805 assert(EarliestLoad != LatestLoad && EarliestLoad &&
3806 LatestLoad && "Expected at least two loads?");
3807
3808 // Check if there are any stores, calls, etc. between any of the loads. If
3809 // there are, then we can't safely perform the combine.
3810 //
3811 // MaxIter is chosen based off the (worst case) number of iterations it
3812 // typically takes to succeed in the LLVM test suite plus some padding.
3813 //
3814 // FIXME: Is there a better way to check for load fold barriers?
3815 const unsigned MaxIter = 20;
3816 unsigned Iter = 0;
3817 for (const auto &MI : instructionsWithoutDebug(EarliestLoad->getIterator(),
3818 LatestLoad->getIterator())) {
3819 if (Loads.count(&MI))
3820 continue;
3821 if (MI.isLoadFoldBarrier())
3822 return std::nullopt;
3823 if (Iter++ == MaxIter)
3824 return std::nullopt;
3825 }
3826
3827 return std::make_tuple(LowestIdxLoad, LowestIdx, LatestLoad);
3828}
3829
3830 bool CombinerHelper::matchLoadOrCombine(
3831 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
3832 assert(MI.getOpcode() == TargetOpcode::G_OR);
3833 MachineFunction &MF = *MI.getMF();
3834 // Assuming a little-endian target, transform:
3835 // s8 *a = ...
3836 // s32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
3837 // =>
3838 // s32 val = *((i32)a)
3839 //
3840 // s8 *a = ...
3841 // s32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
3842 // =>
3843 // s32 val = BSWAP(*((s32)a))
3844 Register Dst = MI.getOperand(0).getReg();
3845 LLT Ty = MRI.getType(Dst);
3846 if (Ty.isVector())
3847 return false;
3848
3849 // We need to combine at least two loads into this type. Since the smallest
3850 // possible load is into a byte, we need at least a 16-bit wide type.
3851 const unsigned WideMemSizeInBits = Ty.getSizeInBits();
3852 if (WideMemSizeInBits < 16 || WideMemSizeInBits % 8 != 0)
3853 return false;
3854
3855 // Match a collection of non-OR instructions in the pattern.
3856 auto RegsToVisit = findCandidatesForLoadOrCombine(&MI);
3857 if (!RegsToVisit)
3858 return false;
3859
3860 // We have a collection of non-OR instructions. Figure out how wide each of
3861 // the small loads should be based off of the number of potential loads we
3862 // found.
3863 const unsigned NarrowMemSizeInBits = WideMemSizeInBits / RegsToVisit->size();
3864 if (NarrowMemSizeInBits % 8 != 0)
3865 return false;
3866
3867 // Check if each register feeding into each OR is a load from the same
3868 // base pointer + some arithmetic.
3869 //
3870 // e.g. a[0], a[1] << 8, a[2] << 16, etc.
3871 //
3872 // Also verify that each of these ends up putting a[i] into the same memory
3873 // offset as a load into a wide type would.
3874 SmallDenseMap<int64_t, int64_t, 8> MemOffset2Idx;
3875 GZExtLoad *LowestIdxLoad, *LatestLoad;
3876 int64_t LowestIdx;
3877 auto MaybeLoadInfo = findLoadOffsetsForLoadOrCombine(
3878 MemOffset2Idx, *RegsToVisit, NarrowMemSizeInBits);
3879 if (!MaybeLoadInfo)
3880 return false;
3881 std::tie(LowestIdxLoad, LowestIdx, LatestLoad) = *MaybeLoadInfo;
3882
3883 // We have a bunch of loads being OR'd together. Using the addresses + offsets
3884 // we found before, check if this corresponds to a big or little endian byte
3885 // pattern. If it does, then we can represent it using a load + possibly a
3886 // BSWAP.
3887 bool IsBigEndianTarget = MF.getDataLayout().isBigEndian();
3888 std::optional<bool> IsBigEndian = isBigEndian(MemOffset2Idx, LowestIdx);
3889 if (!IsBigEndian)
3890 return false;
3891 bool NeedsBSwap = IsBigEndianTarget != *IsBigEndian;
3892 if (NeedsBSwap && !isLegalOrBeforeLegalizer({TargetOpcode::G_BSWAP, {Ty}}))
3893 return false;
3894
3895 // Make sure that the load from the lowest index produces offset 0 in the
3896 // final value.
3897 //
3898 // This ensures that we won't combine something like this:
3899 //
3900 // load x[i] -> byte 2
3901 // load x[i+1] -> byte 0 ---> wide_load x[i]
3902 // load x[i+2] -> byte 1
3903 const unsigned NumLoadsInTy = WideMemSizeInBits / NarrowMemSizeInBits;
3904 const unsigned ZeroByteOffset =
3905 *IsBigEndian
3906 ? bigEndianByteAt(NumLoadsInTy, 0)
3907 : littleEndianByteAt(NumLoadsInTy, 0);
3908 auto ZeroOffsetIdx = MemOffset2Idx.find(ZeroByteOffset);
3909 if (ZeroOffsetIdx == MemOffset2Idx.end() ||
3910 ZeroOffsetIdx->second != LowestIdx)
3911 return false;
3912
3913 // We will reuse the pointer from the load which ends up at byte offset 0. It
3914 // may not use index 0.
3915 Register Ptr = LowestIdxLoad->getPointerReg();
3916 const MachineMemOperand &MMO = LowestIdxLoad->getMMO();
3917 LegalityQuery::MemDesc MMDesc(MMO);
3918 MMDesc.MemoryTy = Ty;
3919 if (!isLegalOrBeforeLegalizer(
3920 {TargetOpcode::G_LOAD, {Ty, MRI.getType(Ptr)}, {MMDesc}}))
3921 return false;
3922 auto PtrInfo = MMO.getPointerInfo();
3923 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, WideMemSizeInBits / 8);
3924
3925 // Load must be allowed and fast on the target.
3926 LLVMContext &C = MF.getFunction().getContext();
3927 auto &DL = MF.getDataLayout();
3928 unsigned Fast = 0;
3929 if (!getTargetLowering().allowsMemoryAccess(C, DL, Ty, *NewMMO, &Fast) ||
3930 !Fast)
3931 return false;
3932
3933 MatchInfo = [=](MachineIRBuilder &MIB) {
3934 MIB.setInstrAndDebugLoc(*LatestLoad);
3935 Register LoadDst = NeedsBSwap ? MRI.cloneVirtualRegister(Dst) : Dst;
3936 MIB.buildLoad(LoadDst, Ptr, *NewMMO);
3937 if (NeedsBSwap)
3938 MIB.buildBSwap(Dst, LoadDst);
3939 };
3940 return true;
3941}
3942
3943 bool CombinerHelper::matchExtendThroughPhis(MachineInstr &MI,
3944 MachineInstr *&ExtMI) {
3945 auto &PHI = cast<GPhi>(MI);
3946 Register DstReg = PHI.getReg(0);
3947
3948 // TODO: Extending a vector may be expensive, don't do this until heuristics
3949 // are better.
3950 if (MRI.getType(DstReg).isVector())
3951 return false;
3952
3953 // Try to match a phi, whose only use is an extend.
3954 if (!MRI.hasOneNonDBGUse(DstReg))
3955 return false;
3956 ExtMI = &*MRI.use_instr_nodbg_begin(DstReg);
3957 switch (ExtMI->getOpcode()) {
3958 case TargetOpcode::G_ANYEXT:
3959 return true; // G_ANYEXT is usually free.
3960 case TargetOpcode::G_ZEXT:
3961 case TargetOpcode::G_SEXT:
3962 break;
3963 default:
3964 return false;
3965 }
3966
3967 // If the target is likely to fold this extend away, don't propagate.
3968 if (Builder.getTII().isExtendLikelyToBeFolded(*ExtMI, MRI))
3969 return false;
3970
3971 // We don't want to propagate the extends unless there's a good chance that
3972 // they'll be optimized in some way.
3973 // Collect the unique incoming values.
3974 SmallPtrSet<MachineInstr *, 4> InSrcs;
3975 for (unsigned I = 0; I < PHI.getNumIncomingValues(); ++I) {
3976 auto *DefMI = getDefIgnoringCopies(PHI.getIncomingValue(I), MRI);
3977 switch (DefMI->getOpcode()) {
3978 case TargetOpcode::G_LOAD:
3979 case TargetOpcode::G_TRUNC:
3980 case TargetOpcode::G_SEXT:
3981 case TargetOpcode::G_ZEXT:
3982 case TargetOpcode::G_ANYEXT:
3983 case TargetOpcode::G_CONSTANT:
3984 InSrcs.insert(DefMI);
3985 // Don't try to propagate if there are too many places to create new
3986 // extends, chances are it'll increase code size.
3987 if (InSrcs.size() > 2)
3988 return false;
3989 break;
3990 default:
3991 return false;
3992 }
3993 }
3994 return true;
3995}
3996
3997 void CombinerHelper::applyExtendThroughPhis(MachineInstr &MI,
3998 MachineInstr *&ExtMI) {
3999 auto &PHI = cast<GPhi>(MI);
4000 Register DstReg = ExtMI->getOperand(0).getReg();
4001 LLT ExtTy = MRI.getType(DstReg);
4002
4003 // Propagate the extension into each incoming register's defining block.
4004 // Use a SetVector here because PHIs can have duplicate edges, and we want
4005 // deterministic iteration order.
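 // In effect (illustrative): ext(phi(a, b)) is rewritten as
 // phi(ext(a), ext(b)), with each new extend placed right after the
 // corresponding incoming definition.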
4006 SmallSetVector<MachineInstr *, 8> SrcMIs;
4007 SmallDenseMap<MachineInstr *, MachineInstr *, 8> OldToNewSrcMap;
4008 for (unsigned I = 0; I < PHI.getNumIncomingValues(); ++I) {
4009 auto SrcReg = PHI.getIncomingValue(I);
4010 auto *SrcMI = MRI.getVRegDef(SrcReg);
4011 if (!SrcMIs.insert(SrcMI))
4012 continue;
4013
4014 // Build an extend after each src inst.
4015 auto *MBB = SrcMI->getParent();
4016 MachineBasicBlock::iterator InsertPt = ++SrcMI->getIterator();
4017 if (InsertPt != MBB->end() && InsertPt->isPHI())
4018 InsertPt = MBB->getFirstNonPHI();
4019
4020 Builder.setInsertPt(*SrcMI->getParent(), InsertPt);
4021 Builder.setDebugLoc(MI.getDebugLoc());
4022 auto NewExt = Builder.buildExtOrTrunc(ExtMI->getOpcode(), ExtTy, SrcReg);
4023 OldToNewSrcMap[SrcMI] = NewExt;
4024 }
4025
4026 // Create a new phi with the extended inputs.
4027 Builder.setInstrAndDebugLoc(MI);
4028 auto NewPhi = Builder.buildInstrNoInsert(TargetOpcode::G_PHI);
4029 NewPhi.addDef(DstReg);
4030 for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) {
4031 if (!MO.isReg()) {
4032 NewPhi.addMBB(MO.getMBB());
4033 continue;
4034 }
4035 auto *NewSrc = OldToNewSrcMap[MRI.getVRegDef(MO.getReg())];
4036 NewPhi.addUse(NewSrc->getOperand(0).getReg());
4037 }
4038 Builder.insertInstr(NewPhi);
4039 ExtMI->eraseFromParent();
4040}
4041
4042 bool CombinerHelper::matchExtractVecEltBuildVec(MachineInstr &MI,
4043 Register &Reg) {
4044 assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT);
4045 // If we have a constant index, look for a G_BUILD_VECTOR source
4046 // and find the source register that the index maps to.
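 // For example (illustrative): a G_EXTRACT_VECTOR_ELT with constant index 2
 // of G_BUILD_VECTOR %s0, %s1, %s2, %s3 can simply forward %s2.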
4047 Register SrcVec = MI.getOperand(1).getReg();
4048 LLT SrcTy = MRI.getType(SrcVec);
4049
4050 auto Cst = getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
4051 if (!Cst || Cst->Value.getZExtValue() >= SrcTy.getNumElements())
4052 return false;
4053
4054 unsigned VecIdx = Cst->Value.getZExtValue();
4055
4056 // Check if we have a build_vector or build_vector_trunc with an optional
4057 // trunc in front.
4058 MachineInstr *SrcVecMI = MRI.getVRegDef(SrcVec);
4059 if (SrcVecMI->getOpcode() == TargetOpcode::G_TRUNC) {
4060 SrcVecMI = MRI.getVRegDef(SrcVecMI->getOperand(1).getReg());
4061 }
4062
4063 if (SrcVecMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR &&
4064 SrcVecMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR_TRUNC)
4065 return false;
4066
4067 EVT Ty(getMVTForLLT(SrcTy));
4068 if (!MRI.hasOneNonDBGUse(SrcVec) &&
4069 !getTargetLowering().aggressivelyPreferBuildVectorSources(Ty))
4070 return false;
4071
4072 Reg = SrcVecMI->getOperand(VecIdx + 1).getReg();
4073 return true;
4074}
4075
4076 void CombinerHelper::applyExtractVecEltBuildVec(MachineInstr &MI,
4077 Register &Reg) {
4078 // Check the type of the register, since it may have come from a
4079 // G_BUILD_VECTOR_TRUNC.
4080 LLT ScalarTy = MRI.getType(Reg);
4081 Register DstReg = MI.getOperand(0).getReg();
4082 LLT DstTy = MRI.getType(DstReg);
4083
4084 if (ScalarTy != DstTy) {
4085 assert(ScalarTy.getSizeInBits() > DstTy.getSizeInBits());
4086 Builder.buildTrunc(DstReg, Reg);
4087 MI.eraseFromParent();
4088 return;
4089 }
4090 replaceSingleDefInstWithReg(MI, Reg);
4091}
4092
4093 bool CombinerHelper::matchExtractAllEltsFromBuildVector(
4094 MachineInstr &MI,
4095 SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) {
4096 assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
4097 // This combine tries to find build_vector's which have every source element
4098 // extracted using G_EXTRACT_VECTOR_ELT. This can happen when transforms like
4099 // the masked load scalarization is run late in the pipeline. There's already
4100 // a combine for a similar pattern starting from the extract, but that
4101 // doesn't attempt to do it if there are multiple uses of the build_vector,
4102 // which in this case is true. Starting the combine from the build_vector
4103 // feels more natural than trying to find sibling nodes of extracts.
4104 // E.g.
4105 // %vec(<4 x s32>) = G_BUILD_VECTOR %s1(s32), %s2, %s3, %s4
4106 // %ext1 = G_EXTRACT_VECTOR_ELT %vec, 0
4107 // %ext2 = G_EXTRACT_VECTOR_ELT %vec, 1
4108 // %ext3 = G_EXTRACT_VECTOR_ELT %vec, 2
4109 // %ext4 = G_EXTRACT_VECTOR_ELT %vec, 3
4110 // ==>
4111 // replace ext{1,2,3,4} with %s{1,2,3,4}
4112
4113 Register DstReg = MI.getOperand(0).getReg();
4114 LLT DstTy = MRI.getType(DstReg);
4115 unsigned NumElts = DstTy.getNumElements();
4116
4117 SmallBitVector ExtractedElts(NumElts);
4118 for (MachineInstr &II : MRI.use_nodbg_instructions(DstReg)) {
4119 if (II.getOpcode() != TargetOpcode::G_EXTRACT_VECTOR_ELT)
4120 return false;
4121 auto Cst = getIConstantVRegVal(II.getOperand(2).getReg(), MRI);
4122 if (!Cst)
4123 return false;
4124 unsigned Idx = Cst->getZExtValue();
4125 if (Idx >= NumElts)
4126 return false; // Out of range.
4127 ExtractedElts.set(Idx);
4128 SrcDstPairs.emplace_back(
4129 std::make_pair(MI.getOperand(Idx + 1).getReg(), &II));
4130 }
4131 // Match if every element was extracted.
4132 return ExtractedElts.all();
4133}
4134
4135 void CombinerHelper::applyExtractAllEltsFromBuildVector(
4136 MachineInstr &MI,
4137 SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) {
4138 assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
4139 for (auto &Pair : SrcDstPairs) {
4140 auto *ExtMI = Pair.second;
4141 replaceRegWith(MRI, ExtMI->getOperand(0).getReg(), Pair.first);
4142 ExtMI->eraseFromParent();
4143 }
4144 MI.eraseFromParent();
4145}
4146
4147 void CombinerHelper::applyBuildFn(
4148 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4149 applyBuildFnNoErase(MI, MatchInfo);
4150 MI.eraseFromParent();
4151}
4152
4153 void CombinerHelper::applyBuildFnNoErase(
4154 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4155 MatchInfo(Builder);
4156}
4157
4158 bool CombinerHelper::matchOrShiftToFunnelShift(MachineInstr &MI,
4159 BuildFnTy &MatchInfo) {
4160 assert(MI.getOpcode() == TargetOpcode::G_OR);
4161
4162 Register Dst = MI.getOperand(0).getReg();
4163 LLT Ty = MRI.getType(Dst);
4164 unsigned BitWidth = Ty.getScalarSizeInBits();
4165
4166 Register ShlSrc, ShlAmt, LShrSrc, LShrAmt, Amt;
4167 unsigned FshOpc = 0;
4168
4169 // Match (or (shl ...), (lshr ...)).
4170 if (!mi_match(Dst, MRI,
4171 // m_GOr() handles the commuted version as well.
4172 m_GOr(m_GShl(m_Reg(ShlSrc), m_Reg(ShlAmt)),
4173 m_GLShr(m_Reg(LShrSrc), m_Reg(LShrAmt)))))
4174 return false;
4175
4176 // Given constants C0 and C1 such that C0 + C1 is bit-width:
4177 // (or (shl x, C0), (lshr y, C1)) -> (fshl x, y, C0) or (fshr x, y, C1)
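 // For example (illustrative, s32): (or (shl x, 8), (lshr y, 24)) is
 // equivalent to fshl x, y, 8 and to fshr x, y, 24.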
4178 int64_t CstShlAmt, CstLShrAmt;
4179 if (mi_match(ShlAmt, MRI, m_ICstOrSplat(CstShlAmt)) &&
4180 mi_match(LShrAmt, MRI, m_ICstOrSplat(CstLShrAmt)) &&
4181 CstShlAmt + CstLShrAmt == BitWidth) {
4182 FshOpc = TargetOpcode::G_FSHR;
4183 Amt = LShrAmt;
4184
4185 } else if (mi_match(LShrAmt, MRI,
4186 m_GSub(m_SpecificICst(BitWidth), m_Reg(Amt))) &&
4187 ShlAmt == Amt) {
4188 // (or (shl x, amt), (lshr y, (sub bw, amt))) -> (fshl x, y, amt)
4189 FshOpc = TargetOpcode::G_FSHL;
4190
4191 } else if (mi_match(ShlAmt, MRI,
4192 m_GSub(m_SpecificICst(BitWidth), m_Reg(Amt))) &&
4193 LShrAmt == Amt) {
4194 // (or (shl x, (sub bw, amt)), (lshr y, amt)) -> (fshr x, y, amt)
4195 FshOpc = TargetOpcode::G_FSHR;
4196
4197 } else {
4198 return false;
4199 }
4200
4201 LLT AmtTy = MRI.getType(Amt);
4202 if (!isLegalOrBeforeLegalizer({FshOpc, {Ty, AmtTy}}))
4203 return false;
4204
4205 MatchInfo = [=](MachineIRBuilder &B) {
4206 B.buildInstr(FshOpc, {Dst}, {ShlSrc, LShrSrc, Amt});
4207 };
4208 return true;
4209}
4210
4211/// Match an FSHL or FSHR that can be combined to a ROTR or ROTL rotate.
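/// For example (illustrative): G_FSHL %x, %x, %amt is equivalent to
/// G_ROTL %x, %amt, because both halves of the concatenation are %x.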
4212 bool CombinerHelper::matchFunnelShiftToRotate(MachineInstr &MI) {
4213 unsigned Opc = MI.getOpcode();
4214 assert(Opc == TargetOpcode::G_FSHL || Opc == TargetOpcode::G_FSHR);
4215 Register X = MI.getOperand(1).getReg();
4216 Register Y = MI.getOperand(2).getReg();
4217 if (X != Y)
4218 return false;
4219 unsigned RotateOpc =
4220 Opc == TargetOpcode::G_FSHL ? TargetOpcode::G_ROTL : TargetOpcode::G_ROTR;
4221 return isLegalOrBeforeLegalizer({RotateOpc, {MRI.getType(X), MRI.getType(Y)}});
4222}
4223
4224 void CombinerHelper::applyFunnelShiftToRotate(MachineInstr &MI) {
4225 unsigned Opc = MI.getOpcode();
4226 assert(Opc == TargetOpcode::G_FSHL || Opc == TargetOpcode::G_FSHR);
4227 bool IsFSHL = Opc == TargetOpcode::G_FSHL;
4228 Observer.changingInstr(MI);
4229 MI.setDesc(Builder.getTII().get(IsFSHL ? TargetOpcode::G_ROTL
4230 : TargetOpcode::G_ROTR));
4231 MI.removeOperand(2);
4232 Observer.changedInstr(MI);
4233}
4234
4235// Fold (rot x, c) -> (rot x, c % BitSize)
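// For example (illustrative, s32): G_ROTL %x, 37 is equivalent to
// G_ROTL %x, 5, since rotating by the full bit width is a no-op.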
4236 bool CombinerHelper::matchRotateOutOfRange(MachineInstr &MI) {
4237 assert(MI.getOpcode() == TargetOpcode::G_ROTL ||
4238 MI.getOpcode() == TargetOpcode::G_ROTR);
4239 unsigned Bitsize =
4240 MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits();
4241 Register AmtReg = MI.getOperand(2).getReg();
4242 bool OutOfRange = false;
4243 auto MatchOutOfRange = [Bitsize, &OutOfRange](const Constant *C) {
4244 if (auto *CI = dyn_cast<ConstantInt>(C))
4245 OutOfRange |= CI->getValue().uge(Bitsize);
4246 return true;
4247 };
4248 return matchUnaryPredicate(MRI, AmtReg, MatchOutOfRange) && OutOfRange;
4249}
4250
4251 void CombinerHelper::applyRotateOutOfRange(MachineInstr &MI) {
4252 assert(MI.getOpcode() == TargetOpcode::G_ROTL ||
4253 MI.getOpcode() == TargetOpcode::G_ROTR);
4254 unsigned Bitsize =
4255 MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits();
4256 Register Amt = MI.getOperand(2).getReg();
4257 LLT AmtTy = MRI.getType(Amt);
4258 auto Bits = Builder.buildConstant(AmtTy, Bitsize);
4259 Amt = Builder.buildURem(AmtTy, MI.getOperand(2).getReg(), Bits).getReg(0);
4260 Observer.changingInstr(MI);
4261 MI.getOperand(2).setReg(Amt);
4262 Observer.changedInstr(MI);
4263}
4264
4265 bool CombinerHelper::matchICmpToTrueFalseKnownBits(MachineInstr &MI,
4266 int64_t &MatchInfo) {
4267 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
4268 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
4269
4270 // We want to avoid calling KnownBits on the LHS if possible, as this combine
4271 // has no filter and runs on every G_ICMP instruction. We can avoid calling
4272 // KnownBits on the LHS in two cases:
4273 //
4274 // - The RHS is unknown: Constants are always on RHS. If the RHS is unknown
4275 // we cannot do any transforms so we can safely bail out early.
4276 // - The RHS is zero: we don't need to know the LHS to do unsigned <0 and
4277 // >=0.
4278 auto KnownRHS = KB->getKnownBits(MI.getOperand(3).getReg());
4279 if (KnownRHS.isUnknown())
4280 return false;
4281
4282 std::optional<bool> KnownVal;
4283 if (KnownRHS.isZero()) {
4284 // ? uge 0 -> always true
4285 // ? ult 0 -> always false
4286 if (Pred == CmpInst::ICMP_UGE)
4287 KnownVal = true;
4288 else if (Pred == CmpInst::ICMP_ULT)
4289 KnownVal = false;
4290 }
4291
4292 if (!KnownVal) {
4293 auto KnownLHS = KB->getKnownBits(MI.getOperand(2).getReg());
4294 switch (Pred) {
4295 default:
4296 llvm_unreachable("Unexpected G_ICMP predicate?");
4297 case CmpInst::ICMP_EQ:
4298 KnownVal = KnownBits::eq(KnownLHS, KnownRHS);
4299 break;
4300 case CmpInst::ICMP_NE:
4301 KnownVal = KnownBits::ne(KnownLHS, KnownRHS);
4302 break;
4303 case CmpInst::ICMP_SGE:
4304 KnownVal = KnownBits::sge(KnownLHS, KnownRHS);
4305 break;
4306 case CmpInst::ICMP_SGT:
4307 KnownVal = KnownBits::sgt(KnownLHS, KnownRHS);
4308 break;
4309 case CmpInst::ICMP_SLE:
4310 KnownVal = KnownBits::sle(KnownLHS, KnownRHS);
4311 break;
4312 case CmpInst::ICMP_SLT:
4313 KnownVal = KnownBits::slt(KnownLHS, KnownRHS);
4314 break;
4315 case CmpInst::ICMP_UGE:
4316 KnownVal = KnownBits::uge(KnownLHS, KnownRHS);
4317 break;
4318 case CmpInst::ICMP_UGT:
4319 KnownVal = KnownBits::ugt(KnownLHS, KnownRHS);
4320 break;
4321 case CmpInst::ICMP_ULE:
4322 KnownVal = KnownBits::ule(KnownLHS, KnownRHS);
4323 break;
4324 case CmpInst::ICMP_ULT:
4325 KnownVal = KnownBits::ult(KnownLHS, KnownRHS);
4326 break;
4327 }
4328 }
4329
4330 if (!KnownVal)
4331 return false;
4332 MatchInfo =
4333 *KnownVal
4334 ? getICmpTrueVal(getTargetLowering(),
4335 /*IsVector = */
4336 MRI.getType(MI.getOperand(0).getReg()).isVector(),
4337 /* IsFP = */ false)
4338 : 0;
4339 return true;
4340}
4341
4342 bool CombinerHelper::matchICmpToLHSKnownBits(
4343 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4344 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
4345 // Given:
4346 //
4347 // %x = G_WHATEVER (... x is known to be 0 or 1 ...)
4348 // %cmp = G_ICMP ne %x, 0
4349 //
4350 // Or:
4351 //
4352 // %x = G_WHATEVER (... x is known to be 0 or 1 ...)
4353 // %cmp = G_ICMP eq %x, 1
4354 //
4355 // We can replace %cmp with %x assuming true is 1 on the target.
4356 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
4357 if (!CmpInst::isEquality(Pred))
4358 return false;
4359 Register Dst = MI.getOperand(0).getReg();
4360 LLT DstTy = MRI.getType(Dst);
4361 if (getICmpTrueVal(getTargetLowering(), DstTy.isVector(),
4362 /* IsFP = */ false) != 1)
4363 return false;
4364 int64_t OneOrZero = Pred == CmpInst::ICMP_EQ;
4365 if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICst(OneOrZero)))
4366 return false;
4367 Register LHS = MI.getOperand(2).getReg();
4368 auto KnownLHS = KB->getKnownBits(LHS);
4369 if (KnownLHS.getMinValue() != 0 || KnownLHS.getMaxValue() != 1)
4370 return false;
4371 // Make sure replacing Dst with the LHS is a legal operation.
4372 LLT LHSTy = MRI.getType(LHS);
4373 unsigned LHSSize = LHSTy.getSizeInBits();
4374 unsigned DstSize = DstTy.getSizeInBits();
4375 unsigned Op = TargetOpcode::COPY;
4376 if (DstSize != LHSSize)
4377 Op = DstSize < LHSSize ? TargetOpcode::G_TRUNC : TargetOpcode::G_ZEXT;
4378 if (!isLegalOrBeforeLegalizer({Op, {DstTy, LHSTy}}))
4379 return false;
4380 MatchInfo = [=](MachineIRBuilder &B) { B.buildInstr(Op, {Dst}, {LHS}); };
4381 return true;
4382}
4383
4384// Replace (and (or x, c1), c2) with (and x, c2) iff c1 & c2 == 0
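// For example (illustrative): (and (or x, 0xF0), 0x0F) -> (and x, 0x0F),
// because the or-mask cannot set any bit that survives the and-mask.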
4385 bool CombinerHelper::matchAndOrDisjointMask(
4386 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4387 assert(MI.getOpcode() == TargetOpcode::G_AND);
4388
4389 // Ignore vector types to simplify matching the two constants.
4390 // TODO: do this for vectors and scalars via a demanded bits analysis.
4391 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4392 if (Ty.isVector())
4393 return false;
4394
4395 Register Src;
4396 Register AndMaskReg;
4397 int64_t AndMaskBits;
4398 int64_t OrMaskBits;
4399 if (!mi_match(MI, MRI,
4400 m_GAnd(m_GOr(m_Reg(Src), m_ICst(OrMaskBits)),
4401 m_all_of(m_ICst(AndMaskBits), m_Reg(AndMaskReg)))))
4402 return false;
4403
4404 // Check if OrMask could turn on any bits in Src.
4405 if (AndMaskBits & OrMaskBits)
4406 return false;
4407
4408 MatchInfo = [=, &MI](MachineIRBuilder &B) {
4409 Observer.changingInstr(MI);
4410 // Canonicalize the result to have the constant on the RHS.
4411 if (MI.getOperand(1).getReg() == AndMaskReg)
4412 MI.getOperand(2).setReg(AndMaskReg);
4413 MI.getOperand(1).setReg(Src);
4414 Observer.changedInstr(MI);
4415 };
4416 return true;
4417}
4418
4419/// Form a G_SBFX from a G_SEXT_INREG fed by a right shift.
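/// For example (illustrative): G_SEXT_INREG (G_LSHR %x, 4), 8 sign-extends
/// bits [11:4] of %x, i.e. G_SBFX %x, 4 (lsb), 8 (width).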
4420 bool CombinerHelper::matchBitfieldExtractFromSExtInReg(
4421 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4422 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
4423 Register Dst = MI.getOperand(0).getReg();
4424 Register Src = MI.getOperand(1).getReg();
4425 LLT Ty = MRI.getType(Src);
4426 LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
4427 if (!LI || !LI->isLegalOrCustom({TargetOpcode::G_SBFX, {Ty, ExtractTy}}))
4428 return false;
4429 int64_t Width = MI.getOperand(2).getImm();
4430 Register ShiftSrc;
4431 int64_t ShiftImm;
4432 if (!mi_match(
4433 Src, MRI,