CombinerHelper.cpp
1//===-- lib/CodeGen/GlobalISel/CombinerHelper.cpp -------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
9#include "llvm/ADT/APFloat.h"
10#include "llvm/ADT/STLExtras.h"
11#include "llvm/ADT/SetVector.h"
33#include "llvm/IR/DataLayout.h"
34#include "llvm/IR/InstrTypes.h"
40#include <cmath>
41#include <optional>
42#include <tuple>
43
44#define DEBUG_TYPE "gi-combiner"
45
46using namespace llvm;
47using namespace MIPatternMatch;
48
49// Option to allow testing of the combiner while no targets know about indexed
50// addressing.
51static cl::opt<bool>
52 ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false),
53 cl::desc("Force all indexed operations to be "
54 "legal for the GlobalISel combiner"));
55
56CombinerHelper::CombinerHelper(GISelChangeObserver &Observer,
57 MachineIRBuilder &B, bool IsPreLegalize,
58 GISelKnownBits *KB, MachineDominatorTree *MDT,
59 const LegalizerInfo *LI)
60 : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer), KB(KB),
61 MDT(MDT), IsPreLegalize(IsPreLegalize), LI(LI),
62 RBI(Builder.getMF().getSubtarget().getRegBankInfo()),
63 TRI(Builder.getMF().getSubtarget().getRegisterInfo()) {
64 (void)this->KB;
65}
66
69}
70
71/// \returns The little endian in-memory byte position of byte \p I in a
72/// \p ByteWidth bytes wide type.
73///
74/// E.g. Given a 4-byte type x, x[0] -> byte 0
75static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I) {
76 assert(I < ByteWidth && "I must be in [0, ByteWidth)");
77 return I;
78}
79
80/// Determines the LogBase2 value for a non-null input value using the
81/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
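/// For example (illustrative; assumes V is a non-zero power of two, where the
/// transform is exact): with a 32-bit element type and V = 8, ctlz(8) = 28,
/// so LogBase2 = (32 - 1) - 28 = 3.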
83 auto &MRI = *MIB.getMRI();
84 LLT Ty = MRI.getType(V);
85 auto Ctlz = MIB.buildCTLZ(Ty, V);
86 auto Base = MIB.buildConstant(Ty, Ty.getScalarSizeInBits() - 1);
87 return MIB.buildSub(Ty, Base, Ctlz).getReg(0);
88}
89
90/// \returns The big endian in-memory byte position of byte \p I in a
91/// \p ByteWidth bytes wide type.
92///
93/// E.g. Given a 4-byte type x, x[0] -> byte 3
94static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I) {
95 assert(I < ByteWidth && "I must be in [0, ByteWidth)");
96 return ByteWidth - I - 1;
97}
98
99/// Given a map from byte offsets in memory to indices in a load/store,
100/// determine if that map corresponds to a little or big endian byte pattern.
101///
102/// \param MemOffset2Idx maps memory offsets to address offsets.
103/// \param LowestIdx is the lowest index in \p MemOffset2Idx.
104///
105/// \returns true if the map corresponds to a big endian byte pattern, false if
106/// it corresponds to a little endian byte pattern, and std::nullopt otherwise.
107///
108/// E.g. given a 32-bit type x, and x[AddrOffset], the in-memory byte patterns
109/// are as follows:
110///
111/// AddrOffset Little endian Big endian
112/// 0 0 3
113/// 1 1 2
114/// 2 2 1
115/// 3 3 0
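/// For instance (illustrative), the map {0 -> 3, 1 -> 2, 2 -> 1, 3 -> 0} with
/// \p LowestIdx = 0 matches the big endian column above and returns true.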
116static std::optional<bool>
118 int64_t LowestIdx) {
119 // Need at least two byte positions to decide on endianness.
120 unsigned Width = MemOffset2Idx.size();
121 if (Width < 2)
122 return std::nullopt;
123 bool BigEndian = true, LittleEndian = true;
124 for (unsigned MemOffset = 0; MemOffset < Width; ++MemOffset) {
125 auto MemOffsetAndIdx = MemOffset2Idx.find(MemOffset);
126 if (MemOffsetAndIdx == MemOffset2Idx.end())
127 return std::nullopt;
128 const int64_t Idx = MemOffsetAndIdx->second - LowestIdx;
129 assert(Idx >= 0 && "Expected non-negative byte offset?");
130 LittleEndian &= Idx == littleEndianByteAt(Width, MemOffset);
131 BigEndian &= Idx == bigEndianByteAt(Width, MemOffset);
132 if (!BigEndian && !LittleEndian)
133 return std::nullopt;
134 }
135
136 assert((BigEndian != LittleEndian) &&
137 "Pattern cannot be both big and little endian!");
138 return BigEndian;
139}
140
142
143bool CombinerHelper::isLegal(const LegalityQuery &Query) const {
144 assert(LI && "Must have LegalizerInfo to query isLegal!");
145 return LI->getAction(Query).Action == LegalizeActions::Legal;
146}
147
149 const LegalityQuery &Query) const {
150 return isPreLegalize() || isLegal(Query);
151}
152
154 if (!Ty.isVector())
155 return isLegalOrBeforeLegalizer({TargetOpcode::G_CONSTANT, {Ty}});
156 // Vector constants are represented as a G_BUILD_VECTOR of scalar G_CONSTANTs.
157 if (isPreLegalize())
158 return true;
159 LLT EltTy = Ty.getElementType();
160 return isLegal({TargetOpcode::G_BUILD_VECTOR, {Ty, EltTy}}) &&
161 isLegal({TargetOpcode::G_CONSTANT, {EltTy}});
162}
163
165 Register ToReg) const {
167
168 if (MRI.constrainRegAttrs(ToReg, FromReg))
169 MRI.replaceRegWith(FromReg, ToReg);
170 else
171 Builder.buildCopy(ToReg, FromReg);
172
174}
175
177 MachineOperand &FromRegOp,
178 Register ToReg) const {
179 assert(FromRegOp.getParent() && "Expected an operand in an MI");
180 Observer.changingInstr(*FromRegOp.getParent());
181
182 FromRegOp.setReg(ToReg);
183
184 Observer.changedInstr(*FromRegOp.getParent());
185}
186
188 unsigned ToOpcode) const {
189 Observer.changingInstr(FromMI);
190
191 FromMI.setDesc(Builder.getTII().get(ToOpcode));
192
193 Observer.changedInstr(FromMI);
194}
195
197 return RBI->getRegBank(Reg, MRI, *TRI);
198}
199
201 if (RegBank)
202 MRI.setRegBank(Reg, *RegBank);
203}
204
206 if (matchCombineCopy(MI)) {
208 return true;
209 }
210 return false;
211}
213 if (MI.getOpcode() != TargetOpcode::COPY)
214 return false;
215 Register DstReg = MI.getOperand(0).getReg();
216 Register SrcReg = MI.getOperand(1).getReg();
217 return canReplaceReg(DstReg, SrcReg, MRI);
218}
220 Register DstReg = MI.getOperand(0).getReg();
221 Register SrcReg = MI.getOperand(1).getReg();
222 MI.eraseFromParent();
223 replaceRegWith(MRI, DstReg, SrcReg);
224}
225
227 MachineInstr &MI, BuildFnTy &MatchInfo) {
228 // Ported from InstCombinerImpl::pushFreezeToPreventPoisonFromPropagating.
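// Rough sketch of the rewrite performed below (hypothetical MIR, not taken
// from a test case):
//   %x:_(s32) = G_ADD %a, %b      ; only %b may be poison
//   %fr:_(s32) = G_FREEZE %x
// becomes
//   %fb:_(s32) = G_FREEZE %b
//   %x:_(s32) = G_ADD %a, %fb     ; poison-generating flags dropped
// and uses of %fr are rewritten to use %x.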
229 Register DstOp = MI.getOperand(0).getReg();
230 Register OrigOp = MI.getOperand(1).getReg();
231
232 if (!MRI.hasOneNonDBGUse(OrigOp))
233 return false;
234
235 MachineInstr *OrigDef = MRI.getUniqueVRegDef(OrigOp);
236 // Even if only a single operand of the PHI is not guaranteed non-poison,
237 // moving freeze() backwards across a PHI can cause optimization issues for
238 // other users of that operand.
239 //
240 // Moving freeze() from one of the output registers of a G_UNMERGE_VALUES to
241 // the source register is unprofitable because it makes the freeze() more
242 // strict than is necessary (it would affect the whole register instead of
243 // just the subreg being frozen).
244 if (OrigDef->isPHI() || isa<GUnmerge>(OrigDef))
245 return false;
246
247 if (canCreateUndefOrPoison(OrigOp, MRI,
248 /*ConsiderFlagsAndMetadata=*/false))
249 return false;
250
251 std::optional<MachineOperand> MaybePoisonOperand;
252 for (MachineOperand &Operand : OrigDef->uses()) {
253 if (!Operand.isReg())
254 return false;
255
256 if (isGuaranteedNotToBeUndefOrPoison(Operand.getReg(), MRI))
257 continue;
258
259 if (!MaybePoisonOperand)
260 MaybePoisonOperand = Operand;
261 else {
262 // We have more than one maybe-poison operand. Moving the freeze is
263 // unsafe.
264 return false;
265 }
266 }
267
268 // Eliminate freeze if all operands are guaranteed non-poison.
269 if (!MaybePoisonOperand) {
270 MatchInfo = [=](MachineIRBuilder &B) {
271 Observer.changingInstr(*OrigDef);
272 cast<GenericMachineInstr>(OrigDef)->dropPoisonGeneratingFlags();
273 Observer.changedInstr(*OrigDef);
274 B.buildCopy(DstOp, OrigOp);
275 };
276 return true;
277 }
278
279 Register MaybePoisonOperandReg = MaybePoisonOperand->getReg();
280 LLT MaybePoisonOperandRegTy = MRI.getType(MaybePoisonOperandReg);
281
282 MatchInfo = [=](MachineIRBuilder &B) mutable {
283 Observer.changingInstr(*OrigDef);
284 cast<GenericMachineInstr>(OrigDef)->dropPoisonGeneratingFlags();
285 Observer.changedInstr(*OrigDef);
286 B.setInsertPt(*OrigDef->getParent(), OrigDef->getIterator());
287 auto Freeze = B.buildFreeze(MaybePoisonOperandRegTy, MaybePoisonOperandReg);
289 MRI, *OrigDef->findRegisterUseOperand(MaybePoisonOperandReg, TRI),
290 Freeze.getReg(0));
291 replaceRegWith(MRI, DstOp, OrigOp);
292 };
293 return true;
294}
295
298 assert(MI.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
299 "Invalid instruction");
300 bool IsUndef = true;
301 MachineInstr *Undef = nullptr;
302
303 // Walk over all the operands of concat vectors and check if they are
304 // build_vector themselves or undef.
305 // Then collect their operands in Ops.
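// For example (sketch with hypothetical registers):
//   %v1:_(<2 x s32>) = G_BUILD_VECTOR %a:_(s32), %b:_(s32)
//   %v2:_(<2 x s32>) = G_IMPLICIT_DEF
//   %c:_(<4 x s32>) = G_CONCAT_VECTORS %v1(<2 x s32>), %v2(<2 x s32>)
// flattens to
//   %u:_(s32) = G_IMPLICIT_DEF
//   %c:_(<4 x s32>) = G_BUILD_VECTOR %a, %b, %u, %u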
306 for (const MachineOperand &MO : MI.uses()) {
307 Register Reg = MO.getReg();
308 MachineInstr *Def = MRI.getVRegDef(Reg);
309 assert(Def && "Operand not defined");
310 if (!MRI.hasOneNonDBGUse(Reg))
311 return false;
312 switch (Def->getOpcode()) {
313 case TargetOpcode::G_BUILD_VECTOR:
314 IsUndef = false;
315 // Remember the operands of the build_vector to fold
316 // them into the yet-to-build flattened concat vectors.
317 for (const MachineOperand &BuildVecMO : Def->uses())
318 Ops.push_back(BuildVecMO.getReg());
319 break;
320 case TargetOpcode::G_IMPLICIT_DEF: {
321 LLT OpType = MRI.getType(Reg);
322 // Keep one undef value for all the undef operands.
323 if (!Undef) {
324 Builder.setInsertPt(*MI.getParent(), MI);
325 Undef = Builder.buildUndef(OpType.getScalarType());
326 }
327 assert(MRI.getType(Undef->getOperand(0).getReg()) ==
328 OpType.getScalarType() &&
329 "All undefs should have the same type");
330 // Break the undef vector into as many scalar elements as needed
331 // for the flattening.
332 for (unsigned EltIdx = 0, EltEnd = OpType.getNumElements();
333 EltIdx != EltEnd; ++EltIdx)
334 Ops.push_back(Undef->getOperand(0).getReg());
335 break;
336 }
337 default:
338 return false;
339 }
340 }
341
342 // Check if the combine is illegal
343 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
345 {TargetOpcode::G_BUILD_VECTOR, {DstTy, MRI.getType(Ops[0])}})) {
346 return false;
347 }
348
349 if (IsUndef)
350 Ops.clear();
351
352 return true;
353}
356 // We determined that the concat_vectors can be flattened.
357 // Generate the flattened build_vector.
358 Register DstReg = MI.getOperand(0).getReg();
359 Builder.setInsertPt(*MI.getParent(), MI);
360 Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
361
362 // Note: IsUndef is sort of redundant. We could have determined it by
363 // checking that all Ops are undef. Alternatively, we could have
364 // generated a build_vector of undefs and rely on another combine to
365 // clean that up. For now, given we already gather this information
366 // in matchCombineConcatVectors, just save compile time and issue the
367 // right thing.
368 if (Ops.empty())
369 Builder.buildUndef(NewDstReg);
370 else
371 Builder.buildBuildVector(NewDstReg, Ops);
372 MI.eraseFromParent();
373 replaceRegWith(MRI, DstReg, NewDstReg);
374}
375
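// Illustrative sketch of the shuffle-of-concat combine matched below, with
// <2 x s32> concat sources (hypothetical MIR):
//   %c1:_(<4 x s32>) = G_CONCAT_VECTORS %x, %y
//   %c2:_(<4 x s32>) = G_CONCAT_VECTORS %z, %w
//   %s:_(<4 x s32>) = G_SHUFFLE_VECTOR %c1, %c2, shufflemask(2,3,4,5)
// can be rewritten as
//   %s:_(<4 x s32>) = G_CONCAT_VECTORS %y, %z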
378 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
379 auto ConcatMI1 =
380 dyn_cast<GConcatVectors>(MRI.getVRegDef(MI.getOperand(1).getReg()));
381 auto ConcatMI2 =
382 dyn_cast<GConcatVectors>(MRI.getVRegDef(MI.getOperand(2).getReg()));
383 if (!ConcatMI1 || !ConcatMI2)
384 return false;
385
386 // Check that the sources of the Concat instructions have the same type
387 if (MRI.getType(ConcatMI1->getSourceReg(0)) !=
388 MRI.getType(ConcatMI2->getSourceReg(0)))
389 return false;
390
391 LLT ConcatSrcTy = MRI.getType(ConcatMI1->getReg(1));
392 LLT ShuffleSrcTy1 = MRI.getType(MI.getOperand(1).getReg());
393 unsigned ConcatSrcNumElt = ConcatSrcTy.getNumElements();
394 for (unsigned i = 0; i < Mask.size(); i += ConcatSrcNumElt) {
395 // Check if the index takes a whole source register from G_CONCAT_VECTORS
396 // Assumes that all Sources of G_CONCAT_VECTORS are the same type
397 if (Mask[i] == -1) {
398 for (unsigned j = 1; j < ConcatSrcNumElt; j++) {
399 if (i + j >= Mask.size())
400 return false;
401 if (Mask[i + j] != -1)
402 return false;
403 }
405 {TargetOpcode::G_IMPLICIT_DEF, {ConcatSrcTy}}))
406 return false;
407 Ops.push_back(0);
408 } else if (Mask[i] % ConcatSrcNumElt == 0) {
409 for (unsigned j = 1; j < ConcatSrcNumElt; j++) {
410 if (i + j >= Mask.size())
411 return false;
412 if (Mask[i + j] != Mask[i] + static_cast<int>(j))
413 return false;
414 }
415 // Retrieve the source register from its respective G_CONCAT_VECTORS
416 // instruction
417 if (Mask[i] < ShuffleSrcTy1.getNumElements()) {
418 Ops.push_back(ConcatMI1->getSourceReg(Mask[i] / ConcatSrcNumElt));
419 } else {
420 Ops.push_back(ConcatMI2->getSourceReg(Mask[i] / ConcatSrcNumElt -
421 ConcatMI1->getNumSources()));
422 }
423 } else {
424 return false;
425 }
426 }
427
429 {TargetOpcode::G_CONCAT_VECTORS,
430 {MRI.getType(MI.getOperand(0).getReg()), ConcatSrcTy}}))
431 return false;
432
433 return !Ops.empty();
434}
435
438 LLT SrcTy = MRI.getType(Ops[0]);
439 Register UndefReg = 0;
440
441 for (Register &Reg : Ops) {
442 if (Reg == 0) {
443 if (UndefReg == 0)
444 UndefReg = Builder.buildUndef(SrcTy).getReg(0);
445 Reg = UndefReg;
446 }
447 }
448
449 if (Ops.size() > 1)
450 Builder.buildConcatVectors(MI.getOperand(0).getReg(), Ops);
451 else
452 Builder.buildCopy(MI.getOperand(0).getReg(), Ops[0]);
453 MI.eraseFromParent();
454}
455
458 if (matchCombineShuffleVector(MI, Ops)) {
460 return true;
461 }
462 return false;
463}
464
467 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
468 "Invalid instruction kind");
469 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
470 Register Src1 = MI.getOperand(1).getReg();
471 LLT SrcType = MRI.getType(Src1);
472 // As bizarre as it may look, shuffle vector can actually produce
473 // scalar! This is because at the IR level a <1 x ty> shuffle
474 // vector is perfectly valid.
475 unsigned DstNumElts = DstType.isVector() ? DstType.getNumElements() : 1;
476 unsigned SrcNumElts = SrcType.isVector() ? SrcType.getNumElements() : 1;
477
478 // If the resulting vector is smaller than the size of the source
479 // vectors being concatenated, we won't be able to replace the
480 // shuffle vector with a concat_vectors.
481 //
482 // Note: We may still be able to produce a concat_vectors fed by
483 // extract_vector_elt and so on. It is less clear that would
484 // be better though, so don't bother for now.
485 //
486 // If the destination is a scalar, the size of the sources doesn't
487 // matter. We will lower the shuffle to a plain copy. This will
488 // work only if the source and destination have the same size. But
489 // that's covered by the next condition.
490 //
491 // TODO: If the sizes of the source and destination don't match
492 // we could still emit an extract_vector_elt in that case.
493 if (DstNumElts < 2 * SrcNumElts && DstNumElts != 1)
494 return false;
495
496 // Check that the shuffle mask can be broken evenly between the
497 // different sources.
498 if (DstNumElts % SrcNumElts != 0)
499 return false;
500
501 // Mask length is a multiple of the source vector length.
502 // Check if the shuffle is some kind of concatenation of the input
503 // vectors.
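// E.g. (sketch), with two <2 x s32> sources:
//   %s:_(<4 x s32>) = G_SHUFFLE_VECTOR %src1, %src2, shufflemask(0,1,2,3)
// is simply
//   %s:_(<4 x s32>) = G_CONCAT_VECTORS %src1, %src2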
504 unsigned NumConcat = DstNumElts / SrcNumElts;
505 SmallVector<int, 8> ConcatSrcs(NumConcat, -1);
506 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
507 for (unsigned i = 0; i != DstNumElts; ++i) {
508 int Idx = Mask[i];
509 // Undef value.
510 if (Idx < 0)
511 continue;
512 // Ensure the indices in each SrcType sized piece are sequential and that
513 // the same source is used for the whole piece.
514 if ((Idx % SrcNumElts != (i % SrcNumElts)) ||
515 (ConcatSrcs[i / SrcNumElts] >= 0 &&
516 ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts)))
517 return false;
518 // Remember which source this index came from.
519 ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts;
520 }
521
522 // The shuffle is concatenating multiple vectors together.
523 // Collect the different operands for that.
524 Register UndefReg;
525 Register Src2 = MI.getOperand(2).getReg();
526 for (auto Src : ConcatSrcs) {
527 if (Src < 0) {
528 if (!UndefReg) {
529 Builder.setInsertPt(*MI.getParent(), MI);
530 UndefReg = Builder.buildUndef(SrcType).getReg(0);
531 }
532 Ops.push_back(UndefReg);
533 } else if (Src == 0)
534 Ops.push_back(Src1);
535 else
536 Ops.push_back(Src2);
537 }
538 return true;
539}
540
542 const ArrayRef<Register> Ops) {
543 Register DstReg = MI.getOperand(0).getReg();
544 Builder.setInsertPt(*MI.getParent(), MI);
545 Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
546
547 if (Ops.size() == 1)
548 Builder.buildCopy(NewDstReg, Ops[0]);
549 else
550 Builder.buildMergeLikeInstr(NewDstReg, Ops);
551
552 MI.eraseFromParent();
553 replaceRegWith(MRI, DstReg, NewDstReg);
554}
555
557 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
558 "Invalid instruction kind");
559
560 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
561 return Mask.size() == 1;
562}
563
565 Register DstReg = MI.getOperand(0).getReg();
566 Builder.setInsertPt(*MI.getParent(), MI);
567
568 int I = MI.getOperand(3).getShuffleMask()[0];
569 Register Src1 = MI.getOperand(1).getReg();
570 LLT Src1Ty = MRI.getType(Src1);
571 int Src1NumElts = Src1Ty.isVector() ? Src1Ty.getNumElements() : 1;
572 Register SrcReg;
573 if (I >= Src1NumElts) {
574 SrcReg = MI.getOperand(2).getReg();
575 I -= Src1NumElts;
576 } else if (I >= 0)
577 SrcReg = Src1;
578
579 if (I < 0)
580 Builder.buildUndef(DstReg);
581 else if (!MRI.getType(SrcReg).isVector())
582 Builder.buildCopy(DstReg, SrcReg);
583 else
585
586 MI.eraseFromParent();
587}
588
589namespace {
590
591/// Select a preference between two uses. CurrentUse is the current preference
592/// while the *ForCandidate arguments describe the candidate under consideration.
593PreferredTuple ChoosePreferredUse(MachineInstr &LoadMI,
594 PreferredTuple &CurrentUse,
595 const LLT TyForCandidate,
596 unsigned OpcodeForCandidate,
597 MachineInstr *MIForCandidate) {
598 if (!CurrentUse.Ty.isValid()) {
599 if (CurrentUse.ExtendOpcode == OpcodeForCandidate ||
600 CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT)
601 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
602 return CurrentUse;
603 }
604
605 // We permit the extend to hoist through basic blocks but this is only
606 // sensible if the target has extending loads. If you end up lowering back
607 // into a load and extend during the legalizer then the end result is
608 // hoisting the extend up to the load.
609
610 // Prefer defined extensions to undefined extensions as these are more
611 // likely to reduce the number of instructions.
612 if (OpcodeForCandidate == TargetOpcode::G_ANYEXT &&
613 CurrentUse.ExtendOpcode != TargetOpcode::G_ANYEXT)
614 return CurrentUse;
615 else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT &&
616 OpcodeForCandidate != TargetOpcode::G_ANYEXT)
617 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
618
619 // Prefer sign extensions to zero extensions as sign-extensions tend to be
620 // more expensive. Don't do this if the load is already a zero-extend load
621 // though, otherwise we'll rewrite a zero-extend load into a sign-extend
622 // later.
623 if (!isa<GZExtLoad>(LoadMI) && CurrentUse.Ty == TyForCandidate) {
624 if (CurrentUse.ExtendOpcode == TargetOpcode::G_SEXT &&
625 OpcodeForCandidate == TargetOpcode::G_ZEXT)
626 return CurrentUse;
627 else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ZEXT &&
628 OpcodeForCandidate == TargetOpcode::G_SEXT)
629 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
630 }
631
632 // This is potentially target specific. We've chosen the largest type
633 // because G_TRUNC is usually free. One potential catch with this is that
634 // some targets have a reduced number of larger registers than smaller
635 // registers and this choice potentially increases the live-range for the
636 // larger value.
637 if (TyForCandidate.getSizeInBits() > CurrentUse.Ty.getSizeInBits()) {
638 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
639 }
640 return CurrentUse;
641}
642
643/// Find a suitable place to insert some instructions and insert them. This
644/// function accounts for special cases like inserting before a PHI node.
645/// The current strategy for inserting before PHI's is to duplicate the
646/// instructions for each predecessor. However, while that's ok for G_TRUNC
647/// on most targets since it generally requires no code, other targets/cases may
648/// want to try harder to find a dominating block.
649static void InsertInsnsWithoutSideEffectsBeforeUse(
652 MachineOperand &UseMO)>
653 Inserter) {
654 MachineInstr &UseMI = *UseMO.getParent();
655
656 MachineBasicBlock *InsertBB = UseMI.getParent();
657
658 // If the use is a PHI then we want the predecessor block instead.
659 if (UseMI.isPHI()) {
660 MachineOperand *PredBB = std::next(&UseMO);
661 InsertBB = PredBB->getMBB();
662 }
663
664 // If the block is the same block as the def then we want to insert just after
665 // the def instead of at the start of the block.
666 if (InsertBB == DefMI.getParent()) {
668 Inserter(InsertBB, std::next(InsertPt), UseMO);
669 return;
670 }
671
672 // Otherwise we want the start of the BB
673 Inserter(InsertBB, InsertBB->getFirstNonPHI(), UseMO);
674}
675} // end anonymous namespace
676
678 PreferredTuple Preferred;
679 if (matchCombineExtendingLoads(MI, Preferred)) {
680 applyCombineExtendingLoads(MI, Preferred);
681 return true;
682 }
683 return false;
684}
685
686static unsigned getExtLoadOpcForExtend(unsigned ExtOpc) {
687 unsigned CandidateLoadOpc;
688 switch (ExtOpc) {
689 case TargetOpcode::G_ANYEXT:
690 CandidateLoadOpc = TargetOpcode::G_LOAD;
691 break;
692 case TargetOpcode::G_SEXT:
693 CandidateLoadOpc = TargetOpcode::G_SEXTLOAD;
694 break;
695 case TargetOpcode::G_ZEXT:
696 CandidateLoadOpc = TargetOpcode::G_ZEXTLOAD;
697 break;
698 default:
699 llvm_unreachable("Unexpected extend opc");
700 }
701 return CandidateLoadOpc;
702}
703
705 PreferredTuple &Preferred) {
706 // We match the loads and follow the uses to the extend instead of matching
707 // the extends and following the def to the load. This is because the load
708 // must remain in the same position for correctness (unless we also add code
709 // to find a safe place to sink it) whereas the extend is freely movable.
710 // It also prevents us from duplicating the load for the volatile case or just
711 // for performance.
712 GAnyLoad *LoadMI = dyn_cast<GAnyLoad>(&MI);
713 if (!LoadMI)
714 return false;
715
716 Register LoadReg = LoadMI->getDstReg();
717
718 LLT LoadValueTy = MRI.getType(LoadReg);
719 if (!LoadValueTy.isScalar())
720 return false;
721
722 // Most architectures are going to legalize <s8 loads into at least a 1 byte
723 // load, and the MMOs can only describe memory accesses in multiples of bytes.
724 // If we try to perform extload combining on those, we can end up with
725 // %a(s8) = extload %ptr (load 1 byte from %ptr)
726 // ... which is an illegal extload instruction.
727 if (LoadValueTy.getSizeInBits() < 8)
728 return false;
729
730 // For non power-of-2 types, they will very likely be legalized into multiple
731 // loads. Don't bother trying to match them into extending loads.
732 if (!llvm::has_single_bit<uint32_t>(LoadValueTy.getSizeInBits()))
733 return false;
734
735 // Find the preferred type aside from the any-extends (unless it's the only
736 // one) and non-extending ops. We'll emit an extending load to that type and
737 // emit a variant of (extend (trunc X)) for the others according to the
738 // relative type sizes. At the same time, pick an extend to use based on the
739 // extend involved in the chosen type.
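// For instance (sketch): if %ld:_(s8) = G_LOAD %p is used by
//   %a:_(s32) = G_SEXT %ld
//   %b:_(s64) = G_ZEXT %ld
// the s64 zero-extend is preferred, the load is rewritten to
// %b:_(s64) = G_ZEXTLOAD %p, and the other use becomes
//   %t:_(s8) = G_TRUNC %b
//   %a:_(s32) = G_SEXT %t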
740 unsigned PreferredOpcode =
741 isa<GLoad>(&MI)
742 ? TargetOpcode::G_ANYEXT
743 : isa<GSExtLoad>(&MI) ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
744 Preferred = {LLT(), PreferredOpcode, nullptr};
745 for (auto &UseMI : MRI.use_nodbg_instructions(LoadReg)) {
746 if (UseMI.getOpcode() == TargetOpcode::G_SEXT ||
747 UseMI.getOpcode() == TargetOpcode::G_ZEXT ||
748 (UseMI.getOpcode() == TargetOpcode::G_ANYEXT)) {
749 const auto &MMO = LoadMI->getMMO();
750 // Don't do anything for atomics.
751 if (MMO.isAtomic())
752 continue;
753 // Check for legality.
754 if (!isPreLegalize()) {
755 LegalityQuery::MemDesc MMDesc(MMO);
756 unsigned CandidateLoadOpc = getExtLoadOpcForExtend(UseMI.getOpcode());
757 LLT UseTy = MRI.getType(UseMI.getOperand(0).getReg());
758 LLT SrcTy = MRI.getType(LoadMI->getPointerReg());
759 if (LI->getAction({CandidateLoadOpc, {UseTy, SrcTy}, {MMDesc}})
760 .Action != LegalizeActions::Legal)
761 continue;
762 }
763 Preferred = ChoosePreferredUse(MI, Preferred,
764 MRI.getType(UseMI.getOperand(0).getReg()),
765 UseMI.getOpcode(), &UseMI);
766 }
767 }
768
769 // There were no extends
770 if (!Preferred.MI)
771 return false;
772 // It should be impossible to choose an extend without selecting a different
773 // type since by definition the result of an extend is larger.
774 assert(Preferred.Ty != LoadValueTy && "Extending to same type?");
775
776 LLVM_DEBUG(dbgs() << "Preferred use is: " << *Preferred.MI);
777 return true;
778}
779
781 PreferredTuple &Preferred) {
782 // Rewrite the load to the chosen extending load.
783 Register ChosenDstReg = Preferred.MI->getOperand(0).getReg();
784
785 // Inserter to insert a truncate back to the original type at a given point
786 // with some basic CSE to limit truncate duplication to one per BB.
788 auto InsertTruncAt = [&](MachineBasicBlock *InsertIntoBB,
789 MachineBasicBlock::iterator InsertBefore,
790 MachineOperand &UseMO) {
791 MachineInstr *PreviouslyEmitted = EmittedInsns.lookup(InsertIntoBB);
792 if (PreviouslyEmitted) {
794 UseMO.setReg(PreviouslyEmitted->getOperand(0).getReg());
796 return;
797 }
798
799 Builder.setInsertPt(*InsertIntoBB, InsertBefore);
800 Register NewDstReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg());
801 MachineInstr *NewMI = Builder.buildTrunc(NewDstReg, ChosenDstReg);
802 EmittedInsns[InsertIntoBB] = NewMI;
803 replaceRegOpWith(MRI, UseMO, NewDstReg);
804 };
805
807 unsigned LoadOpc = getExtLoadOpcForExtend(Preferred.ExtendOpcode);
808 MI.setDesc(Builder.getTII().get(LoadOpc));
809
810 // Rewrite all the uses to fix up the types.
811 auto &LoadValue = MI.getOperand(0);
813 for (auto &UseMO : MRI.use_operands(LoadValue.getReg()))
814 Uses.push_back(&UseMO);
815
816 for (auto *UseMO : Uses) {
817 MachineInstr *UseMI = UseMO->getParent();
818
819 // If the extend is compatible with the preferred extend then we should fix
820 // up the type and extend so that it uses the preferred use.
821 if (UseMI->getOpcode() == Preferred.ExtendOpcode ||
822 UseMI->getOpcode() == TargetOpcode::G_ANYEXT) {
823 Register UseDstReg = UseMI->getOperand(0).getReg();
824 MachineOperand &UseSrcMO = UseMI->getOperand(1);
825 const LLT UseDstTy = MRI.getType(UseDstReg);
826 if (UseDstReg != ChosenDstReg) {
827 if (Preferred.Ty == UseDstTy) {
828 // If the use has the same type as the preferred use, then merge
829 // the vregs and erase the extend. For example:
830 // %1:_(s8) = G_LOAD ...
831 // %2:_(s32) = G_SEXT %1(s8)
832 // %3:_(s32) = G_ANYEXT %1(s8)
833 // ... = ... %3(s32)
834 // rewrites to:
835 // %2:_(s32) = G_SEXTLOAD ...
836 // ... = ... %2(s32)
837 replaceRegWith(MRI, UseDstReg, ChosenDstReg);
839 UseMO->getParent()->eraseFromParent();
840 } else if (Preferred.Ty.getSizeInBits() < UseDstTy.getSizeInBits()) {
841 // If the preferred size is smaller, then keep the extend but extend
842 // from the result of the extending load. For example:
843 // %1:_(s8) = G_LOAD ...
844 // %2:_(s32) = G_SEXT %1(s8)
845 // %3:_(s64) = G_ANYEXT %1(s8)
846 // ... = ... %3(s64)
847 /// rewrites to:
848 // %2:_(s32) = G_SEXTLOAD ...
849 // %3:_(s64) = G_ANYEXT %2:_(s32)
850 // ... = ... %3(s64)
851 replaceRegOpWith(MRI, UseSrcMO, ChosenDstReg);
852 } else {
853 // If the preferred size is larger, then insert a truncate. For
854 // example:
855 // %1:_(s8) = G_LOAD ...
856 // %2:_(s64) = G_SEXT %1(s8)
857 // %3:_(s32) = G_ZEXT %1(s8)
858 // ... = ... %3(s32)
859 /// rewrites to:
860 // %2:_(s64) = G_SEXTLOAD ...
861 // %4:_(s8) = G_TRUNC %2:_(s64)
862 // %3:_(s32) = G_ZEXT %4:_(s8)
863 // ... = ... %3(s32)
864 InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO,
865 InsertTruncAt);
866 }
867 continue;
868 }
869 // The use is (one of) the uses of the preferred use we chose earlier.
870 // We're going to update the load to def this value later so just erase
871 // the old extend.
873 UseMO->getParent()->eraseFromParent();
874 continue;
875 }
876
877 // The use isn't an extend. Truncate back to the type we originally loaded.
878 // This is free on many targets.
879 InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO, InsertTruncAt);
880 }
881
882 MI.getOperand(0).setReg(ChosenDstReg);
884}
885
887 BuildFnTy &MatchInfo) {
888 assert(MI.getOpcode() == TargetOpcode::G_AND);
889
890 // If we have the following code:
891 // %mask = G_CONSTANT 255
892 // %ld = G_LOAD %ptr, (load s16)
893 // %and = G_AND %ld, %mask
894 //
895 // Try to fold it into
896 // %ld = G_ZEXTLOAD %ptr, (load s8)
897
898 Register Dst = MI.getOperand(0).getReg();
899 if (MRI.getType(Dst).isVector())
900 return false;
901
902 auto MaybeMask =
903 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
904 if (!MaybeMask)
905 return false;
906
907 APInt MaskVal = MaybeMask->Value;
908
909 if (!MaskVal.isMask())
910 return false;
911
912 Register SrcReg = MI.getOperand(1).getReg();
913 // Don't use getOpcodeDef() here since intermediate instructions may have
914 // multiple users.
915 GAnyLoad *LoadMI = dyn_cast<GAnyLoad>(MRI.getVRegDef(SrcReg));
916 if (!LoadMI || !MRI.hasOneNonDBGUse(LoadMI->getDstReg()))
917 return false;
918
919 Register LoadReg = LoadMI->getDstReg();
920 LLT RegTy = MRI.getType(LoadReg);
921 Register PtrReg = LoadMI->getPointerReg();
922 unsigned RegSize = RegTy.getSizeInBits();
923 LocationSize LoadSizeBits = LoadMI->getMemSizeInBits();
924 unsigned MaskSizeBits = MaskVal.countr_one();
925
926 // The mask may not be larger than the in-memory type, as it might cover sign
927 // extended bits
928 if (MaskSizeBits > LoadSizeBits.getValue())
929 return false;
930
931 // If the mask covers the whole destination register, there's nothing to
932 // extend
933 if (MaskSizeBits >= RegSize)
934 return false;
935
936 // Most targets cannot deal with loads of size < 8 and need to re-legalize to
937 // at least byte loads. Avoid creating such loads here
938 if (MaskSizeBits < 8 || !isPowerOf2_32(MaskSizeBits))
939 return false;
940
941 const MachineMemOperand &MMO = LoadMI->getMMO();
942 LegalityQuery::MemDesc MemDesc(MMO);
943
944 // Don't modify the memory access size if this is atomic/volatile, but we can
945 // still adjust the opcode to indicate the high bit behavior.
946 if (LoadMI->isSimple())
947 MemDesc.MemoryTy = LLT::scalar(MaskSizeBits);
948 else if (LoadSizeBits.getValue() > MaskSizeBits ||
949 LoadSizeBits.getValue() == RegSize)
950 return false;
951
952 // TODO: Could check if it's legal with the reduced or original memory size.
954 {TargetOpcode::G_ZEXTLOAD, {RegTy, MRI.getType(PtrReg)}, {MemDesc}}))
955 return false;
956
957 MatchInfo = [=](MachineIRBuilder &B) {
958 B.setInstrAndDebugLoc(*LoadMI);
959 auto &MF = B.getMF();
960 auto PtrInfo = MMO.getPointerInfo();
961 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, MemDesc.MemoryTy);
962 B.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, Dst, PtrReg, *NewMMO);
963 LoadMI->eraseFromParent();
964 };
965 return true;
966}
967
969 const MachineInstr &UseMI) {
970 assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
971 "shouldn't consider debug uses");
972 assert(DefMI.getParent() == UseMI.getParent());
973 if (&DefMI == &UseMI)
974 return true;
975 const MachineBasicBlock &MBB = *DefMI.getParent();
976 auto DefOrUse = find_if(MBB, [&DefMI, &UseMI](const MachineInstr &MI) {
977 return &MI == &DefMI || &MI == &UseMI;
978 });
979 if (DefOrUse == MBB.end())
980 llvm_unreachable("Block must contain both DefMI and UseMI!");
981 return &*DefOrUse == &DefMI;
982}
983
985 const MachineInstr &UseMI) {
986 assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
987 "shouldn't consider debug uses");
988 if (MDT)
989 return MDT->dominates(&DefMI, &UseMI);
990 else if (DefMI.getParent() != UseMI.getParent())
991 return false;
992
993 return isPredecessor(DefMI, UseMI);
994}
995
997 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
998 Register SrcReg = MI.getOperand(1).getReg();
999 Register LoadUser = SrcReg;
1000
1001 if (MRI.getType(SrcReg).isVector())
1002 return false;
1003
1004 Register TruncSrc;
1005 if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc))))
1006 LoadUser = TruncSrc;
1007
1008 uint64_t SizeInBits = MI.getOperand(2).getImm();
1009 // If the source is a G_SEXTLOAD from the same bit width, then we don't
1010 // need any extend at all, just a truncate.
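// E.g. (sketch):
//   %ld:_(s32) = G_SEXTLOAD %p (load 1)   ; already sign-extended from bit 8
//   %x:_(s32) = G_SEXT_INREG %ld, 8
// The G_SEXT_INREG is redundant, and %x can simply be a copy of %ld.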
1011 if (auto *LoadMI = getOpcodeDef<GSExtLoad>(LoadUser, MRI)) {
1012 // If truncating more than the original extended value, abort.
1013 auto LoadSizeBits = LoadMI->getMemSizeInBits();
1014 if (TruncSrc &&
1015 MRI.getType(TruncSrc).getSizeInBits() < LoadSizeBits.getValue())
1016 return false;
1017 if (LoadSizeBits == SizeInBits)
1018 return true;
1019 }
1020 return false;
1021}
1022
1024 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1025 Builder.buildCopy(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
1026 MI.eraseFromParent();
1027}
1028
1030 MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
1031 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1032
1033 Register DstReg = MI.getOperand(0).getReg();
1034 LLT RegTy = MRI.getType(DstReg);
1035
1036 // Only supports scalars for now.
1037 if (RegTy.isVector())
1038 return false;
1039
1040 Register SrcReg = MI.getOperand(1).getReg();
1041 auto *LoadDef = getOpcodeDef<GLoad>(SrcReg, MRI);
1042 if (!LoadDef || !MRI.hasOneNonDBGUse(DstReg))
1043 return false;
1044
1045 uint64_t MemBits = LoadDef->getMemSizeInBits().getValue();
1046
1047 // If the sign extend extends from a narrower width than the load's width,
1048 // then we can narrow the load width when we combine to a G_SEXTLOAD.
1049 // Avoid widening the load at all.
1050 unsigned NewSizeBits = std::min((uint64_t)MI.getOperand(2).getImm(), MemBits);
1051
1052 // Don't generate G_SEXTLOADs with a < 1 byte width.
1053 if (NewSizeBits < 8)
1054 return false;
1055 // Don't bother creating a non-power-2 sextload, it will likely be broken up
1056 // anyway for most targets.
1057 if (!isPowerOf2_32(NewSizeBits))
1058 return false;
1059
1060 const MachineMemOperand &MMO = LoadDef->getMMO();
1061 LegalityQuery::MemDesc MMDesc(MMO);
1062
1063 // Don't modify the memory access size if this is atomic/volatile, but we can
1064 // still adjust the opcode to indicate the high bit behavior.
1065 if (LoadDef->isSimple())
1066 MMDesc.MemoryTy = LLT::scalar(NewSizeBits);
1067 else if (MemBits > NewSizeBits || MemBits == RegTy.getSizeInBits())
1068 return false;
1069
1070 // TODO: Could check if it's legal with the reduced or original memory size.
1071 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SEXTLOAD,
1072 {MRI.getType(LoadDef->getDstReg()),
1073 MRI.getType(LoadDef->getPointerReg())},
1074 {MMDesc}}))
1075 return false;
1076
1077 MatchInfo = std::make_tuple(LoadDef->getDstReg(), NewSizeBits);
1078 return true;
1079}
1080
1082 MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
1083 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1084 Register LoadReg;
1085 unsigned ScalarSizeBits;
1086 std::tie(LoadReg, ScalarSizeBits) = MatchInfo;
1087 GLoad *LoadDef = cast<GLoad>(MRI.getVRegDef(LoadReg));
1088
1089 // If we have the following:
1090 // %ld = G_LOAD %ptr, (load 2)
1091 // %ext = G_SEXT_INREG %ld, 8
1092 // ==>
1093 // %ld = G_SEXTLOAD %ptr (load 1)
1094
1095 auto &MMO = LoadDef->getMMO();
1096 Builder.setInstrAndDebugLoc(*LoadDef);
1097 auto &MF = Builder.getMF();
1098 auto PtrInfo = MMO.getPointerInfo();
1099 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, ScalarSizeBits / 8);
1100 Builder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, MI.getOperand(0).getReg(),
1101 LoadDef->getPointerReg(), *NewMMO);
1102 MI.eraseFromParent();
1103}
1104
1105/// Return true if 'MI' is a load or a store that may be able to fold its address
1106/// operand into the load / store addressing mode.
1110 auto *MF = MI->getMF();
1111 auto *Addr = getOpcodeDef<GPtrAdd>(MI->getPointerReg(), MRI);
1112 if (!Addr)
1113 return false;
1114
1115 AM.HasBaseReg = true;
1116 if (auto CstOff = getIConstantVRegVal(Addr->getOffsetReg(), MRI))
1117 AM.BaseOffs = CstOff->getSExtValue(); // [reg +/- imm]
1118 else
1119 AM.Scale = 1; // [reg +/- reg]
1120
1121 return TLI.isLegalAddressingMode(
1122 MF->getDataLayout(), AM,
1123 getTypeForLLT(MI->getMMO().getMemoryType(),
1124 MF->getFunction().getContext()),
1125 MI->getMMO().getAddrSpace());
1126}
1127
1128static unsigned getIndexedOpc(unsigned LdStOpc) {
1129 switch (LdStOpc) {
1130 case TargetOpcode::G_LOAD:
1131 return TargetOpcode::G_INDEXED_LOAD;
1132 case TargetOpcode::G_STORE:
1133 return TargetOpcode::G_INDEXED_STORE;
1134 case TargetOpcode::G_ZEXTLOAD:
1135 return TargetOpcode::G_INDEXED_ZEXTLOAD;
1136 case TargetOpcode::G_SEXTLOAD:
1137 return TargetOpcode::G_INDEXED_SEXTLOAD;
1138 default:
1139 llvm_unreachable("Unexpected opcode");
1140 }
1141}
1142
1143bool CombinerHelper::isIndexedLoadStoreLegal(GLoadStore &LdSt) const {
1144 // Check for legality.
1145 LLT PtrTy = MRI.getType(LdSt.getPointerReg());
1146 LLT Ty = MRI.getType(LdSt.getReg(0));
1147 LLT MemTy = LdSt.getMMO().getMemoryType();
1149 {{MemTy, MemTy.getSizeInBits().getKnownMinValue(),
1151 unsigned IndexedOpc = getIndexedOpc(LdSt.getOpcode());
1152 SmallVector<LLT> OpTys;
1153 if (IndexedOpc == TargetOpcode::G_INDEXED_STORE)
1154 OpTys = {PtrTy, Ty, Ty};
1155 else
1156 OpTys = {Ty, PtrTy}; // For G_INDEXED_LOAD, G_INDEXED_[SZ]EXTLOAD
1157
1158 LegalityQuery Q(IndexedOpc, OpTys, MemDescrs);
1159 return isLegal(Q);
1160}
1161
1163 "post-index-use-threshold", cl::Hidden, cl::init(32),
1164 cl::desc("Number of uses of a base pointer to check before it is no longer "
1165 "considered for post-indexing."));
1166
1167bool CombinerHelper::findPostIndexCandidate(GLoadStore &LdSt, Register &Addr,
1169 bool &RematOffset) {
1170 // We're looking for the following pattern, for either load or store:
1171 // %baseptr:_(p0) = ...
1172 // G_STORE %val(s64), %baseptr(p0)
1173 // %offset:_(s64) = G_CONSTANT i64 -256
1174 // %new_addr:_(p0) = G_PTR_ADD %baseptr, %offset(s64)
1175 const auto &TLI = getTargetLowering();
1176
1177 Register Ptr = LdSt.getPointerReg();
1178 // If the store is the only use, don't bother.
1179 if (MRI.hasOneNonDBGUse(Ptr))
1180 return false;
1181
1182 if (!isIndexedLoadStoreLegal(LdSt))
1183 return false;
1184
1185 if (getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Ptr, MRI))
1186 return false;
1187
1188 MachineInstr *StoredValDef = getDefIgnoringCopies(LdSt.getReg(0), MRI);
1189 auto *PtrDef = MRI.getVRegDef(Ptr);
1190
1191 unsigned NumUsesChecked = 0;
1192 for (auto &Use : MRI.use_nodbg_instructions(Ptr)) {
1193 if (++NumUsesChecked > PostIndexUseThreshold)
1194 return false; // Try to avoid exploding compile time.
1195
1196 auto *PtrAdd = dyn_cast<GPtrAdd>(&Use);
1197 // The use itself might be dead. This can happen during combines if DCE
1198 // hasn't had a chance to run yet. Don't allow it to form an indexed op.
1199 if (!PtrAdd || MRI.use_nodbg_empty(PtrAdd->getReg(0)))
1200 continue;
1201
1202 // Check the user of this isn't the store, otherwise we'd be generating an
1203 // indexed store defining its own use.
1204 if (StoredValDef == &Use)
1205 continue;
1206
1207 Offset = PtrAdd->getOffsetReg();
1208 if (!ForceLegalIndexing &&
1209 !TLI.isIndexingLegal(LdSt, PtrAdd->getBaseReg(), Offset,
1210 /*IsPre*/ false, MRI))
1211 continue;
1212
1213 // Make sure the offset calculation is before the potentially indexed op.
1214 MachineInstr *OffsetDef = MRI.getVRegDef(Offset);
1215 RematOffset = false;
1216 if (!dominates(*OffsetDef, LdSt)) {
1217 // If the offset however is just a G_CONSTANT, we can always just
1218 // rematerialize it where we need it.
1219 if (OffsetDef->getOpcode() != TargetOpcode::G_CONSTANT)
1220 continue;
1221 RematOffset = true;
1222 }
1223
1224 for (auto &BasePtrUse : MRI.use_nodbg_instructions(PtrAdd->getBaseReg())) {
1225 if (&BasePtrUse == PtrDef)
1226 continue;
1227
1228 // If the user is a later load/store that can be post-indexed, then don't
1229 // combine this one.
1230 auto *BasePtrLdSt = dyn_cast<GLoadStore>(&BasePtrUse);
1231 if (BasePtrLdSt && BasePtrLdSt != &LdSt &&
1232 dominates(LdSt, *BasePtrLdSt) &&
1233 isIndexedLoadStoreLegal(*BasePtrLdSt))
1234 return false;
1235
1236 // Now we're looking for the key G_PTR_ADD instruction, which contains
1237 // the offset add that we want to fold.
1238 if (auto *BasePtrUseDef = dyn_cast<GPtrAdd>(&BasePtrUse)) {
1239 Register PtrAddDefReg = BasePtrUseDef->getReg(0);
1240 for (auto &BaseUseUse : MRI.use_nodbg_instructions(PtrAddDefReg)) {
1241 // If the use is in a different block, then we may produce worse code
1242 // due to the extra register pressure.
1243 if (BaseUseUse.getParent() != LdSt.getParent())
1244 return false;
1245
1246 if (auto *UseUseLdSt = dyn_cast<GLoadStore>(&BaseUseUse))
1247 if (canFoldInAddressingMode(UseUseLdSt, TLI, MRI))
1248 return false;
1249 }
1250 if (!dominates(LdSt, BasePtrUse))
1251 return false; // All uses must be dominated by the load/store.
1252 }
1253 }
1254
1255 Addr = PtrAdd->getReg(0);
1256 Base = PtrAdd->getBaseReg();
1257 return true;
1258 }
1259
1260 return false;
1261}
1262
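// Sketch of the pre-index pattern looked for below (hypothetical MIR):
//   %addr:_(p0) = G_PTR_ADD %base, %offset
//   G_STORE %val, %addr        ; or a load from %addr
// which may become an indexed store/load that also produces the updated
// pointer, removing the separate G_PTR_ADD.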
1263bool CombinerHelper::findPreIndexCandidate(GLoadStore &LdSt, Register &Addr,
1265 auto &MF = *LdSt.getParent()->getParent();
1266 const auto &TLI = *MF.getSubtarget().getTargetLowering();
1267
1268 Addr = LdSt.getPointerReg();
1271 return false;
1272
1273 if (!ForceLegalIndexing &&
1274 !TLI.isIndexingLegal(LdSt, Base, Offset, /*IsPre*/ true, MRI))
1275 return false;
1276
1277 if (!isIndexedLoadStoreLegal(LdSt))
1278 return false;
1279
1281 if (BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
1282 return false;
1283
1284 if (auto *St = dyn_cast<GStore>(&LdSt)) {
1285 // Would require a copy.
1286 if (Base == St->getValueReg())
1287 return false;
1288
1289 // We're expecting one use of Addr in MI, but it could also be the
1290 // value stored, which isn't actually dominated by the instruction.
1291 if (St->getValueReg() == Addr)
1292 return false;
1293 }
1294
1295 // Avoid increasing cross-block register pressure.
1296 for (auto &AddrUse : MRI.use_nodbg_instructions(Addr))
1297 if (AddrUse.getParent() != LdSt.getParent())
1298 return false;
1299
1300 // FIXME: check whether all uses of the base pointer are constant PtrAdds.
1301 // That might allow us to end base's liveness here by adjusting the constant.
1302 bool RealUse = false;
1303 for (auto &AddrUse : MRI.use_nodbg_instructions(Addr)) {
1304 if (!dominates(LdSt, AddrUse))
1305 return false; // All uses must be dominated by the load/store.
1306
1307 // If Ptr may be folded into the addressing mode of another use, then it's
1308 // not profitable to do this transformation.
1309 if (auto *UseLdSt = dyn_cast<GLoadStore>(&AddrUse)) {
1310 if (!canFoldInAddressingMode(UseLdSt, TLI, MRI))
1311 RealUse = true;
1312 } else {
1313 RealUse = true;
1314 }
1315 }
1316 return RealUse;
1317}
1318
1320 BuildFnTy &MatchInfo) {
1321 assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT);
1322
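// Overall idea (sketch): narrow
//   %v:_(<4 x s32>) = G_LOAD %p
//   %e:_(s32) = G_EXTRACT_VECTOR_ELT %v, %idx
// into a scalar load of just the addressed element:
//   %ep:_(p0) = <pointer to element %idx of %p>
//   %e:_(s32) = G_LOAD %ep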
1323 // Check if there is a load that defines the vector being extracted from.
1324 auto *LoadMI = getOpcodeDef<GLoad>(MI.getOperand(1).getReg(), MRI);
1325 if (!LoadMI)
1326 return false;
1327
1328 Register Vector = MI.getOperand(1).getReg();
1329 LLT VecEltTy = MRI.getType(Vector).getElementType();
1330
1331 assert(MRI.getType(MI.getOperand(0).getReg()) == VecEltTy);
1332
1333 // Checking whether we should reduce the load width.
1335 return false;
1336
1337 // Check if the defining load is simple.
1338 if (!LoadMI->isSimple())
1339 return false;
1340
1341 // If the vector element type is not a multiple of a byte then we are unable
1342 // to correctly compute an address to load only the extracted element as a
1343 // scalar.
1344 if (!VecEltTy.isByteSized())
1345 return false;
1346
1347 // Check for load fold barriers between the extraction and the load.
1348 if (MI.getParent() != LoadMI->getParent())
1349 return false;
1350 const unsigned MaxIter = 20;
1351 unsigned Iter = 0;
1352 for (auto II = LoadMI->getIterator(), IE = MI.getIterator(); II != IE; ++II) {
1353 if (II->isLoadFoldBarrier())
1354 return false;
1355 if (Iter++ == MaxIter)
1356 return false;
1357 }
1358
1359 // Check if the new load that we are going to create is legal
1360 // if we are in the post-legalization phase.
1361 MachineMemOperand MMO = LoadMI->getMMO();
1362 Align Alignment = MMO.getAlign();
1363 MachinePointerInfo PtrInfo;
1365
1366 // Finding the appropriate PtrInfo if offset is a known constant.
1367 // This is required to create the memory operand for the narrowed load.
1368 // This machine memory operand object helps us infer about legality
1369 // before we proceed to combine the instruction.
1370 if (auto CVal = getIConstantVRegVal(Vector, MRI)) {
1371 int Elt = CVal->getZExtValue();
1372 // FIXME: should be (ABI size)*Elt.
1373 Offset = VecEltTy.getSizeInBits() * Elt / 8;
1374 PtrInfo = MMO.getPointerInfo().getWithOffset(Offset);
1375 } else {
1376 // Discard the pointer info except the address space because the memory
1377 // operand can't represent this new access since the offset is variable.
1378 Offset = VecEltTy.getSizeInBits() / 8;
1380 }
1381
1382 Alignment = commonAlignment(Alignment, Offset);
1383
1384 Register VecPtr = LoadMI->getPointerReg();
1385 LLT PtrTy = MRI.getType(VecPtr);
1386
1387 MachineFunction &MF = *MI.getMF();
1388 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, VecEltTy);
1389
1390 LegalityQuery::MemDesc MMDesc(*NewMMO);
1391
1392 LegalityQuery Q = {TargetOpcode::G_LOAD, {VecEltTy, PtrTy}, {MMDesc}};
1393
1395 return false;
1396
1397 // Load must be allowed and fast on the target.
1399 auto &DL = MF.getDataLayout();
1400 unsigned Fast = 0;
1401 if (!getTargetLowering().allowsMemoryAccess(C, DL, VecEltTy, *NewMMO,
1402 &Fast) ||
1403 !Fast)
1404 return false;
1405
1406 Register Result = MI.getOperand(0).getReg();
1407 Register Index = MI.getOperand(2).getReg();
1408
1409 MatchInfo = [=](MachineIRBuilder &B) {
1410 GISelObserverWrapper DummyObserver;
1411 LegalizerHelper Helper(B.getMF(), DummyObserver, B);
1412 //// Get pointer to the vector element.
1413 Register finalPtr = Helper.getVectorElementPointer(
1414 LoadMI->getPointerReg(), MRI.getType(LoadMI->getOperand(0).getReg()),
1415 Index);
1416 // New G_LOAD instruction.
1417 B.buildLoad(Result, finalPtr, PtrInfo, Alignment);
1418 // Remove original GLOAD instruction.
1419 LoadMI->eraseFromParent();
1420 };
1421
1422 return true;
1423}
1424
1427 auto &LdSt = cast<GLoadStore>(MI);
1428
1429 if (LdSt.isAtomic())
1430 return false;
1431
1432 MatchInfo.IsPre = findPreIndexCandidate(LdSt, MatchInfo.Addr, MatchInfo.Base,
1433 MatchInfo.Offset);
1434 if (!MatchInfo.IsPre &&
1435 !findPostIndexCandidate(LdSt, MatchInfo.Addr, MatchInfo.Base,
1436 MatchInfo.Offset, MatchInfo.RematOffset))
1437 return false;
1438
1439 return true;
1440}
1441
1444 MachineInstr &AddrDef = *MRI.getUniqueVRegDef(MatchInfo.Addr);
1445 unsigned Opcode = MI.getOpcode();
1446 bool IsStore = Opcode == TargetOpcode::G_STORE;
1447 unsigned NewOpcode = getIndexedOpc(Opcode);
1448
1449 // If the offset constant didn't happen to dominate the load/store, we can
1450 // just clone it as needed.
1451 if (MatchInfo.RematOffset) {
1452 auto *OldCst = MRI.getVRegDef(MatchInfo.Offset);
1453 auto NewCst = Builder.buildConstant(MRI.getType(MatchInfo.Offset),
1454 *OldCst->getOperand(1).getCImm());
1455 MatchInfo.Offset = NewCst.getReg(0);
1456 }
1457
1458 auto MIB = Builder.buildInstr(NewOpcode);
1459 if (IsStore) {
1460 MIB.addDef(MatchInfo.Addr);
1461 MIB.addUse(MI.getOperand(0).getReg());
1462 } else {
1463 MIB.addDef(MI.getOperand(0).getReg());
1464 MIB.addDef(MatchInfo.Addr);
1465 }
1466
1467 MIB.addUse(MatchInfo.Base);
1468 MIB.addUse(MatchInfo.Offset);
1469 MIB.addImm(MatchInfo.IsPre);
1470 MIB->cloneMemRefs(*MI.getMF(), MI);
1471 MI.eraseFromParent();
1472 AddrDef.eraseFromParent();
1473
1474 LLVM_DEBUG(dbgs() << " Combined to indexed operation");
1475}
1476
1478 MachineInstr *&OtherMI) {
1479 unsigned Opcode = MI.getOpcode();
1480 bool IsDiv, IsSigned;
1481
1482 switch (Opcode) {
1483 default:
1484 llvm_unreachable("Unexpected opcode!");
1485 case TargetOpcode::G_SDIV:
1486 case TargetOpcode::G_UDIV: {
1487 IsDiv = true;
1488 IsSigned = Opcode == TargetOpcode::G_SDIV;
1489 break;
1490 }
1491 case TargetOpcode::G_SREM:
1492 case TargetOpcode::G_UREM: {
1493 IsDiv = false;
1494 IsSigned = Opcode == TargetOpcode::G_SREM;
1495 break;
1496 }
1497 }
1498
1499 Register Src1 = MI.getOperand(1).getReg();
1500 unsigned DivOpcode, RemOpcode, DivremOpcode;
1501 if (IsSigned) {
1502 DivOpcode = TargetOpcode::G_SDIV;
1503 RemOpcode = TargetOpcode::G_SREM;
1504 DivremOpcode = TargetOpcode::G_SDIVREM;
1505 } else {
1506 DivOpcode = TargetOpcode::G_UDIV;
1507 RemOpcode = TargetOpcode::G_UREM;
1508 DivremOpcode = TargetOpcode::G_UDIVREM;
1509 }
1510
1511 if (!isLegalOrBeforeLegalizer({DivremOpcode, {MRI.getType(Src1)}}))
1512 return false;
1513
1514 // Combine:
1515 // %div:_ = G_[SU]DIV %src1:_, %src2:_
1516 // %rem:_ = G_[SU]REM %src1:_, %src2:_
1517 // into:
1518 // %div:_, %rem:_ = G_[SU]DIVREM %src1:_, %src2:_
1519
1520 // Combine:
1521 // %rem:_ = G_[SU]REM %src1:_, %src2:_
1522 // %div:_ = G_[SU]DIV %src1:_, %src2:_
1523 // into:
1524 // %div:_, %rem:_ = G_[SU]DIVREM %src1:_, %src2:_
1525
1526 for (auto &UseMI : MRI.use_nodbg_instructions(Src1)) {
1527 if (MI.getParent() == UseMI.getParent() &&
1528 ((IsDiv && UseMI.getOpcode() == RemOpcode) ||
1529 (!IsDiv && UseMI.getOpcode() == DivOpcode)) &&
1530 matchEqualDefs(MI.getOperand(2), UseMI.getOperand(2)) &&
1531 matchEqualDefs(MI.getOperand(1), UseMI.getOperand(1))) {
1532 OtherMI = &UseMI;
1533 return true;
1534 }
1535 }
1536
1537 return false;
1538}
1539
1541 MachineInstr *&OtherMI) {
1542 unsigned Opcode = MI.getOpcode();
1543 assert(OtherMI && "OtherMI shouldn't be empty.");
1544
1545 Register DestDivReg, DestRemReg;
1546 if (Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_UDIV) {
1547 DestDivReg = MI.getOperand(0).getReg();
1548 DestRemReg = OtherMI->getOperand(0).getReg();
1549 } else {
1550 DestDivReg = OtherMI->getOperand(0).getReg();
1551 DestRemReg = MI.getOperand(0).getReg();
1552 }
1553
1554 bool IsSigned =
1555 Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_SREM;
1556
1557 // Check which instruction is first in the block so we don't break def-use
1558 // deps by "moving" the instruction incorrectly. Also keep track of which
1559 // instruction is first so we pick its operands, avoiding use-before-def
1560 // bugs.
1561 MachineInstr *FirstInst = dominates(MI, *OtherMI) ? &MI : OtherMI;
1562 Builder.setInstrAndDebugLoc(*FirstInst);
1563
1564 Builder.buildInstr(IsSigned ? TargetOpcode::G_SDIVREM
1565 : TargetOpcode::G_UDIVREM,
1566 {DestDivReg, DestRemReg},
1567 { FirstInst->getOperand(1), FirstInst->getOperand(2) });
1568 MI.eraseFromParent();
1569 OtherMI->eraseFromParent();
1570}
1571
1573 MachineInstr *&BrCond) {
1574 assert(MI.getOpcode() == TargetOpcode::G_BR);
1575
1576 // Try to match the following:
1577 // bb1:
1578 // G_BRCOND %c1, %bb2
1579 // G_BR %bb3
1580 // bb2:
1581 // ...
1582 // bb3:
1583
1584 // The above pattern does not have a fallthrough to the successor bb2, always
1585 // resulting in a branch no matter which path is taken. Here we try to find
1586 // and replace that pattern with a conditional branch to bb3 and otherwise a
1587 // fallthrough to bb2. This is generally better for branch predictors.
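// After the rewrite (sketch), the block instead looks like:
// bb1:
//   %c1_inv = G_XOR %c1, 1
//   G_BRCOND %c1_inv, %bb3
//   G_BR %bb2            ; bb2 is the layout successor, i.e. a fallthrough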
1588
1589 MachineBasicBlock *MBB = MI.getParent();
1591 if (BrIt == MBB->begin())
1592 return false;
1593 assert(std::next(BrIt) == MBB->end() && "expected G_BR to be a terminator");
1594
1595 BrCond = &*std::prev(BrIt);
1596 if (BrCond->getOpcode() != TargetOpcode::G_BRCOND)
1597 return false;
1598
1599 // Check that the next block is the conditional branch target. Also make sure
1600 // that it isn't the same as the G_BR's target (otherwise, this will loop.)
1601 MachineBasicBlock *BrCondTarget = BrCond->getOperand(1).getMBB();
1602 return BrCondTarget != MI.getOperand(0).getMBB() &&
1603 MBB->isLayoutSuccessor(BrCondTarget);
1604}
1605
1607 MachineInstr *&BrCond) {
1608 MachineBasicBlock *BrTarget = MI.getOperand(0).getMBB();
1610 LLT Ty = MRI.getType(BrCond->getOperand(0).getReg());
1611 // FIXME: Does int/fp matter for this? If so, we might need to restrict
1612 // this to i1 only since we might not know for sure what kind of
1613 // compare generated the condition value.
1614 auto True = Builder.buildConstant(
1615 Ty, getICmpTrueVal(getTargetLowering(), false, false));
1616 auto Xor = Builder.buildXor(Ty, BrCond->getOperand(0), True);
1617
1618 auto *FallthroughBB = BrCond->getOperand(1).getMBB();
1620 MI.getOperand(0).setMBB(FallthroughBB);
1622
1623 // Change the conditional branch to use the inverted condition and
1624 // new target block.
1625 Observer.changingInstr(*BrCond);
1626 BrCond->getOperand(0).setReg(Xor.getReg(0));
1627 BrCond->getOperand(1).setMBB(BrTarget);
1628 Observer.changedInstr(*BrCond);
1629}
1630
1631
1633 MachineIRBuilder HelperBuilder(MI);
1634 GISelObserverWrapper DummyObserver;
1635 LegalizerHelper Helper(HelperBuilder.getMF(), DummyObserver, HelperBuilder);
1636 return Helper.lowerMemcpyInline(MI) ==
1638}
1639
1641 MachineIRBuilder HelperBuilder(MI);
1642 GISelObserverWrapper DummyObserver;
1643 LegalizerHelper Helper(HelperBuilder.getMF(), DummyObserver, HelperBuilder);
1644 return Helper.lowerMemCpyFamily(MI, MaxLen) ==
1646}
1647
1649 const MachineRegisterInfo &MRI,
1650 const APFloat &Val) {
1651 APFloat Result(Val);
1652 switch (MI.getOpcode()) {
1653 default:
1654 llvm_unreachable("Unexpected opcode!");
1655 case TargetOpcode::G_FNEG: {
1656 Result.changeSign();
1657 return Result;
1658 }
1659 case TargetOpcode::G_FABS: {
1660 Result.clearSign();
1661 return Result;
1662 }
1663 case TargetOpcode::G_FPTRUNC: {
1664 bool Unused;
1665 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
1667 &Unused);
1668 return Result;
1669 }
1670 case TargetOpcode::G_FSQRT: {
1671 bool Unused;
1673 &Unused);
1674 Result = APFloat(sqrt(Result.convertToDouble()));
1675 break;
1676 }
1677 case TargetOpcode::G_FLOG2: {
1678 bool Unused;
1680 &Unused);
1681 Result = APFloat(log2(Result.convertToDouble()));
1682 break;
1683 }
1684 }
1685 // Convert `APFloat` to the appropriate IEEE type depending on `DstTy`. Otherwise,
1686 // `buildFConstant` will assert on size mismatch. Only `G_FSQRT` and
1687 // `G_FLOG2` reach here.
1688 bool Unused;
1689 Result.convert(Val.getSemantics(), APFloat::rmNearestTiesToEven, &Unused);
1690 return Result;
1691}
1692
1694 const ConstantFP *Cst) {
1695 APFloat Folded = constantFoldFpUnary(MI, MRI, Cst->getValue());
1696 const ConstantFP *NewCst = ConstantFP::get(Builder.getContext(), Folded);
1697 Builder.buildFConstant(MI.getOperand(0), *NewCst);
1698 MI.eraseFromParent();
1699}
1700
1702 PtrAddChain &MatchInfo) {
1703 // We're trying to match the following pattern:
1704 // %t1 = G_PTR_ADD %base, G_CONSTANT imm1
1705 // %root = G_PTR_ADD %t1, G_CONSTANT imm2
1706 // -->
1707 // %root = G_PTR_ADD %base, G_CONSTANT (imm1 + imm2)
1708
1709 if (MI.getOpcode() != TargetOpcode::G_PTR_ADD)
1710 return false;
1711
1712 Register Add2 = MI.getOperand(1).getReg();
1713 Register Imm1 = MI.getOperand(2).getReg();
1714 auto MaybeImmVal = getIConstantVRegValWithLookThrough(Imm1, MRI);
1715 if (!MaybeImmVal)
1716 return false;
1717
1718 MachineInstr *Add2Def = MRI.getVRegDef(Add2);
1719 if (!Add2Def || Add2Def->getOpcode() != TargetOpcode::G_PTR_ADD)
1720 return false;
1721
1722 Register Base = Add2Def->getOperand(1).getReg();
1723 Register Imm2 = Add2Def->getOperand(2).getReg();
1724 auto MaybeImm2Val = getIConstantVRegValWithLookThrough(Imm2, MRI);
1725 if (!MaybeImm2Val)
1726 return false;
1727
1728 // Check if the new combined immediate forms an illegal addressing mode.
1729 // Do not combine if it was legal before but would get illegal.
1730 // To do so, we need to find a load/store user of the pointer to get
1731 // the access type.
1732 Type *AccessTy = nullptr;
1733 auto &MF = *MI.getMF();
1734 for (auto &UseMI : MRI.use_nodbg_instructions(MI.getOperand(0).getReg())) {
1735 if (auto *LdSt = dyn_cast<GLoadStore>(&UseMI)) {
1736 AccessTy = getTypeForLLT(MRI.getType(LdSt->getReg(0)),
1737 MF.getFunction().getContext());
1738 break;
1739 }
1740 }
1741 TargetLoweringBase::AddrMode AMNew;
1742 APInt CombinedImm = MaybeImmVal->Value + MaybeImm2Val->Value;
1743 AMNew.BaseOffs = CombinedImm.getSExtValue();
1744 if (AccessTy) {
1745 AMNew.HasBaseReg = true;
1746 TargetLoweringBase::AddrMode AMOld;
1747 AMOld.BaseOffs = MaybeImmVal->Value.getSExtValue();
1748 AMOld.HasBaseReg = true;
1749 unsigned AS = MRI.getType(Add2).getAddressSpace();
1750 const auto &TLI = *MF.getSubtarget().getTargetLowering();
1751 if (TLI.isLegalAddressingMode(MF.getDataLayout(), AMOld, AccessTy, AS) &&
1752 !TLI.isLegalAddressingMode(MF.getDataLayout(), AMNew, AccessTy, AS))
1753 return false;
1754 }
1755
1756 // Pass the combined immediate to the apply function.
1757 MatchInfo.Imm = AMNew.BaseOffs;
1758 MatchInfo.Base = Base;
1759 MatchInfo.Bank = getRegBank(Imm2);
1760 return true;
1761}
1762
1763 void CombinerHelper::applyPtrAddImmedChain(MachineInstr &MI,
1764 PtrAddChain &MatchInfo) {
1765 assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
1766 MachineIRBuilder MIB(MI);
1767 LLT OffsetTy = MRI.getType(MI.getOperand(2).getReg());
1768 auto NewOffset = MIB.buildConstant(OffsetTy, MatchInfo.Imm);
1769 setRegBank(NewOffset.getReg(0), MatchInfo.Bank);
1770 Observer.changingInstr(MI);
1771 MI.getOperand(1).setReg(MatchInfo.Base);
1772 MI.getOperand(2).setReg(NewOffset.getReg(0));
1773 Observer.changedInstr(MI);
1774}
1775
1776 bool CombinerHelper::matchShiftImmedChain(MachineInstr &MI,
1777 RegisterImmPair &MatchInfo) {
1778 // We're trying to match the following pattern with any of
1779 // G_SHL/G_ASHR/G_LSHR/G_SSHLSAT/G_USHLSAT shift instructions:
1780 // %t1 = SHIFT %base, G_CONSTANT imm1
1781 // %root = SHIFT %t1, G_CONSTANT imm2
1782 // -->
1783 // %root = SHIFT %base, G_CONSTANT (imm1 + imm2)
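  // For example (illustrative, with G_SHL on s32):
  //   %t1 = G_SHL %base, 2
  //   %root = G_SHL %t1, 3
  // becomes
  //   %root = G_SHL %base, 5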
1784
1785 unsigned Opcode = MI.getOpcode();
1786 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
1787 Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||
1788 Opcode == TargetOpcode::G_USHLSAT) &&
1789 "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");
1790
1791 Register Shl2 = MI.getOperand(1).getReg();
1792 Register Imm1 = MI.getOperand(2).getReg();
1793 auto MaybeImmVal = getIConstantVRegValWithLookThrough(Imm1, MRI);
1794 if (!MaybeImmVal)
1795 return false;
1796
1797 MachineInstr *Shl2Def = MRI.getUniqueVRegDef(Shl2);
1798 if (Shl2Def->getOpcode() != Opcode)
1799 return false;
1800
1801 Register Base = Shl2Def->getOperand(1).getReg();
1802 Register Imm2 = Shl2Def->getOperand(2).getReg();
1803 auto MaybeImm2Val = getIConstantVRegValWithLookThrough(Imm2, MRI);
1804 if (!MaybeImm2Val)
1805 return false;
1806
1807 // Pass the combined immediate to the apply function.
1808 MatchInfo.Imm =
1809 (MaybeImmVal->Value.getZExtValue() + MaybeImm2Val->Value).getZExtValue();
1810 MatchInfo.Reg = Base;
1811
1812 // There is no simple replacement for a saturating unsigned left shift that
1813 // exceeds the scalar size.
1814 if (Opcode == TargetOpcode::G_USHLSAT &&
1815 MatchInfo.Imm >= MRI.getType(Shl2).getScalarSizeInBits())
1816 return false;
1817
1818 return true;
1819}
1820
1821 void CombinerHelper::applyShiftImmedChain(MachineInstr &MI,
1822 RegisterImmPair &MatchInfo) {
1823 unsigned Opcode = MI.getOpcode();
1824 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
1825 Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||
1826 Opcode == TargetOpcode::G_USHLSAT) &&
1827 "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");
1828
1829 LLT Ty = MRI.getType(MI.getOperand(1).getReg());
1830 unsigned const ScalarSizeInBits = Ty.getScalarSizeInBits();
1831 auto Imm = MatchInfo.Imm;
1832
1833 if (Imm >= ScalarSizeInBits) {
1834 // Any logical shift that exceeds scalar size will produce zero.
1835 if (Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_LSHR) {
1836 Builder.buildConstant(MI.getOperand(0), 0);
1837 MI.eraseFromParent();
1838 return;
1839 }
1840 // Arithmetic shift and saturating signed left shift have no effect beyond
1841 // scalar size.
1842 Imm = ScalarSizeInBits - 1;
1843 }
1844
1845 LLT ImmTy = MRI.getType(MI.getOperand(2).getReg());
1846 Register NewImm = Builder.buildConstant(ImmTy, Imm).getReg(0);
1847 Observer.changingInstr(MI);
1848 MI.getOperand(1).setReg(MatchInfo.Reg);
1849 MI.getOperand(2).setReg(NewImm);
1850 Observer.changedInstr(MI);
1851}
1852
1853 bool CombinerHelper::matchShiftOfShiftedLogic(MachineInstr &MI,
1854 ShiftOfShiftedLogic &MatchInfo) {
1855 // We're trying to match the following pattern with any of
1856 // G_SHL/G_ASHR/G_LSHR/G_USHLSAT/G_SSHLSAT shift instructions in combination
1857 // with any of G_AND/G_OR/G_XOR logic instructions.
1858 // %t1 = SHIFT %X, G_CONSTANT C0
1859 // %t2 = LOGIC %t1, %Y
1860 // %root = SHIFT %t2, G_CONSTANT C1
1861 // -->
1862 // %t3 = SHIFT %X, G_CONSTANT (C0+C1)
1863 // %t4 = SHIFT %Y, G_CONSTANT C1
1864 // %root = LOGIC %t3, %t4
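  // For example (illustrative, with G_SHL and G_AND on s32):
  //   %t1 = G_SHL %X, 4
  //   %t2 = G_AND %t1, %Y
  //   %root = G_SHL %t2, 2
  // becomes
  //   %t3 = G_SHL %X, 6
  //   %t4 = G_SHL %Y, 2
  //   %root = G_AND %t3, %t4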
1865 unsigned ShiftOpcode = MI.getOpcode();
1866 assert((ShiftOpcode == TargetOpcode::G_SHL ||
1867 ShiftOpcode == TargetOpcode::G_ASHR ||
1868 ShiftOpcode == TargetOpcode::G_LSHR ||
1869 ShiftOpcode == TargetOpcode::G_USHLSAT ||
1870 ShiftOpcode == TargetOpcode::G_SSHLSAT) &&
1871 "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT");
1872
1873 // Match a one-use bitwise logic op.
1874 Register LogicDest = MI.getOperand(1).getReg();
1875 if (!MRI.hasOneNonDBGUse(LogicDest))
1876 return false;
1877
1878 MachineInstr *LogicMI = MRI.getUniqueVRegDef(LogicDest);
1879 unsigned LogicOpcode = LogicMI->getOpcode();
1880 if (LogicOpcode != TargetOpcode::G_AND && LogicOpcode != TargetOpcode::G_OR &&
1881 LogicOpcode != TargetOpcode::G_XOR)
1882 return false;
1883
1884 // Find a matching one-use shift by constant.
1885 const Register C1 = MI.getOperand(2).getReg();
1886 auto MaybeImmVal = getIConstantVRegValWithLookThrough(C1, MRI);
1887 if (!MaybeImmVal || MaybeImmVal->Value == 0)
1888 return false;
1889
1890 const uint64_t C1Val = MaybeImmVal->Value.getZExtValue();
1891
1892 auto matchFirstShift = [&](const MachineInstr *MI, uint64_t &ShiftVal) {
1893 // The shift must match the previous one and must have a single non-debug use.
1894 if (MI->getOpcode() != ShiftOpcode ||
1895 !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
1896 return false;
1897
1898 // Must be a constant.
1899 auto MaybeImmVal =
1900 getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
1901 if (!MaybeImmVal)
1902 return false;
1903
1904 ShiftVal = MaybeImmVal->Value.getSExtValue();
1905 return true;
1906 };
1907
1908 // Logic ops are commutative, so check each operand for a match.
1909 Register LogicMIReg1 = LogicMI->getOperand(1).getReg();
1910 MachineInstr *LogicMIOp1 = MRI.getUniqueVRegDef(LogicMIReg1);
1911 Register LogicMIReg2 = LogicMI->getOperand(2).getReg();
1912 MachineInstr *LogicMIOp2 = MRI.getUniqueVRegDef(LogicMIReg2);
1913 uint64_t C0Val;
1914
1915 if (matchFirstShift(LogicMIOp1, C0Val)) {
1916 MatchInfo.LogicNonShiftReg = LogicMIReg2;
1917 MatchInfo.Shift2 = LogicMIOp1;
1918 } else if (matchFirstShift(LogicMIOp2, C0Val)) {
1919 MatchInfo.LogicNonShiftReg = LogicMIReg1;
1920 MatchInfo.Shift2 = LogicMIOp2;
1921 } else
1922 return false;
1923
1924 MatchInfo.ValSum = C0Val + C1Val;
1925
1926 // The fold is not valid if the sum of the shift values exceeds bitwidth.
1927 if (MatchInfo.ValSum >= MRI.getType(LogicDest).getScalarSizeInBits())
1928 return false;
1929
1930 MatchInfo.Logic = LogicMI;
1931 return true;
1932}
1933
1934 void CombinerHelper::applyShiftOfShiftedLogic(MachineInstr &MI,
1935 ShiftOfShiftedLogic &MatchInfo) {
1936 unsigned Opcode = MI.getOpcode();
1937 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
1938 Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_USHLSAT ||
1939 Opcode == TargetOpcode::G_SSHLSAT) &&
1940 "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT");
1941
1942 LLT ShlType = MRI.getType(MI.getOperand(2).getReg());
1943 LLT DestType = MRI.getType(MI.getOperand(0).getReg());
1944
1945 Register Const = Builder.buildConstant(ShlType, MatchInfo.ValSum).getReg(0);
1946
1947 Register Shift1Base = MatchInfo.Shift2->getOperand(1).getReg();
1948 Register Shift1 =
1949 Builder.buildInstr(Opcode, {DestType}, {Shift1Base, Const}).getReg(0);
1950
1951 // If LogicNonShiftReg is the same as Shift1Base, and the shift1 constant is
1952 // the same as the MatchInfo.Shift2 constant, CSEMIRBuilder will reuse the old
1953 // shift1 when building shift2. In that case, erasing MatchInfo.Shift2 at the
1954 // end would actually remove the old shift1 and cause a crash later. Erase it
1955 // earlier to avoid the crash.
1956 MatchInfo.Shift2->eraseFromParent();
1957
1958 Register Shift2Const = MI.getOperand(2).getReg();
1959 Register Shift2 = Builder
1960 .buildInstr(Opcode, {DestType},
1961 {MatchInfo.LogicNonShiftReg, Shift2Const})
1962 .getReg(0);
1963
1964 Register Dest = MI.getOperand(0).getReg();
1965 Builder.buildInstr(MatchInfo.Logic->getOpcode(), {Dest}, {Shift1, Shift2});
1966
1967 // The logic op had a single use, so it's safe to remove it.
1968 MatchInfo.Logic->eraseFromParent();
1969
1970 MI.eraseFromParent();
1971}
1972
1974 assert(MI.getOpcode() == TargetOpcode::G_SHL && "Expected G_SHL");
1975 // Combine (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
1976 // Combine (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
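  // For example (illustrative): (shl (add x, 12), 2) -> (add (shl x, 2), 48),
  // since 12 << 2 == 48.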
1977 auto &Shl = cast<GenericMachineInstr>(MI);
1978 Register DstReg = Shl.getReg(0);
1979 Register SrcReg = Shl.getReg(1);
1980 Register ShiftReg = Shl.getReg(2);
1981 Register X, C1;
1982
1983 if (!getTargetLowering().isDesirableToCommuteWithShift(MI, !isPreLegalize()))
1984 return false;
1985
1986 if (!mi_match(SrcReg, MRI,
1987 m_OneNonDBGUse(m_any_of(m_GAdd(m_Reg(X), m_Reg(C1)),
1988 m_GOr(m_Reg(X), m_Reg(C1))))))
1989 return false;
1990
1991 APInt C1Val, C2Val;
1992 if (!mi_match(C1, MRI, m_ICstOrSplat(C1Val)) ||
1993 !mi_match(ShiftReg, MRI, m_ICstOrSplat(C2Val)))
1994 return false;
1995
1996 auto *SrcDef = MRI.getVRegDef(SrcReg);
1997 assert((SrcDef->getOpcode() == TargetOpcode::G_ADD ||
1998 SrcDef->getOpcode() == TargetOpcode::G_OR) && "Unexpected op");
1999 LLT SrcTy = MRI.getType(SrcReg);
2000 MatchInfo = [=](MachineIRBuilder &B) {
2001 auto S1 = B.buildShl(SrcTy, X, ShiftReg);
2002 auto S2 = B.buildShl(SrcTy, C1, ShiftReg);
2003 B.buildInstr(SrcDef->getOpcode(), {DstReg}, {S1, S2});
2004 };
2005 return true;
2006}
2007
2008 bool CombinerHelper::matchCombineMulToShl(MachineInstr &MI,
2009 unsigned &ShiftVal) {
2010 assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
2011 auto MaybeImmVal =
2012 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
2013 if (!MaybeImmVal)
2014 return false;
2015
2016 ShiftVal = MaybeImmVal->Value.exactLogBase2();
2017 return (static_cast<int32_t>(ShiftVal) != -1);
2018}
2019
2020 void CombinerHelper::applyCombineMulToShl(MachineInstr &MI,
2021 unsigned &ShiftVal) {
2022 assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
2023 MachineIRBuilder MIB(MI);
2024 LLT ShiftTy = MRI.getType(MI.getOperand(0).getReg());
2025 auto ShiftCst = MIB.buildConstant(ShiftTy, ShiftVal);
2026 Observer.changingInstr(MI);
2027 MI.setDesc(MIB.getTII().get(TargetOpcode::G_SHL));
2028 MI.getOperand(2).setReg(ShiftCst.getReg(0));
2029 Observer.changedInstr(MI);
2030}
2031
2032// shl ([sza]ext x), y => zext (shl x, y), if shift does not overflow source
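// For example (illustrative): if %x:_(s16) is known to have at least 4 leading
// zero bits, then
//   %ext:_(s32) = G_ZEXT %x:_(s16)
//   %res:_(s32) = G_SHL %ext, 4
// can be rewritten to shift in the narrow type first:
//   %shl:_(s16) = G_SHL %x:_(s16), 4
//   %res:_(s32) = G_ZEXT %shl:_(s16)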
2033 bool CombinerHelper::matchCombineShlOfExtend(MachineInstr &MI,
2034 RegisterImmPair &MatchData) {
2035 assert(MI.getOpcode() == TargetOpcode::G_SHL && KB);
2036 if (!getTargetLowering().isDesirableToPullExtFromShl(MI))
2037 return false;
2038
2039 Register LHS = MI.getOperand(1).getReg();
2040
2041 Register ExtSrc;
2042 if (!mi_match(LHS, MRI, m_GAnyExt(m_Reg(ExtSrc))) &&
2043 !mi_match(LHS, MRI, m_GZExt(m_Reg(ExtSrc))) &&
2044 !mi_match(LHS, MRI, m_GSExt(m_Reg(ExtSrc))))
2045 return false;
2046
2047 Register RHS = MI.getOperand(2).getReg();
2048 MachineInstr *MIShiftAmt = MRI.getVRegDef(RHS);
2049 auto MaybeShiftAmtVal = isConstantOrConstantSplatVector(*MIShiftAmt, MRI);
2050 if (!MaybeShiftAmtVal)
2051 return false;
2052
2053 if (LI) {
2054 LLT SrcTy = MRI.getType(ExtSrc);
2055
2056 // We only really care about the legality of the shifted value. We can
2057 // pick any type for the constant shift amount, so ask the target what to
2058 // use. Otherwise we would have to guess and hope it is reported as legal.
2059 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(SrcTy);
2060 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SHL, {SrcTy, ShiftAmtTy}}))
2061 return false;
2062 }
2063
2064 int64_t ShiftAmt = MaybeShiftAmtVal->getSExtValue();
2065 MatchData.Reg = ExtSrc;
2066 MatchData.Imm = ShiftAmt;
2067
2068 unsigned MinLeadingZeros = KB->getKnownZeroes(ExtSrc).countl_one();
2069 unsigned SrcTySize = MRI.getType(ExtSrc).getScalarSizeInBits();
2070 return MinLeadingZeros >= ShiftAmt && ShiftAmt < SrcTySize;
2071}
2072
2073 void CombinerHelper::applyCombineShlOfExtend(MachineInstr &MI,
2074 const RegisterImmPair &MatchData) {
2075 Register ExtSrcReg = MatchData.Reg;
2076 int64_t ShiftAmtVal = MatchData.Imm;
2077
2078 LLT ExtSrcTy = MRI.getType(ExtSrcReg);
2079 auto ShiftAmt = Builder.buildConstant(ExtSrcTy, ShiftAmtVal);
2080 auto NarrowShift =
2081 Builder.buildShl(ExtSrcTy, ExtSrcReg, ShiftAmt, MI.getFlags());
2082 Builder.buildZExt(MI.getOperand(0), NarrowShift);
2083 MI.eraseFromParent();
2084}
2085
2086 bool CombinerHelper::matchCombineMergeUnmerge(MachineInstr &MI,
2087 Register &MatchInfo) {
2088 GMerge &Merge = cast<GMerge>(MI);
2089 SmallVector<Register, 16> MergedValues;
2090 for (unsigned I = 0; I < Merge.getNumSources(); ++I)
2091 MergedValues.emplace_back(Merge.getSourceReg(I));
2092
2093 auto *Unmerge = getOpcodeDef<GUnmerge>(MergedValues[0], MRI);
2094 if (!Unmerge || Unmerge->getNumDefs() != Merge.getNumSources())
2095 return false;
2096
2097 for (unsigned I = 0; I < MergedValues.size(); ++I)
2098 if (MergedValues[I] != Unmerge->getReg(I))
2099 return false;
2100
2101 MatchInfo = Unmerge->getSourceReg();
2102 return true;
2103}
2104
2105 static Register peekThroughBitcast(Register Reg,
2106 const MachineRegisterInfo &MRI) {
2107 while (mi_match(Reg, MRI, m_GBitcast(m_Reg(Reg))))
2108 ;
2109
2110 return Reg;
2111}
2112
2115 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2116 "Expected an unmerge");
2117 auto &Unmerge = cast<GUnmerge>(MI);
2118 Register SrcReg = peekThroughBitcast(Unmerge.getSourceReg(), MRI);
2119
2120 auto *SrcInstr = getOpcodeDef<GMergeLikeInstr>(SrcReg, MRI);
2121 if (!SrcInstr)
2122 return false;
2123
2124 // Check the source type of the merge.
2125 LLT SrcMergeTy = MRI.getType(SrcInstr->getSourceReg(0));
2126 LLT Dst0Ty = MRI.getType(Unmerge.getReg(0));
2127 bool SameSize = Dst0Ty.getSizeInBits() == SrcMergeTy.getSizeInBits();
2128 if (SrcMergeTy != Dst0Ty && !SameSize)
2129 return false;
2130 // They are the same now (modulo a bitcast).
2131 // We can collect all the src registers.
2132 for (unsigned Idx = 0; Idx < SrcInstr->getNumSources(); ++Idx)
2133 Operands.push_back(SrcInstr->getSourceReg(Idx));
2134 return true;
2135}
2136
2139 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2140 "Expected an unmerge");
2141 assert((MI.getNumOperands() - 1 == Operands.size()) &&
2142 "Not enough operands to replace all defs");
2143 unsigned NumElems = MI.getNumOperands() - 1;
2144
2145 LLT SrcTy = MRI.getType(Operands[0]);
2146 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
2147 bool CanReuseInputDirectly = DstTy == SrcTy;
2148 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
2149 Register DstReg = MI.getOperand(Idx).getReg();
2150 Register SrcReg = Operands[Idx];
2151
2152 // This combine may run after RegBankSelect, so we need to be aware of
2153 // register banks.
2154 const auto &DstCB = MRI.getRegClassOrRegBank(DstReg);
2155 if (!DstCB.isNull() && DstCB != MRI.getRegClassOrRegBank(SrcReg)) {
2156 SrcReg = Builder.buildCopy(MRI.getType(SrcReg), SrcReg).getReg(0);
2157 MRI.setRegClassOrRegBank(SrcReg, DstCB);
2158 }
2159
2160 if (CanReuseInputDirectly)
2161 replaceRegWith(MRI, DstReg, SrcReg);
2162 else
2163 Builder.buildCast(DstReg, SrcReg);
2164 }
2165 MI.eraseFromParent();
2166}
2167
2168 bool CombinerHelper::matchCombineUnmergeConstant(MachineInstr &MI,
2169 SmallVectorImpl<APInt> &Csts) {
2170 unsigned SrcIdx = MI.getNumOperands() - 1;
2171 Register SrcReg = MI.getOperand(SrcIdx).getReg();
2172 MachineInstr *SrcInstr = MRI.getVRegDef(SrcReg);
2173 if (SrcInstr->getOpcode() != TargetOpcode::G_CONSTANT &&
2174 SrcInstr->getOpcode() != TargetOpcode::G_FCONSTANT)
2175 return false;
2176 // Break down the big constant into smaller ones.
2177 const MachineOperand &CstVal = SrcInstr->getOperand(1);
2178 APInt Val = SrcInstr->getOpcode() == TargetOpcode::G_CONSTANT
2179 ? CstVal.getCImm()->getValue()
2180 : CstVal.getFPImm()->getValueAPF().bitcastToAPInt();
2181
2182 LLT Dst0Ty = MRI.getType(MI.getOperand(0).getReg());
2183 unsigned ShiftAmt = Dst0Ty.getSizeInBits();
2184 // Unmerge a constant.
2185 for (unsigned Idx = 0; Idx != SrcIdx; ++Idx) {
2186 Csts.emplace_back(Val.trunc(ShiftAmt));
2187 Val = Val.lshr(ShiftAmt);
2188 }
2189
2190 return true;
2191}
2192
2193 void CombinerHelper::applyCombineUnmergeConstant(MachineInstr &MI,
2194 SmallVectorImpl<APInt> &Csts) {
2195 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2196 "Expected an unmerge");
2197 assert((MI.getNumOperands() - 1 == Csts.size()) &&
2198 "Not enough operands to replace all defs");
2199 unsigned NumElems = MI.getNumOperands() - 1;
2200 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
2201 Register DstReg = MI.getOperand(Idx).getReg();
2202 Builder.buildConstant(DstReg, Csts[Idx]);
2203 }
2204
2205 MI.eraseFromParent();
2206}
2207
2208 bool CombinerHelper::matchCombineUnmergeUndef(
2209 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
2210 unsigned SrcIdx = MI.getNumOperands() - 1;
2211 Register SrcReg = MI.getOperand(SrcIdx).getReg();
2212 MatchInfo = [&MI](MachineIRBuilder &B) {
2213 unsigned NumElems = MI.getNumOperands() - 1;
2214 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
2215 Register DstReg = MI.getOperand(Idx).getReg();
2216 B.buildUndef(DstReg);
2217 }
2218 };
2219 return isa<GImplicitDef>(MRI.getVRegDef(SrcReg));
2220}
2221
2223 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2224 "Expected an unmerge");
2225 if (MRI.getType(MI.getOperand(0).getReg()).isVector() ||
2226 MRI.getType(MI.getOperand(MI.getNumDefs()).getReg()).isVector())
2227 return false;
2228 // Check that all the lanes are dead except the first one.
2229 for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
2230 if (!MRI.use_nodbg_empty(MI.getOperand(Idx).getReg()))
2231 return false;
2232 }
2233 return true;
2234}
2235
2237 Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
2238 Register Dst0Reg = MI.getOperand(0).getReg();
2239 Builder.buildTrunc(Dst0Reg, SrcReg);
2240 MI.eraseFromParent();
2241}
2242
2244 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2245 "Expected an unmerge");
2246 Register Dst0Reg = MI.getOperand(0).getReg();
2247 LLT Dst0Ty = MRI.getType(Dst0Reg);
2248 // G_ZEXT on vector applies to each lane, so it will
2249 // affect all destinations. Therefore we won't be able
2250 // to simplify the unmerge to just the first definition.
2251 if (Dst0Ty.isVector())
2252 return false;
2253 Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
2254 LLT SrcTy = MRI.getType(SrcReg);
2255 if (SrcTy.isVector())
2256 return false;
2257
2258 Register ZExtSrcReg;
2259 if (!mi_match(SrcReg, MRI, m_GZExt(m_Reg(ZExtSrcReg))))
2260 return false;
2261
2262 // Finally we can replace the first definition with
2263 // a zext of the source if the definition is big enough to hold
2264 // all of ZExtSrc's bits.
2265 LLT ZExtSrcTy = MRI.getType(ZExtSrcReg);
2266 return ZExtSrcTy.getSizeInBits() <= Dst0Ty.getSizeInBits();
2267}
2268
2270 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2271 "Expected an unmerge");
2272
2273 Register Dst0Reg = MI.getOperand(0).getReg();
2274
2275 MachineInstr *ZExtInstr =
2276 MRI.getVRegDef(MI.getOperand(MI.getNumDefs()).getReg());
2277 assert(ZExtInstr && ZExtInstr->getOpcode() == TargetOpcode::G_ZEXT &&
2278 "Expecting a G_ZEXT");
2279
2280 Register ZExtSrcReg = ZExtInstr->getOperand(1).getReg();
2281 LLT Dst0Ty = MRI.getType(Dst0Reg);
2282 LLT ZExtSrcTy = MRI.getType(ZExtSrcReg);
2283
2284 if (Dst0Ty.getSizeInBits() > ZExtSrcTy.getSizeInBits()) {
2285 Builder.buildZExt(Dst0Reg, ZExtSrcReg);
2286 } else {
2287 assert(Dst0Ty.getSizeInBits() == ZExtSrcTy.getSizeInBits() &&
2288 "ZExt src doesn't fit in destination");
2289 replaceRegWith(MRI, Dst0Reg, ZExtSrcReg);
2290 }
2291
2292 Register ZeroReg;
2293 for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
2294 if (!ZeroReg)
2295 ZeroReg = Builder.buildConstant(Dst0Ty, 0).getReg(0);
2296 replaceRegWith(MRI, MI.getOperand(Idx).getReg(), ZeroReg);
2297 }
2298 MI.eraseFromParent();
2299}
2300
2301 bool CombinerHelper::matchCombineShiftToUnmerge(MachineInstr &MI,
2302 unsigned TargetShiftSize,
2303 unsigned &ShiftVal) {
2304 assert((MI.getOpcode() == TargetOpcode::G_SHL ||
2305 MI.getOpcode() == TargetOpcode::G_LSHR ||
2306 MI.getOpcode() == TargetOpcode::G_ASHR) && "Expected a shift");
2307
2308 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
2309 if (Ty.isVector()) // TODO:
2310 return false;
2311
2312 // Don't narrow further than the requested size.
2313 unsigned Size = Ty.getSizeInBits();
2314 if (Size <= TargetShiftSize)
2315 return false;
2316
2317 auto MaybeImmVal =
2318 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
2319 if (!MaybeImmVal)
2320 return false;
2321
2322 ShiftVal = MaybeImmVal->Value.getSExtValue();
2323 return ShiftVal >= Size / 2 && ShiftVal < Size;
2324}
2325
2326 void CombinerHelper::applyCombineShiftToUnmerge(MachineInstr &MI,
2327 const unsigned &ShiftVal) {
2328 Register DstReg = MI.getOperand(0).getReg();
2329 Register SrcReg = MI.getOperand(1).getReg();
2330 LLT Ty = MRI.getType(SrcReg);
2331 unsigned Size = Ty.getSizeInBits();
2332 unsigned HalfSize = Size / 2;
2333 assert(ShiftVal >= HalfSize);
2334
2335 LLT HalfTy = LLT::scalar(HalfSize);
2336
2337 auto Unmerge = Builder.buildUnmerge(HalfTy, SrcReg);
2338 unsigned NarrowShiftAmt = ShiftVal - HalfSize;
2339
2340 if (MI.getOpcode() == TargetOpcode::G_LSHR) {
2341 Register Narrowed = Unmerge.getReg(1);
2342
2343 // dst = G_LSHR s64:x, C for C >= 32
2344 // =>
2345 // lo, hi = G_UNMERGE_VALUES x
2346 // dst = G_MERGE_VALUES (G_LSHR hi, C - 32), 0
2347
2348 if (NarrowShiftAmt != 0) {
2349 Narrowed = Builder.buildLShr(HalfTy, Narrowed,
2350 Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
2351 }
2352
2353 auto Zero = Builder.buildConstant(HalfTy, 0);
2354 Builder.buildMergeLikeInstr(DstReg, {Narrowed, Zero});
2355 } else if (MI.getOpcode() == TargetOpcode::G_SHL) {
2356 Register Narrowed = Unmerge.getReg(0);
2357 // dst = G_SHL s64:x, C for C >= 32
2358 // =>
2359 // lo, hi = G_UNMERGE_VALUES x
2360 // dst = G_MERGE_VALUES 0, (G_SHL hi, C - 32)
2361 if (NarrowShiftAmt != 0) {
2362 Narrowed = Builder.buildShl(HalfTy, Narrowed,
2363 Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
2364 }
2365
2366 auto Zero = Builder.buildConstant(HalfTy, 0);
2367 Builder.buildMergeLikeInstr(DstReg, {Zero, Narrowed});
2368 } else {
2369 assert(MI.getOpcode() == TargetOpcode::G_ASHR);
2370 auto Hi = Builder.buildAShr(
2371 HalfTy, Unmerge.getReg(1),
2372 Builder.buildConstant(HalfTy, HalfSize - 1));
2373
2374 if (ShiftVal == HalfSize) {
2375 // (G_ASHR i64:x, 32) ->
2376 // G_MERGE_VALUES hi_32(x), (G_ASHR hi_32(x), 31)
2377 Builder.buildMergeLikeInstr(DstReg, {Unmerge.getReg(1), Hi});
2378 } else if (ShiftVal == Size - 1) {
2379 // Don't need a second shift.
2380 // (G_ASHR i64:x, 63) ->
2381 // %narrowed = (G_ASHR hi_32(x), 31)
2382 // G_MERGE_VALUES %narrowed, %narrowed
2383 Builder.buildMergeLikeInstr(DstReg, {Hi, Hi});
2384 } else {
2385 auto Lo = Builder.buildAShr(
2386 HalfTy, Unmerge.getReg(1),
2387 Builder.buildConstant(HalfTy, ShiftVal - HalfSize));
2388
2389 // (G_ASHR i64:x, C) ->, for C >= 32
2390 // G_MERGE_VALUES (G_ASHR hi_32(x), C - 32), (G_ASHR hi_32(x), 31)
2391 Builder.buildMergeLikeInstr(DstReg, {Lo, Hi});
2392 }
2393 }
2394
2395 MI.eraseFromParent();
2396}
2397
2398 bool CombinerHelper::tryCombineShiftToUnmerge(MachineInstr &MI,
2399 unsigned TargetShiftAmount) {
2400 unsigned ShiftAmt;
2401 if (matchCombineShiftToUnmerge(MI, TargetShiftAmount, ShiftAmt)) {
2402 applyCombineShiftToUnmerge(MI, ShiftAmt);
2403 return true;
2404 }
2405
2406 return false;
2407}
2408
2410 assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
2411 Register DstReg = MI.getOperand(0).getReg();
2412 LLT DstTy = MRI.getType(DstReg);
2413 Register SrcReg = MI.getOperand(1).getReg();
2414 return mi_match(SrcReg, MRI,
2415 m_GPtrToInt(m_all_of(m_SpecificType(DstTy), m_Reg(Reg))));
2416}
2417
2419 assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
2420 Register DstReg = MI.getOperand(0).getReg();
2421 Builder.buildCopy(DstReg, Reg);
2422 MI.eraseFromParent();
2423}
2424
2426 assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT");
2427 Register DstReg = MI.getOperand(0).getReg();
2428 Builder.buildZExtOrTrunc(DstReg, Reg);
2429 MI.eraseFromParent();
2430}
2431
2432 bool CombinerHelper::matchCombineAddP2IToPtrAdd(
2433 MachineInstr &MI, std::pair<Register, bool> &PtrReg) {
2434 assert(MI.getOpcode() == TargetOpcode::G_ADD);
2435 Register LHS = MI.getOperand(1).getReg();
2436 Register RHS = MI.getOperand(2).getReg();
2437 LLT IntTy = MRI.getType(LHS);
2438
2439 // G_PTR_ADD always has the pointer in the LHS, so we may need to commute the
2440 // instruction.
2441 PtrReg.second = false;
2442 for (Register SrcReg : {LHS, RHS}) {
2443 if (mi_match(SrcReg, MRI, m_GPtrToInt(m_Reg(PtrReg.first)))) {
2444 // Don't handle cases where the integer is implicitly converted to the
2445 // pointer width.
2446 LLT PtrTy = MRI.getType(PtrReg.first);
2447 if (PtrTy.getScalarSizeInBits() == IntTy.getScalarSizeInBits())
2448 return true;
2449 }
2450
2451 PtrReg.second = true;
2452 }
2453
2454 return false;
2455}
2456
2457 void CombinerHelper::applyCombineAddP2IToPtrAdd(
2458 MachineInstr &MI, std::pair<Register, bool> &PtrReg) {
2459 Register Dst = MI.getOperand(0).getReg();
2460 Register LHS = MI.getOperand(1).getReg();
2461 Register RHS = MI.getOperand(2).getReg();
2462
2463 const bool DoCommute = PtrReg.second;
2464 if (DoCommute)
2465 std::swap(LHS, RHS);
2466 LHS = PtrReg.first;
2467
2468 LLT PtrTy = MRI.getType(LHS);
2469
2470 auto PtrAdd = Builder.buildPtrAdd(PtrTy, LHS, RHS);
2471 Builder.buildPtrToInt(Dst, PtrAdd);
2472 MI.eraseFromParent();
2473}
2474
2475 bool CombinerHelper::matchCombineConstPtrAddToI2P(MachineInstr &MI,
2476 APInt &NewCst) {
2477 auto &PtrAdd = cast<GPtrAdd>(MI);
2478 Register LHS = PtrAdd.getBaseReg();
2479 Register RHS = PtrAdd.getOffsetReg();
2481
2482 if (auto RHSCst = getIConstantVRegVal(RHS, MRI)) {
2483 APInt Cst;
2484 if (mi_match(LHS, MRI, m_GIntToPtr(m_ICst(Cst)))) {
2485 auto DstTy = MRI.getType(PtrAdd.getReg(0));
2486 // G_INTTOPTR uses zero-extension
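      // For example (illustrative, 64-bit pointers):
      //   G_PTR_ADD (G_INTTOPTR 0x1000), -8
      // folds to the constant 0xFF8.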
2487 NewCst = Cst.zextOrTrunc(DstTy.getSizeInBits());
2488 NewCst += RHSCst->sextOrTrunc(DstTy.getSizeInBits());
2489 return true;
2490 }
2491 }
2492
2493 return false;
2494}
2495
2496 void CombinerHelper::applyCombineConstPtrAddToI2P(MachineInstr &MI,
2497 APInt &NewCst) {
2498 auto &PtrAdd = cast<GPtrAdd>(MI);
2499 Register Dst = PtrAdd.getReg(0);
2500
2501 Builder.buildConstant(Dst, NewCst);
2502 PtrAdd.eraseFromParent();
2503}
2504
2506 assert(MI.getOpcode() == TargetOpcode::G_ANYEXT && "Expected a G_ANYEXT");
2507 Register DstReg = MI.getOperand(0).getReg();
2508 Register SrcReg = MI.getOperand(1).getReg();
2509 Register OriginalSrcReg = getSrcRegIgnoringCopies(SrcReg, MRI);
2510 if (OriginalSrcReg.isValid())
2511 SrcReg = OriginalSrcReg;
2512 LLT DstTy = MRI.getType(DstReg);
2513 return mi_match(SrcReg, MRI,
2514 m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy))));
2515}
2516
2518 assert(MI.getOpcode() == TargetOpcode::G_ZEXT && "Expected a G_ZEXT");
2519 Register DstReg = MI.getOperand(0).getReg();
2520 Register SrcReg = MI.getOperand(1).getReg();
2521 LLT DstTy = MRI.getType(DstReg);
2522 if (mi_match(SrcReg, MRI,
2523 m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy))))) {
2524 unsigned DstSize = DstTy.getScalarSizeInBits();
2525 unsigned SrcSize = MRI.getType(SrcReg).getScalarSizeInBits();
2526 return KB->getKnownBits(Reg).countMinLeadingZeros() >= DstSize - SrcSize;
2527 }
2528 return false;
2529}
2530
2531 bool CombinerHelper::matchCombineExtOfExt(
2532 MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
2533 assert((MI.getOpcode() == TargetOpcode::G_ANYEXT ||
2534 MI.getOpcode() == TargetOpcode::G_SEXT ||
2535 MI.getOpcode() == TargetOpcode::G_ZEXT) &&
2536 "Expected a G_[ASZ]EXT");
2537 Register SrcReg = MI.getOperand(1).getReg();
2538 Register OriginalSrcReg = getSrcRegIgnoringCopies(SrcReg, MRI);
2539 if (OriginalSrcReg.isValid())
2540 SrcReg = OriginalSrcReg;
2541 MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
2542 // Match exts with the same opcode, anyext([sz]ext) and sext(zext).
2543 unsigned Opc = MI.getOpcode();
2544 unsigned SrcOpc = SrcMI->getOpcode();
2545 if (Opc == SrcOpc ||
2546 (Opc == TargetOpcode::G_ANYEXT &&
2547 (SrcOpc == TargetOpcode::G_SEXT || SrcOpc == TargetOpcode::G_ZEXT)) ||
2548 (Opc == TargetOpcode::G_SEXT && SrcOpc == TargetOpcode::G_ZEXT)) {
2549 MatchInfo = std::make_tuple(SrcMI->getOperand(1).getReg(), SrcOpc);
2550 return true;
2551 }
2552 return false;
2553}
2554
2555 void CombinerHelper::applyCombineExtOfExt(
2556 MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
2557 assert((MI.getOpcode() == TargetOpcode::G_ANYEXT ||
2558 MI.getOpcode() == TargetOpcode::G_SEXT ||
2559 MI.getOpcode() == TargetOpcode::G_ZEXT) &&
2560 "Expected a G_[ASZ]EXT");
2561
2562 Register Reg = std::get<0>(MatchInfo);
2563 unsigned SrcExtOp = std::get<1>(MatchInfo);
2564
2565 // Combine exts with the same opcode.
2566 if (MI.getOpcode() == SrcExtOp) {
2567 Observer.changingInstr(MI);
2568 MI.getOperand(1).setReg(Reg);
2569 Observer.changedInstr(MI);
2570 return;
2571 }
2572
2573 // Combine:
2574 // - anyext([sz]ext x) to [sz]ext x
2575 // - sext(zext x) to zext x
2576 if (MI.getOpcode() == TargetOpcode::G_ANYEXT ||
2577 (MI.getOpcode() == TargetOpcode::G_SEXT &&
2578 SrcExtOp == TargetOpcode::G_ZEXT)) {
2579 Register DstReg = MI.getOperand(0).getReg();
2580 Builder.buildInstr(SrcExtOp, {DstReg}, {Reg});
2581 MI.eraseFromParent();
2582 }
2583}
2584
2585 static LLT getMidVTForTruncRightShiftCombine(LLT ShiftTy, LLT TruncTy) {
2586 const unsigned ShiftSize = ShiftTy.getScalarSizeInBits();
2587 const unsigned TruncSize = TruncTy.getScalarSizeInBits();
2588
2589 // ShiftTy > 32 > TruncTy -> 32
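  // For example (illustrative): an s64 right shift whose result is truncated to
  // s16 would instead be performed as an s32 shift followed by an s16 truncate.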
2590 if (ShiftSize > 32 && TruncSize < 32)
2591 return ShiftTy.changeElementSize(32);
2592
2593 // TODO: We could also reduce to 16 bits, but that's more target-dependent.
2594 // Some targets like it, some don't, some only like it under certain
2595 // conditions/processor versions, etc.
2596 // A TL hook might be needed for this.
2597
2598 // Don't combine
2599 return ShiftTy;
2600}
2601
2602 bool CombinerHelper::matchCombineTruncOfShift(
2603 MachineInstr &MI, std::pair<MachineInstr *, LLT> &MatchInfo) {
2604 assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
2605 Register DstReg = MI.getOperand(0).getReg();
2606 Register SrcReg = MI.getOperand(1).getReg();
2607
2608 if (!MRI.hasOneNonDBGUse(SrcReg))
2609 return false;
2610
2611 LLT SrcTy = MRI.getType(SrcReg);
2612 LLT DstTy = MRI.getType(DstReg);
2613
2614 MachineInstr *SrcMI = getDefIgnoringCopies(SrcReg, MRI);
2615 const auto &TL = getTargetLowering();
2616
2617 LLT NewShiftTy;
2618 switch (SrcMI->getOpcode()) {
2619 default:
2620 return false;
2621 case TargetOpcode::G_SHL: {
2622 NewShiftTy = DstTy;
2623
2624 // Make sure new shift amount is legal.
2625 KnownBits Known = KB->getKnownBits(SrcMI->getOperand(2).getReg());
2626 if (Known.getMaxValue().uge(NewShiftTy.getScalarSizeInBits()))
2627 return false;
2628 break;
2629 }
2630 case TargetOpcode::G_LSHR:
2631 case TargetOpcode::G_ASHR: {
2632 // For right shifts, we conservatively do not do the transform if the TRUNC
2633 // has any STORE users. The reason is that if we change the type of the
2634 // shift, we may break the truncstore combine.
2635 //
2636 // TODO: Fix truncstore combine to handle (trunc(lshr (trunc x), k)).
2637 for (auto &User : MRI.use_instructions(DstReg))
2638 if (User.getOpcode() == TargetOpcode::G_STORE)
2639 return false;
2640
2641 NewShiftTy = getMidVTForTruncRightShiftCombine(SrcTy, DstTy);
2642 if (NewShiftTy == SrcTy)
2643 return false;
2644
2645 // Make sure we won't lose information by truncating the high bits.
2646 KnownBits Known = KB->getKnownBits(SrcMI->getOperand(2).getReg());
2647 if (Known.getMaxValue().ugt(NewShiftTy.getScalarSizeInBits() -
2648 DstTy.getScalarSizeInBits()))
2649 return false;
2650 break;
2651 }
2652 }
2653
2654 if (!isLegalOrBeforeLegalizer(
2655 {SrcMI->getOpcode(),
2656 {NewShiftTy, TL.getPreferredShiftAmountTy(NewShiftTy)}}))
2657 return false;
2658
2659 MatchInfo = std::make_pair(SrcMI, NewShiftTy);
2660 return true;
2661}
2662
2663 void CombinerHelper::applyCombineTruncOfShift(
2664 MachineInstr &MI, std::pair<MachineInstr *, LLT> &MatchInfo) {
2665 MachineInstr *ShiftMI = MatchInfo.first;
2666 LLT NewShiftTy = MatchInfo.second;
2667
2668 Register Dst = MI.getOperand(0).getReg();
2669 LLT DstTy = MRI.getType(Dst);
2670
2671 Register ShiftAmt = ShiftMI->getOperand(2).getReg();
2672 Register ShiftSrc = ShiftMI->getOperand(1).getReg();
2673 ShiftSrc = Builder.buildTrunc(NewShiftTy, ShiftSrc).getReg(0);
2674
2675 Register NewShift =
2676 Builder
2677 .buildInstr(ShiftMI->getOpcode(), {NewShiftTy}, {ShiftSrc, ShiftAmt})
2678 .getReg(0);
2679
2680 if (NewShiftTy == DstTy)
2681 replaceRegWith(MRI, Dst, NewShift);
2682 else
2683 Builder.buildTrunc(Dst, NewShift);
2684
2685 eraseInst(MI);
2686}
2687
2689 return any_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
2690 return MO.isReg() &&
2691 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
2692 });
2693}
2694
2696 return all_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
2697 return !MO.isReg() ||
2698 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
2699 });
2700}
2701
2703 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
2704 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
2705 return all_of(Mask, [](int Elt) { return Elt < 0; });
2706}
2707
2709 assert(MI.getOpcode() == TargetOpcode::G_STORE);
2710 return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(0).getReg(),
2711 MRI);
2712}
2713
2715 assert(MI.getOpcode() == TargetOpcode::G_SELECT);
2716 return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(1).getReg(),
2717 MRI);
2718}
2719
2721 assert((MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT ||
2722 MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT) &&
2723 "Expected an insert/extract element op");
2724 LLT VecTy = MRI.getType(MI.getOperand(1).getReg());
2725 unsigned IdxIdx =
2726 MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
2727 auto Idx = getIConstantVRegVal(MI.getOperand(IdxIdx).getReg(), MRI);
2728 if (!Idx)
2729 return false;
2730 return Idx->getZExtValue() >= VecTy.getNumElements();
2731}
2732
2734 GSelect &SelMI = cast<GSelect>(MI);
2735 auto Cst =
2736 isConstantOrConstantSplatVector(*MRI.getVRegDef(SelMI.getCondReg()), MRI);
2737 if (!Cst)
2738 return false;
2739 OpIdx = Cst->isZero() ? 3 : 2;
2740 return true;
2741}
2742
2743void CombinerHelper::eraseInst(MachineInstr &MI) { MI.eraseFromParent(); }
2744
2745 bool CombinerHelper::matchEqualDefs(const MachineOperand &MOP1,
2746 const MachineOperand &MOP2) {
2747 if (!MOP1.isReg() || !MOP2.isReg())
2748 return false;
2749 auto InstAndDef1 = getDefSrcRegIgnoringCopies(MOP1.getReg(), MRI);
2750 if (!InstAndDef1)
2751 return false;
2752 auto InstAndDef2 = getDefSrcRegIgnoringCopies(MOP2.getReg(), MRI);
2753 if (!InstAndDef2)
2754 return false;
2755 MachineInstr *I1 = InstAndDef1->MI;
2756 MachineInstr *I2 = InstAndDef2->MI;
2757
2758 // Handle a case like this:
2759 //
2760 // %0:_(s64), %1:_(s64) = G_UNMERGE_VALUES %2:_(<2 x s64>)
2761 //
2762 // Even though %0 and %1 are produced by the same instruction they are not
2763 // the same values.
2764 if (I1 == I2)
2765 return MOP1.getReg() == MOP2.getReg();
2766
2767 // If we have an instruction which loads or stores, we can't guarantee that
2768 // it is identical.
2769 //
2770 // For example, we may have
2771 //
2772 // %x1 = G_LOAD %addr (load N from @somewhere)
2773 // ...
2774 // call @foo
2775 // ...
2776 // %x2 = G_LOAD %addr (load N from @somewhere)
2777 // ...
2778 // %or = G_OR %x1, %x2
2779 //
2780 // It's possible that @foo will modify whatever lives at the address we're
2781 // loading from. To be safe, let's just assume that all loads and stores
2782 // are different (unless we have something which is guaranteed to not
2783 // change.)
2784 if (I1->mayLoadOrStore() && !I1->isDereferenceableInvariantLoad())
2785 return false;
2786
2787 // If both instructions are loads or stores, they are equal only if both
2788 // are dereferenceable invariant loads with the same number of bits.
2789 if (I1->mayLoadOrStore() && I2->mayLoadOrStore()) {
2790 GLoadStore *LS1 = dyn_cast<GLoadStore>(I1);
2791 GLoadStore *LS2 = dyn_cast<GLoadStore>(I2);
2792 if (!LS1 || !LS2)
2793 return false;
2794
2795 if (!I2->isDereferenceableInvariantLoad() ||
2796 (LS1->getMemSizeInBits() != LS2->getMemSizeInBits()))
2797 return false;
2798 }
2799
2800 // Check for physical registers on the instructions first to avoid cases
2801 // like this:
2802 //
2803 // %a = COPY $physreg
2804 // ...
2805 // SOMETHING implicit-def $physreg
2806 // ...
2807 // %b = COPY $physreg
2808 //
2809 // These copies are not equivalent.
2810 if (any_of(I1->uses(), [](const MachineOperand &MO) {
2811 return MO.isReg() && MO.getReg().isPhysical();
2812 })) {
2813 // Check if we have a case like this:
2814 //
2815 // %a = COPY $physreg
2816 // %b = COPY %a
2817 //
2818 // In this case, I1 and I2 will both be equal to %a = COPY $physreg.
2819 // From that, we know that they must have the same value, since they must
2820 // have come from the same COPY.
2821 return I1->isIdenticalTo(*I2);
2822 }
2823
2824 // We don't have any physical registers, so we don't necessarily need the
2825 // same vreg defs.
2826 //
2827 // On the off-chance that there's some target instruction feeding into the
2828 // instruction, let's use produceSameValue instead of isIdenticalTo.
2829 if (Builder.getTII().produceSameValue(*I1, *I2, &MRI)) {
2830 // Handle instructions with multiple defs that produce the same values. The
2831 // values are the same for operands with the same index.
2832 // %0:_(s8), %1:_(s8), %2:_(s8), %3:_(s8) = G_UNMERGE_VALUES %4:_(<4 x s8>)
2833 // %5:_(s8), %6:_(s8), %7:_(s8), %8:_(s8) = G_UNMERGE_VALUES %4:_(<4 x s8>)
2834 // I1 and I2 are different instructions but produce the same values:
2835 // %1 and %6 are the same, but %1 and %7 are not the same value.
2836 return I1->findRegisterDefOperandIdx(InstAndDef1->Reg, /*TRI=*/nullptr) ==
2837 I2->findRegisterDefOperandIdx(InstAndDef2->Reg, /*TRI=*/nullptr);
2838 }
2839 return false;
2840}
2841
2843 if (!MOP.isReg())
2844 return false;
2845 auto *MI = MRI.getVRegDef(MOP.getReg());
2846 auto MaybeCst = isConstantOrConstantSplatVector(*MI, MRI);
2847 return MaybeCst && MaybeCst->getBitWidth() <= 64 &&
2848 MaybeCst->getSExtValue() == C;
2849}
2850
2852 if (!MOP.isReg())
2853 return false;
2854 std::optional<FPValueAndVReg> MaybeCst;
2855 if (!mi_match(MOP.getReg(), MRI, m_GFCstOrSplat(MaybeCst)))
2856 return false;
2857
2858 return MaybeCst->Value.isExactlyValue(C);
2859}
2860
2861 void CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI,
2862 unsigned OpIdx) {
2863 assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
2864 Register OldReg = MI.getOperand(0).getReg();
2865 Register Replacement = MI.getOperand(OpIdx).getReg();
2866 assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
2867 MI.eraseFromParent();
2868 replaceRegWith(MRI, OldReg, Replacement);
2869}
2870
2871 void CombinerHelper::replaceSingleDefInstWithReg(MachineInstr &MI,
2872 Register Replacement) {
2873 assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
2874 Register OldReg = MI.getOperand(0).getReg();
2875 assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
2876 MI.eraseFromParent();
2877 replaceRegWith(MRI, OldReg, Replacement);
2878}
2879
2880 bool CombinerHelper::matchConstantLargerBitWidth(MachineInstr &MI,
2881 unsigned ConstIdx) {
2882 Register ConstReg = MI.getOperand(ConstIdx).getReg();
2883 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
2884
2885 // Get the shift amount
2886 auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstReg, MRI);
2887 if (!VRegAndVal)
2888 return false;
2889
2890 // Return true if the shift amount is >= the bitwidth.
2891 return (VRegAndVal->Value.uge(DstTy.getSizeInBits()));
2892}
2893
2895 assert((MI.getOpcode() == TargetOpcode::G_FSHL ||
2896 MI.getOpcode() == TargetOpcode::G_FSHR) &&
2897 "This is not a funnel shift operation");
2898
2899 Register ConstReg = MI.getOperand(3).getReg();
2900 LLT ConstTy = MRI.getType(ConstReg);
2901 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
2902
2903 auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstReg, MRI);
2904 assert((VRegAndVal) && "Value is not a constant");
2905
2906 // Calculate the new Shift Amount = Old Shift Amount % BitWidth
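  // For example (illustrative): a G_FSHL on s32 values with a shift amount of 70
  // uses an effective shift amount of 70 % 32 = 6.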
2907 APInt NewConst = VRegAndVal->Value.urem(
2908 APInt(ConstTy.getSizeInBits(), DstTy.getScalarSizeInBits()));
2909
2910 auto NewConstInstr = Builder.buildConstant(ConstTy, NewConst.getZExtValue());
2911 Builder.buildInstr(
2912 MI.getOpcode(), {MI.getOperand(0)},
2913 {MI.getOperand(1), MI.getOperand(2), NewConstInstr.getReg(0)});
2914
2915 MI.eraseFromParent();
2916}
2917
2919 assert(MI.getOpcode() == TargetOpcode::G_SELECT);
2920 // Match (cond ? x : x)
2921 return matchEqualDefs(MI.getOperand(2), MI.getOperand(3)) &&
2922 canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(2).getReg(),
2923 MRI);
2924}
2925
2927 return matchEqualDefs(MI.getOperand(1), MI.getOperand(2)) &&
2928 canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(),
2929 MRI);
2930}
2931
2933 return matchConstantOp(MI.getOperand(OpIdx), 0) &&
2934 canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(OpIdx).getReg(),
2935 MRI);
2936}
2937
2939 MachineOperand &MO = MI.getOperand(OpIdx);
2940 return MO.isReg() &&
2941 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
2942}
2943
2944 bool CombinerHelper::matchOperandIsKnownToBeAPowerOfTwo(MachineInstr &MI,
2945 unsigned OpIdx) {
2946 MachineOperand &MO = MI.getOperand(OpIdx);
2947 return isKnownToBeAPowerOfTwo(MO.getReg(), MRI, KB);
2948}
2949
2951 assert(MI.getNumDefs() == 1 && "Expected only one def?");
2952 Builder.buildFConstant(MI.getOperand(0), C);
2953 MI.eraseFromParent();
2954}
2955
2957 assert(MI.getNumDefs() == 1 && "Expected only one def?");
2958 Builder.buildConstant(MI.getOperand(0), C);
2959 MI.eraseFromParent();
2960}
2961
2963 assert(MI.getNumDefs() == 1 && "Expected only one def?");
2964 Builder.buildConstant(MI.getOperand(0), C);
2965 MI.eraseFromParent();
2966}
2967
2968 void CombinerHelper::replaceInstWithFConstant(MachineInstr &MI,
2969 ConstantFP *CFP) {
2970 assert(MI.getNumDefs() == 1 && "Expected only one def?");
2971 Builder.buildFConstant(MI.getOperand(0), CFP->getValueAPF());
2972 MI.eraseFromParent();
2973}
2974
2976 assert(MI.getNumDefs() == 1 && "Expected only one def?");
2977 Builder.buildUndef(MI.getOperand(0));
2978 MI.eraseFromParent();
2979}
2980
2981 bool CombinerHelper::matchSimplifyAddToSub(
2982 MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) {
2983 Register LHS = MI.getOperand(1).getReg();
2984 Register RHS = MI.getOperand(2).getReg();
2985 Register &NewLHS = std::get<0>(MatchInfo);
2986 Register &NewRHS = std::get<1>(MatchInfo);
2987
2988 // Helper lambda to check for opportunities for
2989 // ((0-A) + B) -> B - A
2990 // (A + (0-B)) -> A - B
2991 auto CheckFold = [&](Register &MaybeSub, Register &MaybeNewLHS) {
2992 if (!mi_match(MaybeSub, MRI, m_Neg(m_Reg(NewRHS))))
2993 return false;
2994 NewLHS = MaybeNewLHS;
2995 return true;
2996 };
2997
2998 return CheckFold(LHS, RHS) || CheckFold(RHS, LHS);
2999}
3000
3003 assert(MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT &&
3004 "Invalid opcode");
3005 Register DstReg = MI.getOperand(0).getReg();
3006 LLT DstTy = MRI.getType(DstReg);
3007 assert(DstTy.isVector() && "Invalid G_INSERT_VECTOR_ELT?");
3008 unsigned NumElts = DstTy.getNumElements();
3009 // If this MI is part of a sequence of insert_vec_elts, then
3010 // don't do the combine in the middle of the sequence.
3011 if (MRI.hasOneUse(DstReg) && MRI.use_instr_begin(DstReg)->getOpcode() ==
3012 TargetOpcode::G_INSERT_VECTOR_ELT)
3013 return false;
3014 MachineInstr *CurrInst = &MI;
3015 MachineInstr *TmpInst;
3016 int64_t IntImm;
3017 Register TmpReg;
3018 MatchInfo.resize(NumElts);
3019 while (mi_match(
3020 CurrInst->getOperand(0).getReg(), MRI,
3021 m_GInsertVecElt(m_MInstr(TmpInst), m_Reg(TmpReg), m_ICst(IntImm)))) {
3022 if (IntImm >= NumElts || IntImm < 0)
3023 return false;
3024 if (!MatchInfo[IntImm])
3025 MatchInfo[IntImm] = TmpReg;
3026 CurrInst = TmpInst;
3027 }
3028 // Variable index.
3029 if (CurrInst->getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
3030 return false;
3031 if (TmpInst->getOpcode() == TargetOpcode::G_BUILD_VECTOR) {
3032 for (unsigned I = 1; I < TmpInst->getNumOperands(); ++I) {
3033 if (!MatchInfo[I - 1].isValid())
3034 MatchInfo[I - 1] = TmpInst->getOperand(I).getReg();
3035 }
3036 return true;
3037 }
3038 // If we didn't end in a G_IMPLICIT_DEF and the source is not fully
3039 // overwritten, bail out.
3040 return TmpInst->getOpcode() == TargetOpcode::G_IMPLICIT_DEF ||
3041 all_of(MatchInfo, [](Register Reg) { return !!Reg; });
3042}
3043
3046 Register UndefReg;
3047 auto GetUndef = [&]() {
3048 if (UndefReg)
3049 return UndefReg;
3050 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
3051 UndefReg = Builder.buildUndef(DstTy.getScalarType()).getReg(0);
3052 return UndefReg;
3053 };
3054 for (Register &Reg : MatchInfo) {
3055 if (!Reg)
3056 Reg = GetUndef();
3057 }
3058 Builder.buildBuildVector(MI.getOperand(0).getReg(), MatchInfo);
3059 MI.eraseFromParent();
3060}
3061
3062 void CombinerHelper::applySimplifyAddToSub(
3063 MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) {
3064 Register SubLHS, SubRHS;
3065 std::tie(SubLHS, SubRHS) = MatchInfo;
3066 Builder.buildSub(MI.getOperand(0).getReg(), SubLHS, SubRHS);
3067 MI.eraseFromParent();
3068}
3069
3072 // Matches: logic (hand x, ...), (hand y, ...) -> hand (logic x, y), ...
3073 //
3074 // Creates the new hand + logic instructions (but does not insert them).
3075 //
3076 // On success, MatchInfo is populated with the new instructions. These are
3077 // inserted in applyHoistLogicOpWithSameOpcodeHands.
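  // For example (illustrative, with G_ZEXT hands and a G_AND logic op):
  //   %x32:_(s32) = G_ZEXT %x16:_(s16)
  //   %y32:_(s32) = G_ZEXT %y16:_(s16)
  //   %dst:_(s32) = G_AND %x32, %y32
  // becomes
  //   %and:_(s16) = G_AND %x16, %y16
  //   %dst:_(s32) = G_ZEXT %and:_(s16)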
3078 unsigned LogicOpcode = MI.getOpcode();
3079 assert(LogicOpcode == TargetOpcode::G_AND ||
3080 LogicOpcode == TargetOpcode::G_OR ||
3081 LogicOpcode == TargetOpcode::G_XOR);
3082 MachineIRBuilder MIB(MI);
3083 Register Dst = MI.getOperand(0).getReg();
3084 Register LHSReg = MI.getOperand(1).getReg();
3085 Register RHSReg = MI.getOperand(2).getReg();
3086
3087 // Don't recompute anything.
3088 if (!MRI.hasOneNonDBGUse(LHSReg) || !MRI.hasOneNonDBGUse(RHSReg))
3089 return false;
3090
3091 // Make sure we have (hand x, ...), (hand y, ...)
3092 MachineInstr *LeftHandInst = getDefIgnoringCopies(LHSReg, MRI);
3093 MachineInstr *RightHandInst = getDefIgnoringCopies(RHSReg, MRI);
3094 if (!LeftHandInst || !RightHandInst)
3095 return false;
3096 unsigned HandOpcode = LeftHandInst->getOpcode();
3097 if (HandOpcode != RightHandInst->getOpcode())
3098 return false;
3099 if (!LeftHandInst->getOperand(1).isReg() ||
3100 !RightHandInst->getOperand(1).isReg())
3101 return false;
3102
3103 // Make sure the types match up, and if we're doing this post-legalization,
3104 // we end up with legal types.
3105 Register X = LeftHandInst->getOperand(1).getReg();
3106 Register Y = RightHandInst->getOperand(1).getReg();
3107 LLT XTy = MRI.getType(X);
3108 LLT YTy = MRI.getType(Y);
3109 if (!XTy.isValid() || XTy != YTy)
3110 return false;
3111
3112 // Optional extra source register.
3113 Register ExtraHandOpSrcReg;
3114 switch (HandOpcode) {
3115 default:
3116 return false;
3117 case TargetOpcode::G_ANYEXT:
3118 case TargetOpcode::G_SEXT:
3119 case TargetOpcode::G_ZEXT: {
3120 // Match: logic (ext X), (ext Y) --> ext (logic X, Y)
3121 break;
3122 }
3123 case TargetOpcode::G_TRUNC: {
3124 // Match: logic (trunc X), (trunc Y) -> trunc (logic X, Y)
3125 const MachineFunction *MF = MI.getMF();
3126 const DataLayout &DL = MF->getDataLayout();
3127 LLVMContext &Ctx = MF->getFunction().getContext();
3128
3129 LLT DstTy = MRI.getType(Dst);
3130 const TargetLowering &TLI = getTargetLowering();
3131
3132 // Be extra careful sinking truncate. If it's free, there's no benefit in
3133 // widening a binop.
3134 if (TLI.isZExtFree(DstTy, XTy, DL, Ctx) &&
3135 TLI.isTruncateFree(XTy, DstTy, DL, Ctx))
3136 return false;
3137 break;
3138 }
3139 case TargetOpcode::G_AND:
3140 case TargetOpcode::G_ASHR:
3141 case TargetOpcode::G_LSHR:
3142 case TargetOpcode::G_SHL: {
3143 // Match: logic (binop x, z), (binop y, z) -> binop (logic x, y), z
3144 MachineOperand &ZOp = LeftHandInst->getOperand(2);
3145 if (!matchEqualDefs(ZOp, RightHandInst->getOperand(2)))
3146 return false;
3147 ExtraHandOpSrcReg = ZOp.getReg();
3148 break;
3149 }
3150 }
3151
3152 if (!isLegalOrBeforeLegalizer({LogicOpcode, {XTy, YTy}}))
3153 return false;
3154
3155 // Record the steps to build the new instructions.
3156 //
3157 // Steps to build (logic x, y)
3158 auto NewLogicDst = MRI.createGenericVirtualRegister(XTy);
3159 OperandBuildSteps LogicBuildSteps = {
3160 [=](MachineInstrBuilder &MIB) { MIB.addDef(NewLogicDst); },
3161 [=](MachineInstrBuilder &MIB) { MIB.addReg(X); },
3162 [=](MachineInstrBuilder &MIB) { MIB.addReg(Y); }};
3163 InstructionBuildSteps LogicSteps(LogicOpcode, LogicBuildSteps);
3164
3165 // Steps to build hand (logic x, y), ...z
3166 OperandBuildSteps HandBuildSteps = {
3167 [=](MachineInstrBuilder &MIB) { MIB.addDef(Dst); },
3168 [=](MachineInstrBuilder &MIB) { MIB.addReg(NewLogicDst); }};
3169 if (ExtraHandOpSrcReg.isValid())
3170 HandBuildSteps.push_back(
3171 [=](MachineInstrBuilder &MIB) { MIB.addReg(ExtraHandOpSrcReg); });
3172 InstructionBuildSteps HandSteps(HandOpcode, HandBuildSteps);
3173
3174 MatchInfo = InstructionStepsMatchInfo({LogicSteps, HandSteps});
3175 return true;
3176}
3177
3180 assert(MatchInfo.InstrsToBuild.size() &&
3181 "Expected at least one instr to build?");
3182 for (auto &InstrToBuild : MatchInfo.InstrsToBuild) {
3183 assert(InstrToBuild.Opcode && "Expected a valid opcode?");
3184 assert(InstrToBuild.OperandFns.size() && "Expected at least one operand?");
3185 MachineInstrBuilder Instr = Builder.buildInstr(InstrToBuild.Opcode);
3186 for (auto &OperandFn : InstrToBuild.OperandFns)
3187 OperandFn(Instr);
3188 }
3189 MI.eraseFromParent();
3190}
3191
3192 bool CombinerHelper::matchAshrShlToSextInreg(
3193 MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) {
3194 assert(MI.getOpcode() == TargetOpcode::G_ASHR);
3195 int64_t ShlCst, AshrCst;
3196 Register Src;
3197 if (!mi_match(MI.getOperand(0).getReg(), MRI,
3198 m_GAShr(m_GShl(m_Reg(Src), m_ICstOrSplat(ShlCst)),
3199 m_ICstOrSplat(AshrCst))))
3200 return false;
3201 if (ShlCst != AshrCst)
3202 return false;
3203 if (!isLegalOrBeforeLegalizer(
3204 {TargetOpcode::G_SEXT_INREG, {MRI.getType(Src)}}))
3205 return false;
3206 MatchInfo = std::make_tuple(Src, ShlCst);
3207 return true;
3208}
3209
3210 void CombinerHelper::applyAshShlToSextInreg(
3211 MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) {
3212 assert(MI.getOpcode() == TargetOpcode::G_ASHR);
3213 Register Src;
3214 int64_t ShiftAmt;
3215 std::tie(Src, ShiftAmt) = MatchInfo;
3216 unsigned Size = MRI.getType(Src).getScalarSizeInBits();
3217 Builder.buildSExtInReg(MI.getOperand(0).getReg(), Src, Size - ShiftAmt);
3218 MI.eraseFromParent();
3219}
3220
3221/// and(and(x, C1), C2) -> C1&C2 ? and(x, C1&C2) : 0
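/// For example (illustrative): and(and(x, 0xF0), 0x3C) -> and(x, 0x30), while
/// and(and(x, 0xF0), 0x0F) -> 0 because the two masks share no set bits.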
3223 bool CombinerHelper::matchOverlappingAnd(
3224 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
3224 assert(MI.getOpcode() == TargetOpcode::G_AND);
3225
3226 Register Dst = MI.getOperand(0).getReg();
3227 LLT Ty = MRI.getType(Dst);
3228
3229 Register R;
3230 int64_t C1;
3231 int64_t C2;
3232 if (!mi_match(
3233 Dst, MRI,
3234 m_GAnd(m_GAnd(m_Reg(R), m_ICst(C1)), m_ICst(C2))))
3235 return false;
3236
3237 MatchInfo = [=](MachineIRBuilder &B) {
3238 if (C1 & C2) {
3239 B.buildAnd(Dst, R, B.buildConstant(Ty, C1 & C2));
3240 return;
3241 }
3242 auto Zero = B.buildConstant(Ty, 0);
3243 replaceRegWith(MRI, Dst, Zero->getOperand(0).getReg());
3244 };
3245 return true;
3246}
3247
3248 bool CombinerHelper::matchRedundantAnd(MachineInstr &MI,
3249 Register &Replacement) {
3250 // Given
3251 //
3252 // %y:_(sN) = G_SOMETHING
3253 // %x:_(sN) = G_SOMETHING
3254 // %res:_(sN) = G_AND %x, %y
3255 //
3256 // Eliminate the G_AND when it is known that x & y == x or x & y == y.
3257 //
3258 // Patterns like this can appear as a result of legalization. E.g.
3259 //
3260 // %cmp:_(s32) = G_ICMP intpred(pred), %x(s32), %y
3261 // %one:_(s32) = G_CONSTANT i32 1
3262 // %and:_(s32) = G_AND %cmp, %one
3263 //
3264 // In this case, G_ICMP only produces a single bit, so x & 1 == x.
3265 assert(MI.getOpcode() == TargetOpcode::G_AND);
3266 if (!KB)
3267 return false;
3268
3269 Register AndDst = MI.getOperand(0).getReg();
3270 Register LHS = MI.getOperand(1).getReg();
3271 Register RHS = MI.getOperand(2).getReg();
3272
3273 // Check the RHS (maybe a constant) first, and if we have no KnownBits there,
3274 // we can't do anything. If we do, then it depends on whether we have
3275 // KnownBits on the LHS.
3276 KnownBits RHSBits = KB->getKnownBits(RHS);
3277 if (RHSBits.isUnknown())
3278 return false;
3279
3280 KnownBits LHSBits = KB->getKnownBits(LHS);
3281
3282 // Check that x & Mask == x.
3283 // x & 1 == x, always
3284 // x & 0 == x, only if x is also 0
3285 // Meaning Mask has no effect if every bit is either one in Mask or zero in x.
3286 //
3287 // Check if we can replace AndDst with the LHS of the G_AND
3288 if (canReplaceReg(AndDst, LHS, MRI) &&
3289 (LHSBits.Zero | RHSBits.One).isAllOnes()) {
3290 Replacement = LHS;
3291 return true;
3292 }
3293
3294 // Check if we can replace AndDst with the RHS of the G_AND
3295 if (canReplaceReg(AndDst, RHS, MRI) &&
3296 (LHSBits.One | RHSBits.Zero).isAllOnes()) {
3297 Replacement = RHS;
3298 return true;
3299 }
3300
3301 return false;
3302}
3303
3305 // Given
3306 //
3307 // %y:_(sN) = G_SOMETHING
3308 // %x:_(sN) = G_SOMETHING
3309 // %res:_(sN) = G_OR %x, %y
3310 //
3311 // Eliminate the G_OR when it is known that x | y == x or x | y == y.
3312 assert(MI.getOpcode() == TargetOpcode::G_OR);
3313 if (!KB)
3314 return false;
3315
3316 Register OrDst = MI.getOperand(0).getReg();
3317 Register LHS = MI.getOperand(1).getReg();
3318 Register RHS = MI.getOperand(2).getReg();
3319
3320 KnownBits LHSBits = KB->getKnownBits(LHS);
3321 KnownBits RHSBits = KB->getKnownBits(RHS);
3322
3323 // Check that x | Mask == x.
3324 // x | 0 == x, always
3325 // x | 1 == x, only if x is also 1
3326 // Meaning Mask has no effect if every bit is either zero in Mask or one in x.
3327 //
3328 // Check if we can replace OrDst with the LHS of the G_OR
3329 if (canReplaceReg(OrDst, LHS, MRI) &&
3330 (LHSBits.One | RHSBits.Zero).isAllOnes()) {
3331 Replacement = LHS;
3332 return true;
3333 }
3334
3335 // Check if we can replace OrDst with the RHS of the G_OR
3336 if (canReplaceReg(OrDst, RHS, MRI) &&
3337 (LHSBits.Zero | RHSBits.One).isAllOnes()) {
3338 Replacement = RHS;
3339 return true;
3340 }
3341
3342 return false;
3343}
3344
3346 // If the input is already sign extended, just drop the extension.
3347 Register Src = MI.getOperand(1).getReg();
3348 unsigned ExtBits = MI.getOperand(2).getImm();
3349 unsigned TypeSize = MRI.getType(Src).getScalarSizeInBits();
3350 return KB->computeNumSignBits(Src) >= (TypeSize - ExtBits + 1);
3351}
3352
3353static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits,
3354 int64_t Cst, bool IsVector, bool IsFP) {
3355 // For i1, Cst will always be -1 regardless of boolean contents.
3356 return (ScalarSizeBits == 1 && Cst == -1) ||
3357 isConstTrueVal(TLI, Cst, IsVector, IsFP);
3358}
3359
3360 bool CombinerHelper::matchNotCmp(MachineInstr &MI,
3361 SmallVectorImpl<Register> &RegsToNegate) {
3362 assert(MI.getOpcode() == TargetOpcode::G_XOR);
3363 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3364 const auto &TLI = *Builder.getMF().getSubtarget().getTargetLowering();
3365 Register XorSrc;
3366 Register CstReg;
3367 // We match xor(src, true) here.
3368 if (!mi_match(MI.getOperand(0).getReg(), MRI,
3369 m_GXor(m_Reg(XorSrc), m_Reg(CstReg))))
3370 return false;
3371
3372 if (!MRI.hasOneNonDBGUse(XorSrc))
3373 return false;
3374
3375 // Check that XorSrc is the root of a tree of comparisons combined with ANDs
3376 // and ORs. The suffix of RegsToNegate starting from index I is used as a work
3377 // list of tree nodes to visit.
3378 RegsToNegate.push_back(XorSrc);
3379 // Remember whether the comparisons are all integer or all floating point.
3380 bool IsInt = false;
3381 bool IsFP = false;
3382 for (unsigned I = 0; I < RegsToNegate.size(); ++I) {
3383 Register Reg = RegsToNegate[I];
3384 if (!MRI.hasOneNonDBGUse(Reg))
3385 return false;
3386 MachineInstr *Def = MRI.getVRegDef(Reg);
3387 switch (Def->getOpcode()) {
3388 default:
3389 // Don't match if the tree contains anything other than ANDs, ORs and
3390 // comparisons.
3391 return false;
3392 case TargetOpcode::G_ICMP:
3393 if (IsFP)
3394 return false;
3395 IsInt = true;
3396 // When we apply the combine we will invert the predicate.
3397 break;
3398 case TargetOpcode::G_FCMP:
3399 if (IsInt)
3400 return false;
3401 IsFP = true;
3402 // When we apply the combine we will invert the predicate.
3403 break;
3404 case TargetOpcode::G_AND:
3405 case TargetOpcode::G_OR:
3406 // Implement De Morgan's laws:
3407 // ~(x & y) -> ~x | ~y
3408 // ~(x | y) -> ~x & ~y
3409 // When we apply the combine we will change the opcode and recursively
3410 // negate the operands.
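      // For example (illustrative):
      //   ~((icmp eq %a, %b) & (icmp eq %c, %d))
      // becomes
      //   (icmp ne %a, %b) | (icmp ne %c, %d)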
3411 RegsToNegate.push_back(Def->getOperand(1).getReg());
3412 RegsToNegate.push_back(Def->getOperand(2).getReg());
3413 break;
3414 }
3415 }
3416
3417 // Now we know whether the comparisons are integer or floating point, check
3418 // the constant in the xor.
3419 int64_t Cst;
3420 if (Ty.isVector()) {
3421 MachineInstr *CstDef = MRI.getVRegDef(CstReg);
3422 auto MaybeCst = getIConstantSplatSExtVal(*CstDef, MRI);
3423 if (!MaybeCst)
3424 return false;
3425 if (!isConstValidTrue(TLI, Ty.getScalarSizeInBits(), *MaybeCst, true, IsFP))
3426 return false;
3427 } else {
3428 if (!mi_match(CstReg, MRI, m_ICst(Cst)))
3429 return false;
3430 if (!isConstValidTrue(TLI, Ty.getSizeInBits(), Cst, false, IsFP))
3431 return false;
3432 }
3433
3434 return true;
3435}
3436
3437 void CombinerHelper::applyNotCmp(MachineInstr &MI,
3438 SmallVectorImpl<Register> &RegsToNegate) {
3439 for (Register Reg : RegsToNegate) {
3440 MachineInstr *Def = MRI.getVRegDef(Reg);
3441 Observer.changingInstr(*Def);
3442 // For each comparison, invert the predicate. For each AND and OR, swap the
3443 // opcode (G_AND <-> G_OR).
3444 switch (Def->getOpcode()) {
3445 default:
3446 llvm_unreachable("Unexpected opcode");
3447 case TargetOpcode::G_ICMP:
3448 case TargetOpcode::G_FCMP: {
3449 MachineOperand &PredOp = Def->getOperand(1);
3452 PredOp.setPredicate(NewP);
3453 break;
3454 }
3455 case TargetOpcode::G_AND:
3456 Def->setDesc(Builder.getTII().get(TargetOpcode::G_OR));
3457 break;
3458 case TargetOpcode::G_OR:
3459 Def->setDesc(Builder.getTII().get(TargetOpcode::G_AND));
3460 break;
3461 }
3462 Observer.changedInstr(*Def);
3463 }
3464
3465 replaceRegWith(MRI, MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
3466 MI.eraseFromParent();
3467}
3468
3470 MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
3471 // Match (xor (and x, y), y) (or any of its commuted cases)
3472 assert(MI.getOpcode() == TargetOpcode::G_XOR);
3473 Register &X = MatchInfo.first;
3474 Register &Y = MatchInfo.second;
3475 Register AndReg = MI.getOperand(1).getReg();
3476 Register SharedReg = MI.getOperand(2).getReg();
3477
3478 // Find a G_AND on either side of the G_XOR.
3479 // Look for one of
3480 //
3481 // (xor (and x, y), SharedReg)
3482 // (xor SharedReg, (and x, y))
3483 if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y)))) {
3484 std::swap(AndReg, SharedReg);
3485 if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y))))
3486 return false;
3487 }
3488
3489 // Only do this if we'll eliminate the G_AND.
3490 if (!MRI.hasOneNonDBGUse(AndReg))
3491 return false;
3492
3493 // We can combine if SharedReg is the same as either the LHS or RHS of the
3494 // G_AND.
3495 if (Y != SharedReg)
3496 std::swap(X, Y);
3497 return Y == SharedReg;
3498}
3499
3501 MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
3502 // Fold (xor (and x, y), y) -> (and (not x), y)
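  // Per-bit justification: where y is 1, (x & 1) ^ 1 == ~x; where y is 0,
  // both sides are 0. Hence (x & y) ^ y == (~x) & y.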
3503 Register X, Y;
3504 std::tie(X, Y) = MatchInfo;
3505 auto Not = Builder.buildNot(MRI.getType(X), X);
3507 MI.setDesc(Builder.getTII().get(TargetOpcode::G_AND));
3508 MI.getOperand(1).setReg(Not->getOperand(0).getReg());
3509 MI.getOperand(2).setReg(Y);
3511}
3512
3514 auto &PtrAdd = cast<GPtrAdd>(MI);
3515 Register DstReg = PtrAdd.getReg(0);
3516 LLT Ty = MRI.getType(DstReg);
3518
3519 if (DL.isNonIntegralAddressSpace(Ty.getScalarType().getAddressSpace()))
3520 return false;
3521
3522 if (Ty.isPointer()) {
3523 auto ConstVal = getIConstantVRegVal(PtrAdd.getBaseReg(), MRI);
3524 return ConstVal && *ConstVal == 0;
3525 }
3526
3527 assert(Ty.isVector() && "Expecting a vector type");
3528 const MachineInstr *VecMI = MRI.getVRegDef(PtrAdd.getBaseReg());
3529 return isBuildVectorAllZeros(*VecMI, MRI);
3530}
3531
3533 auto &PtrAdd = cast<GPtrAdd>(MI);
3534 Builder.buildIntToPtr(PtrAdd.getReg(0), PtrAdd.getOffsetReg());
3535 PtrAdd.eraseFromParent();
3536}
3537
3538/// The second source operand is known to be a power of 2.
3540 Register DstReg = MI.getOperand(0).getReg();
3541 Register Src0 = MI.getOperand(1).getReg();
3542 Register Pow2Src1 = MI.getOperand(2).getReg();
3543 LLT Ty = MRI.getType(DstReg);
3544
3545 // Fold (urem x, pow2) -> (and x, pow2-1)
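  // e.g. (urem x, 16) becomes (and x, 15), built below as x & (16 + (-1)).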
3546 auto NegOne = Builder.buildConstant(Ty, -1);
3547 auto Add = Builder.buildAdd(Ty, Pow2Src1, NegOne);
3548 Builder.buildAnd(DstReg, Src0, Add);
3549 MI.eraseFromParent();
3550}
3551
3553 unsigned &SelectOpNo) {
3554 Register LHS = MI.getOperand(1).getReg();
3555 Register RHS = MI.getOperand(2).getReg();
3556
3557 Register OtherOperandReg = RHS;
3558 SelectOpNo = 1;
3560
3561 // Don't do this unless the old select is going away. We want to eliminate the
3562 // binary operator, not replace a binop with a select.
3563 if (Select->getOpcode() != TargetOpcode::G_SELECT ||
3565 OtherOperandReg = LHS;
3566 SelectOpNo = 2;
3568 if (Select->getOpcode() != TargetOpcode::G_SELECT ||
3570 return false;
3571 }
3572
3573 MachineInstr *SelectLHS = MRI.getVRegDef(Select->getOperand(2).getReg());
3574 MachineInstr *SelectRHS = MRI.getVRegDef(Select->getOperand(3).getReg());
3575
3576 if (!isConstantOrConstantVector(*SelectLHS, MRI,
3577 /*AllowFP*/ true,
3578 /*AllowOpaqueConstants*/ false))
3579 return false;
3580 if (!isConstantOrConstantVector(*SelectRHS, MRI,
3581 /*AllowFP*/ true,
3582 /*AllowOpaqueConstants*/ false))
3583 return false;
3584
3585 unsigned BinOpcode = MI.getOpcode();
3586
3587 // We know that one of the operands is a select of constants. Now verify that
3588 // the other binary operator operand is either a constant, or we can handle a
3589 // variable.
3590 bool CanFoldNonConst =
3591 (BinOpcode == TargetOpcode::G_AND || BinOpcode == TargetOpcode::G_OR) &&
3592 (isNullOrNullSplat(*SelectLHS, MRI) ||
3593 isAllOnesOrAllOnesSplat(*SelectLHS, MRI)) &&
3594 (isNullOrNullSplat(*SelectRHS, MRI) ||
3595 isAllOnesOrAllOnesSplat(*SelectRHS, MRI));
3596 if (CanFoldNonConst)
3597 return true;
3598
3599 return isConstantOrConstantVector(*MRI.getVRegDef(OtherOperandReg), MRI,
3600 /*AllowFP*/ true,
3601 /*AllowOpaqueConstants*/ false);
3602}
3603
3604/// \p SelectOperand is the operand in binary operator \p MI that is the select
3605/// to fold.
3607 const unsigned &SelectOperand) {
3608 Register Dst = MI.getOperand(0).getReg();
3609 Register LHS = MI.getOperand(1).getReg();
3610 Register RHS = MI.getOperand(2).getReg();
3611 MachineInstr *Select = MRI.getVRegDef(MI.getOperand(SelectOperand).getReg());
3612
3613 Register SelectCond = Select->getOperand(1).getReg();
3614 Register SelectTrue = Select->getOperand(2).getReg();
3615 Register SelectFalse = Select->getOperand(3).getReg();
3616
3617 LLT Ty = MRI.getType(Dst);
3618 unsigned BinOpcode = MI.getOpcode();
3619
3620 Register FoldTrue, FoldFalse;
3621
3622 // We have a select-of-constants followed by a binary operator with a
3623 // constant. Eliminate the binop by pulling the constant math into the select.
3624 // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
3625 if (SelectOperand == 1) {
3626 // TODO: SelectionDAG verifies this actually constant folds before
3627 // committing to the combine.
3628
3629 FoldTrue = Builder.buildInstr(BinOpcode, {Ty}, {SelectTrue, RHS}).getReg(0);
3630 FoldFalse =
3631 Builder.buildInstr(BinOpcode, {Ty}, {SelectFalse, RHS}).getReg(0);
3632 } else {
3633 FoldTrue = Builder.buildInstr(BinOpcode, {Ty}, {LHS, SelectTrue}).getReg(0);
3634 FoldFalse =
3635 Builder.buildInstr(BinOpcode, {Ty}, {LHS, SelectFalse}).getReg(0);
3636 }
3637
3638 Builder.buildSelect(Dst, SelectCond, FoldTrue, FoldFalse, MI.getFlags());
3639 MI.eraseFromParent();
3640}
3641
3642std::optional<SmallVector<Register, 8>>
3643CombinerHelper::findCandidatesForLoadOrCombine(const MachineInstr *Root) const {
3644 assert(Root->getOpcode() == TargetOpcode::G_OR && "Expected G_OR only!");
3645 // We want to detect if Root is part of a tree which represents a bunch
3646 // of loads being merged into a larger load. We'll try to recognize patterns
3647 // like, for example:
3648 //
3649 // Reg Reg
3650 // \ /
3651 // OR_1 Reg
3652 // \ /
3653 // OR_2
3654 // \ Reg
3655 // .. /
3656 // Root
3657 //
3658 // Reg Reg Reg Reg
3659 // \ / \ /
3660 // OR_1 OR_2
3661 // \ /
3662 // \ /
3663 // ...
3664 // Root
3665 //
3666 // Each "Reg" may have been produced by a load + some arithmetic. This
3667 // function will save each of them.
3668 SmallVector<Register, 8> RegsToVisit;
3670
3671 // In the "worst" case, we're dealing with a load for each byte. So, there
3672 // are at most #bytes - 1 ORs.
3673 const unsigned MaxIter =
3674 MRI.getType(Root->getOperand(0).getReg()).getSizeInBytes() - 1;
3675 for (unsigned Iter = 0; Iter < MaxIter; ++Iter) {
3676 if (Ors.empty())
3677 break;
3678 const MachineInstr *Curr = Ors.pop_back_val();
3679 Register OrLHS = Curr->getOperand(1).getReg();
3680 Register OrRHS = Curr->getOperand(2).getReg();
3681
3682 // In the combine, we want to eliminate the entire tree.
3683 if (!MRI.hasOneNonDBGUse(OrLHS) || !MRI.hasOneNonDBGUse(OrRHS))
3684 return std::nullopt;
3685
3686 // If it's a G_OR, save it and continue to walk. If it's not, then it's
3687 // something that may be a load + arithmetic.
3688 if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrLHS, MRI))
3689 Ors.push_back(Or);
3690 else
3691 RegsToVisit.push_back(OrLHS);
3692 if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrRHS, MRI))
3693 Ors.push_back(Or);
3694 else
3695 RegsToVisit.push_back(OrRHS);
3696 }
3697
3698 // We're going to try and merge each register into a wider power-of-2 type,
3699 // so we ought to have an even number of registers.
3700 if (RegsToVisit.empty() || RegsToVisit.size() % 2 != 0)
3701 return std::nullopt;
3702 return RegsToVisit;
3703}
3704
3705/// Helper function for findLoadOffsetsForLoadOrCombine.
3706///
3707/// Check if \p Reg is the result of loading a \p MemSizeInBits wide value,
3708/// and then moving that value into a specific byte offset.
3709///
3710/// e.g. x[i] << 24
3711///
3712/// \returns The load instruction and the byte offset it is moved into.
3713static std::optional<std::pair<GZExtLoad *, int64_t>>
3714matchLoadAndBytePosition(Register Reg, unsigned MemSizeInBits,
3715 const MachineRegisterInfo &MRI) {
3716 assert(MRI.hasOneNonDBGUse(Reg) &&
3717 "Expected Reg to only have one non-debug use?");
3718 Register MaybeLoad;
3719 int64_t Shift;
3720 if (!mi_match(Reg, MRI,
3721 m_OneNonDBGUse(m_GShl(m_Reg(MaybeLoad), m_ICst(Shift))))) {
3722 Shift = 0;
3723 MaybeLoad = Reg;
3724 }
3725
3726 if (Shift % MemSizeInBits != 0)
3727 return std::nullopt;
3728
3729 // TODO: Handle other types of loads.
3730 auto *Load = getOpcodeDef<GZExtLoad>(MaybeLoad, MRI);
3731 if (!Load)
3732 return std::nullopt;
3733
3734 if (!Load->isUnordered() || Load->getMemSizeInBits() != MemSizeInBits)
3735 return std::nullopt;
3736
3737 return std::make_pair(Load, Shift / MemSizeInBits);
3738}
3739
3740std::optional<std::tuple<GZExtLoad *, int64_t, GZExtLoad *>>
3741CombinerHelper::findLoadOffsetsForLoadOrCombine(
3743 const SmallVector<Register, 8> &RegsToVisit, const unsigned MemSizeInBits) {
3744
3745 // Each load found for the pattern. There should be one for each RegsToVisit.
3747
3748 // The lowest index used in any load. (The lowest "i" for each x[i].)
3749 int64_t LowestIdx = INT64_MAX;
3750
3751 // The load which uses the lowest index.
3752 GZExtLoad *LowestIdxLoad = nullptr;
3753
3754 // Keeps track of the load indices we see. We shouldn't see any indices twice.
3755 SmallSet<int64_t, 8> SeenIdx;
3756
3757 // Ensure each load is in the same MBB.
3758 // TODO: Support multiple MachineBasicBlocks.
3759 MachineBasicBlock *MBB = nullptr;
3760 const MachineMemOperand *MMO = nullptr;
3761
3762 // Earliest instruction-order load in the pattern.
3763 GZExtLoad *EarliestLoad = nullptr;
3764
3765 // Latest instruction-order load in the pattern.
3766 GZExtLoad *LatestLoad = nullptr;
3767
3768 // Base pointer which every load should share.
3770
3771 // We want to find a load for each register. Each load should have some
3772 // appropriate bit twiddling arithmetic. During this loop, we will also keep
3773 // track of the load which uses the lowest index. Later, we will check if we
3774 // can use its pointer in the final, combined load.
3775 for (auto Reg : RegsToVisit) {
3776 // Find the load, and the byte position that its value will end up in
3777 // (e.g. after being shifted).
3778 auto LoadAndPos = matchLoadAndBytePosition(Reg, MemSizeInBits, MRI);
3779 if (!LoadAndPos)
3780 return std::nullopt;
3781 GZExtLoad *Load;
3782 int64_t DstPos;
3783 std::tie(Load, DstPos) = *LoadAndPos;
3784
3785 // TODO: Handle multiple MachineBasicBlocks. Currently not handled because
3786 // it is difficult to check for stores/calls/etc between loads.
3787 MachineBasicBlock *LoadMBB = Load->getParent();
3788 if (!MBB)
3789 MBB = LoadMBB;
3790 if (LoadMBB != MBB)
3791 return std::nullopt;
3792
3793 // Make sure that the MachineMemOperands of every seen load are compatible.
3794 auto &LoadMMO = Load->getMMO();
3795 if (!MMO)
3796 MMO = &LoadMMO;
3797 if (MMO->getAddrSpace() != LoadMMO.getAddrSpace())
3798 return std::nullopt;
3799
3800 // Find out what the base pointer and index for the load is.
3801 Register LoadPtr;
3802 int64_t Idx;
3803 if (!mi_match(Load->getOperand(1).getReg(), MRI,
3804 m_GPtrAdd(m_Reg(LoadPtr), m_ICst(Idx)))) {
3805 LoadPtr = Load->getOperand(1).getReg();
3806 Idx = 0;
3807 }
3808
3809 // Don't combine things like a[i], a[i] -> a bigger load.
3810 if (!SeenIdx.insert(Idx).second)
3811 return std::nullopt;
3812
3813 // Every load must share the same base pointer; don't combine things like:
3814 //
3815 // a[i], b[i + 1] -> a bigger load.
3816 if (!BasePtr.isValid())
3817 BasePtr = LoadPtr;
3818 if (BasePtr != LoadPtr)
3819 return std::nullopt;
3820
3821 if (Idx < LowestIdx) {
3822 LowestIdx = Idx;
3823 LowestIdxLoad = Load;
3824 }
3825
3826 // Keep track of the byte offset that this load ends up at. If we have seen
3827 // the byte offset, then stop here. We do not want to combine:
3828 //
3829 // a[i] << 16, a[i + k] << 16 -> a bigger load.
3830 if (!MemOffset2Idx.try_emplace(DstPos, Idx).second)
3831 return std::nullopt;
3832 Loads.insert(Load);
3833
3834 // Keep track of the position of the earliest/latest loads in the pattern.
3835 // We will check that there are no load fold barriers between them later
3836 // on.
3837 //
3838 // FIXME: Is there a better way to check for load fold barriers?
3839 if (!EarliestLoad || dominates(*Load, *EarliestLoad))
3840 EarliestLoad = Load;
3841 if (!LatestLoad || dominates(*LatestLoad, *Load))
3842 LatestLoad = Load;
3843 }
3844
3845 // We found a load for each register. Let's check if each load satisfies the
3846 // pattern.
3847 assert(Loads.size() == RegsToVisit.size() &&
3848 "Expected to find a load for each register?");
3849 assert(EarliestLoad != LatestLoad && EarliestLoad &&
3850 LatestLoad && "Expected at least two loads?");
3851
3852 // Check if there are any stores, calls, etc. between any of the loads. If
3853 // there are, then we can't safely perform the combine.
3854 //
3855 // MaxIter is chosen based off the (worst case) number of iterations it
3856 // typically takes to succeed in the LLVM test suite plus some padding.
3857 //
3858 // FIXME: Is there a better way to check for load fold barriers?
3859 const unsigned MaxIter = 20;
3860 unsigned Iter = 0;
3861 for (const auto &MI : instructionsWithoutDebug(EarliestLoad->getIterator(),
3862 LatestLoad->getIterator())) {
3863 if (Loads.count(&MI))
3864 continue;
3865 if (MI.isLoadFoldBarrier())
3866 return std::nullopt;
3867 if (Iter++ == MaxIter)
3868 return std::nullopt;
3869 }
3870
3871 return std::make_tuple(LowestIdxLoad, LowestIdx, LatestLoad);
3872}
3873
3875 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
3876 assert(MI.getOpcode() == TargetOpcode::G_OR);
3877 MachineFunction &MF = *MI.getMF();
3878 // Assuming a little-endian target, transform:
3879 // s8 *a = ...
3880 // s32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
3881 // =>
3882 // s32 val = *((i32)a)
3883 //
3884 // s8 *a = ...
3885 // s32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
3886 // =>
3887 // s32 val = BSWAP(*((s32)a))
3888 Register Dst = MI.getOperand(0).getReg();
3889 LLT Ty = MRI.getType(Dst);
3890 if (Ty.isVector())
3891 return false;
3892
3893 // We need to combine at least two loads into this type. Since the smallest
3894 // possible load is into a byte, we need at least a 16-bit wide type.
3895 const unsigned WideMemSizeInBits = Ty.getSizeInBits();
3896 if (WideMemSizeInBits < 16 || WideMemSizeInBits % 8 != 0)
3897 return false;
3898
3899 // Match a collection of non-OR instructions in the pattern.
3900 auto RegsToVisit = findCandidatesForLoadOrCombine(&MI);
3901 if (!RegsToVisit)
3902 return false;
3903
3904 // We have a collection of non-OR instructions. Figure out how wide each of
3905 // the small loads should be based on the number of potential loads we
3906 // found.
3907 const unsigned NarrowMemSizeInBits = WideMemSizeInBits / RegsToVisit->size();
3908 if (NarrowMemSizeInBits % 8 != 0)
3909 return false;
3910
3911 // Check if each register feeding into each OR is a load from the same
3912 // base pointer + some arithmetic.
3913 //
3914 // e.g. a[0], a[1] << 8, a[2] << 16, etc.
3915 //
3916 // Also verify that each of these ends up putting a[i] into the same memory
3917 // offset as a load into a wide type would.
3919 GZExtLoad *LowestIdxLoad, *LatestLoad;
3920 int64_t LowestIdx;
3921 auto MaybeLoadInfo = findLoadOffsetsForLoadOrCombine(
3922 MemOffset2Idx, *RegsToVisit, NarrowMemSizeInBits);
3923 if (!MaybeLoadInfo)
3924 return false;
3925 std::tie(LowestIdxLoad, LowestIdx, LatestLoad) = *MaybeLoadInfo;
3926
3927 // We have a bunch of loads being OR'd together. Using the addresses + offsets
3928 // we found before, check if this corresponds to a big or little endian byte
3929 // pattern. If it does, then we can represent it using a load + possibly a
3930 // BSWAP.
3931 bool IsBigEndianTarget = MF.getDataLayout().isBigEndian();
3932 std::optional<bool> IsBigEndian = isBigEndian(MemOffset2Idx, LowestIdx);
3933 if (!IsBigEndian)
3934 return false;
3935 bool NeedsBSwap = IsBigEndianTarget != *IsBigEndian;
3936 if (NeedsBSwap && !isLegalOrBeforeLegalizer({TargetOpcode::G_BSWAP, {Ty}}))
3937 return false;
3938
3939 // Make sure that the load from the lowest index produces offset 0 in the
3940 // final value.
3941 //
3942 // This ensures that we won't combine something like this:
3943 //
3944 // load x[i] -> byte 2
3945 // load x[i+1] -> byte 0 ---> wide_load x[i]
3946 // load x[i+2] -> byte 1
3947 const unsigned NumLoadsInTy = WideMemSizeInBits / NarrowMemSizeInBits;
3948 const unsigned ZeroByteOffset =
3949 *IsBigEndian
3950 ? bigEndianByteAt(NumLoadsInTy, 0)
3951 : littleEndianByteAt(NumLoadsInTy, 0);
3952 auto ZeroOffsetIdx = MemOffset2Idx.find(ZeroByteOffset);
3953 if (ZeroOffsetIdx == MemOffset2Idx.end() ||
3954 ZeroOffsetIdx->second != LowestIdx)
3955 return false;
3956
3957 // We will reuse the pointer from the load which ends up at byte offset 0. It
3958 // may not use index 0.
3959 Register Ptr = LowestIdxLoad->getPointerReg();
3960 const MachineMemOperand &MMO = LowestIdxLoad->getMMO();
3961 LegalityQuery::MemDesc MMDesc(MMO);
3962 MMDesc.MemoryTy = Ty;
3964 {TargetOpcode::G_LOAD, {Ty, MRI.getType(Ptr)}, {MMDesc}}))
3965 return false;
3966 auto PtrInfo = MMO.getPointerInfo();
3967 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, WideMemSizeInBits / 8);
3968
3969 // Load must be allowed and fast on the target.
3971 auto &DL = MF.getDataLayout();
3972 unsigned Fast = 0;
3973 if (!getTargetLowering().allowsMemoryAccess(C, DL, Ty, *NewMMO, &Fast) ||
3974 !Fast)
3975 return false;
3976
3977 MatchInfo = [=](MachineIRBuilder &MIB) {
3978 MIB.setInstrAndDebugLoc(*LatestLoad);
3979 Register LoadDst = NeedsBSwap ? MRI.cloneVirtualRegister(Dst) : Dst;
3980 MIB.buildLoad(LoadDst, Ptr, *NewMMO);
3981 if (NeedsBSwap)
3982 MIB.buildBSwap(Dst, LoadDst);
3983 };
3984 return true;
3985}
3986
3988 MachineInstr *&ExtMI) {
3989 auto &PHI = cast<GPhi>(MI);
3990 Register DstReg = PHI.getReg(0);
3991
3992 // TODO: Extending a vector may be expensive, don't do this until heuristics
3993 // are better.
3994 if (MRI.getType(DstReg).isVector())
3995 return false;
3996
3997 // Try to match a phi, whose only use is an extend.
3998 if (!MRI.hasOneNonDBGUse(DstReg))
3999 return false;
4000 ExtMI = &*MRI.use_instr_nodbg_begin(DstReg);
4001 switch (ExtMI->getOpcode()) {
4002 case TargetOpcode::G_ANYEXT:
4003 return true; // G_ANYEXT is usually free.
4004 case TargetOpcode::G_ZEXT:
4005 case TargetOpcode::G_SEXT:
4006 break;
4007 default:
4008 return false;
4009 }
4010
4011 // If the target is likely to fold this extend away, don't propagate.
4013 return false;
4014
4015 // We don't want to propagate the extends unless there's a good chance that
4016 // they'll be optimized in some way.
4017 // Collect the unique incoming values.
4019 for (unsigned I = 0; I < PHI.getNumIncomingValues(); ++I) {
4020 auto *DefMI = getDefIgnoringCopies(PHI.getIncomingValue(I), MRI);
4021 switch (DefMI->getOpcode()) {
4022 case TargetOpcode::G_LOAD:
4023 case TargetOpcode::G_TRUNC:
4024 case TargetOpcode::G_SEXT:
4025 case TargetOpcode::G_ZEXT:
4026 case TargetOpcode::G_ANYEXT:
4027 case TargetOpcode::G_CONSTANT:
4028 InSrcs.insert(DefMI);
4029 // Don't try to propagate if there are too many places to create new
4030 // extends, chances are it'll increase code size.
4031 if (InSrcs.size() > 2)
4032 return false;
4033 break;
4034 default:
4035 return false;
4036 }
4037 }
4038 return true;
4039}
4040
4042 MachineInstr *&ExtMI) {
4043 auto &PHI = cast<GPhi>(MI);
4044 Register DstReg = ExtMI->getOperand(0).getReg();
4045 LLT ExtTy = MRI.getType(DstReg);
4046
4047 // Propagate the extension into each incoming register's defining block.
4048 // Use a SetVector here because PHIs can have duplicate edges, and we want
4049 // deterministic iteration order.
4052 for (unsigned I = 0; I < PHI.getNumIncomingValues(); ++I) {
4053 auto SrcReg = PHI.getIncomingValue(I);
4054 auto *SrcMI = MRI.getVRegDef(SrcReg);
4055 if (!SrcMIs.insert(SrcMI))
4056 continue;
4057
4058 // Build an extend after each src inst.
4059 auto *MBB = SrcMI->getParent();
4060 MachineBasicBlock::iterator InsertPt = ++SrcMI->getIterator();
4061 if (InsertPt != MBB->end() && InsertPt->isPHI())
4062 InsertPt = MBB->getFirstNonPHI();
4063
4064 Builder.setInsertPt(*SrcMI->getParent(), InsertPt);
4065 Builder.setDebugLoc(MI.getDebugLoc());
4066 auto NewExt = Builder.buildExtOrTrunc(ExtMI->getOpcode(), ExtTy, SrcReg);
4067 OldToNewSrcMap[SrcMI] = NewExt;
4068 }
4069
4070 // Create a new phi with the extended inputs.
4072 auto NewPhi = Builder.buildInstrNoInsert(TargetOpcode::G_PHI);
4073 NewPhi.addDef(DstReg);
4074 for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) {
4075 if (!MO.isReg()) {
4076 NewPhi.addMBB(MO.getMBB());
4077 continue;
4078 }
4079 auto *NewSrc = OldToNewSrcMap[MRI.getVRegDef(MO.getReg())];
4080 NewPhi.addUse(NewSrc->getOperand(0).getReg());
4081 }
4082 Builder.insertInstr(NewPhi);
4083 ExtMI->eraseFromParent();
4084}
4085
4087 Register &Reg) {
4088 assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT);
4089 // If we have a constant index, look for a G_BUILD_VECTOR source
4090 // and find the source register that the index maps to.
4091 Register SrcVec = MI.getOperand(1).getReg();
4092 LLT SrcTy = MRI.getType(SrcVec);
4093
4094 auto Cst = getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
4095 if (!Cst || Cst->Value.getZExtValue() >= SrcTy.getNumElements())
4096 return false;
4097
4098 unsigned VecIdx = Cst->Value.getZExtValue();
4099
4100 // Check if we have a build_vector or build_vector_trunc with an optional
4101 // trunc in front.
4102 MachineInstr *SrcVecMI = MRI.getVRegDef(SrcVec);
4103 if (SrcVecMI->getOpcode() == TargetOpcode::G_TRUNC) {
4104 SrcVecMI = MRI.getVRegDef(SrcVecMI->getOperand(1).getReg());
4105 }
4106
4107 if (SrcVecMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR &&
4108 SrcVecMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR_TRUNC)
4109 return false;
4110
4111 EVT Ty(getMVTForLLT(SrcTy));
4112 if (!MRI.hasOneNonDBGUse(SrcVec) &&
4113 !getTargetLowering().aggressivelyPreferBuildVectorSources(Ty))
4114 return false;
4115
4116 Reg = SrcVecMI->getOperand(VecIdx + 1).getReg();
4117 return true;
4118}
4119
4121 Register &Reg) {
4122 // Check the type of the register, since it may have come from a
4123 // G_BUILD_VECTOR_TRUNC.
4124 LLT ScalarTy = MRI.getType(Reg);
4125 Register DstReg = MI.getOperand(0).getReg();
4126 LLT DstTy = MRI.getType(DstReg);
4127
4128 if (ScalarTy != DstTy) {
4129 assert(ScalarTy.getSizeInBits() > DstTy.getSizeInBits());
4130 Builder.buildTrunc(DstReg, Reg);
4131 MI.eraseFromParent();
4132 return;
4133 }
4135}
4136
4139 SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) {
4140 assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
4141 // This combine tries to find build_vectors which have every source element
4142 // extracted using G_EXTRACT_VECTOR_ELT. This can happen when transforms like
4143 // the masked load scalarization is run late in the pipeline. There's already
4144 // a combine for a similar pattern starting from the extract, but that
4145 // doesn't attempt to do it if there are multiple uses of the build_vector,
4146 // which in this case is true. Starting the combine from the build_vector
4147 // feels more natural than trying to find sibling nodes of extracts.
4148 // E.g.
4149 // %vec(<4 x s32>) = G_BUILD_VECTOR %s1(s32), %s2, %s3, %s4
4150 // %ext1 = G_EXTRACT_VECTOR_ELT %vec, 0
4151 // %ext2 = G_EXTRACT_VECTOR_ELT %vec, 1
4152 // %ext3 = G_EXTRACT_VECTOR_ELT %vec, 2
4153 // %ext4 = G_EXTRACT_VECTOR_ELT %vec, 3
4154 // ==>
4155 // replace ext{1,2,3,4} with %s{1,2,3,4}
4156
4157 Register DstReg = MI.getOperand(0).getReg();
4158 LLT DstTy = MRI.getType(DstReg);
4159 unsigned NumElts = DstTy.getNumElements();
4160
4161 SmallBitVector ExtractedElts(NumElts);
4162 for (MachineInstr &II : MRI.use_nodbg_instructions(DstReg)) {
4163 if (II.getOpcode() != TargetOpcode::G_EXTRACT_VECTOR_ELT)
4164 return false;
4165 auto Cst = getIConstantVRegVal(II.getOperand(2).getReg(), MRI);
4166 if (!Cst)
4167 return false;
4168 unsigned Idx = Cst->getZExtValue();
4169 if (Idx >= NumElts)
4170 return false; // Out of range.
4171 ExtractedElts.set(Idx);
4172 SrcDstPairs.emplace_back(
4173 std::make_pair(MI.getOperand(Idx + 1).getReg(), &II));
4174 }
4175 // Match if every element was extracted.
4176 return ExtractedElts.all();
4177}
4178
4181 SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) {
4182 assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
4183 for (auto &Pair : SrcDstPairs) {
4184 auto *ExtMI = Pair.second;
4185 replaceRegWith(MRI, ExtMI->getOperand(0).getReg(), Pair.first);
4186 ExtMI->eraseFromParent();
4187 }
4188 MI.eraseFromParent();
4189}
4190
4192 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4193 applyBuildFnNoErase(MI, MatchInfo);
4194 MI.eraseFromParent();
4195}
4196
4198 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4199 MatchInfo(Builder);
4200}
4201
4203 BuildFnTy &MatchInfo) {
4204 assert(MI.getOpcode() == TargetOpcode::G_OR);
4205
4206 Register Dst = MI.getOperand(0).getReg();
4207 LLT Ty = MRI.getType(Dst);
4208 unsigned BitWidth = Ty.getScalarSizeInBits();
4209
4210 Register ShlSrc, ShlAmt, LShrSrc, LShrAmt, Amt;
4211 unsigned FshOpc = 0;
4212
4213 // Match (or (shl ...), (lshr ...)).
4214 if (!mi_match(Dst, MRI,
4215 // m_GOr() handles the commuted version as well.
4216 m_GOr(m_GShl(m_Reg(ShlSrc), m_Reg(ShlAmt)),
4217 m_GLShr(m_Reg(LShrSrc), m_Reg(LShrAmt)))))
4218 return false;
4219
4220 // Given constants C0 and C1 such that C0 + C1 is bit-width:
4221 // (or (shl x, C0), (lshr y, C1)) -> (fshl x, y, C0) or (fshr x, y, C1)
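  // e.g. for s32: (or (shl x, 8), (lshr y, 24)) -> (fshr x, y, 24), since
  // G_FSHR x, y, 24 computes (x << 8) | (y >> 24).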
4222 int64_t CstShlAmt, CstLShrAmt;
4223 if (mi_match(ShlAmt, MRI, m_ICstOrSplat(CstShlAmt)) &&
4224 mi_match(LShrAmt, MRI, m_ICstOrSplat(CstLShrAmt)) &&
4225 CstShlAmt + CstLShrAmt == BitWidth) {
4226 FshOpc = TargetOpcode::G_FSHR;
4227 Amt = LShrAmt;
4228
4229 } else if (mi_match(LShrAmt, MRI,
4231 ShlAmt == Amt) {
4232 // (or (shl x, amt), (lshr y, (sub bw, amt))) -> (fshl x, y, amt)
4233 FshOpc = TargetOpcode::G_FSHL;
4234
4235 } else if (mi_match(ShlAmt, MRI,
4237 LShrAmt == Amt) {
4238 // (or (shl x, (sub bw, amt)), (lshr y, amt)) -> (fshr x, y, amt)
4239 FshOpc = TargetOpcode::G_FSHR;
4240
4241 } else {
4242 return false;
4243 }
4244
4245 LLT AmtTy = MRI.getType(Amt);
4246 if (!isLegalOrBeforeLegalizer({FshOpc, {Ty, AmtTy}}))
4247 return false;
4248
4249 MatchInfo = [=](MachineIRBuilder &B) {
4250 B.buildInstr(FshOpc, {Dst}, {ShlSrc, LShrSrc, Amt});
4251 };
4252 return true;
4253}
4254
4255/// Match an FSHL or FSHR that can be combined to a ROTR or ROTL rotate.
4257 unsigned Opc = MI.getOpcode();
4258 assert(Opc == TargetOpcode::G_FSHL || Opc == TargetOpcode::G_FSHR);
4259 Register X = MI.getOperand(1).getReg();
4260 Register Y = MI.getOperand(2).getReg();
4261 if (X != Y)
4262 return false;
4263 unsigned RotateOpc =
4264 Opc == TargetOpcode::G_FSHL ? TargetOpcode::G_ROTL : TargetOpcode::G_ROTR;
4265 return isLegalOrBeforeLegalizer({RotateOpc, {MRI.getType(X), MRI.getType(Y)}});
4266}
4267
4269 unsigned Opc = MI.getOpcode();
4270 assert(Opc == TargetOpcode::G_FSHL || Opc == TargetOpcode::G_FSHR);
4271 bool IsFSHL = Opc == TargetOpcode::G_FSHL;
4273 MI.setDesc(Builder.getTII().get(IsFSHL ? TargetOpcode::G_ROTL
4274 : TargetOpcode::G_ROTR));
4275 MI.removeOperand(2);
4277}
4278
4279// Fold (rot x, c) -> (rot x, c % BitSize)
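// e.g. (rotl x:s32, 37) is equivalent to (rotl x:s32, 5).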
4281 assert(MI.getOpcode() == TargetOpcode::G_ROTL ||
4282 MI.getOpcode() == TargetOpcode::G_ROTR);
4283 unsigned Bitsize =
4284 MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits();
4285 Register AmtReg = MI.getOperand(2).getReg();
4286 bool OutOfRange = false;
4287 auto MatchOutOfRange = [Bitsize, &OutOfRange](const Constant *C) {
4288 if (auto *CI = dyn_cast<ConstantInt>(C))
4289 OutOfRange |= CI->getValue().uge(Bitsize);
4290 return true;
4291 };
4292 return matchUnaryPredicate(MRI, AmtReg, MatchOutOfRange) && OutOfRange;
4293}
4294
4296 assert(MI.getOpcode() == TargetOpcode::G_ROTL ||
4297 MI.getOpcode() == TargetOpcode::G_ROTR);
4298 unsigned Bitsize =
4299 MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits();
4300 Register Amt = MI.getOperand(2).getReg();
4301 LLT AmtTy = MRI.getType(Amt);
4302 auto Bits = Builder.buildConstant(AmtTy, Bitsize);
4303 Amt = Builder.buildURem(AmtTy, MI.getOperand(2).getReg(), Bits).getReg(0);
4305 MI.getOperand(2).setReg(Amt);
4307}
4308
4310 int64_t &MatchInfo) {
4311 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
4312 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
4313
4314 // We want to avoid calling KnownBits on the LHS if possible, as this combine
4315 // has no filter and runs on every G_ICMP instruction. We can avoid calling
4316 // KnownBits on the LHS in two cases:
4317 //
4318 // - The RHS is unknown: Constants are always on RHS. If the RHS is unknown
4319 // we cannot do any transforms so we can safely bail out early.
4320 // - The RHS is zero: we don't need to know the LHS to do unsigned <0 and
4321 // >=0.
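  // e.g. if the LHS is known to be at most 7, (icmp ult %x, 16) folds to true
  // and (icmp ugt %x, 32) folds to false.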
4322 auto KnownRHS = KB->getKnownBits(MI.getOperand(3).getReg());
4323 if (KnownRHS.isUnknown())
4324 return false;
4325
4326 std::optional<bool> KnownVal;
4327 if (KnownRHS.isZero()) {
4328 // ? uge 0 -> always true
4329 // ? ult 0 -> always false
4330 if (Pred == CmpInst::ICMP_UGE)
4331 KnownVal = true;
4332 else if (Pred == CmpInst::ICMP_ULT)
4333 KnownVal = false;
4334 }
4335
4336 if (!KnownVal) {
4337 auto KnownLHS = KB->getKnownBits(MI.getOperand(2).getReg());
4338 switch (Pred) {
4339 default:
4340 llvm_unreachable("Unexpected G_ICMP predicate?");
4341 case CmpInst::ICMP_EQ:
4342 KnownVal = KnownBits::eq(KnownLHS, KnownRHS);
4343 break;
4344 case CmpInst::ICMP_NE:
4345 KnownVal = KnownBits::ne(KnownLHS, KnownRHS);
4346 break;
4347 case CmpInst::ICMP_SGE:
4348 KnownVal = KnownBits::sge(KnownLHS, KnownRHS);
4349 break;
4350 case CmpInst::ICMP_SGT:
4351 KnownVal = KnownBits::sgt(KnownLHS, KnownRHS);
4352 break;
4353 case CmpInst::ICMP_SLE:
4354 KnownVal = KnownBits::sle(KnownLHS, KnownRHS);
4355 break;
4356 case CmpInst::ICMP_SLT:
4357 KnownVal = KnownBits::slt(KnownLHS, KnownRHS);
4358 break;
4359 case CmpInst::ICMP_UGE:
4360 KnownVal = KnownBits::uge(KnownLHS, KnownRHS);
4361 break;
4362 case CmpInst::ICMP_UGT:
4363 KnownVal = KnownBits::ugt(KnownLHS, KnownRHS);
4364 break;
4365 case CmpInst::ICMP_ULE:
4366 KnownVal = KnownBits::ule(KnownLHS, KnownRHS);
4367 break;
4368 case CmpInst::ICMP_ULT:
4369 KnownVal = KnownBits::ult(KnownLHS, KnownRHS);
4370 break;
4371 }
4372 }
4373
4374 if (!KnownVal)
4375 return false;
4376 MatchInfo =
4377 *KnownVal
4379 /*IsVector = */
4380 MRI.getType(MI.getOperand(0).getReg()).isVector(),
4381 /* IsFP = */ false)
4382 : 0;
4383 return true;
4384}
4385
4387 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4388 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
4389 // Given:
4390 //
4391 // %x = G_WHATEVER (... x is known to be 0 or 1 ...)
4392 // %cmp = G_ICMP ne %x, 0
4393 //
4394 // Or:
4395 //
4396 // %x = G_WHATEVER (... x is known to be 0 or 1 ...)
4397 // %cmp = G_ICMP eq %x, 1
4398 //
4399 // We can replace %cmp with %x assuming true is 1 on the target.
4400 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
4401 if (!CmpInst::isEquality(Pred))
4402 return false;
4403 Register Dst = MI.getOperand(0).getReg();
4404 LLT DstTy = MRI.getType(Dst);
4406 /* IsFP = */ false) != 1)
4407 return false;
4408 int64_t OneOrZero = Pred == CmpInst::ICMP_EQ;
4409 if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICst(OneOrZero)))
4410 return false;
4411 Register LHS = MI.getOperand(2).getReg();
4412 auto KnownLHS = KB->getKnownBits(LHS);
4413 if (KnownLHS.getMinValue() != 0 || KnownLHS.getMaxValue() != 1)
4414 return false;
4415 // Make sure replacing Dst with the LHS is a legal operation.
4416 LLT LHSTy = MRI.getType(LHS);
4417 unsigned LHSSize = LHSTy.getSizeInBits();
4418 unsigned DstSize = DstTy.getSizeInBits();
4419 unsigned Op = TargetOpcode::COPY;
4420 if (DstSize != LHSSize)
4421 Op = DstSize < LHSSize ? TargetOpcode::G_TRUNC : TargetOpcode::G_ZEXT;
4422 if (!isLegalOrBeforeLegalizer({Op, {DstTy, LHSTy}}))
4423 return false;
4424 MatchInfo = [=](MachineIRBuilder &B) { B.buildInstr(Op, {Dst}, {LHS}); };
4425 return true;
4426}
4427
4428// Replace (and (or x, c1), c2) with (and x, c2) iff c1 & c2 == 0
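// e.g. (and (or x, 0xFF00), 0x00FF) -> (and x, 0x00FF): the OR can only set
// bits that the AND mask clears anyway.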
4430 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4431 assert(MI.getOpcode() == TargetOpcode::G_AND);
4432
4433 // Ignore vector types to simplify matching the two constants.
4434 // TODO: do this for vectors and scalars via a demanded bits analysis.
4435 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4436 if (Ty.isVector())
4437 return false;
4438
4439 Register Src;
4440 Register AndMaskReg;
4441 int64_t AndMaskBits;
4442 int64_t OrMaskBits;
4443 if (!mi_match(MI, MRI,
4444 m_GAnd(m_GOr(m_Reg(Src), m_ICst(OrMaskBits)),
4445 m_all_of(m_ICst(AndMaskBits), m_Reg(AndMaskReg)))))
4446 return false;
4447
4448 // Check if OrMask could turn on any bits in Src.
4449 if (AndMaskBits & OrMaskBits)
4450 return false;
4451
4452 MatchInfo = [=, &MI](MachineIRBuilder &B) {
4454 // Canonicalize the result to have the constant on the RHS.
4455 if (MI.getOperand(1).getReg() == AndMaskReg)
4456 MI.getOperand(2).setReg(AndMaskReg);
4457 MI.getOperand(1).setReg(Src);
4459 };
4460 return true;
4461}
4462
4463/// Form a G_SBFX from a G_SEXT_INREG fed by a right shift.
4465 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4466 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
4467 Register Dst = MI.getOperand(0).getReg();
4468 Register Src = MI.getOperand(1).getReg();
4469 LLT Ty = MRI.getType(Src);
4471 if (!LI || !LI->isLegalOrCustom({TargetOpcode::G_SBFX, {Ty, ExtractTy}}))
4472 return false;
4473 int64_t Width = MI.getOperand(2).getImm();
4474 Register ShiftSrc;
4475 int64_t ShiftImm;
4476 if (!mi_match(
4477 Src, MRI,
4478 m_OneNonDBGUse(m_any_of(m_GAShr(m_Reg(ShiftSrc), m_ICst(ShiftImm)),
4479 m_GLShr(m_Reg(ShiftSrc), m_ICst(ShiftImm))))))
4480 return false;
4481 if (ShiftImm < 0 || ShiftImm + Width > Ty.getScalarSizeInBits())
4482 return false;
4483
4484 MatchInfo = [=](MachineIRBuilder &B) {
4485 auto Cst1 = B.buildConstant(ExtractTy, ShiftImm);
4486 auto Cst2 = B.buildConstant(ExtractTy, Width);
4487 B.buildSbfx(Dst, ShiftSrc, Cst1, Cst2);
4488 };
4489 return true;
4490}
4491
4492/// Form a G_UBFX from "(a srl b) & mask", where b and mask are constants.
4494 BuildFnTy &MatchInfo) {
4495 GAnd *And = cast<GAnd>(&MI);
4496 Register Dst = And->getReg(0);
4497 LLT Ty = MRI.getType(Dst);
4499 // Note that isLegalOrBeforeLegalizer is stricter and does not take custom
4500 // into account.
4501 if (LI && !LI->isLegalOrCustom({TargetOpcode::G_UBFX, {Ty, ExtractTy}}))
4502 return false;
4503
4504 int64_t AndImm, LSBImm;
4505 Register ShiftSrc;
4506 const unsigned Size = Ty.getScalarSizeInBits();
4507 if (!mi_match(And->getReg(0), MRI,
4508 m_GAnd(m_OneNonDBGUse(m_GLShr(m_Reg(ShiftSrc), m_ICst(LSBImm))),
4509 m_ICst(AndImm))))
4510 return false;
4511
4512 // The mask is a mask of the low bits iff imm & (imm+1) == 0.
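  // e.g. 0x00FF & 0x0100 == 0 (a low-bit mask), whereas 0x00F0 & 0x00F1 != 0.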
4513 auto MaybeMask = static_cast<uint64_t>(AndImm);
4514 if (MaybeMask & (MaybeMask + 1))
4515 return false;
4516
4517 // LSB must fit within the register.
4518 if (static_cast<uint64_t>(LSBImm) >= Size)
4519 return false;
4520
4521 uint64_t Width = APInt(Size, AndImm).countr_one();
4522 MatchInfo = [=](MachineIRBuilder &B) {
4523 auto WidthCst = B.buildConstant(ExtractTy, Width);
4524 auto LSBCst = B.buildConstant(ExtractTy, LSBImm);
4525 B.buildInstr(TargetOpcode::G_UBFX, {Dst}, {ShiftSrc, LSBCst, WidthCst});
4526 };
4527 return true;
4528}
4529
4531 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4532 const unsigned Opcode = MI.getOpcode();
4533 assert(Opcode == TargetOpcode::G_ASHR || Opcode == TargetOpcode::G_LSHR);
4534
4535 const Register Dst = MI.getOperand(0).getReg();
4536
4537 const unsigned ExtrOpcode = Opcode == TargetOpcode::G_ASHR
4538 ? TargetOpcode::G_SBFX
4539 : TargetOpcode::G_UBFX;
4540
4541 // Check if the type we would use for the extract is legal
4542 LLT Ty = MRI.getType(Dst);
4544 if (!LI || !LI->isLegalOrCustom({ExtrOpcode, {Ty, ExtractTy}}))
4545 return false;
4546
4547 Register ShlSrc;
4548 int64_t ShrAmt;
4549 int64_t ShlAmt;
4550 const unsigned Size = Ty.getScalarSizeInBits();
4551
4552 // Try to match shr (shl x, c1), c2
4553 if (!mi_match(Dst, MRI,
4554 m_BinOp(Opcode,
4555 m_OneNonDBGUse(m_GShl(m_Reg(ShlSrc), m_ICst(ShlAmt))),
4556 m_ICst(ShrAmt))))
4557 return false;
4558
4559 // Make sure that the shift sizes can fit a bitfield extract
4560 if (ShlAmt < 0 || ShlAmt > ShrAmt || ShrAmt >= Size)
4561 return false;
4562
4563 // Skip this combine if the G_SEXT_INREG combine could handle it
4564 if (Opcode == TargetOpcode::G_ASHR && ShlAmt == ShrAmt)
4565 return false;
4566
4567 // Calculate start position and width of the extract
4568 const int64_t Pos = ShrAmt - ShlAmt;
4569 const int64_t Width = Size - ShrAmt;
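  // e.g. for s32, (lshr (shl x, 4), 8) becomes G_UBFX x, 4, 24, i.e. bits
  // [4..27] of x zero-extended.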
4570
4571 MatchInfo = [=](MachineIRBuilder &B) {
4572 auto WidthCst = B.buildConstant(ExtractTy, Width);
4573 auto PosCst = B.buildConstant(ExtractTy, Pos);
4574 B.buildInstr(ExtrOpcode, {Dst}, {ShlSrc, PosCst, WidthCst});
4575 };
4576 return true;
4577}
4578
4580 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4581 const unsigned Opcode = MI.getOpcode();
4582 assert(Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_ASHR);
4583
4584 const Register Dst = MI.getOperand(0).getReg();
4585 LLT Ty = MRI.getType(Dst);
4587 if (LI && !LI->isLegalOrCustom({TargetOpcode::G_UBFX, {Ty, ExtractTy}}))
4588 return false;
4589
4590 // Try to match shr (and x, c1), c2
4591 Register AndSrc;
4592 int64_t ShrAmt;
4593 int64_t SMask;
4594 if (!mi_match(Dst, MRI,
4595 m_BinOp(Opcode,
4596 m_OneNonDBGUse(m_GAnd(m_Reg(AndSrc), m_ICst(SMask))),
4597 m_ICst(ShrAmt))))
4598 return false;
4599
4600 const unsigned Size = Ty.getScalarSizeInBits();
4601 if (ShrAmt < 0 || ShrAmt >= Size)
4602 return false;
4603
4604 // If the shift subsumes the mask, emit the 0 directly.
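  // e.g. (lshr (and x, 0xFF), 8) is always 0: the mask leaves nothing at or
  // above bit 8.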
4605 if (0 == (SMask >> ShrAmt)) {
4606 MatchInfo = [=](MachineIRBuilder &B) {
4607 B.buildConstant(Dst, 0);
4608 };
4609 return true;
4610 }
4611
4612 // Check that ubfx can do the extraction, with no holes in the mask.
4613 uint64_t UMask = SMask;
4614 UMask |= maskTrailingOnes<uint64_t>(ShrAmt);
4615 UMask &= maskTrailingOnes<uint64_t>(Size);
4616 if (!isMask_64(UMask))
4617 return false;
4618
4619 // Calculate start position and width of the extract.
4620 const int64_t Pos = ShrAmt;
4621 const int64_t Width = llvm::countr_one(UMask) - ShrAmt;
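  // e.g. (lshr (and x, 0x0FF0), 4) becomes G_UBFX x, 4, 8.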
4622
4623 // It's preferable to keep the shift, rather than form G_SBFX.
4624 // TODO: remove the G_AND via demanded bits analysis.
4625 if (Opcode == TargetOpcode::G_ASHR && Width + ShrAmt == Size)
4626 return false;
4627
4628 MatchInfo = [=](MachineIRBuilder &B) {
4629 auto WidthCst = B.buildConstant(ExtractTy, Width);
4630 auto PosCst = B.buildConstant(ExtractTy, Pos);
4631 B.buildInstr(TargetOpcode::G_UBFX, {Dst}, {AndSrc, PosCst, WidthCst});
4632 };
4633 return true;
4634}
4635
4636bool CombinerHelper::reassociationCanBreakAddressingModePattern(
4637 MachineInstr &MI) {
4638 auto &PtrAdd = cast<GPtrAdd>(MI);
4639
4640 Register Src1Reg = PtrAdd.getBaseReg();
4641 auto *Src1Def = getOpcodeDef<GPtrAdd>(Src1Reg, MRI);
4642 if (!Src1Def)
4643 return false;
4644
4645 Register Src2Reg = PtrAdd.getOffsetReg();
4646
4647 if (MRI.hasOneNonDBGUse(Src1Reg))
4648 return false;
4649
4650 auto C1 = getIConstantVRegVal(Src1Def->getOffsetReg(), MRI);
4651 if (!C1)
4652 return false;
4653 auto C2 = getIConstantVRegVal(Src2Reg, MRI);
4654 if (!C2)
4655 return false;
4656
4657 const APInt &C1APIntVal = *C1;
4658 const APInt &C2APIntVal = *C2;
4659 const int64_t CombinedValue = (C1APIntVal + C2APIntVal).getSExtValue();
4660
4661 for (auto &UseMI : MRI.use_nodbg_instructions(PtrAdd.getReg(0))) {
4662 // This combine may end up running before ptrtoint/inttoptr combines
4663 // manage to eliminate redundant conversions, so try to look through them.
4664 MachineInstr *ConvUseMI = &UseMI;
4665 unsigned ConvUseOpc = ConvUseMI->getOpcode();
4666 while (ConvUseOpc == TargetOpcode::G_INTTOPTR ||
4667 ConvUseOpc == TargetOpcode::G_PTRTOINT) {
4668 Register DefReg = ConvUseMI->getOperand(0).getReg();
4669 if (!MRI.hasOneNonDBGUse(DefReg))
4670 break;
4671 ConvUseMI = &*MRI.use_instr_nodbg_begin(DefReg);
4672 ConvUseOpc = ConvUseMI->getOpcode();
4673 }
4674 auto *LdStMI = dyn_cast<GLoadStore>(ConvUseMI);
4675 if (!LdStMI)
4676 continue;
4677 // Is x[offset2] already not a legal addressing mode? If so then
4678 // reassociating the constants breaks nothing (we test offset2 because
4679 // that's the one we hope to fold into the load or store).
4681 AM.HasBaseReg = true;
4682 AM.BaseOffs = C2APIntVal.getSExtValue();
4683 unsigned AS = MRI.getType(LdStMI->getPointerReg()).getAddressSpace();
4684 Type *AccessTy = getTypeForLLT(LdStMI->getMMO().getMemoryType(),
4685 PtrAdd.getMF()->getFunction().getContext());
4686 const auto &TLI = *PtrAdd.getMF()->getSubtarget().getTargetLowering();
4687 if (!TLI.isLegalAddressingMode(PtrAdd.getMF()->getDataLayout(), AM,
4688 AccessTy, AS))
4689 continue;
4690
4691 // Would x[offset1+offset2] still be a legal addressing mode?
4692 AM.BaseOffs = CombinedValue;
4693 if (!TLI.isLegalAddressingMode(PtrAdd.getMF()->getDataLayout(), AM,
4694 AccessTy, AS))
4695 return true;
4696 }
4697
4698 return false;
4699}
4700
4702 MachineInstr *RHS,
4703 BuildFnTy &MatchInfo) {
4704 // G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C)
4705 Register Src1Reg = MI.getOperand(1).getReg();
4706 if (RHS->getOpcode() != TargetOpcode::G_ADD)
4707 return false;
4708 auto C2 = getIConstantVRegVal(RHS->getOperand(2).getReg(), MRI);
4709 if (!C2)
4710 return false;
4711
4712 MatchInfo = [=, &MI](MachineIRBuilder &B) {
4713 LLT PtrTy = MRI.getType(MI.getOperand(0).getReg());
4714
4715 auto NewBase =
4716 Builder.buildPtrAdd(PtrTy, Src1Reg, RHS->getOperand(1).getReg());
4718 MI.getOperand(1).setReg(NewBase.getReg(0));
4719 MI.getOperand(2).setReg(RHS->getOperand(2).getReg());
4721 };
4722 return !reassociationCanBreakAddressingModePattern(MI);
4723}
4724
4726 MachineInstr *LHS,
4727 MachineInstr *RHS,
4728 BuildFnTy &MatchInfo) {
4729 // G_PTR_ADD(G_PTR_ADD(X, C), Y) -> G_PTR_ADD(G_PTR_ADD(X, Y), C)
4730 // if and only if (G_PTR_ADD X, C) has one use.
4731 Register LHSBase;
4732 std::optional<ValueAndVReg> LHSCstOff;
4733 if (!mi_match(MI.getBaseReg(), MRI,
4734 m_OneNonDBGUse(m_GPtrAdd(m_Reg(LHSBase), m_GCst(LHSCstOff)))))
4735 return false;
4736
4737 auto *LHSPtrAdd = cast<GPtrAdd>(LHS);
4738 MatchInfo = [=, &MI](MachineIRBuilder &B) {
4739 // When we change LHSPtrAdd's offset register we might cause it to use a reg
4740 // before its def. Sink the instruction so it sits just before the outer
4741 // PTR_ADD, ensuring this doesn't happen.
4742 LHSPtrAdd->moveBefore(&MI);
4743 Register RHSReg = MI.getOffsetReg();
4744 // Build a new constant: setting the vreg directly could cause a type mismatch if it comes from an extend/trunc.
4745 auto NewCst = B.buildConstant(MRI.getType(RHSReg), LHSCstOff->Value);
4747 MI.getOperand(2).setReg(NewCst.getReg(0));
4749 Observer.changingInstr(*LHSPtrAdd);
4750 LHSPtrAdd->getOperand(2).setReg(RHSReg);
4751 Observer.changedInstr(*LHSPtrAdd);
4752 };
4753 return !reassociationCanBreakAddressingModePattern(MI);
4754}
4755
4757 MachineInstr *LHS,
4758 MachineInstr *RHS,
4759 BuildFnTy &MatchInfo) {
4760 // G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)
4761 auto *LHSPtrAdd = dyn_cast<GPtrAdd>(LHS);
4762 if (!LHSPtrAdd)
4763 return false;
4764
4765 Register Src2Reg = MI.getOperand(2).getReg();
4766 Register LHSSrc1 = LHSPtrAdd->getBaseReg();
4767 Register LHSSrc2 = LHSPtrAdd->getOffsetReg();
4768 auto C1 = getIConstantVRegVal(LHSSrc2, MRI);
4769 if (!C1)
4770 return false;
4771 auto C2 = getIConstantVRegVal(Src2Reg, MRI);
4772 if (!C2)
4773 return false;
4774
4775 MatchInfo = [=, &MI](MachineIRBuilder &B) {
4776 auto NewCst = B.buildConstant(MRI.getType(Src2Reg), *C1 + *C2);
4778 MI.getOperand(1).setReg(LHSSrc1);
4779 MI.getOperand(2).setReg(NewCst.getReg(0));
4781 };
4782 return !reassociationCanBreakAddressingModePattern(MI);
4783}
4784
4786 BuildFnTy &MatchInfo) {
4787 auto &PtrAdd = cast<GPtrAdd>(MI);
4788 // We're trying to match a few pointer computation patterns here for
4789 // re-association opportunities.
4790 // 1) Isolating a constant operand to be on the RHS, e.g.:
4791 // G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C)
4792 //
4793 // 2) Folding two constants in each sub-tree as long as such folding
4794 // doesn't break a legal addressing mode.
4795 // G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)
4796 //
4797 // 3) Move a constant from the LHS of an inner op to the RHS of the outer.
4798 // G_PTR_ADD(G_PTR_ADD(X, C), Y) -> G_PTR_ADD(G_PTR_ADD(X, Y), C)
4799 // iff (G_PTR_ADD X, C) has one use.
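  // Illustrative example for pattern 2 (generic names, not from the source):
  //   %p1 = G_PTR_ADD %base, 4 ; %p2 = G_PTR_ADD %p1, 8
  // becomes %p2 = G_PTR_ADD %base, 12, provided an offset of 12 is still a
  // legal addressing mode for every load/store user.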
4800 MachineInstr *LHS = MRI.getVRegDef(PtrAdd.getBaseReg());
4801 MachineInstr *RHS = MRI.getVRegDef(PtrAdd.getOffsetReg());
4802
4803 // Try to match example 2.
4804 if (matchReassocFoldConstantsInSubTree(PtrAdd, LHS, RHS, MatchInfo))
4805 return true;
4806
4807 // Try to match example 3.
4808 if (matchReassocConstantInnerLHS(PtrAdd, LHS, RHS, MatchInfo))
4809 return true;
4810
4811 // Try to match example 1.
4812 if (matchReassocConstantInnerRHS(PtrAdd, RHS, MatchInfo))
4813 return true;
4814
4815 return false;
4816}
4818 Register OpLHS, Register OpRHS,
4819 BuildFnTy &MatchInfo) {
4820 LLT OpRHSTy = MRI.getType(OpRHS);
4821 MachineInstr *OpLHSDef = MRI.getVRegDef(OpLHS);
4822
4823 if (OpLHSDef->getOpcode() != Opc)
4824 return false;
4825
4826 MachineInstr *OpRHSDef = MRI.getVRegDef(OpRHS);
4827 Register OpLHSLHS = OpLHSDef->getOperand(1).getReg();
4828 Register OpLHSRHS = OpLHSDef->getOperand(2).getReg();
4829
4830 // If the inner op is (X op C), pull the constant out so it can be folded with
4831 // other constants in the expression tree. Folding is not guaranteed so we
4832 // might have (C1 op C2). In that case do not pull a constant out because it
4833 // won't help and can lead to infinite loops.
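  // e.g. (add (add x, 10), 20) -> (add x, (add 10, 20)), which a later
  // constant fold turns into (add x, 30). If the inner op were (add 10, 20)
  // itself, nothing is pulled out.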
4836 if (isConstantOrConstantSplatVector(*OpRHSDef, MRI)) {
4837 // (Opc (Opc X, C1), C2) -> (Opc X, (Opc C1, C2))
4838 MatchInfo = [=](MachineIRBuilder &B) {
4839 auto NewCst = B.buildInstr(Opc, {OpRHSTy}, {OpLHSRHS, OpRHS});
4840 B.buildInstr(Opc, {DstReg}, {OpLHSLHS, NewCst});
4841 };
4842 return true;
4843 }
4844 if (getTargetLowering().isReassocProfitable(MRI, OpLHS, OpRHS)) {
4845 // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
4846 // iff (op x, c1) has one use
4847 MatchInfo = [=](MachineIRBuilder &B) {
4848 auto NewLHSLHS = B.buildInstr(Opc, {OpRHSTy}, {OpLHSLHS, OpRHS});
4849 B.buildInstr(Opc, {DstReg}, {NewLHSLHS, OpLHSRHS});
4850 };
4851 return true;
4852 }
4853 }
4854
4855 return false;
4856}
4857
4859 BuildFnTy &MatchInfo) {
4860 // We don't check if the reassociation will break a legal addressing mode
4861 // here since pointer arithmetic is handled by G_PTR_ADD.
4862 unsigned Opc = MI.getOpcode();
4863 Register DstReg = MI.getOperand(0).getReg();
4864 Register LHSReg = MI.getOperand(1).getReg();
4865 Register RHSReg = MI.getOperand(2).getReg();
4866
4867 if (tryReassocBinOp(Opc, DstReg, LHSReg, RHSReg, MatchInfo))
4868 return true;
4869 if (tryReassocBinOp(Opc, DstReg, RHSReg, LHSReg, MatchInfo))
4870 return true;
4871 return false;
4872}
4873
4875 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
4876 Register SrcOp = MI.getOperand(1).getReg();
4877
4878 if (auto MaybeCst = ConstantFoldCastOp(MI.getOpcode(), DstTy, SrcOp, MRI)) {
4879 MatchInfo = *MaybeCst;
4880 return true;
4881 }
4882
4883 return false;
4884}
4885
4887 Register Op1 = MI.getOperand(1).getReg();
4888 Register Op2 = MI.getOperand(2).getReg();
4889 auto MaybeCst = ConstantFoldBinOp(MI.getOpcode(), Op1, Op2, MRI);
4890 if (!MaybeCst)
4891 return false;
4892 MatchInfo = *MaybeCst;
4893 return true;
4894}
4895
4897 Register Op1 = MI.getOperand(1).getReg();
4898 Register Op2 = MI.getOperand(2).getReg();
4899 auto MaybeCst = ConstantFoldFPBinOp(MI.getOpcode(), Op1, Op2, MRI);
4900 if (!MaybeCst)
4901 return false;
4902 MatchInfo =
4903 ConstantFP::get(MI.getMF()->getFunction().getContext(), *MaybeCst);
4904 return true;
4905}
4906
4908 ConstantFP *&MatchInfo) {
4909 assert(MI.getOpcode() == TargetOpcode::G_FMA ||
4910 MI.getOpcode() == TargetOpcode::G_FMAD);
4911 auto [_, Op1, Op2, Op3] = MI.getFirst4Regs();
4912
4913 const ConstantFP *Op3Cst = getConstantFPVRegVal(Op3, MRI);
4914 if (!Op3Cst)
4915 return false;
4916
4917 const ConstantFP *Op2Cst = getConstantFPVRegVal(Op2, MRI);
4918 if (!Op2Cst)
4919 return false;
4920
4921 const ConstantFP *Op1Cst = getConstantFPVRegVal(Op1, MRI);
4922 if (!Op1Cst)
4923 return false;
4924
4925 APFloat Op1F = Op1Cst->getValueAPF();
4926 Op1F.fusedMultiplyAdd(Op2Cst->getValueAPF(), Op3Cst->getValueAPF(),
4928 MatchInfo = ConstantFP::get(MI.getMF()->getFunction().getContext(), Op1F);
4929 return true;
4930}
4931
4933 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4934 // Look for a binop feeding into an AND with a mask:
4935 //
4936 // %add = G_ADD %lhs, %rhs
4937 // %and = G_AND %add, 000...11111111
4938 //
4939 // Check if it's possible to perform the binop at a narrower width and zext
4940 // back to the original width like so:
4941 //
4942 // %narrow_lhs = G_TRUNC %lhs
4943 // %narrow_rhs = G_TRUNC %rhs
4944 // %narrow_add = G_ADD %narrow_lhs, %narrow_rhs
4945 // %new_add = G_ZEXT %narrow_add
4946 // %and = G_AND %new_add, 000...11111111
4947 //
4948 // This can allow later combines to eliminate the G_AND if it turns out
4949 // that the mask is irrelevant.
4950 assert(MI.getOpcode() == TargetOpcode::G_AND);
4951 Register Dst = MI.getOperand(0).getReg();
4952 Register AndLHS = MI.getOperand(1).getReg();
4953 Register AndRHS = MI.getOperand(2).getReg();
4954 LLT WideTy = MRI.getType(Dst);
4955
4956 // If the potential binop has more than one use, then it's possible that one
4957 // of those uses will need its full width.
4958 if (!WideTy.isScalar() || !MRI.hasOneNonDBGUse(AndLHS))
4959 return false;
4960
4961 // Check if the LHS feeding the AND is impacted by the high bits that we're
4962 // masking out.
4963 //
4964 // e.g. for 64-bit x, y:
4965 //
4966 // add_64(x, y) & 65535 == zext(add_16(trunc(x), trunc(y))) & 65535
4967 MachineInstr *LHSInst = getDefIgnoringCopies(AndLHS, MRI);
4968 if (!LHSInst)
4969 return false;
4970 unsigned LHSOpc = LHSInst->getOpcode();
4971 switch (LHSOpc) {
4972 default:
4973 return false;
4974 case TargetOpcode::G_ADD:
4975 case TargetOpcode::G_SUB:
4976 case TargetOpcode::G_MUL:
4977 case TargetOpcode::G_AND:
4978 case TargetOpcode::G_OR:
4979 case TargetOpcode::G_XOR:
4980 break;
4981 }
4982
4983 // Find the mask on the RHS.
4984 auto Cst = getIConstantVRegValWithLookThrough(AndRHS, MRI);
4985 if (!Cst)
4986 return false;
4987 auto Mask = Cst->Value;
4988 if (!Mask.isMask())
4989 return false;
4990
4991 // No point in combining if there's nothing to truncate.
4992 unsigned NarrowWidth = Mask.countr_one();
4993 if (NarrowWidth == WideTy.getSizeInBits())
4994 return false;
4995 LLT NarrowTy = LLT::scalar(NarrowWidth);
4996
4997 // Check if adding the zext + truncates could be harmful.
4998 auto &MF = *MI.getMF();
4999 const auto &TLI = getTargetLowering();
5000 LLVMContext &Ctx = MF.getFunction().getContext();
5001 auto &DL = MF.getDataLayout();
5002 if (!TLI.isTruncateFree(WideTy, NarrowTy, DL, Ctx) ||
5003 !TLI.isZExtFree(NarrowTy, WideTy, DL, Ctx))
5004 return false;
5005 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_TRUNC, {NarrowTy, WideTy}}) ||
5006 !isLegalOrBeforeLegalizer({TargetOpcode::G_ZEXT, {WideTy, NarrowTy}}))
5007 return false;
5008 Register BinOpLHS = LHSInst->getOperand(1).getReg();
5009 Register BinOpRHS = LHSInst->getOperand(2).getReg();
5010 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5011 auto NarrowLHS = Builder.buildTrunc(NarrowTy, BinOpLHS);
5012 auto NarrowRHS = Builder.buildTrunc(NarrowTy, BinOpRHS);
5013 auto NarrowBinOp =
5014 Builder.buildInstr(LHSOpc, {NarrowTy}, {NarrowLHS, NarrowRHS});
5015 auto Ext = Builder.buildZExt(WideTy, NarrowBinOp);
5017 MI.getOperand(1).setReg(Ext.getReg(0));
5019 };
5020 return true;
5021}
5022
5024 unsigned Opc = MI.getOpcode();
5025 assert(Opc == TargetOpcode::G_UMULO || Opc == TargetOpcode::G_SMULO);
5026
5027 if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICstOrSplat(2)))
5028 return false;
5029
5030 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5032 unsigned NewOpc = Opc == TargetOpcode::G_UMULO ? TargetOpcode::G_UADDO
5033 : TargetOpcode::G_SADDO;
5034 MI.setDesc(Builder.getTII().get(NewOpc));
5035 MI.getOperand(3).setReg(MI.getOperand(2).getReg());
5037 };
5038 return true;
5039}
5040
5042 // (G_*MULO x, 0) -> 0 + no carry out
5043 assert(MI.getOpcode() == TargetOpcode::G_UMULO ||
5044 MI.getOpcode() == TargetOpcode::G_SMULO);
5045 if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICstOrSplat(0)))
5046 return false;
5047 Register Dst = MI.getOperand(0).getReg();
5048 Register Carry = MI.getOperand(1).getReg();
5051 return false;
5052 MatchInfo = [=](MachineIRBuilder &B) {
5053 B.buildConstant(Dst, 0);
5054 B.buildConstant(Carry, 0);
5055 };
5056 return true;
5057}
5058
5060 // (G_*ADDE x, y, 0) -> (G_*ADDO x, y)
5061 // (G_*SUBE x, y, 0) -> (G_*SUBO x, y)
5062 assert(MI.getOpcode() == TargetOpcode::G_UADDE ||
5063 MI.getOpcode() == TargetOpcode::G_SADDE ||
5064 MI.getOpcode() == TargetOpcode::G_USUBE ||
5065 MI.getOpcode() == TargetOpcode::G_SSUBE);
5066 if (!mi_match(MI.getOperand(4).getReg(), MRI, m_SpecificICstOrSplat(0)))
5067 return false;
5068 MatchInfo = [&](MachineIRBuilder &B) {
5069 unsigned NewOpcode;
5070 switch (MI.getOpcode()) {
5071 case TargetOpcode::G_UADDE:
5072 NewOpcode = TargetOpcode::G_UADDO;
5073 break;
5074 case TargetOpcode::G_SADDE:
5075 NewOpcode = TargetOpcode::G_SADDO;
5076 break;
5077 case TargetOpcode::G_USUBE:
5078 NewOpcode = TargetOpcode::G_USUBO;
5079 break;
5080 case TargetOpcode::G_SSUBE:
5081 NewOpcode = TargetOpcode::G_SSUBO;
5082 break;
5083 }
5085 MI.setDesc(B.getTII().get(NewOpcode));
5086 MI.removeOperand(4);
5088 };
5089 return true;
5090}
5091
5093 BuildFnTy &MatchInfo) {
5094 assert(MI.getOpcode() == TargetOpcode::G_SUB);
5095 Register Dst = MI.getOperand(0).getReg();
5096 // (x + y) - z -> x (if y == z)
5097 // (x + y) - z -> y (if x == z)
5098 Register X, Y, Z;
5099 if (mi_match(Dst, MRI, m_GSub(m_GAdd(m_Reg(X), m_Reg(Y)), m_Reg(Z)))) {
5100 Register ReplaceReg;
5101 int64_t CstX, CstY;
5102 if (Y == Z || (mi_match(Y, MRI, m_ICstOrSplat(CstY)) &&
5104 ReplaceReg = X;
5105 else if (X == Z || (mi_match(X, MRI, m_ICstOrSplat(CstX)) &&
5107 ReplaceReg = Y;
5108 if (ReplaceReg) {
5109 MatchInfo = [=](MachineIRBuilder &B) { B.buildCopy(Dst, ReplaceReg); };
5110 return true;
5111 }
5112 }
5113
5114 // x - (y + z) -> 0 - y (if x == z)
5115 // x - (y + z) -> 0 - z (if x == y)
5116 if (mi_match(Dst, MRI, m_GSub(m_Reg(X), m_GAdd(m_Reg(Y), m_Reg(Z))))) {
5117 Register ReplaceReg;
5118 int64_t CstX;
5119 if (X == Z || (mi_match(X, MRI, m_ICstOrSplat(CstX)) &&
5121 ReplaceReg = Y;
5122 else if (X == Y || (mi_match(X, MRI, m_ICstOrSplat(CstX)) &&
5124 ReplaceReg = Z;
5125 if (ReplaceReg) {
5126 MatchInfo = [=](MachineIRBuilder &B) {
5127 auto Zero = B.buildConstant(MRI.getType(Dst), 0);
5128 B.buildSub(Dst, Zero, ReplaceReg);
5129 };
5130 return true;
5131 }
5132 }
5133 return false;
5134}
5135
5137 assert(MI.getOpcode() == TargetOpcode::G_UDIV);
5138 auto &UDiv = cast<GenericMachineInstr>(MI);
5139 Register Dst = UDiv.getReg(0);
5140 Register LHS = UDiv.getReg(1);
5141 Register RHS = UDiv.getReg(2);
5142 LLT Ty = MRI.getType(Dst);
5143 LLT ScalarTy = Ty.getScalarType();
5144 const unsigned EltBits = ScalarTy.getScalarSizeInBits();
5145 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
5146 LLT ScalarShiftAmtTy = ShiftAmtTy.getScalarType();
5147
5148 auto &MIB = Builder;
5149
5150 bool UseSRL = false;
5151 SmallVector<Register, 16> Shifts, Factors;
5152 auto *RHSDefInstr = cast<GenericMachineInstr>(getDefIgnoringCopies(RHS, MRI));
5153 bool IsSplat = getIConstantSplatVal(*RHSDefInstr, MRI).has_value();
5154
5155 auto BuildExactUDIVPattern = [&](const Constant *C) {
5156 // Don't recompute inverses for each splat element.
5157 if (IsSplat && !Factors.empty()) {
5158 Shifts.push_back(Shifts[0]);
5159 Factors.push_back(Factors[0]);
5160 return true;
5161 }
5162
5163 auto *CI = cast<ConstantInt>(C);
5164 APInt Divisor = CI->getValue();
5165 unsigned Shift = Divisor.countr_zero();
5166 if (Shift) {
5167 Divisor.lshrInPlace(Shift);
5168 UseSRL = true;
5169 }
5170
5171 // Calculate the multiplicative inverse modulo BW.
5172 APInt Factor = Divisor.multiplicativeInverse();
5173 Shifts.push_back(MIB.buildConstant(ScalarShiftAmtTy, Shift).getReg(0));
5174 Factors.push_back(MIB.buildConstant(ScalarTy, Factor).getReg(0));
5175 return true;
5176 };
5177
5178 if (MI.getFlag(MachineInstr::MIFlag::IsExact)) {
5179 // Collect all magic values from the build vector.
5180 if (!matchUnaryPredicate(MRI, RHS, BuildExactUDIVPattern))
5181 llvm_unreachable("Expected unary predicate match to succeed");
5182
5183 Register Shift, Factor;
5184 if (Ty.isVector()) {
5185 Shift = MIB.buildBuildVector(ShiftAmtTy, Shifts).getReg(0);
5186 Factor = MIB.buildBuildVector(Ty, Factors).getReg(0);
5187 } else {
5188 Shift = Shifts[0];
5189 Factor = Factors[0];
5190 }
5191
5192 Register Res = LHS;
5193
5194 if (UseSRL)
5195 Res = MIB.buildLShr(Ty, Res, Shift, MachineInstr::IsExact).getReg(0);
5196
5197 return MIB.buildMul(Ty, Res, Factor);
5198 }
5199
5200 unsigned KnownLeadingZeros =
5201 KB ? KB->getKnownBits(LHS).countMinLeadingZeros() : 0;
5202
5203 bool UseNPQ = false;
5204 SmallVector<Register, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
5205 auto BuildUDIVPattern = [&](const Constant *C) {
5206 auto *CI = cast<ConstantInt>(C);
5207 const APInt &Divisor = CI->getValue();
5208
5209 bool SelNPQ = false;
5210 APInt Magic(Divisor.getBitWidth(), 0);
5211 unsigned PreShift = 0, PostShift = 0;
5212
5213 // Magic algorithm doesn't work for division by 1. We need to emit a select
5214 // at the end.
5215 // TODO: Use undef values for divisor of 1.
5216 if (!Divisor.isOne()) {
5217
5218 // UnsignedDivisionByConstantInfo doesn't work correctly if leading zeros
5219 // in the dividend exceed the leading zeros for the divisor.
5220 UnsignedDivisionByConstantInfo magics =
5221 UnsignedDivisionByConstantInfo::get(
5222 Divisor, std::min(KnownLeadingZeros, Divisor.countl_zero()));
5223
5224 Magic = std::move(magics.Magic);
5225
5226 assert(magics.PreShift < Divisor.getBitWidth() &&
5227 "We shouldn't generate an undefined shift!");
5228 assert(magics.PostShift < Divisor.getBitWidth() &&
5229 "We shouldn't generate an undefined shift!");
5230 assert((!magics.IsAdd || magics.PreShift == 0) && "Unexpected pre-shift");
5231 PreShift = magics.PreShift;
5232 PostShift = magics.PostShift;
5233 SelNPQ = magics.IsAdd;
5234 }
5235
5236 PreShifts.push_back(
5237 MIB.buildConstant(ScalarShiftAmtTy, PreShift).getReg(0));
5238 MagicFactors.push_back(MIB.buildConstant(ScalarTy, Magic).getReg(0));
5239 NPQFactors.push_back(
5240 MIB.buildConstant(ScalarTy,
5241 SelNPQ ? APInt::getOneBitSet(EltBits, EltBits - 1)
5242 : APInt::getZero(EltBits))
5243 .getReg(0));
5244 PostShifts.push_back(
5245 MIB.buildConstant(ScalarShiftAmtTy, PostShift).getReg(0));
5246 UseNPQ |= SelNPQ;
5247 return true;
5248 };
5249
5250 // Collect the shifts/magic values from each element.
5251 bool Matched = matchUnaryPredicate(MRI, RHS, BuildUDIVPattern);
5252 (void)Matched;
5253 assert(Matched && "Expected unary predicate match to succeed");
5254
5255 Register PreShift, PostShift, MagicFactor, NPQFactor;
5256 auto *RHSDef = getOpcodeDef<GBuildVector>(RHS, MRI);
5257 if (RHSDef) {
5258 PreShift = MIB.buildBuildVector(ShiftAmtTy, PreShifts).getReg(0);
5259 MagicFactor = MIB.buildBuildVector(Ty, MagicFactors).getReg(0);
5260 NPQFactor = MIB.buildBuildVector(Ty, NPQFactors).getReg(0);
5261 PostShift = MIB.buildBuildVector(ShiftAmtTy, PostShifts).getReg(0);
5262 } else {
5264 "Non-build_vector operation should have been a scalar");
5265 PreShift = PreShifts[0];
5266 MagicFactor = MagicFactors[0];
5267 PostShift = PostShifts[0];
5268 }
5269
5270 Register Q = LHS;
5271 Q = MIB.buildLShr(Ty, Q, PreShift).getReg(0);
5272
5273 // Multiply the numerator (operand 0) by the magic value.
5274 Q = MIB.buildUMulH(Ty, Q, MagicFactor).getReg(0);
5275
5276 if (UseNPQ) {
5277 Register NPQ = MIB.buildSub(Ty, LHS, Q).getReg(0);
5278
5279 // For vectors we might have a mix of non-NPQ/NPQ paths, so use
5280 // G_UMULH to act as a SRL-by-1 for NPQ, else multiply by zero.
5281 if (Ty.isVector())
5282 NPQ = MIB.buildUMulH(Ty, NPQ, NPQFactor).getReg(0);
5283 else
5284 NPQ = MIB.buildLShr(Ty, NPQ, MIB.buildConstant(ShiftAmtTy, 1)).getReg(0);
5285
5286 Q = MIB.buildAdd(Ty, NPQ, Q).getReg(0);
5287 }
5288
5289 Q = MIB.buildLShr(Ty, Q, PostShift).getReg(0);
5290 auto One = MIB.buildConstant(Ty, 1);
5291 auto IsOne = MIB.buildICmp(
5292 CmpInst::Predicate::ICMP_EQ,
5293 Ty.isScalar() ? LLT::scalar(1) : Ty.changeElementSize(1), RHS, One);
5294 return MIB.buildSelect(Ty, IsOne, LHS, Q);
5295}
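// Editor's illustrative sketch (not part of the original source): a minimal
// standalone check of the magic-number identity exploited above, specialised
// to a 32-bit unsigned divide by 3 (magic 0xAAAAAAAB, post-shift 1). The
// hypothetical helper below assumes <cstdint> and <cassert> are reachable and
// does not exercise the NPQ ("IsAdd") path.
[[maybe_unused]] static void exampleUDivByThreeMagic(uint32_t N) {
  // G_UMULH produces the high 32 bits of the 64-bit product.
  uint32_t Hi =
      static_cast<uint32_t>((static_cast<uint64_t>(N) * 0xAAAAAAABu) >> 32);
  // The post-shift then yields the exact quotient for every 32-bit N.
  assert((Hi >> 1) == N / 3 && "magic-number division identity");
}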
5296
5298 assert(MI.getOpcode() == TargetOpcode::G_UDIV);
5299 Register Dst = MI.getOperand(0).getReg();
5300 Register RHS = MI.getOperand(2).getReg();
5301 LLT DstTy = MRI.getType(Dst);
5302
5303 auto &MF = *MI.getMF();
5304 AttributeList Attr = MF.getFunction().getAttributes();
5305 const auto &TLI = getTargetLowering();
5306 LLVMContext &Ctx = MF.getFunction().getContext();
5307 auto &DL = MF.getDataLayout();
5308 if (TLI.isIntDivCheap(getApproximateEVTForLLT(DstTy, DL, Ctx), Attr))
5309 return false;
5310
5311 // Don't do this for minsize because the instruction sequence is usually
5312 // larger.
5313 if (MF.getFunction().hasMinSize())
5314 return false;
5315
5316 if (MI.getFlag(MachineInstr::MIFlag::IsExact)) {
5317 return matchUnaryPredicate(
5318 MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); });
5319 }
5320
5321 auto *RHSDef = MRI.getVRegDef(RHS);
5322 if (!isConstantOrConstantVector(*RHSDef, MRI))
5323 return false;
5324
5325 // Don't do this if the types are not going to be legal.
5326 if (LI) {
5327 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_MUL, {DstTy, DstTy}}))
5328 return false;
5329 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMULH, {DstTy}}))
5330 return false;
5331 if (!isLegalOrBeforeLegalizer(
5332 {TargetOpcode::G_ICMP,
5333 {DstTy.isVector() ? DstTy.changeElementSize(1) : LLT::scalar(1),
5334 DstTy}}))
5335 return false;
5336 }
5337
5338 return matchUnaryPredicate(
5339 MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); });
5340}
5341
5343 auto *NewMI = buildUDivUsingMul(MI);
5344 replaceSingleDefInstWithReg(MI, NewMI->getOperand(0).getReg());
5345}
5346
5348 assert(MI.getOpcode() == TargetOpcode::G_SDIV && "Expected SDIV");
5349 Register Dst = MI.getOperand(0).getReg();
5350 Register RHS = MI.getOperand(2).getReg();
5351 LLT DstTy = MRI.getType(Dst);
5352
5353 auto &MF = *MI.getMF();
5354 AttributeList Attr = MF.getFunction().getAttributes();
5355 const auto &TLI = getTargetLowering();
5356 LLVMContext &Ctx = MF.getFunction().getContext();
5357 auto &DL = MF.getDataLayout();
5358 if (TLI.isIntDivCheap(getApproximateEVTForLLT(DstTy, DL, Ctx), Attr))
5359 return false;
5360
5361 // Don't do this for minsize because the instruction sequence is usually
5362 // larger.
5363 if (MF.getFunction().hasMinSize())
5364 return false;
5365
5366 // If the sdiv has an 'exact' flag we can use a simpler lowering.
5367 if (MI.getFlag(MachineInstr::MIFlag::IsExact)) {
5368 return matchUnaryPredicate(
5369 MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); });
5370 }
5371
5372 // Don't support the general case for now.
5373 return false;
5374}
5375
5377 auto *NewMI = buildSDivUsingMul(MI);
5378 replaceSingleDefInstWithReg(MI, NewMI->getOperand(0).getReg());
5379}
5380
5382 assert(MI.getOpcode() == TargetOpcode::G_SDIV && "Expected SDIV");
5383 auto &SDiv = cast<GenericMachineInstr>(MI);
5384 Register Dst = SDiv.getReg(0);
5385 Register LHS = SDiv.getReg(1);
5386 Register RHS = SDiv.getReg(2);
5387 LLT Ty = MRI.getType(Dst);
5388 LLT ScalarTy = Ty.getScalarType();
5389 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
5390 LLT ScalarShiftAmtTy = ShiftAmtTy.getScalarType();
5391 auto &MIB = Builder;
5392
5393 bool UseSRA = false;
5394 SmallVector<Register, 16> Shifts, Factors;
5395
5396 auto *RHSDef = cast<GenericMachineInstr>(getDefIgnoringCopies(RHS, MRI));
5397 bool IsSplat = getIConstantSplatVal(*RHSDef, MRI).has_value();
5398
5399 auto BuildSDIVPattern = [&](const Constant *C) {
5400 // Don't recompute inverses for each splat element.
5401 if (IsSplat && !Factors.empty()) {
5402 Shifts.push_back(Shifts[0]);
5403 Factors.push_back(Factors[0]);
5404 return true;
5405 }
5406
5407 auto *CI = cast<ConstantInt>(C);
5408 APInt Divisor = CI->getValue();
5409 unsigned Shift = Divisor.countr_zero();
5410 if (Shift) {
5411 Divisor.ashrInPlace(Shift);
5412 UseSRA = true;
5413 }
5414
5415 // Calculate the multiplicative inverse modulo BW.
5416 // 2^W requires W + 1 bits, so we have to extend and then truncate.
5417 APInt Factor = Divisor.multiplicativeInverse();
5418 Shifts.push_back(MIB.buildConstant(ScalarShiftAmtTy, Shift).getReg(0));
5419 Factors.push_back(MIB.buildConstant(ScalarTy, Factor).getReg(0));
5420 return true;
5421 };
5422
5423 // Collect all magic values from the build vector.
5424 bool Matched = matchUnaryPredicate(MRI, RHS, BuildSDIVPattern);
5425 (void)Matched;
5426 assert(Matched && "Expected unary predicate match to succeed");
5427
5428 Register Shift, Factor;
5429 if (Ty.isVector()) {
5430 Shift = MIB.buildBuildVector(ShiftAmtTy, Shifts).getReg(0);
5431 Factor = MIB.buildBuildVector(Ty, Factors).getReg(0);
5432 } else {
5433 Shift = Shifts[0];
5434 Factor = Factors[0];
5435 }
5436
5437 Register Res = LHS;
5438
5439 if (UseSRA)
5440 Res = MIB.buildAShr(Ty, Res, Shift, MachineInstr::IsExact).getReg(0);
5441
5442 return MIB.buildMul(Ty, Res, Factor);
5443}
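// Editor's illustrative sketch (not part of the original source): the
// exact-division identity used above, specialised to an exact signed divide
// by 6. Shift out the trailing zero, then multiply by the multiplicative
// inverse of the odd part 3 modulo 2^32 (0xAAAAAAAB, since
// 3 * 0xAAAAAAAB == 1 (mod 2^32)). The hypothetical helper assumes
// <cstdint>/<cassert> are reachable.
[[maybe_unused]] static void exampleExactSDivBySix(int32_t N) {
  assert(N % 6 == 0 && "identity only holds for exact divisions");
  int32_t Half = N / 2;                                   // the exact shift step
  uint32_t Q = static_cast<uint32_t>(Half) * 0xAAAAAAABu; // multiply by inverse
  assert(static_cast<int32_t>(Q) == N / 6);
}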
5444
5446 assert((MI.getOpcode() == TargetOpcode::G_SDIV ||
5447 MI.getOpcode() == TargetOpcode::G_UDIV) &&
5448 "Expected SDIV or UDIV");
5449 auto &Div = cast<GenericMachineInstr>(MI);
5450 Register RHS = Div.getReg(2);
5451 auto MatchPow2 = [&](const Constant *C) {
5452 auto *CI = dyn_cast<ConstantInt>(C);
5453 return CI && (CI->getValue().isPowerOf2() ||
5454 (IsSigned && CI->getValue().isNegatedPowerOf2()));
5455 };
5456 return matchUnaryPredicate(MRI, RHS, MatchPow2, /*AllowUndefs=*/false);
5457}
5458
5460 assert(MI.getOpcode() == TargetOpcode::G_SDIV && "Expected SDIV");
5461 auto &SDiv = cast<GenericMachineInstr>(MI);
5462 Register Dst = SDiv.getReg(0);
5463 Register LHS = SDiv.getReg(1);
5464 Register RHS = SDiv.getReg(2);
5465 LLT Ty = MRI.getType(Dst);
5466 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
5467 LLT CCVT =
5468 Ty.isVector() ? LLT::vector(Ty.getElementCount(), 1) : LLT::scalar(1);
5469
5470 // Effectively we want to lower G_SDIV %lhs, %rhs, where %rhs is a power of 2,
5471 // to the following version:
5472 //
5473 // %c1 = G_CTTZ %rhs
5474 // %inexact = G_SUB $bitwidth, %c1
5475 // %sign = G_ASHR %lhs, $(bitwidth - 1)
5476 // %lshr = G_LSHR %sign, %inexact
5477 // %add = G_ADD %lhs, %lshr
5478 // %ashr = G_ASHR %add, %c1
5479 // %ashr = G_SELECT %isoneorallones, %lhs, %ashr
5480 // %zero = G_CONSTANT $0
5481 // %neg = G_NEG %ashr
5482 // %isneg = G_ICMP SLT %rhs, %zero
5483 // %res = G_SELECT %isneg, %neg, %ashr
5484
5485 unsigned BitWidth = Ty.getScalarSizeInBits();
5486 auto Zero = Builder.buildConstant(Ty, 0);
5487
5488 auto Bits = Builder.buildConstant(ShiftAmtTy, BitWidth);
5489 auto C1 = Builder.buildCTTZ(ShiftAmtTy, RHS);
5490 auto Inexact = Builder.buildSub(ShiftAmtTy, Bits, C1);
5491 // Splat the sign bit into the register
5492 auto Sign = Builder.buildAShr(
5493 Ty, LHS, Builder.buildConstant(ShiftAmtTy, BitWidth - 1));
5494
5495 // Add (LHS < 0) ? |RHS| - 1 : 0 so the arithmetic shift rounds towards zero.
5496 auto LSrl = Builder.buildLShr(Ty, Sign, Inexact);
5497 auto Add = Builder.buildAdd(Ty, LHS, LSrl);
5498 auto AShr = Builder.buildAShr(Ty, Add, C1);
5499
5500 // Special case: (sdiv X, 1) -> X
5501 // Special case: (sdiv X, -1) -> 0-X
5502 auto One = Builder.buildConstant(Ty, 1);
5503 auto MinusOne = Builder.buildConstant(Ty, -1);
5504 auto IsOne = Builder.buildICmp(CmpInst::Predicate::ICMP_EQ, CCVT, RHS, One);
5505 auto IsMinusOne =
5506 Builder.buildICmp(CmpInst::Predicate::ICMP_EQ, CCVT, RHS, MinusOne);
5507 auto IsOneOrMinusOne = Builder.buildOr(CCVT, IsOne, IsMinusOne);
5508 AShr = Builder.buildSelect(Ty, IsOneOrMinusOne, LHS, AShr);
5509
5510 // If divided by a positive value, we're done. Otherwise, the result must be
5511 // negated.
5512 auto Neg = Builder.buildNeg(Ty, AShr);
5513 auto IsNeg = Builder.buildICmp(CmpInst::Predicate::ICMP_SLT, CCVT, RHS, Zero);
5514 Builder.buildSelect(MI.getOperand(0).getReg(), IsNeg, Neg, AShr);
5515 MI.eraseFromParent();
5516}
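// Editor's worked example (not part of the original source) of the expansion
// above, for a 32-bit divide by 4 with LHS = -7; adding |RHS| - 1 to negative
// dividends makes the arithmetic shift round towards zero:
//   %sign = G_ASHR -7, 31          ; = -1 (all ones)
//   %lshr = G_LSHR %sign, 32 - 2   ; =  3 (= |RHS| - 1)
//   %add  = G_ADD  -7, 3           ; = -4
//   %ashr = G_ASHR %add, 2         ; = -1 (= -7 / 4, truncated)
// RHS is positive here, so the final negate/select leaves the result unchanged.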
5517
5519 assert(MI.getOpcode() == TargetOpcode::G_UDIV && "Expected UDIV");
5520 auto &UDiv = cast<GenericMachineInstr>(MI);
5521 Register Dst = UDiv.getReg(0);
5522 Register LHS = UDiv.getReg(1);
5523 Register RHS = UDiv.getReg(2);
5524 LLT Ty = MRI.getType(Dst);
5525 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
5526
5527 auto C1 = Builder.buildCTTZ(ShiftAmtTy, RHS);
5528 Builder.buildLShr(MI.getOperand(0).getReg(), LHS, C1);
5529 MI.eraseFromParent();
5530}
5531
5533 assert(MI.getOpcode() == TargetOpcode::G_UMULH);
5534 Register RHS = MI.getOperand(2).getReg();
5535 Register Dst = MI.getOperand(0).getReg();
5536 LLT Ty = MRI.getType(Dst);
5537 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
5538 auto MatchPow2ExceptOne = [&](const Constant *C) {
5539 if (auto *CI = dyn_cast<ConstantInt>(C))
5540 return CI->getValue().isPowerOf2() && !CI->getValue().isOne();
5541 return false;
5542 };
5543 if (!matchUnaryPredicate(MRI, RHS, MatchPow2ExceptOne, false))
5544 return false;
5545 return isLegalOrBeforeLegalizer({TargetOpcode::G_LSHR, {Ty, ShiftAmtTy}});
5546}
5547
5549 Register LHS = MI.getOperand(1).getReg();
5550 Register RHS = MI.getOperand(2).getReg();
5551 Register Dst = MI.getOperand(0).getReg();
5552 LLT Ty = MRI.getType(Dst);
5553 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
5554 unsigned NumEltBits = Ty.getScalarSizeInBits();
5555
5556 auto LogBase2 = buildLogBase2(RHS, Builder);
5557 auto ShiftAmt =
5558 Builder.buildSub(Ty, Builder.buildConstant(Ty, NumEltBits), LogBase2);
5559 auto Trunc = Builder.buildZExtOrTrunc(ShiftAmtTy, ShiftAmt);
5560 Builder.buildLShr(Dst, LHS, Trunc);
5561 MI.eraseFromParent();
5562}
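// Editor's illustrative sketch (not part of the original source): for a
// power-of-two multiplicand the high half of the product is just a right
// shift, which is what the combine above builds. The hypothetical helper
// assumes <cstdint>/<cassert> are reachable.
[[maybe_unused]] static void exampleUMulHByEight(uint32_t X) {
  // G_UMULH X, 8 == the high 32 bits of the 64-bit product X * 8.
  uint32_t High =
      static_cast<uint32_t>((static_cast<uint64_t>(X) * 8u) >> 32);
  assert(High == (X >> (32 - 3)) && "umulh by 2^k is a shift by BW - k");
}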
5563
5565 BuildFnTy &MatchInfo) {
5566 unsigned Opc = MI.getOpcode();
5567 assert(Opc == TargetOpcode::G_FADD || Opc == TargetOpcode::G_FSUB ||
5568 Opc == TargetOpcode::G_FMUL || Opc == TargetOpcode::G_FDIV ||
5569 Opc == TargetOpcode::G_FMAD || Opc == TargetOpcode::G_FMA);
5570
5571 Register Dst = MI.getOperand(0).getReg();
5572 Register X = MI.getOperand(1).getReg();
5573 Register Y = MI.getOperand(2).getReg();
5574 LLT Type = MRI.getType(Dst);
5575
5576 // fold (fadd x, fneg(y)) -> (fsub x, y)
5577 // fold (fadd fneg(y), x) -> (fsub x, y)
5578 // G_FADD is commutative, so both cases are checked by m_GFAdd.
5579 if (mi_match(Dst, MRI, m_GFAdd(m_Reg(X), m_GFNeg(m_Reg(Y)))) &&
5580 isLegalOrBeforeLegalizer({TargetOpcode::G_FSUB, {Type}})) {
5581 Opc = TargetOpcode::G_FSUB;
5582 }
5583 // fold (fsub x, fneg(y)) -> (fadd x, y)
5584 else if (mi_match(Dst, MRI, m_GFSub(m_Reg(X), m_GFNeg(m_Reg(Y)))) &&
5585 isLegalOrBeforeLegalizer({TargetOpcode::G_FADD, {Type}})) {
5586 Opc = TargetOpcode::G_FADD;
5587 }
5588 // fold (fmul fneg(x), fneg(y)) -> (fmul x, y)
5589 // fold (fdiv fneg(x), fneg(y)) -> (fdiv x, y)
5590 // fold (fmad fneg(x), fneg(y), z) -> (fmad x, y, z)
5591 // fold (fma fneg(x), fneg(y), z) -> (fma x, y, z)
5592 else if ((Opc == TargetOpcode::G_FMUL || Opc == TargetOpcode::G_FDIV ||
5593 Opc == TargetOpcode::G_FMAD || Opc == TargetOpcode::G_FMA) &&
5594 mi_match(X, MRI, m_GFNeg(m_Reg(X))) &&
5595 mi_match(Y, MRI, m_GFNeg(m_Reg(Y)))) {
5596 // no opcode change
5597 } else
5598 return false;
5599
5600 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5602 MI.setDesc(B.getTII().get(Opc));
5603 MI.getOperand(1).setReg(X);
5604 MI.getOperand(2).setReg(Y);
5606 };
5607 return true;
5608}
5609
5611 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
5612
5613 Register LHS = MI.getOperand(1).getReg();
5614 MatchInfo = MI.getOperand(2).getReg();
5615 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
5616
5617 const auto LHSCst = Ty.isVector()
5618 ? getFConstantSplat(LHS, MRI, /* allowUndef */ true)
5619 : getFConstantVRegValWithLookThrough(LHS, MRI);
5620 if (!LHSCst)
5621 return false;
5622
5623 // -0.0 is always allowed
5624 if (LHSCst->Value.isNegZero())
5625 return true;
5626
5627 // +0.0 is only allowed if nsz is set: (fsub +0.0, +0.0) is +0.0, but (fneg +0.0) is -0.0.
5628 if (LHSCst->Value.isPosZero())
5629 return MI.getFlag(MachineInstr::FmNsz);
5630
5631 return false;
5632}
5633
5635 Register Dst = MI.getOperand(0).getReg();
5636 Builder.buildFNeg(
5637 Dst, Builder.buildFCanonicalize(MRI.getType(Dst), MatchInfo).getReg(0));
5638 eraseInst(MI);
5639}
5640
5641/// Checks if \p MI is TargetOpcode::G_FMUL and contractable either
5642/// due to global flags or MachineInstr flags.
5643static bool isContractableFMul(MachineInstr &MI, bool AllowFusionGlobally) {
5644 if (MI.getOpcode() != TargetOpcode::G_FMUL)
5645 return false;
5646 return AllowFusionGlobally || MI.getFlag(MachineInstr::MIFlag::FmContract);
5647}
5648
5649static bool hasMoreUses(const MachineInstr &MI0, const MachineInstr &MI1,
5650 const MachineRegisterInfo &MRI) {
5651 return std::distance(MRI.use_instr_nodbg_begin(MI0.getOperand(0).getReg()),
5652 MRI.use_instr_nodbg_end()) >
5653 std::distance(MRI.use_instr_nodbg_begin(MI1.getOperand(0).getReg()),
5654 MRI.use_instr_nodbg_end());
5655}
5656
5658 bool &AllowFusionGlobally,
5659 bool &HasFMAD, bool &Aggressive,
5660 bool CanReassociate) {
5661
5662 auto *MF = MI.getMF();
5663 const auto &TLI = *MF->getSubtarget().getTargetLowering();
5664 const TargetOptions &Options = MF->getTarget().Options;
5665 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
5666
5667 if (CanReassociate &&
5668 !(Options.UnsafeFPMath || MI.getFlag(MachineInstr::MIFlag::FmReassoc)))
5669 return false;
5670
5671 // Floating-point multiply-add with intermediate rounding.
5672 HasFMAD = (!isPreLegalize() && TLI.isFMADLegal(MI, DstType));
5673 // Floating-point multiply-add without intermediate rounding.
5674 bool HasFMA = TLI.isFMAFasterThanFMulAndFAdd(*MF, DstType) &&
5675 isLegalOrBeforeLegalizer({TargetOpcode::G_FMA, {DstType}});
5676 // No valid opcode, do not combine.
5677 if (!HasFMAD && !HasFMA)
5678 return false;
5679
5680 AllowFusionGlobally = Options.AllowFPOpFusion == FPOpFusion::Fast ||
5681 Options.UnsafeFPMath || HasFMAD;
5682 // If the addition is not contractable, do not combine.
5683 if (!AllowFusionGlobally && !MI.getFlag(MachineInstr::MIFlag::FmContract))
5684 return false;
5685
5686 Aggressive = TLI.enableAggressiveFMAFusion(DstType);
5687 return true;
5688}
5689
5691 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
5692 assert(MI.getOpcode() == TargetOpcode::G_FADD);
5693
5694 bool AllowFusionGlobally, HasFMAD, Aggressive;
5695 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
5696 return false;
5697
5698 Register Op1 = MI.getOperand(1).getReg();
5699 Register Op2 = MI.getOperand(2).getReg();
5700 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
5701 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
5702 unsigned PreferredFusedOpcode =
5703 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
5704
5705 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
5706 // prefer to fold the multiply with fewer uses.
5707 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
5708 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
5709 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
5710 std::swap(LHS, RHS);
5711 }
5712
5713 // fold (fadd (fmul x, y), z) -> (fma x, y, z)
5714 if (isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
5715 (Aggressive || MRI.hasOneNonDBGUse(LHS.Reg))) {
5716 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5717 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5718 {LHS.MI->getOperand(1).getReg(),
5719 LHS.MI->getOperand(2).getReg(), RHS.Reg});
5720 };
5721 return true;
5722 }
5723
5724 // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
5725 if (isContractableFMul(*RHS.MI, AllowFusionGlobally) &&
5726 (Aggressive || MRI.hasOneNonDBGUse(RHS.Reg))) {
5727 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5728 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5729 {RHS.MI->getOperand(1).getReg(),
5730 RHS.MI->getOperand(2).getReg(), LHS.Reg});
5731 };
5732 return true;
5733 }
5734
5735 return false;
5736}
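// Editor's illustrative example (not part of the original source): with
// fusion allowed (fast FP-op fusion or contract flags), and %m having no
// other non-debug use (or aggressive fusion enabled),
//   %m:_(s32) = G_FMUL %x, %y
//   %s:_(s32) = G_FADD %m, %z
// is rewritten to
//   %s:_(s32) = G_FMA %x, %y, %z     ; or G_FMAD when only FMAD is legal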
5737
5739 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
5740 assert(MI.getOpcode() == TargetOpcode::G_FADD);
5741
5742 bool AllowFusionGlobally, HasFMAD, Aggressive;
5743 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
5744 return false;
5745
5746 const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
5747 Register Op1 = MI.getOperand(1).getReg();
5748 Register Op2 = MI.getOperand(2).getReg();
5749 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
5750 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
5751 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
5752
5753 unsigned PreferredFusedOpcode =
5754 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
5755
5756 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
5757 // prefer to fold the multiply with fewer uses.
5758 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
5759 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
5760 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
5761 std::swap(LHS, RHS);
5762 }
5763
5764 // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
5765 MachineInstr *FpExtSrc;
5766 if (mi_match(LHS.Reg, MRI, m_GFPExt(m_MInstr(FpExtSrc))) &&
5767 isContractableFMul(*FpExtSrc, AllowFusionGlobally) &&
5768 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
5769 MRI.getType(FpExtSrc->getOperand(1).getReg()))) {
5770 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5771 auto FpExtX = B.buildFPExt(DstType, FpExtSrc->getOperand(1).getReg());
5772 auto FpExtY = B.buildFPExt(DstType, FpExtSrc->getOperand(2).getReg());
5773 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5774 {FpExtX.getReg(0), FpExtY.getReg(0), RHS.Reg});
5775 };
5776 return true;
5777 }
5778
5779 // fold (fadd z, (fpext (fmul x, y))) -> (fma (fpext x), (fpext y), z)
5780 // Note: Commutes FADD operands.
5781 if (mi_match(RHS.Reg, MRI, m_GFPExt(m_MInstr(FpExtSrc))) &&
5782 isContractableFMul(*FpExtSrc, AllowFusionGlobally) &&
5783 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
5784 MRI.getType(FpExtSrc->getOperand(1).getReg()))) {
5785 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5786 auto FpExtX = B.buildFPExt(DstType, FpExtSrc->getOperand(1).getReg());
5787 auto FpExtY = B.buildFPExt(DstType, FpExtSrc->getOperand(2).getReg());
5788 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5789 {FpExtX.getReg(0), FpExtY.getReg(0), LHS.Reg});
5790 };
5791 return true;
5792 }
5793
5794 return false;
5795}
5796
5798 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
5799 assert(MI.getOpcode() == TargetOpcode::G_FADD);
5800
5801 bool AllowFusionGlobally, HasFMAD, Aggressive;
5802 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive, true))
5803 return false;
5804
5805 Register Op1 = MI.getOperand(1).getReg();
5806 Register Op2 = MI.getOperand(2).getReg();
5807 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
5808 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
5809 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
5810
5811 unsigned PreferredFusedOpcode =
5812 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
5813
5814 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
5815 // prefer to fold the multiply with fewer uses.
5816 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
5817 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
5818 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
5819 std::swap(LHS, RHS);
5820 }
5821
5822 MachineInstr *FMA = nullptr;
5823 Register Z;
5824 // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z))
5825 if (LHS.MI->getOpcode() == PreferredFusedOpcode &&
5826 (MRI.getVRegDef(LHS.MI->getOperand(3).getReg())->getOpcode() ==
5827 TargetOpcode::G_FMUL) &&
5828 MRI.hasOneNonDBGUse(LHS.MI->getOperand(0).getReg()) &&
5829 MRI.hasOneNonDBGUse(LHS.MI->getOperand(3).getReg())) {
5830 FMA = LHS.MI;
5831 Z = RHS.Reg;
5832 }
5833 // fold (fadd z, (fma x, y, (fmul u, v))) -> (fma x, y, (fma u, v, z))
5834 else if (RHS.MI->getOpcode() == PreferredFusedOpcode &&
5835 (MRI.getVRegDef(RHS.MI->getOperand(3).getReg())->getOpcode() ==
5836 TargetOpcode::G_FMUL) &&
5837 MRI.hasOneNonDBGUse(RHS.MI->getOperand(0).getReg()) &&
5838 MRI.hasOneNonDBGUse(RHS.MI->getOperand(3).getReg())) {
5839 Z = LHS.Reg;
5840 FMA = RHS.MI;
5841 }
5842
5843 if (FMA) {
5844 MachineInstr *FMulMI = MRI.getVRegDef(FMA->getOperand(3).getReg());
5845 Register X = FMA->getOperand(1).getReg();
5846 Register Y = FMA->getOperand(2).getReg();
5847 Register U = FMulMI->getOperand(1).getReg();
5848 Register V = FMulMI->getOperand(2).getReg();
5849
5850 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5851 Register InnerFMA = MRI.createGenericVirtualRegister(DstTy);
5852 B.buildInstr(PreferredFusedOpcode, {InnerFMA}, {U, V, Z});
5853 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5854 {X, Y, InnerFMA});
5855 };
5856 return true;
5857 }
5858
5859 return false;
5860}
5861
5863 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
5864 assert(MI.getOpcode() == TargetOpcode::G_FADD);
5865
5866 bool AllowFusionGlobally, HasFMAD, Aggressive;
5867 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
5868 return false;
5869
5870 if (!Aggressive)
5871 return false;
5872
5873 const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
5874 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
5875 Register Op1 = MI.getOperand(1).getReg();
5876 Register Op2 = MI.getOperand(2).getReg();
5877 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
5878 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
5879
5880 unsigned PreferredFusedOpcode =
5881 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
5882
5883 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
5884 // prefer to fold the multiply with fewer uses.
5885 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
5886 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
5887 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
5888 std::swap(LHS, RHS);
5889 }
5890
5891 // Builds: (fma x, y, (fma (fpext u), (fpext v), z))
5892 auto buildMatchInfo = [=, &MI](Register U, Register V, Register Z, Register X,
5893 Register Y, MachineIRBuilder &B) {
5894 Register FpExtU = B.buildFPExt(DstType, U).getReg(0);
5895 Register FpExtV = B.buildFPExt(DstType, V).getReg(0);
5896 Register InnerFMA =
5897 B.buildInstr(PreferredFusedOpcode, {DstType}, {FpExtU, FpExtV, Z})
5898 .getReg(0);
5899 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5900 {X, Y, InnerFMA});
5901 };
5902
5903 MachineInstr *FMulMI, *FMAMI;
5904 // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
5905 // -> (fma x, y, (fma (fpext u), (fpext v), z))
5906 if (LHS.MI->getOpcode() == PreferredFusedOpcode &&
5907 mi_match(LHS.MI->getOperand(3).getReg(), MRI,
5908 m_GFPExt(m_MInstr(FMulMI))) &&
5909 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
5910 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
5911 MRI.getType(FMulMI->getOperand(0).getReg()))) {
5912 MatchInfo = [=](MachineIRBuilder &B) {
5913 buildMatchInfo(FMulMI->getOperand(1).getReg(),
5914 FMulMI->getOperand(2).getReg(), RHS.Reg,
5915 LHS.MI->getOperand(1).getReg(),
5916 LHS.MI->getOperand(2).getReg(), B);
5917 };
5918 return true;
5919 }
5920
5921 // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
5922 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
5923 // FIXME: This turns two single-precision and one double-precision
5924 // operation into two double-precision operations, which might not be
5925 // interesting for all targets, especially GPUs.
5926 if (mi_match(LHS.Reg, MRI, m_GFPExt(m_MInstr(FMAMI))) &&
5927 FMAMI->getOpcode() == PreferredFusedOpcode) {
5928 MachineInstr *FMulMI = MRI.getVRegDef(FMAMI->getOperand(3).getReg());
5929 if (isContractableFMul(*FMulMI, AllowFusionGlobally) &&
5930 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
5931 MRI.getType(FMAMI->getOperand(0).getReg()))) {
5932 MatchInfo = [=](MachineIRBuilder &B) {
5933 Register X = FMAMI->getOperand(1).getReg();
5934 Register Y = FMAMI->getOperand(2).getReg();
5935 X = B.buildFPExt(DstType, X).getReg(0);
5936 Y = B.buildFPExt(DstType, Y).getReg(0);
5937 buildMatchInfo(FMulMI->getOperand(1).getReg(),
5938 FMulMI->getOperand(2).getReg(), RHS.Reg, X, Y, B);
5939 };
5940
5941 return true;
5942 }
5943 }
5944
5945 // fold (fadd z, (fma x, y, (fpext (fmul u, v)))
5946 // -> (fma x, y, (fma (fpext u), (fpext v), z))
5947 if (RHS.MI->getOpcode() == PreferredFusedOpcode &&
5948 mi_match(RHS.MI->getOperand(3).getReg(), MRI,
5949 m_GFPExt(m_MInstr(FMulMI))) &&
5950 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
5951 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
5952 MRI.getType(FMulMI->getOperand(0).getReg()))) {
5953 MatchInfo = [=](MachineIRBuilder &B) {
5954 buildMatchInfo(FMulMI->getOperand(1).getReg(),
5955 FMulMI->getOperand(2).getReg(), LHS.Reg,
5956 RHS.MI->getOperand(1).getReg(),
5957 RHS.MI->getOperand(2).getReg(), B);
5958 };
5959 return true;
5960 }
5961
5962 // fold (fadd z, (fpext (fma x, y, (fmul u, v)))
5963 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
5964 // FIXME: This turns two single-precision and one double-precision
5965 // operation into two double-precision operations, which might not be
5966 // interesting for all targets, especially GPUs.
5967 if (mi_match(RHS.Reg, MRI, m_GFPExt(m_MInstr(FMAMI))) &&
5968 FMAMI->getOpcode() == PreferredFusedOpcode) {
5969 MachineInstr *FMulMI = MRI.getVRegDef(FMAMI->getOperand(3).getReg());
5970 if (isContractableFMul(*FMulMI, AllowFusionGlobally) &&
5971 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
5972 MRI.getType(FMAMI->getOperand(0).getReg()))) {
5973 MatchInfo = [=](MachineIRBuilder &B) {
5974 Register X = FMAMI->getOperand(1).getReg();
5975 Register Y = FMAMI->getOperand(2).getReg();
5976 X = B.buildFPExt(DstType, X).getReg(0);
5977 Y = B.buildFPExt(DstType, Y).getReg(0);
5978 buildMatchInfo(FMulMI->getOperand(1).getReg(),
5979 FMulMI->getOperand(2).getReg(), LHS.Reg, X, Y, B);
5980 };
5981 return true;
5982 }
5983 }
5984
5985 return false;
5986}
5987
5989 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
5990 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
5991
5992 bool AllowFusionGlobally, HasFMAD, Aggressive;
5993 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
5994 return false;
5995
5996 Register Op1 = MI.getOperand(1).getReg();
5997 Register Op2 = MI.getOperand(2).getReg();
5998 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
5999 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
6000 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6001
6002 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
6003 // prefer to fold the multiply with fewer uses.
6004 bool FirstMulHasFewerUses = true;
6005 if (isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6006 isContractableFMul(*RHS.MI, AllowFusionGlobally) &&
6007 hasMoreUses(*LHS.MI, *RHS.MI, MRI))
6008 FirstMulHasFewerUses = false;
6009
6010 unsigned PreferredFusedOpcode =
6011 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6012
6013 // fold (fsub (fmul x, y), z) -> (fma x, y, -z)
6014 if (FirstMulHasFewerUses &&
6015 (isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6016 (Aggressive || MRI.hasOneNonDBGUse(LHS.Reg)))) {
6017 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6018 Register NegZ = B.buildFNeg(DstTy, RHS.Reg).getReg(0);
6019 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6020 {LHS.MI->getOperand(1).getReg(),
6021 LHS.MI->getOperand(2).getReg(), NegZ});
6022 };
6023 return true;
6024 }
6025 // fold (fsub x, (fmul y, z)) -> (fma -y, z, x)
6026 else if ((isContractableFMul(*RHS.MI, AllowFusionGlobally) &&
6027 (Aggressive || MRI.hasOneNonDBGUse(RHS.Reg)))) {
6028 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6029 Register NegY =
6030 B.buildFNeg(DstTy, RHS.MI->getOperand(1).getReg()).getReg(0);
6031 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6032 {NegY, RHS.MI->getOperand(2).getReg(), LHS.Reg});
6033 };
6034 return true;
6035 }
6036
6037 return false;
6038}
6039
6041 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
6042 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
6043
6044 bool AllowFusionGlobally, HasFMAD, Aggressive;
6045 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6046 return false;
6047
6048 Register LHSReg = MI.getOperand(1).getReg();
6049 Register RHSReg = MI.getOperand(2).getReg();
6050 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6051
6052 unsigned PreferredFusedOpcode =
6053 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6054
6055 MachineInstr *FMulMI;
6056 // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
6057 if (mi_match(LHSReg, MRI, m_GFNeg(m_MInstr(FMulMI))) &&
6058 (Aggressive || (MRI.hasOneNonDBGUse(LHSReg) &&
6059 MRI.hasOneNonDBGUse(FMulMI->getOperand(0).getReg()))) &&
6060 isContractableFMul(*FMulMI, AllowFusionGlobally)) {
6061 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6062 Register NegX =
6063 B.buildFNeg(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
6064 Register NegZ = B.buildFNeg(DstTy, RHSReg).getReg(0);
6065 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6066 {NegX, FMulMI->getOperand(2).getReg(), NegZ});
6067 };
6068 return true;
6069 }
6070
6071 // fold (fsub x, (fneg (fmul, y, z))) -> (fma y, z, x)
6072 if (mi_match(RHSReg, MRI, m_GFNeg(m_MInstr(FMulMI))) &&
6073 (Aggressive || (MRI.hasOneNonDBGUse(RHSReg) &&
6074 MRI.hasOneNonDBGUse(FMulMI->getOperand(0).getReg()))) &&
6075 isContractableFMul(*FMulMI, AllowFusionGlobally)) {
6076 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6077 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6078 {FMulMI->getOperand(1).getReg(),
6079 FMulMI->getOperand(2).getReg(), LHSReg});
6080 };
6081 return true;
6082 }
6083
6084 return false;
6085}
6086
6088 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
6089 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
6090
6091 bool AllowFusionGlobally, HasFMAD, Aggressive;
6092 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6093 return false;
6094
6095 Register LHSReg = MI.getOperand(1).getReg();
6096 Register RHSReg = MI.getOperand(2).getReg();
6097 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6098
6099 unsigned PreferredFusedOpcode =
6100 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6101
6102 MachineInstr *FMulMI;
6103 // fold (fsub (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), (fneg z))
6104 if (mi_match(LHSReg, MRI, m_GFPExt(m_MInstr(FMulMI))) &&
6105 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6106 (Aggressive || MRI.hasOneNonDBGUse(LHSReg))) {
6107 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6108 Register FpExtX =
6109 B.buildFPExt(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
6110 Register FpExtY =
6111 B.buildFPExt(DstTy, FMulMI->getOperand(2).getReg()).getReg(0);
6112 Register NegZ = B.buildFNeg(DstTy, RHSReg).getReg(0);
6113 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6114 {FpExtX, FpExtY, NegZ});
6115 };
6116 return true;
6117 }
6118
6119 // fold (fsub x, (fpext (fmul y, z))) -> (fma (fneg (fpext y)), (fpext z), x)
6120 if (mi_match(RHSReg, MRI, m_GFPExt(m_MInstr(FMulMI))) &&
6121 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6122 (Aggressive || MRI.hasOneNonDBGUse(RHSReg))) {
6123 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6124 Register FpExtY =
6125 B.buildFPExt(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
6126 Register NegY = B.buildFNeg(DstTy, FpExtY).getReg(0);
6127 Register FpExtZ =
6128 B.buildFPExt(DstTy, FMulMI->getOperand(2).getReg()).getReg(0);
6129 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6130 {NegY, FpExtZ, LHSReg});
6131 };
6132 return true;
6133 }
6134
6135 return false;
6136}
6137
6139 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
6140 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
6141
6142 bool AllowFusionGlobally, HasFMAD, Aggressive;
6143 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6144 return false;
6145
6146 const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
6147 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6148 Register LHSReg = MI.getOperand(1).getReg();
6149 Register RHSReg = MI.getOperand(2).getReg();
6150
6151 unsigned PreferredFusedOpcode =
6152 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6153
6154 auto buildMatchInfo = [=](Register Dst, Register X, Register Y, Register Z,
6155 MachineIRBuilder &B) {
6156 Register FpExtX = B.buildFPExt(DstTy, X).getReg(0);
6157 Register FpExtY = B.buildFPExt(DstTy, Y).getReg(0);
6158 B.buildInstr(PreferredFusedOpcode, {Dst}, {FpExtX, FpExtY, Z});
6159 };
6160
6161 MachineInstr *FMulMI;
6162 // fold (fsub (fpext (fneg (fmul x, y))), z) ->
6163 // (fneg (fma (fpext x), (fpext y), z))
6164 // fold (fsub (fneg (fpext (fmul x, y))), z) ->
6165 // (fneg (fma (fpext x), (fpext y), z))
6166 if ((mi_match(LHSReg, MRI, m_GFPExt(m_GFNeg(m_MInstr(FMulMI)))) ||
6167 mi_match(LHSReg, MRI, m_GFNeg(m_GFPExt(m_MInstr(FMulMI))))) &&
6168 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6169 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstTy,
6170 MRI.getType(FMulMI->getOperand(0).getReg()))) {
6171 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6172 Register FMAReg = MRI.createGenericVirtualRegister(DstTy);
6173 buildMatchInfo(FMAReg, FMulMI->getOperand(1).getReg(),
6174 FMulMI->getOperand(2).getReg(), RHSReg, B);
6175 B.buildFNeg(MI.getOperand(0).getReg(), FMAReg);
6176 };
6177 return true;
6178 }
6179
6180 // fold (fsub x, (fpext (fneg (fmul y, z)))) -> (fma (fpext y), (fpext z), x)
6181 // fold (fsub x, (fneg (fpext (fmul y, z)))) -> (fma (fpext y), (fpext z), x)
6182 if ((mi_match(RHSReg, MRI, m_GFPExt(m_GFNeg(m_MInstr(FMulMI)))) ||
6183 mi_match(RHSReg, MRI, m_GFNeg(m_GFPExt(m_MInstr(FMulMI))))) &&
6184 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6185 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstTy,
6186 MRI.getType(FMulMI->getOperand(0).getReg()))) {
6187 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6188 buildMatchInfo(MI.getOperand(0).getReg(), FMulMI->getOperand(1).getReg(),
6189 FMulMI->getOperand(2).getReg(), LHSReg, B);
6190 };
6191 return true;
6192 }
6193
6194 return false;
6195}
6196
6198 unsigned &IdxToPropagate) {
6199 bool PropagateNaN;
6200 switch (MI.getOpcode()) {
6201 default:
6202 return false;
6203 case TargetOpcode::G_FMINNUM:
6204 case TargetOpcode::G_FMAXNUM:
6205 PropagateNaN = false;
6206 break;
6207 case TargetOpcode::G_FMINIMUM:
6208 case TargetOpcode::G_FMAXIMUM:
6209 PropagateNaN = true;
6210 break;
6211 }
6212
6213 auto MatchNaN = [&](unsigned Idx) {
6214 Register MaybeNaNReg = MI.getOperand(Idx).getReg();
6215 const ConstantFP *MaybeCst = getConstantFPVRegVal(MaybeNaNReg, MRI);
6216 if (!MaybeCst || !MaybeCst->getValueAPF().isNaN())
6217 return false;
6218 IdxToPropagate = PropagateNaN ? Idx : (Idx == 1 ? 2 : 1);
6219 return true;
6220 };
6221
6222 return MatchNaN(1) || MatchNaN(2);
6223}
6224
6226 assert(MI.getOpcode() == TargetOpcode::G_ADD && "Expected a G_ADD");
6227 Register LHS = MI.getOperand(1).getReg();
6228 Register RHS = MI.getOperand(2).getReg();
6229
6230 // Helper lambda to check for opportunities for
6231 // A + (B - A) -> B
6232 // (B - A) + A -> B
6233 auto CheckFold = [&](Register MaybeSub, Register MaybeSameReg) {
6234 Register Reg;
6235 return mi_match(MaybeSub, MRI, m_GSub(m_Reg(Src), m_Reg(Reg))) &&
6236 Reg == MaybeSameReg;
6237 };
6238 return CheckFold(LHS, RHS) || CheckFold(RHS, LHS);
6239}
6240
6242 Register &MatchInfo) {
6243 // This combine folds the following patterns:
6244 //
6245 // G_BUILD_VECTOR_TRUNC (G_BITCAST(x), G_LSHR(G_BITCAST(x), k))
6246 // G_BUILD_VECTOR(G_TRUNC(G_BITCAST(x)), G_TRUNC(G_LSHR(G_BITCAST(x), k)))
6247 // into
6248 // x
6249 // if
6250 // k == size of VecEltTy in bits (i.e. half the size of x)
6251 // type(x) == type(dst)
6252 //
6253 // G_BUILD_VECTOR(G_TRUNC(G_BITCAST(x)), undef)
6254 // into
6255 // x
6256 // if
6257 // type(x) == type(dst)
6258
6259 LLT DstVecTy = MRI.getType(MI.getOperand(0).getReg());
6260 LLT DstEltTy = DstVecTy.getElementType();
6261
6262 Register Lo, Hi;
6263
6264 if (mi_match(
6265 MI, MRI,
6266 m_GBuildVector(m_GTrunc(m_GBitcast(m_Reg(Lo))), m_GImplicitDef()))) {
6267 MatchInfo = Lo;
6268 return MRI.getType(MatchInfo) == DstVecTy;
6269 }
6270
6271 std::optional<ValueAndVReg> ShiftAmount;
6272 const auto LoPattern = m_GBitcast(m_Reg(Lo));
6273 const auto HiPattern = m_GLShr(m_GBitcast(m_Reg(Hi)), m_GCst(ShiftAmount));
6274 if (mi_match(
6275 MI, MRI,
6276 m_any_of(m_GBuildVectorTrunc(LoPattern, HiPattern),
6277 m_GBuildVector(m_GTrunc(LoPattern), m_GTrunc(HiPattern))))) {
6278 if (Lo == Hi && ShiftAmount->Value == DstEltTy.getSizeInBits()) {
6279 MatchInfo = Lo;
6280 return MRI.getType(MatchInfo) == DstVecTy;
6281 }
6282 }
6283
6284 return false;
6285}
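// Editor's illustrative example (not part of the original source): for
//   %bc:_(s64) = G_BITCAST %x:_(<2 x s32>)
//   %lo:_(s32) = G_TRUNC %bc
//   %hi:_(s32) = G_TRUNC (G_LSHR %bc, 32)
// the G_BUILD_VECTOR %lo, %hi merely reassembles %x, so it is replaced by %x;
// the shift amount 32 equals the destination element size in bits.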
6286
6288 Register &MatchInfo) {
6289 // Replace (G_TRUNC (G_BITCAST (G_BUILD_VECTOR x, y)) with just x
6290 // if type(x) == type(G_TRUNC)
6291 if (!mi_match(MI.getOperand(1).getReg(), MRI,
6292 m_GBitcast(m_GBuildVector(m_Reg(MatchInfo), m_Reg()))))
6293 return false;
6294
6295 return MRI.getType(MatchInfo) == MRI.getType(MI.getOperand(0).getReg());
6296}
6297
6299 Register &MatchInfo) {
6300 // Replace (G_TRUNC (G_LSHR (G_BITCAST (G_BUILD_VECTOR x, y)), K)) with
6301 // y if K == size of vector element type
6302 std::optional<ValueAndVReg> ShiftAmt;
6303 if (!mi_match(MI.getOperand(1).getReg(), MRI,
6304 m_GLShr(m_GBitcast(m_GBuildVector(m_Reg(), m_Reg(MatchInfo))),
6305 m_GCst(ShiftAmt))))
6306 return false;
6307
6308 LLT MatchTy = MRI.getType(MatchInfo);
6309 return ShiftAmt->Value.getZExtValue() == MatchTy.getSizeInBits() &&
6310 MatchTy == MRI.getType(MI.getOperand(0).getReg());
6311}
6312
6313unsigned CombinerHelper::getFPMinMaxOpcForSelect(
6314 CmpInst::Predicate Pred, LLT DstTy,
6315 SelectPatternNaNBehaviour VsNaNRetVal) const {
6316 assert(VsNaNRetVal != SelectPatternNaNBehaviour::NOT_APPLICABLE &&
6317 "Expected a NaN behaviour?");
6318 // Choose an opcode based off of legality or the behaviour when one of the
6319 // LHS/RHS may be NaN.
6320 switch (Pred) {
6321 default:
6322 return 0;
6323 case CmpInst::FCMP_UGT:
6324 case CmpInst::FCMP_UGE:
6325 case CmpInst::FCMP_OGT:
6326 case CmpInst::FCMP_OGE:
6327 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_OTHER)
6328 return TargetOpcode::G_FMAXNUM;
6329 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_NAN)
6330 return TargetOpcode::G_FMAXIMUM;
6331 if (isLegal({TargetOpcode::G_FMAXNUM, {DstTy}}))
6332 return TargetOpcode::G_FMAXNUM;
6333 if (isLegal({TargetOpcode::G_FMAXIMUM, {DstTy}}))
6334 return TargetOpcode::G_FMAXIMUM;
6335 return 0;
6336 case CmpInst::FCMP_ULT:
6337 case CmpInst::FCMP_ULE:
6338 case CmpInst::FCMP_OLT:
6339 case CmpInst::FCMP_OLE:
6340 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_OTHER)
6341 return TargetOpcode::G_FMINNUM;
6342 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_NAN)
6343 return TargetOpcode::G_FMINIMUM;
6344 if (isLegal({TargetOpcode::G_FMINNUM, {DstTy}}))
6345 return TargetOpcode::G_FMINNUM;
6346 if (!isLegal({TargetOpcode::G_FMINIMUM, {DstTy}}))
6347 return 0;
6348 return TargetOpcode::G_FMINIMUM;
6349 }
6350}
6351
6352CombinerHelper::SelectPatternNaNBehaviour
6353CombinerHelper::computeRetValAgainstNaN(Register LHS, Register RHS,
6354 bool IsOrderedComparison) const {
6355 bool LHSSafe = isKnownNeverNaN(LHS, MRI);
6356 bool RHSSafe = isKnownNeverNaN(RHS, MRI);
6357 // Completely unsafe.
6358 if (!LHSSafe && !RHSSafe)
6359 return SelectPatternNaNBehaviour::NOT_APPLICABLE;
6360 if (LHSSafe && RHSSafe)
6361 return SelectPatternNaNBehaviour::RETURNS_ANY;
6362 // An ordered comparison will return false when given a NaN, so it
6363 // returns the RHS.
6364 if (IsOrderedComparison)
6365 return LHSSafe ? SelectPatternNaNBehaviour::RETURNS_NAN
6366 : SelectPatternNaNBehaviour::RETURNS_OTHER;
6367 // An unordered comparison will return true when given a NaN, so it
6368 // returns the LHS.
6369 return LHSSafe ? SelectPatternNaNBehaviour::RETURNS_OTHER
6370 : SelectPatternNaNBehaviour::RETURNS_NAN;
6371}
6372
6373bool CombinerHelper::matchFPSelectToMinMax(Register Dst, Register Cond,
6374 Register TrueVal, Register FalseVal,
6375 BuildFnTy &MatchInfo) {
6376 // Match: select (fcmp cond x, y) x, y
6377 // select (fcmp cond x, y) y, x
6378 // And turn it into fminnum/fmaxnum or fmin/fmax based off of the condition.
6379 LLT DstTy = MRI.getType(Dst);
6380 // Bail out early on pointers, since we'll never want to fold to a min/max.
6381 if (DstTy.isPointer())
6382 return false;
6383 // Match a floating point compare with a less-than/greater-than predicate.
6384 // TODO: Allow multiple users of the compare if they are all selects.
6385 CmpInst::Predicate Pred;
6386 Register CmpLHS, CmpRHS;
6387 if (!mi_match(Cond, MRI,
6388 m_OneNonDBGUse(
6389 m_GFCmp(m_Pred(Pred), m_Reg(CmpLHS), m_Reg(CmpRHS)))) ||
6390 CmpInst::isEquality(Pred))
6391 return false;
6392 SelectPatternNaNBehaviour ResWithKnownNaNInfo =
6393 computeRetValAgainstNaN(CmpLHS, CmpRHS, CmpInst::isOrdered(Pred));
6394 if (ResWithKnownNaNInfo == SelectPatternNaNBehaviour::NOT_APPLICABLE)
6395 return false;
6396 if (TrueVal == CmpRHS && FalseVal == CmpLHS) {
6397 std::swap(CmpLHS, CmpRHS);
6398 Pred = CmpInst::getSwappedPredicate(Pred);
6399 if (ResWithKnownNaNInfo == SelectPatternNaNBehaviour::RETURNS_NAN)
6400 ResWithKnownNaNInfo = SelectPatternNaNBehaviour::RETURNS_OTHER;
6401 else if (ResWithKnownNaNInfo == SelectPatternNaNBehaviour::RETURNS_OTHER)
6402 ResWithKnownNaNInfo = SelectPatternNaNBehaviour::RETURNS_NAN;
6403 }
6404 if (TrueVal != CmpLHS || FalseVal != CmpRHS)
6405 return false;
6406 // Decide what type of max/min this should be based off of the predicate.
6407 unsigned Opc = getFPMinMaxOpcForSelect(Pred, DstTy, ResWithKnownNaNInfo);
6408 if (!Opc || !isLegal({Opc, {DstTy}}))
6409 return false;
6410 // Comparisons between signed zero and zero may have different results...
6411 // unless we have fmaximum/fminimum. In that case, we know -0 < 0.
6412 if (Opc != TargetOpcode::G_FMAXIMUM && Opc != TargetOpcode::G_FMINIMUM) {
6413 // We don't know if a comparison between two 0s will give us a consistent
6414 // result. Be conservative and only proceed if at least one side is
6415 // non-zero.
6416 auto KnownNonZeroSide = getFConstantVRegValWithLookThrough(CmpLHS, MRI);
6417 if (!KnownNonZeroSide || !KnownNonZeroSide->Value.isNonZero()) {
6418 KnownNonZeroSide = getFConstantVRegValWithLookThrough(CmpRHS, MRI);
6419 if (!KnownNonZeroSide || !KnownNonZeroSide->Value.isNonZero())
6420 return false;
6421 }
6422 }
6423 MatchInfo = [=](MachineIRBuilder &B) {
6424 B.buildInstr(Opc, {Dst}, {CmpLHS, CmpRHS});
6425 };
6426 return true;
6427}
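// Editor's illustrative example (not part of the original source): when the
// NaN behaviour is acceptable (e.g. %x is known never to be NaN),
//   %c:_(s1) = G_FCMP floatpred(olt), %x, %y
//   %d:_(s32) = G_SELECT %c, %x, %y
// becomes %d = G_FMINNUM %x, %y (or G_FMINIMUM when the select is expected to
// return the NaN operand), subject to the signed-zero check above.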
6428
6430 BuildFnTy &MatchInfo) {
6431 // TODO: Handle integer cases.
6432 assert(MI.getOpcode() == TargetOpcode::G_SELECT);
6433 // Condition may be fed by a truncated compare.
6434 Register Cond = MI.getOperand(1).getReg();
6435 Register MaybeTrunc;
6436 if (mi_match(Cond, MRI, m_OneNonDBGUse(m_GTrunc(m_Reg(MaybeTrunc)))))
6437 Cond = MaybeTrunc;
6438 Register Dst = MI.getOperand(0).getReg();
6439 Register TrueVal = MI.getOperand(2).getReg();
6440 Register FalseVal = MI.getOperand(3).getReg();
6441 return matchFPSelectToMinMax(Dst, Cond, TrueVal, FalseVal, MatchInfo);
6442}
6443
6445 BuildFnTy &MatchInfo) {
6446 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
6447 // (X + Y) == X --> Y == 0
6448 // (X + Y) != X --> Y != 0
6449 // (X - Y) == X --> Y == 0
6450 // (X - Y) != X --> Y != 0
6451 // (X ^ Y) == X --> Y == 0
6452 // (X ^ Y) != X --> Y != 0
6453 Register Dst = MI.getOperand(0).getReg();
6454 CmpInst::Predicate Pred;
6455 Register X, Y, OpLHS, OpRHS;
6456 bool MatchedSub = mi_match(
6457 Dst, MRI,
6458 m_c_GICmp(m_Pred(Pred), m_Reg(X), m_GSub(m_Reg(OpLHS), m_Reg(Y))));
6459 if (MatchedSub && X != OpLHS)
6460 return false;
6461 if (!MatchedSub) {
6462 if (!mi_match(Dst, MRI,
6463 m_c_GICmp(m_Pred(Pred), m_Reg(X),
6464 m_any_of(m_GAdd(m_Reg(OpLHS), m_Reg(OpRHS)),
6465 m_GXor(m_Reg(OpLHS), m_Reg(OpRHS))))))
6466 return false;
6467 Y = X == OpLHS ? OpRHS : X == OpRHS ? OpLHS : Register();
6468 }
6469 MatchInfo = [=](MachineIRBuilder &B) {
6470 auto Zero = B.buildConstant(MRI.getType(Y), 0);
6471 B.buildICmp(Pred, Dst, Y, Zero);
6472 };
6473 return CmpInst::isEquality(Pred) && Y.isValid();
6474}
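// Editor's illustrative sketch (not part of the original source): the fold
// above is justified by two's-complement wraparound; with 32-bit values,
// X + Y == X and X ^ Y == X each hold exactly when Y == 0. The hypothetical
// helper assumes <cstdint>/<cassert> are reachable.
[[maybe_unused]] static void exampleRedundantBinOpEquality(uint32_t X,
                                                           uint32_t Y) {
  assert(((X + Y) == X) == (Y == 0));
  assert(((X ^ Y) == X) == (Y == 0));
}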
6475
6477 Register ShiftReg = MI.getOperand(2).getReg();
6478 LLT ResTy = MRI.getType(MI.getOperand(0).getReg());
6479 auto IsShiftTooBig = [&](const Constant *C) {
6480 auto *CI = dyn_cast<ConstantInt>(C);
6481 return CI && CI->uge(ResTy.getScalarSizeInBits());
6482 };
6483 return matchUnaryPredicate(MRI, ShiftReg, IsShiftTooBig);
6484}
6485
6487 unsigned LHSOpndIdx = 1;
6488 unsigned RHSOpndIdx = 2;
6489 switch (MI.getOpcode()) {
6490 case TargetOpcode::G_UADDO:
6491 case TargetOpcode::G_SADDO:
6492 case TargetOpcode::G_UMULO:
6493 case TargetOpcode::G_SMULO:
6494 LHSOpndIdx = 2;
6495 RHSOpndIdx = 3;
6496 break;
6497 default:
6498 break;
6499 }
6500 Register LHS = MI.getOperand(LHSOpndIdx).getReg();
6501 Register RHS = MI.getOperand(RHSOpndIdx).getReg();
6502 if (!getIConstantVRegVal(LHS, MRI)) {
6503 // Skip commuting if LHS is not a constant. But, LHS may be a
6504 // G_CONSTANT_FOLD_BARRIER. If so we commute as long as we don't already
6505 // have a constant on the RHS.
6506 if (MRI.getVRegDef(LHS)->getOpcode() !=
6507 TargetOpcode::G_CONSTANT_FOLD_BARRIER)
6508 return false;
6509 }
6510 // Commute as long as RHS is not a constant or G_CONSTANT_FOLD_BARRIER.
6511 return MRI.getVRegDef(RHS)->getOpcode() !=
6512 TargetOpcode::G_CONSTANT_FOLD_BARRIER &&
6513 !getIConstantVRegVal(RHS, MRI);
6514}
6515
6517 Register LHS = MI.getOperand(1).getReg();
6518 Register RHS = MI.getOperand(2).getReg();
6519 std::optional<FPValueAndVReg> ValAndVReg;
6520 if (!mi_match(LHS, MRI, m_GFCstOrSplat(ValAndVReg)))
6521 return false;
6522 return !mi_match(RHS, MRI, m_GFCstOrSplat(ValAndVReg));
6523}
6524
6527 unsigned LHSOpndIdx = 1;
6528 unsigned RHSOpndIdx = 2;
6529 switch (MI.getOpcode()) {
6530 case TargetOpcode::G_UADDO:
6531 case TargetOpcode::G_SADDO:
6532 case TargetOpcode::G_UMULO:
6533 case TargetOpcode::G_SMULO:
6534 LHSOpndIdx = 2;
6535 RHSOpndIdx = 3;
6536 break;
6537 default:
6538 break;
6539 }
6540 Register LHSReg = MI.getOperand(LHSOpndIdx).getReg();
6541 Register RHSReg = MI.getOperand(RHSOpndIdx).getReg();
6542 MI.getOperand(LHSOpndIdx).setReg(RHSReg);
6543 MI.getOperand(RHSOpndIdx).setReg(LHSReg);
6545}
6546
6547bool CombinerHelper::isOneOrOneSplat(Register Src, bool AllowUndefs) {
6548 LLT SrcTy = MRI.getType(Src);
6549 if (SrcTy.isFixedVector())
6550 return isConstantSplatVector(Src, 1, AllowUndefs);
6551 if (SrcTy.isScalar()) {
6552 if (AllowUndefs && getOpcodeDef<GImplicitDef>(Src, MRI) != nullptr)
6553 return true;
6554 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
6555 return IConstant && IConstant->Value == 1;
6556 }
6557 return false; // scalable vector
6558}
6559
6560bool CombinerHelper::isZeroOrZeroSplat(Register Src, bool AllowUndefs) {
6561 LLT SrcTy = MRI.getType(Src);
6562 if (SrcTy.isFixedVector())
6563 return isConstantSplatVector(Src, 0, AllowUndefs);
6564 if (SrcTy.isScalar()) {
6565 if (AllowUndefs && getOpcodeDef<GImplicitDef>(Src, MRI) != nullptr)
6566 return true;
6567 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
6568 return IConstant && IConstant->Value == 0;
6569 }
6570 return false; // scalable vector
6571}
6572
6573// Ignores COPYs during conformance checks.
6574// FIXME scalable vectors.
6575bool CombinerHelper::isConstantSplatVector(Register Src, int64_t SplatValue,
6576 bool AllowUndefs) {
6577 GBuildVector *BuildVector = getOpcodeDef<GBuildVector>(Src, MRI);
6578 if (!BuildVector)
6579 return false;
6580 unsigned NumSources = BuildVector->getNumSources();
6581
6582 for (unsigned I = 0; I < NumSources; ++I) {
6583 GImplicitDef *ImplicitDef =
6584 getOpcodeDef<GImplicitDef>(BuildVector->getSourceReg(I), MRI);
6585 if (ImplicitDef && AllowUndefs)
6586 continue;
6587 if (ImplicitDef && !AllowUndefs)
6588 return false;
6589 std::optional<ValueAndVReg> IConstant =
6590 getIConstantVRegValWithLookThrough(BuildVector->getSourceReg(I), MRI);
6591 if (IConstant && IConstant->Value == SplatValue)
6592 continue;
6593 return false;
6594 }
6595 return true;
6596}
6597
6598// Ignores COPYs during lookups.
6599// FIXME scalable vectors
6600std::optional<APInt>
6601CombinerHelper::getConstantOrConstantSplatVector(Register Src) {
6602 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
6603 if (IConstant)
6604 return IConstant->Value;
6605
6606 GBuildVector *BuildVector = getOpcodeDef<GBuildVector>(Src, MRI);
6607 if (!BuildVector)
6608 return std::nullopt;
6609 unsigned NumSources = BuildVector->getNumSources();
6610
6611 std::optional<APInt> Value = std::nullopt;
6612 for (unsigned I = 0; I < NumSources; ++I) {
6613 std::optional<ValueAndVReg> IConstant =
6614 getIConstantVRegValWithLookThrough(BuildVector->getSourceReg(I), MRI);
6615 if (!IConstant)
6616 return std::nullopt;
6617 if (!Value)
6618 Value = IConstant->Value;
6619 else if (*Value != IConstant->Value)
6620 return std::nullopt;
6621 }
6622 return Value;
6623}
6624
6625// FIXME G_SPLAT_VECTOR
6626bool CombinerHelper::isConstantOrConstantVectorI(Register Src) const {
6627 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
6628 if (IConstant)
6629 return true;
6630
6631 GBuildVector *BuildVector = getOpcodeDef<GBuildVector>(Src, MRI);
6632 if (!BuildVector)
6633 return false;
6634
6635 unsigned NumSources = BuildVector->getNumSources();
6636 for (unsigned I = 0; I < NumSources; ++I) {
6637 std::optional<ValueAndVReg> IConstant =
6638 getIConstantVRegValWithLookThrough(BuildVector->getSourceReg(I), MRI);
6639 if (!IConstant)
6640 return false;
6641 }
6642 return true;
6643}
6644
6645// TODO: use knownbits to determine zeros
6646bool CombinerHelper::tryFoldSelectOfConstants(GSelect *Select,
6647 BuildFnTy &MatchInfo) {
6648 uint32_t Flags = Select->getFlags();
6649 Register Dest = Select->getReg(0);
6650 Register Cond = Select->getCondReg();
6651 Register True = Select->getTrueReg();
6652 Register False = Select->getFalseReg();
6653 LLT CondTy = MRI.getType(Select->getCondReg());
6654 LLT TrueTy = MRI.getType(Select->getTrueReg());
6655
6656 // We only do this combine for scalar boolean conditions.
6657 if (CondTy != LLT::scalar(1))
6658 return false;
6659
6660 if (TrueTy.isPointer())
6661 return false;
6662
6663 // Both are scalars.
6664 std::optional<ValueAndVReg> TrueOpt =
6665 getIConstantVRegValWithLookThrough(True, MRI);
6666 std::optional<ValueAndVReg> FalseOpt =
6667 getIConstantVRegValWithLookThrough(False, MRI);
6668
6669 if (!TrueOpt || !FalseOpt)
6670 return false;
6671
6672 APInt TrueValue = TrueOpt->Value;
6673 APInt FalseValue = FalseOpt->Value;
6674
6675 // select Cond, 1, 0 --> zext (Cond)
6676 if (TrueValue.isOne() && FalseValue.isZero()) {
6677 MatchInfo = [=](MachineIRBuilder &B) {
6678 B.setInstrAndDebugLoc(*Select);
6679 B.buildZExtOrTrunc(Dest, Cond);
6680 };
6681 return true;
6682 }
6683
6684 // select Cond, -1, 0 --> sext (Cond)
6685 if (TrueValue.isAllOnes() && FalseValue.isZero()) {
6686 MatchInfo = [=](MachineIRBuilder &B) {
6687 B.setInstrAndDebugLoc(*Select);
6688 B.buildSExtOrTrunc(Dest, Cond);
6689 };
6690 return true;
6691 }
6692
6693 // select Cond, 0, 1 --> zext (!Cond)
6694 if (TrueValue.isZero() && FalseValue.isOne()) {
6695 MatchInfo = [=](MachineIRBuilder &B) {
6696 B.setInstrAndDebugLoc(*Select);
6697 Register Inner = MRI.createGenericVirtualRegister(CondTy);
6698 B.buildNot(Inner, Cond);
6699 B.buildZExtOrTrunc(Dest, Inner);
6700 };
6701 return true;
6702 }
6703
6704 // select Cond, 0, -1 --> sext (!Cond)
6705 if (TrueValue.isZero() && FalseValue.isAllOnes()) {
6706 MatchInfo = [=](MachineIRBuilder &B) {
6707 B.setInstrAndDebugLoc(*Select);
6708 Register Inner = MRI.createGenericVirtualRegister(CondTy);
6709 B.buildNot(Inner, Cond);
6710 B.buildSExtOrTrunc(Dest, Inner);
6711 };
6712 return true;
6713 }
6714
6715 // select Cond, C1, C1-1 --> add (zext Cond), C1-1
6716 if (TrueValue - 1 == FalseValue) {
6717 MatchInfo = [=](MachineIRBuilder &B) {
6718 B.setInstrAndDebugLoc(*Select);
6719 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
6720 B.buildZExtOrTrunc(Inner, Cond);
6721 B.buildAdd(Dest, Inner, False);
6722 };
6723 return true;
6724 }
6725
6726 // select Cond, C1, C1+1 --> add (sext Cond), C1+1
6727 if (TrueValue + 1 == FalseValue) {
6728 MatchInfo = [=](MachineIRBuilder &B) {
6729 B.setInstrAndDebugLoc(*Select);
6730 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
6731 B.buildSExtOrTrunc(Inner, Cond);
6732 B.buildAdd(Dest, Inner, False);
6733 };
6734 return true;
6735 }
6736
6737 // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
6738 if (TrueValue.isPowerOf2() && FalseValue.isZero()) {
6739 MatchInfo = [=](MachineIRBuilder &B) {
6740 B.setInstrAndDebugLoc(*Select);
6741 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
6742 B.buildZExtOrTrunc(Inner, Cond);
6743 // The shift amount must be scalar.
6744 LLT ShiftTy = TrueTy.isVector() ? TrueTy.getElementType() : TrueTy;
6745 auto ShAmtC = B.buildConstant(ShiftTy, TrueValue.exactLogBase2());
6746 B.buildShl(Dest, Inner, ShAmtC, Flags);
6747 };
6748 return true;
6749 }
6750 // select Cond, -1, C --> or (sext Cond), C
6751 if (TrueValue.isAllOnes()) {
6752 MatchInfo = [=](MachineIRBuilder &B) {
6753 B.setInstrAndDebugLoc(*Select);
6754 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
6755 B.buildSExtOrTrunc(Inner, Cond);
6756 B.buildOr(Dest, Inner, False, Flags);
6757 };
6758 return true;
6759 }
6760
6761 // select Cond, C, -1 --> or (sext (not Cond)), C
6762 if (FalseValue.isAllOnes()) {
6763 MatchInfo = [=](MachineIRBuilder &B) {
6764 B.setInstrAndDebugLoc(*Select);
6765 Register Not = MRI.createGenericVirtualRegister(CondTy);
6766 B.buildNot(Not, Cond);
6767 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
6768 B.buildSExtOrTrunc(Inner, Not);
6769 B.buildOr(Dest, Inner, True, Flags);
6770 };
6771 return true;
6772 }
6773
6774 return false;
6775}
6776
6777// TODO: use knownbits to determine zeros
6778bool CombinerHelper::tryFoldBoolSelectToLogic(GSelect *Select,
6779 BuildFnTy &MatchInfo) {
6780 uint32_t Flags = Select->getFlags();
6781 Register DstReg = Select->getReg(0);
6782 Register Cond = Select->getCondReg();
6783 Register True = Select->getTrueReg();
6784 Register False = Select->getFalseReg();
6785 LLT CondTy = MRI.getType(Select->getCondReg());
6786 LLT TrueTy = MRI.getType(Select->getTrueReg());
6787
6788 // Boolean or fixed vector of booleans.
6789 if (CondTy.isScalableVector() ||
6790 (CondTy.isFixedVector() &&
6791 CondTy.getElementType().getScalarSizeInBits() != 1) ||
6792 CondTy.getScalarSizeInBits() != 1)
6793 return false;
6794
6795 if (CondTy != TrueTy)
6796 return false;
6797
6798 // select Cond, Cond, F --> or Cond, F
6799 // select Cond, 1, F --> or Cond, F
6800 if ((Cond == True) || isOneOrOneSplat(True, /* AllowUndefs */ true)) {
6801 MatchInfo = [=](MachineIRBuilder &B) {
6802 B.setInstrAndDebugLoc(*Select);
6803 Register Ext = MRI.createGenericVirtualRegister(TrueTy);
6804 B.buildZExtOrTrunc(Ext, Cond);
6805 auto FreezeFalse = B.buildFreeze(TrueTy, False);
6806 B.buildOr(DstReg, Ext, FreezeFalse, Flags);
6807 };
6808 return true;
6809 }
6810
6811 // select Cond, T, Cond --> and Cond, T
6812 // select Cond, T, 0 --> and Cond, T
6813 if ((Cond == False) || isZeroOrZeroSplat(False, /* AllowUndefs */ true)) {
6814 MatchInfo = [=](MachineIRBuilder &B) {
6815 B.setInstrAndDebugLoc(*Select);
6816 Register Ext = MRI.createGenericVirtualRegister(TrueTy);
6817 B.buildZExtOrTrunc(Ext, Cond);
6818 auto FreezeTrue = B.buildFreeze(TrueTy, True);
6819 B.buildAnd(DstReg, Ext, FreezeTrue);
6820 };
6821 return true;
6822 }
6823
6824 // select Cond, T, 1 --> or (not Cond), T
6825 if (isOneOrOneSplat(False, /* AllowUndefs */ true)) {
6826 MatchInfo = [=](MachineIRBuilder &B) {
6827 B.setInstrAndDebugLoc(*Select);
6828 // First the not.
6829 Register Inner = MRI.createGenericVirtualRegister(CondTy);
6830 B.buildNot(Inner, Cond);
6831 // Then an ext to match the destination register.
6832 Register Ext = MRI.createGenericVirtualRegister(TrueTy);
6833 B.buildZExtOrTrunc(Ext, Inner);
6834 auto FreezeTrue = B.buildFreeze(TrueTy, True);
6835 B.buildOr(DstReg, Ext, FreezeTrue, Flags);
6836 };
6837 return true;
6838 }
6839
6840 // select Cond, 0, F --> and (not Cond), F
6841 if (isZeroOrZeroSplat(True, /* AllowUndefs */ true)) {
6842 MatchInfo = [=](MachineIRBuilder &B) {
6843 B.setInstrAndDebugLoc(*Select);
6844 // First the not.
6845 Register Inner = MRI.createGenericVirtualRegister(CondTy);
6846 B.buildNot(Inner, Cond);
6847 // Then an ext to match the destination register.
6848 Register Ext = MRI.createGenericVirtualRegister(TrueTy);
6849 B.buildZExtOrTrunc(Ext, Inner);
6850 auto FreezeFalse = B.buildFreeze(TrueTy, False);
6851 B.buildAnd(DstReg, Ext, FreezeFalse);
6852 };
6853 return true;
6854 }
6855
6856 return false;
6857}
6858
6859bool CombinerHelper::matchSelectIMinMax(const MachineOperand &MO,
6860 BuildFnTy &MatchInfo) {
6861 GSelect *Select = cast<GSelect>(MRI.getVRegDef(MO.getReg()));
6862 GICmp *Cmp = cast<GICmp>(MRI.getVRegDef(Select->getCondReg()));
6863
6864 Register DstReg = Select->getReg(0);
6865 Register True = Select->getTrueReg();
6866 Register False = Select->getFalseReg();
6867 LLT DstTy = MRI.getType(DstReg);
6868
6869 if (DstTy.isPointer())
6870 return false;
6871
6872 // We want to fold the icmp and replace the select.
6873 if (!MRI.hasOneNonDBGUse(Cmp->getReg(0)))
6874 return false;
6875
6876 CmpInst::Predicate Pred = Cmp->getCond();
6877 // We need an ordering (greater/less) predicate for the min/max
6878 // canonicalization; equality predicates cannot form an integer min/max.
6879 if (CmpInst::isEquality(Pred))
6880 return false;
6881
6882 Register CmpLHS = Cmp->getLHSReg();
6883 Register CmpRHS = Cmp->getRHSReg();
6884
6885 // We can swap CmpLHS and CmpRHS for a higher hit rate.
6886 if (True == CmpRHS && False == CmpLHS) {
6887 std::swap(CmpLHS, CmpRHS);
6888 Pred = CmpInst::getSwappedPredicate(Pred);
6889 }
6890
6891 // (icmp X, Y) ? X : Y -> integer minmax.
6892 // see matchSelectPattern in ValueTracking.
6893 // Legality between G_SELECT and integer minmax can differ.
6894 if (True != CmpLHS || False != CmpRHS)
6895 return false;
6896
6897 switch (Pred) {
6898 case ICmpInst::ICMP_UGT:
6899 case ICmpInst::ICMP_UGE: {
6900 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMAX, DstTy}))
6901 return false;
6902 MatchInfo = [=](MachineIRBuilder &B) { B.buildUMax(DstReg, True, False); };
6903 return true;
6904 }
6905 case ICmpInst::ICMP_SGT:
6906 case ICmpInst::ICMP_SGE: {
6907 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SMAX, DstTy}))
6908 return false;
6909 MatchInfo = [=](MachineIRBuilder &B) { B.buildSMax(DstReg, True, False); };
6910 return true;
6911 }
6912 case ICmpInst::ICMP_ULT:
6913 case ICmpInst::ICMP_ULE: {
6914 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMIN, DstTy}))
6915 return false;
6916 MatchInfo = [=](MachineIRBuilder &B) { B.buildUMin(DstReg, True, False); };
6917 return true;
6918 }
6919 case ICmpInst::ICMP_SLT:
6920 case ICmpInst::ICMP_SLE: {
6921 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SMIN, DstTy}))
6922 return false;
6923 MatchInfo = [=](MachineIRBuilder &B) { B.buildSMin(DstReg, True, False); };
6924 return true;
6925 }
6926 default:
6927 return false;
6928 }
6929}
6930
6931bool CombinerHelper::matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo) {
6932 GSelect *Select = cast<GSelect>(&MI);
6933
6934 if (tryFoldSelectOfConstants(Select, MatchInfo))
6935 return true;
6936
6937 if (tryFoldBoolSelectToLogic(Select, MatchInfo))
6938 return true;
6939
6940 return false;
6941}
6942
6943/// Fold (icmp Pred1 V1, C1) && (icmp Pred2 V2, C2)
6944/// or (icmp Pred1 V1, C1) || (icmp Pred2 V2, C2)
6945/// into a single comparison using range-based reasoning.
6946/// see InstCombinerImpl::foldAndOrOfICmpsUsingRanges.
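/// Illustrative example (assuming a 32-bit operand X):
///   (icmp sgt X, -1) && (icmp slt X, 10)
/// covers exactly the unsigned range [0, 10) and folds to icmp ult X, 10.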
6947bool CombinerHelper::tryFoldAndOrOrICmpsUsingRanges(GLogicalBinOp *Logic,
6948 BuildFnTy &MatchInfo) {
6949 assert(Logic->getOpcode() != TargetOpcode::G_XOR && "unexpected xor");
6950 bool IsAnd = Logic->getOpcode() == TargetOpcode::G_AND;
6951 Register DstReg = Logic->getReg(0);
6952 Register LHS = Logic->getLHSReg();
6953 Register RHS = Logic->getRHSReg();
6954 unsigned Flags = Logic->getFlags();
6955
6956 // We need a G_ICMP on the LHS register.
6957 GICmp *Cmp1 = getOpcodeDef<GICmp>(LHS, MRI);
6958 if (!Cmp1)
6959 return false;
6960
6961 // We need a G_ICMP on the RHS register.
6962 GICmp *Cmp2 = getOpcodeDef<GICmp>(RHS, MRI);
6963 if (!Cmp2)
6964 return false;
6965
6966 // We want to fold the icmps.
6967 if (!MRI.hasOneNonDBGUse(Cmp1->getReg(0)) ||
6968 !MRI.hasOneNonDBGUse(Cmp2->getReg(0)))
6969 return false;
6970
6971 APInt C1;
6972 APInt C2;
6973 std::optional<ValueAndVReg> MaybeC1 =
6974 getIConstantVRegValWithLookThrough(Cmp1->getRHSReg(), MRI);
6975 if (!MaybeC1)
6976 return false;
6977 C1 = MaybeC1->Value;
6978
6979 std::optional<ValueAndVReg> MaybeC2 =
6980 getIConstantVRegValWithLookThrough(Cmp2->getRHSReg(), MRI);
6981 if (!MaybeC2)
6982 return false;
6983 C2 = MaybeC2->Value;
6984
6985 Register R1 = Cmp1->getLHSReg();
6986 Register R2 = Cmp2->getLHSReg();
6987 CmpInst::Predicate Pred1 = Cmp1->getCond();
6988 CmpInst::Predicate Pred2 = Cmp2->getCond();
6989 LLT CmpTy = MRI.getType(Cmp1->getReg(0));
6990 LLT CmpOperandTy = MRI.getType(R1);
6991
6992 if (CmpOperandTy.isPointer())
6993 return false;
6994
6995 // We build ands, adds, and constants of type CmpOperandTy.
6996 // They must be legal to build.
6997 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_AND, CmpOperandTy}) ||
6998 !isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, CmpOperandTy}) ||
6999 !isConstantLegalOrBeforeLegalizer(CmpOperandTy))
7000 return false;
7001
7002 // Look through add of a constant offset on R1, R2, or both operands. This
7003 // allows us to interpret the R + C' < C'' range idiom into a proper range.
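 // For example, icmp ult (add X, 1), 4 describes the range [0, 4) for X + 1,
 // i.e. the range [-1, 3) for X itself once the offset 1 is subtracted.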
7004 std::optional<APInt> Offset1;
7005 std::optional<APInt> Offset2;
7006 if (R1 != R2) {
7007 if (GAdd *Add = getOpcodeDef<GAdd>(R1, MRI)) {
7008 std::optional<ValueAndVReg> MaybeOffset1 =
7009 getIConstantVRegValWithLookThrough(Add->getRHSReg(), MRI);
7010 if (MaybeOffset1) {
7011 R1 = Add->getLHSReg();
7012 Offset1 = MaybeOffset1->Value;
7013 }
7014 }
7015 if (GAdd *Add = getOpcodeDef<GAdd>(R2, MRI)) {
7016 std::optional<ValueAndVReg> MaybeOffset2 =
7017 getIConstantVRegValWithLookThrough(Add->getRHSReg(), MRI);
7018 if (MaybeOffset2) {
7019 R2 = Add->getLHSReg();
7020 Offset2 = MaybeOffset2->Value;
7021 }
7022 }
7023 }
7024
7025 if (R1 != R2)
7026 return false;
7027
7028 // We calculate the icmp ranges including maybe offsets.
7029 ConstantRange CR1 = ConstantRange::makeExactICmpRegion(
7030 IsAnd ? ICmpInst::getInversePredicate(Pred1) : Pred1, C1);
7031 if (Offset1)
7032 CR1 = CR1.subtract(*Offset1);
7033
7034 ConstantRange CR2 = ConstantRange::makeExactICmpRegion(
7035 IsAnd ? ICmpInst::getInversePredicate(Pred2) : Pred2, C2);
7036 if (Offset2)
7037 CR2 = CR2.subtract(*Offset2);
7038
7039 bool CreateMask = false;
7040 APInt LowerDiff;
7041 std::optional<ConstantRange> CR = CR1.exactUnionWith(CR2);
7042 if (!CR) {
7043 // We need non-wrapping ranges.
7044 if (CR1.isWrappedSet() || CR2.isWrappedSet())
7045 return false;
7046
7047 // Check whether we have equal-size ranges that only differ by one bit.
7048 // In that case we can apply a mask to map one range onto the other.
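 // For example, the ranges [0, 4) and [8, 12) have the same size and differ
 // only in bit 3, so and-ing the operand with ~8 maps [8, 12) onto [0, 4).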
7049 LowerDiff = CR1.getLower() ^ CR2.getLower();
7050 APInt UpperDiff = (CR1.getUpper() - 1) ^ (CR2.getUpper() - 1);
7051 APInt CR1Size = CR1.getUpper() - CR1.getLower();
7052 if (!LowerDiff.isPowerOf2() || LowerDiff != UpperDiff ||
7053 CR1Size != CR2.getUpper() - CR2.getLower())
7054 return false;
7055
7056 CR = CR1.getLower().ult(CR2.getLower()) ? CR1 : CR2;
7057 CreateMask = true;
7058 }
7059
7060 if (IsAnd)
7061 CR = CR->inverse();
7062
7063 CmpInst::Predicate NewPred;
7064 APInt NewC, Offset;
7065 CR->getEquivalentICmp(NewPred, NewC, Offset);
7066
7067 // We take the result type of one of the original icmps, CmpTy, for
7068 // the icmp to be built. The operand type, CmpOperandTy, is used for
7069 // the other instructions and constants to be built. The types of
7070 // the parameters and output are the same for add and and. CmpTy
7071 // and the type of DstReg might differ. That is why we zext or trunc
7072 // the icmp into the destination register.
7073
7074 MatchInfo = [=](MachineIRBuilder &B) {
7075 if (CreateMask && Offset != 0) {
7076 auto TildeLowerDiff = B.buildConstant(CmpOperandTy, ~LowerDiff);
7077 auto And = B.buildAnd(CmpOperandTy, R1, TildeLowerDiff); // the mask.
7078 auto OffsetC = B.buildConstant(CmpOperandTy, Offset);
7079 auto Add = B.buildAdd(CmpOperandTy, And, OffsetC, Flags);
7080 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
7081 auto ICmp = B.buildICmp(NewPred, CmpTy, Add, NewCon);
7082 B.buildZExtOrTrunc(DstReg, ICmp);
7083 } else if (CreateMask && Offset == 0) {
7084 auto TildeLowerDiff = B.buildConstant(CmpOperandTy, ~LowerDiff);
7085 auto And = B.buildAnd(CmpOperandTy, R1, TildeLowerDiff); // the mask.
7086 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
7087 auto ICmp = B.buildICmp(NewPred, CmpTy, And, NewCon);
7088 B.buildZExtOrTrunc(DstReg, ICmp);
7089 } else if (!CreateMask && Offset != 0) {
7090 auto OffsetC = B.buildConstant(CmpOperandTy, Offset);
7091 auto Add = B.buildAdd(CmpOperandTy, R1, OffsetC, Flags);
7092 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
7093 auto ICmp = B.buildICmp(NewPred, CmpTy, Add, NewCon);
7094 B.buildZExtOrTrunc(DstReg, ICmp);
7095 } else if (!CreateMask && Offset == 0) {
7096 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
7097 auto ICmp = B.buildICmp(NewPred, CmpTy, R1, NewCon);
7098 B.buildZExtOrTrunc(DstReg, ICmp);
7099 } else {
7100 llvm_unreachable("unexpected configuration of CreateMask and Offset");
7101 }
7102 };
7103 return true;
7104}
7105
7106bool CombinerHelper::tryFoldLogicOfFCmps(GLogicalBinOp *Logic,
7107 BuildFnTy &MatchInfo) {
7108 assert(Logic->getOpcode() != TargetOpcode::G_XOR && "unexpected xor");
7109 Register DestReg = Logic->getReg(0);
7110 Register LHS = Logic->getLHSReg();
7111 Register RHS = Logic->getRHSReg();
7112 bool IsAnd = Logic->getOpcode() == TargetOpcode::G_AND;
7113
7114 // We need a compare on the LHS register.
7115 GFCmp *Cmp1 = getOpcodeDef<GFCmp>(LHS, MRI);
7116 if (!Cmp1)
7117 return false;
7118
7119 // We need a compare on the RHS register.
7120 GFCmp *Cmp2 = getOpcodeDef<GFCmp>(RHS, MRI);
7121 if (!Cmp2)
7122 return false;
7123
7124 LLT CmpTy = MRI.getType(Cmp1->getReg(0));
7125 LLT CmpOperandTy = MRI.getType(Cmp1->getLHSReg());
7126
7127 // We build one fcmp, want to fold the fcmps, replace the logic op,
7128 // and the fcmps must have the same shape.
7129 if (!isLegalOrBeforeLegalizer(
7130 {TargetOpcode::G_FCMP, {CmpTy, CmpOperandTy}}) ||
7131 !MRI.hasOneNonDBGUse(Logic->getReg(0)) ||
7132 !MRI.hasOneNonDBGUse(Cmp1->getReg(0)) ||
7133 !MRI.hasOneNonDBGUse(Cmp2->getReg(0)) ||
7134 MRI.getType(Cmp1->getLHSReg()) != MRI.getType(Cmp2->getLHSReg()))
7135 return false;
7136
7137 CmpInst::Predicate PredL = Cmp1->getCond();
7138 CmpInst::Predicate PredR = Cmp2->getCond();
7139 Register LHS0 = Cmp1->getLHSReg();
7140 Register LHS1 = Cmp1->getRHSReg();
7141 Register RHS0 = Cmp2->getLHSReg();
7142 Register RHS1 = Cmp2->getRHSReg();
7143
7144 if (LHS0 == RHS1 && LHS1 == RHS0) {
7145 // Swap RHS operands to match LHS.
7146 PredR = CmpInst::getSwappedPredicate(PredR);
7147 std::swap(RHS0, RHS1);
7148 }
7149
7150 if (LHS0 == RHS0 && LHS1 == RHS1) {
7151 // We determine the new predicate.
7152 unsigned CmpCodeL = getFCmpCode(PredL);
7153 unsigned CmpCodeR = getFCmpCode(PredR);
7154 unsigned NewPred = IsAnd ? CmpCodeL & CmpCodeR : CmpCodeL | CmpCodeR;
7155 unsigned Flags = Cmp1->getFlags() | Cmp2->getFlags();
7156 MatchInfo = [=](MachineIRBuilder &B) {
7157 // The fcmp predicates fill the lower part of the enum.
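 // For example, and-ing the codes of FCMP_OLT (4) and FCMP_OGT (2) yields 0,
 // i.e. FCMP_FALSE, which the constant-fold case below turns into a constant 0.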
7158 FCmpInst::Predicate Pred = static_cast<FCmpInst::Predicate>(NewPred);
7159 if (Pred == FCmpInst::FCMP_FALSE &&
7160 isConstantLegalOrBeforeLegalizer(CmpTy)) {
7161 auto False = B.buildConstant(CmpTy, 0);
7162 B.buildZExtOrTrunc(DestReg, False);
7163 } else if (Pred == FCmpInst::FCMP_TRUE &&
7164 isConstantLegalOrBeforeLegalizer(CmpTy)) {
7165 auto True =
7166 B.buildConstant(CmpTy, getICmpTrueVal(getTargetLowering(),
7167 CmpTy.isVector() /*isVector*/,
7168 true /*isFP*/));
7169 B.buildZExtOrTrunc(DestReg, True);
7170 } else { // We take the predicate without predicate optimizations.
7171 auto Cmp = B.buildFCmp(Pred, CmpTy, LHS0, LHS1, Flags);
7172 B.buildZExtOrTrunc(DestReg, Cmp);
7173 }
7174 };
7175 return true;
7176 }
7177
7178 return false;
7179}
7180
7181bool CombinerHelper::matchAnd(MachineInstr &MI, BuildFnTy &MatchInfo) {
7182 GAnd *And = cast<GAnd>(&MI);
7183
7184 if (tryFoldAndOrOrICmpsUsingRanges(And, MatchInfo))
7185 return true;
7186
7187 if (tryFoldLogicOfFCmps(And, MatchInfo))
7188 return true;
7189
7190 return false;
7191}
7192
7193bool CombinerHelper::matchOr(MachineInstr &MI, BuildFnTy &MatchInfo) {
7194 GOr *Or = cast<GOr>(&MI);
7195
7196 if (tryFoldAndOrOrICmpsUsingRanges(Or, MatchInfo))
7197 return true;
7198
7199 if (tryFoldLogicOfFCmps(Or, MatchInfo))
7200 return true;
7201
7202 return false;
7203}
7204
7205bool CombinerHelper::matchAddOverflow(MachineInstr &MI, BuildFnTy &MatchInfo) {
7206 GAddCarryOut *Add = cast<GAddCarryOut>(&MI);
7207
7208 // Addo has no flags
7209 Register Dst = Add->getReg(0);
7210 Register Carry = Add->getReg(1);
7211 Register LHS = Add->getLHSReg();
7212 Register RHS = Add->getRHSReg();
7213 bool IsSigned = Add->isSigned();
7214 LLT DstTy = MRI.getType(Dst);
7215 LLT CarryTy = MRI.getType(Carry);
7216
7217 // If the carry is dead, fold addo -> add, undef.
7218 if (MRI.use_nodbg_empty(Carry) &&
7219 isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, {DstTy}})) {
7220 MatchInfo = [=](MachineIRBuilder &B) {
7221 B.buildAdd(Dst, LHS, RHS);
7222 B.buildUndef(Carry);
7223 };
7224 return true;
7225 }
7226
7227 // Canonicalize constant to RHS.
7228 if (isConstantOrConstantVectorI(LHS) && !isConstantOrConstantVectorI(RHS)) {
7229 if (IsSigned) {
7230 MatchInfo = [=](MachineIRBuilder &B) {
7231 B.buildSAddo(Dst, Carry, RHS, LHS);
7232 };
7233 return true;
7234 }
7235 // !IsSigned
7236 MatchInfo = [=](MachineIRBuilder &B) {
7237 B.buildUAddo(Dst, Carry, RHS, LHS);
7238 };
7239 return true;
7240 }
7241
7242 std::optional<APInt> MaybeLHS = getConstantOrConstantSplatVector(LHS);
7243 std::optional<APInt> MaybeRHS = getConstantOrConstantSplatVector(RHS);
7244
7245 // Fold addo(c1, c2) -> c3, carry.
7246 if (MaybeLHS && MaybeRHS && isConstantLegalOrBeforeLegalizer(DstTy) &&
7247 isConstantLegalOrBeforeLegalizer(CarryTy)) {
7248 bool Overflow;
7249 APInt Result = IsSigned ? MaybeLHS->sadd_ov(*MaybeRHS, Overflow)
7250 : MaybeLHS->uadd_ov(*MaybeRHS, Overflow);
7251 MatchInfo = [=](MachineIRBuilder &B) {
7252 B.buildConstant(Dst, Result);
7253 B.buildConstant(Carry, Overflow);
7254 };
7255 return true;
7256 }
7257
7258 // Fold (addo x, 0) -> x, no carry
7259 if (MaybeRHS && *MaybeRHS == 0 && isConstantLegalOrBeforeLegalizer(CarryTy)) {
7260 MatchInfo = [=](MachineIRBuilder &B) {
7261 B.buildCopy(Dst, LHS);
7262 B.buildConstant(Carry, 0);
7263 };
7264 return true;
7265 }
7266
7267 // Given 2 constant operands whose sum does not overflow:
7268 // uaddo (X +nuw C0), C1 -> uaddo X, C0 + C1
7269 // saddo (X +nsw C0), C1 -> saddo X, C0 + C1
7270 GAdd *AddLHS = getOpcodeDef<GAdd>(LHS, MRI);
7271 if (MaybeRHS && AddLHS && MRI.hasOneNonDBGUse(Add->getReg(0)) &&
7272 ((IsSigned && AddLHS->getFlag(MachineInstr::MIFlag::NoSWrap)) ||
7273 (!IsSigned && AddLHS->getFlag(MachineInstr::MIFlag::NoUWrap)))) {
7274 std::optional<APInt> MaybeAddRHS =
7275 getConstantOrConstantSplatVector(AddLHS->getRHSReg());
7276 if (MaybeAddRHS) {
7277 bool Overflow;
7278 APInt NewC = IsSigned ? MaybeAddRHS->sadd_ov(*MaybeRHS, Overflow)
7279 : MaybeAddRHS->uadd_ov(*MaybeRHS, Overflow);
7280 if (!Overflow && isConstantLegalOrBeforeLegalizer(DstTy)) {
7281 if (IsSigned) {
7282 MatchInfo = [=](MachineIRBuilder &B) {
7283 auto ConstRHS = B.buildConstant(DstTy, NewC);
7284 B.buildSAddo(Dst, Carry, AddLHS->getLHSReg(), ConstRHS);
7285 };
7286 return true;
7287 }
7288 // !IsSigned
7289 MatchInfo = [=](MachineIRBuilder &B) {
7290 auto ConstRHS = B.buildConstant(DstTy, NewC);
7291 B.buildUAddo(Dst, Carry, AddLHS->getLHSReg(), ConstRHS);
7292 };
7293 return true;
7294 }
7295 }
7296 };
7297
7298 // We try to combine addo to non-overflowing add.
7299 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, {DstTy}}) ||
7300 !isConstantLegalOrBeforeLegalizer(CarryTy))
7301 return false;
7302
7303 // We try to combine uaddo to non-overflowing add.
7304 if (!IsSigned) {
7305 ConstantRange CRLHS =
7306 ConstantRange::fromKnownBits(KB->getKnownBits(LHS), /*IsSigned=*/false);
7307 ConstantRange CRRHS =
7308 ConstantRange::fromKnownBits(KB->getKnownBits(RHS), /*IsSigned=*/false);
7309
7310 switch (CRLHS.unsignedAddMayOverflow(CRRHS)) {
7311 case ConstantRange::OverflowResult::MayOverflow:
7312 return false;
7313 case ConstantRange::OverflowResult::NeverOverflows: {
7314 MatchInfo = [=](MachineIRBuilder &B) {
7315 B.buildAdd(Dst, LHS, RHS, MachineInstr::MIFlag::NoUWrap);
7316 B.buildConstant(Carry, 0);
7317 };
7318 return true;
7319 }
7320 case ConstantRange::OverflowResult::AlwaysOverflowsLow:
7321 case ConstantRange::OverflowResult::AlwaysOverflowsHigh: {
7322 MatchInfo = [=](MachineIRBuilder &B) {
7323 B.buildAdd(Dst, LHS, RHS);
7324 B.buildConstant(Carry, 1);
7325 };
7326 return true;
7327 }
7328 }
7329 return false;
7330 }
7331
7332 // We try to combine saddo to non-overflowing add.
7333
7334 // If LHS and RHS each have at least two sign bits, then there is no signed
7335 // overflow.
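 // (With two or more sign bits each operand fits in BitWidth - 1 bits, so the
 // sum of the two fits in BitWidth bits without wrapping.)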
7336 if (KB->computeNumSignBits(RHS) > 1 && KB->computeNumSignBits(LHS) > 1) {
7337 MatchInfo = [=](MachineIRBuilder &B) {
7338 B.buildAdd(Dst, LHS, RHS, MachineInstr::MIFlag::NoSWrap);
7339 B.buildConstant(Carry, 0);
7340 };
7341 return true;
7342 }
7343
7344 ConstantRange CRLHS =
7345 ConstantRange::fromKnownBits(KB->getKnownBits(LHS), /*IsSigned=*/true);
7346 ConstantRange CRRHS =
7347 ConstantRange::fromKnownBits(KB->getKnownBits(RHS), /*IsSigned=*/true);
7348
7349 switch (CRLHS.signedAddMayOverflow(CRRHS)) {
7350 case ConstantRange::OverflowResult::MayOverflow:
7351 return false;
7352 case ConstantRange::OverflowResult::NeverOverflows: {
7353 MatchInfo = [=](MachineIRBuilder &B) {
7354 B.buildAdd(Dst, LHS, RHS, MachineInstr::MIFlag::NoSWrap);
7355 B.buildConstant(Carry, 0);
7356 };
7357 return true;
7358 }
7359 case ConstantRange::OverflowResult::AlwaysOverflowsLow:
7360 case ConstantRange::OverflowResult::AlwaysOverflowsHigh: {
7361 MatchInfo = [=](MachineIRBuilder &B) {
7362 B.buildAdd(Dst, LHS, RHS);
7363 B.buildConstant(Carry, 1);
7364 };
7365 return true;
7366 }
7367 }
7368
7369 return false;
7370}
7371
7372void CombinerHelper::applyBuildFnMO(const MachineOperand &MO,
7373 BuildFnTy &MatchInfo) {
7374 MachineInstr *Root = getDefIgnoringCopies(MO.getReg(), MRI);
7375 MatchInfo(Builder);
7376 Root->eraseFromParent();
7377}
7378
7379bool CombinerHelper::matchFPowIExpansion(MachineInstr &MI, int64_t Exponent) {
7380 bool OptForSize = MI.getMF()->getFunction().hasOptSize();
7381 return getTargetLowering().isBeneficialToExpandPowI(Exponent, OptForSize);
7382}
7383
7384void CombinerHelper::applyExpandFPowI(MachineInstr &MI, int64_t Exponent) {
7385 auto [Dst, Base] = MI.getFirst2Regs();
7386 LLT Ty = MRI.getType(Dst);
7387 int64_t ExpVal = Exponent;
7388
7389 if (ExpVal == 0) {
7390 Builder.buildFConstant(Dst, 1.0);
7391 MI.removeFromParent();
7392 return;
7393 }
7394
7395 if (ExpVal < 0)
7396 ExpVal = -ExpVal;
7397
7398 // We use the simple binary decomposition method from SelectionDAG ExpandPowI
7399 // to generate the multiply sequence. There are more optimal ways to do this
7400 // (for example, powi(x,15) generates one more multiply than it should), but
7401 // this has the benefit of being both really simple and much better than a
7402 // libcall.
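 // For example, for powi(x, 13) the loop walks the bits of 13 (0b1101) and
 // multiplies the running squares x, x^4 and x^8 into the result, x^13.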
7403 std::optional<SrcOp> Res;
7404 SrcOp CurSquare = Base;
7405 while (ExpVal > 0) {
7406 if (ExpVal & 1) {
7407 if (!Res)
7408 Res = CurSquare;
7409 else
7410 Res = Builder.buildFMul(Ty, *Res, CurSquare);
7411 }
7412
7413 CurSquare = Builder.buildFMul(Ty, CurSquare, CurSquare);
7414 ExpVal >>= 1;
7415 }
7416
7417 // If the original exponent was negative, invert the result, producing
7418 // 1/(x*x*x).
7419 if (Exponent < 0)
7420 Res = Builder.buildFDiv(Ty, Builder.buildFConstant(Ty, 1.0), *Res,
7421 MI.getFlags());
7422
7423 Builder.buildCopy(Dst, *Res);
7424 MI.eraseFromParent();
7425}
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
unsigned RegSize
static const LLT S1
amdgpu AMDGPU Register Bank Select
Rewrite undef for PHI
This file declares a class to represent arbitrary precision floating point values and provide a varie...
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static bool hasMoreUses(const MachineInstr &MI0, const MachineInstr &MI1, const MachineRegisterInfo &MRI)
static bool isContractableFMul(MachineInstr &MI, bool AllowFusionGlobally)
Checks if MI is TargetOpcode::G_FMUL and contractable either due to global flags or MachineInstr flag...
static unsigned getIndexedOpc(unsigned LdStOpc)
static APFloat constantFoldFpUnary(const MachineInstr &MI, const MachineRegisterInfo &MRI, const APFloat &Val)
static std::optional< std::pair< GZExtLoad *, int64_t > > matchLoadAndBytePosition(Register Reg, unsigned MemSizeInBits, const MachineRegisterInfo &MRI)
Helper function for findLoadOffsetsForLoadOrCombine.
static Register peekThroughBitcast(Register Reg, const MachineRegisterInfo &MRI)
static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I)
static cl::opt< bool > ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false), cl::desc("Force all indexed operations to be " "legal for the GlobalISel combiner"))
static cl::opt< unsigned > PostIndexUseThreshold("post-index-use-threshold", cl::Hidden, cl::init(32), cl::desc("Number of uses of a base pointer to check before it is no longer " "considered for post-indexing."))
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
static unsigned getExtLoadOpcForExtend(unsigned ExtOpc)
static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits, int64_t Cst, bool IsVector, bool IsFP)
static LLT getMidVTForTruncRightShiftCombine(LLT ShiftTy, LLT TruncTy)
static bool canFoldInAddressingMode(GLoadStore *MI, const TargetLowering &TLI, MachineRegisterInfo &MRI)
Return true if 'MI' is a load or a store that may be fold it's address operand into the load / store ...
static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I)
static Register buildLogBase2(Register V, MachineIRBuilder &MIB)
Determines the LogBase2 value for a non-null input value using the transform: LogBase2(V) = (EltBits ...
This contains common combine transformations that may be used in a combine pass,or by the target else...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
uint64_t Addr
uint64_t Size
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
Rewrite Partial Register Uses
This contains common code to allow clients to notify changes to machine instr.
Provides analysis for querying information about KnownBits during GISel passes.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
#define _
IRTranslator LLVM IR MI
static LVOptions Options
Definition: LVOptions.cpp:25
Interface for Targets to specify which operations they can successfully select and how the others sho...
Implement a low-level type suitable for MachineInstr level instruction selection.
#define I(x, y, z)
Definition: MD5.cpp:58
Contains matchers for matching SSA Machine Instructions.
mir Rename Register Operands
This file declares the MachineIRBuilder class.
unsigned const TargetRegisterInfo * TRI
#define R2(n)
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
uint64_t IntrinsicInst * II
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
R600 Clause Merge
const SmallVectorImpl< MachineOperand > & Cond
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file implements a set that has insertion order iteration characteristics.
This file implements the SmallBitVector class.
This file describes how to lower LLVM code to machine code.
Value * RHS
Value * LHS
const fltSemantics & getSemantics() const
Definition: APFloat.h:1362
bool isNaN() const
Definition: APFloat.h:1352
opStatus fusedMultiplyAdd(const APFloat &Multiplicand, const APFloat &Addend, roundingMode RM)
Definition: APFloat.h:1146
APInt bitcastToAPInt() const
Definition: APFloat.h:1260
Class for arbitrary precision integers.
Definition: APInt.h:78
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1500
APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition: APInt.cpp:1002
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:906
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition: APInt.h:351
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1162
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:360
APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition: APInt.cpp:1636
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1448
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition: APInt.h:1091
int32_t exactLogBase2() const
Definition: APInt.h:1741
void ashrInPlace(unsigned ShiftAmt)
Arithmetic right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:814
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition: APInt.h:1598
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition: APInt.h:1557
APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
Definition: APInt.cpp:1010
unsigned countl_one() const
Count the number of leading one bits.
Definition: APInt.h:1574
APInt multiplicativeInverse() const
Definition: APInt.cpp:1244
bool isMask(unsigned numBits) const
Definition: APInt.h:468
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:420
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:180
bool isOne() const
Determine if this is a value of 1.
Definition: APInt.h:369
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:219
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1522
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:838
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:831
unsigned countr_one() const
Count the number of trailing one bits.
Definition: APInt.h:1615
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition: APInt.h:1201
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
AttributeSet getAttributes(unsigned Index) const
The attributes for the specified index are returned.
bool isEquality() const
Determine if this is an equals/not equals predicate.
Definition: InstrTypes.h:997
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:757
@ FCMP_TRUE
1 1 1 1 Always true (always folded)
Definition: InstrTypes.h:774
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:786
@ ICMP_SLE
signed less or equal
Definition: InstrTypes.h:787
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition: InstrTypes.h:763
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
Definition: InstrTypes.h:772
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition: InstrTypes.h:761
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition: InstrTypes.h:762
@ ICMP_UGE
unsigned greater or equal
Definition: InstrTypes.h:781
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:780
@ ICMP_SGT
signed greater than
Definition: InstrTypes.h:784
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition: InstrTypes.h:771
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:782
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition: InstrTypes.h:769
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition: InstrTypes.h:764
@ ICMP_EQ
equal
Definition: InstrTypes.h:778
@ ICMP_NE
not equal
Definition: InstrTypes.h:779
@ ICMP_SGE
signed greater or equal
Definition: InstrTypes.h:785
@ ICMP_ULE
unsigned less or equal
Definition: InstrTypes.h:783
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition: InstrTypes.h:770
@ FCMP_FALSE
0 0 0 0 Always false (always folded)
Definition: InstrTypes.h:759
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition: InstrTypes.h:909
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition: InstrTypes.h:871
static bool isOrdered(Predicate predicate)
Determine if the predicate is an ordered operation.
void applyUDivByConst(MachineInstr &MI)
void applyCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal)
bool matchCombineShuffleVector(MachineInstr &MI, SmallVectorImpl< Register > &Ops)
Check if the G_SHUFFLE_VECTOR MI can be replaced by a concat_vectors.
bool matchPtrAddZero(MachineInstr &MI)
}
bool matchAllExplicitUsesAreUndef(MachineInstr &MI)
Return true if all register explicit use operands on MI are defined by a G_IMPLICIT_DEF.
void replaceSingleDefInstWithOperand(MachineInstr &MI, unsigned OpIdx)
Delete MI and replace all of its uses with its OpIdx-th operand.
const RegisterBank * getRegBank(Register Reg) const
Get the register bank of Reg.
bool matchReassocPtrAdd(MachineInstr &MI, BuildFnTy &MatchInfo)
Reassociate pointer calculations with G_ADD involved, to allow better addressing mode usage.
bool matchUDivByConst(MachineInstr &MI)
Combine G_UDIV by constant into a multiply by magic constant.
void applyExtractVecEltBuildVec(MachineInstr &MI, Register &Reg)
bool matchInsertExtractVecEltOutOfBounds(MachineInstr &MI)
Return true if a G_{EXTRACT,INSERT}_VECTOR_ELT has an out of range index.
bool matchShiftsTooBig(MachineInstr &MI)
Match shifts greater or equal to the bitwidth of the operation.
bool tryCombineCopy(MachineInstr &MI)
If MI is COPY, try to combine it.
bool matchTruncLshrBuildVectorFold(MachineInstr &MI, Register &MatchInfo)
bool matchUndefStore(MachineInstr &MI)
Return true if a G_STORE instruction MI is storing an undef value.
bool matchRedundantBinOpInEquality(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform: (X + Y) == X -> Y == 0 (X - Y) == X -> Y == 0 (X ^ Y) == X -> Y == 0 (X + Y) !...
bool matchRedundantSExtInReg(MachineInstr &MI)
bool matchCombineFAddFpExtFMulToFMadOrFMAAggressive(MachineInstr &MI, BuildFnTy &MatchInfo)
bool matchReassocConstantInnerRHS(GPtrAdd &MI, MachineInstr *RHS, BuildFnTy &MatchInfo)
bool matchFPowIExpansion(MachineInstr &MI, int64_t Exponent)
Match FPOWI if it's safe to extend it into a series of multiplications.
bool matchSubAddSameReg(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform: (x + y) - y -> x (x + y) - x -> y x - (y + x) -> 0 - y x - (x + z) -> 0 - z.
bool matchConstantFoldFPBinOp(MachineInstr &MI, ConstantFP *&MatchInfo)
Do constant FP folding when opportunities are exposed after MIR building.
void applyCombineShiftToUnmerge(MachineInstr &MI, const unsigned &ShiftVal)
void applyCombineUnmergeZExtToZExt(MachineInstr &MI)
void applyCommuteBinOpOperands(MachineInstr &MI)
bool matchBinOpSameVal(MachineInstr &MI)
Optimize (x op x) -> x.
void applyCombineUnmergeConstant(MachineInstr &MI, SmallVectorImpl< APInt > &Csts)
bool matchCombineFSubFNegFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z)) (fsub (fneg (fmul,...
bool matchCombineCopy(MachineInstr &MI)
bool matchConstantSelectCmp(MachineInstr &MI, unsigned &OpIdx)
Return true if a G_SELECT instruction MI has a constant comparison.
void eraseInst(MachineInstr &MI)
Erase MI.
void replaceRegWith(MachineRegisterInfo &MRI, Register FromReg, Register ToReg) const
MachineRegisterInfo::replaceRegWith() and inform the observer of the changes.
void replaceRegOpWith(MachineRegisterInfo &MRI, MachineOperand &FromRegOp, Register ToReg) const
Replace a single register operand with a new register and inform the observer of the changes.
bool matchCombineFAddFMAFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z)) (fadd (fmad x,...
void applySimplifyAddToSub(MachineInstr &MI, std::tuple< Register, Register > &MatchInfo)
bool matchSimplifySelectToMinMax(MachineInstr &MI, BuildFnTy &MatchInfo)
bool matchCombineConcatVectors(MachineInstr &MI, SmallVector< Register > &Ops)
If MI is G_CONCAT_VECTORS, try to combine it.
bool matchAddSubSameReg(MachineInstr &MI, Register &Src)
Transform G_ADD(x, G_SUB(y, x)) to y.
void applyRotateOutOfRange(MachineInstr &MI)
bool matchMulOBy2(MachineInstr &MI, BuildFnTy &MatchInfo)
Match: (G_UMULO x, 2) -> (G_UADDO x, x) (G_SMULO x, 2) -> (G_SADDO x, x)
bool matchRotateOutOfRange(MachineInstr &MI)
void applyCombineConstPtrAddToI2P(MachineInstr &MI, APInt &NewCst)
void applyCombineTruncOfShift(MachineInstr &MI, std::pair< MachineInstr *, LLT > &MatchInfo)
void applyCombineShuffleVector(MachineInstr &MI, const ArrayRef< Register > Ops)
Replace MI with a concat_vectors with Ops.
const TargetLowering & getTargetLowering() const
void applyBuildFnNoErase(MachineInstr &MI, BuildFnTy &MatchInfo)
Use a function which takes in a MachineIRBuilder to perform a combine.
void applyPtrAddZero(MachineInstr &MI)
bool matchTruncBuildVectorFold(MachineInstr &MI, Register &MatchInfo)
void setRegBank(Register Reg, const RegisterBank *RegBank)
Set the register bank of Reg.
bool matchRedundantAnd(MachineInstr &MI, Register &Replacement)
void replaceInstWithConstant(MachineInstr &MI, int64_t C)
Replace an instruction with a G_CONSTANT with value C.
bool matchAshrShlToSextInreg(MachineInstr &MI, std::tuple< Register, int64_t > &MatchInfo)
Match ashr (shl x, C), C -> sext_inreg (C)
bool tryCombineExtendingLoads(MachineInstr &MI)
If MI is extend that consumes the result of a load, try to combine it.
bool tryCombineShiftToUnmerge(MachineInstr &MI, unsigned TargetShiftAmount)
bool matchCombineUnmergeUndef(MachineInstr &MI, std::function< void(MachineIRBuilder &)> &MatchInfo)
Transform G_UNMERGE G_IMPLICIT_DEF -> G_IMPLICIT_DEF, G_IMPLICIT_DEF, ...
void applySDivByConst(MachineInstr &MI)
bool matchUndefSelectCmp(MachineInstr &MI)
Return true if a G_SELECT instruction MI has an undef comparison.
void replaceInstWithUndef(MachineInstr &MI)
Replace an instruction with a G_IMPLICIT_DEF.
bool matchRedundantOr(MachineInstr &MI, Register &Replacement)
bool matchOperandIsUndef(MachineInstr &MI, unsigned OpIdx)
Check if operand OpIdx is undef.
void applyBuildFn(MachineInstr &MI, BuildFnTy &MatchInfo)
Use a function which takes in a MachineIRBuilder to perform a combine.
bool matchCombineConstPtrAddToI2P(MachineInstr &MI, APInt &NewCst)
void replaceInstWithFConstant(MachineInstr &MI, double C)
Replace an instruction with a G_FCONSTANT with value C.
bool matchBitfieldExtractFromSExtInReg(MachineInstr &MI, BuildFnTy &MatchInfo)
Form a G_SBFX from a G_SEXT_INREG fed by a right shift.
bool matchEqualDefs(const MachineOperand &MOP1, const MachineOperand &MOP2)
Return true if MOP1 and MOP2 are register operands are defined by equivalent instructions.
bool matchShiftImmedChain(MachineInstr &MI, RegisterImmPair &MatchInfo)
Fold (shift (shift base, x), y) -> (shift base (x+y))
bool matchPtrAddImmedChain(MachineInstr &MI, PtrAddChain &MatchInfo)
void applyShiftImmedChain(MachineInstr &MI, RegisterImmPair &MatchInfo)
void applyOptBrCondByInvertingCond(MachineInstr &MI, MachineInstr *&BrCond)
bool matchMulOBy0(MachineInstr &MI, BuildFnTy &MatchInfo)
Match: (G_*MULO x, 0) -> 0 + no carry out.
void replaceSingleDefInstWithReg(MachineInstr &MI, Register Replacement)
Delete MI and replace all of its uses with Replacement.
bool matchFunnelShiftToRotate(MachineInstr &MI)
Match an FSHL or FSHR that can be combined to a ROTR or ROTL rotate.
bool matchNotCmp(MachineInstr &MI, SmallVectorImpl< Register > &RegsToNegate)
Combine inverting a result of a compare into the opposite cond code.
void applyCombineExtOfExt(MachineInstr &MI, std::tuple< Register, unsigned > &MatchInfo)
void replaceOpcodeWith(MachineInstr &FromMI, unsigned ToOpcode) const
Replace the opcode in instruction with a new opcode and inform the observer of the changes.
bool matchOperandIsKnownToBeAPowerOfTwo(MachineInstr &MI, unsigned OpIdx)
Check if operand OpIdx is known to be a power of 2.
void applyCombineCopy(MachineInstr &MI)
bool matchAnyExplicitUseIsUndef(MachineInstr &MI)
Return true if any explicit use operand on MI is defined by a G_IMPLICIT_DEF.
bool matchFsubToFneg(MachineInstr &MI, Register &MatchInfo)
void applyCombineAddP2IToPtrAdd(MachineInstr &MI, std::pair< Register, bool > &PtrRegAndCommute)
bool matchNarrowBinopFeedingAnd(MachineInstr &MI, BuildFnTy &MatchInfo)
void applyCombineConcatVectors(MachineInstr &MI, SmallVector< Register > &Ops)
Replace MI with a flattened build_vector with Ops or an implicit_def if Ops is empty.
bool matchSextTruncSextLoad(MachineInstr &MI)
bool matchShiftOfShiftedLogic(MachineInstr &MI, ShiftOfShiftedLogic &MatchInfo)
If we have a shift-by-constant of a bitwise logic op that itself has a shift-by-constant operand with...
GISelKnownBits * KB
bool matchExtractAllEltsFromBuildVector(MachineInstr &MI, SmallVectorImpl< std::pair< Register, MachineInstr * > > &MatchInfo)
void applyCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo)
MachineInstr * buildSDivUsingMul(MachineInstr &MI)
Given an G_SDIV MI expressing a signed divide by constant, return an expression that implements it by...
void applySDivByPow2(MachineInstr &MI)
void applyFunnelShiftConstantModulo(MachineInstr &MI)
Replaces the shift amount in MI with ShiftAmt % BW.
bool matchConstantFoldBinOp(MachineInstr &MI, APInt &MatchInfo)
Do constant folding when opportunities are exposed after MIR building.
bool isPreLegalize() const
bool matchCombineLoadWithAndMask(MachineInstr &MI, BuildFnTy &MatchInfo)
Match (and (load x), mask) -> zextload x.
bool matchConstantOp(const MachineOperand &MOP, int64_t C)
Return true if MOP is defined by a G_CONSTANT or splat with a value equal to C.
bool matchCombineFSubFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform (fsub (fmul x, y), z) -> (fma x, y, -z) (fsub (fmul x, y), z) -> (fmad x,...
bool matchAnd(MachineInstr &MI, BuildFnTy &MatchInfo)
Combine ands.
void applyCombineI2PToP2I(MachineInstr &MI, Register &Reg)
void applyNotCmp(MachineInstr &MI, SmallVectorImpl< Register > &RegsToNegate)
void applyCombineExtendingLoads(MachineInstr &MI, PreferredTuple &MatchInfo)
bool matchConstantFPOp(const MachineOperand &MOP, double C)
Return true if MOP is defined by a G_FCONSTANT or splat with a value exactly equal to C.
bool matchSimplifyAddToSub(MachineInstr &MI, std::tuple< Register, Register > &MatchInfo)
Return true if MI is a G_ADD which can be simplified to a G_SUB.
bool tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0)
Optimize memcpy intrinsics et al, e.g.
bool matchSelectSameVal(MachineInstr &MI)
Optimize (cond ? x : x) -> x.
void applyCombineConstantFoldFpUnary(MachineInstr &MI, const ConstantFP *Cst)
Transform fp_instr(cst) to constant result of the fp operation.
bool matchCombineExtendingLoads(MachineInstr &MI, PreferredTuple &MatchInfo)
bool tryReassocBinOp(unsigned Opc, Register DstReg, Register Op0, Register Op1, BuildFnTy &MatchInfo)
Try to reassociate to reassociate operands of a commutative binop.
bool isConstantLegalOrBeforeLegalizer(const LLT Ty) const
bool tryEmitMemcpyInline(MachineInstr &MI)
Emit loads and stores that perform the given memcpy.
void applyXorOfAndWithSameReg(MachineInstr &MI, std::pair< Register, Register > &MatchInfo)
bool matchXorOfAndWithSameReg(MachineInstr &MI, std::pair< Register, Register > &MatchInfo)
Fold (xor (and x, y), y) -> (and (not x), y) {.
bool matchCombineFSubFpExtFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform (fsub (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), (fneg z)) (fsub (fpext (fmul x,...
bool matchCombineFMinMaxNaN(MachineInstr &MI, unsigned &Info)
bool matchCombineShlOfExtend(MachineInstr &MI, RegisterImmPair &MatchData)
bool matchConstantFoldFMA(MachineInstr &MI, ConstantFP *&MatchInfo)
Constant fold G_FMA/G_FMAD.
bool matchBitfieldExtractFromAnd(MachineInstr &MI, BuildFnTy &MatchInfo)
Match: and (lshr x, cst), mask -> ubfx x, cst, width.
void applyShiftOfShiftedLogic(MachineInstr &MI, ShiftOfShiftedLogic &MatchInfo)
void applyExpandFPowI(MachineInstr &MI, int64_t Exponent)
Expands FPOWI into a series of multiplications and a division if the exponent is negative.
bool isLegal(const LegalityQuery &Query) const
bool matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo)
Combine selects.
bool matchCombineUnmergeConstant(MachineInstr &MI, SmallVectorImpl< APInt > &Csts)
Transform G_UNMERGE Constant -> Constant1, Constant2, ...
bool matchICmpToTrueFalseKnownBits(MachineInstr &MI, int64_t &MatchInfo)
bool matchCombineAnyExtTrunc(MachineInstr &MI, Register &Reg)
Transform anyext(trunc(x)) to x.
void applySimplifyURemByPow2(MachineInstr &MI)
Combine G_UREM x, (known power of 2) to an add and bitmasking.
bool matchReassocFoldConstantsInSubTree(GPtrAdd &MI, MachineInstr *LHS, MachineInstr *RHS, BuildFnTy &MatchInfo)
void applyCombineShuffleConcat(MachineInstr &MI, SmallVector< Register > &Ops)
Replace MI with a flattened build_vector with Ops or an implicit_def if Ops is empty.
MachineRegisterInfo & MRI
void applyUMulHToLShr(MachineInstr &MI)
bool matchLoadOrCombine(MachineInstr &MI, BuildFnTy &MatchInfo)
Match expression trees of the form.
bool matchShuffleToExtract(MachineInstr &MI)
bool matchUndefShuffleVectorMask(MachineInstr &MI)
Return true if a G_SHUFFLE_VECTOR instruction MI has an undef mask.
bool isLegalOrBeforeLegalizer(const LegalityQuery &Query) const
bool matchExtendThroughPhis(MachineInstr &MI, MachineInstr *&ExtMI)
bool matchAndOrDisjointMask(MachineInstr &MI, BuildFnTy &MatchInfo)
bool matchCombineExtractedVectorLoad(MachineInstr &MI, BuildFnTy &MatchInfo)
Combine a G_EXTRACT_VECTOR_ELT of a load into a narrowed load.
bool matchCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal)
Transform a multiply by a power-of-2 value to a left shift.
bool matchFreezeOfSingleMaybePoisonOperand(MachineInstr &MI, BuildFnTy &MatchInfo)
bool matchBitfieldExtractFromShr(MachineInstr &MI, BuildFnTy &MatchInfo)
Match: shr (shl x, n), k -> sbfx/ubfx x, pos, width.
void applyFoldBinOpIntoSelect(MachineInstr &MI, const unsigned &SelectOpNo)
SelectOperand is the operand in binary operator MI that is the select to fold.
bool matchBuildVectorIdentityFold(MachineInstr &MI, Register &MatchInfo)
bool matchCombineFAddFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform (fadd (fmul x, y), z) -> (fma x, y, z) (fadd (fmul x, y), z) -> (fmad x,...
bool matchRedundantNegOperands(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform (fadd x, fneg(y)) -> (fsub x, y) (fadd fneg(x), y) -> (fsub y, x) (fsub x,...
bool matchCombineMergeUnmerge(MachineInstr &MI, Register &MatchInfo)
Fold away a merge of an unmerge of the corresponding values.
void applyCombineInsertVecElts(MachineInstr &MI, SmallVectorImpl< Register > &MatchInfo)
bool matchCombineUnmergeZExtToZExt(MachineInstr &MI)
Transform X, Y = G_UNMERGE(G_ZEXT(Z)) -> X = G_ZEXT(Z); Y = G_CONSTANT 0.
bool matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI)
Transform X, Y<dead> = G_UNMERGE Z -> X = G_TRUNC Z.
bool matchConstantLargerBitWidth(MachineInstr &MI, unsigned ConstIdx)
Checks if constant at ConstIdx is larger than MI 's bitwidth.
CombinerHelper(GISelChangeObserver &Observer, MachineIRBuilder &B, bool IsPreLegalize, GISelKnownBits *KB=nullptr, MachineDominatorTree *MDT=nullptr, const LegalizerInfo *LI=nullptr)
bool matchCombineDivRem(MachineInstr &MI, MachineInstr *&OtherMI)
Try to combine G_[SU]DIV and G_[SU]REM into a single G_[SU]DIVREM when their source operands are iden...
bool isPredecessor(const MachineInstr &DefMI, const MachineInstr &UseMI)
Returns true if DefMI precedes UseMI or they are the same instruction.
bool matchDivByPow2(MachineInstr &MI, bool IsSigned)
Given an G_SDIV MI expressing a signed divided by a pow2 constant, return expressions that implements...
bool matchExtractVecEltBuildVec(MachineInstr &MI, Register &Reg)
bool matchUMulHToLShr(MachineInstr &MI)
bool dominates(const MachineInstr &DefMI, const MachineInstr &UseMI)
Returns true if DefMI dominates UseMI.
MachineInstr * buildUDivUsingMul(MachineInstr &MI)
Given an G_UDIV MI expressing a divide by constant, return an expression that implements it by multip...
bool matchCombineZextTrunc(MachineInstr &MI, Register &Reg)
Transform zext(trunc(x)) to x.
void applyCombineShlOfExtend(MachineInstr &MI, const RegisterImmPair &MatchData)
bool canCombineFMadOrFMA(MachineInstr &MI, bool &AllowFusionGlobally, bool &HasFMAD, bool &Aggressive, bool CanReassociate=false)
const LegalizerInfo * LI
void applyCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI)
void applyShuffleToExtract(MachineInstr &MI)
MachineDominatorTree * MDT
bool matchSDivByConst(MachineInstr &MI)
void applySextInRegOfLoad(MachineInstr &MI, std::tuple< Register, unsigned > &MatchInfo)
bool matchCombineUnmergeMergeToPlainValues(MachineInstr &MI, SmallVectorImpl< Register > &Operands)
Transform <ty,...> G_UNMERGE(G_MERGE ty X, Y, Z) -> ty X, Y, Z.
void applyExtractAllEltsFromBuildVector(MachineInstr &MI, SmallVectorImpl< std::pair< Register, MachineInstr * > > &MatchInfo)
void applyBuildFnMO(const MachineOperand &MO, BuildFnTy &MatchInfo)
Use a function which takes in a MachineIRBuilder to perform a combine.
bool matchCombineTruncOfShift(MachineInstr &MI, std::pair< MachineInstr *, LLT > &MatchInfo)
Transform trunc (shl x, K) to shl (trunc x), K if K < VT.getScalarSizeInBits().
const RegisterBankInfo * RBI
bool matchCommuteShift(MachineInstr &MI, BuildFnTy &MatchInfo)
bool matchCombineFAddFpExtFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z) (fadd (fpext (fmul x,...
bool matchReassocConstantInnerLHS(GPtrAdd &MI, MachineInstr *LHS, MachineInstr *RHS, BuildFnTy &MatchInfo)
void applyExtendThroughPhis(MachineInstr &MI, MachineInstr *&ExtMI)
const TargetRegisterInfo * TRI
bool tryCombineShuffleVector(MachineInstr &MI)
Try to combine G_SHUFFLE_VECTOR into G_CONCAT_VECTORS.
bool matchCombineI2PToP2I(MachineInstr &MI, Register &Reg)
Transform IntToPtr(PtrToInt(x)) to x if cast is in the same address space.
bool matchICmpToLHSKnownBits(MachineInstr &MI, BuildFnTy &MatchInfo)
GISelChangeObserver & Observer
bool matchCombineExtOfExt(MachineInstr &MI, std::tuple< Register, unsigned > &MatchInfo)
Transform [asz]ext([asz]ext(x)) to [asz]ext x.
bool matchOverlappingAnd(MachineInstr &MI, BuildFnTy &MatchInfo)
Fold and(and(x, C1), C2) -> C1&C2 ? and(x, C1&C2) : 0.
bool matchSextInRegOfLoad(MachineInstr &MI, std::tuple< Register, unsigned > &MatchInfo)
Match sext_inreg(load p), imm -> sextload p.
bool matchCombineInsertVecElts(MachineInstr &MI, SmallVectorImpl< Register > &MatchInfo)
bool matchCombineAddP2IToPtrAdd(MachineInstr &MI, std::pair< Register, bool > &PtrRegAndCommute)
Transform G_ADD (G_PTRTOINT x), y -> G_PTRTOINT (G_PTR_ADD x, y) Transform G_ADD y,...
bool matchOr(MachineInstr &MI, BuildFnTy &MatchInfo)
Combine ors.
void applyFunnelShiftToRotate(MachineInstr &MI)
void applyCombineUnmergeMergeToPlainValues(MachineInstr &MI, SmallVectorImpl< Register > &Operands)
bool matchOptBrCondByInvertingCond(MachineInstr &MI, MachineInstr *&BrCond)
If a brcond's true block is not the fallthrough, make it so by inverting the condition and swapping o...
bool matchAddEToAddO(MachineInstr &MI, BuildFnTy &MatchInfo)
Match: (G_*ADDE x, y, 0) -> (G_*ADDO x, y) (G_*SUBE x, y, 0) -> (G_*SUBO x, y)
bool matchAddOverflow(MachineInstr &MI, BuildFnTy &MatchInfo)
Combine addos.
void applyCombineP2IToI2P(MachineInstr &MI, Register &Reg)
Transform PtrToInt(IntToPtr(x)) to x.
bool matchCombineShiftToUnmerge(MachineInstr &MI, unsigned TargetShiftSize, unsigned &ShiftVal)
Reduce a shift by a constant to an unmerge and a shift on a half sized type.
bool matchCommuteConstantToRHS(MachineInstr &MI)
Match constant LHS ops that should be commuted.
void applyPtrAddImmedChain(MachineInstr &MI, PtrAddChain &MatchInfo)
void applyCombineDivRem(MachineInstr &MI, MachineInstr *&OtherMI)
void applyFsubToFneg(MachineInstr &MI, Register &MatchInfo)
void applyBuildInstructionSteps(MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo)
Replace MI with a series of instructions described in MatchInfo.
bool matchCombineFSubFpExtFNegFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform (fsub (fpext (fneg (fmul x, y))), z) -> (fneg (fma (fpext x), (fpext y),...
MachineIRBuilder & Builder
bool matchSelectIMinMax(const MachineOperand &MO, BuildFnTy &MatchInfo)
Combine select to integer min/max.
bool matchBitfieldExtractFromShrAnd(MachineInstr &MI, BuildFnTy &MatchInfo)
Match: shr (and x, n), k -> ubfx x, pos, width.
bool matchCombineShuffleConcat(MachineInstr &MI, SmallVector< Register > &Ops)
bool matchReassocCommBinOp(MachineInstr &MI, BuildFnTy &MatchInfo)
Reassociate commutative binary operations like G_ADD.
bool matchFoldBinOpIntoSelect(MachineInstr &MI, unsigned &SelectOpNo)
Push a binary operator through a select on constants.
bool matchConstantFoldCastOp(MachineInstr &MI, APInt &MatchInfo)
Do constant folding when opportunities are exposed after MIR building.
bool matchOperandIsZero(MachineInstr &MI, unsigned OpIdx)
Check if operand OpIdx is zero.
bool matchOrShiftToFunnelShift(MachineInstr &MI, BuildFnTy &MatchInfo)
void applyUDivByPow2(MachineInstr &MI)
Given an G_UDIV MI expressing an unsigned divided by a pow2 constant, return expressions that impleme...
bool matchHoistLogicOpWithSameOpcodeHands(MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo)
Match (logic_op (op x...), (op y...)) -> (op (logic_op x, y))
void applyAshShlToSextInreg(MachineInstr &MI, std::tuple< Register, int64_t > &MatchInfo)
void applySextTruncSextLoad(MachineInstr &MI)
bool matchCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo)
bool matchCommuteFPConstantToRHS(MachineInstr &MI)
Match constant LHS FP ops that should be commuted.
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:269
const APFloat & getValue() const
Definition: Constants.h:313
const APFloat & getValueAPF() const
Definition: Constants.h:312
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:146
This class represents a range of values.
Definition: ConstantRange.h:47
std::optional< ConstantRange > exactUnionWith(const ConstantRange &CR) const
Union the two ranges and return the result if it can be represented exactly, otherwise return std::nu...
ConstantRange subtract(const APInt &CI) const
Subtract the specified constant from the endpoints of this constant range.
static ConstantRange fromKnownBits(const KnownBits &Known, bool IsSigned)
Initialize a range based on a known bits constraint.
const APInt & getLower() const
Return the lower value for this range.
OverflowResult unsignedAddMayOverflow(const ConstantRange &Other) const
Return whether unsigned add of the two ranges always/never overflows.
bool isWrappedSet() const
Return true if this set wraps around the unsigned domain.
const APInt & getUpper() const
Return the upper value for this range.
static ConstantRange makeExactICmpRegion(CmpInst::Predicate Pred, const APInt &Other)
Produce the exact range such that all values in the returned range satisfy the given predicate with a...
OverflowResult signedAddMayOverflow(const ConstantRange &Other) const
Return whether signed add of the two ranges always/never overflows.
@ AlwaysOverflowsHigh
Always overflows in the direction of signed/unsigned max value.
@ AlwaysOverflowsLow
Always overflows in the direction of signed/unsigned min value.
@ MayOverflow
May or may not overflow.
This is an important base class in LLVM.
Definition: Constant.h:42
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
bool isBigEndian() const
Definition: DataLayout.h:239
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:202
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&... Args)
Definition: DenseMap.h:235
unsigned size() const
Definition: DenseMap.h:99
iterator end()
Definition: DenseMap.h:84
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:358
Represents overflowing add operations.
Represents an integer addition.
Represents a logical and.
CmpInst::Predicate getCond() const
Register getLHSReg() const
Register getRHSReg() const
Represents any generic load, including sign/zero extending variants.
Register getDstReg() const
Get the definition register of the loaded value.
Register getLHSReg() const
Register getRHSReg() const
Represents a G_BUILD_VECTOR.
Represent a G_FCMP.
Represent a G_ICMP.
Abstract class that contains various methods for clients to notify about changes.
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
void finishedChangingAllUsesOfReg()
All instructions reported as changing by changingAllUsesOfReg() have finished being changed.
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
virtual void erasingInstr(MachineInstr &MI)=0
An instruction is about to be erased.
void changingAllUsesOfReg(const MachineRegisterInfo &MRI, Register Reg)
All the instructions using the given register are being changed.
unsigned computeNumSignBits(Register R, const APInt &DemandedElts, unsigned Depth=0)
KnownBits getKnownBits(Register R)
APInt getKnownZeroes(Register R)
Simple wrapper observer that takes several observers, and calls each one for each event.
Represents a G_IMPLICIT_DEF.
Represents any type of generic load or store.
Register getPointerReg() const
Get the source register of the pointer value.
Represents a G_LOAD.
Represents a logical binary operation.
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
bool isAtomic() const
Returns true if the attached MachineMemOperand has the atomic flag set.
LocationSize getMemSizeInBits() const
Returns the size in bits of the memory access.
bool isSimple() const
Returns true if the memory operation is neither atomic or volatile.
Register getSourceReg(unsigned I) const
Returns the I'th source register.
unsigned getNumSources() const
Returns the number of source registers.
Represents a G_MERGE_VALUES.
Represents a logical or.
Represents a G_PTR_ADD.
Represents a G_SELECT.
Register getCondReg() const
Represents a G_ZEXTLOAD.
Register getReg(unsigned Idx) const
Access the Idx'th operand as a register and return it.
constexpr bool isScalableVector() const
Returns true if the LLT is a scalable vector.
Definition: LowLevelType.h:182
constexpr unsigned getScalarSizeInBits() const
Definition: LowLevelType.h:267
constexpr bool isScalar() const
Definition: LowLevelType.h:146
static constexpr LLT vector(ElementCount EC, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
Definition: LowLevelType.h:64
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
Definition: LowLevelType.h:42
constexpr bool isValid() const
Definition: LowLevelType.h:145
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
Definition: LowLevelType.h:159
constexpr bool isVector() const
Definition: LowLevelType.h:148
constexpr bool isByteSized() const
Definition: LowLevelType.h:263
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
Definition: LowLevelType.h:193
constexpr bool isPointer() const
Definition: LowLevelType.h:149
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
Definition: LowLevelType.h:290
constexpr ElementCount getElementCount() const
Definition: LowLevelType.h:184
constexpr LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
Definition: LowLevelType.h:221
constexpr unsigned getAddressSpace() const
Definition: LowLevelType.h:280
constexpr bool isFixedVector() const
Returns true if the LLT is a fixed vector.
Definition: LowLevelType.h:178
constexpr LLT getScalarType() const
Definition: LowLevelType.h:208
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
Definition: LowLevelType.h:203
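The LLT accessors above are cheap value queries, and constructing the types is equally lightweight. A small illustrative sketch (header path as in recent LLVM; it has moved across releases):
#include "llvm/CodeGenTypes/LowLevelType.h"
using namespace llvm;
static void lltSketch() {
  const LLT S32 = LLT::scalar(32);
  const LLT V4S32 = LLT::vector(ElementCount::getFixed(4), 32);
  (void)S32.isScalar();                  // true
  (void)V4S32.isFixedVector();           // true, a fixed 4 x s32 vector
  (void)V4S32.getNumElements();          // 4
  (void)V4S32.getScalarSizeInBits();     // 32
  const LLT V4S16 = V4S32.changeElementSize(16);
  (void)V4S16.getSizeInBits();           // TypeSize of 64 bits
}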
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
@ Legalized
Instruction has been legalized and the MachineFunction changed.
LegalizeResult lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0)
Register getVectorElementPointer(Register VecPtr, LLT VecTy, Register Index)
Get a pointer to vector element Index located in memory for a vector of type VecTy starting at a base...
bool isLegalOrCustom(const LegalityQuery &Query) const
LegalizeActionStep getAction(const LegalityQuery &Query) const
Determine what action should be taken to legalize the described instruction.
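isLegalOrCustom and getAction are how a post-legalization combine asks whether the instruction it wants to emit will still be selectable. A hedged sketch; the opcode and the guard logic are illustrative, not this file's actual checks:
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
using namespace llvm;
static bool canEmitSub(const LegalizerInfo *LI, bool IsPreLegalize, LLT Ty) {
  // Before the legalizer runs, any instruction may still be produced;
  // afterwards the target's legality table has the final say.
  if (IsPreLegalize)
    return true;
  return LI && LI->isLegalOrCustom({TargetOpcode::G_SUB, {Ty}});
}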
TypeSize getValue() const
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition: MCInstrInfo.h:63
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
bool isLayoutSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB will be emitted immediately after this block, such that if this bloc...
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
bool dominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Helper class to build MachineInstr.
MachineInstrBuilder buildFMul(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder insertInstr(MachineInstrBuilder MIB)
Insert an existing instruction at the insertion point.
void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II)
Set the insertion point before the specified position.
LLVMContext & getContext() const
MachineInstrBuilder buildAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ADD Op0, Op1.
MachineInstrBuilder buildUndef(const DstOp &Res)
Build and insert Res = IMPLICIT_DEF.
MachineInstrBuilder buildNot(const DstOp &Dst, const SrcOp &Src0)
Build and insert a bitwise not, NegOne = G_CONSTANT -1 Res = G_XOR Op0, NegOne.
MachineInstrBuilder buildCTTZ(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTTZ Src0.
MachineInstrBuilder buildAShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildUnmerge(ArrayRef< LLT > Res, const SrcOp &Op)
Build and insert Res0, ... = G_UNMERGE_VALUES Op.
MachineInstrBuilder buildSelect(const DstOp &Res, const SrcOp &Tst, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert a Res = G_SELECT Tst, Op0, Op1.
MachineInstrBuilder buildAnd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_AND Op0, Op1.
MachineInstrBuilder buildICmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1)
Build and insert a Res = G_ICMP Pred, Op0, Op1.
MachineInstrBuilder buildCast(const DstOp &Dst, const SrcOp &Src)
Build and insert an appropriate cast between two registers of equal size.
const TargetInstrInfo & getTII()
MachineInstrBuilder buildURem(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_UREM Op0, Op1.
MachineInstrBuilder buildLShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildZExt(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ZEXT Op.
MachineInstrBuilder buildConcatVectors(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_CONCAT_VECTORS Op0, ...
MachineInstrBuilder buildSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_SUB Op0, Op1.
MachineInstrBuilder buildIntToPtr(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_INTTOPTR instruction.
MachineInstrBuilder buildBuildVector(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_BUILD_VECTOR Op0, ...
MachineInstrBuilder buildNeg(const DstOp &Dst, const SrcOp &Src0)
Build and insert integer negation Zero = G_CONSTANT 0 Res = G_SUB Zero, Op0.
MachineInstrBuilder buildCTLZ(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTLZ Src0.
MachineInstrBuilder buildFDiv(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FDIV Op0, Op1.
MachineInstrBuilder buildMergeLikeInstr(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_MERGE_VALUES Op0, ... or Res = G_BUILD_VECTOR Op0, ... or Res = G_CONCAT_VEC...
MachineInstrBuilder buildPtrAdd(const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_PTR_ADD Op0, Op1.
MachineInstrBuilder buildZExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ZEXT Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes...
MachineInstrBuilder buildExtractVectorElementConstant(const DstOp &Res, const SrcOp &Val, const int Idx)
Build and insert Res = G_EXTRACT_VECTOR_ELT Val, Idx.
virtual MachineInstrBuilder buildFConstant(const DstOp &Res, const ConstantFP &Val)
Build and insert Res = G_FCONSTANT Val.
MachineInstrBuilder buildShl(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineFunction & getMF()
Getter for the function we currently build.
void setInstrAndDebugLoc(MachineInstr &MI)
Set the insertion point to before MI, and set the debug loc to MI's loc.
MachineInstrBuilder buildExtOrTrunc(unsigned ExtOpc, const DstOp &Res, const SrcOp &Op)
Build and insert Res = ExtOpc, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes of...
MachineInstrBuilder buildTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_TRUNC Op.
void setDebugLoc(const DebugLoc &DL)
Set the debug location to DL for all the next build instructions.
MachineInstrBuilder buildFNeg(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FNEG Op0.
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineInstrBuilder buildOr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_OR Op0, Op1.
MachineInstrBuilder buildInstrNoInsert(unsigned Opcode)
Build but don't insert <empty> = Opcode <empty>.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
MachineInstrBuilder buildLoadInstr(unsigned Opcode, const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = <opcode> Addr, MMO.
MachineInstrBuilder buildXor(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_XOR Op0, Op1.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
MachineInstrBuilder buildPtrToInt(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_PTRTOINT instruction.
MachineInstrBuilder buildFCanonicalize(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Dst = G_FCANONICALIZE Src0.
MachineInstrBuilder buildSExtInReg(const DstOp &Res, const SrcOp &Op, int64_t ImmOp)
Build and insert Res = G_SEXT_INREG Op, ImmOp.
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
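The MachineIRBuilder methods above are the building blocks for emitting replacement code at the current insertion point. A minimal sketch that defines Dst as (A != 0 ? A : B); all names are illustrative and the registers are assumed to be 32-bit scalars already in scope:
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/IR/InstrTypes.h"
using namespace llvm;
static void buildNonZeroSelect(MachineIRBuilder &MIB, Register Dst,
                               Register A, Register B) {
  const LLT S32 = LLT::scalar(32);
  const LLT S1 = LLT::scalar(1);
  auto Zero = MIB.buildConstant(S32, 0);                    // %zero = G_CONSTANT 0
  auto IsNZ = MIB.buildICmp(CmpInst::ICMP_NE, S1, A, Zero); // %c = G_ICMP ne A, %zero
  MIB.buildSelect(Dst, IsNZ, A, B);                         // Dst = G_SELECT %c, A, B
}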
Representation of each machine instruction.
Definition: MachineInstr.h:69
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:569
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:346
bool isDereferenceableInvariantLoad() const
Return true if this load instruction never traps and points to a memory location whose value doesn't ...
bool getFlag(MIFlag Flag) const
Return whether an MI flag is set.
Definition: MachineInstr.h:396
iterator_range< mop_iterator > uses()
Returns a range that includes all operands that are register uses.
Definition: MachineInstr.h:733
void cloneMemRefs(MachineFunction &MF, const MachineInstr &MI)
Clone another MachineInstr's memory reference descriptor list and replace ours with it.
unsigned getNumOperands() const
Returns the total number of operands.
Definition: MachineInstr.h:572
void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
MachineOperand * findRegisterUseOperand(Register Reg, const TargetRegisterInfo *TRI, bool isKill=false)
Wrapper for findRegisterUseOperandIdx, it returns a pointer to the MachineOperand rather than an inde...
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
bool isPHI() const
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:579
uint32_t getFlags() const
Return the MI flags bitvector.
Definition: MachineInstr.h:391
int findRegisterDefOperandIdx(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
A description of a memory reference used in the backend.
LLT getMemoryType() const
Return the memory type of the memory reference.
unsigned getAddrSpace() const
const MachinePointerInfo & getPointerInfo() const
Align getAlign() const
Return the minimum known alignment in bytes of the actual memory reference.
MachineOperand class - Representation of each machine instruction operand.
const ConstantInt * getCImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
void setReg(Register Reg)
Change the register this operand corresponds to.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void setMBB(MachineBasicBlock *MBB)
void setPredicate(unsigned Predicate)
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
unsigned getPredicate() const
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool hasOneNonDBGUse(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register.
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
use_instr_iterator use_instr_begin(Register RegNo) const
bool use_nodbg_empty(Register RegNo) const
use_nodbg_empty - Return true if there are no non-Debug instructions using the specified register.
const RegClassOrRegBank & getRegClassOrRegBank(Register Reg) const
Return the register bank or register class of Reg.
void setRegClassOrRegBank(Register Reg, const RegClassOrRegBank &RCOrRB)
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
bool hasOneUse(Register RegNo) const
hasOneUse - Return true if there is exactly one instruction using the specified register.
use_instr_nodbg_iterator use_instr_nodbg_begin(Register RegNo) const
void setRegBank(Register Reg, const RegisterBank &RegBank)
Set the register bank to RegBank for Reg.
iterator_range< use_instr_nodbg_iterator > use_nodbg_instructions(Register Reg) const
Register createGenericVirtualRegister(LLT Ty, StringRef Name="")
Create and return a new generic virtual register with low-level type Ty.
iterator_range< use_instr_iterator > use_instructions(Register Reg) const
Register cloneVirtualRegister(Register VReg, StringRef Name="")
Create and return a new virtual register in the function with the same attributes as the given regist...
bool constrainRegAttrs(Register Reg, Register ConstrainingReg, unsigned MinNumRegs=0)
Constrain the register class or the register bank of the virtual register Reg (and low-level type) to...
iterator_range< use_iterator > use_operands(Register Reg) const
void replaceRegWith(Register FromReg, Register ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
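The MachineRegisterInfo queries above support the usual "fold and forward" pattern: inspect the unique def, check types and constraints, then rewrite all uses. A hedged sketch, not the combiner's actual replacement logic (canReplaceReg is the GlobalISel utility listed further below):
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
using namespace llvm;
static bool forwardRegister(MachineRegisterInfo &MRI, Register Dst, Register Src) {
  // Bail out if the def is ambiguous, the low-level types differ, or register
  // class/bank constraints forbid the replacement.
  if (!MRI.getUniqueVRegDef(Src) || MRI.getType(Dst) != MRI.getType(Src))
    return false;
  if (!canReplaceReg(Dst, Src, MRI))
    return false;
  MRI.replaceRegWith(Dst, Src);     // every use of Dst now reads Src
  return true;
}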
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
This class implements the register bank concept.
Definition: RegisterBank.h:28
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isValid() const
Definition: Register.h:116
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:98
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:264
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
SmallBitVector & set()
bool all() const
Returns true if all bits are set.
size_type size() const
Definition: SmallPtrSet.h:94
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:344
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:479
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void resize(size_type N)
Definition: SmallVector.h:651
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
virtual bool isExtendLikelyToBeFolded(MachineInstr &ExtMI, MachineRegisterInfo &MRI) const
Given the generic extension instruction ExtMI, returns true if this extension is a likely candidate f...
virtual bool produceSameValue(const MachineInstr &MI0, const MachineInstr &MI1, const MachineRegisterInfo *MRI=nullptr) const
Return true if two machine instructions would produce identical values.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual LLVM_READONLY LLT getPreferredShiftAmountTy(LLT ShiftValueTy) const
Return the preferred type to use for a shift opcode, given the shifted amount type is ShiftValueTy.
bool isBeneficialToExpandPowI(int64_t Exponent, bool OptForSize) const
Return true if it is beneficial to expand an @llvm.powi.
virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AddrSpace, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual bool isReassocProfitable(SelectionDAG &DAG, SDValue N0, SDValue N1) const
virtual const TargetLowering * getTargetLowering() const
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
LLVM Value Representation.
Definition: Value.h:74
Value(Type *Ty, unsigned scid)
Definition: Value.cpp:53
self_iterator getIterator()
Definition: ilist_node.h:132
#define INT64_MAX
Definition: DataTypes.h:71
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ Legal
The operation is expected to be selectable directly by the target, and no transformation is necessary...
Definition: LegalizerInfo.h:47
operand_type_match m_Reg()
SpecificConstantOrSplatMatch m_SpecificICstOrSplat(int64_t RequestedValue)
Matches a RequestedValue constant or a constant splat of RequestedValue.
BinaryOp_match< LHS, RHS, TargetOpcode::G_BUILD_VECTOR, false > m_GBuildVector(const LHS &L, const RHS &R)
GCstAndRegMatch m_GCst(std::optional< ValueAndVReg > &ValReg)
SpecificConstantMatch m_SpecificICst(int64_t RequestedValue)
Matches a constant equal to RequestedValue.
operand_type_match m_Pred()
UnaryOp_match< SrcTy, TargetOpcode::G_ZEXT > m_GZExt(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_XOR, true > m_GXor(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_SEXT > m_GSExt(const SrcTy &Src)
UnaryOp_match< SrcTy, TargetOpcode::G_FPEXT > m_GFPExt(const SrcTy &Src)
ConstantMatch< APInt > m_ICst(APInt &Cst)
UnaryOp_match< SrcTy, TargetOpcode::G_INTTOPTR > m_GIntToPtr(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_ADD, true > m_GAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_OR, true > m_GOr(const LHS &L, const RHS &R)
ICstOrSplatMatch< APInt > m_ICstOrSplat(APInt &Cst)
ImplicitDefMatch m_GImplicitDef()
OneNonDBGUse_match< SubPat > m_OneNonDBGUse(const SubPat &SP)
CheckType m_SpecificType(LLT Ty)
BinaryOp_match< LHS, RHS, TargetOpcode::G_FADD, true > m_GFAdd(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_PTRTOINT > m_GPtrToInt(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_FSUB, false > m_GFSub(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SUB > m_GSub(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_ASHR, false > m_GAShr(const LHS &L, const RHS &R)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
BinaryOp_match< LHS, RHS, TargetOpcode::G_PTR_ADD, false > m_GPtrAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SHL, false > m_GShl(const LHS &L, const RHS &R)
Or< Preds... > m_any_of(Preds &&... preds)
BinaryOp_match< LHS, RHS, TargetOpcode::G_AND, true > m_GAnd(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_BITCAST > m_GBitcast(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_BUILD_VECTOR_TRUNC, false > m_GBuildVectorTrunc(const LHS &L, const RHS &R)
bind_ty< MachineInstr * > m_MInstr(MachineInstr *&MI)
UnaryOp_match< SrcTy, TargetOpcode::G_FNEG > m_GFNeg(const SrcTy &Src)
CompareOp_match< Pred, LHS, RHS, TargetOpcode::G_ICMP, true > m_c_GICmp(const Pred &P, const LHS &L, const RHS &R)
G_ICMP matcher that also matches commuted compares.
TernaryOp_match< Src0Ty, Src1Ty, Src2Ty, TargetOpcode::G_INSERT_VECTOR_ELT > m_GInsertVecElt(const Src0Ty &Src0, const Src1Ty &Src1, const Src2Ty &Src2)
GFCstOrSplatGFCstMatch m_GFCstOrSplat(std::optional< FPValueAndVReg > &FPValReg)
And< Preds... > m_all_of(Preds &&... preds)
BinaryOp_match< LHS, RHS, TargetOpcode::G_LSHR, false > m_GLShr(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_ANYEXT > m_GAnyExt(const SrcTy &Src)
UnaryOp_match< SrcTy, TargetOpcode::G_TRUNC > m_GTrunc(const SrcTy &Src)
CompareOp_match< Pred, LHS, RHS, TargetOpcode::G_FCMP > m_GFCmp(const Pred &P, const LHS &L, const RHS &R)
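The MIPatternMatch combinators above compose into declarative matchers over the generic-MI def-use graph. A small illustrative sketch that recognizes a G_ADD of a register and an integer constant (the helper name is made up):
#include "llvm/ADT/APInt.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
using namespace llvm;
using namespace MIPatternMatch;
static bool matchAddOfConstant(Register Dst, const MachineRegisterInfo &MRI,
                               Register &Src, APInt &Cst) {
  // m_GAdd matches commutatively, so the constant may sit on either operand.
  return mi_match(Dst, MRI, m_GAdd(m_Reg(Src), m_ICst(Cst)));
}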
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Definition: PatternMatch.h:100
BinaryOp_match< cst_pred_ty< is_zero_int >, ValTy, Instruction::Sub > m_Neg(const ValTy &V)
Matches a 'Neg' as 'sub 0, V'.
Not(const Pred &P) -> Not< Pred >
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
@ Offset
Definition: DWP.cpp:480
bool isBuildVectorAllZeros(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndef=false)
Return true if the specified instruction is a G_BUILD_VECTOR or G_BUILD_VECTOR_TRUNC where all of the...
Definition: Utils.cpp:1433
Type * getTypeForLLT(LLT Ty, LLVMContext &C)
Get the type back from LLT.
Definition: Utils.cpp:1974
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by a single def instruction that is Opcode.
Definition: Utils.cpp:639
static double log2(double V)
const ConstantFP * getConstantFPVRegVal(Register VReg, const MachineRegisterInfo &MRI)
Definition: Utils.cpp:452
EVT getApproximateEVTForLLT(LLT Ty, const DataLayout &DL, LLVMContext &Ctx)
std::optional< APInt > getIConstantVRegVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT, return the corresponding value.
Definition: Utils.cpp:295
std::optional< APInt > getIConstantSplatVal(const Register Reg, const MachineRegisterInfo &MRI)
Definition: Utils.cpp:1393
bool isAllOnesOrAllOnesSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant -1 integer or a splatted vector of a constant -1 integer (with...
Definition: Utils.cpp:1546
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition: bit.h:307
const llvm::fltSemantics & getFltSemanticForLLT(LLT Ty)
Get the appropriate floating point arithmetic semantic based on the bit size of the given scalar LLT.
std::optional< APFloat > ConstantFoldFPBinOp(unsigned Opcode, const Register Op1, const Register Op2, const MachineRegisterInfo &MRI)
Definition: Utils.cpp:727
MVT getMVTForLLT(LLT Ty)
Get a rough equivalent of an MVT for a given LLT.
bool isKnownToBeAPowerOfTwo(const Value *V, const DataLayout &DL, bool OrZero=false, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Return true if the given value is known to have exactly one bit set when defined.
std::optional< APInt > isConstantOrConstantSplatVector(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a constant integer or a splat vector of constant integers.
Definition: Utils.cpp:1516
bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition: Utils.cpp:1528
MachineInstr * getDefIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, folding away any trivial copies.
Definition: Utils.cpp:479
bool matchUnaryPredicate(const MachineRegisterInfo &MRI, Register Reg, std::function< bool(const Constant *ConstVal)> Match, bool AllowUndefs=false)
Attempt to match a unary predicate against a scalar/splat constant or every element of a constant G_B...
Definition: Utils.cpp:1561
bool isConstTrueVal(const TargetLowering &TLI, int64_t Val, bool IsVector, bool IsFP)
Returns true if given the TargetLowering's boolean contents information, the value Val contains a tru...
Definition: Utils.cpp:1593
std::function< void(MachineIRBuilder &)> BuildFnTy
std::optional< APInt > ConstantFoldBinOp(unsigned Opcode, const Register Op1, const Register Op2, const MachineRegisterInfo &MRI)
Definition: Utils.cpp:658
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
bool isConstantOrConstantVector(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowFP=true, bool AllowOpaqueConstants=true)
Return true if the specified instruction is known to be a constant, or a vector of constants.
Definition: Utils.cpp:1496
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:291
bool canReplaceReg(Register DstReg, Register SrcReg, MachineRegisterInfo &MRI)
Check if DstReg can be replaced with SrcReg depending on the register constraints.
Definition: Utils.cpp:201
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition: MathExtras.h:273
bool canCreateUndefOrPoison(const Operator *Op, bool ConsiderFlagsAndMetadata=true)
canCreateUndefOrPoison returns true if Op can create undef or poison from non-undef & non-poison oper...
auto instructionsWithoutDebug(IterT It, IterT End, bool SkipPseudoOp=true)
Construct a range iterator which begins at It and moves forwards until End is reached,...
std::optional< FPValueAndVReg > getFConstantSplat(Register VReg, const MachineRegisterInfo &MRI, bool AllowUndef=true)
Returns a floating point scalar constant of a build vector splat if it exists.
Definition: Utils.cpp:1426
std::optional< APInt > ConstantFoldCastOp(unsigned Opcode, LLT DstTy, const Register Op0, const MachineRegisterInfo &MRI)
Definition: Utils.cpp:953
@ Xor
Bitwise or logical XOR of integers.
@ Add
Sum of integers.
DWARFExpression::Operation Op
bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
std::optional< FPValueAndVReg > getFConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_FCONSTANT returns it...
Definition: Utils.cpp:440
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
int64_t getICmpTrueVal(const TargetLowering &TLI, bool IsVector, bool IsFP)
Returns an integer representing true, as defined by the TargetBooleanContents.
Definition: Utils.cpp:1618
std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Definition: Utils.cpp:426
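getIConstantVRegValWithLookThrough is the standard way to ask whether a virtual register is a known integer constant, looking through trunc/ext/copy chains; the returned ValueAndVReg carries both the APInt and the defining register. An illustrative sketch:
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
using namespace llvm;
static bool isPowerOfTwoConstant(Register Reg, const MachineRegisterInfo &MRI) {
  auto ValAndVReg = getIConstantVRegValWithLookThrough(Reg, MRI);
  // Absent value: Reg is not rooted on a G_CONSTANT.
  return ValAndVReg && ValAndVReg->Value.isPowerOf2();
}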
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1749
std::optional< DefinitionAndSourceRegister > getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg and the underlying value Register, folding away any copies.
Definition: Utils.cpp:460
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
bool isKnownNeverNaN(const Value *V, unsigned Depth, const SimplifyQuery &SQ)
Return true if the floating-point scalar value is not a NaN or if the floating-point vector value has...
Register getSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the source register for Reg, folding away any trivial copies.
Definition: Utils.cpp:486
unsigned getFCmpCode(CmpInst::Predicate CC)
Similar to getICmpCode but for FCmpInst.
std::optional< int64_t > getIConstantSplatSExtVal(const Register Reg, const MachineRegisterInfo &MRI)
Definition: Utils.cpp:1411
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:250
static const fltSemantics & IEEEdouble() LLVM_READNONE
Definition: APFloat.cpp:277
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Simple struct used to hold a Register value and the instruction which defines it.
Definition: Utils.h:224
Extended Value Type.
Definition: ValueTypes.h:34
SmallVector< InstructionBuildSteps, 2 > InstrsToBuild
Describes instructions to be built during a combine.
static std::optional< bool > eq(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_EQ result.
Definition: KnownBits.cpp:488
bool isUnknown() const
Returns true if we don't know any bits.
Definition: KnownBits.h:62
static std::optional< bool > ne(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_NE result.
Definition: KnownBits.cpp:496
static std::optional< bool > sge(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SGE result.
Definition: KnownBits.cpp:536
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition: KnownBits.h:237
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition: KnownBits.h:134
static std::optional< bool > ugt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_UGT result.
Definition: KnownBits.cpp:502
static std::optional< bool > slt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SLT result.
Definition: KnownBits.cpp:542
static std::optional< bool > ult(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_ULT result.
Definition: KnownBits.cpp:518
static std::optional< bool > ule(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_ULE result.
Definition: KnownBits.cpp:522
static std::optional< bool > sle(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SLE result.
Definition: KnownBits.cpp:546
static std::optional< bool > sgt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SGT result.
Definition: KnownBits.cpp:526
static std::optional< bool > uge(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_UGE result.
Definition: KnownBits.cpp:512
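The KnownBits comparison helpers above return std::nullopt when the known bits do not decide the predicate, which is what lets a combine fold an ICMP only when the answer is certain. A hedged sketch using the GISelKnownBits accessor listed earlier (class and header names as in this LLVM version):
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/Support/KnownBits.h"
#include <optional>
using namespace llvm;
static bool knownStrictlyBelow(GISelKnownBits &KB, Register LHS, Register RHS) {
  KnownBits L = KB.getKnownBits(LHS);
  KnownBits R = KB.getKnownBits(RHS);
  // std::nullopt: the known bits alone cannot decide ICMP_ULT.
  std::optional<bool> Res = KnownBits::ult(L, R);
  return Res.value_or(false);
}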
The LegalityQuery object bundles together all the information that's needed to decide whether a given...
LegalizeAction Action
The action to take or the final answer.
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands,...
unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
MachinePointerInfo getWithOffset(int64_t O) const
MachineInstr * MI
const RegisterBank * Bank
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
Magic data for optimising unsigned division by a constant.
static UnsignedDivisionByConstantInfo get(const APInt &D, unsigned LeadingZeros=0, bool AllowEvenDivisorOptimization=true)
Calculate the magic numbers required to implement an unsigned integer division by a constant as a seq...
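UnsignedDivisionByConstantInfo::get computes the multiplier and shift amounts of the classic magic-number algorithm; a combine can then emit a umulh/shift sequence instead of a division. A hedged sketch (member names as declared in llvm/Support/DivisionByConstantInfo.h):
#include "llvm/ADT/APInt.h"
#include "llvm/Support/DivisionByConstantInfo.h"
using namespace llvm;
static void udivMagicSketch(const APInt &Divisor) {
  UnsignedDivisionByConstantInfo Magics = UnsignedDivisionByConstantInfo::get(Divisor);
  // n u/ Divisor == (umulh(n >> PreShift, Magic) [+ fixup if IsAdd]) >> PostShift
  (void)Magics.Magic;      // the multiplier
  (void)Magics.PreShift;   // shift applied to the dividend before the multiply
  (void)Magics.PostShift;  // shift applied to the high half of the product
  (void)Magics.IsAdd;      // whether the add-based fixup path is required
}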