CombinerHelper.cpp
1//===-- lib/CodeGen/GlobalISel/CombinerHelper.cpp -------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
9#include "llvm/ADT/APFloat.h"
10#include "llvm/ADT/STLExtras.h"
11#include "llvm/ADT/SetVector.h"
34#include "llvm/IR/DataLayout.h"
35#include "llvm/IR/InstrTypes.h"
41#include <cmath>
42#include <optional>
43#include <tuple>
44
45#define DEBUG_TYPE "gi-combiner"
46
47using namespace llvm;
48using namespace MIPatternMatch;
49
50// Option to allow testing of the combiner while no targets know about indexed
51// addressing.
52static cl::opt<bool>
53 ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false),
54 cl::desc("Force all indexed operations to be "
55 "legal for the GlobalISel combiner"));
56
61 const LegalizerInfo *LI)
62 : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer), VT(VT),
64 RBI(Builder.getMF().getSubtarget().getRegBankInfo()),
65 TRI(Builder.getMF().getSubtarget().getRegisterInfo()) {
66 (void)this->VT;
67}
68
70 return *Builder.getMF().getSubtarget().getTargetLowering();
71}
72
74 return Builder.getMF();
75}
76
80
81LLVMContext &CombinerHelper::getContext() const { return Builder.getContext(); }
82
83/// \returns The little endian in-memory byte position of byte \p I in a
84/// \p ByteWidth bytes wide type.
85///
86/// E.g. Given a 4-byte type x, x[0] -> byte 0
87static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I) {
88 assert(I < ByteWidth && "I must be in [0, ByteWidth)");
89 return I;
90}
91
92/// Determines the LogBase2 value for a non-null input value using the
93/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
95 auto &MRI = *MIB.getMRI();
96 LLT Ty = MRI.getType(V);
97 auto Ctlz = MIB.buildCTLZ(Ty, V);
98 auto Base = MIB.buildConstant(Ty, Ty.getScalarSizeInBits() - 1);
99 return MIB.buildSub(Ty, Base, Ctlz).getReg(0);
100}
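// Illustrative example (not part of the original source; values chosen for
// clarity): for V = 8 held in an s32 register, G_CTLZ yields 28, so the
// sequence built above computes (32 - 1) - 28 = 3 == LogBase2(8):
//   %ctlz:_(s32) = G_CTLZ %v(s32)       ; 28 when %v = 8
//   %c31:_(s32)  = G_CONSTANT i32 31
//   %log2:_(s32) = G_SUB %c31, %ctlz    ; 3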
101
102/// \returns The big endian in-memory byte position of byte \p I in a
103/// \p ByteWidth bytes wide type.
104///
105/// E.g. Given a 4-byte type x, x[0] -> byte 3
106static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I) {
107 assert(I < ByteWidth && "I must be in [0, ByteWidth)");
108 return ByteWidth - I - 1;
109}
110
111/// Given a map from byte offsets in memory to indices in a load/store,
112/// determine if that map corresponds to a little or big endian byte pattern.
113///
114/// \param MemOffset2Idx maps memory offsets to address offsets.
115/// \param LowestIdx is the lowest index in \p MemOffset2Idx.
116///
117/// \returns true if the map corresponds to a big endian byte pattern, false if
118/// it corresponds to a little endian byte pattern, and std::nullopt otherwise.
119///
120/// E.g. given a 32-bit type x, and x[AddrOffset], the in-memory byte patterns
121/// are as follows:
122///
123/// AddrOffset Little endian Big endian
124/// 0 0 3
125/// 1 1 2
126/// 2 2 1
127/// 3 3 0
128static std::optional<bool>
130 int64_t LowestIdx) {
131 // Need at least two byte positions to decide on endianness.
132 unsigned Width = MemOffset2Idx.size();
133 if (Width < 2)
134 return std::nullopt;
135 bool BigEndian = true, LittleEndian = true;
136 for (unsigned MemOffset = 0; MemOffset < Width; ++MemOffset) {
137 auto MemOffsetAndIdx = MemOffset2Idx.find(MemOffset);
138 if (MemOffsetAndIdx == MemOffset2Idx.end())
139 return std::nullopt;
140 const int64_t Idx = MemOffsetAndIdx->second - LowestIdx;
141 assert(Idx >= 0 && "Expected non-negative byte offset?");
142 LittleEndian &= Idx == littleEndianByteAt(Width, MemOffset);
143 BigEndian &= Idx == bigEndianByteAt(Width, MemOffset);
144 if (!BigEndian && !LittleEndian)
145 return std::nullopt;
146 }
147
148 assert((BigEndian != LittleEndian) &&
149 "Pattern cannot be both big and little endian!");
150 return BigEndian;
151}
152
154
155bool CombinerHelper::isLegal(const LegalityQuery &Query) const {
156 assert(LI && "Must have LegalizerInfo to query isLegal!");
157 return LI->getAction(Query).Action == LegalizeActions::Legal;
158}
159
161 const LegalityQuery &Query) const {
162 return isPreLegalize() || isLegal(Query);
163}
164
166 return isLegal(Query) ||
167 LI->getAction(Query).Action == LegalizeActions::WidenScalar;
168}
169
171 if (!Ty.isVector())
172 return isLegalOrBeforeLegalizer({TargetOpcode::G_CONSTANT, {Ty}});
173 // Vector constants are represented as a G_BUILD_VECTOR of scalar G_CONSTANTs.
174 if (isPreLegalize())
175 return true;
176 LLT EltTy = Ty.getElementType();
177 return isLegal({TargetOpcode::G_BUILD_VECTOR, {Ty, EltTy}}) &&
178 isLegal({TargetOpcode::G_CONSTANT, {EltTy}});
179}
180
182 Register ToReg) const {
183 Observer.changingAllUsesOfReg(MRI, FromReg);
184
185 if (MRI.constrainRegAttrs(ToReg, FromReg))
186 MRI.replaceRegWith(FromReg, ToReg);
187 else
188 Builder.buildCopy(FromReg, ToReg);
189
190 Observer.finishedChangingAllUsesOfReg();
191}
192
194 MachineOperand &FromRegOp,
195 Register ToReg) const {
196 assert(FromRegOp.getParent() && "Expected an operand in an MI");
197 Observer.changingInstr(*FromRegOp.getParent());
198
199 FromRegOp.setReg(ToReg);
200
201 Observer.changedInstr(*FromRegOp.getParent());
202}
203
205 unsigned ToOpcode) const {
206 Observer.changingInstr(FromMI);
207
208 FromMI.setDesc(Builder.getTII().get(ToOpcode));
209
210 Observer.changedInstr(FromMI);
211}
212
214 return RBI->getRegBank(Reg, MRI, *TRI);
215}
216
218 const RegisterBank *RegBank) const {
219 if (RegBank)
220 MRI.setRegBank(Reg, *RegBank);
221}
222
224 if (matchCombineCopy(MI)) {
226 return true;
227 }
228 return false;
229}
231 if (MI.getOpcode() != TargetOpcode::COPY)
232 return false;
233 Register DstReg = MI.getOperand(0).getReg();
234 Register SrcReg = MI.getOperand(1).getReg();
235 return canReplaceReg(DstReg, SrcReg, MRI);
236}
238 Register DstReg = MI.getOperand(0).getReg();
239 Register SrcReg = MI.getOperand(1).getReg();
240 replaceRegWith(MRI, DstReg, SrcReg);
241 MI.eraseFromParent();
242}
243
245 MachineInstr &MI, BuildFnTy &MatchInfo) const {
246 // Ported from InstCombinerImpl::pushFreezeToPreventPoisonFromPropagating.
247 Register DstOp = MI.getOperand(0).getReg();
248 Register OrigOp = MI.getOperand(1).getReg();
249
250 if (!MRI.hasOneNonDBGUse(OrigOp))
251 return false;
252
253 MachineInstr *OrigDef = MRI.getUniqueVRegDef(OrigOp);
254 // Even if only a single operand of the PHI is not guaranteed non-poison,
255 // moving freeze() backwards across a PHI can cause optimization issues for
256 // other users of that operand.
257 //
258 // Moving freeze() from one of the output registers of a G_UNMERGE_VALUES to
259 // the source register is unprofitable because it makes the freeze() more
260 // strict than is necessary (it would affect the whole register instead of
261 // just the subreg being frozen).
262 if (OrigDef->isPHI() || isa<GUnmerge>(OrigDef))
263 return false;
264
265 if (canCreateUndefOrPoison(OrigOp, MRI,
266 /*ConsiderFlagsAndMetadata=*/false))
267 return false;
268
269 std::optional<MachineOperand> MaybePoisonOperand;
270 for (MachineOperand &Operand : OrigDef->uses()) {
271 if (!Operand.isReg())
272 return false;
273
274 if (isGuaranteedNotToBeUndefOrPoison(Operand.getReg(), MRI))
275 continue;
276
277 if (!MaybePoisonOperand)
278 MaybePoisonOperand = Operand;
279 else {
280 // We have more than one maybe-poison operand. Moving the freeze is
281 // unsafe.
282 return false;
283 }
284 }
285
286 // Eliminate freeze if all operands are guaranteed non-poison.
287 if (!MaybePoisonOperand) {
288 MatchInfo = [=](MachineIRBuilder &B) {
289 Observer.changingInstr(*OrigDef);
290 cast<GenericMachineInstr>(OrigDef)->dropPoisonGeneratingFlags();
291 Observer.changedInstr(*OrigDef);
292 B.buildCopy(DstOp, OrigOp);
293 };
294 return true;
295 }
296
297 Register MaybePoisonOperandReg = MaybePoisonOperand->getReg();
298 LLT MaybePoisonOperandRegTy = MRI.getType(MaybePoisonOperandReg);
299
300 MatchInfo = [=](MachineIRBuilder &B) mutable {
301 Observer.changingInstr(*OrigDef);
302 cast<GenericMachineInstr>(OrigDef)->dropPoisonGeneratingFlags();
303 Observer.changedInstr(*OrigDef);
304 B.setInsertPt(*OrigDef->getParent(), OrigDef->getIterator());
305 auto Freeze = B.buildFreeze(MaybePoisonOperandRegTy, MaybePoisonOperandReg);
307 MRI, *OrigDef->findRegisterUseOperand(MaybePoisonOperandReg, TRI),
308 Freeze.getReg(0));
309 replaceRegWith(MRI, DstOp, OrigOp);
310 };
311 return true;
312}
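// Illustrative sketch (not part of the original source; register names are
// made up). Assuming %y is the only maybe-poison operand and G_ADD cannot
// create poison once its poison-generating flags are dropped, the MatchInfo
// built above rewrites
//   %a:_(s32) = G_ADD %x, %y
//   %f:_(s32) = G_FREEZE %a
// into
//   %fy:_(s32) = G_FREEZE %y
//   %a:_(s32)  = G_ADD %x, %fy
// with all uses of %f replaced by %a.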
313
316 assert(MI.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
317 "Invalid instruction");
318 bool IsUndef = true;
319 MachineInstr *Undef = nullptr;
320
321 // Walk over all the operands of concat vectors and check if they are
322 // build_vector themselves or undef.
323 // Then collect their operands in Ops.
324 for (const MachineOperand &MO : MI.uses()) {
325 Register Reg = MO.getReg();
326 MachineInstr *Def = MRI.getVRegDef(Reg);
327 assert(Def && "Operand not defined");
328 if (!MRI.hasOneNonDBGUse(Reg))
329 return false;
330 switch (Def->getOpcode()) {
331 case TargetOpcode::G_BUILD_VECTOR:
332 IsUndef = false;
333 // Remember the operands of the build_vector to fold
334 // them into the yet-to-build flattened concat vectors.
335 for (const MachineOperand &BuildVecMO : Def->uses())
336 Ops.push_back(BuildVecMO.getReg());
337 break;
338 case TargetOpcode::G_IMPLICIT_DEF: {
339 LLT OpType = MRI.getType(Reg);
340 // Keep one undef value for all the undef operands.
341 if (!Undef) {
342 Builder.setInsertPt(*MI.getParent(), MI);
343 Undef = Builder.buildUndef(OpType.getScalarType());
344 }
345 assert(MRI.getType(Undef->getOperand(0).getReg()) ==
346 OpType.getScalarType() &&
347 "All undefs should have the same type");
348 // Break the undef vector into as many scalar elements as needed
349 // for the flattening.
350 for (unsigned EltIdx = 0, EltEnd = OpType.getNumElements();
351 EltIdx != EltEnd; ++EltIdx)
352 Ops.push_back(Undef->getOperand(0).getReg());
353 break;
354 }
355 default:
356 return false;
357 }
358 }
359
360 // Check if the combine is illegal
361 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
363 {TargetOpcode::G_BUILD_VECTOR, {DstTy, MRI.getType(Ops[0])}})) {
364 return false;
365 }
366
367 if (IsUndef)
368 Ops.clear();
369
370 return true;
371}
374 // We determined that the concat_vectors can be flattened.
375 // Generate the flattened build_vector.
376 Register DstReg = MI.getOperand(0).getReg();
377 Builder.setInsertPt(*MI.getParent(), MI);
378 Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
379
380 // Note: IsUndef is sort of redundant. We could have determined it by
381 // checking that all Ops are undef. Alternatively, we could have
382 // generated a build_vector of undefs and rely on another combine to
383 // clean that up. For now, given we already gather this information
384 // in matchCombineConcatVectors, just save compile time and issue the
385 // right thing.
386 if (Ops.empty())
387 Builder.buildUndef(NewDstReg);
388 else
389 Builder.buildBuildVector(NewDstReg, Ops);
390 replaceRegWith(MRI, DstReg, NewDstReg);
391 MI.eraseFromParent();
392}
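// Illustrative sketch (not part of the original source; register names are
// made up) of the flattening performed above:
//   %v1:_(<2 x s32>) = G_BUILD_VECTOR %a, %b
//   %v2:_(<2 x s32>) = G_BUILD_VECTOR %c, %d
//   %cc:_(<4 x s32>) = G_CONCAT_VECTORS %v1, %v2
// -->
//   %cc:_(<4 x s32>) = G_BUILD_VECTOR %a, %b, %c, %d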
393
395 auto &Shuffle = cast<GShuffleVector>(MI);
396
397 Register SrcVec1 = Shuffle.getSrc1Reg();
398 Register SrcVec2 = Shuffle.getSrc2Reg();
399 LLT EltTy = MRI.getType(SrcVec1).getElementType();
400 int Width = MRI.getType(SrcVec1).getNumElements();
401
402 auto Unmerge1 = Builder.buildUnmerge(EltTy, SrcVec1);
403 auto Unmerge2 = Builder.buildUnmerge(EltTy, SrcVec2);
404
405 SmallVector<Register> Extracts;
406 // Select only applicable elements from unmerged values.
407 for (int Val : Shuffle.getMask()) {
408 if (Val == -1)
409 Extracts.push_back(Builder.buildUndef(EltTy).getReg(0));
410 else if (Val < Width)
411 Extracts.push_back(Unmerge1.getReg(Val));
412 else
413 Extracts.push_back(Unmerge2.getReg(Val - Width));
414 }
415 assert(Extracts.size() > 0 && "Expected at least one element in the shuffle");
416 if (Extracts.size() == 1)
417 Builder.buildCopy(MI.getOperand(0).getReg(), Extracts[0]);
418 else
419 Builder.buildBuildVector(MI.getOperand(0).getReg(), Extracts);
420 MI.eraseFromParent();
421}
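// Illustrative sketch (not part of the original source; register names are
// made up) of the lowering above, e.g. for a shuffle with mask (1, 2):
//   %s:_(<2 x s32>) = G_SHUFFLE_VECTOR %v1(<2 x s32>), %v2(<2 x s32>), shufflemask(1, 2)
// -->
//   %a:_(s32), %b:_(s32) = G_UNMERGE_VALUES %v1(<2 x s32>)
//   %c:_(s32), %d:_(s32) = G_UNMERGE_VALUES %v2(<2 x s32>)
//   %s:_(<2 x s32>) = G_BUILD_VECTOR %b(s32), %c(s32)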
422
425 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
426 auto ConcatMI1 =
427 dyn_cast<GConcatVectors>(MRI.getVRegDef(MI.getOperand(1).getReg()));
428 auto ConcatMI2 =
429 dyn_cast<GConcatVectors>(MRI.getVRegDef(MI.getOperand(2).getReg()));
430 if (!ConcatMI1 || !ConcatMI2)
431 return false;
432
433 // Check that the sources of the Concat instructions have the same type
434 if (MRI.getType(ConcatMI1->getSourceReg(0)) !=
435 MRI.getType(ConcatMI2->getSourceReg(0)))
436 return false;
437
438 LLT ConcatSrcTy = MRI.getType(ConcatMI1->getReg(1));
439 LLT ShuffleSrcTy1 = MRI.getType(MI.getOperand(1).getReg());
440 unsigned ConcatSrcNumElt = ConcatSrcTy.getNumElements();
441 for (unsigned i = 0; i < Mask.size(); i += ConcatSrcNumElt) {
442 // Check if the index takes a whole source register from G_CONCAT_VECTORS
443 // Assumes that all Sources of G_CONCAT_VECTORS are the same type
444 if (Mask[i] == -1) {
445 for (unsigned j = 1; j < ConcatSrcNumElt; j++) {
446 if (i + j >= Mask.size())
447 return false;
448 if (Mask[i + j] != -1)
449 return false;
450 }
452 {TargetOpcode::G_IMPLICIT_DEF, {ConcatSrcTy}}))
453 return false;
454 Ops.push_back(0);
455 } else if (Mask[i] % ConcatSrcNumElt == 0) {
456 for (unsigned j = 1; j < ConcatSrcNumElt; j++) {
457 if (i + j >= Mask.size())
458 return false;
459 if (Mask[i + j] != Mask[i] + static_cast<int>(j))
460 return false;
461 }
462 // Retrieve the source register from its respective G_CONCAT_VECTORS
463 // instruction
464 if (Mask[i] < ShuffleSrcTy1.getNumElements()) {
465 Ops.push_back(ConcatMI1->getSourceReg(Mask[i] / ConcatSrcNumElt));
466 } else {
467 Ops.push_back(ConcatMI2->getSourceReg(Mask[i] / ConcatSrcNumElt -
468 ConcatMI1->getNumSources()));
469 }
470 } else {
471 return false;
472 }
473 }
474
476 {TargetOpcode::G_CONCAT_VECTORS,
477 {MRI.getType(MI.getOperand(0).getReg()), ConcatSrcTy}}))
478 return false;
479
480 return !Ops.empty();
481}
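// Illustrative sketch (not part of the original source; register names are
// made up): when every run of ConcatSrcNumElt mask entries selects a whole
// source of one of the feeding G_CONCAT_VECTORS (or is entirely undef), the
// shuffle reduces to a concat of those sources, e.g.:
//   %cc1:_(<4 x s32>) = G_CONCAT_VECTORS %a(<2 x s32>), %b(<2 x s32>)
//   %cc2:_(<4 x s32>) = G_CONCAT_VECTORS %c(<2 x s32>), %d(<2 x s32>)
//   %s:_(<4 x s32>) = G_SHUFFLE_VECTOR %cc1, %cc2, shufflemask(2, 3, 4, 5)
// -->
//   %s:_(<4 x s32>) = G_CONCAT_VECTORS %b(<2 x s32>), %c(<2 x s32>)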
482
485 LLT SrcTy;
486 for (Register &Reg : Ops) {
487 if (Reg != 0)
488 SrcTy = MRI.getType(Reg);
489 }
490 assert(SrcTy.isValid() && "Unexpected full undef vector in concat combine");
491
492 Register UndefReg = 0;
493
494 for (Register &Reg : Ops) {
495 if (Reg == 0) {
496 if (UndefReg == 0)
497 UndefReg = Builder.buildUndef(SrcTy).getReg(0);
498 Reg = UndefReg;
499 }
500 }
501
502 if (Ops.size() > 1)
503 Builder.buildConcatVectors(MI.getOperand(0).getReg(), Ops);
504 else
505 Builder.buildCopy(MI.getOperand(0).getReg(), Ops[0]);
506 MI.eraseFromParent();
507}
508
513 return true;
514 }
515 return false;
516}
517
520 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
521 "Invalid instruction kind");
522 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
523 Register Src1 = MI.getOperand(1).getReg();
524 LLT SrcType = MRI.getType(Src1);
525
526 unsigned DstNumElts = DstType.getNumElements();
527 unsigned SrcNumElts = SrcType.getNumElements();
528
529 // If the resulting vector is smaller than the size of the source
530 // vectors being concatenated, we won't be able to replace the
531 // shuffle vector with a concat_vectors.
532 //
533 // Note: We may still be able to produce a concat_vectors fed by
534 // extract_vector_elt and so on. It is less clear that would
535 // be better though, so don't bother for now.
536 //
537 // If the destination is a scalar, the size of the sources doesn't
538 // matter. We will lower the shuffle to a plain copy. This will
539 // work only if the source and destination have the same size. But
540 // that's covered by the next condition.
541 //
542 // TODO: If the sizes of the source and destination don't match,
543 // we could still emit an extract vector element in that case.
544 if (DstNumElts < 2 * SrcNumElts)
545 return false;
546
547 // Check that the shuffle mask can be broken evenly between the
548 // different sources.
549 if (DstNumElts % SrcNumElts != 0)
550 return false;
551
552 // Mask length is a multiple of the source vector length.
553 // Check if the shuffle is some kind of concatenation of the input
554 // vectors.
555 unsigned NumConcat = DstNumElts / SrcNumElts;
556 SmallVector<int, 8> ConcatSrcs(NumConcat, -1);
557 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
558 for (unsigned i = 0; i != DstNumElts; ++i) {
559 int Idx = Mask[i];
560 // Undef value.
561 if (Idx < 0)
562 continue;
563 // Ensure the indices in each SrcType sized piece are sequential and that
564 // the same source is used for the whole piece.
565 if ((Idx % SrcNumElts != (i % SrcNumElts)) ||
566 (ConcatSrcs[i / SrcNumElts] >= 0 &&
567 ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts)))
568 return false;
569 // Remember which source this index came from.
570 ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts;
571 }
572
573 // The shuffle is concatenating multiple vectors together.
574 // Collect the different operands for that.
575 Register UndefReg;
576 Register Src2 = MI.getOperand(2).getReg();
577 for (auto Src : ConcatSrcs) {
578 if (Src < 0) {
579 if (!UndefReg) {
580 Builder.setInsertPt(*MI.getParent(), MI);
581 UndefReg = Builder.buildUndef(SrcType).getReg(0);
582 }
583 Ops.push_back(UndefReg);
584 } else if (Src == 0)
585 Ops.push_back(Src1);
586 else
587 Ops.push_back(Src2);
588 }
589 return true;
590}
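// Illustrative sketch (not part of the original source; register names are
// made up): a shuffle whose mask simply appends its inputs is matched as a
// concatenation, e.g.:
//   %s:_(<4 x s32>) = G_SHUFFLE_VECTOR %v1(<2 x s32>), %v2(<2 x s32>), shufflemask(0, 1, 2, 3)
// -->
//   %s:_(<4 x s32>) = G_CONCAT_VECTORS %v1(<2 x s32>), %v2(<2 x s32>)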
591
593 MachineInstr &MI, const ArrayRef<Register> Ops) const {
594 Register DstReg = MI.getOperand(0).getReg();
595 Builder.setInsertPt(*MI.getParent(), MI);
596 Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
597
598 if (Ops.size() == 1)
599 Builder.buildCopy(NewDstReg, Ops[0]);
600 else
601 Builder.buildMergeLikeInstr(NewDstReg, Ops);
602
603 replaceRegWith(MRI, DstReg, NewDstReg);
604 MI.eraseFromParent();
605}
606
607namespace {
608
609/// Select a preference between two uses. CurrentUse is the current preference
610/// while the *ForCandidate parameters describe the candidate under consideration.
611PreferredTuple ChoosePreferredUse(MachineInstr &LoadMI,
612 PreferredTuple &CurrentUse,
613 const LLT TyForCandidate,
614 unsigned OpcodeForCandidate,
615 MachineInstr *MIForCandidate) {
616 if (!CurrentUse.Ty.isValid()) {
617 if (CurrentUse.ExtendOpcode == OpcodeForCandidate ||
618 CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT)
619 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
620 return CurrentUse;
621 }
622
623 // We permit the extend to hoist through basic blocks but this is only
624 // sensible if the target has extending loads. If you end up lowering back
625 // into a load and extend during the legalizer then the end result is
626 // hoisting the extend up to the load.
627
628 // Prefer defined extensions to undefined extensions as these are more
629 // likely to reduce the number of instructions.
630 if (OpcodeForCandidate == TargetOpcode::G_ANYEXT &&
631 CurrentUse.ExtendOpcode != TargetOpcode::G_ANYEXT)
632 return CurrentUse;
633 else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT &&
634 OpcodeForCandidate != TargetOpcode::G_ANYEXT)
635 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
636
637 // Prefer sign extensions to zero extensions as sign-extensions tend to be
638 // more expensive. Don't do this if the load is already a zero-extend load
639 // though, otherwise we'll rewrite a zero-extend load into a sign-extend
640 // later.
641 if (!isa<GZExtLoad>(LoadMI) && CurrentUse.Ty == TyForCandidate) {
642 if (CurrentUse.ExtendOpcode == TargetOpcode::G_SEXT &&
643 OpcodeForCandidate == TargetOpcode::G_ZEXT)
644 return CurrentUse;
645 else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ZEXT &&
646 OpcodeForCandidate == TargetOpcode::G_SEXT)
647 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
648 }
649
650 // This is potentially target specific. We've chosen the largest type
651 // because G_TRUNC is usually free. One potential catch with this is that
652 // some targets have a reduced number of larger registers than smaller
653 // registers and this choice potentially increases the live-range for the
654 // larger value.
655 if (TyForCandidate.getSizeInBits() > CurrentUse.Ty.getSizeInBits()) {
656 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
657 }
658 return CurrentUse;
659}
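// Illustrative summary (not part of the original source) of the preference
// order implemented above: a defined extend beats G_ANYEXT, G_SEXT beats
// G_ZEXT at equal width (unless the load already zero-extends), and
// otherwise the widest candidate wins. E.g. for a G_LOAD %ld(s8) whose uses
// are
//   %a:_(s32) = G_ANYEXT %ld(s8)
//   %z:_(s32) = G_ZEXT %ld(s8)
//   %s:_(s64) = G_SEXT %ld(s8)
// the s64 G_SEXT is chosen, so the load is later rewritten to a G_SEXTLOAD.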
660
661/// Find a suitable place to insert some instructions and insert them. This
662/// function accounts for special cases like inserting before a PHI node.
663/// The current strategy for inserting before PHI's is to duplicate the
664/// instructions for each predecessor. However, while that's ok for G_TRUNC
665/// on most targets since it generally requires no code, other targets/cases may
666/// want to try harder to find a dominating block.
667static void InsertInsnsWithoutSideEffectsBeforeUse(
670 MachineOperand &UseMO)>
671 Inserter) {
672 MachineInstr &UseMI = *UseMO.getParent();
673
674 MachineBasicBlock *InsertBB = UseMI.getParent();
675
676 // If the use is a PHI then we want the predecessor block instead.
677 if (UseMI.isPHI()) {
678 MachineOperand *PredBB = std::next(&UseMO);
679 InsertBB = PredBB->getMBB();
680 }
681
682 // If the block is the same block as the def then we want to insert just after
683 // the def instead of at the start of the block.
684 if (InsertBB == DefMI.getParent()) {
686 Inserter(InsertBB, std::next(InsertPt), UseMO);
687 return;
688 }
689
690 // Otherwise we want the start of the BB
691 Inserter(InsertBB, InsertBB->getFirstNonPHI(), UseMO);
692}
693} // end anonymous namespace
694
696 PreferredTuple Preferred;
697 if (matchCombineExtendingLoads(MI, Preferred)) {
698 applyCombineExtendingLoads(MI, Preferred);
699 return true;
700 }
701 return false;
702}
703
704static unsigned getExtLoadOpcForExtend(unsigned ExtOpc) {
705 unsigned CandidateLoadOpc;
706 switch (ExtOpc) {
707 case TargetOpcode::G_ANYEXT:
708 CandidateLoadOpc = TargetOpcode::G_LOAD;
709 break;
710 case TargetOpcode::G_SEXT:
711 CandidateLoadOpc = TargetOpcode::G_SEXTLOAD;
712 break;
713 case TargetOpcode::G_ZEXT:
714 CandidateLoadOpc = TargetOpcode::G_ZEXTLOAD;
715 break;
716 default:
717 llvm_unreachable("Unexpected extend opc");
718 }
719 return CandidateLoadOpc;
720}
721
723 MachineInstr &MI, PreferredTuple &Preferred) const {
724 // We match the loads and follow the uses to the extend instead of matching
725 // the extends and following the def to the load. This is because the load
726 // must remain in the same position for correctness (unless we also add code
727 // to find a safe place to sink it) whereas the extend is freely movable.
728 // It also prevents us from duplicating the load for the volatile case or just
729 // for performance.
730 GAnyLoad *LoadMI = dyn_cast<GAnyLoad>(&MI);
731 if (!LoadMI)
732 return false;
733
734 Register LoadReg = LoadMI->getDstReg();
735
736 LLT LoadValueTy = MRI.getType(LoadReg);
737 if (!LoadValueTy.isScalar())
738 return false;
739
740 // Most architectures are going to legalize <s8 loads into at least a 1 byte
741 // load, and the MMOs can only describe memory accesses in multiples of bytes.
742 // If we try to perform extload combining on those, we can end up with
743 // %a(s8) = extload %ptr (load 1 byte from %ptr)
744 // ... which is an illegal extload instruction.
745 if (LoadValueTy.getSizeInBits() < 8)
746 return false;
747
748 // Non-power-of-2 types will very likely be legalized into multiple
749 // loads. Don't bother trying to match them into extending loads.
751 return false;
752
753 // Find the preferred type aside from the any-extends (unless it's the only
754 // one) and non-extending ops. We'll emit an extending load to that type
755 // and emit a variant of (extend (trunc X)) for the others according to the
756 // relative type sizes. At the same time, pick an extend to use based on the
757 // extend involved in the chosen type.
758 unsigned PreferredOpcode =
759 isa<GLoad>(&MI)
760 ? TargetOpcode::G_ANYEXT
761 : isa<GSExtLoad>(&MI) ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
762 Preferred = {LLT(), PreferredOpcode, nullptr};
763 for (auto &UseMI : MRI.use_nodbg_instructions(LoadReg)) {
764 if (UseMI.getOpcode() == TargetOpcode::G_SEXT ||
765 UseMI.getOpcode() == TargetOpcode::G_ZEXT ||
766 (UseMI.getOpcode() == TargetOpcode::G_ANYEXT)) {
767 const auto &MMO = LoadMI->getMMO();
768 // Don't do anything for atomics.
769 if (MMO.isAtomic())
770 continue;
771 // Check for legality.
772 if (!isPreLegalize()) {
773 LegalityQuery::MemDesc MMDesc(MMO);
774 unsigned CandidateLoadOpc = getExtLoadOpcForExtend(UseMI.getOpcode());
775 LLT UseTy = MRI.getType(UseMI.getOperand(0).getReg());
776 LLT SrcTy = MRI.getType(LoadMI->getPointerReg());
777 if (LI->getAction({CandidateLoadOpc, {UseTy, SrcTy}, {MMDesc}})
778 .Action != LegalizeActions::Legal)
779 continue;
780 }
781 Preferred = ChoosePreferredUse(MI, Preferred,
782 MRI.getType(UseMI.getOperand(0).getReg()),
783 UseMI.getOpcode(), &UseMI);
784 }
785 }
786
787 // There were no extends
788 if (!Preferred.MI)
789 return false;
790 // It should be impossible to choose an extend without selecting a different
791 // type since by definition the result of an extend is larger.
792 assert(Preferred.Ty != LoadValueTy && "Extending to same type?");
793
794 LLVM_DEBUG(dbgs() << "Preferred use is: " << *Preferred.MI);
795 return true;
796}
797
799 MachineInstr &MI, PreferredTuple &Preferred) const {
800 // Rewrite the load to the chosen extending load.
801 Register ChosenDstReg = Preferred.MI->getOperand(0).getReg();
802
803 // Inserter to insert a truncate back to the original type at a given point
804 // with some basic CSE to limit truncate duplication to one per BB.
806 auto InsertTruncAt = [&](MachineBasicBlock *InsertIntoBB,
807 MachineBasicBlock::iterator InsertBefore,
808 MachineOperand &UseMO) {
809 MachineInstr *PreviouslyEmitted = EmittedInsns.lookup(InsertIntoBB);
810 if (PreviouslyEmitted) {
811 Observer.changingInstr(*UseMO.getParent());
812 UseMO.setReg(PreviouslyEmitted->getOperand(0).getReg());
813 Observer.changedInstr(*UseMO.getParent());
814 return;
815 }
816
817 Builder.setInsertPt(*InsertIntoBB, InsertBefore);
818 Register NewDstReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg());
819 MachineInstr *NewMI = Builder.buildTrunc(NewDstReg, ChosenDstReg);
820 EmittedInsns[InsertIntoBB] = NewMI;
821 replaceRegOpWith(MRI, UseMO, NewDstReg);
822 };
823
824 Observer.changingInstr(MI);
825 unsigned LoadOpc = getExtLoadOpcForExtend(Preferred.ExtendOpcode);
826 MI.setDesc(Builder.getTII().get(LoadOpc));
827
828 // Rewrite all the uses to fix up the types.
829 auto &LoadValue = MI.getOperand(0);
831 llvm::make_pointer_range(MRI.use_operands(LoadValue.getReg())));
832
833 for (auto *UseMO : Uses) {
834 MachineInstr *UseMI = UseMO->getParent();
835
836 // If the extend is compatible with the preferred extend then we should fix
837 // up the type and extend so that it uses the preferred use.
838 if (UseMI->getOpcode() == Preferred.ExtendOpcode ||
839 UseMI->getOpcode() == TargetOpcode::G_ANYEXT) {
840 Register UseDstReg = UseMI->getOperand(0).getReg();
841 MachineOperand &UseSrcMO = UseMI->getOperand(1);
842 const LLT UseDstTy = MRI.getType(UseDstReg);
843 if (UseDstReg != ChosenDstReg) {
844 if (Preferred.Ty == UseDstTy) {
845 // If the use has the same type as the preferred use, then merge
846 // the vregs and erase the extend. For example:
847 // %1:_(s8) = G_LOAD ...
848 // %2:_(s32) = G_SEXT %1(s8)
849 // %3:_(s32) = G_ANYEXT %1(s8)
850 // ... = ... %3(s32)
851 // rewrites to:
852 // %2:_(s32) = G_SEXTLOAD ...
853 // ... = ... %2(s32)
854 replaceRegWith(MRI, UseDstReg, ChosenDstReg);
855 Observer.erasingInstr(*UseMO->getParent());
856 UseMO->getParent()->eraseFromParent();
857 } else if (Preferred.Ty.getSizeInBits() < UseDstTy.getSizeInBits()) {
858 // If the preferred size is smaller, then keep the extend but extend
859 // from the result of the extending load. For example:
860 // %1:_(s8) = G_LOAD ...
861 // %2:_(s32) = G_SEXT %1(s8)
862 // %3:_(s64) = G_ANYEXT %1(s8)
863 // ... = ... %3(s64)
864 /// rewrites to:
865 // %2:_(s32) = G_SEXTLOAD ...
866 // %3:_(s64) = G_ANYEXT %2:_(s32)
867 // ... = ... %3(s64)
868 replaceRegOpWith(MRI, UseSrcMO, ChosenDstReg);
869 } else {
870 // If the preferred size is large, then insert a truncate. For
871 // example:
872 // %1:_(s8) = G_LOAD ...
873 // %2:_(s64) = G_SEXT %1(s8)
874 // %3:_(s32) = G_ZEXT %1(s8)
875 // ... = ... %3(s32)
876 /// rewrites to:
877 // %2:_(s64) = G_SEXTLOAD ...
878 // %4:_(s8) = G_TRUNC %2:_(s64)
879 // %3:_(s32) = G_ZEXT %4:_(s8)
880 // ... = ... %3(s32)
881 InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO,
882 InsertTruncAt);
883 }
884 continue;
885 }
886 // The use is (one of) the uses of the preferred use we chose earlier.
887 // We're going to update the load to def this value later so just erase
888 // the old extend.
889 Observer.erasingInstr(*UseMO->getParent());
890 UseMO->getParent()->eraseFromParent();
891 continue;
892 }
893
894 // The use isn't an extend. Truncate back to the type we originally loaded.
895 // This is free on many targets.
896 InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO, InsertTruncAt);
897 }
898
899 MI.getOperand(0).setReg(ChosenDstReg);
900 Observer.changedInstr(MI);
901}
902
904 BuildFnTy &MatchInfo) const {
905 assert(MI.getOpcode() == TargetOpcode::G_AND);
906
907 // If we have the following code:
908 // %mask = G_CONSTANT 255
909 // %ld = G_LOAD %ptr, (load s16)
910 // %and = G_AND %ld, %mask
911 //
912 // Try to fold it into
913 // %ld = G_ZEXTLOAD %ptr, (load s8)
914
915 Register Dst = MI.getOperand(0).getReg();
916 if (MRI.getType(Dst).isVector())
917 return false;
918
919 auto MaybeMask =
920 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
921 if (!MaybeMask)
922 return false;
923
924 APInt MaskVal = MaybeMask->Value;
925
926 if (!MaskVal.isMask())
927 return false;
928
929 Register SrcReg = MI.getOperand(1).getReg();
930 // Don't use getOpcodeDef() here since intermediate instructions may have
931 // multiple users.
932 GAnyLoad *LoadMI = dyn_cast<GAnyLoad>(MRI.getVRegDef(SrcReg));
933 if (!LoadMI || !MRI.hasOneNonDBGUse(LoadMI->getDstReg()))
934 return false;
935
936 Register LoadReg = LoadMI->getDstReg();
937 LLT RegTy = MRI.getType(LoadReg);
938 Register PtrReg = LoadMI->getPointerReg();
939 unsigned RegSize = RegTy.getSizeInBits();
940 LocationSize LoadSizeBits = LoadMI->getMemSizeInBits();
941 unsigned MaskSizeBits = MaskVal.countr_one();
942
943 // The mask may not be larger than the in-memory type, as it might cover sign
944 // extended bits
945 if (MaskSizeBits > LoadSizeBits.getValue())
946 return false;
947
948 // If the mask covers the whole destination register, there's nothing to
949 // extend
950 if (MaskSizeBits >= RegSize)
951 return false;
952
953 // Most targets cannot deal with loads of size < 8 and need to re-legalize to
954 // at least byte loads. Avoid creating such loads here
955 if (MaskSizeBits < 8 || !isPowerOf2_32(MaskSizeBits))
956 return false;
957
958 const MachineMemOperand &MMO = LoadMI->getMMO();
959 LegalityQuery::MemDesc MemDesc(MMO);
960
961 // Don't modify the memory access size if this is atomic/volatile, but we can
962 // still adjust the opcode to indicate the high bit behavior.
963 if (LoadMI->isSimple())
964 MemDesc.MemoryTy = LLT::scalar(MaskSizeBits);
965 else if (LoadSizeBits.getValue() > MaskSizeBits ||
966 LoadSizeBits.getValue() == RegSize)
967 return false;
968
969 // TODO: Could check if it's legal with the reduced or original memory size.
971 {TargetOpcode::G_ZEXTLOAD, {RegTy, MRI.getType(PtrReg)}, {MemDesc}}))
972 return false;
973
974 MatchInfo = [=](MachineIRBuilder &B) {
975 B.setInstrAndDebugLoc(*LoadMI);
976 auto &MF = B.getMF();
977 auto PtrInfo = MMO.getPointerInfo();
978 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, MemDesc.MemoryTy);
979 B.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, Dst, PtrReg, *NewMMO);
980 LoadMI->eraseFromParent();
981 };
982 return true;
983}
984
986 const MachineInstr &UseMI) const {
987 assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
988 "shouldn't consider debug uses");
989 assert(DefMI.getParent() == UseMI.getParent());
990 if (&DefMI == &UseMI)
991 return true;
992 const MachineBasicBlock &MBB = *DefMI.getParent();
993 auto DefOrUse = find_if(MBB, [&DefMI, &UseMI](const MachineInstr &MI) {
994 return &MI == &DefMI || &MI == &UseMI;
995 });
996 if (DefOrUse == MBB.end())
997 llvm_unreachable("Block must contain both DefMI and UseMI!");
998 return &*DefOrUse == &DefMI;
999}
1000
1002 const MachineInstr &UseMI) const {
1003 assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
1004 "shouldn't consider debug uses");
1005 if (MDT)
1006 return MDT->dominates(&DefMI, &UseMI);
1007 else if (DefMI.getParent() != UseMI.getParent())
1008 return false;
1009
1010 return isPredecessor(DefMI, UseMI);
1011}
1012
1014 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1015 Register SrcReg = MI.getOperand(1).getReg();
1016 Register LoadUser = SrcReg;
1017
1018 if (MRI.getType(SrcReg).isVector())
1019 return false;
1020
1021 Register TruncSrc;
1022 if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc))))
1023 LoadUser = TruncSrc;
1024
1025 uint64_t SizeInBits = MI.getOperand(2).getImm();
1026 // If the source is a G_SEXTLOAD from the same bit width, then we don't
1027 // need any extend at all, just a truncate.
1028 if (auto *LoadMI = getOpcodeDef<GSExtLoad>(LoadUser, MRI)) {
1029 // If truncating more than the original extended value, abort.
1030 auto LoadSizeBits = LoadMI->getMemSizeInBits();
1031 if (TruncSrc &&
1032 MRI.getType(TruncSrc).getSizeInBits() < LoadSizeBits.getValue())
1033 return false;
1034 if (LoadSizeBits == SizeInBits)
1035 return true;
1036 }
1037 return false;
1038}
1039
1041 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1042 Builder.buildCopy(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
1043 MI.eraseFromParent();
1044}
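// Illustrative sketch (not part of the original source; register names are
// made up): the sign bits are already in place after a same-width
// G_SEXTLOAD, so the G_SEXT_INREG folds to a copy, e.g.:
//   %ld:_(s32) = G_SEXTLOAD %ptr(p0) :: (load (s8))
//   %ext:_(s32) = G_SEXT_INREG %ld, 8
// -->
//   %ext:_(s32) = COPY %ld(s32)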
1045
1047 MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) const {
1048 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1049
1050 Register DstReg = MI.getOperand(0).getReg();
1051 LLT RegTy = MRI.getType(DstReg);
1052
1053 // Only supports scalars for now.
1054 if (RegTy.isVector())
1055 return false;
1056
1057 Register SrcReg = MI.getOperand(1).getReg();
1058 auto *LoadDef = getOpcodeDef<GLoad>(SrcReg, MRI);
1059 if (!LoadDef || !MRI.hasOneNonDBGUse(SrcReg))
1060 return false;
1061
1062 uint64_t MemBits = LoadDef->getMemSizeInBits().getValue();
1063
1064 // If the sign extend extends from a narrower width than the load's width,
1065 // then we can narrow the load width when we combine to a G_SEXTLOAD.
1066 // Avoid widening the load at all.
1067 unsigned NewSizeBits = std::min((uint64_t)MI.getOperand(2).getImm(), MemBits);
1068
1069 // Don't generate G_SEXTLOADs with a < 1 byte width.
1070 if (NewSizeBits < 8)
1071 return false;
1072 // Don't bother creating a non-power-2 sextload, it will likely be broken up
1073 // anyway for most targets.
1074 if (!isPowerOf2_32(NewSizeBits))
1075 return false;
1076
1077 const MachineMemOperand &MMO = LoadDef->getMMO();
1078 LegalityQuery::MemDesc MMDesc(MMO);
1079
1080 // Don't modify the memory access size if this is atomic/volatile, but we can
1081 // still adjust the opcode to indicate the high bit behavior.
1082 if (LoadDef->isSimple())
1083 MMDesc.MemoryTy = LLT::scalar(NewSizeBits);
1084 else if (MemBits > NewSizeBits || MemBits == RegTy.getSizeInBits())
1085 return false;
1086
1087 // TODO: Could check if it's legal with the reduced or original memory size.
1088 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SEXTLOAD,
1089 {MRI.getType(LoadDef->getDstReg()),
1090 MRI.getType(LoadDef->getPointerReg())},
1091 {MMDesc}}))
1092 return false;
1093
1094 MatchInfo = std::make_tuple(LoadDef->getDstReg(), NewSizeBits);
1095 return true;
1096}
1097
1099 MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) const {
1100 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1101 Register LoadReg;
1102 unsigned ScalarSizeBits;
1103 std::tie(LoadReg, ScalarSizeBits) = MatchInfo;
1104 GLoad *LoadDef = cast<GLoad>(MRI.getVRegDef(LoadReg));
1105
1106 // If we have the following:
1107 // %ld = G_LOAD %ptr, (load 2)
1108 // %ext = G_SEXT_INREG %ld, 8
1109 // ==>
1110 // %ld = G_SEXTLOAD %ptr (load 1)
1111
1112 auto &MMO = LoadDef->getMMO();
1113 Builder.setInstrAndDebugLoc(*LoadDef);
1114 auto &MF = Builder.getMF();
1115 auto PtrInfo = MMO.getPointerInfo();
1116 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, ScalarSizeBits / 8);
1117 Builder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, MI.getOperand(0).getReg(),
1118 LoadDef->getPointerReg(), *NewMMO);
1119 MI.eraseFromParent();
1120
1121 // Not all loads can be deleted, so make sure the old one is removed.
1122 LoadDef->eraseFromParent();
1123}
1124
1125/// Return true if 'MI' is a load or a store that may fold its address
1126/// operand into the load / store addressing mode.
1130 auto *MF = MI->getMF();
1131 auto *Addr = getOpcodeDef<GPtrAdd>(MI->getPointerReg(), MRI);
1132 if (!Addr)
1133 return false;
1134
1135 AM.HasBaseReg = true;
1136 if (auto CstOff = getIConstantVRegVal(Addr->getOffsetReg(), MRI))
1137 AM.BaseOffs = CstOff->getSExtValue(); // [reg +/- imm]
1138 else
1139 AM.Scale = 1; // [reg +/- reg]
1140
1141 return TLI.isLegalAddressingMode(
1142 MF->getDataLayout(), AM,
1143 getTypeForLLT(MI->getMMO().getMemoryType(),
1144 MF->getFunction().getContext()),
1145 MI->getMMO().getAddrSpace());
1146}
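// Illustrative sketch (not part of the original source; register names are
// made up): for
//   %addr:_(p0) = G_PTR_ADD %base, %cst
//   %v:_(s32) = G_LOAD %addr(p0) :: (load (s32))
// the query above asks the target whether a [reg +/- imm] mode (or, when the
// offset is not a known constant, [reg +/- reg]) is legal for an s32 access
// in that address space.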
1147
1148static unsigned getIndexedOpc(unsigned LdStOpc) {
1149 switch (LdStOpc) {
1150 case TargetOpcode::G_LOAD:
1151 return TargetOpcode::G_INDEXED_LOAD;
1152 case TargetOpcode::G_STORE:
1153 return TargetOpcode::G_INDEXED_STORE;
1154 case TargetOpcode::G_ZEXTLOAD:
1155 return TargetOpcode::G_INDEXED_ZEXTLOAD;
1156 case TargetOpcode::G_SEXTLOAD:
1157 return TargetOpcode::G_INDEXED_SEXTLOAD;
1158 default:
1159 llvm_unreachable("Unexpected opcode");
1160 }
1161}
1162
1163bool CombinerHelper::isIndexedLoadStoreLegal(GLoadStore &LdSt) const {
1164 // Check for legality.
1165 LLT PtrTy = MRI.getType(LdSt.getPointerReg());
1166 LLT Ty = MRI.getType(LdSt.getReg(0));
1167 LLT MemTy = LdSt.getMMO().getMemoryType();
1169 {{MemTy, MemTy.getSizeInBits().getKnownMinValue(),
1171 unsigned IndexedOpc = getIndexedOpc(LdSt.getOpcode());
1172 SmallVector<LLT> OpTys;
1173 if (IndexedOpc == TargetOpcode::G_INDEXED_STORE)
1174 OpTys = {PtrTy, Ty, Ty};
1175 else
1176 OpTys = {Ty, PtrTy}; // For G_INDEXED_LOAD, G_INDEXED_[SZ]EXTLOAD
1177
1178 LegalityQuery Q(IndexedOpc, OpTys, MemDescrs);
1179 return isLegal(Q);
1180}
1181
1183 "post-index-use-threshold", cl::Hidden, cl::init(32),
1184 cl::desc("Number of uses of a base pointer to check before it is no longer "
1185 "considered for post-indexing."));
1186
1187bool CombinerHelper::findPostIndexCandidate(GLoadStore &LdSt, Register &Addr,
1189 bool &RematOffset) const {
1190 // We're looking for the following pattern, for either load or store:
1191 // %baseptr:_(p0) = ...
1192 // G_STORE %val(s64), %baseptr(p0)
1193 // %offset:_(s64) = G_CONSTANT i64 -256
1194 // %new_addr:_(p0) = G_PTR_ADD %baseptr, %offset(s64)
1195 const auto &TLI = getTargetLowering();
1196
1197 Register Ptr = LdSt.getPointerReg();
1198 // If the store is the only use, don't bother.
1199 if (MRI.hasOneNonDBGUse(Ptr))
1200 return false;
1201
1202 if (!isIndexedLoadStoreLegal(LdSt))
1203 return false;
1204
1205 if (getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Ptr, MRI))
1206 return false;
1207
1208 MachineInstr *StoredValDef = getDefIgnoringCopies(LdSt.getReg(0), MRI);
1209 auto *PtrDef = MRI.getVRegDef(Ptr);
1210
1211 unsigned NumUsesChecked = 0;
1212 for (auto &Use : MRI.use_nodbg_instructions(Ptr)) {
1213 if (++NumUsesChecked > PostIndexUseThreshold)
1214 return false; // Try to avoid exploding compile time.
1215
1216 auto *PtrAdd = dyn_cast<GPtrAdd>(&Use);
1217 // The use itself might be dead. This can happen during combines if DCE
1218 // hasn't had a chance to run yet. Don't allow it to form an indexed op.
1219 if (!PtrAdd || MRI.use_nodbg_empty(PtrAdd->getReg(0)))
1220 continue;
1221
1222 // Check that the user of this isn't the store, otherwise we'd be generating
1223 // an indexed store defining its own use.
1224 if (StoredValDef == &Use)
1225 continue;
1226
1227 Offset = PtrAdd->getOffsetReg();
1228 if (!ForceLegalIndexing &&
1229 !TLI.isIndexingLegal(LdSt, PtrAdd->getBaseReg(), Offset,
1230 /*IsPre*/ false, MRI))
1231 continue;
1232
1233 // Make sure the offset calculation is before the potentially indexed op.
1234 MachineInstr *OffsetDef = MRI.getVRegDef(Offset);
1235 RematOffset = false;
1236 if (!dominates(*OffsetDef, LdSt)) {
1237 // If the offset however is just a G_CONSTANT, we can always just
1238 // rematerialize it where we need it.
1239 if (OffsetDef->getOpcode() != TargetOpcode::G_CONSTANT)
1240 continue;
1241 RematOffset = true;
1242 }
1243
1244 for (auto &BasePtrUse : MRI.use_nodbg_instructions(PtrAdd->getBaseReg())) {
1245 if (&BasePtrUse == PtrDef)
1246 continue;
1247
1248 // If the user is a later load/store that can be post-indexed, then don't
1249 // combine this one.
1250 auto *BasePtrLdSt = dyn_cast<GLoadStore>(&BasePtrUse);
1251 if (BasePtrLdSt && BasePtrLdSt != &LdSt &&
1252 dominates(LdSt, *BasePtrLdSt) &&
1253 isIndexedLoadStoreLegal(*BasePtrLdSt))
1254 return false;
1255
1256 // Now we're looking for the key G_PTR_ADD instruction, which contains
1257 // the offset add that we want to fold.
1258 if (auto *BasePtrUseDef = dyn_cast<GPtrAdd>(&BasePtrUse)) {
1259 Register PtrAddDefReg = BasePtrUseDef->getReg(0);
1260 for (auto &BaseUseUse : MRI.use_nodbg_instructions(PtrAddDefReg)) {
1261 // If the use is in a different block, then we may produce worse code
1262 // due to the extra register pressure.
1263 if (BaseUseUse.getParent() != LdSt.getParent())
1264 return false;
1265
1266 if (auto *UseUseLdSt = dyn_cast<GLoadStore>(&BaseUseUse))
1267 if (canFoldInAddressingMode(UseUseLdSt, TLI, MRI))
1268 return false;
1269 }
1270 if (!dominates(LdSt, BasePtrUse))
1271 return false; // All uses must be dominated by the load/store.
1272 }
1273 }
1274
1275 Addr = PtrAdd->getReg(0);
1276 Base = PtrAdd->getBaseReg();
1277 return true;
1278 }
1279
1280 return false;
1281}
1282
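// Illustrative sketch (not part of the original source; register names are
// made up) of the pre-indexing pattern matched by the function below,
// mirroring the post-index case above:
//   %offset:_(s64) = G_CONSTANT i64 16
//   %writeback:_(p0) = G_PTR_ADD %baseptr, %offset(s64)
//   G_STORE %val(s64), %writeback(p0)
// Here the load/store already addresses through the G_PTR_ADD, so the add
// can be folded into a pre-indexed access that also defines %writeback.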
1283bool CombinerHelper::findPreIndexCandidate(GLoadStore &LdSt, Register &Addr,
1284 Register &Base,
1285 Register &Offset) const {
1286 auto &MF = *LdSt.getParent()->getParent();
1287 const auto &TLI = *MF.getSubtarget().getTargetLowering();
1288
1289 Addr = LdSt.getPointerReg();
1290 if (!mi_match(Addr, MRI, m_GPtrAdd(m_Reg(Base), m_Reg(Offset))) ||
1291 MRI.hasOneNonDBGUse(Addr))
1292 return false;
1293
1294 if (!ForceLegalIndexing &&
1295 !TLI.isIndexingLegal(LdSt, Base, Offset, /*IsPre*/ true, MRI))
1296 return false;
1297
1298 if (!isIndexedLoadStoreLegal(LdSt))
1299 return false;
1300
1301 MachineInstr *BaseDef = getDefIgnoringCopies(Base, MRI);
1302 if (BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
1303 return false;
1304
1305 if (auto *St = dyn_cast<GStore>(&LdSt)) {
1306 // Would require a copy.
1307 if (Base == St->getValueReg())
1308 return false;
1309
1310 // We're expecting one use of Addr in MI, but it could also be the
1311 // value stored, which isn't actually dominated by the instruction.
1312 if (St->getValueReg() == Addr)
1313 return false;
1314 }
1315
1316 // Avoid increasing cross-block register pressure.
1317 for (auto &AddrUse : MRI.use_nodbg_instructions(Addr))
1318 if (AddrUse.getParent() != LdSt.getParent())
1319 return false;
1320
1321 // FIXME: check whether all uses of the base pointer are constant PtrAdds.
1322 // That might allow us to end base's liveness here by adjusting the constant.
1323 bool RealUse = false;
1324 for (auto &AddrUse : MRI.use_nodbg_instructions(Addr)) {
1325 if (!dominates(LdSt, AddrUse))
1326 return false; // All uses must be dominated by the load/store.
1327
1328 // If Ptr may be folded into the addressing mode of another use, then it's
1329 // not profitable to do this transformation.
1330 if (auto *UseLdSt = dyn_cast<GLoadStore>(&AddrUse)) {
1331 if (!canFoldInAddressingMode(UseLdSt, TLI, MRI))
1332 RealUse = true;
1333 } else {
1334 RealUse = true;
1335 }
1336 }
1337 return RealUse;
1338}
1339
1341 MachineInstr &MI, BuildFnTy &MatchInfo) const {
1342 assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT);
1343
1344 // Check if there is a load that defines the vector being extracted from.
1345 auto *LoadMI = getOpcodeDef<GLoad>(MI.getOperand(1).getReg(), MRI);
1346 if (!LoadMI)
1347 return false;
1348
1349 Register Vector = MI.getOperand(1).getReg();
1350 LLT VecEltTy = MRI.getType(Vector).getElementType();
1351
1352 assert(MRI.getType(MI.getOperand(0).getReg()) == VecEltTy);
1353
1354 // Checking whether we should reduce the load width.
1355 if (!MRI.hasOneNonDBGUse(Vector))
1356 return false;
1357
1358 // Check if the defining load is simple.
1359 if (!LoadMI->isSimple())
1360 return false;
1361
1362 // If the vector element type is not a multiple of a byte then we are unable
1363 // to correctly compute an address to load only the extracted element as a
1364 // scalar.
1365 if (!VecEltTy.isByteSized())
1366 return false;
1367
1368 // Check for load fold barriers between the extraction and the load.
1369 if (MI.getParent() != LoadMI->getParent())
1370 return false;
1371 const unsigned MaxIter = 20;
1372 unsigned Iter = 0;
1373 for (auto II = LoadMI->getIterator(), IE = MI.getIterator(); II != IE; ++II) {
1374 if (II->isLoadFoldBarrier())
1375 return false;
1376 if (Iter++ == MaxIter)
1377 return false;
1378 }
1379
1380 // Check if the new load that we are going to create is legal
1381 // if we are in the post-legalization phase.
1382 MachineMemOperand MMO = LoadMI->getMMO();
1383 Align Alignment = MMO.getAlign();
1384 MachinePointerInfo PtrInfo;
1386
1387 // Finding the appropriate PtrInfo if offset is a known constant.
1388 // This is required to create the memory operand for the narrowed load.
1389 // This machine memory operand object helps us reason about legality
1390 // before we proceed to combine the instruction.
1391 if (auto CVal = getIConstantVRegVal(Vector, MRI)) {
1392 int Elt = CVal->getZExtValue();
1393 // FIXME: should be (ABI size)*Elt.
1394 Offset = VecEltTy.getSizeInBits() * Elt / 8;
1395 PtrInfo = MMO.getPointerInfo().getWithOffset(Offset);
1396 } else {
1397 // Discard the pointer info except the address space because the memory
1398 // operand can't represent this new access since the offset is variable.
1399 Offset = VecEltTy.getSizeInBits() / 8;
1401 }
1402
1403 Alignment = commonAlignment(Alignment, Offset);
1404
1405 Register VecPtr = LoadMI->getPointerReg();
1406 LLT PtrTy = MRI.getType(VecPtr);
1407
1408 MachineFunction &MF = *MI.getMF();
1409 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, VecEltTy);
1410
1411 LegalityQuery::MemDesc MMDesc(*NewMMO);
1412
1414 {TargetOpcode::G_LOAD, {VecEltTy, PtrTy}, {MMDesc}}))
1415 return false;
1416
1417 // Load must be allowed and fast on the target.
1419 auto &DL = MF.getDataLayout();
1420 unsigned Fast = 0;
1421 if (!getTargetLowering().allowsMemoryAccess(C, DL, VecEltTy, *NewMMO,
1422 &Fast) ||
1423 !Fast)
1424 return false;
1425
1426 Register Result = MI.getOperand(0).getReg();
1427 Register Index = MI.getOperand(2).getReg();
1428
1429 MatchInfo = [=](MachineIRBuilder &B) {
1430 GISelObserverWrapper DummyObserver;
1431 LegalizerHelper Helper(B.getMF(), DummyObserver, B);
1432 // Get pointer to the vector element.
1433 Register finalPtr = Helper.getVectorElementPointer(
1434 LoadMI->getPointerReg(), MRI.getType(LoadMI->getOperand(0).getReg()),
1435 Index);
1436 // New G_LOAD instruction.
1437 B.buildLoad(Result, finalPtr, PtrInfo, Alignment);
1438 // Remove the original G_LOAD instruction.
1439 LoadMI->eraseFromParent();
1440 };
1441
1442 return true;
1443}
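// Illustrative sketch (not part of the original source; register names are
// made up): instead of loading the whole vector only to extract one lane,
// load just that lane, e.g.:
//   %v:_(<4 x s32>) = G_LOAD %ptr(p0) :: (load (<4 x s32>))
//   %e:_(s32) = G_EXTRACT_VECTOR_ELT %v, %idx(s64)
// -->
//   ; %eltptr = %ptr + %idx * sizeof(s32), via getVectorElementPointer()
//   %e:_(s32) = G_LOAD %eltptr(p0) :: (load (s32))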
1444
1446 MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) const {
1447 auto &LdSt = cast<GLoadStore>(MI);
1448
1449 if (LdSt.isAtomic())
1450 return false;
1451
1452 MatchInfo.IsPre = findPreIndexCandidate(LdSt, MatchInfo.Addr, MatchInfo.Base,
1453 MatchInfo.Offset);
1454 if (!MatchInfo.IsPre &&
1455 !findPostIndexCandidate(LdSt, MatchInfo.Addr, MatchInfo.Base,
1456 MatchInfo.Offset, MatchInfo.RematOffset))
1457 return false;
1458
1459 return true;
1460}
1461
1463 MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) const {
1464 MachineInstr &AddrDef = *MRI.getUniqueVRegDef(MatchInfo.Addr);
1465 unsigned Opcode = MI.getOpcode();
1466 bool IsStore = Opcode == TargetOpcode::G_STORE;
1467 unsigned NewOpcode = getIndexedOpc(Opcode);
1468
1469 // If the offset constant didn't happen to dominate the load/store, we can
1470 // just clone it as needed.
1471 if (MatchInfo.RematOffset) {
1472 auto *OldCst = MRI.getVRegDef(MatchInfo.Offset);
1473 auto NewCst = Builder.buildConstant(MRI.getType(MatchInfo.Offset),
1474 *OldCst->getOperand(1).getCImm());
1475 MatchInfo.Offset = NewCst.getReg(0);
1476 }
1477
1478 auto MIB = Builder.buildInstr(NewOpcode);
1479 if (IsStore) {
1480 MIB.addDef(MatchInfo.Addr);
1481 MIB.addUse(MI.getOperand(0).getReg());
1482 } else {
1483 MIB.addDef(MI.getOperand(0).getReg());
1484 MIB.addDef(MatchInfo.Addr);
1485 }
1486
1487 MIB.addUse(MatchInfo.Base);
1488 MIB.addUse(MatchInfo.Offset);
1489 MIB.addImm(MatchInfo.IsPre);
1490 MIB->cloneMemRefs(*MI.getMF(), MI);
1491 MI.eraseFromParent();
1492 AddrDef.eraseFromParent();
1493
1494 LLVM_DEBUG(dbgs() << " Combined to indexed operation");
1495}
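// Illustrative sketch (not part of the original source; register names are
// made up) of a post-indexed store formed above:
//   G_STORE %val(s64), %baseptr(p0)
//   %offset:_(s64) = G_CONSTANT i64 -256
//   %newaddr:_(p0) = G_PTR_ADD %baseptr, %offset(s64)
// -->
//   %newaddr:_(p0) = G_INDEXED_STORE %val(s64), %baseptr(p0), %offset(s64), 0
// where the trailing immediate (IsPre) is 0 for post-indexing and 1 for
// pre-indexing.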
1496
1498 MachineInstr *&OtherMI) const {
1499 unsigned Opcode = MI.getOpcode();
1500 bool IsDiv, IsSigned;
1501
1502 switch (Opcode) {
1503 default:
1504 llvm_unreachable("Unexpected opcode!");
1505 case TargetOpcode::G_SDIV:
1506 case TargetOpcode::G_UDIV: {
1507 IsDiv = true;
1508 IsSigned = Opcode == TargetOpcode::G_SDIV;
1509 break;
1510 }
1511 case TargetOpcode::G_SREM:
1512 case TargetOpcode::G_UREM: {
1513 IsDiv = false;
1514 IsSigned = Opcode == TargetOpcode::G_SREM;
1515 break;
1516 }
1517 }
1518
1519 Register Src1 = MI.getOperand(1).getReg();
1520 unsigned DivOpcode, RemOpcode, DivremOpcode;
1521 if (IsSigned) {
1522 DivOpcode = TargetOpcode::G_SDIV;
1523 RemOpcode = TargetOpcode::G_SREM;
1524 DivremOpcode = TargetOpcode::G_SDIVREM;
1525 } else {
1526 DivOpcode = TargetOpcode::G_UDIV;
1527 RemOpcode = TargetOpcode::G_UREM;
1528 DivremOpcode = TargetOpcode::G_UDIVREM;
1529 }
1530
1531 if (!isLegalOrBeforeLegalizer({DivremOpcode, {MRI.getType(Src1)}}))
1532 return false;
1533
1534 // Combine:
1535 // %div:_ = G_[SU]DIV %src1:_, %src2:_
1536 // %rem:_ = G_[SU]REM %src1:_, %src2:_
1537 // into:
1538 // %div:_, %rem:_ = G_[SU]DIVREM %src1:_, %src2:_
1539
1540 // Combine:
1541 // %rem:_ = G_[SU]REM %src1:_, %src2:_
1542 // %div:_ = G_[SU]DIV %src1:_, %src2:_
1543 // into:
1544 // %div:_, %rem:_ = G_[SU]DIVREM %src1:_, %src2:_
1545
1546 for (auto &UseMI : MRI.use_nodbg_instructions(Src1)) {
1547 if (MI.getParent() == UseMI.getParent() &&
1548 ((IsDiv && UseMI.getOpcode() == RemOpcode) ||
1549 (!IsDiv && UseMI.getOpcode() == DivOpcode)) &&
1550 matchEqualDefs(MI.getOperand(2), UseMI.getOperand(2)) &&
1551 matchEqualDefs(MI.getOperand(1), UseMI.getOperand(1))) {
1552 OtherMI = &UseMI;
1553 return true;
1554 }
1555 }
1556
1557 return false;
1558}
1559
1561 MachineInstr *&OtherMI) const {
1562 unsigned Opcode = MI.getOpcode();
1563 assert(OtherMI && "OtherMI shouldn't be empty.");
1564
1565 Register DestDivReg, DestRemReg;
1566 if (Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_UDIV) {
1567 DestDivReg = MI.getOperand(0).getReg();
1568 DestRemReg = OtherMI->getOperand(0).getReg();
1569 } else {
1570 DestDivReg = OtherMI->getOperand(0).getReg();
1571 DestRemReg = MI.getOperand(0).getReg();
1572 }
1573
1574 bool IsSigned =
1575 Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_SREM;
1576
1577 // Check which instruction is first in the block so we don't break def-use
1578 // deps by "moving" the instruction incorrectly. Also keep track of which
1579 // instruction is first so we pick its operands, avoiding use-before-def
1580 // bugs.
1581 MachineInstr *FirstInst = dominates(MI, *OtherMI) ? &MI : OtherMI;
1582 Builder.setInstrAndDebugLoc(*FirstInst);
1583
1584 Builder.buildInstr(IsSigned ? TargetOpcode::G_SDIVREM
1585 : TargetOpcode::G_UDIVREM,
1586 {DestDivReg, DestRemReg},
1587 { FirstInst->getOperand(1), FirstInst->getOperand(2) });
1588 MI.eraseFromParent();
1589 OtherMI->eraseFromParent();
1590}
1591
1593 MachineInstr &MI, MachineInstr *&BrCond) const {
1594 assert(MI.getOpcode() == TargetOpcode::G_BR);
1595
1596 // Try to match the following:
1597 // bb1:
1598 // G_BRCOND %c1, %bb2
1599 // G_BR %bb3
1600 // bb2:
1601 // ...
1602 // bb3:
1603
1604 // The above pattern does not have a fall through to the successor bb2, always
1605 // resulting in a branch no matter which path is taken. Here we try to find
1606 // and replace that pattern with a conditional branch to bb3 and otherwise a
1607 // fallthrough to bb2. This is generally better for branch predictors.
1608
1609 MachineBasicBlock *MBB = MI.getParent();
1611 if (BrIt == MBB->begin())
1612 return false;
1613 assert(std::next(BrIt) == MBB->end() && "expected G_BR to be a terminator");
1614
1615 BrCond = &*std::prev(BrIt);
1616 if (BrCond->getOpcode() != TargetOpcode::G_BRCOND)
1617 return false;
1618
1619 // Check that the next block is the conditional branch target. Also make sure
1620 // that it isn't the same as the G_BR's target (otherwise, this will loop.)
1621 MachineBasicBlock *BrCondTarget = BrCond->getOperand(1).getMBB();
1622 return BrCondTarget != MI.getOperand(0).getMBB() &&
1623 MBB->isLayoutSuccessor(BrCondTarget);
1624}
1625
1627 MachineInstr &MI, MachineInstr *&BrCond) const {
1628 MachineBasicBlock *BrTarget = MI.getOperand(0).getMBB();
1629 Builder.setInstrAndDebugLoc(*BrCond);
1630 LLT Ty = MRI.getType(BrCond->getOperand(0).getReg());
1631 // FIXME: Does int/fp matter for this? If so, we might need to restrict
1632 // this to i1 only since we might not know for sure what kind of
1633 // compare generated the condition value.
1634 auto True = Builder.buildConstant(
1635 Ty, getICmpTrueVal(getTargetLowering(), false, false));
1636 auto Xor = Builder.buildXor(Ty, BrCond->getOperand(0), True);
1637
1638 auto *FallthroughBB = BrCond->getOperand(1).getMBB();
1639 Observer.changingInstr(MI);
1640 MI.getOperand(0).setMBB(FallthroughBB);
1641 Observer.changedInstr(MI);
1642
1643 // Change the conditional branch to use the inverted condition and
1644 // new target block.
1645 Observer.changingInstr(*BrCond);
1646 BrCond->getOperand(0).setReg(Xor.getReg(0));
1647 BrCond->getOperand(1).setMBB(BrTarget);
1648 Observer.changedInstr(*BrCond);
1649}
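// Illustrative sketch (not part of the original source) of the rewrite
// above, where bb2 is bb1's layout successor:
//   bb1:
//     G_BRCOND %c1(s1), %bb2
//     G_BR %bb3
// -->
//   bb1:
//     %true:_(s1) = G_CONSTANT i1 1   ; the target's "true" value
//     %inv:_(s1) = G_XOR %c1, %true
//     G_BRCOND %inv(s1), %bb3
//     G_BR %bb2
// Since bb2 is the layout successor, the remaining G_BR to it can later be
// removed, leaving a fallthrough.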
1650
1652 MachineIRBuilder HelperBuilder(MI);
1653 GISelObserverWrapper DummyObserver;
1654 LegalizerHelper Helper(HelperBuilder.getMF(), DummyObserver, HelperBuilder);
1655 return Helper.lowerMemcpyInline(MI) ==
1657}
1658
1660 unsigned MaxLen) const {
1661 MachineIRBuilder HelperBuilder(MI);
1662 GISelObserverWrapper DummyObserver;
1663 LegalizerHelper Helper(HelperBuilder.getMF(), DummyObserver, HelperBuilder);
1664 return Helper.lowerMemCpyFamily(MI, MaxLen) ==
1666}
1667
1669 const MachineRegisterInfo &MRI,
1670 const APFloat &Val) {
1671 APFloat Result(Val);
1672 switch (MI.getOpcode()) {
1673 default:
1674 llvm_unreachable("Unexpected opcode!");
1675 case TargetOpcode::G_FNEG: {
1676 Result.changeSign();
1677 return Result;
1678 }
1679 case TargetOpcode::G_FABS: {
1680 Result.clearSign();
1681 return Result;
1682 }
1683 case TargetOpcode::G_FPEXT:
1684 case TargetOpcode::G_FPTRUNC: {
1685 bool Unused;
1686 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
1688 &Unused);
1689 return Result;
1690 }
1691 case TargetOpcode::G_FSQRT: {
1692 bool Unused;
1694 &Unused);
1695 Result = APFloat(sqrt(Result.convertToDouble()));
1696 break;
1697 }
1698 case TargetOpcode::G_FLOG2: {
1699 bool Unused;
1701 &Unused);
1702 Result = APFloat(log2(Result.convertToDouble()));
1703 break;
1704 }
1705 }
1706 // Convert `APFloat` to appropriate IEEE type depending on `DstTy`. Otherwise,
1707 // `buildFConstant` will assert on size mismatch. Only `G_FSQRT` and
1708 // `G_FLOG2` reach here.
1709 bool Unused;
1710 Result.convert(Val.getSemantics(), APFloat::rmNearestTiesToEven, &Unused);
1711 return Result;
1712}
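// Illustrative example (not part of the original source): for
//   %c:_(s32) = G_FCONSTANT float 4.000000e+00
//   %r:_(s32) = G_FSQRT %c
// the helper converts 4.0 to double, computes sqrt(4.0) = 2.0, then converts
// the result back to the original (IEEE single) semantics before the caller
// builds
//   %r:_(s32) = G_FCONSTANT float 2.000000e+00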
1713
1715 MachineInstr &MI, const ConstantFP *Cst) const {
1716 APFloat Folded = constantFoldFpUnary(MI, MRI, Cst->getValue());
1717 const ConstantFP *NewCst = ConstantFP::get(Builder.getContext(), Folded);
1718 Builder.buildFConstant(MI.getOperand(0), *NewCst);
1719 MI.eraseFromParent();
1720}
1721
1723 PtrAddChain &MatchInfo) const {
1724 // We're trying to match the following pattern:
1725 // %t1 = G_PTR_ADD %base, G_CONSTANT imm1
1726 // %root = G_PTR_ADD %t1, G_CONSTANT imm2
1727 // -->
1728 // %root = G_PTR_ADD %base, G_CONSTANT (imm1 + imm2)
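// For example (illustrative values):
//   %t1 = G_PTR_ADD %base, G_CONSTANT 16
//   %root = G_PTR_ADD %t1, G_CONSTANT 8
// folds to
//   %root = G_PTR_ADD %base, G_CONSTANT 24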
1729
1730 if (MI.getOpcode() != TargetOpcode::G_PTR_ADD)
1731 return false;
1732
1733 Register Add2 = MI.getOperand(1).getReg();
1734 Register Imm1 = MI.getOperand(2).getReg();
1735 auto MaybeImmVal = getIConstantVRegValWithLookThrough(Imm1, MRI);
1736 if (!MaybeImmVal)
1737 return false;
1738
1739 MachineInstr *Add2Def = MRI.getVRegDef(Add2);
1740 if (!Add2Def || Add2Def->getOpcode() != TargetOpcode::G_PTR_ADD)
1741 return false;
1742
1743 Register Base = Add2Def->getOperand(1).getReg();
1744 Register Imm2 = Add2Def->getOperand(2).getReg();
1745 auto MaybeImm2Val = getIConstantVRegValWithLookThrough(Imm2, MRI);
1746 if (!MaybeImm2Val)
1747 return false;
1748
1749 // Check if the new combined immediate forms an illegal addressing mode.
1750 // Do not combine if it was legal before but would get illegal.
1751 // To do so, we need to find a load/store user of the pointer to get
1752 // the access type.
1753 Type *AccessTy = nullptr;
1754 auto &MF = *MI.getMF();
1755 for (auto &UseMI : MRI.use_nodbg_instructions(MI.getOperand(0).getReg())) {
1756 if (auto *LdSt = dyn_cast<GLoadStore>(&UseMI)) {
1757 AccessTy = getTypeForLLT(MRI.getType(LdSt->getReg(0)),
1758 MF.getFunction().getContext());
1759 break;
1760 }
1761 }
1762 TargetLoweringBase::AddrMode AMNew;
1763 APInt CombinedImm = MaybeImmVal->Value + MaybeImm2Val->Value;
1764 AMNew.BaseOffs = CombinedImm.getSExtValue();
1765 if (AccessTy) {
1766 AMNew.HasBaseReg = true;
1767 TargetLoweringBase::AddrMode AMOld;
1768 AMOld.BaseOffs = MaybeImmVal->Value.getSExtValue();
1769 AMOld.HasBaseReg = true;
1770 unsigned AS = MRI.getType(Add2).getAddressSpace();
1771 const auto &TLI = *MF.getSubtarget().getTargetLowering();
1772 if (TLI.isLegalAddressingMode(MF.getDataLayout(), AMOld, AccessTy, AS) &&
1773 !TLI.isLegalAddressingMode(MF.getDataLayout(), AMNew, AccessTy, AS))
1774 return false;
1775 }
1776
1777 // Reassociating nuw additions preserves nuw. If both original G_PTR_ADDs are
1778 // inbounds, reaching the same result in one G_PTR_ADD is also inbounds.
1779 // The nusw constraints are satisfied because imm1+imm2 cannot exceed the
1780 // largest signed integer that fits into the index type, which is the maximum
1781 // size of allocated objects according to the IR Language Reference.
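// For instance (illustrative): if both G_PTR_ADDs are nuw,
//   %t1 = nuw G_PTR_ADD %base, G_CONSTANT 16
//   %root = nuw G_PTR_ADD %t1, G_CONSTANT 8
// then neither step wraps, so the combined
//   %root = nuw G_PTR_ADD %base, G_CONSTANT 24
// cannot wrap either and keeps the flag.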
1782 unsigned PtrAddFlags = MI.getFlags();
1783 unsigned LHSPtrAddFlags = Add2Def->getFlags();
1784 bool IsNoUWrap = PtrAddFlags & LHSPtrAddFlags & MachineInstr::MIFlag::NoUWrap;
1785 bool IsInBounds =
1786 PtrAddFlags & LHSPtrAddFlags & MachineInstr::MIFlag::InBounds;
1787 unsigned Flags = 0;
1788 if (IsNoUWrap)
1789 Flags |= MachineInstr::MIFlag::NoUWrap;
1790 if (IsInBounds) {
1791 Flags |= MachineInstr::MIFlag::InBounds;
1792 Flags |= MachineInstr::MIFlag::NoUSWrap;
1793 }
1794
1795 // Pass the combined immediate to the apply function.
1796 MatchInfo.Imm = AMNew.BaseOffs;
1797 MatchInfo.Base = Base;
1798 MatchInfo.Bank = getRegBank(Imm2);
1799 MatchInfo.Flags = Flags;
1800 return true;
1801}
1802
1804 PtrAddChain &MatchInfo) const {
1805 assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
1806 MachineIRBuilder MIB(MI);
1807 LLT OffsetTy = MRI.getType(MI.getOperand(2).getReg());
1808 auto NewOffset = MIB.buildConstant(OffsetTy, MatchInfo.Imm);
1809 setRegBank(NewOffset.getReg(0), MatchInfo.Bank);
1810 Observer.changingInstr(MI);
1811 MI.getOperand(1).setReg(MatchInfo.Base);
1812 MI.getOperand(2).setReg(NewOffset.getReg(0));
1813 MI.setFlags(MatchInfo.Flags);
1814 Observer.changedInstr(MI);
1815}
1816
1818 RegisterImmPair &MatchInfo) const {
1819 // We're trying to match the following pattern with any of
1820 // G_SHL/G_ASHR/G_LSHR/G_SSHLSAT/G_USHLSAT shift instructions:
1821 // %t1 = SHIFT %base, G_CONSTANT imm1
1822 // %root = SHIFT %t1, G_CONSTANT imm2
1823 // -->
1824 // %root = SHIFT %base, G_CONSTANT (imm1 + imm2)
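// A concrete instance (illustrative values), here with G_LSHR on s32:
//   %t1 = G_LSHR %base, G_CONSTANT 3
//   %root = G_LSHR %t1, G_CONSTANT 2
// becomes
//   %root = G_LSHR %base, G_CONSTANT 5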
1825
1826 unsigned Opcode = MI.getOpcode();
1827 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
1828 Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||
1829 Opcode == TargetOpcode::G_USHLSAT) &&
1830 "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");
1831
1832 Register Shl2 = MI.getOperand(1).getReg();
1833 Register Imm1 = MI.getOperand(2).getReg();
1834 auto MaybeImmVal = getIConstantVRegValWithLookThrough(Imm1, MRI);
1835 if (!MaybeImmVal)
1836 return false;
1837
1838 MachineInstr *Shl2Def = MRI.getUniqueVRegDef(Shl2);
1839 if (Shl2Def->getOpcode() != Opcode)
1840 return false;
1841
1842 Register Base = Shl2Def->getOperand(1).getReg();
1843 Register Imm2 = Shl2Def->getOperand(2).getReg();
1844 auto MaybeImm2Val = getIConstantVRegValWithLookThrough(Imm2, MRI);
1845 if (!MaybeImm2Val)
1846 return false;
1847
1848 // Pass the combined immediate to the apply function.
1849 MatchInfo.Imm =
1850 (MaybeImmVal->Value.getZExtValue() + MaybeImm2Val->Value).getZExtValue();
1851 MatchInfo.Reg = Base;
1852
1853 // There is no simple replacement for a saturating unsigned left shift that
1854 // exceeds the scalar size.
1855 if (Opcode == TargetOpcode::G_USHLSAT &&
1856 MatchInfo.Imm >= MRI.getType(Shl2).getScalarSizeInBits())
1857 return false;
1858
1859 return true;
1860}
1861
1863 RegisterImmPair &MatchInfo) const {
1864 unsigned Opcode = MI.getOpcode();
1865 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
1866 Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||
1867 Opcode == TargetOpcode::G_USHLSAT) &&
1868 "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");
1869
1870 LLT Ty = MRI.getType(MI.getOperand(1).getReg());
1871 unsigned const ScalarSizeInBits = Ty.getScalarSizeInBits();
1872 auto Imm = MatchInfo.Imm;
1873
1874 if (Imm >= ScalarSizeInBits) {
1875 // Any logical shift that exceeds scalar size will produce zero.
1876 if (Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_LSHR) {
1877 Builder.buildConstant(MI.getOperand(0), 0);
1878 MI.eraseFromParent();
1879 return;
1880 }
1881 // Arithmetic shift and saturating signed left shift have no effect beyond
1882 // scalar size.
1883 Imm = ScalarSizeInBits - 1;
1884 }
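// For example (illustrative, s32): a combined G_LSHR amount of 35 folds the
// whole chain to the constant 0, while a combined G_ASHR amount of 35 is
// clamped to 31, which still only replicates the sign bit.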
1885
1886 LLT ImmTy = MRI.getType(MI.getOperand(2).getReg());
1887 Register NewImm = Builder.buildConstant(ImmTy, Imm).getReg(0);
1888 Observer.changingInstr(MI);
1889 MI.getOperand(1).setReg(MatchInfo.Reg);
1890 MI.getOperand(2).setReg(NewImm);
1891 Observer.changedInstr(MI);
1892}
1893
1895 MachineInstr &MI, ShiftOfShiftedLogic &MatchInfo) const {
1896 // We're trying to match the following pattern with any of
1897 // G_SHL/G_ASHR/G_LSHR/G_USHLSAT/G_SSHLSAT shift instructions in combination
1898 // with any of G_AND/G_OR/G_XOR logic instructions.
1899 // %t1 = SHIFT %X, G_CONSTANT C0
1900 // %t2 = LOGIC %t1, %Y
1901 // %root = SHIFT %t2, G_CONSTANT C1
1902 // -->
1903 // %t3 = SHIFT %X, G_CONSTANT (C0+C1)
1904 // %t4 = SHIFT %Y, G_CONSTANT C1
1905 // %root = LOGIC %t3, %t4
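// A worked instance (illustrative) with SHIFT = G_SHL and LOGIC = G_AND:
//   ((%X << 2) & %Y) << 3
// becomes
//   (%X << 5) & (%Y << 3)
// because a left shift distributes over the bitwise logic operation.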
1906 unsigned ShiftOpcode = MI.getOpcode();
1907 assert((ShiftOpcode == TargetOpcode::G_SHL ||
1908 ShiftOpcode == TargetOpcode::G_ASHR ||
1909 ShiftOpcode == TargetOpcode::G_LSHR ||
1910 ShiftOpcode == TargetOpcode::G_USHLSAT ||
1911 ShiftOpcode == TargetOpcode::G_SSHLSAT) &&
1912 "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT");
1913
1914 // Match a one-use bitwise logic op.
1915 Register LogicDest = MI.getOperand(1).getReg();
1916 if (!MRI.hasOneNonDBGUse(LogicDest))
1917 return false;
1918
1919 MachineInstr *LogicMI = MRI.getUniqueVRegDef(LogicDest);
1920 unsigned LogicOpcode = LogicMI->getOpcode();
1921 if (LogicOpcode != TargetOpcode::G_AND && LogicOpcode != TargetOpcode::G_OR &&
1922 LogicOpcode != TargetOpcode::G_XOR)
1923 return false;
1924
1925 // Find a matching one-use shift by constant.
1926 const Register C1 = MI.getOperand(2).getReg();
1927 auto MaybeImmVal = getIConstantVRegValWithLookThrough(C1, MRI);
1928 if (!MaybeImmVal || MaybeImmVal->Value == 0)
1929 return false;
1930
1931 const uint64_t C1Val = MaybeImmVal->Value.getZExtValue();
1932
1933 auto matchFirstShift = [&](const MachineInstr *MI, uint64_t &ShiftVal) {
1934 // The shift should match the previous one and should have a single use.
1935 if (MI->getOpcode() != ShiftOpcode ||
1936 !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
1937 return false;
1938
1939 // Must be a constant.
1940 auto MaybeImmVal =
1941 getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
1942 if (!MaybeImmVal)
1943 return false;
1944
1945 ShiftVal = MaybeImmVal->Value.getSExtValue();
1946 return true;
1947 };
1948
1949 // Logic ops are commutative, so check each operand for a match.
1950 Register LogicMIReg1 = LogicMI->getOperand(1).getReg();
1951 MachineInstr *LogicMIOp1 = MRI.getUniqueVRegDef(LogicMIReg1);
1952 Register LogicMIReg2 = LogicMI->getOperand(2).getReg();
1953 MachineInstr *LogicMIOp2 = MRI.getUniqueVRegDef(LogicMIReg2);
1954 uint64_t C0Val;
1955
1956 if (matchFirstShift(LogicMIOp1, C0Val)) {
1957 MatchInfo.LogicNonShiftReg = LogicMIReg2;
1958 MatchInfo.Shift2 = LogicMIOp1;
1959 } else if (matchFirstShift(LogicMIOp2, C0Val)) {
1960 MatchInfo.LogicNonShiftReg = LogicMIReg1;
1961 MatchInfo.Shift2 = LogicMIOp2;
1962 } else
1963 return false;
1964
1965 MatchInfo.ValSum = C0Val + C1Val;
1966
1967 // The fold is not valid if the sum of the shift values is >= the bitwidth.
1968 if (MatchInfo.ValSum >= MRI.getType(LogicDest).getScalarSizeInBits())
1969 return false;
1970
1971 MatchInfo.Logic = LogicMI;
1972 return true;
1973}
1974
1976 MachineInstr &MI, ShiftOfShiftedLogic &MatchInfo) const {
1977 unsigned Opcode = MI.getOpcode();
1978 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
1979 Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_USHLSAT ||
1980 Opcode == TargetOpcode::G_SSHLSAT) &&
1981 "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT");
1982
1983 LLT ShlType = MRI.getType(MI.getOperand(2).getReg());
1984 LLT DestType = MRI.getType(MI.getOperand(0).getReg());
1985
1986 Register Const = Builder.buildConstant(ShlType, MatchInfo.ValSum).getReg(0);
1987
1988 Register Shift1Base = MatchInfo.Shift2->getOperand(1).getReg();
1989 Register Shift1 =
1990 Builder.buildInstr(Opcode, {DestType}, {Shift1Base, Const}).getReg(0);
1991
1992 // If LogicNonShiftReg is the same as Shift1Base, and the shift1 constant
1993 // equals the MatchInfo.Shift2 constant, CSEMIRBuilder will reuse the old
1994 // shift1 when building shift2. In that case, erasing MatchInfo.Shift2 at the
1995 // end would actually erase the reused shift1 and crash later, so erase it
1996 // earlier to avoid the crash.
1997 MatchInfo.Shift2->eraseFromParent();
1998
1999 Register Shift2Const = MI.getOperand(2).getReg();
2000 Register Shift2 = Builder
2001 .buildInstr(Opcode, {DestType},
2002 {MatchInfo.LogicNonShiftReg, Shift2Const})
2003 .getReg(0);
2004
2005 Register Dest = MI.getOperand(0).getReg();
2006 Builder.buildInstr(MatchInfo.Logic->getOpcode(), {Dest}, {Shift1, Shift2});
2007
2008 // This was one use so it's safe to remove it.
2009 MatchInfo.Logic->eraseFromParent();
2010
2011 MI.eraseFromParent();
2012}
2013
2015 BuildFnTy &MatchInfo) const {
2016 assert(MI.getOpcode() == TargetOpcode::G_SHL && "Expected G_SHL");
2017 // Combine (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
2018 // Combine (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
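// For example (illustrative constants):
//   %t = G_ADD %x, G_CONSTANT 5
//   %root = G_SHL %t, G_CONSTANT 2
// is rebuilt as
//   %root = G_ADD (G_SHL %x, 2), (G_SHL 5, 2)
// where the constant shift later folds to 20.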
2019 auto &Shl = cast<GenericMachineInstr>(MI);
2020 Register DstReg = Shl.getReg(0);
2021 Register SrcReg = Shl.getReg(1);
2022 Register ShiftReg = Shl.getReg(2);
2023 Register X, C1;
2024
2025 if (!getTargetLowering().isDesirableToCommuteWithShift(MI, !isPreLegalize()))
2026 return false;
2027
2028 if (!mi_match(SrcReg, MRI,
2029 m_OneNonDBGUse(m_any_of(m_GAdd(m_Reg(X), m_Reg(C1)),
2030 m_GOr(m_Reg(X), m_Reg(C1))))))
2031 return false;
2032
2033 APInt C1Val, C2Val;
2034 if (!mi_match(C1, MRI, m_ICstOrSplat(C1Val)) ||
2035 !mi_match(ShiftReg, MRI, m_ICstOrSplat(C2Val)))
2036 return false;
2037
2038 auto *SrcDef = MRI.getVRegDef(SrcReg);
2039 assert((SrcDef->getOpcode() == TargetOpcode::G_ADD ||
2040 SrcDef->getOpcode() == TargetOpcode::G_OR) && "Unexpected op");
2041 LLT SrcTy = MRI.getType(SrcReg);
2042 MatchInfo = [=](MachineIRBuilder &B) {
2043 auto S1 = B.buildShl(SrcTy, X, ShiftReg);
2044 auto S2 = B.buildShl(SrcTy, C1, ShiftReg);
2045 B.buildInstr(SrcDef->getOpcode(), {DstReg}, {S1, S2});
2046 };
2047 return true;
2048}
2049
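// The matcher below handles the following shape (sketched from the code
// underneath; names are illustrative):
//   %inner = G_LSHR %src(wide), C1
//   %trunc = G_TRUNC %inner(narrow)
//   %root  = G_LSHR %trunc, C2
// When C1 + C2 is still smaller than the wide bit width, both shifts can be
// done as one wide G_LSHR by (C1 + C2) followed by a trunc, and-masking the
// surviving low bits unless the inner shift already clears everything above
// the narrow width.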
2051 LshrOfTruncOfLshr &MatchInfo,
2052 MachineInstr &ShiftMI) const {
2053 assert(MI.getOpcode() == TargetOpcode::G_LSHR && "Expected a G_LSHR");
2054
2055 Register N0 = MI.getOperand(1).getReg();
2056 Register N1 = MI.getOperand(2).getReg();
2057 unsigned OpSizeInBits = MRI.getType(N0).getScalarSizeInBits();
2058
2059 APInt N1C, N001C;
2060 if (!mi_match(N1, MRI, m_ICstOrSplat(N1C)))
2061 return false;
2062 auto N001 = ShiftMI.getOperand(2).getReg();
2063 if (!mi_match(N001, MRI, m_ICstOrSplat(N001C)))
2064 return false;
2065
2066 if (N001C.getBitWidth() > N1C.getBitWidth())
2067 N1C = N1C.zext(N001C.getBitWidth());
2068 else
2069 N001C = N001C.zext(N1C.getBitWidth());
2070
2071 Register InnerShift = ShiftMI.getOperand(0).getReg();
2072 LLT InnerShiftTy = MRI.getType(InnerShift);
2073 uint64_t InnerShiftSize = InnerShiftTy.getScalarSizeInBits();
2074 if ((N1C + N001C).ult(InnerShiftSize)) {
2075 MatchInfo.Src = ShiftMI.getOperand(1).getReg();
2076 MatchInfo.ShiftAmt = N1C + N001C;
2077 MatchInfo.ShiftAmtTy = MRI.getType(N001);
2078 MatchInfo.InnerShiftTy = InnerShiftTy;
2079
2080 if ((N001C + OpSizeInBits) == InnerShiftSize)
2081 return true;
2082 if (MRI.hasOneUse(N0) && MRI.hasOneUse(InnerShift)) {
2083 MatchInfo.Mask = true;
2084 MatchInfo.MaskVal = APInt(N1C.getBitWidth(), OpSizeInBits) - N1C;
2085 return true;
2086 }
2087 }
2088 return false;
2089}
2090
2092 MachineInstr &MI, LshrOfTruncOfLshr &MatchInfo) const {
2093 assert(MI.getOpcode() == TargetOpcode::G_LSHR && "Expected a G_LSHR");
2094
2095 Register Dst = MI.getOperand(0).getReg();
2096 auto ShiftAmt =
2097 Builder.buildConstant(MatchInfo.ShiftAmtTy, MatchInfo.ShiftAmt);
2098 auto Shift =
2099 Builder.buildLShr(MatchInfo.InnerShiftTy, MatchInfo.Src, ShiftAmt);
2100 if (MatchInfo.Mask == true) {
2101 APInt MaskVal =
2102 APInt::getLowBitsSet(MatchInfo.InnerShiftTy.getScalarSizeInBits(),
2103 MatchInfo.MaskVal.getZExtValue());
2104 auto Mask = Builder.buildConstant(MatchInfo.InnerShiftTy, MaskVal);
2105 auto And = Builder.buildAnd(MatchInfo.InnerShiftTy, Shift, Mask);
2106 Builder.buildTrunc(Dst, And);
2107 } else
2108 Builder.buildTrunc(Dst, Shift);
2109 MI.eraseFromParent();
2110}
2111
2113 unsigned &ShiftVal) const {
2114 assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
2115 auto MaybeImmVal =
2116 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
2117 if (!MaybeImmVal)
2118 return false;
2119
2120 ShiftVal = MaybeImmVal->Value.exactLogBase2();
2121 return (static_cast<int32_t>(ShiftVal) != -1);
2122}
2123
2125 unsigned &ShiftVal) const {
2126 assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
2127 MachineIRBuilder MIB(MI);
2128 LLT ShiftTy = MRI.getType(MI.getOperand(0).getReg());
2129 auto ShiftCst = MIB.buildConstant(ShiftTy, ShiftVal);
2130 Observer.changingInstr(MI);
2131 MI.setDesc(MIB.getTII().get(TargetOpcode::G_SHL));
2132 MI.getOperand(2).setReg(ShiftCst.getReg(0));
2133 if (ShiftVal == ShiftTy.getScalarSizeInBits() - 1)
2134 MI.clearFlag(MachineInstr::MIFlag::NoSWrap);
2135 Observer.changedInstr(MI);
2136}
2137
2139 BuildFnTy &MatchInfo) const {
2140 GSub &Sub = cast<GSub>(MI);
2141
2142 LLT Ty = MRI.getType(Sub.getReg(0));
2143
2144 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, {Ty}}))
2145 return false;
2146
2147 if (!isConstantLegalOrBeforeLegalizer(Ty))
2148 return false;
2149
2150 APInt Imm = getIConstantFromReg(Sub.getRHSReg(), MRI);
2151
2152 MatchInfo = [=, &MI](MachineIRBuilder &B) {
2153 auto NegCst = B.buildConstant(Ty, -Imm);
2154 Observer.changingInstr(MI);
2155 MI.setDesc(B.getTII().get(TargetOpcode::G_ADD));
2156 MI.getOperand(2).setReg(NegCst.getReg(0));
2157 MI.clearFlag(MachineInstr::MIFlag::NoUWrap);
2158 if (Imm.isMinSignedValue())
2159 MI.clearFlag(MachineInstr::MIFlag::NoSWrap);
2160 Observer.changedInstr(MI);
2161 };
2162 return true;
2163}
2164
2165// shl ([sza]ext x), y => zext (shl x, y), if shift does not overflow source
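// For example (illustrative types): if the top three bits of %x:s8 are known
// to be zero,
//   %e:s32 = G_ZEXT %x(s8)
//   %root:s32 = G_SHL %e, G_CONSTANT 3
// can shift in the narrow type first and extend the result:
//   %n:s8 = G_SHL %x, G_CONSTANT 3
//   %root:s32 = G_ZEXT %n(s8)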
2167 RegisterImmPair &MatchData) const {
2168 assert(MI.getOpcode() == TargetOpcode::G_SHL && VT);
2169 if (!getTargetLowering().isDesirableToPullExtFromShl(MI))
2170 return false;
2171
2172 Register LHS = MI.getOperand(1).getReg();
2173
2174 Register ExtSrc;
2175 if (!mi_match(LHS, MRI, m_GAnyExt(m_Reg(ExtSrc))) &&
2176 !mi_match(LHS, MRI, m_GZExt(m_Reg(ExtSrc))) &&
2177 !mi_match(LHS, MRI, m_GSExt(m_Reg(ExtSrc))))
2178 return false;
2179
2180 Register RHS = MI.getOperand(2).getReg();
2181 MachineInstr *MIShiftAmt = MRI.getVRegDef(RHS);
2182 auto MaybeShiftAmtVal = isConstantOrConstantSplatVector(*MIShiftAmt, MRI);
2183 if (!MaybeShiftAmtVal)
2184 return false;
2185
2186 if (LI) {
2187 LLT SrcTy = MRI.getType(ExtSrc);
2188
2189 // We only really care about the legality with the shifted value. We can
2190 // pick any type for the constant shift amount, so ask the target what to
2191 // use. Otherwise we would have to guess and hope it is reported as legal.
2192 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(SrcTy);
2193 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SHL, {SrcTy, ShiftAmtTy}}))
2194 return false;
2195 }
2196
2197 int64_t ShiftAmt = MaybeShiftAmtVal->getSExtValue();
2198 MatchData.Reg = ExtSrc;
2199 MatchData.Imm = ShiftAmt;
2200
2201 unsigned MinLeadingZeros = VT->getKnownZeroes(ExtSrc).countl_one();
2202 unsigned SrcTySize = MRI.getType(ExtSrc).getScalarSizeInBits();
2203 return MinLeadingZeros >= ShiftAmt && ShiftAmt < SrcTySize;
2204}
2205
2207 MachineInstr &MI, const RegisterImmPair &MatchData) const {
2208 Register ExtSrcReg = MatchData.Reg;
2209 int64_t ShiftAmtVal = MatchData.Imm;
2210
2211 LLT ExtSrcTy = MRI.getType(ExtSrcReg);
2212 auto ShiftAmt = Builder.buildConstant(ExtSrcTy, ShiftAmtVal);
2213 auto NarrowShift =
2214 Builder.buildShl(ExtSrcTy, ExtSrcReg, ShiftAmt, MI.getFlags());
2215 Builder.buildZExt(MI.getOperand(0), NarrowShift);
2216 MI.eraseFromParent();
2217}
2218
2220 Register &MatchInfo) const {
2221 GMergeLikeInstr &Merge = cast<GMergeLikeInstr>(MI);
2222 SmallVector<Register, 16> MergedValues;
2223 for (unsigned I = 0; I < Merge.getNumSources(); ++I)
2224 MergedValues.emplace_back(Merge.getSourceReg(I));
2225
2226 auto *Unmerge = getOpcodeDef<GUnmerge>(MergedValues[0], MRI);
2227 if (!Unmerge || Unmerge->getNumDefs() != Merge.getNumSources())
2228 return false;
2229
2230 for (unsigned I = 0; I < MergedValues.size(); ++I)
2231 if (MergedValues[I] != Unmerge->getReg(I))
2232 return false;
2233
2234 MatchInfo = Unmerge->getSourceReg();
2235 return true;
2236}
2237
2238 static Register peekThroughBitcast(Register Reg,
2239 const MachineRegisterInfo &MRI) {
2240 while (mi_match(Reg, MRI, m_GBitcast(m_Reg(Reg))))
2241 ;
2242
2243 return Reg;
2244}
2245
2247 MachineInstr &MI, SmallVectorImpl<Register> &Operands) const {
2248 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2249 "Expected an unmerge");
2250 auto &Unmerge = cast<GUnmerge>(MI);
2251 Register SrcReg = peekThroughBitcast(Unmerge.getSourceReg(), MRI);
2252
2253 auto *SrcInstr = getOpcodeDef<GMergeLikeInstr>(SrcReg, MRI);
2254 if (!SrcInstr)
2255 return false;
2256
2257 // Check the source type of the merge.
2258 LLT SrcMergeTy = MRI.getType(SrcInstr->getSourceReg(0));
2259 LLT Dst0Ty = MRI.getType(Unmerge.getReg(0));
2260 bool SameSize = Dst0Ty.getSizeInBits() == SrcMergeTy.getSizeInBits();
2261 if (SrcMergeTy != Dst0Ty && !SameSize)
2262 return false;
2263 // They are the same now (modulo a bitcast).
2264 // We can collect all the src registers.
2265 for (unsigned Idx = 0; Idx < SrcInstr->getNumSources(); ++Idx)
2266 Operands.push_back(SrcInstr->getSourceReg(Idx));
2267 return true;
2268}
2269
2271 MachineInstr &MI, SmallVectorImpl<Register> &Operands) const {
2272 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2273 "Expected an unmerge");
2274 assert((MI.getNumOperands() - 1 == Operands.size()) &&
2275 "Not enough operands to replace all defs");
2276 unsigned NumElems = MI.getNumOperands() - 1;
2277
2278 LLT SrcTy = MRI.getType(Operands[0]);
2279 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
2280 bool CanReuseInputDirectly = DstTy == SrcTy;
2281 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
2282 Register DstReg = MI.getOperand(Idx).getReg();
2283 Register SrcReg = Operands[Idx];
2284
2285 // This combine may run after RegBankSelect, so we need to be aware of
2286 // register banks.
2287 const auto &DstCB = MRI.getRegClassOrRegBank(DstReg);
2288 if (!DstCB.isNull() && DstCB != MRI.getRegClassOrRegBank(SrcReg)) {
2289 SrcReg = Builder.buildCopy(MRI.getType(SrcReg), SrcReg).getReg(0);
2290 MRI.setRegClassOrRegBank(SrcReg, DstCB);
2291 }
2292
2293 if (CanReuseInputDirectly)
2294 replaceRegWith(MRI, DstReg, SrcReg);
2295 else
2296 Builder.buildCast(DstReg, SrcReg);
2297 }
2298 MI.eraseFromParent();
2299}
2300
2302 MachineInstr &MI, SmallVectorImpl<APInt> &Csts) const {
2303 unsigned SrcIdx = MI.getNumOperands() - 1;
2304 Register SrcReg = MI.getOperand(SrcIdx).getReg();
2305 MachineInstr *SrcInstr = MRI.getVRegDef(SrcReg);
2306 if (SrcInstr->getOpcode() != TargetOpcode::G_CONSTANT &&
2307 SrcInstr->getOpcode() != TargetOpcode::G_FCONSTANT)
2308 return false;
2309 // Break down the big constant into smaller ones.
2310 const MachineOperand &CstVal = SrcInstr->getOperand(1);
2311 APInt Val = SrcInstr->getOpcode() == TargetOpcode::G_CONSTANT
2312 ? CstVal.getCImm()->getValue()
2313 : CstVal.getFPImm()->getValueAPF().bitcastToAPInt();
2314
2315 LLT Dst0Ty = MRI.getType(MI.getOperand(0).getReg());
2316 unsigned ShiftAmt = Dst0Ty.getSizeInBits();
2317 // Unmerge a constant.
2318 for (unsigned Idx = 0; Idx != SrcIdx; ++Idx) {
2319 Csts.emplace_back(Val.trunc(ShiftAmt));
2320 Val = Val.lshr(ShiftAmt);
2321 }
2322
2323 return true;
2324}
2325
2327 MachineInstr &MI, SmallVectorImpl<APInt> &Csts) const {
2328 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2329 "Expected an unmerge");
2330 assert((MI.getNumOperands() - 1 == Csts.size()) &&
2331 "Not enough operands to replace all defs");
2332 unsigned NumElems = MI.getNumOperands() - 1;
2333 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
2334 Register DstReg = MI.getOperand(Idx).getReg();
2335 Builder.buildConstant(DstReg, Csts[Idx]);
2336 }
2337
2338 MI.eraseFromParent();
2339}
2340
2343 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
2344 unsigned SrcIdx = MI.getNumOperands() - 1;
2345 Register SrcReg = MI.getOperand(SrcIdx).getReg();
2346 MatchInfo = [&MI](MachineIRBuilder &B) {
2347 unsigned NumElems = MI.getNumOperands() - 1;
2348 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
2349 Register DstReg = MI.getOperand(Idx).getReg();
2350 B.buildUndef(DstReg);
2351 }
2352 };
2353 return isa<GImplicitDef>(MRI.getVRegDef(SrcReg));
2354}
2355
2357 MachineInstr &MI) const {
2358 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2359 "Expected an unmerge");
2360 if (MRI.getType(MI.getOperand(0).getReg()).isVector() ||
2361 MRI.getType(MI.getOperand(MI.getNumDefs()).getReg()).isVector())
2362 return false;
2363 // Check that all the lanes are dead except the first one.
2364 for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
2365 if (!MRI.use_nodbg_empty(MI.getOperand(Idx).getReg()))
2366 return false;
2367 }
2368 return true;
2369}
2370
2372 MachineInstr &MI) const {
2373 Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
2374 Register Dst0Reg = MI.getOperand(0).getReg();
2375 Builder.buildTrunc(Dst0Reg, SrcReg);
2376 MI.eraseFromParent();
2377}
2378
2380 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2381 "Expected an unmerge");
2382 Register Dst0Reg = MI.getOperand(0).getReg();
2383 LLT Dst0Ty = MRI.getType(Dst0Reg);
2384 // G_ZEXT on vector applies to each lane, so it will
2385 // affect all destinations. Therefore we won't be able
2386 // to simplify the unmerge to just the first definition.
2387 if (Dst0Ty.isVector())
2388 return false;
2389 Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
2390 LLT SrcTy = MRI.getType(SrcReg);
2391 if (SrcTy.isVector())
2392 return false;
2393
2394 Register ZExtSrcReg;
2395 if (!mi_match(SrcReg, MRI, m_GZExt(m_Reg(ZExtSrcReg))))
2396 return false;
2397
2398 // Finally we can replace the first definition with
2399 // a zext of the source if the definition is big enough to hold
2400 // all of ZExtSrc bits.
2401 LLT ZExtSrcTy = MRI.getType(ZExtSrcReg);
2402 return ZExtSrcTy.getSizeInBits() <= Dst0Ty.getSizeInBits();
2403}
2404
2406 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2407 "Expected an unmerge");
2408
2409 Register Dst0Reg = MI.getOperand(0).getReg();
2410
2411 MachineInstr *ZExtInstr =
2412 MRI.getVRegDef(MI.getOperand(MI.getNumDefs()).getReg());
2413 assert(ZExtInstr && ZExtInstr->getOpcode() == TargetOpcode::G_ZEXT &&
2414 "Expecting a G_ZEXT");
2415
2416 Register ZExtSrcReg = ZExtInstr->getOperand(1).getReg();
2417 LLT Dst0Ty = MRI.getType(Dst0Reg);
2418 LLT ZExtSrcTy = MRI.getType(ZExtSrcReg);
2419
2420 if (Dst0Ty.getSizeInBits() > ZExtSrcTy.getSizeInBits()) {
2421 Builder.buildZExt(Dst0Reg, ZExtSrcReg);
2422 } else {
2423 assert(Dst0Ty.getSizeInBits() == ZExtSrcTy.getSizeInBits() &&
2424 "ZExt src doesn't fit in destination");
2425 replaceRegWith(MRI, Dst0Reg, ZExtSrcReg);
2426 }
2427
2428 Register ZeroReg;
2429 for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
2430 if (!ZeroReg)
2431 ZeroReg = Builder.buildConstant(Dst0Ty, 0).getReg(0);
2432 replaceRegWith(MRI, MI.getOperand(Idx).getReg(), ZeroReg);
2433 }
2434 MI.eraseFromParent();
2435}
2436
2438 unsigned TargetShiftSize,
2439 unsigned &ShiftVal) const {
2440 assert((MI.getOpcode() == TargetOpcode::G_SHL ||
2441 MI.getOpcode() == TargetOpcode::G_LSHR ||
2442 MI.getOpcode() == TargetOpcode::G_ASHR) && "Expected a shift");
2443
2444 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
2445 if (Ty.isVector()) // TODO:
2446 return false;
2447
2448 // Don't narrow further than the requested size.
2449 unsigned Size = Ty.getSizeInBits();
2450 if (Size <= TargetShiftSize)
2451 return false;
2452
2453 auto MaybeImmVal =
2454 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
2455 if (!MaybeImmVal)
2456 return false;
2457
2458 ShiftVal = MaybeImmVal->Value.getSExtValue();
2459 return ShiftVal >= Size / 2 && ShiftVal < Size;
2460}
2461
2463 MachineInstr &MI, const unsigned &ShiftVal) const {
2464 Register DstReg = MI.getOperand(0).getReg();
2465 Register SrcReg = MI.getOperand(1).getReg();
2466 LLT Ty = MRI.getType(SrcReg);
2467 unsigned Size = Ty.getSizeInBits();
2468 unsigned HalfSize = Size / 2;
2469 assert(ShiftVal >= HalfSize);
2470
2471 LLT HalfTy = LLT::scalar(HalfSize);
2472
2473 auto Unmerge = Builder.buildUnmerge(HalfTy, SrcReg);
2474 unsigned NarrowShiftAmt = ShiftVal - HalfSize;
2475
2476 if (MI.getOpcode() == TargetOpcode::G_LSHR) {
2477 Register Narrowed = Unmerge.getReg(1);
2478
2479 // dst = G_LSHR s64:x, C for C >= 32
2480 // =>
2481 // lo, hi = G_UNMERGE_VALUES x
2482 // dst = G_MERGE_VALUES (G_LSHR hi, C - 32), 0
2483
2484 if (NarrowShiftAmt != 0) {
2485 Narrowed = Builder.buildLShr(HalfTy, Narrowed,
2486 Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
2487 }
2488
2489 auto Zero = Builder.buildConstant(HalfTy, 0);
2490 Builder.buildMergeLikeInstr(DstReg, {Narrowed, Zero});
2491 } else if (MI.getOpcode() == TargetOpcode::G_SHL) {
2492 Register Narrowed = Unmerge.getReg(0);
2493 // dst = G_SHL s64:x, C for C >= 32
2494 // =>
2495 // lo, hi = G_UNMERGE_VALUES x
2496 // dst = G_MERGE_VALUES 0, (G_SHL hi, C - 32)
2497 if (NarrowShiftAmt != 0) {
2498 Narrowed = Builder.buildShl(HalfTy, Narrowed,
2499 Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
2500 }
2501
2502 auto Zero = Builder.buildConstant(HalfTy, 0);
2503 Builder.buildMergeLikeInstr(DstReg, {Zero, Narrowed});
2504 } else {
2505 assert(MI.getOpcode() == TargetOpcode::G_ASHR);
2506 auto Hi = Builder.buildAShr(
2507 HalfTy, Unmerge.getReg(1),
2508 Builder.buildConstant(HalfTy, HalfSize - 1));
2509
2510 if (ShiftVal == HalfSize) {
2511 // (G_ASHR i64:x, 32) ->
2512 // G_MERGE_VALUES hi_32(x), (G_ASHR hi_32(x), 31)
2513 Builder.buildMergeLikeInstr(DstReg, {Unmerge.getReg(1), Hi});
2514 } else if (ShiftVal == Size - 1) {
2515 // Don't need a second shift.
2516 // (G_ASHR i64:x, 63) ->
2517 // %narrowed = (G_ASHR hi_32(x), 31)
2518 // G_MERGE_VALUES %narrowed, %narrowed
2519 Builder.buildMergeLikeInstr(DstReg, {Hi, Hi});
2520 } else {
2521 auto Lo = Builder.buildAShr(
2522 HalfTy, Unmerge.getReg(1),
2523 Builder.buildConstant(HalfTy, ShiftVal - HalfSize));
2524
2525 // (G_ASHR i64:x, C) ->, for C >= 32
2526 // G_MERGE_VALUES (G_ASHR hi_32(x), C - 32), (G_ASHR hi_32(x), 31)
2527 Builder.buildMergeLikeInstr(DstReg, {Lo, Hi});
2528 }
2529 }
2530
2531 MI.eraseFromParent();
2532}
2533
2535 MachineInstr &MI, unsigned TargetShiftAmount) const {
2536 unsigned ShiftAmt;
2537 if (matchCombineShiftToUnmerge(MI, TargetShiftAmount, ShiftAmt)) {
2538 applyCombineShiftToUnmerge(MI, ShiftAmt);
2539 return true;
2540 }
2541
2542 return false;
2543}
2544
2546 Register &Reg) const {
2547 assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
2548 Register DstReg = MI.getOperand(0).getReg();
2549 LLT DstTy = MRI.getType(DstReg);
2550 Register SrcReg = MI.getOperand(1).getReg();
2551 return mi_match(SrcReg, MRI,
2552 m_GPtrToInt(m_all_of(m_SpecificType(DstTy), m_Reg(Reg))));
2553}
2554
2556 Register &Reg) const {
2557 assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
2558 Register DstReg = MI.getOperand(0).getReg();
2559 Builder.buildCopy(DstReg, Reg);
2560 MI.eraseFromParent();
2561}
2562
2564 Register &Reg) const {
2565 assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT");
2566 Register DstReg = MI.getOperand(0).getReg();
2567 Builder.buildZExtOrTrunc(DstReg, Reg);
2568 MI.eraseFromParent();
2569}
2570
2572 MachineInstr &MI, std::pair<Register, bool> &PtrReg) const {
2573 assert(MI.getOpcode() == TargetOpcode::G_ADD);
2574 Register LHS = MI.getOperand(1).getReg();
2575 Register RHS = MI.getOperand(2).getReg();
2576 LLT IntTy = MRI.getType(LHS);
2577
2578 // G_PTR_ADD always has the pointer in the LHS, so we may need to commute the
2579 // instruction.
2580 PtrReg.second = false;
2581 for (Register SrcReg : {LHS, RHS}) {
2582 if (mi_match(SrcReg, MRI, m_GPtrToInt(m_Reg(PtrReg.first)))) {
2583 // Don't handle cases where the integer is implicitly converted to the
2584 // pointer width.
2585 LLT PtrTy = MRI.getType(PtrReg.first);
2586 if (PtrTy.getScalarSizeInBits() == IntTy.getScalarSizeInBits())
2587 return true;
2588 }
2589
2590 PtrReg.second = true;
2591 }
2592
2593 return false;
2594}
2595
2597 MachineInstr &MI, std::pair<Register, bool> &PtrReg) const {
2598 Register Dst = MI.getOperand(0).getReg();
2599 Register LHS = MI.getOperand(1).getReg();
2600 Register RHS = MI.getOperand(2).getReg();
2601
2602 const bool DoCommute = PtrReg.second;
2603 if (DoCommute)
2604 std::swap(LHS, RHS);
2605 LHS = PtrReg.first;
2606
2607 LLT PtrTy = MRI.getType(LHS);
2608
2609 auto PtrAdd = Builder.buildPtrAdd(PtrTy, LHS, RHS);
2610 Builder.buildPtrToInt(Dst, PtrAdd);
2611 MI.eraseFromParent();
2612}
2613
2615 APInt &NewCst) const {
2616 auto &PtrAdd = cast<GPtrAdd>(MI);
2617 Register LHS = PtrAdd.getBaseReg();
2618 Register RHS = PtrAdd.getOffsetReg();
2619 MachineRegisterInfo &MRI = Builder.getMF().getRegInfo();
2620
2621 if (auto RHSCst = getIConstantVRegVal(RHS, MRI)) {
2622 APInt Cst;
2623 if (mi_match(LHS, MRI, m_GIntToPtr(m_ICst(Cst)))) {
2624 auto DstTy = MRI.getType(PtrAdd.getReg(0));
2625 // G_INTTOPTR uses zero-extension
2626 NewCst = Cst.zextOrTrunc(DstTy.getSizeInBits());
2627 NewCst += RHSCst->sextOrTrunc(DstTy.getSizeInBits());
2628 return true;
2629 }
2630 }
2631
2632 return false;
2633}
2634
2636 APInt &NewCst) const {
2637 auto &PtrAdd = cast<GPtrAdd>(MI);
2638 Register Dst = PtrAdd.getReg(0);
2639
2640 Builder.buildConstant(Dst, NewCst);
2641 PtrAdd.eraseFromParent();
2642}
2643
2645 Register &Reg) const {
2646 assert(MI.getOpcode() == TargetOpcode::G_ANYEXT && "Expected a G_ANYEXT");
2647 Register DstReg = MI.getOperand(0).getReg();
2648 Register SrcReg = MI.getOperand(1).getReg();
2649 Register OriginalSrcReg = getSrcRegIgnoringCopies(SrcReg, MRI);
2650 if (OriginalSrcReg.isValid())
2651 SrcReg = OriginalSrcReg;
2652 LLT DstTy = MRI.getType(DstReg);
2653 return mi_match(SrcReg, MRI,
2654 m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy)))) &&
2655 canReplaceReg(DstReg, Reg, MRI);
2656}
2657
2659 Register &Reg) const {
2660 assert(MI.getOpcode() == TargetOpcode::G_ZEXT && "Expected a G_ZEXT");
2661 Register DstReg = MI.getOperand(0).getReg();
2662 Register SrcReg = MI.getOperand(1).getReg();
2663 LLT DstTy = MRI.getType(DstReg);
2664 if (mi_match(SrcReg, MRI,
2665 m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy)))) &&
2666 canReplaceReg(DstReg, Reg, MRI)) {
2667 unsigned DstSize = DstTy.getScalarSizeInBits();
2668 unsigned SrcSize = MRI.getType(SrcReg).getScalarSizeInBits();
2669 return VT->getKnownBits(Reg).countMinLeadingZeros() >= DstSize - SrcSize;
2670 }
2671 return false;
2672}
2673
2674 static LLT getMidVTForTruncRightShiftCombine(LLT ShiftTy, LLT TruncTy) {
2675 const unsigned ShiftSize = ShiftTy.getScalarSizeInBits();
2676 const unsigned TruncSize = TruncTy.getScalarSizeInBits();
2677
2678 // ShiftTy > 32 > TruncTy -> 32
2679 if (ShiftSize > 32 && TruncSize < 32)
2680 return ShiftTy.changeElementSize(32);
2681
2682 // TODO: We could also reduce to 16 bits, but that's more target-dependent.
2683 // Some targets like it, some don't, some only like it under certain
2684 // conditions/processor versions, etc.
2685 // A TL hook might be needed for this.
2686
2687 // Don't combine
2688 return ShiftTy;
2689}
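// For example (illustrative): truncating a right shift of an s64 value to s16,
//   %s:s64 = G_LSHR %w, G_CONSTANT 8
//   %t:s16 = G_TRUNC %s
// can instead use the s32 type chosen above, because the low 16 bits of the
// result only depend on bits 8..23 of %w:
//   %w32:s32 = G_TRUNC %w
//   %s32:s32 = G_LSHR %w32, G_CONSTANT 8
//   %t:s16 = G_TRUNC %s32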
2690
2692 MachineInstr &MI, std::pair<MachineInstr *, LLT> &MatchInfo) const {
2693 assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
2694 Register DstReg = MI.getOperand(0).getReg();
2695 Register SrcReg = MI.getOperand(1).getReg();
2696
2697 if (!MRI.hasOneNonDBGUse(SrcReg))
2698 return false;
2699
2700 LLT SrcTy = MRI.getType(SrcReg);
2701 LLT DstTy = MRI.getType(DstReg);
2702
2703 MachineInstr *SrcMI = getDefIgnoringCopies(SrcReg, MRI);
2704 const auto &TL = getTargetLowering();
2705
2706 LLT NewShiftTy;
2707 switch (SrcMI->getOpcode()) {
2708 default:
2709 return false;
2710 case TargetOpcode::G_SHL: {
2711 NewShiftTy = DstTy;
2712
2713 // Make sure new shift amount is legal.
2714 KnownBits Known = VT->getKnownBits(SrcMI->getOperand(2).getReg());
2715 if (Known.getMaxValue().uge(NewShiftTy.getScalarSizeInBits()))
2716 return false;
2717 break;
2718 }
2719 case TargetOpcode::G_LSHR:
2720 case TargetOpcode::G_ASHR: {
2721 // For right shifts, we conservatively do not do the transform if the TRUNC
2722 // has any STORE users. The reason is that if we change the type of the
2723 // shift, we may break the truncstore combine.
2724 //
2725 // TODO: Fix truncstore combine to handle (trunc(lshr (trunc x), k)).
2726 for (auto &User : MRI.use_instructions(DstReg))
2727 if (User.getOpcode() == TargetOpcode::G_STORE)
2728 return false;
2729
2730 NewShiftTy = getMidVTForTruncRightShiftCombine(SrcTy, DstTy);
2731 if (NewShiftTy == SrcTy)
2732 return false;
2733
2734 // Make sure we won't lose information by truncating the high bits.
2735 KnownBits Known = VT->getKnownBits(SrcMI->getOperand(2).getReg());
2736 if (Known.getMaxValue().ugt(NewShiftTy.getScalarSizeInBits() -
2737 DstTy.getScalarSizeInBits()))
2738 return false;
2739 break;
2740 }
2741 }
2742
2743 if (!isLegalOrBeforeLegalizer(
2744 {SrcMI->getOpcode(),
2745 {NewShiftTy, TL.getPreferredShiftAmountTy(NewShiftTy)}}))
2746 return false;
2747
2748 MatchInfo = std::make_pair(SrcMI, NewShiftTy);
2749 return true;
2750}
2751
2753 MachineInstr &MI, std::pair<MachineInstr *, LLT> &MatchInfo) const {
2754 MachineInstr *ShiftMI = MatchInfo.first;
2755 LLT NewShiftTy = MatchInfo.second;
2756
2757 Register Dst = MI.getOperand(0).getReg();
2758 LLT DstTy = MRI.getType(Dst);
2759
2760 Register ShiftAmt = ShiftMI->getOperand(2).getReg();
2761 Register ShiftSrc = ShiftMI->getOperand(1).getReg();
2762 ShiftSrc = Builder.buildTrunc(NewShiftTy, ShiftSrc).getReg(0);
2763
2764 Register NewShift =
2765 Builder
2766 .buildInstr(ShiftMI->getOpcode(), {NewShiftTy}, {ShiftSrc, ShiftAmt})
2767 .getReg(0);
2768
2769 if (NewShiftTy == DstTy)
2770 replaceRegWith(MRI, Dst, NewShift);
2771 else
2772 Builder.buildTrunc(Dst, NewShift);
2773
2774 eraseInst(MI);
2775}
2776
2778 return any_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
2779 return MO.isReg() &&
2780 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
2781 });
2782}
2783
2785 return all_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
2786 return !MO.isReg() ||
2787 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
2788 });
2789}
2790
2792 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
2793 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
2794 return all_of(Mask, [](int Elt) { return Elt < 0; });
2795}
2796
2798 assert(MI.getOpcode() == TargetOpcode::G_STORE);
2799 return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(0).getReg(),
2800 MRI);
2801}
2802
2804 assert(MI.getOpcode() == TargetOpcode::G_SELECT);
2805 return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(1).getReg(),
2806 MRI);
2807}
2808
2810 MachineInstr &MI) const {
2811 assert((MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT ||
2812 MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT) &&
2813 "Expected an insert/extract element op");
2814 LLT VecTy = MRI.getType(MI.getOperand(1).getReg());
2815 if (VecTy.isScalableVector())
2816 return false;
2817
2818 unsigned IdxIdx =
2819 MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
2820 auto Idx = getIConstantVRegVal(MI.getOperand(IdxIdx).getReg(), MRI);
2821 if (!Idx)
2822 return false;
2823 return Idx->getZExtValue() >= VecTy.getNumElements();
2824}
2825
2827 unsigned &OpIdx) const {
2828 GSelect &SelMI = cast<GSelect>(MI);
2829 auto Cst =
2830 isConstantOrConstantSplatVector(*MRI.getVRegDef(SelMI.getCondReg()), MRI);
2831 if (!Cst)
2832 return false;
2833 OpIdx = Cst->isZero() ? 3 : 2;
2834 return true;
2835}
2836
2837void CombinerHelper::eraseInst(MachineInstr &MI) const { MI.eraseFromParent(); }
2838
2840 const MachineOperand &MOP2) const {
2841 if (!MOP1.isReg() || !MOP2.isReg())
2842 return false;
2843 auto InstAndDef1 = getDefSrcRegIgnoringCopies(MOP1.getReg(), MRI);
2844 if (!InstAndDef1)
2845 return false;
2846 auto InstAndDef2 = getDefSrcRegIgnoringCopies(MOP2.getReg(), MRI);
2847 if (!InstAndDef2)
2848 return false;
2849 MachineInstr *I1 = InstAndDef1->MI;
2850 MachineInstr *I2 = InstAndDef2->MI;
2851
2852 // Handle a case like this:
2853 //
2854 // %0:_(s64), %1:_(s64) = G_UNMERGE_VALUES %2:_(<2 x s64>)
2855 //
2856 // Even though %0 and %1 are produced by the same instruction they are not
2857 // the same values.
2858 if (I1 == I2)
2859 return MOP1.getReg() == MOP2.getReg();
2860
2861 // If we have an instruction which loads or stores, we can't guarantee that
2862 // it is identical.
2863 //
2864 // For example, we may have
2865 //
2866 // %x1 = G_LOAD %addr (load N from @somewhere)
2867 // ...
2868 // call @foo
2869 // ...
2870 // %x2 = G_LOAD %addr (load N from @somewhere)
2871 // ...
2872 // %or = G_OR %x1, %x2
2873 //
2874 // It's possible that @foo will modify whatever lives at the address we're
2875 // loading from. To be safe, let's just assume that all loads and stores
2876 // are different (unless we have something which is guaranteed to not
2877 // change.)
2878 if (I1->mayLoadOrStore() && !I1->isDereferenceableInvariantLoad())
2879 return false;
2880
2881 // If both instructions are loads or stores, they are equal only if both
2882 // are dereferenceable invariant loads with the same number of bits.
2883 if (I1->mayLoadOrStore() && I2->mayLoadOrStore()) {
2884 GLoadStore *LS1 = dyn_cast<GLoadStore>(I1);
2885 GLoadStore *LS2 = dyn_cast<GLoadStore>(I2);
2886 if (!LS1 || !LS2)
2887 return false;
2888
2889 if (!I2->isDereferenceableInvariantLoad() ||
2890 (LS1->getMemSizeInBits() != LS2->getMemSizeInBits()))
2891 return false;
2892 }
2893
2894 // Check for physical registers on the instructions first to avoid cases
2895 // like this:
2896 //
2897 // %a = COPY $physreg
2898 // ...
2899 // SOMETHING implicit-def $physreg
2900 // ...
2901 // %b = COPY $physreg
2902 //
2903 // These copies are not equivalent.
2904 if (any_of(I1->uses(), [](const MachineOperand &MO) {
2905 return MO.isReg() && MO.getReg().isPhysical();
2906 })) {
2907 // Check if we have a case like this:
2908 //
2909 // %a = COPY $physreg
2910 // %b = COPY %a
2911 //
2912 // In this case, I1 and I2 will both be equal to %a = COPY $physreg.
2913 // From that, we know that they must have the same value, since they must
2914 // have come from the same COPY.
2915 return I1->isIdenticalTo(*I2);
2916 }
2917
2918 // We don't have any physical registers, so we don't necessarily need the
2919 // same vreg defs.
2920 //
2921 // On the off-chance that there's some target instruction feeding into the
2922 // instruction, let's use produceSameValue instead of isIdenticalTo.
2923 if (Builder.getTII().produceSameValue(*I1, *I2, &MRI)) {
2924 // Handle instructions with multiple defs that produce the same values. The
2925 // values are the same for operands with the same index.
2926 // %0:_(s8), %1:_(s8), %2:_(s8), %3:_(s8) = G_UNMERGE_VALUES %4:_(<4 x s8>)
2927 // %5:_(s8), %6:_(s8), %7:_(s8), %8:_(s8) = G_UNMERGE_VALUES %4:_(<4 x s8>)
2928 // I1 and I2 are different instructions but produce the same values, so
2929 // %1 and %6 are the same, while %1 and %7 are not.
2930 return I1->findRegisterDefOperandIdx(InstAndDef1->Reg, /*TRI=*/nullptr) ==
2931 I2->findRegisterDefOperandIdx(InstAndDef2->Reg, /*TRI=*/nullptr);
2932 }
2933 return false;
2934}
2935
2937 int64_t C) const {
2938 if (!MOP.isReg())
2939 return false;
2940 auto *MI = MRI.getVRegDef(MOP.getReg());
2941 auto MaybeCst = isConstantOrConstantSplatVector(*MI, MRI);
2942 return MaybeCst && MaybeCst->getBitWidth() <= 64 &&
2943 MaybeCst->getSExtValue() == C;
2944}
2945
2947 double C) const {
2948 if (!MOP.isReg())
2949 return false;
2950 std::optional<FPValueAndVReg> MaybeCst;
2951 if (!mi_match(MOP.getReg(), MRI, m_GFCstOrSplat(MaybeCst)))
2952 return false;
2953
2954 return MaybeCst->Value.isExactlyValue(C);
2955}
2956
2958 unsigned OpIdx) const {
2959 assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
2960 Register OldReg = MI.getOperand(0).getReg();
2961 Register Replacement = MI.getOperand(OpIdx).getReg();
2962 assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
2963 replaceRegWith(MRI, OldReg, Replacement);
2964 MI.eraseFromParent();
2965}
2966
2968 Register Replacement) const {
2969 assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
2970 Register OldReg = MI.getOperand(0).getReg();
2971 assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
2972 replaceRegWith(MRI, OldReg, Replacement);
2973 MI.eraseFromParent();
2974}
2975
2977 unsigned ConstIdx) const {
2978 Register ConstReg = MI.getOperand(ConstIdx).getReg();
2979 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
2980
2981 // Get the shift amount
2982 auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstReg, MRI);
2983 if (!VRegAndVal)
2984 return false;
2985
2986 // Return true if the shift amount is >= the bitwidth.
2987 return (VRegAndVal->Value.uge(DstTy.getSizeInBits()));
2988}
2989
2991 assert((MI.getOpcode() == TargetOpcode::G_FSHL ||
2992 MI.getOpcode() == TargetOpcode::G_FSHR) &&
2993 "This is not a funnel shift operation");
2994
2995 Register ConstReg = MI.getOperand(3).getReg();
2996 LLT ConstTy = MRI.getType(ConstReg);
2997 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
2998
2999 auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstReg, MRI);
3000 assert((VRegAndVal) && "Value is not a constant");
3001
3002 // Calculate the new Shift Amount = Old Shift Amount % BitWidth
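// E.g. (illustrative) an s64 funnel shift by 70 uses the same bits as a shift
// by 70 % 64 = 6, so the constant operand can be reduced to 6.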
3003 APInt NewConst = VRegAndVal->Value.urem(
3004 APInt(ConstTy.getSizeInBits(), DstTy.getScalarSizeInBits()));
3005
3006 auto NewConstInstr = Builder.buildConstant(ConstTy, NewConst.getZExtValue());
3007 Builder.buildInstr(
3008 MI.getOpcode(), {MI.getOperand(0)},
3009 {MI.getOperand(1), MI.getOperand(2), NewConstInstr.getReg(0)});
3010
3011 MI.eraseFromParent();
3012}
3013
3015 assert(MI.getOpcode() == TargetOpcode::G_SELECT);
3016 // Match (cond ? x : x)
3017 return matchEqualDefs(MI.getOperand(2), MI.getOperand(3)) &&
3018 canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(2).getReg(),
3019 MRI);
3020}
3021
3023 return matchEqualDefs(MI.getOperand(1), MI.getOperand(2)) &&
3024 canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(),
3025 MRI);
3026}
3027
3029 unsigned OpIdx) const {
3030 return matchConstantOp(MI.getOperand(OpIdx), 0) &&
3031 canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(OpIdx).getReg(),
3032 MRI);
3033}
3034
3036 unsigned OpIdx) const {
3037 MachineOperand &MO = MI.getOperand(OpIdx);
3038 return MO.isReg() &&
3039 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
3040}
3041
3043 unsigned OpIdx) const {
3044 MachineOperand &MO = MI.getOperand(OpIdx);
3045 return isKnownToBeAPowerOfTwo(MO.getReg(), MRI, VT);
3046}
3047
3049 double C) const {
3050 assert(MI.getNumDefs() == 1 && "Expected only one def?");
3051 Builder.buildFConstant(MI.getOperand(0), C);
3052 MI.eraseFromParent();
3053}
3054
3056 int64_t C) const {
3057 assert(MI.getNumDefs() == 1 && "Expected only one def?");
3058 Builder.buildConstant(MI.getOperand(0), C);
3059 MI.eraseFromParent();
3060}
3061
3063 assert(MI.getNumDefs() == 1 && "Expected only one def?");
3064 Builder.buildConstant(MI.getOperand(0), C);
3065 MI.eraseFromParent();
3066}
3067
3069 ConstantFP *CFP) const {
3070 assert(MI.getNumDefs() == 1 && "Expected only one def?");
3071 Builder.buildFConstant(MI.getOperand(0), CFP->getValueAPF());
3072 MI.eraseFromParent();
3073}
3074
3076 assert(MI.getNumDefs() == 1 && "Expected only one def?");
3077 Builder.buildUndef(MI.getOperand(0));
3078 MI.eraseFromParent();
3079}
3080
3082 MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) const {
3083 Register LHS = MI.getOperand(1).getReg();
3084 Register RHS = MI.getOperand(2).getReg();
3085 Register &NewLHS = std::get<0>(MatchInfo);
3086 Register &NewRHS = std::get<1>(MatchInfo);
3087
3088 // Helper lambda to check for opportunities for
3089 // ((0-A) + B) -> B - A
3090 // (A + (0-B)) -> A - B
3091 auto CheckFold = [&](Register &MaybeSub, Register &MaybeNewLHS) {
3092 if (!mi_match(MaybeSub, MRI, m_Neg(m_Reg(NewRHS))))
3093 return false;
3094 NewLHS = MaybeNewLHS;
3095 return true;
3096 };
3097
3098 return CheckFold(LHS, RHS) || CheckFold(RHS, LHS);
3099}
3100
3102 MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) const {
3103 assert(MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT &&
3104 "Invalid opcode");
3105 Register DstReg = MI.getOperand(0).getReg();
3106 LLT DstTy = MRI.getType(DstReg);
3107 assert(DstTy.isVector() && "Invalid G_INSERT_VECTOR_ELT?");
3108
3109 if (DstTy.isScalableVector())
3110 return false;
3111
3112 unsigned NumElts = DstTy.getNumElements();
3113 // If this MI is part of a sequence of insert_vec_elts, then
3114 // don't do the combine in the middle of the sequence.
3115 if (MRI.hasOneUse(DstReg) && MRI.use_instr_begin(DstReg)->getOpcode() ==
3116 TargetOpcode::G_INSERT_VECTOR_ELT)
3117 return false;
3118 MachineInstr *CurrInst = &MI;
3119 MachineInstr *TmpInst;
3120 int64_t IntImm;
3121 Register TmpReg;
3122 MatchInfo.resize(NumElts);
3123 while (mi_match(
3124 CurrInst->getOperand(0).getReg(), MRI,
3125 m_GInsertVecElt(m_MInstr(TmpInst), m_Reg(TmpReg), m_ICst(IntImm)))) {
3126 if (IntImm >= NumElts || IntImm < 0)
3127 return false;
3128 if (!MatchInfo[IntImm])
3129 MatchInfo[IntImm] = TmpReg;
3130 CurrInst = TmpInst;
3131 }
3132 // Variable index.
3133 if (CurrInst->getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
3134 return false;
3135 if (TmpInst->getOpcode() == TargetOpcode::G_BUILD_VECTOR) {
3136 for (unsigned I = 1; I < TmpInst->getNumOperands(); ++I) {
3137 if (!MatchInfo[I - 1].isValid())
3138 MatchInfo[I - 1] = TmpInst->getOperand(I).getReg();
3139 }
3140 return true;
3141 }
3142 // If we didn't end in a G_IMPLICIT_DEF and the source is not fully
3143 // overwritten, bail out.
3144 return TmpInst->getOpcode() == TargetOpcode::G_IMPLICIT_DEF ||
3145 all_of(MatchInfo, [](Register Reg) { return !!Reg; });
3146}
3147
3149 MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) const {
3150 Register UndefReg;
3151 auto GetUndef = [&]() {
3152 if (UndefReg)
3153 return UndefReg;
3154 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
3155 UndefReg = Builder.buildUndef(DstTy.getScalarType()).getReg(0);
3156 return UndefReg;
3157 };
3158 for (Register &Reg : MatchInfo) {
3159 if (!Reg)
3160 Reg = GetUndef();
3161 }
3162 Builder.buildBuildVector(MI.getOperand(0).getReg(), MatchInfo);
3163 MI.eraseFromParent();
3164}
3165
3167 MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) const {
3168 Register SubLHS, SubRHS;
3169 std::tie(SubLHS, SubRHS) = MatchInfo;
3170 Builder.buildSub(MI.getOperand(0).getReg(), SubLHS, SubRHS);
3171 MI.eraseFromParent();
3172}
3173
3175 MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) const {
3176 // Matches: logic (hand x, ...), (hand y, ...) -> hand (logic x, y), ...
3177 //
3178 // Creates the new hand + logic instruction (but does not insert them.)
3179 //
3180 // On success, MatchInfo is populated with the new instructions. These are
3181 // inserted in applyHoistLogicOpWithSameOpcodeHands.
3182 unsigned LogicOpcode = MI.getOpcode();
3183 assert(LogicOpcode == TargetOpcode::G_AND ||
3184 LogicOpcode == TargetOpcode::G_OR ||
3185 LogicOpcode == TargetOpcode::G_XOR);
3186 MachineIRBuilder MIB(MI);
3187 Register Dst = MI.getOperand(0).getReg();
3188 Register LHSReg = MI.getOperand(1).getReg();
3189 Register RHSReg = MI.getOperand(2).getReg();
3190
3191 // Don't recompute anything.
3192 if (!MRI.hasOneNonDBGUse(LHSReg) || !MRI.hasOneNonDBGUse(RHSReg))
3193 return false;
3194
3195 // Make sure we have (hand x, ...), (hand y, ...)
3196 MachineInstr *LeftHandInst = getDefIgnoringCopies(LHSReg, MRI);
3197 MachineInstr *RightHandInst = getDefIgnoringCopies(RHSReg, MRI);
3198 if (!LeftHandInst || !RightHandInst)
3199 return false;
3200 unsigned HandOpcode = LeftHandInst->getOpcode();
3201 if (HandOpcode != RightHandInst->getOpcode())
3202 return false;
3203 if (LeftHandInst->getNumOperands() < 2 ||
3204 !LeftHandInst->getOperand(1).isReg() ||
3205 RightHandInst->getNumOperands() < 2 ||
3206 !RightHandInst->getOperand(1).isReg())
3207 return false;
3208
3209 // Make sure the types match up, and if we're doing this post-legalization,
3210 // we end up with legal types.
3211 Register X = LeftHandInst->getOperand(1).getReg();
3212 Register Y = RightHandInst->getOperand(1).getReg();
3213 LLT XTy = MRI.getType(X);
3214 LLT YTy = MRI.getType(Y);
3215 if (!XTy.isValid() || XTy != YTy)
3216 return false;
3217
3218 // Optional extra source register.
3219 Register ExtraHandOpSrcReg;
3220 switch (HandOpcode) {
3221 default:
3222 return false;
3223 case TargetOpcode::G_ANYEXT:
3224 case TargetOpcode::G_SEXT:
3225 case TargetOpcode::G_ZEXT: {
3226 // Match: logic (ext X), (ext Y) --> ext (logic X, Y)
3227 break;
3228 }
3229 case TargetOpcode::G_TRUNC: {
3230 // Match: logic (trunc X), (trunc Y) -> trunc (logic X, Y)
3231 const MachineFunction *MF = MI.getMF();
3232 LLVMContext &Ctx = MF->getFunction().getContext();
3233
3234 LLT DstTy = MRI.getType(Dst);
3235 const TargetLowering &TLI = getTargetLowering();
3236
3237 // Be extra careful sinking truncate. If it's free, there's no benefit in
3238 // widening a binop.
3239 if (TLI.isZExtFree(DstTy, XTy, Ctx) && TLI.isTruncateFree(XTy, DstTy, Ctx))
3240 return false;
3241 break;
3242 }
3243 case TargetOpcode::G_AND:
3244 case TargetOpcode::G_ASHR:
3245 case TargetOpcode::G_LSHR:
3246 case TargetOpcode::G_SHL: {
3247 // Match: logic (binop x, z), (binop y, z) -> binop (logic x, y), z
3248 MachineOperand &ZOp = LeftHandInst->getOperand(2);
3249 if (!matchEqualDefs(ZOp, RightHandInst->getOperand(2)))
3250 return false;
3251 ExtraHandOpSrcReg = ZOp.getReg();
3252 break;
3253 }
3254 }
3255
3256 if (!isLegalOrBeforeLegalizer({LogicOpcode, {XTy, YTy}}))
3257 return false;
3258
3259 // Record the steps to build the new instructions.
3260 //
3261 // Steps to build (logic x, y)
3262 auto NewLogicDst = MRI.createGenericVirtualRegister(XTy);
3263 OperandBuildSteps LogicBuildSteps = {
3264 [=](MachineInstrBuilder &MIB) { MIB.addDef(NewLogicDst); },
3265 [=](MachineInstrBuilder &MIB) { MIB.addReg(X); },
3266 [=](MachineInstrBuilder &MIB) { MIB.addReg(Y); }};
3267 InstructionBuildSteps LogicSteps(LogicOpcode, LogicBuildSteps);
3268
3269 // Steps to build hand (logic x, y), ...z
3270 OperandBuildSteps HandBuildSteps = {
3271 [=](MachineInstrBuilder &MIB) { MIB.addDef(Dst); },
3272 [=](MachineInstrBuilder &MIB) { MIB.addReg(NewLogicDst); }};
3273 if (ExtraHandOpSrcReg.isValid())
3274 HandBuildSteps.push_back(
3275 [=](MachineInstrBuilder &MIB) { MIB.addReg(ExtraHandOpSrcReg); });
3276 InstructionBuildSteps HandSteps(HandOpcode, HandBuildSteps);
3277
3278 MatchInfo = InstructionStepsMatchInfo({LogicSteps, HandSteps});
3279 return true;
3280}
3281
3283 MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) const {
3284 assert(MatchInfo.InstrsToBuild.size() &&
3285 "Expected at least one instr to build?");
3286 for (auto &InstrToBuild : MatchInfo.InstrsToBuild) {
3287 assert(InstrToBuild.Opcode && "Expected a valid opcode?");
3288 assert(InstrToBuild.OperandFns.size() && "Expected at least one operand?");
3289 MachineInstrBuilder Instr = Builder.buildInstr(InstrToBuild.Opcode);
3290 for (auto &OperandFn : InstrToBuild.OperandFns)
3291 OperandFn(Instr);
3292 }
3293 MI.eraseFromParent();
3294}
3295
3297 MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) const {
3298 assert(MI.getOpcode() == TargetOpcode::G_ASHR);
3299 int64_t ShlCst, AshrCst;
3300 Register Src;
3301 if (!mi_match(MI.getOperand(0).getReg(), MRI,
3302 m_GAShr(m_GShl(m_Reg(Src), m_ICstOrSplat(ShlCst)),
3303 m_ICstOrSplat(AshrCst))))
3304 return false;
3305 if (ShlCst != AshrCst)
3306 return false;
3307 if (!isLegalOrBeforeLegalizer(
3308 {TargetOpcode::G_SEXT_INREG, {MRI.getType(Src)}}))
3309 return false;
3310 MatchInfo = std::make_tuple(Src, ShlCst);
3311 return true;
3312}
3313
3315 MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) const {
3316 assert(MI.getOpcode() == TargetOpcode::G_ASHR);
3317 Register Src;
3318 int64_t ShiftAmt;
3319 std::tie(Src, ShiftAmt) = MatchInfo;
3320 unsigned Size = MRI.getType(Src).getScalarSizeInBits();
3321 Builder.buildSExtInReg(MI.getOperand(0).getReg(), Src, Size - ShiftAmt);
3322 MI.eraseFromParent();
3323}
3324
3325/// and(and(x, C1), C2) -> C1&C2 ? and(x, C1&C2) : 0
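/// For example (illustrative masks): and(and(x, 0x0F0F), 0x00FF) becomes
/// and(x, 0x000F), while and(and(x, 0xFF00), 0x00FF) has no overlapping bits
/// and folds to the constant 0.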
3328 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
3329 assert(MI.getOpcode() == TargetOpcode::G_AND);
3330
3331 Register Dst = MI.getOperand(0).getReg();
3332 LLT Ty = MRI.getType(Dst);
3333
3334 Register R;
3335 int64_t C1;
3336 int64_t C2;
3337 if (!mi_match(
3338 Dst, MRI,
3339 m_GAnd(m_GAnd(m_Reg(R), m_ICst(C1)), m_ICst(C2))))
3340 return false;
3341
3342 MatchInfo = [=](MachineIRBuilder &B) {
3343 if (C1 & C2) {
3344 B.buildAnd(Dst, R, B.buildConstant(Ty, C1 & C2));
3345 return;
3346 }
3347 auto Zero = B.buildConstant(Ty, 0);
3348 replaceRegWith(MRI, Dst, Zero->getOperand(0).getReg());
3349 };
3350 return true;
3351}
3352
3354 Register &Replacement) const {
3355 // Given
3356 //
3357 // %y:_(sN) = G_SOMETHING
3358 // %x:_(sN) = G_SOMETHING
3359 // %res:_(sN) = G_AND %x, %y
3360 //
3361 // Eliminate the G_AND when it is known that x & y == x or x & y == y.
3362 //
3363 // Patterns like this can appear as a result of legalization. E.g.
3364 //
3365 // %cmp:_(s32) = G_ICMP intpred(pred), %x(s32), %y
3366 // %one:_(s32) = G_CONSTANT i32 1
3367 // %and:_(s32) = G_AND %cmp, %one
3368 //
3369 // In this case, G_ICMP only produces a single bit, so x & 1 == x.
3370 assert(MI.getOpcode() == TargetOpcode::G_AND);
3371 if (!VT)
3372 return false;
3373
3374 Register AndDst = MI.getOperand(0).getReg();
3375 Register LHS = MI.getOperand(1).getReg();
3376 Register RHS = MI.getOperand(2).getReg();
3377
3378 // Check the RHS (maybe a constant) first, and if we have no KnownBits there,
3379 // we can't do anything. If we do, then it depends on whether we have
3380 // KnownBits on the LHS.
3381 KnownBits RHSBits = VT->getKnownBits(RHS);
3382 if (RHSBits.isUnknown())
3383 return false;
3384
3385 KnownBits LHSBits = VT->getKnownBits(LHS);
3386
3387 // Check that x & Mask == x.
3388 // x & 1 == x, always
3389 // x & 0 == x, only if x is also 0
3390 // Meaning Mask has no effect if every bit is either one in Mask or zero in x.
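// E.g. (illustrative): if %x is known to have its upper 24 bits zero (say it
// was zero-extended from s8), then G_AND %x, 0xFF changes nothing and the
// result can be replaced by %x directly.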
3391 //
3392 // Check if we can replace AndDst with the LHS of the G_AND
3393 if (canReplaceReg(AndDst, LHS, MRI) &&
3394 (LHSBits.Zero | RHSBits.One).isAllOnes()) {
3395 Replacement = LHS;
3396 return true;
3397 }
3398
3399 // Check if we can replace AndDst with the RHS of the G_AND
3400 if (canReplaceReg(AndDst, RHS, MRI) &&
3401 (LHSBits.One | RHSBits.Zero).isAllOnes()) {
3402 Replacement = RHS;
3403 return true;
3404 }
3405
3406 return false;
3407}
3408
3409bool CombinerHelper::matchRedundantOr(MachineInstr &MI,
3410 Register &Replacement) const {
3411 // Given
3412 //
3413 // %y:_(sN) = G_SOMETHING
3414 // %x:_(sN) = G_SOMETHING
3415 // %res:_(sN) = G_OR %x, %y
3416 //
3417 // Eliminate the G_OR when it is known that x | y == x or x | y == y.
3418 assert(MI.getOpcode() == TargetOpcode::G_OR);
3419 if (!VT)
3420 return false;
3421
3422 Register OrDst = MI.getOperand(0).getReg();
3423 Register LHS = MI.getOperand(1).getReg();
3424 Register RHS = MI.getOperand(2).getReg();
3425
3426 KnownBits LHSBits = VT->getKnownBits(LHS);
3427 KnownBits RHSBits = VT->getKnownBits(RHS);
3428
3429 // Check that x | Mask == x.
3430 // x | 0 == x, always
3431 // x | 1 == x, only if x is also 1
3432 // Meaning Mask has no effect if every bit is either zero in Mask or one in x.
3433 //
3434 // Check if we can replace OrDst with the LHS of the G_OR
3435 if (canReplaceReg(OrDst, LHS, MRI) &&
3436 (LHSBits.One | RHSBits.Zero).isAllOnes()) {
3437 Replacement = LHS;
3438 return true;
3439 }
3440
3441 // Check if we can replace OrDst with the RHS of the G_OR
3442 if (canReplaceReg(OrDst, RHS, MRI) &&
3443 (LHSBits.Zero | RHSBits.One).isAllOnes()) {
3444 Replacement = RHS;
3445 return true;
3446 }
3447
3448 return false;
3449}
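// For example, if %x = G_OR %a, 0xff (so its low byte is known all-ones) and
// %y = G_AND %b, 0xff (so its upper bits are known zero), then every bit of
// %res = G_OR %x, %y is already determined by %x, and the G_OR can be
// replaced by %x.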
3450
3451bool CombinerHelper::matchRedundantSExtInReg(MachineInstr &MI) const {
3452 // If the input is already sign extended, just drop the extension.
3453 Register Src = MI.getOperand(1).getReg();
3454 unsigned ExtBits = MI.getOperand(2).getImm();
3455 unsigned TypeSize = MRI.getType(Src).getScalarSizeInBits();
3456 return VT->computeNumSignBits(Src) >= (TypeSize - ExtBits + 1);
3457}
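// For example, if %src:_(s32) was produced by a G_SEXT from s8, it already
// has at least 25 sign bits, so any G_SEXT_INREG %src, N with N >= 8
// (i.e. TypeSize - ExtBits + 1 <= 25) is redundant and can be dropped.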
3458
3459static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits,
3460 int64_t Cst, bool IsVector, bool IsFP) {
3461 // For i1, Cst will always be -1 regardless of boolean contents.
3462 return (ScalarSizeBits == 1 && Cst == -1) ||
3463 isConstTrueVal(TLI, Cst, IsVector, IsFP);
3464}
3465
3466// This combine tries to reduce the number of scalarised G_TRUNC instructions by
3467// using vector truncates instead
3468//
3469// EXAMPLE:
3470// %a(i32), %b(i32) = G_UNMERGE_VALUES %src(<2 x i32>)
3471// %T_a(i16) = G_TRUNC %a(i32)
3472// %T_b(i16) = G_TRUNC %b(i32)
3473// %Undef(i16) = G_IMPLICIT_DEF(i16)
3474// %dst(v4i16) = G_BUILD_VECTOR %T_a(i16), %T_b(i16), %Undef(i16), %Undef(i16)
3475//
3476// ===>
3477// %Undef(<2 x i32>) = G_IMPLICIT_DEF(<2 x i32>)
3478// %Mid(<4 x s32>) = G_CONCAT_VECTORS %src(<2 x i32>), %Undef(<2 x i32>)
3479// %dst(<4 x s16>) = G_TRUNC %Mid(<4 x s32>)
3480//
3481// Only matches sources made up of G_TRUNCs followed by G_IMPLICIT_DEFs
3482bool CombinerHelper::matchUseVectorTruncate(MachineInstr &MI,
3483 Register &MatchInfo) const {
3484 auto BuildMI = cast<GBuildVector>(&MI);
3485 unsigned NumOperands = BuildMI->getNumSources();
3486 LLT DstTy = MRI.getType(BuildMI->getReg(0));
3487
3488 // Check the G_BUILD_VECTOR sources
3489 unsigned I;
3490 MachineInstr *UnmergeMI = nullptr;
3491
3492 // Check all source TRUNCs come from the same UNMERGE instruction
3493 for (I = 0; I < NumOperands; ++I) {
3494 auto SrcMI = MRI.getVRegDef(BuildMI->getSourceReg(I));
3495 auto SrcMIOpc = SrcMI->getOpcode();
3496
3497 // Check if the G_TRUNC instructions all come from the same MI
3498 if (SrcMIOpc == TargetOpcode::G_TRUNC) {
3499 if (!UnmergeMI) {
3500 UnmergeMI = MRI.getVRegDef(SrcMI->getOperand(1).getReg());
3501 if (UnmergeMI->getOpcode() != TargetOpcode::G_UNMERGE_VALUES)
3502 return false;
3503 } else {
3504 auto UnmergeSrcMI = MRI.getVRegDef(SrcMI->getOperand(1).getReg());
3505 if (UnmergeMI != UnmergeSrcMI)
3506 return false;
3507 }
3508 } else {
3509 break;
3510 }
3511 }
3512 if (I < 2)
3513 return false;
3514
3515 // Check the remaining source elements are only G_IMPLICIT_DEF
3516 for (; I < NumOperands; ++I) {
3517 auto SrcMI = MRI.getVRegDef(BuildMI->getSourceReg(I));
3518 auto SrcMIOpc = SrcMI->getOpcode();
3519
3520 if (SrcMIOpc != TargetOpcode::G_IMPLICIT_DEF)
3521 return false;
3522 }
3523
3524 // Check the size of unmerge source
3525 MatchInfo = cast<GUnmerge>(UnmergeMI)->getSourceReg();
3526 LLT UnmergeSrcTy = MRI.getType(MatchInfo);
3527 if (!DstTy.getElementCount().isKnownMultipleOf(UnmergeSrcTy.getNumElements()))
3528 return false;
3529
3530 // Check the unmerge source and destination element types match
3531 LLT UnmergeSrcEltTy = UnmergeSrcTy.getElementType();
3532 Register UnmergeDstReg = UnmergeMI->getOperand(0).getReg();
3533 LLT UnmergeDstEltTy = MRI.getType(UnmergeDstReg);
3534 if (UnmergeSrcEltTy != UnmergeDstEltTy)
3535 return false;
3536
3537 // Only generate legal instructions post-legalizer
3538 if (!IsPreLegalize) {
3539 LLT MidTy = DstTy.changeElementType(UnmergeSrcTy.getScalarType());
3540
3541 if (DstTy.getElementCount() != UnmergeSrcTy.getElementCount() &&
3542 !isLegal({TargetOpcode::G_CONCAT_VECTORS, {MidTy, UnmergeSrcTy}}))
3543 return false;
3544
3545 if (!isLegal({TargetOpcode::G_TRUNC, {DstTy, MidTy}}))
3546 return false;
3547 }
3548
3549 return true;
3550}
3551
3552void CombinerHelper::applyUseVectorTruncate(MachineInstr &MI,
3553 Register &MatchInfo) const {
3554 Register MidReg;
3555 auto BuildMI = cast<GBuildVector>(&MI);
3556 Register DstReg = BuildMI->getReg(0);
3557 LLT DstTy = MRI.getType(DstReg);
3558 LLT UnmergeSrcTy = MRI.getType(MatchInfo);
3559 unsigned DstTyNumElt = DstTy.getNumElements();
3560 unsigned UnmergeSrcTyNumElt = UnmergeSrcTy.getNumElements();
3561
3562 // No need to pad vector if only G_TRUNC is needed
3563 if (DstTyNumElt / UnmergeSrcTyNumElt == 1) {
3564 MidReg = MatchInfo;
3565 } else {
3566 Register UndefReg = Builder.buildUndef(UnmergeSrcTy).getReg(0);
3567 SmallVector<Register> ConcatRegs = {MatchInfo};
3568 for (unsigned I = 1; I < DstTyNumElt / UnmergeSrcTyNumElt; ++I)
3569 ConcatRegs.push_back(UndefReg);
3570
3571 auto MidTy = DstTy.changeElementType(UnmergeSrcTy.getScalarType());
3572 MidReg = Builder.buildConcatVectors(MidTy, ConcatRegs).getReg(0);
3573 }
3574
3575 Builder.buildTrunc(DstReg, MidReg);
3576 MI.eraseFromParent();
3577}
3578
3579bool CombinerHelper::matchNotCmp(
3580 MachineInstr &MI, SmallVectorImpl<Register> &RegsToNegate) const {
3581 assert(MI.getOpcode() == TargetOpcode::G_XOR);
3582 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3583 const auto &TLI = *Builder.getMF().getSubtarget().getTargetLowering();
3584 Register XorSrc;
3585 Register CstReg;
3586 // We match xor(src, true) here.
3587 if (!mi_match(MI.getOperand(0).getReg(), MRI,
3588 m_GXor(m_Reg(XorSrc), m_Reg(CstReg))))
3589 return false;
3590
3591 if (!MRI.hasOneNonDBGUse(XorSrc))
3592 return false;
3593
3594 // Check that XorSrc is the root of a tree of comparisons combined with ANDs
3595 // and ORs. The suffix of RegsToNegate starting from index I is used as a
3596 // work list of tree nodes to visit.
3597 RegsToNegate.push_back(XorSrc);
3598 // Remember whether the comparisons are all integer or all floating point.
3599 bool IsInt = false;
3600 bool IsFP = false;
3601 for (unsigned I = 0; I < RegsToNegate.size(); ++I) {
3602 Register Reg = RegsToNegate[I];
3603 if (!MRI.hasOneNonDBGUse(Reg))
3604 return false;
3605 MachineInstr *Def = MRI.getVRegDef(Reg);
3606 switch (Def->getOpcode()) {
3607 default:
3608 // Don't match if the tree contains anything other than ANDs, ORs and
3609 // comparisons.
3610 return false;
3611 case TargetOpcode::G_ICMP:
3612 if (IsFP)
3613 return false;
3614 IsInt = true;
3615 // When we apply the combine we will invert the predicate.
3616 break;
3617 case TargetOpcode::G_FCMP:
3618 if (IsInt)
3619 return false;
3620 IsFP = true;
3621 // When we apply the combine we will invert the predicate.
3622 break;
3623 case TargetOpcode::G_AND:
3624 case TargetOpcode::G_OR:
3625 // Implement De Morgan's laws:
3626 // ~(x & y) -> ~x | ~y
3627 // ~(x | y) -> ~x & ~y
3628 // When we apply the combine we will change the opcode and recursively
3629 // negate the operands.
3630 RegsToNegate.push_back(Def->getOperand(1).getReg());
3631 RegsToNegate.push_back(Def->getOperand(2).getReg());
3632 break;
3633 }
3634 }
3635
3636 // Now we know whether the comparisons are integer or floating point, check
3637 // the constant in the xor.
3638 int64_t Cst;
3639 if (Ty.isVector()) {
3640 MachineInstr *CstDef = MRI.getVRegDef(CstReg);
3641 auto MaybeCst = getIConstantSplatSExtVal(*CstDef, MRI);
3642 if (!MaybeCst)
3643 return false;
3644 if (!isConstValidTrue(TLI, Ty.getScalarSizeInBits(), *MaybeCst, true, IsFP))
3645 return false;
3646 } else {
3647 if (!mi_match(CstReg, MRI, m_ICst(Cst)))
3648 return false;
3649 if (!isConstValidTrue(TLI, Ty.getSizeInBits(), Cst, false, IsFP))
3650 return false;
3651 }
3652
3653 return true;
3654}
3655
3656void CombinerHelper::applyNotCmp(
3657 MachineInstr &MI, SmallVectorImpl<Register> &RegsToNegate) const {
3658 for (Register Reg : RegsToNegate) {
3659 MachineInstr *Def = MRI.getVRegDef(Reg);
3660 Observer.changingInstr(*Def);
3661 // For each comparison, invert the opcode. For each AND and OR, change the
3662 // opcode.
3663 switch (Def->getOpcode()) {
3664 default:
3665 llvm_unreachable("Unexpected opcode");
3666 case TargetOpcode::G_ICMP:
3667 case TargetOpcode::G_FCMP: {
3668 MachineOperand &PredOp = Def->getOperand(1);
3669 CmpInst::Predicate NewP = CmpInst::getInversePredicate(
3670 static_cast<CmpInst::Predicate>(PredOp.getPredicate()));
3671 PredOp.setPredicate(NewP);
3672 break;
3673 }
3674 case TargetOpcode::G_AND:
3675 Def->setDesc(Builder.getTII().get(TargetOpcode::G_OR));
3676 break;
3677 case TargetOpcode::G_OR:
3678 Def->setDesc(Builder.getTII().get(TargetOpcode::G_AND));
3679 break;
3680 }
3681 Observer.changedInstr(*Def);
3682 }
3683
3684 replaceRegWith(MRI, MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
3685 MI.eraseFromParent();
3686}
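// A worked example of the match/apply pair above (illustrative s1 values):
//   %a:_(s1) = G_ICMP intpred(slt), %x, %y
//   %b:_(s1) = G_ICMP intpred(eq), %x, %z
//   %c:_(s1) = G_AND %a, %b
//   %not:_(s1) = G_XOR %c, -1
// becomes, by De Morgan and predicate inversion,
//   %a:_(s1) = G_ICMP intpred(sge), %x, %y
//   %b:_(s1) = G_ICMP intpred(ne), %x, %z
//   %c:_(s1) = G_OR %a, %b
// and %not is replaced by %c.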
3687
3688bool CombinerHelper::matchXorOfAndWithSameReg(
3689 MachineInstr &MI, std::pair<Register, Register> &MatchInfo) const {
3690 // Match (xor (and x, y), y) (or any of its commuted cases)
3691 assert(MI.getOpcode() == TargetOpcode::G_XOR);
3692 Register &X = MatchInfo.first;
3693 Register &Y = MatchInfo.second;
3694 Register AndReg = MI.getOperand(1).getReg();
3695 Register SharedReg = MI.getOperand(2).getReg();
3696
3697 // Find a G_AND on either side of the G_XOR.
3698 // Look for one of
3699 //
3700 // (xor (and x, y), SharedReg)
3701 // (xor SharedReg, (and x, y))
3702 if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y)))) {
3703 std::swap(AndReg, SharedReg);
3704 if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y))))
3705 return false;
3706 }
3707
3708 // Only do this if we'll eliminate the G_AND.
3709 if (!MRI.hasOneNonDBGUse(AndReg))
3710 return false;
3711
3712 // We can combine if SharedReg is the same as either the LHS or RHS of the
3713 // G_AND.
3714 if (Y != SharedReg)
3715 std::swap(X, Y);
3716 return Y == SharedReg;
3717}
3718
3719void CombinerHelper::applyXorOfAndWithSameReg(
3720 MachineInstr &MI, std::pair<Register, Register> &MatchInfo) const {
3721 // Fold (xor (and x, y), y) -> (and (not x), y)
3722 Register X, Y;
3723 std::tie(X, Y) = MatchInfo;
3724 auto Not = Builder.buildNot(MRI.getType(X), X);
3725 Observer.changingInstr(MI);
3726 MI.setDesc(Builder.getTII().get(TargetOpcode::G_AND));
3727 MI.getOperand(1).setReg(Not->getOperand(0).getReg());
3728 MI.getOperand(2).setReg(Y);
3729 Observer.changedInstr(MI);
3730}
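// The fold above holds bit-by-bit: (x & y) ^ y is set exactly where y is set
// and x is clear, so (x & y) ^ y == ~x & y. E.g. with 4-bit values x = 0b1100
// and y = 0b1010: (x & y) ^ y = 0b1000 ^ 0b1010 = 0b0010, and
// (~x) & y = 0b0011 & 0b1010 = 0b0010.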
3731
3732bool CombinerHelper::matchPtrAddZero(MachineInstr &MI) const {
3733 auto &PtrAdd = cast<GPtrAdd>(MI);
3734 Register DstReg = PtrAdd.getReg(0);
3735 LLT Ty = MRI.getType(DstReg);
3736 const DataLayout &DL = Builder.getMF().getDataLayout();
3737
3738 if (DL.isNonIntegralAddressSpace(Ty.getScalarType().getAddressSpace()))
3739 return false;
3740
3741 if (Ty.isPointer()) {
3742 auto ConstVal = getIConstantVRegVal(PtrAdd.getBaseReg(), MRI);
3743 return ConstVal && *ConstVal == 0;
3744 }
3745
3746 assert(Ty.isVector() && "Expecting a vector type");
3747 const MachineInstr *VecMI = MRI.getVRegDef(PtrAdd.getBaseReg());
3748 return isBuildVectorAllZeros(*VecMI, MRI);
3749}
3750
3751void CombinerHelper::applyPtrAddZero(MachineInstr &MI) const {
3752 auto &PtrAdd = cast<GPtrAdd>(MI);
3753 Builder.buildIntToPtr(PtrAdd.getReg(0), PtrAdd.getOffsetReg());
3754 PtrAdd.eraseFromParent();
3755}
3756
3757/// The second source operand is known to be a power of 2.
3758void CombinerHelper::applySimplifyURemByPow2(MachineInstr &MI) const {
3759 Register DstReg = MI.getOperand(0).getReg();
3760 Register Src0 = MI.getOperand(1).getReg();
3761 Register Pow2Src1 = MI.getOperand(2).getReg();
3762 LLT Ty = MRI.getType(DstReg);
3763
3764 // Fold (urem x, pow2) -> (and x, pow2-1)
3765 auto NegOne = Builder.buildConstant(Ty, -1);
3766 auto Add = Builder.buildAdd(Ty, Pow2Src1, NegOne);
3767 Builder.buildAnd(DstReg, Src0, Add);
3768 MI.eraseFromParent();
3769}
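// For example, assuming s32 and a constant power-of-two divisor:
//   %dst:_(s32) = G_UREM %x, 8   -->   %dst:_(s32) = G_AND %x, 7
// since x urem 8 == x & (8 - 1) for any unsigned x.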
3770
3771bool CombinerHelper::matchFoldBinOpIntoSelect(MachineInstr &MI,
3772 unsigned &SelectOpNo) const {
3773 Register LHS = MI.getOperand(1).getReg();
3774 Register RHS = MI.getOperand(2).getReg();
3775
3776 Register OtherOperandReg = RHS;
3777 SelectOpNo = 1;
3778 MachineInstr *Select = MRI.getVRegDef(LHS);
3779
3780 // Don't do this unless the old select is going away. We want to eliminate the
3781 // binary operator, not replace a binop with a select.
3782 if (Select->getOpcode() != TargetOpcode::G_SELECT ||
3783 !MRI.hasOneNonDBGUse(LHS)) {
3784 OtherOperandReg = LHS;
3785 SelectOpNo = 2;
3786 Select = MRI.getVRegDef(RHS);
3787 if (Select->getOpcode() != TargetOpcode::G_SELECT ||
3788 !MRI.hasOneNonDBGUse(RHS))
3789 return false;
3790 }
3791
3792 MachineInstr *SelectLHS = MRI.getVRegDef(Select->getOperand(2).getReg());
3793 MachineInstr *SelectRHS = MRI.getVRegDef(Select->getOperand(3).getReg());
3794
3795 if (!isConstantOrConstantVector(*SelectLHS, MRI,
3796 /*AllowFP*/ true,
3797 /*AllowOpaqueConstants*/ false))
3798 return false;
3799 if (!isConstantOrConstantVector(*SelectRHS, MRI,
3800 /*AllowFP*/ true,
3801 /*AllowOpaqueConstants*/ false))
3802 return false;
3803
3804 unsigned BinOpcode = MI.getOpcode();
3805
3806 // We know that one of the operands is a select of constants. Now verify that
3807 // the other binary operator operand is either a constant, or we can handle a
3808 // variable.
3809 bool CanFoldNonConst =
3810 (BinOpcode == TargetOpcode::G_AND || BinOpcode == TargetOpcode::G_OR) &&
3811 (isNullOrNullSplat(*SelectLHS, MRI) ||
3812 isAllOnesOrAllOnesSplat(*SelectLHS, MRI)) &&
3813 (isNullOrNullSplat(*SelectRHS, MRI) ||
3814 isAllOnesOrAllOnesSplat(*SelectRHS, MRI));
3815 if (CanFoldNonConst)
3816 return true;
3817
3818 return isConstantOrConstantVector(*MRI.getVRegDef(OtherOperandReg), MRI,
3819 /*AllowFP*/ true,
3820 /*AllowOpaqueConstants*/ false);
3821}
3822
3823/// \p SelectOperand is the operand in binary operator \p MI that is the select
3824/// to fold.
3825void CombinerHelper::applyFoldBinOpIntoSelect(
3826 MachineInstr &MI, const unsigned &SelectOperand) const {
3827 Register Dst = MI.getOperand(0).getReg();
3828 Register LHS = MI.getOperand(1).getReg();
3829 Register RHS = MI.getOperand(2).getReg();
3830 MachineInstr *Select = MRI.getVRegDef(MI.getOperand(SelectOperand).getReg());
3831
3832 Register SelectCond = Select->getOperand(1).getReg();
3833 Register SelectTrue = Select->getOperand(2).getReg();
3834 Register SelectFalse = Select->getOperand(3).getReg();
3835
3836 LLT Ty = MRI.getType(Dst);
3837 unsigned BinOpcode = MI.getOpcode();
3838
3839 Register FoldTrue, FoldFalse;
3840
3841 // We have a select-of-constants followed by a binary operator with a
3842 // constant. Eliminate the binop by pulling the constant math into the select.
3843 // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
3844 if (SelectOperand == 1) {
3845 // TODO: SelectionDAG verifies this actually constant folds before
3846 // committing to the combine.
3847
3848 FoldTrue = Builder.buildInstr(BinOpcode, {Ty}, {SelectTrue, RHS}).getReg(0);
3849 FoldFalse =
3850 Builder.buildInstr(BinOpcode, {Ty}, {SelectFalse, RHS}).getReg(0);
3851 } else {
3852 FoldTrue = Builder.buildInstr(BinOpcode, {Ty}, {LHS, SelectTrue}).getReg(0);
3853 FoldFalse =
3854 Builder.buildInstr(BinOpcode, {Ty}, {LHS, SelectFalse}).getReg(0);
3855 }
3856
3857 Builder.buildSelect(Dst, SelectCond, FoldTrue, FoldFalse, MI.getFlags());
3858 MI.eraseFromParent();
3859}
3860
3861std::optional<SmallVector<Register, 8>>
3862CombinerHelper::findCandidatesForLoadOrCombine(const MachineInstr *Root) const {
3863 assert(Root->getOpcode() == TargetOpcode::G_OR && "Expected G_OR only!");
3864 // We want to detect if Root is part of a tree which represents a bunch
3865 // of loads being merged into a larger load. We'll try to recognize patterns
3866 // like, for example:
3867 //
3868 // Reg Reg
3869 // \ /
3870 // OR_1 Reg
3871 // \ /
3872 // OR_2
3873 // \ Reg
3874 // .. /
3875 // Root
3876 //
3877 // Reg Reg Reg Reg
3878 // \ / \ /
3879 // OR_1 OR_2
3880 // \ /
3881 // \ /
3882 // ...
3883 // Root
3884 //
3885 // Each "Reg" may have been produced by a load + some arithmetic. This
3886 // function will save each of them.
3887 SmallVector<Register, 8> RegsToVisit;
3888 SmallVector<const MachineInstr *, 7> Ors = {Root};
3889
3890 // In the "worst" case, we're dealing with a load for each byte. So, there
3891 // are at most #bytes - 1 ORs.
3892 const unsigned MaxIter =
3893 MRI.getType(Root->getOperand(0).getReg()).getSizeInBytes() - 1;
3894 for (unsigned Iter = 0; Iter < MaxIter; ++Iter) {
3895 if (Ors.empty())
3896 break;
3897 const MachineInstr *Curr = Ors.pop_back_val();
3898 Register OrLHS = Curr->getOperand(1).getReg();
3899 Register OrRHS = Curr->getOperand(2).getReg();
3900
3901 // In the combine, we want to eliminate the entire tree.
3902 if (!MRI.hasOneNonDBGUse(OrLHS) || !MRI.hasOneNonDBGUse(OrRHS))
3903 return std::nullopt;
3904
3905 // If it's a G_OR, save it and continue to walk. If it's not, then it's
3906 // something that may be a load + arithmetic.
3907 if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrLHS, MRI))
3908 Ors.push_back(Or);
3909 else
3910 RegsToVisit.push_back(OrLHS);
3911 if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrRHS, MRI))
3912 Ors.push_back(Or);
3913 else
3914 RegsToVisit.push_back(OrRHS);
3915 }
3916
3917 // We're going to try and merge each register into a wider power-of-2 type,
3918 // so we ought to have an even number of registers.
3919 if (RegsToVisit.empty() || RegsToVisit.size() % 2 != 0)
3920 return std::nullopt;
3921 return RegsToVisit;
3922}
3923
3924/// Helper function for findLoadOffsetsForLoadOrCombine.
3925///
3926/// Check if \p Reg is the result of loading a \p MemSizeInBits wide value,
3927/// and then moving that value into a specific byte offset.
3928///
3929/// e.g. x[i] << 24
3930///
3931/// \returns The load instruction and the byte offset it is moved into.
3932static std::optional<std::pair<GZExtLoad *, int64_t>>
3933matchLoadAndBytePosition(Register Reg, unsigned MemSizeInBits,
3934 const MachineRegisterInfo &MRI) {
3935 assert(MRI.hasOneNonDBGUse(Reg) &&
3936 "Expected Reg to only have one non-debug use?");
3937 Register MaybeLoad;
3938 int64_t Shift;
3939 if (!mi_match(Reg, MRI,
3940 m_OneNonDBGUse(m_GShl(m_Reg(MaybeLoad), m_ICst(Shift))))) {
3941 Shift = 0;
3942 MaybeLoad = Reg;
3943 }
3944
3945 if (Shift % MemSizeInBits != 0)
3946 return std::nullopt;
3947
3948 // TODO: Handle other types of loads.
3949 auto *Load = getOpcodeDef<GZExtLoad>(MaybeLoad, MRI);
3950 if (!Load)
3951 return std::nullopt;
3952
3953 if (!Load->isUnordered() || Load->getMemSizeInBits() != MemSizeInBits)
3954 return std::nullopt;
3955
3956 return std::make_pair(Load, Shift / MemSizeInBits);
3957}
3958
3959std::optional<std::tuple<GZExtLoad *, int64_t, GZExtLoad *>>
3960CombinerHelper::findLoadOffsetsForLoadOrCombine(
3961 SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx,
3962 const SmallVector<Register, 8> &RegsToVisit,
3963 const unsigned MemSizeInBits) const {
3964
3965 // Each load found for the pattern. There should be one for each RegsToVisit.
3966 SmallSetVector<const MachineInstr *, 8> Loads;
3967
3968 // The lowest index used in any load. (The lowest "i" for each x[i].)
3969 int64_t LowestIdx = INT64_MAX;
3970
3971 // The load which uses the lowest index.
3972 GZExtLoad *LowestIdxLoad = nullptr;
3973
3974 // Keeps track of the load indices we see. We shouldn't see any indices twice.
3975 SmallSet<int64_t, 8> SeenIdx;
3976
3977 // Ensure each load is in the same MBB.
3978 // TODO: Support multiple MachineBasicBlocks.
3979 MachineBasicBlock *MBB = nullptr;
3980 const MachineMemOperand *MMO = nullptr;
3981
3982 // Earliest instruction-order load in the pattern.
3983 GZExtLoad *EarliestLoad = nullptr;
3984
3985 // Latest instruction-order load in the pattern.
3986 GZExtLoad *LatestLoad = nullptr;
3987
3988 // Base pointer which every load should share.
3989 Register BasePtr;
3990
3991 // We want to find a load for each register. Each load should have some
3992 // appropriate bit twiddling arithmetic. During this loop, we will also keep
3993 // track of the load which uses the lowest index. Later, we will check if we
3994 // can use its pointer in the final, combined load.
3995 for (auto Reg : RegsToVisit) {
3996 // Find the load, and find the byte position that its value will end up
3997 // in (e.g. due to a shift).
3998 auto LoadAndPos = matchLoadAndBytePosition(Reg, MemSizeInBits, MRI);
3999 if (!LoadAndPos)
4000 return std::nullopt;
4001 GZExtLoad *Load;
4002 int64_t DstPos;
4003 std::tie(Load, DstPos) = *LoadAndPos;
4004
4005 // TODO: Handle multiple MachineBasicBlocks. Currently not handled because
4006 // it is difficult to check for stores/calls/etc between loads.
4007 MachineBasicBlock *LoadMBB = Load->getParent();
4008 if (!MBB)
4009 MBB = LoadMBB;
4010 if (LoadMBB != MBB)
4011 return std::nullopt;
4012
4013 // Make sure that the MachineMemOperands of every seen load are compatible.
4014 auto &LoadMMO = Load->getMMO();
4015 if (!MMO)
4016 MMO = &LoadMMO;
4017 if (MMO->getAddrSpace() != LoadMMO.getAddrSpace())
4018 return std::nullopt;
4019
4020 // Find out what the base pointer and index for the load is.
4021 Register LoadPtr;
4022 int64_t Idx;
4023 if (!mi_match(Load->getOperand(1).getReg(), MRI,
4024 m_GPtrAdd(m_Reg(LoadPtr), m_ICst(Idx)))) {
4025 LoadPtr = Load->getOperand(1).getReg();
4026 Idx = 0;
4027 }
4028
4029 // Don't combine things like a[i], a[i] -> a bigger load.
4030 if (!SeenIdx.insert(Idx).second)
4031 return std::nullopt;
4032
4033 // Every load must share the same base pointer; don't combine things like:
4034 //
4035 // a[i], b[i + 1] -> a bigger load.
4036 if (!BasePtr.isValid())
4037 BasePtr = LoadPtr;
4038 if (BasePtr != LoadPtr)
4039 return std::nullopt;
4040
4041 if (Idx < LowestIdx) {
4042 LowestIdx = Idx;
4043 LowestIdxLoad = Load;
4044 }
4045
4046 // Keep track of the byte offset that this load ends up at. If we have seen
4047 // the byte offset, then stop here. We do not want to combine:
4048 //
4049 // a[i] << 16, a[i + k] << 16 -> a bigger load.
4050 if (!MemOffset2Idx.try_emplace(DstPos, Idx).second)
4051 return std::nullopt;
4052 Loads.insert(Load);
4053
4054 // Keep track of the position of the earliest/latest loads in the pattern.
4055 // We will check that there are no load fold barriers between them later
4056 // on.
4057 //
4058 // FIXME: Is there a better way to check for load fold barriers?
4059 if (!EarliestLoad || dominates(*Load, *EarliestLoad))
4060 EarliestLoad = Load;
4061 if (!LatestLoad || dominates(*LatestLoad, *Load))
4062 LatestLoad = Load;
4063 }
4064
4065 // We found a load for each register. Let's check if each load satisfies the
4066 // pattern.
4067 assert(Loads.size() == RegsToVisit.size() &&
4068 "Expected to find a load for each register?");
4069 assert(EarliestLoad != LatestLoad && EarliestLoad &&
4070 LatestLoad && "Expected at least two loads?");
4071
4072 // Check if there are any stores, calls, etc. between any of the loads. If
4073 // there are, then we can't safely perform the combine.
4074 //
4075 // MaxIter is chosen based off the (worst case) number of iterations it
4076 // typically takes to succeed in the LLVM test suite plus some padding.
4077 //
4078 // FIXME: Is there a better way to check for load fold barriers?
4079 const unsigned MaxIter = 20;
4080 unsigned Iter = 0;
4081 for (const auto &MI : instructionsWithoutDebug(EarliestLoad->getIterator(),
4082 LatestLoad->getIterator())) {
4083 if (Loads.count(&MI))
4084 continue;
4085 if (MI.isLoadFoldBarrier())
4086 return std::nullopt;
4087 if (Iter++ == MaxIter)
4088 return std::nullopt;
4089 }
4090
4091 return std::make_tuple(LowestIdxLoad, LowestIdx, LatestLoad);
4092}
4093
4094bool CombinerHelper::matchLoadOrCombine(
4095 MachineInstr &MI,
4096 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4097 assert(MI.getOpcode() == TargetOpcode::G_OR);
4098 MachineFunction &MF = *MI.getMF();
4099 // Assuming a little-endian target, transform:
4100 // s8 *a = ...
4101 // s32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
4102 // =>
4103 // s32 val = *((i32)a)
4104 //
4105 // s8 *a = ...
4106 // s32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
4107 // =>
4108 // s32 val = BSWAP(*((s32)a))
4109 Register Dst = MI.getOperand(0).getReg();
4110 LLT Ty = MRI.getType(Dst);
4111 if (Ty.isVector())
4112 return false;
4113
4114 // We need to combine at least two loads into this type. Since the smallest
4115 // possible load is into a byte, we need at least a 16-bit wide type.
4116 const unsigned WideMemSizeInBits = Ty.getSizeInBits();
4117 if (WideMemSizeInBits < 16 || WideMemSizeInBits % 8 != 0)
4118 return false;
4119
4120 // Match a collection of non-OR instructions in the pattern.
4121 auto RegsToVisit = findCandidatesForLoadOrCombine(&MI);
4122 if (!RegsToVisit)
4123 return false;
4124
4125 // We have a collection of non-OR instructions. Figure out how wide each of
4126 // the small loads should be based off of the number of potential loads we
4127 // found.
4128 const unsigned NarrowMemSizeInBits = WideMemSizeInBits / RegsToVisit->size();
4129 if (NarrowMemSizeInBits % 8 != 0)
4130 return false;
4131
4132 // Check if each register feeding into each OR is a load from the same
4133 // base pointer + some arithmetic.
4134 //
4135 // e.g. a[0], a[1] << 8, a[2] << 16, etc.
4136 //
4137 // Also verify that each of these ends up putting a[i] into the same memory
4138 // offset as a load into a wide type would.
4139 SmallDenseMap<int64_t, int64_t, 8> MemOffset2Idx;
4140 GZExtLoad *LowestIdxLoad, *LatestLoad;
4141 int64_t LowestIdx;
4142 auto MaybeLoadInfo = findLoadOffsetsForLoadOrCombine(
4143 MemOffset2Idx, *RegsToVisit, NarrowMemSizeInBits);
4144 if (!MaybeLoadInfo)
4145 return false;
4146 std::tie(LowestIdxLoad, LowestIdx, LatestLoad) = *MaybeLoadInfo;
4147
4148 // We have a bunch of loads being OR'd together. Using the addresses + offsets
4149 // we found before, check if this corresponds to a big or little endian byte
4150 // pattern. If it does, then we can represent it using a load + possibly a
4151 // BSWAP.
4152 bool IsBigEndianTarget = MF.getDataLayout().isBigEndian();
4153 std::optional<bool> IsBigEndian = isBigEndian(MemOffset2Idx, LowestIdx);
4154 if (!IsBigEndian)
4155 return false;
4156 bool NeedsBSwap = IsBigEndianTarget != *IsBigEndian;
4157 if (NeedsBSwap && !isLegalOrBeforeLegalizer({TargetOpcode::G_BSWAP, {Ty}}))
4158 return false;
4159
4160 // Make sure that the load from the lowest index produces offset 0 in the
4161 // final value.
4162 //
4163 // This ensures that we won't combine something like this:
4164 //
4165 // load x[i] -> byte 2
4166 // load x[i+1] -> byte 0 ---> wide_load x[i]
4167 // load x[i+2] -> byte 1
4168 const unsigned NumLoadsInTy = WideMemSizeInBits / NarrowMemSizeInBits;
4169 const unsigned ZeroByteOffset =
4170 *IsBigEndian
4171 ? bigEndianByteAt(NumLoadsInTy, 0)
4172 : littleEndianByteAt(NumLoadsInTy, 0);
4173 auto ZeroOffsetIdx = MemOffset2Idx.find(ZeroByteOffset);
4174 if (ZeroOffsetIdx == MemOffset2Idx.end() ||
4175 ZeroOffsetIdx->second != LowestIdx)
4176 return false;
4177
4178 // We will reuse the pointer from the load which ends up at byte offset 0.
4179 // It may not use index 0.
4180 Register Ptr = LowestIdxLoad->getPointerReg();
4181 const MachineMemOperand &MMO = LowestIdxLoad->getMMO();
4182 LegalityQuery::MemDesc MMDesc(MMO);
4183 MMDesc.MemoryTy = Ty;
4184 if (!isLegalOrBeforeLegalizer(
4185 {TargetOpcode::G_LOAD, {Ty, MRI.getType(Ptr)}, {MMDesc}}))
4186 return false;
4187 auto PtrInfo = MMO.getPointerInfo();
4188 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, WideMemSizeInBits / 8);
4189
4190 // Load must be allowed and fast on the target.
4191 LLVMContext &C = MF.getFunction().getContext();
4192 auto &DL = MF.getDataLayout();
4193 unsigned Fast = 0;
4194 if (!getTargetLowering().allowsMemoryAccess(C, DL, Ty, *NewMMO, &Fast) ||
4195 !Fast)
4196 return false;
4197
4198 MatchInfo = [=](MachineIRBuilder &MIB) {
4199 MIB.setInstrAndDebugLoc(*LatestLoad);
4200 Register LoadDst = NeedsBSwap ? MRI.cloneVirtualRegister(Dst) : Dst;
4201 MIB.buildLoad(LoadDst, Ptr, *NewMMO);
4202 if (NeedsBSwap)
4203 MIB.buildBSwap(Dst, LoadDst);
4204 };
4205 return true;
4206}
4207
4208bool CombinerHelper::matchExtendThroughPhis(MachineInstr &MI,
4209 MachineInstr *&ExtMI) const {
4210 auto &PHI = cast<GPhi>(MI);
4211 Register DstReg = PHI.getReg(0);
4212
4213 // TODO: Extending a vector may be expensive, don't do this until heuristics
4214 // are better.
4215 if (MRI.getType(DstReg).isVector())
4216 return false;
4217
4218 // Try to match a phi, whose only use is an extend.
4219 if (!MRI.hasOneNonDBGUse(DstReg))
4220 return false;
4221 ExtMI = &*MRI.use_instr_nodbg_begin(DstReg);
4222 switch (ExtMI->getOpcode()) {
4223 case TargetOpcode::G_ANYEXT:
4224 return true; // G_ANYEXT is usually free.
4225 case TargetOpcode::G_ZEXT:
4226 case TargetOpcode::G_SEXT:
4227 break;
4228 default:
4229 return false;
4230 }
4231
4232 // If the target is likely to fold this extend away, don't propagate.
4233 if (Builder.getTII().isExtendLikelyToBeFolded(*ExtMI, MRI))
4234 return false;
4235
4236 // We don't want to propagate the extends unless there's a good chance that
4237 // they'll be optimized in some way.
4238 // Collect the unique incoming values.
4239 SmallPtrSet<MachineInstr *, 4> InSrcs;
4240 for (unsigned I = 0; I < PHI.getNumIncomingValues(); ++I) {
4241 auto *DefMI = getDefIgnoringCopies(PHI.getIncomingValue(I), MRI);
4242 switch (DefMI->getOpcode()) {
4243 case TargetOpcode::G_LOAD:
4244 case TargetOpcode::G_TRUNC:
4245 case TargetOpcode::G_SEXT:
4246 case TargetOpcode::G_ZEXT:
4247 case TargetOpcode::G_ANYEXT:
4248 case TargetOpcode::G_CONSTANT:
4249 InSrcs.insert(DefMI);
4250 // Don't try to propagate if there are too many places to create new
4251 // extends, chances are it'll increase code size.
4252 if (InSrcs.size() > 2)
4253 return false;
4254 break;
4255 default:
4256 return false;
4257 }
4258 }
4259 return true;
4260}
4261
4262void CombinerHelper::applyExtendThroughPhis(MachineInstr &MI,
4263 MachineInstr *&ExtMI) const {
4264 auto &PHI = cast<GPhi>(MI);
4265 Register DstReg = ExtMI->getOperand(0).getReg();
4266 LLT ExtTy = MRI.getType(DstReg);
4267
4268 // Propagate the extension into each incoming register's defining block.
4269 // Use a SetVector here because PHIs can have duplicate edges, and we want
4270 // deterministic iteration order.
4271 SmallSetVector<MachineInstr *, 8> SrcMIs;
4272 SmallDenseMap<MachineInstr *, MachineInstr *, 8> OldToNewSrcMap;
4273 for (unsigned I = 0; I < PHI.getNumIncomingValues(); ++I) {
4274 auto SrcReg = PHI.getIncomingValue(I);
4275 auto *SrcMI = MRI.getVRegDef(SrcReg);
4276 if (!SrcMIs.insert(SrcMI))
4277 continue;
4278
4279 // Build an extend after each src inst.
4280 auto *MBB = SrcMI->getParent();
4281 MachineBasicBlock::iterator InsertPt = ++SrcMI->getIterator();
4282 if (InsertPt != MBB->end() && InsertPt->isPHI())
4283 InsertPt = MBB->getFirstNonPHI();
4284
4285 Builder.setInsertPt(*SrcMI->getParent(), InsertPt);
4286 Builder.setDebugLoc(MI.getDebugLoc());
4287 auto NewExt = Builder.buildExtOrTrunc(ExtMI->getOpcode(), ExtTy, SrcReg);
4288 OldToNewSrcMap[SrcMI] = NewExt;
4289 }
4290
4291 // Create a new phi with the extended inputs.
4292 Builder.setInstrAndDebugLoc(MI);
4293 auto NewPhi = Builder.buildInstrNoInsert(TargetOpcode::G_PHI);
4294 NewPhi.addDef(DstReg);
4295 for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) {
4296 if (!MO.isReg()) {
4297 NewPhi.addMBB(MO.getMBB());
4298 continue;
4299 }
4300 auto *NewSrc = OldToNewSrcMap[MRI.getVRegDef(MO.getReg())];
4301 NewPhi.addUse(NewSrc->getOperand(0).getReg());
4302 }
4303 Builder.insertInstr(NewPhi);
4304 ExtMI->eraseFromParent();
4305}
4306
4307bool CombinerHelper::matchExtractVecEltBuildVec(MachineInstr &MI,
4308 Register &Reg) const {
4309 assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT);
4310 // If we have a constant index, look for a G_BUILD_VECTOR source
4311 // and find the source register that the index maps to.
4312 Register SrcVec = MI.getOperand(1).getReg();
4313 LLT SrcTy = MRI.getType(SrcVec);
4314 if (SrcTy.isScalableVector())
4315 return false;
4316
4317 auto Cst = getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
4318 if (!Cst || Cst->Value.getZExtValue() >= SrcTy.getNumElements())
4319 return false;
4320
4321 unsigned VecIdx = Cst->Value.getZExtValue();
4322
4323 // Check if we have a build_vector or build_vector_trunc with an optional
4324 // trunc in front.
4325 MachineInstr *SrcVecMI = MRI.getVRegDef(SrcVec);
4326 if (SrcVecMI->getOpcode() == TargetOpcode::G_TRUNC) {
4327 SrcVecMI = MRI.getVRegDef(SrcVecMI->getOperand(1).getReg());
4328 }
4329
4330 if (SrcVecMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR &&
4331 SrcVecMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR_TRUNC)
4332 return false;
4333
4334 EVT Ty(getMVTForLLT(SrcTy));
4335 if (!MRI.hasOneNonDBGUse(SrcVec) &&
4336 !getTargetLowering().aggressivelyPreferBuildVectorSources(Ty))
4337 return false;
4338
4339 Reg = SrcVecMI->getOperand(VecIdx + 1).getReg();
4340 return true;
4341}
4342
4343void CombinerHelper::applyExtractVecEltBuildVec(MachineInstr &MI,
4344 Register &Reg) const {
4345 // Check the type of the register, since it may have come from a
4346 // G_BUILD_VECTOR_TRUNC.
4347 LLT ScalarTy = MRI.getType(Reg);
4348 Register DstReg = MI.getOperand(0).getReg();
4349 LLT DstTy = MRI.getType(DstReg);
4350
4351 if (ScalarTy != DstTy) {
4352 assert(ScalarTy.getSizeInBits() > DstTy.getSizeInBits());
4353 Builder.buildTrunc(DstReg, Reg);
4354 MI.eraseFromParent();
4355 return;
4356 }
4357 replaceSingleDefInstWithReg(MI, Reg);
4358}
4359
4360bool CombinerHelper::matchExtractAllEltsFromBuildVector(
4361 MachineInstr &MI,
4362 SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) const {
4363 assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
4364 // This combine tries to find build_vector's which have every source element
4365 // extracted using G_EXTRACT_VECTOR_ELT. This can happen when transforms like
4366 // the masked load scalarization is run late in the pipeline. There's already
4367 // a combine for a similar pattern starting from the extract, but that
4368 // doesn't attempt to do it if there are multiple uses of the build_vector,
4369 // which in this case is true. Starting the combine from the build_vector
4370 // feels more natural than trying to find sibling nodes of extracts.
4371 // E.g.
4372 // %vec(<4 x s32>) = G_BUILD_VECTOR %s1(s32), %s2, %s3, %s4
4373 // %ext1 = G_EXTRACT_VECTOR_ELT %vec, 0
4374 // %ext2 = G_EXTRACT_VECTOR_ELT %vec, 1
4375 // %ext3 = G_EXTRACT_VECTOR_ELT %vec, 2
4376 // %ext4 = G_EXTRACT_VECTOR_ELT %vec, 3
4377 // ==>
4378 // replace ext{1,2,3,4} with %s{1,2,3,4}
4379
4380 Register DstReg = MI.getOperand(0).getReg();
4381 LLT DstTy = MRI.getType(DstReg);
4382 unsigned NumElts = DstTy.getNumElements();
4383
4384 SmallBitVector ExtractedElts(NumElts);
4385 for (MachineInstr &II : MRI.use_nodbg_instructions(DstReg)) {
4386 if (II.getOpcode() != TargetOpcode::G_EXTRACT_VECTOR_ELT)
4387 return false;
4388 auto Cst = getIConstantVRegVal(II.getOperand(2).getReg(), MRI);
4389 if (!Cst)
4390 return false;
4391 unsigned Idx = Cst->getZExtValue();
4392 if (Idx >= NumElts)
4393 return false; // Out of range.
4394 ExtractedElts.set(Idx);
4395 SrcDstPairs.emplace_back(
4396 std::make_pair(MI.getOperand(Idx + 1).getReg(), &II));
4397 }
4398 // Match if every element was extracted.
4399 return ExtractedElts.all();
4400}
4401
4402void CombinerHelper::applyExtractAllEltsFromBuildVector(
4403 MachineInstr &MI,
4404 SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) const {
4405 assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
4406 for (auto &Pair : SrcDstPairs) {
4407 auto *ExtMI = Pair.second;
4408 replaceRegWith(MRI, ExtMI->getOperand(0).getReg(), Pair.first);
4409 ExtMI->eraseFromParent();
4410 }
4411 MI.eraseFromParent();
4412}
4413
4414void CombinerHelper::applyBuildFn(
4415 MachineInstr &MI,
4416 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4417 applyBuildFnNoErase(MI, MatchInfo);
4418 MI.eraseFromParent();
4419}
4420
4421void CombinerHelper::applyBuildFnNoErase(
4422 MachineInstr &MI,
4423 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4424 MatchInfo(Builder);
4425}
4426
4427bool CombinerHelper::matchOrShiftToFunnelShift(MachineInstr &MI,
4428 bool AllowScalarConstants,
4429 BuildFnTy &MatchInfo) const {
4430 assert(MI.getOpcode() == TargetOpcode::G_OR);
4431
4432 Register Dst = MI.getOperand(0).getReg();
4433 LLT Ty = MRI.getType(Dst);
4434 unsigned BitWidth = Ty.getScalarSizeInBits();
4435
4436 Register ShlSrc, ShlAmt, LShrSrc, LShrAmt, Amt;
4437 unsigned FshOpc = 0;
4438
4439 // Match (or (shl ...), (lshr ...)).
4440 if (!mi_match(Dst, MRI,
4441 // m_GOr() handles the commuted version as well.
4442 m_GOr(m_GShl(m_Reg(ShlSrc), m_Reg(ShlAmt)),
4443 m_GLShr(m_Reg(LShrSrc), m_Reg(LShrAmt)))))
4444 return false;
4445
4446 // Given constants C0 and C1 such that C0 + C1 is bit-width:
4447 // (or (shl x, C0), (lshr y, C1)) -> (fshl x, y, C0) or (fshr x, y, C1)
4448 int64_t CstShlAmt = 0, CstLShrAmt;
4449 if (mi_match(ShlAmt, MRI, m_ICstOrSplat(CstShlAmt)) &&
4450 mi_match(LShrAmt, MRI, m_ICstOrSplat(CstLShrAmt)) &&
4451 CstShlAmt + CstLShrAmt == BitWidth) {
4452 FshOpc = TargetOpcode::G_FSHR;
4453 Amt = LShrAmt;
4454 } else if (mi_match(LShrAmt, MRI,
4455 m_GSub(m_SpecificICstOrSplat(BitWidth), m_Reg(Amt))) &&
4456 ShlAmt == Amt) {
4457 // (or (shl x, amt), (lshr y, (sub bw, amt))) -> (fshl x, y, amt)
4458 FshOpc = TargetOpcode::G_FSHL;
4459 } else if (mi_match(ShlAmt, MRI,
4460 m_GSub(m_SpecificICstOrSplat(BitWidth), m_Reg(Amt))) &&
4461 LShrAmt == Amt) {
4462 // (or (shl x, (sub bw, amt)), (lshr y, amt)) -> (fshr x, y, amt)
4463 FshOpc = TargetOpcode::G_FSHR;
4464 } else {
4465 return false;
4466 }
4467
4468 LLT AmtTy = MRI.getType(Amt);
4469 if (!isLegalOrBeforeLegalizer({FshOpc, {Ty, AmtTy}}) &&
4470 (!AllowScalarConstants || CstShlAmt == 0 || !Ty.isScalar()))
4471 return false;
4472
4473 MatchInfo = [=](MachineIRBuilder &B) {
4474 B.buildInstr(FshOpc, {Dst}, {ShlSrc, LShrSrc, Amt});
4475 };
4476 return true;
4477}
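// A worked example of the constant case above, assuming s32 values:
//   %hi:_(s32) = G_SHL %x, 8
//   %lo:_(s32) = G_LSHR %y, 24
//   %dst:_(s32) = G_OR %hi, %lo
// matches because 8 + 24 == 32 and is rewritten as
//   %dst:_(s32) = G_FSHR %x, %y, 24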
4478
4479/// Match an FSHL or FSHR that can be combined to a ROTR or ROTL rotate.
4480bool CombinerHelper::matchFunnelShiftToRotate(MachineInstr &MI) const {
4481 unsigned Opc = MI.getOpcode();
4482 assert(Opc == TargetOpcode::G_FSHL || Opc == TargetOpcode::G_FSHR);
4483 Register X = MI.getOperand(1).getReg();
4484 Register Y = MI.getOperand(2).getReg();
4485 if (X != Y)
4486 return false;
4487 unsigned RotateOpc =
4488 Opc == TargetOpcode::G_FSHL ? TargetOpcode::G_ROTL : TargetOpcode::G_ROTR;
4489 return isLegalOrBeforeLegalizer({RotateOpc, {MRI.getType(X), MRI.getType(Y)}});
4490}
4491
4492void CombinerHelper::applyFunnelShiftToRotate(MachineInstr &MI) const {
4493 unsigned Opc = MI.getOpcode();
4494 assert(Opc == TargetOpcode::G_FSHL || Opc == TargetOpcode::G_FSHR);
4495 bool IsFSHL = Opc == TargetOpcode::G_FSHL;
4496 Observer.changingInstr(MI);
4497 MI.setDesc(Builder.getTII().get(IsFSHL ? TargetOpcode::G_ROTL
4498 : TargetOpcode::G_ROTR));
4499 MI.removeOperand(2);
4500 Observer.changedInstr(MI);
4501}
4502
4503// Fold (rot x, c) -> (rot x, c % BitSize)
4504bool CombinerHelper::matchRotateOutOfRange(MachineInstr &MI) const {
4505 assert(MI.getOpcode() == TargetOpcode::G_ROTL ||
4506 MI.getOpcode() == TargetOpcode::G_ROTR);
4507 unsigned Bitsize =
4508 MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits();
4509 Register AmtReg = MI.getOperand(2).getReg();
4510 bool OutOfRange = false;
4511 auto MatchOutOfRange = [Bitsize, &OutOfRange](const Constant *C) {
4512 if (auto *CI = dyn_cast<ConstantInt>(C))
4513 OutOfRange |= CI->getValue().uge(Bitsize);
4514 return true;
4515 };
4516 return matchUnaryPredicate(MRI, AmtReg, MatchOutOfRange) && OutOfRange;
4517}
4518
4519void CombinerHelper::applyRotateOutOfRange(MachineInstr &MI) const {
4520 assert(MI.getOpcode() == TargetOpcode::G_ROTL ||
4521 MI.getOpcode() == TargetOpcode::G_ROTR);
4522 unsigned Bitsize =
4523 MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits();
4524 Register Amt = MI.getOperand(2).getReg();
4525 LLT AmtTy = MRI.getType(Amt);
4526 auto Bits = Builder.buildConstant(AmtTy, Bitsize);
4527 Amt = Builder.buildURem(AmtTy, MI.getOperand(2).getReg(), Bits).getReg(0);
4528 Observer.changingInstr(MI);
4529 MI.getOperand(2).setReg(Amt);
4530 Observer.changedInstr(MI);
4531}
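// For example, assuming s32:
//   %dst:_(s32) = G_ROTL %x, 37
// rotates by more than the bit width, so the amount is reduced modulo 32
// (37 urem 32 == 5), giving the equivalent G_ROTL %x, 5.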
4532
4533bool CombinerHelper::matchICmpToTrueFalseKnownBits(MachineInstr &MI,
4534 int64_t &MatchInfo) const {
4535 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
4536 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
4537
4538 // We want to avoid calling KnownBits on the LHS if possible, as this combine
4539 // has no filter and runs on every G_ICMP instruction. We can avoid calling
4540 // KnownBits on the LHS in two cases:
4541 //
4542 // - The RHS is unknown: Constants are always on RHS. If the RHS is unknown
4543 // we cannot do any transforms so we can safely bail out early.
4544 // - The RHS is zero: we don't need to know the LHS to do unsigned <0 and
4545 // >=0.
4546 auto KnownRHS = VT->getKnownBits(MI.getOperand(3).getReg());
4547 if (KnownRHS.isUnknown())
4548 return false;
4549
4550 std::optional<bool> KnownVal;
4551 if (KnownRHS.isZero()) {
4552 // ? uge 0 -> always true
4553 // ? ult 0 -> always false
4554 if (Pred == CmpInst::ICMP_UGE)
4555 KnownVal = true;
4556 else if (Pred == CmpInst::ICMP_ULT)
4557 KnownVal = false;
4558 }
4559
4560 if (!KnownVal) {
4561 auto KnownLHS = VT->getKnownBits(MI.getOperand(2).getReg());
4562 KnownVal = ICmpInst::compare(KnownLHS, KnownRHS, Pred);
4563 }
4564
4565 if (!KnownVal)
4566 return false;
4567 MatchInfo =
4568 *KnownVal
4569 ? getICmpTrueVal(getTargetLowering(),
4570 /*IsVector = */
4571 MRI.getType(MI.getOperand(0).getReg()).isVector(),
4572 /* IsFP = */ false)
4573 : 0;
4574 return true;
4575}
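// For example, %cmp:_(s1) = G_ICMP intpred(ult), %x, 0 is always false
// (nothing is unsigned-less-than zero), so MatchInfo becomes 0 and %cmp can
// be replaced by that constant.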
4576
4577bool CombinerHelper::matchICmpToLHSKnownBits(
4578 MachineInstr &MI,
4579 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4580 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
4581 // Given:
4582 //
4583 // %x = G_WHATEVER (... x is known to be 0 or 1 ...)
4584 // %cmp = G_ICMP ne %x, 0
4585 //
4586 // Or:
4587 //
4588 // %x = G_WHATEVER (... x is known to be 0 or 1 ...)
4589 // %cmp = G_ICMP eq %x, 1
4590 //
4591 // We can replace %cmp with %x assuming true is 1 on the target.
4592 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
4593 if (!CmpInst::isEquality(Pred))
4594 return false;
4595 Register Dst = MI.getOperand(0).getReg();
4596 LLT DstTy = MRI.getType(Dst);
4597 if (getICmpTrueVal(getTargetLowering(), DstTy.isVector(),
4598 /* IsFP = */ false) != 1)
4599 return false;
4600 int64_t OneOrZero = Pred == CmpInst::ICMP_EQ;
4601 if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICst(OneOrZero)))
4602 return false;
4603 Register LHS = MI.getOperand(2).getReg();
4604 auto KnownLHS = VT->getKnownBits(LHS);
4605 if (KnownLHS.getMinValue() != 0 || KnownLHS.getMaxValue() != 1)
4606 return false;
4607 // Make sure replacing Dst with the LHS is a legal operation.
4608 LLT LHSTy = MRI.getType(LHS);
4609 unsigned LHSSize = LHSTy.getSizeInBits();
4610 unsigned DstSize = DstTy.getSizeInBits();
4611 unsigned Op = TargetOpcode::COPY;
4612 if (DstSize != LHSSize)
4613 Op = DstSize < LHSSize ? TargetOpcode::G_TRUNC : TargetOpcode::G_ZEXT;
4614 if (!isLegalOrBeforeLegalizer({Op, {DstTy, LHSTy}}))
4615 return false;
4616 MatchInfo = [=](MachineIRBuilder &B) { B.buildInstr(Op, {Dst}, {LHS}); };
4617 return true;
4618}
4619
4620// Replace (and (or x, c1), c2) with (and x, c2) iff c1 & c2 == 0
4621bool CombinerHelper::matchAndOrDisjointMask(
4622 MachineInstr &MI,
4623 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4624 assert(MI.getOpcode() == TargetOpcode::G_AND);
4625
4626 // Ignore vector types to simplify matching the two constants.
4627 // TODO: do this for vectors and scalars via a demanded bits analysis.
4628 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4629 if (Ty.isVector())
4630 return false;
4631
4632 Register Src;
4633 Register AndMaskReg;
4634 int64_t AndMaskBits;
4635 int64_t OrMaskBits;
4636 if (!mi_match(MI, MRI,
4637 m_GAnd(m_GOr(m_Reg(Src), m_ICst(OrMaskBits)),
4638 m_all_of(m_ICst(AndMaskBits), m_Reg(AndMaskReg)))))
4639 return false;
4640
4641 // Check if OrMask could turn on any bits in Src.
4642 if (AndMaskBits & OrMaskBits)
4643 return false;
4644
4645 MatchInfo = [=, &MI](MachineIRBuilder &B) {
4646 Observer.changingInstr(MI);
4647 // Canonicalize the result to have the constant on the RHS.
4648 if (MI.getOperand(1).getReg() == AndMaskReg)
4649 MI.getOperand(2).setReg(AndMaskReg);
4650 MI.getOperand(1).setReg(Src);
4651 Observer.changedInstr(MI);
4652 };
4653 return true;
4654}
4655
4656/// Form a G_SBFX from a G_SEXT_INREG fed by a right shift.
4657bool CombinerHelper::matchBitfieldExtractFromSExtInReg(
4658 MachineInstr &MI,
4659 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4660 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
4661 Register Dst = MI.getOperand(0).getReg();
4662 Register Src = MI.getOperand(1).getReg();
4663 LLT Ty = MRI.getType(Src);
4664 LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
4665 if (!LI || !LI->isLegalOrCustom({TargetOpcode::G_SBFX, {Ty, ExtractTy}}))
4666 return false;
4667 int64_t Width = MI.getOperand(2).getImm();
4668 Register ShiftSrc;
4669 int64_t ShiftImm;
4670 if (!mi_match(
4671 Src, MRI,
4672 m_OneNonDBGUse(m_any_of(m_GAShr(m_Reg(ShiftSrc), m_ICst(ShiftImm)),
4673 m_GLShr(m_Reg(ShiftSrc), m_ICst(ShiftImm))))))
4674 return false;
4675 if (ShiftImm < 0 || ShiftImm + Width > Ty.getScalarSizeInBits())
4676 return false;
4677
4678 MatchInfo = [=](MachineIRBuilder &B) {
4679 auto Cst1 = B.buildConstant(ExtractTy, ShiftImm);
4680 auto Cst2 = B.buildConstant(ExtractTy, Width);
4681 B.buildSbfx(Dst, ShiftSrc, Cst1, Cst2);
4682 };
4683 return true;
4684}
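// For example, assuming s32 and a legal G_SBFX:
//   %shr:_(s32) = G_ASHR %x, 4
//   %dst:_(s32) = G_SEXT_INREG %shr, 8
// sign-extends the 8 bits of %x starting at bit 4, so it becomes
//   %dst:_(s32) = G_SBFX %x, 4, 8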
4685
4686/// Form a G_UBFX from "(a srl b) & mask", where b and mask are constants.
4687bool CombinerHelper::matchBitfieldExtractFromAnd(MachineInstr &MI,
4688 BuildFnTy &MatchInfo) const {
4689 GAnd *And = cast<GAnd>(&MI);
4690 Register Dst = And->getReg(0);
4691 LLT Ty = MRI.getType(Dst);
4692 LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
4693 // Note that isLegalOrBeforeLegalizer is stricter and does not take custom
4694 // into account.
4695 if (LI && !LI->isLegalOrCustom({TargetOpcode::G_UBFX, {Ty, ExtractTy}}))
4696 return false;
4697
4698 int64_t AndImm, LSBImm;
4699 Register ShiftSrc;
4700 const unsigned Size = Ty.getScalarSizeInBits();
4701 if (!mi_match(And->getReg(0), MRI,
4702 m_GAnd(m_OneNonDBGUse(m_GLShr(m_Reg(ShiftSrc), m_ICst(LSBImm))),
4703 m_ICst(AndImm))))
4704 return false;
4705
4706 // The mask is a mask of the low bits iff imm & (imm+1) == 0.
4707 auto MaybeMask = static_cast<uint64_t>(AndImm);
4708 if (MaybeMask & (MaybeMask + 1))
4709 return false;
4710
4711 // LSB must fit within the register.
4712 if (static_cast<uint64_t>(LSBImm) >= Size)
4713 return false;
4714
4715 uint64_t Width = APInt(Size, AndImm).countr_one();
4716 MatchInfo = [=](MachineIRBuilder &B) {
4717 auto WidthCst = B.buildConstant(ExtractTy, Width);
4718 auto LSBCst = B.buildConstant(ExtractTy, LSBImm);
4719 B.buildInstr(TargetOpcode::G_UBFX, {Dst}, {ShiftSrc, LSBCst, WidthCst});
4720 };
4721 return true;
4722}
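// For example, assuming s32 and a legal G_UBFX:
//   %shr:_(s32) = G_LSHR %x, 4
//   %dst:_(s32) = G_AND %shr, 255
// uses a mask of 8 contiguous low bits, so it becomes
//   %dst:_(s32) = G_UBFX %x, 4, 8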
4723
4724bool CombinerHelper::matchBitfieldExtractFromShr(
4725 MachineInstr &MI,
4726 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4727 const unsigned Opcode = MI.getOpcode();
4728 assert(Opcode == TargetOpcode::G_ASHR || Opcode == TargetOpcode::G_LSHR);
4729
4730 const Register Dst = MI.getOperand(0).getReg();
4731
4732 const unsigned ExtrOpcode = Opcode == TargetOpcode::G_ASHR
4733 ? TargetOpcode::G_SBFX
4734 : TargetOpcode::G_UBFX;
4735
4736 // Check if the type we would use for the extract is legal
4737 LLT Ty = MRI.getType(Dst);
4738 LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
4739 if (!LI || !LI->isLegalOrCustom({ExtrOpcode, {Ty, ExtractTy}}))
4740 return false;
4741
4742 Register ShlSrc;
4743 int64_t ShrAmt;
4744 int64_t ShlAmt;
4745 const unsigned Size = Ty.getScalarSizeInBits();
4746
4747 // Try to match shr (shl x, c1), c2
4748 if (!mi_match(Dst, MRI,
4749 m_BinOp(Opcode,
4750 m_OneNonDBGUse(m_GShl(m_Reg(ShlSrc), m_ICst(ShlAmt))),
4751 m_ICst(ShrAmt))))
4752 return false;
4753
4754 // Make sure that the shift sizes can fit a bitfield extract
4755 if (ShlAmt < 0 || ShlAmt > ShrAmt || ShrAmt >= Size)
4756 return false;
4757
4758 // Skip this combine if the G_SEXT_INREG combine could handle it
4759 if (Opcode == TargetOpcode::G_ASHR && ShlAmt == ShrAmt)
4760 return false;
4761
4762 // Calculate start position and width of the extract
4763 const int64_t Pos = ShrAmt - ShlAmt;
4764 const int64_t Width = Size - ShrAmt;
4765
4766 MatchInfo = [=](MachineIRBuilder &B) {
4767 auto WidthCst = B.buildConstant(ExtractTy, Width);
4768 auto PosCst = B.buildConstant(ExtractTy, Pos);
4769 B.buildInstr(ExtrOpcode, {Dst}, {ShlSrc, PosCst, WidthCst});
4770 };
4771 return true;
4772}
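// For example, assuming s32:
//   %shl:_(s32) = G_SHL %x, 4
//   %dst:_(s32) = G_LSHR %shl, 8
// keeps the 24 bits of %x starting at bit 4 (Pos = 8 - 4, Width = 32 - 8),
// so it becomes
//   %dst:_(s32) = G_UBFX %x, 4, 24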
4773
4774bool CombinerHelper::matchBitfieldExtractFromShrAnd(
4775 MachineInstr &MI,
4776 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4777 const unsigned Opcode = MI.getOpcode();
4778 assert(Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_ASHR);
4779
4780 const Register Dst = MI.getOperand(0).getReg();
4781 LLT Ty = MRI.getType(Dst);
4782 LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
4783 if (LI && !LI->isLegalOrCustom({TargetOpcode::G_UBFX, {Ty, ExtractTy}}))
4784 return false;
4785
4786 // Try to match shr (and x, c1), c2
4787 Register AndSrc;
4788 int64_t ShrAmt;
4789 int64_t SMask;
4790 if (!mi_match(Dst, MRI,
4791 m_BinOp(Opcode,
4792 m_OneNonDBGUse(m_GAnd(m_Reg(AndSrc), m_ICst(SMask))),
4793 m_ICst(ShrAmt))))
4794 return false;
4795
4796 const unsigned Size = Ty.getScalarSizeInBits();
4797 if (ShrAmt < 0 || ShrAmt >= Size)
4798 return false;
4799
4800 // If the shift subsumes the mask, emit the 0 directly.
4801 if (0 == (SMask >> ShrAmt)) {
4802 MatchInfo = [=](MachineIRBuilder &B) {
4803 B.buildConstant(Dst, 0);
4804 };
4805 return true;
4806 }
4807
4808 // Check that ubfx can do the extraction, with no holes in the mask.
4809 uint64_t UMask = SMask;
4810 UMask |= maskTrailingOnes<uint64_t>(ShrAmt);
4811 UMask &= maskTrailingOnes<uint64_t>(Size);
4812 if (!isMask_64(UMask))
4813 return false;
4814
4815 // Calculate start position and width of the extract.
4816 const int64_t Pos = ShrAmt;
4817 const int64_t Width = llvm::countr_one(UMask) - ShrAmt;
4818
4819 // It's preferable to keep the shift, rather than form G_SBFX.
4820 // TODO: remove the G_AND via demanded bits analysis.
4821 if (Opcode == TargetOpcode::G_ASHR && Width + ShrAmt == Size)
4822 return false;
4823
4824 MatchInfo = [=](MachineIRBuilder &B) {
4825 auto WidthCst = B.buildConstant(ExtractTy, Width);
4826 auto PosCst = B.buildConstant(ExtractTy, Pos);
4827 B.buildInstr(TargetOpcode::G_UBFX, {Dst}, {AndSrc, PosCst, WidthCst});
4828 };
4829 return true;
4830}
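// For example, assuming s32:
//   %and:_(s32) = G_AND %x, 0xff0
//   %dst:_(s32) = G_LSHR %and, 4
// the shifted mask has no holes (0xff0 >> 4 == 0xff), so it becomes
//   %dst:_(s32) = G_UBFX %x, 4, 8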
4831
4832bool CombinerHelper::reassociationCanBreakAddressingModePattern(
4833 MachineInstr &MI) const {
4834 auto &PtrAdd = cast<GPtrAdd>(MI);
4835
4836 Register Src1Reg = PtrAdd.getBaseReg();
4837 auto *Src1Def = getOpcodeDef<GPtrAdd>(Src1Reg, MRI);
4838 if (!Src1Def)
4839 return false;
4840
4841 Register Src2Reg = PtrAdd.getOffsetReg();
4842
4843 if (MRI.hasOneNonDBGUse(Src1Reg))
4844 return false;
4845
4846 auto C1 = getIConstantVRegVal(Src1Def->getOffsetReg(), MRI);
4847 if (!C1)
4848 return false;
4849 auto C2 = getIConstantVRegVal(Src2Reg, MRI);
4850 if (!C2)
4851 return false;
4852
4853 const APInt &C1APIntVal = *C1;
4854 const APInt &C2APIntVal = *C2;
4855 const int64_t CombinedValue = (C1APIntVal + C2APIntVal).getSExtValue();
4856
4857 for (auto &UseMI : MRI.use_nodbg_instructions(PtrAdd.getReg(0))) {
4858 // This combine may end up running before ptrtoint/inttoptr combines
4859 // manage to eliminate redundant conversions, so try to look through them.
4860 MachineInstr *ConvUseMI = &UseMI;
4861 unsigned ConvUseOpc = ConvUseMI->getOpcode();
4862 while (ConvUseOpc == TargetOpcode::G_INTTOPTR ||
4863 ConvUseOpc == TargetOpcode::G_PTRTOINT) {
4864 Register DefReg = ConvUseMI->getOperand(0).getReg();
4865 if (!MRI.hasOneNonDBGUse(DefReg))
4866 break;
4867 ConvUseMI = &*MRI.use_instr_nodbg_begin(DefReg);
4868 ConvUseOpc = ConvUseMI->getOpcode();
4869 }
4870 auto *LdStMI = dyn_cast<GLoadStore>(ConvUseMI);
4871 if (!LdStMI)
4872 continue;
4873 // Is x[offset2] already not a legal addressing mode? If so then
4874 // reassociating the constants breaks nothing (we test offset2 because
4875 // that's the one we hope to fold into the load or store).
4876 TargetLoweringBase::AddrMode AM;
4877 AM.HasBaseReg = true;
4878 AM.BaseOffs = C2APIntVal.getSExtValue();
4879 unsigned AS = MRI.getType(LdStMI->getPointerReg()).getAddressSpace();
4880 Type *AccessTy = getTypeForLLT(LdStMI->getMMO().getMemoryType(),
4881 PtrAdd.getMF()->getFunction().getContext());
4882 const auto &TLI = *PtrAdd.getMF()->getSubtarget().getTargetLowering();
4883 if (!TLI.isLegalAddressingMode(PtrAdd.getMF()->getDataLayout(), AM,
4884 AccessTy, AS))
4885 continue;
4886
4887 // Would x[offset1+offset2] still be a legal addressing mode?
4888 AM.BaseOffs = CombinedValue;
4889 if (!TLI.isLegalAddressingMode(PtrAdd.getMF()->getDataLayout(), AM,
4890 AccessTy, AS))
4891 return true;
4892 }
4893
4894 return false;
4895}
4896
4897bool CombinerHelper::matchReassocConstantInnerRHS(GPtrAdd &MI,
4898 MachineInstr *RHS,
4899 BuildFnTy &MatchInfo) const {
4900 // G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C)
4901 Register Src1Reg = MI.getOperand(1).getReg();
4902 if (RHS->getOpcode() != TargetOpcode::G_ADD)
4903 return false;
4904 auto C2 = getIConstantVRegVal(RHS->getOperand(2).getReg(), MRI);
4905 if (!C2)
4906 return false;
4907
4908 // If both additions are nuw, the reassociated additions are also nuw.
4909 // If the original G_PTR_ADD is additionally nusw, X and C are both not
4910 // negative, so BASE+X is between BASE and BASE+(X+C). The new G_PTR_ADDs are
4911 // therefore also nusw.
4912 // If the original G_PTR_ADD is additionally inbounds (which implies nusw),
4913 // the new G_PTR_ADDs are then also inbounds.
4914 unsigned PtrAddFlags = MI.getFlags();
4915 unsigned AddFlags = RHS->getFlags();
4916 bool IsNoUWrap = PtrAddFlags & AddFlags & MachineInstr::MIFlag::NoUWrap;
4917 bool IsNoUSWrap = IsNoUWrap && (PtrAddFlags & MachineInstr::MIFlag::NoUSWrap);
4918 bool IsInBounds = IsNoUWrap && (PtrAddFlags & MachineInstr::MIFlag::InBounds);
4919 unsigned Flags = 0;
4920 if (IsNoUWrap)
4921 Flags |= MachineInstr::MIFlag::NoUWrap;
4922 if (IsNoUSWrap)
4923 Flags |= MachineInstr::MIFlag::NoUSWrap;
4924 if (IsInBounds)
4925 Flags |= MachineInstr::MIFlag::InBounds;
4926
4927 MatchInfo = [=, &MI](MachineIRBuilder &B) {
4928 LLT PtrTy = MRI.getType(MI.getOperand(0).getReg());
4929
4930 auto NewBase =
4931 Builder.buildPtrAdd(PtrTy, Src1Reg, RHS->getOperand(1).getReg(), Flags);
4932 Observer.changingInstr(MI);
4933 MI.getOperand(1).setReg(NewBase.getReg(0));
4934 MI.getOperand(2).setReg(RHS->getOperand(2).getReg());
4935 MI.setFlags(Flags);
4936 Observer.changedInstr(MI);
4937 };
4938 return !reassociationCanBreakAddressingModePattern(MI);
4939}
4940
4941bool CombinerHelper::matchReassocConstantInnerLHS(GPtrAdd &MI,
4942 MachineInstr *LHS,
4943 MachineInstr *RHS,
4944 BuildFnTy &MatchInfo) const {
4945 // G_PTR_ADD (G_PTR_ADD X, C), Y) -> (G_PTR_ADD (G_PTR_ADD(X, Y), C)
4946 // if and only if (G_PTR_ADD X, C) has one use.
4947 Register LHSBase;
4948 std::optional<ValueAndVReg> LHSCstOff;
4949 if (!mi_match(MI.getBaseReg(), MRI,
4950 m_OneNonDBGUse(m_GPtrAdd(m_Reg(LHSBase), m_GCst(LHSCstOff)))))
4951 return false;
4952
4953 auto *LHSPtrAdd = cast<GPtrAdd>(LHS);
4954
4955 // Reassociating nuw additions preserves nuw. If both original G_PTR_ADDs are
4956 // nuw and inbounds (which implies nusw), the offsets are both non-negative,
4957 // so the new G_PTR_ADDs are also inbounds.
4958 unsigned PtrAddFlags = MI.getFlags();
4959 unsigned LHSPtrAddFlags = LHSPtrAdd->getFlags();
4960 bool IsNoUWrap = PtrAddFlags & LHSPtrAddFlags & MachineInstr::MIFlag::NoUWrap;
4961 bool IsNoUSWrap = IsNoUWrap && (PtrAddFlags & LHSPtrAddFlags &
4962 MachineInstr::MIFlag::NoUSWrap);
4963 bool IsInBounds = IsNoUWrap && (PtrAddFlags & LHSPtrAddFlags &
4964 MachineInstr::MIFlag::InBounds);
4965 unsigned Flags = 0;
4966 if (IsNoUWrap)
4967 Flags |= MachineInstr::MIFlag::NoUWrap;
4968 if (IsNoUSWrap)
4969 Flags |= MachineInstr::MIFlag::NoUSWrap;
4970 if (IsInBounds)
4971 Flags |= MachineInstr::MIFlag::InBounds;
4972
4973 MatchInfo = [=, &MI](MachineIRBuilder &B) {
4974 // When we change LHSPtrAdd's offset register we might cause it to use a reg
4975 // before its def. Sink the instruction before the outer PTR_ADD to ensure
4976 // this doesn't happen.
4977 LHSPtrAdd->moveBefore(&MI);
4978 Register RHSReg = MI.getOffsetReg();
4979 // set VReg will cause type mismatch if it comes from extend/trunc
4980 auto NewCst = B.buildConstant(MRI.getType(RHSReg), LHSCstOff->Value);
4981 Observer.changingInstr(MI);
4982 MI.getOperand(2).setReg(NewCst.getReg(0));
4983 MI.setFlags(Flags);
4984 Observer.changedInstr(MI);
4985 Observer.changingInstr(*LHSPtrAdd);
4986 LHSPtrAdd->getOperand(2).setReg(RHSReg);
4987 LHSPtrAdd->setFlags(Flags);
4988 Observer.changedInstr(*LHSPtrAdd);
4989 };
4990 return !reassociationCanBreakAddressingModePattern(MI);
4991}
4992
4993bool CombinerHelper::matchReassocFoldConstantsInSubTree(
4994 GPtrAdd &MI, MachineInstr *LHS, MachineInstr *RHS,
4995 BuildFnTy &MatchInfo) const {
4996 // G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)
4997 auto *LHSPtrAdd = dyn_cast<GPtrAdd>(LHS);
4998 if (!LHSPtrAdd)
4999 return false;
5000
5001 Register Src2Reg = MI.getOperand(2).getReg();
5002 Register LHSSrc1 = LHSPtrAdd->getBaseReg();
5003 Register LHSSrc2 = LHSPtrAdd->getOffsetReg();
5004 auto C1 = getIConstantVRegVal(LHSSrc2, MRI);
5005 if (!C1)
5006 return false;
5007 auto C2 = getIConstantVRegVal(Src2Reg, MRI);
5008 if (!C2)
5009 return false;
5010
5011 // Reassociating nuw additions preserves nuw. If both original G_PTR_ADDs are
5012 // inbounds, reaching the same result in one G_PTR_ADD is also inbounds.
5013 // The nusw constraints are satisfied because imm1+imm2 cannot exceed the
5014 // largest signed integer that fits into the index type, which is the maximum
5015 // size of allocated objects according to the IR Language Reference.
5016 unsigned PtrAddFlags = MI.getFlags();
5017 unsigned LHSPtrAddFlags = LHSPtrAdd->getFlags();
5018 bool IsNoUWrap = PtrAddFlags & LHSPtrAddFlags & MachineInstr::MIFlag::NoUWrap;
5019 bool IsInBounds =
5020 PtrAddFlags & LHSPtrAddFlags & MachineInstr::MIFlag::InBounds;
5021 unsigned Flags = 0;
5022 if (IsNoUWrap)
5023 Flags |= MachineInstr::MIFlag::NoUWrap;
5024 if (IsInBounds) {
5025 Flags |= MachineInstr::MIFlag::NoUSWrap;
5026 Flags |= MachineInstr::MIFlag::InBounds;
5027 }
5028
5029 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5030 auto NewCst = B.buildConstant(MRI.getType(Src2Reg), *C1 + *C2);
5031 Observer.changingInstr(MI);
5032 MI.getOperand(1).setReg(LHSSrc1);
5033 MI.getOperand(2).setReg(NewCst.getReg(0));
5034 MI.setFlags(Flags);
5035 Observer.changedInstr(MI);
5036 };
5037 return !reassociationCanBreakAddressingModePattern(MI);
5038}
5039
5040bool CombinerHelper::matchReassocPtrAdd(MachineInstr &MI,
5041 BuildFnTy &MatchInfo) const {
5042 auto &PtrAdd = cast<GPtrAdd>(MI);
5043 // We're trying to match a few pointer computation patterns here for
5044 // re-association opportunities.
5045 // 1) Isolating a constant operand to be on the RHS, e.g.:
5046 // G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C)
5047 //
5048 // 2) Folding two constants in each sub-tree as long as such folding
5049 // doesn't break a legal addressing mode.
5050 // G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)
5051 //
5052 // 3) Move a constant from the LHS of an inner op to the RHS of the outer.
5053 // (G_PTR_ADD (G_PTR_ADD X, C), Y) -> (G_PTR_ADD (G_PTR_ADD X, Y), C)
5054 // iff (G_PTR_ADD X, C) has one use.
5055 MachineInstr *LHS = MRI.getVRegDef(PtrAdd.getBaseReg());
5056 MachineInstr *RHS = MRI.getVRegDef(PtrAdd.getOffsetReg());
5057
5058 // Try to match example 2.
5059 if (matchReassocFoldConstantsInSubTree(PtrAdd, LHS, RHS, MatchInfo))
5060 return true;
5061
5062 // Try to match example 3.
5063 if (matchReassocConstantInnerLHS(PtrAdd, LHS, RHS, MatchInfo))
5064 return true;
5065
5066 // Try to match example 1.
5067 if (matchReassocConstantInnerRHS(PtrAdd, RHS, MatchInfo))
5068 return true;
5069
5070 return false;
5071}
5072bool CombinerHelper::tryReassocBinOp(unsigned Opc, Register DstReg,
5073 Register OpLHS, Register OpRHS,
5074 BuildFnTy &MatchInfo) const {
5075 LLT OpRHSTy = MRI.getType(OpRHS);
5076 MachineInstr *OpLHSDef = MRI.getVRegDef(OpLHS);
5077
5078 if (OpLHSDef->getOpcode() != Opc)
5079 return false;
5080
5081 MachineInstr *OpRHSDef = MRI.getVRegDef(OpRHS);
5082 Register OpLHSLHS = OpLHSDef->getOperand(1).getReg();
5083 Register OpLHSRHS = OpLHSDef->getOperand(2).getReg();
5084
5085 // If the inner op is (X op C), pull the constant out so it can be folded with
5086 // other constants in the expression tree. Folding is not guaranteed so we
5087 // might have (C1 op C2). In that case do not pull a constant out because it
5088 // won't help and can lead to infinite loops.
5089 if (isConstantOrConstantSplatVector(*MRI.getVRegDef(OpLHSRHS), MRI) &&
5090 !isConstantOrConstantSplatVector(*MRI.getVRegDef(OpLHSLHS), MRI)) {
5091 if (isConstantOrConstantSplatVector(*OpRHSDef, MRI)) {
5092 // (Opc (Opc X, C1), C2) -> (Opc X, (Opc C1, C2))
5093 MatchInfo = [=](MachineIRBuilder &B) {
5094 auto NewCst = B.buildInstr(Opc, {OpRHSTy}, {OpLHSRHS, OpRHS});
5095 B.buildInstr(Opc, {DstReg}, {OpLHSLHS, NewCst});
5096 };
5097 return true;
5098 }
5099 if (getTargetLowering().isReassocProfitable(MRI, OpLHS, OpRHS)) {
5100 // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
5101 // iff (op x, c1) has one use
5102 MatchInfo = [=](MachineIRBuilder &B) {
5103 auto NewLHSLHS = B.buildInstr(Opc, {OpRHSTy}, {OpLHSLHS, OpRHS});
5104 B.buildInstr(Opc, {DstReg}, {NewLHSLHS, OpLHSRHS});
5105 };
5106 return true;
5107 }
5108 }
5109
5110 return false;
5111}
5112
5114 BuildFnTy &MatchInfo) const {
5115 // We don't check if the reassociation will break a legal addressing mode
5116 // here since pointer arithmetic is handled by G_PTR_ADD.
5117 unsigned Opc = MI.getOpcode();
5118 Register DstReg = MI.getOperand(0).getReg();
5119 Register LHSReg = MI.getOperand(1).getReg();
5120 Register RHSReg = MI.getOperand(2).getReg();
5121
5122 if (tryReassocBinOp(Opc, DstReg, LHSReg, RHSReg, MatchInfo))
5123 return true;
5124 if (tryReassocBinOp(Opc, DstReg, RHSReg, LHSReg, MatchInfo))
5125 return true;
5126 return false;
5127}
5128
5130 APInt &MatchInfo) const {
5131 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
5132 Register SrcOp = MI.getOperand(1).getReg();
5133
5134 if (auto MaybeCst = ConstantFoldCastOp(MI.getOpcode(), DstTy, SrcOp, MRI)) {
5135 MatchInfo = *MaybeCst;
5136 return true;
5137 }
5138
5139 return false;
5140}
5141
5143 APInt &MatchInfo) const {
5144 Register Op1 = MI.getOperand(1).getReg();
5145 Register Op2 = MI.getOperand(2).getReg();
5146 auto MaybeCst = ConstantFoldBinOp(MI.getOpcode(), Op1, Op2, MRI);
5147 if (!MaybeCst)
5148 return false;
5149 MatchInfo = *MaybeCst;
5150 return true;
5151}
5152
5154 ConstantFP *&MatchInfo) const {
5155 Register Op1 = MI.getOperand(1).getReg();
5156 Register Op2 = MI.getOperand(2).getReg();
5157 auto MaybeCst = ConstantFoldFPBinOp(MI.getOpcode(), Op1, Op2, MRI);
5158 if (!MaybeCst)
5159 return false;
5160 MatchInfo =
5161 ConstantFP::get(MI.getMF()->getFunction().getContext(), *MaybeCst);
5162 return true;
5163}
5164
5166 ConstantFP *&MatchInfo) const {
5167 assert(MI.getOpcode() == TargetOpcode::G_FMA ||
5168 MI.getOpcode() == TargetOpcode::G_FMAD);
5169 auto [_, Op1, Op2, Op3] = MI.getFirst4Regs();
5170
5171 const ConstantFP *Op3Cst = getConstantFPVRegVal(Op3, MRI);
5172 if (!Op3Cst)
5173 return false;
5174
5175 const ConstantFP *Op2Cst = getConstantFPVRegVal(Op2, MRI);
5176 if (!Op2Cst)
5177 return false;
5178
5179 const ConstantFP *Op1Cst = getConstantFPVRegVal(Op1, MRI);
5180 if (!Op1Cst)
5181 return false;
5182
5183 APFloat Op1F = Op1Cst->getValueAPF();
5184 Op1F.fusedMultiplyAdd(Op2Cst->getValueAPF(), Op3Cst->getValueAPF(),
5185 APFloat::rmNearestTiesToEven);
5186 MatchInfo = ConstantFP::get(MI.getMF()->getFunction().getContext(), Op1F);
5187 return true;
5188}
5189
5192 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
5193 // Look for a binop feeding into an AND with a mask:
5194 //
5195 // %add = G_ADD %lhs, %rhs
5196 // %and = G_AND %add, 000...11111111
5197 //
5198 // Check if it's possible to perform the binop at a narrower width and zext
5199 // back to the original width like so:
5200 //
5201 // %narrow_lhs = G_TRUNC %lhs
5202 // %narrow_rhs = G_TRUNC %rhs
5203 // %narrow_add = G_ADD %narrow_lhs, %narrow_rhs
5204 // %new_add = G_ZEXT %narrow_add
5205 // %and = G_AND %new_add, 000...11111111
5206 //
5207 // This can allow later combines to eliminate the G_AND if it turns out
5208 // that the mask is irrelevant.
5209 assert(MI.getOpcode() == TargetOpcode::G_AND);
5210 Register Dst = MI.getOperand(0).getReg();
5211 Register AndLHS = MI.getOperand(1).getReg();
5212 Register AndRHS = MI.getOperand(2).getReg();
5213 LLT WideTy = MRI.getType(Dst);
5214
5215 // If the potential binop has more than one use, then it's possible that one
5216 // of those uses will need its full width.
5217 if (!WideTy.isScalar() || !MRI.hasOneNonDBGUse(AndLHS))
5218 return false;
5219
5220 // Check if the LHS feeding the AND is impacted by the high bits that we're
5221 // masking out.
5222 //
5223 // e.g. for 64-bit x, y:
5224 //
5225 // add_64(x, y) & 65535 == zext(add_16(trunc(x), trunc(y))) & 65535
5226 MachineInstr *LHSInst = getDefIgnoringCopies(AndLHS, MRI);
5227 if (!LHSInst)
5228 return false;
5229 unsigned LHSOpc = LHSInst->getOpcode();
5230 switch (LHSOpc) {
5231 default:
5232 return false;
5233 case TargetOpcode::G_ADD:
5234 case TargetOpcode::G_SUB:
5235 case TargetOpcode::G_MUL:
5236 case TargetOpcode::G_AND:
5237 case TargetOpcode::G_OR:
5238 case TargetOpcode::G_XOR:
5239 break;
5240 }
5241
5242 // Find the mask on the RHS.
5243 auto Cst = getIConstantVRegValWithLookThrough(AndRHS, MRI);
5244 if (!Cst)
5245 return false;
5246 auto Mask = Cst->Value;
5247 if (!Mask.isMask())
5248 return false;
5249
5250 // No point in combining if there's nothing to truncate.
5251 unsigned NarrowWidth = Mask.countr_one();
5252 if (NarrowWidth == WideTy.getSizeInBits())
5253 return false;
5254 LLT NarrowTy = LLT::scalar(NarrowWidth);
5255
5256 // Check if adding the zext + truncates could be harmful.
5257 auto &MF = *MI.getMF();
5258 const auto &TLI = getTargetLowering();
5259 LLVMContext &Ctx = MF.getFunction().getContext();
5260 if (!TLI.isTruncateFree(WideTy, NarrowTy, Ctx) ||
5261 !TLI.isZExtFree(NarrowTy, WideTy, Ctx))
5262 return false;
5263 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_TRUNC, {NarrowTy, WideTy}}) ||
5264 !isLegalOrBeforeLegalizer({TargetOpcode::G_ZEXT, {WideTy, NarrowTy}}))
5265 return false;
5266 Register BinOpLHS = LHSInst->getOperand(1).getReg();
5267 Register BinOpRHS = LHSInst->getOperand(2).getReg();
5268 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5269 auto NarrowLHS = Builder.buildTrunc(NarrowTy, BinOpLHS);
5270 auto NarrowRHS = Builder.buildTrunc(NarrowTy, BinOpRHS);
5271 auto NarrowBinOp =
5272 Builder.buildInstr(LHSOpc, {NarrowTy}, {NarrowLHS, NarrowRHS});
5273 auto Ext = Builder.buildZExt(WideTy, NarrowBinOp);
5274 Observer.changingInstr(MI);
5275 MI.getOperand(1).setReg(Ext.getReg(0));
5276 Observer.changedInstr(MI);
5277 };
5278 return true;
5279}
5280
5282 BuildFnTy &MatchInfo) const {
5283 unsigned Opc = MI.getOpcode();
5284 assert(Opc == TargetOpcode::G_UMULO || Opc == TargetOpcode::G_SMULO);
5285
5286 if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICstOrSplat(2)))
5287 return false;
5288
5289 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5290 Observer.changingInstr(MI);
5291 unsigned NewOpc = Opc == TargetOpcode::G_UMULO ? TargetOpcode::G_UADDO
5292 : TargetOpcode::G_SADDO;
5293 MI.setDesc(Builder.getTII().get(NewOpc));
5294 MI.getOperand(3).setReg(MI.getOperand(2).getReg());
5295 Observer.changedInstr(MI);
5296 };
5297 return true;
5298}
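// Illustrative example for the combine above (not from the original source):
//   %lo, %ov = G_UMULO %x, 2   becomes   %lo, %ov = G_UADDO %x, %x
// i.e. multiplying by two overflows exactly when adding the value to itself
// overflows.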
5299
5301 BuildFnTy &MatchInfo) const {
5302 // (G_*MULO x, 0) -> 0 + no carry out
5303 assert(MI.getOpcode() == TargetOpcode::G_UMULO ||
5304 MI.getOpcode() == TargetOpcode::G_SMULO);
5305 if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICstOrSplat(0)))
5306 return false;
5307 Register Dst = MI.getOperand(0).getReg();
5308 Register Carry = MI.getOperand(1).getReg();
5309 if (!isConstantLegalOrBeforeLegalizer(MRI.getType(Dst)) ||
5310 !isConstantLegalOrBeforeLegalizer(MRI.getType(Carry)))
5311 return false;
5312 MatchInfo = [=](MachineIRBuilder &B) {
5313 B.buildConstant(Dst, 0);
5314 B.buildConstant(Carry, 0);
5315 };
5316 return true;
5317}
5318
5320 BuildFnTy &MatchInfo) const {
5321 // (G_*ADDE x, y, 0) -> (G_*ADDO x, y)
5322 // (G_*SUBE x, y, 0) -> (G_*SUBO x, y)
5323 assert(MI.getOpcode() == TargetOpcode::G_UADDE ||
5324 MI.getOpcode() == TargetOpcode::G_SADDE ||
5325 MI.getOpcode() == TargetOpcode::G_USUBE ||
5326 MI.getOpcode() == TargetOpcode::G_SSUBE);
5327 if (!mi_match(MI.getOperand(4).getReg(), MRI, m_SpecificICstOrSplat(0)))
5328 return false;
5329 MatchInfo = [&](MachineIRBuilder &B) {
5330 unsigned NewOpcode;
5331 switch (MI.getOpcode()) {
5332 case TargetOpcode::G_UADDE:
5333 NewOpcode = TargetOpcode::G_UADDO;
5334 break;
5335 case TargetOpcode::G_SADDE:
5336 NewOpcode = TargetOpcode::G_SADDO;
5337 break;
5338 case TargetOpcode::G_USUBE:
5339 NewOpcode = TargetOpcode::G_USUBO;
5340 break;
5341 case TargetOpcode::G_SSUBE:
5342 NewOpcode = TargetOpcode::G_SSUBO;
5343 break;
5344 }
5345 Observer.changingInstr(MI);
5346 MI.setDesc(B.getTII().get(NewOpcode));
5347 MI.removeOperand(4);
5348 Observer.changedInstr(MI);
5349 };
5350 return true;
5351}
5352
5354 BuildFnTy &MatchInfo) const {
5355 assert(MI.getOpcode() == TargetOpcode::G_SUB);
5356 Register Dst = MI.getOperand(0).getReg();
5357 // (x + y) - z -> x (if y == z)
5358 // (x + y) - z -> y (if x == z)
5359 Register X, Y, Z;
5360 if (mi_match(Dst, MRI, m_GSub(m_GAdd(m_Reg(X), m_Reg(Y)), m_Reg(Z)))) {
5361 Register ReplaceReg;
5362 int64_t CstX, CstY;
5363 if (Y == Z || (mi_match(Y, MRI, m_ICstOrSplat(CstY)) &&
5364 mi_match(Z, MRI, m_SpecificICstOrSplat(CstY))))
5365 ReplaceReg = X;
5366 else if (X == Z || (mi_match(X, MRI, m_ICstOrSplat(CstX)) &&
5367 mi_match(Z, MRI, m_SpecificICstOrSplat(CstX))))
5368 ReplaceReg = Y;
5369 if (ReplaceReg) {
5370 MatchInfo = [=](MachineIRBuilder &B) { B.buildCopy(Dst, ReplaceReg); };
5371 return true;
5372 }
5373 }
5374
5375 // x - (y + z) -> 0 - y (if x == z)
5376 // x - (y + z) -> 0 - z (if x == y)
5377 if (mi_match(Dst, MRI, m_GSub(m_Reg(X), m_GAdd(m_Reg(Y), m_Reg(Z))))) {
5378 Register ReplaceReg;
5379 int64_t CstX;
5380 if (X == Z || (mi_match(X, MRI, m_ICstOrSplat(CstX)) &&
5381 mi_match(Z, MRI, m_SpecificICstOrSplat(CstX))))
5382 ReplaceReg = Y;
5383 else if (X == Y || (mi_match(X, MRI, m_ICstOrSplat(CstX)) &&
5384 mi_match(Y, MRI, m_SpecificICstOrSplat(CstX))))
5385 ReplaceReg = Z;
5386 if (ReplaceReg) {
5387 MatchInfo = [=](MachineIRBuilder &B) {
5388 auto Zero = B.buildConstant(MRI.getType(Dst), 0);
5389 B.buildSub(Dst, Zero, ReplaceReg);
5390 };
5391 return true;
5392 }
5393 }
5394 return false;
5395}
5396
5397MachineInstr *CombinerHelper::buildUDivOrURemUsingMul(MachineInstr &MI) const {
5398 unsigned Opcode = MI.getOpcode();
5399 assert(Opcode == TargetOpcode::G_UDIV || Opcode == TargetOpcode::G_UREM);
5400 auto &UDivorRem = cast<GenericMachineInstr>(MI);
5401 Register Dst = UDivorRem.getReg(0);
5402 Register LHS = UDivorRem.getReg(1);
5403 Register RHS = UDivorRem.getReg(2);
5404 LLT Ty = MRI.getType(Dst);
5405 LLT ScalarTy = Ty.getScalarType();
5406 const unsigned EltBits = ScalarTy.getScalarSizeInBits();
5407 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
5408 LLT ScalarShiftAmtTy = ShiftAmtTy.getScalarType();
5409
5410 auto &MIB = Builder;
5411
5412 bool UseSRL = false;
5413 SmallVector<Register, 16> Shifts, Factors;
5414 auto *RHSDefInstr = cast<GenericMachineInstr>(getDefIgnoringCopies(RHS, MRI));
5415 bool IsSplat = getIConstantSplatVal(*RHSDefInstr, MRI).has_value();
5416
5417 auto BuildExactUDIVPattern = [&](const Constant *C) {
5418 // Don't recompute inverses for each splat element.
5419 if (IsSplat && !Factors.empty()) {
5420 Shifts.push_back(Shifts[0]);
5421 Factors.push_back(Factors[0]);
5422 return true;
5423 }
5424
5425 auto *CI = cast<ConstantInt>(C);
5426 APInt Divisor = CI->getValue();
5427 unsigned Shift = Divisor.countr_zero();
5428 if (Shift) {
5429 Divisor.lshrInPlace(Shift);
5430 UseSRL = true;
5431 }
5432
5433 // Calculate the multiplicative inverse modulo BW.
5434 APInt Factor = Divisor.multiplicativeInverse();
5435 Shifts.push_back(MIB.buildConstant(ScalarShiftAmtTy, Shift).getReg(0));
5436 Factors.push_back(MIB.buildConstant(ScalarTy, Factor).getReg(0));
5437 return true;
5438 };
5439
5440 if (MI.getFlag(MachineInstr::MIFlag::IsExact)) {
5441 // Collect all magic values from the build vector.
5442 if (!matchUnaryPredicate(MRI, RHS, BuildExactUDIVPattern))
5443 llvm_unreachable("Expected unary predicate match to succeed");
5444
5445 Register Shift, Factor;
5446 if (Ty.isVector()) {
5447 Shift = MIB.buildBuildVector(ShiftAmtTy, Shifts).getReg(0);
5448 Factor = MIB.buildBuildVector(Ty, Factors).getReg(0);
5449 } else {
5450 Shift = Shifts[0];
5451 Factor = Factors[0];
5452 }
5453
5454 Register Res = LHS;
5455
5456 if (UseSRL)
5457 Res = MIB.buildLShr(Ty, Res, Shift, MachineInstr::IsExact).getReg(0);
5458
5459 return MIB.buildMul(Ty, Res, Factor);
5460 }
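 // Illustrative example (not from the original source): an exact 32-bit
 // G_UDIV by 6 becomes an exact lshr by 1 (removing the factor of 2) followed
 // by a multiply with the modular inverse of 3, 0xAAAAAAAB, since
 // 3 * 0xAAAAAAAB == 1 (mod 2^32).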
5461
5462 unsigned KnownLeadingZeros =
5463 VT ? VT->getKnownBits(LHS).countMinLeadingZeros() : 0;
5464
5465 bool UseNPQ = false;
5466 SmallVector<Register, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
5467 auto BuildUDIVPattern = [&](const Constant *C) {
5468 auto *CI = cast<ConstantInt>(C);
5469 const APInt &Divisor = CI->getValue();
5470
5471 bool SelNPQ = false;
5472 APInt Magic(Divisor.getBitWidth(), 0);
5473 unsigned PreShift = 0, PostShift = 0;
5474
5475 // Magic algorithm doesn't work for division by 1. We need to emit a select
5476 // at the end.
5477 // TODO: Use undef values for divisor of 1.
5478 if (!Divisor.isOne()) {
5479
5480 // UnsignedDivisionByConstantInfo doesn't work correctly if leading zeros
5481 // in the dividend exceed the leading zeros of the divisor.
5482 UnsignedDivisionByConstantInfo magics =
5483 UnsignedDivisionByConstantInfo::get(
5484 Divisor, std::min(KnownLeadingZeros, Divisor.countl_zero()));
5485
5486 Magic = std::move(magics.Magic);
5487
5488 assert(magics.PreShift < Divisor.getBitWidth() &&
5489 "We shouldn't generate an undefined shift!");
5490 assert(magics.PostShift < Divisor.getBitWidth() &&
5491 "We shouldn't generate an undefined shift!");
5492 assert((!magics.IsAdd || magics.PreShift == 0) && "Unexpected pre-shift");
5493 PreShift = magics.PreShift;
5494 PostShift = magics.PostShift;
5495 SelNPQ = magics.IsAdd;
5496 }
5497
5498 PreShifts.push_back(
5499 MIB.buildConstant(ScalarShiftAmtTy, PreShift).getReg(0));
5500 MagicFactors.push_back(MIB.buildConstant(ScalarTy, Magic).getReg(0));
5501 NPQFactors.push_back(
5502 MIB.buildConstant(ScalarTy,
5503 SelNPQ ? APInt::getOneBitSet(EltBits, EltBits - 1)
5504 : APInt::getZero(EltBits))
5505 .getReg(0));
5506 PostShifts.push_back(
5507 MIB.buildConstant(ScalarShiftAmtTy, PostShift).getReg(0));
5508 UseNPQ |= SelNPQ;
5509 return true;
5510 };
5511
5512 // Collect the shifts/magic values from each element.
5513 bool Matched = matchUnaryPredicate(MRI, RHS, BuildUDIVPattern);
5514 (void)Matched;
5515 assert(Matched && "Expected unary predicate match to succeed");
5516
5517 Register PreShift, PostShift, MagicFactor, NPQFactor;
5518 auto *RHSDef = getOpcodeDef<GBuildVector>(RHS, MRI);
5519 if (RHSDef) {
5520 PreShift = MIB.buildBuildVector(ShiftAmtTy, PreShifts).getReg(0);
5521 MagicFactor = MIB.buildBuildVector(Ty, MagicFactors).getReg(0);
5522 NPQFactor = MIB.buildBuildVector(Ty, NPQFactors).getReg(0);
5523 PostShift = MIB.buildBuildVector(ShiftAmtTy, PostShifts).getReg(0);
5524 } else {
5525 assert(MRI.getType(RHS).isScalar() &&
5526 "Non-build_vector operation should have been a scalar");
5527 PreShift = PreShifts[0];
5528 MagicFactor = MagicFactors[0];
5529 PostShift = PostShifts[0];
5530 }
5531
5532 Register Q = LHS;
5533 Q = MIB.buildLShr(Ty, Q, PreShift).getReg(0);
5534
5535 // Multiply the numerator (operand 0) by the magic value.
5536 Q = MIB.buildUMulH(Ty, Q, MagicFactor).getReg(0);
5537
5538 if (UseNPQ) {
5539 Register NPQ = MIB.buildSub(Ty, LHS, Q).getReg(0);
5540
5541 // For vectors we might have a mix of non-NPQ/NPQ paths, so use
5542 // G_UMULH to act as a SRL-by-1 for NPQ, else multiply by zero.
5543 if (Ty.isVector())
5544 NPQ = MIB.buildUMulH(Ty, NPQ, NPQFactor).getReg(0);
5545 else
5546 NPQ = MIB.buildLShr(Ty, NPQ, MIB.buildConstant(ShiftAmtTy, 1)).getReg(0);
5547
5548 Q = MIB.buildAdd(Ty, NPQ, Q).getReg(0);
5549 }
5550
5551 Q = MIB.buildLShr(Ty, Q, PostShift).getReg(0);
5552 auto One = MIB.buildConstant(Ty, 1);
5553 auto IsOne = MIB.buildICmp(
5554 CmpInst::Predicate::ICMP_EQ,
5555 Ty.isScalar() ? LLT::scalar(1) : Ty.changeElementSize(1), RHS, One);
5556 auto ret = MIB.buildSelect(Ty, IsOne, LHS, Q);
5557
5558 if (Opcode == TargetOpcode::G_UREM) {
5559 auto Prod = MIB.buildMul(Ty, ret, RHS);
5560 return MIB.buildSub(Ty, LHS, Prod);
5561 }
5562 return ret;
5563}
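// Illustrative example for the unsigned magic-number expansion above (not from
// the original source): for a 32-bit G_UDIV by 3 the per-element computation
// yields PreShift = 0, Magic = 0xAAAAAAAB, PostShift = 1 and no NPQ fixup, so
//   udiv %x, 3  ==>  lshr (umulh %x, 0xAAAAAAAB), 1
// because 3 * 0xAAAAAAAB == 2^33 + 1. For G_UREM the remainder is then
// recovered as %x - 3 * (%x / 3).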
5564
5566 unsigned Opcode = MI.getOpcode();
5567 assert(Opcode == TargetOpcode::G_UDIV || Opcode == TargetOpcode::G_UREM);
5568 Register Dst = MI.getOperand(0).getReg();
5569 Register RHS = MI.getOperand(2).getReg();
5570 LLT DstTy = MRI.getType(Dst);
5571
5572 auto &MF = *MI.getMF();
5573 AttributeList Attr = MF.getFunction().getAttributes();
5574 const auto &TLI = getTargetLowering();
5575 LLVMContext &Ctx = MF.getFunction().getContext();
5576 if (TLI.isIntDivCheap(getApproximateEVTForLLT(DstTy, Ctx), Attr))
5577 return false;
5578
5579 // Don't do this for minsize because the instruction sequence is usually
5580 // larger.
5581 if (MF.getFunction().hasMinSize())
5582 return false;
5583
5584 if (Opcode == TargetOpcode::G_UDIV &&
5585 MI.getFlag(MachineInstr::MIFlag::IsExact)) {
5586 return matchUnaryPredicate(
5587 MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); });
5588 }
5589
5590 auto *RHSDef = MRI.getVRegDef(RHS);
5591 if (!isConstantOrConstantVector(*RHSDef, MRI))
5592 return false;
5593
5594 // Don't do this if the types are not going to be legal.
5595 if (LI) {
5596 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_MUL, {DstTy, DstTy}}))
5597 return false;
5598 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMULH, {DstTy}}))
5599 return false;
5600 if (!isLegalOrBeforeLegalizer(
5601 {TargetOpcode::G_ICMP,
5602 {DstTy.isVector() ? DstTy.changeElementSize(1) : LLT::scalar(1),
5603 DstTy}}))
5604 return false;
5605 if (Opcode == TargetOpcode::G_UREM &&
5606 !isLegalOrBeforeLegalizer({TargetOpcode::G_SUB, {DstTy, DstTy}}))
5607 return false;
5608 }
5609
5610 return matchUnaryPredicate(
5611 MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); });
5612}
5613
5615 auto *NewMI = buildUDivOrURemUsingMul(MI);
5616 replaceSingleDefInstWithReg(MI, NewMI->getOperand(0).getReg());
5617}
5618
5620 unsigned Opcode = MI.getOpcode();
5621 assert(Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_SREM);
5622 Register Dst = MI.getOperand(0).getReg();
5623 Register RHS = MI.getOperand(2).getReg();
5624 LLT DstTy = MRI.getType(Dst);
5625 auto SizeInBits = DstTy.getScalarSizeInBits();
5626 LLT WideTy = DstTy.changeElementSize(SizeInBits * 2);
5627
5628 auto &MF = *MI.getMF();
5629 AttributeList Attr = MF.getFunction().getAttributes();
5630 const auto &TLI = getTargetLowering();
5631 LLVMContext &Ctx = MF.getFunction().getContext();
5632 if (TLI.isIntDivCheap(getApproximateEVTForLLT(DstTy, Ctx), Attr))
5633 return false;
5634
5635 // Don't do this for minsize because the instruction sequence is usually
5636 // larger.
5637 if (MF.getFunction().hasMinSize())
5638 return false;
5639
5640 // If the sdiv has an 'exact' flag we can use a simpler lowering.
5641 if (Opcode == TargetOpcode::G_SDIV &&
5642 MI.getFlag(MachineInstr::MIFlag::IsExact)) {
5643 return matchUnaryPredicate(
5644 MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); });
5645 }
5646
5647 auto *RHSDef = MRI.getVRegDef(RHS);
5648 if (!isConstantOrConstantVector(*RHSDef, MRI))
5649 return false;
5650
5651 // Don't do this if the types are not going to be legal.
5652 if (LI) {
5653 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_MUL, {DstTy, DstTy}}))
5654 return false;
5655 if (!isLegal({TargetOpcode::G_SMULH, {DstTy}}) &&
5656 !isLegalOrHasWidenScalar({TargetOpcode::G_MUL, {WideTy, WideTy}}))
5657 return false;
5658 if (Opcode == TargetOpcode::G_SREM &&
5659 !isLegalOrBeforeLegalizer({TargetOpcode::G_SUB, {DstTy, DstTy}}))
5660 return false;
5661 }
5662
5663 return matchUnaryPredicate(
5664 MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); });
5665}
5666
5668 auto *NewMI = buildSDivOrSRemUsingMul(MI);
5669 replaceSingleDefInstWithReg(MI, NewMI->getOperand(0).getReg());
5670}
5671
5672MachineInstr *CombinerHelper::buildSDivOrSRemUsingMul(MachineInstr &MI) const {
5673 unsigned Opcode = MI.getOpcode();
5674 assert(MI.getOpcode() == TargetOpcode::G_SDIV ||
5675 Opcode == TargetOpcode::G_SREM);
5676 auto &SDivorRem = cast<GenericMachineInstr>(MI);
5677 Register Dst = SDivorRem.getReg(0);
5678 Register LHS = SDivorRem.getReg(1);
5679 Register RHS = SDivorRem.getReg(2);
5680 LLT Ty = MRI.getType(Dst);
5681 LLT ScalarTy = Ty.getScalarType();
5682 const unsigned EltBits = ScalarTy.getScalarSizeInBits();
5683 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
5684 LLT ScalarShiftAmtTy = ShiftAmtTy.getScalarType();
5685 auto &MIB = Builder;
5686
5687 bool UseSRA = false;
5688 SmallVector<Register, 16> ExactShifts, ExactFactors;
5689
5690 auto *RHSDefInstr = cast<GenericMachineInstr>(getDefIgnoringCopies(RHS, MRI));
5691 bool IsSplat = getIConstantSplatVal(*RHSDefInstr, MRI).has_value();
5692
5693 auto BuildExactSDIVPattern = [&](const Constant *C) {
5694 // Don't recompute inverses for each splat element.
5695 if (IsSplat && !ExactFactors.empty()) {
5696 ExactShifts.push_back(ExactShifts[0]);
5697 ExactFactors.push_back(ExactFactors[0]);
5698 return true;
5699 }
5700
5701 auto *CI = cast<ConstantInt>(C);
5702 APInt Divisor = CI->getValue();
5703 unsigned Shift = Divisor.countr_zero();
5704 if (Shift) {
5705 Divisor.ashrInPlace(Shift);
5706 UseSRA = true;
5707 }
5708
5709 // Calculate the multiplicative inverse modulo BW.
5710 // 2^W requires W + 1 bits, so we have to extend and then truncate.
5711 APInt Factor = Divisor.multiplicativeInverse();
5712 ExactShifts.push_back(MIB.buildConstant(ScalarShiftAmtTy, Shift).getReg(0));
5713 ExactFactors.push_back(MIB.buildConstant(ScalarTy, Factor).getReg(0));
5714 return true;
5715 };
5716
5717 if (MI.getFlag(MachineInstr::MIFlag::IsExact)) {
5718 // Collect all magic values from the build vector.
5719 bool Matched = matchUnaryPredicate(MRI, RHS, BuildExactSDIVPattern);
5720 (void)Matched;
5721 assert(Matched && "Expected unary predicate match to succeed");
5722
5723 Register Shift, Factor;
5724 if (Ty.isVector()) {
5725 Shift = MIB.buildBuildVector(ShiftAmtTy, ExactShifts).getReg(0);
5726 Factor = MIB.buildBuildVector(Ty, ExactFactors).getReg(0);
5727 } else {
5728 Shift = ExactShifts[0];
5729 Factor = ExactFactors[0];
5730 }
5731
5732 Register Res = LHS;
5733
5734 if (UseSRA)
5735 Res = MIB.buildAShr(Ty, Res, Shift, MachineInstr::IsExact).getReg(0);
5736
5737 return MIB.buildMul(Ty, Res, Factor);
5738 }
5739
5740 SmallVector<Register, 16> MagicFactors, Factors, Shifts, ShiftMasks;
5741
5742 auto BuildSDIVPattern = [&](const Constant *C) {
5743 auto *CI = cast<ConstantInt>(C);
5744 const APInt &Divisor = CI->getValue();
5745
5746 SignedDivisionByConstantInfo Magics =
5747 SignedDivisionByConstantInfo::get(Divisor);
5748 int NumeratorFactor = 0;
5749 int ShiftMask = -1;
5750
5751 if (Divisor.isOne() || Divisor.isAllOnes()) {
5752 // If d is +1/-1, we just multiply the numerator by +1/-1.
5753 NumeratorFactor = Divisor.getSExtValue();
5754 Magics.Magic = 0;
5755 Magics.ShiftAmount = 0;
5756 ShiftMask = 0;
5757 } else if (Divisor.isStrictlyPositive() && Magics.Magic.isNegative()) {
5758 // If d > 0 and m < 0, add the numerator.
5759 NumeratorFactor = 1;
5760 } else if (Divisor.isNegative() && Magics.Magic.isStrictlyPositive()) {
5761 // If d < 0 and m > 0, subtract the numerator.
5762 NumeratorFactor = -1;
5763 }
5764
5765 MagicFactors.push_back(MIB.buildConstant(ScalarTy, Magics.Magic).getReg(0));
5766 Factors.push_back(MIB.buildConstant(ScalarTy, NumeratorFactor).getReg(0));
5767 Shifts.push_back(
5768 MIB.buildConstant(ScalarShiftAmtTy, Magics.ShiftAmount).getReg(0));
5769 ShiftMasks.push_back(MIB.buildConstant(ScalarTy, ShiftMask).getReg(0));
5770
5771 return true;
5772 };
5773
5774 // Collect the shifts/magic values from each element.
5775 bool Matched = matchUnaryPredicate(MRI, RHS, BuildSDIVPattern);
5776 (void)Matched;
5777 assert(Matched && "Expected unary predicate match to succeed");
5778
5779 Register MagicFactor, Factor, Shift, ShiftMask;
5780 auto *RHSDef = getOpcodeDef<GBuildVector>(RHS, MRI);
5781 if (RHSDef) {
5782 MagicFactor = MIB.buildBuildVector(Ty, MagicFactors).getReg(0);
5783 Factor = MIB.buildBuildVector(Ty, Factors).getReg(0);
5784 Shift = MIB.buildBuildVector(ShiftAmtTy, Shifts).getReg(0);
5785 ShiftMask = MIB.buildBuildVector(Ty, ShiftMasks).getReg(0);
5786 } else {
5787 assert(MRI.getType(RHS).isScalar() &&
5788 "Non-build_vector operation should have been a scalar");
5789 MagicFactor = MagicFactors[0];
5790 Factor = Factors[0];
5791 Shift = Shifts[0];
5792 ShiftMask = ShiftMasks[0];
5793 }
5794
5795 Register Q = LHS;
5796 Q = MIB.buildSMulH(Ty, LHS, MagicFactor).getReg(0);
5797
5798 // (Optionally) Add/subtract the numerator using Factor.
5799 Factor = MIB.buildMul(Ty, LHS, Factor).getReg(0);
5800 Q = MIB.buildAdd(Ty, Q, Factor).getReg(0);
5801
5802 // Shift right algebraic by shift value.
5803 Q = MIB.buildAShr(Ty, Q, Shift).getReg(0);
5804
5805 // Extract the sign bit, mask it and add it to the quotient.
5806 auto SignShift = MIB.buildConstant(ShiftAmtTy, EltBits - 1);
5807 auto T = MIB.buildLShr(Ty, Q, SignShift);
5808 T = MIB.buildAnd(Ty, T, ShiftMask);
5809 auto ret = MIB.buildAdd(Ty, Q, T);
5810
5811 if (Opcode == TargetOpcode::G_SREM) {
5812 auto Prod = MIB.buildMul(Ty, ret, RHS);
5813 return MIB.buildSub(Ty, LHS, Prod);
5814 }
5815 return ret;
5816}
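// Illustrative example for the signed expansion above (not from the original
// source): for a 32-bit G_SDIV by 3 the constants are Magic = 0x55555556,
// NumeratorFactor = 0 and ShiftAmount = 0, so the result is
//   smulh(%x, 0x55555556) plus its own sign bit,
// e.g. %x = -7 gives smulh = -3, sign bit 1, and -3 + 1 == -2 == -7 / 3.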
5817
5819 assert((MI.getOpcode() == TargetOpcode::G_SDIV ||
5820 MI.getOpcode() == TargetOpcode::G_UDIV) &&
5821 "Expected SDIV or UDIV");
5822 auto &Div = cast<GenericMachineInstr>(MI);
5823 Register RHS = Div.getReg(2);
5824 auto MatchPow2 = [&](const Constant *C) {
5825 auto *CI = dyn_cast<ConstantInt>(C);
5826 return CI && (CI->getValue().isPowerOf2() ||
5827 (IsSigned && CI->getValue().isNegatedPowerOf2()));
5828 };
5829 return matchUnaryPredicate(MRI, RHS, MatchPow2, /*AllowUndefs=*/false);
5830}
5831
5833 assert(MI.getOpcode() == TargetOpcode::G_SDIV && "Expected SDIV");
5834 auto &SDiv = cast<GenericMachineInstr>(MI);
5835 Register Dst = SDiv.getReg(0);
5836 Register LHS = SDiv.getReg(1);
5837 Register RHS = SDiv.getReg(2);
5838 LLT Ty = MRI.getType(Dst);
5839 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
5840 LLT CCVT =
5841 Ty.isVector() ? LLT::vector(Ty.getElementCount(), 1) : LLT::scalar(1);
5842
5843 // Effectively we want to lower G_SDIV %lhs, %rhs, where %rhs is a power of 2,
5844 // to the following version:
5845 //
5846 // %c1 = G_CTTZ %rhs
5847 // %inexact = G_SUB $bitwidth, %c1
5848 // %sign = G_ASHR %lhs, $(bitwidth - 1)
5849 // %lshr = G_LSHR %sign, %inexact
5850 // %add = G_ADD %lhs, %lshr
5851 // %ashr = G_ASHR %add, %c1
5852 // %ashr = G_SELECT %isoneorallones, %lhs, %ashr
5853 // %zero = G_CONSTANT $0
5854 // %neg = G_NEG %ashr
5855 // %isneg = G_ICMP SLT %rhs, %zero
5856 // %res = G_SELECT %isneg, %neg, %ashr
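 // Illustrative worked example (not from the original source): for a 32-bit
 // G_SDIV %lhs, 4 with %lhs = -7: %c1 = 2, %inexact = 30, %sign = -1,
 // %lshr = 3, %add = -4, %ashr = -1, matching truncating division -7 / 4 == -1.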
5857
5858 unsigned BitWidth = Ty.getScalarSizeInBits();
5859 auto Zero = Builder.buildConstant(Ty, 0);
5860
5861 auto Bits = Builder.buildConstant(ShiftAmtTy, BitWidth);
5862 auto C1 = Builder.buildCTTZ(ShiftAmtTy, RHS);
5863 auto Inexact = Builder.buildSub(ShiftAmtTy, Bits, C1);
5864 // Splat the sign bit into the register
5865 auto Sign = Builder.buildAShr(
5866 Ty, LHS, Builder.buildConstant(ShiftAmtTy, BitWidth - 1));
5867
5868 // Add (LHS < 0) ? abs2 - 1 : 0;
5869 auto LSrl = Builder.buildLShr(Ty, Sign, Inexact);
5870 auto Add = Builder.buildAdd(Ty, LHS, LSrl);
5871 auto AShr = Builder.buildAShr(Ty, Add, C1);
5872
5873 // Special case: (sdiv X, 1) -> X
5874 // Special Case: (sdiv X, -1) -> 0-X
5875 auto One = Builder.buildConstant(Ty, 1);
5876 auto MinusOne = Builder.buildConstant(Ty, -1);
5877 auto IsOne = Builder.buildICmp(CmpInst::Predicate::ICMP_EQ, CCVT, RHS, One);
5878 auto IsMinusOne =
5879 Builder.buildICmp(CmpInst::Predicate::ICMP_EQ, CCVT, RHS, MinusOne);
5880 auto IsOneOrMinusOne = Builder.buildOr(CCVT, IsOne, IsMinusOne);
5881 AShr = Builder.buildSelect(Ty, IsOneOrMinusOne, LHS, AShr);
5882
5883 // If divided by a positive value, we're done. Otherwise, the result must be
5884 // negated.
5885 auto Neg = Builder.buildNeg(Ty, AShr);
5886 auto IsNeg = Builder.buildICmp(CmpInst::Predicate::ICMP_SLT, CCVT, RHS, Zero);
5887 Builder.buildSelect(MI.getOperand(0).getReg(), IsNeg, Neg, AShr);
5888 MI.eraseFromParent();
5889}
5890
5892 assert(MI.getOpcode() == TargetOpcode::G_UDIV && "Expected UDIV");
5893 auto &UDiv = cast<GenericMachineInstr>(MI);
5894 Register Dst = UDiv.getReg(0);
5895 Register LHS = UDiv.getReg(1);
5896 Register RHS = UDiv.getReg(2);
5897 LLT Ty = MRI.getType(Dst);
5898 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
5899
5900 auto C1 = Builder.buildCTTZ(ShiftAmtTy, RHS);
5901 Builder.buildLShr(MI.getOperand(0).getReg(), LHS, C1);
5902 MI.eraseFromParent();
5903}
5904
5906 assert(MI.getOpcode() == TargetOpcode::G_UMULH);
5907 Register RHS = MI.getOperand(2).getReg();
5908 Register Dst = MI.getOperand(0).getReg();
5909 LLT Ty = MRI.getType(Dst);
5910 LLT RHSTy = MRI.getType(RHS);
5911 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
5912 auto MatchPow2ExceptOne = [&](const Constant *C) {
5913 if (auto *CI = dyn_cast<ConstantInt>(C))
5914 return CI->getValue().isPowerOf2() && !CI->getValue().isOne();
5915 return false;
5916 };
5917 if (!matchUnaryPredicate(MRI, RHS, MatchPow2ExceptOne, false))
5918 return false;
5919 // We need to check both G_LSHR and G_CTLZ because the combine uses G_CTLZ to
5920 // get the log base 2, and G_CTLZ is not always legal on a target.
5921 return isLegalOrBeforeLegalizer({TargetOpcode::G_LSHR, {Ty, ShiftAmtTy}}) &&
5922 isLegalOrBeforeLegalizer({TargetOpcode::G_CTLZ, {RHSTy, RHSTy}});
5923}
5924
5926 Register LHS = MI.getOperand(1).getReg();
5927 Register RHS = MI.getOperand(2).getReg();
5928 Register Dst = MI.getOperand(0).getReg();
5929 LLT Ty = MRI.getType(Dst);
5930 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
5931 unsigned NumEltBits = Ty.getScalarSizeInBits();
5932
5933 auto LogBase2 = buildLogBase2(RHS, Builder);
5934 auto ShiftAmt =
5935 Builder.buildSub(Ty, Builder.buildConstant(Ty, NumEltBits), LogBase2);
5936 auto Trunc = Builder.buildZExtOrTrunc(ShiftAmtTy, ShiftAmt);
5937 Builder.buildLShr(Dst, LHS, Trunc);
5938 MI.eraseFromParent();
5939}
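// Illustrative example for the combine above (not from the original source):
// for 32-bit values, G_UMULH %x, 8 returns the high half of %x * 8, i.e.
// %x >> (32 - 3), so it is rewritten to G_LSHR %x, 29.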
5940
5942 Register &MatchInfo) const {
5943 Register Dst = MI.getOperand(0).getReg();
5944 Register Src = MI.getOperand(1).getReg();
5945 LLT DstTy = MRI.getType(Dst);
5946 LLT SrcTy = MRI.getType(Src);
5947 unsigned NumDstBits = DstTy.getScalarSizeInBits();
5948 unsigned NumSrcBits = SrcTy.getScalarSizeInBits();
5949 assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
5950
5951 if (!LI || !isLegal({TargetOpcode::G_TRUNC_SSAT_S, {DstTy, SrcTy}}))
5952 return false;
5953
5954 APInt SignedMax = APInt::getSignedMaxValue(NumDstBits).sext(NumSrcBits);
5955 APInt SignedMin = APInt::getSignedMinValue(NumDstBits).sext(NumSrcBits);
5956 return mi_match(Src, MRI,
5957 m_GSMin(m_GSMax(m_Reg(MatchInfo),
5958 m_SpecificICstOrSplat(SignedMin)),
5959 m_SpecificICstOrSplat(SignedMax))) ||
5960 mi_match(Src, MRI,
5961 m_GSMax(m_GSMin(m_Reg(MatchInfo),
5962 m_SpecificICstOrSplat(SignedMax)),
5963 m_SpecificICstOrSplat(SignedMin)));
5964}
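// Illustrative example for the match above (not from the original source):
// truncating s32 -> s16, G_TRUNC (smin (smax %x, -32768), 32767) is recognised
// and later rebuilt as G_TRUNC_SSAT_S %x when the target declares
// G_TRUNC_SSAT_S legal for {s16, s32}.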
5965
5967 Register &MatchInfo) const {
5968 Register Dst = MI.getOperand(0).getReg();
5969 Builder.buildTruncSSatS(Dst, MatchInfo);
5970 MI.eraseFromParent();
5971}
5972
5974 Register &MatchInfo) const {
5975 Register Dst = MI.getOperand(0).getReg();
5976 Register Src = MI.getOperand(1).getReg();
5977 LLT DstTy = MRI.getType(Dst);
5978 LLT SrcTy = MRI.getType(Src);
5979 unsigned NumDstBits = DstTy.getScalarSizeInBits();
5980 unsigned NumSrcBits = SrcTy.getScalarSizeInBits();
5981 assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
5982
5983 if (!LI || !isLegal({TargetOpcode::G_TRUNC_SSAT_U, {DstTy, SrcTy}}))
5984 return false;
5985 APInt UnsignedMax = APInt::getMaxValue(NumDstBits).zext(NumSrcBits);
5986 return mi_match(Src, MRI,
5988 m_SpecificICstOrSplat(UnsignedMax))) ||
5989 mi_match(Src, MRI,
5990 m_GSMax(m_GSMin(m_Reg(MatchInfo),
5991 m_SpecificICstOrSplat(UnsignedMax)),
5992 m_SpecificICstOrSplat(0))) ||
5993 mi_match(Src, MRI,
5995 m_SpecificICstOrSplat(UnsignedMax)));
5996}
5997
5999 Register &MatchInfo) const {
6000 Register Dst = MI.getOperand(0).getReg();
6001 Builder.buildTruncSSatU(Dst, MatchInfo);
6002 MI.eraseFromParent();
6003}
6004
6006 MachineInstr &MinMI) const {
6007 Register Min = MinMI.getOperand(2).getReg();
6008 Register Val = MinMI.getOperand(1).getReg();
6009 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6010 LLT SrcTy = MRI.getType(Val);
6011 unsigned NumDstBits = DstTy.getScalarSizeInBits();
6012 unsigned NumSrcBits = SrcTy.getScalarSizeInBits();
6013 assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
6014
6015 if (!LI || !isLegal({TargetOpcode::G_TRUNC_SSAT_U, {DstTy, SrcTy}}))
6016 return false;
6017 APInt UnsignedMax = APInt::getMaxValue(NumDstBits).zext(NumSrcBits);
6018 return mi_match(Min, MRI, m_SpecificICstOrSplat(UnsignedMax)) &&
6019 !mi_match(Val, MRI, m_GSMax(m_Reg(), m_Reg()));
6020}
6021
6023 MachineInstr &SrcMI) const {
6024 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6025 LLT SrcTy = MRI.getType(SrcMI.getOperand(1).getReg());
6026
6027 return LI &&
6028 isLegalOrBeforeLegalizer({TargetOpcode::G_FPTOUI_SAT, {DstTy, SrcTy}});
6029}
6030
6032 BuildFnTy &MatchInfo) const {
6033 unsigned Opc = MI.getOpcode();
6034 assert(Opc == TargetOpcode::G_FADD || Opc == TargetOpcode::G_FSUB ||
6035 Opc == TargetOpcode::G_FMUL || Opc == TargetOpcode::G_FDIV ||
6036 Opc == TargetOpcode::G_FMAD || Opc == TargetOpcode::G_FMA);
6037
6038 Register Dst = MI.getOperand(0).getReg();
6039 Register X = MI.getOperand(1).getReg();
6040 Register Y = MI.getOperand(2).getReg();
6041 LLT Type = MRI.getType(Dst);
6042
6043 // fold (fadd x, fneg(y)) -> (fsub x, y)
6044 // fold (fadd fneg(y), x) -> (fsub x, y)
6045 // G_ADD is commutative so both cases are checked by m_GFAdd
6046 if (mi_match(Dst, MRI, m_GFAdd(m_Reg(X), m_GFNeg(m_Reg(Y)))) &&
6047 isLegalOrBeforeLegalizer({TargetOpcode::G_FSUB, {Type}})) {
6048 Opc = TargetOpcode::G_FSUB;
6049 }
6050 /// fold (fsub x, fneg(y)) -> (fadd x, y)
6051 else if (mi_match(Dst, MRI, m_GFSub(m_Reg(X), m_GFNeg(m_Reg(Y)))) &&
6052 isLegalOrBeforeLegalizer({TargetOpcode::G_FADD, {Type}})) {
6053 Opc = TargetOpcode::G_FADD;
6054 }
6055 // fold (fmul fneg(x), fneg(y)) -> (fmul x, y)
6056 // fold (fdiv fneg(x), fneg(y)) -> (fdiv x, y)
6057 // fold (fmad fneg(x), fneg(y), z) -> (fmad x, y, z)
6058 // fold (fma fneg(x), fneg(y), z) -> (fma x, y, z)
6059 else if ((Opc == TargetOpcode::G_FMUL || Opc == TargetOpcode::G_FDIV ||
6060 Opc == TargetOpcode::G_FMAD || Opc == TargetOpcode::G_FMA) &&
6061 mi_match(X, MRI, m_GFNeg(m_Reg(X))) &&
6062 mi_match(Y, MRI, m_GFNeg(m_Reg(Y)))) {
6063 // no opcode change
6064 } else
6065 return false;
6066
6067 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6068 Observer.changingInstr(MI);
6069 MI.setDesc(B.getTII().get(Opc));
6070 MI.getOperand(1).setReg(X);
6071 MI.getOperand(2).setReg(Y);
6072 Observer.changedInstr(MI);
6073 };
6074 return true;
6075}
6076
6077bool CombinerHelper::matchFsubToFneg(MachineInstr &MI,
6078 Register &MatchInfo) const {
6079 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
6080
6081 Register LHS = MI.getOperand(1).getReg();
6082 MatchInfo = MI.getOperand(2).getReg();
6083 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
6084
6085 const auto LHSCst = Ty.isVector()
6086 ? getFConstantSplat(LHS, MRI, /* allowUndef */ true)
6087 : getFConstantVRegValWithLookThrough(LHS, MRI);
6088 if (!LHSCst)
6089 return false;
6090
6091 // -0.0 is always allowed
6092 if (LHSCst->Value.isNegZero())
6093 return true;
6094
6095 // +0.0 is only allowed if nsz is set.
6096 if (LHSCst->Value.isPosZero())
6097 return MI.getFlag(MachineInstr::FmNsz);
6098
6099 return false;
6100}
6101
6102void CombinerHelper::applyFsubToFneg(MachineInstr &MI,
6103 Register &MatchInfo) const {
6104 Register Dst = MI.getOperand(0).getReg();
6105 Builder.buildFNeg(
6106 Dst, Builder.buildFCanonicalize(MRI.getType(Dst), MatchInfo).getReg(0));
6107 eraseInst(MI);
6108}
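// Illustrative example for the combine above (not from the original source):
//   %d = G_FSUB -0.0, %x   is rewritten to   %d = G_FNEG (G_FCANONICALIZE %x)
// and, when the nsz flag is present, the same applies to a +0.0 left-hand side.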
6109
6110/// Checks if \p MI is TargetOpcode::G_FMUL and contractable either
6111/// due to global flags or MachineInstr flags.
6112static bool isContractableFMul(MachineInstr &MI, bool AllowFusionGlobally) {
6113 if (MI.getOpcode() != TargetOpcode::G_FMUL)
6114 return false;
6115 return AllowFusionGlobally || MI.getFlag(MachineInstr::MIFlag::FmContract);
6116}
6117
6118static bool hasMoreUses(const MachineInstr &MI0, const MachineInstr &MI1,
6119 const MachineRegisterInfo &MRI) {
6120 return std::distance(MRI.use_instr_nodbg_begin(MI0.getOperand(0).getReg()),
6121 MRI.use_instr_nodbg_end()) >
6122 std::distance(MRI.use_instr_nodbg_begin(MI1.getOperand(0).getReg()),
6123 MRI.use_instr_nodbg_end());
6124}
6125
6126bool CombinerHelper::canCombineFMadOrFMA(MachineInstr &MI,
6127 bool &AllowFusionGlobally,
6128 bool &HasFMAD, bool &Aggressive,
6129 bool CanReassociate) const {
6130
6131 auto *MF = MI.getMF();
6132 const auto &TLI = *MF->getSubtarget().getTargetLowering();
6133 const TargetOptions &Options = MF->getTarget().Options;
6134 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
6135
6136 if (CanReassociate && !MI.getFlag(MachineInstr::MIFlag::FmReassoc))
6137 return false;
6138
6139 // Floating-point multiply-add with intermediate rounding.
6140 HasFMAD = (!isPreLegalize() && TLI.isFMADLegal(MI, DstType));
6141 // Floating-point multiply-add without intermediate rounding.
6142 bool HasFMA = TLI.isFMAFasterThanFMulAndFAdd(*MF, DstType) &&
6143 isLegalOrBeforeLegalizer({TargetOpcode::G_FMA, {DstType}});
6144 // No valid opcode, do not combine.
6145 if (!HasFMAD && !HasFMA)
6146 return false;
6147
6148 AllowFusionGlobally = Options.AllowFPOpFusion == FPOpFusion::Fast || HasFMAD;
6149 // If the addition is not contractable, do not combine.
6150 if (!AllowFusionGlobally && !MI.getFlag(MachineInstr::MIFlag::FmContract))
6151 return false;
6152
6153 Aggressive = TLI.enableAggressiveFMAFusion(DstType);
6154 return true;
6155}
6156
6159 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
6160 assert(MI.getOpcode() == TargetOpcode::G_FADD);
6161
6162 bool AllowFusionGlobally, HasFMAD, Aggressive;
6163 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6164 return false;
6165
6166 Register Op1 = MI.getOperand(1).getReg();
6167 Register Op2 = MI.getOperand(2).getReg();
6168 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
6169 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
6170 unsigned PreferredFusedOpcode =
6171 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6172
6173 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
6174 // prefer to fold the multiply with fewer uses.
6175 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6176 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
6177 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
6178 std::swap(LHS, RHS);
6179 }
6180
6181 // fold (fadd (fmul x, y), z) -> (fma x, y, z)
6182 if (isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6183 (Aggressive || MRI.hasOneNonDBGUse(LHS.Reg))) {
6184 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6185 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6186 {LHS.MI->getOperand(1).getReg(),
6187 LHS.MI->getOperand(2).getReg(), RHS.Reg});
6188 };
6189 return true;
6190 }
6191
6192 // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
6193 if (isContractableFMul(*RHS.MI, AllowFusionGlobally) &&
6194 (Aggressive || MRI.hasOneNonDBGUse(RHS.Reg))) {
6195 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6196 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6197 {RHS.MI->getOperand(1).getReg(),
6198 RHS.MI->getOperand(2).getReg(), LHS.Reg});
6199 };
6200 return true;
6201 }
6202
6203 return false;
6204}
6205
6208 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
6209 assert(MI.getOpcode() == TargetOpcode::G_FADD);
6210
6211 bool AllowFusionGlobally, HasFMAD, Aggressive;
6212 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6213 return false;
6214
6215 const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
6216 Register Op1 = MI.getOperand(1).getReg();
6217 Register Op2 = MI.getOperand(2).getReg();
6218 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
6219 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
6220 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
6221
6222 unsigned PreferredFusedOpcode =
6223 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6224
6225 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
6226 // prefer to fold the multiply with fewer uses.
6227 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6228 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
6229 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
6230 std::swap(LHS, RHS);
6231 }
6232
6233 // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
6234 MachineInstr *FpExtSrc;
6235 if (mi_match(LHS.Reg, MRI, m_GFPExt(m_MInstr(FpExtSrc))) &&
6236 isContractableFMul(*FpExtSrc, AllowFusionGlobally) &&
6237 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
6238 MRI.getType(FpExtSrc->getOperand(1).getReg()))) {
6239 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6240 auto FpExtX = B.buildFPExt(DstType, FpExtSrc->getOperand(1).getReg());
6241 auto FpExtY = B.buildFPExt(DstType, FpExtSrc->getOperand(2).getReg());
6242 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6243 {FpExtX.getReg(0), FpExtY.getReg(0), RHS.Reg});
6244 };
6245 return true;
6246 }
6247
6248 // fold (fadd z, (fpext (fmul x, y))) -> (fma (fpext x), (fpext y), z)
6249 // Note: Commutes FADD operands.
6250 if (mi_match(RHS.Reg, MRI, m_GFPExt(m_MInstr(FpExtSrc))) &&
6251 isContractableFMul(*FpExtSrc, AllowFusionGlobally) &&
6252 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
6253 MRI.getType(FpExtSrc->getOperand(1).getReg()))) {
6254 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6255 auto FpExtX = B.buildFPExt(DstType, FpExtSrc->getOperand(1).getReg());
6256 auto FpExtY = B.buildFPExt(DstType, FpExtSrc->getOperand(2).getReg());
6257 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6258 {FpExtX.getReg(0), FpExtY.getReg(0), LHS.Reg});
6259 };
6260 return true;
6261 }
6262
6263 return false;
6264}
6265
6268 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
6269 assert(MI.getOpcode() == TargetOpcode::G_FADD);
6270
6271 bool AllowFusionGlobally, HasFMAD, Aggressive;
6272 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive, true))
6273 return false;
6274
6275 Register Op1 = MI.getOperand(1).getReg();
6276 Register Op2 = MI.getOperand(2).getReg();
6277 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
6278 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
6279 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6280
6281 unsigned PreferredFusedOpcode =
6282 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6283
6284 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
6285 // prefer to fold the multiply with fewer uses.
6286 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6287 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
6288 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
6289 std::swap(LHS, RHS);
6290 }
6291
6292 MachineInstr *FMA = nullptr;
6293 Register Z;
6294 // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z))
6295 if (LHS.MI->getOpcode() == PreferredFusedOpcode &&
6296 (MRI.getVRegDef(LHS.MI->getOperand(3).getReg())->getOpcode() ==
6297 TargetOpcode::G_FMUL) &&
6298 MRI.hasOneNonDBGUse(LHS.MI->getOperand(0).getReg()) &&
6299 MRI.hasOneNonDBGUse(LHS.MI->getOperand(3).getReg())) {
6300 FMA = LHS.MI;
6301 Z = RHS.Reg;
6302 }
6303 // fold (fadd z, (fma x, y, (fmul u, v))) -> (fma x, y, (fma u, v, z))
6304 else if (RHS.MI->getOpcode() == PreferredFusedOpcode &&
6305 (MRI.getVRegDef(RHS.MI->getOperand(3).getReg())->getOpcode() ==
6306 TargetOpcode::G_FMUL) &&
6307 MRI.hasOneNonDBGUse(RHS.MI->getOperand(0).getReg()) &&
6308 MRI.hasOneNonDBGUse(RHS.MI->getOperand(3).getReg())) {
6309 Z = LHS.Reg;
6310 FMA = RHS.MI;
6311 }
6312
6313 if (FMA) {
6314 MachineInstr *FMulMI = MRI.getVRegDef(FMA->getOperand(3).getReg());
6315 Register X = FMA->getOperand(1).getReg();
6316 Register Y = FMA->getOperand(2).getReg();
6317 Register U = FMulMI->getOperand(1).getReg();
6318 Register V = FMulMI->getOperand(2).getReg();
6319
6320 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6321 Register InnerFMA = MRI.createGenericVirtualRegister(DstTy);
6322 B.buildInstr(PreferredFusedOpcode, {InnerFMA}, {U, V, Z});
6323 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6324 {X, Y, InnerFMA});
6325 };
6326 return true;
6327 }
6328
6329 return false;
6330}
6331
6334 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
6335 assert(MI.getOpcode() == TargetOpcode::G_FADD);
6336
6337 bool AllowFusionGlobally, HasFMAD, Aggressive;
6338 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6339 return false;
6340
6341 if (!Aggressive)
6342 return false;
6343
6344 const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
6345 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
6346 Register Op1 = MI.getOperand(1).getReg();
6347 Register Op2 = MI.getOperand(2).getReg();
6348 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
6349 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
6350
6351 unsigned PreferredFusedOpcode =
6352 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6353
6354 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
6355 // prefer to fold the multiply with fewer uses.
6356 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6357 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
6358 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
6359 std::swap(LHS, RHS);
6360 }
6361
6362 // Builds: (fma x, y, (fma (fpext u), (fpext v), z))
6363 auto buildMatchInfo = [=, &MI](Register U, Register V, Register Z, Register X,
6364 Register Y, MachineIRBuilder &B) {
6365 Register FpExtU = B.buildFPExt(DstType, U).getReg(0);
6366 Register FpExtV = B.buildFPExt(DstType, V).getReg(0);
6367 Register InnerFMA =
6368 B.buildInstr(PreferredFusedOpcode, {DstType}, {FpExtU, FpExtV, Z})
6369 .getReg(0);
6370 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6371 {X, Y, InnerFMA});
6372 };
6373
6374 MachineInstr *FMulMI, *FMAMI;
6375 // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
6376 // -> (fma x, y, (fma (fpext u), (fpext v), z))
6377 if (LHS.MI->getOpcode() == PreferredFusedOpcode &&
6378 mi_match(LHS.MI->getOperand(3).getReg(), MRI,
6379 m_GFPExt(m_MInstr(FMulMI))) &&
6380 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6381 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
6382 MRI.getType(FMulMI->getOperand(0).getReg()))) {
6383 MatchInfo = [=](MachineIRBuilder &B) {
6384 buildMatchInfo(FMulMI->getOperand(1).getReg(),
6385 FMulMI->getOperand(2).getReg(), RHS.Reg,
6386 LHS.MI->getOperand(1).getReg(),
6387 LHS.MI->getOperand(2).getReg(), B);
6388 };
6389 return true;
6390 }
6391
6392 // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
6393 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
6394 // FIXME: This turns two single-precision and one double-precision
6395 // operation into two double-precision operations, which might not be
6396 // interesting for all targets, especially GPUs.
6397 if (mi_match(LHS.Reg, MRI, m_GFPExt(m_MInstr(FMAMI))) &&
6398 FMAMI->getOpcode() == PreferredFusedOpcode) {
6399 MachineInstr *FMulMI = MRI.getVRegDef(FMAMI->getOperand(3).getReg());
6400 if (isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6401 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
6402 MRI.getType(FMAMI->getOperand(0).getReg()))) {
6403 MatchInfo = [=](MachineIRBuilder &B) {
6404 Register X = FMAMI->getOperand(1).getReg();
6405 Register Y = FMAMI->getOperand(2).getReg();
6406 X = B.buildFPExt(DstType, X).getReg(0);
6407 Y = B.buildFPExt(DstType, Y).getReg(0);
6408 buildMatchInfo(FMulMI->getOperand(1).getReg(),
6409 FMulMI->getOperand(2).getReg(), RHS.Reg, X, Y, B);
6410 };
6411
6412 return true;
6413 }
6414 }
6415
6416 // fold (fadd z, (fma x, y, (fpext (fmul u, v)))
6417 // -> (fma x, y, (fma (fpext u), (fpext v), z))
6418 if (RHS.MI->getOpcode() == PreferredFusedOpcode &&
6419 mi_match(RHS.MI->getOperand(3).getReg(), MRI,
6420 m_GFPExt(m_MInstr(FMulMI))) &&
6421 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6422 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
6423 MRI.getType(FMulMI->getOperand(0).getReg()))) {
6424 MatchInfo = [=](MachineIRBuilder &B) {
6425 buildMatchInfo(FMulMI->getOperand(1).getReg(),
6426 FMulMI->getOperand(2).getReg(), LHS.Reg,
6427 RHS.MI->getOperand(1).getReg(),
6428 RHS.MI->getOperand(2).getReg(), B);
6429 };
6430 return true;
6431 }
6432
6433 // fold (fadd z, (fpext (fma x, y, (fmul u, v)))
6434 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
6435 // FIXME: This turns two single-precision and one double-precision
6436 // operation into two double-precision operations, which might not be
6437 // interesting for all targets, especially GPUs.
6438 if (mi_match(RHS.Reg, MRI, m_GFPExt(m_MInstr(FMAMI))) &&
6439 FMAMI->getOpcode() == PreferredFusedOpcode) {
6440 MachineInstr *FMulMI = MRI.getVRegDef(FMAMI->getOperand(3).getReg());
6441 if (isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6442 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
6443 MRI.getType(FMAMI->getOperand(0).getReg()))) {
6444 MatchInfo = [=](MachineIRBuilder &B) {
6445 Register X = FMAMI->getOperand(1).getReg();
6446 Register Y = FMAMI->getOperand(2).getReg();
6447 X = B.buildFPExt(DstType, X).getReg(0);
6448 Y = B.buildFPExt(DstType, Y).getReg(0);
6449 buildMatchInfo(FMulMI->getOperand(1).getReg(),
6450 FMulMI->getOperand(2).getReg(), LHS.Reg, X, Y, B);
6451 };
6452 return true;
6453 }
6454 }
6455
6456 return false;
6457}
6458
6461 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
6462 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
6463
6464 bool AllowFusionGlobally, HasFMAD, Aggressive;
6465 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6466 return false;
6467
6468 Register Op1 = MI.getOperand(1).getReg();
6469 Register Op2 = MI.getOperand(2).getReg();
6470 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
6471 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
6472 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6473
6474 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
6475 // prefer to fold the multiply with fewer uses.
6476 int FirstMulHasFewerUses = true;
6477 if (isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6478 isContractableFMul(*RHS.MI, AllowFusionGlobally) &&
6479 hasMoreUses(*LHS.MI, *RHS.MI, MRI))
6480 FirstMulHasFewerUses = false;
6481
6482 unsigned PreferredFusedOpcode =
6483 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6484
6485 // fold (fsub (fmul x, y), z) -> (fma x, y, -z)
6486 if (FirstMulHasFewerUses &&
6487 (isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6488 (Aggressive || MRI.hasOneNonDBGUse(LHS.Reg)))) {
6489 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6490 Register NegZ = B.buildFNeg(DstTy, RHS.Reg).getReg(0);
6491 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6492 {LHS.MI->getOperand(1).getReg(),
6493 LHS.MI->getOperand(2).getReg(), NegZ});
6494 };
6495 return true;
6496 }
6497 // fold (fsub x, (fmul y, z)) -> (fma -y, z, x)
6498 else if ((isContractableFMul(*RHS.MI, AllowFusionGlobally) &&
6499 (Aggressive || MRI.hasOneNonDBGUse(RHS.Reg)))) {
6500 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6501 Register NegY =
6502 B.buildFNeg(DstTy, RHS.MI->getOperand(1).getReg()).getReg(0);
6503 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6504 {NegY, RHS.MI->getOperand(2).getReg(), LHS.Reg});
6505 };
6506 return true;
6507 }
6508
6509 return false;
6510}
6511
6514 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
6515 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
6516
6517 bool AllowFusionGlobally, HasFMAD, Aggressive;
6518 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6519 return false;
6520
6521 Register LHSReg = MI.getOperand(1).getReg();
6522 Register RHSReg = MI.getOperand(2).getReg();
6523 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6524
6525 unsigned PreferredFusedOpcode =
6526 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6527
6528 MachineInstr *FMulMI;
6529 // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
6530 if (mi_match(LHSReg, MRI, m_GFNeg(m_MInstr(FMulMI))) &&
6531 (Aggressive || (MRI.hasOneNonDBGUse(LHSReg) &&
6532 MRI.hasOneNonDBGUse(FMulMI->getOperand(0).getReg()))) &&
6533 isContractableFMul(*FMulMI, AllowFusionGlobally)) {
6534 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6535 Register NegX =
6536 B.buildFNeg(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
6537 Register NegZ = B.buildFNeg(DstTy, RHSReg).getReg(0);
6538 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6539 {NegX, FMulMI->getOperand(2).getReg(), NegZ});
6540 };
6541 return true;
6542 }
6543
6544 // fold (fsub x, (fneg (fmul, y, z))) -> (fma y, z, x)
6545 if (mi_match(RHSReg, MRI, m_GFNeg(m_MInstr(FMulMI))) &&
6546 (Aggressive || (MRI.hasOneNonDBGUse(RHSReg) &&
6547 MRI.hasOneNonDBGUse(FMulMI->getOperand(0).getReg()))) &&
6548 isContractableFMul(*FMulMI, AllowFusionGlobally)) {
6549 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6550 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6551 {FMulMI->getOperand(1).getReg(),
6552 FMulMI->getOperand(2).getReg(), LHSReg});
6553 };
6554 return true;
6555 }
6556
6557 return false;
6558}
6559
6562 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
6563 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
6564
6565 bool AllowFusionGlobally, HasFMAD, Aggressive;
6566 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6567 return false;
6568
6569 Register LHSReg = MI.getOperand(1).getReg();
6570 Register RHSReg = MI.getOperand(2).getReg();
6571 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6572
6573 unsigned PreferredFusedOpcode =
6574 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6575
6576 MachineInstr *FMulMI;
6577 // fold (fsub (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), (fneg z))
6578 if (mi_match(LHSReg, MRI, m_GFPExt(m_MInstr(FMulMI))) &&
6579 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6580 (Aggressive || MRI.hasOneNonDBGUse(LHSReg))) {
6581 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6582 Register FpExtX =
6583 B.buildFPExt(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
6584 Register FpExtY =
6585 B.buildFPExt(DstTy, FMulMI->getOperand(2).getReg()).getReg(0);
6586 Register NegZ = B.buildFNeg(DstTy, RHSReg).getReg(0);
6587 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6588 {FpExtX, FpExtY, NegZ});
6589 };
6590 return true;
6591 }
6592
6593 // fold (fsub x, (fpext (fmul y, z))) -> (fma (fneg (fpext y)), (fpext z), x)
6594 if (mi_match(RHSReg, MRI, m_GFPExt(m_MInstr(FMulMI))) &&
6595 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6596 (Aggressive || MRI.hasOneNonDBGUse(RHSReg))) {
6597 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6598 Register FpExtY =
6599 B.buildFPExt(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
6600 Register NegY = B.buildFNeg(DstTy, FpExtY).getReg(0);
6601 Register FpExtZ =
6602 B.buildFPExt(DstTy, FMulMI->getOperand(2).getReg()).getReg(0);
6603 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6604 {NegY, FpExtZ, LHSReg});
6605 };
6606 return true;
6607 }
6608
6609 return false;
6610}
6611
6614 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
6615 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
6616
6617 bool AllowFusionGlobally, HasFMAD, Aggressive;
6618 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6619 return false;
6620
6621 const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
6622 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6623 Register LHSReg = MI.getOperand(1).getReg();
6624 Register RHSReg = MI.getOperand(2).getReg();
6625
6626 unsigned PreferredFusedOpcode =
6627 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6628
6629 auto buildMatchInfo = [=](Register Dst, Register X, Register Y, Register Z,
6630 MachineIRBuilder &B) {
6631 Register FpExtX = B.buildFPExt(DstTy, X).getReg(0);
6632 Register FpExtY = B.buildFPExt(DstTy, Y).getReg(0);
6633 B.buildInstr(PreferredFusedOpcode, {Dst}, {FpExtX, FpExtY, Z});
6634 };
6635
6636 MachineInstr *FMulMI;
6637 // fold (fsub (fpext (fneg (fmul x, y))), z) ->
6638 // (fneg (fma (fpext x), (fpext y), z))
6639 // fold (fsub (fneg (fpext (fmul x, y))), z) ->
6640 // (fneg (fma (fpext x), (fpext y), z))
6641 if ((mi_match(LHSReg, MRI, m_GFPExt(m_GFNeg(m_MInstr(FMulMI)))) ||
6642 mi_match(LHSReg, MRI, m_GFNeg(m_GFPExt(m_MInstr(FMulMI))))) &&
6643 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6644 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstTy,
6645 MRI.getType(FMulMI->getOperand(0).getReg()))) {
6646 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6647 Register FMAReg = MRI.createGenericVirtualRegister(DstTy);
6648 buildMatchInfo(FMAReg, FMulMI->getOperand(1).getReg(),
6649 FMulMI->getOperand(2).getReg(), RHSReg, B);
6650 B.buildFNeg(MI.getOperand(0).getReg(), FMAReg);
6651 };
6652 return true;
6653 }
6654
6655 // fold (fsub x, (fpext (fneg (fmul y, z)))) -> (fma (fpext y), (fpext z), x)
6656 // fold (fsub x, (fneg (fpext (fmul y, z)))) -> (fma (fpext y), (fpext z), x)
6657 if ((mi_match(RHSReg, MRI, m_GFPExt(m_GFNeg(m_MInstr(FMulMI)))) ||
6658 mi_match(RHSReg, MRI, m_GFNeg(m_GFPExt(m_MInstr(FMulMI))))) &&
6659 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6660 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstTy,
6661 MRI.getType(FMulMI->getOperand(0).getReg()))) {
6662 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6663 buildMatchInfo(MI.getOperand(0).getReg(), FMulMI->getOperand(1).getReg(),
6664 FMulMI->getOperand(2).getReg(), LHSReg, B);
6665 };
6666 return true;
6667 }
6668
6669 return false;
6670}
6671
6673 unsigned &IdxToPropagate) const {
6674 bool PropagateNaN;
6675 switch (MI.getOpcode()) {
6676 default:
6677 return false;
6678 case TargetOpcode::G_FMINNUM:
6679 case TargetOpcode::G_FMAXNUM:
6680 PropagateNaN = false;
6681 break;
6682 case TargetOpcode::G_FMINIMUM:
6683 case TargetOpcode::G_FMAXIMUM:
6684 PropagateNaN = true;
6685 break;
6686 }
6687
6688 auto MatchNaN = [&](unsigned Idx) {
6689 Register MaybeNaNReg = MI.getOperand(Idx).getReg();
6690 const ConstantFP *MaybeCst = getConstantFPVRegVal(MaybeNaNReg, MRI);
6691 if (!MaybeCst || !MaybeCst->getValueAPF().isNaN())
6692 return false;
6693 IdxToPropagate = PropagateNaN ? Idx : (Idx == 1 ? 2 : 1);
6694 return true;
6695 };
6696
6697 return MatchNaN(1) || MatchNaN(2);
6698}
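// Illustrative examples of the NaN handling above (operand names are made up;
// assume operand 2 is a NaN constant):
//   G_FMINNUM  %x, NaN --> %x   (quiet semantics: propagate the non-NaN operand)
//   G_FMINIMUM %x, NaN --> NaN  (propagating semantics: propagate the NaN operand)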
6699
6700// Combine multiple FDIVs with the same divisor into multiple FMULs by the
6701// reciprocal.
6702// E.g., (a / Y; b / Y;) -> (recip = 1.0 / Y; a * recip; b * recip)
6704 MachineInstr &MI, SmallVector<MachineInstr *> &MatchInfo) const {
6705 assert(MI.getOpcode() == TargetOpcode::G_FDIV);
6706
6707 Register X = MI.getOperand(1).getReg();
6708 Register Y = MI.getOperand(2).getReg();
6709
6710 if (!MI.getFlag(MachineInstr::MIFlag::FmArcp))
6711 return false;
6712
6713 // Skip if the current node is already a reciprocal or a negated reciprocal.
6714 auto N0CFP = isConstantOrConstantSplatVectorFP(*MRI.getVRegDef(X), MRI);
6715 if (N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0)))
6716 return false;
6717
6718 // Exit early if the target does not want this transform or if there can't
6719 // possibly be enough uses of the divisor to make the transform worthwhile.
6720 unsigned MinUses = getTargetLowering().combineRepeatedFPDivisors();
6721 if (!MinUses)
6722 return false;
6723
6724 // Find all FDIV users of the same divisor. For the moment we limit all
6725 // instructions to a single BB and use the first Instr in MatchInfo as the
6726 // dominating position.
6727 MatchInfo.push_back(&MI);
6728 for (auto &U : MRI.use_nodbg_instructions(Y)) {
6729 if (&U == &MI || U.getParent() != MI.getParent())
6730 continue;
6731 if (U.getOpcode() == TargetOpcode::G_FDIV &&
6732 U.getOperand(2).getReg() == Y && U.getOperand(1).getReg() != Y) {
6733 // This division is eligible for optimization only if global unsafe math
6734 // is enabled or if this division allows reciprocal formation.
6735 if (U.getFlag(MachineInstr::MIFlag::FmArcp)) {
6736 MatchInfo.push_back(&U);
6737 if (dominates(U, *MatchInfo[0]))
6738 std::swap(MatchInfo[0], MatchInfo.back());
6739 }
6740 }
6741 }
6742
6743 // Now that we have the actual number of divisor uses, make sure it meets
6744 // the minimum threshold specified by the target.
6745 return MatchInfo.size() >= MinUses;
6746}
6747
6749 SmallVector<MachineInstr *> &MatchInfo) const {
6750 // Generate the new div at the position of the first instruction, which we
6751 // have ensured dominates all other instructions.
6752 Builder.setInsertPt(*MatchInfo[0]->getParent(), MatchInfo[0]);
6753 LLT Ty = MRI.getType(MatchInfo[0]->getOperand(0).getReg());
6754 auto Div = Builder.buildFDiv(Ty, Builder.buildFConstant(Ty, 1.0),
6755 MatchInfo[0]->getOperand(2).getReg(),
6756 MatchInfo[0]->getFlags());
6757
6758 // Replace all found div's with fmul instructions.
6759 for (MachineInstr *MI : MatchInfo) {
6760 Builder.setInsertPt(*MI->getParent(), MI);
6761 Builder.buildFMul(MI->getOperand(0).getReg(), MI->getOperand(1).getReg(),
6762 Div->getOperand(0).getReg(), MI->getFlags());
6763 MI->eraseFromParent();
6764 }
6765}
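// An illustrative before/after sketch in pseudo-MIR (register names are made
// up; assumes both divisions carry the arcp flag and the target's
// combineRepeatedFPDivisors() threshold is at most 2):
//   %r0:_(s32) = arcp G_FDIV %a, %y
//   %r1:_(s32) = arcp G_FDIV %b, %y
// becomes
//   %one:_(s32)   = G_FCONSTANT float 1.0
//   %recip:_(s32) = arcp G_FDIV %one, %y
//   %r0:_(s32)    = arcp G_FMUL %a, %recip
//   %r1:_(s32)    = arcp G_FMUL %b, %recip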
6766
6768 assert(MI.getOpcode() == TargetOpcode::G_ADD && "Expected a G_ADD");
6769 Register LHS = MI.getOperand(1).getReg();
6770 Register RHS = MI.getOperand(2).getReg();
6771
6772 // Helper lambda to check for opportunities for
6773 // A + (B - A) -> B
6774 // (B - A) + A -> B
6775 auto CheckFold = [&](Register MaybeSub, Register MaybeSameReg) {
6776 Register Reg;
6777 return mi_match(MaybeSub, MRI, m_GSub(m_Reg(Src), m_Reg(Reg))) &&
6778 Reg == MaybeSameReg;
6779 };
6780 return CheckFold(LHS, RHS) || CheckFold(RHS, LHS);
6781}
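// An illustrative instance (register names are made up):
//   %t:_(s32) = G_SUB %b, %a
//   %d:_(s32) = G_ADD %a, %t
// matches with Src = %b, so %d can simply be replaced by %b.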
6782
6784 Register &MatchInfo) const {
6785 // This combine folds the following patterns:
6786 //
6787 // G_BUILD_VECTOR_TRUNC (G_BITCAST(x), G_LSHR(G_BITCAST(x), k))
6788 // G_BUILD_VECTOR(G_TRUNC(G_BITCAST(x)), G_TRUNC(G_LSHR(G_BITCAST(x), k)))
6789 // into
6790 // x
6791 // if
6792 // k == sizeof(VecEltTy)/2
6793 // type(x) == type(dst)
6794 //
6795 // G_BUILD_VECTOR(G_TRUNC(G_BITCAST(x)), undef)
6796 // into
6797 // x
6798 // if
6799 // type(x) == type(dst)
6800
6801 LLT DstVecTy = MRI.getType(MI.getOperand(0).getReg());
6802 LLT DstEltTy = DstVecTy.getElementType();
6803
6804 Register Lo, Hi;
6805
6806 if (mi_match(
6807 MI, MRI,
6809 MatchInfo = Lo;
6810 return MRI.getType(MatchInfo) == DstVecTy;
6811 }
6812
6813 std::optional<ValueAndVReg> ShiftAmount;
6814 const auto LoPattern = m_GBitcast(m_Reg(Lo));
6815 const auto HiPattern = m_GLShr(m_GBitcast(m_Reg(Hi)), m_GCst(ShiftAmount));
6816 if (mi_match(
6817 MI, MRI,
6818 m_any_of(m_GBuildVectorTrunc(LoPattern, HiPattern),
6819 m_GBuildVector(m_GTrunc(LoPattern), m_GTrunc(HiPattern))))) {
6820 if (Lo == Hi && ShiftAmount->Value == DstEltTy.getSizeInBits()) {
6821 MatchInfo = Lo;
6822 return MRI.getType(MatchInfo) == DstVecTy;
6823 }
6824 }
6825
6826 return false;
6827}
6828
6830 Register &MatchInfo) const {
6831 // Replace (G_TRUNC (G_BITCAST (G_BUILD_VECTOR x, y)) with just x
6832 // if type(x) == type(G_TRUNC)
6833 if (!mi_match(MI.getOperand(1).getReg(), MRI,
6834 m_GBitcast(m_GBuildVector(m_Reg(MatchInfo), m_Reg()))))
6835 return false;
6836
6837 return MRI.getType(MatchInfo) == MRI.getType(MI.getOperand(0).getReg());
6838}
6839
6841 Register &MatchInfo) const {
6842 // Replace (G_TRUNC (G_LSHR (G_BITCAST (G_BUILD_VECTOR x, y)), K)) with
6843 // y if K == size of vector element type
6844 std::optional<ValueAndVReg> ShiftAmt;
6845 if (!mi_match(MI.getOperand(1).getReg(), MRI,
6847 m_GCst(ShiftAmt))))
6848 return false;
6849
6850 LLT MatchTy = MRI.getType(MatchInfo);
6851 return ShiftAmt->Value.getZExtValue() == MatchTy.getSizeInBits() &&
6852 MatchTy == MRI.getType(MI.getOperand(0).getReg());
6853}
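// An illustrative instance (register names are made up; assumes the usual
// layout where lane 0 of the build vector ends up in the low bits of the
// bitcast):
//   %v:_(<2 x s32>) = G_BUILD_VECTOR %x:_(s32), %y:_(s32)
//   %c:_(s64)       = G_BITCAST %v
//   %s:_(s64)       = G_LSHR %c, 32
//   %t:_(s32)       = G_TRUNC %s
// The shift amount (32) equals the element size, so %t is just %y.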
6854
6855unsigned CombinerHelper::getFPMinMaxOpcForSelect(
6856 CmpInst::Predicate Pred, LLT DstTy,
6857 SelectPatternNaNBehaviour VsNaNRetVal) const {
6858 assert(VsNaNRetVal != SelectPatternNaNBehaviour::NOT_APPLICABLE &&
6859 "Expected a NaN behaviour?");
6860 // Choose an opcode based on legality or on the behaviour when one of the
6861 // LHS/RHS may be NaN.
6862 switch (Pred) {
6863 default:
6864 return 0;
6865 case CmpInst::FCMP_UGT:
6866 case CmpInst::FCMP_UGE:
6867 case CmpInst::FCMP_OGT:
6868 case CmpInst::FCMP_OGE:
6869 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_OTHER)
6870 return TargetOpcode::G_FMAXNUM;
6871 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_NAN)
6872 return TargetOpcode::G_FMAXIMUM;
6873 if (isLegal({TargetOpcode::G_FMAXNUM, {DstTy}}))
6874 return TargetOpcode::G_FMAXNUM;
6875 if (isLegal({TargetOpcode::G_FMAXIMUM, {DstTy}}))
6876 return TargetOpcode::G_FMAXIMUM;
6877 return 0;
6878 case CmpInst::FCMP_ULT:
6879 case CmpInst::FCMP_ULE:
6880 case CmpInst::FCMP_OLT:
6881 case CmpInst::FCMP_OLE:
6882 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_OTHER)
6883 return TargetOpcode::G_FMINNUM;
6884 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_NAN)
6885 return TargetOpcode::G_FMINIMUM;
6886 if (isLegal({TargetOpcode::G_FMINNUM, {DstTy}}))
6887 return TargetOpcode::G_FMINNUM;
6888 if (!isLegal({TargetOpcode::G_FMINIMUM, {DstTy}}))
6889 return 0;
6890 return TargetOpcode::G_FMINIMUM;
6891 }
6892}
6893
6894CombinerHelper::SelectPatternNaNBehaviour
6895CombinerHelper::computeRetValAgainstNaN(Register LHS, Register RHS,
6896 bool IsOrderedComparison) const {
6897 bool LHSSafe = isKnownNeverNaN(LHS, MRI);
6898 bool RHSSafe = isKnownNeverNaN(RHS, MRI);
6899 // Completely unsafe.
6900 if (!LHSSafe && !RHSSafe)
6901 return SelectPatternNaNBehaviour::NOT_APPLICABLE;
6902 if (LHSSafe && RHSSafe)
6903 return SelectPatternNaNBehaviour::RETURNS_ANY;
6904 // An ordered comparison will return false when given a NaN, so it
6905 // returns the RHS.
6906 if (IsOrderedComparison)
6907 return LHSSafe ? SelectPatternNaNBehaviour::RETURNS_NAN
6908 : SelectPatternNaNBehaviour::RETURNS_OTHER;
6909 // An unordered comparison will return true when given a NaN, so it
6910 // returns the LHS.
6911 return LHSSafe ? SelectPatternNaNBehaviour::RETURNS_OTHER
6912 : SelectPatternNaNBehaviour::RETURNS_NAN;
6913}
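// A worked example of the classification above: for
//   select (fcmp ogt %x, %y), %x, %y
// where only %y is known never to be NaN, a NaN %x makes the ordered compare
// false, so the select yields %y, the non-NaN operand: RETURNS_OTHER. With the
// unordered predicate (fcmp ugt) the compare would instead be true and the
// select would yield the NaN %x: RETURNS_NAN.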
6914
6915bool CombinerHelper::matchFPSelectToMinMax(Register Dst, Register Cond,
6916 Register TrueVal, Register FalseVal,
6917 BuildFnTy &MatchInfo) const {
6918 // Match: select (fcmp cond x, y) x, y
6919 // select (fcmp cond x, y) y, x
6920 // And turn it into fminnum/fmaxnum or fminimum/fmaximum based on the condition.
6921 LLT DstTy = MRI.getType(Dst);
6922 // Bail out early on pointers, since we'll never want to fold to a min/max.
6923 if (DstTy.isPointer())
6924 return false;
6925 // Match a floating point compare with a less-than/greater-than predicate.
6926 // TODO: Allow multiple users of the compare if they are all selects.
6927 CmpInst::Predicate Pred;
6928 Register CmpLHS, CmpRHS;
6929 if (!mi_match(Cond, MRI,
6931 m_GFCmp(m_Pred(Pred), m_Reg(CmpLHS), m_Reg(CmpRHS)))) ||
6932 CmpInst::isEquality(Pred))
6933 return false;
6934 SelectPatternNaNBehaviour ResWithKnownNaNInfo =
6935 computeRetValAgainstNaN(CmpLHS, CmpRHS, CmpInst::isOrdered(Pred));
6936 if (ResWithKnownNaNInfo == SelectPatternNaNBehaviour::NOT_APPLICABLE)
6937 return false;
6938 if (TrueVal == CmpRHS && FalseVal == CmpLHS) {
6939 std::swap(CmpLHS, CmpRHS);
6940 Pred = CmpInst::getSwappedPredicate(Pred);
6941 if (ResWithKnownNaNInfo == SelectPatternNaNBehaviour::RETURNS_NAN)
6942 ResWithKnownNaNInfo = SelectPatternNaNBehaviour::RETURNS_OTHER;
6943 else if (ResWithKnownNaNInfo == SelectPatternNaNBehaviour::RETURNS_OTHER)
6944 ResWithKnownNaNInfo = SelectPatternNaNBehaviour::RETURNS_NAN;
6945 }
6946 if (TrueVal != CmpLHS || FalseVal != CmpRHS)
6947 return false;
6948 // Decide what type of max/min this should be based on the predicate.
6949 unsigned Opc = getFPMinMaxOpcForSelect(Pred, DstTy, ResWithKnownNaNInfo);
6950 if (!Opc || !isLegal({Opc, {DstTy}}))
6951 return false;
6952 // Comparisons between signed zero and zero may have different results...
6953 // unless we have fmaximum/fminimum. In that case, we know -0 < 0.
6954 if (Opc != TargetOpcode::G_FMAXIMUM && Opc != TargetOpcode::G_FMINIMUM) {
6955 // We don't know if a comparison between two 0s will give us a consistent
6956 // result. Be conservative and only proceed if at least one side is
6957 // non-zero.
6958 auto KnownNonZeroSide = getFConstantVRegValWithLookThrough(CmpLHS, MRI);
6959 if (!KnownNonZeroSide || !KnownNonZeroSide->Value.isNonZero()) {
6960 KnownNonZeroSide = getFConstantVRegValWithLookThrough(CmpRHS, MRI);
6961 if (!KnownNonZeroSide || !KnownNonZeroSide->Value.isNonZero())
6962 return false;
6963 }
6964 }
6965 MatchInfo = [=](MachineIRBuilder &B) {
6966 B.buildInstr(Opc, {Dst}, {CmpLHS, CmpRHS});
6967 };
6968 return true;
6969}
6970
6972 BuildFnTy &MatchInfo) const {
6973 // TODO: Handle integer cases.
6974 assert(MI.getOpcode() == TargetOpcode::G_SELECT);
6975 // Condition may be fed by a truncated compare.
6976 Register Cond = MI.getOperand(1).getReg();
6977 Register MaybeTrunc;
6978 if (mi_match(Cond, MRI, m_OneNonDBGUse(m_GTrunc(m_Reg(MaybeTrunc)))))
6979 Cond = MaybeTrunc;
6980 Register Dst = MI.getOperand(0).getReg();
6981 Register TrueVal = MI.getOperand(2).getReg();
6982 Register FalseVal = MI.getOperand(3).getReg();
6983 return matchFPSelectToMinMax(Dst, Cond, TrueVal, FalseVal, MatchInfo);
6984}
6985
6987 BuildFnTy &MatchInfo) const {
6988 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
6989 // (X + Y) == X --> Y == 0
6990 // (X + Y) != X --> Y != 0
6991 // (X - Y) == X --> Y == 0
6992 // (X - Y) != X --> Y != 0
6993 // (X ^ Y) == X --> Y == 0
6994 // (X ^ Y) != X --> Y != 0
6995 Register Dst = MI.getOperand(0).getReg();
6996 CmpInst::Predicate Pred;
6997 Register X, Y, OpLHS, OpRHS;
6998 bool MatchedSub = mi_match(
6999 Dst, MRI,
7000 m_c_GICmp(m_Pred(Pred), m_Reg(X), m_GSub(m_Reg(OpLHS), m_Reg(Y))));
7001 if (MatchedSub && X != OpLHS)
7002 return false;
7003 if (!MatchedSub) {
7004 if (!mi_match(Dst, MRI,
7005 m_c_GICmp(m_Pred(Pred), m_Reg(X),
7006 m_any_of(m_GAdd(m_Reg(OpLHS), m_Reg(OpRHS)),
7007 m_GXor(m_Reg(OpLHS), m_Reg(OpRHS))))))
7008 return false;
7009 Y = X == OpLHS ? OpRHS : X == OpRHS ? OpLHS : Register();
7010 }
7011 MatchInfo = [=](MachineIRBuilder &B) {
7012 auto Zero = B.buildConstant(MRI.getType(Y), 0);
7013 B.buildICmp(Pred, Dst, Y, Zero);
7014 };
7015 return CmpInst::isEquality(Pred) && Y.isValid();
7016}
7017
7018/// Return the minimum useless shift amount that results in complete loss of the
7019/// source value. Return std::nullopt when it cannot determine a value.
7020static std::optional<unsigned>
7021getMinUselessShift(KnownBits ValueKB, unsigned Opcode,
7022 std::optional<int64_t> &Result) {
7023 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_LSHR ||
7024 Opcode == TargetOpcode::G_ASHR) &&
7025 "Expect G_SHL, G_LSHR or G_ASHR.");
7026 auto SignificantBits = 0;
7027 switch (Opcode) {
7028 case TargetOpcode::G_SHL:
7029 SignificantBits = ValueKB.countMinTrailingZeros();
7030 Result = 0;
7031 break;
7032 case TargetOpcode::G_LSHR:
7033 Result = 0;
7034 SignificantBits = ValueKB.countMinLeadingZeros();
7035 break;
7036 case TargetOpcode::G_ASHR:
7037 if (ValueKB.isNonNegative()) {
7038 SignificantBits = ValueKB.countMinLeadingZeros();
7039 Result = 0;
7040 } else if (ValueKB.isNegative()) {
7041 SignificantBits = ValueKB.countMinLeadingOnes();
7042 Result = -1;
7043 } else {
7044 // Cannot determine shift result.
7045 Result = std::nullopt;
7046 }
7047 break;
7048 default:
7049 break;
7050 }
7051 return ValueKB.getBitWidth() - SignificantBits;
7052}
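// A worked example: for a 32-bit value whose known bits give
// countMinLeadingZeros() == 24 (the value fits in its low 8 bits), G_LSHR
// yields SignificantBits = 24 and the function returns 32 - 24 = 8, i.e. any
// logical right shift by 8 or more produces the known result 0.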
7053
7055 MachineInstr &MI, std::optional<int64_t> &MatchInfo) const {
7056 Register ShiftVal = MI.getOperand(1).getReg();
7057 Register ShiftReg = MI.getOperand(2).getReg();
7058 LLT ResTy = MRI.getType(MI.getOperand(0).getReg());
7059 auto IsShiftTooBig = [&](const Constant *C) {
7060 auto *CI = dyn_cast<ConstantInt>(C);
7061 if (!CI)
7062 return false;
7063 if (CI->uge(ResTy.getScalarSizeInBits())) {
7064 MatchInfo = std::nullopt;
7065 return true;
7066 }
7067 auto OptMaxUsefulShift = getMinUselessShift(VT->getKnownBits(ShiftVal),
7068 MI.getOpcode(), MatchInfo);
7069 return OptMaxUsefulShift && CI->uge(*OptMaxUsefulShift);
7070 };
7071 return matchUnaryPredicate(MRI, ShiftReg, IsShiftTooBig);
7072}
7073
7075 unsigned LHSOpndIdx = 1;
7076 unsigned RHSOpndIdx = 2;
7077 switch (MI.getOpcode()) {
7078 case TargetOpcode::G_UADDO:
7079 case TargetOpcode::G_SADDO:
7080 case TargetOpcode::G_UMULO:
7081 case TargetOpcode::G_SMULO:
7082 LHSOpndIdx = 2;
7083 RHSOpndIdx = 3;
7084 break;
7085 default:
7086 break;
7087 }
7088 Register LHS = MI.getOperand(LHSOpndIdx).getReg();
7089 Register RHS = MI.getOperand(RHSOpndIdx).getReg();
7090 if (!getIConstantVRegVal(LHS, MRI)) {
7091 // Skip commuting if LHS is not a constant. But, LHS may be a
7092 // G_CONSTANT_FOLD_BARRIER. If so we commute as long as we don't already
7093 // have a constant on the RHS.
7094 if (MRI.getVRegDef(LHS)->getOpcode() !=
7095 TargetOpcode::G_CONSTANT_FOLD_BARRIER)
7096 return false;
7097 }
7098 // Commute as long as RHS is not a constant or G_CONSTANT_FOLD_BARRIER.
7099 return MRI.getVRegDef(RHS)->getOpcode() !=
7100 TargetOpcode::G_CONSTANT_FOLD_BARRIER &&
7101 !getIConstantVRegVal(RHS, MRI);
7102}
7103
7105 Register LHS = MI.getOperand(1).getReg();
7106 Register RHS = MI.getOperand(2).getReg();
7107 std::optional<FPValueAndVReg> ValAndVReg;
7108 if (!mi_match(LHS, MRI, m_GFCstOrSplat(ValAndVReg)))
7109 return false;
7110 return !mi_match(RHS, MRI, m_GFCstOrSplat(ValAndVReg));
7111}
7112
7114 Observer.changingInstr(MI);
7115 unsigned LHSOpndIdx = 1;
7116 unsigned RHSOpndIdx = 2;
7117 switch (MI.getOpcode()) {
7118 case TargetOpcode::G_UADDO:
7119 case TargetOpcode::G_SADDO:
7120 case TargetOpcode::G_UMULO:
7121 case TargetOpcode::G_SMULO:
7122 LHSOpndIdx = 2;
7123 RHSOpndIdx = 3;
7124 break;
7125 default:
7126 break;
7127 }
7128 Register LHSReg = MI.getOperand(LHSOpndIdx).getReg();
7129 Register RHSReg = MI.getOperand(RHSOpndIdx).getReg();
7130 MI.getOperand(LHSOpndIdx).setReg(RHSReg);
7131 MI.getOperand(RHSOpndIdx).setReg(LHSReg);
7132 Observer.changedInstr(MI);
7133}
7134
7135bool CombinerHelper::isOneOrOneSplat(Register Src, bool AllowUndefs) const {
7136 LLT SrcTy = MRI.getType(Src);
7137 if (SrcTy.isFixedVector())
7138 return isConstantSplatVector(Src, 1, AllowUndefs);
7139 if (SrcTy.isScalar()) {
7140 if (AllowUndefs && getOpcodeDef<GImplicitDef>(Src, MRI) != nullptr)
7141 return true;
7142 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
7143 return IConstant && IConstant->Value == 1;
7144 }
7145 return false; // scalable vector
7146}
7147
7148bool CombinerHelper::isZeroOrZeroSplat(Register Src, bool AllowUndefs) const {
7149 LLT SrcTy = MRI.getType(Src);
7150 if (SrcTy.isFixedVector())
7151 return isConstantSplatVector(Src, 0, AllowUndefs);
7152 if (SrcTy.isScalar()) {
7153 if (AllowUndefs && getOpcodeDef<GImplicitDef>(Src, MRI) != nullptr)
7154 return true;
7155 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
7156 return IConstant && IConstant->Value == 0;
7157 }
7158 return false; // scalable vector
7159}
7160
7161// Ignores COPYs during conformance checks.
7162// FIXME scalable vectors.
7163bool CombinerHelper::isConstantSplatVector(Register Src, int64_t SplatValue,
7164 bool AllowUndefs) const {
7165 GBuildVector *BuildVector = getOpcodeDef<GBuildVector>(Src, MRI);
7166 if (!BuildVector)
7167 return false;
7168 unsigned NumSources = BuildVector->getNumSources();
7169
7170 for (unsigned I = 0; I < NumSources; ++I) {
7171 GImplicitDef *ImplicitDef =
7173 if (ImplicitDef && AllowUndefs)
7174 continue;
7175 if (ImplicitDef && !AllowUndefs)
7176 return false;
7177 std::optional<ValueAndVReg> IConstant =
7179 if (IConstant && IConstant->Value == SplatValue)
7180 continue;
7181 return false;
7182 }
7183 return true;
7184}
7185
7186// Ignores COPYs during lookups.
7187// FIXME scalable vectors
7188std::optional<APInt>
7189CombinerHelper::getConstantOrConstantSplatVector(Register Src) const {
7190 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
7191 if (IConstant)
7192 return IConstant->Value;
7193
7194 GBuildVector *BuildVector = getOpcodeDef<GBuildVector>(Src, MRI);
7195 if (!BuildVector)
7196 return std::nullopt;
7197 unsigned NumSources = BuildVector->getNumSources();
7198
7199 std::optional<APInt> Value = std::nullopt;
7200 for (unsigned I = 0; I < NumSources; ++I) {
7201 std::optional<ValueAndVReg> IConstant =
7203 if (!IConstant)
7204 return std::nullopt;
7205 if (!Value)
7206 Value = IConstant->Value;
7207 else if (*Value != IConstant->Value)
7208 return std::nullopt;
7209 }
7210 return Value;
7211}
7212
7213// FIXME G_SPLAT_VECTOR
7214bool CombinerHelper::isConstantOrConstantVectorI(Register Src) const {
7215 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
7216 if (IConstant)
7217 return true;
7218
7219 GBuildVector *BuildVector = getOpcodeDef<GBuildVector>(Src, MRI);
7220 if (!BuildVector)
7221 return false;
7222
7223 unsigned NumSources = BuildVector->getNumSources();
7224 for (unsigned I = 0; I < NumSources; ++I) {
7225 std::optional<ValueAndVReg> IConstant =
7227 if (!IConstant)
7228 return false;
7229 }
7230 return true;
7231}
7232
7233// TODO: use knownbits to determine zeros
7234bool CombinerHelper::tryFoldSelectOfConstants(GSelect *Select,
7235 BuildFnTy &MatchInfo) const {
7236 uint32_t Flags = Select->getFlags();
7237 Register Dest = Select->getReg(0);
7238 Register Cond = Select->getCondReg();
7239 Register True = Select->getTrueReg();
7240 Register False = Select->getFalseReg();
7241 LLT CondTy = MRI.getType(Select->getCondReg());
7242 LLT TrueTy = MRI.getType(Select->getTrueReg());
7243
7244 // We only do this combine for scalar boolean conditions.
7245 if (CondTy != LLT::scalar(1))
7246 return false;
7247
7248 if (TrueTy.isPointer())
7249 return false;
7250
7251 // Both are scalars.
7252 std::optional<ValueAndVReg> TrueOpt =
7254 std::optional<ValueAndVReg> FalseOpt =
7256
7257 if (!TrueOpt || !FalseOpt)
7258 return false;
7259
7260 APInt TrueValue = TrueOpt->Value;
7261 APInt FalseValue = FalseOpt->Value;
7262
7263 // select Cond, 1, 0 --> zext (Cond)
7264 if (TrueValue.isOne() && FalseValue.isZero()) {
7265 MatchInfo = [=](MachineIRBuilder &B) {
7266 B.setInstrAndDebugLoc(*Select);
7267 B.buildZExtOrTrunc(Dest, Cond);
7268 };
7269 return true;
7270 }
7271
7272 // select Cond, -1, 0 --> sext (Cond)
7273 if (TrueValue.isAllOnes() && FalseValue.isZero()) {
7274 MatchInfo = [=](MachineIRBuilder &B) {
7275 B.setInstrAndDebugLoc(*Select);
7276 B.buildSExtOrTrunc(Dest, Cond);
7277 };
7278 return true;
7279 }
7280
7281 // select Cond, 0, 1 --> zext (!Cond)
7282 if (TrueValue.isZero() && FalseValue.isOne()) {
7283 MatchInfo = [=](MachineIRBuilder &B) {
7284 B.setInstrAndDebugLoc(*Select);
7285 Register Inner = MRI.createGenericVirtualRegister(CondTy);
7286 B.buildNot(Inner, Cond);
7287 B.buildZExtOrTrunc(Dest, Inner);
7288 };
7289 return true;
7290 }
7291
7292 // select Cond, 0, -1 --> sext (!Cond)
7293 if (TrueValue.isZero() && FalseValue.isAllOnes()) {
7294 MatchInfo = [=](MachineIRBuilder &B) {
7295 B.setInstrAndDebugLoc(*Select);
7296 Register Inner = MRI.createGenericVirtualRegister(CondTy);
7297 B.buildNot(Inner, Cond);
7298 B.buildSExtOrTrunc(Dest, Inner);
7299 };
7300 return true;
7301 }
7302
7303 // select Cond, C1, C1-1 --> add (zext Cond), C1-1
7304 if (TrueValue - 1 == FalseValue) {
7305 MatchInfo = [=](MachineIRBuilder &B) {
7306 B.setInstrAndDebugLoc(*Select);
7307 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
7308 B.buildZExtOrTrunc(Inner, Cond);
7309 B.buildAdd(Dest, Inner, False);
7310 };
7311 return true;
7312 }
7313
7314 // select Cond, C1, C1+1 --> add (sext Cond), C1+1
7315 if (TrueValue + 1 == FalseValue) {
7316 MatchInfo = [=](MachineIRBuilder &B) {
7317 B.setInstrAndDebugLoc(*Select);
7318 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
7319 B.buildSExtOrTrunc(Inner, Cond);
7320 B.buildAdd(Dest, Inner, False);
7321 };
7322 return true;
7323 }
7324
7325 // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
7326 if (TrueValue.isPowerOf2() && FalseValue.isZero()) {
7327 MatchInfo = [=](MachineIRBuilder &B) {
7328 B.setInstrAndDebugLoc(*Select);
7329 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
7330 B.buildZExtOrTrunc(Inner, Cond);
7331 // The shift amount must be scalar.
7332 LLT ShiftTy = TrueTy.isVector() ? TrueTy.getElementType() : TrueTy;
7333 auto ShAmtC = B.buildConstant(ShiftTy, TrueValue.exactLogBase2());
7334 B.buildShl(Dest, Inner, ShAmtC, Flags);
7335 };
7336 return true;
7337 }
7338
7339 // select Cond, 0, Pow2 --> (zext (!Cond)) << log2(Pow2)
7340 if (FalseValue.isPowerOf2() && TrueValue.isZero()) {
7341 MatchInfo = [=](MachineIRBuilder &B) {
7342 B.setInstrAndDebugLoc(*Select);
7343 Register Not = MRI.createGenericVirtualRegister(CondTy);
7344 B.buildNot(Not, Cond);
7345 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
7346 B.buildZExtOrTrunc(Inner, Not);
7347 // The shift amount must be scalar.
7348 LLT ShiftTy = TrueTy.isVector() ? TrueTy.getElementType() : TrueTy;
7349 auto ShAmtC = B.buildConstant(ShiftTy, FalseValue.exactLogBase2());
7350 B.buildShl(Dest, Inner, ShAmtC, Flags);
7351 };
7352 return true;
7353 }
7354
7355 // select Cond, -1, C --> or (sext Cond), C
7356 if (TrueValue.isAllOnes()) {
7357 MatchInfo = [=](MachineIRBuilder &B) {
7358 B.setInstrAndDebugLoc(*Select);
7359 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
7360 B.buildSExtOrTrunc(Inner, Cond);
7361 B.buildOr(Dest, Inner, False, Flags);
7362 };
7363 return true;
7364 }
7365
7366 // select Cond, C, -1 --> or (sext (not Cond)), C
7367 if (FalseValue.isAllOnes()) {
7368 MatchInfo = [=](MachineIRBuilder &B) {
7369 B.setInstrAndDebugLoc(*Select);
7370 Register Not = MRI.createGenericVirtualRegister(CondTy);
7371 B.buildNot(Not, Cond);
7372 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
7373 B.buildSExtOrTrunc(Inner, Not);
7374 B.buildOr(Dest, Inner, True, Flags);
7375 };
7376 return true;
7377 }
7378
7379 return false;
7380}
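// Worked instances of two of the folds above (register names are made up):
//   select %c, 7, 6 --> G_ADD (zext %c), 6       since 7 - 1 == 6
//     (%c = 1 gives 1 + 6 = 7, %c = 0 gives 0 + 6 = 6)
//   select %c, 8, 0 --> G_SHL (zext %c), 3       since 8 is a power of two
//     (%c = 1 gives 1 << 3 = 8, %c = 0 gives 0 << 3 = 0)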
7381
7382// TODO: use knownbits to determine zeros
7383bool CombinerHelper::tryFoldBoolSelectToLogic(GSelect *Select,
7384 BuildFnTy &MatchInfo) const {
7385 uint32_t Flags = Select->getFlags();
7386 Register DstReg = Select->getReg(0);
7387 Register Cond = Select->getCondReg();
7388 Register True = Select->getTrueReg();
7389 Register False = Select->getFalseReg();
7390 LLT CondTy = MRI.getType(Select->getCondReg());
7391 LLT TrueTy = MRI.getType(Select->getTrueReg());
7392
7393 // Boolean or fixed vector of booleans.
7394 if (CondTy.isScalableVector() ||
7395 (CondTy.isFixedVector() &&
7396 CondTy.getElementType().getScalarSizeInBits() != 1) ||
7397 CondTy.getScalarSizeInBits() != 1)
7398 return false;
7399
7400 if (CondTy != TrueTy)
7401 return false;
7402
7403 // select Cond, Cond, F --> or Cond, F
7404 // select Cond, 1, F --> or Cond, F
7405 if ((Cond == True) || isOneOrOneSplat(True, /* AllowUndefs */ true)) {
7406 MatchInfo = [=](MachineIRBuilder &B) {
7407 B.setInstrAndDebugLoc(*Select);
7408 Register Ext = MRI.createGenericVirtualRegister(TrueTy);
7409 B.buildZExtOrTrunc(Ext, Cond);
7410 auto FreezeFalse = B.buildFreeze(TrueTy, False);
7411 B.buildOr(DstReg, Ext, FreezeFalse, Flags);
7412 };
7413 return true;
7414 }
7415
7416 // select Cond, T, Cond --> and Cond, T
7417 // select Cond, T, 0 --> and Cond, T
7418 if ((Cond == False) || isZeroOrZeroSplat(False, /* AllowUndefs */ true)) {
7419 MatchInfo = [=](MachineIRBuilder &B) {
7420 B.setInstrAndDebugLoc(*Select);
7421 Register Ext = MRI.createGenericVirtualRegister(TrueTy);
7422 B.buildZExtOrTrunc(Ext, Cond);
7423 auto FreezeTrue = B.buildFreeze(TrueTy, True);
7424 B.buildAnd(DstReg, Ext, FreezeTrue);
7425 };
7426 return true;
7427 }
7428
7429 // select Cond, T, 1 --> or (not Cond), T
7430 if (isOneOrOneSplat(False, /* AllowUndefs */ true)) {
7431 MatchInfo = [=](MachineIRBuilder &B) {
7432 B.setInstrAndDebugLoc(*Select);
7433 // First the not.
7434 Register Inner = MRI.createGenericVirtualRegister(CondTy);
7435 B.buildNot(Inner, Cond);
7436 // Then an ext to match the destination register.
7437 Register Ext = MRI.createGenericVirtualRegister(TrueTy);
7438 B.buildZExtOrTrunc(Ext, Inner);
7439 auto FreezeTrue = B.buildFreeze(TrueTy, True);
7440 B.buildOr(DstReg, Ext, FreezeTrue, Flags);
7441 };
7442 return true;
7443 }
7444
7445 // select Cond, 0, F --> and (not Cond), F
7446 if (isZeroOrZeroSplat(True, /* AllowUndefs */ true)) {
7447 MatchInfo = [=](MachineIRBuilder &B) {
7448 B.setInstrAndDebugLoc(*Select);
7449 // First the not.
7450 Register Inner = MRI.createGenericVirtualRegister(CondTy);
7451 B.buildNot(Inner, Cond);
7452 // Then an ext to match the destination register.
7453 Register Ext = MRI.createGenericVirtualRegister(TrueTy);
7454 B.buildZExtOrTrunc(Ext, Inner);
7455 auto FreezeFalse = B.buildFreeze(TrueTy, False);
7456 B.buildAnd(DstReg, Ext, FreezeFalse);
7457 };
7458 return true;
7459 }
7460
7461 return false;
7462}
7463
7465 BuildFnTy &MatchInfo) const {
7466 GSelect *Select = cast<GSelect>(MRI.getVRegDef(MO.getReg()));
7467 GICmp *Cmp = cast<GICmp>(MRI.getVRegDef(Select->getCondReg()));
7468
7469 Register DstReg = Select->getReg(0);
7470 Register True = Select->getTrueReg();
7471 Register False = Select->getFalseReg();
7472 LLT DstTy = MRI.getType(DstReg);
7473
7474 if (DstTy.isPointer())
7475 return false;
7476
7477 // We want to fold the icmp and replace the select.
7478 if (!MRI.hasOneNonDBGUse(Cmp->getReg(0)))
7479 return false;
7480
7481 CmpInst::Predicate Pred = Cmp->getCond();
7482 // We need a relational (greater-than/less-than) predicate for this
7483 // canonicalization; equality predicates are rejected.
7484 if (CmpInst::isEquality(Pred))
7485 return false;
7486
7487 Register CmpLHS = Cmp->getLHSReg();
7488 Register CmpRHS = Cmp->getRHSReg();
7489
7490 // We can swap CmpLHS and CmpRHS for a higher hit rate.
7491 if (True == CmpRHS && False == CmpLHS) {
7492 std::swap(CmpLHS, CmpRHS);
7493 Pred = CmpInst::getSwappedPredicate(Pred);
7494 }
7495
7496 // (icmp X, Y) ? X : Y -> integer minmax.
7497 // see matchSelectPattern in ValueTracking.
7498 // Legality between G_SELECT and integer minmax can differ.
7499 if (True != CmpLHS || False != CmpRHS)
7500 return false;
7501
7502 switch (Pred) {
7503 case ICmpInst::ICMP_UGT:
7504 case ICmpInst::ICMP_UGE: {
7505 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMAX, DstTy}))
7506 return false;
7507 MatchInfo = [=](MachineIRBuilder &B) { B.buildUMax(DstReg, True, False); };
7508 return true;
7509 }
7510 case ICmpInst::ICMP_SGT:
7511 case ICmpInst::ICMP_SGE: {
7512 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SMAX, DstTy}))
7513 return false;
7514 MatchInfo = [=](MachineIRBuilder &B) { B.buildSMax(DstReg, True, False); };
7515 return true;
7516 }
7517 case ICmpInst::ICMP_ULT:
7518 case ICmpInst::ICMP_ULE: {
7519 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMIN, DstTy}))
7520 return false;
7521 MatchInfo = [=](MachineIRBuilder &B) { B.buildUMin(DstReg, True, False); };
7522 return true;
7523 }
7524 case ICmpInst::ICMP_SLT:
7525 case ICmpInst::ICMP_SLE: {
7526 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SMIN, DstTy}))
7527 return false;
7528 MatchInfo = [=](MachineIRBuilder &B) { B.buildSMin(DstReg, True, False); };
7529 return true;
7530 }
7531 default:
7532 return false;
7533 }
7534}
7535
7536// (neg (min/max x, (neg x))) --> (max/min x, (neg x))
7538 BuildFnTy &MatchInfo) const {
7539 assert(MI.getOpcode() == TargetOpcode::G_SUB);
7540 Register DestReg = MI.getOperand(0).getReg();
7541 LLT DestTy = MRI.getType(DestReg);
7542
7543 Register X;
7544 Register Sub0;
7545 auto NegPattern = m_all_of(m_Neg(m_DeferredReg(X)), m_Reg(Sub0));
7546 if (mi_match(DestReg, MRI,
7547 m_Neg(m_OneUse(m_any_of(m_GSMin(m_Reg(X), NegPattern),
7548 m_GSMax(m_Reg(X), NegPattern),
7549 m_GUMin(m_Reg(X), NegPattern),
7550 m_GUMax(m_Reg(X), NegPattern)))))) {
7551 MachineInstr *MinMaxMI = MRI.getVRegDef(MI.getOperand(2).getReg());
7552 unsigned NewOpc = getInverseGMinMaxOpcode(MinMaxMI->getOpcode());
7553 if (isLegal({NewOpc, {DestTy}})) {
7554 MatchInfo = [=](MachineIRBuilder &B) {
7555 B.buildInstr(NewOpc, {DestReg}, {X, Sub0});
7556 };
7557 return true;
7558 }
7559 }
7560
7561 return false;
7562}
7563
7566
7567 if (tryFoldSelectOfConstants(Select, MatchInfo))
7568 return true;
7569
7570 if (tryFoldBoolSelectToLogic(Select, MatchInfo))
7571 return true;
7572
7573 return false;
7574}
7575
7576/// Fold (icmp Pred1 V1, C1) && (icmp Pred2 V2, C2)
7577/// or (icmp Pred1 V1, C1) || (icmp Pred2 V2, C2)
7578/// into a single comparison using range-based reasoning.
7579/// see InstCombinerImpl::foldAndOrOfICmpsUsingRanges.
7580bool CombinerHelper::tryFoldAndOrOrICmpsUsingRanges(
7581 GLogicalBinOp *Logic, BuildFnTy &MatchInfo) const {
7582 assert(Logic->getOpcode() != TargetOpcode::G_XOR && "unexpected xor");
7583 bool IsAnd = Logic->getOpcode() == TargetOpcode::G_AND;
7584 Register DstReg = Logic->getReg(0);
7585 Register LHS = Logic->getLHSReg();
7586 Register RHS = Logic->getRHSReg();
7587 unsigned Flags = Logic->getFlags();
7588
7589 // We need a G_ICMP on the LHS register.
7590 GICmp *Cmp1 = getOpcodeDef<GICmp>(LHS, MRI);
7591 if (!Cmp1)
7592 return false;
7593
7594 // We need a G_ICMP on the RHS register.
7595 GICmp *Cmp2 = getOpcodeDef<GICmp>(RHS, MRI);
7596 if (!Cmp2)
7597 return false;
7598
7599 // We want to fold the icmps.
7600 if (!MRI.hasOneNonDBGUse(Cmp1->getReg(0)) ||
7601 !MRI.hasOneNonDBGUse(Cmp2->getReg(0)))
7602 return false;
7603
7604 APInt C1;
7605 APInt C2;
7606 std::optional<ValueAndVReg> MaybeC1 =
7608 if (!MaybeC1)
7609 return false;
7610 C1 = MaybeC1->Value;
7611
7612 std::optional<ValueAndVReg> MaybeC2 =
7614 if (!MaybeC2)
7615 return false;
7616 C2 = MaybeC2->Value;
7617
7618 Register R1 = Cmp1->getLHSReg();
7619 Register R2 = Cmp2->getLHSReg();
7620 CmpInst::Predicate Pred1 = Cmp1->getCond();
7621 CmpInst::Predicate Pred2 = Cmp2->getCond();
7622 LLT CmpTy = MRI.getType(Cmp1->getReg(0));
7623 LLT CmpOperandTy = MRI.getType(R1);
7624
7625 if (CmpOperandTy.isPointer())
7626 return false;
7627
7628 // We build ands, adds, and constants of type CmpOperandTy.
7629 // They must be legal to build.
7630 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_AND, CmpOperandTy}) ||
7631 !isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, CmpOperandTy}) ||
7632 !isConstantLegalOrBeforeLegalizer(CmpOperandTy))
7633 return false;
7634
7635 // Look through add of a constant offset on R1, R2, or both operands. This
7636 // allows us to turn the R + C' < C'' range idiom into a proper range.
7637 std::optional<APInt> Offset1;
7638 std::optional<APInt> Offset2;
7639 if (R1 != R2) {
7640 if (GAdd *Add = getOpcodeDef<GAdd>(R1, MRI)) {
7641 std::optional<ValueAndVReg> MaybeOffset1 =
7643 if (MaybeOffset1) {
7644 R1 = Add->getLHSReg();
7645 Offset1 = MaybeOffset1->Value;
7646 }
7647 }
7648 if (GAdd *Add = getOpcodeDef<GAdd>(R2, MRI)) {
7649 std::optional<ValueAndVReg> MaybeOffset2 =
7651 if (MaybeOffset2) {
7652 R2 = Add->getLHSReg();
7653 Offset2 = MaybeOffset2->Value;
7654 }
7655 }
7656 }
7657
7658 if (R1 != R2)
7659 return false;
7660
7661 // We calculate the icmp ranges including maybe offsets.
7662 ConstantRange CR1 = ConstantRange::makeExactICmpRegion(
7663 IsAnd ? ICmpInst::getInversePredicate(Pred1) : Pred1, C1);
7664 if (Offset1)
7665 CR1 = CR1.subtract(*Offset1);
7666
7667 ConstantRange CR2 = ConstantRange::makeExactICmpRegion(
7668 IsAnd ? ICmpInst::getInversePredicate(Pred2) : Pred2, C2);
7669 if (Offset2)
7670 CR2 = CR2.subtract(*Offset2);
7671
7672 bool CreateMask = false;
7673 APInt LowerDiff;
7674 std::optional<ConstantRange> CR = CR1.exactUnionWith(CR2);
7675 if (!CR) {
7676 // We need non-wrapping ranges.
7677 if (CR1.isWrappedSet() || CR2.isWrappedSet())
7678 return false;
7679
7680 // Check whether we have equal-size ranges that only differ by one bit.
7681 // In that case we can apply a mask to map one range onto the other.
7682 LowerDiff = CR1.getLower() ^ CR2.getLower();
7683 APInt UpperDiff = (CR1.getUpper() - 1) ^ (CR2.getUpper() - 1);
7684 APInt CR1Size = CR1.getUpper() - CR1.getLower();
7685 if (!LowerDiff.isPowerOf2() || LowerDiff != UpperDiff ||
7686 CR1Size != CR2.getUpper() - CR2.getLower())
7687 return false;
7688
7689 CR = CR1.getLower().ult(CR2.getLower()) ? CR1 : CR2;
7690 CreateMask = true;
7691 }
7692
7693 if (IsAnd)
7694 CR = CR->inverse();
7695
7696 CmpInst::Predicate NewPred;
7697 APInt NewC, Offset;
7698 CR->getEquivalentICmp(NewPred, NewC, Offset);
7699
7700 // We take the result type of one of the original icmps, CmpTy, for
7701 // the icmp to be built. The operand type, CmpOperandTy, is used for
7702 // the other instructions and constants to be built. The parameter and
7703 // result types are the same for G_ADD and G_AND. CmpTy and the type of
7704 // DstReg might differ, which is why we zext or trunc the icmp into the
7705 // destination register.
7706
7707 MatchInfo = [=](MachineIRBuilder &B) {
7708 if (CreateMask && Offset != 0) {
7709 auto TildeLowerDiff = B.buildConstant(CmpOperandTy, ~LowerDiff);
7710 auto And = B.buildAnd(CmpOperandTy, R1, TildeLowerDiff); // the mask.
7711 auto OffsetC = B.buildConstant(CmpOperandTy, Offset);
7712 auto Add = B.buildAdd(CmpOperandTy, And, OffsetC, Flags);
7713 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
7714 auto ICmp = B.buildICmp(NewPred, CmpTy, Add, NewCon);
7715 B.buildZExtOrTrunc(DstReg, ICmp);
7716 } else if (CreateMask && Offset == 0) {
7717 auto TildeLowerDiff = B.buildConstant(CmpOperandTy, ~LowerDiff);
7718 auto And = B.buildAnd(CmpOperandTy, R1, TildeLowerDiff); // the mask.
7719 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
7720 auto ICmp = B.buildICmp(NewPred, CmpTy, And, NewCon);
7721 B.buildZExtOrTrunc(DstReg, ICmp);
7722 } else if (!CreateMask && Offset != 0) {
7723 auto OffsetC = B.buildConstant(CmpOperandTy, Offset);
7724 auto Add = B.buildAdd(CmpOperandTy, R1, OffsetC, Flags);
7725 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
7726 auto ICmp = B.buildICmp(NewPred, CmpTy, Add, NewCon);
7727 B.buildZExtOrTrunc(DstReg, ICmp);
7728 } else if (!CreateMask && Offset == 0) {
7729 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
7730 auto ICmp = B.buildICmp(NewPred, CmpTy, R1, NewCon);
7731 B.buildZExtOrTrunc(DstReg, ICmp);
7732 } else {
7733 llvm_unreachable("unexpected configuration of CreateMask and Offset");
7734 }
7735 };
7736 return true;
7737}
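// A worked example of the range reasoning above: (icmp eq %x, 5) || (icmp eq %x, 7).
// CR1 = [5, 6) and CR2 = [7, 8) have no exact union, but they are equal-sized
// ranges whose bounds differ only in bit 1 (LowerDiff == UpperDiff == 2), so the
// mask path applies and the OR of the two compares becomes
//   icmp eq (and %x, ~2), 5
// which holds exactly for %x == 5 and %x == 7.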
7738
7739bool CombinerHelper::tryFoldLogicOfFCmps(GLogicalBinOp *Logic,
7740 BuildFnTy &MatchInfo) const {
7741 assert(Logic->getOpcode() != TargetOpcode::G_XOR && "unexpected xor");
7742 Register DestReg = Logic->getReg(0);
7743 Register LHS = Logic->getLHSReg();
7744 Register RHS = Logic->getRHSReg();
7745 bool IsAnd = Logic->getOpcode() == TargetOpcode::G_AND;
7746
7747 // We need a compare on the LHS register.
7748 GFCmp *Cmp1 = getOpcodeDef<GFCmp>(LHS, MRI);
7749 if (!Cmp1)
7750 return false;
7751
7752 // We need a compare on the RHS register.
7753 GFCmp *Cmp2 = getOpcodeDef<GFCmp>(RHS, MRI);
7754 if (!Cmp2)
7755 return false;
7756
7757 LLT CmpTy = MRI.getType(Cmp1->getReg(0));
7758 LLT CmpOperandTy = MRI.getType(Cmp1->getLHSReg());
7759
7760 // We build one fcmp; this requires the fcmps and the logic op to each have
7761 // a single non-debug use, and the fcmps must have the same shape.
7763 {TargetOpcode::G_FCMP, {CmpTy, CmpOperandTy}}) ||
7764 !MRI.hasOneNonDBGUse(Logic->getReg(0)) ||
7765 !MRI.hasOneNonDBGUse(Cmp1->getReg(0)) ||
7766 !MRI.hasOneNonDBGUse(Cmp2->getReg(0)) ||
7767 MRI.getType(Cmp1->getLHSReg()) != MRI.getType(Cmp2->getLHSReg()))
7768 return false;
7769
7770 CmpInst::Predicate PredL = Cmp1->getCond();
7771 CmpInst::Predicate PredR = Cmp2->getCond();
7772 Register LHS0 = Cmp1->getLHSReg();
7773 Register LHS1 = Cmp1->getRHSReg();
7774 Register RHS0 = Cmp2->getLHSReg();
7775 Register RHS1 = Cmp2->getRHSReg();
7776
7777 if (LHS0 == RHS1 && LHS1 == RHS0) {
7778 // Swap RHS operands to match LHS.
7779 PredR = CmpInst::getSwappedPredicate(PredR);
7780 std::swap(RHS0, RHS1);
7781 }
7782
7783 if (LHS0 == RHS0 && LHS1 == RHS1) {
7784 // We determine the new predicate.
7785 unsigned CmpCodeL = getFCmpCode(PredL);
7786 unsigned CmpCodeR = getFCmpCode(PredR);
7787 unsigned NewPred = IsAnd ? CmpCodeL & CmpCodeR : CmpCodeL | CmpCodeR;
7788 unsigned Flags = Cmp1->getFlags() | Cmp2->getFlags();
7789 MatchInfo = [=](MachineIRBuilder &B) {
7790 // The fcmp predicates fill the lower part of the enum.
7791 FCmpInst::Predicate Pred = static_cast<FCmpInst::Predicate>(NewPred);
7792 if (Pred == FCmpInst::FCMP_FALSE &&
7794 auto False = B.buildConstant(CmpTy, 0);
7795 B.buildZExtOrTrunc(DestReg, False);
7796 } else if (Pred == FCmpInst::FCMP_TRUE &&
7798 auto True =
7799 B.buildConstant(CmpTy, getICmpTrueVal(getTargetLowering(),
7800 CmpTy.isVector() /*isVector*/,
7801 true /*isFP*/));
7802 B.buildZExtOrTrunc(DestReg, True);
7803 } else { // We take the predicate without predicate optimizations.
7804 auto Cmp = B.buildFCmp(Pred, CmpTy, LHS0, LHS1, Flags);
7805 B.buildZExtOrTrunc(DestReg, Cmp);
7806 }
7807 };
7808 return true;
7809 }
7810
7811 return false;
7812}
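// Illustrative instances of combining the predicate codes when both fcmps
// compare the same operands:
//   (fcmp olt %x, %y) || (fcmp ogt %x, %y) --> fcmp one %x, %y
//   (fcmp ole %x, %y) && (fcmp oge %x, %y) --> fcmp oeq %x, %y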
7813
7815 GAnd *And = cast<GAnd>(&MI);
7816
7817 if (tryFoldAndOrOrICmpsUsingRanges(And, MatchInfo))
7818 return true;
7819
7820 if (tryFoldLogicOfFCmps(And, MatchInfo))
7821 return true;
7822
7823 return false;
7824}
7825
7827 GOr *Or = cast<GOr>(&MI);
7828
7829 if (tryFoldAndOrOrICmpsUsingRanges(Or, MatchInfo))
7830 return true;
7831
7832 if (tryFoldLogicOfFCmps(Or, MatchInfo))
7833 return true;
7834
7835 return false;
7836}
7837
7839 BuildFnTy &MatchInfo) const {
7840 GAddCarryOut *Add = cast<GAddCarryOut>(&MI);
7841
7842 // Addo has no flags
7843 Register Dst = Add->getReg(0);
7844 Register Carry = Add->getReg(1);
7845 Register LHS = Add->getLHSReg();
7846 Register RHS = Add->getRHSReg();
7847 bool IsSigned = Add->isSigned();
7848 LLT DstTy = MRI.getType(Dst);
7849 LLT CarryTy = MRI.getType(Carry);
7850
7851 // Fold addo, if the carry is dead -> add, undef.
7852 if (MRI.use_nodbg_empty(Carry) &&
7853 isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, {DstTy}})) {
7854 MatchInfo = [=](MachineIRBuilder &B) {
7855 B.buildAdd(Dst, LHS, RHS);
7856 B.buildUndef(Carry);
7857 };
7858 return true;
7859 }
7860
7861 // Canonicalize constant to RHS.
7862 if (isConstantOrConstantVectorI(LHS) && !isConstantOrConstantVectorI(RHS)) {
7863 if (IsSigned) {
7864 MatchInfo = [=](MachineIRBuilder &B) {
7865 B.buildSAddo(Dst, Carry, RHS, LHS);
7866 };
7867 return true;
7868 }
7869 // !IsSigned
7870 MatchInfo = [=](MachineIRBuilder &B) {
7871 B.buildUAddo(Dst, Carry, RHS, LHS);
7872 };
7873 return true;
7874 }
7875
7876 std::optional<APInt> MaybeLHS = getConstantOrConstantSplatVector(LHS);
7877 std::optional<APInt> MaybeRHS = getConstantOrConstantSplatVector(RHS);
7878
7879 // Fold addo(c1, c2) -> c3, carry.
7880 if (MaybeLHS && MaybeRHS && isConstantLegalOrBeforeLegalizer(DstTy) &&
7882 bool Overflow;
7883 APInt Result = IsSigned ? MaybeLHS->sadd_ov(*MaybeRHS, Overflow)
7884 : MaybeLHS->uadd_ov(*MaybeRHS, Overflow);
7885 MatchInfo = [=](MachineIRBuilder &B) {
7886 B.buildConstant(Dst, Result);
7887 B.buildConstant(Carry, Overflow);
7888 };
7889 return true;
7890 }
7891
7892 // Fold (addo x, 0) -> x, no carry
7893 if (MaybeRHS && *MaybeRHS == 0 && isConstantLegalOrBeforeLegalizer(CarryTy)) {
7894 MatchInfo = [=](MachineIRBuilder &B) {
7895 B.buildCopy(Dst, LHS);
7896 B.buildConstant(Carry, 0);
7897 };
7898 return true;
7899 }
7900
7901 // Given 2 constant operands whose sum does not overflow:
7902 // uaddo (X +nuw C0), C1 -> uaddo X, C0 + C1
7903 // saddo (X +nsw C0), C1 -> saddo X, C0 + C1
7904 GAdd *AddLHS = getOpcodeDef<GAdd>(LHS, MRI);
7905 if (MaybeRHS && AddLHS && MRI.hasOneNonDBGUse(Add->getReg(0)) &&
7906 ((IsSigned && AddLHS->getFlag(MachineInstr::MIFlag::NoSWrap)) ||
7907 (!IsSigned && AddLHS->getFlag(MachineInstr::MIFlag::NoUWrap)))) {
7908 std::optional<APInt> MaybeAddRHS =
7909 getConstantOrConstantSplatVector(AddLHS->getRHSReg());
7910 if (MaybeAddRHS) {
7911 bool Overflow;
7912 APInt NewC = IsSigned ? MaybeAddRHS->sadd_ov(*MaybeRHS, Overflow)
7913 : MaybeAddRHS->uadd_ov(*MaybeRHS, Overflow);
7914 if (!Overflow && isConstantLegalOrBeforeLegalizer(DstTy)) {
7915 if (IsSigned) {
7916 MatchInfo = [=](MachineIRBuilder &B) {
7917 auto ConstRHS = B.buildConstant(DstTy, NewC);
7918 B.buildSAddo(Dst, Carry, AddLHS->getLHSReg(), ConstRHS);
7919 };
7920 return true;
7921 }
7922 // !IsSigned
7923 MatchInfo = [=](MachineIRBuilder &B) {
7924 auto ConstRHS = B.buildConstant(DstTy, NewC);
7925 B.buildUAddo(Dst, Carry, AddLHS->getLHSReg(), ConstRHS);
7926 };
7927 return true;
7928 }
7929 }
7930 };
7931
7932 // We try to combine addo to non-overflowing add.
7933 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, {DstTy}}) ||
7935 return false;
7936
7937 // We try to combine uaddo to non-overflowing add.
7938 if (!IsSigned) {
7939 ConstantRange CRLHS =
7940 ConstantRange::fromKnownBits(VT->getKnownBits(LHS), /*IsSigned=*/false);
7941 ConstantRange CRRHS =
7942 ConstantRange::fromKnownBits(VT->getKnownBits(RHS), /*IsSigned=*/false);
7943
7944 switch (CRLHS.unsignedAddMayOverflow(CRRHS)) {
7946 return false;
7948 MatchInfo = [=](MachineIRBuilder &B) {
7949 B.buildAdd(Dst, LHS, RHS, MachineInstr::MIFlag::NoUWrap);
7950 B.buildConstant(Carry, 0);
7951 };
7952 return true;
7953 }
7956 MatchInfo = [=](MachineIRBuilder &B) {
7957 B.buildAdd(Dst, LHS, RHS);
7958 B.buildConstant(Carry, 1);
7959 };
7960 return true;
7961 }
7962 }
7963 return false;
7964 }
7965
7966 // We try to combine saddo to non-overflowing add.
7967
7968 // If LHS and RHS each have at least two sign bits, then there is no signed
7969 // overflow.
7970 if (VT->computeNumSignBits(RHS) > 1 && VT->computeNumSignBits(LHS) > 1) {
7971 MatchInfo = [=](MachineIRBuilder &B) {
7972 B.buildAdd(Dst, LHS, RHS, MachineInstr::MIFlag::NoSWrap);
7973 B.buildConstant(Carry, 0);
7974 };
7975 return true;
7976 }
7977
7978 ConstantRange CRLHS =
7979 ConstantRange::fromKnownBits(VT->getKnownBits(LHS), /*IsSigned=*/true);
7980 ConstantRange CRRHS =
7981 ConstantRange::fromKnownBits(VT->getKnownBits(RHS), /*IsSigned=*/true);
7982
7983 switch (CRLHS.signedAddMayOverflow(CRRHS)) {
7985 return false;
7987 MatchInfo = [=](MachineIRBuilder &B) {
7988 B.buildAdd(Dst, LHS, RHS, MachineInstr::MIFlag::NoSWrap);
7989 B.buildConstant(Carry, 0);
7990 };
7991 return true;
7992 }
7995 MatchInfo = [=](MachineIRBuilder &B) {
7996 B.buildAdd(Dst, LHS, RHS);
7997 B.buildConstant(Carry, 1);
7998 };
7999 return true;
8000 }
8001 }
8002
8003 return false;
8004}
8005
8007 BuildFnTy &MatchInfo) const {
8009 MatchInfo(Builder);
8010 Root->eraseFromParent();
8011}
8012
8014 int64_t Exponent) const {
8015 bool OptForSize = MI.getMF()->getFunction().hasOptSize();
8017}
8018
8020 int64_t Exponent) const {
8021 auto [Dst, Base] = MI.getFirst2Regs();
8022 LLT Ty = MRI.getType(Dst);
8023 int64_t ExpVal = Exponent;
8024
8025 if (ExpVal == 0) {
8026 Builder.buildFConstant(Dst, 1.0);
8027 MI.removeFromParent();
8028 return;
8029 }
8030
8031 if (ExpVal < 0)
8032 ExpVal = -ExpVal;
8033
8034 // We use the simple binary decomposition method from SelectionDAG ExpandPowI
8035 // to generate the multiply sequence. There are more optimal ways to do this
8036 // (for example, powi(x,15) generates one more multiply than it should), but
8037 // this has the benefit of being both really simple and much better than a
8038 // libcall.
8039 std::optional<SrcOp> Res;
8040 SrcOp CurSquare = Base;
8041 while (ExpVal > 0) {
8042 if (ExpVal & 1) {
8043 if (!Res)
8044 Res = CurSquare;
8045 else
8046 Res = Builder.buildFMul(Ty, *Res, CurSquare);
8047 }
8048
8049 CurSquare = Builder.buildFMul(Ty, CurSquare, CurSquare);
8050 ExpVal >>= 1;
8051 }
8052
8053 // If the original exponent was negative, invert the result, producing
8054 // 1 / x^|Exponent|.
8055 if (Exponent < 0)
8056 Res = Builder.buildFDiv(Ty, Builder.buildFConstant(Ty, 1.0), *Res,
8057 MI.getFlags());
8058
8059 Builder.buildCopy(Dst, *Res);
8060 MI.eraseFromParent();
8061}
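// A worked example of the decomposition: Exponent = 13 = 0b1101. The loop
// visits set bits 0, 2 and 3 while CurSquare runs through x, x^2, x^4, x^8,
// accumulating Res = x * x^4 * x^8 = x^13. For a negative exponent such as
// -13, the final step additionally emits 1.0 / x^13.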
8062
8064 BuildFnTy &MatchInfo) const {
8065 // fold (A+C1)-C2 -> A+(C1-C2)
8066 const GSub *Sub = cast<GSub>(&MI);
8067 GAdd *Add = cast<GAdd>(MRI.getVRegDef(Sub->getLHSReg()));
8068
8069 if (!MRI.hasOneNonDBGUse(Add->getReg(0)))
8070 return false;
8071
8072 APInt C2 = getIConstantFromReg(Sub->getRHSReg(), MRI);
8073 APInt C1 = getIConstantFromReg(Add->getRHSReg(), MRI);
8074
8075 Register Dst = Sub->getReg(0);
8076 LLT DstTy = MRI.getType(Dst);
8077
8078 MatchInfo = [=](MachineIRBuilder &B) {
8079 auto Const = B.buildConstant(DstTy, C1 - C2);
8080 B.buildAdd(Dst, Add->getLHSReg(), Const);
8081 };
8082
8083 return true;
8084}
8085
8087 BuildFnTy &MatchInfo) const {
8088 // fold C2-(A+C1) -> (C2-C1)-A
8089 const GSub *Sub = cast<GSub>(&MI);
8090 GAdd *Add = cast<GAdd>(MRI.getVRegDef(Sub->getRHSReg()));
8091
8092 if (!MRI.hasOneNonDBGUse(Add->getReg(0)))
8093 return false;
8094
8095 APInt C2 = getIConstantFromReg(Sub->getLHSReg(), MRI);
8096 APInt C1 = getIConstantFromReg(Add->getRHSReg(), MRI);
8097
8098 Register Dst = Sub->getReg(0);
8099 LLT DstTy = MRI.getType(Dst);
8100
8101 MatchInfo = [=](MachineIRBuilder &B) {
8102 auto Const = B.buildConstant(DstTy, C2 - C1);
8103 B.buildSub(Dst, Const, Add->getLHSReg());
8104 };
8105
8106 return true;
8107}
8108
8110 BuildFnTy &MatchInfo) const {
8111 // fold (A-C1)-C2 -> A-(C1+C2)
8112 const GSub *Sub1 = cast<GSub>(&MI);
8113 GSub *Sub2 = cast<GSub>(MRI.getVRegDef(Sub1->getLHSReg()));
8114
8115 if (!MRI.hasOneNonDBGUse(Sub2->getReg(0)))
8116 return false;
8117
8118 APInt C2 = getIConstantFromReg(Sub1->getRHSReg(), MRI);
8119 APInt C1 = getIConstantFromReg(Sub2->getRHSReg(), MRI);
8120
8121 Register Dst = Sub1->getReg(0);
8122 LLT DstTy = MRI.getType(Dst);
8123
8124 MatchInfo = [=](MachineIRBuilder &B) {
8125 auto Const = B.buildConstant(DstTy, C1 + C2);
8126 B.buildSub(Dst, Sub2->getLHSReg(), Const);
8127 };
8128
8129 return true;
8130}
8131
8133 BuildFnTy &MatchInfo) const {
8134 // fold (C1-A)-C2 -> (C1-C2)-A
8135 const GSub *Sub1 = cast<GSub>(&MI);
8136 GSub *Sub2 = cast<GSub>(MRI.getVRegDef(Sub1->getLHSReg()));
8137
8138 if (!MRI.hasOneNonDBGUse(Sub2->getReg(0)))
8139 return false;
8140
8141 APInt C2 = getIConstantFromReg(Sub1->getRHSReg(), MRI);
8142 APInt C1 = getIConstantFromReg(Sub2->getLHSReg(), MRI);
8143
8144 Register Dst = Sub1->getReg(0);
8145 LLT DstTy = MRI.getType(Dst);
8146
8147 MatchInfo = [=](MachineIRBuilder &B) {
8148 auto Const = B.buildConstant(DstTy, C1 - C2);
8149 B.buildSub(Dst, Const, Sub2->getRHSReg());
8150 };
8151
8152 return true;
8153}
8154
8156 BuildFnTy &MatchInfo) const {
8157 // fold ((A-C1)+C2) -> (A+(C2-C1))
8158 const GAdd *Add = cast<GAdd>(&MI);
8159 GSub *Sub = cast<GSub>(MRI.getVRegDef(Add->getLHSReg()));
8160
8161 if (!MRI.hasOneNonDBGUse(Sub->getReg(0)))
8162 return false;
8163
8164 APInt C2 = getIConstantFromReg(Add->getRHSReg(), MRI);
8165 APInt C1 = getIConstantFromReg(Sub->getRHSReg(), MRI);
8166
8167 Register Dst = Add->getReg(0);
8168 LLT DstTy = MRI.getType(Dst);
8169
8170 MatchInfo = [=](MachineIRBuilder &B) {
8171 auto Const = B.buildConstant(DstTy, C2 - C1);
8172 B.buildAdd(Dst, Sub->getLHSReg(), Const);
8173 };
8174
8175 return true;
8176}
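// A worked instance of the fold directly above, with made-up values
// (C1 = 10, C2 = 3), sketched in pseudo-MIR:
//   %s:_(s32) = G_SUB %a, %ten
//   %d:_(s32) = G_ADD %s, %three
// becomes
//   %k:_(s32) = G_CONSTANT i32 -7      (C2 - C1 = 3 - 10)
//   %d:_(s32) = G_ADD %a, %k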
8177
8179 const MachineInstr &MI, BuildFnTy &MatchInfo) const {
8180 const GUnmerge *Unmerge = cast<GUnmerge>(&MI);
8181
8182 if (!MRI.hasOneNonDBGUse(Unmerge->getSourceReg()))
8183 return false;
8184
8185 const MachineInstr *Source = MRI.getVRegDef(Unmerge->getSourceReg());
8186
8187 LLT DstTy = MRI.getType(Unmerge->getReg(0));
8188
8189 // $bv:_(<8 x s8>) = G_BUILD_VECTOR ....
8190 // $any:_(<8 x s16>) = G_ANYEXT $bv
8191 // $uv:_(<4 x s16>), $uv1:_(<4 x s16>) = G_UNMERGE_VALUES $any
8192 //
8193 // ->
8194 //
8195 // $any:_(s16) = G_ANYEXT $bv[0]
8196 // $any1:_(s16) = G_ANYEXT $bv[1]
8197 // $any2:_(s16) = G_ANYEXT $bv[2]
8198 // $any3:_(s16) = G_ANYEXT $bv[3]
8199 // $any4:_(s16) = G_ANYEXT $bv[4]
8200 // $any5:_(s16) = G_ANYEXT $bv[5]
8201 // $any6:_(s16) = G_ANYEXT $bv[6]
8202 // $any7:_(s16) = G_ANYEXT $bv[7]
8203 // $uv:_(<4 x s16>) = G_BUILD_VECTOR $any, $any1, $any2, $any3
8204 // $uv1:_(<4 x s16>) = G_BUILD_VECTOR $any4, $any5, $any6, $any7
8205
8206 // We want to unmerge into vectors.
8207 if (!DstTy.isFixedVector())
8208 return false;
8209
8210 const GAnyExt *Any = dyn_cast<GAnyExt>(Source);
8211 if (!Any)
8212 return false;
8213
8214 const MachineInstr *NextSource = MRI.getVRegDef(Any->getSrcReg());
8215
8216 if (const GBuildVector *BV = dyn_cast<GBuildVector>(NextSource)) {
8217 // G_UNMERGE_VALUES G_ANYEXT G_BUILD_VECTOR
8218
8219 if (!MRI.hasOneNonDBGUse(BV->getReg(0)))
8220 return false;
8221
8222 // FIXME: check element types?
8223 if (BV->getNumSources() % Unmerge->getNumDefs() != 0)
8224 return false;
8225
8226 LLT BigBvTy = MRI.getType(BV->getReg(0));
8227 LLT SmallBvTy = DstTy;
8228 LLT SmallBvElemenTy = SmallBvTy.getElementType();
8229
8231 {TargetOpcode::G_BUILD_VECTOR, {SmallBvTy, SmallBvElemenTy}}))
8232 return false;
8233
8234 // We check the legality of scalar anyext.
8236 {TargetOpcode::G_ANYEXT,
8237 {SmallBvElemenTy, BigBvTy.getElementType()}}))
8238 return false;
8239
8240 MatchInfo = [=](MachineIRBuilder &B) {
8241 // Build into each G_UNMERGE_VALUES def
8242 // a small build vector with anyext from the source build vector.
8243 for (unsigned I = 0; I < Unmerge->getNumDefs(); ++I) {
8245 for (unsigned J = 0; J < SmallBvTy.getNumElements(); ++J) {
8246 Register SourceArray =
8247 BV->getSourceReg(I * SmallBvTy.getNumElements() + J);
8248 auto AnyExt = B.buildAnyExt(SmallBvElemenTy, SourceArray);
8249 Ops.push_back(AnyExt.getReg(0));
8250 }
8251 B.buildBuildVector(Unmerge->getOperand(I).getReg(), Ops);
8252 };
8253 };
8254 return true;
8255 };
8256
8257 return false;
8258}
8259
8261 BuildFnTy &MatchInfo) const {
8262
8263 bool Changed = false;
8264 auto &Shuffle = cast<GShuffleVector>(MI);
8265 ArrayRef<int> OrigMask = Shuffle.getMask();
8266 SmallVector<int, 16> NewMask;
8267 const LLT SrcTy = MRI.getType(Shuffle.getSrc1Reg());
8268 const unsigned NumSrcElems = SrcTy.isVector() ? SrcTy.getNumElements() : 1;
8269 const unsigned NumDstElts = OrigMask.size();
8270 for (unsigned i = 0; i != NumDstElts; ++i) {
8271 int Idx = OrigMask[i];
8272 if (Idx >= (int)NumSrcElems) {
8273 Idx = -1;
8274 Changed = true;
8275 }
8276 NewMask.push_back(Idx);
8277 }
8278
8279 if (!Changed)
8280 return false;
8281
8282 MatchInfo = [&, NewMask = std::move(NewMask)](MachineIRBuilder &B) {
8283 B.buildShuffleVector(MI.getOperand(0), MI.getOperand(1), MI.getOperand(2),
8284 std::move(NewMask));
8285 };
8286
8287 return true;
8288}
8289
8290static void commuteMask(MutableArrayRef<int> Mask, const unsigned NumElems) {
8291 const unsigned MaskSize = Mask.size();
8292 for (unsigned I = 0; I < MaskSize; ++I) {
8293 int Idx = Mask[I];
8294 if (Idx < 0)
8295 continue;
8296
8297 if (Idx < (int)NumElems)
8298 Mask[I] = Idx + NumElems;
8299 else
8300 Mask[I] = Idx - NumElems;
8301 }
8302}
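// An illustrative run: with NumElems = 4, the mask [0, 5, -1, 2] commutes to
// [4, 1, -1, 6]. Indices referring to the first source now refer to the second
// and vice versa, while undef (-1) entries are left untouched.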
8303
8305 BuildFnTy &MatchInfo) const {
8306
8307 auto &Shuffle = cast<GShuffleVector>(MI);
8308 // If either of the two inputs is already undef, don't check the mask again,
8309 // to prevent an infinite loop.
8310 if (getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Shuffle.getSrc1Reg(), MRI))
8311 return false;
8312
8313 if (getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Shuffle.getSrc2Reg(), MRI))
8314 return false;
8315
8316 const LLT DstTy = MRI.getType(Shuffle.getReg(0));
8317 const LLT Src1Ty = MRI.getType(Shuffle.getSrc1Reg());
8319 {TargetOpcode::G_SHUFFLE_VECTOR, {DstTy, Src1Ty}}))
8320 return false;
8321
8322 ArrayRef<int> Mask = Shuffle.getMask();
8323 const unsigned NumSrcElems = Src1Ty.getNumElements();
8324
8325 bool TouchesSrc1 = false;
8326 bool TouchesSrc2 = false;
8327 const unsigned NumElems = Mask.size();
8328 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
8329 if (Mask[Idx] < 0)
8330 continue;
8331
8332 if (Mask[Idx] < (int)NumSrcElems)
8333 TouchesSrc1 = true;
8334 else
8335 TouchesSrc2 = true;
8336 }
8337
8338 if (TouchesSrc1 == TouchesSrc2)
8339 return false;
8340
8341 Register NewSrc1 = Shuffle.getSrc1Reg();
8342 SmallVector<int, 16> NewMask(Mask);
8343 if (TouchesSrc2) {
8344 NewSrc1 = Shuffle.getSrc2Reg();
8345 commuteMask(NewMask, NumSrcElems);
8346 }
8347
8348 MatchInfo = [=, &Shuffle](MachineIRBuilder &B) {
8349 auto Undef = B.buildUndef(Src1Ty);
8350 B.buildShuffleVector(Shuffle.getReg(0), NewSrc1, Undef, NewMask);
8351 };
8352
8353 return true;
8354}
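matchShuffleDisjointMask only fires when the mask touches exactly one of the two sources; if that source is the second one, the mask is commuted so the used source can sit in the first slot and the second slot becomes G_IMPLICIT_DEF. A standalone sketch (not from CombinerHelper.cpp; two 4-element sources and the concrete mask are assumed) of that decision:

#include <cstdio>

int main() {
  const int NumSrcElems = 4;
  int Mask[] = {4, 6, 5, 7};   // Every defined lane indexes the second source.
  bool TouchesSrc1 = false, TouchesSrc2 = false;
  for (int Idx : Mask) {
    if (Idx < 0)
      continue;
    if (Idx < NumSrcElems)
      TouchesSrc1 = true;
    else
      TouchesSrc2 = true;
  }
  if (TouchesSrc1 == TouchesSrc2)
    return 0;                  // Mask uses both (or neither) source: no combine.
  if (TouchesSrc2)             // Commute so the touched source becomes operand 1.
    for (int &Idx : Mask)
      if (Idx >= 0)
        Idx -= NumSrcElems;
  for (int Idx : Mask)
    std::printf("%d ", Idx);   // Prints: 0 2 1 3
  std::printf("\n");
  return 0;
}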
8355
8356bool CombinerHelper::matchSuboCarryOut(const MachineInstr &MI,
8357 BuildFnTy &MatchInfo) const {
8358 const GSubCarryOut *Subo = cast<GSubCarryOut>(&MI);
8359
8360 Register Dst = Subo->getReg(0);
8361 Register LHS = Subo->getLHSReg();
8362 Register RHS = Subo->getRHSReg();
8363 Register Carry = Subo->getCarryOutReg();
8364 LLT DstTy = MRI.getType(Dst);
8365 LLT CarryTy = MRI.getType(Carry);
8366
8367 // Check legality before known bits.
8368 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SUB, {DstTy}}) ||
8369 !isConstantLegalOrBeforeLegalizer(CarryTy))
8370 return false;
8371
8372 ConstantRange KBLHS =
8373 ConstantRange::fromKnownBits(VT->getKnownBits(LHS),
8374 /* IsSigned=*/Subo->isSigned());
8375 ConstantRange KBRHS =
8376 ConstantRange::fromKnownBits(VT->getKnownBits(RHS),
8377 /* IsSigned=*/Subo->isSigned());
8378
8379 if (Subo->isSigned()) {
8380 // G_SSUBO
8381 switch (KBLHS.signedSubMayOverflow(KBRHS)) {
8382 case ConstantRange::OverflowResult::MayOverflow:
8383 return false;
8384 case ConstantRange::OverflowResult::NeverOverflows: {
8385 MatchInfo = [=](MachineIRBuilder &B) {
8386 B.buildSub(Dst, LHS, RHS, MachineInstr::MIFlag::NoSWrap);
8387 B.buildConstant(Carry, 0);
8388 };
8389 return true;
8390 }
8391 case ConstantRange::OverflowResult::AlwaysOverflowsLow:
8392 case ConstantRange::OverflowResult::AlwaysOverflowsHigh: {
8393 MatchInfo = [=](MachineIRBuilder &B) {
8394 B.buildSub(Dst, LHS, RHS);
8395 B.buildConstant(Carry, getICmpTrueVal(getTargetLowering(),
8396 /*isVector=*/CarryTy.isVector(),
8397 /*isFP=*/false));
8398 };
8399 return true;
8400 }
8401 }
8402 return false;
8403 }
8404
8405 // G_USUBO
8406 switch (KBLHS.unsignedSubMayOverflow(KBRHS)) {
8407 case ConstantRange::OverflowResult::MayOverflow:
8408 return false;
8409 case ConstantRange::OverflowResult::NeverOverflows: {
8410 MatchInfo = [=](MachineIRBuilder &B) {
8411 B.buildSub(Dst, LHS, RHS, MachineInstr::MIFlag::NoUWrap);
8412 B.buildConstant(Carry, 0);
8413 };
8414 return true;
8415 }
8416 case ConstantRange::OverflowResult::AlwaysOverflowsLow:
8417 case ConstantRange::OverflowResult::AlwaysOverflowsHigh: {
8418 MatchInfo = [=](MachineIRBuilder &B) {
8419 B.buildSub(Dst, LHS, RHS);
8420 B.buildConstant(Carry, getICmpTrueVal(getTargetLowering(),
8421 /*isVector=*/CarryTy.isVector(),
8422 /*isFP=*/false));
8423 };
8424 return true;
8425 }
8426 }
8427
8428 return false;
8429}
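matchSuboCarryOut folds the carry-out of G_USUBO/G_SSUBO to a constant whenever the ConstantRanges derived from known bits prove the subtraction can never (or must always) overflow. A minimal standalone sketch of that reasoning for the unsigned case, assuming it is compiled and linked against LLVM's headers and libraries (the 8-bit width and the particular known bits are illustrative, not taken from the source above):

#include "llvm/IR/ConstantRange.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  KnownBits LHS(8), RHS(8);
  LHS.One.setBit(7);   // LHS is known to be >= 128.
  RHS.Zero.setBit(7);  // RHS is known to be <= 127.
  ConstantRange L = ConstantRange::fromKnownBits(LHS, /*IsSigned=*/false);
  ConstantRange R = ConstantRange::fromKnownBits(RHS, /*IsSigned=*/false);
  if (L.unsignedSubMayOverflow(R) ==
      ConstantRange::OverflowResult::NeverOverflows)
    outs() << "carry-out is provably 0\n";
  return 0;
}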
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
unsigned RegSize
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
constexpr LLT S1
AMDGPU Register Bank Select
Rewrite undef for PHI
This file declares a class to represent arbitrary precision floating point values and provide a varie...
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static const Function * getParent(const Value *V)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static bool hasMoreUses(const MachineInstr &MI0, const MachineInstr &MI1, const MachineRegisterInfo &MRI)
static bool isContractableFMul(MachineInstr &MI, bool AllowFusionGlobally)
Checks if MI is TargetOpcode::G_FMUL and contractable either due to global flags or MachineInstr flag...
static unsigned getIndexedOpc(unsigned LdStOpc)
static APFloat constantFoldFpUnary(const MachineInstr &MI, const MachineRegisterInfo &MRI, const APFloat &Val)
static std::optional< std::pair< GZExtLoad *, int64_t > > matchLoadAndBytePosition(Register Reg, unsigned MemSizeInBits, const MachineRegisterInfo &MRI)
Helper function for findLoadOffsetsForLoadOrCombine.
static std::optional< unsigned > getMinUselessShift(KnownBits ValueKB, unsigned Opcode, std::optional< int64_t > &Result)
Return the minimum useless shift amount that results in complete loss of the source value.
static Register peekThroughBitcast(Register Reg, const MachineRegisterInfo &MRI)
static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I)
static cl::opt< bool > ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false), cl::desc("Force all indexed operations to be " "legal for the GlobalISel combiner"))
static void commuteMask(MutableArrayRef< int > Mask, const unsigned NumElems)
static cl::opt< unsigned > PostIndexUseThreshold("post-index-use-threshold", cl::Hidden, cl::init(32), cl::desc("Number of uses of a base pointer to check before it is no longer " "considered for post-indexing."))
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
static unsigned getExtLoadOpcForExtend(unsigned ExtOpc)
static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits, int64_t Cst, bool IsVector, bool IsFP)
static LLT getMidVTForTruncRightShiftCombine(LLT ShiftTy, LLT TruncTy)
static bool canFoldInAddressingMode(GLoadStore *MI, const TargetLowering &TLI, MachineRegisterInfo &MRI)
Return true if 'MI' is a load or a store that may be fold it's address operand into the load / store ...
static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I)
static Register buildLogBase2(Register V, MachineIRBuilder &MIB)
Determines the LogBase2 value for a non-null input value using the transform: LogBase2(V) = (EltBits ...
This contains common combine transformations that may be used in a combine pass,or by the target else...
This contains common code to allow clients to notify changes to machine instr.
Provides analysis for querying information about KnownBits during GISel passes.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
#define _
IRTranslator LLVM IR MI
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static LVOptions Options
Definition LVOptions.cpp:25
Interface for Targets to specify which operations they can successfully select and how the others sho...
Implement a low-level type suitable for MachineInstr level instruction selection.
#define I(x, y, z)
Definition MD5.cpp:58
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineIRBuilder class.
Register Reg
#define R2(n)
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
R600 Clause Merge
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
This file contains some templates that are useful if you are working with the STL at all.
This file implements a set that has insertion order iteration characteristics.
This file implements the SmallBitVector class.
#define LLVM_DEBUG(...)
Definition Debug.h:114
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
This file describes how to lower LLVM code to machine code.
Value * RHS
Value * LHS
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
static const fltSemantics & IEEEdouble()
Definition APFloat.h:297
const fltSemantics & getSemantics() const
Definition APFloat.h:1439
bool isNaN() const
Definition APFloat.h:1429
opStatus fusedMultiplyAdd(const APFloat &Multiplicand, const APFloat &Addend, roundingMode RM)
Definition APFloat.h:1217
APInt bitcastToAPInt() const
Definition APFloat.h:1335
Class for arbitrary precision integers.
Definition APInt.h:78
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1012
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1540
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition APInt.cpp:1033
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition APInt.h:206
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:371
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition APInt.h:1182
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:380
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1666
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1488
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition APInt.h:1111
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition APInt.h:209
bool isNegative() const
Determine sign of this APInt.
Definition APInt.h:329
int32_t exactLogBase2() const
Definition APInt.h:1783
void ashrInPlace(unsigned ShiftAmt)
Arithmetic right-shift this APInt by ShiftAmt in place.
Definition APInt.h:834
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1639
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition APInt.h:1598
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:219
LLVM_ABI APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
Definition APInt.cpp:1041
bool isStrictlyPositive() const
Determine if this APInt Value is positive.
Definition APInt.h:356
LLVM_ABI APInt multiplicativeInverse() const
Definition APInt.cpp:1274
bool isMask(unsigned numBits) const
Definition APInt.h:488
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition APInt.cpp:985
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:440
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:306
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:200
bool isOne() const
Determine if this is a value of 1.
Definition APInt.h:389
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition APInt.h:239
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1562
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition APInt.h:858
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:851
unsigned countr_one() const
Count the number of trailing one bits.
Definition APInt.h:1656
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition APInt.h:1221
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition ArrayRef.h:143
bool isEquality() const
Determine if this is an equals/not equals predicate.
Definition InstrTypes.h:915
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
@ FCMP_TRUE
1 1 1 1 Always true (always folded)
Definition InstrTypes.h:693
@ ICMP_SLT
signed less than
Definition InstrTypes.h:705
@ ICMP_SLE
signed less or equal
Definition InstrTypes.h:706
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition InstrTypes.h:682
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
Definition InstrTypes.h:691
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition InstrTypes.h:680
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition InstrTypes.h:681
@ ICMP_UGE
unsigned greater or equal
Definition InstrTypes.h:700
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:699
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:703
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition InstrTypes.h:690
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:701
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition InstrTypes.h:688
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition InstrTypes.h:683
@ ICMP_SGE
signed greater or equal
Definition InstrTypes.h:704
@ ICMP_ULE
unsigned less or equal
Definition InstrTypes.h:702
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition InstrTypes.h:689
@ FCMP_FALSE
0 0 0 0 Always false (always folded)
Definition InstrTypes.h:678
static LLVM_ABI bool isEquality(Predicate pred)
Determine if this is an equals/not equals predicate.
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition InstrTypes.h:827
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition InstrTypes.h:789
static LLVM_ABI bool isOrdered(Predicate predicate)
Determine if the predicate is an ordered operation.
void applyCombineExtendingLoads(MachineInstr &MI, PreferredTuple &MatchInfo) const
bool matchCommuteShift(MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchRepeatedFPDivisor(MachineInstr &MI, SmallVector< MachineInstr * > &MatchInfo) const
bool matchFoldC2MinusAPlusC1(const MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchLoadOrCombine(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match expression trees of the form.
const RegisterBank * getRegBank(Register Reg) const
Get the register bank of Reg.
void applyPtrAddZero(MachineInstr &MI) const
bool matchEqualDefs(const MachineOperand &MOP1, const MachineOperand &MOP2) const
Return true if MOP1 and MOP2 are register operands are defined by equivalent instructions.
void applyUDivOrURemByConst(MachineInstr &MI) const
bool matchConstantFoldBinOp(MachineInstr &MI, APInt &MatchInfo) const
Do constant folding when opportunities are exposed after MIR building.
void applyCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) const
bool matchUnmergeValuesAnyExtBuildVector(const MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchSelectSameVal(MachineInstr &MI) const
Optimize (cond ? x : x) -> x.
bool matchAddEToAddO(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match: (G_*ADDE x, y, 0) -> (G_*ADDO x, y) (G_*SUBE x, y, 0) -> (G_*SUBO x, y)
bool matchReassocConstantInnerRHS(GPtrAdd &MI, MachineInstr *RHS, BuildFnTy &MatchInfo) const
bool matchBitfieldExtractFromShr(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match: shr (shl x, n), k -> sbfx/ubfx x, pos, width.
bool matchFoldAMinusC1PlusC2(const MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchTruncSSatU(MachineInstr &MI, Register &MatchInfo) const
void applySimplifyURemByPow2(MachineInstr &MI) const
Combine G_UREM x, (known power of 2) to an add and bitmasking.
bool matchCombineUnmergeZExtToZExt(MachineInstr &MI) const
Transform X, Y = G_UNMERGE(G_ZEXT(Z)) -> X = G_ZEXT(Z); Y = G_CONSTANT 0.
bool matchPtrAddZero(MachineInstr &MI) const
}
void applyCombineConcatVectors(MachineInstr &MI, SmallVector< Register > &Ops) const
Replace MI with a flattened build_vector with Ops or an implicit_def if Ops is empty.
void applyXorOfAndWithSameReg(MachineInstr &MI, std::pair< Register, Register > &MatchInfo) const
bool canCombineFMadOrFMA(MachineInstr &MI, bool &AllowFusionGlobally, bool &HasFMAD, bool &Aggressive, bool CanReassociate=false) const
bool matchFoldAPlusC1MinusC2(const MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchExtractVecEltBuildVec(MachineInstr &MI, Register &Reg) const
void applyCombineUnmergeConstant(MachineInstr &MI, SmallVectorImpl< APInt > &Csts) const
bool matchShiftsTooBig(MachineInstr &MI, std::optional< int64_t > &MatchInfo) const
Match shifts greater or equal to the range (the bitwidth of the result datatype, or the effective bit...
bool matchCombineFAddFpExtFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z) (fadd (fpext (fmul x,...
bool matchCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) const
void applyCombineShuffleConcat(MachineInstr &MI, SmallVector< Register > &Ops) const
Replace MI with a flattened build_vector with Ops or an implicit_def if Ops is empty.
void replaceSingleDefInstWithReg(MachineInstr &MI, Register Replacement) const
Delete MI and replace all of its uses with Replacement.
void applyCombineShuffleToBuildVector(MachineInstr &MI) const
Replace MI with a build_vector.
bool matchCombineExtractedVectorLoad(MachineInstr &MI, BuildFnTy &MatchInfo) const
Combine a G_EXTRACT_VECTOR_ELT of a load into a narrowed load.
void replaceRegWith(MachineRegisterInfo &MRI, Register FromReg, Register ToReg) const
MachineRegisterInfo::replaceRegWith() and inform the observer of the changes.
void replaceRegOpWith(MachineRegisterInfo &MRI, MachineOperand &FromRegOp, Register ToReg) const
Replace a single register operand with a new register and inform the observer of the changes.
bool matchReassocCommBinOp(MachineInstr &MI, BuildFnTy &MatchInfo) const
Reassociate commutative binary operations like G_ADD.
void applyBuildFnMO(const MachineOperand &MO, BuildFnTy &MatchInfo) const
Use a function which takes in a MachineIRBuilder to perform a combine.
bool matchCommuteConstantToRHS(MachineInstr &MI) const
Match constant LHS ops that should be commuted.
const DataLayout & getDataLayout() const
bool matchBinOpSameVal(MachineInstr &MI) const
Optimize (x op x) -> x.
bool matchSimplifyNegMinMax(MachineInstr &MI, BuildFnTy &MatchInfo) const
Tranform (neg (min/max x, (neg x))) into (max/min x, (neg x)).
bool matchCombineDivRem(MachineInstr &MI, MachineInstr *&OtherMI) const
Try to combine G_[SU]DIV and G_[SU]REM into a single G_[SU]DIVREM when their source operands are iden...
void applyUMulHToLShr(MachineInstr &MI) const
void applyNotCmp(MachineInstr &MI, SmallVectorImpl< Register > &RegsToNegate) const
bool matchShiftImmedChain(MachineInstr &MI, RegisterImmPair &MatchInfo) const
Fold (shift (shift base, x), y) -> (shift base (x+y))
void applyCombineI2PToP2I(MachineInstr &MI, Register &Reg) const
bool matchTruncLshrBuildVectorFold(MachineInstr &MI, Register &MatchInfo) const
bool matchAllExplicitUsesAreUndef(MachineInstr &MI) const
Return true if all register explicit use operands on MI are defined by a G_IMPLICIT_DEF.
bool isPredecessor(const MachineInstr &DefMI, const MachineInstr &UseMI) const
Returns true if DefMI precedes UseMI or they are the same instruction.
bool matchPtrAddImmedChain(MachineInstr &MI, PtrAddChain &MatchInfo) const
bool matchTruncSSatS(MachineInstr &MI, Register &MatchInfo) const
const TargetLowering & getTargetLowering() const
bool matchShuffleUndefRHS(MachineInstr &MI, BuildFnTy &MatchInfo) const
Remove references to rhs if it is undef.
void applyBuildInstructionSteps(MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) const
Replace MI with a series of instructions described in MatchInfo.
void applySDivByPow2(MachineInstr &MI) const
void applySimplifyAddToSub(MachineInstr &MI, std::tuple< Register, Register > &MatchInfo) const
void applyUDivByPow2(MachineInstr &MI) const
Given an G_UDIV MI expressing an unsigned divided by a pow2 constant, return expressions that impleme...
bool matchOr(MachineInstr &MI, BuildFnTy &MatchInfo) const
Combine ors.
bool matchLshrOfTruncOfLshr(MachineInstr &MI, LshrOfTruncOfLshr &MatchInfo, MachineInstr &ShiftMI) const
Fold (lshr (trunc (lshr x, C1)), C2) -> trunc (shift x, (C1 + C2))
bool matchSimplifyAddToSub(MachineInstr &MI, std::tuple< Register, Register > &MatchInfo) const
Return true if MI is a G_ADD which can be simplified to a G_SUB.
void replaceInstWithConstant(MachineInstr &MI, int64_t C) const
Replace an instruction with a G_CONSTANT with value C.
bool tryEmitMemcpyInline(MachineInstr &MI) const
Emit loads and stores that perform the given memcpy.
bool matchCombineFSubFpExtFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fsub (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), (fneg z)) (fsub (fpext (fmul x,...
void applyFsubToFneg(MachineInstr &MI, Register &MatchInfo) const
bool matchConstantLargerBitWidth(MachineInstr &MI, unsigned ConstIdx) const
Checks if constant at ConstIdx is larger than MI 's bitwidth.
void applyCombineCopy(MachineInstr &MI) const
bool matchAddSubSameReg(MachineInstr &MI, Register &Src) const
Transform G_ADD(x, G_SUB(y, x)) to y.
bool matchCombineShlOfExtend(MachineInstr &MI, RegisterImmPair &MatchData) const
void applyCombineAddP2IToPtrAdd(MachineInstr &MI, std::pair< Register, bool > &PtrRegAndCommute) const
bool matchCombineFSubFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fsub (fmul x, y), z) -> (fma x, y, -z) (fsub (fmul x, y), z) -> (fmad x,...
bool matchCombineFAddFMAFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z)) (fadd (fmad x,...
bool matchSextTruncSextLoad(MachineInstr &MI) const
bool matchCombineMergeUnmerge(MachineInstr &MI, Register &MatchInfo) const
Fold away a merge of an unmerge of the corresponding values.
bool matchCombineInsertVecElts(MachineInstr &MI, SmallVectorImpl< Register > &MatchInfo) const
bool matchDivByPow2(MachineInstr &MI, bool IsSigned) const
Given an G_SDIV MI expressing a signed divided by a pow2 constant, return expressions that implements...
bool matchNarrowBinopFeedingAnd(MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchRedundantNegOperands(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fadd x, fneg(y)) -> (fsub x, y) (fadd fneg(x), y) -> (fsub y, x) (fsub x,...
bool matchCombineLoadWithAndMask(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match (and (load x), mask) -> zextload x.
bool matchCombineFAddFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fadd (fmul x, y), z) -> (fma x, y, z) (fadd (fmul x, y), z) -> (fmad x,...
bool matchCombineCopy(MachineInstr &MI) const
bool matchExtendThroughPhis(MachineInstr &MI, MachineInstr *&ExtMI) const
void applyShiftImmedChain(MachineInstr &MI, RegisterImmPair &MatchInfo) const
bool matchXorOfAndWithSameReg(MachineInstr &MI, std::pair< Register, Register > &MatchInfo) const
Fold (xor (and x, y), y) -> (and (not x), y) {.
bool matchCombineShuffleVector(MachineInstr &MI, SmallVectorImpl< Register > &Ops) const
Check if the G_SHUFFLE_VECTOR MI can be replaced by a concat_vectors.
void applyCombineConstPtrAddToI2P(MachineInstr &MI, APInt &NewCst) const
bool matchCombineAddP2IToPtrAdd(MachineInstr &MI, std::pair< Register, bool > &PtrRegAndCommute) const
Transform G_ADD (G_PTRTOINT x), y -> G_PTRTOINT (G_PTR_ADD x, y) Transform G_ADD y,...
void replaceInstWithFConstant(MachineInstr &MI, double C) const
Replace an instruction with a G_FCONSTANT with value C.
bool matchFunnelShiftToRotate(MachineInstr &MI) const
Match an FSHL or FSHR that can be combined to a ROTR or ROTL rotate.
bool matchOrShiftToFunnelShift(MachineInstr &MI, bool AllowScalarConstants, BuildFnTy &MatchInfo) const
bool matchRedundantSExtInReg(MachineInstr &MI) const
void replaceOpcodeWith(MachineInstr &FromMI, unsigned ToOpcode) const
Replace the opcode in instruction with a new opcode and inform the observer of the changes.
void applyFunnelShiftConstantModulo(MachineInstr &MI) const
Replaces the shift amount in MI with ShiftAmt % BW.
bool matchOperandIsZero(MachineInstr &MI, unsigned OpIdx) const
Check if operand OpIdx is zero.
bool matchFoldC1Minus2MinusC2(const MachineInstr &MI, BuildFnTy &MatchInfo) const
void applyCombineShlOfExtend(MachineInstr &MI, const RegisterImmPair &MatchData) const
void applyUseVectorTruncate(MachineInstr &MI, Register &MatchInfo) const
CombinerHelper(GISelChangeObserver &Observer, MachineIRBuilder &B, bool IsPreLegalize, GISelValueTracking *VT=nullptr, MachineDominatorTree *MDT=nullptr, const LegalizerInfo *LI=nullptr)
bool matchShuffleDisjointMask(MachineInstr &MI, BuildFnTy &MatchInfo) const
Turn shuffle a, b, mask -> shuffle undef, b, mask iff mask does not reference a.
bool matchCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal) const
Transform a multiply by a power-of-2 value to a left shift.
bool matchCombineConstPtrAddToI2P(MachineInstr &MI, APInt &NewCst) const
bool matchCombineUnmergeUndef(MachineInstr &MI, std::function< void(MachineIRBuilder &)> &MatchInfo) const
Transform G_UNMERGE G_IMPLICIT_DEF -> G_IMPLICIT_DEF, G_IMPLICIT_DEF, ...
void applyFoldBinOpIntoSelect(MachineInstr &MI, const unsigned &SelectOpNo) const
SelectOperand is the operand in binary operator MI that is the select to fold.
bool matchFoldAMinusC1MinusC2(const MachineInstr &MI, BuildFnTy &MatchInfo) const
void applyCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) const
bool matchMulOBy2(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match: (G_UMULO x, 2) -> (G_UADDO x, x) (G_SMULO x, 2) -> (G_SADDO x, x)
bool matchCombineShuffleConcat(MachineInstr &MI, SmallVector< Register > &Ops) const
void applySextInRegOfLoad(MachineInstr &MI, std::tuple< Register, unsigned > &MatchInfo) const
bool tryCombineCopy(MachineInstr &MI) const
If MI is COPY, try to combine it.
bool matchTruncUSatU(MachineInstr &MI, MachineInstr &MinMI) const
bool matchICmpToLHSKnownBits(MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchReassocPtrAdd(MachineInstr &MI, BuildFnTy &MatchInfo) const
Reassociate pointer calculations with G_ADD involved, to allow better addressing mode usage.
void applyCombineShuffleVector(MachineInstr &MI, const ArrayRef< Register > Ops) const
Replace MI with a concat_vectors with Ops.
bool matchUndefShuffleVectorMask(MachineInstr &MI) const
Return true if a G_SHUFFLE_VECTOR instruction MI has an undef mask.
bool matchAnyExplicitUseIsUndef(MachineInstr &MI) const
Return true if any explicit use operand on MI is defined by a G_IMPLICIT_DEF.
bool matchCombineI2PToP2I(MachineInstr &MI, Register &Reg) const
Transform IntToPtr(PtrToInt(x)) to x if cast is in the same address space.
bool matchCombineSubToAdd(MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchShiftOfShiftedLogic(MachineInstr &MI, ShiftOfShiftedLogic &MatchInfo) const
If we have a shift-by-constant of a bitwise logic op that itself has a shift-by-constant operand with...
bool matchOperandIsKnownToBeAPowerOfTwo(MachineInstr &MI, unsigned OpIdx) const
Check if operand OpIdx is known to be a power of 2.
bool matchCombineConcatVectors(MachineInstr &MI, SmallVector< Register > &Ops) const
If MI is G_CONCAT_VECTORS, try to combine it.
bool matchInsertExtractVecEltOutOfBounds(MachineInstr &MI) const
Return true if a G_{EXTRACT,INSERT}_VECTOR_ELT has an out of range index.
bool matchExtractAllEltsFromBuildVector(MachineInstr &MI, SmallVectorImpl< std::pair< Register, MachineInstr * > > &MatchInfo) const
LLVMContext & getContext() const
void applyPtrAddImmedChain(MachineInstr &MI, PtrAddChain &MatchInfo) const
bool isConstantLegalOrBeforeLegalizer(const LLT Ty) const
bool matchNotCmp(MachineInstr &MI, SmallVectorImpl< Register > &RegsToNegate) const
Combine inverting a result of a compare into the opposite cond code.
bool matchSextInRegOfLoad(MachineInstr &MI, std::tuple< Register, unsigned > &MatchInfo) const
Match sext_inreg(load p), imm -> sextload p.
bool matchSelectIMinMax(const MachineOperand &MO, BuildFnTy &MatchInfo) const
Combine select to integer min/max.
void applyCombineConstantFoldFpUnary(MachineInstr &MI, const ConstantFP *Cst) const
Transform fp_instr(cst) to constant result of the fp operation.
bool isLegal(const LegalityQuery &Query) const
bool matchICmpToTrueFalseKnownBits(MachineInstr &MI, int64_t &MatchInfo) const
bool tryReassocBinOp(unsigned Opc, Register DstReg, Register Op0, Register Op1, BuildFnTy &MatchInfo) const
Try to reassociate to reassociate operands of a commutative binop.
void eraseInst(MachineInstr &MI) const
Erase MI.
bool matchConstantFoldFPBinOp(MachineInstr &MI, ConstantFP *&MatchInfo) const
Do constant FP folding when opportunities are exposed after MIR building.
void applyBuildFnNoErase(MachineInstr &MI, BuildFnTy &MatchInfo) const
Use a function which takes in a MachineIRBuilder to perform a combine.
bool matchUseVectorTruncate(MachineInstr &MI, Register &MatchInfo) const
bool matchUndefStore(MachineInstr &MI) const
Return true if a G_STORE instruction MI is storing an undef value.
MachineRegisterInfo & MRI
void applyCombineP2IToI2P(MachineInstr &MI, Register &Reg) const
Transform PtrToInt(IntToPtr(x)) to x.
void applyExtendThroughPhis(MachineInstr &MI, MachineInstr *&ExtMI) const
bool matchConstantFPOp(const MachineOperand &MOP, double C) const
Return true if MOP is defined by a G_FCONSTANT or splat with a value exactly equal to C.
MachineInstr * buildUDivOrURemUsingMul(MachineInstr &MI) const
Given an G_UDIV MI or G_UREM MI expressing a divide by constant, return an expression that implements...
void applyExtractVecEltBuildVec(MachineInstr &MI, Register &Reg) const
bool matchFoldBinOpIntoSelect(MachineInstr &MI, unsigned &SelectOpNo) const
Push a binary operator through a select on constants.
bool tryCombineShiftToUnmerge(MachineInstr &MI, unsigned TargetShiftAmount) const
bool tryCombineExtendingLoads(MachineInstr &MI) const
If MI is extend that consumes the result of a load, try to combine it.
bool isLegalOrBeforeLegalizer(const LegalityQuery &Query) const
bool matchBuildVectorIdentityFold(MachineInstr &MI, Register &MatchInfo) const
bool matchBitfieldExtractFromShrAnd(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match: shr (and x, n), k -> ubfx x, pos, width.
void applyTruncSSatS(MachineInstr &MI, Register &MatchInfo) const
bool matchConstantFoldCastOp(MachineInstr &MI, APInt &MatchInfo) const
Do constant folding when opportunities are exposed after MIR building.
bool tryCombineShuffleVector(MachineInstr &MI) const
Try to combine G_SHUFFLE_VECTOR into G_CONCAT_VECTORS.
void applyRotateOutOfRange(MachineInstr &MI) const
bool matchReassocFoldConstantsInSubTree(GPtrAdd &MI, MachineInstr *LHS, MachineInstr *RHS, BuildFnTy &MatchInfo) const
bool matchHoistLogicOpWithSameOpcodeHands(MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) const
Match (logic_op (op x...), (op y...)) -> (op (logic_op x, y))
bool matchBitfieldExtractFromAnd(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match: and (lshr x, cst), mask -> ubfx x, cst, width.
bool matchBitfieldExtractFromSExtInReg(MachineInstr &MI, BuildFnTy &MatchInfo) const
Form a G_SBFX from a G_SEXT_INREG fed by a right shift.
bool matchUndefSelectCmp(MachineInstr &MI) const
Return true if a G_SELECT instruction MI has an undef comparison.
bool matchAndOrDisjointMask(MachineInstr &MI, BuildFnTy &MatchInfo) const
void replaceInstWithUndef(MachineInstr &MI) const
Replace an instruction with a G_IMPLICIT_DEF.
bool matchRedundantBinOpInEquality(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform: (X + Y) == X -> Y == 0 (X - Y) == X -> Y == 0 (X ^ Y) == X -> Y == 0 (X + Y) !...
bool matchOptBrCondByInvertingCond(MachineInstr &MI, MachineInstr *&BrCond) const
If a brcond's true block is not the fallthrough, make it so by inverting the condition and swapping o...
bool matchAddOverflow(MachineInstr &MI, BuildFnTy &MatchInfo) const
Combine addos.
void applyAshShlToSextInreg(MachineInstr &MI, std::tuple< Register, int64_t > &MatchInfo) const
bool matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo) const
Combine selects.
bool matchCombineExtendingLoads(MachineInstr &MI, PreferredTuple &MatchInfo) const
bool matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) const
Transform X, Y<dead> = G_UNMERGE Z -> X = G_TRUNC Z.
bool matchFsubToFneg(MachineInstr &MI, Register &MatchInfo) const
bool matchRotateOutOfRange(MachineInstr &MI) const
void applyExpandFPowI(MachineInstr &MI, int64_t Exponent) const
Expands FPOWI into a series of multiplications and a division if the exponent is negative.
void setRegBank(Register Reg, const RegisterBank *RegBank) const
Set the register bank of Reg.
bool matchConstantSelectCmp(MachineInstr &MI, unsigned &OpIdx) const
Return true if a G_SELECT instruction MI has a constant comparison.
bool matchCommuteFPConstantToRHS(MachineInstr &MI) const
Match constant LHS FP ops that should be commuted.
void applyCombineDivRem(MachineInstr &MI, MachineInstr *&OtherMI) const
bool matchCombineFMinMaxNaN(MachineInstr &MI, unsigned &Info) const
bool matchRedundantOr(MachineInstr &MI, Register &Replacement) const
void applyTruncSSatU(MachineInstr &MI, Register &MatchInfo) const
bool matchCombineFSubFpExtFNegFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fsub (fpext (fneg (fmul x, y))), z) -> (fneg (fma (fpext x), (fpext y),...
bool matchTruncBuildVectorFold(MachineInstr &MI, Register &MatchInfo) const
void applyCombineTruncOfShift(MachineInstr &MI, std::pair< MachineInstr *, LLT > &MatchInfo) const
bool matchConstantOp(const MachineOperand &MOP, int64_t C) const
Return true if MOP is defined by a G_CONSTANT or splat with a value equal to C.
const LegalizerInfo * LI
void applyCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal) const
bool matchUMulHToLShr(MachineInstr &MI) const
MachineDominatorTree * MDT
void applyFunnelShiftToRotate(MachineInstr &MI) const
bool matchSimplifySelectToMinMax(MachineInstr &MI, BuildFnTy &MatchInfo) const
void applyRepeatedFPDivisor(SmallVector< MachineInstr * > &MatchInfo) const
bool matchTruncUSatUToFPTOUISat(MachineInstr &MI, MachineInstr &SrcMI) const
const RegisterBankInfo * RBI
bool matchMulOBy0(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match: (G_*MULO x, 0) -> 0 + no carry out.
GISelValueTracking * VT
bool matchCombineUnmergeConstant(MachineInstr &MI, SmallVectorImpl< APInt > &Csts) const
Transform G_UNMERGE Constant -> Constant1, Constant2, ...
void applyShiftOfShiftedLogic(MachineInstr &MI, ShiftOfShiftedLogic &MatchInfo) const
const TargetRegisterInfo * TRI
bool matchRedundantAnd(MachineInstr &MI, Register &Replacement) const
bool dominates(const MachineInstr &DefMI, const MachineInstr &UseMI) const
Returns true if DefMI dominates UseMI.
GISelChangeObserver & Observer
void applyBuildFn(MachineInstr &MI, BuildFnTy &MatchInfo) const
Use a function which takes in a MachineIRBuilder to perform a combine.
bool matchCombineTruncOfShift(MachineInstr &MI, std::pair< MachineInstr *, LLT > &MatchInfo) const
Transform trunc (shl x, K) to shl (trunc x), K if K < VT.getScalarSizeInBits().
bool matchCombineShiftToUnmerge(MachineInstr &MI, unsigned TargetShiftSize, unsigned &ShiftVal) const
Reduce a shift by a constant to an unmerge and a shift on a half sized type.
bool matchUDivOrURemByConst(MachineInstr &MI) const
Combine G_UDIV or G_UREM by constant into a multiply by magic constant.
bool matchAnd(MachineInstr &MI, BuildFnTy &MatchInfo) const
Combine ands.
bool matchSuboCarryOut(const MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchConstantFoldFMA(MachineInstr &MI, ConstantFP *&MatchInfo) const
Constant fold G_FMA/G_FMAD.
bool matchCombineFSubFNegFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z)) (fsub (fneg (fmul,...
bool matchCombineZextTrunc(MachineInstr &MI, Register &Reg) const
Transform zext(trunc(x)) to x.
bool matchOperandIsUndef(MachineInstr &MI, unsigned OpIdx) const
Check if operand OpIdx is undef.
void applyLshrOfTruncOfLshr(MachineInstr &MI, LshrOfTruncOfLshr &MatchInfo) const
bool tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0) const
Optimize memcpy intrinsics et al, e.g.
bool matchFreezeOfSingleMaybePoisonOperand(MachineInstr &MI, BuildFnTy &MatchInfo) const
void applySDivOrSRemByConst(MachineInstr &MI) const
MachineInstr * buildSDivOrSRemUsingMul(MachineInstr &MI) const
Given an G_SDIV MI or G_SREM MI expressing a signed divide by constant, return an expression that imp...
bool isLegalOrHasWidenScalar(const LegalityQuery &Query) const
bool matchSubAddSameReg(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform: (x + y) - y -> x (x + y) - x -> y x - (y + x) -> 0 - y x - (x + z) -> 0 - z.
bool matchReassocConstantInnerLHS(GPtrAdd &MI, MachineInstr *LHS, MachineInstr *RHS, BuildFnTy &MatchInfo) const
bool matchOverlappingAnd(MachineInstr &MI, BuildFnTy &MatchInfo) const
Fold and(and(x, C1), C2) -> C1&C2 ? and(x, C1&C2) : 0.
bool matchCombineAnyExtTrunc(MachineInstr &MI, Register &Reg) const
Transform anyext(trunc(x)) to x.
void applyExtractAllEltsFromBuildVector(MachineInstr &MI, SmallVectorImpl< std::pair< Register, MachineInstr * > > &MatchInfo) const
MachineIRBuilder & Builder
void applyCommuteBinOpOperands(MachineInstr &MI) const
void replaceSingleDefInstWithOperand(MachineInstr &MI, unsigned OpIdx) const
Delete MI and replace all of its uses with its OpIdx-th operand.
void applySextTruncSextLoad(MachineInstr &MI) const
const MachineFunction & getMachineFunction() const
bool matchCombineFAddFpExtFMulToFMadOrFMAAggressive(MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchSDivOrSRemByConst(MachineInstr &MI) const
Combine G_SDIV or G_SREM by constant into a multiply by magic constant.
void applyOptBrCondByInvertingCond(MachineInstr &MI, MachineInstr *&BrCond) const
void applyCombineShiftToUnmerge(MachineInstr &MI, const unsigned &ShiftVal) const
bool matchFPowIExpansion(MachineInstr &MI, int64_t Exponent) const
Match FPOWI if it's safe to extend it into a series of multiplications.
void applyCombineInsertVecElts(MachineInstr &MI, SmallVectorImpl< Register > &MatchInfo) const
bool matchCombineUnmergeMergeToPlainValues(MachineInstr &MI, SmallVectorImpl< Register > &Operands) const
Transform <ty,...> G_UNMERGE(G_MERGE ty X, Y, Z) -> ty X, Y, Z.
void applyCombineUnmergeMergeToPlainValues(MachineInstr &MI, SmallVectorImpl< Register > &Operands) const
bool matchAshrShlToSextInreg(MachineInstr &MI, std::tuple< Register, int64_t > &MatchInfo) const
Match ashr (shl x, C), C -> sext_inreg (C)
void applyCombineUnmergeZExtToZExt(MachineInstr &MI) const
ConstantFP - Floating Point Values [float, double].
Definition Constants.h:277
const APFloat & getValue() const
Definition Constants.h:321
const APFloat & getValueAPF() const
Definition Constants.h:320
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:154
This class represents a range of values.
LLVM_ABI std::optional< ConstantRange > exactUnionWith(const ConstantRange &CR) const
Union the two ranges and return the result if it can be represented exactly, otherwise return std::nu...
LLVM_ABI ConstantRange subtract(const APInt &CI) const
Subtract the specified constant from the endpoints of this constant range.
static LLVM_ABI ConstantRange fromKnownBits(const KnownBits &Known, bool IsSigned)
Initialize a range based on a known bits constraint.
const APInt & getLower() const
Return the lower value for this range.
LLVM_ABI OverflowResult unsignedSubMayOverflow(const ConstantRange &Other) const
Return whether unsigned sub of the two ranges always/never overflows.
LLVM_ABI OverflowResult unsignedAddMayOverflow(const ConstantRange &Other) const
Return whether unsigned add of the two ranges always/never overflows.
LLVM_ABI bool isWrappedSet() const
Return true if this set wraps around the unsigned domain.
const APInt & getUpper() const
Return the upper value for this range.
static LLVM_ABI ConstantRange makeExactICmpRegion(CmpInst::Predicate Pred, const APInt &Other)
Produce the exact range such that all values in the returned range satisfy the given predicate with a...
LLVM_ABI OverflowResult signedAddMayOverflow(const ConstantRange &Other) const
Return whether signed add of the two ranges always/never overflows.
@ AlwaysOverflowsHigh
Always overflows in the direction of signed/unsigned max value.
@ AlwaysOverflowsLow
Always overflows in the direction of signed/unsigned min value.
@ MayOverflow
May or may not overflow.
LLVM_ABI OverflowResult signedSubMayOverflow(const ConstantRange &Other) const
Return whether signed sub of the two ranges always/never overflows.
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63
bool isBigEndian() const
Definition DataLayout.h:208
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition DenseMap.h:205
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition DenseMap.h:248
unsigned size() const
Definition DenseMap.h:110
iterator end()
Definition DenseMap.h:81
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:359
Represents overflowing add operations.
Represents an integer addition.
Represents a logical and.
CmpInst::Predicate getCond() const
Register getLHSReg() const
Register getRHSReg() const
Represents an any ext.
Represents any generic load, including sign/zero extending variants.
Register getDstReg() const
Get the definition register of the loaded value.
Register getCarryOutReg() const
Register getLHSReg() const
Register getRHSReg() const
Represents a G_BUILD_VECTOR.
Represent a G_ICMP.
Abstract class that contains various methods for clients to notify about changes.
Simple wrapper observer that takes several observers, and calls each one for each event.
Represents any type of generic load or store.
Register getPointerReg() const
Get the source register of the pointer value.
Represents a G_LOAD.
Represents a logical binary operation.
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
bool isAtomic() const
Returns true if the attached MachineMemOperand has the atomic flag set.
LocationSize getMemSizeInBits() const
Returns the size in bits of the memory access.
bool isSimple() const
Returns true if the memory operation is neither atomic or volatile.
Register getSourceReg(unsigned I) const
Returns the I'th source register.
unsigned getNumSources() const
Returns the number of source registers.
Represents a G_MERGE_VALUES.
Represents a logical or.
Represents a G_PTR_ADD.
Represents a G_SELECT.
Register getCondReg() const
Represents overflowing sub operations.
Represents an integer subtraction.
Represents a G_UNMERGE_VALUES.
unsigned getNumDefs() const
Returns the number of def registers.
Register getSourceReg() const
Get the unmerge source register.
Represents a G_ZEXTLOAD.
Register getReg(unsigned Idx) const
Access the Idx'th operand as a register and return it.
static LLVM_ABI bool compare(const APInt &LHS, const APInt &RHS, ICmpInst::Predicate Pred)
Return result of LHS Pred RHS comparison.
constexpr bool isScalableVector() const
Returns true if the LLT is a scalable vector.
constexpr unsigned getScalarSizeInBits() const
constexpr bool isScalar() const
constexpr LLT changeElementType(LLT NewEltTy) const
If this type is a vector, return a vector with the same number of elements but the new element type.
static constexpr LLT vector(ElementCount EC, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr bool isValid() const
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
constexpr bool isVector() const
constexpr bool isByteSized() const
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr bool isPointer() const
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
constexpr ElementCount getElementCount() const
constexpr LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
constexpr bool isFixedVector() const
Returns true if the LLT is a fixed vector.
constexpr LLT getScalarType() const
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
@ Legalized
Instruction has been legalized and the MachineFunction changed.
LLVM_ABI LegalizeResult lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0)
LLVM_ABI Register getVectorElementPointer(Register VecPtr, LLT VecTy, Register Index)
Get a pointer to vector element Index located in memory for a vector of type VecTy starting at a base...
TypeSize getValue() const
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition MCInstrInfo.h:90
LLVM_ABI iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
MachineInstrBundleIterator< MachineInstr > iterator
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Helper class to build MachineInstr.
const TargetInstrInfo & getTII()
MachineInstrBuilder buildSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_SUB Op0, Op1.
MachineInstrBuilder buildCTLZ(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTLZ Op0, Src0.
MachineFunction & getMF()
Getter for the function we currently build.
MachineRegisterInfo * getMRI()
Getter for MRI.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
Register getReg(unsigned Idx) const
Get the register for the operand index.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
LLVM_ABI bool isDereferenceableInvariantLoad() const
Return true if this load instruction never traps and points to a memory location whose value doesn't ...
bool getFlag(MIFlag Flag) const
Return whether an MI flag is set.
unsigned getNumOperands() const
Retuns the total number of operands.
LLVM_ABI void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
mop_range uses()
Returns all operands which may be register uses.
MachineOperand * findRegisterUseOperand(Register Reg, const TargetRegisterInfo *TRI, bool isKill=false)
Wrapper for findRegisterUseOperandIdx, it returns a pointer to the MachineOperand rather than an inde...
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
uint32_t getFlags() const
Return the MI flags bitvector.
LLVM_ABI int findRegisterDefOperandIdx(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
A description of a memory reference used in the backend.
LLT getMemoryType() const
Return the memory type of the memory reference.
unsigned getAddrSpace() const
const MachinePointerInfo & getPointerInfo() const
LLVM_ABI Align getAlign() const
Return the minimum known alignment in bytes of the actual memory reference.
MachineOperand class - Representation of each machine instruction operand.
const ConstantInt * getCImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void setMBB(MachineBasicBlock *MBB)
void setPredicate(unsigned Predicate)
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
unsigned getPredicate() const
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition ArrayRef.h:299
This class implements the register bank concept.
Wrapper class representing virtual and physical registers.
Definition Register.h:19
constexpr bool isValid() const
Definition Register.h:107
size_type size() const
Determine the number of elements in the SetVector.
Definition SetVector.h:102
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition SetVector.h:261
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:150
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
SmallBitVector & set()
bool all() const
Returns true if all bits are set.
size_type size() const
Definition SmallPtrSet.h:99
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:338
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:183
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void resize(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual LLVM_READONLY LLT getPreferredShiftAmountTy(LLT ShiftValueTy) const
Return the preferred type to use for a shift opcode, given the shifted amount type is ShiftValueTy.
bool isBeneficialToExpandPowI(int64_t Exponent, bool OptForSize) const
Return true if it is beneficial to expand an @llvm.powi.
virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AddrSpace, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual unsigned combineRepeatedFPDivisors() const
Indicate whether this target prefers to combine FDIVs with the same divisor.
virtual const TargetLowering * getTargetLowering() const
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
Definition TypeSize.h:181
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:166
self_iterator getIterator()
Definition ilist_node.h:123
Changed
#define INT64_MAX
Definition DataTypes.h:71
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ Legal
The operation is expected to be selectable directly by the target, and no transformation is necessary...
@ WidenScalar
The operation should be implemented in terms of a wider scalar base-type.
operand_type_match m_Reg()
SpecificConstantMatch m_SpecificICst(const APInt &RequestedValue)
Matches a constant equal to RequestedValue.
BinaryOp_match< LHS, RHS, TargetOpcode::G_BUILD_VECTOR, false > m_GBuildVector(const LHS &L, const RHS &R)
GCstAndRegMatch m_GCst(std::optional< ValueAndVReg > &ValReg)
operand_type_match m_Pred()
BinaryOp_match< LHS, RHS, TargetOpcode::G_UMIN, true > m_GUMin(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_ZEXT > m_GZExt(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_XOR, true > m_GXor(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_SEXT > m_GSExt(const SrcTy &Src)
UnaryOp_match< SrcTy, TargetOpcode::G_FPEXT > m_GFPExt(const SrcTy &Src)
ConstantMatch< APInt > m_ICst(APInt &Cst)
UnaryOp_match< SrcTy, TargetOpcode::G_INTTOPTR > m_GIntToPtr(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_ADD, true > m_GAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_OR, true > m_GOr(const LHS &L, const RHS &R)
BinaryOp_match< SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB > m_Neg(const SrcTy &&Src)
Matches a register negated by a G_SUB.
ICstOrSplatMatch< APInt > m_ICstOrSplat(APInt &Cst)
ImplicitDefMatch m_GImplicitDef()
OneNonDBGUse_match< SubPat > m_OneNonDBGUse(const SubPat &SP)
CheckType m_SpecificType(LLT Ty)
deferred_ty< Register > m_DeferredReg(Register &R)
Similar to m_SpecificReg/Type, but the specific value to match originated from an earlier sub-pattern...
BinaryOp_match< LHS, RHS, TargetOpcode::G_UMAX, true > m_GUMax(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_FADD, true > m_GFAdd(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_PTRTOINT > m_GPtrToInt(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_FSUB, false > m_GFSub(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SUB > m_GSub(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_ASHR, false > m_GAShr(const LHS &L, const RHS &R)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
BinaryOp_match< LHS, RHS, TargetOpcode::G_PTR_ADD, false > m_GPtrAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SHL, false > m_GShl(const LHS &L, const RHS &R)
Or< Preds... > m_any_of(Preds &&... preds)
SpecificConstantOrSplatMatch m_SpecificICstOrSplat(const APInt &RequestedValue)
Matches a RequestedValue constant or a constant splat of RequestedValue.
BinaryOp_match< LHS, RHS, TargetOpcode::G_AND, true > m_GAnd(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_BITCAST > m_GBitcast(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_BUILD_VECTOR_TRUNC, false > m_GBuildVectorTrunc(const LHS &L, const RHS &R)
bind_ty< MachineInstr * > m_MInstr(MachineInstr *&MI)
UnaryOp_match< SrcTy, TargetOpcode::G_FNEG > m_GFNeg(const SrcTy &Src)
CompareOp_match< Pred, LHS, RHS, TargetOpcode::G_ICMP, true > m_c_GICmp(const Pred &P, const LHS &L, const RHS &R)
G_ICMP matcher that also matches commuted compares.
TernaryOp_match< Src0Ty, Src1Ty, Src2Ty, TargetOpcode::G_INSERT_VECTOR_ELT > m_GInsertVecElt(const Src0Ty &Src0, const Src1Ty &Src1, const Src2Ty &Src2)
GFCstOrSplatGFCstMatch m_GFCstOrSplat(std::optional< FPValueAndVReg > &FPValReg)
And< Preds... > m_all_of(Preds &&... preds)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SMIN, true > m_GSMin(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_LSHR, false > m_GLShr(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_ANYEXT > m_GAnyExt(const SrcTy &Src)
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
UnaryOp_match< SrcTy, TargetOpcode::G_TRUNC > m_GTrunc(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SMAX, true > m_GSMax(const LHS &L, const RHS &R)
CompareOp_match< Pred, LHS, RHS, TargetOpcode::G_FCMP > m_GFCmp(const Pred &P, const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Not(const Pred &P) -> Not< Pred >
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
LLVM_ABI bool isBuildVectorAllZeros(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndef=false)
Return true if the specified instruction is a G_BUILD_VECTOR or G_BUILD_VECTOR_TRUNC where all of the...
Definition Utils.cpp:1481
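For example, a combine might only fire when an (assumed) operand register is a zero vector, allowing undef lanes:
  const MachineInstr *DefMI = MRI.getVRegDef(OtherReg);
  if (DefMI && isBuildVectorAllZeros(*DefMI, MRI, /*AllowUndef=*/true)) {
    // OtherReg is a G_BUILD_VECTOR(_TRUNC) whose defined lanes are all zero.
  }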
LLVM_ABI Type * getTypeForLLT(LLT Ty, LLVMContext &C)
Get the type back from LLT.
Definition Utils.cpp:2034
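A minimal sketch of the mapping, assuming an LLVMContext Ctx is available:
  Type *IRTy = getTypeForLLT(LLT::scalar(64), Ctx); // yields i64
  // getMVTForLLT and getApproximateEVTForLLT (listed below) map an LLT into
  // the MVT/EVT type systems instead of LLVM IR types.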
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1725
LLVM_ABI MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by a single def instruction whose opcode is Opcode.
Definition Utils.cpp:651
static double log2(double V)
LLVM_ABI const ConstantFP * getConstantFPVRegVal(Register VReg, const MachineRegisterInfo &MRI)
Definition Utils.cpp:459
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
LLVM_ABI std::optional< APInt > getIConstantVRegVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT, return the corresponding value.
Definition Utils.cpp:294
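Typical use, with AmtReg standing in for a hypothetical shift-amount register:
  if (std::optional<APInt> ShAmt = getIConstantVRegVal(AmtReg, MRI)) {
    if (ShAmt->isZero()) {
      // Shift by zero: the shift can be replaced by its input operand.
    }
  }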
LLVM_ABI std::optional< APInt > getIConstantSplatVal(const Register Reg, const MachineRegisterInfo &MRI)
Definition Utils.cpp:1441
LLVM_ABI bool isAllOnesOrAllOnesSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant -1 integer or a splatted vector of a constant -1 integer (with...
Definition Utils.cpp:1606
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition bit.h:293
std::function< void(MachineIRBuilder &)> BuildFnTy
LLVM_ABI const llvm::fltSemantics & getFltSemanticForLLT(LLT Ty)
Get the appropriate floating point arithmetic semantic based on the bit size of the given scalar LLT.
LLVM_ABI std::optional< APFloat > ConstantFoldFPBinOp(unsigned Opcode, const Register Op1, const Register Op2, const MachineRegisterInfo &MRI)
Definition Utils.cpp:739
LLVM_ABI MVT getMVTForLLT(LLT Ty)
Get a rough equivalent of an MVT for a given LLT.
LLVM_ABI std::optional< APInt > isConstantOrConstantSplatVector(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a constant integer or a splat vector of constant integers.
Definition Utils.cpp:1564
LLVM_ABI bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition Utils.cpp:1588
LLVM_ABI MachineInstr * getDefIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, folding away any trivial copies.
Definition Utils.cpp:492
LLVM_ABI bool matchUnaryPredicate(const MachineRegisterInfo &MRI, Register Reg, std::function< bool(const Constant *ConstVal)> Match, bool AllowUndefs=false)
Attempt to match a unary predicate against a scalar/splat constant or every element of a constant G_B...
Definition Utils.cpp:1621
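A sketch of an element-wise check over a scalar or build-vector constant; Reg is assumed, and note that with AllowUndefs enabled the callback can be handed a null Constant for undef elements:
  bool AllNonZero = matchUnaryPredicate(
      MRI, Reg,
      [](const Constant *C) {
        const auto *CI = dyn_cast_or_null<ConstantInt>(C);
        return CI && !CI->isZero();
      },
      /*AllowUndefs=*/false);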
LLVM_ABI bool isConstTrueVal(const TargetLowering &TLI, int64_t Val, bool IsVector, bool IsFP)
Returns true if given the TargetLowering's boolean contents information, the value Val contains a tru...
Definition Utils.cpp:1653
LLVM_ABI std::optional< APInt > ConstantFoldBinOp(unsigned Opcode, const Register Op1, const Register Op2, const MachineRegisterInfo &MRI)
Definition Utils.cpp:670
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:147
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1732
LLVM_ABI const APInt & getIConstantFromReg(Register VReg, const MachineRegisterInfo &MRI)
VReg is defined by a G_CONSTANT; return the corresponding value.
Definition Utils.cpp:305
LLVM_ABI bool isConstantOrConstantVector(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowFP=true, bool AllowOpaqueConstants=true)
Return true if the specified instruction is known to be a constant, or a vector of constants.
Definition Utils.cpp:1544
SmallVector< std::function< void(MachineInstrBuilder &)>, 4 > OperandBuildSteps
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
LLVM_ABI bool canReplaceReg(Register DstReg, Register SrcReg, MachineRegisterInfo &MRI)
Check if DstReg can be replaced with SrcReg depending on the register constraints.
Definition Utils.cpp:201
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition MathExtras.h:261
LLVM_ABI bool canCreateUndefOrPoison(const Operator *Op, bool ConsiderFlagsAndMetadata=true)
canCreateUndefOrPoison returns true if Op can create undef or poison from non-undef & non-poison oper...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
auto instructionsWithoutDebug(IterT It, IterT End, bool SkipPseudoOp=true)
Construct a range iterator which begins at It and moves forwards until End is reached,...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI std::optional< FPValueAndVReg > getFConstantSplat(Register VReg, const MachineRegisterInfo &MRI, bool AllowUndef=true)
Returns a floating point scalar constant of a build vector splat if it exists.
Definition Utils.cpp:1474
LLVM_ABI EVT getApproximateEVTForLLT(LLT Ty, LLVMContext &Ctx)
LLVM_ABI std::optional< APInt > ConstantFoldCastOp(unsigned Opcode, LLT DstTy, const Register Op0, const MachineRegisterInfo &MRI)
Definition Utils.cpp:965
LLVM_ABI unsigned getInverseGMinMaxOpcode(unsigned MinMaxOpc)
Returns the inverse opcode of MinMaxOpc, which is a generic min/max opcode like G_SMIN.
Definition Utils.cpp:279
@ Xor
Bitwise or logical XOR of integers.
@ And
Bitwise or logical AND of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
LLVM_ABI std::optional< FPValueAndVReg > getFConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_FCONSTANT returns it...
Definition Utils.cpp:447
LLVM_ABI std::optional< APFloat > isConstantOrConstantSplatVectorFP(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a floating-point constant or a splat vector of floating-point constants.
Definition Utils.cpp:1577
constexpr unsigned BitWidth
LLVM_ABI int64_t getICmpTrueVal(const TargetLowering &TLI, bool IsVector, bool IsFP)
Returns an integer representing true, as defined by the TargetBooleanContents.
Definition Utils.cpp:1678
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI bool isKnownNeverNaN(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if the floating-point scalar value is not a NaN or if the floating-point vector value has...
LLVM_ABI std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Definition Utils.cpp:433
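Unlike getIConstantVRegVal above, this helper also reports which vreg ultimately carries the G_CONSTANT; a sketch with an assumed Reg:
  if (auto ValAndVReg = getIConstantVRegValWithLookThrough(Reg, MRI)) {
    const APInt &Val = ValAndVReg->Value; // the constant's value
    Register CstReg = ValAndVReg->VReg;   // vreg defined by the G_CONSTANT
    (void)Val;
    (void)CstReg;
  }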
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1758
iterator_range< pointer_iterator< WrappedIteratorT > > make_pointer_range(RangeT &&Range)
Definition iterator.h:363
LLVM_ABI std::optional< DefinitionAndSourceRegister > getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, and underlying value Register folding away any copies.
Definition Utils.cpp:467
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
LLVM_ABI bool isKnownToBeAPowerOfTwo(const Value *V, const DataLayout &DL, bool OrZero=false, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Return true if the given value is known to have exactly one bit set when defined.
LLVM_ABI Register getSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the source register for Reg, folding away any trivial copies.
Definition Utils.cpp:499
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
Definition MathExtras.h:77
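For instance, recognizing a contiguous low mask and rebuilding it from its width (the constant is illustrative):
  uint64_t C = 0x00ffffff;
  if (isMask_64(C)) {
    unsigned Width = llvm::countr_one(C);                 // 24
    uint64_t Rebuilt = maskTrailingOnes<uint64_t>(Width); // equals C
    (void)Rebuilt;
  }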
unsigned getFCmpCode(CmpInst::Predicate CC)
Similar to getICmpCode but for FCmpInst.
LLVM_ABI std::optional< int64_t > getIConstantSplatSExtVal(const Register Reg, const MachineRegisterInfo &MRI)
Definition Utils.cpp:1459
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:869
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Simple struct used to hold a Register value and the instruction which defines it.
Definition Utils.h:234
Extended Value Type.
Definition ValueTypes.h:35
SmallVector< InstructionBuildSteps, 2 > InstrsToBuild
Describes instructions to be built during a combine.
bool isNonNegative() const
Returns true if this value is known to be non-negative.
Definition KnownBits.h:108
unsigned countMinLeadingOnes() const
Returns the minimum number of leading one bits.
Definition KnownBits.h:251
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition KnownBits.h:242
bool isUnknown() const
Returns true if we don't know any bits.
Definition KnownBits.h:66
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition KnownBits.h:248
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition KnownBits.h:145
bool isNegative() const
Returns true if this value is known to be negative.
Definition KnownBits.h:105
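In the combiner these queries are typically fed by its value-tracking; a sketch, where obtaining Known via VT->getKnownBits(Reg) is an assumption about the surrounding helper:
  KnownBits Known = VT->getKnownBits(Reg);
  if (!Known.isUnknown() && Known.countMinTrailingZeros() >= 2) {
    // The low two bits are known zero, e.g. the value is a multiple of 4.
  }
  APInt UpperBound = Known.getMaxValue(); // largest value consistent with Known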
The LegalityQuery object bundles together all the information that's needed to decide whether a given...
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands,...
LLVM_ABI unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
MachinePointerInfo getWithOffset(int64_t O) const
const RegisterBank * Bank
Magic data for optimising signed division by a constant.
static LLVM_ABI SignedDivisionByConstantInfo get(const APInt &D)
Calculate the magic numbers required to implement a signed integer division by a constant as a sequen...
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
Magic data for optimising unsigned division by a constant.
static LLVM_ABI UnsignedDivisionByConstantInfo get(const APInt &D, unsigned LeadingZeros=0, bool AllowEvenDivisorOptimization=true)
Calculate the magic numbers required to implement an unsigned integer division by a constant as a seq...
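A rough sketch of how the returned magic data is consumed when expanding an unsigned division by a constant; the field names follow DivisionByConstantInfo.h, and the expansion in the comments is the usual umulh-based sequence, simplified:
  APInt Divisor(32, 7); // e.g. a 32-bit udiv by 7
  UnsignedDivisionByConstantInfo Magics =
      UnsignedDivisionByConstantInfo::get(Divisor);
  // The combiner then builds roughly:
  //   Q = G_UMULH(N >> Magics.PreShift, Magics.Magic)
  //   if (Magics.IsAdd) Q = ((N - Q) >> 1) + Q
  //   Q = Q >> Magics.PostShift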