CombinerHelper.cpp
1//===-- lib/CodeGen/GlobalISel/GICombinerHelper.cpp -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
9#include "llvm/ADT/APFloat.h"
10#include "llvm/ADT/STLExtras.h"
11#include "llvm/ADT/SetVector.h"
34#include "llvm/IR/DataLayout.h"
35#include "llvm/IR/InstrTypes.h"
41#include <cmath>
42#include <optional>
43#include <tuple>
44
45#define DEBUG_TYPE "gi-combiner"
46
47using namespace llvm;
48using namespace MIPatternMatch;
49
50// Option to allow testing of the combiner while no targets know about indexed
51// addressing.
52static cl::opt<bool>
53 ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false),
54 cl::desc("Force all indexed operations to be "
55 "legal for the GlobalISel combiner"));
56
57CombinerHelper::CombinerHelper(GISelChangeObserver &Observer,
58 MachineIRBuilder &B, bool IsPreLegalize,
59 GISelValueTracking *VT,
60 MachineDominatorTree *MDT,
61 const LegalizerInfo *LI)
62 : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer), VT(VT),
63 MDT(MDT), IsPreLegalize(IsPreLegalize), LI(LI),
64 RBI(Builder.getMF().getSubtarget().getRegBankInfo()),
65 TRI(Builder.getMF().getSubtarget().getRegisterInfo()) {
66 (void)this->VT;
67}
68
69const TargetLowering &CombinerHelper::getTargetLowering() const {
70 return *Builder.getMF().getSubtarget().getTargetLowering();
71}
72
73MachineFunction &CombinerHelper::getMF() const {
74 return Builder.getMF();
75}
76
80
81LLVMContext &CombinerHelper::getContext() const { return Builder.getContext(); }
82
83/// \returns The little endian in-memory byte position of byte \p I in a
84/// \p ByteWidth bytes wide type.
85///
86/// E.g. Given a 4-byte type x, x[0] -> byte 0
87static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I) {
88 assert(I < ByteWidth && "I must be in [0, ByteWidth)");
89 return I;
90}
91
92/// Determines the LogBase2 value for a non-null input value using the
93/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
94static Register buildLogBase2(Register V, MachineIRBuilder &MIB) {
95 auto &MRI = *MIB.getMRI();
96 LLT Ty = MRI.getType(V);
97 auto Ctlz = MIB.buildCTLZ(Ty, V);
98 auto Base = MIB.buildConstant(Ty, Ty.getScalarSizeInBits() - 1);
99 return MIB.buildSub(Ty, Base, Ctlz).getReg(0);
100}
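// Illustrative example (not from the original source): for V = 8 in s32,
// ctlz(8) = 28, so LogBase2 = (32 - 1) - 28 = 3. The emitted MIR is roughly:
//   %ctlz:_(s32) = G_CTLZ %v(s32)
//   %base:_(s32) = G_CONSTANT i32 31
//   %log2:_(s32) = G_SUB %base, %ctlz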
101
102/// \returns The big endian in-memory byte position of byte \p I in a
103/// \p ByteWidth bytes wide type.
104///
105/// E.g. Given a 4-byte type x, x[0] -> byte 3
106static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I) {
107 assert(I < ByteWidth && "I must be in [0, ByteWidth)");
108 return ByteWidth - I - 1;
109}
110
111/// Given a map from byte offsets in memory to indices in a load/store,
112/// determine if that map corresponds to a little or big endian byte pattern.
113///
114/// \param MemOffset2Idx maps memory offsets to address offsets.
115/// \param LowestIdx is the lowest index in \p MemOffset2Idx.
116///
117/// \returns true if the map corresponds to a big endian byte pattern, false if
118/// it corresponds to a little endian byte pattern, and std::nullopt otherwise.
119///
120/// E.g. given a 32-bit type x, and x[AddrOffset], the in-memory byte patterns
121/// are as follows:
122///
123/// AddrOffset Little endian Big endian
124/// 0 0 3
125/// 1 1 2
126/// 2 2 1
127/// 3 3 0
128static std::optional<bool>
129isBigEndian(const SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx,
130 int64_t LowestIdx) {
131 // Need at least two byte positions to decide on endianness.
132 unsigned Width = MemOffset2Idx.size();
133 if (Width < 2)
134 return std::nullopt;
135 bool BigEndian = true, LittleEndian = true;
136 for (unsigned MemOffset = 0; MemOffset < Width; ++MemOffset) {
137 auto MemOffsetAndIdx = MemOffset2Idx.find(MemOffset);
138 if (MemOffsetAndIdx == MemOffset2Idx.end())
139 return std::nullopt;
140 const int64_t Idx = MemOffsetAndIdx->second - LowestIdx;
141 assert(Idx >= 0 && "Expected non-negative byte offset?");
142 LittleEndian &= Idx == littleEndianByteAt(Width, MemOffset);
143 BigEndian &= Idx == bigEndianByteAt(Width, MemOffset);
144 if (!BigEndian && !LittleEndian)
145 return std::nullopt;
146 }
147
148 assert((BigEndian != LittleEndian) &&
149 "Pattern cannot be both big and little endian!");
150 return BigEndian;
151}
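// Illustrative example (not from the original source): with Width = 2 and
// LowestIdx = 4, the map {0 -> 5, 1 -> 4} normalizes to {0 -> 1, 1 -> 0},
// which matches bigEndianByteAt, so the function returns true; the map
// {0 -> 4, 1 -> 5} matches littleEndianByteAt instead and returns false.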
152
153bool CombinerHelper::isPreLegalize() const { return IsPreLegalize; }
154
155bool CombinerHelper::isLegal(const LegalityQuery &Query) const {
156 assert(LI && "Must have LegalizerInfo to query isLegal!");
157 return LI->getAction(Query).Action == LegalizeActions::Legal;
158}
159
160bool CombinerHelper::isLegalOrBeforeLegalizer(
161 const LegalityQuery &Query) const {
162 return isPreLegalize() || isLegal(Query);
163}
164
166 return isLegal(Query) ||
167 LI->getAction(Query).Action == LegalizeActions::WidenScalar;
168}
169
171 if (!Ty.isVector())
172 return isLegalOrBeforeLegalizer({TargetOpcode::G_CONSTANT, {Ty}});
173 // Vector constants are represented as a G_BUILD_VECTOR of scalar G_CONSTANTs.
174 if (isPreLegalize())
175 return true;
176 LLT EltTy = Ty.getElementType();
177 return isLegal({TargetOpcode::G_BUILD_VECTOR, {Ty, EltTy}}) &&
178 isLegal({TargetOpcode::G_CONSTANT, {EltTy}});
179}
180
181void CombinerHelper::replaceRegWith(MachineRegisterInfo &MRI, Register FromReg,
182 Register ToReg) const {
183 Observer.changingAllUsesOfReg(MRI, FromReg);
184
185 if (MRI.constrainRegAttrs(ToReg, FromReg))
186 MRI.replaceRegWith(FromReg, ToReg);
187 else
188 Builder.buildCopy(FromReg, ToReg);
189
190 Observer.finishedChangingAllUsesOfReg();
191}
192
193void CombinerHelper::replaceRegOpWith(MachineRegisterInfo &MRI,
194 MachineOperand &FromRegOp,
195 Register ToReg) const {
196 assert(FromRegOp.getParent() && "Expected an operand in an MI");
197 Observer.changingInstr(*FromRegOp.getParent());
198
199 FromRegOp.setReg(ToReg);
200
201 Observer.changedInstr(*FromRegOp.getParent());
202}
203
204void CombinerHelper::replaceOpcodeWith(MachineInstr &FromMI,
205 unsigned ToOpcode) const {
206 Observer.changingInstr(FromMI);
207
208 FromMI.setDesc(Builder.getTII().get(ToOpcode));
209
210 Observer.changedInstr(FromMI);
211}
212
213const RegisterBank *CombinerHelper::getRegBank(Register Reg) const {
214 return RBI->getRegBank(Reg, MRI, *TRI);
215}
216
217void CombinerHelper::setRegBank(Register Reg,
218 const RegisterBank *RegBank) const {
219 if (RegBank)
220 MRI.setRegBank(Reg, *RegBank);
221}
222
223bool CombinerHelper::tryCombineCopy(MachineInstr &MI) const {
224 if (matchCombineCopy(MI)) {
225 applyCombineCopy(MI);
226 return true;
227 }
228 return false;
229}
230bool CombinerHelper::matchCombineCopy(MachineInstr &MI) const {
231 if (MI.getOpcode() != TargetOpcode::COPY)
232 return false;
233 Register DstReg = MI.getOperand(0).getReg();
234 Register SrcReg = MI.getOperand(1).getReg();
235 return canReplaceReg(DstReg, SrcReg, MRI);
236}
237void CombinerHelper::applyCombineCopy(MachineInstr &MI) const {
238 Register DstReg = MI.getOperand(0).getReg();
239 Register SrcReg = MI.getOperand(1).getReg();
240 replaceRegWith(MRI, DstReg, SrcReg);
241 MI.eraseFromParent();
242}
243
244bool CombinerHelper::matchFreezeOfSingleMaybePoisonOperand(
245 MachineInstr &MI, BuildFnTy &MatchInfo) const {
246 // Ported from InstCombinerImpl::pushFreezeToPreventPoisonFromPropagating.
247 Register DstOp = MI.getOperand(0).getReg();
248 Register OrigOp = MI.getOperand(1).getReg();
249
250 if (!MRI.hasOneNonDBGUse(OrigOp))
251 return false;
252
253 MachineInstr *OrigDef = MRI.getUniqueVRegDef(OrigOp);
254 // Even if only a single operand of the PHI is not guaranteed non-poison,
255 // moving freeze() backwards across a PHI can cause optimization issues for
256 // other users of that operand.
257 //
258 // Moving freeze() from one of the output registers of a G_UNMERGE_VALUES to
259 // the source register is unprofitable because it makes the freeze() more
260 // strict than is necessary (it would affect the whole register instead of
261 // just the subreg being frozen).
262 if (OrigDef->isPHI() || isa<GUnmerge>(OrigDef))
263 return false;
264
265 if (canCreateUndefOrPoison(OrigOp, MRI,
266 /*ConsiderFlagsAndMetadata=*/false))
267 return false;
268
269 std::optional<MachineOperand> MaybePoisonOperand;
270 for (MachineOperand &Operand : OrigDef->uses()) {
271 if (!Operand.isReg())
272 return false;
273
274 if (isGuaranteedNotToBeUndefOrPoison(Operand.getReg(), MRI))
275 continue;
276
277 if (!MaybePoisonOperand)
278 MaybePoisonOperand = Operand;
279 else {
280 // We have more than one maybe-poison operand. Moving the freeze is
281 // unsafe.
282 return false;
283 }
284 }
285
286 // Eliminate freeze if all operands are guaranteed non-poison.
287 if (!MaybePoisonOperand) {
288 MatchInfo = [=](MachineIRBuilder &B) {
289 Observer.changingInstr(*OrigDef);
290 cast<GenericMachineInstr>(OrigDef)->dropPoisonGeneratingFlags();
291 Observer.changedInstr(*OrigDef);
292 B.buildCopy(DstOp, OrigOp);
293 };
294 return true;
295 }
296
297 Register MaybePoisonOperandReg = MaybePoisonOperand->getReg();
298 LLT MaybePoisonOperandRegTy = MRI.getType(MaybePoisonOperandReg);
299
300 MatchInfo = [=](MachineIRBuilder &B) mutable {
301 Observer.changingInstr(*OrigDef);
302 cast<GenericMachineInstr>(OrigDef)->dropPoisonGeneratingFlags();
303 Observer.changedInstr(*OrigDef);
304 B.setInsertPt(*OrigDef->getParent(), OrigDef->getIterator());
305 auto Freeze = B.buildFreeze(MaybePoisonOperandRegTy, MaybePoisonOperandReg);
306 replaceRegOpWith(
307 MRI, *OrigDef->findRegisterUseOperand(MaybePoisonOperandReg, TRI),
308 Freeze.getReg(0));
309 replaceRegWith(MRI, DstOp, OrigOp);
310 };
311 return true;
312}
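// Illustrative example (arbitrary register names, assuming %a is known to be
// neither undef nor poison and %s has no other users):
//   %s:_(s32) = G_ADD %a, %b
//   %f:_(s32) = G_FREEZE %s
// is rewritten so that only the maybe-poison operand is frozen:
//   %fb:_(s32) = G_FREEZE %b
//   %s:_(s32) = G_ADD %a, %fb   ; poison-generating flags dropped
// and the users of %f are rewritten to use %s directly.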
313
316 assert(MI.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
317 "Invalid instruction");
318 bool IsUndef = true;
319 MachineInstr *Undef = nullptr;
320
321 // Walk over all the operands of concat vectors and check if they are
322 // build_vector themselves or undef.
323 // Then collect their operands in Ops.
324 for (const MachineOperand &MO : MI.uses()) {
325 Register Reg = MO.getReg();
326 MachineInstr *Def = MRI.getVRegDef(Reg);
327 assert(Def && "Operand not defined");
328 if (!MRI.hasOneNonDBGUse(Reg))
329 return false;
330 switch (Def->getOpcode()) {
331 case TargetOpcode::G_BUILD_VECTOR:
332 IsUndef = false;
333 // Remember the operands of the build_vector to fold
334 // them into the yet-to-build flattened concat vectors.
335 for (const MachineOperand &BuildVecMO : Def->uses())
336 Ops.push_back(BuildVecMO.getReg());
337 break;
338 case TargetOpcode::G_IMPLICIT_DEF: {
339 LLT OpType = MRI.getType(Reg);
340 // Keep one undef value for all the undef operands.
341 if (!Undef) {
342 Builder.setInsertPt(*MI.getParent(), MI);
343 Undef = Builder.buildUndef(OpType.getScalarType());
344 }
345 assert(MRI.getType(Undef->getOperand(0).getReg()) ==
346 OpType.getScalarType() &&
347 "All undefs should have the same type");
348 // Break the undef vector into as many scalar elements as needed
349 // for the flattening.
350 for (unsigned EltIdx = 0, EltEnd = OpType.getNumElements();
351 EltIdx != EltEnd; ++EltIdx)
352 Ops.push_back(Undef->getOperand(0).getReg());
353 break;
354 }
355 default:
356 return false;
357 }
358 }
359
360 // Check if the combine is illegal
361 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
362 if (!isLegalOrBeforeLegalizer(
363 {TargetOpcode::G_BUILD_VECTOR, {DstTy, MRI.getType(Ops[0])}})) {
364 return false;
365 }
366
367 if (IsUndef)
368 Ops.clear();
369
370 return true;
371}
374 // We determined that the concat_vectors can be flattened.
375 // Generate the flattened build_vector.
376 Register DstReg = MI.getOperand(0).getReg();
377 Builder.setInsertPt(*MI.getParent(), MI);
378 Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
379
380 // Note: IsUndef is sort of redundant. We could have determined it by
381 // checking that all Ops are undef. Alternatively, we could have
382 // generated a build_vector of undefs and relied on another combine to
383 // clean that up. For now, given we already gather this information
384 // in matchCombineConcatVectors, just save compile time and issue the
385 // right thing.
386 if (Ops.empty())
387 Builder.buildUndef(NewDstReg);
388 else
389 Builder.buildBuildVector(NewDstReg, Ops);
390 replaceRegWith(MRI, DstReg, NewDstReg);
391 MI.eraseFromParent();
392}
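// Illustrative example of the flattening (arbitrary register names):
//   %a:_(<2 x s32>) = G_BUILD_VECTOR %x(s32), %y(s32)
//   %b:_(<2 x s32>) = G_IMPLICIT_DEF
//   %c:_(<4 x s32>) = G_CONCAT_VECTORS %a(<2 x s32>), %b(<2 x s32>)
// becomes
//   %u:_(s32) = G_IMPLICIT_DEF
//   %c:_(<4 x s32>) = G_BUILD_VECTOR %x(s32), %y(s32), %u(s32), %u(s32)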
393
395 auto &Shuffle = cast<GShuffleVector>(MI);
396
397 Register SrcVec1 = Shuffle.getSrc1Reg();
398 Register SrcVec2 = Shuffle.getSrc2Reg();
399 LLT EltTy = MRI.getType(SrcVec1).getElementType();
400 int Width = MRI.getType(SrcVec1).getNumElements();
401
402 auto Unmerge1 = Builder.buildUnmerge(EltTy, SrcVec1);
403 auto Unmerge2 = Builder.buildUnmerge(EltTy, SrcVec2);
404
405 SmallVector<Register> Extracts;
406 // Select only applicable elements from unmerged values.
407 for (int Val : Shuffle.getMask()) {
408 if (Val == -1)
409 Extracts.push_back(Builder.buildUndef(EltTy).getReg(0));
410 else if (Val < Width)
411 Extracts.push_back(Unmerge1.getReg(Val));
412 else
413 Extracts.push_back(Unmerge2.getReg(Val - Width));
414 }
415 assert(Extracts.size() > 0 && "Expected at least one element in the shuffle");
416 if (Extracts.size() == 1)
417 Builder.buildCopy(MI.getOperand(0).getReg(), Extracts[0]);
418 else
419 Builder.buildBuildVector(MI.getOperand(0).getReg(), Extracts);
420 MI.eraseFromParent();
421}
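// Illustrative example of this lowering (arbitrary register names):
//   %d:_(<2 x s32>) = G_SHUFFLE_VECTOR %v1(<2 x s32>), %v2(<2 x s32>), shufflemask(0, 3)
// becomes
//   %e0:_(s32), %e1:_(s32) = G_UNMERGE_VALUES %v1(<2 x s32>)
//   %f0:_(s32), %f1:_(s32) = G_UNMERGE_VALUES %v2(<2 x s32>)
//   %d:_(<2 x s32>) = G_BUILD_VECTOR %e0(s32), %f1(s32)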
422
425 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
426 auto ConcatMI1 =
427 dyn_cast<GConcatVectors>(MRI.getVRegDef(MI.getOperand(1).getReg()));
428 auto ConcatMI2 =
429 dyn_cast<GConcatVectors>(MRI.getVRegDef(MI.getOperand(2).getReg()));
430 if (!ConcatMI1 || !ConcatMI2)
431 return false;
432
433 // Check that the sources of the Concat instructions have the same type
434 if (MRI.getType(ConcatMI1->getSourceReg(0)) !=
435 MRI.getType(ConcatMI2->getSourceReg(0)))
436 return false;
437
438 LLT ConcatSrcTy = MRI.getType(ConcatMI1->getReg(1));
439 LLT ShuffleSrcTy1 = MRI.getType(MI.getOperand(1).getReg());
440 unsigned ConcatSrcNumElt = ConcatSrcTy.getNumElements();
441 for (unsigned i = 0; i < Mask.size(); i += ConcatSrcNumElt) {
442 // Check if the index takes a whole source register from G_CONCAT_VECTORS
443 // Assumes that all Sources of G_CONCAT_VECTORS are the same type
444 if (Mask[i] == -1) {
445 for (unsigned j = 1; j < ConcatSrcNumElt; j++) {
446 if (i + j >= Mask.size())
447 return false;
448 if (Mask[i + j] != -1)
449 return false;
450 }
451 if (!isLegalOrBeforeLegalizer(
452 {TargetOpcode::G_IMPLICIT_DEF, {ConcatSrcTy}}))
453 return false;
454 Ops.push_back(0);
455 } else if (Mask[i] % ConcatSrcNumElt == 0) {
456 for (unsigned j = 1; j < ConcatSrcNumElt; j++) {
457 if (i + j >= Mask.size())
458 return false;
459 if (Mask[i + j] != Mask[i] + static_cast<int>(j))
460 return false;
461 }
462 // Retrieve the source register from its respective G_CONCAT_VECTORS
463 // instruction
464 if (Mask[i] < ShuffleSrcTy1.getNumElements()) {
465 Ops.push_back(ConcatMI1->getSourceReg(Mask[i] / ConcatSrcNumElt));
466 } else {
467 Ops.push_back(ConcatMI2->getSourceReg(Mask[i] / ConcatSrcNumElt -
468 ConcatMI1->getNumSources()));
469 }
470 } else {
471 return false;
472 }
473 }
474
475 if (!isLegalOrBeforeLegalizer(
476 {TargetOpcode::G_CONCAT_VECTORS,
477 {MRI.getType(MI.getOperand(0).getReg()), ConcatSrcTy}}))
478 return false;
479
480 return !Ops.empty();
481}
482
485 LLT SrcTy;
486 for (Register &Reg : Ops) {
487 if (Reg != 0)
488 SrcTy = MRI.getType(Reg);
489 }
490 assert(SrcTy.isValid() && "Unexpected full undef vector in concat combine");
491
492 Register UndefReg = 0;
493
494 for (Register &Reg : Ops) {
495 if (Reg == 0) {
496 if (UndefReg == 0)
497 UndefReg = Builder.buildUndef(SrcTy).getReg(0);
498 Reg = UndefReg;
499 }
500 }
501
502 if (Ops.size() > 1)
503 Builder.buildConcatVectors(MI.getOperand(0).getReg(), Ops);
504 else
505 Builder.buildCopy(MI.getOperand(0).getReg(), Ops[0]);
506 MI.eraseFromParent();
507}
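// Illustrative example (arbitrary register names): with
//   %p:_(<4 x s32>) = G_CONCAT_VECTORS %a(<2 x s32>), %b(<2 x s32>)
//   %q:_(<4 x s32>) = G_CONCAT_VECTORS %c(<2 x s32>), %d(<2 x s32>)
//   %s:_(<4 x s32>) = G_SHUFFLE_VECTOR %p, %q, shufflemask(2, 3, 4, 5)
// each half of the mask selects one whole concat source, so this becomes
//   %s:_(<4 x s32>) = G_CONCAT_VECTORS %b(<2 x s32>), %c(<2 x s32>)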
508
513 return true;
514 }
515 return false;
516}
517
520 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
521 "Invalid instruction kind");
522 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
523 Register Src1 = MI.getOperand(1).getReg();
524 LLT SrcType = MRI.getType(Src1);
525
526 unsigned DstNumElts = DstType.getNumElements();
527 unsigned SrcNumElts = SrcType.getNumElements();
528
529 // If the resulting vector is smaller than the size of the source
530 // vectors being concatenated, we won't be able to replace the
531 // shuffle vector with a concat_vectors.
532 //
533 // Note: We may still be able to produce a concat_vectors fed by
534 // extract_vector_elt and so on. It is less clear that would
535 // be better though, so don't bother for now.
536 //
537 // If the destination is a scalar, the size of the sources doesn't
538 // matter. We will lower the shuffle to a plain copy. This will
539 // work only if the source and destination have the same size. But
540 // that's covered by the next condition.
541 //
542 // TODO: If the sizes of the source and destination don't match,
543 // we could still emit an extract vector element in that case.
544 if (DstNumElts < 2 * SrcNumElts)
545 return false;
546
547 // Check that the shuffle mask can be broken evenly between the
548 // different sources.
549 if (DstNumElts % SrcNumElts != 0)
550 return false;
551
552 // Mask length is a multiple of the source vector length.
553 // Check if the shuffle is some kind of concatenation of the input
554 // vectors.
555 unsigned NumConcat = DstNumElts / SrcNumElts;
556 SmallVector<int, 8> ConcatSrcs(NumConcat, -1);
557 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
558 for (unsigned i = 0; i != DstNumElts; ++i) {
559 int Idx = Mask[i];
560 // Undef value.
561 if (Idx < 0)
562 continue;
563 // Ensure the indices in each SrcType sized piece are sequential and that
564 // the same source is used for the whole piece.
565 if ((Idx % SrcNumElts != (i % SrcNumElts)) ||
566 (ConcatSrcs[i / SrcNumElts] >= 0 &&
567 ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts)))
568 return false;
569 // Remember which source this index came from.
570 ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts;
571 }
572
573 // The shuffle is concatenating multiple vectors together.
574 // Collect the different operands for that.
575 Register UndefReg;
576 Register Src2 = MI.getOperand(2).getReg();
577 for (auto Src : ConcatSrcs) {
578 if (Src < 0) {
579 if (!UndefReg) {
580 Builder.setInsertPt(*MI.getParent(), MI);
581 UndefReg = Builder.buildUndef(SrcType).getReg(0);
582 }
583 Ops.push_back(UndefReg);
584 } else if (Src == 0)
585 Ops.push_back(Src1);
586 else
587 Ops.push_back(Src2);
588 }
589 return true;
590}
591
592void CombinerHelper::applyCombineShuffleVector(MachineInstr &MI,
593 ArrayRef<Register> Ops) const {
594 Register DstReg = MI.getOperand(0).getReg();
595 Builder.setInsertPt(*MI.getParent(), MI);
596 Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
597
598 if (Ops.size() == 1)
599 Builder.buildCopy(NewDstReg, Ops[0]);
600 else
601 Builder.buildMergeLikeInstr(NewDstReg, Ops);
602
603 replaceRegWith(MRI, DstReg, NewDstReg);
604 MI.eraseFromParent();
605}
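// Illustrative example (arbitrary register names): a mask that simply appends
// the two sources,
//   %d:_(<4 x s32>) = G_SHUFFLE_VECTOR %v1(<2 x s32>), %v2(<2 x s32>), shufflemask(0, 1, 2, 3)
// is rewritten into
//   %d:_(<4 x s32>) = G_CONCAT_VECTORS %v1(<2 x s32>), %v2(<2 x s32>)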
606
607namespace {
608
609/// Select a preference between two uses. CurrentUse is the current preference
610/// while *ForCandidate is attributes of the candidate under consideration.
611PreferredTuple ChoosePreferredUse(MachineInstr &LoadMI,
612 PreferredTuple &CurrentUse,
613 const LLT TyForCandidate,
614 unsigned OpcodeForCandidate,
615 MachineInstr *MIForCandidate) {
616 if (!CurrentUse.Ty.isValid()) {
617 if (CurrentUse.ExtendOpcode == OpcodeForCandidate ||
618 CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT)
619 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
620 return CurrentUse;
621 }
622
623 // We permit the extend to hoist through basic blocks but this is only
624 // sensible if the target has extending loads. If you end up lowering back
625 // into a load and extend during the legalizer then the end result is
626 // hoisting the extend up to the load.
627
628 // Prefer defined extensions to undefined extensions as these are more
629 // likely to reduce the number of instructions.
630 if (OpcodeForCandidate == TargetOpcode::G_ANYEXT &&
631 CurrentUse.ExtendOpcode != TargetOpcode::G_ANYEXT)
632 return CurrentUse;
633 else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT &&
634 OpcodeForCandidate != TargetOpcode::G_ANYEXT)
635 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
636
637 // Prefer sign extensions to zero extensions as sign-extensions tend to be
638 // more expensive. Don't do this if the load is already a zero-extend load
639 // though, otherwise we'll rewrite a zero-extend load into a sign-extend
640 // later.
641 if (!isa<GZExtLoad>(LoadMI) && CurrentUse.Ty == TyForCandidate) {
642 if (CurrentUse.ExtendOpcode == TargetOpcode::G_SEXT &&
643 OpcodeForCandidate == TargetOpcode::G_ZEXT)
644 return CurrentUse;
645 else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ZEXT &&
646 OpcodeForCandidate == TargetOpcode::G_SEXT)
647 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
648 }
649
650 // This is potentially target specific. We've chosen the largest type
651 // because G_TRUNC is usually free. One potential catch with this is that
652 // some targets have fewer large registers than small registers, and this
653 // choice potentially increases the live-range for the
654 // larger value.
655 if (TyForCandidate.getSizeInBits() > CurrentUse.Ty.getSizeInBits()) {
656 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
657 }
658 return CurrentUse;
659}
660
661/// Find a suitable place to insert some instructions and insert them. This
662/// function accounts for special cases like inserting before a PHI node.
663/// The current strategy for inserting before PHI's is to duplicate the
664/// instructions for each predecessor. However, while that's ok for G_TRUNC
665/// on most targets since it generally requires no code, other targets/cases may
666/// want to try harder to find a dominating block.
667static void InsertInsnsWithoutSideEffectsBeforeUse(
668 MachineIRBuilder &Builder, MachineInstr &DefMI, MachineOperand &UseMO,
669 std::function<void(MachineBasicBlock *, MachineBasicBlock::iterator,
670 MachineOperand &UseMO)>
671 Inserter) {
672 MachineInstr &UseMI = *UseMO.getParent();
673
674 MachineBasicBlock *InsertBB = UseMI.getParent();
675
676 // If the use is a PHI then we want the predecessor block instead.
677 if (UseMI.isPHI()) {
678 MachineOperand *PredBB = std::next(&UseMO);
679 InsertBB = PredBB->getMBB();
680 }
681
682 // If the block is the same block as the def then we want to insert just after
683 // the def instead of at the start of the block.
684 if (InsertBB == DefMI.getParent()) {
685 MachineBasicBlock::iterator InsertPt = &DefMI;
686 Inserter(InsertBB, std::next(InsertPt), UseMO);
687 return;
688 }
689
690 // Otherwise we want the start of the BB
691 Inserter(InsertBB, InsertBB->getFirstNonPHI(), UseMO);
692}
693} // end anonymous namespace
694
695bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) const {
696 PreferredTuple Preferred;
697 if (matchCombineExtendingLoads(MI, Preferred)) {
698 applyCombineExtendingLoads(MI, Preferred);
699 return true;
700 }
701 return false;
702}
703
704static unsigned getExtLoadOpcForExtend(unsigned ExtOpc) {
705 unsigned CandidateLoadOpc;
706 switch (ExtOpc) {
707 case TargetOpcode::G_ANYEXT:
708 CandidateLoadOpc = TargetOpcode::G_LOAD;
709 break;
710 case TargetOpcode::G_SEXT:
711 CandidateLoadOpc = TargetOpcode::G_SEXTLOAD;
712 break;
713 case TargetOpcode::G_ZEXT:
714 CandidateLoadOpc = TargetOpcode::G_ZEXTLOAD;
715 break;
716 default:
717 llvm_unreachable("Unexpected extend opc");
718 }
719 return CandidateLoadOpc;
720}
721
722bool CombinerHelper::matchCombineExtendingLoads(
723 MachineInstr &MI, PreferredTuple &Preferred) const {
724 // We match the loads and follow the uses to the extend instead of matching
725 // the extends and following the def to the load. This is because the load
726 // must remain in the same position for correctness (unless we also add code
727 // to find a safe place to sink it) whereas the extend is freely movable.
728 // It also prevents us from duplicating the load for the volatile case or just
729 // for performance.
730 GAnyLoad *LoadMI = dyn_cast<GAnyLoad>(&MI);
731 if (!LoadMI)
732 return false;
733
734 Register LoadReg = LoadMI->getDstReg();
735
736 LLT LoadValueTy = MRI.getType(LoadReg);
737 if (!LoadValueTy.isScalar())
738 return false;
739
740 // Most architectures are going to legalize <s8 loads into at least a 1 byte
741 // load, and the MMOs can only describe memory accesses in multiples of bytes.
742 // If we try to perform extload combining on those, we can end up with
743 // %a(s8) = extload %ptr (load 1 byte from %ptr)
744 // ... which is an illegal extload instruction.
745 if (LoadValueTy.getSizeInBits() < 8)
746 return false;
747
748 // Non-power-of-2 types will very likely be legalized into multiple
749 // loads. Don't bother trying to match them into extending loads.
750 if (!isPowerOf2_32(LoadValueTy.getSizeInBits()))
751 return false;
752
753 // Find the preferred type aside from the any-extends (unless it's the only
754 // one) and non-extending ops. We'll emit an extending load to that type
755 // and emit a variant of (extend (trunc X)) for the others according to the
756 // relative type sizes. At the same time, pick an extend to use based on the
757 // extend involved in the chosen type.
758 unsigned PreferredOpcode =
759 isa<GLoad>(&MI)
760 ? TargetOpcode::G_ANYEXT
761 : isa<GSExtLoad>(&MI) ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
762 Preferred = {LLT(), PreferredOpcode, nullptr};
763 for (auto &UseMI : MRI.use_nodbg_instructions(LoadReg)) {
764 if (UseMI.getOpcode() == TargetOpcode::G_SEXT ||
765 UseMI.getOpcode() == TargetOpcode::G_ZEXT ||
766 (UseMI.getOpcode() == TargetOpcode::G_ANYEXT)) {
767 const auto &MMO = LoadMI->getMMO();
768 // Don't do anything for atomics.
769 if (MMO.isAtomic())
770 continue;
771 // Check for legality.
772 if (!isPreLegalize()) {
773 LegalityQuery::MemDesc MMDesc(MMO);
774 unsigned CandidateLoadOpc = getExtLoadOpcForExtend(UseMI.getOpcode());
775 LLT UseTy = MRI.getType(UseMI.getOperand(0).getReg());
776 LLT SrcTy = MRI.getType(LoadMI->getPointerReg());
777 if (LI->getAction({CandidateLoadOpc, {UseTy, SrcTy}, {MMDesc}})
778 .Action != LegalizeActions::Legal)
779 continue;
780 }
781 Preferred = ChoosePreferredUse(MI, Preferred,
782 MRI.getType(UseMI.getOperand(0).getReg()),
783 UseMI.getOpcode(), &UseMI);
784 }
785 }
786
787 // There were no extends
788 if (!Preferred.MI)
789 return false;
790 // It should be impossible to choose an extend without selecting a different
791 // type since by definition the result of an extend is larger.
792 assert(Preferred.Ty != LoadValueTy && "Extending to same type?");
793
794 LLVM_DEBUG(dbgs() << "Preferred use is: " << *Preferred.MI);
795 return true;
796}
797
798void CombinerHelper::applyCombineExtendingLoads(
799 MachineInstr &MI, PreferredTuple &Preferred) const {
800 // Rewrite the load to the chosen extending load.
801 Register ChosenDstReg = Preferred.MI->getOperand(0).getReg();
802
803 // Inserter to insert a truncate back to the original type at a given point
804 // with some basic CSE to limit truncate duplication to one per BB.
806 auto InsertTruncAt = [&](MachineBasicBlock *InsertIntoBB,
807 MachineBasicBlock::iterator InsertBefore,
808 MachineOperand &UseMO) {
809 MachineInstr *PreviouslyEmitted = EmittedInsns.lookup(InsertIntoBB);
810 if (PreviouslyEmitted) {
811 Observer.changingInstr(*UseMO.getParent());
812 UseMO.setReg(PreviouslyEmitted->getOperand(0).getReg());
813 Observer.changedInstr(*UseMO.getParent());
814 return;
815 }
816
817 Builder.setInsertPt(*InsertIntoBB, InsertBefore);
818 Register NewDstReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg());
819 MachineInstr *NewMI = Builder.buildTrunc(NewDstReg, ChosenDstReg);
820 EmittedInsns[InsertIntoBB] = NewMI;
821 replaceRegOpWith(MRI, UseMO, NewDstReg);
822 };
823
824 Observer.changingInstr(MI);
825 unsigned LoadOpc = getExtLoadOpcForExtend(Preferred.ExtendOpcode);
826 MI.setDesc(Builder.getTII().get(LoadOpc));
827
828 // Rewrite all the uses to fix up the types.
829 auto &LoadValue = MI.getOperand(0);
831 llvm::make_pointer_range(MRI.use_operands(LoadValue.getReg())));
832
833 for (auto *UseMO : Uses) {
834 MachineInstr *UseMI = UseMO->getParent();
835
836 // If the extend is compatible with the preferred extend then we should fix
837 // up the type and extend so that it uses the preferred use.
838 if (UseMI->getOpcode() == Preferred.ExtendOpcode ||
839 UseMI->getOpcode() == TargetOpcode::G_ANYEXT) {
840 Register UseDstReg = UseMI->getOperand(0).getReg();
841 MachineOperand &UseSrcMO = UseMI->getOperand(1);
842 const LLT UseDstTy = MRI.getType(UseDstReg);
843 if (UseDstReg != ChosenDstReg) {
844 if (Preferred.Ty == UseDstTy) {
845 // If the use has the same type as the preferred use, then merge
846 // the vregs and erase the extend. For example:
847 // %1:_(s8) = G_LOAD ...
848 // %2:_(s32) = G_SEXT %1(s8)
849 // %3:_(s32) = G_ANYEXT %1(s8)
850 // ... = ... %3(s32)
851 // rewrites to:
852 // %2:_(s32) = G_SEXTLOAD ...
853 // ... = ... %2(s32)
854 replaceRegWith(MRI, UseDstReg, ChosenDstReg);
855 Observer.erasingInstr(*UseMO->getParent());
856 UseMO->getParent()->eraseFromParent();
857 } else if (Preferred.Ty.getSizeInBits() < UseDstTy.getSizeInBits()) {
858 // If the preferred size is smaller, then keep the extend but extend
859 // from the result of the extending load. For example:
860 // %1:_(s8) = G_LOAD ...
861 // %2:_(s32) = G_SEXT %1(s8)
862 // %3:_(s64) = G_ANYEXT %1(s8)
863 // ... = ... %3(s64)
864 /// rewrites to:
865 // %2:_(s32) = G_SEXTLOAD ...
866 // %3:_(s64) = G_ANYEXT %2:_(s32)
867 // ... = ... %3(s64)
868 replaceRegOpWith(MRI, UseSrcMO, ChosenDstReg);
869 } else {
870 // If the preferred size is large, then insert a truncate. For
871 // example:
872 // %1:_(s8) = G_LOAD ...
873 // %2:_(s64) = G_SEXT %1(s8)
874 // %3:_(s32) = G_ZEXT %1(s8)
875 // ... = ... %3(s32)
876 /// rewrites to:
877 // %2:_(s64) = G_SEXTLOAD ...
878 // %4:_(s8) = G_TRUNC %2:_(s64)
879 // %3:_(s32) = G_ZEXT %4:_(s8)
880 // ... = ... %3(s32)
881 InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO,
882 InsertTruncAt);
883 }
884 continue;
885 }
886 // The use is (one of) the uses of the preferred use we chose earlier.
887 // We're going to update the load to def this value later so just erase
888 // the old extend.
889 Observer.erasingInstr(*UseMO->getParent());
890 UseMO->getParent()->eraseFromParent();
891 continue;
892 }
893
894 // The use isn't an extend. Truncate back to the type we originally loaded.
895 // This is free on many targets.
896 InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO, InsertTruncAt);
897 }
898
899 MI.getOperand(0).setReg(ChosenDstReg);
900 Observer.changedInstr(MI);
901}
902
903bool CombinerHelper::matchCombineLoadWithAndMask(MachineInstr &MI,
904 BuildFnTy &MatchInfo) const {
905 assert(MI.getOpcode() == TargetOpcode::G_AND);
906
907 // If we have the following code:
908 // %mask = G_CONSTANT 255
909 // %ld = G_LOAD %ptr, (load s16)
910 // %and = G_AND %ld, %mask
911 //
912 // Try to fold it into
913 // %ld = G_ZEXTLOAD %ptr, (load s8)
914
915 Register Dst = MI.getOperand(0).getReg();
916 if (MRI.getType(Dst).isVector())
917 return false;
918
919 auto MaybeMask =
920 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
921 if (!MaybeMask)
922 return false;
923
924 APInt MaskVal = MaybeMask->Value;
925
926 if (!MaskVal.isMask())
927 return false;
928
929 Register SrcReg = MI.getOperand(1).getReg();
930 // Don't use getOpcodeDef() here since intermediate instructions may have
931 // multiple users.
932 GAnyLoad *LoadMI = dyn_cast<GAnyLoad>(MRI.getVRegDef(SrcReg));
933 if (!LoadMI || !MRI.hasOneNonDBGUse(LoadMI->getDstReg()))
934 return false;
935
936 Register LoadReg = LoadMI->getDstReg();
937 LLT RegTy = MRI.getType(LoadReg);
938 Register PtrReg = LoadMI->getPointerReg();
939 unsigned RegSize = RegTy.getSizeInBits();
940 LocationSize LoadSizeBits = LoadMI->getMemSizeInBits();
941 unsigned MaskSizeBits = MaskVal.countr_one();
942
943 // The mask may not be larger than the in-memory type, as it might cover
944 // sign-extended bits.
945 if (MaskSizeBits > LoadSizeBits.getValue())
946 return false;
947
948 // If the mask covers the whole destination register, there's nothing to
949 // extend
950 if (MaskSizeBits >= RegSize)
951 return false;
952
953 // Most targets cannot deal with loads of size < 8 and need to re-legalize to
954 // at least byte loads. Avoid creating such loads here
955 if (MaskSizeBits < 8 || !isPowerOf2_32(MaskSizeBits))
956 return false;
957
958 const MachineMemOperand &MMO = LoadMI->getMMO();
959 LegalityQuery::MemDesc MemDesc(MMO);
960
961 // Don't modify the memory access size if this is atomic/volatile, but we can
962 // still adjust the opcode to indicate the high bit behavior.
963 if (LoadMI->isSimple())
964 MemDesc.MemoryTy = LLT::scalar(MaskSizeBits);
965 else if (LoadSizeBits.getValue() > MaskSizeBits ||
966 LoadSizeBits.getValue() == RegSize)
967 return false;
968
969 // TODO: Could check if it's legal with the reduced or original memory size.
970 if (!isLegalOrBeforeLegalizer(
971 {TargetOpcode::G_ZEXTLOAD, {RegTy, MRI.getType(PtrReg)}, {MemDesc}}))
972 return false;
973
974 MatchInfo = [=](MachineIRBuilder &B) {
975 B.setInstrAndDebugLoc(*LoadMI);
976 auto &MF = B.getMF();
977 auto PtrInfo = MMO.getPointerInfo();
978 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, MemDesc.MemoryTy);
979 B.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, Dst, PtrReg, *NewMMO);
980 LoadMI->eraseFromParent();
981 };
982 return true;
983}
984
985bool CombinerHelper::isPredecessor(const MachineInstr &DefMI,
986 const MachineInstr &UseMI) const {
987 assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
988 "shouldn't consider debug uses");
989 assert(DefMI.getParent() == UseMI.getParent());
990 if (&DefMI == &UseMI)
991 return true;
992 const MachineBasicBlock &MBB = *DefMI.getParent();
993 auto DefOrUse = find_if(MBB, [&DefMI, &UseMI](const MachineInstr &MI) {
994 return &MI == &DefMI || &MI == &UseMI;
995 });
996 if (DefOrUse == MBB.end())
997 llvm_unreachable("Block must contain both DefMI and UseMI!");
998 return &*DefOrUse == &DefMI;
999}
1000
1001bool CombinerHelper::dominates(const MachineInstr &DefMI,
1002 const MachineInstr &UseMI) const {
1003 assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
1004 "shouldn't consider debug uses");
1005 if (MDT)
1006 return MDT->dominates(&DefMI, &UseMI);
1007 else if (DefMI.getParent() != UseMI.getParent())
1008 return false;
1009
1010 return isPredecessor(DefMI, UseMI);
1011}
1012
1013bool CombinerHelper::matchSextTruncSextLoad(MachineInstr &MI) const {
1014 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1015 Register SrcReg = MI.getOperand(1).getReg();
1016 Register LoadUser = SrcReg;
1017
1018 if (MRI.getType(SrcReg).isVector())
1019 return false;
1020
1021 Register TruncSrc;
1022 if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc))))
1023 LoadUser = TruncSrc;
1024
1025 uint64_t SizeInBits = MI.getOperand(2).getImm();
1026 // If the source is a G_SEXTLOAD from the same bit width, then we don't
1027 // need any extend at all, just a truncate.
1028 if (auto *LoadMI = getOpcodeDef<GSExtLoad>(LoadUser, MRI)) {
1029 // If truncating more than the original extended value, abort.
1030 auto LoadSizeBits = LoadMI->getMemSizeInBits();
1031 if (TruncSrc &&
1032 MRI.getType(TruncSrc).getSizeInBits() < LoadSizeBits.getValue())
1033 return false;
1034 if (LoadSizeBits == SizeInBits)
1035 return true;
1036 }
1037 return false;
1038}
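// Illustrative example of the pattern matched above (arbitrary names):
//   %ld:_(s32) = G_SEXTLOAD %ptr(p0) :: (load (s8))
//   %ext:_(s32) = G_SEXT_INREG %ld, 8
// The load already sign-extends from 8 bits, so the apply below turns %ext
// into a plain copy of %ld.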
1039
1040void CombinerHelper::applySextTruncSextLoad(MachineInstr &MI) const {
1041 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1042 Builder.buildCopy(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
1043 MI.eraseFromParent();
1044}
1045
1046bool CombinerHelper::matchSextInRegOfLoad(
1047 MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) const {
1048 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1049
1050 Register DstReg = MI.getOperand(0).getReg();
1051 LLT RegTy = MRI.getType(DstReg);
1052
1053 // Only supports scalars for now.
1054 if (RegTy.isVector())
1055 return false;
1056
1057 Register SrcReg = MI.getOperand(1).getReg();
1058 auto *LoadDef = getOpcodeDef<GLoad>(SrcReg, MRI);
1059 if (!LoadDef || !MRI.hasOneNonDBGUse(SrcReg))
1060 return false;
1061
1062 uint64_t MemBits = LoadDef->getMemSizeInBits().getValue();
1063
1064 // If the sign extend extends from a narrower width than the load's width,
1065 // then we can narrow the load width when we combine to a G_SEXTLOAD.
1066 // Avoid widening the load at all.
1067 unsigned NewSizeBits = std::min((uint64_t)MI.getOperand(2).getImm(), MemBits);
1068
1069 // Don't generate G_SEXTLOADs with a < 1 byte width.
1070 if (NewSizeBits < 8)
1071 return false;
1072 // Don't bother creating a non-power-2 sextload, it will likely be broken up
1073 // anyway for most targets.
1074 if (!isPowerOf2_32(NewSizeBits))
1075 return false;
1076
1077 const MachineMemOperand &MMO = LoadDef->getMMO();
1078 LegalityQuery::MemDesc MMDesc(MMO);
1079
1080 // Don't modify the memory access size if this is atomic/volatile, but we can
1081 // still adjust the opcode to indicate the high bit behavior.
1082 if (LoadDef->isSimple())
1083 MMDesc.MemoryTy = LLT::scalar(NewSizeBits);
1084 else if (MemBits > NewSizeBits || MemBits == RegTy.getSizeInBits())
1085 return false;
1086
1087 // TODO: Could check if it's legal with the reduced or original memory size.
1088 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SEXTLOAD,
1089 {MRI.getType(LoadDef->getDstReg()),
1090 MRI.getType(LoadDef->getPointerReg())},
1091 {MMDesc}}))
1092 return false;
1093
1094 MatchInfo = std::make_tuple(LoadDef->getDstReg(), NewSizeBits);
1095 return true;
1096}
1097
1098void CombinerHelper::applySextInRegOfLoad(
1099 MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) const {
1100 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1101 Register LoadReg;
1102 unsigned ScalarSizeBits;
1103 std::tie(LoadReg, ScalarSizeBits) = MatchInfo;
1104 GLoad *LoadDef = cast<GLoad>(MRI.getVRegDef(LoadReg));
1105
1106 // If we have the following:
1107 // %ld = G_LOAD %ptr, (load 2)
1108 // %ext = G_SEXT_INREG %ld, 8
1109 // ==>
1110 // %ld = G_SEXTLOAD %ptr (load 1)
1111
1112 auto &MMO = LoadDef->getMMO();
1113 Builder.setInstrAndDebugLoc(*LoadDef);
1114 auto &MF = Builder.getMF();
1115 auto PtrInfo = MMO.getPointerInfo();
1116 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, ScalarSizeBits / 8);
1117 Builder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, MI.getOperand(0).getReg(),
1118 LoadDef->getPointerReg(), *NewMMO);
1119 MI.eraseFromParent();
1120
1121 // Not all loads can be deleted, so make sure the old one is removed.
1122 LoadDef->eraseFromParent();
1123}
1124
1125/// Return true if 'MI' is a load or a store that may fold its address
1126/// operand into the load / store addressing mode.
1127static bool canFoldInAddressingMode(GLoadStore *MI, const TargetLowering &TLI,
1128 MachineRegisterInfo &MRI) {
1129 TargetLowering::AddrMode AM;
1130 auto *MF = MI->getMF();
1131 auto *Addr = getOpcodeDef<GPtrAdd>(MI->getPointerReg(), MRI);
1132 if (!Addr)
1133 return false;
1134
1135 AM.HasBaseReg = true;
1136 if (auto CstOff = getIConstantVRegVal(Addr->getOffsetReg(), MRI))
1137 AM.BaseOffs = CstOff->getSExtValue(); // [reg +/- imm]
1138 else
1139 AM.Scale = 1; // [reg +/- reg]
1140
1141 return TLI.isLegalAddressingMode(
1142 MF->getDataLayout(), AM,
1143 getTypeForLLT(MI->getMMO().getMemoryType(),
1144 MF->getFunction().getContext()),
1145 MI->getMMO().getAddrSpace());
1146}
1147
1148static unsigned getIndexedOpc(unsigned LdStOpc) {
1149 switch (LdStOpc) {
1150 case TargetOpcode::G_LOAD:
1151 return TargetOpcode::G_INDEXED_LOAD;
1152 case TargetOpcode::G_STORE:
1153 return TargetOpcode::G_INDEXED_STORE;
1154 case TargetOpcode::G_ZEXTLOAD:
1155 return TargetOpcode::G_INDEXED_ZEXTLOAD;
1156 case TargetOpcode::G_SEXTLOAD:
1157 return TargetOpcode::G_INDEXED_SEXTLOAD;
1158 default:
1159 llvm_unreachable("Unexpected opcode");
1160 }
1161}
1162
1163bool CombinerHelper::isIndexedLoadStoreLegal(GLoadStore &LdSt) const {
1164 // Check for legality.
1165 LLT PtrTy = MRI.getType(LdSt.getPointerReg());
1166 LLT Ty = MRI.getType(LdSt.getReg(0));
1167 LLT MemTy = LdSt.getMMO().getMemoryType();
1169 {{MemTy, MemTy.getSizeInBits().getKnownMinValue(),
1171 unsigned IndexedOpc = getIndexedOpc(LdSt.getOpcode());
1172 SmallVector<LLT> OpTys;
1173 if (IndexedOpc == TargetOpcode::G_INDEXED_STORE)
1174 OpTys = {PtrTy, Ty, Ty};
1175 else
1176 OpTys = {Ty, PtrTy}; // For G_INDEXED_LOAD, G_INDEXED_[SZ]EXTLOAD
1177
1178 LegalityQuery Q(IndexedOpc, OpTys, MemDescrs);
1179 return isLegal(Q);
1180}
1181
1183 "post-index-use-threshold", cl::Hidden, cl::init(32),
1184 cl::desc("Number of uses of a base pointer to check before it is no longer "
1185 "considered for post-indexing."));
1186
1187bool CombinerHelper::findPostIndexCandidate(GLoadStore &LdSt, Register &Addr,
1188 Register &Base, Register &Offset,
1189 bool &RematOffset) const {
1190 // We're looking for the following pattern, for either load or store:
1191 // %baseptr:_(p0) = ...
1192 // G_STORE %val(s64), %baseptr(p0)
1193 // %offset:_(s64) = G_CONSTANT i64 -256
1194 // %new_addr:_(p0) = G_PTR_ADD %baseptr, %offset(s64)
1195 const auto &TLI = getTargetLowering();
1196
1197 Register Ptr = LdSt.getPointerReg();
1198 // If the store is the only use, don't bother.
1199 if (MRI.hasOneNonDBGUse(Ptr))
1200 return false;
1201
1202 if (!isIndexedLoadStoreLegal(LdSt))
1203 return false;
1204
1205 if (getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Ptr, MRI))
1206 return false;
1207
1208 MachineInstr *StoredValDef = getDefIgnoringCopies(LdSt.getReg(0), MRI);
1209 auto *PtrDef = MRI.getVRegDef(Ptr);
1210
1211 unsigned NumUsesChecked = 0;
1212 for (auto &Use : MRI.use_nodbg_instructions(Ptr)) {
1213 if (++NumUsesChecked > PostIndexUseThreshold)
1214 return false; // Try to avoid exploding compile time.
1215
1216 auto *PtrAdd = dyn_cast<GPtrAdd>(&Use);
1217 // The use itself might be dead. This can happen during combines if DCE
1218 // hasn't had a chance to run yet. Don't allow it to form an indexed op.
1219 if (!PtrAdd || MRI.use_nodbg_empty(PtrAdd->getReg(0)))
1220 continue;
1221
1222 // Check that the user of this isn't the store, otherwise we'd be generating
1223 // an indexed store defining its own use.
1224 if (StoredValDef == &Use)
1225 continue;
1226
1227 Offset = PtrAdd->getOffsetReg();
1228 if (!ForceLegalIndexing &&
1229 !TLI.isIndexingLegal(LdSt, PtrAdd->getBaseReg(), Offset,
1230 /*IsPre*/ false, MRI))
1231 continue;
1232
1233 // Make sure the offset calculation is before the potentially indexed op.
1234 MachineInstr *OffsetDef = MRI.getVRegDef(Offset);
1235 RematOffset = false;
1236 if (!dominates(*OffsetDef, LdSt)) {
1237 // If the offset however is just a G_CONSTANT, we can always just
1238 // rematerialize it where we need it.
1239 if (OffsetDef->getOpcode() != TargetOpcode::G_CONSTANT)
1240 continue;
1241 RematOffset = true;
1242 }
1243
1244 for (auto &BasePtrUse : MRI.use_nodbg_instructions(PtrAdd->getBaseReg())) {
1245 if (&BasePtrUse == PtrDef)
1246 continue;
1247
1248 // If the user is a later load/store that can be post-indexed, then don't
1249 // combine this one.
1250 auto *BasePtrLdSt = dyn_cast<GLoadStore>(&BasePtrUse);
1251 if (BasePtrLdSt && BasePtrLdSt != &LdSt &&
1252 dominates(LdSt, *BasePtrLdSt) &&
1253 isIndexedLoadStoreLegal(*BasePtrLdSt))
1254 return false;
1255
1256 // Now we're looking for the key G_PTR_ADD instruction, which contains
1257 // the offset add that we want to fold.
1258 if (auto *BasePtrUseDef = dyn_cast<GPtrAdd>(&BasePtrUse)) {
1259 Register PtrAddDefReg = BasePtrUseDef->getReg(0);
1260 for (auto &BaseUseUse : MRI.use_nodbg_instructions(PtrAddDefReg)) {
1261 // If the use is in a different block, then we may produce worse code
1262 // due to the extra register pressure.
1263 if (BaseUseUse.getParent() != LdSt.getParent())
1264 return false;
1265
1266 if (auto *UseUseLdSt = dyn_cast<GLoadStore>(&BaseUseUse))
1267 if (canFoldInAddressingMode(UseUseLdSt, TLI, MRI))
1268 return false;
1269 }
1270 if (!dominates(LdSt, BasePtrUse))
1271 return false; // All uses must be dominated by the load/store.
1272 }
1273 }
1274
1275 Addr = PtrAdd->getReg(0);
1276 Base = PtrAdd->getBaseReg();
1277 return true;
1278 }
1279
1280 return false;
1281}
1282
1283bool CombinerHelper::findPreIndexCandidate(GLoadStore &LdSt, Register &Addr,
1284 Register &Base,
1285 Register &Offset) const {
1286 auto &MF = *LdSt.getParent()->getParent();
1287 const auto &TLI = *MF.getSubtarget().getTargetLowering();
1288
1289 Addr = LdSt.getPointerReg();
1290 if (!mi_match(Addr, MRI, m_GPtrAdd(m_Reg(Base), m_Reg(Offset))) ||
1291 MRI.hasOneNonDBGUse(Addr))
1292 return false;
1293
1294 if (!ForceLegalIndexing &&
1295 !TLI.isIndexingLegal(LdSt, Base, Offset, /*IsPre*/ true, MRI))
1296 return false;
1297
1298 if (!isIndexedLoadStoreLegal(LdSt))
1299 return false;
1300
1301 MachineInstr *BaseDef = getDefIgnoringCopies(Base, MRI);
1302 if (BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
1303 return false;
1304
1305 if (auto *St = dyn_cast<GStore>(&LdSt)) {
1306 // Would require a copy.
1307 if (Base == St->getValueReg())
1308 return false;
1309
1310 // We're expecting one use of Addr in MI, but it could also be the
1311 // value stored, which isn't actually dominated by the instruction.
1312 if (St->getValueReg() == Addr)
1313 return false;
1314 }
1315
1316 // Avoid increasing cross-block register pressure.
1317 for (auto &AddrUse : MRI.use_nodbg_instructions(Addr))
1318 if (AddrUse.getParent() != LdSt.getParent())
1319 return false;
1320
1321 // FIXME: check whether all uses of the base pointer are constant PtrAdds.
1322 // That might allow us to end base's liveness here by adjusting the constant.
1323 bool RealUse = false;
1324 for (auto &AddrUse : MRI.use_nodbg_instructions(Addr)) {
1325 if (!dominates(LdSt, AddrUse))
1326 return false; // All uses must be dominated by the load/store.
1327
1328 // If Ptr may be folded into the addressing mode of another use, then it's
1329 // not profitable to do this transformation.
1330 if (auto *UseLdSt = dyn_cast<GLoadStore>(&AddrUse)) {
1331 if (!canFoldInAddressingMode(UseLdSt, TLI, MRI))
1332 RealUse = true;
1333 } else {
1334 RealUse = true;
1335 }
1336 }
1337 return RealUse;
1338}
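// Illustrative example of a pre-index candidate (arbitrary names):
//   %offset:_(s64) = G_CONSTANT i64 16
//   %addr:_(p0) = G_PTR_ADD %base, %offset(s64)
//   G_STORE %val(s32), %addr(p0)
//   ... further users of %addr dominated by the store ...
// The store can then be turned into a pre-indexed store that also defines the
// updated pointer, saving the separate pointer arithmetic.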
1339
1340bool CombinerHelper::matchCombineExtractedVectorLoad(
1341 MachineInstr &MI, BuildFnTy &MatchInfo) const {
1342 assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT);
1343
1344 // Check if there is a load that defines the vector being extracted from.
1345 auto *LoadMI = getOpcodeDef<GLoad>(MI.getOperand(1).getReg(), MRI);
1346 if (!LoadMI)
1347 return false;
1348
1349 Register Vector = MI.getOperand(1).getReg();
1350 LLT VecEltTy = MRI.getType(Vector).getElementType();
1351
1352 assert(MRI.getType(MI.getOperand(0).getReg()) == VecEltTy);
1353
1354 // Checking whether we should reduce the load width.
1355 if (!MRI.hasOneNonDBGUse(Vector))
1356 return false;
1357
1358 // Check if the defining load is simple.
1359 if (!LoadMI->isSimple())
1360 return false;
1361
1362 // If the vector element type is not a multiple of a byte then we are unable
1363 // to correctly compute an address to load only the extracted element as a
1364 // scalar.
1365 if (!VecEltTy.isByteSized())
1366 return false;
1367
1368 // Check for load fold barriers between the extraction and the load.
1369 if (MI.getParent() != LoadMI->getParent())
1370 return false;
1371 const unsigned MaxIter = 20;
1372 unsigned Iter = 0;
1373 for (auto II = LoadMI->getIterator(), IE = MI.getIterator(); II != IE; ++II) {
1374 if (II->isLoadFoldBarrier())
1375 return false;
1376 if (Iter++ == MaxIter)
1377 return false;
1378 }
1379
1380 // Check if the new load that we are going to create is legal
1381 // if we are in the post-legalization phase.
1382 MachineMemOperand MMO = LoadMI->getMMO();
1383 Align Alignment = MMO.getAlign();
1384 MachinePointerInfo PtrInfo;
1385 int64_t Offset;
1386
1387 // Finding the appropriate PtrInfo if offset is a known constant.
1388 // This is required to create the memory operand for the narrowed load.
1389 // This machine memory operand object helps us infer about legality
1390 // before we proceed to combine the instruction.
1391 if (auto CVal = getIConstantVRegVal(Vector, MRI)) {
1392 int Elt = CVal->getZExtValue();
1393 // FIXME: should be (ABI size)*Elt.
1394 Offset = VecEltTy.getSizeInBits() * Elt / 8;
1395 PtrInfo = MMO.getPointerInfo().getWithOffset(Offset);
1396 } else {
1397 // Discard the pointer info except the address space because the memory
1398 // operand can't represent this new access since the offset is variable.
1399 Offset = VecEltTy.getSizeInBits() / 8;
1400 PtrInfo = MachinePointerInfo(MMO.getPointerInfo().getAddrSpace());
1401 }
1402
1403 Alignment = commonAlignment(Alignment, Offset);
1404
1405 Register VecPtr = LoadMI->getPointerReg();
1406 LLT PtrTy = MRI.getType(VecPtr);
1407
1408 MachineFunction &MF = *MI.getMF();
1409 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, VecEltTy);
1410
1411 LegalityQuery::MemDesc MMDesc(*NewMMO);
1412
1414 {TargetOpcode::G_LOAD, {VecEltTy, PtrTy}, {MMDesc}}))
1415 return false;
1416
1417 // Load must be allowed and fast on the target.
1419 auto &DL = MF.getDataLayout();
1420 unsigned Fast = 0;
1421 if (!getTargetLowering().allowsMemoryAccess(C, DL, VecEltTy, *NewMMO,
1422 &Fast) ||
1423 !Fast)
1424 return false;
1425
1426 Register Result = MI.getOperand(0).getReg();
1427 Register Index = MI.getOperand(2).getReg();
1428
1429 MatchInfo = [=](MachineIRBuilder &B) {
1430 GISelObserverWrapper DummyObserver;
1431 LegalizerHelper Helper(B.getMF(), DummyObserver, B);
1432 //// Get pointer to the vector element.
1433 Register finalPtr = Helper.getVectorElementPointer(
1434 LoadMI->getPointerReg(), MRI.getType(LoadMI->getOperand(0).getReg()),
1435 Index);
1436 // New G_LOAD instruction.
1437 B.buildLoad(Result, finalPtr, PtrInfo, Alignment);
1438 // Remove original GLOAD instruction.
1439 LoadMI->eraseFromParent();
1440 };
1441
1442 return true;
1443}
1444
1445bool CombinerHelper::matchCombineIndexedLoadStore(
1446 MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) const {
1447 auto &LdSt = cast<GLoadStore>(MI);
1448
1449 if (LdSt.isAtomic())
1450 return false;
1451
1452 MatchInfo.IsPre = findPreIndexCandidate(LdSt, MatchInfo.Addr, MatchInfo.Base,
1453 MatchInfo.Offset);
1454 if (!MatchInfo.IsPre &&
1455 !findPostIndexCandidate(LdSt, MatchInfo.Addr, MatchInfo.Base,
1456 MatchInfo.Offset, MatchInfo.RematOffset))
1457 return false;
1458
1459 return true;
1460}
1461
1462void CombinerHelper::applyCombineIndexedLoadStore(
1463 MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) const {
1464 MachineInstr &AddrDef = *MRI.getUniqueVRegDef(MatchInfo.Addr);
1465 unsigned Opcode = MI.getOpcode();
1466 bool IsStore = Opcode == TargetOpcode::G_STORE;
1467 unsigned NewOpcode = getIndexedOpc(Opcode);
1468
1469 // If the offset constant didn't happen to dominate the load/store, we can
1470 // just clone it as needed.
1471 if (MatchInfo.RematOffset) {
1472 auto *OldCst = MRI.getVRegDef(MatchInfo.Offset);
1473 auto NewCst = Builder.buildConstant(MRI.getType(MatchInfo.Offset),
1474 *OldCst->getOperand(1).getCImm());
1475 MatchInfo.Offset = NewCst.getReg(0);
1476 }
1477
1478 auto MIB = Builder.buildInstr(NewOpcode);
1479 if (IsStore) {
1480 MIB.addDef(MatchInfo.Addr);
1481 MIB.addUse(MI.getOperand(0).getReg());
1482 } else {
1483 MIB.addDef(MI.getOperand(0).getReg());
1484 MIB.addDef(MatchInfo.Addr);
1485 }
1486
1487 MIB.addUse(MatchInfo.Base);
1488 MIB.addUse(MatchInfo.Offset);
1489 MIB.addImm(MatchInfo.IsPre);
1490 MIB->cloneMemRefs(*MI.getMF(), MI);
1491 MI.eraseFromParent();
1492 AddrDef.eraseFromParent();
1493
1494 LLVM_DEBUG(dbgs() << " Combined to indexed operation");
1495}
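// Illustrative example of the operand order built above (arbitrary names):
//   %writeback:_(p0) = G_INDEXED_STORE %val(s64), %base(p0), %offset(s64), 0
//   %val:_(s64), %writeback:_(p0) = G_INDEXED_LOAD %base(p0), %offset(s64), 1
// The final immediate is IsPre: 1 for the pre-indexed form, 0 for post-indexed.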
1496
1497bool CombinerHelper::matchCombineDivRem(MachineInstr &MI,
1498 MachineInstr *&OtherMI) const {
1499 unsigned Opcode = MI.getOpcode();
1500 bool IsDiv, IsSigned;
1501
1502 switch (Opcode) {
1503 default:
1504 llvm_unreachable("Unexpected opcode!");
1505 case TargetOpcode::G_SDIV:
1506 case TargetOpcode::G_UDIV: {
1507 IsDiv = true;
1508 IsSigned = Opcode == TargetOpcode::G_SDIV;
1509 break;
1510 }
1511 case TargetOpcode::G_SREM:
1512 case TargetOpcode::G_UREM: {
1513 IsDiv = false;
1514 IsSigned = Opcode == TargetOpcode::G_SREM;
1515 break;
1516 }
1517 }
1518
1519 Register Src1 = MI.getOperand(1).getReg();
1520 unsigned DivOpcode, RemOpcode, DivremOpcode;
1521 if (IsSigned) {
1522 DivOpcode = TargetOpcode::G_SDIV;
1523 RemOpcode = TargetOpcode::G_SREM;
1524 DivremOpcode = TargetOpcode::G_SDIVREM;
1525 } else {
1526 DivOpcode = TargetOpcode::G_UDIV;
1527 RemOpcode = TargetOpcode::G_UREM;
1528 DivremOpcode = TargetOpcode::G_UDIVREM;
1529 }
1530
1531 if (!isLegalOrBeforeLegalizer({DivremOpcode, {MRI.getType(Src1)}}))
1532 return false;
1533
1534 // Combine:
1535 // %div:_ = G_[SU]DIV %src1:_, %src2:_
1536 // %rem:_ = G_[SU]REM %src1:_, %src2:_
1537 // into:
1538 // %div:_, %rem:_ = G_[SU]DIVREM %src1:_, %src2:_
1539
1540 // Combine:
1541 // %rem:_ = G_[SU]REM %src1:_, %src2:_
1542 // %div:_ = G_[SU]DIV %src1:_, %src2:_
1543 // into:
1544 // %div:_, %rem:_ = G_[SU]DIVREM %src1:_, %src2:_
1545
1546 for (auto &UseMI : MRI.use_nodbg_instructions(Src1)) {
1547 if (MI.getParent() == UseMI.getParent() &&
1548 ((IsDiv && UseMI.getOpcode() == RemOpcode) ||
1549 (!IsDiv && UseMI.getOpcode() == DivOpcode)) &&
1550 matchEqualDefs(MI.getOperand(2), UseMI.getOperand(2)) &&
1551 matchEqualDefs(MI.getOperand(1), UseMI.getOperand(1))) {
1552 OtherMI = &UseMI;
1553 return true;
1554 }
1555 }
1556
1557 return false;
1558}
1559
1560void CombinerHelper::applyCombineDivRem(MachineInstr &MI,
1561 MachineInstr *&OtherMI) const {
1562 unsigned Opcode = MI.getOpcode();
1563 assert(OtherMI && "OtherMI shouldn't be empty.");
1564
1565 Register DestDivReg, DestRemReg;
1566 if (Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_UDIV) {
1567 DestDivReg = MI.getOperand(0).getReg();
1568 DestRemReg = OtherMI->getOperand(0).getReg();
1569 } else {
1570 DestDivReg = OtherMI->getOperand(0).getReg();
1571 DestRemReg = MI.getOperand(0).getReg();
1572 }
1573
1574 bool IsSigned =
1575 Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_SREM;
1576
1577 // Check which instruction is first in the block so we don't break def-use
1578 // deps by "moving" the instruction incorrectly. Also keep track of which
1579 // instruction is first so we pick it's operands, avoiding use-before-def
1580 // bugs.
1581 MachineInstr *FirstInst = dominates(MI, *OtherMI) ? &MI : OtherMI;
1582 Builder.setInstrAndDebugLoc(*FirstInst);
1583
1584 Builder.buildInstr(IsSigned ? TargetOpcode::G_SDIVREM
1585 : TargetOpcode::G_UDIVREM,
1586 {DestDivReg, DestRemReg},
1587 { FirstInst->getOperand(1), FirstInst->getOperand(2) });
1588 MI.eraseFromParent();
1589 OtherMI->eraseFromParent();
1590}
1591
1592bool CombinerHelper::matchOptBrCondByInvertingCond(
1593 MachineInstr &MI, MachineInstr *&BrCond) const {
1594 assert(MI.getOpcode() == TargetOpcode::G_BR);
1595
1596 // Try to match the following:
1597 // bb1:
1598 // G_BRCOND %c1, %bb2
1599 // G_BR %bb3
1600 // bb2:
1601 // ...
1602 // bb3:
1603
1604 // The above pattern does not have a fall through to the successor bb2, always
1605 // resulting in a branch no matter which path is taken. Here we try to find
1606 // and replace that pattern with a conditional branch to bb3 and otherwise
1607 // fall through to bb2. This is generally better for branch predictors.
1608
1609 MachineBasicBlock *MBB = MI.getParent();
1610 MachineBasicBlock::iterator BrIt(MI);
1611 if (BrIt == MBB->begin())
1612 return false;
1613 assert(std::next(BrIt) == MBB->end() && "expected G_BR to be a terminator");
1614
1615 BrCond = &*std::prev(BrIt);
1616 if (BrCond->getOpcode() != TargetOpcode::G_BRCOND)
1617 return false;
1618
1619 // Check that the next block is the conditional branch target. Also make sure
1620 // that it isn't the same as the G_BR's target (otherwise, this will loop.)
1621 MachineBasicBlock *BrCondTarget = BrCond->getOperand(1).getMBB();
1622 return BrCondTarget != MI.getOperand(0).getMBB() &&
1623 MBB->isLayoutSuccessor(BrCondTarget);
1624}
1625
1626void CombinerHelper::applyOptBrCondByInvertingCond(
1627 MachineInstr &MI, MachineInstr *&BrCond) const {
1628 MachineBasicBlock *BrTarget = MI.getOperand(0).getMBB();
1629 Builder.setInstrAndDebugLoc(*BrCond);
1630 LLT Ty = MRI.getType(BrCond->getOperand(0).getReg());
1631 // FIXME: Does int/fp matter for this? If so, we might need to restrict
1632 // this to i1 only since we might not know for sure what kind of
1633 // compare generated the condition value.
1634 auto True = Builder.buildConstant(
1635 Ty, getICmpTrueVal(getTargetLowering(), false, false));
1636 auto Xor = Builder.buildXor(Ty, BrCond->getOperand(0), True);
1637
1638 auto *FallthroughBB = BrCond->getOperand(1).getMBB();
1639 Observer.changingInstr(MI);
1640 MI.getOperand(0).setMBB(FallthroughBB);
1641 Observer.changedInstr(MI);
1642
1643 // Change the conditional branch to use the inverted condition and
1644 // new target block.
1645 Observer.changingInstr(*BrCond);
1646 BrCond->getOperand(0).setReg(Xor.getReg(0));
1647 BrCond->getOperand(1).setMBB(BrTarget);
1648 Observer.changedInstr(*BrCond);
1649}
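// Illustrative example (arbitrary block/register names): when bb2 is the
// layout successor of bb1,
//   bb1:
//     G_BRCOND %c1(s1), %bb2
//     G_BR %bb3
// is rewritten so that bb1 falls through to bb2 on the original condition:
//   bb1:
//     %true:_(s1) = G_CONSTANT i1 1
//     %inv:_(s1) = G_XOR %c1, %true
//     G_BRCOND %inv(s1), %bb3
//     G_BR %bb2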
1650
1651bool CombinerHelper::tryEmitMemcpyInline(MachineInstr &MI) const {
1652 MachineIRBuilder HelperBuilder(MI);
1653 GISelObserverWrapper DummyObserver;
1654 LegalizerHelper Helper(HelperBuilder.getMF(), DummyObserver, HelperBuilder);
1655 return Helper.lowerMemcpyInline(MI) ==
1656 LegalizerHelper::LegalizeResult::Legalized;
1657}
1658
1659bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI,
1660 unsigned MaxLen) const {
1661 MachineIRBuilder HelperBuilder(MI);
1662 GISelObserverWrapper DummyObserver;
1663 LegalizerHelper Helper(HelperBuilder.getMF(), DummyObserver, HelperBuilder);
1664 return Helper.lowerMemCpyFamily(MI, MaxLen) ==
1665 LegalizerHelper::LegalizeResult::Legalized;
1666}
1667
1668static APFloat constantFoldFpUnary(const MachineInstr &MI,
1669 const MachineRegisterInfo &MRI,
1670 const APFloat &Val) {
1671 APFloat Result(Val);
1672 switch (MI.getOpcode()) {
1673 default:
1674 llvm_unreachable("Unexpected opcode!");
1675 case TargetOpcode::G_FNEG: {
1676 Result.changeSign();
1677 return Result;
1678 }
1679 case TargetOpcode::G_FABS: {
1680 Result.clearSign();
1681 return Result;
1682 }
1683 case TargetOpcode::G_FPEXT:
1684 case TargetOpcode::G_FPTRUNC: {
1685 bool Unused;
1686 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
1687 Result.convert(getFltSemanticForLLT(DstTy), APFloat::rmNearestTiesToEven,
1688 &Unused);
1689 return Result;
1690 }
1691 case TargetOpcode::G_FSQRT: {
1692 bool Unused;
1693 Result.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
1694 &Unused);
1695 Result = APFloat(sqrt(Result.convertToDouble()));
1696 break;
1697 }
1698 case TargetOpcode::G_FLOG2: {
1699 bool Unused;
1700 Result.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
1701 &Unused);
1702 Result = APFloat(log2(Result.convertToDouble()));
1703 break;
1704 }
1705 }
1706 // Convert `APFloat` to appropriate IEEE type depending on `DstTy`. Otherwise,
1707 // `buildFConstant` will assert on size mismatch. Only `G_FSQRT` and
1708 // `G_FLOG2` reach here.
1709 bool Unused;
1710 Result.convert(Val.getSemantics(), APFloat::rmNearestTiesToEven, &Unused);
1711 return Result;
1712}
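// Illustrative example: folding G_FSQRT of a G_FCONSTANT double 4.0 converts
// the value to IEEEdouble, computes sqrt(4.0) = 2.0, and then converts the
// result back to the original semantics before the caller rebuilds the
// G_FCONSTANT.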
1713
1714void CombinerHelper::applyCombineConstantFoldFpUnary(
1715 MachineInstr &MI, const ConstantFP *Cst) const {
1716 APFloat Folded = constantFoldFpUnary(MI, MRI, Cst->getValue());
1717 const ConstantFP *NewCst = ConstantFP::get(Builder.getContext(), Folded);
1718 Builder.buildFConstant(MI.getOperand(0), *NewCst);
1719 MI.eraseFromParent();
1720}
1721
1722bool CombinerHelper::matchPtrAddImmedChain(MachineInstr &MI,
1723 PtrAddChain &MatchInfo) const {
1724 // We're trying to match the following pattern:
1725 // %t1 = G_PTR_ADD %base, G_CONSTANT imm1
1726 // %root = G_PTR_ADD %t1, G_CONSTANT imm2
1727 // -->
1728 // %root = G_PTR_ADD %base, G_CONSTANT (imm1 + imm2)
1729
1730 if (MI.getOpcode() != TargetOpcode::G_PTR_ADD)
1731 return false;
1732
1733 Register Add2 = MI.getOperand(1).getReg();
1734 Register Imm1 = MI.getOperand(2).getReg();
1735 auto MaybeImmVal = getIConstantVRegValWithLookThrough(Imm1, MRI);
1736 if (!MaybeImmVal)
1737 return false;
1738
1739 MachineInstr *Add2Def = MRI.getVRegDef(Add2);
1740 if (!Add2Def || Add2Def->getOpcode() != TargetOpcode::G_PTR_ADD)
1741 return false;
1742
1743 Register Base = Add2Def->getOperand(1).getReg();
1744 Register Imm2 = Add2Def->getOperand(2).getReg();
1745 auto MaybeImm2Val = getIConstantVRegValWithLookThrough(Imm2, MRI);
1746 if (!MaybeImm2Val)
1747 return false;
1748
1749 // Check if the new combined immediate forms an illegal addressing mode.
1750 // Do not combine if it was legal before but would get illegal.
1751 // To do so, we need to find a load/store user of the pointer to get
1752 // the access type.
1753 Type *AccessTy = nullptr;
1754 auto &MF = *MI.getMF();
1755 for (auto &UseMI : MRI.use_nodbg_instructions(MI.getOperand(0).getReg())) {
1756 if (auto *LdSt = dyn_cast<GLoadStore>(&UseMI)) {
1757 AccessTy = getTypeForLLT(MRI.getType(LdSt->getReg(0)),
1758 MF.getFunction().getContext());
1759 break;
1760 }
1761 }
1762 TargetLoweringBase::AddrMode AMNew;
1763 APInt CombinedImm = MaybeImmVal->Value + MaybeImm2Val->Value;
1764 AMNew.BaseOffs = CombinedImm.getSExtValue();
1765 if (AccessTy) {
1766 AMNew.HasBaseReg = true;
1767 TargetLoweringBase::AddrMode AMOld;
1768 AMOld.BaseOffs = MaybeImmVal->Value.getSExtValue();
1769 AMOld.HasBaseReg = true;
1770 unsigned AS = MRI.getType(Add2).getAddressSpace();
1771 const auto &TLI = *MF.getSubtarget().getTargetLowering();
1772 if (TLI.isLegalAddressingMode(MF.getDataLayout(), AMOld, AccessTy, AS) &&
1773 !TLI.isLegalAddressingMode(MF.getDataLayout(), AMNew, AccessTy, AS))
1774 return false;
1775 }
1776
1777 // Reassociating nuw additions preserves nuw. If both original G_PTR_ADDs are
1778 // inbounds, reaching the same result in one G_PTR_ADD is also inbounds.
1779 // The nusw constraints are satisfied because imm1+imm2 cannot exceed the
1780 // largest signed integer that fits into the index type, which is the maximum
1781 // size of allocated objects according to the IR Language Reference.
1782 unsigned PtrAddFlags = MI.getFlags();
1783 unsigned LHSPtrAddFlags = Add2Def->getFlags();
1784 bool IsNoUWrap = PtrAddFlags & LHSPtrAddFlags & MachineInstr::MIFlag::NoUWrap;
1785 bool IsInBounds =
1786 PtrAddFlags & LHSPtrAddFlags & MachineInstr::MIFlag::InBounds;
1787 unsigned Flags = 0;
1788 if (IsNoUWrap)
1789 Flags |= MachineInstr::MIFlag::NoUWrap;
1790 if (IsInBounds) {
1791 Flags |= MachineInstr::MIFlag::InBounds;
1792 Flags |= MachineInstr::MIFlag::NoUSWrap;
1793 }
1794
1795 // Pass the combined immediate to the apply function.
1796 MatchInfo.Imm = AMNew.BaseOffs;
1797 MatchInfo.Base = Base;
1798 MatchInfo.Bank = getRegBank(Imm2);
1799 MatchInfo.Flags = Flags;
1800 return true;
1801}
1802
1803void CombinerHelper::applyPtrAddImmedChain(MachineInstr &MI,
1804 PtrAddChain &MatchInfo) const {
1805 assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
1806 MachineIRBuilder MIB(MI);
1807 LLT OffsetTy = MRI.getType(MI.getOperand(2).getReg());
1808 auto NewOffset = MIB.buildConstant(OffsetTy, MatchInfo.Imm);
1809 setRegBank(NewOffset.getReg(0), MatchInfo.Bank);
1810 Observer.changingInstr(MI);
1811 MI.getOperand(1).setReg(MatchInfo.Base);
1812 MI.getOperand(2).setReg(NewOffset.getReg(0));
1813 MI.setFlags(MatchInfo.Flags);
1814 Observer.changedInstr(MI);
1815}
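// Illustrative example (hypothetical MIR): with %c16 and %c8 being
// G_CONSTANTs of 16 and 8,
//   %t1:_(p0) = G_PTR_ADD %base, %c16
//   %root:_(p0) = G_PTR_ADD %t1, %c8
// becomes
//   %c24:_(s64) = G_CONSTANT i64 24
//   %root:_(p0) = G_PTR_ADD %base, %c24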
1816
1817bool CombinerHelper::matchShiftImmedChain(MachineInstr &MI,
1818 RegisterImmPair &MatchInfo) const {
1819 // We're trying to match the following pattern with any of
1820 // G_SHL/G_ASHR/G_LSHR/G_SSHLSAT/G_USHLSAT shift instructions:
1821 // %t1 = SHIFT %base, G_CONSTANT imm1
1822 // %root = SHIFT %t1, G_CONSTANT imm2
1823 // -->
1824 // %root = SHIFT %base, G_CONSTANT (imm1 + imm2)
1825
1826 unsigned Opcode = MI.getOpcode();
1827 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
1828 Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||
1829 Opcode == TargetOpcode::G_USHLSAT) &&
1830 "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");
1831
1832 Register Shl2 = MI.getOperand(1).getReg();
1833 Register Imm1 = MI.getOperand(2).getReg();
1834 auto MaybeImmVal = getIConstantVRegValWithLookThrough(Imm1, MRI);
1835 if (!MaybeImmVal)
1836 return false;
1837
1838 MachineInstr *Shl2Def = MRI.getUniqueVRegDef(Shl2);
1839 if (Shl2Def->getOpcode() != Opcode)
1840 return false;
1841
1842 Register Base = Shl2Def->getOperand(1).getReg();
1843 Register Imm2 = Shl2Def->getOperand(2).getReg();
1844 auto MaybeImm2Val = getIConstantVRegValWithLookThrough(Imm2, MRI);
1845 if (!MaybeImm2Val)
1846 return false;
1847
1848 // Pass the combined immediate to the apply function.
1849 MatchInfo.Imm =
1850 (MaybeImmVal->Value.getZExtValue() + MaybeImm2Val->Value).getZExtValue();
1851 MatchInfo.Reg = Base;
1852
1853 // There is no simple replacement for a saturating unsigned left shift that
1854 // exceeds the scalar size.
1855 if (Opcode == TargetOpcode::G_USHLSAT &&
1856 MatchInfo.Imm >= MRI.getType(Shl2).getScalarSizeInBits())
1857 return false;
1858
1859 return true;
1860}
1861
1862void CombinerHelper::applyShiftImmedChain(MachineInstr &MI,
1863 RegisterImmPair &MatchInfo) const {
1864 unsigned Opcode = MI.getOpcode();
1865 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
1866 Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||
1867 Opcode == TargetOpcode::G_USHLSAT) &&
1868 "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");
1869
1870 LLT Ty = MRI.getType(MI.getOperand(1).getReg());
1871 unsigned const ScalarSizeInBits = Ty.getScalarSizeInBits();
1872 auto Imm = MatchInfo.Imm;
1873
1874 if (Imm >= ScalarSizeInBits) {
1875 // Any logical shift that exceeds scalar size will produce zero.
1876 if (Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_LSHR) {
1877 Builder.buildConstant(MI.getOperand(0), 0);
1878 MI.eraseFromParent();
1879 return;
1880 }
1881 // Arithmetic shift and saturating signed left shift have no effect beyond
1882 // scalar size.
1883 Imm = ScalarSizeInBits - 1;
1884 }
1885
1886 LLT ImmTy = MRI.getType(MI.getOperand(2).getReg());
1887 Register NewImm = Builder.buildConstant(ImmTy, Imm).getReg(0);
1888 Observer.changingInstr(MI);
1889 MI.getOperand(1).setReg(MatchInfo.Reg);
1890 MI.getOperand(2).setReg(NewImm);
1891 Observer.changedInstr(MI);
1892}
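// Illustrative examples on s32 (hypothetical MIR, constants shown inline):
//   (G_LSHR (G_LSHR %x, 10), 12)  ->  G_LSHR %x, 22
//   (G_LSHR (G_LSHR %x, 20), 20)  ->  G_CONSTANT i32 0   (sum reaches 32)
//   (G_ASHR (G_ASHR %x, 20), 20)  ->  G_ASHR %x, 31      (clamped to 31)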
1893
1894bool CombinerHelper::matchShiftOfShiftedLogic(
1895 MachineInstr &MI, ShiftOfShiftedLogic &MatchInfo) const {
1896 // We're trying to match the following pattern with any of
1897 // G_SHL/G_ASHR/G_LSHR/G_USHLSAT/G_SSHLSAT shift instructions in combination
1898 // with any of G_AND/G_OR/G_XOR logic instructions.
1899 // %t1 = SHIFT %X, G_CONSTANT C0
1900 // %t2 = LOGIC %t1, %Y
1901 // %root = SHIFT %t2, G_CONSTANT C1
1902 // -->
1903 // %t3 = SHIFT %X, G_CONSTANT (C0+C1)
1904 // %t4 = SHIFT %Y, G_CONSTANT C1
1905 // %root = LOGIC %t3, %t4
1906 unsigned ShiftOpcode = MI.getOpcode();
1907 assert((ShiftOpcode == TargetOpcode::G_SHL ||
1908 ShiftOpcode == TargetOpcode::G_ASHR ||
1909 ShiftOpcode == TargetOpcode::G_LSHR ||
1910 ShiftOpcode == TargetOpcode::G_USHLSAT ||
1911 ShiftOpcode == TargetOpcode::G_SSHLSAT) &&
1912 "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT");
1913
1914 // Match a one-use bitwise logic op.
1915 Register LogicDest = MI.getOperand(1).getReg();
1916 if (!MRI.hasOneNonDBGUse(LogicDest))
1917 return false;
1918
1919 MachineInstr *LogicMI = MRI.getUniqueVRegDef(LogicDest);
1920 unsigned LogicOpcode = LogicMI->getOpcode();
1921 if (LogicOpcode != TargetOpcode::G_AND && LogicOpcode != TargetOpcode::G_OR &&
1922 LogicOpcode != TargetOpcode::G_XOR)
1923 return false;
1924
1925 // Find a matching one-use shift by constant.
1926 const Register C1 = MI.getOperand(2).getReg();
1927 auto MaybeImmVal = getIConstantVRegValWithLookThrough(C1, MRI);
1928 if (!MaybeImmVal || MaybeImmVal->Value == 0)
1929 return false;
1930
1931 const uint64_t C1Val = MaybeImmVal->Value.getZExtValue();
1932
1933 auto matchFirstShift = [&](const MachineInstr *MI, uint64_t &ShiftVal) {
1934 // The shift must match the previous one and its result must have one use.
1935 if (MI->getOpcode() != ShiftOpcode ||
1936 !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
1937 return false;
1938
1939 // Must be a constant.
1940 auto MaybeImmVal =
1941 getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
1942 if (!MaybeImmVal)
1943 return false;
1944
1945 ShiftVal = MaybeImmVal->Value.getSExtValue();
1946 return true;
1947 };
1948
1949 // Logic ops are commutative, so check each operand for a match.
1950 Register LogicMIReg1 = LogicMI->getOperand(1).getReg();
1951 MachineInstr *LogicMIOp1 = MRI.getUniqueVRegDef(LogicMIReg1);
1952 Register LogicMIReg2 = LogicMI->getOperand(2).getReg();
1953 MachineInstr *LogicMIOp2 = MRI.getUniqueVRegDef(LogicMIReg2);
1954 uint64_t C0Val;
1955
1956 if (matchFirstShift(LogicMIOp1, C0Val)) {
1957 MatchInfo.LogicNonShiftReg = LogicMIReg2;
1958 MatchInfo.Shift2 = LogicMIOp1;
1959 } else if (matchFirstShift(LogicMIOp2, C0Val)) {
1960 MatchInfo.LogicNonShiftReg = LogicMIReg1;
1961 MatchInfo.Shift2 = LogicMIOp2;
1962 } else
1963 return false;
1964
1965 MatchInfo.ValSum = C0Val + C1Val;
1966
1967 // The fold is not valid if the sum of the shift values exceeds bitwidth.
1968 if (MatchInfo.ValSum >= MRI.getType(LogicDest).getScalarSizeInBits())
1969 return false;
1970
1971 MatchInfo.Logic = LogicMI;
1972 return true;
1973}
1974
1975void CombinerHelper::applyShiftOfShiftedLogic(
1976 MachineInstr &MI, ShiftOfShiftedLogic &MatchInfo) const {
1977 unsigned Opcode = MI.getOpcode();
1978 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
1979 Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_USHLSAT ||
1980 Opcode == TargetOpcode::G_SSHLSAT) &&
1981 "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT");
1982
1983 LLT ShlType = MRI.getType(MI.getOperand(2).getReg());
1984 LLT DestType = MRI.getType(MI.getOperand(0).getReg());
1985
1986 Register Const = Builder.buildConstant(ShlType, MatchInfo.ValSum).getReg(0);
1987
1988 Register Shift1Base = MatchInfo.Shift2->getOperand(1).getReg();
1989 Register Shift1 =
1990 Builder.buildInstr(Opcode, {DestType}, {Shift1Base, Const}).getReg(0);
1991
1992 // If LogicNonShiftReg is the same as Shift1Base and the shift1 constant is
1993 // the same as the MatchInfo.Shift2 constant, CSEMIRBuilder will reuse the
1994 // old shift1 when building shift2. In that case, erasing MatchInfo.Shift2 at
1995 // the end would actually erase the old shift1 and crash later, so erase it
1996 // earlier to avoid the crash.
1997 MatchInfo.Shift2->eraseFromParent();
1998
1999 Register Shift2Const = MI.getOperand(2).getReg();
2000 Register Shift2 = Builder
2001 .buildInstr(Opcode, {DestType},
2002 {MatchInfo.LogicNonShiftReg, Shift2Const})
2003 .getReg(0);
2004
2005 Register Dest = MI.getOperand(0).getReg();
2006 Builder.buildInstr(MatchInfo.Logic->getOpcode(), {Dest}, {Shift1, Shift2});
2007
2008 // This was one use so it's safe to remove it.
2009 MatchInfo.Logic->eraseFromParent();
2010
2011 MI.eraseFromParent();
2012}
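// Illustrative example on s32 (hypothetical MIR, constants shown inline):
//   %t1 = G_SHL %x, 4 ; %t2 = G_AND %t1, %y ; %root = G_SHL %t2, 3
// becomes
//   %a = G_SHL %x, 7 ; %b = G_SHL %y, 3 ; %root = G_AND %a, %b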
2013
2014bool CombinerHelper::matchCommuteShift(MachineInstr &MI,
2015 BuildFnTy &MatchInfo) const {
2016 assert(MI.getOpcode() == TargetOpcode::G_SHL && "Expected G_SHL");
2017 // Combine (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
2018 // Combine (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
2019 auto &Shl = cast<GenericMachineInstr>(MI);
2020 Register DstReg = Shl.getReg(0);
2021 Register SrcReg = Shl.getReg(1);
2022 Register ShiftReg = Shl.getReg(2);
2023 Register X, C1;
2024
2025 if (!getTargetLowering().isDesirableToCommuteWithShift(MI, !isPreLegalize()))
2026 return false;
2027
2028 if (!mi_match(SrcReg, MRI,
2029 m_OneNonDBGUse(m_any_of(m_GAdd(m_Reg(X), m_Reg(C1)),
2030 m_GOr(m_Reg(X), m_Reg(C1))))))
2031 return false;
2032
2033 APInt C1Val, C2Val;
2034 if (!mi_match(C1, MRI, m_ICstOrSplat(C1Val)) ||
2035 !mi_match(ShiftReg, MRI, m_ICstOrSplat(C2Val)))
2036 return false;
2037
2038 auto *SrcDef = MRI.getVRegDef(SrcReg);
2039 assert((SrcDef->getOpcode() == TargetOpcode::G_ADD ||
2040 SrcDef->getOpcode() == TargetOpcode::G_OR) && "Unexpected op");
2041 LLT SrcTy = MRI.getType(SrcReg);
2042 MatchInfo = [=](MachineIRBuilder &B) {
2043 auto S1 = B.buildShl(SrcTy, X, ShiftReg);
2044 auto S2 = B.buildShl(SrcTy, C1, ShiftReg);
2045 B.buildInstr(SrcDef->getOpcode(), {DstReg}, {S1, S2});
2046 };
2047 return true;
2048}
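// Illustrative example (hypothetical MIR, constants shown inline):
//   %s = G_SHL (G_ADD %x, 3), 2
// is rebuilt as
//   %s = G_ADD (G_SHL %x, 2), (G_SHL 3, 2)
// and the constant shift folds to 12 separately.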
2049
2051 LshrOfTruncOfLshr &MatchInfo,
2052 MachineInstr &ShiftMI) const {
2053 assert(MI.getOpcode() == TargetOpcode::G_LSHR && "Expected a G_LSHR");
2054
2055 Register N0 = MI.getOperand(1).getReg();
2056 Register N1 = MI.getOperand(2).getReg();
2057 unsigned OpSizeInBits = MRI.getType(N0).getScalarSizeInBits();
2058
2059 APInt N1C, N001C;
2060 if (!mi_match(N1, MRI, m_ICstOrSplat(N1C)))
2061 return false;
2062 auto N001 = ShiftMI.getOperand(2).getReg();
2063 if (!mi_match(N001, MRI, m_ICstOrSplat(N001C)))
2064 return false;
2065
2066 if (N001C.getBitWidth() > N1C.getBitWidth())
2067 N1C = N1C.zext(N001C.getBitWidth());
2068 else
2069 N001C = N001C.zext(N1C.getBitWidth());
2070
2071 Register InnerShift = ShiftMI.getOperand(0).getReg();
2072 LLT InnerShiftTy = MRI.getType(InnerShift);
2073 uint64_t InnerShiftSize = InnerShiftTy.getScalarSizeInBits();
2074 if ((N1C + N001C).ult(InnerShiftSize)) {
2075 MatchInfo.Src = ShiftMI.getOperand(1).getReg();
2076 MatchInfo.ShiftAmt = N1C + N001C;
2077 MatchInfo.ShiftAmtTy = MRI.getType(N001);
2078 MatchInfo.InnerShiftTy = InnerShiftTy;
2079
2080 if ((N001C + OpSizeInBits) == InnerShiftSize)
2081 return true;
2082 if (MRI.hasOneUse(N0) && MRI.hasOneUse(InnerShift)) {
2083 MatchInfo.Mask = true;
2084 MatchInfo.MaskVal = APInt(N1C.getBitWidth(), OpSizeInBits) - N1C;
2085 return true;
2086 }
2087 }
2088 return false;
2089}
2090
2092 MachineInstr &MI, LshrOfTruncOfLshr &MatchInfo) const {
2093 assert(MI.getOpcode() == TargetOpcode::G_LSHR && "Expected a G_LSHR");
2094
2095 Register Dst = MI.getOperand(0).getReg();
2096 auto ShiftAmt =
2097 Builder.buildConstant(MatchInfo.ShiftAmtTy, MatchInfo.ShiftAmt);
2098 auto Shift =
2099 Builder.buildLShr(MatchInfo.InnerShiftTy, MatchInfo.Src, ShiftAmt);
2100 if (MatchInfo.Mask) {
2101 APInt MaskVal =
2102 APInt::getLowBitsSet(MatchInfo.InnerShiftTy.getScalarSizeInBits(),
2103 MatchInfo.MaskVal.getZExtValue());
2104 auto Mask = Builder.buildConstant(MatchInfo.InnerShiftTy, MaskVal);
2105 auto And = Builder.buildAnd(MatchInfo.InnerShiftTy, Shift, Mask);
2106 Builder.buildTrunc(Dst, And);
2107 } else
2108 Builder.buildTrunc(Dst, Shift);
2109 MI.eraseFromParent();
2110}
2111
2112bool CombinerHelper::matchCombineMulToShl(MachineInstr &MI,
2113 unsigned &ShiftVal) const {
2114 assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
2115 auto MaybeImmVal =
2116 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
2117 if (!MaybeImmVal)
2118 return false;
2119
2120 ShiftVal = MaybeImmVal->Value.exactLogBase2();
2121 return (static_cast<int32_t>(ShiftVal) != -1);
2122}
2123
2124void CombinerHelper::applyCombineMulToShl(MachineInstr &MI,
2125 unsigned &ShiftVal) const {
2126 assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
2127 MachineIRBuilder MIB(MI);
2128 LLT ShiftTy = MRI.getType(MI.getOperand(0).getReg());
2129 auto ShiftCst = MIB.buildConstant(ShiftTy, ShiftVal);
2130 Observer.changingInstr(MI);
2131 MI.setDesc(MIB.getTII().get(TargetOpcode::G_SHL));
2132 MI.getOperand(2).setReg(ShiftCst.getReg(0));
2133 if (ShiftVal == ShiftTy.getScalarSizeInBits() - 1)
2135 Observer.changedInstr(MI);
2136}
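// Illustrative example (hypothetical MIR):
//   %d:_(s32) = G_MUL %x, 8
// becomes
//   %c3:_(s32) = G_CONSTANT i32 3
//   %d:_(s32) = G_SHL %x, %c3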
2137
2139 BuildFnTy &MatchInfo) const {
2140 GSub &Sub = cast<GSub>(MI);
2141
2142 LLT Ty = MRI.getType(Sub.getReg(0));
2143
2144 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, {Ty}}))
2145 return false;
2146
2148 return false;
2149
2150 APInt Imm = getIConstantFromReg(Sub.getRHSReg(), MRI);
2151
2152 MatchInfo = [=, &MI](MachineIRBuilder &B) {
2153 auto NegCst = B.buildConstant(Ty, -Imm);
2154 Observer.changingInstr(MI);
2155 MI.setDesc(B.getTII().get(TargetOpcode::G_ADD));
2156 MI.getOperand(2).setReg(NegCst.getReg(0));
2158 if (Imm.isMinSignedValue())
2160 Observer.changedInstr(MI);
2161 };
2162 return true;
2163}
2164
2165// shl ([sza]ext x), y => zext (shl x, y), if shift does not overflow source
2166bool CombinerHelper::matchCombineShlOfExtend(MachineInstr &MI,
2167 RegisterImmPair &MatchData) const {
2168 assert(MI.getOpcode() == TargetOpcode::G_SHL && VT);
2169 if (!getTargetLowering().isDesirableToPullExtFromShl(MI))
2170 return false;
2171
2172 Register LHS = MI.getOperand(1).getReg();
2173
2174 Register ExtSrc;
2175 if (!mi_match(LHS, MRI, m_GAnyExt(m_Reg(ExtSrc))) &&
2176 !mi_match(LHS, MRI, m_GZExt(m_Reg(ExtSrc))) &&
2177 !mi_match(LHS, MRI, m_GSExt(m_Reg(ExtSrc))))
2178 return false;
2179
2180 Register RHS = MI.getOperand(2).getReg();
2181 MachineInstr *MIShiftAmt = MRI.getVRegDef(RHS);
2182 auto MaybeShiftAmtVal = isConstantOrConstantSplatVector(*MIShiftAmt, MRI);
2183 if (!MaybeShiftAmtVal)
2184 return false;
2185
2186 if (LI) {
2187 LLT SrcTy = MRI.getType(ExtSrc);
2188
2189 // We only really care about the legality with the shifted value. We can
2190 // pick any type for the constant shift amount, so ask the target what to
2191 // use. Otherwise we would have to guess and hope it is reported as legal.
2192 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(SrcTy);
2193 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SHL, {SrcTy, ShiftAmtTy}}))
2194 return false;
2195 }
2196
2197 int64_t ShiftAmt = MaybeShiftAmtVal->getSExtValue();
2198 MatchData.Reg = ExtSrc;
2199 MatchData.Imm = ShiftAmt;
2200
2201 unsigned MinLeadingZeros = VT->getKnownZeroes(ExtSrc).countl_one();
2202 unsigned SrcTySize = MRI.getType(ExtSrc).getScalarSizeInBits();
2203 return MinLeadingZeros >= ShiftAmt && ShiftAmt < SrcTySize;
2204}
2205
2206void CombinerHelper::applyCombineShlOfExtend(
2207 MachineInstr &MI, const RegisterImmPair &MatchData) const {
2208 Register ExtSrcReg = MatchData.Reg;
2209 int64_t ShiftAmtVal = MatchData.Imm;
2210
2211 LLT ExtSrcTy = MRI.getType(ExtSrcReg);
2212 auto ShiftAmt = Builder.buildConstant(ExtSrcTy, ShiftAmtVal);
2213 auto NarrowShift =
2214 Builder.buildShl(ExtSrcTy, ExtSrcReg, ShiftAmt, MI.getFlags());
2215 Builder.buildZExt(MI.getOperand(0), NarrowShift);
2216 MI.eraseFromParent();
2217}
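// Illustrative example (hypothetical MIR), assuming the top 4 bits of %x are
// known to be zero:
//   %e:_(s64) = G_ZEXT %x:_(s32)
//   %s:_(s64) = G_SHL %e, 4
// becomes
//   %n:_(s32) = G_SHL %x, 4
//   %s:_(s64) = G_ZEXT %n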
2218
2220 Register &MatchInfo) const {
2222 SmallVector<Register, 16> MergedValues;
2223 for (unsigned I = 0; I < Merge.getNumSources(); ++I)
2224 MergedValues.emplace_back(Merge.getSourceReg(I));
2225
2226 auto *Unmerge = getOpcodeDef<GUnmerge>(MergedValues[0], MRI);
2227 if (!Unmerge || Unmerge->getNumDefs() != Merge.getNumSources())
2228 return false;
2229
2230 for (unsigned I = 0; I < MergedValues.size(); ++I)
2231 if (MergedValues[I] != Unmerge->getReg(I))
2232 return false;
2233
2234 MatchInfo = Unmerge->getSourceReg();
2235 return true;
2236}
2237
2238static Register peekThroughBitcast(Register Reg,
2239 const MachineRegisterInfo &MRI) {
2240 while (mi_match(Reg, MRI, m_GBitcast(m_Reg(Reg))))
2241 ;
2242
2243 return Reg;
2244}
2245
2246bool CombinerHelper::matchCombineUnmergeMergeToPlainValues(
2247 MachineInstr &MI, SmallVectorImpl<Register> &Operands) const {
2248 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2249 "Expected an unmerge");
2250 auto &Unmerge = cast<GUnmerge>(MI);
2251 Register SrcReg = peekThroughBitcast(Unmerge.getSourceReg(), MRI);
2252
2253 auto *SrcInstr = getOpcodeDef<GMergeLikeInstr>(SrcReg, MRI);
2254 if (!SrcInstr)
2255 return false;
2256
2257 // Check the source type of the merge.
2258 LLT SrcMergeTy = MRI.getType(SrcInstr->getSourceReg(0));
2259 LLT Dst0Ty = MRI.getType(Unmerge.getReg(0));
2260 bool SameSize = Dst0Ty.getSizeInBits() == SrcMergeTy.getSizeInBits();
2261 if (SrcMergeTy != Dst0Ty && !SameSize)
2262 return false;
2263 // They are the same now (modulo a bitcast).
2264 // We can collect all the src registers.
2265 for (unsigned Idx = 0; Idx < SrcInstr->getNumSources(); ++Idx)
2266 Operands.push_back(SrcInstr->getSourceReg(Idx));
2267 return true;
2268}
2269
2270void CombinerHelper::applyCombineUnmergeMergeToPlainValues(
2271 MachineInstr &MI, SmallVectorImpl<Register> &Operands) const {
2272 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2273 "Expected an unmerge");
2274 assert((MI.getNumOperands() - 1 == Operands.size()) &&
2275 "Not enough operands to replace all defs");
2276 unsigned NumElems = MI.getNumOperands() - 1;
2277
2278 LLT SrcTy = MRI.getType(Operands[0]);
2279 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
2280 bool CanReuseInputDirectly = DstTy == SrcTy;
2281 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
2282 Register DstReg = MI.getOperand(Idx).getReg();
2283 Register SrcReg = Operands[Idx];
2284
2285 // This combine may run after RegBankSelect, so we need to be aware of
2286 // register banks.
2287 const auto &DstCB = MRI.getRegClassOrRegBank(DstReg);
2288 if (!DstCB.isNull() && DstCB != MRI.getRegClassOrRegBank(SrcReg)) {
2289 SrcReg = Builder.buildCopy(MRI.getType(SrcReg), SrcReg).getReg(0);
2290 MRI.setRegClassOrRegBank(SrcReg, DstCB);
2291 }
2292
2293 if (CanReuseInputDirectly)
2294 replaceRegWith(MRI, DstReg, SrcReg);
2295 else
2296 Builder.buildCast(DstReg, SrcReg);
2297 }
2298 MI.eraseFromParent();
2299}
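// Illustrative example (hypothetical MIR):
//   %m:_(s64) = G_MERGE_VALUES %x:_(s32), %y:_(s32)
//   %a:_(s32), %b:_(s32) = G_UNMERGE_VALUES %m
// simply forwards %x to %a and %y to %b, inserting casts when the merge and
// unmerge types differ only by a bitcast.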
2300
2301bool CombinerHelper::matchCombineUnmergeConstant(
2302 MachineInstr &MI, SmallVectorImpl<APInt> &Csts) const {
2303 unsigned SrcIdx = MI.getNumOperands() - 1;
2304 Register SrcReg = MI.getOperand(SrcIdx).getReg();
2305 MachineInstr *SrcInstr = MRI.getVRegDef(SrcReg);
2306 if (SrcInstr->getOpcode() != TargetOpcode::G_CONSTANT &&
2307 SrcInstr->getOpcode() != TargetOpcode::G_FCONSTANT)
2308 return false;
2309 // Break the big constant down into smaller ones.
2310 const MachineOperand &CstVal = SrcInstr->getOperand(1);
2311 APInt Val = SrcInstr->getOpcode() == TargetOpcode::G_CONSTANT
2312 ? CstVal.getCImm()->getValue()
2313 : CstVal.getFPImm()->getValueAPF().bitcastToAPInt();
2314
2315 LLT Dst0Ty = MRI.getType(MI.getOperand(0).getReg());
2316 unsigned ShiftAmt = Dst0Ty.getSizeInBits();
2317 // Unmerge a constant.
2318 for (unsigned Idx = 0; Idx != SrcIdx; ++Idx) {
2319 Csts.emplace_back(Val.trunc(ShiftAmt));
2320 Val = Val.lshr(ShiftAmt);
2321 }
2322
2323 return true;
2324}
2325
2326void CombinerHelper::applyCombineUnmergeConstant(
2327 MachineInstr &MI, SmallVectorImpl<APInt> &Csts) const {
2328 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2329 "Expected an unmerge");
2330 assert((MI.getNumOperands() - 1 == Csts.size()) &&
2331 "Not enough operands to replace all defs");
2332 unsigned NumElems = MI.getNumOperands() - 1;
2333 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
2334 Register DstReg = MI.getOperand(Idx).getReg();
2335 Builder.buildConstant(DstReg, Csts[Idx]);
2336 }
2337
2338 MI.eraseFromParent();
2339}
2340
2341bool CombinerHelper::matchCombineUnmergeUndef(
2342 MachineInstr &MI,
2343 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
2344 unsigned SrcIdx = MI.getNumOperands() - 1;
2345 Register SrcReg = MI.getOperand(SrcIdx).getReg();
2346 MatchInfo = [&MI](MachineIRBuilder &B) {
2347 unsigned NumElems = MI.getNumOperands() - 1;
2348 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
2349 Register DstReg = MI.getOperand(Idx).getReg();
2350 B.buildUndef(DstReg);
2351 }
2352 };
2353 return isa<GImplicitDef>(MRI.getVRegDef(SrcReg));
2354}
2355
2356bool CombinerHelper::matchCombineUnmergeWithDeadLanesToTrunc(
2357 MachineInstr &MI) const {
2358 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2359 "Expected an unmerge");
2360 if (MRI.getType(MI.getOperand(0).getReg()).isVector() ||
2361 MRI.getType(MI.getOperand(MI.getNumDefs()).getReg()).isVector())
2362 return false;
2363 // Check that all the lanes are dead except the first one.
2364 for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
2365 if (!MRI.use_nodbg_empty(MI.getOperand(Idx).getReg()))
2366 return false;
2367 }
2368 return true;
2369}
2370
2371void CombinerHelper::applyCombineUnmergeWithDeadLanesToTrunc(
2372 MachineInstr &MI) const {
2373 Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
2374 Register Dst0Reg = MI.getOperand(0).getReg();
2375 Builder.buildTrunc(Dst0Reg, SrcReg);
2376 MI.eraseFromParent();
2377}
2378
2379bool CombinerHelper::matchCombineUnmergeZExtToZExt(MachineInstr &MI) const {
2380 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2381 "Expected an unmerge");
2382 Register Dst0Reg = MI.getOperand(0).getReg();
2383 LLT Dst0Ty = MRI.getType(Dst0Reg);
2384 // G_ZEXT on vector applies to each lane, so it will
2385 // affect all destinations. Therefore we won't be able
2386 // to simplify the unmerge to just the first definition.
2387 if (Dst0Ty.isVector())
2388 return false;
2389 Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
2390 LLT SrcTy = MRI.getType(SrcReg);
2391 if (SrcTy.isVector())
2392 return false;
2393
2394 Register ZExtSrcReg;
2395 if (!mi_match(SrcReg, MRI, m_GZExt(m_Reg(ZExtSrcReg))))
2396 return false;
2397
2398 // Finally we can replace the first definition with
2399 // a zext of the source if the definition is big enough to hold
2400 // all of ZExtSrc bits.
2401 LLT ZExtSrcTy = MRI.getType(ZExtSrcReg);
2402 return ZExtSrcTy.getSizeInBits() <= Dst0Ty.getSizeInBits();
2403}
2404
2405void CombinerHelper::applyCombineUnmergeZExtToZExt(MachineInstr &MI) const {
2406 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2407 "Expected an unmerge");
2408
2409 Register Dst0Reg = MI.getOperand(0).getReg();
2410
2411 MachineInstr *ZExtInstr =
2412 MRI.getVRegDef(MI.getOperand(MI.getNumDefs()).getReg());
2413 assert(ZExtInstr && ZExtInstr->getOpcode() == TargetOpcode::G_ZEXT &&
2414 "Expecting a G_ZEXT");
2415
2416 Register ZExtSrcReg = ZExtInstr->getOperand(1).getReg();
2417 LLT Dst0Ty = MRI.getType(Dst0Reg);
2418 LLT ZExtSrcTy = MRI.getType(ZExtSrcReg);
2419
2420 if (Dst0Ty.getSizeInBits() > ZExtSrcTy.getSizeInBits()) {
2421 Builder.buildZExt(Dst0Reg, ZExtSrcReg);
2422 } else {
2423 assert(Dst0Ty.getSizeInBits() == ZExtSrcTy.getSizeInBits() &&
2424 "ZExt src doesn't fit in destination");
2425 replaceRegWith(MRI, Dst0Reg, ZExtSrcReg);
2426 }
2427
2428 Register ZeroReg;
2429 for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
2430 if (!ZeroReg)
2431 ZeroReg = Builder.buildConstant(Dst0Ty, 0).getReg(0);
2432 replaceRegWith(MRI, MI.getOperand(Idx).getReg(), ZeroReg);
2433 }
2434 MI.eraseFromParent();
2435}
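// Illustrative example (hypothetical MIR):
//   %z:_(s64) = G_ZEXT %x:_(s16)
//   %lo:_(s32), %hi:_(s32) = G_UNMERGE_VALUES %z
// becomes
//   %lo:_(s32) = G_ZEXT %x
//   %hi:_(s32) = G_CONSTANT i32 0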
2436
2437bool CombinerHelper::matchCombineShiftToUnmerge(MachineInstr &MI,
2438 unsigned TargetShiftSize,
2439 unsigned &ShiftVal) const {
2440 assert((MI.getOpcode() == TargetOpcode::G_SHL ||
2441 MI.getOpcode() == TargetOpcode::G_LSHR ||
2442 MI.getOpcode() == TargetOpcode::G_ASHR) && "Expected a shift");
2443
2444 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
2445 if (Ty.isVector()) // TODO: Handle vector types.
2446 return false;
2447
2448 // Don't narrow further than the requested size.
2449 unsigned Size = Ty.getSizeInBits();
2450 if (Size <= TargetShiftSize)
2451 return false;
2452
2453 auto MaybeImmVal =
2454 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
2455 if (!MaybeImmVal)
2456 return false;
2457
2458 ShiftVal = MaybeImmVal->Value.getSExtValue();
2459 return ShiftVal >= Size / 2 && ShiftVal < Size;
2460}
2461
2462void CombinerHelper::applyCombineShiftToUnmerge(
2463 MachineInstr &MI, const unsigned &ShiftVal) const {
2464 Register DstReg = MI.getOperand(0).getReg();
2465 Register SrcReg = MI.getOperand(1).getReg();
2466 LLT Ty = MRI.getType(SrcReg);
2467 unsigned Size = Ty.getSizeInBits();
2468 unsigned HalfSize = Size / 2;
2469 assert(ShiftVal >= HalfSize);
2470
2471 LLT HalfTy = LLT::scalar(HalfSize);
2472
2473 auto Unmerge = Builder.buildUnmerge(HalfTy, SrcReg);
2474 unsigned NarrowShiftAmt = ShiftVal - HalfSize;
2475
2476 if (MI.getOpcode() == TargetOpcode::G_LSHR) {
2477 Register Narrowed = Unmerge.getReg(1);
2478
2479 // dst = G_LSHR s64:x, C for C >= 32
2480 // =>
2481 // lo, hi = G_UNMERGE_VALUES x
2482 // dst = G_MERGE_VALUES (G_LSHR hi, C - 32), 0
2483
2484 if (NarrowShiftAmt != 0) {
2485 Narrowed = Builder.buildLShr(HalfTy, Narrowed,
2486 Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
2487 }
2488
2489 auto Zero = Builder.buildConstant(HalfTy, 0);
2490 Builder.buildMergeLikeInstr(DstReg, {Narrowed, Zero});
2491 } else if (MI.getOpcode() == TargetOpcode::G_SHL) {
2492 Register Narrowed = Unmerge.getReg(0);
2493 // dst = G_SHL s64:x, C for C >= 32
2494 // =>
2495 // lo, hi = G_UNMERGE_VALUES x
2496 // dst = G_MERGE_VALUES 0, (G_SHL hi, C - 32)
2497 if (NarrowShiftAmt != 0) {
2498 Narrowed = Builder.buildShl(HalfTy, Narrowed,
2499 Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
2500 }
2501
2502 auto Zero = Builder.buildConstant(HalfTy, 0);
2503 Builder.buildMergeLikeInstr(DstReg, {Zero, Narrowed});
2504 } else {
2505 assert(MI.getOpcode() == TargetOpcode::G_ASHR);
2506 auto Hi = Builder.buildAShr(
2507 HalfTy, Unmerge.getReg(1),
2508 Builder.buildConstant(HalfTy, HalfSize - 1));
2509
2510 if (ShiftVal == HalfSize) {
2511 // (G_ASHR i64:x, 32) ->
2512 // G_MERGE_VALUES hi_32(x), (G_ASHR hi_32(x), 31)
2513 Builder.buildMergeLikeInstr(DstReg, {Unmerge.getReg(1), Hi});
2514 } else if (ShiftVal == Size - 1) {
2515 // Don't need a second shift.
2516 // (G_ASHR i64:x, 63) ->
2517 // %narrowed = (G_ASHR hi_32(x), 31)
2518 // G_MERGE_VALUES %narrowed, %narrowed
2519 Builder.buildMergeLikeInstr(DstReg, {Hi, Hi});
2520 } else {
2521 auto Lo = Builder.buildAShr(
2522 HalfTy, Unmerge.getReg(1),
2523 Builder.buildConstant(HalfTy, ShiftVal - HalfSize));
2524
2525 // (G_ASHR i64:x, C) ->, for C >= 32
2526 // G_MERGE_VALUES (G_ASHR hi_32(x), C - 32), (G_ASHR hi_32(x), 31)
2527 Builder.buildMergeLikeInstr(DstReg, {Lo, Hi});
2528 }
2529 }
2530
2531 MI.eraseFromParent();
2532}
2533
2534bool CombinerHelper::tryCombineShiftToUnmerge(
2535 MachineInstr &MI, unsigned TargetShiftAmount) const {
2536 unsigned ShiftAmt;
2537 if (matchCombineShiftToUnmerge(MI, TargetShiftAmount, ShiftAmt)) {
2538 applyCombineShiftToUnmerge(MI, ShiftAmt);
2539 return true;
2540 }
2541
2542 return false;
2543}
2544
2545bool CombinerHelper::matchCombineI2PToP2I(MachineInstr &MI,
2546 Register &Reg) const {
2547 assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
2548 Register DstReg = MI.getOperand(0).getReg();
2549 LLT DstTy = MRI.getType(DstReg);
2550 Register SrcReg = MI.getOperand(1).getReg();
2551 return mi_match(SrcReg, MRI,
2552 m_GPtrToInt(m_all_of(m_SpecificType(DstTy), m_Reg(Reg))));
2553}
2554
2556 Register &Reg) const {
2557 assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
2558 Register DstReg = MI.getOperand(0).getReg();
2559 Builder.buildCopy(DstReg, Reg);
2560 MI.eraseFromParent();
2561}
2562
2563void CombinerHelper::applyCombineP2IToI2P(MachineInstr &MI,
2564 Register &Reg) const {
2565 assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT");
2566 Register DstReg = MI.getOperand(0).getReg();
2567 Builder.buildZExtOrTrunc(DstReg, Reg);
2568 MI.eraseFromParent();
2569}
2570
2571bool CombinerHelper::matchCombineAddP2IToPtrAdd(
2572 MachineInstr &MI, std::pair<Register, bool> &PtrReg) const {
2573 assert(MI.getOpcode() == TargetOpcode::G_ADD);
2574 Register LHS = MI.getOperand(1).getReg();
2575 Register RHS = MI.getOperand(2).getReg();
2576 LLT IntTy = MRI.getType(LHS);
2577
2578 // G_PTR_ADD always has the pointer in the LHS, so we may need to commute the
2579 // instruction.
2580 PtrReg.second = false;
2581 for (Register SrcReg : {LHS, RHS}) {
2582 if (mi_match(SrcReg, MRI, m_GPtrToInt(m_Reg(PtrReg.first)))) {
2583 // Don't handle cases where the integer is implicitly converted to the
2584 // pointer width.
2585 LLT PtrTy = MRI.getType(PtrReg.first);
2586 if (PtrTy.getScalarSizeInBits() == IntTy.getScalarSizeInBits())
2587 return true;
2588 }
2589
2590 PtrReg.second = true;
2591 }
2592
2593 return false;
2594}
2595
2596void CombinerHelper::applyCombineAddP2IToPtrAdd(
2597 MachineInstr &MI, std::pair<Register, bool> &PtrReg) const {
2598 Register Dst = MI.getOperand(0).getReg();
2599 Register LHS = MI.getOperand(1).getReg();
2600 Register RHS = MI.getOperand(2).getReg();
2601
2602 const bool DoCommute = PtrReg.second;
2603 if (DoCommute)
2604 std::swap(LHS, RHS);
2605 LHS = PtrReg.first;
2606
2607 LLT PtrTy = MRI.getType(LHS);
2608
2609 auto PtrAdd = Builder.buildPtrAdd(PtrTy, LHS, RHS);
2610 Builder.buildPtrToInt(Dst, PtrAdd);
2611 MI.eraseFromParent();
2612}
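// Illustrative example (hypothetical MIR):
//   %i:_(s64) = G_PTRTOINT %p:_(p0)
//   %d:_(s64) = G_ADD %i, %off
// becomes
//   %q:_(p0) = G_PTR_ADD %p, %off
//   %d:_(s64) = G_PTRTOINT %q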
2613
2614bool CombinerHelper::matchCombineConstPtrAddToI2P(MachineInstr &MI,
2615 APInt &NewCst) const {
2616 auto &PtrAdd = cast<GPtrAdd>(MI);
2617 Register LHS = PtrAdd.getBaseReg();
2618 Register RHS = PtrAdd.getOffsetReg();
2619 MachineRegisterInfo &MRI = Builder.getMF().getRegInfo();
2620
2621 if (auto RHSCst = getIConstantVRegVal(RHS, MRI)) {
2622 APInt Cst;
2623 if (mi_match(LHS, MRI, m_GIntToPtr(m_ICst(Cst)))) {
2624 auto DstTy = MRI.getType(PtrAdd.getReg(0));
2625 // G_INTTOPTR uses zero-extension
2626 NewCst = Cst.zextOrTrunc(DstTy.getSizeInBits());
2627 NewCst += RHSCst->sextOrTrunc(DstTy.getSizeInBits());
2628 return true;
2629 }
2630 }
2631
2632 return false;
2633}
2634
2635void CombinerHelper::applyCombineConstPtrAddToI2P(MachineInstr &MI,
2636 APInt &NewCst) const {
2637 auto &PtrAdd = cast<GPtrAdd>(MI);
2638 Register Dst = PtrAdd.getReg(0);
2639
2640 Builder.buildConstant(Dst, NewCst);
2641 PtrAdd.eraseFromParent();
2642}
2643
2644bool CombinerHelper::matchCombineAnyExtTrunc(MachineInstr &MI,
2645 Register &Reg) const {
2646 assert(MI.getOpcode() == TargetOpcode::G_ANYEXT && "Expected a G_ANYEXT");
2647 Register DstReg = MI.getOperand(0).getReg();
2648 Register SrcReg = MI.getOperand(1).getReg();
2649 Register OriginalSrcReg = getSrcRegIgnoringCopies(SrcReg, MRI);
2650 if (OriginalSrcReg.isValid())
2651 SrcReg = OriginalSrcReg;
2652 LLT DstTy = MRI.getType(DstReg);
2653 return mi_match(SrcReg, MRI,
2654 m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy)))) &&
2655 canReplaceReg(DstReg, Reg, MRI);
2656}
2657
2658bool CombinerHelper::matchCombineZextTrunc(MachineInstr &MI,
2659 Register &Reg) const {
2660 assert(MI.getOpcode() == TargetOpcode::G_ZEXT && "Expected a G_ZEXT");
2661 Register DstReg = MI.getOperand(0).getReg();
2662 Register SrcReg = MI.getOperand(1).getReg();
2663 LLT DstTy = MRI.getType(DstReg);
2664 if (mi_match(SrcReg, MRI,
2665 m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy)))) &&
2666 canReplaceReg(DstReg, Reg, MRI)) {
2667 unsigned DstSize = DstTy.getScalarSizeInBits();
2668 unsigned SrcSize = MRI.getType(SrcReg).getScalarSizeInBits();
2669 return VT->getKnownBits(Reg).countMinLeadingZeros() >= DstSize - SrcSize;
2670 }
2671 return false;
2672}
2673
2674static LLT getMidVTForTruncRightShiftCombine(LLT ShiftTy, LLT TruncTy) {
2675 const unsigned ShiftSize = ShiftTy.getScalarSizeInBits();
2676 const unsigned TruncSize = TruncTy.getScalarSizeInBits();
2677
2678 // ShiftTy > 32 > TruncTy -> 32
2679 if (ShiftSize > 32 && TruncSize < 32)
2680 return ShiftTy.changeElementSize(32);
2681
2682 // TODO: We could also reduce to 16 bits, but that's more target-dependent.
2683 // Some targets like it, some don't, some only like it under certain
2684 // conditions/processor versions, etc.
2685 // A TL hook might be needed for this.
2686
2687 // Don't combine
2688 return ShiftTy;
2689}
2690
2691bool CombinerHelper::matchCombineTruncOfShift(
2692 MachineInstr &MI, std::pair<MachineInstr *, LLT> &MatchInfo) const {
2693 assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
2694 Register DstReg = MI.getOperand(0).getReg();
2695 Register SrcReg = MI.getOperand(1).getReg();
2696
2697 if (!MRI.hasOneNonDBGUse(SrcReg))
2698 return false;
2699
2700 LLT SrcTy = MRI.getType(SrcReg);
2701 LLT DstTy = MRI.getType(DstReg);
2702
2703 MachineInstr *SrcMI = getDefIgnoringCopies(SrcReg, MRI);
2704 const auto &TL = getTargetLowering();
2705
2706 LLT NewShiftTy;
2707 switch (SrcMI->getOpcode()) {
2708 default:
2709 return false;
2710 case TargetOpcode::G_SHL: {
2711 NewShiftTy = DstTy;
2712
2713 // Make sure new shift amount is legal.
2714 KnownBits Known = VT->getKnownBits(SrcMI->getOperand(2).getReg());
2715 if (Known.getMaxValue().uge(NewShiftTy.getScalarSizeInBits()))
2716 return false;
2717 break;
2718 }
2719 case TargetOpcode::G_LSHR:
2720 case TargetOpcode::G_ASHR: {
2721 // For right shifts, we conservatively do not do the transform if the TRUNC
2722 // has any STORE users. The reason is that if we change the type of the
2723 // shift, we may break the truncstore combine.
2724 //
2725 // TODO: Fix truncstore combine to handle (trunc(lshr (trunc x), k)).
2726 for (auto &User : MRI.use_instructions(DstReg))
2727 if (User.getOpcode() == TargetOpcode::G_STORE)
2728 return false;
2729
2730 NewShiftTy = getMidVTForTruncRightShiftCombine(SrcTy, DstTy);
2731 if (NewShiftTy == SrcTy)
2732 return false;
2733
2734 // Make sure we won't lose information by truncating the high bits.
2735 KnownBits Known = VT->getKnownBits(SrcMI->getOperand(2).getReg());
2736 if (Known.getMaxValue().ugt(NewShiftTy.getScalarSizeInBits() -
2737 DstTy.getScalarSizeInBits()))
2738 return false;
2739 break;
2740 }
2741 }
2742
2743 if (!isLegalOrBeforeLegalizer(
2744 {SrcMI->getOpcode(),
2745 {NewShiftTy, TL.getPreferredShiftAmountTy(NewShiftTy)}}))
2746 return false;
2747
2748 MatchInfo = std::make_pair(SrcMI, NewShiftTy);
2749 return true;
2750}
2751
2752void CombinerHelper::applyCombineTruncOfShift(
2753 MachineInstr &MI, std::pair<MachineInstr *, LLT> &MatchInfo) const {
2754 MachineInstr *ShiftMI = MatchInfo.first;
2755 LLT NewShiftTy = MatchInfo.second;
2756
2757 Register Dst = MI.getOperand(0).getReg();
2758 LLT DstTy = MRI.getType(Dst);
2759
2760 Register ShiftAmt = ShiftMI->getOperand(2).getReg();
2761 Register ShiftSrc = ShiftMI->getOperand(1).getReg();
2762 ShiftSrc = Builder.buildTrunc(NewShiftTy, ShiftSrc).getReg(0);
2763
2764 Register NewShift =
2765 Builder
2766 .buildInstr(ShiftMI->getOpcode(), {NewShiftTy}, {ShiftSrc, ShiftAmt})
2767 .getReg(0);
2768
2769 if (NewShiftTy == DstTy)
2770 replaceRegWith(MRI, Dst, NewShift);
2771 else
2772 Builder.buildTrunc(Dst, NewShift);
2773
2774 eraseInst(MI);
2775}
2776
2777bool CombinerHelper::matchAnyExplicitUseIsUndef(MachineInstr &MI) const {
2778 return any_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
2779 return MO.isReg() &&
2780 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
2781 });
2782}
2783
2784bool CombinerHelper::matchAllExplicitUsesAreUndef(MachineInstr &MI) const {
2785 return all_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
2786 return !MO.isReg() ||
2787 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
2788 });
2789}
2790
2791bool CombinerHelper::matchUndefShuffleVectorMask(MachineInstr &MI) const {
2792 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
2793 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
2794 return all_of(Mask, [](int Elt) { return Elt < 0; });
2795}
2796
2797bool CombinerHelper::matchUndefStore(MachineInstr &MI) const {
2798 assert(MI.getOpcode() == TargetOpcode::G_STORE);
2799 return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(0).getReg(),
2800 MRI);
2801}
2802
2803bool CombinerHelper::matchUndefSelectCmp(MachineInstr &MI) const {
2804 assert(MI.getOpcode() == TargetOpcode::G_SELECT);
2805 return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(1).getReg(),
2806 MRI);
2807}
2808
2809bool CombinerHelper::matchInsertExtractVecEltOutOfBounds(
2810 MachineInstr &MI) const {
2811 assert((MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT ||
2812 MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT) &&
2813 "Expected an insert/extract element op");
2814 LLT VecTy = MRI.getType(MI.getOperand(1).getReg());
2815 if (VecTy.isScalableVector())
2816 return false;
2817
2818 unsigned IdxIdx =
2819 MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
2820 auto Idx = getIConstantVRegVal(MI.getOperand(IdxIdx).getReg(), MRI);
2821 if (!Idx)
2822 return false;
2823 return Idx->getZExtValue() >= VecTy.getNumElements();
2824}
2825
2826bool CombinerHelper::matchConstantSelectCmp(MachineInstr &MI,
2827 unsigned &OpIdx) const {
2828 GSelect &SelMI = cast<GSelect>(MI);
2829 auto Cst =
2830 isConstantOrConstantSplatVector(*MRI.getVRegDef(SelMI.getCondReg()), MRI);
2831 if (!Cst)
2832 return false;
2833 OpIdx = Cst->isZero() ? 3 : 2;
2834 return true;
2835}
2836
2837void CombinerHelper::eraseInst(MachineInstr &MI) const { MI.eraseFromParent(); }
2838
2839bool CombinerHelper::matchEqualDefs(const MachineOperand &MOP1,
2840 const MachineOperand &MOP2) const {
2841 if (!MOP1.isReg() || !MOP2.isReg())
2842 return false;
2843 auto InstAndDef1 = getDefSrcRegIgnoringCopies(MOP1.getReg(), MRI);
2844 if (!InstAndDef1)
2845 return false;
2846 auto InstAndDef2 = getDefSrcRegIgnoringCopies(MOP2.getReg(), MRI);
2847 if (!InstAndDef2)
2848 return false;
2849 MachineInstr *I1 = InstAndDef1->MI;
2850 MachineInstr *I2 = InstAndDef2->MI;
2851
2852 // Handle a case like this:
2853 //
2854 // %0:_(s64), %1:_(s64) = G_UNMERGE_VALUES %2:_(<2 x s64>)
2855 //
2856 // Even though %0 and %1 are produced by the same instruction they are not
2857 // the same values.
2858 if (I1 == I2)
2859 return MOP1.getReg() == MOP2.getReg();
2860
2861 // If we have an instruction which loads or stores, we can't guarantee that
2862 // it is identical.
2863 //
2864 // For example, we may have
2865 //
2866 // %x1 = G_LOAD %addr (load N from @somewhere)
2867 // ...
2868 // call @foo
2869 // ...
2870 // %x2 = G_LOAD %addr (load N from @somewhere)
2871 // ...
2872 // %or = G_OR %x1, %x2
2873 //
2874 // It's possible that @foo will modify whatever lives at the address we're
2875 // loading from. To be safe, let's just assume that all loads and stores
2876 // are different (unless we have something which is guaranteed to not
2877 // change.)
2878 if (I1->mayLoadOrStore() && !I1->isDereferenceableInvariantLoad())
2879 return false;
2880
2881 // If both instructions are loads or stores, they are equal only if both
2882 // are dereferenceable invariant loads with the same number of bits.
2883 if (I1->mayLoadOrStore() && I2->mayLoadOrStore()) {
2884 auto *LS1 = dyn_cast<GLoadStore>(I1);
2885 auto *LS2 = dyn_cast<GLoadStore>(I2);
2886 if (!LS1 || !LS2)
2887 return false;
2888
2889 if (!I2->isDereferenceableInvariantLoad() ||
2890 (LS1->getMemSizeInBits() != LS2->getMemSizeInBits()))
2891 return false;
2892 }
2893
2894 // Check for physical registers on the instructions first to avoid cases
2895 // like this:
2896 //
2897 // %a = COPY $physreg
2898 // ...
2899 // SOMETHING implicit-def $physreg
2900 // ...
2901 // %b = COPY $physreg
2902 //
2903 // These copies are not equivalent.
2904 if (any_of(I1->uses(), [](const MachineOperand &MO) {
2905 return MO.isReg() && MO.getReg().isPhysical();
2906 })) {
2907 // Check if we have a case like this:
2908 //
2909 // %a = COPY $physreg
2910 // %b = COPY %a
2911 //
2912 // In this case, I1 and I2 will both be equal to %a = COPY $physreg.
2913 // From that, we know that they must have the same value, since they must
2914 // have come from the same COPY.
2915 return I1->isIdenticalTo(*I2);
2916 }
2917
2918 // We don't have any physical registers, so we don't necessarily need the
2919 // same vreg defs.
2920 //
2921 // On the off-chance that there's some target instruction feeding into the
2922 // instruction, let's use produceSameValue instead of isIdenticalTo.
2923 if (Builder.getTII().produceSameValue(*I1, *I2, &MRI)) {
2924 // Handle instructions with multiple defs that produce same values. Values
2925 // are same for operands with same index.
2926 // %0:_(s8), %1:_(s8), %2:_(s8), %3:_(s8) = G_UNMERGE_VALUES %4:_(<4 x s8>)
2927 // %5:_(s8), %6:_(s8), %7:_(s8), %8:_(s8) = G_UNMERGE_VALUES %4:_(<4 x s8>)
2928 // I1 and I2 are different instructions but produce same values,
2929 // %1 and %6 are same, %1 and %7 are not the same value.
2930 return I1->findRegisterDefOperandIdx(InstAndDef1->Reg, /*TRI=*/nullptr) ==
2931 I2->findRegisterDefOperandIdx(InstAndDef2->Reg, /*TRI=*/nullptr);
2932 }
2933 return false;
2934}
2935
2936bool CombinerHelper::matchConstantOp(const MachineOperand &MOP,
2937 int64_t C) const {
2938 if (!MOP.isReg())
2939 return false;
2940 auto *MI = MRI.getVRegDef(MOP.getReg());
2941 auto MaybeCst = isConstantOrConstantSplatVector(*MI, MRI);
2942 return MaybeCst && MaybeCst->getBitWidth() <= 64 &&
2943 MaybeCst->getSExtValue() == C;
2944}
2945
2946bool CombinerHelper::matchConstantFPOp(const MachineOperand &MOP,
2947 double C) const {
2948 if (!MOP.isReg())
2949 return false;
2950 std::optional<FPValueAndVReg> MaybeCst;
2951 if (!mi_match(MOP.getReg(), MRI, m_GFCstOrSplat(MaybeCst)))
2952 return false;
2953
2954 return MaybeCst->Value.isExactlyValue(C);
2955}
2956
2957void CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI,
2958 unsigned OpIdx) const {
2959 assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
2960 Register OldReg = MI.getOperand(0).getReg();
2961 Register Replacement = MI.getOperand(OpIdx).getReg();
2962 assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
2963 replaceRegWith(MRI, OldReg, Replacement);
2964 MI.eraseFromParent();
2965}
2966
2967void CombinerHelper::replaceSingleDefInstWithReg(MachineInstr &MI,
2968 Register Replacement) const {
2969 assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
2970 Register OldReg = MI.getOperand(0).getReg();
2971 assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
2972 replaceRegWith(MRI, OldReg, Replacement);
2973 MI.eraseFromParent();
2974}
2975
2976bool CombinerHelper::matchConstantLargerBitWidth(MachineInstr &MI,
2977 unsigned ConstIdx) const {
2978 Register ConstReg = MI.getOperand(ConstIdx).getReg();
2979 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
2980
2981 // Get the shift amount
2982 auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstReg, MRI);
2983 if (!VRegAndVal)
2984 return false;
2985
2986 // Return true if the shift amount >= bitwidth.
2987 return (VRegAndVal->Value.uge(DstTy.getSizeInBits()));
2988}
2989
2990void CombinerHelper::applyFunnelShiftConstantModulo(MachineInstr &MI) const {
2991 assert((MI.getOpcode() == TargetOpcode::G_FSHL ||
2992 MI.getOpcode() == TargetOpcode::G_FSHR) &&
2993 "This is not a funnel shift operation");
2994
2995 Register ConstReg = MI.getOperand(3).getReg();
2996 LLT ConstTy = MRI.getType(ConstReg);
2997 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
2998
2999 auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstReg, MRI);
3000 assert((VRegAndVal) && "Value is not a constant");
3001
3002 // Calculate the new Shift Amount = Old Shift Amount % BitWidth
3003 APInt NewConst = VRegAndVal->Value.urem(
3004 APInt(ConstTy.getSizeInBits(), DstTy.getScalarSizeInBits()));
3005
3006 auto NewConstInstr = Builder.buildConstant(ConstTy, NewConst.getZExtValue());
3007 Builder.buildInstr(
3008 MI.getOpcode(), {MI.getOperand(0)},
3009 {MI.getOperand(1), MI.getOperand(2), NewConstInstr.getReg(0)});
3010
3011 MI.eraseFromParent();
3012}
3013
3014bool CombinerHelper::matchSelectSameVal(MachineInstr &MI) const {
3015 assert(MI.getOpcode() == TargetOpcode::G_SELECT);
3016 // Match (cond ? x : x)
3017 return matchEqualDefs(MI.getOperand(2), MI.getOperand(3)) &&
3018 canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(2).getReg(),
3019 MRI);
3020}
3021
3022bool CombinerHelper::matchBinOpSameVal(MachineInstr &MI) const {
3023 return matchEqualDefs(MI.getOperand(1), MI.getOperand(2)) &&
3024 canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(),
3025 MRI);
3026}
3027
3028bool CombinerHelper::matchOperandIsZero(MachineInstr &MI,
3029 unsigned OpIdx) const {
3030 return matchConstantOp(MI.getOperand(OpIdx), 0) &&
3031 canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(OpIdx).getReg(),
3032 MRI);
3033}
3034
3035bool CombinerHelper::matchOperandIsUndef(MachineInstr &MI,
3036 unsigned OpIdx) const {
3037 MachineOperand &MO = MI.getOperand(OpIdx);
3038 return MO.isReg() &&
3039 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
3040}
3041
3042bool CombinerHelper::matchOperandIsKnownToBeAPowerOfTwo(MachineInstr &MI,
3043 unsigned OpIdx) const {
3044 MachineOperand &MO = MI.getOperand(OpIdx);
3045 return isKnownToBeAPowerOfTwo(MO.getReg(), MRI, VT);
3046}
3047
3048void CombinerHelper::replaceInstWithFConstant(MachineInstr &MI,
3049 double C) const {
3050 assert(MI.getNumDefs() == 1 && "Expected only one def?");
3051 Builder.buildFConstant(MI.getOperand(0), C);
3052 MI.eraseFromParent();
3053}
3054
3055void CombinerHelper::replaceInstWithConstant(MachineInstr &MI,
3056 int64_t C) const {
3057 assert(MI.getNumDefs() == 1 && "Expected only one def?");
3058 Builder.buildConstant(MI.getOperand(0), C);
3059 MI.eraseFromParent();
3060}
3061
3062void CombinerHelper::replaceInstWithConstant(MachineInstr &MI, APInt C) const {
3063 assert(MI.getNumDefs() == 1 && "Expected only one def?");
3064 Builder.buildConstant(MI.getOperand(0), C);
3065 MI.eraseFromParent();
3066}
3067
3068void CombinerHelper::replaceInstWithFConstant(MachineInstr &MI,
3069 ConstantFP *CFP) const {
3070 assert(MI.getNumDefs() == 1 && "Expected only one def?");
3071 Builder.buildFConstant(MI.getOperand(0), CFP->getValueAPF());
3072 MI.eraseFromParent();
3073}
3074
3075void CombinerHelper::replaceInstWithUndef(MachineInstr &MI) const {
3076 assert(MI.getNumDefs() == 1 && "Expected only one def?");
3077 Builder.buildUndef(MI.getOperand(0));
3078 MI.eraseFromParent();
3079}
3080
3081bool CombinerHelper::matchSimplifyAddToSub(
3082 MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) const {
3083 Register LHS = MI.getOperand(1).getReg();
3084 Register RHS = MI.getOperand(2).getReg();
3085 Register &NewLHS = std::get<0>(MatchInfo);
3086 Register &NewRHS = std::get<1>(MatchInfo);
3087
3088 // Helper lambda to check for opportunities for
3089 // ((0-A) + B) -> B - A
3090 // (A + (0-B)) -> A - B
3091 auto CheckFold = [&](Register &MaybeSub, Register &MaybeNewLHS) {
3092 if (!mi_match(MaybeSub, MRI, m_Neg(m_Reg(NewRHS))))
3093 return false;
3094 NewLHS = MaybeNewLHS;
3095 return true;
3096 };
3097
3098 return CheckFold(LHS, RHS) || CheckFold(RHS, LHS);
3099}
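// Illustrative example (hypothetical MIR):
//   %n:_(s32) = G_SUB %zero, %a
//   %r:_(s32) = G_ADD %n, %b
// is recorded as (NewLHS = %b, NewRHS = %a), so the apply emits
//   %r:_(s32) = G_SUB %b, %a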
3100
3101bool CombinerHelper::matchCombineInsertVecElts(
3102 MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) const {
3103 assert(MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT &&
3104 "Invalid opcode");
3105 Register DstReg = MI.getOperand(0).getReg();
3106 LLT DstTy = MRI.getType(DstReg);
3107 assert(DstTy.isVector() && "Invalid G_INSERT_VECTOR_ELT?");
3108
3109 if (DstTy.isScalableVector())
3110 return false;
3111
3112 unsigned NumElts = DstTy.getNumElements();
3113 // If this MI is part of a sequence of insert_vec_elts, then
3114 // don't do the combine in the middle of the sequence.
3115 if (MRI.hasOneUse(DstReg) && MRI.use_instr_begin(DstReg)->getOpcode() ==
3116 TargetOpcode::G_INSERT_VECTOR_ELT)
3117 return false;
3118 MachineInstr *CurrInst = &MI;
3119 MachineInstr *TmpInst;
3120 int64_t IntImm;
3121 Register TmpReg;
3122 MatchInfo.resize(NumElts);
3123 while (mi_match(
3124 CurrInst->getOperand(0).getReg(), MRI,
3125 m_GInsertVecElt(m_MInstr(TmpInst), m_Reg(TmpReg), m_ICst(IntImm)))) {
3126 if (IntImm >= NumElts || IntImm < 0)
3127 return false;
3128 if (!MatchInfo[IntImm])
3129 MatchInfo[IntImm] = TmpReg;
3130 CurrInst = TmpInst;
3131 }
3132 // Variable index.
3133 if (CurrInst->getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
3134 return false;
3135 if (TmpInst->getOpcode() == TargetOpcode::G_BUILD_VECTOR) {
3136 for (unsigned I = 1; I < TmpInst->getNumOperands(); ++I) {
3137 if (!MatchInfo[I - 1].isValid())
3138 MatchInfo[I - 1] = TmpInst->getOperand(I).getReg();
3139 }
3140 return true;
3141 }
3142 // If we didn't end in a G_IMPLICIT_DEF and the source is not fully
3143 // overwritten, bail out.
3144 return TmpInst->getOpcode() == TargetOpcode::G_IMPLICIT_DEF ||
3145 all_of(MatchInfo, [](Register Reg) { return !!Reg; });
3146}
3147
3148void CombinerHelper::applyCombineInsertVecElts(
3149 MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) const {
3150 Register UndefReg;
3151 auto GetUndef = [&]() {
3152 if (UndefReg)
3153 return UndefReg;
3154 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
3155 UndefReg = Builder.buildUndef(DstTy.getScalarType()).getReg(0);
3156 return UndefReg;
3157 };
3158 for (Register &Reg : MatchInfo) {
3159 if (!Reg)
3160 Reg = GetUndef();
3161 }
3162 Builder.buildBuildVector(MI.getOperand(0).getReg(), MatchInfo);
3163 MI.eraseFromParent();
3164}
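// Illustrative example (hypothetical MIR):
//   %v1 = G_INSERT_VECTOR_ELT %undef, %a, 0
//   %v2 = G_INSERT_VECTOR_ELT %v1, %b, 1
// becomes
//   %v2 = G_BUILD_VECTOR %a, %b
// with any lane that was never written filled in with an implicit def.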
3165
3166void CombinerHelper::applySimplifyAddToSub(
3167 MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) const {
3168 Register SubLHS, SubRHS;
3169 std::tie(SubLHS, SubRHS) = MatchInfo;
3170 Builder.buildSub(MI.getOperand(0).getReg(), SubLHS, SubRHS);
3171 MI.eraseFromParent();
3172}
3173
3174bool CombinerHelper::matchHoistLogicOpWithSameOpcodeHands(
3175 MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) const {
3176 // Matches: logic (hand x, ...), (hand y, ...) -> hand (logic x, y), ...
3177 //
3178 // Creates the new hand + logic instruction (but does not insert them.)
3179 //
3180 // On success, MatchInfo is populated with the new instructions. These are
3181 // inserted in applyHoistLogicOpWithSameOpcodeHands.
3182 unsigned LogicOpcode = MI.getOpcode();
3183 assert(LogicOpcode == TargetOpcode::G_AND ||
3184 LogicOpcode == TargetOpcode::G_OR ||
3185 LogicOpcode == TargetOpcode::G_XOR);
3186 MachineIRBuilder MIB(MI);
3187 Register Dst = MI.getOperand(0).getReg();
3188 Register LHSReg = MI.getOperand(1).getReg();
3189 Register RHSReg = MI.getOperand(2).getReg();
3190
3191 // Don't recompute anything.
3192 if (!MRI.hasOneNonDBGUse(LHSReg) || !MRI.hasOneNonDBGUse(RHSReg))
3193 return false;
3194
3195 // Make sure we have (hand x, ...), (hand y, ...)
3196 MachineInstr *LeftHandInst = getDefIgnoringCopies(LHSReg, MRI);
3197 MachineInstr *RightHandInst = getDefIgnoringCopies(RHSReg, MRI);
3198 if (!LeftHandInst || !RightHandInst)
3199 return false;
3200 unsigned HandOpcode = LeftHandInst->getOpcode();
3201 if (HandOpcode != RightHandInst->getOpcode())
3202 return false;
3203 if (LeftHandInst->getNumOperands() < 2 ||
3204 !LeftHandInst->getOperand(1).isReg() ||
3205 RightHandInst->getNumOperands() < 2 ||
3206 !RightHandInst->getOperand(1).isReg())
3207 return false;
3208
3209 // Make sure the types match up, and if we're doing this post-legalization,
3210 // we end up with legal types.
3211 Register X = LeftHandInst->getOperand(1).getReg();
3212 Register Y = RightHandInst->getOperand(1).getReg();
3213 LLT XTy = MRI.getType(X);
3214 LLT YTy = MRI.getType(Y);
3215 if (!XTy.isValid() || XTy != YTy)
3216 return false;
3217
3218 // Optional extra source register.
3219 Register ExtraHandOpSrcReg;
3220 switch (HandOpcode) {
3221 default:
3222 return false;
3223 case TargetOpcode::G_ANYEXT:
3224 case TargetOpcode::G_SEXT:
3225 case TargetOpcode::G_ZEXT: {
3226 // Match: logic (ext X), (ext Y) --> ext (logic X, Y)
3227 break;
3228 }
3229 case TargetOpcode::G_TRUNC: {
3230 // Match: logic (trunc X), (trunc Y) -> trunc (logic X, Y)
3231 const MachineFunction *MF = MI.getMF();
3232 LLVMContext &Ctx = MF->getFunction().getContext();
3233
3234 LLT DstTy = MRI.getType(Dst);
3235 const TargetLowering &TLI = getTargetLowering();
3236
3237 // Be extra careful sinking truncate. If it's free, there's no benefit in
3238 // widening a binop.
3239 if (TLI.isZExtFree(DstTy, XTy, Ctx) && TLI.isTruncateFree(XTy, DstTy, Ctx))
3240 return false;
3241 break;
3242 }
3243 case TargetOpcode::G_AND:
3244 case TargetOpcode::G_ASHR:
3245 case TargetOpcode::G_LSHR:
3246 case TargetOpcode::G_SHL: {
3247 // Match: logic (binop x, z), (binop y, z) -> binop (logic x, y), z
3248 MachineOperand &ZOp = LeftHandInst->getOperand(2);
3249 if (!matchEqualDefs(ZOp, RightHandInst->getOperand(2)))
3250 return false;
3251 ExtraHandOpSrcReg = ZOp.getReg();
3252 break;
3253 }
3254 }
3255
3256 if (!isLegalOrBeforeLegalizer({LogicOpcode, {XTy, YTy}}))
3257 return false;
3258
3259 // Record the steps to build the new instructions.
3260 //
3261 // Steps to build (logic x, y)
3262 auto NewLogicDst = MRI.createGenericVirtualRegister(XTy);
3263 OperandBuildSteps LogicBuildSteps = {
3264 [=](MachineInstrBuilder &MIB) { MIB.addDef(NewLogicDst); },
3265 [=](MachineInstrBuilder &MIB) { MIB.addReg(X); },
3266 [=](MachineInstrBuilder &MIB) { MIB.addReg(Y); }};
3267 InstructionBuildSteps LogicSteps(LogicOpcode, LogicBuildSteps);
3268
3269 // Steps to build hand (logic x, y), ...z
3270 OperandBuildSteps HandBuildSteps = {
3271 [=](MachineInstrBuilder &MIB) { MIB.addDef(Dst); },
3272 [=](MachineInstrBuilder &MIB) { MIB.addReg(NewLogicDst); }};
3273 if (ExtraHandOpSrcReg.isValid())
3274 HandBuildSteps.push_back(
3275 [=](MachineInstrBuilder &MIB) { MIB.addReg(ExtraHandOpSrcReg); });
3276 InstructionBuildSteps HandSteps(HandOpcode, HandBuildSteps);
3277
3278 MatchInfo = InstructionStepsMatchInfo({LogicSteps, HandSteps});
3279 return true;
3280}
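// Illustrative example (hypothetical MIR):
//   %r = G_AND (G_LSHR %x, %amt), (G_LSHR %y, %amt)
// is recorded so that the build steps later emit
//   %r = G_LSHR (G_AND %x, %y), %amt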
3281
3282void CombinerHelper::applyBuildInstructionSteps(
3283 MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) const {
3284 assert(MatchInfo.InstrsToBuild.size() &&
3285 "Expected at least one instr to build?");
3286 for (auto &InstrToBuild : MatchInfo.InstrsToBuild) {
3287 assert(InstrToBuild.Opcode && "Expected a valid opcode?");
3288 assert(InstrToBuild.OperandFns.size() && "Expected at least one operand?");
3289 MachineInstrBuilder Instr = Builder.buildInstr(InstrToBuild.Opcode);
3290 for (auto &OperandFn : InstrToBuild.OperandFns)
3291 OperandFn(Instr);
3292 }
3293 MI.eraseFromParent();
3294}
3295
3296bool CombinerHelper::matchAshrShlToSextInreg(
3297 MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) const {
3298 assert(MI.getOpcode() == TargetOpcode::G_ASHR);
3299 int64_t ShlCst, AshrCst;
3300 Register Src;
3301 if (!mi_match(MI.getOperand(0).getReg(), MRI,
3302 m_GAShr(m_GShl(m_Reg(Src), m_ICstOrSplat(ShlCst)),
3303 m_ICstOrSplat(AshrCst))))
3304 return false;
3305 if (ShlCst != AshrCst)
3306 return false;
3307 if (!isLegalOrBeforeLegalizer(
3308 {TargetOpcode::G_SEXT_INREG, {MRI.getType(Src)}}))
3309 return false;
3310 MatchInfo = std::make_tuple(Src, ShlCst);
3311 return true;
3312}
3313
3314void CombinerHelper::applyAshShlToSextInreg(
3315 MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) const {
3316 assert(MI.getOpcode() == TargetOpcode::G_ASHR);
3317 Register Src;
3318 int64_t ShiftAmt;
3319 std::tie(Src, ShiftAmt) = MatchInfo;
3320 unsigned Size = MRI.getType(Src).getScalarSizeInBits();
3321 Builder.buildSExtInReg(MI.getOperand(0).getReg(), Src, Size - ShiftAmt);
3322 MI.eraseFromParent();
3323}
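// Illustrative example on s32 (hypothetical MIR, constants shown inline):
//   %r = G_ASHR (G_SHL %x, 24), 24
// becomes
//   %r = G_SEXT_INREG %x, 8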
3324
3325/// and(and(x, C1), C2) -> C1&C2 ? and(x, C1&C2) : 0
3326bool CombinerHelper::matchOverlappingAnd(
3327 MachineInstr &MI,
3328 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
3329 assert(MI.getOpcode() == TargetOpcode::G_AND);
3330
3331 Register Dst = MI.getOperand(0).getReg();
3332 LLT Ty = MRI.getType(Dst);
3333
3334 Register R;
3335 int64_t C1;
3336 int64_t C2;
3337 if (!mi_match(
3338 Dst, MRI,
3339 m_GAnd(m_GAnd(m_Reg(R), m_ICst(C1)), m_ICst(C2))))
3340 return false;
3341
3342 MatchInfo = [=](MachineIRBuilder &B) {
3343 if (C1 & C2) {
3344 B.buildAnd(Dst, R, B.buildConstant(Ty, C1 & C2));
3345 return;
3346 }
3347 auto Zero = B.buildConstant(Ty, 0);
3348 replaceRegWith(MRI, Dst, Zero->getOperand(0).getReg());
3349 };
3350 return true;
3351}
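// Illustrative examples (hypothetical MIR, masks shown inline):
//   G_AND (G_AND %x, 0xff), 0x0f  ->  G_AND %x, 0x0f
//   G_AND (G_AND %x, 0xf0), 0x0f  ->  G_CONSTANT 0   (the masks don't overlap)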
3352
3353bool CombinerHelper::matchRedundantAnd(MachineInstr &MI,
3354 Register &Replacement) const {
3355 // Given
3356 //
3357 // %y:_(sN) = G_SOMETHING
3358 // %x:_(sN) = G_SOMETHING
3359 // %res:_(sN) = G_AND %x, %y
3360 //
3361 // Eliminate the G_AND when it is known that x & y == x or x & y == y.
3362 //
3363 // Patterns like this can appear as a result of legalization. E.g.
3364 //
3365 // %cmp:_(s32) = G_ICMP intpred(pred), %x(s32), %y
3366 // %one:_(s32) = G_CONSTANT i32 1
3367 // %and:_(s32) = G_AND %cmp, %one
3368 //
3369 // In this case, G_ICMP only produces a single bit, so x & 1 == x.
3370 assert(MI.getOpcode() == TargetOpcode::G_AND);
3371 if (!VT)
3372 return false;
3373
3374 Register AndDst = MI.getOperand(0).getReg();
3375 Register LHS = MI.getOperand(1).getReg();
3376 Register RHS = MI.getOperand(2).getReg();
3377
3378 // Check the RHS (maybe a constant) first, and if we have no KnownBits there,
3379 // we can't do anything. If we do, then it depends on whether we have
3380 // KnownBits on the LHS.
3381 KnownBits RHSBits = VT->getKnownBits(RHS);
3382 if (RHSBits.isUnknown())
3383 return false;
3384
3385 KnownBits LHSBits = VT->getKnownBits(LHS);
3386
3387 // Check that x & Mask == x.
3388 // x & 1 == x, always
3389 // x & 0 == x, only if x is also 0
3390 // Meaning Mask has no effect if every bit is either one in Mask or zero in x.
3391 //
3392 // Check if we can replace AndDst with the LHS of the G_AND
3393 if (canReplaceReg(AndDst, LHS, MRI) &&
3394 (LHSBits.Zero | RHSBits.One).isAllOnes()) {
3395 Replacement = LHS;
3396 return true;
3397 }
3398
3399 // Check if we can replace AndDst with the RHS of the G_AND
3400 if (canReplaceReg(AndDst, RHS, MRI) &&
3401 (LHSBits.One | RHSBits.Zero).isAllOnes()) {
3402 Replacement = RHS;
3403 return true;
3404 }
3405
3406 return false;
3407}
3408
3410 Register &Replacement) const {
3411 // Given
3412 //
3413 // %y:_(sN) = G_SOMETHING
3414 // %x:_(sN) = G_SOMETHING
3415 // %res:_(sN) = G_OR %x, %y
3416 //
3417 // Eliminate the G_OR when it is known that x | y == x or x | y == y.
3418 assert(MI.getOpcode() == TargetOpcode::G_OR);
3419 if (!VT)
3420 return false;
3421
3422 Register OrDst = MI.getOperand(0).getReg();
3423 Register LHS = MI.getOperand(1).getReg();
3424 Register RHS = MI.getOperand(2).getReg();
3425
3426 KnownBits LHSBits = VT->getKnownBits(LHS);
3427 KnownBits RHSBits = VT->getKnownBits(RHS);
3428
3429 // Check that x | Mask == x.
3430 // x | 0 == x, always
3431 // x | 1 == x, only if x is also 1
3432 // Meaning Mask has no effect if every bit is either zero in Mask or one in x.
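  // E.g. if the low bit of x is known to be 1 and Mask is known to be at
  // most 1, then x | Mask == x and the G_OR can be replaced by x.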
3433 //
3434 // Check if we can replace OrDst with the LHS of the G_OR
3435 if (canReplaceReg(OrDst, LHS, MRI) &&
3436 (LHSBits.One | RHSBits.Zero).isAllOnes()) {
3437 Replacement = LHS;
3438 return true;
3439 }
3440
3441 // Check if we can replace OrDst with the RHS of the G_OR
3442 if (canReplaceReg(OrDst, RHS, MRI) &&
3443 (LHSBits.Zero | RHSBits.One).isAllOnes()) {
3444 Replacement = RHS;
3445 return true;
3446 }
3447
3448 return false;
3449}
3450
3452 // If the input is already sign extended, just drop the extension.
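  // E.g. on s32, if computeNumSignBits(Src) >= 25, a G_SEXT_INREG of width 8
  // (sign bit at position 7) changes nothing and can be dropped.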
3453 Register Src = MI.getOperand(1).getReg();
3454 unsigned ExtBits = MI.getOperand(2).getImm();
3455 unsigned TypeSize = MRI.getType(Src).getScalarSizeInBits();
3456 return VT->computeNumSignBits(Src) >= (TypeSize - ExtBits + 1);
3457}
3458
3459static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits,
3460 int64_t Cst, bool IsVector, bool IsFP) {
3461 // For i1, Cst will always be -1 regardless of boolean contents.
3462 return (ScalarSizeBits == 1 && Cst == -1) ||
3463 isConstTrueVal(TLI, Cst, IsVector, IsFP);
3464}
3465
3466// This pattern aims to match the following shape to avoid extra mov
3467// instructions
3468// G_BUILD_VECTOR(
3469// G_UNMERGE_VALUES(src, 0)
3470// G_UNMERGE_VALUES(src, 1)
3471// G_IMPLICIT_DEF
3472// G_IMPLICIT_DEF
3473// )
3474// ->
3475// G_CONCAT_VECTORS(
3476// src,
3477// undef
3478// )
3481 Register &UnmergeSrc) const {
3482 auto &BV = cast<GBuildVector>(MI);
3483
3484 unsigned BuildUseCount = BV.getNumSources();
3485 if (BuildUseCount % 2 != 0)
3486 return false;
3487
3488 unsigned NumUnmerge = BuildUseCount / 2;
3489
3490 auto *Unmerge = getOpcodeDef<GUnmerge>(BV.getSourceReg(0), MRI);
3491
3492 // Check the first operand is an unmerge and has the correct number of
3493 // operands
3494 if (!Unmerge || Unmerge->getNumDefs() != NumUnmerge)
3495 return false;
3496
3497 UnmergeSrc = Unmerge->getSourceReg();
3498
3499 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
3500 LLT UnmergeSrcTy = MRI.getType(UnmergeSrc);
3501
3502 if (!UnmergeSrcTy.isVector())
3503 return false;
3504
3505 // Ensure we only generate legal instructions post-legalizer
3506 if (!IsPreLegalize &&
3507 !isLegal({TargetOpcode::G_CONCAT_VECTORS, {DstTy, UnmergeSrcTy}}))
3508 return false;
3509
3510 // Check that all of the operands before the midpoint come from the same
3511 // unmerge and are in the same order as they are used in the build_vector
3512 for (unsigned I = 0; I < NumUnmerge; ++I) {
3513 auto MaybeUnmergeReg = BV.getSourceReg(I);
3514 auto *LoopUnmerge = getOpcodeDef<GUnmerge>(MaybeUnmergeReg, MRI);
3515
3516 if (!LoopUnmerge || LoopUnmerge != Unmerge)
3517 return false;
3518
3519 if (LoopUnmerge->getOperand(I).getReg() != MaybeUnmergeReg)
3520 return false;
3521 }
3522
3523 // Check that all of the unmerged values are used
3524 if (Unmerge->getNumDefs() != NumUnmerge)
3525 return false;
3526
3527 // Check that all of the operands after the midpoint are undefs.
3528 for (unsigned I = NumUnmerge; I < BuildUseCount; ++I) {
3529 auto *Undef = getDefIgnoringCopies(BV.getSourceReg(I), MRI);
3530
3531 if (Undef->getOpcode() != TargetOpcode::G_IMPLICIT_DEF)
3532 return false;
3533 }
3534
3535 return true;
3536}
3537
3541 Register &UnmergeSrc) const {
3542 assert(UnmergeSrc && "Expected there to be one matching G_UNMERGE_VALUES");
3543 B.setInstrAndDebugLoc(MI);
3544
3545 Register UndefVec = B.buildUndef(MRI.getType(UnmergeSrc)).getReg(0);
3546 B.buildConcatVectors(MI.getOperand(0), {UnmergeSrc, UndefVec});
3547
3548 MI.eraseFromParent();
3549}
3550
3551// This combine tries to reduce the number of scalarised G_TRUNC instructions by
3552// using vector truncates instead
3553//
3554// EXAMPLE:
3555// %a(s32), %b(s32) = G_UNMERGE_VALUES %src(<2 x s32>)
3556// %T_a(s16) = G_TRUNC %a(s32)
3557// %T_b(s16) = G_TRUNC %b(s32)
3558// %Undef(s16) = G_IMPLICIT_DEF
3559// %dst(<4 x s16>) = G_BUILD_VECTOR %T_a(s16), %T_b(s16), %Undef(s16), %Undef(s16)
3560//
3561// ===>
3562// %Undef(<2 x s32>) = G_IMPLICIT_DEF
3563// %Mid(<4 x s32>) = G_CONCAT_VECTORS %src(<2 x s32>), %Undef(<2 x s32>)
3564// %dst(<4 x s16>) = G_TRUNC %Mid(<4 x s32>)
3565//
3566// Only matches sources made up of G_TRUNCs followed by G_IMPLICIT_DEFs
3568 Register &MatchInfo) const {
3569 auto BuildMI = cast<GBuildVector>(&MI);
3570 unsigned NumOperands = BuildMI->getNumSources();
3571 LLT DstTy = MRI.getType(BuildMI->getReg(0));
3572
3573 // Check the G_BUILD_VECTOR sources
3574 unsigned I;
3575 MachineInstr *UnmergeMI = nullptr;
3576
3577 // Check all source TRUNCs come from the same UNMERGE instruction
3578 for (I = 0; I < NumOperands; ++I) {
3579 auto SrcMI = MRI.getVRegDef(BuildMI->getSourceReg(I));
3580 auto SrcMIOpc = SrcMI->getOpcode();
3581
3582 // Check if the G_TRUNC instructions all come from the same MI
3583 if (SrcMIOpc == TargetOpcode::G_TRUNC) {
3584 if (!UnmergeMI) {
3585 UnmergeMI = MRI.getVRegDef(SrcMI->getOperand(1).getReg());
3586 if (UnmergeMI->getOpcode() != TargetOpcode::G_UNMERGE_VALUES)
3587 return false;
3588 } else {
3589 auto UnmergeSrcMI = MRI.getVRegDef(SrcMI->getOperand(1).getReg());
3590 if (UnmergeMI != UnmergeSrcMI)
3591 return false;
3592 }
3593 } else {
3594 break;
3595 }
3596 }
3597 if (I < 2)
3598 return false;
3599
3600 // Check the remaining source elements are only G_IMPLICIT_DEF
3601 for (; I < NumOperands; ++I) {
3602 auto SrcMI = MRI.getVRegDef(BuildMI->getSourceReg(I));
3603 auto SrcMIOpc = SrcMI->getOpcode();
3604
3605 if (SrcMIOpc != TargetOpcode::G_IMPLICIT_DEF)
3606 return false;
3607 }
3608
3609 // Check the size of unmerge source
3610 MatchInfo = cast<GUnmerge>(UnmergeMI)->getSourceReg();
3611 LLT UnmergeSrcTy = MRI.getType(MatchInfo);
3612 if (!DstTy.getElementCount().isKnownMultipleOf(UnmergeSrcTy.getNumElements()))
3613 return false;
3614
3615 // Check the unmerge source and destination element types match
3616 LLT UnmergeSrcEltTy = UnmergeSrcTy.getElementType();
3617 Register UnmergeDstReg = UnmergeMI->getOperand(0).getReg();
3618 LLT UnmergeDstEltTy = MRI.getType(UnmergeDstReg);
3619 if (UnmergeSrcEltTy != UnmergeDstEltTy)
3620 return false;
3621
3622 // Only generate legal instructions post-legalizer
3623 if (!IsPreLegalize) {
3624 LLT MidTy = DstTy.changeElementType(UnmergeSrcTy.getScalarType());
3625
3626 if (DstTy.getElementCount() != UnmergeSrcTy.getElementCount() &&
3627 !isLegal({TargetOpcode::G_CONCAT_VECTORS, {MidTy, UnmergeSrcTy}}))
3628 return false;
3629
3630 if (!isLegal({TargetOpcode::G_TRUNC, {DstTy, MidTy}}))
3631 return false;
3632 }
3633
3634 return true;
3635}
3636
3638 Register &MatchInfo) const {
3639 Register MidReg;
3640 auto BuildMI = cast<GBuildVector>(&MI);
3641 Register DstReg = BuildMI->getReg(0);
3642 LLT DstTy = MRI.getType(DstReg);
3643 LLT UnmergeSrcTy = MRI.getType(MatchInfo);
3644 unsigned DstTyNumElt = DstTy.getNumElements();
3645 unsigned UnmergeSrcTyNumElt = UnmergeSrcTy.getNumElements();
3646
3647 // No need to pad vector if only G_TRUNC is needed
3648 if (DstTyNumElt / UnmergeSrcTyNumElt == 1) {
3649 MidReg = MatchInfo;
3650 } else {
3651 Register UndefReg = Builder.buildUndef(UnmergeSrcTy).getReg(0);
3652 SmallVector<Register> ConcatRegs = {MatchInfo};
3653 for (unsigned I = 1; I < DstTyNumElt / UnmergeSrcTyNumElt; ++I)
3654 ConcatRegs.push_back(UndefReg);
3655
3656 auto MidTy = DstTy.changeElementType(UnmergeSrcTy.getScalarType());
3657 MidReg = Builder.buildConcatVectors(MidTy, ConcatRegs).getReg(0);
3658 }
3659
3660 Builder.buildTrunc(DstReg, MidReg);
3661 MI.eraseFromParent();
3662}
3663
3665 MachineInstr &MI, SmallVectorImpl<Register> &RegsToNegate) const {
3666 assert(MI.getOpcode() == TargetOpcode::G_XOR);
3667 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3668 const auto &TLI = *Builder.getMF().getSubtarget().getTargetLowering();
3669 Register XorSrc;
3670 Register CstReg;
3671 // We match xor(src, true) here.
3672 if (!mi_match(MI.getOperand(0).getReg(), MRI,
3673 m_GXor(m_Reg(XorSrc), m_Reg(CstReg))))
3674 return false;
3675
3676 if (!MRI.hasOneNonDBGUse(XorSrc))
3677 return false;
3678
3679 // Check that XorSrc is the root of a tree of comparisons combined with ANDs
3680 // and ORs. The suffix of RegsToNegate starting from index I is used as a
3681 // work list of tree nodes to visit.
3682 RegsToNegate.push_back(XorSrc);
3683 // Remember whether the comparisons are all integer or all floating point.
3684 bool IsInt = false;
3685 bool IsFP = false;
3686 for (unsigned I = 0; I < RegsToNegate.size(); ++I) {
3687 Register Reg = RegsToNegate[I];
3688 if (!MRI.hasOneNonDBGUse(Reg))
3689 return false;
3690 MachineInstr *Def = MRI.getVRegDef(Reg);
3691 switch (Def->getOpcode()) {
3692 default:
3693 // Don't match if the tree contains anything other than ANDs, ORs and
3694 // comparisons.
3695 return false;
3696 case TargetOpcode::G_ICMP:
3697 if (IsFP)
3698 return false;
3699 IsInt = true;
3700 // When we apply the combine we will invert the predicate.
3701 break;
3702 case TargetOpcode::G_FCMP:
3703 if (IsInt)
3704 return false;
3705 IsFP = true;
3706 // When we apply the combine we will invert the predicate.
3707 break;
3708 case TargetOpcode::G_AND:
3709 case TargetOpcode::G_OR:
3710 // Implement De Morgan's laws:
3711 // ~(x & y) -> ~x | ~y
3712 // ~(x | y) -> ~x & ~y
3713 // When we apply the combine we will change the opcode and recursively
3714 // negate the operands.
3715 RegsToNegate.push_back(Def->getOperand(1).getReg());
3716 RegsToNegate.push_back(Def->getOperand(2).getReg());
3717 break;
3718 }
3719 }
3720
3721 // Now we know whether the comparisons are integer or floating point, check
3722 // the constant in the xor.
3723 int64_t Cst;
3724 if (Ty.isVector()) {
3725 MachineInstr *CstDef = MRI.getVRegDef(CstReg);
3726 auto MaybeCst = getIConstantSplatSExtVal(*CstDef, MRI);
3727 if (!MaybeCst)
3728 return false;
3729 if (!isConstValidTrue(TLI, Ty.getScalarSizeInBits(), *MaybeCst, true, IsFP))
3730 return false;
3731 } else {
3732 if (!mi_match(CstReg, MRI, m_ICst(Cst)))
3733 return false;
3734 if (!isConstValidTrue(TLI, Ty.getSizeInBits(), Cst, false, IsFP))
3735 return false;
3736 }
3737
3738 return true;
3739}
3740
3742 MachineInstr &MI, SmallVectorImpl<Register> &RegsToNegate) const {
3743 for (Register Reg : RegsToNegate) {
3744 MachineInstr *Def = MRI.getVRegDef(Reg);
3745 Observer.changingInstr(*Def);
3746 // For each comparison, invert the opcode. For each AND and OR, change the
3747 // opcode.
3748 switch (Def->getOpcode()) {
3749 default:
3750 llvm_unreachable("Unexpected opcode");
3751 case TargetOpcode::G_ICMP:
3752 case TargetOpcode::G_FCMP: {
3753 MachineOperand &PredOp = Def->getOperand(1);
3756 PredOp.setPredicate(NewP);
3757 break;
3758 }
3759 case TargetOpcode::G_AND:
3760 Def->setDesc(Builder.getTII().get(TargetOpcode::G_OR));
3761 break;
3762 case TargetOpcode::G_OR:
3763 Def->setDesc(Builder.getTII().get(TargetOpcode::G_AND));
3764 break;
3765 }
3766 Observer.changedInstr(*Def);
3767 }
3768
3769 replaceRegWith(MRI, MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
3770 MI.eraseFromParent();
3771}
3772
3774 MachineInstr &MI, std::pair<Register, Register> &MatchInfo) const {
3775 // Match (xor (and x, y), y) (or any of its commuted cases)
3776 assert(MI.getOpcode() == TargetOpcode::G_XOR);
3777 Register &X = MatchInfo.first;
3778 Register &Y = MatchInfo.second;
3779 Register AndReg = MI.getOperand(1).getReg();
3780 Register SharedReg = MI.getOperand(2).getReg();
3781
3782 // Find a G_AND on either side of the G_XOR.
3783 // Look for one of
3784 //
3785 // (xor (and x, y), SharedReg)
3786 // (xor SharedReg, (and x, y))
3787 if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y)))) {
3788 std::swap(AndReg, SharedReg);
3789 if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y))))
3790 return false;
3791 }
3792
3793 // Only do this if we'll eliminate the G_AND.
3794 if (!MRI.hasOneNonDBGUse(AndReg))
3795 return false;
3796
3797 // We can combine if SharedReg is the same as either the LHS or RHS of the
3798 // G_AND.
3799 if (Y != SharedReg)
3800 std::swap(X, Y);
3801 return Y == SharedReg;
3802}
3803
3805 MachineInstr &MI, std::pair<Register, Register> &MatchInfo) const {
3806 // Fold (xor (and x, y), y) -> (and (not x), y)
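  // Per-bit reasoning: (x & y) ^ y is 1 exactly when y is 1 and x is 0,
  // which is (~x) & y.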
3807 Register X, Y;
3808 std::tie(X, Y) = MatchInfo;
3809 auto Not = Builder.buildNot(MRI.getType(X), X);
3810 Observer.changingInstr(MI);
3811 MI.setDesc(Builder.getTII().get(TargetOpcode::G_AND));
3812 MI.getOperand(1).setReg(Not->getOperand(0).getReg());
3813 MI.getOperand(2).setReg(Y);
3814 Observer.changedInstr(MI);
3815}
3816
3818 auto &PtrAdd = cast<GPtrAdd>(MI);
3819 Register DstReg = PtrAdd.getReg(0);
3820 LLT Ty = MRI.getType(DstReg);
3821 const DataLayout &DL = Builder.getMF().getDataLayout();
3822
3823 if (DL.isNonIntegralAddressSpace(Ty.getScalarType().getAddressSpace()))
3824 return false;
3825
3826 if (Ty.isPointer()) {
3827 auto ConstVal = getIConstantVRegVal(PtrAdd.getBaseReg(), MRI);
3828 return ConstVal && *ConstVal == 0;
3829 }
3830
3831 assert(Ty.isVector() && "Expecting a vector type");
3832 const MachineInstr *VecMI = MRI.getVRegDef(PtrAdd.getBaseReg());
3833 return isBuildVectorAllZeros(*VecMI, MRI);
3834}
3835
3837 auto &PtrAdd = cast<GPtrAdd>(MI);
3838 Builder.buildIntToPtr(PtrAdd.getReg(0), PtrAdd.getOffsetReg());
3839 PtrAdd.eraseFromParent();
3840}
3841
3842/// The second source operand is known to be a power of 2.
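/// E.g. (illustrative): (G_UREM x, 8) -> (G_AND x, 7).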
3844 Register DstReg = MI.getOperand(0).getReg();
3845 Register Src0 = MI.getOperand(1).getReg();
3846 Register Pow2Src1 = MI.getOperand(2).getReg();
3847 LLT Ty = MRI.getType(DstReg);
3848
3849 // Fold (urem x, pow2) -> (and x, pow2-1)
3850 auto NegOne = Builder.buildConstant(Ty, -1);
3851 auto Add = Builder.buildAdd(Ty, Pow2Src1, NegOne);
3852 Builder.buildAnd(DstReg, Src0, Add);
3853 MI.eraseFromParent();
3854}
3855
3857 unsigned &SelectOpNo) const {
3858 Register LHS = MI.getOperand(1).getReg();
3859 Register RHS = MI.getOperand(2).getReg();
3860
3861 Register OtherOperandReg = RHS;
3862 SelectOpNo = 1;
3863 MachineInstr *Select = MRI.getVRegDef(LHS);
3864
3865 // Don't do this unless the old select is going away. We want to eliminate the
3866 // binary operator, not replace a binop with a select.
3867 if (Select->getOpcode() != TargetOpcode::G_SELECT ||
3868 !MRI.hasOneNonDBGUse(LHS)) {
3869 OtherOperandReg = LHS;
3870 SelectOpNo = 2;
3871 Select = MRI.getVRegDef(RHS);
3872 if (Select->getOpcode() != TargetOpcode::G_SELECT ||
3873 !MRI.hasOneNonDBGUse(RHS))
3874 return false;
3875 }
3876
3877 MachineInstr *SelectLHS = MRI.getVRegDef(Select->getOperand(2).getReg());
3878 MachineInstr *SelectRHS = MRI.getVRegDef(Select->getOperand(3).getReg());
3879
3880 if (!isConstantOrConstantVector(*SelectLHS, MRI,
3881 /*AllowFP*/ true,
3882 /*AllowOpaqueConstants*/ false))
3883 return false;
3884 if (!isConstantOrConstantVector(*SelectRHS, MRI,
3885 /*AllowFP*/ true,
3886 /*AllowOpaqueConstants*/ false))
3887 return false;
3888
3889 unsigned BinOpcode = MI.getOpcode();
3890
3891 // We know that one of the operands is a select of constants. Now verify that
3892 // the other binary operator operand is either a constant, or we can handle a
3893 // variable.
3894 bool CanFoldNonConst =
3895 (BinOpcode == TargetOpcode::G_AND || BinOpcode == TargetOpcode::G_OR) &&
3896 (isNullOrNullSplat(*SelectLHS, MRI) ||
3897 isAllOnesOrAllOnesSplat(*SelectLHS, MRI)) &&
3898 (isNullOrNullSplat(*SelectRHS, MRI) ||
3899 isAllOnesOrAllOnesSplat(*SelectRHS, MRI));
3900 if (CanFoldNonConst)
3901 return true;
3902
3903 return isConstantOrConstantVector(*MRI.getVRegDef(OtherOperandReg), MRI,
3904 /*AllowFP*/ true,
3905 /*AllowOpaqueConstants*/ false);
3906}
3907
3908/// \p SelectOperand is the operand in binary operator \p MI that is the select
3909/// to fold.
3911 MachineInstr &MI, const unsigned &SelectOperand) const {
3912 Register Dst = MI.getOperand(0).getReg();
3913 Register LHS = MI.getOperand(1).getReg();
3914 Register RHS = MI.getOperand(2).getReg();
3915 MachineInstr *Select = MRI.getVRegDef(MI.getOperand(SelectOperand).getReg());
3916
3917 Register SelectCond = Select->getOperand(1).getReg();
3918 Register SelectTrue = Select->getOperand(2).getReg();
3919 Register SelectFalse = Select->getOperand(3).getReg();
3920
3921 LLT Ty = MRI.getType(Dst);
3922 unsigned BinOpcode = MI.getOpcode();
3923
3924 Register FoldTrue, FoldFalse;
3925
3926 // We have a select-of-constants followed by a binary operator with a
3927 // constant. Eliminate the binop by pulling the constant math into the select.
3928 // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
3929 if (SelectOperand == 1) {
3930 // TODO: SelectionDAG verifies this actually constant folds before
3931 // committing to the combine.
3932
3933 FoldTrue = Builder.buildInstr(BinOpcode, {Ty}, {SelectTrue, RHS}).getReg(0);
3934 FoldFalse =
3935 Builder.buildInstr(BinOpcode, {Ty}, {SelectFalse, RHS}).getReg(0);
3936 } else {
3937 FoldTrue = Builder.buildInstr(BinOpcode, {Ty}, {LHS, SelectTrue}).getReg(0);
3938 FoldFalse =
3939 Builder.buildInstr(BinOpcode, {Ty}, {LHS, SelectFalse}).getReg(0);
3940 }
3941
3942 Builder.buildSelect(Dst, SelectCond, FoldTrue, FoldFalse, MI.getFlags());
3943 MI.eraseFromParent();
3944}
3945
3946std::optional<SmallVector<Register, 8>>
3947CombinerHelper::findCandidatesForLoadOrCombine(const MachineInstr *Root) const {
3948 assert(Root->getOpcode() == TargetOpcode::G_OR && "Expected G_OR only!");
3949 // We want to detect if Root is part of a tree which represents a bunch
3950 // of loads being merged into a larger load. We'll try to recognize patterns
3951 // like, for example:
3952 //
3953 // Reg Reg
3954 // \ /
3955 // OR_1 Reg
3956 // \ /
3957 // OR_2
3958 // \ Reg
3959 // .. /
3960 // Root
3961 //
3962 // Reg Reg Reg Reg
3963 // \ / \ /
3964 // OR_1 OR_2
3965 // \ /
3966 // \ /
3967 // ...
3968 // Root
3969 //
3970 // Each "Reg" may have been produced by a load + some arithmetic. This
3971 // function will save each of them.
3972 SmallVector<Register, 8> RegsToVisit;
3974
3975 // In the "worst" case, we're dealing with a load for each byte. So, there
3976 // are at most #bytes - 1 ORs.
3977 const unsigned MaxIter =
3978 MRI.getType(Root->getOperand(0).getReg()).getSizeInBytes() - 1;
3979 for (unsigned Iter = 0; Iter < MaxIter; ++Iter) {
3980 if (Ors.empty())
3981 break;
3982 const MachineInstr *Curr = Ors.pop_back_val();
3983 Register OrLHS = Curr->getOperand(1).getReg();
3984 Register OrRHS = Curr->getOperand(2).getReg();
3985
3986 // In the combine, we want to eliminate the entire tree.
3987 if (!MRI.hasOneNonDBGUse(OrLHS) || !MRI.hasOneNonDBGUse(OrRHS))
3988 return std::nullopt;
3989
3990 // If it's a G_OR, save it and continue to walk. If it's not, then it's
3991 // something that may be a load + arithmetic.
3992 if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrLHS, MRI))
3993 Ors.push_back(Or);
3994 else
3995 RegsToVisit.push_back(OrLHS);
3996 if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrRHS, MRI))
3997 Ors.push_back(Or);
3998 else
3999 RegsToVisit.push_back(OrRHS);
4000 }
4001
4002 // We're going to try and merge each register into a wider power-of-2 type,
4003 // so we ought to have an even number of registers.
4004 if (RegsToVisit.empty() || RegsToVisit.size() % 2 != 0)
4005 return std::nullopt;
4006 return RegsToVisit;
4007}
4008
4009/// Helper function for findLoadOffsetsForLoadOrCombine.
4010///
4011/// Check if \p Reg is the result of loading a \p MemSizeInBits wide value,
4012/// and then moving that value into a specific byte offset.
4013///
4014/// e.g. x[i] << 24
4015///
4016/// \returns The load instruction and the byte offset it is moved into.
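/// E.g. with \p MemSizeInBits = 8, (shl (zextload x[i]), 24) yields byte
/// position 3 (24 / 8), and an unshifted load yields position 0.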
4017static std::optional<std::pair<GZExtLoad *, int64_t>>
4018matchLoadAndBytePosition(Register Reg, unsigned MemSizeInBits,
4019 const MachineRegisterInfo &MRI) {
4020 assert(MRI.hasOneNonDBGUse(Reg) &&
4021 "Expected Reg to only have one non-debug use?");
4022 Register MaybeLoad;
4023 int64_t Shift;
4024 if (!mi_match(Reg, MRI,
4025 m_OneNonDBGUse(m_GShl(m_Reg(MaybeLoad), m_ICst(Shift))))) {
4026 Shift = 0;
4027 MaybeLoad = Reg;
4028 }
4029
4030 if (Shift % MemSizeInBits != 0)
4031 return std::nullopt;
4032
4033 // TODO: Handle other types of loads.
4034 auto *Load = getOpcodeDef<GZExtLoad>(MaybeLoad, MRI);
4035 if (!Load)
4036 return std::nullopt;
4037
4038 if (!Load->isUnordered() || Load->getMemSizeInBits() != MemSizeInBits)
4039 return std::nullopt;
4040
4041 return std::make_pair(Load, Shift / MemSizeInBits);
4042}
4043
4044std::optional<std::tuple<GZExtLoad *, int64_t, GZExtLoad *>>
4045CombinerHelper::findLoadOffsetsForLoadOrCombine(
4047 const SmallVector<Register, 8> &RegsToVisit,
4048 const unsigned MemSizeInBits) const {
4049
4050 // Each load found for the pattern. There should be one for each RegsToVisit.
4051 SmallSetVector<const MachineInstr *, 8> Loads;
4052
4053 // The lowest index used in any load. (The lowest "i" for each x[i].)
4054 int64_t LowestIdx = INT64_MAX;
4055
4056 // The load which uses the lowest index.
4057 GZExtLoad *LowestIdxLoad = nullptr;
4058
4059 // Keeps track of the load indices we see. We shouldn't see any indices twice.
4060 SmallSet<int64_t, 8> SeenIdx;
4061
4062 // Ensure each load is in the same MBB.
4063 // TODO: Support multiple MachineBasicBlocks.
4064 MachineBasicBlock *MBB = nullptr;
4065 const MachineMemOperand *MMO = nullptr;
4066
4067 // Earliest instruction-order load in the pattern.
4068 GZExtLoad *EarliestLoad = nullptr;
4069
4070 // Latest instruction-order load in the pattern.
4071 GZExtLoad *LatestLoad = nullptr;
4072
4073 // Base pointer which every load should share.
4075
4076 // We want to find a load for each register. Each load should have some
4077 // appropriate bit twiddling arithmetic. During this loop, we will also keep
4078 // track of the load which uses the lowest index. Later, we will check if we
4079 // can use its pointer in the final, combined load.
4080 for (auto Reg : RegsToVisit) {
4081 // Find the load, and find the byte position that its value will end up
4082 // in (e.g. after being shifted).
4083 auto LoadAndPos = matchLoadAndBytePosition(Reg, MemSizeInBits, MRI);
4084 if (!LoadAndPos)
4085 return std::nullopt;
4086 GZExtLoad *Load;
4087 int64_t DstPos;
4088 std::tie(Load, DstPos) = *LoadAndPos;
4089
4090 // TODO: Handle multiple MachineBasicBlocks. Currently not handled because
4091 // it is difficult to check for stores/calls/etc between loads.
4092 MachineBasicBlock *LoadMBB = Load->getParent();
4093 if (!MBB)
4094 MBB = LoadMBB;
4095 if (LoadMBB != MBB)
4096 return std::nullopt;
4097
4098 // Make sure that the MachineMemOperands of every seen load are compatible.
4099 auto &LoadMMO = Load->getMMO();
4100 if (!MMO)
4101 MMO = &LoadMMO;
4102 if (MMO->getAddrSpace() != LoadMMO.getAddrSpace())
4103 return std::nullopt;
4104
4105 // Find out what the base pointer and index for the load is.
4106 Register LoadPtr;
4107 int64_t Idx;
4108 if (!mi_match(Load->getOperand(1).getReg(), MRI,
4109 m_GPtrAdd(m_Reg(LoadPtr), m_ICst(Idx)))) {
4110 LoadPtr = Load->getOperand(1).getReg();
4111 Idx = 0;
4112 }
4113
4114 // Don't combine things like a[i], a[i] -> a bigger load.
4115 if (!SeenIdx.insert(Idx).second)
4116 return std::nullopt;
4117
4118 // Every load must share the same base pointer; don't combine things like:
4119 //
4120 // a[i], b[i + 1] -> a bigger load.
4121 if (!BasePtr.isValid())
4122 BasePtr = LoadPtr;
4123 if (BasePtr != LoadPtr)
4124 return std::nullopt;
4125
4126 if (Idx < LowestIdx) {
4127 LowestIdx = Idx;
4128 LowestIdxLoad = Load;
4129 }
4130
4131 // Keep track of the byte offset that this load ends up at. If we have seen
4132 // the byte offset, then stop here. We do not want to combine:
4133 //
4134 // a[i] << 16, a[i + k] << 16 -> a bigger load.
4135 if (!MemOffset2Idx.try_emplace(DstPos, Idx).second)
4136 return std::nullopt;
4137 Loads.insert(Load);
4138
4139 // Keep track of the position of the earliest/latest loads in the pattern.
4140 // We will check that there are no load fold barriers between them later
4141 // on.
4142 //
4143 // FIXME: Is there a better way to check for load fold barriers?
4144 if (!EarliestLoad || dominates(*Load, *EarliestLoad))
4145 EarliestLoad = Load;
4146 if (!LatestLoad || dominates(*LatestLoad, *Load))
4147 LatestLoad = Load;
4148 }
4149
4150 // We found a load for each register. Let's check if each load satisfies the
4151 // pattern.
4152 assert(Loads.size() == RegsToVisit.size() &&
4153 "Expected to find a load for each register?");
4154 assert(EarliestLoad != LatestLoad && EarliestLoad &&
4155 LatestLoad && "Expected at least two loads?");
4156
4157 // Check if there are any stores, calls, etc. between any of the loads. If
4158 // there are, then we can't safely perform the combine.
4159 //
4160 // MaxIter is chosen based off the (worst case) number of iterations it
4161 // typically takes to succeed in the LLVM test suite plus some padding.
4162 //
4163 // FIXME: Is there a better way to check for load fold barriers?
4164 const unsigned MaxIter = 20;
4165 unsigned Iter = 0;
4166 for (const auto &MI : instructionsWithoutDebug(EarliestLoad->getIterator(),
4167 LatestLoad->getIterator())) {
4168 if (Loads.count(&MI))
4169 continue;
4170 if (MI.isLoadFoldBarrier())
4171 return std::nullopt;
4172 if (Iter++ == MaxIter)
4173 return std::nullopt;
4174 }
4175
4176 return std::make_tuple(LowestIdxLoad, LowestIdx, LatestLoad);
4177}
4178
4181 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4182 assert(MI.getOpcode() == TargetOpcode::G_OR);
4183 MachineFunction &MF = *MI.getMF();
4184 // Assuming a little-endian target, transform:
4185 // s8 *a = ...
4186 // s32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
4187 // =>
4188 // s32 val = *((s32)a)
4189 //
4190 // s8 *a = ...
4191 // s32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
4192 // =>
4193 // s32 val = BSWAP(*((s32)a))
4194 Register Dst = MI.getOperand(0).getReg();
4195 LLT Ty = MRI.getType(Dst);
4196 if (Ty.isVector())
4197 return false;
4198
4199 // We need to combine at least two loads into this type. Since the smallest
4200 // possible load is into a byte, we need at least a 16-bit wide type.
4201 const unsigned WideMemSizeInBits = Ty.getSizeInBits();
4202 if (WideMemSizeInBits < 16 || WideMemSizeInBits % 8 != 0)
4203 return false;
4204
4205 // Match a collection of non-OR instructions in the pattern.
4206 auto RegsToVisit = findCandidatesForLoadOrCombine(&MI);
4207 if (!RegsToVisit)
4208 return false;
4209
4210 // We have a collection of non-OR instructions. Figure out how wide each of
4211 // the small loads should be based off of the number of potential loads we
4212 // found.
4213 const unsigned NarrowMemSizeInBits = WideMemSizeInBits / RegsToVisit->size();
4214 if (NarrowMemSizeInBits % 8 != 0)
4215 return false;
4216
4217 // Check if each register feeding into each OR is a load from the same
4218 // base pointer + some arithmetic.
4219 //
4220 // e.g. a[0], a[1] << 8, a[2] << 16, etc.
4221 //
4222 // Also verify that each of these ends up putting a[i] into the same memory
4223 // offset as a load into a wide type would.
4225 GZExtLoad *LowestIdxLoad, *LatestLoad;
4226 int64_t LowestIdx;
4227 auto MaybeLoadInfo = findLoadOffsetsForLoadOrCombine(
4228 MemOffset2Idx, *RegsToVisit, NarrowMemSizeInBits);
4229 if (!MaybeLoadInfo)
4230 return false;
4231 std::tie(LowestIdxLoad, LowestIdx, LatestLoad) = *MaybeLoadInfo;
4232
4233 // We have a bunch of loads being OR'd together. Using the addresses + offsets
4234 // we found before, check if this corresponds to a big or little endian byte
4235 // pattern. If it does, then we can represent it using a load + possibly a
4236 // BSWAP.
4237 bool IsBigEndianTarget = MF.getDataLayout().isBigEndian();
4238 std::optional<bool> IsBigEndian = isBigEndian(MemOffset2Idx, LowestIdx);
4239 if (!IsBigEndian)
4240 return false;
4241 bool NeedsBSwap = IsBigEndianTarget != *IsBigEndian;
4242 if (NeedsBSwap && !isLegalOrBeforeLegalizer({TargetOpcode::G_BSWAP, {Ty}}))
4243 return false;
4244
4245 // Make sure that the load from the lowest index produces offset 0 in the
4246 // final value.
4247 //
4248 // This ensures that we won't combine something like this:
4249 //
4250 // load x[i] -> byte 2
4251 // load x[i+1] -> byte 0 ---> wide_load x[i]
4252 // load x[i+2] -> byte 1
4253 const unsigned NumLoadsInTy = WideMemSizeInBits / NarrowMemSizeInBits;
4254 const unsigned ZeroByteOffset =
4255 *IsBigEndian
4256 ? bigEndianByteAt(NumLoadsInTy, 0)
4257 : littleEndianByteAt(NumLoadsInTy, 0);
4258 auto ZeroOffsetIdx = MemOffset2Idx.find(ZeroByteOffset);
4259 if (ZeroOffsetIdx == MemOffset2Idx.end() ||
4260 ZeroOffsetIdx->second != LowestIdx)
4261 return false;
4262
4263 // We will reuse the pointer from the load which ends up at byte offset 0. It
4264 // may not use index 0.
4265 Register Ptr = LowestIdxLoad->getPointerReg();
4266 const MachineMemOperand &MMO = LowestIdxLoad->getMMO();
4267 LegalityQuery::MemDesc MMDesc(MMO);
4268 MMDesc.MemoryTy = Ty;
4270 {TargetOpcode::G_LOAD, {Ty, MRI.getType(Ptr)}, {MMDesc}}))
4271 return false;
4272 auto PtrInfo = MMO.getPointerInfo();
4273 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, WideMemSizeInBits / 8);
4274
4275 // Load must be allowed and fast on the target.
4277 auto &DL = MF.getDataLayout();
4278 unsigned Fast = 0;
4279 if (!getTargetLowering().allowsMemoryAccess(C, DL, Ty, *NewMMO, &Fast) ||
4280 !Fast)
4281 return false;
4282
4283 MatchInfo = [=](MachineIRBuilder &MIB) {
4284 MIB.setInstrAndDebugLoc(*LatestLoad);
4285 Register LoadDst = NeedsBSwap ? MRI.cloneVirtualRegister(Dst) : Dst;
4286 MIB.buildLoad(LoadDst, Ptr, *NewMMO);
4287 if (NeedsBSwap)
4288 MIB.buildBSwap(Dst, LoadDst);
4289 };
4290 return true;
4291}
4292
4294 MachineInstr *&ExtMI) const {
4295 auto &PHI = cast<GPhi>(MI);
4296 Register DstReg = PHI.getReg(0);
4297
4298 // TODO: Extending a vector may be expensive, don't do this until heuristics
4299 // are better.
4300 if (MRI.getType(DstReg).isVector())
4301 return false;
4302
4303 // Try to match a phi, whose only use is an extend.
4304 if (!MRI.hasOneNonDBGUse(DstReg))
4305 return false;
4306 ExtMI = &*MRI.use_instr_nodbg_begin(DstReg);
4307 switch (ExtMI->getOpcode()) {
4308 case TargetOpcode::G_ANYEXT:
4309 return true; // G_ANYEXT is usually free.
4310 case TargetOpcode::G_ZEXT:
4311 case TargetOpcode::G_SEXT:
4312 break;
4313 default:
4314 return false;
4315 }
4316
4317 // If the target is likely to fold this extend away, don't propagate.
4318 if (Builder.getTII().isExtendLikelyToBeFolded(*ExtMI, MRI))
4319 return false;
4320
4321 // We don't want to propagate the extends unless there's a good chance that
4322 // they'll be optimized in some way.
4323 // Collect the unique incoming values.
4325 for (unsigned I = 0; I < PHI.getNumIncomingValues(); ++I) {
4326 auto *DefMI = getDefIgnoringCopies(PHI.getIncomingValue(I), MRI);
4327 switch (DefMI->getOpcode()) {
4328 case TargetOpcode::G_LOAD:
4329 case TargetOpcode::G_TRUNC:
4330 case TargetOpcode::G_SEXT:
4331 case TargetOpcode::G_ZEXT:
4332 case TargetOpcode::G_ANYEXT:
4333 case TargetOpcode::G_CONSTANT:
4334 InSrcs.insert(DefMI);
4335 // Don't try to propagate if there are too many places to create new
4336 // extends, chances are it'll increase code size.
4337 if (InSrcs.size() > 2)
4338 return false;
4339 break;
4340 default:
4341 return false;
4342 }
4343 }
4344 return true;
4345}
4346
4348 MachineInstr *&ExtMI) const {
4349 auto &PHI = cast<GPhi>(MI);
4350 Register DstReg = ExtMI->getOperand(0).getReg();
4351 LLT ExtTy = MRI.getType(DstReg);
4352
4353 // Propagate the extension into the defining block of each incoming reg.
4354 // Use a SetVector here because PHIs can have duplicate edges, and we want
4355 // deterministic iteration order.
4358 for (unsigned I = 0; I < PHI.getNumIncomingValues(); ++I) {
4359 auto SrcReg = PHI.getIncomingValue(I);
4360 auto *SrcMI = MRI.getVRegDef(SrcReg);
4361 if (!SrcMIs.insert(SrcMI))
4362 continue;
4363
4364 // Build an extend after each src inst.
4365 auto *MBB = SrcMI->getParent();
4366 MachineBasicBlock::iterator InsertPt = ++SrcMI->getIterator();
4367 if (InsertPt != MBB->end() && InsertPt->isPHI())
4368 InsertPt = MBB->getFirstNonPHI();
4369
4370 Builder.setInsertPt(*SrcMI->getParent(), InsertPt);
4371 Builder.setDebugLoc(MI.getDebugLoc());
4372 auto NewExt = Builder.buildExtOrTrunc(ExtMI->getOpcode(), ExtTy, SrcReg);
4373 OldToNewSrcMap[SrcMI] = NewExt;
4374 }
4375
4376 // Create a new phi with the extended inputs.
4377 Builder.setInstrAndDebugLoc(MI);
4378 auto NewPhi = Builder.buildInstrNoInsert(TargetOpcode::G_PHI);
4379 NewPhi.addDef(DstReg);
4380 for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) {
4381 if (!MO.isReg()) {
4382 NewPhi.addMBB(MO.getMBB());
4383 continue;
4384 }
4385 auto *NewSrc = OldToNewSrcMap[MRI.getVRegDef(MO.getReg())];
4386 NewPhi.addUse(NewSrc->getOperand(0).getReg());
4387 }
4388 Builder.insertInstr(NewPhi);
4389 ExtMI->eraseFromParent();
4390}
4391
4393 Register &Reg) const {
4394 assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT);
4395 // If we have a constant index, look for a G_BUILD_VECTOR source
4396 // and find the source register that the index maps to.
4397 Register SrcVec = MI.getOperand(1).getReg();
4398 LLT SrcTy = MRI.getType(SrcVec);
4399 if (SrcTy.isScalableVector())
4400 return false;
4401
4402 auto Cst = getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
4403 if (!Cst || Cst->Value.getZExtValue() >= SrcTy.getNumElements())
4404 return false;
4405
4406 unsigned VecIdx = Cst->Value.getZExtValue();
4407
4408 // Check if we have a build_vector or build_vector_trunc with an optional
4409 // trunc in front.
4410 MachineInstr *SrcVecMI = MRI.getVRegDef(SrcVec);
4411 if (SrcVecMI->getOpcode() == TargetOpcode::G_TRUNC) {
4412 SrcVecMI = MRI.getVRegDef(SrcVecMI->getOperand(1).getReg());
4413 }
4414
4415 if (SrcVecMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR &&
4416 SrcVecMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR_TRUNC)
4417 return false;
4418
4419 EVT Ty(getMVTForLLT(SrcTy));
4420 if (!MRI.hasOneNonDBGUse(SrcVec) &&
4421 !getTargetLowering().aggressivelyPreferBuildVectorSources(Ty))
4422 return false;
4423
4424 Reg = SrcVecMI->getOperand(VecIdx + 1).getReg();
4425 return true;
4426}
4427
4429 Register &Reg) const {
4430 // Check the type of the register, since it may have come from a
4431 // G_BUILD_VECTOR_TRUNC.
4432 LLT ScalarTy = MRI.getType(Reg);
4433 Register DstReg = MI.getOperand(0).getReg();
4434 LLT DstTy = MRI.getType(DstReg);
4435
4436 if (ScalarTy != DstTy) {
4437 assert(ScalarTy.getSizeInBits() > DstTy.getSizeInBits());
4438 Builder.buildTrunc(DstReg, Reg);
4439 MI.eraseFromParent();
4440 return;
4441 }
4443}
4444
4447 SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) const {
4448 assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
4449 // This combine tries to find build_vector's which have every source element
4450 // extracted using G_EXTRACT_VECTOR_ELT. This can happen when transforms like
4451 // the masked load scalarization is run late in the pipeline. There's already
4452 // a combine for a similar pattern starting from the extract, but that
4453 // doesn't attempt to do it if there are multiple uses of the build_vector,
4454 // which in this case is true. Starting the combine from the build_vector
4455 // feels more natural than trying to find sibling nodes of extracts.
4456 // E.g.
4457 // %vec(<4 x s32>) = G_BUILD_VECTOR %s1(s32), %s2, %s3, %s4
4458 // %ext1 = G_EXTRACT_VECTOR_ELT %vec, 0
4459 // %ext2 = G_EXTRACT_VECTOR_ELT %vec, 1
4460 // %ext3 = G_EXTRACT_VECTOR_ELT %vec, 2
4461 // %ext4 = G_EXTRACT_VECTOR_ELT %vec, 3
4462 // ==>
4463 // replace ext{1,2,3,4} with %s{1,2,3,4}
4464
4465 Register DstReg = MI.getOperand(0).getReg();
4466 LLT DstTy = MRI.getType(DstReg);
4467 unsigned NumElts = DstTy.getNumElements();
4468
4469 SmallBitVector ExtractedElts(NumElts);
4470 for (MachineInstr &II : MRI.use_nodbg_instructions(DstReg)) {
4471 if (II.getOpcode() != TargetOpcode::G_EXTRACT_VECTOR_ELT)
4472 return false;
4473 auto Cst = getIConstantVRegVal(II.getOperand(2).getReg(), MRI);
4474 if (!Cst)
4475 return false;
4476 unsigned Idx = Cst->getZExtValue();
4477 if (Idx >= NumElts)
4478 return false; // Out of range.
4479 ExtractedElts.set(Idx);
4480 SrcDstPairs.emplace_back(
4481 std::make_pair(MI.getOperand(Idx + 1).getReg(), &II));
4482 }
4483 // Match if every element was extracted.
4484 return ExtractedElts.all();
4485}
4486
4489 SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) const {
4490 assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
4491 for (auto &Pair : SrcDstPairs) {
4492 auto *ExtMI = Pair.second;
4493 replaceRegWith(MRI, ExtMI->getOperand(0).getReg(), Pair.first);
4494 ExtMI->eraseFromParent();
4495 }
4496 MI.eraseFromParent();
4497}
4498
4501 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4502 applyBuildFnNoErase(MI, MatchInfo);
4503 MI.eraseFromParent();
4504}
4505
4508 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4509 MatchInfo(Builder);
4510}
4511
4513 bool AllowScalarConstants,
4514 BuildFnTy &MatchInfo) const {
4515 assert(MI.getOpcode() == TargetOpcode::G_OR);
4516
4517 Register Dst = MI.getOperand(0).getReg();
4518 LLT Ty = MRI.getType(Dst);
4519 unsigned BitWidth = Ty.getScalarSizeInBits();
4520
4521 Register ShlSrc, ShlAmt, LShrSrc, LShrAmt, Amt;
4522 unsigned FshOpc = 0;
4523
4524 // Match (or (shl ...), (lshr ...)).
4525 if (!mi_match(Dst, MRI,
4526 // m_GOr() handles the commuted version as well.
4527 m_GOr(m_GShl(m_Reg(ShlSrc), m_Reg(ShlAmt)),
4528 m_GLShr(m_Reg(LShrSrc), m_Reg(LShrAmt)))))
4529 return false;
4530
4531 // Given constants C0 and C1 such that C0 + C1 is bit-width:
4532 // (or (shl x, C0), (lshr y, C1)) -> (fshl x, y, C0) or (fshr x, y, C1)
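  // E.g. on s32 (illustrative): (or (shl x, 8), (lshr y, 24)) -> (fshr x, y, 24).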
4533 int64_t CstShlAmt = 0, CstLShrAmt;
4534 if (mi_match(ShlAmt, MRI, m_ICstOrSplat(CstShlAmt)) &&
4535 mi_match(LShrAmt, MRI, m_ICstOrSplat(CstLShrAmt)) &&
4536 CstShlAmt + CstLShrAmt == BitWidth) {
4537 FshOpc = TargetOpcode::G_FSHR;
4538 Amt = LShrAmt;
4539 } else if (mi_match(LShrAmt, MRI,
4541 ShlAmt == Amt) {
4542 // (or (shl x, amt), (lshr y, (sub bw, amt))) -> (fshl x, y, amt)
4543 FshOpc = TargetOpcode::G_FSHL;
4544 } else if (mi_match(ShlAmt, MRI,
4546 LShrAmt == Amt) {
4547 // (or (shl x, (sub bw, amt)), (lshr y, amt)) -> (fshr x, y, amt)
4548 FshOpc = TargetOpcode::G_FSHR;
4549 } else {
4550 return false;
4551 }
4552
4553 LLT AmtTy = MRI.getType(Amt);
4554 if (!isLegalOrBeforeLegalizer({FshOpc, {Ty, AmtTy}}) &&
4555 (!AllowScalarConstants || CstShlAmt == 0 || !Ty.isScalar()))
4556 return false;
4557
4558 MatchInfo = [=](MachineIRBuilder &B) {
4559 B.buildInstr(FshOpc, {Dst}, {ShlSrc, LShrSrc, Amt});
4560 };
4561 return true;
4562}
4563
4564/// Match an FSHL or FSHR that can be combined to a ROTR or ROTL rotate.
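/// E.g. (illustrative): (G_FSHL x, x, amt) -> (G_ROTL x, amt) and
/// (G_FSHR x, x, amt) -> (G_ROTR x, amt).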
4566 unsigned Opc = MI.getOpcode();
4567 assert(Opc == TargetOpcode::G_FSHL || Opc == TargetOpcode::G_FSHR);
4568 Register X = MI.getOperand(1).getReg();
4569 Register Y = MI.getOperand(2).getReg();
4570 if (X != Y)
4571 return false;
4572 unsigned RotateOpc =
4573 Opc == TargetOpcode::G_FSHL ? TargetOpcode::G_ROTL : TargetOpcode::G_ROTR;
4574 return isLegalOrBeforeLegalizer({RotateOpc, {MRI.getType(X), MRI.getType(Y)}});
4575}
4576
4578 unsigned Opc = MI.getOpcode();
4579 assert(Opc == TargetOpcode::G_FSHL || Opc == TargetOpcode::G_FSHR);
4580 bool IsFSHL = Opc == TargetOpcode::G_FSHL;
4581 Observer.changingInstr(MI);
4582 MI.setDesc(Builder.getTII().get(IsFSHL ? TargetOpcode::G_ROTL
4583 : TargetOpcode::G_ROTR));
4584 MI.removeOperand(2);
4585 Observer.changedInstr(MI);
4586}
4587
4588// Fold (rot x, c) -> (rot x, c % BitSize)
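// E.g. on s32 (illustrative), a constant rotate amount of 37 becomes
// 37 % 32 = 5.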
4590 assert(MI.getOpcode() == TargetOpcode::G_ROTL ||
4591 MI.getOpcode() == TargetOpcode::G_ROTR);
4592 unsigned Bitsize =
4593 MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits();
4594 Register AmtReg = MI.getOperand(2).getReg();
4595 bool OutOfRange = false;
4596 auto MatchOutOfRange = [Bitsize, &OutOfRange](const Constant *C) {
4597 if (auto *CI = dyn_cast<ConstantInt>(C))
4598 OutOfRange |= CI->getValue().uge(Bitsize);
4599 return true;
4600 };
4601 return matchUnaryPredicate(MRI, AmtReg, MatchOutOfRange) && OutOfRange;
4602}
4603
4605 assert(MI.getOpcode() == TargetOpcode::G_ROTL ||
4606 MI.getOpcode() == TargetOpcode::G_ROTR);
4607 unsigned Bitsize =
4608 MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits();
4609 Register Amt = MI.getOperand(2).getReg();
4610 LLT AmtTy = MRI.getType(Amt);
4611 auto Bits = Builder.buildConstant(AmtTy, Bitsize);
4612 Amt = Builder.buildURem(AmtTy, MI.getOperand(2).getReg(), Bits).getReg(0);
4613 Observer.changingInstr(MI);
4614 MI.getOperand(2).setReg(Amt);
4615 Observer.changedInstr(MI);
4616}
4617
4619 int64_t &MatchInfo) const {
4620 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
4621 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
4622
4623 // We want to avoid calling KnownBits on the LHS if possible, as this combine
4624 // has no filter and runs on every G_ICMP instruction. We can avoid calling
4625 // KnownBits on the LHS in two cases:
4626 //
4627 // - The RHS is unknown: Constants are always on RHS. If the RHS is unknown
4628 // we cannot do any transforms so we can safely bail out early.
4629 // - The RHS is zero: we don't need to know the LHS to do unsigned <0 and
4630 // >=0.
4631 auto KnownRHS = VT->getKnownBits(MI.getOperand(3).getReg());
4632 if (KnownRHS.isUnknown())
4633 return false;
4634
4635 std::optional<bool> KnownVal;
4636 if (KnownRHS.isZero()) {
4637 // ? uge 0 -> always true
4638 // ? ult 0 -> always false
4639 if (Pred == CmpInst::ICMP_UGE)
4640 KnownVal = true;
4641 else if (Pred == CmpInst::ICMP_ULT)
4642 KnownVal = false;
4643 }
4644
4645 if (!KnownVal) {
4646 auto KnownLHS = VT->getKnownBits(MI.getOperand(2).getReg());
4647 KnownVal = ICmpInst::compare(KnownLHS, KnownRHS, Pred);
4648 }
4649
4650 if (!KnownVal)
4651 return false;
4652 MatchInfo =
4653 *KnownVal
4655 /*IsVector = */
4656 MRI.getType(MI.getOperand(0).getReg()).isVector(),
4657 /* IsFP = */ false)
4658 : 0;
4659 return true;
4660}
4661
4664 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4665 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
4666 // Given:
4667 //
4668 // %x = G_WHATEVER (... x is known to be 0 or 1 ...)
4669 // %cmp = G_ICMP ne %x, 0
4670 //
4671 // Or:
4672 //
4673 // %x = G_WHATEVER (... x is known to be 0 or 1 ...)
4674 // %cmp = G_ICMP eq %x, 1
4675 //
4676 // We can replace %cmp with %x assuming true is 1 on the target.
4677 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
4678 if (!CmpInst::isEquality(Pred))
4679 return false;
4680 Register Dst = MI.getOperand(0).getReg();
4681 LLT DstTy = MRI.getType(Dst);
4683 /* IsFP = */ false) != 1)
4684 return false;
4685 int64_t OneOrZero = Pred == CmpInst::ICMP_EQ;
4686 if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICst(OneOrZero)))
4687 return false;
4688 Register LHS = MI.getOperand(2).getReg();
4689 auto KnownLHS = VT->getKnownBits(LHS);
4690 if (KnownLHS.getMinValue() != 0 || KnownLHS.getMaxValue() != 1)
4691 return false;
4692 // Make sure replacing Dst with the LHS is a legal operation.
4693 LLT LHSTy = MRI.getType(LHS);
4694 unsigned LHSSize = LHSTy.getSizeInBits();
4695 unsigned DstSize = DstTy.getSizeInBits();
4696 unsigned Op = TargetOpcode::COPY;
4697 if (DstSize != LHSSize)
4698 Op = DstSize < LHSSize ? TargetOpcode::G_TRUNC : TargetOpcode::G_ZEXT;
4699 if (!isLegalOrBeforeLegalizer({Op, {DstTy, LHSTy}}))
4700 return false;
4701 MatchInfo = [=](MachineIRBuilder &B) { B.buildInstr(Op, {Dst}, {LHS}); };
4702 return true;
4703}
4704
4705// Replace (and (or x, c1), c2) with (and x, c2) iff c1 & c2 == 0
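// E.g. (illustrative): (and (or x, 0xFF00), 0x00FF) -> (and x, 0x00FF), since
// no bit the or could set survives the mask.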
4708 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4709 assert(MI.getOpcode() == TargetOpcode::G_AND);
4710
4711 // Ignore vector types to simplify matching the two constants.
4712 // TODO: do this for vectors and scalars via a demanded bits analysis.
4713 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4714 if (Ty.isVector())
4715 return false;
4716
4717 Register Src;
4718 Register AndMaskReg;
4719 int64_t AndMaskBits;
4720 int64_t OrMaskBits;
4721 if (!mi_match(MI, MRI,
4722 m_GAnd(m_GOr(m_Reg(Src), m_ICst(OrMaskBits)),
4723 m_all_of(m_ICst(AndMaskBits), m_Reg(AndMaskReg)))))
4724 return false;
4725
4726 // Check if OrMask could turn on any bits in Src.
4727 if (AndMaskBits & OrMaskBits)
4728 return false;
4729
4730 MatchInfo = [=, &MI](MachineIRBuilder &B) {
4731 Observer.changingInstr(MI);
4732 // Canonicalize the result to have the constant on the RHS.
4733 if (MI.getOperand(1).getReg() == AndMaskReg)
4734 MI.getOperand(2).setReg(AndMaskReg);
4735 MI.getOperand(1).setReg(Src);
4736 Observer.changedInstr(MI);
4737 };
4738 return true;
4739}
4740
4741/// Form a G_SBFX from a G_SEXT_INREG fed by a right shift.
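/// E.g. (illustrative, s32): (G_SEXT_INREG (G_LSHR x, 4), 8) becomes
/// (G_SBFX x, 4, 8), i.e. extract 8 bits starting at bit 4 and sign-extend.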
4744 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4745 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
4746 Register Dst = MI.getOperand(0).getReg();
4747 Register Src = MI.getOperand(1).getReg();
4748 LLT Ty = MRI.getType(Src);
4750 if (!LI || !LI->isLegalOrCustom({TargetOpcode::G_SBFX, {Ty, ExtractTy}}))
4751 return false;
4752 int64_t Width = MI.getOperand(2).getImm();
4753 Register ShiftSrc;
4754 int64_t ShiftImm;
4755 if (!mi_match(
4756 Src, MRI,
4757 m_OneNonDBGUse(m_any_of(m_GAShr(m_Reg(ShiftSrc), m_ICst(ShiftImm)),
4758 m_GLShr(m_Reg(ShiftSrc), m_ICst(ShiftImm))))))
4759 return false;
4760 if (ShiftImm < 0 || ShiftImm + Width > Ty.getScalarSizeInBits())
4761 return false;
4762
4763 MatchInfo = [=](MachineIRBuilder &B) {
4764 auto Cst1 = B.buildConstant(ExtractTy, ShiftImm);
4765 auto Cst2 = B.buildConstant(ExtractTy, Width);
4766 B.buildSbfx(Dst, ShiftSrc, Cst1, Cst2);
4767 };
4768 return true;
4769}
4770
4771/// Form a G_UBFX from "(a srl b) & mask", where b and mask are constants.
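/// E.g. (illustrative, s32): (G_AND (G_LSHR x, 3), 0x1F) becomes
/// (G_UBFX x, 3, 5), i.e. extract the 5 bits starting at bit 3.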
4773 BuildFnTy &MatchInfo) const {
4774 GAnd *And = cast<GAnd>(&MI);
4775 Register Dst = And->getReg(0);
4776 LLT Ty = MRI.getType(Dst);
4778 // Note that isLegalOrBeforeLegalizer is stricter and does not take custom
4779 // into account.
4780 if (LI && !LI->isLegalOrCustom({TargetOpcode::G_UBFX, {Ty, ExtractTy}}))
4781 return false;
4782
4783 int64_t AndImm, LSBImm;
4784 Register ShiftSrc;
4785 const unsigned Size = Ty.getScalarSizeInBits();
4786 if (!mi_match(And->getReg(0), MRI,
4787 m_GAnd(m_OneNonDBGUse(m_GLShr(m_Reg(ShiftSrc), m_ICst(LSBImm))),
4788 m_ICst(AndImm))))
4789 return false;
4790
4791 // The mask is a mask of the low bits iff imm & (imm+1) == 0.
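  // E.g. 0x0F qualifies (0x0F & 0x10 == 0); 0x0B does not (0x0B & 0x0C != 0).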
4792 auto MaybeMask = static_cast<uint64_t>(AndImm);
4793 if (MaybeMask & (MaybeMask + 1))
4794 return false;
4795
4796 // LSB must fit within the register.
4797 if (static_cast<uint64_t>(LSBImm) >= Size)
4798 return false;
4799
4800 uint64_t Width = APInt(Size, AndImm).countr_one();
4801 MatchInfo = [=](MachineIRBuilder &B) {
4802 auto WidthCst = B.buildConstant(ExtractTy, Width);
4803 auto LSBCst = B.buildConstant(ExtractTy, LSBImm);
4804 B.buildInstr(TargetOpcode::G_UBFX, {Dst}, {ShiftSrc, LSBCst, WidthCst});
4805 };
4806 return true;
4807}
4808
4811 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4812 const unsigned Opcode = MI.getOpcode();
4813 assert(Opcode == TargetOpcode::G_ASHR || Opcode == TargetOpcode::G_LSHR);
4814
4815 const Register Dst = MI.getOperand(0).getReg();
4816
4817 const unsigned ExtrOpcode = Opcode == TargetOpcode::G_ASHR
4818 ? TargetOpcode::G_SBFX
4819 : TargetOpcode::G_UBFX;
4820
4821 // Check if the type we would use for the extract is legal
4822 LLT Ty = MRI.getType(Dst);
4824 if (!LI || !LI->isLegalOrCustom({ExtrOpcode, {Ty, ExtractTy}}))
4825 return false;
4826
4827 Register ShlSrc;
4828 int64_t ShrAmt;
4829 int64_t ShlAmt;
4830 const unsigned Size = Ty.getScalarSizeInBits();
4831
4832 // Try to match shr (shl x, c1), c2
4833 if (!mi_match(Dst, MRI,
4834 m_BinOp(Opcode,
4835 m_OneNonDBGUse(m_GShl(m_Reg(ShlSrc), m_ICst(ShlAmt))),
4836 m_ICst(ShrAmt))))
4837 return false;
4838
4839 // Make sure that the shift sizes can fit a bitfield extract
4840 if (ShlAmt < 0 || ShlAmt > ShrAmt || ShrAmt >= Size)
4841 return false;
4842
4843 // Skip this combine if the G_SEXT_INREG combine could handle it
4844 if (Opcode == TargetOpcode::G_ASHR && ShlAmt == ShrAmt)
4845 return false;
4846
4847 // Calculate start position and width of the extract
4848 const int64_t Pos = ShrAmt - ShlAmt;
4849 const int64_t Width = Size - ShrAmt;
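  // E.g. (illustrative, s32): (G_LSHR (G_SHL x, 8), 16) becomes
  // (G_UBFX x, 8, 16), with Pos = 16 - 8 and Width = 32 - 16.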
4850
4851 MatchInfo = [=](MachineIRBuilder &B) {
4852 auto WidthCst = B.buildConstant(ExtractTy, Width);
4853 auto PosCst = B.buildConstant(ExtractTy, Pos);
4854 B.buildInstr(ExtrOpcode, {Dst}, {ShlSrc, PosCst, WidthCst});
4855 };
4856 return true;
4857}
4858
4861 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4862 const unsigned Opcode = MI.getOpcode();
4863 assert(Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_ASHR);
4864
4865 const Register Dst = MI.getOperand(0).getReg();
4866 LLT Ty = MRI.getType(Dst);
4868 if (LI && !LI->isLegalOrCustom({TargetOpcode::G_UBFX, {Ty, ExtractTy}}))
4869 return false;
4870
4871 // Try to match shr (and x, c1), c2
4872 Register AndSrc;
4873 int64_t ShrAmt;
4874 int64_t SMask;
4875 if (!mi_match(Dst, MRI,
4876 m_BinOp(Opcode,
4877 m_OneNonDBGUse(m_GAnd(m_Reg(AndSrc), m_ICst(SMask))),
4878 m_ICst(ShrAmt))))
4879 return false;
4880
4881 const unsigned Size = Ty.getScalarSizeInBits();
4882 if (ShrAmt < 0 || ShrAmt >= Size)
4883 return false;
4884
4885 // If the shift subsumes the mask, emit the 0 directly.
4886 if (0 == (SMask >> ShrAmt)) {
4887 MatchInfo = [=](MachineIRBuilder &B) {
4888 B.buildConstant(Dst, 0);
4889 };
4890 return true;
4891 }
4892
4893 // Check that ubfx can do the extraction, with no holes in the mask.
4894 uint64_t UMask = SMask;
4895 UMask |= maskTrailingOnes<uint64_t>(ShrAmt);
4897 if (!isMask_64(UMask))
4898 return false;
4899
4900 // Calculate start position and width of the extract.
4901 const int64_t Pos = ShrAmt;
4902 const int64_t Width = llvm::countr_one(UMask) - ShrAmt;
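  // E.g. (illustrative, s32): (G_LSHR (G_AND x, 0x0FF0), 4) becomes
  // (G_UBFX x, 4, 8): UMask = 0x0FFF, Pos = 4, Width = 12 - 4.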
4903
4904 // It's preferable to keep the shift, rather than form G_SBFX.
4905 // TODO: remove the G_AND via demanded bits analysis.
4906 if (Opcode == TargetOpcode::G_ASHR && Width + ShrAmt == Size)
4907 return false;
4908
4909 MatchInfo = [=](MachineIRBuilder &B) {
4910 auto WidthCst = B.buildConstant(ExtractTy, Width);
4911 auto PosCst = B.buildConstant(ExtractTy, Pos);
4912 B.buildInstr(TargetOpcode::G_UBFX, {Dst}, {AndSrc, PosCst, WidthCst});
4913 };
4914 return true;
4915}
4916
4917bool CombinerHelper::reassociationCanBreakAddressingModePattern(
4918 MachineInstr &MI) const {
4919 auto &PtrAdd = cast<GPtrAdd>(MI);
4920
4921 Register Src1Reg = PtrAdd.getBaseReg();
4922 auto *Src1Def = getOpcodeDef<GPtrAdd>(Src1Reg, MRI);
4923 if (!Src1Def)
4924 return false;
4925
4926 Register Src2Reg = PtrAdd.getOffsetReg();
4927
4928 if (MRI.hasOneNonDBGUse(Src1Reg))
4929 return false;
4930
4931 auto C1 = getIConstantVRegVal(Src1Def->getOffsetReg(), MRI);
4932 if (!C1)
4933 return false;
4934 auto C2 = getIConstantVRegVal(Src2Reg, MRI);
4935 if (!C2)
4936 return false;
4937
4938 const APInt &C1APIntVal = *C1;
4939 const APInt &C2APIntVal = *C2;
4940 const int64_t CombinedValue = (C1APIntVal + C2APIntVal).getSExtValue();
4941
4942 for (auto &UseMI : MRI.use_nodbg_instructions(PtrAdd.getReg(0))) {
4943 // This combine may end up running before ptrtoint/inttoptr combines
4944 // manage to eliminate redundant conversions, so try to look through them.
4945 MachineInstr *ConvUseMI = &UseMI;
4946 unsigned ConvUseOpc = ConvUseMI->getOpcode();
4947 while (ConvUseOpc == TargetOpcode::G_INTTOPTR ||
4948 ConvUseOpc == TargetOpcode::G_PTRTOINT) {
4949 Register DefReg = ConvUseMI->getOperand(0).getReg();
4950 if (!MRI.hasOneNonDBGUse(DefReg))
4951 break;
4952 ConvUseMI = &*MRI.use_instr_nodbg_begin(DefReg);
4953 ConvUseOpc = ConvUseMI->getOpcode();
4954 }
4955 auto *LdStMI = dyn_cast<GLoadStore>(ConvUseMI);
4956 if (!LdStMI)
4957 continue;
4958 // Is x[offset2] already not a legal addressing mode? If so then
4959 // reassociating the constants breaks nothing (we test offset2 because
4960 // that's the one we hope to fold into the load or store).
4961 TargetLoweringBase::AddrMode AM;
4962 AM.HasBaseReg = true;
4963 AM.BaseOffs = C2APIntVal.getSExtValue();
4964 unsigned AS = MRI.getType(LdStMI->getPointerReg()).getAddressSpace();
4965 Type *AccessTy = getTypeForLLT(LdStMI->getMMO().getMemoryType(),
4966 PtrAdd.getMF()->getFunction().getContext());
4967 const auto &TLI = *PtrAdd.getMF()->getSubtarget().getTargetLowering();
4968 if (!TLI.isLegalAddressingMode(PtrAdd.getMF()->getDataLayout(), AM,
4969 AccessTy, AS))
4970 continue;
4971
4972 // Would x[offset1+offset2] still be a legal addressing mode?
4973 AM.BaseOffs = CombinedValue;
4974 if (!TLI.isLegalAddressingMode(PtrAdd.getMF()->getDataLayout(), AM,
4975 AccessTy, AS))
4976 return true;
4977 }
4978
4979 return false;
4980}
4981
4983 MachineInstr *RHS,
4984 BuildFnTy &MatchInfo) const {
4985 // G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C)
4986 Register Src1Reg = MI.getOperand(1).getReg();
4987 if (RHS->getOpcode() != TargetOpcode::G_ADD)
4988 return false;
4989 auto C2 = getIConstantVRegVal(RHS->getOperand(2).getReg(), MRI);
4990 if (!C2)
4991 return false;
4992
4993 // If both additions are nuw, the reassociated additions are also nuw.
4994 // If the original G_PTR_ADD is additionally nusw, X and C are both not
4995 // negative, so BASE+X is between BASE and BASE+(X+C). The new G_PTR_ADDs are
4996 // therefore also nusw.
4997 // If the original G_PTR_ADD is additionally inbounds (which implies nusw),
4998 // the new G_PTR_ADDs are then also inbounds.
4999 unsigned PtrAddFlags = MI.getFlags();
5000 unsigned AddFlags = RHS->getFlags();
5001 bool IsNoUWrap = PtrAddFlags & AddFlags & MachineInstr::MIFlag::NoUWrap;
5002 bool IsNoUSWrap = IsNoUWrap && (PtrAddFlags & MachineInstr::MIFlag::NoUSWrap);
5003 bool IsInBounds = IsNoUWrap && (PtrAddFlags & MachineInstr::MIFlag::InBounds);
5004 unsigned Flags = 0;
5005 if (IsNoUWrap)
5007 if (IsNoUSWrap)
5009 if (IsInBounds)
5011
5012 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5013 LLT PtrTy = MRI.getType(MI.getOperand(0).getReg());
5014
5015 auto NewBase =
5016 Builder.buildPtrAdd(PtrTy, Src1Reg, RHS->getOperand(1).getReg(), Flags);
5017 Observer.changingInstr(MI);
5018 MI.getOperand(1).setReg(NewBase.getReg(0));
5019 MI.getOperand(2).setReg(RHS->getOperand(2).getReg());
5020 MI.setFlags(Flags);
5021 Observer.changedInstr(MI);
5022 };
5023 return !reassociationCanBreakAddressingModePattern(MI);
5024}
5025
5027 MachineInstr *LHS,
5028 MachineInstr *RHS,
5029 BuildFnTy &MatchInfo) const {
5030 // G_PTR_ADD(G_PTR_ADD(X, C), Y) -> G_PTR_ADD(G_PTR_ADD(X, Y), C)
5031 // if and only if (G_PTR_ADD X, C) has one use.
5032 Register LHSBase;
5033 std::optional<ValueAndVReg> LHSCstOff;
5034 if (!mi_match(MI.getBaseReg(), MRI,
5035 m_OneNonDBGUse(m_GPtrAdd(m_Reg(LHSBase), m_GCst(LHSCstOff)))))
5036 return false;
5037
5038 auto *LHSPtrAdd = cast<GPtrAdd>(LHS);
5039
5040 // Reassociating nuw additions preserves nuw. If both original G_PTR_ADDs are
5041 // nuw and inbounds (which implies nusw), the offsets are both non-negative,
5042 // so the new G_PTR_ADDs are also inbounds.
5043 unsigned PtrAddFlags = MI.getFlags();
5044 unsigned LHSPtrAddFlags = LHSPtrAdd->getFlags();
5045 bool IsNoUWrap = PtrAddFlags & LHSPtrAddFlags & MachineInstr::MIFlag::NoUWrap;
5046 bool IsNoUSWrap = IsNoUWrap && (PtrAddFlags & LHSPtrAddFlags &
5048 bool IsInBounds = IsNoUWrap && (PtrAddFlags & LHSPtrAddFlags &
5050 unsigned Flags = 0;
5051 if (IsNoUWrap)
5053 if (IsNoUSWrap)
5055 if (IsInBounds)
5057
5058 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5059 // When we change LHSPtrAdd's offset register we might cause it to use a reg
5060 // before its def. Sink the instruction to just before the outer PTR_ADD to
5061 // ensure this doesn't happen.
5062 LHSPtrAdd->moveBefore(&MI);
5063 Register RHSReg = MI.getOffsetReg();
5064 // Setting the vreg directly may cause a type mismatch if it comes from an extend/trunc.
5065 auto NewCst = B.buildConstant(MRI.getType(RHSReg), LHSCstOff->Value);
5066 Observer.changingInstr(MI);
5067 MI.getOperand(2).setReg(NewCst.getReg(0));
5068 MI.setFlags(Flags);
5069 Observer.changedInstr(MI);
5070 Observer.changingInstr(*LHSPtrAdd);
5071 LHSPtrAdd->getOperand(2).setReg(RHSReg);
5072 LHSPtrAdd->setFlags(Flags);
5073 Observer.changedInstr(*LHSPtrAdd);
5074 };
5075 return !reassociationCanBreakAddressingModePattern(MI);
5076}
5077
5078 bool CombinerHelper::matchReassocFoldConstantsInSubTree(
5079 GPtrAdd &MI, MachineInstr *LHS, MachineInstr *RHS,
5080 BuildFnTy &MatchInfo) const {
5081 // G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)
5082 auto *LHSPtrAdd = dyn_cast<GPtrAdd>(LHS);
5083 if (!LHSPtrAdd)
5084 return false;
5085
5086 Register Src2Reg = MI.getOperand(2).getReg();
5087 Register LHSSrc1 = LHSPtrAdd->getBaseReg();
5088 Register LHSSrc2 = LHSPtrAdd->getOffsetReg();
5089 auto C1 = getIConstantVRegVal(LHSSrc2, MRI);
5090 if (!C1)
5091 return false;
5092 auto C2 = getIConstantVRegVal(Src2Reg, MRI);
5093 if (!C2)
5094 return false;
5095
5096 // Reassociating nuw additions preserves nuw. If both original G_PTR_ADDs are
5097 // inbounds, reaching the same result in one G_PTR_ADD is also inbounds.
5098 // The nusw constraints are satisfied because imm1+imm2 cannot exceed the
5099 // largest signed integer that fits into the index type, which is the maximum
5100 // size of allocated objects according to the IR Language Reference.
5101 unsigned PtrAddFlags = MI.getFlags();
5102 unsigned LHSPtrAddFlags = LHSPtrAdd->getFlags();
5103 bool IsNoUWrap = PtrAddFlags & LHSPtrAddFlags & MachineInstr::MIFlag::NoUWrap;
5104 bool IsInBounds =
5105 PtrAddFlags & LHSPtrAddFlags & MachineInstr::MIFlag::InBounds;
5106 unsigned Flags = 0;
5107 if (IsNoUWrap)
5108 Flags |= MachineInstr::MIFlag::NoUWrap;
5109 if (IsInBounds) {
5110 Flags |= MachineInstr::MIFlag::NoUSWrap;
5111 Flags |= MachineInstr::MIFlag::InBounds;
5112 }
5113
5114 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5115 auto NewCst = B.buildConstant(MRI.getType(Src2Reg), *C1 + *C2);
5116 Observer.changingInstr(MI);
5117 MI.getOperand(1).setReg(LHSSrc1);
5118 MI.getOperand(2).setReg(NewCst.getReg(0));
5119 MI.setFlags(Flags);
5120 Observer.changedInstr(MI);
5121 };
5122 return !reassociationCanBreakAddressingModePattern(MI);
5123}
5124
5125 bool CombinerHelper::matchReassocPtrAdd(MachineInstr &MI,
5126 BuildFnTy &MatchInfo) const {
5127 auto &PtrAdd = cast<GPtrAdd>(MI);
5128 // We're trying to match a few pointer computation patterns here for
5129 // re-association opportunities.
5130 // 1) Isolating a constant operand to be on the RHS, e.g.:
5131 // G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C)
5132 //
5133 // 2) Folding two constants in each sub-tree as long as such folding
5134 // doesn't break a legal addressing mode.
5135 // G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)
5136 //
5137 // 3) Move a constant from the LHS of an inner op to the RHS of the outer.
5138 // G_PTR_ADD(G_PTR_ADD(X, C), Y) -> G_PTR_ADD(G_PTR_ADD(X, Y), C)
5139 // iff (G_PTR_ADD X, C) has one use.
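// For example (illustrative, not from the original source), pattern 2 turns
// G_PTR_ADD(G_PTR_ADD(%p, 4), 8) into G_PTR_ADD(%p, 12), as long as
// reassociationCanBreakAddressingModePattern() does not report that the
// memory uses of the result would lose a legal immediate addressing mode.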
5140 MachineInstr *LHS = MRI.getVRegDef(PtrAdd.getBaseReg());
5141 MachineInstr *RHS = MRI.getVRegDef(PtrAdd.getOffsetReg());
5142
5143 // Try to match example 2.
5144 if (matchReassocFoldConstantsInSubTree(PtrAdd, LHS, RHS, MatchInfo))
5145 return true;
5146
5147 // Try to match example 3.
5148 if (matchReassocConstantInnerLHS(PtrAdd, LHS, RHS, MatchInfo))
5149 return true;
5150
5151 // Try to match example 1.
5152 if (matchReassocConstantInnerRHS(PtrAdd, RHS, MatchInfo))
5153 return true;
5154
5155 return false;
5156}
5157 bool CombinerHelper::tryReassocBinOp(unsigned Opc, Register DstReg,
5158 Register OpLHS, Register OpRHS,
5159 BuildFnTy &MatchInfo) const {
5160 LLT OpRHSTy = MRI.getType(OpRHS);
5161 MachineInstr *OpLHSDef = MRI.getVRegDef(OpLHS);
5162
5163 if (OpLHSDef->getOpcode() != Opc)
5164 return false;
5165
5166 MachineInstr *OpRHSDef = MRI.getVRegDef(OpRHS);
5167 Register OpLHSLHS = OpLHSDef->getOperand(1).getReg();
5168 Register OpLHSRHS = OpLHSDef->getOperand(2).getReg();
5169
5170 // If the inner op is (X op C), pull the constant out so it can be folded with
5171 // other constants in the expression tree. Folding is not guaranteed so we
5172 // might have (C1 op C2). In that case do not pull a constant out because it
5173 // won't help and can lead to infinite loops.
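// For example (illustrative, not from the original source),
// (G_ADD (G_ADD %x, 5), 7) takes the first branch below and becomes
// (G_ADD %x, (G_ADD 5, 7)), which a later constant fold reduces to
// (G_ADD %x, 12).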
5174 if (isConstantOrConstantSplatVector(*MRI.getVRegDef(OpLHSRHS), MRI) &&
5175 !isConstantOrConstantSplatVector(*MRI.getVRegDef(OpLHSLHS), MRI)) {
5176 if (isConstantOrConstantSplatVector(*OpRHSDef, MRI)) {
5177 // (Opc (Opc X, C1), C2) -> (Opc X, (Opc C1, C2))
5178 MatchInfo = [=](MachineIRBuilder &B) {
5179 auto NewCst = B.buildInstr(Opc, {OpRHSTy}, {OpLHSRHS, OpRHS});
5180 B.buildInstr(Opc, {DstReg}, {OpLHSLHS, NewCst});
5181 };
5182 return true;
5183 }
5184 if (getTargetLowering().isReassocProfitable(MRI, OpLHS, OpRHS)) {
5185 // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
5186 // iff (op x, c1) has one use
5187 MatchInfo = [=](MachineIRBuilder &B) {
5188 auto NewLHSLHS = B.buildInstr(Opc, {OpRHSTy}, {OpLHSLHS, OpRHS});
5189 B.buildInstr(Opc, {DstReg}, {NewLHSLHS, OpLHSRHS});
5190 };
5191 return true;
5192 }
5193 }
5194
5195 return false;
5196}
5197
5198 bool CombinerHelper::matchReassocCommBinOp(MachineInstr &MI,
5199 BuildFnTy &MatchInfo) const {
5200 // We don't check if the reassociation will break a legal addressing mode
5201 // here since pointer arithmetic is handled by G_PTR_ADD.
5202 unsigned Opc = MI.getOpcode();
5203 Register DstReg = MI.getOperand(0).getReg();
5204 Register LHSReg = MI.getOperand(1).getReg();
5205 Register RHSReg = MI.getOperand(2).getReg();
5206
5207 if (tryReassocBinOp(Opc, DstReg, LHSReg, RHSReg, MatchInfo))
5208 return true;
5209 if (tryReassocBinOp(Opc, DstReg, RHSReg, LHSReg, MatchInfo))
5210 return true;
5211 return false;
5212}
5213
5214 bool CombinerHelper::matchConstantFoldCastOp(MachineInstr &MI,
5215 APInt &MatchInfo) const {
5216 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
5217 Register SrcOp = MI.getOperand(1).getReg();
5218
5219 if (auto MaybeCst = ConstantFoldCastOp(MI.getOpcode(), DstTy, SrcOp, MRI)) {
5220 MatchInfo = *MaybeCst;
5221 return true;
5222 }
5223
5224 return false;
5225}
5226
5227 bool CombinerHelper::matchConstantFoldBinOp(MachineInstr &MI,
5228 APInt &MatchInfo) const {
5229 Register Op1 = MI.getOperand(1).getReg();
5230 Register Op2 = MI.getOperand(2).getReg();
5231 auto MaybeCst = ConstantFoldBinOp(MI.getOpcode(), Op1, Op2, MRI);
5232 if (!MaybeCst)
5233 return false;
5234 MatchInfo = *MaybeCst;
5235 return true;
5236}
5237
5238 bool CombinerHelper::matchConstantFoldFPBinOp(MachineInstr &MI,
5239 ConstantFP *&MatchInfo) const {
5240 Register Op1 = MI.getOperand(1).getReg();
5241 Register Op2 = MI.getOperand(2).getReg();
5242 auto MaybeCst = ConstantFoldFPBinOp(MI.getOpcode(), Op1, Op2, MRI);
5243 if (!MaybeCst)
5244 return false;
5245 MatchInfo =
5246 ConstantFP::get(MI.getMF()->getFunction().getContext(), *MaybeCst);
5247 return true;
5248}
5249
5250 bool CombinerHelper::matchConstantFoldFMA(MachineInstr &MI,
5251 ConstantFP *&MatchInfo) const {
5252 assert(MI.getOpcode() == TargetOpcode::G_FMA ||
5253 MI.getOpcode() == TargetOpcode::G_FMAD);
5254 auto [_, Op1, Op2, Op3] = MI.getFirst4Regs();
5255
5256 const ConstantFP *Op3Cst = getConstantFPVRegVal(Op3, MRI);
5257 if (!Op3Cst)
5258 return false;
5259
5260 const ConstantFP *Op2Cst = getConstantFPVRegVal(Op2, MRI);
5261 if (!Op2Cst)
5262 return false;
5263
5264 const ConstantFP *Op1Cst = getConstantFPVRegVal(Op1, MRI);
5265 if (!Op1Cst)
5266 return false;
5267
5268 APFloat Op1F = Op1Cst->getValueAPF();
5269 Op1F.fusedMultiplyAdd(Op2Cst->getValueAPF(), Op3Cst->getValueAPF(),
5270 APFloat::rmNearestTiesToEven);
5271 MatchInfo = ConstantFP::get(MI.getMF()->getFunction().getContext(), Op1F);
5272 return true;
5273}
5274
5275 bool CombinerHelper::matchNarrowBinopFeedingAnd(
5276 MachineInstr &MI,
5277 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
5278 // Look for a binop feeding into an AND with a mask:
5279 //
5280 // %add = G_ADD %lhs, %rhs
5281 // %and = G_AND %add, 000...11111111
5282 //
5283 // Check if it's possible to perform the binop at a narrower width and zext
5284 // back to the original width like so:
5285 //
5286 // %narrow_lhs = G_TRUNC %lhs
5287 // %narrow_rhs = G_TRUNC %rhs
5288 // %narrow_add = G_ADD %narrow_lhs, %narrow_rhs
5289 // %new_add = G_ZEXT %narrow_add
5290 // %and = G_AND %new_add, 000...11111111
5291 //
5292 // This can allow later combines to eliminate the G_AND if it turns out
5293 // that the mask is irrelevant.
5294 assert(MI.getOpcode() == TargetOpcode::G_AND);
5295 Register Dst = MI.getOperand(0).getReg();
5296 Register AndLHS = MI.getOperand(1).getReg();
5297 Register AndRHS = MI.getOperand(2).getReg();
5298 LLT WideTy = MRI.getType(Dst);
5299
5300 // If the potential binop has more than one use, then it's possible that one
5301 // of those uses will need its full width.
5302 if (!WideTy.isScalar() || !MRI.hasOneNonDBGUse(AndLHS))
5303 return false;
5304
5305 // Check if the LHS feeding the AND is impacted by the high bits that we're
5306 // masking out.
5307 //
5308 // e.g. for 64-bit x, y:
5309 //
5310 // add_64(x, y) & 65535 == zext(add_16(trunc(x), trunc(y))) & 65535
5311 MachineInstr *LHSInst = getDefIgnoringCopies(AndLHS, MRI);
5312 if (!LHSInst)
5313 return false;
5314 unsigned LHSOpc = LHSInst->getOpcode();
5315 switch (LHSOpc) {
5316 default:
5317 return false;
5318 case TargetOpcode::G_ADD:
5319 case TargetOpcode::G_SUB:
5320 case TargetOpcode::G_MUL:
5321 case TargetOpcode::G_AND:
5322 case TargetOpcode::G_OR:
5323 case TargetOpcode::G_XOR:
5324 break;
5325 }
5326
5327 // Find the mask on the RHS.
5328 auto Cst = getIConstantVRegValWithLookThrough(AndRHS, MRI);
5329 if (!Cst)
5330 return false;
5331 auto Mask = Cst->Value;
5332 if (!Mask.isMask())
5333 return false;
5334
5335 // No point in combining if there's nothing to truncate.
5336 unsigned NarrowWidth = Mask.countr_one();
5337 if (NarrowWidth == WideTy.getSizeInBits())
5338 return false;
5339 LLT NarrowTy = LLT::scalar(NarrowWidth);
5340
5341 // Check if adding the zext + truncates could be harmful.
5342 auto &MF = *MI.getMF();
5343 const auto &TLI = getTargetLowering();
5344 LLVMContext &Ctx = MF.getFunction().getContext();
5345 if (!TLI.isTruncateFree(WideTy, NarrowTy, Ctx) ||
5346 !TLI.isZExtFree(NarrowTy, WideTy, Ctx))
5347 return false;
5348 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_TRUNC, {NarrowTy, WideTy}}) ||
5349 !isLegalOrBeforeLegalizer({TargetOpcode::G_ZEXT, {WideTy, NarrowTy}}))
5350 return false;
5351 Register BinOpLHS = LHSInst->getOperand(1).getReg();
5352 Register BinOpRHS = LHSInst->getOperand(2).getReg();
5353 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5354 auto NarrowLHS = Builder.buildTrunc(NarrowTy, BinOpLHS);
5355 auto NarrowRHS = Builder.buildTrunc(NarrowTy, BinOpRHS);
5356 auto NarrowBinOp =
5357 Builder.buildInstr(LHSOpc, {NarrowTy}, {NarrowLHS, NarrowRHS});
5358 auto Ext = Builder.buildZExt(WideTy, NarrowBinOp);
5359 Observer.changingInstr(MI);
5360 MI.getOperand(1).setReg(Ext.getReg(0));
5361 Observer.changedInstr(MI);
5362 };
5363 return true;
5364}
5365
5366 bool CombinerHelper::matchMulOBy2(MachineInstr &MI,
5367 BuildFnTy &MatchInfo) const {
5368 unsigned Opc = MI.getOpcode();
5369 assert(Opc == TargetOpcode::G_UMULO || Opc == TargetOpcode::G_SMULO);
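// (G_*MULO x, 2) -> (G_*ADDO x, x): doubling x overflows exactly when x + x
// overflows, so the multiply-with-overflow can be rewritten as an
// add-with-overflow of x with itself.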
5370
5371 if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICstOrSplat(2)))
5372 return false;
5373
5374 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5375 Observer.changingInstr(MI);
5376 unsigned NewOpc = Opc == TargetOpcode::G_UMULO ? TargetOpcode::G_UADDO
5377 : TargetOpcode::G_SADDO;
5378 MI.setDesc(Builder.getTII().get(NewOpc));
5379 MI.getOperand(3).setReg(MI.getOperand(2).getReg());
5380 Observer.changedInstr(MI);
5381 };
5382 return true;
5383}
5384
5385 bool CombinerHelper::matchMulOBy0(MachineInstr &MI,
5386 BuildFnTy &MatchInfo) const {
5387 // (G_*MULO x, 0) -> 0 + no carry out
5388 assert(MI.getOpcode() == TargetOpcode::G_UMULO ||
5389 MI.getOpcode() == TargetOpcode::G_SMULO);
5390 if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICstOrSplat(0)))
5391 return false;
5392 Register Dst = MI.getOperand(0).getReg();
5393 Register Carry = MI.getOperand(1).getReg();
5394 if (!isConstantLegalOrBeforeLegalizer(MRI.getType(Dst)) ||
5395 !isConstantLegalOrBeforeLegalizer(MRI.getType(Carry)))
5396 return false;
5397 MatchInfo = [=](MachineIRBuilder &B) {
5398 B.buildConstant(Dst, 0);
5399 B.buildConstant(Carry, 0);
5400 };
5401 return true;
5402}
5403
5404 bool CombinerHelper::matchAddEToAddO(MachineInstr &MI,
5405 BuildFnTy &MatchInfo) const {
5406 // (G_*ADDE x, y, 0) -> (G_*ADDO x, y)
5407 // (G_*SUBE x, y, 0) -> (G_*SUBO x, y)
5408 assert(MI.getOpcode() == TargetOpcode::G_UADDE ||
5409 MI.getOpcode() == TargetOpcode::G_SADDE ||
5410 MI.getOpcode() == TargetOpcode::G_USUBE ||
5411 MI.getOpcode() == TargetOpcode::G_SSUBE);
5412 if (!mi_match(MI.getOperand(4).getReg(), MRI, m_SpecificICstOrSplat(0)))
5413 return false;
5414 MatchInfo = [&](MachineIRBuilder &B) {
5415 unsigned NewOpcode;
5416 switch (MI.getOpcode()) {
5417 case TargetOpcode::G_UADDE:
5418 NewOpcode = TargetOpcode::G_UADDO;
5419 break;
5420 case TargetOpcode::G_SADDE:
5421 NewOpcode = TargetOpcode::G_SADDO;
5422 break;
5423 case TargetOpcode::G_USUBE:
5424 NewOpcode = TargetOpcode::G_USUBO;
5425 break;
5426 case TargetOpcode::G_SSUBE:
5427 NewOpcode = TargetOpcode::G_SSUBO;
5428 break;
5429 }
5430 Observer.changingInstr(MI);
5431 MI.setDesc(B.getTII().get(NewOpcode));
5432 MI.removeOperand(4);
5433 Observer.changedInstr(MI);
5434 };
5435 return true;
5436}
5437
5438 bool CombinerHelper::matchSubAddSameReg(MachineInstr &MI,
5439 BuildFnTy &MatchInfo) const {
5440 assert(MI.getOpcode() == TargetOpcode::G_SUB);
5441 Register Dst = MI.getOperand(0).getReg();
5442 // (x + y) - z -> x (if y == z)
5443 // (x + y) - z -> y (if x == z)
5444 Register X, Y, Z;
5445 if (mi_match(Dst, MRI, m_GSub(m_GAdd(m_Reg(X), m_Reg(Y)), m_Reg(Z)))) {
5446 Register ReplaceReg;
5447 int64_t CstX, CstY;
5448 if (Y == Z || (mi_match(Y, MRI, m_ICstOrSplat(CstY)) &&
5449 mi_match(Z, MRI, m_SpecificICstOrSplat(CstY))))
5450 ReplaceReg = X;
5451 else if (X == Z || (mi_match(X, MRI, m_ICstOrSplat(CstX)) &&
5452 mi_match(Z, MRI, m_SpecificICstOrSplat(CstX))))
5453 ReplaceReg = Y;
5454 if (ReplaceReg) {
5455 MatchInfo = [=](MachineIRBuilder &B) { B.buildCopy(Dst, ReplaceReg); };
5456 return true;
5457 }
5458 }
5459
5460 // x - (y + z) -> 0 - y (if x == z)
5461 // x - (y + z) -> 0 - z (if x == y)
5462 if (mi_match(Dst, MRI, m_GSub(m_Reg(X), m_GAdd(m_Reg(Y), m_Reg(Z))))) {
5463 Register ReplaceReg;
5464 int64_t CstX;
5465 if (X == Z || (mi_match(X, MRI, m_ICstOrSplat(CstX)) &&
5466 mi_match(Z, MRI, m_SpecificICstOrSplat(CstX))))
5467 ReplaceReg = Y;
5468 else if (X == Y || (mi_match(X, MRI, m_ICstOrSplat(CstX)) &&
5469 mi_match(Y, MRI, m_SpecificICstOrSplat(CstX))))
5470 ReplaceReg = Z;
5471 if (ReplaceReg) {
5472 MatchInfo = [=](MachineIRBuilder &B) {
5473 auto Zero = B.buildConstant(MRI.getType(Dst), 0);
5474 B.buildSub(Dst, Zero, ReplaceReg);
5475 };
5476 return true;
5477 }
5478 }
5479 return false;
5480}
5481
5482 MachineInstr *CombinerHelper::buildUDivOrURemUsingMul(MachineInstr &MI) const {
5483 unsigned Opcode = MI.getOpcode();
5484 assert(Opcode == TargetOpcode::G_UDIV || Opcode == TargetOpcode::G_UREM);
5485 auto &UDivorRem = cast<GenericMachineInstr>(MI);
5486 Register Dst = UDivorRem.getReg(0);
5487 Register LHS = UDivorRem.getReg(1);
5488 Register RHS = UDivorRem.getReg(2);
5489 LLT Ty = MRI.getType(Dst);
5490 LLT ScalarTy = Ty.getScalarType();
5491 const unsigned EltBits = ScalarTy.getScalarSizeInBits();
5492 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
5493 LLT ScalarShiftAmtTy = ShiftAmtTy.getScalarType();
5494
5495 auto &MIB = Builder;
5496
5497 bool UseSRL = false;
5498 SmallVector<Register, 16> Shifts, Factors;
5499 auto *RHSDefInstr = cast<GenericMachineInstr>(getDefIgnoringCopies(RHS, MRI));
5500 bool IsSplat = getIConstantSplatVal(*RHSDefInstr, MRI).has_value();
5501
5502 auto BuildExactUDIVPattern = [&](const Constant *C) {
5503 // Don't recompute inverses for each splat element.
5504 if (IsSplat && !Factors.empty()) {
5505 Shifts.push_back(Shifts[0]);
5506 Factors.push_back(Factors[0]);
5507 return true;
5508 }
5509
5510 auto *CI = cast<ConstantInt>(C);
5511 APInt Divisor = CI->getValue();
5512 unsigned Shift = Divisor.countr_zero();
5513 if (Shift) {
5514 Divisor.lshrInPlace(Shift);
5515 UseSRL = true;
5516 }
5517
5518 // Calculate the multiplicative inverse modulo BW.
5519 APInt Factor = Divisor.multiplicativeInverse();
5520 Shifts.push_back(MIB.buildConstant(ScalarShiftAmtTy, Shift).getReg(0));
5521 Factors.push_back(MIB.buildConstant(ScalarTy, Factor).getReg(0));
5522 return true;
5523 };
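// Worked example (illustrative, numbers not from the original source): for an
// exact i8 udiv by 24, Shift = 3 and the remaining odd divisor is 3, whose
// multiplicative inverse modulo 2^8 is 171 (3 * 171 == 513 == 2 * 256 + 1),
// so x / 24 == (x >> 3) * 171 (mod 2^8).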
5524
5525 if (MI.getFlag(MachineInstr::MIFlag::IsExact)) {
5526 // Collect all magic values from the build vector.
5527 if (!matchUnaryPredicate(MRI, RHS, BuildExactUDIVPattern))
5528 llvm_unreachable("Expected unary predicate match to succeed");
5529
5530 Register Shift, Factor;
5531 if (Ty.isVector()) {
5532 Shift = MIB.buildBuildVector(ShiftAmtTy, Shifts).getReg(0);
5533 Factor = MIB.buildBuildVector(Ty, Factors).getReg(0);
5534 } else {
5535 Shift = Shifts[0];
5536 Factor = Factors[0];
5537 }
5538
5539 Register Res = LHS;
5540
5541 if (UseSRL)
5542 Res = MIB.buildLShr(Ty, Res, Shift, MachineInstr::IsExact).getReg(0);
5543
5544 return MIB.buildMul(Ty, Res, Factor);
5545 }
5546
5547 unsigned KnownLeadingZeros =
5548 VT ? VT->getKnownBits(LHS).countMinLeadingZeros() : 0;
5549
5550 bool UseNPQ = false;
5551 SmallVector<Register, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
5552 auto BuildUDIVPattern = [&](const Constant *C) {
5553 auto *CI = cast<ConstantInt>(C);
5554 const APInt &Divisor = CI->getValue();
5555
5556 bool SelNPQ = false;
5557 APInt Magic(Divisor.getBitWidth(), 0);
5558 unsigned PreShift = 0, PostShift = 0;
5559
5560 // Magic algorithm doesn't work for division by 1. We need to emit a select
5561 // at the end.
5562 // TODO: Use undef values for divisor of 1.
5563 if (!Divisor.isOne()) {
5564
5565 // UnsignedDivisionByConstantInfo doesn't work correctly if the leading zeros
5566 // in the dividend exceed the leading zeros of the divisor.
5567 UnsignedDivisionByConstantInfo magics =
5568 UnsignedDivisionByConstantInfo::get(
5569 Divisor, std::min(KnownLeadingZeros, Divisor.countl_zero()));
5570
5571 Magic = std::move(magics.Magic);
5572
5573 assert(magics.PreShift < Divisor.getBitWidth() &&
5574 "We shouldn't generate an undefined shift!");
5575 assert(magics.PostShift < Divisor.getBitWidth() &&
5576 "We shouldn't generate an undefined shift!");
5577 assert((!magics.IsAdd || magics.PreShift == 0) && "Unexpected pre-shift");
5578 PreShift = magics.PreShift;
5579 PostShift = magics.PostShift;
5580 SelNPQ = magics.IsAdd;
5581 }
5582
5583 PreShifts.push_back(
5584 MIB.buildConstant(ScalarShiftAmtTy, PreShift).getReg(0));
5585 MagicFactors.push_back(MIB.buildConstant(ScalarTy, Magic).getReg(0));
5586 NPQFactors.push_back(
5587 MIB.buildConstant(ScalarTy,
5588 SelNPQ ? APInt::getOneBitSet(EltBits, EltBits - 1)
5589 : APInt::getZero(EltBits))
5590 .getReg(0));
5591 PostShifts.push_back(
5592 MIB.buildConstant(ScalarShiftAmtTy, PostShift).getReg(0));
5593 UseNPQ |= SelNPQ;
5594 return true;
5595 };
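// Worked example (illustrative, numbers not from the original source): an
// unsigned i32 divide by 5 typically yields Magic = 0xCCCCCCCD
// (== ceil(2^34 / 5)), PreShift = 0, PostShift = 2 and no NPQ fixup, i.e.
// q = umulh(n, 0xCCCCCCCD) >> 2.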
5596
5597 // Collect the shifts/magic values from each element.
5598 bool Matched = matchUnaryPredicate(MRI, RHS, BuildUDIVPattern);
5599 (void)Matched;
5600 assert(Matched && "Expected unary predicate match to succeed");
5601
5602 Register PreShift, PostShift, MagicFactor, NPQFactor;
5603 auto *RHSDef = getOpcodeDef<GBuildVector>(RHS, MRI);
5604 if (RHSDef) {
5605 PreShift = MIB.buildBuildVector(ShiftAmtTy, PreShifts).getReg(0);
5606 MagicFactor = MIB.buildBuildVector(Ty, MagicFactors).getReg(0);
5607 NPQFactor = MIB.buildBuildVector(Ty, NPQFactors).getReg(0);
5608 PostShift = MIB.buildBuildVector(ShiftAmtTy, PostShifts).getReg(0);
5609 } else {
5610 assert(MRI.getType(RHS).isScalar() &&
5611 "Non-build_vector operation should have been a scalar");
5612 PreShift = PreShifts[0];
5613 MagicFactor = MagicFactors[0];
5614 PostShift = PostShifts[0];
5615 }
5616
5617 Register Q = LHS;
5618 Q = MIB.buildLShr(Ty, Q, PreShift).getReg(0);
5619
5620 // Multiply the numerator (operand 0) by the magic value.
5621 Q = MIB.buildUMulH(Ty, Q, MagicFactor).getReg(0);
5622
5623 if (UseNPQ) {
5624 Register NPQ = MIB.buildSub(Ty, LHS, Q).getReg(0);
5625
5626 // For vectors we might have a mix of non-NPQ/NPQ paths, so use
5627 // G_UMULH to act as a SRL-by-1 for NPQ, else multiply by zero.
5628 if (Ty.isVector())
5629 NPQ = MIB.buildUMulH(Ty, NPQ, NPQFactor).getReg(0);
5630 else
5631 NPQ = MIB.buildLShr(Ty, NPQ, MIB.buildConstant(ShiftAmtTy, 1)).getReg(0);
5632
5633 Q = MIB.buildAdd(Ty, NPQ, Q).getReg(0);
5634 }
5635
5636 Q = MIB.buildLShr(Ty, Q, PostShift).getReg(0);
5637 auto One = MIB.buildConstant(Ty, 1);
5638 auto IsOne = MIB.buildICmp(
5639 CmpInst::Predicate::ICMP_EQ,
5640 Ty.isScalar() ? LLT::scalar(1) : Ty.changeElementSize(1), RHS, One);
5641 auto ret = MIB.buildSelect(Ty, IsOne, LHS, Q);
5642
5643 if (Opcode == TargetOpcode::G_UREM) {
5644 auto Prod = MIB.buildMul(Ty, ret, RHS);
5645 return MIB.buildSub(Ty, LHS, Prod);
5646 }
5647 return ret;
5648}
5649
5650 bool CombinerHelper::matchUDivOrURemByConst(MachineInstr &MI) const {
5651 unsigned Opcode = MI.getOpcode();
5652 assert(Opcode == TargetOpcode::G_UDIV || Opcode == TargetOpcode::G_UREM);
5653 Register Dst = MI.getOperand(0).getReg();
5654 Register RHS = MI.getOperand(2).getReg();
5655 LLT DstTy = MRI.getType(Dst);
5656
5657 auto &MF = *MI.getMF();
5658 AttributeList Attr = MF.getFunction().getAttributes();
5659 const auto &TLI = getTargetLowering();
5660 LLVMContext &Ctx = MF.getFunction().getContext();
5661 if (TLI.isIntDivCheap(getApproximateEVTForLLT(DstTy, Ctx), Attr))
5662 return false;
5663
5664 // Don't do this for minsize because the instruction sequence is usually
5665 // larger.
5666 if (MF.getFunction().hasMinSize())
5667 return false;
5668
5669 if (Opcode == TargetOpcode::G_UDIV &&
5670 MI.getFlag(MachineInstr::MIFlag::IsExact)) {
5671 return matchUnaryPredicate(
5672 MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); });
5673 }
5674
5675 auto *RHSDef = MRI.getVRegDef(RHS);
5676 if (!isConstantOrConstantVector(*RHSDef, MRI))
5677 return false;
5678
5679 // Don't do this if the types are not going to be legal.
5680 if (LI) {
5681 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_MUL, {DstTy, DstTy}}))
5682 return false;
5683 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMULH, {DstTy}}))
5684 return false;
5685 if (!isLegalOrBeforeLegalizer(
5686 {TargetOpcode::G_ICMP,
5687 {DstTy.isVector() ? DstTy.changeElementSize(1) : LLT::scalar(1),
5688 DstTy}}))
5689 return false;
5690 if (Opcode == TargetOpcode::G_UREM &&
5691 !isLegalOrBeforeLegalizer({TargetOpcode::G_SUB, {DstTy, DstTy}}))
5692 return false;
5693 }
5694
5695 return matchUnaryPredicate(
5696 MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); });
5697}
5698
5699 void CombinerHelper::applyUDivOrURemByConst(MachineInstr &MI) const {
5700 auto *NewMI = buildUDivOrURemUsingMul(MI);
5701 replaceSingleDefInstWithReg(MI, NewMI->getOperand(0).getReg());
5702}
5703
5704 bool CombinerHelper::matchSDivOrSRemByConst(MachineInstr &MI) const {
5705 unsigned Opcode = MI.getOpcode();
5706 assert(Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_SREM);
5707 Register Dst = MI.getOperand(0).getReg();
5708 Register RHS = MI.getOperand(2).getReg();
5709 LLT DstTy = MRI.getType(Dst);
5710 auto SizeInBits = DstTy.getScalarSizeInBits();
5711 LLT WideTy = DstTy.changeElementSize(SizeInBits * 2);
5712
5713 auto &MF = *MI.getMF();
5714 AttributeList Attr = MF.getFunction().getAttributes();
5715 const auto &TLI = getTargetLowering();
5716 LLVMContext &Ctx = MF.getFunction().getContext();
5717 if (TLI.isIntDivCheap(getApproximateEVTForLLT(DstTy, Ctx), Attr))
5718 return false;
5719
5720 // Don't do this for minsize because the instruction sequence is usually
5721 // larger.
5722 if (MF.getFunction().hasMinSize())
5723 return false;
5724
5725 // If the sdiv has an 'exact' flag we can use a simpler lowering.
5726 if (Opcode == TargetOpcode::G_SDIV &&
5727 MI.getFlag(MachineInstr::MIFlag::IsExact)) {
5728 return matchUnaryPredicate(
5729 MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); });
5730 }
5731
5732 auto *RHSDef = MRI.getVRegDef(RHS);
5733 if (!isConstantOrConstantVector(*RHSDef, MRI))
5734 return false;
5735
5736 // Don't do this if the types are not going to be legal.
5737 if (LI) {
5738 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_MUL, {DstTy, DstTy}}))
5739 return false;
5740 if (!isLegal({TargetOpcode::G_SMULH, {DstTy}}) &&
5741 !isLegalOrHasWidenScalar({TargetOpcode::G_MUL, {WideTy, WideTy}}))
5742 return false;
5743 if (Opcode == TargetOpcode::G_SREM &&
5744 !isLegalOrBeforeLegalizer({TargetOpcode::G_SUB, {DstTy, DstTy}}))
5745 return false;
5746 }
5747
5748 return matchUnaryPredicate(
5749 MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); });
5750}
5751
5752 void CombinerHelper::applySDivOrSRemByConst(MachineInstr &MI) const {
5753 auto *NewMI = buildSDivOrSRemUsingMul(MI);
5754 replaceSingleDefInstWithReg(MI, NewMI->getOperand(0).getReg());
5755}
5756
5757 MachineInstr *CombinerHelper::buildSDivOrSRemUsingMul(MachineInstr &MI) const {
5758 unsigned Opcode = MI.getOpcode();
5759 assert(MI.getOpcode() == TargetOpcode::G_SDIV ||
5760 Opcode == TargetOpcode::G_SREM);
5761 auto &SDivorRem = cast<GenericMachineInstr>(MI);
5762 Register Dst = SDivorRem.getReg(0);
5763 Register LHS = SDivorRem.getReg(1);
5764 Register RHS = SDivorRem.getReg(2);
5765 LLT Ty = MRI.getType(Dst);
5766 LLT ScalarTy = Ty.getScalarType();
5767 const unsigned EltBits = ScalarTy.getScalarSizeInBits();
5768 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
5769 LLT ScalarShiftAmtTy = ShiftAmtTy.getScalarType();
5770 auto &MIB = Builder;
5771
5772 bool UseSRA = false;
5773 SmallVector<Register, 16> ExactShifts, ExactFactors;
5774
5775 auto *RHSDefInstr = cast<GenericMachineInstr>(getDefIgnoringCopies(RHS, MRI));
5776 bool IsSplat = getIConstantSplatVal(*RHSDefInstr, MRI).has_value();
5777
5778 auto BuildExactSDIVPattern = [&](const Constant *C) {
5779 // Don't recompute inverses for each splat element.
5780 if (IsSplat && !ExactFactors.empty()) {
5781 ExactShifts.push_back(ExactShifts[0]);
5782 ExactFactors.push_back(ExactFactors[0]);
5783 return true;
5784 }
5785
5786 auto *CI = cast<ConstantInt>(C);
5787 APInt Divisor = CI->getValue();
5788 unsigned Shift = Divisor.countr_zero();
5789 if (Shift) {
5790 Divisor.ashrInPlace(Shift);
5791 UseSRA = true;
5792 }
5793
5794 // Calculate the multiplicative inverse modulo BW.
5795 // 2^W requires W + 1 bits, so we have to extend and then truncate.
5796 APInt Factor = Divisor.multiplicativeInverse();
5797 ExactShifts.push_back(MIB.buildConstant(ScalarShiftAmtTy, Shift).getReg(0));
5798 ExactFactors.push_back(MIB.buildConstant(ScalarTy, Factor).getReg(0));
5799 return true;
5800 };
5801
5802 if (MI.getFlag(MachineInstr::MIFlag::IsExact)) {
5803 // Collect all magic values from the build vector.
5804 bool Matched = matchUnaryPredicate(MRI, RHS, BuildExactSDIVPattern);
5805 (void)Matched;
5806 assert(Matched && "Expected unary predicate match to succeed");
5807
5808 Register Shift, Factor;
5809 if (Ty.isVector()) {
5810 Shift = MIB.buildBuildVector(ShiftAmtTy, ExactShifts).getReg(0);
5811 Factor = MIB.buildBuildVector(Ty, ExactFactors).getReg(0);
5812 } else {
5813 Shift = ExactShifts[0];
5814 Factor = ExactFactors[0];
5815 }
5816
5817 Register Res = LHS;
5818
5819 if (UseSRA)
5820 Res = MIB.buildAShr(Ty, Res, Shift, MachineInstr::IsExact).getReg(0);
5821
5822 return MIB.buildMul(Ty, Res, Factor);
5823 }
5824
5825 SmallVector<Register, 16> MagicFactors, Factors, Shifts, ShiftMasks;
5826
5827 auto BuildSDIVPattern = [&](const Constant *C) {
5828 auto *CI = cast<ConstantInt>(C);
5829 const APInt &Divisor = CI->getValue();
5830
5831 SignedDivisionByConstantInfo Magics =
5832 SignedDivisionByConstantInfo::get(Divisor);
5833 int NumeratorFactor = 0;
5834 int ShiftMask = -1;
5835
5836 if (Divisor.isOne() || Divisor.isAllOnes()) {
5837 // If d is +1/-1, we just multiply the numerator by +1/-1.
5838 NumeratorFactor = Divisor.getSExtValue();
5839 Magics.Magic = 0;
5840 Magics.ShiftAmount = 0;
5841 ShiftMask = 0;
5842 } else if (Divisor.isStrictlyPositive() && Magics.Magic.isNegative()) {
5843 // If d > 0 and m < 0, add the numerator.
5844 NumeratorFactor = 1;
5845 } else if (Divisor.isNegative() && Magics.Magic.isStrictlyPositive()) {
5846 // If d < 0 and m > 0, subtract the numerator.
5847 NumeratorFactor = -1;
5848 }
5849
5850 MagicFactors.push_back(MIB.buildConstant(ScalarTy, Magics.Magic).getReg(0));
5851 Factors.push_back(MIB.buildConstant(ScalarTy, NumeratorFactor).getReg(0));
5852 Shifts.push_back(
5853 MIB.buildConstant(ScalarShiftAmtTy, Magics.ShiftAmount).getReg(0));
5854 ShiftMasks.push_back(MIB.buildConstant(ScalarTy, ShiftMask).getReg(0));
5855
5856 return true;
5857 };
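// Worked example (illustrative, numbers not from the original source): a
// signed i32 divide by 3 typically yields Magic = 0x55555556, ShiftAmount = 0
// and NumeratorFactor = 0; the quotient is smulh(n, 0x55555556) plus its own
// sign bit, which the shift/mask/add sequence below reproduces.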
5858
5859 // Collect the shifts/magic values from each element.
5860 bool Matched = matchUnaryPredicate(MRI, RHS, BuildSDIVPattern);
5861 (void)Matched;
5862 assert(Matched && "Expected unary predicate match to succeed");
5863
5864 Register MagicFactor, Factor, Shift, ShiftMask;
5865 auto *RHSDef = getOpcodeDef<GBuildVector>(RHS, MRI);
5866 if (RHSDef) {
5867 MagicFactor = MIB.buildBuildVector(Ty, MagicFactors).getReg(0);
5868 Factor = MIB.buildBuildVector(Ty, Factors).getReg(0);
5869 Shift = MIB.buildBuildVector(ShiftAmtTy, Shifts).getReg(0);
5870 ShiftMask = MIB.buildBuildVector(Ty, ShiftMasks).getReg(0);
5871 } else {
5872 assert(MRI.getType(RHS).isScalar() &&
5873 "Non-build_vector operation should have been a scalar");
5874 MagicFactor = MagicFactors[0];
5875 Factor = Factors[0];
5876 Shift = Shifts[0];
5877 ShiftMask = ShiftMasks[0];
5878 }
5879
5880 Register Q = LHS;
5881 Q = MIB.buildSMulH(Ty, LHS, MagicFactor).getReg(0);
5882
5883 // (Optionally) Add/subtract the numerator using Factor.
5884 Factor = MIB.buildMul(Ty, LHS, Factor).getReg(0);
5885 Q = MIB.buildAdd(Ty, Q, Factor).getReg(0);
5886
5887 // Shift right algebraic by shift value.
5888 Q = MIB.buildAShr(Ty, Q, Shift).getReg(0);
5889
5890 // Extract the sign bit, mask it and add it to the quotient.
5891 auto SignShift = MIB.buildConstant(ShiftAmtTy, EltBits - 1);
5892 auto T = MIB.buildLShr(Ty, Q, SignShift);
5893 T = MIB.buildAnd(Ty, T, ShiftMask);
5894 auto ret = MIB.buildAdd(Ty, Q, T);
5895
5896 if (Opcode == TargetOpcode::G_SREM) {
5897 auto Prod = MIB.buildMul(Ty, ret, RHS);
5898 return MIB.buildSub(Ty, LHS, Prod);
5899 }
5900 return ret;
5901}
5902
5903 bool CombinerHelper::matchDivByPow2(MachineInstr &MI, bool IsSigned) const {
5904 assert((MI.getOpcode() == TargetOpcode::G_SDIV ||
5905 MI.getOpcode() == TargetOpcode::G_UDIV) &&
5906 "Expected SDIV or UDIV");
5907 auto &Div = cast<GenericMachineInstr>(MI);
5908 Register RHS = Div.getReg(2);
5909 auto MatchPow2 = [&](const Constant *C) {
5910 auto *CI = dyn_cast<ConstantInt>(C);
5911 return CI && (CI->getValue().isPowerOf2() ||
5912 (IsSigned && CI->getValue().isNegatedPowerOf2()));
5913 };
5914 return matchUnaryPredicate(MRI, RHS, MatchPow2, /*AllowUndefs=*/false);
5915}
5916
5917 void CombinerHelper::applySDivByPow2(MachineInstr &MI) const {
5918 assert(MI.getOpcode() == TargetOpcode::G_SDIV && "Expected SDIV");
5919 auto &SDiv = cast<GenericMachineInstr>(MI);
5920 Register Dst = SDiv.getReg(0);
5921 Register LHS = SDiv.getReg(1);
5922 Register RHS = SDiv.getReg(2);
5923 LLT Ty = MRI.getType(Dst);
5924 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
5925 LLT CCVT =
5926 Ty.isVector() ? LLT::vector(Ty.getElementCount(), 1) : LLT::scalar(1);
5927
5928 // Effectively we want to lower G_SDIV %lhs, %rhs, where %rhs is a power of 2,
5929 // to the following version:
5930 //
5931 // %c1 = G_CTTZ %rhs
5932 // %inexact = G_SUB $bitwidth, %c1
5933 // %sign = G_ASHR %lhs, $(bitwidth - 1)
5934 // %lshr = G_LSHR %sign, %inexact
5935 // %add = G_ADD %lhs, %lshr
5936 // %ashr = G_ASHR %add, %c1
5937 // %ashr = G_SELECT %isoneorallones, %lhs, %ashr
5938 // %zero = G_CONSTANT $0
5939 // %neg = G_NEG %ashr
5940 // %isneg = G_ICMP SLT %rhs, %zero
5941 // %res = G_SELECT %isneg, %neg, %ashr
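// Worked example (illustrative, not from the original source): for i32
// %lhs = -7 and %rhs = 4: %c1 = 2, %inexact = 30, %sign = -1, %lshr = 3,
// %add = -4 and %ashr = -1, matching -7 / 4 truncated toward zero.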
5942
5943 unsigned BitWidth = Ty.getScalarSizeInBits();
5944 auto Zero = Builder.buildConstant(Ty, 0);
5945
5946 auto Bits = Builder.buildConstant(ShiftAmtTy, BitWidth);
5947 auto C1 = Builder.buildCTTZ(ShiftAmtTy, RHS);
5948 auto Inexact = Builder.buildSub(ShiftAmtTy, Bits, C1);
5949 // Splat the sign bit into the register
5950 auto Sign = Builder.buildAShr(
5951 Ty, LHS, Builder.buildConstant(ShiftAmtTy, BitWidth - 1));
5952
5953 // Add (LHS < 0) ? |RHS| - 1 : 0 so the arithmetic shift rounds toward zero.
5954 auto LSrl = Builder.buildLShr(Ty, Sign, Inexact);
5955 auto Add = Builder.buildAdd(Ty, LHS, LSrl);
5956 auto AShr = Builder.buildAShr(Ty, Add, C1);
5957
5958 // Special case: (sdiv X, 1) -> X
5959 // Special Case: (sdiv X, -1) -> 0-X
5960 auto One = Builder.buildConstant(Ty, 1);
5961 auto MinusOne = Builder.buildConstant(Ty, -1);
5962 auto IsOne = Builder.buildICmp(CmpInst::Predicate::ICMP_EQ, CCVT, RHS, One);
5963 auto IsMinusOne =
5964 Builder.buildICmp(CmpInst::Predicate::ICMP_EQ, CCVT, RHS, MinusOne);
5965 auto IsOneOrMinusOne = Builder.buildOr(CCVT, IsOne, IsMinusOne);
5966 AShr = Builder.buildSelect(Ty, IsOneOrMinusOne, LHS, AShr);
5967
5968 // If divided by a positive value, we're done. Otherwise, the result must be
5969 // negated.
5970 auto Neg = Builder.buildNeg(Ty, AShr);
5971 auto IsNeg = Builder.buildICmp(CmpInst::Predicate::ICMP_SLT, CCVT, RHS, Zero);
5972 Builder.buildSelect(MI.getOperand(0).getReg(), IsNeg, Neg, AShr);
5973 MI.eraseFromParent();
5974}
5975
5976 void CombinerHelper::applyUDivByPow2(MachineInstr &MI) const {
5977 assert(MI.getOpcode() == TargetOpcode::G_UDIV && "Expected UDIV");
5978 auto &UDiv = cast<GenericMachineInstr>(MI);
5979 Register Dst = UDiv.getReg(0);
5980 Register LHS = UDiv.getReg(1);
5981 Register RHS = UDiv.getReg(2);
5982 LLT Ty = MRI.getType(Dst);
5983 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
5984
5985 auto C1 = Builder.buildCTTZ(ShiftAmtTy, RHS);
5986 Builder.buildLShr(MI.getOperand(0).getReg(), LHS, C1);
5987 MI.eraseFromParent();
5988}
5989
5990 bool CombinerHelper::matchUMulHToLShr(MachineInstr &MI) const {
5991 assert(MI.getOpcode() == TargetOpcode::G_UMULH);
5992 Register RHS = MI.getOperand(2).getReg();
5993 Register Dst = MI.getOperand(0).getReg();
5994 LLT Ty = MRI.getType(Dst);
5995 LLT RHSTy = MRI.getType(RHS);
5996 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
5997 auto MatchPow2ExceptOne = [&](const Constant *C) {
5998 if (auto *CI = dyn_cast<ConstantInt>(C))
5999 return CI->getValue().isPowerOf2() && !CI->getValue().isOne();
6000 return false;
6001 };
6002 if (!matchUnaryPredicate(MRI, RHS, MatchPow2ExceptOne, false))
6003 return false;
6004 // We need to check both G_LSHR and G_CTLZ because the combine uses G_CTLZ to
6005 // get log base 2, and it is not always legal on a target.
6006 return isLegalOrBeforeLegalizer({TargetOpcode::G_LSHR, {Ty, ShiftAmtTy}}) &&
6007 isLegalOrBeforeLegalizer({TargetOpcode::G_CTLZ, {RHSTy, RHSTy}});
6008}
6009
6010 void CombinerHelper::applyUMulHToLShr(MachineInstr &MI) const {
6011 Register LHS = MI.getOperand(1).getReg();
6012 Register RHS = MI.getOperand(2).getReg();
6013 Register Dst = MI.getOperand(0).getReg();
6014 LLT Ty = MRI.getType(Dst);
6015 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
6016 unsigned NumEltBits = Ty.getScalarSizeInBits();
6017
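// G_UMULH by 2^k yields the high half of x * 2^k, i.e. x >> (bitwidth - k).
// Illustrative example (not from the original source): for i32 x and RHS = 8
// (k = 3), the result is x >> 29; the code below builds that shift amount
// from the log base 2 of RHS.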
6018 auto LogBase2 = buildLogBase2(RHS, Builder);
6019 auto ShiftAmt =
6020 Builder.buildSub(Ty, Builder.buildConstant(Ty, NumEltBits), LogBase2);
6021 auto Trunc = Builder.buildZExtOrTrunc(ShiftAmtTy, ShiftAmt);
6022 Builder.buildLShr(Dst, LHS, Trunc);
6023 MI.eraseFromParent();
6024}
6025
6027 Register &MatchInfo) const {
6028 Register Dst = MI.getOperand(0).getReg();
6029 Register Src = MI.getOperand(1).getReg();
6030 LLT DstTy = MRI.getType(Dst);
6031 LLT SrcTy = MRI.getType(Src);
6032 unsigned NumDstBits = DstTy.getScalarSizeInBits();
6033 unsigned NumSrcBits = SrcTy.getScalarSizeInBits();
6034 assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
6035
6036 if (!LI || !isLegal({TargetOpcode::G_TRUNC_SSAT_S, {DstTy, SrcTy}}))
6037 return false;
6038
6039 APInt SignedMax = APInt::getSignedMaxValue(NumDstBits).sext(NumSrcBits);
6040 APInt SignedMin = APInt::getSignedMinValue(NumDstBits).sext(NumSrcBits);
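// Clamping to [SignedMin, SignedMax] before the truncate, in either
// smin(smax(...)) or smax(smin(...)) order, is exactly the semantics of
// G_TRUNC_SSAT_S, so either form below can be rewritten to it.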
6041 return mi_match(Src, MRI,
6042 m_GSMin(m_GSMax(m_Reg(MatchInfo),
6043 m_SpecificICstOrSplat(SignedMin)),
6044 m_SpecificICstOrSplat(SignedMax))) ||
6045 mi_match(Src, MRI,
6046 m_GSMax(m_GSMin(m_Reg(MatchInfo),
6047 m_SpecificICstOrSplat(SignedMax)),
6048 m_SpecificICstOrSplat(SignedMin)));
6049}
6050
6052 Register &MatchInfo) const {
6053 Register Dst = MI.getOperand(0).getReg();
6054 Builder.buildTruncSSatS(Dst, MatchInfo);
6055 MI.eraseFromParent();
6056}
6057
6059 Register &MatchInfo) const {
6060 Register Dst = MI.getOperand(0).getReg();
6061 Register Src = MI.getOperand(1).getReg();
6062 LLT DstTy = MRI.getType(Dst);
6063 LLT SrcTy = MRI.getType(Src);
6064 unsigned NumDstBits = DstTy.getScalarSizeInBits();
6065 unsigned NumSrcBits = SrcTy.getScalarSizeInBits();
6066 assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
6067
6068 if (!LI || !isLegal({TargetOpcode::G_TRUNC_SSAT_U, {DstTy, SrcTy}}))
6069 return false;
6070 APInt UnsignedMax = APInt::getMaxValue(NumDstBits).zext(NumSrcBits);
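// Each pattern below clamps the signed source into [0, UnsignedMax] before
// the truncate, which matches the semantics of G_TRUNC_SSAT_U.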
6071 return mi_match(Src, MRI,
6073 m_SpecificICstOrSplat(UnsignedMax))) ||
6074 mi_match(Src, MRI,
6075 m_GSMax(m_GSMin(m_Reg(MatchInfo),
6076 m_SpecificICstOrSplat(UnsignedMax)),
6077 m_SpecificICstOrSplat(0))) ||
6078 mi_match(Src, MRI,
6080 m_SpecificICstOrSplat(UnsignedMax)));
6081}
6082
6084 Register &MatchInfo) const {
6085 Register Dst = MI.getOperand(0).getReg();
6086 Builder.buildTruncSSatU(Dst, MatchInfo);
6087 MI.eraseFromParent();
6088}
6089
6091 MachineInstr &MinMI) const {
6092 Register Min = MinMI.getOperand(2).getReg();
6093 Register Val = MinMI.getOperand(1).getReg();
6094 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6095 LLT SrcTy = MRI.getType(Val);
6096 unsigned NumDstBits = DstTy.getScalarSizeInBits();
6097 unsigned NumSrcBits = SrcTy.getScalarSizeInBits();
6098 assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
6099
6100 if (!LI || !isLegal({TargetOpcode::G_TRUNC_SSAT_U, {DstTy, SrcTy}}))
6101 return false;
6102 APInt UnsignedMax = APInt::getMaxValue(NumDstBits).zext(NumSrcBits);
6103 return mi_match(Min, MRI, m_SpecificICstOrSplat(UnsignedMax)) &&
6104 !mi_match(Val, MRI, m_GSMax(m_Reg(), m_Reg()));
6105}
6106
6108 MachineInstr &SrcMI) const {
6109 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6110 LLT SrcTy = MRI.getType(SrcMI.getOperand(1).getReg());
6111
6112 return LI &&
6113 isLegalOrBeforeLegalizer({TargetOpcode::G_FPTOUI_SAT, {DstTy, SrcTy}});
6114}
6115
6117 BuildFnTy &MatchInfo) const {
6118 unsigned Opc = MI.getOpcode();
6119 assert(Opc == TargetOpcode::G_FADD || Opc == TargetOpcode::G_FSUB ||
6120 Opc == TargetOpcode::G_FMUL || Opc == TargetOpcode::G_FDIV ||
6121 Opc == TargetOpcode::G_FMAD || Opc == TargetOpcode::G_FMA);
6122
6123 Register Dst = MI.getOperand(0).getReg();
6124 Register X = MI.getOperand(1).getReg();
6125 Register Y = MI.getOperand(2).getReg();
6126 LLT Type = MRI.getType(Dst);
6127
6128 // fold (fadd x, fneg(y)) -> (fsub x, y)
6129 // fold (fadd fneg(y), x) -> (fsub x, y)
6130 // G_FADD is commutative, so both cases are checked by m_GFAdd.
6131 if (mi_match(Dst, MRI, m_GFAdd(m_Reg(X), m_GFNeg(m_Reg(Y)))) &&
6132 isLegalOrBeforeLegalizer({TargetOpcode::G_FSUB, {Type}})) {
6133 Opc = TargetOpcode::G_FSUB;
6134 }
6135 /// fold (fsub x, fneg(y)) -> (fadd x, y)
6136 else if (mi_match(Dst, MRI, m_GFSub(m_Reg(X), m_GFNeg(m_Reg(Y)))) &&
6137 isLegalOrBeforeLegalizer({TargetOpcode::G_FADD, {Type}})) {
6138 Opc = TargetOpcode::G_FADD;
6139 }
6140 // fold (fmul fneg(x), fneg(y)) -> (fmul x, y)
6141 // fold (fdiv fneg(x), fneg(y)) -> (fdiv x, y)
6142 // fold (fmad fneg(x), fneg(y), z) -> (fmad x, y, z)
6143 // fold (fma fneg(x), fneg(y), z) -> (fma x, y, z)
6144 else if ((Opc == TargetOpcode::G_FMUL || Opc == TargetOpcode::G_FDIV ||
6145 Opc == TargetOpcode::G_FMAD || Opc == TargetOpcode::G_FMA) &&
6146 mi_match(X, MRI, m_GFNeg(m_Reg(X))) &&
6147 mi_match(Y, MRI, m_GFNeg(m_Reg(Y)))) {
6148 // no opcode change
6149 } else
6150 return false;
6151
6152 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6153 Observer.changingInstr(MI);
6154 MI.setDesc(B.getTII().get(Opc));
6155 MI.getOperand(1).setReg(X);
6156 MI.getOperand(2).setReg(Y);
6157 Observer.changedInstr(MI);
6158 };
6159 return true;
6160}
6161
6162 bool CombinerHelper::matchFsubToFneg(MachineInstr &MI,
6163 Register &MatchInfo) const {
6164 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
6165
6166 Register LHS = MI.getOperand(1).getReg();
6167 MatchInfo = MI.getOperand(2).getReg();
6168 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
6169
6170 const auto LHSCst = Ty.isVector()
6171 ? getFConstantSplat(LHS, MRI, /* allowUndef */ true)
6172 : getFConstantVRegValWithLookThrough(LHS, MRI);
6173 if (!LHSCst)
6174 return false;
6175
6176 // -0.0 is always allowed
6177 if (LHSCst->Value.isNegZero())
6178 return true;
6179
6180 // +0.0 is only allowed if nsz is set.
6181 if (LHSCst->Value.isPosZero())
6182 return MI.getFlag(MachineInstr::FmNsz);
6183
6184 return false;
6185}
6186
6187 void CombinerHelper::applyFsubToFneg(MachineInstr &MI,
6188 Register &MatchInfo) const {
6189 Register Dst = MI.getOperand(0).getReg();
6190 Builder.buildFNeg(
6191 Dst, Builder.buildFCanonicalize(MRI.getType(Dst), MatchInfo).getReg(0));
6192 eraseInst(MI);
6193}
6194
6195/// Checks if \p MI is TargetOpcode::G_FMUL and contractable either
6196/// due to global flags or MachineInstr flags.
6197static bool isContractableFMul(MachineInstr &MI, bool AllowFusionGlobally) {
6198 if (MI.getOpcode() != TargetOpcode::G_FMUL)
6199 return false;
6200 return AllowFusionGlobally || MI.getFlag(MachineInstr::MIFlag::FmContract);
6201}
6202
6203static bool hasMoreUses(const MachineInstr &MI0, const MachineInstr &MI1,
6204 const MachineRegisterInfo &MRI) {
6205 return std::distance(MRI.use_instr_nodbg_begin(MI0.getOperand(0).getReg()),
6206 MRI.use_instr_nodbg_end()) >
6207 std::distance(MRI.use_instr_nodbg_begin(MI1.getOperand(0).getReg()),
6208 MRI.use_instr_nodbg_end());
6209}
6210
6211 bool CombinerHelper::canCombineFMadOrFMA(MachineInstr &MI,
6212 bool &AllowFusionGlobally,
6213 bool &HasFMAD, bool &Aggressive,
6214 bool CanReassociate) const {
6215
6216 auto *MF = MI.getMF();
6217 const auto &TLI = *MF->getSubtarget().getTargetLowering();
6218 const TargetOptions &Options = MF->getTarget().Options;
6219 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
6220
6221 if (CanReassociate && !MI.getFlag(MachineInstr::MIFlag::FmReassoc))
6222 return false;
6223
6224 // Floating-point multiply-add with intermediate rounding.
6225 HasFMAD = (!isPreLegalize() && TLI.isFMADLegal(MI, DstType));
6226 // Floating-point multiply-add without intermediate rounding.
6227 bool HasFMA = TLI.isFMAFasterThanFMulAndFAdd(*MF, DstType) &&
6228 isLegalOrBeforeLegalizer({TargetOpcode::G_FMA, {DstType}});
6229 // No valid opcode, do not combine.
6230 if (!HasFMAD && !HasFMA)
6231 return false;
6232
6233 AllowFusionGlobally = Options.AllowFPOpFusion == FPOpFusion::Fast || HasFMAD;
6234 // If the addition is not contractable, do not combine.
6235 if (!AllowFusionGlobally && !MI.getFlag(MachineInstr::MIFlag::FmContract))
6236 return false;
6237
6238 Aggressive = TLI.enableAggressiveFMAFusion(DstType);
6239 return true;
6240}
6241
6242 bool CombinerHelper::matchCombineFAddFMulToFMadOrFMA(
6243 MachineInstr &MI,
6244 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
6245 assert(MI.getOpcode() == TargetOpcode::G_FADD);
6246
6247 bool AllowFusionGlobally, HasFMAD, Aggressive;
6248 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6249 return false;
6250
6251 Register Op1 = MI.getOperand(1).getReg();
6252 Register Op2 = MI.getOperand(2).getReg();
6253 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
6254 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
6255 unsigned PreferredFusedOpcode =
6256 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6257
6258 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
6259 // prefer to fold the multiply with fewer uses.
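// Folding into the multiply with fewer other uses makes it more likely that
// the original G_FMUL becomes dead and can be deleted once it is fused.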
6260 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6261 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
6262 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
6263 std::swap(LHS, RHS);
6264 }
6265
6266 // fold (fadd (fmul x, y), z) -> (fma x, y, z)
6267 if (isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6268 (Aggressive || MRI.hasOneNonDBGUse(LHS.Reg))) {
6269 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6270 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6271 {LHS.MI->getOperand(1).getReg(),
6272 LHS.MI->getOperand(2).getReg(), RHS.Reg});
6273 };
6274 return true;
6275 }
6276
6277 // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
6278 if (isContractableFMul(*RHS.MI, AllowFusionGlobally) &&
6279 (Aggressive || MRI.hasOneNonDBGUse(RHS.Reg))) {
6280 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6281 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6282 {RHS.MI->getOperand(1).getReg(),
6283 RHS.MI->getOperand(2).getReg(), LHS.Reg});
6284 };
6285 return true;
6286 }
6287
6288 return false;
6289}
6290
6291 bool CombinerHelper::matchCombineFAddFpExtFMulToFMadOrFMA(
6292 MachineInstr &MI,
6293 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
6294 assert(MI.getOpcode() == TargetOpcode::G_FADD);
6295
6296 bool AllowFusionGlobally, HasFMAD, Aggressive;
6297 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6298 return false;
6299
6300 const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
6301 Register Op1 = MI.getOperand(1).getReg();
6302 Register Op2 = MI.getOperand(2).getReg();
6303 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
6304 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
6305 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
6306
6307 unsigned PreferredFusedOpcode =
6308 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6309
6310 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
6311 // prefer to fold the multiply with fewer uses.
6312 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6313 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
6314 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
6315 std::swap(LHS, RHS);
6316 }
6317
6318 // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
6319 MachineInstr *FpExtSrc;
6320 if (mi_match(LHS.Reg, MRI, m_GFPExt(m_MInstr(FpExtSrc))) &&
6321 isContractableFMul(*FpExtSrc, AllowFusionGlobally) &&
6322 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
6323 MRI.getType(FpExtSrc->getOperand(1).getReg()))) {
6324 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6325 auto FpExtX = B.buildFPExt(DstType, FpExtSrc->getOperand(1).getReg());
6326 auto FpExtY = B.buildFPExt(DstType, FpExtSrc->getOperand(2).getReg());
6327 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6328 {FpExtX.getReg(0), FpExtY.getReg(0), RHS.Reg});
6329 };
6330 return true;
6331 }
6332
6333 // fold (fadd z, (fpext (fmul x, y))) -> (fma (fpext x), (fpext y), z)
6334 // Note: Commutes FADD operands.
6335 if (mi_match(RHS.Reg, MRI, m_GFPExt(m_MInstr(FpExtSrc))) &&
6336 isContractableFMul(*FpExtSrc, AllowFusionGlobally) &&
6337 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
6338 MRI.getType(FpExtSrc->getOperand(1).getReg()))) {
6339 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6340 auto FpExtX = B.buildFPExt(DstType, FpExtSrc->getOperand(1).getReg());
6341 auto FpExtY = B.buildFPExt(DstType, FpExtSrc->getOperand(2).getReg());
6342 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6343 {FpExtX.getReg(0), FpExtY.getReg(0), LHS.Reg});
6344 };
6345 return true;
6346 }
6347
6348 return false;
6349}
6350
6351 bool CombinerHelper::matchCombineFAddFMAFMulToFMadOrFMA(
6352 MachineInstr &MI,
6353 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
6354 assert(MI.getOpcode() == TargetOpcode::G_FADD);
6355
6356 bool AllowFusionGlobally, HasFMAD, Aggressive;
6357 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive, true))
6358 return false;
6359
6360 Register Op1 = MI.getOperand(1).getReg();
6361 Register Op2 = MI.getOperand(2).getReg();
6362 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
6363 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
6364 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6365
6366 unsigned PreferredFusedOpcode =
6367 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6368
6369 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
6370 // prefer to fold the multiply with fewer uses.
6371 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6372 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
6373 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
6374 std::swap(LHS, RHS);
6375 }
6376
6377 MachineInstr *FMA = nullptr;
6378 Register Z;
6379 // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z))
6380 if (LHS.MI->getOpcode() == PreferredFusedOpcode &&
6381 (MRI.getVRegDef(LHS.MI->getOperand(3).getReg())->getOpcode() ==
6382 TargetOpcode::G_FMUL) &&
6383 MRI.hasOneNonDBGUse(LHS.MI->getOperand(0).getReg()) &&
6384 MRI.hasOneNonDBGUse(LHS.MI->getOperand(3).getReg())) {
6385 FMA = LHS.MI;
6386 Z = RHS.Reg;
6387 }
6388 // fold (fadd z, (fma x, y, (fmul u, v))) -> (fma x, y, (fma u, v, z))
6389 else if (RHS.MI->getOpcode() == PreferredFusedOpcode &&
6390 (MRI.getVRegDef(RHS.MI->getOperand(3).getReg())->getOpcode() ==
6391 TargetOpcode::G_FMUL) &&
6392 MRI.hasOneNonDBGUse(RHS.MI->getOperand(0).getReg()) &&
6393 MRI.hasOneNonDBGUse(RHS.MI->getOperand(3).getReg())) {
6394 Z = LHS.Reg;
6395 FMA = RHS.MI;
6396 }
6397
6398 if (FMA) {
6399 MachineInstr *FMulMI = MRI.getVRegDef(FMA->getOperand(3).getReg());
6400 Register X = FMA->getOperand(1).getReg();
6401 Register Y = FMA->getOperand(2).getReg();
6402 Register U = FMulMI->getOperand(1).getReg();
6403 Register V = FMulMI->getOperand(2).getReg();
6404
6405 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6406 Register InnerFMA = MRI.createGenericVirtualRegister(DstTy);
6407 B.buildInstr(PreferredFusedOpcode, {InnerFMA}, {U, V, Z});
6408 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6409 {X, Y, InnerFMA});
6410 };
6411 return true;
6412 }
6413
6414 return false;
6415}
6416
6417 bool CombinerHelper::matchCombineFAddFpExtFMAFMulToFMadOrFMAAggressive(
6418 MachineInstr &MI,
6419 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
6420 assert(MI.getOpcode() == TargetOpcode::G_FADD);
6421
6422 bool AllowFusionGlobally, HasFMAD, Aggressive;
6423 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6424 return false;
6425
6426 if (!Aggressive)
6427 return false;
6428
6429 const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
6430 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
6431 Register Op1 = MI.getOperand(1).getReg();
6432 Register Op2 = MI.getOperand(2).getReg();
6433 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
6434 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
6435
6436 unsigned PreferredFusedOpcode =
6437 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6438
6439 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
6440 // prefer to fold the multiply with fewer uses.
6441 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6442 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
6443 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
6444 std::swap(LHS, RHS);
6445 }
6446
6447 // Builds: (fma x, y, (fma (fpext u), (fpext v), z))
6448 auto buildMatchInfo = [=, &MI](Register U, Register V, Register Z, Register X,
6449 Register Y, MachineIRBuilder &B) {
6450 Register FpExtU = B.buildFPExt(DstType, U).getReg(0);
6451 Register FpExtV = B.buildFPExt(DstType, V).getReg(0);
6452 Register InnerFMA =
6453 B.buildInstr(PreferredFusedOpcode, {DstType}, {FpExtU, FpExtV, Z})
6454 .getReg(0);
6455 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6456 {X, Y, InnerFMA});
6457 };
6458
6459 MachineInstr *FMulMI, *FMAMI;
6460 // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
6461 // -> (fma x, y, (fma (fpext u), (fpext v), z))
6462 if (LHS.MI->getOpcode() == PreferredFusedOpcode &&
6463 mi_match(LHS.MI->getOperand(3).getReg(), MRI,
6464 m_GFPExt(m_MInstr(FMulMI))) &&
6465 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6466 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
6467 MRI.getType(FMulMI->getOperand(0).getReg()))) {
6468 MatchInfo = [=](MachineIRBuilder &B) {
6469 buildMatchInfo(FMulMI->getOperand(1).getReg(),
6470 FMulMI->getOperand(2).getReg(), RHS.Reg,
6471 LHS.MI->getOperand(1).getReg(),
6472 LHS.MI->getOperand(2).getReg(), B);
6473 };
6474 return true;
6475 }
6476
6477 // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
6478 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
6479 // FIXME: This turns two single-precision and one double-precision
6480 // operation into two double-precision operations, which might not be
6481 // interesting for all targets, especially GPUs.
6482 if (mi_match(LHS.Reg, MRI, m_GFPExt(m_MInstr(FMAMI))) &&
6483 FMAMI->getOpcode() == PreferredFusedOpcode) {
6484 MachineInstr *FMulMI = MRI.getVRegDef(FMAMI->getOperand(3).getReg());
6485 if (isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6486 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
6487 MRI.getType(FMAMI->getOperand(0).getReg()))) {
6488 MatchInfo = [=](MachineIRBuilder &B) {
6489 Register X = FMAMI->getOperand(1).getReg();
6490 Register Y = FMAMI->getOperand(2).getReg();
6491 X = B.buildFPExt(DstType, X).getReg(0);
6492 Y = B.buildFPExt(DstType, Y).getReg(0);
6493 buildMatchInfo(FMulMI->getOperand(1).getReg(),
6494 FMulMI->getOperand(2).getReg(), RHS.Reg, X, Y, B);
6495 };
6496
6497 return true;
6498 }
6499 }
6500
6501 // fold (fadd z, (fma x, y, (fpext (fmul u, v))))
6502 // -> (fma x, y, (fma (fpext u), (fpext v), z))
6503 if (RHS.MI->getOpcode() == PreferredFusedOpcode &&
6504 mi_match(RHS.MI->getOperand(3).getReg(), MRI,
6505 m_GFPExt(m_MInstr(FMulMI))) &&
6506 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6507 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
6508 MRI.getType(FMulMI->getOperand(0).getReg()))) {
6509 MatchInfo = [=](MachineIRBuilder &B) {
6510 buildMatchInfo(FMulMI->getOperand(1).getReg(),
6511 FMulMI->getOperand(2).getReg(), LHS.Reg,
6512 RHS.MI->getOperand(1).getReg(),
6513 RHS.MI->getOperand(2).getReg(), B);
6514 };
6515 return true;
6516 }
6517
6518 // fold (fadd z, (fpext (fma x, y, (fmul u, v))))
6519 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
6520 // FIXME: This turns two single-precision and one double-precision
6521 // operation into two double-precision operations, which might not be
6522 // interesting for all targets, especially GPUs.
6523 if (mi_match(RHS.Reg, MRI, m_GFPExt(m_MInstr(FMAMI))) &&
6524 FMAMI->getOpcode() == PreferredFusedOpcode) {
6525 MachineInstr *FMulMI = MRI.getVRegDef(FMAMI->getOperand(3).getReg());
6526 if (isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6527 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
6528 MRI.getType(FMAMI->getOperand(0).getReg()))) {
6529 MatchInfo = [=](MachineIRBuilder &B) {
6530 Register X = FMAMI->getOperand(1).getReg();
6531 Register Y = FMAMI->getOperand(2).getReg();
6532 X = B.buildFPExt(DstType, X).getReg(0);
6533 Y = B.buildFPExt(DstType, Y).getReg(0);
6534 buildMatchInfo(FMulMI->getOperand(1).getReg(),
6535 FMulMI->getOperand(2).getReg(), LHS.Reg, X, Y, B);
6536 };
6537 return true;
6538 }
6539 }
6540
6541 return false;
6542}
6543
6544 bool CombinerHelper::matchCombineFSubFMulToFMadOrFMA(
6545 MachineInstr &MI,
6546 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
6547 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
6548
6549 bool AllowFusionGlobally, HasFMAD, Aggressive;
6550 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6551 return false;
6552
6553 Register Op1 = MI.getOperand(1).getReg();
6554 Register Op2 = MI.getOperand(2).getReg();
6555 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
6556 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
6557 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6558
6559 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
6560 // prefer to fold the multiply with fewer uses.
6561 int FirstMulHasFewerUses = true;
6562 if (isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6563 isContractableFMul(*RHS.MI, AllowFusionGlobally) &&
6564 hasMoreUses(*LHS.MI, *RHS.MI, MRI))
6565 FirstMulHasFewerUses = false;
6566
6567 unsigned PreferredFusedOpcode =
6568 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6569
6570 // fold (fsub (fmul x, y), z) -> (fma x, y, -z)
6571 if (FirstMulHasFewerUses &&
6572 (isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6573 (Aggressive || MRI.hasOneNonDBGUse(LHS.Reg)))) {
6574 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6575 Register NegZ = B.buildFNeg(DstTy, RHS.Reg).getReg(0);
6576 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6577 {LHS.MI->getOperand(1).getReg(),
6578 LHS.MI->getOperand(2).getReg(), NegZ});
6579 };
6580 return true;
6581 }
6582 // fold (fsub x, (fmul y, z)) -> (fma -y, z, x)
6583 else if ((isContractableFMul(*RHS.MI, AllowFusionGlobally) &&
6584 (Aggressive || MRI.hasOneNonDBGUse(RHS.Reg)))) {
6585 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6586 Register NegY =
6587 B.buildFNeg(DstTy, RHS.MI->getOperand(1).getReg()).getReg(0);
6588 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6589 {NegY, RHS.MI->getOperand(2).getReg(), LHS.Reg});
6590 };
6591 return true;
6592 }
6593
6594 return false;
6595}
6596
6597 bool CombinerHelper::matchCombineFSubFNegFMulToFMadOrFMA(
6598 MachineInstr &MI,
6599 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
6600 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
6601
6602 bool AllowFusionGlobally, HasFMAD, Aggressive;
6603 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6604 return false;
6605
6606 Register LHSReg = MI.getOperand(1).getReg();
6607 Register RHSReg = MI.getOperand(2).getReg();
6608 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6609
6610 unsigned PreferredFusedOpcode =
6611 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6612
6613 MachineInstr *FMulMI;
6614 // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
6615 if (mi_match(LHSReg, MRI, m_GFNeg(m_MInstr(FMulMI))) &&
6616 (Aggressive || (MRI.hasOneNonDBGUse(LHSReg) &&
6617 MRI.hasOneNonDBGUse(FMulMI->getOperand(0).getReg()))) &&
6618 isContractableFMul(*FMulMI, AllowFusionGlobally)) {
6619 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6620 Register NegX =
6621 B.buildFNeg(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
6622 Register NegZ = B.buildFNeg(DstTy, RHSReg).getReg(0);
6623 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6624 {NegX, FMulMI->getOperand(2).getReg(), NegZ});
6625 };
6626 return true;
6627 }
6628
6629 // fold (fsub x, (fneg (fmul y, z))) -> (fma y, z, x)
6630 if (mi_match(RHSReg, MRI, m_GFNeg(m_MInstr(FMulMI))) &&
6631 (Aggressive || (MRI.hasOneNonDBGUse(RHSReg) &&
6632 MRI.hasOneNonDBGUse(FMulMI->getOperand(0).getReg()))) &&
6633 isContractableFMul(*FMulMI, AllowFusionGlobally)) {
6634 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6635 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6636 {FMulMI->getOperand(1).getReg(),
6637 FMulMI->getOperand(2).getReg(), LHSReg});
6638 };
6639 return true;
6640 }
6641
6642 return false;
6643}
6644
6645bool CombinerHelper::matchCombineFSubFpExtFMulToFMadOrFMA(
6646    MachineInstr &MI,
6647    std::function<void(MachineIRBuilder &)> &MatchInfo) const {
6648 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
6649
6650 bool AllowFusionGlobally, HasFMAD, Aggressive;
6651 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6652 return false;
6653
6654 Register LHSReg = MI.getOperand(1).getReg();
6655 Register RHSReg = MI.getOperand(2).getReg();
6656 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6657
6658 unsigned PreferredFusedOpcode =
6659 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6660
6661 MachineInstr *FMulMI;
6662 // fold (fsub (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), (fneg z))
6663 if (mi_match(LHSReg, MRI, m_GFPExt(m_MInstr(FMulMI))) &&
6664 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6665 (Aggressive || MRI.hasOneNonDBGUse(LHSReg))) {
6666 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6667 Register FpExtX =
6668 B.buildFPExt(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
6669 Register FpExtY =
6670 B.buildFPExt(DstTy, FMulMI->getOperand(2).getReg()).getReg(0);
6671 Register NegZ = B.buildFNeg(DstTy, RHSReg).getReg(0);
6672 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6673 {FpExtX, FpExtY, NegZ});
6674 };
6675 return true;
6676 }
6677
6678 // fold (fsub x, (fpext (fmul y, z))) -> (fma (fneg (fpext y)), (fpext z), x)
6679 if (mi_match(RHSReg, MRI, m_GFPExt(m_MInstr(FMulMI))) &&
6680 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6681 (Aggressive || MRI.hasOneNonDBGUse(RHSReg))) {
6682 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6683 Register FpExtY =
6684 B.buildFPExt(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
6685 Register NegY = B.buildFNeg(DstTy, FpExtY).getReg(0);
6686 Register FpExtZ =
6687 B.buildFPExt(DstTy, FMulMI->getOperand(2).getReg()).getReg(0);
6688 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6689 {NegY, FpExtZ, LHSReg});
6690 };
6691 return true;
6692 }
6693
6694 return false;
6695}
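// Editor's note (illustrative aside, not part of the upstream source): for the
// first pattern above, with half-precision multiplicands and a float
// subtrahend,
//   %m:_(s16) = G_FMUL %x, %y
//   %e:_(s32) = G_FPEXT %m
//   %d:_(s32) = G_FSUB %e, %z
// becomes
//   %ex:_(s32) = G_FPEXT %x
//   %ey:_(s32) = G_FPEXT %y
//   %nz:_(s32) = G_FNEG %z
//   %d:_(s32)  = G_FMA %ex, %ey, %nz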
6696
6697bool CombinerHelper::matchCombineFSubFpExtFNegFMulToFMadOrFMA(
6698    MachineInstr &MI,
6699    std::function<void(MachineIRBuilder &)> &MatchInfo) const {
6700 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
6701
6702 bool AllowFusionGlobally, HasFMAD, Aggressive;
6703 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6704 return false;
6705
6706 const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
6707 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6708 Register LHSReg = MI.getOperand(1).getReg();
6709 Register RHSReg = MI.getOperand(2).getReg();
6710
6711 unsigned PreferredFusedOpcode =
6712 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6713
6714  auto buildMatchInfo = [=](Register Dst, Register X, Register Y, Register Z,
6715                            MachineIRBuilder &B) {
6716 Register FpExtX = B.buildFPExt(DstTy, X).getReg(0);
6717 Register FpExtY = B.buildFPExt(DstTy, Y).getReg(0);
6718 B.buildInstr(PreferredFusedOpcode, {Dst}, {FpExtX, FpExtY, Z});
6719 };
6720
6721 MachineInstr *FMulMI;
6722 // fold (fsub (fpext (fneg (fmul x, y))), z) ->
6723 // (fneg (fma (fpext x), (fpext y), z))
6724 // fold (fsub (fneg (fpext (fmul x, y))), z) ->
6725 // (fneg (fma (fpext x), (fpext y), z))
6726 if ((mi_match(LHSReg, MRI, m_GFPExt(m_GFNeg(m_MInstr(FMulMI)))) ||
6727 mi_match(LHSReg, MRI, m_GFNeg(m_GFPExt(m_MInstr(FMulMI))))) &&
6728 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6729 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstTy,
6730 MRI.getType(FMulMI->getOperand(0).getReg()))) {
6731 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6732 Register FMAReg = MRI.createGenericVirtualRegister(DstTy);
6733 buildMatchInfo(FMAReg, FMulMI->getOperand(1).getReg(),
6734 FMulMI->getOperand(2).getReg(), RHSReg, B);
6735 B.buildFNeg(MI.getOperand(0).getReg(), FMAReg);
6736 };
6737 return true;
6738 }
6739
6740 // fold (fsub x, (fpext (fneg (fmul y, z)))) -> (fma (fpext y), (fpext z), x)
6741 // fold (fsub x, (fneg (fpext (fmul y, z)))) -> (fma (fpext y), (fpext z), x)
6742 if ((mi_match(RHSReg, MRI, m_GFPExt(m_GFNeg(m_MInstr(FMulMI)))) ||
6743 mi_match(RHSReg, MRI, m_GFNeg(m_GFPExt(m_MInstr(FMulMI))))) &&
6744 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6745 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstTy,
6746 MRI.getType(FMulMI->getOperand(0).getReg()))) {
6747 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6748 buildMatchInfo(MI.getOperand(0).getReg(), FMulMI->getOperand(1).getReg(),
6749 FMulMI->getOperand(2).getReg(), LHSReg, B);
6750 };
6751 return true;
6752 }
6753
6754 return false;
6755}
6756
6757bool CombinerHelper::matchCombineFMinMaxNaN(MachineInstr &MI,
6758                                            unsigned &IdxToPropagate) const {
6759 bool PropagateNaN;
6760 switch (MI.getOpcode()) {
6761 default:
6762 return false;
6763 case TargetOpcode::G_FMINNUM:
6764 case TargetOpcode::G_FMAXNUM:
6765 PropagateNaN = false;
6766 break;
6767 case TargetOpcode::G_FMINIMUM:
6768 case TargetOpcode::G_FMAXIMUM:
6769 PropagateNaN = true;
6770 break;
6771 }
6772
6773 auto MatchNaN = [&](unsigned Idx) {
6774 Register MaybeNaNReg = MI.getOperand(Idx).getReg();
6775 const ConstantFP *MaybeCst = getConstantFPVRegVal(MaybeNaNReg, MRI);
6776 if (!MaybeCst || !MaybeCst->getValueAPF().isNaN())
6777 return false;
6778 IdxToPropagate = PropagateNaN ? Idx : (Idx == 1 ? 2 : 1);
6779 return true;
6780 };
6781
6782 return MatchNaN(1) || MatchNaN(2);
6783}
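// Editor's note (illustrative aside, not part of the upstream source): with
// one operand a constant NaN, G_FMINNUM/G_FMAXNUM select the other (non-NaN)
// operand, so IdxToPropagate names the non-NaN side, whereas
// G_FMINIMUM/G_FMAXIMUM propagate the NaN itself:
//   G_FMINNUM  %x, %nan  -->  %x
//   G_FMINIMUM %x, %nan  -->  %nan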
6784
6785// Combine multiple FDIVs with the same divisor into multiple FMULs by the
6786// reciprocal.
6787// E.g., (a / Y; b / Y;) -> (recip = 1.0 / Y; a * recip; b * recip)
6788bool CombinerHelper::matchRepeatedFPDivisor(
6789    MachineInstr &MI, SmallVector<MachineInstr *> &MatchInfo) const {
6790 assert(MI.getOpcode() == TargetOpcode::G_FDIV);
6791
6792 Register X = MI.getOperand(1).getReg();
6793 Register Y = MI.getOperand(2).getReg();
6794
6795 if (!MI.getFlag(MachineInstr::MIFlag::FmArcp))
6796 return false;
6797
6798 // Skip if current node is a reciprocal/fneg-reciprocal.
6799 auto N0CFP = isConstantOrConstantSplatVectorFP(*MRI.getVRegDef(X), MRI);
6800 if (N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0)))
6801 return false;
6802
6803 // Exit early if the target does not want this transform or if there can't
6804 // possibly be enough uses of the divisor to make the transform worthwhile.
6805 unsigned MinUses = getTargetLowering().combineRepeatedFPDivisors();
6806 if (!MinUses)
6807 return false;
6808
6809 // Find all FDIV users of the same divisor. For the moment we limit all
6810 // instructions to a single BB and use the first Instr in MatchInfo as the
6811 // dominating position.
6812 MatchInfo.push_back(&MI);
6813 for (auto &U : MRI.use_nodbg_instructions(Y)) {
6814 if (&U == &MI || U.getParent() != MI.getParent())
6815 continue;
6816 if (U.getOpcode() == TargetOpcode::G_FDIV &&
6817 U.getOperand(2).getReg() == Y && U.getOperand(1).getReg() != Y) {
6818 // This division is eligible for optimization only if global unsafe math
6819 // is enabled or if this division allows reciprocal formation.
6820 if (U.getFlag(MachineInstr::MIFlag::FmArcp)) {
6821 MatchInfo.push_back(&U);
6822 if (dominates(U, *MatchInfo[0]))
6823 std::swap(MatchInfo[0], MatchInfo.back());
6824 }
6825 }
6826 }
6827
6828 // Now that we have the actual number of divisor uses, make sure it meets
6829 // the minimum threshold specified by the target.
6830 return MatchInfo.size() >= MinUses;
6831}
6832
6833void CombinerHelper::applyRepeatedFPDivisor(
6834    SmallVector<MachineInstr *> &MatchInfo) const {
6835  // Generate the new div at the position of the first instruction, which we
6836  // have ensured will dominate all other instructions.
6837 Builder.setInsertPt(*MatchInfo[0]->getParent(), MatchInfo[0]);
6838 LLT Ty = MRI.getType(MatchInfo[0]->getOperand(0).getReg());
6839 auto Div = Builder.buildFDiv(Ty, Builder.buildFConstant(Ty, 1.0),
6840 MatchInfo[0]->getOperand(2).getReg(),
6841 MatchInfo[0]->getFlags());
6842
6843 // Replace all found div's with fmul instructions.
6844 for (MachineInstr *MI : MatchInfo) {
6845 Builder.setInsertPt(*MI->getParent(), MI);
6846 Builder.buildFMul(MI->getOperand(0).getReg(), MI->getOperand(1).getReg(),
6847 Div->getOperand(0).getReg(), MI->getFlags());
6848 MI->eraseFromParent();
6849 }
6850}
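// Editor's note: a minimal plain-C++ sketch of the identity this combine
// relies on (not part of the upstream file). It is only valid under
// reciprocal/reassociation fast-math assumptions, because 1.0 / y is rounded
// once and then reused for every quotient.
#include <cstdio>

int main() {
  double a = 3.0, b = 7.0, y = 1.5;
  double recip = 1.0 / y; // single division shared by all users
  double q0 = a * recip;  // approximately a / y
  double q1 = b * recip;  // approximately b / y
  std::printf("%g %g\n", q0, q1);
  return 0;
}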
6851
6852bool CombinerHelper::matchAddSubSameReg(MachineInstr &MI, Register &Src) const {
6853  assert(MI.getOpcode() == TargetOpcode::G_ADD && "Expected a G_ADD");
6854 Register LHS = MI.getOperand(1).getReg();
6855 Register RHS = MI.getOperand(2).getReg();
6856
6857 // Helper lambda to check for opportunities for
6858 // A + (B - A) -> B
6859 // (B - A) + A -> B
6860 auto CheckFold = [&](Register MaybeSub, Register MaybeSameReg) {
6861 Register Reg;
6862 return mi_match(MaybeSub, MRI, m_GSub(m_Reg(Src), m_Reg(Reg))) &&
6863 Reg == MaybeSameReg;
6864 };
6865 return CheckFold(LHS, RHS) || CheckFold(RHS, LHS);
6866}
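// Editor's note (illustrative aside, not part of the upstream source): with
// %diff:_(s32) = G_SUB %b, %a, both G_ADD %a, %diff and G_ADD %diff, %a are
// matched above and simply replaced by %b.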
6867
6868bool CombinerHelper::matchBuildVectorIdentityFold(MachineInstr &MI,
6869                                                  Register &MatchInfo) const {
6870 // This combine folds the following patterns:
6871 //
6872 // G_BUILD_VECTOR_TRUNC (G_BITCAST(x), G_LSHR(G_BITCAST(x), k))
6873 // G_BUILD_VECTOR(G_TRUNC(G_BITCAST(x)), G_TRUNC(G_LSHR(G_BITCAST(x), k)))
6874 // into
6875 // x
6876 // if
6877 // k == sizeof(VecEltTy)/2
6878 // type(x) == type(dst)
6879 //
6880 // G_BUILD_VECTOR(G_TRUNC(G_BITCAST(x)), undef)
6881 // into
6882 // x
6883 // if
6884 // type(x) == type(dst)
6885
6886 LLT DstVecTy = MRI.getType(MI.getOperand(0).getReg());
6887 LLT DstEltTy = DstVecTy.getElementType();
6888
6889 Register Lo, Hi;
6890
6891 if (mi_match(
6892 MI, MRI,
6894 MatchInfo = Lo;
6895 return MRI.getType(MatchInfo) == DstVecTy;
6896 }
6897
6898 std::optional<ValueAndVReg> ShiftAmount;
6899 const auto LoPattern = m_GBitcast(m_Reg(Lo));
6900 const auto HiPattern = m_GLShr(m_GBitcast(m_Reg(Hi)), m_GCst(ShiftAmount));
6901 if (mi_match(
6902 MI, MRI,
6903 m_any_of(m_GBuildVectorTrunc(LoPattern, HiPattern),
6904 m_GBuildVector(m_GTrunc(LoPattern), m_GTrunc(HiPattern))))) {
6905 if (Lo == Hi && ShiftAmount->Value == DstEltTy.getSizeInBits()) {
6906 MatchInfo = Lo;
6907 return MRI.getType(MatchInfo) == DstVecTy;
6908 }
6909 }
6910
6911 return false;
6912}
6913
6914bool CombinerHelper::matchTruncBuildVectorFold(MachineInstr &MI,
6915                                               Register &MatchInfo) const {
6916 // Replace (G_TRUNC (G_BITCAST (G_BUILD_VECTOR x, y)) with just x
6917 // if type(x) == type(G_TRUNC)
6918 if (!mi_match(MI.getOperand(1).getReg(), MRI,
6919 m_GBitcast(m_GBuildVector(m_Reg(MatchInfo), m_Reg()))))
6920 return false;
6921
6922 return MRI.getType(MatchInfo) == MRI.getType(MI.getOperand(0).getReg());
6923}
6924
6925bool CombinerHelper::matchTruncLshrBuildVectorFold(MachineInstr &MI,
6926                                                   Register &MatchInfo) const {
6927 // Replace (G_TRUNC (G_LSHR (G_BITCAST (G_BUILD_VECTOR x, y)), K)) with
6928 // y if K == size of vector element type
6929 std::optional<ValueAndVReg> ShiftAmt;
6930 if (!mi_match(MI.getOperand(1).getReg(), MRI,
6931                m_GLShr(m_GBitcast(m_GBuildVector(m_Reg(), m_Reg(MatchInfo))),
6932                        m_GCst(ShiftAmt))))
6933 return false;
6934
6935 LLT MatchTy = MRI.getType(MatchInfo);
6936 return ShiftAmt->Value.getZExtValue() == MatchTy.getSizeInBits() &&
6937 MatchTy == MRI.getType(MI.getOperand(0).getReg());
6938}
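// Editor's note (illustrative aside, not part of the upstream source): for
//   %bv:_(<2 x s32>) = G_BUILD_VECTOR %x:_(s32), %y:_(s32)
//   %cast:_(s64)     = G_BITCAST %bv
//   %shift:_(s64)    = G_LSHR %cast, 32
//   %trunc:_(s32)    = G_TRUNC %shift
// the shift amount equals the element size, so %trunc is replaced by %y.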
6939
6940unsigned CombinerHelper::getFPMinMaxOpcForSelect(
6941 CmpInst::Predicate Pred, LLT DstTy,
6942 SelectPatternNaNBehaviour VsNaNRetVal) const {
6943 assert(VsNaNRetVal != SelectPatternNaNBehaviour::NOT_APPLICABLE &&
6944 "Expected a NaN behaviour?");
6945 // Choose an opcode based off of legality or the behaviour when one of the
6946 // LHS/RHS may be NaN.
6947 switch (Pred) {
6948 default:
6949 return 0;
6950 case CmpInst::FCMP_UGT:
6951 case CmpInst::FCMP_UGE:
6952 case CmpInst::FCMP_OGT:
6953 case CmpInst::FCMP_OGE:
6954 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_OTHER)
6955 return TargetOpcode::G_FMAXNUM;
6956 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_NAN)
6957 return TargetOpcode::G_FMAXIMUM;
6958 if (isLegal({TargetOpcode::G_FMAXNUM, {DstTy}}))
6959 return TargetOpcode::G_FMAXNUM;
6960 if (isLegal({TargetOpcode::G_FMAXIMUM, {DstTy}}))
6961 return TargetOpcode::G_FMAXIMUM;
6962 return 0;
6963 case CmpInst::FCMP_ULT:
6964 case CmpInst::FCMP_ULE:
6965 case CmpInst::FCMP_OLT:
6966 case CmpInst::FCMP_OLE:
6967 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_OTHER)
6968 return TargetOpcode::G_FMINNUM;
6969 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_NAN)
6970 return TargetOpcode::G_FMINIMUM;
6971 if (isLegal({TargetOpcode::G_FMINNUM, {DstTy}}))
6972 return TargetOpcode::G_FMINNUM;
6973 if (!isLegal({TargetOpcode::G_FMINIMUM, {DstTy}}))
6974 return 0;
6975 return TargetOpcode::G_FMINIMUM;
6976 }
6977}
6978
6979CombinerHelper::SelectPatternNaNBehaviour
6980CombinerHelper::computeRetValAgainstNaN(Register LHS, Register RHS,
6981 bool IsOrderedComparison) const {
6982 bool LHSSafe = isKnownNeverNaN(LHS, MRI);
6983 bool RHSSafe = isKnownNeverNaN(RHS, MRI);
6984 // Completely unsafe.
6985 if (!LHSSafe && !RHSSafe)
6986 return SelectPatternNaNBehaviour::NOT_APPLICABLE;
6987 if (LHSSafe && RHSSafe)
6988 return SelectPatternNaNBehaviour::RETURNS_ANY;
6989 // An ordered comparison will return false when given a NaN, so it
6990 // returns the RHS.
6991 if (IsOrderedComparison)
6992 return LHSSafe ? SelectPatternNaNBehaviour::RETURNS_NAN
6993 : SelectPatternNaNBehaviour::RETURNS_OTHER;
6994 // An unordered comparison will return true when given a NaN, so it
6995 // returns the LHS.
6996 return LHSSafe ? SelectPatternNaNBehaviour::RETURNS_OTHER
6997 : SelectPatternNaNBehaviour::RETURNS_NAN;
6998}
6999
7000bool CombinerHelper::matchFPSelectToMinMax(Register Dst, Register Cond,
7001 Register TrueVal, Register FalseVal,
7002 BuildFnTy &MatchInfo) const {
7003 // Match: select (fcmp cond x, y) x, y
7004 // select (fcmp cond x, y) y, x
7005 // And turn it into fminnum/fmaxnum or fmin/fmax based off of the condition.
7006 LLT DstTy = MRI.getType(Dst);
7007 // Bail out early on pointers, since we'll never want to fold to a min/max.
7008 if (DstTy.isPointer())
7009 return false;
7010 // Match a floating point compare with a less-than/greater-than predicate.
7011 // TODO: Allow multiple users of the compare if they are all selects.
7012 CmpInst::Predicate Pred;
7013 Register CmpLHS, CmpRHS;
7014 if (!mi_match(Cond, MRI,
7015                m_OneNonDBGUse(
7016                    m_GFCmp(m_Pred(Pred), m_Reg(CmpLHS), m_Reg(CmpRHS)))) ||
7017 CmpInst::isEquality(Pred))
7018 return false;
7019 SelectPatternNaNBehaviour ResWithKnownNaNInfo =
7020 computeRetValAgainstNaN(CmpLHS, CmpRHS, CmpInst::isOrdered(Pred));
7021 if (ResWithKnownNaNInfo == SelectPatternNaNBehaviour::NOT_APPLICABLE)
7022 return false;
7023 if (TrueVal == CmpRHS && FalseVal == CmpLHS) {
7024 std::swap(CmpLHS, CmpRHS);
7025 Pred = CmpInst::getSwappedPredicate(Pred);
7026 if (ResWithKnownNaNInfo == SelectPatternNaNBehaviour::RETURNS_NAN)
7027 ResWithKnownNaNInfo = SelectPatternNaNBehaviour::RETURNS_OTHER;
7028 else if (ResWithKnownNaNInfo == SelectPatternNaNBehaviour::RETURNS_OTHER)
7029 ResWithKnownNaNInfo = SelectPatternNaNBehaviour::RETURNS_NAN;
7030 }
7031 if (TrueVal != CmpLHS || FalseVal != CmpRHS)
7032 return false;
7033 // Decide what type of max/min this should be based off of the predicate.
7034 unsigned Opc = getFPMinMaxOpcForSelect(Pred, DstTy, ResWithKnownNaNInfo);
7035 if (!Opc || !isLegal({Opc, {DstTy}}))
7036 return false;
7037 // Comparisons between signed zero and zero may have different results...
7038 // unless we have fmaximum/fminimum. In that case, we know -0 < 0.
7039 if (Opc != TargetOpcode::G_FMAXIMUM && Opc != TargetOpcode::G_FMINIMUM) {
7040 // We don't know if a comparison between two 0s will give us a consistent
7041 // result. Be conservative and only proceed if at least one side is
7042 // non-zero.
7043 auto KnownNonZeroSide = getFConstantVRegValWithLookThrough(CmpLHS, MRI);
7044 if (!KnownNonZeroSide || !KnownNonZeroSide->Value.isNonZero()) {
7045 KnownNonZeroSide = getFConstantVRegValWithLookThrough(CmpRHS, MRI);
7046 if (!KnownNonZeroSide || !KnownNonZeroSide->Value.isNonZero())
7047 return false;
7048 }
7049 }
7050 MatchInfo = [=](MachineIRBuilder &B) {
7051 B.buildInstr(Opc, {Dst}, {CmpLHS, CmpRHS});
7052 };
7053 return true;
7054}
7055
7056bool CombinerHelper::matchSimplifySelectToMinMax(MachineInstr &MI,
7057                                                 BuildFnTy &MatchInfo) const {
7058 // TODO: Handle integer cases.
7059 assert(MI.getOpcode() == TargetOpcode::G_SELECT);
7060 // Condition may be fed by a truncated compare.
7061 Register Cond = MI.getOperand(1).getReg();
7062 Register MaybeTrunc;
7063 if (mi_match(Cond, MRI, m_OneNonDBGUse(m_GTrunc(m_Reg(MaybeTrunc)))))
7064 Cond = MaybeTrunc;
7065 Register Dst = MI.getOperand(0).getReg();
7066 Register TrueVal = MI.getOperand(2).getReg();
7067 Register FalseVal = MI.getOperand(3).getReg();
7068 return matchFPSelectToMinMax(Dst, Cond, TrueVal, FalseVal, MatchInfo);
7069}
7070
7071bool CombinerHelper::matchRedundantBinOpInEquality(MachineInstr &MI,
7072                                                   BuildFnTy &MatchInfo) const {
7073 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
7074 // (X + Y) == X --> Y == 0
7075 // (X + Y) != X --> Y != 0
7076 // (X - Y) == X --> Y == 0
7077 // (X - Y) != X --> Y != 0
7078 // (X ^ Y) == X --> Y == 0
7079 // (X ^ Y) != X --> Y != 0
7080 Register Dst = MI.getOperand(0).getReg();
7081 CmpInst::Predicate Pred;
7082 Register X, Y, OpLHS, OpRHS;
7083 bool MatchedSub = mi_match(
7084 Dst, MRI,
7085 m_c_GICmp(m_Pred(Pred), m_Reg(X), m_GSub(m_Reg(OpLHS), m_Reg(Y))));
7086 if (MatchedSub && X != OpLHS)
7087 return false;
7088 if (!MatchedSub) {
7089 if (!mi_match(Dst, MRI,
7090 m_c_GICmp(m_Pred(Pred), m_Reg(X),
7091 m_any_of(m_GAdd(m_Reg(OpLHS), m_Reg(OpRHS)),
7092 m_GXor(m_Reg(OpLHS), m_Reg(OpRHS))))))
7093 return false;
7094 Y = X == OpLHS ? OpRHS : X == OpRHS ? OpLHS : Register();
7095 }
7096 MatchInfo = [=](MachineIRBuilder &B) {
7097 auto Zero = B.buildConstant(MRI.getType(Y), 0);
7098 B.buildICmp(Pred, Dst, Y, Zero);
7099 };
7100 return CmpInst::isEquality(Pred) && Y.isValid();
7101}
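// Editor's note (illustrative aside, not part of the upstream source): e.g.
//   %s:_(s32) = G_ADD %x, %y
//   %c:_(s1)  = G_ICMP intpred(eq), %s, %x
// is rewritten to
//   %zero:_(s32) = G_CONSTANT i32 0
//   %c:_(s1)     = G_ICMP intpred(eq), %y, %zero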
7102
7103/// Return the minimum useless shift amount that results in complete loss of the
7104/// source value. Return std::nullopt when it cannot determine a value.
7105static std::optional<unsigned>
7106getMinUselessShift(KnownBits ValueKB, unsigned Opcode,
7107 std::optional<int64_t> &Result) {
7108 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_LSHR ||
7109 Opcode == TargetOpcode::G_ASHR) &&
7110 "Expect G_SHL, G_LSHR or G_ASHR.");
7111 auto SignificantBits = 0;
7112 switch (Opcode) {
7113 case TargetOpcode::G_SHL:
7114 SignificantBits = ValueKB.countMinTrailingZeros();
7115 Result = 0;
7116 break;
7117 case TargetOpcode::G_LSHR:
7118 Result = 0;
7119 SignificantBits = ValueKB.countMinLeadingZeros();
7120 break;
7121 case TargetOpcode::G_ASHR:
7122 if (ValueKB.isNonNegative()) {
7123 SignificantBits = ValueKB.countMinLeadingZeros();
7124 Result = 0;
7125 } else if (ValueKB.isNegative()) {
7126 SignificantBits = ValueKB.countMinLeadingOnes();
7127 Result = -1;
7128 } else {
7129 // Cannot determine shift result.
7130 Result = std::nullopt;
7131 }
7132 break;
7133 default:
7134 break;
7135 }
7136 return ValueKB.getBitWidth() - SignificantBits;
7137}
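// Editor's note (worked example, not part of the upstream source): if known
// bits prove a 32-bit value has at least 24 leading zero bits, then for G_LSHR
// the minimum useless shift is 32 - 24 = 8: any shift amount >= 8 yields 0.
// Likewise, 16 known trailing zero bits make every G_SHL by >= 16 produce 0.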
7138
7139bool CombinerHelper::matchShiftsTooBig(
7140    MachineInstr &MI, std::optional<int64_t> &MatchInfo) const {
7141 Register ShiftVal = MI.getOperand(1).getReg();
7142 Register ShiftReg = MI.getOperand(2).getReg();
7143 LLT ResTy = MRI.getType(MI.getOperand(0).getReg());
7144 auto IsShiftTooBig = [&](const Constant *C) {
7145 auto *CI = dyn_cast<ConstantInt>(C);
7146 if (!CI)
7147 return false;
7148 if (CI->uge(ResTy.getScalarSizeInBits())) {
7149 MatchInfo = std::nullopt;
7150 return true;
7151 }
7152 auto OptMaxUsefulShift = getMinUselessShift(VT->getKnownBits(ShiftVal),
7153 MI.getOpcode(), MatchInfo);
7154 return OptMaxUsefulShift && CI->uge(*OptMaxUsefulShift);
7155 };
7156 return matchUnaryPredicate(MRI, ShiftReg, IsShiftTooBig);
7157}
7158
7159bool CombinerHelper::matchCommuteConstantToRHS(MachineInstr &MI) const {
7160  unsigned LHSOpndIdx = 1;
7161 unsigned RHSOpndIdx = 2;
7162 switch (MI.getOpcode()) {
7163 case TargetOpcode::G_UADDO:
7164 case TargetOpcode::G_SADDO:
7165 case TargetOpcode::G_UMULO:
7166 case TargetOpcode::G_SMULO:
7167 LHSOpndIdx = 2;
7168 RHSOpndIdx = 3;
7169 break;
7170 default:
7171 break;
7172 }
7173 Register LHS = MI.getOperand(LHSOpndIdx).getReg();
7174 Register RHS = MI.getOperand(RHSOpndIdx).getReg();
7175 if (!getIConstantVRegVal(LHS, MRI)) {
7176 // Skip commuting if LHS is not a constant. But, LHS may be a
7177 // G_CONSTANT_FOLD_BARRIER. If so we commute as long as we don't already
7178 // have a constant on the RHS.
7179 if (MRI.getVRegDef(LHS)->getOpcode() !=
7180 TargetOpcode::G_CONSTANT_FOLD_BARRIER)
7181 return false;
7182 }
7183 // Commute as long as RHS is not a constant or G_CONSTANT_FOLD_BARRIER.
7184 return MRI.getVRegDef(RHS)->getOpcode() !=
7185 TargetOpcode::G_CONSTANT_FOLD_BARRIER &&
7186 !getIConstantVRegVal(RHS, MRI);
7187}
7188
7189bool CombinerHelper::matchCommuteFPConstantToRHS(MachineInstr &MI) const {
7190  Register LHS = MI.getOperand(1).getReg();
7191 Register RHS = MI.getOperand(2).getReg();
7192 std::optional<FPValueAndVReg> ValAndVReg;
7193 if (!mi_match(LHS, MRI, m_GFCstOrSplat(ValAndVReg)))
7194 return false;
7195 return !mi_match(RHS, MRI, m_GFCstOrSplat(ValAndVReg));
7196}
7197
7198void CombinerHelper::applyCommuteBinOpOperands(MachineInstr &MI) const {
7199  Observer.changingInstr(MI);
7200 unsigned LHSOpndIdx = 1;
7201 unsigned RHSOpndIdx = 2;
7202 switch (MI.getOpcode()) {
7203 case TargetOpcode::G_UADDO:
7204 case TargetOpcode::G_SADDO:
7205 case TargetOpcode::G_UMULO:
7206 case TargetOpcode::G_SMULO:
7207 LHSOpndIdx = 2;
7208 RHSOpndIdx = 3;
7209 break;
7210 default:
7211 break;
7212 }
7213 Register LHSReg = MI.getOperand(LHSOpndIdx).getReg();
7214 Register RHSReg = MI.getOperand(RHSOpndIdx).getReg();
7215 MI.getOperand(LHSOpndIdx).setReg(RHSReg);
7216 MI.getOperand(RHSOpndIdx).setReg(LHSReg);
7217 Observer.changedInstr(MI);
7218}
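// Editor's note (illustrative aside, not part of the upstream source):
// commuting turns, e.g., G_ADD %two, %x (with %two = G_CONSTANT i32 2) into
// G_ADD %x, %two so the constant-on-RHS patterns elsewhere in this file can
// match. For G_UADDO/G_SADDO/G_UMULO/G_SMULO the value operands sit at indices
// 2 and 3 because operand 1 is the carry/overflow definition.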
7219
7220bool CombinerHelper::isOneOrOneSplat(Register Src, bool AllowUndefs) const {
7221 LLT SrcTy = MRI.getType(Src);
7222 if (SrcTy.isFixedVector())
7223 return isConstantSplatVector(Src, 1, AllowUndefs);
7224 if (SrcTy.isScalar()) {
7225 if (AllowUndefs && getOpcodeDef<GImplicitDef>(Src, MRI) != nullptr)
7226 return true;
7227 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
7228 return IConstant && IConstant->Value == 1;
7229 }
7230 return false; // scalable vector
7231}
7232
7233bool CombinerHelper::isZeroOrZeroSplat(Register Src, bool AllowUndefs) const {
7234 LLT SrcTy = MRI.getType(Src);
7235 if (SrcTy.isFixedVector())
7236 return isConstantSplatVector(Src, 0, AllowUndefs);
7237 if (SrcTy.isScalar()) {
7238 if (AllowUndefs && getOpcodeDef<GImplicitDef>(Src, MRI) != nullptr)
7239 return true;
7240 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
7241 return IConstant && IConstant->Value == 0;
7242 }
7243 return false; // scalable vector
7244}
7245
7246// Ignores COPYs during conformance checks.
7247// FIXME scalable vectors.
7248bool CombinerHelper::isConstantSplatVector(Register Src, int64_t SplatValue,
7249 bool AllowUndefs) const {
7250 GBuildVector *BuildVector = getOpcodeDef<GBuildVector>(Src, MRI);
7251 if (!BuildVector)
7252 return false;
7253 unsigned NumSources = BuildVector->getNumSources();
7254
7255 for (unsigned I = 0; I < NumSources; ++I) {
7256 GImplicitDef *ImplicitDef =
7257        getOpcodeDef<GImplicitDef>(BuildVector->getSourceReg(I), MRI);
7258    if (ImplicitDef && AllowUndefs)
7259 continue;
7260 if (ImplicitDef && !AllowUndefs)
7261 return false;
7262 std::optional<ValueAndVReg> IConstant =
7263        getIConstantVRegValWithLookThrough(BuildVector->getSourceReg(I), MRI);
7264    if (IConstant && IConstant->Value == SplatValue)
7265 continue;
7266 return false;
7267 }
7268 return true;
7269}
7270
7271// Ignores COPYs during lookups.
7272// FIXME scalable vectors
7273std::optional<APInt>
7274CombinerHelper::getConstantOrConstantSplatVector(Register Src) const {
7275 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
7276 if (IConstant)
7277 return IConstant->Value;
7278
7279 GBuildVector *BuildVector = getOpcodeDef<GBuildVector>(Src, MRI);
7280 if (!BuildVector)
7281 return std::nullopt;
7282 unsigned NumSources = BuildVector->getNumSources();
7283
7284 std::optional<APInt> Value = std::nullopt;
7285 for (unsigned I = 0; I < NumSources; ++I) {
7286 std::optional<ValueAndVReg> IConstant =
7287        getIConstantVRegValWithLookThrough(BuildVector->getSourceReg(I), MRI);
7288    if (!IConstant)
7289 return std::nullopt;
7290 if (!Value)
7291 Value = IConstant->Value;
7292 else if (*Value != IConstant->Value)
7293 return std::nullopt;
7294 }
7295 return Value;
7296}
7297
7298// FIXME G_SPLAT_VECTOR
7299bool CombinerHelper::isConstantOrConstantVectorI(Register Src) const {
7300 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
7301 if (IConstant)
7302 return true;
7303
7304 GBuildVector *BuildVector = getOpcodeDef<GBuildVector>(Src, MRI);
7305 if (!BuildVector)
7306 return false;
7307
7308 unsigned NumSources = BuildVector->getNumSources();
7309 for (unsigned I = 0; I < NumSources; ++I) {
7310 std::optional<ValueAndVReg> IConstant =
7311        getIConstantVRegValWithLookThrough(BuildVector->getSourceReg(I), MRI);
7312    if (!IConstant)
7313 return false;
7314 }
7315 return true;
7316}
7317
7318// TODO: use knownbits to determine zeros
7319bool CombinerHelper::tryFoldSelectOfConstants(GSelect *Select,
7320 BuildFnTy &MatchInfo) const {
7321 uint32_t Flags = Select->getFlags();
7322 Register Dest = Select->getReg(0);
7323 Register Cond = Select->getCondReg();
7324 Register True = Select->getTrueReg();
7325 Register False = Select->getFalseReg();
7326 LLT CondTy = MRI.getType(Select->getCondReg());
7327 LLT TrueTy = MRI.getType(Select->getTrueReg());
7328
7329 // We only do this combine for scalar boolean conditions.
7330 if (CondTy != LLT::scalar(1))
7331 return false;
7332
7333 if (TrueTy.isPointer())
7334 return false;
7335
7336 // Both are scalars.
7337  std::optional<ValueAndVReg> TrueOpt =
7338      getIConstantVRegValWithLookThrough(True, MRI);
7339  std::optional<ValueAndVReg> FalseOpt =
7340      getIConstantVRegValWithLookThrough(False, MRI);
7341
7342 if (!TrueOpt || !FalseOpt)
7343 return false;
7344
7345 APInt TrueValue = TrueOpt->Value;
7346 APInt FalseValue = FalseOpt->Value;
7347
7348 // select Cond, 1, 0 --> zext (Cond)
7349 if (TrueValue.isOne() && FalseValue.isZero()) {
7350 MatchInfo = [=](MachineIRBuilder &B) {
7351 B.setInstrAndDebugLoc(*Select);
7352 B.buildZExtOrTrunc(Dest, Cond);
7353 };
7354 return true;
7355 }
7356
7357 // select Cond, -1, 0 --> sext (Cond)
7358 if (TrueValue.isAllOnes() && FalseValue.isZero()) {
7359 MatchInfo = [=](MachineIRBuilder &B) {
7360 B.setInstrAndDebugLoc(*Select);
7361 B.buildSExtOrTrunc(Dest, Cond);
7362 };
7363 return true;
7364 }
7365
7366 // select Cond, 0, 1 --> zext (!Cond)
7367 if (TrueValue.isZero() && FalseValue.isOne()) {
7368 MatchInfo = [=](MachineIRBuilder &B) {
7369 B.setInstrAndDebugLoc(*Select);
7370 Register Inner = MRI.createGenericVirtualRegister(CondTy);
7371 B.buildNot(Inner, Cond);
7372 B.buildZExtOrTrunc(Dest, Inner);
7373 };
7374 return true;
7375 }
7376
7377 // select Cond, 0, -1 --> sext (!Cond)
7378 if (TrueValue.isZero() && FalseValue.isAllOnes()) {
7379 MatchInfo = [=](MachineIRBuilder &B) {
7380 B.setInstrAndDebugLoc(*Select);
7381 Register Inner = MRI.createGenericVirtualRegister(CondTy);
7382 B.buildNot(Inner, Cond);
7383 B.buildSExtOrTrunc(Dest, Inner);
7384 };
7385 return true;
7386 }
7387
7388 // select Cond, C1, C1-1 --> add (zext Cond), C1-1
7389 if (TrueValue - 1 == FalseValue) {
7390 MatchInfo = [=](MachineIRBuilder &B) {
7391 B.setInstrAndDebugLoc(*Select);
7392 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
7393 B.buildZExtOrTrunc(Inner, Cond);
7394 B.buildAdd(Dest, Inner, False);
7395 };
7396 return true;
7397 }
7398
7399 // select Cond, C1, C1+1 --> add (sext Cond), C1+1
7400 if (TrueValue + 1 == FalseValue) {
7401 MatchInfo = [=](MachineIRBuilder &B) {
7402 B.setInstrAndDebugLoc(*Select);
7403 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
7404 B.buildSExtOrTrunc(Inner, Cond);
7405 B.buildAdd(Dest, Inner, False);
7406 };
7407 return true;
7408 }
7409
7410 // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
7411 if (TrueValue.isPowerOf2() && FalseValue.isZero()) {
7412 MatchInfo = [=](MachineIRBuilder &B) {
7413 B.setInstrAndDebugLoc(*Select);
7414 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
7415 B.buildZExtOrTrunc(Inner, Cond);
7416 // The shift amount must be scalar.
7417 LLT ShiftTy = TrueTy.isVector() ? TrueTy.getElementType() : TrueTy;
7418 auto ShAmtC = B.buildConstant(ShiftTy, TrueValue.exactLogBase2());
7419 B.buildShl(Dest, Inner, ShAmtC, Flags);
7420 };
7421 return true;
7422 }
7423
7424 // select Cond, 0, Pow2 --> (zext (!Cond)) << log2(Pow2)
7425 if (FalseValue.isPowerOf2() && TrueValue.isZero()) {
7426 MatchInfo = [=](MachineIRBuilder &B) {
7427 B.setInstrAndDebugLoc(*Select);
7428 Register Not = MRI.createGenericVirtualRegister(CondTy);
7429 B.buildNot(Not, Cond);
7430 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
7431 B.buildZExtOrTrunc(Inner, Not);
7432 // The shift amount must be scalar.
7433 LLT ShiftTy = TrueTy.isVector() ? TrueTy.getElementType() : TrueTy;
7434 auto ShAmtC = B.buildConstant(ShiftTy, FalseValue.exactLogBase2());
7435 B.buildShl(Dest, Inner, ShAmtC, Flags);
7436 };
7437 return true;
7438 }
7439
7440 // select Cond, -1, C --> or (sext Cond), C
7441 if (TrueValue.isAllOnes()) {
7442 MatchInfo = [=](MachineIRBuilder &B) {
7443 B.setInstrAndDebugLoc(*Select);
7444 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
7445 B.buildSExtOrTrunc(Inner, Cond);
7446 B.buildOr(Dest, Inner, False, Flags);
7447 };
7448 return true;
7449 }
7450
7451 // select Cond, C, -1 --> or (sext (not Cond)), C
7452 if (FalseValue.isAllOnes()) {
7453 MatchInfo = [=](MachineIRBuilder &B) {
7454 B.setInstrAndDebugLoc(*Select);
7455 Register Not = MRI.createGenericVirtualRegister(CondTy);
7456 B.buildNot(Not, Cond);
7457 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
7458 B.buildSExtOrTrunc(Inner, Not);
7459 B.buildOr(Dest, Inner, True, Flags);
7460 };
7461 return true;
7462 }
7463
7464 return false;
7465}
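// Editor's note (illustrative aside, not part of the upstream source): two of
// the folds above in MIR form, for a boolean %c:
//   select %c, 7, 6  -->  %z = G_ZEXT %c;  G_ADD %z, 6
//   select %c, 8, 0  -->  %z = G_ZEXT %c;  G_SHL %z, 3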
7466
7467// TODO: use knownbits to determine zeros
7468bool CombinerHelper::tryFoldBoolSelectToLogic(GSelect *Select,
7469 BuildFnTy &MatchInfo) const {
7470 uint32_t Flags = Select->getFlags();
7471 Register DstReg = Select->getReg(0);
7472 Register Cond = Select->getCondReg();
7473 Register True = Select->getTrueReg();
7474 Register False = Select->getFalseReg();
7475 LLT CondTy = MRI.getType(Select->getCondReg());
7476 LLT TrueTy = MRI.getType(Select->getTrueReg());
7477
7478 // Boolean or fixed vector of booleans.
7479 if (CondTy.isScalableVector() ||
7480 (CondTy.isFixedVector() &&
7481 CondTy.getElementType().getScalarSizeInBits() != 1) ||
7482 CondTy.getScalarSizeInBits() != 1)
7483 return false;
7484
7485 if (CondTy != TrueTy)
7486 return false;
7487
7488 // select Cond, Cond, F --> or Cond, F
7489 // select Cond, 1, F --> or Cond, F
7490 if ((Cond == True) || isOneOrOneSplat(True, /* AllowUndefs */ true)) {
7491 MatchInfo = [=](MachineIRBuilder &B) {
7492 B.setInstrAndDebugLoc(*Select);
7493 Register Ext = MRI.createGenericVirtualRegister(TrueTy);
7494 B.buildZExtOrTrunc(Ext, Cond);
7495 auto FreezeFalse = B.buildFreeze(TrueTy, False);
7496 B.buildOr(DstReg, Ext, FreezeFalse, Flags);
7497 };
7498 return true;
7499 }
7500
7501 // select Cond, T, Cond --> and Cond, T
7502 // select Cond, T, 0 --> and Cond, T
7503 if ((Cond == False) || isZeroOrZeroSplat(False, /* AllowUndefs */ true)) {
7504 MatchInfo = [=](MachineIRBuilder &B) {
7505 B.setInstrAndDebugLoc(*Select);
7506 Register Ext = MRI.createGenericVirtualRegister(TrueTy);
7507 B.buildZExtOrTrunc(Ext, Cond);
7508 auto FreezeTrue = B.buildFreeze(TrueTy, True);
7509 B.buildAnd(DstReg, Ext, FreezeTrue);
7510 };
7511 return true;
7512 }
7513
7514 // select Cond, T, 1 --> or (not Cond), T
7515 if (isOneOrOneSplat(False, /* AllowUndefs */ true)) {
7516 MatchInfo = [=](MachineIRBuilder &B) {
7517 B.setInstrAndDebugLoc(*Select);
7518 // First the not.
7519 Register Inner = MRI.createGenericVirtualRegister(CondTy);
7520 B.buildNot(Inner, Cond);
7521 // Then an ext to match the destination register.
7522 Register Ext = MRI.createGenericVirtualRegister(TrueTy);
7523 B.buildZExtOrTrunc(Ext, Inner);
7524 auto FreezeTrue = B.buildFreeze(TrueTy, True);
7525 B.buildOr(DstReg, Ext, FreezeTrue, Flags);
7526 };
7527 return true;
7528 }
7529
7530 // select Cond, 0, F --> and (not Cond), F
7531 if (isZeroOrZeroSplat(True, /* AllowUndefs */ true)) {
7532 MatchInfo = [=](MachineIRBuilder &B) {
7533 B.setInstrAndDebugLoc(*Select);
7534 // First the not.
7535 Register Inner = MRI.createGenericVirtualRegister(CondTy);
7536 B.buildNot(Inner, Cond);
7537 // Then an ext to match the destination register.
7538 Register Ext = MRI.createGenericVirtualRegister(TrueTy);
7539 B.buildZExtOrTrunc(Ext, Inner);
7540 auto FreezeFalse = B.buildFreeze(TrueTy, False);
7541 B.buildAnd(DstReg, Ext, FreezeFalse);
7542 };
7543 return true;
7544 }
7545
7546 return false;
7547}
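// Editor's note (illustrative aside, not part of the upstream source): for s1
// operands, select %c, %t, 0 becomes G_AND %c, G_FREEZE(%t), and
// select %c, 1, %f becomes G_OR %c, G_FREEZE(%f); the freeze keeps a possibly
// poisoned unselected operand from poisoning the logic op.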
7548
7549bool CombinerHelper::matchSelectIMinMax(const MachineOperand &MO,
7550                                        BuildFnTy &MatchInfo) const {
7551 GSelect *Select = cast<GSelect>(MRI.getVRegDef(MO.getReg()));
7552 GICmp *Cmp = cast<GICmp>(MRI.getVRegDef(Select->getCondReg()));
7553
7554 Register DstReg = Select->getReg(0);
7555 Register True = Select->getTrueReg();
7556 Register False = Select->getFalseReg();
7557 LLT DstTy = MRI.getType(DstReg);
7558
7559 if (DstTy.isPointer())
7560 return false;
7561
7562 // We want to fold the icmp and replace the select.
7563 if (!MRI.hasOneNonDBGUse(Cmp->getReg(0)))
7564 return false;
7565
7566 CmpInst::Predicate Pred = Cmp->getCond();
7567 // We need a larger or smaller predicate for
7568 // canonicalization.
7569 if (CmpInst::isEquality(Pred))
7570 return false;
7571
7572 Register CmpLHS = Cmp->getLHSReg();
7573 Register CmpRHS = Cmp->getRHSReg();
7574
7575  // We can swap CmpLHS and CmpRHS for a higher hit rate.
7576 if (True == CmpRHS && False == CmpLHS) {
7577 std::swap(CmpLHS, CmpRHS);
7578 Pred = CmpInst::getSwappedPredicate(Pred);
7579 }
7580
7581 // (icmp X, Y) ? X : Y -> integer minmax.
7582 // see matchSelectPattern in ValueTracking.
7583 // Legality between G_SELECT and integer minmax can differ.
7584 if (True != CmpLHS || False != CmpRHS)
7585 return false;
7586
7587 switch (Pred) {
7588 case ICmpInst::ICMP_UGT:
7589 case ICmpInst::ICMP_UGE: {
7590 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMAX, DstTy}))
7591 return false;
7592 MatchInfo = [=](MachineIRBuilder &B) { B.buildUMax(DstReg, True, False); };
7593 return true;
7594 }
7595 case ICmpInst::ICMP_SGT:
7596 case ICmpInst::ICMP_SGE: {
7597 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SMAX, DstTy}))
7598 return false;
7599 MatchInfo = [=](MachineIRBuilder &B) { B.buildSMax(DstReg, True, False); };
7600 return true;
7601 }
7602 case ICmpInst::ICMP_ULT:
7603 case ICmpInst::ICMP_ULE: {
7604 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMIN, DstTy}))
7605 return false;
7606 MatchInfo = [=](MachineIRBuilder &B) { B.buildUMin(DstReg, True, False); };
7607 return true;
7608 }
7609 case ICmpInst::ICMP_SLT:
7610 case ICmpInst::ICMP_SLE: {
7611 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SMIN, DstTy}))
7612 return false;
7613 MatchInfo = [=](MachineIRBuilder &B) { B.buildSMin(DstReg, True, False); };
7614 return true;
7615 }
7616 default:
7617 return false;
7618 }
7619}
7620
7621// (neg (min/max x, (neg x))) --> (max/min x, (neg x))
7622bool CombinerHelper::matchSimplifyNegMinMax(MachineInstr &MI,
7623                                            BuildFnTy &MatchInfo) const {
7624 assert(MI.getOpcode() == TargetOpcode::G_SUB);
7625 Register DestReg = MI.getOperand(0).getReg();
7626 LLT DestTy = MRI.getType(DestReg);
7627
7628 Register X;
7629 Register Sub0;
7630 auto NegPattern = m_all_of(m_Neg(m_DeferredReg(X)), m_Reg(Sub0));
7631 if (mi_match(DestReg, MRI,
7632 m_Neg(m_OneUse(m_any_of(m_GSMin(m_Reg(X), NegPattern),
7633 m_GSMax(m_Reg(X), NegPattern),
7634 m_GUMin(m_Reg(X), NegPattern),
7635 m_GUMax(m_Reg(X), NegPattern)))))) {
7636 MachineInstr *MinMaxMI = MRI.getVRegDef(MI.getOperand(2).getReg());
7637 unsigned NewOpc = getInverseGMinMaxOpcode(MinMaxMI->getOpcode());
7638 if (isLegal({NewOpc, {DestTy}})) {
7639 MatchInfo = [=](MachineIRBuilder &B) {
7640 B.buildInstr(NewOpc, {DestReg}, {X, Sub0});
7641 };
7642 return true;
7643 }
7644 }
7645
7646 return false;
7647}
7648
7649bool CombinerHelper::matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo) const {
7650  GSelect *Select = cast<GSelect>(&MI);
7651
7652 if (tryFoldSelectOfConstants(Select, MatchInfo))
7653 return true;
7654
7655 if (tryFoldBoolSelectToLogic(Select, MatchInfo))
7656 return true;
7657
7658 return false;
7659}
7660
7661/// Fold (icmp Pred1 V1, C1) && (icmp Pred2 V2, C2)
7662/// or (icmp Pred1 V1, C1) || (icmp Pred2 V2, C2)
7663/// into a single comparison using range-based reasoning.
7664/// see InstCombinerImpl::foldAndOrOfICmpsUsingRanges.
7665bool CombinerHelper::tryFoldAndOrOrICmpsUsingRanges(
7666 GLogicalBinOp *Logic, BuildFnTy &MatchInfo) const {
7667 assert(Logic->getOpcode() != TargetOpcode::G_XOR && "unexpected xor");
7668 bool IsAnd = Logic->getOpcode() == TargetOpcode::G_AND;
7669 Register DstReg = Logic->getReg(0);
7670 Register LHS = Logic->getLHSReg();
7671 Register RHS = Logic->getRHSReg();
7672 unsigned Flags = Logic->getFlags();
7673
7674 // We need an G_ICMP on the LHS register.
7675 GICmp *Cmp1 = getOpcodeDef<GICmp>(LHS, MRI);
7676 if (!Cmp1)
7677 return false;
7678
7679 // We need an G_ICMP on the RHS register.
7680 GICmp *Cmp2 = getOpcodeDef<GICmp>(RHS, MRI);
7681 if (!Cmp2)
7682 return false;
7683
7684 // We want to fold the icmps.
7685 if (!MRI.hasOneNonDBGUse(Cmp1->getReg(0)) ||
7686 !MRI.hasOneNonDBGUse(Cmp2->getReg(0)))
7687 return false;
7688
7689 APInt C1;
7690 APInt C2;
7691  std::optional<ValueAndVReg> MaybeC1 =
7692      getIConstantVRegValWithLookThrough(Cmp1->getRHSReg(), MRI);
7693 if (!MaybeC1)
7694 return false;
7695 C1 = MaybeC1->Value;
7696
7697  std::optional<ValueAndVReg> MaybeC2 =
7698      getIConstantVRegValWithLookThrough(Cmp2->getRHSReg(), MRI);
7699 if (!MaybeC2)
7700 return false;
7701 C2 = MaybeC2->Value;
7702
7703 Register R1 = Cmp1->getLHSReg();
7704 Register R2 = Cmp2->getLHSReg();
7705 CmpInst::Predicate Pred1 = Cmp1->getCond();
7706 CmpInst::Predicate Pred2 = Cmp2->getCond();
7707 LLT CmpTy = MRI.getType(Cmp1->getReg(0));
7708 LLT CmpOperandTy = MRI.getType(R1);
7709
7710 if (CmpOperandTy.isPointer())
7711 return false;
7712
7713 // We build ands, adds, and constants of type CmpOperandTy.
7714 // They must be legal to build.
7715 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_AND, CmpOperandTy}) ||
7716 !isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, CmpOperandTy}) ||
7717 !isConstantLegalOrBeforeLegalizer(CmpOperandTy))
7718 return false;
7719
7720 // Look through add of a constant offset on R1, R2, or both operands. This
7721 // allows us to interpret the R + C' < C'' range idiom into a proper range.
7722 std::optional<APInt> Offset1;
7723 std::optional<APInt> Offset2;
7724 if (R1 != R2) {
7725 if (GAdd *Add = getOpcodeDef<GAdd>(R1, MRI)) {
7726      std::optional<ValueAndVReg> MaybeOffset1 =
7727          getIConstantVRegValWithLookThrough(Add->getRHSReg(), MRI);
7728 if (MaybeOffset1) {
7729 R1 = Add->getLHSReg();
7730 Offset1 = MaybeOffset1->Value;
7731 }
7732 }
7733 if (GAdd *Add = getOpcodeDef<GAdd>(R2, MRI)) {
7734      std::optional<ValueAndVReg> MaybeOffset2 =
7735          getIConstantVRegValWithLookThrough(Add->getRHSReg(), MRI);
7736 if (MaybeOffset2) {
7737 R2 = Add->getLHSReg();
7738 Offset2 = MaybeOffset2->Value;
7739 }
7740 }
7741 }
7742
7743 if (R1 != R2)
7744 return false;
7745
7746 // We calculate the icmp ranges including maybe offsets.
7747 ConstantRange CR1 = ConstantRange::makeExactICmpRegion(
7748 IsAnd ? ICmpInst::getInversePredicate(Pred1) : Pred1, C1);
7749 if (Offset1)
7750 CR1 = CR1.subtract(*Offset1);
7751
7752 ConstantRange CR2 = ConstantRange::makeExactICmpRegion(
7753 IsAnd ? ICmpInst::getInversePredicate(Pred2) : Pred2, C2);
7754 if (Offset2)
7755 CR2 = CR2.subtract(*Offset2);
7756
7757 bool CreateMask = false;
7758 APInt LowerDiff;
7759 std::optional<ConstantRange> CR = CR1.exactUnionWith(CR2);
7760 if (!CR) {
7761 // We need non-wrapping ranges.
7762 if (CR1.isWrappedSet() || CR2.isWrappedSet())
7763 return false;
7764
7765 // Check whether we have equal-size ranges that only differ by one bit.
7766 // In that case we can apply a mask to map one range onto the other.
7767 LowerDiff = CR1.getLower() ^ CR2.getLower();
7768 APInt UpperDiff = (CR1.getUpper() - 1) ^ (CR2.getUpper() - 1);
7769 APInt CR1Size = CR1.getUpper() - CR1.getLower();
7770 if (!LowerDiff.isPowerOf2() || LowerDiff != UpperDiff ||
7771 CR1Size != CR2.getUpper() - CR2.getLower())
7772 return false;
7773
7774 CR = CR1.getLower().ult(CR2.getLower()) ? CR1 : CR2;
7775 CreateMask = true;
7776 }
7777
7778 if (IsAnd)
7779 CR = CR->inverse();
7780
7781 CmpInst::Predicate NewPred;
7782 APInt NewC, Offset;
7783 CR->getEquivalentICmp(NewPred, NewC, Offset);
7784
7785 // We take the result type of one of the original icmps, CmpTy, for
7786  // the icmp to be built. The operand type, CmpOperandTy, is used for
7787  // the other instructions and constants to be built. The types of
7788 // the parameters and output are the same for add and and. CmpTy
7789 // and the type of DstReg might differ. That is why we zext or trunc
7790 // the icmp into the destination register.
7791
7792 MatchInfo = [=](MachineIRBuilder &B) {
7793 if (CreateMask && Offset != 0) {
7794 auto TildeLowerDiff = B.buildConstant(CmpOperandTy, ~LowerDiff);
7795 auto And = B.buildAnd(CmpOperandTy, R1, TildeLowerDiff); // the mask.
7796 auto OffsetC = B.buildConstant(CmpOperandTy, Offset);
7797 auto Add = B.buildAdd(CmpOperandTy, And, OffsetC, Flags);
7798 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
7799 auto ICmp = B.buildICmp(NewPred, CmpTy, Add, NewCon);
7800 B.buildZExtOrTrunc(DstReg, ICmp);
7801 } else if (CreateMask && Offset == 0) {
7802 auto TildeLowerDiff = B.buildConstant(CmpOperandTy, ~LowerDiff);
7803 auto And = B.buildAnd(CmpOperandTy, R1, TildeLowerDiff); // the mask.
7804 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
7805 auto ICmp = B.buildICmp(NewPred, CmpTy, And, NewCon);
7806 B.buildZExtOrTrunc(DstReg, ICmp);
7807 } else if (!CreateMask && Offset != 0) {
7808 auto OffsetC = B.buildConstant(CmpOperandTy, Offset);
7809 auto Add = B.buildAdd(CmpOperandTy, R1, OffsetC, Flags);
7810 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
7811 auto ICmp = B.buildICmp(NewPred, CmpTy, Add, NewCon);
7812 B.buildZExtOrTrunc(DstReg, ICmp);
7813 } else if (!CreateMask && Offset == 0) {
7814 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
7815 auto ICmp = B.buildICmp(NewPred, CmpTy, R1, NewCon);
7816 B.buildZExtOrTrunc(DstReg, ICmp);
7817 } else {
7818 llvm_unreachable("unexpected configuration of CreateMask and Offset");
7819 }
7820 };
7821 return true;
7822}
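// Editor's note (worked example, not part of the upstream source): for an
// unsigned i8 %x, (icmp uge %x, 5) && (icmp ult %x, 10) has the inverted-range
// union [10, 5), whose inverse is [5, 10); getEquivalentICmp turns that into
// offset -5 with bound 5, so the pair folds to a single range check:
//   %a:_(s8) = G_ADD %x, -5
//   %r:_(s1) = G_ICMP intpred(ult), %a, 5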
7823
7824bool CombinerHelper::tryFoldLogicOfFCmps(GLogicalBinOp *Logic,
7825 BuildFnTy &MatchInfo) const {
7826  assert(Logic->getOpcode() != TargetOpcode::G_XOR && "unexpected xor");
7827 Register DestReg = Logic->getReg(0);
7828 Register LHS = Logic->getLHSReg();
7829 Register RHS = Logic->getRHSReg();
7830 bool IsAnd = Logic->getOpcode() == TargetOpcode::G_AND;
7831
7832 // We need a compare on the LHS register.
7833 GFCmp *Cmp1 = getOpcodeDef<GFCmp>(LHS, MRI);
7834 if (!Cmp1)
7835 return false;
7836
7837 // We need a compare on the RHS register.
7838 GFCmp *Cmp2 = getOpcodeDef<GFCmp>(RHS, MRI);
7839 if (!Cmp2)
7840 return false;
7841
7842 LLT CmpTy = MRI.getType(Cmp1->getReg(0));
7843 LLT CmpOperandTy = MRI.getType(Cmp1->getLHSReg());
7844
7845 // We build one fcmp, want to fold the fcmps, replace the logic op,
7846 // and the fcmps must have the same shape.
7847  if (!isLegalOrBeforeLegalizer(
7848          {TargetOpcode::G_FCMP, {CmpTy, CmpOperandTy}}) ||
7849 !MRI.hasOneNonDBGUse(Logic->getReg(0)) ||
7850 !MRI.hasOneNonDBGUse(Cmp1->getReg(0)) ||
7851 !MRI.hasOneNonDBGUse(Cmp2->getReg(0)) ||
7852 MRI.getType(Cmp1->getLHSReg()) != MRI.getType(Cmp2->getLHSReg()))
7853 return false;
7854
7855 CmpInst::Predicate PredL = Cmp1->getCond();
7856 CmpInst::Predicate PredR = Cmp2->getCond();
7857 Register LHS0 = Cmp1->getLHSReg();
7858 Register LHS1 = Cmp1->getRHSReg();
7859 Register RHS0 = Cmp2->getLHSReg();
7860 Register RHS1 = Cmp2->getRHSReg();
7861
7862 if (LHS0 == RHS1 && LHS1 == RHS0) {
7863 // Swap RHS operands to match LHS.
7864 PredR = CmpInst::getSwappedPredicate(PredR);
7865 std::swap(RHS0, RHS1);
7866 }
7867
7868 if (LHS0 == RHS0 && LHS1 == RHS1) {
7869 // We determine the new predicate.
7870 unsigned CmpCodeL = getFCmpCode(PredL);
7871 unsigned CmpCodeR = getFCmpCode(PredR);
7872 unsigned NewPred = IsAnd ? CmpCodeL & CmpCodeR : CmpCodeL | CmpCodeR;
7873 unsigned Flags = Cmp1->getFlags() | Cmp2->getFlags();
7874 MatchInfo = [=](MachineIRBuilder &B) {
7875 // The fcmp predicates fill the lower part of the enum.
7876 FCmpInst::Predicate Pred = static_cast<FCmpInst::Predicate>(NewPred);
7877 if (Pred == FCmpInst::FCMP_FALSE &&
7878          isConstantLegalOrBeforeLegalizer(CmpTy)) {
7879        auto False = B.buildConstant(CmpTy, 0);
7880 B.buildZExtOrTrunc(DestReg, False);
7881 } else if (Pred == FCmpInst::FCMP_TRUE &&
7882                 isConstantLegalOrBeforeLegalizer(CmpTy)) {
7883        auto True =
7884 B.buildConstant(CmpTy, getICmpTrueVal(getTargetLowering(),
7885 CmpTy.isVector() /*isVector*/,
7886 true /*isFP*/));
7887 B.buildZExtOrTrunc(DestReg, True);
7888 } else { // We take the predicate without predicate optimizations.
7889 auto Cmp = B.buildFCmp(Pred, CmpTy, LHS0, LHS1, Flags);
7890 B.buildZExtOrTrunc(DestReg, Cmp);
7891 }
7892 };
7893 return true;
7894 }
7895
7896 return false;
7897}
7898
7899bool CombinerHelper::matchAnd(MachineInstr &MI, BuildFnTy &MatchInfo) const {
7900  GAnd *And = cast<GAnd>(&MI);
7901
7902 if (tryFoldAndOrOrICmpsUsingRanges(And, MatchInfo))
7903 return true;
7904
7905 if (tryFoldLogicOfFCmps(And, MatchInfo))
7906 return true;
7907
7908 return false;
7909}
7910
7911bool CombinerHelper::matchOr(MachineInstr &MI, BuildFnTy &MatchInfo) const {
7912  GOr *Or = cast<GOr>(&MI);
7913
7914 if (tryFoldAndOrOrICmpsUsingRanges(Or, MatchInfo))
7915 return true;
7916
7917 if (tryFoldLogicOfFCmps(Or, MatchInfo))
7918 return true;
7919
7920 return false;
7921}
7922
7923bool CombinerHelper::matchAddOverflow(MachineInstr &MI,
7924                                      BuildFnTy &MatchInfo) const {
7925  GAddCarryOut *Add = cast<GAddCarryOut>(&MI);
7926
7927 // Addo has no flags
7928 Register Dst = Add->getReg(0);
7929 Register Carry = Add->getReg(1);
7930 Register LHS = Add->getLHSReg();
7931 Register RHS = Add->getRHSReg();
7932 bool IsSigned = Add->isSigned();
7933 LLT DstTy = MRI.getType(Dst);
7934 LLT CarryTy = MRI.getType(Carry);
7935
7936 // Fold addo, if the carry is dead -> add, undef.
7937 if (MRI.use_nodbg_empty(Carry) &&
7938 isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, {DstTy}})) {
7939 MatchInfo = [=](MachineIRBuilder &B) {
7940 B.buildAdd(Dst, LHS, RHS);
7941 B.buildUndef(Carry);
7942 };
7943 return true;
7944 }
7945
7946 // Canonicalize constant to RHS.
7947 if (isConstantOrConstantVectorI(LHS) && !isConstantOrConstantVectorI(RHS)) {
7948 if (IsSigned) {
7949 MatchInfo = [=](MachineIRBuilder &B) {
7950 B.buildSAddo(Dst, Carry, RHS, LHS);
7951 };
7952 return true;
7953 }
7954 // !IsSigned
7955 MatchInfo = [=](MachineIRBuilder &B) {
7956 B.buildUAddo(Dst, Carry, RHS, LHS);
7957 };
7958 return true;
7959 }
7960
7961 std::optional<APInt> MaybeLHS = getConstantOrConstantSplatVector(LHS);
7962 std::optional<APInt> MaybeRHS = getConstantOrConstantSplatVector(RHS);
7963
7964 // Fold addo(c1, c2) -> c3, carry.
7965  if (MaybeLHS && MaybeRHS && isConstantLegalOrBeforeLegalizer(DstTy) &&
7966      isConstantLegalOrBeforeLegalizer(CarryTy)) {
7967 bool Overflow;
7968 APInt Result = IsSigned ? MaybeLHS->sadd_ov(*MaybeRHS, Overflow)
7969 : MaybeLHS->uadd_ov(*MaybeRHS, Overflow);
7970 MatchInfo = [=](MachineIRBuilder &B) {
7971 B.buildConstant(Dst, Result);
7972 B.buildConstant(Carry, Overflow);
7973 };
7974 return true;
7975 }
7976
7977 // Fold (addo x, 0) -> x, no carry
7978 if (MaybeRHS && *MaybeRHS == 0 && isConstantLegalOrBeforeLegalizer(CarryTy)) {
7979 MatchInfo = [=](MachineIRBuilder &B) {
7980 B.buildCopy(Dst, LHS);
7981 B.buildConstant(Carry, 0);
7982 };
7983 return true;
7984 }
7985
7986 // Given 2 constant operands whose sum does not overflow:
7987 // uaddo (X +nuw C0), C1 -> uaddo X, C0 + C1
7988 // saddo (X +nsw C0), C1 -> saddo X, C0 + C1
7989 GAdd *AddLHS = getOpcodeDef<GAdd>(LHS, MRI);
7990 if (MaybeRHS && AddLHS && MRI.hasOneNonDBGUse(Add->getReg(0)) &&
7991 ((IsSigned && AddLHS->getFlag(MachineInstr::MIFlag::NoSWrap)) ||
7992 (!IsSigned && AddLHS->getFlag(MachineInstr::MIFlag::NoUWrap)))) {
7993 std::optional<APInt> MaybeAddRHS =
7994 getConstantOrConstantSplatVector(AddLHS->getRHSReg());
7995 if (MaybeAddRHS) {
7996 bool Overflow;
7997 APInt NewC = IsSigned ? MaybeAddRHS->sadd_ov(*MaybeRHS, Overflow)
7998 : MaybeAddRHS->uadd_ov(*MaybeRHS, Overflow);
7999 if (!Overflow && isConstantLegalOrBeforeLegalizer(DstTy)) {
8000 if (IsSigned) {
8001 MatchInfo = [=](MachineIRBuilder &B) {
8002 auto ConstRHS = B.buildConstant(DstTy, NewC);
8003 B.buildSAddo(Dst, Carry, AddLHS->getLHSReg(), ConstRHS);
8004 };
8005 return true;
8006 }
8007 // !IsSigned
8008 MatchInfo = [=](MachineIRBuilder &B) {
8009 auto ConstRHS = B.buildConstant(DstTy, NewC);
8010 B.buildUAddo(Dst, Carry, AddLHS->getLHSReg(), ConstRHS);
8011 };
8012 return true;
8013 }
8014 }
8015 };
8016
8017 // We try to combine addo to non-overflowing add.
8018  if (!isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, {DstTy}}) ||
8019      !isConstantLegalOrBeforeLegalizer(CarryTy))
8020 return false;
8021
8022 // We try to combine uaddo to non-overflowing add.
8023 if (!IsSigned) {
8024 ConstantRange CRLHS =
8025 ConstantRange::fromKnownBits(VT->getKnownBits(LHS), /*IsSigned=*/false);
8026 ConstantRange CRRHS =
8027 ConstantRange::fromKnownBits(VT->getKnownBits(RHS), /*IsSigned=*/false);
8028
8029 switch (CRLHS.unsignedAddMayOverflow(CRRHS)) {
8030    case ConstantRange::OverflowResult::MayOverflow:
8031      return false;
8032    case ConstantRange::OverflowResult::NeverOverflows: {
8033      MatchInfo = [=](MachineIRBuilder &B) {
8034 B.buildAdd(Dst, LHS, RHS, MachineInstr::MIFlag::NoUWrap);
8035 B.buildConstant(Carry, 0);
8036 };
8037 return true;
8038 }
8039    case ConstantRange::OverflowResult::AlwaysOverflowsLow:
8040    case ConstantRange::OverflowResult::AlwaysOverflowsHigh: {
8041      MatchInfo = [=](MachineIRBuilder &B) {
8042 B.buildAdd(Dst, LHS, RHS);
8043 B.buildConstant(Carry, 1);
8044 };
8045 return true;
8046 }
8047 }
8048 return false;
8049 }
8050
8051 // We try to combine saddo to non-overflowing add.
8052
8053 // If LHS and RHS each have at least two sign bits, then there is no signed
8054 // overflow.
8055 if (VT->computeNumSignBits(RHS) > 1 && VT->computeNumSignBits(LHS) > 1) {
8056 MatchInfo = [=](MachineIRBuilder &B) {
8057 B.buildAdd(Dst, LHS, RHS, MachineInstr::MIFlag::NoSWrap);
8058 B.buildConstant(Carry, 0);
8059 };
8060 return true;
8061 }
8062
8063 ConstantRange CRLHS =
8064 ConstantRange::fromKnownBits(VT->getKnownBits(LHS), /*IsSigned=*/true);
8065 ConstantRange CRRHS =
8066 ConstantRange::fromKnownBits(VT->getKnownBits(RHS), /*IsSigned=*/true);
8067
8068 switch (CRLHS.signedAddMayOverflow(CRRHS)) {
8069  case ConstantRange::OverflowResult::MayOverflow:
8070    return false;
8071  case ConstantRange::OverflowResult::NeverOverflows: {
8072    MatchInfo = [=](MachineIRBuilder &B) {
8073 B.buildAdd(Dst, LHS, RHS, MachineInstr::MIFlag::NoSWrap);
8074 B.buildConstant(Carry, 0);
8075 };
8076 return true;
8077 }
8078  case ConstantRange::OverflowResult::AlwaysOverflowsLow:
8079  case ConstantRange::OverflowResult::AlwaysOverflowsHigh: {
8080    MatchInfo = [=](MachineIRBuilder &B) {
8081 B.buildAdd(Dst, LHS, RHS);
8082 B.buildConstant(Carry, 1);
8083 };
8084 return true;
8085 }
8086 }
8087
8088 return false;
8089}
8090
8091void CombinerHelper::applyBuildFnMO(const MachineOperand &MO,
8092                                    BuildFnTy &MatchInfo) const {
8093  MachineInstr *Root = getDefIgnoringCopies(MO.getReg(), MRI);
8094  MatchInfo(Builder);
8095 Root->eraseFromParent();
8096}
8097
8098bool CombinerHelper::matchFPowIExpansion(MachineInstr &MI,
8099                                         int64_t Exponent) const {
8100  bool OptForSize = MI.getMF()->getFunction().hasOptSize();
8101  return getTargetLowering().isBeneficialToExpandPowI(Exponent, OptForSize);
8102}
8103
8104void CombinerHelper::applyExpandFPowI(MachineInstr &MI,
8105                                      int64_t Exponent) const {
8106 auto [Dst, Base] = MI.getFirst2Regs();
8107 LLT Ty = MRI.getType(Dst);
8108 int64_t ExpVal = Exponent;
8109
8110 if (ExpVal == 0) {
8111 Builder.buildFConstant(Dst, 1.0);
8112 MI.removeFromParent();
8113 return;
8114 }
8115
8116 if (ExpVal < 0)
8117 ExpVal = -ExpVal;
8118
8119 // We use the simple binary decomposition method from SelectionDAG ExpandPowI
8120 // to generate the multiply sequence. There are more optimal ways to do this
8121 // (for example, powi(x,15) generates one more multiply than it should), but
8122 // this has the benefit of being both really simple and much better than a
8123 // libcall.
8124 std::optional<SrcOp> Res;
8125 SrcOp CurSquare = Base;
8126 while (ExpVal > 0) {
8127 if (ExpVal & 1) {
8128 if (!Res)
8129 Res = CurSquare;
8130 else
8131 Res = Builder.buildFMul(Ty, *Res, CurSquare);
8132 }
8133
8134 CurSquare = Builder.buildFMul(Ty, CurSquare, CurSquare);
8135 ExpVal >>= 1;
8136 }
8137
8138 // If the original exponent was negative, invert the result, producing
8139 // 1/(x*x*x).
8140 if (Exponent < 0)
8141 Res = Builder.buildFDiv(Ty, Builder.buildFConstant(Ty, 1.0), *Res,
8142 MI.getFlags());
8143
8144 Builder.buildCopy(Dst, *Res);
8145 MI.eraseFromParent();
8146}
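// Editor's note: a standalone C++ sketch of the square-and-multiply expansion
// used above (not part of the upstream file). For example, powi(x, 13) with
// 13 = 0b1101 multiplies together x, x^4 and x^8 obtained by repeated squaring.
#include <cstdint>

double powiSketch(double Base, int64_t Exponent) {
  uint64_t E = Exponent < 0 ? 0 - (uint64_t)Exponent : (uint64_t)Exponent;
  double Result = 1.0;     // the combiner instead seeds with the first set bit
  double CurSquare = Base; // Base^(2^i) for the current bit i
  while (E > 0) {
    if (E & 1)
      Result *= CurSquare;
    CurSquare *= CurSquare;
    E >>= 1;
  }
  return Exponent < 0 ? 1.0 / Result : Result;
}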
8147
8148bool CombinerHelper::matchFoldAPlusC1MinusC2(const MachineInstr &MI,
8149                                             BuildFnTy &MatchInfo) const {
8150 // fold (A+C1)-C2 -> A+(C1-C2)
8151 const GSub *Sub = cast<GSub>(&MI);
8152 GAdd *Add = cast<GAdd>(MRI.getVRegDef(Sub->getLHSReg()));
8153
8154 if (!MRI.hasOneNonDBGUse(Add->getReg(0)))
8155 return false;
8156
8157 APInt C2 = getIConstantFromReg(Sub->getRHSReg(), MRI);
8158 APInt C1 = getIConstantFromReg(Add->getRHSReg(), MRI);
8159
8160 Register Dst = Sub->getReg(0);
8161 LLT DstTy = MRI.getType(Dst);
8162
8163 MatchInfo = [=](MachineIRBuilder &B) {
8164 auto Const = B.buildConstant(DstTy, C1 - C2);
8165 B.buildAdd(Dst, Add->getLHSReg(), Const);
8166 };
8167
8168 return true;
8169}
8170
8171bool CombinerHelper::matchFoldC2MinusAPlusC1(const MachineInstr &MI,
8172                                             BuildFnTy &MatchInfo) const {
8173 // fold C2-(A+C1) -> (C2-C1)-A
8174 const GSub *Sub = cast<GSub>(&MI);
8175 GAdd *Add = cast<GAdd>(MRI.getVRegDef(Sub->getRHSReg()));
8176
8177 if (!MRI.hasOneNonDBGUse(Add->getReg(0)))
8178 return false;
8179
8180 APInt C2 = getIConstantFromReg(Sub->getLHSReg(), MRI);
8181 APInt C1 = getIConstantFromReg(Add->getRHSReg(), MRI);
8182
8183 Register Dst = Sub->getReg(0);
8184 LLT DstTy = MRI.getType(Dst);
8185
8186 MatchInfo = [=](MachineIRBuilder &B) {
8187 auto Const = B.buildConstant(DstTy, C2 - C1);
8188 B.buildSub(Dst, Const, Add->getLHSReg());
8189 };
8190
8191 return true;
8192}
8193
8194bool CombinerHelper::matchFoldAMinusC1MinusC2(const MachineInstr &MI,
8195                                              BuildFnTy &MatchInfo) const {
8196 // fold (A-C1)-C2 -> A-(C1+C2)
8197 const GSub *Sub1 = cast<GSub>(&MI);
8198 GSub *Sub2 = cast<GSub>(MRI.getVRegDef(Sub1->getLHSReg()));
8199
8200 if (!MRI.hasOneNonDBGUse(Sub2->getReg(0)))
8201 return false;
8202
8203 APInt C2 = getIConstantFromReg(Sub1->getRHSReg(), MRI);
8204 APInt C1 = getIConstantFromReg(Sub2->getRHSReg(), MRI);
8205
8206 Register Dst = Sub1->getReg(0);
8207 LLT DstTy = MRI.getType(Dst);
8208
8209 MatchInfo = [=](MachineIRBuilder &B) {
8210 auto Const = B.buildConstant(DstTy, C1 + C2);
8211 B.buildSub(Dst, Sub2->getLHSReg(), Const);
8212 };
8213
8214 return true;
8215}
8216
8217bool CombinerHelper::matchFoldC1Minus2MinusC2(const MachineInstr &MI,
8218                                              BuildFnTy &MatchInfo) const {
8219 // fold (C1-A)-C2 -> (C1-C2)-A
8220 const GSub *Sub1 = cast<GSub>(&MI);
8221 GSub *Sub2 = cast<GSub>(MRI.getVRegDef(Sub1->getLHSReg()));
8222
8223 if (!MRI.hasOneNonDBGUse(Sub2->getReg(0)))
8224 return false;
8225
8226 APInt C2 = getIConstantFromReg(Sub1->getRHSReg(), MRI);
8227 APInt C1 = getIConstantFromReg(Sub2->getLHSReg(), MRI);
8228
8229 Register Dst = Sub1->getReg(0);
8230 LLT DstTy = MRI.getType(Dst);
8231
8232 MatchInfo = [=](MachineIRBuilder &B) {
8233 auto Const = B.buildConstant(DstTy, C1 - C2);
8234 B.buildSub(Dst, Const, Sub2->getRHSReg());
8235 };
8236
8237 return true;
8238}
8239
8240bool CombinerHelper::matchFoldAMinusC1PlusC2(const MachineInstr &MI,
8241                                             BuildFnTy &MatchInfo) const {
8242 // fold ((A-C1)+C2) -> (A+(C2-C1))
8243 const GAdd *Add = cast<GAdd>(&MI);
8244 GSub *Sub = cast<GSub>(MRI.getVRegDef(Add->getLHSReg()));
8245
8246 if (!MRI.hasOneNonDBGUse(Sub->getReg(0)))
8247 return false;
8248
8249 APInt C2 = getIConstantFromReg(Add->getRHSReg(), MRI);
8250 APInt C1 = getIConstantFromReg(Sub->getRHSReg(), MRI);
8251
8252 Register Dst = Add->getReg(0);
8253 LLT DstTy = MRI.getType(Dst);
8254
8255 MatchInfo = [=](MachineIRBuilder &B) {
8256 auto Const = B.buildConstant(DstTy, C2 - C1);
8257 B.buildAdd(Dst, Sub->getLHSReg(), Const);
8258 };
8259
8260 return true;
8261}
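// Editor's note (worked example, not part of the upstream source): with
// C1 = 10 and C2 = 3, ((A - 10) + 3) is rebuilt as G_ADD %A, -7, since
// C2 - C1 = -7; the analogous folds above combine their constants the same way.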
8262
8263bool CombinerHelper::matchUnmergeValuesAnyExtBuildVector(
8264    const MachineInstr &MI, BuildFnTy &MatchInfo) const {
8265 const GUnmerge *Unmerge = cast<GUnmerge>(&MI);
8266
8267 if (!MRI.hasOneNonDBGUse(Unmerge->getSourceReg()))
8268 return false;
8269
8270 const MachineInstr *Source = MRI.getVRegDef(Unmerge->getSourceReg());
8271
8272 LLT DstTy = MRI.getType(Unmerge->getReg(0));
8273
8274 // $bv:_(<8 x s8>) = G_BUILD_VECTOR ....
8275 // $any:_(<8 x s16>) = G_ANYEXT $bv
8276 // $uv:_(<4 x s16>), $uv1:_(<4 x s16>) = G_UNMERGE_VALUES $any
8277 //
8278 // ->
8279 //
8280 // $any:_(s16) = G_ANYEXT $bv[0]
8281 // $any1:_(s16) = G_ANYEXT $bv[1]
8282 // $any2:_(s16) = G_ANYEXT $bv[2]
8283 // $any3:_(s16) = G_ANYEXT $bv[3]
8284 // $any4:_(s16) = G_ANYEXT $bv[4]
8285 // $any5:_(s16) = G_ANYEXT $bv[5]
8286 // $any6:_(s16) = G_ANYEXT $bv[6]
8287 // $any7:_(s16) = G_ANYEXT $bv[7]
8288 // $uv:_(<4 x s16>) = G_BUILD_VECTOR $any, $any1, $any2, $any3
8289 // $uv1:_(<4 x s16>) = G_BUILD_VECTOR $any4, $any5, $any6, $any7
8290
8291 // We want to unmerge into vectors.
8292 if (!DstTy.isFixedVector())
8293 return false;
8294
8295 const GAnyExt *Any = dyn_cast<GAnyExt>(Source);
8296 if (!Any)
8297 return false;
8298
8299 const MachineInstr *NextSource = MRI.getVRegDef(Any->getSrcReg());
8300
8301 if (const GBuildVector *BV = dyn_cast<GBuildVector>(NextSource)) {
8302 // G_UNMERGE_VALUES G_ANYEXT G_BUILD_VECTOR
8303
8304 if (!MRI.hasOneNonDBGUse(BV->getReg(0)))
8305 return false;
8306
8307 // FIXME: check element types?
8308 if (BV->getNumSources() % Unmerge->getNumDefs() != 0)
8309 return false;
8310
8311 LLT BigBvTy = MRI.getType(BV->getReg(0));
8312 LLT SmallBvTy = DstTy;
8313 LLT SmallBvElemenTy = SmallBvTy.getElementType();
8314
8315    if (!isLegalOrBeforeLegalizer(
8316            {TargetOpcode::G_BUILD_VECTOR, {SmallBvTy, SmallBvElemenTy}}))
8317 return false;
8318
8319 // We check the legality of scalar anyext.
8320    if (!isLegalOrBeforeLegalizer(
8321            {TargetOpcode::G_ANYEXT,
8322 {SmallBvElemenTy, BigBvTy.getElementType()}}))
8323 return false;
8324
8325 MatchInfo = [=](MachineIRBuilder &B) {
8326 // Build into each G_UNMERGE_VALUES def
8327 // a small build vector with anyext from the source build vector.
8328 for (unsigned I = 0; I < Unmerge->getNumDefs(); ++I) {
8329        SmallVector<Register> Ops;
8330        for (unsigned J = 0; J < SmallBvTy.getNumElements(); ++J) {
8331 Register SourceArray =
8332 BV->getSourceReg(I * SmallBvTy.getNumElements() + J);
8333 auto AnyExt = B.buildAnyExt(SmallBvElemenTy, SourceArray);
8334 Ops.push_back(AnyExt.getReg(0));
8335 }
8336 B.buildBuildVector(Unmerge->getOperand(I).getReg(), Ops);
8337 };
8338 };
8339 return true;
8340 };
8341
8342 return false;
8343}
8344
8345bool CombinerHelper::matchShuffleUndefRHS(MachineInstr &MI,
8346 BuildFnTy &MatchInfo) const {
8347
8348 bool Changed = false;
8349 auto &Shuffle = cast<GShuffleVector>(MI);
8350 ArrayRef<int> OrigMask = Shuffle.getMask();
8351 SmallVector<int, 16> NewMask;
8352 const LLT SrcTy = MRI.getType(Shuffle.getSrc1Reg());
8353 const unsigned NumSrcElems = SrcTy.isVector() ? SrcTy.getNumElements() : 1;
8354 const unsigned NumDstElts = OrigMask.size();
8355 for (unsigned i = 0; i != NumDstElts; ++i) {
8356 int Idx = OrigMask[i];
8357 if (Idx >= (int)NumSrcElems) {
8358 Idx = -1;
8359 Changed = true;
8360 }
8361 NewMask.push_back(Idx);
8362 }
8363
8364 if (!Changed)
8365 return false;
8366
8367 MatchInfo = [&, NewMask = std::move(NewMask)](MachineIRBuilder &B) {
8368 B.buildShuffleVector(MI.getOperand(0), MI.getOperand(1), MI.getOperand(2),
8369 std::move(NewMask));
8370 };
8371
8372 return true;
8373}
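To make the mask handling concrete (a standalone sketch with a made-up helper name, not code from this file): with two 4-element sources and an undef second source, a mask such as <0,5,1,7> is rewritten to <0,-1,1,-1>, because indices 4..7 select lanes whose value is undefined anyway.

#include <cassert>
#include <vector>

// Minimal restatement of the canonicalization above: any index that selects
// from the second (undef) source becomes -1.
static std::vector<int> dropRhsIndices(const std::vector<int> &Mask,
                                       unsigned NumSrcElems) {
  std::vector<int> NewMask;
  for (int Idx : Mask)
    NewMask.push_back(Idx >= static_cast<int>(NumSrcElems) ? -1 : Idx);
  return NewMask;
}

int main() {
  // Shuffle of two 4-element sources where the RHS is undef.
  std::vector<int> Mask = {0, 5, 1, 7};
  std::vector<int> Expected = {0, -1, 1, -1};
  assert(dropRhsIndices(Mask, 4) == Expected);
  return 0;
}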
8374
8375static void commuteMask(MutableArrayRef<int> Mask, const unsigned NumElems) {
8376 const unsigned MaskSize = Mask.size();
8377 for (unsigned I = 0; I < MaskSize; ++I) {
8378 int Idx = Mask[I];
8379 if (Idx < 0)
8380 continue;
8381
8382 if (Idx < (int)NumElems)
8383 Mask[I] = Idx + NumElems;
8384 else
8385 Mask[I] = Idx - NumElems;
8386 }
8387}
8388
8389bool CombinerHelper::matchShuffleDisjointMask(MachineInstr &MI,
8390 BuildFnTy &MatchInfo) const {
8391
8392 auto &Shuffle = cast<GShuffleVector>(MI);
8393 // If either of the two inputs is already undef, don't check the mask
8394 // again, to prevent an infinite loop.
8395 if (getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Shuffle.getSrc1Reg(), MRI))
8396 return false;
8397
8398 if (getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Shuffle.getSrc2Reg(), MRI))
8399 return false;
8400
8401 const LLT DstTy = MRI.getType(Shuffle.getReg(0));
8402 const LLT Src1Ty = MRI.getType(Shuffle.getSrc1Reg());
8403 if (!isLegalOrBeforeLegalizer(
8404 {TargetOpcode::G_SHUFFLE_VECTOR, {DstTy, Src1Ty}}))
8405 return false;
8406
8407 ArrayRef<int> Mask = Shuffle.getMask();
8408 const unsigned NumSrcElems = Src1Ty.getNumElements();
8409
8410 bool TouchesSrc1 = false;
8411 bool TouchesSrc2 = false;
8412 const unsigned NumElems = Mask.size();
8413 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
8414 if (Mask[Idx] < 0)
8415 continue;
8416
8417 if (Mask[Idx] < (int)NumSrcElems)
8418 TouchesSrc1 = true;
8419 else
8420 TouchesSrc2 = true;
8421 }
8422
8423 if (TouchesSrc1 == TouchesSrc2)
8424 return false;
8425
8426 Register NewSrc1 = Shuffle.getSrc1Reg();
8427 SmallVector<int, 16> NewMask(Mask);
8428 if (TouchesSrc2) {
8429 NewSrc1 = Shuffle.getSrc2Reg();
8430 commuteMask(NewMask, NumSrcElems);
8431 }
8432
8433 MatchInfo = [=, &Shuffle](MachineIRBuilder &B) {
8434 auto Undef = B.buildUndef(Src1Ty);
8435 B.buildShuffleVector(Shuffle.getReg(0), NewSrc1, Undef, NewMask);
8436 };
8437
8438 return true;
8439}
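As a worked example (illustrative code with a made-up helper name, not part of the source): a shuffle of two 4-element sources with mask <4,5,6,7> reads only the second source, so after the commuteMask step the mask becomes <0,1,2,3>, the second source becomes the first operand, and the now-unused operand is replaced by an implicit_def. The sketch below mirrors the index remapping done by the static commuteMask() helper above.

#include <cassert>
#include <vector>

// Same remapping as commuteMask(): indices are shifted by NumElems so the
// two shuffle sources swap roles; sentinel (-1) entries are left alone.
static void commuteMaskSketch(std::vector<int> &Mask, unsigned NumElems) {
  for (int &Idx : Mask) {
    if (Idx < 0)
      continue;
    Idx = Idx < static_cast<int>(NumElems) ? Idx + static_cast<int>(NumElems)
                                           : Idx - static_cast<int>(NumElems);
  }
}

int main() {
  // Mask that touches only the second of two 4-element sources.
  std::vector<int> Mask = {4, 5, 6, 7};
  commuteMaskSketch(Mask, 4);
  std::vector<int> Expected = {0, 1, 2, 3};
  assert(Mask == Expected);
  return 0;
}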
8440
8441bool CombinerHelper::matchSuboCarryOut(const MachineInstr &MI,
8442 BuildFnTy &MatchInfo) const {
8443 const GSubCarryOut *Subo = cast<GSubCarryOut>(&MI);
8444
8445 Register Dst = Subo->getReg(0);
8446 Register LHS = Subo->getLHSReg();
8447 Register RHS = Subo->getRHSReg();
8448 Register Carry = Subo->getCarryOutReg();
8449 LLT DstTy = MRI.getType(Dst);
8450 LLT CarryTy = MRI.getType(Carry);
8451
8452 // Check legality before known bits.
8453 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SUB, {DstTy}}) ||
8454 !isConstantLegalOrBeforeLegalizer(CarryTy))
8455 return false;
8456
8457 ConstantRange KBLHS =
8458 ConstantRange::fromKnownBits(VT->getKnownBits(LHS),
8459 /* IsSigned=*/Subo->isSigned());
8460 ConstantRange KBRHS =
8461 ConstantRange::fromKnownBits(VT->getKnownBits(RHS),
8462 /* IsSigned=*/Subo->isSigned());
8463
8464 if (Subo->isSigned()) {
8465 // G_SSUBO
8466 switch (KBLHS.signedSubMayOverflow(KBRHS)) {
8467 case ConstantRange::OverflowResult::MayOverflow:
8468 return false;
8469 case ConstantRange::OverflowResult::NeverOverflows: {
8470 MatchInfo = [=](MachineIRBuilder &B) {
8471 B.buildSub(Dst, LHS, RHS, MachineInstr::MIFlag::NoSWrap);
8472 B.buildConstant(Carry, 0);
8473 };
8474 return true;
8475 }
8476 case ConstantRange::OverflowResult::AlwaysOverflowsLow:
8477 case ConstantRange::OverflowResult::AlwaysOverflowsHigh: {
8478 MatchInfo = [=](MachineIRBuilder &B) {
8479 B.buildSub(Dst, LHS, RHS);
8480 B.buildConstant(Carry, getICmpTrueVal(getTargetLowering(),
8481 /*isVector=*/CarryTy.isVector(),
8482 /*isFP=*/false));
8483 };
8484 return true;
8485 }
8486 }
8487 return false;
8488 }
8489
8490 // G_USUBO
8491 switch (KBLHS.unsignedSubMayOverflow(KBRHS)) {
8492 case ConstantRange::OverflowResult::MayOverflow:
8493 return false;
8494 case ConstantRange::OverflowResult::NeverOverflows: {
8495 MatchInfo = [=](MachineIRBuilder &B) {
8496 B.buildSub(Dst, LHS, RHS, MachineInstr::MIFlag::NoUWrap);
8497 B.buildConstant(Carry, 0);
8498 };
8499 return true;
8500 }
8501 case ConstantRange::OverflowResult::AlwaysOverflowsLow:
8502 case ConstantRange::OverflowResult::AlwaysOverflowsHigh: {
8503 MatchInfo = [=](MachineIRBuilder &B) {
8504 B.buildSub(Dst, LHS, RHS);
8505 B.buildConstant(Carry, getICmpTrueVal(getTargetLowering(),
8506 /*isVector=*/CarryTy.isVector(),
8507 /*isFP=*/false));
8508 };
8509 return true;
8510 }
8511 }
8512
8513 return false;
8514}
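To see why known-bits reasoning can prove the carry output dead (a standalone sketch against LLVM's public ConstantRange and KnownBits APIs, not code from this file): if the minuend is known to have its top bit set and the subtrahend is known to have it clear, the unsigned subtraction can never wrap below zero, which corresponds to the NeverOverflows path above that rebuilds a plain G_SUB with a constant-false carry.

#include "llvm/IR/ConstantRange.h"
#include "llvm/Support/KnownBits.h"
#include <cassert>

using namespace llvm;

int main() {
  // LHS is known to be >= 0x80000000 (top bit set); RHS is known to be
  // <= 0x7fffffff (top bit clear), so LHS - RHS cannot underflow.
  KnownBits LHS(32), RHS(32);
  LHS.One.setBit(31);
  RHS.Zero.setBit(31);

  ConstantRange LHSRange =
      ConstantRange::fromKnownBits(LHS, /*IsSigned=*/false);
  ConstantRange RHSRange =
      ConstantRange::fromKnownBits(RHS, /*IsSigned=*/false);

  assert(LHSRange.unsignedSubMayOverflow(RHSRange) ==
         ConstantRange::OverflowResult::NeverOverflows);
  return 0;
}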
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
unsigned RegSize
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
constexpr LLT S1
AMDGPU Register Bank Select
Rewrite undef for PHI
This file declares a class to represent arbitrary precision floating point values and provide a varie...
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static const Function * getParent(const Value *V)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static bool hasMoreUses(const MachineInstr &MI0, const MachineInstr &MI1, const MachineRegisterInfo &MRI)
static bool isContractableFMul(MachineInstr &MI, bool AllowFusionGlobally)
Checks if MI is TargetOpcode::G_FMUL and contractable either due to global flags or MachineInstr flag...
static unsigned getIndexedOpc(unsigned LdStOpc)
static APFloat constantFoldFpUnary(const MachineInstr &MI, const MachineRegisterInfo &MRI, const APFloat &Val)
static std::optional< std::pair< GZExtLoad *, int64_t > > matchLoadAndBytePosition(Register Reg, unsigned MemSizeInBits, const MachineRegisterInfo &MRI)
Helper function for findLoadOffsetsForLoadOrCombine.
static std::optional< unsigned > getMinUselessShift(KnownBits ValueKB, unsigned Opcode, std::optional< int64_t > &Result)
Return the minimum useless shift amount that results in complete loss of the source value.
static Register peekThroughBitcast(Register Reg, const MachineRegisterInfo &MRI)
static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I)
static cl::opt< bool > ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false), cl::desc("Force all indexed operations to be " "legal for the GlobalISel combiner"))
static void commuteMask(MutableArrayRef< int > Mask, const unsigned NumElems)
static cl::opt< unsigned > PostIndexUseThreshold("post-index-use-threshold", cl::Hidden, cl::init(32), cl::desc("Number of uses of a base pointer to check before it is no longer " "considered for post-indexing."))
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
static unsigned getExtLoadOpcForExtend(unsigned ExtOpc)
static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits, int64_t Cst, bool IsVector, bool IsFP)
static LLT getMidVTForTruncRightShiftCombine(LLT ShiftTy, LLT TruncTy)
static bool canFoldInAddressingMode(GLoadStore *MI, const TargetLowering &TLI, MachineRegisterInfo &MRI)
Return true if 'MI' is a load or a store that may be fold it's address operand into the load / store ...
static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I)
static Register buildLogBase2(Register V, MachineIRBuilder &MIB)
Determines the LogBase2 value for a non-null input value using the transform: LogBase2(V) = (EltBits ...
This contains common combine transformations that may be used in a combine pass,or by the target else...
This contains common code to allow clients to notify changes to machine instr.
Provides analysis for querying information about KnownBits during GISel passes.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
#define _
IRTranslator LLVM IR MI
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static LVOptions Options
Definition LVOptions.cpp:25
Interface for Targets to specify which operations they can successfully select and how the others sho...
Implement a low-level type suitable for MachineInstr level instruction selection.
#define I(x, y, z)
Definition MD5.cpp:57
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineIRBuilder class.
Register Reg
#define R2(n)
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
R600 Clause Merge
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
This file contains some templates that are useful if you are working with the STL at all.
This file implements a set that has insertion order iteration characteristics.
This file implements the SmallBitVector class.
#define LLVM_DEBUG(...)
Definition Debug.h:114
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
This file describes how to lower LLVM code to machine code.
Value * RHS
Value * LHS
static const fltSemantics & IEEEdouble()
Definition APFloat.h:297
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
const fltSemantics & getSemantics() const
Definition APFloat.h:1439
bool isNaN() const
Definition APFloat.h:1429
opStatus fusedMultiplyAdd(const APFloat &Multiplicand, const APFloat &Addend, roundingMode RM)
Definition APFloat.h:1217
APInt bitcastToAPInt() const
Definition APFloat.h:1335
Class for arbitrary precision integers.
Definition APInt.h:78
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1012
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1541
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition APInt.cpp:1033
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition APInt.h:207
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:372
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition APInt.h:1183
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1666
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1489
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition APInt.h:1112
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition APInt.h:210
bool isNegative() const
Determine sign of this APInt.
Definition APInt.h:330
int32_t exactLogBase2() const
Definition APInt.h:1784
void ashrInPlace(unsigned ShiftAmt)
Arithmetic right-shift this APInt by ShiftAmt in place.
Definition APInt.h:835
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1640
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition APInt.h:1599
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:220
LLVM_ABI APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
Definition APInt.cpp:1041
bool isStrictlyPositive() const
Determine if this APInt Value is positive.
Definition APInt.h:357
LLVM_ABI APInt multiplicativeInverse() const
Definition APInt.cpp:1274
bool isMask(unsigned numBits) const
Definition APInt.h:489
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition APInt.cpp:985
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:441
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
bool isOne() const
Determine if this is a value of 1.
Definition APInt.h:390
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition APInt.h:240
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1563
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition APInt.h:859
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:852
unsigned countr_one() const
Count the number of trailing one bits.
Definition APInt.h:1657
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition APInt.h:1222
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
bool isEquality() const
Determine if this is an equals/not equals predicate.
Definition InstrTypes.h:915
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
@ FCMP_TRUE
1 1 1 1 Always true (always folded)
Definition InstrTypes.h:693
@ ICMP_SLT
signed less than
Definition InstrTypes.h:705
@ ICMP_SLE
signed less or equal
Definition InstrTypes.h:706
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition InstrTypes.h:682
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
Definition InstrTypes.h:691
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition InstrTypes.h:680
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition InstrTypes.h:681
@ ICMP_UGE
unsigned greater or equal
Definition InstrTypes.h:700
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:699
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:703
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition InstrTypes.h:690
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:701
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition InstrTypes.h:688
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition InstrTypes.h:683
@ ICMP_SGE
signed greater or equal
Definition InstrTypes.h:704
@ ICMP_ULE
unsigned less or equal
Definition InstrTypes.h:702
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition InstrTypes.h:689
@ FCMP_FALSE
0 0 0 0 Always false (always folded)
Definition InstrTypes.h:678
static LLVM_ABI bool isEquality(Predicate pred)
Determine if this is an equals/not equals predicate.
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition InstrTypes.h:827
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition InstrTypes.h:789
static LLVM_ABI bool isOrdered(Predicate predicate)
Determine if the predicate is an ordered operation.
void applyCombineExtendingLoads(MachineInstr &MI, PreferredTuple &MatchInfo) const
bool matchCommuteShift(MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchRepeatedFPDivisor(MachineInstr &MI, SmallVector< MachineInstr * > &MatchInfo) const
bool matchFoldC2MinusAPlusC1(const MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchLoadOrCombine(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match expression trees of the form.
const RegisterBank * getRegBank(Register Reg) const
Get the register bank of Reg.
void applyPtrAddZero(MachineInstr &MI) const
bool matchEqualDefs(const MachineOperand &MOP1, const MachineOperand &MOP2) const
Return true if MOP1 and MOP2 are register operands are defined by equivalent instructions.
void applyUDivOrURemByConst(MachineInstr &MI) const
bool matchConstantFoldBinOp(MachineInstr &MI, APInt &MatchInfo) const
Do constant folding when opportunities are exposed after MIR building.
void applyCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) const
bool matchUnmergeValuesAnyExtBuildVector(const MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchSelectSameVal(MachineInstr &MI) const
Optimize (cond ? x : x) -> x.
bool matchAddEToAddO(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match: (G_*ADDE x, y, 0) -> (G_*ADDO x, y) (G_*SUBE x, y, 0) -> (G_*SUBO x, y)
bool matchReassocConstantInnerRHS(GPtrAdd &MI, MachineInstr *RHS, BuildFnTy &MatchInfo) const
bool matchBitfieldExtractFromShr(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match: shr (shl x, n), k -> sbfx/ubfx x, pos, width.
bool matchFoldAMinusC1PlusC2(const MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchTruncSSatU(MachineInstr &MI, Register &MatchInfo) const
void applySimplifyURemByPow2(MachineInstr &MI) const
Combine G_UREM x, (known power of 2) to an add and bitmasking.
bool matchCombineUnmergeZExtToZExt(MachineInstr &MI) const
Transform X, Y = G_UNMERGE(G_ZEXT(Z)) -> X = G_ZEXT(Z); Y = G_CONSTANT 0.
bool matchPtrAddZero(MachineInstr &MI) const
}
void applyCombineConcatVectors(MachineInstr &MI, SmallVector< Register > &Ops) const
Replace MI with a flattened build_vector with Ops or an implicit_def if Ops is empty.
void applyXorOfAndWithSameReg(MachineInstr &MI, std::pair< Register, Register > &MatchInfo) const
bool canCombineFMadOrFMA(MachineInstr &MI, bool &AllowFusionGlobally, bool &HasFMAD, bool &Aggressive, bool CanReassociate=false) const
bool matchFoldAPlusC1MinusC2(const MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchExtractVecEltBuildVec(MachineInstr &MI, Register &Reg) const
void applyCombineUnmergeConstant(MachineInstr &MI, SmallVectorImpl< APInt > &Csts) const
bool matchShiftsTooBig(MachineInstr &MI, std::optional< int64_t > &MatchInfo) const
Match shifts greater or equal to the range (the bitwidth of the result datatype, or the effective bit...
bool matchCombineFAddFpExtFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z) (fadd (fpext (fmul x,...
bool matchCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) const
void applyCombineShuffleConcat(MachineInstr &MI, SmallVector< Register > &Ops) const
Replace MI with a flattened build_vector with Ops or an implicit_def if Ops is empty.
void replaceSingleDefInstWithReg(MachineInstr &MI, Register Replacement) const
Delete MI and replace all of its uses with Replacement.
void applyCombineShuffleToBuildVector(MachineInstr &MI) const
Replace MI with a build_vector.
bool matchCombineExtractedVectorLoad(MachineInstr &MI, BuildFnTy &MatchInfo) const
Combine a G_EXTRACT_VECTOR_ELT of a load into a narrowed load.
void replaceRegWith(MachineRegisterInfo &MRI, Register FromReg, Register ToReg) const
MachineRegisterInfo::replaceRegWith() and inform the observer of the changes.
void replaceRegOpWith(MachineRegisterInfo &MRI, MachineOperand &FromRegOp, Register ToReg) const
Replace a single register operand with a new register and inform the observer of the changes.
bool matchReassocCommBinOp(MachineInstr &MI, BuildFnTy &MatchInfo) const
Reassociate commutative binary operations like G_ADD.
void applyBuildFnMO(const MachineOperand &MO, BuildFnTy &MatchInfo) const
Use a function which takes in a MachineIRBuilder to perform a combine.
bool matchCommuteConstantToRHS(MachineInstr &MI) const
Match constant LHS ops that should be commuted.
const DataLayout & getDataLayout() const
bool matchBinOpSameVal(MachineInstr &MI) const
Optimize (x op x) -> x.
bool matchSimplifyNegMinMax(MachineInstr &MI, BuildFnTy &MatchInfo) const
Tranform (neg (min/max x, (neg x))) into (max/min x, (neg x)).
bool matchCombineDivRem(MachineInstr &MI, MachineInstr *&OtherMI) const
Try to combine G_[SU]DIV and G_[SU]REM into a single G_[SU]DIVREM when their source operands are iden...
void applyUMulHToLShr(MachineInstr &MI) const
void applyNotCmp(MachineInstr &MI, SmallVectorImpl< Register > &RegsToNegate) const
bool matchShiftImmedChain(MachineInstr &MI, RegisterImmPair &MatchInfo) const
Fold (shift (shift base, x), y) -> (shift base (x+y))
void applyCombineI2PToP2I(MachineInstr &MI, Register &Reg) const
bool matchTruncLshrBuildVectorFold(MachineInstr &MI, Register &MatchInfo) const
bool matchAllExplicitUsesAreUndef(MachineInstr &MI) const
Return true if all register explicit use operands on MI are defined by a G_IMPLICIT_DEF.
bool isPredecessor(const MachineInstr &DefMI, const MachineInstr &UseMI) const
Returns true if DefMI precedes UseMI or they are the same instruction.
bool matchPtrAddImmedChain(MachineInstr &MI, PtrAddChain &MatchInfo) const
bool matchTruncSSatS(MachineInstr &MI, Register &MatchInfo) const
const TargetLowering & getTargetLowering() const
bool matchShuffleUndefRHS(MachineInstr &MI, BuildFnTy &MatchInfo) const
Remove references to rhs if it is undef.
void applyBuildInstructionSteps(MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) const
Replace MI with a series of instructions described in MatchInfo.
void applySDivByPow2(MachineInstr &MI) const
void applySimplifyAddToSub(MachineInstr &MI, std::tuple< Register, Register > &MatchInfo) const
void applyUDivByPow2(MachineInstr &MI) const
Given an G_UDIV MI expressing an unsigned divided by a pow2 constant, return expressions that impleme...
bool matchOr(MachineInstr &MI, BuildFnTy &MatchInfo) const
Combine ors.
bool matchLshrOfTruncOfLshr(MachineInstr &MI, LshrOfTruncOfLshr &MatchInfo, MachineInstr &ShiftMI) const
Fold (lshr (trunc (lshr x, C1)), C2) -> trunc (shift x, (C1 + C2))
bool matchSimplifyAddToSub(MachineInstr &MI, std::tuple< Register, Register > &MatchInfo) const
Return true if MI is a G_ADD which can be simplified to a G_SUB.
void replaceInstWithConstant(MachineInstr &MI, int64_t C) const
Replace an instruction with a G_CONSTANT with value C.
bool tryEmitMemcpyInline(MachineInstr &MI) const
Emit loads and stores that perform the given memcpy.
bool matchCombineFSubFpExtFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fsub (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), (fneg z)) (fsub (fpext (fmul x,...
void applyFsubToFneg(MachineInstr &MI, Register &MatchInfo) const
bool matchConstantLargerBitWidth(MachineInstr &MI, unsigned ConstIdx) const
Checks if constant at ConstIdx is larger than MI 's bitwidth.
void applyCombineCopy(MachineInstr &MI) const
bool matchAddSubSameReg(MachineInstr &MI, Register &Src) const
Transform G_ADD(x, G_SUB(y, x)) to y.
bool matchCombineShlOfExtend(MachineInstr &MI, RegisterImmPair &MatchData) const
void applyCombineAddP2IToPtrAdd(MachineInstr &MI, std::pair< Register, bool > &PtrRegAndCommute) const
bool matchCombineFSubFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fsub (fmul x, y), z) -> (fma x, y, -z) (fsub (fmul x, y), z) -> (fmad x,...
bool matchCombineFAddFMAFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z)) (fadd (fmad x,...
bool matchSextTruncSextLoad(MachineInstr &MI) const
bool matchCombineMergeUnmerge(MachineInstr &MI, Register &MatchInfo) const
Fold away a merge of an unmerge of the corresponding values.
bool matchCombineInsertVecElts(MachineInstr &MI, SmallVectorImpl< Register > &MatchInfo) const
bool matchCombineBuildUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI, Register &UnmergeSrc) const
bool matchDivByPow2(MachineInstr &MI, bool IsSigned) const
Given an G_SDIV MI expressing a signed divided by a pow2 constant, return expressions that implements...
bool matchNarrowBinopFeedingAnd(MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchRedundantNegOperands(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fadd x, fneg(y)) -> (fsub x, y) (fadd fneg(x), y) -> (fsub y, x) (fsub x,...
bool matchCombineLoadWithAndMask(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match (and (load x), mask) -> zextload x.
bool matchCombineFAddFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fadd (fmul x, y), z) -> (fma x, y, z) (fadd (fmul x, y), z) -> (fmad x,...
bool matchCombineCopy(MachineInstr &MI) const
bool matchExtendThroughPhis(MachineInstr &MI, MachineInstr *&ExtMI) const
void applyShiftImmedChain(MachineInstr &MI, RegisterImmPair &MatchInfo) const
bool matchXorOfAndWithSameReg(MachineInstr &MI, std::pair< Register, Register > &MatchInfo) const
Fold (xor (and x, y), y) -> (and (not x), y) {.
bool matchCombineShuffleVector(MachineInstr &MI, SmallVectorImpl< Register > &Ops) const
Check if the G_SHUFFLE_VECTOR MI can be replaced by a concat_vectors.
void applyCombineConstPtrAddToI2P(MachineInstr &MI, APInt &NewCst) const
bool matchCombineAddP2IToPtrAdd(MachineInstr &MI, std::pair< Register, bool > &PtrRegAndCommute) const
Transform G_ADD (G_PTRTOINT x), y -> G_PTRTOINT (G_PTR_ADD x, y) Transform G_ADD y,...
void replaceInstWithFConstant(MachineInstr &MI, double C) const
Replace an instruction with a G_FCONSTANT with value C.
bool matchFunnelShiftToRotate(MachineInstr &MI) const
Match an FSHL or FSHR that can be combined to a ROTR or ROTL rotate.
bool matchOrShiftToFunnelShift(MachineInstr &MI, bool AllowScalarConstants, BuildFnTy &MatchInfo) const
bool matchRedundantSExtInReg(MachineInstr &MI) const
void replaceOpcodeWith(MachineInstr &FromMI, unsigned ToOpcode) const
Replace the opcode in instruction with a new opcode and inform the observer of the changes.
void applyFunnelShiftConstantModulo(MachineInstr &MI) const
Replaces the shift amount in MI with ShiftAmt % BW.
bool matchOperandIsZero(MachineInstr &MI, unsigned OpIdx) const
Check if operand OpIdx is zero.
bool matchFoldC1Minus2MinusC2(const MachineInstr &MI, BuildFnTy &MatchInfo) const
void applyCombineShlOfExtend(MachineInstr &MI, const RegisterImmPair &MatchData) const
void applyUseVectorTruncate(MachineInstr &MI, Register &MatchInfo) const
CombinerHelper(GISelChangeObserver &Observer, MachineIRBuilder &B, bool IsPreLegalize, GISelValueTracking *VT=nullptr, MachineDominatorTree *MDT=nullptr, const LegalizerInfo *LI=nullptr)
bool matchShuffleDisjointMask(MachineInstr &MI, BuildFnTy &MatchInfo) const
Turn shuffle a, b, mask -> shuffle undef, b, mask iff mask does not reference a.
bool matchCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal) const
Transform a multiply by a power-of-2 value to a left shift.
void applyCombineShuffleVector(MachineInstr &MI, ArrayRef< Register > Ops) const
Replace MI with a concat_vectors with Ops.
bool matchCombineConstPtrAddToI2P(MachineInstr &MI, APInt &NewCst) const
bool matchCombineUnmergeUndef(MachineInstr &MI, std::function< void(MachineIRBuilder &)> &MatchInfo) const
Transform G_UNMERGE G_IMPLICIT_DEF -> G_IMPLICIT_DEF, G_IMPLICIT_DEF, ...
void applyFoldBinOpIntoSelect(MachineInstr &MI, const unsigned &SelectOpNo) const
SelectOperand is the operand in binary operator MI that is the select to fold.
bool matchFoldAMinusC1MinusC2(const MachineInstr &MI, BuildFnTy &MatchInfo) const
void applyCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) const
bool matchMulOBy2(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match: (G_UMULO x, 2) -> (G_UADDO x, x) (G_SMULO x, 2) -> (G_SADDO x, x)
bool matchCombineShuffleConcat(MachineInstr &MI, SmallVector< Register > &Ops) const
void applySextInRegOfLoad(MachineInstr &MI, std::tuple< Register, unsigned > &MatchInfo) const
bool tryCombineCopy(MachineInstr &MI) const
If MI is COPY, try to combine it.
bool matchTruncUSatU(MachineInstr &MI, MachineInstr &MinMI) const
bool matchICmpToLHSKnownBits(MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchReassocPtrAdd(MachineInstr &MI, BuildFnTy &MatchInfo) const
Reassociate pointer calculations with G_ADD involved, to allow better addressing mode usage.
bool matchUndefShuffleVectorMask(MachineInstr &MI) const
Return true if a G_SHUFFLE_VECTOR instruction MI has an undef mask.
bool matchAnyExplicitUseIsUndef(MachineInstr &MI) const
Return true if any explicit use operand on MI is defined by a G_IMPLICIT_DEF.
bool matchCombineI2PToP2I(MachineInstr &MI, Register &Reg) const
Transform IntToPtr(PtrToInt(x)) to x if cast is in the same address space.
bool matchCombineSubToAdd(MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchShiftOfShiftedLogic(MachineInstr &MI, ShiftOfShiftedLogic &MatchInfo) const
If we have a shift-by-constant of a bitwise logic op that itself has a shift-by-constant operand with...
bool matchOperandIsKnownToBeAPowerOfTwo(MachineInstr &MI, unsigned OpIdx) const
Check if operand OpIdx is known to be a power of 2.
bool matchCombineConcatVectors(MachineInstr &MI, SmallVector< Register > &Ops) const
If MI is G_CONCAT_VECTORS, try to combine it.
bool matchInsertExtractVecEltOutOfBounds(MachineInstr &MI) const
Return true if a G_{EXTRACT,INSERT}_VECTOR_ELT has an out of range index.
bool matchExtractAllEltsFromBuildVector(MachineInstr &MI, SmallVectorImpl< std::pair< Register, MachineInstr * > > &MatchInfo) const
LLVMContext & getContext() const
void applyPtrAddImmedChain(MachineInstr &MI, PtrAddChain &MatchInfo) const
bool isConstantLegalOrBeforeLegalizer(const LLT Ty) const
bool matchNotCmp(MachineInstr &MI, SmallVectorImpl< Register > &RegsToNegate) const
Combine inverting a result of a compare into the opposite cond code.
bool matchSextInRegOfLoad(MachineInstr &MI, std::tuple< Register, unsigned > &MatchInfo) const
Match sext_inreg(load p), imm -> sextload p.
bool matchSelectIMinMax(const MachineOperand &MO, BuildFnTy &MatchInfo) const
Combine select to integer min/max.
void applyCombineConstantFoldFpUnary(MachineInstr &MI, const ConstantFP *Cst) const
Transform fp_instr(cst) to constant result of the fp operation.
bool isLegal(const LegalityQuery &Query) const
bool matchICmpToTrueFalseKnownBits(MachineInstr &MI, int64_t &MatchInfo) const
bool tryReassocBinOp(unsigned Opc, Register DstReg, Register Op0, Register Op1, BuildFnTy &MatchInfo) const
Try to reassociate to reassociate operands of a commutative binop.
void eraseInst(MachineInstr &MI) const
Erase MI.
bool matchConstantFoldFPBinOp(MachineInstr &MI, ConstantFP *&MatchInfo) const
Do constant FP folding when opportunities are exposed after MIR building.
void applyBuildFnNoErase(MachineInstr &MI, BuildFnTy &MatchInfo) const
Use a function which takes in a MachineIRBuilder to perform a combine.
bool matchUseVectorTruncate(MachineInstr &MI, Register &MatchInfo) const
bool matchUndefStore(MachineInstr &MI) const
Return true if a G_STORE instruction MI is storing an undef value.
MachineRegisterInfo & MRI
void applyCombineP2IToI2P(MachineInstr &MI, Register &Reg) const
Transform PtrToInt(IntToPtr(x)) to x.
void applyExtendThroughPhis(MachineInstr &MI, MachineInstr *&ExtMI) const
bool matchConstantFPOp(const MachineOperand &MOP, double C) const
Return true if MOP is defined by a G_FCONSTANT or splat with a value exactly equal to C.
MachineInstr * buildUDivOrURemUsingMul(MachineInstr &MI) const
Given an G_UDIV MI or G_UREM MI expressing a divide by constant, return an expression that implements...
void applyExtractVecEltBuildVec(MachineInstr &MI, Register &Reg) const
bool matchFoldBinOpIntoSelect(MachineInstr &MI, unsigned &SelectOpNo) const
Push a binary operator through a select on constants.
bool tryCombineShiftToUnmerge(MachineInstr &MI, unsigned TargetShiftAmount) const
bool tryCombineExtendingLoads(MachineInstr &MI) const
If MI is extend that consumes the result of a load, try to combine it.
bool isLegalOrBeforeLegalizer(const LegalityQuery &Query) const
bool matchBuildVectorIdentityFold(MachineInstr &MI, Register &MatchInfo) const
bool matchBitfieldExtractFromShrAnd(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match: shr (and x, n), k -> ubfx x, pos, width.
void applyTruncSSatS(MachineInstr &MI, Register &MatchInfo) const
bool matchConstantFoldCastOp(MachineInstr &MI, APInt &MatchInfo) const
Do constant folding when opportunities are exposed after MIR building.
bool tryCombineShuffleVector(MachineInstr &MI) const
Try to combine G_SHUFFLE_VECTOR into G_CONCAT_VECTORS.
void applyRotateOutOfRange(MachineInstr &MI) const
bool matchReassocFoldConstantsInSubTree(GPtrAdd &MI, MachineInstr *LHS, MachineInstr *RHS, BuildFnTy &MatchInfo) const
bool matchHoistLogicOpWithSameOpcodeHands(MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) const
Match (logic_op (op x...), (op y...)) -> (op (logic_op x, y))
bool matchBitfieldExtractFromAnd(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match: and (lshr x, cst), mask -> ubfx x, cst, width.
bool matchBitfieldExtractFromSExtInReg(MachineInstr &MI, BuildFnTy &MatchInfo) const
Form a G_SBFX from a G_SEXT_INREG fed by a right shift.
bool matchUndefSelectCmp(MachineInstr &MI) const
Return true if a G_SELECT instruction MI has an undef comparison.
bool matchAndOrDisjointMask(MachineInstr &MI, BuildFnTy &MatchInfo) const
void replaceInstWithUndef(MachineInstr &MI) const
Replace an instruction with a G_IMPLICIT_DEF.
bool matchRedundantBinOpInEquality(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform: (X + Y) == X -> Y == 0 (X - Y) == X -> Y == 0 (X ^ Y) == X -> Y == 0 (X + Y) !...
bool matchOptBrCondByInvertingCond(MachineInstr &MI, MachineInstr *&BrCond) const
If a brcond's true block is not the fallthrough, make it so by inverting the condition and swapping o...
bool matchAddOverflow(MachineInstr &MI, BuildFnTy &MatchInfo) const
Combine addos.
void applyAshShlToSextInreg(MachineInstr &MI, std::tuple< Register, int64_t > &MatchInfo) const
bool matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo) const
Combine selects.
bool matchCombineExtendingLoads(MachineInstr &MI, PreferredTuple &MatchInfo) const
bool matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) const
Transform X, Y<dead> = G_UNMERGE Z -> X = G_TRUNC Z.
bool matchFsubToFneg(MachineInstr &MI, Register &MatchInfo) const
bool matchRotateOutOfRange(MachineInstr &MI) const
void applyExpandFPowI(MachineInstr &MI, int64_t Exponent) const
Expands FPOWI into a series of multiplications and a division if the exponent is negative.
void setRegBank(Register Reg, const RegisterBank *RegBank) const
Set the register bank of Reg.
bool matchConstantSelectCmp(MachineInstr &MI, unsigned &OpIdx) const
Return true if a G_SELECT instruction MI has a constant comparison.
bool matchCommuteFPConstantToRHS(MachineInstr &MI) const
Match constant LHS FP ops that should be commuted.
void applyCombineDivRem(MachineInstr &MI, MachineInstr *&OtherMI) const
bool matchCombineFMinMaxNaN(MachineInstr &MI, unsigned &Info) const
bool matchRedundantOr(MachineInstr &MI, Register &Replacement) const
void applyTruncSSatU(MachineInstr &MI, Register &MatchInfo) const
bool matchCombineFSubFpExtFNegFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fsub (fpext (fneg (fmul x, y))), z) -> (fneg (fma (fpext x), (fpext y),...
bool matchTruncBuildVectorFold(MachineInstr &MI, Register &MatchInfo) const
void applyCombineTruncOfShift(MachineInstr &MI, std::pair< MachineInstr *, LLT > &MatchInfo) const
bool matchConstantOp(const MachineOperand &MOP, int64_t C) const
Return true if MOP is defined by a G_CONSTANT or splat with a value equal to C.
const LegalizerInfo * LI
void applyCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal) const
void applyCombineBuildUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B, Register &UnmergeSrc) const
bool matchUMulHToLShr(MachineInstr &MI) const
MachineDominatorTree * MDT
void applyFunnelShiftToRotate(MachineInstr &MI) const
bool matchSimplifySelectToMinMax(MachineInstr &MI, BuildFnTy &MatchInfo) const
void applyRepeatedFPDivisor(SmallVector< MachineInstr * > &MatchInfo) const
bool matchTruncUSatUToFPTOUISat(MachineInstr &MI, MachineInstr &SrcMI) const
const RegisterBankInfo * RBI
bool matchMulOBy0(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match: (G_*MULO x, 0) -> 0 + no carry out.
GISelValueTracking * VT
bool matchCombineUnmergeConstant(MachineInstr &MI, SmallVectorImpl< APInt > &Csts) const
Transform G_UNMERGE Constant -> Constant1, Constant2, ...
void applyShiftOfShiftedLogic(MachineInstr &MI, ShiftOfShiftedLogic &MatchInfo) const
const TargetRegisterInfo * TRI
bool matchRedundantAnd(MachineInstr &MI, Register &Replacement) const
bool dominates(const MachineInstr &DefMI, const MachineInstr &UseMI) const
Returns true if DefMI dominates UseMI.
GISelChangeObserver & Observer
void applyBuildFn(MachineInstr &MI, BuildFnTy &MatchInfo) const
Use a function which takes in a MachineIRBuilder to perform a combine.
bool matchCombineTruncOfShift(MachineInstr &MI, std::pair< MachineInstr *, LLT > &MatchInfo) const
Transform trunc (shl x, K) to shl (trunc x), K if K < VT.getScalarSizeInBits().
bool matchCombineShiftToUnmerge(MachineInstr &MI, unsigned TargetShiftSize, unsigned &ShiftVal) const
Reduce a shift by a constant to an unmerge and a shift on a half sized type.
bool matchUDivOrURemByConst(MachineInstr &MI) const
Combine G_UDIV or G_UREM by constant into a multiply by magic constant.
bool matchAnd(MachineInstr &MI, BuildFnTy &MatchInfo) const
Combine ands.
bool matchSuboCarryOut(const MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchConstantFoldFMA(MachineInstr &MI, ConstantFP *&MatchInfo) const
Constant fold G_FMA/G_FMAD.
bool matchCombineFSubFNegFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z)) (fsub (fneg (fmul,...
bool matchCombineZextTrunc(MachineInstr &MI, Register &Reg) const
Transform zext(trunc(x)) to x.
bool matchOperandIsUndef(MachineInstr &MI, unsigned OpIdx) const
Check if operand OpIdx is undef.
void applyLshrOfTruncOfLshr(MachineInstr &MI, LshrOfTruncOfLshr &MatchInfo) const
bool tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0) const
Optimize memcpy intrinsics et al, e.g.
bool matchFreezeOfSingleMaybePoisonOperand(MachineInstr &MI, BuildFnTy &MatchInfo) const
void applySDivOrSRemByConst(MachineInstr &MI) const
MachineInstr * buildSDivOrSRemUsingMul(MachineInstr &MI) const
Given an G_SDIV MI or G_SREM MI expressing a signed divide by constant, return an expression that imp...
bool isLegalOrHasWidenScalar(const LegalityQuery &Query) const
bool matchSubAddSameReg(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform: (x + y) - y -> x (x + y) - x -> y x - (y + x) -> 0 - y x - (x + z) -> 0 - z.
bool matchReassocConstantInnerLHS(GPtrAdd &MI, MachineInstr *LHS, MachineInstr *RHS, BuildFnTy &MatchInfo) const
bool matchOverlappingAnd(MachineInstr &MI, BuildFnTy &MatchInfo) const
Fold and(and(x, C1), C2) -> C1&C2 ? and(x, C1&C2) : 0.
bool matchCombineAnyExtTrunc(MachineInstr &MI, Register &Reg) const
Transform anyext(trunc(x)) to x.
void applyExtractAllEltsFromBuildVector(MachineInstr &MI, SmallVectorImpl< std::pair< Register, MachineInstr * > > &MatchInfo) const
MachineIRBuilder & Builder
void applyCommuteBinOpOperands(MachineInstr &MI) const
void replaceSingleDefInstWithOperand(MachineInstr &MI, unsigned OpIdx) const
Delete MI and replace all of its uses with its OpIdx-th operand.
void applySextTruncSextLoad(MachineInstr &MI) const
const MachineFunction & getMachineFunction() const
bool matchCombineFAddFpExtFMulToFMadOrFMAAggressive(MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchSDivOrSRemByConst(MachineInstr &MI) const
Combine G_SDIV or G_SREM by constant into a multiply by magic constant.
void applyOptBrCondByInvertingCond(MachineInstr &MI, MachineInstr *&BrCond) const
void applyCombineShiftToUnmerge(MachineInstr &MI, const unsigned &ShiftVal) const
bool matchFPowIExpansion(MachineInstr &MI, int64_t Exponent) const
Match FPOWI if it's safe to extend it into a series of multiplications.
void applyCombineInsertVecElts(MachineInstr &MI, SmallVectorImpl< Register > &MatchInfo) const
bool matchCombineUnmergeMergeToPlainValues(MachineInstr &MI, SmallVectorImpl< Register > &Operands) const
Transform <ty,...> G_UNMERGE(G_MERGE ty X, Y, Z) -> ty X, Y, Z.
void applyCombineUnmergeMergeToPlainValues(MachineInstr &MI, SmallVectorImpl< Register > &Operands) const
bool matchAshrShlToSextInreg(MachineInstr &MI, std::tuple< Register, int64_t > &MatchInfo) const
Match ashr (shl x, C), C -> sext_inreg (C)
void applyCombineUnmergeZExtToZExt(MachineInstr &MI) const
ConstantFP - Floating Point Values [float, double].
Definition Constants.h:277
const APFloat & getValue() const
Definition Constants.h:321
const APFloat & getValueAPF() const
Definition Constants.h:320
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:154
This class represents a range of values.
LLVM_ABI std::optional< ConstantRange > exactUnionWith(const ConstantRange &CR) const
Union the two ranges and return the result if it can be represented exactly, otherwise return std::nu...
LLVM_ABI ConstantRange subtract(const APInt &CI) const
Subtract the specified constant from the endpoints of this constant range.
static LLVM_ABI ConstantRange fromKnownBits(const KnownBits &Known, bool IsSigned)
Initialize a range based on a known bits constraint.
const APInt & getLower() const
Return the lower value for this range.
LLVM_ABI OverflowResult unsignedSubMayOverflow(const ConstantRange &Other) const
Return whether unsigned sub of the two ranges always/never overflows.
LLVM_ABI OverflowResult unsignedAddMayOverflow(const ConstantRange &Other) const
Return whether unsigned add of the two ranges always/never overflows.
LLVM_ABI bool isWrappedSet() const
Return true if this set wraps around the unsigned domain.
const APInt & getUpper() const
Return the upper value for this range.
static LLVM_ABI ConstantRange makeExactICmpRegion(CmpInst::Predicate Pred, const APInt &Other)
Produce the exact range such that all values in the returned range satisfy the given predicate with a...
LLVM_ABI OverflowResult signedAddMayOverflow(const ConstantRange &Other) const
Return whether signed add of the two ranges always/never overflows.
@ AlwaysOverflowsHigh
Always overflows in the direction of signed/unsigned max value.
@ AlwaysOverflowsLow
Always overflows in the direction of signed/unsigned min value.
@ MayOverflow
May or may not overflow.
LLVM_ABI OverflowResult signedSubMayOverflow(const ConstantRange &Other) const
Return whether signed sub of the two ranges always/never overflows.
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63
bool isBigEndian() const
Definition DataLayout.h:208
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition DenseMap.h:205
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition DenseMap.h:248
unsigned size() const
Definition DenseMap.h:110
iterator end()
Definition DenseMap.h:81
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:359
Represents overflowing add operations.
Represents an integer addition.
Represents a logical and.
CmpInst::Predicate getCond() const
Register getLHSReg() const
Register getRHSReg() const
Represents an any ext.
Represents any generic load, including sign/zero extending variants.
Register getDstReg() const
Get the definition register of the loaded value.
Register getCarryOutReg() const
Register getLHSReg() const
Register getRHSReg() const
Represents a G_BUILD_VECTOR.
Represent a G_ICMP.
Abstract class that contains various methods for clients to notify about changes.
Simple wrapper observer that takes several observers, and calls each one for each event.
Represents any type of generic load or store.
Register getPointerReg() const
Get the source register of the pointer value.
Represents a G_LOAD.
Represents a logical binary operation.
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
bool isAtomic() const
Returns true if the attached MachineMemOperand has the atomic flag set.
LocationSize getMemSizeInBits() const
Returns the size in bits of the memory access.
bool isSimple() const
Returns true if the memory operation is neither atomic or volatile.
Register getSourceReg(unsigned I) const
Returns the I'th source register.
unsigned getNumSources() const
Returns the number of source registers.
Represents a G_MERGE_VALUES.
Represents a logical or.
Represents a G_PTR_ADD.
Represents a G_SELECT.
Register getCondReg() const
Represents overflowing sub operations.
Represents an integer subtraction.
Represents a G_UNMERGE_VALUES.
unsigned getNumDefs() const
Returns the number of def registers.
Register getSourceReg() const
Get the unmerge source register.
Represents a G_ZEXTLOAD.
Register getReg(unsigned Idx) const
Access the Idx'th operand as a register and return it.
static LLVM_ABI bool compare(const APInt &LHS, const APInt &RHS, ICmpInst::Predicate Pred)
Return result of LHS Pred RHS comparison.
constexpr bool isScalableVector() const
Returns true if the LLT is a scalable vector.
constexpr unsigned getScalarSizeInBits() const
constexpr bool isScalar() const
constexpr LLT changeElementType(LLT NewEltTy) const
If this type is a vector, return a vector with the same number of elements but the new element type.
static constexpr LLT vector(ElementCount EC, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr bool isValid() const
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
constexpr bool isVector() const
constexpr bool isByteSized() const
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr bool isPointer() const
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
constexpr ElementCount getElementCount() const
constexpr LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
constexpr bool isFixedVector() const
Returns true if the LLT is a fixed vector.
constexpr LLT getScalarType() const
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
@ Legalized
Instruction has been legalized and the MachineFunction changed.
LLVM_ABI LegalizeResult lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0)
LLVM_ABI Register getVectorElementPointer(Register VecPtr, LLT VecTy, Register Index)
Get a pointer to vector element Index located in memory for a vector of type VecTy starting at a base...
TypeSize getValue() const
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition MCInstrInfo.h:90
LLVM_ABI iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
MachineInstrBundleIterator< MachineInstr > iterator
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Helper class to build MachineInstr.
const TargetInstrInfo & getTII()
MachineInstrBuilder buildSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_SUB Op0, Op1.
MachineInstrBuilder buildCTLZ(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTLZ Op0, Src0.
MachineFunction & getMF()
Getter for the function we currently build.
MachineRegisterInfo * getMRI()
Getter for MRI.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
Register getReg(unsigned Idx) const
Get the register for the operand index.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
LLVM_ABI bool isDereferenceableInvariantLoad() const
Return true if this load instruction never traps and points to a memory location whose value doesn't ...
bool getFlag(MIFlag Flag) const
Return whether an MI flag is set.
unsigned getNumOperands() const
Retuns the total number of operands.
LLVM_ABI void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
mop_range uses()
Returns all operands which may be register uses.
MachineOperand * findRegisterUseOperand(Register Reg, const TargetRegisterInfo *TRI, bool isKill=false)
Wrapper for findRegisterUseOperandIdx, it returns a pointer to the MachineOperand rather than an inde...
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
uint32_t getFlags() const
Return the MI flags bitvector.
LLVM_ABI int findRegisterDefOperandIdx(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
A description of a memory reference used in the backend.
LLT getMemoryType() const
Return the memory type of the memory reference.
unsigned getAddrSpace() const
const MachinePointerInfo & getPointerInfo() const
LLVM_ABI Align getAlign() const
Return the minimum known alignment in bytes of the actual memory reference.
MachineOperand class - Representation of each machine instruction operand.
const ConstantInt * getCImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void setMBB(MachineBasicBlock *MBB)
void setPredicate(unsigned Predicate)
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
unsigned getPredicate() const
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition ArrayRef.h:298
This class implements the register bank concept.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
constexpr bool isValid() const
Definition Register.h:112
size_type size() const
Determine the number of elements in the SetVector.
Definition SetVector.h:101
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition SetVector.h:260
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:149
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
SmallBitVector & set()
bool all() const
Returns true if all bits are set.
size_type size() const
Definition SmallPtrSet.h:99
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:337
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:183
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void resize(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual LLVM_READONLY LLT getPreferredShiftAmountTy(LLT ShiftValueTy) const
Return the preferred type to use for a shift opcode, given the shifted amount type is ShiftValueTy.
bool isBeneficialToExpandPowI(int64_t Exponent, bool OptForSize) const
Return true if it is beneficial to expand an @llvm.powi.
virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AddrSpace, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual unsigned combineRepeatedFPDivisors() const
Indicate whether this target prefers to combine FDIVs with the same divisor.
virtual const TargetLowering * getTargetLowering() const
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
Definition TypeSize.h:180
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
self_iterator getIterator()
Definition ilist_node.h:123
Changed
#define INT64_MAX
Definition DataTypes.h:71
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ Legal
The operation is expected to be selectable directly by the target, and no transformation is necessary...
@ WidenScalar
The operation should be implemented in terms of a wider scalar base-type.
operand_type_match m_Reg()
SpecificConstantMatch m_SpecificICst(const APInt &RequestedValue)
Matches a constant equal to RequestedValue.
BinaryOp_match< LHS, RHS, TargetOpcode::G_BUILD_VECTOR, false > m_GBuildVector(const LHS &L, const RHS &R)
GCstAndRegMatch m_GCst(std::optional< ValueAndVReg > &ValReg)
operand_type_match m_Pred()
BinaryOp_match< LHS, RHS, TargetOpcode::G_UMIN, true > m_GUMin(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_ZEXT > m_GZExt(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_XOR, true > m_GXor(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_SEXT > m_GSExt(const SrcTy &Src)
UnaryOp_match< SrcTy, TargetOpcode::G_FPEXT > m_GFPExt(const SrcTy &Src)
ConstantMatch< APInt > m_ICst(APInt &Cst)
UnaryOp_match< SrcTy, TargetOpcode::G_INTTOPTR > m_GIntToPtr(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_ADD, true > m_GAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_OR, true > m_GOr(const LHS &L, const RHS &R)
BinaryOp_match< SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB > m_Neg(const SrcTy &&Src)
Matches a register negated by a G_SUB.
ICstOrSplatMatch< APInt > m_ICstOrSplat(APInt &Cst)
ImplicitDefMatch m_GImplicitDef()
OneNonDBGUse_match< SubPat > m_OneNonDBGUse(const SubPat &SP)
CheckType m_SpecificType(LLT Ty)
deferred_ty< Register > m_DeferredReg(Register &R)
Similar to m_SpecificReg/Type, but the specific value to match originated from an earlier sub-pattern...
BinaryOp_match< LHS, RHS, TargetOpcode::G_UMAX, true > m_GUMax(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_FADD, true > m_GFAdd(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_PTRTOINT > m_GPtrToInt(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_FSUB, false > m_GFSub(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SUB > m_GSub(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_ASHR, false > m_GAShr(const LHS &L, const RHS &R)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
BinaryOp_match< LHS, RHS, TargetOpcode::G_PTR_ADD, false > m_GPtrAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SHL, false > m_GShl(const LHS &L, const RHS &R)
Or< Preds... > m_any_of(Preds &&... preds)
SpecificConstantOrSplatMatch m_SpecificICstOrSplat(const APInt &RequestedValue)
Matches a RequestedValue constant or a constant splat of RequestedValue.
BinaryOp_match< LHS, RHS, TargetOpcode::G_AND, true > m_GAnd(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_BITCAST > m_GBitcast(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_BUILD_VECTOR_TRUNC, false > m_GBuildVectorTrunc(const LHS &L, const RHS &R)
bind_ty< MachineInstr * > m_MInstr(MachineInstr *&MI)
UnaryOp_match< SrcTy, TargetOpcode::G_FNEG > m_GFNeg(const SrcTy &Src)
CompareOp_match< Pred, LHS, RHS, TargetOpcode::G_ICMP, true > m_c_GICmp(const Pred &P, const LHS &L, const RHS &R)
G_ICMP matcher that also matches commuted compares.
TernaryOp_match< Src0Ty, Src1Ty, Src2Ty, TargetOpcode::G_INSERT_VECTOR_ELT > m_GInsertVecElt(const Src0Ty &Src0, const Src1Ty &Src1, const Src2Ty &Src2)
GFCstOrSplatGFCstMatch m_GFCstOrSplat(std::optional< FPValueAndVReg > &FPValReg)
And< Preds... > m_all_of(Preds &&... preds)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SMIN, true > m_GSMin(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_LSHR, false > m_GLShr(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_ANYEXT > m_GAnyExt(const SrcTy &Src)
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
UnaryOp_match< SrcTy, TargetOpcode::G_TRUNC > m_GTrunc(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SMAX, true > m_GSMax(const LHS &L, const RHS &R)
CompareOp_match< Pred, LHS, RHS, TargetOpcode::G_FCMP > m_GFCmp(const Pred &P, const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Not(const Pred &P) -> Not< Pred >
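A minimal sketch of how the mi_match entry above is used together with these combinators inside a combine routine; the helper name matchAddOfConstant and the Dst register it receives are illustrative assumptions, not part of this file.
#include "llvm/ADT/APInt.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
static bool matchAddOfConstant(llvm::Register Dst,
                               const llvm::MachineRegisterInfo &MRI,
                               llvm::Register &X, llvm::APInt &CstVal) {
  using namespace llvm::MIPatternMatch;
  // m_GAdd is commutative, so the constant may sit on either side; m_Reg
  // binds the remaining operand and m_ICst binds the constant's value.
  return mi_match(Dst, MRI, m_GAdd(m_Reg(X), m_ICst(CstVal)));
}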
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
@ Offset
Definition DWP.cpp:532
Value
Definition InstrProf.h:137
LLVM_ABI bool isBuildVectorAllZeros(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndef=false)
Return true if the specified instruction is a G_BUILD_VECTOR or G_BUILD_VECTOR_TRUNC where all of the...
Definition Utils.cpp:1481
LLVM_ABI Type * getTypeForLLT(LLT Ty, LLVMContext &C)
Get the type back from LLT.
Definition Utils.cpp:2034
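A small sketch of getTypeForLLT, assuming a caller-provided LLVMContext; the helper name llTypesRoundTrip is hypothetical.
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGenTypes/LowLevelType.h"
#include "llvm/IR/LLVMContext.h"
static bool llTypesRoundTrip(llvm::LLVMContext &Ctx) {
  // LLT::scalar(32) maps back to the IR type i32, and a fixed vector LLT
  // maps back to the corresponding IR vector type (<4 x i16> here).
  llvm::Type *I32 = llvm::getTypeForLLT(llvm::LLT::scalar(32), Ctx);
  llvm::Type *V4I16 = llvm::getTypeForLLT(llvm::LLT::fixed_vector(4, 16), Ctx);
  return I32->isIntegerTy(32) && V4I16->isVectorTy();
}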
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1725
LLVM_ABI MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by a single def instruction that is Opcode.
Definition Utils.cpp:651
static double log2(double V)
LLVM_ABI const ConstantFP * getConstantFPVRegVal(Register VReg, const MachineRegisterInfo &MRI)
Definition Utils.cpp:459
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
LLVM_ABI std::optional< APInt > getIConstantVRegVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT, return the corresponding value.
Definition Utils.cpp:294
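A sketch of the typical query pattern; the helper name isZeroConstantReg is hypothetical, and the lookup only succeeds when Reg's defining instruction is a G_CONSTANT (getIConstantVRegValWithLookThrough, listed further below, additionally looks through trivial copies).
#include <optional>
#include "llvm/CodeGen/GlobalISel/Utils.h"
static bool isZeroConstantReg(llvm::Register Reg,
                              const llvm::MachineRegisterInfo &MRI) {
  // Returns std::nullopt unless Reg is defined by a G_CONSTANT.
  std::optional<llvm::APInt> MaybeCst = llvm::getIConstantVRegVal(Reg, MRI);
  return MaybeCst && MaybeCst->isZero();
}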
LLVM_ABI std::optional< APInt > getIConstantSplatVal(const Register Reg, const MachineRegisterInfo &MRI)
Definition Utils.cpp:1441
LLVM_ABI bool isAllOnesOrAllOnesSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant -1 integer or a splatted vector of a constant -1 integer (with...
Definition Utils.cpp:1606
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition bit.h:293
std::function< void(MachineIRBuilder &)> BuildFnTy
LLVM_ABI const llvm::fltSemantics & getFltSemanticForLLT(LLT Ty)
Get the appropriate floating point arithmetic semantic based on the bit size of the given scalar LLT.
LLVM_ABI std::optional< APFloat > ConstantFoldFPBinOp(unsigned Opcode, const Register Op1, const Register Op2, const MachineRegisterInfo &MRI)
Definition Utils.cpp:739
LLVM_ABI MVT getMVTForLLT(LLT Ty)
Get a rough equivalent of an MVT for a given LLT.
LLVM_ABI std::optional< APInt > isConstantOrConstantSplatVector(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a constant integer or a splat vector of constant integers.
Definition Utils.cpp:1564
LLVM_ABI bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition Utils.cpp:1588
LLVM_ABI MachineInstr * getDefIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, folding away any trivial copies.
Definition Utils.cpp:492
LLVM_ABI bool matchUnaryPredicate(const MachineRegisterInfo &MRI, Register Reg, std::function< bool(const Constant *ConstVal)> Match, bool AllowUndefs=false)
Attempt to match a unary predicate against a scalar/splat constant or every element of a constant G_B...
Definition Utils.cpp:1621
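A sketch of matchUnaryPredicate with a per-element predicate; the helper name allElementsNonZero is hypothetical. When AllowUndefs is set the callback may receive a null Constant for undef elements, hence the null-tolerant cast.
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/IR/Constants.h"
static bool allElementsNonZero(const llvm::MachineRegisterInfo &MRI,
                               llvm::Register Reg) {
  // True if the scalar/splat constant, or every element of a constant
  // G_BUILD_VECTOR, satisfies the predicate.
  return llvm::matchUnaryPredicate(
      MRI, Reg,
      [](const llvm::Constant *C) {
        const auto *CI = llvm::dyn_cast_or_null<llvm::ConstantInt>(C);
        return CI && !CI->isZero();
      },
      /*AllowUndefs=*/false);
}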
LLVM_ABI bool isConstTrueVal(const TargetLowering &TLI, int64_t Val, bool IsVector, bool IsFP)
Returns true if given the TargetLowering's boolean contents information, the value Val contains a tru...
Definition Utils.cpp:1653
LLVM_ABI std::optional< APInt > ConstantFoldBinOp(unsigned Opcode, const Register Op1, const Register Op2, const MachineRegisterInfo &MRI)
Definition Utils.cpp:670
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:147
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1732
LLVM_ABI const APInt & getIConstantFromReg(Register VReg, const MachineRegisterInfo &MRI)
VReg must be defined by a G_CONSTANT; returns the corresponding value.
Definition Utils.cpp:305
LLVM_ABI bool isConstantOrConstantVector(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowFP=true, bool AllowOpaqueConstants=true)
Return true if the specified instruction is known to be a constant, or a vector of constants.
Definition Utils.cpp:1544
SmallVector< std::function< void(MachineInstrBuilder &)>, 4 > OperandBuildSteps
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
LLVM_ABI bool canReplaceReg(Register DstReg, Register SrcReg, MachineRegisterInfo &MRI)
Check if DstReg can be replaced with SrcReg depending on the register constraints.
Definition Utils.cpp:201
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition MathExtras.h:261
LLVM_ABI bool canCreateUndefOrPoison(const Operator *Op, bool ConsiderFlagsAndMetadata=true)
canCreateUndefOrPoison returns true if Op can create undef or poison from non-undef & non-poison oper...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
auto instructionsWithoutDebug(IterT It, IterT End, bool SkipPseudoOp=true)
Construct a range iterator which begins at It and moves forwards until End is reached,...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI std::optional< FPValueAndVReg > getFConstantSplat(Register VReg, const MachineRegisterInfo &MRI, bool AllowUndef=true)
Returns a floating point scalar constant of a build vector splat if it exists.
Definition Utils.cpp:1474
LLVM_ABI EVT getApproximateEVTForLLT(LLT Ty, LLVMContext &Ctx)
LLVM_ABI std::optional< APInt > ConstantFoldCastOp(unsigned Opcode, LLT DstTy, const Register Op0, const MachineRegisterInfo &MRI)
Definition Utils.cpp:965
LLVM_ABI unsigned getInverseGMinMaxOpcode(unsigned MinMaxOpc)
Returns the inverse opcode of MinMaxOpc, which is a generic min/max opcode like G_SMIN.
Definition Utils.cpp:279
@ Xor
Bitwise or logical XOR of integers.
@ And
Bitwise or logical AND of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
DWARFExpression::Operation Op
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
LLVM_ABI std::optional< FPValueAndVReg > getFConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_FCONSTANT returns it...
Definition Utils.cpp:447
LLVM_ABI std::optional< APFloat > isConstantOrConstantSplatVectorFP(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a floating-point constant or a splat vector of floating-point constants.
Definition Utils.cpp:1577
constexpr unsigned BitWidth
LLVM_ABI int64_t getICmpTrueVal(const TargetLowering &TLI, bool IsVector, bool IsFP)
Returns an integer representing true, as defined by the TargetBooleanContents.
Definition Utils.cpp:1678
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI bool isKnownNeverNaN(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if the floating-point scalar value is not a NaN or if the floating-point vector value has...
LLVM_ABI std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Definition Utils.cpp:433
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1758
iterator_range< pointer_iterator< WrappedIteratorT > > make_pointer_range(RangeT &&Range)
Definition iterator.h:363
LLVM_ABI std::optional< DefinitionAndSourceRegister > getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg and the underlying value Register, folding away any copies.
Definition Utils.cpp:467
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
LLVM_ABI bool isKnownToBeAPowerOfTwo(const Value *V, const DataLayout &DL, bool OrZero=false, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Return true if the given value is known to have exactly one bit set when defined.
LLVM_ABI Register getSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the source register for Reg, folding away any trivial copies.
Definition Utils.cpp:499
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
Definition MathExtras.h:77
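The MathExtras helpers referenced above (isPowerOf2_32, isMask_64, maskTrailingOnes) are constexpr, so their behaviour can be pinned down at compile time; a minimal sketch:
#include <cstdint>
#include "llvm/Support/MathExtras.h"
static_assert(llvm::isPowerOf2_32(64u), "64 is a power of two > 0");
static_assert(llvm::isMask_64(0xFFull), "0xFF is a run of trailing ones");
static_assert(llvm::maskTrailingOnes<uint64_t>(8) == 0xFFull,
              "the low 8 bits set, all others clear");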
unsigned getFCmpCode(CmpInst::Predicate CC)
Similar to getICmpCode but for FCmpInst.
LLVM_ABI std::optional< int64_t > getIConstantSplatSExtVal(const Register Reg, const MachineRegisterInfo &MRI)
Definition Utils.cpp:1459
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:869
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Simple struct used to hold a Register value and the instruction which defines it.
Definition Utils.h:234
Extended Value Type.
Definition ValueTypes.h:35
SmallVector< InstructionBuildSteps, 2 > InstrsToBuild
Describes instructions to be built during a combine.
bool isNonNegative() const
Returns true if this value is known to be non-negative.
Definition KnownBits.h:108
unsigned countMinLeadingOnes() const
Returns the minimum number of leading one bits.
Definition KnownBits.h:251
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition KnownBits.h:242
bool isUnknown() const
Returns true if we don't know any bits.
Definition KnownBits.h:66
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition KnownBits.h:248
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition KnownBits.h:145
bool isNegative() const
Returns true if this value is known to be negative.
Definition KnownBits.h:105
The LegalityQuery object bundles together all the information that's needed to decide whether a given...
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands,...
LLVM_ABI unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
MachinePointerInfo getWithOffset(int64_t O) const
const RegisterBank * Bank
Magic data for optimising signed division by a constant.
static LLVM_ABI SignedDivisionByConstantInfo get(const APInt &D)
Calculate the magic numbers required to implement a signed integer division by a constant as a sequen...
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
Magic data for optimising unsigned division by a constant.
static LLVM_ABI UnsignedDivisionByConstantInfo get(const APInt &D, unsigned LeadingZeros=0, bool AllowEvenDivisorOptimization=true)
Calculate the magic numbers required to implement an unsigned integer division by a constant as a seq...
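A sketch of querying the unsigned-division magic numbers for a divide by 7 in 32 bits; the field names used in the comments (Magic, IsAdd, PreShift, PostShift) are assumed from llvm/Support/DivisionByConstantInfo.h, and divideBySevenMagics is a hypothetical helper.
#include "llvm/ADT/APInt.h"
#include "llvm/Support/DivisionByConstantInfo.h"
static void divideBySevenMagics() {
  llvm::APInt Divisor(/*numBits=*/32, /*val=*/7);
  llvm::UnsignedDivisionByConstantInfo Magics =
      llvm::UnsignedDivisionByConstantInfo::get(Divisor);
  // A combine can rewrite x / 7 as a multiply-high by Magics.Magic, an
  // optional fix-up add when Magics.IsAdd is set, and shifts by
  // Magics.PreShift and Magics.PostShift, avoiding the hardware divide.
  (void)Magics;
}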