//===-- lib/CodeGen/GlobalISel/GICombinerHelper.cpp -----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/InstrTypes.h"
#include <cmath>
#include <optional>
#include <tuple>

#define DEBUG_TYPE "gi-combiner"

using namespace llvm;
using namespace MIPatternMatch;

// Option to allow testing of the combiner while no targets know about indexed
// addressing.
static cl::opt<bool>
    ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false),
                       cl::desc("Force all indexed operations to be "
                                "legal for the GlobalISel combiner"));

CombinerHelper::CombinerHelper(GISelChangeObserver &Observer,
                               MachineIRBuilder &B, bool IsPreLegalize,
                               GISelValueTracking *VT,
                               MachineDominatorTree *MDT,
                               const LegalizerInfo *LI)
    : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer), VT(VT),
      MDT(MDT), IsPreLegalize(IsPreLegalize), LI(LI),
      TII(Builder.getMF().getSubtarget().getInstrInfo()),
      RBI(Builder.getMF().getSubtarget().getRegBankInfo()),
      TRI(Builder.getMF().getSubtarget().getRegisterInfo()) {
  (void)this->VT;
}

const TargetLowering &CombinerHelper::getTargetLowering() const {
  return *Builder.getMF().getSubtarget().getTargetLowering();
}

MachineFunction &CombinerHelper::getMachineFunction() const {
  return Builder.getMF();
}

const DataLayout &CombinerHelper::getDataLayout() const {
  return getMachineFunction().getDataLayout();
}

LLVMContext &CombinerHelper::getContext() const { return Builder.getContext(); }

/// \returns The little endian in-memory byte position of byte \p I in a
/// \p ByteWidth bytes wide type.
///
/// E.g. Given a 4-byte type x, x[0] -> byte 0
static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I) {
  assert(I < ByteWidth && "I must be in [0, ByteWidth)");
  return I;
}

/// Determines the LogBase2 value for a non-null input value using the
/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
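/// E.g. for a 32-bit value V = 16: ctlz(16) = 27, so
/// LogBase2 = (32 - 1) - 27 = 4.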
static Register buildLogBase2(Register V, MachineIRBuilder &MIB) {
  auto &MRI = *MIB.getMRI();
  LLT Ty = MRI.getType(V);
  auto Ctlz = MIB.buildCTLZ(Ty, V);
  auto Base = MIB.buildConstant(Ty, Ty.getScalarSizeInBits() - 1);
  return MIB.buildSub(Ty, Base, Ctlz).getReg(0);
}

/// \returns The big endian in-memory byte position of byte \p I in a
/// \p ByteWidth bytes wide type.
///
/// E.g. Given a 4-byte type x, x[0] -> byte 3
static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I) {
  assert(I < ByteWidth && "I must be in [0, ByteWidth)");
  return ByteWidth - I - 1;
}

/// Given a map from byte offsets in memory to indices in a load/store,
/// determine if that map corresponds to a little or big endian byte pattern.
///
/// \param MemOffset2Idx maps memory offsets to address offsets.
/// \param LowestIdx is the lowest index in \p MemOffset2Idx.
///
/// \returns true if the map corresponds to a big endian byte pattern, false if
/// it corresponds to a little endian byte pattern, and std::nullopt otherwise.
///
/// E.g. given a 32-bit type x, and x[AddrOffset], the in-memory byte patterns
/// are as follows:
///
/// AddrOffset   Little endian    Big endian
/// 0            0                3
/// 1            1                2
/// 2            2                1
/// 3            3                0
static std::optional<bool>
isBigEndian(const SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx,
            int64_t LowestIdx) {
  // Need at least two byte positions to decide on endianness.
  unsigned Width = MemOffset2Idx.size();
  if (Width < 2)
    return std::nullopt;
  bool BigEndian = true, LittleEndian = true;
  for (unsigned MemOffset = 0; MemOffset < Width; ++MemOffset) {
    auto MemOffsetAndIdx = MemOffset2Idx.find(MemOffset);
    if (MemOffsetAndIdx == MemOffset2Idx.end())
      return std::nullopt;
    const int64_t Idx = MemOffsetAndIdx->second - LowestIdx;
    assert(Idx >= 0 && "Expected non-negative byte offset?");
    LittleEndian &= Idx == littleEndianByteAt(Width, MemOffset);
    BigEndian &= Idx == bigEndianByteAt(Width, MemOffset);
    if (!BigEndian && !LittleEndian)
      return std::nullopt;
  }

  assert((BigEndian != LittleEndian) &&
         "Pattern cannot be both big and little endian!");
  return BigEndian;
}

bool CombinerHelper::isPreLegalize() const { return IsPreLegalize; }

bool CombinerHelper::isLegal(const LegalityQuery &Query) const {
  assert(LI && "Must have LegalizerInfo to query isLegal!");
  return LI->getAction(Query).Action == LegalizeActions::Legal;
}

bool CombinerHelper::isLegalOrBeforeLegalizer(
    const LegalityQuery &Query) const {
  return isPreLegalize() || isLegal(Query);
}

bool CombinerHelper::isLegalOrHasWidenScalar(const LegalityQuery &Query) const {
  return isLegal(Query) ||
         LI->getAction(Query).Action == LegalizeActions::WidenScalar;
}

    const LegalityQuery &Query) const {
  LegalizeAction Action = LI->getAction(Query).Action;
  return Action == LegalizeActions::Legal ||
}

bool CombinerHelper::isConstantLegalOrBeforeLegalizer(const LLT Ty) const {
  if (!Ty.isVector())
    return isLegalOrBeforeLegalizer({TargetOpcode::G_CONSTANT, {Ty}});
  // Vector constants are represented as a G_BUILD_VECTOR of scalar G_CONSTANTs.
  if (isPreLegalize())
    return true;
  LLT EltTy = Ty.getElementType();
  return isLegal({TargetOpcode::G_BUILD_VECTOR, {Ty, EltTy}}) &&
         isLegal({TargetOpcode::G_CONSTANT, {EltTy}});
}

void CombinerHelper::replaceRegWith(MachineRegisterInfo &MRI, Register FromReg,
                                    Register ToReg) const {
  Observer.changingAllUsesOfReg(MRI, FromReg);

  if (MRI.constrainRegAttrs(ToReg, FromReg))
    MRI.replaceRegWith(FromReg, ToReg);
  else
    Builder.buildCopy(FromReg, ToReg);

  Observer.finishedChangingAllUsesOfReg();
}

void CombinerHelper::replaceRegOpWith(MachineRegisterInfo &MRI,
                                      MachineOperand &FromRegOp,
                                      Register ToReg) const {
  assert(FromRegOp.getParent() && "Expected an operand in an MI");
  Observer.changingInstr(*FromRegOp.getParent());

  FromRegOp.setReg(ToReg);

  Observer.changedInstr(*FromRegOp.getParent());
}

void CombinerHelper::replaceOpcodeWith(MachineInstr &FromMI,
                                       unsigned ToOpcode) const {
  Observer.changingInstr(FromMI);

  FromMI.setDesc(Builder.getTII().get(ToOpcode));

  Observer.changedInstr(FromMI);
}

const RegisterBank *CombinerHelper::getRegBank(Register Reg) const {
  return RBI->getRegBank(Reg, MRI, *TRI);
}

void CombinerHelper::setRegBank(Register Reg,
                                const RegisterBank *RegBank) const {
  if (RegBank)
    MRI.setRegBank(Reg, *RegBank);
}

bool CombinerHelper::tryCombineCopy(MachineInstr &MI) const {
  if (matchCombineCopy(MI)) {
    applyCombineCopy(MI);
    return true;
  }
  return false;
}
bool CombinerHelper::matchCombineCopy(MachineInstr &MI) const {
  if (MI.getOpcode() != TargetOpcode::COPY)
    return false;
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  return canReplaceReg(DstReg, SrcReg, MRI);
}
void CombinerHelper::applyCombineCopy(MachineInstr &MI) const {
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  replaceRegWith(MRI, DstReg, SrcReg);
  MI.eraseFromParent();
}

bool CombinerHelper::matchFreezeOfSingleMaybePoisonOperand(
    MachineInstr &MI, BuildFnTy &MatchInfo) const {
  // Ported from InstCombinerImpl::pushFreezeToPreventPoisonFromPropagating.
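  // In short: freeze(op(x, maybe-poison)) is rewritten to
  // op(x, freeze(maybe-poison)) when op itself cannot create poison and has
  // at most one maybe-poison operand, so the freeze stops poison earlier.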
  Register DstOp = MI.getOperand(0).getReg();
  Register OrigOp = MI.getOperand(1).getReg();

  if (!MRI.hasOneNonDBGUse(OrigOp))
    return false;

  MachineInstr *OrigDef = MRI.getUniqueVRegDef(OrigOp);
  // Even if only a single operand of the PHI is not guaranteed non-poison,
  // moving freeze() backwards across a PHI can cause optimization issues for
  // other users of that operand.
  //
  // Moving freeze() from one of the output registers of a G_UNMERGE_VALUES to
  // the source register is unprofitable because it makes the freeze() more
  // strict than is necessary (it would affect the whole register instead of
  // just the subreg being frozen).
  if (OrigDef->isPHI() || isa<GUnmerge>(OrigDef))
    return false;

  if (canCreateUndefOrPoison(OrigOp, MRI,
                             /*ConsiderFlagsAndMetadata=*/false))
    return false;

  std::optional<MachineOperand> MaybePoisonOperand;
  for (MachineOperand &Operand : OrigDef->uses()) {
    if (!Operand.isReg())
      return false;

    if (isGuaranteedNotToBeUndefOrPoison(Operand.getReg(), MRI))
      continue;

    if (!MaybePoisonOperand)
      MaybePoisonOperand = Operand;
    else {
      // We have more than one maybe-poison operand. Moving the freeze is
      // unsafe.
      return false;
    }
  }

  // Eliminate freeze if all operands are guaranteed non-poison.
  if (!MaybePoisonOperand) {
    MatchInfo = [=](MachineIRBuilder &B) {
      Observer.changingInstr(*OrigDef);
      cast<GenericMachineInstr>(OrigDef)->dropPoisonGeneratingFlags();
      Observer.changedInstr(*OrigDef);
      B.buildCopy(DstOp, OrigOp);
    };
    return true;
  }

  Register MaybePoisonOperandReg = MaybePoisonOperand->getReg();
  LLT MaybePoisonOperandRegTy = MRI.getType(MaybePoisonOperandReg);

  MatchInfo = [=](MachineIRBuilder &B) mutable {
    Observer.changingInstr(*OrigDef);
    cast<GenericMachineInstr>(OrigDef)->dropPoisonGeneratingFlags();
    Observer.changedInstr(*OrigDef);
    B.setInsertPt(*OrigDef->getParent(), OrigDef->getIterator());
    auto Freeze = B.buildFreeze(MaybePoisonOperandRegTy, MaybePoisonOperandReg);
    replaceRegOpWith(
        MRI, *OrigDef->findRegisterUseOperand(MaybePoisonOperandReg, TRI),
        Freeze.getReg(0));
    replaceRegWith(MRI, DstOp, OrigOp);
  };
  return true;
}

bool CombinerHelper::matchCombineConcatVectors(
    MachineInstr &MI, SmallVectorImpl<Register> &Ops) const {
  assert(MI.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
         "Invalid instruction");
  bool IsUndef = true;
  MachineInstr *Undef = nullptr;

  // Walk over all the operands of concat vectors and check if they are
  // build_vector themselves or undef.
  // Then collect their operands in Ops.
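  // E.g. (assuming matching types):
  //   %v1:_(<2 x s32>) = G_BUILD_VECTOR %a, %b
  //   %v2:_(<2 x s32>) = G_IMPLICIT_DEF
  //   %cat:_(<4 x s32>) = G_CONCAT_VECTORS %v1, %v2
  // is flattened by collecting Ops = {%a, %b, %undef, %undef}.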
  for (const MachineOperand &MO : MI.uses()) {
    Register Reg = MO.getReg();
    MachineInstr *Def = MRI.getVRegDef(Reg);
    assert(Def && "Operand not defined");
    if (!MRI.hasOneNonDBGUse(Reg))
      return false;
    switch (Def->getOpcode()) {
    case TargetOpcode::G_BUILD_VECTOR:
      IsUndef = false;
      // Remember the operands of the build_vector to fold
      // them into the yet-to-build flattened concat vectors.
      for (const MachineOperand &BuildVecMO : Def->uses())
        Ops.push_back(BuildVecMO.getReg());
      break;
    case TargetOpcode::G_IMPLICIT_DEF: {
      LLT OpType = MRI.getType(Reg);
      // Keep one undef value for all the undef operands.
      if (!Undef) {
        Builder.setInsertPt(*MI.getParent(), MI);
        Undef = Builder.buildUndef(OpType.getScalarType());
      }
      assert(MRI.getType(Undef->getOperand(0).getReg()) ==
                 OpType.getScalarType() &&
             "All undefs should have the same type");
      // Break the undef vector in as many scalar elements as needed
      // for the flattening.
      for (unsigned EltIdx = 0, EltEnd = OpType.getNumElements();
           EltIdx != EltEnd; ++EltIdx)
        Ops.push_back(Undef->getOperand(0).getReg());
      break;
    }
    default:
      return false;
    }
  }

  // Check if the combine is illegal.
  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
  if (!isLegalOrBeforeLegalizer(
          {TargetOpcode::G_BUILD_VECTOR, {DstTy, MRI.getType(Ops[0])}})) {
    return false;
  }

  if (IsUndef)
    Ops.clear();

  return true;
}

void CombinerHelper::applyCombineConcatVectors(
    MachineInstr &MI, SmallVectorImpl<Register> &Ops) const {
  // We determined that the concat_vectors can be flattened.
  // Generate the flattened build_vector.
  Register DstReg = MI.getOperand(0).getReg();
  Builder.setInsertPt(*MI.getParent(), MI);
  Register NewDstReg = MRI.cloneVirtualRegister(DstReg);

  // Note: IsUndef is sort of redundant. We could have determined it by
  // checking that all Ops are undef. Alternatively, we could have
  // generated a build_vector of undefs and relied on another combine to
  // clean that up. For now, given we already gather this information
  // in matchCombineConcatVectors, just save compile time and issue the
  // right thing.
  if (Ops.empty())
    Builder.buildUndef(NewDstReg);
  else
    Builder.buildBuildVector(NewDstReg, Ops);
  replaceRegWith(MRI, DstReg, NewDstReg);
  MI.eraseFromParent();
}

void CombinerHelper::applyCombineShuffleToBuildVector(MachineInstr &MI) const {
  auto &Shuffle = cast<GShuffleVector>(MI);

  Register SrcVec1 = Shuffle.getSrc1Reg();
  Register SrcVec2 = Shuffle.getSrc2Reg();
  LLT EltTy = MRI.getType(SrcVec1).getElementType();
  int Width = MRI.getType(SrcVec1).getNumElements();

  auto Unmerge1 = Builder.buildUnmerge(EltTy, SrcVec1);
  auto Unmerge2 = Builder.buildUnmerge(EltTy, SrcVec2);

  SmallVector<Register> Extracts;
  // Select only applicable elements from unmerged values.
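  // E.g. for two <2 x s32> sources and mask <1, 2>, this picks element 1 of
  // the first unmerge and element 0 of the second, then rebuilds the result
  // with G_BUILD_VECTOR.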
  for (int Val : Shuffle.getMask()) {
    if (Val == -1)
      Extracts.push_back(Builder.buildUndef(EltTy).getReg(0));
    else if (Val < Width)
      Extracts.push_back(Unmerge1.getReg(Val));
    else
      Extracts.push_back(Unmerge2.getReg(Val - Width));
  }
  assert(Extracts.size() > 0 && "Expected at least one element in the shuffle");
  if (Extracts.size() == 1)
    Builder.buildCopy(MI.getOperand(0).getReg(), Extracts[0]);
  else
    Builder.buildBuildVector(MI.getOperand(0).getReg(), Extracts);
  MI.eraseFromParent();
}

bool CombinerHelper::matchCombineShuffleConcat(
    MachineInstr &MI, SmallVectorImpl<Register> &Ops) const {
  ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
  auto ConcatMI1 =
      dyn_cast<GConcatVectors>(MRI.getVRegDef(MI.getOperand(1).getReg()));
  auto ConcatMI2 =
      dyn_cast<GConcatVectors>(MRI.getVRegDef(MI.getOperand(2).getReg()));
  if (!ConcatMI1 || !ConcatMI2)
    return false;

  // Check that the sources of the Concat instructions have the same type.
  if (MRI.getType(ConcatMI1->getSourceReg(0)) !=
      MRI.getType(ConcatMI2->getSourceReg(0)))
    return false;

  LLT ConcatSrcTy = MRI.getType(ConcatMI1->getReg(1));
  LLT ShuffleSrcTy1 = MRI.getType(MI.getOperand(1).getReg());
  unsigned ConcatSrcNumElt = ConcatSrcTy.getNumElements();
  for (unsigned i = 0; i < Mask.size(); i += ConcatSrcNumElt) {
    // Check if the index takes a whole source register from G_CONCAT_VECTORS.
    // Assumes that all sources of G_CONCAT_VECTORS are the same type.
    if (Mask[i] == -1) {
      for (unsigned j = 1; j < ConcatSrcNumElt; j++) {
        if (i + j >= Mask.size())
          return false;
        if (Mask[i + j] != -1)
          return false;
      }
      if (!isLegalOrBeforeLegalizer(
              {TargetOpcode::G_IMPLICIT_DEF, {ConcatSrcTy}}))
        return false;
      Ops.push_back(0);
    } else if (Mask[i] % ConcatSrcNumElt == 0) {
      for (unsigned j = 1; j < ConcatSrcNumElt; j++) {
        if (i + j >= Mask.size())
          return false;
        if (Mask[i + j] != Mask[i] + static_cast<int>(j))
          return false;
      }
      // Retrieve the source register from its respective G_CONCAT_VECTORS
      // instruction.
      if (Mask[i] < ShuffleSrcTy1.getNumElements()) {
        Ops.push_back(ConcatMI1->getSourceReg(Mask[i] / ConcatSrcNumElt));
      } else {
        Ops.push_back(ConcatMI2->getSourceReg(Mask[i] / ConcatSrcNumElt -
                                              ConcatMI1->getNumSources()));
      }
    } else {
      return false;
    }
  }

  if (!isLegalOrBeforeLegalizer(
          {TargetOpcode::G_CONCAT_VECTORS,
           {MRI.getType(MI.getOperand(0).getReg()), ConcatSrcTy}}))
    return false;

  return !Ops.empty();
}

void CombinerHelper::applyCombineShuffleConcat(
    MachineInstr &MI, SmallVectorImpl<Register> &Ops) const {
  LLT SrcTy;
  for (Register &Reg : Ops) {
    if (Reg != 0)
      SrcTy = MRI.getType(Reg);
  }
  assert(SrcTy.isValid() && "Unexpected full undef vector in concat combine");

  Register UndefReg = 0;

  for (Register &Reg : Ops) {
    if (Reg == 0) {
      if (UndefReg == 0)
        UndefReg = Builder.buildUndef(SrcTy).getReg(0);
      Reg = UndefReg;
    }
  }

  if (Ops.size() > 1)
    Builder.buildConcatVectors(MI.getOperand(0).getReg(), Ops);
  else
    Builder.buildCopy(MI.getOperand(0).getReg(), Ops[0]);
  MI.eraseFromParent();
}

bool CombinerHelper::tryCombineShuffleVector(MachineInstr &MI) const {
  SmallVector<Register, 4> Ops;
  if (matchCombineShuffleVector(MI, Ops)) {
    applyCombineShuffleVector(MI, Ops);
    return true;
  }
  return false;
}

bool CombinerHelper::matchCombineShuffleVector(
    MachineInstr &MI, SmallVectorImpl<Register> &Ops) const {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
         "Invalid instruction kind");
  LLT DstType = MRI.getType(MI.getOperand(0).getReg());
  Register Src1 = MI.getOperand(1).getReg();
  LLT SrcType = MRI.getType(Src1);

  unsigned DstNumElts = DstType.getNumElements();
  unsigned SrcNumElts = SrcType.getNumElements();

  // If the resulting vector is smaller than the size of the source
  // vectors being concatenated, we won't be able to replace the
  // shuffle vector into a concat_vectors.
  //
  // Note: We may still be able to produce a concat_vectors fed by
  // extract_vector_elt and so on. It is less clear that would
  // be better though, so don't bother for now.
  //
  // If the destination is a scalar, the size of the sources doesn't
  // matter. We will lower the shuffle to a plain copy. This will
  // work only if the source and destination have the same size. But
  // that's covered by the next condition.
  //
  // TODO: If the sizes of the source and destination don't match
  // we could still emit an extract vector element in that case.
  if (DstNumElts < 2 * SrcNumElts)
    return false;

  // Check that the shuffle mask can be broken evenly between the
  // different sources.
  if (DstNumElts % SrcNumElts != 0)
    return false;

  // Mask length is a multiple of the source vector length.
  // Check if the shuffle is some kind of concatenation of the input
  // vectors.
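  // E.g. with <2 x s32> sources, mask <0, 1, 2, 3> is exactly
  // G_CONCAT_VECTORS %src1, %src2, and mask <2, 3, -1, -1> is
  // G_CONCAT_VECTORS %src2, undef.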
  unsigned NumConcat = DstNumElts / SrcNumElts;
  SmallVector<int, 8> ConcatSrcs(NumConcat, -1);
  ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
  for (unsigned i = 0; i != DstNumElts; ++i) {
    int Idx = Mask[i];
    // Undef value.
    if (Idx < 0)
      continue;
    // Ensure the indices in each SrcType sized piece are sequential and that
    // the same source is used for the whole piece.
    if ((Idx % SrcNumElts != (i % SrcNumElts)) ||
        (ConcatSrcs[i / SrcNumElts] >= 0 &&
         ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts)))
      return false;
    // Remember which source this index came from.
    ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts;
  }

  // The shuffle is concatenating multiple vectors together.
  // Collect the different operands for that.
  Register UndefReg;
  Register Src2 = MI.getOperand(2).getReg();
  for (auto Src : ConcatSrcs) {
    if (Src < 0) {
      if (!UndefReg) {
        Builder.setInsertPt(*MI.getParent(), MI);
        UndefReg = Builder.buildUndef(SrcType).getReg(0);
      }
      Ops.push_back(UndefReg);
    } else if (Src == 0)
      Ops.push_back(Src1);
    else
      Ops.push_back(Src2);
  }
  return true;
}

void CombinerHelper::applyCombineShuffleVector(MachineInstr &MI,
                                               ArrayRef<Register> Ops) const {
  Register DstReg = MI.getOperand(0).getReg();
  Builder.setInsertPt(*MI.getParent(), MI);
  Register NewDstReg = MRI.cloneVirtualRegister(DstReg);

  if (Ops.size() == 1)
    Builder.buildCopy(NewDstReg, Ops[0]);
  else
    Builder.buildMergeLikeInstr(NewDstReg, Ops);

  replaceRegWith(MRI, DstReg, NewDstReg);
  MI.eraseFromParent();
}

namespace {

/// Select a preference between two uses. CurrentUse is the current preference
/// while *ForCandidate is attributes of the candidate under consideration.
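/// Roughly: defined extends (sext/zext) are preferred over G_ANYEXT, sext is
/// preferred over zext at the same type (unless the load is already a
/// zero-extending load), and otherwise the largest candidate type wins.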
PreferredTuple ChoosePreferredUse(MachineInstr &LoadMI,
                                  PreferredTuple &CurrentUse,
                                  const LLT TyForCandidate,
                                  unsigned OpcodeForCandidate,
                                  MachineInstr *MIForCandidate) {
  if (!CurrentUse.Ty.isValid()) {
    if (CurrentUse.ExtendOpcode == OpcodeForCandidate ||
        CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT)
      return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
    return CurrentUse;
  }

  // We permit the extend to hoist through basic blocks but this is only
  // sensible if the target has extending loads. If you end up lowering back
  // into a load and extend during the legalizer then the end result is
  // hoisting the extend up to the load.

  // Prefer defined extensions to undefined extensions as these are more
  // likely to reduce the number of instructions.
  if (OpcodeForCandidate == TargetOpcode::G_ANYEXT &&
      CurrentUse.ExtendOpcode != TargetOpcode::G_ANYEXT)
    return CurrentUse;
  else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT &&
           OpcodeForCandidate != TargetOpcode::G_ANYEXT)
    return {TyForCandidate, OpcodeForCandidate, MIForCandidate};

  // Prefer sign extensions to zero extensions as sign-extensions tend to be
  // more expensive. Don't do this if the load is already a zero-extend load
  // though, otherwise we'll rewrite a zero-extend load into a sign-extend
  // later.
  if (!isa<GZExtLoad>(LoadMI) && CurrentUse.Ty == TyForCandidate) {
    if (CurrentUse.ExtendOpcode == TargetOpcode::G_SEXT &&
        OpcodeForCandidate == TargetOpcode::G_ZEXT)
      return CurrentUse;
    else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ZEXT &&
             OpcodeForCandidate == TargetOpcode::G_SEXT)
      return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
  }

  // This is potentially target specific. We've chosen the largest type
  // because G_TRUNC is usually free. One potential catch with this is that
  // some targets have a reduced number of larger registers than smaller
  // registers and this choice potentially increases the live-range for the
  // larger value.
  if (TyForCandidate.getSizeInBits() > CurrentUse.Ty.getSizeInBits()) {
    return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
  }
  return CurrentUse;
}

/// Find a suitable place to insert some instructions and insert them. This
/// function accounts for special cases like inserting before a PHI node.
/// The current strategy for inserting before PHIs is to duplicate the
/// instructions for each predecessor. However, while that's ok for G_TRUNC
/// on most targets since it generally requires no code, other targets/cases may
/// want to try harder to find a dominating block.
static void InsertInsnsWithoutSideEffectsBeforeUse(
    MachineIRBuilder &Builder, MachineInstr &DefMI, MachineOperand &UseMO,
    std::function<void(MachineBasicBlock *, MachineBasicBlock::iterator,
                       MachineOperand &UseMO)>
        Inserter) {
  MachineInstr &UseMI = *UseMO.getParent();

  MachineBasicBlock *InsertBB = UseMI.getParent();

  // If the use is a PHI then we want the predecessor block instead.
  if (UseMI.isPHI()) {
    MachineOperand *PredBB = std::next(&UseMO);
    InsertBB = PredBB->getMBB();
  }

  // If the block is the same block as the def then we want to insert just after
  // the def instead of at the start of the block.
  if (InsertBB == DefMI.getParent()) {
    MachineBasicBlock::iterator InsertPt = &DefMI;
    Inserter(InsertBB, std::next(InsertPt), UseMO);
    return;
  }

  // Otherwise we want the start of the BB.
  Inserter(InsertBB, InsertBB->getFirstNonPHI(), UseMO);
}
} // end anonymous namespace

bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) const {
  PreferredTuple Preferred;
  if (matchCombineExtendingLoads(MI, Preferred)) {
    applyCombineExtendingLoads(MI, Preferred);
    return true;
  }
  return false;
}

static unsigned getExtLoadOpcForExtend(unsigned ExtOpc) {
  unsigned CandidateLoadOpc;
  switch (ExtOpc) {
  case TargetOpcode::G_ANYEXT:
    CandidateLoadOpc = TargetOpcode::G_LOAD;
    break;
  case TargetOpcode::G_SEXT:
    CandidateLoadOpc = TargetOpcode::G_SEXTLOAD;
    break;
  case TargetOpcode::G_ZEXT:
    CandidateLoadOpc = TargetOpcode::G_ZEXTLOAD;
    break;
  default:
    llvm_unreachable("Unexpected extend opc");
  }
  return CandidateLoadOpc;
}

bool CombinerHelper::matchCombineExtendingLoads(
    MachineInstr &MI, PreferredTuple &Preferred) const {
  // We match the loads and follow the uses to the extend instead of matching
  // the extends and following the def to the load. This is because the load
  // must remain in the same position for correctness (unless we also add code
  // to find a safe place to sink it) whereas the extend is freely movable.
  // It also prevents us from duplicating the load for the volatile case or just
  // for performance.
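  //
  // E.g. the canonical case this matches is:
  //   %v:_(s8) = G_LOAD %ptr
  //   %ext:_(s32) = G_SEXT %v(s8)
  // which applyCombineExtendingLoads rewrites to:
  //   %ext:_(s32) = G_SEXTLOAD %ptr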
  GAnyLoad *LoadMI = dyn_cast<GAnyLoad>(&MI);
  if (!LoadMI)
    return false;

  Register LoadReg = LoadMI->getDstReg();

  LLT LoadValueTy = MRI.getType(LoadReg);
  if (!LoadValueTy.isScalar())
    return false;

  // Most architectures are going to legalize <s8 loads into at least a 1 byte
  // load, and the MMOs can only describe memory accesses in multiples of bytes.
  // If we try to perform extload combining on those, we can end up with
  // %a(s8) = extload %ptr (load 1 byte from %ptr)
  // ... which is an illegal extload instruction.
  if (LoadValueTy.getSizeInBits() < 8)
    return false;

  // For non power-of-2 types, they will very likely be legalized into multiple
  // loads. Don't bother trying to match them into extending loads.
  if (!isPowerOf2_32(LoadValueTy.getSizeInBits()))
    return false;

  // Find the preferred type aside from the any-extends (unless it's the only
  // one) and non-extending ops. We'll emit an extending load to that type and
  // emit a variant of (extend (trunc X)) for the others according to the
  // relative type sizes. At the same time, pick an extend to use based on the
  // extend involved in the chosen type.
  unsigned PreferredOpcode =
      isa<GLoad>(&MI)
          ? TargetOpcode::G_ANYEXT
          : isa<GSExtLoad>(&MI) ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
  Preferred = {LLT(), PreferredOpcode, nullptr};
  for (auto &UseMI : MRI.use_nodbg_instructions(LoadReg)) {
    if (UseMI.getOpcode() == TargetOpcode::G_SEXT ||
        UseMI.getOpcode() == TargetOpcode::G_ZEXT ||
        (UseMI.getOpcode() == TargetOpcode::G_ANYEXT)) {
      const auto &MMO = LoadMI->getMMO();
      // Don't do anything for atomics.
      if (MMO.isAtomic())
        continue;
      // Check for legality.
      if (!isPreLegalize()) {
        LegalityQuery::MemDesc MMDesc(MMO);
        unsigned CandidateLoadOpc = getExtLoadOpcForExtend(UseMI.getOpcode());
        LLT UseTy = MRI.getType(UseMI.getOperand(0).getReg());
        LLT SrcTy = MRI.getType(LoadMI->getPointerReg());
        if (LI->getAction({CandidateLoadOpc, {UseTy, SrcTy}, {MMDesc}})
                .Action != LegalizeActions::Legal)
          continue;
      }
      Preferred = ChoosePreferredUse(MI, Preferred,
                                     MRI.getType(UseMI.getOperand(0).getReg()),
                                     UseMI.getOpcode(), &UseMI);
    }
  }

  // There were no extends.
  if (!Preferred.MI)
    return false;
  // It should be impossible to choose an extend without selecting a different
  // type since by definition the result of an extend is larger.
  assert(Preferred.Ty != LoadValueTy && "Extending to same type?");

  LLVM_DEBUG(dbgs() << "Preferred use is: " << *Preferred.MI);
  return true;
}

void CombinerHelper::applyCombineExtendingLoads(
    MachineInstr &MI, PreferredTuple &Preferred) const {
  // Rewrite the load to the chosen extending load.
  Register ChosenDstReg = Preferred.MI->getOperand(0).getReg();

  // Inserter to insert a truncate back to the original type at a given point
  // with some basic CSE to limit truncate duplication to one per BB.
  DenseMap<MachineBasicBlock *, MachineInstr *> EmittedInsns;
  auto InsertTruncAt = [&](MachineBasicBlock *InsertIntoBB,
                           MachineBasicBlock::iterator InsertBefore,
                           MachineOperand &UseMO) {
    MachineInstr *PreviouslyEmitted = EmittedInsns.lookup(InsertIntoBB);
    if (PreviouslyEmitted) {
      Observer.changingInstr(*UseMO.getParent());
      UseMO.setReg(PreviouslyEmitted->getOperand(0).getReg());
      Observer.changedInstr(*UseMO.getParent());
      return;
    }

    Builder.setInsertPt(*InsertIntoBB, InsertBefore);
    Register NewDstReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg());
    MachineInstr *NewMI = Builder.buildTrunc(NewDstReg, ChosenDstReg);
    EmittedInsns[InsertIntoBB] = NewMI;
    replaceRegOpWith(MRI, UseMO, NewDstReg);
  };

  Observer.changingInstr(MI);
  unsigned LoadOpc = getExtLoadOpcForExtend(Preferred.ExtendOpcode);
  MI.setDesc(Builder.getTII().get(LoadOpc));

  // Rewrite all the uses to fix up the types.
  auto &LoadValue = MI.getOperand(0);
  SmallVector<MachineOperand *, 4> Uses(
      llvm::make_pointer_range(MRI.use_operands(LoadValue.getReg())));

  for (auto *UseMO : Uses) {
    MachineInstr *UseMI = UseMO->getParent();

    // If the extend is compatible with the preferred extend then we should fix
    // up the type and extend so that it uses the preferred use.
    if (UseMI->getOpcode() == Preferred.ExtendOpcode ||
        UseMI->getOpcode() == TargetOpcode::G_ANYEXT) {
      Register UseDstReg = UseMI->getOperand(0).getReg();
      MachineOperand &UseSrcMO = UseMI->getOperand(1);
      const LLT UseDstTy = MRI.getType(UseDstReg);
      if (UseDstReg != ChosenDstReg) {
        if (Preferred.Ty == UseDstTy) {
          // If the use has the same type as the preferred use, then merge
          // the vregs and erase the extend. For example:
          // %1:_(s8) = G_LOAD ...
          // %2:_(s32) = G_SEXT %1(s8)
          // %3:_(s32) = G_ANYEXT %1(s8)
          // ... = ... %3(s32)
          // rewrites to:
          // %2:_(s32) = G_SEXTLOAD ...
          // ... = ... %2(s32)
          replaceRegWith(MRI, UseDstReg, ChosenDstReg);
          Observer.erasingInstr(*UseMO->getParent());
          UseMO->getParent()->eraseFromParent();
        } else if (Preferred.Ty.getSizeInBits() < UseDstTy.getSizeInBits()) {
          // If the preferred size is smaller, then keep the extend but extend
          // from the result of the extending load. For example:
          // %1:_(s8) = G_LOAD ...
          // %2:_(s32) = G_SEXT %1(s8)
          // %3:_(s64) = G_ANYEXT %1(s8)
          // ... = ... %3(s64)
          // rewrites to:
          // %2:_(s32) = G_SEXTLOAD ...
          // %3:_(s64) = G_ANYEXT %2:_(s32)
          // ... = ... %3(s64)
          replaceRegOpWith(MRI, UseSrcMO, ChosenDstReg);
        } else {
          // If the preferred size is larger, then insert a truncate. For
          // example:
          // %1:_(s8) = G_LOAD ...
          // %2:_(s64) = G_SEXT %1(s8)
          // %3:_(s32) = G_ZEXT %1(s8)
          // ... = ... %3(s32)
          // rewrites to:
          // %2:_(s64) = G_SEXTLOAD ...
          // %4:_(s8) = G_TRUNC %2:_(s64)
          // %3:_(s32) = G_ZEXT %4:_(s8)
          // ... = ... %3(s32)
          InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO,
                                                 InsertTruncAt);
        }
        continue;
      }
      // The use is (one of) the uses of the preferred use we chose earlier.
      // We're going to update the load to def this value later so just erase
      // the old extend.
      Observer.erasingInstr(*UseMO->getParent());
      UseMO->getParent()->eraseFromParent();
      continue;
    }

    // The use isn't an extend. Truncate back to the type we originally loaded.
    // This is free on many targets.
    InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO, InsertTruncAt);
  }

  MI.getOperand(0).setReg(ChosenDstReg);
  Observer.changedInstr(MI);
}

bool CombinerHelper::matchCombineLoadWithAndMask(MachineInstr &MI,
                                                 BuildFnTy &MatchInfo) const {
  assert(MI.getOpcode() == TargetOpcode::G_AND);

  // If we have the following code:
  //  %mask = G_CONSTANT 255
  //  %ld   = G_LOAD %ptr, (load s16)
  //  %and  = G_AND %ld, %mask
  //
  // Try to fold it into
  //   %ld = G_ZEXTLOAD %ptr, (load s8)

  Register Dst = MI.getOperand(0).getReg();
  if (MRI.getType(Dst).isVector())
    return false;

  auto MaybeMask =
      getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
  if (!MaybeMask)
    return false;

  APInt MaskVal = MaybeMask->Value;

  if (!MaskVal.isMask())
    return false;

  Register SrcReg = MI.getOperand(1).getReg();
  // Don't use getOpcodeDef() here since intermediate instructions may have
  // multiple users.
  GAnyLoad *LoadMI = dyn_cast<GAnyLoad>(MRI.getVRegDef(SrcReg));
  if (!LoadMI || !MRI.hasOneNonDBGUse(LoadMI->getDstReg()))
    return false;

  Register LoadReg = LoadMI->getDstReg();
  LLT RegTy = MRI.getType(LoadReg);
  Register PtrReg = LoadMI->getPointerReg();
  unsigned RegSize = RegTy.getSizeInBits();
  LocationSize LoadSizeBits = LoadMI->getMemSizeInBits();
  unsigned MaskSizeBits = MaskVal.countr_one();

  // The mask may not be larger than the in-memory type, as it might cover sign
  // extended bits.
  if (MaskSizeBits > LoadSizeBits.getValue())
    return false;

  // If the mask covers the whole destination register, there's nothing to
  // extend.
  if (MaskSizeBits >= RegSize)
    return false;

  // Most targets cannot deal with loads of size < 8 and need to re-legalize to
  // at least byte loads. Avoid creating such loads here.
  if (MaskSizeBits < 8 || !isPowerOf2_32(MaskSizeBits))
    return false;

  const MachineMemOperand &MMO = LoadMI->getMMO();
  LegalityQuery::MemDesc MemDesc(MMO);

  // Don't modify the memory access size if this is atomic/volatile, but we can
  // still adjust the opcode to indicate the high bit behavior.
  if (LoadMI->isSimple())
    MemDesc.MemoryTy = LLT::scalar(MaskSizeBits);
  else if (LoadSizeBits.getValue() > MaskSizeBits ||
           LoadSizeBits.getValue() == RegSize)
    return false;

  // TODO: Could check if it's legal with the reduced or original memory size.
  if (!isLegalOrBeforeLegalizer(
          {TargetOpcode::G_ZEXTLOAD, {RegTy, MRI.getType(PtrReg)}, {MemDesc}}))
    return false;

  MatchInfo = [=](MachineIRBuilder &B) {
    B.setInstrAndDebugLoc(*LoadMI);
    auto &MF = B.getMF();
    auto PtrInfo = MMO.getPointerInfo();
    auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, MemDesc.MemoryTy);
    B.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, Dst, PtrReg, *NewMMO);
    LoadMI->eraseFromParent();
  };
  return true;
}

bool CombinerHelper::isPredecessor(const MachineInstr &DefMI,
                                   const MachineInstr &UseMI) const {
  assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
         "shouldn't consider debug uses");
  assert(DefMI.getParent() == UseMI.getParent());
  if (&DefMI == &UseMI)
    return true;
  const MachineBasicBlock &MBB = *DefMI.getParent();
  auto DefOrUse = find_if(MBB, [&DefMI, &UseMI](const MachineInstr &MI) {
    return &MI == &DefMI || &MI == &UseMI;
  });
  if (DefOrUse == MBB.end())
    llvm_unreachable("Block must contain both DefMI and UseMI!");
  return &*DefOrUse == &DefMI;
}

bool CombinerHelper::dominates(const MachineInstr &DefMI,
                               const MachineInstr &UseMI) const {
  assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
         "shouldn't consider debug uses");
  if (MDT)
    return MDT->dominates(&DefMI, &UseMI);
  else if (DefMI.getParent() != UseMI.getParent())
    return false;

  return isPredecessor(DefMI, UseMI);
}

bool CombinerHelper::matchSextTruncSextLoad(MachineInstr &MI) const {
  assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
  Register SrcReg = MI.getOperand(1).getReg();
  Register LoadUser = SrcReg;

  if (MRI.getType(SrcReg).isVector())
    return false;

  Register TruncSrc;
  if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc))))
    LoadUser = TruncSrc;

  uint64_t SizeInBits = MI.getOperand(2).getImm();
  // If the source is a G_SEXTLOAD from the same bit width, then we don't
  // need any extend at all, just a truncate.
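  // E.g.
  //   %ld:_(s32) = G_SEXTLOAD %ptr (load 1)
  //   %trunc:_(s16) = G_TRUNC %ld
  //   %sext:_(s16) = G_SEXT_INREG %trunc, 8
  // Here the loaded value was already sign extended from 8 bits, so %sext is
  // just a copy of %trunc.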
  if (auto *LoadMI = getOpcodeDef<GSExtLoad>(LoadUser, MRI)) {
    // If truncating more than the original extended value, abort.
    auto LoadSizeBits = LoadMI->getMemSizeInBits();
    if (TruncSrc &&
        MRI.getType(TruncSrc).getSizeInBits() < LoadSizeBits.getValue())
      return false;
    if (LoadSizeBits == SizeInBits)
      return true;
  }
  return false;
}

void CombinerHelper::applySextTruncSextLoad(MachineInstr &MI) const {
  assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
  Builder.buildCopy(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
  MI.eraseFromParent();
}

bool CombinerHelper::matchSextInRegOfLoad(
    MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) const {
  assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);

  Register DstReg = MI.getOperand(0).getReg();
  LLT RegTy = MRI.getType(DstReg);

  // Only supports scalars for now.
  if (RegTy.isVector())
    return false;

  Register SrcReg = MI.getOperand(1).getReg();
  auto *LoadDef = getOpcodeDef<GLoad>(SrcReg, MRI);
  if (!LoadDef || !MRI.hasOneNonDBGUse(SrcReg))
    return false;

  uint64_t MemBits = LoadDef->getMemSizeInBits().getValue();

  // If the sign extend extends from a narrower width than the load's width,
  // then we can narrow the load width when we combine to a G_SEXTLOAD.
  // Avoid widening the load at all.
  unsigned NewSizeBits = std::min((uint64_t)MI.getOperand(2).getImm(), MemBits);

  // Don't generate G_SEXTLOADs with a < 1 byte width.
  if (NewSizeBits < 8)
    return false;
  // Don't bother creating a non-power-2 sextload, it will likely be broken up
  // anyway for most targets.
  if (!isPowerOf2_32(NewSizeBits))
    return false;

  const MachineMemOperand &MMO = LoadDef->getMMO();
  LegalityQuery::MemDesc MMDesc(MMO);

  // Don't modify the memory access size if this is atomic/volatile, but we can
  // still adjust the opcode to indicate the high bit behavior.
  if (LoadDef->isSimple())
    MMDesc.MemoryTy = LLT::scalar(NewSizeBits);
  else if (MemBits > NewSizeBits || MemBits == RegTy.getSizeInBits())
    return false;

  // TODO: Could check if it's legal with the reduced or original memory size.
  if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SEXTLOAD,
                                 {MRI.getType(LoadDef->getDstReg()),
                                  MRI.getType(LoadDef->getPointerReg())},
                                 {MMDesc}}))
    return false;

  MatchInfo = std::make_tuple(LoadDef->getDstReg(), NewSizeBits);
  return true;
}

void CombinerHelper::applySextInRegOfLoad(
    MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) const {
  assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
  Register LoadReg;
  unsigned ScalarSizeBits;
  std::tie(LoadReg, ScalarSizeBits) = MatchInfo;
  GLoad *LoadDef = cast<GLoad>(MRI.getVRegDef(LoadReg));

  // If we have the following:
  // %ld = G_LOAD %ptr, (load 2)
  // %ext = G_SEXT_INREG %ld, 8
  // ==>
  // %ld = G_SEXTLOAD %ptr (load 1)

  auto &MMO = LoadDef->getMMO();
  Builder.setInstrAndDebugLoc(*LoadDef);
  auto &MF = Builder.getMF();
  auto PtrInfo = MMO.getPointerInfo();
  auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, ScalarSizeBits / 8);
  Builder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, MI.getOperand(0).getReg(),
                         LoadDef->getPointerReg(), *NewMMO);
  MI.eraseFromParent();

  // Not all loads can be deleted, so make sure the old one is removed.
  LoadDef->eraseFromParent();
}

/// Return true if 'MI' is a load or a store that may fold its address
/// operand into the load/store addressing mode.
static bool canFoldInAddressingMode(GLoadStore *MI, const TargetLowering &TLI,
                                    MachineRegisterInfo &MRI) {
  TargetLoweringBase::AddrMode AM;
  auto *MF = MI->getMF();
  auto *Addr = getOpcodeDef<GPtrAdd>(MI->getPointerReg(), MRI);
  if (!Addr)
    return false;

  AM.HasBaseReg = true;
  if (auto CstOff = getIConstantVRegVal(Addr->getOffsetReg(), MRI))
    AM.BaseOffs = CstOff->getSExtValue(); // [reg +/- imm]
  else
    AM.Scale = 1; // [reg +/- reg]

  return TLI.isLegalAddressingMode(
      MF->getDataLayout(), AM,
      getTypeForLLT(MI->getMMO().getMemoryType(),
                    MF->getFunction().getContext()),
      MI->getMMO().getAddrSpace());
}

static unsigned getIndexedOpc(unsigned LdStOpc) {
  switch (LdStOpc) {
  case TargetOpcode::G_LOAD:
    return TargetOpcode::G_INDEXED_LOAD;
  case TargetOpcode::G_STORE:
    return TargetOpcode::G_INDEXED_STORE;
  case TargetOpcode::G_ZEXTLOAD:
    return TargetOpcode::G_INDEXED_ZEXTLOAD;
  case TargetOpcode::G_SEXTLOAD:
    return TargetOpcode::G_INDEXED_SEXTLOAD;
  default:
    llvm_unreachable("Unexpected opcode");
  }
}

bool CombinerHelper::isIndexedLoadStoreLegal(GLoadStore &LdSt) const {
  // Check for legality.
  LLT PtrTy = MRI.getType(LdSt.getPointerReg());
  LLT Ty = MRI.getType(LdSt.getReg(0));
  LLT MemTy = LdSt.getMMO().getMemoryType();
  SmallVector<LegalityQuery::MemDesc, 2> MemDescrs(
      {{MemTy, MemTy.getSizeInBits().getKnownMinValue(),
        AtomicOrdering::NotAtomic}});
  unsigned IndexedOpc = getIndexedOpc(LdSt.getOpcode());
  SmallVector<LLT> OpTys;
  if (IndexedOpc == TargetOpcode::G_INDEXED_STORE)
    OpTys = {PtrTy, Ty, Ty};
  else
    OpTys = {Ty, PtrTy}; // For G_INDEXED_LOAD, G_INDEXED_[SZ]EXTLOAD

  LegalityQuery Q(IndexedOpc, OpTys, MemDescrs);
  return isLegal(Q);
}

1192 "post-index-use-threshold", cl::Hidden, cl::init(32),
1193 cl::desc("Number of uses of a base pointer to check before it is no longer "
1194 "considered for post-indexing."));
1195
bool CombinerHelper::findPostIndexCandidate(GLoadStore &LdSt, Register &Addr,
                                            Register &Base, Register &Offset,
                                            bool &RematOffset) const {
  // We're looking for the following pattern, for either load or store:
  // %baseptr:_(p0) = ...
  // G_STORE %val(s64), %baseptr(p0)
  // %offset:_(s64) = G_CONSTANT i64 -256
  // %new_addr:_(p0) = G_PTR_ADD %baseptr, %offset(s64)
  const auto &TLI = getTargetLowering();

  Register Ptr = LdSt.getPointerReg();
  // If the store is the only use, don't bother.
  if (MRI.hasOneNonDBGUse(Ptr))
    return false;

  if (!isIndexedLoadStoreLegal(LdSt))
    return false;

  if (getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Ptr, MRI))
    return false;

  MachineInstr *StoredValDef = getDefIgnoringCopies(LdSt.getReg(0), MRI);
  auto *PtrDef = MRI.getVRegDef(Ptr);

  unsigned NumUsesChecked = 0;
  for (auto &Use : MRI.use_nodbg_instructions(Ptr)) {
    if (++NumUsesChecked > PostIndexUseThreshold)
      return false; // Try to avoid exploding compile time.

    auto *PtrAdd = dyn_cast<GPtrAdd>(&Use);
    // The use itself might be dead. This can happen during combines if DCE
    // hasn't had a chance to run yet. Don't allow it to form an indexed op.
    if (!PtrAdd || MRI.use_nodbg_empty(PtrAdd->getReg(0)))
      continue;

    // Check the user of this isn't the store, otherwise we'd be generating an
    // indexed store defining its own use.
    if (StoredValDef == &Use)
      continue;

    Offset = PtrAdd->getOffsetReg();
    if (!ForceLegalIndexing &&
        !TLI.isIndexingLegal(LdSt, PtrAdd->getBaseReg(), Offset,
                             /*IsPre*/ false, MRI))
      continue;

    // Make sure the offset calculation is before the potentially indexed op.
    MachineInstr *OffsetDef = MRI.getVRegDef(Offset);
    RematOffset = false;
    if (!dominates(*OffsetDef, LdSt)) {
      // If the offset however is just a G_CONSTANT, we can always just
      // rematerialize it where we need it.
      if (OffsetDef->getOpcode() != TargetOpcode::G_CONSTANT)
        continue;
      RematOffset = true;
    }

    for (auto &BasePtrUse : MRI.use_nodbg_instructions(PtrAdd->getBaseReg())) {
      if (&BasePtrUse == PtrDef)
        continue;

      // If the user is a later load/store that can be post-indexed, then don't
      // combine this one.
      auto *BasePtrLdSt = dyn_cast<GLoadStore>(&BasePtrUse);
      if (BasePtrLdSt && BasePtrLdSt != &LdSt &&
          dominates(LdSt, *BasePtrLdSt) &&
          isIndexedLoadStoreLegal(*BasePtrLdSt))
        return false;

      // Now we're looking for the key G_PTR_ADD instruction, which contains
      // the offset add that we want to fold.
      if (auto *BasePtrUseDef = dyn_cast<GPtrAdd>(&BasePtrUse)) {
        Register PtrAddDefReg = BasePtrUseDef->getReg(0);
        for (auto &BaseUseUse : MRI.use_nodbg_instructions(PtrAddDefReg)) {
          // If the use is in a different block, then we may produce worse code
          // due to the extra register pressure.
          if (BaseUseUse.getParent() != LdSt.getParent())
            return false;

          if (auto *UseUseLdSt = dyn_cast<GLoadStore>(&BaseUseUse))
            if (canFoldInAddressingMode(UseUseLdSt, TLI, MRI))
              return false;
        }
        if (!dominates(LdSt, BasePtrUse))
          return false; // All uses must be dominated by the load/store.
      }
    }

    Addr = PtrAdd->getReg(0);
    Base = PtrAdd->getBaseReg();
    return true;
  }

  return false;
}

bool CombinerHelper::findPreIndexCandidate(GLoadStore &LdSt, Register &Addr,
                                           Register &Base,
                                           Register &Offset) const {
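  // Here we look for the pre-indexed form, i.e. the pointer operand itself is
  //   %addr:_(p0) = G_PTR_ADD %base, %offset
  // and the add can be folded into the access as an address writeback.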
  auto &MF = *LdSt.getParent()->getParent();
  const auto &TLI = *MF.getSubtarget().getTargetLowering();

  Addr = LdSt.getPointerReg();
  if (!mi_match(Addr, MRI, m_GPtrAdd(m_Reg(Base), m_Reg(Offset))) ||
      MRI.hasOneNonDBGUse(Addr))
    return false;

  if (!ForceLegalIndexing &&
      !TLI.isIndexingLegal(LdSt, Base, Offset, /*IsPre*/ true, MRI))
    return false;

  if (!isIndexedLoadStoreLegal(LdSt))
    return false;

  MachineInstr *BaseDef = getDefIgnoringCopies(Base, MRI);
  if (BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
    return false;

  if (auto *St = dyn_cast<GStore>(&LdSt)) {
    // Would require a copy.
    if (Base == St->getValueReg())
      return false;

    // We're expecting one use of Addr in MI, but it could also be the
    // value stored, which isn't actually dominated by the instruction.
    if (St->getValueReg() == Addr)
      return false;
  }

  // Avoid increasing cross-block register pressure.
  for (auto &AddrUse : MRI.use_nodbg_instructions(Addr))
    if (AddrUse.getParent() != LdSt.getParent())
      return false;

  // FIXME: check whether all uses of the base pointer are constant PtrAdds.
  // That might allow us to end base's liveness here by adjusting the constant.
  bool RealUse = false;
  for (auto &AddrUse : MRI.use_nodbg_instructions(Addr)) {
    if (!dominates(LdSt, AddrUse))
      return false; // All uses must be dominated by the load/store.

    // If Ptr may be folded in addressing mode of other use, then it's
    // not profitable to do this transformation.
    if (auto *UseLdSt = dyn_cast<GLoadStore>(&AddrUse)) {
      if (!canFoldInAddressingMode(UseLdSt, TLI, MRI))
        RealUse = true;
    } else {
      RealUse = true;
    }
  }
  return RealUse;
}

bool CombinerHelper::matchCombineExtractedVectorLoad(
    MachineInstr &MI, BuildFnTy &MatchInfo) const {
  assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT);

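  // We try to rewrite
  //   %vec:_(<4 x s32>) = G_LOAD %ptr
  //   %elt:_(s32) = G_EXTRACT_VECTOR_ELT %vec, %idx
  // into a single scalar load of the addressed element, when the vector load
  // has no other users.
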
  // Check if there is a load that defines the vector being extracted from.
  auto *LoadMI = getOpcodeDef<GLoad>(MI.getOperand(1).getReg(), MRI);
  if (!LoadMI)
    return false;

  Register Vector = MI.getOperand(1).getReg();
  LLT VecEltTy = MRI.getType(Vector).getElementType();

  assert(MRI.getType(MI.getOperand(0).getReg()) == VecEltTy);

  // Checking whether we should reduce the load width.
  if (!MRI.hasOneNonDBGUse(Vector))
    return false;

  // Check if the defining load is simple.
  if (!LoadMI->isSimple())
    return false;

  // If the vector element type is not a multiple of a byte then we are unable
  // to correctly compute an address to load only the extracted element as a
  // scalar.
  if (!VecEltTy.isByteSized())
    return false;

  // Check for load fold barriers between the extraction and the load.
  if (MI.getParent() != LoadMI->getParent())
    return false;
  const unsigned MaxIter = 20;
  unsigned Iter = 0;
  for (auto II = LoadMI->getIterator(), IE = MI.getIterator(); II != IE; ++II) {
    if (II->isLoadFoldBarrier())
      return false;
    if (Iter++ == MaxIter)
      return false;
  }

  // Check if the new load that we are going to create is legal
  // if we are in the post-legalization phase.
  MachineMemOperand MMO = LoadMI->getMMO();
  Align Alignment = MMO.getAlign();
  MachinePointerInfo PtrInfo;
  int64_t Offset;

  // Finding the appropriate PtrInfo if offset is a known constant.
  // This is required to create the memory operand for the narrowed load.
  // This machine memory operand object helps us infer about legality
  // before we proceed to combine the instruction.
  if (auto CVal = getIConstantVRegVal(MI.getOperand(2).getReg(), MRI)) {
    int Elt = CVal->getZExtValue();
    // FIXME: should be (ABI size)*Elt.
    Offset = VecEltTy.getSizeInBits() * Elt / 8;
    PtrInfo = MMO.getPointerInfo().getWithOffset(Offset);
  } else {
    // Discard the pointer info except the address space because the memory
    // operand can't represent this new access since the offset is variable.
    Offset = VecEltTy.getSizeInBits() / 8;
    PtrInfo = MachinePointerInfo(MMO.getPointerInfo().getAddrSpace());
  }

  Alignment = commonAlignment(Alignment, Offset);

  Register VecPtr = LoadMI->getPointerReg();
  LLT PtrTy = MRI.getType(VecPtr);

  MachineFunction &MF = *MI.getMF();
  auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, VecEltTy);

  LegalityQuery::MemDesc MMDesc(*NewMMO);

  if (!isLegalOrBeforeLegalizer(
          {TargetOpcode::G_LOAD, {VecEltTy, PtrTy}, {MMDesc}}))
    return false;

  // Load must be allowed and fast on the target.
  LLVMContext &C = MF.getFunction().getContext();
  auto &DL = MF.getDataLayout();
  unsigned Fast = 0;
  if (!getTargetLowering().allowsMemoryAccess(C, DL, VecEltTy, *NewMMO,
                                              &Fast) ||
      !Fast)
    return false;

  Register Result = MI.getOperand(0).getReg();
  Register Index = MI.getOperand(2).getReg();

  MatchInfo = [=](MachineIRBuilder &B) {
    GISelObserverWrapper DummyObserver;
    LegalizerHelper Helper(B.getMF(), DummyObserver, B);
    // Get a pointer to the vector element.
    Register finalPtr = Helper.getVectorElementPointer(
        LoadMI->getPointerReg(), MRI.getType(LoadMI->getOperand(0).getReg()),
        Index);
    // New G_LOAD instruction.
    B.buildLoad(Result, finalPtr, PtrInfo, Alignment);
    // Remove the original G_LOAD instruction.
    LoadMI->eraseFromParent();
  };

  return true;
}

bool CombinerHelper::matchCombineIndexedLoadStore(
    MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) const {
  auto &LdSt = cast<GLoadStore>(MI);

  if (LdSt.isAtomic())
    return false;

  MatchInfo.IsPre = findPreIndexCandidate(LdSt, MatchInfo.Addr, MatchInfo.Base,
                                          MatchInfo.Offset);
  if (!MatchInfo.IsPre &&
      !findPostIndexCandidate(LdSt, MatchInfo.Addr, MatchInfo.Base,
                              MatchInfo.Offset, MatchInfo.RematOffset))
    return false;

  return true;
}

void CombinerHelper::applyCombineIndexedLoadStore(
    MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) const {
  MachineInstr &AddrDef = *MRI.getUniqueVRegDef(MatchInfo.Addr);
  unsigned Opcode = MI.getOpcode();
  bool IsStore = Opcode == TargetOpcode::G_STORE;
  unsigned NewOpcode = getIndexedOpc(Opcode);
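
  // E.g. for a post-indexed store,
  //   G_STORE %val, %base
  //   %addr:_(p0) = G_PTR_ADD %base, %offset
  // becomes a single writeback instruction:
  //   %addr:_(p0) = G_INDEXED_STORE %val, %base, %offset, 0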
1477
1478 // If the offset constant didn't happen to dominate the load/store, we can
1479 // just clone it as needed.
1480 if (MatchInfo.RematOffset) {
1481 auto *OldCst = MRI.getVRegDef(MatchInfo.Offset);
1482 auto NewCst = Builder.buildConstant(MRI.getType(MatchInfo.Offset),
1483 *OldCst->getOperand(1).getCImm());
1484 MatchInfo.Offset = NewCst.getReg(0);
1485 }
1486
1487 auto MIB = Builder.buildInstr(NewOpcode);
1488 if (IsStore) {
1489 MIB.addDef(MatchInfo.Addr);
1490 MIB.addUse(MI.getOperand(0).getReg());
1491 } else {
1492 MIB.addDef(MI.getOperand(0).getReg());
1493 MIB.addDef(MatchInfo.Addr);
1494 }
1495
1496 MIB.addUse(MatchInfo.Base);
1497 MIB.addUse(MatchInfo.Offset);
1498 MIB.addImm(MatchInfo.IsPre);
1499 MIB->cloneMemRefs(*MI.getMF(), MI);
1500 MI.eraseFromParent();
1501 AddrDef.eraseFromParent();
1502
1503 LLVM_DEBUG(dbgs() << " Combinined to indexed operation");
1504}
1505
bool CombinerHelper::matchCombineDivRem(MachineInstr &MI,
                                        MachineInstr *&OtherMI) const {
  unsigned Opcode = MI.getOpcode();
  bool IsDiv, IsSigned;

  switch (Opcode) {
  default:
    llvm_unreachable("Unexpected opcode!");
  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_UDIV: {
    IsDiv = true;
    IsSigned = Opcode == TargetOpcode::G_SDIV;
    break;
  }
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_UREM: {
    IsDiv = false;
    IsSigned = Opcode == TargetOpcode::G_SREM;
    break;
  }
  }

  Register Src1 = MI.getOperand(1).getReg();
  unsigned DivOpcode, RemOpcode, DivremOpcode;
  if (IsSigned) {
    DivOpcode = TargetOpcode::G_SDIV;
    RemOpcode = TargetOpcode::G_SREM;
    DivremOpcode = TargetOpcode::G_SDIVREM;
  } else {
    DivOpcode = TargetOpcode::G_UDIV;
    RemOpcode = TargetOpcode::G_UREM;
    DivremOpcode = TargetOpcode::G_UDIVREM;
  }

  if (!isLegalOrBeforeLegalizer({DivremOpcode, {MRI.getType(Src1)}}))
    return false;

  // Combine:
  //   %div:_ = G_[SU]DIV %src1:_, %src2:_
  //   %rem:_ = G_[SU]REM %src1:_, %src2:_
  // into:
  //   %div:_, %rem:_ = G_[SU]DIVREM %src1:_, %src2:_

  // Combine:
  //   %rem:_ = G_[SU]REM %src1:_, %src2:_
  //   %div:_ = G_[SU]DIV %src1:_, %src2:_
  // into:
  //   %div:_, %rem:_ = G_[SU]DIVREM %src1:_, %src2:_

  for (auto &UseMI : MRI.use_nodbg_instructions(Src1)) {
    if (MI.getParent() == UseMI.getParent() &&
        ((IsDiv && UseMI.getOpcode() == RemOpcode) ||
         (!IsDiv && UseMI.getOpcode() == DivOpcode)) &&
        matchEqualDefs(MI.getOperand(2), UseMI.getOperand(2)) &&
        matchEqualDefs(MI.getOperand(1), UseMI.getOperand(1))) {
      OtherMI = &UseMI;
      return true;
    }
  }

  return false;
}

void CombinerHelper::applyCombineDivRem(MachineInstr &MI,
                                        MachineInstr *&OtherMI) const {
  unsigned Opcode = MI.getOpcode();
  assert(OtherMI && "OtherMI shouldn't be empty.");

  Register DestDivReg, DestRemReg;
  if (Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_UDIV) {
    DestDivReg = MI.getOperand(0).getReg();
    DestRemReg = OtherMI->getOperand(0).getReg();
  } else {
    DestDivReg = OtherMI->getOperand(0).getReg();
    DestRemReg = MI.getOperand(0).getReg();
  }

  bool IsSigned =
      Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_SREM;

  // Check which instruction is first in the block so we don't break def-use
  // deps by "moving" the instruction incorrectly. Also keep track of which
  // instruction is first so we pick its operands, avoiding use-before-def
  // bugs.
  MachineInstr *FirstInst = dominates(MI, *OtherMI) ? &MI : OtherMI;
  Builder.setInstrAndDebugLoc(*FirstInst);

  Builder.buildInstr(IsSigned ? TargetOpcode::G_SDIVREM
                              : TargetOpcode::G_UDIVREM,
                     {DestDivReg, DestRemReg},
                     {FirstInst->getOperand(1), FirstInst->getOperand(2)});
  MI.eraseFromParent();
  OtherMI->eraseFromParent();
}

bool CombinerHelper::matchOptBrCondByInvertingCond(
    MachineInstr &MI, MachineInstr *&BrCond) const {
  assert(MI.getOpcode() == TargetOpcode::G_BR);

  // Try to match the following:
  // bb1:
  //   G_BRCOND %c1, %bb2
  //   G_BR %bb3
  // bb2:
  // ...
  // bb3:

  // The above pattern does not have a fall through to the successor bb2, always
  // resulting in a branch no matter which path is taken. Here we try to find
  // and replace that pattern with a conditional branch to bb3 and an otherwise
  // fallthrough to bb2. This is generally better for branch predictors.

  MachineBasicBlock *MBB = MI.getParent();
  MachineBasicBlock::iterator BrIt(MI);
  if (BrIt == MBB->begin())
    return false;
  assert(std::next(BrIt) == MBB->end() && "expected G_BR to be a terminator");

  BrCond = &*std::prev(BrIt);
  if (BrCond->getOpcode() != TargetOpcode::G_BRCOND)
    return false;

  // Check that the next block is the conditional branch target. Also make sure
  // that it isn't the same as the G_BR's target (otherwise, this will loop.)
  MachineBasicBlock *BrCondTarget = BrCond->getOperand(1).getMBB();
  return BrCondTarget != MI.getOperand(0).getMBB() &&
         MBB->isLayoutSuccessor(BrCondTarget);
}

void CombinerHelper::applyOptBrCondByInvertingCond(
    MachineInstr &MI, MachineInstr *&BrCond) const {
  MachineBasicBlock *BrTarget = MI.getOperand(0).getMBB();
  Builder.setInstrAndDebugLoc(*BrCond);
  LLT Ty = MRI.getType(BrCond->getOperand(0).getReg());
  // FIXME: Does int/fp matter for this? If so, we might need to restrict
  // this to i1 only since we might not know for sure what kind of
  // compare generated the condition value.
  auto True = Builder.buildConstant(
      Ty, getICmpTrueVal(getTargetLowering(), false, false));
  auto Xor = Builder.buildXor(Ty, BrCond->getOperand(0), True);

  auto *FallthroughBB = BrCond->getOperand(1).getMBB();
  Observer.changingInstr(MI);
  MI.getOperand(0).setMBB(FallthroughBB);
  Observer.changedInstr(MI);

  // Change the conditional branch to use the inverted condition and
  // new target block.
  Observer.changingInstr(*BrCond);
  BrCond->getOperand(0).setReg(Xor.getReg(0));
  BrCond->getOperand(1).setMBB(BrTarget);
  Observer.changedInstr(*BrCond);
}

bool CombinerHelper::tryEmitMemcpyInline(MachineInstr &MI) const {
  MachineIRBuilder HelperBuilder(MI);
  GISelObserverWrapper DummyObserver;
  LegalizerHelper Helper(HelperBuilder.getMF(), DummyObserver, HelperBuilder);
  return Helper.lowerMemcpyInline(MI) ==
         LegalizerHelper::LegalizeResult::Legalized;
}

bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI,
                                            unsigned MaxLen) const {
  MachineIRBuilder HelperBuilder(MI);
  GISelObserverWrapper DummyObserver;
  LegalizerHelper Helper(HelperBuilder.getMF(), DummyObserver, HelperBuilder);
  return Helper.lowerMemCpyFamily(MI, MaxLen) ==
         LegalizerHelper::LegalizeResult::Legalized;
}

1678 const MachineRegisterInfo &MRI,
1679 const APFloat &Val) {
1680 APFloat Result(Val);
1681 switch (MI.getOpcode()) {
1682 default:
1683 llvm_unreachable("Unexpected opcode!");
1684 case TargetOpcode::G_FNEG: {
1685 Result.changeSign();
1686 return Result;
1687 }
1688 case TargetOpcode::G_FABS: {
1689 Result.clearSign();
1690 return Result;
1691 }
1692 case TargetOpcode::G_FCEIL:
1693 Result.roundToIntegral(APFloat::rmTowardPositive);
1694 return Result;
1695 case TargetOpcode::G_FFLOOR:
1696 Result.roundToIntegral(APFloat::rmTowardNegative);
1697 return Result;
1698 case TargetOpcode::G_INTRINSIC_TRUNC:
1699 Result.roundToIntegral(APFloat::rmTowardZero);
1700 return Result;
1701 case TargetOpcode::G_INTRINSIC_ROUND:
1702 Result.roundToIntegral(APFloat::rmNearestTiesToAway);
1703 return Result;
1704 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
1705 Result.roundToIntegral(APFloat::rmNearestTiesToEven);
1706 return Result;
1707 case TargetOpcode::G_FRINT:
1708 case TargetOpcode::G_FNEARBYINT:
1709 // Use default rounding mode (round to nearest, ties to even)
1710 Result.roundToIntegral(APFloat::rmNearestTiesToEven);
1711 return Result;
1712 case TargetOpcode::G_FPEXT:
1713 case TargetOpcode::G_FPTRUNC: {
1714 bool Unused;
1715 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
1716 Result.convert(getFltSemanticForLLT(DstTy.getScalarType()), APFloat::rmNearestTiesToEven,
1717 &Unused);
1718 return Result;
1719 }
1720 case TargetOpcode::G_FSQRT: {
1721 bool Unused;
1722 Result.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
1723 &Unused);
1724 Result = APFloat(sqrt(Result.convertToDouble()));
1725 break;
1726 }
1727 case TargetOpcode::G_FLOG2: {
1728 bool Unused;
1729 Result.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
1730 &Unused);
1731 Result = APFloat(log2(Result.convertToDouble()));
1732 break;
1733 }
1734 }
1735 // Convert `Result` back to the semantics of the input `Val`; otherwise
1736 // `buildFConstant` will assert on a size mismatch. Only `G_FSQRT` and
1737 // `G_FLOG2` reach here.
1738 bool Unused;
1739 Result.convert(Val.getSemantics(), APFloat::rmNearestTiesToEven, &Unused);
1740 return Result;
1741}
1742
1743void CombinerHelper::applyCombineConstantFoldFpUnary(
1744 MachineInstr &MI, const ConstantFP *Cst) const {
1745 APFloat Folded = constantFoldFpUnary(MI, MRI, Cst->getValue());
1746 const ConstantFP *NewCst = ConstantFP::get(Builder.getContext(), Folded);
1747 Builder.buildFConstant(MI.getOperand(0), *NewCst);
1748 MI.eraseFromParent();
1749}
1750
1751bool CombinerHelper::matchPtrAddImmedChain(MachineInstr &MI,
1752 PtrAddChain &MatchInfo) const {
1753 // We're trying to match the following pattern:
1754 // %t1 = G_PTR_ADD %base, G_CONSTANT imm1
1755 // %root = G_PTR_ADD %t1, G_CONSTANT imm2
1756 // -->
1757 // %root = G_PTR_ADD %base, G_CONSTANT (imm1 + imm2)
1758
1759 if (MI.getOpcode() != TargetOpcode::G_PTR_ADD)
1760 return false;
1761
1762 Register Add2 = MI.getOperand(1).getReg();
1763 Register Imm1 = MI.getOperand(2).getReg();
1764 auto MaybeImmVal = getIConstantVRegValWithLookThrough(Imm1, MRI);
1765 if (!MaybeImmVal)
1766 return false;
1767
1768 MachineInstr *Add2Def = MRI.getVRegDef(Add2);
1769 if (!Add2Def || Add2Def->getOpcode() != TargetOpcode::G_PTR_ADD)
1770 return false;
1771
1772 Register Base = Add2Def->getOperand(1).getReg();
1773 Register Imm2 = Add2Def->getOperand(2).getReg();
1774 auto MaybeImm2Val = getIConstantVRegValWithLookThrough(Imm2, MRI);
1775 if (!MaybeImm2Val)
1776 return false;
1777
1778 // Check if the new combined immediate forms an illegal addressing mode.
1779 // Do not combine if it was legal before but would become illegal.
1780 // To do so, we need to find a load/store user of the pointer to get
1781 // the access type.
1782 Type *AccessTy = nullptr;
1783 auto &MF = *MI.getMF();
1784 for (auto &UseMI : MRI.use_nodbg_instructions(MI.getOperand(0).getReg())) {
1785 if (auto *LdSt = dyn_cast<GLoadStore>(&UseMI)) {
1786 AccessTy = getTypeForLLT(MRI.getType(LdSt->getReg(0)),
1787 MF.getFunction().getContext());
1788 break;
1789 }
1790 }
1791 TargetLoweringBase::AddrMode AMNew;
1792 APInt CombinedImm = MaybeImmVal->Value + MaybeImm2Val->Value;
1793 AMNew.BaseOffs = CombinedImm.getSExtValue();
1794 if (AccessTy) {
1795 AMNew.HasBaseReg = true;
1796 TargetLoweringBase::AddrMode AMOld;
1797 AMOld.BaseOffs = MaybeImmVal->Value.getSExtValue();
1798 AMOld.HasBaseReg = true;
1799 unsigned AS = MRI.getType(Add2).getAddressSpace();
1800 const auto &TLI = *MF.getSubtarget().getTargetLowering();
1801 if (TLI.isLegalAddressingMode(MF.getDataLayout(), AMOld, AccessTy, AS) &&
1802 !TLI.isLegalAddressingMode(MF.getDataLayout(), AMNew, AccessTy, AS))
1803 return false;
1804 }
1805
1806 // Reassociating nuw additions preserves nuw. If both original G_PTR_ADDs are
1807 // inbounds, reaching the same result in one G_PTR_ADD is also inbounds.
1808 // The nusw constraints are satisfied because imm1+imm2 cannot exceed the
1809 // largest signed integer that fits into the index type, which is the maximum
1810 // size of allocated objects according to the IR Language Reference.
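 // Illustrative sketch (names and offsets assumed): if both adds carry nuw,
 //   %t1:_(p0) = nuw G_PTR_ADD %base, 4
 //   %root:_(p0) = nuw G_PTR_ADD %t1, 8
 // then the combined %root = nuw G_PTR_ADD %base, 12 keeps nuw.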
1811 unsigned PtrAddFlags = MI.getFlags();
1812 unsigned LHSPtrAddFlags = Add2Def->getFlags();
1813 bool IsNoUWrap = PtrAddFlags & LHSPtrAddFlags & MachineInstr::MIFlag::NoUWrap;
1814 bool IsInBounds =
1815 PtrAddFlags & LHSPtrAddFlags & MachineInstr::MIFlag::InBounds;
1816 unsigned Flags = 0;
1817 if (IsNoUWrap)
1818 Flags |= MachineInstr::MIFlag::NoUWrap;
1819 if (IsInBounds) {
1820 Flags |= MachineInstr::MIFlag::NoUSWrap;
1821 Flags |= MachineInstr::MIFlag::InBounds;
1822 }
1823
1824 // Pass the combined immediate to the apply function.
1825 MatchInfo.Imm = AMNew.BaseOffs;
1826 MatchInfo.Base = Base;
1827 MatchInfo.Bank = getRegBank(Imm2);
1828 MatchInfo.Flags = Flags;
1829 return true;
1830}
1831
1832void CombinerHelper::applyPtrAddImmedChain(MachineInstr &MI,
1833 PtrAddChain &MatchInfo) const {
1834 assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
1835 MachineIRBuilder MIB(MI);
1836 LLT OffsetTy = MRI.getType(MI.getOperand(2).getReg());
1837 auto NewOffset = MIB.buildConstant(OffsetTy, MatchInfo.Imm);
1838 setRegBank(NewOffset.getReg(0), MatchInfo.Bank);
1839 Observer.changingInstr(MI);
1840 MI.getOperand(1).setReg(MatchInfo.Base);
1841 MI.getOperand(2).setReg(NewOffset.getReg(0));
1842 MI.setFlags(MatchInfo.Flags);
1843 Observer.changedInstr(MI);
1844}
1845
1846bool CombinerHelper::matchShiftImmedChain(MachineInstr &MI,
1847 RegisterImmPair &MatchInfo) const {
1848 // We're trying to match the following pattern with any of
1849 // G_SHL/G_ASHR/G_LSHR/G_SSHLSAT/G_USHLSAT shift instructions:
1850 // %t1 = SHIFT %base, G_CONSTANT imm1
1851 // %root = SHIFT %t1, G_CONSTANT imm2
1852 // -->
1853 // %root = SHIFT %base, G_CONSTANT (imm1 + imm2)
1854
1855 unsigned Opcode = MI.getOpcode();
1856 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
1857 Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||
1858 Opcode == TargetOpcode::G_USHLSAT) &&
1859 "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");
1860
1861 Register Shl2 = MI.getOperand(1).getReg();
1862 Register Imm1 = MI.getOperand(2).getReg();
1863 auto MaybeImmVal = getIConstantVRegValWithLookThrough(Imm1, MRI);
1864 if (!MaybeImmVal)
1865 return false;
1866
1867 MachineInstr *Shl2Def = MRI.getUniqueVRegDef(Shl2);
1868 if (Shl2Def->getOpcode() != Opcode)
1869 return false;
1870
1871 Register Base = Shl2Def->getOperand(1).getReg();
1872 Register Imm2 = Shl2Def->getOperand(2).getReg();
1873 auto MaybeImm2Val = getIConstantVRegValWithLookThrough(Imm2, MRI);
1874 if (!MaybeImm2Val)
1875 return false;
1876
1877 // Pass the combined immediate to the apply function.
1878 MatchInfo.Imm =
1879 (MaybeImmVal->Value.getZExtValue() + MaybeImm2Val->Value).getZExtValue();
1880 MatchInfo.Reg = Base;
1881
1882 // There is no simple replacement for a saturating unsigned left shift that
1883 // exceeds the scalar size.
1884 if (Opcode == TargetOpcode::G_USHLSAT &&
1885 MatchInfo.Imm >= MRI.getType(Shl2).getScalarSizeInBits())
1886 return false;
1887
1888 return true;
1889}
1890
1891void CombinerHelper::applyShiftImmedChain(MachineInstr &MI,
1892 RegisterImmPair &MatchInfo) const {
1893 unsigned Opcode = MI.getOpcode();
1894 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
1895 Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||
1896 Opcode == TargetOpcode::G_USHLSAT) &&
1897 "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");
1898
1899 LLT Ty = MRI.getType(MI.getOperand(1).getReg());
1900 unsigned const ScalarSizeInBits = Ty.getScalarSizeInBits();
1901 auto Imm = MatchInfo.Imm;
1902
1903 if (Imm >= ScalarSizeInBits) {
1904 // Any logical shift that exceeds scalar size will produce zero.
1905 if (Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_LSHR) {
1906 Builder.buildConstant(MI.getOperand(0), 0);
1907 MI.eraseFromParent();
1908 return;
1909 }
1910 // Arithmetic shift and saturating signed left shift have no effect beyond
1911 // scalar size.
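 // Illustrative sketch (s32 assumed): G_ASHR by 20 followed by G_ASHR by 15
 // sums to 35, which is clamped to a single G_ASHR by 31, since the sign
 // fill saturates at the scalar width.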
1912 Imm = ScalarSizeInBits - 1;
1913 }
1914
1915 LLT ImmTy = MRI.getType(MI.getOperand(2).getReg());
1916 Register NewImm = Builder.buildConstant(ImmTy, Imm).getReg(0);
1917 Observer.changingInstr(MI);
1918 MI.getOperand(1).setReg(MatchInfo.Reg);
1919 MI.getOperand(2).setReg(NewImm);
1920 Observer.changedInstr(MI);
1921}
1922
1923bool CombinerHelper::matchShiftOfShiftedLogic(
1924 MachineInstr &MI, ShiftOfShiftedLogic &MatchInfo) const {
1925 // We're trying to match the following pattern with any of
1926 // G_SHL/G_ASHR/G_LSHR/G_USHLSAT/G_SSHLSAT shift instructions in combination
1927 // with any of G_AND/G_OR/G_XOR logic instructions.
1928 // %t1 = SHIFT %X, G_CONSTANT C0
1929 // %t2 = LOGIC %t1, %Y
1930 // %root = SHIFT %t2, G_CONSTANT C1
1931 // -->
1932 // %t3 = SHIFT %X, G_CONSTANT (C0+C1)
1933 // %t4 = SHIFT %Y, G_CONSTANT C1
1934 // %root = LOGIC %t3, %t4
1935 unsigned ShiftOpcode = MI.getOpcode();
1936 assert((ShiftOpcode == TargetOpcode::G_SHL ||
1937 ShiftOpcode == TargetOpcode::G_ASHR ||
1938 ShiftOpcode == TargetOpcode::G_LSHR ||
1939 ShiftOpcode == TargetOpcode::G_USHLSAT ||
1940 ShiftOpcode == TargetOpcode::G_SSHLSAT) &&
1941 "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT");
1942
1943 // Match a one-use bitwise logic op.
1944 Register LogicDest = MI.getOperand(1).getReg();
1945 if (!MRI.hasOneNonDBGUse(LogicDest))
1946 return false;
1947
1948 MachineInstr *LogicMI = MRI.getUniqueVRegDef(LogicDest);
1949 unsigned LogicOpcode = LogicMI->getOpcode();
1950 if (LogicOpcode != TargetOpcode::G_AND && LogicOpcode != TargetOpcode::G_OR &&
1951 LogicOpcode != TargetOpcode::G_XOR)
1952 return false;
1953
1954 // Find a matching one-use shift by constant.
1955 const Register C1 = MI.getOperand(2).getReg();
1956 auto MaybeImmVal = getIConstantVRegValWithLookThrough(C1, MRI);
1957 if (!MaybeImmVal || MaybeImmVal->Value == 0)
1958 return false;
1959
1960 const uint64_t C1Val = MaybeImmVal->Value.getZExtValue();
1961
1962 auto matchFirstShift = [&](const MachineInstr *MI, uint64_t &ShiftVal) {
1963 // The shift should match the previous one and have exactly one use.
1964 if (MI->getOpcode() != ShiftOpcode ||
1965 !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
1966 return false;
1967
1968 // Must be a constant.
1969 auto MaybeImmVal =
1970 getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
1971 if (!MaybeImmVal)
1972 return false;
1973
1974 ShiftVal = MaybeImmVal->Value.getSExtValue();
1975 return true;
1976 };
1977
1978 // Logic ops are commutative, so check each operand for a match.
1979 Register LogicMIReg1 = LogicMI->getOperand(1).getReg();
1980 MachineInstr *LogicMIOp1 = MRI.getUniqueVRegDef(LogicMIReg1);
1981 Register LogicMIReg2 = LogicMI->getOperand(2).getReg();
1982 MachineInstr *LogicMIOp2 = MRI.getUniqueVRegDef(LogicMIReg2);
1983 uint64_t C0Val;
1984
1985 if (matchFirstShift(LogicMIOp1, C0Val)) {
1986 MatchInfo.LogicNonShiftReg = LogicMIReg2;
1987 MatchInfo.Shift2 = LogicMIOp1;
1988 } else if (matchFirstShift(LogicMIOp2, C0Val)) {
1989 MatchInfo.LogicNonShiftReg = LogicMIReg1;
1990 MatchInfo.Shift2 = LogicMIOp2;
1991 } else
1992 return false;
1993
1994 MatchInfo.ValSum = C0Val + C1Val;
1995
1996 // The fold is not valid if the sum of the shift values exceeds bitwidth.
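 // Illustrative sketch (s32 assumed): C0 = 20 and C1 = 15 would sum to a
 // shift of 35, wider than the type, so the fold is rejected.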
1997 if (MatchInfo.ValSum >= MRI.getType(LogicDest).getScalarSizeInBits())
1998 return false;
1999
2000 MatchInfo.Logic = LogicMI;
2001 return true;
2002}
2003
2004void CombinerHelper::applyShiftOfShiftedLogic(
2005 MachineInstr &MI, ShiftOfShiftedLogic &MatchInfo) const {
2006 unsigned Opcode = MI.getOpcode();
2007 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
2008 Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_USHLSAT ||
2009 Opcode == TargetOpcode::G_SSHLSAT) &&
2010 "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT");
2011
2012 LLT ShlType = MRI.getType(MI.getOperand(2).getReg());
2013 LLT DestType = MRI.getType(MI.getOperand(0).getReg());
2014
2015 Register Const = Builder.buildConstant(ShlType, MatchInfo.ValSum).getReg(0);
2016
2017 Register Shift1Base = MatchInfo.Shift2->getOperand(1).getReg();
2018 Register Shift1 =
2019 Builder.buildInstr(Opcode, {DestType}, {Shift1Base, Const}).getReg(0);
2020
2021 // If LogicNonShiftReg is the same as Shift1Base, and the shift1 constant is
2022 // the same as the MatchInfo.Shift2 constant, CSEMIRBuilder will reuse the
2023 // old shift1 when building shift2. If we erased MatchInfo.Shift2 at the end,
2024 // we would actually remove the old shift1 and crash later. Erase it earlier
2025 // to avoid the crash.
2026 MatchInfo.Shift2->eraseFromParent();
2027
2028 Register Shift2Const = MI.getOperand(2).getReg();
2029 Register Shift2 = Builder
2030 .buildInstr(Opcode, {DestType},
2031 {MatchInfo.LogicNonShiftReg, Shift2Const})
2032 .getReg(0);
2033
2034 Register Dest = MI.getOperand(0).getReg();
2035 Builder.buildInstr(MatchInfo.Logic->getOpcode(), {Dest}, {Shift1, Shift2});
2036
2037 // This was one use so it's safe to remove it.
2038 MatchInfo.Logic->eraseFromParent();
2039
2040 MI.eraseFromParent();
2041}
2042
2043bool CombinerHelper::matchCommuteShift(MachineInstr &MI,
2044 BuildFnTy &MatchInfo) const {
2045 assert(MI.getOpcode() == TargetOpcode::G_SHL && "Expected G_SHL");
2046 // Combine (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
2047 // Combine (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
2048 auto &Shl = cast<GenericMachineInstr>(MI);
2049 Register DstReg = Shl.getReg(0);
2050 Register SrcReg = Shl.getReg(1);
2051 Register ShiftReg = Shl.getReg(2);
2052 Register X, C1;
2053
2054 if (!getTargetLowering().isDesirableToCommuteWithShift(MI, !isPreLegalize()))
2055 return false;
2056
2057 if (!mi_match(SrcReg, MRI,
2058 m_OneNonDBGUse(m_any_of(m_GAdd(m_Reg(X), m_Reg(C1)),
2059 m_GOr(m_Reg(X), m_Reg(C1))))))
2060 return false;
2061
2062 APInt C1Val, C2Val;
2063 if (!mi_match(C1, MRI, m_ICstOrSplat(C1Val)) ||
2064 !mi_match(ShiftReg, MRI, m_ICstOrSplat(C2Val)))
2065 return false;
2066
2067 auto *SrcDef = MRI.getVRegDef(SrcReg);
2068 assert((SrcDef->getOpcode() == TargetOpcode::G_ADD ||
2069 SrcDef->getOpcode() == TargetOpcode::G_OR) && "Unexpected op");
2070 LLT SrcTy = MRI.getType(SrcReg);
2071 MatchInfo = [=](MachineIRBuilder &B) {
2072 auto S1 = B.buildShl(SrcTy, X, ShiftReg);
2073 auto S2 = B.buildShl(SrcTy, C1, ShiftReg);
2074 B.buildInstr(SrcDef->getOpcode(), {DstReg}, {S1, S2});
2075 };
2076 return true;
2077}
2078
2079bool CombinerHelper::matchLshrOfTruncOfLshr(MachineInstr &MI,
2080 LshrOfTruncOfLshr &MatchInfo,
2081 MachineInstr &ShiftMI) const {
2082 assert(MI.getOpcode() == TargetOpcode::G_LSHR && "Expected a G_LSHR");
2083
2084 Register N0 = MI.getOperand(1).getReg();
2085 Register N1 = MI.getOperand(2).getReg();
2086 unsigned OpSizeInBits = MRI.getType(N0).getScalarSizeInBits();
2087
2088 APInt N1C, N001C;
2089 if (!mi_match(N1, MRI, m_ICstOrSplat(N1C)))
2090 return false;
2091 auto N001 = ShiftMI.getOperand(2).getReg();
2092 if (!mi_match(N001, MRI, m_ICstOrSplat(N001C)))
2093 return false;
2094
2095 if (N001C.getBitWidth() > N1C.getBitWidth())
2096 N1C = N1C.zext(N001C.getBitWidth());
2097 else
2098 N001C = N001C.zext(N1C.getBitWidth());
2099
2100 Register InnerShift = ShiftMI.getOperand(0).getReg();
2101 LLT InnerShiftTy = MRI.getType(InnerShift);
2102 uint64_t InnerShiftSize = InnerShiftTy.getScalarSizeInBits();
2103 if ((N1C + N001C).ult(InnerShiftSize)) {
2104 MatchInfo.Src = ShiftMI.getOperand(1).getReg();
2105 MatchInfo.ShiftAmt = N1C + N001C;
2106 MatchInfo.ShiftAmtTy = MRI.getType(N001);
2107 MatchInfo.InnerShiftTy = InnerShiftTy;
2108
2109 if ((N001C + OpSizeInBits) == InnerShiftSize)
2110 return true;
2111 if (MRI.hasOneUse(N0) && MRI.hasOneUse(InnerShift)) {
2112 MatchInfo.Mask = true;
2113 MatchInfo.MaskVal = APInt(N1C.getBitWidth(), OpSizeInBits) - N1C;
2114 return true;
2115 }
2116 }
2117 return false;
2118}
2119
2120void CombinerHelper::applyLshrOfTruncOfLshr(
2121 MachineInstr &MI, LshrOfTruncOfLshr &MatchInfo) const {
2122 assert(MI.getOpcode() == TargetOpcode::G_LSHR && "Expected a G_LSHR");
2123
2124 Register Dst = MI.getOperand(0).getReg();
2125 auto ShiftAmt =
2126 Builder.buildConstant(MatchInfo.ShiftAmtTy, MatchInfo.ShiftAmt);
2127 auto Shift =
2128 Builder.buildLShr(MatchInfo.InnerShiftTy, MatchInfo.Src, ShiftAmt);
2129 if (MatchInfo.Mask) {
2130 APInt MaskVal =
2131 APInt::getLowBitsSet(MatchInfo.InnerShiftTy.getScalarSizeInBits(),
2132 MatchInfo.MaskVal.getZExtValue());
2133 auto Mask = Builder.buildConstant(MatchInfo.InnerShiftTy, MaskVal);
2134 auto And = Builder.buildAnd(MatchInfo.InnerShiftTy, Shift, Mask);
2135 Builder.buildTrunc(Dst, And);
2136 } else
2137 Builder.buildTrunc(Dst, Shift);
2138 MI.eraseFromParent();
2139}
2140
2141bool CombinerHelper::matchCombineMulToShl(MachineInstr &MI,
2142 unsigned &ShiftVal) const {
2143 assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
2144 auto MaybeImmVal =
2145 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
2146 if (!MaybeImmVal)
2147 return false;
2148
2149 ShiftVal = MaybeImmVal->Value.exactLogBase2();
2150 return (static_cast<int32_t>(ShiftVal) != -1);
2151}
2152
2153void CombinerHelper::applyCombineMulToShl(MachineInstr &MI,
2154 unsigned &ShiftVal) const {
2155 assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
2156 MachineIRBuilder MIB(MI);
2157 LLT ShiftTy = MRI.getType(MI.getOperand(0).getReg());
2158 auto ShiftCst = MIB.buildConstant(ShiftTy, ShiftVal);
2159 Observer.changingInstr(MI);
2160 MI.setDesc(MIB.getTII().get(TargetOpcode::G_SHL));
2161 MI.getOperand(2).setReg(ShiftCst.getReg(0));
2162 if (ShiftVal == ShiftTy.getScalarSizeInBits() - 1)
2163 MI.clearFlag(MachineInstr::MIFlag::NoSWrap);
2164 Observer.changedInstr(MI);
2165}
2166
2167bool CombinerHelper::matchCombineSubToAdd(MachineInstr &MI,
2168 BuildFnTy &MatchInfo) const {
2169 GSub &Sub = cast<GSub>(MI);
2170
2171 LLT Ty = MRI.getType(Sub.getReg(0));
2172
2173 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, {Ty}}))
2174 return false;
2175
2176 if (!isConstantLegalOrBeforeLegalizer(Ty))
2177 return false;
2178
2179 APInt Imm = getIConstantFromReg(Sub.getRHSReg(), MRI);
2180
2181 MatchInfo = [=, &MI](MachineIRBuilder &B) {
2182 auto NegCst = B.buildConstant(Ty, -Imm);
2183 Observer.changingInstr(MI);
2184 MI.setDesc(B.getTII().get(TargetOpcode::G_ADD));
2185 MI.getOperand(2).setReg(NegCst.getReg(0));
2186 MI.clearFlag(MachineInstr::MIFlag::NoUWrap);
2187 if (Imm.isMinSignedValue())
2188 MI.clearFlag(MachineInstr::MIFlag::NoSWrap);
2189 Observer.changedInstr(MI);
2190 };
2191 return true;
2192}
2193
2194// shl ([sza]ext x), y => zext (shl x, y), if shift does not overflow source
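// Illustrative sketch (types assumed): %e:_(s64) = G_ZEXT %x:_(s32) followed
// by %r:_(s64) = G_SHL %e, 4 becomes %n:_(s32) = G_SHL %x, 4 and
// %r:_(s64) = G_ZEXT %n, provided the known leading zeros of %x cover the
// shift amount.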
2195bool CombinerHelper::matchCombineShlOfExtend(MachineInstr &MI,
2196 RegisterImmPair &MatchData) const {
2197 assert(MI.getOpcode() == TargetOpcode::G_SHL && VT);
2198 if (!getTargetLowering().isDesirableToPullExtFromShl(MI))
2199 return false;
2200
2201 Register LHS = MI.getOperand(1).getReg();
2202
2203 Register ExtSrc;
2204 if (!mi_match(LHS, MRI, m_GAnyExt(m_Reg(ExtSrc))) &&
2205 !mi_match(LHS, MRI, m_GZExt(m_Reg(ExtSrc))) &&
2206 !mi_match(LHS, MRI, m_GSExt(m_Reg(ExtSrc))))
2207 return false;
2208
2209 Register RHS = MI.getOperand(2).getReg();
2210 MachineInstr *MIShiftAmt = MRI.getVRegDef(RHS);
2211 auto MaybeShiftAmtVal = isConstantOrConstantSplatVector(*MIShiftAmt, MRI);
2212 if (!MaybeShiftAmtVal)
2213 return false;
2214
2215 if (LI) {
2216 LLT SrcTy = MRI.getType(ExtSrc);
2217
2218 // We only really care about the legality of the shifted value. We can
2219 // pick any type for the constant shift amount, so ask the target what to
2220 // use. Otherwise we would have to guess and hope it is reported as legal.
2221 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(SrcTy);
2222 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SHL, {SrcTy, ShiftAmtTy}}))
2223 return false;
2224 }
2225
2226 int64_t ShiftAmt = MaybeShiftAmtVal->getSExtValue();
2227 MatchData.Reg = ExtSrc;
2228 MatchData.Imm = ShiftAmt;
2229
2230 unsigned MinLeadingZeros = VT->getKnownZeroes(ExtSrc).countl_one();
2231 unsigned SrcTySize = MRI.getType(ExtSrc).getScalarSizeInBits();
2232 return MinLeadingZeros >= ShiftAmt && ShiftAmt < SrcTySize;
2233}
2234
2235void CombinerHelper::applyCombineShlOfExtend(
2236 MachineInstr &MI, const RegisterImmPair &MatchData) const {
2237 Register ExtSrcReg = MatchData.Reg;
2238 int64_t ShiftAmtVal = MatchData.Imm;
2239
2240 LLT ExtSrcTy = MRI.getType(ExtSrcReg);
2241 auto ShiftAmt = Builder.buildConstant(ExtSrcTy, ShiftAmtVal);
2242 auto NarrowShift =
2243 Builder.buildShl(ExtSrcTy, ExtSrcReg, ShiftAmt, MI.getFlags());
2244 Builder.buildZExt(MI.getOperand(0), NarrowShift);
2245 MI.eraseFromParent();
2246}
2247
2248bool CombinerHelper::matchCombineMergeUnmerge(MachineInstr &MI,
2249 Register &MatchInfo) const {
2250 GMerge &Merge = cast<GMerge>(MI);
2251 SmallVector<Register, 16> MergedValues;
2252 for (unsigned I = 0; I < Merge.getNumSources(); ++I)
2253 MergedValues.emplace_back(Merge.getSourceReg(I));
2254
2255 auto *Unmerge = getOpcodeDef<GUnmerge>(MergedValues[0], MRI);
2256 if (!Unmerge || Unmerge->getNumDefs() != Merge.getNumSources())
2257 return false;
2258
2259 for (unsigned I = 0; I < MergedValues.size(); ++I)
2260 if (MergedValues[I] != Unmerge->getReg(I))
2261 return false;
2262
2263 MatchInfo = Unmerge->getSourceReg();
2264 return true;
2265}
2266
2267static Register peekThroughBitcast(Register Reg,
2268 const MachineRegisterInfo &MRI) {
2269 while (mi_match(Reg, MRI, m_GBitcast(m_Reg(Reg))))
2270 ;
2271
2272 return Reg;
2273}
2274
2275bool CombinerHelper::matchCombineUnmergeMergeToPlainValues(
2276 MachineInstr &MI, SmallVectorImpl<Register> &Operands) const {
2277 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2278 "Expected an unmerge");
2279 auto &Unmerge = cast<GUnmerge>(MI);
2280 Register SrcReg = peekThroughBitcast(Unmerge.getSourceReg(), MRI);
2281
2282 auto *SrcInstr = getOpcodeDef<GMergeLikeInstr>(SrcReg, MRI);
2283 if (!SrcInstr)
2284 return false;
2285
2286 // Check the source type of the merge.
2287 LLT SrcMergeTy = MRI.getType(SrcInstr->getSourceReg(0));
2288 LLT Dst0Ty = MRI.getType(Unmerge.getReg(0));
2289 bool SameSize = Dst0Ty.getSizeInBits() == SrcMergeTy.getSizeInBits();
2290 if (SrcMergeTy != Dst0Ty && !SameSize)
2291 return false;
2292 // They are the same now (modulo a bitcast).
2293 // We can collect all the src registers.
2294 for (unsigned Idx = 0; Idx < SrcInstr->getNumSources(); ++Idx)
2295 Operands.push_back(SrcInstr->getSourceReg(Idx));
2296 return true;
2297}
2298
2299void CombinerHelper::applyCombineUnmergeMergeToPlainValues(
2300 MachineInstr &MI, SmallVectorImpl<Register> &Operands) const {
2301 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2302 "Expected an unmerge");
2303 assert((MI.getNumOperands() - 1 == Operands.size()) &&
2304 "Not enough operands to replace all defs");
2305 unsigned NumElems = MI.getNumOperands() - 1;
2306
2307 LLT SrcTy = MRI.getType(Operands[0]);
2308 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
2309 bool CanReuseInputDirectly = DstTy == SrcTy;
2310 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
2311 Register DstReg = MI.getOperand(Idx).getReg();
2312 Register SrcReg = Operands[Idx];
2313
2314 // This combine may run after RegBankSelect, so we need to be aware of
2315 // register banks.
2316 const auto &DstCB = MRI.getRegClassOrRegBank(DstReg);
2317 if (!DstCB.isNull() && DstCB != MRI.getRegClassOrRegBank(SrcReg)) {
2318 SrcReg = Builder.buildCopy(MRI.getType(SrcReg), SrcReg).getReg(0);
2319 MRI.setRegClassOrRegBank(SrcReg, DstCB);
2320 }
2321
2322 if (CanReuseInputDirectly)
2323 replaceRegWith(MRI, DstReg, SrcReg);
2324 else
2325 Builder.buildCast(DstReg, SrcReg);
2326 }
2327 MI.eraseFromParent();
2328}
2329
2330bool CombinerHelper::matchCombineUnmergeConstant(
2331 MachineInstr &MI, SmallVectorImpl<APInt> &Csts) const {
2332 unsigned SrcIdx = MI.getNumOperands() - 1;
2333 Register SrcReg = MI.getOperand(SrcIdx).getReg();
2334 MachineInstr *SrcInstr = MRI.getVRegDef(SrcReg);
2335 if (SrcInstr->getOpcode() != TargetOpcode::G_CONSTANT &&
2336 SrcInstr->getOpcode() != TargetOpcode::G_FCONSTANT)
2337 return false;
2338 // Break the big constant down into smaller ones.
2339 const MachineOperand &CstVal = SrcInstr->getOperand(1);
2340 APInt Val = SrcInstr->getOpcode() == TargetOpcode::G_CONSTANT
2341 ? CstVal.getCImm()->getValue()
2342 : CstVal.getFPImm()->getValueAPF().bitcastToAPInt();
2343
2344 LLT Dst0Ty = MRI.getType(MI.getOperand(0).getReg());
2345 unsigned ShiftAmt = Dst0Ty.getSizeInBits();
2346 // Unmerge a constant.
2347 for (unsigned Idx = 0; Idx != SrcIdx; ++Idx) {
2348 Csts.emplace_back(Val.trunc(ShiftAmt));
2349 Val = Val.lshr(ShiftAmt);
2350 }
2351
2352 return true;
2353}
2354
2355void CombinerHelper::applyCombineUnmergeConstant(
2356 MachineInstr &MI, SmallVectorImpl<APInt> &Csts) const {
2357 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2358 "Expected an unmerge");
2359 assert((MI.getNumOperands() - 1 == Csts.size()) &&
2360 "Not enough operands to replace all defs");
2361 unsigned NumElems = MI.getNumOperands() - 1;
2362 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
2363 Register DstReg = MI.getOperand(Idx).getReg();
2364 Builder.buildConstant(DstReg, Csts[Idx]);
2365 }
2366
2367 MI.eraseFromParent();
2368}
2369
2370bool CombinerHelper::matchCombineUnmergeUndef(
2371 MachineInstr &MI,
2372 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
2373 unsigned SrcIdx = MI.getNumOperands() - 1;
2374 Register SrcReg = MI.getOperand(SrcIdx).getReg();
2375 MatchInfo = [&MI](MachineIRBuilder &B) {
2376 unsigned NumElems = MI.getNumOperands() - 1;
2377 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
2378 Register DstReg = MI.getOperand(Idx).getReg();
2379 B.buildUndef(DstReg);
2380 }
2381 };
2382 return isa<GImplicitDef>(MRI.getVRegDef(SrcReg));
2383}
2384
2385bool CombinerHelper::matchCombineUnmergeWithDeadLanesToTrunc(
2386 MachineInstr &MI) const {
2387 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2388 "Expected an unmerge");
2389 if (MRI.getType(MI.getOperand(0).getReg()).isVector() ||
2390 MRI.getType(MI.getOperand(MI.getNumDefs()).getReg()).isVector())
2391 return false;
2392 // Check that all the lanes are dead except the first one.
2393 for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
2394 if (!MRI.use_nodbg_empty(MI.getOperand(Idx).getReg()))
2395 return false;
2396 }
2397 return true;
2398}
2399
2400void CombinerHelper::applyCombineUnmergeWithDeadLanesToTrunc(
2401 MachineInstr &MI) const {
2402 Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
2403 Register Dst0Reg = MI.getOperand(0).getReg();
2404 Builder.buildTrunc(Dst0Reg, SrcReg);
2405 MI.eraseFromParent();
2406}
2407
2408bool CombinerHelper::matchCombineUnmergeZExtToZExt(MachineInstr &MI) const {
2409 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2410 "Expected an unmerge");
2411 Register Dst0Reg = MI.getOperand(0).getReg();
2412 LLT Dst0Ty = MRI.getType(Dst0Reg);
2413 // G_ZEXT on vector applies to each lane, so it will
2414 // affect all destinations. Therefore we won't be able
2415 // to simplify the unmerge to just the first definition.
2416 if (Dst0Ty.isVector())
2417 return false;
2418 Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
2419 LLT SrcTy = MRI.getType(SrcReg);
2420 if (SrcTy.isVector())
2421 return false;
2422
2423 Register ZExtSrcReg;
2424 if (!mi_match(SrcReg, MRI, m_GZExt(m_Reg(ZExtSrcReg))))
2425 return false;
2426
2427 // Finally we can replace the first definition with
2428 // a zext of the source if the definition is big enough to hold
2429 // all of ZExtSrc's bits.
2430 LLT ZExtSrcTy = MRI.getType(ZExtSrcReg);
2431 return ZExtSrcTy.getSizeInBits() <= Dst0Ty.getSizeInBits();
2432}
2433
2434void CombinerHelper::applyCombineUnmergeZExtToZExt(MachineInstr &MI) const {
2435 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2436 "Expected an unmerge");
2437
2438 Register Dst0Reg = MI.getOperand(0).getReg();
2439
2440 MachineInstr *ZExtInstr =
2441 MRI.getVRegDef(MI.getOperand(MI.getNumDefs()).getReg());
2442 assert(ZExtInstr && ZExtInstr->getOpcode() == TargetOpcode::G_ZEXT &&
2443 "Expecting a G_ZEXT");
2444
2445 Register ZExtSrcReg = ZExtInstr->getOperand(1).getReg();
2446 LLT Dst0Ty = MRI.getType(Dst0Reg);
2447 LLT ZExtSrcTy = MRI.getType(ZExtSrcReg);
2448
2449 if (Dst0Ty.getSizeInBits() > ZExtSrcTy.getSizeInBits()) {
2450 Builder.buildZExt(Dst0Reg, ZExtSrcReg);
2451 } else {
2452 assert(Dst0Ty.getSizeInBits() == ZExtSrcTy.getSizeInBits() &&
2453 "ZExt src doesn't fit in destination");
2454 replaceRegWith(MRI, Dst0Reg, ZExtSrcReg);
2455 }
2456
2457 Register ZeroReg;
2458 for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
2459 if (!ZeroReg)
2460 ZeroReg = Builder.buildConstant(Dst0Ty, 0).getReg(0);
2461 replaceRegWith(MRI, MI.getOperand(Idx).getReg(), ZeroReg);
2462 }
2463 MI.eraseFromParent();
2464}
2465
2466bool CombinerHelper::matchCombineShiftToUnmerge(MachineInstr &MI,
2467 unsigned TargetShiftSize,
2468 unsigned &ShiftVal) const {
2469 assert((MI.getOpcode() == TargetOpcode::G_SHL ||
2470 MI.getOpcode() == TargetOpcode::G_LSHR ||
2471 MI.getOpcode() == TargetOpcode::G_ASHR) && "Expected a shift");
2472
2473 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
2474 if (Ty.isVector()) // TODO: Handle vector types.
2475 return false;
2476
2477 // Don't narrow further than the requested size.
2478 unsigned Size = Ty.getSizeInBits();
2479 if (Size <= TargetShiftSize)
2480 return false;
2481
2482 auto MaybeImmVal =
2483 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
2484 if (!MaybeImmVal)
2485 return false;
2486
2487 ShiftVal = MaybeImmVal->Value.getSExtValue();
2488 return ShiftVal >= Size / 2 && ShiftVal < Size;
2489}
2490
2491void CombinerHelper::applyCombineShiftToUnmerge(
2492 MachineInstr &MI, const unsigned &ShiftVal) const {
2493 Register DstReg = MI.getOperand(0).getReg();
2494 Register SrcReg = MI.getOperand(1).getReg();
2495 LLT Ty = MRI.getType(SrcReg);
2496 unsigned Size = Ty.getSizeInBits();
2497 unsigned HalfSize = Size / 2;
2498 assert(ShiftVal >= HalfSize);
2499
2500 LLT HalfTy = LLT::scalar(HalfSize);
2501
2502 auto Unmerge = Builder.buildUnmerge(HalfTy, SrcReg);
2503 unsigned NarrowShiftAmt = ShiftVal - HalfSize;
2504
2505 if (MI.getOpcode() == TargetOpcode::G_LSHR) {
2506 Register Narrowed = Unmerge.getReg(1);
2507
2508 // dst = G_LSHR s64:x, C for C >= 32
2509 // =>
2510 // lo, hi = G_UNMERGE_VALUES x
2511 // dst = G_MERGE_VALUES (G_LSHR hi, C - 32), 0
2512
2513 if (NarrowShiftAmt != 0) {
2514 Narrowed = Builder.buildLShr(HalfTy, Narrowed,
2515 Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
2516 }
2517
2518 auto Zero = Builder.buildConstant(HalfTy, 0);
2519 Builder.buildMergeLikeInstr(DstReg, {Narrowed, Zero});
2520 } else if (MI.getOpcode() == TargetOpcode::G_SHL) {
2521 Register Narrowed = Unmerge.getReg(0);
2522 // dst = G_SHL s64:x, C for C >= 32
2523 // =>
2524 // lo, hi = G_UNMERGE_VALUES x
2525 // dst = G_MERGE_VALUES 0, (G_SHL hi, C - 32)
2526 if (NarrowShiftAmt != 0) {
2527 Narrowed = Builder.buildShl(HalfTy, Narrowed,
2528 Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
2529 }
2530
2531 auto Zero = Builder.buildConstant(HalfTy, 0);
2532 Builder.buildMergeLikeInstr(DstReg, {Zero, Narrowed});
2533 } else {
2534 assert(MI.getOpcode() == TargetOpcode::G_ASHR);
2535 auto Hi = Builder.buildAShr(
2536 HalfTy, Unmerge.getReg(1),
2537 Builder.buildConstant(HalfTy, HalfSize - 1));
2538
2539 if (ShiftVal == HalfSize) {
2540 // (G_ASHR i64:x, 32) ->
2541 // G_MERGE_VALUES hi_32(x), (G_ASHR hi_32(x), 31)
2542 Builder.buildMergeLikeInstr(DstReg, {Unmerge.getReg(1), Hi});
2543 } else if (ShiftVal == Size - 1) {
2544 // Don't need a second shift.
2545 // (G_ASHR i64:x, 63) ->
2546 // %narrowed = (G_ASHR hi_32(x), 31)
2547 // G_MERGE_VALUES %narrowed, %narrowed
2548 Builder.buildMergeLikeInstr(DstReg, {Hi, Hi});
2549 } else {
2550 auto Lo = Builder.buildAShr(
2551 HalfTy, Unmerge.getReg(1),
2552 Builder.buildConstant(HalfTy, ShiftVal - HalfSize));
2553
2554 // (G_ASHR i64:x, C) ->, for C >= 32
2555 // G_MERGE_VALUES (G_ASHR hi_32(x), C - 32), (G_ASHR hi_32(x), 31)
2556 Builder.buildMergeLikeInstr(DstReg, {Lo, Hi});
2557 }
2558 }
2559
2560 MI.eraseFromParent();
2561}
2562
2563bool CombinerHelper::tryCombineShiftToUnmerge(
2564 MachineInstr &MI, unsigned TargetShiftAmount) const {
2565 unsigned ShiftAmt;
2566 if (matchCombineShiftToUnmerge(MI, TargetShiftAmount, ShiftAmt)) {
2567 applyCombineShiftToUnmerge(MI, ShiftAmt);
2568 return true;
2569 }
2570
2571 return false;
2572}
2573
2574bool CombinerHelper::matchCombineI2PToP2I(MachineInstr &MI,
2575 Register &Reg) const {
2576 assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
2577 Register DstReg = MI.getOperand(0).getReg();
2578 LLT DstTy = MRI.getType(DstReg);
2579 Register SrcReg = MI.getOperand(1).getReg();
2580 return mi_match(SrcReg, MRI,
2581 m_GPtrToInt(m_all_of(m_SpecificType(DstTy), m_Reg(Reg))));
2582}
2583
2584void CombinerHelper::applyCombineI2PToP2I(MachineInstr &MI,
2585 Register &Reg) const {
2586 assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
2587 Register DstReg = MI.getOperand(0).getReg();
2588 Builder.buildCopy(DstReg, Reg);
2589 MI.eraseFromParent();
2590}
2591
2592void CombinerHelper::applyCombineP2IToI2P(MachineInstr &MI,
2593 Register &Reg) const {
2594 assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT");
2595 Register DstReg = MI.getOperand(0).getReg();
2596 Builder.buildZExtOrTrunc(DstReg, Reg);
2597 MI.eraseFromParent();
2598}
2599
2600bool CombinerHelper::matchCombineAddP2IToPtrAdd(
2601 MachineInstr &MI, std::pair<Register, bool> &PtrReg) const {
2602 assert(MI.getOpcode() == TargetOpcode::G_ADD);
2603 Register LHS = MI.getOperand(1).getReg();
2604 Register RHS = MI.getOperand(2).getReg();
2605 LLT IntTy = MRI.getType(LHS);
2606
2607 // G_PTR_ADD always has the pointer in the LHS, so we may need to commute the
2608 // instruction.
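 // Illustrative sketch (names assumed): both (G_ADD (G_PTRTOINT %p), %x) and
 // the commuted (G_ADD %x, (G_PTRTOINT %p)) become
 // (G_PTRTOINT (G_PTR_ADD %p, %x)); PtrReg.second records the commute.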
2609 PtrReg.second = false;
2610 for (Register SrcReg : {LHS, RHS}) {
2611 if (mi_match(SrcReg, MRI, m_GPtrToInt(m_Reg(PtrReg.first)))) {
2612 // Don't handle cases where the integer is implicitly converted to the
2613 // pointer width.
2614 LLT PtrTy = MRI.getType(PtrReg.first);
2615 if (PtrTy.getScalarSizeInBits() == IntTy.getScalarSizeInBits())
2616 return true;
2617 }
2618
2619 PtrReg.second = true;
2620 }
2621
2622 return false;
2623}
2624
2625void CombinerHelper::applyCombineAddP2IToPtrAdd(
2626 MachineInstr &MI, std::pair<Register, bool> &PtrReg) const {
2627 Register Dst = MI.getOperand(0).getReg();
2628 Register LHS = MI.getOperand(1).getReg();
2629 Register RHS = MI.getOperand(2).getReg();
2630
2631 const bool DoCommute = PtrReg.second;
2632 if (DoCommute)
2633 std::swap(LHS, RHS);
2634 LHS = PtrReg.first;
2635
2636 LLT PtrTy = MRI.getType(LHS);
2637
2638 auto PtrAdd = Builder.buildPtrAdd(PtrTy, LHS, RHS);
2639 Builder.buildPtrToInt(Dst, PtrAdd);
2640 MI.eraseFromParent();
2641}
2642
2643bool CombinerHelper::matchCombineConstPtrAddToI2P(MachineInstr &MI,
2644 APInt &NewCst) const {
2645 auto &PtrAdd = cast<GPtrAdd>(MI);
2646 Register LHS = PtrAdd.getBaseReg();
2647 Register RHS = PtrAdd.getOffsetReg();
2648 MachineRegisterInfo &MRI = Builder.getMF().getRegInfo();
2649
2650 if (auto RHSCst = getIConstantVRegVal(RHS, MRI)) {
2651 APInt Cst;
2652 if (mi_match(LHS, MRI, m_GIntToPtr(m_ICst(Cst)))) {
2653 auto DstTy = MRI.getType(PtrAdd.getReg(0));
2654 // G_INTTOPTR uses zero-extension
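 // Worked example (values assumed): with a 64-bit result type, an s32
 // constant 0xFFFFFFFF zero-extends to 0x00000000FFFFFFFF while an offset of
 // -1 sign-extends to all ones, so the folded constant is 0x00000000FFFFFFFE.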
2655 NewCst = Cst.zextOrTrunc(DstTy.getSizeInBits());
2656 NewCst += RHSCst->sextOrTrunc(DstTy.getSizeInBits());
2657 return true;
2658 }
2659 }
2660
2661 return false;
2662}
2663
2664void CombinerHelper::applyCombineConstPtrAddToI2P(MachineInstr &MI,
2665 APInt &NewCst) const {
2666 auto &PtrAdd = cast<GPtrAdd>(MI);
2667 Register Dst = PtrAdd.getReg(0);
2668
2669 Builder.buildConstant(Dst, NewCst);
2670 PtrAdd.eraseFromParent();
2671}
2672
2673bool CombinerHelper::matchCombineAnyExtTrunc(MachineInstr &MI,
2674 Register &Reg) const {
2675 assert(MI.getOpcode() == TargetOpcode::G_ANYEXT && "Expected a G_ANYEXT");
2676 Register DstReg = MI.getOperand(0).getReg();
2677 Register SrcReg = MI.getOperand(1).getReg();
2678 Register OriginalSrcReg = getSrcRegIgnoringCopies(SrcReg, MRI);
2679 if (OriginalSrcReg.isValid())
2680 SrcReg = OriginalSrcReg;
2681 LLT DstTy = MRI.getType(DstReg);
2682 return mi_match(SrcReg, MRI,
2683 m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy)))) &&
2684 canReplaceReg(DstReg, Reg, MRI);
2685}
2686
2687bool CombinerHelper::matchCombineZextTrunc(MachineInstr &MI,
2688 Register &Reg) const {
2689 assert(MI.getOpcode() == TargetOpcode::G_ZEXT && "Expected a G_ZEXT");
2690 Register DstReg = MI.getOperand(0).getReg();
2691 Register SrcReg = MI.getOperand(1).getReg();
2692 LLT DstTy = MRI.getType(DstReg);
2693 if (mi_match(SrcReg, MRI,
2694 m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy)))) &&
2695 canReplaceReg(DstReg, Reg, MRI)) {
2696 unsigned DstSize = DstTy.getScalarSizeInBits();
2697 unsigned SrcSize = MRI.getType(SrcReg).getScalarSizeInBits();
2698 return VT->getKnownBits(Reg).countMinLeadingZeros() >= DstSize - SrcSize;
2699 }
2700 return false;
2701}
2702
2703static LLT getMidVTForTruncRightShiftCombine(LLT ShiftTy, LLT TruncTy) {
2704 const unsigned ShiftSize = ShiftTy.getScalarSizeInBits();
2705 const unsigned TruncSize = TruncTy.getScalarSizeInBits();
2706
2707 // ShiftTy > 32 > TruncTy -> 32
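 // Illustrative sketch: an s64 right shift whose result is truncated to s16
 // is instead performed as an s32 shift, then truncated.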
2708 if (ShiftSize > 32 && TruncSize < 32)
2709 return ShiftTy.changeElementSize(32);
2710
2711 // TODO: We could also reduce to 16 bits, but that's more target-dependent.
2712 // Some targets like it, some don't, some only like it under certain
2713 // conditions/processor versions, etc.
2714 // A TL hook might be needed for this.
2715
2716 // Don't combine
2717 return ShiftTy;
2718}
2719
2720bool CombinerHelper::matchCombineTruncOfShift(
2721 MachineInstr &MI, std::pair<MachineInstr *, LLT> &MatchInfo) const {
2722 assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
2723 Register DstReg = MI.getOperand(0).getReg();
2724 Register SrcReg = MI.getOperand(1).getReg();
2725
2726 if (!MRI.hasOneNonDBGUse(SrcReg))
2727 return false;
2728
2729 LLT SrcTy = MRI.getType(SrcReg);
2730 LLT DstTy = MRI.getType(DstReg);
2731
2732 MachineInstr *SrcMI = getDefIgnoringCopies(SrcReg, MRI);
2733 const auto &TL = getTargetLowering();
2734
2735 LLT NewShiftTy;
2736 switch (SrcMI->getOpcode()) {
2737 default:
2738 return false;
2739 case TargetOpcode::G_SHL: {
2740 NewShiftTy = DstTy;
2741
2742 // Make sure new shift amount is legal.
2743 KnownBits Known = VT->getKnownBits(SrcMI->getOperand(2).getReg());
2744 if (Known.getMaxValue().uge(NewShiftTy.getScalarSizeInBits()))
2745 return false;
2746 break;
2747 }
2748 case TargetOpcode::G_LSHR:
2749 case TargetOpcode::G_ASHR: {
2750 // For right shifts, we conservatively do not do the transform if the TRUNC
2751 // has any STORE users. The reason is that if we change the type of the
2752 // shift, we may break the truncstore combine.
2753 //
2754 // TODO: Fix truncstore combine to handle (trunc(lshr (trunc x), k)).
2755 for (auto &User : MRI.use_instructions(DstReg))
2756 if (User.getOpcode() == TargetOpcode::G_STORE)
2757 return false;
2758
2759 NewShiftTy = getMidVTForTruncRightShiftCombine(SrcTy, DstTy);
2760 if (NewShiftTy == SrcTy)
2761 return false;
2762
2763 // Make sure we won't lose information by truncating the high bits.
2764 KnownBits Known = VT->getKnownBits(SrcMI->getOperand(2).getReg());
2765 if (Known.getMaxValue().ugt(NewShiftTy.getScalarSizeInBits() -
2766 DstTy.getScalarSizeInBits()))
2767 return false;
2768 break;
2769 }
2770 }
2771
2772 if (!isLegalOrBeforeLegalizer(
2773 {SrcMI->getOpcode(),
2774 {NewShiftTy, TL.getPreferredShiftAmountTy(NewShiftTy)}}))
2775 return false;
2776
2777 MatchInfo = std::make_pair(SrcMI, NewShiftTy);
2778 return true;
2779}
2780
2781void CombinerHelper::applyCombineTruncOfShift(
2782 MachineInstr &MI, std::pair<MachineInstr *, LLT> &MatchInfo) const {
2783 MachineInstr *ShiftMI = MatchInfo.first;
2784 LLT NewShiftTy = MatchInfo.second;
2785
2786 Register Dst = MI.getOperand(0).getReg();
2787 LLT DstTy = MRI.getType(Dst);
2788
2789 Register ShiftAmt = ShiftMI->getOperand(2).getReg();
2790 Register ShiftSrc = ShiftMI->getOperand(1).getReg();
2791 ShiftSrc = Builder.buildTrunc(NewShiftTy, ShiftSrc).getReg(0);
2792
2793 Register NewShift =
2794 Builder
2795 .buildInstr(ShiftMI->getOpcode(), {NewShiftTy}, {ShiftSrc, ShiftAmt})
2796 .getReg(0);
2797
2798 if (NewShiftTy == DstTy)
2799 replaceRegWith(MRI, Dst, NewShift);
2800 else
2801 Builder.buildTrunc(Dst, NewShift);
2802
2803 eraseInst(MI);
2804}
2805
2806bool CombinerHelper::matchAnyExplicitUseIsUndef(MachineInstr &MI) const {
2807 return any_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
2808 return MO.isReg() &&
2809 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
2810 });
2811}
2812
2813bool CombinerHelper::matchAllExplicitUsesAreUndef(MachineInstr &MI) const {
2814 return all_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
2815 return !MO.isReg() ||
2816 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
2817 });
2818}
2819
2820bool CombinerHelper::matchUndefShuffleVectorMask(MachineInstr &MI) const {
2821 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
2822 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
2823 return all_of(Mask, [](int Elt) { return Elt < 0; });
2824}
2825
2826bool CombinerHelper::matchUndefStore(MachineInstr &MI) const {
2827 assert(MI.getOpcode() == TargetOpcode::G_STORE);
2828 return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(0).getReg(),
2829 MRI);
2830}
2831
2832bool CombinerHelper::matchUndefSelectCmp(MachineInstr &MI) const {
2833 assert(MI.getOpcode() == TargetOpcode::G_SELECT);
2834 return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(1).getReg(),
2835 MRI);
2836}
2837
2838bool CombinerHelper::matchInsertExtractVecEltOutOfBounds(
2839 MachineInstr &MI) const {
2840 assert((MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT ||
2841 MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT) &&
2842 "Expected an insert/extract element op");
2843 LLT VecTy = MRI.getType(MI.getOperand(1).getReg());
2844 if (VecTy.isScalableVector())
2845 return false;
2846
2847 unsigned IdxIdx =
2848 MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
2849 auto Idx = getIConstantVRegVal(MI.getOperand(IdxIdx).getReg(), MRI);
2850 if (!Idx)
2851 return false;
2852 return Idx->getZExtValue() >= VecTy.getNumElements();
2853}
2854
2855bool CombinerHelper::matchConstantSelectCmp(MachineInstr &MI,
2856 unsigned &OpIdx) const {
2857 GSelect &SelMI = cast<GSelect>(MI);
2858 auto Cst =
2859 isConstantOrConstantSplatVector(*MRI.getVRegDef(SelMI.getCondReg()), MRI);
2860 if (!Cst)
2861 return false;
2862 OpIdx = Cst->isZero() ? 3 : 2;
2863 return true;
2864}
2865
2866void CombinerHelper::eraseInst(MachineInstr &MI) const { MI.eraseFromParent(); }
2867
2868bool CombinerHelper::matchEqualDefs(const MachineOperand &MOP1,
2869 const MachineOperand &MOP2) const {
2870 if (!MOP1.isReg() || !MOP2.isReg())
2871 return false;
2872 auto InstAndDef1 = getDefSrcRegIgnoringCopies(MOP1.getReg(), MRI);
2873 if (!InstAndDef1)
2874 return false;
2875 auto InstAndDef2 = getDefSrcRegIgnoringCopies(MOP2.getReg(), MRI);
2876 if (!InstAndDef2)
2877 return false;
2878 MachineInstr *I1 = InstAndDef1->MI;
2879 MachineInstr *I2 = InstAndDef2->MI;
2880
2881 // Handle a case like this:
2882 //
2883 // %0:_(s64), %1:_(s64) = G_UNMERGE_VALUES %2:_(<2 x s64>)
2884 //
2885 // Even though %0 and %1 are produced by the same instruction they are not
2886 // the same values.
2887 if (I1 == I2)
2888 return MOP1.getReg() == MOP2.getReg();
2889
2890 // If we have an instruction which loads or stores, we can't guarantee that
2891 // it is identical.
2892 //
2893 // For example, we may have
2894 //
2895 // %x1 = G_LOAD %addr (load N from @somewhere)
2896 // ...
2897 // call @foo
2898 // ...
2899 // %x2 = G_LOAD %addr (load N from @somewhere)
2900 // ...
2901 // %or = G_OR %x1, %x2
2902 //
2903 // It's possible that @foo will modify whatever lives at the address we're
2904 // loading from. To be safe, let's just assume that all loads and stores
2905 // are different (unless we have something which is guaranteed to not
2906 // change.)
2907 if (I1->mayLoadOrStore() && !I1->isDereferenceableInvariantLoad())
2908 return false;
2909
2910 // If both instructions are loads or stores, they are equal only if both
2911 // are dereferenceable invariant loads with the same number of bits.
2912 if (I1->mayLoadOrStore() && I2->mayLoadOrStore()) {
2913 auto *LS1 = dyn_cast<GLoadStore>(I1);
2914 auto *LS2 = dyn_cast<GLoadStore>(I2);
2915 if (!LS1 || !LS2)
2916 return false;
2917
2918 if (!I2->isDereferenceableInvariantLoad() ||
2919 (LS1->getMemSizeInBits() != LS2->getMemSizeInBits()))
2920 return false;
2921 }
2922
2923 // Check for physical registers on the instructions first to avoid cases
2924 // like this:
2925 //
2926 // %a = COPY $physreg
2927 // ...
2928 // SOMETHING implicit-def $physreg
2929 // ...
2930 // %b = COPY $physreg
2931 //
2932 // These copies are not equivalent.
2933 if (any_of(I1->uses(), [](const MachineOperand &MO) {
2934 return MO.isReg() && MO.getReg().isPhysical();
2935 })) {
2936 // Check if we have a case like this:
2937 //
2938 // %a = COPY $physreg
2939 // %b = COPY %a
2940 //
2941 // In this case, I1 and I2 will both be equal to %a = COPY $physreg.
2942 // From that, we know that they must have the same value, since they must
2943 // have come from the same COPY.
2944 return I1->isIdenticalTo(*I2);
2945 }
2946
2947 // We don't have any physical registers, so we don't necessarily need the
2948 // same vreg defs.
2949 //
2950 // On the off-chance that there's some target instruction feeding into the
2951 // instruction, let's use produceSameValue instead of isIdenticalTo.
2952 if (Builder.getTII().produceSameValue(*I1, *I2, &MRI)) {
2953 // Handle instructions with multiple defs that produce the same values.
2954 // The values match for operands with the same index.
2955 // %0:_(s8), %1:_(s8), %2:_(s8), %3:_(s8) = G_UNMERGE_VALUES %4:_(<4 x s8>)
2956 // %5:_(s8), %6:_(s8), %7:_(s8), %8:_(s8) = G_UNMERGE_VALUES %4:_(<4 x s8>)
2957 // I1 and I2 are different instructions that produce the same values:
2958 // %1 and %6 are the same, while %1 and %7 are not.
2959 return I1->findRegisterDefOperandIdx(InstAndDef1->Reg, /*TRI=*/nullptr) ==
2960 I2->findRegisterDefOperandIdx(InstAndDef2->Reg, /*TRI=*/nullptr);
2961 }
2962 return false;
2963}
2964
2965bool CombinerHelper::matchConstantOp(const MachineOperand &MOP,
2966 int64_t C) const {
2967 if (!MOP.isReg())
2968 return false;
2969 auto *MI = MRI.getVRegDef(MOP.getReg());
2970 auto MaybeCst = isConstantOrConstantSplatVector(*MI, MRI);
2971 return MaybeCst && MaybeCst->getBitWidth() <= 64 &&
2972 MaybeCst->getSExtValue() == C;
2973}
2974
2975bool CombinerHelper::matchConstantFPOp(const MachineOperand &MOP,
2976 double C) const {
2977 if (!MOP.isReg())
2978 return false;
2979 std::optional<FPValueAndVReg> MaybeCst;
2980 if (!mi_match(MOP.getReg(), MRI, m_GFCstOrSplat(MaybeCst)))
2981 return false;
2982
2983 return MaybeCst->Value.isExactlyValue(C);
2984}
2985
2986void CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI,
2987 unsigned OpIdx) const {
2988 assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
2989 Register OldReg = MI.getOperand(0).getReg();
2990 Register Replacement = MI.getOperand(OpIdx).getReg();
2991 assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
2992 replaceRegWith(MRI, OldReg, Replacement);
2993 MI.eraseFromParent();
2994}
2995
2996void CombinerHelper::replaceSingleDefInstWithReg(MachineInstr &MI,
2997 Register Replacement) const {
2998 assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
2999 Register OldReg = MI.getOperand(0).getReg();
3000 assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
3001 replaceRegWith(MRI, OldReg, Replacement);
3002 MI.eraseFromParent();
3003}
3004
3005bool CombinerHelper::matchConstantLargerBitWidth(MachineInstr &MI,
3006 unsigned ConstIdx) const {
3007 Register ConstReg = MI.getOperand(ConstIdx).getReg();
3008 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
3009
3010 // Get the shift amount
3011 auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstReg, MRI);
3012 if (!VRegAndVal)
3013 return false;
3014
3015 // Return true if the shift amount is >= the bitwidth.
3016 return (VRegAndVal->Value.uge(DstTy.getSizeInBits()));
3017}
3018
3019void CombinerHelper::applyFunnelShiftConstantModulo(MachineInstr &MI) const {
3020 assert((MI.getOpcode() == TargetOpcode::G_FSHL ||
3021 MI.getOpcode() == TargetOpcode::G_FSHR) &&
3022 "This is not a funnel shift operation");
3023
3024 Register ConstReg = MI.getOperand(3).getReg();
3025 LLT ConstTy = MRI.getType(ConstReg);
3026 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
3027
3028 auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstReg, MRI);
3029 assert((VRegAndVal) && "Value is not a constant");
3030
3031 // Calculate the new Shift Amount = Old Shift Amount % BitWidth
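 // Worked example (values assumed): an s32 G_FSHL with a shift amount of 36
 // is rebuilt with a shift amount of 36 % 32 = 4.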
3032 APInt NewConst = VRegAndVal->Value.urem(
3033 APInt(ConstTy.getSizeInBits(), DstTy.getScalarSizeInBits()));
3034
3035 auto NewConstInstr = Builder.buildConstant(ConstTy, NewConst.getZExtValue());
3036 Builder.buildInstr(
3037 MI.getOpcode(), {MI.getOperand(0)},
3038 {MI.getOperand(1), MI.getOperand(2), NewConstInstr.getReg(0)});
3039
3040 MI.eraseFromParent();
3041}
3042
3043bool CombinerHelper::matchSelectSameVal(MachineInstr &MI) const {
3044 assert(MI.getOpcode() == TargetOpcode::G_SELECT);
3045 // Match (cond ? x : x)
3046 return matchEqualDefs(MI.getOperand(2), MI.getOperand(3)) &&
3047 canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(2).getReg(),
3048 MRI);
3049}
3050
3051bool CombinerHelper::matchBinOpSameVal(MachineInstr &MI) const {
3052 return matchEqualDefs(MI.getOperand(1), MI.getOperand(2)) &&
3053 canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(),
3054 MRI);
3055}
3056
3057bool CombinerHelper::matchOperandIsUndef(MachineInstr &MI,
3058 unsigned OpIdx) const {
3059 MachineOperand &MO = MI.getOperand(OpIdx);
3060 return MO.isReg() &&
3061 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
3062}
3063
3064bool CombinerHelper::matchOperandIsKnownToBeAPowerOfTwo(MachineInstr &MI,
3065 unsigned OpIdx) const {
3066 MachineOperand &MO = MI.getOperand(OpIdx);
3067 return isKnownToBeAPowerOfTwo(MO.getReg(), MRI, VT);
3068}
3069
3070void CombinerHelper::replaceInstWithFConstant(MachineInstr &MI,
3071 double C) const {
3072 assert(MI.getNumDefs() == 1 && "Expected only one def?");
3073 Builder.buildFConstant(MI.getOperand(0), C);
3074 MI.eraseFromParent();
3075}
3076
3077void CombinerHelper::replaceInstWithConstant(MachineInstr &MI,
3078 int64_t C) const {
3079 assert(MI.getNumDefs() == 1 && "Expected only one def?");
3080 Builder.buildConstant(MI.getOperand(0), C);
3081 MI.eraseFromParent();
3082}
3083
3084void CombinerHelper::replaceInstWithConstant(MachineInstr &MI, APInt C) const {
3085 assert(MI.getNumDefs() == 1 && "Expected only one def?");
3086 Builder.buildConstant(MI.getOperand(0), C);
3087 MI.eraseFromParent();
3088}
3089
3090void CombinerHelper::replaceInstWithFConstant(MachineInstr &MI,
3091 ConstantFP *CFP) const {
3092 assert(MI.getNumDefs() == 1 && "Expected only one def?");
3093 Builder.buildFConstant(MI.getOperand(0), CFP->getValueAPF());
3094 MI.eraseFromParent();
3095}
3096
3097void CombinerHelper::replaceInstWithUndef(MachineInstr &MI) const {
3098 assert(MI.getNumDefs() == 1 && "Expected only one def?");
3099 Builder.buildUndef(MI.getOperand(0));
3100 MI.eraseFromParent();
3101}
3102
3103bool CombinerHelper::matchSimplifyAddToSub(
3104 MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) const {
3105 Register LHS = MI.getOperand(1).getReg();
3106 Register RHS = MI.getOperand(2).getReg();
3107 Register &NewLHS = std::get<0>(MatchInfo);
3108 Register &NewRHS = std::get<1>(MatchInfo);
3109
3110 // Helper lambda to check for opportunities for
3111 // ((0-A) + B) -> B - A
3112 // (A + (0-B)) -> A - B
3113 auto CheckFold = [&](Register &MaybeSub, Register &MaybeNewLHS) {
3114 if (!mi_match(MaybeSub, MRI, m_Neg(m_Reg(NewRHS))))
3115 return false;
3116 NewLHS = MaybeNewLHS;
3117 return true;
3118 };
3119
3120 return CheckFold(LHS, RHS) || CheckFold(RHS, LHS);
3121}
3122
3123bool CombinerHelper::matchCombineInsertVecElts(
3124 MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) const {
3125 assert(MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT &&
3126 "Invalid opcode");
3127 Register DstReg = MI.getOperand(0).getReg();
3128 LLT DstTy = MRI.getType(DstReg);
3129 assert(DstTy.isVector() && "Invalid G_INSERT_VECTOR_ELT?");
3130
3131 if (DstTy.isScalableVector())
3132 return false;
3133
3134 unsigned NumElts = DstTy.getNumElements();
3135 // If this MI is part of a sequence of insert_vec_elts, then
3136 // don't do the combine in the middle of the sequence.
3137 if (MRI.hasOneUse(DstReg) && MRI.use_instr_begin(DstReg)->getOpcode() ==
3138 TargetOpcode::G_INSERT_VECTOR_ELT)
3139 return false;
3140 MachineInstr *CurrInst = &MI;
3141 MachineInstr *TmpInst;
3142 int64_t IntImm;
3143 Register TmpReg;
3144 MatchInfo.resize(NumElts);
3145 while (mi_match(
3146 CurrInst->getOperand(0).getReg(), MRI,
3147 m_GInsertVecElt(m_MInstr(TmpInst), m_Reg(TmpReg), m_ICst(IntImm)))) {
3148 if (IntImm >= NumElts || IntImm < 0)
3149 return false;
3150 if (!MatchInfo[IntImm])
3151 MatchInfo[IntImm] = TmpReg;
3152 CurrInst = TmpInst;
3153 }
3154 // Variable index.
3155 if (CurrInst->getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
3156 return false;
3157 if (TmpInst->getOpcode() == TargetOpcode::G_BUILD_VECTOR) {
3158 for (unsigned I = 1; I < TmpInst->getNumOperands(); ++I) {
3159 if (!MatchInfo[I - 1].isValid())
3160 MatchInfo[I - 1] = TmpInst->getOperand(I).getReg();
3161 }
3162 return true;
3163 }
3164 // If we didn't end in a G_IMPLICIT_DEF and the source is not fully
3165 // overwritten, bail out.
3166 return TmpInst->getOpcode() == TargetOpcode::G_IMPLICIT_DEF ||
3167 all_of(MatchInfo, [](Register Reg) { return !!Reg; });
3168}
3169
3170void CombinerHelper::applyCombineInsertVecElts(
3171 MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) const {
3172 Register UndefReg;
3173 auto GetUndef = [&]() {
3174 if (UndefReg)
3175 return UndefReg;
3176 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
3177 UndefReg = Builder.buildUndef(DstTy.getScalarType()).getReg(0);
3178 return UndefReg;
3179 };
3180 for (Register &Reg : MatchInfo) {
3181 if (!Reg)
3182 Reg = GetUndef();
3183 }
3184 Builder.buildBuildVector(MI.getOperand(0).getReg(), MatchInfo);
3185 MI.eraseFromParent();
3186}
3187
3188void CombinerHelper::applySimplifyAddToSub(
3189 MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) const {
3190 Register SubLHS, SubRHS;
3191 std::tie(SubLHS, SubRHS) = MatchInfo;
3192 Builder.buildSub(MI.getOperand(0).getReg(), SubLHS, SubRHS);
3193 MI.eraseFromParent();
3194}
3195
3196bool CombinerHelper::matchBinopWithNegInner(Register MInner, Register Other,
3197 unsigned RootOpc, Register Dst,
3198 LLT Ty,
3199 BuildFnTy &MatchInfo) const {
3200 /// Helper function for matchBinopWithNeg: tries to match one commuted form
3201 /// of `a bitwiseop (~b +/- c)` -> `a bitwiseop ~(b -/+ c)`.
3202 MachineInstr *InnerDef = MRI.getVRegDef(MInner);
3203 if (!InnerDef)
3204 return false;
3205
3206 unsigned InnerOpc = InnerDef->getOpcode();
3207 if (InnerOpc != TargetOpcode::G_ADD && InnerOpc != TargetOpcode::G_SUB)
3208 return false;
3209
3210 if (!MRI.hasOneNonDBGUse(MInner))
3211 return false;
3212
3213 Register InnerLHS = InnerDef->getOperand(1).getReg();
3214 Register InnerRHS = InnerDef->getOperand(2).getReg();
3215 Register NotSrc;
3216 Register B, C;
3217
3218 // Check if either operand is ~b
3219 auto TryMatch = [&](Register MaybeNot, Register Other) {
3220 if (mi_match(MaybeNot, MRI, m_Not(m_Reg(NotSrc)))) {
3221 if (!MRI.hasOneNonDBGUse(MaybeNot))
3222 return false;
3223 B = NotSrc;
3224 C = Other;
3225 return true;
3226 }
3227 return false;
3228 };
3229
3230 if (!TryMatch(InnerLHS, InnerRHS) && !TryMatch(InnerRHS, InnerLHS))
3231 return false;
3232
3233 // Flip add/sub
3234 unsigned FlippedOpc = (InnerOpc == TargetOpcode::G_ADD) ? TargetOpcode::G_SUB
3235 : TargetOpcode::G_ADD;
3236
3237 Register A = Other;
3238 MatchInfo = [=](MachineIRBuilder &Builder) {
3239 auto NewInner = Builder.buildInstr(FlippedOpc, {Ty}, {B, C});
3240 auto NewNot = Builder.buildNot(Ty, NewInner);
3241 Builder.buildInstr(RootOpc, {Dst}, {A, NewNot});
3242 };
3243 return true;
3244}
3245
3246bool CombinerHelper::matchBinopWithNeg(MachineInstr &MI,
3247 BuildFnTy &MatchInfo) const {
3248 // Fold `a bitwiseop (~b +/- c)` -> `a bitwiseop ~(b -/+ c)`
3249 // Root MI is one of G_AND, G_OR, G_XOR.
3250 // We also look for commuted forms of the operations. The pattern should
3251 // not apply if the inner operations have uses other than this one.
3252
3253 unsigned RootOpc = MI.getOpcode();
3254 Register Dst = MI.getOperand(0).getReg();
3255 LLT Ty = MRI.getType(Dst);
3256
3257 Register LHS = MI.getOperand(1).getReg();
3258 Register RHS = MI.getOperand(2).getReg();
3259 // Check the commuted and uncommuted forms of the operation.
3260 return matchBinopWithNegInner(LHS, RHS, RootOpc, Dst, Ty, MatchInfo) ||
3261 matchBinopWithNegInner(RHS, LHS, RootOpc, Dst, Ty, MatchInfo);
3262}
3263
3264bool CombinerHelper::matchHoistLogicOpWithSameOpcodeHands(
3265 MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) const {
3266 // Matches: logic (hand x, ...), (hand y, ...) -> hand (logic x, y), ...
3267 //
3268 // Creates the new hand + logic instruction (but does not insert them.)
3269 //
3270 // On success, MatchInfo is populated with the new instructions. These are
3271 // inserted in applyHoistLogicOpWithSameOpcodeHands.
3272 unsigned LogicOpcode = MI.getOpcode();
3273 assert(LogicOpcode == TargetOpcode::G_AND ||
3274 LogicOpcode == TargetOpcode::G_OR ||
3275 LogicOpcode == TargetOpcode::G_XOR);
3276 MachineIRBuilder MIB(MI);
3277 Register Dst = MI.getOperand(0).getReg();
3278 Register LHSReg = MI.getOperand(1).getReg();
3279 Register RHSReg = MI.getOperand(2).getReg();
3280
3281 // Don't recompute anything.
3282 if (!MRI.hasOneNonDBGUse(LHSReg) || !MRI.hasOneNonDBGUse(RHSReg))
3283 return false;
3284
3285 // Make sure we have (hand x, ...), (hand y, ...)
3286 MachineInstr *LeftHandInst = getDefIgnoringCopies(LHSReg, MRI);
3287 MachineInstr *RightHandInst = getDefIgnoringCopies(RHSReg, MRI);
3288 if (!LeftHandInst || !RightHandInst)
3289 return false;
3290 unsigned HandOpcode = LeftHandInst->getOpcode();
3291 if (HandOpcode != RightHandInst->getOpcode())
3292 return false;
3293 if (LeftHandInst->getNumOperands() < 2 ||
3294 !LeftHandInst->getOperand(1).isReg() ||
3295 RightHandInst->getNumOperands() < 2 ||
3296 !RightHandInst->getOperand(1).isReg())
3297 return false;
3298
3299 // Make sure the types match up, and if we're doing this post-legalization,
3300 // we end up with legal types.
3301 Register X = LeftHandInst->getOperand(1).getReg();
3302 Register Y = RightHandInst->getOperand(1).getReg();
3303 LLT XTy = MRI.getType(X);
3304 LLT YTy = MRI.getType(Y);
3305 if (!XTy.isValid() || XTy != YTy)
3306 return false;
3307
3308 // Optional extra source register.
3309 Register ExtraHandOpSrcReg;
3310 switch (HandOpcode) {
3311 default:
3312 return false;
3313 case TargetOpcode::G_ANYEXT:
3314 case TargetOpcode::G_SEXT:
3315 case TargetOpcode::G_ZEXT: {
3316 // Match: logic (ext X), (ext Y) --> ext (logic X, Y)
3317 break;
3318 }
3319 case TargetOpcode::G_TRUNC: {
3320 // Match: logic (trunc X), (trunc Y) -> trunc (logic X, Y)
3321 const MachineFunction *MF = MI.getMF();
3322 LLVMContext &Ctx = MF->getFunction().getContext();
3323
3324 LLT DstTy = MRI.getType(Dst);
3325 const TargetLowering &TLI = getTargetLowering();
3326
3327 // Be extra careful sinking truncate. If it's free, there's no benefit in
3328 // widening a binop.
3329 if (TLI.isZExtFree(DstTy, XTy, Ctx) && TLI.isTruncateFree(XTy, DstTy, Ctx))
3330 return false;
3331 break;
3332 }
3333 case TargetOpcode::G_AND:
3334 case TargetOpcode::G_ASHR:
3335 case TargetOpcode::G_LSHR:
3336 case TargetOpcode::G_SHL: {
3337 // Match: logic (binop x, z), (binop y, z) -> binop (logic x, y), z
3338 MachineOperand &ZOp = LeftHandInst->getOperand(2);
3339 if (!matchEqualDefs(ZOp, RightHandInst->getOperand(2)))
3340 return false;
3341 ExtraHandOpSrcReg = ZOp.getReg();
3342 break;
3343 }
3344 }
3345
3346 if (!isLegalOrBeforeLegalizer({LogicOpcode, {XTy, YTy}}))
3347 return false;
3348
3349 // Record the steps to build the new instructions.
3350 //
3351 // Steps to build (logic x, y)
3352 auto NewLogicDst = MRI.createGenericVirtualRegister(XTy);
3353 OperandBuildSteps LogicBuildSteps = {
3354 [=](MachineInstrBuilder &MIB) { MIB.addDef(NewLogicDst); },
3355 [=](MachineInstrBuilder &MIB) { MIB.addReg(X); },
3356 [=](MachineInstrBuilder &MIB) { MIB.addReg(Y); }};
3357 InstructionBuildSteps LogicSteps(LogicOpcode, LogicBuildSteps);
3358
3359 // Steps to build hand (logic x, y), ...z
3360 OperandBuildSteps HandBuildSteps = {
3361 [=](MachineInstrBuilder &MIB) { MIB.addDef(Dst); },
3362 [=](MachineInstrBuilder &MIB) { MIB.addReg(NewLogicDst); }};
3363 if (ExtraHandOpSrcReg.isValid())
3364 HandBuildSteps.push_back(
3365 [=](MachineInstrBuilder &MIB) { MIB.addReg(ExtraHandOpSrcReg); });
3366 InstructionBuildSteps HandSteps(HandOpcode, HandBuildSteps);
3367
3368 MatchInfo = InstructionStepsMatchInfo({LogicSteps, HandSteps});
3369 return true;
3370}
3371
3372void CombinerHelper::applyBuildInstructionSteps(
3373 MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) const {
3374 assert(MatchInfo.InstrsToBuild.size() &&
3375 "Expected at least one instr to build?");
3376 for (auto &InstrToBuild : MatchInfo.InstrsToBuild) {
3377 assert(InstrToBuild.Opcode && "Expected a valid opcode?");
3378 assert(InstrToBuild.OperandFns.size() && "Expected at least one operand?");
3379 MachineInstrBuilder Instr = Builder.buildInstr(InstrToBuild.Opcode);
3380 for (auto &OperandFn : InstrToBuild.OperandFns)
3381 OperandFn(Instr);
3382 }
3383 MI.eraseFromParent();
3384}
3385
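// Fold (ashr (shl x, C), C) -> (sext_inreg x, Width - C). Illustrative
// sketch with a generic s32 value (assumed MIR, not from a particular test):
//   %s:_(s32) = G_SHL %x, 24
//   %d:_(s32) = G_ASHR %s, 24
// becomes
//   %d:_(s32) = G_SEXT_INREG %x, 8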
3386bool CombinerHelper::matchAshrShlToSextInreg(
3387 MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) const {
3388 assert(MI.getOpcode() == TargetOpcode::G_ASHR);
3389 int64_t ShlCst, AshrCst;
3390 Register Src;
3391 if (!mi_match(MI.getOperand(0).getReg(), MRI,
3392 m_GAShr(m_GShl(m_Reg(Src), m_ICstOrSplat(ShlCst)),
3393 m_ICstOrSplat(AshrCst))))
3394 return false;
3395 if (ShlCst != AshrCst)
3396 return false;
3397 if (!isLegalOrBeforeLegalizer(
3398 {TargetOpcode::G_SEXT_INREG, {MRI.getType(Src)}}))
3399 return false;
3400 MatchInfo = std::make_tuple(Src, ShlCst);
3401 return true;
3402}
3403
3404void CombinerHelper::applyAshShlToSextInreg(
3405 MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) const {
3406 assert(MI.getOpcode() == TargetOpcode::G_ASHR);
3407 Register Src;
3408 int64_t ShiftAmt;
3409 std::tie(Src, ShiftAmt) = MatchInfo;
3410 unsigned Size = MRI.getType(Src).getScalarSizeInBits();
3411 Builder.buildSExtInReg(MI.getOperand(0).getReg(), Src, Size - ShiftAmt);
3412 MI.eraseFromParent();
3413}
3414
3415/// and(and(x, C1), C2) -> C1&C2 ? and(x, C1&C2) : 0
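/// Illustrative sketch with concrete masks (assumed values, not from a test):
///   %i:_(s32) = G_AND %x, 12
///   %d:_(s32) = G_AND %i, 10   --> %d:_(s32) = G_AND %x, 8 (12 & 10 == 8)
/// With masks 12 and 3 instead (12 & 3 == 0), %d folds to the constant 0.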
3416bool CombinerHelper::matchOverlappingAnd(
3417 MachineInstr &MI,
3418 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
3419 assert(MI.getOpcode() == TargetOpcode::G_AND);
3420
3421 Register Dst = MI.getOperand(0).getReg();
3422 LLT Ty = MRI.getType(Dst);
3423
3424 Register R;
3425 int64_t C1;
3426 int64_t C2;
3427 if (!mi_match(
3428 Dst, MRI,
3429 m_GAnd(m_GAnd(m_Reg(R), m_ICst(C1)), m_ICst(C2))))
3430 return false;
3431
3432 MatchInfo = [=](MachineIRBuilder &B) {
3433 if (C1 & C2) {
3434 B.buildAnd(Dst, R, B.buildConstant(Ty, C1 & C2));
3435 return;
3436 }
3437 auto Zero = B.buildConstant(Ty, 0);
3438 replaceRegWith(MRI, Dst, Zero->getOperand(0).getReg());
3439 };
3440 return true;
3441}
3442
3443bool CombinerHelper::matchRedundantAnd(MachineInstr &MI,
3444 Register &Replacement) const {
3445 // Given
3446 //
3447 // %y:_(sN) = G_SOMETHING
3448 // %x:_(sN) = G_SOMETHING
3449 // %res:_(sN) = G_AND %x, %y
3450 //
3451 // Eliminate the G_AND when it is known that x & y == x or x & y == y.
3452 //
3453 // Patterns like this can appear as a result of legalization. E.g.
3454 //
3455 // %cmp:_(s32) = G_ICMP intpred(pred), %x(s32), %y
3456 // %one:_(s32) = G_CONSTANT i32 1
3457 // %and:_(s32) = G_AND %cmp, %one
3458 //
3459 // In this case, G_ICMP only produces a single bit, so x & 1 == x.
3460 assert(MI.getOpcode() == TargetOpcode::G_AND);
3461 if (!VT)
3462 return false;
3463
3464 Register AndDst = MI.getOperand(0).getReg();
3465 Register LHS = MI.getOperand(1).getReg();
3466 Register RHS = MI.getOperand(2).getReg();
3467
3468 // Check the RHS (maybe a constant) first, and if we have no KnownBits there,
3469 // we can't do anything. If we do, then it depends on whether we have
3470 // KnownBits on the LHS.
3471 KnownBits RHSBits = VT->getKnownBits(RHS);
3472 if (RHSBits.isUnknown())
3473 return false;
3474
3475 KnownBits LHSBits = VT->getKnownBits(LHS);
3476
3477 // Check that x & Mask == x.
3478 // x & 1 == x, always
3479 // x & 0 == x, only if x is also 0
3480 // Meaning Mask has no effect if every bit is either one in Mask or zero in x.
3481 //
3482 // Check if we can replace AndDst with the LHS of the G_AND
3483 if (canReplaceReg(AndDst, LHS, MRI) &&
3484 (LHSBits.Zero | RHSBits.One).isAllOnes()) {
3485 Replacement = LHS;
3486 return true;
3487 }
3488
3489 // Check if we can replace AndDst with the RHS of the G_AND
3490 if (canReplaceReg(AndDst, RHS, MRI) &&
3491 (LHSBits.One | RHSBits.Zero).isAllOnes()) {
3492 Replacement = RHS;
3493 return true;
3494 }
3495
3496 return false;
3497}
3498
3499bool CombinerHelper::matchRedundantOr(MachineInstr &MI,
3500 Register &Replacement) const {
3501 // Given
3502 //
3503 // %y:_(sN) = G_SOMETHING
3504 // %x:_(sN) = G_SOMETHING
3505 // %res:_(sN) = G_OR %x, %y
3506 //
3507 // Eliminate the G_OR when it is known that x | y == x or x | y == y.
3508 assert(MI.getOpcode() == TargetOpcode::G_OR);
3509 if (!VT)
3510 return false;
3511
3512 Register OrDst = MI.getOperand(0).getReg();
3513 Register LHS = MI.getOperand(1).getReg();
3514 Register RHS = MI.getOperand(2).getReg();
3515
3516 KnownBits LHSBits = VT->getKnownBits(LHS);
3517 KnownBits RHSBits = VT->getKnownBits(RHS);
3518
3519 // Check that x | Mask == x.
3520 // x | 0 == x, always
3521 // x | 1 == x, only if x is also 1
3522 // Meaning Mask has no effect if every bit is either zero in Mask or one in x.
3523 //
3524 // Check if we can replace OrDst with the LHS of the G_OR
3525 if (canReplaceReg(OrDst, LHS, MRI) &&
3526 (LHSBits.One | RHSBits.Zero).isAllOnes()) {
3527 Replacement = LHS;
3528 return true;
3529 }
3530
3531 // Check if we can replace OrDst with the RHS of the G_OR
3532 if (canReplaceReg(OrDst, RHS, MRI) &&
3533 (LHSBits.Zero | RHSBits.One).isAllOnes()) {
3534 Replacement = RHS;
3535 return true;
3536 }
3537
3538 return false;
3539}
3540
3541bool CombinerHelper::matchRedundantSExtInReg(MachineInstr &MI) const {
3542 // If the input is already sign extended, just drop the extension.
3543 Register Src = MI.getOperand(1).getReg();
3544 unsigned ExtBits = MI.getOperand(2).getImm();
3545 unsigned TypeSize = MRI.getType(Src).getScalarSizeInBits();
3546 return VT->computeNumSignBits(Src) >= (TypeSize - ExtBits + 1);
3547}
3548
3549static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits,
3550 int64_t Cst, bool IsVector, bool IsFP) {
3551 // For i1, Cst will always be -1 regardless of boolean contents.
3552 return (ScalarSizeBits == 1 && Cst == -1) ||
3553 isConstTrueVal(TLI, Cst, IsVector, IsFP);
3554}
3555
3556// This pattern aims to match the following shape to avoid extra mov
3557// instructions
3558// G_BUILD_VECTOR(
3559// G_UNMERGE_VALUES(src, 0)
3560// G_UNMERGE_VALUES(src, 1)
3561// G_IMPLICIT_DEF
3562// G_IMPLICIT_DEF
3563// )
3564// ->
3565// G_CONCAT_VECTORS(
3566// src,
3567// undef
3568// )
3569bool CombinerHelper::matchCombineBuildUnmerge(MachineInstr &MI,
3570 MachineRegisterInfo &MRI,
3571 Register &UnmergeSrc) const {
3572 auto &BV = cast<GBuildVector>(MI);
3573
3574 unsigned BuildUseCount = BV.getNumSources();
3575 if (BuildUseCount % 2 != 0)
3576 return false;
3577
3578 unsigned NumUnmerge = BuildUseCount / 2;
3579
3580 auto *Unmerge = getOpcodeDef<GUnmerge>(BV.getSourceReg(0), MRI);
3581
3582 // Check the first operand is an unmerge and has the correct number of
3583 // operands
3584 if (!Unmerge || Unmerge->getNumDefs() != NumUnmerge)
3585 return false;
3586
3587 UnmergeSrc = Unmerge->getSourceReg();
3588
3589 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
3590 LLT UnmergeSrcTy = MRI.getType(UnmergeSrc);
3591
3592 if (!UnmergeSrcTy.isVector())
3593 return false;
3594
3595 // Ensure we only generate legal instructions post-legalizer
3596 if (!IsPreLegalize &&
3597 !isLegal({TargetOpcode::G_CONCAT_VECTORS, {DstTy, UnmergeSrcTy}}))
3598 return false;
3599
3600 // Check that all of the operands before the midpoint come from the same
3601 // unmerge and are in the same order as they are used in the build_vector
3602 for (unsigned I = 0; I < NumUnmerge; ++I) {
3603 auto MaybeUnmergeReg = BV.getSourceReg(I);
3604 auto *LoopUnmerge = getOpcodeDef<GUnmerge>(MaybeUnmergeReg, MRI);
3605
3606 if (!LoopUnmerge || LoopUnmerge != Unmerge)
3607 return false;
3608
3609 if (LoopUnmerge->getOperand(I).getReg() != MaybeUnmergeReg)
3610 return false;
3611 }
3612
3613 // Check that all of the unmerged values are used
3614 if (Unmerge->getNumDefs() != NumUnmerge)
3615 return false;
3616
3617 // Check that all of the operands after the mid point are undefs.
3618 for (unsigned I = NumUnmerge; I < BuildUseCount; ++I) {
3619 auto *Undef = getDefIgnoringCopies(BV.getSourceReg(I), MRI);
3620
3621 if (Undef->getOpcode() != TargetOpcode::G_IMPLICIT_DEF)
3622 return false;
3623 }
3624
3625 return true;
3626}
3627
3628void CombinerHelper::applyCombineBuildUnmerge(MachineInstr &MI,
3629 MachineRegisterInfo &MRI,
3630 MachineIRBuilder &B,
3631 Register &UnmergeSrc) const {
3632 assert(UnmergeSrc && "Expected there to be one matching G_UNMERGE_VALUES");
3633 B.setInstrAndDebugLoc(MI);
3634
3635 Register UndefVec = B.buildUndef(MRI.getType(UnmergeSrc)).getReg(0);
3636 B.buildConcatVectors(MI.getOperand(0), {UnmergeSrc, UndefVec});
3637
3638 MI.eraseFromParent();
3639}
3640
3641// This combine tries to reduce the number of scalarised G_TRUNC instructions by
3642// using vector truncates instead
3643//
3644// EXAMPLE:
3645 // %a(s32), %b(s32) = G_UNMERGE_VALUES %src(<2 x s32>)
3646 // %T_a(s16) = G_TRUNC %a(s32)
3647 // %T_b(s16) = G_TRUNC %b(s32)
3648 // %Undef(s16) = G_IMPLICIT_DEF
3649 // %dst(<4 x s16>) = G_BUILD_VECTOR %T_a(s16), %T_b(s16), %Undef(s16), %Undef(s16)
3650 //
3651 // ===>
3652 // %Undef(<2 x s32>) = G_IMPLICIT_DEF
3653 // %Mid(<4 x s32>) = G_CONCAT_VECTORS %src(<2 x s32>), %Undef(<2 x s32>)
3654 // %dst(<4 x s16>) = G_TRUNC %Mid(<4 x s32>)
3655//
3656// Only matches sources made up of G_TRUNCs followed by G_IMPLICIT_DEFs
3657bool CombinerHelper::matchUseVectorTruncate(MachineInstr &MI,
3658 Register &MatchInfo) const {
3659 auto BuildMI = cast<GBuildVector>(&MI);
3660 unsigned NumOperands = BuildMI->getNumSources();
3661 LLT DstTy = MRI.getType(BuildMI->getReg(0));
3662
3663 // Check the G_BUILD_VECTOR sources
3664 unsigned I;
3665 MachineInstr *UnmergeMI = nullptr;
3666
3667 // Check all source TRUNCs come from the same UNMERGE instruction
3668 // and that the element order matches (BUILD_VECTOR position I
3669 // corresponds to UNMERGE result I)
3670 for (I = 0; I < NumOperands; ++I) {
3671 auto SrcMI = MRI.getVRegDef(BuildMI->getSourceReg(I));
3672 auto SrcMIOpc = SrcMI->getOpcode();
3673
3674 // Check if the G_TRUNC instructions all come from the same MI
3675 if (SrcMIOpc == TargetOpcode::G_TRUNC) {
3676 Register TruncSrcReg = SrcMI->getOperand(1).getReg();
3677 if (!UnmergeMI) {
3678 UnmergeMI = MRI.getVRegDef(TruncSrcReg);
3679 if (UnmergeMI->getOpcode() != TargetOpcode::G_UNMERGE_VALUES)
3680 return false;
3681 } else {
3682 auto UnmergeSrcMI = MRI.getVRegDef(TruncSrcReg);
3683 if (UnmergeMI != UnmergeSrcMI)
3684 return false;
3685 }
3686 // Verify element ordering: BUILD_VECTOR position I must use
3687 // UNMERGE result I, otherwise the fold would lose element reordering
3688 if (UnmergeMI->getOperand(I).getReg() != TruncSrcReg)
3689 return false;
3690 } else {
3691 break;
3692 }
3693 }
3694 if (I < 2)
3695 return false;
3696
3697 // Check the remaining source elements are only G_IMPLICIT_DEF
3698 for (; I < NumOperands; ++I) {
3699 auto SrcMI = MRI.getVRegDef(BuildMI->getSourceReg(I));
3700 auto SrcMIOpc = SrcMI->getOpcode();
3701
3702 if (SrcMIOpc != TargetOpcode::G_IMPLICIT_DEF)
3703 return false;
3704 }
3705
3706 // Check the size of unmerge source
3707 MatchInfo = cast<GUnmerge>(UnmergeMI)->getSourceReg();
3708 LLT UnmergeSrcTy = MRI.getType(MatchInfo);
3709 if (!DstTy.getElementCount().isKnownMultipleOf(UnmergeSrcTy.getNumElements()))
3710 return false;
3711
3712 // Check the unmerge source and destination element types match
3713 LLT UnmergeSrcEltTy = UnmergeSrcTy.getElementType();
3714 Register UnmergeDstReg = UnmergeMI->getOperand(0).getReg();
3715 LLT UnmergeDstEltTy = MRI.getType(UnmergeDstReg);
3716 if (UnmergeSrcEltTy != UnmergeDstEltTy)
3717 return false;
3718
3719 // Only generate legal instructions post-legalizer
3720 if (!IsPreLegalize) {
3721 LLT MidTy = DstTy.changeElementType(UnmergeSrcTy.getScalarType());
3722
3723 if (DstTy.getElementCount() != UnmergeSrcTy.getElementCount() &&
3724 !isLegal({TargetOpcode::G_CONCAT_VECTORS, {MidTy, UnmergeSrcTy}}))
3725 return false;
3726
3727 if (!isLegal({TargetOpcode::G_TRUNC, {DstTy, MidTy}}))
3728 return false;
3729 }
3730
3731 return true;
3732}
3733
3734void CombinerHelper::applyUseVectorTruncate(MachineInstr &MI,
3735 Register &MatchInfo) const {
3736 Register MidReg;
3737 auto BuildMI = cast<GBuildVector>(&MI);
3738 Register DstReg = BuildMI->getReg(0);
3739 LLT DstTy = MRI.getType(DstReg);
3740 LLT UnmergeSrcTy = MRI.getType(MatchInfo);
3741 unsigned DstTyNumElt = DstTy.getNumElements();
3742 unsigned UnmergeSrcTyNumElt = UnmergeSrcTy.getNumElements();
3743
3744 // No need to pad vector if only G_TRUNC is needed
3745 if (DstTyNumElt / UnmergeSrcTyNumElt == 1) {
3746 MidReg = MatchInfo;
3747 } else {
3748 Register UndefReg = Builder.buildUndef(UnmergeSrcTy).getReg(0);
3749 SmallVector<Register> ConcatRegs = {MatchInfo};
3750 for (unsigned I = 1; I < DstTyNumElt / UnmergeSrcTyNumElt; ++I)
3751 ConcatRegs.push_back(UndefReg);
3752
3753 auto MidTy = DstTy.changeElementType(UnmergeSrcTy.getScalarType());
3754 MidReg = Builder.buildConcatVectors(MidTy, ConcatRegs).getReg(0);
3755 }
3756
3757 Builder.buildTrunc(DstReg, MidReg);
3758 MI.eraseFromParent();
3759}
3760
3761bool CombinerHelper::matchNotCmp(
3762 MachineInstr &MI, SmallVectorImpl<Register> &RegsToNegate) const {
3763 assert(MI.getOpcode() == TargetOpcode::G_XOR);
3764 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3765 const auto &TLI = *Builder.getMF().getSubtarget().getTargetLowering();
3766 Register XorSrc;
3767 Register CstReg;
3768 // We match xor(src, true) here.
3769 if (!mi_match(MI.getOperand(0).getReg(), MRI,
3770 m_GXor(m_Reg(XorSrc), m_Reg(CstReg))))
3771 return false;
3772
3773 if (!MRI.hasOneNonDBGUse(XorSrc))
3774 return false;
3775
3776 // Check that XorSrc is the root of a tree of comparisons combined with ANDs
3777 // and ORs. The suffix of RegsToNegate starting from index I is used as a work
3778 // list of tree nodes to visit.
3779 RegsToNegate.push_back(XorSrc);
3780 // Remember whether the comparisons are all integer or all floating point.
3781 bool IsInt = false;
3782 bool IsFP = false;
3783 for (unsigned I = 0; I < RegsToNegate.size(); ++I) {
3784 Register Reg = RegsToNegate[I];
3785 if (!MRI.hasOneNonDBGUse(Reg))
3786 return false;
3787 MachineInstr *Def = MRI.getVRegDef(Reg);
3788 switch (Def->getOpcode()) {
3789 default:
3790 // Don't match if the tree contains anything other than ANDs, ORs and
3791 // comparisons.
3792 return false;
3793 case TargetOpcode::G_ICMP:
3794 if (IsFP)
3795 return false;
3796 IsInt = true;
3797 // When we apply the combine we will invert the predicate.
3798 break;
3799 case TargetOpcode::G_FCMP:
3800 if (IsInt)
3801 return false;
3802 IsFP = true;
3803 // When we apply the combine we will invert the predicate.
3804 break;
3805 case TargetOpcode::G_AND:
3806 case TargetOpcode::G_OR:
3807 // Implement De Morgan's laws:
3808 // ~(x & y) -> ~x | ~y
3809 // ~(x | y) -> ~x & ~y
3810 // When we apply the combine we will change the opcode and recursively
3811 // negate the operands.
3812 RegsToNegate.push_back(Def->getOperand(1).getReg());
3813 RegsToNegate.push_back(Def->getOperand(2).getReg());
3814 break;
3815 }
3816 }
3817
3818 // Now we know whether the comparisons are integer or floating point, check
3819 // the constant in the xor.
3820 int64_t Cst;
3821 if (Ty.isVector()) {
3822 MachineInstr *CstDef = MRI.getVRegDef(CstReg);
3823 auto MaybeCst = getIConstantSplatSExtVal(*CstDef, MRI);
3824 if (!MaybeCst)
3825 return false;
3826 if (!isConstValidTrue(TLI, Ty.getScalarSizeInBits(), *MaybeCst, true, IsFP))
3827 return false;
3828 } else {
3829 if (!mi_match(CstReg, MRI, m_ICst(Cst)))
3830 return false;
3831 if (!isConstValidTrue(TLI, Ty.getSizeInBits(), Cst, false, IsFP))
3832 return false;
3833 }
3834
3835 return true;
3836}
3837
3838void CombinerHelper::applyNotCmp(
3839 MachineInstr &MI, SmallVectorImpl<Register> &RegsToNegate) const {
3840 for (Register Reg : RegsToNegate) {
3841 MachineInstr *Def = MRI.getVRegDef(Reg);
3842 Observer.changingInstr(*Def);
3843 // For each comparison, invert the opcode. For each AND and OR, change the
3844 // opcode.
3845 switch (Def->getOpcode()) {
3846 default:
3847 llvm_unreachable("Unexpected opcode");
3848 case TargetOpcode::G_ICMP:
3849 case TargetOpcode::G_FCMP: {
3850 MachineOperand &PredOp = Def->getOperand(1);
3851 CmpInst::Predicate NewP = CmpInst::getInversePredicate(
3852 static_cast<CmpInst::Predicate>(PredOp.getPredicate()));
3853 PredOp.setPredicate(NewP);
3854 break;
3855 }
3856 case TargetOpcode::G_AND:
3857 Def->setDesc(Builder.getTII().get(TargetOpcode::G_OR));
3858 break;
3859 case TargetOpcode::G_OR:
3860 Def->setDesc(Builder.getTII().get(TargetOpcode::G_AND));
3861 break;
3862 }
3863 Observer.changedInstr(*Def);
3864 }
3865
3866 replaceRegWith(MRI, MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
3867 MI.eraseFromParent();
3868}
3869
3870bool CombinerHelper::matchXorOfAndWithSameReg(
3871 MachineInstr &MI, std::pair<Register, Register> &MatchInfo) const {
3872 // Match (xor (and x, y), y) (or any of its commuted cases)
3873 assert(MI.getOpcode() == TargetOpcode::G_XOR);
3874 Register &X = MatchInfo.first;
3875 Register &Y = MatchInfo.second;
3876 Register AndReg = MI.getOperand(1).getReg();
3877 Register SharedReg = MI.getOperand(2).getReg();
3878
3879 // Find a G_AND on either side of the G_XOR.
3880 // Look for one of
3881 //
3882 // (xor (and x, y), SharedReg)
3883 // (xor SharedReg, (and x, y))
3884 if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y)))) {
3885 std::swap(AndReg, SharedReg);
3886 if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y))))
3887 return false;
3888 }
3889
3890 // Only do this if we'll eliminate the G_AND.
3891 if (!MRI.hasOneNonDBGUse(AndReg))
3892 return false;
3893
3894 // We can combine if SharedReg is the same as either the LHS or RHS of the
3895 // G_AND.
3896 if (Y != SharedReg)
3897 std::swap(X, Y);
3898 return Y == SharedReg;
3899}
3900
3901void CombinerHelper::applyXorOfAndWithSameReg(
3902 MachineInstr &MI, std::pair<Register, Register> &MatchInfo) const {
3903 // Fold (xor (and x, y), y) -> (and (not x), y)
3904 Register X, Y;
3905 std::tie(X, Y) = MatchInfo;
3906 auto Not = Builder.buildNot(MRI.getType(X), X);
3907 Observer.changingInstr(MI);
3908 MI.setDesc(Builder.getTII().get(TargetOpcode::G_AND));
3909 MI.getOperand(1).setReg(Not->getOperand(0).getReg());
3910 MI.getOperand(2).setReg(Y);
3911 Observer.changedInstr(MI);
3912}
3913
3914bool CombinerHelper::matchPtrAddZero(MachineInstr &MI) const {
3915 auto &PtrAdd = cast<GPtrAdd>(MI);
3916 Register DstReg = PtrAdd.getReg(0);
3917 LLT Ty = MRI.getType(DstReg);
3918 const DataLayout &DL = Builder.getMF().getDataLayout();
3919
3920 if (DL.isNonIntegralAddressSpace(Ty.getScalarType().getAddressSpace()))
3921 return false;
3922
3923 if (Ty.isPointer()) {
3924 auto ConstVal = getIConstantVRegVal(PtrAdd.getBaseReg(), MRI);
3925 return ConstVal && *ConstVal == 0;
3926 }
3927
3928 assert(Ty.isVector() && "Expecting a vector type");
3929 const MachineInstr *VecMI = MRI.getVRegDef(PtrAdd.getBaseReg());
3930 return isBuildVectorAllZeros(*VecMI, MRI);
3931}
3932
3933void CombinerHelper::applyPtrAddZero(MachineInstr &MI) const {
3934 auto &PtrAdd = cast<GPtrAdd>(MI);
3935 Builder.buildIntToPtr(PtrAdd.getReg(0), PtrAdd.getOffsetReg());
3936 PtrAdd.eraseFromParent();
3937}
3938
3939/// The second source operand is known to be a power of 2.
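/// Illustrative sketch (generic MIR, assuming %p is known to be 8):
///   %d:_(s32) = G_UREM %x, %p
/// becomes
///   %m:_(s32) = G_ADD %p, -1
///   %d:_(s32) = G_AND %x, %m   ; i.e. (and x, 7)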
3940void CombinerHelper::applySimplifyURemByPow2(MachineInstr &MI) const {
3941 Register DstReg = MI.getOperand(0).getReg();
3942 Register Src0 = MI.getOperand(1).getReg();
3943 Register Pow2Src1 = MI.getOperand(2).getReg();
3944 LLT Ty = MRI.getType(DstReg);
3945
3946 // Fold (urem x, pow2) -> (and x, pow2-1)
3947 auto NegOne = Builder.buildConstant(Ty, -1);
3948 auto Add = Builder.buildAdd(Ty, Pow2Src1, NegOne);
3949 Builder.buildAnd(DstReg, Src0, Add);
3950 MI.eraseFromParent();
3951}
3952
3953bool CombinerHelper::matchFoldBinOpIntoSelect(MachineInstr &MI,
3954 unsigned &SelectOpNo) const {
3955 Register LHS = MI.getOperand(1).getReg();
3956 Register RHS = MI.getOperand(2).getReg();
3957
3958 Register OtherOperandReg = RHS;
3959 SelectOpNo = 1;
3960 MachineInstr *Select = MRI.getVRegDef(LHS);
3961
3962 // Don't do this unless the old select is going away. We want to eliminate the
3963 // binary operator, not replace a binop with a select.
3964 if (Select->getOpcode() != TargetOpcode::G_SELECT ||
3965 !MRI.hasOneNonDBGUse(LHS)) {
3966 OtherOperandReg = LHS;
3967 SelectOpNo = 2;
3968 Select = MRI.getVRegDef(RHS);
3969 if (Select->getOpcode() != TargetOpcode::G_SELECT ||
3970 !MRI.hasOneNonDBGUse(RHS))
3971 return false;
3972 }
3973
3974 MachineInstr *SelectLHS = MRI.getVRegDef(Select->getOperand(2).getReg());
3975 MachineInstr *SelectRHS = MRI.getVRegDef(Select->getOperand(3).getReg());
3976
3977 if (!isConstantOrConstantVector(*SelectLHS, MRI,
3978 /*AllowFP*/ true,
3979 /*AllowOpaqueConstants*/ false))
3980 return false;
3981 if (!isConstantOrConstantVector(*SelectRHS, MRI,
3982 /*AllowFP*/ true,
3983 /*AllowOpaqueConstants*/ false))
3984 return false;
3985
3986 unsigned BinOpcode = MI.getOpcode();
3987
3988 // We know that one of the operands is a select of constants. Now verify that
3989 // the other binary operator operand is either a constant, or we can handle a
3990 // variable.
3991 bool CanFoldNonConst =
3992 (BinOpcode == TargetOpcode::G_AND || BinOpcode == TargetOpcode::G_OR) &&
3993 (isNullOrNullSplat(*SelectLHS, MRI) ||
3994 isAllOnesOrAllOnesSplat(*SelectLHS, MRI)) &&
3995 (isNullOrNullSplat(*SelectRHS, MRI) ||
3996 isAllOnesOrAllOnesSplat(*SelectRHS, MRI));
3997 if (CanFoldNonConst)
3998 return true;
3999
4000 return isConstantOrConstantVector(*MRI.getVRegDef(OtherOperandReg), MRI,
4001 /*AllowFP*/ true,
4002 /*AllowOpaqueConstants*/ false);
4003}
4004
4005/// \p SelectOperand is the operand in binary operator \p MI that is the select
4006/// to fold.
4007void CombinerHelper::applyFoldBinOpIntoSelect(
4008 MachineInstr &MI, const unsigned &SelectOperand) const {
4009 Register Dst = MI.getOperand(0).getReg();
4010 Register LHS = MI.getOperand(1).getReg();
4011 Register RHS = MI.getOperand(2).getReg();
4012 MachineInstr *Select = MRI.getVRegDef(MI.getOperand(SelectOperand).getReg());
4013
4014 Register SelectCond = Select->getOperand(1).getReg();
4015 Register SelectTrue = Select->getOperand(2).getReg();
4016 Register SelectFalse = Select->getOperand(3).getReg();
4017
4018 LLT Ty = MRI.getType(Dst);
4019 unsigned BinOpcode = MI.getOpcode();
4020
4021 Register FoldTrue, FoldFalse;
4022
4023 // We have a select-of-constants followed by a binary operator with a
4024 // constant. Eliminate the binop by pulling the constant math into the select.
4025 // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
4026 if (SelectOperand == 1) {
4027 // TODO: SelectionDAG verifies this actually constant folds before
4028 // committing to the combine.
4029
4030 FoldTrue = Builder.buildInstr(BinOpcode, {Ty}, {SelectTrue, RHS}).getReg(0);
4031 FoldFalse =
4032 Builder.buildInstr(BinOpcode, {Ty}, {SelectFalse, RHS}).getReg(0);
4033 } else {
4034 FoldTrue = Builder.buildInstr(BinOpcode, {Ty}, {LHS, SelectTrue}).getReg(0);
4035 FoldFalse =
4036 Builder.buildInstr(BinOpcode, {Ty}, {LHS, SelectFalse}).getReg(0);
4037 }
4038
4039 Builder.buildSelect(Dst, SelectCond, FoldTrue, FoldFalse, MI.getFlags());
4040 MI.eraseFromParent();
4041}
4042
4043std::optional<SmallVector<Register, 8>>
4044CombinerHelper::findCandidatesForLoadOrCombine(const MachineInstr *Root) const {
4045 assert(Root->getOpcode() == TargetOpcode::G_OR && "Expected G_OR only!");
4046 // We want to detect if Root is part of a tree which represents a bunch
4047 // of loads being merged into a larger load. We'll try to recognize patterns
4048 // like, for example:
4049 //
4050 // Reg Reg
4051 // \ /
4052 // OR_1 Reg
4053 // \ /
4054 // OR_2
4055 // \ Reg
4056 // .. /
4057 // Root
4058 //
4059 // Reg Reg Reg Reg
4060 // \ / \ /
4061 // OR_1 OR_2
4062 // \ /
4063 // \ /
4064 // ...
4065 // Root
4066 //
4067 // Each "Reg" may have been produced by a load + some arithmetic. This
4068 // function will save each of them.
4069 SmallVector<Register, 8> RegsToVisit;
4070 SmallVector<const MachineInstr *, 8> Ors = {Root};
4071
4072 // In the "worst" case, we're dealing with a load for each byte. So, there
4073 // are at most #bytes - 1 ORs.
4074 const unsigned MaxIter =
4075 MRI.getType(Root->getOperand(0).getReg()).getSizeInBytes() - 1;
4076 for (unsigned Iter = 0; Iter < MaxIter; ++Iter) {
4077 if (Ors.empty())
4078 break;
4079 const MachineInstr *Curr = Ors.pop_back_val();
4080 Register OrLHS = Curr->getOperand(1).getReg();
4081 Register OrRHS = Curr->getOperand(2).getReg();
4082
4083 // In the combine, we want to eliminate the entire tree.
4084 if (!MRI.hasOneNonDBGUse(OrLHS) || !MRI.hasOneNonDBGUse(OrRHS))
4085 return std::nullopt;
4086
4087 // If it's a G_OR, save it and continue to walk. If it's not, then it's
4088 // something that may be a load + arithmetic.
4089 if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrLHS, MRI))
4090 Ors.push_back(Or);
4091 else
4092 RegsToVisit.push_back(OrLHS);
4093 if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrRHS, MRI))
4094 Ors.push_back(Or);
4095 else
4096 RegsToVisit.push_back(OrRHS);
4097 }
4098
4099 // We're going to try and merge each register into a wider power-of-2 type,
4100 // so we ought to have an even number of registers.
4101 if (RegsToVisit.empty() || RegsToVisit.size() % 2 != 0)
4102 return std::nullopt;
4103 return RegsToVisit;
4104}
4105
4106/// Helper function for findLoadOffsetsForLoadOrCombine.
4107///
4108/// Check if \p Reg is the result of loading a \p MemSizeInBits wide value,
4109/// and then moving that value into a specific byte offset.
4110///
4111/// e.g. x[i] << 24
4112///
4113/// \returns The load instruction and the byte offset it is moved into.
4114static std::optional<std::pair<GZExtLoad *, int64_t>>
4115matchLoadAndBytePosition(Register Reg, unsigned MemSizeInBits,
4116 const MachineRegisterInfo &MRI) {
4117 assert(MRI.hasOneNonDBGUse(Reg) &&
4118 "Expected Reg to only have one non-debug use?");
4119 Register MaybeLoad;
4120 int64_t Shift;
4121 if (!mi_match(Reg, MRI,
4122 m_OneNonDBGUse(m_GShl(m_Reg(MaybeLoad), m_ICst(Shift))))) {
4123 Shift = 0;
4124 MaybeLoad = Reg;
4125 }
4126
4127 if (Shift % MemSizeInBits != 0)
4128 return std::nullopt;
4129
4130 // TODO: Handle other types of loads.
4131 auto *Load = getOpcodeDef<GZExtLoad>(MaybeLoad, MRI);
4132 if (!Load)
4133 return std::nullopt;
4134
4135 if (!Load->isUnordered() || Load->getMemSizeInBits() != MemSizeInBits)
4136 return std::nullopt;
4137
4138 return std::make_pair(Load, Shift / MemSizeInBits);
4139}
4140
4141std::optional<std::tuple<GZExtLoad *, int64_t, GZExtLoad *>>
4142CombinerHelper::findLoadOffsetsForLoadOrCombine(
4143 SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx,
4144 const SmallVector<Register, 8> &RegsToVisit,
4145 const unsigned MemSizeInBits) const {
4146
4147 // Each load found for the pattern. There should be one for each RegsToVisit.
4148 SmallSetVector<const MachineInstr *, 8> Loads;
4149
4150 // The lowest index used in any load. (The lowest "i" for each x[i].)
4151 int64_t LowestIdx = INT64_MAX;
4152
4153 // The load which uses the lowest index.
4154 GZExtLoad *LowestIdxLoad = nullptr;
4155
4156 // Keeps track of the load indices we see. We shouldn't see any indices twice.
4157 SmallSet<int64_t, 8> SeenIdx;
4158
4159 // Ensure each load is in the same MBB.
4160 // TODO: Support multiple MachineBasicBlocks.
4161 MachineBasicBlock *MBB = nullptr;
4162 const MachineMemOperand *MMO = nullptr;
4163
4164 // Earliest instruction-order load in the pattern.
4165 GZExtLoad *EarliestLoad = nullptr;
4166
4167 // Latest instruction-order load in the pattern.
4168 GZExtLoad *LatestLoad = nullptr;
4169
4170 // Base pointer which every load should share.
4171 Register BasePtr;
4172
4173 // We want to find a load for each register. Each load should have some
4174 // appropriate bit twiddling arithmetic. During this loop, we will also keep
4175 // track of the load which uses the lowest index. Later, we will check if we
4176 // can use its pointer in the final, combined load.
4177 for (auto Reg : RegsToVisit) {
4178 // Find the load, and find the byte position that its value will end up at
4179 // in the (possibly shifted) wider value.
4180 auto LoadAndPos = matchLoadAndBytePosition(Reg, MemSizeInBits, MRI);
4181 if (!LoadAndPos)
4182 return std::nullopt;
4183 GZExtLoad *Load;
4184 int64_t DstPos;
4185 std::tie(Load, DstPos) = *LoadAndPos;
4186
4187 // TODO: Handle multiple MachineBasicBlocks. Currently not handled because
4188 // it is difficult to check for stores/calls/etc between loads.
4189 MachineBasicBlock *LoadMBB = Load->getParent();
4190 if (!MBB)
4191 MBB = LoadMBB;
4192 if (LoadMBB != MBB)
4193 return std::nullopt;
4194
4195 // Make sure that the MachineMemOperands of every seen load are compatible.
4196 auto &LoadMMO = Load->getMMO();
4197 if (!MMO)
4198 MMO = &LoadMMO;
4199 if (MMO->getAddrSpace() != LoadMMO.getAddrSpace())
4200 return std::nullopt;
4201
4202 // Find out what the base pointer and index for the load is.
4203 Register LoadPtr;
4204 int64_t Idx;
4205 if (!mi_match(Load->getOperand(1).getReg(), MRI,
4206 m_GPtrAdd(m_Reg(LoadPtr), m_ICst(Idx)))) {
4207 LoadPtr = Load->getOperand(1).getReg();
4208 Idx = 0;
4209 }
4210
4211 // Don't combine things like a[i], a[i] -> a bigger load.
4212 if (!SeenIdx.insert(Idx).second)
4213 return std::nullopt;
4214
4215 // Every load must share the same base pointer; don't combine things like:
4216 //
4217 // a[i], b[i + 1] -> a bigger load.
4218 if (!BasePtr.isValid())
4219 BasePtr = LoadPtr;
4220 if (BasePtr != LoadPtr)
4221 return std::nullopt;
4222
4223 if (Idx < LowestIdx) {
4224 LowestIdx = Idx;
4225 LowestIdxLoad = Load;
4226 }
4227
4228 // Keep track of the byte offset that this load ends up at. If we have seen
4229 // the byte offset, then stop here. We do not want to combine:
4230 //
4231 // a[i] << 16, a[i + k] << 16 -> a bigger load.
4232 if (!MemOffset2Idx.try_emplace(DstPos, Idx).second)
4233 return std::nullopt;
4234 Loads.insert(Load);
4235
4236 // Keep track of the position of the earliest/latest loads in the pattern.
4237 // We will check that there are no load fold barriers between them later
4238 // on.
4239 //
4240 // FIXME: Is there a better way to check for load fold barriers?
4241 if (!EarliestLoad || dominates(*Load, *EarliestLoad))
4242 EarliestLoad = Load;
4243 if (!LatestLoad || dominates(*LatestLoad, *Load))
4244 LatestLoad = Load;
4245 }
4246
4247 // We found a load for each register. Let's check if each load satisfies the
4248 // pattern.
4249 assert(Loads.size() == RegsToVisit.size() &&
4250 "Expected to find a load for each register?");
4251 assert(EarliestLoad != LatestLoad && EarliestLoad &&
4252 LatestLoad && "Expected at least two loads?");
4253
4254 // Check if there are any stores, calls, etc. between any of the loads. If
4255 // there are, then we can't safely perform the combine.
4256 //
4257 // MaxIter is chosen based on the (worst case) number of iterations it
4258 // typically takes to succeed in the LLVM test suite plus some padding.
4259 //
4260 // FIXME: Is there a better way to check for load fold barriers?
4261 const unsigned MaxIter = 20;
4262 unsigned Iter = 0;
4263 for (const auto &MI : instructionsWithoutDebug(EarliestLoad->getIterator(),
4264 LatestLoad->getIterator())) {
4265 if (Loads.count(&MI))
4266 continue;
4267 if (MI.isLoadFoldBarrier())
4268 return std::nullopt;
4269 if (Iter++ == MaxIter)
4270 return std::nullopt;
4271 }
4272
4273 return std::make_tuple(LowestIdxLoad, LowestIdx, LatestLoad);
4274}
4275
4276bool CombinerHelper::matchLoadOrCombine(
4277 MachineInstr &MI,
4278 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4279 assert(MI.getOpcode() == TargetOpcode::G_OR);
4280 MachineFunction &MF = *MI.getMF();
4281 // Assuming a little-endian target, transform:
4282 // s8 *a = ...
4283 // s32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
4284 // =>
4285 // s32 val = *((i32)a)
4286 //
4287 // s8 *a = ...
4288 // s32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
4289 // =>
4290 // s32 val = BSWAP(*((s32)a))
4291 Register Dst = MI.getOperand(0).getReg();
4292 LLT Ty = MRI.getType(Dst);
4293 if (Ty.isVector())
4294 return false;
4295
4296 // We need to combine at least two loads into this type. Since the smallest
4297 // possible load is into a byte, we need at least a 16-bit wide type.
4298 const unsigned WideMemSizeInBits = Ty.getSizeInBits();
4299 if (WideMemSizeInBits < 16 || WideMemSizeInBits % 8 != 0)
4300 return false;
4301
4302 // Match a collection of non-OR instructions in the pattern.
4303 auto RegsToVisit = findCandidatesForLoadOrCombine(&MI);
4304 if (!RegsToVisit)
4305 return false;
4306
4307 // We have a collection of non-OR instructions. Figure out how wide each of
4308 // the small loads should be based off of the number of potential loads we
4309 // found.
4310 const unsigned NarrowMemSizeInBits = WideMemSizeInBits / RegsToVisit->size();
4311 if (NarrowMemSizeInBits % 8 != 0)
4312 return false;
4313
4314 // Check if each register feeding into each OR is a load from the same
4315 // base pointer + some arithmetic.
4316 //
4317 // e.g. a[0], a[1] << 8, a[2] << 16, etc.
4318 //
4319 // Also verify that each of these ends up putting a[i] into the same memory
4320 // offset as a load into a wide type would.
4321 SmallDenseMap<int64_t, int64_t, 8> MemOffset2Idx;
4322 GZExtLoad *LowestIdxLoad, *LatestLoad;
4323 int64_t LowestIdx;
4324 auto MaybeLoadInfo = findLoadOffsetsForLoadOrCombine(
4325 MemOffset2Idx, *RegsToVisit, NarrowMemSizeInBits);
4326 if (!MaybeLoadInfo)
4327 return false;
4328 std::tie(LowestIdxLoad, LowestIdx, LatestLoad) = *MaybeLoadInfo;
4329
4330 // We have a bunch of loads being OR'd together. Using the addresses + offsets
4331 // we found before, check if this corresponds to a big or little endian byte
4332 // pattern. If it does, then we can represent it using a load + possibly a
4333 // BSWAP.
4334 bool IsBigEndianTarget = MF.getDataLayout().isBigEndian();
4335 std::optional<bool> IsBigEndian = isBigEndian(MemOffset2Idx, LowestIdx);
4336 if (!IsBigEndian)
4337 return false;
4338 bool NeedsBSwap = IsBigEndianTarget != *IsBigEndian;
4339 if (NeedsBSwap && !isLegalOrBeforeLegalizer({TargetOpcode::G_BSWAP, {Ty}}))
4340 return false;
4341
4342 // Make sure that the load from the lowest index produces offset 0 in the
4343 // final value.
4344 //
4345 // This ensures that we won't combine something like this:
4346 //
4347 // load x[i] -> byte 2
4348 // load x[i+1] -> byte 0 ---> wide_load x[i]
4349 // load x[i+2] -> byte 1
4350 const unsigned NumLoadsInTy = WideMemSizeInBits / NarrowMemSizeInBits;
4351 const unsigned ZeroByteOffset =
4352 *IsBigEndian
4353 ? bigEndianByteAt(NumLoadsInTy, 0)
4354 : littleEndianByteAt(NumLoadsInTy, 0);
4355 auto ZeroOffsetIdx = MemOffset2Idx.find(ZeroByteOffset);
4356 if (ZeroOffsetIdx == MemOffset2Idx.end() ||
4357 ZeroOffsetIdx->second != LowestIdx)
4358 return false;
4359
4360 // We will reuse the pointer from the load which ends up at byte offset 0. It
4361 // may not use index 0.
4362 Register Ptr = LowestIdxLoad->getPointerReg();
4363 const MachineMemOperand &MMO = LowestIdxLoad->getMMO();
4364 LegalityQuery::MemDesc MMDesc(MMO);
4365 MMDesc.MemoryTy = Ty;
4366 if (!isLegalOrBeforeLegalizer(
4367 {TargetOpcode::G_LOAD, {Ty, MRI.getType(Ptr)}, {MMDesc}}))
4368 return false;
4369 auto PtrInfo = MMO.getPointerInfo();
4370 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, WideMemSizeInBits / 8);
4371
4372 // Load must be allowed and fast on the target.
4373 LLVMContext &C = MF.getFunction().getContext();
4374 auto &DL = MF.getDataLayout();
4375 unsigned Fast = 0;
4376 if (!getTargetLowering().allowsMemoryAccess(C, DL, Ty, *NewMMO, &Fast) ||
4377 !Fast)
4378 return false;
4379
4380 MatchInfo = [=](MachineIRBuilder &MIB) {
4381 MIB.setInstrAndDebugLoc(*LatestLoad);
4382 Register LoadDst = NeedsBSwap ? MRI.cloneVirtualRegister(Dst) : Dst;
4383 MIB.buildLoad(LoadDst, Ptr, *NewMMO);
4384 if (NeedsBSwap)
4385 MIB.buildBSwap(Dst, LoadDst);
4386 };
4387 return true;
4388}
4389
4390bool CombinerHelper::matchExtendThroughPhis(MachineInstr &MI,
4391 MachineInstr *&ExtMI) const {
4392 auto &PHI = cast<GPhi>(MI);
4393 Register DstReg = PHI.getReg(0);
4394
4395 // TODO: Extending a vector may be expensive, don't do this until heuristics
4396 // are better.
4397 if (MRI.getType(DstReg).isVector())
4398 return false;
4399
4400 // Try to match a phi, whose only use is an extend.
4401 if (!MRI.hasOneNonDBGUse(DstReg))
4402 return false;
4403 ExtMI = &*MRI.use_instr_nodbg_begin(DstReg);
4404 switch (ExtMI->getOpcode()) {
4405 case TargetOpcode::G_ANYEXT:
4406 return true; // G_ANYEXT is usually free.
4407 case TargetOpcode::G_ZEXT:
4408 case TargetOpcode::G_SEXT:
4409 break;
4410 default:
4411 return false;
4412 }
4413
4414 // If the target is likely to fold this extend away, don't propagate.
4415 if (Builder.getTII().isExtendLikelyToBeFolded(*ExtMI, MRI))
4416 return false;
4417
4418 // We don't want to propagate the extends unless there's a good chance that
4419 // they'll be optimized in some way.
4420 // Collect the unique incoming values.
4421 SmallDenseSet<MachineInstr *, 4> InSrcs;
4422 for (unsigned I = 0; I < PHI.getNumIncomingValues(); ++I) {
4423 auto *DefMI = getDefIgnoringCopies(PHI.getIncomingValue(I), MRI);
4424 switch (DefMI->getOpcode()) {
4425 case TargetOpcode::G_LOAD:
4426 case TargetOpcode::G_TRUNC:
4427 case TargetOpcode::G_SEXT:
4428 case TargetOpcode::G_ZEXT:
4429 case TargetOpcode::G_ANYEXT:
4430 case TargetOpcode::G_CONSTANT:
4431 InSrcs.insert(DefMI);
4432 // Don't try to propagate if there are too many places to create new
4433 // extends, chances are it'll increase code size.
4434 if (InSrcs.size() > 2)
4435 return false;
4436 break;
4437 default:
4438 return false;
4439 }
4440 }
4441 return true;
4442}
4443
4444void CombinerHelper::applyExtendThroughPhis(MachineInstr &MI,
4445 MachineInstr *&ExtMI) const {
4446 auto &PHI = cast<GPhi>(MI);
4447 Register DstReg = ExtMI->getOperand(0).getReg();
4448 LLT ExtTy = MRI.getType(DstReg);
4449
4450 // Propagate the extension into the block of each incoming reg's block.
4451 // Use a SetVector here because PHIs can have duplicate edges, and we want
4452 // deterministic iteration order.
4453 SmallSetVector<MachineInstr *, 8> SrcMIs;
4454 SmallDenseMap<MachineInstr *, MachineInstr *, 8> OldToNewSrcMap;
4455 for (unsigned I = 0; I < PHI.getNumIncomingValues(); ++I) {
4456 auto SrcReg = PHI.getIncomingValue(I);
4457 auto *SrcMI = MRI.getVRegDef(SrcReg);
4458 if (!SrcMIs.insert(SrcMI))
4459 continue;
4460
4461 // Build an extend after each src inst.
4462 auto *MBB = SrcMI->getParent();
4463 MachineBasicBlock::iterator InsertPt = ++SrcMI->getIterator();
4464 if (InsertPt != MBB->end() && InsertPt->isPHI())
4465 InsertPt = MBB->getFirstNonPHI();
4466
4467 Builder.setInsertPt(*SrcMI->getParent(), InsertPt);
4468 Builder.setDebugLoc(MI.getDebugLoc());
4469 auto NewExt = Builder.buildExtOrTrunc(ExtMI->getOpcode(), ExtTy, SrcReg);
4470 OldToNewSrcMap[SrcMI] = NewExt;
4471 }
4472
4473 // Create a new phi with the extended inputs.
4474 Builder.setInstrAndDebugLoc(MI);
4475 auto NewPhi = Builder.buildInstrNoInsert(TargetOpcode::G_PHI);
4476 NewPhi.addDef(DstReg);
4477 for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) {
4478 if (!MO.isReg()) {
4479 NewPhi.addMBB(MO.getMBB());
4480 continue;
4481 }
4482 auto *NewSrc = OldToNewSrcMap[MRI.getVRegDef(MO.getReg())];
4483 NewPhi.addUse(NewSrc->getOperand(0).getReg());
4484 }
4485 Builder.insertInstr(NewPhi);
4486 ExtMI->eraseFromParent();
4487}
4488
4489bool CombinerHelper::matchExtractVecEltBuildVec(MachineInstr &MI,
4490 Register &Reg) const {
4491 assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT);
4492 // If we have a constant index, look for a G_BUILD_VECTOR source
4493 // and find the source register that the index maps to.
4494 Register SrcVec = MI.getOperand(1).getReg();
4495 LLT SrcTy = MRI.getType(SrcVec);
4496 if (SrcTy.isScalableVector())
4497 return false;
4498
4499 auto Cst = getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
4500 if (!Cst || Cst->Value.getZExtValue() >= SrcTy.getNumElements())
4501 return false;
4502
4503 unsigned VecIdx = Cst->Value.getZExtValue();
4504
4505 // Check if we have a build_vector or build_vector_trunc with an optional
4506 // trunc in front.
4507 MachineInstr *SrcVecMI = MRI.getVRegDef(SrcVec);
4508 if (SrcVecMI->getOpcode() == TargetOpcode::G_TRUNC) {
4509 SrcVecMI = MRI.getVRegDef(SrcVecMI->getOperand(1).getReg());
4510 }
4511
4512 if (SrcVecMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR &&
4513 SrcVecMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR_TRUNC)
4514 return false;
4515
4516 EVT Ty(getMVTForLLT(SrcTy));
4517 if (!MRI.hasOneNonDBGUse(SrcVec) &&
4518 !getTargetLowering().aggressivelyPreferBuildVectorSources(Ty))
4519 return false;
4520
4521 Reg = SrcVecMI->getOperand(VecIdx + 1).getReg();
4522 return true;
4523}
4524
4525void CombinerHelper::applyExtractVecEltBuildVec(MachineInstr &MI,
4526 Register &Reg) const {
4527 // Check the type of the register, since it may have come from a
4528 // G_BUILD_VECTOR_TRUNC.
4529 LLT ScalarTy = MRI.getType(Reg);
4530 Register DstReg = MI.getOperand(0).getReg();
4531 LLT DstTy = MRI.getType(DstReg);
4532
4533 if (ScalarTy != DstTy) {
4534 assert(ScalarTy.getSizeInBits() > DstTy.getSizeInBits());
4535 Builder.buildTrunc(DstReg, Reg);
4536 MI.eraseFromParent();
4537 return;
4538 }
4539 replaceSingleDefInstWithReg(MI, Reg);
4540}
4541
4542bool CombinerHelper::matchExtractAllEltsFromBuildVector(
4543 MachineInstr &MI,
4544 SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) const {
4545 assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
4546 // This combine tries to find build_vector's which have every source element
4547 // extracted using G_EXTRACT_VECTOR_ELT. This can happen when transforms like
4548 // the masked load scalarization is run late in the pipeline. There's already
4549 // a combine for a similar pattern starting from the extract, but that
4550 // doesn't attempt to do it if there are multiple uses of the build_vector,
4551 // which in this case is true. Starting the combine from the build_vector
4552 // feels more natural than trying to find sibling nodes of extracts.
4553 // E.g.
4554 // %vec(<4 x s32>) = G_BUILD_VECTOR %s1(s32), %s2, %s3, %s4
4555 // %ext1 = G_EXTRACT_VECTOR_ELT %vec, 0
4556 // %ext2 = G_EXTRACT_VECTOR_ELT %vec, 1
4557 // %ext3 = G_EXTRACT_VECTOR_ELT %vec, 2
4558 // %ext4 = G_EXTRACT_VECTOR_ELT %vec, 3
4559 // ==>
4560 // replace ext{1,2,3,4} with %s{1,2,3,4}
4561
4562 Register DstReg = MI.getOperand(0).getReg();
4563 LLT DstTy = MRI.getType(DstReg);
4564 unsigned NumElts = DstTy.getNumElements();
4565
4566 SmallBitVector ExtractedElts(NumElts);
4567 for (MachineInstr &II : MRI.use_nodbg_instructions(DstReg)) {
4568 if (II.getOpcode() != TargetOpcode::G_EXTRACT_VECTOR_ELT)
4569 return false;
4570 auto Cst = getIConstantVRegVal(II.getOperand(2).getReg(), MRI);
4571 if (!Cst)
4572 return false;
4573 unsigned Idx = Cst->getZExtValue();
4574 if (Idx >= NumElts)
4575 return false; // Out of range.
4576 ExtractedElts.set(Idx);
4577 SrcDstPairs.emplace_back(
4578 std::make_pair(MI.getOperand(Idx + 1).getReg(), &II));
4579 }
4580 // Match if every element was extracted.
4581 return ExtractedElts.all();
4582}
4583
4584void CombinerHelper::applyExtractAllEltsFromBuildVector(
4585 MachineInstr &MI,
4586 SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) const {
4587 assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
4588 for (auto &Pair : SrcDstPairs) {
4589 auto *ExtMI = Pair.second;
4590 replaceRegWith(MRI, ExtMI->getOperand(0).getReg(), Pair.first);
4591 ExtMI->eraseFromParent();
4592 }
4593 MI.eraseFromParent();
4594}
4595
4596void CombinerHelper::applyBuildFn(
4597 MachineInstr &MI,
4598 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4599 applyBuildFnNoErase(MI, MatchInfo);
4600 MI.eraseFromParent();
4601}
4602
4603void CombinerHelper::applyBuildFnNoErase(
4604 MachineInstr &MI,
4605 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4606 MatchInfo(Builder);
4607}
4608
4609bool CombinerHelper::matchOrShiftToFunnelShift(MachineInstr &MI,
4610 bool AllowScalarConstants,
4611 BuildFnTy &MatchInfo) const {
4612 assert(MI.getOpcode() == TargetOpcode::G_OR);
4613
4614 Register Dst = MI.getOperand(0).getReg();
4615 LLT Ty = MRI.getType(Dst);
4616 unsigned BitWidth = Ty.getScalarSizeInBits();
4617
4618 Register ShlSrc, ShlAmt, LShrSrc, LShrAmt, Amt;
4619 unsigned FshOpc = 0;
4620
4621 // Match (or (shl ...), (lshr ...)).
4622 if (!mi_match(Dst, MRI,
4623 // m_GOr() handles the commuted version as well.
4624 m_GOr(m_GShl(m_Reg(ShlSrc), m_Reg(ShlAmt)),
4625 m_GLShr(m_Reg(LShrSrc), m_Reg(LShrAmt)))))
4626 return false;
4627
4628 // Given constants C0 and C1 such that C0 + C1 is bit-width:
4629 // (or (shl x, C0), (lshr y, C1)) -> (fshl x, y, C0) or (fshr x, y, C1)
4630 int64_t CstShlAmt = 0, CstLShrAmt;
4631 if (mi_match(ShlAmt, MRI, m_ICstOrSplat(CstShlAmt)) &&
4632 mi_match(LShrAmt, MRI, m_ICstOrSplat(CstLShrAmt)) &&
4633 CstShlAmt + CstLShrAmt == BitWidth) {
4634 FshOpc = TargetOpcode::G_FSHR;
4635 Amt = LShrAmt;
4636 } else if (mi_match(LShrAmt, MRI,
4637 m_GSub(m_SpecificICstOrSplat(BitWidth), m_Reg(Amt))) &&
4638 ShlAmt == Amt) {
4639 // (or (shl x, amt), (lshr y, (sub bw, amt))) -> (fshl x, y, amt)
4640 FshOpc = TargetOpcode::G_FSHL;
4641 } else if (mi_match(ShlAmt, MRI,
4642 m_GSub(m_SpecificICstOrSplat(BitWidth), m_Reg(Amt))) &&
4643 LShrAmt == Amt) {
4644 // (or (shl x, (sub bw, amt)), (lshr y, amt)) -> (fshr x, y, amt)
4645 FshOpc = TargetOpcode::G_FSHR;
4646 } else {
4647 return false;
4648 }
4649
4650 LLT AmtTy = MRI.getType(Amt);
4651 if (!isLegalOrBeforeLegalizer({FshOpc, {Ty, AmtTy}}) &&
4652 (!AllowScalarConstants || CstShlAmt == 0 || !Ty.isScalar()))
4653 return false;
4654
4655 MatchInfo = [=](MachineIRBuilder &B) {
4656 B.buildInstr(FshOpc, {Dst}, {ShlSrc, LShrSrc, Amt});
4657 };
4658 return true;
4659}
4660
4661/// Match an FSHL or FSHR that can be combined to a ROTR or ROTL rotate.
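/// Illustrative sketch (generic MIR, not from a test): when both data
/// operands are the same register,
///   %d:_(s32) = G_FSHL %x, %x, %amt --> %d:_(s32) = G_ROTL %x, %amt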
4662bool CombinerHelper::matchFunnelShiftToRotate(MachineInstr &MI) const {
4663 unsigned Opc = MI.getOpcode();
4664 assert(Opc == TargetOpcode::G_FSHL || Opc == TargetOpcode::G_FSHR);
4665 Register X = MI.getOperand(1).getReg();
4666 Register Y = MI.getOperand(2).getReg();
4667 if (X != Y)
4668 return false;
4669 unsigned RotateOpc =
4670 Opc == TargetOpcode::G_FSHL ? TargetOpcode::G_ROTL : TargetOpcode::G_ROTR;
4671 return isLegalOrBeforeLegalizer({RotateOpc, {MRI.getType(X), MRI.getType(Y)}});
4672}
4673
4674void CombinerHelper::applyFunnelShiftToRotate(MachineInstr &MI) const {
4675 unsigned Opc = MI.getOpcode();
4676 assert(Opc == TargetOpcode::G_FSHL || Opc == TargetOpcode::G_FSHR);
4677 bool IsFSHL = Opc == TargetOpcode::G_FSHL;
4678 Observer.changingInstr(MI);
4679 MI.setDesc(Builder.getTII().get(IsFSHL ? TargetOpcode::G_ROTL
4680 : TargetOpcode::G_ROTR));
4681 MI.removeOperand(2);
4682 Observer.changedInstr(MI);
4683}
4684
4685// Fold (rot x, c) -> (rot x, c % BitSize)
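// E.g. for s32 (illustrative): (rotl x, 40) -> (rotl x, 8), since rotating
// by the full bit width is a no-op.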
4686bool CombinerHelper::matchRotateOutOfRange(MachineInstr &MI) const {
4687 assert(MI.getOpcode() == TargetOpcode::G_ROTL ||
4688 MI.getOpcode() == TargetOpcode::G_ROTR);
4689 unsigned Bitsize =
4690 MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits();
4691 Register AmtReg = MI.getOperand(2).getReg();
4692 bool OutOfRange = false;
4693 auto MatchOutOfRange = [Bitsize, &OutOfRange](const Constant *C) {
4694 if (auto *CI = dyn_cast<ConstantInt>(C))
4695 OutOfRange |= CI->getValue().uge(Bitsize);
4696 return true;
4697 };
4698 return matchUnaryPredicate(MRI, AmtReg, MatchOutOfRange) && OutOfRange;
4699}
4700
4701void CombinerHelper::applyRotateOutOfRange(MachineInstr &MI) const {
4702 assert(MI.getOpcode() == TargetOpcode::G_ROTL ||
4703 MI.getOpcode() == TargetOpcode::G_ROTR);
4704 unsigned Bitsize =
4705 MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits();
4706 Register Amt = MI.getOperand(2).getReg();
4707 LLT AmtTy = MRI.getType(Amt);
4708 auto Bits = Builder.buildConstant(AmtTy, Bitsize);
4709 Amt = Builder.buildURem(AmtTy, MI.getOperand(2).getReg(), Bits).getReg(0);
4710 Observer.changingInstr(MI);
4711 MI.getOperand(2).setReg(Amt);
4712 Observer.changedInstr(MI);
4713}
4714
4715bool CombinerHelper::matchICmpToTrueFalseKnownBits(MachineInstr &MI,
4716 int64_t &MatchInfo) const {
4717 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
4718 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
4719
4720 // We want to avoid calling KnownBits on the LHS if possible, as this combine
4721 // has no filter and runs on every G_ICMP instruction. We can avoid calling
4722 // KnownBits on the LHS in two cases:
4723 //
4724 // - The RHS is unknown: Constants are always on RHS. If the RHS is unknown
4725 // we cannot do any transforms so we can safely bail out early.
4726 // - The RHS is zero: we don't need to know the LHS to do unsigned <0 and
4727 // >=0.
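// Illustrative sketch (assumed values): if KnownBits proves the top 24 bits
// of a s32 LHS are zero (so LHS <= 255), then G_ICMP ult %lhs, 256 always
// evaluates to true and can be folded to the true constant.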
4728 auto KnownRHS = VT->getKnownBits(MI.getOperand(3).getReg());
4729 if (KnownRHS.isUnknown())
4730 return false;
4731
4732 std::optional<bool> KnownVal;
4733 if (KnownRHS.isZero()) {
4734 // ? uge 0 -> always true
4735 // ? ult 0 -> always false
4736 if (Pred == CmpInst::ICMP_UGE)
4737 KnownVal = true;
4738 else if (Pred == CmpInst::ICMP_ULT)
4739 KnownVal = false;
4740 }
4741
4742 if (!KnownVal) {
4743 auto KnownLHS = VT->getKnownBits(MI.getOperand(2).getReg());
4744 KnownVal = ICmpInst::compare(KnownLHS, KnownRHS, Pred);
4745 }
4746
4747 if (!KnownVal)
4748 return false;
4749 MatchInfo =
4750 *KnownVal
4751 ? getICmpTrueVal(getTargetLowering(),
4752 /*IsVector = */
4753 MRI.getType(MI.getOperand(0).getReg()).isVector(),
4754 /* IsFP = */ false)
4755 : 0;
4756 return true;
4757}
4758
4759bool CombinerHelper::matchICmpToLHSKnownBits(
4760 MachineInstr &MI,
4761 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4762 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
4763 // Given:
4764 //
4765 // %x = G_WHATEVER (... x is known to be 0 or 1 ...)
4766 // %cmp = G_ICMP ne %x, 0
4767 //
4768 // Or:
4769 //
4770 // %x = G_WHATEVER (... x is known to be 0 or 1 ...)
4771 // %cmp = G_ICMP eq %x, 1
4772 //
4773 // We can replace %cmp with %x assuming true is 1 on the target.
4774 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
4775 if (!CmpInst::isEquality(Pred))
4776 return false;
4777 Register Dst = MI.getOperand(0).getReg();
4778 LLT DstTy = MRI.getType(Dst);
4779 if (getICmpTrueVal(getTargetLowering(), DstTy.isVector(),
4780 /* IsFP = */ false) != 1)
4781 return false;
4782 int64_t OneOrZero = Pred == CmpInst::ICMP_EQ;
4783 if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICst(OneOrZero)))
4784 return false;
4785 Register LHS = MI.getOperand(2).getReg();
4786 auto KnownLHS = VT->getKnownBits(LHS);
4787 if (KnownLHS.getMinValue() != 0 || KnownLHS.getMaxValue() != 1)
4788 return false;
4789 // Make sure replacing Dst with the LHS is a legal operation.
4790 LLT LHSTy = MRI.getType(LHS);
4791 unsigned LHSSize = LHSTy.getSizeInBits();
4792 unsigned DstSize = DstTy.getSizeInBits();
4793 unsigned Op = TargetOpcode::COPY;
4794 if (DstSize != LHSSize)
4795 Op = DstSize < LHSSize ? TargetOpcode::G_TRUNC : TargetOpcode::G_ZEXT;
4796 if (!isLegalOrBeforeLegalizer({Op, {DstTy, LHSTy}}))
4797 return false;
4798 MatchInfo = [=](MachineIRBuilder &B) { B.buildInstr(Op, {Dst}, {LHS}); };
4799 return true;
4800}
4801
4802// Replace (and (or x, c1), c2) with (and x, c2) iff c1 & c2 == 0
4803bool CombinerHelper::matchAndOrDisjointMask(
4804 MachineInstr &MI,
4805 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4806 assert(MI.getOpcode() == TargetOpcode::G_AND);
4807
4808 // Ignore vector types to simplify matching the two constants.
4809 // TODO: do this for vectors and scalars via a demanded bits analysis.
4810 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4811 if (Ty.isVector())
4812 return false;
4813
4814 Register Src;
4815 Register AndMaskReg;
4816 int64_t AndMaskBits;
4817 int64_t OrMaskBits;
4818 if (!mi_match(MI, MRI,
4819 m_GAnd(m_GOr(m_Reg(Src), m_ICst(OrMaskBits)),
4820 m_all_of(m_ICst(AndMaskBits), m_Reg(AndMaskReg)))))
4821 return false;
4822
4823 // Check if OrMask could turn on any bits in Src.
4824 if (AndMaskBits & OrMaskBits)
4825 return false;
4826
4827 MatchInfo = [=, &MI](MachineIRBuilder &B) {
4828 Observer.changingInstr(MI);
4829 // Canonicalize the result to have the constant on the RHS.
4830 if (MI.getOperand(1).getReg() == AndMaskReg)
4831 MI.getOperand(2).setReg(AndMaskReg);
4832 MI.getOperand(1).setReg(Src);
4833 Observer.changedInstr(MI);
4834 };
4835 return true;
4836}
4837
4838/// Form a G_SBFX from a G_SEXT_INREG fed by a right shift.
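/// E.g. (illustrative): (sext_inreg (lshr x, 4), 8) -> (sbfx x, 4, 8), given
/// the shift result has no other use.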
4839bool CombinerHelper::matchBitfieldExtractFromSExtInReg(
4840 MachineInstr &MI,
4841 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4842 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
4843 Register Dst = MI.getOperand(0).getReg();
4844 Register Src = MI.getOperand(1).getReg();
4845 LLT Ty = MRI.getType(Src);
4846 LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
4847 if (!LI || !LI->isLegalOrCustom({TargetOpcode::G_SBFX, {Ty, ExtractTy}}))
4848 return false;
4849 int64_t Width = MI.getOperand(2).getImm();
4850 Register ShiftSrc;
4851 int64_t ShiftImm;
4852 if (!mi_match(
4853 Src, MRI,
4854 m_OneNonDBGUse(m_any_of(m_GAShr(m_Reg(ShiftSrc), m_ICst(ShiftImm)),
4855 m_GLShr(m_Reg(ShiftSrc), m_ICst(ShiftImm))))))
4856 return false;
4857 if (ShiftImm < 0 || ShiftImm + Width > Ty.getScalarSizeInBits())
4858 return false;
4859
4860 MatchInfo = [=](MachineIRBuilder &B) {
4861 auto Cst1 = B.buildConstant(ExtractTy, ShiftImm);
4862 auto Cst2 = B.buildConstant(ExtractTy, Width);
4863 B.buildSbfx(Dst, ShiftSrc, Cst1, Cst2);
4864 };
4865 return true;
4866}
4867
4868/// Form a G_UBFX from "(a srl b) & mask", where b and mask are constants.
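/// E.g. (illustrative): (and (lshr x, 4), 0xff) -> (ubfx x, 4, 8). The mask
/// must cover only low bits, i.e. AndImm & (AndImm + 1) == 0.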
4869bool CombinerHelper::matchBitfieldExtractFromAnd(MachineInstr &MI,
4870 BuildFnTy &MatchInfo) const {
4871 GAnd *And = cast<GAnd>(&MI);
4872 Register Dst = And->getReg(0);
4873 LLT Ty = MRI.getType(Dst);
4874 LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
4875 // Note that isLegalOrBeforeLegalizer is stricter and does not take custom
4876 // into account.
4877 if (LI && !LI->isLegalOrCustom({TargetOpcode::G_UBFX, {Ty, ExtractTy}}))
4878 return false;
4879
4880 int64_t AndImm, LSBImm;
4881 Register ShiftSrc;
4882 const unsigned Size = Ty.getScalarSizeInBits();
4883 if (!mi_match(And->getReg(0), MRI,
4884 m_GAnd(m_OneNonDBGUse(m_GLShr(m_Reg(ShiftSrc), m_ICst(LSBImm))),
4885 m_ICst(AndImm))))
4886 return false;
4887
4888 // The mask is a mask of the low bits iff imm & (imm+1) == 0.
4889 auto MaybeMask = static_cast<uint64_t>(AndImm);
4890 if (MaybeMask & (MaybeMask + 1))
4891 return false;
4892
4893 // LSB must fit within the register.
4894 if (static_cast<uint64_t>(LSBImm) >= Size)
4895 return false;
4896
4897 uint64_t Width = APInt(Size, AndImm).countr_one();
4898 MatchInfo = [=](MachineIRBuilder &B) {
4899 auto WidthCst = B.buildConstant(ExtractTy, Width);
4900 auto LSBCst = B.buildConstant(ExtractTy, LSBImm);
4901 B.buildInstr(TargetOpcode::G_UBFX, {Dst}, {ShiftSrc, LSBCst, WidthCst});
4902 };
4903 return true;
4904}
4905
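// Form a G_SBFX/G_UBFX from "(shl x, C1) >> C2" with C1 <= C2 < Size. E.g.
// for s32 (illustrative): (lshr (shl x, 8), 20) -> (ubfx x, 12, 12).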
4906bool CombinerHelper::matchBitfieldExtractFromShr(
4907 MachineInstr &MI,
4908 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4909 const unsigned Opcode = MI.getOpcode();
4910 assert(Opcode == TargetOpcode::G_ASHR || Opcode == TargetOpcode::G_LSHR);
4911
4912 const Register Dst = MI.getOperand(0).getReg();
4913
4914 const unsigned ExtrOpcode = Opcode == TargetOpcode::G_ASHR
4915 ? TargetOpcode::G_SBFX
4916 : TargetOpcode::G_UBFX;
4917
4918 // Check if the type we would use for the extract is legal
4919 LLT Ty = MRI.getType(Dst);
4920 LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
4921 if (!LI || !LI->isLegalOrCustom({ExtrOpcode, {Ty, ExtractTy}}))
4922 return false;
4923
4924 Register ShlSrc;
4925 int64_t ShrAmt;
4926 int64_t ShlAmt;
4927 const unsigned Size = Ty.getScalarSizeInBits();
4928
4929 // Try to match shr (shl x, c1), c2
4930 if (!mi_match(Dst, MRI,
4931 m_BinOp(Opcode,
4932 m_OneNonDBGUse(m_GShl(m_Reg(ShlSrc), m_ICst(ShlAmt))),
4933 m_ICst(ShrAmt))))
4934 return false;
4935
4936 // Make sure that the shift sizes can fit a bitfield extract
4937 if (ShlAmt < 0 || ShlAmt > ShrAmt || ShrAmt >= Size)
4938 return false;
4939
4940 // Skip this combine if the G_SEXT_INREG combine could handle it
4941 if (Opcode == TargetOpcode::G_ASHR && ShlAmt == ShrAmt)
4942 return false;
4943
4944 // Calculate start position and width of the extract
4945 const int64_t Pos = ShrAmt - ShlAmt;
4946 const int64_t Width = Size - ShrAmt;
4947
4948 MatchInfo = [=](MachineIRBuilder &B) {
4949 auto WidthCst = B.buildConstant(ExtractTy, Width);
4950 auto PosCst = B.buildConstant(ExtractTy, Pos);
4951 B.buildInstr(ExtrOpcode, {Dst}, {ShlSrc, PosCst, WidthCst});
4952 };
4953 return true;
4954}
4955
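// Form a G_UBFX from "(and x, mask) >> C" when the remaining mask bits are
// contiguous. E.g. for s32 (illustrative):
//   (lshr (and x, 0xfff), 4) -> (ubfx x, 4, 8)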
4956bool CombinerHelper::matchBitfieldExtractFromShrAnd(
4957 MachineInstr &MI,
4958 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4959 const unsigned Opcode = MI.getOpcode();
4960 assert(Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_ASHR);
4961
4962 const Register Dst = MI.getOperand(0).getReg();
4963 LLT Ty = MRI.getType(Dst);
4964 LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
4965 if (LI && !LI->isLegalOrCustom({TargetOpcode::G_UBFX, {Ty, ExtractTy}}))
4966 return false;
4967
4968 // Try to match shr (and x, c1), c2
4969 Register AndSrc;
4970 int64_t ShrAmt;
4971 int64_t SMask;
4972 if (!mi_match(Dst, MRI,
4973 m_BinOp(Opcode,
4974 m_OneNonDBGUse(m_GAnd(m_Reg(AndSrc), m_ICst(SMask))),
4975 m_ICst(ShrAmt))))
4976 return false;
4977
4978 const unsigned Size = Ty.getScalarSizeInBits();
4979 if (ShrAmt < 0 || ShrAmt >= Size)
4980 return false;
4981
4982 // If the shift subsumes the mask, emit the 0 directly.
4983 if (0 == (SMask >> ShrAmt)) {
4984 MatchInfo = [=](MachineIRBuilder &B) {
4985 B.buildConstant(Dst, 0);
4986 };
4987 return true;
4988 }
4989
4990 // Check that ubfx can do the extraction, with no holes in the mask.
4991 uint64_t UMask = SMask;
4992 UMask |= maskTrailingOnes<uint64_t>(ShrAmt);
4993 UMask &= maskTrailingOnes<uint64_t>(Size);
4994 if (!isMask_64(UMask))
4995 return false;
4996
4997 // Calculate start position and width of the extract.
4998 const int64_t Pos = ShrAmt;
4999 const int64_t Width = llvm::countr_one(UMask) - ShrAmt;
5000
5001 // It's preferable to keep the shift, rather than form G_SBFX.
5002 // TODO: remove the G_AND via demanded bits analysis.
5003 if (Opcode == TargetOpcode::G_ASHR && Width + ShrAmt == Size)
5004 return false;
5005
5006 MatchInfo = [=](MachineIRBuilder &B) {
5007 auto WidthCst = B.buildConstant(ExtractTy, Width);
5008 auto PosCst = B.buildConstant(ExtractTy, Pos);
5009 B.buildInstr(TargetOpcode::G_UBFX, {Dst}, {AndSrc, PosCst, WidthCst});
5010 };
5011 return true;
5012}
5013
5014bool CombinerHelper::reassociationCanBreakAddressingModePattern(
5015 MachineInstr &MI) const {
5016 auto &PtrAdd = cast<GPtrAdd>(MI);
5017
5018 Register Src1Reg = PtrAdd.getBaseReg();
5019 auto *Src1Def = getOpcodeDef<GPtrAdd>(Src1Reg, MRI);
5020 if (!Src1Def)
5021 return false;
5022
5023 Register Src2Reg = PtrAdd.getOffsetReg();
5024
5025 if (MRI.hasOneNonDBGUse(Src1Reg))
5026 return false;
5027
5028 auto C1 = getIConstantVRegVal(Src1Def->getOffsetReg(), MRI);
5029 if (!C1)
5030 return false;
5031 auto C2 = getIConstantVRegVal(Src2Reg, MRI);
5032 if (!C2)
5033 return false;
5034
5035 const APInt &C1APIntVal = *C1;
5036 const APInt &C2APIntVal = *C2;
5037 const int64_t CombinedValue = (C1APIntVal + C2APIntVal).getSExtValue();
5038
5039 for (auto &UseMI : MRI.use_nodbg_instructions(PtrAdd.getReg(0))) {
5040 // This combine may end up running before ptrtoint/inttoptr combines
5041 // manage to eliminate redundant conversions, so try to look through them.
5042 MachineInstr *ConvUseMI = &UseMI;
5043 unsigned ConvUseOpc = ConvUseMI->getOpcode();
5044 while (ConvUseOpc == TargetOpcode::G_INTTOPTR ||
5045 ConvUseOpc == TargetOpcode::G_PTRTOINT) {
5046 Register DefReg = ConvUseMI->getOperand(0).getReg();
5047 if (!MRI.hasOneNonDBGUse(DefReg))
5048 break;
5049 ConvUseMI = &*MRI.use_instr_nodbg_begin(DefReg);
5050 ConvUseOpc = ConvUseMI->getOpcode();
5051 }
5052 auto *LdStMI = dyn_cast<GLoadStore>(ConvUseMI);
5053 if (!LdStMI)
5054 continue;
5055 // Is x[offset2] already not a legal addressing mode? If so then
5056 // reassociating the constants breaks nothing (we test offset2 because
5057 // that's the one we hope to fold into the load or store).
5058 TargetLoweringBase::AddrMode AM;
5059 AM.HasBaseReg = true;
5060 AM.BaseOffs = C2APIntVal.getSExtValue();
5061 unsigned AS = MRI.getType(LdStMI->getPointerReg()).getAddressSpace();
5062 Type *AccessTy = getTypeForLLT(LdStMI->getMMO().getMemoryType(),
5063 PtrAdd.getMF()->getFunction().getContext());
5064 const auto &TLI = *PtrAdd.getMF()->getSubtarget().getTargetLowering();
5065 if (!TLI.isLegalAddressingMode(PtrAdd.getMF()->getDataLayout(), AM,
5066 AccessTy, AS))
5067 continue;
5068
5069 // Would x[offset1+offset2] still be a legal addressing mode?
5070 AM.BaseOffs = CombinedValue;
5071 if (!TLI.isLegalAddressingMode(PtrAdd.getMF()->getDataLayout(), AM,
5072 AccessTy, AS))
5073 return true;
5074 }
5075
5076 return false;
5077}
5078
5079 bool CombinerHelper::matchReassocConstantInnerRHS(GPtrAdd &MI,
5080 MachineInstr *RHS,
5081 BuildFnTy &MatchInfo) const {
5082 // G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C)
5083 Register Src1Reg = MI.getOperand(1).getReg();
5084 if (RHS->getOpcode() != TargetOpcode::G_ADD)
5085 return false;
5086 auto C2 = getIConstantVRegVal(RHS->getOperand(2).getReg(), MRI);
5087 if (!C2)
5088 return false;
5089
5090 // If both additions are nuw, the reassociated additions are also nuw.
5091 // If the original G_PTR_ADD is additionally nusw, X and C are both not
5092 // negative, so BASE+X is between BASE and BASE+(X+C). The new G_PTR_ADDs are
5093 // therefore also nusw.
5094 // If the original G_PTR_ADD is additionally inbounds (which implies nusw),
5095 // the new G_PTR_ADDs are then also inbounds.
5096 unsigned PtrAddFlags = MI.getFlags();
5097 unsigned AddFlags = RHS->getFlags();
5098 bool IsNoUWrap = PtrAddFlags & AddFlags & MachineInstr::MIFlag::NoUWrap;
5099 bool IsNoUSWrap = IsNoUWrap && (PtrAddFlags & MachineInstr::MIFlag::NoUSWrap);
5100 bool IsInBounds = IsNoUWrap && (PtrAddFlags & MachineInstr::MIFlag::InBounds);
5101 unsigned Flags = 0;
5102 if (IsNoUWrap)
5103 Flags |= MachineInstr::MIFlag::NoUWrap;
5104 if (IsNoUSWrap)
5105 Flags |= MachineInstr::MIFlag::NoUSWrap;
5106 if (IsInBounds)
5107 Flags |= MachineInstr::MIFlag::InBounds;
5108
5109 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5110 LLT PtrTy = MRI.getType(MI.getOperand(0).getReg());
5111
5112 auto NewBase =
5113 Builder.buildPtrAdd(PtrTy, Src1Reg, RHS->getOperand(1).getReg(), Flags);
5114 Observer.changingInstr(MI);
5115 MI.getOperand(1).setReg(NewBase.getReg(0));
5116 MI.getOperand(2).setReg(RHS->getOperand(2).getReg());
5117 MI.setFlags(Flags);
5118 Observer.changedInstr(MI);
5119 };
5120 return !reassociationCanBreakAddressingModePattern(MI);
5121}
5122
5123 bool CombinerHelper::matchReassocConstantInnerLHS(GPtrAdd &MI,
5124 MachineInstr *LHS,
5125 MachineInstr *RHS,
5126 BuildFnTy &MatchInfo) const {
5127 // G_PTR_ADD(G_PTR_ADD(X, C), Y) -> G_PTR_ADD(G_PTR_ADD(X, Y), C)
5128 // if and only if (G_PTR_ADD X, C) has one use.
5129 Register LHSBase;
5130 std::optional<ValueAndVReg> LHSCstOff;
5131 if (!mi_match(MI.getBaseReg(), MRI,
5132 m_OneNonDBGUse(m_GPtrAdd(m_Reg(LHSBase), m_GCst(LHSCstOff)))))
5133 return false;
5134
5135 auto *LHSPtrAdd = cast<GPtrAdd>(LHS);
5136
5137 // Reassociating nuw additions preserves nuw. If both original G_PTR_ADDs are
5138 // nuw and inbounds (which implies nusw), the offsets are both non-negative,
5139 // so the new G_PTR_ADDs are also inbounds.
5140 unsigned PtrAddFlags = MI.getFlags();
5141 unsigned LHSPtrAddFlags = LHSPtrAdd->getFlags();
5142 bool IsNoUWrap = PtrAddFlags & LHSPtrAddFlags & MachineInstr::MIFlag::NoUWrap;
5143 bool IsNoUSWrap = IsNoUWrap && (PtrAddFlags & LHSPtrAddFlags &
5144 MachineInstr::MIFlag::NoUSWrap);
5145 bool IsInBounds = IsNoUWrap && (PtrAddFlags & LHSPtrAddFlags &
5146 MachineInstr::MIFlag::InBounds);
5147 unsigned Flags = 0;
5148 if (IsNoUWrap)
5149 Flags |= MachineInstr::MIFlag::NoUWrap;
5150 if (IsNoUSWrap)
5151 Flags |= MachineInstr::MIFlag::NoUSWrap;
5152 if (IsInBounds)
5153 Flags |= MachineInstr::MIFlag::InBounds;
5154
5155 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5156 // When we change LHSPtrAdd's offset register we might cause it to use a reg
5157 // before its def. Sink the instruction before the outer PTR_ADD to ensure this
5158 // doesn't happen.
5159 LHSPtrAdd->moveBefore(&MI);
5160 Register RHSReg = MI.getOffsetReg();
5161 // Reusing the constant's VReg directly can cause a type mismatch if it came from an extend/trunc, so build a fresh constant instead.
5162 auto NewCst = B.buildConstant(MRI.getType(RHSReg), LHSCstOff->Value);
5163 Observer.changingInstr(MI);
5164 MI.getOperand(2).setReg(NewCst.getReg(0));
5165 MI.setFlags(Flags);
5166 Observer.changedInstr(MI);
5167 Observer.changingInstr(*LHSPtrAdd);
5168 LHSPtrAdd->getOperand(2).setReg(RHSReg);
5169 LHSPtrAdd->setFlags(Flags);
5170 Observer.changedInstr(*LHSPtrAdd);
5171 };
5172 return !reassociationCanBreakAddressingModePattern(MI);
5173}
5174
5175 bool CombinerHelper::matchReassocFoldConstantsInSubTree(
5176 GPtrAdd &MI, MachineInstr *LHS, MachineInstr *RHS,
5177 BuildFnTy &MatchInfo) const {
5178 // G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)
5179 auto *LHSPtrAdd = dyn_cast<GPtrAdd>(LHS);
5180 if (!LHSPtrAdd)
5181 return false;
5182
5183 Register Src2Reg = MI.getOperand(2).getReg();
5184 Register LHSSrc1 = LHSPtrAdd->getBaseReg();
5185 Register LHSSrc2 = LHSPtrAdd->getOffsetReg();
5186 auto C1 = getIConstantVRegVal(LHSSrc2, MRI);
5187 if (!C1)
5188 return false;
5189 auto C2 = getIConstantVRegVal(Src2Reg, MRI);
5190 if (!C2)
5191 return false;
5192
5193 // Reassociating nuw additions preserves nuw. If both original G_PTR_ADDs are
5194 // inbounds, reaching the same result in one G_PTR_ADD is also inbounds.
5195 // The nusw constraints are satisfied because imm1+imm2 cannot exceed the
5196 // largest signed integer that fits into the index type, which is the maximum
5197 // size of allocated objects according to the IR Language Reference.
5198 unsigned PtrAddFlags = MI.getFlags();
5199 unsigned LHSPtrAddFlags = LHSPtrAdd->getFlags();
5200 bool IsNoUWrap = PtrAddFlags & LHSPtrAddFlags & MachineInstr::MIFlag::NoUWrap;
5201 bool IsInBounds =
5202 PtrAddFlags & LHSPtrAddFlags & MachineInstr::MIFlag::InBounds;
5203 unsigned Flags = 0;
5204 if (IsNoUWrap)
5205 Flags |= MachineInstr::MIFlag::NoUWrap;
5206 if (IsInBounds) {
5207 Flags |= MachineInstr::MIFlag::NoUSWrap;
5208 Flags |= MachineInstr::MIFlag::InBounds;
5209 }
5210
5211 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5212 auto NewCst = B.buildConstant(MRI.getType(Src2Reg), *C1 + *C2);
5213 Observer.changingInstr(MI);
5214 MI.getOperand(1).setReg(LHSSrc1);
5215 MI.getOperand(2).setReg(NewCst.getReg(0));
5216 MI.setFlags(Flags);
5217 Observer.changedInstr(MI);
5218 };
5219 return !reassociationCanBreakAddressingModePattern(MI);
5220}
5221
5222 bool CombinerHelper::matchReassocPtrAdd(MachineInstr &MI,
5223 BuildFnTy &MatchInfo) const {
5224 auto &PtrAdd = cast<GPtrAdd>(MI);
5225 // We're trying to match a few pointer computation patterns here for
5226 // re-association opportunities.
5227 // 1) Isolating a constant operand to be on the RHS, e.g.:
5228 // G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C)
5229 //
5230 // 2) Folding two constants in each sub-tree as long as such folding
5231 // doesn't break a legal addressing mode.
5232 // G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)
5233 //
5234 // 3) Move a constant from the LHS of an inner op to the RHS of the outer.
5235 // G_PTR_ADD(G_PTR_ADD(X, C), Y) -> G_PTR_ADD(G_PTR_ADD(X, Y), C)
5236 // iff (G_PTR_ADD X, C) has one use.
5237 MachineInstr *LHS = MRI.getVRegDef(PtrAdd.getBaseReg());
5238 MachineInstr *RHS = MRI.getVRegDef(PtrAdd.getOffsetReg());
5239
5240 // Try to match example 2.
5241 if (matchReassocFoldConstantsInSubTree(PtrAdd, LHS, RHS, MatchInfo))
5242 return true;
5243
5244 // Try to match example 3.
5245 if (matchReassocConstantInnerLHS(PtrAdd, LHS, RHS, MatchInfo))
5246 return true;
5247
5248 // Try to match example 1.
5249 if (matchReassocConstantInnerRHS(PtrAdd, RHS, MatchInfo))
5250 return true;
5251
5252 return false;
5253}
5254 bool CombinerHelper::tryReassocBinOp(unsigned Opc, Register DstReg,
5255 Register OpLHS, Register OpRHS,
5256 BuildFnTy &MatchInfo) const {
5257 LLT OpRHSTy = MRI.getType(OpRHS);
5258 MachineInstr *OpLHSDef = MRI.getVRegDef(OpLHS);
5259
5260 if (OpLHSDef->getOpcode() != Opc)
5261 return false;
5262
5263 MachineInstr *OpRHSDef = MRI.getVRegDef(OpRHS);
5264 Register OpLHSLHS = OpLHSDef->getOperand(1).getReg();
5265 Register OpLHSRHS = OpLHSDef->getOperand(2).getReg();
5266
5267 // If the inner op is (X op C), pull the constant out so it can be folded with
5268 // other constants in the expression tree. Folding is not guaranteed so we
5269 // might have (C1 op C2). In that case do not pull a constant out because it
5270 // won't help and can lead to infinite loops.
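// E.g. (add (add x, 5), 3) can become (add x, (add 5, 3)) and then fold to
// (add x, 8); but if the inner LHS were also a constant, pulling the
// constant out would churn without simplifying, hence the check below.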
5271 if (isConstantOrConstantSplatVector(*MRI.getVRegDef(OpLHSRHS), MRI) &&
5272 !isConstantOrConstantSplatVector(*MRI.getVRegDef(OpLHSLHS), MRI)) {
5273 if (isConstantOrConstantSplatVector(*OpRHSDef, MRI)) {
5274 // (Opc (Opc X, C1), C2) -> (Opc X, (Opc C1, C2))
5275 MatchInfo = [=](MachineIRBuilder &B) {
5276 auto NewCst = B.buildInstr(Opc, {OpRHSTy}, {OpLHSRHS, OpRHS});
5277 B.buildInstr(Opc, {DstReg}, {OpLHSLHS, NewCst});
5278 };
5279 return true;
5280 }
5281 if (getTargetLowering().isReassocProfitable(MRI, OpLHS, OpRHS)) {
5282 // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
5283 // iff (op x, c1) has one use
5284 MatchInfo = [=](MachineIRBuilder &B) {
5285 auto NewLHSLHS = B.buildInstr(Opc, {OpRHSTy}, {OpLHSLHS, OpRHS});
5286 B.buildInstr(Opc, {DstReg}, {NewLHSLHS, OpLHSRHS});
5287 };
5288 return true;
5289 }
5290 }
5291
5292 return false;
5293}
5294
5295 bool CombinerHelper::matchReassocCommBinOp(MachineInstr &MI,
5296 BuildFnTy &MatchInfo) const {
5297 // We don't check if the reassociation will break a legal addressing mode
5298 // here since pointer arithmetic is handled by G_PTR_ADD.
5299 unsigned Opc = MI.getOpcode();
5300 Register DstReg = MI.getOperand(0).getReg();
5301 Register LHSReg = MI.getOperand(1).getReg();
5302 Register RHSReg = MI.getOperand(2).getReg();
5303
5304 if (tryReassocBinOp(Opc, DstReg, LHSReg, RHSReg, MatchInfo))
5305 return true;
5306 if (tryReassocBinOp(Opc, DstReg, RHSReg, LHSReg, MatchInfo))
5307 return true;
5308 return false;
5309}
5310
5311 bool CombinerHelper::matchConstantFoldCastOp(MachineInstr &MI,
5312 APInt &MatchInfo) const {
5313 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
5314 Register SrcOp = MI.getOperand(1).getReg();
5315
5316 if (auto MaybeCst = ConstantFoldCastOp(MI.getOpcode(), DstTy, SrcOp, MRI)) {
5317 MatchInfo = *MaybeCst;
5318 return true;
5319 }
5320
5321 return false;
5322}
5323
5324 bool CombinerHelper::matchConstantFoldUnaryIntOp(MachineInstr &MI,
5325 BuildFnTy &MatchInfo) const {
5326 Register Dst = MI.getOperand(0).getReg();
5327 auto Csts = ConstantFoldUnaryIntOp(MI.getOpcode(), MRI.getType(Dst),
5328 MI.getOperand(1).getReg(), MRI);
5329 if (Csts.empty())
5330 return false;
5331
5332 MatchInfo = [Dst, Csts = std::move(Csts)](MachineIRBuilder &B) {
5333 if (Csts.size() == 1)
5334 B.buildConstant(Dst, Csts[0]);
5335 else
5336 B.buildBuildVectorConstant(Dst, Csts);
5337 };
5338 return true;
5339}
5340
5341 bool CombinerHelper::matchConstantFoldBinOp(MachineInstr &MI,
5342 APInt &MatchInfo) const {
5343 Register Op1 = MI.getOperand(1).getReg();
5344 Register Op2 = MI.getOperand(2).getReg();
5345 auto MaybeCst = ConstantFoldBinOp(MI.getOpcode(), Op1, Op2, MRI);
5346 if (!MaybeCst)
5347 return false;
5348 MatchInfo = *MaybeCst;
5349 return true;
5350}
5351
5352 bool CombinerHelper::matchConstantFoldFPBinOp(MachineInstr &MI,
5353 ConstantFP *&MatchInfo) const {
5354 Register Op1 = MI.getOperand(1).getReg();
5355 Register Op2 = MI.getOperand(2).getReg();
5356 auto MaybeCst = ConstantFoldFPBinOp(MI.getOpcode(), Op1, Op2, MRI);
5357 if (!MaybeCst)
5358 return false;
5359 MatchInfo =
5360 ConstantFP::get(MI.getMF()->getFunction().getContext(), *MaybeCst);
5361 return true;
5362}
5363
5364 bool CombinerHelper::matchConstantFoldFMA(MachineInstr &MI,
5365 ConstantFP *&MatchInfo) const {
5366 assert(MI.getOpcode() == TargetOpcode::G_FMA ||
5367 MI.getOpcode() == TargetOpcode::G_FMAD);
5368 auto [_, Op1, Op2, Op3] = MI.getFirst4Regs();
5369
5370 const ConstantFP *Op3Cst = getConstantFPVRegVal(Op3, MRI);
5371 if (!Op3Cst)
5372 return false;
5373
5374 const ConstantFP *Op2Cst = getConstantFPVRegVal(Op2, MRI);
5375 if (!Op2Cst)
5376 return false;
5377
5378 const ConstantFP *Op1Cst = getConstantFPVRegVal(Op1, MRI);
5379 if (!Op1Cst)
5380 return false;
5381
5382 APFloat Op1F = Op1Cst->getValueAPF();
5383 Op1F.fusedMultiplyAdd(Op2Cst->getValueAPF(), Op3Cst->getValueAPF(),
5384 APFloat::rmNearestTiesToEven);
5385 MatchInfo = ConstantFP::get(MI.getMF()->getFunction().getContext(), Op1F);
5386 return true;
5387}
5388
5389 bool CombinerHelper::matchNarrowBinopFeedingAnd(
5390 MachineInstr &MI,
5391 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
5392 // Look for a binop feeding into an AND with a mask:
5393 //
5394 // %add = G_ADD %lhs, %rhs
5395 // %and = G_AND %add, 000...11111111
5396 //
5397 // Check if it's possible to perform the binop at a narrower width and zext
5398 // back to the original width like so:
5399 //
5400 // %narrow_lhs = G_TRUNC %lhs
5401 // %narrow_rhs = G_TRUNC %rhs
5402 // %narrow_add = G_ADD %narrow_lhs, %narrow_rhs
5403 // %new_add = G_ZEXT %narrow_add
5404 // %and = G_AND %new_add, 000...11111111
5405 //
5406 // This can allow later combines to eliminate the G_AND if it turns out
5407 // that the mask is irrelevant.
5408 assert(MI.getOpcode() == TargetOpcode::G_AND);
5409 Register Dst = MI.getOperand(0).getReg();
5410 Register AndLHS = MI.getOperand(1).getReg();
5411 Register AndRHS = MI.getOperand(2).getReg();
5412 LLT WideTy = MRI.getType(Dst);
5413
5414 // If the potential binop has more than one use, then it's possible that one
5415 // of those uses will need its full width.
5416 if (!WideTy.isScalar() || !MRI.hasOneNonDBGUse(AndLHS))
5417 return false;
5418
5419 // Check if the LHS feeding the AND is impacted by the high bits that we're
5420 // masking out.
5421 //
5422 // e.g. for 64-bit x, y:
5423 //
5424 // add_64(x, y) & 65535 == zext(add_16(trunc(x), trunc(y))) & 65535
5425 MachineInstr *LHSInst = getDefIgnoringCopies(AndLHS, MRI);
5426 if (!LHSInst)
5427 return false;
5428 unsigned LHSOpc = LHSInst->getOpcode();
5429 switch (LHSOpc) {
5430 default:
5431 return false;
5432 case TargetOpcode::G_ADD:
5433 case TargetOpcode::G_SUB:
5434 case TargetOpcode::G_MUL:
5435 case TargetOpcode::G_AND:
5436 case TargetOpcode::G_OR:
5437 case TargetOpcode::G_XOR:
5438 break;
5439 }
5440
5441 // Find the mask on the RHS.
5442 auto Cst = getIConstantVRegValWithLookThrough(AndRHS, MRI);
5443 if (!Cst)
5444 return false;
5445 auto Mask = Cst->Value;
5446 if (!Mask.isMask())
5447 return false;
5448
5449 // No point in combining if there's nothing to truncate.
5450 unsigned NarrowWidth = Mask.countr_one();
5451 if (NarrowWidth == WideTy.getSizeInBits())
5452 return false;
5453 LLT NarrowTy = LLT::integer(NarrowWidth);
5454
5455 // Check if adding the zext + truncates could be harmful.
5456 auto &MF = *MI.getMF();
5457 const auto &TLI = getTargetLowering();
5458 LLVMContext &Ctx = MF.getFunction().getContext();
5459 if (!TLI.isTruncateFree(WideTy, NarrowTy, Ctx) ||
5460 !TLI.isZExtFree(NarrowTy, WideTy, Ctx))
5461 return false;
5462 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_TRUNC, {NarrowTy, WideTy}}) ||
5463 !isLegalOrBeforeLegalizer({TargetOpcode::G_ZEXT, {WideTy, NarrowTy}}))
5464 return false;
5465 Register BinOpLHS = LHSInst->getOperand(1).getReg();
5466 Register BinOpRHS = LHSInst->getOperand(2).getReg();
5467 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5468 auto NarrowLHS = Builder.buildTrunc(NarrowTy, BinOpLHS);
5469 auto NarrowRHS = Builder.buildTrunc(NarrowTy, BinOpRHS);
5470 auto NarrowBinOp =
5471 Builder.buildInstr(LHSOpc, {NarrowTy}, {NarrowLHS, NarrowRHS});
5472 auto Ext = Builder.buildZExt(WideTy, NarrowBinOp);
5473 Observer.changingInstr(MI);
5474 MI.getOperand(1).setReg(Ext.getReg(0));
5475 Observer.changedInstr(MI);
5476 };
5477 return true;
5478}
5479
5480 bool CombinerHelper::matchMulOBy2(MachineInstr &MI,
5481 BuildFnTy &MatchInfo) const {
5482 unsigned Opc = MI.getOpcode();
5483 assert(Opc == TargetOpcode::G_UMULO || Opc == TargetOpcode::G_SMULO);
5484
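// (G_UMULO x, 2) -> (G_UADDO x, x); (G_SMULO x, 2) -> (G_SADDO x, x)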
5485 if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICstOrSplat(2)))
5486 return false;
5487
5488 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5489 Observer.changingInstr(MI);
5490 unsigned NewOpc = Opc == TargetOpcode::G_UMULO ? TargetOpcode::G_UADDO
5491 : TargetOpcode::G_SADDO;
5492 MI.setDesc(Builder.getTII().get(NewOpc));
5493 MI.getOperand(3).setReg(MI.getOperand(2).getReg());
5494 Observer.changedInstr(MI);
5495 };
5496 return true;
5497}
5498
5499 bool CombinerHelper::matchMulOBy0(MachineInstr &MI,
5500 BuildFnTy &MatchInfo) const {
5501 // (G_*MULO x, 0) -> 0 + no carry out
5502 assert(MI.getOpcode() == TargetOpcode::G_UMULO ||
5503 MI.getOpcode() == TargetOpcode::G_SMULO);
5504 if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICstOrSplat(0)))
5505 return false;
5506 Register Dst = MI.getOperand(0).getReg();
5507 Register Carry = MI.getOperand(1).getReg();
5508 if (!isConstantLegalOrBeforeLegalizer(MRI.getType(Dst)) ||
5509 !isConstantLegalOrBeforeLegalizer(MRI.getType(Carry)))
5510 return false;
5511 MatchInfo = [=](MachineIRBuilder &B) {
5512 B.buildConstant(Dst, 0);
5513 B.buildConstant(Carry, 0);
5514 };
5515 return true;
5516}
5517
5518 bool CombinerHelper::matchAddEToAddO(MachineInstr &MI,
5519 BuildFnTy &MatchInfo) const {
5520 // (G_*ADDE x, y, 0) -> (G_*ADDO x, y)
5521 // (G_*SUBE x, y, 0) -> (G_*SUBO x, y)
5522 assert(MI.getOpcode() == TargetOpcode::G_UADDE ||
5523 MI.getOpcode() == TargetOpcode::G_SADDE ||
5524 MI.getOpcode() == TargetOpcode::G_USUBE ||
5525 MI.getOpcode() == TargetOpcode::G_SSUBE);
5526 if (!mi_match(MI.getOperand(4).getReg(), MRI, m_SpecificICstOrSplat(0)))
5527 return false;
5528 MatchInfo = [&](MachineIRBuilder &B) {
5529 unsigned NewOpcode;
5530 switch (MI.getOpcode()) {
5531 case TargetOpcode::G_UADDE:
5532 NewOpcode = TargetOpcode::G_UADDO;
5533 break;
5534 case TargetOpcode::G_SADDE:
5535 NewOpcode = TargetOpcode::G_SADDO;
5536 break;
5537 case TargetOpcode::G_USUBE:
5538 NewOpcode = TargetOpcode::G_USUBO;
5539 break;
5540 case TargetOpcode::G_SSUBE:
5541 NewOpcode = TargetOpcode::G_SSUBO;
5542 break;
5543 }
5544 Observer.changingInstr(MI);
5545 MI.setDesc(B.getTII().get(NewOpcode));
5546 MI.removeOperand(4);
5547 Observer.changedInstr(MI);
5548 };
5549 return true;
5550}
5551
5552 bool CombinerHelper::matchSubAddSameReg(MachineInstr &MI,
5553 BuildFnTy &MatchInfo) const {
5554 assert(MI.getOpcode() == TargetOpcode::G_SUB);
5555 Register Dst = MI.getOperand(0).getReg();
5556 // (x + y) - z -> x (if y == z)
5557 // (x + y) - z -> y (if x == z)
5558 Register X, Y, Z;
5559 if (mi_match(Dst, MRI, m_GSub(m_GAdd(m_Reg(X), m_Reg(Y)), m_Reg(Z)))) {
5560 Register ReplaceReg;
5561 int64_t CstX, CstY;
5562 if (Y == Z || (mi_match(Y, MRI, m_ICstOrSplat(CstY)) &&
5563 mi_match(Z, MRI, m_SpecificICstOrSplat(CstY))))
5564 ReplaceReg = X;
5565 else if (X == Z || (mi_match(X, MRI, m_ICstOrSplat(CstX)) &&
5566 mi_match(Z, MRI, m_SpecificICstOrSplat(CstX))))
5567 ReplaceReg = Y;
5568 if (ReplaceReg) {
5569 MatchInfo = [=](MachineIRBuilder &B) { B.buildCopy(Dst, ReplaceReg); };
5570 return true;
5571 }
5572 }
5573
5574 // x - (y + z) -> 0 - y (if x == z)
5575 // x - (y + z) -> 0 - z (if x == y)
5576 if (mi_match(Dst, MRI, m_GSub(m_Reg(X), m_GAdd(m_Reg(Y), m_Reg(Z))))) {
5577 Register ReplaceReg;
5578 int64_t CstX;
5579 if (X == Z || (mi_match(X, MRI, m_ICstOrSplat(CstX)) &&
5580 mi_match(Z, MRI, m_SpecificICstOrSplat(CstX))))
5581 ReplaceReg = Y;
5582 else if (X == Y || (mi_match(X, MRI, m_ICstOrSplat(CstX)) &&
5583 mi_match(Y, MRI, m_SpecificICstOrSplat(CstX))))
5584 ReplaceReg = Z;
5585 if (ReplaceReg) {
5586 MatchInfo = [=](MachineIRBuilder &B) {
5587 auto Zero = B.buildConstant(MRI.getType(Dst), 0);
5588 B.buildSub(Dst, Zero, ReplaceReg);
5589 };
5590 return true;
5591 }
5592 }
5593 return false;
5594}
5595
5596 MachineInstr *CombinerHelper::buildUDivOrURemUsingMul(MachineInstr &MI) const {
5597 unsigned Opcode = MI.getOpcode();
5598 assert(Opcode == TargetOpcode::G_UDIV || Opcode == TargetOpcode::G_UREM);
5599 auto &UDivorRem = cast<GenericMachineInstr>(MI);
5600 Register Dst = UDivorRem.getReg(0);
5601 Register LHS = UDivorRem.getReg(1);
5602 Register RHS = UDivorRem.getReg(2);
5603 LLT Ty = MRI.getType(Dst);
5604 LLT ScalarTy = Ty.getScalarType();
5605 const unsigned EltBits = ScalarTy.getScalarSizeInBits();
5606 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
5607 LLT ScalarShiftAmtTy = ShiftAmtTy.getScalarType();
5608
5609 auto &MIB = Builder;
5610
5611 bool UseSRL = false;
5612 SmallVector<Register, 16> Shifts, Factors;
5613 auto *RHSDefInstr = cast<GenericMachineInstr>(getDefIgnoringCopies(RHS, MRI));
5614 bool IsSplat = getIConstantSplatVal(*RHSDefInstr, MRI).has_value();
5615
5616 auto BuildExactUDIVPattern = [&](const Constant *C) {
5617 // Don't recompute inverses for each splat element.
5618 if (IsSplat && !Factors.empty()) {
5619 Shifts.push_back(Shifts[0]);
5620 Factors.push_back(Factors[0]);
5621 return true;
5622 }
5623
5624 auto *CI = cast<ConstantInt>(C);
5625 APInt Divisor = CI->getValue();
5626 unsigned Shift = Divisor.countr_zero();
5627 if (Shift) {
5628 Divisor.lshrInPlace(Shift);
5629 UseSRL = true;
5630 }
5631
5632 // Calculate the multiplicative inverse modulo BW.
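// Illustrative example (computed by hand, not from the source): an exact
// s32 udiv by 6 strips the trailing zero first (Shift = 1, Divisor = 3)
// and then multiplies by the inverse of 3 mod 2^32, 0xAAAAAAAB, since
// 3 * 0xAAAAAAAB == 2^33 + 1 == 1 (mod 2^32).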
5633 APInt Factor = Divisor.multiplicativeInverse();
5634 Shifts.push_back(MIB.buildConstant(ScalarShiftAmtTy, Shift).getReg(0));
5635 Factors.push_back(MIB.buildConstant(ScalarTy, Factor).getReg(0));
5636 return true;
5637 };
5638
5639 if (MI.getFlag(MachineInstr::MIFlag::IsExact)) {
5640 // Collect all magic values from the build vector.
5641 if (!matchUnaryPredicate(MRI, RHS, BuildExactUDIVPattern))
5642 llvm_unreachable("Expected unary predicate match to succeed");
5643
5644 Register Shift, Factor;
5645 if (Ty.isVector()) {
5646 Shift = MIB.buildBuildVector(ShiftAmtTy, Shifts).getReg(0);
5647 Factor = MIB.buildBuildVector(Ty, Factors).getReg(0);
5648 } else {
5649 Shift = Shifts[0];
5650 Factor = Factors[0];
5651 }
5652
5653 Register Res = LHS;
5654
5655 if (UseSRL)
5656 Res = MIB.buildLShr(Ty, Res, Shift, MachineInstr::IsExact).getReg(0);
5657
5658 return MIB.buildMul(Ty, Res, Factor);
5659 }
5660
5661 unsigned KnownLeadingZeros =
5662 VT ? VT->getKnownBits(LHS).countMinLeadingZeros() : 0;
5663
5664 bool UseNPQ = false;
5665 SmallVector<Register, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
5666 auto BuildUDIVPattern = [&](const Constant *C) {
5667 auto *CI = cast<ConstantInt>(C);
5668 const APInt &Divisor = CI->getValue();
5669
5670 bool SelNPQ = false;
5671 APInt Magic(Divisor.getBitWidth(), 0);
5672 unsigned PreShift = 0, PostShift = 0;
5673
5674 // Magic algorithm doesn't work for division by 1. We need to emit a select
5675 // at the end.
5676 // TODO: Use undef values for divisor of 1.
5677 if (!Divisor.isOne()) {
5678
5679 // UnsignedDivisionByConstantInfo doesn't work correctly if leading zeros
5680 // in the dividend exceed the leading zeros of the divisor.
5681 UnsignedDivisionByConstantInfo magics =
5682 UnsignedDivisionByConstantInfo::get(
5683 Divisor, std::min(KnownLeadingZeros, Divisor.countl_zero()));
5684
5685 Magic = std::move(magics.Magic);
5686
5687 assert(magics.PreShift < Divisor.getBitWidth() &&
5688 "We shouldn't generate an undefined shift!");
5689 assert(magics.PostShift < Divisor.getBitWidth() &&
5690 "We shouldn't generate an undefined shift!");
5691 assert((!magics.IsAdd || magics.PreShift == 0) && "Unexpected pre-shift");
5692 PreShift = magics.PreShift;
5693 PostShift = magics.PostShift;
5694 SelNPQ = magics.IsAdd;
5695 }
5696
5697 PreShifts.push_back(
5698 MIB.buildConstant(ScalarShiftAmtTy, PreShift).getReg(0));
5699 MagicFactors.push_back(MIB.buildConstant(ScalarTy, Magic).getReg(0));
5700 NPQFactors.push_back(
5701 MIB.buildConstant(ScalarTy,
5702 SelNPQ ? APInt::getOneBitSet(EltBits, EltBits - 1)
5703 : APInt::getZero(EltBits))
5704 .getReg(0));
5705 PostShifts.push_back(
5706 MIB.buildConstant(ScalarShiftAmtTy, PostShift).getReg(0));
5707 UseNPQ |= SelNPQ;
5708 return true;
5709 };
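// Illustrative example (standard magic-number values, not from the source):
// for a 32-bit udiv by 5 this yields Magic = 0xCCCCCCCD, PreShift = 0,
// PostShift = 2 and no NPQ fixup, so the sequence emitted below reduces to
//   %q = G_UMULH %lhs, 0xCCCCCCCD
//   %res = G_LSHR %q, 2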
5710
5711 // Collect the shifts/magic values from each element.
5712 bool Matched = matchUnaryPredicate(MRI, RHS, BuildUDIVPattern);
5713 (void)Matched;
5714 assert(Matched && "Expected unary predicate match to succeed");
5715
5716 Register PreShift, PostShift, MagicFactor, NPQFactor;
5717 auto *RHSDef = getOpcodeDef<GBuildVector>(RHS, MRI);
5718 if (RHSDef) {
5719 PreShift = MIB.buildBuildVector(ShiftAmtTy, PreShifts).getReg(0);
5720 MagicFactor = MIB.buildBuildVector(Ty, MagicFactors).getReg(0);
5721 NPQFactor = MIB.buildBuildVector(Ty, NPQFactors).getReg(0);
5722 PostShift = MIB.buildBuildVector(ShiftAmtTy, PostShifts).getReg(0);
5723 } else {
5724 assert(MRI.getType(RHS).isScalar() &&
5725 "Non-build_vector operation should have been a scalar");
5726 PreShift = PreShifts[0];
5727 MagicFactor = MagicFactors[0];
5728 PostShift = PostShifts[0];
5729 }
5730
5731 Register Q = LHS;
5732 Q = MIB.buildLShr(Ty, Q, PreShift).getReg(0);
5733
5734 // Multiply the numerator (operand 0) by the magic value.
5735 Q = MIB.buildUMulH(Ty, Q, MagicFactor).getReg(0);
5736
5737 if (UseNPQ) {
5738 Register NPQ = MIB.buildSub(Ty, LHS, Q).getReg(0);
5739
5740 // For vectors we might have a mix of non-NPQ/NPQ paths, so use
5741 // G_UMULH to act as a SRL-by-1 for NPQ, else multiply by zero.
5742 if (Ty.isVector())
5743 NPQ = MIB.buildUMulH(Ty, NPQ, NPQFactor).getReg(0);
5744 else
5745 NPQ = MIB.buildLShr(Ty, NPQ, MIB.buildConstant(ShiftAmtTy, 1)).getReg(0);
5746
5747 Q = MIB.buildAdd(Ty, NPQ, Q).getReg(0);
5748 }
5749
5750 Q = MIB.buildLShr(Ty, Q, PostShift).getReg(0);
5751 auto One = MIB.buildConstant(Ty, 1);
5752 auto IsOne = MIB.buildICmp(
5753 CmpInst::Predicate::ICMP_EQ,
5754 Ty.isScalar() ? LLT::integer(1) : Ty.changeElementType(LLT::integer(1)),
5755 RHS, One);
5756 auto ret = MIB.buildSelect(Ty, IsOne, LHS, Q);
5757
5758 if (Opcode == TargetOpcode::G_UREM) {
5759 auto Prod = MIB.buildMul(Ty, ret, RHS);
5760 return MIB.buildSub(Ty, LHS, Prod);
5761 }
5762 return ret;
5763}
5764
5765 bool CombinerHelper::matchUDivOrURemByConst(MachineInstr &MI) const {
5766 unsigned Opcode = MI.getOpcode();
5767 assert(Opcode == TargetOpcode::G_UDIV || Opcode == TargetOpcode::G_UREM);
5768 Register Dst = MI.getOperand(0).getReg();
5769 Register RHS = MI.getOperand(2).getReg();
5770 LLT DstTy = MRI.getType(Dst);
5771
5772 auto &MF = *MI.getMF();
5773 AttributeList Attr = MF.getFunction().getAttributes();
5774 const auto &TLI = getTargetLowering();
5775 LLVMContext &Ctx = MF.getFunction().getContext();
5776 if (DstTy.getScalarSizeInBits() == 1 ||
5777 TLI.isIntDivCheap(getApproximateEVTForLLT(DstTy, Ctx), Attr))
5778 return false;
5779
5780 // Don't do this for minsize because the instruction sequence is usually
5781 // larger.
5782 if (MF.getFunction().hasMinSize())
5783 return false;
5784
5785 if (Opcode == TargetOpcode::G_UDIV &&
5786 MI.getFlag(MachineInstr::MIFlag::IsExact)) {
5787 return matchUnaryPredicate(
5788 MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); });
5789 }
5790
5791 auto *RHSDef = MRI.getVRegDef(RHS);
5792 if (!isConstantOrConstantVector(*RHSDef, MRI))
5793 return false;
5794
5795 // Don't do this if the types are not going to be legal.
5796 if (LI) {
5797 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_MUL, {DstTy, DstTy}}))
5798 return false;
5799 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMULH, {DstTy}}))
5800 return false;
5801 if (!isLegalOrBeforeLegalizer(
5802 {TargetOpcode::G_ICMP,
5803 {DstTy.isVector() ? DstTy.changeElementSize(1) : LLT::scalar(1),
5804 DstTy}}))
5805 return false;
5806 if (Opcode == TargetOpcode::G_UREM &&
5807 !isLegalOrBeforeLegalizer({TargetOpcode::G_SUB, {DstTy, DstTy}}))
5808 return false;
5809 }
5810
5811 return matchUnaryPredicate(
5812 MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); });
5813}
5814
5815 void CombinerHelper::applyUDivOrURemByConst(MachineInstr &MI) const {
5816 auto *NewMI = buildUDivOrURemUsingMul(MI);
5817 replaceSingleDefInstWithReg(MI, NewMI->getOperand(0).getReg());
5818}
5819
5820 bool CombinerHelper::matchSDivOrSRemByConst(MachineInstr &MI) const {
5821 unsigned Opcode = MI.getOpcode();
5822 assert(Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_SREM);
5823 Register Dst = MI.getOperand(0).getReg();
5824 Register RHS = MI.getOperand(2).getReg();
5825 LLT DstTy = MRI.getType(Dst);
5826 auto SizeInBits = DstTy.getScalarSizeInBits();
5827 LLT WideTy = DstTy.changeElementSize(SizeInBits * 2);
5828
5829 auto &MF = *MI.getMF();
5830 AttributeList Attr = MF.getFunction().getAttributes();
5831 const auto &TLI = getTargetLowering();
5832 LLVMContext &Ctx = MF.getFunction().getContext();
5833 if (DstTy.getScalarSizeInBits() < 3 ||
5834 TLI.isIntDivCheap(getApproximateEVTForLLT(DstTy, Ctx), Attr))
5835 return false;
5836
5837 // Don't do this for minsize because the instruction sequence is usually
5838 // larger.
5839 if (MF.getFunction().hasMinSize())
5840 return false;
5841
5842 // If the sdiv has an 'exact' flag we can use a simpler lowering.
5843 if (Opcode == TargetOpcode::G_SDIV &&
5844 MI.getFlag(MachineInstr::MIFlag::IsExact)) {
5845 return matchUnaryPredicate(
5846 MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); });
5847 }
5848
5849 auto *RHSDef = MRI.getVRegDef(RHS);
5850 if (!isConstantOrConstantVector(*RHSDef, MRI))
5851 return false;
5852
5853 // Don't do this if the types are not going to be legal.
5854 if (LI) {
5855 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_MUL, {DstTy, DstTy}}))
5856 return false;
5857 if (!isLegal({TargetOpcode::G_SMULH, {DstTy}}) &&
5858 !isLegalOrHasWidenScalar({TargetOpcode::G_MUL, {WideTy, WideTy}}))
5859 return false;
5860 if (Opcode == TargetOpcode::G_SREM &&
5861 !isLegalOrBeforeLegalizer({TargetOpcode::G_SUB, {DstTy, DstTy}}))
5862 return false;
5863 }
5864
5865 return matchUnaryPredicate(
5866 MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); });
5867}
5868
5869 void CombinerHelper::applySDivOrSRemByConst(MachineInstr &MI) const {
5870 auto *NewMI = buildSDivOrSRemUsingMul(MI);
5871 replaceSingleDefInstWithReg(MI, NewMI->getOperand(0).getReg());
5872}
5873
5874 MachineInstr *CombinerHelper::buildSDivOrSRemUsingMul(MachineInstr &MI) const {
5875 unsigned Opcode = MI.getOpcode();
5876 assert(MI.getOpcode() == TargetOpcode::G_SDIV ||
5877 Opcode == TargetOpcode::G_SREM);
5878 auto &SDivorRem = cast<GenericMachineInstr>(MI);
5879 Register Dst = SDivorRem.getReg(0);
5880 Register LHS = SDivorRem.getReg(1);
5881 Register RHS = SDivorRem.getReg(2);
5882 LLT Ty = MRI.getType(Dst);
5883 LLT ScalarTy = Ty.getScalarType();
5884 const unsigned EltBits = ScalarTy.getScalarSizeInBits();
5885 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
5886 LLT ScalarShiftAmtTy = ShiftAmtTy.getScalarType();
5887 auto &MIB = Builder;
5888
5889 bool UseSRA = false;
5890 SmallVector<Register, 16> ExactShifts, ExactFactors;
5891
5892 auto *RHSDefInstr = cast<GenericMachineInstr>(getDefIgnoringCopies(RHS, MRI));
5893 bool IsSplat = getIConstantSplatVal(*RHSDefInstr, MRI).has_value();
5894
5895 auto BuildExactSDIVPattern = [&](const Constant *C) {
5896 // Don't recompute inverses for each splat element.
5897 if (IsSplat && !ExactFactors.empty()) {
5898 ExactShifts.push_back(ExactShifts[0]);
5899 ExactFactors.push_back(ExactFactors[0]);
5900 return true;
5901 }
5902
5903 auto *CI = cast<ConstantInt>(C);
5904 APInt Divisor = CI->getValue();
5905 unsigned Shift = Divisor.countr_zero();
5906 if (Shift) {
5907 Divisor.ashrInPlace(Shift);
5908 UseSRA = true;
5909 }
5910
5911 // Calculate the multiplicative inverse modulo BW.
5912 // 2^W requires W + 1 bits, so we have to extend and then truncate.
5913 APInt Factor = Divisor.multiplicativeInverse();
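// Illustrative (computed by hand, not from the source): for an exact s32
// sdiv by 10, Shift = 1 and Factor is the inverse of 5 mod 2^32, i.e.
// 0xCCCCCCCD, because 5 * 0xCCCCCCCD == 2^34 + 1 == 1 (mod 2^32).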
5914 ExactShifts.push_back(MIB.buildConstant(ScalarShiftAmtTy, Shift).getReg(0));
5915 ExactFactors.push_back(MIB.buildConstant(ScalarTy, Factor).getReg(0));
5916 return true;
5917 };
5918
5919 if (MI.getFlag(MachineInstr::MIFlag::IsExact)) {
5920 // Collect all magic values from the build vector.
5921 bool Matched = matchUnaryPredicate(MRI, RHS, BuildExactSDIVPattern);
5922 (void)Matched;
5923 assert(Matched && "Expected unary predicate match to succeed");
5924
5925 Register Shift, Factor;
5926 if (Ty.isVector()) {
5927 Shift = MIB.buildBuildVector(ShiftAmtTy, ExactShifts).getReg(0);
5928 Factor = MIB.buildBuildVector(Ty, ExactFactors).getReg(0);
5929 } else {
5930 Shift = ExactShifts[0];
5931 Factor = ExactFactors[0];
5932 }
5933
5934 Register Res = LHS;
5935
5936 if (UseSRA)
5937 Res = MIB.buildAShr(Ty, Res, Shift, MachineInstr::IsExact).getReg(0);
5938
5939 return MIB.buildMul(Ty, Res, Factor);
5940 }
5941
5942 SmallVector<Register, 16> MagicFactors, Factors, Shifts, ShiftMasks;
5943
5944 auto BuildSDIVPattern = [&](const Constant *C) {
5945 auto *CI = cast<ConstantInt>(C);
5946 const APInt &Divisor = CI->getValue();
5947
5948 SignedDivisionByConstantInfo Magics =
5949 SignedDivisionByConstantInfo::get(Divisor);
5950 int NumeratorFactor = 0;
5951 int ShiftMask = -1;
5952
5953 if (Divisor.isOne() || Divisor.isAllOnes()) {
5954 // If d is +1/-1, we just multiply the numerator by +1/-1.
5955 NumeratorFactor = Divisor.getSExtValue();
5956 Magics.Magic = 0;
5957 Magics.ShiftAmount = 0;
5958 ShiftMask = 0;
5959 } else if (Divisor.isStrictlyPositive() && Magics.Magic.isNegative()) {
5960 // If d > 0 and m < 0, add the numerator.
5961 NumeratorFactor = 1;
5962 } else if (Divisor.isNegative() && Magics.Magic.isStrictlyPositive()) {
5963 // If d < 0 and m > 0, subtract the numerator.
5964 NumeratorFactor = -1;
5965 }
5966
5967 MagicFactors.push_back(MIB.buildConstant(ScalarTy, Magics.Magic).getReg(0));
5968 Factors.push_back(MIB.buildConstant(ScalarTy, NumeratorFactor).getReg(0));
5969 Shifts.push_back(
5970 MIB.buildConstant(ScalarShiftAmtTy, Magics.ShiftAmount).getReg(0));
5971 ShiftMasks.push_back(MIB.buildConstant(ScalarTy, ShiftMask).getReg(0));
5972
5973 return true;
5974 };
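// Illustrative example (standard magic values, assumed rather than taken
// from this file): for a 32-bit sdiv by 3, the magic computation gives
// Magic = 0x55555556 and ShiftAmount = 0, with NumeratorFactor = 0 and
// ShiftMask = -1, so the code below computes
//   %q = G_SMULH %lhs, 0x55555556
//   %t = G_AND (G_LSHR %q, 31), -1   ; add back one for negative quotients
//   %res = G_ADD %q, %t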
5975
5976 // Collect the shifts/magic values from each element.
5977 bool Matched = matchUnaryPredicate(MRI, RHS, BuildSDIVPattern);
5978 (void)Matched;
5979 assert(Matched && "Expected unary predicate match to succeed");
5980
5981 Register MagicFactor, Factor, Shift, ShiftMask;
5982 auto *RHSDef = getOpcodeDef<GBuildVector>(RHS, MRI);
5983 if (RHSDef) {
5984 MagicFactor = MIB.buildBuildVector(Ty, MagicFactors).getReg(0);
5985 Factor = MIB.buildBuildVector(Ty, Factors).getReg(0);
5986 Shift = MIB.buildBuildVector(ShiftAmtTy, Shifts).getReg(0);
5987 ShiftMask = MIB.buildBuildVector(Ty, ShiftMasks).getReg(0);
5988 } else {
5989 assert(MRI.getType(RHS).isScalar() &&
5990 "Non-build_vector operation should have been a scalar");
5991 MagicFactor = MagicFactors[0];
5992 Factor = Factors[0];
5993 Shift = Shifts[0];
5994 ShiftMask = ShiftMasks[0];
5995 }
5996
5997 Register Q = LHS;
5998 Q = MIB.buildSMulH(Ty, LHS, MagicFactor).getReg(0);
5999
6000 // (Optionally) Add/subtract the numerator using Factor.
6001 Factor = MIB.buildMul(Ty, LHS, Factor).getReg(0);
6002 Q = MIB.buildAdd(Ty, Q, Factor).getReg(0);
6003
6004 // Shift right algebraic by shift value.
6005 Q = MIB.buildAShr(Ty, Q, Shift).getReg(0);
6006
6007 // Extract the sign bit, mask it and add it to the quotient.
6008 auto SignShift = MIB.buildConstant(ShiftAmtTy, EltBits - 1);
6009 auto T = MIB.buildLShr(Ty, Q, SignShift);
6010 T = MIB.buildAnd(Ty, T, ShiftMask);
6011 auto ret = MIB.buildAdd(Ty, Q, T);
6012
6013 if (Opcode == TargetOpcode::G_SREM) {
6014 auto Prod = MIB.buildMul(Ty, ret, RHS);
6015 return MIB.buildSub(Ty, LHS, Prod);
6016 }
6017 return ret;
6018}
6019
6020 bool CombinerHelper::matchDivByPow2(MachineInstr &MI, bool IsSigned) const {
6021 assert((MI.getOpcode() == TargetOpcode::G_SDIV ||
6022 MI.getOpcode() == TargetOpcode::G_UDIV) &&
6023 "Expected SDIV or UDIV");
6024 auto &Div = cast<GenericMachineInstr>(MI);
6025 Register RHS = Div.getReg(2);
6026 auto MatchPow2 = [&](const Constant *C) {
6027 auto *CI = dyn_cast<ConstantInt>(C);
6028 return CI && (CI->getValue().isPowerOf2() ||
6029 (IsSigned && CI->getValue().isNegatedPowerOf2()));
6030 };
6031 return matchUnaryPredicate(MRI, RHS, MatchPow2, /*AllowUndefs=*/false);
6032}
6033
6034 void CombinerHelper::applySDivByPow2(MachineInstr &MI) const {
6035 assert(MI.getOpcode() == TargetOpcode::G_SDIV && "Expected SDIV");
6036 auto &SDiv = cast<GenericMachineInstr>(MI);
6037 Register Dst = SDiv.getReg(0);
6038 Register LHS = SDiv.getReg(1);
6039 Register RHS = SDiv.getReg(2);
6040 LLT Ty = MRI.getType(Dst);
6041 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
6042 LLT CCVT = Ty.isVector() ? LLT::vector(Ty.getElementCount(), LLT::integer(1))
6043 : LLT::integer(1);
6044
6045 // Effectively we want to lower G_SDIV %lhs, %rhs, where %rhs is a power of 2,
6046 // to the following version:
6047 //
6048 // %c1 = G_CTTZ %rhs
6049 // %inexact = G_SUB $bitwidth, %c1
6050 // %sign = G_ASHR %lhs, $(bitwidth - 1)
6051 // %lshr = G_LSHR %sign, %inexact
6052 // %add = G_ADD %lhs, %lshr
6053 // %ashr = G_ASHR %add, %c1
6054 // %ashr = G_SELECT %isoneorallones, %lhs, %ashr
6055 // %zero = G_CONSTANT $0
6056 // %neg = G_NEG %ashr
6057 // %isneg = G_ICMP SLT %rhs, %zero
6058 // %res = G_SELECT %isneg, %neg, %ashr
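//
// Numeric check (illustrative): for %lhs = -13 sdiv %rhs = 4 on s32,
// %c1 = 2, %inexact = 30, %sign = -1, %lshr = 3, %add = -10 and
// %ashr = -10 ashr 2 = -3, which is -13 / 4 rounded toward zero.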
6059
6060 unsigned BitWidth = Ty.getScalarSizeInBits();
6061 auto Zero = Builder.buildConstant(Ty, 0);
6062
6063 auto Bits = Builder.buildConstant(ShiftAmtTy, BitWidth);
6064 auto C1 = Builder.buildCTTZ(ShiftAmtTy, RHS);
6065 auto Inexact = Builder.buildSub(ShiftAmtTy, Bits, C1);
6066 // Splat the sign bit into the register
6067 auto Sign = Builder.buildAShr(
6068 Ty, LHS, Builder.buildConstant(ShiftAmtTy, BitWidth - 1));
6069
6070 // Add (LHS < 0) ? abs2 - 1 : 0;
6071 auto LSrl = Builder.buildLShr(Ty, Sign, Inexact);
6072 auto Add = Builder.buildAdd(Ty, LHS, LSrl);
6073 auto AShr = Builder.buildAShr(Ty, Add, C1);
6074
6075 // Special case: (sdiv X, 1) -> X
6076 // Special Case: (sdiv X, -1) -> 0-X
6077 auto One = Builder.buildConstant(Ty, 1);
6078 auto MinusOne = Builder.buildConstant(Ty, -1);
6079 auto IsOne = Builder.buildICmp(CmpInst::Predicate::ICMP_EQ, CCVT, RHS, One);
6080 auto IsMinusOne =
6081 Builder.buildICmp(CmpInst::Predicate::ICMP_EQ, CCVT, RHS, MinusOne);
6082 auto IsOneOrMinusOne = Builder.buildOr(CCVT, IsOne, IsMinusOne);
6083 AShr = Builder.buildSelect(Ty, IsOneOrMinusOne, LHS, AShr);
6084
6085 // If divided by a positive value, we're done. Otherwise, the result must be
6086 // negated.
6087 auto Neg = Builder.buildNeg(Ty, AShr);
6088 auto IsNeg = Builder.buildICmp(CmpInst::Predicate::ICMP_SLT, CCVT, RHS, Zero);
6089 Builder.buildSelect(MI.getOperand(0).getReg(), IsNeg, Neg, AShr);
6090 MI.eraseFromParent();
6091}
6092
6093 void CombinerHelper::applyUDivByPow2(MachineInstr &MI) const {
6094 assert(MI.getOpcode() == TargetOpcode::G_UDIV && "Expected UDIV");
6095 auto &UDiv = cast<GenericMachineInstr>(MI);
6096 Register Dst = UDiv.getReg(0);
6097 Register LHS = UDiv.getReg(1);
6098 Register RHS = UDiv.getReg(2);
6099 LLT Ty = MRI.getType(Dst);
6100 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
6101
6102 auto C1 = Builder.buildCTTZ(ShiftAmtTy, RHS);
6103 Builder.buildLShr(MI.getOperand(0).getReg(), LHS, C1);
6104 MI.eraseFromParent();
6105}
6106
6107 bool CombinerHelper::matchUMulHToLShr(MachineInstr &MI) const {
6108 assert(MI.getOpcode() == TargetOpcode::G_UMULH);
6109 Register RHS = MI.getOperand(2).getReg();
6110 Register Dst = MI.getOperand(0).getReg();
6111 LLT Ty = MRI.getType(Dst);
6112 LLT RHSTy = MRI.getType(RHS);
6113 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
6114 auto MatchPow2ExceptOne = [&](const Constant *C) {
6115 if (auto *CI = dyn_cast<ConstantInt>(C))
6116 return CI->getValue().isPowerOf2() && !CI->getValue().isOne();
6117 return false;
6118 };
6119 if (!matchUnaryPredicate(MRI, RHS, MatchPow2ExceptOne, false))
6120 return false;
6121 // We need to check both G_LSHR and G_CTLZ because the combine uses G_CTLZ to
6122 // get log base 2, and it is not always legal on a target.
6123 return isLegalOrBeforeLegalizer({TargetOpcode::G_LSHR, {Ty, ShiftAmtTy}}) &&
6124 isLegalOrBeforeLegalizer({TargetOpcode::G_CTLZ, {RHSTy, RHSTy}});
6125}
6126
6127 void CombinerHelper::applyUMulHToLShr(MachineInstr &MI) const {
6128 Register LHS = MI.getOperand(1).getReg();
6129 Register RHS = MI.getOperand(2).getReg();
6130 Register Dst = MI.getOperand(0).getReg();
6131 LLT Ty = MRI.getType(Dst);
6133 unsigned NumEltBits = Ty.getScalarSizeInBits();
6134
6135 auto LogBase2 = buildLogBase2(RHS, Builder);
6136 auto ShiftAmt =
6137 Builder.buildSub(Ty, Builder.buildConstant(Ty, NumEltBits), LogBase2);
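// Illustrative: for an s32 %x with RHS = 8 = 1 << 3, LogBase2 = 3 and the
// shift amount is 32 - 3 = 29, i.e. G_UMULH %x, 8 == G_LSHR %x, 29.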
6138 auto Trunc = Builder.buildZExtOrTrunc(ShiftAmtTy, ShiftAmt);
6139 Builder.buildLShr(Dst, LHS, Trunc);
6140 MI.eraseFromParent();
6141}
6142
6143 bool CombinerHelper::matchTruncSSatS(MachineInstr &MI,
6144 Register &MatchInfo) const {
6145 Register Dst = MI.getOperand(0).getReg();
6146 Register Src = MI.getOperand(1).getReg();
6147 LLT DstTy = MRI.getType(Dst);
6148 LLT SrcTy = MRI.getType(Src);
6149 unsigned NumDstBits = DstTy.getScalarSizeInBits();
6150 unsigned NumSrcBits = SrcTy.getScalarSizeInBits();
6151 assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
6152
6153 if (!isLegalOrBeforeLegalizer(
6154 {TargetOpcode::G_TRUNC_SSAT_S, {DstTy, SrcTy}}))
6155 return false;
6156
6157 APInt SignedMax = APInt::getSignedMaxValue(NumDstBits).sext(NumSrcBits);
6158 APInt SignedMin = APInt::getSignedMinValue(NumDstBits).sext(NumSrcBits);
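// E.g. for an s32 -> s16 saturating truncate, SignedMax = 32767 and
// SignedMin = -32768 (sign-extended to s32), so both orderings of the
// min/max clamp below select the G_TRUNC_SSAT_S form.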
6159 return mi_match(Src, MRI,
6160 m_GSMin(m_GSMax(m_Reg(MatchInfo),
6161 m_SpecificICstOrSplat(SignedMin)),
6162 m_SpecificICstOrSplat(SignedMax))) ||
6163 mi_match(Src, MRI,
6164 m_GSMax(m_GSMin(m_Reg(MatchInfo),
6165 m_SpecificICstOrSplat(SignedMax)),
6166 m_SpecificICstOrSplat(SignedMin)));
6167}
6168
6169 void CombinerHelper::applyTruncSSatS(MachineInstr &MI,
6170 Register &MatchInfo) const {
6171 Register Dst = MI.getOperand(0).getReg();
6172 Builder.buildTruncSSatS(Dst, MatchInfo);
6173 MI.eraseFromParent();
6174}
6175
6176 bool CombinerHelper::matchTruncSSatU(MachineInstr &MI,
6177 Register &MatchInfo) const {
6178 Register Dst = MI.getOperand(0).getReg();
6179 Register Src = MI.getOperand(1).getReg();
6180 LLT DstTy = MRI.getType(Dst);
6181 LLT SrcTy = MRI.getType(Src);
6182 unsigned NumDstBits = DstTy.getScalarSizeInBits();
6183 unsigned NumSrcBits = SrcTy.getScalarSizeInBits();
6184 assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
6185
6186 if (!isLegalOrBeforeLegalizer(
6187 {TargetOpcode::G_TRUNC_SSAT_U, {DstTy, SrcTy}}))
6188 return false;
6189 APInt UnsignedMax = APInt::getMaxValue(NumDstBits).zext(NumSrcBits);
6190 return mi_match(Src, MRI,
6191 m_GSMin(m_GSMax(m_Reg(MatchInfo), m_SpecificICstOrSplat(0)),
6192 m_SpecificICstOrSplat(UnsignedMax))) ||
6193 mi_match(Src, MRI,
6194 m_GSMax(m_GSMin(m_Reg(MatchInfo),
6195 m_SpecificICstOrSplat(UnsignedMax)),
6196 m_SpecificICstOrSplat(0))) ||
6197 mi_match(Src, MRI,
6198 m_GUMin(m_Reg(MatchInfo),
6199 m_SpecificICstOrSplat(UnsignedMax)));
6200}
6201
6202 void CombinerHelper::applyTruncSSatU(MachineInstr &MI,
6203 Register &MatchInfo) const {
6204 Register Dst = MI.getOperand(0).getReg();
6205 Builder.buildTruncSSatU(Dst, MatchInfo);
6206 MI.eraseFromParent();
6207}
6208
6209 bool CombinerHelper::matchTruncUSatU(MachineInstr &MI,
6210 MachineInstr &MinMI) const {
6211 Register Min = MinMI.getOperand(2).getReg();
6212 Register Val = MinMI.getOperand(1).getReg();
6213 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6214 LLT SrcTy = MRI.getType(Val);
6215 unsigned NumDstBits = DstTy.getScalarSizeInBits();
6216 unsigned NumSrcBits = SrcTy.getScalarSizeInBits();
6217 assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
6218
6219 if (!isLegalOrBeforeLegalizer(
6220 {TargetOpcode::G_TRUNC_SSAT_U, {DstTy, SrcTy}}))
6221 return false;
6222 APInt UnsignedMax = APInt::getMaxValue(NumDstBits).zext(NumSrcBits);
6223 return mi_match(Min, MRI, m_SpecificICstOrSplat(UnsignedMax)) &&
6224 !mi_match(Val, MRI, m_GSMax(m_Reg(), m_Reg()));
6225}
6226
6228 MachineInstr &SrcMI) const {
6229 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6230 LLT SrcTy = MRI.getType(SrcMI.getOperand(1).getReg());
6231
6232 return LI &&
6233 isLegalOrBeforeLegalizer({TargetOpcode::G_FPTOUI_SAT, {DstTy, SrcTy}});
6234}
6235
6236 bool CombinerHelper::matchRedundantNegOperands(MachineInstr &MI,
6237 BuildFnTy &MatchInfo) const {
6238 unsigned Opc = MI.getOpcode();
6239 assert(Opc == TargetOpcode::G_FADD || Opc == TargetOpcode::G_FSUB ||
6240 Opc == TargetOpcode::G_FMUL || Opc == TargetOpcode::G_FDIV ||
6241 Opc == TargetOpcode::G_FMAD || Opc == TargetOpcode::G_FMA);
6242
6243 Register Dst = MI.getOperand(0).getReg();
6244 Register X = MI.getOperand(1).getReg();
6245 Register Y = MI.getOperand(2).getReg();
6246 LLT Type = MRI.getType(Dst);
6247
6248 // fold (fadd x, fneg(y)) -> (fsub x, y)
6249 // fold (fadd fneg(y), x) -> (fsub x, y)
6250 // G_FADD is commutative so both cases are checked by m_GFAdd
6251 if (mi_match(Dst, MRI, m_GFAdd(m_Reg(X), m_GFNeg(m_Reg(Y)))) &&
6252 isLegalOrBeforeLegalizer({TargetOpcode::G_FSUB, {Type}})) {
6253 Opc = TargetOpcode::G_FSUB;
6254 }
6255 // fold (fsub x, fneg(y)) -> (fadd x, y)
6256 else if (mi_match(Dst, MRI, m_GFSub(m_Reg(X), m_GFNeg(m_Reg(Y)))) &&
6257 isLegalOrBeforeLegalizer({TargetOpcode::G_FADD, {Type}})) {
6258 Opc = TargetOpcode::G_FADD;
6259 }
6260 // fold (fmul fneg(x), fneg(y)) -> (fmul x, y)
6261 // fold (fdiv fneg(x), fneg(y)) -> (fdiv x, y)
6262 // fold (fmad fneg(x), fneg(y), z) -> (fmad x, y, z)
6263 // fold (fma fneg(x), fneg(y), z) -> (fma x, y, z)
6264 else if ((Opc == TargetOpcode::G_FMUL || Opc == TargetOpcode::G_FDIV ||
6265 Opc == TargetOpcode::G_FMAD || Opc == TargetOpcode::G_FMA) &&
6266 mi_match(X, MRI, m_GFNeg(m_Reg(X))) &&
6267 mi_match(Y, MRI, m_GFNeg(m_Reg(Y)))) {
6268 // no opcode change
6269 } else
6270 return false;
6271
6272 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6273 Observer.changingInstr(MI);
6274 MI.setDesc(B.getTII().get(Opc));
6275 MI.getOperand(1).setReg(X);
6276 MI.getOperand(2).setReg(Y);
6277 Observer.changedInstr(MI);
6278 };
6279 return true;
6280}
6281
6282 bool CombinerHelper::matchFsubToFneg(MachineInstr &MI,
6283 Register &MatchInfo) const {
6284 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
6285
6286 Register LHS = MI.getOperand(1).getReg();
6287 MatchInfo = MI.getOperand(2).getReg();
6288 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
6289
6290 const auto LHSCst = Ty.isVector()
6291 ? getFConstantSplat(LHS, MRI, /* allowUndef */ true)
6292 : getFConstantVRegValWithLookThrough(LHS, MRI);
6293 if (!LHSCst)
6294 return false;
6295
6296 // -0.0 is always allowed
6297 if (LHSCst->Value.isNegZero())
6298 return true;
6299
6300 // +0.0 is only allowed if nsz is set.
6301 if (LHSCst->Value.isPosZero())
6302 return MI.getFlag(MachineInstr::FmNsz);
6303
6304 return false;
6305}
6306
6307 void CombinerHelper::applyFsubToFneg(MachineInstr &MI,
6308 Register &MatchInfo) const {
6309 Register Dst = MI.getOperand(0).getReg();
6310 Builder.buildFNeg(
6311 Dst, Builder.buildFCanonicalize(MRI.getType(Dst), MatchInfo).getReg(0));
6312 eraseInst(MI);
6313}
6314
6315/// Checks if \p MI is TargetOpcode::G_FMUL and contractable either
6316/// due to global flags or MachineInstr flags.
6317static bool isContractableFMul(MachineInstr &MI, bool AllowFusionGlobally) {
6318 if (MI.getOpcode() != TargetOpcode::G_FMUL)
6319 return false;
6320 return AllowFusionGlobally || MI.getFlag(MachineInstr::MIFlag::FmContract);
6321}
6322
6323static bool hasMoreUses(const MachineInstr &MI0, const MachineInstr &MI1,
6324 const MachineRegisterInfo &MRI) {
6325 return std::distance(MRI.use_instr_nodbg_begin(MI0.getOperand(0).getReg()),
6326 MRI.use_instr_nodbg_end()) >
6327 std::distance(MRI.use_instr_nodbg_begin(MI1.getOperand(0).getReg()),
6328 MRI.use_instr_nodbg_end());
6329}
6330
6331 bool CombinerHelper::canCombineFMadOrFMA(MachineInstr &MI,
6332 bool &AllowFusionGlobally,
6333 bool &HasFMAD, bool &Aggressive,
6334 bool CanReassociate) const {
6335
6336 auto *MF = MI.getMF();
6337 const auto &TLI = *MF->getSubtarget().getTargetLowering();
6338 const TargetOptions &Options = MF->getTarget().Options;
6339 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
6340
6341 if (CanReassociate && !MI.getFlag(MachineInstr::MIFlag::FmReassoc))
6342 return false;
6343
6344 // Floating-point multiply-add with intermediate rounding.
6345 HasFMAD = (!isPreLegalize() && TLI.isFMADLegal(MI, DstType));
6346 // Floating-point multiply-add without intermediate rounding.
6347 bool HasFMA = TLI.isFMAFasterThanFMulAndFAdd(*MF, DstType) &&
6348 isLegalOrBeforeLegalizer({TargetOpcode::G_FMA, {DstType}});
6349 // No valid opcode, do not combine.
6350 if (!HasFMAD && !HasFMA)
6351 return false;
6352
6353 AllowFusionGlobally = Options.AllowFPOpFusion == FPOpFusion::Fast || HasFMAD;
6354 // If the addition is not contractable, do not combine.
6355 if (!AllowFusionGlobally && !MI.getFlag(MachineInstr::MIFlag::FmContract))
6356 return false;
6357
6358 Aggressive = TLI.enableAggressiveFMAFusion(DstType);
6359 return true;
6360}
6361
6362 bool CombinerHelper::matchCombineFAddFMulToFMadOrFMA(
6363 MachineInstr &MI,
6364 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
6365 assert(MI.getOpcode() == TargetOpcode::G_FADD);
6366
6367 bool AllowFusionGlobally, HasFMAD, Aggressive;
6368 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6369 return false;
6370
6371 Register Op1 = MI.getOperand(1).getReg();
6372 Register Op2 = MI.getOperand(2).getReg();
6373 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
6374 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
6375 unsigned PreferredFusedOpcode =
6376 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6377
6378 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
6379 // prefer to fold the multiply with fewer uses.
6380 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6381 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
6382 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
6383 std::swap(LHS, RHS);
6384 }
6385
6386 // fold (fadd (fmul x, y), z) -> (fma x, y, z)
6387 if (isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6388 (Aggressive || MRI.hasOneNonDBGUse(LHS.Reg))) {
6389 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6390 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6391 {LHS.MI->getOperand(1).getReg(),
6392 LHS.MI->getOperand(2).getReg(), RHS.Reg});
6393 };
6394 return true;
6395 }
6396
6397 // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
6398 if (isContractableFMul(*RHS.MI, AllowFusionGlobally) &&
6399 (Aggressive || MRI.hasOneNonDBGUse(RHS.Reg))) {
6400 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6401 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6402 {RHS.MI->getOperand(1).getReg(),
6403 RHS.MI->getOperand(2).getReg(), LHS.Reg});
6404 };
6405 return true;
6406 }
6407
6408 return false;
6409}
6410
6411 bool CombinerHelper::matchCombineFAddFpExtFMulToFMadOrFMA(
6412 MachineInstr &MI,
6413 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
6414 assert(MI.getOpcode() == TargetOpcode::G_FADD);
6415
6416 bool AllowFusionGlobally, HasFMAD, Aggressive;
6417 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6418 return false;
6419
6420 const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
6421 Register Op1 = MI.getOperand(1).getReg();
6422 Register Op2 = MI.getOperand(2).getReg();
6423 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
6424 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
6425 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
6426
6427 unsigned PreferredFusedOpcode =
6428 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6429
6430 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
6431 // prefer to fold the multiply with fewer uses.
6432 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6433 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
6434 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
6435 std::swap(LHS, RHS);
6436 }
6437
6438 // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
6439 MachineInstr *FpExtSrc;
6440 if (mi_match(LHS.Reg, MRI, m_GFPExt(m_MInstr(FpExtSrc))) &&
6441 isContractableFMul(*FpExtSrc, AllowFusionGlobally) &&
6442 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
6443 MRI.getType(FpExtSrc->getOperand(1).getReg()))) {
6444 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6445 auto FpExtX = B.buildFPExt(DstType, FpExtSrc->getOperand(1).getReg());
6446 auto FpExtY = B.buildFPExt(DstType, FpExtSrc->getOperand(2).getReg());
6447 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6448 {FpExtX.getReg(0), FpExtY.getReg(0), RHS.Reg});
6449 };
6450 return true;
6451 }
6452
6453 // fold (fadd z, (fpext (fmul x, y))) -> (fma (fpext x), (fpext y), z)
6454 // Note: Commutes FADD operands.
6455 if (mi_match(RHS.Reg, MRI, m_GFPExt(m_MInstr(FpExtSrc))) &&
6456 isContractableFMul(*FpExtSrc, AllowFusionGlobally) &&
6457 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
6458 MRI.getType(FpExtSrc->getOperand(1).getReg()))) {
6459 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6460 auto FpExtX = B.buildFPExt(DstType, FpExtSrc->getOperand(1).getReg());
6461 auto FpExtY = B.buildFPExt(DstType, FpExtSrc->getOperand(2).getReg());
6462 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6463 {FpExtX.getReg(0), FpExtY.getReg(0), LHS.Reg});
6464 };
6465 return true;
6466 }
6467
6468 return false;
6469}
6470
6471 bool CombinerHelper::matchCombineFAddFMAFMulToFMadOrFMA(
6472 MachineInstr &MI,
6473 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
6474 assert(MI.getOpcode() == TargetOpcode::G_FADD);
6475
6476 bool AllowFusionGlobally, HasFMAD, Aggressive;
6477 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive, true))
6478 return false;
6479
6480 Register Op1 = MI.getOperand(1).getReg();
6481 Register Op2 = MI.getOperand(2).getReg();
6482 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
6483 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
6484 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6485
6486 unsigned PreferredFusedOpcode =
6487 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6488
6489 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
6490 // prefer to fold the multiply with fewer uses.
6491 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6492 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
6493 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
6494 std::swap(LHS, RHS);
6495 }
6496
6497 MachineInstr *FMA = nullptr;
6498 Register Z;
6499 // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z))
6500 if (LHS.MI->getOpcode() == PreferredFusedOpcode &&
6501 (MRI.getVRegDef(LHS.MI->getOperand(3).getReg())->getOpcode() ==
6502 TargetOpcode::G_FMUL) &&
6503 MRI.hasOneNonDBGUse(LHS.MI->getOperand(0).getReg()) &&
6504 MRI.hasOneNonDBGUse(LHS.MI->getOperand(3).getReg())) {
6505 FMA = LHS.MI;
6506 Z = RHS.Reg;
6507 }
6508 // fold (fadd z, (fma x, y, (fmul u, v))) -> (fma x, y, (fma u, v, z))
6509 else if (RHS.MI->getOpcode() == PreferredFusedOpcode &&
6510 (MRI.getVRegDef(RHS.MI->getOperand(3).getReg())->getOpcode() ==
6511 TargetOpcode::G_FMUL) &&
6512 MRI.hasOneNonDBGUse(RHS.MI->getOperand(0).getReg()) &&
6513 MRI.hasOneNonDBGUse(RHS.MI->getOperand(3).getReg())) {
6514 Z = LHS.Reg;
6515 FMA = RHS.MI;
6516 }
6517
6518 if (FMA) {
6519 MachineInstr *FMulMI = MRI.getVRegDef(FMA->getOperand(3).getReg());
6520 Register X = FMA->getOperand(1).getReg();
6521 Register Y = FMA->getOperand(2).getReg();
6522 Register U = FMulMI->getOperand(1).getReg();
6523 Register V = FMulMI->getOperand(2).getReg();
6524
6525 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6526 Register InnerFMA = MRI.createGenericVirtualRegister(DstTy);
6527 B.buildInstr(PreferredFusedOpcode, {InnerFMA}, {U, V, Z});
6528 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6529 {X, Y, InnerFMA});
6530 };
6531 return true;
6532 }
6533
6534 return false;
6535}
6536
6537 bool CombinerHelper::matchCombineFAddFpExtFMAFMulToFMadOrFMAAggressive(
6538 MachineInstr &MI,
6539 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
6540 assert(MI.getOpcode() == TargetOpcode::G_FADD);
6541
6542 bool AllowFusionGlobally, HasFMAD, Aggressive;
6543 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6544 return false;
6545
6546 if (!Aggressive)
6547 return false;
6548
6549 const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
6550 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
6551 Register Op1 = MI.getOperand(1).getReg();
6552 Register Op2 = MI.getOperand(2).getReg();
6553 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
6554 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
6555
6556 unsigned PreferredFusedOpcode =
6557 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6558
6559 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
6560 // prefer to fold the multiply with fewer uses.
6561 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6562 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
6563 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
6564 std::swap(LHS, RHS);
6565 }
6566
6567 // Builds: (fma x, y, (fma (fpext u), (fpext v), z))
6568 auto buildMatchInfo = [=, &MI](Register U, Register V, Register Z, Register X,
6569 Register Y, MachineIRBuilder &B) {
6570 Register FpExtU = B.buildFPExt(DstType, U).getReg(0);
6571 Register FpExtV = B.buildFPExt(DstType, V).getReg(0);
6572 Register InnerFMA =
6573 B.buildInstr(PreferredFusedOpcode, {DstType}, {FpExtU, FpExtV, Z})
6574 .getReg(0);
6575 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6576 {X, Y, InnerFMA});
6577 };
6578
6579 MachineInstr *FMulMI, *FMAMI;
6580 // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
6581 // -> (fma x, y, (fma (fpext u), (fpext v), z))
6582 if (LHS.MI->getOpcode() == PreferredFusedOpcode &&
6583 mi_match(LHS.MI->getOperand(3).getReg(), MRI,
6584 m_GFPExt(m_MInstr(FMulMI))) &&
6585 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6586 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
6587 MRI.getType(FMulMI->getOperand(0).getReg()))) {
6588 MatchInfo = [=](MachineIRBuilder &B) {
6589 buildMatchInfo(FMulMI->getOperand(1).getReg(),
6590 FMulMI->getOperand(2).getReg(), RHS.Reg,
6591 LHS.MI->getOperand(1).getReg(),
6592 LHS.MI->getOperand(2).getReg(), B);
6593 };
6594 return true;
6595 }
6596
6597 // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
6598 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
6599 // FIXME: This turns two single-precision and one double-precision
6600 // operation into two double-precision operations, which might not be
6601 // interesting for all targets, especially GPUs.
6602 if (mi_match(LHS.Reg, MRI, m_GFPExt(m_MInstr(FMAMI))) &&
6603 FMAMI->getOpcode() == PreferredFusedOpcode) {
6604 MachineInstr *FMulMI = MRI.getVRegDef(FMAMI->getOperand(3).getReg());
6605 if (isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6606 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
6607 MRI.getType(FMAMI->getOperand(0).getReg()))) {
6608 MatchInfo = [=](MachineIRBuilder &B) {
6609 Register X = FMAMI->getOperand(1).getReg();
6610 Register Y = FMAMI->getOperand(2).getReg();
6611 X = B.buildFPExt(DstType, X).getReg(0);
6612 Y = B.buildFPExt(DstType, Y).getReg(0);
6613 buildMatchInfo(FMulMI->getOperand(1).getReg(),
6614 FMulMI->getOperand(2).getReg(), RHS.Reg, X, Y, B);
6615 };
6616
6617 return true;
6618 }
6619 }
6620
6621 // fold (fadd z, (fma x, y, (fpext (fmul u, v)))
6622 // -> (fma x, y, (fma (fpext u), (fpext v), z))
6623 if (RHS.MI->getOpcode() == PreferredFusedOpcode &&
6624 mi_match(RHS.MI->getOperand(3).getReg(), MRI,
6625 m_GFPExt(m_MInstr(FMulMI))) &&
6626 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6627 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
6628 MRI.getType(FMulMI->getOperand(0).getReg()))) {
6629 MatchInfo = [=](MachineIRBuilder &B) {
6630 buildMatchInfo(FMulMI->getOperand(1).getReg(),
6631 FMulMI->getOperand(2).getReg(), LHS.Reg,
6632 RHS.MI->getOperand(1).getReg(),
6633 RHS.MI->getOperand(2).getReg(), B);
6634 };
6635 return true;
6636 }
6637
6638 // fold (fadd z, (fpext (fma x, y, (fmul u, v)))
6639 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
6640 // FIXME: This turns two single-precision and one double-precision
6641 // operation into two double-precision operations, which might not be
6642 // interesting for all targets, especially GPUs.
6643 if (mi_match(RHS.Reg, MRI, m_GFPExt(m_MInstr(FMAMI))) &&
6644 FMAMI->getOpcode() == PreferredFusedOpcode) {
6645 MachineInstr *FMulMI = MRI.getVRegDef(FMAMI->getOperand(3).getReg());
6646 if (isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6647 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
6648 MRI.getType(FMAMI->getOperand(0).getReg()))) {
6649 MatchInfo = [=](MachineIRBuilder &B) {
6650 Register X = FMAMI->getOperand(1).getReg();
6651 Register Y = FMAMI->getOperand(2).getReg();
6652 X = B.buildFPExt(DstType, X).getReg(0);
6653 Y = B.buildFPExt(DstType, Y).getReg(0);
6654 buildMatchInfo(FMulMI->getOperand(1).getReg(),
6655 FMulMI->getOperand(2).getReg(), LHS.Reg, X, Y, B);
6656 };
6657 return true;
6658 }
6659 }
6660
6661 return false;
6662}
6663
6664 bool CombinerHelper::matchCombineFSubFMulToFMadOrFMA(
6665     MachineInstr &MI,
6666     std::function<void(MachineIRBuilder &)> &MatchInfo) const {
6667 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
6668
6669 bool AllowFusionGlobally, HasFMAD, Aggressive;
6670 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6671 return false;
6672
6673 Register Op1 = MI.getOperand(1).getReg();
6674 Register Op2 = MI.getOperand(2).getReg();
6675 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
6676 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
6677 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6678
6679   // If we have two choices trying to fold (fsub (fmul u, v), (fmul x, y)),
6680   // prefer to fold the multiply with fewer uses.
6681   bool FirstMulHasFewerUses = true;
6682 if (isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6683 isContractableFMul(*RHS.MI, AllowFusionGlobally) &&
6684 hasMoreUses(*LHS.MI, *RHS.MI, MRI))
6685 FirstMulHasFewerUses = false;
6686
6687 unsigned PreferredFusedOpcode =
6688 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6689
6690 // fold (fsub (fmul x, y), z) -> (fma x, y, -z)
6691 if (FirstMulHasFewerUses &&
6692 (isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6693 (Aggressive || MRI.hasOneNonDBGUse(LHS.Reg)))) {
6694 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6695 Register NegZ = B.buildFNeg(DstTy, RHS.Reg).getReg(0);
6696 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6697 {LHS.MI->getOperand(1).getReg(),
6698 LHS.MI->getOperand(2).getReg(), NegZ});
6699 };
6700 return true;
6701 }
6702 // fold (fsub x, (fmul y, z)) -> (fma -y, z, x)
6703 else if ((isContractableFMul(*RHS.MI, AllowFusionGlobally) &&
6704 (Aggressive || MRI.hasOneNonDBGUse(RHS.Reg)))) {
6705 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6706 Register NegY =
6707 B.buildFNeg(DstTy, RHS.MI->getOperand(1).getReg()).getReg(0);
6708 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6709 {NegY, RHS.MI->getOperand(2).getReg(), LHS.Reg});
6710 };
6711 return true;
6712 }
6713
6714 return false;
6715}
6716
6717 bool CombinerHelper::matchCombineFSubFNegFMulToFMadOrFMA(
6718     MachineInstr &MI,
6719     std::function<void(MachineIRBuilder &)> &MatchInfo) const {
6720 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
6721
6722 bool AllowFusionGlobally, HasFMAD, Aggressive;
6723 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6724 return false;
6725
6726 Register LHSReg = MI.getOperand(1).getReg();
6727 Register RHSReg = MI.getOperand(2).getReg();
6728 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6729
6730 unsigned PreferredFusedOpcode =
6731 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6732
6733 MachineInstr *FMulMI;
6734 // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
6735 if (mi_match(LHSReg, MRI, m_GFNeg(m_MInstr(FMulMI))) &&
6736 (Aggressive || (MRI.hasOneNonDBGUse(LHSReg) &&
6737 MRI.hasOneNonDBGUse(FMulMI->getOperand(0).getReg()))) &&
6738 isContractableFMul(*FMulMI, AllowFusionGlobally)) {
6739 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6740 Register NegX =
6741 B.buildFNeg(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
6742 Register NegZ = B.buildFNeg(DstTy, RHSReg).getReg(0);
6743 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6744 {NegX, FMulMI->getOperand(2).getReg(), NegZ});
6745 };
6746 return true;
6747 }
6748
6749   // fold (fsub x, (fneg (fmul y, z))) -> (fma y, z, x)
6750 if (mi_match(RHSReg, MRI, m_GFNeg(m_MInstr(FMulMI))) &&
6751 (Aggressive || (MRI.hasOneNonDBGUse(RHSReg) &&
6752 MRI.hasOneNonDBGUse(FMulMI->getOperand(0).getReg()))) &&
6753 isContractableFMul(*FMulMI, AllowFusionGlobally)) {
6754 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6755 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6756 {FMulMI->getOperand(1).getReg(),
6757 FMulMI->getOperand(2).getReg(), LHSReg});
6758 };
6759 return true;
6760 }
6761
6762 return false;
6763}
6764
6765 bool CombinerHelper::matchCombineFSubFpExtFMulToFMadOrFMA(
6766     MachineInstr &MI,
6767     std::function<void(MachineIRBuilder &)> &MatchInfo) const {
6768 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
6769
6770 bool AllowFusionGlobally, HasFMAD, Aggressive;
6771 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6772 return false;
6773
6774 Register LHSReg = MI.getOperand(1).getReg();
6775 Register RHSReg = MI.getOperand(2).getReg();
6776 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6777
6778 unsigned PreferredFusedOpcode =
6779 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6780
6781 MachineInstr *FMulMI;
6782 // fold (fsub (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), (fneg z))
6783 if (mi_match(LHSReg, MRI, m_GFPExt(m_MInstr(FMulMI))) &&
6784 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6785 (Aggressive || MRI.hasOneNonDBGUse(LHSReg))) {
6786 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6787 Register FpExtX =
6788 B.buildFPExt(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
6789 Register FpExtY =
6790 B.buildFPExt(DstTy, FMulMI->getOperand(2).getReg()).getReg(0);
6791 Register NegZ = B.buildFNeg(DstTy, RHSReg).getReg(0);
6792 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6793 {FpExtX, FpExtY, NegZ});
6794 };
6795 return true;
6796 }
6797
6798 // fold (fsub x, (fpext (fmul y, z))) -> (fma (fneg (fpext y)), (fpext z), x)
6799 if (mi_match(RHSReg, MRI, m_GFPExt(m_MInstr(FMulMI))) &&
6800 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6801 (Aggressive || MRI.hasOneNonDBGUse(RHSReg))) {
6802 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6803 Register FpExtY =
6804 B.buildFPExt(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
6805 Register NegY = B.buildFNeg(DstTy, FpExtY).getReg(0);
6806 Register FpExtZ =
6807 B.buildFPExt(DstTy, FMulMI->getOperand(2).getReg()).getReg(0);
6808 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6809 {NegY, FpExtZ, LHSReg});
6810 };
6811 return true;
6812 }
6813
6814 return false;
6815}
6816
6817 bool CombinerHelper::matchCombineFSubFpExtFNegFMulToFMadOrFMA(
6818     MachineInstr &MI,
6819     std::function<void(MachineIRBuilder &)> &MatchInfo) const {
6820 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
6821
6822 bool AllowFusionGlobally, HasFMAD, Aggressive;
6823 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6824 return false;
6825
6826 const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
6827 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6828 Register LHSReg = MI.getOperand(1).getReg();
6829 Register RHSReg = MI.getOperand(2).getReg();
6830
6831 unsigned PreferredFusedOpcode =
6832 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6833
6834   auto buildMatchInfo = [=](Register Dst, Register X, Register Y, Register Z,
6835                             MachineIRBuilder &B) {
6836 Register FpExtX = B.buildFPExt(DstTy, X).getReg(0);
6837 Register FpExtY = B.buildFPExt(DstTy, Y).getReg(0);
6838 B.buildInstr(PreferredFusedOpcode, {Dst}, {FpExtX, FpExtY, Z});
6839 };
6840
6841 MachineInstr *FMulMI;
6842 // fold (fsub (fpext (fneg (fmul x, y))), z) ->
6843 // (fneg (fma (fpext x), (fpext y), z))
6844 // fold (fsub (fneg (fpext (fmul x, y))), z) ->
6845 // (fneg (fma (fpext x), (fpext y), z))
6846 if ((mi_match(LHSReg, MRI, m_GFPExt(m_GFNeg(m_MInstr(FMulMI)))) ||
6847 mi_match(LHSReg, MRI, m_GFNeg(m_GFPExt(m_MInstr(FMulMI))))) &&
6848 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6849 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstTy,
6850 MRI.getType(FMulMI->getOperand(0).getReg()))) {
6851 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6852 Register FMAReg = MRI.createGenericVirtualRegister(DstTy);
6853 buildMatchInfo(FMAReg, FMulMI->getOperand(1).getReg(),
6854 FMulMI->getOperand(2).getReg(), RHSReg, B);
6855 B.buildFNeg(MI.getOperand(0).getReg(), FMAReg);
6856 };
6857 return true;
6858 }
6859
6860 // fold (fsub x, (fpext (fneg (fmul y, z)))) -> (fma (fpext y), (fpext z), x)
6861 // fold (fsub x, (fneg (fpext (fmul y, z)))) -> (fma (fpext y), (fpext z), x)
6862 if ((mi_match(RHSReg, MRI, m_GFPExt(m_GFNeg(m_MInstr(FMulMI)))) ||
6863 mi_match(RHSReg, MRI, m_GFNeg(m_GFPExt(m_MInstr(FMulMI))))) &&
6864 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6865 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstTy,
6866 MRI.getType(FMulMI->getOperand(0).getReg()))) {
6867 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6868 buildMatchInfo(MI.getOperand(0).getReg(), FMulMI->getOperand(1).getReg(),
6869 FMulMI->getOperand(2).getReg(), LHSReg, B);
6870 };
6871 return true;
6872 }
6873
6874 return false;
6875}
6876
6877 bool CombinerHelper::matchCombineFMinMaxNaN(MachineInstr &MI,
6878                                             unsigned &IdxToPropagate) const {
6879 bool PropagateNaN;
6880 switch (MI.getOpcode()) {
6881 default:
6882 return false;
6883 case TargetOpcode::G_FMINNUM:
6884 case TargetOpcode::G_FMAXNUM:
6885 PropagateNaN = false;
6886 break;
6887 case TargetOpcode::G_FMINIMUM:
6888 case TargetOpcode::G_FMAXIMUM:
6889 PropagateNaN = true;
6890 break;
6891 }
6892
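  // G_FMINNUM/G_FMAXNUM return the non-NaN operand when exactly one input is
  // NaN, so a constant NaN operand means the other operand is the result.
  // G_FMINIMUM/G_FMAXIMUM propagate NaN, so there the NaN operand itself is
  // the result.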
6893 auto MatchNaN = [&](unsigned Idx) {
6894 Register MaybeNaNReg = MI.getOperand(Idx).getReg();
6895 const ConstantFP *MaybeCst = getConstantFPVRegVal(MaybeNaNReg, MRI);
6896 if (!MaybeCst || !MaybeCst->getValueAPF().isNaN())
6897 return false;
6898 IdxToPropagate = PropagateNaN ? Idx : (Idx == 1 ? 2 : 1);
6899 return true;
6900 };
6901
6902 return MatchNaN(1) || MatchNaN(2);
6903}
6904
6905// Combine multiple FDIVs with the same divisor into multiple FMULs by the
6906// reciprocal.
6907// E.g., (a / Y; b / Y;) -> (recip = 1.0 / Y; a * recip; b * recip)
6908 bool CombinerHelper::matchRepeatedFPDivisor(
6909     MachineInstr &MI, SmallVector<MachineInstr *> &MatchInfo) const {
6910 assert(MI.getOpcode() == TargetOpcode::G_FDIV);
6911
6912 Register X = MI.getOperand(1).getReg();
6913 Register Y = MI.getOperand(2).getReg();
6914
6915 if (!MI.getFlag(MachineInstr::MIFlag::FmArcp))
6916 return false;
6917
6918 auto IsOne = [this](Register X) {
6919 auto N0CFP = isConstantOrConstantSplatVectorFP(*MRI.getVRegDef(X), MRI);
6920 return N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0));
6921 };
6922
6923 // Skip if current node is a reciprocal/fneg-reciprocal.
6924 if (IsOne(X))
6925 return false;
6926
6927 // Exit early if the target does not want this transform or if there can't
6928 // possibly be enough uses of the divisor to make the transform worthwhile.
6929 unsigned MinUses = getTargetLowering().combineRepeatedFPDivisors();
6930 if (!MinUses)
6931 return false;
6932
6933 // Find all FDIV users of the same divisor. For the moment we limit all
6934 // instructions to a single BB and use the first Instr in MatchInfo as the
6935 // dominating position.
6936 MatchInfo.push_back(&MI);
6937 for (auto &U : MRI.use_nodbg_instructions(Y)) {
6938 if (&U == &MI || U.getParent() != MI.getParent())
6939 continue;
6940 if (U.getOpcode() == TargetOpcode::G_FDIV &&
6941 U.getOperand(2).getReg() == Y && U.getOperand(1).getReg() != Y &&
6942 !IsOne(U.getOperand(1).getReg())) {
6943 // This division is eligible for optimization only if global unsafe math
6944 // is enabled or if this division allows reciprocal formation.
6945 if (U.getFlag(MachineInstr::MIFlag::FmArcp)) {
6946 MatchInfo.push_back(&U);
6947 if (dominates(U, *MatchInfo[0]))
6948 std::swap(MatchInfo[0], MatchInfo.back());
6949 }
6950 }
6951 }
6952
6953 // Now that we have the actual number of divisor uses, make sure it meets
6954 // the minimum threshold specified by the target.
6955 return MatchInfo.size() >= MinUses;
6956}
6957
6958 void CombinerHelper::applyRepeatedFPDivisor(
6959     SmallVector<MachineInstr *> &MatchInfo) const {
6960   // Generate the new div at the position of the first instruction, which we
6961   // have ensured will dominate all other instructions.
6962 Builder.setInsertPt(*MatchInfo[0]->getParent(), MatchInfo[0]);
6963 LLT Ty = MRI.getType(MatchInfo[0]->getOperand(0).getReg());
6964 auto Div = Builder.buildFDiv(Ty, Builder.buildFConstant(Ty, 1.0),
6965 MatchInfo[0]->getOperand(2).getReg(),
6966 MatchInfo[0]->getFlags());
6967
6968   // Replace all of the divs we found with fmul instructions.
6969 for (MachineInstr *MI : MatchInfo) {
6970 Builder.setInsertPt(*MI->getParent(), MI);
6971 Builder.buildFMul(MI->getOperand(0).getReg(), MI->getOperand(1).getReg(),
6972 Div->getOperand(0).getReg(), MI->getFlags());
6973 MI->eraseFromParent();
6974 }
6975}
6976
6977 bool CombinerHelper::matchAddSubSameReg(MachineInstr &MI, Register &Src) const {
6978   assert(MI.getOpcode() == TargetOpcode::G_ADD && "Expected a G_ADD");
6979 Register LHS = MI.getOperand(1).getReg();
6980 Register RHS = MI.getOperand(2).getReg();
6981
6982 // Helper lambda to check for opportunities for
6983 // A + (B - A) -> B
6984 // (B - A) + A -> B
6985 auto CheckFold = [&](Register MaybeSub, Register MaybeSameReg) {
6986 Register Reg;
6987 return mi_match(MaybeSub, MRI, m_GSub(m_Reg(Src), m_Reg(Reg))) &&
6988 Reg == MaybeSameReg;
6989 };
6990 return CheckFold(LHS, RHS) || CheckFold(RHS, LHS);
6991}
6992
6993 bool CombinerHelper::matchBuildVectorIdentityFold(MachineInstr &MI,
6994                                                   Register &MatchInfo) const {
6995 // This combine folds the following patterns:
6996 //
6997 // G_BUILD_VECTOR_TRUNC (G_BITCAST(x), G_LSHR(G_BITCAST(x), k))
6998 // G_BUILD_VECTOR(G_TRUNC(G_BITCAST(x)), G_TRUNC(G_LSHR(G_BITCAST(x), k)))
6999 // into
7000 // x
7001 // if
7002 // k == sizeof(VecEltTy)/2
7003 // type(x) == type(dst)
7004 //
7005 // G_BUILD_VECTOR(G_TRUNC(G_BITCAST(x)), undef)
7006 // into
7007 // x
7008 // if
7009 // type(x) == type(dst)
7010
7011 LLT DstVecTy = MRI.getType(MI.getOperand(0).getReg());
7012 LLT DstEltTy = DstVecTy.getElementType();
7013
7014 Register Lo, Hi;
7015
7016 if (mi_match(
7017           MI, MRI,
7018           m_GBuildVector(m_GTrunc(m_GBitcast(m_Reg(Lo))), m_GImplicitDef()))) {
7019 MatchInfo = Lo;
7020 return MRI.getType(MatchInfo) == DstVecTy;
7021 }
7022
7023 std::optional<ValueAndVReg> ShiftAmount;
7024 const auto LoPattern = m_GBitcast(m_Reg(Lo));
7025 const auto HiPattern = m_GLShr(m_GBitcast(m_Reg(Hi)), m_GCst(ShiftAmount));
7026 if (mi_match(
7027 MI, MRI,
7028 m_any_of(m_GBuildVectorTrunc(LoPattern, HiPattern),
7029 m_GBuildVector(m_GTrunc(LoPattern), m_GTrunc(HiPattern))))) {
7030 if (Lo == Hi && ShiftAmount->Value == DstEltTy.getSizeInBits()) {
7031 MatchInfo = Lo;
7032 return MRI.getType(MatchInfo) == DstVecTy;
7033 }
7034 }
7035
7036 return false;
7037}
7038
7039 bool CombinerHelper::matchTruncBuildVectorFold(MachineInstr &MI,
7040                                                Register &MatchInfo) const {
7041 // Replace (G_TRUNC (G_BITCAST (G_BUILD_VECTOR x, y)) with just x
7042 // if type(x) == type(G_TRUNC)
7043 if (!mi_match(MI.getOperand(1).getReg(), MRI,
7044 m_GBitcast(m_GBuildVector(m_Reg(MatchInfo), m_Reg()))))
7045 return false;
7046
7047 return MRI.getType(MatchInfo) == MRI.getType(MI.getOperand(0).getReg());
7048}
7049
7050 bool CombinerHelper::matchTruncLshrBuildVectorFold(MachineInstr &MI,
7051                                                    Register &MatchInfo) const {
7052 // Replace (G_TRUNC (G_LSHR (G_BITCAST (G_BUILD_VECTOR x, y)), K)) with
7053 // y if K == size of vector element type
7054 std::optional<ValueAndVReg> ShiftAmt;
7055   if (!mi_match(MI.getOperand(1).getReg(), MRI,
7056                 m_GLShr(m_GBitcast(m_GBuildVector(m_Reg(), m_Reg(MatchInfo))),
7057 m_GCst(ShiftAmt))))
7058 return false;
7059
7060 LLT MatchTy = MRI.getType(MatchInfo);
7061 return ShiftAmt->Value.getZExtValue() == MatchTy.getSizeInBits() &&
7062 MatchTy == MRI.getType(MI.getOperand(0).getReg());
7063}
7064
7065unsigned CombinerHelper::getFPMinMaxOpcForSelect(
7066 CmpInst::Predicate Pred, LLT DstTy,
7067 SelectPatternNaNBehaviour VsNaNRetVal) const {
7068 assert(VsNaNRetVal != SelectPatternNaNBehaviour::NOT_APPLICABLE &&
7069 "Expected a NaN behaviour?");
7070 // Choose an opcode based off of legality or the behaviour when one of the
7071 // LHS/RHS may be NaN.
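  // E.g., for a greater-than predicate where a NaN input makes the select
  // return the other operand, G_FMAXNUM already has those semantics (it
  // returns its non-NaN operand), so it can be chosen without consulting
  // legality first.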
7072 switch (Pred) {
7073 default:
7074 return 0;
7075 case CmpInst::FCMP_UGT:
7076 case CmpInst::FCMP_UGE:
7077 case CmpInst::FCMP_OGT:
7078 case CmpInst::FCMP_OGE:
7079 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_OTHER)
7080 return TargetOpcode::G_FMAXNUM;
7081 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_NAN)
7082 return TargetOpcode::G_FMAXIMUM;
7083 if (isLegal({TargetOpcode::G_FMAXNUM, {DstTy}}))
7084 return TargetOpcode::G_FMAXNUM;
7085 if (isLegal({TargetOpcode::G_FMAXIMUM, {DstTy}}))
7086 return TargetOpcode::G_FMAXIMUM;
7087 return 0;
7088 case CmpInst::FCMP_ULT:
7089 case CmpInst::FCMP_ULE:
7090 case CmpInst::FCMP_OLT:
7091 case CmpInst::FCMP_OLE:
7092 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_OTHER)
7093 return TargetOpcode::G_FMINNUM;
7094 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_NAN)
7095 return TargetOpcode::G_FMINIMUM;
7096 if (isLegal({TargetOpcode::G_FMINNUM, {DstTy}}))
7097 return TargetOpcode::G_FMINNUM;
7098 if (!isLegal({TargetOpcode::G_FMINIMUM, {DstTy}}))
7099 return 0;
7100 return TargetOpcode::G_FMINIMUM;
7101 }
7102}
7103
7104CombinerHelper::SelectPatternNaNBehaviour
7105CombinerHelper::computeRetValAgainstNaN(Register LHS, Register RHS,
7106 bool IsOrderedComparison) const {
7107 bool LHSSafe = VT->isKnownNeverNaN(LHS);
7108 bool RHSSafe = VT->isKnownNeverNaN(RHS);
7109 // Completely unsafe.
7110 if (!LHSSafe && !RHSSafe)
7111 return SelectPatternNaNBehaviour::NOT_APPLICABLE;
7112 if (LHSSafe && RHSSafe)
7113 return SelectPatternNaNBehaviour::RETURNS_ANY;
7114 // An ordered comparison will return false when given a NaN, so it
7115 // returns the RHS.
7116 if (IsOrderedComparison)
7117 return LHSSafe ? SelectPatternNaNBehaviour::RETURNS_NAN
7118 : SelectPatternNaNBehaviour::RETURNS_OTHER;
7119 // An unordered comparison will return true when given a NaN, so it
7120 // returns the LHS.
7121 return LHSSafe ? SelectPatternNaNBehaviour::RETURNS_OTHER
7122 : SelectPatternNaNBehaviour::RETURNS_NAN;
7123}
7124
7125bool CombinerHelper::matchFPSelectToMinMax(Register Dst, Register Cond,
7126 Register TrueVal, Register FalseVal,
7127 BuildFnTy &MatchInfo) const {
7128 // Match: select (fcmp cond x, y) x, y
7129 // select (fcmp cond x, y) y, x
7130 // And turn it into fminnum/fmaxnum or fmin/fmax based off of the condition.
7131 LLT DstTy = MRI.getType(Dst);
7132 // Bail out early on pointers, since we'll never want to fold to a min/max.
7133 if (DstTy.isPointer())
7134 return false;
7135 // Match a floating point compare with a less-than/greater-than predicate.
7136 // TODO: Allow multiple users of the compare if they are all selects.
7137 CmpInst::Predicate Pred;
7138 Register CmpLHS, CmpRHS;
7139   if (!mi_match(Cond, MRI,
7140                 m_OneNonDBGUse(
7141 m_GFCmp(m_Pred(Pred), m_Reg(CmpLHS), m_Reg(CmpRHS)))) ||
7142 CmpInst::isEquality(Pred))
7143 return false;
7144 SelectPatternNaNBehaviour ResWithKnownNaNInfo =
7145 computeRetValAgainstNaN(CmpLHS, CmpRHS, CmpInst::isOrdered(Pred));
7146 if (ResWithKnownNaNInfo == SelectPatternNaNBehaviour::NOT_APPLICABLE)
7147 return false;
7148 if (TrueVal == CmpRHS && FalseVal == CmpLHS) {
7149 std::swap(CmpLHS, CmpRHS);
7150 Pred = CmpInst::getSwappedPredicate(Pred);
7151 if (ResWithKnownNaNInfo == SelectPatternNaNBehaviour::RETURNS_NAN)
7152 ResWithKnownNaNInfo = SelectPatternNaNBehaviour::RETURNS_OTHER;
7153 else if (ResWithKnownNaNInfo == SelectPatternNaNBehaviour::RETURNS_OTHER)
7154 ResWithKnownNaNInfo = SelectPatternNaNBehaviour::RETURNS_NAN;
7155 }
7156 if (TrueVal != CmpLHS || FalseVal != CmpRHS)
7157 return false;
7158 // Decide what type of max/min this should be based off of the predicate.
7159 unsigned Opc = getFPMinMaxOpcForSelect(Pred, DstTy, ResWithKnownNaNInfo);
7160 if (!Opc || !isLegal({Opc, {DstTy}}))
7161 return false;
7162 // Comparisons between signed zero and zero may have different results...
7163 // unless we have fmaximum/fminimum. In that case, we know -0 < 0.
7164 if (Opc != TargetOpcode::G_FMAXIMUM && Opc != TargetOpcode::G_FMINIMUM) {
7165 // We don't know if a comparison between two 0s will give us a consistent
7166 // result. Be conservative and only proceed if at least one side is
7167 // non-zero.
7168 auto KnownNonZeroSide = getFConstantVRegValWithLookThrough(CmpLHS, MRI);
7169 if (!KnownNonZeroSide || !KnownNonZeroSide->Value.isNonZero()) {
7170 KnownNonZeroSide = getFConstantVRegValWithLookThrough(CmpRHS, MRI);
7171 if (!KnownNonZeroSide || !KnownNonZeroSide->Value.isNonZero())
7172 return false;
7173 }
7174 }
7175 MatchInfo = [=](MachineIRBuilder &B) {
7176 B.buildInstr(Opc, {Dst}, {CmpLHS, CmpRHS});
7177 };
7178 return true;
7179}
7180
7181 bool CombinerHelper::matchSimplifySelectToMinMax(MachineInstr &MI,
7182                                                  BuildFnTy &MatchInfo) const {
7183 // TODO: Handle integer cases.
7184 assert(MI.getOpcode() == TargetOpcode::G_SELECT);
7185 // Condition may be fed by a truncated compare.
7186 Register Cond = MI.getOperand(1).getReg();
7187 Register MaybeTrunc;
7188 if (mi_match(Cond, MRI, m_OneNonDBGUse(m_GTrunc(m_Reg(MaybeTrunc)))))
7189 Cond = MaybeTrunc;
7190 Register Dst = MI.getOperand(0).getReg();
7191 Register TrueVal = MI.getOperand(2).getReg();
7192 Register FalseVal = MI.getOperand(3).getReg();
7193 return matchFPSelectToMinMax(Dst, Cond, TrueVal, FalseVal, MatchInfo);
7194}
7195
7196 bool CombinerHelper::matchRedundantBinOpInEquality(MachineInstr &MI,
7197                                                    BuildFnTy &MatchInfo) const {
7198 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
7199 // (X + Y) == X --> Y == 0
7200 // (X + Y) != X --> Y != 0
7201 // (X - Y) == X --> Y == 0
7202 // (X - Y) != X --> Y != 0
7203 // (X ^ Y) == X --> Y == 0
7204 // (X ^ Y) != X --> Y != 0
7205 Register Dst = MI.getOperand(0).getReg();
7206 CmpInst::Predicate Pred;
7207 Register X, Y, OpLHS, OpRHS;
7208 bool MatchedSub = mi_match(
7209 Dst, MRI,
7210 m_c_GICmp(m_Pred(Pred), m_Reg(X), m_GSub(m_Reg(OpLHS), m_Reg(Y))));
7211 if (MatchedSub && X != OpLHS)
7212 return false;
7213 if (!MatchedSub) {
7214 if (!mi_match(Dst, MRI,
7215 m_c_GICmp(m_Pred(Pred), m_Reg(X),
7216 m_any_of(m_GAdd(m_Reg(OpLHS), m_Reg(OpRHS)),
7217 m_GXor(m_Reg(OpLHS), m_Reg(OpRHS))))))
7218 return false;
7219 Y = X == OpLHS ? OpRHS : X == OpRHS ? OpLHS : Register();
7220 }
7221 MatchInfo = [=](MachineIRBuilder &B) {
7222 auto Zero = B.buildConstant(MRI.getType(Y), 0);
7223 B.buildICmp(Pred, Dst, Y, Zero);
7224 };
7225 return CmpInst::isEquality(Pred) && Y.isValid();
7226}
7227
7228/// Return the minimum useless shift amount that results in complete loss of the
7229/// source value. Return std::nullopt when it cannot determine a value.
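/// E.g., for a G_SHL of an 8-bit value with at least 3 known trailing zeros,
/// only the top 5 bits are significant, so any shift amount >= 8 - 3 = 5
/// shifts out every significant bit and the result is the constant 0.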
7230static std::optional<unsigned>
7231getMinUselessShift(KnownBits ValueKB, unsigned Opcode,
7232 std::optional<int64_t> &Result) {
7233 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_LSHR ||
7234 Opcode == TargetOpcode::G_ASHR) &&
7235 "Expect G_SHL, G_LSHR or G_ASHR.");
7236 auto SignificantBits = 0;
7237 switch (Opcode) {
7238 case TargetOpcode::G_SHL:
7239 SignificantBits = ValueKB.countMinTrailingZeros();
7240 Result = 0;
7241 break;
7242 case TargetOpcode::G_LSHR:
7243 Result = 0;
7244 SignificantBits = ValueKB.countMinLeadingZeros();
7245 break;
7246 case TargetOpcode::G_ASHR:
7247 if (ValueKB.isNonNegative()) {
7248 SignificantBits = ValueKB.countMinLeadingZeros();
7249 Result = 0;
7250 } else if (ValueKB.isNegative()) {
7251 SignificantBits = ValueKB.countMinLeadingOnes();
7252 Result = -1;
7253 } else {
7254 // Cannot determine shift result.
7255 Result = std::nullopt;
7256 }
7257 break;
7258 default:
7259 break;
7260 }
7261 return ValueKB.getBitWidth() - SignificantBits;
7262}
7263
7264 bool CombinerHelper::matchShiftsTooBig(
7265     MachineInstr &MI, std::optional<int64_t> &MatchInfo) const {
7266 Register ShiftVal = MI.getOperand(1).getReg();
7267 Register ShiftReg = MI.getOperand(2).getReg();
7268 LLT ResTy = MRI.getType(MI.getOperand(0).getReg());
7269 auto IsShiftTooBig = [&](const Constant *C) {
7270 auto *CI = dyn_cast<ConstantInt>(C);
7271 if (!CI)
7272 return false;
7273 if (CI->uge(ResTy.getScalarSizeInBits())) {
7274 MatchInfo = std::nullopt;
7275 return true;
7276 }
7277 auto OptMaxUsefulShift = getMinUselessShift(VT->getKnownBits(ShiftVal),
7278 MI.getOpcode(), MatchInfo);
7279 return OptMaxUsefulShift && CI->uge(*OptMaxUsefulShift);
7280 };
7281 return matchUnaryPredicate(MRI, ShiftReg, IsShiftTooBig);
7282}
7283
7284 bool CombinerHelper::matchCommuteConstantToRHS(MachineInstr &MI) const {
7285   unsigned LHSOpndIdx = 1;
7286 unsigned RHSOpndIdx = 2;
7287 switch (MI.getOpcode()) {
7288 case TargetOpcode::G_UADDO:
7289 case TargetOpcode::G_SADDO:
7290 case TargetOpcode::G_UMULO:
7291 case TargetOpcode::G_SMULO:
7292 LHSOpndIdx = 2;
7293 RHSOpndIdx = 3;
7294 break;
7295 default:
7296 break;
7297 }
7298 Register LHS = MI.getOperand(LHSOpndIdx).getReg();
7299 Register RHS = MI.getOperand(RHSOpndIdx).getReg();
7300 if (!getIConstantVRegVal(LHS, MRI)) {
7301 // Skip commuting if LHS is not a constant. But, LHS may be a
7302 // G_CONSTANT_FOLD_BARRIER. If so we commute as long as we don't already
7303 // have a constant on the RHS.
7304 if (MRI.getVRegDef(LHS)->getOpcode() !=
7305 TargetOpcode::G_CONSTANT_FOLD_BARRIER)
7306 return false;
7307 }
7308 // Commute as long as RHS is not a constant or G_CONSTANT_FOLD_BARRIER.
7309 return MRI.getVRegDef(RHS)->getOpcode() !=
7310 TargetOpcode::G_CONSTANT_FOLD_BARRIER &&
7311 !getIConstantVRegVal(RHS, MRI);
7312}
7313
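// Match a binary op whose LHS is a floating-point constant (or constant
// splat) while the RHS is not, so that the constant can be commuted to the
// RHS.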
7314 bool CombinerHelper::matchCommuteFPConstantToRHS(MachineInstr &MI) const {
7315   Register LHS = MI.getOperand(1).getReg();
7316 Register RHS = MI.getOperand(2).getReg();
7317 std::optional<FPValueAndVReg> ValAndVReg;
7318 if (!mi_match(LHS, MRI, m_GFCstOrSplat(ValAndVReg)))
7319 return false;
7320 return !mi_match(RHS, MRI, m_GFCstOrSplat(ValAndVReg));
7321}
7322
7323 void CombinerHelper::applyCommuteBinOpOperands(MachineInstr &MI) const {
7324   Observer.changingInstr(MI);
7325 unsigned LHSOpndIdx = 1;
7326 unsigned RHSOpndIdx = 2;
7327 switch (MI.getOpcode()) {
7328 case TargetOpcode::G_UADDO:
7329 case TargetOpcode::G_SADDO:
7330 case TargetOpcode::G_UMULO:
7331 case TargetOpcode::G_SMULO:
7332 LHSOpndIdx = 2;
7333 RHSOpndIdx = 3;
7334 break;
7335 default:
7336 break;
7337 }
7338 Register LHSReg = MI.getOperand(LHSOpndIdx).getReg();
7339 Register RHSReg = MI.getOperand(RHSOpndIdx).getReg();
7340 MI.getOperand(LHSOpndIdx).setReg(RHSReg);
7341 MI.getOperand(RHSOpndIdx).setReg(LHSReg);
7342 Observer.changedInstr(MI);
7343}
7344
7345bool CombinerHelper::isOneOrOneSplat(Register Src, bool AllowUndefs) const {
7346 LLT SrcTy = MRI.getType(Src);
7347 if (SrcTy.isFixedVector())
7348 return isConstantSplatVector(Src, 1, AllowUndefs);
7349 if (SrcTy.isScalar()) {
7350 if (AllowUndefs && getOpcodeDef<GImplicitDef>(Src, MRI) != nullptr)
7351 return true;
7352 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
7353 return IConstant && IConstant->Value == 1;
7354 }
7355 return false; // scalable vector
7356}
7357
7358bool CombinerHelper::isZeroOrZeroSplat(Register Src, bool AllowUndefs) const {
7359 LLT SrcTy = MRI.getType(Src);
7360 if (SrcTy.isFixedVector())
7361 return isConstantSplatVector(Src, 0, AllowUndefs);
7362 if (SrcTy.isScalar()) {
7363 if (AllowUndefs && getOpcodeDef<GImplicitDef>(Src, MRI) != nullptr)
7364 return true;
7365 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
7366 return IConstant && IConstant->Value == 0;
7367 }
7368 return false; // scalable vector
7369}
7370
7371// Ignores COPYs during conformance checks.
7372// FIXME scalable vectors.
7373bool CombinerHelper::isConstantSplatVector(Register Src, int64_t SplatValue,
7374 bool AllowUndefs) const {
7375 GBuildVector *BuildVector = getOpcodeDef<GBuildVector>(Src, MRI);
7376 if (!BuildVector)
7377 return false;
7378 unsigned NumSources = BuildVector->getNumSources();
7379
7380 for (unsigned I = 0; I < NumSources; ++I) {
7381     GImplicitDef *ImplicitDef =
7382         getOpcodeDef<GImplicitDef>(BuildVector->getSourceReg(I), MRI);
7383 if (ImplicitDef && AllowUndefs)
7384 continue;
7385 if (ImplicitDef && !AllowUndefs)
7386 return false;
7387     std::optional<ValueAndVReg> IConstant =
7388         getIConstantVRegValWithLookThrough(BuildVector->getSourceReg(I), MRI);
7389 if (IConstant && IConstant->Value == SplatValue)
7390 continue;
7391 return false;
7392 }
7393 return true;
7394}
7395
7396// Ignores COPYs during lookups.
7397// FIXME scalable vectors
7398std::optional<APInt>
7399CombinerHelper::getConstantOrConstantSplatVector(Register Src) const {
7400 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
7401 if (IConstant)
7402 return IConstant->Value;
7403
7404 GBuildVector *BuildVector = getOpcodeDef<GBuildVector>(Src, MRI);
7405 if (!BuildVector)
7406 return std::nullopt;
7407 unsigned NumSources = BuildVector->getNumSources();
7408
7409 std::optional<APInt> Value = std::nullopt;
7410 for (unsigned I = 0; I < NumSources; ++I) {
7411     std::optional<ValueAndVReg> IConstant =
7412         getIConstantVRegValWithLookThrough(BuildVector->getSourceReg(I), MRI);
7413 if (!IConstant)
7414 return std::nullopt;
7415 if (!Value)
7416 Value = IConstant->Value;
7417 else if (*Value != IConstant->Value)
7418 return std::nullopt;
7419 }
7420 return Value;
7421}
7422
7423// FIXME G_SPLAT_VECTOR
7424bool CombinerHelper::isConstantOrConstantVectorI(Register Src) const {
7425 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
7426 if (IConstant)
7427 return true;
7428
7429 GBuildVector *BuildVector = getOpcodeDef<GBuildVector>(Src, MRI);
7430 if (!BuildVector)
7431 return false;
7432
7433 unsigned NumSources = BuildVector->getNumSources();
7434 for (unsigned I = 0; I < NumSources; ++I) {
7435     std::optional<ValueAndVReg> IConstant =
7436         getIConstantVRegValWithLookThrough(BuildVector->getSourceReg(I), MRI);
7437 if (!IConstant)
7438 return false;
7439 }
7440 return true;
7441}
7442
7443// TODO: use knownbits to determine zeros
7444bool CombinerHelper::tryFoldSelectOfConstants(GSelect *Select,
7445 BuildFnTy &MatchInfo) const {
7446 uint32_t Flags = Select->getFlags();
7447 Register Dest = Select->getReg(0);
7448 Register Cond = Select->getCondReg();
7449 Register True = Select->getTrueReg();
7450 Register False = Select->getFalseReg();
7451 LLT CondTy = MRI.getType(Select->getCondReg());
7452 LLT TrueTy = MRI.getType(Select->getTrueReg());
7453
7454 // We only do this combine for scalar boolean conditions.
7455 if (CondTy != LLT::scalar(1))
7456 return false;
7457
7458 if (TrueTy.isPointer())
7459 return false;
7460
7461 // Both are scalars.
7462   std::optional<ValueAndVReg> TrueOpt =
7463       getIConstantVRegValWithLookThrough(True, MRI);
7464   std::optional<ValueAndVReg> FalseOpt =
7465       getIConstantVRegValWithLookThrough(False, MRI);
7466
7467 if (!TrueOpt || !FalseOpt)
7468 return false;
7469
7470 APInt TrueValue = TrueOpt->Value;
7471 APInt FalseValue = FalseOpt->Value;
7472
7473 // select Cond, 1, 0 --> zext (Cond)
7474 if (TrueValue.isOne() && FalseValue.isZero()) {
7475 MatchInfo = [=](MachineIRBuilder &B) {
7476 B.setInstrAndDebugLoc(*Select);
7477 B.buildZExtOrTrunc(Dest, Cond);
7478 };
7479 return true;
7480 }
7481
7482 // select Cond, -1, 0 --> sext (Cond)
7483 if (TrueValue.isAllOnes() && FalseValue.isZero()) {
7484 MatchInfo = [=](MachineIRBuilder &B) {
7485 B.setInstrAndDebugLoc(*Select);
7486 B.buildSExtOrTrunc(Dest, Cond);
7487 };
7488 return true;
7489 }
7490
7491 // select Cond, 0, 1 --> zext (!Cond)
7492 if (TrueValue.isZero() && FalseValue.isOne()) {
7493 MatchInfo = [=](MachineIRBuilder &B) {
7494 B.setInstrAndDebugLoc(*Select);
7495 Register Inner = MRI.createGenericVirtualRegister(CondTy);
7496 B.buildNot(Inner, Cond);
7497 B.buildZExtOrTrunc(Dest, Inner);
7498 };
7499 return true;
7500 }
7501
7502 // select Cond, 0, -1 --> sext (!Cond)
7503 if (TrueValue.isZero() && FalseValue.isAllOnes()) {
7504 MatchInfo = [=](MachineIRBuilder &B) {
7505 B.setInstrAndDebugLoc(*Select);
7506 Register Inner = MRI.createGenericVirtualRegister(CondTy);
7507 B.buildNot(Inner, Cond);
7508 B.buildSExtOrTrunc(Dest, Inner);
7509 };
7510 return true;
7511 }
7512
7513 // select Cond, C1, C1-1 --> add (zext Cond), C1-1
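  // E.g., select Cond, 5, 4 becomes add (zext Cond), 4: the zext contributes
  // 1 when Cond is true and 0 when it is false.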
7514 if (TrueValue - 1 == FalseValue) {
7515 MatchInfo = [=](MachineIRBuilder &B) {
7516 B.setInstrAndDebugLoc(*Select);
7517 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
7518 B.buildZExtOrTrunc(Inner, Cond);
7519 B.buildAdd(Dest, Inner, False);
7520 };
7521 return true;
7522 }
7523
7524 // select Cond, C1, C1+1 --> add (sext Cond), C1+1
7525 if (TrueValue + 1 == FalseValue) {
7526 MatchInfo = [=](MachineIRBuilder &B) {
7527 B.setInstrAndDebugLoc(*Select);
7528 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
7529 B.buildSExtOrTrunc(Inner, Cond);
7530 B.buildAdd(Dest, Inner, False);
7531 };
7532 return true;
7533 }
7534
7535 // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
7536 if (TrueValue.isPowerOf2() && FalseValue.isZero()) {
7537 MatchInfo = [=](MachineIRBuilder &B) {
7538 B.setInstrAndDebugLoc(*Select);
7539 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
7540 B.buildZExtOrTrunc(Inner, Cond);
7541 // The shift amount must be scalar.
7542 LLT ShiftTy = TrueTy.isVector() ? TrueTy.getElementType() : TrueTy;
7543 auto ShAmtC = B.buildConstant(ShiftTy, TrueValue.exactLogBase2());
7544 B.buildShl(Dest, Inner, ShAmtC, Flags);
7545 };
7546 return true;
7547 }
7548
7549 // select Cond, 0, Pow2 --> (zext (!Cond)) << log2(Pow2)
7550 if (FalseValue.isPowerOf2() && TrueValue.isZero()) {
7551 MatchInfo = [=](MachineIRBuilder &B) {
7552 B.setInstrAndDebugLoc(*Select);
7553 Register Not = MRI.createGenericVirtualRegister(CondTy);
7554 B.buildNot(Not, Cond);
7555 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
7556 B.buildZExtOrTrunc(Inner, Not);
7557 // The shift amount must be scalar.
7558 LLT ShiftTy = TrueTy.isVector() ? TrueTy.getElementType() : TrueTy;
7559 auto ShAmtC = B.buildConstant(ShiftTy, FalseValue.exactLogBase2());
7560 B.buildShl(Dest, Inner, ShAmtC, Flags);
7561 };
7562 return true;
7563 }
7564
7565 // select Cond, -1, C --> or (sext Cond), C
7566 if (TrueValue.isAllOnes()) {
7567 MatchInfo = [=](MachineIRBuilder &B) {
7568 B.setInstrAndDebugLoc(*Select);
7569 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
7570 B.buildSExtOrTrunc(Inner, Cond);
7571 B.buildOr(Dest, Inner, False, Flags);
7572 };
7573 return true;
7574 }
7575
7576 // select Cond, C, -1 --> or (sext (not Cond)), C
7577 if (FalseValue.isAllOnes()) {
7578 MatchInfo = [=](MachineIRBuilder &B) {
7579 B.setInstrAndDebugLoc(*Select);
7580 Register Not = MRI.createGenericVirtualRegister(CondTy);
7581 B.buildNot(Not, Cond);
7582 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
7583 B.buildSExtOrTrunc(Inner, Not);
7584 B.buildOr(Dest, Inner, True, Flags);
7585 };
7586 return true;
7587 }
7588
7589 return false;
7590}
7591
7592// TODO: use knownbits to determine zeros
7593bool CombinerHelper::tryFoldBoolSelectToLogic(GSelect *Select,
7594 BuildFnTy &MatchInfo) const {
7595 uint32_t Flags = Select->getFlags();
7596 Register DstReg = Select->getReg(0);
7597 Register Cond = Select->getCondReg();
7598 Register True = Select->getTrueReg();
7599 Register False = Select->getFalseReg();
7600 LLT CondTy = MRI.getType(Select->getCondReg());
7601 LLT TrueTy = MRI.getType(Select->getTrueReg());
7602
7603 // Boolean or fixed vector of booleans.
7604 if (CondTy.isScalableVector() ||
7605 (CondTy.isFixedVector() &&
7606 CondTy.getElementType().getScalarSizeInBits() != 1) ||
7607 CondTy.getScalarSizeInBits() != 1)
7608 return false;
7609
7610 if (CondTy != TrueTy)
7611 return false;
7612
7613 // select Cond, Cond, F --> or Cond, F
7614 // select Cond, 1, F --> or Cond, F
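  // The unselected operand is frozen: a select only propagates poison from
  // the operand it picks, whereas or/and observe both of their operands.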
7615 if ((Cond == True) || isOneOrOneSplat(True, /* AllowUndefs */ true)) {
7616 MatchInfo = [=](MachineIRBuilder &B) {
7617 B.setInstrAndDebugLoc(*Select);
7618 Register Ext = MRI.createGenericVirtualRegister(TrueTy);
7619 B.buildZExtOrTrunc(Ext, Cond);
7620 auto FreezeFalse = B.buildFreeze(TrueTy, False);
7621 B.buildOr(DstReg, Ext, FreezeFalse, Flags);
7622 };
7623 return true;
7624 }
7625
7626 // select Cond, T, Cond --> and Cond, T
7627 // select Cond, T, 0 --> and Cond, T
7628 if ((Cond == False) || isZeroOrZeroSplat(False, /* AllowUndefs */ true)) {
7629 MatchInfo = [=](MachineIRBuilder &B) {
7630 B.setInstrAndDebugLoc(*Select);
7631 Register Ext = MRI.createGenericVirtualRegister(TrueTy);
7632 B.buildZExtOrTrunc(Ext, Cond);
7633 auto FreezeTrue = B.buildFreeze(TrueTy, True);
7634 B.buildAnd(DstReg, Ext, FreezeTrue);
7635 };
7636 return true;
7637 }
7638
7639 // select Cond, T, 1 --> or (not Cond), T
7640 if (isOneOrOneSplat(False, /* AllowUndefs */ true)) {
7641 MatchInfo = [=](MachineIRBuilder &B) {
7642 B.setInstrAndDebugLoc(*Select);
7643 // First the not.
7644 Register Inner = MRI.createGenericVirtualRegister(CondTy);
7645 B.buildNot(Inner, Cond);
7646 // Then an ext to match the destination register.
7647 Register Ext = MRI.createGenericVirtualRegister(TrueTy);
7648 B.buildZExtOrTrunc(Ext, Inner);
7649 auto FreezeTrue = B.buildFreeze(TrueTy, True);
7650 B.buildOr(DstReg, Ext, FreezeTrue, Flags);
7651 };
7652 return true;
7653 }
7654
7655 // select Cond, 0, F --> and (not Cond), F
7656 if (isZeroOrZeroSplat(True, /* AllowUndefs */ true)) {
7657 MatchInfo = [=](MachineIRBuilder &B) {
7658 B.setInstrAndDebugLoc(*Select);
7659 // First the not.
7660 Register Inner = MRI.createGenericVirtualRegister(CondTy);
7661 B.buildNot(Inner, Cond);
7662 // Then an ext to match the destination register.
7663 Register Ext = MRI.createGenericVirtualRegister(TrueTy);
7664 B.buildZExtOrTrunc(Ext, Inner);
7665 auto FreezeFalse = B.buildFreeze(TrueTy, False);
7666 B.buildAnd(DstReg, Ext, FreezeFalse);
7667 };
7668 return true;
7669 }
7670
7671 return false;
7672}
7673
7675 BuildFnTy &MatchInfo) const {
7676 GSelect *Select = cast<GSelect>(MRI.getVRegDef(MO.getReg()));
7677 GICmp *Cmp = cast<GICmp>(MRI.getVRegDef(Select->getCondReg()));
7678
7679 Register DstReg = Select->getReg(0);
7680 Register True = Select->getTrueReg();
7681 Register False = Select->getFalseReg();
7682 LLT DstTy = MRI.getType(DstReg);
7683
7684 if (DstTy.isPointerOrPointerVector())
7685 return false;
7686
7687 // We want to fold the icmp and replace the select.
7688 if (!MRI.hasOneNonDBGUse(Cmp->getReg(0)))
7689 return false;
7690
7691 CmpInst::Predicate Pred = Cmp->getCond();
7692 // We need a larger or smaller predicate for
7693 // canonicalization.
7694 if (CmpInst::isEquality(Pred))
7695 return false;
7696
7697 Register CmpLHS = Cmp->getLHSReg();
7698 Register CmpRHS = Cmp->getRHSReg();
7699
7700   // We can swap CmpLHS and CmpRHS for a higher hit rate.
7701 if (True == CmpRHS && False == CmpLHS) {
7702 std::swap(CmpLHS, CmpRHS);
7703 Pred = CmpInst::getSwappedPredicate(Pred);
7704 }
7705
7706 // (icmp X, Y) ? X : Y -> integer minmax.
7707 // see matchSelectPattern in ValueTracking.
7708 // Legality between G_SELECT and integer minmax can differ.
7709 if (True != CmpLHS || False != CmpRHS)
7710 return false;
7711
7712 switch (Pred) {
7713 case ICmpInst::ICMP_UGT:
7714 case ICmpInst::ICMP_UGE: {
7715 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMAX, DstTy}))
7716 return false;
7717 MatchInfo = [=](MachineIRBuilder &B) { B.buildUMax(DstReg, True, False); };
7718 return true;
7719 }
7720 case ICmpInst::ICMP_SGT:
7721 case ICmpInst::ICMP_SGE: {
7722 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SMAX, DstTy}))
7723 return false;
7724 MatchInfo = [=](MachineIRBuilder &B) { B.buildSMax(DstReg, True, False); };
7725 return true;
7726 }
7727 case ICmpInst::ICMP_ULT:
7728 case ICmpInst::ICMP_ULE: {
7729 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMIN, DstTy}))
7730 return false;
7731 MatchInfo = [=](MachineIRBuilder &B) { B.buildUMin(DstReg, True, False); };
7732 return true;
7733 }
7734 case ICmpInst::ICMP_SLT:
7735 case ICmpInst::ICMP_SLE: {
7736 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SMIN, DstTy}))
7737 return false;
7738 MatchInfo = [=](MachineIRBuilder &B) { B.buildSMin(DstReg, True, False); };
7739 return true;
7740 }
7741 default:
7742 return false;
7743 }
7744}
7745
7746// (neg (min/max x, (neg x))) --> (max/min x, (neg x))
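// E.g., (sub 0, (smin x, (sub 0, x))) folds to (smax x, (sub 0, x)):
// negating the minimum of {x, -x} yields the maximum of {-x, x}, which is
// the same pair of values.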
7747 bool CombinerHelper::matchSimplifyNegMinMax(MachineInstr &MI,
7748                                             BuildFnTy &MatchInfo) const {
7749 assert(MI.getOpcode() == TargetOpcode::G_SUB);
7750 Register DestReg = MI.getOperand(0).getReg();
7751 LLT DestTy = MRI.getType(DestReg);
7752
7753 Register X;
7754 Register Sub0;
7755 auto NegPattern = m_all_of(m_Neg(m_DeferredReg(X)), m_Reg(Sub0));
7756 if (mi_match(DestReg, MRI,
7757 m_Neg(m_OneUse(m_any_of(m_GSMin(m_Reg(X), NegPattern),
7758 m_GSMax(m_Reg(X), NegPattern),
7759 m_GUMin(m_Reg(X), NegPattern),
7760 m_GUMax(m_Reg(X), NegPattern)))))) {
7761 MachineInstr *MinMaxMI = MRI.getVRegDef(MI.getOperand(2).getReg());
7762 unsigned NewOpc = getInverseGMinMaxOpcode(MinMaxMI->getOpcode());
7763 if (isLegal({NewOpc, {DestTy}})) {
7764 MatchInfo = [=](MachineIRBuilder &B) {
7765 B.buildInstr(NewOpc, {DestReg}, {X, Sub0});
7766 };
7767 return true;
7768 }
7769 }
7770
7771 return false;
7772}
7773
7774 bool CombinerHelper::matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo) const {
7775   GSelect *Select = cast<GSelect>(&MI);
7776
7777 if (tryFoldSelectOfConstants(Select, MatchInfo))
7778 return true;
7779
7780 if (tryFoldBoolSelectToLogic(Select, MatchInfo))
7781 return true;
7782
7783 return false;
7784}
7785
7786/// Fold (icmp Pred1 V1, C1) && (icmp Pred2 V2, C2)
7787/// or (icmp Pred1 V1, C1) || (icmp Pred2 V2, C2)
7788/// into a single comparison using range-based reasoning.
7789/// see InstCombinerImpl::foldAndOrOfICmpsUsingRanges.
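/// E.g., (x == 0 || x == 8) covers two single-element ranges that differ in
/// exactly one bit, so the mask path below rewrites it as ((x & ~8) == 0).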
7790bool CombinerHelper::tryFoldAndOrOrICmpsUsingRanges(
7791 GLogicalBinOp *Logic, BuildFnTy &MatchInfo) const {
7792 assert(Logic->getOpcode() != TargetOpcode::G_XOR && "unexpected xor");
7793 bool IsAnd = Logic->getOpcode() == TargetOpcode::G_AND;
7794 Register DstReg = Logic->getReg(0);
7795 Register LHS = Logic->getLHSReg();
7796 Register RHS = Logic->getRHSReg();
7797 unsigned Flags = Logic->getFlags();
7798
7799   // We need a G_ICMP on the LHS register.
7800 GICmp *Cmp1 = getOpcodeDef<GICmp>(LHS, MRI);
7801 if (!Cmp1)
7802 return false;
7803
7804   // We need a G_ICMP on the RHS register.
7805 GICmp *Cmp2 = getOpcodeDef<GICmp>(RHS, MRI);
7806 if (!Cmp2)
7807 return false;
7808
7809 // We want to fold the icmps.
7810 if (!MRI.hasOneNonDBGUse(Cmp1->getReg(0)) ||
7811 !MRI.hasOneNonDBGUse(Cmp2->getReg(0)))
7812 return false;
7813
7814 APInt C1;
7815 APInt C2;
7816   std::optional<ValueAndVReg> MaybeC1 =
7817       getIConstantVRegValWithLookThrough(Cmp1->getRHSReg(), MRI);
7818 if (!MaybeC1)
7819 return false;
7820 C1 = MaybeC1->Value;
7821
7822   std::optional<ValueAndVReg> MaybeC2 =
7823       getIConstantVRegValWithLookThrough(Cmp2->getRHSReg(), MRI);
7824 if (!MaybeC2)
7825 return false;
7826 C2 = MaybeC2->Value;
7827
7828 Register R1 = Cmp1->getLHSReg();
7829 Register R2 = Cmp2->getLHSReg();
7830 CmpInst::Predicate Pred1 = Cmp1->getCond();
7831 CmpInst::Predicate Pred2 = Cmp2->getCond();
7832 LLT CmpTy = MRI.getType(Cmp1->getReg(0));
7833 LLT CmpOperandTy = MRI.getType(R1);
7834
7835 if (CmpOperandTy.isPointer())
7836 return false;
7837
7838 // We build ands, adds, and constants of type CmpOperandTy.
7839 // They must be legal to build.
7840 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_AND, CmpOperandTy}) ||
7841 !isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, CmpOperandTy}) ||
7842 !isConstantLegalOrBeforeLegalizer(CmpOperandTy))
7843 return false;
7844
7845 // Look through add of a constant offset on R1, R2, or both operands. This
7846 // allows us to interpret the R + C' < C'' range idiom into a proper range.
7847 std::optional<APInt> Offset1;
7848 std::optional<APInt> Offset2;
7849 if (R1 != R2) {
7850 if (GAdd *Add = getOpcodeDef<GAdd>(R1, MRI)) {
7851       std::optional<ValueAndVReg> MaybeOffset1 =
7852           getIConstantVRegValWithLookThrough(Add->getRHSReg(), MRI);
7853 if (MaybeOffset1) {
7854 R1 = Add->getLHSReg();
7855 Offset1 = MaybeOffset1->Value;
7856 }
7857 }
7858 if (GAdd *Add = getOpcodeDef<GAdd>(R2, MRI)) {
7859       std::optional<ValueAndVReg> MaybeOffset2 =
7860           getIConstantVRegValWithLookThrough(Add->getRHSReg(), MRI);
7861 if (MaybeOffset2) {
7862 R2 = Add->getLHSReg();
7863 Offset2 = MaybeOffset2->Value;
7864 }
7865 }
7866 }
7867
7868 if (R1 != R2)
7869 return false;
7870
7871 // We calculate the icmp ranges including maybe offsets.
7872 ConstantRange CR1 = ConstantRange::makeExactICmpRegion(
7873 IsAnd ? ICmpInst::getInversePredicate(Pred1) : Pred1, C1);
7874 if (Offset1)
7875 CR1 = CR1.subtract(*Offset1);
7876
7877 ConstantRange CR2 = ConstantRange::makeExactICmpRegion(
7878 IsAnd ? ICmpInst::getInversePredicate(Pred2) : Pred2, C2);
7879 if (Offset2)
7880 CR2 = CR2.subtract(*Offset2);
7881
7882 bool CreateMask = false;
7883 APInt LowerDiff;
7884 std::optional<ConstantRange> CR = CR1.exactUnionWith(CR2);
7885 if (!CR) {
7886 // We need non-wrapping ranges.
7887 if (CR1.isWrappedSet() || CR2.isWrappedSet())
7888 return false;
7889
7890 // Check whether we have equal-size ranges that only differ by one bit.
7891 // In that case we can apply a mask to map one range onto the other.
7892 LowerDiff = CR1.getLower() ^ CR2.getLower();
7893 APInt UpperDiff = (CR1.getUpper() - 1) ^ (CR2.getUpper() - 1);
7894 APInt CR1Size = CR1.getUpper() - CR1.getLower();
7895 if (!LowerDiff.isPowerOf2() || LowerDiff != UpperDiff ||
7896 CR1Size != CR2.getUpper() - CR2.getLower())
7897 return false;
7898
7899 CR = CR1.getLower().ult(CR2.getLower()) ? CR1 : CR2;
7900 CreateMask = true;
7901 }
7902
7903 if (IsAnd)
7904 CR = CR->inverse();
7905
7906 CmpInst::Predicate NewPred;
7907 APInt NewC, Offset;
7908 CR->getEquivalentICmp(NewPred, NewC, Offset);
7909
7910   // We take the result type of one of the original icmps, CmpTy, for the
7911   // icmp to be built. The operand type, CmpOperandTy, is used for the other
7912   // instructions and constants to be built. The types of the parameters and
7913   // output are the same for add and and. CmpTy and the type of DstReg might
7914   // differ. That is why we zext or trunc the icmp into the destination
7915   // register.
7916
7917 MatchInfo = [=](MachineIRBuilder &B) {
7918 if (CreateMask && Offset != 0) {
7919 auto TildeLowerDiff = B.buildConstant(CmpOperandTy, ~LowerDiff);
7920 auto And = B.buildAnd(CmpOperandTy, R1, TildeLowerDiff); // the mask.
7921 auto OffsetC = B.buildConstant(CmpOperandTy, Offset);
7922 auto Add = B.buildAdd(CmpOperandTy, And, OffsetC, Flags);
7923 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
7924 auto ICmp = B.buildICmp(NewPred, CmpTy, Add, NewCon);
7925 B.buildZExtOrTrunc(DstReg, ICmp);
7926 } else if (CreateMask && Offset == 0) {
7927 auto TildeLowerDiff = B.buildConstant(CmpOperandTy, ~LowerDiff);
7928 auto And = B.buildAnd(CmpOperandTy, R1, TildeLowerDiff); // the mask.
7929 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
7930 auto ICmp = B.buildICmp(NewPred, CmpTy, And, NewCon);
7931 B.buildZExtOrTrunc(DstReg, ICmp);
7932 } else if (!CreateMask && Offset != 0) {
7933 auto OffsetC = B.buildConstant(CmpOperandTy, Offset);
7934 auto Add = B.buildAdd(CmpOperandTy, R1, OffsetC, Flags);
7935 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
7936 auto ICmp = B.buildICmp(NewPred, CmpTy, Add, NewCon);
7937 B.buildZExtOrTrunc(DstReg, ICmp);
7938 } else if (!CreateMask && Offset == 0) {
7939 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
7940 auto ICmp = B.buildICmp(NewPred, CmpTy, R1, NewCon);
7941 B.buildZExtOrTrunc(DstReg, ICmp);
7942 } else {
7943 llvm_unreachable("unexpected configuration of CreateMask and Offset");
7944 }
7945 };
7946 return true;
7947}
7948
7949bool CombinerHelper::tryFoldLogicOfFCmps(GLogicalBinOp *Logic,
7950 BuildFnTy &MatchInfo) const {
7951   assert(Logic->getOpcode() != TargetOpcode::G_XOR && "unexpected xor");
7952 Register DestReg = Logic->getReg(0);
7953 Register LHS = Logic->getLHSReg();
7954 Register RHS = Logic->getRHSReg();
7955 bool IsAnd = Logic->getOpcode() == TargetOpcode::G_AND;
7956
7957 // We need a compare on the LHS register.
7958 GFCmp *Cmp1 = getOpcodeDef<GFCmp>(LHS, MRI);
7959 if (!Cmp1)
7960 return false;
7961
7962 // We need a compare on the RHS register.
7963 GFCmp *Cmp2 = getOpcodeDef<GFCmp>(RHS, MRI);
7964 if (!Cmp2)
7965 return false;
7966
7967 LLT CmpTy = MRI.getType(Cmp1->getReg(0));
7968 LLT CmpOperandTy = MRI.getType(Cmp1->getLHSReg());
7969
7970   // We build one fcmp, want to fold away the original fcmps and the logic
7971   // op, and the fcmps must have the same shape.
7972   if (!isLegalOrBeforeLegalizer(
7973 {TargetOpcode::G_FCMP, {CmpTy, CmpOperandTy}}) ||
7974 !MRI.hasOneNonDBGUse(Logic->getReg(0)) ||
7975 !MRI.hasOneNonDBGUse(Cmp1->getReg(0)) ||
7976 !MRI.hasOneNonDBGUse(Cmp2->getReg(0)) ||
7977 MRI.getType(Cmp1->getLHSReg()) != MRI.getType(Cmp2->getLHSReg()))
7978 return false;
7979
7980 CmpInst::Predicate PredL = Cmp1->getCond();
7981 CmpInst::Predicate PredR = Cmp2->getCond();
7982 Register LHS0 = Cmp1->getLHSReg();
7983 Register LHS1 = Cmp1->getRHSReg();
7984 Register RHS0 = Cmp2->getLHSReg();
7985 Register RHS1 = Cmp2->getRHSReg();
7986
7987 if (LHS0 == RHS1 && LHS1 == RHS0) {
7988 // Swap RHS operands to match LHS.
7989 PredR = CmpInst::getSwappedPredicate(PredR);
7990 std::swap(RHS0, RHS1);
7991 }
7992
7993 if (LHS0 == RHS0 && LHS1 == RHS1) {
7994 // We determine the new predicate.
7995 unsigned CmpCodeL = getFCmpCode(PredL);
7996 unsigned CmpCodeR = getFCmpCode(PredR);
7997 unsigned NewPred = IsAnd ? CmpCodeL & CmpCodeR : CmpCodeL | CmpCodeR;
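    // E.g., the codes of FCMP_OGT and FCMP_OLT or-ed together give FCMP_ONE,
    // so (fcmp ogt x, y) || (fcmp olt x, y) collapses to one (fcmp one x, y).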
7998 unsigned Flags = Cmp1->getFlags() | Cmp2->getFlags();
7999 MatchInfo = [=](MachineIRBuilder &B) {
8000 // The fcmp predicates fill the lower part of the enum.
8001 FCmpInst::Predicate Pred = static_cast<FCmpInst::Predicate>(NewPred);
8002       if (Pred == FCmpInst::FCMP_FALSE &&
8003           isConstantLegalOrBeforeLegalizer(CmpTy)) {
8004 auto False = B.buildConstant(CmpTy, 0);
8005 B.buildZExtOrTrunc(DestReg, False);
8006 } else if (Pred == FCmpInst::FCMP_TRUE &&
8008 auto True =
8009 B.buildConstant(CmpTy, getICmpTrueVal(getTargetLowering(),
8010 CmpTy.isVector() /*isVector*/,
8011 true /*isFP*/));
8012 B.buildZExtOrTrunc(DestReg, True);
8013 } else { // We take the predicate without predicate optimizations.
8014 auto Cmp = B.buildFCmp(Pred, CmpTy, LHS0, LHS1, Flags);
8015 B.buildZExtOrTrunc(DestReg, Cmp);
8016 }
8017 };
8018 return true;
8019 }
8020
8021 return false;
8022}
8023
8024 bool CombinerHelper::matchAnd(MachineInstr &MI, BuildFnTy &MatchInfo) const {
8025   GAnd *And = cast<GAnd>(&MI);
8026
8027 if (tryFoldAndOrOrICmpsUsingRanges(And, MatchInfo))
8028 return true;
8029
8030 if (tryFoldLogicOfFCmps(And, MatchInfo))
8031 return true;
8032
8033 return false;
8034}
8035
8036 bool CombinerHelper::matchOr(MachineInstr &MI, BuildFnTy &MatchInfo) const {
8037   GOr *Or = cast<GOr>(&MI);
8038
8039 if (tryFoldAndOrOrICmpsUsingRanges(Or, MatchInfo))
8040 return true;
8041
8042 if (tryFoldLogicOfFCmps(Or, MatchInfo))
8043 return true;
8044
8045 return false;
8046}
8047
8048 bool CombinerHelper::matchAddOverflow(MachineInstr &MI,
8049                                       BuildFnTy &MatchInfo) const {
8050   GAddCarryOut *Add = cast<GAddCarryOut>(&MI);
8051
8052 // Addo has no flags
8053 Register Dst = Add->getReg(0);
8054 Register Carry = Add->getReg(1);
8055 Register LHS = Add->getLHSReg();
8056 Register RHS = Add->getRHSReg();
8057 bool IsSigned = Add->isSigned();
8058 LLT DstTy = MRI.getType(Dst);
8059 LLT CarryTy = MRI.getType(Carry);
8060
8061 // Fold addo, if the carry is dead -> add, undef.
8062 if (MRI.use_nodbg_empty(Carry) &&
8063 isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, {DstTy}})) {
8064 MatchInfo = [=](MachineIRBuilder &B) {
8065 B.buildAdd(Dst, LHS, RHS);
8066 B.buildUndef(Carry);
8067 };
8068 return true;
8069 }
8070
8071 // Canonicalize constant to RHS.
8072 if (isConstantOrConstantVectorI(LHS) && !isConstantOrConstantVectorI(RHS)) {
8073 if (IsSigned) {
8074 MatchInfo = [=](MachineIRBuilder &B) {
8075 B.buildSAddo(Dst, Carry, RHS, LHS);
8076 };
8077 return true;
8078 }
8079 // !IsSigned
8080 MatchInfo = [=](MachineIRBuilder &B) {
8081 B.buildUAddo(Dst, Carry, RHS, LHS);
8082 };
8083 return true;
8084 }
8085
8086 std::optional<APInt> MaybeLHS = getConstantOrConstantSplatVector(LHS);
8087 std::optional<APInt> MaybeRHS = getConstantOrConstantSplatVector(RHS);
8088
8089 // Fold addo(c1, c2) -> c3, carry.
8090 if (MaybeLHS && MaybeRHS && isConstantLegalOrBeforeLegalizer(DstTy) &&
8092 bool Overflow;
8093 APInt Result = IsSigned ? MaybeLHS->sadd_ov(*MaybeRHS, Overflow)
8094 : MaybeLHS->uadd_ov(*MaybeRHS, Overflow);
8095 MatchInfo = [=](MachineIRBuilder &B) {
8096 B.buildConstant(Dst, Result);
8097 B.buildConstant(Carry, Overflow);
8098 };
8099 return true;
8100 }
8101
8102 // Fold (addo x, 0) -> x, no carry
8103 if (MaybeRHS && *MaybeRHS == 0 && isConstantLegalOrBeforeLegalizer(CarryTy)) {
8104 MatchInfo = [=](MachineIRBuilder &B) {
8105 B.buildCopy(Dst, LHS);
8106 B.buildConstant(Carry, 0);
8107 };
8108 return true;
8109 }
8110
8111 // Given 2 constant operands whose sum does not overflow:
8112 // uaddo (X +nuw C0), C1 -> uaddo X, C0 + C1
8113 // saddo (X +nsw C0), C1 -> saddo X, C0 + C1
8114 GAdd *AddLHS = getOpcodeDef<GAdd>(LHS, MRI);
8115 if (MaybeRHS && AddLHS && MRI.hasOneNonDBGUse(Add->getReg(0)) &&
8116 ((IsSigned && AddLHS->getFlag(MachineInstr::MIFlag::NoSWrap)) ||
8117 (!IsSigned && AddLHS->getFlag(MachineInstr::MIFlag::NoUWrap)))) {
8118 std::optional<APInt> MaybeAddRHS =
8119 getConstantOrConstantSplatVector(AddLHS->getRHSReg());
8120 if (MaybeAddRHS) {
8121 bool Overflow;
8122 APInt NewC = IsSigned ? MaybeAddRHS->sadd_ov(*MaybeRHS, Overflow)
8123 : MaybeAddRHS->uadd_ov(*MaybeRHS, Overflow);
8124 if (!Overflow && isConstantLegalOrBeforeLegalizer(DstTy)) {
8125 if (IsSigned) {
8126 MatchInfo = [=](MachineIRBuilder &B) {
8127 auto ConstRHS = B.buildConstant(DstTy, NewC);
8128 B.buildSAddo(Dst, Carry, AddLHS->getLHSReg(), ConstRHS);
8129 };
8130 return true;
8131 }
8132 // !IsSigned
8133 MatchInfo = [=](MachineIRBuilder &B) {
8134 auto ConstRHS = B.buildConstant(DstTy, NewC);
8135 B.buildUAddo(Dst, Carry, AddLHS->getLHSReg(), ConstRHS);
8136 };
8137 return true;
8138 }
8139 }
8140 };
8141
8142 // We try to combine addo to non-overflowing add.
8143   if (!isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, {DstTy}}) ||
8144       !isConstantLegalOrBeforeLegalizer(CarryTy))
8145 return false;
8146
8147 // We try to combine uaddo to non-overflowing add.
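  // Known bits give unsigned ranges for LHS and RHS: if the sum can never
  // overflow we emit a plain add with the nuw flag and a constant 0 carry;
  // if it always overflows, the carry is a constant 1.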
8148 if (!IsSigned) {
8149 ConstantRange CRLHS =
8150 ConstantRange::fromKnownBits(VT->getKnownBits(LHS), /*IsSigned=*/false);
8151 ConstantRange CRRHS =
8152 ConstantRange::fromKnownBits(VT->getKnownBits(RHS), /*IsSigned=*/false);
8153
8154     switch (CRLHS.unsignedAddMayOverflow(CRRHS)) {
8155     case ConstantRange::OverflowResult::MayOverflow:
8156       return false;
8157     case ConstantRange::OverflowResult::NeverOverflows: {
8158 MatchInfo = [=](MachineIRBuilder &B) {
8159 B.buildAdd(Dst, LHS, RHS, MachineInstr::MIFlag::NoUWrap);
8160 B.buildConstant(Carry, 0);
8161 };
8162 return true;
8163     }
8164     case ConstantRange::OverflowResult::AlwaysOverflowsLow:
8165     case ConstantRange::OverflowResult::AlwaysOverflowsHigh: {
8166 MatchInfo = [=](MachineIRBuilder &B) {
8167 B.buildAdd(Dst, LHS, RHS);
8168 B.buildConstant(Carry, 1);
8169 };
8170 return true;
8171 }
8172 }
8173 return false;
8174 }
8175
8176 // We try to combine saddo to non-overflowing add.
8177
8178 // If LHS and RHS each have at least two sign bits, then there is no signed
8179 // overflow.
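  // With two sign bits, each value lies in [-2^(w-2), 2^(w-2)-1], so the sum
  // stays within the signed range of the full bit width w.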
8180 if (VT->computeNumSignBits(RHS) > 1 && VT->computeNumSignBits(LHS) > 1) {
8181 MatchInfo = [=](MachineIRBuilder &B) {
8182 B.buildAdd(Dst, LHS, RHS, MachineInstr::MIFlag::NoSWrap);
8183 B.buildConstant(Carry, 0);
8184 };
8185 return true;
8186 }
8187
8188 ConstantRange CRLHS =
8189 ConstantRange::fromKnownBits(VT->getKnownBits(LHS), /*IsSigned=*/true);
8190 ConstantRange CRRHS =
8191 ConstantRange::fromKnownBits(VT->getKnownBits(RHS), /*IsSigned=*/true);
8192
8193   switch (CRLHS.signedAddMayOverflow(CRRHS)) {
8194   case ConstantRange::OverflowResult::MayOverflow:
8195     return false;
8196   case ConstantRange::OverflowResult::NeverOverflows: {
8197 MatchInfo = [=](MachineIRBuilder &B) {
8198 B.buildAdd(Dst, LHS, RHS, MachineInstr::MIFlag::NoSWrap);
8199 B.buildConstant(Carry, 0);
8200 };
8201 return true;
8202   }
8203   case ConstantRange::OverflowResult::AlwaysOverflowsLow:
8204   case ConstantRange::OverflowResult::AlwaysOverflowsHigh: {
8205 MatchInfo = [=](MachineIRBuilder &B) {
8206 B.buildAdd(Dst, LHS, RHS);
8207 B.buildConstant(Carry, 1);
8208 };
8209 return true;
8210 }
8211 }
8212
8213 return false;
8214}
8215
8216 void CombinerHelper::applyBuildFnMO(const MachineOperand &MO,
8217                                     BuildFnTy &MatchInfo) const {
8218   MachineInstr *Root = getDefIgnoringCopies(MO.getReg(), MRI);
8219   MatchInfo(Builder);
8220 Root->eraseFromParent();
8221}
8222
8223 bool CombinerHelper::matchFPowIExpansion(MachineInstr &MI,
8224                                          int64_t Exponent) const {
8225   bool OptForSize = MI.getMF()->getFunction().hasOptSize();
8226   return getTargetLowering().isBeneficialToExpandPowI(Exponent, OptForSize);
8227 }
8228
8229 void CombinerHelper::applyExpandFPowI(MachineInstr &MI,
8230                                       int64_t Exponent) const {
8231 auto [Dst, Base] = MI.getFirst2Regs();
8232 LLT Ty = MRI.getType(Dst);
8233 int64_t ExpVal = Exponent;
8234
8235 if (ExpVal == 0) {
8236 Builder.buildFConstant(Dst, 1.0);
8237 MI.removeFromParent();
8238 return;
8239 }
8240
8241 if (ExpVal < 0)
8242 ExpVal = -ExpVal;
8243
8244 // We use the simple binary decomposition method from SelectionDAG ExpandPowI
8245 // to generate the multiply sequence. There are more optimal ways to do this
8246 // (for example, powi(x,15) generates one more multiply than it should), but
8247 // this has the benefit of being both really simple and much better than a
8248 // libcall.
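  // E.g., for Exponent = 5 (0b101): bit 0 sets Res = x, CurSquare advances to
  // x^2 and then x^4, and bit 2 multiplies Res by x^4, yielding x^5 in three
  // fmuls.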
8249 std::optional<SrcOp> Res;
8250 SrcOp CurSquare = Base;
8251 while (ExpVal > 0) {
8252 if (ExpVal & 1) {
8253 if (!Res)
8254 Res = CurSquare;
8255 else
8256 Res = Builder.buildFMul(Ty, *Res, CurSquare);
8257 }
8258
8259 CurSquare = Builder.buildFMul(Ty, CurSquare, CurSquare);
8260 ExpVal >>= 1;
8261 }
8262
8263 // If the original exponent was negative, invert the result, producing
8264 // 1/(x*x*x).
8265 if (Exponent < 0)
8266 Res = Builder.buildFDiv(Ty, Builder.buildFConstant(Ty, 1.0), *Res,
8267 MI.getFlags());
8268
8269 Builder.buildCopy(Dst, *Res);
8270 MI.eraseFromParent();
8271}
8272
8273 bool CombinerHelper::matchFoldAPlusC1MinusC2(const MachineInstr &MI,
8274                                              BuildFnTy &MatchInfo) const {
8275 // fold (A+C1)-C2 -> A+(C1-C2)
8276 const GSub *Sub = cast<GSub>(&MI);
8277 GAdd *Add = cast<GAdd>(MRI.getVRegDef(Sub->getLHSReg()));
8278
8279 if (!MRI.hasOneNonDBGUse(Add->getReg(0)))
8280 return false;
8281
8282 APInt C2 = getIConstantFromReg(Sub->getRHSReg(), MRI);
8283 APInt C1 = getIConstantFromReg(Add->getRHSReg(), MRI);
8284
8285 Register Dst = Sub->getReg(0);
8286 LLT DstTy = MRI.getType(Dst);
8287
8288 MatchInfo = [=](MachineIRBuilder &B) {
8289 auto Const = B.buildConstant(DstTy, C1 - C2);
8290 B.buildAdd(Dst, Add->getLHSReg(), Const);
8291 };
8292
8293 return true;
8294}
8295
8296 bool CombinerHelper::matchFoldC2MinusAPlusC1(const MachineInstr &MI,
8297                                              BuildFnTy &MatchInfo) const {
8298 // fold C2-(A+C1) -> (C2-C1)-A
8299 const GSub *Sub = cast<GSub>(&MI);
8300 GAdd *Add = cast<GAdd>(MRI.getVRegDef(Sub->getRHSReg()));
8301
8302 if (!MRI.hasOneNonDBGUse(Add->getReg(0)))
8303 return false;
8304
8305 APInt C2 = getIConstantFromReg(Sub->getLHSReg(), MRI);
8306 APInt C1 = getIConstantFromReg(Add->getRHSReg(), MRI);
8307
8308 Register Dst = Sub->getReg(0);
8309 LLT DstTy = MRI.getType(Dst);
8310
8311 MatchInfo = [=](MachineIRBuilder &B) {
8312 auto Const = B.buildConstant(DstTy, C2 - C1);
8313 B.buildSub(Dst, Const, Add->getLHSReg());
8314 };
8315
8316 return true;
8317}
8318
8319 bool CombinerHelper::matchFoldAMinusC1MinusC2(const MachineInstr &MI,
8320                                               BuildFnTy &MatchInfo) const {
8321 // fold (A-C1)-C2 -> A-(C1+C2)
8322 const GSub *Sub1 = cast<GSub>(&MI);
8323 GSub *Sub2 = cast<GSub>(MRI.getVRegDef(Sub1->getLHSReg()));
8324
8325 if (!MRI.hasOneNonDBGUse(Sub2->getReg(0)))
8326 return false;
8327
8328 APInt C2 = getIConstantFromReg(Sub1->getRHSReg(), MRI);
8329 APInt C1 = getIConstantFromReg(Sub2->getRHSReg(), MRI);
8330
8331 Register Dst = Sub1->getReg(0);
8332 LLT DstTy = MRI.getType(Dst);
8333
8334 MatchInfo = [=](MachineIRBuilder &B) {
8335 auto Const = B.buildConstant(DstTy, C1 + C2);
8336 B.buildSub(Dst, Sub2->getLHSReg(), Const);
8337 };
8338
8339 return true;
8340}

bool CombinerHelper::matchFoldC1Minus2MinusC2(const MachineInstr &MI,
                                              BuildFnTy &MatchInfo) const {
  // fold (C1-A)-C2 -> (C1-C2)-A
  const GSub *Sub1 = cast<GSub>(&MI);
  GSub *Sub2 = cast<GSub>(MRI.getVRegDef(Sub1->getLHSReg()));

  if (!MRI.hasOneNonDBGUse(Sub2->getReg(0)))
    return false;

  APInt C2 = getIConstantFromReg(Sub1->getRHSReg(), MRI);
  APInt C1 = getIConstantFromReg(Sub2->getLHSReg(), MRI);

  Register Dst = Sub1->getReg(0);
  LLT DstTy = MRI.getType(Dst);

  MatchInfo = [=](MachineIRBuilder &B) {
    auto Const = B.buildConstant(DstTy, C1 - C2);
    B.buildSub(Dst, Const, Sub2->getRHSReg());
  };

  return true;
}

bool CombinerHelper::matchFoldAMinusC1PlusC2(const MachineInstr &MI,
                                             BuildFnTy &MatchInfo) const {
  // fold ((A-C1)+C2) -> (A+(C2-C1))
  const GAdd *Add = cast<GAdd>(&MI);
  GSub *Sub = cast<GSub>(MRI.getVRegDef(Add->getLHSReg()));

  if (!MRI.hasOneNonDBGUse(Sub->getReg(0)))
    return false;

  APInt C2 = getIConstantFromReg(Add->getRHSReg(), MRI);
  APInt C1 = getIConstantFromReg(Sub->getRHSReg(), MRI);

  Register Dst = Add->getReg(0);
  LLT DstTy = MRI.getType(Dst);

  MatchInfo = [=](MachineIRBuilder &B) {
    auto Const = B.buildConstant(DstTy, C2 - C1);
    B.buildAdd(Dst, Sub->getLHSReg(), Const);
  };

  return true;
}

bool CombinerHelper::matchUnmergeValuesAnyExtBuildVector(
    const MachineInstr &MI, BuildFnTy &MatchInfo) const {
  const GUnmerge *Unmerge = cast<GUnmerge>(&MI);

  if (!MRI.hasOneNonDBGUse(Unmerge->getSourceReg()))
    return false;

  const MachineInstr *Source = MRI.getVRegDef(Unmerge->getSourceReg());

  LLT DstTy = MRI.getType(Unmerge->getReg(0));

  // $bv:_(<8 x s8>) = G_BUILD_VECTOR ....
  // $any:_(<8 x s16>) = G_ANYEXT $bv
  // $uv:_(<4 x s16>), $uv1:_(<4 x s16>) = G_UNMERGE_VALUES $any
  //
  // ->
  //
  // $any:_(s16) = G_ANYEXT $bv[0]
  // $any1:_(s16) = G_ANYEXT $bv[1]
  // $any2:_(s16) = G_ANYEXT $bv[2]
  // $any3:_(s16) = G_ANYEXT $bv[3]
  // $any4:_(s16) = G_ANYEXT $bv[4]
  // $any5:_(s16) = G_ANYEXT $bv[5]
  // $any6:_(s16) = G_ANYEXT $bv[6]
  // $any7:_(s16) = G_ANYEXT $bv[7]
  // $uv:_(<4 x s16>) = G_BUILD_VECTOR $any, $any1, $any2, $any3
  // $uv1:_(<4 x s16>) = G_BUILD_VECTOR $any4, $any5, $any6, $any7

  // We want to unmerge into vectors.
  if (!DstTy.isFixedVector())
    return false;

  const GAnyExt *Any = dyn_cast<GAnyExt>(Source);
  if (!Any)
    return false;

  const MachineInstr *NextSource = MRI.getVRegDef(Any->getSrcReg());

  if (const GBuildVector *BV = dyn_cast<GBuildVector>(NextSource)) {
    // G_UNMERGE_VALUES G_ANYEXT G_BUILD_VECTOR

    if (!MRI.hasOneNonDBGUse(BV->getReg(0)))
      return false;

    // FIXME: check element types?
    if (BV->getNumSources() % Unmerge->getNumDefs() != 0)
      return false;

    LLT BigBvTy = MRI.getType(BV->getReg(0));
    LLT SmallBvTy = DstTy;
    LLT SmallBvElemenTy = SmallBvTy.getElementType();

    if (!isLegalOrBeforeLegalizer(
            {TargetOpcode::G_BUILD_VECTOR, {SmallBvTy, SmallBvElemenTy}}))
      return false;

    // We check the legality of scalar anyext.
    if (!isLegalOrBeforeLegalizer(
            {TargetOpcode::G_ANYEXT,
             {SmallBvElemenTy, BigBvTy.getElementType()}}))
      return false;

    MatchInfo = [=](MachineIRBuilder &B) {
      // Build a small build vector of anyexts from the source build vector
      // into each G_UNMERGE_VALUES def.
      for (unsigned I = 0; I < Unmerge->getNumDefs(); ++I) {
        SmallVector<Register> Ops;
        for (unsigned J = 0; J < SmallBvTy.getNumElements(); ++J) {
          Register SourceArray =
              BV->getSourceReg(I * SmallBvTy.getNumElements() + J);
          auto AnyExt = B.buildAnyExt(SmallBvElemenTy, SourceArray);
          Ops.push_back(AnyExt.getReg(0));
        }
        B.buildBuildVector(Unmerge->getOperand(I).getReg(), Ops);
      }
    };
    return true;
  }

  return false;
}
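// Index arithmetic in the rebuild above (illustrative): with the <8 x s8>
// example from the comment, SmallBvTy has four lanes, so unmerge def I = 1,
// lane J = 2 reads build-vector source 1 * 4 + 2 = 6, which is exactly the
// $any6 operand of $uv1 in the MIR sketch.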

bool CombinerHelper::matchShuffleUndefRHS(MachineInstr &MI,
                                          BuildFnTy &MatchInfo) const {

  bool Changed = false;
  auto &Shuffle = cast<GShuffleVector>(MI);
  ArrayRef<int> OrigMask = Shuffle.getMask();
  SmallVector<int, 16> NewMask;
  const LLT SrcTy = MRI.getType(Shuffle.getSrc1Reg());
  const unsigned NumSrcElems = SrcTy.isVector() ? SrcTy.getNumElements() : 1;
  const unsigned NumDstElts = OrigMask.size();
  for (unsigned i = 0; i != NumDstElts; ++i) {
    int Idx = OrigMask[i];
    if (Idx >= (int)NumSrcElems) {
      Idx = -1;
      Changed = true;
    }
    NewMask.push_back(Idx);
  }

  if (!Changed)
    return false;

  MatchInfo = [&, NewMask = std::move(NewMask)](MachineIRBuilder &B) {
    B.buildShuffleVector(MI.getOperand(0), MI.getOperand(1), MI.getOperand(2),
                         std::move(NewMask));
  };

  return true;
}
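// Worked example (illustrative): for <4 x s32> sources where the RHS is
// undef, a mask of [0, 5, 2, 7] reads lanes 5 and 7 from the undef operand,
// so the loop above rewrites it as follows:
//
//   SmallVector<int, 4> M = {0, 5, 2, 7};
//   for (int &I : M)
//     if (I >= 4) // NumSrcElems == 4
//       I = -1;
//   // M is now {0, -1, 2, -1}; the shuffle is rebuilt with this mask.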

static void commuteMask(MutableArrayRef<int> Mask, const unsigned NumElems) {
  const unsigned MaskSize = Mask.size();
  for (unsigned I = 0; I < MaskSize; ++I) {
    int Idx = Mask[I];
    if (Idx < 0)
      continue;

    if (Idx < (int)NumElems)
      Mask[I] = Idx + NumElems;
    else
      Mask[I] = Idx - NumElems;
  }
}
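// Example (illustrative): with NumElems = 4, commuteMask maps {0, 5, -1, 2}
// to {4, 1, -1, 6}; indices move between the first-source range [0, 4) and
// the second-source range [4, 8), and undef (-1) lanes are left alone.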

bool CombinerHelper::matchShuffleDisjointMask(MachineInstr &MI,
                                              BuildFnTy &MatchInfo) const {

  auto &Shuffle = cast<GShuffleVector>(MI);
  // If either of the two inputs is already undef, don't check the mask again
  // to prevent an infinite loop.
  if (getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Shuffle.getSrc1Reg(), MRI))
    return false;

  if (getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Shuffle.getSrc2Reg(), MRI))
    return false;

  const LLT DstTy = MRI.getType(Shuffle.getReg(0));
  const LLT Src1Ty = MRI.getType(Shuffle.getSrc1Reg());
  if (!isLegalOrBeforeLegalizer(
          {TargetOpcode::G_SHUFFLE_VECTOR, {DstTy, Src1Ty}}))
    return false;

  ArrayRef<int> Mask = Shuffle.getMask();
  const unsigned NumSrcElems = Src1Ty.getNumElements();

  bool TouchesSrc1 = false;
  bool TouchesSrc2 = false;
  const unsigned NumElems = Mask.size();
  for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
    if (Mask[Idx] < 0)
      continue;

    if (Mask[Idx] < (int)NumSrcElems)
      TouchesSrc1 = true;
    else
      TouchesSrc2 = true;
  }

  if (TouchesSrc1 == TouchesSrc2)
    return false;

  Register NewSrc1 = Shuffle.getSrc1Reg();
  SmallVector<int, 16> NewMask(Mask);
  if (TouchesSrc2) {
    NewSrc1 = Shuffle.getSrc2Reg();
    commuteMask(NewMask, NumSrcElems);
  }

  MatchInfo = [=, &Shuffle](MachineIRBuilder &B) {
    auto Undef = B.buildUndef(Src1Ty);
    B.buildShuffleVector(Shuffle.getReg(0), NewSrc1, Undef, NewMask);
  };

  return true;
}
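// Worked example (illustrative): shuffle(%a, %b, [4, 6, 5, 7]) on <4 x s32>
// sources only touches %b, so NewSrc1 becomes %b, the mask commutes to
// [0, 2, 1, 3], and the result is rebuilt as shuffle(%b, undef, [0, 2, 1, 3]).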

bool CombinerHelper::matchSuboCarryOut(const MachineInstr &MI,
                                       BuildFnTy &MatchInfo) const {
  const GSubCarryOut *Subo = cast<GSubCarryOut>(&MI);

  Register Dst = Subo->getReg(0);
  Register LHS = Subo->getLHSReg();
  Register RHS = Subo->getRHSReg();
  Register Carry = Subo->getCarryOutReg();
  LLT DstTy = MRI.getType(Dst);
  LLT CarryTy = MRI.getType(Carry);

  // Check legality before known bits.
  if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SUB, {DstTy}}) ||
      !isConstantLegalOrBeforeLegalizer(CarryTy))
    return false;

  ConstantRange KBLHS =
      ConstantRange::fromKnownBits(VT->getKnownBits(LHS),
                                   /*IsSigned=*/Subo->isSigned());
  ConstantRange KBRHS =
      ConstantRange::fromKnownBits(VT->getKnownBits(RHS),
                                   /*IsSigned=*/Subo->isSigned());

  if (Subo->isSigned()) {
    // G_SSUBO
    switch (KBLHS.signedSubMayOverflow(KBRHS)) {
    case ConstantRange::OverflowResult::MayOverflow:
      return false;
    case ConstantRange::OverflowResult::NeverOverflows: {
      MatchInfo = [=](MachineIRBuilder &B) {
        B.buildSub(Dst, LHS, RHS, MachineInstr::MIFlag::NoSWrap);
        B.buildConstant(Carry, 0);
      };
      return true;
    }
    case ConstantRange::OverflowResult::AlwaysOverflowsLow:
    case ConstantRange::OverflowResult::AlwaysOverflowsHigh: {
      MatchInfo = [=](MachineIRBuilder &B) {
        B.buildSub(Dst, LHS, RHS);
        B.buildConstant(Carry, getICmpTrueVal(getTargetLowering(),
                                              /*isVector=*/CarryTy.isVector(),
                                              /*isFP=*/false));
      };
      return true;
    }
    }
    return false;
  }

  // G_USUBO
  switch (KBLHS.unsignedSubMayOverflow(KBRHS)) {
  case ConstantRange::OverflowResult::MayOverflow:
    return false;
  case ConstantRange::OverflowResult::NeverOverflows: {
    MatchInfo = [=](MachineIRBuilder &B) {
      B.buildSub(Dst, LHS, RHS, MachineInstr::MIFlag::NoUWrap);
      B.buildConstant(Carry, 0);
    };
    return true;
  }
  case ConstantRange::OverflowResult::AlwaysOverflowsLow:
  case ConstantRange::OverflowResult::AlwaysOverflowsHigh: {
    MatchInfo = [=](MachineIRBuilder &B) {
      B.buildSub(Dst, LHS, RHS);
      B.buildConstant(Carry, getICmpTrueVal(getTargetLowering(),
                                            /*isVector=*/CarryTy.isVector(),
                                            /*isFP=*/false));
    };
    return true;
  }
  }

  return false;
}
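// Illustrative sketch of the never-overflows case for G_USUBO (assumes the
// standard ConstantRange API; ranges are half-open [Lower, Upper)): if value
// tracking proves LHS is in [256, 512) and RHS is in [0, 256), then
// LHS - RHS is at least 1 and unsigned subtraction can never wrap, so the
// carry out is a constant false.
//
//   ConstantRange L(APInt(32, 256), APInt(32, 512));
//   ConstantRange R(APInt(32, 0), APInt(32, 256));
//   assert(L.unsignedSubMayOverflow(R) ==
//          ConstantRange::OverflowResult::NeverOverflows);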

// Fold (ctlz (xor x, (sra x, bitwidth-1))) -> (add (ctls x), 1).
// Fold (ctlz (or (shl (xor x, (sra x, bitwidth-1)), 1), 1)) -> (ctls x).
bool CombinerHelper::matchCtls(MachineInstr &CtlzMI,
                               BuildFnTy &MatchInfo) const {
  assert((CtlzMI.getOpcode() == TargetOpcode::G_CTLZ ||
          CtlzMI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_POISON) &&
         "Expected G_CTLZ variant");

  const Register Dst = CtlzMI.getOperand(0).getReg();
  Register Src = CtlzMI.getOperand(1).getReg();

  LLT Ty = MRI.getType(Dst);
  LLT SrcTy = MRI.getType(Src);

  if (!(Ty.isValid() && Ty.isScalar()))
    return false;

  if (!LI)
    return false;

  SmallVector<LLT, 2> QueryTypes = {Ty, SrcTy};
  LegalityQuery Query(TargetOpcode::G_CTLS, QueryTypes);

  switch (LI->getAction(Query).Action) {
  default:
    return false;
  case LegalizeActions::Legal:
  case LegalizeActions::Lower:
  case LegalizeActions::Custom:
    break;
  }

  // Src = or(shl(V, 1), 1) -> Src = V; NeedAdd = false.
  Register V;
  bool NeedAdd = true;
  if (mi_match(Src, MRI,
               m_OneNonDBGUse(m_GOr(m_GShl(m_Reg(V), m_SpecificICst(1)),
                                    m_SpecificICst(1))))) {
    NeedAdd = false;
    Src = V;
  }

  unsigned BitWidth = Ty.getScalarSizeInBits();

  Register X;
  if (!mi_match(Src, MRI,
                m_OneNonDBGUse(m_GXor(m_Reg(X),
                                      m_OneNonDBGUse(m_GAShr(
                                          m_DeferredReg(X),
                                          m_SpecificICst(BitWidth - 1)))))))
    return false;

  MatchInfo = [=](MachineIRBuilder &B) {
    if (!NeedAdd) {
      B.buildCTLS(Dst, X);
      return;
    }

    auto Ctls = B.buildCTLS(Ty, X);
    auto One = B.buildConstant(Ty, 1);

    B.buildAdd(Dst, Ctls, One);
  };

  return true;
}
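// Worked example (illustrative) on s32: take x = 0x00003C00, whose sign bit
// is 0, so x ^ (x >> 31) == x and ctlz gives 18. Counting leading copies of
// the sign bit after the sign bit itself, ctls(x) = 17, matching the
// (add (ctls x), 1) form. For the second pattern, shifting the xor left by
// one and setting bit 0 removes exactly one leading zero, so the ctlz there
// equals ctls(x) directly.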
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
unsigned RegSize
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
constexpr LLT S1
AMDGPU Register Bank Select
Rewrite undef for PHI
This file declares a class to represent arbitrary precision floating point values and provide a varie...
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static const Function * getParent(const Value *V)
#define X(NUM, ENUM, NAME)
Definition ELF.h:851
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static bool hasMoreUses(const MachineInstr &MI0, const MachineInstr &MI1, const MachineRegisterInfo &MRI)
static bool isContractableFMul(MachineInstr &MI, bool AllowFusionGlobally)
Checks if MI is TargetOpcode::G_FMUL and contractable either due to global flags or MachineInstr flag...
static unsigned getIndexedOpc(unsigned LdStOpc)
static APFloat constantFoldFpUnary(const MachineInstr &MI, const MachineRegisterInfo &MRI, const APFloat &Val)
static std::optional< std::pair< GZExtLoad *, int64_t > > matchLoadAndBytePosition(Register Reg, unsigned MemSizeInBits, const MachineRegisterInfo &MRI)
Helper function for findLoadOffsetsForLoadOrCombine.
static std::optional< unsigned > getMinUselessShift(KnownBits ValueKB, unsigned Opcode, std::optional< int64_t > &Result)
Return the minimum useless shift amount that results in complete loss of the source value.
static Register peekThroughBitcast(Register Reg, const MachineRegisterInfo &MRI)
static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I)
static cl::opt< bool > ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false), cl::desc("Force all indexed operations to be " "legal for the GlobalISel combiner"))
static void commuteMask(MutableArrayRef< int > Mask, const unsigned NumElems)
static cl::opt< unsigned > PostIndexUseThreshold("post-index-use-threshold", cl::Hidden, cl::init(32), cl::desc("Number of uses of a base pointer to check before it is no longer " "considered for post-indexing."))
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
static unsigned getExtLoadOpcForExtend(unsigned ExtOpc)
static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits, int64_t Cst, bool IsVector, bool IsFP)
static LLT getMidVTForTruncRightShiftCombine(LLT ShiftTy, LLT TruncTy)
static bool canFoldInAddressingMode(GLoadStore *MI, const TargetLowering &TLI, MachineRegisterInfo &MRI)
Return true if 'MI' is a load or a store that may be fold it's address operand into the load / store ...
static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I)
static Register buildLogBase2(Register V, MachineIRBuilder &MIB)
Determines the LogBase2 value for a non-null input value using the transform: LogBase2(V) = (EltBits ...
This contains common combine transformations that may be used in a combine pass,or by the target else...
This contains common code to allow clients to notify changes to machine instr.
Provides analysis for querying information about KnownBits during GISel passes.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
#define _
IRTranslator LLVM IR MI
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static LVOptions Options
Definition LVOptions.cpp:25
Interface for Targets to specify which operations they can successfully select and how the others sho...
static bool isConstantSplatVector(SDValue N, APInt &SplatValue, unsigned MinSizeInBits)
Implement a low-level type suitable for MachineInstr level instruction selection.
#define I(x, y, z)
Definition MD5.cpp:57
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineIRBuilder class.
Register Reg
#define R2(n)
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
R600 Clause Merge
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
This file contains some templates that are useful if you are working with the STL at all.
This file implements a set that has insertion order iteration characteristics.
This file implements the SmallBitVector class.
#define LLVM_DEBUG(...)
Definition Debug.h:114
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
This file describes how to lower LLVM code to machine code.
Value * RHS
Value * LHS
static constexpr roundingMode rmTowardZero
Definition APFloat.h:348
static const fltSemantics & IEEEdouble()
Definition APFloat.h:297
static constexpr roundingMode rmTowardNegative
Definition APFloat.h:347
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
static constexpr roundingMode rmTowardPositive
Definition APFloat.h:346
static constexpr roundingMode rmNearestTiesToAway
Definition APFloat.h:349
const fltSemantics & getSemantics() const
Definition APFloat.h:1546
bool isNaN() const
Definition APFloat.h:1536
opStatus fusedMultiplyAdd(const APFloat &Multiplicand, const APFloat &Addend, roundingMode RM)
Definition APFloat.h:1294
APInt bitcastToAPInt() const
Definition APFloat.h:1430
Class for arbitrary precision integers.
Definition APInt.h:78
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1055
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1563
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition APInt.cpp:1076
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:968
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition APInt.h:207
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:372
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition APInt.h:1189
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1709
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1511
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition APInt.h:1118
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition APInt.h:210
bool isNegative() const
Determine sign of this APInt.
Definition APInt.h:330
int32_t exactLogBase2() const
Definition APInt.h:1806
void ashrInPlace(unsigned ShiftAmt)
Arithmetic right-shift this APInt by ShiftAmt in place.
Definition APInt.h:841
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1662
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition APInt.h:1621
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:220
LLVM_ABI APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
Definition APInt.cpp:1084
bool isStrictlyPositive() const
Determine if this APInt Value is positive.
Definition APInt.h:357
LLVM_ABI APInt multiplicativeInverse() const
Definition APInt.cpp:1317
bool isMask(unsigned numBits) const
Definition APInt.h:489
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition APInt.cpp:1028
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:441
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
bool isOne() const
Determine if this is a value of 1.
Definition APInt.h:390
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition APInt.h:240
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1585
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition APInt.h:865
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:858
unsigned countr_one() const
Count the number of trailing one bits.
Definition APInt.h:1679
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition APInt.h:1228
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
Get the array size.
Definition ArrayRef.h:141
bool isEquality() const
Determine if this is an equals/not equals predicate.
Definition InstrTypes.h:915
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
@ FCMP_TRUE
1 1 1 1 Always true (always folded)
Definition InstrTypes.h:693
@ ICMP_SLT
signed less than
Definition InstrTypes.h:705
@ ICMP_SLE
signed less or equal
Definition InstrTypes.h:706
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition InstrTypes.h:682
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
Definition InstrTypes.h:691
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition InstrTypes.h:680
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition InstrTypes.h:681
@ ICMP_UGE
unsigned greater or equal
Definition InstrTypes.h:700
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:699
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:703
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition InstrTypes.h:690
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:701
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition InstrTypes.h:688
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition InstrTypes.h:683
@ ICMP_SGE
signed greater or equal
Definition InstrTypes.h:704
@ ICMP_ULE
unsigned less or equal
Definition InstrTypes.h:702
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition InstrTypes.h:689
@ FCMP_FALSE
0 0 0 0 Always false (always folded)
Definition InstrTypes.h:678
static LLVM_ABI bool isEquality(Predicate pred)
Determine if this is an equals/not equals predicate.
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition InstrTypes.h:827
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition InstrTypes.h:789
static LLVM_ABI bool isOrdered(Predicate predicate)
Determine if the predicate is an ordered operation.
void applyCombineExtendingLoads(MachineInstr &MI, PreferredTuple &MatchInfo) const
bool matchCommuteShift(MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchRepeatedFPDivisor(MachineInstr &MI, SmallVector< MachineInstr * > &MatchInfo) const
bool matchFoldC2MinusAPlusC1(const MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchLoadOrCombine(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match expression trees of the form.
const RegisterBank * getRegBank(Register Reg) const
Get the register bank of Reg.
void applyPtrAddZero(MachineInstr &MI) const
bool matchEqualDefs(const MachineOperand &MOP1, const MachineOperand &MOP2) const
Return true if MOP1 and MOP2 are register operands are defined by equivalent instructions.
void applyUDivOrURemByConst(MachineInstr &MI) const
bool matchConstantFoldBinOp(MachineInstr &MI, APInt &MatchInfo) const
Do constant folding when opportunities are exposed after MIR building.
void applyCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) const
bool matchUnmergeValuesAnyExtBuildVector(const MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchCtls(MachineInstr &CtlzMI, BuildFnTy &MatchInfo) const
bool matchSelectSameVal(MachineInstr &MI) const
Optimize (cond ? x : x) -> x.
bool matchAddEToAddO(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match: (G_*ADDE x, y, 0) -> (G_*ADDO x, y) (G_*SUBE x, y, 0) -> (G_*SUBO x, y)
bool matchReassocConstantInnerRHS(GPtrAdd &MI, MachineInstr *RHS, BuildFnTy &MatchInfo) const
bool matchBitfieldExtractFromShr(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match: shr (shl x, n), k -> sbfx/ubfx x, pos, width.
bool matchFoldAMinusC1PlusC2(const MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchTruncSSatU(MachineInstr &MI, Register &MatchInfo) const
void applySimplifyURemByPow2(MachineInstr &MI) const
Combine G_UREM x, (known power of 2) to an add and bitmasking.
bool matchCombineUnmergeZExtToZExt(MachineInstr &MI) const
Transform X, Y = G_UNMERGE(G_ZEXT(Z)) -> X = G_ZEXT(Z); Y = G_CONSTANT 0.
bool matchPtrAddZero(MachineInstr &MI) const
}
const TargetInstrInfo * TII
void applyCombineConcatVectors(MachineInstr &MI, SmallVector< Register > &Ops) const
Replace MI with a flattened build_vector with Ops or an implicit_def if Ops is empty.
void applyXorOfAndWithSameReg(MachineInstr &MI, std::pair< Register, Register > &MatchInfo) const
bool canCombineFMadOrFMA(MachineInstr &MI, bool &AllowFusionGlobally, bool &HasFMAD, bool &Aggressive, bool CanReassociate=false) const
bool matchFoldAPlusC1MinusC2(const MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchExtractVecEltBuildVec(MachineInstr &MI, Register &Reg) const
void applyCombineUnmergeConstant(MachineInstr &MI, SmallVectorImpl< APInt > &Csts) const
bool matchShiftsTooBig(MachineInstr &MI, std::optional< int64_t > &MatchInfo) const
Match shifts greater or equal to the range (the bitwidth of the result datatype, or the effective bit...
bool matchCombineFAddFpExtFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z) (fadd (fpext (fmul x,...
bool matchCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) const
void applyCombineShuffleConcat(MachineInstr &MI, SmallVector< Register > &Ops) const
Replace MI with a flattened build_vector with Ops or an implicit_def if Ops is empty.
void replaceSingleDefInstWithReg(MachineInstr &MI, Register Replacement) const
Delete MI and replace all of its uses with Replacement.
void applyCombineShuffleToBuildVector(MachineInstr &MI) const
Replace MI with a build_vector.
bool matchCombineExtractedVectorLoad(MachineInstr &MI, BuildFnTy &MatchInfo) const
Combine a G_EXTRACT_VECTOR_ELT of a load into a narrowed load.
void replaceRegWith(MachineRegisterInfo &MRI, Register FromReg, Register ToReg) const
MachineRegisterInfo::replaceRegWith() and inform the observer of the changes.
void replaceRegOpWith(MachineRegisterInfo &MRI, MachineOperand &FromRegOp, Register ToReg) const
Replace a single register operand with a new register and inform the observer of the changes.
bool matchReassocCommBinOp(MachineInstr &MI, BuildFnTy &MatchInfo) const
Reassociate commutative binary operations like G_ADD.
void applyBuildFnMO(const MachineOperand &MO, BuildFnTy &MatchInfo) const
Use a function which takes in a MachineIRBuilder to perform a combine.
bool matchCommuteConstantToRHS(MachineInstr &MI) const
Match constant LHS ops that should be commuted.
const DataLayout & getDataLayout() const
bool matchBinOpSameVal(MachineInstr &MI) const
Optimize (x op x) -> x.
bool matchSimplifyNegMinMax(MachineInstr &MI, BuildFnTy &MatchInfo) const
Tranform (neg (min/max x, (neg x))) into (max/min x, (neg x)).
bool matchCombineDivRem(MachineInstr &MI, MachineInstr *&OtherMI) const
Try to combine G_[SU]DIV and G_[SU]REM into a single G_[SU]DIVREM when their source operands are iden...
void applyUMulHToLShr(MachineInstr &MI) const
void applyNotCmp(MachineInstr &MI, SmallVectorImpl< Register > &RegsToNegate) const
bool isLegalOrHasFewerElements(const LegalityQuery &Query) const
bool matchShiftImmedChain(MachineInstr &MI, RegisterImmPair &MatchInfo) const
Fold (shift (shift base, x), y) -> (shift base (x+y))
void applyCombineI2PToP2I(MachineInstr &MI, Register &Reg) const
bool matchTruncLshrBuildVectorFold(MachineInstr &MI, Register &MatchInfo) const
bool matchAllExplicitUsesAreUndef(MachineInstr &MI) const
Return true if all register explicit use operands on MI are defined by a G_IMPLICIT_DEF.
bool isPredecessor(const MachineInstr &DefMI, const MachineInstr &UseMI) const
Returns true if DefMI precedes UseMI or they are the same instruction.
bool matchPtrAddImmedChain(MachineInstr &MI, PtrAddChain &MatchInfo) const
bool matchTruncSSatS(MachineInstr &MI, Register &MatchInfo) const
const TargetLowering & getTargetLowering() const
bool matchShuffleUndefRHS(MachineInstr &MI, BuildFnTy &MatchInfo) const
Remove references to rhs if it is undef.
void applyBuildInstructionSteps(MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) const
Replace MI with a series of instructions described in MatchInfo.
void applySDivByPow2(MachineInstr &MI) const
void applySimplifyAddToSub(MachineInstr &MI, std::tuple< Register, Register > &MatchInfo) const
void applyUDivByPow2(MachineInstr &MI) const
Given an G_UDIV MI expressing an unsigned divided by a pow2 constant, return expressions that impleme...
bool matchOr(MachineInstr &MI, BuildFnTy &MatchInfo) const
Combine ors.
bool matchLshrOfTruncOfLshr(MachineInstr &MI, LshrOfTruncOfLshr &MatchInfo, MachineInstr &ShiftMI) const
Fold (lshr (trunc (lshr x, C1)), C2) -> trunc (shift x, (C1 + C2))
bool matchSimplifyAddToSub(MachineInstr &MI, std::tuple< Register, Register > &MatchInfo) const
Return true if MI is a G_ADD which can be simplified to a G_SUB.
void replaceInstWithConstant(MachineInstr &MI, int64_t C) const
Replace an instruction with a G_CONSTANT with value C.
bool tryEmitMemcpyInline(MachineInstr &MI) const
Emit loads and stores that perform the given memcpy.
bool matchCombineFSubFpExtFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fsub (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), (fneg z)) (fsub (fpext (fmul x,...
void applyFsubToFneg(MachineInstr &MI, Register &MatchInfo) const
bool matchConstantLargerBitWidth(MachineInstr &MI, unsigned ConstIdx) const
Checks if constant at ConstIdx is larger than MI 's bitwidth.
void applyCombineCopy(MachineInstr &MI) const
bool matchAddSubSameReg(MachineInstr &MI, Register &Src) const
Transform G_ADD(x, G_SUB(y, x)) to y.
bool matchCombineShlOfExtend(MachineInstr &MI, RegisterImmPair &MatchData) const
void applyCombineAddP2IToPtrAdd(MachineInstr &MI, std::pair< Register, bool > &PtrRegAndCommute) const
bool matchCombineFSubFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fsub (fmul x, y), z) -> (fma x, y, -z) (fsub (fmul x, y), z) -> (fmad x,...
bool matchCombineFAddFMAFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z)) (fadd (fmad x,...
bool matchSextTruncSextLoad(MachineInstr &MI) const
bool matchCombineMergeUnmerge(MachineInstr &MI, Register &MatchInfo) const
Fold away a merge of an unmerge of the corresponding values.
bool matchCombineInsertVecElts(MachineInstr &MI, SmallVectorImpl< Register > &MatchInfo) const
bool matchCombineBuildUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI, Register &UnmergeSrc) const
bool matchDivByPow2(MachineInstr &MI, bool IsSigned) const
Given an G_SDIV MI expressing a signed divided by a pow2 constant, return expressions that implements...
bool matchNarrowBinopFeedingAnd(MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchRedundantNegOperands(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fadd x, fneg(y)) -> (fsub x, y) (fadd fneg(x), y) -> (fsub y, x) (fsub x,...
bool matchCombineLoadWithAndMask(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match (and (load x), mask) -> zextload x.
bool matchCombineFAddFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fadd (fmul x, y), z) -> (fma x, y, z) (fadd (fmul x, y), z) -> (fmad x,...
bool matchCombineCopy(MachineInstr &MI) const
bool matchExtendThroughPhis(MachineInstr &MI, MachineInstr *&ExtMI) const
void applyShiftImmedChain(MachineInstr &MI, RegisterImmPair &MatchInfo) const
bool matchXorOfAndWithSameReg(MachineInstr &MI, std::pair< Register, Register > &MatchInfo) const
Fold (xor (and x, y), y) -> (and (not x), y) {.
bool matchCombineShuffleVector(MachineInstr &MI, SmallVectorImpl< Register > &Ops) const
Check if the G_SHUFFLE_VECTOR MI can be replaced by a concat_vectors.
void applyCombineConstPtrAddToI2P(MachineInstr &MI, APInt &NewCst) const
bool matchCombineAddP2IToPtrAdd(MachineInstr &MI, std::pair< Register, bool > &PtrRegAndCommute) const
Transform G_ADD (G_PTRTOINT x), y -> G_PTRTOINT (G_PTR_ADD x, y) Transform G_ADD y,...
void replaceInstWithFConstant(MachineInstr &MI, double C) const
Replace an instruction with a G_FCONSTANT with value C.
bool matchFunnelShiftToRotate(MachineInstr &MI) const
Match an FSHL or FSHR that can be combined to a ROTR or ROTL rotate.
bool matchOrShiftToFunnelShift(MachineInstr &MI, bool AllowScalarConstants, BuildFnTy &MatchInfo) const
bool matchRedundantSExtInReg(MachineInstr &MI) const
void replaceOpcodeWith(MachineInstr &FromMI, unsigned ToOpcode) const
Replace the opcode in instruction with a new opcode and inform the observer of the changes.
void applyFunnelShiftConstantModulo(MachineInstr &MI) const
Replaces the shift amount in MI with ShiftAmt % BW.
bool matchFoldC1Minus2MinusC2(const MachineInstr &MI, BuildFnTy &MatchInfo) const
void applyCombineShlOfExtend(MachineInstr &MI, const RegisterImmPair &MatchData) const
void applyUseVectorTruncate(MachineInstr &MI, Register &MatchInfo) const
CombinerHelper(GISelChangeObserver &Observer, MachineIRBuilder &B, bool IsPreLegalize, GISelValueTracking *VT=nullptr, MachineDominatorTree *MDT=nullptr, const LegalizerInfo *LI=nullptr)
bool matchShuffleDisjointMask(MachineInstr &MI, BuildFnTy &MatchInfo) const
Turn shuffle a, b, mask -> shuffle undef, b, mask iff mask does not reference a.
bool matchCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal) const
Transform a multiply by a power-of-2 value to a left shift.
void applyCombineShuffleVector(MachineInstr &MI, ArrayRef< Register > Ops) const
Replace MI with a concat_vectors with Ops.
bool matchCombineConstPtrAddToI2P(MachineInstr &MI, APInt &NewCst) const
bool matchCombineUnmergeUndef(MachineInstr &MI, std::function< void(MachineIRBuilder &)> &MatchInfo) const
Transform G_UNMERGE G_IMPLICIT_DEF -> G_IMPLICIT_DEF, G_IMPLICIT_DEF, ...
void applyFoldBinOpIntoSelect(MachineInstr &MI, const unsigned &SelectOpNo) const
SelectOperand is the operand in binary operator MI that is the select to fold.
bool matchFoldAMinusC1MinusC2(const MachineInstr &MI, BuildFnTy &MatchInfo) const
void applyCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) const
bool matchMulOBy2(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match: (G_UMULO x, 2) -> (G_UADDO x, x) (G_SMULO x, 2) -> (G_SADDO x, x)
bool matchCombineShuffleConcat(MachineInstr &MI, SmallVector< Register > &Ops) const
void applySextInRegOfLoad(MachineInstr &MI, std::tuple< Register, unsigned > &MatchInfo) const
bool tryCombineCopy(MachineInstr &MI) const
If MI is COPY, try to combine it.
bool matchTruncUSatU(MachineInstr &MI, MachineInstr &MinMI) const
bool matchICmpToLHSKnownBits(MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchReassocPtrAdd(MachineInstr &MI, BuildFnTy &MatchInfo) const
Reassociate pointer calculations with G_ADD involved, to allow better addressing mode usage.
bool matchUndefShuffleVectorMask(MachineInstr &MI) const
Return true if a G_SHUFFLE_VECTOR instruction MI has an undef mask.
bool matchAnyExplicitUseIsUndef(MachineInstr &MI) const
Return true if any explicit use operand on MI is defined by a G_IMPLICIT_DEF.
bool matchCombineI2PToP2I(MachineInstr &MI, Register &Reg) const
Transform IntToPtr(PtrToInt(x)) to x if cast is in the same address space.
bool matchCombineSubToAdd(MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchShiftOfShiftedLogic(MachineInstr &MI, ShiftOfShiftedLogic &MatchInfo) const
If we have a shift-by-constant of a bitwise logic op that itself has a shift-by-constant operand with...
bool matchOperandIsKnownToBeAPowerOfTwo(MachineInstr &MI, unsigned OpIdx) const
Check if operand OpIdx is known to be a power of 2.
bool matchCombineConcatVectors(MachineInstr &MI, SmallVector< Register > &Ops) const
If MI is G_CONCAT_VECTORS, try to combine it.
bool matchInsertExtractVecEltOutOfBounds(MachineInstr &MI) const
Return true if a G_{EXTRACT,INSERT}_VECTOR_ELT has an out of range index.
bool matchExtractAllEltsFromBuildVector(MachineInstr &MI, SmallVectorImpl< std::pair< Register, MachineInstr * > > &MatchInfo) const
LLVMContext & getContext() const
void applyPtrAddImmedChain(MachineInstr &MI, PtrAddChain &MatchInfo) const
bool isConstantLegalOrBeforeLegalizer(const LLT Ty) const
bool matchNotCmp(MachineInstr &MI, SmallVectorImpl< Register > &RegsToNegate) const
Combine inverting a result of a compare into the opposite cond code.
bool matchSextInRegOfLoad(MachineInstr &MI, std::tuple< Register, unsigned > &MatchInfo) const
Match sext_inreg(load p), imm -> sextload p.
bool matchSelectIMinMax(const MachineOperand &MO, BuildFnTy &MatchInfo) const
Combine select to integer min/max.
bool matchConstantFoldUnaryIntOp(MachineInstr &MI, BuildFnTy &MatchInfo) const
Constant fold a unary integer op (G_CTLZ, G_CTTZ, G_CTPOP and their _ZERO_POISON variants,...
void applyCombineConstantFoldFpUnary(MachineInstr &MI, const ConstantFP *Cst) const
Transform fp_instr(cst) to constant result of the fp operation.
bool isLegal(const LegalityQuery &Query) const
bool matchICmpToTrueFalseKnownBits(MachineInstr &MI, int64_t &MatchInfo) const
bool tryReassocBinOp(unsigned Opc, Register DstReg, Register Op0, Register Op1, BuildFnTy &MatchInfo) const
Try to reassociate to reassociate operands of a commutative binop.
void eraseInst(MachineInstr &MI) const
Erase MI.
bool matchConstantFoldFPBinOp(MachineInstr &MI, ConstantFP *&MatchInfo) const
Do constant FP folding when opportunities are exposed after MIR building.
void applyBuildFnNoErase(MachineInstr &MI, BuildFnTy &MatchInfo) const
Use a function which takes in a MachineIRBuilder to perform a combine.
bool matchUseVectorTruncate(MachineInstr &MI, Register &MatchInfo) const
bool matchUndefStore(MachineInstr &MI) const
Return true if a G_STORE instruction MI is storing an undef value.
MachineRegisterInfo & MRI
void applyCombineP2IToI2P(MachineInstr &MI, Register &Reg) const
Transform PtrToInt(IntToPtr(x)) to x.
void applyExtendThroughPhis(MachineInstr &MI, MachineInstr *&ExtMI) const
bool matchConstantFPOp(const MachineOperand &MOP, double C) const
Return true if MOP is defined by a G_FCONSTANT or splat with a value exactly equal to C.
MachineInstr * buildUDivOrURemUsingMul(MachineInstr &MI) const
Given an G_UDIV MI or G_UREM MI expressing a divide by constant, return an expression that implements...
void applyExtractVecEltBuildVec(MachineInstr &MI, Register &Reg) const
bool matchFoldBinOpIntoSelect(MachineInstr &MI, unsigned &SelectOpNo) const
Push a binary operator through a select on constants.
bool tryCombineShiftToUnmerge(MachineInstr &MI, unsigned TargetShiftAmount) const
bool tryCombineExtendingLoads(MachineInstr &MI) const
If MI is extend that consumes the result of a load, try to combine it.
bool isLegalOrBeforeLegalizer(const LegalityQuery &Query) const
bool matchBuildVectorIdentityFold(MachineInstr &MI, Register &MatchInfo) const
bool matchBitfieldExtractFromShrAnd(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match: shr (and x, n), k -> ubfx x, pos, width.
void applyTruncSSatS(MachineInstr &MI, Register &MatchInfo) const
bool matchConstantFoldCastOp(MachineInstr &MI, APInt &MatchInfo) const
Do constant folding when opportunities are exposed after MIR building.
bool tryCombineShuffleVector(MachineInstr &MI) const
Try to combine G_SHUFFLE_VECTOR into G_CONCAT_VECTORS.
void applyRotateOutOfRange(MachineInstr &MI) const
bool matchReassocFoldConstantsInSubTree(GPtrAdd &MI, MachineInstr *LHS, MachineInstr *RHS, BuildFnTy &MatchInfo) const
bool matchHoistLogicOpWithSameOpcodeHands(MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) const
Match (logic_op (op x...), (op y...)) -> (op (logic_op x, y))
bool matchBitfieldExtractFromAnd(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match: and (lshr x, cst), mask -> ubfx x, cst, width.
bool matchBitfieldExtractFromSExtInReg(MachineInstr &MI, BuildFnTy &MatchInfo) const
Form a G_SBFX from a G_SEXT_INREG fed by a right shift.
bool matchUndefSelectCmp(MachineInstr &MI) const
Return true if a G_SELECT instruction MI has an undef comparison.
bool matchAndOrDisjointMask(MachineInstr &MI, BuildFnTy &MatchInfo) const
void replaceInstWithUndef(MachineInstr &MI) const
Replace an instruction with a G_IMPLICIT_DEF.
bool matchRedundantBinOpInEquality(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform: (X + Y) == X -> Y == 0 (X - Y) == X -> Y == 0 (X ^ Y) == X -> Y == 0 (X + Y) !...
bool matchOptBrCondByInvertingCond(MachineInstr &MI, MachineInstr *&BrCond) const
If a brcond's true block is not the fallthrough, make it so by inverting the condition and swapping o...
bool matchAddOverflow(MachineInstr &MI, BuildFnTy &MatchInfo) const
Combine addos.
void applyAshShlToSextInreg(MachineInstr &MI, std::tuple< Register, int64_t > &MatchInfo) const
bool matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo) const
Combine selects.
bool matchCombineExtendingLoads(MachineInstr &MI, PreferredTuple &MatchInfo) const
bool matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) const
Transform X, Y<dead> = G_UNMERGE Z -> X = G_TRUNC Z.
bool matchFsubToFneg(MachineInstr &MI, Register &MatchInfo) const
bool matchRotateOutOfRange(MachineInstr &MI) const
void applyExpandFPowI(MachineInstr &MI, int64_t Exponent) const
Expands FPOWI into a series of multiplications and a division if the exponent is negative.
void setRegBank(Register Reg, const RegisterBank *RegBank) const
Set the register bank of Reg.
bool matchConstantSelectCmp(MachineInstr &MI, unsigned &OpIdx) const
Return true if a G_SELECT instruction MI has a constant comparison.
bool matchCommuteFPConstantToRHS(MachineInstr &MI) const
Match constant LHS FP ops that should be commuted.
void applyCombineDivRem(MachineInstr &MI, MachineInstr *&OtherMI) const
bool matchCombineFMinMaxNaN(MachineInstr &MI, unsigned &Info) const
bool matchRedundantOr(MachineInstr &MI, Register &Replacement) const
void applyTruncSSatU(MachineInstr &MI, Register &MatchInfo) const
bool matchCombineFSubFpExtFNegFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fsub (fpext (fneg (fmul x, y))), z) -> (fneg (fma (fpext x), (fpext y),...
bool matchTruncBuildVectorFold(MachineInstr &MI, Register &MatchInfo) const
void applyCombineTruncOfShift(MachineInstr &MI, std::pair< MachineInstr *, LLT > &MatchInfo) const
bool matchConstantOp(const MachineOperand &MOP, int64_t C) const
Return true if MOP is defined by a G_CONSTANT or splat with a value equal to C.
const LegalizerInfo * LI
void applyCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal) const
void applyCombineBuildUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B, Register &UnmergeSrc) const
bool matchUMulHToLShr(MachineInstr &MI) const
MachineDominatorTree * MDT
void applyFunnelShiftToRotate(MachineInstr &MI) const
bool matchSimplifySelectToMinMax(MachineInstr &MI, BuildFnTy &MatchInfo) const
void applyRepeatedFPDivisor(SmallVector< MachineInstr * > &MatchInfo) const
bool matchTruncUSatUToFPTOUISat(MachineInstr &MI, MachineInstr &SrcMI) const
const RegisterBankInfo * RBI
bool matchMulOBy0(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match: (G_*MULO x, 0) -> 0 + no carry out.
GISelValueTracking * VT
bool matchBinopWithNeg(MachineInstr &MI, BuildFnTy &MatchInfo) const
Fold a bitwiseop (~b +/- c) -> a bitwiseop ~(b -/+ c)
bool matchCombineUnmergeConstant(MachineInstr &MI, SmallVectorImpl< APInt > &Csts) const
Transform G_UNMERGE Constant -> Constant1, Constant2, ...
void applyShiftOfShiftedLogic(MachineInstr &MI, ShiftOfShiftedLogic &MatchInfo) const
const TargetRegisterInfo * TRI
bool matchRedundantAnd(MachineInstr &MI, Register &Replacement) const
bool dominates(const MachineInstr &DefMI, const MachineInstr &UseMI) const
Returns true if DefMI dominates UseMI.
GISelChangeObserver & Observer
void applyBuildFn(MachineInstr &MI, BuildFnTy &MatchInfo) const
Use a function which takes in a MachineIRBuilder to perform a combine.
bool matchCombineTruncOfShift(MachineInstr &MI, std::pair< MachineInstr *, LLT > &MatchInfo) const
Transform trunc (shl x, K) to shl (trunc x), K if K < VT.getScalarSizeInBits().
bool matchCombineShiftToUnmerge(MachineInstr &MI, unsigned TargetShiftSize, unsigned &ShiftVal) const
Reduce a shift by a constant to an unmerge and a shift on a half sized type.
bool matchUDivOrURemByConst(MachineInstr &MI) const
Combine G_UDIV or G_UREM by constant into a multiply by magic constant.
bool matchAnd(MachineInstr &MI, BuildFnTy &MatchInfo) const
Combine ands.
bool matchSuboCarryOut(const MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchConstantFoldFMA(MachineInstr &MI, ConstantFP *&MatchInfo) const
Constant fold G_FMA/G_FMAD.
bool matchCombineFSubFNegFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z)) (fsub (fneg (fmul,...
bool matchCombineZextTrunc(MachineInstr &MI, Register &Reg) const
Transform zext(trunc(x)) to x.
bool matchOperandIsUndef(MachineInstr &MI, unsigned OpIdx) const
Check if operand OpIdx is undef.
void applyLshrOfTruncOfLshr(MachineInstr &MI, LshrOfTruncOfLshr &MatchInfo) const
bool tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0) const
Optimize memcpy intrinsics et al, e.g.
bool matchFreezeOfSingleMaybePoisonOperand(MachineInstr &MI, BuildFnTy &MatchInfo) const
void applySDivOrSRemByConst(MachineInstr &MI) const
MachineInstr * buildSDivOrSRemUsingMul(MachineInstr &MI) const
Given an G_SDIV MI or G_SREM MI expressing a signed divide by constant, return an expression that imp...
bool isLegalOrHasWidenScalar(const LegalityQuery &Query) const
bool matchSubAddSameReg(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform: (x + y) - y -> x (x + y) - x -> y x - (y + x) -> 0 - y x - (x + z) -> 0 - z.
bool matchReassocConstantInnerLHS(GPtrAdd &MI, MachineInstr *LHS, MachineInstr *RHS, BuildFnTy &MatchInfo) const
bool matchOverlappingAnd(MachineInstr &MI, BuildFnTy &MatchInfo) const
Fold and(and(x, C1), C2) -> C1&C2 ? and(x, C1&C2) : 0.
bool matchCombineAnyExtTrunc(MachineInstr &MI, Register &Reg) const
Transform anyext(trunc(x)) to x.
void applyExtractAllEltsFromBuildVector(MachineInstr &MI, SmallVectorImpl< std::pair< Register, MachineInstr * > > &MatchInfo) const
MachineIRBuilder & Builder
void applyCommuteBinOpOperands(MachineInstr &MI) const
void replaceSingleDefInstWithOperand(MachineInstr &MI, unsigned OpIdx) const
Delete MI and replace all of its uses with its OpIdx-th operand.
void applySextTruncSextLoad(MachineInstr &MI) const
const MachineFunction & getMachineFunction() const
bool matchCombineFAddFpExtFMulToFMadOrFMAAggressive(MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchSDivOrSRemByConst(MachineInstr &MI) const
Combine G_SDIV or G_SREM by constant into a multiply by magic constant.
void applyOptBrCondByInvertingCond(MachineInstr &MI, MachineInstr *&BrCond) const
void applyCombineShiftToUnmerge(MachineInstr &MI, const unsigned &ShiftVal) const
bool matchFPowIExpansion(MachineInstr &MI, int64_t Exponent) const
Match FPOWI if it's safe to extend it into a series of multiplications.
void applyCombineInsertVecElts(MachineInstr &MI, SmallVectorImpl< Register > &MatchInfo) const
bool matchCombineUnmergeMergeToPlainValues(MachineInstr &MI, SmallVectorImpl< Register > &Operands) const
Transform <ty,...> G_UNMERGE(G_MERGE ty X, Y, Z) -> ty X, Y, Z.
void applyCombineUnmergeMergeToPlainValues(MachineInstr &MI, SmallVectorImpl< Register > &Operands) const
bool matchAshrShlToSextInreg(MachineInstr &MI, std::tuple< Register, int64_t > &MatchInfo) const
Match ashr (shl x, C), C -> sext_inreg (C)
void applyCombineUnmergeZExtToZExt(MachineInstr &MI) const
ConstantFP - Floating Point Values [float, double].
Definition Constants.h:420
const APFloat & getValue() const
Definition Constants.h:464
const APFloat & getValueAPF() const
Definition Constants.h:463
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:159
This class represents a range of values.
LLVM_ABI std::optional< ConstantRange > exactUnionWith(const ConstantRange &CR) const
Union the two ranges and return the result if it can be represented exactly, otherwise return std::nu...
LLVM_ABI ConstantRange subtract(const APInt &CI) const
Subtract the specified constant from the endpoints of this constant range.
static LLVM_ABI ConstantRange fromKnownBits(const KnownBits &Known, bool IsSigned)
Initialize a range based on a known bits constraint.
const APInt & getLower() const
Return the lower value for this range.
LLVM_ABI OverflowResult unsignedSubMayOverflow(const ConstantRange &Other) const
Return whether unsigned sub of the two ranges always/never overflows.
LLVM_ABI OverflowResult unsignedAddMayOverflow(const ConstantRange &Other) const
Return whether unsigned add of the two ranges always/never overflows.
LLVM_ABI bool isWrappedSet() const
Return true if this set wraps around the unsigned domain.
const APInt & getUpper() const
Return the upper value for this range.
static LLVM_ABI ConstantRange makeExactICmpRegion(CmpInst::Predicate Pred, const APInt &Other)
Produce the exact range such that all values in the returned range satisfy the given predicate with a...
LLVM_ABI OverflowResult signedAddMayOverflow(const ConstantRange &Other) const
Return whether signed add of the two ranges always/never overflows.
@ AlwaysOverflowsHigh
Always overflows in the direction of signed/unsigned max value.
@ AlwaysOverflowsLow
Always overflows in the direction of signed/unsigned min value.
@ MayOverflow
May or may not overflow.
LLVM_ABI OverflowResult signedSubMayOverflow(const ConstantRange &Other) const
Return whether signed sub of the two ranges always/never overflows.
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
bool isBigEndian() const
Definition DataLayout.h:218
ValueT lookup(const_arg_type_t< KeyT > Val) const
Return the entry for the specified key, or a default constructed value if no such entry exists.
Definition DenseMap.h:205
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition DenseMap.h:254
unsigned size() const
Definition DenseMap.h:110
iterator end()
Definition DenseMap.h:81
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:358
Represents overflowing add operations.
Represents an integer addition.
Represents a logical and.
CmpInst::Predicate getCond() const
Register getLHSReg() const
Register getRHSReg() const
Represents an any ext.
Represents any generic load, including sign/zero extending variants.
Register getDstReg() const
Get the definition register of the loaded value.
Register getCarryOutReg() const
Register getLHSReg() const
Register getRHSReg() const
Represents a G_BUILD_VECTOR.
Represent a G_ICMP.
Abstract class that contains various methods for clients to notify about changes.
Simple wrapper observer that takes several observers, and calls each one for each event.
Represents any type of generic load or store.
Register getPointerReg() const
Get the source register of the pointer value.
Represents a G_LOAD.
Represents a logical binary operation.
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
bool isAtomic() const
Returns true if the attached MachineMemOperand has the atomic flag set.
LocationSize getMemSizeInBits() const
Returns the size in bits of the memory access.
bool isSimple() const
Returns true if the memory operation is neither atomic or volatile.
Register getSourceReg(unsigned I) const
Returns the I'th source register.
unsigned getNumSources() const
Returns the number of source registers.
Represents a G_MERGE_VALUES.
Represents a logical or.
Represents a G_PTR_ADD.
Represents a G_SELECT.
Register getCondReg() const
Represents overflowing sub operations.
Represents an integer subtraction.
Represents a G_UNMERGE_VALUES.
unsigned getNumDefs() const
Returns the number of def registers.
Register getSourceReg() const
Get the unmerge source register.
Represents a G_ZEXTLOAD.
Register getReg(unsigned Idx) const
Access the Idx'th operand as a register and return it.
static LLVM_ABI bool compare(const APInt &LHS, const APInt &RHS, ICmpInst::Predicate Pred)
Return result of LHS Pred RHS comparison.
constexpr bool isScalableVector() const
Returns true if the LLT is a scalable vector.
constexpr unsigned getScalarSizeInBits() const
constexpr bool isScalar() const
constexpr LLT changeElementType(LLT NewEltTy) const
If this type is a vector, return a vector with the same number of elements but the new element type.
static constexpr LLT vector(ElementCount EC, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
LLT getScalarType() const
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr bool isValid() const
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
constexpr bool isVector() const
constexpr bool isByteSized() const
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr bool isPointer() const
constexpr ElementCount getElementCount() const
constexpr bool isPointerOrPointerVector() const
constexpr bool isFixedVector() const
Returns true if the LLT is a fixed vector.
static LLT integer(unsigned SizeInBits)
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
@ Legalized
Instruction has been legalized and the MachineFunction changed.
LLVM_ABI LegalizeResult lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0)
LLVM_ABI Register getVectorElementPointer(Register VecPtr, LLT VecTy, Register Index)
Get a pointer to vector element Index located in memory for a vector of type VecTy starting at a base...
TypeSize getValue() const
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition MCInstrInfo.h:90
LLVM_ABI iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
MachineInstrBundleIterator< MachineInstr > iterator
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Helper class to build MachineInstr.
const TargetInstrInfo & getTII()
MachineInstrBuilder buildSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_SUB Op0, Op1.
MachineInstrBuilder buildCTLZ(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTLZ Op0, Src0.
MachineFunction & getMF()
Getter for the function we currently build.
MachineRegisterInfo * getMRI()
Getter for MRI.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
Register getReg(unsigned Idx) const
Get the register for the operand index.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
LLVM_ABI bool isDereferenceableInvariantLoad() const
Return true if this load instruction never traps and points to a memory location whose value doesn't ...
bool getFlag(MIFlag Flag) const
Return whether an MI flag is set.
unsigned getNumOperands() const
Retuns the total number of operands.
LLVM_ABI void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
mop_range uses()
Returns all operands which may be register uses.
MachineOperand * findRegisterUseOperand(Register Reg, const TargetRegisterInfo *TRI, bool isKill=false)
Wrapper for findRegisterUseOperandIdx, it returns a pointer to the MachineOperand rather than an inde...
const MachineOperand & getOperand(unsigned i) const
uint32_t getFlags() const
Return the MI flags bitvector.
LLVM_ABI int findRegisterDefOperandIdx(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
LLVM_ABI MachineInstrBundleIterator< MachineInstr > eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
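A typical inspect-then-erase sequence built from the accessors above, as a sketch (MI and MRI assumed in scope; real combines also notify the change observer):
if (MI.getNumOperands() > 0 && MI.getOperand(0).isReg() &&
    MRI.use_nodbg_empty(MI.getOperand(0).getReg()))
  MI.eraseFromParent(); // unlink from the basic block and delete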
A description of a memory reference used in the backend.
LLT getMemoryType() const
Return the memory type of the memory reference.
unsigned getAddrSpace() const
const MachinePointerInfo & getPointerInfo() const
LLVM_ABI Align getAlign() const
Return the minimum known alignment in bytes of the actual memory reference.
MachineOperand class - Representation of each machine instruction operand.
const ConstantInt * getCImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void setMBB(MachineBasicBlock *MBB)
void setPredicate(unsigned Predicate)
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
unsigned getPredicate() const
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI bool hasOneNonDBGUse(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register.
LLVM_ABI MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
bool use_nodbg_empty(Register RegNo) const
use_nodbg_empty - Return true if there are no non-Debug instructions using the specified register.
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
use_instr_nodbg_iterator use_instr_nodbg_begin(Register RegNo) const
iterator_range< use_instr_nodbg_iterator > use_nodbg_instructions(Register Reg) const
static use_instr_nodbg_iterator use_instr_nodbg_end()
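A sketch of the use-iteration API above, counting non-debug G_TRUNC users of a register (Reg and MRI assumed in scope):
unsigned NumTruncUsers = 0;
for (MachineInstr &UseMI : MRI.use_nodbg_instructions(Reg))
  if (UseMI.getOpcode() == TargetOpcode::G_TRUNC)
    ++NumTruncUsers;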
Represent a mutable reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:294
This class implements the register bank concept.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
constexpr bool isValid() const
Definition Register.h:112
size_type size() const
Determine the number of elements in the SetVector.
Definition SetVector.h:103
size_type count(const_arg_type_t< key_type > key) const
Count the number of elements of a given key in the SetVector.
Definition SetVector.h:262
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
SmallBitVector & set()
bool all() const
Returns true if all bits are set.
size_type size() const
Definition SmallPtrSet.h:99
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:339
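SetVector pairs set semantics with deterministic iteration order, which is what makes it suitable for worklists; a small sketch with a hypothetical Register R:
SmallSetVector<Register, 8> Worklist;
bool Inserted = Worklist.insert(R); // true: newly inserted
Inserted = Worklist.insert(R);      // false: duplicate rejected
// Worklist.size() == 1 and Worklist.count(R) == 1 at this point.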
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:184
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void resize(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual LLVM_READONLY LLT getPreferredShiftAmountTy(LLT ShiftValueTy) const
Return the preferred type to use for a shift opcode, given the shifted amount type is ShiftValueTy.
bool isBeneficialToExpandPowI(int64_t Exponent, bool OptForSize) const
Return true if it is beneficial to expand an @llvm.powi.
virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AddrSpace, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual unsigned combineRepeatedFPDivisors() const
Indicate whether this target prefers to combine FDIVs with the same divisor.
virtual const TargetLowering * getTargetLowering() const
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
Definition TypeSize.h:180
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
self_iterator getIterator()
Definition ilist_node.h:123
#define INT64_MAX
Definition DataTypes.h:71
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ FewerElements
The (vector) operation should be implemented by splitting it into sub-vectors where the operation is ...
@ Legal
The operation is expected to be selectable directly by the target, and no transformation is necessary...
@ WidenScalar
The operation should be implemented in terms of a wider scalar base-type.
@ Custom
The target wants to do something special with this combination of operand and type.
operand_type_match m_Reg()
SpecificConstantMatch m_SpecificICst(const APInt &RequestedValue)
Matches a constant equal to RequestedValue.
BinaryOp_match< LHS, RHS, TargetOpcode::G_BUILD_VECTOR, false > m_GBuildVector(const LHS &L, const RHS &R)
GCstAndRegMatch m_GCst(std::optional< ValueAndVReg > &ValReg)
operand_type_match m_Pred()
BinaryOp_match< LHS, RHS, TargetOpcode::G_UMIN, true > m_GUMin(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_ZEXT > m_GZExt(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_XOR, true > m_GXor(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_SEXT > m_GSExt(const SrcTy &Src)
UnaryOp_match< SrcTy, TargetOpcode::G_FPEXT > m_GFPExt(const SrcTy &Src)
ConstantMatch< APInt > m_ICst(APInt &Cst)
UnaryOp_match< SrcTy, TargetOpcode::G_INTTOPTR > m_GIntToPtr(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_ADD, true > m_GAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_OR, true > m_GOr(const LHS &L, const RHS &R)
BinaryOp_match< SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB > m_Neg(const SrcTy &&Src)
Matches a register negated by a G_SUB.
ICstOrSplatMatch< APInt > m_ICstOrSplat(APInt &Cst)
ImplicitDefMatch m_GImplicitDef()
OneNonDBGUse_match< SubPat > m_OneNonDBGUse(const SubPat &SP)
CheckType m_SpecificType(LLT Ty)
deferred_ty< Register > m_DeferredReg(Register &R)
Similar to m_SpecificReg/Type, but the specific value to match originated from an earlier sub-pattern...
BinaryOp_match< LHS, RHS, TargetOpcode::G_UMAX, true > m_GUMax(const LHS &L, const RHS &R)
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
BinaryOp_match< LHS, RHS, TargetOpcode::G_FADD, true > m_GFAdd(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_PTRTOINT > m_GPtrToInt(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_FSUB, false > m_GFSub(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SUB > m_GSub(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_ASHR, false > m_GAShr(const LHS &L, const RHS &R)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
BinaryOp_match< LHS, RHS, TargetOpcode::G_PTR_ADD, false > m_GPtrAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SHL, false > m_GShl(const LHS &L, const RHS &R)
Or< Preds... > m_any_of(Preds &&... preds)
SpecificConstantOrSplatMatch m_SpecificICstOrSplat(const APInt &RequestedValue)
Matches a RequestedValue constant or a constant splat of RequestedValue.
BinaryOp_match< LHS, RHS, TargetOpcode::G_AND, true > m_GAnd(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_BITCAST > m_GBitcast(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_BUILD_VECTOR_TRUNC, false > m_GBuildVectorTrunc(const LHS &L, const RHS &R)
bind_ty< MachineInstr * > m_MInstr(MachineInstr *&MI)
UnaryOp_match< SrcTy, TargetOpcode::G_FNEG > m_GFNeg(const SrcTy &Src)
CompareOp_match< Pred, LHS, RHS, TargetOpcode::G_ICMP, true > m_c_GICmp(const Pred &P, const LHS &L, const RHS &R)
G_ICMP matcher that also matches commuted compares.
TernaryOp_match< Src0Ty, Src1Ty, Src2Ty, TargetOpcode::G_INSERT_VECTOR_ELT > m_GInsertVecElt(const Src0Ty &Src0, const Src1Ty &Src1, const Src2Ty &Src2)
GFCstOrSplatGFCstMatch m_GFCstOrSplat(std::optional< FPValueAndVReg > &FPValReg)
And< Preds... > m_all_of(Preds &&... preds)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SMIN, true > m_GSMin(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_LSHR, false > m_GLShr(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_ANYEXT > m_GAnyExt(const SrcTy &Src)
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
UnaryOp_match< SrcTy, TargetOpcode::G_TRUNC > m_GTrunc(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SMAX, true > m_GSMax(const LHS &L, const RHS &R)
CompareOp_match< Pred, LHS, RHS, TargetOpcode::G_FCMP > m_GFCmp(const Pred &P, const LHS &L, const RHS &R)
auto m_BinOp()
Match an arbitrary binary operation and ignore it.
Not(const Pred &P) -> Not< Pred >
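The MIPatternMatch combinators above compose much like the IR-level PatternMatch ones. A hedged sketch that recognises an add-of-constant, assuming Reg and MRI are in scope:
Register Src;
APInt Cst;
if (mi_match(Reg, MRI, m_GAdd(m_Reg(Src), m_ICst(Cst)))) {
  // Reg is defined by G_ADD Src, Cst; m_GAdd also matches the
  // commuted operand order.
}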
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:315
@ Offset
Definition DWP.cpp:557
LLVM_ABI bool isBuildVectorAllZeros(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndef=false)
Return true if the specified instruction is a G_BUILD_VECTOR or G_BUILD_VECTOR_TRUNC where all of the...
Definition Utils.cpp:1440
LLVM_ABI Type * getTypeForLLT(LLT Ty, LLVMContext &C)
Get the type back from LLT.
Definition Utils.cpp:1980
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
LLVM_ABI MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by a single def instruction that is Opcode.
Definition Utils.cpp:653
static double log2(double V)
LLVM_ABI const ConstantFP * getConstantFPVRegVal(Register VReg, const MachineRegisterInfo &MRI)
Definition Utils.cpp:461
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
LLVM_ABI std::optional< APInt > getIConstantVRegVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT, return the corresponding value.
Definition Utils.cpp:294
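Typical usage of the constant queries, as a sketch (VReg and MRI assumed in scope):
std::optional<APInt> MaybeCst = getIConstantVRegVal(VReg, MRI);
bool IsPow2 = MaybeCst && MaybeCst->isPowerOf2(); // fold only when constant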
LLVM_ABI std::optional< APInt > getIConstantSplatVal(const Register Reg, const MachineRegisterInfo &MRI)
Definition Utils.cpp:1400
LLVM_ABI bool isAllOnesOrAllOnesSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant -1 integer or a splatted vector of a constant -1 integer (with...
Definition Utils.cpp:1565
@ Undef
Value of the register doesn't matter.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition bit.h:315
std::function< void(MachineIRBuilder &)> BuildFnTy
LLVM_ABI const llvm::fltSemantics & getFltSemanticForLLT(LLT Ty)
Get the appropriate floating point arithmetic semantic based on the bit size of the given scalar LLT.
LLVM_ABI std::optional< APFloat > ConstantFoldFPBinOp(unsigned Opcode, const Register Op1, const Register Op2, const MachineRegisterInfo &MRI)
Definition Utils.cpp:741
LLVM_ABI MVT getMVTForLLT(LLT Ty)
Get a rough equivalent of an MVT for a given LLT.
LLVM_ABI std::optional< APInt > isConstantOrConstantSplatVector(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a constant integer or a splat vector of constant integers.
Definition Utils.cpp:1523
LLVM_ABI bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition Utils.cpp:1547
LLVM_ABI MachineInstr * getDefIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, folding away any trivial copies.
Definition Utils.cpp:494
LLVM_ABI bool matchUnaryPredicate(const MachineRegisterInfo &MRI, Register Reg, std::function< bool(const Constant *ConstVal)> Match, bool AllowUndefs=false)
Attempt to match a unary predicate against a scalar/splat constant or every element of a constant G_B...
Definition Utils.cpp:1580
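A hedged sketch of matchUnaryPredicate: require every scalar or build-vector element of Reg to be a non-zero integer constant (MRI and Reg assumed in scope):
bool AllNonZero = matchUnaryPredicate(MRI, Reg, [](const Constant *C) {
  const auto *CI = dyn_cast<ConstantInt>(C);
  return CI && !CI->isZero();
});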
LLVM_ABI bool isConstTrueVal(const TargetLowering &TLI, int64_t Val, bool IsVector, bool IsFP)
Returns true if given the TargetLowering's boolean contents information, the value Val contains a tru...
Definition Utils.cpp:1612
LLVM_ABI std::optional< APInt > ConstantFoldBinOp(unsigned Opcode, const Register Op1, const Register Op2, const MachineRegisterInfo &MRI)
Definition Utils.cpp:672
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:149
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
LLVM_ABI const APInt & getIConstantFromReg(Register VReg, const MachineRegisterInfo &MRI)
VReg is defined by a G_CONSTANT, return the corresponding value.
Definition Utils.cpp:305
LLVM_ABI bool isConstantOrConstantVector(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowFP=true, bool AllowOpaqueConstants=true)
Return true if the specified instruction is known to be a constant, or a vector of constants.
Definition Utils.cpp:1503
SmallVector< std::function< void(MachineInstrBuilder &)>, 4 > OperandBuildSteps
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
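Since the helper is constexpr, its behaviour can be checked at compile time:
static_assert(isPowerOf2_32(64u), "exactly one bit set");
static_assert(!isPowerOf2_32(0u), "zero is not a power of two");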
LLVM_ABI bool canReplaceReg(Register DstReg, Register SrcReg, MachineRegisterInfo &MRI)
Check if DstReg can be replaced with SrcReg depending on the register constraints.
Definition Utils.cpp:200
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition MathExtras.h:261
LLVM_ABI bool canCreateUndefOrPoison(const Operator *Op, bool ConsiderFlagsAndMetadata=true)
canCreateUndefOrPoison returns true if Op can create undef or poison from non-undef & non-poison oper...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
auto instructionsWithoutDebug(IterT It, IterT End, bool SkipPseudoOp=true)
Construct a range iterator which begins at It and moves forwards until End is reached,...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI std::optional< FPValueAndVReg > getFConstantSplat(Register VReg, const MachineRegisterInfo &MRI, bool AllowUndef=true)
Returns a floating point scalar constant of a build vector splat if it exists.
Definition Utils.cpp:1433
LLVM_ABI EVT getApproximateEVTForLLT(LLT Ty, LLVMContext &Ctx)
LLVM_ABI std::optional< APInt > ConstantFoldCastOp(unsigned Opcode, LLT DstTy, const Register Op0, const MachineRegisterInfo &MRI)
Definition Utils.cpp:908
@ Other
Any other memory.
Definition ModRef.h:68
LLVM_ABI unsigned getInverseGMinMaxOpcode(unsigned MinMaxOpc)
Returns the inverse opcode of MinMaxOpc, which is a generic min/max opcode like G_SMIN.
Definition Utils.cpp:279
@ Xor
Bitwise or logical XOR of integers.
@ And
Bitwise or logical AND of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
DWARFExpression::Operation Op
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
LLVM_ABI std::optional< FPValueAndVReg > getFConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_FCONSTANT returns it...
Definition Utils.cpp:447
LLVM_ABI std::optional< APFloat > isConstantOrConstantSplatVectorFP(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a floating-point constant or a splat vector of floating-point constants.
Definition Utils.cpp:1536
constexpr unsigned BitWidth
LLVM_ABI int64_t getICmpTrueVal(const TargetLowering &TLI, bool IsVector, bool IsFP)
Returns an integer representing true, as defined by the TargetBooleanContents.
Definition Utils.cpp:1637
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Definition Utils.cpp:433
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1771
iterator_range< pointer_iterator< WrappedIteratorT > > make_pointer_range(RangeT &&Range)
Definition iterator.h:368
LLVM_ABI std::optional< DefinitionAndSourceRegister > getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg and the underlying value register, folding away any copies.
Definition Utils.cpp:469
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
LLVM_ABI SmallVector< APInt > ConstantFoldUnaryIntOp(unsigned Opcode, LLT DstTy, Register Src, const MachineRegisterInfo &MRI)
Tries to constant fold a unary integer operation (G_CTLZ, G_CTTZ, G_CTPOP and their _ZERO_POISON vari...
Definition Utils.cpp:945
LLVM_ABI bool isKnownToBeAPowerOfTwo(const Value *V, const DataLayout &DL, bool OrZero=false, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Return true if the given value is known to have exactly one bit set when defined.
LLVM_ABI Register getSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the source register for Reg, folding away any trivial copies.
Definition Utils.cpp:501
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
Definition MathExtras.h:77
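For example (both this helper and isMask_64 above are constexpr):
static_assert(maskTrailingOnes<uint64_t>(8) == 0xFFu, "low 8 bits set");
static_assert(isMask_64(0xFFu), "contiguous ones from bit 0");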
unsigned getFCmpCode(CmpInst::Predicate CC)
Similar to getICmpCode but for FCmpInst.
LLVM_ABI std::optional< int64_t > getIConstantSplatSExtVal(const Register Reg, const MachineRegisterInfo &MRI)
Definition Utils.cpp:1418
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:876
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Simple struct used to hold a Register value and the instruction which defines it.
Definition Utils.h:229
Extended Value Type.
Definition ValueTypes.h:35
SmallVector< InstructionBuildSteps, 2 > InstrsToBuild
Describes instructions to be built during a combine.
bool isNonNegative() const
Returns true if this value is known to be non-negative.
Definition KnownBits.h:106
unsigned countMinLeadingOnes() const
Returns the minimum number of leading one bits.
Definition KnownBits.h:265
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition KnownBits.h:256
bool isUnknown() const
Returns true if we don't know any bits.
Definition KnownBits.h:64
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition KnownBits.h:262
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition KnownBits.h:146
bool isNegative() const
Returns true if this value is known to be negative.
Definition KnownBits.h:103
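A sketch of how the KnownBits queries above feed combines, assuming Known was computed by a known-bits analysis: N known trailing zeros imply the value is a multiple of 2^N, and the maximal value bounds its range.
bool MultipleOf8 = Known.countMinTrailingZeros() >= 3;
bool FitsInU16 = Known.getMaxValue().ule(0xFFFF);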
The LegalityQuery object bundles together all the information that's needed to decide whether a given...
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands,...
LLVM_ABI unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
MachinePointerInfo getWithOffset(int64_t O) const
const RegisterBank * Bank
Magic data for optimising signed division by a constant.
static LLVM_ABI SignedDivisionByConstantInfo get(const APInt &D)
Calculate the magic numbers required to implement a signed integer division by a constant as a sequen...
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
Magic data for optimising unsigned division by a constant.
static LLVM_ABI UnsignedDivisionByConstantInfo get(const APInt &D, unsigned LeadingZeros=0, bool AllowEvenDivisorOptimization=true, bool AllowWidenOptimization=false)
Calculate the magic numbers required to implement an unsigned integer division by a constant as a seq...
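A closing sketch of the magic-number API: derive the constants for an unsigned divide by 10 at 32 bits (member names assumed to follow UnsignedDivisionByConstantInfo; the actual expansion is emitted by lowering code):
UnsignedDivisionByConstantInfo Magics =
    UnsignedDivisionByConstantInfo::get(APInt(32, 10));
// Magics.Magic, Magics.PreShift, Magics.PostShift and Magics.IsAdd then
// drive q = umulh(x >> PreShift, Magic) >> PostShift, with an extra
// add-and-shift fix-up when IsAdd is set.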