LLVM 23.0.0git
CombinerHelper.cpp
Go to the documentation of this file.
1//===-- lib/CodeGen/GlobalISel/GICombinerHelper.cpp -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
9#include "llvm/ADT/APFloat.h"
10#include "llvm/ADT/STLExtras.h"
11#include "llvm/ADT/SetVector.h"
34#include "llvm/IR/DataLayout.h"
35#include "llvm/IR/InstrTypes.h"
42#include <cmath>
43#include <optional>
44#include <tuple>
45
46#define DEBUG_TYPE "gi-combiner"
47
48using namespace llvm;
49using namespace MIPatternMatch;
50
51// Option to allow testing of the combiner while no targets know about indexed
52// addressing.
53static cl::opt<bool>
54 ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false),
55 cl::desc("Force all indexed operations to be "
56 "legal for the GlobalISel combiner"));
57
62 const LegalizerInfo *LI)
63 : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer), VT(VT),
65 RBI(Builder.getMF().getSubtarget().getRegBankInfo()),
66 TRI(Builder.getMF().getSubtarget().getRegisterInfo()) {
67 (void)this->VT;
68}
69
71 return *Builder.getMF().getSubtarget().getTargetLowering();
72}
73
75 return Builder.getMF();
76}
77
81
82LLVMContext &CombinerHelper::getContext() const { return Builder.getContext(); }
83
/// \returns The little endian in-memory byte position of byte \p I in a
/// \p ByteWidth bytes wide type.
///
/// In little endian order byte \p I sits at memory offset \p I, so
/// \p ByteWidth is only needed to validate the index.
///
/// E.g. Given a 4-byte type x, x[0] -> byte 0
static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I) {
  assert(I < ByteWidth && "I must be in [0, ByteWidth)");
  // ByteWidth is only read by the assert above; mark it used so builds with
  // asserts compiled out do not see an unused parameter.
  (void)ByteWidth;
  return I;
}
92
93/// Determines the LogBase2 value for a non-null input value using the
94/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
96 auto &MRI = *MIB.getMRI();
97 LLT Ty = MRI.getType(V);
98 auto Ctlz = MIB.buildCTLZ(Ty, V);
99 auto Base = MIB.buildConstant(Ty, Ty.getScalarSizeInBits() - 1);
100 return MIB.buildSub(Ty, Base, Ctlz).getReg(0);
101}
102
/// \returns The big endian in-memory byte position of byte \p I in a
/// \p ByteWidth bytes wide type.
///
/// Big endian order mirrors the index within the type: byte 0 is stored last
/// and byte ByteWidth - 1 is stored first.
///
/// E.g. Given a 4-byte type x, x[0] -> byte 3
static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I) {
  assert(I < ByteWidth && "I must be in [0, ByteWidth)");
  return ByteWidth - I - 1;
}
111
112/// Given a map from byte offsets in memory to indices in a load/store,
113/// determine if that map corresponds to a little or big endian byte pattern.
114///
115/// \param MemOffset2Idx maps memory offsets to address offsets.
116/// \param LowestIdx is the lowest index in \p MemOffset2Idx.
117///
118/// \returns true if the map corresponds to a big endian byte pattern, false if
119/// it corresponds to a little endian byte pattern, and std::nullopt otherwise.
120///
121/// E.g. given a 32-bit type x, and x[AddrOffset], the in-memory byte patterns
122/// are as follows:
123///
124/// AddrOffset Little endian Big endian
125/// 0 0 3
126/// 1 1 2
127/// 2 2 1
128/// 3 3 0
129static std::optional<bool>
131 int64_t LowestIdx) {
132 // Need at least two byte positions to decide on endianness.
133 unsigned Width = MemOffset2Idx.size();
134 if (Width < 2)
135 return std::nullopt;
136 bool BigEndian = true, LittleEndian = true;
137 for (unsigned MemOffset = 0; MemOffset < Width; ++ MemOffset) {
138 auto MemOffsetAndIdx = MemOffset2Idx.find(MemOffset);
139 if (MemOffsetAndIdx == MemOffset2Idx.end())
140 return std::nullopt;
141 const int64_t Idx = MemOffsetAndIdx->second - LowestIdx;
142 assert(Idx >= 0 && "Expected non-negative byte offset?");
143 LittleEndian &= Idx == littleEndianByteAt(Width, MemOffset);
144 BigEndian &= Idx == bigEndianByteAt(Width, MemOffset);
145 if (!BigEndian && !LittleEndian)
146 return std::nullopt;
147 }
148
149 assert((BigEndian != LittleEndian) &&
150 "Pattern cannot be both big and little endian!");
151 return BigEndian;
152}
153
155
156bool CombinerHelper::isLegal(const LegalityQuery &Query) const {
157 assert(LI && "Must have LegalizerInfo to query isLegal!");
158 return LI->getAction(Query).Action == LegalizeActions::Legal;
159}
160
162 const LegalityQuery &Query) const {
163 return isPreLegalize() || isLegal(Query);
164}
165
167 return isLegal(Query) ||
168 LI->getAction(Query).Action == LegalizeActions::WidenScalar;
169}
170
172 const LegalityQuery &Query) const {
173 LegalizeAction Action = LI->getAction(Query).Action;
174 return Action == LegalizeActions::Legal ||
176}
177
179 if (!Ty.isVector())
180 return isLegalOrBeforeLegalizer({TargetOpcode::G_CONSTANT, {Ty}});
181 // Vector constants are represented as a G_BUILD_VECTOR of scalar G_CONSTANTs.
182 if (isPreLegalize())
183 return true;
184 LLT EltTy = Ty.getElementType();
185 return isLegal({TargetOpcode::G_BUILD_VECTOR, {Ty, EltTy}}) &&
186 isLegal({TargetOpcode::G_CONSTANT, {EltTy}});
187}
188
190 Register ToReg) const {
// Announce to the observer that every use of FromReg is about to change.
191 Observer.changingAllUsesOfReg(MRI, FromReg);
192
// If constrainRegAttrs(ToReg, FromReg) succeeds, substitute ToReg for FromReg
// everywhere. Otherwise keep FromReg alive and build a copy with FromReg as
// the destination and ToReg as the source.
193 if (MRI.constrainRegAttrs(ToReg, FromReg))
194 MRI.replaceRegWith(FromReg, ToReg);
195 else
196 Builder.buildCopy(FromReg, ToReg);
197
// Close the change notification opened at the top of the function.
198 Observer.finishedChangingAllUsesOfReg();
199}
200
202 MachineOperand &FromRegOp,
203 Register ToReg) const {
204 assert(FromRegOp.getParent() && "Expected an operand in an MI");
205 Observer.changingInstr(*FromRegOp.getParent());
206
207 FromRegOp.setReg(ToReg);
208
209 Observer.changedInstr(*FromRegOp.getParent());
210}
211
213 unsigned ToOpcode) const {
214 Observer.changingInstr(FromMI);
215
216 FromMI.setDesc(Builder.getTII().get(ToOpcode));
217
218 Observer.changedInstr(FromMI);
219}
220
222 return RBI->getRegBank(Reg, MRI, *TRI);
223}
224
226 const RegisterBank *RegBank) const {
227 if (RegBank)
228 MRI.setRegBank(Reg, *RegBank);
229}
230
232 if (matchCombineCopy(MI)) {
234 return true;
235 }
236 return false;
237}
239 if (MI.getOpcode() != TargetOpcode::COPY)
240 return false;
241 Register DstReg = MI.getOperand(0).getReg();
242 Register SrcReg = MI.getOperand(1).getReg();
243 return canReplaceReg(DstReg, SrcReg, MRI);
244}
246 Register DstReg = MI.getOperand(0).getReg();
247 Register SrcReg = MI.getOperand(1).getReg();
248 replaceRegWith(MRI, DstReg, SrcReg);
249 MI.eraseFromParent();
250}
251
253 MachineInstr &MI, BuildFnTy &MatchInfo) const {
254 // Ported from InstCombinerImpl::pushFreezeToPreventPoisonFromPropagating.
255 Register DstOp = MI.getOperand(0).getReg();
256 Register OrigOp = MI.getOperand(1).getReg();
257
258 if (!MRI.hasOneNonDBGUse(OrigOp))
259 return false;
260
261 MachineInstr *OrigDef = MRI.getUniqueVRegDef(OrigOp);
262 // Even if only a single operand of the PHI is not guaranteed non-poison,
263 // moving freeze() backwards across a PHI can cause optimization issues for
264 // other users of that operand.
265 //
266 // Moving freeze() from one of the output registers of a G_UNMERGE_VALUES to
267 // the source register is unprofitable because it makes the freeze() more
268 // strict than is necessary (it would affect the whole register instead of
269 // just the subreg being frozen).
270 if (OrigDef->isPHI() || isa<GUnmerge>(OrigDef))
271 return false;
272
273 if (canCreateUndefOrPoison(OrigOp, MRI,
274 /*ConsiderFlagsAndMetadata=*/false))
275 return false;
276
277 std::optional<MachineOperand> MaybePoisonOperand;
278 for (MachineOperand &Operand : OrigDef->uses()) {
279 if (!Operand.isReg())
280 return false;
281
282 if (isGuaranteedNotToBeUndefOrPoison(Operand.getReg(), MRI))
283 continue;
284
285 if (!MaybePoisonOperand)
286 MaybePoisonOperand = Operand;
287 else {
288 // We have more than one maybe-poison operand. Moving the freeze is
289 // unsafe.
290 return false;
291 }
292 }
293
294 // Eliminate freeze if all operands are guaranteed non-poison.
295 if (!MaybePoisonOperand) {
296 MatchInfo = [=](MachineIRBuilder &B) {
297 Observer.changingInstr(*OrigDef);
298 cast<GenericMachineInstr>(OrigDef)->dropPoisonGeneratingFlags();
299 Observer.changedInstr(*OrigDef);
300 B.buildCopy(DstOp, OrigOp);
301 };
302 return true;
303 }
304
305 Register MaybePoisonOperandReg = MaybePoisonOperand->getReg();
306 LLT MaybePoisonOperandRegTy = MRI.getType(MaybePoisonOperandReg);
307
308 MatchInfo = [=](MachineIRBuilder &B) mutable {
309 Observer.changingInstr(*OrigDef);
310 cast<GenericMachineInstr>(OrigDef)->dropPoisonGeneratingFlags();
311 Observer.changedInstr(*OrigDef);
312 B.setInsertPt(*OrigDef->getParent(), OrigDef->getIterator());
313 auto Freeze = B.buildFreeze(MaybePoisonOperandRegTy, MaybePoisonOperandReg);
315 MRI, *OrigDef->findRegisterUseOperand(MaybePoisonOperandReg, TRI),
316 Freeze.getReg(0));
317 replaceRegWith(MRI, DstOp, OrigOp);
318 };
319 return true;
320}
321
324 assert(MI.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
325 "Invalid instruction");
326 bool IsUndef = true;
327 MachineInstr *Undef = nullptr;
328
329 // Walk over all the operands of concat vectors and check if they are
330 // build_vector themselves or undef.
331 // Then collect their operands in Ops.
332 for (const MachineOperand &MO : MI.uses()) {
333 Register Reg = MO.getReg();
334 MachineInstr *Def = MRI.getVRegDef(Reg);
335 assert(Def && "Operand not defined");
336 if (!MRI.hasOneNonDBGUse(Reg))
337 return false;
338 switch (Def->getOpcode()) {
339 case TargetOpcode::G_BUILD_VECTOR:
340 IsUndef = false;
341 // Remember the operands of the build_vector to fold
342 // them into the yet-to-build flattened concat vectors.
343 for (const MachineOperand &BuildVecMO : Def->uses())
344 Ops.push_back(BuildVecMO.getReg());
345 break;
346 case TargetOpcode::G_IMPLICIT_DEF: {
347 LLT OpType = MRI.getType(Reg);
348 // Keep one undef value for all the undef operands.
349 if (!Undef) {
350 Builder.setInsertPt(*MI.getParent(), MI);
351 Undef = Builder.buildUndef(OpType.getScalarType());
352 }
353 assert(MRI.getType(Undef->getOperand(0).getReg()) ==
354 OpType.getScalarType() &&
355 "All undefs should have the same type");
356 // Break the undef vector in as many scalar elements as needed
357 // for the flattening.
358 for (unsigned EltIdx = 0, EltEnd = OpType.getNumElements();
359 EltIdx != EltEnd; ++EltIdx)
360 Ops.push_back(Undef->getOperand(0).getReg());
361 break;
362 }
363 default:
364 return false;
365 }
366 }
367
368 // Check if the combine is illegal
369 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
371 {TargetOpcode::G_BUILD_VECTOR, {DstTy, MRI.getType(Ops[0])}})) {
372 return false;
373 }
374
375 if (IsUndef)
376 Ops.clear();
377
378 return true;
379}
382 // We determined that the concat_vectors can be flattened.
383 // Generate the flattened build_vector.
384 Register DstReg = MI.getOperand(0).getReg();
385 Builder.setInsertPt(*MI.getParent(), MI);
386 Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
387
388 // Note: IsUndef is sort of redundant. We could have determined it by
389 // checking that all Ops are undef. Alternatively, we could have
390 // generated a build_vector of undefs and relied on another combine to
391 // clean that up. For now, given we already gather this information
392 // in matchCombineConcatVectors, just save compile time and issue the
393 // right thing.
// An empty Ops list is how the matcher reports that every source was undef
// (it clears Ops in that case), so emit a single undef instead.
394 if (Ops.empty())
395 Builder.buildUndef(NewDstReg);
396 else
397 Builder.buildBuildVector(NewDstReg, Ops);
// Point the users of the old result at the new register, then drop MI.
398 replaceRegWith(MRI, DstReg, NewDstReg);
399 MI.eraseFromParent();
400}
401
403 auto &Shuffle = cast<GShuffleVector>(MI);
404
405 Register SrcVec1 = Shuffle.getSrc1Reg();
406 Register SrcVec2 = Shuffle.getSrc2Reg();
407 LLT EltTy = MRI.getType(SrcVec1).getElementType();
408 int Width = MRI.getType(SrcVec1).getNumElements();
409
410 auto Unmerge1 = Builder.buildUnmerge(EltTy, SrcVec1);
411 auto Unmerge2 = Builder.buildUnmerge(EltTy, SrcVec2);
412
413 SmallVector<Register> Extracts;
414 // Select only applicable elements from unmerged values.
415 for (int Val : Shuffle.getMask()) {
416 if (Val == -1)
417 Extracts.push_back(Builder.buildUndef(EltTy).getReg(0));
418 else if (Val < Width)
419 Extracts.push_back(Unmerge1.getReg(Val));
420 else
421 Extracts.push_back(Unmerge2.getReg(Val - Width));
422 }
423 assert(Extracts.size() > 0 && "Expected at least one element in the shuffle");
424 if (Extracts.size() == 1)
425 Builder.buildCopy(MI.getOperand(0).getReg(), Extracts[0]);
426 else
427 Builder.buildBuildVector(MI.getOperand(0).getReg(), Extracts);
428 MI.eraseFromParent();
429}
430
433 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
434 auto ConcatMI1 =
435 dyn_cast<GConcatVectors>(MRI.getVRegDef(MI.getOperand(1).getReg()));
436 auto ConcatMI2 =
437 dyn_cast<GConcatVectors>(MRI.getVRegDef(MI.getOperand(2).getReg()));
438 if (!ConcatMI1 || !ConcatMI2)
439 return false;
440
441 // Check that the sources of the Concat instructions have the same type
442 if (MRI.getType(ConcatMI1->getSourceReg(0)) !=
443 MRI.getType(ConcatMI2->getSourceReg(0)))
444 return false;
445
446 LLT ConcatSrcTy = MRI.getType(ConcatMI1->getReg(1));
447 LLT ShuffleSrcTy1 = MRI.getType(MI.getOperand(1).getReg());
448 unsigned ConcatSrcNumElt = ConcatSrcTy.getNumElements();
449 for (unsigned i = 0; i < Mask.size(); i += ConcatSrcNumElt) {
450 // Check if the index takes a whole source register from G_CONCAT_VECTORS
451 // Assumes that all Sources of G_CONCAT_VECTORS are the same type
452 if (Mask[i] == -1) {
453 for (unsigned j = 1; j < ConcatSrcNumElt; j++) {
454 if (i + j >= Mask.size())
455 return false;
456 if (Mask[i + j] != -1)
457 return false;
458 }
460 {TargetOpcode::G_IMPLICIT_DEF, {ConcatSrcTy}}))
461 return false;
462 Ops.push_back(0);
463 } else if (Mask[i] % ConcatSrcNumElt == 0) {
464 for (unsigned j = 1; j < ConcatSrcNumElt; j++) {
465 if (i + j >= Mask.size())
466 return false;
467 if (Mask[i + j] != Mask[i] + static_cast<int>(j))
468 return false;
469 }
470 // Retrieve the source register from its respective G_CONCAT_VECTORS
471 // instruction
472 if (Mask[i] < ShuffleSrcTy1.getNumElements()) {
473 Ops.push_back(ConcatMI1->getSourceReg(Mask[i] / ConcatSrcNumElt));
474 } else {
475 Ops.push_back(ConcatMI2->getSourceReg(Mask[i] / ConcatSrcNumElt -
476 ConcatMI1->getNumSources()));
477 }
478 } else {
479 return false;
480 }
481 }
482
484 {TargetOpcode::G_CONCAT_VECTORS,
485 {MRI.getType(MI.getOperand(0).getReg()), ConcatSrcTy}}))
486 return false;
487
488 return !Ops.empty();
489}
490
493 LLT SrcTy;
494 for (Register &Reg : Ops) {
495 if (Reg != 0)
496 SrcTy = MRI.getType(Reg);
497 }
498 assert(SrcTy.isValid() && "Unexpected full undef vector in concat combine");
499
500 Register UndefReg = 0;
501
502 for (Register &Reg : Ops) {
503 if (Reg == 0) {
504 if (UndefReg == 0)
505 UndefReg = Builder.buildUndef(SrcTy).getReg(0);
506 Reg = UndefReg;
507 }
508 }
509
510 if (Ops.size() > 1)
511 Builder.buildConcatVectors(MI.getOperand(0).getReg(), Ops);
512 else
513 Builder.buildCopy(MI.getOperand(0).getReg(), Ops[0]);
514 MI.eraseFromParent();
515}
516
521 return true;
522 }
523 return false;
524}
525
528 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
529 "Invalid instruction kind");
530 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
531 Register Src1 = MI.getOperand(1).getReg();
532 LLT SrcType = MRI.getType(Src1);
533
534 unsigned DstNumElts = DstType.getNumElements();
535 unsigned SrcNumElts = SrcType.getNumElements();
536
537 // If the resulting vector is smaller than the size of the source
538 // vectors being concatenated, we won't be able to replace the
539 // shuffle vector into a concat_vectors.
540 //
541 // Note: We may still be able to produce a concat_vectors fed by
542 // extract_vector_elt and so on. It is less clear that would
543 // be better though, so don't bother for now.
544 //
545 // If the destination is a scalar, the size of the sources doesn't
546 // matter. We will lower the shuffle to a plain copy. This will
547 // work only if the source and destination have the same size. But
548 // that's covered by the next condition.
549 //
550 // TODO: If the size between the source and destination don't match
551 // we could still emit an extract vector element in that case.
552 if (DstNumElts < 2 * SrcNumElts)
553 return false;
554
555 // Check that the shuffle mask can be broken evenly between the
556 // different sources.
557 if (DstNumElts % SrcNumElts != 0)
558 return false;
559
560 // Mask length is a multiple of the source vector length.
561 // Check if the shuffle is some kind of concatenation of the input
562 // vectors.
563 unsigned NumConcat = DstNumElts / SrcNumElts;
564 SmallVector<int, 8> ConcatSrcs(NumConcat, -1);
565 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
566 for (unsigned i = 0; i != DstNumElts; ++i) {
567 int Idx = Mask[i];
568 // Undef value.
569 if (Idx < 0)
570 continue;
571 // Ensure the indices in each SrcType sized piece are sequential and that
572 // the same source is used for the whole piece.
573 if ((Idx % SrcNumElts != (i % SrcNumElts)) ||
574 (ConcatSrcs[i / SrcNumElts] >= 0 &&
575 ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts)))
576 return false;
577 // Remember which source this index came from.
578 ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts;
579 }
580
581 // The shuffle is concatenating multiple vectors together.
582 // Collect the different operands for that.
583 Register UndefReg;
584 Register Src2 = MI.getOperand(2).getReg();
585 for (auto Src : ConcatSrcs) {
586 if (Src < 0) {
587 if (!UndefReg) {
588 Builder.setInsertPt(*MI.getParent(), MI);
589 UndefReg = Builder.buildUndef(SrcType).getReg(0);
590 }
591 Ops.push_back(UndefReg);
592 } else if (Src == 0)
593 Ops.push_back(Src1);
594 else
595 Ops.push_back(Src2);
596 }
597 return true;
598}
599
601 ArrayRef<Register> Ops) const {
602 Register DstReg = MI.getOperand(0).getReg();
603 Builder.setInsertPt(*MI.getParent(), MI);
604 Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
605
606 if (Ops.size() == 1)
607 Builder.buildCopy(NewDstReg, Ops[0]);
608 else
609 Builder.buildMergeLikeInstr(NewDstReg, Ops);
610
611 replaceRegWith(MRI, DstReg, NewDstReg);
612 MI.eraseFromParent();
613}
614
615namespace {
616
617/// Select a preference between two uses. CurrentUse is the current preference
618/// while *ForCandidate is attributes of the candidate under consideration.
619PreferredTuple ChoosePreferredUse(MachineInstr &LoadMI,
620 PreferredTuple &CurrentUse,
621 const LLT TyForCandidate,
622 unsigned OpcodeForCandidate,
623 MachineInstr *MIForCandidate) {
624 if (!CurrentUse.Ty.isValid()) {
625 if (CurrentUse.ExtendOpcode == OpcodeForCandidate ||
626 CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT)
627 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
628 return CurrentUse;
629 }
630
631 // We permit the extend to hoist through basic blocks but this is only
632 // sensible if the target has extending loads. If you end up lowering back
633 // into a load and extend during the legalizer then the end result is
634 // hoisting the extend up to the load.
635
636 // Prefer defined extensions to undefined extensions as these are more
637 // likely to reduce the number of instructions.
638 if (OpcodeForCandidate == TargetOpcode::G_ANYEXT &&
639 CurrentUse.ExtendOpcode != TargetOpcode::G_ANYEXT)
640 return CurrentUse;
641 else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT &&
642 OpcodeForCandidate != TargetOpcode::G_ANYEXT)
643 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
644
645 // Prefer sign extensions to zero extensions as sign-extensions tend to be
646 // more expensive. Don't do this if the load is already a zero-extend load
647 // though, otherwise we'll rewrite a zero-extend load into a sign-extend
648 // later.
649 if (!isa<GZExtLoad>(LoadMI) && CurrentUse.Ty == TyForCandidate) {
650 if (CurrentUse.ExtendOpcode == TargetOpcode::G_SEXT &&
651 OpcodeForCandidate == TargetOpcode::G_ZEXT)
652 return CurrentUse;
653 else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ZEXT &&
654 OpcodeForCandidate == TargetOpcode::G_SEXT)
655 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
656 }
657
658 // This is potentially target specific. We've chosen the largest type
659 // because G_TRUNC is usually free. One potential catch with this is that
660 // some targets have a reduced number of larger registers than smaller
661 // registers and this choice potentially increases the live-range for the
662 // larger value.
663 if (TyForCandidate.getSizeInBits() > CurrentUse.Ty.getSizeInBits()) {
664 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
665 }
666 return CurrentUse;
667}
668
669/// Find a suitable place to insert some instructions and insert them. This
670/// function accounts for special cases like inserting before a PHI node.
671/// The current strategy for inserting before PHI's is to duplicate the
672/// instructions for each predecessor. However, while that's ok for G_TRUNC
673/// on most targets since it generally requires no code, other targets/cases may
674/// want to try harder to find a dominating block.
675static void InsertInsnsWithoutSideEffectsBeforeUse(
678 MachineOperand &UseMO)>
679 Inserter) {
680 MachineInstr &UseMI = *UseMO.getParent();
681
682 MachineBasicBlock *InsertBB = UseMI.getParent();
683
684 // If the use is a PHI then we want the predecessor block instead.
685 if (UseMI.isPHI()) {
686 MachineOperand *PredBB = std::next(&UseMO);
687 InsertBB = PredBB->getMBB();
688 }
689
690 // If the block is the same block as the def then we want to insert just after
691 // the def instead of at the start of the block.
692 if (InsertBB == DefMI.getParent()) {
694 Inserter(InsertBB, std::next(InsertPt), UseMO);
695 return;
696 }
697
698 // Otherwise we want the start of the BB
699 Inserter(InsertBB, InsertBB->getFirstNonPHI(), UseMO);
700}
701} // end anonymous namespace
702
704 PreferredTuple Preferred;
705 if (matchCombineExtendingLoads(MI, Preferred)) {
706 applyCombineExtendingLoads(MI, Preferred);
707 return true;
708 }
709 return false;
710}
711
712static unsigned getExtLoadOpcForExtend(unsigned ExtOpc) {
713 unsigned CandidateLoadOpc;
714 switch (ExtOpc) {
715 case TargetOpcode::G_ANYEXT:
716 CandidateLoadOpc = TargetOpcode::G_LOAD;
717 break;
718 case TargetOpcode::G_SEXT:
719 CandidateLoadOpc = TargetOpcode::G_SEXTLOAD;
720 break;
721 case TargetOpcode::G_ZEXT:
722 CandidateLoadOpc = TargetOpcode::G_ZEXTLOAD;
723 break;
724 default:
725 llvm_unreachable("Unexpected extend opc");
726 }
727 return CandidateLoadOpc;
728}
729
731 MachineInstr &MI, PreferredTuple &Preferred) const {
732 // We match the loads and follow the uses to the extend instead of matching
733 // the extends and following the def to the load. This is because the load
734 // must remain in the same position for correctness (unless we also add code
735 // to find a safe place to sink it) whereas the extend is freely movable.
736 // It also prevents us from duplicating the load for the volatile case or just
737 // for performance.
738 GAnyLoad *LoadMI = dyn_cast<GAnyLoad>(&MI);
739 if (!LoadMI)
740 return false;
741
742 Register LoadReg = LoadMI->getDstReg();
743
744 LLT LoadValueTy = MRI.getType(LoadReg);
745 if (!LoadValueTy.isScalar())
746 return false;
747
748 // Most architectures are going to legalize <s8 loads into at least a 1 byte
749 // load, and the MMOs can only describe memory accesses in multiples of bytes.
750 // If we try to perform extload combining on those, we can end up with
751 // %a(s8) = extload %ptr (load 1 byte from %ptr)
752 // ... which is an illegal extload instruction.
753 if (LoadValueTy.getSizeInBits() < 8)
754 return false;
755
756 // For non power-of-2 types, they will very likely be legalized into multiple
757 // loads. Don't bother trying to match them into extending loads.
759 return false;
760
761 // Find the preferred type aside from the any-extends (unless it's the only
762 // one) and non-extending ops. We'll emit an extending load to that type and
763 // emit a variant of (extend (trunc X)) for the others according to the
764 // relative type sizes. At the same time, pick an extend to use based on the
765 // extend involved in the chosen type.
766 unsigned PreferredOpcode =
767 isa<GLoad>(&MI)
768 ? TargetOpcode::G_ANYEXT
769 : isa<GSExtLoad>(&MI) ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
770 Preferred = {LLT(), PreferredOpcode, nullptr};
771 for (auto &UseMI : MRI.use_nodbg_instructions(LoadReg)) {
772 if (UseMI.getOpcode() == TargetOpcode::G_SEXT ||
773 UseMI.getOpcode() == TargetOpcode::G_ZEXT ||
774 (UseMI.getOpcode() == TargetOpcode::G_ANYEXT)) {
775 const auto &MMO = LoadMI->getMMO();
776 // Don't do anything for atomics.
777 if (MMO.isAtomic())
778 continue;
779 // Check for legality.
780 if (!isPreLegalize()) {
781 LegalityQuery::MemDesc MMDesc(MMO);
782 unsigned CandidateLoadOpc = getExtLoadOpcForExtend(UseMI.getOpcode());
783 LLT UseTy = MRI.getType(UseMI.getOperand(0).getReg());
784 LLT SrcTy = MRI.getType(LoadMI->getPointerReg());
785 if (LI->getAction({CandidateLoadOpc, {UseTy, SrcTy}, {MMDesc}})
786 .Action != LegalizeActions::Legal)
787 continue;
788 }
789 Preferred = ChoosePreferredUse(MI, Preferred,
790 MRI.getType(UseMI.getOperand(0).getReg()),
791 UseMI.getOpcode(), &UseMI);
792 }
793 }
794
795 // There were no extends
796 if (!Preferred.MI)
797 return false;
798 // It should be impossible to choose an extend without selecting a different
799 // type since by definition the result of an extend is larger.
800 assert(Preferred.Ty != LoadValueTy && "Extending to same type?");
801
802 LLVM_DEBUG(dbgs() << "Preferred use is: " << *Preferred.MI);
803 return true;
804}
805
807 MachineInstr &MI, PreferredTuple &Preferred) const {
808 // Rewrite the load to the chosen extending load.
809 Register ChosenDstReg = Preferred.MI->getOperand(0).getReg();
810
811 // Inserter to insert a truncate back to the original type at a given point
812 // with some basic CSE to limit truncate duplication to one per BB.
814 auto InsertTruncAt = [&](MachineBasicBlock *InsertIntoBB,
815 MachineBasicBlock::iterator InsertBefore,
816 MachineOperand &UseMO) {
817 MachineInstr *PreviouslyEmitted = EmittedInsns.lookup(InsertIntoBB);
818 if (PreviouslyEmitted) {
819 Observer.changingInstr(*UseMO.getParent());
820 UseMO.setReg(PreviouslyEmitted->getOperand(0).getReg());
821 Observer.changedInstr(*UseMO.getParent());
822 return;
823 }
824
825 Builder.setInsertPt(*InsertIntoBB, InsertBefore);
826 Register NewDstReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg());
827 MachineInstr *NewMI = Builder.buildTrunc(NewDstReg, ChosenDstReg);
828 EmittedInsns[InsertIntoBB] = NewMI;
829 replaceRegOpWith(MRI, UseMO, NewDstReg);
830 };
831
832 Observer.changingInstr(MI);
833 unsigned LoadOpc = getExtLoadOpcForExtend(Preferred.ExtendOpcode);
834 MI.setDesc(Builder.getTII().get(LoadOpc));
835
836 // Rewrite all the uses to fix up the types.
837 auto &LoadValue = MI.getOperand(0);
839 llvm::make_pointer_range(MRI.use_operands(LoadValue.getReg())));
840
841 for (auto *UseMO : Uses) {
842 MachineInstr *UseMI = UseMO->getParent();
843
844 // If the extend is compatible with the preferred extend then we should fix
845 // up the type and extend so that it uses the preferred use.
846 if (UseMI->getOpcode() == Preferred.ExtendOpcode ||
847 UseMI->getOpcode() == TargetOpcode::G_ANYEXT) {
848 Register UseDstReg = UseMI->getOperand(0).getReg();
849 MachineOperand &UseSrcMO = UseMI->getOperand(1);
850 const LLT UseDstTy = MRI.getType(UseDstReg);
851 if (UseDstReg != ChosenDstReg) {
852 if (Preferred.Ty == UseDstTy) {
853 // If the use has the same type as the preferred use, then merge
854 // the vregs and erase the extend. For example:
855 // %1:_(s8) = G_LOAD ...
856 // %2:_(s32) = G_SEXT %1(s8)
857 // %3:_(s32) = G_ANYEXT %1(s8)
858 // ... = ... %3(s32)
859 // rewrites to:
860 // %2:_(s32) = G_SEXTLOAD ...
861 // ... = ... %2(s32)
862 replaceRegWith(MRI, UseDstReg, ChosenDstReg);
863 Observer.erasingInstr(*UseMO->getParent());
864 UseMO->getParent()->eraseFromParent();
865 } else if (Preferred.Ty.getSizeInBits() < UseDstTy.getSizeInBits()) {
866 // If the preferred size is smaller, then keep the extend but extend
867 // from the result of the extending load. For example:
868 // %1:_(s8) = G_LOAD ...
869 // %2:_(s32) = G_SEXT %1(s8)
870 // %3:_(s64) = G_ANYEXT %1(s8)
871 // ... = ... %3(s64)
872 /// rewrites to:
873 // %2:_(s32) = G_SEXTLOAD ...
874 // %3:_(s64) = G_ANYEXT %2:_(s32)
875 // ... = ... %3(s64)
876 replaceRegOpWith(MRI, UseSrcMO, ChosenDstReg);
877 } else {
878 // If the preferred size is large, then insert a truncate. For
879 // example:
880 // %1:_(s8) = G_LOAD ...
881 // %2:_(s64) = G_SEXT %1(s8)
882 // %3:_(s32) = G_ZEXT %1(s8)
883 // ... = ... %3(s32)
884 // rewrites to:
885 // %2:_(s64) = G_SEXTLOAD ...
886 // %4:_(s8) = G_TRUNC %2:_(s64)
887 // %3:_(s32) = G_ZEXT %4:_(s8)
888 // ... = ... %3(s32)
889 InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO,
890 InsertTruncAt);
891 }
892 continue;
893 }
894 // The use is (one of) the uses of the preferred use we chose earlier.
895 // We're going to update the load to def this value later so just erase
896 // the old extend.
897 Observer.erasingInstr(*UseMO->getParent());
898 UseMO->getParent()->eraseFromParent();
899 continue;
900 }
901
902 // The use isn't an extend. Truncate back to the type we originally loaded.
903 // This is free on many targets.
904 InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO, InsertTruncAt);
905 }
906
907 MI.getOperand(0).setReg(ChosenDstReg);
908 Observer.changedInstr(MI);
909}
910
912 BuildFnTy &MatchInfo) const {
913 assert(MI.getOpcode() == TargetOpcode::G_AND);
914
915 // If we have the following code:
916 // %mask = G_CONSTANT 255
917 // %ld = G_LOAD %ptr, (load s16)
918 // %and = G_AND %ld, %mask
919 //
920 // Try to fold it into
921 // %ld = G_ZEXTLOAD %ptr, (load s8)
922
923 Register Dst = MI.getOperand(0).getReg();
924 if (MRI.getType(Dst).isVector())
925 return false;
926
927 auto MaybeMask =
928 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
929 if (!MaybeMask)
930 return false;
931
932 APInt MaskVal = MaybeMask->Value;
933
934 if (!MaskVal.isMask())
935 return false;
936
937 Register SrcReg = MI.getOperand(1).getReg();
938 // Don't use getOpcodeDef() here since intermediate instructions may have
939 // multiple users.
940 GAnyLoad *LoadMI = dyn_cast<GAnyLoad>(MRI.getVRegDef(SrcReg));
941 if (!LoadMI || !MRI.hasOneNonDBGUse(LoadMI->getDstReg()))
942 return false;
943
944 Register LoadReg = LoadMI->getDstReg();
945 LLT RegTy = MRI.getType(LoadReg);
946 Register PtrReg = LoadMI->getPointerReg();
947 unsigned RegSize = RegTy.getSizeInBits();
948 LocationSize LoadSizeBits = LoadMI->getMemSizeInBits();
949 unsigned MaskSizeBits = MaskVal.countr_one();
950
951 // The mask may not be larger than the in-memory type, as it might cover sign
952 // extended bits
953 if (MaskSizeBits > LoadSizeBits.getValue())
954 return false;
955
956 // If the mask covers the whole destination register, there's nothing to
957 // extend
958 if (MaskSizeBits >= RegSize)
959 return false;
960
961 // Most targets cannot deal with loads of size < 8 and need to re-legalize to
962 // at least byte loads. Avoid creating such loads here
963 if (MaskSizeBits < 8 || !isPowerOf2_32(MaskSizeBits))
964 return false;
965
966 const MachineMemOperand &MMO = LoadMI->getMMO();
967 LegalityQuery::MemDesc MemDesc(MMO);
968
969 // Don't modify the memory access size if this is atomic/volatile, but we can
970 // still adjust the opcode to indicate the high bit behavior.
971 if (LoadMI->isSimple())
972 MemDesc.MemoryTy = LLT::scalar(MaskSizeBits);
973 else if (LoadSizeBits.getValue() > MaskSizeBits ||
974 LoadSizeBits.getValue() == RegSize)
975 return false;
976
977 // TODO: Could check if it's legal with the reduced or original memory size.
979 {TargetOpcode::G_ZEXTLOAD, {RegTy, MRI.getType(PtrReg)}, {MemDesc}}))
980 return false;
981
982 MatchInfo = [=](MachineIRBuilder &B) {
983 B.setInstrAndDebugLoc(*LoadMI);
984 auto &MF = B.getMF();
985 auto PtrInfo = MMO.getPointerInfo();
986 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, MemDesc.MemoryTy);
987 B.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, Dst, PtrReg, *NewMMO);
988 LoadMI->eraseFromParent();
989 };
990 return true;
991}
992
994 const MachineInstr &UseMI) const {
// Answers whether DefMI sits at or before UseMI inside the single basic block
// that holds both (asserted below). An instruction counts as preceding itself.
// Debug instructions are rejected by assertion.
995 assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
996 "shouldn't consider debug uses");
997 assert(DefMI.getParent() == UseMI.getParent());
998 if (&DefMI == &UseMI)
999 return true;
1000 const MachineBasicBlock &MBB = *DefMI.getParent();
// Scan the block from its start and stop at whichever of the two instructions
// is encountered first.
1001 auto DefOrUse = find_if(MBB, [&DefMI, &UseMI](const MachineInstr &MI) {
1002 return &MI == &DefMI || &MI == &UseMI;
1003 });
1004 if (DefOrUse == MBB.end())
1005 llvm_unreachable("Block must contain both DefMI and UseMI!");
// DefMI precedes UseMI exactly when DefMI was the first one found.
1006 return &*DefOrUse == &DefMI;
1007}
1008
1010 const MachineInstr &UseMI) const {
// Dominance query between two non-debug instructions.
1011 assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
1012 "shouldn't consider debug uses");
// Use the dominator tree whenever one was supplied (MDT is non-null).
1013 if (MDT)
1014 return MDT->dominates(&DefMI, &UseMI);
// Without a tree only same-block questions can be answered; instructions in
// different blocks are conservatively reported as not dominating.
1015 else if (DefMI.getParent() != UseMI.getParent())
1016 return false;
1017
// Same block: fall back to the in-block ordering check.
1018 return isPredecessor(DefMI, UseMI);
1019}
1020
// Matches a scalar G_SEXT_INREG whose source register, optionally looked
// through one G_TRUNC, is defined (as reported by getOpcodeDef) by a
// G_SEXTLOAD whose memory size in bits equals the G_SEXT_INREG width
// immediate. Returns true only in that case.
1022 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1023 Register SrcReg = MI.getOperand(1).getReg();
1024 Register LoadUser = SrcReg;
1025
// Vector sources are not handled.
1026 if (MRI.getType(SrcReg).isVector())
1027 return false;
1028
// If the source is a G_TRUNC, search for the load from the truncate's input
// instead. TruncSrc stays unset when there is no truncate.
1029 Register TruncSrc;
1030 if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc))))
1031 LoadUser = TruncSrc;
1032
// Operand 2 is the width, in bits, that the G_SEXT_INREG extends from.
1033 uint64_t SizeInBits = MI.getOperand(2).getImm();
1034 // If the source is a G_SEXTLOAD from the same bit width, then we don't
1035 // need any extend at all, just a truncate.
1036 if (auto *LoadMI = getOpcodeDef<GSExtLoad>(LoadUser, MRI)) {
1037 // If truncating more than the original extended value, abort.
1038 auto LoadSizeBits = LoadMI->getMemSizeInBits();
1039 if (TruncSrc &&
1040 MRI.getType(TruncSrc).getSizeInBits() < LoadSizeBits.getValue())
1041 return false;
1042 if (LoadSizeBits == SizeInBits)
1043 return true;
1044 }
1045 return false;
1046}
1047
// Replaces the G_SEXT_INREG with a COPY from its source operand (operand 1)
// into its destination (operand 0), then erases the G_SEXT_INREG.
1049 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1050 Builder.buildCopy(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
1051 MI.eraseFromParent();
1052}
1053
1055 MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) const {
// Checks whether a scalar G_SEXT_INREG whose source is a single-use G_LOAD can
// be turned into a G_SEXTLOAD. On success MatchInfo holds the load's
// destination register and the memory width in bits for the new load.
1056 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1057
1058 Register DstReg = MI.getOperand(0).getReg();
1059 LLT RegTy = MRI.getType(DstReg);
1060
1061 // Only supports scalars for now.
1062 if (RegTy.isVector())
1063 return false;
1064
1065 Register SrcReg = MI.getOperand(1).getReg();
1066 auto *LoadDef = getOpcodeDef<GLoad>(SrcReg, MRI);
// The source must have no other non-debug user besides this G_SEXT_INREG.
1067 if (!LoadDef || !MRI.hasOneNonDBGUse(SrcReg))
1068 return false;
1069
1070 uint64_t MemBits = LoadDef->getMemSizeInBits().getValue();
1071
1072 // If the sign extend extends from a narrower width than the load's width,
1073 // then we can narrow the load width when we combine to a G_SEXTLOAD.
1074 // Avoid widening the load at all.
1075 unsigned NewSizeBits = std::min((uint64_t)MI.getOperand(2).getImm(), MemBits);
1076
1077 // Don't generate G_SEXTLOADs with a < 1 byte width.
1078 if (NewSizeBits < 8)
1079 return false;
1080 // Don't bother creating a non-power-2 sextload, it will likely be broken up
1081 // anyway for most targets.
1082 if (!isPowerOf2_32(NewSizeBits))
1083 return false;
1084
// Start the legality description from the existing memory operand; only the
// memory type is overridden below.
1085 const MachineMemOperand &MMO = LoadDef->getMMO();
1086 LegalityQuery::MemDesc MMDesc(MMO);
1087
1088 // Don't modify the memory access size if this is atomic/volatile, but we can
1089 // still adjust the opcode to indicate the high bit behavior.
1090 if (LoadDef->isSimple())
1091 MMDesc.MemoryTy = LLT::scalar(NewSizeBits);
// Non-simple load: give up if the access would have to shrink, or if the load
// already fills the whole destination register.
1092 else if (MemBits > NewSizeBits || MemBits == RegTy.getSizeInBits())
1093 return false;
1094
1095 // TODO: Could check if it's legal with the reduced or original memory size.
1096 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SEXTLOAD,
1097 {MRI.getType(LoadDef->getDstReg()),
1098 MRI.getType(LoadDef->getPointerReg())},
1099 {MMDesc}}))
1100 return false;
1101
1102 MatchInfo = std::make_tuple(LoadDef->getDstReg(), NewSizeBits);
1103 return true;
1104}
1105
1107 MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) const {
// Rewrites the matched G_LOAD + G_SEXT_INREG pair as one G_SEXTLOAD that
// defines the G_SEXT_INREG's destination. MatchInfo supplies the load's
// destination register and the new memory width in bits.
1108 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1109 Register LoadReg;
1110 unsigned ScalarSizeBits;
1111 std::tie(LoadReg, ScalarSizeBits) = MatchInfo;
// cast<> asserts that the register recorded in MatchInfo is defined by a GLoad.
1112 GLoad *LoadDef = cast<GLoad>(MRI.getVRegDef(LoadReg));
1113
1114 // If we have the following:
1115 // %ld = G_LOAD %ptr, (load 2)
1116 // %ext = G_SEXT_INREG %ld, 8
1117 // ==>
1118 // %ld = G_SEXTLOAD %ptr (load 1)
1119
1120 auto &MMO = LoadDef->getMMO();
// Build the replacement at the position of the original load.
1121 Builder.setInstrAndDebugLoc(*LoadDef);
1122 auto &MF = Builder.getMF();
1123 auto PtrInfo = MMO.getPointerInfo();
// ScalarSizeBits / 8: presumably the new access size in bytes, in line with the
// "(load 2)" -> "(load 1)" example above — TODO confirm against the
// getMachineMemOperand overload selected here.
1124 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, ScalarSizeBits / 8);
1125 Builder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, MI.getOperand(0).getReg(),
1126 LoadDef->getPointerReg(), *NewMMO);
1127 MI.eraseFromParent();
1128
1129 // Not all loads can be deleted, so make sure the old one is removed.
1130 LoadDef->eraseFromParent();
1131}
1132
1133/// Return true if 'MI' is a load or a store that may fold its address
1134/// operand into the load / store addressing mode.
1136 MachineRegisterInfo &MRI) {
1138 auto *MF = MI->getMF();
// Only addresses produced by a G_PTR_ADD are candidates for folding.
1139 auto *Addr = getOpcodeDef<GPtrAdd>(MI->getPointerReg(), MRI);
1140 if (!Addr)
1141 return false;
1142
// Describe the access as base register plus either a constant offset or a
// second register with scale 1, depending on whether the G_PTR_ADD offset is a
// known integer constant.
1143 AM.HasBaseReg = true;
1144 if (auto CstOff = getIConstantVRegVal(Addr->getOffsetReg(), MRI))
1145 AM.BaseOffs = CstOff->getSExtValue(); // [reg +/- imm]
1146 else
1147 AM.Scale = 1; // [reg +/- reg]
1148
// The target decides, using the IR type derived from the memory operand's LLT
// and the access's address space.
1149 return TLI.isLegalAddressingMode(
1150 MF->getDataLayout(), AM,
1151 getTypeForLLT(MI->getMMO().getMemoryType(),
1152 MF->getFunction().getContext()),
1153 MI->getMMO().getAddrSpace());
1154}
1155
1156static unsigned getIndexedOpc(unsigned LdStOpc) {
1157 switch (LdStOpc) {
1158 case TargetOpcode::G_LOAD:
1159 return TargetOpcode::G_INDEXED_LOAD;
1160 case TargetOpcode::G_STORE:
1161 return TargetOpcode::G_INDEXED_STORE;
1162 case TargetOpcode::G_ZEXTLOAD:
1163 return TargetOpcode::G_INDEXED_ZEXTLOAD;
1164 case TargetOpcode::G_SEXTLOAD:
1165 return TargetOpcode::G_INDEXED_SEXTLOAD;
1166 default:
1167 llvm_unreachable("Unexpected opcode");
1168 }
1169}
1170
/// Asks the legalizer info whether the indexed form of \p LdSt (opcode chosen
/// by getIndexedOpc) is legal for its value type, pointer type and memory
/// type.
1171bool CombinerHelper::isIndexedLoadStoreLegal(GLoadStore &LdSt) const {
1172 // Check for legality.
1173 LLT PtrTy = MRI.getType(LdSt.getPointerReg());
1174 LLT Ty = MRI.getType(LdSt.getReg(0));
1175 LLT MemTy = LdSt.getMMO().getMemoryType();
// NOTE(review): the lines that declare MemDescrs and finish its initializer
// are not visible in this view; only the first two fields (memory type and its
// known-minimum size in bits) can be seen.
1177 {{MemTy, MemTy.getSizeInBits().getKnownMinValue(),
1179 unsigned IndexedOpc = getIndexedOpc(LdSt.getOpcode());
// The type list for the query differs between indexed stores and indexed
// loads.
1180 SmallVector<LLT> OpTys;
1181 if (IndexedOpc == TargetOpcode::G_INDEXED_STORE)
1182 OpTys = {PtrTy, Ty, Ty};
1183 else
1184 OpTys = {Ty, PtrTy}; // For G_INDEXED_LOAD, G_INDEXED_[SZ]EXTLOAD
1185
1186 LegalityQuery Q(IndexedOpc, OpTys, MemDescrs);
1187 return isLegal(Q);
1188}
1189
// Hidden command-line option with a default of 32.
// NOTE(review): the declaring line is not visible in this view; presumably this
// is the PostIndexUseThreshold limit that findPostIndexCandidate compares its
// use counter against — confirm.
1191 "post-index-use-threshold", cl::Hidden, cl::init(32),
1192 cl::desc("Number of uses of a base pointer to check before it is no longer "
1193 "considered for post-indexing."));
1194
/// Searches the users of \p LdSt's pointer for a G_PTR_ADD that could be folded
/// into \p LdSt as a post-indexed access. On success \p Addr is the G_PTR_ADD
/// result, \p Base its base register, \p Offset its offset register, and
/// \p RematOffset says whether the offset constant has to be rebuilt because
/// its definition does not dominate \p LdSt.
1195bool CombinerHelper::findPostIndexCandidate(GLoadStore &LdSt, Register &Addr,
1197 bool &RematOffset) const {
1198 // We're looking for the following pattern, for either load or store:
1199 // %baseptr:_(p0) = ...
1200 // G_STORE %val(s64), %baseptr(p0)
1201 // %offset:_(s64) = G_CONSTANT i64 -256
1202 // %new_addr:_(p0) = G_PTR_ADD %baseptr, %offset(s64)
1203 const auto &TLI = getTargetLowering();
1204
1205 Register Ptr = LdSt.getPointerReg();
1206 // If the store is the only use, don't bother.
1207 if (MRI.hasOneNonDBGUse(Ptr))
1208 return false;
1209
1210 if (!isIndexedLoadStoreLegal(LdSt))
1211 return false;
1212
// Pointers defined by a G_FRAME_INDEX are never turned into indexed accesses.
1213 if (getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Ptr, MRI))
1214 return false;
1215
1216 MachineInstr *StoredValDef = getDefIgnoringCopies(LdSt.getReg(0), MRI);
1217 auto *PtrDef = MRI.getVRegDef(Ptr);
1218
1219 unsigned NumUsesChecked = 0;
1220 for (auto &Use : MRI.use_nodbg_instructions(Ptr)) {
1221 if (++NumUsesChecked > PostIndexUseThreshold)
1222 return false; // Try to avoid exploding compile time.
1223
1224 auto *PtrAdd = dyn_cast<GPtrAdd>(&Use);
1225 // The use itself might be dead. This can happen during combines if DCE
1226 // hasn't had a chance to run yet. Don't allow it to form an indexed op.
1227 if (!PtrAdd || MRI.use_nodbg_empty(PtrAdd->getReg(0)))
1228 continue;
1229
1230 // Check the user of this isn't the store, otherwise we'd generate an
1231 // indexed store defining its own use.
1232 if (StoredValDef == &Use)
1233 continue;
1234
// Offset is an out-parameter and is overwritten for every candidate examined,
// including candidates that are rejected further down.
1235 Offset = PtrAdd->getOffsetReg();
1236 if (!ForceLegalIndexing &&
1237 !TLI.isIndexingLegal(LdSt, PtrAdd->getBaseReg(), Offset,
1238 /*IsPre*/ false, MRI))
1239 continue;
1240
1241 // Make sure the offset calculation is before the potentially indexed op.
1242 MachineInstr *OffsetDef = MRI.getVRegDef(Offset);
1243 RematOffset = false;
1244 if (!dominates(*OffsetDef, LdSt)) {
1245 // If the offset however is just a G_CONSTANT, we can always just
1246 // rematerialize it where we need it.
1247 if (OffsetDef->getOpcode() != TargetOpcode::G_CONSTANT)
1248 continue;
1249 RematOffset = true;
1250 }
1251
// Inspect every other user of the base pointer; any failing condition below
// abandons the whole search, not just this candidate.
1252 for (auto &BasePtrUse : MRI.use_nodbg_instructions(PtrAdd->getBaseReg())) {
1253 if (&BasePtrUse == PtrDef)
1254 continue;
1255
1256 // If the user is a later load/store that can be post-indexed, then don't
1257 // combine this one.
1258 auto *BasePtrLdSt = dyn_cast<GLoadStore>(&BasePtrUse);
1259 if (BasePtrLdSt && BasePtrLdSt != &LdSt &&
1260 dominates(LdSt, *BasePtrLdSt) &&
1261 isIndexedLoadStoreLegal(*BasePtrLdSt))
1262 return false;
1263
1264 // Now we're looking for the key G_PTR_ADD instruction, which contains
1265 // the offset add that we want to fold.
1266 if (auto *BasePtrUseDef = dyn_cast<GPtrAdd>(&BasePtrUse)) {
1267 Register PtrAddDefReg = BasePtrUseDef->getReg(0);
1268 for (auto &BaseUseUse : MRI.use_nodbg_instructions(PtrAddDefReg)) {
1269 // If the use is in a different block, then we may produce worse code
1270 // due to the extra register pressure.
1271 if (BaseUseUse.getParent() != LdSt.getParent())
1272 return false;
1273
// A load/store that can already fold this address into its addressing mode
// makes the indexed form unattractive.
1274 if (auto *UseUseLdSt = dyn_cast<GLoadStore>(&BaseUseUse))
1275 if (canFoldInAddressingMode(UseUseLdSt, TLI, MRI))
1276 return false;
1277 }
1278 if (!dominates(LdSt, BasePtrUse))
1279 return false; // All use must be dominated by the load/store.
1280 }
1281 }
1282
// The first candidate that survives all checks wins.
1283 Addr = PtrAdd->getReg(0);
1284 Base = PtrAdd->getBaseReg();
1285 return true;
1286 }
1287
1288 return false;
1289}
1290
1291bool CombinerHelper::findPreIndexCandidate(GLoadStore &LdSt, Register &Addr,
1292 Register &Base,
1293 Register &Offset) const {
1294 auto &MF = *LdSt.getParent()->getParent();
1295 const auto &TLI = *MF.getSubtarget().getTargetLowering();
1296
1297 Addr = LdSt.getPointerReg();
1298 if (!mi_match(Addr, MRI, m_GPtrAdd(m_Reg(Base), m_Reg(Offset))) ||
1299 MRI.hasOneNonDBGUse(Addr))
1300 return false;
1301
1302 if (!ForceLegalIndexing &&
1303 !TLI.isIndexingLegal(LdSt, Base, Offset, /*IsPre*/ true, MRI))
1304 return false;
1305
1306 if (!isIndexedLoadStoreLegal(LdSt))
1307 return false;
1308
1309 MachineInstr *BaseDef = getDefIgnoringCopies(Base, MRI);
1310 if (BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
1311 return false;
1312
1313 if (auto *St = dyn_cast<GStore>(&LdSt)) {
1314 // Would require a copy.
1315 if (Base == St->getValueReg())
1316 return false;
1317
1318 // We're expecting one use of Addr in MI, but it could also be the
1319 // value stored, which isn't actually dominated by the instruction.
1320 if (St->getValueReg() == Addr)
1321 return false;
1322 }
1323
1324 // Avoid increasing cross-block register pressure.
1325 for (auto &AddrUse : MRI.use_nodbg_instructions(Addr))
1326 if (AddrUse.getParent() != LdSt.getParent())
1327 return false;
1328
1329 // FIXME: check whether all uses of the base pointer are constant PtrAdds.
1330 // That might allow us to end base's liveness here by adjusting the constant.
1331 bool RealUse = false;
1332 for (auto &AddrUse : MRI.use_nodbg_instructions(Addr)) {
1333 if (!dominates(LdSt, AddrUse))
1334 return false; // All use must be dominated by the load/store.
1335
1336 // If Ptr may be folded in addressing mode of other use, then it's
1337 // not profitable to do this transformation.
1338 if (auto *UseLdSt = dyn_cast<GLoadStore>(&AddrUse)) {
1339 if (!canFoldInAddressingMode(UseLdSt, TLI, MRI))
1340 RealUse = true;
1341 } else {
1342 RealUse = true;
1343 }
1344 }
1345 return RealUse;
1346}
1347
1349 MachineInstr &MI, BuildFnTy &MatchInfo) const {
// Matches a G_EXTRACT_VECTOR_ELT of a single-use, simple G_LOAD of a vector in
// the same block and, when the narrowed access is legal and fast, stores a
// build function in MatchInfo that loads only the extracted element.
1350 assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT);
1351
1352 // Check if there is a load that defines the vector being extracted from.
1353 auto *LoadMI = getOpcodeDef<GLoad>(MI.getOperand(1).getReg(), MRI);
1354 if (!LoadMI)
1355 return false;
1356
1357 Register Vector = MI.getOperand(1).getReg();
1358 LLT VecEltTy = MRI.getType(Vector).getElementType();
1359
1360 assert(MRI.getType(MI.getOperand(0).getReg()) == VecEltTy);
1361
1362 // Checking whether we should reduce the load width.
1363 if (!MRI.hasOneNonDBGUse(Vector))
1364 return false;
1365
1366 // Check if the defining load is simple.
1367 if (!LoadMI->isSimple())
1368 return false;
1369
1370 // If the vector element type is not a multiple of a byte then we are unable
1371 // to correctly compute an address to load only the extracted element as a
1372 // scalar.
1373 if (!VecEltTy.isByteSized())
1374 return false;
1375
1376 // Check for load fold barriers between the extraction and the load.
1377 if (MI.getParent() != LoadMI->getParent())
1378 return false;
// Bound the scan between the load and the extract; give up once the MaxIter
// limit is hit.
1379 const unsigned MaxIter = 20;
1380 unsigned Iter = 0;
1381 for (auto II = LoadMI->getIterator(), IE = MI.getIterator(); II != IE; ++II) {
1382 if (II->isLoadFoldBarrier())
1383 return false;
1384 if (Iter++ == MaxIter)
1385 return false;
1386 }
1387
1388 // Check if the new load that we are going to create is legal
1389 // if we are in the post-legalization phase.
1390 MachineMemOperand MMO = LoadMI->getMMO();
1391 Align Alignment = MMO.getAlign();
1392 MachinePointerInfo PtrInfo;
1394
1395 // Finding the appropriate PtrInfo if offset is a known constant.
1396 // This is required to create the memory operand for the narrowed load.
1397 // This machine memory operand object helps us infer about legality
1398 // before we proceed to combine the instruction.
// NOTE(review): this looks up a constant for the *vector* register (operand 1),
// not for the element index (operand 2, read into Index further down). Confirm
// whether the index register was intended here.
1399 if (auto CVal = getIConstantVRegVal(Vector, MRI)) {
1400 int Elt = CVal->getZExtValue();
1401 // FIXME: should be (ABI size)*Elt.
1402 Offset = VecEltTy.getSizeInBits() * Elt / 8;
1403 PtrInfo = MMO.getPointerInfo().getWithOffset(Offset);
1404 } else {
1405 // Discard the pointer info except the address space because the memory
1406 // operand can't represent this new access since the offset is variable.
1407 Offset = VecEltTy.getSizeInBits() / 8;
1409 }
1410
// Reduce the alignment to what is still guaranteed at the computed offset.
1411 Alignment = commonAlignment(Alignment, Offset);
1412
1413 Register VecPtr = LoadMI->getPointerReg();
1414 LLT PtrTy = MRI.getType(VecPtr);
1415
1416 MachineFunction &MF = *MI.getMF();
1417 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, VecEltTy);
1418
1419 LegalityQuery::MemDesc MMDesc(*NewMMO);
1420
1422 {TargetOpcode::G_LOAD, {VecEltTy, PtrTy}, {MMDesc}}))
1423 return false;
1424
1425 // Load must be allowed and fast on the target.
1427 auto &DL = MF.getDataLayout();
1428 unsigned Fast = 0;
1429 if (!getTargetLowering().allowsMemoryAccess(C, DL, VecEltTy, *NewMMO,
1430 &Fast) ||
1431 !Fast)
1432 return false;
1433
1434 Register Result = MI.getOperand(0).getReg();
1435 Register Index = MI.getOperand(2).getReg();
1436
// The build function captures everything it needs by value. It computes the
// element address with LegalizerHelper, emits the narrow load and erases the
// original vector load.
1437 MatchInfo = [=](MachineIRBuilder &B) {
1438 GISelObserverWrapper DummyObserver;
1439 LegalizerHelper Helper(B.getMF(), DummyObserver, B);
1440 // Get pointer to the vector element.
1441 Register finalPtr = Helper.getVectorElementPointer(
1442 LoadMI->getPointerReg(), MRI.getType(LoadMI->getOperand(0).getReg()),
1443 Index);
1444 // New G_LOAD instruction.
1445 B.buildLoad(Result, finalPtr, PtrInfo, Alignment);
1446 // Remove original GLOAD instruction.
1447 LoadMI->eraseFromParent();
1448 };
1449
1450 return true;
1451}
1452
1454 MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) const {
// Tries to find a pre-indexed and then a post-indexed candidate for the
// load/store MI, recording the result in MatchInfo. cast<> asserts that MI is a
// GLoadStore.
1455 auto &LdSt = cast<GLoadStore>(MI);
1456
// Atomic accesses are never converted.
1457 if (LdSt.isAtomic())
1458 return false;
1459
// Pre-indexing is tried first; post-indexing is attempted only if that fails.
1460 MatchInfo.IsPre = findPreIndexCandidate(LdSt, MatchInfo.Addr, MatchInfo.Base,
1461 MatchInfo.Offset);
1462 if (!MatchInfo.IsPre &&
1463 !findPostIndexCandidate(LdSt, MatchInfo.Addr, MatchInfo.Base,
1464 MatchInfo.Offset, MatchInfo.RematOffset))
1465 return false;
1466
1467 return true;
1468}
1469
1471 MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) const {
// Builds the indexed load/store described by MatchInfo, then erases both the
// original access MI and the instruction that used to define MatchInfo.Addr.
1472 MachineInstr &AddrDef = *MRI.getUniqueVRegDef(MatchInfo.Addr);
1473 unsigned Opcode = MI.getOpcode();
1474 bool IsStore = Opcode == TargetOpcode::G_STORE;
1475 unsigned NewOpcode = getIndexedOpc(Opcode);
1476
1477 // If the offset constant didn't happen to dominate the load/store, we can
1478 // just clone it as needed.
1479 if (MatchInfo.RematOffset) {
1480 auto *OldCst = MRI.getVRegDef(MatchInfo.Offset);
1481 auto NewCst = Builder.buildConstant(MRI.getType(MatchInfo.Offset),
1482 *OldCst->getOperand(1).getCImm());
1483 MatchInfo.Offset = NewCst.getReg(0);
1484 }
1485
// Operand layout: a store defines only the written-back address and uses the
// stored value; a load defines the loaded value followed by the address.
1486 auto MIB = Builder.buildInstr(NewOpcode);
1487 if (IsStore) {
1488 MIB.addDef(MatchInfo.Addr);
1489 MIB.addUse(MI.getOperand(0).getReg());
1490 } else {
1491 MIB.addDef(MI.getOperand(0).getReg());
1492 MIB.addDef(MatchInfo.Addr);
1493 }
1494
// Remaining operands: base, offset, and an immediate flag that is non-zero for
// pre-indexing.
1495 MIB.addUse(MatchInfo.Base);
1496 MIB.addUse(MatchInfo.Offset);
1497 MIB.addImm(MatchInfo.IsPre);
1498 MIB->cloneMemRefs(*MI.getMF(), MI);
1499 MI.eraseFromParent();
1500 AddrDef.eraseFromParent();
1501
// NOTE(review): the debug message below misspells "Combined"; it is a runtime
// string, so it is left untouched by this documentation-only change.
1502 LLVM_DEBUG(dbgs() << " Combinined to indexed operation");
1503}
1504
1506 MachineInstr *&OtherMI) const {
// Given a G_[SU]DIV or G_[SU]REM in MI, looks for the complementary remainder
// or division of the same operands in the same block. On success OtherMI
// points at that partner instruction.
1507 unsigned Opcode = MI.getOpcode();
1508 bool IsDiv, IsSigned;
1509
1510 switch (Opcode) {
1511 default:
1512 llvm_unreachable("Unexpected opcode!");
1513 case TargetOpcode::G_SDIV:
1514 case TargetOpcode::G_UDIV: {
1515 IsDiv = true;
1516 IsSigned = Opcode == TargetOpcode::G_SDIV;
1517 break;
1518 }
1519 case TargetOpcode::G_SREM:
1520 case TargetOpcode::G_UREM: {
1521 IsDiv = false;
1522 IsSigned = Opcode == TargetOpcode::G_SREM;
1523 break;
1524 }
1525 }
1526
1527 Register Src1 = MI.getOperand(1).getReg();
// Select the opcode family that matches the signedness of MI.
1528 unsigned DivOpcode, RemOpcode, DivremOpcode;
1529 if (IsSigned) {
1530 DivOpcode = TargetOpcode::G_SDIV;
1531 RemOpcode = TargetOpcode::G_SREM;
1532 DivremOpcode = TargetOpcode::G_SDIVREM;
1533 } else {
1534 DivOpcode = TargetOpcode::G_UDIV;
1535 RemOpcode = TargetOpcode::G_UREM;
1536 DivremOpcode = TargetOpcode::G_UDIVREM;
1537 }
1538
1539 if (!isLegalOrBeforeLegalizer({DivremOpcode, {MRI.getType(Src1)}}))
1540 return false;
1541
1542 // Combine:
1543 // %div:_ = G_[SU]DIV %src1:_, %src2:_
1544 // %rem:_ = G_[SU]REM %src1:_, %src2:_
1545 // into:
1546 // %div:_, %rem:_ = G_[SU]DIVREM %src1:_, %src2:_
1547
1548 // Combine:
1549 // %rem:_ = G_[SU]REM %src1:_, %src2:_
1550 // %div:_ = G_[SU]DIV %src1:_, %src2:_
1551 // into:
1552 // %div:_, %rem:_ = G_[SU]DIVREM %src1:_, %src2:_
1553
// Search the users of the first source operand for the partner: same block,
// complementary opcode, and both operands equal according to matchEqualDefs.
// The first match is returned.
1554 for (auto &UseMI : MRI.use_nodbg_instructions(Src1)) {
1555 if (MI.getParent() == UseMI.getParent() &&
1556 ((IsDiv && UseMI.getOpcode() == RemOpcode) ||
1557 (!IsDiv && UseMI.getOpcode() == DivOpcode)) &&
1558 matchEqualDefs(MI.getOperand(2), UseMI.getOperand(2)) &&
1559 matchEqualDefs(MI.getOperand(1), UseMI.getOperand(1))) {
1560 OtherMI = &UseMI;
1561 return true;
1562 }
1563 }
1564
1565 return false;
1566}
1567
1569 MachineInstr *&OtherMI) const {
// Replaces the division/remainder pair (MI and *OtherMI) with one
// G_[SU]DIVREM that defines both results, then erases the two originals.
1570 unsigned Opcode = MI.getOpcode();
1571 assert(OtherMI && "OtherMI shouldn't be empty.");
1572
// Work out which of the two instructions provides the quotient register and
// which provides the remainder register.
1573 Register DestDivReg, DestRemReg;
1574 if (Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_UDIV) {
1575 DestDivReg = MI.getOperand(0).getReg();
1576 DestRemReg = OtherMI->getOperand(0).getReg();
1577 } else {
1578 DestDivReg = OtherMI->getOperand(0).getReg();
1579 DestRemReg = MI.getOperand(0).getReg();
1580 }
1581
1582 bool IsSigned =
1583 Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_SREM;
1584
1585 // Check which instruction is first in the block so we don't break def-use
1586 // deps by "moving" the instruction incorrectly. Also keep track of which
1587 // instruction is first so we pick its operands, avoiding use-before-def
1588 // bugs.
1589 MachineInstr *FirstInst = dominates(MI, *OtherMI) ? &MI : OtherMI;
1590 Builder.setInstrAndDebugLoc(*FirstInst);
1591
1592 Builder.buildInstr(IsSigned ? TargetOpcode::G_SDIVREM
1593 : TargetOpcode::G_UDIVREM,
1594 {DestDivReg, DestRemReg},
1595 { FirstInst->getOperand(1), FirstInst->getOperand(2) });
1596 MI.eraseFromParent();
1597 OtherMI->eraseFromParent();
1598}
1599
1601 MachineInstr &MI, MachineInstr *&BrCond) const {
// Matches a G_BR that is immediately preceded by a G_BRCOND whose target is the
// layout successor of the current block. BrCond is set to the instruction in
// front of the G_BR whenever one exists, even if the match then fails.
1602 assert(MI.getOpcode() == TargetOpcode::G_BR);
1603
1604 // Try to match the following:
1605 // bb1:
1606 // G_BRCOND %c1, %bb2
1607 // G_BR %bb3
1608 // bb2:
1609 // ...
1610 // bb3:
1611
1612 // The above pattern does not have a fall through to the successor bb2, always
1613 // resulting in a branch no matter which path is taken. Here we try to find
1614 // and replace that pattern with conditional branch to bb3 and otherwise
1615 // fallthrough to bb2. This is generally better for branch predictors.
1616
1617 MachineBasicBlock *MBB = MI.getParent();
// NOTE(review): the line declaring BrIt is not visible in this view; from its
// uses below it is presumably an iterator positioned at MI — confirm.
1619 if (BrIt == MBB->begin())
1620 return false;
1621 assert(std::next(BrIt) == MBB->end() && "expected G_BR to be a terminator");
1622
1623 BrCond = &*std::prev(BrIt);
1624 if (BrCond->getOpcode() != TargetOpcode::G_BRCOND)
1625 return false;
1626
1627 // Check that the next block is the conditional branch target. Also make sure
1628 // that it isn't the same as the G_BR's target (otherwise, this will loop.)
1629 MachineBasicBlock *BrCondTarget = BrCond->getOperand(1).getMBB();
1630 return BrCondTarget != MI.getOperand(0).getMBB() &&
1631 MBB->isLayoutSuccessor(BrCondTarget);
1632}
1633
1635 MachineInstr &MI, MachineInstr *&BrCond) const {
// Inverts the G_BRCOND condition with an XOR against the target's "true" value
// and swaps the targets of the G_BRCOND and the G_BR (MI).
1636 MachineBasicBlock *BrTarget = MI.getOperand(0).getMBB();
// New instructions are inserted in front of the conditional branch.
1637 Builder.setInstrAndDebugLoc(*BrCond);
1638 LLT Ty = MRI.getType(BrCond->getOperand(0).getReg());
1639 // FIXME: Does int/fp matter for this? If so, we might need to restrict
1640 // this to i1 only since we might not know for sure what kind of
1641 // compare generated the condition value.
1642 auto True = Builder.buildConstant(
1643 Ty, getICmpTrueVal(getTargetLowering(), false, false));
1644 auto Xor = Builder.buildXor(Ty, BrCond->getOperand(0), True);
1645
// The unconditional branch now goes to the old G_BRCOND target. The observer
// is told about each in-place modification.
1646 auto *FallthroughBB = BrCond->getOperand(1).getMBB();
1647 Observer.changingInstr(MI);
1648 MI.getOperand(0).setMBB(FallthroughBB);
1649 Observer.changedInstr(MI);
1650
1651 // Change the conditional branch to use the inverted condition and
1652 // new target block.
1653 Observer.changingInstr(*BrCond);
1654 BrCond->getOperand(0).setReg(Xor.getReg(0));
1655 BrCond->getOperand(1).setMBB(BrTarget);
1656 Observer.changedInstr(*BrCond);
1657}
1658
// Hands MI to LegalizerHelper::lowerMemcpyInline using a locally constructed
// builder and a dummy observer, and returns the outcome of comparing the
// result. NOTE(review): the right-hand side of that comparison is on a line
// not visible in this view.
1660 MachineIRBuilder HelperBuilder(MI);
1661 GISelObserverWrapper DummyObserver;
1662 LegalizerHelper Helper(HelperBuilder.getMF(), DummyObserver, HelperBuilder);
1663 return Helper.lowerMemcpyInline(MI) ==
1665}
1666
1668 unsigned MaxLen) const {
// Hands MI and MaxLen to LegalizerHelper::lowerMemCpyFamily using a locally
// constructed builder and a dummy observer, and returns the outcome of
// comparing the result. NOTE(review): the right-hand side of that comparison
// is on a line not visible in this view.
1669 MachineIRBuilder HelperBuilder(MI);
1670 GISelObserverWrapper DummyObserver;
1671 LegalizerHelper Helper(HelperBuilder.getMF(), DummyObserver, HelperBuilder);
1672 return Helper.lowerMemCpyFamily(MI, MaxLen) ==
1674}
1675
1677 const MachineRegisterInfo &MRI,
1678 const APFloat &Val) {
// Constant-folds the unary floating-point operation MI applied to Val and
// returns the folded value. Opcodes outside the switch below reach
// llvm_unreachable.
1679 APFloat Result(Val);
1680 switch (MI.getOpcode()) {
1681 default:
1682 llvm_unreachable("Unexpected opcode!");
1683 case TargetOpcode::G_FNEG: {
1684 Result.changeSign();
1685 return Result;
1686 }
1687 case TargetOpcode::G_FABS: {
1688 Result.clearSign();
1689 return Result;
1690 }
// The rounding operations differ only in the rounding mode handed to
// roundToIntegral.
1691 case TargetOpcode::G_FCEIL:
1692 Result.roundToIntegral(APFloat::rmTowardPositive);
1693 return Result;
1694 case TargetOpcode::G_FFLOOR:
1695 Result.roundToIntegral(APFloat::rmTowardNegative);
1696 return Result;
1697 case TargetOpcode::G_INTRINSIC_TRUNC:
1698 Result.roundToIntegral(APFloat::rmTowardZero);
1699 return Result;
1700 case TargetOpcode::G_INTRINSIC_ROUND:
1701 Result.roundToIntegral(APFloat::rmNearestTiesToAway);
1702 return Result;
1703 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
1704 Result.roundToIntegral(APFloat::rmNearestTiesToEven);
1705 return Result;
1706 case TargetOpcode::G_FRINT:
1707 case TargetOpcode::G_FNEARBYINT:
1708 // Use default rounding mode (round to nearest, ties to even)
1709 Result.roundToIntegral(APFloat::rmNearestTiesToEven);
1710 return Result;
1711 case TargetOpcode::G_FPEXT:
1712 case TargetOpcode::G_FPTRUNC: {
1713 bool Unused;
1714 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
// NOTE(review): the start of the conversion call is on a line not visible in
// this view; only its trailing argument can be seen.
1716 &Unused);
1717 return Result;
1718 }
// G_FSQRT and G_FLOG2 are evaluated through the host's double-precision
// sqrt/log2 and converted back after the switch. NOTE(review): the start of
// each conversion call that precedes convertToDouble() is not visible here.
1719 case TargetOpcode::G_FSQRT: {
1720 bool Unused;
1722 &Unused);
1723 Result = APFloat(sqrt(Result.convertToDouble()));
1724 break;
1725 }
1726 case TargetOpcode::G_FLOG2: {
1727 bool Unused;
1729 &Unused);
1730 Result = APFloat(log2(Result.convertToDouble()));
1731 break;
1732 }
1733 }
1734 // Convert the `APFloat` back to the semantics of the input value `Val`.
1735 // Otherwise, `buildFConstant` will assert on size mismatch. Only `G_FSQRT`,
1736 // and `G_FLOG2` reach here.
1737 bool Unused;
1738 Result.convert(Val.getSemantics(), APFloat::rmNearestTiesToEven, &Unused);
1739 return Result;
1740}
1741
1743 MachineInstr &MI, const ConstantFP *Cst) const {
// Folds MI applied to the constant Cst, emits the result as a G_FCONSTANT
// defining MI's destination operand, and erases MI.
1744 APFloat Folded = constantFoldFpUnary(MI, MRI, Cst->getValue());
1745 const ConstantFP *NewCst = ConstantFP::get(Builder.getContext(), Folded);
1746 Builder.buildFConstant(MI.getOperand(0), *NewCst);
1747 MI.eraseFromParent();
1748}
1749
1751 PtrAddChain &MatchInfo) const {
// On success MatchInfo receives the summed immediate, the inner base register,
// the register bank of the inner offset, and the flags for the merged
// G_PTR_ADD.
1752 // We're trying to match the following pattern:
1753 // %t1 = G_PTR_ADD %base, G_CONSTANT imm1
1754 // %root = G_PTR_ADD %t1, G_CONSTANT imm2
1755 // -->
1756 // %root = G_PTR_ADD %base, G_CONSTANT (imm1 + imm2)
1757
1758 if (MI.getOpcode() != TargetOpcode::G_PTR_ADD)
1759 return false;
1760
// Naming: Add2/Imm1 belong to the outer (root) G_PTR_ADD, Base/Imm2 to the
// inner one that defines Add2.
1761 Register Add2 = MI.getOperand(1).getReg();
1762 Register Imm1 = MI.getOperand(2).getReg();
1763 auto MaybeImmVal = getIConstantVRegValWithLookThrough(Imm1, MRI);
1764 if (!MaybeImmVal)
1765 return false;
1766
1767 MachineInstr *Add2Def = MRI.getVRegDef(Add2);
1768 if (!Add2Def || Add2Def->getOpcode() != TargetOpcode::G_PTR_ADD)
1769 return false;
1770
1771 Register Base = Add2Def->getOperand(1).getReg();
1772 Register Imm2 = Add2Def->getOperand(2).getReg();
1773 auto MaybeImm2Val = getIConstantVRegValWithLookThrough(Imm2, MRI);
1774 if (!MaybeImm2Val)
1775 return false;
1776
1777 // Check if the new combined immediate forms an illegal addressing mode.
1778 // Do not combine if it was legal before but would get illegal.
1779 // To do so, we need to find a load/store user of the pointer to get
1780 // the access type.
1781 Type *AccessTy = nullptr;
1782 auto &MF = *MI.getMF();
// Only the first load/store user found determines the access type, which is
// taken from that instruction's operand-0 register type.
1783 for (auto &UseMI : MRI.use_nodbg_instructions(MI.getOperand(0).getReg())) {
1784 if (auto *LdSt = dyn_cast<GLoadStore>(&UseMI)) {
1785 AccessTy = getTypeForLLT(MRI.getType(LdSt->getReg(0)),
1786 MF.getFunction().getContext());
1787 break;
1788 }
1789 }
// NOTE(review): the declarations of AMNew and AMOld are on lines not visible
// in this view.
1791 APInt CombinedImm = MaybeImmVal->Value + MaybeImm2Val->Value;
1792 AMNew.BaseOffs = CombinedImm.getSExtValue();
// The addressing-mode check only runs when a load/store user was found.
1793 if (AccessTy) {
1794 AMNew.HasBaseReg = true;
1796 AMOld.BaseOffs = MaybeImmVal->Value.getSExtValue();
1797 AMOld.HasBaseReg = true;
1798 unsigned AS = MRI.getType(Add2).getAddressSpace();
1799 const auto &TLI = *MF.getSubtarget().getTargetLowering();
1800 if (TLI.isLegalAddressingMode(MF.getDataLayout(), AMOld, AccessTy, AS) &&
1801 !TLI.isLegalAddressingMode(MF.getDataLayout(), AMNew, AccessTy, AS))
1802 return false;
1803 }
1804
1805 // Reassociating nuw additions preserves nuw. If both original G_PTR_ADDs are
1806 // inbounds, reaching the same result in one G_PTR_ADD is also inbounds.
1807 // The nusw constraints are satisfied because imm1+imm2 cannot exceed the
1808 // largest signed integer that fits into the index type, which is the maximum
1809 // size of allocated objects according to the IR Language Reference.
1810 unsigned PtrAddFlags = MI.getFlags();
1811 unsigned LHSPtrAddFlags = Add2Def->getFlags();
// A flag is carried over only if it is set on both original G_PTR_ADDs.
1812 bool IsNoUWrap = PtrAddFlags & LHSPtrAddFlags & MachineInstr::MIFlag::NoUWrap;
1813 bool IsInBounds =
1814 PtrAddFlags & LHSPtrAddFlags & MachineInstr::MIFlag::InBounds;
1815 unsigned Flags = 0;
// NOTE(review): the statements that set bits in Flags under the two conditions
// below are on lines not visible in this view.
1816 if (IsNoUWrap)
1818 if (IsInBounds) {
1821 }
1822
1823 // Pass the combined immediate to the apply function.
1824 MatchInfo.Imm = AMNew.BaseOffs;
1825 MatchInfo.Base = Base;
1826 MatchInfo.Bank = getRegBank(Imm2);
1827 MatchInfo.Flags = Flags;
1828 return true;
1829}
1830
1832 PtrAddChain &MatchInfo) const {
// Rewrites the G_PTR_ADD MI in place so that it adds the combined immediate
// from MatchInfo to MatchInfo.Base, and replaces its flags with
// MatchInfo.Flags.
1833 assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
1834 MachineIRBuilder MIB(MI);
1835 LLT OffsetTy = MRI.getType(MI.getOperand(2).getReg());
1836 auto NewOffset = MIB.buildConstant(OffsetTy, MatchInfo.Imm);
// Assign the new constant the register bank that was recorded during matching.
1837 setRegBank(NewOffset.getReg(0), MatchInfo.Bank);
1838 Observer.changingInstr(MI);
1839 MI.getOperand(1).setReg(MatchInfo.Base);
1840 MI.getOperand(2).setReg(NewOffset.getReg(0));
1841 MI.setFlags(MatchInfo.Flags);
1842 Observer.changedInstr(MI);
1843}
1844
1846 RegisterImmPair &MatchInfo) const {
// On success MatchInfo.Imm holds imm1 + imm2 and MatchInfo.Reg holds %base.
1847 // We're trying to match the following pattern with any of
1848 // G_SHL/G_ASHR/G_LSHR/G_SSHLSAT/G_USHLSAT shift instructions:
1849 // %t1 = SHIFT %base, G_CONSTANT imm1
1850 // %root = SHIFT %t1, G_CONSTANT imm2
1851 // -->
1852 // %root = SHIFT %base, G_CONSTANT (imm1 + imm2)
1853
1854 unsigned Opcode = MI.getOpcode();
1855 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
1856 Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||
1857 Opcode == TargetOpcode::G_USHLSAT) &&
1858 "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");
1859
// Naming: Shl2/Imm1 belong to the outer (root) shift, Base/Imm2 to the inner
// shift that defines Shl2.
1860 Register Shl2 = MI.getOperand(1).getReg();
1861 Register Imm1 = MI.getOperand(2).getReg();
1862 auto MaybeImmVal = getIConstantVRegValWithLookThrough(Imm1, MRI);
1863 if (!MaybeImmVal)
1864 return false;
1865
// The inner instruction must use exactly the same shift opcode as the root.
1866 MachineInstr *Shl2Def = MRI.getUniqueVRegDef(Shl2);
1867 if (Shl2Def->getOpcode() != Opcode)
1868 return false;
1869
1870 Register Base = Shl2Def->getOperand(1).getReg();
1871 Register Imm2 = Shl2Def->getOperand(2).getReg();
1872 auto MaybeImm2Val = getIConstantVRegValWithLookThrough(Imm2, MRI);
1873 if (!MaybeImm2Val)
1874 return false;
1875
1876 // Pass the combined immediate to the apply function.
1877 MatchInfo.Imm =
1878 (MaybeImmVal->Value.getZExtValue() + MaybeImm2Val->Value).getZExtValue();
1879 MatchInfo.Reg = Base;
1880
1881 // There is no simple replacement for a saturating unsigned left shift that
1882 // exceeds the scalar size.
// MatchInfo has already been written at this point, even though the match is
// rejected.
1883 if (Opcode == TargetOpcode::G_USHLSAT &&
1884 MatchInfo.Imm >= MRI.getType(Shl2).getScalarSizeInBits())
1885 return false;
1886
1887 return true;
1888}
1889
1891 RegisterImmPair &MatchInfo) const {
// Rewrites the shift MI so that it shifts MatchInfo.Reg by the combined amount
// MatchInfo.Imm. A combined amount at or beyond the scalar width turns logical
// shifts into a zero constant; for the remaining opcodes the amount is clamped
// to width - 1.
1892 unsigned Opcode = MI.getOpcode();
1893 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
1894 Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||
1895 Opcode == TargetOpcode::G_USHLSAT) &&
1896 "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");
1897
1898 LLT Ty = MRI.getType(MI.getOperand(1).getReg());
1899 unsigned const ScalarSizeInBits = Ty.getScalarSizeInBits();
1900 auto Imm = MatchInfo.Imm;
1901
1902 if (Imm >= ScalarSizeInBits) {
1903 // Any logical shift that exceeds scalar size will produce zero.
1904 if (Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_LSHR) {
1905 Builder.buildConstant(MI.getOperand(0), 0);
1906 MI.eraseFromParent();
1907 return;
1908 }
1909 // Arithmetic shift and saturating signed left shift have no effect beyond
1910 // scalar size.
1911 Imm = ScalarSizeInBits - 1;
1912 }
1913
// Materialize the new shift amount with the same type as the old amount
// operand and update MI in place, notifying the observer.
1914 LLT ImmTy = MRI.getType(MI.getOperand(2).getReg());
1915 Register NewImm = Builder.buildConstant(ImmTy, Imm).getReg(0);
1916 Observer.changingInstr(MI);
1917 MI.getOperand(1).setReg(MatchInfo.Reg);
1918 MI.getOperand(2).setReg(NewImm);
1919 Observer.changedInstr(MI);
1920}
1921
// Match step for the shift-of-shifted-logic fold sketched in the comment
// below. On success MatchInfo holds the logic instruction (Logic), the inner
// shift (Shift2), the logic operand that is not the inner shift
// (LogicNonShiftReg) and the combined amount C0 + C1 (ValSum).
// Returns false when any part of the pattern is missing, when the outer
// amount is zero, or when C0 + C1 is not below the scalar width.
// NOTE(review): the declaration line that precedes this one (listing line
// 1922) is absent from this extract, so the function name is not visible here.
1923 MachineInstr &MI, ShiftOfShiftedLogic &MatchInfo) const {
1924 // We're trying to match the following pattern with any of
1925 // G_SHL/G_ASHR/G_LSHR/G_USHLSAT/G_SSHLSAT shift instructions in combination
1926 // with any of G_AND/G_OR/G_XOR logic instructions.
1927 // %t1 = SHIFT %X, G_CONSTANT C0
1928 // %t2 = LOGIC %t1, %Y
1929 // %root = SHIFT %t2, G_CONSTANT C1
1930 // -->
1931 // %t3 = SHIFT %X, G_CONSTANT (C0+C1)
1932 // %t4 = SHIFT %Y, G_CONSTANT C1
1933 // %root = LOGIC %t3, %t4
1934 unsigned ShiftOpcode = MI.getOpcode();
1935 assert((ShiftOpcode == TargetOpcode::G_SHL ||
1936 ShiftOpcode == TargetOpcode::G_ASHR ||
1937 ShiftOpcode == TargetOpcode::G_LSHR ||
1938 ShiftOpcode == TargetOpcode::G_USHLSAT ||
1939 ShiftOpcode == TargetOpcode::G_SSHLSAT) &&
1940 "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT");
1941
1942 // Match a one-use bitwise logic op.
1943 Register LogicDest = MI.getOperand(1).getReg();
1944 if (!MRI.hasOneNonDBGUse(LogicDest))
1945 return false;
1946
1947 MachineInstr *LogicMI = MRI.getUniqueVRegDef(LogicDest);
1948 unsigned LogicOpcode = LogicMI->getOpcode();
1949 if (LogicOpcode != TargetOpcode::G_AND && LogicOpcode != TargetOpcode::G_OR &&
1950 LogicOpcode != TargetOpcode::G_XOR)
1951 return false;
1952
1953 // Find a matching one-use shift by constant.
1954 const Register C1 = MI.getOperand(2).getReg();
1955 auto MaybeImmVal = getIConstantVRegValWithLookThrough(C1, MRI);
// A zero outer shift amount is rejected as well as a non-constant one.
1956 if (!MaybeImmVal || MaybeImmVal->Value == 0)
1957 return false;
1958
1959 const uint64_t C1Val = MaybeImmVal->Value.getZExtValue();
1960
// Helper: true when MI is a single-use shift with the same opcode as the root
// and a constant amount; the amount is returned through ShiftVal.
1961 auto matchFirstShift = [&](const MachineInstr *MI, uint64_t &ShiftVal) {
1962 // Shift should match previous one and should be a one-use.
1963 if (MI->getOpcode() != ShiftOpcode ||
1964 !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
1965 return false;
1966
1967 // Must be a constant.
1968 auto MaybeImmVal =
1969 getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
1970 if (!MaybeImmVal)
1971 return false;
1972
// NOTE(review): C0 is read sign-extended here while C1 above is read
// zero-extended -- confirm the asymmetry is intended.
1973 ShiftVal = MaybeImmVal->Value.getSExtValue();
1974 return true;
1975 };
1976
1977 // Logic ops are commutative, so check each operand for a match.
1978 Register LogicMIReg1 = LogicMI->getOperand(1).getReg();
1979 MachineInstr *LogicMIOp1 = MRI.getUniqueVRegDef(LogicMIReg1);
1980 Register LogicMIReg2 = LogicMI->getOperand(2).getReg();
1981 MachineInstr *LogicMIOp2 = MRI.getUniqueVRegDef(LogicMIReg2);
// C0Val is only written by a successful matchFirstShift call; both failure
// paths return before it is read.
1982 uint64_t C0Val;
1983
1984 if (matchFirstShift(LogicMIOp1, C0Val)) {
1985 MatchInfo.LogicNonShiftReg = LogicMIReg2;
1986 MatchInfo.Shift2 = LogicMIOp1;
1987 } else if (matchFirstShift(LogicMIOp2, C0Val)) {
1988 MatchInfo.LogicNonShiftReg = LogicMIReg1;
1989 MatchInfo.Shift2 = LogicMIOp2;
1990 } else
1991 return false;
1992
1993 MatchInfo.ValSum = C0Val + C1Val;
1994
1995 // The fold is not valid if the sum of the shift values exceeds bitwidth.
1996 if (MatchInfo.ValSum >= MRI.getType(LogicDest).getScalarSizeInBits())
1997 return false;
1998
1999 MatchInfo.Logic = LogicMI;
2000 return true;
2001}
2002
// Apply step for the shift-of-shifted-logic fold: builds
//   Shift1 = SHIFT(base of MatchInfo.Shift2, ValSum)
//   Shift2 = SHIFT(MatchInfo.LogicNonShiftReg, MI's original amount)
// and a LOGIC of the two that defines MI's destination, then erases the old
// inner shift, the old logic instruction and MI.
// NOTE(review): the declaration line that precedes this one (listing line
// 2003) is absent from this extract, so the function name is not visible here.
2004 MachineInstr &MI, ShiftOfShiftedLogic &MatchInfo) const {
2005 unsigned Opcode = MI.getOpcode();
2006 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
2007 Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_USHLSAT ||
2008 Opcode == TargetOpcode::G_SSHLSAT) &&
2009 "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT");
2010
// ShlType is the type of the shift-amount operand; DestType is the result type.
2011 LLT ShlType = MRI.getType(MI.getOperand(2).getReg());
2012 LLT DestType = MRI.getType(MI.getOperand(0).getReg());
2013
2014 Register Const = Builder.buildConstant(ShlType, MatchInfo.ValSum).getReg(0);
2015
2016 Register Shift1Base = MatchInfo.Shift2->getOperand(1).getReg();
2017 Register Shift1 =
2018 Builder.buildInstr(Opcode, {DestType}, {Shift1Base, Const}).getReg(0);
2019
2020 // If LogicNonShiftReg is the same to Shift1Base, and shift1 const is the same
2021 // to MatchInfo.Shift2 const, CSEMIRBuilder will reuse the old shift1 when
2022 // build shift2. So, if we erase MatchInfo.Shift2 at the end, actually we
2023 // remove old shift1. And it will cause crash later. So erase it earlier to
2024 // avoid the crash.
2025 MatchInfo.Shift2->eraseFromParent();
2026
2027 Register Shift2Const = MI.getOperand(2).getReg();
2028 Register Shift2 = Builder
2029 .buildInstr(Opcode, {DestType},
2030 {MatchInfo.LogicNonShiftReg, Shift2Const})
2031 .getReg(0);
2032
// The new logic instruction defines MI's destination register directly.
2033 Register Dest = MI.getOperand(0).getReg();
2034 Builder.buildInstr(MatchInfo.Logic->getOpcode(), {Dest}, {Shift1, Shift2});
2035
2036 // This was one use so it's safe to remove it.
2037 MatchInfo.Logic->eraseFromParent();
2038
2039 MI.eraseFromParent();
2040}
2041
// Match step that commutes a G_SHL with a G_ADD/G_OR feeding it (see the two
// "Combine" lines below). Requires the target hook
// isDesirableToCommuteWithShift to agree and both C1 and the shift amount to
// be constants or constant splats. On success MatchInfo is a builder callback
// that emits the two shifts and the add/or into the original destination.
// NOTE(review): the declaration line that precedes this one (listing line
// 2042) is absent from this extract, so the function name is not visible here.
2043 BuildFnTy &MatchInfo) const {
2044 assert(MI.getOpcode() == TargetOpcode::G_SHL && "Expected G_SHL");
2045 // Combine (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
2046 // Combine (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
2047 auto &Shl = cast<GenericMachineInstr>(MI);
2048 Register DstReg = Shl.getReg(0);
2049 Register SrcReg = Shl.getReg(1);
2050 Register ShiftReg = Shl.getReg(2);
2051 Register X, C1;
2052
2053 if (!getTargetLowering().isDesirableToCommuteWithShift(MI, !isPreLegalize()))
2054 return false;
2055
2056 if (!mi_match(SrcReg, MRI,
// NOTE(review): listing line 2057 (the first part of this mi_match pattern) is
// absent from this extract; only the G_OR alternative is visible.
2058 m_GOr(m_Reg(X), m_Reg(C1))))))
2059 return false;
2060
2061 APInt C1Val, C2Val;
2062 if (!mi_match(C1, MRI, m_ICstOrSplat(C1Val)) ||
2063 !mi_match(ShiftReg, MRI, m_ICstOrSplat(C2Val)))
2064 return false;
2065
2066 auto *SrcDef = MRI.getVRegDef(SrcReg);
2067 assert((SrcDef->getOpcode() == TargetOpcode::G_ADD ||
2068 SrcDef->getOpcode() == TargetOpcode::G_OR) && "Unexpected op");
2069 LLT SrcTy = MRI.getType(SrcReg);
// Everything is captured by value ([=]), including the SrcDef pointer whose
// opcode is read when the callback runs.
2070 MatchInfo = [=](MachineIRBuilder &B) {
2071 auto S1 = B.buildShl(SrcTy, X, ShiftReg);
2072 auto S2 = B.buildShl(SrcTy, C1, ShiftReg);
2073 B.buildInstr(SrcDef->getOpcode(), {DstReg}, {S1, S2});
2074 };
2075 return true;
2076}
2077
// Match step for a G_LSHR (MI) combined with another shift instruction
// (ShiftMI). Both shift amounts must be constants or constant splats, and
// their sum must be below the scalar width of ShiftMI's result. On success
// MatchInfo records ShiftMI's source, the summed amount, the amount type and
// ShiftMI's result type. A mask is requested (Mask/MaskVal) unless ShiftMI's
// amount plus MI's operand width equals ShiftMI's result width; the masked
// form additionally requires MI's input and ShiftMI's result to be single-use.
// NOTE(review): the declaration line that precedes this one (listing line
// 2078) is absent from this extract, so the function name is not visible here.
2079 LshrOfTruncOfLshr &MatchInfo,
2080 MachineInstr &ShiftMI) const {
2081 assert(MI.getOpcode() == TargetOpcode::G_LSHR && "Expected a G_LSHR");
2082
2083 Register N0 = MI.getOperand(1).getReg();
2084 Register N1 = MI.getOperand(2).getReg();
2085 unsigned OpSizeInBits = MRI.getType(N0).getScalarSizeInBits();
2086
2087 APInt N1C, N001C;
2088 if (!mi_match(N1, MRI, m_ICstOrSplat(N1C)))
2089 return false;
2090 auto N001 = ShiftMI.getOperand(2).getReg();
2091 if (!mi_match(N001, MRI, m_ICstOrSplat(N001C)))
2092 return false;
2093
// Zero-extend the narrower constant so the two can be added and compared.
2094 if (N001C.getBitWidth() > N1C.getBitWidth())
2095 N1C = N1C.zext(N001C.getBitWidth());
2096 else
2097 N001C = N001C.zext(N1C.getBitWidth());
2098
2099 Register InnerShift = ShiftMI.getOperand(0).getReg();
2100 LLT InnerShiftTy = MRI.getType(InnerShift);
2101 uint64_t InnerShiftSize = InnerShiftTy.getScalarSizeInBits();
2102 if ((N1C + N001C).ult(InnerShiftSize)) {
2103 MatchInfo.Src = ShiftMI.getOperand(1).getReg();
2104 MatchInfo.ShiftAmt = N1C + N001C;
2105 MatchInfo.ShiftAmtTy = MRI.getType(N001);
2106 MatchInfo.InnerShiftTy = InnerShiftTy;
2107
// No mask is requested in this case; MatchInfo.Mask is left as the caller
// passed it in.
2108 if ((N001C + OpSizeInBits) == InnerShiftSize)
2109 return true;
2110 if (MRI.hasOneUse(N0) && MRI.hasOneUse(InnerShift)) {
2111 MatchInfo.Mask = true;
2112 MatchInfo.MaskVal = APInt(N1C.getBitWidth(), OpSizeInBits) - N1C;
2113 return true;
2114 }
2115 }
2116 return false;
2117}
2118
// Apply step: builds a single G_LSHR of MatchInfo.Src by MatchInfo.ShiftAmt in
// MatchInfo.InnerShiftTy, optionally ANDs it with a mask constant when
// MatchInfo.Mask is set, truncates the result into MI's destination and
// erases MI.
// NOTE(review): the declaration line that precedes this one (listing line
// 2119) is absent from this extract, so the function name is not visible here.
2120 MachineInstr &MI, LshrOfTruncOfLshr &MatchInfo) const {
2121 assert(MI.getOpcode() == TargetOpcode::G_LSHR && "Expected a G_LSHR");
2122
2123 Register Dst = MI.getOperand(0).getReg();
2124 auto ShiftAmt =
2125 Builder.buildConstant(MatchInfo.ShiftAmtTy, MatchInfo.ShiftAmt);
2126 auto Shift =
2127 Builder.buildLShr(MatchInfo.InnerShiftTy, MatchInfo.Src, ShiftAmt);
2128 if (MatchInfo.Mask == true) {
2129 APInt MaskVal =
// NOTE(review): listing line 2130 (the start of the MaskVal initializer) is
// absent from this extract; only its final argument is visible below.
2131 MatchInfo.MaskVal.getZExtValue());
2132 auto Mask = Builder.buildConstant(MatchInfo.InnerShiftTy, MaskVal);
2133 auto And = Builder.buildAnd(MatchInfo.InnerShiftTy, Shift, Mask);
2134 Builder.buildTrunc(Dst, And);
2135 } else
2136 Builder.buildTrunc(Dst, Shift);
2137 MI.eraseFromParent();
2138}
2139
// Match step for a G_MUL whose second source operand is a constant. ShiftVal
// receives exactLogBase2() of that constant; the match succeeds unless that
// value, reinterpreted as int32_t, is -1.
// NOTE(review): presumably exactLogBase2() yields -1 for a value that is not a
// power of two -- confirm against the APInt documentation.
// NOTE(review): the declaration line that precedes this one (listing line
// 2140) is absent from this extract, so the function name is not visible here.
2141 unsigned &ShiftVal) const {
2142 assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
2143 auto MaybeImmVal =
2144 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
2145 if (!MaybeImmVal)
2146 return false;
2147
2148 ShiftVal = MaybeImmVal->Value.exactLogBase2();
2149 return (static_cast<int32_t>(ShiftVal) != -1);
2150}
2151
// Apply step: turns the G_MUL MI into a G_SHL in place, replacing its second
// source operand with a freshly built constant ShiftVal of the result type.
// NOTE(review): the declaration line that precedes this one (listing line
// 2152) is absent from this extract, so the function name is not visible here.
2153 unsigned &ShiftVal) const {
2154 assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
2155 MachineIRBuilder MIB(MI);
2156 LLT ShiftTy = MRI.getType(MI.getOperand(0).getReg());
2157 auto ShiftCst = MIB.buildConstant(ShiftTy, ShiftVal);
2158 Observer.changingInstr(MI);
2159 MI.setDesc(MIB.getTII().get(TargetOpcode::G_SHL));
2160 MI.getOperand(2).setReg(ShiftCst.getReg(0));
2161 if (ShiftVal == ShiftTy.getScalarSizeInBits() - 1)
// NOTE(review): listing line 2162 (the statement controlled by the `if` above)
// is absent from this extract; the changedInstr call below is not that
// statement in the complete file.
2163 Observer.changedInstr(MI);
2164}
2165
// Match step that rewrites a G_SUB with a constant right-hand side into a
// G_ADD of the negated constant. Requires G_ADD on the result type to be legal
// (or the legalizer not to have run yet). On success MatchInfo is a callback
// that builds the negated constant and mutates MI in place.
// NOTE(review): the declaration line that precedes this one (listing line
// 2166) is absent from this extract, so the function name is not visible here.
2167 BuildFnTy &MatchInfo) const {
2168 GSub &Sub = cast<GSub>(MI);
2169
2170 LLT Ty = MRI.getType(Sub.getReg(0));
2171
2172 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, {Ty}}))
2173 return false;
2174
// NOTE(review): listing line 2175 (the condition guarding the return below) is
// absent from this extract.
2176 return false;
2177
2178 APInt Imm = getIConstantFromReg(Sub.getRHSReg(), MRI);
2179
// Ty, Imm and `this` state are captured by value; MI is captured by reference
// because the callback mutates the instruction itself.
2180 MatchInfo = [=, &MI](MachineIRBuilder &B) {
2181 auto NegCst = B.buildConstant(Ty, -Imm);
2182 Observer.changingInstr(MI);
2183 MI.setDesc(B.getTII().get(TargetOpcode::G_ADD));
2184 MI.getOperand(2).setReg(NegCst.getReg(0));
// NOTE(review): listing lines 2185 and 2187 are absent from this extract; the
// statement controlled by the `if` below is therefore not visible.
2186 if (Imm.isMinSignedValue())
2188 Observer.changedInstr(MI);
2189 };
2190 return true;
2191}
2192
2193// shl ([sza]ext x), y => zext (shl x, y), if shift does not overflow source
//
// Match step. Requires: the target hook isDesirableToPullExtFromShl agrees;
// the shifted value is a G_ANYEXT/G_ZEXT/G_SEXT; the shift amount is a
// constant or constant splat; and, when legalizer info is available, G_SHL on
// the extension source type is legal. MatchData receives the extension source
// register and the shift amount.
// NOTE(review): the declaration line that follows the comment above (listing
// line 2194) is absent from this extract, so the function name is not visible.
2195 RegisterImmPair &MatchData) const {
2196 assert(MI.getOpcode() == TargetOpcode::G_SHL && VT);
2197 if (!getTargetLowering().isDesirableToPullExtFromShl(MI))
2198 return false;
2199
2200 Register LHS = MI.getOperand(1).getReg();
2201
2202 Register ExtSrc;
2203 if (!mi_match(LHS, MRI, m_GAnyExt(m_Reg(ExtSrc))) &&
2204 !mi_match(LHS, MRI, m_GZExt(m_Reg(ExtSrc))) &&
2205 !mi_match(LHS, MRI, m_GSExt(m_Reg(ExtSrc))))
2206 return false;
2207
2208 Register RHS = MI.getOperand(2).getReg();
2209 MachineInstr *MIShiftAmt = MRI.getVRegDef(RHS);
2210 auto MaybeShiftAmtVal = isConstantOrConstantSplatVector(*MIShiftAmt, MRI);
2211 if (!MaybeShiftAmtVal)
2212 return false;
2213
2214 if (LI) {
2215 LLT SrcTy = MRI.getType(ExtSrc);
2216
2217 // We only really care about the legality with the shifted value. We can
2218 // pick any type for the constant shift amount, so ask the target what to
2219 // use. Otherwise we would have to guess and hope it is reported as legal.
2220 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(SrcTy);
2221 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SHL, {SrcTy, ShiftAmtTy}}))
2222 return false;
2223 }
2224
// MatchData is written before the final check, so it is populated even when
// this function goes on to return false.
2225 int64_t ShiftAmt = MaybeShiftAmtVal->getSExtValue();
2226 MatchData.Reg = ExtSrc;
2227 MatchData.Imm = ShiftAmt;
2228
// The shift must fit inside the known-zero leading bits of the narrow source
// and be smaller than the narrow type's width.
2229 unsigned MinLeadingZeros = VT->getKnownZeroes(ExtSrc).countl_one();
2230 unsigned SrcTySize = MRI.getType(ExtSrc).getScalarSizeInBits();
2231 return MinLeadingZeros >= ShiftAmt && ShiftAmt < SrcTySize;
2232}
2233
// Apply step: builds the G_SHL on the (narrow) type of MatchData.Reg with
// MI's flags, zero-extends the result into MI's destination and erases MI.
// The shift-amount constant is built with the same type as the shifted value.
// NOTE(review): the declaration line that precedes this one (listing line
// 2234) is absent from this extract, so the function name is not visible here.
2235 MachineInstr &MI, const RegisterImmPair &MatchData) const {
2236 Register ExtSrcReg = MatchData.Reg;
2237 int64_t ShiftAmtVal = MatchData.Imm;
2238
2239 LLT ExtSrcTy = MRI.getType(ExtSrcReg);
2240 auto ShiftAmt = Builder.buildConstant(ExtSrcTy, ShiftAmtVal);
2241 auto NarrowShift =
2242 Builder.buildShl(ExtSrcTy, ExtSrcReg, ShiftAmt, MI.getFlags());
2243 Builder.buildZExt(MI.getOperand(0), NarrowShift);
2244 MI.eraseFromParent();
2245}
2246
// Match step for a merge whose sources are exactly the results of one
// G_UNMERGE_VALUES, in order. On success MatchInfo is the unmerge's source
// register.
// NOTE(review): listing lines 2247 (declaration) and 2249 (presumably the
// definition of `Merge`) are absent from this extract.
2248 Register &MatchInfo) const {
2250 SmallVector<Register, 16> MergedValues;
2251 for (unsigned I = 0; I < Merge.getNumSources(); ++I)
2252 MergedValues.emplace_back(Merge.getSourceReg(I));
2253
// The first merged value must come from an unmerge with as many results as the
// merge has sources.
2254 auto *Unmerge = getOpcodeDef<GUnmerge>(MergedValues[0], MRI);
2255 if (!Unmerge || Unmerge->getNumDefs() != Merge.getNumSources())
2256 return false;
2257
// Every source must be the unmerge result at the same position.
2258 for (unsigned I = 0; I < MergedValues.size(); ++I)
2259 if (MergedValues[I] != Unmerge->getReg(I))
2260 return false;
2261
2262 MatchInfo = Unmerge->getSourceReg();
2263 return true;
2264}
2265
// Follows a chain of G_BITCASTs backwards: while Reg is defined by a bitcast,
// Reg is rebound to that bitcast's source. Returns the first register that is
// not a bitcast result.
// NOTE(review): the declaration line that precedes this one (listing line
// 2266) is absent from this extract; presumably this is the peekThroughBitcast
// helper called further down the listing -- confirm.
2267 const MachineRegisterInfo &MRI) {
// The matcher rebinds Reg on every successful match, so the loop body is empty.
2268 while (mi_match(Reg, MRI, m_GBitcast(m_Reg(Reg))))
2269 ;
2270
2271 return Reg;
2272}
2273
// Match step for a G_UNMERGE_VALUES whose source (looking through bitcasts)
// is a merge-like instruction with sources of the same type, or at least the
// same bit size, as the unmerge results. On success Operands receives all of
// the merge's source registers in order.
// NOTE(review): the declaration line that precedes this one (listing line
// 2274) is absent from this extract, so the function name is not visible here.
2275 MachineInstr &MI, SmallVectorImpl<Register> &Operands) const {
2276 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2277 "Expected an unmerge");
2278 auto &Unmerge = cast<GUnmerge>(MI);
2279 Register SrcReg = peekThroughBitcast(Unmerge.getSourceReg(), MRI);
2280
2281 auto *SrcInstr = getOpcodeDef<GMergeLikeInstr>(SrcReg, MRI);
2282 if (!SrcInstr)
2283 return false;
2284
2285 // Check the source type of the merge.
2286 LLT SrcMergeTy = MRI.getType(SrcInstr->getSourceReg(0));
2287 LLT Dst0Ty = MRI.getType(Unmerge.getReg(0));
2288 bool SameSize = Dst0Ty.getSizeInBits() == SrcMergeTy.getSizeInBits();
2289 if (SrcMergeTy != Dst0Ty && !SameSize)
2290 return false;
2291 // They are the same now (modulo a bitcast).
2292 // We can collect all the src registers.
2293 for (unsigned Idx = 0; Idx < SrcInstr->getNumSources(); ++Idx)
2294 Operands.push_back(SrcInstr->getSourceReg(Idx));
2295 return true;
2296}
2297
// Apply step: replaces each result of the G_UNMERGE_VALUES with the
// corresponding register in Operands -- directly when the types are equal,
// otherwise through a cast -- and erases MI.
// NOTE(review): the declaration line that precedes this one (listing line
// 2298) is absent from this extract, so the function name is not visible here.
2299 MachineInstr &MI, SmallVectorImpl<Register> &Operands) const {
2300 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2301 "Expected an unmerge");
2302 assert((MI.getNumOperands() - 1 == Operands.size()) &&
2303 "Not enough operands to replace all defs");
2304 unsigned NumElems = MI.getNumOperands() - 1;
2305
// Only the first operand/result pair is inspected to decide whether a cast is
// needed for all of them.
2306 LLT SrcTy = MRI.getType(Operands[0]);
2307 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
2308 bool CanReuseInputDirectly = DstTy == SrcTy;
2309 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
2310 Register DstReg = MI.getOperand(Idx).getReg();
2311 Register SrcReg = Operands[Idx];
2312
2313 // This combine may run after RegBankSelect, so we need to be aware of
2314 // register banks.
2315 const auto &DstCB = MRI.getRegClassOrRegBank(DstReg);
2316 if (!DstCB.isNull() && DstCB != MRI.getRegClassOrRegBank(SrcReg)) {
// Insert a copy that carries the destination's class/bank assignment.
2317 SrcReg = Builder.buildCopy(MRI.getType(SrcReg), SrcReg).getReg(0);
2318 MRI.setRegClassOrRegBank(SrcReg, DstCB);
2319 }
2320
2321 if (CanReuseInputDirectly)
2322 replaceRegWith(MRI, DstReg, SrcReg);
2323 else
2324 Builder.buildCast(DstReg, SrcReg);
2325 }
2326 MI.eraseFromParent();
2327}
2328
// Match step for an instruction whose last operand is defined by a G_CONSTANT
// or G_FCONSTANT. Splits the constant's bits into one APInt per remaining
// operand, each as wide as the first result's type, lowest bits first, and
// appends them to Csts.
// NOTE(review): the declaration line that precedes this one (listing line
// 2329) is absent from this extract, so the function name is not visible here.
2330 MachineInstr &MI, SmallVectorImpl<APInt> &Csts) const {
// The source is the last operand; every operand before it is a result.
2331 unsigned SrcIdx = MI.getNumOperands() - 1;
2332 Register SrcReg = MI.getOperand(SrcIdx).getReg();
2333 MachineInstr *SrcInstr = MRI.getVRegDef(SrcReg);
2334 if (SrcInstr->getOpcode() != TargetOpcode::G_CONSTANT &&
2335 SrcInstr->getOpcode() != TargetOpcode::G_FCONSTANT)
2336 return false;
2337 // Break down the big constant in smaller ones.
2338 const MachineOperand &CstVal = SrcInstr->getOperand(1);
// Floating-point constants are handled through their raw bit pattern.
2339 APInt Val = SrcInstr->getOpcode() == TargetOpcode::G_CONSTANT
2340 ? CstVal.getCImm()->getValue()
2341 : CstVal.getFPImm()->getValueAPF().bitcastToAPInt();
2342
2343 LLT Dst0Ty = MRI.getType(MI.getOperand(0).getReg());
2344 unsigned ShiftAmt = Dst0Ty.getSizeInBits();
2345 // Unmerge a constant.
2346 for (unsigned Idx = 0; Idx != SrcIdx; ++Idx) {
2347 Csts.emplace_back(Val.trunc(ShiftAmt));
2348 Val = Val.lshr(ShiftAmt);
2349 }
2350
2351 return true;
2352}
2353
// Apply step: defines each result of the G_UNMERGE_VALUES with the constant at
// the same index in Csts, then erases MI.
// NOTE(review): the declaration line that precedes this one (listing line
// 2354) is absent from this extract, so the function name is not visible here.
2355 MachineInstr &MI, SmallVectorImpl<APInt> &Csts) const {
2356 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2357 "Expected an unmerge");
2358 assert((MI.getNumOperands() - 1 == Csts.size()) &&
2359 "Not enough operands to replace all defs");
2360 unsigned NumElems = MI.getNumOperands() - 1;
2361 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
2362 Register DstReg = MI.getOperand(Idx).getReg();
2363 Builder.buildConstant(DstReg, Csts[Idx]);
2364 }
2365
2366 MI.eraseFromParent();
2367}
2368
// Match step: succeeds when MI's last operand is defined by an implicit-def.
// MatchInfo is set (unconditionally, before the check) to a callback that
// builds an undef for every operand of MI except the last.
// NOTE(review): the declaration lines that precede this one (listing lines
// 2369-2370) are absent from this extract, so the function name is not
// visible here.
2371 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
2372 unsigned SrcIdx = MI.getNumOperands() - 1;
2373 Register SrcReg = MI.getOperand(SrcIdx).getReg();
// MI is captured by reference, so the callback is only valid while MI is.
2374 MatchInfo = [&MI](MachineIRBuilder &B) {
2375 unsigned NumElems = MI.getNumOperands() - 1;
2376 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
2377 Register DstReg = MI.getOperand(Idx).getReg();
2378 B.buildUndef(DstReg);
2379 }
2380 };
2381 return isa<GImplicitDef>(MRI.getVRegDef(SrcReg));
2382}
2383
// Match step: succeeds for a scalar-to-scalar G_UNMERGE_VALUES in which every
// result except the first has no non-debug uses.
// NOTE(review): the declaration line that precedes this one (listing line
// 2384) is absent from this extract, so the function name is not visible here.
2385 MachineInstr &MI) const {
2386 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2387 "Expected an unmerge");
// Reject vectors on either side: the first result or the source operand
// (which sits right after the defs).
2388 if (MRI.getType(MI.getOperand(0).getReg()).isVector() ||
2389 MRI.getType(MI.getOperand(MI.getNumDefs()).getReg()).isVector())
2390 return false;
2391 // Check that all the lanes are dead except the first one.
2392 for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
2393 if (!MRI.use_nodbg_empty(MI.getOperand(Idx).getReg()))
2394 return false;
2395 }
2396 return true;
2397}
2398
// Apply step: defines MI's first result as a truncation of MI's source operand
// (the operand right after the defs) and erases MI.
// NOTE(review): the declaration line that precedes this one (listing line
// 2399) is absent from this extract, so the function name is not visible here.
2400 MachineInstr &MI) const {
2401 Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
2402 Register Dst0Reg = MI.getOperand(0).getReg();
2403 Builder.buildTrunc(Dst0Reg, SrcReg);
2404 MI.eraseFromParent();
2405}
2406
// Match step: succeeds for a scalar G_UNMERGE_VALUES whose scalar source is a
// G_ZEXT, provided the zext's own source is no wider than the first result.
// NOTE(review): the declaration line for this body (listing line 2407) is
// absent from this extract, so the function name is not visible here.
2408 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2409 "Expected an unmerge");
2410 Register Dst0Reg = MI.getOperand(0).getReg();
2411 LLT Dst0Ty = MRI.getType(Dst0Reg);
2412 // G_ZEXT on vector applies to each lane, so it will
2413 // affect all destinations. Therefore we won't be able
2414 // to simplify the unmerge to just the first definition.
2415 if (Dst0Ty.isVector())
2416 return false;
2417 Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
2418 LLT SrcTy = MRI.getType(SrcReg);
2419 if (SrcTy.isVector())
2420 return false;
2421
2422 Register ZExtSrcReg;
2423 if (!mi_match(SrcReg, MRI, m_GZExt(m_Reg(ZExtSrcReg))))
2424 return false;
2425
2426 // Finally we can replace the first definition with
2427 // a zext of the source if the definition is big enough to hold
2428 // all of ZExtSrc bits.
2429 LLT ZExtSrcTy = MRI.getType(ZExtSrcReg);
2430 return ZExtSrcTy.getSizeInBits() <= Dst0Ty.getSizeInBits();
2431}
2432
// Apply step for an unmerge of a G_ZEXT: the first result becomes a zext of
// (or, when the sizes are equal, is replaced by) the zext's source, every
// other result is replaced by a shared zero constant, and MI is erased.
// NOTE(review): the declaration line for this body (listing line 2433) is
// absent from this extract, so the function name is not visible here.
2434 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2435 "Expected an unmerge");
2436
2437 Register Dst0Reg = MI.getOperand(0).getReg();
2438
2439 MachineInstr *ZExtInstr =
2440 MRI.getVRegDef(MI.getOperand(MI.getNumDefs()).getReg());
2441 assert(ZExtInstr && ZExtInstr->getOpcode() == TargetOpcode::G_ZEXT &&
2442 "Expecting a G_ZEXT");
2443
2444 Register ZExtSrcReg = ZExtInstr->getOperand(1).getReg();
2445 LLT Dst0Ty = MRI.getType(Dst0Reg);
2446 LLT ZExtSrcTy = MRI.getType(ZExtSrcReg);
2447
2448 if (Dst0Ty.getSizeInBits() > ZExtSrcTy.getSizeInBits()) {
2449 Builder.buildZExt(Dst0Reg, ZExtSrcReg);
2450 } else {
2451 assert(Dst0Ty.getSizeInBits() == ZExtSrcTy.getSizeInBits() &&
2452 "ZExt src doesn't fit in destination");
2453 replaceRegWith(MRI, Dst0Reg, ZExtSrcReg);
2454 }
2455
// The zero constant is created lazily, once, and shared by the other results.
2456 Register ZeroReg;
2457 for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
2458 if (!ZeroReg)
2459 ZeroReg = Builder.buildConstant(Dst0Ty, 0).getReg(0);
2460 replaceRegWith(MRI, MI.getOperand(Idx).getReg(), ZeroReg);
2461 }
2462 MI.eraseFromParent();
2463}
2464
// Match step for a scalar G_SHL/G_LSHR/G_ASHR wider than TargetShiftSize whose
// amount is a constant in [Size / 2, Size). ShiftVal receives the amount (it
// is written even when the range check then fails).
// NOTE(review): the declaration line that precedes this one (listing line
// 2465) is absent from this extract; presumably this is the
// matchCombineShiftToUnmerge called further down the listing -- confirm.
2466 unsigned TargetShiftSize,
2467 unsigned &ShiftVal) const {
2468 assert((MI.getOpcode() == TargetOpcode::G_SHL ||
2469 MI.getOpcode() == TargetOpcode::G_LSHR ||
2470 MI.getOpcode() == TargetOpcode::G_ASHR) && "Expected a shift");
2471
2472 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
2473 if (Ty.isVector()) // TODO:
2474 return false;
2475
2476 // Don't narrow further than the requested size.
2477 unsigned Size = Ty.getSizeInBits();
2478 if (Size <= TargetShiftSize)
2479 return false;
2480
2481 auto MaybeImmVal =
2482 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
2483 if (!MaybeImmVal)
2484 return false;
2485
2486 ShiftVal = MaybeImmVal->Value.getSExtValue();
2487 return ShiftVal >= Size / 2 && ShiftVal < Size;
2488}
2489
// Apply step: lowers a wide constant shift (amount >= half the width) into an
// unmerge of the source into two halves, at most one half-width shift, and a
// merge into MI's destination. MI is erased afterwards.
//   G_LSHR: result = merge(hi >> (C - half), 0)
//   G_SHL:  result = merge(0, lo << (C - half))
//   G_ASHR: result = merge(hi >>s (C - half), hi >>s (half - 1)), with the
//           C == half and C == Size - 1 cases handled without a second shift.
// NOTE(review): the declaration line that precedes this one (listing line
// 2490) is absent from this extract; presumably this is the
// applyCombineShiftToUnmerge called further down the listing -- confirm.
2491 MachineInstr &MI, const unsigned &ShiftVal) const {
2492 Register DstReg = MI.getOperand(0).getReg();
2493 Register SrcReg = MI.getOperand(1).getReg();
2494 LLT Ty = MRI.getType(SrcReg);
2495 unsigned Size = Ty.getSizeInBits();
2496 unsigned HalfSize = Size / 2;
2497 assert(ShiftVal >= HalfSize);
2498
2499 LLT HalfTy = LLT::scalar(HalfSize);
2500
// Unmerge result 0 is the low half and result 1 the high half (see the
// "lo, hi = G_UNMERGE_VALUES x" sketches below).
2501 auto Unmerge = Builder.buildUnmerge(HalfTy, SrcReg);
2502 unsigned NarrowShiftAmt = ShiftVal - HalfSize;
2503
2504 if (MI.getOpcode() == TargetOpcode::G_LSHR) {
2505 Register Narrowed = Unmerge.getReg(1);
2506
2507 // dst = G_LSHR s64:x, C for C >= 32
2508 // =>
2509 // lo, hi = G_UNMERGE_VALUES x
2510 // dst = G_MERGE_VALUES (G_LSHR hi, C - 32), 0
2511
2512 if (NarrowShiftAmt != 0) {
2513 Narrowed = Builder.buildLShr(HalfTy, Narrowed,
2514 Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
2515 }
2516
2517 auto Zero = Builder.buildConstant(HalfTy, 0);
2518 Builder.buildMergeLikeInstr(DstReg, {Narrowed, Zero});
2519 } else if (MI.getOpcode() == TargetOpcode::G_SHL) {
2520 Register Narrowed = Unmerge.getReg(0);
2521 // dst = G_SHL s64:x, C for C >= 32
2522 // =>
2523 // lo, hi = G_UNMERGE_VALUES x
2524 // dst = G_MERGE_VALUES 0, (G_SHL lo, C - 32)
2525 if (NarrowShiftAmt != 0) {
2526 Narrowed = Builder.buildShl(HalfTy, Narrowed,
2527 Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
2528 }
2529
2530 auto Zero = Builder.buildConstant(HalfTy, 0);
2531 Builder.buildMergeLikeInstr(DstReg, {Zero, Narrowed});
2532 } else {
2533 assert(MI.getOpcode() == TargetOpcode::G_ASHR);
// Hi is the high half shifted right arithmetically by (HalfSize - 1), i.e. the
// sign bit replicated across the half.
2534 auto Hi = Builder.buildAShr(
2535 HalfTy, Unmerge.getReg(1),
2536 Builder.buildConstant(HalfTy, HalfSize - 1));
2537
2538 if (ShiftVal == HalfSize) {
2539 // (G_ASHR i64:x, 32) ->
2540 // G_MERGE_VALUES hi_32(x), (G_ASHR hi_32(x), 31)
2541 Builder.buildMergeLikeInstr(DstReg, {Unmerge.getReg(1), Hi});
2542 } else if (ShiftVal == Size - 1) {
2543 // Don't need a second shift.
2544 // (G_ASHR i64:x, 63) ->
2545 // %narrowed = (G_ASHR hi_32(x), 31)
2546 // G_MERGE_VALUES %narrowed, %narrowed
2547 Builder.buildMergeLikeInstr(DstReg, {Hi, Hi});
2548 } else {
2549 auto Lo = Builder.buildAShr(
2550 HalfTy, Unmerge.getReg(1),
2551 Builder.buildConstant(HalfTy, ShiftVal - HalfSize));
2552
2553 // (G_ASHR i64:x, C) ->, for C >= 32
2554 // G_MERGE_VALUES (G_ASHR hi_32(x), C - 32), (G_ASHR hi_32(x), 31)
2555 Builder.buildMergeLikeInstr(DstReg, {Lo, Hi});
2556 }
2557 }
2558
2559 MI.eraseFromParent();
2560}
2561
// Convenience wrapper: runs matchCombineShiftToUnmerge and, if it succeeds,
// applyCombineShiftToUnmerge with the matched shift amount. Returns whether
// the combine was applied.
// NOTE(review): the declaration line that precedes this one (listing line
// 2562) is absent from this extract, so the function name is not visible here.
2563 MachineInstr &MI, unsigned TargetShiftAmount) const {
2564 unsigned ShiftAmt;
2565 if (matchCombineShiftToUnmerge(MI, TargetShiftAmount, ShiftAmt)) {
2566 applyCombineShiftToUnmerge(MI, ShiftAmt);
2567 return true;
2568 }
2569
2570 return false;
2571}
2572
// Match step: succeeds when the G_INTTOPTR's source is a G_PTRTOINT of a
// register whose type equals the G_INTTOPTR's result type; Reg receives that
// register.
// NOTE(review): the declaration line that precedes this one (listing line
// 2573) is absent from this extract, so the function name is not visible here.
2574 Register &Reg) const {
2575 assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
2576 Register DstReg = MI.getOperand(0).getReg();
2577 LLT DstTy = MRI.getType(DstReg);
2578 Register SrcReg = MI.getOperand(1).getReg();
2579 return mi_match(SrcReg, MRI,
2580 m_GPtrToInt(m_all_of(m_SpecificType(DstTy), m_Reg(Reg))));
2581}
2582
// Apply step: replaces the G_INTTOPTR with a copy of Reg into its destination
// and erases MI.
// NOTE(review): the declaration line that precedes this one (listing line
// 2583) is absent from this extract, so the function name is not visible here.
2584 Register &Reg) const {
2585 assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
2586 Register DstReg = MI.getOperand(0).getReg();
2587 Builder.buildCopy(DstReg, Reg);
2588 MI.eraseFromParent();
2589}
2590
// Apply step: replaces the G_PTRTOINT with a zero-extend-or-truncate of Reg
// into its destination and erases MI.
// NOTE(review): the declaration line that precedes this one (listing line
// 2591) is absent from this extract, so the function name is not visible here.
2592 Register &Reg) const {
2593 assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT");
2594 Register DstReg = MI.getOperand(0).getReg();
2595 Builder.buildZExtOrTrunc(DstReg, Reg);
2596 MI.eraseFromParent();
2597}
2598
// Match step for a G_ADD where one operand is a G_PTRTOINT of a pointer whose
// scalar width equals the integer width. PtrReg.first receives the pointer
// register; PtrReg.second is false when the pointer came from the LHS and true
// when it came from the RHS (the operands then need commuting).
// NOTE(review): the declaration line that precedes this one (listing line
// 2599) is absent from this extract, so the function name is not visible here.
2600 MachineInstr &MI, std::pair<Register, bool> &PtrReg) const {
2601 assert(MI.getOpcode() == TargetOpcode::G_ADD);
2602 Register LHS = MI.getOperand(1).getReg();
2603 Register RHS = MI.getOperand(2).getReg();
2604 LLT IntTy = MRI.getType(LHS);
2605
2606 // G_PTR_ADD always has the pointer in the LHS, so we may need to commute the
2607 // instruction.
2608 PtrReg.second = false;
2609 for (Register SrcReg : {LHS, RHS}) {
2610 if (mi_match(SrcReg, MRI, m_GPtrToInt(m_Reg(PtrReg.first)))) {
2611 // Don't handle cases where the integer is implicitly converted to the
2612 // pointer width.
2613 LLT PtrTy = MRI.getType(PtrReg.first);
2614 if (PtrTy.getScalarSizeInBits() == IntTy.getScalarSizeInBits())
2615 return true;
2616 }
2617
// The LHS did not match; flag that a match on the RHS requires a commute.
2618 PtrReg.second = true;
2619 }
2620
2621 return false;
2622}
2623
// Apply step: builds a G_PTR_ADD with PtrReg.first as the base and the other
// G_ADD operand as the offset, converts the result back with G_PTRTOINT into
// MI's destination and erases MI.
// NOTE(review): the declaration line that precedes this one (listing line
// 2624) is absent from this extract, so the function name is not visible here.
2625 MachineInstr &MI, std::pair<Register, bool> &PtrReg) const {
2626 Register Dst = MI.getOperand(0).getReg();
2627 Register LHS = MI.getOperand(1).getReg();
2628 Register RHS = MI.getOperand(2).getReg();
2629
// After the optional swap, RHS is the non-pointer operand; LHS is then
// overwritten with the pointer register itself.
2630 const bool DoCommute = PtrReg.second;
2631 if (DoCommute)
2632 std::swap(LHS, RHS);
2633 LHS = PtrReg.first;
2634
2635 LLT PtrTy = MRI.getType(LHS);
2636
2637 auto PtrAdd = Builder.buildPtrAdd(PtrTy, LHS, RHS);
2638 Builder.buildPtrToInt(Dst, PtrAdd);
2639 MI.eraseFromParent();
2640}
2641
// Match step for a G_PTR_ADD whose base is a G_INTTOPTR of a constant and
// whose offset is a constant. NewCst receives the base constant
// (zero-extended or truncated to the result size) plus the offset
// (sign-extended or truncated to the result size).
// NOTE(review): the declaration line that precedes this one (listing line
// 2642) is absent from this extract, so the function name is not visible here.
2643 APInt &NewCst) const {
2644 auto &PtrAdd = cast<GPtrAdd>(MI);
2645 Register LHS = PtrAdd.getBaseReg();
2646 Register RHS = PtrAdd.getOffsetReg();
// This local MRI reference is obtained from the builder's function.
2647 MachineRegisterInfo &MRI = Builder.getMF().getRegInfo();
2648
2649 if (auto RHSCst = getIConstantVRegVal(RHS, MRI)) {
2650 APInt Cst;
2651 if (mi_match(LHS, MRI, m_GIntToPtr(m_ICst(Cst)))) {
2652 auto DstTy = MRI.getType(PtrAdd.getReg(0));
2653 // G_INTTOPTR uses zero-extension
2654 NewCst = Cst.zextOrTrunc(DstTy.getSizeInBits());
2655 NewCst += RHSCst->sextOrTrunc(DstTy.getSizeInBits());
2656 return true;
2657 }
2658 }
2659
2660 return false;
2661}
2662
// Apply step: defines the G_PTR_ADD's destination with the constant NewCst and
// erases the G_PTR_ADD.
// NOTE(review): the declaration line that precedes this one (listing line
// 2663) is absent from this extract, so the function name is not visible here.
2664 APInt &NewCst) const {
2665 auto &PtrAdd = cast<GPtrAdd>(MI);
2666 Register Dst = PtrAdd.getReg(0);
2667
2668 Builder.buildConstant(Dst, NewCst);
2669 PtrAdd.eraseFromParent();
2670}
2671
// Match step: succeeds when the G_ANYEXT's source (looking through copies) is
// a G_TRUNC of a register that has the G_ANYEXT's result type and can replace
// the result register; Reg receives that register.
// NOTE(review): the declaration line that precedes this one (listing line
// 2672) is absent from this extract, so the function name is not visible here.
2673 Register &Reg) const {
2674 assert(MI.getOpcode() == TargetOpcode::G_ANYEXT && "Expected a G_ANYEXT");
2675 Register DstReg = MI.getOperand(0).getReg();
2676 Register SrcReg = MI.getOperand(1).getReg();
// Fall back to the direct source when no valid register is found past copies.
2677 Register OriginalSrcReg = getSrcRegIgnoringCopies(SrcReg, MRI);
2678 if (OriginalSrcReg.isValid())
2679 SrcReg = OriginalSrcReg;
2680 LLT DstTy = MRI.getType(DstReg);
2681 return mi_match(SrcReg, MRI,
2682 m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy)))) &&
2683 canReplaceReg(DstReg, Reg, MRI);
2684}
2685
// Match step: succeeds when the G_ZEXT's source is a G_TRUNC of a register
// that has the G_ZEXT's result type, can replace the result register, and is
// known to have at least (DstSize - SrcSize) leading zero bits, i.e. the bits
// dropped by the truncation are already zero. Reg receives that register.
// NOTE(review): the declaration line that precedes this one (listing line
// 2686) is absent from this extract, so the function name is not visible here.
2687 Register &Reg) const {
2688 assert(MI.getOpcode() == TargetOpcode::G_ZEXT && "Expected a G_ZEXT");
2689 Register DstReg = MI.getOperand(0).getReg();
2690 Register SrcReg = MI.getOperand(1).getReg();
2691 LLT DstTy = MRI.getType(DstReg);
2692 if (mi_match(SrcReg, MRI,
2693 m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy)))) &&
2694 canReplaceReg(DstReg, Reg, MRI)) {
2695 unsigned DstSize = DstTy.getScalarSizeInBits();
2696 unsigned SrcSize = MRI.getType(SrcReg).getScalarSizeInBits();
2697 return VT->getKnownBits(Reg).countMinLeadingZeros() >= DstSize - SrcSize;
2698 }
2699 return false;
2700}
2701
// Picks the intermediate type for narrowing a right shift that feeds a
// truncation: a 32-bit element version of ShiftTy when the shift is wider than
// 32 bits and the truncated type is narrower than 32 bits; otherwise ShiftTy
// itself, which means "do not combine".
// NOTE(review): the declaration line for this body (listing line 2702) is
// absent from this extract; presumably this is the
// getMidVTForTruncRightShiftCombine helper called further down -- confirm.
2703 const unsigned ShiftSize = ShiftTy.getScalarSizeInBits();
2704 const unsigned TruncSize = TruncTy.getScalarSizeInBits();
2705
2706 // ShiftTy > 32 > TruncTy -> 32
2707 if (ShiftSize > 32 && TruncSize < 32)
2708 return ShiftTy.changeElementSize(32);
2709
2710 // TODO: We could also reduce to 16 bits, but that's more target-dependent.
2711 // Some targets like it, some don't, some only like it under certain
2712 // conditions/processor versions, etc.
2713 // A TL hook might be needed for this.
2714
2715 // Don't combine
2716 return ShiftTy;
2717}
2718
// Match step for a G_TRUNC of a single-use shift (looking through copies).
//   G_SHL:         the shift is narrowed to the truncated type, provided the
//                  amount is known to be below that type's width.
//   G_LSHR/G_ASHR: the shift is narrowed to the type chosen by
//                  getMidVTForTruncRightShiftCombine, provided the truncation
//                  has no G_STORE users and the amount is known to be at most
//                  (new width - truncated width).
// On success MatchInfo holds the shift instruction and the new shift type.
// NOTE(review): the declaration line that precedes this one (listing line
// 2719) is absent from this extract, so the function name is not visible here.
2720 MachineInstr &MI, std::pair<MachineInstr *, LLT> &MatchInfo) const {
2721 assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
2722 Register DstReg = MI.getOperand(0).getReg();
2723 Register SrcReg = MI.getOperand(1).getReg();
2724
2725 if (!MRI.hasOneNonDBGUse(SrcReg))
2726 return false;
2727
2728 LLT SrcTy = MRI.getType(SrcReg);
2729 LLT DstTy = MRI.getType(DstReg);
2730
2731 MachineInstr *SrcMI = getDefIgnoringCopies(SrcReg, MRI);
2732 const auto &TL = getTargetLowering();
2733
2734 LLT NewShiftTy;
2735 switch (SrcMI->getOpcode()) {
2736 default:
2737 return false;
2738 case TargetOpcode::G_SHL: {
2739 NewShiftTy = DstTy;
2740
2741 // Make sure new shift amount is legal.
2742 KnownBits Known = VT->getKnownBits(SrcMI->getOperand(2).getReg());
2743 if (Known.getMaxValue().uge(NewShiftTy.getScalarSizeInBits()))
2744 return false;
2745 break;
2746 }
2747 case TargetOpcode::G_LSHR:
2748 case TargetOpcode::G_ASHR: {
2749 // For right shifts, we conservatively do not do the transform if the TRUNC
2750 // has any STORE users. The reason is that if we change the type of the
2751 // shift, we may break the truncstore combine.
2752 //
2753 // TODO: Fix truncstore combine to handle (trunc(lshr (trunc x), k)).
2754 for (auto &User : MRI.use_instructions(DstReg))
2755 if (User.getOpcode() == TargetOpcode::G_STORE)
2756 return false;
2757
2758 NewShiftTy = getMidVTForTruncRightShiftCombine(SrcTy, DstTy);
// The helper returns the original type to signal "do not combine".
2759 if (NewShiftTy == SrcTy)
2760 return false;
2761
2762 // Make sure we won't lose information by truncating the high bits.
2763 KnownBits Known = VT->getKnownBits(SrcMI->getOperand(2).getReg());
2764 if (Known.getMaxValue().ugt(NewShiftTy.getScalarSizeInBits() -
2765 DstTy.getScalarSizeInBits()))
2766 return false;
2767 break;
2768 }
2769 }
2770
// NOTE(review): listing line 2771 (the start of the condition whose arguments
// follow) is absent from this extract.
2772 {SrcMI->getOpcode(),
2773 {NewShiftTy, TL.getPreferredShiftAmountTy(NewShiftTy)}}))
2774 return false;
2775
2776 MatchInfo = std::make_pair(SrcMI, NewShiftTy);
2777 return true;
2778}
2779
// Apply step: truncates the shift's source to the new shift type, rebuilds the
// shift in that type with the original amount register, and either replaces
// MI's destination with it (types equal) or truncates it into the destination.
// MI is erased through eraseInst.
// NOTE(review): the declaration line that precedes this one (listing line
// 2780) is absent from this extract, so the function name is not visible here.
2781 MachineInstr &MI, std::pair<MachineInstr *, LLT> &MatchInfo) const {
2782 MachineInstr *ShiftMI = MatchInfo.first;
2783 LLT NewShiftTy = MatchInfo.second;
2784
2785 Register Dst = MI.getOperand(0).getReg();
2786 LLT DstTy = MRI.getType(Dst);
2787
2788 Register ShiftAmt = ShiftMI->getOperand(2).getReg();
2789 Register ShiftSrc = ShiftMI->getOperand(1).getReg();
2790 ShiftSrc = Builder.buildTrunc(NewShiftTy, ShiftSrc).getReg(0);
2791
2792 Register NewShift =
2793 Builder
2794 .buildInstr(ShiftMI->getOpcode(), {NewShiftTy}, {ShiftSrc, ShiftAmt})
2795 .getReg(0);
2796
2797 if (NewShiftTy == DstTy)
2798 replaceRegWith(MRI, Dst, NewShift);
2799 else
2800 Builder.buildTrunc(Dst, NewShift);
2801
2802 eraseInst(MI);
2803}
2804
// Predicate: true when at least one explicit use operand of MI is a register
// defined by a G_IMPLICIT_DEF.
// NOTE(review): the declaration line for this body (listing line 2805) is
// absent from this extract, so the function name is not visible here.
2806 return any_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
2807 return MO.isReg() &&
2808 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
2809 });
2810}
2811
// Predicate: true when every explicit use operand of MI is either not a
// register or a register defined by a G_IMPLICIT_DEF.
// NOTE(review): the declaration line for this body (listing line 2812) is
// absent from this extract, so the function name is not visible here.
2813 return all_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
2814 return !MO.isReg() ||
2815 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
2816 });
2817}
2818
// Predicate: true when every element of the G_SHUFFLE_VECTOR's mask (operand
// 3) is negative.
// NOTE(review): the declaration line for this body (listing line 2819) is
// absent from this extract, so the function name is not visible here.
2820 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
2821 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
2822 return all_of(Mask, [](int Elt) { return Elt < 0; });
2823}
2824
// Predicate: true when operand 0 of the G_STORE is defined by a
// G_IMPLICIT_DEF.
// NOTE(review): the declaration line for this body (listing line 2825) is
// absent from this extract, so the function name is not visible here.
2826 assert(MI.getOpcode() == TargetOpcode::G_STORE);
2827 return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(0).getReg(),
2828 MRI);
2829}
2830
// Predicate: true when operand 1 of the G_SELECT is defined by a
// G_IMPLICIT_DEF.
// NOTE(review): the declaration line for this body (listing line 2831) is
// absent from this extract, so the function name is not visible here.
2832 assert(MI.getOpcode() == TargetOpcode::G_SELECT);
2833 return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(1).getReg(),
2834 MRI);
2835}
2836
// Predicate: true when a G_INSERT_VECTOR_ELT/G_EXTRACT_VECTOR_ELT on a
// non-scalable vector uses a constant index that is not below the vector's
// element count.
// NOTE(review): the declaration line that precedes this one (listing line
// 2837) is absent from this extract, so the function name is not visible here.
2838 MachineInstr &MI) const {
2839 assert((MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT ||
2840 MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT) &&
2841 "Expected an insert/extract element op");
2842 LLT VecTy = MRI.getType(MI.getOperand(1).getReg());
2843 if (VecTy.isScalableVector())
2844 return false;
2845
// The index is operand 2 for an extract and operand 3 for an insert.
2846 unsigned IdxIdx =
2847 MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
2848 auto Idx = getIConstantVRegVal(MI.getOperand(IdxIdx).getReg(), MRI);
2849 if (!Idx)
2850 return false;
2851 return Idx->getZExtValue() >= VecTy.getNumElements();
2852}
2853
// Match step for a G_SELECT whose condition is a constant or constant splat.
// OpIdx receives the index of the operand the select reduces to: 3 when the
// constant is zero, 2 otherwise.
// NOTE(review): the declaration line that precedes this one (listing line
// 2854) is absent from this extract, so the function name is not visible here.
2855 unsigned &OpIdx) const {
2856 GSelect &SelMI = cast<GSelect>(MI);
2857 auto Cst =
2858 isConstantOrConstantSplatVector(*MRI.getVRegDef(SelMI.getCondReg()), MRI);
2859 if (!Cst)
2860 return false;
2861 OpIdx = Cst->isZero() ? 3 : 2;
2862 return true;
2863}
2864
// Erases MI from its parent.
2865void CombinerHelper::eraseInst(MachineInstr &MI) const { MI.eraseFromParent(); }
2866
2868 const MachineOperand &MOP2) const {
2869 if (!MOP1.isReg() || !MOP2.isReg())
2870 return false;
2871 auto InstAndDef1 = getDefSrcRegIgnoringCopies(MOP1.getReg(), MRI);
2872 if (!InstAndDef1)
2873 return false;
2874 auto InstAndDef2 = getDefSrcRegIgnoringCopies(MOP2.getReg(), MRI);
2875 if (!InstAndDef2)
2876 return false;
2877 MachineInstr *I1 = InstAndDef1->MI;
2878 MachineInstr *I2 = InstAndDef2->MI;
2879
2880 // Handle a case like this:
2881 //
2882 // %0:_(s64), %1:_(s64) = G_UNMERGE_VALUES %2:_(<2 x s64>)
2883 //
2884 // Even though %0 and %1 are produced by the same instruction they are not
2885 // the same values.
2886 if (I1 == I2)
2887 return MOP1.getReg() == MOP2.getReg();
2888
2889 // If we have an instruction which loads or stores, we can't guarantee that
2890 // it is identical.
2891 //
2892 // For example, we may have
2893 //
2894 // %x1 = G_LOAD %addr (load N from @somewhere)
2895 // ...
2896 // call @foo
2897 // ...
2898 // %x2 = G_LOAD %addr (load N from @somewhere)
2899 // ...
2900 // %or = G_OR %x1, %x2
2901 //
2902 // It's possible that @foo will modify whatever lives at the address we're
2903 // loading from. To be safe, let's just assume that all loads and stores
2904 // are different (unless we have something which is guaranteed to not
2905 // change.)
2906 if (I1->mayLoadOrStore() && !I1->isDereferenceableInvariantLoad())
2907 return false;
2908
2909 // If both instructions are loads or stores, they are equal only if both
2910 // are dereferenceable invariant loads with the same number of bits.
2911 if (I1->mayLoadOrStore() && I2->mayLoadOrStore()) {
2914 if (!LS1 || !LS2)
2915 return false;
2916
2917 if (!I2->isDereferenceableInvariantLoad() ||
2918 (LS1->getMemSizeInBits() != LS2->getMemSizeInBits()))
2919 return false;
2920 }
2921
2922 // Check for physical registers on the instructions first to avoid cases
2923 // like this:
2924 //
2925 // %a = COPY $physreg
2926 // ...
2927 // SOMETHING implicit-def $physreg
2928 // ...
2929 // %b = COPY $physreg
2930 //
2931 // These copies are not equivalent.
2932 if (any_of(I1->uses(), [](const MachineOperand &MO) {
2933 return MO.isReg() && MO.getReg().isPhysical();
2934 })) {
2935 // Check if we have a case like this:
2936 //
2937 // %a = COPY $physreg
2938 // %b = COPY %a
2939 //
2940 // In this case, I1 and I2 will both be equal to %a = COPY $physreg.
2941 // From that, we know that they must have the same value, since they must
2942 // have come from the same COPY.
2943 return I1->isIdenticalTo(*I2);
2944 }
2945
2946 // We don't have any physical registers, so we don't necessarily need the
2947 // same vreg defs.
2948 //
2949 // On the off-chance that there's some target instruction feeding into the
2950 // instruction, let's use produceSameValue instead of isIdenticalTo.
2951 if (Builder.getTII().produceSameValue(*I1, *I2, &MRI)) {
2952 // Handle instructions with multiple defs that produce same values. Values
2953 // are same for operands with same index.
2954 // %0:_(s8), %1:_(s8), %2:_(s8), %3:_(s8) = G_UNMERGE_VALUES %4:_(<4 x s8>)
2955 // %5:_(s8), %6:_(s8), %7:_(s8), %8:_(s8) = G_UNMERGE_VALUES %4:_(<4 x s8>)
2956 // I1 and I2 are different instructions but produce same values,
2957 // %1 and %6 are same, %1 and %7 are not the same value.
2958 return I1->findRegisterDefOperandIdx(InstAndDef1->Reg, /*TRI=*/nullptr) ==
2959 I2->findRegisterDefOperandIdx(InstAndDef2->Reg, /*TRI=*/nullptr);
2960 }
2961 return false;
2962}
2963
/// Returns true if \p MOP is a register whose definition is a constant or
/// constant splat vector, at most 64 bits wide, whose sign-extended value
/// equals \p C.
bool CombinerHelper::matchConstantOp(const MachineOperand &MOP,
                                     int64_t C) const {
  if (!MOP.isReg())
    return false;

  MachineInstr *Def = MRI.getVRegDef(MOP.getReg());
  auto Cst = isConstantOrConstantSplatVector(*Def, MRI);
  if (!Cst)
    return false;

  // Wider constants are rejected before calling getSExtValue().
  if (Cst->getBitWidth() > 64)
    return false;
  return Cst->getSExtValue() == C;
}
2973
2975 double C) const {
2976 if (!MOP.isReg())
2977 return false;
2978 std::optional<FPValueAndVReg> MaybeCst;
2979 if (!mi_match(MOP.getReg(), MRI, m_GFCstOrSplat(MaybeCst)))
2980 return false;
2981
2982 return MaybeCst->Value.isExactlyValue(C);
2983}
2984
2986 unsigned OpIdx) const {
2987 assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
2988 Register OldReg = MI.getOperand(0).getReg();
2989 Register Replacement = MI.getOperand(OpIdx).getReg();
2990 assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
2991 replaceRegWith(MRI, OldReg, Replacement);
2992 MI.eraseFromParent();
2993}
2994
2996 Register Replacement) const {
2997 assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
2998 Register OldReg = MI.getOperand(0).getReg();
2999 assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
3000 replaceRegWith(MRI, OldReg, Replacement);
3001 MI.eraseFromParent();
3002}
3003
3005 unsigned ConstIdx) const {
3006 Register ConstReg = MI.getOperand(ConstIdx).getReg();
3007 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
3008
3009 // Get the shift amount
3010 auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstReg, MRI);
3011 if (!VRegAndVal)
3012 return false;
3013
3014 // Return true of shift amount >= Bitwidth
3015 return (VRegAndVal->Value.uge(DstTy.getSizeInBits()));
3016}
3017
3019 assert((MI.getOpcode() == TargetOpcode::G_FSHL ||
3020 MI.getOpcode() == TargetOpcode::G_FSHR) &&
3021 "This is not a funnel shift operation");
3022
3023 Register ConstReg = MI.getOperand(3).getReg();
3024 LLT ConstTy = MRI.getType(ConstReg);
3025 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
3026
3027 auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstReg, MRI);
3028 assert((VRegAndVal) && "Value is not a constant");
3029
3030 // Calculate the new Shift Amount = Old Shift Amount % BitWidth
3031 APInt NewConst = VRegAndVal->Value.urem(
3032 APInt(ConstTy.getSizeInBits(), DstTy.getScalarSizeInBits()));
3033
3034 auto NewConstInstr = Builder.buildConstant(ConstTy, NewConst.getZExtValue());
3035 Builder.buildInstr(
3036 MI.getOpcode(), {MI.getOperand(0)},
3037 {MI.getOperand(1), MI.getOperand(2), NewConstInstr.getReg(0)});
3038
3039 MI.eraseFromParent();
3040}
3041
3043 assert(MI.getOpcode() == TargetOpcode::G_SELECT);
3044 // Match (cond ? x : x)
3045 return matchEqualDefs(MI.getOperand(2), MI.getOperand(3)) &&
3046 canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(2).getReg(),
3047 MRI);
3048}
3049
3051 return matchEqualDefs(MI.getOperand(1), MI.getOperand(2)) &&
3052 canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(),
3053 MRI);
3054}
3055
3057 unsigned OpIdx) const {
3058 MachineOperand &MO = MI.getOperand(OpIdx);
3059 return MO.isReg() &&
3060 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
3061}
3062
3064 unsigned OpIdx) const {
3065 MachineOperand &MO = MI.getOperand(OpIdx);
3066 return isKnownToBeAPowerOfTwo(MO.getReg(), MRI, VT);
3067}
3068
3070 double C) const {
3071 assert(MI.getNumDefs() == 1 && "Expected only one def?");
3072 Builder.buildFConstant(MI.getOperand(0), C);
3073 MI.eraseFromParent();
3074}
3075
/// Builds the integer constant \p C into the def operand of \p MI and erases
/// \p MI. \p MI must have exactly one def.
void CombinerHelper::replaceInstWithConstant(MachineInstr &MI,
                                             int64_t C) const {
  assert(MI.getNumDefs() == 1 && "Expected only one def?");
  MachineOperand &DefOp = MI.getOperand(0);
  Builder.buildConstant(DefOp, C);
  MI.eraseFromParent();
}
3082
3084 assert(MI.getNumDefs() == 1 && "Expected only one def?");
3085 Builder.buildConstant(MI.getOperand(0), C);
3086 MI.eraseFromParent();
3087}
3088
3090 ConstantFP *CFP) const {
3091 assert(MI.getNumDefs() == 1 && "Expected only one def?");
3092 Builder.buildFConstant(MI.getOperand(0), CFP->getValueAPF());
3093 MI.eraseFromParent();
3094}
3095
3097 assert(MI.getNumDefs() == 1 && "Expected only one def?");
3098 Builder.buildUndef(MI.getOperand(0));
3099 MI.eraseFromParent();
3100}
3101
3103 MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) const {
3104 Register LHS = MI.getOperand(1).getReg();
3105 Register RHS = MI.getOperand(2).getReg();
3106 Register &NewLHS = std::get<0>(MatchInfo);
3107 Register &NewRHS = std::get<1>(MatchInfo);
3108
3109 // Helper lambda to check for opportunities for
3110 // ((0-A) + B) -> B - A
3111 // (A + (0-B)) -> A - B
3112 auto CheckFold = [&](Register &MaybeSub, Register &MaybeNewLHS) {
3113 if (!mi_match(MaybeSub, MRI, m_Neg(m_Reg(NewRHS))))
3114 return false;
3115 NewLHS = MaybeNewLHS;
3116 return true;
3117 };
3118
3119 return CheckFold(LHS, RHS) || CheckFold(RHS, LHS);
3120}
3121
/// Walks a chain of constant-index G_INSERT_VECTOR_ELT instructions ending at
/// \p MI and records, per lane, the register inserted into that lane.
///
/// \p MatchInfo is resized to the number of lanes of the result. Lanes that
/// are never written stay invalid unless the chain starts at a G_BUILD_VECTOR,
/// whose sources then fill the gaps. Returns true if the chain starts at a
/// G_BUILD_VECTOR or G_IMPLICIT_DEF, or if every lane was written.
bool CombinerHelper::matchCombineInsertVecElts(
    MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) const {
  assert(MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT &&
         "Invalid opcode");
  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  assert(DstTy.isVector() && "Invalid G_INSERT_VECTOR_ELT?");

  if (DstTy.isScalableVector())
    return false;

  const unsigned NumElts = DstTy.getNumElements();

  // If this insert feeds exactly one other insert it sits in the middle of a
  // chain; wait for the end of the chain instead of combining here.
  if (MRI.hasOneUse(DstReg)) {
    const MachineInstr &OnlyUser = *MRI.use_instr_begin(DstReg);
    if (OnlyUser.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
      return false;
  }

  MachineInstr *Cursor = &MI;
  MachineInstr *VecDef;
  Register EltReg;
  int64_t EltIdx;
  MatchInfo.resize(NumElts);
  while (mi_match(Cursor->getOperand(0).getReg(), MRI,
                  m_GInsertVecElt(m_MInstr(VecDef), m_Reg(EltReg),
                                  m_ICst(EltIdx)))) {
    if (EltIdx < 0 || EltIdx >= NumElts)
      return false;
    // The walk goes from the last insert towards the first, so a lane that is
    // already recorded holds the later (winning) value.
    Register &Slot = MatchInfo[EltIdx];
    if (!Slot.isValid())
      Slot = EltReg;
    Cursor = VecDef;
  }

  // The walk stopped on an insert: its index is not a constant.
  if (Cursor->getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
    return false;

  const unsigned BaseOpc = VecDef->getOpcode();
  if (BaseOpc == TargetOpcode::G_BUILD_VECTOR) {
    // Lanes not overwritten by the chain come from the build vector sources.
    for (unsigned OpNo = 1; OpNo < VecDef->getNumOperands(); ++OpNo) {
      Register &Slot = MatchInfo[OpNo - 1];
      if (!Slot.isValid())
        Slot = VecDef->getOperand(OpNo).getReg();
    }
    return true;
  }

  // Otherwise the chain must start from undef or overwrite every lane.
  if (BaseOpc == TargetOpcode::G_IMPLICIT_DEF)
    return true;
  return all_of(MatchInfo, [](Register Reg) { return Reg.isValid(); });
}
3168
3170 MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) const {
3171 Register UndefReg;
3172 auto GetUndef = [&]() {
3173 if (UndefReg)
3174 return UndefReg;
3175 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
3176 UndefReg = Builder.buildUndef(DstTy.getScalarType()).getReg(0);
3177 return UndefReg;
3178 };
3179 for (Register &Reg : MatchInfo) {
3180 if (!Reg)
3181 Reg = GetUndef();
3182 }
3183 Builder.buildBuildVector(MI.getOperand(0).getReg(), MatchInfo);
3184 MI.eraseFromParent();
3185}
3186
3188 MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) const {
3189 Register SubLHS, SubRHS;
3190 std::tie(SubLHS, SubRHS) = MatchInfo;
3191 Builder.buildSub(MI.getOperand(0).getReg(), SubLHS, SubRHS);
3192 MI.eraseFromParent();
3193}
3194
3195bool CombinerHelper::matchBinopWithNegInner(Register MInner, Register Other,
3196 unsigned RootOpc, Register Dst,
3197 LLT Ty,
3198 BuildFnTy &MatchInfo) const {
3199 /// Helper function for matchBinopWithNeg: tries to match one commuted form
3200 /// of `a bitwiseop (~b +/- c)` -> `a bitwiseop ~(b -/+ c)`.
3201 MachineInstr *InnerDef = MRI.getVRegDef(MInner);
3202 if (!InnerDef)
3203 return false;
3204
3205 unsigned InnerOpc = InnerDef->getOpcode();
3206 if (InnerOpc != TargetOpcode::G_ADD && InnerOpc != TargetOpcode::G_SUB)
3207 return false;
3208
3209 if (!MRI.hasOneNonDBGUse(MInner))
3210 return false;
3211
3212 Register InnerLHS = InnerDef->getOperand(1).getReg();
3213 Register InnerRHS = InnerDef->getOperand(2).getReg();
3214 Register NotSrc;
3215 Register B, C;
3216
3217 // Check if either operand is ~b
3218 auto TryMatch = [&](Register MaybeNot, Register Other) {
3219 if (mi_match(MaybeNot, MRI, m_Not(m_Reg(NotSrc)))) {
3220 if (!MRI.hasOneNonDBGUse(MaybeNot))
3221 return false;
3222 B = NotSrc;
3223 C = Other;
3224 return true;
3225 }
3226 return false;
3227 };
3228
3229 if (!TryMatch(InnerLHS, InnerRHS) && !TryMatch(InnerRHS, InnerLHS))
3230 return false;
3231
3232 // Flip add/sub
3233 unsigned FlippedOpc = (InnerOpc == TargetOpcode::G_ADD) ? TargetOpcode::G_SUB
3234 : TargetOpcode::G_ADD;
3235
3236 Register A = Other;
3237 MatchInfo = [=](MachineIRBuilder &Builder) {
3238 auto NewInner = Builder.buildInstr(FlippedOpc, {Ty}, {B, C});
3239 auto NewNot = Builder.buildNot(Ty, NewInner);
3240 Builder.buildInstr(RootOpc, {Dst}, {A, NewNot});
3241 };
3242 return true;
3243}
3244
3246 BuildFnTy &MatchInfo) const {
3247 // Fold `a bitwiseop (~b +/- c)` -> `a bitwiseop ~(b -/+ c)`
3248 // Root MI is one of G_AND, G_OR, G_XOR.
3249 // We also look for commuted forms of operations. Pattern shouldn't apply
3250 // if there are multiple reasons of inner operations.
3251
3252 unsigned RootOpc = MI.getOpcode();
3253 Register Dst = MI.getOperand(0).getReg();
3254 LLT Ty = MRI.getType(Dst);
3255
3256 Register LHS = MI.getOperand(1).getReg();
3257 Register RHS = MI.getOperand(2).getReg();
3258 // Check the commuted and uncommuted forms of the operation.
3259 return matchBinopWithNegInner(LHS, RHS, RootOpc, Dst, Ty, MatchInfo) ||
3260 matchBinopWithNegInner(RHS, LHS, RootOpc, Dst, Ty, MatchInfo);
3261}
3262
3264 MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) const {
3265 // Matches: logic (hand x, ...), (hand y, ...) -> hand (logic x, y), ...
3266 //
3267 // Creates the new hand + logic instruction (but does not insert them.)
3268 //
3269 // On success, MatchInfo is populated with the new instructions. These are
3270 // inserted in applyHoistLogicOpWithSameOpcodeHands.
3271 unsigned LogicOpcode = MI.getOpcode();
3272 assert(LogicOpcode == TargetOpcode::G_AND ||
3273 LogicOpcode == TargetOpcode::G_OR ||
3274 LogicOpcode == TargetOpcode::G_XOR);
3275 MachineIRBuilder MIB(MI);
3276 Register Dst = MI.getOperand(0).getReg();
3277 Register LHSReg = MI.getOperand(1).getReg();
3278 Register RHSReg = MI.getOperand(2).getReg();
3279
3280 // Don't recompute anything.
3281 if (!MRI.hasOneNonDBGUse(LHSReg) || !MRI.hasOneNonDBGUse(RHSReg))
3282 return false;
3283
3284 // Make sure we have (hand x, ...), (hand y, ...)
3285 MachineInstr *LeftHandInst = getDefIgnoringCopies(LHSReg, MRI);
3286 MachineInstr *RightHandInst = getDefIgnoringCopies(RHSReg, MRI);
3287 if (!LeftHandInst || !RightHandInst)
3288 return false;
3289 unsigned HandOpcode = LeftHandInst->getOpcode();
3290 if (HandOpcode != RightHandInst->getOpcode())
3291 return false;
3292 if (LeftHandInst->getNumOperands() < 2 ||
3293 !LeftHandInst->getOperand(1).isReg() ||
3294 RightHandInst->getNumOperands() < 2 ||
3295 !RightHandInst->getOperand(1).isReg())
3296 return false;
3297
3298 // Make sure the types match up, and if we're doing this post-legalization,
3299 // we end up with legal types.
3300 Register X = LeftHandInst->getOperand(1).getReg();
3301 Register Y = RightHandInst->getOperand(1).getReg();
3302 LLT XTy = MRI.getType(X);
3303 LLT YTy = MRI.getType(Y);
3304 if (!XTy.isValid() || XTy != YTy)
3305 return false;
3306
3307 // Optional extra source register.
3308 Register ExtraHandOpSrcReg;
3309 switch (HandOpcode) {
3310 default:
3311 return false;
3312 case TargetOpcode::G_ANYEXT:
3313 case TargetOpcode::G_SEXT:
3314 case TargetOpcode::G_ZEXT: {
3315 // Match: logic (ext X), (ext Y) --> ext (logic X, Y)
3316 break;
3317 }
3318 case TargetOpcode::G_TRUNC: {
3319 // Match: logic (trunc X), (trunc Y) -> trunc (logic X, Y)
3320 const MachineFunction *MF = MI.getMF();
3321 LLVMContext &Ctx = MF->getFunction().getContext();
3322
3323 LLT DstTy = MRI.getType(Dst);
3324 const TargetLowering &TLI = getTargetLowering();
3325
3326 // Be extra careful sinking truncate. If it's free, there's no benefit in
3327 // widening a binop.
3328 if (TLI.isZExtFree(DstTy, XTy, Ctx) && TLI.isTruncateFree(XTy, DstTy, Ctx))
3329 return false;
3330 break;
3331 }
3332 case TargetOpcode::G_AND:
3333 case TargetOpcode::G_ASHR:
3334 case TargetOpcode::G_LSHR:
3335 case TargetOpcode::G_SHL: {
3336 // Match: logic (binop x, z), (binop y, z) -> binop (logic x, y), z
3337 MachineOperand &ZOp = LeftHandInst->getOperand(2);
3338 if (!matchEqualDefs(ZOp, RightHandInst->getOperand(2)))
3339 return false;
3340 ExtraHandOpSrcReg = ZOp.getReg();
3341 break;
3342 }
3343 }
3344
3345 if (!isLegalOrBeforeLegalizer({LogicOpcode, {XTy, YTy}}))
3346 return false;
3347
3348 // Record the steps to build the new instructions.
3349 //
3350 // Steps to build (logic x, y)
3351 auto NewLogicDst = MRI.createGenericVirtualRegister(XTy);
3352 OperandBuildSteps LogicBuildSteps = {
3353 [=](MachineInstrBuilder &MIB) { MIB.addDef(NewLogicDst); },
3354 [=](MachineInstrBuilder &MIB) { MIB.addReg(X); },
3355 [=](MachineInstrBuilder &MIB) { MIB.addReg(Y); }};
3356 InstructionBuildSteps LogicSteps(LogicOpcode, LogicBuildSteps);
3357
3358 // Steps to build hand (logic x, y), ...z
3359 OperandBuildSteps HandBuildSteps = {
3360 [=](MachineInstrBuilder &MIB) { MIB.addDef(Dst); },
3361 [=](MachineInstrBuilder &MIB) { MIB.addReg(NewLogicDst); }};
3362 if (ExtraHandOpSrcReg.isValid())
3363 HandBuildSteps.push_back(
3364 [=](MachineInstrBuilder &MIB) { MIB.addReg(ExtraHandOpSrcReg); });
3365 InstructionBuildSteps HandSteps(HandOpcode, HandBuildSteps);
3366
3367 MatchInfo = InstructionStepsMatchInfo({LogicSteps, HandSteps});
3368 return true;
3369}
3370
3372 MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) const {
3373 assert(MatchInfo.InstrsToBuild.size() &&
3374 "Expected at least one instr to build?");
3375 for (auto &InstrToBuild : MatchInfo.InstrsToBuild) {
3376 assert(InstrToBuild.Opcode && "Expected a valid opcode?");
3377 assert(InstrToBuild.OperandFns.size() && "Expected at least one operand?");
3378 MachineInstrBuilder Instr = Builder.buildInstr(InstrToBuild.Opcode);
3379 for (auto &OperandFn : InstrToBuild.OperandFns)
3380 OperandFn(Instr);
3381 }
3382 MI.eraseFromParent();
3383}
3384
3386 MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) const {
3387 assert(MI.getOpcode() == TargetOpcode::G_ASHR);
3388 int64_t ShlCst, AshrCst;
3389 Register Src;
3390 if (!mi_match(MI.getOperand(0).getReg(), MRI,
3391 m_GAShr(m_GShl(m_Reg(Src), m_ICstOrSplat(ShlCst)),
3392 m_ICstOrSplat(AshrCst))))
3393 return false;
3394 if (ShlCst != AshrCst)
3395 return false;
3397 {TargetOpcode::G_SEXT_INREG, {MRI.getType(Src)}}))
3398 return false;
3399 MatchInfo = std::make_tuple(Src, ShlCst);
3400 return true;
3401}
3402
3404 MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) const {
3405 assert(MI.getOpcode() == TargetOpcode::G_ASHR);
3406 Register Src;
3407 int64_t ShiftAmt;
3408 std::tie(Src, ShiftAmt) = MatchInfo;
3409 unsigned Size = MRI.getType(Src).getScalarSizeInBits();
3410 Builder.buildSExtInReg(MI.getOperand(0).getReg(), Src, Size - ShiftAmt);
3411 MI.eraseFromParent();
3412}
3413
3414/// and(and(x, C1), C2) -> C1&C2 ? and(x, C1&C2) : 0
3417 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
3418 assert(MI.getOpcode() == TargetOpcode::G_AND);
3419
3420 Register Dst = MI.getOperand(0).getReg();
3421 LLT Ty = MRI.getType(Dst);
3422
3423 Register R;
3424 int64_t C1;
3425 int64_t C2;
3426 if (!mi_match(
3427 Dst, MRI,
3428 m_GAnd(m_GAnd(m_Reg(R), m_ICst(C1)), m_ICst(C2))))
3429 return false;
3430
3431 MatchInfo = [=](MachineIRBuilder &B) {
3432 if (C1 & C2) {
3433 B.buildAnd(Dst, R, B.buildConstant(Ty, C1 & C2));
3434 return;
3435 }
3436 auto Zero = B.buildConstant(Ty, 0);
3437 replaceRegWith(MRI, Dst, Zero->getOperand(0).getReg());
3438 };
3439 return true;
3440}
3441
3443 Register &Replacement) const {
3444 // Given
3445 //
3446 // %y:_(sN) = G_SOMETHING
3447 // %x:_(sN) = G_SOMETHING
3448 // %res:_(sN) = G_AND %x, %y
3449 //
3450 // Eliminate the G_AND when it is known that x & y == x or x & y == y.
3451 //
3452 // Patterns like this can appear as a result of legalization. E.g.
3453 //
3454 // %cmp:_(s32) = G_ICMP intpred(pred), %x(s32), %y
3455 // %one:_(s32) = G_CONSTANT i32 1
3456 // %and:_(s32) = G_AND %cmp, %one
3457 //
3458 // In this case, G_ICMP only produces a single bit, so x & 1 == x.
3459 assert(MI.getOpcode() == TargetOpcode::G_AND);
3460 if (!VT)
3461 return false;
3462
3463 Register AndDst = MI.getOperand(0).getReg();
3464 Register LHS = MI.getOperand(1).getReg();
3465 Register RHS = MI.getOperand(2).getReg();
3466
3467 // Check the RHS (maybe a constant) first, and if we have no KnownBits there,
3468 // we can't do anything. If we do, then it depends on whether we have
3469 // KnownBits on the LHS.
3470 KnownBits RHSBits = VT->getKnownBits(RHS);
3471 if (RHSBits.isUnknown())
3472 return false;
3473
3474 KnownBits LHSBits = VT->getKnownBits(LHS);
3475
3476 // Check that x & Mask == x.
3477 // x & 1 == x, always
3478 // x & 0 == x, only if x is also 0
3479 // Meaning Mask has no effect if every bit is either one in Mask or zero in x.
3480 //
3481 // Check if we can replace AndDst with the LHS of the G_AND
3482 if (canReplaceReg(AndDst, LHS, MRI) &&
3483 (LHSBits.Zero | RHSBits.One).isAllOnes()) {
3484 Replacement = LHS;
3485 return true;
3486 }
3487
3488 // Check if we can replace AndDst with the RHS of the G_AND
3489 if (canReplaceReg(AndDst, RHS, MRI) &&
3490 (LHSBits.One | RHSBits.Zero).isAllOnes()) {
3491 Replacement = RHS;
3492 return true;
3493 }
3494
3495 return false;
3496}
3497
3499 Register &Replacement) const {
3500 // Given
3501 //
3502 // %y:_(sN) = G_SOMETHING
3503 // %x:_(sN) = G_SOMETHING
3504 // %res:_(sN) = G_OR %x, %y
3505 //
3506 // Eliminate the G_OR when it is known that x | y == x or x | y == y.
3507 assert(MI.getOpcode() == TargetOpcode::G_OR);
3508 if (!VT)
3509 return false;
3510
3511 Register OrDst = MI.getOperand(0).getReg();
3512 Register LHS = MI.getOperand(1).getReg();
3513 Register RHS = MI.getOperand(2).getReg();
3514
3515 KnownBits LHSBits = VT->getKnownBits(LHS);
3516 KnownBits RHSBits = VT->getKnownBits(RHS);
3517
3518 // Check that x | Mask == x.
3519 // x | 0 == x, always
3520 // x | 1 == x, only if x is also 1
3521 // Meaning Mask has no effect if every bit is either zero in Mask or one in x.
3522 //
3523 // Check if we can replace OrDst with the LHS of the G_OR
3524 if (canReplaceReg(OrDst, LHS, MRI) &&
3525 (LHSBits.One | RHSBits.Zero).isAllOnes()) {
3526 Replacement = LHS;
3527 return true;
3528 }
3529
3530 // Check if we can replace OrDst with the RHS of the G_OR
3531 if (canReplaceReg(OrDst, RHS, MRI) &&
3532 (LHSBits.Zero | RHSBits.One).isAllOnes()) {
3533 Replacement = RHS;
3534 return true;
3535 }
3536
3537 return false;
3538}
3539
3541 // If the input is already sign extended, just drop the extension.
3542 Register Src = MI.getOperand(1).getReg();
3543 unsigned ExtBits = MI.getOperand(2).getImm();
3544 unsigned TypeSize = MRI.getType(Src).getScalarSizeInBits();
3545 return VT->computeNumSignBits(Src) >= (TypeSize - ExtBits + 1);
3546}
3547
/// Returns true if \p Cst counts as a "true" constant: either the scalar is a
/// single bit and \p Cst is -1, or isConstTrueVal() accepts it for the given
/// vector/floating-point context.
static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits,
                             int64_t Cst, bool IsVector, bool IsFP) {
  // For i1, Cst will always be -1 regardless of boolean contents.
  if (ScalarSizeBits == 1 && Cst == -1)
    return true;
  return isConstTrueVal(TLI, Cst, IsVector, IsFP);
}
3554
3555// This pattern aims to match the following shape to avoid extra mov
3556// instructions
3557// G_BUILD_VECTOR(
3558// G_UNMERGE_VALUES(src, 0)
3559// G_UNMERGE_VALUES(src, 1)
3560// G_IMPLICIT_DEF
3561// G_IMPLICIT_DEF
3562// )
3563// ->
3564// G_CONCAT_VECTORS(
3565// src,
3566// undef
3567// )
3570 Register &UnmergeSrc) const {
3571 auto &BV = cast<GBuildVector>(MI);
3572
3573 unsigned BuildUseCount = BV.getNumSources();
3574 if (BuildUseCount % 2 != 0)
3575 return false;
3576
3577 unsigned NumUnmerge = BuildUseCount / 2;
3578
3579 auto *Unmerge = getOpcodeDef<GUnmerge>(BV.getSourceReg(0), MRI);
3580
3581 // Check the first operand is an unmerge and has the correct number of
3582 // operands
3583 if (!Unmerge || Unmerge->getNumDefs() != NumUnmerge)
3584 return false;
3585
3586 UnmergeSrc = Unmerge->getSourceReg();
3587
3588 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
3589 LLT UnmergeSrcTy = MRI.getType(UnmergeSrc);
3590
3591 if (!UnmergeSrcTy.isVector())
3592 return false;
3593
3594 // Ensure we only generate legal instructions post-legalizer
3595 if (!IsPreLegalize &&
3596 !isLegal({TargetOpcode::G_CONCAT_VECTORS, {DstTy, UnmergeSrcTy}}))
3597 return false;
3598
3599 // Check that all of the operands before the midpoint come from the same
3600 // unmerge and are in the same order as they are used in the build_vector
3601 for (unsigned I = 0; I < NumUnmerge; ++I) {
3602 auto MaybeUnmergeReg = BV.getSourceReg(I);
3603 auto *LoopUnmerge = getOpcodeDef<GUnmerge>(MaybeUnmergeReg, MRI);
3604
3605 if (!LoopUnmerge || LoopUnmerge != Unmerge)
3606 return false;
3607
3608 if (LoopUnmerge->getOperand(I).getReg() != MaybeUnmergeReg)
3609 return false;
3610 }
3611
3612 // Check that all of the unmerged values are used
3613 if (Unmerge->getNumDefs() != NumUnmerge)
3614 return false;
3615
3616 // Check that all of the operands after the mid point are undefs.
3617 for (unsigned I = NumUnmerge; I < BuildUseCount; ++I) {
3618 auto *Undef = getDefIgnoringCopies(BV.getSourceReg(I), MRI);
3619
3620 if (Undef->getOpcode() != TargetOpcode::G_IMPLICIT_DEF)
3621 return false;
3622 }
3623
3624 return true;
3625}
3626
3630 Register &UnmergeSrc) const {
3631 assert(UnmergeSrc && "Expected there to be one matching G_UNMERGE_VALUES");
3632 B.setInstrAndDebugLoc(MI);
3633
3634 Register UndefVec = B.buildUndef(MRI.getType(UnmergeSrc)).getReg(0);
3635 B.buildConcatVectors(MI.getOperand(0), {UnmergeSrc, UndefVec});
3636
3637 MI.eraseFromParent();
3638}
3639
3640// This combine tries to reduce the number of scalarised G_TRUNC instructions by
3641// using vector truncates instead
3642//
3643// EXAMPLE:
3644// %a(i32), %b(i32) = G_UNMERGE_VALUES %src(<2 x i32>)
3645// %T_a(i16) = G_TRUNC %a(i32)
3646// %T_b(i16) = G_TRUNC %b(i32)
3647// %Undef(i16) = G_IMPLICIT_DEF(i16)
3648// %dst(v4i16) = G_BUILD_VECTORS %T_a(i16), %T_b(i16), %Undef(i16), %Undef(i16)
3649//
3650// ===>
3651// %Undef(<2 x i32>) = G_IMPLICIT_DEF(<2 x i32>)
3652// %Mid(<4 x s32>) = G_CONCAT_VECTORS %src(<2 x i32>), %Undef(<2 x i32>)
3653// %dst(<4 x s16>) = G_TRUNC %Mid(<4 x s32>)
3654//
3655// Only matches sources made up of G_TRUNCs followed by G_IMPLICIT_DEFs
3657 Register &MatchInfo) const {
3658 auto BuildMI = cast<GBuildVector>(&MI);
3659 unsigned NumOperands = BuildMI->getNumSources();
3660 LLT DstTy = MRI.getType(BuildMI->getReg(0));
3661
3662 // Check the G_BUILD_VECTOR sources
3663 unsigned I;
3664 MachineInstr *UnmergeMI = nullptr;
3665
3666 // Check all source TRUNCs come from the same UNMERGE instruction
3667 // and that the element order matches (BUILD_VECTOR position I
3668 // corresponds to UNMERGE result I)
3669 for (I = 0; I < NumOperands; ++I) {
3670 auto SrcMI = MRI.getVRegDef(BuildMI->getSourceReg(I));
3671 auto SrcMIOpc = SrcMI->getOpcode();
3672
3673 // Check if the G_TRUNC instructions all come from the same MI
3674 if (SrcMIOpc == TargetOpcode::G_TRUNC) {
3675 Register TruncSrcReg = SrcMI->getOperand(1).getReg();
3676 if (!UnmergeMI) {
3677 UnmergeMI = MRI.getVRegDef(TruncSrcReg);
3678 if (UnmergeMI->getOpcode() != TargetOpcode::G_UNMERGE_VALUES)
3679 return false;
3680 } else {
3681 auto UnmergeSrcMI = MRI.getVRegDef(TruncSrcReg);
3682 if (UnmergeMI != UnmergeSrcMI)
3683 return false;
3684 }
3685 // Verify element ordering: BUILD_VECTOR position I must use
3686 // UNMERGE result I, otherwise the fold would lose element reordering
3687 if (UnmergeMI->getOperand(I).getReg() != TruncSrcReg)
3688 return false;
3689 } else {
3690 break;
3691 }
3692 }
3693 if (I < 2)
3694 return false;
3695
3696 // Check the remaining source elements are only G_IMPLICIT_DEF
3697 for (; I < NumOperands; ++I) {
3698 auto SrcMI = MRI.getVRegDef(BuildMI->getSourceReg(I));
3699 auto SrcMIOpc = SrcMI->getOpcode();
3700
3701 if (SrcMIOpc != TargetOpcode::G_IMPLICIT_DEF)
3702 return false;
3703 }
3704
3705 // Check the size of unmerge source
3706 MatchInfo = cast<GUnmerge>(UnmergeMI)->getSourceReg();
3707 LLT UnmergeSrcTy = MRI.getType(MatchInfo);
3708 if (!DstTy.getElementCount().isKnownMultipleOf(UnmergeSrcTy.getNumElements()))
3709 return false;
3710
3711 // Check the unmerge source and destination element types match
3712 LLT UnmergeSrcEltTy = UnmergeSrcTy.getElementType();
3713 Register UnmergeDstReg = UnmergeMI->getOperand(0).getReg();
3714 LLT UnmergeDstEltTy = MRI.getType(UnmergeDstReg);
3715 if (UnmergeSrcEltTy != UnmergeDstEltTy)
3716 return false;
3717
3718 // Only generate legal instructions post-legalizer
3719 if (!IsPreLegalize) {
3720 LLT MidTy = DstTy.changeElementType(UnmergeSrcTy.getScalarType());
3721
3722 if (DstTy.getElementCount() != UnmergeSrcTy.getElementCount() &&
3723 !isLegal({TargetOpcode::G_CONCAT_VECTORS, {MidTy, UnmergeSrcTy}}))
3724 return false;
3725
3726 if (!isLegal({TargetOpcode::G_TRUNC, {DstTy, MidTy}}))
3727 return false;
3728 }
3729
3730 return true;
3731}
3732
3734 Register &MatchInfo) const {
3735 Register MidReg;
3736 auto BuildMI = cast<GBuildVector>(&MI);
3737 Register DstReg = BuildMI->getReg(0);
3738 LLT DstTy = MRI.getType(DstReg);
3739 LLT UnmergeSrcTy = MRI.getType(MatchInfo);
3740 unsigned DstTyNumElt = DstTy.getNumElements();
3741 unsigned UnmergeSrcTyNumElt = UnmergeSrcTy.getNumElements();
3742
3743 // No need to pad vector if only G_TRUNC is needed
3744 if (DstTyNumElt / UnmergeSrcTyNumElt == 1) {
3745 MidReg = MatchInfo;
3746 } else {
3747 Register UndefReg = Builder.buildUndef(UnmergeSrcTy).getReg(0);
3748 SmallVector<Register> ConcatRegs = {MatchInfo};
3749 for (unsigned I = 1; I < DstTyNumElt / UnmergeSrcTyNumElt; ++I)
3750 ConcatRegs.push_back(UndefReg);
3751
3752 auto MidTy = DstTy.changeElementType(UnmergeSrcTy.getScalarType());
3753 MidReg = Builder.buildConcatVectors(MidTy, ConcatRegs).getReg(0);
3754 }
3755
3756 Builder.buildTrunc(DstReg, MidReg);
3757 MI.eraseFromParent();
3758}
3759
/// Matches xor(src, true) where src is the root of a tree made only of
/// G_AND / G_OR nodes with comparisons as leaves, each node having a single
/// non-debug use. The comparisons must be all integer or all floating point.
///
/// On success \p RegsToNegate lists every register of the tree (root first)
/// so the apply step can invert it. Note that \p RegsToNegate may already
/// have been appended to when the match fails.
bool CombinerHelper::matchNotCmp(
    MachineInstr &MI, SmallVectorImpl<Register> &RegsToNegate) const {
  assert(MI.getOpcode() == TargetOpcode::G_XOR);
  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
  const auto &TLI = *Builder.getMF().getSubtarget().getTargetLowering();
  Register XorSrc;
  Register CstReg;
  // We match xor(src, true) here.
  if (!mi_match(MI.getOperand(0).getReg(), MRI,
                m_GXor(m_Reg(XorSrc), m_Reg(CstReg))))
    return false;

  if (!MRI.hasOneNonDBGUse(XorSrc))
    return false;

  // RegsToNegate doubles as the work list: entries from index I onwards are
  // tree nodes that still have to be visited.
  RegsToNegate.push_back(XorSrc);
  // Track which kind of comparison has been seen; mixing kinds is rejected.
  bool SawICmp = false;
  bool SawFCmp = false;
  for (unsigned I = 0; I < RegsToNegate.size(); ++I) {
    // Copy the register: the vector may grow below.
    Register Reg = RegsToNegate[I];
    if (!MRI.hasOneNonDBGUse(Reg))
      return false;

    MachineInstr *Def = MRI.getVRegDef(Reg);
    const unsigned Opc = Def->getOpcode();
    if (Opc == TargetOpcode::G_ICMP) {
      // The apply step will invert the predicate.
      if (SawFCmp)
        return false;
      SawICmp = true;
    } else if (Opc == TargetOpcode::G_FCMP) {
      // The apply step will invert the predicate.
      if (SawICmp)
        return false;
      SawFCmp = true;
    } else if (Opc == TargetOpcode::G_AND || Opc == TargetOpcode::G_OR) {
      // De Morgan's laws:
      //   ~(x & y) -> ~x | ~y
      //   ~(x | y) -> ~x & ~y
      // The apply step swaps the opcode; both operands must be negated too.
      RegsToNegate.push_back(Def->getOperand(1).getReg());
      RegsToNegate.push_back(Def->getOperand(2).getReg());
    } else {
      // Anything other than ANDs, ORs and comparisons ends the match.
      return false;
    }
  }

  // The comparison kind is known now, so the xor constant can be checked
  // against the matching notion of "true".
  if (Ty.isVector()) {
    MachineInstr *CstDef = MRI.getVRegDef(CstReg);
    auto SplatVal = getIConstantSplatSExtVal(*CstDef, MRI);
    return SplatVal && isConstValidTrue(TLI, Ty.getScalarSizeInBits(),
                                        *SplatVal, true, SawFCmp);
  }

  int64_t Cst;
  return mi_match(CstReg, MRI, m_ICst(Cst)) &&
         isConstValidTrue(TLI, Ty.getSizeInBits(), Cst, false, SawFCmp);
}
3836
3838 MachineInstr &MI, SmallVectorImpl<Register> &RegsToNegate) const {
3839 for (Register Reg : RegsToNegate) {
3840 MachineInstr *Def = MRI.getVRegDef(Reg);
3841 Observer.changingInstr(*Def);
3842 // For each comparison, invert the opcode. For each AND and OR, change the
3843 // opcode.
3844 switch (Def->getOpcode()) {
3845 default:
3846 llvm_unreachable("Unexpected opcode");
3847 case TargetOpcode::G_ICMP:
3848 case TargetOpcode::G_FCMP: {
3849 MachineOperand &PredOp = Def->getOperand(1);
3852 PredOp.setPredicate(NewP);
3853 break;
3854 }
3855 case TargetOpcode::G_AND:
3856 Def->setDesc(Builder.getTII().get(TargetOpcode::G_OR));
3857 break;
3858 case TargetOpcode::G_OR:
3859 Def->setDesc(Builder.getTII().get(TargetOpcode::G_AND));
3860 break;
3861 }
3862 Observer.changedInstr(*Def);
3863 }
3864
3865 replaceRegWith(MRI, MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
3866 MI.eraseFromParent();
3867}
3868
3870 MachineInstr &MI, std::pair<Register, Register> &MatchInfo) const {
3871 // Match (xor (and x, y), y) (or any of its commuted cases)
3872 assert(MI.getOpcode() == TargetOpcode::G_XOR);
3873 Register &X = MatchInfo.first;
3874 Register &Y = MatchInfo.second;
3875 Register AndReg = MI.getOperand(1).getReg();
3876 Register SharedReg = MI.getOperand(2).getReg();
3877
3878 // Find a G_AND on either side of the G_XOR.
3879 // Look for one of
3880 //
3881 // (xor (and x, y), SharedReg)
3882 // (xor SharedReg, (and x, y))
3883 if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y)))) {
3884 std::swap(AndReg, SharedReg);
3885 if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y))))
3886 return false;
3887 }
3888
3889 // Only do this if we'll eliminate the G_AND.
3890 if (!MRI.hasOneNonDBGUse(AndReg))
3891 return false;
3892
3893 // We can combine if SharedReg is the same as either the LHS or RHS of the
3894 // G_AND.
3895 if (Y != SharedReg)
3896 std::swap(X, Y);
3897 return Y == SharedReg;
3898}
3899
3901 MachineInstr &MI, std::pair<Register, Register> &MatchInfo) const {
3902 // Fold (xor (and x, y), y) -> (and (not x), y)
3903 Register X, Y;
3904 std::tie(X, Y) = MatchInfo;
3905 auto Not = Builder.buildNot(MRI.getType(X), X);
3906 Observer.changingInstr(MI);
3907 MI.setDesc(Builder.getTII().get(TargetOpcode::G_AND));
3908 MI.getOperand(1).setReg(Not->getOperand(0).getReg());
3909 MI.getOperand(2).setReg(Y);
3910 Observer.changedInstr(MI);
3911}
3912
3914 auto &PtrAdd = cast<GPtrAdd>(MI);
3915 Register DstReg = PtrAdd.getReg(0);
3916 LLT Ty = MRI.getType(DstReg);
3917 const DataLayout &DL = Builder.getMF().getDataLayout();
3918
3919 if (DL.isNonIntegralAddressSpace(Ty.getScalarType().getAddressSpace()))
3920 return false;
3921
3922 if (Ty.isPointer()) {
3923 auto ConstVal = getIConstantVRegVal(PtrAdd.getBaseReg(), MRI);
3924 return ConstVal && *ConstVal == 0;
3925 }
3926
3927 assert(Ty.isVector() && "Expecting a vector type");
3928 const MachineInstr *VecMI = MRI.getVRegDef(PtrAdd.getBaseReg());
3929 return isBuildVectorAllZeros(*VecMI, MRI);
3930}
3931
3933 auto &PtrAdd = cast<GPtrAdd>(MI);
3934 Builder.buildIntToPtr(PtrAdd.getReg(0), PtrAdd.getOffsetReg());
3935 PtrAdd.eraseFromParent();
3936}
3937
3938/// The second source operand is known to be a power of 2.
3940 Register DstReg = MI.getOperand(0).getReg();
3941 Register Src0 = MI.getOperand(1).getReg();
3942 Register Pow2Src1 = MI.getOperand(2).getReg();
3943 LLT Ty = MRI.getType(DstReg);
3944
3945 // Fold (urem x, pow2) -> (and x, pow2-1)
3946 auto NegOne = Builder.buildConstant(Ty, -1);
3947 auto Add = Builder.buildAdd(Ty, Pow2Src1, NegOne);
3948 Builder.buildAnd(DstReg, Src0, Add);
3949 MI.eraseFromParent();
3950}
3951
3953 unsigned &SelectOpNo) const {
3954 Register LHS = MI.getOperand(1).getReg();
3955 Register RHS = MI.getOperand(2).getReg();
3956
3957 Register OtherOperandReg = RHS;
3958 SelectOpNo = 1;
3959 MachineInstr *Select = MRI.getVRegDef(LHS);
3960
3961 // Don't do this unless the old select is going away. We want to eliminate the
3962 // binary operator, not replace a binop with a select.
3963 if (Select->getOpcode() != TargetOpcode::G_SELECT ||
3964 !MRI.hasOneNonDBGUse(LHS)) {
3965 OtherOperandReg = LHS;
3966 SelectOpNo = 2;
3967 Select = MRI.getVRegDef(RHS);
3968 if (Select->getOpcode() != TargetOpcode::G_SELECT ||
3969 !MRI.hasOneNonDBGUse(RHS))
3970 return false;
3971 }
3972
3973 MachineInstr *SelectLHS = MRI.getVRegDef(Select->getOperand(2).getReg());
3974 MachineInstr *SelectRHS = MRI.getVRegDef(Select->getOperand(3).getReg());
3975
3976 if (!isConstantOrConstantVector(*SelectLHS, MRI,
3977 /*AllowFP*/ true,
3978 /*AllowOpaqueConstants*/ false))
3979 return false;
3980 if (!isConstantOrConstantVector(*SelectRHS, MRI,
3981 /*AllowFP*/ true,
3982 /*AllowOpaqueConstants*/ false))
3983 return false;
3984
3985 unsigned BinOpcode = MI.getOpcode();
3986
3987 // We know that one of the operands is a select of constants. Now verify that
3988 // the other binary operator operand is either a constant, or we can handle a
3989 // variable.
3990 bool CanFoldNonConst =
3991 (BinOpcode == TargetOpcode::G_AND || BinOpcode == TargetOpcode::G_OR) &&
3992 (isNullOrNullSplat(*SelectLHS, MRI) ||
3993 isAllOnesOrAllOnesSplat(*SelectLHS, MRI)) &&
3994 (isNullOrNullSplat(*SelectRHS, MRI) ||
3995 isAllOnesOrAllOnesSplat(*SelectRHS, MRI));
3996 if (CanFoldNonConst)
3997 return true;
3998
3999 return isConstantOrConstantVector(*MRI.getVRegDef(OtherOperandReg), MRI,
4000 /*AllowFP*/ true,
4001 /*AllowOpaqueConstants*/ false);
4002}
4003
4004/// \p SelectOperand is the operand in binary operator \p MI that is the select
4005/// to fold.
4007 MachineInstr &MI, const unsigned &SelectOperand) const {
4008 Register Dst = MI.getOperand(0).getReg();
4009 Register LHS = MI.getOperand(1).getReg();
4010 Register RHS = MI.getOperand(2).getReg();
4011 MachineInstr *Select = MRI.getVRegDef(MI.getOperand(SelectOperand).getReg());
4012
4013 Register SelectCond = Select->getOperand(1).getReg();
4014 Register SelectTrue = Select->getOperand(2).getReg();
4015 Register SelectFalse = Select->getOperand(3).getReg();
4016
4017 LLT Ty = MRI.getType(Dst);
4018 unsigned BinOpcode = MI.getOpcode();
4019
4020 Register FoldTrue, FoldFalse;
4021
4022 // We have a select-of-constants followed by a binary operator with a
4023 // constant. Eliminate the binop by pulling the constant math into the select.
4024 // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
4025 if (SelectOperand == 1) {
4026 // TODO: SelectionDAG verifies this actually constant folds before
4027 // committing to the combine.
4028
4029 FoldTrue = Builder.buildInstr(BinOpcode, {Ty}, {SelectTrue, RHS}).getReg(0);
4030 FoldFalse =
4031 Builder.buildInstr(BinOpcode, {Ty}, {SelectFalse, RHS}).getReg(0);
4032 } else {
4033 FoldTrue = Builder.buildInstr(BinOpcode, {Ty}, {LHS, SelectTrue}).getReg(0);
4034 FoldFalse =
4035 Builder.buildInstr(BinOpcode, {Ty}, {LHS, SelectFalse}).getReg(0);
4036 }
4037
4038 Builder.buildSelect(Dst, SelectCond, FoldTrue, FoldFalse, MI.getFlags());
4039 MI.eraseFromParent();
4040}
4041
4042std::optional<SmallVector<Register, 8>>
4043CombinerHelper::findCandidatesForLoadOrCombine(const MachineInstr *Root) const {
4044 assert(Root->getOpcode() == TargetOpcode::G_OR && "Expected G_OR only!");
4045 // We want to detect if Root is part of a tree which represents a bunch
4046 // of loads being merged into a larger load. We'll try to recognize patterns
4047 // like, for example:
4048 //
4049 // Reg Reg
4050 // \ /
4051 // OR_1 Reg
4052 // \ /
4053 // OR_2
4054 // \ Reg
4055 // .. /
4056 // Root
4057 //
4058 // Reg Reg Reg Reg
4059 // \ / \ /
4060 // OR_1 OR_2
4061 // \ /
4062 // \ /
4063 // ...
4064 // Root
4065 //
4066 // Each "Reg" may have been produced by a load + some arithmetic. This
4067 // function will save each of them.
4068 SmallVector<Register, 8> RegsToVisit;
4070
4071 // In the "worst" case, we're dealing with a load for each byte. So, there
4072 // are at most #bytes - 1 ORs.
4073 const unsigned MaxIter =
4074 MRI.getType(Root->getOperand(0).getReg()).getSizeInBytes() - 1;
4075 for (unsigned Iter = 0; Iter < MaxIter; ++Iter) {
4076 if (Ors.empty())
4077 break;
4078 const MachineInstr *Curr = Ors.pop_back_val();
4079 Register OrLHS = Curr->getOperand(1).getReg();
4080 Register OrRHS = Curr->getOperand(2).getReg();
4081
4082 // In the combine, we want to elimate the entire tree.
4083 if (!MRI.hasOneNonDBGUse(OrLHS) || !MRI.hasOneNonDBGUse(OrRHS))
4084 return std::nullopt;
4085
4086 // If it's a G_OR, save it and continue to walk. If it's not, then it's
4087 // something that may be a load + arithmetic.
4088 if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrLHS, MRI))
4089 Ors.push_back(Or);
4090 else
4091 RegsToVisit.push_back(OrLHS);
4092 if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrRHS, MRI))
4093 Ors.push_back(Or);
4094 else
4095 RegsToVisit.push_back(OrRHS);
4096 }
4097
4098 // We're going to try and merge each register into a wider power-of-2 type,
4099 // so we ought to have an even number of registers.
4100 if (RegsToVisit.empty() || RegsToVisit.size() % 2 != 0)
4101 return std::nullopt;
4102 return RegsToVisit;
4103}
4104
4105/// Helper function for findLoadOffsetsForLoadOrCombine.
4106///
4107/// Check if \p Reg is the result of loading a \p MemSizeInBits wide value,
4108/// and then moving that value into a specific byte offset.
4109///
4110/// e.g. x[i] << 24
4111///
4112/// \returns The load instruction and the byte offset it is moved into.
4113static std::optional<std::pair<GZExtLoad *, int64_t>>
4114matchLoadAndBytePosition(Register Reg, unsigned MemSizeInBits,
4115 const MachineRegisterInfo &MRI) {
4116 assert(MRI.hasOneNonDBGUse(Reg) &&
4117 "Expected Reg to only have one non-debug use?");
4118 Register MaybeLoad;
4119 int64_t Shift;
4120 if (!mi_match(Reg, MRI,
4121 m_OneNonDBGUse(m_GShl(m_Reg(MaybeLoad), m_ICst(Shift))))) {
4122 Shift = 0;
4123 MaybeLoad = Reg;
4124 }
4125
4126 if (Shift % MemSizeInBits != 0)
4127 return std::nullopt;
4128
4129 // TODO: Handle other types of loads.
4130 auto *Load = getOpcodeDef<GZExtLoad>(MaybeLoad, MRI);
4131 if (!Load)
4132 return std::nullopt;
4133
4134 if (!Load->isUnordered() || Load->getMemSizeInBits() != MemSizeInBits)
4135 return std::nullopt;
4136
4137 return std::make_pair(Load, Shift / MemSizeInBits);
4138}
4139
4140std::optional<std::tuple<GZExtLoad *, int64_t, GZExtLoad *>>
4141CombinerHelper::findLoadOffsetsForLoadOrCombine(
4143 const SmallVector<Register, 8> &RegsToVisit,
4144 const unsigned MemSizeInBits) const {
4145
4146 // Each load found for the pattern. There should be one for each RegsToVisit.
4147 SmallSetVector<const MachineInstr *, 8> Loads;
4148
4149 // The lowest index used in any load. (The lowest "i" for each x[i].)
4150 int64_t LowestIdx = INT64_MAX;
4151
4152 // The load which uses the lowest index.
4153 GZExtLoad *LowestIdxLoad = nullptr;
4154
4155 // Keeps track of the load indices we see. We shouldn't see any indices twice.
4156 SmallSet<int64_t, 8> SeenIdx;
4157
4158 // Ensure each load is in the same MBB.
4159 // TODO: Support multiple MachineBasicBlocks.
4160 MachineBasicBlock *MBB = nullptr;
4161 const MachineMemOperand *MMO = nullptr;
4162
4163 // Earliest instruction-order load in the pattern.
4164 GZExtLoad *EarliestLoad = nullptr;
4165
4166 // Latest instruction-order load in the pattern.
4167 GZExtLoad *LatestLoad = nullptr;
4168
4169 // Base pointer which every load should share.
4171
4172 // We want to find a load for each register. Each load should have some
4173 // appropriate bit twiddling arithmetic. During this loop, we will also keep
4174 // track of the load which uses the lowest index. Later, we will check if we
4175 // can use its pointer in the final, combined load.
4176 for (auto Reg : RegsToVisit) {
4177 // Find the load, and find the position that it will end up in (e.g. a
4178 // shifted) value.
4179 auto LoadAndPos = matchLoadAndBytePosition(Reg, MemSizeInBits, MRI);
4180 if (!LoadAndPos)
4181 return std::nullopt;
4182 GZExtLoad *Load;
4183 int64_t DstPos;
4184 std::tie(Load, DstPos) = *LoadAndPos;
4185
4186 // TODO: Handle multiple MachineBasicBlocks. Currently not handled because
4187 // it is difficult to check for stores/calls/etc between loads.
4188 MachineBasicBlock *LoadMBB = Load->getParent();
4189 if (!MBB)
4190 MBB = LoadMBB;
4191 if (LoadMBB != MBB)
4192 return std::nullopt;
4193
4194 // Make sure that the MachineMemOperands of every seen load are compatible.
4195 auto &LoadMMO = Load->getMMO();
4196 if (!MMO)
4197 MMO = &LoadMMO;
4198 if (MMO->getAddrSpace() != LoadMMO.getAddrSpace())
4199 return std::nullopt;
4200
4201 // Find out what the base pointer and index for the load is.
4202 Register LoadPtr;
4203 int64_t Idx;
4204 if (!mi_match(Load->getOperand(1).getReg(), MRI,
4205 m_GPtrAdd(m_Reg(LoadPtr), m_ICst(Idx)))) {
4206 LoadPtr = Load->getOperand(1).getReg();
4207 Idx = 0;
4208 }
4209
4210 // Don't combine things like a[i], a[i] -> a bigger load.
4211 if (!SeenIdx.insert(Idx).second)
4212 return std::nullopt;
4213
4214 // Every load must share the same base pointer; don't combine things like:
4215 //
4216 // a[i], b[i + 1] -> a bigger load.
4217 if (!BasePtr.isValid())
4218 BasePtr = LoadPtr;
4219 if (BasePtr != LoadPtr)
4220 return std::nullopt;
4221
4222 if (Idx < LowestIdx) {
4223 LowestIdx = Idx;
4224 LowestIdxLoad = Load;
4225 }
4226
4227 // Keep track of the byte offset that this load ends up at. If we have seen
4228 // the byte offset, then stop here. We do not want to combine:
4229 //
4230 // a[i] << 16, a[i + k] << 16 -> a bigger load.
4231 if (!MemOffset2Idx.try_emplace(DstPos, Idx).second)
4232 return std::nullopt;
4233 Loads.insert(Load);
4234
4235 // Keep track of the position of the earliest/latest loads in the pattern.
4236 // We will check that there are no load fold barriers between them later
4237 // on.
4238 //
4239 // FIXME: Is there a better way to check for load fold barriers?
4240 if (!EarliestLoad || dominates(*Load, *EarliestLoad))
4241 EarliestLoad = Load;
4242 if (!LatestLoad || dominates(*LatestLoad, *Load))
4243 LatestLoad = Load;
4244 }
4245
4246 // We found a load for each register. Let's check if each load satisfies the
4247 // pattern.
4248 assert(Loads.size() == RegsToVisit.size() &&
4249 "Expected to find a load for each register?");
4250 assert(EarliestLoad != LatestLoad && EarliestLoad &&
4251 LatestLoad && "Expected at least two loads?");
4252
4253 // Check if there are any stores, calls, etc. between any of the loads. If
4254 // there are, then we can't safely perform the combine.
4255 //
4256 // MaxIter is chosen based off the (worst case) number of iterations it
4257 // typically takes to succeed in the LLVM test suite plus some padding.
4258 //
4259 // FIXME: Is there a better way to check for load fold barriers?
4260 const unsigned MaxIter = 20;
4261 unsigned Iter = 0;
4262 for (const auto &MI : instructionsWithoutDebug(EarliestLoad->getIterator(),
4263 LatestLoad->getIterator())) {
4264 if (Loads.count(&MI))
4265 continue;
4266 if (MI.isLoadFoldBarrier())
4267 return std::nullopt;
4268 if (Iter++ == MaxIter)
4269 return std::nullopt;
4270 }
4271
4272 return std::make_tuple(LowestIdxLoad, LowestIdx, LatestLoad);
4273}
4274
4277 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4278 assert(MI.getOpcode() == TargetOpcode::G_OR);
4279 MachineFunction &MF = *MI.getMF();
4280 // Assuming a little-endian target, transform:
4281 // s8 *a = ...
4282 // s32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
4283 // =>
4284 // s32 val = *((i32)a)
4285 //
4286 // s8 *a = ...
4287 // s32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
4288 // =>
4289 // s32 val = BSWAP(*((s32)a))
4290 Register Dst = MI.getOperand(0).getReg();
4291 LLT Ty = MRI.getType(Dst);
4292 if (Ty.isVector())
4293 return false;
4294
4295 // We need to combine at least two loads into this type. Since the smallest
4296 // possible load is into a byte, we need at least a 16-bit wide type.
4297 const unsigned WideMemSizeInBits = Ty.getSizeInBits();
4298 if (WideMemSizeInBits < 16 || WideMemSizeInBits % 8 != 0)
4299 return false;
4300
4301 // Match a collection of non-OR instructions in the pattern.
4302 auto RegsToVisit = findCandidatesForLoadOrCombine(&MI);
4303 if (!RegsToVisit)
4304 return false;
4305
4306 // We have a collection of non-OR instructions. Figure out how wide each of
4307 // the small loads should be based off of the number of potential loads we
4308 // found.
4309 const unsigned NarrowMemSizeInBits = WideMemSizeInBits / RegsToVisit->size();
4310 if (NarrowMemSizeInBits % 8 != 0)
4311 return false;
4312
4313 // Check if each register feeding into each OR is a load from the same
4314 // base pointer + some arithmetic.
4315 //
4316 // e.g. a[0], a[1] << 8, a[2] << 16, etc.
4317 //
4318 // Also verify that each of these ends up putting a[i] into the same memory
4319 // offset as a load into a wide type would.
4321 GZExtLoad *LowestIdxLoad, *LatestLoad;
4322 int64_t LowestIdx;
4323 auto MaybeLoadInfo = findLoadOffsetsForLoadOrCombine(
4324 MemOffset2Idx, *RegsToVisit, NarrowMemSizeInBits);
4325 if (!MaybeLoadInfo)
4326 return false;
4327 std::tie(LowestIdxLoad, LowestIdx, LatestLoad) = *MaybeLoadInfo;
4328
4329 // We have a bunch of loads being OR'd together. Using the addresses + offsets
4330 // we found before, check if this corresponds to a big or little endian byte
4331 // pattern. If it does, then we can represent it using a load + possibly a
4332 // BSWAP.
4333 bool IsBigEndianTarget = MF.getDataLayout().isBigEndian();
4334 std::optional<bool> IsBigEndian = isBigEndian(MemOffset2Idx, LowestIdx);
4335 if (!IsBigEndian)
4336 return false;
4337 bool NeedsBSwap = IsBigEndianTarget != *IsBigEndian;
4338 if (NeedsBSwap && !isLegalOrBeforeLegalizer({TargetOpcode::G_BSWAP, {Ty}}))
4339 return false;
4340
4341 // Make sure that the load from the lowest index produces offset 0 in the
4342 // final value.
4343 //
4344 // This ensures that we won't combine something like this:
4345 //
4346 // load x[i] -> byte 2
4347 // load x[i+1] -> byte 0 ---> wide_load x[i]
4348 // load x[i+2] -> byte 1
4349 const unsigned NumLoadsInTy = WideMemSizeInBits / NarrowMemSizeInBits;
4350 const unsigned ZeroByteOffset =
4351 *IsBigEndian
4352 ? bigEndianByteAt(NumLoadsInTy, 0)
4353 : littleEndianByteAt(NumLoadsInTy, 0);
4354 auto ZeroOffsetIdx = MemOffset2Idx.find(ZeroByteOffset);
4355 if (ZeroOffsetIdx == MemOffset2Idx.end() ||
4356 ZeroOffsetIdx->second != LowestIdx)
4357 return false;
4358
4359 // We wil reuse the pointer from the load which ends up at byte offset 0. It
4360 // may not use index 0.
4361 Register Ptr = LowestIdxLoad->getPointerReg();
4362 const MachineMemOperand &MMO = LowestIdxLoad->getMMO();
4363 LegalityQuery::MemDesc MMDesc(MMO);
4364 MMDesc.MemoryTy = Ty;
4366 {TargetOpcode::G_LOAD, {Ty, MRI.getType(Ptr)}, {MMDesc}}))
4367 return false;
4368 auto PtrInfo = MMO.getPointerInfo();
4369 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, WideMemSizeInBits / 8);
4370
4371 // Load must be allowed and fast on the target.
4373 auto &DL = MF.getDataLayout();
4374 unsigned Fast = 0;
4375 if (!getTargetLowering().allowsMemoryAccess(C, DL, Ty, *NewMMO, &Fast) ||
4376 !Fast)
4377 return false;
4378
4379 MatchInfo = [=](MachineIRBuilder &MIB) {
4380 MIB.setInstrAndDebugLoc(*LatestLoad);
4381 Register LoadDst = NeedsBSwap ? MRI.cloneVirtualRegister(Dst) : Dst;
4382 MIB.buildLoad(LoadDst, Ptr, *NewMMO);
4383 if (NeedsBSwap)
4384 MIB.buildBSwap(Dst, LoadDst);
4385 };
4386 return true;
4387}
4388
4390 MachineInstr *&ExtMI) const {
4391 auto &PHI = cast<GPhi>(MI);
4392 Register DstReg = PHI.getReg(0);
4393
4394 // TODO: Extending a vector may be expensive, don't do this until heuristics
4395 // are better.
4396 if (MRI.getType(DstReg).isVector())
4397 return false;
4398
4399 // Try to match a phi, whose only use is an extend.
4400 if (!MRI.hasOneNonDBGUse(DstReg))
4401 return false;
4402 ExtMI = &*MRI.use_instr_nodbg_begin(DstReg);
4403 switch (ExtMI->getOpcode()) {
4404 case TargetOpcode::G_ANYEXT:
4405 return true; // G_ANYEXT is usually free.
4406 case TargetOpcode::G_ZEXT:
4407 case TargetOpcode::G_SEXT:
4408 break;
4409 default:
4410 return false;
4411 }
4412
4413 // If the target is likely to fold this extend away, don't propagate.
4414 if (Builder.getTII().isExtendLikelyToBeFolded(*ExtMI, MRI))
4415 return false;
4416
4417 // We don't want to propagate the extends unless there's a good chance that
4418 // they'll be optimized in some way.
4419 // Collect the unique incoming values.
4421 for (unsigned I = 0; I < PHI.getNumIncomingValues(); ++I) {
4422 auto *DefMI = getDefIgnoringCopies(PHI.getIncomingValue(I), MRI);
4423 switch (DefMI->getOpcode()) {
4424 case TargetOpcode::G_LOAD:
4425 case TargetOpcode::G_TRUNC:
4426 case TargetOpcode::G_SEXT:
4427 case TargetOpcode::G_ZEXT:
4428 case TargetOpcode::G_ANYEXT:
4429 case TargetOpcode::G_CONSTANT:
4430 InSrcs.insert(DefMI);
4431 // Don't try to propagate if there are too many places to create new
4432 // extends, chances are it'll increase code size.
4433 if (InSrcs.size() > 2)
4434 return false;
4435 break;
4436 default:
4437 return false;
4438 }
4439 }
4440 return true;
4441}
4442
4444 MachineInstr *&ExtMI) const {
4445 auto &PHI = cast<GPhi>(MI);
4446 Register DstReg = ExtMI->getOperand(0).getReg();
4447 LLT ExtTy = MRI.getType(DstReg);
4448
4449 // Propagate the extension into the block of each incoming reg's block.
4450 // Use a SetVector here because PHIs can have duplicate edges, and we want
4451 // deterministic iteration order.
4454 for (unsigned I = 0; I < PHI.getNumIncomingValues(); ++I) {
4455 auto SrcReg = PHI.getIncomingValue(I);
4456 auto *SrcMI = MRI.getVRegDef(SrcReg);
4457 if (!SrcMIs.insert(SrcMI))
4458 continue;
4459
4460 // Build an extend after each src inst.
4461 auto *MBB = SrcMI->getParent();
4462 MachineBasicBlock::iterator InsertPt = ++SrcMI->getIterator();
4463 if (InsertPt != MBB->end() && InsertPt->isPHI())
4464 InsertPt = MBB->getFirstNonPHI();
4465
4466 Builder.setInsertPt(*SrcMI->getParent(), InsertPt);
4467 Builder.setDebugLoc(MI.getDebugLoc());
4468 auto NewExt = Builder.buildExtOrTrunc(ExtMI->getOpcode(), ExtTy, SrcReg);
4469 OldToNewSrcMap[SrcMI] = NewExt;
4470 }
4471
4472 // Create a new phi with the extended inputs.
4473 Builder.setInstrAndDebugLoc(MI);
4474 auto NewPhi = Builder.buildInstrNoInsert(TargetOpcode::G_PHI);
4475 NewPhi.addDef(DstReg);
4476 for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) {
4477 if (!MO.isReg()) {
4478 NewPhi.addMBB(MO.getMBB());
4479 continue;
4480 }
4481 auto *NewSrc = OldToNewSrcMap[MRI.getVRegDef(MO.getReg())];
4482 NewPhi.addUse(NewSrc->getOperand(0).getReg());
4483 }
4484 Builder.insertInstr(NewPhi);
4485 ExtMI->eraseFromParent();
4486}
4487
4489 Register &Reg) const {
4490 assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT);
4491 // If we have a constant index, look for a G_BUILD_VECTOR source
4492 // and find the source register that the index maps to.
4493 Register SrcVec = MI.getOperand(1).getReg();
4494 LLT SrcTy = MRI.getType(SrcVec);
4495 if (SrcTy.isScalableVector())
4496 return false;
4497
4498 auto Cst = getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
4499 if (!Cst || Cst->Value.getZExtValue() >= SrcTy.getNumElements())
4500 return false;
4501
4502 unsigned VecIdx = Cst->Value.getZExtValue();
4503
4504 // Check if we have a build_vector or build_vector_trunc with an optional
4505 // trunc in front.
4506 MachineInstr *SrcVecMI = MRI.getVRegDef(SrcVec);
4507 if (SrcVecMI->getOpcode() == TargetOpcode::G_TRUNC) {
4508 SrcVecMI = MRI.getVRegDef(SrcVecMI->getOperand(1).getReg());
4509 }
4510
4511 if (SrcVecMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR &&
4512 SrcVecMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR_TRUNC)
4513 return false;
4514
4515 EVT Ty(getMVTForLLT(SrcTy));
4516 if (!MRI.hasOneNonDBGUse(SrcVec) &&
4517 !getTargetLowering().aggressivelyPreferBuildVectorSources(Ty))
4518 return false;
4519
4520 Reg = SrcVecMI->getOperand(VecIdx + 1).getReg();
4521 return true;
4522}
4523
4525 Register &Reg) const {
4526 // Check the type of the register, since it may have come from a
4527 // G_BUILD_VECTOR_TRUNC.
4528 LLT ScalarTy = MRI.getType(Reg);
4529 Register DstReg = MI.getOperand(0).getReg();
4530 LLT DstTy = MRI.getType(DstReg);
4531
4532 if (ScalarTy != DstTy) {
4533 assert(ScalarTy.getSizeInBits() > DstTy.getSizeInBits());
4534 Builder.buildTrunc(DstReg, Reg);
4535 MI.eraseFromParent();
4536 return;
4537 }
4539}
4540
4543 SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) const {
4544 assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
4545 // This combine tries to find build_vector's which have every source element
4546 // extracted using G_EXTRACT_VECTOR_ELT. This can happen when transforms like
4547 // the masked load scalarization is run late in the pipeline. There's already
4548 // a combine for a similar pattern starting from the extract, but that
4549 // doesn't attempt to do it if there are multiple uses of the build_vector,
4550 // which in this case is true. Starting the combine from the build_vector
4551 // feels more natural than trying to find sibling nodes of extracts.
4552 // E.g.
4553 // %vec(<4 x s32>) = G_BUILD_VECTOR %s1(s32), %s2, %s3, %s4
4554 // %ext1 = G_EXTRACT_VECTOR_ELT %vec, 0
4555 // %ext2 = G_EXTRACT_VECTOR_ELT %vec, 1
4556 // %ext3 = G_EXTRACT_VECTOR_ELT %vec, 2
4557 // %ext4 = G_EXTRACT_VECTOR_ELT %vec, 3
4558 // ==>
4559 // replace ext{1,2,3,4} with %s{1,2,3,4}
4560
4561 Register DstReg = MI.getOperand(0).getReg();
4562 LLT DstTy = MRI.getType(DstReg);
4563 unsigned NumElts = DstTy.getNumElements();
4564
4565 SmallBitVector ExtractedElts(NumElts);
4566 for (MachineInstr &II : MRI.use_nodbg_instructions(DstReg)) {
4567 if (II.getOpcode() != TargetOpcode::G_EXTRACT_VECTOR_ELT)
4568 return false;
4569 auto Cst = getIConstantVRegVal(II.getOperand(2).getReg(), MRI);
4570 if (!Cst)
4571 return false;
4572 unsigned Idx = Cst->getZExtValue();
4573 if (Idx >= NumElts)
4574 return false; // Out of range.
4575 ExtractedElts.set(Idx);
4576 SrcDstPairs.emplace_back(
4577 std::make_pair(MI.getOperand(Idx + 1).getReg(), &II));
4578 }
4579 // Match if every element was extracted.
4580 return ExtractedElts.all();
4581}
4582
4585 SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) const {
4586 assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
4587 for (auto &Pair : SrcDstPairs) {
4588 auto *ExtMI = Pair.second;
4589 replaceRegWith(MRI, ExtMI->getOperand(0).getReg(), Pair.first);
4590 ExtMI->eraseFromParent();
4591 }
4592 MI.eraseFromParent();
4593}
4594
4597 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4598 applyBuildFnNoErase(MI, MatchInfo);
4599 MI.eraseFromParent();
4600}
4601
4604 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4605 MatchInfo(Builder);
4606}
4607
4609 bool AllowScalarConstants,
4610 BuildFnTy &MatchInfo) const {
4611 assert(MI.getOpcode() == TargetOpcode::G_OR);
4612
4613 Register Dst = MI.getOperand(0).getReg();
4614 LLT Ty = MRI.getType(Dst);
4615 unsigned BitWidth = Ty.getScalarSizeInBits();
4616
4617 Register ShlSrc, ShlAmt, LShrSrc, LShrAmt, Amt;
4618 unsigned FshOpc = 0;
4619
4620 // Match (or (shl ...), (lshr ...)).
4621 if (!mi_match(Dst, MRI,
4622 // m_GOr() handles the commuted version as well.
4623 m_GOr(m_GShl(m_Reg(ShlSrc), m_Reg(ShlAmt)),
4624 m_GLShr(m_Reg(LShrSrc), m_Reg(LShrAmt)))))
4625 return false;
4626
4627 // Given constants C0 and C1 such that C0 + C1 is bit-width:
4628 // (or (shl x, C0), (lshr y, C1)) -> (fshl x, y, C0) or (fshr x, y, C1)
4629 int64_t CstShlAmt = 0, CstLShrAmt;
4630 if (mi_match(ShlAmt, MRI, m_ICstOrSplat(CstShlAmt)) &&
4631 mi_match(LShrAmt, MRI, m_ICstOrSplat(CstLShrAmt)) &&
4632 CstShlAmt + CstLShrAmt == BitWidth) {
4633 FshOpc = TargetOpcode::G_FSHR;
4634 Amt = LShrAmt;
4635 } else if (mi_match(LShrAmt, MRI,
4637 ShlAmt == Amt) {
4638 // (or (shl x, amt), (lshr y, (sub bw, amt))) -> (fshl x, y, amt)
4639 FshOpc = TargetOpcode::G_FSHL;
4640 } else if (mi_match(ShlAmt, MRI,
4642 LShrAmt == Amt) {
4643 // (or (shl x, (sub bw, amt)), (lshr y, amt)) -> (fshr x, y, amt)
4644 FshOpc = TargetOpcode::G_FSHR;
4645 } else {
4646 return false;
4647 }
4648
4649 LLT AmtTy = MRI.getType(Amt);
4650 if (!isLegalOrBeforeLegalizer({FshOpc, {Ty, AmtTy}}) &&
4651 (!AllowScalarConstants || CstShlAmt == 0 || !Ty.isScalar()))
4652 return false;
4653
4654 MatchInfo = [=](MachineIRBuilder &B) {
4655 B.buildInstr(FshOpc, {Dst}, {ShlSrc, LShrSrc, Amt});
4656 };
4657 return true;
4658}
4659
4660/// Match an FSHL or FSHR that can be combined to a ROTR or ROTL rotate.
4662 unsigned Opc = MI.getOpcode();
4663 assert(Opc == TargetOpcode::G_FSHL || Opc == TargetOpcode::G_FSHR);
4664 Register X = MI.getOperand(1).getReg();
4665 Register Y = MI.getOperand(2).getReg();
4666 if (X != Y)
4667 return false;
4668 unsigned RotateOpc =
4669 Opc == TargetOpcode::G_FSHL ? TargetOpcode::G_ROTL : TargetOpcode::G_ROTR;
4670 return isLegalOrBeforeLegalizer({RotateOpc, {MRI.getType(X), MRI.getType(Y)}});
4671}
4672
4674 unsigned Opc = MI.getOpcode();
4675 assert(Opc == TargetOpcode::G_FSHL || Opc == TargetOpcode::G_FSHR);
4676 bool IsFSHL = Opc == TargetOpcode::G_FSHL;
4677 Observer.changingInstr(MI);
4678 MI.setDesc(Builder.getTII().get(IsFSHL ? TargetOpcode::G_ROTL
4679 : TargetOpcode::G_ROTR));
4680 MI.removeOperand(2);
4681 Observer.changedInstr(MI);
4682}
4683
4684// Fold (rot x, c) -> (rot x, c % BitSize)
4686 assert(MI.getOpcode() == TargetOpcode::G_ROTL ||
4687 MI.getOpcode() == TargetOpcode::G_ROTR);
4688 unsigned Bitsize =
4689 MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits();
4690 Register AmtReg = MI.getOperand(2).getReg();
4691 bool OutOfRange = false;
4692 auto MatchOutOfRange = [Bitsize, &OutOfRange](const Constant *C) {
4693 if (auto *CI = dyn_cast<ConstantInt>(C))
4694 OutOfRange |= CI->getValue().uge(Bitsize);
4695 return true;
4696 };
4697 return matchUnaryPredicate(MRI, AmtReg, MatchOutOfRange) && OutOfRange;
4698}
4699
4701 assert(MI.getOpcode() == TargetOpcode::G_ROTL ||
4702 MI.getOpcode() == TargetOpcode::G_ROTR);
4703 unsigned Bitsize =
4704 MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits();
4705 Register Amt = MI.getOperand(2).getReg();
4706 LLT AmtTy = MRI.getType(Amt);
4707 auto Bits = Builder.buildConstant(AmtTy, Bitsize);
4708 Amt = Builder.buildURem(AmtTy, MI.getOperand(2).getReg(), Bits).getReg(0);
4709 Observer.changingInstr(MI);
4710 MI.getOperand(2).setReg(Amt);
4711 Observer.changedInstr(MI);
4712}
4713
4715 int64_t &MatchInfo) const {
4716 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
4717 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
4718
4719 // We want to avoid calling KnownBits on the LHS if possible, as this combine
4720 // has no filter and runs on every G_ICMP instruction. We can avoid calling
4721 // KnownBits on the LHS in two cases:
4722 //
4723 // - The RHS is unknown: Constants are always on RHS. If the RHS is unknown
4724 // we cannot do any transforms so we can safely bail out early.
4725 // - The RHS is zero: we don't need to know the LHS to do unsigned <0 and
4726 // >=0.
4727 auto KnownRHS = VT->getKnownBits(MI.getOperand(3).getReg());
4728 if (KnownRHS.isUnknown())
4729 return false;
4730
4731 std::optional<bool> KnownVal;
4732 if (KnownRHS.isZero()) {
4733 // ? uge 0 -> always true
4734 // ? ult 0 -> always false
4735 if (Pred == CmpInst::ICMP_UGE)
4736 KnownVal = true;
4737 else if (Pred == CmpInst::ICMP_ULT)
4738 KnownVal = false;
4739 }
4740
4741 if (!KnownVal) {
4742 auto KnownLHS = VT->getKnownBits(MI.getOperand(2).getReg());
4743 KnownVal = ICmpInst::compare(KnownLHS, KnownRHS, Pred);
4744 }
4745
4746 if (!KnownVal)
4747 return false;
4748 MatchInfo =
4749 *KnownVal
4751 /*IsVector = */
4752 MRI.getType(MI.getOperand(0).getReg()).isVector(),
4753 /* IsFP = */ false)
4754 : 0;
4755 return true;
4756}
4757
4760 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4761 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
4762 // Given:
4763 //
4764 // %x = G_WHATEVER (... x is known to be 0 or 1 ...)
4765 // %cmp = G_ICMP ne %x, 0
4766 //
4767 // Or:
4768 //
4769 // %x = G_WHATEVER (... x is known to be 0 or 1 ...)
4770 // %cmp = G_ICMP eq %x, 1
4771 //
4772 // We can replace %cmp with %x assuming true is 1 on the target.
4773 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
4774 if (!CmpInst::isEquality(Pred))
4775 return false;
4776 Register Dst = MI.getOperand(0).getReg();
4777 LLT DstTy = MRI.getType(Dst);
4779 /* IsFP = */ false) != 1)
4780 return false;
4781 int64_t OneOrZero = Pred == CmpInst::ICMP_EQ;
4782 if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICst(OneOrZero)))
4783 return false;
4784 Register LHS = MI.getOperand(2).getReg();
4785 auto KnownLHS = VT->getKnownBits(LHS);
4786 if (KnownLHS.getMinValue() != 0 || KnownLHS.getMaxValue() != 1)
4787 return false;
4788 // Make sure replacing Dst with the LHS is a legal operation.
4789 LLT LHSTy = MRI.getType(LHS);
4790 unsigned LHSSize = LHSTy.getSizeInBits();
4791 unsigned DstSize = DstTy.getSizeInBits();
4792 unsigned Op = TargetOpcode::COPY;
4793 if (DstSize != LHSSize)
4794 Op = DstSize < LHSSize ? TargetOpcode::G_TRUNC : TargetOpcode::G_ZEXT;
4795 if (!isLegalOrBeforeLegalizer({Op, {DstTy, LHSTy}}))
4796 return false;
4797 MatchInfo = [=](MachineIRBuilder &B) { B.buildInstr(Op, {Dst}, {LHS}); };
4798 return true;
4799}
4800
4801// Replace (and (or x, c1), c2) with (and x, c2) iff c1 & c2 == 0
4804 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4805 assert(MI.getOpcode() == TargetOpcode::G_AND);
4806
4807 // Ignore vector types to simplify matching the two constants.
4808 // TODO: do this for vectors and scalars via a demanded bits analysis.
4809 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4810 if (Ty.isVector())
4811 return false;
4812
4813 Register Src;
4814 Register AndMaskReg;
4815 int64_t AndMaskBits;
4816 int64_t OrMaskBits;
4817 if (!mi_match(MI, MRI,
4818 m_GAnd(m_GOr(m_Reg(Src), m_ICst(OrMaskBits)),
4819 m_all_of(m_ICst(AndMaskBits), m_Reg(AndMaskReg)))))
4820 return false;
4821
4822 // Check if OrMask could turn on any bits in Src.
4823 if (AndMaskBits & OrMaskBits)
4824 return false;
4825
4826 MatchInfo = [=, &MI](MachineIRBuilder &B) {
4827 Observer.changingInstr(MI);
4828 // Canonicalize the result to have the constant on the RHS.
4829 if (MI.getOperand(1).getReg() == AndMaskReg)
4830 MI.getOperand(2).setReg(AndMaskReg);
4831 MI.getOperand(1).setReg(Src);
4832 Observer.changedInstr(MI);
4833 };
4834 return true;
4835}
4836
4837/// Form a G_SBFX from a G_SEXT_INREG fed by a right shift.
4840 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4841 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
4842 Register Dst = MI.getOperand(0).getReg();
4843 Register Src = MI.getOperand(1).getReg();
4844 LLT Ty = MRI.getType(Src);
4846 if (!LI || !LI->isLegalOrCustom({TargetOpcode::G_SBFX, {Ty, ExtractTy}}))
4847 return false;
4848 int64_t Width = MI.getOperand(2).getImm();
4849 Register ShiftSrc;
4850 int64_t ShiftImm;
4851 if (!mi_match(
4852 Src, MRI,
4853 m_OneNonDBGUse(m_any_of(m_GAShr(m_Reg(ShiftSrc), m_ICst(ShiftImm)),
4854 m_GLShr(m_Reg(ShiftSrc), m_ICst(ShiftImm))))))
4855 return false;
4856 if (ShiftImm < 0 || ShiftImm + Width > Ty.getScalarSizeInBits())
4857 return false;
4858
4859 MatchInfo = [=](MachineIRBuilder &B) {
4860 auto Cst1 = B.buildConstant(ExtractTy, ShiftImm);
4861 auto Cst2 = B.buildConstant(ExtractTy, Width);
4862 B.buildSbfx(Dst, ShiftSrc, Cst1, Cst2);
4863 };
4864 return true;
4865}
4866
4867/// Form a G_UBFX from "(a srl b) & mask", where b and mask are constants.
4869 BuildFnTy &MatchInfo) const {
4870 GAnd *And = cast<GAnd>(&MI);
4871 Register Dst = And->getReg(0);
4872 LLT Ty = MRI.getType(Dst);
4874 // Note that isLegalOrBeforeLegalizer is stricter and does not take custom
4875 // into account.
4876 if (LI && !LI->isLegalOrCustom({TargetOpcode::G_UBFX, {Ty, ExtractTy}}))
4877 return false;
4878
4879 int64_t AndImm, LSBImm;
4880 Register ShiftSrc;
4881 const unsigned Size = Ty.getScalarSizeInBits();
4882 if (!mi_match(And->getReg(0), MRI,
4883 m_GAnd(m_OneNonDBGUse(m_GLShr(m_Reg(ShiftSrc), m_ICst(LSBImm))),
4884 m_ICst(AndImm))))
4885 return false;
4886
4887 // The mask is a mask of the low bits iff imm & (imm+1) == 0.
4888 auto MaybeMask = static_cast<uint64_t>(AndImm);
4889 if (MaybeMask & (MaybeMask + 1))
4890 return false;
4891
4892 // LSB must fit within the register.
4893 if (static_cast<uint64_t>(LSBImm) >= Size)
4894 return false;
4895
4896 uint64_t Width = APInt(Size, AndImm).countr_one();
4897 MatchInfo = [=](MachineIRBuilder &B) {
4898 auto WidthCst = B.buildConstant(ExtractTy, Width);
4899 auto LSBCst = B.buildConstant(ExtractTy, LSBImm);
4900 B.buildInstr(TargetOpcode::G_UBFX, {Dst}, {ShiftSrc, LSBCst, WidthCst});
4901 };
4902 return true;
4903}
4904
4907 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4908 const unsigned Opcode = MI.getOpcode();
4909 assert(Opcode == TargetOpcode::G_ASHR || Opcode == TargetOpcode::G_LSHR);
4910
4911 const Register Dst = MI.getOperand(0).getReg();
4912
4913 const unsigned ExtrOpcode = Opcode == TargetOpcode::G_ASHR
4914 ? TargetOpcode::G_SBFX
4915 : TargetOpcode::G_UBFX;
4916
4917 // Check if the type we would use for the extract is legal
4918 LLT Ty = MRI.getType(Dst);
4920 if (!LI || !LI->isLegalOrCustom({ExtrOpcode, {Ty, ExtractTy}}))
4921 return false;
4922
4923 Register ShlSrc;
4924 int64_t ShrAmt;
4925 int64_t ShlAmt;
4926 const unsigned Size = Ty.getScalarSizeInBits();
4927
4928 // Try to match shr (shl x, c1), c2
4929 if (!mi_match(Dst, MRI,
4930 m_BinOp(Opcode,
4931 m_OneNonDBGUse(m_GShl(m_Reg(ShlSrc), m_ICst(ShlAmt))),
4932 m_ICst(ShrAmt))))
4933 return false;
4934
4935 // Make sure that the shift sizes can fit a bitfield extract
4936 if (ShlAmt < 0 || ShlAmt > ShrAmt || ShrAmt >= Size)
4937 return false;
4938
4939 // Skip this combine if the G_SEXT_INREG combine could handle it
4940 if (Opcode == TargetOpcode::G_ASHR && ShlAmt == ShrAmt)
4941 return false;
4942
4943 // Calculate start position and width of the extract
4944 const int64_t Pos = ShrAmt - ShlAmt;
4945 const int64_t Width = Size - ShrAmt;
4946
4947 MatchInfo = [=](MachineIRBuilder &B) {
4948 auto WidthCst = B.buildConstant(ExtractTy, Width);
4949 auto PosCst = B.buildConstant(ExtractTy, Pos);
4950 B.buildInstr(ExtrOpcode, {Dst}, {ShlSrc, PosCst, WidthCst});
4951 };
4952 return true;
4953}
4954
4957 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4958 const unsigned Opcode = MI.getOpcode();
4959 assert(Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_ASHR);
4960
4961 const Register Dst = MI.getOperand(0).getReg();
4962 LLT Ty = MRI.getType(Dst);
4964 if (LI && !LI->isLegalOrCustom({TargetOpcode::G_UBFX, {Ty, ExtractTy}}))
4965 return false;
4966
4967 // Try to match shr (and x, c1), c2
4968 Register AndSrc;
4969 int64_t ShrAmt;
4970 int64_t SMask;
4971 if (!mi_match(Dst, MRI,
4972 m_BinOp(Opcode,
4973 m_OneNonDBGUse(m_GAnd(m_Reg(AndSrc), m_ICst(SMask))),
4974 m_ICst(ShrAmt))))
4975 return false;
4976
4977 const unsigned Size = Ty.getScalarSizeInBits();
4978 if (ShrAmt < 0 || ShrAmt >= Size)
4979 return false;
4980
4981 // If the shift subsumes the mask, emit the 0 directly.
4982 if (0 == (SMask >> ShrAmt)) {
4983 MatchInfo = [=](MachineIRBuilder &B) {
4984 B.buildConstant(Dst, 0);
4985 };
4986 return true;
4987 }
4988
4989 // Check that ubfx can do the extraction, with no holes in the mask.
4990 uint64_t UMask = SMask;
4991 UMask |= maskTrailingOnes<uint64_t>(ShrAmt);
4993 if (!isMask_64(UMask))
4994 return false;
4995
4996 // Calculate start position and width of the extract.
4997 const int64_t Pos = ShrAmt;
4998 const int64_t Width = llvm::countr_one(UMask) - ShrAmt;
4999
5000 // It's preferable to keep the shift, rather than form G_SBFX.
5001 // TODO: remove the G_AND via demanded bits analysis.
5002 if (Opcode == TargetOpcode::G_ASHR && Width + ShrAmt == Size)
5003 return false;
5004
5005 MatchInfo = [=](MachineIRBuilder &B) {
5006 auto WidthCst = B.buildConstant(ExtractTy, Width);
5007 auto PosCst = B.buildConstant(ExtractTy, Pos);
5008 B.buildInstr(TargetOpcode::G_UBFX, {Dst}, {AndSrc, PosCst, WidthCst});
5009 };
5010 return true;
5011}
5012
5013bool CombinerHelper::reassociationCanBreakAddressingModePattern(
5014 MachineInstr &MI) const {
5015 auto &PtrAdd = cast<GPtrAdd>(MI);
5016
5017 Register Src1Reg = PtrAdd.getBaseReg();
5018 auto *Src1Def = getOpcodeDef<GPtrAdd>(Src1Reg, MRI);
5019 if (!Src1Def)
5020 return false;
5021
5022 Register Src2Reg = PtrAdd.getOffsetReg();
5023
5024 if (MRI.hasOneNonDBGUse(Src1Reg))
5025 return false;
5026
5027 auto C1 = getIConstantVRegVal(Src1Def->getOffsetReg(), MRI);
5028 if (!C1)
5029 return false;
5030 auto C2 = getIConstantVRegVal(Src2Reg, MRI);
5031 if (!C2)
5032 return false;
5033
5034 const APInt &C1APIntVal = *C1;
5035 const APInt &C2APIntVal = *C2;
5036 const int64_t CombinedValue = (C1APIntVal + C2APIntVal).getSExtValue();
5037
5038 for (auto &UseMI : MRI.use_nodbg_instructions(PtrAdd.getReg(0))) {
5039 // This combine may end up running before ptrtoint/inttoptr combines
5040 // manage to eliminate redundant conversions, so try to look through them.
5041 MachineInstr *ConvUseMI = &UseMI;
5042 unsigned ConvUseOpc = ConvUseMI->getOpcode();
5043 while (ConvUseOpc == TargetOpcode::G_INTTOPTR ||
5044 ConvUseOpc == TargetOpcode::G_PTRTOINT) {
5045 Register DefReg = ConvUseMI->getOperand(0).getReg();
5046 if (!MRI.hasOneNonDBGUse(DefReg))
5047 break;
5048 ConvUseMI = &*MRI.use_instr_nodbg_begin(DefReg);
5049 ConvUseOpc = ConvUseMI->getOpcode();
5050 }
5051 auto *LdStMI = dyn_cast<GLoadStore>(ConvUseMI);
5052 if (!LdStMI)
5053 continue;
5054 // Is x[offset2] already not a legal addressing mode? If so then
5055 // reassociating the constants breaks nothing (we test offset2 because
5056 // that's the one we hope to fold into the load or store).
5057 TargetLoweringBase::AddrMode AM;
5058 AM.HasBaseReg = true;
5059 AM.BaseOffs = C2APIntVal.getSExtValue();
5060 unsigned AS = MRI.getType(LdStMI->getPointerReg()).getAddressSpace();
5061 Type *AccessTy = getTypeForLLT(LdStMI->getMMO().getMemoryType(),
5062 PtrAdd.getMF()->getFunction().getContext());
5063 const auto &TLI = *PtrAdd.getMF()->getSubtarget().getTargetLowering();
5064 if (!TLI.isLegalAddressingMode(PtrAdd.getMF()->getDataLayout(), AM,
5065 AccessTy, AS))
5066 continue;
5067
5068 // Would x[offset1+offset2] still be a legal addressing mode?
5069 AM.BaseOffs = CombinedValue;
5070 if (!TLI.isLegalAddressingMode(PtrAdd.getMF()->getDataLayout(), AM,
5071 AccessTy, AS))
5072 return true;
5073 }
5074
5075 return false;
5076}
5077
5079 MachineInstr *RHS,
5080 BuildFnTy &MatchInfo) const {
5081 // G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C)
5082 Register Src1Reg = MI.getOperand(1).getReg();
5083 if (RHS->getOpcode() != TargetOpcode::G_ADD)
5084 return false;
5085 auto C2 = getIConstantVRegVal(RHS->getOperand(2).getReg(), MRI);
5086 if (!C2)
5087 return false;
5088
5089 // If both additions are nuw, the reassociated additions are also nuw.
5090 // If the original G_PTR_ADD is additionally nusw, X and C are both not
5091 // negative, so BASE+X is between BASE and BASE+(X+C). The new G_PTR_ADDs are
5092 // therefore also nusw.
5093 // If the original G_PTR_ADD is additionally inbounds (which implies nusw),
5094 // the new G_PTR_ADDs are then also inbounds.
5095 unsigned PtrAddFlags = MI.getFlags();
5096 unsigned AddFlags = RHS->getFlags();
5097 bool IsNoUWrap = PtrAddFlags & AddFlags & MachineInstr::MIFlag::NoUWrap;
5098 bool IsNoUSWrap = IsNoUWrap && (PtrAddFlags & MachineInstr::MIFlag::NoUSWrap);
5099 bool IsInBounds = IsNoUWrap && (PtrAddFlags & MachineInstr::MIFlag::InBounds);
5100 unsigned Flags = 0;
5101 if (IsNoUWrap)
5103 if (IsNoUSWrap)
5105 if (IsInBounds)
5107
5108 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5109 LLT PtrTy = MRI.getType(MI.getOperand(0).getReg());
5110
5111 auto NewBase =
5112 Builder.buildPtrAdd(PtrTy, Src1Reg, RHS->getOperand(1).getReg(), Flags);
5113 Observer.changingInstr(MI);
5114 MI.getOperand(1).setReg(NewBase.getReg(0));
5115 MI.getOperand(2).setReg(RHS->getOperand(2).getReg());
5116 MI.setFlags(Flags);
5117 Observer.changedInstr(MI);
5118 };
5119 return !reassociationCanBreakAddressingModePattern(MI);
5120}
5121
5123 MachineInstr *LHS,
5124 MachineInstr *RHS,
5125 BuildFnTy &MatchInfo) const {
5126 // G_PTR_ADD(G_PTR_ADD(X, C), Y) -> G_PTR_ADD(G_PTR_ADD(X, Y), C)
5127 // if and only if (G_PTR_ADD X, C) has one use.
5128 Register LHSBase;
5129 std::optional<ValueAndVReg> LHSCstOff;
5130 if (!mi_match(MI.getBaseReg(), MRI,
5131 m_OneNonDBGUse(m_GPtrAdd(m_Reg(LHSBase), m_GCst(LHSCstOff)))))
5132 return false;
5133
5134 auto *LHSPtrAdd = cast<GPtrAdd>(LHS);
5135
5136 // Reassociating nuw additions preserves nuw. If both original G_PTR_ADDs are
5137 // nuw and inbounds (which implies nusw), the offsets are both non-negative,
5138 // so the new G_PTR_ADDs are also inbounds.
5139 unsigned PtrAddFlags = MI.getFlags();
5140 unsigned LHSPtrAddFlags = LHSPtrAdd->getFlags();
5141 bool IsNoUWrap = PtrAddFlags & LHSPtrAddFlags & MachineInstr::MIFlag::NoUWrap;
5142 bool IsNoUSWrap = IsNoUWrap && (PtrAddFlags & LHSPtrAddFlags &
5144 bool IsInBounds = IsNoUWrap && (PtrAddFlags & LHSPtrAddFlags &
5146 unsigned Flags = 0;
5147 if (IsNoUWrap)
5149 if (IsNoUSWrap)
5151 if (IsInBounds)
5153
5154 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5155 // When we change LHSPtrAdd's offset register we might cause it to use a reg
5156 // before its def. Sink the instruction to the outer PTR_ADD to ensure this
5157 // doesn't happen.
5158 LHSPtrAdd->moveBefore(&MI);
5159 Register RHSReg = MI.getOffsetReg();
5160 // set VReg will cause type mismatch if it comes from extend/trunc
5161 auto NewCst = B.buildConstant(MRI.getType(RHSReg), LHSCstOff->Value);
5162 Observer.changingInstr(MI);
5163 MI.getOperand(2).setReg(NewCst.getReg(0));
5164 MI.setFlags(Flags);
5165 Observer.changedInstr(MI);
5166 Observer.changingInstr(*LHSPtrAdd);
5167 LHSPtrAdd->getOperand(2).setReg(RHSReg);
5168 LHSPtrAdd->setFlags(Flags);
5169 Observer.changedInstr(*LHSPtrAdd);
5170 };
5171 return !reassociationCanBreakAddressingModePattern(MI);
5172}
5173
5175 GPtrAdd &MI, MachineInstr *LHS, MachineInstr *RHS,
5176 BuildFnTy &MatchInfo) const {
5177 // G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)
5178 auto *LHSPtrAdd = dyn_cast<GPtrAdd>(LHS);
5179 if (!LHSPtrAdd)
5180 return false;
5181
5182 Register Src2Reg = MI.getOperand(2).getReg();
5183 Register LHSSrc1 = LHSPtrAdd->getBaseReg();
5184 Register LHSSrc2 = LHSPtrAdd->getOffsetReg();
5185 auto C1 = getIConstantVRegVal(LHSSrc2, MRI);
5186 if (!C1)
5187 return false;
5188 auto C2 = getIConstantVRegVal(Src2Reg, MRI);
5189 if (!C2)
5190 return false;
5191
5192 // Reassociating nuw additions preserves nuw. If both original G_PTR_ADDs are
5193 // inbounds, reaching the same result in one G_PTR_ADD is also inbounds.
5194 // The nusw constraints are satisfied because imm1+imm2 cannot exceed the
5195 // largest signed integer that fits into the index type, which is the maximum
5196 // size of allocated objects according to the IR Language Reference.
5197 unsigned PtrAddFlags = MI.getFlags();
5198 unsigned LHSPtrAddFlags = LHSPtrAdd->getFlags();
5199 bool IsNoUWrap = PtrAddFlags & LHSPtrAddFlags & MachineInstr::MIFlag::NoUWrap;
5200 bool IsInBounds =
5201 PtrAddFlags & LHSPtrAddFlags & MachineInstr::MIFlag::InBounds;
5202 unsigned Flags = 0;
5203 if (IsNoUWrap)
5205 if (IsInBounds) {
5208 }
5209
5210 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5211 auto NewCst = B.buildConstant(MRI.getType(Src2Reg), *C1 + *C2);
5212 Observer.changingInstr(MI);
5213 MI.getOperand(1).setReg(LHSSrc1);
5214 MI.getOperand(2).setReg(NewCst.getReg(0));
5215 MI.setFlags(Flags);
5216 Observer.changedInstr(MI);
5217 };
5218 return !reassociationCanBreakAddressingModePattern(MI);
5219}
5220
5222 BuildFnTy &MatchInfo) const {
5223 auto &PtrAdd = cast<GPtrAdd>(MI);
5224 // We're trying to match a few pointer computation patterns here for
5225 // re-association opportunities.
5226 // 1) Isolating a constant operand to be on the RHS, e.g.:
5227 // G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C)
5228 //
5229 // 2) Folding two constants in each sub-tree as long as such folding
5230 // doesn't break a legal addressing mode.
5231 // G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)
5232 //
5233 // 3) Move a constant from the LHS of an inner op to the RHS of the outer.
5234 // G_PTR_ADD(G_PTR_ADD(X, C), Y) -> G_PTR_ADD(G_PTR_ADD(X, Y), C)
5235 // iff (G_PTR_ADD X, C) has one use.
5236 MachineInstr *LHS = MRI.getVRegDef(PtrAdd.getBaseReg());
5237 MachineInstr *RHS = MRI.getVRegDef(PtrAdd.getOffsetReg());
5238
5239 // Try to match example 2.
5240 if (matchReassocFoldConstantsInSubTree(PtrAdd, LHS, RHS, MatchInfo))
5241 return true;
5242
5243 // Try to match example 3.
5244 if (matchReassocConstantInnerLHS(PtrAdd, LHS, RHS, MatchInfo))
5245 return true;
5246
5247 // Try to match example 1.
5248 if (matchReassocConstantInnerRHS(PtrAdd, RHS, MatchInfo))
5249 return true;
5250
5251 return false;
5252}
5254 Register OpLHS, Register OpRHS,
5255 BuildFnTy &MatchInfo) const {
5256 LLT OpRHSTy = MRI.getType(OpRHS);
5257 MachineInstr *OpLHSDef = MRI.getVRegDef(OpLHS);
5258
5259 if (OpLHSDef->getOpcode() != Opc)
5260 return false;
5261
5262 MachineInstr *OpRHSDef = MRI.getVRegDef(OpRHS);
5263 Register OpLHSLHS = OpLHSDef->getOperand(1).getReg();
5264 Register OpLHSRHS = OpLHSDef->getOperand(2).getReg();
5265
5266 // If the inner op is (X op C), pull the constant out so it can be folded with
5267 // other constants in the expression tree. Folding is not guaranteed so we
5268 // might have (C1 op C2). In that case do not pull a constant out because it
5269 // won't help and can lead to infinite loops.
5270 if (isConstantOrConstantSplatVector(*MRI.getVRegDef(OpLHSRHS), MRI) &&
5271 !isConstantOrConstantSplatVector(*MRI.getVRegDef(OpLHSLHS), MRI)) {
5272 if (isConstantOrConstantSplatVector(*OpRHSDef, MRI)) {
5273 // (Opc (Opc X, C1), C2) -> (Opc X, (Opc C1, C2))
5274 MatchInfo = [=](MachineIRBuilder &B) {
5275 auto NewCst = B.buildInstr(Opc, {OpRHSTy}, {OpLHSRHS, OpRHS});
5276 B.buildInstr(Opc, {DstReg}, {OpLHSLHS, NewCst});
5277 };
5278 return true;
5279 }
5280 if (getTargetLowering().isReassocProfitable(MRI, OpLHS, OpRHS)) {
5281 // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
5282 // iff (op x, c1) has one use
5283 MatchInfo = [=](MachineIRBuilder &B) {
5284 auto NewLHSLHS = B.buildInstr(Opc, {OpRHSTy}, {OpLHSLHS, OpRHS});
5285 B.buildInstr(Opc, {DstReg}, {NewLHSLHS, OpLHSRHS});
5286 };
5287 return true;
5288 }
5289 }
5290
5291 return false;
5292}
5293
5295 BuildFnTy &MatchInfo) const {
5296 // We don't check if the reassociation will break a legal addressing mode
5297 // here since pointer arithmetic is handled by G_PTR_ADD.
5298 unsigned Opc = MI.getOpcode();
5299 Register DstReg = MI.getOperand(0).getReg();
5300 Register LHSReg = MI.getOperand(1).getReg();
5301 Register RHSReg = MI.getOperand(2).getReg();
5302
5303 if (tryReassocBinOp(Opc, DstReg, LHSReg, RHSReg, MatchInfo))
5304 return true;
5305 if (tryReassocBinOp(Opc, DstReg, RHSReg, LHSReg, MatchInfo))
5306 return true;
5307 return false;
5308}
5309
5311 APInt &MatchInfo) const {
5312 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
5313 Register SrcOp = MI.getOperand(1).getReg();
5314
5315 if (auto MaybeCst = ConstantFoldCastOp(MI.getOpcode(), DstTy, SrcOp, MRI)) {
5316 MatchInfo = *MaybeCst;
5317 return true;
5318 }
5319
5320 return false;
5321}
5322
5324 APInt &MatchInfo) const {
5325 Register Op1 = MI.getOperand(1).getReg();
5326 Register Op2 = MI.getOperand(2).getReg();
5327 auto MaybeCst = ConstantFoldBinOp(MI.getOpcode(), Op1, Op2, MRI);
5328 if (!MaybeCst)
5329 return false;
5330 MatchInfo = *MaybeCst;
5331 return true;
5332}
5333
5335 ConstantFP *&MatchInfo) const {
5336 Register Op1 = MI.getOperand(1).getReg();
5337 Register Op2 = MI.getOperand(2).getReg();
5338 auto MaybeCst = ConstantFoldFPBinOp(MI.getOpcode(), Op1, Op2, MRI);
5339 if (!MaybeCst)
5340 return false;
5341 MatchInfo =
5342 ConstantFP::get(MI.getMF()->getFunction().getContext(), *MaybeCst);
5343 return true;
5344}
5345
5347 ConstantFP *&MatchInfo) const {
5348 assert(MI.getOpcode() == TargetOpcode::G_FMA ||
5349 MI.getOpcode() == TargetOpcode::G_FMAD);
5350 auto [_, Op1, Op2, Op3] = MI.getFirst4Regs();
5351
5352 const ConstantFP *Op3Cst = getConstantFPVRegVal(Op3, MRI);
5353 if (!Op3Cst)
5354 return false;
5355
5356 const ConstantFP *Op2Cst = getConstantFPVRegVal(Op2, MRI);
5357 if (!Op2Cst)
5358 return false;
5359
5360 const ConstantFP *Op1Cst = getConstantFPVRegVal(Op1, MRI);
5361 if (!Op1Cst)
5362 return false;
5363
5364 APFloat Op1F = Op1Cst->getValueAPF();
5365 Op1F.fusedMultiplyAdd(Op2Cst->getValueAPF(), Op3Cst->getValueAPF(),
5367 MatchInfo = ConstantFP::get(MI.getMF()->getFunction().getContext(), Op1F);
5368 return true;
5369}
5370
5373 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
5374 // Look for a binop feeding into an AND with a mask:
5375 //
5376 // %add = G_ADD %lhs, %rhs
5377 // %and = G_AND %add, 000...11111111
5378 //
5379 // Check if it's possible to perform the binop at a narrower width and zext
5380 // back to the original width like so:
5381 //
5382 // %narrow_lhs = G_TRUNC %lhs
5383 // %narrow_rhs = G_TRUNC %rhs
5384 // %narrow_add = G_ADD %narrow_lhs, %narrow_rhs
5385 // %new_add = G_ZEXT %narrow_add
5386 // %and = G_AND %new_add, 000...11111111
5387 //
5388 // This can allow later combines to eliminate the G_AND if it turns out
5389 // that the mask is irrelevant.
5390 assert(MI.getOpcode() == TargetOpcode::G_AND);
5391 Register Dst = MI.getOperand(0).getReg();
5392 Register AndLHS = MI.getOperand(1).getReg();
5393 Register AndRHS = MI.getOperand(2).getReg();
5394 LLT WideTy = MRI.getType(Dst);
5395
5396 // If the potential binop has more than one use, then it's possible that one
5397 // of those uses will need its full width.
5398 if (!WideTy.isScalar() || !MRI.hasOneNonDBGUse(AndLHS))
5399 return false;
5400
5401 // Check if the LHS feeding the AND is impacted by the high bits that we're
5402 // masking out.
5403 //
5404 // e.g. for 64-bit x, y:
5405 //
5406 // add_64(x, y) & 65535 == zext(add_16(trunc(x), trunc(y))) & 65535
5407 MachineInstr *LHSInst = getDefIgnoringCopies(AndLHS, MRI);
5408 if (!LHSInst)
5409 return false;
5410 unsigned LHSOpc = LHSInst->getOpcode();
5411 switch (LHSOpc) {
5412 default:
5413 return false;
5414 case TargetOpcode::G_ADD:
5415 case TargetOpcode::G_SUB:
5416 case TargetOpcode::G_MUL:
5417 case TargetOpcode::G_AND:
5418 case TargetOpcode::G_OR:
5419 case TargetOpcode::G_XOR:
5420 break;
5421 }
5422
5423 // Find the mask on the RHS.
5424 auto Cst = getIConstantVRegValWithLookThrough(AndRHS, MRI);
5425 if (!Cst)
5426 return false;
5427 auto Mask = Cst->Value;
5428 if (!Mask.isMask())
5429 return false;
5430
5431 // No point in combining if there's nothing to truncate.
5432 unsigned NarrowWidth = Mask.countr_one();
5433 if (NarrowWidth == WideTy.getSizeInBits())
5434 return false;
5435 LLT NarrowTy = LLT::integer(NarrowWidth);
5436
5437 // Check if adding the zext + truncates could be harmful.
5438 auto &MF = *MI.getMF();
5439 const auto &TLI = getTargetLowering();
5440 LLVMContext &Ctx = MF.getFunction().getContext();
5441 if (!TLI.isTruncateFree(WideTy, NarrowTy, Ctx) ||
5442 !TLI.isZExtFree(NarrowTy, WideTy, Ctx))
5443 return false;
5444 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_TRUNC, {NarrowTy, WideTy}}) ||
5445 !isLegalOrBeforeLegalizer({TargetOpcode::G_ZEXT, {WideTy, NarrowTy}}))
5446 return false;
5447 Register BinOpLHS = LHSInst->getOperand(1).getReg();
5448 Register BinOpRHS = LHSInst->getOperand(2).getReg();
5449 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5450 auto NarrowLHS = Builder.buildTrunc(NarrowTy, BinOpLHS);
5451 auto NarrowRHS = Builder.buildTrunc(NarrowTy, BinOpRHS);
5452 auto NarrowBinOp =
5453 Builder.buildInstr(LHSOpc, {NarrowTy}, {NarrowLHS, NarrowRHS});
5454 auto Ext = Builder.buildZExt(WideTy, NarrowBinOp);
5455 Observer.changingInstr(MI);
5456 MI.getOperand(1).setReg(Ext.getReg(0));
5457 Observer.changedInstr(MI);
5458 };
5459 return true;
5460}
5461
5463 BuildFnTy &MatchInfo) const {
5464 unsigned Opc = MI.getOpcode();
5465 assert(Opc == TargetOpcode::G_UMULO || Opc == TargetOpcode::G_SMULO);
5466
5467 if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICstOrSplat(2)))
5468 return false;
5469
5470 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5471 Observer.changingInstr(MI);
5472 unsigned NewOpc = Opc == TargetOpcode::G_UMULO ? TargetOpcode::G_UADDO
5473 : TargetOpcode::G_SADDO;
5474 MI.setDesc(Builder.getTII().get(NewOpc));
5475 MI.getOperand(3).setReg(MI.getOperand(2).getReg());
5476 Observer.changedInstr(MI);
5477 };
5478 return true;
5479}
5480
5482 BuildFnTy &MatchInfo) const {
5483 // (G_*MULO x, 0) -> 0 + no carry out
5484 assert(MI.getOpcode() == TargetOpcode::G_UMULO ||
5485 MI.getOpcode() == TargetOpcode::G_SMULO);
5486 if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICstOrSplat(0)))
5487 return false;
5488 Register Dst = MI.getOperand(0).getReg();
5489 Register Carry = MI.getOperand(1).getReg();
5490 if (!isConstantLegalOrBeforeLegalizer(MRI.getType(Dst)) ||
5491 !isConstantLegalOrBeforeLegalizer(MRI.getType(Carry)))
5492 return false;
5493 MatchInfo = [=](MachineIRBuilder &B) {
5494 B.buildConstant(Dst, 0);
5495 B.buildConstant(Carry, 0);
5496 };
5497 return true;
5498}
5499
5501 BuildFnTy &MatchInfo) const {
5502 // (G_*ADDE x, y, 0) -> (G_*ADDO x, y)
5503 // (G_*SUBE x, y, 0) -> (G_*SUBO x, y)
5504 assert(MI.getOpcode() == TargetOpcode::G_UADDE ||
5505 MI.getOpcode() == TargetOpcode::G_SADDE ||
5506 MI.getOpcode() == TargetOpcode::G_USUBE ||
5507 MI.getOpcode() == TargetOpcode::G_SSUBE);
5508 if (!mi_match(MI.getOperand(4).getReg(), MRI, m_SpecificICstOrSplat(0)))
5509 return false;
5510 MatchInfo = [&](MachineIRBuilder &B) {
5511 unsigned NewOpcode;
5512 switch (MI.getOpcode()) {
5513 case TargetOpcode::G_UADDE:
5514 NewOpcode = TargetOpcode::G_UADDO;
5515 break;
5516 case TargetOpcode::G_SADDE:
5517 NewOpcode = TargetOpcode::G_SADDO;
5518 break;
5519 case TargetOpcode::G_USUBE:
5520 NewOpcode = TargetOpcode::G_USUBO;
5521 break;
5522 case TargetOpcode::G_SSUBE:
5523 NewOpcode = TargetOpcode::G_SSUBO;
5524 break;
5525 }
5526 Observer.changingInstr(MI);
5527 MI.setDesc(B.getTII().get(NewOpcode));
5528 MI.removeOperand(4);
5529 Observer.changedInstr(MI);
5530 };
5531 return true;
5532}
5533
5535 BuildFnTy &MatchInfo) const {
5536 assert(MI.getOpcode() == TargetOpcode::G_SUB);
5537 Register Dst = MI.getOperand(0).getReg();
5538 // (x + y) - z -> x (if y == z)
5539 // (x + y) - z -> y (if x == z)
5540 Register X, Y, Z;
5541 if (mi_match(Dst, MRI, m_GSub(m_GAdd(m_Reg(X), m_Reg(Y)), m_Reg(Z)))) {
5542 Register ReplaceReg;
5543 int64_t CstX, CstY;
5544 if (Y == Z || (mi_match(Y, MRI, m_ICstOrSplat(CstY)) &&
5546 ReplaceReg = X;
5547 else if (X == Z || (mi_match(X, MRI, m_ICstOrSplat(CstX)) &&
5549 ReplaceReg = Y;
5550 if (ReplaceReg) {
5551 MatchInfo = [=](MachineIRBuilder &B) { B.buildCopy(Dst, ReplaceReg); };
5552 return true;
5553 }
5554 }
5555
5556 // x - (y + z) -> 0 - y (if x == z)
5557 // x - (y + z) -> 0 - z (if x == y)
5558 if (mi_match(Dst, MRI, m_GSub(m_Reg(X), m_GAdd(m_Reg(Y), m_Reg(Z))))) {
5559 Register ReplaceReg;
5560 int64_t CstX;
5561 if (X == Z || (mi_match(X, MRI, m_ICstOrSplat(CstX)) &&
5563 ReplaceReg = Y;
5564 else if (X == Y || (mi_match(X, MRI, m_ICstOrSplat(CstX)) &&
5566 ReplaceReg = Z;
5567 if (ReplaceReg) {
5568 MatchInfo = [=](MachineIRBuilder &B) {
5569 auto Zero = B.buildConstant(MRI.getType(Dst), 0);
5570 B.buildSub(Dst, Zero, ReplaceReg);
5571 };
5572 return true;
5573 }
5574 }
5575 return false;
5576}
5577
5579 unsigned Opcode = MI.getOpcode();
5580 assert(Opcode == TargetOpcode::G_UDIV || Opcode == TargetOpcode::G_UREM);
5581 auto &UDivorRem = cast<GenericMachineInstr>(MI);
5582 Register Dst = UDivorRem.getReg(0);
5583 Register LHS = UDivorRem.getReg(1);
5584 Register RHS = UDivorRem.getReg(2);
5585 LLT Ty = MRI.getType(Dst);
5586 LLT ScalarTy = Ty.getScalarType();
5587 const unsigned EltBits = ScalarTy.getScalarSizeInBits();
5589 LLT ScalarShiftAmtTy = ShiftAmtTy.getScalarType();
5590
5591 auto &MIB = Builder;
5592
5593 bool UseSRL = false;
5594 SmallVector<Register, 16> Shifts, Factors;
5595 auto *RHSDefInstr = cast<GenericMachineInstr>(getDefIgnoringCopies(RHS, MRI));
5596 bool IsSplat = getIConstantSplatVal(*RHSDefInstr, MRI).has_value();
5597
5598 auto BuildExactUDIVPattern = [&](const Constant *C) {
5599 // Don't recompute inverses for each splat element.
5600 if (IsSplat && !Factors.empty()) {
5601 Shifts.push_back(Shifts[0]);
5602 Factors.push_back(Factors[0]);
5603 return true;
5604 }
5605
5606 auto *CI = cast<ConstantInt>(C);
5607 APInt Divisor = CI->getValue();
5608 unsigned Shift = Divisor.countr_zero();
5609 if (Shift) {
5610 Divisor.lshrInPlace(Shift);
5611 UseSRL = true;
5612 }
5613
5614 // Calculate the multiplicative inverse modulo BW.
5615 APInt Factor = Divisor.multiplicativeInverse();
5616 Shifts.push_back(MIB.buildConstant(ScalarShiftAmtTy, Shift).getReg(0));
5617 Factors.push_back(MIB.buildConstant(ScalarTy, Factor).getReg(0));
5618 return true;
5619 };
5620
5621 if (MI.getFlag(MachineInstr::MIFlag::IsExact)) {
5622 // Collect all magic values from the build vector.
5623 if (!matchUnaryPredicate(MRI, RHS, BuildExactUDIVPattern))
5624 llvm_unreachable("Expected unary predicate match to succeed");
5625
5626 Register Shift, Factor;
5627 if (Ty.isVector()) {
5628 Shift = MIB.buildBuildVector(ShiftAmtTy, Shifts).getReg(0);
5629 Factor = MIB.buildBuildVector(Ty, Factors).getReg(0);
5630 } else {
5631 Shift = Shifts[0];
5632 Factor = Factors[0];
5633 }
5634
5635 Register Res = LHS;
5636
5637 if (UseSRL)
5638 Res = MIB.buildLShr(Ty, Res, Shift, MachineInstr::IsExact).getReg(0);
5639
5640 return MIB.buildMul(Ty, Res, Factor);
5641 }
5642
5643 unsigned KnownLeadingZeros =
5644 VT ? VT->getKnownBits(LHS).countMinLeadingZeros() : 0;
5645
5646 bool UseNPQ = false;
5647 SmallVector<Register, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
5648 auto BuildUDIVPattern = [&](const Constant *C) {
5649 auto *CI = cast<ConstantInt>(C);
5650 const APInt &Divisor = CI->getValue();
5651
5652 bool SelNPQ = false;
5653 APInt Magic(Divisor.getBitWidth(), 0);
5654 unsigned PreShift = 0, PostShift = 0;
5655
5656 // Magic algorithm doesn't work for division by 1. We need to emit a select
5657 // at the end.
5658 // TODO: Use undef values for divisor of 1.
5659 if (!Divisor.isOne()) {
5660
5661 // UnsignedDivisionByConstantInfo doesn't work correctly if leading zeros
5662 // in the dividend exceeds the leading zeros for the divisor.
5665 Divisor, std::min(KnownLeadingZeros, Divisor.countl_zero()));
5666
5667 Magic = std::move(magics.Magic);
5668
5669 assert(magics.PreShift < Divisor.getBitWidth() &&
5670 "We shouldn't generate an undefined shift!");
5671 assert(magics.PostShift < Divisor.getBitWidth() &&
5672 "We shouldn't generate an undefined shift!");
5673 assert((!magics.IsAdd || magics.PreShift == 0) && "Unexpected pre-shift");
5674 PreShift = magics.PreShift;
5675 PostShift = magics.PostShift;
5676 SelNPQ = magics.IsAdd;
5677 }
5678
5679 PreShifts.push_back(
5680 MIB.buildConstant(ScalarShiftAmtTy, PreShift).getReg(0));
5681 MagicFactors.push_back(MIB.buildConstant(ScalarTy, Magic).getReg(0));
5682 NPQFactors.push_back(
5683 MIB.buildConstant(ScalarTy,
5684 SelNPQ ? APInt::getOneBitSet(EltBits, EltBits - 1)
5685 : APInt::getZero(EltBits))
5686 .getReg(0));
5687 PostShifts.push_back(
5688 MIB.buildConstant(ScalarShiftAmtTy, PostShift).getReg(0));
5689 UseNPQ |= SelNPQ;
5690 return true;
5691 };
5692
5693 // Collect the shifts/magic values from each element.
5694 bool Matched = matchUnaryPredicate(MRI, RHS, BuildUDIVPattern);
5695 (void)Matched;
5696 assert(Matched && "Expected unary predicate match to succeed");
5697
5698 Register PreShift, PostShift, MagicFactor, NPQFactor;
5699 auto *RHSDef = getOpcodeDef<GBuildVector>(RHS, MRI);
5700 if (RHSDef) {
5701 PreShift = MIB.buildBuildVector(ShiftAmtTy, PreShifts).getReg(0);
5702 MagicFactor = MIB.buildBuildVector(Ty, MagicFactors).getReg(0);
5703 NPQFactor = MIB.buildBuildVector(Ty, NPQFactors).getReg(0);
5704 PostShift = MIB.buildBuildVector(ShiftAmtTy, PostShifts).getReg(0);
5705 } else {
5706 assert(MRI.getType(RHS).isScalar() &&
5707 "Non-build_vector operation should have been a scalar");
5708 PreShift = PreShifts[0];
5709 MagicFactor = MagicFactors[0];
5710 PostShift = PostShifts[0];
5711 }
5712
5713 Register Q = LHS;
5714 Q = MIB.buildLShr(Ty, Q, PreShift).getReg(0);
5715
5716 // Multiply the numerator (operand 0) by the magic value.
5717 Q = MIB.buildUMulH(Ty, Q, MagicFactor).getReg(0);
5718
5719 if (UseNPQ) {
5720 Register NPQ = MIB.buildSub(Ty, LHS, Q).getReg(0);
5721
5722 // For vectors we might have a mix of non-NPQ/NPQ paths, so use
5723 // G_UMULH to act as a SRL-by-1 for NPQ, else multiply by zero.
5724 if (Ty.isVector())
5725 NPQ = MIB.buildUMulH(Ty, NPQ, NPQFactor).getReg(0);
5726 else
5727 NPQ = MIB.buildLShr(Ty, NPQ, MIB.buildConstant(ShiftAmtTy, 1)).getReg(0);
5728
5729 Q = MIB.buildAdd(Ty, NPQ, Q).getReg(0);
5730 }
5731
5732 Q = MIB.buildLShr(Ty, Q, PostShift).getReg(0);
5733 auto One = MIB.buildConstant(Ty, 1);
5734 auto IsOne = MIB.buildICmp(
5736 Ty.isScalar() ? LLT::integer(1) : Ty.changeElementType(LLT::integer(1)),
5737 RHS, One);
5738 auto ret = MIB.buildSelect(Ty, IsOne, LHS, Q);
5739
5740 if (Opcode == TargetOpcode::G_UREM) {
5741 auto Prod = MIB.buildMul(Ty, ret, RHS);
5742 return MIB.buildSub(Ty, LHS, Prod);
5743 }
5744 return ret;
5745}
5746
5748 unsigned Opcode = MI.getOpcode();
5749 assert(Opcode == TargetOpcode::G_UDIV || Opcode == TargetOpcode::G_UREM);
5750 Register Dst = MI.getOperand(0).getReg();
5751 Register RHS = MI.getOperand(2).getReg();
5752 LLT DstTy = MRI.getType(Dst);
5753
5754 auto &MF = *MI.getMF();
5755 AttributeList Attr = MF.getFunction().getAttributes();
5756 const auto &TLI = getTargetLowering();
5757 LLVMContext &Ctx = MF.getFunction().getContext();
5758 if (DstTy.getScalarSizeInBits() == 1 ||
5759 TLI.isIntDivCheap(getApproximateEVTForLLT(DstTy, Ctx), Attr))
5760 return false;
5761
5762 // Don't do this for minsize because the instruction sequence is usually
5763 // larger.
5764 if (MF.getFunction().hasMinSize())
5765 return false;
5766
5767 if (Opcode == TargetOpcode::G_UDIV &&
5769 return matchUnaryPredicate(
5770 MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); });
5771 }
5772
5773 auto *RHSDef = MRI.getVRegDef(RHS);
5774 if (!isConstantOrConstantVector(*RHSDef, MRI))
5775 return false;
5776
5777 // Don't do this if the types are not going to be legal.
5778 if (LI) {
5779 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_MUL, {DstTy, DstTy}}))
5780 return false;
5781 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMULH, {DstTy}}))
5782 return false;
5784 {TargetOpcode::G_ICMP,
5785 {DstTy.isVector() ? DstTy.changeElementSize(1) : LLT::scalar(1),
5786 DstTy}}))
5787 return false;
5788 if (Opcode == TargetOpcode::G_UREM &&
5789 !isLegalOrBeforeLegalizer({TargetOpcode::G_SUB, {DstTy, DstTy}}))
5790 return false;
5791 }
5792
5793 return matchUnaryPredicate(
5794 MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); });
5795}
5796
5798 auto *NewMI = buildUDivOrURemUsingMul(MI);
5799 replaceSingleDefInstWithReg(MI, NewMI->getOperand(0).getReg());
5800}
5801
5803 unsigned Opcode = MI.getOpcode();
5804 assert(Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_SREM);
5805 Register Dst = MI.getOperand(0).getReg();
5806 Register RHS = MI.getOperand(2).getReg();
5807 LLT DstTy = MRI.getType(Dst);
5808 auto SizeInBits = DstTy.getScalarSizeInBits();
5809 LLT WideTy = DstTy.changeElementSize(SizeInBits * 2);
5810
5811 auto &MF = *MI.getMF();
5812 AttributeList Attr = MF.getFunction().getAttributes();
5813 const auto &TLI = getTargetLowering();
5814 LLVMContext &Ctx = MF.getFunction().getContext();
5815 if (DstTy.getScalarSizeInBits() < 3 ||
5816 TLI.isIntDivCheap(getApproximateEVTForLLT(DstTy, Ctx), Attr))
5817 return false;
5818
5819 // Don't do this for minsize because the instruction sequence is usually
5820 // larger.
5821 if (MF.getFunction().hasMinSize())
5822 return false;
5823
5824 // If the sdiv has an 'exact' flag we can use a simpler lowering.
5825 if (Opcode == TargetOpcode::G_SDIV &&
5827 return matchUnaryPredicate(
5828 MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); });
5829 }
5830
5831 auto *RHSDef = MRI.getVRegDef(RHS);
5832 if (!isConstantOrConstantVector(*RHSDef, MRI))
5833 return false;
5834
5835 // Don't do this if the types are not going to be legal.
5836 if (LI) {
5837 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_MUL, {DstTy, DstTy}}))
5838 return false;
5839 if (!isLegal({TargetOpcode::G_SMULH, {DstTy}}) &&
5840 !isLegalOrHasWidenScalar({TargetOpcode::G_MUL, {WideTy, WideTy}}))
5841 return false;
5842 if (Opcode == TargetOpcode::G_SREM &&
5843 !isLegalOrBeforeLegalizer({TargetOpcode::G_SUB, {DstTy, DstTy}}))
5844 return false;
5845 }
5846
5847 return matchUnaryPredicate(
5848 MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); });
5849}
5850
5852 auto *NewMI = buildSDivOrSRemUsingMul(MI);
5853 replaceSingleDefInstWithReg(MI, NewMI->getOperand(0).getReg());
5854}
5855
5857 unsigned Opcode = MI.getOpcode();
5858 assert(MI.getOpcode() == TargetOpcode::G_SDIV ||
5859 Opcode == TargetOpcode::G_SREM);
5860 auto &SDivorRem = cast<GenericMachineInstr>(MI);
5861 Register Dst = SDivorRem.getReg(0);
5862 Register LHS = SDivorRem.getReg(1);
5863 Register RHS = SDivorRem.getReg(2);
5864 LLT Ty = MRI.getType(Dst);
5865 LLT ScalarTy = Ty.getScalarType();
5866 const unsigned EltBits = ScalarTy.getScalarSizeInBits();
5868 LLT ScalarShiftAmtTy = ShiftAmtTy.getScalarType();
5869 auto &MIB = Builder;
5870
5871 bool UseSRA = false;
5872 SmallVector<Register, 16> ExactShifts, ExactFactors;
5873
5874 auto *RHSDefInstr = cast<GenericMachineInstr>(getDefIgnoringCopies(RHS, MRI));
5875 bool IsSplat = getIConstantSplatVal(*RHSDefInstr, MRI).has_value();
5876
5877 auto BuildExactSDIVPattern = [&](const Constant *C) {
5878 // Don't recompute inverses for each splat element.
5879 if (IsSplat && !ExactFactors.empty()) {
5880 ExactShifts.push_back(ExactShifts[0]);
5881 ExactFactors.push_back(ExactFactors[0]);
5882 return true;
5883 }
5884
5885 auto *CI = cast<ConstantInt>(C);
5886 APInt Divisor = CI->getValue();
5887 unsigned Shift = Divisor.countr_zero();
5888 if (Shift) {
5889 Divisor.ashrInPlace(Shift);
5890 UseSRA = true;
5891 }
5892
5893 // Calculate the multiplicative inverse of the divisor (trailing zeros
5894 // already shifted out above) modulo 2^BW, at the divisor's own bit width.
5895 APInt Factor = Divisor.multiplicativeInverse();
5896 ExactShifts.push_back(MIB.buildConstant(ScalarShiftAmtTy, Shift).getReg(0));
5897 ExactFactors.push_back(MIB.buildConstant(ScalarTy, Factor).getReg(0));
5898 return true;
5899 };
5900
5901 if (MI.getFlag(MachineInstr::MIFlag::IsExact)) {
5902 // Collect all magic values from the build vector.
5903 bool Matched = matchUnaryPredicate(MRI, RHS, BuildExactSDIVPattern);
5904 (void)Matched;
5905 assert(Matched && "Expected unary predicate match to succeed");
5906
5907 Register Shift, Factor;
5908 if (Ty.isVector()) {
5909 Shift = MIB.buildBuildVector(ShiftAmtTy, ExactShifts).getReg(0);
5910 Factor = MIB.buildBuildVector(Ty, ExactFactors).getReg(0);
5911 } else {
5912 Shift = ExactShifts[0];
5913 Factor = ExactFactors[0];
5914 }
5915
5916 Register Res = LHS;
5917
5918 if (UseSRA)
5919 Res = MIB.buildAShr(Ty, Res, Shift, MachineInstr::IsExact).getReg(0);
5920
5921 return MIB.buildMul(Ty, Res, Factor);
5922 }
5923
5924 SmallVector<Register, 16> MagicFactors, Factors, Shifts, ShiftMasks;
5925
5926 auto BuildSDIVPattern = [&](const Constant *C) {
5927 auto *CI = cast<ConstantInt>(C);
5928 const APInt &Divisor = CI->getValue();
5929
5932 int NumeratorFactor = 0;
5933 int ShiftMask = -1;
5934
5935 if (Divisor.isOne() || Divisor.isAllOnes()) {
5936 // If d is +1/-1, we just multiply the numerator by +1/-1.
5937 NumeratorFactor = Divisor.getSExtValue();
5938 Magics.Magic = 0;
5939 Magics.ShiftAmount = 0;
5940 ShiftMask = 0;
5941 } else if (Divisor.isStrictlyPositive() && Magics.Magic.isNegative()) {
5942 // If d > 0 and m < 0, add the numerator.
5943 NumeratorFactor = 1;
5944 } else if (Divisor.isNegative() && Magics.Magic.isStrictlyPositive()) {
5945 // If d < 0 and m > 0, subtract the numerator.
5946 NumeratorFactor = -1;
5947 }
5948
5949 MagicFactors.push_back(MIB.buildConstant(ScalarTy, Magics.Magic).getReg(0));
5950 Factors.push_back(MIB.buildConstant(ScalarTy, NumeratorFactor).getReg(0));
5951 Shifts.push_back(
5952 MIB.buildConstant(ScalarShiftAmtTy, Magics.ShiftAmount).getReg(0));
5953 ShiftMasks.push_back(MIB.buildConstant(ScalarTy, ShiftMask).getReg(0));
5954
5955 return true;
5956 };
5957
5958 // Collect the shifts/magic values from each element.
5959 bool Matched = matchUnaryPredicate(MRI, RHS, BuildSDIVPattern);
5960 (void)Matched;
5961 assert(Matched && "Expected unary predicate match to succeed");
5962
5963 Register MagicFactor, Factor, Shift, ShiftMask;
5964 auto *RHSDef = getOpcodeDef<GBuildVector>(RHS, MRI);
5965 if (RHSDef) {
5966 MagicFactor = MIB.buildBuildVector(Ty, MagicFactors).getReg(0);
5967 Factor = MIB.buildBuildVector(Ty, Factors).getReg(0);
5968 Shift = MIB.buildBuildVector(ShiftAmtTy, Shifts).getReg(0);
5969 ShiftMask = MIB.buildBuildVector(Ty, ShiftMasks).getReg(0);
5970 } else {
5971 assert(MRI.getType(RHS).isScalar() &&
5972 "Non-build_vector operation should have been a scalar");
5973 MagicFactor = MagicFactors[0];
5974 Factor = Factors[0];
5975 Shift = Shifts[0];
5976 ShiftMask = ShiftMasks[0];
5977 }
5978
5979 Register Q = LHS;
5980 Q = MIB.buildSMulH(Ty, LHS, MagicFactor).getReg(0);
5981
5982 // (Optionally) Add/subtract the numerator using Factor.
5983 Factor = MIB.buildMul(Ty, LHS, Factor).getReg(0);
5984 Q = MIB.buildAdd(Ty, Q, Factor).getReg(0);
5985
5986 // Shift right algebraic by shift value.
5987 Q = MIB.buildAShr(Ty, Q, Shift).getReg(0);
5988
5989 // Extract the sign bit, mask it and add it to the quotient.
5990 auto SignShift = MIB.buildConstant(ShiftAmtTy, EltBits - 1);
5991 auto T = MIB.buildLShr(Ty, Q, SignShift);
5992 T = MIB.buildAnd(Ty, T, ShiftMask);
5993 auto ret = MIB.buildAdd(Ty, Q, T);
5994
5995 if (Opcode == TargetOpcode::G_SREM) {
5996 auto Prod = MIB.buildMul(Ty, ret, RHS);
5997 return MIB.buildSub(Ty, LHS, Prod);
5998 }
5999 return ret;
6000}
6001
// Matches a G_SDIV/G_UDIV whose divisor (operand 2) is a constant, or a
// constant vector, in which every element is a power of two. When IsSigned is
// set, negated powers of two are accepted as well. Undef elements are rejected.
6003 assert((MI.getOpcode() == TargetOpcode::G_SDIV ||
6004 MI.getOpcode() == TargetOpcode::G_UDIV) &&
6005 "Expected SDIV or UDIV");
6006 auto &Div = cast<GenericMachineInstr>(MI);
6007 Register RHS = Div.getReg(2);
// Per-element predicate: anything that is not a ConstantInt fails the match.
6008 auto MatchPow2 = [&](const Constant *C) {
6009 auto *CI = dyn_cast<ConstantInt>(C);
6010 return CI && (CI->getValue().isPowerOf2() ||
6011 (IsSigned && CI->getValue().isNegatedPowerOf2()));
6012 };
6013 return matchUnaryPredicate(MRI, RHS, MatchPow2, /*AllowUndefs=*/false);
6014}
6015
// Replaces a G_SDIV by a shift-based sequence and erases the original
// instruction. NOTE(review): the sequence is only meaningful when the divisor
// is a (possibly negated) power of two - presumably guaranteed by the matcher
// paired with this apply; confirm against the combine rule.
6017 assert(MI.getOpcode() == TargetOpcode::G_SDIV && "Expected SDIV");
6018 auto &SDiv = cast<GenericMachineInstr>(MI);
6019 Register Dst = SDiv.getReg(0);
6020 Register LHS = SDiv.getReg(1);
6021 Register RHS = SDiv.getReg(2);
6022 LLT Ty = MRI.getType(Dst);
// Type of the comparison results: a 1-bit integer, or a vector of them with
// the same element count as Ty.
6024 LLT CCVT = Ty.isVector() ? LLT::vector(Ty.getElementCount(), LLT::integer(1))
6025 : LLT::integer(1);
6026
6027 // Effectively we want to lower G_SDIV %lhs, %rhs, where %rhs is a power of 2,
6028 // to the following version:
6029 //
6030 // %c1 = G_CTTZ %rhs
6031 // %inexact = G_SUB $bitwidth, %c1
6032 // %sign = G_ASHR %lhs, $(bitwidth - 1)
6033 // %lshr = G_LSHR %sign, %inexact
6034 // %add = G_ADD %lhs, %lshr
6035 // %ashr = G_ASHR %add, %c1
6036 // %ashr = G_SELECT %isoneorallones, %lhs, %ashr
6037 // %zero = G_CONSTANT $0
6038 // %neg = G_NEG %ashr
6039 // %isneg = G_ICMP SLT %rhs, %zero
6040 // %res = G_SELECT %isneg, %neg, %ashr
6041
6042 unsigned BitWidth = Ty.getScalarSizeInBits();
6043 auto Zero = Builder.buildConstant(Ty, 0);
6044
6045 auto Bits = Builder.buildConstant(ShiftAmtTy, BitWidth);
6046 auto C1 = Builder.buildCTTZ(ShiftAmtTy, RHS);
6047 auto Inexact = Builder.buildSub(ShiftAmtTy, Bits, C1);
6048 // Splat the sign bit into the register
6049 auto Sign = Builder.buildAShr(
6050 Ty, LHS, Builder.buildConstant(ShiftAmtTy, BitWidth - 1));
6051
6052 // Add (LHS < 0) ? abs2 - 1 : 0;
6053 auto LSrl = Builder.buildLShr(Ty, Sign, Inexact);
6054 auto Add = Builder.buildAdd(Ty, LHS, LSrl);
6055 auto AShr = Builder.buildAShr(Ty, Add, C1);
6056
6057 // Special case: (sdiv X, 1) -> X
6058 // Special case: (sdiv X, -1) -> 0-X
// Both cases select LHS here; the negation for -1 is applied by the final
// select on the sign of the divisor below.
6059 auto One = Builder.buildConstant(Ty, 1);
6060 auto MinusOne = Builder.buildConstant(Ty, -1);
6061 auto IsOne = Builder.buildICmp(CmpInst::Predicate::ICMP_EQ, CCVT, RHS, One);
6062 auto IsMinusOne =
6063 Builder.buildICmp(CmpInst::Predicate::ICMP_EQ, CCVT, RHS, MinusOne);
6064 auto IsOneOrMinusOne = Builder.buildOr(CCVT, IsOne, IsMinusOne);
6065 AShr = Builder.buildSelect(Ty, IsOneOrMinusOne, LHS, AShr);
6066
6067 // If divided by a positive value, we're done. Otherwise, the result must be
6068 // negated.
6069 auto Neg = Builder.buildNeg(Ty, AShr);
6070 auto IsNeg = Builder.buildICmp(CmpInst::Predicate::ICMP_SLT, CCVT, RHS, Zero);
6071 Builder.buildSelect(MI.getOperand(0).getReg(), IsNeg, Neg, AShr);
6072 MI.eraseFromParent();
6073}
6074
// Replaces a G_UDIV by a logical shift right of the dividend, where the shift
// amount is the count of trailing zeros of the divisor, then erases the
// original instruction.
// NOTE(review): equivalent to the division only when the divisor is a power
// of two - presumably guaranteed by the matcher paired with this apply.
6076 assert(MI.getOpcode() == TargetOpcode::G_UDIV && "Expected UDIV");
6077 auto &UDiv = cast<GenericMachineInstr>(MI);
6078 Register Dst = UDiv.getReg(0);
6079 Register LHS = UDiv.getReg(1);
6080 Register RHS = UDiv.getReg(2);
6081 LLT Ty = MRI.getType(Dst);
6083
6084 auto C1 = Builder.buildCTTZ(ShiftAmtTy, RHS);
6085 Builder.buildLShr(MI.getOperand(0).getReg(), LHS, C1);
6086 MI.eraseFromParent();
6087}
6088
// Matches a G_UMULH whose second source operand is a constant (or constant
// vector) of powers of two other than one, provided the instructions the
// rewrite needs are legal or legalization has not run yet.
6090 assert(MI.getOpcode() == TargetOpcode::G_UMULH);
6091 Register RHS = MI.getOperand(2).getReg();
6092 Register Dst = MI.getOperand(0).getReg();
6093 LLT Ty = MRI.getType(Dst);
6094 LLT RHSTy = MRI.getType(RHS);
// Per-element predicate: a ConstantInt that is a power of two and not 1.
6096 auto MatchPow2ExceptOne = [&](const Constant *C) {
6097 if (auto *CI = dyn_cast<ConstantInt>(C))
6098 return CI->getValue().isPowerOf2() && !CI->getValue().isOne();
6099 return false;
6100 };
6101 if (!matchUnaryPredicate(MRI, RHS, MatchPow2ExceptOne, false))
6102 return false;
6103 // We need to check both G_LSHR and G_CTLZ because the combine uses G_CTLZ to
6104 // get log base 2, and it is not always legal on a target.
6105 return isLegalOrBeforeLegalizer({TargetOpcode::G_LSHR, {Ty, ShiftAmtTy}}) &&
6106 isLegalOrBeforeLegalizer({TargetOpcode::G_CTLZ, {RHSTy, RHSTy}});
6107}
6108
// Replaces the instruction with a logical shift right of operand 1 by
// (element bit width - log2(operand 2)) and erases the original instruction.
6110 Register LHS = MI.getOperand(1).getReg();
6111 Register RHS = MI.getOperand(2).getReg();
6112 Register Dst = MI.getOperand(0).getReg();
6113 LLT Ty = MRI.getType(Dst);
6115 unsigned NumEltBits = Ty.getScalarSizeInBits();
6116
6117 auto LogBase2 = buildLogBase2(RHS, Builder);
// The subtraction is done in the value type; the result is then converted to
// the shift-amount type before being used as the shift operand.
6118 auto ShiftAmt =
6119 Builder.buildSub(Ty, Builder.buildConstant(Ty, NumEltBits), LogBase2);
6120 auto Trunc = Builder.buildZExtOrTrunc(ShiftAmtTy, ShiftAmt);
6121 Builder.buildLShr(Dst, LHS, Trunc);
6122 MI.eraseFromParent();
6123}
6124
6126 Register &MatchInfo) const {
6127 Register Dst = MI.getOperand(0).getReg();
6128 Register Src = MI.getOperand(1).getReg();
6129 LLT DstTy = MRI.getType(Dst);
6130 LLT SrcTy = MRI.getType(Src);
6131 unsigned NumDstBits = DstTy.getScalarSizeInBits();
6132 unsigned NumSrcBits = SrcTy.getScalarSizeInBits();
6133 assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
6134
6136 {TargetOpcode::G_TRUNC_SSAT_S, {DstTy, SrcTy}}))
6137 return false;
6138
6139 APInt SignedMax = APInt::getSignedMaxValue(NumDstBits).sext(NumSrcBits);
6140 APInt SignedMin = APInt::getSignedMinValue(NumDstBits).sext(NumSrcBits);
6141 return mi_match(Src, MRI,
6142 m_GSMin(m_GSMax(m_Reg(MatchInfo),
6143 m_SpecificICstOrSplat(SignedMin)),
6144 m_SpecificICstOrSplat(SignedMax))) ||
6145 mi_match(Src, MRI,
6146 m_GSMax(m_GSMin(m_Reg(MatchInfo),
6147 m_SpecificICstOrSplat(SignedMax)),
6148 m_SpecificICstOrSplat(SignedMin)));
6149}
6150
6152 Register &MatchInfo) const {
6153 Register Dst = MI.getOperand(0).getReg();
6154 Builder.buildTruncSSatS(Dst, MatchInfo);
6155 MI.eraseFromParent();
6156}
6157
6159 Register &MatchInfo) const {
6160 Register Dst = MI.getOperand(0).getReg();
6161 Register Src = MI.getOperand(1).getReg();
6162 LLT DstTy = MRI.getType(Dst);
6163 LLT SrcTy = MRI.getType(Src);
6164 unsigned NumDstBits = DstTy.getScalarSizeInBits();
6165 unsigned NumSrcBits = SrcTy.getScalarSizeInBits();
6166 assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
6167
6169 {TargetOpcode::G_TRUNC_SSAT_U, {DstTy, SrcTy}}))
6170 return false;
6171 APInt UnsignedMax = APInt::getMaxValue(NumDstBits).zext(NumSrcBits);
6172 return mi_match(Src, MRI,
6174 m_SpecificICstOrSplat(UnsignedMax))) ||
6175 mi_match(Src, MRI,
6176 m_GSMax(m_GSMin(m_Reg(MatchInfo),
6177 m_SpecificICstOrSplat(UnsignedMax)),
6178 m_SpecificICstOrSplat(0))) ||
6179 mi_match(Src, MRI,
6181 m_SpecificICstOrSplat(UnsignedMax)));
6182}
6183
6185 Register &MatchInfo) const {
6186 Register Dst = MI.getOperand(0).getReg();
6187 Builder.buildTruncSSatU(Dst, MatchInfo);
6188 MI.eraseFromParent();
6189}
6190
6192 MachineInstr &MinMI) const {
6193 Register Min = MinMI.getOperand(2).getReg();
6194 Register Val = MinMI.getOperand(1).getReg();
6195 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6196 LLT SrcTy = MRI.getType(Val);
6197 unsigned NumDstBits = DstTy.getScalarSizeInBits();
6198 unsigned NumSrcBits = SrcTy.getScalarSizeInBits();
6199 assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
6200
6202 {TargetOpcode::G_TRUNC_SSAT_U, {DstTy, SrcTy}}))
6203 return false;
6204 APInt UnsignedMax = APInt::getMaxValue(NumDstBits).zext(NumSrcBits);
6205 return mi_match(Min, MRI, m_SpecificICstOrSplat(UnsignedMax)) &&
6206 !mi_match(Val, MRI, m_GSMax(m_Reg(), m_Reg()));
6207}
6208
6210 MachineInstr &SrcMI) const {
6211 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6212 LLT SrcTy = MRI.getType(SrcMI.getOperand(1).getReg());
6213
6214 return LI &&
6215 isLegalOrBeforeLegalizer({TargetOpcode::G_FPTOUI_SAT, {DstTy, SrcTy}});
6216}
6217
6219 BuildFnTy &MatchInfo) const {
// Matches floating-point instructions whose G_FNEG operands can be removed,
// either by switching between G_FADD and G_FSUB or by cancelling a pair of
// negations. On success MatchInfo is set to a callback that rewrites MI in
// place (new opcode, new operands 1 and 2), bracketed by Observer
// notifications.
6220 unsigned Opc = MI.getOpcode();
6221 assert(Opc == TargetOpcode::G_FADD || Opc == TargetOpcode::G_FSUB ||
6222 Opc == TargetOpcode::G_FMUL || Opc == TargetOpcode::G_FDIV ||
6223 Opc == TargetOpcode::G_FMAD || Opc == TargetOpcode::G_FMA);
6224
6225 Register Dst = MI.getOperand(0).getReg();
6226 Register X = MI.getOperand(1).getReg();
6227 Register Y = MI.getOperand(2).getReg();
6228 LLT Type = MRI.getType(Dst);
6229
6230 // fold (fadd x, fneg(y)) -> (fsub x, y)
6231 // fold (fadd fneg(y), x) -> (fsub x, y)
6232 // G_FADD is commutative so both cases are checked by m_GFAdd
6233 if (mi_match(Dst, MRI, m_GFAdd(m_Reg(X), m_GFNeg(m_Reg(Y)))) &&
6234 isLegalOrBeforeLegalizer({TargetOpcode::G_FSUB, {Type}})) {
6235 Opc = TargetOpcode::G_FSUB;
6236 }
6237 // fold (fsub x, fneg(y)) -> (fadd x, y)
6238 else if (mi_match(Dst, MRI, m_GFSub(m_Reg(X), m_GFNeg(m_Reg(Y)))) &&
6239 isLegalOrBeforeLegalizer({TargetOpcode::G_FADD, {Type}})) {
6240 Opc = TargetOpcode::G_FADD;
6241 }
6242 // fold (fmul fneg(x), fneg(y)) -> (fmul x, y)
6243 // fold (fdiv fneg(x), fneg(y)) -> (fdiv x, y)
6244 // fold (fmad fneg(x), fneg(y), z) -> (fmad x, y, z)
6245 // fold (fma fneg(x), fneg(y), z) -> (fma x, y, z)
6246 else if ((Opc == TargetOpcode::G_FMUL || Opc == TargetOpcode::G_FDIV ||
6247 Opc == TargetOpcode::G_FMAD || Opc == TargetOpcode::G_FMA) &&
6248 mi_match(X, MRI, m_GFNeg(m_Reg(X))) &&
6249 mi_match(Y, MRI, m_GFNeg(m_Reg(Y)))) {
6250 // no opcode change
6251 } else
6252 return false;
6253
// Opc, X and Y are captured by value, so the callback sees the opcode and
// the registers bound by the successful match above.
6254 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6255 Observer.changingInstr(MI);
6256 MI.setDesc(B.getTII().get(Opc));
6257 MI.getOperand(1).setReg(X);
6258 MI.getOperand(2).setReg(Y);
6259 Observer.changedInstr(MI);
6260 };
6261 return true;
6262}
6263
6265 Register &MatchInfo) const {
6266 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
6267
6268 Register LHS = MI.getOperand(1).getReg();
6269 MatchInfo = MI.getOperand(2).getReg();
6270 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
6271
6272 const auto LHSCst = Ty.isVector()
6273 ? getFConstantSplat(LHS, MRI, /* allowUndef */ true)
6275 if (!LHSCst)
6276 return false;
6277
6278 // -0.0 is always allowed
6279 if (LHSCst->Value.isNegZero())
6280 return true;
6281
6282 // +0.0 is only allowed if nsz is set.
6283 if (LHSCst->Value.isPosZero())
6284 return MI.getFlag(MachineInstr::FmNsz);
6285
6286 return false;
6287}
6288
6290 Register &MatchInfo) const {
6291 Register Dst = MI.getOperand(0).getReg();
6292 Builder.buildFNeg(
6293 Dst, Builder.buildFCanonicalize(MRI.getType(Dst), MatchInfo).getReg(0));
6294 eraseInst(MI);
6295}
6296
6297 /// \returns true when \p MI is a G_FMUL that may be contracted: either
6298 /// \p AllowFusionGlobally is set or \p MI carries the FmContract flag.
6299static bool isContractableFMul(MachineInstr &MI, bool AllowFusionGlobally) {
6300 if (MI.getOpcode() != TargetOpcode::G_FMUL)
6301 return false;
6302 return AllowFusionGlobally || MI.getFlag(MachineInstr::MIFlag::FmContract);
6303}
6304
/// \returns true when the register in operand 0 of \p MI0 has strictly more
/// non-debug using instructions than the register in operand 0 of \p MI1.
/// Each count is obtained by walking the use list with std::distance.
6305static bool hasMoreUses(const MachineInstr &MI0, const MachineInstr &MI1,
6306 const MachineRegisterInfo &MRI) {
6307 return std::distance(MRI.use_instr_nodbg_begin(MI0.getOperand(0).getReg()),
6308 MRI.use_instr_nodbg_end()) >
6309 std::distance(MRI.use_instr_nodbg_begin(MI1.getOperand(0).getReg()),
6310 MRI.use_instr_nodbg_end());
6311}
6312
6314 bool &AllowFusionGlobally,
6315 bool &HasFMAD, bool &Aggressive,
6316 bool CanReassociate) const {
// Decides whether MI may be combined into a fused multiply-add and reports
// the relevant target facts through the reference parameters:
//   HasFMAD             - not before legalization and the target reports
//                         FMAD as legal for the result type.
//   AllowFusionGlobally - fast FP-op fusion is enabled in the target options,
//                         or HasFMAD is true.
//   Aggressive          - the target enables aggressive FMA fusion for the
//                         result type.
// The outputs are assigned in the order HasFMAD, AllowFusionGlobally,
// Aggressive; an early `return false` leaves the later ones unassigned, so
// callers must not read them after a false result.
6317
6318 auto *MF = MI.getMF();
6319 const auto &TLI = *MF->getSubtarget().getTargetLowering();
6320 const TargetOptions &Options = MF->getTarget().Options;
6321 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
6322
// When the caller asks for a reassociating combine, MI itself must carry the
// reassoc flag.
6323 if (CanReassociate && !MI.getFlag(MachineInstr::MIFlag::FmReassoc))
6324 return false;
6325
6326 // Floating-point multiply-add with intermediate rounding.
6327 HasFMAD = (!isPreLegalize() && TLI.isFMADLegal(MI, DstType));
6328 // Floating-point multiply-add without intermediate rounding.
6329 bool HasFMA = TLI.isFMAFasterThanFMulAndFAdd(*MF, DstType) &&
6330 isLegalOrBeforeLegalizer({TargetOpcode::G_FMA, {DstType}});
6331 // No valid opcode, do not combine.
6332 if (!HasFMAD && !HasFMA)
6333 return false;
6334
6335 AllowFusionGlobally = Options.AllowFPOpFusion == FPOpFusion::Fast || HasFMAD;
6336 // If the addition is not contractable, do not combine.
6337 if (!AllowFusionGlobally && !MI.getFlag(MachineInstr::MIFlag::FmContract))
6338 return false;
6339
6340 Aggressive = TLI.enableAggressiveFMAFusion(DstType);
6341 return true;
6342}
6343
6346 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
6347 assert(MI.getOpcode() == TargetOpcode::G_FADD);
6348
6349 bool AllowFusionGlobally, HasFMAD, Aggressive;
6350 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6351 return false;
6352
6353 Register Op1 = MI.getOperand(1).getReg();
6354 Register Op2 = MI.getOperand(2).getReg();
6355 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
6356 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
6357 unsigned PreferredFusedOpcode =
6358 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6359
6360 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
6361 // prefer to fold the multiply with fewer uses.
6362 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6363 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
6364 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
6365 std::swap(LHS, RHS);
6366 }
6367
6368 // fold (fadd (fmul x, y), z) -> (fma x, y, z)
6369 if (isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6370 (Aggressive || MRI.hasOneNonDBGUse(LHS.Reg))) {
6371 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6372 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6373 {LHS.MI->getOperand(1).getReg(),
6374 LHS.MI->getOperand(2).getReg(), RHS.Reg});
6375 };
6376 return true;
6377 }
6378
6379 // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
6380 if (isContractableFMul(*RHS.MI, AllowFusionGlobally) &&
6381 (Aggressive || MRI.hasOneNonDBGUse(RHS.Reg))) {
6382 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6383 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6384 {RHS.MI->getOperand(1).getReg(),
6385 RHS.MI->getOperand(2).getReg(), LHS.Reg});
6386 };
6387 return true;
6388 }
6389
6390 return false;
6391}
6392
6395 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
6396 assert(MI.getOpcode() == TargetOpcode::G_FADD);
6397
6398 bool AllowFusionGlobally, HasFMAD, Aggressive;
6399 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6400 return false;
6401
6402 const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
6403 Register Op1 = MI.getOperand(1).getReg();
6404 Register Op2 = MI.getOperand(2).getReg();
6405 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
6406 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
6407 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
6408
6409 unsigned PreferredFusedOpcode =
6410 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6411
6412 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
6413 // prefer to fold the multiply with fewer uses.
6414 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6415 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
6416 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
6417 std::swap(LHS, RHS);
6418 }
6419
6420 // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
6421 MachineInstr *FpExtSrc;
6422 if (mi_match(LHS.Reg, MRI, m_GFPExt(m_MInstr(FpExtSrc))) &&
6423 isContractableFMul(*FpExtSrc, AllowFusionGlobally) &&
6424 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
6425 MRI.getType(FpExtSrc->getOperand(1).getReg()))) {
6426 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6427 auto FpExtX = B.buildFPExt(DstType, FpExtSrc->getOperand(1).getReg());
6428 auto FpExtY = B.buildFPExt(DstType, FpExtSrc->getOperand(2).getReg());
6429 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6430 {FpExtX.getReg(0), FpExtY.getReg(0), RHS.Reg});
6431 };
6432 return true;
6433 }
6434
6435 // fold (fadd z, (fpext (fmul x, y))) -> (fma (fpext x), (fpext y), z)
6436 // Note: Commutes FADD operands.
6437 if (mi_match(RHS.Reg, MRI, m_GFPExt(m_MInstr(FpExtSrc))) &&
6438 isContractableFMul(*FpExtSrc, AllowFusionGlobally) &&
6439 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
6440 MRI.getType(FpExtSrc->getOperand(1).getReg()))) {
6441 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6442 auto FpExtX = B.buildFPExt(DstType, FpExtSrc->getOperand(1).getReg());
6443 auto FpExtY = B.buildFPExt(DstType, FpExtSrc->getOperand(2).getReg());
6444 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6445 {FpExtX.getReg(0), FpExtY.getReg(0), LHS.Reg});
6446 };
6447 return true;
6448 }
6449
6450 return false;
6451}
6452
6455 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
6456 assert(MI.getOpcode() == TargetOpcode::G_FADD);
6457
6458 bool AllowFusionGlobally, HasFMAD, Aggressive;
6459 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive, true))
6460 return false;
6461
6462 Register Op1 = MI.getOperand(1).getReg();
6463 Register Op2 = MI.getOperand(2).getReg();
6464 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
6465 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
6466 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6467
6468 unsigned PreferredFusedOpcode =
6469 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6470
6471 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
6472 // prefer to fold the multiply with fewer uses.
6473 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6474 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
6475 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
6476 std::swap(LHS, RHS);
6477 }
6478
6479 MachineInstr *FMA = nullptr;
6480 Register Z;
6481 // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z))
6482 if (LHS.MI->getOpcode() == PreferredFusedOpcode &&
6483 (MRI.getVRegDef(LHS.MI->getOperand(3).getReg())->getOpcode() ==
6484 TargetOpcode::G_FMUL) &&
6485 MRI.hasOneNonDBGUse(LHS.MI->getOperand(0).getReg()) &&
6486 MRI.hasOneNonDBGUse(LHS.MI->getOperand(3).getReg())) {
6487 FMA = LHS.MI;
6488 Z = RHS.Reg;
6489 }
6490 // fold (fadd z, (fma x, y, (fmul u, v))) -> (fma x, y, (fma u, v, z))
6491 else if (RHS.MI->getOpcode() == PreferredFusedOpcode &&
6492 (MRI.getVRegDef(RHS.MI->getOperand(3).getReg())->getOpcode() ==
6493 TargetOpcode::G_FMUL) &&
6494 MRI.hasOneNonDBGUse(RHS.MI->getOperand(0).getReg()) &&
6495 MRI.hasOneNonDBGUse(RHS.MI->getOperand(3).getReg())) {
6496 Z = LHS.Reg;
6497 FMA = RHS.MI;
6498 }
6499
6500 if (FMA) {
6501 MachineInstr *FMulMI = MRI.getVRegDef(FMA->getOperand(3).getReg());
6502 Register X = FMA->getOperand(1).getReg();
6503 Register Y = FMA->getOperand(2).getReg();
6504 Register U = FMulMI->getOperand(1).getReg();
6505 Register V = FMulMI->getOperand(2).getReg();
6506
6507 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6508 Register InnerFMA = MRI.createGenericVirtualRegister(DstTy);
6509 B.buildInstr(PreferredFusedOpcode, {InnerFMA}, {U, V, Z});
6510 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6511 {X, Y, InnerFMA});
6512 };
6513 return true;
6514 }
6515
6516 return false;
6517}
6518
6521 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
6522 assert(MI.getOpcode() == TargetOpcode::G_FADD);
6523
6524 bool AllowFusionGlobally, HasFMAD, Aggressive;
6525 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6526 return false;
6527
6528 if (!Aggressive)
6529 return false;
6530
6531 const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
6532 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
6533 Register Op1 = MI.getOperand(1).getReg();
6534 Register Op2 = MI.getOperand(2).getReg();
6535 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
6536 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
6537
6538 unsigned PreferredFusedOpcode =
6539 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6540
6541 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
6542 // prefer to fold the multiply with fewer uses.
6543 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6544 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
6545 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
6546 std::swap(LHS, RHS);
6547 }
6548
6549 // Builds: (fma x, y, (fma (fpext u), (fpext v), z))
6550 auto buildMatchInfo = [=, &MI](Register U, Register V, Register Z, Register X,
6552 Register FpExtU = B.buildFPExt(DstType, U).getReg(0);
6553 Register FpExtV = B.buildFPExt(DstType, V).getReg(0);
6554 Register InnerFMA =
6555 B.buildInstr(PreferredFusedOpcode, {DstType}, {FpExtU, FpExtV, Z})
6556 .getReg(0);
6557 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6558 {X, Y, InnerFMA});
6559 };
6560
6561 MachineInstr *FMulMI, *FMAMI;
6562 // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
6563 // -> (fma x, y, (fma (fpext u), (fpext v), z))
6564 if (LHS.MI->getOpcode() == PreferredFusedOpcode &&
6565 mi_match(LHS.MI->getOperand(3).getReg(), MRI,
6566 m_GFPExt(m_MInstr(FMulMI))) &&
6567 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6568 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
6569 MRI.getType(FMulMI->getOperand(0).getReg()))) {
6570 MatchInfo = [=](MachineIRBuilder &B) {
6571 buildMatchInfo(FMulMI->getOperand(1).getReg(),
6572 FMulMI->getOperand(2).getReg(), RHS.Reg,
6573 LHS.MI->getOperand(1).getReg(),
6574 LHS.MI->getOperand(2).getReg(), B);
6575 };
6576 return true;
6577 }
6578
6579 // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
6580 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
6581 // FIXME: This turns two single-precision and one double-precision
6582 // operation into two double-precision operations, which might not be
6583 // interesting for all targets, especially GPUs.
6584 if (mi_match(LHS.Reg, MRI, m_GFPExt(m_MInstr(FMAMI))) &&
6585 FMAMI->getOpcode() == PreferredFusedOpcode) {
6586 MachineInstr *FMulMI = MRI.getVRegDef(FMAMI->getOperand(3).getReg());
6587 if (isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6588 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
6589 MRI.getType(FMAMI->getOperand(0).getReg()))) {
6590 MatchInfo = [=](MachineIRBuilder &B) {
6591 Register X = FMAMI->getOperand(1).getReg();
6592 Register Y = FMAMI->getOperand(2).getReg();
6593 X = B.buildFPExt(DstType, X).getReg(0);
6594 Y = B.buildFPExt(DstType, Y).getReg(0);
6595 buildMatchInfo(FMulMI->getOperand(1).getReg(),
6596 FMulMI->getOperand(2).getReg(), RHS.Reg, X, Y, B);
6597 };
6598
6599 return true;
6600 }
6601 }
6602
6603 // fold (fadd z, (fma x, y, (fpext (fmul u, v))))
6604 // -> (fma x, y, (fma (fpext u), (fpext v), z))
6605 if (RHS.MI->getOpcode() == PreferredFusedOpcode &&
6606 mi_match(RHS.MI->getOperand(3).getReg(), MRI,
6607 m_GFPExt(m_MInstr(FMulMI))) &&
6608 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6609 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
6610 MRI.getType(FMulMI->getOperand(0).getReg()))) {
6611 MatchInfo = [=](MachineIRBuilder &B) {
6612 buildMatchInfo(FMulMI->getOperand(1).getReg(),
6613 FMulMI->getOperand(2).getReg(), LHS.Reg,
6614 RHS.MI->getOperand(1).getReg(),
6615 RHS.MI->getOperand(2).getReg(), B);
6616 };
6617 return true;
6618 }
6619
6620 // fold (fadd z, (fpext (fma x, y, (fmul u, v))))
6621 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
6622 // FIXME: This turns two single-precision and one double-precision
6623 // operation into two double-precision operations, which might not be
6624 // interesting for all targets, especially GPUs.
6625 if (mi_match(RHS.Reg, MRI, m_GFPExt(m_MInstr(FMAMI))) &&
6626 FMAMI->getOpcode() == PreferredFusedOpcode) {
6627 MachineInstr *FMulMI = MRI.getVRegDef(FMAMI->getOperand(3).getReg());
6628 if (isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6629 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
6630 MRI.getType(FMAMI->getOperand(0).getReg()))) {
6631 MatchInfo = [=](MachineIRBuilder &B) {
6632 Register X = FMAMI->getOperand(1).getReg();
6633 Register Y = FMAMI->getOperand(2).getReg();
6634 X = B.buildFPExt(DstType, X).getReg(0);
6635 Y = B.buildFPExt(DstType, Y).getReg(0);
6636 buildMatchInfo(FMulMI->getOperand(1).getReg(),
6637 FMulMI->getOperand(2).getReg(), LHS.Reg, X, Y, B);
6638 };
6639 return true;
6640 }
6641 }
6642
6643 return false;
6644}
6645
6648 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
6649 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
6650
6651 bool AllowFusionGlobally, HasFMAD, Aggressive;
6652 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6653 return false;
6654
6655 Register Op1 = MI.getOperand(1).getReg();
6656 Register Op2 = MI.getOperand(2).getReg();
6657 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
6658 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
6659 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6660
6661 // If we have two choices trying to fold (fsub (fmul u, v), (fmul x, y)),
6662 // prefer to fold the multiply with fewer uses.
6663 int FirstMulHasFewerUses = true;
6664 if (isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6665 isContractableFMul(*RHS.MI, AllowFusionGlobally) &&
6666 hasMoreUses(*LHS.MI, *RHS.MI, MRI))
6667 FirstMulHasFewerUses = false;
6668
6669 unsigned PreferredFusedOpcode =
6670 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6671
6672 // fold (fsub (fmul x, y), z) -> (fma x, y, -z)
6673 if (FirstMulHasFewerUses &&
6674 (isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6675 (Aggressive || MRI.hasOneNonDBGUse(LHS.Reg)))) {
6676 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6677 Register NegZ = B.buildFNeg(DstTy, RHS.Reg).getReg(0);
6678 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6679 {LHS.MI->getOperand(1).getReg(),
6680 LHS.MI->getOperand(2).getReg(), NegZ});
6681 };
6682 return true;
6683 }
6684 // fold (fsub x, (fmul y, z)) -> (fma -y, z, x)
6685 else if ((isContractableFMul(*RHS.MI, AllowFusionGlobally) &&
6686 (Aggressive || MRI.hasOneNonDBGUse(RHS.Reg)))) {
6687 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6688 Register NegY =
6689 B.buildFNeg(DstTy, RHS.MI->getOperand(1).getReg()).getReg(0);
6690 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6691 {NegY, RHS.MI->getOperand(2).getReg(), LHS.Reg});
6692 };
6693 return true;
6694 }
6695
6696 return false;
6697}
6698
6701 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
6702 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
6703
6704 bool AllowFusionGlobally, HasFMAD, Aggressive;
6705 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6706 return false;
6707
6708 Register LHSReg = MI.getOperand(1).getReg();
6709 Register RHSReg = MI.getOperand(2).getReg();
6710 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6711
6712 unsigned PreferredFusedOpcode =
6713 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6714
6715 MachineInstr *FMulMI;
6716 // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
6717 if (mi_match(LHSReg, MRI, m_GFNeg(m_MInstr(FMulMI))) &&
6718 (Aggressive || (MRI.hasOneNonDBGUse(LHSReg) &&
6719 MRI.hasOneNonDBGUse(FMulMI->getOperand(0).getReg()))) &&
6720 isContractableFMul(*FMulMI, AllowFusionGlobally)) {
6721 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6722 Register NegX =
6723 B.buildFNeg(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
6724 Register NegZ = B.buildFNeg(DstTy, RHSReg).getReg(0);
6725 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6726 {NegX, FMulMI->getOperand(2).getReg(), NegZ});
6727 };
6728 return true;
6729 }
6730
6731 // fold (fsub x, (fneg (fmul y, z))) -> (fma y, z, x)
6732 if (mi_match(RHSReg, MRI, m_GFNeg(m_MInstr(FMulMI))) &&
6733 (Aggressive || (MRI.hasOneNonDBGUse(RHSReg) &&
6734 MRI.hasOneNonDBGUse(FMulMI->getOperand(0).getReg()))) &&
6735 isContractableFMul(*FMulMI, AllowFusionGlobally)) {
6736 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6737 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6738 {FMulMI->getOperand(1).getReg(),
6739 FMulMI->getOperand(2).getReg(), LHSReg});
6740 };
6741 return true;
6742 }
6743
6744 return false;
6745}
6746
6749 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
6750 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
6751
6752 bool AllowFusionGlobally, HasFMAD, Aggressive;
6753 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6754 return false;
6755
6756 Register LHSReg = MI.getOperand(1).getReg();
6757 Register RHSReg = MI.getOperand(2).getReg();
6758 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6759
6760 unsigned PreferredFusedOpcode =
6761 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6762
6763 MachineInstr *FMulMI;
6764 // fold (fsub (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), (fneg z))
6765 if (mi_match(LHSReg, MRI, m_GFPExt(m_MInstr(FMulMI))) &&
6766 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6767 (Aggressive || MRI.hasOneNonDBGUse(LHSReg))) {
6768 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6769 Register FpExtX =
6770 B.buildFPExt(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
6771 Register FpExtY =
6772 B.buildFPExt(DstTy, FMulMI->getOperand(2).getReg()).getReg(0);
6773 Register NegZ = B.buildFNeg(DstTy, RHSReg).getReg(0);
6774 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6775 {FpExtX, FpExtY, NegZ});
6776 };
6777 return true;
6778 }
6779
6780 // fold (fsub x, (fpext (fmul y, z))) -> (fma (fneg (fpext y)), (fpext z), x)
6781 if (mi_match(RHSReg, MRI, m_GFPExt(m_MInstr(FMulMI))) &&
6782 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6783 (Aggressive || MRI.hasOneNonDBGUse(RHSReg))) {
6784 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6785 Register FpExtY =
6786 B.buildFPExt(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
6787 Register NegY = B.buildFNeg(DstTy, FpExtY).getReg(0);
6788 Register FpExtZ =
6789 B.buildFPExt(DstTy, FMulMI->getOperand(2).getReg()).getReg(0);
6790 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6791 {NegY, FpExtZ, LHSReg});
6792 };
6793 return true;
6794 }
6795
6796 return false;
6797}
6798
6801 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
6802 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
6803
6804 bool AllowFusionGlobally, HasFMAD, Aggressive;
6805 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6806 return false;
6807
6808 const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
6809 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6810 Register LHSReg = MI.getOperand(1).getReg();
6811 Register RHSReg = MI.getOperand(2).getReg();
6812
6813 unsigned PreferredFusedOpcode =
6814 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6815
6816 auto buildMatchInfo = [=](Register Dst, Register X, Register Y, Register Z,
6818 Register FpExtX = B.buildFPExt(DstTy, X).getReg(0);
6819 Register FpExtY = B.buildFPExt(DstTy, Y).getReg(0);
6820 B.buildInstr(PreferredFusedOpcode, {Dst}, {FpExtX, FpExtY, Z});
6821 };
6822
6823 MachineInstr *FMulMI;
6824 // fold (fsub (fpext (fneg (fmul x, y))), z) ->
6825 // (fneg (fma (fpext x), (fpext y), z))
6826 // fold (fsub (fneg (fpext (fmul x, y))), z) ->
6827 // (fneg (fma (fpext x), (fpext y), z))
6828 if ((mi_match(LHSReg, MRI, m_GFPExt(m_GFNeg(m_MInstr(FMulMI)))) ||
6829 mi_match(LHSReg, MRI, m_GFNeg(m_GFPExt(m_MInstr(FMulMI))))) &&
6830 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6831 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstTy,
6832 MRI.getType(FMulMI->getOperand(0).getReg()))) {
6833 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6834 Register FMAReg = MRI.createGenericVirtualRegister(DstTy);
6835 buildMatchInfo(FMAReg, FMulMI->getOperand(1).getReg(),
6836 FMulMI->getOperand(2).getReg(), RHSReg, B);
6837 B.buildFNeg(MI.getOperand(0).getReg(), FMAReg);
6838 };
6839 return true;
6840 }
6841
6842 // fold (fsub x, (fpext (fneg (fmul y, z)))) -> (fma (fpext y), (fpext z), x)
6843 // fold (fsub x, (fneg (fpext (fmul y, z)))) -> (fma (fpext y), (fpext z), x)
6844 if ((mi_match(RHSReg, MRI, m_GFPExt(m_GFNeg(m_MInstr(FMulMI)))) ||
6845 mi_match(RHSReg, MRI, m_GFNeg(m_GFPExt(m_MInstr(FMulMI))))) &&
6846 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6847 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstTy,
6848 MRI.getType(FMulMI->getOperand(0).getReg()))) {
6849 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6850 buildMatchInfo(MI.getOperand(0).getReg(), FMulMI->getOperand(1).getReg(),
6851 FMulMI->getOperand(2).getReg(), LHSReg, B);
6852 };
6853 return true;
6854 }
6855
6856 return false;
6857}
6858
6860 unsigned &IdxToPropagate) const {
6861 bool PropagateNaN;
6862 switch (MI.getOpcode()) {
6863 default:
6864 return false;
6865 case TargetOpcode::G_FMINNUM:
6866 case TargetOpcode::G_FMAXNUM:
6867 PropagateNaN = false;
6868 break;
6869 case TargetOpcode::G_FMINIMUM:
6870 case TargetOpcode::G_FMAXIMUM:
6871 PropagateNaN = true;
6872 break;
6873 }
6874
6875 auto MatchNaN = [&](unsigned Idx) {
6876 Register MaybeNaNReg = MI.getOperand(Idx).getReg();
6877 const ConstantFP *MaybeCst = getConstantFPVRegVal(MaybeNaNReg, MRI);
6878 if (!MaybeCst || !MaybeCst->getValueAPF().isNaN())
6879 return false;
6880 IdxToPropagate = PropagateNaN ? Idx : (Idx == 1 ? 2 : 1);
6881 return true;
6882 };
6883
6884 return MatchNaN(1) || MatchNaN(2);
6885}
6886
6887// Combine multiple FDIVs with the same divisor into multiple FMULs by the
6888// reciprocal.
6889// E.g., (a / Y; b / Y;) -> (recip = 1.0 / Y; a * recip; b * recip)
6891 MachineInstr &MI, SmallVector<MachineInstr *> &MatchInfo) const {
6892 assert(MI.getOpcode() == TargetOpcode::G_FDIV);
6893
6894 Register X = MI.getOperand(1).getReg();
6895 Register Y = MI.getOperand(2).getReg();
6896
6897 if (!MI.getFlag(MachineInstr::MIFlag::FmArcp))
6898 return false;
6899
6900 // Skip if current node is a reciprocal/fneg-reciprocal.
6901 auto N0CFP = isConstantOrConstantSplatVectorFP(*MRI.getVRegDef(X), MRI);
6902 if (N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0)))
6903 return false;
6904
6905 // Exit early if the target does not want this transform or if there can't
6906 // possibly be enough uses of the divisor to make the transform worthwhile.
6907 unsigned MinUses = getTargetLowering().combineRepeatedFPDivisors();
6908 if (!MinUses)
6909 return false;
6910
6911 // Find all FDIV users of the same divisor. For the moment we limit all
6912 // instructions to a single BB and use the first Instr in MatchInfo as the
6913 // dominating position.
6914 MatchInfo.push_back(&MI);
6915 for (auto &U : MRI.use_nodbg_instructions(Y)) {
6916 if (&U == &MI || U.getParent() != MI.getParent())
6917 continue;
6918 if (U.getOpcode() == TargetOpcode::G_FDIV &&
6919 U.getOperand(2).getReg() == Y && U.getOperand(1).getReg() != Y) {
6920 // This division is eligible for optimization only if it allows reciprocal
6921 // formation, i.e. it carries the arcp fast-math flag.
6922 if (U.getFlag(MachineInstr::MIFlag::FmArcp)) {
6923 MatchInfo.push_back(&U);
6924 if (dominates(U, *MatchInfo[0]))
6925 std::swap(MatchInfo[0], MatchInfo.back());
6926 }
6927 }
6928 }
6929
6930 // Now that we have the actual number of divisor uses, make sure it meets
6931 // the minimum threshold specified by the target.
6932 return MatchInfo.size() >= MinUses;
6933}
6934
6936 SmallVector<MachineInstr *> &MatchInfo) const {
6937 // Generate the new div at the position of the first instruction, that we have
6938 // ensured will dominate all other instructions.
6939 Builder.setInsertPt(*MatchInfo[0]->getParent(), MatchInfo[0]);
6940 LLT Ty = MRI.getType(MatchInfo[0]->getOperand(0).getReg());
6941 auto Div = Builder.buildFDiv(Ty, Builder.buildFConstant(Ty, 1.0),
6942 MatchInfo[0]->getOperand(2).getReg(),
6943 MatchInfo[0]->getFlags());
6944
6945 // Replace all found div's with fmul instructions.
6946 for (MachineInstr *MI : MatchInfo) {
6947 Builder.setInsertPt(*MI->getParent(), MI);
6948 Builder.buildFMul(MI->getOperand(0).getReg(), MI->getOperand(1).getReg(),
6949 Div->getOperand(0).getReg(), MI->getFlags());
6950 MI->eraseFromParent();
6951 }
6952}
6953
6955 assert(MI.getOpcode() == TargetOpcode::G_ADD && "Expected a G_ADD");
6956 Register LHS = MI.getOperand(1).getReg();
6957 Register RHS = MI.getOperand(2).getReg();
6958
6959 // Helper lambda to check for opportunities for
6960 // A + (B - A) -> B
6961 // (B - A) + A -> B
6962 auto CheckFold = [&](Register MaybeSub, Register MaybeSameReg) {
6963 Register Reg;
6964 return mi_match(MaybeSub, MRI, m_GSub(m_Reg(Src), m_Reg(Reg))) &&
6965 Reg == MaybeSameReg;
6966 };
6967 return CheckFold(LHS, RHS) || CheckFold(RHS, LHS);
6968}
6969
6971 Register &MatchInfo) const {
6972 // This combine folds the following patterns:
6973 //
6974 // G_BUILD_VECTOR_TRUNC (G_BITCAST(x), G_LSHR(G_BITCAST(x), k))
6975 // G_BUILD_VECTOR(G_TRUNC(G_BITCAST(x)), G_TRUNC(G_LSHR(G_BITCAST(x), k)))
6976 // into
6977 // x
6978 // if
6979 // k == size in bits of dst's vector element type
6980 // type(x) == type(dst)
6981 //
6982 // G_BUILD_VECTOR(G_TRUNC(G_BITCAST(x)), undef)
6983 // into
6984 // x
6985 // if
6986 // type(x) == type(dst)
6987
6988 LLT DstVecTy = MRI.getType(MI.getOperand(0).getReg());
6989 LLT DstEltTy = DstVecTy.getElementType();
6990
6991 Register Lo, Hi;
6992
6993 if (mi_match(
6994 MI, MRI,
6996 MatchInfo = Lo;
6997 return MRI.getType(MatchInfo) == DstVecTy;
6998 }
6999
7000 std::optional<ValueAndVReg> ShiftAmount;
7001 const auto LoPattern = m_GBitcast(m_Reg(Lo));
7002 const auto HiPattern = m_GLShr(m_GBitcast(m_Reg(Hi)), m_GCst(ShiftAmount));
7003 if (mi_match(
7004 MI, MRI,
7005 m_any_of(m_GBuildVectorTrunc(LoPattern, HiPattern),
7006 m_GBuildVector(m_GTrunc(LoPattern), m_GTrunc(HiPattern))))) {
7007 if (Lo == Hi && ShiftAmount->Value == DstEltTy.getSizeInBits()) {
7008 MatchInfo = Lo;
7009 return MRI.getType(MatchInfo) == DstVecTy;
7010 }
7011 }
7012
7013 return false;
7014}
7015
7017 Register &MatchInfo) const {
7018 // Replace (G_TRUNC (G_BITCAST (G_BUILD_VECTOR x, y))) with just x
7019 // if type(x) == type(G_TRUNC)
7020 if (!mi_match(MI.getOperand(1).getReg(), MRI,
7021 m_GBitcast(m_GBuildVector(m_Reg(MatchInfo), m_Reg()))))
7022 return false;
7023
7024 return MRI.getType(MatchInfo) == MRI.getType(MI.getOperand(0).getReg());
7025}
7026
7028 Register &MatchInfo) const {
7029 // Replace (G_TRUNC (G_LSHR (G_BITCAST (G_BUILD_VECTOR x, y)), K)) with
7030 // y if K == size of vector element type
7031 std::optional<ValueAndVReg> ShiftAmt;
7032 if (!mi_match(MI.getOperand(1).getReg(), MRI,
7034 m_GCst(ShiftAmt))))
7035 return false;
7036
7037 LLT MatchTy = MRI.getType(MatchInfo);
7038 return ShiftAmt->Value.getZExtValue() == MatchTy.getSizeInBits() &&
7039 MatchTy == MRI.getType(MI.getOperand(0).getReg());
7040}
7041
7042unsigned CombinerHelper::getFPMinMaxOpcForSelect(
7043 CmpInst::Predicate Pred, LLT DstTy,
7044 SelectPatternNaNBehaviour VsNaNRetVal) const {
7045 assert(VsNaNRetVal != SelectPatternNaNBehaviour::NOT_APPLICABLE &&
7046 "Expected a NaN behaviour?");
7047 // Choose an opcode based off of legality or the behaviour when one of the
7048 // LHS/RHS may be NaN.
7049 switch (Pred) {
7050 default:
7051 return 0;
7052 case CmpInst::FCMP_UGT:
7053 case CmpInst::FCMP_UGE:
7054 case CmpInst::FCMP_OGT:
7055 case CmpInst::FCMP_OGE:
7056 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_OTHER)
7057 return TargetOpcode::G_FMAXNUM;
7058 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_NAN)
7059 return TargetOpcode::G_FMAXIMUM;
7060 if (isLegal({TargetOpcode::G_FMAXNUM, {DstTy}}))
7061 return TargetOpcode::G_FMAXNUM;
7062 if (isLegal({TargetOpcode::G_FMAXIMUM, {DstTy}}))
7063 return TargetOpcode::G_FMAXIMUM;
7064 return 0;
7065 case CmpInst::FCMP_ULT:
7066 case CmpInst::FCMP_ULE:
7067 case CmpInst::FCMP_OLT:
7068 case CmpInst::FCMP_OLE:
7069 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_OTHER)
7070 return TargetOpcode::G_FMINNUM;
7071 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_NAN)
7072 return TargetOpcode::G_FMINIMUM;
7073 if (isLegal({TargetOpcode::G_FMINNUM, {DstTy}}))
7074 return TargetOpcode::G_FMINNUM;
7075 if (!isLegal({TargetOpcode::G_FMINIMUM, {DstTy}}))
7076 return 0;
7077 return TargetOpcode::G_FMINIMUM;
7078 }
7079}
7080
7081CombinerHelper::SelectPatternNaNBehaviour
7082CombinerHelper::computeRetValAgainstNaN(Register LHS, Register RHS,
7083 bool IsOrderedComparison) const {
7084 bool LHSSafe = VT->isKnownNeverNaN(LHS);
7085 bool RHSSafe = VT->isKnownNeverNaN(RHS);
7086 // Completely unsafe.
7087 if (!LHSSafe && !RHSSafe)
7088 return SelectPatternNaNBehaviour::NOT_APPLICABLE;
7089 if (LHSSafe && RHSSafe)
7090 return SelectPatternNaNBehaviour::RETURNS_ANY;
7091 // An ordered comparison will return false when given a NaN, so it
7092 // returns the RHS.
7093 if (IsOrderedComparison)
7094 return LHSSafe ? SelectPatternNaNBehaviour::RETURNS_NAN
7095 : SelectPatternNaNBehaviour::RETURNS_OTHER;
7096 // An unordered comparison will return true when given a NaN, so it
7097 // returns the LHS.
7098 return LHSSafe ? SelectPatternNaNBehaviour::RETURNS_OTHER
7099 : SelectPatternNaNBehaviour::RETURNS_NAN;
7100}
7101
7102bool CombinerHelper::matchFPSelectToMinMax(Register Dst, Register Cond,
7103 Register TrueVal, Register FalseVal,
7104 BuildFnTy &MatchInfo) const {
7105 // Match: select (fcmp cond x, y) x, y
7106 // select (fcmp cond x, y) y, x
7107 // And turn it into fminnum/fmaxnum or fminimum/fmaximum based off of the condition.
7108 LLT DstTy = MRI.getType(Dst);
7109 // Bail out early on pointers, since we'll never want to fold to a min/max.
7110 if (DstTy.isPointer())
7111 return false;
7112 // Match a floating point compare with a less-than/greater-than predicate.
7113 // TODO: Allow multiple users of the compare if they are all selects.
7114 CmpInst::Predicate Pred;
7115 Register CmpLHS, CmpRHS;
7116 if (!mi_match(Cond, MRI,
7118 m_GFCmp(m_Pred(Pred), m_Reg(CmpLHS), m_Reg(CmpRHS)))) ||
7119 CmpInst::isEquality(Pred))
7120 return false;
7121 SelectPatternNaNBehaviour ResWithKnownNaNInfo =
7122 computeRetValAgainstNaN(CmpLHS, CmpRHS, CmpInst::isOrdered(Pred));
7123 if (ResWithKnownNaNInfo == SelectPatternNaNBehaviour::NOT_APPLICABLE)
7124 return false;
7125 if (TrueVal == CmpRHS && FalseVal == CmpLHS) {
7126 std::swap(CmpLHS, CmpRHS);
7127 Pred = CmpInst::getSwappedPredicate(Pred);
7128 if (ResWithKnownNaNInfo == SelectPatternNaNBehaviour::RETURNS_NAN)
7129 ResWithKnownNaNInfo = SelectPatternNaNBehaviour::RETURNS_OTHER;
7130 else if (ResWithKnownNaNInfo == SelectPatternNaNBehaviour::RETURNS_OTHER)
7131 ResWithKnownNaNInfo = SelectPatternNaNBehaviour::RETURNS_NAN;
7132 }
7133 if (TrueVal != CmpLHS || FalseVal != CmpRHS)
7134 return false;
7135 // Decide what type of max/min this should be based off of the predicate.
7136 unsigned Opc = getFPMinMaxOpcForSelect(Pred, DstTy, ResWithKnownNaNInfo);
7137 if (!Opc || !isLegal({Opc, {DstTy}}))
7138 return false;
7139 // Comparisons between signed zero and zero may have different results...
7140 // unless we have fmaximum/fminimum. In that case, we know -0 < 0.
7141 if (Opc != TargetOpcode::G_FMAXIMUM && Opc != TargetOpcode::G_FMINIMUM) {
7142 // We don't know if a comparison between two 0s will give us a consistent
7143 // result. Be conservative and only proceed if at least one side is
7144 // non-zero.
7145 auto KnownNonZeroSide = getFConstantVRegValWithLookThrough(CmpLHS, MRI);
7146 if (!KnownNonZeroSide || !KnownNonZeroSide->Value.isNonZero()) {
7147 KnownNonZeroSide = getFConstantVRegValWithLookThrough(CmpRHS, MRI);
7148 if (!KnownNonZeroSide || !KnownNonZeroSide->Value.isNonZero())
7149 return false;
7150 }
7151 }
7152 MatchInfo = [=](MachineIRBuilder &B) {
7153 B.buildInstr(Opc, {Dst}, {CmpLHS, CmpRHS});
7154 };
7155 return true;
7156}
7157
7159 BuildFnTy &MatchInfo) const {
7160 // TODO: Handle integer cases.
7161 assert(MI.getOpcode() == TargetOpcode::G_SELECT);
7162 // Condition may be fed by a truncated compare.
7163 Register Cond = MI.getOperand(1).getReg();
7164 Register MaybeTrunc;
7165 if (mi_match(Cond, MRI, m_OneNonDBGUse(m_GTrunc(m_Reg(MaybeTrunc)))))
7166 Cond = MaybeTrunc;
7167 Register Dst = MI.getOperand(0).getReg();
7168 Register TrueVal = MI.getOperand(2).getReg();
7169 Register FalseVal = MI.getOperand(3).getReg();
7170 return matchFPSelectToMinMax(Dst, Cond, TrueVal, FalseVal, MatchInfo);
7171}
7172
7174 BuildFnTy &MatchInfo) const {
7175 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
7176 // (X + Y) == X --> Y == 0
7177 // (X + Y) != X --> Y != 0
7178 // (X - Y) == X --> Y == 0
7179 // (X - Y) != X --> Y != 0
7180 // (X ^ Y) == X --> Y == 0
7181 // (X ^ Y) != X --> Y != 0
7182 Register Dst = MI.getOperand(0).getReg();
7183 CmpInst::Predicate Pred;
7184 Register X, Y, OpLHS, OpRHS;
7185 bool MatchedSub = mi_match(
7186 Dst, MRI,
7187 m_c_GICmp(m_Pred(Pred), m_Reg(X), m_GSub(m_Reg(OpLHS), m_Reg(Y))));
7188 if (MatchedSub && X != OpLHS)
7189 return false;
7190 if (!MatchedSub) {
7191 if (!mi_match(Dst, MRI,
7192 m_c_GICmp(m_Pred(Pred), m_Reg(X),
7193 m_any_of(m_GAdd(m_Reg(OpLHS), m_Reg(OpRHS)),
7194 m_GXor(m_Reg(OpLHS), m_Reg(OpRHS))))))
7195 return false;
7196 Y = X == OpLHS ? OpRHS : X == OpRHS ? OpLHS : Register();
7197 }
7198 MatchInfo = [=](MachineIRBuilder &B) {
7199 auto Zero = B.buildConstant(MRI.getType(Y), 0);
7200 B.buildICmp(Pred, Dst, Y, Zero);
7201 };
7202 return CmpInst::isEquality(Pred) && Y.isValid();
7203}
7204
7205/// Return the minimum useless shift amount that results in complete loss of the
7206/// source value. Return std::nullopt when it cannot determine a value.
7207static std::optional<unsigned>
7208getMinUselessShift(KnownBits ValueKB, unsigned Opcode,
7209 std::optional<int64_t> &Result) {
7210 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_LSHR ||
7211 Opcode == TargetOpcode::G_ASHR) &&
7212 "Expect G_SHL, G_LSHR or G_ASHR.");
7213 auto SignificantBits = 0;
7214 switch (Opcode) {
7215 case TargetOpcode::G_SHL:
7216 SignificantBits = ValueKB.countMinTrailingZeros();
7217 Result = 0;
7218 break;
7219 case TargetOpcode::G_LSHR:
7220 Result = 0;
7221 SignificantBits = ValueKB.countMinLeadingZeros();
7222 break;
7223 case TargetOpcode::G_ASHR:
7224 if (ValueKB.isNonNegative()) {
7225 SignificantBits = ValueKB.countMinLeadingZeros();
7226 Result = 0;
7227 } else if (ValueKB.isNegative()) {
7228 SignificantBits = ValueKB.countMinLeadingOnes();
7229 Result = -1;
7230 } else {
7231 // Cannot determine shift result.
7232 Result = std::nullopt;
7233 }
7234 break;
7235 default:
7236 break;
7237 }
7238 return ValueKB.getBitWidth() - SignificantBits;
7239}
7240
7242 MachineInstr &MI, std::optional<int64_t> &MatchInfo) const {
7243 Register ShiftVal = MI.getOperand(1).getReg();
7244 Register ShiftReg = MI.getOperand(2).getReg();
7245 LLT ResTy = MRI.getType(MI.getOperand(0).getReg());
7246 auto IsShiftTooBig = [&](const Constant *C) {
7247 auto *CI = dyn_cast<ConstantInt>(C);
7248 if (!CI)
7249 return false;
7250 if (CI->uge(ResTy.getScalarSizeInBits())) {
7251 MatchInfo = std::nullopt;
7252 return true;
7253 }
7254 auto OptMaxUsefulShift = getMinUselessShift(VT->getKnownBits(ShiftVal),
7255 MI.getOpcode(), MatchInfo);
7256 return OptMaxUsefulShift && CI->uge(*OptMaxUsefulShift);
7257 };
7258 return matchUnaryPredicate(MRI, ShiftReg, IsShiftTooBig);
7259}
7260
7262 unsigned LHSOpndIdx = 1;
7263 unsigned RHSOpndIdx = 2;
7264 switch (MI.getOpcode()) {
7265 case TargetOpcode::G_UADDO:
7266 case TargetOpcode::G_SADDO:
7267 case TargetOpcode::G_UMULO:
7268 case TargetOpcode::G_SMULO:
7269 LHSOpndIdx = 2;
7270 RHSOpndIdx = 3;
7271 break;
7272 default:
7273 break;
7274 }
7275 Register LHS = MI.getOperand(LHSOpndIdx).getReg();
7276 Register RHS = MI.getOperand(RHSOpndIdx).getReg();
7277 if (!getIConstantVRegVal(LHS, MRI)) {
7278 // Skip commuting if LHS is not a constant. But, LHS may be a
7279 // G_CONSTANT_FOLD_BARRIER. If so we commute as long as we don't already
7280 // have a constant on the RHS.
7281 if (MRI.getVRegDef(LHS)->getOpcode() !=
7282 TargetOpcode::G_CONSTANT_FOLD_BARRIER)
7283 return false;
7284 }
7285 // Commute as long as RHS is not a constant or G_CONSTANT_FOLD_BARRIER.
7286 return MRI.getVRegDef(RHS)->getOpcode() !=
7287 TargetOpcode::G_CONSTANT_FOLD_BARRIER &&
7288 !getIConstantVRegVal(RHS, MRI);
7289}
7290
7292 Register LHS = MI.getOperand(1).getReg();
7293 Register RHS = MI.getOperand(2).getReg();
7294 std::optional<FPValueAndVReg> ValAndVReg;
7295 if (!mi_match(LHS, MRI, m_GFCstOrSplat(ValAndVReg)))
7296 return false;
7297 return !mi_match(RHS, MRI, m_GFCstOrSplat(ValAndVReg));
7298}
7299
7301 Observer.changingInstr(MI);
7302 unsigned LHSOpndIdx = 1;
7303 unsigned RHSOpndIdx = 2;
7304 switch (MI.getOpcode()) {
7305 case TargetOpcode::G_UADDO:
7306 case TargetOpcode::G_SADDO:
7307 case TargetOpcode::G_UMULO:
7308 case TargetOpcode::G_SMULO:
7309 LHSOpndIdx = 2;
7310 RHSOpndIdx = 3;
7311 break;
7312 default:
7313 break;
7314 }
7315 Register LHSReg = MI.getOperand(LHSOpndIdx).getReg();
7316 Register RHSReg = MI.getOperand(RHSOpndIdx).getReg();
7317 MI.getOperand(LHSOpndIdx).setReg(RHSReg);
7318 MI.getOperand(RHSOpndIdx).setReg(LHSReg);
7319 Observer.changedInstr(MI);
7320}
7321
7322bool CombinerHelper::isOneOrOneSplat(Register Src, bool AllowUndefs) const {
7323 LLT SrcTy = MRI.getType(Src);
7324 if (SrcTy.isFixedVector())
7325 return isConstantSplatVector(Src, 1, AllowUndefs);
7326 if (SrcTy.isScalar()) {
7327 if (AllowUndefs && getOpcodeDef<GImplicitDef>(Src, MRI) != nullptr)
7328 return true;
7329 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
7330 return IConstant && IConstant->Value == 1;
7331 }
7332 return false; // scalable vector
7333}
7334
7335bool CombinerHelper::isZeroOrZeroSplat(Register Src, bool AllowUndefs) const {
7336 LLT SrcTy = MRI.getType(Src);
7337 if (SrcTy.isFixedVector())
7338 return isConstantSplatVector(Src, 0, AllowUndefs);
7339 if (SrcTy.isScalar()) {
7340 if (AllowUndefs && getOpcodeDef<GImplicitDef>(Src, MRI) != nullptr)
7341 return true;
7342 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
7343 return IConstant && IConstant->Value == 0;
7344 }
7345 return false; // scalable vector
7346}
7347
7348// Ignores COPYs during conformance checks.
7349// FIXME scalable vectors.
7350bool CombinerHelper::isConstantSplatVector(Register Src, int64_t SplatValue,
7351 bool AllowUndefs) const {
7352 GBuildVector *BuildVector = getOpcodeDef<GBuildVector>(Src, MRI);
7353 if (!BuildVector)
7354 return false;
7355 unsigned NumSources = BuildVector->getNumSources();
7356
7357 for (unsigned I = 0; I < NumSources; ++I) {
7358 GImplicitDef *ImplicitDef =
7360 if (ImplicitDef && AllowUndefs)
7361 continue;
7362 if (ImplicitDef && !AllowUndefs)
7363 return false;
7364 std::optional<ValueAndVReg> IConstant =
7366 if (IConstant && IConstant->Value == SplatValue)
7367 continue;
7368 return false;
7369 }
7370 return true;
7371}
7372
7373// Ignores COPYs during lookups.
7374// FIXME scalable vectors
7375std::optional<APInt>
7376CombinerHelper::getConstantOrConstantSplatVector(Register Src) const {
7377 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
7378 if (IConstant)
7379 return IConstant->Value;
7380
7381 GBuildVector *BuildVector = getOpcodeDef<GBuildVector>(Src, MRI);
7382 if (!BuildVector)
7383 return std::nullopt;
7384 unsigned NumSources = BuildVector->getNumSources();
7385
7386 std::optional<APInt> Value = std::nullopt;
7387 for (unsigned I = 0; I < NumSources; ++I) {
7388 std::optional<ValueAndVReg> IConstant =
7390 if (!IConstant)
7391 return std::nullopt;
7392 if (!Value)
7393 Value = IConstant->Value;
7394 else if (*Value != IConstant->Value)
7395 return std::nullopt;
7396 }
7397 return Value;
7398}
7399
7400// FIXME G_SPLAT_VECTOR
7401bool CombinerHelper::isConstantOrConstantVectorI(Register Src) const {
7402 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
7403 if (IConstant)
7404 return true;
7405
7406 GBuildVector *BuildVector = getOpcodeDef<GBuildVector>(Src, MRI);
7407 if (!BuildVector)
7408 return false;
7409
7410 unsigned NumSources = BuildVector->getNumSources();
7411 for (unsigned I = 0; I < NumSources; ++I) {
7412 std::optional<ValueAndVReg> IConstant =
7414 if (!IConstant)
7415 return false;
7416 }
7417 return true;
7418}
7419
7420// TODO: use knownbits to determine zeros
//
// Tries to replace a G_SELECT whose true and false operands are both integer
// constants with extension / not / add / shl / or of the condition.
//
// Preconditions checked below: the condition type is exactly s1 and the true
// operand is not a pointer. On success a builder callback is stored in
// \p MatchInfo and true is returned; otherwise MatchInfo is left untouched and
// false is returned.
//
// The patterns are tested in order and the first hit wins, so the more
// specific forms (1/0, -1/0, 0/1, 0/-1) take precedence over the generic
// add, shift and or forms further down.
7421bool CombinerHelper::tryFoldSelectOfConstants(GSelect *Select,
7422 BuildFnTy &MatchInfo) const {
7423 uint32_t Flags = Select->getFlags();
7424 Register Dest = Select->getReg(0);
7425 Register Cond = Select->getCondReg();
7426 Register True = Select->getTrueReg();
7427 Register False = Select->getFalseReg();
7428 LLT CondTy = MRI.getType(Select->getCondReg());
7429 LLT TrueTy = MRI.getType(Select->getTrueReg());
7430
7431 // We only do this combine for scalar boolean conditions.
7432 if (CondTy != LLT::scalar(1))
7433 return false;
7434
7435 if (TrueTy.isPointer())
7436 return false;
7437
7438 // Both are scalars.
7439 std::optional<ValueAndVReg> TrueOpt =
// NOTE(review): the initializer for TrueOpt (listing line 7440) is missing
// from this copy of the file; presumably a constant lookup on True.
7441 std::optional<ValueAndVReg> FalseOpt =
// NOTE(review): the initializer for FalseOpt (listing line 7442) is missing
// from this copy of the file; presumably a constant lookup on False.
7443
7444 if (!TrueOpt || !FalseOpt)
7445 return false;
7446
7447 APInt TrueValue = TrueOpt->Value;
7448 APInt FalseValue = FalseOpt->Value;
7449
// Each callback below captures its operands by copy ([=]) and positions the
// builder at the select before emitting the replacement into Dest.
7450 // select Cond, 1, 0 --> zext (Cond)
7451 if (TrueValue.isOne() && FalseValue.isZero()) {
7452 MatchInfo = [=](MachineIRBuilder &B) {
7453 B.setInstrAndDebugLoc(*Select);
7454 B.buildZExtOrTrunc(Dest, Cond);
7455 };
7456 return true;
7457 }
7458
7459 // select Cond, -1, 0 --> sext (Cond)
7460 if (TrueValue.isAllOnes() && FalseValue.isZero()) {
7461 MatchInfo = [=](MachineIRBuilder &B) {
7462 B.setInstrAndDebugLoc(*Select);
7463 B.buildSExtOrTrunc(Dest, Cond);
7464 };
7465 return true;
7466 }
7467
7468 // select Cond, 0, 1 --> zext (!Cond)
7469 if (TrueValue.isZero() && FalseValue.isOne()) {
7470 MatchInfo = [=](MachineIRBuilder &B) {
7471 B.setInstrAndDebugLoc(*Select);
// The inverted condition lives in a fresh register of the condition type.
7472 Register Inner = MRI.createGenericVirtualRegister(CondTy);
7473 B.buildNot(Inner, Cond);
7474 B.buildZExtOrTrunc(Dest, Inner);
7475 };
7476 return true;
7477 }
7478
7479 // select Cond, 0, -1 --> sext (!Cond)
7480 if (TrueValue.isZero() && FalseValue.isAllOnes()) {
7481 MatchInfo = [=](MachineIRBuilder &B) {
7482 B.setInstrAndDebugLoc(*Select);
7483 Register Inner = MRI.createGenericVirtualRegister(CondTy);
7484 B.buildNot(Inner, Cond);
7485 B.buildSExtOrTrunc(Dest, Inner);
7486 };
7487 return true;
7488 }
7489
// zext(Cond) is 0 or 1, so adding it to the false constant (C1-1) yields
// C1-1 or C1. The existing False register is reused as the addend.
7490 // select Cond, C1, C1-1 --> add (zext Cond), C1-1
7491 if (TrueValue - 1 == FalseValue) {
7492 MatchInfo = [=](MachineIRBuilder &B) {
7493 B.setInstrAndDebugLoc(*Select);
7494 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
7495 B.buildZExtOrTrunc(Inner, Cond);
7496 B.buildAdd(Dest, Inner, False);
7497 };
7498 return true;
7499 }
7500
// sext(Cond) is 0 or -1, so adding it to the false constant (C1+1) yields
// C1+1 or C1.
7501 // select Cond, C1, C1+1 --> add (sext Cond), C1+1
7502 if (TrueValue + 1 == FalseValue) {
7503 MatchInfo = [=](MachineIRBuilder &B) {
7504 B.setInstrAndDebugLoc(*Select);
7505 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
7506 B.buildSExtOrTrunc(Inner, Cond);
7507 B.buildAdd(Dest, Inner, False);
7508 };
7509 return true;
7510 }
7511
7512 // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
7513 if (TrueValue.isPowerOf2() && FalseValue.isZero()) {
7514 MatchInfo = [=](MachineIRBuilder &B) {
7515 B.setInstrAndDebugLoc(*Select);
7516 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
7517 B.buildZExtOrTrunc(Inner, Cond);
7518 // The shift amount must be scalar.
7519 LLT ShiftTy = TrueTy.isVector() ? TrueTy.getElementType() : TrueTy;
7520 auto ShAmtC = B.buildConstant(ShiftTy, TrueValue.exactLogBase2());
// The select's own flags are forwarded to the shift.
7521 B.buildShl(Dest, Inner, ShAmtC, Flags);
7522 };
7523 return true;
7524 }
7525
7526 // select Cond, 0, Pow2 --> (zext (!Cond)) << log2(Pow2)
7527 if (FalseValue.isPowerOf2() && TrueValue.isZero()) {
7528 MatchInfo = [=](MachineIRBuilder &B) {
7529 B.setInstrAndDebugLoc(*Select);
7530 Register Not = MRI.createGenericVirtualRegister(CondTy);
7531 B.buildNot(Not, Cond);
7532 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
7533 B.buildZExtOrTrunc(Inner, Not);
7534 // The shift amount must be scalar.
7535 LLT ShiftTy = TrueTy.isVector() ? TrueTy.getElementType() : TrueTy;
7536 auto ShAmtC = B.buildConstant(ShiftTy, FalseValue.exactLogBase2());
7537 B.buildShl(Dest, Inner, ShAmtC, Flags);
7538 };
7539 return true;
7540 }
7541
// Reached only when the false value is not 0 (the -1/0 case returned above).
7542 // select Cond, -1, C --> or (sext Cond), C
7543 if (TrueValue.isAllOnes()) {
7544 MatchInfo = [=](MachineIRBuilder &B) {
7545 B.setInstrAndDebugLoc(*Select);
7546 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
7547 B.buildSExtOrTrunc(Inner, Cond);
7548 B.buildOr(Dest, Inner, False, Flags);
7549 };
7550 return true;
7551 }
7552
// Reached only when the true value is not 0 (the 0/-1 case returned above).
7553 // select Cond, C, -1 --> or (sext (not Cond)), C
7554 if (FalseValue.isAllOnes()) {
7555 MatchInfo = [=](MachineIRBuilder &B) {
7556 B.setInstrAndDebugLoc(*Select);
7557 Register Not = MRI.createGenericVirtualRegister(CondTy);
7558 B.buildNot(Not, Cond);
7559 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
7560 B.buildSExtOrTrunc(Inner, Not);
7561 B.buildOr(Dest, Inner, True, Flags);
7562 };
7563 return true;
7564 }
7565
7566 return false;
7567}
7568
7569// TODO: use knownbits to determine zeros
7570bool CombinerHelper::tryFoldBoolSelectToLogic(GSelect *Select,
7571 BuildFnTy &MatchInfo) const {
7572 uint32_t Flags = Select->getFlags();
7573 Register DstReg = Select->getReg(0);
7574 Register Cond = Select->getCondReg();
7575 Register True = Select->getTrueReg();
7576 Register False = Select->getFalseReg();
7577 LLT CondTy = MRI.getType(Select->getCondReg());
7578 LLT TrueTy = MRI.getType(Select->getTrueReg());
7579
7580 // Boolean or fixed vector of booleans.
7581 if (CondTy.isScalableVector() ||
7582 (CondTy.isFixedVector() &&
7583 CondTy.getElementType().getScalarSizeInBits() != 1) ||
7584 CondTy.getScalarSizeInBits() != 1)
7585 return false;
7586
7587 if (CondTy != TrueTy)
7588 return false;
7589
7590 // select Cond, Cond, F --> or Cond, F
7591 // select Cond, 1, F --> or Cond, F
7592 if ((Cond == True) || isOneOrOneSplat(True, /* AllowUndefs */ true)) {
7593 MatchInfo = [=](MachineIRBuilder &B) {
7594 B.setInstrAndDebugLoc(*Select);
7595 Register Ext = MRI.createGenericVirtualRegister(TrueTy);
7596 B.buildZExtOrTrunc(Ext, Cond);
7597 auto FreezeFalse = B.buildFreeze(TrueTy, False);
7598 B.buildOr(DstReg, Ext, FreezeFalse, Flags);
7599 };
7600 return true;
7601 }
7602
7603 // select Cond, T, Cond --> and Cond, T
7604 // select Cond, T, 0 --> and Cond, T
7605 if ((Cond == False) || isZeroOrZeroSplat(False, /* AllowUndefs */ true)) {
7606 MatchInfo = [=](MachineIRBuilder &B) {
7607 B.setInstrAndDebugLoc(*Select);
7608 Register Ext = MRI.createGenericVirtualRegister(TrueTy);
7609 B.buildZExtOrTrunc(Ext, Cond);
7610 auto FreezeTrue = B.buildFreeze(TrueTy, True);
7611 B.buildAnd(DstReg, Ext, FreezeTrue);
7612 };
7613 return true;
7614 }
7615
7616 // select Cond, T, 1 --> or (not Cond), T
7617 if (isOneOrOneSplat(False, /* AllowUndefs */ true)) {
7618 MatchInfo = [=](MachineIRBuilder &B) {
7619 B.setInstrAndDebugLoc(*Select);
7620 // First the not.
7621 Register Inner = MRI.createGenericVirtualRegister(CondTy);
7622 B.buildNot(Inner, Cond);
7623 // Then an ext to match the destination register.
7624 Register Ext = MRI.createGenericVirtualRegister(TrueTy);
7625 B.buildZExtOrTrunc(Ext, Inner);
7626 auto FreezeTrue = B.buildFreeze(TrueTy, True);
7627 B.buildOr(DstReg, Ext, FreezeTrue, Flags);
7628 };
7629 return true;
7630 }
7631
7632 // select Cond, 0, F --> and (not Cond), F
7633 if (isZeroOrZeroSplat(True, /* AllowUndefs */ true)) {
7634 MatchInfo = [=](MachineIRBuilder &B) {
7635 B.setInstrAndDebugLoc(*Select);
7636 // First the not.
7637 Register Inner = MRI.createGenericVirtualRegister(CondTy);
7638 B.buildNot(Inner, Cond);
7639 // Then an ext to match the destination register.
7640 Register Ext = MRI.createGenericVirtualRegister(TrueTy);
7641 B.buildZExtOrTrunc(Ext, Inner);
7642 auto FreezeFalse = B.buildFreeze(TrueTy, False);
7643 B.buildAnd(DstReg, Ext, FreezeFalse);
7644 };
7645 return true;
7646 }
7647
7648 return false;
7649}
7650
7652 BuildFnTy &MatchInfo) const {
7653 GSelect *Select = cast<GSelect>(MRI.getVRegDef(MO.getReg()));
7654 GICmp *Cmp = cast<GICmp>(MRI.getVRegDef(Select->getCondReg()));
7655
7656 Register DstReg = Select->getReg(0);
7657 Register True = Select->getTrueReg();
7658 Register False = Select->getFalseReg();
7659 LLT DstTy = MRI.getType(DstReg);
7660
7661 if (DstTy.isPointerOrPointerVector())
7662 return false;
7663
7664 // We want to fold the icmp and replace the select.
7665 if (!MRI.hasOneNonDBGUse(Cmp->getReg(0)))
7666 return false;
7667
7668 CmpInst::Predicate Pred = Cmp->getCond();
7669 // We need a larger or smaller predicate for
7670 // canonicalization.
7671 if (CmpInst::isEquality(Pred))
7672 return false;
7673
7674 Register CmpLHS = Cmp->getLHSReg();
7675 Register CmpRHS = Cmp->getRHSReg();
7676
7677 // We can swap CmpLHS and CmpRHS for higher hitrate.
7678 if (True == CmpRHS && False == CmpLHS) {
7679 std::swap(CmpLHS, CmpRHS);
7680 Pred = CmpInst::getSwappedPredicate(Pred);
7681 }
7682
7683 // (icmp X, Y) ? X : Y -> integer minmax.
7684 // see matchSelectPattern in ValueTracking.
7685 // Legality between G_SELECT and integer minmax can differ.
7686 if (True != CmpLHS || False != CmpRHS)
7687 return false;
7688
7689 switch (Pred) {
7690 case ICmpInst::ICMP_UGT:
7691 case ICmpInst::ICMP_UGE: {
7692 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMAX, DstTy}))
7693 return false;
7694 MatchInfo = [=](MachineIRBuilder &B) { B.buildUMax(DstReg, True, False); };
7695 return true;
7696 }
7697 case ICmpInst::ICMP_SGT:
7698 case ICmpInst::ICMP_SGE: {
7699 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SMAX, DstTy}))
7700 return false;
7701 MatchInfo = [=](MachineIRBuilder &B) { B.buildSMax(DstReg, True, False); };
7702 return true;
7703 }
7704 case ICmpInst::ICMP_ULT:
7705 case ICmpInst::ICMP_ULE: {
7706 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMIN, DstTy}))
7707 return false;
7708 MatchInfo = [=](MachineIRBuilder &B) { B.buildUMin(DstReg, True, False); };
7709 return true;
7710 }
7711 case ICmpInst::ICMP_SLT:
7712 case ICmpInst::ICMP_SLE: {
7713 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SMIN, DstTy}))
7714 return false;
7715 MatchInfo = [=](MachineIRBuilder &B) { B.buildSMin(DstReg, True, False); };
7716 return true;
7717 }
7718 default:
7719 return false;
7720 }
7721}
7722
7723// (neg (min/max x, (neg x))) --> (max/min x, (neg x))
7725 BuildFnTy &MatchInfo) const {
7726 assert(MI.getOpcode() == TargetOpcode::G_SUB);
7727 Register DestReg = MI.getOperand(0).getReg();
7728 LLT DestTy = MRI.getType(DestReg);
7729
7730 Register X;
7731 Register Sub0;
7732 auto NegPattern = m_all_of(m_Neg(m_DeferredReg(X)), m_Reg(Sub0));
7733 if (mi_match(DestReg, MRI,
7734 m_Neg(m_OneUse(m_any_of(m_GSMin(m_Reg(X), NegPattern),
7735 m_GSMax(m_Reg(X), NegPattern),
7736 m_GUMin(m_Reg(X), NegPattern),
7737 m_GUMax(m_Reg(X), NegPattern)))))) {
7738 MachineInstr *MinMaxMI = MRI.getVRegDef(MI.getOperand(2).getReg());
7739 unsigned NewOpc = getInverseGMinMaxOpcode(MinMaxMI->getOpcode());
7740 if (isLegal({NewOpc, {DestTy}})) {
7741 MatchInfo = [=](MachineIRBuilder &B) {
7742 B.buildInstr(NewOpc, {DestReg}, {X, Sub0});
7743 };
7744 return true;
7745 }
7746 }
7747
7748 return false;
7749}
7750
7753
7754 if (tryFoldSelectOfConstants(Select, MatchInfo))
7755 return true;
7756
7757 if (tryFoldBoolSelectToLogic(Select, MatchInfo))
7758 return true;
7759
7760 return false;
7761}
7762
7763/// Fold (icmp Pred1 V1, C1) && (icmp Pred2 V2, C2)
7764/// or (icmp Pred1 V1, C1) || (icmp Pred2 V2, C2)
7765/// into a single comparison using range-based reasoning.
7766/// see InstCombinerImpl::foldAndOrOfICmpsUsingRanges.
///
/// \p Logic must be a G_AND or G_OR (G_XOR is asserted against). Both of its
/// operands must be single-use G_ICMPs against integer constants, and after
/// optionally looking through an add-with-constant both compares must test
/// the same register. On success \p MatchInfo receives the builder callback
/// and true is returned.
7767bool CombinerHelper::tryFoldAndOrOrICmpsUsingRanges(
7768 GLogicalBinOp *Logic, BuildFnTy &MatchInfo) const {
7769 assert(Logic->getOpcode() != TargetOpcode::G_XOR && "unexpected xor");
7770 bool IsAnd = Logic->getOpcode() == TargetOpcode::G_AND;
7771 Register DstReg = Logic->getReg(0);
7772 Register LHS = Logic->getLHSReg();
7773 Register RHS = Logic->getRHSReg();
7774 unsigned Flags = Logic->getFlags();
7775
7776 // We need an G_ICMP on the LHS register.
7777 GICmp *Cmp1 = getOpcodeDef<GICmp>(LHS, MRI);
7778 if (!Cmp1)
7779 return false;
7780
7781 // We need an G_ICMP on the RHS register.
7782 GICmp *Cmp2 = getOpcodeDef<GICmp>(RHS, MRI);
7783 if (!Cmp2)
7784 return false;
7785
7786 // We want to fold the icmps.
7787 if (!MRI.hasOneNonDBGUse(Cmp1->getReg(0)) ||
7788 !MRI.hasOneNonDBGUse(Cmp2->getReg(0)))
7789 return false;
7790
7791 APInt C1;
7792 APInt C2;
7793 std::optional<ValueAndVReg> MaybeC1 =
// NOTE(review): the initializer for MaybeC1 (listing line 7794) is missing
// from this copy of the file; presumably a constant lookup on Cmp1's RHS.
7795 if (!MaybeC1)
7796 return false;
7797 C1 = MaybeC1->Value;
7798
7799 std::optional<ValueAndVReg> MaybeC2 =
// NOTE(review): the initializer for MaybeC2 (listing line 7800) is missing
// from this copy of the file; presumably a constant lookup on Cmp2's RHS.
7801 if (!MaybeC2)
7802 return false;
7803 C2 = MaybeC2->Value;
7804
7805 Register R1 = Cmp1->getLHSReg();
7806 Register R2 = Cmp2->getLHSReg();
7807 CmpInst::Predicate Pred1 = Cmp1->getCond();
7808 CmpInst::Predicate Pred2 = Cmp2->getCond();
7809 LLT CmpTy = MRI.getType(Cmp1->getReg(0));
7810 LLT CmpOperandTy = MRI.getType(R1);
7811
7812 if (CmpOperandTy.isPointer())
7813 return false;
7814
7815 // We build ands, adds, and constants of type CmpOperandTy.
7816 // They must be legal to build.
7817 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_AND, CmpOperandTy}) ||
7818 !isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, CmpOperandTy}) ||
7819 !isConstantLegalOrBeforeLegalizer(CmpOperandTy))
7820 return false;
7821
7822 // Look through add of a constant offset on R1, R2, or both operands. This
7823 // allows us to interpret the R + C' < C'' range idiom into a proper range.
7824 std::optional<APInt> Offset1;
7825 std::optional<APInt> Offset2;
7826 if (R1 != R2) {
7827 if (GAdd *Add = getOpcodeDef<GAdd>(R1, MRI)) {
7828 std::optional<ValueAndVReg> MaybeOffset1 =
// NOTE(review): the initializer for MaybeOffset1 (listing line 7829) is
// missing from this copy of the file; presumably a constant lookup on the
// add's RHS.
7830 if (MaybeOffset1) {
7831 R1 = Add->getLHSReg();
7832 Offset1 = MaybeOffset1->Value;
7833 }
7834 }
7835 if (GAdd *Add = getOpcodeDef<GAdd>(R2, MRI)) {
7836 std::optional<ValueAndVReg> MaybeOffset2 =
// NOTE(review): the initializer for MaybeOffset2 (listing line 7837) is
// missing from this copy of the file; presumably a constant lookup on the
// add's RHS.
7838 if (MaybeOffset2) {
7839 R2 = Add->getLHSReg();
7840 Offset2 = MaybeOffset2->Value;
7841 }
7842 }
7843 }
7844
// After the optional look-through both compares must test the same register.
7845 if (R1 != R2)
7846 return false;
7847
// For G_AND the predicates are inverted here and the combined range is
// inverted again further down, so both opcodes share the union-based path.
7848 // We calculate the icmp ranges including maybe offsets.
7849 ConstantRange CR1 = ConstantRange::makeExactICmpRegion(
7850 IsAnd ? ICmpInst::getInversePredicate(Pred1) : Pred1, C1);
// The compare was on (R + Offset); shift the range so it describes R itself.
7851 if (Offset1)
7852 CR1 = CR1.subtract(*Offset1);
7853
7854 ConstantRange CR2 = ConstantRange::makeExactICmpRegion(
7855 IsAnd ? ICmpInst::getInversePredicate(Pred2) : Pred2, C2);
7856 if (Offset2)
7857 CR2 = CR2.subtract(*Offset2);
7858
7859 bool CreateMask = false;
7860 APInt LowerDiff;
7861 std::optional<ConstantRange> CR = CR1.exactUnionWith(CR2);
// No exact union: fall back to the "ranges differ in a single bit" trick.
7862 if (!CR) {
7863 // We need non-wrapping ranges.
7864 if (CR1.isWrappedSet() || CR2.isWrappedSet())
7865 return false;
7866
7867 // Check whether we have equal-size ranges that only differ by one bit.
7868 // In that case we can apply a mask to map one range onto the other.
7869 LowerDiff = CR1.getLower() ^ CR2.getLower();
7870 APInt UpperDiff = (CR1.getUpper() - 1) ^ (CR2.getUpper() - 1);
7871 APInt CR1Size = CR1.getUpper() - CR1.getLower();
7872 if (!LowerDiff.isPowerOf2() || LowerDiff != UpperDiff ||
7873 CR1Size != CR2.getUpper() - CR2.getLower())
7874 return false;
7875
// Keep the range with the smaller lower bound; the mask (~LowerDiff) built in
// the callback clears the differing bit before the compare.
7876 CR = CR1.getLower().ult(CR2.getLower()) ? CR1 : CR2;
7877 CreateMask = true;
7878 }
7879
7880 if (IsAnd)
7881 CR = CR->inverse();
7882
7883 CmpInst::Predicate NewPred;
7884 APInt NewC, Offset;
7885 CR->getEquivalentICmp(NewPred, NewC, Offset);
7886
7887 // We take the result type of one of the original icmps, CmpTy, for
7888 // the to be build icmp. The operand type, CmpOperandTy, is used for
7889 // the other instructions and constants to be build. The types of
7890 // the parameters and output are the same for add and and. CmpTy
7891 // and the type of DstReg might differ. That is why we zext or trunc
7892 // the icmp into the destination register.
7893
// The four branches enumerate every (CreateMask, Offset != 0) combination:
// an optional mask, then an optional add of Offset, then the final icmp. The
// Logic instruction's flags are forwarded to the add.
7894 MatchInfo = [=](MachineIRBuilder &B) {
7895 if (CreateMask && Offset != 0) {
7896 auto TildeLowerDiff = B.buildConstant(CmpOperandTy, ~LowerDiff);
7897 auto And = B.buildAnd(CmpOperandTy, R1, TildeLowerDiff); // the mask.
7898 auto OffsetC = B.buildConstant(CmpOperandTy, Offset);
7899 auto Add = B.buildAdd(CmpOperandTy, And, OffsetC, Flags);
7900 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
7901 auto ICmp = B.buildICmp(NewPred, CmpTy, Add, NewCon);
7902 B.buildZExtOrTrunc(DstReg, ICmp);
7903 } else if (CreateMask && Offset == 0) {
7904 auto TildeLowerDiff = B.buildConstant(CmpOperandTy, ~LowerDiff);
7905 auto And = B.buildAnd(CmpOperandTy, R1, TildeLowerDiff); // the mask.
7906 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
7907 auto ICmp = B.buildICmp(NewPred, CmpTy, And, NewCon);
7908 B.buildZExtOrTrunc(DstReg, ICmp);
7909 } else if (!CreateMask && Offset != 0) {
7910 auto OffsetC = B.buildConstant(CmpOperandTy, Offset);
7911 auto Add = B.buildAdd(CmpOperandTy, R1, OffsetC, Flags);
7912 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
7913 auto ICmp = B.buildICmp(NewPred, CmpTy, Add, NewCon);
7914 B.buildZExtOrTrunc(DstReg, ICmp);
7915 } else if (!CreateMask && Offset == 0) {
7916 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
7917 auto ICmp = B.buildICmp(NewPred, CmpTy, R1, NewCon);
7918 B.buildZExtOrTrunc(DstReg, ICmp);
7919 } else {
7920 llvm_unreachable("unexpected configuration of CreateMask and Offset");
7921 }
7922 };
7923 return true;
7924}
7925
/// Tries to merge a G_AND / G_OR of two G_FCMPs over the same operand pair
/// into one G_FCMP (or a constant).
///
/// \p Logic must not be a G_XOR (asserted). Both operands of \p Logic must be
/// defined by G_FCMPs that compare the same two registers, either in the same
/// order or swapped. The new predicate is the bitwise AND (for G_AND) or OR
/// (for G_OR) of the two getFCmpCode() values. On success \p MatchInfo holds
/// the builder callback and true is returned.
7926bool CombinerHelper::tryFoldLogicOfFCmps(GLogicalBinOp *Logic,
7927 BuildFnTy &MatchInfo) const {
7928 assert(Logic->getOpcode() != TargetOpcode::G_XOR && "unexpecte xor");
7929 Register DestReg = Logic->getReg(0);
7930 Register LHS = Logic->getLHSReg();
7931 Register RHS = Logic->getRHSReg();
7932 bool IsAnd = Logic->getOpcode() == TargetOpcode::G_AND;
7933
7934 // We need a compare on the LHS register.
7935 GFCmp *Cmp1 = getOpcodeDef<GFCmp>(LHS, MRI);
7936 if (!Cmp1)
7937 return false;
7938
7939 // We need a compare on the RHS register.
7940 GFCmp *Cmp2 = getOpcodeDef<GFCmp>(RHS, MRI);
7941 if (!Cmp2)
7942 return false;
7943
7944 LLT CmpTy = MRI.getType(Cmp1->getReg(0));
7945 LLT CmpOperandTy = MRI.getType(Cmp1->getLHSReg());
7946
7947 // We build one fcmp, want to fold the fcmps, replace the logic op,
7948 // and the fcmps must have the same shape.
// NOTE(review): the opening of this condition (listing line 7949) is missing
// from this copy of the file; presumably an `if (!<legality check>(` that
// takes the G_FCMP query on the next line.
7950 {TargetOpcode::G_FCMP, {CmpTy, CmpOperandTy}}) ||
7951 !MRI.hasOneNonDBGUse(Logic->getReg(0)) ||
7952 !MRI.hasOneNonDBGUse(Cmp1->getReg(0)) ||
7953 !MRI.hasOneNonDBGUse(Cmp2->getReg(0)) ||
7954 MRI.getType(Cmp1->getLHSReg()) != MRI.getType(Cmp2->getLHSReg()))
7955 return false;
7956
// Naming: LHS0/LHS1 are the operands of the first compare, RHS0/RHS1 those of
// the second compare.
7957 CmpInst::Predicate PredL = Cmp1->getCond();
7958 CmpInst::Predicate PredR = Cmp2->getCond();
7959 Register LHS0 = Cmp1->getLHSReg();
7960 Register LHS1 = Cmp1->getRHSReg();
7961 Register RHS0 = Cmp2->getLHSReg();
7962 Register RHS1 = Cmp2->getRHSReg();
7963
7964 if (LHS0 == RHS1 && LHS1 == RHS0) {
7965 // Swap RHS operands to match LHS.
7966 PredR = CmpInst::getSwappedPredicate(PredR);
7967 std::swap(RHS0, RHS1);
7968 }
7969
7970 if (LHS0 == RHS0 && LHS1 == RHS1) {
7971 // We determine the new predicate.
7972 unsigned CmpCodeL = getFCmpCode(PredL);
7973 unsigned CmpCodeR = getFCmpCode(PredR);
7974 unsigned NewPred = IsAnd ? CmpCodeL & CmpCodeR : CmpCodeL | CmpCodeR;
// The merged compare carries the union of both compares' flags.
7975 unsigned Flags = Cmp1->getFlags() | Cmp2->getFlags();
7976 MatchInfo = [=](MachineIRBuilder &B) {
7977 // The fcmp predicates fill the lower part of the enum.
7978 FCmpInst::Predicate Pred = static_cast<FCmpInst::Predicate>(NewPred);
7979 if (Pred == FCmpInst::FCMP_FALSE &&
// NOTE(review): the second half of this condition (listing line 7980) is
// missing from this copy of the file; presumably a legality check for
// building a constant of CmpTy.
7981 auto False = B.buildConstant(CmpTy, 0);
7982 B.buildZExtOrTrunc(DestReg, False);
7983 } else if (Pred == FCmpInst::FCMP_TRUE &&
// NOTE(review): the second half of this condition (listing line 7984) is
// missing from this copy of the file; presumably the same constant legality
// check as above.
7985 auto True =
7986 B.buildConstant(CmpTy, getICmpTrueVal(getTargetLowering(),
7987 CmpTy.isVector() /*isVector*/,
7988 true /*isFP*/));
7989 B.buildZExtOrTrunc(DestReg, True);
7990 } else { // We take the predicate without predicate optimizations.
7991 auto Cmp = B.buildFCmp(Pred, CmpTy, LHS0, LHS1, Flags);
7992 B.buildZExtOrTrunc(DestReg, Cmp);
7993 }
7994 };
7995 return true;
7996 }
7997
7998 return false;
7999}
8000
8002 GAnd *And = cast<GAnd>(&MI);
8003
8004 if (tryFoldAndOrOrICmpsUsingRanges(And, MatchInfo))
8005 return true;
8006
8007 if (tryFoldLogicOfFCmps(And, MatchInfo))
8008 return true;
8009
8010 return false;
8011}
8012
8014 GOr *Or = cast<GOr>(&MI);
8015
8016 if (tryFoldAndOrOrICmpsUsingRanges(Or, MatchInfo))
8017 return true;
8018
8019 if (tryFoldLogicOfFCmps(Or, MatchInfo))
8020 return true;
8021
8022 return false;
8023}
8024
8026 BuildFnTy &MatchInfo) const {
8028
8029 // Addo has no flags
8030 Register Dst = Add->getReg(0);
8031 Register Carry = Add->getReg(1);
8032 Register LHS = Add->getLHSReg();
8033 Register RHS = Add->getRHSReg();
8034 bool IsSigned = Add->isSigned();
8035 LLT DstTy = MRI.getType(Dst);
8036 LLT CarryTy = MRI.getType(Carry);
8037
8038 // Fold addo, if the carry is dead -> add, undef.
8039 if (MRI.use_nodbg_empty(Carry) &&
8040 isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, {DstTy}})) {
8041 MatchInfo = [=](MachineIRBuilder &B) {
8042 B.buildAdd(Dst, LHS, RHS);
8043 B.buildUndef(Carry);
8044 };
8045 return true;
8046 }
8047
8048 // Canonicalize constant to RHS.
8049 if (isConstantOrConstantVectorI(LHS) && !isConstantOrConstantVectorI(RHS)) {
8050 if (IsSigned) {
8051 MatchInfo = [=](MachineIRBuilder &B) {
8052 B.buildSAddo(Dst, Carry, RHS, LHS);
8053 };
8054 return true;
8055 }
8056 // !IsSigned
8057 MatchInfo = [=](MachineIRBuilder &B) {
8058 B.buildUAddo(Dst, Carry, RHS, LHS);
8059 };
8060 return true;
8061 }
8062
8063 std::optional<APInt> MaybeLHS = getConstantOrConstantSplatVector(LHS);
8064 std::optional<APInt> MaybeRHS = getConstantOrConstantSplatVector(RHS);
8065
8066 // Fold addo(c1, c2) -> c3, carry.
8067 if (MaybeLHS && MaybeRHS && isConstantLegalOrBeforeLegalizer(DstTy) &&
8069 bool Overflow;
8070 APInt Result = IsSigned ? MaybeLHS->sadd_ov(*MaybeRHS, Overflow)
8071 : MaybeLHS->uadd_ov(*MaybeRHS, Overflow);
8072 MatchInfo = [=](MachineIRBuilder &B) {
8073 B.buildConstant(Dst, Result);
8074 B.buildConstant(Carry, Overflow);
8075 };
8076 return true;
8077 }
8078
8079 // Fold (addo x, 0) -> x, no carry
8080 if (MaybeRHS && *MaybeRHS == 0 && isConstantLegalOrBeforeLegalizer(CarryTy)) {
8081 MatchInfo = [=](MachineIRBuilder &B) {
8082 B.buildCopy(Dst, LHS);
8083 B.buildConstant(Carry, 0);
8084 };
8085 return true;
8086 }
8087
8088 // Given 2 constant operands whose sum does not overflow:
8089 // uaddo (X +nuw C0), C1 -> uaddo X, C0 + C1
8090 // saddo (X +nsw C0), C1 -> saddo X, C0 + C1
8091 GAdd *AddLHS = getOpcodeDef<GAdd>(LHS, MRI);
8092 if (MaybeRHS && AddLHS && MRI.hasOneNonDBGUse(Add->getReg(0)) &&
8093 ((IsSigned && AddLHS->getFlag(MachineInstr::MIFlag::NoSWrap)) ||
8094 (!IsSigned && AddLHS->getFlag(MachineInstr::MIFlag::NoUWrap)))) {
8095 std::optional<APInt> MaybeAddRHS =
8096 getConstantOrConstantSplatVector(AddLHS->getRHSReg());
8097 if (MaybeAddRHS) {
8098 bool Overflow;
8099 APInt NewC = IsSigned ? MaybeAddRHS->sadd_ov(*MaybeRHS, Overflow)
8100 : MaybeAddRHS->uadd_ov(*MaybeRHS, Overflow);
8101 if (!Overflow && isConstantLegalOrBeforeLegalizer(DstTy)) {
8102 if (IsSigned) {
8103 MatchInfo = [=](MachineIRBuilder &B) {
8104 auto ConstRHS = B.buildConstant(DstTy, NewC);
8105 B.buildSAddo(Dst, Carry, AddLHS->getLHSReg(), ConstRHS);
8106 };
8107 return true;
8108 }
8109 // !IsSigned
8110 MatchInfo = [=](MachineIRBuilder &B) {
8111 auto ConstRHS = B.buildConstant(DstTy, NewC);
8112 B.buildUAddo(Dst, Carry, AddLHS->getLHSReg(), ConstRHS);
8113 };
8114 return true;
8115 }
8116 }
8117 };
8118
8119 // We try to combine addo to non-overflowing add.
8120 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, {DstTy}}) ||
8122 return false;
8123
8124 // We try to combine uaddo to non-overflowing add.
8125 if (!IsSigned) {
8126 ConstantRange CRLHS =
8127 ConstantRange::fromKnownBits(VT->getKnownBits(LHS), /*IsSigned=*/false);
8128 ConstantRange CRRHS =
8129 ConstantRange::fromKnownBits(VT->getKnownBits(RHS), /*IsSigned=*/false);
8130
8131 switch (CRLHS.unsignedAddMayOverflow(CRRHS)) {
8133 return false;
8135 MatchInfo = [=](MachineIRBuilder &B) {
8136 B.buildAdd(Dst, LHS, RHS, MachineInstr::MIFlag::NoUWrap);
8137 B.buildConstant(Carry, 0);
8138 };
8139 return true;
8140 }
8143 MatchInfo = [=](MachineIRBuilder &B) {
8144 B.buildAdd(Dst, LHS, RHS);
8145 B.buildConstant(Carry, 1);
8146 };
8147 return true;
8148 }
8149 }
8150 return false;
8151 }
8152
8153 // We try to combine saddo to non-overflowing add.
8154
8155 // If LHS and RHS each have at least two sign bits, then there is no signed
8156 // overflow.
8157 if (VT->computeNumSignBits(RHS) > 1 && VT->computeNumSignBits(LHS) > 1) {
8158 MatchInfo = [=](MachineIRBuilder &B) {
8159 B.buildAdd(Dst, LHS, RHS, MachineInstr::MIFlag::NoSWrap);
8160 B.buildConstant(Carry, 0);
8161 };
8162 return true;
8163 }
8164
8165 ConstantRange CRLHS =
8166 ConstantRange::fromKnownBits(VT->getKnownBits(LHS), /*IsSigned=*/true);
8167 ConstantRange CRRHS =
8168 ConstantRange::fromKnownBits(VT->getKnownBits(RHS), /*IsSigned=*/true);
8169
8170 switch (CRLHS.signedAddMayOverflow(CRRHS)) {
8172 return false;
8174 MatchInfo = [=](MachineIRBuilder &B) {
8175 B.buildAdd(Dst, LHS, RHS, MachineInstr::MIFlag::NoSWrap);
8176 B.buildConstant(Carry, 0);
8177 };
8178 return true;
8179 }
8182 MatchInfo = [=](MachineIRBuilder &B) {
8183 B.buildAdd(Dst, LHS, RHS);
8184 B.buildConstant(Carry, 1);
8185 };
8186 return true;
8187 }
8188 }
8189
8190 return false;
8191}
8192
8194 BuildFnTy &MatchInfo) const {
8196 MatchInfo(Builder);
8197 Root->eraseFromParent();
8198}
8199
8201 int64_t Exponent) const {
8202 bool OptForSize = MI.getMF()->getFunction().hasOptSize();
8204}
8205
8207 int64_t Exponent) const {
// NOTE(review): the first line of this signature (listing line 8206) is
// missing from this copy of the file.
//
// Expands a power-by-constant-integer instruction MI (first two registers:
// Dst, Base) into a chain of G_FMULs, plus one G_FDIV when \p Exponent is
// negative. MI is removed from its block at the end.
8208 auto [Dst, Base] = MI.getFirst2Regs();
8209 LLT Ty = MRI.getType(Dst);
8210 int64_t ExpVal = Exponent;
8211
// x^0: the result is the constant 1.0.
8212 if (ExpVal == 0) {
8213 Builder.buildFConstant(Dst, 1.0);
// NOTE(review): this path calls removeFromParent() while the normal path at
// the bottom calls eraseFromParent(). removeFromParent() only unlinks the
// instruction without deleting it; confirm whether that is intentional or
// whether eraseFromParent() was meant here as well.
8214 MI.removeFromParent();
8215 return;
8216 }
8217
// Work on the magnitude; the sign is handled after the loop.
// NOTE(review): negating INT64_MIN would overflow -- presumably callers never
// pass it; verify against the matcher.
8218 if (ExpVal < 0)
8219 ExpVal = -ExpVal;
8220
8221 // We use the simple binary decomposition method from SelectionDAG ExpandPowI
8222 // to generate the multiply sequence. There are more optimal ways to do this
8223 // (for example, powi(x,15) generates one more multiply than it should), but
8224 // this has the benefit of being both really simple and much better than a
8225 // libcall.
8226 std::optional<SrcOp> Res;
8227 SrcOp CurSquare = Base;
8228 while (ExpVal > 0) {
// A set low bit means the current power of Base is a factor of the result.
8229 if (ExpVal & 1) {
8230 if (!Res)
8231 Res = CurSquare;
8232 else
8233 Res = Builder.buildFMul(Ty, *Res, CurSquare);
8234 }
8235
// NOTE(review): the square is also built on the final iteration, where its
// result is not read again by this function.
8236 CurSquare = Builder.buildFMul(Ty, CurSquare, CurSquare);
8237 ExpVal >>= 1;
8238 }
8239
8240 // If the original exponent was negative, invert the result, producing
8241 // 1/(x*x*x).
// Only the division receives MI's flags; the multiplies above are built
// without flags.
8242 if (Exponent < 0)
8243 Res = Builder.buildFDiv(Ty, Builder.buildFConstant(Ty, 1.0), *Res,
8244 MI.getFlags());
8245
8246 Builder.buildCopy(Dst, *Res);
8247 MI.eraseFromParent();
8248}
8249
8251 BuildFnTy &MatchInfo) const {
8252 // fold (A+C1)-C2 -> A+(C1-C2)
8253 const GSub *Sub = cast<GSub>(&MI);
8254 GAdd *Add = cast<GAdd>(MRI.getVRegDef(Sub->getLHSReg()));
8255
8256 if (!MRI.hasOneNonDBGUse(Add->getReg(0)))
8257 return false;
8258
8259 APInt C2 = getIConstantFromReg(Sub->getRHSReg(), MRI);
8260 APInt C1 = getIConstantFromReg(Add->getRHSReg(), MRI);
8261
8262 Register Dst = Sub->getReg(0);
8263 LLT DstTy = MRI.getType(Dst);
8264
8265 MatchInfo = [=](MachineIRBuilder &B) {
8266 auto Const = B.buildConstant(DstTy, C1 - C2);
8267 B.buildAdd(Dst, Add->getLHSReg(), Const);
8268 };
8269
8270 return true;
8271}
8272
8274 BuildFnTy &MatchInfo) const {
8275 // fold C2-(A+C1) -> (C2-C1)-A
8276 const GSub *Sub = cast<GSub>(&MI);
8277 GAdd *Add = cast<GAdd>(MRI.getVRegDef(Sub->getRHSReg()));
8278
8279 if (!MRI.hasOneNonDBGUse(Add->getReg(0)))
8280 return false;
8281
8282 APInt C2 = getIConstantFromReg(Sub->getLHSReg(), MRI);
8283 APInt C1 = getIConstantFromReg(Add->getRHSReg(), MRI);
8284
8285 Register Dst = Sub->getReg(0);
8286 LLT DstTy = MRI.getType(Dst);
8287
8288 MatchInfo = [=](MachineIRBuilder &B) {
8289 auto Const = B.buildConstant(DstTy, C2 - C1);
8290 B.buildSub(Dst, Const, Add->getLHSReg());
8291 };
8292
8293 return true;
8294}
8295
8297 BuildFnTy &MatchInfo) const {
8298 // fold (A-C1)-C2 -> A-(C1+C2)
8299 const GSub *Sub1 = cast<GSub>(&MI);
8300 GSub *Sub2 = cast<GSub>(MRI.getVRegDef(Sub1->getLHSReg()));
8301
8302 if (!MRI.hasOneNonDBGUse(Sub2->getReg(0)))
8303 return false;
8304
8305 APInt C2 = getIConstantFromReg(Sub1->getRHSReg(), MRI);
8306 APInt C1 = getIConstantFromReg(Sub2->getRHSReg(), MRI);
8307
8308 Register Dst = Sub1->getReg(0);
8309 LLT DstTy = MRI.getType(Dst);
8310
8311 MatchInfo = [=](MachineIRBuilder &B) {
8312 auto Const = B.buildConstant(DstTy, C1 + C2);
8313 B.buildSub(Dst, Sub2->getLHSReg(), Const);
8314 };
8315
8316 return true;
8317}
8318
8320 BuildFnTy &MatchInfo) const {
8321 // fold (C1-A)-C2 -> (C1-C2)-A
8322 const GSub *Sub1 = cast<GSub>(&MI);
8323 GSub *Sub2 = cast<GSub>(MRI.getVRegDef(Sub1->getLHSReg()));
8324
8325 if (!MRI.hasOneNonDBGUse(Sub2->getReg(0)))
8326 return false;
8327
8328 APInt C2 = getIConstantFromReg(Sub1->getRHSReg(), MRI);
8329 APInt C1 = getIConstantFromReg(Sub2->getLHSReg(), MRI);
8330
8331 Register Dst = Sub1->getReg(0);
8332 LLT DstTy = MRI.getType(Dst);
8333
8334 MatchInfo = [=](MachineIRBuilder &B) {
8335 auto Const = B.buildConstant(DstTy, C1 - C2);
8336 B.buildSub(Dst, Const, Sub2->getRHSReg());
8337 };
8338
8339 return true;
8340}
8341
8343 BuildFnTy &MatchInfo) const {
8344 // fold ((A-C1)+C2) -> (A+(C2-C1))
8345 const GAdd *Add = cast<GAdd>(&MI);
8346 GSub *Sub = cast<GSub>(MRI.getVRegDef(Add->getLHSReg()));
8347
8348 if (!MRI.hasOneNonDBGUse(Sub->getReg(0)))
8349 return false;
8350
8351 APInt C2 = getIConstantFromReg(Add->getRHSReg(), MRI);
8352 APInt C1 = getIConstantFromReg(Sub->getRHSReg(), MRI);
8353
8354 Register Dst = Add->getReg(0);
8355 LLT DstTy = MRI.getType(Dst);
8356
8357 MatchInfo = [=](MachineIRBuilder &B) {
8358 auto Const = B.buildConstant(DstTy, C2 - C1);
8359 B.buildAdd(Dst, Sub->getLHSReg(), Const);
8360 };
8361
8362 return true;
8363}
8364
8366 const MachineInstr &MI, BuildFnTy &MatchInfo) const {
8367 const GUnmerge *Unmerge = cast<GUnmerge>(&MI);
8368
8369 if (!MRI.hasOneNonDBGUse(Unmerge->getSourceReg()))
8370 return false;
8371
8372 const MachineInstr *Source = MRI.getVRegDef(Unmerge->getSourceReg());
8373
8374 LLT DstTy = MRI.getType(Unmerge->getReg(0));
8375
8376 // $bv:_(<8 x s8>) = G_BUILD_VECTOR ....
8377 // $any:_(<8 x s16>) = G_ANYEXT $bv
8378 // $uv:_(<4 x s16>), $uv1:_(<4 x s16>) = G_UNMERGE_VALUES $any
8379 //
8380 // ->
8381 //
8382 // $any:_(s16) = G_ANYEXT $bv[0]
8383 // $any1:_(s16) = G_ANYEXT $bv[1]
8384 // $any2:_(s16) = G_ANYEXT $bv[2]
8385 // $any3:_(s16) = G_ANYEXT $bv[3]
8386 // $any4:_(s16) = G_ANYEXT $bv[4]
8387 // $any5:_(s16) = G_ANYEXT $bv[5]
8388 // $any6:_(s16) = G_ANYEXT $bv[6]
8389 // $any7:_(s16) = G_ANYEXT $bv[7]
8390 // $uv:_(<4 x s16>) = G_BUILD_VECTOR $any, $any1, $any2, $any3
8391 // $uv1:_(<4 x s16>) = G_BUILD_VECTOR $any4, $any5, $any6, $any7
8392
8393 // We want to unmerge into vectors.
8394 if (!DstTy.isFixedVector())
8395 return false;
8396
8397 const GAnyExt *Any = dyn_cast<GAnyExt>(Source);
8398 if (!Any)
8399 return false;
8400
8401 const MachineInstr *NextSource = MRI.getVRegDef(Any->getSrcReg());
8402
8403 if (const GBuildVector *BV = dyn_cast<GBuildVector>(NextSource)) {
8404 // G_UNMERGE_VALUES G_ANYEXT G_BUILD_VECTOR
8405
8406 if (!MRI.hasOneNonDBGUse(BV->getReg(0)))
8407 return false;
8408
8409 // FIXME: check element types?
8410 if (BV->getNumSources() % Unmerge->getNumDefs() != 0)
8411 return false;
8412
8413 LLT BigBvTy = MRI.getType(BV->getReg(0));
8414 LLT SmallBvTy = DstTy;
8415 LLT SmallBvElemenTy = SmallBvTy.getElementType();
8416
8418 {TargetOpcode::G_BUILD_VECTOR, {SmallBvTy, SmallBvElemenTy}}))
8419 return false;
8420
8421 // We check the legality of scalar anyext.
8423 {TargetOpcode::G_ANYEXT,
8424 {SmallBvElemenTy, BigBvTy.getElementType()}}))
8425 return false;
8426
8427 MatchInfo = [=](MachineIRBuilder &B) {
8428 // Build into each G_UNMERGE_VALUES def
8429 // a small build vector with anyext from the source build vector.
8430 for (unsigned I = 0; I < Unmerge->getNumDefs(); ++I) {
8432 for (unsigned J = 0; J < SmallBvTy.getNumElements(); ++J) {
8433 Register SourceArray =
8434 BV->getSourceReg(I * SmallBvTy.getNumElements() + J);
8435 auto AnyExt = B.buildAnyExt(SmallBvElemenTy, SourceArray);
8436 Ops.push_back(AnyExt.getReg(0));
8437 }
8438 B.buildBuildVector(Unmerge->getOperand(I).getReg(), Ops);
8439 };
8440 };
8441 return true;
8442 };
8443
8444 return false;
8445}
8446
8448 BuildFnTy &MatchInfo) const {
8449
8450 bool Changed = false;
8451 auto &Shuffle = cast<GShuffleVector>(MI);
8452 ArrayRef<int> OrigMask = Shuffle.getMask();
8453 SmallVector<int, 16> NewMask;
8454 const LLT SrcTy = MRI.getType(Shuffle.getSrc1Reg());
8455 const unsigned NumSrcElems = SrcTy.isVector() ? SrcTy.getNumElements() : 1;
8456 const unsigned NumDstElts = OrigMask.size();
8457 for (unsigned i = 0; i != NumDstElts; ++i) {
8458 int Idx = OrigMask[i];
8459 if (Idx >= (int)NumSrcElems) {
8460 Idx = -1;
8461 Changed = true;
8462 }
8463 NewMask.push_back(Idx);
8464 }
8465
8466 if (!Changed)
8467 return false;
8468
8469 MatchInfo = [&, NewMask = std::move(NewMask)](MachineIRBuilder &B) {
8470 B.buildShuffleVector(MI.getOperand(0), MI.getOperand(1), MI.getOperand(2),
8471 std::move(NewMask));
8472 };
8473
8474 return true;
8475}
8476
8477static void commuteMask(MutableArrayRef<int> Mask, const unsigned NumElems) {
8478 const unsigned MaskSize = Mask.size();
8479 for (unsigned I = 0; I < MaskSize; ++I) {
8480 int Idx = Mask[I];
8481 if (Idx < 0)
8482 continue;
8483
8484 if (Idx < (int)NumElems)
8485 Mask[I] = Idx + NumElems;
8486 else
8487 Mask[I] = Idx - NumElems;
8488 }
8489}
8490
8492 BuildFnTy &MatchInfo) const {
8493
8494 auto &Shuffle = cast<GShuffleVector>(MI);
8495 // If any of the two inputs is already undef, don't check the mask again to
8496 // prevent infinite loop
8497 if (getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Shuffle.getSrc1Reg(), MRI))
8498 return false;
8499
8500 if (getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Shuffle.getSrc2Reg(), MRI))
8501 return false;
8502
8503 const LLT DstTy = MRI.getType(Shuffle.getReg(0));
8504 const LLT Src1Ty = MRI.getType(Shuffle.getSrc1Reg());
8506 {TargetOpcode::G_SHUFFLE_VECTOR, {DstTy, Src1Ty}}))
8507 return false;
8508
8509 ArrayRef<int> Mask = Shuffle.getMask();
8510 const unsigned NumSrcElems = Src1Ty.getNumElements();
8511
8512 bool TouchesSrc1 = false;
8513 bool TouchesSrc2 = false;
8514 const unsigned NumElems = Mask.size();
8515 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
8516 if (Mask[Idx] < 0)
8517 continue;
8518
8519 if (Mask[Idx] < (int)NumSrcElems)
8520 TouchesSrc1 = true;
8521 else
8522 TouchesSrc2 = true;
8523 }
8524
8525 if (TouchesSrc1 == TouchesSrc2)
8526 return false;
8527
8528 Register NewSrc1 = Shuffle.getSrc1Reg();
8529 SmallVector<int, 16> NewMask(Mask);
8530 if (TouchesSrc2) {
8531 NewSrc1 = Shuffle.getSrc2Reg();
8532 commuteMask(NewMask, NumSrcElems);
8533 }
8534
8535 MatchInfo = [=, &Shuffle](MachineIRBuilder &B) {
8536 auto Undef = B.buildUndef(Src1Ty);
8537 B.buildShuffleVector(Shuffle.getReg(0), NewSrc1, Undef, NewMask);
8538 };
8539
8540 return true;
8541}
8542
8544 BuildFnTy &MatchInfo) const {
8545 const GSubCarryOut *Subo = cast<GSubCarryOut>(&MI);
8546
8547 Register Dst = Subo->getReg(0);
8548 Register LHS = Subo->getLHSReg();
8549 Register RHS = Subo->getRHSReg();
8550 Register Carry = Subo->getCarryOutReg();
8551 LLT DstTy = MRI.getType(Dst);
8552 LLT CarryTy = MRI.getType(Carry);
8553
8554 // Check legality before known bits.
8555 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SUB, {DstTy}}) ||
8557 return false;
8558
8559 ConstantRange KBLHS =
8560 ConstantRange::fromKnownBits(VT->getKnownBits(LHS),
8561 /* IsSigned=*/Subo->isSigned());
8562 ConstantRange KBRHS =
8563 ConstantRange::fromKnownBits(VT->getKnownBits(RHS),
8564 /* IsSigned=*/Subo->isSigned());
8565
8566 if (Subo->isSigned()) {
8567 // G_SSUBO
8568 switch (KBLHS.signedSubMayOverflow(KBRHS)) {
8570 return false;
8572 MatchInfo = [=](MachineIRBuilder &B) {
8573 B.buildSub(Dst, LHS, RHS, MachineInstr::MIFlag::NoSWrap);
8574 B.buildConstant(Carry, 0);
8575 };
8576 return true;
8577 }
8580 MatchInfo = [=](MachineIRBuilder &B) {
8581 B.buildSub(Dst, LHS, RHS);
8582 B.buildConstant(Carry, getICmpTrueVal(getTargetLowering(),
8583 /*isVector=*/CarryTy.isVector(),
8584 /*isFP=*/false));
8585 };
8586 return true;
8587 }
8588 }
8589 return false;
8590 }
8591
8592 // G_USUBO
8593 switch (KBLHS.unsignedSubMayOverflow(KBRHS)) {
8595 return false;
8597 MatchInfo = [=](MachineIRBuilder &B) {
8598 B.buildSub(Dst, LHS, RHS, MachineInstr::MIFlag::NoUWrap);
8599 B.buildConstant(Carry, 0);
8600 };
8601 return true;
8602 }
8605 MatchInfo = [=](MachineIRBuilder &B) {
8606 B.buildSub(Dst, LHS, RHS);
8607 B.buildConstant(Carry, getICmpTrueVal(getTargetLowering(),
8608 /*isVector=*/CarryTy.isVector(),
8609 /*isFP=*/false));
8610 };
8611 return true;
8612 }
8613 }
8614
8615 return false;
8616}
8617
8618// Fold (ctlz (xor x, (sra x, bitwidth-1))) -> (add (ctls x), 1).
8619// Fold (ctlz (or (shl (xor x, (sra x, bitwidth-1)), 1), 1) -> (ctls x)
8621 BuildFnTy &MatchInfo) const {
// NOTE(review): the embedded listing numbers skip 8620, 8644-8646, 8654 and
// 8664-8665. The first line of the signature, the accepted `case` labels of
// the legality switch, and the leading matcher lines of both mi_match
// patterns are therefore absent from this listing. The member index on the
// same page declares this function as
//   bool matchCtls(MachineInstr &CtlzMI, BuildFnTy &MatchInfo) const
//
// Purpose: recognise the two G_CTLZ shapes described in the comment above
// the function and, on success, store in MatchInfo a callback that emits
// G_CTLS (plus an add of 1 for the plain xor form).
8622 assert((CtlzMI.getOpcode() == TargetOpcode::G_CTLZ ||
8623 CtlzMI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) &&
8624 "Expected G_CTLZ variant");
8625
8626 const Register Dst = CtlzMI.getOperand(0).getReg();
8627 Register Src = CtlzMI.getOperand(1).getReg();
8628
8629 LLT Ty = MRI.getType(Dst);
8630 LLT SrcTy = MRI.getType(Src);
8631
// Only valid scalar result types are handled.
8632 if (!(Ty.isValid() && Ty.isScalar()))
8633 return false;
8634
// The legality query below dereferences LI, so bail out without it.
8635 if (!LI)
8636 return false;
8637
8638 SmallVector<LLT, 2> QueryTypes = {Ty, SrcTy};
8639 LegalityQuery Query(TargetOpcode::G_CTLS, QueryTypes);
8640
// Reject every legalizer action for G_CTLS except the ones listed in the
// `case` labels that are missing from this listing (see note at the top).
8641 switch (LI->getAction(Query).Action) {
8642 default:
8643 return false;
8647 break;
8648 }
8649
8650 // Src = or(shl(V, 1), 1) -> Src=V; NeedAdd = False
8651 Register V;
// NeedAdd stays true for the plain xor form, which folds to ctls + 1.
8652 bool NeedAdd = true;
8653 if (mi_match(Src, MRI,
8655 m_SpecificICst(1))))) {
8656 NeedAdd = false;
8657 Src = V;
8658 }
8659
// NOTE(review): the shift amount that is matched below is derived from the
// result type Ty, not from SrcTy; confirm the two widths always agree here.
8660 unsigned BitWidth = Ty.getScalarSizeInBits();
8661
// X receives the value whose sign bits are counted; it is the operand passed
// to buildCTLS below.
8662 Register X;
8663 if (!mi_match(Src, MRI,
8666 m_SpecificICst(BitWidth - 1)))))))
8667 return false;
8668
// All captured values are copied, so the callback does not refer back to
// locals of this function.
8669 MatchInfo = [=](MachineIRBuilder &B) {
8670 if (!NeedAdd) {
8671 B.buildCTLS(Dst, X);
8672 return;
8673 }
8674
8675 auto Ctls = B.buildCTLS(Ty, X);
8676 auto One = B.buildConstant(Ty, 1);
8677
8678 B.buildAdd(Dst, Ctls, One);
8679 };
8680
8681 return true;
8682}
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
unsigned RegSize
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
constexpr LLT S1
AMDGPU Register Bank Select
Rewrite undef for PHI
This file declares a class to represent arbitrary precision floating point values and provide a varie...
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static const Function * getParent(const Value *V)
#define X(NUM, ENUM, NAME)
Definition ELF.h:851
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static bool hasMoreUses(const MachineInstr &MI0, const MachineInstr &MI1, const MachineRegisterInfo &MRI)
static bool isContractableFMul(MachineInstr &MI, bool AllowFusionGlobally)
Checks if MI is TargetOpcode::G_FMUL and contractable either due to global flags or MachineInstr flag...
static unsigned getIndexedOpc(unsigned LdStOpc)
static APFloat constantFoldFpUnary(const MachineInstr &MI, const MachineRegisterInfo &MRI, const APFloat &Val)
static std::optional< std::pair< GZExtLoad *, int64_t > > matchLoadAndBytePosition(Register Reg, unsigned MemSizeInBits, const MachineRegisterInfo &MRI)
Helper function for findLoadOffsetsForLoadOrCombine.
static std::optional< unsigned > getMinUselessShift(KnownBits ValueKB, unsigned Opcode, std::optional< int64_t > &Result)
Return the minimum useless shift amount that results in complete loss of the source value.
static Register peekThroughBitcast(Register Reg, const MachineRegisterInfo &MRI)
static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I)
static cl::opt< bool > ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false), cl::desc("Force all indexed operations to be " "legal for the GlobalISel combiner"))
static void commuteMask(MutableArrayRef< int > Mask, const unsigned NumElems)
static cl::opt< unsigned > PostIndexUseThreshold("post-index-use-threshold", cl::Hidden, cl::init(32), cl::desc("Number of uses of a base pointer to check before it is no longer " "considered for post-indexing."))
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
static unsigned getExtLoadOpcForExtend(unsigned ExtOpc)
static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits, int64_t Cst, bool IsVector, bool IsFP)
static LLT getMidVTForTruncRightShiftCombine(LLT ShiftTy, LLT TruncTy)
static bool canFoldInAddressingMode(GLoadStore *MI, const TargetLowering &TLI, MachineRegisterInfo &MRI)
Return true if 'MI' is a load or a store that may fold its address operand into the load / store ...
static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I)
static Register buildLogBase2(Register V, MachineIRBuilder &MIB)
Determines the LogBase2 value for a non-null input value using the transform: LogBase2(V) = (EltBits ...
This contains common combine transformations that may be used in a combine pass,or by the target else...
This contains common code to allow clients to notify changes to machine instr.
Provides analysis for querying information about KnownBits during GISel passes.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
#define _
IRTranslator LLVM IR MI
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static LVOptions Options
Definition LVOptions.cpp:25
Interface for Targets to specify which operations they can successfully select and how the others sho...
Implement a low-level type suitable for MachineInstr level instruction selection.
#define I(x, y, z)
Definition MD5.cpp:57
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineIRBuilder class.
Register Reg
#define R2(n)
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
R600 Clause Merge
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
This file contains some templates that are useful if you are working with the STL at all.
This file implements a set that has insertion order iteration characteristics.
This file implements the SmallBitVector class.
#define LLVM_DEBUG(...)
Definition Debug.h:114
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
This file describes how to lower LLVM code to machine code.
Value * RHS
Value * LHS
static constexpr roundingMode rmTowardZero
Definition APFloat.h:348
static const fltSemantics & IEEEdouble()
Definition APFloat.h:297
static constexpr roundingMode rmTowardNegative
Definition APFloat.h:347
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
static constexpr roundingMode rmTowardPositive
Definition APFloat.h:346
static constexpr roundingMode rmNearestTiesToAway
Definition APFloat.h:349
const fltSemantics & getSemantics() const
Definition APFloat.h:1524
bool isNaN() const
Definition APFloat.h:1514
opStatus fusedMultiplyAdd(const APFloat &Multiplicand, const APFloat &Addend, roundingMode RM)
Definition APFloat.h:1290
APInt bitcastToAPInt() const
Definition APFloat.h:1408
Class for arbitrary precision integers.
Definition APInt.h:78
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1054
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1563
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition APInt.cpp:1075
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:967
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition APInt.h:207
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:372
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition APInt.h:1189
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1708
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1511
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition APInt.h:1118
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition APInt.h:210
bool isNegative() const
Determine sign of this APInt.
Definition APInt.h:330
int32_t exactLogBase2() const
Definition APInt.h:1806
void ashrInPlace(unsigned ShiftAmt)
Arithmetic right-shift this APInt by ShiftAmt in place.
Definition APInt.h:841
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1662
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition APInt.h:1621
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:220
LLVM_ABI APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
Definition APInt.cpp:1083
bool isStrictlyPositive() const
Determine if this APInt Value is positive.
Definition APInt.h:357
LLVM_ABI APInt multiplicativeInverse() const
Definition APInt.cpp:1316
bool isMask(unsigned numBits) const
Definition APInt.h:489
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition APInt.cpp:1027
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:441
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
bool isOne() const
Determine if this is a value of 1.
Definition APInt.h:390
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition APInt.h:240
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1585
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition APInt.h:865
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:858
unsigned countr_one() const
Count the number of trailing one bits.
Definition APInt.h:1679
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition APInt.h:1228
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
bool isEquality() const
Determine if this is an equals/not equals predicate.
Definition InstrTypes.h:915
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
@ FCMP_TRUE
1 1 1 1 Always true (always folded)
Definition InstrTypes.h:693
@ ICMP_SLT
signed less than
Definition InstrTypes.h:705
@ ICMP_SLE
signed less or equal
Definition InstrTypes.h:706
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition InstrTypes.h:682
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
Definition InstrTypes.h:691
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition InstrTypes.h:680
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition InstrTypes.h:681
@ ICMP_UGE
unsigned greater or equal
Definition InstrTypes.h:700
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:699
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:703
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition InstrTypes.h:690
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:701
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition InstrTypes.h:688
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition InstrTypes.h:683
@ ICMP_SGE
signed greater or equal
Definition InstrTypes.h:704
@ ICMP_ULE
unsigned less or equal
Definition InstrTypes.h:702
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition InstrTypes.h:689
@ FCMP_FALSE
0 0 0 0 Always false (always folded)
Definition InstrTypes.h:678
static LLVM_ABI bool isEquality(Predicate pred)
Determine if this is an equals/not equals predicate.
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition InstrTypes.h:827
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition InstrTypes.h:789
static LLVM_ABI bool isOrdered(Predicate predicate)
Determine if the predicate is an ordered operation.
void applyCombineExtendingLoads(MachineInstr &MI, PreferredTuple &MatchInfo) const
bool matchCommuteShift(MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchRepeatedFPDivisor(MachineInstr &MI, SmallVector< MachineInstr * > &MatchInfo) const
bool matchFoldC2MinusAPlusC1(const MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchLoadOrCombine(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match expression trees of the form.
const RegisterBank * getRegBank(Register Reg) const
Get the register bank of Reg.
void applyPtrAddZero(MachineInstr &MI) const
bool matchEqualDefs(const MachineOperand &MOP1, const MachineOperand &MOP2) const
Return true if MOP1 and MOP2 are register operands that are defined by equivalent instructions.
void applyUDivOrURemByConst(MachineInstr &MI) const
bool matchConstantFoldBinOp(MachineInstr &MI, APInt &MatchInfo) const
Do constant folding when opportunities are exposed after MIR building.
void applyCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) const
bool matchUnmergeValuesAnyExtBuildVector(const MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchCtls(MachineInstr &CtlzMI, BuildFnTy &MatchInfo) const
bool matchSelectSameVal(MachineInstr &MI) const
Optimize (cond ? x : x) -> x.
bool matchAddEToAddO(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match: (G_*ADDE x, y, 0) -> (G_*ADDO x, y) (G_*SUBE x, y, 0) -> (G_*SUBO x, y)
bool matchReassocConstantInnerRHS(GPtrAdd &MI, MachineInstr *RHS, BuildFnTy &MatchInfo) const
bool matchBitfieldExtractFromShr(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match: shr (shl x, n), k -> sbfx/ubfx x, pos, width.
bool matchFoldAMinusC1PlusC2(const MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchTruncSSatU(MachineInstr &MI, Register &MatchInfo) const
void applySimplifyURemByPow2(MachineInstr &MI) const
Combine G_UREM x, (known power of 2) to an add and bitmasking.
bool matchCombineUnmergeZExtToZExt(MachineInstr &MI) const
Transform X, Y = G_UNMERGE(G_ZEXT(Z)) -> X = G_ZEXT(Z); Y = G_CONSTANT 0.
bool matchPtrAddZero(MachineInstr &MI) const
}
void applyCombineConcatVectors(MachineInstr &MI, SmallVector< Register > &Ops) const
Replace MI with a flattened build_vector with Ops or an implicit_def if Ops is empty.
void applyXorOfAndWithSameReg(MachineInstr &MI, std::pair< Register, Register > &MatchInfo) const
bool canCombineFMadOrFMA(MachineInstr &MI, bool &AllowFusionGlobally, bool &HasFMAD, bool &Aggressive, bool CanReassociate=false) const
bool matchFoldAPlusC1MinusC2(const MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchExtractVecEltBuildVec(MachineInstr &MI, Register &Reg) const
void applyCombineUnmergeConstant(MachineInstr &MI, SmallVectorImpl< APInt > &Csts) const
bool matchShiftsTooBig(MachineInstr &MI, std::optional< int64_t > &MatchInfo) const
Match shifts greater or equal to the range (the bitwidth of the result datatype, or the effective bit...
bool matchCombineFAddFpExtFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z) (fadd (fpext (fmul x,...
bool matchCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) const
void applyCombineShuffleConcat(MachineInstr &MI, SmallVector< Register > &Ops) const
Replace MI with a flattened build_vector with Ops or an implicit_def if Ops is empty.
void replaceSingleDefInstWithReg(MachineInstr &MI, Register Replacement) const
Delete MI and replace all of its uses with Replacement.
void applyCombineShuffleToBuildVector(MachineInstr &MI) const
Replace MI with a build_vector.
bool matchCombineExtractedVectorLoad(MachineInstr &MI, BuildFnTy &MatchInfo) const
Combine a G_EXTRACT_VECTOR_ELT of a load into a narrowed load.
void replaceRegWith(MachineRegisterInfo &MRI, Register FromReg, Register ToReg) const
MachineRegisterInfo::replaceRegWith() and inform the observer of the changes.
void replaceRegOpWith(MachineRegisterInfo &MRI, MachineOperand &FromRegOp, Register ToReg) const
Replace a single register operand with a new register and inform the observer of the changes.
bool matchReassocCommBinOp(MachineInstr &MI, BuildFnTy &MatchInfo) const
Reassociate commutative binary operations like G_ADD.
void applyBuildFnMO(const MachineOperand &MO, BuildFnTy &MatchInfo) const
Use a function which takes in a MachineIRBuilder to perform a combine.
bool matchCommuteConstantToRHS(MachineInstr &MI) const
Match constant LHS ops that should be commuted.
const DataLayout & getDataLayout() const
bool matchBinOpSameVal(MachineInstr &MI) const
Optimize (x op x) -> x.
bool matchSimplifyNegMinMax(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (neg (min/max x, (neg x))) into (max/min x, (neg x)).
bool matchCombineDivRem(MachineInstr &MI, MachineInstr *&OtherMI) const
Try to combine G_[SU]DIV and G_[SU]REM into a single G_[SU]DIVREM when their source operands are iden...
void applyUMulHToLShr(MachineInstr &MI) const
void applyNotCmp(MachineInstr &MI, SmallVectorImpl< Register > &RegsToNegate) const
bool isLegalOrHasFewerElements(const LegalityQuery &Query) const
bool matchShiftImmedChain(MachineInstr &MI, RegisterImmPair &MatchInfo) const
Fold (shift (shift base, x), y) -> (shift base (x+y))
void applyCombineI2PToP2I(MachineInstr &MI, Register &Reg) const
bool matchTruncLshrBuildVectorFold(MachineInstr &MI, Register &MatchInfo) const
bool matchAllExplicitUsesAreUndef(MachineInstr &MI) const
Return true if all register explicit use operands on MI are defined by a G_IMPLICIT_DEF.
bool isPredecessor(const MachineInstr &DefMI, const MachineInstr &UseMI) const
Returns true if DefMI precedes UseMI or they are the same instruction.
bool matchPtrAddImmedChain(MachineInstr &MI, PtrAddChain &MatchInfo) const
bool matchTruncSSatS(MachineInstr &MI, Register &MatchInfo) const
const TargetLowering & getTargetLowering() const
bool matchShuffleUndefRHS(MachineInstr &MI, BuildFnTy &MatchInfo) const
Remove references to rhs if it is undef.
void applyBuildInstructionSteps(MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) const
Replace MI with a series of instructions described in MatchInfo.
void applySDivByPow2(MachineInstr &MI) const
void applySimplifyAddToSub(MachineInstr &MI, std::tuple< Register, Register > &MatchInfo) const
void applyUDivByPow2(MachineInstr &MI) const
Given a G_UDIV MI expressing an unsigned divide by a pow2 constant, return expressions that impleme...
bool matchOr(MachineInstr &MI, BuildFnTy &MatchInfo) const
Combine ors.
bool matchLshrOfTruncOfLshr(MachineInstr &MI, LshrOfTruncOfLshr &MatchInfo, MachineInstr &ShiftMI) const
Fold (lshr (trunc (lshr x, C1)), C2) -> trunc (shift x, (C1 + C2))
bool matchSimplifyAddToSub(MachineInstr &MI, std::tuple< Register, Register > &MatchInfo) const
Return true if MI is a G_ADD which can be simplified to a G_SUB.
void replaceInstWithConstant(MachineInstr &MI, int64_t C) const
Replace an instruction with a G_CONSTANT with value C.
bool tryEmitMemcpyInline(MachineInstr &MI) const
Emit loads and stores that perform the given memcpy.
bool matchCombineFSubFpExtFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fsub (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), (fneg z)) (fsub (fpext (fmul x,...
void applyFsubToFneg(MachineInstr &MI, Register &MatchInfo) const
bool matchConstantLargerBitWidth(MachineInstr &MI, unsigned ConstIdx) const
Checks if constant at ConstIdx is larger than MI 's bitwidth.
void applyCombineCopy(MachineInstr &MI) const
bool matchAddSubSameReg(MachineInstr &MI, Register &Src) const
Transform G_ADD(x, G_SUB(y, x)) to y.
bool matchCombineShlOfExtend(MachineInstr &MI, RegisterImmPair &MatchData) const
void applyCombineAddP2IToPtrAdd(MachineInstr &MI, std::pair< Register, bool > &PtrRegAndCommute) const
bool matchCombineFSubFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fsub (fmul x, y), z) -> (fma x, y, -z) (fsub (fmul x, y), z) -> (fmad x,...
bool matchCombineFAddFMAFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z)) (fadd (fmad x,...
bool matchSextTruncSextLoad(MachineInstr &MI) const
bool matchCombineMergeUnmerge(MachineInstr &MI, Register &MatchInfo) const
Fold away a merge of an unmerge of the corresponding values.
bool matchCombineInsertVecElts(MachineInstr &MI, SmallVectorImpl< Register > &MatchInfo) const
bool matchCombineBuildUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI, Register &UnmergeSrc) const
bool matchDivByPow2(MachineInstr &MI, bool IsSigned) const
Given an G_SDIV MI expressing a signed divided by a pow2 constant, return expressions that implements...
bool matchNarrowBinopFeedingAnd(MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchRedundantNegOperands(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fadd x, fneg(y)) -> (fsub x, y) (fadd fneg(x), y) -> (fsub y, x) (fsub x,...
bool matchCombineLoadWithAndMask(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match (and (load x), mask) -> zextload x.
bool matchCombineFAddFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fadd (fmul x, y), z) -> (fma x, y, z) (fadd (fmul x, y), z) -> (fmad x,...
bool matchCombineCopy(MachineInstr &MI) const
bool matchExtendThroughPhis(MachineInstr &MI, MachineInstr *&ExtMI) const
void applyShiftImmedChain(MachineInstr &MI, RegisterImmPair &MatchInfo) const
bool matchXorOfAndWithSameReg(MachineInstr &MI, std::pair< Register, Register > &MatchInfo) const
Fold (xor (and x, y), y) -> (and (not x), y) {.
bool matchCombineShuffleVector(MachineInstr &MI, SmallVectorImpl< Register > &Ops) const
Check if the G_SHUFFLE_VECTOR MI can be replaced by a concat_vectors.
void applyCombineConstPtrAddToI2P(MachineInstr &MI, APInt &NewCst) const
bool matchCombineAddP2IToPtrAdd(MachineInstr &MI, std::pair< Register, bool > &PtrRegAndCommute) const
Transform G_ADD (G_PTRTOINT x), y -> G_PTRTOINT (G_PTR_ADD x, y) Transform G_ADD y,...
void replaceInstWithFConstant(MachineInstr &MI, double C) const
Replace an instruction with a G_FCONSTANT with value C.
bool matchFunnelShiftToRotate(MachineInstr &MI) const
Match an FSHL or FSHR that can be combined to a ROTR or ROTL rotate.
bool matchOrShiftToFunnelShift(MachineInstr &MI, bool AllowScalarConstants, BuildFnTy &MatchInfo) const
bool matchRedundantSExtInReg(MachineInstr &MI) const
void replaceOpcodeWith(MachineInstr &FromMI, unsigned ToOpcode) const
Replace the opcode in instruction with a new opcode and inform the observer of the changes.
void applyFunnelShiftConstantModulo(MachineInstr &MI) const
Replaces the shift amount in MI with ShiftAmt % BW.
bool matchFoldC1Minus2MinusC2(const MachineInstr &MI, BuildFnTy &MatchInfo) const
void applyCombineShlOfExtend(MachineInstr &MI, const RegisterImmPair &MatchData) const
void applyUseVectorTruncate(MachineInstr &MI, Register &MatchInfo) const
CombinerHelper(GISelChangeObserver &Observer, MachineIRBuilder &B, bool IsPreLegalize, GISelValueTracking *VT=nullptr, MachineDominatorTree *MDT=nullptr, const LegalizerInfo *LI=nullptr)
bool matchShuffleDisjointMask(MachineInstr &MI, BuildFnTy &MatchInfo) const
Turn shuffle a, b, mask -> shuffle undef, b, mask iff mask does not reference a.
bool matchCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal) const
Transform a multiply by a power-of-2 value to a left shift.
void applyCombineShuffleVector(MachineInstr &MI, ArrayRef< Register > Ops) const
Replace MI with a concat_vectors with Ops.
bool matchCombineConstPtrAddToI2P(MachineInstr &MI, APInt &NewCst) const
bool matchCombineUnmergeUndef(MachineInstr &MI, std::function< void(MachineIRBuilder &)> &MatchInfo) const
Transform G_UNMERGE G_IMPLICIT_DEF -> G_IMPLICIT_DEF, G_IMPLICIT_DEF, ...
void applyFoldBinOpIntoSelect(MachineInstr &MI, const unsigned &SelectOpNo) const
SelectOperand is the operand in binary operator MI that is the select to fold.
bool matchFoldAMinusC1MinusC2(const MachineInstr &MI, BuildFnTy &MatchInfo) const
void applyCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) const
bool matchMulOBy2(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match: (G_UMULO x, 2) -> (G_UADDO x, x) (G_SMULO x, 2) -> (G_SADDO x, x)
bool matchCombineShuffleConcat(MachineInstr &MI, SmallVector< Register > &Ops) const
void applySextInRegOfLoad(MachineInstr &MI, std::tuple< Register, unsigned > &MatchInfo) const
bool tryCombineCopy(MachineInstr &MI) const
If MI is COPY, try to combine it.
bool matchTruncUSatU(MachineInstr &MI, MachineInstr &MinMI) const
bool matchICmpToLHSKnownBits(MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchReassocPtrAdd(MachineInstr &MI, BuildFnTy &MatchInfo) const
Reassociate pointer calculations with G_ADD involved, to allow better addressing mode usage.
bool matchUndefShuffleVectorMask(MachineInstr &MI) const
Return true if a G_SHUFFLE_VECTOR instruction MI has an undef mask.
bool matchAnyExplicitUseIsUndef(MachineInstr &MI) const
Return true if any explicit use operand on MI is defined by a G_IMPLICIT_DEF.
bool matchCombineI2PToP2I(MachineInstr &MI, Register &Reg) const
Transform IntToPtr(PtrToInt(x)) to x if cast is in the same address space.
bool matchCombineSubToAdd(MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchShiftOfShiftedLogic(MachineInstr &MI, ShiftOfShiftedLogic &MatchInfo) const
If we have a shift-by-constant of a bitwise logic op that itself has a shift-by-constant operand with...
bool matchOperandIsKnownToBeAPowerOfTwo(MachineInstr &MI, unsigned OpIdx) const
Check if operand OpIdx is known to be a power of 2.
bool matchCombineConcatVectors(MachineInstr &MI, SmallVector< Register > &Ops) const
If MI is G_CONCAT_VECTORS, try to combine it.
bool matchInsertExtractVecEltOutOfBounds(MachineInstr &MI) const
Return true if a G_{EXTRACT,INSERT}_VECTOR_ELT has an out of range index.
bool matchExtractAllEltsFromBuildVector(MachineInstr &MI, SmallVectorImpl< std::pair< Register, MachineInstr * > > &MatchInfo) const
LLVMContext & getContext() const
void applyPtrAddImmedChain(MachineInstr &MI, PtrAddChain &MatchInfo) const
bool isConstantLegalOrBeforeLegalizer(const LLT Ty) const
bool matchNotCmp(MachineInstr &MI, SmallVectorImpl< Register > &RegsToNegate) const
Combine inverting a result of a compare into the opposite cond code.
bool matchSextInRegOfLoad(MachineInstr &MI, std::tuple< Register, unsigned > &MatchInfo) const
Match sext_inreg(load p), imm -> sextload p.
bool matchSelectIMinMax(const MachineOperand &MO, BuildFnTy &MatchInfo) const
Combine select to integer min/max.
void applyCombineConstantFoldFpUnary(MachineInstr &MI, const ConstantFP *Cst) const
Transform fp_instr(cst) to constant result of the fp operation.
bool isLegal(const LegalityQuery &Query) const
bool matchICmpToTrueFalseKnownBits(MachineInstr &MI, int64_t &MatchInfo) const
bool tryReassocBinOp(unsigned Opc, Register DstReg, Register Op0, Register Op1, BuildFnTy &MatchInfo) const
Try to reassociate operands of a commutative binop.
void eraseInst(MachineInstr &MI) const
Erase MI.
bool matchConstantFoldFPBinOp(MachineInstr &MI, ConstantFP *&MatchInfo) const
Do constant FP folding when opportunities are exposed after MIR building.
void applyBuildFnNoErase(MachineInstr &MI, BuildFnTy &MatchInfo) const
Use a function which takes in a MachineIRBuilder to perform a combine.
bool matchUseVectorTruncate(MachineInstr &MI, Register &MatchInfo) const
bool matchUndefStore(MachineInstr &MI) const
Return true if a G_STORE instruction MI is storing an undef value.
MachineRegisterInfo & MRI
void applyCombineP2IToI2P(MachineInstr &MI, Register &Reg) const
Transform PtrToInt(IntToPtr(x)) to x.
void applyExtendThroughPhis(MachineInstr &MI, MachineInstr *&ExtMI) const
bool matchConstantFPOp(const MachineOperand &MOP, double C) const
Return true if MOP is defined by a G_FCONSTANT or splat with a value exactly equal to C.
MachineInstr * buildUDivOrURemUsingMul(MachineInstr &MI) const
Given a G_UDIV MI or G_UREM MI expressing a divide by constant, return an expression that implements...
void applyExtractVecEltBuildVec(MachineInstr &MI, Register &Reg) const
bool matchFoldBinOpIntoSelect(MachineInstr &MI, unsigned &SelectOpNo) const
Push a binary operator through a select on constants.
bool tryCombineShiftToUnmerge(MachineInstr &MI, unsigned TargetShiftAmount) const
bool tryCombineExtendingLoads(MachineInstr &MI) const
If MI is an extend that consumes the result of a load, try to combine it.
bool isLegalOrBeforeLegalizer(const LegalityQuery &Query) const
bool matchBuildVectorIdentityFold(MachineInstr &MI, Register &MatchInfo) const
bool matchBitfieldExtractFromShrAnd(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match: shr (and x, n), k -> ubfx x, pos, width.
void applyTruncSSatS(MachineInstr &MI, Register &MatchInfo) const
bool matchConstantFoldCastOp(MachineInstr &MI, APInt &MatchInfo) const
Do constant folding when opportunities are exposed after MIR building.
bool tryCombineShuffleVector(MachineInstr &MI) const
Try to combine G_SHUFFLE_VECTOR into G_CONCAT_VECTORS.
void applyRotateOutOfRange(MachineInstr &MI) const
bool matchReassocFoldConstantsInSubTree(GPtrAdd &MI, MachineInstr *LHS, MachineInstr *RHS, BuildFnTy &MatchInfo) const
bool matchHoistLogicOpWithSameOpcodeHands(MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) const
Match (logic_op (op x...), (op y...)) -> (op (logic_op x, y))
bool matchBitfieldExtractFromAnd(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match: and (lshr x, cst), mask -> ubfx x, cst, width.
bool matchBitfieldExtractFromSExtInReg(MachineInstr &MI, BuildFnTy &MatchInfo) const
Form a G_SBFX from a G_SEXT_INREG fed by a right shift.
bool matchUndefSelectCmp(MachineInstr &MI) const
Return true if a G_SELECT instruction MI has an undef comparison.
bool matchAndOrDisjointMask(MachineInstr &MI, BuildFnTy &MatchInfo) const
void replaceInstWithUndef(MachineInstr &MI) const
Replace an instruction with a G_IMPLICIT_DEF.
bool matchRedundantBinOpInEquality(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform: (X + Y) == X -> Y == 0 (X - Y) == X -> Y == 0 (X ^ Y) == X -> Y == 0 (X + Y) !...
bool matchOptBrCondByInvertingCond(MachineInstr &MI, MachineInstr *&BrCond) const
If a brcond's true block is not the fallthrough, make it so by inverting the condition and swapping o...
bool matchAddOverflow(MachineInstr &MI, BuildFnTy &MatchInfo) const
Combine addos.
void applyAshShlToSextInreg(MachineInstr &MI, std::tuple< Register, int64_t > &MatchInfo) const
bool matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo) const
Combine selects.
bool matchCombineExtendingLoads(MachineInstr &MI, PreferredTuple &MatchInfo) const
bool matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) const
Transform X, Y<dead> = G_UNMERGE Z -> X = G_TRUNC Z.
bool matchFsubToFneg(MachineInstr &MI, Register &MatchInfo) const
bool matchRotateOutOfRange(MachineInstr &MI) const
void applyExpandFPowI(MachineInstr &MI, int64_t Exponent) const
Expands FPOWI into a series of multiplications and a division if the exponent is negative.
void setRegBank(Register Reg, const RegisterBank *RegBank) const
Set the register bank of Reg.
bool matchConstantSelectCmp(MachineInstr &MI, unsigned &OpIdx) const
Return true if a G_SELECT instruction MI has a constant comparison.
bool matchCommuteFPConstantToRHS(MachineInstr &MI) const
Match constant LHS FP ops that should be commuted.
void applyCombineDivRem(MachineInstr &MI, MachineInstr *&OtherMI) const
bool matchCombineFMinMaxNaN(MachineInstr &MI, unsigned &Info) const
bool matchRedundantOr(MachineInstr &MI, Register &Replacement) const
void applyTruncSSatU(MachineInstr &MI, Register &MatchInfo) const
bool matchCombineFSubFpExtFNegFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fsub (fpext (fneg (fmul x, y))), z) -> (fneg (fma (fpext x), (fpext y),...
bool matchTruncBuildVectorFold(MachineInstr &MI, Register &MatchInfo) const
void applyCombineTruncOfShift(MachineInstr &MI, std::pair< MachineInstr *, LLT > &MatchInfo) const
bool matchConstantOp(const MachineOperand &MOP, int64_t C) const
Return true if MOP is defined by a G_CONSTANT or splat with a value equal to C.
const LegalizerInfo * LI
void applyCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal) const
void applyCombineBuildUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B, Register &UnmergeSrc) const
bool matchUMulHToLShr(MachineInstr &MI) const
MachineDominatorTree * MDT
void applyFunnelShiftToRotate(MachineInstr &MI) const
bool matchSimplifySelectToMinMax(MachineInstr &MI, BuildFnTy &MatchInfo) const
void applyRepeatedFPDivisor(SmallVector< MachineInstr * > &MatchInfo) const
bool matchTruncUSatUToFPTOUISat(MachineInstr &MI, MachineInstr &SrcMI) const
const RegisterBankInfo * RBI
bool matchMulOBy0(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match: (G_*MULO x, 0) -> 0 + no carry out.
GISelValueTracking * VT
bool matchBinopWithNeg(MachineInstr &MI, BuildFnTy &MatchInfo) const
Fold a bitwiseop (~b +/- c) -> a bitwiseop ~(b -/+ c)
bool matchCombineUnmergeConstant(MachineInstr &MI, SmallVectorImpl< APInt > &Csts) const
Transform G_UNMERGE Constant -> Constant1, Constant2, ...
void applyShiftOfShiftedLogic(MachineInstr &MI, ShiftOfShiftedLogic &MatchInfo) const
const TargetRegisterInfo * TRI
bool matchRedundantAnd(MachineInstr &MI, Register &Replacement) const
bool dominates(const MachineInstr &DefMI, const MachineInstr &UseMI) const
Returns true if DefMI dominates UseMI.
GISelChangeObserver & Observer
void applyBuildFn(MachineInstr &MI, BuildFnTy &MatchInfo) const
Use a function which takes in a MachineIRBuilder to perform a combine.
bool matchCombineTruncOfShift(MachineInstr &MI, std::pair< MachineInstr *, LLT > &MatchInfo) const
Transform trunc (shl x, K) to shl (trunc x), K if K < VT.getScalarSizeInBits().
bool matchCombineShiftToUnmerge(MachineInstr &MI, unsigned TargetShiftSize, unsigned &ShiftVal) const
Reduce a shift by a constant to an unmerge and a shift on a half sized type.
bool matchUDivOrURemByConst(MachineInstr &MI) const
Combine G_UDIV or G_UREM by constant into a multiply by magic constant.
bool matchAnd(MachineInstr &MI, BuildFnTy &MatchInfo) const
Combine ands.
bool matchSuboCarryOut(const MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchConstantFoldFMA(MachineInstr &MI, ConstantFP *&MatchInfo) const
Constant fold G_FMA/G_FMAD.
bool matchCombineFSubFNegFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z)) (fsub (fneg (fmul,...
bool matchCombineZextTrunc(MachineInstr &MI, Register &Reg) const
Transform zext(trunc(x)) to x.
bool matchOperandIsUndef(MachineInstr &MI, unsigned OpIdx) const
Check if operand OpIdx is undef.
void applyLshrOfTruncOfLshr(MachineInstr &MI, LshrOfTruncOfLshr &MatchInfo) const
bool tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0) const
Optimize memcpy intrinsics et al, e.g.
bool matchFreezeOfSingleMaybePoisonOperand(MachineInstr &MI, BuildFnTy &MatchInfo) const
void applySDivOrSRemByConst(MachineInstr &MI) const
MachineInstr * buildSDivOrSRemUsingMul(MachineInstr &MI) const
Given a G_SDIV MI or G_SREM MI expressing a signed divide by constant, return an expression that imp...
bool isLegalOrHasWidenScalar(const LegalityQuery &Query) const
bool matchSubAddSameReg(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform: (x + y) - y -> x; (x + y) - x -> y; x - (y + x) -> 0 - y; x - (x + z) -> 0 - z.
bool matchReassocConstantInnerLHS(GPtrAdd &MI, MachineInstr *LHS, MachineInstr *RHS, BuildFnTy &MatchInfo) const
bool matchOverlappingAnd(MachineInstr &MI, BuildFnTy &MatchInfo) const
Fold and(and(x, C1), C2) -> C1&C2 ? and(x, C1&C2) : 0.
bool matchCombineAnyExtTrunc(MachineInstr &MI, Register &Reg) const
Transform anyext(trunc(x)) to x.
void applyExtractAllEltsFromBuildVector(MachineInstr &MI, SmallVectorImpl< std::pair< Register, MachineInstr * > > &MatchInfo) const
MachineIRBuilder & Builder
void applyCommuteBinOpOperands(MachineInstr &MI) const
void replaceSingleDefInstWithOperand(MachineInstr &MI, unsigned OpIdx) const
Delete MI and replace all of its uses with its OpIdx-th operand.
void applySextTruncSextLoad(MachineInstr &MI) const
const MachineFunction & getMachineFunction() const
bool matchCombineFAddFpExtFMulToFMadOrFMAAggressive(MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchSDivOrSRemByConst(MachineInstr &MI) const
Combine G_SDIV or G_SREM by constant into a multiply by magic constant.
void applyOptBrCondByInvertingCond(MachineInstr &MI, MachineInstr *&BrCond) const
void applyCombineShiftToUnmerge(MachineInstr &MI, const unsigned &ShiftVal) const
bool matchFPowIExpansion(MachineInstr &MI, int64_t Exponent) const
Match FPOWI if it's safe to extend it into a series of multiplications.
void applyCombineInsertVecElts(MachineInstr &MI, SmallVectorImpl< Register > &MatchInfo) const
bool matchCombineUnmergeMergeToPlainValues(MachineInstr &MI, SmallVectorImpl< Register > &Operands) const
Transform <ty,...> G_UNMERGE(G_MERGE ty X, Y, Z) -> ty X, Y, Z.
void applyCombineUnmergeMergeToPlainValues(MachineInstr &MI, SmallVectorImpl< Register > &Operands) const
bool matchAshrShlToSextInreg(MachineInstr &MI, std::tuple< Register, int64_t > &MatchInfo) const
Match ashr (shl x, C), C -> sext_inreg (C)
void applyCombineUnmergeZExtToZExt(MachineInstr &MI) const
ConstantFP - Floating Point Values [float, double].
Definition Constants.h:420
const APFloat & getValue() const
Definition Constants.h:464
const APFloat & getValueAPF() const
Definition Constants.h:463
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:159
This class represents a range of values.
LLVM_ABI std::optional< ConstantRange > exactUnionWith(const ConstantRange &CR) const
Union the two ranges and return the result if it can be represented exactly, otherwise return std::nu...
LLVM_ABI ConstantRange subtract(const APInt &CI) const
Subtract the specified constant from the endpoints of this constant range.
static LLVM_ABI ConstantRange fromKnownBits(const KnownBits &Known, bool IsSigned)
Initialize a range based on a known bits constraint.
const APInt & getLower() const
Return the lower value for this range.
LLVM_ABI OverflowResult unsignedSubMayOverflow(const ConstantRange &Other) const
Return whether unsigned sub of the two ranges always/never overflows.
LLVM_ABI OverflowResult unsignedAddMayOverflow(const ConstantRange &Other) const
Return whether unsigned add of the two ranges always/never overflows.
LLVM_ABI bool isWrappedSet() const
Return true if this set wraps around the unsigned domain.
const APInt & getUpper() const
Return the upper value for this range.
static LLVM_ABI ConstantRange makeExactICmpRegion(CmpInst::Predicate Pred, const APInt &Other)
Produce the exact range such that all values in the returned range satisfy the given predicate with a...
LLVM_ABI OverflowResult signedAddMayOverflow(const ConstantRange &Other) const
Return whether signed add of the two ranges always/never overflows.
@ AlwaysOverflowsHigh
Always overflows in the direction of signed/unsigned max value.
@ AlwaysOverflowsLow
Always overflows in the direction of signed/unsigned min value.
@ MayOverflow
May or may not overflow.
LLVM_ABI OverflowResult signedSubMayOverflow(const ConstantRange &Other) const
Return whether signed sub of the two ranges always/never overflows.
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string, and methods for querying it.
Definition DataLayout.h:64
bool isBigEndian() const
Definition DataLayout.h:216
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition DenseMap.h:205
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition DenseMap.h:256
unsigned size() const
Definition DenseMap.h:110
iterator end()
Definition DenseMap.h:81
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:358
Represents overflowing add operations.
Represents an integer addition.
Represents a logical and.
CmpInst::Predicate getCond() const
Register getLHSReg() const
Register getRHSReg() const
Represents an any ext.
Represents any generic load, including sign/zero extending variants.
Register getDstReg() const
Get the definition register of the loaded value.
Register getCarryOutReg() const
Register getLHSReg() const
Register getRHSReg() const
Represents a G_BUILD_VECTOR.
Represent a G_ICMP.
Abstract class that contains various methods for clients to notify about changes.
Simple wrapper observer that takes several observers, and calls each one for each event.
Represents any type of generic load or store.
Register getPointerReg() const
Get the source register of the pointer value.
Represents a G_LOAD.
Represents a logical binary operation.
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
bool isAtomic() const
Returns true if the attached MachineMemOperand has the atomic flag set.
LocationSize getMemSizeInBits() const
Returns the size in bits of the memory access.
bool isSimple() const
Returns true if the memory operation is neither atomic nor volatile.
Register getSourceReg(unsigned I) const
Returns the I'th source register.
unsigned getNumSources() const
Returns the number of source registers.
Represents a G_MERGE_VALUES.
Represents a logical or.
Represents a G_PTR_ADD.
Represents a G_SELECT.
Register getCondReg() const
Represents overflowing sub operations.
Represents an integer subtraction.
Represents a G_UNMERGE_VALUES.
unsigned getNumDefs() const
Returns the number of def registers.
Register getSourceReg() const
Get the unmerge source register.
Represents a G_ZEXTLOAD.
Register getReg(unsigned Idx) const
Access the Idx'th operand as a register and return it.
static LLVM_ABI bool compare(const APInt &LHS, const APInt &RHS, ICmpInst::Predicate Pred)
Return result of LHS Pred RHS comparison.
constexpr bool isScalableVector() const
Returns true if the LLT is a scalable vector.
constexpr unsigned getScalarSizeInBits() const
constexpr bool isScalar() const
constexpr LLT changeElementType(LLT NewEltTy) const
If this type is a vector, return a vector with the same number of elements but the new element type.
static constexpr LLT vector(ElementCount EC, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
LLT getScalarType() const
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr bool isValid() const
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
constexpr bool isVector() const
constexpr bool isByteSized() const
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr bool isPointer() const
constexpr ElementCount getElementCount() const
constexpr bool isPointerOrPointerVector() const
constexpr bool isFixedVector() const
Returns true if the LLT is a fixed vector.
static LLT integer(unsigned SizeInBits)
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
@ Legalized
Instruction has been legalized and the MachineFunction changed.
LLVM_ABI LegalizeResult lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0)
LLVM_ABI Register getVectorElementPointer(Register VecPtr, LLT VecTy, Register Index)
Get a pointer to vector element Index located in memory for a vector of type VecTy starting at a base...
TypeSize getValue() const
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition MCInstrInfo.h:90
LLVM_ABI iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
MachineInstrBundleIterator< MachineInstr > iterator
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Helper class to build MachineInstr.
const TargetInstrInfo & getTII()
MachineInstrBuilder buildSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_SUB Op0, Op1.
MachineInstrBuilder buildCTLZ(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTLZ Src0.
MachineFunction & getMF()
Getter for the function we currently build.
MachineRegisterInfo * getMRI()
Getter for MRI.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
Register getReg(unsigned Idx) const
Get the register for the operand index.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
LLVM_ABI bool isDereferenceableInvariantLoad() const
Return true if this load instruction never traps and points to a memory location whose value doesn't ...
bool getFlag(MIFlag Flag) const
Return whether an MI flag is set.
unsigned getNumOperands() const
Returns the total number of operands.
LLVM_ABI void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
mop_range uses()
Returns all operands which may be register uses.
MachineOperand * findRegisterUseOperand(Register Reg, const TargetRegisterInfo *TRI, bool isKill=false)
Wrapper for findRegisterUseOperandIdx, it returns a pointer to the MachineOperand rather than an inde...
const MachineOperand & getOperand(unsigned i) const
uint32_t getFlags() const
Return the MI flags bitvector.
LLVM_ABI int findRegisterDefOperandIdx(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
LLVM_ABI MachineInstrBundleIterator< MachineInstr > eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
A description of a memory reference used in the backend.
LLT getMemoryType() const
Return the memory type of the memory reference.
unsigned getAddrSpace() const
const MachinePointerInfo & getPointerInfo() const
LLVM_ABI Align getAlign() const
Return the minimum known alignment in bytes of the actual memory reference.
MachineOperand class - Representation of each machine instruction operand.
const ConstantInt * getCImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void setMBB(MachineBasicBlock *MBB)
void setPredicate(unsigned Predicate)
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
unsigned getPredicate() const
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI bool hasOneNonDBGUse(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register.
LLVM_ABI MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
bool use_nodbg_empty(Register RegNo) const
use_nodbg_empty - Return true if there are no non-Debug instructions using the specified register.
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
use_instr_nodbg_iterator use_instr_nodbg_begin(Register RegNo) const
iterator_range< use_instr_nodbg_iterator > use_nodbg_instructions(Register Reg) const
static use_instr_nodbg_iterator use_instr_nodbg_end()
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition ArrayRef.h:298
This class implements the register bank concept.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
constexpr bool isValid() const
Definition Register.h:112
size_type size() const
Determine the number of elements in the SetVector.
Definition SetVector.h:103
size_type count(const_arg_type key) const
Count the number of elements of a given key in the SetVector.
Definition SetVector.h:262
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
SmallBitVector & set()
bool all() const
Returns true if all bits are set.
size_type size() const
Definition SmallPtrSet.h:99
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:339
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:184
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void resize(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual LLVM_READONLY LLT getPreferredShiftAmountTy(LLT ShiftValueTy) const
Return the preferred type to use for a shift opcode, given the shifted amount type is ShiftValueTy.
bool isBeneficialToExpandPowI(int64_t Exponent, bool OptForSize) const
Return true if it is beneficial to expand an @llvm.powi.
virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AddrSpace, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual unsigned combineRepeatedFPDivisors() const
Indicate whether this target prefers to combine FDIVs with the same divisor.
virtual const TargetLowering * getTargetLowering() const
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
Definition TypeSize.h:180
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
self_iterator getIterator()
Definition ilist_node.h:123
Changed
#define INT64_MAX
Definition DataTypes.h:71
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ FewerElements
The (vector) operation should be implemented by splitting it into sub-vectors where the operation is ...
@ Legal
The operation is expected to be selectable directly by the target, and no transformation is necessary...
@ WidenScalar
The operation should be implemented in terms of a wider scalar base-type.
@ Custom
The target wants to do something special with this combination of operand and type.
operand_type_match m_Reg()
SpecificConstantMatch m_SpecificICst(const APInt &RequestedValue)
Matches a constant equal to RequestedValue.
BinaryOp_match< LHS, RHS, TargetOpcode::G_BUILD_VECTOR, false > m_GBuildVector(const LHS &L, const RHS &R)
GCstAndRegMatch m_GCst(std::optional< ValueAndVReg > &ValReg)
operand_type_match m_Pred()
BinaryOp_match< LHS, RHS, TargetOpcode::G_UMIN, true > m_GUMin(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_ZEXT > m_GZExt(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_XOR, true > m_GXor(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_SEXT > m_GSExt(const SrcTy &Src)
UnaryOp_match< SrcTy, TargetOpcode::G_FPEXT > m_GFPExt(const SrcTy &Src)
ConstantMatch< APInt > m_ICst(APInt &Cst)
UnaryOp_match< SrcTy, TargetOpcode::G_INTTOPTR > m_GIntToPtr(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_ADD, true > m_GAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_OR, true > m_GOr(const LHS &L, const RHS &R)
BinaryOp_match< SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB > m_Neg(const SrcTy &&Src)
Matches a register negated by a G_SUB.
ICstOrSplatMatch< APInt > m_ICstOrSplat(APInt &Cst)
ImplicitDefMatch m_GImplicitDef()
OneNonDBGUse_match< SubPat > m_OneNonDBGUse(const SubPat &SP)
CheckType m_SpecificType(LLT Ty)
deferred_ty< Register > m_DeferredReg(Register &R)
Similar to m_SpecificReg/Type, but the specific value to match originated from an earlier sub-pattern...
BinaryOp_match< LHS, RHS, TargetOpcode::G_UMAX, true > m_GUMax(const LHS &L, const RHS &R)
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
BinaryOp_match< LHS, RHS, TargetOpcode::G_FADD, true > m_GFAdd(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_PTRTOINT > m_GPtrToInt(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_FSUB, false > m_GFSub(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SUB > m_GSub(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_ASHR, false > m_GAShr(const LHS &L, const RHS &R)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
BinaryOp_match< LHS, RHS, TargetOpcode::G_PTR_ADD, false > m_GPtrAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SHL, false > m_GShl(const LHS &L, const RHS &R)
Or< Preds... > m_any_of(Preds &&... preds)
SpecificConstantOrSplatMatch m_SpecificICstOrSplat(const APInt &RequestedValue)
Matches a RequestedValue constant or a constant splat of RequestedValue.
BinaryOp_match< LHS, RHS, TargetOpcode::G_AND, true > m_GAnd(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_BITCAST > m_GBitcast(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_BUILD_VECTOR_TRUNC, false > m_GBuildVectorTrunc(const LHS &L, const RHS &R)
bind_ty< MachineInstr * > m_MInstr(MachineInstr *&MI)
UnaryOp_match< SrcTy, TargetOpcode::G_FNEG > m_GFNeg(const SrcTy &Src)
CompareOp_match< Pred, LHS, RHS, TargetOpcode::G_ICMP, true > m_c_GICmp(const Pred &P, const LHS &L, const RHS &R)
G_ICMP matcher that also matches commuted compares.
TernaryOp_match< Src0Ty, Src1Ty, Src2Ty, TargetOpcode::G_INSERT_VECTOR_ELT > m_GInsertVecElt(const Src0Ty &Src0, const Src1Ty &Src1, const Src2Ty &Src2)
GFCstOrSplatGFCstMatch m_GFCstOrSplat(std::optional< FPValueAndVReg > &FPValReg)
And< Preds... > m_all_of(Preds &&... preds)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SMIN, true > m_GSMin(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_LSHR, false > m_GLShr(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_ANYEXT > m_GAnyExt(const SrcTy &Src)
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
UnaryOp_match< SrcTy, TargetOpcode::G_TRUNC > m_GTrunc(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SMAX, true > m_GSMax(const LHS &L, const RHS &R)
CompareOp_match< Pred, LHS, RHS, TargetOpcode::G_FCMP > m_GFCmp(const Pred &P, const LHS &L, const RHS &R)
auto m_BinOp()
Match an arbitrary binary operation and ignore it.
Not(const Pred &P) -> Not< Pred >
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
@ Offset
Definition DWP.cpp:532
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
LLVM_ABI bool isBuildVectorAllZeros(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndef=false)
Return true if the specified instruction is a G_BUILD_VECTOR or G_BUILD_VECTOR_TRUNC where all of the...
Definition Utils.cpp:1423
LLVM_ABI Type * getTypeForLLT(LLT Ty, LLVMContext &C)
Get the type back from LLT.
Definition Utils.cpp:1977
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
LLVM_ABI MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by a single def instruction that is Opcode.
Definition Utils.cpp:652
static double log2(double V)
LLVM_ABI const ConstantFP * getConstantFPVRegVal(Register VReg, const MachineRegisterInfo &MRI)
Definition Utils.cpp:460
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
LLVM_ABI std::optional< APInt > getIConstantVRegVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT, return the corresponding value.
Definition Utils.cpp:293
LLVM_ABI std::optional< APInt > getIConstantSplatVal(const Register Reg, const MachineRegisterInfo &MRI)
Definition Utils.cpp:1383
LLVM_ABI bool isAllOnesOrAllOnesSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant -1 integer or a splatted vector of a constant -1 integer (with...
Definition Utils.cpp:1548
@ Undef
Value of the register doesn't matter.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition bit.h:315
std::function< void(MachineIRBuilder &)> BuildFnTy
LLVM_ABI const llvm::fltSemantics & getFltSemanticForLLT(LLT Ty)
Get the appropriate floating point arithmetic semantic based on the bit size of the given scalar LLT.
LLVM_ABI std::optional< APFloat > ConstantFoldFPBinOp(unsigned Opcode, const Register Op1, const Register Op2, const MachineRegisterInfo &MRI)
Definition Utils.cpp:740
LLVM_ABI MVT getMVTForLLT(LLT Ty)
Get a rough equivalent of an MVT for a given LLT.
LLVM_ABI std::optional< APInt > isConstantOrConstantSplatVector(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a constant integer or a splat vector of constant integers.
Definition Utils.cpp:1506
LLVM_ABI bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition Utils.cpp:1530
LLVM_ABI MachineInstr * getDefIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, folding away any trivial copies.
Definition Utils.cpp:493
LLVM_ABI bool matchUnaryPredicate(const MachineRegisterInfo &MRI, Register Reg, std::function< bool(const Constant *ConstVal)> Match, bool AllowUndefs=false)
Attempt to match a unary predicate against a scalar/splat constant or every element of a constant G_B...
Definition Utils.cpp:1563
LLVM_ABI bool isConstTrueVal(const TargetLowering &TLI, int64_t Val, bool IsVector, bool IsFP)
Returns true if given the TargetLowering's boolean contents information, the value Val contains a tru...
Definition Utils.cpp:1595
LLVM_ABI std::optional< APInt > ConstantFoldBinOp(unsigned Opcode, const Register Op1, const Register Op2, const MachineRegisterInfo &MRI)
Definition Utils.cpp:671
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:149
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
LLVM_ABI const APInt & getIConstantFromReg(Register VReg, const MachineRegisterInfo &MRI)
VReg is defined by a G_CONSTANT; return the corresponding value.
Definition Utils.cpp:304
LLVM_ABI bool isConstantOrConstantVector(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowFP=true, bool AllowOpaqueConstants=true)
Return true if the specified instruction is known to be a constant, or a vector of constants.
Definition Utils.cpp:1486
SmallVector< std::function< void(MachineInstrBuilder &)>, 4 > OperandBuildSteps
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
LLVM_ABI bool canReplaceReg(Register DstReg, Register SrcReg, MachineRegisterInfo &MRI)
Check if DstReg can be replaced with SrcReg depending on the register constraints.
Definition Utils.cpp:199
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition MathExtras.h:261
LLVM_ABI bool canCreateUndefOrPoison(const Operator *Op, bool ConsiderFlagsAndMetadata=true)
canCreateUndefOrPoison returns true if Op can create undef or poison from non-undef & non-poison oper...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
auto instructionsWithoutDebug(IterT It, IterT End, bool SkipPseudoOp=true)
Construct a range iterator which begins at It and moves forwards until End is reached,...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI std::optional< FPValueAndVReg > getFConstantSplat(Register VReg, const MachineRegisterInfo &MRI, bool AllowUndef=true)
Returns a floating point scalar constant of a build vector splat if it exists.
Definition Utils.cpp:1416
LLVM_ABI EVT getApproximateEVTForLLT(LLT Ty, LLVMContext &Ctx)
LLVM_ABI std::optional< APInt > ConstantFoldCastOp(unsigned Opcode, LLT DstTy, const Register Op0, const MachineRegisterInfo &MRI)
Definition Utils.cpp:907
@ Other
Any other memory.
Definition ModRef.h:68
LLVM_ABI unsigned getInverseGMinMaxOpcode(unsigned MinMaxOpc)
Returns the inverse opcode of MinMaxOpc, which is a generic min/max opcode like G_SMIN.
Definition Utils.cpp:278
@ Xor
Bitwise or logical XOR of integers.
@ And
Bitwise or logical AND of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
DWARFExpression::Operation Op
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
LLVM_ABI std::optional< FPValueAndVReg > getFConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_FCONSTANT returns it...
Definition Utils.cpp:446
LLVM_ABI std::optional< APFloat > isConstantOrConstantSplatVectorFP(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a floating-point constant or a splat vector of floating-point constants.
Definition Utils.cpp:1519
constexpr unsigned BitWidth
LLVM_ABI int64_t getICmpTrueVal(const TargetLowering &TLI, bool IsVector, bool IsFP)
Returns an integer representing true, as defined by the TargetBooleanContents.
Definition Utils.cpp:1620
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Definition Utils.cpp:432
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1772
iterator_range< pointer_iterator< WrappedIteratorT > > make_pointer_range(RangeT &&Range)
Definition iterator.h:368
LLVM_ABI std::optional< DefinitionAndSourceRegister > getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, and underlying value Register folding away any copies.
Definition Utils.cpp:468
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
LLVM_ABI bool isKnownToBeAPowerOfTwo(const Value *V, const DataLayout &DL, bool OrZero=false, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Return true if the given value is known to have exactly one bit set when defined.
LLVM_ABI Register getSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the source register for Reg, folding away any trivial copies.
Definition Utils.cpp:500
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
Definition MathExtras.h:77
unsigned getFCmpCode(CmpInst::Predicate CC)
Similar to getICmpCode but for FCmpInst.
LLVM_ABI std::optional< int64_t > getIConstantSplatSExtVal(const Register Reg, const MachineRegisterInfo &MRI)
Definition Utils.cpp:1401
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Simple struct used to hold a Register value and the instruction which defines it.
Definition Utils.h:229
Extended Value Type.
Definition ValueTypes.h:35
SmallVector< InstructionBuildSteps, 2 > InstrsToBuild
Describes instructions to be built during a combine.
bool isNonNegative() const
Returns true if this value is known to be non-negative.
Definition KnownBits.h:106
unsigned countMinLeadingOnes() const
Returns the minimum number of leading one bits.
Definition KnownBits.h:265
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition KnownBits.h:256
bool isUnknown() const
Returns true if we don't know any bits.
Definition KnownBits.h:64
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition KnownBits.h:262
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition KnownBits.h:146
bool isNegative() const
Returns true if this value is known to be negative.
Definition KnownBits.h:103
The LegalityQuery object bundles together all the information that's needed to decide whether a given...
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands,...
LLVM_ABI unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
MachinePointerInfo getWithOffset(int64_t O) const
const RegisterBank * Bank
Magic data for optimising signed division by a constant.
static LLVM_ABI SignedDivisionByConstantInfo get(const APInt &D)
Calculate the magic numbers required to implement a signed integer division by a constant as a sequen...
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
Magic data for optimising unsigned division by a constant.
static LLVM_ABI UnsignedDivisionByConstantInfo get(const APInt &D, unsigned LeadingZeros=0, bool AllowEvenDivisorOptimization=true, bool AllowWidenOptimization=false)
Calculate the magic numbers required to implement an unsigned integer division by a constant as a seq...