CombinerHelper.cpp
1//===-- lib/CodeGen/GlobalISel/GICombinerHelper.cpp -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
9#include "llvm/ADT/APFloat.h"
10#include "llvm/ADT/STLExtras.h"
11#include "llvm/ADT/SetVector.h"
33#include "llvm/IR/DataLayout.h"
34#include "llvm/IR/InstrTypes.h"
40#include <cmath>
41#include <optional>
42#include <tuple>
43
44#define DEBUG_TYPE "gi-combiner"
45
46using namespace llvm;
47using namespace MIPatternMatch;
48
49// Option to allow testing of the combiner while no targets know about indexed
50// addressing.
51static cl::opt<bool>
52 ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false),
53 cl::desc("Force all indexed operations to be "
54 "legal for the GlobalISel combiner"));
55
56CombinerHelper::CombinerHelper(GISelChangeObserver &Observer,
57 MachineIRBuilder &B, bool IsPreLegalize,
58 GISelKnownBits *KB, MachineDominatorTree *MDT,
59 const LegalizerInfo *LI)
60 : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer), KB(KB),
61 MDT(MDT), IsPreLegalize(IsPreLegalize), LI(LI),
62 RBI(Builder.getMF().getSubtarget().getRegBankInfo()),
63 TRI(Builder.getMF().getSubtarget().getRegisterInfo()) {
64 (void)this->KB;
65}
66
67const TargetLowering &CombinerHelper::getTargetLowering() const {
68 return *Builder.getMF().getSubtarget().getTargetLowering();
69}
70
71/// \returns The little endian in-memory byte position of byte \p I in a
72/// \p ByteWidth bytes wide type.
73///
74/// E.g. Given a 4-byte type x, x[0] -> byte 0
75static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I) {
76 assert(I < ByteWidth && "I must be in [0, ByteWidth)");
77 return I;
78}
79
80/// Determines the LogBase2 value for a non-null input value using the
81/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
82static Register buildLogBase2(Register V, MachineIRBuilder &MIB) {
83 auto &MRI = *MIB.getMRI();
84 LLT Ty = MRI.getType(V);
85 auto Ctlz = MIB.buildCTLZ(Ty, V);
86 auto Base = MIB.buildConstant(Ty, Ty.getScalarSizeInBits() - 1);
87 return MIB.buildSub(Ty, Base, Ctlz).getReg(0);
88}
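// Illustrative example (values chosen for exposition): for a 32-bit value
// V = 16 = 0b10000, ctlz(V) = 27, so the emitted sequence computes
// (32 - 1) - 27 = 4 = log2(16) using only G_CTLZ, G_CONSTANT and G_SUB.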
89
90/// \returns The big endian in-memory byte position of byte \p I in a
91/// \p ByteWidth bytes wide type.
92///
93/// E.g. Given a 4-byte type x, x[0] -> byte 3
94static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I) {
95 assert(I < ByteWidth && "I must be in [0, ByteWidth)");
96 return ByteWidth - I - 1;
97}
98
99/// Given a map from byte offsets in memory to indices in a load/store,
100/// determine if that map corresponds to a little or big endian byte pattern.
101///
102/// \param MemOffset2Idx maps memory offsets to address offsets.
103/// \param LowestIdx is the lowest index in \p MemOffset2Idx.
104///
105/// \returns true if the map corresponds to a big endian byte pattern, false if
106/// it corresponds to a little endian byte pattern, and std::nullopt otherwise.
107///
108/// E.g. given a 32-bit type x, and x[AddrOffset], the in-memory byte patterns
109/// are as follows:
110///
111/// AddrOffset Little endian Big endian
112/// 0 0 3
113/// 1 1 2
114/// 2 2 1
115/// 3 3 0
116static std::optional<bool>
117isBigEndian(const SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx,
118 int64_t LowestIdx) {
119 // Need at least two byte positions to decide on endianness.
120 unsigned Width = MemOffset2Idx.size();
121 if (Width < 2)
122 return std::nullopt;
123 bool BigEndian = true, LittleEndian = true;
124 for (unsigned MemOffset = 0; MemOffset < Width; ++MemOffset) {
125 auto MemOffsetAndIdx = MemOffset2Idx.find(MemOffset);
126 if (MemOffsetAndIdx == MemOffset2Idx.end())
127 return std::nullopt;
128 const int64_t Idx = MemOffsetAndIdx->second - LowestIdx;
129 assert(Idx >= 0 && "Expected non-negative byte offset?");
130 LittleEndian &= Idx == littleEndianByteAt(Width, MemOffset);
131 BigEndian &= Idx == bigEndianByteAt(Width, MemOffset);
132 if (!BigEndian && !LittleEndian)
133 return std::nullopt;
134 }
135
136 assert((BigEndian != LittleEndian) &&
137 "Pattern cannot be both big and little endian!");
138 return BigEndian;
139}
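// Illustrative example (indices chosen for exposition): for a 4-byte access
// with MemOffset2Idx = {0->4, 1->5, 2->6, 3->7} and LowestIdx = 4, the
// normalized indices 0,1,2,3 match littleEndianByteAt, so the function
// returns false (little endian); the map {0->7, 1->6, 2->5, 3->4} with the
// same LowestIdx matches bigEndianByteAt and returns true.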
140
141bool CombinerHelper::isPreLegalize() const { return IsPreLegalize; }
142
143bool CombinerHelper::isLegal(const LegalityQuery &Query) const {
144 assert(LI && "Must have LegalizerInfo to query isLegal!");
145 return LI->getAction(Query).Action == LegalizeActions::Legal;
146}
147
148bool CombinerHelper::isLegalOrBeforeLegalizer(
149 const LegalityQuery &Query) const {
150 return isPreLegalize() || isLegal(Query);
151}
152
153bool CombinerHelper::isConstantLegalOrBeforeLegalizer(const LLT Ty) const {
154 if (!Ty.isVector())
155 return isLegalOrBeforeLegalizer({TargetOpcode::G_CONSTANT, {Ty}});
156 // Vector constants are represented as a G_BUILD_VECTOR of scalar G_CONSTANTs.
157 if (isPreLegalize())
158 return true;
159 LLT EltTy = Ty.getElementType();
160 return isLegal({TargetOpcode::G_BUILD_VECTOR, {Ty, EltTy}}) &&
161 isLegal({TargetOpcode::G_CONSTANT, {EltTy}});
162}
163
164void CombinerHelper::replaceRegWith(MachineRegisterInfo &MRI, Register FromReg,
165 Register ToReg) const {
166 Observer.changingAllUsesOfReg(MRI, FromReg);
167
168 if (MRI.constrainRegAttrs(ToReg, FromReg))
169 MRI.replaceRegWith(FromReg, ToReg);
170 else
171 Builder.buildCopy(ToReg, FromReg);
172
173 Observer.finishedChangingAllUsesOfReg();
174}
175
176void CombinerHelper::replaceRegOpWith(MachineRegisterInfo &MRI,
177 MachineOperand &FromRegOp,
178 Register ToReg) const {
179 assert(FromRegOp.getParent() && "Expected an operand in an MI");
180 Observer.changingInstr(*FromRegOp.getParent());
181
182 FromRegOp.setReg(ToReg);
183
184 Observer.changedInstr(*FromRegOp.getParent());
185}
186
187void CombinerHelper::replaceOpcodeWith(MachineInstr &FromMI,
188 unsigned ToOpcode) const {
189 Observer.changingInstr(FromMI);
190
191 FromMI.setDesc(Builder.getTII().get(ToOpcode));
192
193 Observer.changedInstr(FromMI);
194}
195
196const RegisterBank *CombinerHelper::getRegBank(Register Reg) const {
197 return RBI->getRegBank(Reg, MRI, *TRI);
198}
199
200void CombinerHelper::setRegBank(Register Reg, const RegisterBank *RegBank) {
201 if (RegBank)
202 MRI.setRegBank(Reg, *RegBank);
203}
204
205bool CombinerHelper::tryCombineCopy(MachineInstr &MI) {
206 if (matchCombineCopy(MI)) {
207 applyCombineCopy(MI);
208 return true;
209 }
210 return false;
211}
212bool CombinerHelper::matchCombineCopy(MachineInstr &MI) {
213 if (MI.getOpcode() != TargetOpcode::COPY)
214 return false;
215 Register DstReg = MI.getOperand(0).getReg();
216 Register SrcReg = MI.getOperand(1).getReg();
217 return canReplaceReg(DstReg, SrcReg, MRI);
218}
219void CombinerHelper::applyCombineCopy(MachineInstr &MI) {
220 Register DstReg = MI.getOperand(0).getReg();
221 Register SrcReg = MI.getOperand(1).getReg();
222 MI.eraseFromParent();
223 replaceRegWith(MRI, DstReg, SrcReg);
224}
225
226bool CombinerHelper::matchCombineConcatVectors(MachineInstr &MI,
227 SmallVector<Register> &Ops) {
228 assert(MI.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
229 "Invalid instruction");
230 bool IsUndef = true;
231 MachineInstr *Undef = nullptr;
232
233 // Walk over all the operands of concat vectors and check if they are
234 // build_vector themselves or undef.
235 // Then collect their operands in Ops.
236 for (const MachineOperand &MO : MI.uses()) {
237 Register Reg = MO.getReg();
238 MachineInstr *Def = MRI.getVRegDef(Reg);
239 assert(Def && "Operand not defined");
240 if (!MRI.hasOneNonDBGUse(Reg))
241 return false;
242 switch (Def->getOpcode()) {
243 case TargetOpcode::G_BUILD_VECTOR:
244 IsUndef = false;
245 // Remember the operands of the build_vector to fold
246 // them into the yet-to-build flattened concat vectors.
247 for (const MachineOperand &BuildVecMO : Def->uses())
248 Ops.push_back(BuildVecMO.getReg());
249 break;
250 case TargetOpcode::G_IMPLICIT_DEF: {
251 LLT OpType = MRI.getType(Reg);
252 // Keep one undef value for all the undef operands.
253 if (!Undef) {
254 Builder.setInsertPt(*MI.getParent(), MI);
255 Undef = Builder.buildUndef(OpType.getScalarType());
256 }
257 assert(MRI.getType(Undef->getOperand(0).getReg()) ==
258 OpType.getScalarType() &&
259 "All undefs should have the same type");
260 // Break the undef vector in as many scalar elements as needed
261 // for the flattening.
262 for (unsigned EltIdx = 0, EltEnd = OpType.getNumElements();
263 EltIdx != EltEnd; ++EltIdx)
264 Ops.push_back(Undef->getOperand(0).getReg());
265 break;
266 }
267 default:
268 return false;
269 }
270 }
271
272 // Check if the combine is illegal
273 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
274 if (!isLegalOrBeforeLegalizer(
275 {TargetOpcode::G_BUILD_VECTOR, {DstTy, MRI.getType(Ops[0])}})) {
276 return false;
277 }
278
279 if (IsUndef)
280 Ops.clear();
281
282 return true;
283}
284void CombinerHelper::applyCombineConcatVectors(MachineInstr &MI,
285 SmallVector<Register> &Ops) {
286 // We determined that the concat_vectors can be flattened.
287 // Generate the flattened build_vector.
288 Register DstReg = MI.getOperand(0).getReg();
289 Builder.setInsertPt(*MI.getParent(), MI);
290 Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
291
292 // Note: IsUndef is sort of redundant. We could have determined it by
293 // checking that all Ops are undef. Alternatively, we could have
294 // generated a build_vector of undefs and relied on another combine to
295 // clean that up. For now, given we already gather this information
296 // in matchCombineConcatVectors, just save compile time and issue the
297 // right thing.
298 if (Ops.empty())
299 Builder.buildUndef(NewDstReg);
300 else
301 Builder.buildBuildVector(NewDstReg, Ops);
302 MI.eraseFromParent();
303 replaceRegWith(MRI, DstReg, NewDstReg);
304}
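// Illustrative MIR (register names are hypothetical): the flattening above
// rewrites
//   %a:_(<2 x s32>) = G_BUILD_VECTOR %x:_(s32), %y:_(s32)
//   %b:_(<2 x s32>) = G_IMPLICIT_DEF
//   %v:_(<4 x s32>) = G_CONCAT_VECTORS %a(<2 x s32>), %b(<2 x s32>)
// into
//   %u:_(s32) = G_IMPLICIT_DEF
//   %v:_(<4 x s32>) = G_BUILD_VECTOR %x(s32), %y(s32), %u(s32), %u(s32)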
305
306bool CombinerHelper::matchCombineShuffleConcat(MachineInstr &MI,
307 SmallVector<Register> &Ops) {
308 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
309 auto ConcatMI1 =
310 dyn_cast<GConcatVectors>(MRI.getVRegDef(MI.getOperand(1).getReg()));
311 auto ConcatMI2 =
312 dyn_cast<GConcatVectors>(MRI.getVRegDef(MI.getOperand(2).getReg()));
313 if (!ConcatMI1 || !ConcatMI2)
314 return false;
315
316 // Check that the sources of the Concat instructions have the same type
317 if (MRI.getType(ConcatMI1->getSourceReg(0)) !=
318 MRI.getType(ConcatMI2->getSourceReg(0)))
319 return false;
320
321 LLT ConcatSrcTy = MRI.getType(ConcatMI1->getReg(1));
322 LLT ShuffleSrcTy1 = MRI.getType(MI.getOperand(1).getReg());
323 unsigned ConcatSrcNumElt = ConcatSrcTy.getNumElements();
324 for (unsigned i = 0; i < Mask.size(); i += ConcatSrcNumElt) {
325 // Check if the index takes a whole source register from G_CONCAT_VECTORS
326 // Assumes that all Sources of G_CONCAT_VECTORS are the same type
327 if (Mask[i] == -1) {
328 for (unsigned j = 1; j < ConcatSrcNumElt; j++) {
329 if (i + j >= Mask.size())
330 return false;
331 if (Mask[i + j] != -1)
332 return false;
333 }
334 if (!isLegalOrBeforeLegalizer(
335 {TargetOpcode::G_IMPLICIT_DEF, {ConcatSrcTy}}))
336 return false;
337 Ops.push_back(0);
338 } else if (Mask[i] % ConcatSrcNumElt == 0) {
339 for (unsigned j = 1; j < ConcatSrcNumElt; j++) {
340 if (i + j >= Mask.size())
341 return false;
342 if (Mask[i + j] != Mask[i] + static_cast<int>(j))
343 return false;
344 }
345 // Retrieve the source register from its respective G_CONCAT_VECTORS
346 // instruction
347 if (Mask[i] < ShuffleSrcTy1.getNumElements()) {
348 Ops.push_back(ConcatMI1->getSourceReg(Mask[i] / ConcatSrcNumElt));
349 } else {
350 Ops.push_back(ConcatMI2->getSourceReg(Mask[i] / ConcatSrcNumElt -
351 ConcatMI1->getNumSources()));
352 }
353 } else {
354 return false;
355 }
356 }
357
358 if (!isLegalOrBeforeLegalizer(
359 {TargetOpcode::G_CONCAT_VECTORS,
360 {MRI.getType(MI.getOperand(0).getReg()), ConcatSrcTy}}))
361 return false;
362
363 return !Ops.empty();
364}
365
366void CombinerHelper::applyCombineShuffleConcat(MachineInstr &MI,
367 SmallVector<Register> &Ops) {
368 LLT SrcTy = MRI.getType(Ops[0]);
369 Register UndefReg = 0;
370
371 for (unsigned i = 0; i < Ops.size(); i++) {
372 if (Ops[i] == 0) {
373 if (UndefReg == 0)
374 UndefReg = Builder.buildUndef(SrcTy).getReg(0);
375 Ops[i] = UndefReg;
376 }
377 }
378
379 Builder.buildConcatVectors(MI.getOperand(0).getReg(), Ops);
380 MI.eraseFromParent();
381}
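// Illustrative MIR (register names are hypothetical): with two concats of
// <2 x s32> sources,
//   %c1:_(<4 x s32>) = G_CONCAT_VECTORS %a(<2 x s32>), %b(<2 x s32>)
//   %c2:_(<4 x s32>) = G_CONCAT_VECTORS %c(<2 x s32>), %d(<2 x s32>)
//   %s:_(<4 x s32>) = G_SHUFFLE_VECTOR %c1, %c2, shufflemask(2,3,4,5)
// the mask selects whole concat sources (%b then %c), so this becomes
//   %s:_(<4 x s32>) = G_CONCAT_VECTORS %b(<2 x s32>), %c(<2 x s32>)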
382
383bool CombinerHelper::tryCombineShuffleVector(MachineInstr &MI) {
384 SmallVector<Register, 4> Ops;
385 if (matchCombineShuffleVector(MI, Ops)) {
386 applyCombineShuffleVector(MI, Ops);
387 return true;
388 }
389 return false;
390}
391
392bool CombinerHelper::matchCombineShuffleVector(MachineInstr &MI,
393 SmallVectorImpl<Register> &Ops) {
394 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
395 "Invalid instruction kind");
396 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
397 Register Src1 = MI.getOperand(1).getReg();
398 LLT SrcType = MRI.getType(Src1);
399 // As bizarre as it may look, shuffle vector can actually produce
400 // scalar! This is because at the IR level a <1 x ty> shuffle
401 // vector is perfectly valid.
402 unsigned DstNumElts = DstType.isVector() ? DstType.getNumElements() : 1;
403 unsigned SrcNumElts = SrcType.isVector() ? SrcType.getNumElements() : 1;
404
405 // If the resulting vector is smaller than the size of the source
406 // vectors being concatenated, we won't be able to replace the
407 // shuffle vector with a concat_vectors.
408 //
409 // Note: We may still be able to produce a concat_vectors fed by
410 // extract_vector_elt and so on. It is less clear that would
411 // be better though, so don't bother for now.
412 //
413 // If the destination is a scalar, the size of the sources doesn't
414 // matter. We will lower the shuffle to a plain copy. This will
415 // work only if the source and destination have the same size. But
416 // that's covered by the next condition.
417 //
418 // TODO: If the sizes of the source and destination don't match
419 // we could still emit an extract vector element in that case.
420 if (DstNumElts < 2 * SrcNumElts && DstNumElts != 1)
421 return false;
422
423 // Check that the shuffle mask can be broken evenly between the
424 // different sources.
425 if (DstNumElts % SrcNumElts != 0)
426 return false;
427
428 // Mask length is a multiple of the source vector length.
429 // Check if the shuffle is some kind of concatenation of the input
430 // vectors.
431 unsigned NumConcat = DstNumElts / SrcNumElts;
432 SmallVector<int, 8> ConcatSrcs(NumConcat, -1);
433 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
434 for (unsigned i = 0; i != DstNumElts; ++i) {
435 int Idx = Mask[i];
436 // Undef value.
437 if (Idx < 0)
438 continue;
439 // Ensure the indices in each SrcType sized piece are sequential and that
440 // the same source is used for the whole piece.
441 if ((Idx % SrcNumElts != (i % SrcNumElts)) ||
442 (ConcatSrcs[i / SrcNumElts] >= 0 &&
443 ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts)))
444 return false;
445 // Remember which source this index came from.
446 ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts;
447 }
448
449 // The shuffle is concatenating multiple vectors together.
450 // Collect the different operands for that.
451 Register UndefReg;
452 Register Src2 = MI.getOperand(2).getReg();
453 for (auto Src : ConcatSrcs) {
454 if (Src < 0) {
455 if (!UndefReg) {
456 Builder.setInsertPt(*MI.getParent(), MI);
457 UndefReg = Builder.buildUndef(SrcType).getReg(0);
458 }
459 Ops.push_back(UndefReg);
460 } else if (Src == 0)
461 Ops.push_back(Src1);
462 else
463 Ops.push_back(Src2);
464 }
465 return true;
466}
467
468void CombinerHelper::applyCombineShuffleVector(MachineInstr &MI,
469 const ArrayRef<Register> Ops) {
470 Register DstReg = MI.getOperand(0).getReg();
471 Builder.setInsertPt(*MI.getParent(), MI);
472 Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
473
474 if (Ops.size() == 1)
475 Builder.buildCopy(NewDstReg, Ops[0]);
476 else
477 Builder.buildMergeLikeInstr(NewDstReg, Ops);
478
479 MI.eraseFromParent();
480 replaceRegWith(MRI, DstReg, NewDstReg);
481}
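// Illustrative MIR (register names are hypothetical): a shuffle whose mask is
// a plain concatenation of its inputs,
//   %s:_(<4 x s32>) = G_SHUFFLE_VECTOR %a(<2 x s32>), %b(<2 x s32>), shufflemask(0,1,2,3)
// is rewritten by the code above to
//   %s:_(<4 x s32>) = G_CONCAT_VECTORS %a(<2 x s32>), %b(<2 x s32>)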
482
484 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
485 "Invalid instruction kind");
486
487 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
488 return Mask.size() == 1;
489}
490
492 Register DstReg = MI.getOperand(0).getReg();
493 Builder.setInsertPt(*MI.getParent(), MI);
494
495 int I = MI.getOperand(3).getShuffleMask()[0];
496 Register Src1 = MI.getOperand(1).getReg();
497 LLT Src1Ty = MRI.getType(Src1);
498 int Src1NumElts = Src1Ty.isVector() ? Src1Ty.getNumElements() : 1;
499 Register SrcReg;
500 if (I >= Src1NumElts) {
501 SrcReg = MI.getOperand(2).getReg();
502 I -= Src1NumElts;
503 } else if (I >= 0)
504 SrcReg = Src1;
505
506 if (I < 0)
507 Builder.buildUndef(DstReg);
508 else if (!MRI.getType(SrcReg).isVector())
509 Builder.buildCopy(DstReg, SrcReg);
510 else
512
513 MI.eraseFromParent();
514}
515
516namespace {
517
518/// Select a preference between two uses. CurrentUse is the current preference
519/// while *ForCandidate holds the attributes of the candidate under consideration.
520PreferredTuple ChoosePreferredUse(MachineInstr &LoadMI,
521 PreferredTuple &CurrentUse,
522 const LLT TyForCandidate,
523 unsigned OpcodeForCandidate,
524 MachineInstr *MIForCandidate) {
525 if (!CurrentUse.Ty.isValid()) {
526 if (CurrentUse.ExtendOpcode == OpcodeForCandidate ||
527 CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT)
528 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
529 return CurrentUse;
530 }
531
532 // We permit the extend to hoist through basic blocks but this is only
533 // sensible if the target has extending loads. If you end up lowering back
534 // into a load and extend during the legalizer then the end result is
535 // hoisting the extend up to the load.
536
537 // Prefer defined extensions to undefined extensions as these are more
538 // likely to reduce the number of instructions.
539 if (OpcodeForCandidate == TargetOpcode::G_ANYEXT &&
540 CurrentUse.ExtendOpcode != TargetOpcode::G_ANYEXT)
541 return CurrentUse;
542 else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT &&
543 OpcodeForCandidate != TargetOpcode::G_ANYEXT)
544 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
545
546 // Prefer sign extensions to zero extensions as sign-extensions tend to be
547 // more expensive. Don't do this if the load is already a zero-extend load
548 // though, otherwise we'll rewrite a zero-extend load into a sign-extend
549 // later.
550 if (!isa<GZExtLoad>(LoadMI) && CurrentUse.Ty == TyForCandidate) {
551 if (CurrentUse.ExtendOpcode == TargetOpcode::G_SEXT &&
552 OpcodeForCandidate == TargetOpcode::G_ZEXT)
553 return CurrentUse;
554 else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ZEXT &&
555 OpcodeForCandidate == TargetOpcode::G_SEXT)
556 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
557 }
558
559 // This is potentially target specific. We've chosen the largest type
560 // because G_TRUNC is usually free. One potential catch with this is that
561 // some targets have a reduced number of larger registers than smaller
562 // registers and this choice potentially increases the live-range for the
563 // larger value.
564 if (TyForCandidate.getSizeInBits() > CurrentUse.Ty.getSizeInBits()) {
565 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
566 }
567 return CurrentUse;
568}
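// Illustrative application of the preference order above (types chosen for
// exposition): if an s8 load is used by both a G_SEXT to s32 and a G_ZEXT to
// s32, the sign extend wins; if it is used by a G_SEXT to s32 and a G_ZEXT to
// s64, the larger s64 zero extend wins regardless of the order in which the
// uses are visited.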
569
570/// Find a suitable place to insert some instructions and insert them. This
571/// function accounts for special cases like inserting before a PHI node.
572/// The current strategy for inserting before PHI's is to duplicate the
573/// instructions for each predecessor. However, while that's ok for G_TRUNC
574/// on most targets since it generally requires no code, other targets/cases may
575/// want to try harder to find a dominating block.
576static void InsertInsnsWithoutSideEffectsBeforeUse(
577 MachineIRBuilder &Builder, MachineInstr &DefMI, MachineOperand &UseMO,
578 std::function<void(MachineBasicBlock *, MachineBasicBlock::iterator,
579 MachineOperand &UseMO)>
580 Inserter) {
581 MachineInstr &UseMI = *UseMO.getParent();
582
583 MachineBasicBlock *InsertBB = UseMI.getParent();
584
585 // If the use is a PHI then we want the predecessor block instead.
586 if (UseMI.isPHI()) {
587 MachineOperand *PredBB = std::next(&UseMO);
588 InsertBB = PredBB->getMBB();
589 }
590
591 // If the block is the same block as the def then we want to insert just after
592 // the def instead of at the start of the block.
593 if (InsertBB == DefMI.getParent()) {
594 MachineBasicBlock::iterator InsertPt = &DefMI;
595 Inserter(InsertBB, std::next(InsertPt), UseMO);
596 return;
597 }
598
599 // Otherwise we want the start of the BB
600 Inserter(InsertBB, InsertBB->getFirstNonPHI(), UseMO);
601}
602} // end anonymous namespace
603
604bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) {
605 PreferredTuple Preferred;
606 if (matchCombineExtendingLoads(MI, Preferred)) {
607 applyCombineExtendingLoads(MI, Preferred);
608 return true;
609 }
610 return false;
611}
612
613static unsigned getExtLoadOpcForExtend(unsigned ExtOpc) {
614 unsigned CandidateLoadOpc;
615 switch (ExtOpc) {
616 case TargetOpcode::G_ANYEXT:
617 CandidateLoadOpc = TargetOpcode::G_LOAD;
618 break;
619 case TargetOpcode::G_SEXT:
620 CandidateLoadOpc = TargetOpcode::G_SEXTLOAD;
621 break;
622 case TargetOpcode::G_ZEXT:
623 CandidateLoadOpc = TargetOpcode::G_ZEXTLOAD;
624 break;
625 default:
626 llvm_unreachable("Unexpected extend opc");
627 }
628 return CandidateLoadOpc;
629}
630
631bool CombinerHelper::matchCombineExtendingLoads(MachineInstr &MI,
632 PreferredTuple &Preferred) {
633 // We match the loads and follow the uses to the extend instead of matching
634 // the extends and following the def to the load. This is because the load
635 // must remain in the same position for correctness (unless we also add code
636 // to find a safe place to sink it) whereas the extend is freely movable.
637 // It also prevents us from duplicating the load for the volatile case or just
638 // for performance.
639 GAnyLoad *LoadMI = dyn_cast<GAnyLoad>(&MI);
640 if (!LoadMI)
641 return false;
642
643 Register LoadReg = LoadMI->getDstReg();
644
645 LLT LoadValueTy = MRI.getType(LoadReg);
646 if (!LoadValueTy.isScalar())
647 return false;
648
649 // Most architectures are going to legalize <s8 loads into at least a 1 byte
650 // load, and the MMOs can only describe memory accesses in multiples of bytes.
651 // If we try to perform extload combining on those, we can end up with
652 // %a(s8) = extload %ptr (load 1 byte from %ptr)
653 // ... which is an illegal extload instruction.
654 if (LoadValueTy.getSizeInBits() < 8)
655 return false;
656
657 // For non power-of-2 types, they will very likely be legalized into multiple
658 // loads. Don't bother trying to match them into extending loads.
659 if (!llvm::has_single_bit<uint32_t>(LoadValueTy.getSizeInBits()))
660 return false;
661
662 // Find the preferred type aside from the any-extends (unless it's the only
663 // one) and non-extending ops. We'll emit an extending load to that type and
664 // emit a variant of (extend (trunc X)) for the others according to the
665 // relative type sizes. At the same time, pick an extend to use based on the
666 // extend involved in the chosen type.
667 unsigned PreferredOpcode =
668 isa<GLoad>(&MI)
669 ? TargetOpcode::G_ANYEXT
670 : isa<GSExtLoad>(&MI) ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
671 Preferred = {LLT(), PreferredOpcode, nullptr};
672 for (auto &UseMI : MRI.use_nodbg_instructions(LoadReg)) {
673 if (UseMI.getOpcode() == TargetOpcode::G_SEXT ||
674 UseMI.getOpcode() == TargetOpcode::G_ZEXT ||
675 (UseMI.getOpcode() == TargetOpcode::G_ANYEXT)) {
676 const auto &MMO = LoadMI->getMMO();
677 // Don't do anything for atomics.
678 if (MMO.isAtomic())
679 continue;
680 // Check for legality.
681 if (!isPreLegalize()) {
682 LegalityQuery::MemDesc MMDesc(MMO);
683 unsigned CandidateLoadOpc = getExtLoadOpcForExtend(UseMI.getOpcode());
684 LLT UseTy = MRI.getType(UseMI.getOperand(0).getReg());
685 LLT SrcTy = MRI.getType(LoadMI->getPointerReg());
686 if (LI->getAction({CandidateLoadOpc, {UseTy, SrcTy}, {MMDesc}})
687 .Action != LegalizeActions::Legal)
688 continue;
689 }
690 Preferred = ChoosePreferredUse(MI, Preferred,
691 MRI.getType(UseMI.getOperand(0).getReg()),
692 UseMI.getOpcode(), &UseMI);
693 }
694 }
695
696 // There were no extends
697 if (!Preferred.MI)
698 return false;
699 // It should be impossible to choose an extend without selecting a different
700 // type since by definition the result of an extend is larger.
701 assert(Preferred.Ty != LoadValueTy && "Extending to same type?");
702
703 LLVM_DEBUG(dbgs() << "Preferred use is: " << *Preferred.MI);
704 return true;
705}
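// Illustrative MIR for the simplest case this match enables (register names
// are hypothetical): when the only user of the load is the extend,
//   %v:_(s8) = G_LOAD %p(p0) :: (load (s8))
//   %e:_(s32) = G_SEXT %v(s8)
// the apply that follows turns the pair into a single extending load
//   %e:_(s32) = G_SEXTLOAD %p(p0) :: (load (s8))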
706
707void CombinerHelper::applyCombineExtendingLoads(MachineInstr &MI,
708 PreferredTuple &Preferred) {
709 // Rewrite the load to the chosen extending load.
710 Register ChosenDstReg = Preferred.MI->getOperand(0).getReg();
711
712 // Inserter to insert a truncate back to the original type at a given point
713 // with some basic CSE to limit truncate duplication to one per BB.
714 DenseMap<MachineBasicBlock *, MachineInstr *> EmittedInsns;
715 auto InsertTruncAt = [&](MachineBasicBlock *InsertIntoBB,
716 MachineBasicBlock::iterator InsertBefore,
717 MachineOperand &UseMO) {
718 MachineInstr *PreviouslyEmitted = EmittedInsns.lookup(InsertIntoBB);
719 if (PreviouslyEmitted) {
720 Observer.changingInstr(*UseMO.getParent());
721 UseMO.setReg(PreviouslyEmitted->getOperand(0).getReg());
722 Observer.changedInstr(*UseMO.getParent());
723 return;
724 }
725
726 Builder.setInsertPt(*InsertIntoBB, InsertBefore);
727 Register NewDstReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg());
728 MachineInstr *NewMI = Builder.buildTrunc(NewDstReg, ChosenDstReg);
729 EmittedInsns[InsertIntoBB] = NewMI;
730 replaceRegOpWith(MRI, UseMO, NewDstReg);
731 };
732
733 Observer.changingInstr(MI);
734 unsigned LoadOpc = getExtLoadOpcForExtend(Preferred.ExtendOpcode);
735 MI.setDesc(Builder.getTII().get(LoadOpc));
736
737 // Rewrite all the uses to fix up the types.
738 auto &LoadValue = MI.getOperand(0);
739 SmallVector<MachineOperand *, 4> Uses;
740 for (auto &UseMO : MRI.use_operands(LoadValue.getReg()))
741 Uses.push_back(&UseMO);
742
743 for (auto *UseMO : Uses) {
744 MachineInstr *UseMI = UseMO->getParent();
745
746 // If the extend is compatible with the preferred extend then we should fix
747 // up the type and extend so that it uses the preferred use.
748 if (UseMI->getOpcode() == Preferred.ExtendOpcode ||
749 UseMI->getOpcode() == TargetOpcode::G_ANYEXT) {
750 Register UseDstReg = UseMI->getOperand(0).getReg();
751 MachineOperand &UseSrcMO = UseMI->getOperand(1);
752 const LLT UseDstTy = MRI.getType(UseDstReg);
753 if (UseDstReg != ChosenDstReg) {
754 if (Preferred.Ty == UseDstTy) {
755 // If the use has the same type as the preferred use, then merge
756 // the vregs and erase the extend. For example:
757 // %1:_(s8) = G_LOAD ...
758 // %2:_(s32) = G_SEXT %1(s8)
759 // %3:_(s32) = G_ANYEXT %1(s8)
760 // ... = ... %3(s32)
761 // rewrites to:
762 // %2:_(s32) = G_SEXTLOAD ...
763 // ... = ... %2(s32)
764 replaceRegWith(MRI, UseDstReg, ChosenDstReg);
765 Observer.erasingInstr(*UseMO->getParent());
766 UseMO->getParent()->eraseFromParent();
767 } else if (Preferred.Ty.getSizeInBits() < UseDstTy.getSizeInBits()) {
768 // If the preferred size is smaller, then keep the extend but extend
769 // from the result of the extending load. For example:
770 // %1:_(s8) = G_LOAD ...
771 // %2:_(s32) = G_SEXT %1(s8)
772 // %3:_(s64) = G_ANYEXT %1(s8)
773 // ... = ... %3(s64)
774 /// rewrites to:
775 // %2:_(s32) = G_SEXTLOAD ...
776 // %3:_(s64) = G_ANYEXT %2:_(s32)
777 // ... = ... %3(s64)
778 replaceRegOpWith(MRI, UseSrcMO, ChosenDstReg);
779 } else {
780 // If the preferred size is large, then insert a truncate. For
781 // example:
782 // %1:_(s8) = G_LOAD ...
783 // %2:_(s64) = G_SEXT %1(s8)
784 // %3:_(s32) = G_ZEXT %1(s8)
785 // ... = ... %3(s32)
786 /// rewrites to:
787 // %2:_(s64) = G_SEXTLOAD ...
788 // %4:_(s8) = G_TRUNC %2:_(s32)
789 // %3:_(s64) = G_ZEXT %2:_(s8)
790 // ... = ... %3(s64)
791 InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO,
792 InsertTruncAt);
793 }
794 continue;
795 }
796 // The use is (one of) the uses of the preferred use we chose earlier.
797 // We're going to update the load to def this value later so just erase
798 // the old extend.
799 Observer.erasingInstr(*UseMO->getParent());
800 UseMO->getParent()->eraseFromParent();
801 continue;
802 }
803
804 // The use isn't an extend. Truncate back to the type we originally loaded.
805 // This is free on many targets.
806 InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO, InsertTruncAt);
807 }
808
809 MI.getOperand(0).setReg(ChosenDstReg);
810 Observer.changedInstr(MI);
811}
812
813bool CombinerHelper::matchCombineLoadWithAndMask(MachineInstr &MI,
814 BuildFnTy &MatchInfo) {
815 assert(MI.getOpcode() == TargetOpcode::G_AND);
816
817 // If we have the following code:
818 // %mask = G_CONSTANT 255
819 // %ld = G_LOAD %ptr, (load s16)
820 // %and = G_AND %ld, %mask
821 //
822 // Try to fold it into
823 // %ld = G_ZEXTLOAD %ptr, (load s8)
824
825 Register Dst = MI.getOperand(0).getReg();
826 if (MRI.getType(Dst).isVector())
827 return false;
828
829 auto MaybeMask =
830 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
831 if (!MaybeMask)
832 return false;
833
834 APInt MaskVal = MaybeMask->Value;
835
836 if (!MaskVal.isMask())
837 return false;
838
839 Register SrcReg = MI.getOperand(1).getReg();
840 // Don't use getOpcodeDef() here since intermediate instructions may have
841 // multiple users.
842 GAnyLoad *LoadMI = dyn_cast<GAnyLoad>(MRI.getVRegDef(SrcReg));
843 if (!LoadMI || !MRI.hasOneNonDBGUse(LoadMI->getDstReg()))
844 return false;
845
846 Register LoadReg = LoadMI->getDstReg();
847 LLT RegTy = MRI.getType(LoadReg);
848 Register PtrReg = LoadMI->getPointerReg();
849 unsigned RegSize = RegTy.getSizeInBits();
850 LocationSize LoadSizeBits = LoadMI->getMemSizeInBits();
851 unsigned MaskSizeBits = MaskVal.countr_one();
852
853 // The mask may not be larger than the in-memory type, as it might cover sign
854 // extended bits
855 if (MaskSizeBits > LoadSizeBits.getValue())
856 return false;
857
858 // If the mask covers the whole destination register, there's nothing to
859 // extend
860 if (MaskSizeBits >= RegSize)
861 return false;
862
863 // Most targets cannot deal with loads of size < 8 and need to re-legalize to
864 // at least byte loads. Avoid creating such loads here
865 if (MaskSizeBits < 8 || !isPowerOf2_32(MaskSizeBits))
866 return false;
867
868 const MachineMemOperand &MMO = LoadMI->getMMO();
869 LegalityQuery::MemDesc MemDesc(MMO);
870
871 // Don't modify the memory access size if this is atomic/volatile, but we can
872 // still adjust the opcode to indicate the high bit behavior.
873 if (LoadMI->isSimple())
874 MemDesc.MemoryTy = LLT::scalar(MaskSizeBits);
875 else if (LoadSizeBits.getValue() > MaskSizeBits ||
876 LoadSizeBits.getValue() == RegSize)
877 return false;
878
879 // TODO: Could check if it's legal with the reduced or original memory size.
880 if (!isLegalOrBeforeLegalizer(
881 {TargetOpcode::G_ZEXTLOAD, {RegTy, MRI.getType(PtrReg)}, {MemDesc}}))
882 return false;
883
884 MatchInfo = [=](MachineIRBuilder &B) {
885 B.setInstrAndDebugLoc(*LoadMI);
886 auto &MF = B.getMF();
887 auto PtrInfo = MMO.getPointerInfo();
888 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, MemDesc.MemoryTy);
889 B.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, Dst, PtrReg, *NewMMO);
890 LoadMI->eraseFromParent();
891 };
892 return true;
893}
894
895bool CombinerHelper::isPredecessor(const MachineInstr &DefMI,
896 const MachineInstr &UseMI) {
897 assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
898 "shouldn't consider debug uses");
899 assert(DefMI.getParent() == UseMI.getParent());
900 if (&DefMI == &UseMI)
901 return true;
902 const MachineBasicBlock &MBB = *DefMI.getParent();
903 auto DefOrUse = find_if(MBB, [&DefMI, &UseMI](const MachineInstr &MI) {
904 return &MI == &DefMI || &MI == &UseMI;
905 });
906 if (DefOrUse == MBB.end())
907 llvm_unreachable("Block must contain both DefMI and UseMI!");
908 return &*DefOrUse == &DefMI;
909}
910
911bool CombinerHelper::dominates(const MachineInstr &DefMI,
912 const MachineInstr &UseMI) {
913 assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
914 "shouldn't consider debug uses");
915 if (MDT)
916 return MDT->dominates(&DefMI, &UseMI);
917 else if (DefMI.getParent() != UseMI.getParent())
918 return false;
919
920 return isPredecessor(DefMI, UseMI);
921}
922
923bool CombinerHelper::matchSextTruncSextLoad(MachineInstr &MI) {
924 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
925 Register SrcReg = MI.getOperand(1).getReg();
926 Register LoadUser = SrcReg;
927
928 if (MRI.getType(SrcReg).isVector())
929 return false;
930
931 Register TruncSrc;
932 if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc))))
933 LoadUser = TruncSrc;
934
935 uint64_t SizeInBits = MI.getOperand(2).getImm();
936 // If the source is a G_SEXTLOAD from the same bit width, then we don't
937 // need any extend at all, just a truncate.
938 if (auto *LoadMI = getOpcodeDef<GSExtLoad>(LoadUser, MRI)) {
939 // If truncating more than the original extended value, abort.
940 auto LoadSizeBits = LoadMI->getMemSizeInBits();
941 if (TruncSrc &&
942 MRI.getType(TruncSrc).getSizeInBits() < LoadSizeBits.getValue())
943 return false;
944 if (LoadSizeBits == SizeInBits)
945 return true;
946 }
947 return false;
948}
949
950void CombinerHelper::applySextTruncSextLoad(MachineInstr &MI) {
951 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
952 Builder.buildCopy(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
953 MI.eraseFromParent();
954}
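// Illustrative MIR (register names are hypothetical): the G_SEXT_INREG is
// redundant because the loaded value is already sign-extended from 8 bits:
//   %x:_(s32) = G_SEXTLOAD %p(p0) :: (load (s8))
//   %t:_(s16) = G_TRUNC %x(s32)
//   %r:_(s16) = G_SEXT_INREG %t, 8
// becomes
//   %r:_(s16) = COPY %t(s16)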
955
956bool CombinerHelper::matchSextInRegOfLoad(
957 MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
958 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
959
960 Register DstReg = MI.getOperand(0).getReg();
961 LLT RegTy = MRI.getType(DstReg);
962
963 // Only supports scalars for now.
964 if (RegTy.isVector())
965 return false;
966
967 Register SrcReg = MI.getOperand(1).getReg();
968 auto *LoadDef = getOpcodeDef<GLoad>(SrcReg, MRI);
969 if (!LoadDef || !MRI.hasOneNonDBGUse(DstReg))
970 return false;
971
972 uint64_t MemBits = LoadDef->getMemSizeInBits().getValue();
973
974 // If the sign extend extends from a narrower width than the load's width,
975 // then we can narrow the load width when we combine to a G_SEXTLOAD.
976 // Avoid widening the load at all.
977 unsigned NewSizeBits = std::min((uint64_t)MI.getOperand(2).getImm(), MemBits);
978
979 // Don't generate G_SEXTLOADs with a < 1 byte width.
980 if (NewSizeBits < 8)
981 return false;
982 // Don't bother creating a non-power-2 sextload, it will likely be broken up
983 // anyway for most targets.
984 if (!isPowerOf2_32(NewSizeBits))
985 return false;
986
987 const MachineMemOperand &MMO = LoadDef->getMMO();
988 LegalityQuery::MemDesc MMDesc(MMO);
989
990 // Don't modify the memory access size if this is atomic/volatile, but we can
991 // still adjust the opcode to indicate the high bit behavior.
992 if (LoadDef->isSimple())
993 MMDesc.MemoryTy = LLT::scalar(NewSizeBits);
994 else if (MemBits > NewSizeBits || MemBits == RegTy.getSizeInBits())
995 return false;
996
997 // TODO: Could check if it's legal with the reduced or original memory size.
998 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SEXTLOAD,
999 {MRI.getType(LoadDef->getDstReg()),
1000 MRI.getType(LoadDef->getPointerReg())},
1001 {MMDesc}}))
1002 return false;
1003
1004 MatchInfo = std::make_tuple(LoadDef->getDstReg(), NewSizeBits);
1005 return true;
1006}
1007
1008void CombinerHelper::applySextInRegOfLoad(
1009 MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
1010 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1011 Register LoadReg;
1012 unsigned ScalarSizeBits;
1013 std::tie(LoadReg, ScalarSizeBits) = MatchInfo;
1014 GLoad *LoadDef = cast<GLoad>(MRI.getVRegDef(LoadReg));
1015
1016 // If we have the following:
1017 // %ld = G_LOAD %ptr, (load 2)
1018 // %ext = G_SEXT_INREG %ld, 8
1019 // ==>
1020 // %ld = G_SEXTLOAD %ptr (load 1)
1021
1022 auto &MMO = LoadDef->getMMO();
1023 Builder.setInstrAndDebugLoc(*LoadDef);
1024 auto &MF = Builder.getMF();
1025 auto PtrInfo = MMO.getPointerInfo();
1026 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, ScalarSizeBits / 8);
1027 Builder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, MI.getOperand(0).getReg(),
1028 LoadDef->getPointerReg(), *NewMMO);
1029 MI.eraseFromParent();
1030}
1031
1032static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {
1033 if (Ty.isVector())
1034 return FixedVectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()),
1035 Ty.getNumElements());
1036 return IntegerType::get(C, Ty.getSizeInBits());
1037}
1038
1039/// Return true if 'MI' is a load or a store that may fold its address
1040/// operand into the load / store addressing mode.
1041static bool canFoldInAddressingMode(GLoadStore *MI, const TargetLowering &TLI,
1042 MachineRegisterInfo &MRI) {
1043 TargetLowering::AddrMode AM;
1044 auto *MF = MI->getMF();
1045 auto *Addr = getOpcodeDef<GPtrAdd>(MI->getPointerReg(), MRI);
1046 if (!Addr)
1047 return false;
1048
1049 AM.HasBaseReg = true;
1050 if (auto CstOff = getIConstantVRegVal(Addr->getOffsetReg(), MRI))
1051 AM.BaseOffs = CstOff->getSExtValue(); // [reg +/- imm]
1052 else
1053 AM.Scale = 1; // [reg +/- reg]
1054
1055 return TLI.isLegalAddressingMode(
1056 MF->getDataLayout(), AM,
1057 getTypeForLLT(MI->getMMO().getMemoryType(),
1058 MF->getFunction().getContext()),
1059 MI->getMMO().getAddrSpace());
1060}
1061
1062static unsigned getIndexedOpc(unsigned LdStOpc) {
1063 switch (LdStOpc) {
1064 case TargetOpcode::G_LOAD:
1065 return TargetOpcode::G_INDEXED_LOAD;
1066 case TargetOpcode::G_STORE:
1067 return TargetOpcode::G_INDEXED_STORE;
1068 case TargetOpcode::G_ZEXTLOAD:
1069 return TargetOpcode::G_INDEXED_ZEXTLOAD;
1070 case TargetOpcode::G_SEXTLOAD:
1071 return TargetOpcode::G_INDEXED_SEXTLOAD;
1072 default:
1073 llvm_unreachable("Unexpected opcode");
1074 }
1075}
1076
1077bool CombinerHelper::isIndexedLoadStoreLegal(GLoadStore &LdSt) const {
1078 // Check for legality.
1079 LLT PtrTy = MRI.getType(LdSt.getPointerReg());
1080 LLT Ty = MRI.getType(LdSt.getReg(0));
1081 LLT MemTy = LdSt.getMMO().getMemoryType();
1082 SmallVector<LegalityQuery::MemDesc, 2> MemDescrs(
1083 {{MemTy, MemTy.getSizeInBits(), AtomicOrdering::NotAtomic}});
1084 unsigned IndexedOpc = getIndexedOpc(LdSt.getOpcode());
1085 SmallVector<LLT> OpTys;
1086 if (IndexedOpc == TargetOpcode::G_INDEXED_STORE)
1087 OpTys = {PtrTy, Ty, Ty};
1088 else
1089 OpTys = {Ty, PtrTy}; // For G_INDEXED_LOAD, G_INDEXED_[SZ]EXTLOAD
1090
1091 LegalityQuery Q(IndexedOpc, OpTys, MemDescrs);
1092 return isLegal(Q);
1093}
1094
1096 "post-index-use-threshold", cl::Hidden, cl::init(32),
1097 cl::desc("Number of uses of a base pointer to check before it is no longer "
1098 "considered for post-indexing."));
1099
1100bool CombinerHelper::findPostIndexCandidate(GLoadStore &LdSt, Register &Addr,
1101 Register &Base, Register &Offset,
1102 bool &RematOffset) {
1103 // We're looking for the following pattern, for either load or store:
1104 // %baseptr:_(p0) = ...
1105 // G_STORE %val(s64), %baseptr(p0)
1106 // %offset:_(s64) = G_CONSTANT i64 -256
1107 // %new_addr:_(p0) = G_PTR_ADD %baseptr, %offset(s64)
1108 const auto &TLI = getTargetLowering();
1109
1110 Register Ptr = LdSt.getPointerReg();
1111 // If the store is the only use, don't bother.
1112 if (MRI.hasOneNonDBGUse(Ptr))
1113 return false;
1114
1115 if (!isIndexedLoadStoreLegal(LdSt))
1116 return false;
1117
1118 if (getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Ptr, MRI))
1119 return false;
1120
1121 MachineInstr *StoredValDef = getDefIgnoringCopies(LdSt.getReg(0), MRI);
1122 auto *PtrDef = MRI.getVRegDef(Ptr);
1123
1124 unsigned NumUsesChecked = 0;
1125 for (auto &Use : MRI.use_nodbg_instructions(Ptr)) {
1126 if (++NumUsesChecked > PostIndexUseThreshold)
1127 return false; // Try to avoid exploding compile time.
1128
1129 auto *PtrAdd = dyn_cast<GPtrAdd>(&Use);
1130 // The use itself might be dead. This can happen during combines if DCE
1131 // hasn't had a chance to run yet. Don't allow it to form an indexed op.
1132 if (!PtrAdd || MRI.use_nodbg_empty(PtrAdd->getReg(0)))
1133 continue;
1134
1135 // Check that the user of this isn't the store, otherwise we'd be generating
1136 // an indexed store defining its own use.
1137 if (StoredValDef == &Use)
1138 continue;
1139
1140 Offset = PtrAdd->getOffsetReg();
1141 if (!ForceLegalIndexing &&
1142 !TLI.isIndexingLegal(LdSt, PtrAdd->getBaseReg(), Offset,
1143 /*IsPre*/ false, MRI))
1144 continue;
1145
1146 // Make sure the offset calculation is before the potentially indexed op.
1147 MachineInstr *OffsetDef = MRI.getVRegDef(Offset);
1148 RematOffset = false;
1149 if (!dominates(*OffsetDef, LdSt)) {
1150 // If the offset however is just a G_CONSTANT, we can always just
1151 // rematerialize it where we need it.
1152 if (OffsetDef->getOpcode() != TargetOpcode::G_CONSTANT)
1153 continue;
1154 RematOffset = true;
1155 }
1156
1157 for (auto &BasePtrUse : MRI.use_nodbg_instructions(PtrAdd->getBaseReg())) {
1158 if (&BasePtrUse == PtrDef)
1159 continue;
1160
1161 // If the user is a later load/store that can be post-indexed, then don't
1162 // combine this one.
1163 auto *BasePtrLdSt = dyn_cast<GLoadStore>(&BasePtrUse);
1164 if (BasePtrLdSt && BasePtrLdSt != &LdSt &&
1165 dominates(LdSt, *BasePtrLdSt) &&
1166 isIndexedLoadStoreLegal(*BasePtrLdSt))
1167 return false;
1168
1169 // Now we're looking for the key G_PTR_ADD instruction, which contains
1170 // the offset add that we want to fold.
1171 if (auto *BasePtrUseDef = dyn_cast<GPtrAdd>(&BasePtrUse)) {
1172 Register PtrAddDefReg = BasePtrUseDef->getReg(0);
1173 for (auto &BaseUseUse : MRI.use_nodbg_instructions(PtrAddDefReg)) {
1174 // If the use is in a different block, then we may produce worse code
1175 // due to the extra register pressure.
1176 if (BaseUseUse.getParent() != LdSt.getParent())
1177 return false;
1178
1179 if (auto *UseUseLdSt = dyn_cast<GLoadStore>(&BaseUseUse))
1180 if (canFoldInAddressingMode(UseUseLdSt, TLI, MRI))
1181 return false;
1182 }
1183 if (!dominates(LdSt, BasePtrUse))
1184 return false; // All uses must be dominated by the load/store.
1185 }
1186 }
1187
1188 Addr = PtrAdd->getReg(0);
1189 Base = PtrAdd->getBaseReg();
1190 return true;
1191 }
1192
1193 return false;
1194}
1195
1196bool CombinerHelper::findPreIndexCandidate(GLoadStore &LdSt, Register &Addr,
1197 Register &Base, Register &Offset) {
1198 auto &MF = *LdSt.getParent()->getParent();
1199 const auto &TLI = *MF.getSubtarget().getTargetLowering();
1200
1201 Addr = LdSt.getPointerReg();
1202 if (!mi_match(Addr, MRI, m_GPtrAdd(m_Reg(Base), m_Reg(Offset))) ||
1203 MRI.hasOneNonDBGUse(Addr))
1204 return false;
1205
1206 if (!ForceLegalIndexing &&
1207 !TLI.isIndexingLegal(LdSt, Base, Offset, /*IsPre*/ true, MRI))
1208 return false;
1209
1210 if (!isIndexedLoadStoreLegal(LdSt))
1211 return false;
1212
1213 MachineInstr *BaseDef = getDefIgnoringCopies(Base, MRI);
1214 if (BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
1215 return false;
1216
1217 if (auto *St = dyn_cast<GStore>(&LdSt)) {
1218 // Would require a copy.
1219 if (Base == St->getValueReg())
1220 return false;
1221
1222 // We're expecting one use of Addr in MI, but it could also be the
1223 // value stored, which isn't actually dominated by the instruction.
1224 if (St->getValueReg() == Addr)
1225 return false;
1226 }
1227
1228 // Avoid increasing cross-block register pressure.
1229 for (auto &AddrUse : MRI.use_nodbg_instructions(Addr))
1230 if (AddrUse.getParent() != LdSt.getParent())
1231 return false;
1232
1233 // FIXME: check whether all uses of the base pointer are constant PtrAdds.
1234 // That might allow us to end base's liveness here by adjusting the constant.
1235 bool RealUse = false;
1236 for (auto &AddrUse : MRI.use_nodbg_instructions(Addr)) {
1237 if (!dominates(LdSt, AddrUse))
1238 return false; // All uses must be dominated by the load/store.
1239
1240 // If Ptr may be folded in addressing mode of other use, then it's
1241 // not profitable to do this transformation.
1242 if (auto *UseLdSt = dyn_cast<GLoadStore>(&AddrUse)) {
1243 if (!canFoldInAddressingMode(UseLdSt, TLI, MRI))
1244 RealUse = true;
1245 } else {
1246 RealUse = true;
1247 }
1248 }
1249 return RealUse;
1250}
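// Illustrative MIR for the pre-indexed shape this helper looks for (register
// names are hypothetical):
//   %off:_(s64) = G_CONSTANT i64 16
//   %addr:_(p0) = G_PTR_ADD %base, %off(s64)
//   G_STORE %val(s32), %addr(p0) :: (store (s32))
// If the target accepts the (base, offset) pair and %addr has further uses
// that are dominated by the store, the store can be turned into a pre-indexed
// store that also produces %addr.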
1251
1252bool CombinerHelper::matchCombineExtractedVectorLoad(MachineInstr &MI,
1253 BuildFnTy &MatchInfo) {
1254 assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT);
1255
1256 // Check if there is a load that defines the vector being extracted from.
1257 auto *LoadMI = getOpcodeDef<GLoad>(MI.getOperand(1).getReg(), MRI);
1258 if (!LoadMI)
1259 return false;
1260
1261 Register Vector = MI.getOperand(1).getReg();
1262 LLT VecEltTy = MRI.getType(Vector).getElementType();
1263
1264 assert(MRI.getType(MI.getOperand(0).getReg()) == VecEltTy);
1265
1266 // Checking whether we should reduce the load width.
1267 if (!MRI.hasOneNonDBGUse(Vector))
1268 return false;
1269
1270 // Check if the defining load is simple.
1271 if (!LoadMI->isSimple())
1272 return false;
1273
1274 // If the vector element type is not a multiple of a byte then we are unable
1275 // to correctly compute an address to load only the extracted element as a
1276 // scalar.
1277 if (!VecEltTy.isByteSized())
1278 return false;
1279
1280 // Check for load fold barriers between the extraction and the load.
1281 if (MI.getParent() != LoadMI->getParent())
1282 return false;
1283 const unsigned MaxIter = 20;
1284 unsigned Iter = 0;
1285 for (auto II = LoadMI->getIterator(), IE = MI.getIterator(); II != IE; ++II) {
1286 if (II->isLoadFoldBarrier())
1287 return false;
1288 if (Iter++ == MaxIter)
1289 return false;
1290 }
1291
1292 // Check if the new load that we are going to create is legal
1293 // if we are in the post-legalization phase.
1294 MachineMemOperand MMO = LoadMI->getMMO();
1295 Align Alignment = MMO.getAlign();
1296 MachinePointerInfo PtrInfo;
1297 uint64_t Offset;
1298
1299 // Finding the appropriate PtrInfo if offset is a known constant.
1300 // This is required to create the memory operand for the narrowed load.
1301 // This machine memory operand object helps us infer about legality
1302 // before we proceed to combine the instruction.
1303 if (auto CVal = getIConstantVRegVal(Vector, MRI)) {
1304 int Elt = CVal->getZExtValue();
1305 // FIXME: should be (ABI size)*Elt.
1306 Offset = VecEltTy.getSizeInBits() * Elt / 8;
1307 PtrInfo = MMO.getPointerInfo().getWithOffset(Offset);
1308 } else {
1309 // Discard the pointer info except the address space because the memory
1310 // operand can't represent this new access since the offset is variable.
1311 Offset = VecEltTy.getSizeInBits() / 8;
1312 PtrInfo = MachinePointerInfo(MMO.getPointerInfo().getAddrSpace());
1313 }
1314
1315 Alignment = commonAlignment(Alignment, Offset);
1316
1317 Register VecPtr = LoadMI->getPointerReg();
1318 LLT PtrTy = MRI.getType(VecPtr);
1319
1320 MachineFunction &MF = *MI.getMF();
1321 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, VecEltTy);
1322
1323 LegalityQuery::MemDesc MMDesc(*NewMMO);
1324
1325 LegalityQuery Q = {TargetOpcode::G_LOAD, {VecEltTy, PtrTy}, {MMDesc}};
1326
1327 if (!isLegalOrBeforeLegalizer(Q))
1328 return false;
1329
1330 // Load must be allowed and fast on the target.
1331 LLVMContext &C = MF.getFunction().getContext();
1332 auto &DL = MF.getDataLayout();
1333 unsigned Fast = 0;
1334 if (!getTargetLowering().allowsMemoryAccess(C, DL, VecEltTy, *NewMMO,
1335 &Fast) ||
1336 !Fast)
1337 return false;
1338
1339 Register Result = MI.getOperand(0).getReg();
1340 Register Index = MI.getOperand(2).getReg();
1341
1342 MatchInfo = [=](MachineIRBuilder &B) {
1343 GISelObserverWrapper DummyObserver;
1344 LegalizerHelper Helper(B.getMF(), DummyObserver, B);
1345 //// Get pointer to the vector element.
1346 Register finalPtr = Helper.getVectorElementPointer(
1347 LoadMI->getPointerReg(), MRI.getType(LoadMI->getOperand(0).getReg()),
1348 Index);
1349 // New G_LOAD instruction.
1350 B.buildLoad(Result, finalPtr, PtrInfo, Alignment);
1351 // Remove original GLOAD instruction.
1352 LoadMI->eraseFromParent();
1353 };
1354
1355 return true;
1356}
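// Illustrative MIR (register names are hypothetical): when the vector feeding
// the extract is only used here, the pair
//   %v:_(<4 x s32>) = G_LOAD %p(p0) :: (load (<4 x s32>))
//   %e:_(s32) = G_EXTRACT_VECTOR_ELT %v(<4 x s32>), %idx(s64)
// can be replaced by a pointer computation into the original object and a
// single scalar G_LOAD of (s32) from roughly %p + 4 * %idx, subject to the
// legality and fast-access checks above.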
1357
1358bool CombinerHelper::matchCombineIndexedLoadStore(
1359 MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
1360 auto &LdSt = cast<GLoadStore>(MI);
1361
1362 if (LdSt.isAtomic())
1363 return false;
1364
1365 MatchInfo.IsPre = findPreIndexCandidate(LdSt, MatchInfo.Addr, MatchInfo.Base,
1366 MatchInfo.Offset);
1367 if (!MatchInfo.IsPre &&
1368 !findPostIndexCandidate(LdSt, MatchInfo.Addr, MatchInfo.Base,
1369 MatchInfo.Offset, MatchInfo.RematOffset))
1370 return false;
1371
1372 return true;
1373}
1374
1375void CombinerHelper::applyCombineIndexedLoadStore(
1376 MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
1377 MachineInstr &AddrDef = *MRI.getUniqueVRegDef(MatchInfo.Addr);
1378 unsigned Opcode = MI.getOpcode();
1379 bool IsStore = Opcode == TargetOpcode::G_STORE;
1380 unsigned NewOpcode = getIndexedOpc(Opcode);
1381
1382 // If the offset constant didn't happen to dominate the load/store, we can
1383 // just clone it as needed.
1384 if (MatchInfo.RematOffset) {
1385 auto *OldCst = MRI.getVRegDef(MatchInfo.Offset);
1386 auto NewCst = Builder.buildConstant(MRI.getType(MatchInfo.Offset),
1387 *OldCst->getOperand(1).getCImm());
1388 MatchInfo.Offset = NewCst.getReg(0);
1389 }
1390
1391 auto MIB = Builder.buildInstr(NewOpcode);
1392 if (IsStore) {
1393 MIB.addDef(MatchInfo.Addr);
1394 MIB.addUse(MI.getOperand(0).getReg());
1395 } else {
1396 MIB.addDef(MI.getOperand(0).getReg());
1397 MIB.addDef(MatchInfo.Addr);
1398 }
1399
1400 MIB.addUse(MatchInfo.Base);
1401 MIB.addUse(MatchInfo.Offset);
1402 MIB.addImm(MatchInfo.IsPre);
1403 MIB->cloneMemRefs(*MI.getMF(), MI);
1404 MI.eraseFromParent();
1405 AddrDef.eraseFromParent();
1406
1407 LLVM_DEBUG(dbgs() << " Combined to indexed operation");
1408}
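// Illustrative MIR (register names are hypothetical): for a post-indexed
// store candidate
//   G_STORE %val(s64), %base(p0) :: (store (s64))
//   %off:_(s64) = G_CONSTANT i64 -256
//   %new:_(p0) = G_PTR_ADD %base, %off(s64)
// the rewrite above produces
//   %new:_(p0) = G_INDEXED_STORE %val(s64), %base(p0), %off(s64), 0
// where the trailing 0 encodes IsPre = false (post-indexed).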
1409
1410bool CombinerHelper::matchCombineDivRem(MachineInstr &MI,
1411 MachineInstr *&OtherMI) {
1412 unsigned Opcode = MI.getOpcode();
1413 bool IsDiv, IsSigned;
1414
1415 switch (Opcode) {
1416 default:
1417 llvm_unreachable("Unexpected opcode!");
1418 case TargetOpcode::G_SDIV:
1419 case TargetOpcode::G_UDIV: {
1420 IsDiv = true;
1421 IsSigned = Opcode == TargetOpcode::G_SDIV;
1422 break;
1423 }
1424 case TargetOpcode::G_SREM:
1425 case TargetOpcode::G_UREM: {
1426 IsDiv = false;
1427 IsSigned = Opcode == TargetOpcode::G_SREM;
1428 break;
1429 }
1430 }
1431
1432 Register Src1 = MI.getOperand(1).getReg();
1433 unsigned DivOpcode, RemOpcode, DivremOpcode;
1434 if (IsSigned) {
1435 DivOpcode = TargetOpcode::G_SDIV;
1436 RemOpcode = TargetOpcode::G_SREM;
1437 DivremOpcode = TargetOpcode::G_SDIVREM;
1438 } else {
1439 DivOpcode = TargetOpcode::G_UDIV;
1440 RemOpcode = TargetOpcode::G_UREM;
1441 DivremOpcode = TargetOpcode::G_UDIVREM;
1442 }
1443
1444 if (!isLegalOrBeforeLegalizer({DivremOpcode, {MRI.getType(Src1)}}))
1445 return false;
1446
1447 // Combine:
1448 // %div:_ = G_[SU]DIV %src1:_, %src2:_
1449 // %rem:_ = G_[SU]REM %src1:_, %src2:_
1450 // into:
1451 // %div:_, %rem:_ = G_[SU]DIVREM %src1:_, %src2:_
1452
1453 // Combine:
1454 // %rem:_ = G_[SU]REM %src1:_, %src2:_
1455 // %div:_ = G_[SU]DIV %src1:_, %src2:_
1456 // into:
1457 // %div:_, %rem:_ = G_[SU]DIVREM %src1:_, %src2:_
1458
1459 for (auto &UseMI : MRI.use_nodbg_instructions(Src1)) {
1460 if (MI.getParent() == UseMI.getParent() &&
1461 ((IsDiv && UseMI.getOpcode() == RemOpcode) ||
1462 (!IsDiv && UseMI.getOpcode() == DivOpcode)) &&
1463 matchEqualDefs(MI.getOperand(2), UseMI.getOperand(2)) &&
1464 matchEqualDefs(MI.getOperand(1), UseMI.getOperand(1))) {
1465 OtherMI = &UseMI;
1466 return true;
1467 }
1468 }
1469
1470 return false;
1471}
1472
1473void CombinerHelper::applyCombineDivRem(MachineInstr &MI,
1474 MachineInstr *&OtherMI) {
1475 unsigned Opcode = MI.getOpcode();
1476 assert(OtherMI && "OtherMI shouldn't be empty.");
1477
1478 Register DestDivReg, DestRemReg;
1479 if (Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_UDIV) {
1480 DestDivReg = MI.getOperand(0).getReg();
1481 DestRemReg = OtherMI->getOperand(0).getReg();
1482 } else {
1483 DestDivReg = OtherMI->getOperand(0).getReg();
1484 DestRemReg = MI.getOperand(0).getReg();
1485 }
1486
1487 bool IsSigned =
1488 Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_SREM;
1489
1490 // Check which instruction is first in the block so we don't break def-use
1491 // deps by "moving" the instruction incorrectly. Also keep track of which
1492 // instruction is first so we pick its operands, avoiding use-before-def
1493 // bugs.
1494 MachineInstr *FirstInst = dominates(MI, *OtherMI) ? &MI : OtherMI;
1495 Builder.setInstrAndDebugLoc(*FirstInst);
1496
1497 Builder.buildInstr(IsSigned ? TargetOpcode::G_SDIVREM
1498 : TargetOpcode::G_UDIVREM,
1499 {DestDivReg, DestRemReg},
1500 { FirstInst->getOperand(1), FirstInst->getOperand(2) });
1501 MI.eraseFromParent();
1502 OtherMI->eraseFromParent();
1503}
1504
1505bool CombinerHelper::matchOptBrCondByInvertingCond(MachineInstr &MI,
1506 MachineInstr *&BrCond) {
1507 assert(MI.getOpcode() == TargetOpcode::G_BR);
1508
1509 // Try to match the following:
1510 // bb1:
1511 // G_BRCOND %c1, %bb2
1512 // G_BR %bb3
1513 // bb2:
1514 // ...
1515 // bb3:
1516
1517 // The above pattern does not have a fall through to the successor bb2, always
1518 // resulting in a branch no matter which path is taken. Here we try to find
1519 // and replace that pattern with a conditional branch to bb3 and otherwise
1520 // a fall through to bb2. This is generally better for branch predictors.
1521
1522 MachineBasicBlock *MBB = MI.getParent();
1523 MachineBasicBlock::iterator BrIt(MI);
1524 if (BrIt == MBB->begin())
1525 return false;
1526 assert(std::next(BrIt) == MBB->end() && "expected G_BR to be a terminator");
1527
1528 BrCond = &*std::prev(BrIt);
1529 if (BrCond->getOpcode() != TargetOpcode::G_BRCOND)
1530 return false;
1531
1532 // Check that the next block is the conditional branch target. Also make sure
1533 // that it isn't the same as the G_BR's target (otherwise, this will loop.)
1534 MachineBasicBlock *BrCondTarget = BrCond->getOperand(1).getMBB();
1535 return BrCondTarget != MI.getOperand(0).getMBB() &&
1536 MBB->isLayoutSuccessor(BrCondTarget);
1537}
1538
1539void CombinerHelper::applyOptBrCondByInvertingCond(MachineInstr &MI,
1540 MachineInstr *&BrCond) {
1541 MachineBasicBlock *BrTarget = MI.getOperand(0).getMBB();
1542 Builder.setInstrAndDebugLoc(*BrCond);
1543 LLT Ty = MRI.getType(BrCond->getOperand(0).getReg());
1544 // FIXME: Does int/fp matter for this? If so, we might need to restrict
1545 // this to i1 only since we might not know for sure what kind of
1546 // compare generated the condition value.
1547 auto True = Builder.buildConstant(
1548 Ty, getICmpTrueVal(getTargetLowering(), false, false));
1549 auto Xor = Builder.buildXor(Ty, BrCond->getOperand(0), True);
1550
1551 auto *FallthroughBB = BrCond->getOperand(1).getMBB();
1552 Observer.changingInstr(MI);
1553 MI.getOperand(0).setMBB(FallthroughBB);
1554 Observer.changedInstr(MI);
1555
1556 // Change the conditional branch to use the inverted condition and
1557 // new target block.
1558 Observer.changingInstr(*BrCond);
1559 BrCond->getOperand(0).setReg(Xor.getReg(0));
1560 BrCond->getOperand(1).setMBB(BrTarget);
1561 Observer.changedInstr(*BrCond);
1562}
1563
1564
1565bool CombinerHelper::tryEmitMemcpyInline(MachineInstr &MI) {
1566 MachineIRBuilder HelperBuilder(MI);
1567 GISelObserverWrapper DummyObserver;
1568 LegalizerHelper Helper(HelperBuilder.getMF(), DummyObserver, HelperBuilder);
1569 return Helper.lowerMemcpyInline(MI) ==
1570 LegalizerHelper::LegalizeResult::Legalized;
1571}
1572
1573bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
1574 MachineIRBuilder HelperBuilder(MI);
1575 GISelObserverWrapper DummyObserver;
1576 LegalizerHelper Helper(HelperBuilder.getMF(), DummyObserver, HelperBuilder);
1577 return Helper.lowerMemCpyFamily(MI, MaxLen) ==
1578 LegalizerHelper::LegalizeResult::Legalized;
1579}
1580
1581static APFloat constantFoldFpUnary(const MachineInstr &MI,
1582 const MachineRegisterInfo &MRI,
1583 const APFloat &Val) {
1584 APFloat Result(Val);
1585 switch (MI.getOpcode()) {
1586 default:
1587 llvm_unreachable("Unexpected opcode!");
1588 case TargetOpcode::G_FNEG: {
1589 Result.changeSign();
1590 return Result;
1591 }
1592 case TargetOpcode::G_FABS: {
1593 Result.clearSign();
1594 return Result;
1595 }
1596 case TargetOpcode::G_FPTRUNC: {
1597 bool Unused;
1598 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
1599 Result.convert(getFltSemanticForLLT(DstTy), APFloat::rmNearestTiesToEven,
1600 &Unused);
1601 return Result;
1602 }
1603 case TargetOpcode::G_FSQRT: {
1604 bool Unused;
1605 Result.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
1606 &Unused);
1607 Result = APFloat(sqrt(Result.convertToDouble()));
1608 break;
1609 }
1610 case TargetOpcode::G_FLOG2: {
1611 bool Unused;
1612 Result.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
1613 &Unused);
1614 Result = APFloat(log2(Result.convertToDouble()));
1615 break;
1616 }
1617 }
1618 // Convert `APFloat` to appropriate IEEE type depending on `DstTy`. Otherwise,
1619 // `buildFConstant` will assert on size mismatch. Only `G_FSQRT`, and
1620 // `G_FLOG2` reach here.
1621 bool Unused;
1622 Result.convert(Val.getSemantics(), APFloat::rmNearestTiesToEven, &Unused);
1623 return Result;
1624}
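// Illustrative folds performed by this helper (values chosen for exposition):
// G_FNEG of +2.0 yields -2.0, G_FABS of -2.0 yields +2.0, and G_FLOG2 of 8.0
// yields 3.0 (the G_FSQRT/G_FLOG2 paths convert the result back to the input
// semantics before a G_FCONSTANT is emitted).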
1625
1626void CombinerHelper::applyCombineConstantFoldFpUnary(MachineInstr &MI,
1627 const ConstantFP *Cst) {
1628 APFloat Folded = constantFoldFpUnary(MI, MRI, Cst->getValue());
1629 const ConstantFP *NewCst = ConstantFP::get(Builder.getContext(), Folded);
1630 Builder.buildFConstant(MI.getOperand(0), *NewCst);
1631 MI.eraseFromParent();
1632}
1633
1634bool CombinerHelper::matchPtrAddImmedChain(MachineInstr &MI,
1635 PtrAddChain &MatchInfo) {
1636 // We're trying to match the following pattern:
1637 // %t1 = G_PTR_ADD %base, G_CONSTANT imm1
1638 // %root = G_PTR_ADD %t1, G_CONSTANT imm2
1639 // -->
1640 // %root = G_PTR_ADD %base, G_CONSTANT (imm1 + imm2)
1641
1642 if (MI.getOpcode() != TargetOpcode::G_PTR_ADD)
1643 return false;
1644
1645 Register Add2 = MI.getOperand(1).getReg();
1646 Register Imm1 = MI.getOperand(2).getReg();
1647 auto MaybeImmVal = getIConstantVRegValWithLookThrough(Imm1, MRI);
1648 if (!MaybeImmVal)
1649 return false;
1650
1651 MachineInstr *Add2Def = MRI.getVRegDef(Add2);
1652 if (!Add2Def || Add2Def->getOpcode() != TargetOpcode::G_PTR_ADD)
1653 return false;
1654
1655 Register Base = Add2Def->getOperand(1).getReg();
1656 Register Imm2 = Add2Def->getOperand(2).getReg();
1657 auto MaybeImm2Val = getIConstantVRegValWithLookThrough(Imm2, MRI);
1658 if (!MaybeImm2Val)
1659 return false;
1660
1661 // Check if the new combined immediate forms an illegal addressing mode.
1662 // Do not combine if it was legal before but would get illegal.
1663 // To do so, we need to find a load/store user of the pointer to get
1664 // the access type.
1665 Type *AccessTy = nullptr;
1666 auto &MF = *MI.getMF();
1667 for (auto &UseMI : MRI.use_nodbg_instructions(MI.getOperand(0).getReg())) {
1668 if (auto *LdSt = dyn_cast<GLoadStore>(&UseMI)) {
1669 AccessTy = getTypeForLLT(MRI.getType(LdSt->getReg(0)),
1670 MF.getFunction().getContext());
1671 break;
1672 }
1673 }
1674 TargetLoweringBase::AddrMode AMNew;
1675 APInt CombinedImm = MaybeImmVal->Value + MaybeImm2Val->Value;
1676 AMNew.BaseOffs = CombinedImm.getSExtValue();
1677 if (AccessTy) {
1678 AMNew.HasBaseReg = true;
1679 TargetLoweringBase::AddrMode AMOld;
1680 AMOld.BaseOffs = MaybeImmVal->Value.getSExtValue();
1681 AMOld.HasBaseReg = true;
1682 unsigned AS = MRI.getType(Add2).getAddressSpace();
1683 const auto &TLI = *MF.getSubtarget().getTargetLowering();
1684 if (TLI.isLegalAddressingMode(MF.getDataLayout(), AMOld, AccessTy, AS) &&
1685 !TLI.isLegalAddressingMode(MF.getDataLayout(), AMNew, AccessTy, AS))
1686 return false;
1687 }
1688
1689 // Pass the combined immediate to the apply function.
1690 MatchInfo.Imm = AMNew.BaseOffs;
1691 MatchInfo.Base = Base;
1692 MatchInfo.Bank = getRegBank(Imm2);
1693 return true;
1694}
1695
1696void CombinerHelper::applyPtrAddImmedChain(MachineInstr &MI,
1697 PtrAddChain &MatchInfo) {
1698 assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
1699 MachineIRBuilder MIB(MI);
1700 LLT OffsetTy = MRI.getType(MI.getOperand(2).getReg());
1701 auto NewOffset = MIB.buildConstant(OffsetTy, MatchInfo.Imm);
1702 setRegBank(NewOffset.getReg(0), MatchInfo.Bank);
1703 Observer.changingInstr(MI);
1704 MI.getOperand(1).setReg(MatchInfo.Base);
1705 MI.getOperand(2).setReg(NewOffset.getReg(0));
1706 Observer.changedInstr(MI);
1707}
1708
1709bool CombinerHelper::matchShiftImmedChain(MachineInstr &MI,
1710 RegisterImmPair &MatchInfo) {
1711 // We're trying to match the following pattern with any of
1712 // G_SHL/G_ASHR/G_LSHR/G_SSHLSAT/G_USHLSAT shift instructions:
1713 // %t1 = SHIFT %base, G_CONSTANT imm1
1714 // %root = SHIFT %t1, G_CONSTANT imm2
1715 // -->
1716 // %root = SHIFT %base, G_CONSTANT (imm1 + imm2)
1717
1718 unsigned Opcode = MI.getOpcode();
1719 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
1720 Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||
1721 Opcode == TargetOpcode::G_USHLSAT) &&
1722 "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");
1723
1724 Register Shl2 = MI.getOperand(1).getReg();
1725 Register Imm1 = MI.getOperand(2).getReg();
1726 auto MaybeImmVal = getIConstantVRegValWithLookThrough(Imm1, MRI);
1727 if (!MaybeImmVal)
1728 return false;
1729
1730 MachineInstr *Shl2Def = MRI.getUniqueVRegDef(Shl2);
1731 if (Shl2Def->getOpcode() != Opcode)
1732 return false;
1733
1734 Register Base = Shl2Def->getOperand(1).getReg();
1735 Register Imm2 = Shl2Def->getOperand(2).getReg();
1736 auto MaybeImm2Val = getIConstantVRegValWithLookThrough(Imm2, MRI);
1737 if (!MaybeImm2Val)
1738 return false;
1739
1740 // Pass the combined immediate to the apply function.
1741 MatchInfo.Imm =
1742 (MaybeImmVal->Value.getZExtValue() + MaybeImm2Val->Value).getZExtValue();
1743 MatchInfo.Reg = Base;
1744
1745 // There is no simple replacement for a saturating unsigned left shift that
1746 // exceeds the scalar size.
1747 if (Opcode == TargetOpcode::G_USHLSAT &&
1748 MatchInfo.Imm >= MRI.getType(Shl2).getScalarSizeInBits())
1749 return false;
1750
1751 return true;
1752}
1753
1754void CombinerHelper::applyShiftImmedChain(MachineInstr &MI,
1755 RegisterImmPair &MatchInfo) {
1756 unsigned Opcode = MI.getOpcode();
1757 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
1758 Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||
1759 Opcode == TargetOpcode::G_USHLSAT) &&
1760 "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");
1761
1762 LLT Ty = MRI.getType(MI.getOperand(1).getReg());
1763 unsigned const ScalarSizeInBits = Ty.getScalarSizeInBits();
1764 auto Imm = MatchInfo.Imm;
1765
1766 if (Imm >= ScalarSizeInBits) {
1767 // Any logical shift that exceeds scalar size will produce zero.
1768 if (Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_LSHR) {
1769 Builder.buildConstant(MI.getOperand(0), 0);
1770 MI.eraseFromParent();
1771 return;
1772 }
1773 // Arithmetic shift and saturating signed left shift have no effect beyond
1774 // scalar size.
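// Illustrative instance (width assumed): on s32, a combined G_ASHR amount of
// 40 is clamped to 31, which still fills every result bit with the sign bit.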
1775 Imm = ScalarSizeInBits - 1;
1776 }
1777
1778 LLT ImmTy = MRI.getType(MI.getOperand(2).getReg());
1779 Register NewImm = Builder.buildConstant(ImmTy, Imm).getReg(0);
1780 Observer.changingInstr(MI);
1781 MI.getOperand(1).setReg(MatchInfo.Reg);
1782 MI.getOperand(2).setReg(NewImm);
1783 Observer.changedInstr(MI);
1784}
1785
1786bool CombinerHelper::matchShiftOfShiftedLogic(MachineInstr &MI,
1787 ShiftOfShiftedLogic &MatchInfo) {
1788 // We're trying to match the following pattern with any of
1789 // G_SHL/G_ASHR/G_LSHR/G_USHLSAT/G_SSHLSAT shift instructions in combination
1790 // with any of G_AND/G_OR/G_XOR logic instructions.
1791 // %t1 = SHIFT %X, G_CONSTANT C0
1792 // %t2 = LOGIC %t1, %Y
1793 // %root = SHIFT %t2, G_CONSTANT C1
1794 // -->
1795 // %t3 = SHIFT %X, G_CONSTANT (C0+C1)
1796 // %t4 = SHIFT %Y, G_CONSTANT C1
1797 // %root = LOGIC %t3, %t4
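// Illustrative instance (constants assumed): with C0 = 1 and C1 = 2 on s32,
//   %t1 = G_SHL %X, 1; %t2 = G_AND %t1, %Y; %root = G_SHL %t2, 2
// becomes
//   %t3 = G_SHL %X, 3; %t4 = G_SHL %Y, 2; %root = G_AND %t3, %t4
// because shifting the AND left by 2 shifts both of its operands by 2.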
1798 unsigned ShiftOpcode = MI.getOpcode();
1799 assert((ShiftOpcode == TargetOpcode::G_SHL ||
1800 ShiftOpcode == TargetOpcode::G_ASHR ||
1801 ShiftOpcode == TargetOpcode::G_LSHR ||
1802 ShiftOpcode == TargetOpcode::G_USHLSAT ||
1803 ShiftOpcode == TargetOpcode::G_SSHLSAT) &&
1804 "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT");
1805
1806 // Match a one-use bitwise logic op.
1807 Register LogicDest = MI.getOperand(1).getReg();
1808 if (!MRI.hasOneNonDBGUse(LogicDest))
1809 return false;
1810
1811 MachineInstr *LogicMI = MRI.getUniqueVRegDef(LogicDest);
1812 unsigned LogicOpcode = LogicMI->getOpcode();
1813 if (LogicOpcode != TargetOpcode::G_AND && LogicOpcode != TargetOpcode::G_OR &&
1814 LogicOpcode != TargetOpcode::G_XOR)
1815 return false;
1816
1817 // Find a matching one-use shift by constant.
1818 const Register C1 = MI.getOperand(2).getReg();
1819 auto MaybeImmVal = getIConstantVRegValWithLookThrough(C1, MRI);
1820 if (!MaybeImmVal || MaybeImmVal->Value == 0)
1821 return false;
1822
1823 const uint64_t C1Val = MaybeImmVal->Value.getZExtValue();
1824
1825 auto matchFirstShift = [&](const MachineInstr *MI, uint64_t &ShiftVal) {
1826 // The shift must use the same opcode as the root shift and have a single use.
1827 if (MI->getOpcode() != ShiftOpcode ||
1828 !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
1829 return false;
1830
1831 // Must be a constant.
1832 auto MaybeImmVal =
1833 getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
1834 if (!MaybeImmVal)
1835 return false;
1836
1837 ShiftVal = MaybeImmVal->Value.getSExtValue();
1838 return true;
1839 };
1840
1841 // Logic ops are commutative, so check each operand for a match.
1842 Register LogicMIReg1 = LogicMI->getOperand(1).getReg();
1843 MachineInstr *LogicMIOp1 = MRI.getUniqueVRegDef(LogicMIReg1);
1844 Register LogicMIReg2 = LogicMI->getOperand(2).getReg();
1845 MachineInstr *LogicMIOp2 = MRI.getUniqueVRegDef(LogicMIReg2);
1846 uint64_t C0Val;
1847
1848 if (matchFirstShift(LogicMIOp1, C0Val)) {
1849 MatchInfo.LogicNonShiftReg = LogicMIReg2;
1850 MatchInfo.Shift2 = LogicMIOp1;
1851 } else if (matchFirstShift(LogicMIOp2, C0Val)) {
1852 MatchInfo.LogicNonShiftReg = LogicMIReg1;
1853 MatchInfo.Shift2 = LogicMIOp2;
1854 } else
1855 return false;
1856
1857 MatchInfo.ValSum = C0Val + C1Val;
1858
1859 // The fold is not valid if the sum of the shift values exceeds bitwidth.
1860 if (MatchInfo.ValSum >= MRI.getType(LogicDest).getScalarSizeInBits())
1861 return false;
1862
1863 MatchInfo.Logic = LogicMI;
1864 return true;
1865}
1866
1867void CombinerHelper::applyShiftOfShiftedLogic(MachineInstr &MI,
1868 ShiftOfShiftedLogic &MatchInfo) {
1869 unsigned Opcode = MI.getOpcode();
1870 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
1871 Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_USHLSAT ||
1872 Opcode == TargetOpcode::G_SSHLSAT) &&
1873 "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT");
1874
1875 LLT ShlType = MRI.getType(MI.getOperand(2).getReg());
1876 LLT DestType = MRI.getType(MI.getOperand(0).getReg());
1877
1878 Register Const = Builder.buildConstant(ShlType, MatchInfo.ValSum).getReg(0);
1879
1880 Register Shift1Base = MatchInfo.Shift2->getOperand(1).getReg();
1881 Register Shift1 =
1882 Builder.buildInstr(Opcode, {DestType}, {Shift1Base, Const}).getReg(0);
1883
1884 // If LogicNonShiftReg is the same as Shift1Base and the shift1 constant equals
1885 // the MatchInfo.Shift2 constant, CSEMIRBuilder will reuse the old shift1 when
1886 // building shift2. In that case, erasing MatchInfo.Shift2 at the end would
1887 // actually remove the old shift1 and cause a crash later, so erase it earlier
1888 // to avoid the crash.
1889 MatchInfo.Shift2->eraseFromParent();
1890
1891 Register Shift2Const = MI.getOperand(2).getReg();
1892 Register Shift2 = Builder
1893 .buildInstr(Opcode, {DestType},
1894 {MatchInfo.LogicNonShiftReg, Shift2Const})
1895 .getReg(0);
1896
1897 Register Dest = MI.getOperand(0).getReg();
1898 Builder.buildInstr(MatchInfo.Logic->getOpcode(), {Dest}, {Shift1, Shift2});
1899
1900 // This was one use so it's safe to remove it.
1901 MatchInfo.Logic->eraseFromParent();
1902
1903 MI.eraseFromParent();
1904}
1905
1906bool CombinerHelper::matchCommuteShift(MachineInstr &MI, BuildFnTy &MatchInfo) {
1907 assert(MI.getOpcode() == TargetOpcode::G_SHL && "Expected G_SHL");
1908 // Combine (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
1909 // Combine (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
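// Illustrative instance (constants assumed): with c1 = 1 and c2 = 4,
//   (shl (add x, 1), 4) -> (add (shl x, 4), 16)
// since (x + 1) << 4 == (x << 4) + (1 << 4) in modular arithmetic.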
1910 auto &Shl = cast<GenericMachineInstr>(MI);
1911 Register DstReg = Shl.getReg(0);
1912 Register SrcReg = Shl.getReg(1);
1913 Register ShiftReg = Shl.getReg(2);
1914 Register X, C1;
1915
1916 if (!getTargetLowering().isDesirableToCommuteWithShift(MI, !isPreLegalize()))
1917 return false;
1918
1919 if (!mi_match(SrcReg, MRI,
1920 m_OneNonDBGUse(m_any_of(m_GAdd(m_Reg(X), m_Reg(C1)),
1921 m_GOr(m_Reg(X), m_Reg(C1))))))
1922 return false;
1923
1924 APInt C1Val, C2Val;
1925 if (!mi_match(C1, MRI, m_ICstOrSplat(C1Val)) ||
1926 !mi_match(ShiftReg, MRI, m_ICstOrSplat(C2Val)))
1927 return false;
1928
1929 auto *SrcDef = MRI.getVRegDef(SrcReg);
1930 assert((SrcDef->getOpcode() == TargetOpcode::G_ADD ||
1931 SrcDef->getOpcode() == TargetOpcode::G_OR) && "Unexpected op");
1932 LLT SrcTy = MRI.getType(SrcReg);
1933 MatchInfo = [=](MachineIRBuilder &B) {
1934 auto S1 = B.buildShl(SrcTy, X, ShiftReg);
1935 auto S2 = B.buildShl(SrcTy, C1, ShiftReg);
1936 B.buildInstr(SrcDef->getOpcode(), {DstReg}, {S1, S2});
1937 };
1938 return true;
1939}
1940
1941bool CombinerHelper::matchCombineMulToShl(MachineInstr &MI,
1942 unsigned &ShiftVal) {
1943 assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
1944 auto MaybeImmVal =
1945 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
1946 if (!MaybeImmVal)
1947 return false;
1948
1949 ShiftVal = MaybeImmVal->Value.exactLogBase2();
1950 return (static_cast<int32_t>(ShiftVal) != -1);
1951}
1952
1953void CombinerHelper::applyCombineMulToShl(MachineInstr &MI,
1954 unsigned &ShiftVal) {
1955 assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
1956 MachineIRBuilder MIB(MI);
1957 LLT ShiftTy = MRI.getType(MI.getOperand(0).getReg());
1958 auto ShiftCst = MIB.buildConstant(ShiftTy, ShiftVal);
1959 Observer.changingInstr(MI);
1960 MI.setDesc(MIB.getTII().get(TargetOpcode::G_SHL));
1961 MI.getOperand(2).setReg(ShiftCst.getReg(0));
1962 Observer.changedInstr(MI);
1963}
1964
1965// shl ([sza]ext x), y => zext (shl x, y), if shift does not overflow source
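// Illustrative instance (types assumed): if x is s8 with at least two known
// leading zero bits and it is zero-extended to s32, then
//   shl (zext x), 2 -> zext (shl x, 2)
// because no set bit of x can be shifted out of the narrow s8 value.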
1966bool CombinerHelper::matchCombineShlOfExtend(MachineInstr &MI,
1967 RegisterImmPair &MatchData) {
1968 assert(MI.getOpcode() == TargetOpcode::G_SHL && KB);
1969 if (!getTargetLowering().isDesirableToPullExtFromShl(MI))
1970 return false;
1971
1972 Register LHS = MI.getOperand(1).getReg();
1973
1974 Register ExtSrc;
1975 if (!mi_match(LHS, MRI, m_GAnyExt(m_Reg(ExtSrc))) &&
1976 !mi_match(LHS, MRI, m_GZExt(m_Reg(ExtSrc))) &&
1977 !mi_match(LHS, MRI, m_GSExt(m_Reg(ExtSrc))))
1978 return false;
1979
1980 Register RHS = MI.getOperand(2).getReg();
1981 MachineInstr *MIShiftAmt = MRI.getVRegDef(RHS);
1982 auto MaybeShiftAmtVal = isConstantOrConstantSplatVector(*MIShiftAmt, MRI);
1983 if (!MaybeShiftAmtVal)
1984 return false;
1985
1986 if (LI) {
1987 LLT SrcTy = MRI.getType(ExtSrc);
1988
1989 // We only really care about the legality of the shifted value. We can
1990 // pick any type for the constant shift amount, so ask the target what to
1991 // use. Otherwise we would have to guess and hope it is reported as legal.
1992 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(SrcTy);
1993 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SHL, {SrcTy, ShiftAmtTy}}))
1994 return false;
1995 }
1996
1997 int64_t ShiftAmt = MaybeShiftAmtVal->getSExtValue();
1998 MatchData.Reg = ExtSrc;
1999 MatchData.Imm = ShiftAmt;
2000
2001 unsigned MinLeadingZeros = KB->getKnownZeroes(ExtSrc).countl_one();
2002 unsigned SrcTySize = MRI.getType(ExtSrc).getScalarSizeInBits();
2003 return MinLeadingZeros >= ShiftAmt && ShiftAmt < SrcTySize;
2004}
2005
2006void CombinerHelper::applyCombineShlOfExtend(MachineInstr &MI,
2007 const RegisterImmPair &MatchData) {
2008 Register ExtSrcReg = MatchData.Reg;
2009 int64_t ShiftAmtVal = MatchData.Imm;
2010
2011 LLT ExtSrcTy = MRI.getType(ExtSrcReg);
2012 auto ShiftAmt = Builder.buildConstant(ExtSrcTy, ShiftAmtVal);
2013 auto NarrowShift =
2014 Builder.buildShl(ExtSrcTy, ExtSrcReg, ShiftAmt, MI.getFlags());
2015 Builder.buildZExt(MI.getOperand(0), NarrowShift);
2016 MI.eraseFromParent();
2017}
2018
2019bool CombinerHelper::matchCombineMergeUnmerge(MachineInstr &MI,
2020 Register &MatchInfo) {
2021 GMerge &Merge = cast<GMerge>(MI);
2022 SmallVector<Register, 16> MergedValues;
2023 for (unsigned I = 0; I < Merge.getNumSources(); ++I)
2024 MergedValues.emplace_back(Merge.getSourceReg(I));
2025
2026 auto *Unmerge = getOpcodeDef<GUnmerge>(MergedValues[0], MRI);
2027 if (!Unmerge || Unmerge->getNumDefs() != Merge.getNumSources())
2028 return false;
2029
2030 for (unsigned I = 0; I < MergedValues.size(); ++I)
2031 if (MergedValues[I] != Unmerge->getReg(I))
2032 return false;
2033
2034 MatchInfo = Unmerge->getSourceReg();
2035 return true;
2036}
2037
2038static Register peekThroughBitcast(Register Reg,
2039 const MachineRegisterInfo &MRI) {
2040 while (mi_match(Reg, MRI, m_GBitcast(m_Reg(Reg))))
2041 ;
2042
2043 return Reg;
2044}
2045
2046bool CombinerHelper::matchCombineUnmergeMergeToPlainValues(
2047 MachineInstr &MI, SmallVectorImpl<Register> &Operands) {
2048 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2049 "Expected an unmerge");
2050 auto &Unmerge = cast<GUnmerge>(MI);
2051 Register SrcReg = peekThroughBitcast(Unmerge.getSourceReg(), MRI);
2052
2053 auto *SrcInstr = getOpcodeDef<GMergeLikeInstr>(SrcReg, MRI);
2054 if (!SrcInstr)
2055 return false;
2056
2057 // Check the source type of the merge.
2058 LLT SrcMergeTy = MRI.getType(SrcInstr->getSourceReg(0));
2059 LLT Dst0Ty = MRI.getType(Unmerge.getReg(0));
2060 bool SameSize = Dst0Ty.getSizeInBits() == SrcMergeTy.getSizeInBits();
2061 if (SrcMergeTy != Dst0Ty && !SameSize)
2062 return false;
2063 // They are the same now (modulo a bitcast).
2064 // We can collect all the src registers.
2065 for (unsigned Idx = 0; Idx < SrcInstr->getNumSources(); ++Idx)
2066 Operands.push_back(SrcInstr->getSourceReg(Idx));
2067 return true;
2068}
2069
2070void CombinerHelper::applyCombineUnmergeMergeToPlainValues(
2071 MachineInstr &MI, SmallVectorImpl<Register> &Operands) {
2072 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2073 "Expected an unmerge");
2074 assert((MI.getNumOperands() - 1 == Operands.size()) &&
2075 "Not enough operands to replace all defs");
2076 unsigned NumElems = MI.getNumOperands() - 1;
2077
2078 LLT SrcTy = MRI.getType(Operands[0]);
2079 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
2080 bool CanReuseInputDirectly = DstTy == SrcTy;
2081 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
2082 Register DstReg = MI.getOperand(Idx).getReg();
2083 Register SrcReg = Operands[Idx];
2084
2085 // This combine may run after RegBankSelect, so we need to be aware of
2086 // register banks.
2087 const auto &DstCB = MRI.getRegClassOrRegBank(DstReg);
2088 if (!DstCB.isNull() && DstCB != MRI.getRegClassOrRegBank(SrcReg)) {
2089 SrcReg = Builder.buildCopy(MRI.getType(SrcReg), SrcReg).getReg(0);
2090 MRI.setRegClassOrRegBank(SrcReg, DstCB);
2091 }
2092
2093 if (CanReuseInputDirectly)
2094 replaceRegWith(MRI, DstReg, SrcReg);
2095 else
2096 Builder.buildCast(DstReg, SrcReg);
2097 }
2098 MI.eraseFromParent();
2099}
2100
2101bool CombinerHelper::matchCombineUnmergeConstant(MachineInstr &MI,
2102 SmallVectorImpl<APInt> &Csts) {
2103 unsigned SrcIdx = MI.getNumOperands() - 1;
2104 Register SrcReg = MI.getOperand(SrcIdx).getReg();
2105 MachineInstr *SrcInstr = MRI.getVRegDef(SrcReg);
2106 if (SrcInstr->getOpcode() != TargetOpcode::G_CONSTANT &&
2107 SrcInstr->getOpcode() != TargetOpcode::G_FCONSTANT)
2108 return false;
2109 // Break down the big constant in smaller ones.
2110 const MachineOperand &CstVal = SrcInstr->getOperand(1);
2111 APInt Val = SrcInstr->getOpcode() == TargetOpcode::G_CONSTANT
2112 ? CstVal.getCImm()->getValue()
2113 : CstVal.getFPImm()->getValueAPF().bitcastToAPInt();
2114
2115 LLT Dst0Ty = MRI.getType(MI.getOperand(0).getReg());
2116 unsigned ShiftAmt = Dst0Ty.getSizeInBits();
2117 // Unmerge a constant.
2118 for (unsigned Idx = 0; Idx != SrcIdx; ++Idx) {
2119 Csts.emplace_back(Val.trunc(ShiftAmt));
2120 Val = Val.lshr(ShiftAmt);
2121 }
2122
2123 return true;
2124}
2125
2126void CombinerHelper::applyCombineUnmergeConstant(MachineInstr &MI,
2127 SmallVectorImpl<APInt> &Csts) {
2128 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2129 "Expected an unmerge");
2130 assert((MI.getNumOperands() - 1 == Csts.size()) &&
2131 "Not enough operands to replace all defs");
2132 unsigned NumElems = MI.getNumOperands() - 1;
2133 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
2134 Register DstReg = MI.getOperand(Idx).getReg();
2135 Builder.buildConstant(DstReg, Csts[Idx]);
2136 }
2137
2138 MI.eraseFromParent();
2139}
2140
2141bool CombinerHelper::matchCombineUnmergeUndef(
2142 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
2143 unsigned SrcIdx = MI.getNumOperands() - 1;
2144 Register SrcReg = MI.getOperand(SrcIdx).getReg();
2145 MatchInfo = [&MI](MachineIRBuilder &B) {
2146 unsigned NumElems = MI.getNumOperands() - 1;
2147 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
2148 Register DstReg = MI.getOperand(Idx).getReg();
2149 B.buildUndef(DstReg);
2150 }
2151 };
2152 return isa<GImplicitDef>(MRI.getVRegDef(SrcReg));
2153}
2154
2155bool CombinerHelper::matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) {
2156 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2157 "Expected an unmerge");
2158 if (MRI.getType(MI.getOperand(0).getReg()).isVector() ||
2159 MRI.getType(MI.getOperand(MI.getNumDefs()).getReg()).isVector())
2160 return false;
2161 // Check that all the lanes are dead except the first one.
2162 for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
2163 if (!MRI.use_nodbg_empty(MI.getOperand(Idx).getReg()))
2164 return false;
2165 }
2166 return true;
2167}
2168
2169void CombinerHelper::applyCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) {
2170 Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
2171 Register Dst0Reg = MI.getOperand(0).getReg();
2172 Builder.buildTrunc(Dst0Reg, SrcReg);
2173 MI.eraseFromParent();
2174}
2175
2176bool CombinerHelper::matchCombineUnmergeZExtToZExt(MachineInstr &MI) {
2177 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2178 "Expected an unmerge");
2179 Register Dst0Reg = MI.getOperand(0).getReg();
2180 LLT Dst0Ty = MRI.getType(Dst0Reg);
2181 // G_ZEXT on vector applies to each lane, so it will
2182 // affect all destinations. Therefore we won't be able
2183 // to simplify the unmerge to just the first definition.
2184 if (Dst0Ty.isVector())
2185 return false;
2186 Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
2187 LLT SrcTy = MRI.getType(SrcReg);
2188 if (SrcTy.isVector())
2189 return false;
2190
2191 Register ZExtSrcReg;
2192 if (!mi_match(SrcReg, MRI, m_GZExt(m_Reg(ZExtSrcReg))))
2193 return false;
2194
2195 // Finally we can replace the first definition with
2196 // a zext of the source if the definition is big enough to hold
2197 // all of ZExtSrc bits.
2198 LLT ZExtSrcTy = MRI.getType(ZExtSrcReg);
2199 return ZExtSrcTy.getSizeInBits() <= Dst0Ty.getSizeInBits();
2200}
2201
2202void CombinerHelper::applyCombineUnmergeZExtToZExt(MachineInstr &MI) {
2203 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2204 "Expected an unmerge");
2205
2206 Register Dst0Reg = MI.getOperand(0).getReg();
2207
2208 MachineInstr *ZExtInstr =
2209 MRI.getVRegDef(MI.getOperand(MI.getNumDefs()).getReg());
2210 assert(ZExtInstr && ZExtInstr->getOpcode() == TargetOpcode::G_ZEXT &&
2211 "Expecting a G_ZEXT");
2212
2213 Register ZExtSrcReg = ZExtInstr->getOperand(1).getReg();
2214 LLT Dst0Ty = MRI.getType(Dst0Reg);
2215 LLT ZExtSrcTy = MRI.getType(ZExtSrcReg);
2216
2217 if (Dst0Ty.getSizeInBits() > ZExtSrcTy.getSizeInBits()) {
2218 Builder.buildZExt(Dst0Reg, ZExtSrcReg);
2219 } else {
2220 assert(Dst0Ty.getSizeInBits() == ZExtSrcTy.getSizeInBits() &&
2221 "ZExt src doesn't fit in destination");
2222 replaceRegWith(MRI, Dst0Reg, ZExtSrcReg);
2223 }
2224
2225 Register ZeroReg;
2226 for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
2227 if (!ZeroReg)
2228 ZeroReg = Builder.buildConstant(Dst0Ty, 0).getReg(0);
2229 replaceRegWith(MRI, MI.getOperand(Idx).getReg(), ZeroReg);
2230 }
2231 MI.eraseFromParent();
2232}
2233
2234bool CombinerHelper::matchCombineShiftToUnmerge(MachineInstr &MI,
2235 unsigned TargetShiftSize,
2236 unsigned &ShiftVal) {
2237 assert((MI.getOpcode() == TargetOpcode::G_SHL ||
2238 MI.getOpcode() == TargetOpcode::G_LSHR ||
2239 MI.getOpcode() == TargetOpcode::G_ASHR) && "Expected a shift");
2240
2241 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
2242 if (Ty.isVector()) // TODO:
2243 return false;
2244
2245 // Don't narrow further than the requested size.
2246 unsigned Size = Ty.getSizeInBits();
2247 if (Size <= TargetShiftSize)
2248 return false;
2249
2250 auto MaybeImmVal =
2251 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
2252 if (!MaybeImmVal)
2253 return false;
2254
2255 ShiftVal = MaybeImmVal->Value.getSExtValue();
2256 return ShiftVal >= Size / 2 && ShiftVal < Size;
2257}
2258
2259void CombinerHelper::applyCombineShiftToUnmerge(MachineInstr &MI,
2260 const unsigned &ShiftVal) {
2261 Register DstReg = MI.getOperand(0).getReg();
2262 Register SrcReg = MI.getOperand(1).getReg();
2263 LLT Ty = MRI.getType(SrcReg);
2264 unsigned Size = Ty.getSizeInBits();
2265 unsigned HalfSize = Size / 2;
2266 assert(ShiftVal >= HalfSize);
2267
2268 LLT HalfTy = LLT::scalar(HalfSize);
2269
2270 auto Unmerge = Builder.buildUnmerge(HalfTy, SrcReg);
2271 unsigned NarrowShiftAmt = ShiftVal - HalfSize;
2272
2273 if (MI.getOpcode() == TargetOpcode::G_LSHR) {
2274 Register Narrowed = Unmerge.getReg(1);
2275
2276 // dst = G_LSHR s64:x, C for C >= 32
2277 // =>
2278 // lo, hi = G_UNMERGE_VALUES x
2279 // dst = G_MERGE_VALUES (G_LSHR hi, C - 32), 0
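// Illustrative instance: G_LSHR s64:x, 40 becomes
//   lo, hi = G_UNMERGE_VALUES x
//   dst = G_MERGE_VALUES (G_LSHR hi, 8), 0
// since 40 - 32 == 8 and every result bit below 32 comes from hi.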
2280
2281 if (NarrowShiftAmt != 0) {
2282 Narrowed = Builder.buildLShr(HalfTy, Narrowed,
2283 Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
2284 }
2285
2286 auto Zero = Builder.buildConstant(HalfTy, 0);
2287 Builder.buildMergeLikeInstr(DstReg, {Narrowed, Zero});
2288 } else if (MI.getOpcode() == TargetOpcode::G_SHL) {
2289 Register Narrowed = Unmerge.getReg(0);
2290 // dst = G_SHL s64:x, C for C >= 32
2291 // =>
2292 // lo, hi = G_UNMERGE_VALUES x
2293 // dst = G_MERGE_VALUES 0, (G_SHL hi, C - 32)
2294 if (NarrowShiftAmt != 0) {
2295 Narrowed = Builder.buildShl(HalfTy, Narrowed,
2296 Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
2297 }
2298
2299 auto Zero = Builder.buildConstant(HalfTy, 0);
2300 Builder.buildMergeLikeInstr(DstReg, {Zero, Narrowed});
2301 } else {
2302 assert(MI.getOpcode() == TargetOpcode::G_ASHR);
2303 auto Hi = Builder.buildAShr(
2304 HalfTy, Unmerge.getReg(1),
2305 Builder.buildConstant(HalfTy, HalfSize - 1));
2306
2307 if (ShiftVal == HalfSize) {
2308 // (G_ASHR i64:x, 32) ->
2309 // G_MERGE_VALUES hi_32(x), (G_ASHR hi_32(x), 31)
2310 Builder.buildMergeLikeInstr(DstReg, {Unmerge.getReg(1), Hi});
2311 } else if (ShiftVal == Size - 1) {
2312 // Don't need a second shift.
2313 // (G_ASHR i64:x, 63) ->
2314 // %narrowed = (G_ASHR hi_32(x), 31)
2315 // G_MERGE_VALUES %narrowed, %narrowed
2316 Builder.buildMergeLikeInstr(DstReg, {Hi, Hi});
2317 } else {
2318 auto Lo = Builder.buildAShr(
2319 HalfTy, Unmerge.getReg(1),
2320 Builder.buildConstant(HalfTy, ShiftVal - HalfSize));
2321
2322 // (G_ASHR i64:x, C) ->, for C >= 32
2323 // G_MERGE_VALUES (G_ASHR hi_32(x), C - 32), (G_ASHR hi_32(x), 31)
2324 Builder.buildMergeLikeInstr(DstReg, {Lo, Hi});
2325 }
2326 }
2327
2328 MI.eraseFromParent();
2329}
2330
2331bool CombinerHelper::tryCombineShiftToUnmerge(MachineInstr &MI,
2332 unsigned TargetShiftAmount) {
2333 unsigned ShiftAmt;
2334 if (matchCombineShiftToUnmerge(MI, TargetShiftAmount, ShiftAmt)) {
2335 applyCombineShiftToUnmerge(MI, ShiftAmt);
2336 return true;
2337 }
2338
2339 return false;
2340}
2341
2342bool CombinerHelper::matchCombineI2PToP2I(MachineInstr &MI, Register &Reg) {
2343 assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
2344 Register DstReg = MI.getOperand(0).getReg();
2345 LLT DstTy = MRI.getType(DstReg);
2346 Register SrcReg = MI.getOperand(1).getReg();
2347 return mi_match(SrcReg, MRI,
2348 m_GPtrToInt(m_all_of(m_SpecificType(DstTy), m_Reg(Reg))));
2349}
2350
2351void CombinerHelper::applyCombineI2PToP2I(MachineInstr &MI, Register &Reg) {
2352 assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
2353 Register DstReg = MI.getOperand(0).getReg();
2354 Builder.buildCopy(DstReg, Reg);
2355 MI.eraseFromParent();
2356}
2357
2358void CombinerHelper::applyCombineP2IToI2P(MachineInstr &MI, Register &Reg) {
2359 assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT");
2360 Register DstReg = MI.getOperand(0).getReg();
2361 Builder.buildZExtOrTrunc(DstReg, Reg);
2362 MI.eraseFromParent();
2363}
2364
2365bool CombinerHelper::matchCombineAddP2IToPtrAdd(
2366 MachineInstr &MI, std::pair<Register, bool> &PtrReg) {
2367 assert(MI.getOpcode() == TargetOpcode::G_ADD);
2368 Register LHS = MI.getOperand(1).getReg();
2369 Register RHS = MI.getOperand(2).getReg();
2370 LLT IntTy = MRI.getType(LHS);
2371
2372 // G_PTR_ADD always has the pointer in the LHS, so we may need to commute the
2373 // instruction.
2374 PtrReg.second = false;
2375 for (Register SrcReg : {LHS, RHS}) {
2376 if (mi_match(SrcReg, MRI, m_GPtrToInt(m_Reg(PtrReg.first)))) {
2377 // Don't handle cases where the integer is implicitly converted to the
2378 // pointer width.
2379 LLT PtrTy = MRI.getType(PtrReg.first);
2380 if (PtrTy.getScalarSizeInBits() == IntTy.getScalarSizeInBits())
2381 return true;
2382 }
2383
2384 PtrReg.second = true;
2385 }
2386
2387 return false;
2388}
2389
2390void CombinerHelper::applyCombineAddP2IToPtrAdd(
2391 MachineInstr &MI, std::pair<Register, bool> &PtrReg) {
2392 Register Dst = MI.getOperand(0).getReg();
2393 Register LHS = MI.getOperand(1).getReg();
2394 Register RHS = MI.getOperand(2).getReg();
2395
2396 const bool DoCommute = PtrReg.second;
2397 if (DoCommute)
2398 std::swap(LHS, RHS);
2399 LHS = PtrReg.first;
2400
2401 LLT PtrTy = MRI.getType(LHS);
2402
2403 auto PtrAdd = Builder.buildPtrAdd(PtrTy, LHS, RHS);
2404 Builder.buildPtrToInt(Dst, PtrAdd);
2405 MI.eraseFromParent();
2406}
2407
2408bool CombinerHelper::matchCombineConstPtrAddToI2P(MachineInstr &MI,
2409 APInt &NewCst) {
2410 auto &PtrAdd = cast<GPtrAdd>(MI);
2411 Register LHS = PtrAdd.getBaseReg();
2412 Register RHS = PtrAdd.getOffsetReg();
2413 MachineRegisterInfo &MRI = Builder.getMF().getRegInfo();
2414
2415 if (auto RHSCst = getIConstantVRegVal(RHS, MRI)) {
2416 APInt Cst;
2417 if (mi_match(LHS, MRI, m_GIntToPtr(m_ICst(Cst)))) {
2418 auto DstTy = MRI.getType(PtrAdd.getReg(0));
2419 // G_INTTOPTR uses zero-extension
2420 NewCst = Cst.zextOrTrunc(DstTy.getSizeInBits());
2421 NewCst += RHSCst->sextOrTrunc(DstTy.getSizeInBits());
2422 return true;
2423 }
2424 }
2425
2426 return false;
2427}
2428
2429void CombinerHelper::applyCombineConstPtrAddToI2P(MachineInstr &MI,
2430 APInt &NewCst) {
2431 auto &PtrAdd = cast<GPtrAdd>(MI);
2432 Register Dst = PtrAdd.getReg(0);
2433
2434 Builder.buildConstant(Dst, NewCst);
2435 PtrAdd.eraseFromParent();
2436}
2437
2438bool CombinerHelper::matchCombineAnyExtTrunc(MachineInstr &MI, Register &Reg) {
2439 assert(MI.getOpcode() == TargetOpcode::G_ANYEXT && "Expected a G_ANYEXT");
2440 Register DstReg = MI.getOperand(0).getReg();
2441 Register SrcReg = MI.getOperand(1).getReg();
2442 Register OriginalSrcReg = getSrcRegIgnoringCopies(SrcReg, MRI);
2443 if (OriginalSrcReg.isValid())
2444 SrcReg = OriginalSrcReg;
2445 LLT DstTy = MRI.getType(DstReg);
2446 return mi_match(SrcReg, MRI,
2447 m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy))));
2448}
2449
2450bool CombinerHelper::matchCombineZextTrunc(MachineInstr &MI, Register &Reg) {
2451 assert(MI.getOpcode() == TargetOpcode::G_ZEXT && "Expected a G_ZEXT");
2452 Register DstReg = MI.getOperand(0).getReg();
2453 Register SrcReg = MI.getOperand(1).getReg();
2454 LLT DstTy = MRI.getType(DstReg);
2455 if (mi_match(SrcReg, MRI,
2456 m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy))))) {
2457 unsigned DstSize = DstTy.getScalarSizeInBits();
2458 unsigned SrcSize = MRI.getType(SrcReg).getScalarSizeInBits();
2459 return KB->getKnownBits(Reg).countMinLeadingZeros() >= DstSize - SrcSize;
2460 }
2461 return false;
2462}
2463
2464bool CombinerHelper::matchCombineExtOfExt(
2465 MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
2466 assert((MI.getOpcode() == TargetOpcode::G_ANYEXT ||
2467 MI.getOpcode() == TargetOpcode::G_SEXT ||
2468 MI.getOpcode() == TargetOpcode::G_ZEXT) &&
2469 "Expected a G_[ASZ]EXT");
2470 Register SrcReg = MI.getOperand(1).getReg();
2471 Register OriginalSrcReg = getSrcRegIgnoringCopies(SrcReg, MRI);
2472 if (OriginalSrcReg.isValid())
2473 SrcReg = OriginalSrcReg;
2474 MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
2475 // Match exts with the same opcode, anyext([sz]ext) and sext(zext).
2476 unsigned Opc = MI.getOpcode();
2477 unsigned SrcOpc = SrcMI->getOpcode();
2478 if (Opc == SrcOpc ||
2479 (Opc == TargetOpcode::G_ANYEXT &&
2480 (SrcOpc == TargetOpcode::G_SEXT || SrcOpc == TargetOpcode::G_ZEXT)) ||
2481 (Opc == TargetOpcode::G_SEXT && SrcOpc == TargetOpcode::G_ZEXT)) {
2482 MatchInfo = std::make_tuple(SrcMI->getOperand(1).getReg(), SrcOpc);
2483 return true;
2484 }
2485 return false;
2486}
2487
2488void CombinerHelper::applyCombineExtOfExt(
2489 MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
2490 assert((MI.getOpcode() == TargetOpcode::G_ANYEXT ||
2491 MI.getOpcode() == TargetOpcode::G_SEXT ||
2492 MI.getOpcode() == TargetOpcode::G_ZEXT) &&
2493 "Expected a G_[ASZ]EXT");
2494
2495 Register Reg = std::get<0>(MatchInfo);
2496 unsigned SrcExtOp = std::get<1>(MatchInfo);
2497
2498 // Combine exts with the same opcode.
2499 if (MI.getOpcode() == SrcExtOp) {
2500 Observer.changingInstr(MI);
2501 MI.getOperand(1).setReg(Reg);
2502 Observer.changedInstr(MI);
2503 return;
2504 }
2505
2506 // Combine:
2507 // - anyext([sz]ext x) to [sz]ext x
2508 // - sext(zext x) to zext x
2509 if (MI.getOpcode() == TargetOpcode::G_ANYEXT ||
2510 (MI.getOpcode() == TargetOpcode::G_SEXT &&
2511 SrcExtOp == TargetOpcode::G_ZEXT)) {
2512 Register DstReg = MI.getOperand(0).getReg();
2513 Builder.buildInstr(SrcExtOp, {DstReg}, {Reg});
2514 MI.eraseFromParent();
2515 }
2516}
2517
2518bool CombinerHelper::matchCombineTruncOfExt(
2519 MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo) {
2520 assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
2521 Register SrcReg = MI.getOperand(1).getReg();
2522 MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
2523 unsigned SrcOpc = SrcMI->getOpcode();
2524 if (SrcOpc == TargetOpcode::G_ANYEXT || SrcOpc == TargetOpcode::G_SEXT ||
2525 SrcOpc == TargetOpcode::G_ZEXT) {
2526 MatchInfo = std::make_pair(SrcMI->getOperand(1).getReg(), SrcOpc);
2527 return true;
2528 }
2529 return false;
2530}
2531
2532void CombinerHelper::applyCombineTruncOfExt(
2533 MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo) {
2534 assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
2535 Register SrcReg = MatchInfo.first;
2536 unsigned SrcExtOp = MatchInfo.second;
2537 Register DstReg = MI.getOperand(0).getReg();
2538 LLT SrcTy = MRI.getType(SrcReg);
2539 LLT DstTy = MRI.getType(DstReg);
2540 if (SrcTy == DstTy) {
2541 MI.eraseFromParent();
2542 replaceRegWith(MRI, DstReg, SrcReg);
2543 return;
2544 }
2545 if (SrcTy.getSizeInBits() < DstTy.getSizeInBits())
2546 Builder.buildInstr(SrcExtOp, {DstReg}, {SrcReg});
2547 else
2548 Builder.buildTrunc(DstReg, SrcReg);
2549 MI.eraseFromParent();
2550}
2551
2552static LLT getMidVTForTruncRightShiftCombine(LLT ShiftTy, LLT TruncTy) {
2553 const unsigned ShiftSize = ShiftTy.getScalarSizeInBits();
2554 const unsigned TruncSize = TruncTy.getScalarSizeInBits();
2555
2556 // ShiftTy > 32 > TruncTy -> 32
2557 if (ShiftSize > 32 && TruncSize < 32)
2558 return ShiftTy.changeElementSize(32);
2559
2560 // TODO: We could also reduce to 16 bits, but that's more target-dependent.
2561 // Some targets like it, some don't, some only like it under certain
2562 // conditions/processor versions, etc.
2563 // A TL hook might be needed for this.
2564
2565 // Don't combine
2566 return ShiftTy;
2567}
2568
2569bool CombinerHelper::matchCombineTruncOfShift(
2570 MachineInstr &MI, std::pair<MachineInstr *, LLT> &MatchInfo) {
2571 assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
2572 Register DstReg = MI.getOperand(0).getReg();
2573 Register SrcReg = MI.getOperand(1).getReg();
2574
2575 if (!MRI.hasOneNonDBGUse(SrcReg))
2576 return false;
2577
2578 LLT SrcTy = MRI.getType(SrcReg);
2579 LLT DstTy = MRI.getType(DstReg);
2580
2581 MachineInstr *SrcMI = getDefIgnoringCopies(SrcReg, MRI);
2582 const auto &TL = getTargetLowering();
2583
2584 LLT NewShiftTy;
2585 switch (SrcMI->getOpcode()) {
2586 default:
2587 return false;
2588 case TargetOpcode::G_SHL: {
2589 NewShiftTy = DstTy;
2590
2591 // Make sure new shift amount is legal.
2592 KnownBits Known = KB->getKnownBits(SrcMI->getOperand(2).getReg());
2593 if (Known.getMaxValue().uge(NewShiftTy.getScalarSizeInBits()))
2594 return false;
2595 break;
2596 }
2597 case TargetOpcode::G_LSHR:
2598 case TargetOpcode::G_ASHR: {
2599 // For right shifts, we conservatively do not do the transform if the TRUNC
2600 // has any STORE users. The reason is that if we change the type of the
2601 // shift, we may break the truncstore combine.
2602 //
2603 // TODO: Fix truncstore combine to handle (trunc(lshr (trunc x), k)).
2604 for (auto &User : MRI.use_instructions(DstReg))
2605 if (User.getOpcode() == TargetOpcode::G_STORE)
2606 return false;
2607
2608 NewShiftTy = getMidVTForTruncRightShiftCombine(SrcTy, DstTy);
2609 if (NewShiftTy == SrcTy)
2610 return false;
2611
2612 // Make sure we won't lose information by truncating the high bits.
2613 KnownBits Known = KB->getKnownBits(SrcMI->getOperand(2).getReg());
2614 if (Known.getMaxValue().ugt(NewShiftTy.getScalarSizeInBits() -
2615 DstTy.getScalarSizeInBits()))
2616 return false;
2617 break;
2618 }
2619 }
2620
2621 if (!isLegalOrBeforeLegalizer(
2622 {SrcMI->getOpcode(),
2623 {NewShiftTy, TL.getPreferredShiftAmountTy(NewShiftTy)}}))
2624 return false;
2625
2626 MatchInfo = std::make_pair(SrcMI, NewShiftTy);
2627 return true;
2628}
2629
2630void CombinerHelper::applyCombineTruncOfShift(
2631 MachineInstr &MI, std::pair<MachineInstr *, LLT> &MatchInfo) {
2632 MachineInstr *ShiftMI = MatchInfo.first;
2633 LLT NewShiftTy = MatchInfo.second;
2634
2635 Register Dst = MI.getOperand(0).getReg();
2636 LLT DstTy = MRI.getType(Dst);
2637
2638 Register ShiftAmt = ShiftMI->getOperand(2).getReg();
2639 Register ShiftSrc = ShiftMI->getOperand(1).getReg();
2640 ShiftSrc = Builder.buildTrunc(NewShiftTy, ShiftSrc).getReg(0);
2641
2642 Register NewShift =
2643 Builder
2644 .buildInstr(ShiftMI->getOpcode(), {NewShiftTy}, {ShiftSrc, ShiftAmt})
2645 .getReg(0);
2646
2647 if (NewShiftTy == DstTy)
2648 replaceRegWith(MRI, Dst, NewShift);
2649 else
2650 Builder.buildTrunc(Dst, NewShift);
2651
2652 eraseInst(MI);
2653}
2654
2655bool CombinerHelper::matchAnyExplicitUseIsUndef(MachineInstr &MI) {
2656 return any_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
2657 return MO.isReg() &&
2658 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
2659 });
2660}
2661
2662bool CombinerHelper::matchAllExplicitUsesAreUndef(MachineInstr &MI) {
2663 return all_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
2664 return !MO.isReg() ||
2665 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
2666 });
2667}
2668
2669bool CombinerHelper::matchUndefShuffleVectorMask(MachineInstr &MI) {
2670 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
2671 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
2672 return all_of(Mask, [](int Elt) { return Elt < 0; });
2673}
2674
2675bool CombinerHelper::matchUndefStore(MachineInstr &MI) {
2676 assert(MI.getOpcode() == TargetOpcode::G_STORE);
2677 return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(0).getReg(),
2678 MRI);
2679}
2680
2681bool CombinerHelper::matchUndefSelectCmp(MachineInstr &MI) {
2682 assert(MI.getOpcode() == TargetOpcode::G_SELECT);
2683 return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(1).getReg(),
2684 MRI);
2685}
2686
2687bool CombinerHelper::matchInsertExtractVecEltOutOfBounds(MachineInstr &MI) {
2688 assert((MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT ||
2689 MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT) &&
2690 "Expected an insert/extract element op");
2691 LLT VecTy = MRI.getType(MI.getOperand(1).getReg());
2692 unsigned IdxIdx =
2693 MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
2694 auto Idx = getIConstantVRegVal(MI.getOperand(IdxIdx).getReg(), MRI);
2695 if (!Idx)
2696 return false;
2697 return Idx->getZExtValue() >= VecTy.getNumElements();
2698}
2699
2700bool CombinerHelper::matchConstantSelectCmp(MachineInstr &MI, unsigned &OpIdx) {
2701 GSelect &SelMI = cast<GSelect>(MI);
2702 auto Cst =
2703 isConstantOrConstantSplatVector(*MRI.getVRegDef(SelMI.getCondReg()), MRI);
2704 if (!Cst)
2705 return false;
2706 OpIdx = Cst->isZero() ? 3 : 2;
2707 return true;
2708}
2709
2710void CombinerHelper::eraseInst(MachineInstr &MI) { MI.eraseFromParent(); }
2711
2712bool CombinerHelper::matchEqualDefs(const MachineOperand &MOP1,
2713 const MachineOperand &MOP2) {
2714 if (!MOP1.isReg() || !MOP2.isReg())
2715 return false;
2716 auto InstAndDef1 = getDefSrcRegIgnoringCopies(MOP1.getReg(), MRI);
2717 if (!InstAndDef1)
2718 return false;
2719 auto InstAndDef2 = getDefSrcRegIgnoringCopies(MOP2.getReg(), MRI);
2720 if (!InstAndDef2)
2721 return false;
2722 MachineInstr *I1 = InstAndDef1->MI;
2723 MachineInstr *I2 = InstAndDef2->MI;
2724
2725 // Handle a case like this:
2726 //
2727 // %0:_(s64), %1:_(s64) = G_UNMERGE_VALUES %2:_(<2 x s64>)
2728 //
2729 // Even though %0 and %1 are produced by the same instruction they are not
2730 // the same values.
2731 if (I1 == I2)
2732 return MOP1.getReg() == MOP2.getReg();
2733
2734 // If we have an instruction which loads or stores, we can't guarantee that
2735 // it is identical.
2736 //
2737 // For example, we may have
2738 //
2739 // %x1 = G_LOAD %addr (load N from @somewhere)
2740 // ...
2741 // call @foo
2742 // ...
2743 // %x2 = G_LOAD %addr (load N from @somewhere)
2744 // ...
2745 // %or = G_OR %x1, %x2
2746 //
2747 // It's possible that @foo will modify whatever lives at the address we're
2748 // loading from. To be safe, let's just assume that all loads and stores
2749 // are different (unless we have something which is guaranteed to not
2750 // change.)
2751 if (I1->mayLoadOrStore() && !I1->isDereferenceableInvariantLoad())
2752 return false;
2753
2754 // If both instructions are loads or stores, they are equal only if both
2755 // are dereferenceable invariant loads with the same number of bits.
2756 if (I1->mayLoadOrStore() && I2->mayLoadOrStore()) {
2757 GLoadStore *LS1 = dyn_cast<GLoadStore>(I1);
2758 GLoadStore *LS2 = dyn_cast<GLoadStore>(I2);
2759 if (!LS1 || !LS2)
2760 return false;
2761
2762 if (!I2->isDereferenceableInvariantLoad() ||
2763 (LS1->getMemSizeInBits() != LS2->getMemSizeInBits()))
2764 return false;
2765 }
2766
2767 // Check for physical registers on the instructions first to avoid cases
2768 // like this:
2769 //
2770 // %a = COPY $physreg
2771 // ...
2772 // SOMETHING implicit-def $physreg
2773 // ...
2774 // %b = COPY $physreg
2775 //
2776 // These copies are not equivalent.
2777 if (any_of(I1->uses(), [](const MachineOperand &MO) {
2778 return MO.isReg() && MO.getReg().isPhysical();
2779 })) {
2780 // Check if we have a case like this:
2781 //
2782 // %a = COPY $physreg
2783 // %b = COPY %a
2784 //
2785 // In this case, I1 and I2 will both be equal to %a = COPY $physreg.
2786 // From that, we know that they must have the same value, since they must
2787 // have come from the same COPY.
2788 return I1->isIdenticalTo(*I2);
2789 }
2790
2791 // We don't have any physical registers, so we don't necessarily need the
2792 // same vreg defs.
2793 //
2794 // On the off-chance that there's some target instruction feeding into the
2795 // instruction, let's use produceSameValue instead of isIdenticalTo.
2796 if (Builder.getTII().produceSameValue(*I1, *I2, &MRI)) {
2797 // Handle instructions with multiple defs that produce the same values. The
2798 // values are the same for operands with the same index.
2799 // %0:_(s8), %1:_(s8), %2:_(s8), %3:_(s8) = G_UNMERGE_VALUES %4:_(<4 x s8>)
2800 // %5:_(s8), %6:_(s8), %7:_(s8), %8:_(s8) = G_UNMERGE_VALUES %4:_(<4 x s8>)
2801 // Here I1 and I2 are different instructions that produce the same values:
2802 // %1 and %6 are the same, while %1 and %7 are not.
2803 return I1->findRegisterDefOperandIdx(InstAndDef1->Reg, /*TRI=*/nullptr) ==
2804 I2->findRegisterDefOperandIdx(InstAndDef2->Reg, /*TRI=*/nullptr);
2805 }
2806 return false;
2807}
2808
2809bool CombinerHelper::matchConstantOp(const MachineOperand &MOP, int64_t C) {
2810 if (!MOP.isReg())
2811 return false;
2812 auto *MI = MRI.getVRegDef(MOP.getReg());
2813 auto MaybeCst = isConstantOrConstantSplatVector(*MI, MRI);
2814 return MaybeCst && MaybeCst->getBitWidth() <= 64 &&
2815 MaybeCst->getSExtValue() == C;
2816}
2817
2818bool CombinerHelper::matchConstantFPOp(const MachineOperand &MOP, double C) {
2819 if (!MOP.isReg())
2820 return false;
2821 std::optional<FPValueAndVReg> MaybeCst;
2822 if (!mi_match(MOP.getReg(), MRI, m_GFCstOrSplat(MaybeCst)))
2823 return false;
2824
2825 return MaybeCst->Value.isExactlyValue(C);
2826}
2827
2828void CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI,
2829 unsigned OpIdx) {
2830 assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
2831 Register OldReg = MI.getOperand(0).getReg();
2832 Register Replacement = MI.getOperand(OpIdx).getReg();
2833 assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
2834 MI.eraseFromParent();
2835 replaceRegWith(MRI, OldReg, Replacement);
2836}
2837
2838void CombinerHelper::replaceSingleDefInstWithReg(MachineInstr &MI,
2839 Register Replacement) {
2840 assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
2841 Register OldReg = MI.getOperand(0).getReg();
2842 assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
2843 MI.eraseFromParent();
2844 replaceRegWith(MRI, OldReg, Replacement);
2845}
2846
2847bool CombinerHelper::matchConstantLargerBitWidth(MachineInstr &MI,
2848 unsigned ConstIdx) {
2849 Register ConstReg = MI.getOperand(ConstIdx).getReg();
2850 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
2851
2852 // Get the shift amount
2853 auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstReg, MRI);
2854 if (!VRegAndVal)
2855 return false;
2856
2857 // Return true if the shift amount is >= the bitwidth.
2858 return (VRegAndVal->Value.uge(DstTy.getSizeInBits()));
2859}
2860
2861void CombinerHelper::applyFunnelShiftConstantModulo(MachineInstr &MI) {
2862 assert((MI.getOpcode() == TargetOpcode::G_FSHL ||
2863 MI.getOpcode() == TargetOpcode::G_FSHR) &&
2864 "This is not a funnel shift operation");
2865
2866 Register ConstReg = MI.getOperand(3).getReg();
2867 LLT ConstTy = MRI.getType(ConstReg);
2868 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
2869
2870 auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstReg, MRI);
2871 assert((VRegAndVal) && "Value is not a constant");
2872
2873 // Calculate the new Shift Amount = Old Shift Amount % BitWidth
2874 APInt NewConst = VRegAndVal->Value.urem(
2875 APInt(ConstTy.getSizeInBits(), DstTy.getScalarSizeInBits()));
2876
2877 auto NewConstInstr = Builder.buildConstant(ConstTy, NewConst.getZExtValue());
2878 Builder.buildInstr(
2879 MI.getOpcode(), {MI.getOperand(0)},
2880 {MI.getOperand(1), MI.getOperand(2), NewConstInstr.getReg(0)});
2881
2882 MI.eraseFromParent();
2883}
2884
2885bool CombinerHelper::matchSelectSameVal(MachineInstr &MI) {
2886 assert(MI.getOpcode() == TargetOpcode::G_SELECT);
2887 // Match (cond ? x : x)
2888 return matchEqualDefs(MI.getOperand(2), MI.getOperand(3)) &&
2889 canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(2).getReg(),
2890 MRI);
2891}
2892
2893bool CombinerHelper::matchBinOpSameVal(MachineInstr &MI) {
2894 return matchEqualDefs(MI.getOperand(1), MI.getOperand(2)) &&
2895 canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(),
2896 MRI);
2897}
2898
2899bool CombinerHelper::matchOperandIsZero(MachineInstr &MI, unsigned OpIdx) {
2900 return matchConstantOp(MI.getOperand(OpIdx), 0) &&
2901 canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(OpIdx).getReg(),
2902 MRI);
2903}
2904
2905bool CombinerHelper::matchOperandIsUndef(MachineInstr &MI, unsigned OpIdx) {
2906 MachineOperand &MO = MI.getOperand(OpIdx);
2907 return MO.isReg() &&
2908 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
2909}
2910
2911bool CombinerHelper::matchOperandIsKnownToBeAPowerOfTwo(MachineInstr &MI,
2912 unsigned OpIdx) {
2913 MachineOperand &MO = MI.getOperand(OpIdx);
2914 return isKnownToBeAPowerOfTwo(MO.getReg(), MRI, KB);
2915}
2916
2917void CombinerHelper::replaceInstWithFConstant(MachineInstr &MI, double C) {
2918 assert(MI.getNumDefs() == 1 && "Expected only one def?");
2919 Builder.buildFConstant(MI.getOperand(0), C);
2920 MI.eraseFromParent();
2921}
2922
2923void CombinerHelper::replaceInstWithConstant(MachineInstr &MI, int64_t C) {
2924 assert(MI.getNumDefs() == 1 && "Expected only one def?");
2925 Builder.buildConstant(MI.getOperand(0), C);
2926 MI.eraseFromParent();
2927}
2928
2929void CombinerHelper::replaceInstWithConstant(MachineInstr &MI, APInt C) {
2930 assert(MI.getNumDefs() == 1 && "Expected only one def?");
2931 Builder.buildConstant(MI.getOperand(0), C);
2932 MI.eraseFromParent();
2933}
2934
2935void CombinerHelper::replaceInstWithFConstant(MachineInstr &MI,
2936 ConstantFP *CFP) {
2937 assert(MI.getNumDefs() == 1 && "Expected only one def?");
2938 Builder.buildFConstant(MI.getOperand(0), CFP->getValueAPF());
2939 MI.eraseFromParent();
2940}
2941
2942void CombinerHelper::replaceInstWithUndef(MachineInstr &MI) {
2943 assert(MI.getNumDefs() == 1 && "Expected only one def?");
2944 Builder.buildUndef(MI.getOperand(0));
2945 MI.eraseFromParent();
2946}
2947
2948bool CombinerHelper::matchSimplifyAddToSub(
2949 MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) {
2950 Register LHS = MI.getOperand(1).getReg();
2951 Register RHS = MI.getOperand(2).getReg();
2952 Register &NewLHS = std::get<0>(MatchInfo);
2953 Register &NewRHS = std::get<1>(MatchInfo);
2954
2955 // Helper lambda to check for opportunities for
2956 // ((0-A) + B) -> B - A
2957 // (A + (0-B)) -> A - B
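// Illustrative instance (registers assumed): if %a = G_SUB 0, %x, then
//   G_ADD %a, %y -> G_SUB %y, %x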
2958 auto CheckFold = [&](Register &MaybeSub, Register &MaybeNewLHS) {
2959 if (!mi_match(MaybeSub, MRI, m_Neg(m_Reg(NewRHS))))
2960 return false;
2961 NewLHS = MaybeNewLHS;
2962 return true;
2963 };
2964
2965 return CheckFold(LHS, RHS) || CheckFold(RHS, LHS);
2966}
2967
2968bool CombinerHelper::matchCombineInsertVecElts(
2969 MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) {
2970 assert(MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT &&
2971 "Invalid opcode");
2972 Register DstReg = MI.getOperand(0).getReg();
2973 LLT DstTy = MRI.getType(DstReg);
2974 assert(DstTy.isVector() && "Invalid G_INSERT_VECTOR_ELT?");
2975 unsigned NumElts = DstTy.getNumElements();
2976 // If this MI is part of a sequence of insert_vec_elts, then
2977 // don't do the combine in the middle of the sequence.
2978 if (MRI.hasOneUse(DstReg) && MRI.use_instr_begin(DstReg)->getOpcode() ==
2979 TargetOpcode::G_INSERT_VECTOR_ELT)
2980 return false;
2981 MachineInstr *CurrInst = &MI;
2982 MachineInstr *TmpInst;
2983 int64_t IntImm;
2984 Register TmpReg;
2985 MatchInfo.resize(NumElts);
2986 while (mi_match(
2987 CurrInst->getOperand(0).getReg(), MRI,
2988 m_GInsertVecElt(m_MInstr(TmpInst), m_Reg(TmpReg), m_ICst(IntImm)))) {
2989 if (IntImm >= NumElts || IntImm < 0)
2990 return false;
2991 if (!MatchInfo[IntImm])
2992 MatchInfo[IntImm] = TmpReg;
2993 CurrInst = TmpInst;
2994 }
2995 // Variable index.
2996 if (CurrInst->getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
2997 return false;
2998 if (TmpInst->getOpcode() == TargetOpcode::G_BUILD_VECTOR) {
2999 for (unsigned I = 1; I < TmpInst->getNumOperands(); ++I) {
3000 if (!MatchInfo[I - 1].isValid())
3001 MatchInfo[I - 1] = TmpInst->getOperand(I).getReg();
3002 }
3003 return true;
3004 }
3005 // If we didn't end in a G_IMPLICIT_DEF and the source is not fully
3006 // overwritten, bail out.
3007 return TmpInst->getOpcode() == TargetOpcode::G_IMPLICIT_DEF ||
3008 all_of(MatchInfo, [](Register Reg) { return !!Reg; });
3009}
3010
3011void CombinerHelper::applyCombineInsertVecElts(
3012 MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) {
3013 Register UndefReg;
3014 auto GetUndef = [&]() {
3015 if (UndefReg)
3016 return UndefReg;
3017 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
3018 UndefReg = Builder.buildUndef(DstTy.getScalarType()).getReg(0);
3019 return UndefReg;
3020 };
3021 for (unsigned I = 0; I < MatchInfo.size(); ++I) {
3022 if (!MatchInfo[I])
3023 MatchInfo[I] = GetUndef();
3024 }
3025 Builder.buildBuildVector(MI.getOperand(0).getReg(), MatchInfo);
3026 MI.eraseFromParent();
3027}
3028
3029void CombinerHelper::applySimplifyAddToSub(
3030 MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) {
3031 Register SubLHS, SubRHS;
3032 std::tie(SubLHS, SubRHS) = MatchInfo;
3033 Builder.buildSub(MI.getOperand(0).getReg(), SubLHS, SubRHS);
3034 MI.eraseFromParent();
3035}
3036
3037bool CombinerHelper::matchHoistLogicOpWithSameOpcodeHands(
3038 MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) {
3039 // Matches: logic (hand x, ...), (hand y, ...) -> hand (logic x, y), ...
3040 //
3041 // Creates the new hand + logic instruction (but does not insert them.)
3042 //
3043 // On success, MatchInfo is populated with the new instructions. These are
3044 // inserted in applyHoistLogicOpWithSameOpcodeHands.
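// Illustrative instance (types assumed): with s32 sources zero-extended to s64,
//   %xe = G_ZEXT %x; %ye = G_ZEXT %y; %dst = G_AND %xe, %ye
// hoists to
//   %and = G_AND %x, %y (s32); %dst = G_ZEXT %and (s64)
// since the AND of zero-extends equals the zero-extend of the AND.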
3045 unsigned LogicOpcode = MI.getOpcode();
3046 assert(LogicOpcode == TargetOpcode::G_AND ||
3047 LogicOpcode == TargetOpcode::G_OR ||
3048 LogicOpcode == TargetOpcode::G_XOR);
3049 MachineIRBuilder MIB(MI);
3050 Register Dst = MI.getOperand(0).getReg();
3051 Register LHSReg = MI.getOperand(1).getReg();
3052 Register RHSReg = MI.getOperand(2).getReg();
3053
3054 // Don't recompute anything.
3055 if (!MRI.hasOneNonDBGUse(LHSReg) || !MRI.hasOneNonDBGUse(RHSReg))
3056 return false;
3057
3058 // Make sure we have (hand x, ...), (hand y, ...)
3059 MachineInstr *LeftHandInst = getDefIgnoringCopies(LHSReg, MRI);
3060 MachineInstr *RightHandInst = getDefIgnoringCopies(RHSReg, MRI);
3061 if (!LeftHandInst || !RightHandInst)
3062 return false;
3063 unsigned HandOpcode = LeftHandInst->getOpcode();
3064 if (HandOpcode != RightHandInst->getOpcode())
3065 return false;
3066 if (!LeftHandInst->getOperand(1).isReg() ||
3067 !RightHandInst->getOperand(1).isReg())
3068 return false;
3069
3070 // Make sure the types match up, and if we're doing this post-legalization,
3071 // we end up with legal types.
3072 Register X = LeftHandInst->getOperand(1).getReg();
3073 Register Y = RightHandInst->getOperand(1).getReg();
3074 LLT XTy = MRI.getType(X);
3075 LLT YTy = MRI.getType(Y);
3076 if (!XTy.isValid() || XTy != YTy)
3077 return false;
3078
3079 // Optional extra source register.
3080 Register ExtraHandOpSrcReg;
3081 switch (HandOpcode) {
3082 default:
3083 return false;
3084 case TargetOpcode::G_ANYEXT:
3085 case TargetOpcode::G_SEXT:
3086 case TargetOpcode::G_ZEXT: {
3087 // Match: logic (ext X), (ext Y) --> ext (logic X, Y)
3088 break;
3089 }
3090 case TargetOpcode::G_AND:
3091 case TargetOpcode::G_ASHR:
3092 case TargetOpcode::G_LSHR:
3093 case TargetOpcode::G_SHL: {
3094 // Match: logic (binop x, z), (binop y, z) -> binop (logic x, y), z
3095 MachineOperand &ZOp = LeftHandInst->getOperand(2);
3096 if (!matchEqualDefs(ZOp, RightHandInst->getOperand(2)))
3097 return false;
3098 ExtraHandOpSrcReg = ZOp.getReg();
3099 break;
3100 }
3101 }
3102
3103 if (!isLegalOrBeforeLegalizer({LogicOpcode, {XTy, YTy}}))
3104 return false;
3105
3106 // Record the steps to build the new instructions.
3107 //
3108 // Steps to build (logic x, y)
3109 auto NewLogicDst = MRI.createGenericVirtualRegister(XTy);
3110 OperandBuildSteps LogicBuildSteps = {
3111 [=](MachineInstrBuilder &MIB) { MIB.addDef(NewLogicDst); },
3112 [=](MachineInstrBuilder &MIB) { MIB.addReg(X); },
3113 [=](MachineInstrBuilder &MIB) { MIB.addReg(Y); }};
3114 InstructionBuildSteps LogicSteps(LogicOpcode, LogicBuildSteps);
3115
3116 // Steps to build hand (logic x, y), ...z
3117 OperandBuildSteps HandBuildSteps = {
3118 [=](MachineInstrBuilder &MIB) { MIB.addDef(Dst); },
3119 [=](MachineInstrBuilder &MIB) { MIB.addReg(NewLogicDst); }};
3120 if (ExtraHandOpSrcReg.isValid())
3121 HandBuildSteps.push_back(
3122 [=](MachineInstrBuilder &MIB) { MIB.addReg(ExtraHandOpSrcReg); });
3123 InstructionBuildSteps HandSteps(HandOpcode, HandBuildSteps);
3124
3125 MatchInfo = InstructionStepsMatchInfo({LogicSteps, HandSteps});
3126 return true;
3127}
3128
3129void CombinerHelper::applyBuildInstructionSteps(
3130 MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) {
3131 assert(MatchInfo.InstrsToBuild.size() &&
3132 "Expected at least one instr to build?");
3133 for (auto &InstrToBuild : MatchInfo.InstrsToBuild) {
3134 assert(InstrToBuild.Opcode && "Expected a valid opcode?");
3135 assert(InstrToBuild.OperandFns.size() && "Expected at least one operand?");
3136 MachineInstrBuilder Instr = Builder.buildInstr(InstrToBuild.Opcode);
3137 for (auto &OperandFn : InstrToBuild.OperandFns)
3138 OperandFn(Instr);
3139 }
3140 MI.eraseFromParent();
3141}
3142
3143bool CombinerHelper::matchAshrShlToSextInreg(
3144 MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) {
3145 assert(MI.getOpcode() == TargetOpcode::G_ASHR);
3146 int64_t ShlCst, AshrCst;
3147 Register Src;
3148 if (!mi_match(MI.getOperand(0).getReg(), MRI,
3149 m_GAShr(m_GShl(m_Reg(Src), m_ICstOrSplat(ShlCst)),
3150 m_ICstOrSplat(AshrCst))))
3151 return false;
3152 if (ShlCst != AshrCst)
3153 return false;
3154 if (!isLegalOrBeforeLegalizer(
3155 {TargetOpcode::G_SEXT_INREG, {MRI.getType(Src)}}))
3156 return false;
3157 MatchInfo = std::make_tuple(Src, ShlCst);
3158 return true;
3159}
3160
3161void CombinerHelper::applyAshShlToSextInreg(
3162 MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) {
3163 assert(MI.getOpcode() == TargetOpcode::G_ASHR);
3164 Register Src;
3165 int64_t ShiftAmt;
3166 std::tie(Src, ShiftAmt) = MatchInfo;
3167 unsigned Size = MRI.getType(Src).getScalarSizeInBits();
3168 Builder.buildSExtInReg(MI.getOperand(0).getReg(), Src, Size - ShiftAmt);
3169 MI.eraseFromParent();
3170}
3171
3172/// and(and(x, C1), C2) -> C1&C2 ? and(x, C1&C2) : 0
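/// Illustrative instance (constants assumed): with C1 = 0x0ff0 and C2 = 0x00ff
/// this becomes and(x, 0x00f0); with C1 = 0xff00 and C2 = 0x00ff the mask is
/// empty and the result folds to the constant 0.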
3173bool CombinerHelper::matchOverlappingAnd(
3174 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
3175 assert(MI.getOpcode() == TargetOpcode::G_AND);
3176
3177 Register Dst = MI.getOperand(0).getReg();
3178 LLT Ty = MRI.getType(Dst);
3179
3180 Register R;
3181 int64_t C1;
3182 int64_t C2;
3183 if (!mi_match(
3184 Dst, MRI,
3185 m_GAnd(m_GAnd(m_Reg(R), m_ICst(C1)), m_ICst(C2))))
3186 return false;
3187
3188 MatchInfo = [=](MachineIRBuilder &B) {
3189 if (C1 & C2) {
3190 B.buildAnd(Dst, R, B.buildConstant(Ty, C1 & C2));
3191 return;
3192 }
3193 auto Zero = B.buildConstant(Ty, 0);
3194 replaceRegWith(MRI, Dst, Zero->getOperand(0).getReg());
3195 };
3196 return true;
3197}
3198
3199bool CombinerHelper::matchRedundantAnd(MachineInstr &MI,
3200 Register &Replacement) {
3201 // Given
3202 //
3203 // %y:_(sN) = G_SOMETHING
3204 // %x:_(sN) = G_SOMETHING
3205 // %res:_(sN) = G_AND %x, %y
3206 //
3207 // Eliminate the G_AND when it is known that x & y == x or x & y == y.
3208 //
3209 // Patterns like this can appear as a result of legalization. E.g.
3210 //
3211 // %cmp:_(s32) = G_ICMP intpred(pred), %x(s32), %y
3212 // %one:_(s32) = G_CONSTANT i32 1
3213 // %and:_(s32) = G_AND %cmp, %one
3214 //
3215 // In this case, G_ICMP only produces a single bit, so x & 1 == x.
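// Illustrative instance (known bits assumed): if the high 31 bits of %cmp are
// known zero and %one is the constant 1, then LHSBits.Zero | RHSBits.One is
// all ones, so the G_AND can be replaced by %cmp directly.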
3216 assert(MI.getOpcode() == TargetOpcode::G_AND);
3217 if (!KB)
3218 return false;
3219
3220 Register AndDst = MI.getOperand(0).getReg();
3221 Register LHS = MI.getOperand(1).getReg();
3222 Register RHS = MI.getOperand(2).getReg();
3223 KnownBits LHSBits = KB->getKnownBits(LHS);
3224 KnownBits RHSBits = KB->getKnownBits(RHS);
3225
3226 // Check that x & Mask == x.
3227 // x & 1 == x, always
3228 // x & 0 == x, only if x is also 0
3229 // Meaning Mask has no effect if every bit is either one in Mask or zero in x.
3230 //
3231 // Check if we can replace AndDst with the LHS of the G_AND
3232 if (canReplaceReg(AndDst, LHS, MRI) &&
3233 (LHSBits.Zero | RHSBits.One).isAllOnes()) {
3234 Replacement = LHS;
3235 return true;
3236 }
3237
3238 // Check if we can replace AndDst with the RHS of the G_AND
3239 if (canReplaceReg(AndDst, RHS, MRI) &&
3240 (LHSBits.One | RHSBits.Zero).isAllOnes()) {
3241 Replacement = RHS;
3242 return true;
3243 }
3244
3245 return false;
3246}
3247
3248bool CombinerHelper::matchRedundantOr(MachineInstr &MI, Register &Replacement) {
3249 // Given
3250 //
3251 // %y:_(sN) = G_SOMETHING
3252 // %x:_(sN) = G_SOMETHING
3253 // %res:_(sN) = G_OR %x, %y
3254 //
3255 // Eliminate the G_OR when it is known that x | y == x or x | y == y.
3256 assert(MI.getOpcode() == TargetOpcode::G_OR);
3257 if (!KB)
3258 return false;
3259
3260 Register OrDst = MI.getOperand(0).getReg();
3261 Register LHS = MI.getOperand(1).getReg();
3262 Register RHS = MI.getOperand(2).getReg();
3263 KnownBits LHSBits = KB->getKnownBits(LHS);
3264 KnownBits RHSBits = KB->getKnownBits(RHS);
3265
3266 // Check that x | Mask == x.
3267 // x | 0 == x, always
3268 // x | 1 == x, only if x is also 1
3269 // Meaning Mask has no effect if every bit is either zero in Mask or one in x.
3270 //
3271 // Check if we can replace OrDst with the LHS of the G_OR
3272 if (canReplaceReg(OrDst, LHS, MRI) &&
3273 (LHSBits.One | RHSBits.Zero).isAllOnes()) {
3274 Replacement = LHS;
3275 return true;
3276 }
3277
3278 // Check if we can replace OrDst with the RHS of the G_OR
3279 if (canReplaceReg(OrDst, RHS, MRI) &&
3280 (LHSBits.Zero | RHSBits.One).isAllOnes()) {
3281 Replacement = RHS;
3282 return true;
3283 }
3284
3285 return false;
3286}
3287
3288bool CombinerHelper::matchRedundantSExtInReg(MachineInstr &MI) {
3289 // If the input is already sign extended, just drop the extension.
3290 Register Src = MI.getOperand(1).getReg();
3291 unsigned ExtBits = MI.getOperand(2).getImm();
3292 unsigned TypeSize = MRI.getType(Src).getScalarSizeInBits();
3293 return KB->computeNumSignBits(Src) >= (TypeSize - ExtBits + 1);
3294}
3295
3296static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits,
3297 int64_t Cst, bool IsVector, bool IsFP) {
3298 // For i1, Cst will always be -1 regardless of boolean contents.
3299 return (ScalarSizeBits == 1 && Cst == -1) ||
3300 isConstTrueVal(TLI, Cst, IsVector, IsFP);
3301}
3302
3303bool CombinerHelper::matchNotCmp(MachineInstr &MI,
3304 SmallVectorImpl<Register> &RegsToNegate) {
3305 assert(MI.getOpcode() == TargetOpcode::G_XOR);
3306 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3307 const auto &TLI = *Builder.getMF().getSubtarget().getTargetLowering();
3308 Register XorSrc;
3309 Register CstReg;
3310 // We match xor(src, true) here.
3311 if (!mi_match(MI.getOperand(0).getReg(), MRI,
3312 m_GXor(m_Reg(XorSrc), m_Reg(CstReg))))
3313 return false;
3314
3315 if (!MRI.hasOneNonDBGUse(XorSrc))
3316 return false;
3317
3318 // Check that XorSrc is the root of a tree of comparisons combined with ANDs
3319 // and ORs. The suffix of RegsToNegate starting from index I is used as a
3320 // work list of tree nodes to visit.
3321 RegsToNegate.push_back(XorSrc);
3322 // Remember whether the comparisons are all integer or all floating point.
3323 bool IsInt = false;
3324 bool IsFP = false;
3325 for (unsigned I = 0; I < RegsToNegate.size(); ++I) {
3326 Register Reg = RegsToNegate[I];
3327 if (!MRI.hasOneNonDBGUse(Reg))
3328 return false;
3329 MachineInstr *Def = MRI.getVRegDef(Reg);
3330 switch (Def->getOpcode()) {
3331 default:
3332 // Don't match if the tree contains anything other than ANDs, ORs and
3333 // comparisons.
3334 return false;
3335 case TargetOpcode::G_ICMP:
3336 if (IsFP)
3337 return false;
3338 IsInt = true;
3339 // When we apply the combine we will invert the predicate.
3340 break;
3341 case TargetOpcode::G_FCMP:
3342 if (IsInt)
3343 return false;
3344 IsFP = true;
3345 // When we apply the combine we will invert the predicate.
3346 break;
3347 case TargetOpcode::G_AND:
3348 case TargetOpcode::G_OR:
3349 // Implement De Morgan's laws:
3350 // ~(x & y) -> ~x | ~y
3351 // ~(x | y) -> ~x & ~y
3352 // When we apply the combine we will change the opcode and recursively
3353 // negate the operands.
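// Illustrative instance:
//   xor (and (icmp eq a, b), (icmp slt c, d)), true
// negates to
//   or (icmp ne a, b), (icmp sge c, d)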
3354 RegsToNegate.push_back(Def->getOperand(1).getReg());
3355 RegsToNegate.push_back(Def->getOperand(2).getReg());
3356 break;
3357 }
3358 }
3359
3360 // Now we know whether the comparisons are integer or floating point, check
3361 // the constant in the xor.
3362 int64_t Cst;
3363 if (Ty.isVector()) {
3364 MachineInstr *CstDef = MRI.getVRegDef(CstReg);
3365 auto MaybeCst = getIConstantSplatSExtVal(*CstDef, MRI);
3366 if (!MaybeCst)
3367 return false;
3368 if (!isConstValidTrue(TLI, Ty.getScalarSizeInBits(), *MaybeCst, true, IsFP))
3369 return false;
3370 } else {
3371 if (!mi_match(CstReg, MRI, m_ICst(Cst)))
3372 return false;
3373 if (!isConstValidTrue(TLI, Ty.getSizeInBits(), Cst, false, IsFP))
3374 return false;
3375 }
3376
3377 return true;
3378}
3379
3380void CombinerHelper::applyNotCmp(MachineInstr &MI,
3381 SmallVectorImpl<Register> &RegsToNegate) {
3382 for (Register Reg : RegsToNegate) {
3383 MachineInstr *Def = MRI.getVRegDef(Reg);
3384 Observer.changingInstr(*Def);
3385 // For each comparison, invert the opcode. For each AND and OR, change the
3386 // opcode.
3387 switch (Def->getOpcode()) {
3388 default:
3389 llvm_unreachable("Unexpected opcode");
3390 case TargetOpcode::G_ICMP:
3391 case TargetOpcode::G_FCMP: {
3392 MachineOperand &PredOp = Def->getOperand(1);
3393 CmpInst::Predicate NewP = CmpInst::getInversePredicate(
3394 (CmpInst::Predicate)PredOp.getPredicate());
3395 PredOp.setPredicate(NewP);
3396 break;
3397 }
3398 case TargetOpcode::G_AND:
3399 Def->setDesc(Builder.getTII().get(TargetOpcode::G_OR));
3400 break;
3401 case TargetOpcode::G_OR:
3402 Def->setDesc(Builder.getTII().get(TargetOpcode::G_AND));
3403 break;
3404 }
3405 Observer.changedInstr(*Def);
3406 }
3407
3408 replaceRegWith(MRI, MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
3409 MI.eraseFromParent();
3410}
3411
3412bool CombinerHelper::matchXorOfAndWithSameReg(
3413 MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
3414 // Match (xor (and x, y), y) (or any of its commuted cases)
3415 assert(MI.getOpcode() == TargetOpcode::G_XOR);
3416 Register &X = MatchInfo.first;
3417 Register &Y = MatchInfo.second;
3418 Register AndReg = MI.getOperand(1).getReg();
3419 Register SharedReg = MI.getOperand(2).getReg();
3420
3421 // Find a G_AND on either side of the G_XOR.
3422 // Look for one of
3423 //
3424 // (xor (and x, y), SharedReg)
3425 // (xor SharedReg, (and x, y))
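// Illustrative instance (with y == SharedReg): (x & y) ^ y == (~x) & y,
// since the bits of y that are also set in x cancel in the xor, leaving
// exactly the bits of y where x is zero.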
3426 if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y)))) {
3427 std::swap(AndReg, SharedReg);
3428 if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y))))
3429 return false;
3430 }
3431
3432 // Only do this if we'll eliminate the G_AND.
3433 if (!MRI.hasOneNonDBGUse(AndReg))
3434 return false;
3435
3436 // We can combine if SharedReg is the same as either the LHS or RHS of the
3437 // G_AND.
3438 if (Y != SharedReg)
3439 std::swap(X, Y);
3440 return Y == SharedReg;
3441}
3442
3443void CombinerHelper::applyXorOfAndWithSameReg(
3444 MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
3445 // Fold (xor (and x, y), y) -> (and (not x), y)
3446 Register X, Y;
3447 std::tie(X, Y) = MatchInfo;
3448 auto Not = Builder.buildNot(MRI.getType(X), X);
3449 Observer.changingInstr(MI);
3450 MI.setDesc(Builder.getTII().get(TargetOpcode::G_AND));
3451 MI.getOperand(1).setReg(Not->getOperand(0).getReg());
3452 MI.getOperand(2).setReg(Y);
3454}
3455
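/// Fold a G_PTR_ADD with an all-zero base pointer into a G_INTTOPTR of its
/// offset. This is only valid for integral address spaces, which the match
/// below checks.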
3457 auto &PtrAdd = cast<GPtrAdd>(MI);
3458 Register DstReg = PtrAdd.getReg(0);
3459 LLT Ty = MRI.getType(DstReg);
3461
3462 if (DL.isNonIntegralAddressSpace(Ty.getScalarType().getAddressSpace()))
3463 return false;
3464
3465 if (Ty.isPointer()) {
3466 auto ConstVal = getIConstantVRegVal(PtrAdd.getBaseReg(), MRI);
3467 return ConstVal && *ConstVal == 0;
3468 }
3469
3470 assert(Ty.isVector() && "Expecting a vector type");
3471 const MachineInstr *VecMI = MRI.getVRegDef(PtrAdd.getBaseReg());
3472 return isBuildVectorAllZeros(*VecMI, MRI);
3473}
3474
3476 auto &PtrAdd = cast<GPtrAdd>(MI);
3477 Builder.buildIntToPtr(PtrAdd.getReg(0), PtrAdd.getOffsetReg());
3478 PtrAdd.eraseFromParent();
3479}
3480
3481/// The second source operand is known to be a power of 2.
3483 Register DstReg = MI.getOperand(0).getReg();
3484 Register Src0 = MI.getOperand(1).getReg();
3485 Register Pow2Src1 = MI.getOperand(2).getReg();
3486 LLT Ty = MRI.getType(DstReg);
3487
3488 // Fold (urem x, pow2) -> (and x, pow2-1)
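  // e.g. x urem 8 == x & 7.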
3489 auto NegOne = Builder.buildConstant(Ty, -1);
3490 auto Add = Builder.buildAdd(Ty, Pow2Src1, NegOne);
3491 Builder.buildAnd(DstReg, Src0, Add);
3492 MI.eraseFromParent();
3493}
3494
3496 unsigned &SelectOpNo) {
3497 Register LHS = MI.getOperand(1).getReg();
3498 Register RHS = MI.getOperand(2).getReg();
3499
3500 Register OtherOperandReg = RHS;
3501 SelectOpNo = 1;
3503
3504 // Don't do this unless the old select is going away. We want to eliminate the
3505 // binary operator, not replace a binop with a select.
3506 if (Select->getOpcode() != TargetOpcode::G_SELECT ||
3508 OtherOperandReg = LHS;
3509 SelectOpNo = 2;
3511 if (Select->getOpcode() != TargetOpcode::G_SELECT ||
3513 return false;
3514 }
3515
3516 MachineInstr *SelectLHS = MRI.getVRegDef(Select->getOperand(2).getReg());
3517 MachineInstr *SelectRHS = MRI.getVRegDef(Select->getOperand(3).getReg());
3518
3519 if (!isConstantOrConstantVector(*SelectLHS, MRI,
3520 /*AllowFP*/ true,
3521 /*AllowOpaqueConstants*/ false))
3522 return false;
3523 if (!isConstantOrConstantVector(*SelectRHS, MRI,
3524 /*AllowFP*/ true,
3525 /*AllowOpaqueConstants*/ false))
3526 return false;
3527
3528 unsigned BinOpcode = MI.getOpcode();
3529
3530 // We know that one of the operands is a select of constants. Now verify that
3531 // the other binary operator operand is either a constant, or we can handle a
3532 // variable.
3533 bool CanFoldNonConst =
3534 (BinOpcode == TargetOpcode::G_AND || BinOpcode == TargetOpcode::G_OR) &&
3535 (isNullOrNullSplat(*SelectLHS, MRI) ||
3536 isAllOnesOrAllOnesSplat(*SelectLHS, MRI)) &&
3537 (isNullOrNullSplat(*SelectRHS, MRI) ||
3538 isAllOnesOrAllOnesSplat(*SelectRHS, MRI));
3539 if (CanFoldNonConst)
3540 return true;
3541
3542 return isConstantOrConstantVector(*MRI.getVRegDef(OtherOperandReg), MRI,
3543 /*AllowFP*/ true,
3544 /*AllowOpaqueConstants*/ false);
3545}
3546
3547/// \p SelectOperand is the operand in binary operator \p MI that is the select
3548/// to fold.
3550 const unsigned &SelectOperand) {
3551 Register Dst = MI.getOperand(0).getReg();
3552 Register LHS = MI.getOperand(1).getReg();
3553 Register RHS = MI.getOperand(2).getReg();
3554 MachineInstr *Select = MRI.getVRegDef(MI.getOperand(SelectOperand).getReg());
3555
3556 Register SelectCond = Select->getOperand(1).getReg();
3557 Register SelectTrue = Select->getOperand(2).getReg();
3558 Register SelectFalse = Select->getOperand(3).getReg();
3559
3560 LLT Ty = MRI.getType(Dst);
3561 unsigned BinOpcode = MI.getOpcode();
3562
3563 Register FoldTrue, FoldFalse;
3564
3565 // We have a select-of-constants followed by a binary operator with a
3566 // constant. Eliminate the binop by pulling the constant math into the select.
3567 // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
3568 if (SelectOperand == 1) {
3569 // TODO: SelectionDAG verifies this actually constant folds before
3570 // committing to the combine.
3571
3572 FoldTrue = Builder.buildInstr(BinOpcode, {Ty}, {SelectTrue, RHS}).getReg(0);
3573 FoldFalse =
3574 Builder.buildInstr(BinOpcode, {Ty}, {SelectFalse, RHS}).getReg(0);
3575 } else {
3576 FoldTrue = Builder.buildInstr(BinOpcode, {Ty}, {LHS, SelectTrue}).getReg(0);
3577 FoldFalse =
3578 Builder.buildInstr(BinOpcode, {Ty}, {LHS, SelectFalse}).getReg(0);
3579 }
3580
3581 Builder.buildSelect(Dst, SelectCond, FoldTrue, FoldFalse, MI.getFlags());
3582 MI.eraseFromParent();
3583}
3584
3585std::optional<SmallVector<Register, 8>>
3586CombinerHelper::findCandidatesForLoadOrCombine(const MachineInstr *Root) const {
3587 assert(Root->getOpcode() == TargetOpcode::G_OR && "Expected G_OR only!");
3588 // We want to detect if Root is part of a tree which represents a bunch
3589 // of loads being merged into a larger load. We'll try to recognize patterns
3590 // like, for example:
3591 //
3592 // Reg Reg
3593 // \ /
3594 // OR_1 Reg
3595 // \ /
3596 // OR_2
3597 // \ Reg
3598 // .. /
3599 // Root
3600 //
3601 // Reg Reg Reg Reg
3602 // \ / \ /
3603 // OR_1 OR_2
3604 // \ /
3605 // \ /
3606 // ...
3607 // Root
3608 //
3609 // Each "Reg" may have been produced by a load + some arithmetic. This
3610 // function will save each of them.
3611 SmallVector<Register, 8> RegsToVisit;
3613
3614 // In the "worst" case, we're dealing with a load for each byte. So, there
3615 // are at most #bytes - 1 ORs.
3616 const unsigned MaxIter =
3617 MRI.getType(Root->getOperand(0).getReg()).getSizeInBytes() - 1;
3618 for (unsigned Iter = 0; Iter < MaxIter; ++Iter) {
3619 if (Ors.empty())
3620 break;
3621 const MachineInstr *Curr = Ors.pop_back_val();
3622 Register OrLHS = Curr->getOperand(1).getReg();
3623 Register OrRHS = Curr->getOperand(2).getReg();
3624
 3625 // In the combine, we want to eliminate the entire tree.
3626 if (!MRI.hasOneNonDBGUse(OrLHS) || !MRI.hasOneNonDBGUse(OrRHS))
3627 return std::nullopt;
3628
3629 // If it's a G_OR, save it and continue to walk. If it's not, then it's
3630 // something that may be a load + arithmetic.
3631 if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrLHS, MRI))
3632 Ors.push_back(Or);
3633 else
3634 RegsToVisit.push_back(OrLHS);
3635 if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrRHS, MRI))
3636 Ors.push_back(Or);
3637 else
3638 RegsToVisit.push_back(OrRHS);
3639 }
3640
3641 // We're going to try and merge each register into a wider power-of-2 type,
3642 // so we ought to have an even number of registers.
3643 if (RegsToVisit.empty() || RegsToVisit.size() % 2 != 0)
3644 return std::nullopt;
3645 return RegsToVisit;
3646}
3647
3648/// Helper function for findLoadOffsetsForLoadOrCombine.
3649///
3650/// Check if \p Reg is the result of loading a \p MemSizeInBits wide value,
3651/// and then moving that value into a specific byte offset.
3652///
3653/// e.g. x[i] << 24
3654///
3655/// \returns The load instruction and the byte offset it is moved into.
3656static std::optional<std::pair<GZExtLoad *, int64_t>>
3657matchLoadAndBytePosition(Register Reg, unsigned MemSizeInBits,
3658 const MachineRegisterInfo &MRI) {
3659 assert(MRI.hasOneNonDBGUse(Reg) &&
3660 "Expected Reg to only have one non-debug use?");
3661 Register MaybeLoad;
3662 int64_t Shift;
3663 if (!mi_match(Reg, MRI,
3664 m_OneNonDBGUse(m_GShl(m_Reg(MaybeLoad), m_ICst(Shift))))) {
3665 Shift = 0;
3666 MaybeLoad = Reg;
3667 }
3668
3669 if (Shift % MemSizeInBits != 0)
3670 return std::nullopt;
3671
3672 // TODO: Handle other types of loads.
3673 auto *Load = getOpcodeDef<GZExtLoad>(MaybeLoad, MRI);
3674 if (!Load)
3675 return std::nullopt;
3676
3677 if (!Load->isUnordered() || Load->getMemSizeInBits() != MemSizeInBits)
3678 return std::nullopt;
3679
3680 return std::make_pair(Load, Shift / MemSizeInBits);
3681}
3682
3683std::optional<std::tuple<GZExtLoad *, int64_t, GZExtLoad *>>
3684CombinerHelper::findLoadOffsetsForLoadOrCombine(
3686 const SmallVector<Register, 8> &RegsToVisit, const unsigned MemSizeInBits) {
3687
3688 // Each load found for the pattern. There should be one for each RegsToVisit.
3690
3691 // The lowest index used in any load. (The lowest "i" for each x[i].)
3692 int64_t LowestIdx = INT64_MAX;
3693
3694 // The load which uses the lowest index.
3695 GZExtLoad *LowestIdxLoad = nullptr;
3696
3697 // Keeps track of the load indices we see. We shouldn't see any indices twice.
3698 SmallSet<int64_t, 8> SeenIdx;
3699
3700 // Ensure each load is in the same MBB.
3701 // TODO: Support multiple MachineBasicBlocks.
3702 MachineBasicBlock *MBB = nullptr;
3703 const MachineMemOperand *MMO = nullptr;
3704
3705 // Earliest instruction-order load in the pattern.
3706 GZExtLoad *EarliestLoad = nullptr;
3707
3708 // Latest instruction-order load in the pattern.
3709 GZExtLoad *LatestLoad = nullptr;
3710
3711 // Base pointer which every load should share.
3713
3714 // We want to find a load for each register. Each load should have some
3715 // appropriate bit twiddling arithmetic. During this loop, we will also keep
3716 // track of the load which uses the lowest index. Later, we will check if we
3717 // can use its pointer in the final, combined load.
3718 for (auto Reg : RegsToVisit) {
 3719 // Find the load, and the byte position that its value will end up at in
 3720 // the final value (e.g. after being shifted).
3721 auto LoadAndPos = matchLoadAndBytePosition(Reg, MemSizeInBits, MRI);
3722 if (!LoadAndPos)
3723 return std::nullopt;
3724 GZExtLoad *Load;
3725 int64_t DstPos;
3726 std::tie(Load, DstPos) = *LoadAndPos;
3727
3728 // TODO: Handle multiple MachineBasicBlocks. Currently not handled because
3729 // it is difficult to check for stores/calls/etc between loads.
3730 MachineBasicBlock *LoadMBB = Load->getParent();
3731 if (!MBB)
3732 MBB = LoadMBB;
3733 if (LoadMBB != MBB)
3734 return std::nullopt;
3735
3736 // Make sure that the MachineMemOperands of every seen load are compatible.
3737 auto &LoadMMO = Load->getMMO();
3738 if (!MMO)
3739 MMO = &LoadMMO;
3740 if (MMO->getAddrSpace() != LoadMMO.getAddrSpace())
3741 return std::nullopt;
3742
3743 // Find out what the base pointer and index for the load is.
3744 Register LoadPtr;
3745 int64_t Idx;
3746 if (!mi_match(Load->getOperand(1).getReg(), MRI,
3747 m_GPtrAdd(m_Reg(LoadPtr), m_ICst(Idx)))) {
3748 LoadPtr = Load->getOperand(1).getReg();
3749 Idx = 0;
3750 }
3751
3752 // Don't combine things like a[i], a[i] -> a bigger load.
3753 if (!SeenIdx.insert(Idx).second)
3754 return std::nullopt;
3755
3756 // Every load must share the same base pointer; don't combine things like:
3757 //
3758 // a[i], b[i + 1] -> a bigger load.
3759 if (!BasePtr.isValid())
3760 BasePtr = LoadPtr;
3761 if (BasePtr != LoadPtr)
3762 return std::nullopt;
3763
3764 if (Idx < LowestIdx) {
3765 LowestIdx = Idx;
3766 LowestIdxLoad = Load;
3767 }
3768
3769 // Keep track of the byte offset that this load ends up at. If we have seen
3770 // the byte offset, then stop here. We do not want to combine:
3771 //
3772 // a[i] << 16, a[i + k] << 16 -> a bigger load.
3773 if (!MemOffset2Idx.try_emplace(DstPos, Idx).second)
3774 return std::nullopt;
3775 Loads.insert(Load);
3776
3777 // Keep track of the position of the earliest/latest loads in the pattern.
3778 // We will check that there are no load fold barriers between them later
3779 // on.
3780 //
3781 // FIXME: Is there a better way to check for load fold barriers?
3782 if (!EarliestLoad || dominates(*Load, *EarliestLoad))
3783 EarliestLoad = Load;
3784 if (!LatestLoad || dominates(*LatestLoad, *Load))
3785 LatestLoad = Load;
3786 }
3787
3788 // We found a load for each register. Let's check if each load satisfies the
3789 // pattern.
3790 assert(Loads.size() == RegsToVisit.size() &&
3791 "Expected to find a load for each register?");
3792 assert(EarliestLoad != LatestLoad && EarliestLoad &&
3793 LatestLoad && "Expected at least two loads?");
3794
3795 // Check if there are any stores, calls, etc. between any of the loads. If
3796 // there are, then we can't safely perform the combine.
3797 //
3798 // MaxIter is chosen based off the (worst case) number of iterations it
3799 // typically takes to succeed in the LLVM test suite plus some padding.
3800 //
3801 // FIXME: Is there a better way to check for load fold barriers?
3802 const unsigned MaxIter = 20;
3803 unsigned Iter = 0;
3804 for (const auto &MI : instructionsWithoutDebug(EarliestLoad->getIterator(),
3805 LatestLoad->getIterator())) {
3806 if (Loads.count(&MI))
3807 continue;
3808 if (MI.isLoadFoldBarrier())
3809 return std::nullopt;
3810 if (Iter++ == MaxIter)
3811 return std::nullopt;
3812 }
3813
3814 return std::make_tuple(LowestIdxLoad, LowestIdx, LatestLoad);
3815}
3816
3818 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
3819 assert(MI.getOpcode() == TargetOpcode::G_OR);
3820 MachineFunction &MF = *MI.getMF();
3821 // Assuming a little-endian target, transform:
3822 // s8 *a = ...
3823 // s32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
3824 // =>
3825 // s32 val = *((i32)a)
3826 //
3827 // s8 *a = ...
3828 // s32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
3829 // =>
3830 // s32 val = BSWAP(*((s32)a))
3831 Register Dst = MI.getOperand(0).getReg();
3832 LLT Ty = MRI.getType(Dst);
3833 if (Ty.isVector())
3834 return false;
3835
3836 // We need to combine at least two loads into this type. Since the smallest
3837 // possible load is into a byte, we need at least a 16-bit wide type.
3838 const unsigned WideMemSizeInBits = Ty.getSizeInBits();
3839 if (WideMemSizeInBits < 16 || WideMemSizeInBits % 8 != 0)
3840 return false;
3841
3842 // Match a collection of non-OR instructions in the pattern.
3843 auto RegsToVisit = findCandidatesForLoadOrCombine(&MI);
3844 if (!RegsToVisit)
3845 return false;
3846
3847 // We have a collection of non-OR instructions. Figure out how wide each of
3848 // the small loads should be based off of the number of potential loads we
3849 // found.
3850 const unsigned NarrowMemSizeInBits = WideMemSizeInBits / RegsToVisit->size();
3851 if (NarrowMemSizeInBits % 8 != 0)
3852 return false;
3853
3854 // Check if each register feeding into each OR is a load from the same
3855 // base pointer + some arithmetic.
3856 //
3857 // e.g. a[0], a[1] << 8, a[2] << 16, etc.
3858 //
3859 // Also verify that each of these ends up putting a[i] into the same memory
3860 // offset as a load into a wide type would.
3862 GZExtLoad *LowestIdxLoad, *LatestLoad;
3863 int64_t LowestIdx;
3864 auto MaybeLoadInfo = findLoadOffsetsForLoadOrCombine(
3865 MemOffset2Idx, *RegsToVisit, NarrowMemSizeInBits);
3866 if (!MaybeLoadInfo)
3867 return false;
3868 std::tie(LowestIdxLoad, LowestIdx, LatestLoad) = *MaybeLoadInfo;
3869
3870 // We have a bunch of loads being OR'd together. Using the addresses + offsets
3871 // we found before, check if this corresponds to a big or little endian byte
3872 // pattern. If it does, then we can represent it using a load + possibly a
3873 // BSWAP.
3874 bool IsBigEndianTarget = MF.getDataLayout().isBigEndian();
3875 std::optional<bool> IsBigEndian = isBigEndian(MemOffset2Idx, LowestIdx);
3876 if (!IsBigEndian)
3877 return false;
3878 bool NeedsBSwap = IsBigEndianTarget != *IsBigEndian;
3879 if (NeedsBSwap && !isLegalOrBeforeLegalizer({TargetOpcode::G_BSWAP, {Ty}}))
3880 return false;
3881
3882 // Make sure that the load from the lowest index produces offset 0 in the
3883 // final value.
3884 //
3885 // This ensures that we won't combine something like this:
3886 //
3887 // load x[i] -> byte 2
3888 // load x[i+1] -> byte 0 ---> wide_load x[i]
3889 // load x[i+2] -> byte 1
3890 const unsigned NumLoadsInTy = WideMemSizeInBits / NarrowMemSizeInBits;
3891 const unsigned ZeroByteOffset =
3892 *IsBigEndian
3893 ? bigEndianByteAt(NumLoadsInTy, 0)
3894 : littleEndianByteAt(NumLoadsInTy, 0);
3895 auto ZeroOffsetIdx = MemOffset2Idx.find(ZeroByteOffset);
3896 if (ZeroOffsetIdx == MemOffset2Idx.end() ||
3897 ZeroOffsetIdx->second != LowestIdx)
3898 return false;
3899
 3900 // We will reuse the pointer from the load which ends up at byte offset 0. It
3901 // may not use index 0.
3902 Register Ptr = LowestIdxLoad->getPointerReg();
3903 const MachineMemOperand &MMO = LowestIdxLoad->getMMO();
3904 LegalityQuery::MemDesc MMDesc(MMO);
3905 MMDesc.MemoryTy = Ty;
3907 {TargetOpcode::G_LOAD, {Ty, MRI.getType(Ptr)}, {MMDesc}}))
3908 return false;
3909 auto PtrInfo = MMO.getPointerInfo();
3910 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, WideMemSizeInBits / 8);
3911
3912 // Load must be allowed and fast on the target.
3914 auto &DL = MF.getDataLayout();
3915 unsigned Fast = 0;
3916 if (!getTargetLowering().allowsMemoryAccess(C, DL, Ty, *NewMMO, &Fast) ||
3917 !Fast)
3918 return false;
3919
3920 MatchInfo = [=](MachineIRBuilder &MIB) {
3921 MIB.setInstrAndDebugLoc(*LatestLoad);
3922 Register LoadDst = NeedsBSwap ? MRI.cloneVirtualRegister(Dst) : Dst;
3923 MIB.buildLoad(LoadDst, Ptr, *NewMMO);
3924 if (NeedsBSwap)
3925 MIB.buildBSwap(Dst, LoadDst);
3926 };
3927 return true;
3928}
3929
3931 MachineInstr *&ExtMI) {
3932 auto &PHI = cast<GPhi>(MI);
3933 Register DstReg = PHI.getReg(0);
3934
 3935 // TODO: Extending a vector may be expensive; don't do this until heuristics
3936 // are better.
3937 if (MRI.getType(DstReg).isVector())
3938 return false;
3939
3940 // Try to match a phi, whose only use is an extend.
3941 if (!MRI.hasOneNonDBGUse(DstReg))
3942 return false;
3943 ExtMI = &*MRI.use_instr_nodbg_begin(DstReg);
3944 switch (ExtMI->getOpcode()) {
3945 case TargetOpcode::G_ANYEXT:
3946 return true; // G_ANYEXT is usually free.
3947 case TargetOpcode::G_ZEXT:
3948 case TargetOpcode::G_SEXT:
3949 break;
3950 default:
3951 return false;
3952 }
3953
3954 // If the target is likely to fold this extend away, don't propagate.
3956 return false;
3957
3958 // We don't want to propagate the extends unless there's a good chance that
3959 // they'll be optimized in some way.
3960 // Collect the unique incoming values.
3962 for (unsigned I = 0; I < PHI.getNumIncomingValues(); ++I) {
3963 auto *DefMI = getDefIgnoringCopies(PHI.getIncomingValue(I), MRI);
3964 switch (DefMI->getOpcode()) {
3965 case TargetOpcode::G_LOAD:
3966 case TargetOpcode::G_TRUNC:
3967 case TargetOpcode::G_SEXT:
3968 case TargetOpcode::G_ZEXT:
3969 case TargetOpcode::G_ANYEXT:
3970 case TargetOpcode::G_CONSTANT:
3971 InSrcs.insert(DefMI);
 3972 // Don't try to propagate if there are too many places to create new
 3973 // extends; chances are it'll increase code size.
3974 if (InSrcs.size() > 2)
3975 return false;
3976 break;
3977 default:
3978 return false;
3979 }
3980 }
3981 return true;
3982}
3983
3985 MachineInstr *&ExtMI) {
3986 auto &PHI = cast<GPhi>(MI);
3987 Register DstReg = ExtMI->getOperand(0).getReg();
3988 LLT ExtTy = MRI.getType(DstReg);
3989
 3990 // Propagate the extension into the block of each incoming register's defining instruction.
3991 // Use a SetVector here because PHIs can have duplicate edges, and we want
3992 // deterministic iteration order.
3995 for (unsigned I = 0; I < PHI.getNumIncomingValues(); ++I) {
3996 auto SrcReg = PHI.getIncomingValue(I);
3997 auto *SrcMI = MRI.getVRegDef(SrcReg);
3998 if (!SrcMIs.insert(SrcMI))
3999 continue;
4000
4001 // Build an extend after each src inst.
4002 auto *MBB = SrcMI->getParent();
4003 MachineBasicBlock::iterator InsertPt = ++SrcMI->getIterator();
4004 if (InsertPt != MBB->end() && InsertPt->isPHI())
4005 InsertPt = MBB->getFirstNonPHI();
4006
4007 Builder.setInsertPt(*SrcMI->getParent(), InsertPt);
4008 Builder.setDebugLoc(MI.getDebugLoc());
4009 auto NewExt = Builder.buildExtOrTrunc(ExtMI->getOpcode(), ExtTy, SrcReg);
4010 OldToNewSrcMap[SrcMI] = NewExt;
4011 }
4012
4013 // Create a new phi with the extended inputs.
4015 auto NewPhi = Builder.buildInstrNoInsert(TargetOpcode::G_PHI);
4016 NewPhi.addDef(DstReg);
4017 for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) {
4018 if (!MO.isReg()) {
4019 NewPhi.addMBB(MO.getMBB());
4020 continue;
4021 }
4022 auto *NewSrc = OldToNewSrcMap[MRI.getVRegDef(MO.getReg())];
4023 NewPhi.addUse(NewSrc->getOperand(0).getReg());
4024 }
4025 Builder.insertInstr(NewPhi);
4026 ExtMI->eraseFromParent();
4027}
4028
4030 Register &Reg) {
4031 assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT);
4032 // If we have a constant index, look for a G_BUILD_VECTOR source
4033 // and find the source register that the index maps to.
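  // Illustrative sketch (registers are made up):
  //   %v:_(<4 x s32>) = G_BUILD_VECTOR %s0, %s1, %s2, %s3
  //   %e:_(s32) = G_EXTRACT_VECTOR_ELT %v, 2
  // can be rewritten to use %s2 directly.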
4034 Register SrcVec = MI.getOperand(1).getReg();
4035 LLT SrcTy = MRI.getType(SrcVec);
4036
4037 auto Cst = getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
4038 if (!Cst || Cst->Value.getZExtValue() >= SrcTy.getNumElements())
4039 return false;
4040
4041 unsigned VecIdx = Cst->Value.getZExtValue();
4042
4043 // Check if we have a build_vector or build_vector_trunc with an optional
4044 // trunc in front.
4045 MachineInstr *SrcVecMI = MRI.getVRegDef(SrcVec);
4046 if (SrcVecMI->getOpcode() == TargetOpcode::G_TRUNC) {
4047 SrcVecMI = MRI.getVRegDef(SrcVecMI->getOperand(1).getReg());
4048 }
4049
4050 if (SrcVecMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR &&
4051 SrcVecMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR_TRUNC)
4052 return false;
4053
4054 EVT Ty(getMVTForLLT(SrcTy));
4055 if (!MRI.hasOneNonDBGUse(SrcVec) &&
4056 !getTargetLowering().aggressivelyPreferBuildVectorSources(Ty))
4057 return false;
4058
4059 Reg = SrcVecMI->getOperand(VecIdx + 1).getReg();
4060 return true;
4061}
4062
4064 Register &Reg) {
4065 // Check the type of the register, since it may have come from a
4066 // G_BUILD_VECTOR_TRUNC.
4067 LLT ScalarTy = MRI.getType(Reg);
4068 Register DstReg = MI.getOperand(0).getReg();
4069 LLT DstTy = MRI.getType(DstReg);
4070
4071 if (ScalarTy != DstTy) {
4072 assert(ScalarTy.getSizeInBits() > DstTy.getSizeInBits());
4073 Builder.buildTrunc(DstReg, Reg);
4074 MI.eraseFromParent();
4075 return;
4076 }
4078}
4079
4082 SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) {
4083 assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
4084 // This combine tries to find build_vector's which have every source element
4085 // extracted using G_EXTRACT_VECTOR_ELT. This can happen when transforms like
4086 // the masked load scalarization is run late in the pipeline. There's already
4087 // a combine for a similar pattern starting from the extract, but that
4088 // doesn't attempt to do it if there are multiple uses of the build_vector,
 4089 // which is the case here. Starting the combine from the build_vector
4090 // feels more natural than trying to find sibling nodes of extracts.
4091 // E.g.
4092 // %vec(<4 x s32>) = G_BUILD_VECTOR %s1(s32), %s2, %s3, %s4
4093 // %ext1 = G_EXTRACT_VECTOR_ELT %vec, 0
4094 // %ext2 = G_EXTRACT_VECTOR_ELT %vec, 1
4095 // %ext3 = G_EXTRACT_VECTOR_ELT %vec, 2
4096 // %ext4 = G_EXTRACT_VECTOR_ELT %vec, 3
4097 // ==>
4098 // replace ext{1,2,3,4} with %s{1,2,3,4}
4099
4100 Register DstReg = MI.getOperand(0).getReg();
4101 LLT DstTy = MRI.getType(DstReg);
4102 unsigned NumElts = DstTy.getNumElements();
4103
4104 SmallBitVector ExtractedElts(NumElts);
4105 for (MachineInstr &II : MRI.use_nodbg_instructions(DstReg)) {
4106 if (II.getOpcode() != TargetOpcode::G_EXTRACT_VECTOR_ELT)
4107 return false;
4108 auto Cst = getIConstantVRegVal(II.getOperand(2).getReg(), MRI);
4109 if (!Cst)
4110 return false;
4111 unsigned Idx = Cst->getZExtValue();
4112 if (Idx >= NumElts)
4113 return false; // Out of range.
4114 ExtractedElts.set(Idx);
4115 SrcDstPairs.emplace_back(
4116 std::make_pair(MI.getOperand(Idx + 1).getReg(), &II));
4117 }
4118 // Match if every element was extracted.
4119 return ExtractedElts.all();
4120}
4121
4124 SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) {
4125 assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
4126 for (auto &Pair : SrcDstPairs) {
4127 auto *ExtMI = Pair.second;
4128 replaceRegWith(MRI, ExtMI->getOperand(0).getReg(), Pair.first);
4129 ExtMI->eraseFromParent();
4130 }
4131 MI.eraseFromParent();
4132}
4133
4135 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4136 applyBuildFnNoErase(MI, MatchInfo);
4137 MI.eraseFromParent();
4138}
4139
4141 BuildFnTy &MatchInfo) {
4144 MatchInfo(Builder);
4145 Root->eraseFromParent();
4146}
4147
4149 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4150 MatchInfo(Builder);
4151}
4152
4154 BuildFnTy &MatchInfo) {
4155 assert(MI.getOpcode() == TargetOpcode::G_OR);
4156
4157 Register Dst = MI.getOperand(0).getReg();
4158 LLT Ty = MRI.getType(Dst);
4159 unsigned BitWidth = Ty.getScalarSizeInBits();
4160
4161 Register ShlSrc, ShlAmt, LShrSrc, LShrAmt, Amt;
4162 unsigned FshOpc = 0;
4163
4164 // Match (or (shl ...), (lshr ...)).
4165 if (!mi_match(Dst, MRI,
4166 // m_GOr() handles the commuted version as well.
4167 m_GOr(m_GShl(m_Reg(ShlSrc), m_Reg(ShlAmt)),
4168 m_GLShr(m_Reg(LShrSrc), m_Reg(LShrAmt)))))
4169 return false;
4170
4171 // Given constants C0 and C1 such that C0 + C1 is bit-width:
4172 // (or (shl x, C0), (lshr y, C1)) -> (fshl x, y, C0) or (fshr x, y, C1)
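  // e.g. with a 32-bit type: (or (shl x, 8), (lshr y, 24)) --> (fshr x, y, 24).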
4173 int64_t CstShlAmt, CstLShrAmt;
4174 if (mi_match(ShlAmt, MRI, m_ICstOrSplat(CstShlAmt)) &&
4175 mi_match(LShrAmt, MRI, m_ICstOrSplat(CstLShrAmt)) &&
4176 CstShlAmt + CstLShrAmt == BitWidth) {
4177 FshOpc = TargetOpcode::G_FSHR;
4178 Amt = LShrAmt;
4179
4180 } else if (mi_match(LShrAmt, MRI,
4182 ShlAmt == Amt) {
4183 // (or (shl x, amt), (lshr y, (sub bw, amt))) -> (fshl x, y, amt)
4184 FshOpc = TargetOpcode::G_FSHL;
4185
4186 } else if (mi_match(ShlAmt, MRI,
4188 LShrAmt == Amt) {
4189 // (or (shl x, (sub bw, amt)), (lshr y, amt)) -> (fshr x, y, amt)
4190 FshOpc = TargetOpcode::G_FSHR;
4191
4192 } else {
4193 return false;
4194 }
4195
4196 LLT AmtTy = MRI.getType(Amt);
4197 if (!isLegalOrBeforeLegalizer({FshOpc, {Ty, AmtTy}}))
4198 return false;
4199
4200 MatchInfo = [=](MachineIRBuilder &B) {
4201 B.buildInstr(FshOpc, {Dst}, {ShlSrc, LShrSrc, Amt});
4202 };
4203 return true;
4204}
4205
4206/// Match an FSHL or FSHR that can be combined to a ROTR or ROTL rotate.
4208 unsigned Opc = MI.getOpcode();
4209 assert(Opc == TargetOpcode::G_FSHL || Opc == TargetOpcode::G_FSHR);
4210 Register X = MI.getOperand(1).getReg();
4211 Register Y = MI.getOperand(2).getReg();
4212 if (X != Y)
4213 return false;
4214 unsigned RotateOpc =
4215 Opc == TargetOpcode::G_FSHL ? TargetOpcode::G_ROTL : TargetOpcode::G_ROTR;
4216 return isLegalOrBeforeLegalizer({RotateOpc, {MRI.getType(X), MRI.getType(Y)}});
4217}
4218
4220 unsigned Opc = MI.getOpcode();
4221 assert(Opc == TargetOpcode::G_FSHL || Opc == TargetOpcode::G_FSHR);
4222 bool IsFSHL = Opc == TargetOpcode::G_FSHL;
4224 MI.setDesc(Builder.getTII().get(IsFSHL ? TargetOpcode::G_ROTL
4225 : TargetOpcode::G_ROTR));
4226 MI.removeOperand(2);
4228}
4229
4230// Fold (rot x, c) -> (rot x, c % BitSize)
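// e.g. (rotl x:s32, 33) --> (rotl x, 1), since rotating by the bit width is a
// no-op.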
4232 assert(MI.getOpcode() == TargetOpcode::G_ROTL ||
4233 MI.getOpcode() == TargetOpcode::G_ROTR);
4234 unsigned Bitsize =
4235 MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits();
4236 Register AmtReg = MI.getOperand(2).getReg();
4237 bool OutOfRange = false;
4238 auto MatchOutOfRange = [Bitsize, &OutOfRange](const Constant *C) {
4239 if (auto *CI = dyn_cast<ConstantInt>(C))
4240 OutOfRange |= CI->getValue().uge(Bitsize);
4241 return true;
4242 };
4243 return matchUnaryPredicate(MRI, AmtReg, MatchOutOfRange) && OutOfRange;
4244}
4245
4247 assert(MI.getOpcode() == TargetOpcode::G_ROTL ||
4248 MI.getOpcode() == TargetOpcode::G_ROTR);
4249 unsigned Bitsize =
4250 MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits();
4251 Register Amt = MI.getOperand(2).getReg();
4252 LLT AmtTy = MRI.getType(Amt);
4253 auto Bits = Builder.buildConstant(AmtTy, Bitsize);
4254 Amt = Builder.buildURem(AmtTy, MI.getOperand(2).getReg(), Bits).getReg(0);
4256 MI.getOperand(2).setReg(Amt);
4258}
4259
4261 int64_t &MatchInfo) {
4262 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
4263 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
4264 auto KnownLHS = KB->getKnownBits(MI.getOperand(2).getReg());
4265 auto KnownRHS = KB->getKnownBits(MI.getOperand(3).getReg());
4266 std::optional<bool> KnownVal;
4267 switch (Pred) {
4268 default:
4269 llvm_unreachable("Unexpected G_ICMP predicate?");
4270 case CmpInst::ICMP_EQ:
4271 KnownVal = KnownBits::eq(KnownLHS, KnownRHS);
4272 break;
4273 case CmpInst::ICMP_NE:
4274 KnownVal = KnownBits::ne(KnownLHS, KnownRHS);
4275 break;
4276 case CmpInst::ICMP_SGE:
4277 KnownVal = KnownBits::sge(KnownLHS, KnownRHS);
4278 break;
4279 case CmpInst::ICMP_SGT:
4280 KnownVal = KnownBits::sgt(KnownLHS, KnownRHS);
4281 break;
4282 case CmpInst::ICMP_SLE:
4283 KnownVal = KnownBits::sle(KnownLHS, KnownRHS);
4284 break;
4285 case CmpInst::ICMP_SLT:
4286 KnownVal = KnownBits::slt(KnownLHS, KnownRHS);
4287 break;
4288 case CmpInst::ICMP_UGE:
4289 KnownVal = KnownBits::uge(KnownLHS, KnownRHS);
4290 break;
4291 case CmpInst::ICMP_UGT:
4292 KnownVal = KnownBits::ugt(KnownLHS, KnownRHS);
4293 break;
4294 case CmpInst::ICMP_ULE:
4295 KnownVal = KnownBits::ule(KnownLHS, KnownRHS);
4296 break;
4297 case CmpInst::ICMP_ULT:
4298 KnownVal = KnownBits::ult(KnownLHS, KnownRHS);
4299 break;
4300 }
4301 if (!KnownVal)
4302 return false;
4303 MatchInfo =
4304 *KnownVal
4306 /*IsVector = */
4307 MRI.getType(MI.getOperand(0).getReg()).isVector(),
4308 /* IsFP = */ false)
4309 : 0;
4310 return true;
4311}
4312
4314 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4315 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
4316 // Given:
4317 //
4318 // %x = G_WHATEVER (... x is known to be 0 or 1 ...)
4319 // %cmp = G_ICMP ne %x, 0
4320 //
4321 // Or:
4322 //
4323 // %x = G_WHATEVER (... x is known to be 0 or 1 ...)
4324 // %cmp = G_ICMP eq %x, 1
4325 //
4326 // We can replace %cmp with %x assuming true is 1 on the target.
4327 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
4328 if (!CmpInst::isEquality(Pred))
4329 return false;
4330 Register Dst = MI.getOperand(0).getReg();
4331 LLT DstTy = MRI.getType(Dst);
4333 /* IsFP = */ false) != 1)
4334 return false;
4335 int64_t OneOrZero = Pred == CmpInst::ICMP_EQ;
4336 if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICst(OneOrZero)))
4337 return false;
4338 Register LHS = MI.getOperand(2).getReg();
4339 auto KnownLHS = KB->getKnownBits(LHS);
4340 if (KnownLHS.getMinValue() != 0 || KnownLHS.getMaxValue() != 1)
4341 return false;
4342 // Make sure replacing Dst with the LHS is a legal operation.
4343 LLT LHSTy = MRI.getType(LHS);
4344 unsigned LHSSize = LHSTy.getSizeInBits();
4345 unsigned DstSize = DstTy.getSizeInBits();
4346 unsigned Op = TargetOpcode::COPY;
4347 if (DstSize != LHSSize)
4348 Op = DstSize < LHSSize ? TargetOpcode::G_TRUNC : TargetOpcode::G_ZEXT;
4349 if (!isLegalOrBeforeLegalizer({Op, {DstTy, LHSTy}}))
4350 return false;
4351 MatchInfo = [=](MachineIRBuilder &B) { B.buildInstr(Op, {Dst}, {LHS}); };
4352 return true;
4353}
4354
4355// Replace (and (or x, c1), c2) with (and x, c2) iff c1 & c2 == 0
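// e.g. (and (or x, 0xF0), 0x0F) --> (and x, 0x0F), since 0xF0 & 0x0F == 0 and
// the OR can only set bits that the AND clears anyway.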
4357 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4358 assert(MI.getOpcode() == TargetOpcode::G_AND);
4359
4360 // Ignore vector types to simplify matching the two constants.
4361 // TODO: do this for vectors and scalars via a demanded bits analysis.
4362 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4363 if (Ty.isVector())
4364 return false;
4365
4366 Register Src;
4367 Register AndMaskReg;
4368 int64_t AndMaskBits;
4369 int64_t OrMaskBits;
4370 if (!mi_match(MI, MRI,
4371 m_GAnd(m_GOr(m_Reg(Src), m_ICst(OrMaskBits)),
4372 m_all_of(m_ICst(AndMaskBits), m_Reg(AndMaskReg)))))
4373 return false;
4374
4375 // Check if OrMask could turn on any bits in Src.
4376 if (AndMaskBits & OrMaskBits)
4377 return false;
4378
4379 MatchInfo = [=, &MI](MachineIRBuilder &B) {
4381 // Canonicalize the result to have the constant on the RHS.
4382 if (MI.getOperand(1).getReg() == AndMaskReg)
4383 MI.getOperand(2).setReg(AndMaskReg);
4384 MI.getOperand(1).setReg(Src);
4386 };
4387 return true;
4388}
4389
4390/// Form a G_SBFX from a G_SEXT_INREG fed by a right shift.
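/// e.g. (illustrative): (sext_inreg (lshr x, 4), 8) --> (sbfx x, 4, 8), i.e.
/// sign-extend bits [11:4] of x.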
4392 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4393 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
4394 Register Dst = MI.getOperand(0).getReg();
4395 Register Src = MI.getOperand(1).getReg();
4396 LLT Ty = MRI.getType(Src);
4398 if (!LI || !LI->isLegalOrCustom({TargetOpcode::G_SBFX, {Ty, ExtractTy}}))
4399 return false;
4400 int64_t Width = MI.getOperand(2).getImm();
4401 Register ShiftSrc;
4402 int64_t ShiftImm;
4403 if (!mi_match(
4404 Src, MRI,
4405 m_OneNonDBGUse(m_any_of(m_GAShr(m_Reg(ShiftSrc), m_ICst(ShiftImm)),
4406 m_GLShr(m_Reg(ShiftSrc), m_ICst(ShiftImm))))))
4407 return false;
4408 if (ShiftImm < 0 || ShiftImm + Width > Ty.getScalarSizeInBits())
4409 return false;
4410
4411 MatchInfo = [=](MachineIRBuilder &B) {
4412 auto Cst1 = B.buildConstant(ExtractTy, ShiftImm);
4413 auto Cst2 = B.buildConstant(ExtractTy, Width);
4414 B.buildSbfx(Dst, ShiftSrc, Cst1, Cst2);
4415 };
4416 return true;
4417}
4418
4419/// Form a G_UBFX from "(a srl b) & mask", where b and mask are constants.
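/// e.g. (illustrative): (and (lshr x, 4), 0xff) --> (ubfx x, 4, 8), i.e.
/// zero-extend bits [11:4] of x.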
4421 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4422 assert(MI.getOpcode() == TargetOpcode::G_AND);
4423 Register Dst = MI.getOperand(0).getReg();
4424 LLT Ty = MRI.getType(Dst);
4426 if (LI && !LI->isLegalOrCustom({TargetOpcode::G_UBFX, {Ty, ExtractTy}}))
4427 return false;
4428
4429 int64_t AndImm, LSBImm;
4430 Register ShiftSrc;
4431 const unsigned Size = Ty.getScalarSizeInBits();
4432 if (!mi_match(MI.getOperand(0).getReg(), MRI,
4433 m_GAnd(m_OneNonDBGUse(m_GLShr(m_Reg(ShiftSrc), m_ICst(LSBImm))),
4434 m_ICst(AndImm))))
4435 return false;
4436
4437 // The mask is a mask of the low bits iff imm & (imm+1) == 0.
4438 auto MaybeMask = static_cast<uint64_t>(AndImm);
4439 if (MaybeMask & (MaybeMask + 1))
4440 return false;
4441
4442 // LSB must fit within the register.
4443 if (static_cast<uint64_t>(LSBImm) >= Size)
4444 return false;
4445
4446 uint64_t Width = APInt(Size, AndImm).countr_one();
4447 MatchInfo = [=](MachineIRBuilder &B) {
4448 auto WidthCst = B.buildConstant(ExtractTy, Width);
4449 auto LSBCst = B.buildConstant(ExtractTy, LSBImm);
4450 B.buildInstr(TargetOpcode::G_UBFX, {Dst}, {ShiftSrc, LSBCst, WidthCst});
4451 };
4452 return true;
4453}
4454
4456 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4457 const unsigned Opcode = MI.getOpcode();
4458 assert(Opcode == TargetOpcode::G_ASHR || Opcode == TargetOpcode::G_LSHR);
4459
4460 const Register Dst = MI.getOperand(0).getReg();
4461
4462 const unsigned ExtrOpcode = Opcode == TargetOpcode::G_ASHR
4463 ? TargetOpcode::G_SBFX
4464 : TargetOpcode::G_UBFX;
4465
4466 // Check if the type we would use for the extract is legal
4467 LLT Ty = MRI.getType(Dst);
4469 if (!LI || !LI->isLegalOrCustom({ExtrOpcode, {Ty, ExtractTy}}))
4470 return false;
4471
4472 Register ShlSrc;
4473 int64_t ShrAmt;
4474 int64_t ShlAmt;
4475 const unsigned Size = Ty.getScalarSizeInBits();
4476
4477 // Try to match shr (shl x, c1), c2
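  // e.g. (illustrative, 32-bit x): (lshr (shl x, 20), 24) keeps bits [11:4] of
  // x, which is (ubfx x, 4, 8); the arithmetic-shift form gives (sbfx x, 4, 8).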
4478 if (!mi_match(Dst, MRI,
4479 m_BinOp(Opcode,
4480 m_OneNonDBGUse(m_GShl(m_Reg(ShlSrc), m_ICst(ShlAmt))),
4481 m_ICst(ShrAmt))))
4482 return false;
4483
4484 // Make sure that the shift sizes can fit a bitfield extract
4485 if (ShlAmt < 0 || ShlAmt > ShrAmt || ShrAmt >= Size)
4486 return false;
4487
4488 // Skip this combine if the G_SEXT_INREG combine could handle it
4489 if (Opcode == TargetOpcode::G_ASHR && ShlAmt == ShrAmt)
4490 return false;
4491
4492 // Calculate start position and width of the extract
4493 const int64_t Pos = ShrAmt - ShlAmt;
4494 const int64_t Width = Size - ShrAmt;
4495
4496 MatchInfo = [=](MachineIRBuilder &B) {
4497 auto WidthCst = B.buildConstant(ExtractTy, Width);
4498 auto PosCst = B.buildConstant(ExtractTy, Pos);
4499 B.buildInstr(ExtrOpcode, {Dst}, {ShlSrc, PosCst, WidthCst});
4500 };
4501 return true;
4502}
4503
4505 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4506 const unsigned Opcode = MI.getOpcode();
4507 assert(Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_ASHR);
4508
4509 const Register Dst = MI.getOperand(0).getReg();
4510 LLT Ty = MRI.getType(Dst);
4512 if (LI && !LI->isLegalOrCustom({TargetOpcode::G_UBFX, {Ty, ExtractTy}}))
4513 return false;
4514
4515 // Try to match shr (and x, c1), c2
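  // e.g. (illustrative, 32-bit x): (lshr (and x, 0xff0), 4) is bits [11:4] of
  // x, i.e. (ubfx x, 4, 8).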
4516 Register AndSrc;
4517 int64_t ShrAmt;
4518 int64_t SMask;
4519 if (!mi_match(Dst, MRI,
4520 m_BinOp(Opcode,
4521 m_OneNonDBGUse(m_GAnd(m_Reg(AndSrc), m_ICst(SMask))),
4522 m_ICst(ShrAmt))))
4523 return false;
4524
4525 const unsigned Size = Ty.getScalarSizeInBits();
4526 if (ShrAmt < 0 || ShrAmt >= Size)
4527 return false;
4528
4529 // If the shift subsumes the mask, emit the 0 directly.
4530 if (0 == (SMask >> ShrAmt)) {
4531 MatchInfo = [=](MachineIRBuilder &B) {
4532 B.buildConstant(Dst, 0);
4533 };
4534 return true;
4535 }
4536
4537 // Check that ubfx can do the extraction, with no holes in the mask.
4538 uint64_t UMask = SMask;
4539 UMask |= maskTrailingOnes<uint64_t>(ShrAmt);
4540 UMask &= maskTrailingOnes<uint64_t>(Size);
4541 if (!isMask_64(UMask))
4542 return false;
4543
4544 // Calculate start position and width of the extract.
4545 const int64_t Pos = ShrAmt;
4546 const int64_t Width = llvm::countr_one(UMask) - ShrAmt;
4547
4548 // It's preferable to keep the shift, rather than form G_SBFX.
4549 // TODO: remove the G_AND via demanded bits analysis.
4550 if (Opcode == TargetOpcode::G_ASHR && Width + ShrAmt == Size)
4551 return false;
4552
4553 MatchInfo = [=](MachineIRBuilder &B) {
4554 auto WidthCst = B.buildConstant(ExtractTy, Width);
4555 auto PosCst = B.buildConstant(ExtractTy, Pos);
4556 B.buildInstr(TargetOpcode::G_UBFX, {Dst}, {AndSrc, PosCst, WidthCst});
4557 };
4558 return true;
4559}
4560
4561bool CombinerHelper::reassociationCanBreakAddressingModePattern(
4562 MachineInstr &MI) {
4563 auto &PtrAdd = cast<GPtrAdd>(MI);
4564
4565 Register Src1Reg = PtrAdd.getBaseReg();
4566 auto *Src1Def = getOpcodeDef<GPtrAdd>(Src1Reg, MRI);
4567 if (!Src1Def)
4568 return false;
4569
4570 Register Src2Reg = PtrAdd.getOffsetReg();
4571
4572 if (MRI.hasOneNonDBGUse(Src1Reg))
4573 return false;
4574
4575 auto C1 = getIConstantVRegVal(Src1Def->getOffsetReg(), MRI);
4576 if (!C1)
4577 return false;
4578 auto C2 = getIConstantVRegVal(Src2Reg, MRI);
4579 if (!C2)
4580 return false;
4581
4582 const APInt &C1APIntVal = *C1;
4583 const APInt &C2APIntVal = *C2;
4584 const int64_t CombinedValue = (C1APIntVal + C2APIntVal).getSExtValue();
4585
4586 for (auto &UseMI : MRI.use_nodbg_instructions(PtrAdd.getReg(0))) {
4587 // This combine may end up running before ptrtoint/inttoptr combines
4588 // manage to eliminate redundant conversions, so try to look through them.
4589 MachineInstr *ConvUseMI = &UseMI;
4590 unsigned ConvUseOpc = ConvUseMI->getOpcode();
4591 while (ConvUseOpc == TargetOpcode::G_INTTOPTR ||
4592 ConvUseOpc == TargetOpcode::G_PTRTOINT) {
4593 Register DefReg = ConvUseMI->getOperand(0).getReg();
4594 if (!MRI.hasOneNonDBGUse(DefReg))
4595 break;
4596 ConvUseMI = &*MRI.use_instr_nodbg_begin(DefReg);
4597 ConvUseOpc = ConvUseMI->getOpcode();
4598 }
4599 auto *LdStMI = dyn_cast<GLoadStore>(ConvUseMI);
4600 if (!LdStMI)
4601 continue;
4602 // Is x[offset2] already not a legal addressing mode? If so then
4603 // reassociating the constants breaks nothing (we test offset2 because
4604 // that's the one we hope to fold into the load or store).
4606 AM.HasBaseReg = true;
4607 AM.BaseOffs = C2APIntVal.getSExtValue();
4608 unsigned AS = MRI.getType(LdStMI->getPointerReg()).getAddressSpace();
4609 Type *AccessTy = getTypeForLLT(LdStMI->getMMO().getMemoryType(),
4610 PtrAdd.getMF()->getFunction().getContext());
4611 const auto &TLI = *PtrAdd.getMF()->getSubtarget().getTargetLowering();
4612 if (!TLI.isLegalAddressingMode(PtrAdd.getMF()->getDataLayout(), AM,
4613 AccessTy, AS))
4614 continue;
4615
4616 // Would x[offset1+offset2] still be a legal addressing mode?
4617 AM.BaseOffs = CombinedValue;
4618 if (!TLI.isLegalAddressingMode(PtrAdd.getMF()->getDataLayout(), AM,
4619 AccessTy, AS))
4620 return true;
4621 }
4622
4623 return false;
4624}
4625
4627 MachineInstr *RHS,
4628 BuildFnTy &MatchInfo) {
4629 // G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C)
4630 Register Src1Reg = MI.getOperand(1).getReg();
4631 if (RHS->getOpcode() != TargetOpcode::G_ADD)
4632 return false;
4633 auto C2 = getIConstantVRegVal(RHS->getOperand(2).getReg(), MRI);
4634 if (!C2)
4635 return false;
4636
4637 MatchInfo = [=, &MI](MachineIRBuilder &B) {
4638 LLT PtrTy = MRI.getType(MI.getOperand(0).getReg());
4639
4640 auto NewBase =
4641 Builder.buildPtrAdd(PtrTy, Src1Reg, RHS->getOperand(1).getReg());
4643 MI.getOperand(1).setReg(NewBase.getReg(0));
4644 MI.getOperand(2).setReg(RHS->getOperand(2).getReg());
4646 };
4647 return !reassociationCanBreakAddressingModePattern(MI);
4648}
4649
4651 MachineInstr *LHS,
4652 MachineInstr *RHS,
4653 BuildFnTy &MatchInfo) {
 4654 // G_PTR_ADD(G_PTR_ADD(X, C), Y) -> G_PTR_ADD(G_PTR_ADD(X, Y), C)
4655 // if and only if (G_PTR_ADD X, C) has one use.
4656 Register LHSBase;
4657 std::optional<ValueAndVReg> LHSCstOff;
4658 if (!mi_match(MI.getBaseReg(), MRI,
4659 m_OneNonDBGUse(m_GPtrAdd(m_Reg(LHSBase), m_GCst(LHSCstOff)))))
4660 return false;
4661
4662 auto *LHSPtrAdd = cast<GPtrAdd>(LHS);
4663 MatchInfo = [=, &MI](MachineIRBuilder &B) {
4664 // When we change LHSPtrAdd's offset register we might cause it to use a reg
 4665 // before its def. Sink the instruction so that it sits just before the outer
 4666 // PTR_ADD, ensuring this doesn't happen.
4667 LHSPtrAdd->moveBefore(&MI);
4668 Register RHSReg = MI.getOffsetReg();
 4669 // Don't reuse LHSCstOff's vreg directly: it may come from an extend/trunc and have the wrong type, so build a fresh constant.
4670 auto NewCst = B.buildConstant(MRI.getType(RHSReg), LHSCstOff->Value);
4672 MI.getOperand(2).setReg(NewCst.getReg(0));
4674 Observer.changingInstr(*LHSPtrAdd);
4675 LHSPtrAdd->getOperand(2).setReg(RHSReg);
4676 Observer.changedInstr(*LHSPtrAdd);
4677 };
4678 return !reassociationCanBreakAddressingModePattern(MI);
4679}
4680
4682 MachineInstr *LHS,
4683 MachineInstr *RHS,
4684 BuildFnTy &MatchInfo) {
4685 // G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)
4686 auto *LHSPtrAdd = dyn_cast<GPtrAdd>(LHS);
4687 if (!LHSPtrAdd)
4688 return false;
4689
4690 Register Src2Reg = MI.getOperand(2).getReg();
4691 Register LHSSrc1 = LHSPtrAdd->getBaseReg();
4692 Register LHSSrc2 = LHSPtrAdd->getOffsetReg();
4693 auto C1 = getIConstantVRegVal(LHSSrc2, MRI);
4694 if (!C1)
4695 return false;
4696 auto C2 = getIConstantVRegVal(Src2Reg, MRI);
4697 if (!C2)
4698 return false;
4699
4700 MatchInfo = [=, &MI](MachineIRBuilder &B) {
4701 auto NewCst = B.buildConstant(MRI.getType(Src2Reg), *C1 + *C2);
4703 MI.getOperand(1).setReg(LHSSrc1);
4704 MI.getOperand(2).setReg(NewCst.getReg(0));
4706 };
4707 return !reassociationCanBreakAddressingModePattern(MI);
4708}
4709
4711 BuildFnTy &MatchInfo) {
4712 auto &PtrAdd = cast<GPtrAdd>(MI);
4713 // We're trying to match a few pointer computation patterns here for
4714 // re-association opportunities.
4715 // 1) Isolating a constant operand to be on the RHS, e.g.:
4716 // G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C)
4717 //
4718 // 2) Folding two constants in each sub-tree as long as such folding
4719 // doesn't break a legal addressing mode.
4720 // G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)
4721 //
4722 // 3) Move a constant from the LHS of an inner op to the RHS of the outer.
 4723 // G_PTR_ADD(G_PTR_ADD(X, C), Y) -> G_PTR_ADD(G_PTR_ADD(X, Y), C)
 4724 // iff (G_PTR_ADD X, C) has one use.
4725 MachineInstr *LHS = MRI.getVRegDef(PtrAdd.getBaseReg());
4726 MachineInstr *RHS = MRI.getVRegDef(PtrAdd.getOffsetReg());
4727
4728 // Try to match example 2.
4729 if (matchReassocFoldConstantsInSubTree(PtrAdd, LHS, RHS, MatchInfo))
4730 return true;
4731
4732 // Try to match example 3.
4733 if (matchReassocConstantInnerLHS(PtrAdd, LHS, RHS, MatchInfo))
4734 return true;
4735
4736 // Try to match example 1.
4737 if (matchReassocConstantInnerRHS(PtrAdd, RHS, MatchInfo))
4738 return true;
4739
4740 return false;
4741}
4743 Register OpLHS, Register OpRHS,
4744 BuildFnTy &MatchInfo) {
4745 LLT OpRHSTy = MRI.getType(OpRHS);
4746 MachineInstr *OpLHSDef = MRI.getVRegDef(OpLHS);
4747
4748 if (OpLHSDef->getOpcode() != Opc)
4749 return false;
4750
4751 MachineInstr *OpRHSDef = MRI.getVRegDef(OpRHS);
4752 Register OpLHSLHS = OpLHSDef->getOperand(1).getReg();
4753 Register OpLHSRHS = OpLHSDef->getOperand(2).getReg();
4754
4755 // If the inner op is (X op C), pull the constant out so it can be folded with
4756 // other constants in the expression tree. Folding is not guaranteed so we
4757 // might have (C1 op C2). In that case do not pull a constant out because it
4758 // won't help and can lead to infinite loops.
4761 if (isConstantOrConstantSplatVector(*OpRHSDef, MRI)) {
4762 // (Opc (Opc X, C1), C2) -> (Opc X, (Opc C1, C2))
4763 MatchInfo = [=](MachineIRBuilder &B) {
4764 auto NewCst = B.buildInstr(Opc, {OpRHSTy}, {OpLHSRHS, OpRHS});
4765 B.buildInstr(Opc, {DstReg}, {OpLHSLHS, NewCst});
4766 };
4767 return true;
4768 }
4769 if (getTargetLowering().isReassocProfitable(MRI, OpLHS, OpRHS)) {
4770 // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
4771 // iff (op x, c1) has one use
4772 MatchInfo = [=](MachineIRBuilder &B) {
4773 auto NewLHSLHS = B.buildInstr(Opc, {OpRHSTy}, {OpLHSLHS, OpRHS});
4774 B.buildInstr(Opc, {DstReg}, {NewLHSLHS, OpLHSRHS});
4775 };
4776 return true;
4777 }
4778 }
4779
4780 return false;
4781}
4782
4784 BuildFnTy &MatchInfo) {
4785 // We don't check if the reassociation will break a legal addressing mode
4786 // here since pointer arithmetic is handled by G_PTR_ADD.
4787 unsigned Opc = MI.getOpcode();
4788 Register DstReg = MI.getOperand(0).getReg();
4789 Register LHSReg = MI.getOperand(1).getReg();
4790 Register RHSReg = MI.getOperand(2).getReg();
4791
4792 if (tryReassocBinOp(Opc, DstReg, LHSReg, RHSReg, MatchInfo))
4793 return true;
4794 if (tryReassocBinOp(Opc, DstReg, RHSReg, LHSReg, MatchInfo))
4795 return true;
4796 return false;
4797}
4798
4800 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
4801 Register SrcOp = MI.getOperand(1).getReg();
4802
4803 if (auto MaybeCst = ConstantFoldCastOp(MI.getOpcode(), DstTy, SrcOp, MRI)) {
4804 MatchInfo = *MaybeCst;
4805 return true;
4806 }
4807
4808 return false;
4809}
4810
4812 Register Op1 = MI.getOperand(1).getReg();
4813 Register Op2 = MI.getOperand(2).getReg();
4814 auto MaybeCst = ConstantFoldBinOp(MI.getOpcode(), Op1, Op2, MRI);
4815 if (!MaybeCst)
4816 return false;
4817 MatchInfo = *MaybeCst;
4818 return true;
4819}
4820
4822 Register Op1 = MI.getOperand(1).getReg();
4823 Register Op2 = MI.getOperand(2).getReg();
4824 auto MaybeCst = ConstantFoldFPBinOp(MI.getOpcode(), Op1, Op2, MRI);
4825 if (!MaybeCst)
4826 return false;
4827 MatchInfo =
4828 ConstantFP::get(MI.getMF()->getFunction().getContext(), *MaybeCst);
4829 return true;
4830}
4831
4833 ConstantFP *&MatchInfo) {
4834 assert(MI.getOpcode() == TargetOpcode::G_FMA ||
4835 MI.getOpcode() == TargetOpcode::G_FMAD);
4836 auto [_, Op1, Op2, Op3] = MI.getFirst4Regs();
4837
4838 const ConstantFP *Op3Cst = getConstantFPVRegVal(Op3, MRI);
4839 if (!Op3Cst)
4840 return false;
4841
4842 const ConstantFP *Op2Cst = getConstantFPVRegVal(Op2, MRI);
4843 if (!Op2Cst)
4844 return false;
4845
4846 const ConstantFP *Op1Cst = getConstantFPVRegVal(Op1, MRI);
4847 if (!Op1Cst)
4848 return false;
4849
4850 APFloat Op1F = Op1Cst->getValueAPF();
4851 Op1F.fusedMultiplyAdd(Op2Cst->getValueAPF(), Op3Cst->getValueAPF(),
4853 MatchInfo = ConstantFP::get(MI.getMF()->getFunction().getContext(), Op1F);
4854 return true;
4855}
4856
4858 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4859 // Look for a binop feeding into an AND with a mask:
4860 //
4861 // %add = G_ADD %lhs, %rhs
4862 // %and = G_AND %add, 000...11111111
4863 //
4864 // Check if it's possible to perform the binop at a narrower width and zext
4865 // back to the original width like so:
4866 //
4867 // %narrow_lhs = G_TRUNC %lhs
4868 // %narrow_rhs = G_TRUNC %rhs
4869 // %narrow_add = G_ADD %narrow_lhs, %narrow_rhs
4870 // %new_add = G_ZEXT %narrow_add
4871 // %and = G_AND %new_add, 000...11111111
4872 //
4873 // This can allow later combines to eliminate the G_AND if it turns out
4874 // that the mask is irrelevant.
4875 assert(MI.getOpcode() == TargetOpcode::G_AND);
4876 Register Dst = MI.getOperand(0).getReg();
4877 Register AndLHS = MI.getOperand(1).getReg();
4878 Register AndRHS = MI.getOperand(2).getReg();
4879 LLT WideTy = MRI.getType(Dst);
4880
4881 // If the potential binop has more than one use, then it's possible that one
4882 // of those uses will need its full width.
4883 if (!WideTy.isScalar() || !MRI.hasOneNonDBGUse(AndLHS))
4884 return false;
4885
4886 // Check if the LHS feeding the AND is impacted by the high bits that we're
4887 // masking out.
4888 //
4889 // e.g. for 64-bit x, y:
4890 //
4891 // add_64(x, y) & 65535 == zext(add_16(trunc(x), trunc(y))) & 65535
4892 MachineInstr *LHSInst = getDefIgnoringCopies(AndLHS, MRI);
4893 if (!LHSInst)
4894 return false;
4895 unsigned LHSOpc = LHSInst->getOpcode();
4896 switch (LHSOpc) {
4897 default:
4898 return false;
4899 case TargetOpcode::G_ADD:
4900 case TargetOpcode::G_SUB:
4901 case TargetOpcode::G_MUL:
4902 case TargetOpcode::G_AND:
4903 case TargetOpcode::G_OR:
4904 case TargetOpcode::G_XOR:
4905 break;
4906 }
4907
4908 // Find the mask on the RHS.
4909 auto Cst = getIConstantVRegValWithLookThrough(AndRHS, MRI);
4910 if (!Cst)
4911 return false;
4912 auto Mask = Cst->Value;
4913 if (!Mask.isMask())
4914 return false;
4915
4916 // No point in combining if there's nothing to truncate.
4917 unsigned NarrowWidth = Mask.countr_one();
4918 if (NarrowWidth == WideTy.getSizeInBits())
4919 return false;
4920 LLT NarrowTy = LLT::scalar(NarrowWidth);
4921
4922 // Check if adding the zext + truncates could be harmful.
4923 auto &MF = *MI.getMF();
4924 const auto &TLI = getTargetLowering();
4925 LLVMContext &Ctx = MF.getFunction().getContext();
4926 auto &DL = MF.getDataLayout();
4927 if (!TLI.isTruncateFree(WideTy, NarrowTy, DL, Ctx) ||
4928 !TLI.isZExtFree(NarrowTy, WideTy, DL, Ctx))
4929 return false;
4930 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_TRUNC, {NarrowTy, WideTy}}) ||
4931 !isLegalOrBeforeLegalizer({TargetOpcode::G_ZEXT, {WideTy, NarrowTy}}))
4932 return false;
4933 Register BinOpLHS = LHSInst->getOperand(1).getReg();
4934 Register BinOpRHS = LHSInst->getOperand(2).getReg();
4935 MatchInfo = [=, &MI](MachineIRBuilder &B) {
4936 auto NarrowLHS = Builder.buildTrunc(NarrowTy, BinOpLHS);
4937 auto NarrowRHS = Builder.buildTrunc(NarrowTy, BinOpRHS);
4938 auto NarrowBinOp =
4939 Builder.buildInstr(LHSOpc, {NarrowTy}, {NarrowLHS, NarrowRHS});
4940 auto Ext = Builder.buildZExt(WideTy, NarrowBinOp);
4942 MI.getOperand(1).setReg(Ext.getReg(0));
4944 };
4945 return true;
4946}
4947
4949 unsigned Opc = MI.getOpcode();
4950 assert(Opc == TargetOpcode::G_UMULO || Opc == TargetOpcode::G_SMULO);
4951
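 // (G_UMULO x, 2) -> (G_UADDO x, x) and (G_SMULO x, 2) -> (G_SADDO x, x):
 // multiplying by 2 overflows exactly when adding x to itself does.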
4952 if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICstOrSplat(2)))
4953 return false;
4954
4955 MatchInfo = [=, &MI](MachineIRBuilder &B) {
4957 unsigned NewOpc = Opc == TargetOpcode::G_UMULO ? TargetOpcode::G_UADDO
4958 : TargetOpcode::G_SADDO;
4959 MI.setDesc(Builder.getTII().get(NewOpc));
4960 MI.getOperand(3).setReg(MI.getOperand(2).getReg());
4962 };
4963 return true;
4964}
4965
4967 // (G_*MULO x, 0) -> 0 + no carry out
4968 assert(MI.getOpcode() == TargetOpcode::G_UMULO ||
4969 MI.getOpcode() == TargetOpcode::G_SMULO);
4970 if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICstOrSplat(0)))
4971 return false;
4972 Register Dst = MI.getOperand(0).getReg();
4973 Register Carry = MI.getOperand(1).getReg();
4976 return false;
4977 MatchInfo = [=](MachineIRBuilder &B) {
4978 B.buildConstant(Dst, 0);
4979 B.buildConstant(Carry, 0);
4980 };
4981 return true;
4982}
4983
4985 // (G_*ADDE x, y, 0) -> (G_*ADDO x, y)
4986 // (G_*SUBE x, y, 0) -> (G_*SUBO x, y)
4987 assert(MI.getOpcode() == TargetOpcode::G_UADDE ||
4988 MI.getOpcode() == TargetOpcode::G_SADDE ||
4989 MI.getOpcode() == TargetOpcode::G_USUBE ||
4990 MI.getOpcode() == TargetOpcode::G_SSUBE);
4991 if (!mi_match(MI.getOperand(4).getReg(), MRI, m_SpecificICstOrSplat(0)))
4992 return false;
4993 MatchInfo = [&](MachineIRBuilder &B) {
4994 unsigned NewOpcode;
4995 switch (MI.getOpcode()) {
4996 case TargetOpcode::G_UADDE:
4997 NewOpcode = TargetOpcode::G_UADDO;
4998 break;
4999 case TargetOpcode::G_SADDE:
5000 NewOpcode = TargetOpcode::G_SADDO;
5001 break;
5002 case TargetOpcode::G_USUBE:
5003 NewOpcode = TargetOpcode::G_USUBO;
5004 break;
5005 case TargetOpcode::G_SSUBE:
5006 NewOpcode = TargetOpcode::G_SSUBO;
5007 break;
5008 }
5010 MI.setDesc(B.getTII().get(NewOpcode));
5011 MI.removeOperand(4);
5013 };
5014 return true;
5015}
5016
5018 BuildFnTy &MatchInfo) {
5019 assert(MI.getOpcode() == TargetOpcode::G_SUB);
5020 Register Dst = MI.getOperand(0).getReg();
5021 // (x + y) - z -> x (if y == z)
5022 // (x + y) - z -> y (if x == z)
5023 Register X, Y, Z;
5024 if (mi_match(Dst, MRI, m_GSub(m_GAdd(m_Reg(X), m_Reg(Y)), m_Reg(Z)))) {
5025 Register ReplaceReg;
5026 int64_t CstX, CstY;
5027 if (Y == Z || (mi_match(Y, MRI, m_ICstOrSplat(CstY)) &&
5029 ReplaceReg = X;
5030 else if (X == Z || (mi_match(X, MRI, m_ICstOrSplat(CstX)) &&
5032 ReplaceReg = Y;
5033 if (ReplaceReg) {
5034 MatchInfo = [=](MachineIRBuilder &B) { B.buildCopy(Dst, ReplaceReg); };
5035 return true;
5036 }
5037 }
5038
5039 // x - (y + z) -> 0 - y (if x == z)
5040 // x - (y + z) -> 0 - z (if x == y)
5041 if (mi_match(Dst, MRI, m_GSub(m_Reg(X), m_GAdd(m_Reg(Y), m_Reg(Z))))) {
5042 Register ReplaceReg;
5043 int64_t CstX;
5044 if (X == Z || (mi_match(X, MRI, m_ICstOrSplat(CstX)) &&
5046 ReplaceReg = Y;
5047 else if (X == Y || (mi_match(X, MRI, m_ICstOrSplat(CstX)) &&
5049 ReplaceReg = Z;
5050 if (ReplaceReg) {
5051 MatchInfo = [=](MachineIRBuilder &B) {
5052 auto Zero = B.buildConstant(MRI.getType(Dst), 0);
5053 B.buildSub(Dst, Zero, ReplaceReg);
5054 };
5055 return true;
5056 }
5057 }
5058 return false;
5059}
5060
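/// Expand an unsigned division by constant into a multiply-by-magic-constant
/// sequence. Illustrative shape for a 32-bit udiv by 5 (0xCCCCCCCD is the
/// well-known magic value; the exact pre/post shifts come from
/// UnsignedDivisionByConstantInfo, and a select handles a divisor of 1):
///   %q:_(s32)   = G_UMULH %x, 0xCCCCCCCD
///   %res:_(s32) = G_LSHR %q, 2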
5062 assert(MI.getOpcode() == TargetOpcode::G_UDIV);
5063 auto &UDiv = cast<GenericMachineInstr>(MI);
5064 Register Dst = UDiv.getReg(0);
5065 Register LHS = UDiv.getReg(1);
5066 Register RHS = UDiv.getReg(2);
5067 LLT Ty = MRI.getType(Dst);
5068 LLT ScalarTy = Ty.getScalarType();
5069 const unsigned EltBits = ScalarTy.getScalarSizeInBits();
5071 LLT ScalarShiftAmtTy = ShiftAmtTy.getScalarType();
5072
5073 unsigned KnownLeadingZeros =
5075 auto &MIB = Builder;
5076
5077 bool UseNPQ = false;
5078 SmallVector<Register, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
5079
5080 auto BuildUDIVPattern = [&](const Constant *C) {
5081 auto *CI = cast<ConstantInt>(C);
5082 const APInt &Divisor = CI->getValue();
5083
5084 bool SelNPQ = false;
5085 APInt Magic(Divisor.getBitWidth(), 0);
5086 unsigned PreShift = 0, PostShift = 0;
5087
5088 // Magic algorithm doesn't work for division by 1. We need to emit a select
5089 // at the end.
5090 // TODO: Use undef values for divisor of 1.
5091 if (!Divisor.isOne()) {
5092
5093 // UnsignedDivisionByConstantInfo doesn't work correctly if leading zeros
 5094 // in the dividend exceed the leading zeros of the divisor.
5097 Divisor, std::min(KnownLeadingZeros, Divisor.countl_zero()));
5098
5099 Magic = std::move(magics.Magic);
5100
5101 assert(magics.PreShift < Divisor.getBitWidth() &&
5102 "We shouldn't generate an undefined shift!");
5103 assert(magics.PostShift < Divisor.getBitWidth() &&
5104 "We shouldn't generate an undefined shift!");
5105 assert((!magics.IsAdd || magics.PreShift == 0) && "Unexpected pre-shift");
5106 PreShift = magics.PreShift;
5107 PostShift = magics.PostShift;
5108 SelNPQ = magics.IsAdd;
5109 }
5110
5111 PreShifts.push_back(
5112 MIB.buildConstant(ScalarShiftAmtTy, PreShift).getReg(0));
5113 MagicFactors.push_back(MIB.buildConstant(ScalarTy, Magic).getReg(0));
5114 NPQFactors.push_back(
5115 MIB.buildConstant(ScalarTy,
5116 SelNPQ ? APInt::getOneBitSet(EltBits, EltBits - 1)
5117 : APInt::getZero(EltBits))
5118 .getReg(0));
5119 PostShifts.push_back(
5120 MIB.buildConstant(ScalarShiftAmtTy, PostShift).getReg(0));
5121 UseNPQ |= SelNPQ;
5122 return true;
5123 };
5124
5125 // Collect the shifts/magic values from each element.
5126 bool Matched = matchUnaryPredicate(MRI, RHS, BuildUDIVPattern);
5127 (void)Matched;
5128 assert(Matched && "Expected unary predicate match to succeed");
5129
5130 Register PreShift, PostShift, MagicFactor, NPQFactor;
5131 auto *RHSDef = getOpcodeDef<GBuildVector>(RHS, MRI);
5132 if (RHSDef) {
5133 PreShift = MIB.buildBuildVector(ShiftAmtTy, PreShifts).getReg(0);
5134 MagicFactor = MIB.buildBuildVector(Ty, MagicFactors).getReg(0);
5135 NPQFactor = MIB.buildBuildVector(Ty, NPQFactors).getReg(0);
5136 PostShift = MIB.buildBuildVector(ShiftAmtTy, PostShifts).getReg(0);
5137 } else {
5138 assert(Ty.isScalar() &&
5139 "Non-build_vector operation should have been a scalar");
5140 PreShift = PreShifts[0];
5141 MagicFactor = MagicFactors[0];
5142 PostShift = PostShifts[0];
5143 }
5144
5145 Register Q = LHS;
5146 Q = MIB.buildLShr(Ty, Q, PreShift).getReg(0);
5147
5148 // Multiply the numerator (operand 0) by the magic value.
5149 Q = MIB.buildUMulH(Ty, Q, MagicFactor).getReg(0);
5150
5151 if (UseNPQ) {
5152 Register NPQ = MIB.buildSub(Ty, LHS, Q).getReg(0);
5153
5154 // For vectors we might have a mix of non-NPQ/NPQ paths, so use
5155 // G_UMULH to act as a SRL-by-1 for NPQ, else multiply by zero.
5156 if (Ty.isVector())
5157 NPQ = MIB.buildUMulH(Ty, NPQ, NPQFactor).getReg(0);
5158 else
5159 NPQ = MIB.buildLShr(Ty, NPQ, MIB.buildConstant(ShiftAmtTy, 1)).getReg(0);
5160
5161 Q = MIB.buildAdd(Ty, NPQ, Q).getReg(0);
5162 }
5163
5164 Q = MIB.buildLShr(Ty, Q, PostShift).getReg(0);
5165 auto One = MIB.buildConstant(Ty, 1);
5166 auto IsOne = MIB.buildICmp(
5167 CmpInst::Predicate::ICMP_EQ,
5168 Ty.isScalar() ? LLT::scalar(1) : Ty.changeElementSize(1), RHS, One);
5169 return MIB.buildSelect(Ty, IsOne, LHS, Q);
5170}
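// Illustrative 32-bit example of the expansion built above: for a divisor of
// 3, the magic factor is 0xAAAAAAAB = ceil(2^33 / 3) with PreShift = 0,
// PostShift = 1 and no NPQ fixup, so the emitted sequence is
//   %q = G_UMULH %n, 0xAAAAAAAB
//   %q = G_LSHR %q, 1
// which yields n / 3 for every unsigned 32-bit n. Divisors such as 7 take the
// NPQ path because their exact multiplier needs 33 bits (magics.IsAdd).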
5171
5172 bool CombinerHelper::matchUDivByConst(MachineInstr &MI) {
5173 assert(MI.getOpcode() == TargetOpcode::G_UDIV);
5174 Register Dst = MI.getOperand(0).getReg();
5175 Register RHS = MI.getOperand(2).getReg();
5176 LLT DstTy = MRI.getType(Dst);
5177 auto *RHSDef = MRI.getVRegDef(RHS);
5178 if (!isConstantOrConstantVector(*RHSDef, MRI))
5179 return false;
5180
5181 auto &MF = *MI.getMF();
5182 AttributeList Attr = MF.getFunction().getAttributes();
5183 const auto &TLI = getTargetLowering();
5184 LLVMContext &Ctx = MF.getFunction().getContext();
5185 auto &DL = MF.getDataLayout();
5186 if (TLI.isIntDivCheap(getApproximateEVTForLLT(DstTy, DL, Ctx), Attr))
5187 return false;
5188
5189 // Don't do this for minsize because the instruction sequence is usually
5190 // larger.
5191 if (MF.getFunction().hasMinSize())
5192 return false;
5193
5194 // Don't do this if the types are not going to be legal.
5195 if (LI) {
5196 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_MUL, {DstTy, DstTy}}))
5197 return false;
5198 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMULH, {DstTy}}))
5199 return false;
5200 if (!isLegalOrBeforeLegalizer(
5201 {TargetOpcode::G_ICMP,
5202 {DstTy.isVector() ? DstTy.changeElementSize(1) : LLT::scalar(1),
5203 DstTy}}))
5204 return false;
5205 }
5206
5207 return matchUnaryPredicate(
5208 MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); });
5209}
5210
5211 void CombinerHelper::applyUDivByConst(MachineInstr &MI) {
5212 auto *NewMI = buildUDivUsingMul(MI);
5213 replaceSingleDefInstWithReg(MI, NewMI->getOperand(0).getReg());
5214}
5215
5216 bool CombinerHelper::matchSDivByConst(MachineInstr &MI) {
5217 assert(MI.getOpcode() == TargetOpcode::G_SDIV && "Expected SDIV");
5218 Register Dst = MI.getOperand(0).getReg();
5219 Register RHS = MI.getOperand(2).getReg();
5220 LLT DstTy = MRI.getType(Dst);
5221
5222 auto &MF = *MI.getMF();
5223 AttributeList Attr = MF.getFunction().getAttributes();
5224 const auto &TLI = getTargetLowering();
5225 LLVMContext &Ctx = MF.getFunction().getContext();
5226 auto &DL = MF.getDataLayout();
5227 if (TLI.isIntDivCheap(getApproximateEVTForLLT(DstTy, DL, Ctx), Attr))
5228 return false;
5229
5230 // Don't do this for minsize because the instruction sequence is usually
5231 // larger.
5232 if (MF.getFunction().hasMinSize())
5233 return false;
5234
5235 // If the sdiv has an 'exact' flag we can use a simpler lowering.
5236 if (MI.getFlag(MachineInstr::MIFlag::IsExact)) {
5237 return matchUnaryPredicate(
5238 MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); });
5239 }
5240
5241 // Don't support the general case for now.
5242 return false;
5243}
5244
5245 void CombinerHelper::applySDivByConst(MachineInstr &MI) {
5246 auto *NewMI = buildSDivUsingMul(MI);
5247 replaceSingleDefInstWithReg(MI, NewMI->getOperand(0).getReg());
5248}
5249
5250 MachineInstr *CombinerHelper::buildSDivUsingMul(MachineInstr &MI) {
5251 assert(MI.getOpcode() == TargetOpcode::G_SDIV && "Expected SDIV");
5252 auto &SDiv = cast<GenericMachineInstr>(MI);
5253 Register Dst = SDiv.getReg(0);
5254 Register LHS = SDiv.getReg(1);
5255 Register RHS = SDiv.getReg(2);
5256 LLT Ty = MRI.getType(Dst);
5257 LLT ScalarTy = Ty.getScalarType();
5258 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
5259 LLT ScalarShiftAmtTy = ShiftAmtTy.getScalarType();
5260 auto &MIB = Builder;
5261
5262 bool UseSRA = false;
5263 SmallVector<Register, 16> Shifts, Factors;
5264
5265 auto *RHSDef = cast<GenericMachineInstr>(getDefIgnoringCopies(RHS, MRI));
5266 bool IsSplat = getIConstantSplatVal(*RHSDef, MRI).has_value();
5267
5268 auto BuildSDIVPattern = [&](const Constant *C) {
5269 // Don't recompute inverses for each splat element.
5270 if (IsSplat && !Factors.empty()) {
5271 Shifts.push_back(Shifts[0]);
5272 Factors.push_back(Factors[0]);
5273 return true;
5274 }
5275
5276 auto *CI = cast<ConstantInt>(C);
5277 APInt Divisor = CI->getValue();
5278 unsigned Shift = Divisor.countr_zero();
5279 if (Shift) {
5280 Divisor.ashrInPlace(Shift);
5281 UseSRA = true;
5282 }
5283
5284 // Calculate the multiplicative inverse modulo BW.
5285 // 2^W requires W + 1 bits, so we have to extend and then truncate.
5286 APInt Factor = Divisor.multiplicativeInverse();
5287 Shifts.push_back(MIB.buildConstant(ScalarShiftAmtTy, Shift).getReg(0));
5288 Factors.push_back(MIB.buildConstant(ScalarTy, Factor).getReg(0));
5289 return true;
5290 };
5291
5292 // Collect all magic values from the build vector.
5293 bool Matched = matchUnaryPredicate(MRI, RHS, BuildSDIVPattern);
5294 (void)Matched;
5295 assert(Matched && "Expected unary predicate match to succeed");
5296
5297 Register Shift, Factor;
5298 if (Ty.isVector()) {
5299 Shift = MIB.buildBuildVector(ShiftAmtTy, Shifts).getReg(0);
5300 Factor = MIB.buildBuildVector(Ty, Factors).getReg(0);
5301 } else {
5302 Shift = Shifts[0];
5303 Factor = Factors[0];
5304 }
5305
5306 Register Res = LHS;
5307
5308 if (UseSRA)
5309 Res = MIB.buildAShr(Ty, Res, Shift, MachineInstr::IsExact).getReg(0);
5310
5311 return MIB.buildMul(Ty, Res, Factor);
5312}
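// Illustrative 32-bit example of the exact-division expansion above: for an
// 'exact' G_SDIV by 6, Shift = cttz(6) = 1 and the remaining odd factor 3 has
// multiplicative inverse 0xAAAAAAAB modulo 2^32 (3 * 0xAAAAAAAB = 2^33 + 1),
// so the result is
//   %t = G_ASHR exact %x, 1
//   %r = G_MUL %t, 0xAAAAAAAB
// e.g. x = 18: (18 ashr 1) = 9, and 9 * 0xAAAAAAAB wraps to 3 = 18 / 6.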
5313
5314 bool CombinerHelper::matchDivByPow2(MachineInstr &MI, bool IsSigned) {
5315 assert((MI.getOpcode() == TargetOpcode::G_SDIV ||
5316 MI.getOpcode() == TargetOpcode::G_UDIV) &&
5317 "Expected SDIV or UDIV");
5318 auto &Div = cast<GenericMachineInstr>(MI);
5319 Register RHS = Div.getReg(2);
5320 auto MatchPow2 = [&](const Constant *C) {
5321 auto *CI = dyn_cast<ConstantInt>(C);
5322 return CI && (CI->getValue().isPowerOf2() ||
5323 (IsSigned && CI->getValue().isNegatedPowerOf2()));
5324 };
5325 return matchUnaryPredicate(MRI, RHS, MatchPow2, /*AllowUndefs=*/false);
5326}
5327
5328 void CombinerHelper::applySDivByPow2(MachineInstr &MI) {
5329 assert(MI.getOpcode() == TargetOpcode::G_SDIV && "Expected SDIV");
5330 auto &SDiv = cast<GenericMachineInstr>(MI);
5331 Register Dst = SDiv.getReg(0);
5332 Register LHS = SDiv.getReg(1);
5333 Register RHS = SDiv.getReg(2);
5334 LLT Ty = MRI.getType(Dst);
5335 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
5336 LLT CCVT =
5337 Ty.isVector() ? LLT::vector(Ty.getElementCount(), 1) : LLT::scalar(1);
5338
5339 // Effectively we want to lower G_SDIV %lhs, %rhs, where %rhs is a power of 2,
5340 // to the following version:
5341 //
5342 // %c1 = G_CTTZ %rhs
5343 // %inexact = G_SUB $bitwidth, %c1
5344 // %sign = G_ASHR %lhs, $(bitwidth - 1)
5345 // %lshr = G_LSHR %sign, %inexact
5346 // %add = G_ADD %lhs, %lshr
5347 // %ashr = G_ASHR %add, %c1
5348 // %ashr = G_SELECT %isoneorallones, %lhs, %ashr
5349 // %zero = G_CONSTANT $0
5350 // %neg = G_NEG %ashr
5351 // %isneg = G_ICMP SLT %rhs, %zero
5352 // %res = G_SELECT %isneg, %neg, %ashr
5353
5354 unsigned BitWidth = Ty.getScalarSizeInBits();
5355 auto Zero = Builder.buildConstant(Ty, 0);
5356
5357 auto Bits = Builder.buildConstant(ShiftAmtTy, BitWidth);
5358 auto C1 = Builder.buildCTTZ(ShiftAmtTy, RHS);
5359 auto Inexact = Builder.buildSub(ShiftAmtTy, Bits, C1);
5360 // Splat the sign bit into the register
5361 auto Sign = Builder.buildAShr(
5362 Ty, LHS, Builder.buildConstant(ShiftAmtTy, BitWidth - 1));
5363
5364 // Add (LHS < 0) ? abs2 - 1 : 0;
5365 auto LSrl = Builder.buildLShr(Ty, Sign, Inexact);
5366 auto Add = Builder.buildAdd(Ty, LHS, LSrl);
5367 auto AShr = Builder.buildAShr(Ty, Add, C1);
5368
5369 // Special case: (sdiv X, 1) -> X
5370 // Special Case: (sdiv X, -1) -> 0-X
5371 auto One = Builder.buildConstant(Ty, 1);
5372 auto MinusOne = Builder.buildConstant(Ty, -1);
5373 auto IsOne = Builder.buildICmp(CmpInst::Predicate::ICMP_EQ, CCVT, RHS, One);
5374 auto IsMinusOne =
5375 Builder.buildICmp(CmpInst::Predicate::ICMP_EQ, CCVT, RHS, MinusOne);
5376 auto IsOneOrMinusOne = Builder.buildOr(CCVT, IsOne, IsMinusOne);
5377 AShr = Builder.buildSelect(Ty, IsOneOrMinusOne, LHS, AShr);
5378
5379 // If divided by a positive value, we're done. Otherwise, the result must be
5380 // negated.
5381 auto Neg = Builder.buildNeg(Ty, AShr);
5382 auto IsNeg = Builder.buildICmp(CmpInst::Predicate::ICMP_SLT, CCVT, RHS, Zero);
5383 Builder.buildSelect(MI.getOperand(0).getReg(), IsNeg, Neg, AShr);
5384 MI.eraseFromParent();
5385}
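// Illustrative 32-bit trace of the lowering above: for %lhs = -7, %rhs = 4,
// %c1 = 2, %inexact = 30, %sign = -7 ashr 31 = all-ones, the G_LSHR yields 3,
// the G_ADD gives -4, and -4 ashr 2 = -1, matching the truncating division
// -7 / 4 = -1. The final select negates the result only when %rhs is negative.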
5386
5387 void CombinerHelper::applyUDivByPow2(MachineInstr &MI) {
5388 assert(MI.getOpcode() == TargetOpcode::G_UDIV && "Expected UDIV");
5389 auto &UDiv = cast<GenericMachineInstr>(MI);
5390 Register Dst = UDiv.getReg(0);
5391 Register LHS = UDiv.getReg(1);
5392 Register RHS = UDiv.getReg(2);
5393 LLT Ty = MRI.getType(Dst);
5394 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
5395
5396 auto C1 = Builder.buildCTTZ(ShiftAmtTy, RHS);
5397 Builder.buildLShr(MI.getOperand(0).getReg(), LHS, C1);
5398 MI.eraseFromParent();
5399}
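// Illustrative example: an unsigned divide by 8 becomes G_LSHR %lhs, 3, since
// G_CTTZ 8 = 3 and dividing by a power of two is a plain logical right shift.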
5400
5401 bool CombinerHelper::matchUMulHToLShr(MachineInstr &MI) {
5402 assert(MI.getOpcode() == TargetOpcode::G_UMULH);
5403 Register RHS = MI.getOperand(2).getReg();
5404 Register Dst = MI.getOperand(0).getReg();
5405 LLT Ty = MRI.getType(Dst);
5406 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
5407 auto MatchPow2ExceptOne = [&](const Constant *C) {
5408 if (auto *CI = dyn_cast<ConstantInt>(C))
5409 return CI->getValue().isPowerOf2() && !CI->getValue().isOne();
5410 return false;
5411 };
5412 if (!matchUnaryPredicate(MRI, RHS, MatchPow2ExceptOne, false))
5413 return false;
5414 return isLegalOrBeforeLegalizer({TargetOpcode::G_LSHR, {Ty, ShiftAmtTy}});
5415}
5416
5417 void CombinerHelper::applyUMulHToLShr(MachineInstr &MI) {
5418 Register LHS = MI.getOperand(1).getReg();
5419 Register RHS = MI.getOperand(2).getReg();
5420 Register Dst = MI.getOperand(0).getReg();
5421 LLT Ty = MRI.getType(Dst);
5422 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
5423 unsigned NumEltBits = Ty.getScalarSizeInBits();
5424
5425 auto LogBase2 = buildLogBase2(RHS, Builder);
5426 auto ShiftAmt =
5427 Builder.buildSub(Ty, Builder.buildConstant(Ty, NumEltBits), LogBase2);
5428 auto Trunc = Builder.buildZExtOrTrunc(ShiftAmtTy, ShiftAmt);
5429 Builder.buildLShr(Dst, LHS, Trunc);
5430 MI.eraseFromParent();
5431}
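// Illustrative 32-bit example: G_UMULH %x, 8 equals (x * 8) >> 32, i.e.
// x >> (32 - 3), so the rewrite above emits G_LSHR %x, 29.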
5432
5433 bool CombinerHelper::matchRedundantNegOperands(MachineInstr &MI,
5434 BuildFnTy &MatchInfo) {
5435 unsigned Opc = MI.getOpcode();
5436 assert(Opc == TargetOpcode::G_FADD || Opc == TargetOpcode::G_FSUB ||
5437 Opc == TargetOpcode::G_FMUL || Opc == TargetOpcode::G_FDIV ||
5438 Opc == TargetOpcode::G_FMAD || Opc == TargetOpcode::G_FMA);
5439
5440 Register Dst = MI.getOperand(0).getReg();
5441 Register X = MI.getOperand(1).getReg();
5442 Register Y = MI.getOperand(2).getReg();
5443 LLT Type = MRI.getType(Dst);
5444
5445 // fold (fadd x, fneg(y)) -> (fsub x, y)
5446 // fold (fadd fneg(y), x) -> (fsub x, y)
5447 // G_FADD is commutative so both cases are checked by m_GFAdd
5448 if (mi_match(Dst, MRI, m_GFAdd(m_Reg(X), m_GFNeg(m_Reg(Y)))) &&
5449 isLegalOrBeforeLegalizer({TargetOpcode::G_FSUB, {Type}})) {
5450 Opc = TargetOpcode::G_FSUB;
5451 }
5452 /// fold (fsub x, fneg(y)) -> (fadd x, y)
5453 else if (mi_match(Dst, MRI, m_GFSub(m_Reg(X), m_GFNeg(m_Reg(Y)))) &&
5454 isLegalOrBeforeLegalizer({TargetOpcode::G_FADD, {Type}})) {
5455 Opc = TargetOpcode::G_FADD;
5456 }
5457 // fold (fmul fneg(x), fneg(y)) -> (fmul x, y)
5458 // fold (fdiv fneg(x), fneg(y)) -> (fdiv x, y)
5459 // fold (fmad fneg(x), fneg(y), z) -> (fmad x, y, z)
5460 // fold (fma fneg(x), fneg(y), z) -> (fma x, y, z)
5461 else if ((Opc == TargetOpcode::G_FMUL || Opc == TargetOpcode::G_FDIV ||
5462 Opc == TargetOpcode::G_FMAD || Opc == TargetOpcode::G_FMA) &&
5463 mi_match(X, MRI, m_GFNeg(m_Reg(X))) &&
5464 mi_match(Y, MRI, m_GFNeg(m_Reg(Y)))) {
5465 // no opcode change
5466 } else
5467 return false;
5468
5469 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5470 Observer.changingInstr(MI);
5471 MI.setDesc(B.getTII().get(Opc));
5472 MI.getOperand(1).setReg(X);
5473 MI.getOperand(2).setReg(Y);
5474 Observer.changedInstr(MI);
5475 };
5476 return true;
5477}
5478
5479 bool CombinerHelper::matchFsubToFneg(MachineInstr &MI, Register &MatchInfo) {
5480 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
5481
5482 Register LHS = MI.getOperand(1).getReg();
5483 MatchInfo = MI.getOperand(2).getReg();
5484 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
5485
5486 const auto LHSCst = Ty.isVector()
5487 ? getFConstantSplat(LHS, MRI, /* allowUndef */ true)
5488 : getFConstantVRegValWithLookThrough(LHS, MRI);
5489 if (!LHSCst)
5490 return false;
5491
5492 // -0.0 is always allowed
5493 if (LHSCst->Value.isNegZero())
5494 return true;
5495
5496 // +0.0 is only allowed if nsz is set.
5497 if (LHSCst->Value.isPosZero())
5498 return MI.getFlag(MachineInstr::FmNsz);
5499
5500 return false;
5501}
5502
5503 void CombinerHelper::applyFsubToFneg(MachineInstr &MI, Register &MatchInfo) {
5504 Register Dst = MI.getOperand(0).getReg();
5505 Builder.buildFNeg(
5506 Dst, Builder.buildFCanonicalize(MRI.getType(Dst), MatchInfo).getReg(0));
5507 eraseInst(MI);
5508}
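// Illustrative example: G_FSUB -0.0, %x always matches and becomes
// G_FNEG (G_FCANONICALIZE %x); G_FSUB +0.0, %x is rewritten only under the
// nsz flag, because 0.0 - 0.0 is +0.0 while fneg(0.0) is -0.0.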
5509
5510/// Checks if \p MI is TargetOpcode::G_FMUL and contractable either
5511/// due to global flags or MachineInstr flags.
5512static bool isContractableFMul(MachineInstr &MI, bool AllowFusionGlobally) {
5513 if (MI.getOpcode() != TargetOpcode::G_FMUL)
5514 return false;
5515 return AllowFusionGlobally || MI.getFlag(MachineInstr::MIFlag::FmContract);
5516}
5517
5518static bool hasMoreUses(const MachineInstr &MI0, const MachineInstr &MI1,
5519 const MachineRegisterInfo &MRI) {
5520 return std::distance(MRI.use_instr_nodbg_begin(MI0.getOperand(0).getReg()),
5521 MRI.use_instr_nodbg_end()) >
5522 std::distance(MRI.use_instr_nodbg_begin(MI1.getOperand(0).getReg()),
5523 MRI.use_instr_nodbg_end());
5524}
5525
5526 bool CombinerHelper::canCombineFMadOrFMA(MachineInstr &MI,
5527 bool &AllowFusionGlobally,
5528 bool &HasFMAD, bool &Aggressive,
5529 bool CanReassociate) {
5530
5531 auto *MF = MI.getMF();
5532 const auto &TLI = *MF->getSubtarget().getTargetLowering();
5533 const TargetOptions &Options = MF->getTarget().Options;
5534 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
5535
5536 if (CanReassociate &&
5537 !(Options.UnsafeFPMath || MI.getFlag(MachineInstr::MIFlag::FmReassoc)))
5538 return false;
5539
5540 // Floating-point multiply-add with intermediate rounding.
5541 HasFMAD = (!isPreLegalize() && TLI.isFMADLegal(MI, DstType));
5542 // Floating-point multiply-add without intermediate rounding.
5543 bool HasFMA = TLI.isFMAFasterThanFMulAndFAdd(*MF, DstType) &&
5544 isLegalOrBeforeLegalizer({TargetOpcode::G_FMA, {DstType}});
5545 // No valid opcode, do not combine.
5546 if (!HasFMAD && !HasFMA)
5547 return false;
5548
5549 AllowFusionGlobally = Options.AllowFPOpFusion == FPOpFusion::Fast ||
5550 Options.UnsafeFPMath || HasFMAD;
5551 // If the addition is not contractable, do not combine.
5552 if (!AllowFusionGlobally && !MI.getFlag(MachineInstr::MIFlag::FmContract))
5553 return false;
5554
5555 Aggressive = TLI.enableAggressiveFMAFusion(DstType);
5556 return true;
5557}
5558
5559 bool CombinerHelper::matchCombineFAddFMulToFMadOrFMA(
5560 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
5561 assert(MI.getOpcode() == TargetOpcode::G_FADD);
5562
5563 bool AllowFusionGlobally, HasFMAD, Aggressive;
5564 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
5565 return false;
5566
5567 Register Op1 = MI.getOperand(1).getReg();
5568 Register Op2 = MI.getOperand(2).getReg();
5569 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
5570 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
5571 unsigned PreferredFusedOpcode =
5572 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
5573
5574 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
5575 // prefer to fold the multiply with fewer uses.
5576 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
5577 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
5578 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
5579 std::swap(LHS, RHS);
5580 }
5581
5582 // fold (fadd (fmul x, y), z) -> (fma x, y, z)
5583 if (isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
5584 (Aggressive || MRI.hasOneNonDBGUse(LHS.Reg))) {
5585 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5586 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5587 {LHS.MI->getOperand(1).getReg(),
5588 LHS.MI->getOperand(2).getReg(), RHS.Reg});
5589 };
5590 return true;
5591 }
5592
5593 // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
5594 if (isContractableFMul(*RHS.MI, AllowFusionGlobally) &&
5595 (Aggressive || MRI.hasOneNonDBGUse(RHS.Reg))) {
5596 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5597 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5598 {RHS.MI->getOperand(1).getReg(),
5599 RHS.MI->getOperand(2).getReg(), LHS.Reg});
5600 };
5601 return true;
5602 }
5603
5604 return false;
5605}
5606
5607 bool CombinerHelper::matchCombineFAddFpExtFMulToFMadOrFMA(
5608 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
5609 assert(MI.getOpcode() == TargetOpcode::G_FADD);
5610
5611 bool AllowFusionGlobally, HasFMAD, Aggressive;
5612 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
5613 return false;
5614
5615 const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
5616 Register Op1 = MI.getOperand(1).getReg();
5617 Register Op2 = MI.getOperand(2).getReg();
5618 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
5619 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
5620 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
5621
5622 unsigned PreferredFusedOpcode =
5623 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
5624
5625 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
5626 // prefer to fold the multiply with fewer uses.
5627 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
5628 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
5629 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
5630 std::swap(LHS, RHS);
5631 }
5632
5633 // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
5634 MachineInstr *FpExtSrc;
5635 if (mi_match(LHS.Reg, MRI, m_GFPExt(m_MInstr(FpExtSrc))) &&
5636 isContractableFMul(*FpExtSrc, AllowFusionGlobally) &&
5637 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
5638 MRI.getType(FpExtSrc->getOperand(1).getReg()))) {
5639 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5640 auto FpExtX = B.buildFPExt(DstType, FpExtSrc->getOperand(1).getReg());
5641 auto FpExtY = B.buildFPExt(DstType, FpExtSrc->getOperand(2).getReg());
5642 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5643 {FpExtX.getReg(0), FpExtY.getReg(0), RHS.Reg});
5644 };
5645 return true;
5646 }
5647
5648 // fold (fadd z, (fpext (fmul x, y))) -> (fma (fpext x), (fpext y), z)
5649 // Note: Commutes FADD operands.
5650 if (mi_match(RHS.Reg, MRI, m_GFPExt(m_MInstr(FpExtSrc))) &&
5651 isContractableFMul(*FpExtSrc, AllowFusionGlobally) &&
5652 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
5653 MRI.getType(FpExtSrc->getOperand(1).getReg()))) {
5654 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5655 auto FpExtX = B.buildFPExt(DstType, FpExtSrc->getOperand(1).getReg());
5656 auto FpExtY = B.buildFPExt(DstType, FpExtSrc->getOperand(2).getReg());
5657 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5658 {FpExtX.getReg(0), FpExtY.getReg(0), LHS.Reg});
5659 };
5660 return true;
5661 }
5662
5663 return false;
5664}
5665
5666 bool CombinerHelper::matchCombineFAddFMAFMulToFMadOrFMA(
5667 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
5668 assert(MI.getOpcode() == TargetOpcode::G_FADD);
5669
5670 bool AllowFusionGlobally, HasFMAD, Aggressive;
5671 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive, true))
5672 return false;
5673
5674 Register Op1 = MI.getOperand(1).getReg();
5675 Register Op2 = MI.getOperand(2).getReg();
5676 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
5677 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
5678 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
5679
5680 unsigned PreferredFusedOpcode =
5681 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
5682
5683 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
5684 // prefer to fold the multiply with fewer uses.
5685 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
5686 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
5687 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
5688 std::swap(LHS, RHS);
5689 }
5690
5691 MachineInstr *FMA = nullptr;
5692 Register Z;
5693 // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z))
5694 if (LHS.MI->getOpcode() == PreferredFusedOpcode &&
5695 (MRI.getVRegDef(LHS.MI->getOperand(3).getReg())->getOpcode() ==
5696 TargetOpcode::G_FMUL) &&
5697 MRI.hasOneNonDBGUse(LHS.MI->getOperand(0).getReg()) &&
5698 MRI.hasOneNonDBGUse(LHS.MI->getOperand(3).getReg())) {
5699 FMA = LHS.MI;
5700 Z = RHS.Reg;
5701 }
5702 // fold (fadd z, (fma x, y, (fmul u, v))) -> (fma x, y, (fma u, v, z))
5703 else if (RHS.MI->getOpcode() == PreferredFusedOpcode &&
5704 (MRI.getVRegDef(RHS.MI->getOperand(3).getReg())->getOpcode() ==
5705 TargetOpcode::G_FMUL) &&
5706 MRI.hasOneNonDBGUse(RHS.MI->getOperand(0).getReg()) &&
5707 MRI.hasOneNonDBGUse(RHS.MI->getOperand(3).getReg())) {
5708 Z = LHS.Reg;
5709 FMA = RHS.MI;
5710 }
5711
5712 if (FMA) {
5713 MachineInstr *FMulMI = MRI.getVRegDef(FMA->getOperand(3).getReg());
5714 Register X = FMA->getOperand(1).getReg();
5715 Register Y = FMA->getOperand(2).getReg();
5716 Register U = FMulMI->getOperand(1).getReg();
5717 Register V = FMulMI->getOperand(2).getReg();
5718
5719 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5720 Register InnerFMA = MRI.createGenericVirtualRegister(DstTy);
5721 B.buildInstr(PreferredFusedOpcode, {InnerFMA}, {U, V, Z});
5722 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5723 {X, Y, InnerFMA});
5724 };
5725 return true;
5726 }
5727
5728 return false;
5729}
5730
5731 bool CombinerHelper::matchCombineFAddFpExtFMAFMulToFMadOrFMAAggressive(
5732 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
5733 assert(MI.getOpcode() == TargetOpcode::G_FADD);
5734
5735 bool AllowFusionGlobally, HasFMAD, Aggressive;
5736 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
5737 return false;
5738
5739 if (!Aggressive)
5740 return false;
5741
5742 const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
5743 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
5744 Register Op1 = MI.getOperand(1).getReg();
5745 Register Op2 = MI.getOperand(2).getReg();
5746 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
5747 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
5748
5749 unsigned PreferredFusedOpcode =
5750 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
5751
5752 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
5753 // prefer to fold the multiply with fewer uses.
5754 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
5755 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
5756 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
5757 std::swap(LHS, RHS);
5758 }
5759
5760 // Builds: (fma x, y, (fma (fpext u), (fpext v), z))
5761 auto buildMatchInfo = [=, &MI](Register U, Register V, Register Z, Register X,
5762 Register Y, MachineIRBuilder &B) {
5763 Register FpExtU = B.buildFPExt(DstType, U).getReg(0);
5764 Register FpExtV = B.buildFPExt(DstType, V).getReg(0);
5765 Register InnerFMA =
5766 B.buildInstr(PreferredFusedOpcode, {DstType}, {FpExtU, FpExtV, Z})
5767 .getReg(0);
5768 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5769 {X, Y, InnerFMA});
5770 };
5771
5772 MachineInstr *FMulMI, *FMAMI;
5773 // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
5774 // -> (fma x, y, (fma (fpext u), (fpext v), z))
5775 if (LHS.MI->getOpcode() == PreferredFusedOpcode &&
5776 mi_match(LHS.MI->getOperand(3).getReg(), MRI,
5777 m_GFPExt(m_MInstr(FMulMI))) &&
5778 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
5779 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
5780 MRI.getType(FMulMI->getOperand(0).getReg()))) {
5781 MatchInfo = [=](MachineIRBuilder &B) {
5782 buildMatchInfo(FMulMI->getOperand(1).getReg(),
5783 FMulMI->getOperand(2).getReg(), RHS.Reg,
5784 LHS.MI->getOperand(1).getReg(),
5785 LHS.MI->getOperand(2).getReg(), B);
5786 };
5787 return true;
5788 }
5789
5790 // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
5791 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
5792 // FIXME: This turns two single-precision and one double-precision
5793 // operation into two double-precision operations, which might not be
5794 // interesting for all targets, especially GPUs.
5795 if (mi_match(LHS.Reg, MRI, m_GFPExt(m_MInstr(FMAMI))) &&
5796 FMAMI->getOpcode() == PreferredFusedOpcode) {
5797 MachineInstr *FMulMI = MRI.getVRegDef(FMAMI->getOperand(3).getReg());
5798 if (isContractableFMul(*FMulMI, AllowFusionGlobally) &&
5799 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
5800 MRI.getType(FMAMI->getOperand(0).getReg()))) {
5801 MatchInfo = [=](MachineIRBuilder &B) {
5802 Register X = FMAMI->getOperand(1).getReg();
5803 Register Y = FMAMI->getOperand(2).getReg();
5804 X = B.buildFPExt(DstType, X).getReg(0);
5805 Y = B.buildFPExt(DstType, Y).getReg(0);
5806 buildMatchInfo(FMulMI->getOperand(1).getReg(),
5807 FMulMI->getOperand(2).getReg(), RHS.Reg, X, Y, B);
5808 };
5809
5810 return true;
5811 }
5812 }
5813
5814 // fold (fadd z, (fma x, y, (fpext (fmul u, v)))
5815 // -> (fma x, y, (fma (fpext u), (fpext v), z))
5816 if (RHS.MI->getOpcode() == PreferredFusedOpcode &&
5817 mi_match(RHS.MI->getOperand(3).getReg(), MRI,
5818 m_GFPExt(m_MInstr(FMulMI))) &&
5819 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
5820 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
5821 MRI.getType(FMulMI->getOperand(0).getReg()))) {
5822 MatchInfo = [=](MachineIRBuilder &B) {
5823 buildMatchInfo(FMulMI->getOperand(1).getReg(),
5824 FMulMI->getOperand(2).getReg(), LHS.Reg,
5825 RHS.MI->getOperand(1).getReg(),
5826 RHS.MI->getOperand(2).getReg(), B);
5827 };
5828 return true;
5829 }
5830
5831 // fold (fadd z, (fpext (fma x, y, (fmul u, v)))
5832 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
5833 // FIXME: This turns two single-precision and one double-precision
5834 // operation into two double-precision operations, which might not be
5835 // interesting for all targets, especially GPUs.
5836 if (mi_match(RHS.Reg, MRI, m_GFPExt(m_MInstr(FMAMI))) &&
5837 FMAMI->getOpcode() == PreferredFusedOpcode) {
5838 MachineInstr *FMulMI = MRI.getVRegDef(FMAMI->getOperand(3).getReg());
5839 if (isContractableFMul(*FMulMI, AllowFusionGlobally) &&
5840 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
5841 MRI.getType(FMAMI->getOperand(0).getReg()))) {
5842 MatchInfo = [=](MachineIRBuilder &B) {
5843 Register X = FMAMI->getOperand(1).getReg();
5844 Register Y = FMAMI->getOperand(2).getReg();
5845 X = B.buildFPExt(DstType, X).getReg(0);
5846 Y = B.buildFPExt(DstType, Y).getReg(0);
5847 buildMatchInfo(FMulMI->getOperand(1).getReg(),
5848 FMulMI->getOperand(2).getReg(), LHS.Reg, X, Y, B);
5849 };
5850 return true;
5851 }
5852 }
5853
5854 return false;
5855}
5856
5857 bool CombinerHelper::matchCombineFSubFMulToFMadOrFMA(
5858 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
5859 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
5860
5861 bool AllowFusionGlobally, HasFMAD, Aggressive;
5862 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
5863 return false;
5864
5865 Register Op1 = MI.getOperand(1).getReg();
5866 Register Op2 = MI.getOperand(2).getReg();
5867 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
5868 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
5869 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
5870
5871 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
5872 // prefer to fold the multiply with fewer uses.
5873 int FirstMulHasFewerUses = true;
5874 if (isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
5875 isContractableFMul(*RHS.MI, AllowFusionGlobally) &&
5876 hasMoreUses(*LHS.MI, *RHS.MI, MRI))
5877 FirstMulHasFewerUses = false;
5878
5879 unsigned PreferredFusedOpcode =
5880 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
5881
5882 // fold (fsub (fmul x, y), z) -> (fma x, y, -z)
5883 if (FirstMulHasFewerUses &&
5884 (isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
5885 (Aggressive || MRI.hasOneNonDBGUse(LHS.Reg)))) {
5886 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5887 Register NegZ = B.buildFNeg(DstTy, RHS.Reg).getReg(0);
5888 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5889 {LHS.MI->getOperand(1).getReg(),
5890 LHS.MI->getOperand(2).getReg(), NegZ});
5891 };
5892 return true;
5893 }
5894 // fold (fsub x, (fmul y, z)) -> (fma -y, z, x)
5895 else if ((isContractableFMul(*RHS.MI, AllowFusionGlobally) &&
5896 (Aggressive || MRI.hasOneNonDBGUse(RHS.Reg)))) {
5897 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5898 Register NegY =
5899 B.buildFNeg(DstTy, RHS.MI->getOperand(1).getReg()).getReg(0);
5900 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5901 {NegY, RHS.MI->getOperand(2).getReg(), LHS.Reg});
5902 };
5903 return true;
5904 }
5905
5906 return false;
5907}
5908
5909 bool CombinerHelper::matchCombineFSubFNegFMulToFMadOrFMA(
5910 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
5911 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
5912
5913 bool AllowFusionGlobally, HasFMAD, Aggressive;
5914 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
5915 return false;
5916
5917 Register LHSReg = MI.getOperand(1).getReg();
5918 Register RHSReg = MI.getOperand(2).getReg();
5919 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
5920
5921 unsigned PreferredFusedOpcode =
5922 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
5923
5924 MachineInstr *FMulMI;
5925 // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
5926 if (mi_match(LHSReg, MRI, m_GFNeg(m_MInstr(FMulMI))) &&
5927 (Aggressive || (MRI.hasOneNonDBGUse(LHSReg) &&
5928 MRI.hasOneNonDBGUse(FMulMI->getOperand(0).getReg()))) &&
5929 isContractableFMul(*FMulMI, AllowFusionGlobally)) {
5930 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5931 Register NegX =
5932 B.buildFNeg(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
5933 Register NegZ = B.buildFNeg(DstTy, RHSReg).getReg(0);
5934 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5935 {NegX, FMulMI->getOperand(2).getReg(), NegZ});
5936 };
5937 return true;
5938 }
5939
5940 // fold (fsub x, (fneg (fmul y, z))) -> (fma y, z, x)
5941 if (mi_match(RHSReg, MRI, m_GFNeg(m_MInstr(FMulMI))) &&
5942 (Aggressive || (MRI.hasOneNonDBGUse(RHSReg) &&
5943 MRI.hasOneNonDBGUse(FMulMI->getOperand(0).getReg()))) &&
5944 isContractableFMul(*FMulMI, AllowFusionGlobally)) {
5945 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5946 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5947 {FMulMI->getOperand(1).getReg(),
5948 FMulMI->getOperand(2).getReg(), LHSReg});
5949 };
5950 return true;
5951 }
5952
5953 return false;
5954}
5955
5956 bool CombinerHelper::matchCombineFSubFpExtFMulToFMadOrFMA(
5957 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
5958 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
5959
5960 bool AllowFusionGlobally, HasFMAD, Aggressive;
5961 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
5962 return false;
5963
5964 Register LHSReg = MI.getOperand(1).getReg();
5965 Register RHSReg = MI.getOperand(2).getReg();
5966 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
5967
5968 unsigned PreferredFusedOpcode =
5969 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
5970
5971 MachineInstr *FMulMI;
5972 // fold (fsub (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), (fneg z))
5973 if (mi_match(LHSReg, MRI, m_GFPExt(m_MInstr(FMulMI))) &&
5974 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
5975 (Aggressive || MRI.hasOneNonDBGUse(LHSReg))) {
5976 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5977 Register FpExtX =
5978 B.buildFPExt(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
5979 Register FpExtY =
5980 B.buildFPExt(DstTy, FMulMI->getOperand(2).getReg()).getReg(0);
5981 Register NegZ = B.buildFNeg(DstTy, RHSReg).getReg(0);
5982 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5983 {FpExtX, FpExtY, NegZ});
5984 };
5985 return true;
5986 }
5987
5988 // fold (fsub x, (fpext (fmul y, z))) -> (fma (fneg (fpext y)), (fpext z), x)
5989 if (mi_match(RHSReg, MRI, m_GFPExt(m_MInstr(FMulMI))) &&
5990 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
5991 (Aggressive || MRI.hasOneNonDBGUse(RHSReg))) {
5992 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5993 Register FpExtY =
5994 B.buildFPExt(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
5995 Register NegY = B.buildFNeg(DstTy, FpExtY).getReg(0);
5996 Register FpExtZ =
5997 B.buildFPExt(DstTy, FMulMI->getOperand(2).getReg()).getReg(0);
5998 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5999 {NegY, FpExtZ, LHSReg});
6000 };
6001 return true;
6002 }
6003
6004 return false;
6005}
6006
6007 bool CombinerHelper::matchCombineFSubFpExtFNegFMulToFMadOrFMA(
6008 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
6009 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
6010
6011 bool AllowFusionGlobally, HasFMAD, Aggressive;
6012 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6013 return false;
6014
6015 const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
6016 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6017 Register LHSReg = MI.getOperand(1).getReg();
6018 Register RHSReg = MI.getOperand(2).getReg();
6019
6020 unsigned PreferredFusedOpcode =
6021 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6022
6023 auto buildMatchInfo = [=](Register Dst, Register X, Register Y, Register Z,
6024 MachineIRBuilder &B) {
6025 Register FpExtX = B.buildFPExt(DstTy, X).getReg(0);
6026 Register FpExtY = B.buildFPExt(DstTy, Y).getReg(0);
6027 B.buildInstr(PreferredFusedOpcode, {Dst}, {FpExtX, FpExtY, Z});
6028 };
6029
6030 MachineInstr *FMulMI;
6031 // fold (fsub (fpext (fneg (fmul x, y))), z) ->
6032 // (fneg (fma (fpext x), (fpext y), z))
6033 // fold (fsub (fneg (fpext (fmul x, y))), z) ->
6034 // (fneg (fma (fpext x), (fpext y), z))
6035 if ((mi_match(LHSReg, MRI, m_GFPExt(m_GFNeg(m_MInstr(FMulMI)))) ||
6036 mi_match(LHSReg, MRI, m_GFNeg(m_GFPExt(m_MInstr(FMulMI))))) &&
6037 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6038 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstTy,
6039 MRI.getType(FMulMI->getOperand(0).getReg()))) {
6040 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6041 Register FMAReg = MRI.createGenericVirtualRegister(DstTy);
6042 buildMatchInfo(FMAReg, FMulMI->getOperand(1).getReg(),
6043 FMulMI->getOperand(2).getReg(), RHSReg, B);
6044 B.buildFNeg(MI.getOperand(0).getReg(), FMAReg);
6045 };
6046 return true;
6047 }
6048
6049 // fold (fsub x, (fpext (fneg (fmul y, z)))) -> (fma (fpext y), (fpext z), x)
6050 // fold (fsub x, (fneg (fpext (fmul y, z)))) -> (fma (fpext y), (fpext z), x)
6051 if ((mi_match(RHSReg, MRI, m_GFPExt(m_GFNeg(m_MInstr(FMulMI)))) ||
6052 mi_match(RHSReg, MRI, m_GFNeg(m_GFPExt(m_MInstr(FMulMI))))) &&
6053 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6054 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstTy,
6055 MRI.getType(FMulMI->getOperand(0).getReg()))) {
6056 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6057 buildMatchInfo(MI.getOperand(0).getReg(), FMulMI->getOperand(1).getReg(),
6058 FMulMI->getOperand(2).getReg(), LHSReg, B);
6059 };
6060 return true;
6061 }
6062
6063 return false;
6064}
6065
6066 bool CombinerHelper::matchCombineFMinMaxNaN(MachineInstr &MI,
6067 unsigned &IdxToPropagate) {
6068 bool PropagateNaN;
6069 switch (MI.getOpcode()) {
6070 default:
6071 return false;
6072 case TargetOpcode::G_FMINNUM:
6073 case TargetOpcode::G_FMAXNUM:
6074 PropagateNaN = false;
6075 break;
6076 case TargetOpcode::G_FMINIMUM:
6077 case TargetOpcode::G_FMAXIMUM:
6078 PropagateNaN = true;
6079 break;
6080 }
6081
6082 auto MatchNaN = [&](unsigned Idx) {
6083 Register MaybeNaNReg = MI.getOperand(Idx).getReg();
6084 const ConstantFP *MaybeCst = getConstantFPVRegVal(MaybeNaNReg, MRI);
6085 if (!MaybeCst || !MaybeCst->getValueAPF().isNaN())
6086 return false;
6087 IdxToPropagate = PropagateNaN ? Idx : (Idx == 1 ? 2 : 1);
6088 return true;
6089 };
6090
6091 return MatchNaN(1) || MatchNaN(2);
6092}
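// Illustrative example: G_FMINNUM %x, NaN folds to %x (IdxToPropagate selects
// the non-NaN side), while G_FMINIMUM %x, NaN folds to the NaN operand,
// matching the IEEE-754 minNum vs. minimum semantics the two opcodes model.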
6093
6094 bool CombinerHelper::matchAddSubSameReg(MachineInstr &MI, Register &Src) {
6095 assert(MI.getOpcode() == TargetOpcode::G_ADD && "Expected a G_ADD");
6096 Register LHS = MI.getOperand(1).getReg();
6097 Register RHS = MI.getOperand(2).getReg();
6098
6099 // Helper lambda to check for opportunities for
6100 // A + (B - A) -> B
6101 // (B - A) + A -> B
6102 auto CheckFold = [&](Register MaybeSub, Register MaybeSameReg) {
6103 Register Reg;
6104 return mi_match(MaybeSub, MRI, m_GSub(m_Reg(Src), m_Reg(Reg))) &&
6105 Reg == MaybeSameReg;
6106 };
6107 return CheckFold(LHS, RHS) || CheckFold(RHS, LHS);
6108}
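// Illustrative example: with %d = G_SUB %b, %a, the expression G_ADD %a, %d
// computes a + (b - a) = b, so the match binds Src to %b and the add can be
// replaced by a copy of %b.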
6109
6110 bool CombinerHelper::matchBuildVectorIdentityFold(MachineInstr &MI,
6111 Register &MatchInfo) {
6112 // This combine folds the following patterns:
6113 //
6114 // G_BUILD_VECTOR_TRUNC (G_BITCAST(x), G_LSHR(G_BITCAST(x), k))
6115 // G_BUILD_VECTOR(G_TRUNC(G_BITCAST(x)), G_TRUNC(G_LSHR(G_BITCAST(x), k)))
6116 // into
6117 // x
6118 // if
6119 // k == sizeof(VecEltTy)/2
6120 // type(x) == type(dst)
6121 //
6122 // G_BUILD_VECTOR(G_TRUNC(G_BITCAST(x)), undef)
6123 // into
6124 // x
6125 // if
6126 // type(x) == type(dst)
6127
6128 LLT DstVecTy = MRI.getType(MI.getOperand(0).getReg());
6129 LLT DstEltTy = DstVecTy.getElementType();
6130
6131 Register Lo, Hi;
6132
6133 if (mi_match(
6134 MI, MRI,
6136 MatchInfo = Lo;
6137 return MRI.getType(MatchInfo) == DstVecTy;
6138 }
6139
6140 std::optional<ValueAndVReg> ShiftAmount;
6141 const auto LoPattern = m_GBitcast(m_Reg(Lo));
6142 const auto HiPattern = m_GLShr(m_GBitcast(m_Reg(Hi)), m_GCst(ShiftAmount));
6143 if (mi_match(
6144 MI, MRI,
6145 m_any_of(m_GBuildVectorTrunc(LoPattern, HiPattern),
6146 m_GBuildVector(m_GTrunc(LoPattern), m_GTrunc(HiPattern))))) {
6147 if (Lo == Hi && ShiftAmount->Value == DstEltTy.getSizeInBits()) {
6148 MatchInfo = Lo;
6149 return MRI.getType(MatchInfo) == DstVecTy;
6150 }
6151 }
6152
6153 return false;
6154}
6155
6156 bool CombinerHelper::matchTruncBuildVectorFold(MachineInstr &MI,
6157 Register &MatchInfo) {
6158 // Replace (G_TRUNC (G_BITCAST (G_BUILD_VECTOR x, y)) with just x
6159 // if type(x) == type(G_TRUNC)
6160 if (!mi_match(MI.getOperand(1).getReg(), MRI,
6161 m_GBitcast(m_GBuildVector(m_Reg(MatchInfo), m_Reg()))))
6162 return false;
6163
6164 return MRI.getType(MatchInfo) == MRI.getType(MI.getOperand(0).getReg());
6165}
6166
6167 bool CombinerHelper::matchTruncLshrBuildVectorFold(MachineInstr &MI,
6168 Register &MatchInfo) {
6169 // Replace (G_TRUNC (G_LSHR (G_BITCAST (G_BUILD_VECTOR x, y)), K)) with
6170 // y if K == size of vector element type
6171 std::optional<ValueAndVReg> ShiftAmt;
6172 if (!mi_match(MI.getOperand(1).getReg(), MRI,
6173 m_GLShr(m_GBitcast(m_GBuildVector(m_Reg(MatchInfo), m_Reg())),
6174 m_GCst(ShiftAmt))))
6175 return false;
6176
6177 LLT MatchTy = MRI.getType(MatchInfo);
6178 return ShiftAmt->Value.getZExtValue() == MatchTy.getSizeInBits() &&
6179 MatchTy == MRI.getType(MI.getOperand(0).getReg());
6180}
6181
6182unsigned CombinerHelper::getFPMinMaxOpcForSelect(
6183 CmpInst::Predicate Pred, LLT DstTy,
6184 SelectPatternNaNBehaviour VsNaNRetVal) const {
6185 assert(VsNaNRetVal != SelectPatternNaNBehaviour::NOT_APPLICABLE &&
6186 "Expected a NaN behaviour?");
6187 // Choose an opcode based off of legality or the behaviour when one of the
6188 // LHS/RHS may be NaN.
6189 switch (Pred) {
6190 default:
6191 return 0;
6192 case CmpInst::FCMP_UGT:
6193 case CmpInst::FCMP_UGE:
6194 case CmpInst::FCMP_OGT:
6195 case CmpInst::FCMP_OGE:
6196 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_OTHER)
6197 return TargetOpcode::G_FMAXNUM;
6198 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_NAN)
6199 return TargetOpcode::G_FMAXIMUM;
6200 if (isLegal({TargetOpcode::G_FMAXNUM, {DstTy}}))
6201 return TargetOpcode::G_FMAXNUM;
6202 if (isLegal({TargetOpcode::G_FMAXIMUM, {DstTy}}))
6203 return TargetOpcode::G_FMAXIMUM;
6204 return 0;
6205 case CmpInst::FCMP_ULT:
6206 case CmpInst::FCMP_ULE:
6207 case CmpInst::FCMP_OLT:
6208 case CmpInst::FCMP_OLE:
6209 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_OTHER)
6210 return TargetOpcode::G_FMINNUM;
6211 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_NAN)
6212 return TargetOpcode::G_FMINIMUM;
6213 if (isLegal({TargetOpcode::G_FMINNUM, {DstTy}}))
6214 return TargetOpcode::G_FMINNUM;
6215 if (!isLegal({TargetOpcode::G_FMINIMUM, {DstTy}}))
6216 return 0;
6217 return TargetOpcode::G_FMINIMUM;
6218 }
6219}
6220
6221CombinerHelper::SelectPatternNaNBehaviour
6222CombinerHelper::computeRetValAgainstNaN(Register LHS, Register RHS,
6223 bool IsOrderedComparison) const {
6224 bool LHSSafe = isKnownNeverNaN(LHS, MRI);
6225 bool RHSSafe = isKnownNeverNaN(RHS, MRI);
6226 // Completely unsafe.
6227 if (!LHSSafe && !RHSSafe)
6228 return SelectPatternNaNBehaviour::NOT_APPLICABLE;
6229 if (LHSSafe && RHSSafe)
6230 return SelectPatternNaNBehaviour::RETURNS_ANY;
6231 // An ordered comparison will return false when given a NaN, so it
6232 // returns the RHS.
6233 if (IsOrderedComparison)
6234 return LHSSafe ? SelectPatternNaNBehaviour::RETURNS_NAN
6235 : SelectPatternNaNBehaviour::RETURNS_OTHER;
6236 // An unordered comparison will return true when given a NaN, so it
6237 // returns the LHS.
6238 return LHSSafe ? SelectPatternNaNBehaviour::RETURNS_OTHER
6239 : SelectPatternNaNBehaviour::RETURNS_NAN;
6240}
6241
6242bool CombinerHelper::matchFPSelectToMinMax(Register Dst, Register Cond,
6243 Register TrueVal, Register FalseVal,
6244 BuildFnTy &MatchInfo) {
6245 // Match: select (fcmp cond x, y) x, y
6246 // select (fcmp cond x, y) y, x
6247 // And turn it into fminnum/fmaxnum or fmin/fmax based off of the condition.
6248 LLT DstTy = MRI.getType(Dst);
6249 // Bail out early on pointers, since we'll never want to fold to a min/max.
6250 if (DstTy.isPointer())
6251 return false;
6252 // Match a floating point compare with a less-than/greater-than predicate.
6253 // TODO: Allow multiple users of the compare if they are all selects.
6254 CmpInst::Predicate Pred;
6255 Register CmpLHS, CmpRHS;
6256 if (!mi_match(Cond, MRI,
6257 m_OneNonDBGUse(
6258 m_GFCmp(m_Pred(Pred), m_Reg(CmpLHS), m_Reg(CmpRHS)))) ||
6259 CmpInst::isEquality(Pred))
6260 return false;
6261 SelectPatternNaNBehaviour ResWithKnownNaNInfo =
6262 computeRetValAgainstNaN(CmpLHS, CmpRHS, CmpInst::isOrdered(Pred));
6263 if (ResWithKnownNaNInfo == SelectPatternNaNBehaviour::NOT_APPLICABLE)
6264 return false;
6265 if (TrueVal == CmpRHS && FalseVal == CmpLHS) {
6266 std::swap(CmpLHS, CmpRHS);
6267 Pred = CmpInst::getSwappedPredicate(Pred);
6268 if (ResWithKnownNaNInfo == SelectPatternNaNBehaviour::RETURNS_NAN)
6269 ResWithKnownNaNInfo = SelectPatternNaNBehaviour::RETURNS_OTHER;
6270 else if (ResWithKnownNaNInfo == SelectPatternNaNBehaviour::RETURNS_OTHER)
6271 ResWithKnownNaNInfo = SelectPatternNaNBehaviour::RETURNS_NAN;
6272 }
6273 if (TrueVal != CmpLHS || FalseVal != CmpRHS)
6274 return false;
6275 // Decide what type of max/min this should be based off of the predicate.
6276 unsigned Opc = getFPMinMaxOpcForSelect(Pred, DstTy, ResWithKnownNaNInfo);
6277 if (!Opc || !isLegal({Opc, {DstTy}}))
6278 return false;
6279 // Comparisons between signed zero and zero may have different results...
6280 // unless we have fmaximum/fminimum. In that case, we know -0 < 0.
6281 if (Opc != TargetOpcode::G_FMAXIMUM && Opc != TargetOpcode::G_FMINIMUM) {
6282 // We don't know if a comparison between two 0s will give us a consistent
6283 // result. Be conservative and only proceed if at least one side is
6284 // non-zero.
6285 auto KnownNonZeroSide = getFConstantVRegValWithLookThrough(CmpLHS, MRI);
6286 if (!KnownNonZeroSide || !KnownNonZeroSide->Value.isNonZero()) {
6287 KnownNonZeroSide = getFConstantVRegValWithLookThrough(CmpRHS, MRI);
6288 if (!KnownNonZeroSide || !KnownNonZeroSide->Value.isNonZero())
6289 return false;
6290 }
6291 }
6292 MatchInfo = [=](MachineIRBuilder &B) {
6293 B.buildInstr(Opc, {Dst}, {CmpLHS, CmpRHS});
6294 };
6295 return true;
6296}
6297
6298 bool CombinerHelper::matchSimplifySelectToMinMax(MachineInstr &MI,
6299 BuildFnTy &MatchInfo) {
6300 // TODO: Handle integer cases.
6301 assert(MI.getOpcode() == TargetOpcode::G_SELECT);
6302 // Condition may be fed by a truncated compare.
6303 Register Cond = MI.getOperand(1).getReg();
6304 Register MaybeTrunc;
6305 if (mi_match(Cond, MRI, m_OneNonDBGUse(m_GTrunc(m_Reg(MaybeTrunc)))))
6306 Cond = MaybeTrunc;
6307 Register Dst = MI.getOperand(0).getReg();
6308 Register TrueVal = MI.getOperand(2).getReg();
6309 Register FalseVal = MI.getOperand(3).getReg();
6310 return matchFPSelectToMinMax(Dst, Cond, TrueVal, FalseVal, MatchInfo);
6311}
6312
6313 bool CombinerHelper::matchRedundantBinOpInEquality(MachineInstr &MI,
6314 BuildFnTy &MatchInfo) {
6315 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
6316 // (X + Y) == X --> Y == 0
6317 // (X + Y) != X --> Y != 0
6318 // (X - Y) == X --> Y == 0
6319 // (X - Y) != X --> Y != 0
6320 // (X ^ Y) == X --> Y == 0
6321 // (X ^ Y) != X --> Y != 0
6322 Register Dst = MI.getOperand(0).getReg();
6323 CmpInst::Predicate Pred;
6324 Register X, Y, OpLHS, OpRHS;
6325 bool MatchedSub = mi_match(
6326 Dst, MRI,
6327 m_c_GICmp(m_Pred(Pred), m_Reg(X), m_GSub(m_Reg(OpLHS), m_Reg(Y))));
6328 if (MatchedSub && X != OpLHS)
6329 return false;
6330 if (!MatchedSub) {
6331 if (!mi_match(Dst, MRI,
6332 m_c_GICmp(m_Pred(Pred), m_Reg(X),
6333 m_any_of(m_GAdd(m_Reg(OpLHS), m_Reg(OpRHS)),
6334 m_GXor(m_Reg(OpLHS), m_Reg(OpRHS))))))
6335 return false;
6336 Y = X == OpLHS ? OpRHS : X == OpRHS ? OpLHS : Register();
6337 }
6338 MatchInfo = [=](MachineIRBuilder &B) {
6339 auto Zero = B.buildConstant(MRI.getType(Y), 0);
6340 B.buildICmp(Pred, Dst, Y, Zero);
6341 };
6342 return CmpInst::isEquality(Pred) && Y.isValid();
6343}
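// Illustrative example: G_ICMP eq, (G_ADD %x, %y), %x holds exactly when
// %y == 0, because addition is invertible modulo 2^n, so the compare is
// rebuilt as G_ICMP eq, %y, 0; the sub and xor forms follow the same argument.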
6344
6345 bool CombinerHelper::matchShiftsTooBig(MachineInstr &MI) {
6346 Register ShiftReg = MI.getOperand(2).getReg();
6347 LLT ResTy = MRI.getType(MI.getOperand(0).getReg());
6348 auto IsShiftTooBig = [&](const Constant *C) {
6349 auto *CI = dyn_cast<ConstantInt>(C);
6350 return CI && CI->uge(ResTy.getScalarSizeInBits());
6351 };
6352 return matchUnaryPredicate(MRI, ShiftReg, IsShiftTooBig);
6353}
6354
6355 bool CombinerHelper::matchCommuteConstantToRHS(MachineInstr &MI) {
6356 unsigned LHSOpndIdx = 1;
6357 unsigned RHSOpndIdx = 2;
6358 switch (MI.getOpcode()) {
6359 case TargetOpcode::G_UADDO:
6360 case TargetOpcode::G_SADDO:
6361 case TargetOpcode::G_UMULO:
6362 case TargetOpcode::G_SMULO:
6363 LHSOpndIdx = 2;
6364 RHSOpndIdx = 3;
6365 break;
6366 default:
6367 break;
6368 }
6369 Register LHS = MI.getOperand(LHSOpndIdx).getReg();
6370 Register RHS = MI.getOperand(RHSOpndIdx).getReg();
6371 if (!getIConstantVRegVal(LHS, MRI)) {
6372 // Skip commuting if LHS is not a constant. But, LHS may be a
6373 // G_CONSTANT_FOLD_BARRIER. If so we commute as long as we don't already
6374 // have a constant on the RHS.
6375 if (MRI.getVRegDef(LHS)->getOpcode() !=
6376 TargetOpcode::G_CONSTANT_FOLD_BARRIER)
6377 return false;
6378 }
6379 // Commute as long as RHS is not a constant or G_CONSTANT_FOLD_BARRIER.
6380 return MRI.getVRegDef(RHS)->getOpcode() !=
6381 TargetOpcode::G_CONSTANT_FOLD_BARRIER &&
6382 !getIConstantVRegVal(RHS, MRI);
6383}
6384
6385 bool CombinerHelper::matchCommuteFPConstantToRHS(MachineInstr &MI) {
6386 Register LHS = MI.getOperand(1).getReg();
6387 Register RHS = MI.getOperand(2).getReg();
6388 std::optional<FPValueAndVReg> ValAndVReg;
6389 if (!mi_match(LHS, MRI, m_GFCstOrSplat(ValAndVReg)))
6390 return false;
6391 return !mi_match(RHS, MRI, m_GFCstOrSplat(ValAndVReg));
6392}
6393
6394 void CombinerHelper::applyCommuteBinOpOperands(MachineInstr &MI) {
6395 Observer.changingInstr(MI);
6396 unsigned LHSOpndIdx = 1;
6397 unsigned RHSOpndIdx = 2;
6398 switch (MI.getOpcode()) {
6399 case TargetOpcode::G_UADDO:
6400 case TargetOpcode::G_SADDO:
6401 case TargetOpcode::G_UMULO:
6402 case TargetOpcode::G_SMULO:
6403 LHSOpndIdx = 2;
6404 RHSOpndIdx = 3;
6405 break;
6406 default:
6407 break;
6408 }
6409 Register LHSReg = MI.getOperand(LHSOpndIdx).getReg();
6410 Register RHSReg = MI.getOperand(RHSOpndIdx).getReg();
6411 MI.getOperand(LHSOpndIdx).setReg(RHSReg);
6412 MI.getOperand(RHSOpndIdx).setReg(LHSReg);
6413 Observer.changedInstr(MI);
6414}
6415
6416bool CombinerHelper::isOneOrOneSplat(Register Src, bool AllowUndefs) {
6417 LLT SrcTy = MRI.getType(Src);
6418 if (SrcTy.isFixedVector())
6419 return isConstantSplatVector(Src, 1, AllowUndefs);
6420 if (SrcTy.isScalar()) {
6421 if (AllowUndefs && getOpcodeDef<GImplicitDef>(Src, MRI) != nullptr)
6422 return true;
6423 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
6424 return IConstant && IConstant->Value == 1;
6425 }
6426 return false; // scalable vector
6427}
6428
6429bool CombinerHelper::isZeroOrZeroSplat(Register Src, bool AllowUndefs) {
6430 LLT SrcTy = MRI.getType(Src);
6431 if (SrcTy.isFixedVector())
6432 return isConstantSplatVector(Src, 0, AllowUndefs);
6433 if (SrcTy.isScalar()) {
6434 if (AllowUndefs && getOpcodeDef<GImplicitDef>(Src, MRI) != nullptr)
6435 return true;
6436 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
6437 return IConstant && IConstant->Value == 0;
6438 }
6439 return false; // scalable vector
6440}
6441
6442// Ignores COPYs during conformance checks.
6443// FIXME scalable vectors.
6444bool CombinerHelper::isConstantSplatVector(Register Src, int64_t SplatValue,
6445 bool AllowUndefs) {
6446 GBuildVector *BuildVector = getOpcodeDef<GBuildVector>(Src, MRI);
6447 if (!BuildVector)
6448 return false;
6449 unsigned NumSources = BuildVector->getNumSources();
6450
6451 for (unsigned I = 0; I < NumSources; ++I) {
6452 GImplicitDef *ImplicitDef =
6453 getOpcodeDef<GImplicitDef>(BuildVector->getSourceReg(I), MRI);
6454 if (ImplicitDef && AllowUndefs)
6455 continue;
6456 if (ImplicitDef && !AllowUndefs)
6457 return false;
6458 std::optional<ValueAndVReg> IConstant =
6459 getIConstantVRegValWithLookThrough(BuildVector->getSourceReg(I), MRI);
6460 if (IConstant && IConstant->Value == SplatValue)
6461 continue;
6462 return false;
6463 }
6464 return true;
6465}
6466
6467// Ignores COPYs during lookups.
6468// FIXME scalable vectors
6469std::optional<APInt>
6470CombinerHelper::getConstantOrConstantSplatVector(Register Src) {
6471 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
6472 if (IConstant)
6473 return IConstant->Value;
6474
6475 GBuildVector *BuildVector = getOpcodeDef<GBuildVector>(Src, MRI);
6476 if (!BuildVector)
6477 return std::nullopt;
6478 unsigned NumSources = BuildVector->getNumSources();
6479
6480 std::optional<APInt> Value = std::nullopt;
6481 for (unsigned I = 0; I < NumSources; ++I) {
6482 std::optional<ValueAndVReg> IConstant =
6483 getIConstantVRegValWithLookThrough(BuildVector->getSourceReg(I), MRI);
6484 if (!IConstant)
6485 return std::nullopt;
6486 if (!Value)
6487 Value = IConstant->Value;
6488 else if (*Value != IConstant->Value)
6489 return std::nullopt;
6490 }
6491 return Value;
6492}
6493
6494// FIXME G_SPLAT_VECTOR
6495bool CombinerHelper::isConstantOrConstantVectorI(Register Src) const {
6496 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
6497 if (IConstant)
6498 return true;
6499
6500 GBuildVector *BuildVector = getOpcodeDef<GBuildVector>(Src, MRI);
6501 if (!BuildVector)
6502 return false;
6503
6504 unsigned NumSources = BuildVector->getNumSources();
6505 for (unsigned I = 0; I < NumSources; ++I) {
6506 std::optional<ValueAndVReg> IConstant =
6507 getIConstantVRegValWithLookThrough(BuildVector->getSourceReg(I), MRI);
6508 if (!IConstant)
6509 return false;
6510 }
6511 return true;
6512}
6513
6514// TODO: use knownbits to determine zeros
6515bool CombinerHelper::tryFoldSelectOfConstants(GSelect *Select,
6516 BuildFnTy &MatchInfo) {
6517 uint32_t Flags = Select->getFlags();
6518 Register Dest = Select->getReg(0);
6519 Register Cond = Select->getCondReg();
6520 Register True = Select->getTrueReg();
6521 Register False = Select->getFalseReg();
6522 LLT CondTy = MRI.getType(Select->getCondReg());
6523 LLT TrueTy = MRI.getType(Select->getTrueReg());
6524
6525 // We only do this combine for scalar boolean conditions.
6526 if (CondTy != LLT::scalar(1))
6527 return false;
6528
6529 if (TrueTy.isPointer())
6530 return false;
6531
6532 // Both are scalars.
6533 std::optional<ValueAndVReg> TrueOpt =
6534 getIConstantVRegValWithLookThrough(True, MRI);
6535 std::optional<ValueAndVReg> FalseOpt =
6536 getIConstantVRegValWithLookThrough(False, MRI);
6537
6538 if (!TrueOpt || !FalseOpt)
6539 return false;
6540
6541 APInt TrueValue = TrueOpt->Value;
6542 APInt FalseValue = FalseOpt->Value;
6543
6544 // select Cond, 1, 0 --> zext (Cond)
6545 if (TrueValue.isOne() && FalseValue.isZero()) {
6546 MatchInfo = [=](MachineIRBuilder &B) {
6547 B.setInstrAndDebugLoc(*Select);
6548 B.buildZExtOrTrunc(Dest, Cond);
6549 };
6550 return true;
6551 }
6552
6553 // select Cond, -1, 0 --> sext (Cond)
6554 if (TrueValue.isAllOnes() && FalseValue.isZero()) {
6555 MatchInfo = [=](MachineIRBuilder &B) {
6556 B.setInstrAndDebugLoc(*Select);
6557 B.buildSExtOrTrunc(Dest, Cond);
6558 };
6559 return true;
6560 }
6561
6562 // select Cond, 0, 1 --> zext (!Cond)
6563 if (TrueValue.isZero() && FalseValue.isOne()) {
6564 MatchInfo = [=](MachineIRBuilder &B) {
6565 B.setInstrAndDebugLoc(*Select);
6566 Register Inner = MRI.createGenericVirtualRegister(CondTy);
6567 B.buildNot(Inner, Cond);
6568 B.buildZExtOrTrunc(Dest, Inner);
6569 };
6570 return true;
6571 }
6572
6573 // select Cond, 0, -1 --> sext (!Cond)
6574 if (TrueValue.isZero() && FalseValue.isAllOnes()) {
6575 MatchInfo = [=](MachineIRBuilder &B) {
6576 B.setInstrAndDebugLoc(*Select);
6577 Register Inner = MRI.createGenericVirtualRegister(CondTy);
6578 B.buildNot(Inner, Cond);
6579 B.buildSExtOrTrunc(Dest, Inner);
6580 };
6581 return true;
6582 }
6583
6584 // select Cond, C1, C1-1 --> add (zext Cond), C1-1
6585 if (TrueValue - 1 == FalseValue) {
6586 MatchInfo = [=](MachineIRBuilder &B) {
6587 B.setInstrAndDebugLoc(*Select);
6588 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
6589 B.buildZExtOrTrunc(Inner, Cond);
6590 B.buildAdd(Dest, Inner, False);
6591 };
6592 return true;
6593 }
6594
6595 // select Cond, C1, C1+1 --> add (sext Cond), C1+1
6596 if (TrueValue + 1 == FalseValue) {
6597 MatchInfo = [=](MachineIRBuilder &B) {
6598 B.setInstrAndDebugLoc(*Select);
6599 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
6600 B.buildSExtOrTrunc(Inner, Cond);
6601 B.buildAdd(Dest, Inner, False);
6602 };
6603 return true;
6604 }
6605
6606 // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
6607 if (TrueValue.isPowerOf2() && FalseValue.isZero()) {
6608 MatchInfo = [=](MachineIRBuilder &B) {
6609 B.setInstrAndDebugLoc(*Select);
6610 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
6611 B.buildZExtOrTrunc(Inner, Cond);
6612 // The shift amount must be scalar.
6613 LLT ShiftTy = TrueTy.isVector() ? TrueTy.getElementType() : TrueTy;
6614 auto ShAmtC = B.buildConstant(ShiftTy, TrueValue.exactLogBase2());
6615 B.buildShl(Dest, Inner, ShAmtC, Flags);
6616 };
6617 return true;
6618 }
6619 // select Cond, -1, C --> or (sext Cond), C
6620 if (TrueValue.isAllOnes()) {
6621 MatchInfo = [=](MachineIRBuilder &B) {
6622 B.setInstrAndDebugLoc(*Select);
6623 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
6624 B.buildSExtOrTrunc(Inner, Cond);
6625 B.buildOr(Dest, Inner, False, Flags);
6626 };
6627 return true;
6628 }
6629
6630 // select Cond, C, -1 --> or (sext (not Cond)), C
6631 if (FalseValue.isAllOnes()) {
6632 MatchInfo = [=](MachineIRBuilder &B) {
6633 B.setInstrAndDebugLoc(*Select);
6634 Register Not = MRI.createGenericVirtualRegister(CondTy);
6635 B.buildNot(Not, Cond);
6636 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
6637 B.buildSExtOrTrunc(Inner, Not);
6638 B.buildOr(Dest, Inner, True, Flags);
6639 };
6640 return true;
6641 }
6642
6643 return false;
6644}
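// Illustrative examples of the constant-select folds above:
//   select %c, 7, 6  --> G_ADD (zext %c), 6   (TrueValue - 1 == FalseValue)
//   select %c, 8, 0  --> G_SHL (zext %c), 3   (power-of-two true value)
//   select %c, -1, 5 --> G_OR  (sext %c), 5   (all-ones true value)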
6645
6646// TODO: use knownbits to determine zeros
6647bool CombinerHelper::tryFoldBoolSelectToLogic(GSelect *Select,
6648 BuildFnTy &MatchInfo) {
6649 uint32_t Flags = Select->getFlags();
6650 Register DstReg = Select->getReg(0);
6651 Register Cond = Select->getCondReg();
6652 Register True = Select->getTrueReg();
6653 Register False = Select->getFalseReg();
6654 LLT CondTy = MRI.getType(Select->getCondReg());
6655 LLT TrueTy = MRI.getType(Select->getTrueReg());
6656
6657 // Boolean or fixed vector of booleans.
6658 if (CondTy.isScalableVector() ||
6659 (CondTy.isFixedVector() &&
6660 CondTy.getElementType().getScalarSizeInBits() != 1) ||
6661 CondTy.getScalarSizeInBits() != 1)
6662 return false;
6663
6664 if (CondTy != TrueTy)
6665 return false;
6666
6667 // select Cond, Cond, F --> or Cond, F
6668 // select Cond, 1, F --> or Cond, F
6669 if ((Cond == True) || isOneOrOneSplat(True, /* AllowUndefs */ true)) {
6670 MatchInfo = [=](MachineIRBuilder &B) {
6671 B.setInstrAndDebugLoc(*Select);
6672 Register Ext = MRI.createGenericVirtualRegister(TrueTy);
6673 B.buildZExtOrTrunc(Ext, Cond);
6674 auto FreezeFalse = B.buildFreeze(TrueTy, False);
6675 B.buildOr(DstReg, Ext, FreezeFalse, Flags);
6676 };
6677 return true;
6678 }
6679
6680 // select Cond, T, Cond --> and Cond, T
6681 // select Cond, T, 0 --> and Cond, T
6682 if ((Cond == False) || isZeroOrZeroSplat(False, /* AllowUndefs */ true)) {
6683 MatchInfo = [=](MachineIRBuilder &B) {
6684 B.setInstrAndDebugLoc(*Select);
6685 Register Ext = MRI.createGenericVirtualRegister(TrueTy);
6686 B.buildZExtOrTrunc(Ext, Cond);
6687 auto FreezeTrue = B.buildFreeze(TrueTy, True);
6688 B.buildAnd(DstReg, Ext, FreezeTrue);
6689 };
6690 return true;
6691 }
6692
6693 // select Cond, T, 1 --> or (not Cond), T
6694 if (isOneOrOneSplat(False, /* AllowUndefs */ true)) {
6695 MatchInfo = [=](MachineIRBuilder &B) {
6696 B.setInstrAndDebugLoc(*Select);
6697 // First the not.
6698 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
6699 B.buildNot(Inner, Cond);
6700 // Then an ext to match the destination register.
6701 Register Ext = MRI.createGenericVirtualRegister(TrueTy);
6702 B.buildZExtOrTrunc(Ext, Inner);
6703 auto FreezeTrue = B.buildFreeze(TrueTy, True);
6704 B.buildOr(DstReg, Ext, FreezeTrue, Flags);
6705 };
6706 return true;
6707 }
6708
6709 // select Cond, 0, F --> and (not Cond), F
6710 if (isZeroOrZeroSplat(True, /* AllowUndefs */ true)) {
6711 MatchInfo = [=](MachineIRBuilder &B) {
6712 B.setInstrAndDebugLoc(*Select);
6713 // First the not.
6714 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
6715 B.buildNot(Inner, Cond);
6716 // Then an ext to match the destination register.
6717 Register Ext = MRI.createGenericVirtualRegister(TrueTy);
6718 B.buildZExtOrTrunc(Ext, Inner);
6719 auto FreezeFalse = B.buildFreeze(TrueTy, False);
6720 B.buildAnd(DstReg, Ext, FreezeFalse);
6721 };
6722 return true;
6723 }
6724
6725 return false;
6726}
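// Illustrative example: for a boolean (s1) select, select %c, 1, %f becomes
// G_OR %c, (G_FREEZE %f) and select %c, %t, 0 becomes G_AND %c, (G_FREEZE %t);
// the freeze keeps an undef or poison unused arm from poisoning the result.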
6727
6728bool CombinerHelper::tryFoldSelectToIntMinMax(GSelect *Select,
6729 BuildFnTy &MatchInfo) {
6730 Register DstReg = Select->getReg(0);
6731 Register Cond = Select->getCondReg();
6732 Register True = Select->getTrueReg();
6733 Register False = Select->getFalseReg();
6734 LLT DstTy = MRI.getType(DstReg);
6735
6736 if (DstTy.isPointer())
6737 return false;
6738
6739 // We need an G_ICMP on the condition register.
6740 GICmp *Cmp = getOpcodeDef<GICmp>(Cond, MRI);
6741 if (!Cmp)
6742 return false;
6743
6744 // We want to fold the icmp and replace the select.
6745 if (!MRI.hasOneNonDBGUse(Cmp->getReg(0)))
6746 return false;
6747
6748 CmpInst::Predicate Pred = Cmp->getCond();
6749 // We need a larger or smaller predicate for
6750 // canonicalization.
6751 if (CmpInst::isEquality(Pred))
6752 return false;
6753
6754 Register CmpLHS = Cmp->getLHSReg();
6755 Register CmpRHS = Cmp->getRHSReg();
6756
6757 // We can swap CmpLHS and CmpRHS for higher hitrate.
6758 if (True == CmpRHS && False == CmpLHS) {
6759 std::swap(CmpLHS, CmpRHS);
6760 Pred = CmpInst::getSwappedPredicate(Pred);
6761 }
6762
6763 // (icmp X, Y) ? X : Y -> integer minmax.
6764 // see matchSelectPattern in ValueTracking.
6765 // Legality between G_SELECT and integer minmax can differ.
6766 if (True == CmpLHS && False == CmpRHS) {
6767 switch (Pred) {
6768 case ICmpInst::ICMP_UGT:
6769 case ICmpInst::ICMP_UGE: {
6770 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMAX, DstTy}))
6771 return false;
6772 MatchInfo = [=](MachineIRBuilder &B) {
6773 B.buildUMax(DstReg, True, False);
6774 };
6775 return true;
6776 }
6777 case ICmpInst::ICMP_SGT:
6778 case ICmpInst::ICMP_SGE: {
6779 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SMAX, DstTy}))
6780 return false;
6781 MatchInfo = [=](MachineIRBuilder &B) {
6782 B.buildSMax(DstReg, True, False);
6783 };
6784 return true;
6785 }
6786 case ICmpInst::ICMP_ULT:
6787 case ICmpInst::ICMP_ULE: {
6788 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMIN, DstTy}))
6789 return false;
6790 MatchInfo = [=](MachineIRBuilder &B) {
6791 B.buildUMin(DstReg, True, False);
6792 };
6793 return true;
6794 }
6795 case ICmpInst::ICMP_SLT:
6796 case ICmpInst::ICMP_SLE: {
6797 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SMIN, DstTy}))
6798 return false;
6799 MatchInfo = [=](MachineIRBuilder &B) {
6800 B.buildSMin(DstReg, True, False);
6801 };
6802 return true;
6803 }
6804 default:
6805 return false;
6806 }
6807 }
6808
6809 return false;
6810}
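// Illustrative sketch (placeholder generic MIR, not from the source): for s32
// values,
//   %c:_(s1)  = G_ICMP intpred(ugt), %x(s32), %y(s32)
//   %m:_(s32) = G_SELECT %c(s1), %x, %y
// matches the ICMP_UGT case above and is rebuilt as
//   %m:_(s32) = G_UMAX %x, %y
// provided G_UMAX is legal for s32 or we are still before the legalizer.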
6811
6812 bool CombinerHelper::matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo) {
6813 GSelect *Select = cast<GSelect>(&MI);
6814
6815 if (tryFoldSelectOfConstants(Select, MatchInfo))
6816 return true;
6817
6818 if (tryFoldBoolSelectToLogic(Select, MatchInfo))
6819 return true;
6820
6821 if (tryFoldSelectToIntMinMax(Select, MatchInfo))
6822 return true;
6823
6824 return false;
6825}
6826
6827/// Fold (icmp Pred1 V1, C1) && (icmp Pred2 V2, C2)
6828/// or (icmp Pred1 V1, C1) || (icmp Pred2 V2, C2)
6829/// into a single comparison using range-based reasoning.
6830/// see InstCombinerImpl::foldAndOrOfICmpsUsingRanges.
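///
/// For example (an illustrative case with an unsigned 32-bit X):
/// (X == 0) || (X == 1) has the exact ranges [0,1) and [1,2); their exact
/// union is [0,2), so the pair can be rebuilt as the single compare X u< 2.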
6831bool CombinerHelper::tryFoldAndOrOrICmpsUsingRanges(GLogicalBinOp *Logic,
6832 BuildFnTy &MatchInfo) {
6833 assert(Logic->getOpcode() != TargetOpcode::G_XOR && "unexpected xor");
6834 bool IsAnd = Logic->getOpcode() == TargetOpcode::G_AND;
6835 Register DstReg = Logic->getReg(0);
6836 Register LHS = Logic->getLHSReg();
6837 Register RHS = Logic->getRHSReg();
6838 unsigned Flags = Logic->getFlags();
6839
6840 // We need a G_ICMP on the LHS register.
6841 GICmp *Cmp1 = getOpcodeDef<GICmp>(LHS, MRI);
6842 if (!Cmp1)
6843 return false;
6844
6845 // We need a G_ICMP on the RHS register.
6846 GICmp *Cmp2 = getOpcodeDef<GICmp>(RHS, MRI);
6847 if (!Cmp2)
6848 return false;
6849
6850 // We want to fold the icmps.
6851 if (!MRI.hasOneNonDBGUse(Cmp1->getReg(0)) ||
6852 !MRI.hasOneNonDBGUse(Cmp2->getReg(0)))
6853 return false;
6854
6855 APInt C1;
6856 APInt C2;
6857 std::optional<ValueAndVReg> MaybeC1 =
6858 getIConstantVRegValWithLookThrough(Cmp1->getRHSReg(), MRI);
6859 if (!MaybeC1)
6860 return false;
6861 C1 = MaybeC1->Value;
6862
6863 std::optional<ValueAndVReg> MaybeC2 =
6864 getIConstantVRegValWithLookThrough(Cmp2->getRHSReg(), MRI);
6865 if (!MaybeC2)
6866 return false;
6867 C2 = MaybeC2->Value;
6868
6869 Register R1 = Cmp1->getLHSReg();
6870 Register R2 = Cmp2->getLHSReg();
6871 CmpInst::Predicate Pred1 = Cmp1->getCond();
6872 CmpInst::Predicate Pred2 = Cmp2->getCond();
6873 LLT CmpTy = MRI.getType(Cmp1->getReg(0));
6874 LLT CmpOperandTy = MRI.getType(R1);
6875
6876 if (CmpOperandTy.isPointer())
6877 return false;
6878
6879 // We build ands, adds, and constants of type CmpOperandTy.
6880 // They must be legal to build.
6881 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_AND, CmpOperandTy}) ||
6882 !isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, CmpOperandTy}) ||
6883 !isConstantLegalOrBeforeLegalizer(CmpOperandTy))
6884 return false;
6885
6886 // Look through an add of a constant offset on R1, R2, or both operands. This
6887 // allows us to turn the "R + C' < C''" range idiom into a proper range on R.
6888 std::optional<APInt> Offset1;
6889 std::optional<APInt> Offset2;
6890 if (R1 != R2) {
6891 if (GAdd *Add = getOpcodeDef<GAdd>(R1, MRI)) {
6892 std::optional<ValueAndVReg> MaybeOffset1 =
6893 getIConstantVRegValWithLookThrough(Add->getRHSReg(), MRI);
6894 if (MaybeOffset1) {
6895 R1 = Add->getLHSReg();
6896 Offset1 = MaybeOffset1->Value;
6897 }
6898 }
6899 if (GAdd *Add = getOpcodeDef<GAdd>(R2, MRI)) {
6900 std::optional<ValueAndVReg> MaybeOffset2 =
6901 getIConstantVRegValWithLookThrough(Add->getRHSReg(), MRI);
6902 if (MaybeOffset2) {
6903 R2 = Add->getLHSReg();
6904 Offset2 = MaybeOffset2->Value;
6905 }
6906 }
6907 }
6908
6909 if (R1 != R2)
6910 return false;
6911
6912 // We calculate the icmp ranges, taking the optional offsets into account.
6913 ConstantRange CR1 = ConstantRange::makeExactICmpRegion(
6914 IsAnd ? ICmpInst::getInversePredicate(Pred1) : Pred1, C1);
6915 if (Offset1)
6916 CR1 = CR1.subtract(*Offset1);
6917
6919 IsAnd ? ICmpInst::getInversePredicate(Pred2) : Pred2, C2);
6920 if (Offset2)
6921 CR2 = CR2.subtract(*Offset2);
6922
6923 bool CreateMask = false;
6924 APInt LowerDiff;
6925 std::optional<ConstantRange> CR = CR1.exactUnionWith(CR2);
6926 if (!CR) {
6927 // We need non-wrapping ranges.
6928 if (CR1.isWrappedSet() || CR2.isWrappedSet())
6929 return false;
6930
6931 // Check whether we have equal-size ranges that only differ by one bit.
6932 // In that case we can apply a mask to map one range onto the other.
6933 LowerDiff = CR1.getLower() ^ CR2.getLower();
6934 APInt UpperDiff = (CR1.getUpper() - 1) ^ (CR2.getUpper() - 1);
6935 APInt CR1Size = CR1.getUpper() - CR1.getLower();
6936 if (!LowerDiff.isPowerOf2() || LowerDiff != UpperDiff ||
6937 CR1Size != CR2.getUpper() - CR2.getLower())
6938 return false;
6939
6940 CR = CR1.getLower().ult(CR2.getLower()) ? CR1 : CR2;
6941 CreateMask = true;
6942 }
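// Illustrative example (values chosen for exposition): for (X == 5) || (X == 13)
// the exact ranges [5,6) and [13,14) have no exact union, but they are the same
// size and differ only in bit 3 (LowerDiff == UpperDiff == 8), so the build
// below compares (X & ~8) against 5 instead of keeping both compares.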
6943
6944 if (IsAnd)
6945 CR = CR->inverse();
6946
6947 CmpInst::Predicate NewPred;
6948 APInt NewC, Offset;
6949 CR->getEquivalentICmp(NewPred, NewC, Offset);
6950
6951 // We take the result type of one of the original icmps, CmpTy, for
6952 // the icmp to be built. The operand type, CmpOperandTy, is used for
6953 // the other instructions and constants to be built. For add and and,
6954 // the parameter and result types are the same. CmpTy and the type of
6955 // DstReg might differ, which is why we zext or trunc the icmp into
6956 // the destination register.
6957
6958 MatchInfo = [=](MachineIRBuilder &B) {
6959 if (CreateMask && Offset != 0) {
6960 auto TildeLowerDiff = B.buildConstant(CmpOperandTy, ~LowerDiff);
6961 auto And = B.buildAnd(CmpOperandTy, R1, TildeLowerDiff); // the mask.
6962 auto OffsetC = B.buildConstant(CmpOperandTy, Offset);
6963 auto Add = B.buildAdd(CmpOperandTy, And, OffsetC, Flags);
6964 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
6965 auto ICmp = B.buildICmp(NewPred, CmpTy, Add, NewCon);
6966 B.buildZExtOrTrunc(DstReg, ICmp);
6967 } else if (CreateMask && Offset == 0) {
6968 auto TildeLowerDiff = B.buildConstant(CmpOperandTy, ~LowerDiff);
6969 auto And = B.buildAnd(CmpOperandTy, R1, TildeLowerDiff); // the mask.
6970 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
6971 auto ICmp = B.buildICmp(NewPred, CmpTy, And, NewCon);
6972 B.buildZExtOrTrunc(DstReg, ICmp);
6973 } else if (!CreateMask && Offset != 0) {
6974 auto OffsetC = B.buildConstant(CmpOperandTy, Offset);
6975 auto Add = B.buildAdd(CmpOperandTy, R1, OffsetC, Flags);
6976 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
6977 auto ICmp = B.buildICmp(NewPred, CmpTy, Add, NewCon);
6978 B.buildZExtOrTrunc(DstReg, ICmp);
6979 } else if (!CreateMask && Offset == 0) {
6980 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
6981 auto ICmp = B.buildICmp(NewPred, CmpTy, R1, NewCon);
6982 B.buildZExtOrTrunc(DstReg, ICmp);
6983 } else {
6984 llvm_unreachable("unexpected configuration of CreateMask and Offset");
6985 }
6986 };
6987 return true;
6988}
6989
6990bool CombinerHelper::tryFoldLogicOfFCmps(GLogicalBinOp *Logic,
6991 BuildFnTy &MatchInfo) {
6992 assert(Logic->getOpcode() != TargetOpcode::G_XOR && "unexpected xor");
6993 Register DestReg = Logic->getReg(0);
6994 Register LHS = Logic->getLHSReg();
6995 Register RHS = Logic->getRHSReg();
6996 bool IsAnd = Logic->getOpcode() == TargetOpcode::G_AND;
6997
6998 // We need a compare on the LHS register.
6999 GFCmp *Cmp1 = getOpcodeDef<GFCmp>(LHS, MRI);
7000 if (!Cmp1)
7001 return false;
7002
7003 // We need a compare on the RHS register.
7004 GFCmp *Cmp2 = getOpcodeDef<GFCmp>(RHS, MRI);
7005 if (!Cmp2)
7006 return false;
7007
7008 LLT CmpTy = MRI.getType(Cmp1->getReg(0));
7009 LLT CmpOperandTy = MRI.getType(Cmp1->getLHSReg());
7010
7011 // We build one fcmp; we want to fold the fcmps and replace the logic op,
7012 // and the fcmps must have the same shape.
7013 if (!isLegalOrBeforeLegalizer(
7014 {TargetOpcode::G_FCMP, {CmpTy, CmpOperandTy}}) ||
7015 !MRI.hasOneNonDBGUse(Logic->getReg(0)) ||
7016 !MRI.hasOneNonDBGUse(Cmp1->getReg(0)) ||
7017 !MRI.hasOneNonDBGUse(Cmp2->getReg(0)) ||
7018 MRI.getType(Cmp1->getLHSReg()) != MRI.getType(Cmp2->getLHSReg()))
7019 return false;
7020
7021 CmpInst::Predicate PredL = Cmp1->getCond();
7022 CmpInst::Predicate PredR = Cmp2->getCond();
7023 Register LHS0 = Cmp1->getLHSReg();
7024 Register LHS1 = Cmp1->getRHSReg();
7025 Register RHS0 = Cmp2->getLHSReg();
7026 Register RHS1 = Cmp2->getRHSReg();
7027
7028 if (LHS0 == RHS1 && LHS1 == RHS0) {
7029 // Swap RHS operands to match LHS.
7030 PredR = CmpInst::getSwappedPredicate(PredR);
7031 std::swap(RHS0, RHS1);
7032 }
7033
7034 if (LHS0 == RHS0 && LHS1 == RHS1) {
7035 // We determine the new predicate.
7036 unsigned CmpCodeL = getFCmpCode(PredL);
7037 unsigned CmpCodeR = getFCmpCode(PredR);
7038 unsigned NewPred = IsAnd ? CmpCodeL & CmpCodeR : CmpCodeL | CmpCodeR;
7039 unsigned Flags = Cmp1->getFlags() | Cmp2->getFlags();
7040 MatchInfo = [=](MachineIRBuilder &B) {
7041 // The fcmp predicates fill the lower part of the enum.
7042 FCmpInst::Predicate Pred = static_cast<FCmpInst::Predicate>(NewPred);
7043 if (Pred == FCmpInst::FCMP_FALSE &&
7044 isConstantLegalOrBeforeLegalizer(CmpTy)) {
7045 auto False = B.buildConstant(CmpTy, 0);
7046 B.buildZExtOrTrunc(DestReg, False);
7047 } else if (Pred == FCmpInst::FCMP_TRUE &&
7048 isConstantLegalOrBeforeLegalizer(CmpTy)) {
7049 auto True =
7050 B.buildConstant(CmpTy, getICmpTrueVal(getTargetLowering(),
7051 CmpTy.isVector() /*isVector*/,
7052 true /*isFP*/));
7053 B.buildZExtOrTrunc(DestReg, True);
7054 } else { // We take the predicate without predicate optimizations.
7055 auto Cmp = B.buildFCmp(Pred, CmpTy, LHS0, LHS1, Flags);
7056 B.buildZExtOrTrunc(DestReg, Cmp);
7057 }
7058 };
7059 return true;
7060 }
7061
7062 return false;
7063}
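// Illustrative sketch (placeholder operands): since the fcmp predicates are
// bit-encoded, (fcmp olt %x, %y) || (fcmp ogt %x, %y) merges OLT | OGT into
// ONE and the function above rebuilds it as a single
//   %r = G_FCMP floatpred(one), %x, %y
// while the G_AND form (OLT & OGT == FALSE) folds to a constant 0 when a
// constant of that type is legal.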
7064
7065 bool CombinerHelper::matchAnd(MachineInstr &MI, BuildFnTy &MatchInfo) {
7066 GAnd *And = cast<GAnd>(&MI);
7067
7068 if (tryFoldAndOrOrICmpsUsingRanges(And, MatchInfo))
7069 return true;
7070
7071 if (tryFoldLogicOfFCmps(And, MatchInfo))
7072 return true;
7073
7074 return false;
7075}
7076
7077 bool CombinerHelper::matchOr(MachineInstr &MI, BuildFnTy &MatchInfo) {
7078 GOr *Or = cast<GOr>(&MI);
7079
7080 if (tryFoldAndOrOrICmpsUsingRanges(Or, MatchInfo))
7081 return true;
7082
7083 if (tryFoldLogicOfFCmps(Or, MatchInfo))
7084 return true;
7085
7086 return false;
7087}
7088
7089 bool CombinerHelper::matchAddOverflow(MachineInstr &MI, BuildFnTy &MatchInfo) {
7090 GAddCarryOut *Add = cast<GAddCarryOut>(&MI);
7091
7092 // Addo has no flags
7093 Register Dst = Add->getReg(0);
7094 Register Carry = Add->getReg(1);
7095 Register LHS = Add->getLHSReg();
7096 Register RHS = Add->getRHSReg();
7097 bool IsSigned = Add->isSigned();
7098 LLT DstTy = MRI.getType(Dst);
7099 LLT CarryTy = MRI.getType(Carry);
7100
7101 // Fold addo, if the carry is dead -> add, undef.
7102 if (MRI.use_nodbg_empty(Carry) &&
7103 isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, {DstTy}})) {
7104 MatchInfo = [=](MachineIRBuilder &B) {
7105 B.buildAdd(Dst, LHS, RHS);
7106 B.buildUndef(Carry);
7107 };
7108 return true;
7109 }
7110
7111 // Canonicalize constant to RHS.
7112 if (isConstantOrConstantVectorI(LHS) && !isConstantOrConstantVectorI(RHS)) {
7113 if (IsSigned) {
7114 MatchInfo = [=](MachineIRBuilder &B) {
7115 B.buildSAddo(Dst, Carry, RHS, LHS);
7116 };
7117 return true;
7118 }
7119 // !IsSigned
7120 MatchInfo = [=](MachineIRBuilder &B) {
7121 B.buildUAddo(Dst, Carry, RHS, LHS);
7122 };
7123 return true;
7124 }
7125
7126 std::optional<APInt> MaybeLHS = getConstantOrConstantSplatVector(LHS);
7127 std::optional<APInt> MaybeRHS = getConstantOrConstantSplatVector(RHS);
7128
7129 // Fold addo(c1, c2) -> c3, carry.
7130 if (MaybeLHS && MaybeRHS && isConstantLegalOrBeforeLegalizer(DstTy) &&
7131 isConstantLegalOrBeforeLegalizer(CarryTy)) {
7132 bool Overflow;
7133 APInt Result = IsSigned ? MaybeLHS->sadd_ov(*MaybeRHS, Overflow)
7134 : MaybeLHS->uadd_ov(*MaybeRHS, Overflow);
7135 MatchInfo = [=](MachineIRBuilder &B) {
7136 B.buildConstant(Dst, Result);
7137 B.buildConstant(Carry, Overflow);
7138 };
7139 return true;
7140 }
7141
7142 // Fold (addo x, 0) -> x, no carry
7143 if (MaybeRHS && *MaybeRHS == 0 && isConstantLegalOrBeforeLegalizer(CarryTy)) {
7144 MatchInfo = [=](MachineIRBuilder &B) {
7145 B.buildCopy(Dst, LHS);
7146 B.buildConstant(Carry, 0);
7147 };
7148 return true;
7149 }
7150
7151 // Given 2 constant operands whose sum does not overflow:
7152 // uaddo (X +nuw C0), C1 -> uaddo X, C0 + C1
7153 // saddo (X +nsw C0), C1 -> saddo X, C0 + C1
7154 GAdd *AddLHS = getOpcodeDef<GAdd>(LHS, MRI);
7155 if (MaybeRHS && AddLHS && MRI.hasOneNonDBGUse(Add->getReg(0)) &&
7156 ((IsSigned && AddLHS->getFlag(MachineInstr::MIFlag::NoSWrap)) ||
7157 (!IsSigned && AddLHS->getFlag(MachineInstr::MIFlag::NoUWrap)))) {
7158 std::optional<APInt> MaybeAddRHS =
7159 getConstantOrConstantSplatVector(AddLHS->getRHSReg());
7160 if (MaybeAddRHS) {
7161 bool Overflow;
7162 APInt NewC = IsSigned ? MaybeAddRHS->sadd_ov(*MaybeRHS, Overflow)
7163 : MaybeAddRHS->uadd_ov(*MaybeRHS, Overflow);
7164 if (!Overflow && isConstantLegalOrBeforeLegalizer(DstTy)) {
7165 if (IsSigned) {
7166 MatchInfo = [=](MachineIRBuilder &B) {
7167 auto ConstRHS = B.buildConstant(DstTy, NewC);
7168 B.buildSAddo(Dst, Carry, AddLHS->getLHSReg(), ConstRHS);
7169 };
7170 return true;
7171 }
7172 // !IsSigned
7173 MatchInfo = [=](MachineIRBuilder &B) {
7174 auto ConstRHS = B.buildConstant(DstTy, NewC);
7175 B.buildUAddo(Dst, Carry, AddLHS->getLHSReg(), ConstRHS);
7176 };
7177 return true;
7178 }
7179 }
7180 };
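// Illustrative sketch (schematic MIR; the immediates stand for G_CONSTANT
// registers, shown inline for brevity):
//   %t:_(s32) = nuw G_ADD %x, 10
//   %d:_(s32), %c:_(s1) = G_UADDO %t, 20
// satisfies the fold above because 10 + 20 does not wrap, so it can become
//   %d:_(s32), %c:_(s1) = G_UADDO %x, 30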
7181
7182 // We try to combine addo to non-overflowing add.
7183 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, {DstTy}}) ||
7184 !isConstantLegalOrBeforeLegalizer(CarryTy))
7185 return false;
7186
7187 // We try to combine uaddo to non-overflowing add.
7188 if (!IsSigned) {
7189 ConstantRange CRLHS =
7190 ConstantRange::fromKnownBits(KB->getKnownBits(LHS), /*IsSigned=*/false);
7191 ConstantRange CRRHS =
7192 ConstantRange::fromKnownBits(KB->getKnownBits(RHS), /*IsSigned=*/false);
7193
7194 switch (CRLHS.unsignedAddMayOverflow(CRRHS)) {
7195 case ConstantRange::OverflowResult::MayOverflow:
7196 return false;
7197 case ConstantRange::OverflowResult::NeverOverflows: {
7198 MatchInfo = [=](MachineIRBuilder &B) {
7199 B.buildAdd(Dst, LHS, RHS, MachineInstr::MIFlag::NoUWrap);
7200 B.buildConstant(Carry, 0);
7201 };
7202 return true;
7203 }
7204 case ConstantRange::OverflowResult::AlwaysOverflowsLow:
7205 case ConstantRange::OverflowResult::AlwaysOverflowsHigh: {
7206 MatchInfo = [=](MachineIRBuilder &B) {
7207 B.buildAdd(Dst, LHS, RHS);
7208 B.buildConstant(Carry, 1);
7209 };
7210 return true;
7211 }
7212 }
7213 return false;
7214 }
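// Illustrative example: if known bits prove, say, %x u< 0x1000 and
// %y u< 0x1000 for an s32 G_UADDO, the unsigned ranges can never overflow, so
// the switch above rewrites it as a nuw G_ADD plus a constant-0 carry; if the
// ranges always overflow, it becomes a plain G_ADD plus a constant-1 carry.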
7215
7216 // We try to combine saddo to non-overflowing add.
7217
7218 // If LHS and RHS each have at least two sign bits, then there is no signed
7219 // overflow.
7220 if (KB->computeNumSignBits(RHS) > 1 && KB->computeNumSignBits(LHS) > 1) {
7221 MatchInfo = [=](MachineIRBuilder &B) {
7222 B.buildAdd(Dst, LHS, RHS, MachineInstr::MIFlag::NoSWrap);
7223 B.buildConstant(Carry, 0);
7224 };
7225 return true;
7226 }
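// Illustrative example: if %x and %y are both s32 values produced by G_SEXT
// from s16, each has at least 17 sign bits, so their sum cannot overflow and
// the G_SADDO above becomes an nsw G_ADD with a constant-0 carry.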
7227
7228 ConstantRange CRLHS =
7229 ConstantRange::fromKnownBits(KB->getKnownBits(LHS), /*IsSigned=*/true);
7230 ConstantRange CRRHS =
7231 ConstantRange::fromKnownBits(KB->getKnownBits(RHS), /*IsSigned=*/true);
7232
7233 switch (CRLHS.signedAddMayOverflow(CRRHS)) {
7234 case ConstantRange::OverflowResult::MayOverflow:
7235 return false;
7236 case ConstantRange::OverflowResult::NeverOverflows: {
7237 MatchInfo = [=](MachineIRBuilder &B) {
7238 B.buildAdd(Dst, LHS, RHS, MachineInstr::MIFlag::NoSWrap);
7239 B.buildConstant(Carry, 0);
7240 };
7241 return true;
7242 }
7243 case ConstantRange::OverflowResult::AlwaysOverflowsLow:
7244 case ConstantRange::OverflowResult::AlwaysOverflowsHigh: {
7245 MatchInfo = [=](MachineIRBuilder &B) {
7246 B.buildAdd(Dst, LHS, RHS);
7247 B.buildConstant(Carry, 1);
7248 };
7249 return true;
7250 }
7251 }
7252
7253 return false;
7254}
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
unsigned RegSize
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static const LLT S1
amdgpu AMDGPU Register Bank Select
Rewrite undef for PHI
This file declares a class to represent arbitrary precision floating point values and provide a varie...
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static bool hasMoreUses(const MachineInstr &MI0, const MachineInstr &MI1, const MachineRegisterInfo &MRI)
static bool isContractableFMul(MachineInstr &MI, bool AllowFusionGlobally)
Checks if MI is TargetOpcode::G_FMUL and contractable either due to global flags or MachineInstr flag...
static unsigned getIndexedOpc(unsigned LdStOpc)
static APFloat constantFoldFpUnary(const MachineInstr &MI, const MachineRegisterInfo &MRI, const APFloat &Val)
static std::optional< std::pair< GZExtLoad *, int64_t > > matchLoadAndBytePosition(Register Reg, unsigned MemSizeInBits, const MachineRegisterInfo &MRI)
Helper function for findLoadOffsetsForLoadOrCombine.
static Register peekThroughBitcast(Register Reg, const MachineRegisterInfo &MRI)
static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I)
static cl::opt< bool > ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false), cl::desc("Force all indexed operations to be " "legal for the GlobalISel combiner"))
static cl::opt< unsigned > PostIndexUseThreshold("post-index-use-threshold", cl::Hidden, cl::init(32), cl::desc("Number of uses of a base pointer to check before it is no longer " "considered for post-indexing."))
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
static unsigned getExtLoadOpcForExtend(unsigned ExtOpc)
static Type * getTypeForLLT(LLT Ty, LLVMContext &C)
static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits, int64_t Cst, bool IsVector, bool IsFP)
static LLT getMidVTForTruncRightShiftCombine(LLT ShiftTy, LLT TruncTy)
static bool canFoldInAddressingMode(GLoadStore *MI, const TargetLowering &TLI, MachineRegisterInfo &MRI)
Return true if 'MI' is a load or a store that may be fold it's address operand into the load / store ...
static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I)
static Register buildLogBase2(Register V, MachineIRBuilder &MIB)
Determines the LogBase2 value for a non-null input value using the transform: LogBase2(V) = (EltBits ...
This contains common combine transformations that may be used in a combine pass,or by the target else...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
uint64_t Addr
uint64_t Size
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
Rewrite Partial Register Uses
This contains common code to allow clients to notify changes to machine instr.
Provides analysis for querying information about KnownBits during GISel passes.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
#define _
IRTranslator LLVM IR MI
static LVOptions Options
Definition: LVOptions.cpp:25
Interface for Targets to specify which operations they can successfully select and how the others sho...
Implement a low-level type suitable for MachineInstr level instruction selection.
#define I(x, y, z)
Definition: MD5.cpp:58
Contains matchers for matching SSA Machine Instructions.
mir Rename Register Operands
This file declares the MachineIRBuilder class.
unsigned const TargetRegisterInfo * TRI
#define R2(n)
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
R600 Clause Merge
const SmallVectorImpl< MachineOperand > & Cond
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file implements a set that has insertion order iteration characteristics.
This file implements the SmallBitVector class.
This file describes how to lower LLVM code to machine code.
Value * RHS
Value * LHS
const fltSemantics & getSemantics() const
Definition: APFloat.h:1303
bool isNaN() const
Definition: APFloat.h:1293
opStatus fusedMultiplyAdd(const APFloat &Multiplicand, const APFloat &Addend, roundingMode RM)
Definition: APFloat.h:1096
APInt bitcastToAPInt() const
Definition: APFloat.h:1210
Class for arbitrary precision integers.
Definition: APInt.h:76
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1491
APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition: APInt.cpp:1002
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:906
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition: APInt.h:349
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1160
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:358
APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition: APInt.cpp:1636
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1439
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition: APInt.h:1089
int32_t exactLogBase2() const
Definition: APInt.h:1725
void ashrInPlace(unsigned ShiftAmt)
Arithmetic right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:812
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition: APInt.h:1589
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition: APInt.h:1548
APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
Definition: APInt.cpp:1010
unsigned countl_one() const
Count the number of leading one bits.
Definition: APInt.h:1565
APInt multiplicativeInverse() const
Definition: APInt.cpp:1244
bool isMask(unsigned numBits) const
Definition: APInt.h:466
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:418
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:178
bool isOne() const
Determine if this is a value of 1.
Definition: APInt.h:367
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:217
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1513
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:829
unsigned countr_one() const
Count the number of trailing one bits.
Definition: APInt.h:1606
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition: APInt.h:1199
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
AttributeSet getAttributes(unsigned Index) const
The attributes for the specified index are returned.
bool isEquality() const
Determine if this is an equals/not equals predicate.
Definition: InstrTypes.h:1255
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:993
@ FCMP_TRUE
1 1 1 1 Always true (always folded)
Definition: InstrTypes.h:1010
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:1022
@ ICMP_SLE
signed less or equal
Definition: InstrTypes.h:1023
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition: InstrTypes.h:999
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
Definition: InstrTypes.h:1008
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition: InstrTypes.h:997
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition: InstrTypes.h:998
@ ICMP_UGE
unsigned greater or equal
Definition: InstrTypes.h:1017
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:1016
@ ICMP_SGT
signed greater than
Definition: InstrTypes.h:1020
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition: InstrTypes.h:1007
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:1018
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition: InstrTypes.h:1005
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition: InstrTypes.h:1000
@ ICMP_EQ
equal
Definition: InstrTypes.h:1014
@ ICMP_NE
not equal
Definition: InstrTypes.h:1015
@ ICMP_SGE
signed greater or equal
Definition: InstrTypes.h:1021
@ ICMP_ULE
unsigned less or equal
Definition: InstrTypes.h:1019
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition: InstrTypes.h:1006
@ FCMP_FALSE
0 0 0 0 Always false (always folded)
Definition: InstrTypes.h:995
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition: InstrTypes.h:1167
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition: InstrTypes.h:1129
static bool isOrdered(Predicate predicate)
Determine if the predicate is an ordered operation.
void applyUDivByConst(MachineInstr &MI)
void applyCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal)
bool matchCombineShuffleVector(MachineInstr &MI, SmallVectorImpl< Register > &Ops)
Check if the G_SHUFFLE_VECTOR MI can be replaced by a concat_vectors.
bool matchPtrAddZero(MachineInstr &MI)
}
bool matchAllExplicitUsesAreUndef(MachineInstr &MI)
Return true if all register explicit use operands on MI are defined by a G_IMPLICIT_DEF.
void replaceSingleDefInstWithOperand(MachineInstr &MI, unsigned OpIdx)
Delete MI and replace all of its uses with its OpIdx-th operand.
const RegisterBank * getRegBank(Register Reg) const
Get the register bank of Reg.
bool matchReassocPtrAdd(MachineInstr &MI, BuildFnTy &MatchInfo)
Reassociate pointer calculations with G_ADD involved, to allow better addressing mode usage.
bool matchUDivByConst(MachineInstr &MI)
Combine G_UDIV by constant into a multiply by magic constant.
void applyExtractVecEltBuildVec(MachineInstr &MI, Register &Reg)
bool matchInsertExtractVecEltOutOfBounds(MachineInstr &MI)
Return true if a G_{EXTRACT,INSERT}_VECTOR_ELT has an out of range index.
bool matchShiftsTooBig(MachineInstr &MI)
Match shifts greater or equal to the bitwidth of the operation.
bool tryCombineCopy(MachineInstr &MI)
If MI is COPY, try to combine it.
bool matchTruncLshrBuildVectorFold(MachineInstr &MI, Register &MatchInfo)
bool matchUndefStore(MachineInstr &MI)
Return true if a G_STORE instruction MI is storing an undef value.
bool matchRedundantBinOpInEquality(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform: (X + Y) == X -> Y == 0 (X - Y) == X -> Y == 0 (X ^ Y) == X -> Y == 0 (X + Y) !...
bool matchRedundantSExtInReg(MachineInstr &MI)
bool matchCombineFAddFpExtFMulToFMadOrFMAAggressive(MachineInstr &MI, BuildFnTy &MatchInfo)
bool matchReassocConstantInnerRHS(GPtrAdd &MI, MachineInstr *RHS, BuildFnTy &MatchInfo)
bool matchSubAddSameReg(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform: (x + y) - y -> x (x + y) - x -> y x - (y + x) -> 0 - y x - (x + z) -> 0 - z.
bool matchConstantFoldFPBinOp(MachineInstr &MI, ConstantFP *&MatchInfo)
Do constant FP folding when opportunities are exposed after MIR building.
void applyCombineShiftToUnmerge(MachineInstr &MI, const unsigned &ShiftVal)
void applyCombineUnmergeZExtToZExt(MachineInstr &MI)
void applyCommuteBinOpOperands(MachineInstr &MI)
bool matchBinOpSameVal(MachineInstr &MI)
Optimize (x op x) -> x.
void applyCombineUnmergeConstant(MachineInstr &MI, SmallVectorImpl< APInt > &Csts)
bool matchCombineFSubFNegFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z)) (fsub (fneg (fmul,...
bool matchCombineCopy(MachineInstr &MI)
bool matchConstantSelectCmp(MachineInstr &MI, unsigned &OpIdx)
Return true if a G_SELECT instruction MI has a constant comparison.
void eraseInst(MachineInstr &MI)
Erase MI.
void replaceRegWith(MachineRegisterInfo &MRI, Register FromReg, Register ToReg) const
MachineRegisterInfo::replaceRegWith() and inform the observer of the changes.
void replaceRegOpWith(MachineRegisterInfo &MRI, MachineOperand &FromRegOp, Register ToReg) const
Replace a single register operand with a new register and inform the observer of the changes.
bool matchCombineFAddFMAFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z)) (fadd (fmad x,...
void applySimplifyAddToSub(MachineInstr &MI, std::tuple< Register, Register > &MatchInfo)
bool matchSimplifySelectToMinMax(MachineInstr &MI, BuildFnTy &MatchInfo)
bool matchCombineConcatVectors(MachineInstr &MI, SmallVector< Register > &Ops)
If MI is G_CONCAT_VECTORS, try to combine it.
bool matchAddSubSameReg(MachineInstr &MI, Register &Src)
Transform G_ADD(x, G_SUB(y, x)) to y.
void applyRotateOutOfRange(MachineInstr &MI)
bool matchMulOBy2(MachineInstr &MI, BuildFnTy &MatchInfo)
Match: (G_UMULO x, 2) -> (G_UADDO x, x) (G_SMULO x, 2) -> (G_SADDO x, x)
bool matchRotateOutOfRange(MachineInstr &MI)
void applyCombineConstPtrAddToI2P(MachineInstr &MI, APInt &NewCst)
void applyCombineTruncOfShift(MachineInstr &MI, std::pair< MachineInstr *, LLT > &MatchInfo)
void applyCombineShuffleVector(MachineInstr &MI, const ArrayRef< Register > Ops)
Replace MI with a concat_vectors with Ops.
const TargetLowering & getTargetLowering() const
void applyBuildFnNoErase(MachineInstr &MI, BuildFnTy &MatchInfo)
Use a function which takes in a MachineIRBuilder to perform a combine.
void applyPtrAddZero(MachineInstr &MI)
bool matchTruncBuildVectorFold(MachineInstr &MI, Register &MatchInfo)
void setRegBank(Register Reg, const RegisterBank *RegBank)
Set the register bank of Reg.
bool matchRedundantAnd(MachineInstr &MI, Register &Replacement)
void replaceInstWithConstant(MachineInstr &MI, int64_t C)
Replace an instruction with a G_CONSTANT with value C.
bool matchAshrShlToSextInreg(MachineInstr &MI, std::tuple< Register, int64_t > &MatchInfo)
Match ashr (shl x, C), C -> sext_inreg (C)
bool tryCombineExtendingLoads(MachineInstr &MI)
If MI is extend that consumes the result of a load, try to combine it.
bool tryCombineShiftToUnmerge(MachineInstr &MI, unsigned TargetShiftAmount)
bool matchCombineUnmergeUndef(MachineInstr &MI, std::function< void(MachineIRBuilder &)> &MatchInfo)
Transform G_UNMERGE G_IMPLICIT_DEF -> G_IMPLICIT_DEF, G_IMPLICIT_DEF, ...
void applySDivByConst(MachineInstr &MI)
bool matchUndefSelectCmp(MachineInstr &MI)
Return true if a G_SELECT instruction MI has an undef comparison.
void replaceInstWithUndef(MachineInstr &MI)
Replace an instruction with a G_IMPLICIT_DEF.
bool matchRedundantOr(MachineInstr &MI, Register &Replacement)
bool matchOperandIsUndef(MachineInstr &MI, unsigned OpIdx)
Check if operand OpIdx is undef.
void applyBuildFn(MachineInstr &MI, BuildFnTy &MatchInfo)
Use a function which takes in a MachineIRBuilder to perform a combine.
bool matchCombineConstPtrAddToI2P(MachineInstr &MI, APInt &NewCst)
void replaceInstWithFConstant(MachineInstr &MI, double C)
Replace an instruction with a G_FCONSTANT with value C.
bool matchBitfieldExtractFromSExtInReg(MachineInstr &MI, BuildFnTy &MatchInfo)
Form a G_SBFX from a G_SEXT_INREG fed by a right shift.
bool matchEqualDefs(const MachineOperand &MOP1, const MachineOperand &MOP2)
Return true if MOP1 and MOP2 are register operands are defined by equivalent instructions.
bool matchShiftImmedChain(MachineInstr &MI, RegisterImmPair &MatchInfo)
Fold (shift (shift base, x), y) -> (shift base (x+y))
bool matchPtrAddImmedChain(MachineInstr &MI, PtrAddChain &MatchInfo)
void applyShiftImmedChain(MachineInstr &MI, RegisterImmPair &MatchInfo)
void applyOptBrCondByInvertingCond(MachineInstr &MI, MachineInstr *&BrCond)
bool matchMulOBy0(MachineInstr &MI, BuildFnTy &MatchInfo)
Match: (G_*MULO x, 0) -> 0 + no carry out.
void replaceSingleDefInstWithReg(MachineInstr &MI, Register Replacement)
Delete MI and replace all of its uses with Replacement.
bool matchFunnelShiftToRotate(MachineInstr &MI)
Match an FSHL or FSHR that can be combined to a ROTR or ROTL rotate.
bool matchNotCmp(MachineInstr &MI, SmallVectorImpl< Register > &RegsToNegate)
Combine inverting a result of a compare into the opposite cond code.
void applyCombineExtOfExt(MachineInstr &MI, std::tuple< Register, unsigned > &MatchInfo)
void replaceOpcodeWith(MachineInstr &FromMI, unsigned ToOpcode) const
Replace the opcode in instruction with a new opcode and inform the observer of the changes.
bool matchOperandIsKnownToBeAPowerOfTwo(MachineInstr &MI, unsigned OpIdx)
Check if operand OpIdx is known to be a power of 2.
void applyCombineCopy(MachineInstr &MI)
void applyCombineTruncOfExt(MachineInstr &MI, std::pair< Register, unsigned > &MatchInfo)
bool matchAnyExplicitUseIsUndef(MachineInstr &MI)
Return true if any explicit use operand on MI is defined by a G_IMPLICIT_DEF.
bool matchFsubToFneg(MachineInstr &MI, Register &MatchInfo)
void applyCombineAddP2IToPtrAdd(MachineInstr &MI, std::pair< Register, bool > &PtrRegAndCommute)
bool matchNarrowBinopFeedingAnd(MachineInstr &MI, BuildFnTy &MatchInfo)
void applyCombineConcatVectors(MachineInstr &MI, SmallVector< Register > &Ops)
Replace MI with a flattened build_vector with Ops or an implicit_def if Ops is empty.
bool matchSextTruncSextLoad(MachineInstr &MI)
bool matchShiftOfShiftedLogic(MachineInstr &MI, ShiftOfShiftedLogic &MatchInfo)
If we have a shift-by-constant of a bitwise logic op that itself has a shift-by-constant operand with...
GISelKnownBits * KB
bool matchExtractAllEltsFromBuildVector(MachineInstr &MI, SmallVectorImpl< std::pair< Register, MachineInstr * > > &MatchInfo)
void applyCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo)
MachineInstr * buildSDivUsingMul(MachineInstr &MI)
Given an G_SDIV MI expressing a signed divide by constant, return an expression that implements it by...
void applySDivByPow2(MachineInstr &MI)
void applyFunnelShiftConstantModulo(MachineInstr &MI)
Replaces the shift amount in MI with ShiftAmt % BW.
bool matchConstantFoldBinOp(MachineInstr &MI, APInt &MatchInfo)
Do constant folding when opportunities are exposed after MIR building.
bool isPreLegalize() const
bool matchCombineLoadWithAndMask(MachineInstr &MI, BuildFnTy &MatchInfo)
Match (and (load x), mask) -> zextload x.
bool matchConstantOp(const MachineOperand &MOP, int64_t C)
Return true if MOP is defined by a G_CONSTANT or splat with a value equal to C.
bool matchCombineFSubFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform (fsub (fmul x, y), z) -> (fma x, y, -z) (fsub (fmul x, y), z) -> (fmad x,...
bool matchAnd(MachineInstr &MI, BuildFnTy &MatchInfo)
Combine ands.
void applyCombineI2PToP2I(MachineInstr &MI, Register &Reg)
void applyNotCmp(MachineInstr &MI, SmallVectorImpl< Register > &RegsToNegate)
void applyCombineExtendingLoads(MachineInstr &MI, PreferredTuple &MatchInfo)
bool matchConstantFPOp(const MachineOperand &MOP, double C)
Return true if MOP is defined by a G_FCONSTANT or splat with a value exactly equal to C.
bool matchSimplifyAddToSub(MachineInstr &MI, std::tuple< Register, Register > &MatchInfo)
Return true if MI is a G_ADD which can be simplified to a G_SUB.
bool tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0)
Optimize memcpy intrinsics et al, e.g.
bool matchSelectSameVal(MachineInstr &MI)
Optimize (cond ? x : x) -> x.
void applyCombineConstantFoldFpUnary(MachineInstr &MI, const ConstantFP *Cst)
Transform fp_instr(cst) to constant result of the fp operation.
bool matchCombineExtendingLoads(MachineInstr &MI, PreferredTuple &MatchInfo)
bool tryReassocBinOp(unsigned Opc, Register DstReg, Register Op0, Register Op1, BuildFnTy &MatchInfo)
Try to reassociate to reassociate operands of a commutative binop.
bool isConstantLegalOrBeforeLegalizer(const LLT Ty) const
bool tryEmitMemcpyInline(MachineInstr &MI)
Emit loads and stores that perform the given memcpy.
void applyXorOfAndWithSameReg(MachineInstr &MI, std::pair< Register, Register > &MatchInfo)
bool matchXorOfAndWithSameReg(MachineInstr &MI, std::pair< Register, Register > &MatchInfo)
Fold (xor (and x, y), y) -> (and (not x), y) {.
bool matchCombineFSubFpExtFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform (fsub (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), (fneg z)) (fsub (fpext (fmul x,...
bool matchCombineFMinMaxNaN(MachineInstr &MI, unsigned &Info)
bool matchCombineShlOfExtend(MachineInstr &MI, RegisterImmPair &MatchData)
bool matchConstantFoldFMA(MachineInstr &MI, ConstantFP *&MatchInfo)
Constant fold G_FMA/G_FMAD.
bool matchBitfieldExtractFromAnd(MachineInstr &MI, BuildFnTy &MatchInfo)
Match: and (lshr x, cst), mask -> ubfx x, cst, width.
void applyShiftOfShiftedLogic(MachineInstr &MI, ShiftOfShiftedLogic &MatchInfo)
bool isLegal(const LegalityQuery &Query) const
bool matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo)
Combine selects.
bool matchCombineUnmergeConstant(MachineInstr &MI, SmallVectorImpl< APInt > &Csts)
Transform G_UNMERGE Constant -> Constant1, Constant2, ...
bool matchICmpToTrueFalseKnownBits(MachineInstr &MI, int64_t &MatchInfo)
bool matchCombineAnyExtTrunc(MachineInstr &MI, Register &Reg)
Transform anyext(trunc(x)) to x.
void applySimplifyURemByPow2(MachineInstr &MI)
Combine G_UREM x, (known power of 2) to an add and bitmasking.
bool matchReassocFoldConstantsInSubTree(GPtrAdd &MI, MachineInstr *LHS, MachineInstr *RHS, BuildFnTy &MatchInfo)
void applyCombineShuffleConcat(MachineInstr &MI, SmallVector< Register > &Ops)
Replace MI with a flattened build_vector with Ops or an implicit_def if Ops is empty.
MachineRegisterInfo & MRI
void applyUMulHToLShr(MachineInstr &MI)
bool matchLoadOrCombine(MachineInstr &MI, BuildFnTy &MatchInfo)
Match expression trees of the form.
bool matchShuffleToExtract(MachineInstr &MI)
bool matchUndefShuffleVectorMask(MachineInstr &MI)
Return true if a G_SHUFFLE_VECTOR instruction MI has an undef mask.
bool isLegalOrBeforeLegalizer(const LegalityQuery &Query) const
bool matchExtendThroughPhis(MachineInstr &MI, MachineInstr *&ExtMI)
bool matchAndOrDisjointMask(MachineInstr &MI, BuildFnTy &MatchInfo)
bool matchCombineExtractedVectorLoad(MachineInstr &MI, BuildFnTy &MatchInfo)
Combine a G_EXTRACT_VECTOR_ELT of a load into a narrowed load.
bool matchCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal)
Transform a multiply by a power-of-2 value to a left shift.
bool matchBitfieldExtractFromShr(MachineInstr &MI, BuildFnTy &MatchInfo)
Match: shr (shl x, n), k -> sbfx/ubfx x, pos, width.
void applyFoldBinOpIntoSelect(MachineInstr &MI, const unsigned &SelectOpNo)
SelectOperand is the operand in binary operator MI that is the select to fold.
bool matchBuildVectorIdentityFold(MachineInstr &MI, Register &MatchInfo)
bool matchCombineFAddFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform (fadd (fmul x, y), z) -> (fma x, y, z) (fadd (fmul x, y), z) -> (fmad x,...
bool matchRedundantNegOperands(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform (fadd x, fneg(y)) -> (fsub x, y) (fadd fneg(x), y) -> (fsub y, x) (fsub x,...
bool matchCombineMergeUnmerge(MachineInstr &MI, Register &MatchInfo)
Fold away a merge of an unmerge of the corresponding values.
void applyCombineInsertVecElts(MachineInstr &MI, SmallVectorImpl< Register > &MatchInfo)
bool matchCombineUnmergeZExtToZExt(MachineInstr &MI)
Transform X, Y = G_UNMERGE(G_ZEXT(Z)) -> X = G_ZEXT(Z); Y = G_CONSTANT 0.
bool matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI)
Transform X, Y<dead> = G_UNMERGE Z -> X = G_TRUNC Z.
bool matchConstantLargerBitWidth(MachineInstr &MI, unsigned ConstIdx)
Checks if constant at ConstIdx is larger than MI 's bitwidth.
CombinerHelper(GISelChangeObserver &Observer, MachineIRBuilder &B, bool IsPreLegalize, GISelKnownBits *KB=nullptr, MachineDominatorTree *MDT=nullptr, const LegalizerInfo *LI=nullptr)
bool matchCombineDivRem(MachineInstr &MI, MachineInstr *&OtherMI)
Try to combine G_[SU]DIV and G_[SU]REM into a single G_[SU]DIVREM when their source operands are iden...
bool matchCombineTruncOfExt(MachineInstr &MI, std::pair< Register, unsigned > &MatchInfo)
Transform trunc ([asz]ext x) to x or ([asz]ext x) or (trunc x).
bool isPredecessor(const MachineInstr &DefMI, const MachineInstr &UseMI)
Returns true if DefMI precedes UseMI or they are the same instruction.
bool matchDivByPow2(MachineInstr &MI, bool IsSigned)
Given an G_SDIV MI expressing a signed divided by a pow2 constant, return expressions that implements...
bool matchExtractVecEltBuildVec(MachineInstr &MI, Register &Reg)
bool matchUMulHToLShr(MachineInstr &MI)
bool dominates(const MachineInstr &DefMI, const MachineInstr &UseMI)
Returns true if DefMI dominates UseMI.
MachineInstr * buildUDivUsingMul(MachineInstr &MI)
Given an G_UDIV MI expressing a divide by constant, return an expression that implements it by multip...
bool matchCombineZextTrunc(MachineInstr &MI, Register &Reg)
Transform zext(trunc(x)) to x.
void applyCombineShlOfExtend(MachineInstr &MI, const RegisterImmPair &MatchData)
bool canCombineFMadOrFMA(MachineInstr &MI, bool &AllowFusionGlobally, bool &HasFMAD, bool &Aggressive, bool CanReassociate=false)
const LegalizerInfo * LI
void applyCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI)
void applyShuffleToExtract(MachineInstr &MI)
MachineDominatorTree * MDT
bool matchSDivByConst(MachineInstr &MI)
void applySextInRegOfLoad(MachineInstr &MI, std::tuple< Register, unsigned > &MatchInfo)
bool matchCombineUnmergeMergeToPlainValues(MachineInstr &MI, SmallVectorImpl< Register > &Operands)
Transform <ty,...> G_UNMERGE(G_MERGE ty X, Y, Z) -> ty X, Y, Z.
void applyExtractAllEltsFromBuildVector(MachineInstr &MI, SmallVectorImpl< std::pair< Register, MachineInstr * > > &MatchInfo)
void applyBuildFnMO(const MachineOperand &MO, BuildFnTy &MatchInfo)
Use a function which takes in a MachineIRBuilder to perform a combine.
bool matchCombineTruncOfShift(MachineInstr &MI, std::pair< MachineInstr *, LLT > &MatchInfo)
Transform trunc (shl x, K) to shl (trunc x), K if K < VT.getScalarSizeInBits().
const RegisterBankInfo * RBI
bool matchCommuteShift(MachineInstr &MI, BuildFnTy &MatchInfo)
bool matchCombineFAddFpExtFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z) (fadd (fpext (fmul x,...
bool matchReassocConstantInnerLHS(GPtrAdd &MI, MachineInstr *LHS, MachineInstr *RHS, BuildFnTy &MatchInfo)
void applyExtendThroughPhis(MachineInstr &MI, MachineInstr *&ExtMI)
const TargetRegisterInfo * TRI
bool tryCombineShuffleVector(MachineInstr &MI)
Try to combine G_SHUFFLE_VECTOR into G_CONCAT_VECTORS.
bool matchCombineI2PToP2I(MachineInstr &MI, Register &Reg)
Transform IntToPtr(PtrToInt(x)) to x if cast is in the same address space.
bool matchICmpToLHSKnownBits(MachineInstr &MI, BuildFnTy &MatchInfo)
GISelChangeObserver & Observer
bool matchCombineExtOfExt(MachineInstr &MI, std::tuple< Register, unsigned > &MatchInfo)
Transform [asz]ext([asz]ext(x)) to [asz]ext x.
bool matchOverlappingAnd(MachineInstr &MI, BuildFnTy &MatchInfo)
Fold and(and(x, C1), C2) -> C1&C2 ? and(x, C1&C2) : 0.
bool matchSextInRegOfLoad(MachineInstr &MI, std::tuple< Register, unsigned > &MatchInfo)
Match sext_inreg(load p), imm -> sextload p.
bool matchCombineInsertVecElts(MachineInstr &MI, SmallVectorImpl< Register > &MatchInfo)
bool matchCombineAddP2IToPtrAdd(MachineInstr &MI, std::pair< Register, bool > &PtrRegAndCommute)
Transform G_ADD (G_PTRTOINT x), y -> G_PTRTOINT (G_PTR_ADD x, y) Transform G_ADD y,...
bool matchOr(MachineInstr &MI, BuildFnTy &MatchInfo)
Combine ors.
void applyFunnelShiftToRotate(MachineInstr &MI)
void applyCombineUnmergeMergeToPlainValues(MachineInstr &MI, SmallVectorImpl< Register > &Operands)
bool matchOptBrCondByInvertingCond(MachineInstr &MI, MachineInstr *&BrCond)
If a brcond's true block is not the fallthrough, make it so by inverting the condition and swapping o...
bool matchAddEToAddO(MachineInstr &MI, BuildFnTy &MatchInfo)
Match: (G_*ADDE x, y, 0) -> (G_*ADDO x, y) (G_*SUBE x, y, 0) -> (G_*SUBO x, y)
bool matchAddOverflow(MachineInstr &MI, BuildFnTy &MatchInfo)
Combine addos.
void applyCombineP2IToI2P(MachineInstr &MI, Register &Reg)
Transform PtrToInt(IntToPtr(x)) to x.
bool matchCombineShiftToUnmerge(MachineInstr &MI, unsigned TargetShiftSize, unsigned &ShiftVal)
Reduce a shift by a constant to an unmerge and a shift on a half sized type.
bool matchCommuteConstantToRHS(MachineInstr &MI)
Match constant LHS ops that should be commuted.
void applyPtrAddImmedChain(MachineInstr &MI, PtrAddChain &MatchInfo)
void applyCombineDivRem(MachineInstr &MI, MachineInstr *&OtherMI)
void applyFsubToFneg(MachineInstr &MI, Register &MatchInfo)
void applyBuildInstructionSteps(MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo)
Replace MI with a series of instructions described in MatchInfo.
bool matchCombineFSubFpExtFNegFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform (fsub (fpext (fneg (fmul x, y))), z) -> (fneg (fma (fpext x), (fpext y),...
MachineIRBuilder & Builder
bool matchBitfieldExtractFromShrAnd(MachineInstr &MI, BuildFnTy &MatchInfo)
Match: shr (and x, n), k -> ubfx x, pos, width.
bool matchCombineShuffleConcat(MachineInstr &MI, SmallVector< Register > &Ops)
bool matchReassocCommBinOp(MachineInstr &MI, BuildFnTy &MatchInfo)
Reassociate commutative binary operations like G_ADD.
bool matchFoldBinOpIntoSelect(MachineInstr &MI, unsigned &SelectOpNo)
Push a binary operator through a select on constants.
bool matchConstantFoldCastOp(MachineInstr &MI, APInt &MatchInfo)
Do constant folding when opportunities are exposed after MIR building.
bool matchOperandIsZero(MachineInstr &MI, unsigned OpIdx)
Check if operand OpIdx is zero.
bool matchOrShiftToFunnelShift(MachineInstr &MI, BuildFnTy &MatchInfo)
void applyUDivByPow2(MachineInstr &MI)
Given an G_UDIV MI expressing an unsigned divided by a pow2 constant, return expressions that impleme...
bool matchHoistLogicOpWithSameOpcodeHands(MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo)
Match (logic_op (op x...), (op y...)) -> (op (logic_op x, y))
void applyAshShlToSextInreg(MachineInstr &MI, std::tuple< Register, int64_t > &MatchInfo)
void applySextTruncSextLoad(MachineInstr &MI)
bool matchCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo)
bool matchCommuteFPConstantToRHS(MachineInstr &MI)
Match constant LHS FP ops that should be commuted.
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:268
const APFloat & getValue() const
Definition: Constants.h:312
const APFloat & getValueAPF() const
Definition: Constants.h:311
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:145
This class represents a range of values.
Definition: ConstantRange.h:47
std::optional< ConstantRange > exactUnionWith(const ConstantRange &CR) const
Union the two ranges and return the result if it can be represented exactly, otherwise return std::nu...
ConstantRange subtract(const APInt &CI) const
Subtract the specified constant from the endpoints of this constant range.
static ConstantRange fromKnownBits(const KnownBits &Known, bool IsSigned)
Initialize a range based on a known bits constraint.
const APInt & getLower() const
Return the lower value for this range.
OverflowResult unsignedAddMayOverflow(const ConstantRange &Other) const
Return whether unsigned add of the two ranges always/never overflows.
bool isWrappedSet() const
Return true if this set wraps around the unsigned domain.
const APInt & getUpper() const
Return the upper value for this range.
static ConstantRange makeExactICmpRegion(CmpInst::Predicate Pred, const APInt &Other)
Produce the exact range such that all values in the returned range satisfy the given predicate with a...
OverflowResult signedAddMayOverflow(const ConstantRange &Other) const
Return whether signed add of the two ranges always/never overflows.
@ AlwaysOverflowsHigh
Always overflows in the direction of signed/unsigned max value.
@ AlwaysOverflowsLow
Always overflows in the direction of signed/unsigned min value.
@ MayOverflow
May or may not overflow.
This is an important base class in LLVM.
Definition: Constant.h:41
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
bool isBigEndian() const
Definition: DataLayout.h:239
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:202
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&... Args)
Definition: DenseMap.h:235
unsigned size() const
Definition: DenseMap.h:99
iterator end()
Definition: DenseMap.h:84
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:692
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:356
Represents overflowing add operations.
Represents an integer addition.
Represents a logical and.
CmpInst::Predicate getCond() const
Register getLHSReg() const
Register getRHSReg() const
Represents any generic load, including sign/zero extending variants.
Register getDstReg() const
Get the definition register of the loaded value.
Register getLHSReg() const
Register getRHSReg() const
Represents a G_BUILD_VECTOR.
Represent a G_FCMP.
Represent a G_ICMP.
Abstract class that contains various methods for clients to notify about changes.
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
void finishedChangingAllUsesOfReg()
All instructions reported as changing by changingAllUsesOfReg() have finished being changed.
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
virtual void erasingInstr(MachineInstr &MI)=0
An instruction is about to be erased.
void changingAllUsesOfReg(const MachineRegisterInfo &MRI, Register Reg)
All the instructions using the given register are being changed.
unsigned computeNumSignBits(Register R, const APInt &DemandedElts, unsigned Depth=0)
KnownBits getKnownBits(Register R)
APInt getKnownZeroes(Register R)
Simple wrapper observer that takes several observers, and calls each one for each event.
Represents a G_IMPLICIT_DEF.
Represents any type of generic load or store.
Register getPointerReg() const
Get the source register of the pointer value.
Represents a G_LOAD.
Represents a logical binary operation.
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
bool isAtomic() const
Returns true if the attached MachineMemOperand has the atomic flag set.
LocationSize getMemSizeInBits() const
Returns the size in bits of the memory access.
bool isSimple() const
Returns true if the memory operation is neither atomic or volatile.
Register getSourceReg(unsigned I) const
Returns the I'th source register.
unsigned getNumSources() const
Returns the number of source registers.
Represents a G_MERGE_VALUES.
Represents a logical or.
Represents a G_PTR_ADD.
Represents a G_SELECT.
Register getCondReg() const
Represents a G_ZEXTLOAD.
Register getReg(unsigned Idx) const
Access the Idx'th operand as a register and return it.
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:278
constexpr bool isScalableVector() const
Returns true if the LLT is a scalable vector.
Definition: LowLevelType.h:182
constexpr unsigned getScalarSizeInBits() const
Definition: LowLevelType.h:267
constexpr bool isScalar() const
Definition: LowLevelType.h:146
static constexpr LLT vector(ElementCount EC, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
Definition: LowLevelType.h:64
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
Definition: LowLevelType.h:42
constexpr bool isValid() const
Definition: LowLevelType.h:145
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
Definition: LowLevelType.h:159
constexpr bool isVector() const
Definition: LowLevelType.h:148
constexpr bool isByteSized() const
Definition: LowLevelType.h:263
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
Definition: LowLevelType.h:193
constexpr bool isPointer() const
Definition: LowLevelType.h:149
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
Definition: LowLevelType.h:290
constexpr ElementCount getElementCount() const
Definition: LowLevelType.h:184
constexpr LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
Definition: LowLevelType.h:221
constexpr unsigned getAddressSpace() const
Definition: LowLevelType.h:280
constexpr bool isFixedVector() const
Returns true if the LLT is a fixed vector.
Definition: LowLevelType.h:178
constexpr LLT getScalarType() const
Definition: LowLevelType.h:208
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
Definition: LowLevelType.h:203
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
@ Legalized
Instruction has been legalized and the MachineFunction changed.
LegalizeResult lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0)
Register getVectorElementPointer(Register VecPtr, LLT VecTy, Register Index)
Get a pointer to vector element Index located in memory for a vector of type VecTy starting at a base...
bool isLegalOrCustom(const LegalityQuery &Query) const
LegalizeActionStep getAction(const LegalityQuery &Query) const
Determine what action should be taken to legalize the described instruction.
TypeSize getValue() const
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition: MCInstrInfo.h:63
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
bool isLayoutSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB will be emitted immediately after this block, such that if this bloc...
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
bool dominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Helper class to build MachineInstr.
MachineInstrBuilder insertInstr(MachineInstrBuilder MIB)
Insert an existing instruction at the insertion point.
void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II)
Set the insertion point before the specified position.
LLVMContext & getContext() const
MachineInstrBuilder buildAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ADD Op0, Op1.
MachineInstrBuilder buildUndef(const DstOp &Res)
Build and insert Res = IMPLICIT_DEF.
MachineInstrBuilder buildNot(const DstOp &Dst, const SrcOp &Src0)
Build and insert a bitwise not, NegOne = G_CONSTANT -1 Res = G_OR Op0, NegOne.
MachineInstrBuilder buildCTTZ(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTTZ Op0, Src0.
MachineInstrBuilder buildAShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildUnmerge(ArrayRef< LLT > Res, const SrcOp &Op)
Build and insert Res0, ... = G_UNMERGE_VALUES Op.
MachineInstrBuilder buildSelect(const DstOp &Res, const SrcOp &Tst, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert a Res = G_SELECT Tst, Op0, Op1.
MachineInstrBuilder buildAnd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_AND Op0, Op1.
MachineInstrBuilder buildICmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1)
Build and insert a Res = G_ICMP Pred, Op0, Op1.
MachineInstrBuilder buildCast(const DstOp &Dst, const SrcOp &Src)
Build and insert an appropriate cast between two registers of equal size.
const TargetInstrInfo & getTII()
MachineInstrBuilder buildURem(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_UREM Op0, Op1.
MachineInstrBuilder buildLShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildConcatVectors(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_CONCAT_VECTORS Op0, ...
MachineInstrBuilder buildSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_SUB Op0, Op1.
MachineInstrBuilder buildIntToPtr(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_INTTOPTR instruction.
MachineInstrBuilder buildBuildVector(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_BUILD_VECTOR Op0, ...
MachineInstrBuilder buildNeg(const DstOp &Dst, const SrcOp &Src0)
Build and insert integer negation: Zero = G_CONSTANT 0; Res = G_SUB Zero, Op0.
MachineInstrBuilder buildCTLZ(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTLZ Src0.
MachineInstrBuilder buildMergeLikeInstr(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_MERGE_VALUES Op0, ... or Res = G_BUILD_VECTOR Op0, ... or Res = G_CONCAT_VEC...
MachineInstrBuilder buildPtrAdd(const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_PTR_ADD Op0, Op1.
MachineInstrBuilder buildZExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ZEXT Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes...
MachineInstrBuilder buildExtractVectorElementConstant(const DstOp &Res, const SrcOp &Val, const int Idx)
Build and insert Res = G_EXTRACT_VECTOR_ELT Val, Idx.
virtual MachineInstrBuilder buildFConstant(const DstOp &Res, const ConstantFP &Val)
Build and insert Res = G_FCONSTANT Val.
MachineInstrBuilder buildShl(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineInstrBuilder buildZExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ZEXT Op.
MachineFunction & getMF()
Getter for the function we currently build.
void setInstrAndDebugLoc(MachineInstr &MI)
Set the insertion point to before MI, and set the debug loc to MI's loc.
MachineInstrBuilder buildExtOrTrunc(unsigned ExtOpc, const DstOp &Res, const SrcOp &Op)
Build and insert Res = ExtOpc, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes of...
void setDebugLoc(const DebugLoc &DL)
Set the debug location to DL for all the next build instructions.
MachineInstrBuilder buildFNeg(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FNEG Op0.
MachineInstrBuilder buildTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_TRUNC Op.
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineInstrBuilder buildOr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_OR Op0, Op1.
MachineInstrBuilder buildInstrNoInsert(unsigned Opcode)
Build but don't insert <empty> = Opcode <empty>.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
MachineInstrBuilder buildLoadInstr(unsigned Opcode, const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = <opcode> Addr, MMO.
MachineInstrBuilder buildXor(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_XOR Op0, Op1.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
MachineInstrBuilder buildPtrToInt(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_PTRTOINT instruction.
MachineInstrBuilder buildFCanonicalize(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Dst = G_FCANONICALIZE Src0.
MachineInstrBuilder buildSExtInReg(const DstOp &Res, const SrcOp &Op, int64_t ImmOp)
Build and insert Res = G_SEXT_INREG Op, ImmOp.
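The builder entries above are the interface combines use to emit their replacement instructions. As a hedged illustration only (not code from this file; the helper name and the assumption that legality has already been checked are hypothetical), rewriting an unsigned divide by a known power of two into a logical shift could look like:

#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"

using namespace llvm;

// Hypothetical helper: rewrite Dst = G_UDIV X, (1 << Log2Divisor) as a G_LSHR.
// Assumes the caller has already matched the pattern and validated legality.
static void buildUDivPow2AsLShr(MachineIRBuilder &MIB, Register Dst,
                                Register X, uint64_t Log2Divisor) {
  LLT Ty = MIB.getMRI()->getType(Dst);
  // Shift amount as a G_CONSTANT of the destination type (a production
  // combine would prefer the target's getPreferredShiftAmountTy).
  auto ShiftAmt = MIB.buildConstant(Ty, Log2Divisor);
  // Dst = G_LSHR X, ShiftAmt
  MIB.buildLShr(Dst, X, ShiftAmt);
}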
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
Definition: MachineInstr.h:69
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:558
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:341
bool isDereferenceableInvariantLoad() const
Return true if this load instruction never traps and points to a memory location whose value doesn't ...
bool getFlag(MIFlag Flag) const
Return whether an MI flag is set.
Definition: MachineInstr.h:391
void cloneMemRefs(MachineFunction &MF, const MachineInstr &MI)
Clone another MachineInstr's memory reference descriptor list and replace ours with it.
unsigned getNumOperands() const
Returns the total number of operands.
Definition: MachineInstr.h:561
void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:568
uint32_t getFlags() const
Return the MI flags bitvector.
Definition: MachineInstr.h:386
int findRegisterDefOperandIdx(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
A description of a memory reference used in the backend.
LLT getMemoryType() const
Return the memory type of the memory reference.
unsigned getAddrSpace() const
const MachinePointerInfo & getPointerInfo() const
Align getAlign() const
Return the minimum known alignment in bytes of the actual memory reference.
MachineOperand class - Representation of each machine instruction operand.
const ConstantInt * getCImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
void setReg(Register Reg)
Change the register this operand corresponds to.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void setMBB(MachineBasicBlock *MBB)
void setPredicate(unsigned Predicate)
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
unsigned getPredicate() const
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool hasOneNonDBGUse(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register.
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
use_instr_iterator use_instr_begin(Register RegNo) const
bool use_nodbg_empty(Register RegNo) const
use_nodbg_empty - Return true if there are no non-Debug instructions using the specified register.
const RegClassOrRegBank & getRegClassOrRegBank(Register Reg) const
Return the register bank or register class of Reg.
void setRegClassOrRegBank(Register Reg, const RegClassOrRegBank &RCOrRB)
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
bool hasOneUse(Register RegNo) const
hasOneUse - Return true if there is exactly one instruction using the specified register.
use_instr_nodbg_iterator use_instr_nodbg_begin(Register RegNo) const
void setRegBank(Register Reg, const RegisterBank &RegBank)
Set the register bank to RegBank for Reg.
iterator_range< use_instr_nodbg_iterator > use_nodbg_instructions(Register Reg) const
Register createGenericVirtualRegister(LLT Ty, StringRef Name="")
Create and return a new generic virtual register with low-level type Ty.
iterator_range< use_instr_iterator > use_instructions(Register Reg) const
Register cloneVirtualRegister(Register VReg, StringRef Name="")
Create and return a new virtual register in the function with the same attributes as the given regist...
bool constrainRegAttrs(Register Reg, Register ConstrainingReg, unsigned MinNumRegs=0)
Constrain the register class or the register bank of the virtual register Reg (and low-level type) to...
iterator_range< use_iterator > use_operands(Register Reg) const
void replaceRegWith(Register FromReg, Register ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
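MachineRegisterInfo queries like these back most match-side checks in the combiner. A minimal sketch (the helper name is hypothetical; it uses only the entries listed above plus TargetOpcode::G_SHL) of the common "defined by a given opcode and used exactly once" test:

#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"

using namespace llvm;

// Hypothetical helper: is Reg produced by a G_SHL whose result has exactly
// one non-debug user?
static bool isSingleUseShl(Register Reg, const MachineRegisterInfo &MRI) {
  const MachineInstr *Def = MRI.getVRegDef(Reg);
  return Def && Def->getOpcode() == TargetOpcode::G_SHL &&
         MRI.hasOneNonDBGUse(Reg);
}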
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
This class implements the register bank concept.
Definition: RegisterBank.h:28
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isValid() const
Definition: Register.h:116
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:98
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:264
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
SmallBitVector & set()
bool all() const
Returns true if all bits are set.
size_type size() const
Definition: SmallPtrSet.h:94
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:342
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:427
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void resize(size_type N)
Definition: SmallVector.h:651
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
virtual bool isExtendLikelyToBeFolded(MachineInstr &ExtMI, MachineRegisterInfo &MRI) const
Given the generic extension instruction ExtMI, returns true if this extension is a likely candidate f...
virtual bool produceSameValue(const MachineInstr &MI0, const MachineInstr &MI1, const MachineRegisterInfo *MRI=nullptr) const
Return true if two machine instructions would produce identical values.
virtual LLVM_READONLY LLT getPreferredShiftAmountTy(LLT ShiftValueTy) const
Return the preferred type to use for a shift opcode, given the shifted amount type is ShiftValueTy.
virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AddrSpace, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual bool isReassocProfitable(SelectionDAG &DAG, SDValue N0, SDValue N1) const
virtual const TargetLowering * getTargetLowering() const
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
LLVM Value Representation.
Definition: Value.h:74
Value(Type *Ty, unsigned scid)
Definition: Value.cpp:53
self_iterator getIterator()
Definition: ilist_node.h:109
#define INT64_MAX
Definition: DataTypes.h:71
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ Legal
The operation is expected to be selectable directly by the target, and no transformation is necessary...
Definition: LegalizerInfo.h:47
operand_type_match m_Reg()
SpecificConstantOrSplatMatch m_SpecificICstOrSplat(int64_t RequestedValue)
Matches a RequestedValue constant or a constant splat of RequestedValue.
BinaryOp_match< LHS, RHS, TargetOpcode::G_BUILD_VECTOR, false > m_GBuildVector(const LHS &L, const RHS &R)
GCstAndRegMatch m_GCst(std::optional< ValueAndVReg > &ValReg)
SpecificConstantMatch m_SpecificICst(int64_t RequestedValue)
Matches a constant equal to RequestedValue.
operand_type_match m_Pred()
UnaryOp_match< SrcTy, TargetOpcode::G_ZEXT > m_GZExt(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_XOR, true > m_GXor(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_SEXT > m_GSExt(const SrcTy &Src)
UnaryOp_match< SrcTy, TargetOpcode::G_FPEXT > m_GFPExt(const SrcTy &Src)
ConstantMatch< APInt > m_ICst(APInt &Cst)
UnaryOp_match< SrcTy, TargetOpcode::G_INTTOPTR > m_GIntToPtr(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_ADD, true > m_GAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_OR, true > m_GOr(const LHS &L, const RHS &R)
ICstOrSplatMatch< APInt > m_ICstOrSplat(APInt &Cst)
ImplicitDefMatch m_GImplicitDef()
OneNonDBGUse_match< SubPat > m_OneNonDBGUse(const SubPat &SP)
CheckType m_SpecificType(LLT Ty)
BinaryOp_match< LHS, RHS, TargetOpcode::G_FADD, true > m_GFAdd(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_PTRTOINT > m_GPtrToInt(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_FSUB, false > m_GFSub(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SUB > m_GSub(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_ASHR, false > m_GAShr(const LHS &L, const RHS &R)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
BinaryOp_match< LHS, RHS, TargetOpcode::G_PTR_ADD, false > m_GPtrAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SHL, false > m_GShl(const LHS &L, const RHS &R)
Or< Preds... > m_any_of(Preds &&... preds)
BinaryOp_match< LHS, RHS, TargetOpcode::G_AND, true > m_GAnd(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_BITCAST > m_GBitcast(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_BUILD_VECTOR_TRUNC, false > m_GBuildVectorTrunc(const LHS &L, const RHS &R)
bind_ty< MachineInstr * > m_MInstr(MachineInstr *&MI)
UnaryOp_match< SrcTy, TargetOpcode::G_FNEG > m_GFNeg(const SrcTy &Src)
CompareOp_match< Pred, LHS, RHS, TargetOpcode::G_ICMP, true > m_c_GICmp(const Pred &P, const LHS &L, const RHS &R)
G_ICMP matcher that also matches commuted compares.
TernaryOp_match< Src0Ty, Src1Ty, Src2Ty, TargetOpcode::G_INSERT_VECTOR_ELT > m_GInsertVecElt(const Src0Ty &Src0, const Src1Ty &Src1, const Src2Ty &Src2)
GFCstOrSplatGFCstMatch m_GFCstOrSplat(std::optional< FPValueAndVReg > &FPValReg)
And< Preds... > m_all_of(Preds &&... preds)
BinaryOp_match< LHS, RHS, TargetOpcode::G_LSHR, false > m_GLShr(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_ANYEXT > m_GAnyExt(const SrcTy &Src)
UnaryOp_match< SrcTy, TargetOpcode::G_TRUNC > m_GTrunc(const SrcTy &Src)
CompareOp_match< Pred, LHS, RHS, TargetOpcode::G_FCMP > m_GFCmp(const Pred &P, const LHS &L, const RHS &R)
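The mi_match combinators above are how this file expresses its match patterns. As a sketch only (the function name is hypothetical, and the binding overload m_Reg(Register &) is assumed in addition to the no-argument form listed above), matching "Dst = G_ADD Src, <integer constant>" with the constant bound to an APInt:

#include "llvm/ADT/APInt.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"

using namespace llvm;
using namespace MIPatternMatch;

// Hypothetical helper: bind the non-constant operand and the constant value
// of an add. m_GAdd matches commutatively, so the constant may be on either
// side.
static bool matchAddOfConstant(Register Dst, const MachineRegisterInfo &MRI,
                               Register &Src, APInt &Cst) {
  return mi_match(Dst, MRI, m_GAdd(m_Reg(Src), m_ICst(Cst)));
}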
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Definition: PatternMatch.h:100
BinaryOp_match< cst_pred_ty< is_zero_int >, ValTy, Instruction::Sub > m_Neg(const ValTy &V)
Matches a 'Neg' as 'sub 0, V'.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
@ Offset
Definition: DWP.cpp:456
bool isBuildVectorAllZeros(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndef=false)
Return true if the specified instruction is a G_BUILD_VECTOR or G_BUILD_VECTOR_TRUNC where all of the...
Definition: Utils.cpp:1412
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by a single def instruction that is Opcode.
Definition: Utils.cpp:625
static double log2(double V)
const ConstantFP * getConstantFPVRegVal(Register VReg, const MachineRegisterInfo &MRI)
Definition: Utils.cpp:438
EVT getApproximateEVTForLLT(LLT Ty, const DataLayout &DL, LLVMContext &Ctx)
std::optional< APInt > getIConstantVRegVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT, return the corresponding value.
Definition: Utils.cpp:293
std::optional< APInt > getIConstantSplatVal(const Register Reg, const MachineRegisterInfo &MRI)
Definition: Utils.cpp:1372
bool isAllOnesOrAllOnesSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant -1 integer or a splatted vector of a constant -1 integer (with...
Definition: Utils.cpp:1525
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition: bit.h:307
const llvm::fltSemantics & getFltSemanticForLLT(LLT Ty)
Get the appropriate floating point arithmetic semantic based on the bit size of the given scalar LLT.
std::optional< APFloat > ConstantFoldFPBinOp(unsigned Opcode, const Register Op1, const Register Op2, const MachineRegisterInfo &MRI)
Definition: Utils.cpp:713
MVT getMVTForLLT(LLT Ty)
Get a rough equivalent of an MVT for a given LLT.
bool isKnownToBeAPowerOfTwo(const Value *V, const DataLayout &DL, bool OrZero=false, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Return true if the given value is known to have exactly one bit set when defined.
std::optional< APInt > isConstantOrConstantSplatVector(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a constant integer or a splat vector of constant integers.
Definition: Utils.cpp:1495
bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition: Utils.cpp:1507
MachineInstr * getDefIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, folding away any trivial copies.
Definition: Utils.cpp:465
bool matchUnaryPredicate(const MachineRegisterInfo &MRI, Register Reg, std::function< bool(const Constant *ConstVal)> Match, bool AllowUndefs=false)
Attempt to match a unary predicate against a scalar/splat constant or every element of a constant G_B...
Definition: Utils.cpp:1540
bool isConstTrueVal(const TargetLowering &TLI, int64_t Val, bool IsVector, bool IsFP)
Returns true if given the TargetLowering's boolean contents information, the value Val contains a tru...
Definition: Utils.cpp:1572
std::function< void(MachineIRBuilder &)> BuildFnTy
std::optional< APInt > ConstantFoldBinOp(unsigned Opcode, const Register Op1, const Register Op2, const MachineRegisterInfo &MRI)
Definition: Utils.cpp:644
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
bool isConstantOrConstantVector(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowFP=true, bool AllowOpaqueConstants=true)
Return true if the specified instruction is known to be a constant, or a vector of constants.
Definition: Utils.cpp:1475
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:275
bool canReplaceReg(Register DstReg, Register SrcReg, MachineRegisterInfo &MRI)
Check if DstReg can be replaced with SrcReg depending on the register constraints.
Definition: Utils.cpp:199
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition: MathExtras.h:257
auto instructionsWithoutDebug(IterT It, IterT End, bool SkipPseudoOp=true)
Construct a range iterator which begins at It and moves forwards until End is reached,...
std::optional< FPValueAndVReg > getFConstantSplat(Register VReg, const MachineRegisterInfo &MRI, bool AllowUndef=true)
Returns a floating point scalar constant of a build vector splat if it exists.
Definition: Utils.cpp:1405
std::optional< APInt > ConstantFoldCastOp(unsigned Opcode, LLT DstTy, const Register Op0, const MachineRegisterInfo &MRI)
Definition: Utils.cpp:932
@ Xor
Bitwise or logical XOR of integers.
@ Add
Sum of integers.
DWARFExpression::Operation Op
std::optional< FPValueAndVReg > getFConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_FCONSTANT returns it...
Definition: Utils.cpp:427
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
int64_t getICmpTrueVal(const TargetLowering &TLI, bool IsVector, bool IsFP)
Returns an integer representing true, as defined by the TargetBooleanContents.
Definition: Utils.cpp:1597
std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Definition: Utils.cpp:413
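Many of the helpers above exist so a combine can treat "constant-like" operands uniformly. A hedged sketch (the predicate name is hypothetical) using the look-through constant query:

#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"

using namespace llvm;

// Hypothetical predicate: does Reg hold a compile-time-known power of two?
static bool isConstantPowerOfTwo(Register Reg, const MachineRegisterInfo &MRI) {
  auto ValAndVReg = getIConstantVRegValWithLookThrough(Reg, MRI);
  return ValAndVReg && ValAndVReg->Value.isPowerOf2();
}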
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1749
std::optional< DefinitionAndSourceRegister > getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, and underlying value Register folding away any copies.
Definition: Utils.cpp:446
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
bool isKnownNeverNaN(const Value *V, unsigned Depth, const SimplifyQuery &SQ)
Return true if the floating-point scalar value is not a NaN or if the floating-point vector value has...
Register getSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the source register for Reg, folding away any trivial copies.
Definition: Utils.cpp:472
unsigned getFCmpCode(CmpInst::Predicate CC)
Similar to getICmpCode but for FCmpInst.
std::optional< int64_t > getIConstantSplatSExtVal(const Register Reg, const MachineRegisterInfo &MRI)
Definition: Utils.cpp:1390
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:230
static const fltSemantics & IEEEdouble() LLVM_READNONE
Definition: APFloat.cpp:250
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Simple struct used to hold a Register value and the instruction which defines it.
Definition: Utils.h:224
Extended Value Type.
Definition: ValueTypes.h:34
SmallVector< InstructionBuildSteps, 2 > InstrsToBuild
Describes instructions to be built during a combine.
static std::optional< bool > eq(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_EQ result.
Definition: KnownBits.cpp:494
static std::optional< bool > ne(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_NE result.
Definition: KnownBits.cpp:502
static std::optional< bool > sge(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SGE result.
Definition: KnownBits.cpp:542
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition: KnownBits.h:244
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition: KnownBits.h:141
static std::optional< bool > ugt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_UGT result.
Definition: KnownBits.cpp:508
static std::optional< bool > slt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SLT result.
Definition: KnownBits.cpp:548
static std::optional< bool > ult(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_ULT result.
Definition: KnownBits.cpp:524
static std::optional< bool > ule(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_ULE result.
Definition: KnownBits.cpp:528
static std::optional< bool > sle(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SLE result.
Definition: KnownBits.cpp:552
static std::optional< bool > sgt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SGT result.
Definition: KnownBits.cpp:532
static std::optional< bool > uge(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_UGE result.
Definition: KnownBits.cpp:518
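These KnownBits comparators return a definite answer only when the known bits of both operands force one; otherwise they return std::nullopt. A sketch of using them together with the GlobalISel known-bits analysis (the wrapper name is hypothetical, and GISelKnownBits::getKnownBits is assumed from that analysis rather than from the entries above):

#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/Support/KnownBits.h"
#include <optional>

using namespace llvm;

// Hypothetical helper: fold an unsigned '<' compare when known bits decide it.
static std::optional<bool> foldUltFromKnownBits(GISelKnownBits &KB,
                                                Register LHS, Register RHS) {
  KnownBits L = KB.getKnownBits(LHS);
  KnownBits R = KB.getKnownBits(RHS);
  // Definite true/false only when every value pair consistent with the known
  // bits agrees on the ICMP_ULT result.
  return KnownBits::ult(L, R);
}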
The LegalityQuery object bundles together all the information that's needed to decide whether a given...
LegalizeAction Action
The action to take or the final answer.
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands,...
unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
MachinePointerInfo getWithOffset(int64_t O) const
MachineInstr * MI
const RegisterBank * Bank
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
Magic data for optimising unsigned division by a constant.
static UnsignedDivisionByConstantInfo get(const APInt &D, unsigned LeadingZeros=0, bool AllowEvenDivisorOptimization=true)
Calculate the magic numbers required to implement an unsigned integer division by a constant as a seq...
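UnsignedDivisionByConstantInfo supplies the magic multiplier and shift amounts used when a udiv-by-constant is turned into a multiply/shift sequence. The APInt-level sketch below shows how those fields combine; the member names Magic, PreShift, PostShift, and IsAdd come from DivisionByConstantInfo.h, while the fixup sequence shown is the standard expansion and is offered as an illustration, not as this file's code:

#include "llvm/ADT/APInt.h"
#include "llvm/Support/DivisionByConstantInfo.h"

using namespace llvm;

// Illustrative expansion of X udiv Divisor via the magic constants.
static APInt expandUDivByConstant(const APInt &X, const APInt &Divisor) {
  UnsignedDivisionByConstantInfo Magics =
      UnsignedDivisionByConstantInfo::get(Divisor);
  unsigned BW = X.getBitWidth();
  // umulh: high half of the 2*BW-bit product of (X >> PreShift) and Magic.
  APInt Q = (X.lshr(Magics.PreShift).zext(2 * BW) * Magics.Magic.zext(2 * BW))
                .lshr(BW)
                .trunc(BW);
  if (Magics.IsAdd) {
    // Fixup when the magic multiplier would not fit: q = ((x - q) >> 1) + q.
    Q = (X - Q).lshr(1) + Q;
  }
  return Q.lshr(Magics.PostShift);
}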