CombinerHelper.cpp
1//===-- lib/CodeGen/GlobalISel/GICombinerHelper.cpp -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
9#include "llvm/ADT/APFloat.h"
10#include "llvm/ADT/STLExtras.h"
11#include "llvm/ADT/SetVector.h"
33#include "llvm/IR/DataLayout.h"
34#include "llvm/IR/InstrTypes.h"
40#include <cmath>
41#include <optional>
42#include <tuple>
43
44#define DEBUG_TYPE "gi-combiner"
45
46using namespace llvm;
47using namespace MIPatternMatch;
48
49// Option to allow testing of the combiner while no targets know about indexed
50// addressing.
51static cl::opt<bool>
52 ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false),
53 cl::desc("Force all indexed operations to be "
54 "legal for the GlobalISel combiner"));
55
56CombinerHelper::CombinerHelper(GISelChangeObserver &Observer,
57 MachineIRBuilder &B, bool IsPreLegalize,
58 GISelKnownBits *KB, MachineDominatorTree *MDT,
59 const LegalizerInfo *LI)
60 : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer), KB(KB),
61 MDT(MDT), IsPreLegalize(IsPreLegalize), LI(LI),
62 RBI(Builder.getMF().getSubtarget().getRegBankInfo()),
63 TRI(Builder.getMF().getSubtarget().getRegisterInfo()) {
64 (void)this->KB;
65}
66
67const TargetLowering &CombinerHelper::getTargetLowering() const {
68 return *Builder.getMF().getSubtarget().getTargetLowering();
69}
70
71/// \returns The little endian in-memory byte position of byte \p I in a
72/// \p ByteWidth bytes wide type.
73///
74/// E.g. Given a 4-byte type x, x[0] -> byte 0
75static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I) {
76 assert(I < ByteWidth && "I must be in [0, ByteWidth)");
77 return I;
78}
79
80/// Determines the LogBase2 value for a non-null input value using the
81/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
82static Register buildLogBase2(Register V, MachineIRBuilder &MIB) {
83 auto &MRI = *MIB.getMRI();
84 LLT Ty = MRI.getType(V);
85 auto Ctlz = MIB.buildCTLZ(Ty, V);
86 auto Base = MIB.buildConstant(Ty, Ty.getScalarSizeInBits() - 1);
87 return MIB.buildSub(Ty, Base, Ctlz).getReg(0);
88}
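// Illustrative note (not part of the original source): for a 32-bit value
// V = 8, ctlz(8) = 28, so the sequence built above computes
// (32 - 1) - 28 = 3, which is log2(8).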
89
90/// \returns The big endian in-memory byte position of byte \p I in a
91/// \p ByteWidth bytes wide type.
92///
93/// E.g. Given a 4-byte type x, x[0] -> byte 3
94static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I) {
95 assert(I < ByteWidth && "I must be in [0, ByteWidth)");
96 return ByteWidth - I - 1;
97}
98
99/// Given a map from byte offsets in memory to indices in a load/store,
100/// determine if that map corresponds to a little or big endian byte pattern.
101///
102/// \param MemOffset2Idx maps memory offsets to address offsets.
103/// \param LowestIdx is the lowest index in \p MemOffset2Idx.
104///
105/// \returns true if the map corresponds to a big endian byte pattern, false if
106/// it corresponds to a little endian byte pattern, and std::nullopt otherwise.
107///
108/// E.g. given a 32-bit type x, and x[AddrOffset], the in-memory byte patterns
109/// are as follows:
110///
111/// AddrOffset Little endian Big endian
112/// 0 0 3
113/// 1 1 2
114/// 2 2 1
115/// 3 3 0
116static std::optional<bool>
117isBigEndian(const SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx,
118 int64_t LowestIdx) {
119 // Need at least two byte positions to decide on endianness.
120 unsigned Width = MemOffset2Idx.size();
121 if (Width < 2)
122 return std::nullopt;
123 bool BigEndian = true, LittleEndian = true;
124 for (unsigned MemOffset = 0; MemOffset < Width; ++ MemOffset) {
125 auto MemOffsetAndIdx = MemOffset2Idx.find(MemOffset);
126 if (MemOffsetAndIdx == MemOffset2Idx.end())
127 return std::nullopt;
128 const int64_t Idx = MemOffsetAndIdx->second - LowestIdx;
129 assert(Idx >= 0 && "Expected non-negative byte offset?");
130 LittleEndian &= Idx == littleEndianByteAt(Width, MemOffset);
131 BigEndian &= Idx == bigEndianByteAt(Width, MemOffset);
132 if (!BigEndian && !LittleEndian)
133 return std::nullopt;
134 }
135
136 assert((BigEndian != LittleEndian) &&
137 "Pattern cannot be both big and little endian!");
138 return BigEndian;
139}
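// Illustrative example (not part of the original source): for a 4-byte
// access with MemOffset2Idx = {0->3, 1->2, 2->1, 3->0} and LowestIdx = 0,
// every entry matches bigEndianByteAt(4, MemOffset), so the function returns
// true; the identity map {0->0, 1->1, 2->2, 3->3} returns false (little
// endian).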
140
141bool CombinerHelper::isPreLegalize() const { return IsPreLegalize; }
142
143bool CombinerHelper::isLegal(const LegalityQuery &Query) const {
144 assert(LI && "Must have LegalizerInfo to query isLegal!");
145 return LI->getAction(Query).Action == LegalizeActions::Legal;
146}
147
148bool CombinerHelper::isLegalOrBeforeLegalizer(
149 const LegalityQuery &Query) const {
150 return isPreLegalize() || isLegal(Query);
151}
152
153bool CombinerHelper::isConstantLegalOrBeforeLegalizer(const LLT Ty) const {
154 if (!Ty.isVector())
155 return isLegalOrBeforeLegalizer({TargetOpcode::G_CONSTANT, {Ty}});
156 // Vector constants are represented as a G_BUILD_VECTOR of scalar G_CONSTANTs.
157 if (isPreLegalize())
158 return true;
159 LLT EltTy = Ty.getElementType();
160 return isLegal({TargetOpcode::G_BUILD_VECTOR, {Ty, EltTy}}) &&
161 isLegal({TargetOpcode::G_CONSTANT, {EltTy}});
162}
163
164void CombinerHelper::replaceRegWith(MachineRegisterInfo &MRI, Register FromReg,
165 Register ToReg) const {
166 Observer.changingAllUsesOfReg(MRI, FromReg);
167
168 if (MRI.constrainRegAttrs(ToReg, FromReg))
169 MRI.replaceRegWith(FromReg, ToReg);
170 else
171 Builder.buildCopy(ToReg, FromReg);
172
173 Observer.finishedChangingAllUsesOfReg();
174}
175
176void CombinerHelper::replaceRegOpWith(MachineRegisterInfo &MRI,
177 MachineOperand &FromRegOp,
178 Register ToReg) const {
179 assert(FromRegOp.getParent() && "Expected an operand in an MI");
180 Observer.changingInstr(*FromRegOp.getParent());
181
182 FromRegOp.setReg(ToReg);
183
184 Observer.changedInstr(*FromRegOp.getParent());
185}
186
187void CombinerHelper::replaceOpcodeWith(MachineInstr &FromMI,
188 unsigned ToOpcode) const {
189 Observer.changingInstr(FromMI);
190
191 FromMI.setDesc(Builder.getTII().get(ToOpcode));
192
193 Observer.changedInstr(FromMI);
194}
195
196const RegisterBank *CombinerHelper::getRegBank(Register Reg) const {
197 return RBI->getRegBank(Reg, MRI, *TRI);
198}
199
200void CombinerHelper::setRegBank(Register Reg, const RegisterBank *RegBank) {
201 if (RegBank)
202 MRI.setRegBank(Reg, *RegBank);
203}
204
205bool CombinerHelper::tryCombineCopy(MachineInstr &MI) {
206 if (matchCombineCopy(MI)) {
207 applyCombineCopy(MI);
208 return true;
209 }
210 return false;
211}
212bool CombinerHelper::matchCombineCopy(MachineInstr &MI) {
213 if (MI.getOpcode() != TargetOpcode::COPY)
214 return false;
215 Register DstReg = MI.getOperand(0).getReg();
216 Register SrcReg = MI.getOperand(1).getReg();
217 return canReplaceReg(DstReg, SrcReg, MRI);
218}
219void CombinerHelper::applyCombineCopy(MachineInstr &MI) {
220 Register DstReg = MI.getOperand(0).getReg();
221 Register SrcReg = MI.getOperand(1).getReg();
222 MI.eraseFromParent();
223 replaceRegWith(MRI, DstReg, SrcReg);
224}
225
226bool CombinerHelper::matchCombineConcatVectors(MachineInstr &MI,
227 SmallVector<Register> &Ops) {
228 assert(MI.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
229 "Invalid instruction");
230 bool IsUndef = true;
231 MachineInstr *Undef = nullptr;
232
233 // Walk over all the operands of concat vectors and check if they are
234 // build_vector themselves or undef.
235 // Then collect their operands in Ops.
236 for (const MachineOperand &MO : MI.uses()) {
237 Register Reg = MO.getReg();
238 MachineInstr *Def = MRI.getVRegDef(Reg);
239 assert(Def && "Operand not defined");
240 if (!MRI.hasOneNonDBGUse(Reg))
241 return false;
242 switch (Def->getOpcode()) {
243 case TargetOpcode::G_BUILD_VECTOR:
244 IsUndef = false;
245 // Remember the operands of the build_vector to fold
246 // them into the yet-to-build flattened concat vectors.
247 for (const MachineOperand &BuildVecMO : Def->uses())
248 Ops.push_back(BuildVecMO.getReg());
249 break;
250 case TargetOpcode::G_IMPLICIT_DEF: {
251 LLT OpType = MRI.getType(Reg);
252 // Keep one undef value for all the undef operands.
253 if (!Undef) {
254 Builder.setInsertPt(*MI.getParent(), MI);
255 Undef = Builder.buildUndef(OpType.getScalarType());
256 }
257 assert(MRI.getType(Undef->getOperand(0).getReg()) ==
258 OpType.getScalarType() &&
259 "All undefs should have the same type");
260 // Break the undef vector in as many scalar elements as needed
261 // for the flattening.
262 for (unsigned EltIdx = 0, EltEnd = OpType.getNumElements();
263 EltIdx != EltEnd; ++EltIdx)
264 Ops.push_back(Undef->getOperand(0).getReg());
265 break;
266 }
267 default:
268 return false;
269 }
270 }
271
272 // Check if the combine is illegal
273 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
274 if (!isLegalOrBeforeLegalizer(
275 {TargetOpcode::G_BUILD_VECTOR, {DstTy, MRI.getType(Ops[0])}})) {
276 return false;
277 }
278
279 if (IsUndef)
280 Ops.clear();
281
282 return true;
283}
284void CombinerHelper::applyCombineConcatVectors(MachineInstr &MI,
285 SmallVector<Register> &Ops) {
286 // We determined that the concat_vectors can be flattened.
287 // Generate the flattened build_vector.
288 Register DstReg = MI.getOperand(0).getReg();
289 Builder.setInsertPt(*MI.getParent(), MI);
290 Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
291
292 // Note: IsUndef is sort of redundant. We could have determined it by
293 // checking that all Ops are undef. Alternatively, we could have
294 // generated a build_vector of undefs and relied on another combine to
295 // clean that up. For now, given we already gather this information
296 // in matchCombineConcatVectors, just save compile time and issue the
297 // right thing.
298 if (Ops.empty())
299 Builder.buildUndef(NewDstReg);
300 else
301 Builder.buildBuildVector(NewDstReg, Ops);
302 MI.eraseFromParent();
303 replaceRegWith(MRI, DstReg, NewDstReg);
304}
305
306bool CombinerHelper::matchCombineShuffleConcat(MachineInstr &MI,
307 SmallVector<Register> &Ops) {
308 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
309 auto ConcatMI1 =
310 dyn_cast<GConcatVectors>(MRI.getVRegDef(MI.getOperand(1).getReg()));
311 auto ConcatMI2 =
312 dyn_cast<GConcatVectors>(MRI.getVRegDef(MI.getOperand(2).getReg()));
313 if (!ConcatMI1 || !ConcatMI2)
314 return false;
315
316 // Check that the sources of the Concat instructions have the same type
317 if (MRI.getType(ConcatMI1->getSourceReg(0)) !=
318 MRI.getType(ConcatMI2->getSourceReg(0)))
319 return false;
320
321 LLT ConcatSrcTy = MRI.getType(ConcatMI1->getReg(1));
322 LLT ShuffleSrcTy1 = MRI.getType(MI.getOperand(1).getReg());
323 unsigned ConcatSrcNumElt = ConcatSrcTy.getNumElements();
324 for (unsigned i = 0; i < Mask.size(); i += ConcatSrcNumElt) {
325 // Check if the index takes a whole source register from G_CONCAT_VECTORS
326 // Assumes that all Sources of G_CONCAT_VECTORS are the same type
327 if (Mask[i] == -1) {
328 for (unsigned j = 1; j < ConcatSrcNumElt; j++) {
329 if (i + j >= Mask.size())
330 return false;
331 if (Mask[i + j] != -1)
332 return false;
333 }
334 if (!isLegalOrBeforeLegalizer(
335 {TargetOpcode::G_IMPLICIT_DEF, {ConcatSrcTy}}))
336 return false;
337 Ops.push_back(0);
338 } else if (Mask[i] % ConcatSrcNumElt == 0) {
339 for (unsigned j = 1; j < ConcatSrcNumElt; j++) {
340 if (i + j >= Mask.size())
341 return false;
342 if (Mask[i + j] != Mask[i] + static_cast<int>(j))
343 return false;
344 }
345 // Retrieve the source register from its respective G_CONCAT_VECTORS
346 // instruction
347 if (Mask[i] < ShuffleSrcTy1.getNumElements()) {
348 Ops.push_back(ConcatMI1->getSourceReg(Mask[i] / ConcatSrcNumElt));
349 } else {
350 Ops.push_back(ConcatMI2->getSourceReg(Mask[i] / ConcatSrcNumElt -
351 ConcatMI1->getNumSources()));
352 }
353 } else {
354 return false;
355 }
356 }
357
358 if (!isLegalOrBeforeLegalizer(
359 {TargetOpcode::G_CONCAT_VECTORS,
360 {MRI.getType(MI.getOperand(0).getReg()), ConcatSrcTy}}))
361 return false;
362
363 return !Ops.empty();
364}
365
366void CombinerHelper::applyCombineShuffleConcat(MachineInstr &MI,
367 SmallVector<Register> &Ops) {
368 LLT SrcTy = MRI.getType(Ops[0]);
369 Register UndefReg = 0;
370
371 for (unsigned i = 0; i < Ops.size(); i++) {
372 if (Ops[i] == 0) {
373 if (UndefReg == 0)
374 UndefReg = Builder.buildUndef(SrcTy).getReg(0);
375 Ops[i] = UndefReg;
376 }
377 }
378
379 Builder.buildConcatVectors(MI.getOperand(0).getReg(), Ops);
380 MI.eraseFromParent();
381}
382
383bool CombinerHelper::tryCombineShuffleVector(MachineInstr &MI) {
384 SmallVector<Register, 4> Ops;
385 if (matchCombineShuffleVector(MI, Ops)) {
386 applyCombineShuffleVector(MI, Ops);
387 return true;
388 }
389 return false;
390}
391
392bool CombinerHelper::matchCombineShuffleVector(MachineInstr &MI,
393 SmallVectorImpl<Register> &Ops) {
394 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
395 "Invalid instruction kind");
396 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
397 Register Src1 = MI.getOperand(1).getReg();
398 LLT SrcType = MRI.getType(Src1);
399 // As bizarre as it may look, shuffle vector can actually produce
400 // scalar! This is because at the IR level a <1 x ty> shuffle
401 // vector is perfectly valid.
402 unsigned DstNumElts = DstType.isVector() ? DstType.getNumElements() : 1;
403 unsigned SrcNumElts = SrcType.isVector() ? SrcType.getNumElements() : 1;
404
405 // If the resulting vector is smaller than the size of the source
406 // vectors being concatenated, we won't be able to replace the
407 // shuffle vector into a concat_vectors.
408 //
409 // Note: We may still be able to produce a concat_vectors fed by
410 // extract_vector_elt and so on. It is less clear that would
411 // be better though, so don't bother for now.
412 //
413 // If the destination is a scalar, the size of the sources doesn't
414 // matter. We will lower the shuffle to a plain copy. This will
415 // work only if the source and destination have the same size. But
416 // that's covered by the next condition.
417 //
418 // TODO: If the size between the source and destination don't match
419 // we could still emit an extract vector element in that case.
420 if (DstNumElts < 2 * SrcNumElts && DstNumElts != 1)
421 return false;
422
423 // Check that the shuffle mask can be broken evenly between the
424 // different sources.
425 if (DstNumElts % SrcNumElts != 0)
426 return false;
427
428 // Mask length is a multiple of the source vector length.
429 // Check if the shuffle is some kind of concatenation of the input
430 // vectors.
431 unsigned NumConcat = DstNumElts / SrcNumElts;
432 SmallVector<int, 8> ConcatSrcs(NumConcat, -1);
433 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
434 for (unsigned i = 0; i != DstNumElts; ++i) {
435 int Idx = Mask[i];
436 // Undef value.
437 if (Idx < 0)
438 continue;
439 // Ensure the indices in each SrcType sized piece are sequential and that
440 // the same source is used for the whole piece.
441 if ((Idx % SrcNumElts != (i % SrcNumElts)) ||
442 (ConcatSrcs[i / SrcNumElts] >= 0 &&
443 ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts)))
444 return false;
445 // Remember which source this index came from.
446 ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts;
447 }
448
449 // The shuffle is concatenating multiple vectors together.
450 // Collect the different operands for that.
451 Register UndefReg;
452 Register Src2 = MI.getOperand(2).getReg();
453 for (auto Src : ConcatSrcs) {
454 if (Src < 0) {
455 if (!UndefReg) {
456 Builder.setInsertPt(*MI.getParent(), MI);
457 UndefReg = Builder.buildUndef(SrcType).getReg(0);
458 }
459 Ops.push_back(UndefReg);
460 } else if (Src == 0)
461 Ops.push_back(Src1);
462 else
463 Ops.push_back(Src2);
464 }
465 return true;
466}
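// Illustrative MIR sketch (not from the original source):
//   %z:_(<4 x s32>) = G_SHUFFLE_VECTOR %a:_(<2 x s32>), %b:_(<2 x s32>),
//                                      shufflemask(0, 1, 2, 3)
// matches with Ops = {%a, %b}, since each 2-element piece of the mask reads
// one whole source in order; the apply step then emits a merge-like
// instruction (a concat_vectors here).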
467
468void CombinerHelper::applyCombineShuffleVector(MachineInstr &MI,
469 const ArrayRef<Register> Ops) {
470 Register DstReg = MI.getOperand(0).getReg();
471 Builder.setInsertPt(*MI.getParent(), MI);
472 Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
473
474 if (Ops.size() == 1)
475 Builder.buildCopy(NewDstReg, Ops[0]);
476 else
477 Builder.buildMergeLikeInstr(NewDstReg, Ops);
478
479 MI.eraseFromParent();
480 replaceRegWith(MRI, DstReg, NewDstReg);
481}
482
483bool CombinerHelper::matchShuffleToExtract(MachineInstr &MI) {
484 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
485 "Invalid instruction kind");
486
487 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
488 return Mask.size() == 1;
489}
490
491void CombinerHelper::applyShuffleToExtract(MachineInstr &MI) {
492 Register DstReg = MI.getOperand(0).getReg();
493 Builder.setInsertPt(*MI.getParent(), MI);
494
495 int I = MI.getOperand(3).getShuffleMask()[0];
496 Register Src1 = MI.getOperand(1).getReg();
497 LLT Src1Ty = MRI.getType(Src1);
498 int Src1NumElts = Src1Ty.isVector() ? Src1Ty.getNumElements() : 1;
499 Register SrcReg;
500 if (I >= Src1NumElts) {
501 SrcReg = MI.getOperand(2).getReg();
502 I -= Src1NumElts;
503 } else if (I >= 0)
504 SrcReg = Src1;
505
506 if (I < 0)
507 Builder.buildUndef(DstReg);
508 else if (!MRI.getType(SrcReg).isVector())
509 Builder.buildCopy(DstReg, SrcReg);
510 else
511 Builder.buildExtractVectorElementConstant(DstReg, SrcReg, I);
512
513 MI.eraseFromParent();
514}
515
516namespace {
517
518/// Select a preference between two uses. CurrentUse is the current preference
519/// while *ForCandidate is attributes of the candidate under consideration.
520PreferredTuple ChoosePreferredUse(MachineInstr &LoadMI,
521 PreferredTuple &CurrentUse,
522 const LLT TyForCandidate,
523 unsigned OpcodeForCandidate,
524 MachineInstr *MIForCandidate) {
525 if (!CurrentUse.Ty.isValid()) {
526 if (CurrentUse.ExtendOpcode == OpcodeForCandidate ||
527 CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT)
528 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
529 return CurrentUse;
530 }
531
532 // We permit the extend to hoist through basic blocks but this is only
533 // sensible if the target has extending loads. If you end up lowering back
534 // into a load and extend during the legalizer then the end result is
535 // hoisting the extend up to the load.
536
537 // Prefer defined extensions to undefined extensions as these are more
538 // likely to reduce the number of instructions.
539 if (OpcodeForCandidate == TargetOpcode::G_ANYEXT &&
540 CurrentUse.ExtendOpcode != TargetOpcode::G_ANYEXT)
541 return CurrentUse;
542 else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT &&
543 OpcodeForCandidate != TargetOpcode::G_ANYEXT)
544 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
545
546 // Prefer sign extensions to zero extensions as sign-extensions tend to be
547 // more expensive. Don't do this if the load is already a zero-extend load
548 // though, otherwise we'll rewrite a zero-extend load into a sign-extend
549 // later.
550 if (!isa<GZExtLoad>(LoadMI) && CurrentUse.Ty == TyForCandidate) {
551 if (CurrentUse.ExtendOpcode == TargetOpcode::G_SEXT &&
552 OpcodeForCandidate == TargetOpcode::G_ZEXT)
553 return CurrentUse;
554 else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ZEXT &&
555 OpcodeForCandidate == TargetOpcode::G_SEXT)
556 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
557 }
558
559 // This is potentially target specific. We've chosen the largest type
560 // because G_TRUNC is usually free. One potential catch with this is that
561 // some targets have a reduced number of larger registers than smaller
562 // registers and this choice potentially increases the live-range for the
563 // larger value.
564 if (TyForCandidate.getSizeInBits() > CurrentUse.Ty.getSizeInBits()) {
565 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
566 }
567 return CurrentUse;
568}
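// Illustrative note (not part of the original source): given a load with one
// G_ZEXT use and one G_ANYEXT use of the same size, the G_ZEXT candidate is
// kept, since defined extensions are preferred over G_ANYEXT; between two
// defined candidates of different sizes, the larger type wins.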
569
570/// Find a suitable place to insert some instructions and insert them. This
571/// function accounts for special cases like inserting before a PHI node.
572/// The current strategy for inserting before PHI's is to duplicate the
573/// instructions for each predecessor. However, while that's ok for G_TRUNC
574/// on most targets since it generally requires no code, other targets/cases may
575/// want to try harder to find a dominating block.
576static void InsertInsnsWithoutSideEffectsBeforeUse(
577 MachineIRBuilder &Builder, MachineInstr &DefMI, MachineOperand &UseMO,
578 std::function<void(MachineBasicBlock *, MachineBasicBlock::iterator,
579 MachineOperand &UseMO)>
580 Inserter) {
581 MachineInstr &UseMI = *UseMO.getParent();
582
583 MachineBasicBlock *InsertBB = UseMI.getParent();
584
585 // If the use is a PHI then we want the predecessor block instead.
586 if (UseMI.isPHI()) {
587 MachineOperand *PredBB = std::next(&UseMO);
588 InsertBB = PredBB->getMBB();
589 }
590
591 // If the block is the same block as the def then we want to insert just after
592 // the def instead of at the start of the block.
593 if (InsertBB == DefMI.getParent()) {
595 Inserter(InsertBB, std::next(InsertPt), UseMO);
596 return;
597 }
598
599 // Otherwise we want the start of the BB
600 Inserter(InsertBB, InsertBB->getFirstNonPHI(), UseMO);
601}
602} // end anonymous namespace
603
604bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) {
605 PreferredTuple Preferred;
606 if (matchCombineExtendingLoads(MI, Preferred)) {
607 applyCombineExtendingLoads(MI, Preferred);
608 return true;
609 }
610 return false;
611}
612
613static unsigned getExtLoadOpcForExtend(unsigned ExtOpc) {
614 unsigned CandidateLoadOpc;
615 switch (ExtOpc) {
616 case TargetOpcode::G_ANYEXT:
617 CandidateLoadOpc = TargetOpcode::G_LOAD;
618 break;
619 case TargetOpcode::G_SEXT:
620 CandidateLoadOpc = TargetOpcode::G_SEXTLOAD;
621 break;
622 case TargetOpcode::G_ZEXT:
623 CandidateLoadOpc = TargetOpcode::G_ZEXTLOAD;
624 break;
625 default:
626 llvm_unreachable("Unexpected extend opc");
627 }
628 return CandidateLoadOpc;
629}
630
631bool CombinerHelper::matchCombineExtendingLoads(MachineInstr &MI,
632 PreferredTuple &Preferred) {
633 // We match the loads and follow the uses to the extend instead of matching
634 // the extends and following the def to the load. This is because the load
635 // must remain in the same position for correctness (unless we also add code
636 // to find a safe place to sink it) whereas the extend is freely movable.
637 // It also prevents us from duplicating the load for the volatile case or just
638 // for performance.
639 GAnyLoad *LoadMI = dyn_cast<GAnyLoad>(&MI);
640 if (!LoadMI)
641 return false;
642
643 Register LoadReg = LoadMI->getDstReg();
644
645 LLT LoadValueTy = MRI.getType(LoadReg);
646 if (!LoadValueTy.isScalar())
647 return false;
648
649 // Most architectures are going to legalize <s8 loads into at least a 1 byte
650 // load, and the MMOs can only describe memory accesses in multiples of bytes.
651 // If we try to perform extload combining on those, we can end up with
652 // %a(s8) = extload %ptr (load 1 byte from %ptr)
653 // ... which is an illegal extload instruction.
654 if (LoadValueTy.getSizeInBits() < 8)
655 return false;
656
657 // For non power-of-2 types, they will very likely be legalized into multiple
658 // loads. Don't bother trying to match them into extending loads.
659 if (!llvm::has_single_bit<uint32_t>(LoadValueTy.getSizeInBits()))
660 return false;
661
662 // Find the preferred type aside from the any-extends (unless it's the only
663 // one) and non-extending ops. We'll emit an extending load to that type
664 // and emit a variant of (extend (trunc X)) for the others according to the
665 // relative type sizes. At the same time, pick an extend to use based on the
666 // extend involved in the chosen type.
667 unsigned PreferredOpcode =
668 isa<GLoad>(&MI)
669 ? TargetOpcode::G_ANYEXT
670 : isa<GSExtLoad>(&MI) ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
671 Preferred = {LLT(), PreferredOpcode, nullptr};
672 for (auto &UseMI : MRI.use_nodbg_instructions(LoadReg)) {
673 if (UseMI.getOpcode() == TargetOpcode::G_SEXT ||
674 UseMI.getOpcode() == TargetOpcode::G_ZEXT ||
675 (UseMI.getOpcode() == TargetOpcode::G_ANYEXT)) {
676 const auto &MMO = LoadMI->getMMO();
677 // Don't do anything for atomics.
678 if (MMO.isAtomic())
679 continue;
680 // Check for legality.
681 if (!isPreLegalize()) {
682 LegalityQuery::MemDesc MMDesc(MMO);
683 unsigned CandidateLoadOpc = getExtLoadOpcForExtend(UseMI.getOpcode());
684 LLT UseTy = MRI.getType(UseMI.getOperand(0).getReg());
685 LLT SrcTy = MRI.getType(LoadMI->getPointerReg());
686 if (LI->getAction({CandidateLoadOpc, {UseTy, SrcTy}, {MMDesc}})
687 .Action != LegalizeActions::Legal)
688 continue;
689 }
690 Preferred = ChoosePreferredUse(MI, Preferred,
691 MRI.getType(UseMI.getOperand(0).getReg()),
692 UseMI.getOpcode(), &UseMI);
693 }
694 }
695
696 // There were no extends
697 if (!Preferred.MI)
698 return false;
699 // It should be impossible to choose an extend without selecting a different
700 // type since by definition the result of an extend is larger.
701 assert(Preferred.Ty != LoadValueTy && "Extending to same type?");
702
703 LLVM_DEBUG(dbgs() << "Preferred use is: " << *Preferred.MI);
704 return true;
705}
706
707void CombinerHelper::applyCombineExtendingLoads(MachineInstr &MI,
708 PreferredTuple &Preferred) {
709 // Rewrite the load to the chosen extending load.
710 Register ChosenDstReg = Preferred.MI->getOperand(0).getReg();
711
712 // Inserter to insert a truncate back to the original type at a given point
713 // with some basic CSE to limit truncate duplication to one per BB.
714 DenseMap<MachineBasicBlock *, MachineInstr *> EmittedInsns;
715 auto InsertTruncAt = [&](MachineBasicBlock *InsertIntoBB,
716 MachineBasicBlock::iterator InsertBefore,
717 MachineOperand &UseMO) {
718 MachineInstr *PreviouslyEmitted = EmittedInsns.lookup(InsertIntoBB);
719 if (PreviouslyEmitted) {
720 Observer.changingInstr(*UseMO.getParent());
721 UseMO.setReg(PreviouslyEmitted->getOperand(0).getReg());
722 Observer.changedInstr(*UseMO.getParent());
723 return;
724 }
725
726 Builder.setInsertPt(*InsertIntoBB, InsertBefore);
727 Register NewDstReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg());
728 MachineInstr *NewMI = Builder.buildTrunc(NewDstReg, ChosenDstReg);
729 EmittedInsns[InsertIntoBB] = NewMI;
730 replaceRegOpWith(MRI, UseMO, NewDstReg);
731 };
732
733 Observer.changingInstr(MI);
734 unsigned LoadOpc = getExtLoadOpcForExtend(Preferred.ExtendOpcode);
735 MI.setDesc(Builder.getTII().get(LoadOpc));
736
737 // Rewrite all the uses to fix up the types.
738 auto &LoadValue = MI.getOperand(0);
739 SmallVector<MachineOperand *, 4> Uses;
740 for (auto &UseMO : MRI.use_operands(LoadValue.getReg()))
741 Uses.push_back(&UseMO);
742
743 for (auto *UseMO : Uses) {
744 MachineInstr *UseMI = UseMO->getParent();
745
746 // If the extend is compatible with the preferred extend then we should fix
747 // up the type and extend so that it uses the preferred use.
748 if (UseMI->getOpcode() == Preferred.ExtendOpcode ||
749 UseMI->getOpcode() == TargetOpcode::G_ANYEXT) {
750 Register UseDstReg = UseMI->getOperand(0).getReg();
751 MachineOperand &UseSrcMO = UseMI->getOperand(1);
752 const LLT UseDstTy = MRI.getType(UseDstReg);
753 if (UseDstReg != ChosenDstReg) {
754 if (Preferred.Ty == UseDstTy) {
755 // If the use has the same type as the preferred use, then merge
756 // the vregs and erase the extend. For example:
757 // %1:_(s8) = G_LOAD ...
758 // %2:_(s32) = G_SEXT %1(s8)
759 // %3:_(s32) = G_ANYEXT %1(s8)
760 // ... = ... %3(s32)
761 // rewrites to:
762 // %2:_(s32) = G_SEXTLOAD ...
763 // ... = ... %2(s32)
764 replaceRegWith(MRI, UseDstReg, ChosenDstReg);
765 Observer.erasingInstr(*UseMO->getParent());
766 UseMO->getParent()->eraseFromParent();
767 } else if (Preferred.Ty.getSizeInBits() < UseDstTy.getSizeInBits()) {
768 // If the preferred size is smaller, then keep the extend but extend
769 // from the result of the extending load. For example:
770 // %1:_(s8) = G_LOAD ...
771 // %2:_(s32) = G_SEXT %1(s8)
772 // %3:_(s64) = G_ANYEXT %1(s8)
773 // ... = ... %3(s64)
774 /// rewrites to:
775 // %2:_(s32) = G_SEXTLOAD ...
776 // %3:_(s64) = G_ANYEXT %2:_(s32)
777 // ... = ... %3(s64)
778 replaceRegOpWith(MRI, UseSrcMO, ChosenDstReg);
779 } else {
780 // If the preferred size is large, then insert a truncate. For
781 // example:
782 // %1:_(s8) = G_LOAD ...
783 // %2:_(s64) = G_SEXT %1(s8)
784 // %3:_(s32) = G_ZEXT %1(s8)
785 // ... = ... %3(s32)
786 /// rewrites to:
787 // %2:_(s64) = G_SEXTLOAD ...
788 // %4:_(s8) = G_TRUNC %2:_(s32)
789 // %3:_(s64) = G_ZEXT %2:_(s8)
790 // ... = ... %3(s64)
791 InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO,
792 InsertTruncAt);
793 }
794 continue;
795 }
796 // The use is (one of) the uses of the preferred use we chose earlier.
797 // We're going to update the load to def this value later so just erase
798 // the old extend.
799 Observer.erasingInstr(*UseMO->getParent());
800 UseMO->getParent()->eraseFromParent();
801 continue;
802 }
803
804 // The use isn't an extend. Truncate back to the type we originally loaded.
805 // This is free on many targets.
806 InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO, InsertTruncAt);
807 }
808
809 MI.getOperand(0).setReg(ChosenDstReg);
810 Observer.changedInstr(MI);
811}
812
813bool CombinerHelper::matchCombineLoadWithAndMask(MachineInstr &MI,
814 BuildFnTy &MatchInfo) {
815 assert(MI.getOpcode() == TargetOpcode::G_AND);
816
817 // If we have the following code:
818 // %mask = G_CONSTANT 255
819 // %ld = G_LOAD %ptr, (load s16)
820 // %and = G_AND %ld, %mask
821 //
822 // Try to fold it into
823 // %ld = G_ZEXTLOAD %ptr, (load s8)
824
825 Register Dst = MI.getOperand(0).getReg();
826 if (MRI.getType(Dst).isVector())
827 return false;
828
829 auto MaybeMask =
830 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
831 if (!MaybeMask)
832 return false;
833
834 APInt MaskVal = MaybeMask->Value;
835
836 if (!MaskVal.isMask())
837 return false;
838
839 Register SrcReg = MI.getOperand(1).getReg();
840 // Don't use getOpcodeDef() here since intermediate instructions may have
841 // multiple users.
842 GAnyLoad *LoadMI = dyn_cast<GAnyLoad>(MRI.getVRegDef(SrcReg));
843 if (!LoadMI || !MRI.hasOneNonDBGUse(LoadMI->getDstReg()))
844 return false;
845
846 Register LoadReg = LoadMI->getDstReg();
847 LLT RegTy = MRI.getType(LoadReg);
848 Register PtrReg = LoadMI->getPointerReg();
849 unsigned RegSize = RegTy.getSizeInBits();
850 LocationSize LoadSizeBits = LoadMI->getMemSizeInBits();
851 unsigned MaskSizeBits = MaskVal.countr_one();
852
853 // The mask may not be larger than the in-memory type, as it might cover sign
854 // extended bits
855 if (MaskSizeBits > LoadSizeBits.getValue())
856 return false;
857
858 // If the mask covers the whole destination register, there's nothing to
859 // extend
860 if (MaskSizeBits >= RegSize)
861 return false;
862
863 // Most targets cannot deal with loads of size < 8 and need to re-legalize to
864 // at least byte loads. Avoid creating such loads here
865 if (MaskSizeBits < 8 || !isPowerOf2_32(MaskSizeBits))
866 return false;
867
868 const MachineMemOperand &MMO = LoadMI->getMMO();
869 LegalityQuery::MemDesc MemDesc(MMO);
870
871 // Don't modify the memory access size if this is atomic/volatile, but we can
872 // still adjust the opcode to indicate the high bit behavior.
873 if (LoadMI->isSimple())
874 MemDesc.MemoryTy = LLT::scalar(MaskSizeBits);
875 else if (LoadSizeBits.getValue() > MaskSizeBits ||
876 LoadSizeBits.getValue() == RegSize)
877 return false;
878
879 // TODO: Could check if it's legal with the reduced or original memory size.
880 if (!isLegalOrBeforeLegalizer(
881 {TargetOpcode::G_ZEXTLOAD, {RegTy, MRI.getType(PtrReg)}, {MemDesc}}))
882 return false;
883
884 MatchInfo = [=](MachineIRBuilder &B) {
885 B.setInstrAndDebugLoc(*LoadMI);
886 auto &MF = B.getMF();
887 auto PtrInfo = MMO.getPointerInfo();
888 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, MemDesc.MemoryTy);
889 B.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, Dst, PtrReg, *NewMMO);
890 LoadMI->eraseFromParent();
891 };
892 return true;
893}
894
895static bool isPredecessor(const MachineInstr &DefMI,
896 const MachineInstr &UseMI) {
897 assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
898 "shouldn't consider debug uses");
899 assert(DefMI.getParent() == UseMI.getParent());
900 if (&DefMI == &UseMI)
901 return true;
902 const MachineBasicBlock &MBB = *DefMI.getParent();
903 auto DefOrUse = find_if(MBB, [&DefMI, &UseMI](const MachineInstr &MI) {
904 return &MI == &DefMI || &MI == &UseMI;
905 });
906 if (DefOrUse == MBB.end())
907 llvm_unreachable("Block must contain both DefMI and UseMI!");
908 return &*DefOrUse == &DefMI;
909}
910
911bool CombinerHelper::dominates(const MachineInstr &DefMI,
912 const MachineInstr &UseMI) {
913 assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
914 "shouldn't consider debug uses");
915 if (MDT)
916 return MDT->dominates(&DefMI, &UseMI);
917 else if (DefMI.getParent() != UseMI.getParent())
918 return false;
919
920 return isPredecessor(DefMI, UseMI);
921}
922
923bool CombinerHelper::matchSextTruncSextLoad(MachineInstr &MI) {
924 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
925 Register SrcReg = MI.getOperand(1).getReg();
926 Register LoadUser = SrcReg;
927
928 if (MRI.getType(SrcReg).isVector())
929 return false;
930
931 Register TruncSrc;
932 if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc))))
933 LoadUser = TruncSrc;
934
935 uint64_t SizeInBits = MI.getOperand(2).getImm();
936 // If the source is a G_SEXTLOAD from the same bit width, then we don't
937 // need any extend at all, just a truncate.
938 if (auto *LoadMI = getOpcodeDef<GSExtLoad>(LoadUser, MRI)) {
939 // If truncating more than the original extended value, abort.
940 auto LoadSizeBits = LoadMI->getMemSizeInBits();
941 if (TruncSrc &&
942 MRI.getType(TruncSrc).getSizeInBits() < LoadSizeBits.getValue())
943 return false;
944 if (LoadSizeBits == SizeInBits)
945 return true;
946 }
947 return false;
948}
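// Illustrative MIR sketch (not from the original source):
//   %ld:_(s32) = G_SEXTLOAD %ptr (load 1)
//   %in:_(s32) = G_SEXT_INREG %ld, 8
// matches, because the value is already sign-extended from 8 bits, so the
// apply step below replaces the G_SEXT_INREG with a plain copy of %ld.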
949
950void CombinerHelper::applySextTruncSextLoad(MachineInstr &MI) {
951 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
952 Builder.buildCopy(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
953 MI.eraseFromParent();
954}
955
956bool CombinerHelper::matchSextInRegOfLoad(
957 MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
958 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
959
960 Register DstReg = MI.getOperand(0).getReg();
961 LLT RegTy = MRI.getType(DstReg);
962
963 // Only supports scalars for now.
964 if (RegTy.isVector())
965 return false;
966
967 Register SrcReg = MI.getOperand(1).getReg();
968 auto *LoadDef = getOpcodeDef<GLoad>(SrcReg, MRI);
969 if (!LoadDef || !MRI.hasOneNonDBGUse(DstReg))
970 return false;
971
972 uint64_t MemBits = LoadDef->getMemSizeInBits().getValue();
973
974 // If the sign extend extends from a narrower width than the load's width,
975 // then we can narrow the load width when we combine to a G_SEXTLOAD.
976 // Avoid widening the load at all.
977 unsigned NewSizeBits = std::min((uint64_t)MI.getOperand(2).getImm(), MemBits);
978
979 // Don't generate G_SEXTLOADs with a < 1 byte width.
980 if (NewSizeBits < 8)
981 return false;
982 // Don't bother creating a non-power-2 sextload, it will likely be broken up
983 // anyway for most targets.
984 if (!isPowerOf2_32(NewSizeBits))
985 return false;
986
987 const MachineMemOperand &MMO = LoadDef->getMMO();
988 LegalityQuery::MemDesc MMDesc(MMO);
989
990 // Don't modify the memory access size if this is atomic/volatile, but we can
991 // still adjust the opcode to indicate the high bit behavior.
992 if (LoadDef->isSimple())
993 MMDesc.MemoryTy = LLT::scalar(NewSizeBits);
994 else if (MemBits > NewSizeBits || MemBits == RegTy.getSizeInBits())
995 return false;
996
997 // TODO: Could check if it's legal with the reduced or original memory size.
998 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SEXTLOAD,
999 {MRI.getType(LoadDef->getDstReg()),
1000 MRI.getType(LoadDef->getPointerReg())},
1001 {MMDesc}}))
1002 return false;
1003
1004 MatchInfo = std::make_tuple(LoadDef->getDstReg(), NewSizeBits);
1005 return true;
1006}
1007
1008void CombinerHelper::applySextInRegOfLoad(
1009 MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
1010 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1011 Register LoadReg;
1012 unsigned ScalarSizeBits;
1013 std::tie(LoadReg, ScalarSizeBits) = MatchInfo;
1014 GLoad *LoadDef = cast<GLoad>(MRI.getVRegDef(LoadReg));
1015
1016 // If we have the following:
1017 // %ld = G_LOAD %ptr, (load 2)
1018 // %ext = G_SEXT_INREG %ld, 8
1019 // ==>
1020 // %ld = G_SEXTLOAD %ptr (load 1)
1021
1022 auto &MMO = LoadDef->getMMO();
1023 Builder.setInstrAndDebugLoc(*LoadDef);
1024 auto &MF = Builder.getMF();
1025 auto PtrInfo = MMO.getPointerInfo();
1026 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, ScalarSizeBits / 8);
1027 Builder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, MI.getOperand(0).getReg(),
1028 LoadDef->getPointerReg(), *NewMMO);
1029 MI.eraseFromParent();
1030}
1031
1032static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {
1033 if (Ty.isVector())
1034 return FixedVectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()),
1035 Ty.getNumElements());
1036 return IntegerType::get(C, Ty.getSizeInBits());
1037}
1038
1039/// Return true if 'MI' is a load or a store that may fold its address
1040/// operand into the load / store addressing mode.
1041static bool canFoldInAddressingMode(GLoadStore *MI, const TargetLowering &TLI,
1042 MachineRegisterInfo &MRI) {
1043 TargetLowering::AddrMode AM;
1044 auto *MF = MI->getMF();
1045 auto *Addr = getOpcodeDef<GPtrAdd>(MI->getPointerReg(), MRI);
1046 if (!Addr)
1047 return false;
1048
1049 AM.HasBaseReg = true;
1050 if (auto CstOff = getIConstantVRegVal(Addr->getOffsetReg(), MRI))
1051 AM.BaseOffs = CstOff->getSExtValue(); // [reg +/- imm]
1052 else
1053 AM.Scale = 1; // [reg +/- reg]
1054
1055 return TLI.isLegalAddressingMode(
1056 MF->getDataLayout(), AM,
1057 getTypeForLLT(MI->getMMO().getMemoryType(),
1058 MF->getFunction().getContext()),
1059 MI->getMMO().getAddrSpace());
1060}
1061
1062static unsigned getIndexedOpc(unsigned LdStOpc) {
1063 switch (LdStOpc) {
1064 case TargetOpcode::G_LOAD:
1065 return TargetOpcode::G_INDEXED_LOAD;
1066 case TargetOpcode::G_STORE:
1067 return TargetOpcode::G_INDEXED_STORE;
1068 case TargetOpcode::G_ZEXTLOAD:
1069 return TargetOpcode::G_INDEXED_ZEXTLOAD;
1070 case TargetOpcode::G_SEXTLOAD:
1071 return TargetOpcode::G_INDEXED_SEXTLOAD;
1072 default:
1073 llvm_unreachable("Unexpected opcode");
1074 }
1075}
1076
1077bool CombinerHelper::isIndexedLoadStoreLegal(GLoadStore &LdSt) const {
1078 // Check for legality.
1079 LLT PtrTy = MRI.getType(LdSt.getPointerReg());
1080 LLT Ty = MRI.getType(LdSt.getReg(0));
1081 LLT MemTy = LdSt.getMMO().getMemoryType();
1082 SmallVector<LegalityQuery::MemDesc, 2> MemDescrs(
1083 {{MemTy, MemTy.getSizeInBits(), AtomicOrdering::NotAtomic}});
1084 unsigned IndexedOpc = getIndexedOpc(LdSt.getOpcode());
1085 SmallVector<LLT> OpTys;
1086 if (IndexedOpc == TargetOpcode::G_INDEXED_STORE)
1087 OpTys = {PtrTy, Ty, Ty};
1088 else
1089 OpTys = {Ty, PtrTy}; // For G_INDEXED_LOAD, G_INDEXED_[SZ]EXTLOAD
1090
1091 LegalityQuery Q(IndexedOpc, OpTys, MemDescrs);
1092 return isLegal(Q);
1093}
1094
1096 "post-index-use-threshold", cl::Hidden, cl::init(32),
1097 cl::desc("Number of uses of a base pointer to check before it is no longer "
1098 "considered for post-indexing."));
1099
1100bool CombinerHelper::findPostIndexCandidate(GLoadStore &LdSt, Register &Addr,
1101 Register &Base, Register &Offset,
1102 bool &RematOffset) {
1103 // We're looking for the following pattern, for either load or store:
1104 // %baseptr:_(p0) = ...
1105 // G_STORE %val(s64), %baseptr(p0)
1106 // %offset:_(s64) = G_CONSTANT i64 -256
1107 // %new_addr:_(p0) = G_PTR_ADD %baseptr, %offset(s64)
1108 const auto &TLI = getTargetLowering();
1109
1110 Register Ptr = LdSt.getPointerReg();
1111 // If the store is the only use, don't bother.
1112 if (MRI.hasOneNonDBGUse(Ptr))
1113 return false;
1114
1115 if (!isIndexedLoadStoreLegal(LdSt))
1116 return false;
1117
1118 if (getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Ptr, MRI))
1119 return false;
1120
1121 MachineInstr *StoredValDef = getDefIgnoringCopies(LdSt.getReg(0), MRI);
1122 auto *PtrDef = MRI.getVRegDef(Ptr);
1123
1124 unsigned NumUsesChecked = 0;
1125 for (auto &Use : MRI.use_nodbg_instructions(Ptr)) {
1126 if (++NumUsesChecked > PostIndexUseThreshold)
1127 return false; // Try to avoid exploding compile time.
1128
1129 auto *PtrAdd = dyn_cast<GPtrAdd>(&Use);
1130 // The use itself might be dead. This can happen during combines if DCE
1131 // hasn't had a chance to run yet. Don't allow it to form an indexed op.
1132 if (!PtrAdd || MRI.use_nodbg_empty(PtrAdd->getReg(0)))
1133 continue;
1134
1135 // Check that the user of this isn't the store, otherwise we'd generate an
1136 // indexed store defining its own use.
1137 if (StoredValDef == &Use)
1138 continue;
1139
1140 Offset = PtrAdd->getOffsetReg();
1141 if (!ForceLegalIndexing &&
1142 !TLI.isIndexingLegal(LdSt, PtrAdd->getBaseReg(), Offset,
1143 /*IsPre*/ false, MRI))
1144 continue;
1145
1146 // Make sure the offset calculation is before the potentially indexed op.
1147 MachineInstr *OffsetDef = MRI.getVRegDef(Offset);
1148 RematOffset = false;
1149 if (!dominates(*OffsetDef, LdSt)) {
1150 // If the offset however is just a G_CONSTANT, we can always just
1151 // rematerialize it where we need it.
1152 if (OffsetDef->getOpcode() != TargetOpcode::G_CONSTANT)
1153 continue;
1154 RematOffset = true;
1155 }
1156
1157 for (auto &BasePtrUse : MRI.use_nodbg_instructions(PtrAdd->getBaseReg())) {
1158 if (&BasePtrUse == PtrDef)
1159 continue;
1160
1161 // If the user is a later load/store that can be post-indexed, then don't
1162 // combine this one.
1163 auto *BasePtrLdSt = dyn_cast<GLoadStore>(&BasePtrUse);
1164 if (BasePtrLdSt && BasePtrLdSt != &LdSt &&
1165 dominates(LdSt, *BasePtrLdSt) &&
1166 isIndexedLoadStoreLegal(*BasePtrLdSt))
1167 return false;
1168
1169 // Now we're looking for the key G_PTR_ADD instruction, which contains
1170 // the offset add that we want to fold.
1171 if (auto *BasePtrUseDef = dyn_cast<GPtrAdd>(&BasePtrUse)) {
1172 Register PtrAddDefReg = BasePtrUseDef->getReg(0);
1173 for (auto &BaseUseUse : MRI.use_nodbg_instructions(PtrAddDefReg)) {
1174 // If the use is in a different block, then we may produce worse code
1175 // due to the extra register pressure.
1176 if (BaseUseUse.getParent() != LdSt.getParent())
1177 return false;
1178
1179 if (auto *UseUseLdSt = dyn_cast<GLoadStore>(&BaseUseUse))
1180 if (canFoldInAddressingMode(UseUseLdSt, TLI, MRI))
1181 return false;
1182 }
1183 if (!dominates(LdSt, BasePtrUse))
1184 return false; // All uses must be dominated by the load/store.
1185 }
1186 }
1187
1188 Addr = PtrAdd->getReg(0);
1189 Base = PtrAdd->getBaseReg();
1190 return true;
1191 }
1192
1193 return false;
1194}
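// Illustrative summary (not part of the original source): for
//   G_STORE %val, %base
//   %off:_(s64) = G_CONSTANT i64 -256
//   %new:_(p0) = G_PTR_ADD %base, %off
// the candidate found is Addr = %new, Base = %base, Offset = %off, letting
// the store and the pointer update fold into one post-indexed store.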
1195
1196bool CombinerHelper::findPreIndexCandidate(GLoadStore &LdSt, Register &Addr,
1197 Register &Base, Register &Offset) {
1198 auto &MF = *LdSt.getParent()->getParent();
1199 const auto &TLI = *MF.getSubtarget().getTargetLowering();
1200
1201 Addr = LdSt.getPointerReg();
1204 return false;
1205
1206 if (!ForceLegalIndexing &&
1207 !TLI.isIndexingLegal(LdSt, Base, Offset, /*IsPre*/ true, MRI))
1208 return false;
1209
1210 if (!isIndexedLoadStoreLegal(LdSt))
1211 return false;
1212
1213 MachineInstr *BaseDef = getDefIgnoringCopies(Base, MRI);
1214 if (BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
1215 return false;
1216
1217 if (auto *St = dyn_cast<GStore>(&LdSt)) {
1218 // Would require a copy.
1219 if (Base == St->getValueReg())
1220 return false;
1221
1222 // We're expecting one use of Addr in MI, but it could also be the
1223 // value stored, which isn't actually dominated by the instruction.
1224 if (St->getValueReg() == Addr)
1225 return false;
1226 }
1227
1228 // Avoid increasing cross-block register pressure.
1229 for (auto &AddrUse : MRI.use_nodbg_instructions(Addr))
1230 if (AddrUse.getParent() != LdSt.getParent())
1231 return false;
1232
1233 // FIXME: check whether all uses of the base pointer are constant PtrAdds.
1234 // That might allow us to end base's liveness here by adjusting the constant.
1235 bool RealUse = false;
1236 for (auto &AddrUse : MRI.use_nodbg_instructions(Addr)) {
1237 if (!dominates(LdSt, AddrUse))
1238 return false; // All uses must be dominated by the load/store.
1239
1240 // If Ptr may be folded in addressing mode of other use, then it's
1241 // not profitable to do this transformation.
1242 if (auto *UseLdSt = dyn_cast<GLoadStore>(&AddrUse)) {
1243 if (!canFoldInAddressingMode(UseLdSt, TLI, MRI))
1244 RealUse = true;
1245 } else {
1246 RealUse = true;
1247 }
1248 }
1249 return RealUse;
1250}
1251
1252bool CombinerHelper::matchCombineExtractedVectorLoad(MachineInstr &MI,
1253 BuildFnTy &MatchInfo) {
1254 assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT);
1255
1256 // Check if there is a load that defines the vector being extracted from.
1257 auto *LoadMI = getOpcodeDef<GLoad>(MI.getOperand(1).getReg(), MRI);
1258 if (!LoadMI)
1259 return false;
1260
1261 Register Vector = MI.getOperand(1).getReg();
1262 LLT VecEltTy = MRI.getType(Vector).getElementType();
1263
1264 assert(MRI.getType(MI.getOperand(0).getReg()) == VecEltTy);
1265
1266 // Checking whether we should reduce the load width.
1267 if (!MRI.hasOneNonDBGUse(Vector))
1268 return false;
1269
1270 // Check if the defining load is simple.
1271 if (!LoadMI->isSimple())
1272 return false;
1273
1274 // If the vector element type is not a multiple of a byte then we are unable
1275 // to correctly compute an address to load only the extracted element as a
1276 // scalar.
1277 if (!VecEltTy.isByteSized())
1278 return false;
1279
1280 // Check for load fold barriers between the extraction and the load.
1281 if (MI.getParent() != LoadMI->getParent())
1282 return false;
1283 const unsigned MaxIter = 20;
1284 unsigned Iter = 0;
1285 for (auto II = LoadMI->getIterator(), IE = MI.getIterator(); II != IE; ++II) {
1286 if (II->isLoadFoldBarrier())
1287 return false;
1288 if (Iter++ == MaxIter)
1289 return false;
1290 }
1291
1292 // Check if the new load that we are going to create is legal
1293 // if we are in the post-legalization phase.
1294 MachineMemOperand MMO = LoadMI->getMMO();
1295 Align Alignment = MMO.getAlign();
1296 MachinePointerInfo PtrInfo;
1298
1299 // Finding the appropriate PtrInfo if offset is a known constant.
1300 // This is required to create the memory operand for the narrowed load.
1301 // This machine memory operand object helps us infer about legality
1302 // before we proceed to combine the instruction.
1303 if (auto CVal = getIConstantVRegVal(Vector, MRI)) {
1304 int Elt = CVal->getZExtValue();
1305 // FIXME: should be (ABI size)*Elt.
1306 Offset = VecEltTy.getSizeInBits() * Elt / 8;
1307 PtrInfo = MMO.getPointerInfo().getWithOffset(Offset);
1308 } else {
1309 // Discard the pointer info except the address space because the memory
1310 // operand can't represent this new access since the offset is variable.
1311 Offset = VecEltTy.getSizeInBits() / 8;
1313 }
1314
1315 Alignment = commonAlignment(Alignment, Offset);
1316
1317 Register VecPtr = LoadMI->getPointerReg();
1318 LLT PtrTy = MRI.getType(VecPtr);
1319
1320 MachineFunction &MF = *MI.getMF();
1321 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, VecEltTy);
1322
1323 LegalityQuery::MemDesc MMDesc(*NewMMO);
1324
1325 LegalityQuery Q = {TargetOpcode::G_LOAD, {VecEltTy, PtrTy}, {MMDesc}};
1326
1327 if (!isLegalOrBeforeLegalizer(Q))
1328 return false;
1329
1330 // Load must be allowed and fast on the target.
1331 LLVMContext &C = MF.getFunction().getContext();
1332 auto &DL = MF.getDataLayout();
1333 unsigned Fast = 0;
1334 if (!getTargetLowering().allowsMemoryAccess(C, DL, VecEltTy, *NewMMO,
1335 &Fast) ||
1336 !Fast)
1337 return false;
1338
1339 Register Result = MI.getOperand(0).getReg();
1340 Register Index = MI.getOperand(2).getReg();
1341
1342 MatchInfo = [=](MachineIRBuilder &B) {
1343 GISelObserverWrapper DummyObserver;
1344 LegalizerHelper Helper(B.getMF(), DummyObserver, B);
1345 //// Get pointer to the vector element.
1346 Register finalPtr = Helper.getVectorElementPointer(
1347 LoadMI->getPointerReg(), MRI.getType(LoadMI->getOperand(0).getReg()),
1348 Index);
1349 // New G_LOAD instruction.
1350 B.buildLoad(Result, finalPtr, PtrInfo, Alignment);
1351 // Remove original GLOAD instruction.
1352 LoadMI->eraseFromParent();
1353 };
1354
1355 return true;
1356}
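// Illustrative note (not part of the original source): extracting a
// known-constant element from a loaded <4 x s32> becomes a scalar s32 load at
// that element's byte offset (element 2 -> 2 * 32 / 8 = 8 bytes past the
// original pointer), provided the load is simple and no load-fold barrier
// sits between the load and the extract in the same block.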
1357
1358bool CombinerHelper::matchCombineIndexedLoadStore(
1359 MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
1360 auto &LdSt = cast<GLoadStore>(MI);
1361
1362 if (LdSt.isAtomic())
1363 return false;
1364
1365 MatchInfo.IsPre = findPreIndexCandidate(LdSt, MatchInfo.Addr, MatchInfo.Base,
1366 MatchInfo.Offset);
1367 if (!MatchInfo.IsPre &&
1368 !findPostIndexCandidate(LdSt, MatchInfo.Addr, MatchInfo.Base,
1369 MatchInfo.Offset, MatchInfo.RematOffset))
1370 return false;
1371
1372 return true;
1373}
1374
1375void CombinerHelper::applyCombineIndexedLoadStore(
1376 MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
1377 MachineInstr &AddrDef = *MRI.getUniqueVRegDef(MatchInfo.Addr);
1378 unsigned Opcode = MI.getOpcode();
1379 bool IsStore = Opcode == TargetOpcode::G_STORE;
1380 unsigned NewOpcode = getIndexedOpc(Opcode);
1381
1382 // If the offset constant didn't happen to dominate the load/store, we can
1383 // just clone it as needed.
1384 if (MatchInfo.RematOffset) {
1385 auto *OldCst = MRI.getVRegDef(MatchInfo.Offset);
1386 auto NewCst = Builder.buildConstant(MRI.getType(MatchInfo.Offset),
1387 *OldCst->getOperand(1).getCImm());
1388 MatchInfo.Offset = NewCst.getReg(0);
1389 }
1390
1391 auto MIB = Builder.buildInstr(NewOpcode);
1392 if (IsStore) {
1393 MIB.addDef(MatchInfo.Addr);
1394 MIB.addUse(MI.getOperand(0).getReg());
1395 } else {
1396 MIB.addDef(MI.getOperand(0).getReg());
1397 MIB.addDef(MatchInfo.Addr);
1398 }
1399
1400 MIB.addUse(MatchInfo.Base);
1401 MIB.addUse(MatchInfo.Offset);
1402 MIB.addImm(MatchInfo.IsPre);
1403 MIB->cloneMemRefs(*MI.getMF(), MI);
1404 MI.eraseFromParent();
1405 AddrDef.eraseFromParent();
1406
1407 LLVM_DEBUG(dbgs() << " Combined to indexed operation");
1408}
1409
1410bool CombinerHelper::matchCombineDivRem(MachineInstr &MI,
1411 MachineInstr *&OtherMI) {
1412 unsigned Opcode = MI.getOpcode();
1413 bool IsDiv, IsSigned;
1414
1415 switch (Opcode) {
1416 default:
1417 llvm_unreachable("Unexpected opcode!");
1418 case TargetOpcode::G_SDIV:
1419 case TargetOpcode::G_UDIV: {
1420 IsDiv = true;
1421 IsSigned = Opcode == TargetOpcode::G_SDIV;
1422 break;
1423 }
1424 case TargetOpcode::G_SREM:
1425 case TargetOpcode::G_UREM: {
1426 IsDiv = false;
1427 IsSigned = Opcode == TargetOpcode::G_SREM;
1428 break;
1429 }
1430 }
1431
1432 Register Src1 = MI.getOperand(1).getReg();
1433 unsigned DivOpcode, RemOpcode, DivremOpcode;
1434 if (IsSigned) {
1435 DivOpcode = TargetOpcode::G_SDIV;
1436 RemOpcode = TargetOpcode::G_SREM;
1437 DivremOpcode = TargetOpcode::G_SDIVREM;
1438 } else {
1439 DivOpcode = TargetOpcode::G_UDIV;
1440 RemOpcode = TargetOpcode::G_UREM;
1441 DivremOpcode = TargetOpcode::G_UDIVREM;
1442 }
1443
1444 if (!isLegalOrBeforeLegalizer({DivremOpcode, {MRI.getType(Src1)}}))
1445 return false;
1446
1447 // Combine:
1448 // %div:_ = G_[SU]DIV %src1:_, %src2:_
1449 // %rem:_ = G_[SU]REM %src1:_, %src2:_
1450 // into:
1451 // %div:_, %rem:_ = G_[SU]DIVREM %src1:_, %src2:_
1452
1453 // Combine:
1454 // %rem:_ = G_[SU]REM %src1:_, %src2:_
1455 // %div:_ = G_[SU]DIV %src1:_, %src2:_
1456 // into:
1457 // %div:_, %rem:_ = G_[SU]DIVREM %src1:_, %src2:_
1458
1459 for (auto &UseMI : MRI.use_nodbg_instructions(Src1)) {
1460 if (MI.getParent() == UseMI.getParent() &&
1461 ((IsDiv && UseMI.getOpcode() == RemOpcode) ||
1462 (!IsDiv && UseMI.getOpcode() == DivOpcode)) &&
1463 matchEqualDefs(MI.getOperand(2), UseMI.getOperand(2)) &&
1464 matchEqualDefs(MI.getOperand(1), UseMI.getOperand(1))) {
1465 OtherMI = &UseMI;
1466 return true;
1467 }
1468 }
1469
1470 return false;
1471}
1472
1473void CombinerHelper::applyCombineDivRem(MachineInstr &MI,
1474 MachineInstr *&OtherMI) {
1475 unsigned Opcode = MI.getOpcode();
1476 assert(OtherMI && "OtherMI shouldn't be empty.");
1477
1478 Register DestDivReg, DestRemReg;
1479 if (Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_UDIV) {
1480 DestDivReg = MI.getOperand(0).getReg();
1481 DestRemReg = OtherMI->getOperand(0).getReg();
1482 } else {
1483 DestDivReg = OtherMI->getOperand(0).getReg();
1484 DestRemReg = MI.getOperand(0).getReg();
1485 }
1486
1487 bool IsSigned =
1488 Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_SREM;
1489
1490 // Check which instruction is first in the block so we don't break def-use
1491 // deps by "moving" the instruction incorrectly. Also keep track of which
1492 // instruction is first so we pick its operands, avoiding use-before-def
1493 // bugs.
1494 MachineInstr *FirstInst = dominates(MI, *OtherMI) ? &MI : OtherMI;
1495 Builder.setInstrAndDebugLoc(*FirstInst);
1496
1497 Builder.buildInstr(IsSigned ? TargetOpcode::G_SDIVREM
1498 : TargetOpcode::G_UDIVREM,
1499 {DestDivReg, DestRemReg},
1500 { FirstInst->getOperand(1), FirstInst->getOperand(2) });
1501 MI.eraseFromParent();
1502 OtherMI->eraseFromParent();
1503}
1504
1505bool CombinerHelper::matchOptBrCondByInvertingCond(MachineInstr &MI,
1506 MachineInstr *&BrCond) {
1507 assert(MI.getOpcode() == TargetOpcode::G_BR);
1508
1509 // Try to match the following:
1510 // bb1:
1511 // G_BRCOND %c1, %bb2
1512 // G_BR %bb3
1513 // bb2:
1514 // ...
1515 // bb3:
1516
1517 // The above pattern does not have a fall through to the successor bb2, always
1518 // resulting in a branch no matter which path is taken. Here we try to find
1519 // and replace that pattern with conditional branch to bb3 and otherwise
1520 // fallthrough to bb2. This is generally better for branch predictors.
1521
1522 MachineBasicBlock *MBB = MI.getParent();
1523 MachineBasicBlock::iterator BrIt(MI);
1524 if (BrIt == MBB->begin())
1525 return false;
1526 assert(std::next(BrIt) == MBB->end() && "expected G_BR to be a terminator");
1527
1528 BrCond = &*std::prev(BrIt);
1529 if (BrCond->getOpcode() != TargetOpcode::G_BRCOND)
1530 return false;
1531
1532 // Check that the next block is the conditional branch target. Also make sure
1533 // that it isn't the same as the G_BR's target (otherwise, this will loop.)
1534 MachineBasicBlock *BrCondTarget = BrCond->getOperand(1).getMBB();
1535 return BrCondTarget != MI.getOperand(0).getMBB() &&
1536 MBB->isLayoutSuccessor(BrCondTarget);
1537}
1538
1539void CombinerHelper::applyOptBrCondByInvertingCond(MachineInstr &MI,
1540 MachineInstr *&BrCond) {
1541 MachineBasicBlock *BrTarget = MI.getOperand(0).getMBB();
1543 LLT Ty = MRI.getType(BrCond->getOperand(0).getReg());
1544 // FIXME: Does int/fp matter for this? If so, we might need to restrict
1545 // this to i1 only since we might not know for sure what kind of
1546 // compare generated the condition value.
1547 auto True = Builder.buildConstant(
1548 Ty, getICmpTrueVal(getTargetLowering(), false, false));
1549 auto Xor = Builder.buildXor(Ty, BrCond->getOperand(0), True);
1550
1551 auto *FallthroughBB = BrCond->getOperand(1).getMBB();
1552 Observer.changingInstr(MI);
1553 MI.getOperand(0).setMBB(FallthroughBB);
1554 Observer.changedInstr(MI);
1555
1556 // Change the conditional branch to use the inverted condition and
1557 // new target block.
1558 Observer.changingInstr(*BrCond);
1559 BrCond->getOperand(0).setReg(Xor.getReg(0));
1560 BrCond->getOperand(1).setMBB(BrTarget);
1561 Observer.changedInstr(*BrCond);
1562}
1563
1564
1565bool CombinerHelper::tryEmitMemcpyInline(MachineInstr &MI) {
1566 MachineIRBuilder HelperBuilder(MI);
1567 GISelObserverWrapper DummyObserver;
1568 LegalizerHelper Helper(HelperBuilder.getMF(), DummyObserver, HelperBuilder);
1569 return Helper.lowerMemcpyInline(MI) ==
1570 LegalizerHelper::LegalizeResult::Legalized;
1571}
1572
1573bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
1574 MachineIRBuilder HelperBuilder(MI);
1575 GISelObserverWrapper DummyObserver;
1576 LegalizerHelper Helper(HelperBuilder.getMF(), DummyObserver, HelperBuilder);
1577 return Helper.lowerMemCpyFamily(MI, MaxLen) ==
1578 LegalizerHelper::LegalizeResult::Legalized;
1579}
1580
1581static APFloat constantFoldFpUnary(const MachineInstr &MI,
1582 const MachineRegisterInfo &MRI,
1583 const APFloat &Val) {
1584 APFloat Result(Val);
1585 switch (MI.getOpcode()) {
1586 default:
1587 llvm_unreachable("Unexpected opcode!");
1588 case TargetOpcode::G_FNEG: {
1589 Result.changeSign();
1590 return Result;
1591 }
1592 case TargetOpcode::G_FABS: {
1593 Result.clearSign();
1594 return Result;
1595 }
1596 case TargetOpcode::G_FPTRUNC: {
1597 bool Unused;
1598 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
1600 &Unused);
1601 return Result;
1602 }
1603 case TargetOpcode::G_FSQRT: {
1604 bool Unused;
1606 &Unused);
1607 Result = APFloat(sqrt(Result.convertToDouble()));
1608 break;
1609 }
1610 case TargetOpcode::G_FLOG2: {
1611 bool Unused;
1613 &Unused);
1614 Result = APFloat(log2(Result.convertToDouble()));
1615 break;
1616 }
1617 }
1618 // Convert `APFloat` to appropriate IEEE type depending on `DstTy`. Otherwise,
1619 // `buildFConstant` will assert on size mismatch. Only `G_FSQRT`, and
1620 // `G_FLOG2` reach here.
1621 bool Unused;
1622 Result.convert(Val.getSemantics(), APFloat::rmNearestTiesToEven, &Unused);
1623 return Result;
1624}
1625
1626void CombinerHelper::applyCombineConstantFoldFpUnary(MachineInstr &MI,
1627 const ConstantFP *Cst) {
1628 APFloat Folded = constantFoldFpUnary(MI, MRI, Cst->getValue());
1629 const ConstantFP *NewCst = ConstantFP::get(Builder.getContext(), Folded);
1630 Builder.buildFConstant(MI.getOperand(0), *NewCst);
1631 MI.eraseFromParent();
1632}
1633
1634bool CombinerHelper::matchPtrAddImmedChain(MachineInstr &MI,
1635 PtrAddChain &MatchInfo) {
1636 // We're trying to match the following pattern:
1637 // %t1 = G_PTR_ADD %base, G_CONSTANT imm1
1638 // %root = G_PTR_ADD %t1, G_CONSTANT imm2
1639 // -->
1640 // %root = G_PTR_ADD %base, G_CONSTANT (imm1 + imm2)
1641
1642 if (MI.getOpcode() != TargetOpcode::G_PTR_ADD)
1643 return false;
1644
1645 Register Add2 = MI.getOperand(1).getReg();
1646 Register Imm1 = MI.getOperand(2).getReg();
1647 auto MaybeImmVal = getIConstantVRegValWithLookThrough(Imm1, MRI);
1648 if (!MaybeImmVal)
1649 return false;
1650
1651 MachineInstr *Add2Def = MRI.getVRegDef(Add2);
1652 if (!Add2Def || Add2Def->getOpcode() != TargetOpcode::G_PTR_ADD)
1653 return false;
1654
1655 Register Base = Add2Def->getOperand(1).getReg();
1656 Register Imm2 = Add2Def->getOperand(2).getReg();
1657 auto MaybeImm2Val = getIConstantVRegValWithLookThrough(Imm2, MRI);
1658 if (!MaybeImm2Val)
1659 return false;
1660
1661 // Check if the new combined immediate forms an illegal addressing mode.
1662 // Do not combine if it was legal before but would get illegal.
1663 // To do so, we need to find a load/store user of the pointer to get
1664 // the access type.
1665 Type *AccessTy = nullptr;
1666 auto &MF = *MI.getMF();
1667 for (auto &UseMI : MRI.use_nodbg_instructions(MI.getOperand(0).getReg())) {
1668 if (auto *LdSt = dyn_cast<GLoadStore>(&UseMI)) {
1669 AccessTy = getTypeForLLT(MRI.getType(LdSt->getReg(0)),
1670 MF.getFunction().getContext());
1671 break;
1672 }
1673 }
1674 TargetLoweringBase::AddrMode AMNew;
1675 APInt CombinedImm = MaybeImmVal->Value + MaybeImm2Val->Value;
1676 AMNew.BaseOffs = CombinedImm.getSExtValue();
1677 if (AccessTy) {
1678 AMNew.HasBaseReg = true;
1679 TargetLoweringBase::AddrMode AMOld;
1680 AMOld.BaseOffs = MaybeImmVal->Value.getSExtValue();
1681 AMOld.HasBaseReg = true;
1682 unsigned AS = MRI.getType(Add2).getAddressSpace();
1683 const auto &TLI = *MF.getSubtarget().getTargetLowering();
1684 if (TLI.isLegalAddressingMode(MF.getDataLayout(), AMOld, AccessTy, AS) &&
1685 !TLI.isLegalAddressingMode(MF.getDataLayout(), AMNew, AccessTy, AS))
1686 return false;
1687 }
1688
1689 // Pass the combined immediate to the apply function.
1690 MatchInfo.Imm = AMNew.BaseOffs;
1691 MatchInfo.Base = Base;
1692 MatchInfo.Bank = getRegBank(Imm2);
1693 return true;
1694}
1695
1696void CombinerHelper::applyPtrAddImmedChain(MachineInstr &MI,
1697 PtrAddChain &MatchInfo) {
1698 assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
1699 MachineIRBuilder MIB(MI);
1700 LLT OffsetTy = MRI.getType(MI.getOperand(2).getReg());
1701 auto NewOffset = MIB.buildConstant(OffsetTy, MatchInfo.Imm);
1702 setRegBank(NewOffset.getReg(0), MatchInfo.Bank);
1703 Observer.changingInstr(MI);
1704 MI.getOperand(1).setReg(MatchInfo.Base);
1705 MI.getOperand(2).setReg(NewOffset.getReg(0));
1706 Observer.changedInstr(MI);
1707}
1708
1709bool CombinerHelper::matchShiftImmedChain(MachineInstr &MI,
1710 RegisterImmPair &MatchInfo) {
1711 // We're trying to match the following pattern with any of
1712 // G_SHL/G_ASHR/G_LSHR/G_SSHLSAT/G_USHLSAT shift instructions:
1713 // %t1 = SHIFT %base, G_CONSTANT imm1
1714 // %root = SHIFT %t1, G_CONSTANT imm2
1715 // -->
1716 // %root = SHIFT %base, G_CONSTANT (imm1 + imm2)
1717
1718 unsigned Opcode = MI.getOpcode();
1719 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
1720 Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||
1721 Opcode == TargetOpcode::G_USHLSAT) &&
1722 "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");
1723
1724 Register Shl2 = MI.getOperand(1).getReg();
1725 Register Imm1 = MI.getOperand(2).getReg();
1726 auto MaybeImmVal = getIConstantVRegValWithLookThrough(Imm1, MRI);
1727 if (!MaybeImmVal)
1728 return false;
1729
1730 MachineInstr *Shl2Def = MRI.getUniqueVRegDef(Shl2);
1731 if (Shl2Def->getOpcode() != Opcode)
1732 return false;
1733
1734 Register Base = Shl2Def->getOperand(1).getReg();
1735 Register Imm2 = Shl2Def->getOperand(2).getReg();
1736 auto MaybeImm2Val = getIConstantVRegValWithLookThrough(Imm2, MRI);
1737 if (!MaybeImm2Val)
1738 return false;
1739
1740 // Pass the combined immediate to the apply function.
1741 MatchInfo.Imm =
1742 (MaybeImmVal->Value.getZExtValue() + MaybeImm2Val->Value).getZExtValue();
1743 MatchInfo.Reg = Base;
1744
1745 // There is no simple replacement for a saturating unsigned left shift that
1746 // exceeds the scalar size.
1747 if (Opcode == TargetOpcode::G_USHLSAT &&
1748 MatchInfo.Imm >= MRI.getType(Shl2).getScalarSizeInBits())
1749 return false;
1750
1751 return true;
1752}
1753
1754void CombinerHelper::applyShiftImmedChain(MachineInstr &MI,
1755 RegisterImmPair &MatchInfo) {
1756 unsigned Opcode = MI.getOpcode();
1757 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
1758 Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||
1759 Opcode == TargetOpcode::G_USHLSAT) &&
1760 "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");
1761
1762 LLT Ty = MRI.getType(MI.getOperand(1).getReg());
1763 unsigned const ScalarSizeInBits = Ty.getScalarSizeInBits();
1764 auto Imm = MatchInfo.Imm;
1765
1766 if (Imm >= ScalarSizeInBits) {
1767 // Any logical shift that exceeds scalar size will produce zero.
1768 if (Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_LSHR) {
1769 Builder.buildConstant(MI.getOperand(0), 0);
1770 MI.eraseFromParent();
1771 return;
1772 }
1773 // Arithmetic shift and saturating signed left shift have no effect beyond
1774 // scalar size.
1775 Imm = ScalarSizeInBits - 1;
1776 }
1777
1778 LLT ImmTy = MRI.getType(MI.getOperand(2).getReg());
1779 Register NewImm = Builder.buildConstant(ImmTy, Imm).getReg(0);
1780 Observer.changingInstr(MI);
1781 MI.getOperand(1).setReg(MatchInfo.Reg);
1782 MI.getOperand(2).setReg(NewImm);
1783 Observer.changedInstr(MI);
1784}
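// Illustrative example for the clamping above (hypothetical registers,
// 32-bit scalars): two chained logical shifts whose immediates sum past the
// bit width, e.g.
//   %t1 = G_LSHR %x, G_CONSTANT 20
//   %root = G_LSHR %t1, G_CONSTANT 20
// fold to a plain zero constant (40 >= 32), while the equivalent G_ASHR
// chain is instead rewritten with the shift amount clamped to 31.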
1785
1786bool CombinerHelper::matchShiftOfShiftedLogic(MachineInstr &MI,
1787 ShiftOfShiftedLogic &MatchInfo) {
1788 // We're trying to match the following pattern with any of
1789 // G_SHL/G_ASHR/G_LSHR/G_USHLSAT/G_SSHLSAT shift instructions in combination
1790 // with any of G_AND/G_OR/G_XOR logic instructions.
1791 // %t1 = SHIFT %X, G_CONSTANT C0
1792 // %t2 = LOGIC %t1, %Y
1793 // %root = SHIFT %t2, G_CONSTANT C1
1794 // -->
1795 // %t3 = SHIFT %X, G_CONSTANT (C0+C1)
1796 // %t4 = SHIFT %Y, G_CONSTANT C1
1797 // %root = LOGIC %t3, %t4
1798 unsigned ShiftOpcode = MI.getOpcode();
1799 assert((ShiftOpcode == TargetOpcode::G_SHL ||
1800 ShiftOpcode == TargetOpcode::G_ASHR ||
1801 ShiftOpcode == TargetOpcode::G_LSHR ||
1802 ShiftOpcode == TargetOpcode::G_USHLSAT ||
1803 ShiftOpcode == TargetOpcode::G_SSHLSAT) &&
1804 "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT");
1805
1806 // Match a one-use bitwise logic op.
1807 Register LogicDest = MI.getOperand(1).getReg();
1808 if (!MRI.hasOneNonDBGUse(LogicDest))
1809 return false;
1810
1811 MachineInstr *LogicMI = MRI.getUniqueVRegDef(LogicDest);
1812 unsigned LogicOpcode = LogicMI->getOpcode();
1813 if (LogicOpcode != TargetOpcode::G_AND && LogicOpcode != TargetOpcode::G_OR &&
1814 LogicOpcode != TargetOpcode::G_XOR)
1815 return false;
1816
1817 // Find a matching one-use shift by constant.
1818 const Register C1 = MI.getOperand(2).getReg();
1819 auto MaybeImmVal = getIConstantVRegValWithLookThrough(C1, MRI);
1820 if (!MaybeImmVal || MaybeImmVal->Value == 0)
1821 return false;
1822
1823 const uint64_t C1Val = MaybeImmVal->Value.getZExtValue();
1824
1825 auto matchFirstShift = [&](const MachineInstr *MI, uint64_t &ShiftVal) {
1826 // The shift should match the previous one and should have a single use.
1827 if (MI->getOpcode() != ShiftOpcode ||
1828 !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
1829 return false;
1830
1831 // Must be a constant.
1832 auto MaybeImmVal =
1833 getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
1834 if (!MaybeImmVal)
1835 return false;
1836
1837 ShiftVal = MaybeImmVal->Value.getSExtValue();
1838 return true;
1839 };
1840
1841 // Logic ops are commutative, so check each operand for a match.
1842 Register LogicMIReg1 = LogicMI->getOperand(1).getReg();
1843 MachineInstr *LogicMIOp1 = MRI.getUniqueVRegDef(LogicMIReg1);
1844 Register LogicMIReg2 = LogicMI->getOperand(2).getReg();
1845 MachineInstr *LogicMIOp2 = MRI.getUniqueVRegDef(LogicMIReg2);
1846 uint64_t C0Val;
1847
1848 if (matchFirstShift(LogicMIOp1, C0Val)) {
1849 MatchInfo.LogicNonShiftReg = LogicMIReg2;
1850 MatchInfo.Shift2 = LogicMIOp1;
1851 } else if (matchFirstShift(LogicMIOp2, C0Val)) {
1852 MatchInfo.LogicNonShiftReg = LogicMIReg1;
1853 MatchInfo.Shift2 = LogicMIOp2;
1854 } else
1855 return false;
1856
1857 MatchInfo.ValSum = C0Val + C1Val;
1858
1859 // The fold is not valid if the sum of the shift values exceeds bitwidth.
1860 if (MatchInfo.ValSum >= MRI.getType(LogicDest).getScalarSizeInBits())
1861 return false;
1862
1863 MatchInfo.Logic = LogicMI;
1864 return true;
1865}
1866
1867void CombinerHelper::applyShiftOfShiftedLogic(MachineInstr &MI,
1868 ShiftOfShiftedLogic &MatchInfo) {
1869 unsigned Opcode = MI.getOpcode();
1870 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
1871 Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_USHLSAT ||
1872 Opcode == TargetOpcode::G_SSHLSAT) &&
1873 "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT");
1874
1875 LLT ShlType = MRI.getType(MI.getOperand(2).getReg());
1876 LLT DestType = MRI.getType(MI.getOperand(0).getReg());
1877
1878 Register Const = Builder.buildConstant(ShlType, MatchInfo.ValSum).getReg(0);
1879
1880 Register Shift1Base = MatchInfo.Shift2->getOperand(1).getReg();
1881 Register Shift1 =
1882 Builder.buildInstr(Opcode, {DestType}, {Shift1Base, Const}).getReg(0);
1883
1884 // If LogicNonShiftReg is the same as Shift1Base, and the shift1 constant is
1885 // the same as the MatchInfo.Shift2 constant, CSEMIRBuilder will reuse the old
1886 // shift1 when building shift2. In that case, erasing MatchInfo.Shift2 at the
1887 // end would actually erase the old shift1 and cause a crash later, so erase
1888 // it earlier to avoid that.
1889 MatchInfo.Shift2->eraseFromParent();
1890
1891 Register Shift2Const = MI.getOperand(2).getReg();
1892 Register Shift2 = Builder
1893 .buildInstr(Opcode, {DestType},
1894 {MatchInfo.LogicNonShiftReg, Shift2Const})
1895 .getReg(0);
1896
1897 Register Dest = MI.getOperand(0).getReg();
1898 Builder.buildInstr(MatchInfo.Logic->getOpcode(), {Dest}, {Shift1, Shift2});
1899
1900 // This was one use so it's safe to remove it.
1901 MatchInfo.Logic->eraseFromParent();
1902
1903 MI.eraseFromParent();
1904}
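// Concrete illustration of the transform above (hypothetical registers,
// 32-bit values):
//   %t1 = G_SHL %x, G_CONSTANT 2
//   %t2 = G_AND %t1, %y
//   %root = G_SHL %t2, G_CONSTANT 3
// becomes
//   %t3 = G_SHL %x, G_CONSTANT 5
//   %t4 = G_SHL %y, G_CONSTANT 3
//   %root = G_AND %t3, %t4
// which is only valid because 2 + 3 stays below the scalar bit width.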
1905
1906bool CombinerHelper::matchCommuteShift(MachineInstr &MI, BuildFnTy &MatchInfo) {
1907 assert(MI.getOpcode() == TargetOpcode::G_SHL && "Expected G_SHL");
1908 // Combine (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
1909 // Combine (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
1910 auto &Shl = cast<GenericMachineInstr>(MI);
1911 Register DstReg = Shl.getReg(0);
1912 Register SrcReg = Shl.getReg(1);
1913 Register ShiftReg = Shl.getReg(2);
1914 Register X, C1;
1915
1916 if (!getTargetLowering().isDesirableToCommuteWithShift(MI, !isPreLegalize()))
1917 return false;
1918
1919 if (!mi_match(SrcReg, MRI,
1920 m_OneNonDBGUse(m_any_of(m_GAdd(m_Reg(X), m_Reg(C1)),
1921 m_GOr(m_Reg(X), m_Reg(C1))))))
1922 return false;
1923
1924 APInt C1Val, C2Val;
1925 if (!mi_match(C1, MRI, m_ICstOrSplat(C1Val)) ||
1926 !mi_match(ShiftReg, MRI, m_ICstOrSplat(C2Val)))
1927 return false;
1928
1929 auto *SrcDef = MRI.getVRegDef(SrcReg);
1930 assert((SrcDef->getOpcode() == TargetOpcode::G_ADD ||
1931 SrcDef->getOpcode() == TargetOpcode::G_OR) && "Unexpected op");
1932 LLT SrcTy = MRI.getType(SrcReg);
1933 MatchInfo = [=](MachineIRBuilder &B) {
1934 auto S1 = B.buildShl(SrcTy, X, ShiftReg);
1935 auto S2 = B.buildShl(SrcTy, C1, ShiftReg);
1936 B.buildInstr(SrcDef->getOpcode(), {DstReg}, {S1, S2});
1937 };
1938 return true;
1939}
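// Illustrative example for the commute above (hypothetical registers):
//   %t = G_ADD %x, G_CONSTANT 3
//   %dst = G_SHL %t, G_CONSTANT 2
// is rebuilt by the recorded lambda as
//   %s1 = G_SHL %x, G_CONSTANT 2
//   %s2 = G_SHL (G_CONSTANT 3), G_CONSTANT 2
//   %dst = G_ADD %s1, %s2
// leaving later constant folding to turn %s2 into 12.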
1940
1941bool CombinerHelper::matchCombineMulToShl(MachineInstr &MI,
1942 unsigned &ShiftVal) {
1943 assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
1944 auto MaybeImmVal =
1945 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
1946 if (!MaybeImmVal)
1947 return false;
1948
1949 ShiftVal = MaybeImmVal->Value.exactLogBase2();
1950 return (static_cast<int32_t>(ShiftVal) != -1);
1951}
1952
1953void CombinerHelper::applyCombineMulToShl(MachineInstr &MI,
1954 unsigned &ShiftVal) {
1955 assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
1956 MachineIRBuilder MIB(MI);
1957 LLT ShiftTy = MRI.getType(MI.getOperand(0).getReg());
1958 auto ShiftCst = MIB.buildConstant(ShiftTy, ShiftVal);
1959 Observer.changingInstr(MI);
1960 MI.setDesc(MIB.getTII().get(TargetOpcode::G_SHL));
1961 MI.getOperand(2).setReg(ShiftCst.getReg(0));
1962 Observer.changedInstr(MI);
1963}
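// Illustrative example (hypothetical registers): since 8 is an exact power of
// two, exactLogBase2 returns 3 and
//   %dst = G_MUL %x, G_CONSTANT 8
// is mutated in place into
//   %dst = G_SHL %x, G_CONSTANT 3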
1964
1965// shl ([sza]ext x), y => zext (shl x, y), if shift does not overflow source
1966bool CombinerHelper::matchCombineShlOfExtend(MachineInstr &MI,
1967 RegisterImmPair &MatchData) {
1968 assert(MI.getOpcode() == TargetOpcode::G_SHL && KB);
1969 if (!getTargetLowering().isDesirableToPullExtFromShl(MI))
1970 return false;
1971
1972 Register LHS = MI.getOperand(1).getReg();
1973
1974 Register ExtSrc;
1975 if (!mi_match(LHS, MRI, m_GAnyExt(m_Reg(ExtSrc))) &&
1976 !mi_match(LHS, MRI, m_GZExt(m_Reg(ExtSrc))) &&
1977 !mi_match(LHS, MRI, m_GSExt(m_Reg(ExtSrc))))
1978 return false;
1979
1980 Register RHS = MI.getOperand(2).getReg();
1981 MachineInstr *MIShiftAmt = MRI.getVRegDef(RHS);
1982 auto MaybeShiftAmtVal = isConstantOrConstantSplatVector(*MIShiftAmt, MRI);
1983 if (!MaybeShiftAmtVal)
1984 return false;
1985
1986 if (LI) {
1987 LLT SrcTy = MRI.getType(ExtSrc);
1988
1989 // We only really care about the legality with the shifted value. We can
1990 // pick any type for the constant shift amount, so ask the target what to
1991 // use. Otherwise we would have to guess and hope it is reported as legal.
1992 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(SrcTy);
1993 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SHL, {SrcTy, ShiftAmtTy}}))
1994 return false;
1995 }
1996
1997 int64_t ShiftAmt = MaybeShiftAmtVal->getSExtValue();
1998 MatchData.Reg = ExtSrc;
1999 MatchData.Imm = ShiftAmt;
2000
2001 unsigned MinLeadingZeros = KB->getKnownZeroes(ExtSrc).countl_one();
2002 unsigned SrcTySize = MRI.getType(ExtSrc).getScalarSizeInBits();
2003 return MinLeadingZeros >= ShiftAmt && ShiftAmt < SrcTySize;
2004}
2005
2006void CombinerHelper::applyCombineShlOfExtend(MachineInstr &MI,
2007 const RegisterImmPair &MatchData) {
2008 Register ExtSrcReg = MatchData.Reg;
2009 int64_t ShiftAmtVal = MatchData.Imm;
2010
2011 LLT ExtSrcTy = MRI.getType(ExtSrcReg);
2012 auto ShiftAmt = Builder.buildConstant(ExtSrcTy, ShiftAmtVal);
2013 auto NarrowShift =
2014 Builder.buildShl(ExtSrcTy, ExtSrcReg, ShiftAmt, MI.getFlags());
2015 Builder.buildZExt(MI.getOperand(0), NarrowShift);
2016 MI.eraseFromParent();
2017}
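// Illustrative example (hypothetical registers, s16 extended to s32): when
// the known leading zeros of %small cover the shift amount,
//   %wide = G_ZEXT %small(s16)
//   %dst = G_SHL %wide, G_CONSTANT 2
// is narrowed to a shift on the source type followed by a single zero-extend:
//   %narrow = G_SHL %small, G_CONSTANT 2
//   %dst = G_ZEXT %narrow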
2018
2019bool CombinerHelper::matchCombineMergeUnmerge(MachineInstr &MI,
2020 Register &MatchInfo) {
2021 GMerge &Merge = cast<GMerge>(MI);
2022 SmallVector<Register, 16> MergedValues;
2023 for (unsigned I = 0; I < Merge.getNumSources(); ++I)
2024 MergedValues.emplace_back(Merge.getSourceReg(I));
2025
2026 auto *Unmerge = getOpcodeDef<GUnmerge>(MergedValues[0], MRI);
2027 if (!Unmerge || Unmerge->getNumDefs() != Merge.getNumSources())
2028 return false;
2029
2030 for (unsigned I = 0; I < MergedValues.size(); ++I)
2031 if (MergedValues[I] != Unmerge->getReg(I))
2032 return false;
2033
2034 MatchInfo = Unmerge->getSourceReg();
2035 return true;
2036}
2037
2038static Register peekThroughBitcast(Register Reg,
2039 const MachineRegisterInfo &MRI) {
2040 while (mi_match(Reg, MRI, m_GBitcast(m_Reg(Reg))))
2041 ;
2042
2043 return Reg;
2044}
2045
2046bool CombinerHelper::matchCombineUnmergeMergeToPlainValues(
2047 MachineInstr &MI, SmallVectorImpl<Register> &Operands) {
2048 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2049 "Expected an unmerge");
2050 auto &Unmerge = cast<GUnmerge>(MI);
2051 Register SrcReg = peekThroughBitcast(Unmerge.getSourceReg(), MRI);
2052
2053 auto *SrcInstr = getOpcodeDef<GMergeLikeInstr>(SrcReg, MRI);
2054 if (!SrcInstr)
2055 return false;
2056
2057 // Check the source type of the merge.
2058 LLT SrcMergeTy = MRI.getType(SrcInstr->getSourceReg(0));
2059 LLT Dst0Ty = MRI.getType(Unmerge.getReg(0));
2060 bool SameSize = Dst0Ty.getSizeInBits() == SrcMergeTy.getSizeInBits();
2061 if (SrcMergeTy != Dst0Ty && !SameSize)
2062 return false;
2063 // They are the same now (modulo a bitcast).
2064 // We can collect all the src registers.
2065 for (unsigned Idx = 0; Idx < SrcInstr->getNumSources(); ++Idx)
2066 Operands.push_back(SrcInstr->getSourceReg(Idx));
2067 return true;
2068}
2069
2070void CombinerHelper::applyCombineUnmergeMergeToPlainValues(
2071 MachineInstr &MI, SmallVectorImpl<Register> &Operands) {
2072 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2073 "Expected an unmerge");
2074 assert((MI.getNumOperands() - 1 == Operands.size()) &&
2075 "Not enough operands to replace all defs");
2076 unsigned NumElems = MI.getNumOperands() - 1;
2077
2078 LLT SrcTy = MRI.getType(Operands[0]);
2079 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
2080 bool CanReuseInputDirectly = DstTy == SrcTy;
2081 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
2082 Register DstReg = MI.getOperand(Idx).getReg();
2083 Register SrcReg = Operands[Idx];
2084
2085 // This combine may run after RegBankSelect, so we need to be aware of
2086 // register banks.
2087 const auto &DstCB = MRI.getRegClassOrRegBank(DstReg);
2088 if (!DstCB.isNull() && DstCB != MRI.getRegClassOrRegBank(SrcReg)) {
2089 SrcReg = Builder.buildCopy(MRI.getType(SrcReg), SrcReg).getReg(0);
2090 MRI.setRegClassOrRegBank(SrcReg, DstCB);
2091 }
2092
2093 if (CanReuseInputDirectly)
2094 replaceRegWith(MRI, DstReg, SrcReg);
2095 else
2096 Builder.buildCast(DstReg, SrcReg);
2097 }
2098 MI.eraseFromParent();
2099}
2100
2101bool CombinerHelper::matchCombineUnmergeConstant(MachineInstr &MI,
2102 SmallVectorImpl<APInt> &Csts) {
2103 unsigned SrcIdx = MI.getNumOperands() - 1;
2104 Register SrcReg = MI.getOperand(SrcIdx).getReg();
2105 MachineInstr *SrcInstr = MRI.getVRegDef(SrcReg);
2106 if (SrcInstr->getOpcode() != TargetOpcode::G_CONSTANT &&
2107 SrcInstr->getOpcode() != TargetOpcode::G_FCONSTANT)
2108 return false;
2109 // Break down the big constant into smaller ones.
2110 const MachineOperand &CstVal = SrcInstr->getOperand(1);
2111 APInt Val = SrcInstr->getOpcode() == TargetOpcode::G_CONSTANT
2112 ? CstVal.getCImm()->getValue()
2113 : CstVal.getFPImm()->getValueAPF().bitcastToAPInt();
2114
2115 LLT Dst0Ty = MRI.getType(MI.getOperand(0).getReg());
2116 unsigned ShiftAmt = Dst0Ty.getSizeInBits();
2117 // Unmerge a constant.
2118 for (unsigned Idx = 0; Idx != SrcIdx; ++Idx) {
2119 Csts.emplace_back(Val.trunc(ShiftAmt));
2120 Val = Val.lshr(ShiftAmt);
2121 }
2122
2123 return true;
2124}
2125
2126void CombinerHelper::applyCombineUnmergeConstant(MachineInstr &MI,
2127 SmallVectorImpl<APInt> &Csts) {
2128 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2129 "Expected an unmerge");
2130 assert((MI.getNumOperands() - 1 == Csts.size()) &&
2131 "Not enough operands to replace all defs");
2132 unsigned NumElems = MI.getNumOperands() - 1;
2133 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
2134 Register DstReg = MI.getOperand(Idx).getReg();
2135 Builder.buildConstant(DstReg, Csts[Idx]);
2136 }
2137
2138 MI.eraseFromParent();
2139}
2140
2141bool CombinerHelper::matchCombineUnmergeUndef(
2142 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
2143 unsigned SrcIdx = MI.getNumOperands() - 1;
2144 Register SrcReg = MI.getOperand(SrcIdx).getReg();
2145 MatchInfo = [&MI](MachineIRBuilder &B) {
2146 unsigned NumElems = MI.getNumOperands() - 1;
2147 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
2148 Register DstReg = MI.getOperand(Idx).getReg();
2149 B.buildUndef(DstReg);
2150 }
2151 };
2152 return isa<GImplicitDef>(MRI.getVRegDef(SrcReg));
2153}
2154
2155bool CombinerHelper::matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) {
2156 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2157 "Expected an unmerge");
2158 if (MRI.getType(MI.getOperand(0).getReg()).isVector() ||
2159 MRI.getType(MI.getOperand(MI.getNumDefs()).getReg()).isVector())
2160 return false;
2161 // Check that all the lanes are dead except the first one.
2162 for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
2163 if (!MRI.use_nodbg_empty(MI.getOperand(Idx).getReg()))
2164 return false;
2165 }
2166 return true;
2167}
2168
2169void CombinerHelper::applyCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) {
2170 Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
2171 Register Dst0Reg = MI.getOperand(0).getReg();
2172 Builder.buildTrunc(Dst0Reg, SrcReg);
2173 MI.eraseFromParent();
2174}
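// Illustrative example (hypothetical registers): if only the low part of an
// unmerge is ever used,
//   %lo:_(s32), %hi:_(s32) = G_UNMERGE_VALUES %x:_(s64)
// with %hi dead is replaced by
//   %lo:_(s32) = G_TRUNC %x:_(s64)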
2175
2176bool CombinerHelper::matchCombineUnmergeZExtToZExt(MachineInstr &MI) {
2177 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2178 "Expected an unmerge");
2179 Register Dst0Reg = MI.getOperand(0).getReg();
2180 LLT Dst0Ty = MRI.getType(Dst0Reg);
2181 // G_ZEXT on vector applies to each lane, so it will
2182 // affect all destinations. Therefore we won't be able
2183 // to simplify the unmerge to just the first definition.
2184 if (Dst0Ty.isVector())
2185 return false;
2186 Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
2187 LLT SrcTy = MRI.getType(SrcReg);
2188 if (SrcTy.isVector())
2189 return false;
2190
2191 Register ZExtSrcReg;
2192 if (!mi_match(SrcReg, MRI, m_GZExt(m_Reg(ZExtSrcReg))))
2193 return false;
2194
2195 // Finally we can replace the first definition with
2196 // a zext of the source if the definition is big enough to hold
2197 // all of ZExtSrc bits.
2198 LLT ZExtSrcTy = MRI.getType(ZExtSrcReg);
2199 return ZExtSrcTy.getSizeInBits() <= Dst0Ty.getSizeInBits();
2200}
2201
2202void CombinerHelper::applyCombineUnmergeZExtToZExt(MachineInstr &MI) {
2203 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2204 "Expected an unmerge");
2205
2206 Register Dst0Reg = MI.getOperand(0).getReg();
2207
2208 MachineInstr *ZExtInstr =
2209 MRI.getVRegDef(MI.getOperand(MI.getNumDefs()).getReg());
2210 assert(ZExtInstr && ZExtInstr->getOpcode() == TargetOpcode::G_ZEXT &&
2211 "Expecting a G_ZEXT");
2212
2213 Register ZExtSrcReg = ZExtInstr->getOperand(1).getReg();
2214 LLT Dst0Ty = MRI.getType(Dst0Reg);
2215 LLT ZExtSrcTy = MRI.getType(ZExtSrcReg);
2216
2217 if (Dst0Ty.getSizeInBits() > ZExtSrcTy.getSizeInBits()) {
2218 Builder.buildZExt(Dst0Reg, ZExtSrcReg);
2219 } else {
2220 assert(Dst0Ty.getSizeInBits() == ZExtSrcTy.getSizeInBits() &&
2221 "ZExt src doesn't fit in destination");
2222 replaceRegWith(MRI, Dst0Reg, ZExtSrcReg);
2223 }
2224
2225 Register ZeroReg;
2226 for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
2227 if (!ZeroReg)
2228 ZeroReg = Builder.buildConstant(Dst0Ty, 0).getReg(0);
2229 replaceRegWith(MRI, MI.getOperand(Idx).getReg(), ZeroReg);
2230 }
2231 MI.eraseFromParent();
2232}
2233
2235 unsigned TargetShiftSize,
2236 unsigned &ShiftVal) {
2237 assert((MI.getOpcode() == TargetOpcode::G_SHL ||
2238 MI.getOpcode() == TargetOpcode::G_LSHR ||
2239 MI.getOpcode() == TargetOpcode::G_ASHR) && "Expected a shift");
2240
2241 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
2242 if (Ty.isVector()) // TODO:
2243 return false;
2244
2245 // Don't narrow further than the requested size.
2246 unsigned Size = Ty.getSizeInBits();
2247 if (Size <= TargetShiftSize)
2248 return false;
2249
2250 auto MaybeImmVal =
2251 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
2252 if (!MaybeImmVal)
2253 return false;
2254
2255 ShiftVal = MaybeImmVal->Value.getSExtValue();
2256 return ShiftVal >= Size / 2 && ShiftVal < Size;
2257}
2258
2259void CombinerHelper::applyCombineShiftToUnmerge(MachineInstr &MI,
2260 const unsigned &ShiftVal) {
2261 Register DstReg = MI.getOperand(0).getReg();
2262 Register SrcReg = MI.getOperand(1).getReg();
2263 LLT Ty = MRI.getType(SrcReg);
2264 unsigned Size = Ty.getSizeInBits();
2265 unsigned HalfSize = Size / 2;
2266 assert(ShiftVal >= HalfSize);
2267
2268 LLT HalfTy = LLT::scalar(HalfSize);
2269
2270 auto Unmerge = Builder.buildUnmerge(HalfTy, SrcReg);
2271 unsigned NarrowShiftAmt = ShiftVal - HalfSize;
2272
2273 if (MI.getOpcode() == TargetOpcode::G_LSHR) {
2274 Register Narrowed = Unmerge.getReg(1);
2275
2276 // dst = G_LSHR s64:x, C for C >= 32
2277 // =>
2278 // lo, hi = G_UNMERGE_VALUES x
2279 // dst = G_MERGE_VALUES (G_LSHR hi, C - 32), 0
2280
2281 if (NarrowShiftAmt != 0) {
2282 Narrowed = Builder.buildLShr(HalfTy, Narrowed,
2283 Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
2284 }
2285
2286 auto Zero = Builder.buildConstant(HalfTy, 0);
2287 Builder.buildMergeLikeInstr(DstReg, {Narrowed, Zero});
2288 } else if (MI.getOpcode() == TargetOpcode::G_SHL) {
2289 Register Narrowed = Unmerge.getReg(0);
2290 // dst = G_SHL s64:x, C for C >= 32
2291 // =>
2292 // lo, hi = G_UNMERGE_VALUES x
2293 // dst = G_MERGE_VALUES 0, (G_SHL hi, C - 32)
2294 if (NarrowShiftAmt != 0) {
2295 Narrowed = Builder.buildShl(HalfTy, Narrowed,
2296 Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
2297 }
2298
2299 auto Zero = Builder.buildConstant(HalfTy, 0);
2300 Builder.buildMergeLikeInstr(DstReg, {Zero, Narrowed});
2301 } else {
2302 assert(MI.getOpcode() == TargetOpcode::G_ASHR);
2303 auto Hi = Builder.buildAShr(
2304 HalfTy, Unmerge.getReg(1),
2305 Builder.buildConstant(HalfTy, HalfSize - 1));
2306
2307 if (ShiftVal == HalfSize) {
2308 // (G_ASHR i64:x, 32) ->
2309 // G_MERGE_VALUES hi_32(x), (G_ASHR hi_32(x), 31)
2310 Builder.buildMergeLikeInstr(DstReg, {Unmerge.getReg(1), Hi});
2311 } else if (ShiftVal == Size - 1) {
2312 // Don't need a second shift.
2313 // (G_ASHR i64:x, 63) ->
2314 // %narrowed = (G_ASHR hi_32(x), 31)
2315 // G_MERGE_VALUES %narrowed, %narrowed
2316 Builder.buildMergeLikeInstr(DstReg, {Hi, Hi});
2317 } else {
2318 auto Lo = Builder.buildAShr(
2319 HalfTy, Unmerge.getReg(1),
2320 Builder.buildConstant(HalfTy, ShiftVal - HalfSize));
2321
2322 // (G_ASHR i64:x, C) ->, for C >= 32
2323 // G_MERGE_VALUES (G_ASHR hi_32(x), C - 32), (G_ASHR hi_32(x), 31)
2324 Builder.buildMergeLikeInstr(DstReg, {Lo, Hi});
2325 }
2326 }
2327
2328 MI.eraseFromParent();
2329}
2330
2331bool CombinerHelper::tryCombineShiftToUnmerge(MachineInstr &MI,
2332 unsigned TargetShiftAmount) {
2333 unsigned ShiftAmt;
2334 if (matchCombineShiftToUnmerge(MI, TargetShiftAmount, ShiftAmt)) {
2335 applyCombineShiftToUnmerge(MI, ShiftAmt);
2336 return true;
2337 }
2338
2339 return false;
2340}
2341
2342bool CombinerHelper::matchCombineI2PToP2I(MachineInstr &MI, Register &Reg) {
2343 assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
2344 Register DstReg = MI.getOperand(0).getReg();
2345 LLT DstTy = MRI.getType(DstReg);
2346 Register SrcReg = MI.getOperand(1).getReg();
2347 return mi_match(SrcReg, MRI,
2348 m_GPtrToInt(m_all_of(m_SpecificType(DstTy), m_Reg(Reg))));
2349}
2350
2351void CombinerHelper::applyCombineI2PToP2I(MachineInstr &MI, Register &Reg) {
2352 assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
2353 Register DstReg = MI.getOperand(0).getReg();
2354 Builder.buildCopy(DstReg, Reg);
2355 MI.eraseFromParent();
2356}
2357
2358void CombinerHelper::applyCombineP2IToI2P(MachineInstr &MI, Register &Reg) {
2359 assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT");
2360 Register DstReg = MI.getOperand(0).getReg();
2361 Builder.buildZExtOrTrunc(DstReg, Reg);
2362 MI.eraseFromParent();
2363}
2364
2365bool CombinerHelper::matchCombineAddP2IToPtrAdd(
2366 MachineInstr &MI, std::pair<Register, bool> &PtrReg) {
2367 assert(MI.getOpcode() == TargetOpcode::G_ADD);
2368 Register LHS = MI.getOperand(1).getReg();
2369 Register RHS = MI.getOperand(2).getReg();
2370 LLT IntTy = MRI.getType(LHS);
2371
2372 // G_PTR_ADD always has the pointer in the LHS, so we may need to commute the
2373 // instruction.
2374 PtrReg.second = false;
2375 for (Register SrcReg : {LHS, RHS}) {
2376 if (mi_match(SrcReg, MRI, m_GPtrToInt(m_Reg(PtrReg.first)))) {
2377 // Don't handle cases where the integer is implicitly converted to the
2378 // pointer width.
2379 LLT PtrTy = MRI.getType(PtrReg.first);
2380 if (PtrTy.getScalarSizeInBits() == IntTy.getScalarSizeInBits())
2381 return true;
2382 }
2383
2384 PtrReg.second = true;
2385 }
2386
2387 return false;
2388}
2389
2390void CombinerHelper::applyCombineAddP2IToPtrAdd(
2391 MachineInstr &MI, std::pair<Register, bool> &PtrReg) {
2392 Register Dst = MI.getOperand(0).getReg();
2393 Register LHS = MI.getOperand(1).getReg();
2394 Register RHS = MI.getOperand(2).getReg();
2395
2396 const bool DoCommute = PtrReg.second;
2397 if (DoCommute)
2398 std::swap(LHS, RHS);
2399 LHS = PtrReg.first;
2400
2401 LLT PtrTy = MRI.getType(LHS);
2402
2403 auto PtrAdd = Builder.buildPtrAdd(PtrTy, LHS, RHS);
2404 Builder.buildPtrToInt(Dst, PtrAdd);
2405 MI.eraseFromParent();
2406}
2407
2408bool CombinerHelper::matchCombineConstPtrAddToI2P(MachineInstr &MI,
2409 APInt &NewCst) {
2410 auto &PtrAdd = cast<GPtrAdd>(MI);
2411 Register LHS = PtrAdd.getBaseReg();
2412 Register RHS = PtrAdd.getOffsetReg();
2413 MachineRegisterInfo &MRI = Builder.getMF().getRegInfo();
2414
2415 if (auto RHSCst = getIConstantVRegVal(RHS, MRI)) {
2416 APInt Cst;
2417 if (mi_match(LHS, MRI, m_GIntToPtr(m_ICst(Cst)))) {
2418 auto DstTy = MRI.getType(PtrAdd.getReg(0));
2419 // G_INTTOPTR uses zero-extension
2420 NewCst = Cst.zextOrTrunc(DstTy.getSizeInBits());
2421 NewCst += RHSCst->sextOrTrunc(DstTy.getSizeInBits());
2422 return true;
2423 }
2424 }
2425
2426 return false;
2427}
2428
2429void CombinerHelper::applyCombineConstPtrAddToI2P(MachineInstr &MI,
2430 APInt &NewCst) {
2431 auto &PtrAdd = cast<GPtrAdd>(MI);
2432 Register Dst = PtrAdd.getReg(0);
2433
2434 Builder.buildConstant(Dst, NewCst);
2435 PtrAdd.eraseFromParent();
2436}
2437
2438bool CombinerHelper::matchCombineAnyExtTrunc(MachineInstr &MI, Register &Reg) {
2439 assert(MI.getOpcode() == TargetOpcode::G_ANYEXT && "Expected a G_ANYEXT");
2440 Register DstReg = MI.getOperand(0).getReg();
2441 Register SrcReg = MI.getOperand(1).getReg();
2442 Register OriginalSrcReg = getSrcRegIgnoringCopies(SrcReg, MRI);
2443 if (OriginalSrcReg.isValid())
2444 SrcReg = OriginalSrcReg;
2445 LLT DstTy = MRI.getType(DstReg);
2446 return mi_match(SrcReg, MRI,
2447 m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy))));
2448}
2449
2450bool CombinerHelper::matchCombineZextTrunc(MachineInstr &MI, Register &Reg) {
2451 assert(MI.getOpcode() == TargetOpcode::G_ZEXT && "Expected a G_ZEXT");
2452 Register DstReg = MI.getOperand(0).getReg();
2453 Register SrcReg = MI.getOperand(1).getReg();
2454 LLT DstTy = MRI.getType(DstReg);
2455 if (mi_match(SrcReg, MRI,
2456 m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy))))) {
2457 unsigned DstSize = DstTy.getScalarSizeInBits();
2458 unsigned SrcSize = MRI.getType(SrcReg).getScalarSizeInBits();
2459 return KB->getKnownBits(Reg).countMinLeadingZeros() >= DstSize - SrcSize;
2460 }
2461 return false;
2462}
2463
2464bool CombinerHelper::matchCombineExtOfExt(
2465 MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
2466 assert((MI.getOpcode() == TargetOpcode::G_ANYEXT ||
2467 MI.getOpcode() == TargetOpcode::G_SEXT ||
2468 MI.getOpcode() == TargetOpcode::G_ZEXT) &&
2469 "Expected a G_[ASZ]EXT");
2470 Register SrcReg = MI.getOperand(1).getReg();
2471 Register OriginalSrcReg = getSrcRegIgnoringCopies(SrcReg, MRI);
2472 if (OriginalSrcReg.isValid())
2473 SrcReg = OriginalSrcReg;
2474 MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
2475 // Match exts with the same opcode, anyext([sz]ext) and sext(zext).
2476 unsigned Opc = MI.getOpcode();
2477 unsigned SrcOpc = SrcMI->getOpcode();
2478 if (Opc == SrcOpc ||
2479 (Opc == TargetOpcode::G_ANYEXT &&
2480 (SrcOpc == TargetOpcode::G_SEXT || SrcOpc == TargetOpcode::G_ZEXT)) ||
2481 (Opc == TargetOpcode::G_SEXT && SrcOpc == TargetOpcode::G_ZEXT)) {
2482 MatchInfo = std::make_tuple(SrcMI->getOperand(1).getReg(), SrcOpc);
2483 return true;
2484 }
2485 return false;
2486}
2487
2488void CombinerHelper::applyCombineExtOfExt(
2489 MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
2490 assert((MI.getOpcode() == TargetOpcode::G_ANYEXT ||
2491 MI.getOpcode() == TargetOpcode::G_SEXT ||
2492 MI.getOpcode() == TargetOpcode::G_ZEXT) &&
2493 "Expected a G_[ASZ]EXT");
2494
2495 Register Reg = std::get<0>(MatchInfo);
2496 unsigned SrcExtOp = std::get<1>(MatchInfo);
2497
2498 // Combine exts with the same opcode.
2499 if (MI.getOpcode() == SrcExtOp) {
2500 Observer.changingInstr(MI);
2501 MI.getOperand(1).setReg(Reg);
2502 Observer.changedInstr(MI);
2503 return;
2504 }
2505
2506 // Combine:
2507 // - anyext([sz]ext x) to [sz]ext x
2508 // - sext(zext x) to zext x
2509 if (MI.getOpcode() == TargetOpcode::G_ANYEXT ||
2510 (MI.getOpcode() == TargetOpcode::G_SEXT &&
2511 SrcExtOp == TargetOpcode::G_ZEXT)) {
2512 Register DstReg = MI.getOperand(0).getReg();
2513 Builder.buildInstr(SrcExtOp, {DstReg}, {Reg});
2514 MI.eraseFromParent();
2515 }
2516}
2517
2518bool CombinerHelper::matchCombineTruncOfExt(
2519 MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo) {
2520 assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
2521 Register SrcReg = MI.getOperand(1).getReg();
2522 MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
2523 unsigned SrcOpc = SrcMI->getOpcode();
2524 if (SrcOpc == TargetOpcode::G_ANYEXT || SrcOpc == TargetOpcode::G_SEXT ||
2525 SrcOpc == TargetOpcode::G_ZEXT) {
2526 MatchInfo = std::make_pair(SrcMI->getOperand(1).getReg(), SrcOpc);
2527 return true;
2528 }
2529 return false;
2530}
2531
2532void CombinerHelper::applyCombineTruncOfExt(
2533 MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo) {
2534 assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
2535 Register SrcReg = MatchInfo.first;
2536 unsigned SrcExtOp = MatchInfo.second;
2537 Register DstReg = MI.getOperand(0).getReg();
2538 LLT SrcTy = MRI.getType(SrcReg);
2539 LLT DstTy = MRI.getType(DstReg);
2540 if (SrcTy == DstTy) {
2541 MI.eraseFromParent();
2542 replaceRegWith(MRI, DstReg, SrcReg);
2543 return;
2544 }
2545 if (SrcTy.getSizeInBits() < DstTy.getSizeInBits())
2546 Builder.buildInstr(SrcExtOp, {DstReg}, {SrcReg});
2547 else
2548 Builder.buildTrunc(DstReg, SrcReg);
2549 MI.eraseFromParent();
2550}
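// Illustrative example (hypothetical registers): for trunc(ext x) the result
// only depends on how the types compare. With %x:_(s16) and
//   %w:_(s64) = G_ZEXT %x
// the possible rewrites are
//   %dst:_(s16) = G_TRUNC %w   --> %dst is replaced directly by %x
//   %dst:_(s32) = G_TRUNC %w   --> %dst:_(s32) = G_ZEXT %x
//   %dst:_(s8)  = G_TRUNC %w   --> %dst:_(s8)  = G_TRUNC %x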
2551
2552static LLT getMidVTForTruncRightShiftCombine(LLT ShiftTy, LLT TruncTy) {
2553 const unsigned ShiftSize = ShiftTy.getScalarSizeInBits();
2554 const unsigned TruncSize = TruncTy.getScalarSizeInBits();
2555
2556 // ShiftTy > 32 > TruncTy -> 32
2557 if (ShiftSize > 32 && TruncSize < 32)
2558 return ShiftTy.changeElementSize(32);
2559
2560 // TODO: We could also reduce to 16 bits, but that's more target-dependent.
2561 // Some targets like it, some don't, some only like it under certain
2562 // conditions/processor versions, etc.
2563 // A TL hook might be needed for this.
2564
2565 // Don't combine
2566 return ShiftTy;
2567}
2568
2569bool CombinerHelper::matchCombineTruncOfShift(
2570 MachineInstr &MI, std::pair<MachineInstr *, LLT> &MatchInfo) {
2571 assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
2572 Register DstReg = MI.getOperand(0).getReg();
2573 Register SrcReg = MI.getOperand(1).getReg();
2574
2575 if (!MRI.hasOneNonDBGUse(SrcReg))
2576 return false;
2577
2578 LLT SrcTy = MRI.getType(SrcReg);
2579 LLT DstTy = MRI.getType(DstReg);
2580
2581 MachineInstr *SrcMI = getDefIgnoringCopies(SrcReg, MRI);
2582 const auto &TL = getTargetLowering();
2583
2584 LLT NewShiftTy;
2585 switch (SrcMI->getOpcode()) {
2586 default:
2587 return false;
2588 case TargetOpcode::G_SHL: {
2589 NewShiftTy = DstTy;
2590
2591 // Make sure new shift amount is legal.
2592 KnownBits Known = KB->getKnownBits(SrcMI->getOperand(2).getReg());
2593 if (Known.getMaxValue().uge(NewShiftTy.getScalarSizeInBits()))
2594 return false;
2595 break;
2596 }
2597 case TargetOpcode::G_LSHR:
2598 case TargetOpcode::G_ASHR: {
2599 // For right shifts, we conservatively do not do the transform if the TRUNC
2600 // has any STORE users. The reason is that if we change the type of the
2601 // shift, we may break the truncstore combine.
2602 //
2603 // TODO: Fix truncstore combine to handle (trunc(lshr (trunc x), k)).
2604 for (auto &User : MRI.use_instructions(DstReg))
2605 if (User.getOpcode() == TargetOpcode::G_STORE)
2606 return false;
2607
2608 NewShiftTy = getMidVTForTruncRightShiftCombine(SrcTy, DstTy);
2609 if (NewShiftTy == SrcTy)
2610 return false;
2611
2612 // Make sure we won't lose information by truncating the high bits.
2613 KnownBits Known = KB->getKnownBits(SrcMI->getOperand(2).getReg());
2614 if (Known.getMaxValue().ugt(NewShiftTy.getScalarSizeInBits() -
2615 DstTy.getScalarSizeInBits()))
2616 return false;
2617 break;
2618 }
2619 }
2620
2621 if (!isLegalOrBeforeLegalizer(
2622 {SrcMI->getOpcode(),
2623 {NewShiftTy, TL.getPreferredShiftAmountTy(NewShiftTy)}}))
2624 return false;
2625
2626 MatchInfo = std::make_pair(SrcMI, NewShiftTy);
2627 return true;
2628}
2629
2630void CombinerHelper::applyCombineTruncOfShift(
2631 MachineInstr &MI, std::pair<MachineInstr *, LLT> &MatchInfo) {
2632 MachineInstr *ShiftMI = MatchInfo.first;
2633 LLT NewShiftTy = MatchInfo.second;
2634
2635 Register Dst = MI.getOperand(0).getReg();
2636 LLT DstTy = MRI.getType(Dst);
2637
2638 Register ShiftAmt = ShiftMI->getOperand(2).getReg();
2639 Register ShiftSrc = ShiftMI->getOperand(1).getReg();
2640 ShiftSrc = Builder.buildTrunc(NewShiftTy, ShiftSrc).getReg(0);
2641
2642 Register NewShift =
2643 Builder
2644 .buildInstr(ShiftMI->getOpcode(), {NewShiftTy}, {ShiftSrc, ShiftAmt})
2645 .getReg(0);
2646
2647 if (NewShiftTy == DstTy)
2648 replaceRegWith(MRI, Dst, NewShift);
2649 else
2650 Builder.buildTrunc(Dst, NewShift);
2651
2652 eraseInst(MI);
2653}
2654
2655bool CombinerHelper::matchAnyExplicitUseIsUndef(MachineInstr &MI) {
2656 return any_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
2657 return MO.isReg() &&
2658 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
2659 });
2660}
2661
2662bool CombinerHelper::matchAllExplicitUsesAreUndef(MachineInstr &MI) {
2663 return all_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
2664 return !MO.isReg() ||
2665 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
2666 });
2667}
2668
2669bool CombinerHelper::matchUndefShuffleVectorMask(MachineInstr &MI) {
2670 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
2671 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
2672 return all_of(Mask, [](int Elt) { return Elt < 0; });
2673}
2674
2675bool CombinerHelper::matchUndefStore(MachineInstr &MI) {
2676 assert(MI.getOpcode() == TargetOpcode::G_STORE);
2677 return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(0).getReg(),
2678 MRI);
2679}
2680
2681bool CombinerHelper::matchUndefSelectCmp(MachineInstr &MI) {
2682 assert(MI.getOpcode() == TargetOpcode::G_SELECT);
2683 return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(1).getReg(),
2684 MRI);
2685}
2686
2687bool CombinerHelper::matchInsertExtractVecEltOutOfBounds(MachineInstr &MI) {
2688 assert((MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT ||
2689 MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT) &&
2690 "Expected an insert/extract element op");
2691 LLT VecTy = MRI.getType(MI.getOperand(1).getReg());
2692 unsigned IdxIdx =
2693 MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
2694 auto Idx = getIConstantVRegVal(MI.getOperand(IdxIdx).getReg(), MRI);
2695 if (!Idx)
2696 return false;
2697 return Idx->getZExtValue() >= VecTy.getNumElements();
2698}
2699
2700bool CombinerHelper::matchConstantSelectCmp(MachineInstr &MI, unsigned &OpIdx) {
2701 GSelect &SelMI = cast<GSelect>(MI);
2702 auto Cst =
2703 isConstantOrConstantSplatVector(*MRI.getVRegDef(SelMI.getCondReg()), MRI);
2704 if (!Cst)
2705 return false;
2706 OpIdx = Cst->isZero() ? 3 : 2;
2707 return true;
2708}
2709
2710void CombinerHelper::eraseInst(MachineInstr &MI) { MI.eraseFromParent(); }
2711
2712bool CombinerHelper::matchEqualDefs(const MachineOperand &MOP1,
2713 const MachineOperand &MOP2) {
2714 if (!MOP1.isReg() || !MOP2.isReg())
2715 return false;
2716 auto InstAndDef1 = getDefSrcRegIgnoringCopies(MOP1.getReg(), MRI);
2717 if (!InstAndDef1)
2718 return false;
2719 auto InstAndDef2 = getDefSrcRegIgnoringCopies(MOP2.getReg(), MRI);
2720 if (!InstAndDef2)
2721 return false;
2722 MachineInstr *I1 = InstAndDef1->MI;
2723 MachineInstr *I2 = InstAndDef2->MI;
2724
2725 // Handle a case like this:
2726 //
2727 // %0:_(s64), %1:_(s64) = G_UNMERGE_VALUES %2:_(<2 x s64>)
2728 //
2729 // Even though %0 and %1 are produced by the same instruction they are not
2730 // the same values.
2731 if (I1 == I2)
2732 return MOP1.getReg() == MOP2.getReg();
2733
2734 // If we have an instruction which loads or stores, we can't guarantee that
2735 // it is identical.
2736 //
2737 // For example, we may have
2738 //
2739 // %x1 = G_LOAD %addr (load N from @somewhere)
2740 // ...
2741 // call @foo
2742 // ...
2743 // %x2 = G_LOAD %addr (load N from @somewhere)
2744 // ...
2745 // %or = G_OR %x1, %x2
2746 //
2747 // It's possible that @foo will modify whatever lives at the address we're
2748 // loading from. To be safe, let's just assume that all loads and stores
2749 // are different (unless we have something which is guaranteed to not
2750 // change.)
2751 if (I1->mayLoadOrStore() && !I1->isDereferenceableInvariantLoad())
2752 return false;
2753
2754 // If both instructions are loads or stores, they are equal only if both
2755 // are dereferenceable invariant loads with the same number of bits.
2756 if (I1->mayLoadOrStore() && I2->mayLoadOrStore()) {
2757 GLoadStore *LS1 = dyn_cast<GLoadStore>(I1);
2758 GLoadStore *LS2 = dyn_cast<GLoadStore>(I2);
2759 if (!LS1 || !LS2)
2760 return false;
2761
2762 if (!I2->isDereferenceableInvariantLoad() ||
2763 (LS1->getMemSizeInBits() != LS2->getMemSizeInBits()))
2764 return false;
2765 }
2766
2767 // Check for physical registers on the instructions first to avoid cases
2768 // like this:
2769 //
2770 // %a = COPY $physreg
2771 // ...
2772 // SOMETHING implicit-def $physreg
2773 // ...
2774 // %b = COPY $physreg
2775 //
2776 // These copies are not equivalent.
2777 if (any_of(I1->uses(), [](const MachineOperand &MO) {
2778 return MO.isReg() && MO.getReg().isPhysical();
2779 })) {
2780 // Check if we have a case like this:
2781 //
2782 // %a = COPY $physreg
2783 // %b = COPY %a
2784 //
2785 // In this case, I1 and I2 will both be equal to %a = COPY $physreg.
2786 // From that, we know that they must have the same value, since they must
2787 // have come from the same COPY.
2788 return I1->isIdenticalTo(*I2);
2789 }
2790
2791 // We don't have any physical registers, so we don't necessarily need the
2792 // same vreg defs.
2793 //
2794 // On the off-chance that there's some target instruction feeding into the
2795 // instruction, let's use produceSameValue instead of isIdenticalTo.
2796 if (Builder.getTII().produceSameValue(*I1, *I2, &MRI)) {
2797 // Handle instructions with multiple defs that produce the same values. The
2798 // values are the same for operands with the same index.
2799 // %0:_(s8), %1:_(s8), %2:_(s8), %3:_(s8) = G_UNMERGE_VALUES %4:_(<4 x s8>)
2800 // %5:_(s8), %6:_(s8), %7:_(s8), %8:_(s8) = G_UNMERGE_VALUES %4:_(<4 x s8>)
2801 // I1 and I2 are different instructions but produce the same values:
2802 // %1 and %6 are the same, while %1 and %7 are not the same value.
2803 return I1->findRegisterDefOperandIdx(InstAndDef1->Reg) ==
2804 I2->findRegisterDefOperandIdx(InstAndDef2->Reg);
2805 }
2806 return false;
2807}
2808
2809bool CombinerHelper::matchConstantOp(const MachineOperand &MOP, int64_t C) {
2810 if (!MOP.isReg())
2811 return false;
2812 auto *MI = MRI.getVRegDef(MOP.getReg());
2813 auto MaybeCst = isConstantOrConstantSplatVector(*MI, MRI);
2814 return MaybeCst && MaybeCst->getBitWidth() <= 64 &&
2815 MaybeCst->getSExtValue() == C;
2816}
2817
2818bool CombinerHelper::matchConstantFPOp(const MachineOperand &MOP, double C) {
2819 if (!MOP.isReg())
2820 return false;
2821 std::optional<FPValueAndVReg> MaybeCst;
2822 if (!mi_match(MOP.getReg(), MRI, m_GFCstOrSplat(MaybeCst)))
2823 return false;
2824
2825 return MaybeCst->Value.isExactlyValue(C);
2826}
2827
2828void CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI,
2829 unsigned OpIdx) {
2830 assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
2831 Register OldReg = MI.getOperand(0).getReg();
2832 Register Replacement = MI.getOperand(OpIdx).getReg();
2833 assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
2834 MI.eraseFromParent();
2835 replaceRegWith(MRI, OldReg, Replacement);
2836}
2837
2838void CombinerHelper::replaceSingleDefInstWithReg(MachineInstr &MI,
2839 Register Replacement) {
2840 assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
2841 Register OldReg = MI.getOperand(0).getReg();
2842 assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
2843 MI.eraseFromParent();
2844 replaceRegWith(MRI, OldReg, Replacement);
2845}
2846
2847bool CombinerHelper::matchConstantLargerBitWidth(MachineInstr &MI,
2848 unsigned ConstIdx) {
2849 Register ConstReg = MI.getOperand(ConstIdx).getReg();
2850 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
2851
2852 // Get the shift amount
2853 auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstReg, MRI);
2854 if (!VRegAndVal)
2855 return false;
2856
2857 // Return true if the shift amount is >= the bitwidth.
2858 return (VRegAndVal->Value.uge(DstTy.getSizeInBits()));
2859}
2860
2861void CombinerHelper::applyFunnelShiftConstantModulo(MachineInstr &MI) {
2862 assert((MI.getOpcode() == TargetOpcode::G_FSHL ||
2863 MI.getOpcode() == TargetOpcode::G_FSHR) &&
2864 "This is not a funnel shift operation");
2865
2866 Register ConstReg = MI.getOperand(3).getReg();
2867 LLT ConstTy = MRI.getType(ConstReg);
2868 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
2869
2870 auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstReg, MRI);
2871 assert((VRegAndVal) && "Value is not a constant");
2872
2873 // Calculate the new Shift Amount = Old Shift Amount % BitWidth
2874 APInt NewConst = VRegAndVal->Value.urem(
2875 APInt(ConstTy.getSizeInBits(), DstTy.getScalarSizeInBits()));
2876
2877 auto NewConstInstr = Builder.buildConstant(ConstTy, NewConst.getZExtValue());
2878 Builder.buildInstr(
2879 MI.getOpcode(), {MI.getOperand(0)},
2880 {MI.getOperand(1), MI.getOperand(2), NewConstInstr.getReg(0)});
2881
2882 MI.eraseFromParent();
2883}
2884
2885bool CombinerHelper::matchSelectSameVal(MachineInstr &MI) {
2886 assert(MI.getOpcode() == TargetOpcode::G_SELECT);
2887 // Match (cond ? x : x)
2888 return matchEqualDefs(MI.getOperand(2), MI.getOperand(3)) &&
2889 canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(2).getReg(),
2890 MRI);
2891}
2892
2893bool CombinerHelper::matchBinOpSameVal(MachineInstr &MI) {
2894 return matchEqualDefs(MI.getOperand(1), MI.getOperand(2)) &&
2895 canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(),
2896 MRI);
2897}
2898
2899bool CombinerHelper::matchOperandIsZero(MachineInstr &MI, unsigned OpIdx) {
2900 return matchConstantOp(MI.getOperand(OpIdx), 0) &&
2901 canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(OpIdx).getReg(),
2902 MRI);
2903}
2904
2905bool CombinerHelper::matchOperandIsUndef(MachineInstr &MI, unsigned OpIdx) {
2906 MachineOperand &MO = MI.getOperand(OpIdx);
2907 return MO.isReg() &&
2908 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
2909}
2910
2911bool CombinerHelper::matchOperandIsKnownToBeAPowerOfTwo(MachineInstr &MI,
2912 unsigned OpIdx) {
2913 MachineOperand &MO = MI.getOperand(OpIdx);
2914 return isKnownToBeAPowerOfTwo(MO.getReg(), MRI, KB);
2915}
2916
2917void CombinerHelper::replaceInstWithFConstant(MachineInstr &MI, double C) {
2918 assert(MI.getNumDefs() == 1 && "Expected only one def?");
2919 Builder.buildFConstant(MI.getOperand(0), C);
2920 MI.eraseFromParent();
2921}
2922
2923void CombinerHelper::replaceInstWithConstant(MachineInstr &MI, int64_t C) {
2924 assert(MI.getNumDefs() == 1 && "Expected only one def?");
2925 Builder.buildConstant(MI.getOperand(0), C);
2926 MI.eraseFromParent();
2927}
2928
2929void CombinerHelper::replaceInstWithConstant(MachineInstr &MI, APInt C) {
2930 assert(MI.getNumDefs() == 1 && "Expected only one def?");
2931 Builder.buildConstant(MI.getOperand(0), C);
2932 MI.eraseFromParent();
2933}
2934
2935void CombinerHelper::replaceInstWithFConstant(MachineInstr &MI,
2936 ConstantFP *CFP) {
2937 assert(MI.getNumDefs() == 1 && "Expected only one def?");
2938 Builder.buildFConstant(MI.getOperand(0), CFP->getValueAPF());
2939 MI.eraseFromParent();
2940}
2941
2942void CombinerHelper::replaceInstWithUndef(MachineInstr &MI) {
2943 assert(MI.getNumDefs() == 1 && "Expected only one def?");
2944 Builder.buildUndef(MI.getOperand(0));
2945 MI.eraseFromParent();
2946}
2947
2948bool CombinerHelper::matchSimplifyAddToSub(
2949 MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) {
2950 Register LHS = MI.getOperand(1).getReg();
2951 Register RHS = MI.getOperand(2).getReg();
2952 Register &NewLHS = std::get<0>(MatchInfo);
2953 Register &NewRHS = std::get<1>(MatchInfo);
2954
2955 // Helper lambda to check for opportunities for
2956 // ((0-A) + B) -> B - A
2957 // (A + (0-B)) -> A - B
2958 auto CheckFold = [&](Register &MaybeSub, Register &MaybeNewLHS) {
2959 if (!mi_match(MaybeSub, MRI, m_Neg(m_Reg(NewRHS))))
2960 return false;
2961 NewLHS = MaybeNewLHS;
2962 return true;
2963 };
2964
2965 return CheckFold(LHS, RHS) || CheckFold(RHS, LHS);
2966}
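// Illustrative example (hypothetical registers): the helper above matches a
// negated operand on either side of the add, so
//   %neg = G_SUB (G_CONSTANT 0), %a
//   %dst = G_ADD %neg, %b
// is recorded as (NewLHS = %b, NewRHS = %a) and later rebuilt by
// applySimplifyAddToSub as
//   %dst = G_SUB %b, %a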
2967
2968bool CombinerHelper::matchCombineInsertVecElts(
2969 MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) {
2970 assert(MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT &&
2971 "Invalid opcode");
2972 Register DstReg = MI.getOperand(0).getReg();
2973 LLT DstTy = MRI.getType(DstReg);
2974 assert(DstTy.isVector() && "Invalid G_INSERT_VECTOR_ELT?");
2975 unsigned NumElts = DstTy.getNumElements();
2976 // If this MI is part of a sequence of insert_vec_elts, then
2977 // don't do the combine in the middle of the sequence.
2978 if (MRI.hasOneUse(DstReg) && MRI.use_instr_begin(DstReg)->getOpcode() ==
2979 TargetOpcode::G_INSERT_VECTOR_ELT)
2980 return false;
2981 MachineInstr *CurrInst = &MI;
2982 MachineInstr *TmpInst;
2983 int64_t IntImm;
2984 Register TmpReg;
2985 MatchInfo.resize(NumElts);
2986 while (mi_match(
2987 CurrInst->getOperand(0).getReg(), MRI,
2988 m_GInsertVecElt(m_MInstr(TmpInst), m_Reg(TmpReg), m_ICst(IntImm)))) {
2989 if (IntImm >= NumElts || IntImm < 0)
2990 return false;
2991 if (!MatchInfo[IntImm])
2992 MatchInfo[IntImm] = TmpReg;
2993 CurrInst = TmpInst;
2994 }
2995 // Variable index.
2996 if (CurrInst->getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
2997 return false;
2998 if (TmpInst->getOpcode() == TargetOpcode::G_BUILD_VECTOR) {
2999 for (unsigned I = 1; I < TmpInst->getNumOperands(); ++I) {
3000 if (!MatchInfo[I - 1].isValid())
3001 MatchInfo[I - 1] = TmpInst->getOperand(I).getReg();
3002 }
3003 return true;
3004 }
3005 // If we didn't end in a G_IMPLICIT_DEF and the source is not fully
3006 // overwritten, bail out.
3007 return TmpInst->getOpcode() == TargetOpcode::G_IMPLICIT_DEF ||
3008 all_of(MatchInfo, [](Register Reg) { return !!Reg; });
3009}
3010
3011void CombinerHelper::applyCombineInsertVecElts(
3012 MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) {
3013 Register UndefReg;
3014 auto GetUndef = [&]() {
3015 if (UndefReg)
3016 return UndefReg;
3017 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
3018 UndefReg = Builder.buildUndef(DstTy.getScalarType()).getReg(0);
3019 return UndefReg;
3020 };
3021 for (unsigned I = 0; I < MatchInfo.size(); ++I) {
3022 if (!MatchInfo[I])
3023 MatchInfo[I] = GetUndef();
3024 }
3025 Builder.buildBuildVector(MI.getOperand(0).getReg(), MatchInfo);
3026 MI.eraseFromParent();
3027}
3028
3029void CombinerHelper::applySimplifyAddToSub(
3030 MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) {
3031 Register SubLHS, SubRHS;
3032 std::tie(SubLHS, SubRHS) = MatchInfo;
3033 Builder.buildSub(MI.getOperand(0).getReg(), SubLHS, SubRHS);
3034 MI.eraseFromParent();
3035}
3036
3037bool CombinerHelper::matchHoistLogicOpWithSameOpcodeHands(
3038 MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) {
3039 // Matches: logic (hand x, ...), (hand y, ...) -> hand (logic x, y), ...
3040 //
3041 // Creates the new hand + logic instruction (but does not insert them.)
3042 //
3043 // On success, MatchInfo is populated with the new instructions. These are
3044 // inserted in applyHoistLogicOpWithSameOpcodeHands.
3045 unsigned LogicOpcode = MI.getOpcode();
3046 assert(LogicOpcode == TargetOpcode::G_AND ||
3047 LogicOpcode == TargetOpcode::G_OR ||
3048 LogicOpcode == TargetOpcode::G_XOR);
3049 MachineIRBuilder MIB(MI);
3050 Register Dst = MI.getOperand(0).getReg();
3051 Register LHSReg = MI.getOperand(1).getReg();
3052 Register RHSReg = MI.getOperand(2).getReg();
3053
3054 // Don't recompute anything.
3055 if (!MRI.hasOneNonDBGUse(LHSReg) || !MRI.hasOneNonDBGUse(RHSReg))
3056 return false;
3057
3058 // Make sure we have (hand x, ...), (hand y, ...)
3059 MachineInstr *LeftHandInst = getDefIgnoringCopies(LHSReg, MRI);
3060 MachineInstr *RightHandInst = getDefIgnoringCopies(RHSReg, MRI);
3061 if (!LeftHandInst || !RightHandInst)
3062 return false;
3063 unsigned HandOpcode = LeftHandInst->getOpcode();
3064 if (HandOpcode != RightHandInst->getOpcode())
3065 return false;
3066 if (!LeftHandInst->getOperand(1).isReg() ||
3067 !RightHandInst->getOperand(1).isReg())
3068 return false;
3069
3070 // Make sure the types match up, and if we're doing this post-legalization,
3071 // we end up with legal types.
3072 Register X = LeftHandInst->getOperand(1).getReg();
3073 Register Y = RightHandInst->getOperand(1).getReg();
3074 LLT XTy = MRI.getType(X);
3075 LLT YTy = MRI.getType(Y);
3076 if (!XTy.isValid() || XTy != YTy)
3077 return false;
3078
3079 // Optional extra source register.
3080 Register ExtraHandOpSrcReg;
3081 switch (HandOpcode) {
3082 default:
3083 return false;
3084 case TargetOpcode::G_ANYEXT:
3085 case TargetOpcode::G_SEXT:
3086 case TargetOpcode::G_ZEXT: {
3087 // Match: logic (ext X), (ext Y) --> ext (logic X, Y)
3088 break;
3089 }
3090 case TargetOpcode::G_AND:
3091 case TargetOpcode::G_ASHR:
3092 case TargetOpcode::G_LSHR:
3093 case TargetOpcode::G_SHL: {
3094 // Match: logic (binop x, z), (binop y, z) -> binop (logic x, y), z
3095 MachineOperand &ZOp = LeftHandInst->getOperand(2);
3096 if (!matchEqualDefs(ZOp, RightHandInst->getOperand(2)))
3097 return false;
3098 ExtraHandOpSrcReg = ZOp.getReg();
3099 break;
3100 }
3101 }
3102
3103 if (!isLegalOrBeforeLegalizer({LogicOpcode, {XTy, YTy}}))
3104 return false;
3105
3106 // Record the steps to build the new instructions.
3107 //
3108 // Steps to build (logic x, y)
3109 auto NewLogicDst = MRI.createGenericVirtualRegister(XTy);
3110 OperandBuildSteps LogicBuildSteps = {
3111 [=](MachineInstrBuilder &MIB) { MIB.addDef(NewLogicDst); },
3112 [=](MachineInstrBuilder &MIB) { MIB.addReg(X); },
3113 [=](MachineInstrBuilder &MIB) { MIB.addReg(Y); }};
3114 InstructionBuildSteps LogicSteps(LogicOpcode, LogicBuildSteps);
3115
3116 // Steps to build hand (logic x, y), ...z
3117 OperandBuildSteps HandBuildSteps = {
3118 [=](MachineInstrBuilder &MIB) { MIB.addDef(Dst); },
3119 [=](MachineInstrBuilder &MIB) { MIB.addReg(NewLogicDst); }};
3120 if (ExtraHandOpSrcReg.isValid())
3121 HandBuildSteps.push_back(
3122 [=](MachineInstrBuilder &MIB) { MIB.addReg(ExtraHandOpSrcReg); });
3123 InstructionBuildSteps HandSteps(HandOpcode, HandBuildSteps);
3124
3125 MatchInfo = InstructionStepsMatchInfo({LogicSteps, HandSteps});
3126 return true;
3127}
3128
3129void CombinerHelper::applyBuildInstructionSteps(
3130 MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) {
3131 assert(MatchInfo.InstrsToBuild.size() &&
3132 "Expected at least one instr to build?");
3133 for (auto &InstrToBuild : MatchInfo.InstrsToBuild) {
3134 assert(InstrToBuild.Opcode && "Expected a valid opcode?");
3135 assert(InstrToBuild.OperandFns.size() && "Expected at least one operand?");
3136 MachineInstrBuilder Instr = Builder.buildInstr(InstrToBuild.Opcode);
3137 for (auto &OperandFn : InstrToBuild.OperandFns)
3138 OperandFn(Instr);
3139 }
3140 MI.eraseFromParent();
3141}
3142
3143bool CombinerHelper::matchAshrShlToSextInreg(
3144 MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) {
3145 assert(MI.getOpcode() == TargetOpcode::G_ASHR);
3146 int64_t ShlCst, AshrCst;
3147 Register Src;
3148 if (!mi_match(MI.getOperand(0).getReg(), MRI,
3149 m_GAShr(m_GShl(m_Reg(Src), m_ICstOrSplat(ShlCst)),
3150 m_ICstOrSplat(AshrCst))))
3151 return false;
3152 if (ShlCst != AshrCst)
3153 return false;
3154 if (!isLegalOrBeforeLegalizer(
3155 {TargetOpcode::G_SEXT_INREG, {MRI.getType(Src)}}))
3156 return false;
3157 MatchInfo = std::make_tuple(Src, ShlCst);
3158 return true;
3159}
3160
3161void CombinerHelper::applyAshShlToSextInreg(
3162 MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) {
3163 assert(MI.getOpcode() == TargetOpcode::G_ASHR);
3164 Register Src;
3165 int64_t ShiftAmt;
3166 std::tie(Src, ShiftAmt) = MatchInfo;
3167 unsigned Size = MRI.getType(Src).getScalarSizeInBits();
3168 Builder.buildSExtInReg(MI.getOperand(0).getReg(), Src, Size - ShiftAmt);
3169 MI.eraseFromParent();
3170}
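// Illustrative example (hypothetical registers, 32-bit scalars): matching
// shl/ashr amounts implement a sign-extension of the low bits, so
//   %t = G_SHL %x, G_CONSTANT 24
//   %dst = G_ASHR %t, G_CONSTANT 24
// is rewritten as
//   %dst = G_SEXT_INREG %x, 8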
3171
3172/// and(and(x, C1), C2) -> C1&C2 ? and(x, C1&C2) : 0
3173bool CombinerHelper::matchOverlappingAnd(
3174 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
3175 assert(MI.getOpcode() == TargetOpcode::G_AND);
3176
3177 Register Dst = MI.getOperand(0).getReg();
3178 LLT Ty = MRI.getType(Dst);
3179
3180 Register R;
3181 int64_t C1;
3182 int64_t C2;
3183 if (!mi_match(
3184 Dst, MRI,
3185 m_GAnd(m_GAnd(m_Reg(R), m_ICst(C1)), m_ICst(C2))))
3186 return false;
3187
3188 MatchInfo = [=](MachineIRBuilder &B) {
3189 if (C1 & C2) {
3190 B.buildAnd(Dst, R, B.buildConstant(Ty, C1 & C2));
3191 return;
3192 }
3193 auto Zero = B.buildConstant(Ty, 0);
3194 replaceRegWith(MRI, Dst, Zero->getOperand(0).getReg());
3195 };
3196 return true;
3197}
3198
3199bool CombinerHelper::matchRedundantAnd(MachineInstr &MI,
3200 Register &Replacement) {
3201 // Given
3202 //
3203 // %y:_(sN) = G_SOMETHING
3204 // %x:_(sN) = G_SOMETHING
3205 // %res:_(sN) = G_AND %x, %y
3206 //
3207 // Eliminate the G_AND when it is known that x & y == x or x & y == y.
3208 //
3209 // Patterns like this can appear as a result of legalization. E.g.
3210 //
3211 // %cmp:_(s32) = G_ICMP intpred(pred), %x(s32), %y
3212 // %one:_(s32) = G_CONSTANT i32 1
3213 // %and:_(s32) = G_AND %cmp, %one
3214 //
3215 // In this case, G_ICMP only produces a single bit, so x & 1 == x.
3216 assert(MI.getOpcode() == TargetOpcode::G_AND);
3217 if (!KB)
3218 return false;
3219
3220 Register AndDst = MI.getOperand(0).getReg();
3221 Register LHS = MI.getOperand(1).getReg();
3222 Register RHS = MI.getOperand(2).getReg();
3223 KnownBits LHSBits = KB->getKnownBits(LHS);
3224 KnownBits RHSBits = KB->getKnownBits(RHS);
3225
3226 // Check that x & Mask == x.
3227 // x & 1 == x, always
3228 // x & 0 == x, only if x is also 0
3229 // Meaning Mask has no effect if every bit is either one in Mask or zero in x.
3230 //
3231 // Check if we can replace AndDst with the LHS of the G_AND
3232 if (canReplaceReg(AndDst, LHS, MRI) &&
3233 (LHSBits.Zero | RHSBits.One).isAllOnes()) {
3234 Replacement = LHS;
3235 return true;
3236 }
3237
3238 // Check if we can replace AndDst with the RHS of the G_AND
3239 if (canReplaceReg(AndDst, RHS, MRI) &&
3240 (LHSBits.One | RHSBits.Zero).isAllOnes()) {
3241 Replacement = RHS;
3242 return true;
3243 }
3244
3245 return false;
3246}
3247
3248bool CombinerHelper::matchRedundantOr(MachineInstr &MI, Register &Replacement) {
3249 // Given
3250 //
3251 // %y:_(sN) = G_SOMETHING
3252 // %x:_(sN) = G_SOMETHING
3253 // %res:_(sN) = G_OR %x, %y
3254 //
3255 // Eliminate the G_OR when it is known that x | y == x or x | y == y.
3256 assert(MI.getOpcode() == TargetOpcode::G_OR);
3257 if (!KB)
3258 return false;
3259
3260 Register OrDst = MI.getOperand(0).getReg();
3261 Register LHS = MI.getOperand(1).getReg();
3262 Register RHS = MI.getOperand(2).getReg();
3263 KnownBits LHSBits = KB->getKnownBits(LHS);
3264 KnownBits RHSBits = KB->getKnownBits(RHS);
3265
3266 // Check that x | Mask == x.
3267 // x | 0 == x, always
3268 // x | 1 == x, only if x is also 1
3269 // Meaning Mask has no effect if every bit is either zero in Mask or one in x.
3270 //
3271 // Check if we can replace OrDst with the LHS of the G_OR
3272 if (canReplaceReg(OrDst, LHS, MRI) &&
3273 (LHSBits.One | RHSBits.Zero).isAllOnes()) {
3274 Replacement = LHS;
3275 return true;
3276 }
3277
3278 // Check if we can replace OrDst with the RHS of the G_OR
3279 if (canReplaceReg(OrDst, RHS, MRI) &&
3280 (LHSBits.Zero | RHSBits.One).isAllOnes()) {
3281 Replacement = RHS;
3282 return true;
3283 }
3284
3285 return false;
3286}
3287
3288bool CombinerHelper::matchRedundantSExtInReg(MachineInstr &MI) {
3289 // If the input is already sign extended, just drop the extension.
3290 Register Src = MI.getOperand(1).getReg();
3291 unsigned ExtBits = MI.getOperand(2).getImm();
3292 unsigned TypeSize = MRI.getType(Src).getScalarSizeInBits();
3293 return KB->computeNumSignBits(Src) >= (TypeSize - ExtBits + 1);
3294}
3295
3296static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits,
3297 int64_t Cst, bool IsVector, bool IsFP) {
3298 // For i1, Cst will always be -1 regardless of boolean contents.
3299 return (ScalarSizeBits == 1 && Cst == -1) ||
3300 isConstTrueVal(TLI, Cst, IsVector, IsFP);
3301}
3302
3303bool CombinerHelper::matchNotCmp(MachineInstr &MI,
3304 SmallVectorImpl<Register> &RegsToNegate) {
3305 assert(MI.getOpcode() == TargetOpcode::G_XOR);
3306 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3307 const auto &TLI = *Builder.getMF().getSubtarget().getTargetLowering();
3308 Register XorSrc;
3309 Register CstReg;
3310 // We match xor(src, true) here.
3311 if (!mi_match(MI.getOperand(0).getReg(), MRI,
3312 m_GXor(m_Reg(XorSrc), m_Reg(CstReg))))
3313 return false;
3314
3315 if (!MRI.hasOneNonDBGUse(XorSrc))
3316 return false;
3317
3318 // Check that XorSrc is the root of a tree of comparisons combined with ANDs
3319 // and ORs. The suffix of RegsToNegate starting from index I is used as a work
3320 // list of tree nodes to visit.
3321 RegsToNegate.push_back(XorSrc);
3322 // Remember whether the comparisons are all integer or all floating point.
3323 bool IsInt = false;
3324 bool IsFP = false;
3325 for (unsigned I = 0; I < RegsToNegate.size(); ++I) {
3326 Register Reg = RegsToNegate[I];
3327 if (!MRI.hasOneNonDBGUse(Reg))
3328 return false;
3329 MachineInstr *Def = MRI.getVRegDef(Reg);
3330 switch (Def->getOpcode()) {
3331 default:
3332 // Don't match if the tree contains anything other than ANDs, ORs and
3333 // comparisons.
3334 return false;
3335 case TargetOpcode::G_ICMP:
3336 if (IsFP)
3337 return false;
3338 IsInt = true;
3339 // When we apply the combine we will invert the predicate.
3340 break;
3341 case TargetOpcode::G_FCMP:
3342 if (IsInt)
3343 return false;
3344 IsFP = true;
3345 // When we apply the combine we will invert the predicate.
3346 break;
3347 case TargetOpcode::G_AND:
3348 case TargetOpcode::G_OR:
3349 // Implement De Morgan's laws:
3350 // ~(x & y) -> ~x | ~y
3351 // ~(x | y) -> ~x & ~y
3352 // When we apply the combine we will change the opcode and recursively
3353 // negate the operands.
3354 RegsToNegate.push_back(Def->getOperand(1).getReg());
3355 RegsToNegate.push_back(Def->getOperand(2).getReg());
3356 break;
3357 }
3358 }
3359
3360 // Now we know whether the comparisons are integer or floating point, check
3361 // the constant in the xor.
3362 int64_t Cst;
3363 if (Ty.isVector()) {
3364 MachineInstr *CstDef = MRI.getVRegDef(CstReg);
3365 auto MaybeCst = getIConstantSplatSExtVal(*CstDef, MRI);
3366 if (!MaybeCst)
3367 return false;
3368 if (!isConstValidTrue(TLI, Ty.getScalarSizeInBits(), *MaybeCst, true, IsFP))
3369 return false;
3370 } else {
3371 if (!mi_match(CstReg, MRI, m_ICst(Cst)))
3372 return false;
3373 if (!isConstValidTrue(TLI, Ty.getSizeInBits(), Cst, false, IsFP))
3374 return false;
3375 }
3376
3377 return true;
3378}
3379
3380void CombinerHelper::applyNotCmp(MachineInstr &MI,
3381 SmallVectorImpl<Register> &RegsToNegate) {
3382 for (Register Reg : RegsToNegate) {
3383 MachineInstr *Def = MRI.getVRegDef(Reg);
3384 Observer.changingInstr(*Def);
3385 // For each comparison, invert the opcode. For each AND and OR, change the
3386 // opcode.
3387 switch (Def->getOpcode()) {
3388 default:
3389 llvm_unreachable("Unexpected opcode");
3390 case TargetOpcode::G_ICMP:
3391 case TargetOpcode::G_FCMP: {
3392 MachineOperand &PredOp = Def->getOperand(1);
3393 CmpInst::Predicate NewP = CmpInst::getInversePredicate(
3394 (CmpInst::Predicate)PredOp.getPredicate());
3395 PredOp.setPredicate(NewP);
3396 break;
3397 }
3398 case TargetOpcode::G_AND:
3399 Def->setDesc(Builder.getTII().get(TargetOpcode::G_OR));
3400 break;
3401 case TargetOpcode::G_OR:
3402 Def->setDesc(Builder.getTII().get(TargetOpcode::G_AND));
3403 break;
3404 }
3405 Observer.changedInstr(*Def);
3406 }
3407
3408 replaceRegWith(MRI, MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
3409 MI.eraseFromParent();
3410}
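// Illustrative example (hypothetical registers; %true stands for whatever
// constant the target treats as boolean true): xor'ing a tree of comparisons
// with "true" is folded by inverting each predicate and applying De Morgan
// to the connectives, e.g.
//   %c1 = G_ICMP eq, %a, %b
//   %c2 = G_ICMP slt, %x, %y
//   %or = G_OR %c1, %c2
//   %dst = G_XOR %or, %true
// becomes
//   %c1 = G_ICMP ne, %a, %b
//   %c2 = G_ICMP sge, %x, %y
//   %or = G_AND %c1, %c2
// with %dst replaced by %or.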
3411
3412bool CombinerHelper::matchXorOfAndWithSameReg(
3413 MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
3414 // Match (xor (and x, y), y) (or any of its commuted cases)
3415 assert(MI.getOpcode() == TargetOpcode::G_XOR);
3416 Register &X = MatchInfo.first;
3417 Register &Y = MatchInfo.second;
3418 Register AndReg = MI.getOperand(1).getReg();
3419 Register SharedReg = MI.getOperand(2).getReg();
3420
3421 // Find a G_AND on either side of the G_XOR.
3422 // Look for one of
3423 //
3424 // (xor (and x, y), SharedReg)
3425 // (xor SharedReg, (and x, y))
3426 if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y)))) {
3427 std::swap(AndReg, SharedReg);
3428 if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y))))
3429 return false;
3430 }
3431
3432 // Only do this if we'll eliminate the G_AND.
3433 if (!MRI.hasOneNonDBGUse(AndReg))
3434 return false;
3435
3436 // We can combine if SharedReg is the same as either the LHS or RHS of the
3437 // G_AND.
3438 if (Y != SharedReg)
3439 std::swap(X, Y);
3440 return Y == SharedReg;
3441}
3442
3443void CombinerHelper::applyXorOfAndWithSameReg(
3444 MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
3445 // Fold (xor (and x, y), y) -> (and (not x), y)
3446 Register X, Y;
3447 std::tie(X, Y) = MatchInfo;
3448 auto Not = Builder.buildNot(MRI.getType(X), X);
3449 Observer.changingInstr(MI);
3450 MI.setDesc(Builder.getTII().get(TargetOpcode::G_AND));
3451 MI.getOperand(1).setReg(Not->getOperand(0).getReg());
3452 MI.getOperand(2).setReg(Y);
3454}
3455
3456bool CombinerHelper::matchPtrAddZero(MachineInstr &MI) {
3457 auto &PtrAdd = cast<GPtrAdd>(MI);
3458 Register DstReg = PtrAdd.getReg(0);
3459 LLT Ty = MRI.getType(DstReg);
3461
3462 if (DL.isNonIntegralAddressSpace(Ty.getScalarType().getAddressSpace()))
3463 return false;
3464
3465 if (Ty.isPointer()) {
3466 auto ConstVal = getIConstantVRegVal(PtrAdd.getBaseReg(), MRI);
3467 return ConstVal && *ConstVal == 0;
3468 }
3469
3470 assert(Ty.isVector() && "Expecting a vector type");
3471 const MachineInstr *VecMI = MRI.getVRegDef(PtrAdd.getBaseReg());
3472 return isBuildVectorAllZeros(*VecMI, MRI);
3473}
3474
3475void CombinerHelper::applyPtrAddZero(MachineInstr &MI) {
3476 auto &PtrAdd = cast<GPtrAdd>(MI);
3477 Builder.buildIntToPtr(PtrAdd.getReg(0), PtrAdd.getOffsetReg());
3478 PtrAdd.eraseFromParent();
3479}
3480
3481/// The second source operand is known to be a power of 2.
3482void CombinerHelper::applySimplifyURemByPow2(MachineInstr &MI) {
3483 Register DstReg = MI.getOperand(0).getReg();
3484 Register Src0 = MI.getOperand(1).getReg();
3485 Register Pow2Src1 = MI.getOperand(2).getReg();
3486 LLT Ty = MRI.getType(DstReg);
3487
3488 // Fold (urem x, pow2) -> (and x, pow2-1)
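// E.g. (urem x, 8) -> (and x, 7), built here as x & (8 + (-1)).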
3489 auto NegOne = Builder.buildConstant(Ty, -1);
3490 auto Add = Builder.buildAdd(Ty, Pow2Src1, NegOne);
3491 Builder.buildAnd(DstReg, Src0, Add);
3492 MI.eraseFromParent();
3493}
3494
3495bool CombinerHelper::matchFoldBinOpIntoSelect(MachineInstr &MI,
3496 unsigned &SelectOpNo) {
3497 Register LHS = MI.getOperand(1).getReg();
3498 Register RHS = MI.getOperand(2).getReg();
3499
3500 Register OtherOperandReg = RHS;
3501 SelectOpNo = 1;
3502 MachineInstr *Select = MRI.getVRegDef(LHS);
3503
3504 // Don't do this unless the old select is going away. We want to eliminate the
3505 // binary operator, not replace a binop with a select.
3506 if (Select->getOpcode() != TargetOpcode::G_SELECT ||
3507 !MRI.hasOneNonDBGUse(LHS)) {
3508 OtherOperandReg = LHS;
3509 SelectOpNo = 2;
3510 Select = MRI.getVRegDef(RHS);
3511 if (Select->getOpcode() != TargetOpcode::G_SELECT ||
3512 !MRI.hasOneNonDBGUse(RHS))
3513 return false;
3514 }
3515
3516 MachineInstr *SelectLHS = MRI.getVRegDef(Select->getOperand(2).getReg());
3517 MachineInstr *SelectRHS = MRI.getVRegDef(Select->getOperand(3).getReg());
3518
3519 if (!isConstantOrConstantVector(*SelectLHS, MRI,
3520 /*AllowFP*/ true,
3521 /*AllowOpaqueConstants*/ false))
3522 return false;
3523 if (!isConstantOrConstantVector(*SelectRHS, MRI,
3524 /*AllowFP*/ true,
3525 /*AllowOpaqueConstants*/ false))
3526 return false;
3527
3528 unsigned BinOpcode = MI.getOpcode();
3529
3530 // We know that one of the operands is a select of constants. Now verify that
3531 // the other binary operator operand is either a constant, or we can handle a
3532 // variable.
3533 bool CanFoldNonConst =
3534 (BinOpcode == TargetOpcode::G_AND || BinOpcode == TargetOpcode::G_OR) &&
3535 (isNullOrNullSplat(*SelectLHS, MRI) ||
3536 isAllOnesOrAllOnesSplat(*SelectLHS, MRI)) &&
3537 (isNullOrNullSplat(*SelectRHS, MRI) ||
3538 isAllOnesOrAllOnesSplat(*SelectRHS, MRI));
3539 if (CanFoldNonConst)
3540 return true;
3541
3542 return isConstantOrConstantVector(*MRI.getVRegDef(OtherOperandReg), MRI,
3543 /*AllowFP*/ true,
3544 /*AllowOpaqueConstants*/ false);
3545}
3546
3547/// \p SelectOperand is the operand in binary operator \p MI that is the select
3548/// to fold.
3549void CombinerHelper::applyFoldBinOpIntoSelect(MachineInstr &MI,
3550 const unsigned &SelectOperand) {
3551 Register Dst = MI.getOperand(0).getReg();
3552 Register LHS = MI.getOperand(1).getReg();
3553 Register RHS = MI.getOperand(2).getReg();
3554 MachineInstr *Select = MRI.getVRegDef(MI.getOperand(SelectOperand).getReg());
3555
3556 Register SelectCond = Select->getOperand(1).getReg();
3557 Register SelectTrue = Select->getOperand(2).getReg();
3558 Register SelectFalse = Select->getOperand(3).getReg();
3559
3560 LLT Ty = MRI.getType(Dst);
3561 unsigned BinOpcode = MI.getOpcode();
3562
3563 Register FoldTrue, FoldFalse;
3564
3565 // We have a select-of-constants followed by a binary operator with a
3566 // constant. Eliminate the binop by pulling the constant math into the select.
3567 // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
3568 if (SelectOperand == 1) {
3569 // TODO: SelectionDAG verifies this actually constant folds before
3570 // committing to the combine.
3571
3572 FoldTrue = Builder.buildInstr(BinOpcode, {Ty}, {SelectTrue, RHS}).getReg(0);
3573 FoldFalse =
3574 Builder.buildInstr(BinOpcode, {Ty}, {SelectFalse, RHS}).getReg(0);
3575 } else {
3576 FoldTrue = Builder.buildInstr(BinOpcode, {Ty}, {LHS, SelectTrue}).getReg(0);
3577 FoldFalse =
3578 Builder.buildInstr(BinOpcode, {Ty}, {LHS, SelectFalse}).getReg(0);
3579 }
3580
3581 Builder.buildSelect(Dst, SelectCond, FoldTrue, FoldFalse, MI.getFlags());
3582 MI.eraseFromParent();
3583}
3584
3585std::optional<SmallVector<Register, 8>>
3586CombinerHelper::findCandidatesForLoadOrCombine(const MachineInstr *Root) const {
3587 assert(Root->getOpcode() == TargetOpcode::G_OR && "Expected G_OR only!");
3588 // We want to detect if Root is part of a tree which represents a bunch
3589 // of loads being merged into a larger load. We'll try to recognize patterns
3590 // like, for example:
3591 //
3592 // Reg Reg
3593 // \ /
3594 // OR_1 Reg
3595 // \ /
3596 // OR_2
3597 // \ Reg
3598 // .. /
3599 // Root
3600 //
3601 // Reg Reg Reg Reg
3602 // \ / \ /
3603 // OR_1 OR_2
3604 // \ /
3605 // \ /
3606 // ...
3607 // Root
3608 //
3609 // Each "Reg" may have been produced by a load + some arithmetic. This
3610 // function will save each of them.
3611 SmallVector<Register, 8> RegsToVisit;
3613
3614 // In the "worst" case, we're dealing with a load for each byte. So, there
3615 // are at most #bytes - 1 ORs.
3616 const unsigned MaxIter =
3617 MRI.getType(Root->getOperand(0).getReg()).getSizeInBytes() - 1;
3618 for (unsigned Iter = 0; Iter < MaxIter; ++Iter) {
3619 if (Ors.empty())
3620 break;
3621 const MachineInstr *Curr = Ors.pop_back_val();
3622 Register OrLHS = Curr->getOperand(1).getReg();
3623 Register OrRHS = Curr->getOperand(2).getReg();
3624
3625 // In the combine, we want to eliminate the entire tree.
3626 if (!MRI.hasOneNonDBGUse(OrLHS) || !MRI.hasOneNonDBGUse(OrRHS))
3627 return std::nullopt;
3628
3629 // If it's a G_OR, save it and continue to walk. If it's not, then it's
3630 // something that may be a load + arithmetic.
3631 if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrLHS, MRI))
3632 Ors.push_back(Or);
3633 else
3634 RegsToVisit.push_back(OrLHS);
3635 if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrRHS, MRI))
3636 Ors.push_back(Or);
3637 else
3638 RegsToVisit.push_back(OrRHS);
3639 }
3640
3641 // We're going to try and merge each register into a wider power-of-2 type,
3642 // so we ought to have an even number of registers.
3643 if (RegsToVisit.empty() || RegsToVisit.size() % 2 != 0)
3644 return std::nullopt;
3645 return RegsToVisit;
3646}
3647
3648/// Helper function for findLoadOffsetsForLoadOrCombine.
3649///
3650/// Check if \p Reg is the result of loading a \p MemSizeInBits wide value,
3651/// and then moving that value into a specific byte offset.
3652///
3653/// e.g. x[i] << 24
3654///
3655/// \returns The load instruction and the byte offset it is moved into.
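/// For example, with 8-bit loads, x[i] << 24 places the loaded byte at byte
/// offset 24 / 8 == 3 of the wide value, so the returned offset is 3.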
3656static std::optional<std::pair<GZExtLoad *, int64_t>>
3657matchLoadAndBytePosition(Register Reg, unsigned MemSizeInBits,
3658 const MachineRegisterInfo &MRI) {
3659 assert(MRI.hasOneNonDBGUse(Reg) &&
3660 "Expected Reg to only have one non-debug use?");
3661 Register MaybeLoad;
3662 int64_t Shift;
3663 if (!mi_match(Reg, MRI,
3664 m_OneNonDBGUse(m_GShl(m_Reg(MaybeLoad), m_ICst(Shift))))) {
3665 Shift = 0;
3666 MaybeLoad = Reg;
3667 }
3668
3669 if (Shift % MemSizeInBits != 0)
3670 return std::nullopt;
3671
3672 // TODO: Handle other types of loads.
3673 auto *Load = getOpcodeDef<GZExtLoad>(MaybeLoad, MRI);
3674 if (!Load)
3675 return std::nullopt;
3676
3677 if (!Load->isUnordered() || Load->getMemSizeInBits() != MemSizeInBits)
3678 return std::nullopt;
3679
3680 return std::make_pair(Load, Shift / MemSizeInBits);
3681}
3682
3683std::optional<std::tuple<GZExtLoad *, int64_t, GZExtLoad *>>
3684CombinerHelper::findLoadOffsetsForLoadOrCombine(
3685 SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx,
3686 const SmallVector<Register, 8> &RegsToVisit, const unsigned MemSizeInBits) {
3687
3688 // Each load found for the pattern. There should be one for each RegsToVisit.
3690
3691 // The lowest index used in any load. (The lowest "i" for each x[i].)
3692 int64_t LowestIdx = INT64_MAX;
3693
3694 // The load which uses the lowest index.
3695 GZExtLoad *LowestIdxLoad = nullptr;
3696
3697 // Keeps track of the load indices we see. We shouldn't see any indices twice.
3698 SmallSet<int64_t, 8> SeenIdx;
3699
3700 // Ensure each load is in the same MBB.
3701 // TODO: Support multiple MachineBasicBlocks.
3702 MachineBasicBlock *MBB = nullptr;
3703 const MachineMemOperand *MMO = nullptr;
3704
3705 // Earliest instruction-order load in the pattern.
3706 GZExtLoad *EarliestLoad = nullptr;
3707
3708 // Latest instruction-order load in the pattern.
3709 GZExtLoad *LatestLoad = nullptr;
3710
3711 // Base pointer which every load should share.
3712 Register BasePtr;
3713
3714 // We want to find a load for each register. Each load should have some
3715 // appropriate bit twiddling arithmetic. During this loop, we will also keep
3716 // track of the load which uses the lowest index. Later, we will check if we
3717 // can use its pointer in the final, combined load.
3718 for (auto Reg : RegsToVisit) {
3719 // Find the load, and find the position that it will end up in (e.g. as a
3720 // shifted value).
3721 auto LoadAndPos = matchLoadAndBytePosition(Reg, MemSizeInBits, MRI);
3722 if (!LoadAndPos)
3723 return std::nullopt;
3724 GZExtLoad *Load;
3725 int64_t DstPos;
3726 std::tie(Load, DstPos) = *LoadAndPos;
3727
3728 // TODO: Handle multiple MachineBasicBlocks. Currently not handled because
3729 // it is difficult to check for stores/calls/etc between loads.
3730 MachineBasicBlock *LoadMBB = Load->getParent();
3731 if (!MBB)
3732 MBB = LoadMBB;
3733 if (LoadMBB != MBB)
3734 return std::nullopt;
3735
3736 // Make sure that the MachineMemOperands of every seen load are compatible.
3737 auto &LoadMMO = Load->getMMO();
3738 if (!MMO)
3739 MMO = &LoadMMO;
3740 if (MMO->getAddrSpace() != LoadMMO.getAddrSpace())
3741 return std::nullopt;
3742
3743 // Find out what the base pointer and index for the load is.
3744 Register LoadPtr;
3745 int64_t Idx;
3746 if (!mi_match(Load->getOperand(1).getReg(), MRI,
3747 m_GPtrAdd(m_Reg(LoadPtr), m_ICst(Idx)))) {
3748 LoadPtr = Load->getOperand(1).getReg();
3749 Idx = 0;
3750 }
3751
3752 // Don't combine things like a[i], a[i] -> a bigger load.
3753 if (!SeenIdx.insert(Idx).second)
3754 return std::nullopt;
3755
3756 // Every load must share the same base pointer; don't combine things like:
3757 //
3758 // a[i], b[i + 1] -> a bigger load.
3759 if (!BasePtr.isValid())
3760 BasePtr = LoadPtr;
3761 if (BasePtr != LoadPtr)
3762 return std::nullopt;
3763
3764 if (Idx < LowestIdx) {
3765 LowestIdx = Idx;
3766 LowestIdxLoad = Load;
3767 }
3768
3769 // Keep track of the byte offset that this load ends up at. If we have seen
3770 // the byte offset, then stop here. We do not want to combine:
3771 //
3772 // a[i] << 16, a[i + k] << 16 -> a bigger load.
3773 if (!MemOffset2Idx.try_emplace(DstPos, Idx).second)
3774 return std::nullopt;
3775 Loads.insert(Load);
3776
3777 // Keep track of the position of the earliest/latest loads in the pattern.
3778 // We will check that there are no load fold barriers between them later
3779 // on.
3780 //
3781 // FIXME: Is there a better way to check for load fold barriers?
3782 if (!EarliestLoad || dominates(*Load, *EarliestLoad))
3783 EarliestLoad = Load;
3784 if (!LatestLoad || dominates(*LatestLoad, *Load))
3785 LatestLoad = Load;
3786 }
3787
3788 // We found a load for each register. Let's check if each load satisfies the
3789 // pattern.
3790 assert(Loads.size() == RegsToVisit.size() &&
3791 "Expected to find a load for each register?");
3792 assert(EarliestLoad != LatestLoad && EarliestLoad &&
3793 LatestLoad && "Expected at least two loads?");
3794
3795 // Check if there are any stores, calls, etc. between any of the loads. If
3796 // there are, then we can't safely perform the combine.
3797 //
3798 // MaxIter is chosen based off the (worst case) number of iterations it
3799 // typically takes to succeed in the LLVM test suite plus some padding.
3800 //
3801 // FIXME: Is there a better way to check for load fold barriers?
3802 const unsigned MaxIter = 20;
3803 unsigned Iter = 0;
3804 for (const auto &MI : instructionsWithoutDebug(EarliestLoad->getIterator(),
3805 LatestLoad->getIterator())) {
3806 if (Loads.count(&MI))
3807 continue;
3808 if (MI.isLoadFoldBarrier())
3809 return std::nullopt;
3810 if (Iter++ == MaxIter)
3811 return std::nullopt;
3812 }
3813
3814 return std::make_tuple(LowestIdxLoad, LowestIdx, LatestLoad);
3815}
3816
3817bool CombinerHelper::matchLoadOrCombine(
3818 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
3819 assert(MI.getOpcode() == TargetOpcode::G_OR);
3820 MachineFunction &MF = *MI.getMF();
3821 // Assuming a little-endian target, transform:
3822 // s8 *a = ...
3823 // s32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
3824 // =>
3825 // s32 val = *((i32)a)
3826 //
3827 // s8 *a = ...
3828 // s32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
3829 // =>
3830 // s32 val = BSWAP(*((s32)a))
3831 Register Dst = MI.getOperand(0).getReg();
3832 LLT Ty = MRI.getType(Dst);
3833 if (Ty.isVector())
3834 return false;
3835
3836 // We need to combine at least two loads into this type. Since the smallest
3837 // possible load is into a byte, we need at least a 16-bit wide type.
3838 const unsigned WideMemSizeInBits = Ty.getSizeInBits();
3839 if (WideMemSizeInBits < 16 || WideMemSizeInBits % 8 != 0)
3840 return false;
3841
3842 // Match a collection of non-OR instructions in the pattern.
3843 auto RegsToVisit = findCandidatesForLoadOrCombine(&MI);
3844 if (!RegsToVisit)
3845 return false;
3846
3847 // We have a collection of non-OR instructions. Figure out how wide each of
3848 // the small loads should be based off of the number of potential loads we
3849 // found.
3850 const unsigned NarrowMemSizeInBits = WideMemSizeInBits / RegsToVisit->size();
3851 if (NarrowMemSizeInBits % 8 != 0)
3852 return false;
3853
3854 // Check if each register feeding into each OR is a load from the same
3855 // base pointer + some arithmetic.
3856 //
3857 // e.g. a[0], a[1] << 8, a[2] << 16, etc.
3858 //
3859 // Also verify that each of these ends up putting a[i] into the same memory
3860 // offset as a load into a wide type would.
3861 SmallDenseMap<int64_t, int64_t, 8> MemOffset2Idx;
3862 GZExtLoad *LowestIdxLoad, *LatestLoad;
3863 int64_t LowestIdx;
3864 auto MaybeLoadInfo = findLoadOffsetsForLoadOrCombine(
3865 MemOffset2Idx, *RegsToVisit, NarrowMemSizeInBits);
3866 if (!MaybeLoadInfo)
3867 return false;
3868 std::tie(LowestIdxLoad, LowestIdx, LatestLoad) = *MaybeLoadInfo;
3869
3870 // We have a bunch of loads being OR'd together. Using the addresses + offsets
3871 // we found before, check if this corresponds to a big or little endian byte
3872 // pattern. If it does, then we can represent it using a load + possibly a
3873 // BSWAP.
3874 bool IsBigEndianTarget = MF.getDataLayout().isBigEndian();
3875 std::optional<bool> IsBigEndian = isBigEndian(MemOffset2Idx, LowestIdx);
3876 if (!IsBigEndian)
3877 return false;
3878 bool NeedsBSwap = IsBigEndianTarget != *IsBigEndian;
3879 if (NeedsBSwap && !isLegalOrBeforeLegalizer({TargetOpcode::G_BSWAP, {Ty}}))
3880 return false;
3881
3882 // Make sure that the load from the lowest index produces offset 0 in the
3883 // final value.
3884 //
3885 // This ensures that we won't combine something like this:
3886 //
3887 // load x[i] -> byte 2
3888 // load x[i+1] -> byte 0 ---> wide_load x[i]
3889 // load x[i+2] -> byte 1
3890 const unsigned NumLoadsInTy = WideMemSizeInBits / NarrowMemSizeInBits;
3891 const unsigned ZeroByteOffset =
3892 *IsBigEndian
3893 ? bigEndianByteAt(NumLoadsInTy, 0)
3894 : littleEndianByteAt(NumLoadsInTy, 0);
3895 auto ZeroOffsetIdx = MemOffset2Idx.find(ZeroByteOffset);
3896 if (ZeroOffsetIdx == MemOffset2Idx.end() ||
3897 ZeroOffsetIdx->second != LowestIdx)
3898 return false;
3899
3900 // We will reuse the pointer from the load which ends up at byte offset 0. It
3901 // may not use index 0.
3902 Register Ptr = LowestIdxLoad->getPointerReg();
3903 const MachineMemOperand &MMO = LowestIdxLoad->getMMO();
3904 LegalityQuery::MemDesc MMDesc(MMO);
3905 MMDesc.MemoryTy = Ty;
3906 if (!isLegalOrBeforeLegalizer(
3907 {TargetOpcode::G_LOAD, {Ty, MRI.getType(Ptr)}, {MMDesc}}))
3908 return false;
3909 auto PtrInfo = MMO.getPointerInfo();
3910 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, WideMemSizeInBits / 8);
3911
3912 // Load must be allowed and fast on the target.
3914 auto &DL = MF.getDataLayout();
3915 unsigned Fast = 0;
3916 if (!getTargetLowering().allowsMemoryAccess(C, DL, Ty, *NewMMO, &Fast) ||
3917 !Fast)
3918 return false;
3919
3920 MatchInfo = [=](MachineIRBuilder &MIB) {
3921 MIB.setInstrAndDebugLoc(*LatestLoad);
3922 Register LoadDst = NeedsBSwap ? MRI.cloneVirtualRegister(Dst) : Dst;
3923 MIB.buildLoad(LoadDst, Ptr, *NewMMO);
3924 if (NeedsBSwap)
3925 MIB.buildBSwap(Dst, LoadDst);
3926 };
3927 return true;
3928}
3929
3930bool CombinerHelper::matchExtendThroughPhis(MachineInstr &MI,
3931 MachineInstr *&ExtMI) {
3932 auto &PHI = cast<GPhi>(MI);
3933 Register DstReg = PHI.getReg(0);
3934
3935 // TODO: Extending a vector may be expensive, don't do this until heuristics
3936 // are better.
3937 if (MRI.getType(DstReg).isVector())
3938 return false;
3939
3940 // Try to match a phi, whose only use is an extend.
3941 if (!MRI.hasOneNonDBGUse(DstReg))
3942 return false;
3943 ExtMI = &*MRI.use_instr_nodbg_begin(DstReg);
3944 switch (ExtMI->getOpcode()) {
3945 case TargetOpcode::G_ANYEXT:
3946 return true; // G_ANYEXT is usually free.
3947 case TargetOpcode::G_ZEXT:
3948 case TargetOpcode::G_SEXT:
3949 break;
3950 default:
3951 return false;
3952 }
3953
3954 // If the target is likely to fold this extend away, don't propagate.
3956 return false;
3957
3958 // We don't want to propagate the extends unless there's a good chance that
3959 // they'll be optimized in some way.
3960 // Collect the unique incoming values.
3962 for (unsigned I = 0; I < PHI.getNumIncomingValues(); ++I) {
3963 auto *DefMI = getDefIgnoringCopies(PHI.getIncomingValue(I), MRI);
3964 switch (DefMI->getOpcode()) {
3965 case TargetOpcode::G_LOAD:
3966 case TargetOpcode::G_TRUNC:
3967 case TargetOpcode::G_SEXT:
3968 case TargetOpcode::G_ZEXT:
3969 case TargetOpcode::G_ANYEXT:
3970 case TargetOpcode::G_CONSTANT:
3971 InSrcs.insert(DefMI);
3972 // Don't try to propagate if there are too many places to create new
3973 // extends, chances are it'll increase code size.
3974 if (InSrcs.size() > 2)
3975 return false;
3976 break;
3977 default:
3978 return false;
3979 }
3980 }
3981 return true;
3982}
3983
3984void CombinerHelper::applyExtendThroughPhis(MachineInstr &MI,
3985 MachineInstr *&ExtMI) {
3986 auto &PHI = cast<GPhi>(MI);
3987 Register DstReg = ExtMI->getOperand(0).getReg();
3988 LLT ExtTy = MRI.getType(DstReg);
3989
3990 // Propagate the extension into the defining block of each incoming reg.
3991 // Use a SetVector here because PHIs can have duplicate edges, and we want
3992 // deterministic iteration order.
3993 SmallSetVector<MachineInstr *, 8> SrcMIs;
3994 SmallDenseMap<MachineInstr *, MachineInstr *, 8> OldToNewSrcMap;
3995 for (unsigned I = 0; I < PHI.getNumIncomingValues(); ++I) {
3996 auto SrcReg = PHI.getIncomingValue(I);
3997 auto *SrcMI = MRI.getVRegDef(SrcReg);
3998 if (!SrcMIs.insert(SrcMI))
3999 continue;
4000
4001 // Build an extend after each src inst.
4002 auto *MBB = SrcMI->getParent();
4003 MachineBasicBlock::iterator InsertPt = ++SrcMI->getIterator();
4004 if (InsertPt != MBB->end() && InsertPt->isPHI())
4005 InsertPt = MBB->getFirstNonPHI();
4006
4007 Builder.setInsertPt(*SrcMI->getParent(), InsertPt);
4008 Builder.setDebugLoc(MI.getDebugLoc());
4009 auto NewExt = Builder.buildExtOrTrunc(ExtMI->getOpcode(), ExtTy, SrcReg);
4010 OldToNewSrcMap[SrcMI] = NewExt;
4011 }
4012
4013 // Create a new phi with the extended inputs.
4015 auto NewPhi = Builder.buildInstrNoInsert(TargetOpcode::G_PHI);
4016 NewPhi.addDef(DstReg);
4017 for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) {
4018 if (!MO.isReg()) {
4019 NewPhi.addMBB(MO.getMBB());
4020 continue;
4021 }
4022 auto *NewSrc = OldToNewSrcMap[MRI.getVRegDef(MO.getReg())];
4023 NewPhi.addUse(NewSrc->getOperand(0).getReg());
4024 }
4025 Builder.insertInstr(NewPhi);
4026 ExtMI->eraseFromParent();
4027}
4028
4029bool CombinerHelper::matchExtractVecEltBuildVec(MachineInstr &MI,
4030 Register &Reg) {
4031 assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT);
4032 // If we have a constant index, look for a G_BUILD_VECTOR source
4033 // and find the source register that the index maps to.
4034 Register SrcVec = MI.getOperand(1).getReg();
4035 LLT SrcTy = MRI.getType(SrcVec);
4036
4037 auto Cst = getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
4038 if (!Cst || Cst->Value.getZExtValue() >= SrcTy.getNumElements())
4039 return false;
4040
4041 unsigned VecIdx = Cst->Value.getZExtValue();
4042
4043 // Check if we have a build_vector or build_vector_trunc with an optional
4044 // trunc in front.
4045 MachineInstr *SrcVecMI = MRI.getVRegDef(SrcVec);
4046 if (SrcVecMI->getOpcode() == TargetOpcode::G_TRUNC) {
4047 SrcVecMI = MRI.getVRegDef(SrcVecMI->getOperand(1).getReg());
4048 }
4049
4050 if (SrcVecMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR &&
4051 SrcVecMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR_TRUNC)
4052 return false;
4053
4054 EVT Ty(getMVTForLLT(SrcTy));
4055 if (!MRI.hasOneNonDBGUse(SrcVec) &&
4056 !getTargetLowering().aggressivelyPreferBuildVectorSources(Ty))
4057 return false;
4058
4059 Reg = SrcVecMI->getOperand(VecIdx + 1).getReg();
4060 return true;
4061}
4062
4063void CombinerHelper::applyExtractVecEltBuildVec(MachineInstr &MI,
4064 Register &Reg) {
4065 // Check the type of the register, since it may have come from a
4066 // G_BUILD_VECTOR_TRUNC.
4067 LLT ScalarTy = MRI.getType(Reg);
4068 Register DstReg = MI.getOperand(0).getReg();
4069 LLT DstTy = MRI.getType(DstReg);
4070
4071 if (ScalarTy != DstTy) {
4072 assert(ScalarTy.getSizeInBits() > DstTy.getSizeInBits());
4073 Builder.buildTrunc(DstReg, Reg);
4074 MI.eraseFromParent();
4075 return;
4076 }
4078}
4079
4080bool CombinerHelper::matchExtractAllEltsFromBuildVector(
4081 MachineInstr &MI,
4082 SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) {
4083 assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
4084 // This combine tries to find build_vector's which have every source element
4085 // extracted using G_EXTRACT_VECTOR_ELT. This can happen when transforms like
4086 // the masked load scalarization is run late in the pipeline. There's already
4087 // a combine for a similar pattern starting from the extract, but that
4088 // doesn't attempt to do it if there are multiple uses of the build_vector,
4089 // which in this case is true. Starting the combine from the build_vector
4090 // feels more natural than trying to find sibling nodes of extracts.
4091 // E.g.
4092 // %vec(<4 x s32>) = G_BUILD_VECTOR %s1(s32), %s2, %s3, %s4
4093 // %ext1 = G_EXTRACT_VECTOR_ELT %vec, 0
4094 // %ext2 = G_EXTRACT_VECTOR_ELT %vec, 1
4095 // %ext3 = G_EXTRACT_VECTOR_ELT %vec, 2
4096 // %ext4 = G_EXTRACT_VECTOR_ELT %vec, 3
4097 // ==>
4098 // replace ext{1,2,3,4} with %s{1,2,3,4}
4099
4100 Register DstReg = MI.getOperand(0).getReg();
4101 LLT DstTy = MRI.getType(DstReg);
4102 unsigned NumElts = DstTy.getNumElements();
4103
4104 SmallBitVector ExtractedElts(NumElts);
4105 for (MachineInstr &II : MRI.use_nodbg_instructions(DstReg)) {
4106 if (II.getOpcode() != TargetOpcode::G_EXTRACT_VECTOR_ELT)
4107 return false;
4108 auto Cst = getIConstantVRegVal(II.getOperand(2).getReg(), MRI);
4109 if (!Cst)
4110 return false;
4111 unsigned Idx = Cst->getZExtValue();
4112 if (Idx >= NumElts)
4113 return false; // Out of range.
4114 ExtractedElts.set(Idx);
4115 SrcDstPairs.emplace_back(
4116 std::make_pair(MI.getOperand(Idx + 1).getReg(), &II));
4117 }
4118 // Match if every element was extracted.
4119 return ExtractedElts.all();
4120}
4121
4122void CombinerHelper::applyExtractAllEltsFromBuildVector(
4123 MachineInstr &MI,
4124 SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) {
4125 assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
4126 for (auto &Pair : SrcDstPairs) {
4127 auto *ExtMI = Pair.second;
4128 replaceRegWith(MRI, ExtMI->getOperand(0).getReg(), Pair.first);
4129 ExtMI->eraseFromParent();
4130 }
4131 MI.eraseFromParent();
4132}
4133
4134void CombinerHelper::applyBuildFn(
4135 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4136 applyBuildFnNoErase(MI, MatchInfo);
4137 MI.eraseFromParent();
4138}
4139
4140void CombinerHelper::applyBuildFnMO(const MachineOperand &MO,
4141 BuildFnTy &MatchInfo) {
4144 MatchInfo(Builder);
4145 Root->eraseFromParent();
4146}
4147
4148void CombinerHelper::applyBuildFnNoErase(
4149 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4150 MatchInfo(Builder);
4151}
4152
4153bool CombinerHelper::matchOrShiftToFunnelShift(MachineInstr &MI,
4154 BuildFnTy &MatchInfo) {
4155 assert(MI.getOpcode() == TargetOpcode::G_OR);
4156
4157 Register Dst = MI.getOperand(0).getReg();
4158 LLT Ty = MRI.getType(Dst);
4159 unsigned BitWidth = Ty.getScalarSizeInBits();
4160
4161 Register ShlSrc, ShlAmt, LShrSrc, LShrAmt, Amt;
4162 unsigned FshOpc = 0;
4163
4164 // Match (or (shl ...), (lshr ...)).
4165 if (!mi_match(Dst, MRI,
4166 // m_GOr() handles the commuted version as well.
4167 m_GOr(m_GShl(m_Reg(ShlSrc), m_Reg(ShlAmt)),
4168 m_GLShr(m_Reg(LShrSrc), m_Reg(LShrAmt)))))
4169 return false;
4170
4171 // Given constants C0 and C1 such that C0 + C1 is bit-width:
4172 // (or (shl x, C0), (lshr y, C1)) -> (fshl x, y, C0) or (fshr x, y, C1)
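// E.g. for 32-bit values, (or (shl x, 8), (lshr y, 24)) -> (fshr x, y, 24):
// both place the low 24 bits of x above the high 8 bits of y.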
4173 int64_t CstShlAmt, CstLShrAmt;
4174 if (mi_match(ShlAmt, MRI, m_ICstOrSplat(CstShlAmt)) &&
4175 mi_match(LShrAmt, MRI, m_ICstOrSplat(CstLShrAmt)) &&
4176 CstShlAmt + CstLShrAmt == BitWidth) {
4177 FshOpc = TargetOpcode::G_FSHR;
4178 Amt = LShrAmt;
4179
4180 } else if (mi_match(LShrAmt, MRI,
4182 ShlAmt == Amt) {
4183 // (or (shl x, amt), (lshr y, (sub bw, amt))) -> (fshl x, y, amt)
4184 FshOpc = TargetOpcode::G_FSHL;
4185
4186 } else if (mi_match(ShlAmt, MRI,
4188 LShrAmt == Amt) {
4189 // (or (shl x, (sub bw, amt)), (lshr y, amt)) -> (fshr x, y, amt)
4190 FshOpc = TargetOpcode::G_FSHR;
4191
4192 } else {
4193 return false;
4194 }
4195
4196 LLT AmtTy = MRI.getType(Amt);
4197 if (!isLegalOrBeforeLegalizer({FshOpc, {Ty, AmtTy}}))
4198 return false;
4199
4200 MatchInfo = [=](MachineIRBuilder &B) {
4201 B.buildInstr(FshOpc, {Dst}, {ShlSrc, LShrSrc, Amt});
4202 };
4203 return true;
4204}
4205
4206/// Match an FSHL or FSHR that can be combined to a ROTR or ROTL rotate.
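/// This relies on fshl(x, x, amt) == rotl(x, amt) and
/// fshr(x, x, amt) == rotr(x, amt) when both data operands are the same.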
4207bool CombinerHelper::matchFunnelShiftToRotate(MachineInstr &MI) {
4208 unsigned Opc = MI.getOpcode();
4209 assert(Opc == TargetOpcode::G_FSHL || Opc == TargetOpcode::G_FSHR);
4210 Register X = MI.getOperand(1).getReg();
4211 Register Y = MI.getOperand(2).getReg();
4212 if (X != Y)
4213 return false;
4214 unsigned RotateOpc =
4215 Opc == TargetOpcode::G_FSHL ? TargetOpcode::G_ROTL : TargetOpcode::G_ROTR;
4216 return isLegalOrBeforeLegalizer({RotateOpc, {MRI.getType(X), MRI.getType(Y)}});
4217}
4218
4219void CombinerHelper::applyFunnelShiftToRotate(MachineInstr &MI) {
4220 unsigned Opc = MI.getOpcode();
4221 assert(Opc == TargetOpcode::G_FSHL || Opc == TargetOpcode::G_FSHR);
4222 bool IsFSHL = Opc == TargetOpcode::G_FSHL;
4224 MI.setDesc(Builder.getTII().get(IsFSHL ? TargetOpcode::G_ROTL
4225 : TargetOpcode::G_ROTR));
4226 MI.removeOperand(2);
4228}
4229
4230// Fold (rot x, c) -> (rot x, c % BitSize)
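// E.g. (rotl x:s32, 33) is equivalent to (rotl x:s32, 1).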
4231bool CombinerHelper::matchRotateOutOfRange(MachineInstr &MI) {
4232 assert(MI.getOpcode() == TargetOpcode::G_ROTL ||
4233 MI.getOpcode() == TargetOpcode::G_ROTR);
4234 unsigned Bitsize =
4235 MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits();
4236 Register AmtReg = MI.getOperand(2).getReg();
4237 bool OutOfRange = false;
4238 auto MatchOutOfRange = [Bitsize, &OutOfRange](const Constant *C) {
4239 if (auto *CI = dyn_cast<ConstantInt>(C))
4240 OutOfRange |= CI->getValue().uge(Bitsize);
4241 return true;
4242 };
4243 return matchUnaryPredicate(MRI, AmtReg, MatchOutOfRange) && OutOfRange;
4244}
4245
4246void CombinerHelper::applyRotateOutOfRange(MachineInstr &MI) {
4247 assert(MI.getOpcode() == TargetOpcode::G_ROTL ||
4248 MI.getOpcode() == TargetOpcode::G_ROTR);
4249 unsigned Bitsize =
4250 MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits();
4251 Register Amt = MI.getOperand(2).getReg();
4252 LLT AmtTy = MRI.getType(Amt);
4253 auto Bits = Builder.buildConstant(AmtTy, Bitsize);
4254 Amt = Builder.buildURem(AmtTy, MI.getOperand(2).getReg(), Bits).getReg(0);
4256 MI.getOperand(2).setReg(Amt);
4258}
4259
4260bool CombinerHelper::matchICmpToTrueFalseKnownBits(MachineInstr &MI,
4261 int64_t &MatchInfo) {
4262 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
4263 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
4264 auto KnownLHS = KB->getKnownBits(MI.getOperand(2).getReg());
4265 auto KnownRHS = KB->getKnownBits(MI.getOperand(3).getReg());
4266 std::optional<bool> KnownVal;
4267 switch (Pred) {
4268 default:
4269 llvm_unreachable("Unexpected G_ICMP predicate?");
4270 case CmpInst::ICMP_EQ:
4271 KnownVal = KnownBits::eq(KnownLHS, KnownRHS);
4272 break;
4273 case CmpInst::ICMP_NE:
4274 KnownVal = KnownBits::ne(KnownLHS, KnownRHS);
4275 break;
4276 case CmpInst::ICMP_SGE:
4277 KnownVal = KnownBits::sge(KnownLHS, KnownRHS);
4278 break;
4279 case CmpInst::ICMP_SGT:
4280 KnownVal = KnownBits::sgt(KnownLHS, KnownRHS);
4281 break;
4282 case CmpInst::ICMP_SLE:
4283 KnownVal = KnownBits::sle(KnownLHS, KnownRHS);
4284 break;
4285 case CmpInst::ICMP_SLT:
4286 KnownVal = KnownBits::slt(KnownLHS, KnownRHS);
4287 break;
4288 case CmpInst::ICMP_UGE:
4289 KnownVal = KnownBits::uge(KnownLHS, KnownRHS);
4290 break;
4291 case CmpInst::ICMP_UGT:
4292 KnownVal = KnownBits::ugt(KnownLHS, KnownRHS);
4293 break;
4294 case CmpInst::ICMP_ULE:
4295 KnownVal = KnownBits::ule(KnownLHS, KnownRHS);
4296 break;
4297 case CmpInst::ICMP_ULT:
4298 KnownVal = KnownBits::ult(KnownLHS, KnownRHS);
4299 break;
4300 }
4301 if (!KnownVal)
4302 return false;
4303 MatchInfo =
4304 *KnownVal
4306 /*IsVector = */
4307 MRI.getType(MI.getOperand(0).getReg()).isVector(),
4308 /* IsFP = */ false)
4309 : 0;
4310 return true;
4311}
4312
4313bool CombinerHelper::matchICmpToLHSKnownBits(
4314 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4315 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
4316 // Given:
4317 //
4318 // %x = G_WHATEVER (... x is known to be 0 or 1 ...)
4319 // %cmp = G_ICMP ne %x, 0
4320 //
4321 // Or:
4322 //
4323 // %x = G_WHATEVER (... x is known to be 0 or 1 ...)
4324 // %cmp = G_ICMP eq %x, 1
4325 //
4326 // We can replace %cmp with %x assuming true is 1 on the target.
4327 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
4328 if (!CmpInst::isEquality(Pred))
4329 return false;
4330 Register Dst = MI.getOperand(0).getReg();
4331 LLT DstTy = MRI.getType(Dst);
4333 /* IsFP = */ false) != 1)
4334 return false;
4335 int64_t OneOrZero = Pred == CmpInst::ICMP_EQ;
4336 if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICst(OneOrZero)))
4337 return false;
4338 Register LHS = MI.getOperand(2).getReg();
4339 auto KnownLHS = KB->getKnownBits(LHS);
4340 if (KnownLHS.getMinValue() != 0 || KnownLHS.getMaxValue() != 1)
4341 return false;
4342 // Make sure replacing Dst with the LHS is a legal operation.
4343 LLT LHSTy = MRI.getType(LHS);
4344 unsigned LHSSize = LHSTy.getSizeInBits();
4345 unsigned DstSize = DstTy.getSizeInBits();
4346 unsigned Op = TargetOpcode::COPY;
4347 if (DstSize != LHSSize)
4348 Op = DstSize < LHSSize ? TargetOpcode::G_TRUNC : TargetOpcode::G_ZEXT;
4349 if (!isLegalOrBeforeLegalizer({Op, {DstTy, LHSTy}}))
4350 return false;
4351 MatchInfo = [=](MachineIRBuilder &B) { B.buildInstr(Op, {Dst}, {LHS}); };
4352 return true;
4353}
4354
4355// Replace (and (or x, c1), c2) with (and x, c2) iff c1 & c2 == 0
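// E.g. (and (or x, 0xF0), 0x0F) -> (and x, 0x0F): the or can only set bits
// that the mask discards anyway.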
4356bool CombinerHelper::matchAndOrDisjointMask(
4357 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4358 assert(MI.getOpcode() == TargetOpcode::G_AND);
4359
4360 // Ignore vector types to simplify matching the two constants.
4361 // TODO: do this for vectors and scalars via a demanded bits analysis.
4362 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4363 if (Ty.isVector())
4364 return false;
4365
4366 Register Src;
4367 Register AndMaskReg;
4368 int64_t AndMaskBits;
4369 int64_t OrMaskBits;
4370 if (!mi_match(MI, MRI,
4371 m_GAnd(m_GOr(m_Reg(Src), m_ICst(OrMaskBits)),
4372 m_all_of(m_ICst(AndMaskBits), m_Reg(AndMaskReg)))))
4373 return false;
4374
4375 // Check if OrMask could turn on any bits in Src.
4376 if (AndMaskBits & OrMaskBits)
4377 return false;
4378
4379 MatchInfo = [=, &MI](MachineIRBuilder &B) {
4381 // Canonicalize the result to have the constant on the RHS.
4382 if (MI.getOperand(1).getReg() == AndMaskReg)
4383 MI.getOperand(2).setReg(AndMaskReg);
4384 MI.getOperand(1).setReg(Src);
4386 };
4387 return true;
4388}
4389
4390/// Form a G_SBFX from a G_SEXT_INREG fed by a right shift.
4391bool CombinerHelper::matchBitfieldExtractFromSExtInReg(
4392 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4393 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
4394 Register Dst = MI.getOperand(0).getReg();
4395 Register Src = MI.getOperand(1).getReg();
4396 LLT Ty = MRI.getType(Src);
4398 if (!LI || !LI->isLegalOrCustom({TargetOpcode::G_SBFX, {Ty, ExtractTy}}))
4399 return false;
4400 int64_t Width = MI.getOperand(2).getImm();
4401 Register ShiftSrc;
4402 int64_t ShiftImm;
4403 if (!mi_match(
4404 Src, MRI,
4405 m_OneNonDBGUse(m_any_of(m_GAShr(m_Reg(ShiftSrc), m_ICst(ShiftImm)),
4406 m_GLShr(m_Reg(ShiftSrc), m_ICst(ShiftImm))))))
4407 return false;
4408 if (ShiftImm < 0 || ShiftImm + Width > Ty.getScalarSizeInBits())
4409 return false;
4410
4411 MatchInfo = [=](MachineIRBuilder &B) {
4412 auto Cst1 = B.buildConstant(ExtractTy, ShiftImm);
4413 auto Cst2 = B.buildConstant(ExtractTy, Width);
4414 B.buildSbfx(Dst, ShiftSrc, Cst1, Cst2);
4415 };
4416 return true;
4417}
4418
4419/// Form a G_UBFX from "(a srl b) & mask", where b and mask are constants.
4420bool CombinerHelper::matchBitfieldExtractFromAnd(
4421 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4422 assert(MI.getOpcode() == TargetOpcode::G_AND);
4423 Register Dst = MI.getOperand(0).getReg();
4424 LLT Ty = MRI.getType(Dst);
4426 if (LI && !LI->isLegalOrCustom({TargetOpcode::G_UBFX, {Ty, ExtractTy}}))
4427 return false;
4428
4429 int64_t AndImm, LSBImm;
4430 Register ShiftSrc;
4431 const unsigned Size = Ty.getScalarSizeInBits();
4432 if (!mi_match(MI.getOperand(0).getReg(), MRI,
4433 m_GAnd(m_OneNonDBGUse(m_GLShr(m_Reg(ShiftSrc), m_ICst(LSBImm))),
4434 m_ICst(AndImm))))
4435 return false;
4436
4437 // The mask is a mask of the low bits iff imm & (imm+1) == 0.
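// E.g. 0x0000ffff passes (0x0000ffff & 0x00010000 == 0), while 0x00ff00ff
// fails (0x00ff00ff & 0x00ff0100 != 0).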
4438 auto MaybeMask = static_cast<uint64_t>(AndImm);
4439 if (MaybeMask & (MaybeMask + 1))
4440 return false;
4441
4442 // LSB must fit within the register.
4443 if (static_cast<uint64_t>(LSBImm) >= Size)
4444 return false;
4445
4446 uint64_t Width = APInt(Size, AndImm).countr_one();
4447 MatchInfo = [=](MachineIRBuilder &B) {
4448 auto WidthCst = B.buildConstant(ExtractTy, Width);
4449 auto LSBCst = B.buildConstant(ExtractTy, LSBImm);
4450 B.buildInstr(TargetOpcode::G_UBFX, {Dst}, {ShiftSrc, LSBCst, WidthCst});
4451 };
4452 return true;
4453}
4454
4455bool CombinerHelper::matchBitfieldExtractFromShr(
4456 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4457 const unsigned Opcode = MI.getOpcode();
4458 assert(Opcode == TargetOpcode::G_ASHR || Opcode == TargetOpcode::G_LSHR);
4459
4460 const Register Dst = MI.getOperand(0).getReg();
4461
4462 const unsigned ExtrOpcode = Opcode == TargetOpcode::G_ASHR
4463 ? TargetOpcode::G_SBFX
4464 : TargetOpcode::G_UBFX;
4465
4466 // Check if the type we would use for the extract is legal
4467 LLT Ty = MRI.getType(Dst);
4469 if (!LI || !LI->isLegalOrCustom({ExtrOpcode, {Ty, ExtractTy}}))
4470 return false;
4471
4472 Register ShlSrc;
4473 int64_t ShrAmt;
4474 int64_t ShlAmt;
4475 const unsigned Size = Ty.getScalarSizeInBits();
4476
4477 // Try to match shr (shl x, c1), c2
4478 if (!mi_match(Dst, MRI,
4479 m_BinOp(Opcode,
4480 m_OneNonDBGUse(m_GShl(m_Reg(ShlSrc), m_ICst(ShlAmt))),
4481 m_ICst(ShrAmt))))
4482 return false;
4483
4484 // Make sure that the shift sizes can fit a bitfield extract
4485 if (ShlAmt < 0 || ShlAmt > ShrAmt || ShrAmt >= Size)
4486 return false;
4487
4488 // Skip this combine if the G_SEXT_INREG combine could handle it
4489 if (Opcode == TargetOpcode::G_ASHR && ShlAmt == ShrAmt)
4490 return false;
4491
4492 // Calculate start position and width of the extract
4493 const int64_t Pos = ShrAmt - ShlAmt;
4494 const int64_t Width = Size - ShrAmt;
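// E.g. for s32, (lshr (shl x, 4), 8) reads 24 bits of x starting at bit 4,
// i.e. G_UBFX x, 4, 24.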
4495
4496 MatchInfo = [=](MachineIRBuilder &B) {
4497 auto WidthCst = B.buildConstant(ExtractTy, Width);
4498 auto PosCst = B.buildConstant(ExtractTy, Pos);
4499 B.buildInstr(ExtrOpcode, {Dst}, {ShlSrc, PosCst, WidthCst});
4500 };
4501 return true;
4502}
4503
4504bool CombinerHelper::matchBitfieldExtractFromShrAnd(
4505 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4506 const unsigned Opcode = MI.getOpcode();
4507 assert(Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_ASHR);
4508
4509 const Register Dst = MI.getOperand(0).getReg();
4510 LLT Ty = MRI.getType(Dst);
4512 if (LI && !LI->isLegalOrCustom({TargetOpcode::G_UBFX, {Ty, ExtractTy}}))
4513 return false;
4514
4515 // Try to match shr (and x, c1), c2
4516 Register AndSrc;
4517 int64_t ShrAmt;
4518 int64_t SMask;
4519 if (!mi_match(Dst, MRI,
4520 m_BinOp(Opcode,
4521 m_OneNonDBGUse(m_GAnd(m_Reg(AndSrc), m_ICst(SMask))),
4522 m_ICst(ShrAmt))))
4523 return false;
4524
4525 const unsigned Size = Ty.getScalarSizeInBits();
4526 if (ShrAmt < 0 || ShrAmt >= Size)
4527 return false;
4528
4529 // If the shift subsumes the mask, emit the 0 directly.
4530 if (0 == (SMask >> ShrAmt)) {
4531 MatchInfo = [=](MachineIRBuilder &B) {
4532 B.buildConstant(Dst, 0);
4533 };
4534 return true;
4535 }
4536
4537 // Check that ubfx can do the extraction, with no holes in the mask.
4538 uint64_t UMask = SMask;
4539 UMask |= maskTrailingOnes<uint64_t>(ShrAmt);
4540 UMask &= maskTrailingOnes<uint64_t>(Size);
4541 if (!isMask_64(UMask))
4542 return false;
4543
4544 // Calculate start position and width of the extract.
4545 const int64_t Pos = ShrAmt;
4546 const int64_t Width = llvm::countr_one(UMask) - ShrAmt;
4547
4548 // It's preferable to keep the shift, rather than form G_SBFX.
4549 // TODO: remove the G_AND via demanded bits analysis.
4550 if (Opcode == TargetOpcode::G_ASHR && Width + ShrAmt == Size)
4551 return false;
4552
4553 MatchInfo = [=](MachineIRBuilder &B) {
4554 auto WidthCst = B.buildConstant(ExtractTy, Width);
4555 auto PosCst = B.buildConstant(ExtractTy, Pos);
4556 B.buildInstr(TargetOpcode::G_UBFX, {Dst}, {AndSrc, PosCst, WidthCst});
4557 };
4558 return true;
4559}
4560
4561bool CombinerHelper::reassociationCanBreakAddressingModePattern(
4562 MachineInstr &MI) {
4563 auto &PtrAdd = cast<GPtrAdd>(MI);
4564
4565 Register Src1Reg = PtrAdd.getBaseReg();
4566 auto *Src1Def = getOpcodeDef<GPtrAdd>(Src1Reg, MRI);
4567 if (!Src1Def)
4568 return false;
4569
4570 Register Src2Reg = PtrAdd.getOffsetReg();
4571
4572 if (MRI.hasOneNonDBGUse(Src1Reg))
4573 return false;
4574
4575 auto C1 = getIConstantVRegVal(Src1Def->getOffsetReg(), MRI);
4576 if (!C1)
4577 return false;
4578 auto C2 = getIConstantVRegVal(Src2Reg, MRI);
4579 if (!C2)
4580 return false;
4581
4582 const APInt &C1APIntVal = *C1;
4583 const APInt &C2APIntVal = *C2;
4584 const int64_t CombinedValue = (C1APIntVal + C2APIntVal).getSExtValue();
4585
4586 for (auto &UseMI : MRI.use_nodbg_instructions(PtrAdd.getReg(0))) {
4587 // This combine may end up running before ptrtoint/inttoptr combines
4588 // manage to eliminate redundant conversions, so try to look through them.
4589 MachineInstr *ConvUseMI = &UseMI;
4590 unsigned ConvUseOpc = ConvUseMI->getOpcode();
4591 while (ConvUseOpc == TargetOpcode::G_INTTOPTR ||
4592 ConvUseOpc == TargetOpcode::G_PTRTOINT) {
4593 Register DefReg = ConvUseMI->getOperand(0).getReg();
4594 if (!MRI.hasOneNonDBGUse(DefReg))
4595 break;
4596 ConvUseMI = &*MRI.use_instr_nodbg_begin(DefReg);
4597 ConvUseOpc = ConvUseMI->getOpcode();
4598 }
4599 auto *LdStMI = dyn_cast<GLoadStore>(ConvUseMI);
4600 if (!LdStMI)
4601 continue;
4602 // Is x[offset2] already not a legal addressing mode? If so then
4603 // reassociating the constants breaks nothing (we test offset2 because
4604 // that's the one we hope to fold into the load or store).
4606 AM.HasBaseReg = true;
4607 AM.BaseOffs = C2APIntVal.getSExtValue();
4608 unsigned AS = MRI.getType(LdStMI->getPointerReg()).getAddressSpace();
4609 Type *AccessTy = getTypeForLLT(LdStMI->getMMO().getMemoryType(),
4610 PtrAdd.getMF()->getFunction().getContext());
4611 const auto &TLI = *PtrAdd.getMF()->getSubtarget().getTargetLowering();
4612 if (!TLI.isLegalAddressingMode(PtrAdd.getMF()->getDataLayout(), AM,
4613 AccessTy, AS))
4614 continue;
4615
4616 // Would x[offset1+offset2] still be a legal addressing mode?
4617 AM.BaseOffs = CombinedValue;
4618 if (!TLI.isLegalAddressingMode(PtrAdd.getMF()->getDataLayout(), AM,
4619 AccessTy, AS))
4620 return true;
4621 }
4622
4623 return false;
4624}
4625
4626bool CombinerHelper::matchReassocConstantInnerRHS(GPtrAdd &MI,
4627 MachineInstr *RHS,
4628 BuildFnTy &MatchInfo) {
4629 // G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C)
4630 Register Src1Reg = MI.getOperand(1).getReg();
4631 if (RHS->getOpcode() != TargetOpcode::G_ADD)
4632 return false;
4633 auto C2 = getIConstantVRegVal(RHS->getOperand(2).getReg(), MRI);
4634 if (!C2)
4635 return false;
4636
4637 MatchInfo = [=, &MI](MachineIRBuilder &B) {
4638 LLT PtrTy = MRI.getType(MI.getOperand(0).getReg());
4639
4640 auto NewBase =
4641 Builder.buildPtrAdd(PtrTy, Src1Reg, RHS->getOperand(1).getReg());
4643 MI.getOperand(1).setReg(NewBase.getReg(0));
4644 MI.getOperand(2).setReg(RHS->getOperand(2).getReg());
4646 };
4647 return !reassociationCanBreakAddressingModePattern(MI);
4648}
4649
4650bool CombinerHelper::matchReassocConstantInnerLHS(GPtrAdd &MI,
4651 MachineInstr *LHS,
4652 MachineInstr *RHS,
4653 BuildFnTy &MatchInfo) {
4654 // G_PTR_ADD(G_PTR_ADD(X, C), Y) -> G_PTR_ADD(G_PTR_ADD(X, Y), C)
4655 // if and only if (G_PTR_ADD X, C) has one use.
4656 Register LHSBase;
4657 std::optional<ValueAndVReg> LHSCstOff;
4658 if (!mi_match(MI.getBaseReg(), MRI,
4659 m_OneNonDBGUse(m_GPtrAdd(m_Reg(LHSBase), m_GCst(LHSCstOff)))))
4660 return false;
4661
4662 auto *LHSPtrAdd = cast<GPtrAdd>(LHS);
4663 MatchInfo = [=, &MI](MachineIRBuilder &B) {
4664 // When we change LHSPtrAdd's offset register we might cause it to use a reg
4665 // before its def. Sink the instruction right before the outer PTR_ADD to
4666 // ensure this doesn't happen.
4667 LHSPtrAdd->moveBefore(&MI);
4668 Register RHSReg = MI.getOffsetReg();
4669 // Reusing the matched constant vreg may give a type mismatch if it came via an extend/trunc, so build a new constant of RHSReg's type.
4670 auto NewCst = B.buildConstant(MRI.getType(RHSReg), LHSCstOff->Value);
4672 MI.getOperand(2).setReg(NewCst.getReg(0));
4674 Observer.changingInstr(*LHSPtrAdd);
4675 LHSPtrAdd->getOperand(2).setReg(RHSReg);
4676 Observer.changedInstr(*LHSPtrAdd);
4677 };
4678 return !reassociationCanBreakAddressingModePattern(MI);
4679}
4680
4681bool CombinerHelper::matchReassocFoldConstantsInSubTree(GPtrAdd &MI,
4682 MachineInstr *LHS,
4683 MachineInstr *RHS,
4684 BuildFnTy &MatchInfo) {
4685 // G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)
4686 auto *LHSPtrAdd = dyn_cast<GPtrAdd>(LHS);
4687 if (!LHSPtrAdd)
4688 return false;
4689
4690 Register Src2Reg = MI.getOperand(2).getReg();
4691 Register LHSSrc1 = LHSPtrAdd->getBaseReg();
4692 Register LHSSrc2 = LHSPtrAdd->getOffsetReg();
4693 auto C1 = getIConstantVRegVal(LHSSrc2, MRI);
4694 if (!C1)
4695 return false;
4696 auto C2 = getIConstantVRegVal(Src2Reg, MRI);
4697 if (!C2)
4698 return false;
4699
4700 MatchInfo = [=, &MI](MachineIRBuilder &B) {
4701 auto NewCst = B.buildConstant(MRI.getType(Src2Reg), *C1 + *C2);
4703 MI.getOperand(1).setReg(LHSSrc1);
4704 MI.getOperand(2).setReg(NewCst.getReg(0));
4706 };
4707 return !reassociationCanBreakAddressingModePattern(MI);
4708}
4709
4710bool CombinerHelper::matchReassocPtrAdd(MachineInstr &MI,
4711 BuildFnTy &MatchInfo) {
4712 auto &PtrAdd = cast<GPtrAdd>(MI);
4713 // We're trying to match a few pointer computation patterns here for
4714 // re-association opportunities.
4715 // 1) Isolating a constant operand to be on the RHS, e.g.:
4716 // G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C)
4717 //
4718 // 2) Folding two constants in each sub-tree as long as such folding
4719 // doesn't break a legal addressing mode.
4720 // G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)
4721 //
4722 // 3) Move a constant from the LHS of an inner op to the RHS of the outer.
4723 // G_PTR_ADD(G_PTR_ADD(X, C), Y) -> G_PTR_ADD(G_PTR_ADD(X, Y), C)
4724 // iff (G_PTR_ADD X, C) has one use.
4725 MachineInstr *LHS = MRI.getVRegDef(PtrAdd.getBaseReg());
4726 MachineInstr *RHS = MRI.getVRegDef(PtrAdd.getOffsetReg());
4727
4728 // Try to match example 2.
4729 if (matchReassocFoldConstantsInSubTree(PtrAdd, LHS, RHS, MatchInfo))
4730 return true;
4731
4732 // Try to match example 3.
4733 if (matchReassocConstantInnerLHS(PtrAdd, LHS, RHS, MatchInfo))
4734 return true;
4735
4736 // Try to match example 1.
4737 if (matchReassocConstantInnerRHS(PtrAdd, RHS, MatchInfo))
4738 return true;
4739
4740 return false;
4741}
4742bool CombinerHelper::tryReassocBinOp(unsigned Opc, Register DstReg,
4743 Register OpLHS, Register OpRHS,
4744 BuildFnTy &MatchInfo) {
4745 LLT OpRHSTy = MRI.getType(OpRHS);
4746 MachineInstr *OpLHSDef = MRI.getVRegDef(OpLHS);
4747
4748 if (OpLHSDef->getOpcode() != Opc)
4749 return false;
4750
4751 MachineInstr *OpRHSDef = MRI.getVRegDef(OpRHS);
4752 Register OpLHSLHS = OpLHSDef->getOperand(1).getReg();
4753 Register OpLHSRHS = OpLHSDef->getOperand(2).getReg();
4754
4755 // If the inner op is (X op C), pull the constant out so it can be folded with
4756 // other constants in the expression tree. Folding is not guaranteed so we
4757 // might have (C1 op C2). In that case do not pull a constant out because it
4758 // won't help and can lead to infinite loops.
4761 if (isConstantOrConstantSplatVector(*OpRHSDef, MRI)) {
4762 // (Opc (Opc X, C1), C2) -> (Opc X, (Opc C1, C2))
4763 MatchInfo = [=](MachineIRBuilder &B) {
4764 auto NewCst = B.buildInstr(Opc, {OpRHSTy}, {OpLHSRHS, OpRHS});
4765 B.buildInstr(Opc, {DstReg}, {OpLHSLHS, NewCst});
4766 };
4767 return true;
4768 }
4769 if (getTargetLowering().isReassocProfitable(MRI, OpLHS, OpRHS)) {
4770 // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
4771 // iff (op x, c1) has one use
4772 MatchInfo = [=](MachineIRBuilder &B) {
4773 auto NewLHSLHS = B.buildInstr(Opc, {OpRHSTy}, {OpLHSLHS, OpRHS});
4774 B.buildInstr(Opc, {DstReg}, {NewLHSLHS, OpLHSRHS});
4775 };
4776 return true;
4777 }
4778 }
4779
4780 return false;
4781}
4782
4783bool CombinerHelper::matchReassocCommBinOp(MachineInstr &MI,
4784 BuildFnTy &MatchInfo) {
4785 // We don't check if the reassociation will break a legal addressing mode
4786 // here since pointer arithmetic is handled by G_PTR_ADD.
4787 unsigned Opc = MI.getOpcode();
4788 Register DstReg = MI.getOperand(0).getReg();
4789 Register LHSReg = MI.getOperand(1).getReg();
4790 Register RHSReg = MI.getOperand(2).getReg();
4791
4792 if (tryReassocBinOp(Opc, DstReg, LHSReg, RHSReg, MatchInfo))
4793 return true;
4794 if (tryReassocBinOp(Opc, DstReg, RHSReg, LHSReg, MatchInfo))
4795 return true;
4796 return false;
4797}
4798
4800 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
4801 Register SrcOp = MI.getOperand(1).getReg();
4802
4803 if (auto MaybeCst = ConstantFoldCastOp(MI.getOpcode(), DstTy, SrcOp, MRI)) {
4804 MatchInfo = *MaybeCst;
4805 return true;
4806 }
4807
4808 return false;
4809}
4810
4812 Register Op1 = MI.getOperand(1).getReg();
4813 Register Op2 = MI.getOperand(2).getReg();
4814 auto MaybeCst = ConstantFoldBinOp(MI.getOpcode(), Op1, Op2, MRI);
4815 if (!MaybeCst)
4816 return false;
4817 MatchInfo = *MaybeCst;
4818 return true;
4819}
4820
4822 Register Op1 = MI.getOperand(1).getReg();
4823 Register Op2 = MI.getOperand(2).getReg();
4824 auto MaybeCst = ConstantFoldFPBinOp(MI.getOpcode(), Op1, Op2, MRI);
4825 if (!MaybeCst)
4826 return false;
4827 MatchInfo =
4828 ConstantFP::get(MI.getMF()->getFunction().getContext(), *MaybeCst);
4829 return true;
4830}
4831
4832bool CombinerHelper::matchConstantFoldFMA(MachineInstr &MI,
4833 ConstantFP *&MatchInfo) {
4834 assert(MI.getOpcode() == TargetOpcode::G_FMA ||
4835 MI.getOpcode() == TargetOpcode::G_FMAD);
4836 auto [_, Op1, Op2, Op3] = MI.getFirst4Regs();
4837
4838 const ConstantFP *Op3Cst = getConstantFPVRegVal(Op3, MRI);
4839 if (!Op3Cst)
4840 return false;
4841
4842 const ConstantFP *Op2Cst = getConstantFPVRegVal(Op2, MRI);
4843 if (!Op2Cst)
4844 return false;
4845
4846 const ConstantFP *Op1Cst = getConstantFPVRegVal(Op1, MRI);
4847 if (!Op1Cst)
4848 return false;
4849
4850 APFloat Op1F = Op1Cst->getValueAPF();
4851 Op1F.fusedMultiplyAdd(Op2Cst->getValueAPF(), Op3Cst->getValueAPF(),
4852 APFloat::rmNearestTiesToEven);
4853 MatchInfo = ConstantFP::get(MI.getMF()->getFunction().getContext(), Op1F);
4854 return true;
4855}
4856
4857bool CombinerHelper::matchNarrowBinopFeedingAnd(
4858 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4859 // Look for a binop feeding into an AND with a mask:
4860 //
4861 // %add = G_ADD %lhs, %rhs
4862 // %and = G_AND %add, 000...11111111
4863 //
4864 // Check if it's possible to perform the binop at a narrower width and zext
4865 // back to the original width like so:
4866 //
4867 // %narrow_lhs = G_TRUNC %lhs
4868 // %narrow_rhs = G_TRUNC %rhs
4869 // %narrow_add = G_ADD %narrow_lhs, %narrow_rhs
4870 // %new_add = G_ZEXT %narrow_add
4871 // %and = G_AND %new_add, 000...11111111
4872 //
4873 // This can allow later combines to eliminate the G_AND if it turns out
4874 // that the mask is irrelevant.
4875 assert(MI.getOpcode() == TargetOpcode::G_AND);
4876 Register Dst = MI.getOperand(0).getReg();
4877 Register AndLHS = MI.getOperand(1).getReg();
4878 Register AndRHS = MI.getOperand(2).getReg();
4879 LLT WideTy = MRI.getType(Dst);
4880
4881 // If the potential binop has more than one use, then it's possible that one
4882 // of those uses will need its full width.
4883 if (!WideTy.isScalar() || !MRI.hasOneNonDBGUse(AndLHS))
4884 return false;
4885
4886 // Check if the LHS feeding the AND is impacted by the high bits that we're
4887 // masking out.
4888 //
4889 // e.g. for 64-bit x, y:
4890 //
4891 // add_64(x, y) & 65535 == zext(add_16(trunc(x), trunc(y))) & 65535
4892 MachineInstr *LHSInst = getDefIgnoringCopies(AndLHS, MRI);
4893 if (!LHSInst)
4894 return false;
4895 unsigned LHSOpc = LHSInst->getOpcode();
4896 switch (LHSOpc) {
4897 default:
4898 return false;
4899 case TargetOpcode::G_ADD:
4900 case TargetOpcode::G_SUB:
4901 case TargetOpcode::G_MUL:
4902 case TargetOpcode::G_AND:
4903 case TargetOpcode::G_OR:
4904 case TargetOpcode::G_XOR:
4905 break;
4906 }
4907
4908 // Find the mask on the RHS.
4909 auto Cst = getIConstantVRegValWithLookThrough(AndRHS, MRI);
4910 if (!Cst)
4911 return false;
4912 auto Mask = Cst->Value;
4913 if (!Mask.isMask())
4914 return false;
4915
4916 // No point in combining if there's nothing to truncate.
4917 unsigned NarrowWidth = Mask.countr_one();
4918 if (NarrowWidth == WideTy.getSizeInBits())
4919 return false;
4920 LLT NarrowTy = LLT::scalar(NarrowWidth);
4921
4922 // Check if adding the zext + truncates could be harmful.
4923 auto &MF = *MI.getMF();
4924 const auto &TLI = getTargetLowering();
4925 LLVMContext &Ctx = MF.getFunction().getContext();
4926 auto &DL = MF.getDataLayout();
4927 if (!TLI.isTruncateFree(WideTy, NarrowTy, DL, Ctx) ||
4928 !TLI.isZExtFree(NarrowTy, WideTy, DL, Ctx))
4929 return false;
4930 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_TRUNC, {NarrowTy, WideTy}}) ||
4931 !isLegalOrBeforeLegalizer({TargetOpcode::G_ZEXT, {WideTy, NarrowTy}}))
4932 return false;
4933 Register BinOpLHS = LHSInst->getOperand(1).getReg();
4934 Register BinOpRHS = LHSInst->getOperand(2).getReg();
4935 MatchInfo = [=, &MI](MachineIRBuilder &B) {
4936 auto NarrowLHS = Builder.buildTrunc(NarrowTy, BinOpLHS);
4937 auto NarrowRHS = Builder.buildTrunc(NarrowTy, BinOpRHS);
4938 auto NarrowBinOp =
4939 Builder.buildInstr(LHSOpc, {NarrowTy}, {NarrowLHS, NarrowRHS});
4940 auto Ext = Builder.buildZExt(WideTy, NarrowBinOp);
4942 MI.getOperand(1).setReg(Ext.getReg(0));
4944 };
4945 return true;
4946}
4947
4948bool CombinerHelper::matchMulOBy2(MachineInstr &MI, BuildFnTy &MatchInfo) {
4949 unsigned Opc = MI.getOpcode();
4950 assert(Opc == TargetOpcode::G_UMULO || Opc == TargetOpcode::G_SMULO);
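// (G_*MULO x, 2) -> (G_*ADDO x, x): x * 2 overflows exactly when x + x does.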
4951
4952 if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICstOrSplat(2)))
4953 return false;
4954
4955 MatchInfo = [=, &MI](MachineIRBuilder &B) {
4957 unsigned NewOpc = Opc == TargetOpcode::G_UMULO ? TargetOpcode::G_UADDO
4958 : TargetOpcode::G_SADDO;
4959 MI.setDesc(Builder.getTII().get(NewOpc));
4960 MI.getOperand(3).setReg(MI.getOperand(2).getReg());
4962 };
4963 return true;
4964}
4965
4966bool CombinerHelper::matchMulOBy0(MachineInstr &MI, BuildFnTy &MatchInfo) {
4967 // (G_*MULO x, 0) -> 0 + no carry out
4968 assert(MI.getOpcode() == TargetOpcode::G_UMULO ||
4969 MI.getOpcode() == TargetOpcode::G_SMULO);
4970 if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICstOrSplat(0)))
4971 return false;
4972 Register Dst = MI.getOperand(0).getReg();
4973 Register Carry = MI.getOperand(1).getReg();
4976 return false;
4977 MatchInfo = [=](MachineIRBuilder &B) {
4978 B.buildConstant(Dst, 0);
4979 B.buildConstant(Carry, 0);
4980 };
4981 return true;
4982}
4983
4984bool CombinerHelper::matchAddEToAddO(MachineInstr &MI, BuildFnTy &MatchInfo) {
4985 // (G_*ADDE x, y, 0) -> (G_*ADDO x, y)
4986 // (G_*SUBE x, y, 0) -> (G_*SUBO x, y)
4987 assert(MI.getOpcode() == TargetOpcode::G_UADDE ||
4988 MI.getOpcode() == TargetOpcode::G_SADDE ||
4989 MI.getOpcode() == TargetOpcode::G_USUBE ||
4990 MI.getOpcode() == TargetOpcode::G_SSUBE);
4991 if (!mi_match(MI.getOperand(4).getReg(), MRI, m_SpecificICstOrSplat(0)))
4992 return false;
4993 MatchInfo = [&](MachineIRBuilder &B) {
4994 unsigned NewOpcode;
4995 switch (MI.getOpcode()) {
4996 case TargetOpcode::G_UADDE:
4997 NewOpcode = TargetOpcode::G_UADDO;
4998 break;
4999 case TargetOpcode::G_SADDE:
5000 NewOpcode = TargetOpcode::G_SADDO;
5001 break;
5002 case TargetOpcode::G_USUBE:
5003 NewOpcode = TargetOpcode::G_USUBO;
5004 break;
5005 case TargetOpcode::G_SSUBE:
5006 NewOpcode = TargetOpcode::G_SSUBO;
5007 break;
5008 }
5010 MI.setDesc(B.getTII().get(NewOpcode));
5011 MI.removeOperand(4);
5013 };
5014 return true;
5015}
5016
5017bool CombinerHelper::matchSubAddSameReg(MachineInstr &MI,
5018 BuildFnTy &MatchInfo) {
5019 assert(MI.getOpcode() == TargetOpcode::G_SUB);
5020 Register Dst = MI.getOperand(0).getReg();
5021 // (x + y) - z -> x (if y == z)
5022 // (x + y) - z -> y (if x == z)
5023 Register X, Y, Z;
5024 if (mi_match(Dst, MRI, m_GSub(m_GAdd(m_Reg(X), m_Reg(Y)), m_Reg(Z)))) {
5025 Register ReplaceReg;
5026 int64_t CstX, CstY;
5027 if (Y == Z || (mi_match(Y, MRI, m_ICstOrSplat(CstY)) &&
5029 ReplaceReg = X;
5030 else if (X == Z || (mi_match(X, MRI, m_ICstOrSplat(CstX)) &&
5032 ReplaceReg = Y;
5033 if (ReplaceReg) {
5034 MatchInfo = [=](MachineIRBuilder &B) { B.buildCopy(Dst, ReplaceReg); };
5035 return true;
5036 }
5037 }
5038
5039 // x - (y + z) -> 0 - y (if x == z)
5040 // x - (y + z) -> 0 - z (if x == y)
5041 if (mi_match(Dst, MRI, m_GSub(m_Reg(X), m_GAdd(m_Reg(Y), m_Reg(Z))))) {
5042 Register ReplaceReg;
5043 int64_t CstX;
5044 if (X == Z || (mi_match(X, MRI, m_ICstOrSplat(CstX)) &&
5046 ReplaceReg = Y;
5047 else if (X == Y || (mi_match(X, MRI, m_ICstOrSplat(CstX)) &&
5049 ReplaceReg = Z;
5050 if (ReplaceReg) {
5051 MatchInfo = [=](MachineIRBuilder &B) {
5052 auto Zero = B.buildConstant(MRI.getType(Dst), 0);
5053 B.buildSub(Dst, Zero, ReplaceReg);
5054 };
5055 return true;
5056 }
5057 }
5058 return false;
5059}
5060
5061MachineInstr *CombinerHelper::buildUDivUsingMul(MachineInstr &MI) {
5062 assert(MI.getOpcode() == TargetOpcode::G_UDIV);
5063 auto &UDiv = cast<GenericMachineInstr>(MI);
5064 Register Dst = UDiv.getReg(0);
5065 Register LHS = UDiv.getReg(1);
5066 Register RHS = UDiv.getReg(2);
5067 LLT Ty = MRI.getType(Dst);
5068 LLT ScalarTy = Ty.getScalarType();
5069 const unsigned EltBits = ScalarTy.getScalarSizeInBits();
5071 LLT ScalarShiftAmtTy = ShiftAmtTy.getScalarType();
5072 auto &MIB = Builder;
5073
5074 bool UseNPQ = false;
5075 SmallVector<Register, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
5076
5077 auto BuildUDIVPattern = [&](const Constant *C) {
5078 auto *CI = cast<ConstantInt>(C);
5079 const APInt &Divisor = CI->getValue();
5080
5081 bool SelNPQ = false;
5082 APInt Magic(Divisor.getBitWidth(), 0);
5083 unsigned PreShift = 0, PostShift = 0;
5084
5085 // Magic algorithm doesn't work for division by 1. We need to emit a select
5086 // at the end.
5087 // TODO: Use undef values for divisor of 1.
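// For example, a 32-bit udiv by 5 uses the magic multiplier 0xCCCCCCCD with
// a post-shift of 2 and no NPQ fixup: umulh(x, 0xCCCCCCCD) >> 2 == x / 5.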
5088 if (!Divisor.isOne()) {
5091
5092 Magic = std::move(magics.Magic);
5093
5094 assert(magics.PreShift < Divisor.getBitWidth() &&
5095 "We shouldn't generate an undefined shift!");
5096 assert(magics.PostShift < Divisor.getBitWidth() &&
5097 "We shouldn't generate an undefined shift!");
5098 assert((!magics.IsAdd || magics.PreShift == 0) && "Unexpected pre-shift");
5099 PreShift = magics.PreShift;
5100 PostShift = magics.PostShift;
5101 SelNPQ = magics.IsAdd;
5102 }
5103
5104 PreShifts.push_back(
5105 MIB.buildConstant(ScalarShiftAmtTy, PreShift).getReg(0));
5106 MagicFactors.push_back(MIB.buildConstant(ScalarTy, Magic).getReg(0));
5107 NPQFactors.push_back(
5108 MIB.buildConstant(ScalarTy,
5109 SelNPQ ? APInt::getOneBitSet(EltBits, EltBits - 1)
5110 : APInt::getZero(EltBits))
5111 .getReg(0));
5112 PostShifts.push_back(
5113 MIB.buildConstant(ScalarShiftAmtTy, PostShift).getReg(0));
5114 UseNPQ |= SelNPQ;
5115 return true;
5116 };
5117
5118 // Collect the shifts/magic values from each element.
5119 bool Matched = matchUnaryPredicate(MRI, RHS, BuildUDIVPattern);
5120 (void)Matched;
5121 assert(Matched && "Expected unary predicate match to succeed");
5122
5123 Register PreShift, PostShift, MagicFactor, NPQFactor;
5124 auto *RHSDef = getOpcodeDef<GBuildVector>(RHS, MRI);
5125 if (RHSDef) {
5126 PreShift = MIB.buildBuildVector(ShiftAmtTy, PreShifts).getReg(0);
5127 MagicFactor = MIB.buildBuildVector(Ty, MagicFactors).getReg(0);
5128 NPQFactor = MIB.buildBuildVector(Ty, NPQFactors).getReg(0);
5129 PostShift = MIB.buildBuildVector(ShiftAmtTy, PostShifts).getReg(0);
5130 } else {
5132 "Non-build_vector operation should have been a scalar");
5133 PreShift = PreShifts[0];
5134 MagicFactor = MagicFactors[0];
5135 PostShift = PostShifts[0];
5136 }
5137
5138 Register Q = LHS;
5139 Q = MIB.buildLShr(Ty, Q, PreShift).getReg(0);
5140
5141 // Multiply the numerator (operand 0) by the magic value.
5142 Q = MIB.buildUMulH(Ty, Q, MagicFactor).getReg(0);
5143
5144 if (UseNPQ) {
5145 Register NPQ = MIB.buildSub(Ty, LHS, Q).getReg(0);
5146
5147 // For vectors we might have a mix of non-NPQ/NPQ paths, so use
5148 // G_UMULH to act as a SRL-by-1 for NPQ, else multiply by zero.
5149 if (Ty.isVector())
5150 NPQ = MIB.buildUMulH(Ty, NPQ, NPQFactor).getReg(0);
5151 else
5152 NPQ = MIB.buildLShr(Ty, NPQ, MIB.buildConstant(ShiftAmtTy, 1)).getReg(0);
5153
5154 Q = MIB.buildAdd(Ty, NPQ, Q).getReg(0);
5155 }
5156
5157 Q = MIB.buildLShr(Ty, Q, PostShift).getReg(0);
5158 auto One = MIB.buildConstant(Ty, 1);
5159 auto IsOne = MIB.buildICmp(
5160 CmpInst::Predicate::ICMP_EQ,
5161 Ty.isScalar() ? LLT::scalar(1) : Ty.changeElementSize(1), RHS, One);
5162 return MIB.buildSelect(Ty, IsOne, LHS, Q);
5163}
5164
5165 bool CombinerHelper::matchUDivByConst(MachineInstr &MI) {
5166 assert(MI.getOpcode() == TargetOpcode::G_UDIV);
5167 Register Dst = MI.getOperand(0).getReg();
5168 Register RHS = MI.getOperand(2).getReg();
5169 LLT DstTy = MRI.getType(Dst);
5170 auto *RHSDef = MRI.getVRegDef(RHS);
5171 if (!isConstantOrConstantVector(*RHSDef, MRI))
5172 return false;
5173
5174 auto &MF = *MI.getMF();
5175 AttributeList Attr = MF.getFunction().getAttributes();
5176 const auto &TLI = getTargetLowering();
5177 LLVMContext &Ctx = MF.getFunction().getContext();
5178 auto &DL = MF.getDataLayout();
5179 if (TLI.isIntDivCheap(getApproximateEVTForLLT(DstTy, DL, Ctx), Attr))
5180 return false;
5181
5182 // Don't do this for minsize because the instruction sequence is usually
5183 // larger.
5184 if (MF.getFunction().hasMinSize())
5185 return false;
5186
5187 // Don't do this if the types are not going to be legal.
5188 if (LI) {
5189 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_MUL, {DstTy, DstTy}}))
5190 return false;
5191 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMULH, {DstTy}}))
5192 return false;
5193 if (!isLegalOrBeforeLegalizer(
5194 {TargetOpcode::G_ICMP,
5195 {DstTy.isVector() ? DstTy.changeElementSize(1) : LLT::scalar(1),
5196 DstTy}}))
5197 return false;
5198 }
5199
5200 return matchUnaryPredicate(
5201 MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); });
5202}
5203
5204 void CombinerHelper::applyUDivByConst(MachineInstr &MI) {
5205 auto *NewMI = buildUDivUsingMul(MI);
5206 replaceSingleDefInstWithReg(MI, NewMI->getOperand(0).getReg());
5207}
5208
5209 bool CombinerHelper::matchSDivByConst(MachineInstr &MI) {
5210 assert(MI.getOpcode() == TargetOpcode::G_SDIV && "Expected SDIV");
5211 Register Dst = MI.getOperand(0).getReg();
5212 Register RHS = MI.getOperand(2).getReg();
5213 LLT DstTy = MRI.getType(Dst);
5214
5215 auto &MF = *MI.getMF();
5216 AttributeList Attr = MF.getFunction().getAttributes();
5217 const auto &TLI = getTargetLowering();
5218 LLVMContext &Ctx = MF.getFunction().getContext();
5219 auto &DL = MF.getDataLayout();
5220 if (TLI.isIntDivCheap(getApproximateEVTForLLT(DstTy, DL, Ctx), Attr))
5221 return false;
5222
5223 // Don't do this for minsize because the instruction sequence is usually
5224 // larger.
5225 if (MF.getFunction().hasMinSize())
5226 return false;
5227
5228 // If the sdiv has an 'exact' flag we can use a simpler lowering.
5229 if (MI.getFlag(MachineInstr::MIFlag::IsExact)) {
5230 return matchUnaryPredicate(
5231 MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); });
5232 }
5233
5234 // Don't support the general case for now.
5235 return false;
5236}
5237
5238 void CombinerHelper::applySDivByConst(MachineInstr &MI) {
5239 auto *NewMI = buildSDivUsingMul(MI);
5240 replaceSingleDefInstWithReg(MI, NewMI->getOperand(0).getReg());
5241}
5242
5243 MachineInstr *CombinerHelper::buildSDivUsingMul(MachineInstr &MI) {
5244 assert(MI.getOpcode() == TargetOpcode::G_SDIV && "Expected SDIV");
5245 auto &SDiv = cast<GenericMachineInstr>(MI);
5246 Register Dst = SDiv.getReg(0);
5247 Register LHS = SDiv.getReg(1);
5248 Register RHS = SDiv.getReg(2);
5249 LLT Ty = MRI.getType(Dst);
5250 LLT ScalarTy = Ty.getScalarType();
5251 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
5252 LLT ScalarShiftAmtTy = ShiftAmtTy.getScalarType();
5253 auto &MIB = Builder;
5254
5255 bool UseSRA = false;
5256 SmallVector<Register, 16> Shifts, Factors;
5257
5258 auto *RHSDef = cast<GenericMachineInstr>(getDefIgnoringCopies(RHS, MRI));
5259 bool IsSplat = getIConstantSplatVal(*RHSDef, MRI).has_value();
5260
5261 auto BuildSDIVPattern = [&](const Constant *C) {
5262 // Don't recompute inverses for each splat element.
5263 if (IsSplat && !Factors.empty()) {
5264 Shifts.push_back(Shifts[0]);
5265 Factors.push_back(Factors[0]);
5266 return true;
5267 }
5268
5269 auto *CI = cast<ConstantInt>(C);
5270 APInt Divisor = CI->getValue();
5271 unsigned Shift = Divisor.countr_zero();
5272 if (Shift) {
5273 Divisor.ashrInPlace(Shift);
5274 UseSRA = true;
5275 }
5276
5277 // Calculate the multiplicative inverse modulo BW.
5278 // 2^W requires W + 1 bits, so we have to extend and then truncate.
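// Illustrative example, not from the source: for an exact s32 sdiv by 6,
// the power of two is divided out with an exact ashr by 1 and the result
// is multiplied by the inverse of 3 modulo 2^32, i.e. 0xAAAAAAAB
// (3 * 0xAAAAAAAB == 0x200000001 == 1 mod 2^32).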
5279 APInt Factor = Divisor.multiplicativeInverse();
5280 Shifts.push_back(MIB.buildConstant(ScalarShiftAmtTy, Shift).getReg(0));
5281 Factors.push_back(MIB.buildConstant(ScalarTy, Factor).getReg(0));
5282 return true;
5283 };
5284
5285 // Collect all magic values from the build vector.
5286 bool Matched = matchUnaryPredicate(MRI, RHS, BuildSDIVPattern);
5287 (void)Matched;
5288 assert(Matched && "Expected unary predicate match to succeed");
5289
5290 Register Shift, Factor;
5291 if (Ty.isVector()) {
5292 Shift = MIB.buildBuildVector(ShiftAmtTy, Shifts).getReg(0);
5293 Factor = MIB.buildBuildVector(Ty, Factors).getReg(0);
5294 } else {
5295 Shift = Shifts[0];
5296 Factor = Factors[0];
5297 }
5298
5299 Register Res = LHS;
5300
5301 if (UseSRA)
5302 Res = MIB.buildAShr(Ty, Res, Shift, MachineInstr::IsExact).getReg(0);
5303
5304 return MIB.buildMul(Ty, Res, Factor);
5305}
5306
5307 bool CombinerHelper::matchDivByPow2(MachineInstr &MI, bool IsSigned) {
5308 assert((MI.getOpcode() == TargetOpcode::G_SDIV ||
5309 MI.getOpcode() == TargetOpcode::G_UDIV) &&
5310 "Expected SDIV or UDIV");
5311 auto &Div = cast<GenericMachineInstr>(MI);
5312 Register RHS = Div.getReg(2);
5313 auto MatchPow2 = [&](const Constant *C) {
5314 auto *CI = dyn_cast<ConstantInt>(C);
5315 return CI && (CI->getValue().isPowerOf2() ||
5316 (IsSigned && CI->getValue().isNegatedPowerOf2()));
5317 };
5318 return matchUnaryPredicate(MRI, RHS, MatchPow2, /*AllowUndefs=*/false);
5319}
5320
5321 void CombinerHelper::applySDivByPow2(MachineInstr &MI) {
5322 assert(MI.getOpcode() == TargetOpcode::G_SDIV && "Expected SDIV");
5323 auto &SDiv = cast<GenericMachineInstr>(MI);
5324 Register Dst = SDiv.getReg(0);
5325 Register LHS = SDiv.getReg(1);
5326 Register RHS = SDiv.getReg(2);
5327 LLT Ty = MRI.getType(Dst);
5328 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
5329 LLT CCVT =
5330 Ty.isVector() ? LLT::vector(Ty.getElementCount(), 1) : LLT::scalar(1);
5331
5332 // Effectively we want to lower G_SDIV %lhs, %rhs, where %rhs is a power of 2,
5333 // to the following version:
5334 //
5335 // %c1 = G_CTTZ %rhs
5336 // %inexact = G_SUB $bitwidth, %c1
5337 // %sign = G_ASHR %lhs, $(bitwidth - 1)
5338 // %lshr = G_LSHR %sign, %inexact
5339 // %add = G_ADD %lhs, %lshr
5340 // %ashr = G_ASHR %add, %c1
5341 // %ashr = G_SELECT %isoneorallones, %lhs, %ashr
5342 // %zero = G_CONSTANT $0
5343 // %neg = G_NEG %ashr
5344 // %isneg = G_ICMP SLT %rhs, %zero
5345 // %res = G_SELECT %isneg, %neg, %ashr
5346
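// Illustrative example, not from the source: for s32 %lhs sdiv 8 this is
// roughly (%lhs + ((%lhs ashr 31) lshr 29)) ashr 3, i.e. negative inputs
// have |divisor| - 1 = 7 added before the shift so the quotient rounds
// toward zero; the selects below then handle divisors of 1, -1 and
// negative powers of two.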
5347 unsigned BitWidth = Ty.getScalarSizeInBits();
5348 auto Zero = Builder.buildConstant(Ty, 0);
5349
5350 auto Bits = Builder.buildConstant(ShiftAmtTy, BitWidth);
5351 auto C1 = Builder.buildCTTZ(ShiftAmtTy, RHS);
5352 auto Inexact = Builder.buildSub(ShiftAmtTy, Bits, C1);
5353 // Splat the sign bit into the register
5354 auto Sign = Builder.buildAShr(
5355 Ty, LHS, Builder.buildConstant(ShiftAmtTy, BitWidth - 1));
5356
5357 // Add (LHS < 0) ? abs(divisor) - 1 : 0 so the arithmetic shift rounds toward zero.
5358 auto LSrl = Builder.buildLShr(Ty, Sign, Inexact);
5359 auto Add = Builder.buildAdd(Ty, LHS, LSrl);
5360 auto AShr = Builder.buildAShr(Ty, Add, C1);
5361
5362 // Special case: (sdiv X, 1) -> X
5363 // Special Case: (sdiv X, -1) -> 0-X
5364 auto One = Builder.buildConstant(Ty, 1);
5365 auto MinusOne = Builder.buildConstant(Ty, -1);
5366 auto IsOne = Builder.buildICmp(CmpInst::Predicate::ICMP_EQ, CCVT, RHS, One);
5367 auto IsMinusOne =
5368 Builder.buildICmp(CmpInst::Predicate::ICMP_EQ, CCVT, RHS, MinusOne);
5369 auto IsOneOrMinusOne = Builder.buildOr(CCVT, IsOne, IsMinusOne);
5370 AShr = Builder.buildSelect(Ty, IsOneOrMinusOne, LHS, AShr);
5371
5372 // If divided by a positive value, we're done. Otherwise, the result must be
5373 // negated.
5374 auto Neg = Builder.buildNeg(Ty, AShr);
5375 auto IsNeg = Builder.buildICmp(CmpInst::Predicate::ICMP_SLT, CCVT, RHS, Zero);
5376 Builder.buildSelect(MI.getOperand(0).getReg(), IsNeg, Neg, AShr);
5377 MI.eraseFromParent();
5378}
5379
5380 void CombinerHelper::applyUDivByPow2(MachineInstr &MI) {
5381 assert(MI.getOpcode() == TargetOpcode::G_UDIV && "Expected UDIV");
5382 auto &UDiv = cast<GenericMachineInstr>(MI);
5383 Register Dst = UDiv.getReg(0);
5384 Register LHS = UDiv.getReg(1);
5385 Register RHS = UDiv.getReg(2);
5386 LLT Ty = MRI.getType(Dst);
5387 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
5388
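// E.g. a udiv by 8 simply becomes G_LSHR %lhs, G_CTTZ(8), i.e.
// G_LSHR %lhs, 3 (illustrative; the divisor was already matched as a
// power of two by matchDivByPow2).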
5389 auto C1 = Builder.buildCTTZ(ShiftAmtTy, RHS);
5390 Builder.buildLShr(MI.getOperand(0).getReg(), LHS, C1);
5391 MI.eraseFromParent();
5392}
5393
5394 bool CombinerHelper::matchUMulHToLShr(MachineInstr &MI) {
5395 assert(MI.getOpcode() == TargetOpcode::G_UMULH);
5396 Register RHS = MI.getOperand(2).getReg();
5397 Register Dst = MI.getOperand(0).getReg();
5398 LLT Ty = MRI.getType(Dst);
5399 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
5400 auto MatchPow2ExceptOne = [&](const Constant *C) {
5401 if (auto *CI = dyn_cast<ConstantInt>(C))
5402 return CI->getValue().isPowerOf2() && !CI->getValue().isOne();
5403 return false;
5404 };
5405 if (!matchUnaryPredicate(MRI, RHS, MatchPow2ExceptOne, false))
5406 return false;
5407 return isLegalOrBeforeLegalizer({TargetOpcode::G_LSHR, {Ty, ShiftAmtTy}});
5408}
5409
5410 void CombinerHelper::applyUMulHToLShr(MachineInstr &MI) {
5411 Register LHS = MI.getOperand(1).getReg();
5412 Register RHS = MI.getOperand(2).getReg();
5413 Register Dst = MI.getOperand(0).getReg();
5414 LLT Ty = MRI.getType(Dst);
5415 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
5416 unsigned NumEltBits = Ty.getScalarSizeInBits();
5417
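// Illustrative example, not from the source: on s32, G_UMULH %x, 16
// computes (zext(%x) * 16) >> 32 == %x >> (32 - log2(16)), so it is
// rewritten to G_LSHR %x, 28.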
5418 auto LogBase2 = buildLogBase2(RHS, Builder);
5419 auto ShiftAmt =
5420 Builder.buildSub(Ty, Builder.buildConstant(Ty, NumEltBits), LogBase2);
5421 auto Trunc = Builder.buildZExtOrTrunc(ShiftAmtTy, ShiftAmt);
5422 Builder.buildLShr(Dst, LHS, Trunc);
5423 MI.eraseFromParent();
5424}
5425
5426 bool CombinerHelper::matchRedundantNegOperands(MachineInstr &MI,
5427 BuildFnTy &MatchInfo) {
5428 unsigned Opc = MI.getOpcode();
5429 assert(Opc == TargetOpcode::G_FADD || Opc == TargetOpcode::G_FSUB ||
5430 Opc == TargetOpcode::G_FMUL || Opc == TargetOpcode::G_FDIV ||
5431 Opc == TargetOpcode::G_FMAD || Opc == TargetOpcode::G_FMA);
5432
5433 Register Dst = MI.getOperand(0).getReg();
5434 Register X = MI.getOperand(1).getReg();
5435 Register Y = MI.getOperand(2).getReg();
5436 LLT Type = MRI.getType(Dst);
5437
5438 // fold (fadd x, fneg(y)) -> (fsub x, y)
5439 // fold (fadd fneg(y), x) -> (fsub x, y)
5440 // G_FADD is commutative so both cases are checked by m_GFAdd
5441 if (mi_match(Dst, MRI, m_GFAdd(m_Reg(X), m_GFNeg(m_Reg(Y)))) &&
5442 isLegalOrBeforeLegalizer({TargetOpcode::G_FSUB, {Type}})) {
5443 Opc = TargetOpcode::G_FSUB;
5444 }
5445 /// fold (fsub x, fneg(y)) -> (fadd x, y)
5446 else if (mi_match(Dst, MRI, m_GFSub(m_Reg(X), m_GFNeg(m_Reg(Y)))) &&
5447 isLegalOrBeforeLegalizer({TargetOpcode::G_FADD, {Type}})) {
5448 Opc = TargetOpcode::G_FADD;
5449 }
5450 // fold (fmul fneg(x), fneg(y)) -> (fmul x, y)
5451 // fold (fdiv fneg(x), fneg(y)) -> (fdiv x, y)
5452 // fold (fmad fneg(x), fneg(y), z) -> (fmad x, y, z)
5453 // fold (fma fneg(x), fneg(y), z) -> (fma x, y, z)
5454 else if ((Opc == TargetOpcode::G_FMUL || Opc == TargetOpcode::G_FDIV ||
5455 Opc == TargetOpcode::G_FMAD || Opc == TargetOpcode::G_FMA) &&
5456 mi_match(X, MRI, m_GFNeg(m_Reg(X))) &&
5457 mi_match(Y, MRI, m_GFNeg(m_Reg(Y)))) {
5458 // no opcode change
5459 } else
5460 return false;
5461
5462 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5463 Observer.changingInstr(MI);
5464 MI.setDesc(B.getTII().get(Opc));
5465 MI.getOperand(1).setReg(X);
5466 MI.getOperand(2).setReg(Y);
5467 Observer.changedInstr(MI);
5468 };
5469 return true;
5470}
5471
5472 bool CombinerHelper::matchFsubToFneg(MachineInstr &MI, Register &MatchInfo) {
5473 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
5474
5475 Register LHS = MI.getOperand(1).getReg();
5476 MatchInfo = MI.getOperand(2).getReg();
5477 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
5478
5479 const auto LHSCst = Ty.isVector()
5480 ? getFConstantSplat(LHS, MRI, /* allowUndef */ true)
5481 : getFConstantVRegValWithLookThrough(LHS, MRI);
5482 if (!LHSCst)
5483 return false;
5484
5485 // -0.0 is always allowed
5486 if (LHSCst->Value.isNegZero())
5487 return true;
5488
5489 // +0.0 is only allowed if nsz is set.
5490 if (LHSCst->Value.isPosZero())
5491 return MI.getFlag(MachineInstr::FmNsz);
5492
5493 return false;
5494}
5495
5496 void CombinerHelper::applyFsubToFneg(MachineInstr &MI, Register &MatchInfo) {
5497 Register Dst = MI.getOperand(0).getReg();
5498 Builder.buildFNeg(
5499 Dst, Builder.buildFCanonicalize(MRI.getType(Dst), MatchInfo).getReg(0));
5500 eraseInst(MI);
5501}
5502
5503/// Checks if \p MI is TargetOpcode::G_FMUL and contractable either
5504/// due to global flags or MachineInstr flags.
5505static bool isContractableFMul(MachineInstr &MI, bool AllowFusionGlobally) {
5506 if (MI.getOpcode() != TargetOpcode::G_FMUL)
5507 return false;
5508 return AllowFusionGlobally || MI.getFlag(MachineInstr::MIFlag::FmContract);
5509}
5510
5511static bool hasMoreUses(const MachineInstr &MI0, const MachineInstr &MI1,
5512 const MachineRegisterInfo &MRI) {
5513 return std::distance(MRI.use_instr_nodbg_begin(MI0.getOperand(0).getReg()),
5514 MRI.use_instr_nodbg_end()) >
5515 std::distance(MRI.use_instr_nodbg_begin(MI1.getOperand(0).getReg()),
5516 MRI.use_instr_nodbg_end());
5517}
5518
5519 bool CombinerHelper::canCombineFMadOrFMA(MachineInstr &MI,
5520 bool &AllowFusionGlobally,
5521 bool &HasFMAD, bool &Aggressive,
5522 bool CanReassociate) {
5523
5524 auto *MF = MI.getMF();
5525 const auto &TLI = *MF->getSubtarget().getTargetLowering();
5526 const TargetOptions &Options = MF->getTarget().Options;
5527 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
5528
5529 if (CanReassociate &&
5530 !(Options.UnsafeFPMath || MI.getFlag(MachineInstr::MIFlag::FmReassoc)))
5531 return false;
5532
5533 // Floating-point multiply-add with intermediate rounding.
5534 HasFMAD = (!isPreLegalize() && TLI.isFMADLegal(MI, DstType));
5535 // Floating-point multiply-add without intermediate rounding.
5536 bool HasFMA = TLI.isFMAFasterThanFMulAndFAdd(*MF, DstType) &&
5537 isLegalOrBeforeLegalizer({TargetOpcode::G_FMA, {DstType}});
5538 // No valid opcode, do not combine.
5539 if (!HasFMAD && !HasFMA)
5540 return false;
5541
5542 AllowFusionGlobally = Options.AllowFPOpFusion == FPOpFusion::Fast ||
5543 Options.UnsafeFPMath || HasFMAD;
5544 // If the addition is not contractable, do not combine.
5545 if (!AllowFusionGlobally && !MI.getFlag(MachineInstr::MIFlag::FmContract))
5546 return false;
5547
5548 Aggressive = TLI.enableAggressiveFMAFusion(DstType);
5549 return true;
5550}
5551
5552 bool CombinerHelper::matchCombineFAddFMulToFMadOrFMA(
5553 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
5554 assert(MI.getOpcode() == TargetOpcode::G_FADD);
5555
5556 bool AllowFusionGlobally, HasFMAD, Aggressive;
5557 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
5558 return false;
5559
5560 Register Op1 = MI.getOperand(1).getReg();
5561 Register Op2 = MI.getOperand(2).getReg();
5562 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
5563 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
5564 unsigned PreferredFusedOpcode =
5565 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
5566
5567 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
5568 // prefer to fold the multiply with fewer uses.
5569 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
5570 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
5571 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
5572 std::swap(LHS, RHS);
5573 }
5574
5575 // fold (fadd (fmul x, y), z) -> (fma x, y, z)
5576 if (isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
5577 (Aggressive || MRI.hasOneNonDBGUse(LHS.Reg))) {
5578 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5579 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5580 {LHS.MI->getOperand(1).getReg(),
5581 LHS.MI->getOperand(2).getReg(), RHS.Reg});
5582 };
5583 return true;
5584 }
5585
5586 // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
5587 if (isContractableFMul(*RHS.MI, AllowFusionGlobally) &&
5588 (Aggressive || MRI.hasOneNonDBGUse(RHS.Reg))) {
5589 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5590 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5591 {RHS.MI->getOperand(1).getReg(),
5592 RHS.MI->getOperand(2).getReg(), LHS.Reg});
5593 };
5594 return true;
5595 }
5596
5597 return false;
5598}
5599
5600 bool CombinerHelper::matchCombineFAddFpExtFMulToFMadOrFMA(
5601 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
5602 assert(MI.getOpcode() == TargetOpcode::G_FADD);
5603
5604 bool AllowFusionGlobally, HasFMAD, Aggressive;
5605 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
5606 return false;
5607
5608 const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
5609 Register Op1 = MI.getOperand(1).getReg();
5610 Register Op2 = MI.getOperand(2).getReg();
5611 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
5612 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
5613 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
5614
5615 unsigned PreferredFusedOpcode =
5616 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
5617
5618 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
5619 // prefer to fold the multiply with fewer uses.
5620 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
5621 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
5622 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
5623 std::swap(LHS, RHS);
5624 }
5625
5626 // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
5627 MachineInstr *FpExtSrc;
5628 if (mi_match(LHS.Reg, MRI, m_GFPExt(m_MInstr(FpExtSrc))) &&
5629 isContractableFMul(*FpExtSrc, AllowFusionGlobally) &&
5630 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
5631 MRI.getType(FpExtSrc->getOperand(1).getReg()))) {
5632 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5633 auto FpExtX = B.buildFPExt(DstType, FpExtSrc->getOperand(1).getReg());
5634 auto FpExtY = B.buildFPExt(DstType, FpExtSrc->getOperand(2).getReg());
5635 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5636 {FpExtX.getReg(0), FpExtY.getReg(0), RHS.Reg});
5637 };
5638 return true;
5639 }
5640
5641 // fold (fadd z, (fpext (fmul x, y))) -> (fma (fpext x), (fpext y), z)
5642 // Note: Commutes FADD operands.
5643 if (mi_match(RHS.Reg, MRI, m_GFPExt(m_MInstr(FpExtSrc))) &&
5644 isContractableFMul(*FpExtSrc, AllowFusionGlobally) &&
5645 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
5646 MRI.getType(FpExtSrc->getOperand(1).getReg()))) {
5647 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5648 auto FpExtX = B.buildFPExt(DstType, FpExtSrc->getOperand(1).getReg());
5649 auto FpExtY = B.buildFPExt(DstType, FpExtSrc->getOperand(2).getReg());
5650 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5651 {FpExtX.getReg(0), FpExtY.getReg(0), LHS.Reg});
5652 };
5653 return true;
5654 }
5655
5656 return false;
5657}
5658
5659 bool CombinerHelper::matchCombineFAddFMAFMulToFMadOrFMA(
5660 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
5661 assert(MI.getOpcode() == TargetOpcode::G_FADD);
5662
5663 bool AllowFusionGlobally, HasFMAD, Aggressive;
5664 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive, true))
5665 return false;
5666
5667 Register Op1 = MI.getOperand(1).getReg();
5668 Register Op2 = MI.getOperand(2).getReg();
5669 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
5670 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
5671 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
5672
5673 unsigned PreferredFusedOpcode =
5674 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
5675
5676 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
5677 // prefer to fold the multiply with fewer uses.
5678 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
5679 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
5680 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
5681 std::swap(LHS, RHS);
5682 }
5683
5684 MachineInstr *FMA = nullptr;
5685 Register Z;
5686 // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z))
5687 if (LHS.MI->getOpcode() == PreferredFusedOpcode &&
5688 (MRI.getVRegDef(LHS.MI->getOperand(3).getReg())->getOpcode() ==
5689 TargetOpcode::G_FMUL) &&
5690 MRI.hasOneNonDBGUse(LHS.MI->getOperand(0).getReg()) &&
5691 MRI.hasOneNonDBGUse(LHS.MI->getOperand(3).getReg())) {
5692 FMA = LHS.MI;
5693 Z = RHS.Reg;
5694 }
5695 // fold (fadd z, (fma x, y, (fmul u, v))) -> (fma x, y, (fma u, v, z))
5696 else if (RHS.MI->getOpcode() == PreferredFusedOpcode &&
5697 (MRI.getVRegDef(RHS.MI->getOperand(3).getReg())->getOpcode() ==
5698 TargetOpcode::G_FMUL) &&
5699 MRI.hasOneNonDBGUse(RHS.MI->getOperand(0).getReg()) &&
5700 MRI.hasOneNonDBGUse(RHS.MI->getOperand(3).getReg())) {
5701 Z = LHS.Reg;
5702 FMA = RHS.MI;
5703 }
5704
5705 if (FMA) {
5706 MachineInstr *FMulMI = MRI.getVRegDef(FMA->getOperand(3).getReg());
5707 Register X = FMA->getOperand(1).getReg();
5708 Register Y = FMA->getOperand(2).getReg();
5709 Register U = FMulMI->getOperand(1).getReg();
5710 Register V = FMulMI->getOperand(2).getReg();
5711
5712 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5713 Register InnerFMA = MRI.createGenericVirtualRegister(DstTy);
5714 B.buildInstr(PreferredFusedOpcode, {InnerFMA}, {U, V, Z});
5715 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5716 {X, Y, InnerFMA});
5717 };
5718 return true;
5719 }
5720
5721 return false;
5722}
5723
5724 bool CombinerHelper::matchCombineFAddFpExtFMAFMulToFMadOrFMAAggressive(
5725 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
5726 assert(MI.getOpcode() == TargetOpcode::G_FADD);
5727
5728 bool AllowFusionGlobally, HasFMAD, Aggressive;
5729 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
5730 return false;
5731
5732 if (!Aggressive)
5733 return false;
5734
5735 const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
5736 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
5737 Register Op1 = MI.getOperand(1).getReg();
5738 Register Op2 = MI.getOperand(2).getReg();
5739 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
5740 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
5741
5742 unsigned PreferredFusedOpcode =
5743 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
5744
5745 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
5746 // prefer to fold the multiply with fewer uses.
5747 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
5748 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
5749 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
5750 std::swap(LHS, RHS);
5751 }
5752
5753 // Builds: (fma x, y, (fma (fpext u), (fpext v), z))
5754 auto buildMatchInfo = [=, &MI](Register U, Register V, Register Z, Register X,
5755 Register Y, MachineIRBuilder &B) {
5756 Register FpExtU = B.buildFPExt(DstType, U).getReg(0);
5757 Register FpExtV = B.buildFPExt(DstType, V).getReg(0);
5758 Register InnerFMA =
5759 B.buildInstr(PreferredFusedOpcode, {DstType}, {FpExtU, FpExtV, Z})
5760 .getReg(0);
5761 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5762 {X, Y, InnerFMA});
5763 };
5764
5765 MachineInstr *FMulMI, *FMAMI;
5766 // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
5767 // -> (fma x, y, (fma (fpext u), (fpext v), z))
5768 if (LHS.MI->getOpcode() == PreferredFusedOpcode &&
5769 mi_match(LHS.MI->getOperand(3).getReg(), MRI,
5770 m_GFPExt(m_MInstr(FMulMI))) &&
5771 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
5772 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
5773 MRI.getType(FMulMI->getOperand(0).getReg()))) {
5774 MatchInfo = [=](MachineIRBuilder &B) {
5775 buildMatchInfo(FMulMI->getOperand(1).getReg(),
5776 FMulMI->getOperand(2).getReg(), RHS.Reg,
5777 LHS.MI->getOperand(1).getReg(),
5778 LHS.MI->getOperand(2).getReg(), B);
5779 };
5780 return true;
5781 }
5782
5783 // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
5784 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
5785 // FIXME: This turns two single-precision and one double-precision
5786 // operation into two double-precision operations, which might not be
5787 // interesting for all targets, especially GPUs.
5788 if (mi_match(LHS.Reg, MRI, m_GFPExt(m_MInstr(FMAMI))) &&
5789 FMAMI->getOpcode() == PreferredFusedOpcode) {
5790 MachineInstr *FMulMI = MRI.getVRegDef(FMAMI->getOperand(3).getReg());
5791 if (isContractableFMul(*FMulMI, AllowFusionGlobally) &&
5792 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
5793 MRI.getType(FMAMI->getOperand(0).getReg()))) {
5794 MatchInfo = [=](MachineIRBuilder &B) {
5795 Register X = FMAMI->getOperand(1).getReg();
5796 Register Y = FMAMI->getOperand(2).getReg();
5797 X = B.buildFPExt(DstType, X).getReg(0);
5798 Y = B.buildFPExt(DstType, Y).getReg(0);
5799 buildMatchInfo(FMulMI->getOperand(1).getReg(),
5800 FMulMI->getOperand(2).getReg(), RHS.Reg, X, Y, B);
5801 };
5802
5803 return true;
5804 }
5805 }
5806
5807 // fold (fadd z, (fma x, y, (fpext (fmul u, v)))
5808 // -> (fma x, y, (fma (fpext u), (fpext v), z))
5809 if (RHS.MI->getOpcode() == PreferredFusedOpcode &&
5810 mi_match(RHS.MI->getOperand(3).getReg(), MRI,
5811 m_GFPExt(m_MInstr(FMulMI))) &&
5812 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
5813 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
5814 MRI.getType(FMulMI->getOperand(0).getReg()))) {
5815 MatchInfo = [=](MachineIRBuilder &B) {
5816 buildMatchInfo(FMulMI->getOperand(1).getReg(),
5817 FMulMI->getOperand(2).getReg(), LHS.Reg,
5818 RHS.MI->getOperand(1).getReg(),
5819 RHS.MI->getOperand(2).getReg(), B);
5820 };
5821 return true;
5822 }
5823
5824 // fold (fadd z, (fpext (fma x, y, (fmul u, v)))
5825 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
5826 // FIXME: This turns two single-precision and one double-precision
5827 // operation into two double-precision operations, which might not be
5828 // interesting for all targets, especially GPUs.
5829 if (mi_match(RHS.Reg, MRI, m_GFPExt(m_MInstr(FMAMI))) &&
5830 FMAMI->getOpcode() == PreferredFusedOpcode) {
5831 MachineInstr *FMulMI = MRI.getVRegDef(FMAMI->getOperand(3).getReg());
5832 if (isContractableFMul(*FMulMI, AllowFusionGlobally) &&
5833 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
5834 MRI.getType(FMAMI->getOperand(0).getReg()))) {
5835 MatchInfo = [=](MachineIRBuilder &B) {
5836 Register X = FMAMI->getOperand(1).getReg();
5837 Register Y = FMAMI->getOperand(2).getReg();
5838 X = B.buildFPExt(DstType, X).getReg(0);
5839 Y = B.buildFPExt(DstType, Y).getReg(0);
5840 buildMatchInfo(FMulMI->getOperand(1).getReg(),
5841 FMulMI->getOperand(2).getReg(), LHS.Reg, X, Y, B);
5842 };
5843 return true;
5844 }
5845 }
5846
5847 return false;
5848}
5849
5850 bool CombinerHelper::matchCombineFSubFMulToFMadOrFMA(
5851 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
5852 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
5853
5854 bool AllowFusionGlobally, HasFMAD, Aggressive;
5855 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
5856 return false;
5857
5858 Register Op1 = MI.getOperand(1).getReg();
5859 Register Op2 = MI.getOperand(2).getReg();
5860 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
5861 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
5862 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
5863
5864 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
5865 // prefer to fold the multiply with fewer uses.
5866 bool FirstMulHasFewerUses = true;
5867 if (isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
5868 isContractableFMul(*RHS.MI, AllowFusionGlobally) &&
5869 hasMoreUses(*LHS.MI, *RHS.MI, MRI))
5870 FirstMulHasFewerUses = false;
5871
5872 unsigned PreferredFusedOpcode =
5873 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
5874
5875 // fold (fsub (fmul x, y), z) -> (fma x, y, -z)
5876 if (FirstMulHasFewerUses &&
5877 (isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
5878 (Aggressive || MRI.hasOneNonDBGUse(LHS.Reg)))) {
5879 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5880 Register NegZ = B.buildFNeg(DstTy, RHS.Reg).getReg(0);
5881 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5882 {LHS.MI->getOperand(1).getReg(),
5883 LHS.MI->getOperand(2).getReg(), NegZ});
5884 };
5885 return true;
5886 }
5887 // fold (fsub x, (fmul y, z)) -> (fma -y, z, x)
5888 else if ((isContractableFMul(*RHS.MI, AllowFusionGlobally) &&
5889 (Aggressive || MRI.hasOneNonDBGUse(RHS.Reg)))) {
5890 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5891 Register NegY =
5892 B.buildFNeg(DstTy, RHS.MI->getOperand(1).getReg()).getReg(0);
5893 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5894 {NegY, RHS.MI->getOperand(2).getReg(), LHS.Reg});
5895 };
5896 return true;
5897 }
5898
5899 return false;
5900}
5901
5902 bool CombinerHelper::matchCombineFSubFNegFMulToFMadOrFMA(
5903 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
5904 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
5905
5906 bool AllowFusionGlobally, HasFMAD, Aggressive;
5907 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
5908 return false;
5909
5910 Register LHSReg = MI.getOperand(1).getReg();
5911 Register RHSReg = MI.getOperand(2).getReg();
5912 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
5913
5914 unsigned PreferredFusedOpcode =
5915 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
5916
5917 MachineInstr *FMulMI;
5918 // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
5919 if (mi_match(LHSReg, MRI, m_GFNeg(m_MInstr(FMulMI))) &&
5920 (Aggressive || (MRI.hasOneNonDBGUse(LHSReg) &&
5921 MRI.hasOneNonDBGUse(FMulMI->getOperand(0).getReg()))) &&
5922 isContractableFMul(*FMulMI, AllowFusionGlobally)) {
5923 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5924 Register NegX =
5925 B.buildFNeg(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
5926 Register NegZ = B.buildFNeg(DstTy, RHSReg).getReg(0);
5927 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5928 {NegX, FMulMI->getOperand(2).getReg(), NegZ});
5929 };
5930 return true;
5931 }
5932
5933 // fold (fsub x, (fneg (fmul, y, z))) -> (fma y, z, x)
5934 if (mi_match(RHSReg, MRI, m_GFNeg(m_MInstr(FMulMI))) &&
5935 (Aggressive || (MRI.hasOneNonDBGUse(RHSReg) &&
5936 MRI.hasOneNonDBGUse(FMulMI->getOperand(0).getReg()))) &&
5937 isContractableFMul(*FMulMI, AllowFusionGlobally)) {
5938 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5939 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5940 {FMulMI->getOperand(1).getReg(),
5941 FMulMI->getOperand(2).getReg(), LHSReg});
5942 };
5943 return true;
5944 }
5945
5946 return false;
5947}
5948
5949 bool CombinerHelper::matchCombineFSubFpExtFMulToFMadOrFMA(
5950 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
5951 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
5952
5953 bool AllowFusionGlobally, HasFMAD, Aggressive;
5954 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
5955 return false;
5956
5957 Register LHSReg = MI.getOperand(1).getReg();
5958 Register RHSReg = MI.getOperand(2).getReg();
5959 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
5960
5961 unsigned PreferredFusedOpcode =
5962 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
5963
5964 MachineInstr *FMulMI;
5965 // fold (fsub (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), (fneg z))
5966 if (mi_match(LHSReg, MRI, m_GFPExt(m_MInstr(FMulMI))) &&
5967 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
5968 (Aggressive || MRI.hasOneNonDBGUse(LHSReg))) {
5969 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5970 Register FpExtX =
5971 B.buildFPExt(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
5972 Register FpExtY =
5973 B.buildFPExt(DstTy, FMulMI->getOperand(2).getReg()).getReg(0);
5974 Register NegZ = B.buildFNeg(DstTy, RHSReg).getReg(0);
5975 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5976 {FpExtX, FpExtY, NegZ});
5977 };
5978 return true;
5979 }
5980
5981 // fold (fsub x, (fpext (fmul y, z))) -> (fma (fneg (fpext y)), (fpext z), x)
5982 if (mi_match(RHSReg, MRI, m_GFPExt(m_MInstr(FMulMI))) &&
5983 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
5984 (Aggressive || MRI.hasOneNonDBGUse(RHSReg))) {
5985 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5986 Register FpExtY =
5987 B.buildFPExt(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
5988 Register NegY = B.buildFNeg(DstTy, FpExtY).getReg(0);
5989 Register FpExtZ =
5990 B.buildFPExt(DstTy, FMulMI->getOperand(2).getReg()).getReg(0);
5991 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5992 {NegY, FpExtZ, LHSReg});
5993 };
5994 return true;
5995 }
5996
5997 return false;
5998}
5999
6000 bool CombinerHelper::matchCombineFSubFpExtFNegFMulToFMadOrFMA(
6001 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
6002 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
6003
6004 bool AllowFusionGlobally, HasFMAD, Aggressive;
6005 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6006 return false;
6007
6008 const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
6009 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6010 Register LHSReg = MI.getOperand(1).getReg();
6011 Register RHSReg = MI.getOperand(2).getReg();
6012
6013 unsigned PreferredFusedOpcode =
6014 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6015
6016 auto buildMatchInfo = [=](Register Dst, Register X, Register Y, Register Z,
6017 MachineIRBuilder &B) {
6018 Register FpExtX = B.buildFPExt(DstTy, X).getReg(0);
6019 Register FpExtY = B.buildFPExt(DstTy, Y).getReg(0);
6020 B.buildInstr(PreferredFusedOpcode, {Dst}, {FpExtX, FpExtY, Z});
6021 };
6022
6023 MachineInstr *FMulMI;
6024 // fold (fsub (fpext (fneg (fmul x, y))), z) ->
6025 // (fneg (fma (fpext x), (fpext y), z))
6026 // fold (fsub (fneg (fpext (fmul x, y))), z) ->
6027 // (fneg (fma (fpext x), (fpext y), z))
6028 if ((mi_match(LHSReg, MRI, m_GFPExt(m_GFNeg(m_MInstr(FMulMI)))) ||
6029 mi_match(LHSReg, MRI, m_GFNeg(m_GFPExt(m_MInstr(FMulMI))))) &&
6030 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6031 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstTy,
6032 MRI.getType(FMulMI->getOperand(0).getReg()))) {
6033 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6034 Register FMAReg = MRI.createGenericVirtualRegister(DstTy);
6035 buildMatchInfo(FMAReg, FMulMI->getOperand(1).getReg(),
6036 FMulMI->getOperand(2).getReg(), RHSReg, B);
6037 B.buildFNeg(MI.getOperand(0).getReg(), FMAReg);
6038 };
6039 return true;
6040 }
6041
6042 // fold (fsub x, (fpext (fneg (fmul y, z)))) -> (fma (fpext y), (fpext z), x)
6043 // fold (fsub x, (fneg (fpext (fmul y, z)))) -> (fma (fpext y), (fpext z), x)
6044 if ((mi_match(RHSReg, MRI, m_GFPExt(m_GFNeg(m_MInstr(FMulMI)))) ||
6045 mi_match(RHSReg, MRI, m_GFNeg(m_GFPExt(m_MInstr(FMulMI))))) &&
6046 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6047 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstTy,
6048 MRI.getType(FMulMI->getOperand(0).getReg()))) {
6049 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6050 buildMatchInfo(MI.getOperand(0).getReg(), FMulMI->getOperand(1).getReg(),
6051 FMulMI->getOperand(2).getReg(), LHSReg, B);
6052 };
6053 return true;
6054 }
6055
6056 return false;
6057}
6058
6059 bool CombinerHelper::matchCombineFMinMaxNaN(MachineInstr &MI,
6060 unsigned &IdxToPropagate) {
6061 bool PropagateNaN;
6062 switch (MI.getOpcode()) {
6063 default:
6064 return false;
6065 case TargetOpcode::G_FMINNUM:
6066 case TargetOpcode::G_FMAXNUM:
6067 PropagateNaN = false;
6068 break;
6069 case TargetOpcode::G_FMINIMUM:
6070 case TargetOpcode::G_FMAXIMUM:
6071 PropagateNaN = true;
6072 break;
6073 }
6074
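// E.g. G_FMINNUM %x, NaN folds to %x (the non-NaN operand), whereas
// G_FMINIMUM %x, NaN folds to the NaN operand itself (illustrative
// restatement of the minNum vs. minimum semantics).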
6075 auto MatchNaN = [&](unsigned Idx) {
6076 Register MaybeNaNReg = MI.getOperand(Idx).getReg();
6077 const ConstantFP *MaybeCst = getConstantFPVRegVal(MaybeNaNReg, MRI);
6078 if (!MaybeCst || !MaybeCst->getValueAPF().isNaN())
6079 return false;
6080 IdxToPropagate = PropagateNaN ? Idx : (Idx == 1 ? 2 : 1);
6081 return true;
6082 };
6083
6084 return MatchNaN(1) || MatchNaN(2);
6085}
6086
6087 bool CombinerHelper::matchAddSubSameReg(MachineInstr &MI, Register &Src) {
6088 assert(MI.getOpcode() == TargetOpcode::G_ADD && "Expected a G_ADD");
6089 Register LHS = MI.getOperand(1).getReg();
6090 Register RHS = MI.getOperand(2).getReg();
6091
6092 // Helper lambda to check for opportunities for
6093 // A + (B - A) -> B
6094 // (B - A) + A -> B
6095 auto CheckFold = [&](Register MaybeSub, Register MaybeSameReg) {
6096 Register Reg;
6097 return mi_match(MaybeSub, MRI, m_GSub(m_Reg(Src), m_Reg(Reg))) &&
6098 Reg == MaybeSameReg;
6099 };
6100 return CheckFold(LHS, RHS) || CheckFold(RHS, LHS);
6101}
6102
6103 bool CombinerHelper::matchBuildVectorIdentityFold(MachineInstr &MI,
6104 Register &MatchInfo) {
6105 // This combine folds the following patterns:
6106 //
6107 // G_BUILD_VECTOR_TRUNC (G_BITCAST(x), G_LSHR(G_BITCAST(x), k))
6108 // G_BUILD_VECTOR(G_TRUNC(G_BITCAST(x)), G_TRUNC(G_LSHR(G_BITCAST(x), k)))
6109 // into
6110 // x
6111 // if
6112 // k == sizeof(VecEltTy)/2
6113 // type(x) == type(dst)
6114 //
6115 // G_BUILD_VECTOR(G_TRUNC(G_BITCAST(x)), undef)
6116 // into
6117 // x
6118 // if
6119 // type(x) == type(dst)
6120
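// Illustrative example, not from the source: with x:<2 x s32>,
//   %bc:s64 = G_BITCAST x
//   %hi:s64 = G_LSHR %bc, 32
//   %v:<2 x s32> = G_BUILD_VECTOR_TRUNC %bc, %hi
// reassembles the original lanes of x (k == 32, the element size), so %v
// can simply be replaced by x.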
6121 LLT DstVecTy = MRI.getType(MI.getOperand(0).getReg());
6122 LLT DstEltTy = DstVecTy.getElementType();
6123
6124 Register Lo, Hi;
6125
6126 if (mi_match(
6127 MI, MRI,
6128 m_GBuildVector(m_GTrunc(m_GBitcast(m_Reg(Lo))), m_GImplicitDef()))) {
6129 MatchInfo = Lo;
6130 return MRI.getType(MatchInfo) == DstVecTy;
6131 }
6132
6133 std::optional<ValueAndVReg> ShiftAmount;
6134 const auto LoPattern = m_GBitcast(m_Reg(Lo));
6135 const auto HiPattern = m_GLShr(m_GBitcast(m_Reg(Hi)), m_GCst(ShiftAmount));
6136 if (mi_match(
6137 MI, MRI,
6138 m_any_of(m_GBuildVectorTrunc(LoPattern, HiPattern),
6139 m_GBuildVector(m_GTrunc(LoPattern), m_GTrunc(HiPattern))))) {
6140 if (Lo == Hi && ShiftAmount->Value == DstEltTy.getSizeInBits()) {
6141 MatchInfo = Lo;
6142 return MRI.getType(MatchInfo) == DstVecTy;
6143 }
6144 }
6145
6146 return false;
6147}
6148
6149 bool CombinerHelper::matchTruncBuildVectorFold(MachineInstr &MI,
6150 Register &MatchInfo) {
6151 // Replace (G_TRUNC (G_BITCAST (G_BUILD_VECTOR x, y)) with just x
6152 // if type(x) == type(G_TRUNC)
6153 if (!mi_match(MI.getOperand(1).getReg(), MRI,
6154 m_GBitcast(m_GBuildVector(m_Reg(MatchInfo), m_Reg()))))
6155 return false;
6156
6157 return MRI.getType(MatchInfo) == MRI.getType(MI.getOperand(0).getReg());
6158}
6159
6160 bool CombinerHelper::matchTruncLshrBuildVectorFold(MachineInstr &MI,
6161 Register &MatchInfo) {
6162 // Replace (G_TRUNC (G_LSHR (G_BITCAST (G_BUILD_VECTOR x, y)), K)) with
6163 // y if K == size of vector element type
6164 std::optional<ValueAndVReg> ShiftAmt;
6165 if (!mi_match(MI.getOperand(1).getReg(), MRI,
6166 m_GLShr(m_GBitcast(m_GBuildVector(m_Reg(), m_Reg(MatchInfo))),
6167 m_GCst(ShiftAmt))))
6168 return false;
6169
6170 LLT MatchTy = MRI.getType(MatchInfo);
6171 return ShiftAmt->Value.getZExtValue() == MatchTy.getSizeInBits() &&
6172 MatchTy == MRI.getType(MI.getOperand(0).getReg());
6173}
6174
6175unsigned CombinerHelper::getFPMinMaxOpcForSelect(
6176 CmpInst::Predicate Pred, LLT DstTy,
6177 SelectPatternNaNBehaviour VsNaNRetVal) const {
6178 assert(VsNaNRetVal != SelectPatternNaNBehaviour::NOT_APPLICABLE &&
6179 "Expected a NaN behaviour?");
6180 // Choose an opcode based off of legality or the behaviour when one of the
6181 // LHS/RHS may be NaN.
6182 switch (Pred) {
6183 default:
6184 return 0;
6185 case CmpInst::FCMP_UGT:
6186 case CmpInst::FCMP_UGE:
6187 case CmpInst::FCMP_OGT:
6188 case CmpInst::FCMP_OGE:
6189 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_OTHER)
6190 return TargetOpcode::G_FMAXNUM;
6191 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_NAN)
6192 return TargetOpcode::G_FMAXIMUM;
6193 if (isLegal({TargetOpcode::G_FMAXNUM, {DstTy}}))
6194 return TargetOpcode::G_FMAXNUM;
6195 if (isLegal({TargetOpcode::G_FMAXIMUM, {DstTy}}))
6196 return TargetOpcode::G_FMAXIMUM;
6197 return 0;
6198 case CmpInst::FCMP_ULT:
6199 case CmpInst::FCMP_ULE:
6200 case CmpInst::FCMP_OLT:
6201 case CmpInst::FCMP_OLE:
6202 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_OTHER)
6203 return TargetOpcode::G_FMINNUM;
6204 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_NAN)
6205 return TargetOpcode::G_FMINIMUM;
6206 if (isLegal({TargetOpcode::G_FMINNUM, {DstTy}}))
6207 return TargetOpcode::G_FMINNUM;
6208 if (!isLegal({TargetOpcode::G_FMINIMUM, {DstTy}}))
6209 return 0;
6210 return TargetOpcode::G_FMINIMUM;
6211 }
6212}
6213
6214CombinerHelper::SelectPatternNaNBehaviour
6215CombinerHelper::computeRetValAgainstNaN(Register LHS, Register RHS,
6216 bool IsOrderedComparison) const {
6217 bool LHSSafe = isKnownNeverNaN(LHS, MRI);
6218 bool RHSSafe = isKnownNeverNaN(RHS, MRI);
6219 // Completely unsafe.
6220 if (!LHSSafe && !RHSSafe)
6221 return SelectPatternNaNBehaviour::NOT_APPLICABLE;
6222 if (LHSSafe && RHSSafe)
6223 return SelectPatternNaNBehaviour::RETURNS_ANY;
6224 // An ordered comparison will return false when given a NaN, so it
6225 // returns the RHS.
6226 if (IsOrderedComparison)
6227 return LHSSafe ? SelectPatternNaNBehaviour::RETURNS_NAN
6228 : SelectPatternNaNBehaviour::RETURNS_OTHER;
6229 // An unordered comparison will return true when given a NaN, so it
6230 // returns the LHS.
6231 return LHSSafe ? SelectPatternNaNBehaviour::RETURNS_OTHER
6232 : SelectPatternNaNBehaviour::RETURNS_NAN;
6233}
6234
6235bool CombinerHelper::matchFPSelectToMinMax(Register Dst, Register Cond,
6236 Register TrueVal, Register FalseVal,
6237 BuildFnTy &MatchInfo) {
6238 // Match: select (fcmp cond x, y) x, y
6239 // select (fcmp cond x, y) y, x
6240 // And turn it into fminnum/fmaxnum or fmin/fmax based off of the condition.
6241 LLT DstTy = MRI.getType(Dst);
6242 // Bail out early on pointers, since we'll never want to fold to a min/max.
6243 if (DstTy.isPointer())
6244 return false;
6245 // Match a floating point compare with a less-than/greater-than predicate.
6246 // TODO: Allow multiple users of the compare if they are all selects.
6247 CmpInst::Predicate Pred;
6248 Register CmpLHS, CmpRHS;
6249 if (!mi_match(Cond, MRI,
6250 m_OneNonDBGUse(
6251 m_GFCmp(m_Pred(Pred), m_Reg(CmpLHS), m_Reg(CmpRHS)))) ||
6252 CmpInst::isEquality(Pred))
6253 return false;
6254 SelectPatternNaNBehaviour ResWithKnownNaNInfo =
6255 computeRetValAgainstNaN(CmpLHS, CmpRHS, CmpInst::isOrdered(Pred));
6256 if (ResWithKnownNaNInfo == SelectPatternNaNBehaviour::NOT_APPLICABLE)
6257 return false;
6258 if (TrueVal == CmpRHS && FalseVal == CmpLHS) {
6259 std::swap(CmpLHS, CmpRHS);
6260 Pred = CmpInst::getSwappedPredicate(Pred);
6261 if (ResWithKnownNaNInfo == SelectPatternNaNBehaviour::RETURNS_NAN)
6262 ResWithKnownNaNInfo = SelectPatternNaNBehaviour::RETURNS_OTHER;
6263 else if (ResWithKnownNaNInfo == SelectPatternNaNBehaviour::RETURNS_OTHER)
6264 ResWithKnownNaNInfo = SelectPatternNaNBehaviour::RETURNS_NAN;
6265 }
6266 if (TrueVal != CmpLHS || FalseVal != CmpRHS)
6267 return false;
6268 // Decide what type of max/min this should be based off of the predicate.
6269 unsigned Opc = getFPMinMaxOpcForSelect(Pred, DstTy, ResWithKnownNaNInfo);
6270 if (!Opc || !isLegal({Opc, {DstTy}}))
6271 return false;
6272 // Comparisons between signed zero and zero may have different results...
6273 // unless we have fmaximum/fminimum. In that case, we know -0 < 0.
6274 if (Opc != TargetOpcode::G_FMAXIMUM && Opc != TargetOpcode::G_FMINIMUM) {
6275 // We don't know if a comparison between two 0s will give us a consistent
6276 // result. Be conservative and only proceed if at least one side is
6277 // non-zero.
6278 auto KnownNonZeroSide = getFConstantVRegValWithLookThrough(CmpLHS, MRI);
6279 if (!KnownNonZeroSide || !KnownNonZeroSide->Value.isNonZero()) {
6280 KnownNonZeroSide = getFConstantVRegValWithLookThrough(CmpRHS, MRI);
6281 if (!KnownNonZeroSide || !KnownNonZeroSide->Value.isNonZero())
6282 return false;
6283 }
6284 }
6285 MatchInfo = [=](MachineIRBuilder &B) {
6286 B.buildInstr(Opc, {Dst}, {CmpLHS, CmpRHS});
6287 };
6288 return true;
6289}
6290
6291 bool CombinerHelper::matchSimplifySelectToMinMax(MachineInstr &MI,
6292 BuildFnTy &MatchInfo) {
6293 // TODO: Handle integer cases.
6294 assert(MI.getOpcode() == TargetOpcode::G_SELECT);
6295 // Condition may be fed by a truncated compare.
6296 Register Cond = MI.getOperand(1).getReg();
6297 Register MaybeTrunc;
6298 if (mi_match(Cond, MRI, m_OneNonDBGUse(m_GTrunc(m_Reg(MaybeTrunc)))))
6299 Cond = MaybeTrunc;
6300 Register Dst = MI.getOperand(0).getReg();
6301 Register TrueVal = MI.getOperand(2).getReg();
6302 Register FalseVal = MI.getOperand(3).getReg();
6303 return matchFPSelectToMinMax(Dst, Cond, TrueVal, FalseVal, MatchInfo);
6304}
6305
6306 bool CombinerHelper::matchRedundantBinOpInEquality(MachineInstr &MI,
6307 BuildFnTy &MatchInfo) {
6308 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
6309 // (X + Y) == X --> Y == 0
6310 // (X + Y) != X --> Y != 0
6311 // (X - Y) == X --> Y == 0
6312 // (X - Y) != X --> Y != 0
6313 // (X ^ Y) == X --> Y == 0
6314 // (X ^ Y) != X --> Y != 0
6315 Register Dst = MI.getOperand(0).getReg();
6316 CmpInst::Predicate Pred;
6317 Register X, Y, OpLHS, OpRHS;
6318 bool MatchedSub = mi_match(
6319 Dst, MRI,
6320 m_c_GICmp(m_Pred(Pred), m_Reg(X), m_GSub(m_Reg(OpLHS), m_Reg(Y))));
6321 if (MatchedSub && X != OpLHS)
6322 return false;
6323 if (!MatchedSub) {
6324 if (!mi_match(Dst, MRI,
6325 m_c_GICmp(m_Pred(Pred), m_Reg(X),
6326 m_any_of(m_GAdd(m_Reg(OpLHS), m_Reg(OpRHS)),
6327 m_GXor(m_Reg(OpLHS), m_Reg(OpRHS))))))
6328 return false;
6329 Y = X == OpLHS ? OpRHS : X == OpRHS ? OpLHS : Register();
6330 }
6331 MatchInfo = [=](MachineIRBuilder &B) {
6332 auto Zero = B.buildConstant(MRI.getType(Y), 0);
6333 B.buildICmp(Pred, Dst, Y, Zero);
6334 };
6335 return CmpInst::isEquality(Pred) && Y.isValid();
6336}
6337
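// Explanatory note, not from the source: a shift amount that is >= the
// scalar bit width yields a poison/undefined result, so a constant (or
// splat) shift amount that large lets the combiner replace the whole
// shift, e.g. with an implicit_def.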
6338 bool CombinerHelper::matchShiftsTooBig(MachineInstr &MI) {
6339 Register ShiftReg = MI.getOperand(2).getReg();
6340 LLT ResTy = MRI.getType(MI.getOperand(0).getReg());
6341 auto IsShiftTooBig = [&](const Constant *C) {
6342 auto *CI = dyn_cast<ConstantInt>(C);
6343 return CI && CI->uge(ResTy.getScalarSizeInBits());
6344 };
6345 return matchUnaryPredicate(MRI, ShiftReg, IsShiftTooBig);
6346}
6347
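// Explanatory note, not from the source: e.g. G_ADD 5, %x is commuted to
// G_ADD %x, 5 so later combines only need to look for constants on the
// RHS. The overflow ops use operand indices 2/3 because operand 1 is
// their second (carry/overflow) result.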
6348 bool CombinerHelper::matchCommuteConstantToRHS(MachineInstr &MI) {
6349 unsigned LHSOpndIdx = 1;
6350 unsigned RHSOpndIdx = 2;
6351 switch (MI.getOpcode()) {
6352 case TargetOpcode::G_UADDO:
6353 case TargetOpcode::G_SADDO:
6354 case TargetOpcode::G_UMULO:
6355 case TargetOpcode::G_SMULO:
6356 LHSOpndIdx = 2;
6357 RHSOpndIdx = 3;
6358 break;
6359 default:
6360 break;
6361 }
6362 Register LHS = MI.getOperand(LHSOpndIdx).getReg();
6363 Register RHS = MI.getOperand(RHSOpndIdx).getReg();
6364 if (!getIConstantVRegVal(LHS, MRI)) {
6365 // Skip commuting if LHS is not a constant. However, LHS may be a
6366 // G_CONSTANT_FOLD_BARRIER; if so, commute as long as we don't already
6367 // have a constant on the RHS.
6368 if (MRI.getVRegDef(LHS)->getOpcode() !=
6369 TargetOpcode::G_CONSTANT_FOLD_BARRIER)
6370 return false;
6371 }
6372 // Commute as long as RHS is not a constant or G_CONSTANT_FOLD_BARRIER.
6373 return MRI.getVRegDef(RHS)->getOpcode() !=
6374 TargetOpcode::G_CONSTANT_FOLD_BARRIER &&
6375 !getIConstantVRegVal(RHS, MRI);
6376}
6377
6378 bool CombinerHelper::matchCommuteFPConstantToRHS(MachineInstr &MI) {
6379 Register LHS = MI.getOperand(1).getReg();
6380 Register RHS = MI.getOperand(2).getReg();
6381 std::optional<FPValueAndVReg> ValAndVReg;
6382 if (!mi_match(LHS, MRI, m_GFCstOrSplat(ValAndVReg)))
6383 return false;
6384 return !mi_match(RHS, MRI, m_GFCstOrSplat(ValAndVReg));
6385}
6386
6387 void CombinerHelper::applyCommuteBinOpOperands(MachineInstr &MI) {
6388 Observer.changingInstr(MI);
6389 unsigned LHSOpndIdx = 1;
6390 unsigned RHSOpndIdx = 2;
6391 switch (MI.getOpcode()) {
6392 case TargetOpcode::G_UADDO:
6393 case TargetOpcode::G_SADDO:
6394 case TargetOpcode::G_UMULO:
6395 case TargetOpcode::G_SMULO:
6396 LHSOpndIdx = 2;
6397 RHSOpndIdx = 3;
6398 break;
6399 default:
6400 break;
6401 }
6402 Register LHSReg = MI.getOperand(LHSOpndIdx).getReg();
6403 Register RHSReg = MI.getOperand(RHSOpndIdx).getReg();
6404 MI.getOperand(LHSOpndIdx).setReg(RHSReg);
6405 MI.getOperand(RHSOpndIdx).setReg(LHSReg);
6406 Observer.changedInstr(MI);
6407}
6408
6409bool CombinerHelper::isOneOrOneSplat(Register Src, bool AllowUndefs) {
6410 LLT SrcTy = MRI.getType(Src);
6411 if (SrcTy.isFixedVector())
6412 return isConstantSplatVector(Src, 1, AllowUndefs);
6413 if (SrcTy.isScalar()) {
6414 if (AllowUndefs && getOpcodeDef<GImplicitDef>(Src, MRI) != nullptr)
6415 return true;
6416 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
6417 return IConstant && IConstant->Value == 1;
6418 }
6419 return false; // scalable vector
6420}
6421
6422bool CombinerHelper::isZeroOrZeroSplat(Register Src, bool AllowUndefs) {
6423 LLT SrcTy = MRI.getType(Src);
6424 if (SrcTy.isFixedVector())
6425 return isConstantSplatVector(Src, 0, AllowUndefs);
6426 if (SrcTy.isScalar()) {
6427 if (AllowUndefs && getOpcodeDef<GImplicitDef>(Src, MRI) != nullptr)
6428 return true;
6429 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
6430 return IConstant && IConstant->Value == 0;
6431 }
6432 return false; // scalable vector
6433}
6434
6435// Ignores COPYs during conformance checks.
6436// FIXME scalable vectors.
6437bool CombinerHelper::isConstantSplatVector(Register Src, int64_t SplatValue,
6438 bool AllowUndefs) {
6439 GBuildVector *BuildVector = getOpcodeDef<GBuildVector>(Src, MRI);
6440 if (!BuildVector)
6441 return false;
6442 unsigned NumSources = BuildVector->getNumSources();
6443
6444 for (unsigned I = 0; I < NumSources; ++I) {
6445 GImplicitDef *ImplicitDef =
6446 getOpcodeDef<GImplicitDef>(BuildVector->getSourceReg(I), MRI);
6447 if (ImplicitDef && AllowUndefs)
6448 continue;
6449 if (ImplicitDef && !AllowUndefs)
6450 return false;
6451 std::optional<ValueAndVReg> IConstant =
6452 getIConstantVRegValWithLookThrough(BuildVector->getSourceReg(I), MRI);
6453 if (IConstant && IConstant->Value == SplatValue)
6454 continue;
6455 return false;
6456 }
6457 return true;
6458}
6459
6460// Ignores COPYs during lookups.
6461// FIXME scalable vectors
6462std::optional<APInt>
6463CombinerHelper::getConstantOrConstantSplatVector(Register Src) {
6464 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
6465 if (IConstant)
6466 return IConstant->Value;
6467
6468 GBuildVector *BuildVector = getOpcodeDef<GBuildVector>(Src, MRI);
6469 if (!BuildVector)
6470 return std::nullopt;
6471 unsigned NumSources = BuildVector->getNumSources();
6472
6473 std::optional<APInt> Value = std::nullopt;
6474 for (unsigned I = 0; I < NumSources; ++I) {
6475 std::optional<ValueAndVReg> IConstant =
6476 getIConstantVRegValWithLookThrough(BuildVector->getSourceReg(I), MRI);
6477 if (!IConstant)
6478 return std::nullopt;
6479 if (!Value)
6480 Value = IConstant->Value;
6481 else if (*Value != IConstant->Value)
6482 return std::nullopt;
6483 }
6484 return Value;
6485}
6486
6487// FIXME G_SPLAT_VECTOR
6488bool CombinerHelper::isConstantOrConstantVectorI(Register Src) const {
6489 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
6490 if (IConstant)
6491 return true;
6492
6493 GBuildVector *BuildVector = getOpcodeDef<GBuildVector>(Src, MRI);
6494 if (!BuildVector)
6495 return false;
6496
6497 unsigned NumSources = BuildVector->getNumSources();
6498 for (unsigned I = 0; I < NumSources; ++I) {
6499 std::optional<ValueAndVReg> IConstant =
6500 getIConstantVRegValWithLookThrough(BuildVector->getSourceReg(I), MRI);
6501 if (!IConstant)
6502 return false;
6503 }
6504 return true;
6505}
6506
6507// TODO: use knownbits to determine zeros
6508bool CombinerHelper::tryFoldSelectOfConstants(GSelect *Select,
6509 BuildFnTy &MatchInfo) {
6510 uint32_t Flags = Select->getFlags();
6511 Register Dest = Select->getReg(0);
6512 Register Cond = Select->getCondReg();
6513 Register True = Select->getTrueReg();
6514 Register False = Select->getFalseReg();
6515 LLT CondTy = MRI.getType(Select->getCondReg());
6516 LLT TrueTy = MRI.getType(Select->getTrueReg());
6517
6518 // We only do this combine for scalar boolean conditions.
6519 if (CondTy != LLT::scalar(1))
6520 return false;
6521
6522 if (TrueTy.isPointer())
6523 return false;
6524
6525 // Both are scalars.
6526 std::optional<ValueAndVReg> TrueOpt =
6527 getIConstantVRegValWithLookThrough(True, MRI);
6528 std::optional<ValueAndVReg> FalseOpt =
6529 getIConstantVRegValWithLookThrough(False, MRI);
6530
6531 if (!TrueOpt || !FalseOpt)
6532 return false;
6533
6534 APInt TrueValue = TrueOpt->Value;
6535 APInt FalseValue = FalseOpt->Value;
6536
6537 // select Cond, 1, 0 --> zext (Cond)
6538 if (TrueValue.isOne() && FalseValue.isZero()) {
6539 MatchInfo = [=](MachineIRBuilder &B) {
6540 B.setInstrAndDebugLoc(*Select);
6541 B.buildZExtOrTrunc(Dest, Cond);
6542 };
6543 return true;
6544 }
6545
6546 // select Cond, -1, 0 --> sext (Cond)
6547 if (TrueValue.isAllOnes() && FalseValue.isZero()) {
6548 MatchInfo = [=](MachineIRBuilder &B) {
6549 B.setInstrAndDebugLoc(*Select);
6550 B.buildSExtOrTrunc(Dest, Cond);
6551 };
6552 return true;
6553 }
6554
6555 // select Cond, 0, 1 --> zext (!Cond)
6556 if (TrueValue.isZero() && FalseValue.isOne()) {
6557 MatchInfo = [=](MachineIRBuilder &B) {
6558 B.setInstrAndDebugLoc(*Select);
6559 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
6560 B.buildNot(Inner, Cond);
6561 B.buildZExtOrTrunc(Dest, Inner);
6562 };
6563 return true;
6564 }
6565
6566 // select Cond, 0, -1 --> sext (!Cond)
6567 if (TrueValue.isZero() && FalseValue.isAllOnes()) {
6568 MatchInfo = [=](MachineIRBuilder &B) {
6569 B.setInstrAndDebugLoc(*Select);
6570 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
6571 B.buildNot(Inner, Cond);
6572 B.buildSExtOrTrunc(Dest, Inner);
6573 };
6574 return true;
6575 }
6576
6577 // select Cond, C1, C1-1 --> add (zext Cond), C1-1
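// E.g. select %c, 5, 4 --> add (zext %c), 4 (illustrative instance of the
// fold above).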
6578 if (TrueValue - 1 == FalseValue) {
6579 MatchInfo = [=](MachineIRBuilder &B) {
6580 B.setInstrAndDebugLoc(*Select);
6581 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
6582 B.buildZExtOrTrunc(Inner, Cond);
6583 B.buildAdd(Dest, Inner, False);
6584 };
6585 return true;
6586 }
6587
6588 // select Cond, C1, C1+1 --> add (sext Cond), C1+1
6589 if (TrueValue + 1 == FalseValue) {
6590 MatchInfo = [=](MachineIRBuilder &B) {
6591 B.setInstrAndDebugLoc(*Select);
6592 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
6593 B.buildSExtOrTrunc(Inner, Cond);
6594 B.buildAdd(Dest, Inner, False);
6595 };
6596 return true;
6597 }
6598
6599 // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
6600 if (TrueValue.isPowerOf2() && FalseValue.isZero()) {
6601 MatchInfo = [=](MachineIRBuilder &B) {
6602 B.setInstrAndDebugLoc(*Select);
6603 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
6604 B.buildZExtOrTrunc(Inner, Cond);
6605 // The shift amount must be scalar.
6606 LLT ShiftTy = TrueTy.isVector() ? TrueTy.getElementType() : TrueTy;
6607 auto ShAmtC = B.buildConstant(ShiftTy, TrueValue.exactLogBase2());
6608 B.buildShl(Dest, Inner, ShAmtC, Flags);
6609 };
6610 return true;
6611 }
6612 // select Cond, -1, C --> or (sext Cond), C
6613 if (TrueValue.isAllOnes()) {
6614 MatchInfo = [=](MachineIRBuilder &B) {
6615 B.setInstrAndDebugLoc(*Select);
6616 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
6617 B.buildSExtOrTrunc(Inner, Cond);
6618 B.buildOr(Dest, Inner, False, Flags);
6619 };
6620 return true;
6621 }
6622
6623 // select Cond, C, -1 --> or (sext (not Cond)), C
6624 if (FalseValue.isAllOnes()) {
6625 MatchInfo = [=](MachineIRBuilder &B) {
6626 B.setInstrAndDebugLoc(*Select);
6627 Register Not = MRI.createGenericVirtualRegister(TrueTy);
6628 B.buildNot(Not, Cond);
6629 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
6630 B.buildSExtOrTrunc(Inner, Not);
6631 B.buildOr(Dest, Inner, True, Flags);
6632 };
6633 return true;
6634 }
6635
6636 return false;
6637}
6638
6639// TODO: use knownbits to determine zeros
6640bool CombinerHelper::tryFoldBoolSelectToLogic(GSelect *Select,
6641 BuildFnTy &MatchInfo) {
6642 uint32_t Flags = Select->getFlags();
6643 Register DstReg = Select->getReg(0);
6644 Register Cond = Select->getCondReg();
6645 Register True = Select->getTrueReg();
6646 Register False = Select->getFalseReg();
6647 LLT CondTy = MRI.getType(Select->getCondReg());
6648 LLT TrueTy = MRI.getType(Select->getTrueReg());
6649
6650 // Boolean or fixed vector of booleans.
6651 if (CondTy.isScalableVector() ||
6652 (CondTy.isFixedVector() &&
6653 CondTy.getElementType().getScalarSizeInBits() != 1) ||
6654 CondTy.getScalarSizeInBits() != 1)
6655 return false;
6656
6657 if (CondTy != TrueTy)
6658 return false;
6659
6660 // select Cond, Cond, F --> or Cond, F
6661 // select Cond, 1, F --> or Cond, F
6662 if ((Cond == True) || isOneOrOneSplat(True, /* AllowUndefs */ true)) {
6663 MatchInfo = [=](MachineIRBuilder &B) {
6664 B.setInstrAndDebugLoc(*Select);
6665 Register Ext = MRI.createGenericVirtualRegister(TrueTy);
6666 B.buildZExtOrTrunc(Ext, Cond);
6667 auto FreezeFalse = B.buildFreeze(TrueTy, False);
6668 B.buildOr(DstReg, Ext, FreezeFalse, Flags);
6669 };
6670 return true;
6671 }
6672
6673 // select Cond, T, Cond --> and Cond, T
6674 // select Cond, T, 0 --> and Cond, T
6675 if ((Cond == False) || isZeroOrZeroSplat(False, /* AllowUndefs */ true)) {
6676 MatchInfo = [=](MachineIRBuilder &B) {
6677 B.setInstrAndDebugLoc(*Select);
6678 Register Ext = MRI.createGenericVirtualRegister(TrueTy);
6679 B.buildZExtOrTrunc(Ext, Cond);
6680 auto FreezeTrue = B.buildFreeze(TrueTy, True);
6681 B.buildAnd(DstReg, Ext, FreezeTrue);
6682 };
6683 return true;
6684 }
6685
6686 // select Cond, T, 1 --> or (not Cond), T
6687 if (isOneOrOneSplat(False, /* AllowUndefs */ true)) {
6688 MatchInfo = [=](MachineIRBuilder &B) {
6689 B.setInstrAndDebugLoc(*Select);
6690 // First the not.
6691 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
6692 B.buildNot(Inner, Cond);
6693 // Then an ext to match the destination register.
6694 Register Ext = MRI.createGenericVirtualRegister(TrueTy);
6695 B.buildZExtOrTrunc(Ext, Inner);
6696 auto FreezeTrue = B.buildFreeze(TrueTy, True);
6697 B.buildOr(DstReg, Ext, FreezeTrue, Flags);
6698 };
6699 return true;
6700 }
6701
6702 // select Cond, 0, F --> and (not Cond), F
6703 if (isZeroOrZeroSplat(True, /* AllowUndefs */ true)) {
6704 MatchInfo = [=](MachineIRBuilder &B) {
6705 B.setInstrAndDebugLoc(*Select);
6706 // First the not.
6707 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
6708 B.buildNot(Inner, Cond);
6709 // Then an ext to match the destination register.
6710 Register Ext = MRI.createGenericVirtualRegister(TrueTy);
6711 B.buildZExtOrTrunc(Ext, Inner);
6712 auto FreezeFalse = B.buildFreeze(TrueTy, False);
6713 B.buildAnd(DstReg, Ext, FreezeFalse);
6714 };
6715 return true;
6716 }
6717
6718 return false;
6719}
6720
6721bool CombinerHelper::tryFoldSelectToIntMinMax(GSelect *Select,
6722 BuildFnTy &MatchInfo) {
6723 Register DstReg = Select->getReg(0);
6724 Register Cond = Select->getCondReg();
6725 Register True = Select->getTrueReg();
6726 Register False = Select->getFalseReg();
6727 LLT DstTy = MRI.getType(DstReg);
6728
6729 if (DstTy.isPointer())
6730 return false;
6731
6732 // We need an G_ICMP on the condition register.
6733 GICmp *Cmp = getOpcodeDef<GICmp>(Cond, MRI);
6734 if (!Cmp)
6735 return false;
6736
6737 // We want to fold the icmp and replace the select.
6738 if (!MRI.hasOneNonDBGUse(Cmp->getReg(0)))
6739 return false;
6740
6741 CmpInst::Predicate Pred = Cmp->getCond();
6742 // We need a larger or smaller predicate for
6743 // canonicalization.
6744 if (CmpInst::isEquality(Pred))
6745 return false;
6746
6747 Register CmpLHS = Cmp->getLHSReg();
6748 Register CmpRHS = Cmp->getRHSReg();
6749
6750 // We can swap CmpLHS and CmpRHS for a higher hit rate.
6751 if (True == CmpRHS && False == CmpLHS) {
6752 std::swap(CmpLHS, CmpRHS);
6753 Pred = CmpInst::getSwappedPredicate(Pred);
6754 }
6755
6756 // (icmp X, Y) ? X : Y -> integer minmax.
6757 // see matchSelectPattern in ValueTracking.
6758 // Legality between G_SELECT and integer minmax can differ.
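// E.g. select (icmp ugt %x, %y), %x, %y becomes G_UMAX %x, %y
// (illustrative; each predicate maps onto one of the cases below).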
6759 if (True == CmpLHS && False == CmpRHS) {
6760 switch (Pred) {
6761 case ICmpInst::ICMP_UGT:
6762 case ICmpInst::ICMP_UGE: {
6763 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMAX, DstTy}))
6764 return false;
6765 MatchInfo = [=](MachineIRBuilder &B) {
6766 B.buildUMax(DstReg, True, False);
6767 };
6768 return true;
6769 }
6770 case ICmpInst::ICMP_SGT:
6771 case ICmpInst::ICMP_SGE: {
6772 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SMAX, DstTy}))
6773 return false;
6774 MatchInfo = [=](MachineIRBuilder &B) {
6775 B.buildSMax(DstReg, True, False);
6776 };
6777 return true;
6778 }
6779 case ICmpInst::ICMP_ULT:
6780 case ICmpInst::ICMP_ULE: {
6781 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMIN, DstTy}))
6782 return false;
6783 MatchInfo = [=](MachineIRBuilder &B) {
6784 B.buildUMin(DstReg, True, False);
6785 };
6786 return true;
6787 }
6788 case ICmpInst::ICMP_SLT:
6789 case ICmpInst::ICMP_SLE: {
6790 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SMIN, DstTy}))
6791 return false;
6792 MatchInfo = [=](MachineIRBuilder &B) {
6793 B.buildSMin(DstReg, True, False);
6794 };
6795 return true;
6796 }
6797 default:
6798 return false;
6799 }
6800 }
6801
6802 return false;
6803}
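For example (an editorial sketch, not from the source; register names invented), when
G_SMAX is legal for s32, the signed-greater-than form collapses to a single min/max:

  %c:_(s1) = G_ICMP intpred(sgt), %x:_(s32), %y:_(s32)
  %d:_(s32) = G_SELECT %c:_(s1), %x:_(s32), %y:_(s32)
  -->
  %d:_(s32) = G_SMAX %x:_(s32), %y:_(s32)

The one-use check above is what allows the G_ICMP to die together with the select.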
6804
6805bool CombinerHelper::matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo) {
6806 GSelect *Select = cast<GSelect>(&MI);
6807
6808 if (tryFoldSelectOfConstants(Select, MatchInfo))
6809 return true;
6810
6811 if (tryFoldBoolSelectToLogic(Select, MatchInfo))
6812 return true;
6813
6814 if (tryFoldSelectToIntMinMax(Select, MatchInfo))
6815 return true;
6816
6817 return false;
6818}
6819
6820/// Fold (icmp Pred1 V1, C1) && (icmp Pred2 V2, C2)
6821/// or (icmp Pred1 V1, C1) || (icmp Pred2 V2, C2)
6822/// into a single comparison using range-based reasoning.
6823/// see InstCombinerImpl::foldAndOrOfICmpsUsingRanges.
6824bool CombinerHelper::tryFoldAndOrOrICmpsUsingRanges(GLogicalBinOp *Logic,
6825 BuildFnTy &MatchInfo) {
6826 assert(Logic->getOpcode() != TargetOpcode::G_XOR && "unexpected xor");
6827 bool IsAnd = Logic->getOpcode() == TargetOpcode::G_AND;
6828 Register DstReg = Logic->getReg(0);
6829 Register LHS = Logic->getLHSReg();
6830 Register RHS = Logic->getRHSReg();
6831 unsigned Flags = Logic->getFlags();
6832
6833 // We need a G_ICMP on the LHS register.
6834 GICmp *Cmp1 = getOpcodeDef<GICmp>(LHS, MRI);
6835 if (!Cmp1)
6836 return false;
6837
6838 // We need a G_ICMP on the RHS register.
6839 GICmp *Cmp2 = getOpcodeDef<GICmp>(RHS, MRI);
6840 if (!Cmp2)
6841 return false;
6842
6843 // We want to fold the icmps.
6844 if (!MRI.hasOneNonDBGUse(Cmp1->getReg(0)) ||
6845 !MRI.hasOneNonDBGUse(Cmp2->getReg(0)))
6846 return false;
6847
6848 APInt C1;
6849 APInt C2;
6850 std::optional<ValueAndVReg> MaybeC1 =
6851 getIConstantVRegValWithLookThrough(Cmp1->getRHSReg(), MRI);
6852 if (!MaybeC1)
6853 return false;
6854 C1 = MaybeC1->Value;
6855
6856 std::optional<ValueAndVReg> MaybeC2 =
6857 getIConstantVRegValWithLookThrough(Cmp2->getRHSReg(), MRI);
6858 if (!MaybeC2)
6859 return false;
6860 C2 = MaybeC2->Value;
6861
6862 Register R1 = Cmp1->getLHSReg();
6863 Register R2 = Cmp2->getLHSReg();
6864 CmpInst::Predicate Pred1 = Cmp1->getCond();
6865 CmpInst::Predicate Pred2 = Cmp2->getCond();
6866 LLT CmpTy = MRI.getType(Cmp1->getReg(0));
6867 LLT CmpOperandTy = MRI.getType(R1);
6868
6869 if (CmpOperandTy.isPointer())
6870 return false;
6871
6872 // We build ands, adds, and constants of type CmpOperandTy.
6873 // They must be legal to build.
6874 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_AND, CmpOperandTy}) ||
6875 !isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, CmpOperandTy}) ||
6876 !isConstantLegalOrBeforeLegalizer(CmpOperandTy))
6877 return false;
6878
6879 // Look through an add of a constant offset on R1, R2, or both operands.
6880 // This lets us turn the R + C' < C'' range idiom into a proper range.
6881 std::optional<APInt> Offset1;
6882 std::optional<APInt> Offset2;
6883 if (R1 != R2) {
6884 if (GAdd *Add = getOpcodeDef<GAdd>(R1, MRI)) {
6885 std::optional<ValueAndVReg> MaybeOffset1 =
6886 getIConstantVRegValWithLookThrough(Add->getRHSReg(), MRI);
6887 if (MaybeOffset1) {
6888 R1 = Add->getLHSReg();
6889 Offset1 = MaybeOffset1->Value;
6890 }
6891 }
6892 if (GAdd *Add = getOpcodeDef<GAdd>(R2, MRI)) {
6893 std::optional<ValueAndVReg> MaybeOffset2 =
6894 getIConstantVRegValWithLookThrough(Add->getRHSReg(), MRI);
6895 if (MaybeOffset2) {
6896 R2 = Add->getLHSReg();
6897 Offset2 = MaybeOffset2->Value;
6898 }
6899 }
6900 }
6901
6902 if (R1 != R2)
6903 return false;
6904
6905 // We calculate the icmp ranges, including any offsets we looked through.
6906 ConstantRange CR1 = ConstantRange::makeExactICmpRegion(
6907 IsAnd ? ICmpInst::getInversePredicate(Pred1) : Pred1, C1);
6908 if (Offset1)
6909 CR1 = CR1.subtract(*Offset1);
6910
6911 ConstantRange CR2 = ConstantRange::makeExactICmpRegion(
6912 IsAnd ? ICmpInst::getInversePredicate(Pred2) : Pred2, C2);
6913 if (Offset2)
6914 CR2 = CR2.subtract(*Offset2);
6915
6916 bool CreateMask = false;
6917 APInt LowerDiff;
6918 std::optional<ConstantRange> CR = CR1.exactUnionWith(CR2);
6919 if (!CR) {
6920 // We need non-wrapping ranges.
6921 if (CR1.isWrappedSet() || CR2.isWrappedSet())
6922 return false;
6923
6924 // Check whether we have equal-size ranges that only differ by one bit.
6925 // In that case we can apply a mask to map one range onto the other.
6926 LowerDiff = CR1.getLower() ^ CR2.getLower();
6927 APInt UpperDiff = (CR1.getUpper() - 1) ^ (CR2.getUpper() - 1);
6928 APInt CR1Size = CR1.getUpper() - CR1.getLower();
6929 if (!LowerDiff.isPowerOf2() || LowerDiff != UpperDiff ||
6930 CR1Size != CR2.getUpper() - CR2.getLower())
6931 return false;
6932
6933 CR = CR1.getLower().ult(CR2.getLower()) ? CR1 : CR2;
6934 CreateMask = true;
6935 }
6936
6937 if (IsAnd)
6938 CR = CR->inverse();
6939
6940 CmpInst::Predicate NewPred;
6941 APInt NewC, Offset;
6942 CR->getEquivalentICmp(NewPred, NewC, Offset);
6943
6944 // We take the result type of one of the original icmps, CmpTy, for the
6945 // icmp we are about to build. The operand type, CmpOperandTy, is used
6946 // for the other instructions and constants we build. The parameter and
6947 // result types are the same for add and and. CmpTy and the type of
6948 // DstReg might differ, which is why we zext or trunc the new icmp into
6949 // the destination register.
6950
6951 MatchInfo = [=](MachineIRBuilder &B) {
6952 if (CreateMask && Offset != 0) {
6953 auto TildeLowerDiff = B.buildConstant(CmpOperandTy, ~LowerDiff);
6954 auto And = B.buildAnd(CmpOperandTy, R1, TildeLowerDiff); // the mask.
6955 auto OffsetC = B.buildConstant(CmpOperandTy, Offset);
6956 auto Add = B.buildAdd(CmpOperandTy, And, OffsetC, Flags);
6957 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
6958 auto ICmp = B.buildICmp(NewPred, CmpTy, Add, NewCon);
6959 B.buildZExtOrTrunc(DstReg, ICmp);
6960 } else if (CreateMask && Offset == 0) {
6961 auto TildeLowerDiff = B.buildConstant(CmpOperandTy, ~LowerDiff);
6962 auto And = B.buildAnd(CmpOperandTy, R1, TildeLowerDiff); // the mask.
6963 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
6964 auto ICmp = B.buildICmp(NewPred, CmpTy, And, NewCon);
6965 B.buildZExtOrTrunc(DstReg, ICmp);
6966 } else if (!CreateMask && Offset != 0) {
6967 auto OffsetC = B.buildConstant(CmpOperandTy, Offset);
6968 auto Add = B.buildAdd(CmpOperandTy, R1, OffsetC, Flags);
6969 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
6970 auto ICmp = B.buildICmp(NewPred, CmpTy, Add, NewCon);
6971 B.buildZExtOrTrunc(DstReg, ICmp);
6972 } else if (!CreateMask && Offset == 0) {
6973 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
6974 auto ICmp = B.buildICmp(NewPred, CmpTy, R1, NewCon);
6975 B.buildZExtOrTrunc(DstReg, ICmp);
6976 } else {
6977 llvm_unreachable("unexpected configuration of CreateMask and Offset");
6978 }
6979 };
6980 return true;
6981}
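The range reasoning above can be exercised on its own. The following is a minimal
editorial sketch (not part of CombinerHelper.cpp; it assumes only LLVM's public
APInt/ConstantRange APIs) of how (x u>= 5) && (x u<= 10) collapses into a single
compare on an offset value, mirroring the AND path of the combine:

  #include "llvm/ADT/APInt.h"
  #include "llvm/IR/ConstantRange.h"
  #include "llvm/IR/InstrTypes.h"
  #include <optional>
  using namespace llvm;

  static void foldAndOfICmpsExample() {
    APInt C1(32, 5), C2(32, 10);
    // For an AND we invert each predicate, union the "failing" ranges, and
    // invert the result before asking for an equivalent single icmp.
    ConstantRange CR1 = ConstantRange::makeExactICmpRegion(
        CmpInst::getInversePredicate(CmpInst::ICMP_UGE), C1); // x u< 5
    ConstantRange CR2 = ConstantRange::makeExactICmpRegion(
        CmpInst::getInversePredicate(CmpInst::ICMP_ULE), C2); // x u> 10
    if (std::optional<ConstantRange> CR = CR1.exactUnionWith(CR2)) {
      CmpInst::Predicate NewPred;
      APInt NewC, Offset;
      CR->inverse().getEquivalentICmp(NewPred, NewC, Offset);
      // Expect NewPred == ICMP_ULT, Offset == -5, NewC == 6, i.e. the combine
      // emits G_ADD x, -5 followed by G_ICMP ult, ..., 6 as in the MatchInfo
      // lambda above.
    }
  }

When exactUnionWith fails, the combine additionally tries the one-bit mask trick
shown above before giving up.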
6982
6983bool CombinerHelper::tryFoldLogicOfFCmps(GLogicalBinOp *Logic,
6984 BuildFnTy &MatchInfo) {
6985 assert(Logic->getOpcode() != TargetOpcode::G_XOR && "unexpected xor");
6986 Register DestReg = Logic->getReg(0);
6987 Register LHS = Logic->getLHSReg();
6988 Register RHS = Logic->getRHSReg();
6989 bool IsAnd = Logic->getOpcode() == TargetOpcode::G_AND;
6990
6991 // We need a compare on the LHS register.
6992 GFCmp *Cmp1 = getOpcodeDef<GFCmp>(LHS, MRI);
6993 if (!Cmp1)
6994 return false;
6995
6996 // We need a compare on the RHS register.
6997 GFCmp *Cmp2 = getOpcodeDef<GFCmp>(RHS, MRI);
6998 if (!Cmp2)
6999 return false;
7000
7001 LLT CmpTy = MRI.getType(Cmp1->getReg(0));
7002 LLT CmpOperandTy = MRI.getType(Cmp1->getLHSReg());
7003
7004 // We build a single fcmp: we want to fold the two fcmps and replace the
7005 // logic op, and both fcmps must have the same shape.
7006 if (!isLegalOrBeforeLegalizer(
7007 {TargetOpcode::G_FCMP, {CmpTy, CmpOperandTy}}) ||
7008 !MRI.hasOneNonDBGUse(Logic->getReg(0)) ||
7009 !MRI.hasOneNonDBGUse(Cmp1->getReg(0)) ||
7010 !MRI.hasOneNonDBGUse(Cmp2->getReg(0)) ||
7011 MRI.getType(Cmp1->getLHSReg()) != MRI.getType(Cmp2->getLHSReg()))
7012 return false;
7013
7014 CmpInst::Predicate PredL = Cmp1->getCond();
7015 CmpInst::Predicate PredR = Cmp2->getCond();
7016 Register LHS0 = Cmp1->getLHSReg();
7017 Register LHS1 = Cmp1->getRHSReg();
7018 Register RHS0 = Cmp2->getLHSReg();
7019 Register RHS1 = Cmp2->getRHSReg();
7020
7021 if (LHS0 == RHS1 && LHS1 == RHS0) {
7022 // Swap RHS operands to match LHS.
7023 PredR = CmpInst::getSwappedPredicate(PredR);
7024 std::swap(RHS0, RHS1);
7025 }
7026
7027 if (LHS0 == RHS0 && LHS1 == RHS1) {
7028 // We determine the new predicate.
7029 unsigned CmpCodeL = getFCmpCode(PredL);
7030 unsigned CmpCodeR = getFCmpCode(PredR);
7031 unsigned NewPred = IsAnd ? CmpCodeL & CmpCodeR : CmpCodeL | CmpCodeR;
7032 unsigned Flags = Cmp1->getFlags() | Cmp2->getFlags();
7033 MatchInfo = [=](MachineIRBuilder &B) {
7034 // The fcmp predicates fill the lower part of the enum.
7035 FCmpInst::Predicate Pred = static_cast<FCmpInst::Predicate>(NewPred);
7036 if (Pred == FCmpInst::FCMP_FALSE &&
7037 isConstantLegalOrBeforeLegalizer(CmpTy)) {
7038 auto False = B.buildConstant(CmpTy, 0);
7039 B.buildZExtOrTrunc(DestReg, False);
7040 } else if (Pred == FCmpInst::FCMP_TRUE &&
7041 isConstantLegalOrBeforeLegalizer(CmpTy)) {
7042 auto True =
7043 B.buildConstant(CmpTy, getICmpTrueVal(getTargetLowering(),
7044 CmpTy.isVector() /*isVector*/,
7045 true /*isFP*/));
7046 B.buildZExtOrTrunc(DestReg, True);
7047 } else { // No special case applies; use the merged predicate as is.
7048 auto Cmp = B.buildFCmp(Pred, CmpTy, LHS0, LHS1, Flags);
7049 B.buildZExtOrTrunc(DestReg, Cmp);
7050 }
7051 };
7052 return true;
7053 }
7054
7055 return false;
7056}
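A small worked instance of the predicate-code merge above (editorial note; the
numeric values follow the FCmpInst::Predicate encoding, whose low four bits are
the unordered/less/greater/equal tests):

  (fcmp olt x, y) || (fcmp ogt x, y)
    getFCmpCode(FCMP_OLT) | getFCmpCode(FCMP_OGT) = 0b0100 | 0b0010 = 0b0110
    0b0110 is FCMP_ONE, so the two compares and the G_OR become a single
    G_FCMP floatpred(one), x, y, zext/trunc'd into the destination register.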
7057
7058bool CombinerHelper::matchAnd(MachineInstr &MI, BuildFnTy &MatchInfo) {
7059 GAnd *And = cast<GAnd>(&MI);
7060
7061 if (tryFoldAndOrOrICmpsUsingRanges(And, MatchInfo))
7062 return true;
7063
7064 if (tryFoldLogicOfFCmps(And, MatchInfo))
7065 return true;
7066
7067 return false;
7068}
7069
7070bool CombinerHelper::matchOr(MachineInstr &MI, BuildFnTy &MatchInfo) {
7071 GOr *Or = cast<GOr>(&MI);
7072
7073 if (tryFoldAndOrOrICmpsUsingRanges(Or, MatchInfo))
7074 return true;
7075
7076 if (tryFoldLogicOfFCmps(Or, MatchInfo))
7077 return true;
7078
7079 return false;
7080}
7081
7082bool CombinerHelper::matchAddOverflow(MachineInstr &MI, BuildFnTy &MatchInfo) {
7083 GAddCarryOut *Add = cast<GAddCarryOut>(&MI);
7084
7085 // Addo has no flags to preserve.
7086 Register Dst = Add->getReg(0);
7087 Register Carry = Add->getReg(1);
7088 Register LHS = Add->getLHSReg();
7089 Register RHS = Add->getRHSReg();
7090 bool IsSigned = Add->isSigned();
7091 LLT DstTy = MRI.getType(Dst);
7092 LLT CarryTy = MRI.getType(Carry);
7093
7094 // If the carry is dead, fold addo -> add, undef.
7095 if (MRI.use_nodbg_empty(Carry) &&
7096 isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, {DstTy}})) {
7097 MatchInfo = [=](MachineIRBuilder &B) {
7098 B.buildAdd(Dst, LHS, RHS);
7099 B.buildUndef(Carry);
7100 };
7101 return true;
7102 }
7103
7104 // Canonicalize constant to RHS.
7105 if (isConstantOrConstantVectorI(LHS) && !isConstantOrConstantVectorI(RHS)) {
7106 if (IsSigned) {
7107 MatchInfo = [=](MachineIRBuilder &B) {
7108 B.buildSAddo(Dst, Carry, RHS, LHS);
7109 };
7110 return true;
7111 }
7112 // !IsSigned
7113 MatchInfo = [=](MachineIRBuilder &B) {
7114 B.buildUAddo(Dst, Carry, RHS, LHS);
7115 };
7116 return true;
7117 }
7118
7119 std::optional<APInt> MaybeLHS = getConstantOrConstantSplatVector(LHS);
7120 std::optional<APInt> MaybeRHS = getConstantOrConstantSplatVector(RHS);
7121
7122 // Fold addo(c1, c2) -> c3, carry.
7123 if (MaybeLHS && MaybeRHS && isConstantLegalOrBeforeLegalizer(DstTy) &&
7124 isConstantLegalOrBeforeLegalizer(CarryTy)) {
7125 bool Overflow;
7126 APInt Result = IsSigned ? MaybeLHS->sadd_ov(*MaybeRHS, Overflow)
7127 : MaybeLHS->uadd_ov(*MaybeRHS, Overflow);
7128 MatchInfo = [=](MachineIRBuilder &B) {
7129 B.buildConstant(Dst, Result);
7130 B.buildConstant(Carry, Overflow);
7131 };
7132 return true;
7133 }
7134
7135 // Fold (addo x, 0) -> x, no carry
7136 if (MaybeRHS && *MaybeRHS == 0 && isConstantLegalOrBeforeLegalizer(CarryTy)) {
7137 MatchInfo = [=](MachineIRBuilder &B) {
7138 B.buildCopy(Dst, LHS);
7139 B.buildConstant(Carry, 0);
7140 };
7141 return true;
7142 }
7143
7144 // Given 2 constant operands whose sum does not overflow:
7145 // uaddo (X +nuw C0), C1 -> uaddo X, C0 + C1
7146 // saddo (X +nsw C0), C1 -> saddo X, C0 + C1
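// (Editorial illustration, not in the original source: with 8-bit values,
//  uaddo ((x +nuw 10), 20) --> uaddo (x, 30); 10 + 20 does not wrap, and the
//  nuw flag guarantees the inner add did not wrap either, so the carry of the
//  merged uaddo matches the carry of the original one.)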
7147 GAdd *AddLHS = getOpcodeDef<GAdd>(LHS, MRI);
7148 if (MaybeRHS && AddLHS && MRI.hasOneNonDBGUse(Add->getReg(0)) &&
7149 ((IsSigned && AddLHS->getFlag(MachineInstr::MIFlag::NoSWrap)) ||
7150 (!IsSigned && AddLHS->getFlag(MachineInstr::MIFlag::NoUWrap)))) {
7151 std::optional<APInt> MaybeAddRHS =
7152 getConstantOrConstantSplatVector(AddLHS->getRHSReg());
7153 if (MaybeAddRHS) {
7154 bool Overflow;
7155 APInt NewC = IsSigned ? MaybeAddRHS->sadd_ov(*MaybeRHS, Overflow)
7156 : MaybeAddRHS->uadd_ov(*MaybeRHS, Overflow);
7157 if (!Overflow && isConstantLegalOrBeforeLegalizer(DstTy)) {
7158 if (IsSigned) {
7159 MatchInfo = [=](MachineIRBuilder &B) {
7160 auto ConstRHS = B.buildConstant(DstTy, NewC);
7161 B.buildSAddo(Dst, Carry, AddLHS->getLHSReg(), ConstRHS);
7162 };
7163 return true;
7164 }
7165 // !IsSigned
7166 MatchInfo = [=](MachineIRBuilder &B) {
7167 auto ConstRHS = B.buildConstant(DstTy, NewC);
7168 B.buildUAddo(Dst, Carry, AddLHS->getLHSReg(), ConstRHS);
7169 };
7170 return true;
7171 }
7172 }
7173 };
7174
7175 // We try to combine addo into a non-overflowing add.
7176 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, {DstTy}}) ||
7177 !isConstantLegalOrBeforeLegalizer(CarryTy))
7178 return false;
7179
7180 // We try to combine uaddo into a non-overflowing add.
7181 if (!IsSigned) {
7182 ConstantRange CRLHS =
7183 ConstantRange::fromKnownBits(KB->getKnownBits(LHS), /*IsSigned=*/false);
7184 ConstantRange CRRHS =
7185 ConstantRange::fromKnownBits(KB->getKnownBits(RHS), /*IsSigned=*/false);
7186
7187 switch (CRLHS.unsignedAddMayOverflow(CRRHS)) {
7188 case ConstantRange::OverflowResult::MayOverflow:
7189 return false;
7190 case ConstantRange::OverflowResult::NeverOverflows: {
7191 MatchInfo = [=](MachineIRBuilder &B) {
7192 B.buildAdd(Dst, LHS, RHS, MachineInstr::MIFlag::NoUWrap);
7193 B.buildConstant(Carry, 0);
7194 };
7195 return true;
7196 }
7197 case ConstantRange::OverflowResult::AlwaysOverflowsLow:
7198 case ConstantRange::OverflowResult::AlwaysOverflowsHigh: {
7199 MatchInfo = [=](MachineIRBuilder &B) {
7200 B.buildAdd(Dst, LHS, RHS);
7201 B.buildConstant(Carry, 1);
7202 };
7203 return true;
7204 }
7205 }
7206 return false;
7207 }
7208
7209 // We try to combine saddo into a non-overflowing add.
7210
7211 // If LHS and RHS each have at least two sign bits, then there is no signed
7212 // overflow.
7213 if (KB->computeNumSignBits(RHS) > 1 && KB->computeNumSignBits(LHS) > 1) {
7214 MatchInfo = [=](MachineIRBuilder &B) {
7215 B.buildAdd(Dst, LHS, RHS, MachineInstr::MIFlag::NoSWrap);
7216 B.buildConstant(Carry, 0);
7217 };
7218 return true;
7219 }
7220
7221 ConstantRange CRLHS =
7222 ConstantRange::fromKnownBits(KB->getKnownBits(LHS), /*IsSigned=*/true);
7223 ConstantRange CRRHS =
7224 ConstantRange::fromKnownBits(KB->getKnownBits(RHS), /*IsSigned=*/true);
7225
7226 switch (CRLHS.signedAddMayOverflow(CRRHS)) {
7227 case ConstantRange::OverflowResult::MayOverflow:
7228 return false;
7229 case ConstantRange::OverflowResult::NeverOverflows: {
7230 MatchInfo = [=](MachineIRBuilder &B) {
7231 B.buildAdd(Dst, LHS, RHS, MachineInstr::MIFlag::NoSWrap);
7232 B.buildConstant(Carry, 0);
7233 };
7234 return true;
7235 }
7236 case ConstantRange::OverflowResult::AlwaysOverflowsLow:
7237 case ConstantRange::OverflowResult::AlwaysOverflowsHigh: {
7238 MatchInfo = [=](MachineIRBuilder &B) {
7239 B.buildAdd(Dst, LHS, RHS);
7240 B.buildConstant(Carry, 1);
7241 };
7242 return true;
7243 }
7244 }
7245
7246 return false;
7247}
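The known-bits reasoning at the end of matchAddOverflow can be tried in isolation.
Below is a minimal editorial sketch (not part of this file; it assumes only the
public KnownBits and ConstantRange APIs) showing a case where unsigned addition
provably never overflows:

  #include "llvm/IR/ConstantRange.h"
  #include "llvm/Support/KnownBits.h"
  using namespace llvm;

  static bool uaddNeverOverflows() {
    // Two 32-bit values whose upper 16 bits are known zero: both ranges are
    // [0, 2^16), so the unsigned sum fits in 32 bits.
    KnownBits L(32), R(32);
    L.Zero.setHighBits(16);
    R.Zero.setHighBits(16);
    ConstantRange CRL = ConstantRange::fromKnownBits(L, /*IsSigned=*/false);
    ConstantRange CRR = ConstantRange::fromKnownBits(R, /*IsSigned=*/false);
    return CRL.unsignedAddMayOverflow(CRR) ==
           ConstantRange::OverflowResult::NeverOverflows;
  }

When the query answers NeverOverflows, the combine rewrites G_UADDO into a plain
G_ADD carrying the nuw flag plus a constant-0 carry, exactly as in the MatchInfo
lambda above.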
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
unsigned RegSize
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static const LLT S1
amdgpu AMDGPU Register Bank Select
Rewrite undef for PHI
This file declares a class to represent arbitrary precision floating point values and provide a varie...
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static bool hasMoreUses(const MachineInstr &MI0, const MachineInstr &MI1, const MachineRegisterInfo &MRI)
static bool isContractableFMul(MachineInstr &MI, bool AllowFusionGlobally)
Checks if MI is TargetOpcode::G_FMUL and contractable either due to global flags or MachineInstr flag...
static unsigned getIndexedOpc(unsigned LdStOpc)
static APFloat constantFoldFpUnary(const MachineInstr &MI, const MachineRegisterInfo &MRI, const APFloat &Val)
static std::optional< std::pair< GZExtLoad *, int64_t > > matchLoadAndBytePosition(Register Reg, unsigned MemSizeInBits, const MachineRegisterInfo &MRI)
Helper function for findLoadOffsetsForLoadOrCombine.
static Register peekThroughBitcast(Register Reg, const MachineRegisterInfo &MRI)
static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I)
static cl::opt< bool > ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false), cl::desc("Force all indexed operations to be " "legal for the GlobalISel combiner"))
static cl::opt< unsigned > PostIndexUseThreshold("post-index-use-threshold", cl::Hidden, cl::init(32), cl::desc("Number of uses of a base pointer to check before it is no longer " "considered for post-indexing."))
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
static unsigned getExtLoadOpcForExtend(unsigned ExtOpc)
static Type * getTypeForLLT(LLT Ty, LLVMContext &C)
static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits, int64_t Cst, bool IsVector, bool IsFP)
static LLT getMidVTForTruncRightShiftCombine(LLT ShiftTy, LLT TruncTy)
static bool canFoldInAddressingMode(GLoadStore *MI, const TargetLowering &TLI, MachineRegisterInfo &MRI)
Return true if 'MI' is a load or a store that may be fold it's address operand into the load / store ...
static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I)
static Register buildLogBase2(Register V, MachineIRBuilder &MIB)
Determines the LogBase2 value for a non-null input value using the transform: LogBase2(V) = (EltBits ...
This contains common combine transformations that may be used in a combine pass,or by the target else...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
uint64_t Addr
uint64_t Size
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
Rewrite Partial Register Uses
This contains common code to allow clients to notify changes to machine instr.
Provides analysis for querying information about KnownBits during GISel passes.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
#define _
IRTranslator LLVM IR MI
static LVOptions Options
Definition: LVOptions.cpp:25
Interface for Targets to specify which operations they can successfully select and how the others sho...
Implement a low-level type suitable for MachineInstr level instruction selection.
#define I(x, y, z)
Definition: MD5.cpp:58
Contains matchers for matching SSA Machine Instructions.
mir Rename Register Operands
This file declares the MachineIRBuilder class.
unsigned const TargetRegisterInfo * TRI
#define R2(n)
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
R600 Clause Merge
const SmallVectorImpl< MachineOperand > & Cond
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file implements a set that has insertion order iteration characteristics.
This file implements the SmallBitVector class.
This file describes how to lower LLVM code to machine code.
Value * RHS
Value * LHS
const fltSemantics & getSemantics() const
Definition: APFloat.h:1303
bool isNaN() const
Definition: APFloat.h:1293
opStatus fusedMultiplyAdd(const APFloat &Multiplicand, const APFloat &Addend, roundingMode RM)
Definition: APFloat.h:1096
APInt bitcastToAPInt() const
Definition: APFloat.h:1210
Class for arbitrary precision integers.
Definition: APInt.h:76
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1491
APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition: APInt.cpp:1002
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:906
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition: APInt.h:349
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1160
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:358
APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition: APInt.cpp:1636
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1439
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition: APInt.h:1089
int32_t exactLogBase2() const
Definition: APInt.h:1725
void ashrInPlace(unsigned ShiftAmt)
Arithmetic right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:812
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition: APInt.h:1589
APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
Definition: APInt.cpp:1010
unsigned countl_one() const
Count the number of leading one bits.
Definition: APInt.h:1565
APInt multiplicativeInverse() const
Definition: APInt.cpp:1244
bool isMask(unsigned numBits) const
Definition: APInt.h:466
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:418
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:178
bool isOne() const
Determine if this is a value of 1.
Definition: APInt.h:367
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:217
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1513
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:829
unsigned countr_one() const
Count the number of trailing one bits.
Definition: APInt.h:1606
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition: APInt.h:1199
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
AttributeSet getAttributes(unsigned Index) const
The attributes for the specified index are returned.
bool isEquality() const
Determine if this is an equals/not equals predicate.
Definition: InstrTypes.h:1255
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:993
@ FCMP_TRUE
1 1 1 1 Always true (always folded)
Definition: InstrTypes.h:1010
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:1022
@ ICMP_SLE
signed less or equal
Definition: InstrTypes.h:1023
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition: InstrTypes.h:999
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
Definition: InstrTypes.h:1008
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition: InstrTypes.h:997
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition: InstrTypes.h:998
@ ICMP_UGE
unsigned greater or equal
Definition: InstrTypes.h:1017
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:1016
@ ICMP_SGT
signed greater than
Definition: InstrTypes.h:1020
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition: InstrTypes.h:1007
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:1018
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition: InstrTypes.h:1005
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition: InstrTypes.h:1000
@ ICMP_EQ
equal
Definition: InstrTypes.h:1014
@ ICMP_NE
not equal
Definition: InstrTypes.h:1015
@ ICMP_SGE
signed greater or equal
Definition: InstrTypes.h:1021
@ ICMP_ULE
unsigned less or equal
Definition: InstrTypes.h:1019
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition: InstrTypes.h:1006
@ FCMP_FALSE
0 0 0 0 Always false (always folded)
Definition: InstrTypes.h:995
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition: InstrTypes.h:1167
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition: InstrTypes.h:1129
static bool isOrdered(Predicate predicate)
Determine if the predicate is an ordered operation.
void applyUDivByConst(MachineInstr &MI)
void applyCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal)
bool matchCombineShuffleVector(MachineInstr &MI, SmallVectorImpl< Register > &Ops)
Check if the G_SHUFFLE_VECTOR MI can be replaced by a concat_vectors.
bool matchPtrAddZero(MachineInstr &MI)
}
bool matchAllExplicitUsesAreUndef(MachineInstr &MI)
Return true if all register explicit use operands on MI are defined by a G_IMPLICIT_DEF.
void replaceSingleDefInstWithOperand(MachineInstr &MI, unsigned OpIdx)
Delete MI and replace all of its uses with its OpIdx-th operand.
const RegisterBank * getRegBank(Register Reg) const
Get the register bank of Reg.
bool matchReassocPtrAdd(MachineInstr &MI, BuildFnTy &MatchInfo)
Reassociate pointer calculations with G_ADD involved, to allow better addressing mode usage.
bool matchUDivByConst(MachineInstr &MI)
Combine G_UDIV by constant into a multiply by magic constant.
void applyExtractVecEltBuildVec(MachineInstr &MI, Register &Reg)
bool matchInsertExtractVecEltOutOfBounds(MachineInstr &MI)
Return true if a G_{EXTRACT,INSERT}_VECTOR_ELT has an out of range index.
bool matchShiftsTooBig(MachineInstr &MI)
Match shifts greater or equal to the bitwidth of the operation.
bool tryCombineCopy(MachineInstr &MI)
If MI is COPY, try to combine it.
bool matchTruncLshrBuildVectorFold(MachineInstr &MI, Register &MatchInfo)
bool matchUndefStore(MachineInstr &MI)
Return true if a G_STORE instruction MI is storing an undef value.
bool matchRedundantBinOpInEquality(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform: (X + Y) == X -> Y == 0 (X - Y) == X -> Y == 0 (X ^ Y) == X -> Y == 0 (X + Y) !...
bool matchRedundantSExtInReg(MachineInstr &MI)
bool matchCombineFAddFpExtFMulToFMadOrFMAAggressive(MachineInstr &MI, BuildFnTy &MatchInfo)
bool matchReassocConstantInnerRHS(GPtrAdd &MI, MachineInstr *RHS, BuildFnTy &MatchInfo)
bool matchSubAddSameReg(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform: (x + y) - y -> x (x + y) - x -> y x - (y + x) -> 0 - y x - (x + z) -> 0 - z.
bool matchConstantFoldFPBinOp(MachineInstr &MI, ConstantFP *&MatchInfo)
Do constant FP folding when opportunities are exposed after MIR building.
void applyCombineShiftToUnmerge(MachineInstr &MI, const unsigned &ShiftVal)
void applyCombineUnmergeZExtToZExt(MachineInstr &MI)
void applyCommuteBinOpOperands(MachineInstr &MI)
bool matchBinOpSameVal(MachineInstr &MI)
Optimize (x op x) -> x.
void applyCombineUnmergeConstant(MachineInstr &MI, SmallVectorImpl< APInt > &Csts)
bool matchCombineFSubFNegFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z)) (fsub (fneg (fmul,...
bool matchCombineCopy(MachineInstr &MI)
bool matchConstantSelectCmp(MachineInstr &MI, unsigned &OpIdx)
Return true if a G_SELECT instruction MI has a constant comparison.
void eraseInst(MachineInstr &MI)
Erase MI.
void replaceRegWith(MachineRegisterInfo &MRI, Register FromReg, Register ToReg) const
MachineRegisterInfo::replaceRegWith() and inform the observer of the changes.
void replaceRegOpWith(MachineRegisterInfo &MRI, MachineOperand &FromRegOp, Register ToReg) const
Replace a single register operand with a new register and inform the observer of the changes.
bool matchCombineFAddFMAFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z)) (fadd (fmad x,...
void applySimplifyAddToSub(MachineInstr &MI, std::tuple< Register, Register > &MatchInfo)
bool matchSimplifySelectToMinMax(MachineInstr &MI, BuildFnTy &MatchInfo)
bool matchCombineConcatVectors(MachineInstr &MI, SmallVector< Register > &Ops)
If MI is G_CONCAT_VECTORS, try to combine it.
bool matchAddSubSameReg(MachineInstr &MI, Register &Src)
Transform G_ADD(x, G_SUB(y, x)) to y.
void applyRotateOutOfRange(MachineInstr &MI)
bool matchMulOBy2(MachineInstr &MI, BuildFnTy &MatchInfo)
Match: (G_UMULO x, 2) -> (G_UADDO x, x) (G_SMULO x, 2) -> (G_SADDO x, x)
bool matchRotateOutOfRange(MachineInstr &MI)
void applyCombineConstPtrAddToI2P(MachineInstr &MI, APInt &NewCst)
void applyCombineTruncOfShift(MachineInstr &MI, std::pair< MachineInstr *, LLT > &MatchInfo)
void applyCombineShuffleVector(MachineInstr &MI, const ArrayRef< Register > Ops)
Replace MI with a concat_vectors with Ops.
const TargetLowering & getTargetLowering() const
void applyBuildFnNoErase(MachineInstr &MI, BuildFnTy &MatchInfo)
Use a function which takes in a MachineIRBuilder to perform a combine.
void applyPtrAddZero(MachineInstr &MI)
bool matchTruncBuildVectorFold(MachineInstr &MI, Register &MatchInfo)
void setRegBank(Register Reg, const RegisterBank *RegBank)
Set the register bank of Reg.
bool matchRedundantAnd(MachineInstr &MI, Register &Replacement)
void replaceInstWithConstant(MachineInstr &MI, int64_t C)
Replace an instruction with a G_CONSTANT with value C.
bool matchAshrShlToSextInreg(MachineInstr &MI, std::tuple< Register, int64_t > &MatchInfo)
Match ashr (shl x, C), C -> sext_inreg (C)
bool tryCombineExtendingLoads(MachineInstr &MI)
If MI is extend that consumes the result of a load, try to combine it.
bool tryCombineShiftToUnmerge(MachineInstr &MI, unsigned TargetShiftAmount)
bool matchCombineUnmergeUndef(MachineInstr &MI, std::function< void(MachineIRBuilder &)> &MatchInfo)
Transform G_UNMERGE G_IMPLICIT_DEF -> G_IMPLICIT_DEF, G_IMPLICIT_DEF, ...
void applySDivByConst(MachineInstr &MI)
bool matchUndefSelectCmp(MachineInstr &MI)
Return true if a G_SELECT instruction MI has an undef comparison.
void replaceInstWithUndef(MachineInstr &MI)
Replace an instruction with a G_IMPLICIT_DEF.
bool matchRedundantOr(MachineInstr &MI, Register &Replacement)
bool matchOperandIsUndef(MachineInstr &MI, unsigned OpIdx)
Check if operand OpIdx is undef.
void applyBuildFn(MachineInstr &MI, BuildFnTy &MatchInfo)
Use a function which takes in a MachineIRBuilder to perform a combine.
bool matchCombineConstPtrAddToI2P(MachineInstr &MI, APInt &NewCst)
void replaceInstWithFConstant(MachineInstr &MI, double C)
Replace an instruction with a G_FCONSTANT with value C.
bool matchBitfieldExtractFromSExtInReg(MachineInstr &MI, BuildFnTy &MatchInfo)
Form a G_SBFX from a G_SEXT_INREG fed by a right shift.
bool matchEqualDefs(const MachineOperand &MOP1, const MachineOperand &MOP2)
Return true if MOP1 and MOP2 are register operands are defined by equivalent instructions.
bool matchShiftImmedChain(MachineInstr &MI, RegisterImmPair &MatchInfo)
Fold (shift (shift base, x), y) -> (shift base (x+y))
bool matchPtrAddImmedChain(MachineInstr &MI, PtrAddChain &MatchInfo)
void applyShiftImmedChain(MachineInstr &MI, RegisterImmPair &MatchInfo)
void applyOptBrCondByInvertingCond(MachineInstr &MI, MachineInstr *&BrCond)
bool matchMulOBy0(MachineInstr &MI, BuildFnTy &MatchInfo)
Match: (G_*MULO x, 0) -> 0 + no carry out.
void replaceSingleDefInstWithReg(MachineInstr &MI, Register Replacement)
Delete MI and replace all of its uses with Replacement.
bool matchFunnelShiftToRotate(MachineInstr &MI)
Match an FSHL or FSHR that can be combined to a ROTR or ROTL rotate.
bool matchNotCmp(MachineInstr &MI, SmallVectorImpl< Register > &RegsToNegate)
Combine inverting a result of a compare into the opposite cond code.
void applyCombineExtOfExt(MachineInstr &MI, std::tuple< Register, unsigned > &MatchInfo)
void replaceOpcodeWith(MachineInstr &FromMI, unsigned ToOpcode) const
Replace the opcode in instruction with a new opcode and inform the observer of the changes.
bool matchOperandIsKnownToBeAPowerOfTwo(MachineInstr &MI, unsigned OpIdx)
Check if operand OpIdx is known to be a power of 2.
void applyCombineCopy(MachineInstr &MI)
void applyCombineTruncOfExt(MachineInstr &MI, std::pair< Register, unsigned > &MatchInfo)
bool matchAnyExplicitUseIsUndef(MachineInstr &MI)
Return true if any explicit use operand on MI is defined by a G_IMPLICIT_DEF.
bool matchFsubToFneg(MachineInstr &MI, Register &MatchInfo)
void applyCombineAddP2IToPtrAdd(MachineInstr &MI, std::pair< Register, bool > &PtrRegAndCommute)
bool matchNarrowBinopFeedingAnd(MachineInstr &MI, BuildFnTy &MatchInfo)
void applyCombineConcatVectors(MachineInstr &MI, SmallVector< Register > &Ops)
Replace MI with a flattened build_vector with Ops or an implicit_def if Ops is empty.
bool matchSextTruncSextLoad(MachineInstr &MI)
bool matchShiftOfShiftedLogic(MachineInstr &MI, ShiftOfShiftedLogic &MatchInfo)
If we have a shift-by-constant of a bitwise logic op that itself has a shift-by-constant operand with...
GISelKnownBits * KB
bool matchExtractAllEltsFromBuildVector(MachineInstr &MI, SmallVectorImpl< std::pair< Register, MachineInstr * > > &MatchInfo)
void applyCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo)
MachineInstr * buildSDivUsingMul(MachineInstr &MI)
Given an G_SDIV MI expressing a signed divide by constant, return an expression that implements it by...
void applySDivByPow2(MachineInstr &MI)
void applyFunnelShiftConstantModulo(MachineInstr &MI)
Replaces the shift amount in MI with ShiftAmt % BW.
bool matchConstantFoldBinOp(MachineInstr &MI, APInt &MatchInfo)
Do constant folding when opportunities are exposed after MIR building.
bool isPreLegalize() const
bool matchCombineLoadWithAndMask(MachineInstr &MI, BuildFnTy &MatchInfo)
Match (and (load x), mask) -> zextload x.
bool matchConstantOp(const MachineOperand &MOP, int64_t C)
Return true if MOP is defined by a G_CONSTANT or splat with a value equal to C.
bool matchCombineFSubFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform (fsub (fmul x, y), z) -> (fma x, y, -z) (fsub (fmul x, y), z) -> (fmad x,...
bool matchAnd(MachineInstr &MI, BuildFnTy &MatchInfo)
Combine ands.
void applyCombineI2PToP2I(MachineInstr &MI, Register &Reg)
void applyNotCmp(MachineInstr &MI, SmallVectorImpl< Register > &RegsToNegate)
void applyCombineExtendingLoads(MachineInstr &MI, PreferredTuple &MatchInfo)
bool matchConstantFPOp(const MachineOperand &MOP, double C)
Return true if MOP is defined by a G_FCONSTANT or splat with a value exactly equal to C.
bool matchSimplifyAddToSub(MachineInstr &MI, std::tuple< Register, Register > &MatchInfo)
Return true if MI is a G_ADD which can be simplified to a G_SUB.
bool tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0)
Optimize memcpy intrinsics et al, e.g.
bool matchSelectSameVal(MachineInstr &MI)
Optimize (cond ? x : x) -> x.
void applyCombineConstantFoldFpUnary(MachineInstr &MI, const ConstantFP *Cst)
Transform fp_instr(cst) to constant result of the fp operation.
bool matchCombineExtendingLoads(MachineInstr &MI, PreferredTuple &MatchInfo)
bool tryReassocBinOp(unsigned Opc, Register DstReg, Register Op0, Register Op1, BuildFnTy &MatchInfo)
Try to reassociate to reassociate operands of a commutative binop.
bool isConstantLegalOrBeforeLegalizer(const LLT Ty) const
bool tryEmitMemcpyInline(MachineInstr &MI)
Emit loads and stores that perform the given memcpy.
void applyXorOfAndWithSameReg(MachineInstr &MI, std::pair< Register, Register > &MatchInfo)
bool matchXorOfAndWithSameReg(MachineInstr &MI, std::pair< Register, Register > &MatchInfo)
Fold (xor (and x, y), y) -> (and (not x), y) {.
bool matchCombineFSubFpExtFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform (fsub (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), (fneg z)) (fsub (fpext (fmul x,...
bool matchCombineFMinMaxNaN(MachineInstr &MI, unsigned &Info)
bool matchCombineShlOfExtend(MachineInstr &MI, RegisterImmPair &MatchData)
bool matchConstantFoldFMA(MachineInstr &MI, ConstantFP *&MatchInfo)
Constant fold G_FMA/G_FMAD.
bool matchBitfieldExtractFromAnd(MachineInstr &MI, BuildFnTy &MatchInfo)
Match: and (lshr x, cst), mask -> ubfx x, cst, width.
void applyShiftOfShiftedLogic(MachineInstr &MI, ShiftOfShiftedLogic &MatchInfo)
bool isLegal(const LegalityQuery &Query) const
bool matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo)
Combine selects.
bool matchCombineUnmergeConstant(MachineInstr &MI, SmallVectorImpl< APInt > &Csts)
Transform G_UNMERGE Constant -> Constant1, Constant2, ...
bool matchICmpToTrueFalseKnownBits(MachineInstr &MI, int64_t &MatchInfo)
bool matchCombineAnyExtTrunc(MachineInstr &MI, Register &Reg)
Transform anyext(trunc(x)) to x.
void applySimplifyURemByPow2(MachineInstr &MI)
Combine G_UREM x, (known power of 2) to an add and bitmasking.
bool matchReassocFoldConstantsInSubTree(GPtrAdd &MI, MachineInstr *LHS, MachineInstr *RHS, BuildFnTy &MatchInfo)
void applyCombineShuffleConcat(MachineInstr &MI, SmallVector< Register > &Ops)
Replace MI with a flattened build_vector with Ops or an implicit_def if Ops is empty.
MachineRegisterInfo & MRI
void applyUMulHToLShr(MachineInstr &MI)
bool matchLoadOrCombine(MachineInstr &MI, BuildFnTy &MatchInfo)
Match expression trees of the form.
bool matchShuffleToExtract(MachineInstr &MI)
bool matchUndefShuffleVectorMask(MachineInstr &MI)
Return true if a G_SHUFFLE_VECTOR instruction MI has an undef mask.
bool isLegalOrBeforeLegalizer(const LegalityQuery &Query) const
bool matchExtendThroughPhis(MachineInstr &MI, MachineInstr *&ExtMI)
bool matchAndOrDisjointMask(MachineInstr &MI, BuildFnTy &MatchInfo)
bool matchCombineExtractedVectorLoad(MachineInstr &MI, BuildFnTy &MatchInfo)
Combine a G_EXTRACT_VECTOR_ELT of a load into a narrowed load.
bool matchCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal)
Transform a multiply by a power-of-2 value to a left shift.
bool matchBitfieldExtractFromShr(MachineInstr &MI, BuildFnTy &MatchInfo)
Match: shr (shl x, n), k -> sbfx/ubfx x, pos, width.
void applyFoldBinOpIntoSelect(MachineInstr &MI, const unsigned &SelectOpNo)
SelectOperand is the operand in binary operator MI that is the select to fold.
bool matchBuildVectorIdentityFold(MachineInstr &MI, Register &MatchInfo)
bool matchCombineFAddFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform (fadd (fmul x, y), z) -> (fma x, y, z) (fadd (fmul x, y), z) -> (fmad x,...
bool matchRedundantNegOperands(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform (fadd x, fneg(y)) -> (fsub x, y) (fadd fneg(x), y) -> (fsub y, x) (fsub x,...
bool matchCombineMergeUnmerge(MachineInstr &MI, Register &MatchInfo)
Fold away a merge of an unmerge of the corresponding values.
void applyCombineInsertVecElts(MachineInstr &MI, SmallVectorImpl< Register > &MatchInfo)
bool matchCombineUnmergeZExtToZExt(MachineInstr &MI)
Transform X, Y = G_UNMERGE(G_ZEXT(Z)) -> X = G_ZEXT(Z); Y = G_CONSTANT 0.
bool matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI)
Transform X, Y<dead> = G_UNMERGE Z -> X = G_TRUNC Z.
bool matchConstantLargerBitWidth(MachineInstr &MI, unsigned ConstIdx)
Checks if constant at ConstIdx is larger than MI 's bitwidth.
CombinerHelper(GISelChangeObserver &Observer, MachineIRBuilder &B, bool IsPreLegalize, GISelKnownBits *KB=nullptr, MachineDominatorTree *MDT=nullptr, const LegalizerInfo *LI=nullptr)
bool matchCombineDivRem(MachineInstr &MI, MachineInstr *&OtherMI)
Try to combine G_[SU]DIV and G_[SU]REM into a single G_[SU]DIVREM when their source operands are iden...
bool matchCombineTruncOfExt(MachineInstr &MI, std::pair< Register, unsigned > &MatchInfo)
Transform trunc ([asz]ext x) to x or ([asz]ext x) or (trunc x).
bool isPredecessor(const MachineInstr &DefMI, const MachineInstr &UseMI)
Returns true if DefMI precedes UseMI or they are the same instruction.
bool matchDivByPow2(MachineInstr &MI, bool IsSigned)
Given an G_SDIV MI expressing a signed divided by a pow2 constant, return expressions that implements...
bool matchExtractVecEltBuildVec(MachineInstr &MI, Register &Reg)
bool matchUMulHToLShr(MachineInstr &MI)
bool dominates(const MachineInstr &DefMI, const MachineInstr &UseMI)
Returns true if DefMI dominates UseMI.
MachineInstr * buildUDivUsingMul(MachineInstr &MI)
Given an G_UDIV MI expressing a divide by constant, return an expression that implements it by multip...
bool matchCombineZextTrunc(MachineInstr &MI, Register &Reg)
Transform zext(trunc(x)) to x.
void applyCombineShlOfExtend(MachineInstr &MI, const RegisterImmPair &MatchData)
bool canCombineFMadOrFMA(MachineInstr &MI, bool &AllowFusionGlobally, bool &HasFMAD, bool &Aggressive, bool CanReassociate=false)
const LegalizerInfo * LI
void applyCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI)
void applyShuffleToExtract(MachineInstr &MI)
MachineDominatorTree * MDT
bool matchSDivByConst(MachineInstr &MI)
void applySextInRegOfLoad(MachineInstr &MI, std::tuple< Register, unsigned > &MatchInfo)
bool matchCombineUnmergeMergeToPlainValues(MachineInstr &MI, SmallVectorImpl< Register > &Operands)
Transform <ty,...> G_UNMERGE(G_MERGE ty X, Y, Z) -> ty X, Y, Z.
void applyExtractAllEltsFromBuildVector(MachineInstr &MI, SmallVectorImpl< std::pair< Register, MachineInstr * > > &MatchInfo)
void applyBuildFnMO(const MachineOperand &MO, BuildFnTy &MatchInfo)
Use a function which takes in a MachineIRBuilder to perform a combine.
bool matchCombineTruncOfShift(MachineInstr &MI, std::pair< MachineInstr *, LLT > &MatchInfo)
Transform trunc (shl x, K) to shl (trunc x), K if K < VT.getScalarSizeInBits().
const RegisterBankInfo * RBI
bool matchCommuteShift(MachineInstr &MI, BuildFnTy &MatchInfo)
bool matchCombineFAddFpExtFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z) (fadd (fpext (fmul x,...
bool matchReassocConstantInnerLHS(GPtrAdd &MI, MachineInstr *LHS, MachineInstr *RHS, BuildFnTy &MatchInfo)
void applyExtendThroughPhis(MachineInstr &MI, MachineInstr *&ExtMI)
const TargetRegisterInfo * TRI
bool tryCombineShuffleVector(MachineInstr &MI)
Try to combine G_SHUFFLE_VECTOR into G_CONCAT_VECTORS.
bool matchCombineI2PToP2I(MachineInstr &MI, Register &Reg)
Transform IntToPtr(PtrToInt(x)) to x if cast is in the same address space.
bool matchICmpToLHSKnownBits(MachineInstr &MI, BuildFnTy &MatchInfo)
GISelChangeObserver & Observer
bool matchCombineExtOfExt(MachineInstr &MI, std::tuple< Register, unsigned > &MatchInfo)
Transform [asz]ext([asz]ext(x)) to [asz]ext x.
bool matchOverlappingAnd(MachineInstr &MI, BuildFnTy &MatchInfo)
Fold and(and(x, C1), C2) -> C1&C2 ? and(x, C1&C2) : 0.
bool matchSextInRegOfLoad(MachineInstr &MI, std::tuple< Register, unsigned > &MatchInfo)
Match sext_inreg(load p), imm -> sextload p.
bool matchCombineInsertVecElts(MachineInstr &MI, SmallVectorImpl< Register > &MatchInfo)
bool matchCombineAddP2IToPtrAdd(MachineInstr &MI, std::pair< Register, bool > &PtrRegAndCommute)
Transform G_ADD (G_PTRTOINT x), y -> G_PTRTOINT (G_PTR_ADD x, y) Transform G_ADD y,...
bool matchOr(MachineInstr &MI, BuildFnTy &MatchInfo)
Combine ors.
void applyFunnelShiftToRotate(MachineInstr &MI)
void applyCombineUnmergeMergeToPlainValues(MachineInstr &MI, SmallVectorImpl< Register > &Operands)
bool matchOptBrCondByInvertingCond(MachineInstr &MI, MachineInstr *&BrCond)
If a brcond's true block is not the fallthrough, make it so by inverting the condition and swapping o...
bool matchAddEToAddO(MachineInstr &MI, BuildFnTy &MatchInfo)
Match: (G_*ADDE x, y, 0) -> (G_*ADDO x, y) (G_*SUBE x, y, 0) -> (G_*SUBO x, y)
bool matchAddOverflow(MachineInstr &MI, BuildFnTy &MatchInfo)
Combine addos.
void applyCombineP2IToI2P(MachineInstr &MI, Register &Reg)
Transform PtrToInt(IntToPtr(x)) to x.
bool matchCombineShiftToUnmerge(MachineInstr &MI, unsigned TargetShiftSize, unsigned &ShiftVal)
Reduce a shift by a constant to an unmerge and a shift on a half sized type.
bool matchCommuteConstantToRHS(MachineInstr &MI)
Match constant LHS ops that should be commuted.
void applyPtrAddImmedChain(MachineInstr &MI, PtrAddChain &MatchInfo)
void applyCombineDivRem(MachineInstr &MI, MachineInstr *&OtherMI)
void applyFsubToFneg(MachineInstr &MI, Register &MatchInfo)
void applyBuildInstructionSteps(MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo)
Replace MI with a series of instructions described in MatchInfo.
bool matchCombineFSubFpExtFNegFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform (fsub (fpext (fneg (fmul x, y))), z) -> (fneg (fma (fpext x), (fpext y),...
MachineIRBuilder & Builder
bool matchBitfieldExtractFromShrAnd(MachineInstr &MI, BuildFnTy &MatchInfo)
Match: shr (and x, n), k -> ubfx x, pos, width.
bool matchCombineShuffleConcat(MachineInstr &MI, SmallVector< Register > &Ops)
bool matchReassocCommBinOp(MachineInstr &MI, BuildFnTy &MatchInfo)
Reassociate commutative binary operations like G_ADD.
bool matchFoldBinOpIntoSelect(MachineInstr &MI, unsigned &SelectOpNo)
Push a binary operator through a select on constants.
bool matchConstantFoldCastOp(MachineInstr &MI, APInt &MatchInfo)
Do constant folding when opportunities are exposed after MIR building.
bool matchOperandIsZero(MachineInstr &MI, unsigned OpIdx)
Check if operand OpIdx is zero.
bool matchOrShiftToFunnelShift(MachineInstr &MI, BuildFnTy &MatchInfo)
void applyUDivByPow2(MachineInstr &MI)
Given an G_UDIV MI expressing an unsigned divided by a pow2 constant, return expressions that impleme...
bool matchHoistLogicOpWithSameOpcodeHands(MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo)
Match (logic_op (op x...), (op y...)) -> (op (logic_op x, y))
void applyAshShlToSextInreg(MachineInstr &MI, std::tuple< Register, int64_t > &MatchInfo)
void applySextTruncSextLoad(MachineInstr &MI)
bool matchCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo)
bool matchCommuteFPConstantToRHS(MachineInstr &MI)
Match constant LHS FP ops that should be commuted.
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:268
const APFloat & getValue() const
Definition: Constants.h:312
const APFloat & getValueAPF() const
Definition: Constants.h:311
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:145
This class represents a range of values.
Definition: ConstantRange.h:47
std::optional< ConstantRange > exactUnionWith(const ConstantRange &CR) const
Union the two ranges and return the result if it can be represented exactly, otherwise return std::nu...
ConstantRange subtract(const APInt &CI) const
Subtract the specified constant from the endpoints of this constant range.
static ConstantRange fromKnownBits(const KnownBits &Known, bool IsSigned)
Initialize a range based on a known bits constraint.
const APInt & getLower() const
Return the lower value for this range.
OverflowResult unsignedAddMayOverflow(const ConstantRange &Other) const
Return whether unsigned add of the two ranges always/never overflows.
bool isWrappedSet() const
Return true if this set wraps around the unsigned domain.
const APInt & getUpper() const
Return the upper value for this range.
static ConstantRange makeExactICmpRegion(CmpInst::Predicate Pred, const APInt &Other)
Produce the exact range such that all values in the returned range satisfy the given predicate with a...
OverflowResult signedAddMayOverflow(const ConstantRange &Other) const
Return whether signed add of the two ranges always/never overflows.
@ AlwaysOverflowsHigh
Always overflows in the direction of signed/unsigned max value.
@ AlwaysOverflowsLow
Always overflows in the direction of signed/unsigned min value.
@ MayOverflow
May or may not overflow.
This is an important base class in LLVM.
Definition: Constant.h:41
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
bool isBigEndian() const
Definition: DataLayout.h:239
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:202
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&... Args)
Definition: DenseMap.h:235
unsigned size() const
Definition: DenseMap.h:99
iterator end()
Definition: DenseMap.h:84
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:692
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:356
Represents overflowing add operations.
Represents an integer addition.
Represents a logical and.
CmpInst::Predicate getCond() const
Register getLHSReg() const
Register getRHSReg() const
Represents any generic load, including sign/zero extending variants.
Register getDstReg() const
Get the definition register of the loaded value.
Register getLHSReg() const
Register getRHSReg() const
Represents a G_BUILD_VECTOR.
Represent a G_FCMP.
Represent a G_ICMP.
Abstract class that contains various methods for clients to notify about changes.
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
void finishedChangingAllUsesOfReg()
All instructions reported as changing by changingAllUsesOfReg() have finished being changed.
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
virtual void erasingInstr(MachineInstr &MI)=0
An instruction is about to be erased.
void changingAllUsesOfReg(const MachineRegisterInfo &MRI, Register Reg)
All the instructions using the given register are being changed.
unsigned computeNumSignBits(Register R, const APInt &DemandedElts, unsigned Depth=0)
KnownBits getKnownBits(Register R)
APInt getKnownZeroes(Register R)
Simple wrapper observer that takes several observers, and calls each one for each event.
Represents a G_IMPLICIT_DEF.
Represents any type of generic load or store.
Register getPointerReg() const
Get the source register of the pointer value.
Represents a G_LOAD.
Represents a logical binary operation.
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
bool isAtomic() const
Returns true if the attached MachineMemOperand has the atomic flag set.
LocationSize getMemSizeInBits() const
Returns the size in bits of the memory access.
bool isSimple() const
Returns true if the memory operation is neither atomic or volatile.
Register getSourceReg(unsigned I) const
Returns the I'th source register.
unsigned getNumSources() const
Returns the number of source registers.
Represents a G_MERGE_VALUES.
Represents a logical or.
Represents a G_PTR_ADD.
Represents a G_SELECT.
Register getCondReg() const
Represents a G_ZEXTLOAD.
Register getReg(unsigned Idx) const
Access the Idx'th operand as a register and return it.
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:278
constexpr bool isScalableVector() const
Returns true if the LLT is a scalable vector.
Definition: LowLevelType.h:182
constexpr unsigned getScalarSizeInBits() const
Definition: LowLevelType.h:267
constexpr bool isScalar() const
Definition: LowLevelType.h:146
static constexpr LLT vector(ElementCount EC, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
Definition: LowLevelType.h:64
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
Definition: LowLevelType.h:42
constexpr bool isValid() const
Definition: LowLevelType.h:145
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
Definition: LowLevelType.h:159
constexpr bool isVector() const
Definition: LowLevelType.h:148
constexpr bool isByteSized() const
Definition: LowLevelType.h:263
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
Definition: LowLevelType.h:193
constexpr bool isPointer() const
Definition: LowLevelType.h:149
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
Definition: LowLevelType.h:290
constexpr ElementCount getElementCount() const
Definition: LowLevelType.h:184
constexpr LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
Definition: LowLevelType.h:221
constexpr unsigned getAddressSpace() const
Definition: LowLevelType.h:280
constexpr bool isFixedVector() const
Returns true if the LLT is a fixed vector.
Definition: LowLevelType.h:178
constexpr LLT getScalarType() const
Definition: LowLevelType.h:208
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
Definition: LowLevelType.h:203
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
@ Legalized
Instruction has been legalized and the MachineFunction changed.
LegalizeResult lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0)
Register getVectorElementPointer(Register VecPtr, LLT VecTy, Register Index)
Get a pointer to vector element Index located in memory for a vector of type VecTy starting at a base...
bool isLegalOrCustom(const LegalityQuery &Query) const
LegalizeActionStep getAction(const LegalityQuery &Query) const
Determine what action should be taken to legalize the described instruction.
TypeSize getValue() const
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition: MCInstrInfo.h:63
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
bool isLayoutSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB will be emitted immediately after this block, such that if this bloc...
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
bool dominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Helper class to build MachineInstr.
MachineInstrBuilder insertInstr(MachineInstrBuilder MIB)
Insert an existing instruction at the insertion point.
void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II)
Set the insertion point before the specified position.
LLVMContext & getContext() const
MachineInstrBuilder buildAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ADD Op0, Op1.
MachineInstrBuilder buildUndef(const DstOp &Res)
Build and insert Res = IMPLICIT_DEF.
MachineInstrBuilder buildNot(const DstOp &Dst, const SrcOp &Src0)
Build and insert a bitwise not, NegOne = G_CONSTANT -1 Res = G_OR Op0, NegOne.
MachineInstrBuilder buildCTTZ(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTTZ Op0, Src0.
MachineInstrBuilder buildAShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildUnmerge(ArrayRef< LLT > Res, const SrcOp &Op)
Build and insert Res0, ... = G_UNMERGE_VALUES Op.
MachineInstrBuilder buildSelect(const DstOp &Res, const SrcOp &Tst, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert a Res = G_SELECT Tst, Op0, Op1.
MachineInstrBuilder buildAnd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_AND Op0, Op1.
MachineInstrBuilder buildICmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1)
Build and insert a Res = G_ICMP Pred, Op0, Op1.
MachineInstrBuilder buildCast(const DstOp &Dst, const SrcOp &Src)
Build and insert an appropriate cast between two registers of equal size.
const TargetInstrInfo & getTII()
MachineInstrBuilder buildURem(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_UREM Op0, Op1.
MachineInstrBuilder buildLShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildConcatVectors(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_CONCAT_VECTORS Op0, ...
MachineInstrBuilder buildSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_SUB Op0, Op1.
MachineInstrBuilder buildIntToPtr(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_INTTOPTR instruction.
MachineInstrBuilder buildBuildVector(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_BUILD_VECTOR Op0, ...
MachineInstrBuilder buildNeg(const DstOp &Dst, const SrcOp &Src0)
Build and insert integer negation: Zero = G_CONSTANT 0; Res = G_SUB Zero, Op0.
MachineInstrBuilder buildCTLZ(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTLZ Src0.
MachineInstrBuilder buildMergeLikeInstr(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_MERGE_VALUES Op0, ... or Res = G_BUILD_VECTOR Op0, ... or Res = G_CONCAT_VEC...
MachineInstrBuilder buildPtrAdd(const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_PTR_ADD Op0, Op1.
MachineInstrBuilder buildZExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ZEXT Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes...
MachineInstrBuilder buildExtractVectorElementConstant(const DstOp &Res, const SrcOp &Val, const int Idx)
Build and insert Res = G_EXTRACT_VECTOR_ELT Val, Idx.
virtual MachineInstrBuilder buildFConstant(const DstOp &Res, const ConstantFP &Val)
Build and insert Res = G_FCONSTANT Val.
MachineInstrBuilder buildShl(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineInstrBuilder buildZExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ZEXT Op.
MachineFunction & getMF()
Getter for the function we currently build.
void setInstrAndDebugLoc(MachineInstr &MI)
Set the insertion point to before MI, and set the debug loc to MI's loc.
MachineInstrBuilder buildExtOrTrunc(unsigned ExtOpc, const DstOp &Res, const SrcOp &Op)
Build and insert Res = ExtOpc, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes of...
void setDebugLoc(const DebugLoc &DL)
Set the debug location to DL for all the next build instructions.
MachineInstrBuilder buildFNeg(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FNEG Op0.
MachineInstrBuilder buildTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_TRUNC Op.
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineInstrBuilder buildOr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_OR Op0, Op1.
MachineInstrBuilder buildInstrNoInsert(unsigned Opcode)
Build but don't insert <empty> = Opcode <empty>.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
MachineInstrBuilder buildLoadInstr(unsigned Opcode, const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = <opcode> Addr, MMO.
MachineInstrBuilder buildXor(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_XOR Op0, Op1.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
MachineInstrBuilder buildPtrToInt(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_PTRTOINT instruction.
MachineInstrBuilder buildFCanonicalize(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Dst = G_FCANONICALIZE Src0.
MachineInstrBuilder buildSExtInReg(const DstOp &Res, const SrcOp &Op, int64_t ImmOp)
Build and insert Res = G_SEXT_INREG Op, ImmOp.
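The MachineIRBuilder buildXXX entries above are the emission side of the combiner: each one creates a G_* instruction at the current insertion point. A minimal sketch, assuming a hypothetical helper name and that X is a generic virtual register with a scalar LLT:

#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"

using namespace llvm;

// Hypothetical helper (not part of this file): emit X + 1 at the builder's
// current insertion point and return the result register.
static Register buildIncrement(MachineIRBuilder &B, Register X) {
  LLT Ty = B.getMRI()->getType(X);          // low-level type of X
  auto One = B.buildConstant(Ty, 1);        // One = G_CONSTANT 1
  return B.buildAdd(Ty, X, One).getReg(0);  // Res = G_ADD X, One
}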
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
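The MachineInstrBuilder operand-appending methods above (addDef, addUse, addImm, addMBB) cover the cases where no typed buildXXX wrapper fits. A sketch of the raw form, with the opcode and operands chosen only for illustration:

#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/TargetOpcodes.h"

using namespace llvm;

// Hypothetical emission: Dst = G_SEXT_INREG Src, 8, built operand by
// operand instead of through buildSExtInReg.
static void buildSExtInReg8(MachineIRBuilder &B, Register Dst, Register Src) {
  B.buildInstr(TargetOpcode::G_SEXT_INREG)
      .addDef(Dst)
      .addUse(Src)
      .addImm(8);
}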
Representation of each machine instruction.
Definition: MachineInstr.h:69
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:546
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:329
int findRegisterDefOperandIdx(Register Reg, bool isDead=false, bool Overlap=false, const TargetRegisterInfo *TRI=nullptr) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
bool isDereferenceableInvariantLoad() const
Return true if this load instruction never traps and points to a memory location whose value doesn't ...
bool getFlag(MIFlag Flag) const
Return whether an MI flag is set.
Definition: MachineInstr.h:379
void cloneMemRefs(MachineFunction &MF, const MachineInstr &MI)
Clone another MachineInstr's memory reference descriptor list and replace ours with it.
unsigned getNumOperands() const
Returns the total number of operands.
Definition: MachineInstr.h:549
void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:556
uint32_t getFlags() const
Return the MI flags bitvector.
Definition: MachineInstr.h:374
A description of a memory reference used in the backend.
LLT getMemoryType() const
Return the memory type of the memory reference.
unsigned getAddrSpace() const
const MachinePointerInfo & getPointerInfo() const
Align getAlign() const
Return the minimum known alignment in bytes of the actual memory reference.
MachineOperand class - Representation of each machine instruction operand.
const ConstantInt * getCImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
void setReg(Register Reg)
Change the register this operand corresponds to.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void setMBB(MachineBasicBlock *MBB)
void setPredicate(unsigned Predicate)
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
unsigned getPredicate() const
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool hasOneNonDBGUse(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register.
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
use_instr_iterator use_instr_begin(Register RegNo) const
bool use_nodbg_empty(Register RegNo) const
use_nodbg_empty - Return true if there are no non-Debug instructions using the specified register.
const RegClassOrRegBank & getRegClassOrRegBank(Register Reg) const
Return the register bank or register class of Reg.
void setRegClassOrRegBank(Register Reg, const RegClassOrRegBank &RCOrRB)
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
bool hasOneUse(Register RegNo) const
hasOneUse - Return true if there is exactly one instruction using the specified register.
use_instr_nodbg_iterator use_instr_nodbg_begin(Register RegNo) const
void setRegBank(Register Reg, const RegisterBank &RegBank)
Set the register bank to RegBank for Reg.
iterator_range< use_instr_nodbg_iterator > use_nodbg_instructions(Register Reg) const
Register createGenericVirtualRegister(LLT Ty, StringRef Name="")
Create and return a new generic virtual register with low-level type Ty.
iterator_range< use_instr_iterator > use_instructions(Register Reg) const
Register cloneVirtualRegister(Register VReg, StringRef Name="")
Create and return a new virtual register in the function with the same attributes as the given regist...
bool constrainRegAttrs(Register Reg, Register ConstrainingReg, unsigned MinNumRegs=0)
Constrain the register class or the register bank of the virtual register Reg (and low-level type) to...
iterator_range< use_iterator > use_operands(Register Reg) const
void replaceRegWith(Register FromReg, Register ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
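The MachineRegisterInfo queries listed above (getVRegDef, getType, hasOneNonDBGUse, ...) are how combines inspect the SSA def-use graph before rewriting it. A sketch of the common "single-use constant operand" check, with the predicate name invented here:

#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"

using namespace llvm;

// Hypothetical predicate: true if Reg is produced by a G_CONSTANT that has
// exactly one non-debug user, a shape many folds require before rewriting.
static bool isSingleUseConstantDef(Register Reg,
                                   const MachineRegisterInfo &MRI) {
  const MachineInstr *Def = MRI.getVRegDef(Reg);
  return Def && Def->getOpcode() == TargetOpcode::G_CONSTANT &&
         MRI.hasOneNonDBGUse(Reg);
}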
This class implements the register bank concept.
Definition: RegisterBank.h:28
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isValid() const
Definition: Register.h:116
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:98
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:264
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
SmallBitVector & set()
bool all() const
Returns true if all bits are set.
size_type size() const
Definition: SmallPtrSet.h:94
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:342
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:427
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void resize(size_type N)
Definition: SmallVector.h:651
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
virtual bool isExtendLikelyToBeFolded(MachineInstr &ExtMI, MachineRegisterInfo &MRI) const
Given the generic extension instruction ExtMI, returns true if this extension is a likely candidate f...
virtual bool produceSameValue(const MachineInstr &MI0, const MachineInstr &MI1, const MachineRegisterInfo *MRI=nullptr) const
Return true if two machine instructions would produce identical values.
virtual LLVM_READONLY LLT getPreferredShiftAmountTy(LLT ShiftValueTy) const
Return the preferred type to use for a shift opcode, given the shifted amount type is ShiftValueTy.
virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AddrSpace, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual bool isReassocProfitable(SelectionDAG &DAG, SDValue N0, SDValue N1) const
virtual const TargetLowering * getTargetLowering() const
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
LLVM Value Representation.
Definition: Value.h:74
Value(Type *Ty, unsigned scid)
Definition: Value.cpp:53
self_iterator getIterator()
Definition: ilist_node.h:109
#define INT64_MAX
Definition: DataTypes.h:71
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ Legal
The operation is expected to be selectable directly by the target, and no transformation is necessary...
Definition: LegalizerInfo.h:47
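Combines gate most rewrites on legality, using LegalizerInfo::getAction (listed near the top of this index) and the Legal action above: after the legalizer has run, a combine should not introduce operations the target cannot select. A hedged sketch of that check, with the function name invented here:

#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"

using namespace llvm;

// Hypothetical query: would a G_UMULH of type Ty be directly selectable?
static bool umulhIsLegal(const LegalizerInfo &LI, LLT Ty) {
  return LI.getAction({TargetOpcode::G_UMULH, {Ty}}).Action ==
         LegalizeActions::Legal;
}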
operand_type_match m_Reg()
SpecificConstantOrSplatMatch m_SpecificICstOrSplat(int64_t RequestedValue)
Matches a RequestedValue constant or a constant splat of RequestedValue.
BinaryOp_match< LHS, RHS, TargetOpcode::G_BUILD_VECTOR, false > m_GBuildVector(const LHS &L, const RHS &R)
GCstAndRegMatch m_GCst(std::optional< ValueAndVReg > &ValReg)
SpecificConstantMatch m_SpecificICst(int64_t RequestedValue)
Matches a constant equal to RequestedValue.
operand_type_match m_Pred()
UnaryOp_match< SrcTy, TargetOpcode::G_ZEXT > m_GZExt(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_XOR, true > m_GXor(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_SEXT > m_GSExt(const SrcTy &Src)
UnaryOp_match< SrcTy, TargetOpcode::G_FPEXT > m_GFPExt(const SrcTy &Src)
ConstantMatch< APInt > m_ICst(APInt &Cst)
UnaryOp_match< SrcTy, TargetOpcode::G_INTTOPTR > m_GIntToPtr(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_ADD, true > m_GAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_OR, true > m_GOr(const LHS &L, const RHS &R)
ICstOrSplatMatch< APInt > m_ICstOrSplat(APInt &Cst)
ImplicitDefMatch m_GImplicitDef()
OneNonDBGUse_match< SubPat > m_OneNonDBGUse(const SubPat &SP)
CheckType m_SpecificType(LLT Ty)
BinaryOp_match< LHS, RHS, TargetOpcode::G_FADD, true > m_GFAdd(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_PTRTOINT > m_GPtrToInt(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_FSUB, false > m_GFSub(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SUB > m_GSub(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_ASHR, false > m_GAShr(const LHS &L, const RHS &R)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
BinaryOp_match< LHS, RHS, TargetOpcode::G_PTR_ADD, false > m_GPtrAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SHL, false > m_GShl(const LHS &L, const RHS &R)
Or< Preds... > m_any_of(Preds &&... preds)
BinaryOp_match< LHS, RHS, TargetOpcode::G_AND, true > m_GAnd(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_BITCAST > m_GBitcast(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_BUILD_VECTOR_TRUNC, false > m_GBuildVectorTrunc(const LHS &L, const RHS &R)
bind_ty< MachineInstr * > m_MInstr(MachineInstr *&MI)
UnaryOp_match< SrcTy, TargetOpcode::G_FNEG > m_GFNeg(const SrcTy &Src)
CompareOp_match< Pred, LHS, RHS, TargetOpcode::G_ICMP, true > m_c_GICmp(const Pred &P, const LHS &L, const RHS &R)
G_ICMP matcher that also matches commuted compares.
TernaryOp_match< Src0Ty, Src1Ty, Src2Ty, TargetOpcode::G_INSERT_VECTOR_ELT > m_GInsertVecElt(const Src0Ty &Src0, const Src1Ty &Src1, const Src2Ty &Src2)
GFCstOrSplatGFCstMatch m_GFCstOrSplat(std::optional< FPValueAndVReg > &FPValReg)
And< Preds... > m_all_of(Preds &&... preds)
BinaryOp_match< LHS, RHS, TargetOpcode::G_LSHR, false > m_GLShr(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_ANYEXT > m_GAnyExt(const SrcTy &Src)
UnaryOp_match< SrcTy, TargetOpcode::G_TRUNC > m_GTrunc(const SrcTy &Src)
CompareOp_match< Pred, LHS, RHS, TargetOpcode::G_FCMP > m_GFCmp(const Pred &P, const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Definition: PatternMatch.h:100
BinaryOp_match< cst_pred_ty< is_zero_int >, ValTy, Instruction::Sub > m_Neg(const ValTy &V)
Matches a 'Neg' as 'sub 0, V'.
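The matchers above are the pattern-recognition side of the combiner: mi_match walks the definitions of a virtual register against a combinator tree and binds the captured pieces. A minimal sketch, with the helper name and pattern chosen for illustration:

#include "llvm/ADT/APInt.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"

using namespace llvm;
using namespace MIPatternMatch;

// Hypothetical matcher: recognise Root = G_SHL Src, C and capture both the
// shifted register and the constant shift amount.
static bool matchShlByConstant(Register Root, const MachineRegisterInfo &MRI,
                               Register &Src, APInt &ShiftAmt) {
  return mi_match(Root, MRI, m_GShl(m_Reg(Src), m_ICst(ShiftAmt)));
}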
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
@ Offset
Definition: DWP.cpp:456
bool isBuildVectorAllZeros(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndef=false)
Return true if the specified instruction is a G_BUILD_VECTOR or G_BUILD_VECTOR_TRUNC where all of the...
Definition: Utils.cpp:1412
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by a single def instruction that is Opcode.
Definition: Utils.cpp:625
static double log2(double V)
const ConstantFP * getConstantFPVRegVal(Register VReg, const MachineRegisterInfo &MRI)
Definition: Utils.cpp:438
EVT getApproximateEVTForLLT(LLT Ty, const DataLayout &DL, LLVMContext &Ctx)
std::optional< APInt > getIConstantVRegVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT, return the corresponding value.
Definition: Utils.cpp:293
std::optional< APInt > getIConstantSplatVal(const Register Reg, const MachineRegisterInfo &MRI)
Definition: Utils.cpp:1372
bool isAllOnesOrAllOnesSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant -1 integer or a splatted vector of a constant -1 integer (with...
Definition: Utils.cpp:1525
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition: bit.h:307
const llvm::fltSemantics & getFltSemanticForLLT(LLT Ty)
Get the appropriate floating point arithmetic semantic based on the bit size of the given scalar LLT.
std::optional< APFloat > ConstantFoldFPBinOp(unsigned Opcode, const Register Op1, const Register Op2, const MachineRegisterInfo &MRI)
Definition: Utils.cpp:713
MVT getMVTForLLT(LLT Ty)
Get a rough equivalent of an MVT for a given LLT.
bool isKnownToBeAPowerOfTwo(const Value *V, const DataLayout &DL, bool OrZero=false, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Return true if the given value is known to have exactly one bit set when defined.
std::optional< APInt > isConstantOrConstantSplatVector(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a constant integer or a splat vector of constant integers.
Definition: Utils.cpp:1495
bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition: Utils.cpp:1507
MachineInstr * getDefIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, folding away any trivial copies.
Definition: Utils.cpp:465
bool matchUnaryPredicate(const MachineRegisterInfo &MRI, Register Reg, std::function< bool(const Constant *ConstVal)> Match, bool AllowUndefs=false)
Attempt to match a unary predicate against a scalar/splat constant or every element of a constant G_B...
Definition: Utils.cpp:1540
bool isConstTrueVal(const TargetLowering &TLI, int64_t Val, bool IsVector, bool IsFP)
Returns true if given the TargetLowering's boolean contents information, the value Val contains a tru...
Definition: Utils.cpp:1572
std::function< void(MachineIRBuilder &)> BuildFnTy
std::optional< APInt > ConstantFoldBinOp(unsigned Opcode, const Register Op1, const Register Op2, const MachineRegisterInfo &MRI)
Definition: Utils.cpp:644
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
bool isConstantOrConstantVector(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowFP=true, bool AllowOpaqueConstants=true)
Return true if the specified instruction is known to be a constant, or a vector of constants.
Definition: Utils.cpp:1475
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:275
bool canReplaceReg(Register DstReg, Register SrcReg, MachineRegisterInfo &MRI)
Check if DstReg can be replaced with SrcReg depending on the register constraints.
Definition: Utils.cpp:199
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition: MathExtras.h:257
auto instructionsWithoutDebug(IterT It, IterT End, bool SkipPseudoOp=true)
Construct a range iterator which begins at It and moves forwards until End is reached,...
std::optional< FPValueAndVReg > getFConstantSplat(Register VReg, const MachineRegisterInfo &MRI, bool AllowUndef=true)
Returns a floating point scalar constant of a build vector splat if it exists.
Definition: Utils.cpp:1405
std::optional< APInt > ConstantFoldCastOp(unsigned Opcode, LLT DstTy, const Register Op0, const MachineRegisterInfo &MRI)
Definition: Utils.cpp:932
@ Xor
Bitwise or logical XOR of integers.
@ Add
Sum of integers.
DWARFExpression::Operation Op
std::optional< FPValueAndVReg > getFConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_FCONSTANT returns it...
Definition: Utils.cpp:427
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
int64_t getICmpTrueVal(const TargetLowering &TLI, bool IsVector, bool IsFP)
Returns an integer representing true, as defined by the TargetBooleanContents.
Definition: Utils.cpp:1597
std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Definition: Utils.cpp:413
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1749
std::optional< DefinitionAndSourceRegister > getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg and the underlying value Register, folding away any copies.
Definition: Utils.cpp:446
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
bool isKnownNeverNaN(const Value *V, unsigned Depth, const SimplifyQuery &SQ)
Return true if the floating-point scalar value is not a NaN or if the floating-point vector value has...
Register getSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the source register for Reg, folding away any trivial copies.
Definition: Utils.cpp:472
unsigned getFCmpCode(CmpInst::Predicate CC)
Similar to getICmpCode but for FCmpInst.
std::optional< int64_t > getIConstantSplatSExtVal(const Register Reg, const MachineRegisterInfo &MRI)
Definition: Utils.cpp:1390
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
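Several of the Utils.cpp helpers above answer one recurring question: is this virtual register (transitively) a known integer constant? A sketch of the usual lookup, with the wrapper name invented here:

#include <optional>

#include "llvm/ADT/APInt.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"

using namespace llvm;

// Hypothetical wrapper: return the constant value of operand Idx of MI,
// looking through copies and ext/trunc chains, or std::nullopt otherwise.
static std::optional<APInt>
getConstantOperand(const MachineInstr &MI, unsigned Idx,
                   const MachineRegisterInfo &MRI) {
  Register Reg = MI.getOperand(Idx).getReg();
  if (auto ValAndVReg = getIConstantVRegValWithLookThrough(Reg, MRI))
    return ValAndVReg->Value;
  return std::nullopt;
}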
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:230
static const fltSemantics & IEEEdouble() LLVM_READNONE
Definition: APFloat.cpp:250
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Simple struct used to hold a Register value and the instruction which defines it.
Definition: Utils.h:224
Extended Value Type.
Definition: ValueTypes.h:34
SmallVector< InstructionBuildSteps, 2 > InstrsToBuild
Describes instructions to be built during a combine.
static std::optional< bool > eq(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_EQ result.
Definition: KnownBits.cpp:494
static std::optional< bool > ne(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_NE result.
Definition: KnownBits.cpp:502
static std::optional< bool > sge(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SGE result.
Definition: KnownBits.cpp:542
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition: KnownBits.h:244
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition: KnownBits.h:141
static std::optional< bool > ugt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_UGT result.
Definition: KnownBits.cpp:508
static std::optional< bool > slt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SLT result.
Definition: KnownBits.cpp:548
static std::optional< bool > ult(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_ULT result.
Definition: KnownBits.cpp:524
static std::optional< bool > ule(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_ULE result.
Definition: KnownBits.cpp:528
static std::optional< bool > sle(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SLE result.
Definition: KnownBits.cpp:552
static std::optional< bool > sgt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SGT result.
Definition: KnownBits.cpp:532
static std::optional< bool > uge(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_UGE result.
Definition: KnownBits.cpp:518
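The KnownBits comparison helpers above let a combine decide an integer compare from bit-level facts alone, without concrete values; GISelKnownBits supplies those facts per register. A hedged sketch, with the fold name invented here:

#include <optional>

#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/Support/KnownBits.h"

using namespace llvm;

// Hypothetical fold: if the known bits of LHS and RHS already decide an
// unsigned '<', return that result; std::nullopt means "cannot tell".
static std::optional<bool> foldKnownULT(GISelKnownBits &KB, Register LHS,
                                        Register RHS) {
  return KnownBits::ult(KB.getKnownBits(LHS), KB.getKnownBits(RHS));
}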
The LegalityQuery object bundles together all the information that's needed to decide whether a given...
LegalizeAction Action
The action to take or the final answer.
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands,...
unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
MachinePointerInfo getWithOffset(int64_t O) const
MachineInstr * MI
const RegisterBank * Bank
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
Magic data for optimising unsigned division by a constant.
static UnsignedDivisionByConstantInfo get(const APInt &D, unsigned LeadingZeros=0, bool AllowEvenDivisorOptimization=true)
Calculate the magic numbers required to implement an unsigned integer division by a constant as a seq...
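UnsignedDivisionByConstantInfo is the analysis a udiv-by-constant combine consults before rewriting the division as a high multiply plus shifts. A standalone sketch of computing the magic values for dividing by 7 at 32 bits; the field names are quoted from memory of DivisionByConstantInfo.h and should be treated as assumptions:

#include "llvm/ADT/APInt.h"
#include "llvm/Support/DivisionByConstantInfo.h"

using namespace llvm;

static void exampleUDivMagic() {
  APInt Divisor(32, 7);
  UnsignedDivisionByConstantInfo Magics =
      UnsignedDivisionByConstantInfo::get(Divisor);
  (void)Magics.Magic;     // multiplier fed to the high multiply (assumed name)
  (void)Magics.IsAdd;     // whether the add-based fixup path is needed
  (void)Magics.PreShift;  // shift applied to the dividend before the multiply
  (void)Magics.PostShift; // shift applied to the high half afterwards
}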